Skip to content

Commit

Permalink
broke Hbeta out for testing, ported Hbeta and x2p to numpy
Browse files Browse the repository at this point in the history
  • Loading branch information
kylemcdonald committed Feb 25, 2016
1 parent acdff0d commit 57f735c
Show file tree
Hide file tree
Showing 3 changed files with 245 additions and 13 deletions.
236 changes: 236 additions & 0 deletions Porting.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import logging\n",
"from oct2py import Oct2Py, get_log\n",
"oc = Oct2Py(logger=get_log())\n",
"oc.addpath('./src')\n",
"oc.logger = get_log('new_log')\n",
"oc.logger.setLevel(logging.INFO)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# switch this to cython\n",
"def Hbeta(D, beta):\n",
" P = np.exp(-D * beta)\n",
" sumP = np.sum(P)\n",
" H = np.log(sumP) + beta * np.sum(np.multiply(D, P)) / sumP\n",
" P = P / sumP\n",
" return H, P"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ans = 2\n",
"'Hbeta' is a function from the file /Users/kyle/Documents/Learning/Parametric t-SNE/src/Hbeta.m\n",
"\n",
"Hbeta computes the Gaussian kernel values given a vector of\n",
" squared Euclidean distances, and the precision of the Gaussian kernel.\n",
" The function also computes the perplexity of the distribution.\n",
"\n",
"\n",
"Additional help for built-in functions and operators is\n",
"available in the online version of the manual. Use the command\n",
"'doc <topic>' to search the manual index.\n",
"\n",
"Help and information about Octave is also available on the WWW\n",
"at http://www.octave.org and via the [email protected]\n",
"mailing list.\n",
"CPU times: user 77.7 ms, sys: 38.8 ms, total: 117 ms\n",
"Wall time: 170 ms\n",
"CPU times: user 65 µs, sys: 11 µs, total: 76 µs\n",
"Wall time: 79.9 µs\n",
"True\n"
]
}
],
"source": [
"D = np.random.random(784)\n",
"beta = 1 + np.random.randint(64)\n",
"%time Ho, Po = oc.Hbeta(D, beta)\n",
"%time Hp, Pp = Hbeta(D, beta)\n",
"print np.allclose(Ho, Hp) and np.allclose(Po, Pp)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# switch this to cython\n",
"def x2p(X, u=15, tol=1e-4, print_iter=500, max_tries=50):\n",
" # Initialize some variables\n",
" n = X.shape[0] # number of instances\n",
" P = np.zeros((n, n)) # empty probability matrix\n",
" beta = np.ones(n) # empty precision vector\n",
" logU = np.log(u) # log of perplexity (= entropy)\n",
" \n",
" # Compute pairwise distances\n",
" print('Computing pairwise distances...')\n",
" sum_X = np.sum(np.square(X), axis=1)\n",
" # note: translating sum_X' from matlab to numpy means using reshape to add a dimension\n",
" D = sum_X + sum_X.reshape(-1,1) + -2 * X.dot(X.T)\n",
"\n",
" # Run over all datapoints\n",
" print('Computing P-values...')\n",
" for i in range(n):\n",
" \n",
" if print_iter and i % print_iter == 0:\n",
" print('Computed P-values {} of {} datapoints...'.format(i, n))\n",
" \n",
" # Set minimum and maximum values for precision\n",
" betamin = float('-inf')\n",
" betamax = float('+inf')\n",
" \n",
" # Compute the Gaussian kernel and entropy for the current precision\n",
" indices = np.concatenate((np.arange(0, i), np.arange(i + 1, n)))\n",
" Di = D[i, indices]\n",
" H, thisP = Hbeta(Di, beta[i])\n",
" \n",
" # Evaluate whether the perplexity is within tolerance\n",
" Hdiff = H - logU\n",
" tries = 0\n",
" while abs(Hdiff) > tol and tries < max_tries:\n",
" \n",
" # If not, increase or decrease precision\n",
" if Hdiff > 0:\n",
" betamin = beta[i]\n",
" if np.isinf(betamax):\n",
" beta[i] *= 2\n",
" else:\n",
" beta[i] = (beta[i] + betamax) / 2\n",
" else:\n",
" betamax = beta[i]\n",
" if np.isinf(betamin):\n",
" beta[i] /= 2\n",
" else:\n",
" beta[i] = (beta[i] + betamin) / 2\n",
" \n",
" # Recompute the values\n",
" H, thisP = Hbeta(Di, beta[i])\n",
" Hdiff = H - logU\n",
" tries += 1\n",
" \n",
" # Set the final row of P\n",
" P[i, indices] = thisP\n",
" \n",
" print('Mean value of sigma: {}'.format(np.mean(np.sqrt(1 / beta))))\n",
" print('Minimum value of sigma: {}'.format(np.min(np.sqrt(1 / beta))))\n",
" print('Maximum value of sigma: {}'.format(np.max(np.sqrt(1 / beta))))\n",
" \n",
" return P, beta"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Computing pairwise distances...\n",
"Computing P-values...\n",
"Computed P-values 500 of 1024 datapoints...\n",
"Computed P-values 1000 of 1024 datapoints...\n",
"Mean value of sigma: 0.47672\n",
"Minimum value of sigma: 0.29817\n",
"Maximum value of sigma: 0.64675\n",
"CPU times: user 992 ms, sys: 469 ms, total: 1.46 s\n",
"Wall time: 2.2 s\n",
"Computing pairwise distances...\n",
"Computing P-values...\n",
"Computed P-values 0 of 1024 datapoints...\n",
"Computed P-values 500 of 1024 datapoints...\n",
"Computed P-values 1000 of 1024 datapoints...\n",
"Mean value of sigma: 0.476717700287\n",
"Minimum value of sigma: 0.298168280787\n",
"Maximum value of sigma: 0.646745154894\n",
"CPU times: user 501 ms, sys: 5.93 ms, total: 507 ms\n",
"Wall time: 506 ms\n",
"True\n"
]
}
],
"source": [
"X = np.random.random((1024, 32))\n",
"%time Po, betao = oc.x2p(X)\n",
"%time Pp, betap = x2p(X)\n",
"print np.allclose(Po, Pp) and np.allclose(betao.flatten(), betap.flatten())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
9 changes: 9 additions & 0 deletions src/Hbeta.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
function [H, P] = Hbeta(D, beta)
%Hbeta computes the Gaussian kernel values given a vector of
% squared Euclidean distances, and the precision of the Gaussian kernel.
% The function also computes the perplexity of the distribution.
P = exp(-D * beta);
sumP = sum(P);
H = log(sumP) + beta * sum(D .* P) / sumP;
P = P / sumP;
end
13 changes: 0 additions & 13 deletions src/x2p.m
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,3 @@
disp(['Minimum value of sigma: ' num2str(min(sqrt(1 ./ beta)))]);
disp(['Maximum value of sigma: ' num2str(max(sqrt(1 ./ beta)))]);
end



% Function that computes the Gaussian kernel values given a vector of
% squared Euclidean distances, and the precision of the Gaussian kernel.
% The function also computes the perplexity of the distribution.
function [H, P] = Hbeta(D, beta)
P = exp(-D * beta);
sumP = sum(P);
H = log(sumP) + beta * sum(D .* P) / sumP;
P = P / sumP;
end

0 comments on commit 57f735c

Please sign in to comment.