-
Notifications
You must be signed in to change notification settings - Fork 89
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
broke Hbeta out for testing, ported Hbeta and x2p to numpy
- Loading branch information
1 parent
acdff0d
commit 57f735c
Showing
3 changed files
with
245 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,236 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"%matplotlib inline\n", | ||
"from matplotlib import pyplot as plt\n", | ||
"import numpy as np" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import logging\n", | ||
"from oct2py import Oct2Py, get_log\n", | ||
"oc = Oct2Py(logger=get_log())\n", | ||
"oc.addpath('./src')\n", | ||
"oc.logger = get_log('new_log')\n", | ||
"oc.logger.setLevel(logging.INFO)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# switch this to cython\n", | ||
"def Hbeta(D, beta):\n", | ||
" P = np.exp(-D * beta)\n", | ||
" sumP = np.sum(P)\n", | ||
" H = np.log(sumP) + beta * np.sum(np.multiply(D, P)) / sumP\n", | ||
" P = P / sumP\n", | ||
" return H, P" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"ans = 2\n", | ||
"'Hbeta' is a function from the file /Users/kyle/Documents/Learning/Parametric t-SNE/src/Hbeta.m\n", | ||
"\n", | ||
"Hbeta computes the Gaussian kernel values given a vector of\n", | ||
" squared Euclidean distances, and the precision of the Gaussian kernel.\n", | ||
" The function also computes the perplexity of the distribution.\n", | ||
"\n", | ||
"\n", | ||
"Additional help for built-in functions and operators is\n", | ||
"available in the online version of the manual. Use the command\n", | ||
"'doc <topic>' to search the manual index.\n", | ||
"\n", | ||
"Help and information about Octave is also available on the WWW\n", | ||
"at http://www.octave.org and via the [email protected]\n", | ||
"mailing list.\n", | ||
"CPU times: user 77.7 ms, sys: 38.8 ms, total: 117 ms\n", | ||
"Wall time: 170 ms\n", | ||
"CPU times: user 65 µs, sys: 11 µs, total: 76 µs\n", | ||
"Wall time: 79.9 µs\n", | ||
"True\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"D = np.random.random(784)\n", | ||
"beta = 1 + np.random.randint(64)\n", | ||
"%time Ho, Po = oc.Hbeta(D, beta)\n", | ||
"%time Hp, Pp = Hbeta(D, beta)\n", | ||
"print np.allclose(Ho, Hp) and np.allclose(Po, Pp)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# switch this to cython\n", | ||
"def x2p(X, u=15, tol=1e-4, print_iter=500, max_tries=50):\n", | ||
" # Initialize some variables\n", | ||
" n = X.shape[0] # number of instances\n", | ||
" P = np.zeros((n, n)) # empty probability matrix\n", | ||
" beta = np.ones(n) # empty precision vector\n", | ||
" logU = np.log(u) # log of perplexity (= entropy)\n", | ||
" \n", | ||
" # Compute pairwise distances\n", | ||
" print('Computing pairwise distances...')\n", | ||
" sum_X = np.sum(np.square(X), axis=1)\n", | ||
" # note: translating sum_X' from matlab to numpy means using reshape to add a dimension\n", | ||
" D = sum_X + sum_X.reshape(-1,1) + -2 * X.dot(X.T)\n", | ||
"\n", | ||
" # Run over all datapoints\n", | ||
" print('Computing P-values...')\n", | ||
" for i in range(n):\n", | ||
" \n", | ||
" if print_iter and i % print_iter == 0:\n", | ||
" print('Computed P-values {} of {} datapoints...'.format(i, n))\n", | ||
" \n", | ||
" # Set minimum and maximum values for precision\n", | ||
" betamin = float('-inf')\n", | ||
" betamax = float('+inf')\n", | ||
" \n", | ||
" # Compute the Gaussian kernel and entropy for the current precision\n", | ||
" indices = np.concatenate((np.arange(0, i), np.arange(i + 1, n)))\n", | ||
" Di = D[i, indices]\n", | ||
" H, thisP = Hbeta(Di, beta[i])\n", | ||
" \n", | ||
" # Evaluate whether the perplexity is within tolerance\n", | ||
" Hdiff = H - logU\n", | ||
" tries = 0\n", | ||
" while abs(Hdiff) > tol and tries < max_tries:\n", | ||
" \n", | ||
" # If not, increase or decrease precision\n", | ||
" if Hdiff > 0:\n", | ||
" betamin = beta[i]\n", | ||
" if np.isinf(betamax):\n", | ||
" beta[i] *= 2\n", | ||
" else:\n", | ||
" beta[i] = (beta[i] + betamax) / 2\n", | ||
" else:\n", | ||
" betamax = beta[i]\n", | ||
" if np.isinf(betamin):\n", | ||
" beta[i] /= 2\n", | ||
" else:\n", | ||
" beta[i] = (beta[i] + betamin) / 2\n", | ||
" \n", | ||
" # Recompute the values\n", | ||
" H, thisP = Hbeta(Di, beta[i])\n", | ||
" Hdiff = H - logU\n", | ||
" tries += 1\n", | ||
" \n", | ||
" # Set the final row of P\n", | ||
" P[i, indices] = thisP\n", | ||
" \n", | ||
" print('Mean value of sigma: {}'.format(np.mean(np.sqrt(1 / beta))))\n", | ||
" print('Minimum value of sigma: {}'.format(np.min(np.sqrt(1 / beta))))\n", | ||
" print('Maximum value of sigma: {}'.format(np.max(np.sqrt(1 / beta))))\n", | ||
" \n", | ||
" return P, beta" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"metadata": { | ||
"collapsed": false, | ||
"scrolled": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Computing pairwise distances...\n", | ||
"Computing P-values...\n", | ||
"Computed P-values 500 of 1024 datapoints...\n", | ||
"Computed P-values 1000 of 1024 datapoints...\n", | ||
"Mean value of sigma: 0.47672\n", | ||
"Minimum value of sigma: 0.29817\n", | ||
"Maximum value of sigma: 0.64675\n", | ||
"CPU times: user 992 ms, sys: 469 ms, total: 1.46 s\n", | ||
"Wall time: 2.2 s\n", | ||
"Computing pairwise distances...\n", | ||
"Computing P-values...\n", | ||
"Computed P-values 0 of 1024 datapoints...\n", | ||
"Computed P-values 500 of 1024 datapoints...\n", | ||
"Computed P-values 1000 of 1024 datapoints...\n", | ||
"Mean value of sigma: 0.476717700287\n", | ||
"Minimum value of sigma: 0.298168280787\n", | ||
"Maximum value of sigma: 0.646745154894\n", | ||
"CPU times: user 501 ms, sys: 5.93 ms, total: 507 ms\n", | ||
"Wall time: 506 ms\n", | ||
"True\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"X = np.random.random((1024, 32))\n", | ||
"%time Po, betao = oc.x2p(X)\n", | ||
"%time Pp, betap = x2p(X)\n", | ||
"print np.allclose(Po, Pp) and np.allclose(betao.flatten(), betap.flatten())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 2", | ||
"language": "python", | ||
"name": "python2" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.10" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
function [H, P] = Hbeta(D, beta) | ||
%Hbeta computes the Gaussian kernel values given a vector of | ||
% squared Euclidean distances, and the precision of the Gaussian kernel. | ||
% The function also computes the perplexity of the distribution. | ||
P = exp(-D * beta); | ||
sumP = sum(P); | ||
H = log(sumP) + beta * sum(D .* P) / sumP; | ||
P = P / sumP; | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters