Commit acdff0d: some first steps at understanding the paper / code

1 parent 2a03b23 · Showing 1 changed file with 215 additions and 0 deletions.
@@ -0,0 +1,215 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# import os\n",
"# os.environ['KERAS_BACKEND'] = 'tensorflow'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from __future__ import print_function\n",
"import numpy as np\n",
"np.random.seed(0)\n",
"\n",
"from keras.datasets import mnist\n",
"from keras.models import Sequential\n",
"from keras.layers.core import Dense, Dropout, Activation\n",
"from keras.optimizers import SGD\n",
"from keras.utils import np_utils\n",
"from keras.objectives import categorical_crossentropy"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.19 s, sys: 429 ms, total: 1.62 s\n",
"Wall time: 1.64 s\n",
"60000 train samples\n",
"10000 test samples\n"
]
}
],
"source": [
"%time (X_train, y_train), (X_test, y_test) = mnist.load_data()\n",
"\n",
"X_train = X_train.reshape(60000, 784)\n",
"X_test = X_test.reshape(10000, 784)\n",
"X_train = X_train.astype('float32')\n",
"X_test = X_test.astype('float32')\n",
"X_train /= 255\n",
"X_test /= 255\n",
"print(X_train.shape[0], 'train samples')\n",
"print(X_test.shape[0], 'test samples')"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import theano\n",
"from theano.tensor.extra_ops import fill_diagonal\n",
"from theano import tensor as T\n",
"from keras import backend as K\n",
"\n",
"batch_size = 128\n",
"\n",
"# function [H, P] = Hbeta(D, beta)\n", | ||
"def Hbeta(D, beta):\n", | ||
" # P = exp(-D * beta);\n", | ||
" P = K.exp(-D * beta)\n", | ||
" # sumP = sum(P);\n", | ||
" sumP = K.sum(P)\n", | ||
" # H = log(sumP) + beta * sum(D .* P) / sumP;\n", | ||
" H = K.log(sumP) + beta * K.sum(K.prod(D, P)) / sumP\n", | ||
" # P = P / sumP;\n", | ||
" P = P / sumP\n", | ||
" return H, P\n", | ||
"\n", | ||
"# https://github.com/kylemcdonald/Parametric-t-SNE/blob/master/src/x2p.m\n", | ||
"def x2p(X, u=15, tol=1e-4):\n", | ||
" # n = size(X, 1); % number of instances\n", | ||
"# n = K.eval(K.shape(X)[0]) # this doesn't work: \"An input of the graph .. was not provided and not given a value\"\n", | ||
" n = batch_size\n", | ||
" # P = zeros(n, n); % empty probability matrix\n", | ||
" P = K.zeros((n, n))\n", | ||
" # beta = ones(n, 1); % empty precision vector\n", | ||
" beta = K.ones((n, 1))\n", | ||
" # logU = log(u); % log of perplexity (= entropy)\n", | ||
" logU = K.log(u)\n", | ||
" \n", | ||
" # sum_X = sum(X .^ 2, 2);\n", | ||
" sum_X = K.sum(K.square(X), axis=1)\n", | ||
" # D = bsxfun(@plus, sum_X, bsxfun(@plus, sum_X', -2 * X * X'));\n", | ||
" D = sum_X + (K.transpose(sum_X) + -2 * X * K.transpose(X))\n", | ||
" \n", | ||
" for i in range(n):\n", | ||
" # Di = D(i, [1:i-1 i+1:end]);\n", | ||
" Di = D[i] # can we use the whole row and make the diagonal zero later?\n", | ||
" # [H, thisP] = Hbeta(Di, beta(i));\n", | ||
" H, thisP = Hbeta(Di, beta[i])\n", | ||
" \n", | ||
" # ... a lot more right here\n", | ||
" \n", | ||
" P[i] = thisP\n", | ||
" \n", | ||
" return P #, beta\n", | ||
" \n", | ||
"# curX is the high-dimensional input (Keras loss functions call this y_true)\n", | ||
"# activations is the low-dimensional output (Keras loss functions call this y_pred)\n", | ||
"def tsne(curX, activations):\n", | ||
" perplexity = 30\n", | ||
" \n", | ||
" # these joint probabilities should be pre-computed per-batch and passed to the fit() function\n", | ||
" \n", | ||
" # P{i} = x2p(curX{i}, perplexity, 1e-5); % compute affinities using fixed perplexity\n", | ||
" P = x2p(curX, perplexity, 1e-5)\n", | ||
" # P{i}(isnan(P{i})) = 0; % make sure we don't have NaN's\n", | ||
" # P = T.set_subtensor(P[T.isnan(P)], 0) # something like this?\n", | ||
" # P = T.switch(T.isnan(P), 0, P) # or like this? \n", | ||
" # P{i} = (P{i} + P{i}') / 2; % make symmetric\n", | ||
" P = (P + K.transpose(P)) / 2 # this seems to be missing the step of normalizing by \"2n\", just normalizes by \"2\"\n", | ||
" # P{i} = P{i} ./ sum(P{i}(:)); % obtain estimation of joint probabilities\n", | ||
" P = P / K.sum(P) # but maybe this makes up for the missing \"n\" above?\n", | ||
" # P{i} = max(P{i}, eps);\n", | ||
" P = K.maximum(P, K.epsilon())\n", | ||
"\n", | ||
" # v = length(network{end}.bias_upW) - 1\n", | ||
" v = K.shape(activations)[1] - 1\n", | ||
" \n", | ||
" # sum_act = sum(activations .^ 2, 2)\n", | ||
" sum_act = K.sum(K.square(activations), axis=1)\n", | ||
" # Q = (1 + (bsxfun(@plus, sum_act, bsxfun(@plus, sum_act', -2 * activations * activations')) ./ v)) .^ -((v + 1) / 2)\n", | ||
" Q = K.pow(1 + ((sum_act + (K.transpose(sum_act) + -2 * activations * K.transpose(activations))) / v), -((v + 1) / 2))\n", | ||
" # Q(1:n+1:end) = 0\n", | ||
" fill_diagonal(Q, 0) # Theano-only\n", | ||
" # Q = Q ./ sum(Q(:))\n", | ||
" Q = K.maximum(Q, K.epsilon())\n", | ||
" \n", | ||
" # C = sum(sum(P{1} .* log((P{1} + eps) ./ (Q + eps))))\n", | ||
" C = K.sum(K.sum(K.prod(P, K.log((P + K.epsilon()) / (Q + K.epsilon())))))\n", | ||
" return C" | ||
] | ||
}, | ||
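{
"cell_type": "markdown",
"metadata": {},
"source": [
"A sketch of the \"pre-computed per-batch\" idea from the comment in `tsne()`: build the joint probabilities for each batch with plain NumPy up front and hand them to `fit()` as the targets, so the loss only has to form `Q` and the KL divergence. This is an illustration rather than the paper's code: the fixed `beta` (no per-point search for the target perplexity) and the `p_joint_numpy` name are assumptions made here for brevity."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def p_joint_numpy(batch, beta=1.0):\n",
"    # squared pairwise distances, same expansion as in x2p\n",
"    sum_X = np.sum(np.square(batch), axis=1)\n",
"    D = sum_X[:, None] + sum_X[None, :] - 2 * np.dot(batch, batch.T)\n",
"    P = np.exp(-D * beta)\n",
"    np.fill_diagonal(P, 0.)            # p_ii = 0\n",
"    P /= P.sum(axis=1, keepdims=True)  # conditionals p_j|i, one per row\n",
"    P = (P + P.T) / (2. * P.shape[0])  # symmetrize and normalize by 2n\n",
"    return np.maximum(P, 1e-12)\n",
"\n",
"# n = (X_train.shape[0] // batch_size) * batch_size\n",
"# P_train = np.concatenate([p_joint_numpy(X_train[i:i + batch_size])\n",
"#                           for i in range(0, n, batch_size)])\n",
"# the loss would then read P from y_true instead of calling x2p()"
]
},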
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"model = Sequential()\n",
"model.add(Dense(500, input_shape=(784,)))\n",
"model.add(Activation('relu'))\n",
"model.add(Dense(500))\n",
"model.add(Activation('relu'))\n",
"model.add(Dense(2000))\n",
"model.add(Activation('relu'))\n",
"model.add(Dense(2))\n",
"\n",
"sgd = SGD()\n",
"%time model.compile(loss=tsne, optimizer=sgd)"
]
},
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"model.fit(X_train, Y_train,\n", | ||
" batch_size=batch_size,\n", | ||
" nb_epoch=20,\n", | ||
" verbose=2)" | ||
] | ||
} | ||
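{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# after training, the 2-D embedding is just the network's output\n",
"# (a usage sketch added for illustration, not part of this commit)\n",
"# embedding = model.predict(X_test)\n",
"# print(embedding.shape)  # (10000, 2)"
]
}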
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}