diff --git a/src/Math-TSNE/PMTSNE.class.st b/src/Math-TSNE/PMTSNE.class.st index 251da7c2a..92f183245 100644 --- a/src/Math-TSNE/PMTSNE.class.st +++ b/src/Math-TSNE/PMTSNE.class.st @@ -3,28 +3,47 @@ Implementation of t-SNE (t-Distributed Stochastic Neighbor Embedding) algorithm https://lvdmaaten.github.io/tsne/ -t-SNE is a technique for dimensionality reduction that is particularly well suited for the visualization of high-dimensional datasets. +t-SNE is a technique for dimensionality reduction that is particularly well suited for the visualization of high-dimensional datasets. " Class { #name : #PMTSNE, #superclass : #Object, #instVars : [ - 'ndims', - 'initialdims', + 'outputDims', + 'initialDims', 'perplexity', 'x', 'y', 'maxIter', - 'epsilon', 'sumY', 'initialMomentum', 'finalMomentum', - 'eta', - 'minGain' + 'learningRate', + 'minGain', + 'job', + 'computeErrorEvery' ], #category : #'Math-TSNE' } +{ #category : #running } +PMTSNE class >> entropyOf: distanceVector andPRow: pVector withBeta: beta [ + "Calculates gaussian kernel values for a distanceVector, along with perplexity + Inputs: distanceVector - a PMVector containing distances + pRow - calculated p-rows are stored here + beta - a Float, precision which is used to compute entropy + Outputs: entropy - log(Shannon's entropy) for calculated pVector + pVector - The conditional probability pji + " + + | pVectorTemp sumP entropy | + pVectorTemp := (-1 * distanceVector * beta) exp. + sumP := pVectorTemp sum max: (Float epsilon). + entropy := sumP ln + (beta * (distanceVector * pVectorTemp) / sumP). + pVector copyFrom: (pVectorTemp / sumP). + ^ entropy +] + { #category : #examples } PMTSNE class >> example1 [ | points | @@ -33,7 +52,6 @@ PMTSNE class >> example1 [ perplexity: 10; x: points; initialDims: 2; - epsilon: 5; start; y ] @@ -52,88 +70,296 @@ PMTSNE class >> gridDataGeneratorOf: size [ ^ PMMatrix rows: array ] +{ #category : #accessing } +PMTSNE >> computeErrorEvery [ + ^ computeErrorEvery +] + +{ #category : #accessing } +PMTSNE >> computeErrorEvery: aNumber [ + computeErrorEvery := aNumber +] + +{ #category : #accessing } +PMTSNE >> computeErrorEveryDefaultValue [ + ^ 10 +] + +{ #category : #'stepping and presenter' } +PMTSNE >> computeGradient: p withProgressUpdate: iteration [ + "Computes gradient of KL divergence" + + | num sumNum q pq dY tmp yiDiff error | + "Calculates num and q" + num := self computeLowDimensionalStudentT. + sumNum := num sum sum max: (Float epsilon). + q := num collect: [:element | + (element / sumNum) max: (Float epsilon) + ]. + pq := p - q. + dY := OrderedCollection new. + 1 to: (x dimension x) do: [ :i | + tmp := (pq rowAt: i) hadamardProduct: (num rowAt: i). + "Create a matrix of rows filled with 'tmp'" + tmp := (PMMatrix rows: ((1 to: outputDims) collect: [ :j | tmp])). + yiDiff := (PMMatrix rows: (y rowsCollect: [ :row | (y rowAt: i) - row ])) transpose. + dY add: (tmp hadamardProduct: yiDiff) sum + ]. + dY := PMMatrix rows: dY. + + (iteration % computeErrorEvery = 0) ifTrue: [ + error := PMMatrix rows: (x dimension x) columns: (x dimension x). + 1 to: (x dimension x) do: [ :i | + 1 to: (x dimension x) do: [ :j | + error rowAt: i columnAt: j put: ( + (p rowAt: i columnAt: j) * (((p rowAt: i columnAt: j) / (q rowAt: i columnAt: j)) ln) + ). + ]. + ]. + error := error sum sum. + job title: ('' join: { 'Step 3/3: Performing gradient descent '. iteration. '/'. maxIter.' error = '. error }). + job progress: (iteration/maxIter). + ]. + + ^ dY +] + +{ #category : #running } +PMTSNE >> computeLowDimensionalAffinities [ + "Computes affinity of the reduced dimension/output" + + | num sumNum q | + num := self computeLowDimensionalStudentT. + sumNum := num sum sum max: (Float epsilon). + q := num collect: [:element | + (element / sumNum) max: (Float epsilon) + ]. + ^ q +] + +{ #category : #running } +PMTSNE >> computeLowDimensionalStudentT [ + "Computes Student's T distribution with 1-degree of freedom for y" + + | num tmp | + sumY := (y hadamardProduct: y) sum. + tmp := ((y* (y transpose)) * (-2)). + tmp := PMMatrix rows: (tmp rowsCollect: [ :each| each + sumY]). + tmp := PMMatrix rows: ((tmp transpose) rowsCollect: [:each| each + sumY]). + num := (1 + tmp) collect: [ :ele | 1.0 / ele ]. + num := num setDiagonal: (PMVector zeros: (x dimension x)). + ^ num +] + { #category : #accessing } PMTSNE >> computePValues [ -" -#Compute P-values - P = x2p(X, 1e-5, perplexity); - P = P + Math.transpose(P); - P = P / Math.sum(P); - P = P * 4; # early exaggeration - P = Math.maximum(P, 1e-12); -" -| p n | -p := self x2p. -p := p + p transpose. -p := p / (p sum). -p := p *4. -p := p max: 1e-12. -^ p + "Computes joint probablity matrix P" + | p sumP | + p := self computePairwiseAffinities. + p := p + p transpose. + sumP := p sum sum. + p := p collect: [ :element | + "4 is for early exaggeration, will be removed after 100 iterations" + (element / sumP * 4) asFloat max: (Float epsilon). + ]. + ^ p ] -{ #category : #'as yet unclassified' } +{ #category : #running } PMTSNE >> computePairwiseAffinities [ -" - sum_Y = np.sum(np.square(Y), 1); - num = 1 / (1 + Math.add(Math.add(-2 * Math.dot(Y, Y.T), sum_Y).T, sum_Y)); - num[range(n), range(n)] = 0; - Q = num / Math.sum(num); - Q = Math.maximum(Q, 1e-12);" - |num tmp| - sumY := (x dot:x) sum. "PMVector" - tmp := ((y* (y transpose)) * (-2)) transpose. - num := ((PMMatrix rows: (tmp rowsCollect: [ :each| each + sumY])) ). + "Computes a similarity matrix by making sure each Gaussian has same perplexity. + It identifies required precision (beta = (1/ variance**2)) for each row using a + binary search. The precision is selected based on input perplexity. + " + + | p d beta logU n betaMin betaMax distanceVector pVector entropy tries entropyDiff | + n := x numberOfRows. + d := self computePairwiseDistances. + p := PMMatrix zerosRows: n cols: n. + beta := PMVector ones: n. + logU := self perplexity ln. + distanceVector := PMVector new: n - 1. + pVector := PMVector new: n - 1. + job title: 'Step 2/3: Computing joint probablity for point 1 of ', n asString. + job progress: 0.0. + 1 to: n do: [ :i | + "Job progress gets updated every 10 rows" + (i % 10 = 0) ifTrue: [ + job title: ('' join: {'Step 2/3: Computing joint probablity for point '. i asString. ' of '. n asString}). + job progress: (i/n). + ]. - + "Set minimum and maximum value of precision" + betaMin := Float infinity negated. + betaMax := Float infinity. + + "Ignore i-th element of the row d[i] and copy rest in distanceVector. + Also initialize pVector to 0" + 1 to: n do: [ :index | + (index = i) ifFalse: [ + (index < i) + ifTrue: [ distanceVector at: index put: (d rowAt: i columnAt: index). + pVector at: index put: 0. ] + ifFalse: [ distanceVector at: (index - 1) put: (d rowAt: i columnAt: index). + pVector at: (index - 1) put: 0.]. + ]. + ]. + entropy := self class entropyOf: distanceVector andPRow: pVector withBeta: (beta at: i). + entropyDiff := entropy - logU. + tries := 0. + [ (entropyDiff abs > 1e-5) & (tries < 50)] whileTrue: [ + (entropyDiff > 0) + ifTrue: [ + betaMin := beta at: i. + ((betaMax = Float infinity) | (betaMin = Float infinity negated)) + ifTrue: [ beta at: i put: ((beta at: i) * 2) ] + ifFalse: [ beta at: i put: (((beta at: i) + betaMax) / 2) + ]. + ] + ifFalse: [ + betaMax := beta at: i. + ((betaMax = Float infinity) | (betaMin = Float infinity negated)) + ifTrue: [ beta at: i put: ((beta at: i) / 2) ] + ifFalse: [ beta at: i put: (((beta at: i) + betaMin) / 2) + ]. + ]. + entropy := self class entropyOf: distanceVector andPRow: pVector withBeta: (beta at: i). + entropyDiff := entropy - logU. + tries := tries + 1. + ]. + 1 to: n do: [ :index | + (index = i) ifFalse: [ + (index < i) + ifTrue: [ p rowAt: i columnAt: index put: (pVector at: index) ] + ifFalse: [ p rowAt: i columnAt: index put: (pVector at: index - 1) ]. + ]. + ]. + ]. + ^ p ] -{ #category : #'as yet unclassified' } +{ #category : #running } PMTSNE >> computePairwiseDistances [ | sumX d tmp| - sumX := (x dot: x) sum. + sumX := (x hadamardProduct: x) sum. tmp := (x * (x transpose)) * (-2). tmp := PMMatrix rows: (tmp rowsCollect: [ :each| each + sumX ]). d := PMMatrix rows: ((tmp transpose) rowsCollect: [:each| each + sumX]). ^ d ] +{ #category : #accessing } +PMTSNE >> finalMomentum [ + ^ finalMomentum +] + +{ #category : #accessing } +PMTSNE >> finalMomentum: aFloat [ + finalMomentum := aFloat +] + +{ #category : #accessing } +PMTSNE >> finalMomentumDefaultValue [ + ^ 0.8 +] + { #category : #running } -PMTSNE >> epsilon: aFloat [ - epsilon := aFloat +PMTSNE >> gradientDescent [ + "Tries to minimize the cost, which is KL divergence" + + | p gains iY momentum dY yMeanAccumulator | + job title: 'Step 3/3: Performing gradient descent'. + p := self computePValues. + gains := PMMatrix onesRows: x dimension x cols: outputDims. + iY := PMMatrix zerosRows: x dimension x cols: outputDims. + momentum := initialMomentum. + 1 to: maxIter do: [ :iteration | + dY := self computeGradient: p withProgressUpdate: iteration. + momentum := iteration < 20 + ifTrue: [ initialMomentum ] + ifFalse: [ finalMomentum ]. + 1 to: (x dimension x) do: [ :i | + 1 to: outputDims do: [ :j | + ((dY rowAt: i columnAt: j) > 0) = ((iY rowAt: i columnAt: j) > 0) + ifTrue: [ gains rowAt: i columnAt: j put: (((gains rowAt: i columnAt: j) * 0.8) max: minGain) ] + ifFalse: [ gains rowAt: i columnAt: j put: (gains rowAt: i columnAt: j) + 0.2 ]. + ] + ]. + iY := iY * momentum - ((dY hadamardProduct: gains) * learningRate). + y := y + iY. + yMeanAccumulator := PMVectorAccumulator new: outputDims. + y rowsDo: [ :row | + yMeanAccumulator accumulate: row. + ]. + y := PMMatrix rows: (y rowsCollect: [ :row | + row - (yMeanAccumulator average) + ]). + "Stop exaggeration" + (iteration = 100) ifTrue: [ p := p * (1/4) ]. + ]. ] -{ #category : #'as yet unclassified' } +{ #category : #accessing } PMTSNE >> initialDims [ - ^initialdims + ^ initialDims ] -{ #category : #'as yet unclassified' } +{ #category : #accessing } PMTSNE >> initialDims: aFloat [ - initialdims := aFloat + initialDims := aFloat ] -{ #category : #'as yet unclassified' } +{ #category : #accessing } PMTSNE >> initialDimsDefaultValue [ ^ 50 ] +{ #category : #accessing } +PMTSNE >> initialMomentum [ + ^ initialMomentum +] + +{ #category : #accessing } +PMTSNE >> initialMomentum: aFloat [ + initialMomentum := aFloat +] + +{ #category : #accessing } +PMTSNE >> initialMomentumDefaultValue [ + ^ 0.5 +] + { #category : #initialization } PMTSNE >> initialize [ - maxIter := 1000. - initialMomentum := 0.5. - finalMomentum := 0.8. - eta := 500. - minGain := 0.01. + "These parameters rarely need to be modified" + maxIter := self maxIterDefaultValue. + initialMomentum := self initialMomentumDefaultValue. + finalMomentum := self finalMomentumDefaultValue. + learningRate := self learningRateDefaultValue. + minGain := self minGainDefaultValue. + computeErrorEvery := self computeErrorEveryDefaultValue. + self initializeJob. ] +{ #category : #initialization } +PMTSNE >> initializeJob [ + "This job represents all the steps in t-SNE" + job := [ + self initializeUninitializedParameters. + self reduceXToInputDims. + self initializeYWithRandomValues. + self gradientDescent. + ] asJob. +] + { #category : #initialization } PMTSNE >> initializeUninitializedParameters [ perplexity ifNil: [ perplexity := self perplexityDefaultValue ]. - ndims ifNil: [ ndims := self ndimsDefaultValue ]. - initialdims ifNil: [ initialdims := self initialDimsDefaultValue ] + outputDims ifNil: [ outputDims := self outputDimsDefaultValue ]. + initialDims ifNil: [ initialDims := self initialDimsDefaultValue ] ] { #category : #initialization } @@ -143,7 +369,7 @@ PMTSNE >> initializeYWithRandomValues [ | a b rows columns d | rows := x dimension x. - columns := ndims. + columns := outputDims. d := PMNormalDistribution new:0 sigma: 1. a := (1 to: rows) collect: [ :row | @@ -154,12 +380,62 @@ PMTSNE >> initializeYWithRandomValues [ ] { #category : #accessing } -PMTSNE >> ndims [ - ^ ndims +PMTSNE >> learningRate [ + ^ learningRate ] -{ #category : #'as yet unclassified' } -PMTSNE >> ndimsDefaultValue [ +{ #category : #accessing } +PMTSNE >> learningRate: aNumber [ + learningRate := aNumber +] + +{ #category : #accessing } +PMTSNE >> learningRateDefaultValue [ + ^ 500 +] + +{ #category : #accessing } +PMTSNE >> maxIter [ + ^ maxIter +] + +{ #category : #accessing } +PMTSNE >> maxIter: aNumber [ + maxIter := aNumber +] + +{ #category : #accessing } +PMTSNE >> maxIterDefaultValue [ + ^ 1000 +] + +{ #category : #accessing } +PMTSNE >> minGain [ + ^ minGain +] + +{ #category : #accessing } +PMTSNE >> minGain: aFloat [ + minGain := aFloat +] + +{ #category : #accessing } +PMTSNE >> minGainDefaultValue [ + ^ 0.01 +] + +{ #category : #accessing } +PMTSNE >> outputDims [ + ^ outputDims +] + +{ #category : #accessing } +PMTSNE >> outputDims: anInteger [ + outputDims := anInteger +] + +{ #category : #accessing } +PMTSNE >> outputDimsDefaultValue [ ^ 2 ] @@ -173,58 +449,38 @@ PMTSNE >> perplexity: aFloat [ perplexity := aFloat ] -{ #category : #'as yet unclassified' } +{ #category : #accessing } PMTSNE >> perplexityDefaultValue [ ^ 30.0 ] { #category : #running } -PMTSNE >> runPcaOnX [ - "Runs PCA on X in order to reduce its dimensionality to initialdims dimensions. - - print ""Preprocessing the data using PCA..."" - (n, d) = X.shape; - X = X - Math.tile(Math.mean(X, 0), (n, 1)); - (l, M) = Math.linalg.eig(Math.dot(X.T, X)); - Y = Math.dot(X, M[:,0:no_dims]); - return Y; -" +PMTSNE >> reduceXToInputDims [ + "Runs PCA on X in order to reduce its dimensionality to initialDims dimensions." - "| analyzer i | - analyzer := PMPrincipalComponentAnalyser new: initialdims. - i := 1. - x dimension x - timesRepeat: [ analyzer accumulate: (x rowAt: i). - i := i + 1 ]. - ^ analyzer components" + self reduceXToInputDimsUsing: PMPrincipalComponentAnalyserJacobiTransformation. ] -{ #category : #accessing } -PMTSNE >> start [ - self initializeUninitializedParameters. - self runPcaOnX. - self initializeYWithRandomValues. - self x2p +{ #category : #running } +PMTSNE >> reduceXToInputDimsUsing: aClass [ + "Runs aClass PCA on X in order to reduce its dimensionality to initialDims dimensions." + + | scaler pca | + job title: 'Step 1/3: Reducing input dimensions.'. + scaler := PMStandardizationScaler new. + initialDims ifNil: [ initialDims := self initialDimsDefaultValue ]. + pca := aClass new componentsNumber: (initialDims min: x dimension y). + x := pca fitAndTransform: (scaler fitAndTransform: x) ] -{ #category : #'stepping and presenter' } -PMTSNE >> step [ - self computePairwiseAffinities +{ #category : #running } +PMTSNE >> start [ + job run. ] -{ #category : #'as yet unclassified' } -PMTSNE >> x2p [ - | p d beta logU n betaMin betaMax| - n := x numberOfRows. - d := self computePairwiseDistances. - p := PMMatrix zerosRows: n cols: n. - beta := PMMatrix onesRows: n cols: 1. - logU := self perplexity log. - n timesRepeat: [ - betaMin := Float infinity. - betaMax := Float infinity negated. - - ] +{ #category : #accessing } +PMTSNE >> x [ + ^ x ] { #category : #accessing } @@ -236,3 +492,8 @@ PMTSNE >> x: aPMMatrix [ PMTSNE >> y [ ^ y ] + +{ #category : #accessing } +PMTSNE >> y: aNumber [ + y:= aNumber +] diff --git a/src/Math-Tests-TSNE/PMTSNETest.class.st b/src/Math-Tests-TSNE/PMTSNETest.class.st index c76c1cdd3..322b5cc15 100644 --- a/src/Math-Tests-TSNE/PMTSNETest.class.st +++ b/src/Math-Tests-TSNE/PMTSNETest.class.st @@ -1,9 +1,47 @@ +" +I test the method of the class PMTSNE. +" Class { #name : #PMTSNETest, #superclass : #TestCase, #category : #'Math-Tests-TSNE' } +{ #category : #tests } +PMTSNETest >> testComputePValues [ + | t | + + t := (PMTSNE new) + x: (PMMatrix rows: #(#(1 2 3) #(4 5 6) #(7 8 9))); + perplexity: 30.0. + self assert: t computePValues closeTo: (PMMatrix rows: {{0. 2/3. 2/3.}. {2/3. 0. 2/3}. {2/3. 2/3. 0.}}) +] + +{ #category : #tests } +PMTSNETest >> testComputePairwiseAffinities [ + | t correctAffinities | + + t := (PMTSNE new) + x: (PMMatrix rows: #(#(0 0) #(2 2) #(2 0))); + perplexity: 30.0. + correctAffinities := PMMatrix rows: #( + #(0 0.5 0.5) + #(0.5 0 0.5) + #(0.5 0.5 0) + ). + self assert: t computePairwiseAffinities closeTo: correctAffinities. + + "This test case has large pairwise distances, which makes exp(-D*beta) = 0" + t x: (PMMatrix rows: #(#(1 2 3.14) #(0 4 -43) #(-5 -9 -150) #(120 5 1))). + correctAffinities := PMMatrix rows: #( + #(0 0.333333 0.333333 0.333333) + #(0.333333 0 0.333333 0.333333) + #(0.333333 0.333333 0 0.333333) + #(0.333333 0.333333 0.333333 0) + ). + self assert: t computePairwiseAffinities closeTo: correctAffinities. +] + { #category : #tests } PMTSNETest >> testComputePairwiseDistances [ | t | @@ -13,6 +51,18 @@ PMTSNETest >> testComputePairwiseDistances [ self assert: t computePairwiseDistances equals: (PMMatrix rows: #(#(0 8) #(8 0))) ] +{ #category : #tests } +PMTSNETest >> testEntropyOfAndPRowWithBeta [ + | distanceVector pVector entropy | + "Input points are (0, 0), (2, 2) and (2, 0)" + distanceVector := #(0 8 4) asPMVector. + pVector := PMVector new: (distanceVector size). + entropy := PMTSNE entropyOf: distanceVector andPRow: pVector withBeta: 2.0. + self assert: entropy closeTo: 0.003020119571023052. + self assert: pVector closeTo: { 0.99966454 . 0.00000011 . 0.00033535 } asPMVector. + +] + { #category : #tests } PMTSNETest >> testInitialDimsSetByDefaultWithFifty [ | t | @@ -22,3 +72,12 @@ PMTSNETest >> testInitialDimsSetByDefaultWithFifty [ start. self assert: t initialDims equals: 50 ] + +{ #category : #tests } +PMTSNETest >> testreduceXToInputDimsUsing [ + | t | + t := (PMTSNE new) + x: (PMMatrix rows: #(#(0 0) #(2 2) #(2 0))). + t reduceXToInputDimsUsing: PMPrincipalComponentAnalyserJacobiTransformation. + self assert: (t x) closeTo: (PMMatrix rows: #(#(-0.5 -1.5) #(-0.5 1.5) #(1 0))). +]