diff --git a/src/Math-TSNE/PMTSNE.class.st b/src/Math-TSNE/PMTSNE.class.st
index 251da7c2a..92f183245 100644
--- a/src/Math-TSNE/PMTSNE.class.st
+++ b/src/Math-TSNE/PMTSNE.class.st
@@ -3,28 +3,47 @@ Implementation of t-SNE (t-Distributed Stochastic Neighbor Embedding) algorithm
 
 https://lvdmaaten.github.io/tsne/
 
-t-SNE is a technique for dimensionality reduction that is particularly well suited for the visualization of high-dimensional datasets. 
+t-SNE is a technique for dimensionality reduction that is particularly well suited for the visualization of high-dimensional datasets.
 "
 Class {
 	#name : #PMTSNE,
 	#superclass : #Object,
 	#instVars : [
-		'ndims',
-		'initialdims',
+		'outputDims',
+		'initialDims',
 		'perplexity',
 		'x',
 		'y',
 		'maxIter',
-		'epsilon',
 		'sumY',
 		'initialMomentum',
 		'finalMomentum',
-		'eta',
-		'minGain'
+		'learningRate',
+		'minGain',
+		'job',
+		'computeErrorEvery'
 	],
 	#category : #'Math-TSNE'
 }
 
+{ #category : #running }
+PMTSNE class >> entropyOf: distanceVector andPRow: pVector withBeta: beta [
+	"Calculates gaussian kernel values for a distanceVector, along with perplexity
+	 Inputs: distanceVector - a PMVector containing distances
+				pRow - calculated p-rows are stored here
+				beta - a Float, precision which is used to compute entropy
+	 Outputs: entropy - log(Shannon's entropy) for calculated pVector
+				 pVector - The conditional probability pji
+	"
+
+	| pVectorTemp sumP entropy |
+	pVectorTemp := (-1 * distanceVector * beta) exp.
+	sumP := pVectorTemp sum max: (Float epsilon).
+	entropy := sumP ln + (beta * (distanceVector * pVectorTemp) / sumP).
+	pVector copyFrom: (pVectorTemp / sumP).
+	^ entropy
+]
+
 { #category : #examples }
 PMTSNE class >> example1 [
 	| points |
@@ -33,7 +52,6 @@ PMTSNE class >> example1 [
 		perplexity: 10;
 		x: points;
 		initialDims: 2;
-		epsilon: 5;
 		start;
 		y
 ]
@@ -52,88 +70,296 @@ PMTSNE class >> gridDataGeneratorOf: size [
 	^ PMMatrix rows: array
 ]
 
+{ #category : #accessing }
+PMTSNE >> computeErrorEvery [
+	^ computeErrorEvery 
+]
+
+{ #category : #accessing }
+PMTSNE >> computeErrorEvery: aNumber [
+	computeErrorEvery := aNumber
+]
+
+{ #category : #accessing }
+PMTSNE >> computeErrorEveryDefaultValue [
+	^ 10
+]
+
+{ #category : #'stepping and presenter' }
+PMTSNE >> computeGradient: p withProgressUpdate: iteration [
+	"Computes gradient of KL divergence"
+
+	| num sumNum q pq dY tmp yiDiff error |
+	"Calculates num and q"
+	num := self computeLowDimensionalStudentT.
+	sumNum := num sum sum max: (Float epsilon).
+	q := num collect: [:element |
+				(element / sumNum) max: (Float epsilon)
+			].
+	pq := p - q.
+	dY := OrderedCollection new. 
+	1 to: (x dimension x) do: [ :i |
+		tmp := (pq rowAt: i) hadamardProduct: (num rowAt: i).
+		"Create a matrix of rows filled with 'tmp'"
+		tmp := (PMMatrix rows: ((1 to: outputDims) collect: [ :j | tmp])).
+		yiDiff := (PMMatrix rows: (y rowsCollect: [ :row | (y rowAt: i) - row ])) transpose.
+		dY add: (tmp hadamardProduct: yiDiff) sum
+		 ].
+	dY := PMMatrix rows: dY.
+	
+	(iteration % computeErrorEvery = 0) ifTrue: [
+		error := PMMatrix rows: (x dimension x) columns: (x dimension x).
+		1 to: (x dimension x) do: [ :i |
+			1 to: (x dimension x) do: [ :j |
+				error rowAt: i columnAt: j put: (
+					(p rowAt: i columnAt: j) * (((p rowAt: i columnAt: j) / (q rowAt: i columnAt: j)) ln)
+				).
+				].
+			].
+		error := error sum sum.
+		job title: ('' join: { 'Step 3/3: Performing gradient descent '. iteration. '/'. maxIter.' error = '. error }).
+		job progress: (iteration/maxIter).
+		].
+
+	^ dY
+]
+
+{ #category : #running }
+PMTSNE >> computeLowDimensionalAffinities [
+	"Computes affinity of the reduced dimension/output"
+
+	| num sumNum q |
+	num := self computeLowDimensionalStudentT.
+	sumNum := num sum sum max: (Float epsilon).
+	q := num collect: [:element |
+				(element / sumNum) max: (Float epsilon)
+			].
+	^ q
+]
+
+{ #category : #running }
+PMTSNE >> computeLowDimensionalStudentT [
+	"Computes Student's T distribution with 1-degree of freedom for y"
+
+	| num tmp |
+	sumY :=  (y hadamardProduct: y) sum.
+	tmp := ((y* (y transpose)) * (-2)).
+	tmp := PMMatrix rows: (tmp rowsCollect: [ :each| each + sumY]).
+	tmp := PMMatrix rows: ((tmp transpose) rowsCollect: [:each| each + sumY]).
+	num := (1 + tmp) collect: [ :ele | 1.0 / ele ].
+	num := num setDiagonal: (PMVector zeros: (x dimension x)).
+	^ num
+]
+
 { #category : #accessing }
 PMTSNE >> computePValues [
-"
-#Compute P-values
-	P = x2p(X, 1e-5, perplexity);
-	P = P + Math.transpose(P);
-	P = P / Math.sum(P);
-	P = P * 4;									# early exaggeration
-	P = Math.maximum(P, 1e-12);
-"
-| p n |
-p := self x2p.
-p := p + p transpose.
-p := p / (p sum).
-p := p *4.
-p := p max: 1e-12.
-^ p
+	"Computes joint probablity matrix P"
+	| p sumP |
+	p := self computePairwiseAffinities.
+	p := p + p transpose.
+	sumP := p sum sum.
+	p := p collect: [ :element |
+		"4 is for early exaggeration, will be removed after 100 iterations"
+		(element / sumP * 4) asFloat max: (Float epsilon).
+		 ].
+	^ p
 ]
 
-{ #category : #'as yet unclassified' }
+{ #category : #running }
 PMTSNE >> computePairwiseAffinities [
-"
-	sum_Y = np.sum(np.square(Y), 1);
-		num = 1 / (1 + Math.add(Math.add(-2 * Math.dot(Y, Y.T), sum_Y).T, sum_Y));
-		num[range(n), range(n)] = 0;
-		Q = num / Math.sum(num);
-		Q = Math.maximum(Q, 1e-12);"
-		|num tmp|
-	sumY :=  (x dot:x) sum. "PMVector"
-	tmp := ((y* (y transpose)) * (-2)) transpose.
-	num := ((PMMatrix rows: (tmp rowsCollect: [ :each| each + sumY])) ).
+	"Computes a similarity matrix by making sure each Gaussian has same perplexity.
+	It identifies required precision (beta = (1/ variance**2)) for each row using a
+	binary search. The precision is selected based on input perplexity.
+	"
+	
+	| p d beta logU n betaMin betaMax distanceVector pVector entropy tries entropyDiff |
+	n := x numberOfRows.
+	d := self computePairwiseDistances.
+	p := PMMatrix zerosRows: n cols: n.
+	beta := PMVector ones: n.
+	logU := self perplexity ln.
+	distanceVector := PMVector new: n - 1.
+	pVector := PMVector new: n - 1.
 	
+	job title: 'Step 2/3: Computing joint probablity for point 1 of ', n asString.
+	job progress: 0.0.
 	
+	1 to: n do: [ :i |
+		"Job progress gets updated every 10 rows"
+		(i % 10 = 0) ifTrue: [
+			job title: ('' join: {'Step 2/3: Computing joint probablity for point '. i asString. ' of '. n asString}).
+			job progress: (i/n).
+		 ].
 		
-	
+		"Set minimum and maximum value of precision"
+		betaMin := Float infinity negated.
+		betaMax := Float infinity.
+		
+		"Ignore i-th element of the row d[i] and copy rest in distanceVector.
+		 Also initialize pVector to 0"
+		1 to: n do: [ :index |
+			(index = i) ifFalse: [ 
+				(index < i)
+					ifTrue: [ distanceVector at: index put: (d rowAt: i columnAt: index).
+								 pVector at: index put: 0. ]
+					ifFalse: [ distanceVector at: (index - 1) put: (d rowAt: i columnAt: index).
+								  pVector at: (index - 1) put: 0.].
+				 ].
+			].
+		entropy := self class entropyOf: distanceVector andPRow: pVector withBeta: (beta at: i).
+		entropyDiff := entropy - logU.
+		tries := 0.
+		[ (entropyDiff abs > 1e-5) & (tries < 50)]	whileTrue: [ 
+			(entropyDiff > 0)
+				ifTrue: [ 
+					betaMin := beta at: i.
+					((betaMax = Float infinity) | (betaMin = Float infinity negated))
+						ifTrue: [ beta at: i put: ((beta at: i) * 2) ]
+						ifFalse: [ beta at: i put: (((beta at: i) + betaMax) / 2)
+						].
+					 ]
+				ifFalse: [ 
+					betaMax := beta at: i.
+					((betaMax = Float infinity) | (betaMin = Float infinity negated))
+						ifTrue: [ beta at: i put: ((beta at: i) / 2) ]
+						ifFalse: [ beta at: i put: (((beta at: i) + betaMin) / 2)
+						].
+					].
+				entropy := self class entropyOf: distanceVector andPRow: pVector withBeta: (beta at: i).
+				entropyDiff := entropy - logU.
+				tries := tries + 1.
+		 	].
+		1 to: n do: [ :index |
+			(index = i) ifFalse: [
+				(index < i)
+					ifTrue: [ p rowAt: i columnAt: index put: (pVector at: index) ]
+					ifFalse: [ p rowAt: i columnAt: index put: (pVector at: index - 1) ].
+				].
+			 ].
+		 ].
+	^ p
 ]
 
-{ #category : #'as yet unclassified' }
+{ #category : #running }
 PMTSNE >> computePairwiseDistances [
 	| sumX d tmp|
-	sumX := (x dot: x) sum.
+	sumX := (x hadamardProduct: x) sum.
 	tmp := (x * (x transpose)) * (-2).
 	tmp := PMMatrix rows: (tmp rowsCollect: [ :each| each + sumX ]).
 	d := PMMatrix rows: ((tmp transpose) rowsCollect: [:each| each + sumX]).
 	^ d
 ]
 
+{ #category : #accessing }
+PMTSNE >> finalMomentum [
+	^ finalMomentum
+]
+
+{ #category : #accessing }
+PMTSNE >> finalMomentum: aFloat [
+	finalMomentum := aFloat
+]
+
+{ #category : #accessing }
+PMTSNE >> finalMomentumDefaultValue [
+	^ 0.8
+]
+
 { #category : #running }
-PMTSNE >> epsilon: aFloat [
-	epsilon := aFloat
+PMTSNE >> gradientDescent [
+	"Tries to minimize the cost, which is KL divergence"
+
+	| p gains iY momentum dY yMeanAccumulator |
+	job title: 'Step 3/3: Performing gradient descent'.
+	p := self computePValues.
+	gains := PMMatrix onesRows: x dimension x cols: outputDims.
+	iY := PMMatrix zerosRows: x dimension x cols: outputDims.
+	momentum := initialMomentum.
+	1 to: maxIter do: [ :iteration | 
+		dY := self computeGradient: p withProgressUpdate: iteration.
+		momentum := iteration < 20
+			ifTrue: [ initialMomentum ]
+			ifFalse: [ finalMomentum ].
+		1 to: (x dimension x) do: [ :i |
+			1 to: outputDims do: [ :j |
+				((dY rowAt: i columnAt: j) > 0) = ((iY rowAt: i columnAt: j) > 0)
+					ifTrue: [ gains rowAt: i columnAt: j put: (((gains rowAt: i columnAt: j) * 0.8) max: minGain) ]
+					ifFalse: [ gains rowAt: i columnAt: j put: (gains rowAt: i columnAt: j) + 0.2 ].
+				]
+			].
+		iY := iY * momentum - ((dY hadamardProduct: gains) * learningRate).
+		y := y + iY.
+		yMeanAccumulator := PMVectorAccumulator new: outputDims.
+		y rowsDo: [ :row |
+			yMeanAccumulator accumulate: row.
+			].
+		y := PMMatrix rows: (y rowsCollect: [ :row |
+			row - (yMeanAccumulator average)
+			]).
+		"Stop exaggeration"
+		(iteration = 100) ifTrue: [ p := p * (1/4) ].
+		].
 ]
 
-{ #category : #'as yet unclassified' }
+{ #category : #accessing }
 PMTSNE >> initialDims [
-	^initialdims 
+	^ initialDims 
 ]
 
-{ #category : #'as yet unclassified' }
+{ #category : #accessing }
 PMTSNE >> initialDims: aFloat [
-	initialdims := aFloat
+	initialDims := aFloat
 ]
 
-{ #category : #'as yet unclassified' }
+{ #category : #accessing }
 PMTSNE >> initialDimsDefaultValue [
 	^ 50
 ]
 
+{ #category : #accessing }
+PMTSNE >> initialMomentum [
+	^ initialMomentum
+]
+
+{ #category : #accessing }
+PMTSNE >> initialMomentum: aFloat [
+	initialMomentum := aFloat
+]
+
+{ #category : #accessing }
+PMTSNE >> initialMomentumDefaultValue [
+	^ 0.5
+]
+
 { #category : #initialization }
 PMTSNE >> initialize [
-	maxIter := 1000.
-	initialMomentum := 0.5.
-	finalMomentum := 0.8.
-	eta := 500.
-	minGain := 0.01.
+	"These parameters rarely need to be modified"
+	maxIter := self maxIterDefaultValue.
+	initialMomentum := self initialMomentumDefaultValue.
+	finalMomentum := self finalMomentumDefaultValue.
+	learningRate := self learningRateDefaultValue.
+	minGain := self minGainDefaultValue.
+	computeErrorEvery := self computeErrorEveryDefaultValue.
+	self initializeJob.
 	
 ]
 
+{ #category : #initialization }
+PMTSNE >> initializeJob [
+	"This job represents all the steps in t-SNE"
+	job := [ 
+		self initializeUninitializedParameters.
+		self reduceXToInputDims.
+		self initializeYWithRandomValues.
+		self gradientDescent.
+	 ] asJob.
+]
+
 { #category : #initialization }
 PMTSNE >> initializeUninitializedParameters [
 	perplexity ifNil: [ perplexity := self perplexityDefaultValue ].
-	ndims ifNil: [ ndims := self ndimsDefaultValue ].
-	initialdims ifNil: [ initialdims := self initialDimsDefaultValue ]
+	outputDims ifNil: [ outputDims := self outputDimsDefaultValue ].
+	initialDims ifNil: [ initialDims := self initialDimsDefaultValue ]
 ]
 
 { #category : #initialization }
@@ -143,7 +369,7 @@ PMTSNE >> initializeYWithRandomValues [
 
 	| a b rows columns d |
 	rows := x dimension x.
-	columns := ndims.
+	columns := outputDims.
 	d := PMNormalDistribution new:0 sigma: 1.
 	a := (1 to: rows)
 		collect: [ :row | 
@@ -154,12 +380,62 @@ PMTSNE >> initializeYWithRandomValues [
 ]
 
 { #category : #accessing }
-PMTSNE >> ndims [
-	^ ndims
+PMTSNE >> learningRate [
+	^ learningRate 
 ]
 
-{ #category : #'as yet unclassified' }
-PMTSNE >> ndimsDefaultValue [
+{ #category : #accessing }
+PMTSNE >> learningRate: aNumber [
+	learningRate := aNumber
+]
+
+{ #category : #accessing }
+PMTSNE >> learningRateDefaultValue [
+	^ 500
+]
+
+{ #category : #accessing }
+PMTSNE >> maxIter [
+	^ maxIter
+]
+
+{ #category : #accessing }
+PMTSNE >> maxIter: aNumber [
+	maxIter := aNumber
+]
+
+{ #category : #accessing }
+PMTSNE >> maxIterDefaultValue [
+	^ 1000
+]
+
+{ #category : #accessing }
+PMTSNE >> minGain [
+	^ minGain
+]
+
+{ #category : #accessing }
+PMTSNE >> minGain: aFloat [
+	minGain := aFloat
+]
+
+{ #category : #accessing }
+PMTSNE >> minGainDefaultValue [
+	^ 0.01
+]
+
+{ #category : #accessing }
+PMTSNE >> outputDims [
+	^ outputDims
+]
+
+{ #category : #accessing }
+PMTSNE >> outputDims: anInteger [
+	outputDims := anInteger
+]
+
+{ #category : #accessing }
+PMTSNE >> outputDimsDefaultValue [
 	^ 2
 ]
 
@@ -173,58 +449,38 @@ PMTSNE >> perplexity: aFloat [
 	perplexity := aFloat
 ]
 
-{ #category : #'as yet unclassified' }
+{ #category : #accessing }
 PMTSNE >> perplexityDefaultValue [
 	^ 30.0
 ]
 
 { #category : #running }
-PMTSNE >> runPcaOnX [
-	"Runs PCA on X in order to reduce its dimensionality to initialdims dimensions.
-
-	print ""Preprocessing the data using PCA...""
-	(n, d) = X.shape;
-	X = X - Math.tile(Math.mean(X, 0), (n, 1));
-	(l, M) = Math.linalg.eig(Math.dot(X.T, X));
-	Y = Math.dot(X, M[:,0:no_dims]);
-	return Y;
-"
+PMTSNE >> reduceXToInputDims [
+	"Runs PCA on X in order to reduce its dimensionality to initialDims dimensions."
 
-	"| analyzer i |
-	analyzer := PMPrincipalComponentAnalyser new: initialdims.
-	i := 1.
-	x dimension x
-		timesRepeat: [ analyzer accumulate: (x rowAt: i).
-			i := i + 1 ].
-	^ analyzer components"
+	self reduceXToInputDimsUsing: PMPrincipalComponentAnalyserJacobiTransformation.
 ]
 
-{ #category : #accessing }
-PMTSNE >> start [
-	self initializeUninitializedParameters.
-	self runPcaOnX.
-	self initializeYWithRandomValues.
-	self x2p
+{ #category : #running }
+PMTSNE >> reduceXToInputDimsUsing: aClass [
+	"Runs aClass PCA on X in order to reduce its dimensionality to initialDims dimensions."
+
+	| scaler pca |
+	job title: 'Step 1/3: Reducing input dimensions.'.
+	scaler := PMStandardizationScaler new.
+	initialDims ifNil: [ initialDims := self initialDimsDefaultValue ].
+	pca := aClass new componentsNumber: (initialDims min: x dimension y).
+	x := pca fitAndTransform: (scaler fitAndTransform: x)
 ]
 
-{ #category : #'stepping and presenter' }
-PMTSNE >> step [
-	self computePairwiseAffinities
+{ #category : #running }
+PMTSNE >> start [
+	job run.
 ]
 
-{ #category : #'as yet unclassified' }
-PMTSNE >> x2p [
-	| p d beta logU n betaMin betaMax|
-	n := x numberOfRows.
-	d := self computePairwiseDistances.
-	p := PMMatrix zerosRows: n cols: n.
-	beta := PMMatrix onesRows: n cols: 1.
-	logU := self perplexity log.
-	n timesRepeat: [
-		betaMin := Float infinity.
-		betaMax := Float infinity negated.
-		
-		 ]
+{ #category : #accessing }
+PMTSNE >> x [
+	^ x
 ]
 
 { #category : #accessing }
@@ -236,3 +492,8 @@ PMTSNE >> x: aPMMatrix [
 PMTSNE >> y [
 	^ y
 ]
+
+{ #category : #accessing }
+PMTSNE >> y: aNumber [
+	y:= aNumber
+]
diff --git a/src/Math-Tests-TSNE/PMTSNETest.class.st b/src/Math-Tests-TSNE/PMTSNETest.class.st
index c76c1cdd3..322b5cc15 100644
--- a/src/Math-Tests-TSNE/PMTSNETest.class.st
+++ b/src/Math-Tests-TSNE/PMTSNETest.class.st
@@ -1,9 +1,47 @@
+"
+I test the method of the class PMTSNE.
+"
 Class {
 	#name : #PMTSNETest,
 	#superclass : #TestCase,
 	#category : #'Math-Tests-TSNE'
 }
 
+{ #category : #tests }
+PMTSNETest >> testComputePValues [
+	| t |
+	
+	t := (PMTSNE new)
+		x: (PMMatrix rows: #(#(1 2 3) #(4 5 6) #(7 8 9)));
+		perplexity: 30.0.
+	self assert: t computePValues closeTo: (PMMatrix rows: {{0. 2/3. 2/3.}. {2/3. 0. 2/3}. {2/3. 2/3. 0.}})
+]
+
+{ #category : #tests }
+PMTSNETest >> testComputePairwiseAffinities [
+	| t correctAffinities |
+	
+	t := (PMTSNE new)
+		x: (PMMatrix rows: #(#(0 0) #(2 2) #(2 0)));
+		perplexity: 30.0.
+	correctAffinities := PMMatrix rows: #(
+		#(0 0.5 0.5)
+		#(0.5 0 0.5)
+		#(0.5 0.5 0)
+		).
+	self assert: t computePairwiseAffinities closeTo: correctAffinities.
+	
+	"This test case has large pairwise distances, which makes exp(-D*beta) = 0"
+	t x: (PMMatrix rows: #(#(1 2 3.14) #(0 4 -43) #(-5 -9 -150) #(120 5 1))).
+	correctAffinities := PMMatrix rows: #(
+		#(0 0.333333 0.333333 0.333333)
+		#(0.333333 0 0.333333 0.333333)
+		#(0.333333 0.333333 0 0.333333)
+		#(0.333333 0.333333 0.333333 0)
+		).
+	self assert: t computePairwiseAffinities closeTo: correctAffinities.
+]
+
 { #category : #tests }
 PMTSNETest >> testComputePairwiseDistances [
 	| t |
@@ -13,6 +51,18 @@ PMTSNETest >> testComputePairwiseDistances [
 	self assert: t computePairwiseDistances equals: (PMMatrix rows: #(#(0 8) #(8 0)))
 ]
 
+{ #category : #tests }
+PMTSNETest >> testEntropyOfAndPRowWithBeta [
+	| distanceVector pVector entropy |
+	"Input points are (0, 0), (2, 2) and (2, 0)"
+	distanceVector := #(0 8 4) asPMVector.
+	pVector := PMVector new: (distanceVector size).	
+	entropy := PMTSNE entropyOf: distanceVector andPRow: pVector withBeta: 2.0.
+	self assert: entropy closeTo: 0.003020119571023052.
+	self assert: pVector closeTo: { 0.99966454 . 0.00000011 . 0.00033535 } asPMVector.
+	
+]
+
 { #category : #tests }
 PMTSNETest >> testInitialDimsSetByDefaultWithFifty [
 	| t |
@@ -22,3 +72,12 @@ PMTSNETest >> testInitialDimsSetByDefaultWithFifty [
 		start.
 	self assert: t initialDims equals: 50
 ]
+
+{ #category : #tests }
+PMTSNETest >> testreduceXToInputDimsUsing [
+	| t |
+	t := (PMTSNE new)
+			x: (PMMatrix rows: #(#(0 0) #(2 2) #(2 0))).
+	t reduceXToInputDimsUsing: PMPrincipalComponentAnalyserJacobiTransformation.
+	self assert: (t x) closeTo: (PMMatrix rows: #(#(-0.5 -1.5) #(-0.5 1.5) #(1 0))).
+]