tsne/0000755000176200001440000000000015156722772011241 5ustar liggesuserstsne/MD50000644000176200001440000000050415156722772011550 0ustar liggesusersd1168a687983a7b6b9b54e1d8a59f6c9 *DESCRIPTION d65aaf552fc71192a426c3b41f3b0c48 *NAMESPACE 5471f3ef5214bec2300bc2a10b65c88e *NEWS 71333231bc449618cda267caa0e0f019 *R/tsne-internal.R 70aacb37a3268708b21da542ac77b96d *R/tsne.R bb49103a06187a42f001e2671f2072f7 *man/tsne-package.Rd b085bd2cdbbc2f88aab4172f4312be83 *man/tsne.Rd tsne/R/0000755000176200001440000000000015156620050011424 5ustar liggesuserstsne/R/tsne-internal.R0000644000176200001440000000247415156620047014347 0ustar liggesusers.Hbeta <- function(D, beta){ P = exp(-D * beta) sumP = sum(P) if (sumP == 0){ H = 0 P = D * 0 } else { H = log(sumP) + beta * sum(D %*% P) /sumP P = P/sumP } r = {} r$H = H r$P = P r } .x2p <- function(X,perplexity = 15,tol = 1e-5){ if (inherits(X, 'dist')) { D = X n = attr(D,'Size') } else{ D = dist(X) n = attr(D,'Size') } D = as.matrix(D) P = matrix(0, n, n ) beta = rep(1, n) logU = log(perplexity) for (i in 1:n){ betamin = -Inf betamax = Inf Di = D[i, -i] hbeta = .Hbeta(Di, beta[i]) H = hbeta$H; thisP = hbeta$P Hdiff = H - logU; tries = 0; while(abs(Hdiff) > tol && tries < 50){ if (Hdiff > 0){ betamin = beta[i] if (is.infinite(betamax)) beta[i] = beta[i] * 2 else beta[i] = (beta[i] + betamax)/2 } else{ betamax = beta[i] if (is.infinite(betamin)) beta[i] = beta[i]/ 2 else beta[i] = ( beta[i] + betamin) / 2 } hbeta = .Hbeta(Di, beta[i]) H = hbeta$H thisP = hbeta$P Hdiff = H - logU tries = tries + 1 } P[i,-i] = thisP } r = {} r$P = P r$beta = beta sigma = sqrt(1/beta) message('sigma summary: ', paste(names(summary(sigma)),':',summary(sigma),'|',collapse='')) r } .pca <- function(X, n.comp=ncol(X)) { X = scale(X, scale = FALSE) e = eigen(t(X) %*% X) X %*% Re(e$vectors[, 1:n.comp]) } tsne/R/tsne.R0000644000176200001440000000416315156620050012524 0ustar liggesuserstsne <- function(X, initial_config = NULL, k=2, initial_dims=30, perplexity=30, max_iter = 1000, min_cost=0, epoch_callback=NULL,whiten=TRUE, epoch=100 ){ if (inherits(X, 'dist')) { n = attr(X,'Size') } else { X = as.matrix(X) X = X - min(X) X = X/max(X) initial_dims = min(initial_dims,ncol(X)) if (whiten) X<-.pca(as.matrix(X),n.comp=initial_dims) n = nrow(X) } momentum = .5 final_momentum = .8 mom_switch_iter = 20 epsilon = 500 min_gain = .01 initial_P_gain = 4 eps = 2^(-52) # typical machine precision if (!is.null(initial_config) && is.matrix(initial_config)) { if (nrow(initial_config) != n | ncol(initial_config) != k){ stop('initial_config argument does not match necessary configuration for X') } ydata = initial_config initial_P_gain = 1 } else { ydata = matrix(rnorm(k * n),n) } P = .x2p(X,perplexity, 1e-5)$P P = .5 * (P + t(P)) P[P < eps]<-eps P = P/sum(P) P = P * initial_P_gain grads = matrix(0,nrow(ydata),ncol(ydata)) incs = matrix(0,nrow(ydata),ncol(ydata)) gains = matrix(1,nrow(ydata),ncol(ydata)) for (iter in 1:max_iter){ if (iter %% epoch == 0) { # epoch cost = sum(apply(P * log((P+eps)/(Q+eps)),1,sum)) message("Epoch: Iteration #",iter," error is: ",cost) if (cost < min_cost) break if (!is.null(epoch_callback)) epoch_callback(ydata) } sum_ydata = apply(ydata^2, 1, sum) num = 1/(1 + sum_ydata + sweep(-2 * ydata %*% t(ydata),2, -t(sum_ydata))) diag(num)=0 Q = num / sum(num) if (any(is.nan(num))) message ('NaN in grad. descent') Q[Q < eps] = eps stiffnesses = 4 * (P-Q) * num sum_stiffness = colSums(stiffnesses) for (i in 1:n){ stiffnesses[i, i] = stiffnesses[i, i] - sum_stiffness[i] } grads = -4 * (stiffnesses %*% ydata) gains = ((gains + .2) * abs(sign(grads) != sign(incs)) + gains * .8 * abs(sign(grads) == sign(incs))) gains[gains < min_gain] = min_gain incs = momentum * incs - epsilon * (gains * grads) ydata = ydata + incs ydata = sweep(ydata,2,apply(ydata,2,mean)) if (iter == mom_switch_iter) momentum = final_momentum if (iter == 100 && is.null(initial_config)) P = P/4 } ydata } tsne/NEWS0000644000176200001440000000110015156620054011716 0ustar liggesusers* version 0.1 - Initial release. main tsne function included. * version 0.1-1 - fixed misc. documentation omissions. * version 0.1-2 - using an initial configuration parameter automatically places the tsne in the 'late stage' mode, which is mainly focused on small scale adjustments. * version 0.1-3 - fix a broken gain equation, and made distance matrix checks more reliable * version 0.2-0 - align preprocessing and optimization with reference implementation: replace whitening with standard PCA, change momentum switch to iteration 20, use vectorized gradient computation tsne/NAMESPACE0000644000176200001440000000010415156616347012453 0ustar liggesusersexportPattern("^[[:alpha:]]+") importFrom("stats", "dist", "rnorm") tsne/man/0000755000176200001440000000000015156616347012014 5ustar liggesuserstsne/man/tsne-package.Rd0000644000176200001440000000215515156616347014650 0ustar liggesusers\name{tsne-package} \Rdversion{1.1} \alias{tsne-package} \docType{package} \title{The tsne-package for multidimensional scaling} \description{ This package contains one function called \link[tsne]{tsne} which contains all the functionality. } \details{ \tabular{ll}{ Package: \tab tsne\cr Type: \tab Package\cr Version: \tab 0.1\cr Date: \tab 2010-02-19\cr License: \tab GPL\cr LazyLoad: \tab yes\cr } } \author{ Justin Donaldson https://github.com/jdonaldson/rtsne Maintainer: Justin Donaldson (jdonaldson@gmail.com) } \references{ L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data Using t-SNE. \emph{Journal of Machine Learning Research} 9 (Nov) : 2579-2605, 2008. L.J.P. van der Maaten. Learning a Parametric Embedding by Preserving Local Structure. In \emph{Proceedings of the Twelfth International Conference on Artificial Intelligence and Statistics} (AISTATS), JMLR W&CP 5:384-391, 2009. } \keyword{ package } % \seealso{ % ~~ Optional links to other man pages, e.g. ~~ % ~~ \code{\link[:-package]{}} ~~ % } % \examples{ % ~~ simple examples of the most important functions ~~ % } tsne/man/tsne.Rd0000644000176200001440000000601515156616347013256 0ustar liggesusers\name{tsne} \Rdversion{1.1} \alias{tsne} %- Also NEED an '\alias' for EACH other topic documented here. \title{ The t-SNE method for dimensionality reduction } \description{ Provides a simple function interface for specifying t-SNE dimensionality reduction on R matrices or "dist" objects. } \usage{ tsne(X, initial_config = NULL, k = 2, initial_dims = 30, perplexity = 30, max_iter = 1000, min_cost = 0, epoch_callback = NULL, whiten = TRUE, epoch=100) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{ The R matrix or "dist" object } \item{initial_config}{ an argument providing a matrix specifying the initial embedding for X. See Details. } \item{k}{ the dimension of the resulting embedding. } \item{initial_dims}{ The number of dimensions to use in reduction method. } \item{perplexity}{ Perplexity parameter. (optimal number of neighbors) } \item{max_iter}{ Maximum number of iterations to perform. } \item{min_cost}{ The minimum cost value (error) to halt iteration. } \item{epoch_callback}{ A callback function used after each epoch (an epoch here means a set number of iterations) } \item{whiten}{ A boolean value indicating whether the matrix data should be whitened. } \item{epoch}{ The number of iterations in between update messages. } } %%\details{ %% ~~ If necessary, more details than the description above ~~ %%} \value{ An R object containing a \emph{ydata} embedding matrix, as well as a the matrix of probabilities \emph{P} } \details{ When the initial_config argument is specified, the algorithm will automatically enter the \emph{final momentum} stage. This stage has less large scale adjustment to the embedding, and is intended for small scale tweaking of positioning. This can greatly speed up the generation of embeddings for various similar X datasets, while also preserving overall embedding orientation. } \references{ L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data Using t-SNE. \emph{Journal of Machine Learning Research} 9 (Nov) : 2579-2605, 2008. L.J.P. van der Maaten. Learning a Parametric Embedding by Preserving Local Structure. In \emph{Proceedings of the Twelfth International Conference on Artificial Intelligence and Statistics} (AISTATS), JMLR W&CP 5:384-391, 2009. } \author{ Justin Donaldson (jdonaldson@gmail.com) } %%\note{ %% ~~further notes~~ %%} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \link{dist} } \examples{\dontrun{ colors = rainbow(length(unique(iris$Species))) names(colors) = unique(iris$Species) ecb = function(x,y){ plot(x,t='n'); text(x,labels=iris$Species, col=colors[iris$Species]) } tsne_iris = tsne(iris[,1:4], epoch_callback = ecb, perplexity=50) # compare to PCA dev.new() pca_iris = princomp(iris[,1:4])$scores[,1:2] plot(pca_iris, t='n') text(pca_iris, labels=iris$Species,col=colors[iris$Species]) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. % \keyword{ ~kwd1 } % \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line tsne/DESCRIPTION0000644000176200001440000000126615156722772012754 0ustar liggesusersPackage: tsne Type: Package Title: T-Distributed Stochastic Neighbor Embedding for R (t-SNE) Version: 0.2-0 Date: 2026-03-18 Authors@R: person(given = "Justin", family = "Donaldson", role = c("aut", "cre"), email = "jdonaldson@gmail.com") Description: A "pure R" implementation of the t-SNE algorithm. License: GPL LazyLoad: yes NeedsCompilation: no URL: https://github.com/jdonaldson/rtsne/ BugReports: https://github.com/jdonaldson/rtsne/issues Packaged: 2026-03-18 21:59:14 UTC; jdonaldson Author: Justin Donaldson [aut, cre] Maintainer: Justin Donaldson Repository: CRAN Date/Publication: 2026-03-19 07:30:02 UTC