fingerprint/0000755000176200001440000000000015164116553012610 5ustar liggesusersfingerprint/tests/0000755000176200001440000000000015163764461013760 5ustar liggesusersfingerprint/tests/doRUnit.R0000644000176200001440000000400015061401251015437 0ustar liggesusersif(require("RUnit", quietly=TRUE)) { ## --- Setup --- pkg <- "fingerprint" # <-- Change to package name! if(Sys.getenv("RCMDCHECK") == "FALSE") { ## Path to unit tests for standalone running under Makefile (not R CMD check) ## PKG/tests/../inst/unitTests path <- file.path(getwd(), "..", "inst", "unitTests") } else { ## Path to unit tests for R CMD check ## PKG.Rcheck/tests/../PKG/unitTests path <- system.file(package=pkg, "unitTests") } cat("\nRunning unit tests\n") print(list(pkg=pkg, getwd=getwd(), pathToUnitTests=path)) library(package=pkg, character.only=TRUE) ## If desired, load the name space to allow testing of private functions ## if (is.element(pkg, loadedNamespaces())) ## attach(loadNamespace(pkg), name=paste("namespace", pkg, sep=":"), pos=3) ## ## or simply call PKG:::myPrivateFunction() in tests ## --- Testing --- ## Define tests testSuite <- defineTestSuite(name=paste(pkg, "fingerprint Unit Tests"), dirs=path) ## Run tests <- runTestSuite(testSuite) ## Default report name pathReport <- file.path(path, "report") ## Report to stdout and text files cat("------------------- UNIT TEST SUMMARY ---------------------\n\n") printTextProtocol(tests, showDetails=FALSE) #printTextProtocol(tests, showDetails=FALSE, # fileName=paste(pathReport, "Summary.txt", sep="")) #printTextProtocol(tests, showDetails=TRUE, # fileName=paste(pathReport, ".txt", sep="")) ## Report to HTML file #printHTMLProtocol(tests, fileName=paste(pathReport, ".html", sep="")) ## Return stop() to cause R CMD check stop in case of ## - failures i.e. FALSE to unit tests or ## - errors i.e. 
R errors tmp <- getErrors(tests) if(tmp$nFail > 0 | tmp$nErr > 0) { stop(paste("\n\nunit testing failed (#test failures: ", tmp$nFail, ", #R errors: ", tmp$nErr, ")\n\n", sep="")) } } else { warning("cannot run unit tests -- package RUnit is not available") } fingerprint/INDEX0000644000176200001440000000270415061401251013371 0ustar liggesusers! Logical Operators for Fingerprints as.character Generates a String Representation of a Fingerprint balance Generate a Balanced Code Fingerprint bit.importance Evaluate the Discriminatory Power of Individual Bits in a Binary Fingerprint bit.spectrum Generate a Bit Spectrum from a List of Fingerprints cdk.lf Functions to parse lines from fingerprint files distance Calculates the Similarity or Dissimilarity Between Two Fingerprints euc.vector Euclidean Representation of Binary Fingerprints featvec-class Class "featvec" featvec.to.binaryfp Convert a Set of Feature Fingerprints to Binary Fingerprints fingerprint-class Class "fingerpint" fold Fold a fingerprint fp.factor.matrix Converts a List of Fingerprints to a data.frame of Factors fp.read Functions to Read Fingerprints From Files fp.sim.matrix Calculates a Similarity Matrix for a Set of Fingerprints fp.to.matrix Converts a List of Fingerprints to a Matrix length Fingerprint Bit Length random.fingerprint Generate Randomized Fingerprints show,fingerprint-method String Representation of a Fingerprint fingerprint/MD50000644000176200001440000000435215164116553013124 0ustar liggesusersaa63c800ee74199d6ec4219e2803a9da *ChangeLog 5d92229163efbdefe9c7b2fec0d460bb *DESCRIPTION 273a1345a5199deee0497cc556812bde *INDEX 86d3f3a95c324c9479bd8986968f4327 *LICENSE 836321b1e72589b01e942d88414a1401 *NAMESPACE f28cc7591c8d64e99d90d9f91383a1c6 *R/balance.R 2c642bc1f05561df93d2c76a9fae1748 *R/bitimp.R c7c9a1eb689e0e64018a60088d1d0589 *R/bitspec.R 702410c34c11fa12b05cda292e710a5b *R/feature.R c4e0d4b459b8de6a1a982b6cc873d964 *R/featurefp.R a1afff02d8d8eac2e9f961d5863832d1 *R/fingerprint.R 
654f051f7cffd07f87bab08eef19dd14 *R/matrix.R d9de2ae0a85c4931e11176c3dc33899c *R/misc.R 8972e18553803e569bf2eb11d586f7af *R/ops.R 7cbe42dbff870aedf3bff372cd5f0b82 *R/read.R fe6078d069c08a91d845aaf6fafc073f *R/zzz.R 334f0de6ed55dc79f59addf091097353 *inst/unitTests/Makefile f22df9f3397f61d3e9679ad049e14c83 *inst/unitTests/bits.fps c7b11a520d31f1e0a0a38ccaee75b0f1 *inst/unitTests/runit.fp.R c7fc4f4bf4a1061b4d609cf20c3b0f34 *inst/unitTests/test.ecfp 0efea50e6b9d5435b4b9704d4391cacd *man/balance.Rd d76542452b5b085238f6708e4c24593c *man/bitimp.Rd 2724c6a774f5ff7745f8278e50d447ca *man/bitspec.Rd b122ce42392d47066017465710d513f7 *man/c.Rd 9807f646ce5102d3e5ea14a9c7e77cbf *man/count-methods.Rd 6b353c421c70366b7eab59c1e7633338 *man/distance-methods.Rd 5c5c1ad2b1efbef8378736a3155f3ace *man/facmat.Rd 8ae5afaebabcc29868fa0d10e6fa0f88 *man/feature-methods.Rd d8c5a144cb28883bc0ce058b79ace5f6 *man/feature.Rd 79473fa713f0131f2a8eeaf07a92a5cb *man/featvec.Rd 49b6393f52ba81a7d27388712fa5311a *man/fingerprint.Rd 6494638b1a4553d6535a5ffb7ce3bd17 *man/fold.Rd 628d41caa54adf763db1ab716b5733b8 *man/fplogical.Rd 37e3fca205a20eed21d28da853c0f3ef *man/length.Rd d7d5cd92ea1c2c9149d754af8109b813 *man/linefunc.Rd 4ff20ca34f27226cf8eea5286100602f *man/mat.Rd 365f63ab760d607ddc8833ace8efb297 *man/read.Rd ce65a09d4194e1d28a28c83a6bc77866 *man/rndfp.Rd a0e79a3d465933089eed74a7f56ac5c4 *man/shannon.Rd 9f69d49ea6ad6a8b2c69b696a8c2715a *man/show.Rd a0c7e4abd4c8f6c862240cc08764cd50 *man/sim.Rd 531d7aae2eb984e674968da4b856a60e *man/string.Rd ca09bc634ca0b37f30dda125555410f6 *man/vec.Rd 7afe32c102f466fbc046191637bc6229 *src/fpdistance.c 3c7a939828404a253b652bf1191c9f60 *src/readfps.c f0b1c7e2062664dbefc7d59abc6a91c4 *src/registerDynamicSymbol.c 7dae096c648f491093bf635c811d5658 *tests/doRUnit.R fingerprint/R/0000755000176200001440000000000015163764222013012 5ustar liggesusersfingerprint/R/matrix.R0000644000176200001440000000417015061401251014426 0ustar liggesusersfp.sim.matrix <- function(fplist, 
fplist2=NULL, method='tanimoto') { sim <- NA if (!is.null(fplist2)) { sim <- do.call('rbind', lapply(fplist, function(fp) unlist(lapply(fplist2, function(x) distance(x,fp, method=method))))) ##diag(sim) <- 1.0 return(sim) } if (method == 'dice') { sim <- .dice.sim.mat(fplist) } else if (method == 'tanimoto') { sim <- .tanimoto.sim.mat(fplist) } else { sim <- matrix(0,nrow=length(fplist), ncol=length(fplist)) for (i in 1:(length(fplist)-1)) { v <- unlist(lapply( fplist[(i+1):length(fplist)], distance, fp2=fplist[[i]], method=method)) sim[i,(i+1):length(fplist)] <- v sim[(i+1):length(fplist),i] <- v } } diag(sim) <- 1.0 return(sim) } ## Takes the fingerprints, P bits, for a set of N molecules supplied as ## a list structure and creates an N x P matrix fp.to.matrix <- function( fplist ) { size <- fplist[[1]]@nbit m <- matrix(0, nrow=length(fplist), ncol=size) cnt <- 1 for ( i in fplist ) { m[cnt,i@bits] <- 1 cnt <- cnt + 1 } m } fp.factor.matrix <- function( fplist ) { size <- fplist[[1]]@nbit m <- data.frame(fp.to.matrix(fplist)) m[] <- lapply(m, factor, levels=0:1) m } .dice.sim.mat <- function(fplist) { m <- fp.to.matrix(fplist) mat<-m%*%t(m) len<-length(m[,1]) s<-mat.or.vec(len,len) rs<-rowSums(m) #since its is binary just add the row values. 
for (i in 1:(len-1)) { for (j in (i+1):len) { s[i,j]=(2*(mat[i,j])/(rs[i]+rs[j])) s[j,i]=s[i,j] } } diag(s) <- 1.0 return(s) } .tanimoto.sim.mat <- function(fplist){ m <- fp.to.matrix(fplist) mat<-m%*%t(m) len<-length(m[,1]) s<-mat.or.vec(len,len) ret <- .C("m_tanimoto", as.double(mat), as.integer(len), as.double(s), PACKAGE="fingerprint") ret <- matrix(ret[[3]], nrow=len, ncol=len, byrow=TRUE) return(ret) ## for (i in 1:len){ ## for (j in 1:len){ ## s[i,j]<- mat[i,j]/(mat[i,i]+mat[j,j]-mat[i,j]) # Formula for Tanimoto Calculation ## } ## } ## return(s) } fingerprint/R/misc.R0000644000176200001440000002110115061401251014046 0ustar liggesusers setGeneric("fold", function(fp) standardGeneric("fold")) setMethod("fold", "fingerprint", function(fp) { size <- fp@nbit if (size %% 2 != 0) { stop('Need to supply a fingerprint of even numbered length') } bfp <- rep(FALSE, size) bfp[fp@bits] <- TRUE subfplen <- size/2 b1 <- which(bfp[1:subfplen]) b2 <- which(bfp[(subfplen+1):size]) subfp1 <- new("fingerprint", nbit=subfplen, bits=b1, provider="R"); subfp2 <- new("fingerprint", nbit=subfplen, bits=b2, provider="R") foldedfp <- subfp1 | subfp2 foldedfp@folded <- TRUE return(foldedfp) }) setGeneric("euc.vector", function(fp) standardGeneric("euc.vector")) setMethod("euc.vector", "fingerprint", function(fp) { coord <- rep(0,length(fp)) coord[fp@bits] <- 1.0 / sqrt(length(fp)) coord }) setGeneric("distance", function(fp1,fp2,method,a,b) standardGeneric("distance")) setMethod("distance", c("featvec", "featvec", "missing", "missing", "missing"), function(fp1, fp2) { distance(fp1, fp2, "tanimoto" ) }) setMethod("distance", c("featvec", "featvec", "character", "missing", "missing"), function(fp1, fp2, method=c("tanimoto", "dice", "robust")) { method <- match.arg(method) n1 <- length(fp1) n2 <- length(fp2) ## extract the feature strings, ignoring counts for now f1 <- sapply(fp1@features, function(x) x@feature) f2 <- sapply(fp2@features, function(x) x@feature) n12 <- 
length(intersect(f1,f2)) if (method == 'tanimoto') { return(n12/(n1+n2-n12)) } else if (method == "robust") { return(0.5 + 0.5 * n12 * n12 / (n1*n2)) } else if (method == "dice") { return(2.0 * n12 / (n1+n2)) } }) setMethod("distance", c("fingerprint", "fingerprint", "missing", "missing", "missing"), function(fp1,fp2) { distance(fp1,fp2,"tanimoto") }) setMethod("distance", c("fingerprint", "fingerprint", "character", "numeric", "numeric"), function(fp1, fp2, method="tversky", a, b) { if (!is.null(method) && !is.na(method) && method != "tversky") distance(fp1, fp2, method) if ( length(fp1) != length(fp2)) stop("Fingerprints must of the same bit length") if (a < 0 || b < 0) stop("a and b must be positive") tmp <- fp1 & fp2 xiy <- length(tmp@bits) tmp <- fp1 | fp2 xuy <- length(tmp@bits) x <- length(fp1@bits) y <- length(fp2@bits) return( xiy / (a*x + b*y + (1-a-b)*xiy ) ) }) setMethod("distance", c("fingerprint", "fingerprint", "character", "missing", "missing"), function(fp1,fp2, method=c('tanimoto', 'euclidean', 'mt', 'simple', 'jaccard', 'dice', 'russelrao', 'rodgerstanimoto','cosine', 'achiai', 'carbo', 'baroniurbanibuser', 'kulczynski2', 'hamming', 'meanHamming', 'soergel', 'patternDifference', 'variance', 'size', 'shape', 'hamann', 'yule', 'pearson', 'dispersion', 'mcconnaughey', 'stiles', 'simpson', 'petke', 'stanimoto', 'seuclidean' )) { if (method == 'tversky') stop("If Tversky metric is desired, must specify a and b") if ( length(fp1) != length(fp2)) stop("Fingerprints must of the same bit length") method <- match.arg(method) n <- length(fp1) if (method == 'tanimoto') { f1 <- numeric(n) f2 <- numeric(n) f1[fp1@bits] <- 1 f2[fp2@bits] <- 1 sim <- 0.0 ret <- .C("fpdistance", as.double(f1), as.double(f2), as.integer(n), as.integer(1), as.double(sim), PACKAGE="fingerprint") return (ret[[5]]) } else if (method == 'euclidean') { f1 <- numeric(n) f2 <- numeric(n) f1[fp1@bits] <- 1 f2[fp2@bits] <- 1 sim <- 0.0 ret <- .C("fpdistance", as.double(f1), as.double(f2), 
as.integer(n), as.integer(2), as.double(sim), PACKAGE="fingerprint") return (ret[[5]]) } size <- n ## in A & B tmp <- fp1 & fp2 c <- length(tmp@bits) ## in A not in B tmp <- (fp1 | fp2) & !fp2 a <- length(tmp@bits) ## in B not in A tmp <- (fp1 | fp2) & !fp1 b <- length(tmp@bits) ## not in A, not in B tmp <- !(fp1 | fp2) d <- length(tmp@bits) dist <- NULL ## Simlarity if (method == 'stanimoto') { dist <- c / (a+b+c) } else if (method == 'seuclidean') { dist <- sqrt((d+c) / (a+b+c+d)) } else if (method == 'dice') { dist <- c / (.5*a + .5*b + c) } else if (method == 'mt') { t1 <- c/(size-d) t0 <- d/(size-c) phat <- ((size-d) + c)/(2*size) dist <- (2-phat)*t1/3 + (1+phat)*t0/3 } else if (method == 'simple') { dist <- (c+d)/n } else if (method == 'jaccard') { dist <- c/(a+b+c) } else if (method == 'russelrao') { dist <- c/size } else if (method == 'rodgerstanimoto') { dist <- (c+d)/(2*a+2*b+c+d) } else if (method == 'cosine' || method == 'achiai' || method == 'carbo') { dist <- c/sqrt((a+c)*(b+c)) } else if (method == 'baroniurbanibuser') { dist <- (sqrt(c*d)+c)/(sqrt(c*d)+a+b+c) } else if (method == 'kulczynski2') { dist <- .5*(c/(a+c)+c/(b+c)) } ## Dissimilarity else if (method == 'hamming') { dist <- a+b } else if (method == 'meanHamming') { dist <- (a+b)/(a+b+c+d) }else if (method == 'soergel') { dist <- (a+b)/(a+b+c) } else if (method == 'patternDifference') { dist <- (a*b)/(a+b+c+d)^2 } else if (method == 'variance') { dist <- (a+b)/(4*n) } else if (method == 'size') { dist <- (a-b)^2/n^2 } else if (method == 'shape') { dist <- (a+b)/n-((a-b)/(n))^2 } ## Composite else if (method == 'hamann') { dist <- (c+d-a-b)/(a+b+c+d) } else if (method == 'yule') { dist <- (c*d-a*b)/(c*d+a*b) } else if (method == 'pearson') { dist <- (c*d-a*b)/sqrt((a+c)*(b+c)*(a+d)*(b+d)) } else if (method == 'dispersion') { dist <- (c*d-a*b)/n^2 } else if (method == 'mcconnaughey') { dist <- (c^2-a*b)/((a+c)*(b+c)) } else if (method == 'stiles') { dist <- 
log10(n*(abs(c*d-a*b)-n/2)^2/((a+c)*(b+c)*(a+d)*(b+d))) } ## Asymmetric else if (method == 'simpson') { dist <- c/min((a+c),(b+c)) } else if (method == 'petke') { dist <- c/max((a+c),(b+c)) } dist }) setGeneric("random.fingerprint", function(nbit, on) standardGeneric("random.fingerprint")) setMethod("random.fingerprint", c("numeric", "numeric"), function(nbit, on) { if (nbit <= 0) stop("Bit length must be positive integer") if (on <= 0) stop("Number of bits to be set to 1 must be positive integer") bits <- sample(1:nbit, size=on) new("fingerprint", nbit=nbit, bits=bits, provider="R", folded=FALSE) }) fingerprint/R/fingerprint.R0000644000176200001440000000425615163764222015473 0ustar liggesuserssetClass("fingerprint", representation(bits="numeric", nbit="numeric", folded="logical", provider="character", name="character", misc="list"), validity=function(object) { if (any(object@bits > object@nbit)) return("Bit positions were greater than the specified bit length") else return(TRUE) }, prototype(bits=c(), nbit=0, folded=FALSE, provider="", name="", misc=list())) #setGeneric("show", function(object) standardGeneric("show")) setMethod("show", "fingerprint", function(object) { cat("Fingerprint object\n") cat(" name = ", object@name, "\n") cat(" length = ", object@nbit, "\n") cat(" folded = ", object@folded, "\n") cat(" source = ", object@provider, "\n") cat(" bits on = ", paste(sort(object@bits), collapse=' '), "\n") }) setMethod('as.character', "fingerprint", function(x) { s <- numeric(x@nbit) s[x@bits] <- 1 paste(s,sep='',collapse='') }) setMethod("length", "fingerprint", function(x) { x@nbit }) parseCall <- function (obj) { if (!inherits(obj, "call")) { stop("Must supply a 'call' object") } srep <- deparse(obj) if (length(srep) > 1) srep <- paste(srep, sep = "", collapse = "") fname <- unlist(strsplit(srep, "\\("))[1] func <- unlist(strsplit(srep, paste(fname, "\\(", sep = "")))[2] func <- unlist(strsplit(func, "")) func <- paste(func[-length(func)], sep = "", 
collapse = "") func <- unlist(strsplit(func, ",")) vals <- list() nms <- c() cnt <- 1 for (args in func) { arg <- unlist(strsplit(args, "="))[1] val <- unlist(strsplit(args, "="))[2] arg <- gsub(" ", "", arg) val <- gsub(" ", "", val) vals[[cnt]] <- val nms[cnt] <- arg cnt <- cnt + 1 } names(vals) <- nms vals } fingerprint/R/zzz.R0000644000176200001440000000004115061401251013750 0ustar liggesusers.onLoad <- function(lib, pkg) {} fingerprint/R/read.R0000644000176200001440000000724115061401251014037 0ustar liggesusersjchem.binary.lf <- function(line) { molid <- strsplit(line, "\t")[[1]][1] bitpos <- .Call("parse_jchem_binary", as.character(line), as.integer(nchar(line)) ) if (is.null(bitpos)) return(NULL) list(molid, bitpos+1, list()) ## we add 1, since C does bit positions from 0 } fps.lf <- function(line) { toks <- strsplit(line, "\\s")[[1]]; title <- paste(toks[2:length(toks)], collapse=' ', sep='') bitpos <- .Call("parse_hex", as.character(toks[1]), as.integer(nchar(toks[1]))) if (is.null(bitpos)) return(NULL) if (length(toks) > 2) { misc <- list(toks[-c(1,2)]) } else { misc <- list() } list(title, bitpos+1, misc) ## we add 1, since C does bit positions from 0 } cdk.lf <- function(line) { p <- regexpr("{([0-9,\\s]*)}",line,perl=T) s <- gsub(',','',substr(line, p+1, p+attr(p,"match.length")-2)) s <- lapply( strsplit(s,' '), as.numeric ) molid <- gsub("\\s+","", strsplit(line, "\\{")[[1]][1]) list(molid, s[[1]], list()) } moe.lf <- function(line) { p <- regexpr("\"([0-9\\s]*)\"",line, perl=T) s <- substr(line, p+1, p+attr(p,"match.length")-2) s <- lapply( strsplit(s,' '), as.numeric ) list(NA, s[[1]], list()) } bci.lf <- function(line) { tokens <- strsplit(line, '\\s')[[1]] name <- tokens[1] tokens <- tokens[-c(1, length(tokens), length(tokens)-1)] list(name, as.numeric(tokens), list()) } ecfp.lf <- function(line) { tokens <- strsplit(line, '\\s')[[1]] name <- tokens[1] tokens <- tokens[-1] list(name, tokens, list()) } ## TODO we should be iterating over lines and 
not reading ## them all in fp.read <- function(f='fingerprint.txt', size=1024, lf=cdk.lf, header=FALSE, binary=TRUE) { lf.name <- deparse(substitute(lf)) provider <- lf.name fplist <- list() fcon <- file(description=f,open='r') lines = readLines(fcon,n=-1) if (header && lf.name != 'fps.lf') lines = lines[-1] if (lf.name == 'fps.lf') { binary <- TRUE size <- NULL ## process the header block nheaderline = 0 for (line in lines) { if (substr(line,1,1) != '#') break nheaderline <- nheaderline + 1 if (nheaderline == 1 && length(grep("#FPS1", line)) != 1) stop("Invalid FPS format") if (length(grep("#num_bits", line)) == 1) size <- as.numeric(strsplit(line, '=')[[1]][2]) if (length(grep("#software", line)) == 1) provider <- as.character(strsplit(line, '=')[[1]][2]) } lines <- lines[ (nheaderline+1):length(lines) ] if (is.null(size)) { # num_bit size <- nchar(strsplit(line, '\\s')[[1]][1]) * 4 } } c = 1 for (line in lines) { dat <- lf(line) if (is.null(dat)) { warning(sprintf("Couldn't parse: %s", line)) next } if (is.na(dat[[1]])) name <- "" else name <- dat[[1]] misc <- dat[[3]] ## usually empty if (binary) { fplist[[c]] <- new("fingerprint", nbit=size, bits=as.numeric(dat[[2]]), folded=FALSE, provider=provider, name=name, misc=misc) } else { ## convert the features to 'feature' objects feats <- lapply(dat[[2]], function(x) new("feature", feature=x)) fplist[[c]] <- new("featvec", features=feats, provider=provider, name=name, misc=misc) } c <- c+1 } close(fcon) fplist } ## Need to supply the length of the bit string since fp.read does ## not provide that information fp.read.to.matrix <- function(f='fingerprint.txt', size=1024, lf=cdk.lf, header=FALSE) { fplist <- fp.read(f, size, lf, header) fpmat <- fp.to.matrix(fplist) fpmat } fingerprint/R/ops.R0000644000176200001440000000311615061401251013722 0ustar liggesuserssetMethod("&", c("fingerprint", "fingerprint"), function(e1, e2) { if (e1@nbit != e2@nbit) stop("fp1 & fp2 must of the same bit length") andbits <- 
intersect(e1@bits, e2@bits) new("fingerprint", bits=andbits, nbit=e1@nbit, provider="R") }) setMethod("|", c("fingerprint", "fingerprint"), function(e1, e2) { if (e1@nbit != e2@nbit) stop("fp1 & fp2 must of the same bit length") orbits <- union(e1@bits, e2@bits) new("fingerprint", bits=orbits, nbit=e1@nbit, provider="R") }) setMethod("!", c("fingerprint"), function(x) { bs <- 1:(x@nbit) if (length(x@bits) > 0) b <- bs[ -x@bits ] else b <- bs ret <- new("fingerprint", bits=b, nbit=x@nbit, provider="R") return(ret) }) setMethod("xor", c("fingerprint", "fingerprint"), function(x,y) { if (x@nbit != y@nbit) stop("e1 & e2 must of the same bit length") tmp1 <- rep(FALSE, x@nbit) tmp2 <- rep(FALSE, y@nbit) tmp1[x@bits] <- TRUE tmp2[y@bits] <- TRUE tmp3 <- xor(tmp1,tmp2) xorbits <- which(tmp3) new("fingerprint", bits=xorbits, nbit=x@nbit, provider="R") }) fingerprint/R/bitspec.R0000644000176200001440000000130415163764221014563 0ustar liggesusersbit.spectrum <- function(fplist) { if (!is.list(fplist)) stop("Must provide a list of fingerprint objects") if (any(!unlist(lapply(fplist, inherits, "fingerprint")))) stop("Must provide a list of fingerprint objects"); nbit <- length(fplist[[1]]) spec <- numeric(nbit) for (i in 1:length(fplist)) { bits <- fplist[[i]]@bits spec[bits] <- spec[bits]+1 } spec / length(fplist) } shannon <- function(fplist) { if (!is.list(fplist)) stop("Must provide a list of fingerprint objects") if (any(!unlist(lapply(fplist, inherits, "fingerprint")))) stop("Must provide a list of fingerprint objects"); bs <- bit.spectrum(fplist) bs <- bs[ bs != 0 ] -1 * sum( bs * log2(bs) ) } fingerprint/R/featurefp.R0000644000176200001440000000364715061401251015113 0ustar liggesusers## A feature fingerprint will be a vector of feature objects setClass("featvec", representation(features="list", provider="character", name="character", misc="list"), validity=function(object) { ## features must be a list of feature objects klasses <- unique(sapply(object@features, class)) 
if (length(klasses) != 1 || klasses != 'feature') return("Must supply a list of 'feature' objects") iss4s <- sapply(object@features, isS4) if (!all(iss4s)) return("Must supply a list of 'feature' objects") return(TRUE) }, prototype(features=list(), provider="", name="", misc=list())) setMethod('show', 'featvec', function(object) { cat("Feature fingerprint\n") cat(" name = ", object@name, "\n") cat(" source = ", object@provider, "\n") cat(" features = ", paste(sapply(object@features, as.character), collapse=' '), "\n") }) setMethod('as.character', 'featvec', function(x) { return(paste(sapply(x@features, as.character), collapse=' ')) }) setMethod("length", "featvec", function(x) { length(x@features) }) ## featvec.to.binaryfp <- function(fps, bit.length = 256) { ## if (!all(sapply(fps, class) == 'featvec')) ## stop("Must supply a list of feature vector fingerprints") ## ## get all the features ## features <- sort(unique(unlist(lapply(fps, as.numeric)))) ## nbit <- length(features) ## if (nbit %% 2 == 1) nbit <- nbit + 1 ## ## based on the entire feature set, convert original fps to binary fps ## fps <- lapply(fps, function(x) { ## bitpos <- match(as.numeric(x), features) ## new("fingerprint", nbit=nbit, folded=FALSE, provider=x@provider,name=x@name, bits=bitpos) ## }) ## return(fps) ## } fingerprint/R/bitimp.R0000644000176200001440000000055315061401251014407 0ustar liggesusersbit.importance <- function(actives, background) { bs.actives <- bit.spectrum(actives) bs.background <- bit.spectrum(background) m <- length(actives) n <- length(background) pa <- (m*bs.actives+bs.background)/(m+1) pb <- (n*bs.background+bs.actives)/(n+1) kl <- pa * log(pa/pb) + (1-pa) * log( (1-pa)/(1-pb) ) kl[is.nan(kl)] <- NA return(kl) } fingerprint/R/feature.R0000644000176200001440000000342115061401251014553 0ustar liggesusers## Define a feature and its count setClass("feature", contains = 'integer', representation(feature='character', count='integer'), validity=function(object) { if 
(is.na(object@feature) || is.null(object@feature)) return("feature must be a string") if (object@count < 0) return("count must be zero or a positive integer") return(TRUE) }, prototype(feature='', count=as.integer(1)) ) setMethod('show', 'feature', function(object) { cat(sprintf('%s:%d', object@feature, object@count), '\n') }) setMethod('as.character', signature(x='feature'), function(x) sprintf("%s:%d", x@feature, x@count)) setMethod('c', signature(x='feature'), function(x, ..., recursive=FALSE) { elems <- list(x, ...) ret <- list() for (i in seq_along(elems)) { ret[[i]] <- new("feature", feature=elems[[i]]@feature, count=as.integer(elems[[i]]@count)) } return(ret) }) ## getters/setters setGeneric("feature", function(object) standardGeneric("feature")) setMethod("feature", "feature", function(object) object@feature) setGeneric("feature<-", function(this, value) standardGeneric("feature<-")) setReplaceMethod("feature", signature=signature("feature", "character"), function(this, value) { this@feature <- value this }) setGeneric("count", function(object) standardGeneric("count")) setMethod("count", "feature", function(object) object@count) setGeneric("count<-", function(this, value) standardGeneric("count<-")) setReplaceMethod("count", signature=signature("feature", "numeric"), function(this, value) { this@count <- as.integer(value) this }) fingerprint/R/balance.R0000644000176200001440000000072015061401251014504 0ustar liggesusersbalance <- function(fplist) { if (is.list(fplist)) { lapply(fplist, function(fp) { compl <- !fp new('fingerprint', nbit = 2 * length(fp), bits = c(fp@bits, compl@bits+length(fp)), provider='R', name='balanced') }) } else { fp <- fplist compl <- !fp new('fingerprint', nbit = 2 * length(fp), bits = c(fp@bits, compl@bits+length(fp)), provider='R', name='balanced') } } fingerprint/src/0000755000176200001440000000000015163767203013402 5ustar liggesusersfingerprint/src/readfps.c0000644000176200001440000000434315061401251015157 0ustar 
liggesusers#include #include /* Bulk of the code provided by Andrew Dalke, modified by me to be usable from R */ int bit_is_on(char*,int); #define charmask(c) ((unsigned char)((c) & 0xff)) static int to_int(int c) { if (c >= '0' && c <= '9') { return c - '0'; } if (c >= 'A' && c <= 'F') { return c - 'A' + 10; } if (c >= 'a' && c <= 'f') { return c - 'a' + 10; } return -1; } SEXP parse_hex(SEXP hexstr, SEXP hexlen) { int i,j; const char *argbuf; int arglen; argbuf = (const char*) CHAR(STRING_ELT(hexstr,0)); arglen = INTEGER(hexlen)[0]; char* retbuf = (char*) R_alloc(arglen/2, sizeof(char)); for (i=j=0; i < arglen; i += 2) { int top = to_int(charmask(argbuf[i])); int bot = to_int(charmask(argbuf[i+1])); if (top == -1 || bot == -1) { return R_NilValue; } retbuf[j++] = (top << 4) + bot; } // determine the number of on bits int n_on = 0; for (i = 0; i < arglen*4; i++) if (bit_is_on(retbuf, i)) n_on++; // now, we save the positions of the bits int *bitpos = (int*) R_alloc(n_on, sizeof(int)); j = 0; for (i = 0; i < arglen*4; i++) { if (bit_is_on(retbuf, i)) bitpos[j++] = i; } SEXP retsexp; PROTECT(retsexp = allocVector(INTSXP, n_on)); for (i = 0; i < n_on; i++) INTEGER(retsexp)[i] = bitpos[i]; UNPROTECT(1); return(retsexp); } int bit_is_on(char *fp, int B) { return fp[B / 8] >> (B%8) & 0x01; } SEXP parse_jchem_binary(SEXP bstr, SEXP len) { int i,j; const char *argbuf; int arglen; argbuf = (const char*) CHAR(STRING_ELT(bstr,0)); arglen = INTEGER(len)[0]; // determine number of 1's int n_on = 0; i = 0; while (i < arglen) { if (argbuf[i++] == 9) break; } int startPos = i; while (i < arglen) { if (argbuf[i++] == 49) n_on++; } // no get the actual bit positions int *bitpos = (int*) R_alloc(n_on, sizeof(int)); int bitIdx = 0; j = 0; for (i = startPos; i < arglen; i++) { int c = argbuf[i]; if (c != 49 && c != 48) continue; if (c == 49) bitpos[j++] = bitIdx; bitIdx++; } SEXP retsexp; PROTECT(retsexp = allocVector(INTSXP, n_on)); for (i = 0; i < n_on; i++) INTEGER(retsexp)[i] = 
bitpos[i]; UNPROTECT(1); return(retsexp); } fingerprint/src/fpdistance.c0000644000176200001440000000330715061401251015652 0ustar liggesusers#include #define X(_m,_i,_j,_nrow) _m[ _i + _nrow * _j ] #define METRIC_TANIMOTO 1 #define METRIC_EUCLIDEAN 2 double d_tanimoto(double*,double*,int); double d_euclidean(double*,double*,int); void m_tanimoto(double *m, int *nrow, double *ret) { int i,j; for (i = 0; i < *nrow; i++) { for (j = i+1; j < *nrow; j++) { double mij = X(m, i,j, *nrow); double mii = X(m, i,i, *nrow); double mjj = X(m, j,j, *nrow); X(ret, i, j, *nrow) = X(ret, j, i, *nrow) = mij / (mii+mjj-mij); } } return; } /** fp1 and fp2 should be an array of 1's and 0's, of length equal to the size of the fingerprint **/ void fpdistance(double *fp1, double *fp2, int *nbit, int *metric, double *ret) { double r = 0.0; switch(*metric) { case METRIC_TANIMOTO: r = d_tanimoto(fp1, fp2, *nbit); break; case METRIC_EUCLIDEAN: r = d_euclidean(fp1, fp2, *nbit); } *ret = r; return; } /** http://www.daylight.com/dayhtml/doc/theory/theory.finger.html **/ double d_tanimoto(double *fp1, double *fp2, int nbit) { int i; int nc = 0; int na = 0; int nb = 0; if (nbit <= 0) return(-1.0); for (i = 0; i < nbit; i++) { if (fp1[i] == 1 && fp2[i] == 1) nc++; if (fp1[i] == 1 && fp2[i] == 0) na++; if (fp2[i] == 1 && fp1[i] == 0) nb++; } return ((double) nc) / (double) (na + nb + nc); } /** http://www.daylight.com/dayhtml/doc/theory/theory.finger.html **/ double d_euclidean(double *fp1, double *fp2, int nbit) { int i; int nc = 0; int nd = 0; if (nbit <= 0) return(-1.0); for (i = 0; i < nbit; i++) { if (fp1[i] == 1 && fp2[i] == 1) nc++; if (fp1[i] == 0 && fp2[i] == 0) nd++; } return sqrt(((double) nc + (double) nd) / (double) nbit); } fingerprint/src/registerDynamicSymbol.c0000644000176200001440000000030615061401251020045 0ustar liggesusers#include #include #include void R_init_markovchain(DllInfo* info) { R_registerRoutines(info, NULL, NULL, NULL, NULL); R_useDynamicSymbols(info, TRUE); } 
fingerprint/ChangeLog0000644000176200001440000001105215061401251014345 0ustar liggesusers2013-10-28 Guha * src/fpdistance.c (m_tanimoto): Reduced the number of iterations based on comments from John May 2013-10-27 Guha * R/matrix.R (.tanimoto.sim.mat): Updated Tanimoto matrix code form Abhik Seal to use C code for nested loops. 2013-10-22 Rajarshi Guha * R/matrix.R (fp.sim.matrix): Updated to use the new matrix multiplication based Tanimoto calculation for similarity matrices contributed by Abhik Seal 2013-10-06 Rajarshi Guha * R/zzz.R (.onLoad): Removed unecessary usage of require * R/misc.R: Removed debug code 2013-10-05 Rajarshi Guha * Added the 'feature' class to represent alphanumeric features (usually substructures but could be arbitrary hashes) and their counts * Updated the 'featvec' fingerprint class to use 'feature' objects * Removed featvec.to.binary since the featvec fingerprint type can also include non numeric features 2013-04-05 Rajarshi Guha * Updated package to remove use of deprecated methods 2012-10-30 Rajarshi Guha * R/matrix.R (fp.sim.matrix): Added code provided by Abhik Seal to speed up pairwise similarity matrix calculation when the Dice metric is specified. 2012-02-21 Rajarshi Guha * R/read.R (jchem.binary.lf): Added a line parser for JChem binary string formatted fingerprints. 
This is based on a C function to parse the fingerprint portion of a line 2011-07-26 Rajarshi Guha * man/sim.Rd: Updated man page for fp.sim.matrix to indicate the use of two fingerprint lists * R/matrix.R (fp.sim.matrix): Updated similarity matrix calculation to support cross-similarity (ie, similarity matrix from two (possibly different lengths) lists of fingerprints 2011-06-03 Rajarshi Guha * src/fpdistance.c: Cleaned up uncessary headers and unused variables * src/readfps.c: Cleaned up unecessary headers 2011-06-02 Rajarshi Guha * R/read.R (fp.read): Updated line functions to return a third component that can be used to return the remainder of a line if a format allows other items than just a title and fingerprint. (fps.lf): Updated FPS line function to actually return remaining components of a fingerprint line. Updated main reader to set the misc field of a fingerprint object to hold this list 2011-06-01 Rajarshi Guha * src/readfps.c (parse_hex): Added a C function to parse hex-encoded fingerprints from the FPS format and return the bit positions that are set to 1 * R/read.R (fps.lf): Added a new line parser to handle the fingerprint lines from the FPS format (fp.read): Updated main fingerprint reader to handle the multi-line header from FPS format fingerprint files 2011-04-14 Rajarshi Guha * R/bitspec.R (shannon): Added a method to evaluate the Shannon entropy for a list of fingerprints. Also added a man page 2010-11-07 Rajarshi Guha * R/read.R: Updaetd the CDK line parser to extract the molecule id and return it so that the fingerpint object contains the molecule id 2010-10-20 Rajarshi Guha * R/read.R (fp.read): Updated to support reading of feature fingerprints. The user must now indicate whether a binary or a feature fingerprint is being read. Also added a new line parsing function to process generic feature fingerprints. * R/misc.R: Added similarity metrics for feature fingerprints. Currently tanimoto, dice and robust metrics are supported. 
* R/featurefp.R: Added a class to support fingerprints that are represented as lists of numeric or string features such as circular fingerprints. Also added a method to convert a collection of feature fingerprints to a fixed-length binary string representation. * man/featvec.Rd: Added man page for te featvec class, used to represent fingerprints characterized as numeric or character features 2010-06-02 Rajarshi Guha * R/balance.R (balance): Added a method to generate balanced fingerprints, which have 50% bit density, but are 2x the size of the input fingerprints. * R/bitimp.R (bit.importance): Added a function to evaluate the importance of each bit in a binary fingerprint in terms of the Kullback Liebler divergence between a set of actives and a background collection. fingerprint/NAMESPACE0000644000176200001440000000113515061401251014013 0ustar liggesusersimportFrom("methods", "new") exportClasses("fingerprint") exportClasses("featvec") exportClasses("feature") exportMethods("fold", "euc.vector", "distance", "random.fingerprint", "as.character", "length", "feature", "count", 'feature<-', 'count<-') export("fp.sim.matrix", "fp.to.matrix", "fp.factor.matrix", "fp.read.to.matrix", "fp.read","shannon", ##"featvec.to.binaryfp", "moe.lf", "bci.lf", "cdk.lf", "ecfp.lf", "fps.lf", "jchem.binary.lf", "bit.spectrum", "balance", "bit.importance") useDynLib(fingerprint,.registration = TRUE) fingerprint/LICENSE0000644000176200001440000002613515061401251013610 0ustar liggesusers Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. fingerprint/inst/0000755000176200001440000000000015061401251013551 5ustar liggesusersfingerprint/inst/unitTests/0000755000176200001440000000000015061401251015553 5ustar liggesusersfingerprint/inst/unitTests/Makefile0000644000176200001440000000036315163770067017236 0ustar liggesusersTOP=../.. 
PKG=${shell cd ${TOP};pwd} SUITE=doRUnit.R R=R all: inst test inst: # Install package cd ${TOP}/..;\ ${R} CMD INSTALL ${PKG} test: # Run unit tests export RCMDCHECK=FALSE;\ cd ${TOP}/tests;\ ${R} --vanilla --slave < ${SUITE} fingerprint/inst/unitTests/bits.fps0000644000176200001440000006450115061401251017234 0ustar liggesusers#FPS1 #num_bits=166 #software=OEChem/1.7.4 (20100809) #type=RDMACCS-OpenEye/1 #source=bits.smi #date=2011-06-01T12:49:56 010000000000000000000000000000000000000000 Fake-0 0 000000000008000000000000000000000000000000 Fake-43 43 000000000400000000000000000000000000000000 [Li] 34 000200000000000000000000000000000000000000 [Be] 9 000002000000000000000000000000000000000000 [B] 17 000000000000000000000000000000000000000001 [N] 160 000000000000000000000000000000000000000008 [O] 163 000000000002000000000000000000002000000000 [F] 41,133 000000000400000000000000000000000000000000 [Na] 34 000200000000000000000000000000000000000000 [Mg] 9 000002000000000000000000000000000000000000 [Al] 17 000008000000000000000000000000000000000000 [Si] 19 000000100000000000000000000000000000000000 [P] 28 000000000000000000008000000000000000000000 [S] 87 000000000000000000000000400000002000000000 [Cl] 102,133 000000000400000000000000000000000000000000 [K] 34 000200000000000000000000000000000000000000 [Ca] 9 100000000000000000000000000000000000000000 [Sc] 4 100000000000000000000000000000000000000000 [Ti] 4 400000000000000000000000000000000000000000 [V] 6 400000000000000000000000000000000000000000 [Cr] 6 400000000000000000000000000000000000000000 [Mn] 6 000100000000000000000000000000000000000000 [Fe] 8 000100000000000000000000000000000000000000 [Co] 8 000100000000000000000000000000000000000000 [Ni] 8 000800000000000000000000000000000000000000 [Cu] 11 000800000000000000000000000000000000000000 [Zn] 11 000002000000000000000000000000000000000000 [Ga] 17 040000000000000000000000000000000000000000 [Ge] 2 040000000000000000000000000000000000000000 [As] 2 
040000000000000000000000000000000000000000 [Se] 2 000000000020000000000000000000002000000000 [Br] 45,133 000000000400000000000000000000000000000000 [Rb] 34 000200000000000000000000000000000000000000 [Sr] 9 100000000000000000000000000000000000000000 [Y] 4 100000000000000000000000000000000000000000 [Zr] 4 400000000000000000000000000000000000000000 [Nb] 6 400000000000000000000000000000000000000000 [Mo] 6 400000000000000000000000000000000000000000 [Tc] 6 000100000000000000000000000000000000000000 [Ru] 8 000100000000000000000000000000000000000000 [Rh] 8 000100000000000000000000000000000000000000 [Pd] 8 000800000000000000000000000000000000000000 [Ag] 11 000800000000000000000000000000000000000000 [Cd] 11 000002000000000000000000000000000000000000 [In] 17 040000000000000000000000000000000000000000 [Sn] 2 040000000000000000000000000000000000000000 [Sb] 2 040000000000000000000000000000000000000000 [Te] 2 000000040000000000000000000000002000000000 [I] 26,133 000000000400000000000000000000000000000000 [Cs] 34 000200000000000000000000000000000000000000 [Ba] 9 200000000000000000000000000000000000000000 [La] 5 200000000000000000000000000000000000000000 [Ce] 5 200000000000000000000000000000000000000000 [Pr] 5 200000000000000000000000000000000000000000 [Nd] 5 200000000000000000000000000000000000000000 [Pm] 5 200000000000000000000000000000000000000000 [Sm] 5 200000000000000000000000000000000000000000 [Eu] 5 200000000000000000000000000000000000000000 [Gd] 5 200000000000000000000000000000000000000000 [Tb] 5 200000000000000000000000000000000000000000 [Dy] 5 200000000000000000000000000000000000000000 [Ho] 5 200000000000000000000000000000000000000000 [Er] 5 200000000000000000000000000000000000000000 [Tm] 5 200000000000000000000000000000000000000000 [Yb] 5 200000000000000000000000000000000000000000 [Lu] 5 100000000000000000000000000000000000000000 [Hf] 4 400000000000000000000000000000000000000000 [Ta] 6 400000000000000000000000000000000000000000 [W] 6 
400000000000000000000000000000000000000000 [Re] 6 000100000000000000000000000000000000000000 [Os] 8 000100000000000000000000000000000000000000 [Ir] 8 000100000000000000000000000000000000000000 [Pt] 8 000800000000000000000000000000000000000000 [Au] 11 000800000000000000000000000000000000000000 [Hg] 11 040002000000000000000000000000000000000000 [Tl] 2,17 040000000000000000000000000000000000000000 [Pb] 2 040000000000000000000000000000000000000000 [Bi] 2 000000000400000000000000000000000000000000 [Fr] 34 000200000000000000000000000000000000000000 [Ra] 9 080000000000000000000000000000000000000000 [Ac] 3 080000000000000000000000000000000000000000 [Th] 3 080000000000000000000000000000000000000000 [Pa] 3 080000000000000000000000000000000000000000 [U] 3 080000000000000000000000000000000000000000 [Np] 3 080000000000000000000000000000000000000000 [Pu] 3 080000000000000000000000000000000000000000 [Am] 3 080000000000000000000000000000000000000000 [Cm] 3 080000000000000000000000000000000000000000 [Bk] 3 080000000000000000000000000000000000000000 [Cf] 3 080000000000000000000000000000000000000000 [Es] 3 080000000000000000000000000000000000000000 [Fm] 3 080000000000000000000000000000000000000000 [Md] 3 080000000000000000000000000000000000000000 [No] 3 0a0000000000000000000000000000000000000000 [Lr] 3,1 000000000000000000000000000000000000000016 c1ccccc1 161,162,164 000000000000000000000000000000100000010036 c1ccccc1.c1ccccc1 124,144,161,162,164,165 000000100000000000000000020000000001000016 c1ccccp1 28,97,136,161,162,164 00000000000000010000048000000000000100001a c1ccco1 56,82,95,163,136,161,164 000000000800000000008480000000000001000012 c1cccs1 35,82,87,95,136,161,164 040000000000000000000480000000000001000012 [se]1cccc1 2,82,95,136,161,164 040000000000000000000480000000000001000012 [te]1cccc1 2,82,95,136,161,164 000400000000000000000000000020000000040010 C1CCC1 10,117,146,164 800400000004280038500020000080090621400011 N1NNN1 
7,10,42,51,53,67,68,69,76,78,93,119,120,123,129,130,136,141,150,160,164 0010800000000000500220302000000a000418a009 ON(C)C 12,23,68,70,73,85,92,93,101,121,123,138,147,148,157,159,160,163 0010840000100000503400602400c04b0065082019 On1cncccc1 12,18,23,44,68,70,74,76,77,93,94,98,101,118,119,120,121,123,126,136,138,141,142,147,157,160,163,164 0010800000100001400400302600800b000108b01d c1ccon(C)c1 12,23,44,56,70,74,92,93,97,98,101,119,120,121,123,136,147,156,157,159,160,162,163,164 0010840000000001410004a03401810b008108181b c1cccon2c13.c2cc3 12,18,23,56,64,70,82,93,95,98,100,101,104,112,119,120,121,123,136,143,147,155,156,160,161,163,164 0010000000000140400220302000400a000018a009 O=[N+](C)C 12,48,62,70,73,85,92,93,101,118,121,123,147,148,157,159,160,163 0010800002000100502000302000400a000408a009 O[N+](=C)C 12,23,33,48,68,70,77,92,93,101,118,121,123,138,147,157,159,160,163 00200000000000001c008000000000080400000000 SS 13,66,67,68,87,123,130 004000000004000000000000000200040404025008 OC(O)O 14,42,105,122,130,138,145,156,158,163 004000000004000000000000000200040404025208 O=C(O)O 14,42,105,122,130,138,145,153,156,158,163 004000000004000000000000000200040404025208 OC(=O)O 14,42,105,122,130,138,145,153,156,158,163 004000000004000000000000000200040404025208 OC(O)=O 14,42,105,122,130,138,145,153,156,158,163 00400000000000018000048000028144004102421a c1coc(=O)o1 14,56,71,82,95,105,112,119,122,126,136,142,145,153,158,161,163,164 008020000000000100002000001000000001041118 C1CO1 15,21,56,85,108,136,146,152,156,163,164 000020000000000000000000000020000000040010 C1CC1 21,117,146,164 000000000200000000000000040000000000000000 C=C 33,98 000001000000000000000000000000000000000000 C#C 16 000004000000000000000000000020800100040010 C1CCCCCC1 18,117,127,128,146,164 00000000000000000000000000082e800100148400 CCCCCCC 107,113,114,115,117,127,128,146,148,154,159 000010000214000000080808042010000404403809 C=C(N)O 20,33,42,44,75,83,91,98,109,116,130,138,150,155,156,157,160,163 
000000000220000000080000040400002000000016 C=C(c1ccccc1)Br 33,45,75,98,106,133,161,162,164 000040000004000000000800002210040404407809 NC(O)O 22,42,83,105,109,116,122,130,138,150,155,156,157,158,160,163 000040000004000000000800002210040404407a09 NC(=O)O 22,42,83,105,109,116,122,130,138,150,153,155,156,157,158,160,163 000040000004000000200000002250040404405809 N=C(O)O 22,42,77,105,109,116,118,122,130,138,150,155,156,158,160,163 0000400000000801018004e80132936d006110dd1b CCOc1nnc(o1)C 22,51,56,64,79,82,91,93,94,95,96,105,108,109,112,113,116,119,120,122,123,125,126,136,141,142,148,152,154,155,156,158,159,160,161,163,164 000000010004000000100800000200000420402801 NC(N)N 24,42,76,83,105,130,141,150,155,157,160 00000002000002000008008604012040014484101c C1CC2=CC(C1C2)O 25,49,75,89,90,95,98,104,117,126,128,138,142,146,151,156,162,163,164 000000080004000000100a00080000000420402501 NCN 27,42,76,81,83,99,130,141,150,152,154,157,160 000000080004000000100a00080000000420402501 NC([H])N 27,42,76,81,83,99,130,141,150,152,154,157,160 000000080004000000100a00080000000420402501 NC([H])([H])N 27,42,76,81,83,99,130,141,150,152,154,157,160 000008200000000000022010008000000010188000 C[Si](C)(C)C 19,29,73,85,92,111,140,147,148,159 000000400002000010000820000000082000400001 NF 30,41,68,83,93,123,133,150,160 000000800100000014008830000000080000408001 CSN 31,32,66,68,83,87,92,93,123,150,159,160 00000000010000001c008820000000080400400001 NS 32,66,67,68,83,87,93,123,130,150,160 000000000100000014018020000040080000400001 N=S 32,66,68,72,87,93,118,123,150,160 000000000200000000000000040000000000000000 C=C 33,98 000000000200000000000000040000000000000000 [H]C([H])=C([H])[H] 33,98 000000000200000000200000000040000000400001 C=N 33,77,118,150,160 000000000000000000200000000040000000400001 [CH]=N 77,118,150,160 000000000000000000200000000040000000400001 [CH]=N 77,118,150,160 00000000080000000000a480000020000103040110 S1CCCC1 35,82,85,87,95,117,128,136,137,146,152,164 
00000000080000000000a480000020000103040110 S1C([H])C([H])C([H])C1 35,82,85,87,95,117,128,136,137,146,152,164 00000000080000000000a480000020000103040110 S1CCCC1[H] 35,82,85,87,95,117,128,136,137,146,152,164 000000000800000000008480000000000001000012 s1cccc1 35,82,87,95,136,161,164 000000001004000000100800002210000424403809 NC(O)N 36,42,76,83,105,109,116,130,138,141,150,155,156,157,160,163 000000001004000000300800002250000424403809 N=C(O)N 36,42,76,77,83,105,109,116,118,130,138,141,150,155,156,157,160,163 000000001004000000100800002210000420402a09 NC(=O)N 36,42,76,83,105,109,116,130,141,150,153,155,157,160,163 000000801940300015d08da0012291495461402a1b c1(nc(=O)[nH]s1)N 31,32,35,36,46,52,53,64,66,68,76,78,79,80,82,83,87,93,95,96,105,109,112,116,119,120,123,126,130,132,134,136,141,142,150,153,155,157,160,161,163,164 00000000200400000010080000000000042040a801 NC(C)N 37,42,76,83,130,141,150,155,157,159,160 00000000200400000030080000004000042040a801 N=C(C)N 37,42,76,77,83,118,130,141,150,155,157,159,160 000010002214000000180800040000000420402801 NC(=C)N 20,33,37,42,44,75,76,83,98,130,141,150,155,157,160 000000802940080025d08ca0000080095221402813 c1c(nns1)N 31,32,35,37,46,51,64,66,69,76,78,79,82,83,87,93,95,119,120,123,129,132,134,136,141,150,155,157,160,161,164 000000002000080011d004e0010080090021408a1b Cc1[nH]nc(n1)C=O 37,51,64,68,76,78,79,82,93,94,95,96,119,120,123,136,141,150,153,155,159,160,161,163,164 00000000c080411a1401800020820008820c0a4008 OS(=O)(=O)[O-] 38,39,47,48,54,57,59,60,66,68,72,87,101,105,111,123,129,135,138,139,145,147,158,163 00000000c080401a140180002002000802040a4008 OS(=O)[O] 38,39,47,54,57,59,60,66,68,72,87,101,105,123,129,138,145,147,158,163 00000000c080541a14018202a08220088e060e4508 C(CS(=O)(=O)O)S 38,39,47,50,52,54,57,59,60,66,68,72,81,87,89,101,103,105,111,117,123,129,130,131,135,137,138,145,146,147,152,154,158,163 00000000800000001c008000200000080404000008 S-O 39,66,67,68,87,101,123,130,138,163 00000000800000001c008000200000080404000008 
[H]S-O[H] 39,66,67,68,87,101,123,130,138,163 000000000001000000000000000000000000000001 C#N 40,160 000000000200000000200000000040000000400001 C=N 33,77,118,150,160 000000000200000000200000000040000000400001 [H]C=N[H] 33,77,118,150,160 000000000200000000200000000040000000400001 [H]C([H])=N[H] 33,77,118,150,160 000000080004000000000a00083010000404403509 NCO 27,42,81,83,99,108,109,116,130,138,150,152,154,156,157,160,163 000000080004000000000a00083010000404403509 [H][NH]CO 27,42,81,83,99,108,109,116,130,138,150,152,154,156,157,160,163 000000080004000000000a00083010000404403509 [H][N]([H])CO 27,42,81,83,99,108,109,116,130,138,150,152,154,156,157,160,163 000000000210000000000800040000000000402001 C=CN 33,44,83,98,150,157,160 000000000210000000200000040040000000400001 C=C=N 33,44,77,98,118,150,160 000000080044000000008a00080000000400402501 SCN 27,42,46,81,83,87,99,130,150,152,154,157,160 000080008044000054008820200000280600400009 SON 23,39,42,46,66,68,70,83,87,93,101,123,125,129,130,150,160,163 000000000040000000018800000000000000402001 S=CN 46,72,83,87,150,157,160 000000000044000000208000000040000400400001 SC=N 42,46,77,87,118,130,150,160 000000000040000000218000000040000000400001 S=C=N 46,72,77,87,118,150,160 0010800088400410550484a02400816b024108001b c1ccc[sH]1On1cccc1 12,23,35,39,46,50,60,64,66,68,70,74,82,87,93,95,98,101,112,119,120,121,123,125,126,129,136,142,147,160,161,163,164 000100400000010000000000400000082000000000 Cl[Rh+2] 8,30,48,102,123,133 000000000000010000000000000000000000000008 [O-2] 48,163 000000000000010000008000000000000000000000 [35S-2] 48,87 000100000000010000000000000000000000000000 [Pt+2] 8,48 0000000002000200000a0000040000000000108000 C=C(C)C 33,49,73,75,98,148,159 000000000200020000080000040000000000008016 C=C(C)c1ccccc1 33,49,75,98,159,161,162,164 000000008000040014008010200000080004008008 CSO 39,50,66,68,87,92,101,123,138,159,163 00000000800004841400810020000008000400001e c1ccccc1SO 39,50,58,63,66,68,80,87,101,123,138,161,162,163,164 
000000000000080018000820000000080420400001 NN 51,67,68,83,93,123,130,141,150,160 000000000000080018000820000000080420400001 [H]NN[H] 51,67,68,83,93,123,130,141,150,160 000000000000080018000820000000080420400001 [H]N([H])N([H])[H] 51,67,68,83,93,123,130,141,150,160 000000000000080018000020000040080420400001 N=N 51,67,68,93,118,123,130,141,150,160 000000000000080018000020000040080420400001 N=N[H] 51,67,68,93,118,123,130,141,150,160 000000000000080018000020000040080420400001 [H]N=N[H] 51,67,68,93,118,123,130,141,150,160 000000000000080000000020000000080020000001 N#N 51,93,123,141,160 000000000000080001000020020080090021000017 c1cnncc1 51,64,93,97,119,120,123,136,141,160,161,162,164 000000000000100000800a02884020000422442501 NCCCN 52,79,81,83,89,99,103,110,117,130,137,141,146,150,152,154,157,160 000000000000100000800a02884020000422442501 N([H])([H])C([H])([H])C([H])([H])C([H])([H])N([H])([H]) 52,79,81,83,89,99,103,110,117,130,137,141,146,150,152,154,157,160 000000001004100001900009032291418465425a1f c1c([nH]c(=O)[nH]c1=O)O 36,42,52,64,76,79,88,91,96,97,105,109,112,116,119,120,126,130,135,136,138,141,142,145,150,153,155,156,158,160,161,162,163,164 000000000000200080000200801020000c06045508 OCCO 53,71,81,103,108,117,130,131,137,138,146,152,154,156,158,163 000000002002300001d044c8132791512461400a1f c1[nH]c2c(n1)[nH]c(nc2=O)F 37,41,52,53,64,76,78,79,82,86,91,94,95,96,97,100,104,105,106,109,112,116,119,120,124,126,130,133,136,141,142,150,153,155,160,161,162,163,164 000000008004400214008000200000080604004008 OSO 39,42,54,57,66,68,87,101,123,129,130,138,158,163 0000800000048000700000302000000a060408e009 ON(O)C 23,42,55,68,69,70,92,93,101,121,123,129,130,138,147,157,158,159,160,163 0000800000048000700000202000000a560408681f ON(O)c1ccccc1 23,42,55,68,69,70,93,101,121,123,129,130,132,134,138,147,155,157,158,160,161,162,163,164 00000000000000010000200002102080090304111c O1CCCCC1 56,85,97,108,117,127,128,131,136,137,146,152,156,162,163,164 
00000000000000010000048000000000000100001a o1cccc1 56,82,95,136,161,163,164 080000100000000214008000000000080200000000 PS[U] 3,28,57,66,68,87,123,129 000000000000008400008100000000000000000016 Sc1ccccc1 58,63,80,87,161,162,164 000000000000000804018000200000080000000008 S=O 59,66,72,87,101,123,163 00000000800000001c008000200000080404000008 SO 39,66,67,68,87,101,123,130,138,163 000000088040641a14018a40a80000080e04486509 C(N)S(=O)O 27,39,46,50,53,54,57,59,60,66,68,72,81,83,87,94,99,101,103,123,129,130,131,138,147,150,152,154,157,158,160,163 00000000000000100002a010000000000010188000 CS(C)C 60,73,85,87,92,140,147,148,159 000000400820449e0401c58060860008a28128401a c1c(c(sc1S(=O)(=O)Cl)Br)Br 30,35,45,50,54,57,58,59,60,63,66,72,80,82,86,87,95,101,102,105,106,111,123,129,133,135,136,143,147,149,158,161,163,164 0000000008400021010085c000008011000180081b c1cc(oc1)c2nccs2 35,46,56,61,64,80,82,87,94,95,119,120,124,136,151,155,160,161,163,164 000020000000002002000000048000000000008016 CC1(C=C1)c2ccccc2 21,61,65,98,111,159,161,162,164 000000000000004050000020200040080000400009 N=O 62,68,70,93,101,118,123,150,160,163 0010800000000140414400202300c14b026108401f c1c[n+](=O)ccn1[O-] 12,23,48,62,64,70,74,78,93,96,97,101,112,118,119,120,121,123,126,129,136,141,142,147,158,160,161,162,163,164 000020000000008000008102800000000000040010 C1CC1S 21,63,80,87,89,103,146,164 00000000000000850000858000000000000100001a c1cocc1S 56,58,63,80,82,87,95,136,161,163,164 88042000080000b0140083028000a08801030c0110 C1CC1S1[U]CC1 3,7,10,21,35,60,61,63,66,68,80,81,87,89,103,117,119,123,127,128,136,137,146,147,152,164 000000000000000002020000008000000010108000 CC(C)(C)C 65,73,111,140,148,159 00000010000000001c008000000000080400000000 PS 28,66,67,68,87,123,130 000000000100000014018020000040080000400001 S=N 32,66,68,72,87,93,118,123,150,160 00000000880004010400800026008008000100101c c1ccosc1 35,39,50,56,66,87,97,98,101,119,123,136,156,162,163,164 000000100000000018000000000000080400000000 PP 28,67,68,123,130 
000000100000000018000000000000080400000000 P=P 28,67,68,123,130 000000500002000010000000000000082000000000 PF 28,30,41,68,123,133 000000100000000018000000200000080404000008 PO 28,67,68,101,123,130,138,163 000000100000000010000000200000080000000008 P=O 28,68,101,123,163 000000100000000000000010000000000000008000 PC 28,92,159 000000100004000038000020000000080600400001 PNP 28,42,67,68,69,93,123,129,130,150,160 000000100000000018000820000000080400400001 [H]NP 28,67,68,83,93,123,130,150,160 0000800000000940e10400202200d14b02e128481f c1cc[n+](=O)n(c1)[O-] 23,48,51,62,64,69,70,71,74,93,97,101,112,116,118,119,120,121,123,126,129,136,141,142,143,147,149,155,158,160,161,162,163,164 000000002000200141d00ce02100800956a160281b c1(c(non1)N)N 37,53,56,64,70,76,78,79,82,83,93,94,95,96,101,119,120,123,129,130,132,134,136,141,143,149,150,155,157,160,161,163,164 080000000000200090000000200000080604004008 O[U][Np]O 3,53,68,71,101,123,129,130,138,158,163 000000000000000000020000000002000000108400 CCC 73,113,148,154,159 000000000000000000020000000002000000108400 C([2H])([2H])([2H])C([2H])([2H])C([2H])([2H])[2H] 73,113,148,154,159 000000000000000000008200800026000000048500 SCCC 81,87,103,113,114,117,146,152,154,159 00802000000000000004301008000c0300010ca111 CN1CC1 15,21,74,84,85,92,99,114,115,120,121,136,146,147,152,157,159,160,164 008020000000000000002200080000010001442111 [H]N1CC1 15,21,81,85,99,120,136,146,150,152,157,160,164 000000000600000000080000040000000000008000 C=C([Li])C 33,34,75,98,159 000000000400000000020000000000000000108000 CC([Li])C 34,73,148,159 000000000200000000000000040000000000008000 C=C([H])C 33,98,159 400100000000200010400820000000080620400001 N[Fe][W]N 6,8,53,68,78,83,93,123,129,130,141,150,160 0000000000000000014414800840a01300230c2513 n12cccc1.C2C3.n13cccc1 64,74,78,82,84,95,99,110,117,119,120,121,124,136,137,141,146,147,152,154,157,160,161,164 0000000000000000004434800840a08301230c2511 N12CCCC1.C2C3.N13CCCC1 
74,78,82,84,85,95,99,110,117,119,120,121,127,128,136,137,141,146,147,152,154,157,160,164 00000010810030021c808820200010280620400009 NSOPN 28,32,39,52,53,57,66,67,68,79,83,87,93,101,116,123,125,129,130,141,150,160,163 0000000000000000018414800840a01300230c2513 n12cccc1.C2CC3.n13cccc1 64,74,79,82,84,95,99,110,117,119,120,121,124,136,137,141,146,147,152,154,157,160,161,164 0000000000000000008434800840a08301230c2511 N12CCCC1.C2CC3.N13CCCC1 74,79,82,84,85,95,99,110,117,119,120,121,127,128,136,137,141,146,147,152,154,157,160,164 000000000000000000008200000002000000008500 SC(C)[H] 81,87,113,152,154,159 000000080002000000000a00080000002000402501 NCF 27,41,81,83,99,133,150,152,154,157,160 000000000002000000000010000000002000008000 [H]CF 41,92,133,159 000000000002000000000000000002002000008500 CCF 41,113,133,152,154,159 000000080002000000000000000000002000000500 FCF 27,41,133,152,154 000000000000000000022010000000200000109008 COC 73,85,92,125,148,156,159,163 000000080002000000002010001004202000009508 COCF 27,41,85,92,108,114,125,133,152,154,156,159,163 000000000200000000000010040000200000009008 COC=C 33,92,98,125,156,159,163 000001000000000000000010000000200000009008 COC#C 16,92,125,156,159,163 000020000002000000004000000400002000040010 FC1CC1 21,41,86,106,133,146,164 000020000000000000004000400400002000040010 ClC1CC1 21,86,102,106,133,146,164 000020000020000000004000000400002000040010 BrC1CC1 21,45,86,106,133,146,164 000020040000000000004000000400002000040010 [I]C1CC1 21,26,86,106,133,146,164 000000100004000018000a22800002080600408501 NPPCC 28,42,67,68,81,83,89,93,103,113,123,129,130,150,152,154,159,160 000000100004000018000202a00002080604008508 OPPCC 28,42,67,68,81,89,101,103,113,123,129,130,138,152,154,159,163 000000100000000018000820000000080600408001 NPP=CC 28,67,68,83,93,123,129,130,150,159,160 000000100004000018000830000000080600408001 NPPC[H] 28,42,67,68,83,92,93,123,129,130,150,159,160 000000500020000018000200800002082600008500 BrPPCC 
28,30,45,67,68,81,103,113,123,129,130,133,152,154,159 80040000000000000000020288402c01000144a911 CC1CCN1 7,10,81,89,99,103,110,114,115,117,120,136,146,150,152,155,157,159,160,164 800400000000200000000246884020050405447b19 C1CN[C@H]1C(=O)O 7,10,53,81,89,90,94,99,103,110,117,120,122,130,136,138,146,150,152,153,155,156,157,158,160,163,164 00002000000100000240080280c000001020442811 C1CC1(C#N)N 21,40,65,78,83,89,103,110,111,132,141,146,150,155,157,160,164 000000100004000010000222000042080600408501 N=PPCC 28,42,68,81,89,93,113,118,123,129,130,150,152,154,159,160 000000100000000010000822000002080200408501 NP=PCC 28,68,83,89,93,113,123,129,150,152,154,159,160 000000100000000010000200200002080200008508 O=PPCC 28,68,81,101,113,123,129,152,154,159,163 0000001000802000900000062092022c860c0ad708 CCOP(=O)(C(=O)O)O 28,47,53,68,71,89,90,101,105,108,111,113,122,123,125,129,130,135,138,139,145,147,152,153,154,156,158,159,163 000480000000000050200026204060081004040819 C1CC(=NO)C1 10,23,68,70,77,89,90,93,101,110,117,118,123,132,138,146,155,160,163,164 000000000840200000408fce88e8b801042344ab19 CC1(NCCS1)C(=O)N 35,46,53,78,80,81,82,83,87,89,90,91,94,95,99,103,107,109,110,111,115,116,117,119,120,130,136,137,141,146,150,152,153,155,157,159,160,163,164 0000000800000001000026c68870b0010903443119 C1COCN1 27,56,81,82,85,89,90,94,95,99,103,108,109,110,116,117,119,120,128,131,136,137,146,150,152,156,157,160,163,164 000400000000008000008106800020000000040010 C1CC(C1)S 10,63,80,87,89,90,103,117,146,164 000020000200020000080206041000000104201518 C=C1CC1CO 21,33,49,75,81,89,90,98,108,128,138,149,152,154,156,163,164 000020000000000000004206409400002104201518 C1C(C1(Cl)Cl)CO 21,81,86,89,90,102,106,108,111,128,133,138,149,152,154,156,163,164 0000040000100001002000080420d001000100b819 c1cccoc(C)n1 18,44,56,77,91,98,109,116,118,119,120,136,155,156,157,159,160,163,164 000000100000000000000010000000000000008000 PC 28,92,159 000000100000000000000010000000000000008000 P[CH3] 28,92,159 
000000100000000000000010000000000000008000 P[C]([H])([H])[H] 28,92,159 000000100200000000000000000000000000000000 P=C 28,33 000000100000000000000020000000080000000001 [P]#[N] 28,93,123,160 000000100000000000000020000000080000000001 P#N 28,93,123,160 000000000000000000000000000000000000400001 [H]N[H] 150,160 0000000000000000010416c08850200308070c351b c1cccn1CCO 64,74,81,82,84,94,95,99,103,108,110,117,120,121,131,136,137,138,146,147,152,154,156,157,160,161,163,164 00000008004000000104b68209100003000708351b c1cccn1CSCO 27,46,64,74,81,82,84,85,87,89,95,96,99,108,120,121,136,137,138,147,152,154,156,157,160,161,163,164 000000080044000000008a00080000000400402501 SCN 27,42,46,81,83,87,99,130,150,152,154,157,160 00000000000000000104148008000203000108a513 CCn1cccc1 64,74,82,84,95,99,113,120,121,136,147,152,154,157,159,160,161,164 00000000000000000000081000000000000040a001 [H]CN 83,92,150,157,159,160 000000000044000000208000000040000400400001 SC=N 42,46,77,87,118,130,150,160 000004000000000000000000000020800100040010 C1CCCCCC1 18,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCC1 100,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCCC1 100,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCCCC1 100,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCCCCC1 100,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCCCCCC1 100,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCCCCCCC1 100,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCCCCCCCC1 100,117,127,128,146,164 000000000000000000000000000020800100040010 C1CCCCCCCCCCCCCC1 117,127,128,146,164 000000000000000000000000000020800100040010 C1CCCCCCCCCCCCCCC1 117,127,128,146,164 000000080004000000008200001000000404001508 S[CH2]O 27,42,81,87,108,130,138,152,154,156,163 000000000000000000000000000000000004001008 [H][CH]O 138,156,163 000000100000000000000200000002000000008500 CCP 
28,81,113,152,154,159 000000000000000000000000000000000000008000 C[CH][2H] 159 00000000000000000002201000000000000050a001 [H]N(C)C 73,85,92,148,150,157,159,160 000000000000010000000000000000000004000008 [O-][2H] 48,138,163 000000000800000000008580100180100001000012 c1csc2c1csc2 35,80,82,87,95,100,104,119,124,136,161,164 000400000200020000080202041000800004001518 C=C1CC(C1)CO 10,33,49,75,81,89,98,108,127,138,152,154,156,163,164 80040000000000010000000000182a800901849518 CCC1CCO1 7,10,56,107,108,113,115,117,127,128,131,136,146,151,152,154,156,159,163,164 00802000000000010200000000982e800901949518 CCCC1(CO1)C 15,21,56,65,107,108,111,113,114,115,117,127,128,131,136,146,148,151,152,154,156,159,163,164 000020000200000000000000040020800100040410 C=CCCC1CC1 21,33,98,117,127,128,146,154,164 000000000200000000000000000000000000000208 O=C 33,153,163 000000000200000000000000000000008000004228 O=C.O=C 33,135,153,158,163,165 080000000004380038d00020020080090621400015 N1NN[U]NN1 3,42,51,52,53,67,68,69,76,78,79,93,97,119,120,123,129,130,136,141,150,160,162,164 000000000000000000008000000000000000000000 S[CH][2H] 87 000000000000000000000000000000000004000008 [OH] 138,163 000000000000000000000000000000000004000008 [O][3H] 138,163 000000000000000000000000000000000004000008 [3H][O][3H] 138,163 000000000000000000000000000000000000108000 CC 148,159 000000000000000000020000000002000000108400 CCC 73,113,148,154,159 000000000000000000000000000026000000148400 CCCC 113,114,117,146,148,154,159 000000000000000000000000000000000000108020 C.C.C.C.C.C.C.C.C.C.C.C 148,159,165 000000000000000000000000000000000000108000 C1.C1 148,159 fingerprint/inst/unitTests/runit.fp.R0000644000176200001440000001556615061401251017460 0ustar liggesuserstest.new.fp <- function() { fp <- new("fingerprint", bits=c(1,2,3,4), nbit=8, provider='rg',name='foo') checkTrue(!is.null(fp)) } test.distance1 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) d <- 
distance(fp1,fp2) checkEquals(d, 0) } test.distance2 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) d <- distance(fp1,fp2) checkEquals(d, 1) } test.and1 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fpnew <- fp1 & fp2 bits <- fpnew@bits checkTrue( all(bits == c(1,2,3,4))) } test.and2 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) fpnew <- fp1 & fp2 bits <- fpnew@bits checkEquals(length(bits),0) } test.or1 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) fpnew <- fp1 | fp2 bits <- fpnew@bits checkTrue(all(bits == c(1,2,3,4,5,6,7,8))) } test.or2 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fpnew <- fp1 | fp2 bits <- fpnew@bits checkTrue(all(bits == c(1,2,3,4))) } test.not <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) nfp1 <- !fp1 checkTrue(all(nfp1@bits == c(5,6,7,8))) checkTrue(all(fp1@bits == (!nfp1)@bits)) } test.xor1 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fpnew <- xor(fp1,fp2) bits <- fpnew@bits checkEquals(length(bits),0) } test.xor2 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) fpnew <- xor(fp1,fp2) bits <- fpnew@bits checkEquals(length(bits),8) checkTrue(all(bits == c(1,2,3,4,5,6,7,8))) } test.fold1 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) nfp <- fold(fp1) checkTrue(all(nfp@bits == c(1,2,3,4))) } test.fold2 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4,8), nbit=8) nfp <- fold(fp1) checkTrue(all(nfp@bits == c(1,2,3,4))) } test.fp.to.matrix <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) 
fp2 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) fp3 <- new("fingerprint", bits=c(1,2,3,5,6,7,8), nbit=8) m1 <- fp.to.matrix(list(fp1,fp2,fp3)) m2 <- rbind(c(1,1,1,1,0,0,0,0), c(0,0,0,0,1,1,1,1), c(1,1,1,0,1,1,1,1)) checkTrue(all(m1 == m2)) } test.tversky.1 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) s <- distance(fp1, fp2, "tversky", a=1,b=1) checkEquals(1.0, s) } test.tversky.2 <- function() { fp1 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) s <- distance(fp1, fp2, "tversky", a=1,b=1) checkEquals(0.0, s) } test.tversky.3 <- function() { fp1 <- new("fingerprint", bits=c(4,6,7,8), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) stv <- distance(fp1, fp2, "tversky", a=1,b=1) sta <- distance(fp1, fp2) checkEquals(stv, sta) } test.tversky.4 <- function() { fp1 <- new("fingerprint", bits=c(4,6,7,8), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) stv <- distance(fp1, fp2, "tversky", a=0.5,b=0.5) std <- distance(fp1, fp2, "dice") checkEquals(stv, std) } test.fp.sim.matrix <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) fp3 <- new("fingerprint", bits=c(1,2,3,5,6,7,8), nbit=8) fpl <- list(fp1,fp2,fp3) sm <- round(fp.sim.matrix(fpl),2) am <- rbind(c(1,0,0.38), c(0,1,0.57), c(0.38,0.57,1)) checkTrue(all(sm == am)) } test.fp.balance <- function() { fp1 <- new("fingerprint", bits=c(1,2,3), nbit=6) fp2 <- balance(fp1) checkTrue(12 == length(fp2)) checkEquals(c(1,2,3,10,11,12), fp2@bits) } test.fps.reader <- function() { data.file <- file.path(system.file("unitTests", "bits.fps", package="fingerprint")) fps <- fp.read(data.file, lf=fps.lf) checkEquals(323, length(fps)) ## OK, we need to pull in the bit positions Andrew specified for (i in seq_along(fps)) { expected <- sort(as.numeric(strsplit(fps[[i]]@misc[[1]],",")[[1]])+1) observed <- sort(fps[[i]]@bits) 
checkEquals(expected, observed, msg = sprintf("%s had a mismatch in bit positions", fps[[i]]@name)) } } ####################################### ## ## Feature vector tests ## ####################################### test.feature <- function() { f1 <- new('feature', feature='F1') checkEquals(1, f1@count) f2 <- new('feature', feature='F2', count=as.integer(12)) checkEquals(12, f2@count) } test.feature.c <- function() { f1 <- new('feature', feature='F1', count=as.integer(2)) f2 <- new('feature', feature='F2', count=as.integer(3)) fl <- c(f1, f2) checkEquals(2, length(fl)) checkEquals("list", class(fl)) checkTrue(identical(f1, fl[[1]])) checkTrue(identical(f2, fl[[2]])) } test.feature.fp <- function() { feats <- sapply(letters[1:10], function(x) new('feature', feature=x, count=as.integer(1))) fv <- new('featvec', features=feats) checkEquals(10, length(fv)) } test.feature.dist1 <- function() { f1 <- sapply(letters[1:10], function(x) new('feature', feature=x, count=as.integer(1))) f2 <- sapply(letters[1:10], function(x) new('feature', feature=x, count=as.integer(1))) fv1 <- new('featvec', features=f1) fv2 <- new('featvec', features=f2) d <- distance(fv1, fv2, method='tanimoto') checkEquals(1, d) } test.feature.dist2 <- function() { f1 <- sapply(letters[1:10], function(x) new('feature', feature=x, count=as.integer(1))) f2 <- sapply(letters[11:20], function(x) new('feature', feature=x, count=as.integer(1))) fv1 <- new('featvec', features=f1) fv2 <- new('featvec', features=f2) d <- distance(fv1, fv2, method='tanimoto') checkEquals(0, d) } test.featvec.read <- function() { data.file <- file.path(system.file("unitTests", "test.ecfp", package="fingerprint")) fps <- fp.read(data.file, lf=ecfp.lf, binary=FALSE) checkEquals(10, length(fps)) lengths <- c(58L, 38L, 43L, 66L, 62L, 66L, 65L, 44L, 66L, 61L) ol <- sapply(fps, length) checkTrue(identical(lengths, ol)) } tester.getters.setters <- function() { f <- new("feature", feature='ABCD', count=as.integer(1)) checkEquals("ABCD", 
feature(f)) checkEquals(1, count(f)) feature(f) <- 'UXYZ' count(f) <- 10 checkEquals("UXYZ", feature(f)) checkEquals(10, count(f)) } fingerprint/inst/unitTests/test.ecfp0000644000176200001440000001305515061401251017375 0ustar liggesusersmol01 17 0 16 3 1 1747237384 1499521844 -1539132615 1294255210 332760439 -1549163031 1035613116 1618154665 590925877 1872154524 -1143715940 203677720 -1272768868 136120670 136597326 -1460348762 -1262922302 -1201618245 -402549409 -1270820019 929601590 -1597477966 -1274743746 -1155471474 1258428229 -1838187238 -798628285 -1773728142 -773983804 -453677277 1674451008 65948508 991735244 -1412946825 846704869 -2103621484 -886204842 1725648567 -353343892 -585443181 -533273616 2031084733 -801248129 1752802620 -976015189 -992213424 2109043264 -790336137 630139722 -505031736 -1427697183 -2090462286 -1724769936 mol02 16 9 1 0 17 32 332760439 -1362791977 367998008 1035613116 -1277879912 1747237384 71476542 -124655670 203677720 1618154665 907007053 -1707366455 1969481564 -1597477966 1966552162 547884906 -1270820019 -2135641502 -497728148 1674451008 -453677277 -2005085798 2047992816 786486417 1523337873 -2045753164 859018953 404853571 1383886699 -745001879 1985089045 -1445962196 mol03 16 1 0 17 3 32 7 332760439 367998008 1035613116 566058135 1747237384 580900652 907007053 1070061035 71476542 203677720 -124655670 -548602426 1618154665 -1707366455 1969481564 -1597477966 -881072729 547884906 -1564724132 -1270820019 -2004812302 -497728148 -2135641502 1674451008 -453677277 2047992816 786486417 1523337873 242457334 -2045753164 859018953 265023308 1381300059 404853571 -745001879 1985089045 mol04 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 -1549103449 1747237384 1035613116 1294255210 590925877 332760439 -124655670 260476081 1872392852 1872154524 71953198 367998008 71476542 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1925475824 885225145 -1598679931 1175232969 -1155471474 1258428229 
1506190109 -581879738 -453677277 -745491832 551850122 -773983804 1674451008 991735244 689610531 -888075169 650647287 -1799143719 241406177 1119771930 -1139544385 1139671217 2111406068 -800045143 -10819545 384221478 -1206981816 -1508180856 -149636017 -505031736 -1427697183 -2090462286 -1724769936 mol05 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 -1549103449 1747237384 1035613116 1294255210 590925877 332760439 -124655670 260476081 1872392852 1872154524 71953198 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1925475824 885225145 -1598679931 1175232969 -1155471474 1258428229 1506190109 -581879738 -773983804 1674451008 -453677277 991735244 689610531 -888075169 650647287 -1799143719 241406177 1119771930 -1139544385 1139671217 2111406068 -800045143 -10819545 384221478 -505031736 -1427697183 -1205069278 -2090462286 -1724769936 -1698724694 -2093839777 mol06 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 -1549103449 1747237384 1035613116 1294255210 590925877 332760439 -124655670 260476081 1872392852 -836633685 1872154524 71953198 136597326 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1925475824 885225145 -1598679931 1175232969 -1155471474 1258428229 1506190109 -581879738 -1454111645 289095609 -453677277 -773983804 1674451008 991735244 689610531 -888075169 650647287 -1799143719 241406177 1119771930 -1139544385 1139671217 2111406068 -800045143 -10819545 384221478 1724895444 1790572653 1785362907 -505031736 -1427697183 -1724769936 -2090462286 mol07 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 -1549103449 1618154665 1747237384 1035613116 1294255210 -1539132615 590925877 332760439 -124655670 260476081 1872392852 1872154524 71953198 134603128 1579401580 -1641408229 1997806766 192331578 -1926447181 -98859492 885225145 -1598679931 1175232969 -1199556931 -1155471474 1258428229 -1462709112 1506190109 -1280036918 -1695756380 730557100 
-773983804 1674451008 991735244 689610531 -888075169 650647287 1033863897 -1799143719 1119771930 -1139544385 1646645826 1040131620 2111406068 -800045143 1132802373 -10819545 137138064 -505031736 -1427697183 -2090462286 -1724769936 mol08 16 17 1 9 0 32 -1410079687 1747237384 675769755 178336375 -1362791977 -1343180157 1618154665 -1277879912 -1272768868 367998008 -587569116 71476542 -939475899 -1044865801 946229467 193705859 1852108031 557002734 1967609676 -822042736 713358128 -745491832 -964367925 -270564593 551850122 -2122102020 679321016 48182684 210231571 281647195 516865083 1706555375 -362593762 1475536852 -1294566343 461422072 -1516875559 566085027 mol09 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 1499521844 1747237384 1035613116 1294255210 332760439 590925877 -124655670 260476081 1872392852 1872154524 71953198 367998008 71476542 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1262922302 885225145 -1598679931 -402549409 1258428229 -1155471474 1506190109 -1838187238 -745491832 -773983804 551850122 1674451008 991735244 689610531 -888075169 650647287 -1799143719 846704869 1119771930 -1139544385 -886204842 -800045143 2031084733 -10819545 1752802620 -976015189 -1508180856 -794597678 -175681259 -1427697183 -505031736 -1724769936 -2090462286 mol10 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 1499521844 1747237384 1035613116 1294255210 332760439 590925877 -124655670 260476081 1872392852 1872154524 71953198 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1262922302 885225145 -1598679931 -402549409 1258428229 -1155471474 1506190109 -1838187238 1674451008 -773983804 991735244 689610531 -888075169 650647287 -1799143719 846704869 1119771930 -1139544385 -886204842 -800045143 2031084733 -10819545 1752802620 -976015189 -1427697183 -505031736 -792685140 -1724769936 -2090462286 -2093839777 fingerprint/man/0000755000176200001440000000000015163766205013367 5ustar 
liggesusersfingerprint/man/linefunc.Rd0000644000176200001440000000411715163766204015463 0ustar liggesusers\name{cdk.lf, moe.lf, bci.lf} \alias{cdk.lf} \alias{moe.lf} \alias{bci.lf} \alias{ecfp.lf} \alias{fps.lf} \alias{jchem.binary.lf} \title{ Functions to parse lines from fingerprint files } \description{ These functions take a single line and parse it to produce a vector of integers which represents the positions of the 'on' bits in a fingerprint. This allows the user to use \code{fp.read} with arbitrary fingerprint files. A new file format can be handled by defining a new line parser function. Currently the first three functions process fingerprint files obtained from the CDK (\url{https://cdk.sourceforge.net}), MOE (\url{https://www.chemcomp.com}), BCI (\url{http://www.digitalchemistry.co.uk/}) and the FPS format (\url{https://code.google.com/archive/p/chem-fingerprints/wikis/FPS.wiki}). The last function can be used for any fingerprint that generates hashed features (such as ECFPs or other circular fingerprints). For these cases, it is assumed that features are unsigned integers, so string features are not handled. Note that when the \code{fps.lf} function is specified, items such as the number of bits or the header flag do not need to be specified, as the format requires a header block containing some of these items. } \usage{ cdk.lf(line) moe.lf(line) bci.lf(line) ecfp.lf(line) fps.lf(line) jchem.binary.lf(line) } \arguments{ \item{line}{ The line to parse } } \value{ A list with three components - the name associated with the fingerprint (if available) and a vector of integers representing bits set to 1 (for the case of the first three methods) or a vector of characters representing hashed features (characteristic of circular fingerprints) or more generally, any string feature.
The third component is a (possibly empty) list, which contains the remaining components of a line, when the format allows items other than a title and the fingerprint (such as the FPS format). The content of the third component is dependent on the line function that is being used. } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \keyword{logic} fingerprint/man/sim.Rd0000644000176200001440000000363515061401251014435 0ustar liggesusers\name{fp.sim.matrix} \alias{fp.sim.matrix} \title{ Calculates a Similarity Matrix for a Set of Fingerprints } \description{ Given a set of fingerprints, a pairwise similarity can be calculated using the various distance metrics defined for binary strings. This function calculates the pairwise similarity matrix for a set of \code{fingerprint} or \code{featvec} objects supplied in a \code{list} structure. Any of the distance metrics provided by \code{\link{distance}} can be used and the default is the Tanimoto metric. Note that if the Euclidean distance is specified then the resultant matrix is a distance matrix and not a similarity matrix. } \usage{ fp.sim.matrix(fplist, fplist2=NULL, method='tanimoto') } \arguments{ \item{fplist}{ A list structure with each element being an object of class \code{fingerprint} or \code{featvec}. These can be constructed by hand or read from disk via \code{\link{fp.read}} } \item{fplist2}{A list structure with each element being an object of class \code{fingerprint} or \code{featvec}. If \code{NULL} then traditional pairwise similarity is calculated with each member in \code{fplist}, otherwise the resultant N x M matrix is derived from the similarity between each member of \code{fplist} and \code{fplist2}} \item{method}{ The type of distance metric to use. The default is \code{tanimoto}. Partial matching is supported.
} } \value{ A matrix with dimensions equal to \code{(length(fplist), length(fplist))} if \code{fplist2} is NULL, otherwise \code{(length(fplist), length(fplist2))} } \seealso{ \code{\link{distance}}, \code{\link{fp.read}} } \examples{ # make fingerprint objects fp1 <- new("fingerprint", nbit=6, bits=c(1,2,5,6)) fp2 <- new("fingerprint", nbit=6, bits=c(1,4,5,6)) fp3 <- new("fingerprint", nbit=6, bits=c(2,3,4,5,6)) fp.sim.matrix( list(fp1,fp2,fp3) ) } \keyword{logic} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/distance-methods.Rd0000644000176200001440000001157115061401251017076 0ustar liggesusers\name{distance-methods} \docType{methods} \alias{distance} \alias{distance-methods} \alias{distance,featvec,featvec,character,missing,missing-method} \alias{distance,featvec,featvec,missing,missing,missing-method} \alias{distance,fingerprint,fingerprint,character,missing,missing-method} \alias{distance,fingerprint,fingerprint,character,numeric,numeric-method} \alias{distance,fingerprint,fingerprint,missing,missing,missing-method} \title{Calculates the Similarity or Dissimilarity Between Two Fingerprints} \description{ A number of distance metrics can be calculated for binary fingerprints. Some of these are actually similarity metrics and thus represent the reverse of a distance metric. 
The following are distance (dissimilarity) metrics \itemize{ \item Hamming \item Mean Hamming \item Soergel \item Pattern Difference \item Variance \item Size \item Shape } The following metrics are similarity metrics and so the distance can be obtained by subtracting the value from 1.0 \itemize{ \item Tanimoto \item Dice \item Modified Tanimoto \item Simple \item Jaccard \item Russel-Rao \item Rodgers Tanimoto \item Cosine \item Achiai \item Carbo \item Baroniurbanibuser \item Kulczynski2 \item Robust } Finally the method also provides a set of composite and asymmetric distance metrics \itemize{ \item Hamann \item Yule \item Pearson \item Dispersion \item McConnaughey \item Stiles \item Simpson \item Petke \item Tversky } The default metric is the Tanimoto coefficient. } \section{Methods}{ \describe{ \item{\code{signature(fp1 = "featvec", fp2 = "featvec", method = "character", a = "missing", b = "missing")}}{ Similarity method for feature vector type fingerprints, supporting \code{tanimoto}, \code{robust} and \code{dice} metrics. } \item{\code{signature(fp1 = "featvec", fp2 = "featvec", method = "missing", a = "missing", b = "missing")}}{ Evaluate Tanimoto similarity between two feature vector fingerprints } \item{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "character", a = "missing", b = "missing")}}{ Evaluate similarity (or dissimilarity) between two binary fingerprints. See below for a list of possible similarity (or dissimilarity) metrics } \item{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "character", a = "numeric", b = "numeric")}}{ Evaluate Tversky similarity between two binary fingerprints.
} \item{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "missing", a = "missing", b = "missing")}}{ Evaluate Tanimoto similarity between two binary fingerprints } }} \usage{ distance(fp1, fp2, method, a, b) } \arguments{ \item{fp1}{ An object of class \code{fingerprint} or \code{featvec} } \item{fp2}{ An object of class \code{fingerprint} or \code{featvec} } \item{a}{Parameter for the Tversky index} \item{b}{Parameter for the Tversky index} \item{method}{ The type of distance metric desired. Partial matching is supported and the default is \code{tanimoto}. Alternative values are \itemize{ \item \code{euclidean} \item \code{hamming} \item \code{meanHamming} \item \code{soergel} \item \code{patternDifference} \item \code{variance} \item \code{size} \item \code{shape} \item \code{jaccard} \item \code{dice} \item \code{mt} \item \code{simple} \item \code{russelrao} \item \code{rodgerstanimoto} \item \code{cosine} \item \code{achiai} \item \code{carbo} \item \code{baroniurbanibuser} \item \code{kulczynski2} \item \code{robust} \item \code{hamann} \item \code{yule} \item \code{pearson} \item \code{mcconnaughey} \item \code{stiles} \item \code{simpson} \item \code{petke} \item \code{tversky} } If the two fingerprints are of class \code{featvec} then the following methods may be specified: \code{tanimoto}, \code{robust} and \code{dice}.
} } \value{ Numeric value representing the distance in the specified metric between the supplied fingerprint objects } \examples{ # make two fingerprint objects fp1 <- new("fingerprint", nbit=6, bits=c(1,2,5,6)) fp2 <- new("fingerprint", nbit=6, bits=c(1,2,5,6)) # calculate the tanimoto coefficient distance(fp1,fp2) # should be 1 # Invert the second fingerprint fp3 <- !fp2 distance(fp1,fp3) # should be 0 } \references{Fligner, M.A.; Verducci, J.S.; Blower, P.E.; A Modification of the Jaccard-Tanimoto Similarity Index for Diverse Selection of Chemical Compounds Using Binary Strings, \emph{Technometrics}, 2002, \emph{44}(2), 110-119 Monev, V.; Introduction to Similarity Searching in Chemistry, \emph{MATCH - Comm. Math. Comp. Chem.}, 2004, \emph{51}, 7-38 } \keyword{logic} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/mat.Rd0000644000176200001440000000241615061401251014422 0ustar liggesusers\name{fp.to.matrix} \alias{fp.to.matrix} \title{ Converts a List of Fingerprints to a Matrix } \description{ In general, fingerprint data is read from a file or obtained via calls to an external generator and the return value is a list of fingerprints. This function takes the list and returns a matrix having number of rows equal to the number of fingerprints and the number of columns equal to the length of the fingerprint. Each element is 1 or 0 (1's being specified by the positions in each fingerprint vector) } \usage{ fp.to.matrix(fplist) } \arguments{ \item{fplist}{ A list structure with each element being an object of class \code{fingerprint}. These can be constructed by hand or read from disk via \code{\link{fp.read}} } } \value{ A matrix with dimensions equal to \code{(length(fplist), bit length)} where bit length is a property of the fingerprint objects in the list.
} \seealso{ \code{\link{distance}}, \code{\link{fp.read}} } \examples{ # make fingerprint objects fp1 <- new("fingerprint", nbit=6, bits=c(1,2,5,6)) fp2 <- new("fingerprint", nbit=6, bits=c(1,4,5,6)) fp3 <- new("fingerprint", nbit=6, bits=c(2,3,4,5,6)) fp.to.matrix( list(fp1,fp2,fp3) ) } \keyword{logic} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/man/show.Rd0000644000176200001440000000102115061401251014610 0ustar liggesusers\name{show} \alias{show,fingerprint-method} \alias{show,featvec-method} \alias{show,feature-method} \title{ String Representation of a Fingerprint or Feature } \description{ Simply summarize the fingerprint or feature } \usage{ \S4method{show}{fingerprint}(object) \S4method{show}{featvec}(object) \S4method{show}{feature}(object) } \arguments{ \item{object}{ An object of class \code{fingerprint}, \code{featvec} or \code{feature} } } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \keyword{logic} fingerprint/man/balance.Rd0000644000176200001440000000234615061401251015230 0ustar liggesusers\name{balance} \alias{balance} \title{ Generate a Balanced Code Fingerprint } \description{ It has been noted that the bit density in a fingerprint can affect its ability to retrieve similar compounds from a database primarily due to complexity effects. One approach to alleviating these effects is to generate fingerprints that have a bit density of 50\% (i.e., half the bits are set to 1). This method implements the balanced code approach described by Nisius and Bajorath to convert an ordinary binary fingerprint (whose bit density is not 50\%) to one that has a bit density of 50\%. This is achieved by appending the complement of the input fingerprint to itself (resulting in a fingerprint twice the size of the original).
} \usage{ balance(fplist) } \arguments{ \item{fplist}{A single fingerprint or a list of fingerprints} } \value{ A single fingerprint object or a list of fingerprint objects that are "balanced", in that they have a bit density of 50\%. Their size is 2x the size of the input fingerprints. } \seealso{ \code{\link{bit.spectrum}}, \code{\link{bit.importance}} } \references{ Nisius, B.; Bajorath, J.; \emph{ChemMedChem}, \bold{2010}, \emph{5}, 859-868. } \keyword{programming} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/fingerprint.Rd0000644000176200001440000000627615061401251016200 0ustar liggesusers\name{fingerprint-class} \docType{class} \alias{fingerprint-class} \alias{euc.vector,fingerprint-method} \alias{fold,fingerprint-method} \alias{random.fingerprint,numeric,numeric-method} \title{Class "fingerprint"} \description{This class represents binary fingerprints, usually generated by a variety of cheminformatics software, but not restricted to such } \section{Objects from the Class}{ Objects can be created by calls of the form \code{new("fingerprint", ...)}. Fingerprints can traditionally be thought of as a vector of 1's and 0's. However for large fingerprints this is inefficient and instead we simply store the positions of the bits that are on. Certain operations also need to know the length of the original bit string and this length is stored in the object at construction. Even though we store extra information along with the bit positions, conceptually we still consider the objects as simple bit strings. Thus the usual bitwise logical operations (&, |, !, xor) can be applied to objects of this class. } \section{Slots}{ \describe{ \item{\code{bits}:}{Object of class \code{"numeric"} ~~ A vector indicating the bit positions that are on.
} \item{\code{nbit}:}{Object of class \code{"numeric"} ~~ Indicates the length of the original bit string.} \item{\code{folded}:}{Object of class \code{"logical"} ~~ Indicates whether the fingerprint has been folded.} \item{\code{provider}:}{Object of class \code{"character"} ~~ Indicates the source of the fingerprint. Can be useful to keep track of what software generated the fingerprint.} \item{\code{name}:}{Object of class \code{"character"} ~~ The name associated with the fingerprint. If no name is available this gets set to an empty string} \item{\code{misc}:}{Object of class \code{"list"} ~~ A holder for arbitrary items that may have been stored along with the fingerprint. Only certain formats allow extra items to be stored with the fingerprint, so in many cases this field is just an empty list} } } \section{Methods}{ \describe{ \item{distance}{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "missing", a = "missing", b = "missing")}: ... } \item{distance}{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "character", a = "missing", b = "missing")}: ... } \item{euc.vector}{\code{signature(fp = "fingerprint")}: ... } \item{fold}{\code{signature(fp = "fingerprint")}: ... } \item{random.fingerprint}{\code{signature(nbit = "numeric", on = "numeric")}: ...
} } } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \seealso{ \code{\link{fp.read}}, \code{\link{fp.read.to.matrix}} \code{\link{fp.sim.matrix}}, \code{\link{fp.to.matrix}}, \code{\link{fp.factor.matrix}} \code{\link{random.fingerprint}} } \examples{ ## make fingerprints x <- new("fingerprint", nbit=128, bits=sample(1:128, 100)) y <- x distance(x,y) # should be 1 x <- new("fingerprint", nbit=128, bits=sample(1:128, 100)) distance(x,y) folded <- fold(x) ## binary operations on fingerprints x <- new("fingerprint", nbit=8, bits=c(1,2,3,6,8)) y <- new("fingerprint", nbit=8, bits=c(1,2,4,5,7,8)) x & y x | y !x } \keyword{classes} \keyword{logic}fingerprint/man/feature-methods.Rd0000644000176200001440000000227515061401251016740 0ustar liggesusers\name{feature-methods} \docType{methods} \alias{feature} \alias{feature-methods} \alias{feature,feature-method} \alias{feature<--methods} \alias{feature<-} \alias{feature<-,feature,character-method} \title{Get or Set the Character String Representing the Feature} \description{ Get or set the character string representing a feature of a \code{\link{feature-class}} object. The default value for the getter (as defined in the prototype) is the empty string. 
} \section{Methods}{ \describe{ \item{\code{signature(object = "feature")}}{Return the feature associated with the feature object} \item{\code{signature(x = "feature", value = "character")}}{Set the feature associated with the feature object} } } \usage{ \S4method{feature}{feature}(object) \S4method{feature}{feature,character}(x) <- value } \arguments{ \item{object}{ An object of class \code{\link{feature-class}} } \item{x}{ An object of class \code{\link{feature-class}} } \item{value}{ The character string to replace the current feature string with } } \value{ A character string representing the feature } \keyword{programming} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/bitimp.Rd0000644000176200001440000000221215061401251015119 0ustar liggesusers\name{bit.importance} \alias{bit.importance} \title{ Evaluate the Discriminatory Power of Individual Bits in a Binary Fingerprint } \description{ This method evaluates the Kullback-Leibler (KL) divergence to rank the individual bits in a binary fingerprint in their ability to discriminate between database and active compounds. This method is implemented based on Nisius and Bajorath and includes an m-estimate correction. } \usage{ bit.importance(actives, background) } \arguments{ \item{actives}{A list of fingerprints for the actives} \item{background}{A list of fingerprints representing the background collection} } \value{ A numeric vector of length equal to the size of the fingerprints. Each element of the vector is the KL divergence for the corresponding bit. If a bit position is never set to 1 in any of the compounds from the actives and the background, then the KL divergence for that position is undefined and \code{NA} is returned. } \seealso{ \code{\link{bit.spectrum}} } \references{ Nisius, B.; Bajorath, J.; \emph{ChemMedChem}, \bold{2010}, \emph{5}, 859-868. 
} \keyword{programming} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/count-methods.Rd0000644000176200001440000000226015061401251016427 0ustar liggesusers\name{count-methods} \docType{methods} \alias{count-methods} \alias{count} \alias{count,feature-method} \alias{count<--methods} \alias{count<-} \alias{count<-,feature,numeric-method} \title{Get or Set Count of Occurrence of a Feature} \description{ Get or set the count of occurrence associated with a \code{\link{feature-class}} object. The default value for the getter (as defined in the prototype) is 1. } \section{Methods}{ \describe{ \item{\code{signature(object = "feature")}}{Return the count associated with the feature object} \item{\code{signature(x = "feature", value = "numeric")}}{Set the count associated with the feature object} } } \usage{ \S4method{count}{feature}(object) \S4method{count}{feature,numeric}(x) <- value } \arguments{ \item{object}{ An object of class \code{\link{feature-class}} } \item{x}{ An object of class \code{\link{feature-class}} } \item{value}{ A numeric (which will be coerced to \code{integer}) indicating the count associated with the feature } } \value{ An integer representing the count of occurrence of the feature } \keyword{programming} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/c.Rd0000644000176200001440000000076015061401251014063 0ustar liggesusers\name{c} \alias{c,feature-method} \title{ Combine Multiple Features to Give a List of Features } \description{ Combine multiple \code{feature} objects to give a list of feature objects } \usage{ \S4method{c}{feature}(x, ..., recursive = FALSE) } \arguments{ \item{x}{ An object of class \code{feature} } \item{...}{ One or more \code{feature} objects } \item{recursive}{ Ignored } } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \keyword{logic} fingerprint/man/fplogical.Rd0000644000176200001440000000147015061401251015600 0ustar liggesusers\name{fplogical} \alias{!} \alias{|} \alias{&} 
\alias{xor} \alias{|,fingerprint,fingerprint-method} \alias{&,fingerprint,fingerprint-method} \alias{xor,fingerprint,fingerprint-method} \alias{!,fingerprint-method} \title{ Logical Operators for Fingerprints } \description{ These functions perform logical operations (AND, OR, NOT, XOR) on the supplied binary fingerprints. Thus for two fingerprints A and B we have \describe{ \item{\code{&}}{Logical AND} \item{\code{|}}{Logical OR} \item{\code{xor}}{Logical XOR} \item{\code{!}}{Logical NOT (negation)} } } \arguments{ \item{e1}{ An object of class \code{fingerprint} } \item{e2}{ An object of class \code{fingerprint} } } \value{ A fingerprint object } \keyword{logic} \keyword{methods} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/man/facmat.Rd0000644000176200001440000000166515061401251015101 0ustar liggesusers\name{fp.factor.matrix} \alias{fp.factor.matrix} \title{ Converts a List of Fingerprints to a data.frame of Factors } \description{ This function will convert a \code{list} of fingerprint objects to a \code{data.frame} of factors with levels 1 and 0. } \usage{ fp.factor.matrix(fplist) } \arguments{ \item{fplist}{ A list structure with each element being an object of class \code{fingerprint}. 
These can be constructed by hand or read from disk via \code{\link{fp.read}} } } \value{ A \code{data.frame} of factors with one row per fingerprint in \code{fplist} and one column per bit position } \seealso{ \code{\link{distance}}, \code{\link{fp.read}} } \examples{ # make fingerprint objects fp1 <- new("fingerprint", nbit=6, bits=c(1,2,5,6)) fp2 <- new("fingerprint", nbit=6, bits=c(1,4,5,6)) fp3 <- new("fingerprint", nbit=6, bits=c(2,3,4,5,6)) fp.factor.matrix( list(fp1,fp2,fp3) ) } \keyword{logic} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/man/string.Rd0000644000176200001440000000166715061401251015156 0ustar liggesusers\name{as.character} \alias{as.character} \alias{as.character,fingerprint-method} \alias{as.character,featvec-method} \alias{as.character,feature-method} \title{ Generates a String Representation of a Fingerprint } \description{ The function returns a string of 1's and 0's or a character vector of features depending on the nature of the fingerprint supplied. } \usage{ \S4method{as.character}{fingerprint}(x) \S4method{as.character}{featvec}(x) \S4method{as.character}{feature}(x) } \arguments{ \item{x}{ An object of class \code{fingerprint}, \code{featvec} or \code{feature} } } \value{ A string of 1's and 0's or else a character vector of features (with their counts) } \examples{ # make a fingerprint vector fp <- new("fingerprint", nbit=32, bits=sample(1:32, 20)) # print out the string representation as.character(fp) } \keyword{logic} \keyword{methods} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/featvec.Rd0000644000176200001440000000445415061401251015262 0ustar liggesusers\name{featvec-class} \docType{class} \alias{featvec-class} \alias{distance,featvec,featvec,missing-method} \alias{distance,featvec,featvec,character-method} \alias{length,featvec-method} \title{Class "featvec"} \description{This class represents feature vector style fingerprints, where, rather than a bit string, the fingerprint is represented as a sequence of 
(signed) integers or strings. Each element of the collection is a representation of a structural feature. For cases where the features are integers, this usually corresponds to a hash of the original feature string. } \section{Objects from the Class}{ Objects can be created by calls of the form \code{new("featvec", ...)}. In contrast to traditional binary fingerprints, operations on feature vectors are slightly different and essentially correspond to operations on sets. Thus the logical and (&) would correspond to the intersection of the two feature vectors. } \section{Slots}{ \describe{ \item{\code{features}:}{Object of class \code{"character"} ~~ A vector containing the numeric or character features. Numeric features are treated as character strings } \item{\code{provider}:}{Object of class \code{"character"} ~~ Indicates the source of the fingerprint. Can be useful to keep track of what software generated the fingerprint.} \item{\code{name}:}{Object of class \code{"character"} ~~ The name associated with the fingerprint. If no name is available this gets set to an empty string} \item{\code{misc}:}{A list to hold arbitrary items associated with a fingerprint (such as extra fields from a fingerprint file)} } } \section{Methods}{ \describe{ \item{distance}{\code{signature(fp1 = "featvec", fp2 = "featvec", method = "missing")}: ... } \item{distance}{\code{signature(fp1 = "featvec", fp2 = "featvec", method = "character")}: ... } \item{as.character}{\code{signature(fp = "featvec")}: ... } \item{length}{\code{signature(fp = "featvec")}: ... } \item{show}{\code{signature(fp = "featvec")}: ... 
} } } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \seealso{ \code{\link{fp.read}}, \code{\link{fp.read.to.matrix}}, \code{\link{fp.sim.matrix}}, \code{\link{fp.to.matrix}}, \code{\link{fp.factor.matrix}}, \code{\link{random.fingerprint}} } \keyword{classes} \keyword{logic}fingerprint/man/fold.Rd0000644000176200001440000000170615061401251014566 0ustar liggesusers \name{fold} \alias{fold} \title{ Fold a fingerprint } \description{ In many situations a fingerprint is generated using a large length (such as 1024 bits or more). As a result of this, the fingerprints for a dataset can be very sparse. One approach to increasing bit density of such fingerprints is to fold them. This is performed by dividing the original fingerprint bitstring into two substrings of equal length and then performing an OR on the two substrings. It should be noted that many fingerprint generating routines will perform this internally. } \usage{ fold(fp) } \arguments{ \item{fp}{ The fingerprint to fold. Should be of class \code{fingerprint}. } } \value{ An object of class \code{fingerprint} representing the folded fingerprint. } \examples{ # make a fingerprint vector fp <- new("fingerprint", nbit=64, bits=sample(1:64, 30)) fold(fp) } \keyword{logic} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/man/length.Rd0000644000176200001440000000077215061401251015125 0ustar liggesusers\name{length} \alias{length} \alias{length,fingerprint-method} \title{ Fingerprint Bit Length } \description{ Returns the length of the fingerprint. That is, this is the length of the entire bit string and not simply the number of bits that are on. 
} \usage{ \S4method{length}{fingerprint}(x) } \arguments{ \item{x}{ An object of class \code{fingerprint} } } \value{ The length of the bit string } \keyword{logic} \keyword{methods} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/man/rndfp.Rd0000644000176200001440000000141315061401251014746 0ustar liggesusers\name{random.fingerprint} \alias{random.fingerprint} \title{ Generate Randomized Fingerprints } \description{ A utility function that can be used to generate binary fingerprints of a specified length with a specified number of bit positions (selected randomly) set to 1. Currently bit positions are selected uniformly. } \usage{ random.fingerprint(nbit,on) } \arguments{ \item{nbit}{ The length of the fingerprint, that is, the total number of bits. Must be a positive integer. } \item{on}{ How many positions should be set to 1 } } \value{ An object of class \code{fingerprint} } \examples{ # make a fingerprint vector fp <- random.fingerprint(32, 16) as.character(fp) } \keyword{logic} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/man/vec.Rd0000644000176200001440000000157415061401251014422 0ustar liggesusers\name{euc.vector} \alias{euc.vector} \title{ Euclidean Representation of Binary Fingerprints } \description{ Ordinarily, a binary fingerprint can be considered to represent a corner of a nD hypercube. However in many cases using such a representation can lead to a very sparse space. Consequently one approach is to convert the fingerprint so that it represents points on a nD unit hypersphere. The resultant fingerprint is then a nD coordinate. } \usage{ euc.vector(fp) } \arguments{ \item{fp}{ An object of class \code{fingerprint}. } } \value{ A numeric of length equal to the bit length of the fingerprint. 
The result corresponds to a unit vector for a point on the nD hypersphere } \examples{ # make a fingerprint vector fp <- new("fingerprint", nbit=8, bits=c(1,3,4,5,7)) vec <- euc.vector(fp) } \keyword{logic} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/man/feature.Rd0000644000176200001440000000234415061401251015274 0ustar liggesusers\name{feature-class} \docType{class} \alias{feature-class} \title{Class "feature"} \description{This class represents features - arbitrary alphanumeric sequences that are used to characterize molecular substructures (though there is no real restriction to molecules). A feature is associated with an integer count, indicating the occurrence of that feature in a molecule. The default value is 1. } \section{Objects from the Class}{ Objects can be created by calls of the form \code{new("feature", ...)}. } \section{Slots}{ \describe{ \item{\code{feature}:}{Object of class \code{"character"} ~ The string representation of a feature } \item{\code{count}:}{Object of class \code{"integer"} ~ The occurrence of the feature. Default is 1} \item{\code{.Data}:}{???} } } \section{Methods}{ \describe{ \item{count}{\code{signature(object = "feature")}: Return the count associated with the feature} } } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \seealso{ \code{\link{featvec-class}} } \examples{ ## create a new feature f <- new("feature", feature='ABCD', count=as.integer(1)) ## modify the feature string and the count feature(f) <- 'UXYZ' count(f) <- 10 } \keyword{classes} \keyword{logic}fingerprint/man/bitspec.Rd0000644000176200001440000000306715163763631015315 0ustar liggesusers\name{bit.spectrum} \alias{bit.spectrum} \title{ Generate a Bit Spectrum from a List of Fingerprints } \description{ The idea of comparing datasets using fingerprints was described in Guha & Schurer (2008). The idea is that one can summarize the dataset by counting the frequency of occurrence of each bit position. 
The frequency is normalized by the number of fingerprints considered. Thus a collection of N fingerprints can be converted to a single vector of numbers highlighting the most frequent bits with respect to a given dataset. A plot of this vector looks like a traditional spectrum and hence the name. The bit spectra for two datasets (assuming that the same types of fingerprints have been used) allows one to compare the similarity of the datasets, without having to do a full pairwise similarity calculation. The difference between the structural features of the datasets can be quantified by evaluating the distance between the two bit spectra. } \usage{ bit.spectrum(fplist) } \arguments{ \item{fplist}{ A list structure with each element being an object of class \code{fingerprint}. These can be constructed by hand or read from disk via \code{\link{fp.read}}. All fingerprints in the list should be of the same length. } } \value{ A numeric vector of length equal to the size of the fingerprints. } \seealso{ \code{\link{distance}}, \code{\link{fp.read}} } \references{ Guha, R.; Schurer, S.; \emph{J. Comp. Aid. Molec. Des.}, \bold{2008}, \emph{22}, 367-384. } \keyword{programming} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/shannon.Rd0000644000176200001440000000142715061401251015306 0ustar liggesusers\name{shannon} \alias{shannon} \alias{entropy} \title{ Evaluate Shannon Entropy for a Set of Fingerprints } \description{ This method evaluates the Shannon entropy for a set of fingerprints and utilizes the \code{\link{bit.spectrum}} method to obtain the relative frequencies of individual bits } \usage{ shannon(fplist) } \arguments{ \item{fplist}{ A list structure with each element being an object of class \code{fingerprint}. These can be constructed by hand or read from disk via \code{\link{fp.read}}. All fingerprints in the list should be of the same length. 
} } \value{ The Shannon entropy for the set of fingerprints } \seealso{ \code{\link{bit.spectrum}}, \code{\link{fp.read}} } \keyword{programming} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/read.Rd0000644000176200001440000000400115163766205014564 0ustar liggesusers\name{fp.read, fp.read.to.matrix} \alias{fp.read} \alias{fp.read.to.matrix} \title{ Functions to Read Fingerprints From Files } \description{ \code{fp.read} reads in a set of fingerprints from a file. Fingerprint output from the CDK, MOE and BCI can be handled. Each fingerprint is represented as a \code{fingerprint} object. \code{fp.read} returns a \code{list} structure, each element being a \code{fingerprint} or \code{featvec} object, depending on the value of the \code{binary} argument. \code{fp.read.to.matrix} is a utility function that reads the fingerprints directly to matrix form (columns are the bit positions and the rows are the objects whose fingerprints have been evaluated). Note that this method does not currently work with feature vector fingerprints. } \usage{ fp.read(f='fingerprint.txt', size=1024, lf=cdk.lf, header=FALSE, binary=TRUE) fp.read.to.matrix(f='fingerprint.txt', size=1024, lf=cdk.lf, header=FALSE) } \arguments{ \item{f}{ File containing the fingerprints } \item{size}{ The bit length of the fingerprints being considered } \item{lf}{ A line reading function that parses a single line from a fingerprint file. A number of functions are provided that parse the fingerprints from the output of the CDK, MOE and the BCI toolkit. In addition, support is now available for the FPS format from the chemfp project (\url{https://code.google.com/archive/p/chem-fingerprints}). } \item{header}{ Indicates whether the first line of the fingerprint file is a header line } \item{binary}{ If \code{TRUE} indicates that a binary fingerprint will be read in. 
Otherwise indicates that a feature vector style fingerprint (such as from a circular fingerprint) is being read in } } \seealso{ \code{\link{cdk.lf}}, \code{\link{moe.lf}}, \code{\link{bci.lf}}, \code{\link{ecfp.lf}}, \code{\link{fps.lf}} } \value{ A \code{list} or \code{matrix} of fingerprints } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \keyword{logic} fingerprint/DESCRIPTION0000644000176200001440000000304315164116553014316 0ustar liggesusersPackage: fingerprint Version: 3.5.10 Date: 2026-04-03 Title: Functions to Operate on Binary Fingerprint Data Authors@R: c( person(given = "Rajarshi", family = "Guha", role = c("aut"), email = "rajarshi.guha@gmail.com"), person(given = "Zach", family = "Charlop-Powers", role = c("cre"), email = "zach.charlop.powers@gmail.com")) Author: Rajarshi Guha [aut], Zach Charlop-Powers [cre] Maintainer: Zach Charlop-Powers BugReports: https://github.com/CDK-R/cdkr/issues Description: Functions to manipulate binary fingerprints of arbitrary length. A fingerprint is represented by an object of S4 class 'fingerprint' which is internally represented as a vector of integers, such that each element represents the position in the fingerprint that is set to 1. The bitwise logical functions in R are overridden so that they can be used directly with 'fingerprint' objects. A number of distance metrics are also available (many contributed by Michael Fadock). Fingerprints can be converted to Euclidean vectors (i.e., points on the unit hypersphere) and can also be folded using OR. Arbitrary fingerprint formats can be handled via line handlers. Currently handlers are provided for CDK, MOE and BCI fingerprint data. License: GPL | file LICENSE Depends: methods LazyLoad: yes Suggests: RUnit, testthat (>= 3.0.0) Config/testthat/edition: 3 NeedsCompilation: yes Packaged: 2026-04-03 16:52:07 UTC; zcpowers Repository: CRAN Date/Publication: 2026-04-04 05:11:07 UTC