### R functions ### rm(list=ls()) dyn.load("Concordance.dll") sim.multinom <- function(p){ return( c(rmultinom(1,1,p)) ) } sim.all <- function(k, p, pi){ m <- dim(p)[1] z <- apply(p, 1, sim.multinom) w <- rowSums(z)>m*pi return( c(w, (w[2] || w[3])) ) } sim.es.all <- function(p, pi, B=1000){ return( rowMeans( sapply(1:B, sim.all, p, pi) ) ) } #modified from package mclust unmap <- function(classification){ n <- length(classification) u <- sort(unique(classification)) labs <- as.character(u) k <- length(u) z <- matrix(0, n, k) for (j in 1:k) z[classification == u[j], j] <- 1 dimnames(z) <- list(NULL, labs) return(z) } em.normal.univariate <- function(data, class, tol=0.000001, restriction=0, constrain=0, iteration=1000){ n <- as.integer(length(data)) g <- as.integer(nlevels(as.factor(class))) z <- unmap(class) pi <- double(g) mu <- double(g) sigma <- double(g) loglik <- double(1) convergence <- integer(1) if(restriction>0){ if(length(constrain)==g){ results <- .C("em_normal_univariate", as.double(data), as.double(z), n, pi, mu, sigma, as.integer(g), loglik, as.double(tol), as.integer(restriction), as.integer(constrain), as.integer(iteration), convergence) }else{ print("Error with constrain!") return(0) } }else{ results <- .C("em_normal_univariate", as.double(data), as.double(z), n, pi, mu, sigma, as.integer(g), loglik, as.double(tol), as.integer(restriction), as.integer(constrain), as.integer(iteration), convergence) } print(paste("convergence within", results[[12]], "run?", results[[13]], sep=" ")) return(list(model="univariate", convergence=results[[13]], pi=results[[4]], mu=results[[5]], sigma=results[[6]], loglik=results[[8]], class=apply(array(results[[2]], dim=c(n,g)),1,order,decreasing=T)[1,], z=array(results[[2]], dim=c(n,g)))) } em.normal.complete.discordant <- function(data, class, tol=0.000001, restriction=0, constrain=0, iteration=1000){ r1 <- em.normal.univariate(data[,1], class[,1], tol, restriction, constrain, iteration) r2 <- em.normal.univariate(data[,2], class[,2], tol, restriction, constrain, iteration) return(list(model="CD", convergence=c(r1$convergence, r2$convergence), pi=rbind(r1$pi, r2$pi), mu_sigma=rbind(r1$mu, r1$sigma), nu_tau=rbind(r2$mu, r2$sigma), loglik=r1$loglik+r2$loglik, class=cbind(r1$class, r2$class), z1=r1$z, z2=r2$z)) } em.normal.complete.concordant <- function(data, class, tol=0.000001, restriction=0, constrain=0, iteration=1000){ n <- as.integer(dim(data)[1]) g <- as.integer(nlevels(as.factor(class))) z <- unmap(class) pi <- double(g) mu <- double(g) sigma <- double(g) nu <- double(g) tau <- double(g) loglik <- double(1) convergence <- integer(1) if(restriction>0){ if(length(constrain)==g){ results <- .C("em_normal_complete_concordant", as.double(data[,1]), as.double(data[,2]), as.double(z), n, pi, mu, sigma, nu, tau, as.integer(g), loglik, as.double(tol), as.integer(restriction), as.integer(constrain), as.integer(iteration), convergence) }else{ print("Error with constrain!") return(0) } }else{ results <- .C("em_normal_complete_concordant", as.double(data[,1]), as.double(data[,2]), as.double(z), n, pi, mu, sigma, nu, tau, as.integer(g), loglik, as.double(tol), as.integer(restriction), as.integer(constrain), as.integer(iteration), convergence) } print(paste("convergence within", results[[15]], "run?", results[[16]], sep=" ")) return(list(model="CC", convergence=results[[16]], pi=results[[5]], mu_sigma=rbind(results[[6]], results[[7]]), nu_tau=rbind(results[[8]], results[[9]]), loglik=results[[11]], class=apply(array(results[[3]], dim=c(n,g)),1,order,decreasing=T)[1,], z=array(results[[3]], dim=c(n,g)))) } em.normal.partial.concordant <- function(data, class, tol=0.000001, restriction=0, constrain=0, iteration=1000){ n <- as.integer(dim(data)[1]) g <- as.integer(nlevels(as.factor(class))) yl.outer <- function(k, zx, zy){ return( c(zx[k,] %o% zy[k,]) ) } yl.diag <- function(k, z){ return( c(diag(z[k,])) ) } zx <- unmap(class[,1]) zy <- unmap(class[,2]) zxy <- sapply(1:dim(zx)[1], yl.outer, zx, zy) pi <- double(g*g) mu <- double(g) sigma <- double(g) nu <- double(g) tau <- double(g) loglik <- double(1) convergence <- integer(1) if(restriction>0){ if(length(constrain)==g){ results <- .C("em_normal_partial_concordant", as.double(data[,1]), as.double(data[,2]), as.double(t(zxy)), n, pi, mu, sigma, nu, tau, g, loglik, as.double(tol), as.integer(restriction), as.integer(constrain), as.integer(iteration), convergence) }else{ print("Error with constrain!") return(0) } }else{ results <- .C("em_normal_partial_concordant", as.double(data[,1]), as.double(data[,2]), as.double(t(zxy)), n, pi, mu, sigma, nu, tau, g, loglik, as.double(tol), as.integer(restriction), as.integer(constrain), as.integer(iteration), convergence) } print(paste("convergence within", results[[15]], "run?", results[[16]], sep=" ")) return(list(model="PCD", convergence=results[[16]], pi=t(array(results[[5]],dim=c(g,g))), mu_sigma=rbind(results[[6]], results[[7]]), nu_tau=rbind(results[[8]], results[[9]]), loglik=results[[11]], class=apply(array(results[[3]], dim=c(n,g*g)),1,order,decreasing=T)[1,], z=array(results[[3]], dim=c(n,g*g)))) } concordance.test.discordant.partial <- function(data, class, tol=0.000001, restriction=0, constrain=0, iteration=1000){ r0 <- em.normal.complete.discordant(data, class, tol, restriction, constrain, iteration) r1 <- em.normal.partial.concordant(data, class, tol, restriction, constrain, iteration) test <- NA if(min(c(r0$convergence, r1$convergence))>0){ test <- r1$loglik - r0$loglik } return(test) } concordance.test.concordant.partial <- function(data, class, tol=0.000001, restriction=0, constrain=0, iteration=1000){ r0 <- em.normal.complete.concordant(data, apply(class,1,min), tol, restriction, constrain, iteration) r1 <- em.normal.partial.concordant(data, class, tol, restriction, constrain, iteration) test <- NA if(min(c(r0$convergence, r1$convergence))>0){ test <- r1$loglik - r0$loglik } return(test) } sim.normal.complete.discordant <- function(n, g, pi.x, mu.x, sigma.x, pi.y, mu.y, sigma.y){ data.x <- sample(1:g, n, replace=T, prob=pi.x) data.y <- sample(1:g, n, replace=T, prob=pi.y) for(i in 1:g){ l <- length(data.x[data.x==i]) if(l>0){ data.x[data.x==i] <- rnorm(l, mean=mu.x[i], sd=sqrt(sigma.x[i])) } l <- length(data.y[data.y==i]) if(l>0){ data.y[data.y==i] <- rnorm(l, mean=mu.y[i], sd=sqrt(sigma.y[i])) } } return(cbind(data.x, data.y)) } sim.normal.complete.concordant <- function(n, g, pi, mu.x, sigma.x, mu.y, sigma.y){ data <- sample(1:g, n, replace=T, prob=pi) data.x <- data data.y <- data for(i in 1:g){ l <- length(data[data==i]) if(l>0){ data.x[data==i] <- rnorm(l, mean=mu.x[i], sd=sqrt(sigma.x[i])) data.y[data==i] <- rnorm(l, mean=mu.y[i], sd=sqrt(sigma.y[i])) } } return(cbind(data.x, data.y)) } ### Running samples ### #simulate paired z-scores n <- 3000 g <- 3 set.seed(123) pdata <- sim.normal.complete.concordant(n, g, pi=c(0.6,0.2,0.2), c(0,-1.5,2), c(1,1.5,2), c(0,-2,1.5), c(1,2,1.5)) class <- cbind(rep(0, dim(pdata)[1]), rep(0, dim(pdata)[1])) class[pdata[,1]>0+1,1] <- 2 class[pdata[,1]<0-1,1] <- 1 class[pdata[,2]>0+1,2] <- 2 class[pdata[,2]<0-1,2] <- 1 #estimate models for parametric bootstrap cd <- em.normal.complete.discordant(pdata, class, tol=0.001, restriction=1, constrain=c(0,-1,1), iteration=1000) cc <- em.normal.complete.concordant(pdata, apply(class,1,min), tol=0.001, restriction=1, constrain=c(0,-1,1), iteration=1000) pd <- em.normal.partial.concordant(pdata, class, tol=0.001, restriction=1, constrain=c(0,-1,1), iteration=1000) #test of CC/CD t.dp <- concordance.test.discordant.partial(pdata, class, tol=0.001, restriction=1, constrain=c(0,-1,1), iteration=1000) t.cp <- concordance.test.concordant.partial(pdata, class, tol=0.001, restriction=1, constrain=c(0,-1,1), iteration=1000) #significance for CD: reject CD since t.dp > all t0.dp n <- dim(cd$class)[1] g <- dim(cd$mu_sigma)[2] t0.dp <- double(1000) i <- 1 while(i<=length(t0.dp)){ sdata <- sim.normal.complete.discordant(n, g, cd$pi[1,], cd$mu_sigma[1,], cd$mu_sigma[2,], cd$pi[2,], cd$nu_tau[1,], cd$nu_tau[2,]) sclass <- cbind(rep(0, dim(sdata)[1]), rep(0, dim(sdata)[1])) sclass[sdata[,1]>0+1,1] <- 2 sclass[sdata[,1]<0-1,1] <- 1 sclass[sdata[,2]>0+1,2] <- 2 sclass[sdata[,2]<0-1,2] <- 1 t0.dp[i] <- concordance.test.discordant.partial(sdata, sclass, tol=0.001, restriction=1, constrain=c(0,-1,1), iteration=1000) print(c(t.dp, i, t0.dp[i])) i <- i+1 } length(t0.dp) sum(!is.na(t0.dp)) sum(!is.na(t0.dp) & (t0.dp>=t.dp)) sum(!is.na(t0.dp) & (t0.dp>=t.dp)) / sum(!is.na(t0.dp)) #significance for CC: not reject CC since t.cp ~ t0.cp n <- length(cc$class) g <- dim(cc$mu_sigma)[2] t0.cp <- double(1000) i <- 1 while(i<=length(t0.cp)){ sdata <- sim.normal.complete.concordant(n, g, cc$pi, cc$mu_sigma[1,], cc$mu_sigma[2,], cc$nu_tau[1,], cc$nu_tau[2,]) sclass <- cbind(rep(0, dim(sdata)[1]), rep(0, dim(sdata)[1])) sclass[sdata[,1]>0+1,1] <- 2 sclass[sdata[,1]<0-1,1] <- 1 sclass[sdata[,2]>0+1,2] <- 2 sclass[sdata[,2]<0-1,2] <- 1 t0.cp[i] <- concordance.test.concordant.partial(sdata, sclass, tol=0.001, restriction=1, constrain=c(0,-1,1), iteration=1000) print(c(t.cp, i, t0.cp[i])) i <- i+1 } length(t0.cp) sum(!is.na(t0.cp)) sum(!is.na(t0.cp) & (t0.cp>=t.cp)) sum(!is.na(t0.cp) & (t0.cp>=t.cp)) / sum(!is.na(t0.cp)) #confirmed model cc <- em.normal.complete.concordant(pdata, apply(class,1,min), tol=0.000001, restriction=1, constrain=c(0,-1,1), iteration=10000) #random gene set analysis plot(pdata[,1], pdata[,2], main="A random gene set", xlab="z-score (data set 1)", ylab="z-score (data set 2)", pch=19, col="gray75", cex.lab=1.5, cex.axis=1.5, cex=1.25) abline(h=0, col="gray50", lwd=1, lty=3) abline(v=0, col="gray50", lwd=1, lty=3) set.seed(123) h <- sample(1:dim(pdata)[1], 100) lines(pdata[h,1], pdata[h,2], type="p", pch=19, col="gray0", cex=1.5) print( sim.es.all(cc$z[h,], cc$pi) )