rm(list=ls()) dyn.unload("Concordance.dll") dyn.load("Concordance.dll") library(UsingR) #modified from package mclust unmap <- function(classification){ n <- length(classification) u <- sort(unique(classification)) labs <- as.character(u) k <- length(u) z <- matrix(0, n, k) for (j in 1:k) z[classification == u[j], j] <- 1 dimnames(z) <- list(NULL, labs) return(z) } em.normal.univariate <- function(data, class, tol=0.000001, restriction=0, constrain=0, iteration=1000){ n <- as.integer(length(data)) g <- as.integer(nlevels(as.factor(class))) z <- unmap(class) pi <- double(g) mu <- double(g) sigma <- double(g) loglik <- double(1) convergence <- integer(1) if(restriction>0){ if(length(constrain)==g){ results <- .C("em_normal_univariate", as.double(data), as.double(z), n, pi, mu, sigma, as.integer(g), loglik, as.double(tol), as.integer(restriction), as.integer(constrain), as.integer(iteration), convergence) }else{ print("Error with constrain!") return(0) } }else{ results <- .C("em_normal_univariate", as.double(data), as.double(z), n, pi, mu, sigma, as.integer(g), loglik, as.double(tol), as.integer(restriction), as.integer(constrain), as.integer(iteration), convergence) } print(paste("convergence within", results[[12]], "run?", results[[13]], sep=" ")) return(list(convergence=results[[13]], pi=results[[4]], mu=results[[5]], sigma=results[[6]], loglik=results[[8]], class=array(results[[2]], dim=c(n,g)))) } em.normal.complete.discordant <- function(data, class, tol=0.000001, restriction=0, constrain=0, iteration=1000, check1=1, check2=1){ r1 <- em.normal.univariate(data[,1], class[,1], tol, restriction, constrain, iteration) r2 <- em.normal.univariate(data[,2], class[,2], tol, restriction, constrain, iteration) if(check1>0){ # par(mfrow=c(1,2)) hist(data[,1], freq=F, main=NULL, xlab="x") z <- range(data[,1]) x <- seq(from=z[1], to=z[2], length=100) fz <- function(x){ f <- 0 for(i in 1:length(r1$pi)){ f <- f + r1$pi[i]*dnorm(x, mean=r1$mu[i], sd=sqrt(r1$sigma[i])) } return(f) } lines(x, sapply(x, fz)) hist(data[,2], freq=F, main=NULL, xlab="y") z <- range(data[,2]) x <- seq(from=z[1], to=z[2], length=100) fz <- function(x){ f <- 0 for(i in 1:length(r2$pi)){ f <- f + r2$pi[i]*dnorm(x, mean=r2$mu[i], sd=sqrt(r2$sigma[i])) } return(f) } lines(x, sapply(x, fz)) } if(check2>0){ # par(mfrow=c(1,2)) nbins <- c(nclass.Sturges(data[,1]), nclass.Sturges(data[,2])) x.cuts <- seq(from=min(data[,1]), to=max(data[,1]), length=nbins[1]+1) y.cuts <- seq(from=min(data[,2]), to=max(data[,2]), length=nbins[2]+1) index.x <- cut(data[,1], x.cuts, include.lowest=T) index.y <- cut(data[,2], y.cuts, include.lowest=T) z <- matrix(0, nrow=nbins[1], ncol=nbins[2]) for (i in 1:length(index.x)){ z[index.x[i], index.y[i]] <- z[index.x[i], index.y[i]] + 1 } z <- z*nbins[1]*nbins[2]/((max(data[,1])-min(data[,1]))*(max(data[,2])-min(data[,2]))*dim(data)[1]) x <- x.cuts[1:nbins[1]] y <- y.cuts[1:nbins[2]] range.x <- range(x) range.y <- range(y) range.z <- range(z) persp(x, y, z, xlim=range.x, ylim=range.y, zlim=range.z, theta = 30, phi = 30, expand = 0.5, col = "grey", border='black', ltheta = 120, shade = 0.75, ticktype = "detailed", xlab = "Data Set 1", ylab = "Data Set 2", zlab = "", main="Observed") fz <- function(x, y){ f <- 0 for(i in 1:length(r1$pi)){ for(j in 1:length(r2$pi)){ f <- f + r1$pi[i]*dnorm(x, mean=r1$mu[i], sd=sqrt(r1$sigma[i]))*r2$pi[j]*dnorm(y, mean=r2$mu[j], sd=sqrt(r2$sigma[j])) } } return(f) } z <- outer(x, y, fz) persp(x, y, z, xlim=range.x, ylim=range.y, zlim=range.z, theta = 30, phi = 30, expand = 0.5, col = "grey", border='black', ltheta = 120, shade = 0.75, ticktype = "detailed", xlab = "Data Set 1", ylab = "Data Set 2", zlab = "", main="Fitted (CD)") } return(list(convergence=c(r1$convergence, r2$convergence), pi=rbind(r1$pi, r2$pi), mu_sigma=rbind(r1$mu, r1$sigma), nu_tau=rbind(r2$mu, r2$sigma), loglik=r1$loglik+r2$loglik, class=cbind(apply(r1$class,1,order,decreasing=T)[1,], apply(r2$class,1,order,decreasing=T)[1,]))) } em.normal.complete.concordant <- function(data, class, tol=0.000001, restriction=0, constrain=0, iteration=1000, check1=1, check2=1){ n <- as.integer(dim(data)[1]) g <- as.integer(nlevels(as.factor(class))) z <- unmap(class) pi <- double(g) mu <- double(g) sigma <- double(g) nu <- double(g) tau <- double(g) loglik <- double(1) convergence <- integer(1) if(restriction>0){ if(length(constrain)==g){ results <- .C("em_normal_complete_concordant", as.double(data[,1]), as.double(data[,2]), as.double(z), n, pi, mu, sigma, nu, tau, as.integer(g), loglik, as.double(tol), as.integer(restriction), as.integer(constrain), as.integer(iteration), convergence) }else{ print("Error with constrain!") return(0) } }else{ results <- .C("em_normal_complete_concordant", as.double(data[,1]), as.double(data[,2]), as.double(z), n, pi, mu, sigma, nu, tau, as.integer(g), loglik, as.double(tol), as.integer(restriction), as.integer(constrain), as.integer(iteration), convergence) } print(paste("convergence within", results[[15]], "run?", results[[16]], sep=" ")) if(check1>0){ # par(mfrow=c(1,2)) hist(data[,1], freq=F, main=NULL, xlab="x") z <- range(data[,1]) x <- seq(from=z[1], to=z[2], length=100) fz <- function(x){ f <- 0 for(i in 1:g){ f <- f + results[[5]][i]*dnorm(x, mean=results[[6]][i], sd=sqrt(results[[7]][i])) } return(f) } lines(x, sapply(x, fz)) hist(data[,2], freq=F, main=NULL, xlab="y") z <- range(data[,2]) x <- seq(from=z[1], to=z[2], length=100) fz <- function(x){ f <- 0 for(i in 1:g){ f <- f + results[[5]][i]*dnorm(x, mean=results[[8]][i], sd=sqrt(results[[9]][i])) } return(f) } lines(x, sapply(x, fz)) } if(check2>0){ # par(mfrow=c(1,2)) nbins <- c(nclass.Sturges(data[,1]), nclass.Sturges(data[,2])) x.cuts <- seq(from=min(data[,1]), to=max(data[,1]), length=nbins[1]+1) y.cuts <- seq(from=min(data[,2]), to=max(data[,2]), length=nbins[2]+1) index.x <- cut(data[,1], x.cuts, include.lowest=T) index.y <- cut(data[,2], y.cuts, include.lowest=T) z <- matrix(0, nrow=nbins[1], ncol=nbins[2]) for (i in 1:length(index.x)){ z[index.x[i], index.y[i]] <- z[index.x[i], index.y[i]] + 1 } z <- z*nbins[1]*nbins[2]/((max(data[,1])-min(data[,1]))*(max(data[,2])-min(data[,2]))*dim(data)[1]) x <- x.cuts[1:nbins[1]] y <- y.cuts[1:nbins[2]] range.x <- range(x) range.y <- range(y) range.z <- range(z) persp(x, y, z, xlim=range.x, ylim=range.y, zlim=range.z, theta = 30, phi = 30, expand = 0.5, col = "grey", border='black', ltheta = 120, shade = 0.75, ticktype = "detailed", xlab = "Data Set 1", ylab = "Data Set 2", zlab = "", main="Observed") fz <- function(x, y){ f <- 0 for(i in 1:length(results[[5]])){ f <- f + results[[5]][i]*dnorm(x, mean=results[[6]][i], sd=sqrt(results[[7]][i]))*dnorm(y, mean=results[[8]][i], sd=sqrt(results[[9]][i])) } return(f) } z <- outer(x, y, fz) persp(x, y, z, xlim=range.x, ylim=range.y, zlim=range.z, theta = 30, phi = 30, expand = 0.5, col = "grey", border='black', ltheta = 120, shade = 0.75, ticktype = "detailed", xlab = "Data Set 1", ylab = "Data Set 2", zlab = "", main="Fitted (CC)") } return(list(convergence=results[[16]], pi=results[[5]], mu_sigma=rbind(results[[6]], results[[7]]), nu_tau=rbind(results[[8]], results[[9]]), loglik=results[[11]], class=apply(array(results[[3]], dim=c(n,g)),1,order,decreasing=T)[1,])) } em.normal.partial.concordant <- function(data, class, tol=0.000001, restriction=0, constrain=0, iteration=1000, check1=1, check2=1){ n <- as.integer(dim(data)[1]) g <- as.integer(nlevels(as.factor(class))) zx <- unmap(class[,1]) zy <- unmap(class[,2]) pi <- double(g*g) mu <- double(g) sigma <- double(g) nu <- double(g) tau <- double(g) loglik <- double(1) convergence <- integer(1) if(restriction>0){ if(length(constrain)==g){ results <- .C("em_normal_partial_concordant", as.double(data[,1]), as.double(data[,2]), as.double(zx), as.double(zy), n, pi, mu, sigma, nu, tau, g, loglik, as.double(tol), as.integer(restriction), as.integer(constrain), as.integer(iteration), convergence) }else{ print("Error with constrain!") return(0) } }else{ results <- .C("em_normal_partial_concordant", as.double(data[,1]), as.double(data[,2]), as.double(zx), as.double(zy), n, pi, mu, sigma, nu, tau, g, loglik, as.double(tol), as.integer(restriction), as.integer(constrain), as.integer(iteration), convergence) } print(paste("convergence within", results[[16]], "run?", results[[17]], sep=" ")) if(check1>0){ # par(mfrow=c(1,2)) hist(data[,1], freq=F, main=NULL, xlab="x") z <- range(data[,1]) x <- seq(from=z[1], to=z[2], length=100) fz <- function(x){ f <- 0 for(i in 1:g){ f <- f + sum(results[[6]][(i-1)*g+(1:g)])*dnorm(x, mean=results[[7]][i], sd=sqrt(results[[8]][i])) } return(f) } lines(x, sapply(x, fz)) hist(data[,2], freq=F, main=NULL, xlab="y") z <- range(data[,2]) x <- seq(from=z[1], to=z[2], length=100) fz <- function(x){ f <- 0 for(i in 1:g){ f <- f + sum(results[[6]][((1:g)-1)*g+i])*dnorm(x, mean=results[[9]][i], sd=sqrt(results[[10]][i])) } return(f) } lines(x, sapply(x, fz)) } if(check2>0){ # par(mfrow=c(1,2)) nbins <- c(nclass.Sturges(data[,1]), nclass.Sturges(data[,2])) x.cuts <- seq(from=min(data[,1]), to=max(data[,1]), length=nbins[1]+1) y.cuts <- seq(from=min(data[,2]), to=max(data[,2]), length=nbins[2]+1) index.x <- cut(data[,1], x.cuts, include.lowest=T) index.y <- cut(data[,2], y.cuts, include.lowest=T) z <- matrix(0, nrow=nbins[1], ncol=nbins[2]) for (i in 1:length(index.x)){ z[index.x[i], index.y[i]] <- z[index.x[i], index.y[i]] + 1 } z <- z*nbins[1]*nbins[2]/((max(data[,1])-min(data[,1]))*(max(data[,2])-min(data[,2]))*dim(data)[1]) x <- x.cuts[1:nbins[1]] y <- y.cuts[1:nbins[2]] range.x <- range(x) range.y <- range(y) range.z <- range(z) persp(x, y, z, xlim=range.x, ylim=range.y, zlim=range.z, theta = 30, phi = 30, expand = 0.5, col = "grey", border='black', ltheta = 120, shade = 0.75, ticktype = "detailed", xlab = "Data Set 1", ylab = "Data Set 2", zlab = "", main="Observed") fz <- function(x, y){ f <- 0 for(i in 1:g){ for(j in 1:g){ f <- f + results[[6]][(i-1)*g+j]*dnorm(x, mean=results[[7]][i], sd=sqrt(results[[8]][i]))*dnorm(y, mean=results[[9]][i], sd=sqrt(results[[10]][i])) } } return(f) } z <- outer(x, y, fz) persp(x, y, z, xlim=range.x, ylim=range.y, zlim=range.z, theta = 30, phi = 30, expand = 0.5, col = "grey", border='black', ltheta = 120, shade = 0.75, ticktype = "detailed", xlab = "Data Set 1", ylab = "Data Set 2", zlab = "", main="Fitted (PCD)") } return(list(convergence=results[[17]], pi=t(array(results[[6]],dim=c(g,g))), mu_sigma=rbind(results[[7]], results[[8]]), nu_tau=rbind(results[[9]], results[[10]]), loglik=results[[12]], class=cbind(apply(array(results[[3]], dim=c(n,g)),1,order,decreasing=T)[1,],apply(array(results[[4]], dim=c(n,g)),1,order,decreasing=T)[1,]))) } concordance.test.discordant.partial <- function(data, class, tol=0.000001, restriction=0, constrain=0, iteration=1000){ r0 <- em.normal.complete.discordant(data, class, tol, restriction, constrain, iteration, check1=0, check2=0) r1 <- em.normal.partial.concordant(data, class, tol, restriction, constrain, iteration, check1=0, check2=0) test <- NA if(min(c(r0$convergence, r1$convergence))>0){ test <- r1$loglik - r0$loglik } return(test) } concordance.test.concordant.partial <- function(data, class, tol=0.000001, restriction=0, constrain=0, iteration=1000){ r0 <- em.normal.complete.concordant(data, apply(class,1,min), tol, restriction, constrain, iteration, check1=0, check2=0) r1 <- em.normal.partial.concordant(data, class, tol, restriction, constrain, check1=0, iteration, check2=0) test <- NA if(min(c(r0$convergence, r1$convergence))>0){ test <- r1$loglik - r0$loglik } return(test) } sim.normal.complete.discordant <- function(n, g, pi.x, mu.x, sigma.x, pi.y, mu.y, sigma.y){ data.x <- sample(1:g, n, replace=T, prob=pi.x) data.y <- sample(1:g, n, replace=T, prob=pi.y) for(i in 1:g){ l <- length(data.x[data.x==i]) if(l>0){ data.x[data.x==i] <- rnorm(l, mean=mu.x[i], sd=sqrt(sigma.x[i])) } l <- length(data.y[data.y==i]) if(l>0){ data.y[data.y==i] <- rnorm(l, mean=mu.y[i], sd=sqrt(sigma.y[i])) } } return(cbind(data.x, data.y)) } sim.normal.complete.concordant <- function(n, g, pi, mu.x, sigma.x, mu.y, sigma.y){ data <- sample(1:g, n, replace=T, prob=pi) data.x <- data data.y <- data for(i in 1:g){ l <- length(data[data==i]) if(l>0){ data.x[data==i] <- rnorm(l, mean=mu.x[i], sd=sqrt(sigma.x[i])) data.y[data==i] <- rnorm(l, mean=mu.y[i], sd=sqrt(sigma.y[i])) } } return(cbind(data.x, data.y)) } #simulations #Complete Concordance data <- cbind( c(rnorm(300,mean=0,sd=1), rnorm(100,mean=-2,sd=1), rnorm(100,mean=2,sd=1)), c(rnorm(300,mean=0,sd=1), rnorm(100,mean=-2,sd=1), rnorm(100,mean=2,sd=1)) ) pdata <- data #Complete Discordance o <- sample(1:dim(data)[1]) pdata <- cbind(data[,1], data[o,2]) scatter.with.hist(pdata[,1], pdata[,2], trend.line=NULL) class <- cbind(rep(0, dim(pdata)[1]), rep(0, dim(pdata)[1])) class[pdata[,1]>0+1.96,1] <- 2 class[pdata[,1]<0-1.96,1] <- 1 class[pdata[,2]>0+1.96,2] <- 2 class[pdata[,2]<0-1.96,2] <- 1 par(mfcol=c(4,3), mai=c(0.3, 0.3, 0.3, 0.3)) cd <- em.normal.complete.discordant(pdata, class, tol=0.001, restriction=1, constrain=c(0,-1,1), check1=1, check2=1) pd <- em.normal.partial.concordant(pdata, class, tol=0.001, restriction=1, constrain=c(0,-1,1), check1=1, check2=1) cc <- em.normal.complete.concordant(pdata, apply(class,1,min), tol=0.001, restriction=1, constrain=c(0,-1,1), check1=1, check2=1) t.dp <- concordance.test.discordant.partial(pdata, class, tol=0.001, restriction=1, constrain=c(0,-1,1)) t.cp <- concordance.test.concordant.partial(pdata, class, tol=0.001, restriction=1, constrain=c(0,-1,1)) #significance for CD n <- dim(cd$class)[1] g <- dim(cd$mu_sigma)[2] t0.dp <- double(100) i <- 1 while(i<=length(t0.dp)){ sdata <- sim.normal.complete.discordant(n, g, cd$pi[1,], cd$mu_sigma[1,], cd$mu_sigma[2,], cd$pi[2,], cd$nu_tau[1,], cd$nu_tau[2,]) sclass <- cbind(rep(0, dim(sdata)[1]), rep(0, dim(sdata)[1])) sclass[sdata[,1]>0+1.96,1] <- 2 sclass[sdata[,1]<0-1.96,1] <- 1 sclass[sdata[,2]>0+1.96,2] <- 2 sclass[sdata[,2]<0-1.96,2] <- 1 t0.dp[i] <- concordance.test.discordant.partial(sdata, sclass, tol=0.001, restriction=1, constrain=c(0,-1,1)) print(c(t.dp, i, t0.dp[i])) i <- i+1 } quantile(t0.dp, na.rm=T, prob=c(0, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1)) #significance for CC n <- length(cc$class) g <- dim(cc$mu_sigma)[2] t0.cp <- double(100) i <- 1 while(i<=length(t0.cp)){ sdata <- sim.normal.complete.concordant(n, g, cc$pi, cc$mu_sigma[1,], cc$mu_sigma[2,], cc$nu_tau[1,], cc$nu_tau[2,]) sclass <- cbind(rep(0, dim(sdata)[1]), rep(0, dim(sdata)[1])) sclass[sdata[,1]>0+1.96,1] <- 2 sclass[sdata[,1]<0-1.96,1] <- 1 sclass[sdata[,2]>0+1.96,2] <- 2 sclass[sdata[,2]<0-1.96,2] <- 1 t0.cp[i] <- concordance.test.concordant.partial(sdata, sclass, tol=0.001, restriction=1, constrain=c(0,-1,1)) print(c(t.cp, i, t0.cp[i])) i <- i+1 } quantile(t0.cp, na.rm=T, prob=c(0, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1))