#include #include #include #define PI 3.14159265 //do product recursively //fixed x, n, g, p, but different mu, sigma for different p; need an index vector id[] (i_j in equation 53) for this //remember both k and j start from 1 to n and p, respectively; need to subtract one for index consistency double prod_of(long *n, long *g, long *p, long *k, long *j, long *id, double *x, double *mu, double *sigma){ int i; double prod; long jnext; if ((*j)==1){ prod=exp(0-0.5*(x[((*k)-1)*(*p)+(*j)-1]-mu[((*j)-1)*(*g)+id[(*j)-1]])*(x[((*k)-1)*(*p)+(*j)-1]-mu[((*j)-1)*(*g)+id[(*j)-1]])/sigma[((*j)-1)*(*g)+id[(*j)-1]])/sqrt(2*PI*sigma[((*j)-1)*(*g)+id[(*j)-1]]); } else { jnext = (*j)-1; prod=prod_of(n,g,p,k,&jnext,id,x,mu,sigma)*exp(0-0.5*(x[((*k)-1)*(*p)+(*j)-1]-mu[((*j)-1)*(*g)+id[(*j)-1]])*(x[((*k)-1)*(*p)+(*j)-1]-mu[((*j)-1)*(*g)+id[(*j)-1]])/sigma[((*j)-1)*(*g)+id[(*j)-1]])/sqrt(2*PI*sigma[((*j)-1)*(*g)+id[(*j)-1]]); } return(prod); /*printf("%lf \n", prod);*/ } long index_of(long *g, long *j, long *id){ long idx; long jnext; if ((*j)==1){ idx=id[(*j)-1]; } else { jnext=(*j)-1; idx=index_of(g,&jnext,id)*(*g)+id[(*j)-1]; } return(idx); } long fact(long *j){ long prod,jnext; if ((*j)==0){ prod=1; } else{ if ((*j)==1){ prod=1; } else { jnext = (*j)-1; prod=fact(&jnext)*(*j); } } return(prod); } //link the group number with each id (id_group). For each id, calculate the number of occurrences (dd vector) of each component. //counter is used to record different dds. pi_group is the number of dd vectors in counter. //For each id, compare its dd with all dds (totally #pi_group of dds) already in counter. //If this dd is the same as the t_th vector in counter, then group number for this id is the corresponding group number t. //If this dd doesnot exist in the counter, pi_group is added by 1, and the group number for this id is the new pi_group. void linking_matrix(long *n, long *g, long *p, long *j, long *pi_group, long *rr, long *id, long *counter, long *id_group,long *id_group_diag){ int l,jj,t,temp,temp1[(*g)],sumtemp1,sumcheckdiag,checkdiag[(*g)], tempid,check; long jnext,dd[(*g)],temp2; l=0-1; if ((*j)<=(*p)){ for(l=0;l<(*g);l++){ id[(*j)-1]=l; jnext=(*j)+1; linking_matrix(n, g, p, &jnext, pi_group, rr, id, counter, id_group, id_group_diag); } /*for(jj=0; jj<(*p); jj++){ printf("id%d %d ",jj, id[jj]); } printf("\n"); for(l=0; l<(*g); l++){ printf("dd%d %d ", l, dd[l]); } printf("\n");*/ } else { temp=-1; for(l=0; l<(*g); l++){ dd[l]=0; } for(jj=0;jj<(*p);jj++){ while(id[jj]!=temp){ temp=temp+1; } dd[temp]=dd[temp]+1; } /*for(jj=0; jj<(*p); jj++){ printf("id%d %d ", jj, id[jj]); } printf("\n"); for(l=0; l<(*g); l++){ printf("counter%d %d ", l, dd[l]); } printf("\n"); */ check=0; sumcheckdiag=0; for(l=0;l<(*g);l++){ checkdiag[l]=0; } tempid=index_of(g,p,id); //printf("index %d \n", tempid); for(l=0;l<(*g);l++){ if(dd[l]==(*p)){ checkdiag[l]=1; sumcheckdiag=sumcheckdiag+checkdiag[l]; id_group_diag[tempid]=-(l+1); } } //printf("sumcheckdiag %d %d %d \n", sumcheckdiag,tempid, id_group_diag[tempid]); //printf("\n"); if(sumcheckdiag==0) { for(t=1;t<=(*pi_group);t++){ //printf("pigroup %d t %d \n", (*pi_group), t); sumtemp1=0; //printf("counter_t %d %d %d \n", counter[(t-1)*(*g)],counter[(t-1)*(*g)+1],counter[(t-1)*(*g)+2]); for(l=0;l<(*g);l++){ if(dd[l]==counter[(t-1)*(*g)+l]){ temp1[l]=0; } else{ temp1[l]=1; } sumtemp1=sumtemp1+temp1[l]; } //printf("temp1 %d %d %d \n", temp1[0],temp1[1],temp1[2]); if(sumtemp1==0){ id_group[tempid]=t; temp2=1; for(l=0;l<(*g);l++){ temp2=temp2*fact(&dd[l]); } rr[(t-1)]= fact(p)/temp2; check=1; } //printf("check %d idx %d idgroup %d rr %d\n", check, tempid, id_group[tempid],rr[(t-1)]); //printf("\n"); } if (check==0){ (*pi_group)=(*pi_group)+1; for (l=0;l<(*g);l++){ counter[((*pi_group)-1)*(*g)+l]=dd[l]; } id_group[tempid]=(*pi_group); temp2=1; for(l=0;l<(*g);l++){ temp2=temp2*fact(&dd[l]); } rr[((*pi_group)-1)]= fact(p)/temp2; //printf("check %d idx %d idgroup %d \n", check, tempid, id_group[tempid]); //printf("\n"); } } id_group[tempid]=id_group[tempid]+id_group_diag[tempid]; } /*printf("%d %d %lf \n", *j, l, total);*/ } //calculate partial density for diagonal elements double fpcdpart1_k(long *n, long *g, long *p, long *k, double *pi, double *x, double *mu, double *sigma){ double total; int l,jj; long id[(*p)]; total=0; for (l=0; l<(*g); l++){ for (jj=0; jj<(*p); jj++){ id[jj]=l; } total=total+ pi[l]*prod_of(n, g, p, k, p, id, x, mu, sigma); } return(total); } //calculate partial density for non-diagonal elements //do multiple sums using for and also recursion //remember j start from 1 to p; need to subtract one for index consistency double nest_of_fpcdpart2_k(long *n, long *g, long *p, long *k, long *j, long *pi_group, long *id, long *id_group, double *alpha, double *x, double *mu, double *sigma){ double total; int l,t; long jnext, tempid; l=0-1; total=0; if ((*j)<=(*p)){ for(l=0;l<(*g);l++){ id[(*j)-1]=l; jnext=(*j)+1; total=total+nest_of_fpcdpart2_k(n, g, p, k, &jnext, pi_group, id, id_group, alpha, x, mu, sigma); } } else { tempid=index_of(g,p,id); for (t=1;t<=(*pi_group);t++){ if(id_group[tempid]==t){ total=alpha[(t-1)]*prod_of(n, g, p, k, p, id, x, mu, sigma); } } } return(total); /*printf("%d %d %d %lf \n", *k, *j, l, total);*/ } //calculate uhat which does not include pi or alpha void nest_ez_k(long *n, long *g, long *p, long *k, long *j, long *pi_group, long *id, long *id_group, double *uhat, double *pi, double *alpha, double *x, double *mu, double *sigma){ double temp1, temp2, temp4, sumpi; int l; long jnext, idx, ii,jj,temp, temp3, id1[(*p)]; fflush(stdin); l=0-1; jj=1; temp=pow((*g),(*p)); if ((*j)<=(*p)) { for (l=0; l<(*g); l++){ id[(*j)-1]=l; jnext=(*j)+1; nest_ez_k(n,g,p,k,&jnext, pi_group, id, id_group, uhat, pi, alpha, x, mu, sigma); } /*for(ii=0; ii<(*p); ii++){ printf("%d ", id[ii]); }*/ } else { temp1=fpcdpart1_k(n, g, p, k, pi, x, mu, sigma); temp2=nest_of_fpcdpart2_k(n, g, p, k, &jj, pi_group, id1, id_group, alpha, x, mu, sigma); temp4=prod_of(n, g, p, k, p, id, x, mu, sigma); /*printf("%lf \n",temp1);*/ temp3=index_of(g,p,id); /*printf("%d", temp3); printf("\n");*/ uhat[((*k)-1)*temp+temp3]=temp4/(temp1+temp2); /*for(ii=0; ii<(*p); ii++){ printf("%d ", id[ii]); } printf("\n");*/ } } double nest_of_mu_jj_num_k(long *n, long *g, long *p, long *k, long *j, long *jj, long *ijj, long *id ,double *uhat, double *pi, double *x, double *mu, double *sigma){ double total; int l; long jnext, temp1, temp; fflush(stdin); temp=pow((*g),(*p)); l=0-1; total=0; if ((*j)<=(*p)){ if ((*j)==(*jj)){ for(l=0;l<(*g);l++){ if (l==(*ijj)){ id[(*j)-1]=l; jnext=(*j)+1; total=total+nest_of_mu_jj_num_k(n, g, p, k, &jnext, jj, ijj, id, uhat, pi, x, mu, sigma); /*printf("%d %d %d %d %d %lf \n", *k, *j, l, *jj, *ijj, total);*/ } else{ total=total+0; /*printf("%d %d %d %d %d %lf \n", *k, *j, l, *jj, *ijj, total);*/ } } } else{ for(l=0;l<(*g);l++){ id[(*j)-1]=l; jnext=(*j)+1; total=total+nest_of_mu_jj_num_k(n, g, p, k, &jnext, jj, ijj, id, uhat, pi, x, mu, sigma); /*printf("%d %d %d %d %d %lf \n", *k, *j, l, *jj, *ijj, total);*/ } } } else { if ((*j)!=(*jj)){ temp1=index_of(g,p,id); total=uhat[((*k)-1)*temp+temp1]*x[((*k)-1)*(*p)+(*jj)-1]; } /*else { if (id[(*p)-1]==(*ijj)) { temp1=index_of(g,p,id); total=uhat[((*k)-1)*temp+temp1]*x[((*k)-1)*(*p)+(*jj)-1]; } }*/ } /*printf("%d %d %d %lf \n", *k, *j, l, total);*/ return(total); } double nest_of_mu_jj_den_k(long *n, long *g, long *p, long *k, long *j, long *jj, long *ijj, long *id, double *uhat, double *pi, double *x, double *mu, double *sigma){ double total; int l; long jnext, temp1, temp; fflush(stdin); temp=pow((*g),(*p)); l=0-1; total=0; if ((*j)<=(*p)){ if ((*j)==(*jj)){ for(l=0;l<(*g);l++){ if (l==(*ijj)){ id[(*j)-1]=l; jnext=(*j)+1; total=total+nest_of_mu_jj_den_k(n, g, p, k, &jnext, jj, ijj, id, uhat, pi, x, mu, sigma); /*printf("%d %d %d %d %d %lf \n", *k, *j, l, *jj, *ijj, total);*/ } else{ total=total+0; /*printf("%d %d %d %d %d %lf \n", *k, *j, l, *jj, *ijj, total);*/ } } } else{ for(l=0;l<(*g);l++){ id[(*j)-1]=l; jnext=(*j)+1; total=total+nest_of_mu_jj_den_k(n, g, p, k, &jnext, jj, ijj, id, uhat, pi, x, mu, sigma); /*printf("%d %d %d %d %d %lf \n", *k, *j, l, *jj, *ijj, total);*/ } } } else { if ((*j)!=(*jj)){ temp1=index_of(g,p,id); total=uhat[((*k)-1)*temp+temp1]; } /*else { if (id[(*p)-1]==(*ijj)) { temp1=index_of(g,p,id); total=uhat[((*k)-1)*temp+temp1]; } }*/ } /*printf("%d %d %d %lf \n", *k, *j, l, total);*/ return(total); } double nest_of_sigma_jj_num_k(long *n, long *g, long *p, long *k, long *j, long *jj, long *ijj, long *id, double *uhat, double *pi, double *x, double *mu, double *sigma){ double total; int l; long jnext, temp1, temp; fflush(stdin); temp=pow((*g),(*p)); l=0-1; total=0; if ((*j)<=(*p)){ if ((*j)==(*jj)){ for(l=0;l<(*g);l++){ if (l==(*ijj)){ id[(*j)-1]=l; jnext=(*j)+1; total=total+nest_of_sigma_jj_num_k(n, g, p, k, &jnext, jj, ijj,id, uhat, pi, x, mu, sigma); /*printf("%d %d %d %d %d %lf \n", *k, *j, l, *jj, *ijj, total);*/ } else{ total=total+0; /*printf("%d %d %d %d %d %lf \n", *k, *j, l, *jj, *ijj, total);*/ } } } else{ for(l=0;l<(*g);l++){ id[(*j)-1]=l; jnext=(*j)+1; total=total+nest_of_sigma_jj_num_k(n, g, p, k, &jnext, jj, ijj,id, uhat, pi, x, mu, sigma); /*printf("%d %d %d %d %d %lf \n", *k, *j, l, *jj, *ijj, total);*/ } } } else { if ((*j)!=(*jj)){ temp1=index_of(g,p,id); total=uhat[((*k)-1)*temp+temp1]*(x[((*k)-1)*(*p)+(*jj)-1]-mu[((*jj)-1)*(*g)+(*ijj)])*(x[((*k)-1)*(*p)+(*jj)-1]-mu[((*jj)-1)*(*g)+(*ijj)]); } /*else { if (id[(*p)-1]==(*ijj)) { temp1=index_of(g,p,id); total=uhat[((*k)-1)*temp+temp1]*(x[((*k)-1)*(*p)+(*jj)-1]-mu[((*jj)-1)*(*g)+(*ijj)])*(x[((*k)-1)*(*p)+(*jj)-1]-mu[((*jj)-1)*(*g)+(*ijj)]); } }*/ } /*printf("%d %d %d %lf \n", *k, *j, l, total);*/ return(total); } void em_pdim_pcd_new(long *n, long *g, long *p, long *j, long *pi_group, long *id, long *id_group, long *rr, double *uhat, double *pi, double *alpha, double *x, double *mu, double *sigma, double *loglik,double *tol, long *restriction, long *constrain, long *iteration, long *convergence){ long h,t,l,jj,ij,jjj,ijj,idx,*id1,*id2,power,tempid; int i, ii,flag, iter; double *Lg,*logLg,loglik2,temppi_den, sumpi,sumalpha,sumpart2, tempmu_num_sum, tempmu_den_sum, tempsigma_num_sum, *sum_at, *tempmu_num_jj,*tempmu_den_jj,*tempsigma_num_jj, *fpcd; power=pow((*g),(*p)); id1 = (long *) malloc((*p) * sizeof(long)); id2 = (long *) malloc((*p) * sizeof(long)); sum_at= (double *) malloc((*pi_group) * sizeof(double)); tempmu_num_jj= (double *) malloc((*n) * sizeof(double)); tempmu_den_jj = (double *) malloc((*n)* sizeof(double)); tempsigma_num_jj = (double *) malloc((*n) * sizeof(double)); fpcd = (double *) malloc((*n) * sizeof(double)); logLg = (double *) malloc((*n) * sizeof(double)); /*theoretical restriction on the null*/ if((*restriction)>0){ /*initialization*/ flag=1; (*loglik) = 0; for(h=1;h<=(*n);h++){ for(i=0;i<(*p);i++){ id[i]=0; } fpcd[h-1]=fpcdpart1_k(n, g, p, &h, pi, x, mu, sigma)+nest_of_fpcdpart2_k(n, g, p, &h, j, pi_group, id, id_group, alpha, x, mu, sigma); logLg[h-1] = log(fpcd[h-1]); (*loglik)= (*loglik) + logLg[h-1]; // printf("%d %lf %lf \n", h, fpcd[h-1], logLg[h-1]); } printf("%lf \n", *loglik); /*iteration*/ iter = 0; while(flag>0 & iter<(*iteration)){ printf("%d \n",iter); /*E step for z*/ for(h=1;h<=(*n);h++){ for(i=0;i<(*p);i++){ id[i]=0; } nest_ez_k(n, g, p, &h, j, pi_group, id, id_group, uhat, pi,alpha, x, mu, sigma); } for (l=0;l0.0){ mu[jj*(*g)+ij]=0.0; } } if(constrain[jj*(*g)+ij]==0){ mu[jj*(*g)+ij] = 0.0; sigma[jj*(*g)+ij] = 1.0; } if(constrain[jj*(*g)+ij]==0+1){ if(mu[jj*(*g)+ij]<0.0){ mu[jj*(*g)+ij]=0.0; } } } } /*check convergence*/ loglik2 = (*loglik); (*loglik) = 0; for(h=1;h<=(*n);h++){ for(i=0;i<(*p);i++){ id[i]=0; } fpcd[h-1]=fpcdpart1_k(n, g, p, &h, pi, x, mu, sigma)+nest_of_fpcdpart2_k(n, g, p, &h, j, pi_group, id, id_group, alpha, x, mu, sigma); logLg[h-1] = log(fpcd[h-1]); (*loglik)= (*loglik) + logLg[h-1]; // printf("%d %lf %lf \n", h, fpcd[h-1], logLg[h-1]); } if(fabs((*loglik)-loglik2)<(*tol)){ flag = 0; } iter = iter + 1; } (*convergence) = 0; if(iter<(*iteration)){ (*convergence) = 1; } for (l=0;l<(*g);l++){ printf("%d %lf \n", l, pi[l]); } for (l=0;l<(*pi_group);l++){ printf("%d %lf \n", l, alpha[l]); } for (l=0;l<(*g)*(*p);l++){ printf("%d %lf %lf \n", l, mu[l],sigma[l]); } } /*no restriction on the null*/ else{ /*initialization*/ flag=1; (*loglik) = 0; for(h=1;h<=(*n);h++){ for(i=0;i<(*p);i++){ id[i]=0; } fpcd[h-1]=fpcdpart1_k(n, g, p, &h, pi, x, mu, sigma)+nest_of_fpcdpart2_k(n, g, p, &h, j, pi_group, id, id_group, alpha, x, mu, sigma); logLg[h-1] = log(fpcd[h-1]); (*loglik)= (*loglik) + logLg[h-1]; printf("%d %lf %lf \n", h, fpcd[h-1], logLg[h-1]); } /*printf("%lf \n", *loglik);/ /*iteration*/ iter = 0; while(flag>0 & iter<(*iteration)){ printf("%d \n",iter); /*E step for z*/ for(h=1;h<=(*n);h++){ for(i=0;i<(*p);i++){ id[i]=0; } nest_ez_k(n, g, p, &h, j, pi_group, id, id_group, uhat, pi,alpha, x, mu, sigma); } for (l=0;l0){ /*initialization*/ flag=1; (*loglik) = 0; for(h=1;h<=(*n);h++){ for(i=0;i<(*p);i++){ id[i]=0; } fpcd[h-1]=fpcdpart1_k(n, g, p, &h, pi, x, mu, sigma)+nest_of_fpcdpart2_k(n, g, p, &h, j, pi_group, id, id_group, alpha, x, mu, sigma); logLg[h-1] = log(fpcd[h-1]); (*loglik)= (*loglik) + logLg[h-1]; // printf("%d %lf %lf \n", h, fpcd[h-1], logLg[h-1]); } printf("%lf \n", *loglik); /*iteration*/ iter = 0; while(flag>0 & iter<(*iteration)){ printf("%d \n",iter); /*M step for pi*/ temppi_den=0; for(l=0;l0.0){ mu[jj*(*g)+ij]=0.0; } } if(constrain[jj*(*g)+ij]==0){ mu[jj*(*g)+ij] = 0.0; sigma[jj*(*g)+ij] = 1.0; } if(constrain[jj*(*g)+ij]==0+1){ if(mu[jj*(*g)+ij]<0.0){ mu[jj*(*g)+ij]=0.0; } } } } /*check convergence*/ loglik2 = (*loglik); (*loglik) = 0; for(h=1;h<=(*n);h++){ for(i=0;i<(*p);i++){ id[i]=0; } fpcd[h-1]=fpcdpart1_k(n, g, p, &h, pi, x, mu, sigma)+nest_of_fpcdpart2_k(n, g, p, &h, j, pi_group, id, id_group, alpha, x, mu, sigma); logLg[h-1] = log(fpcd[h-1]); (*loglik)= (*loglik) + logLg[h-1]; // printf("%d %lf %lf \n", h, fpcd[h-1], logLg[h-1]); } if(fabs((*loglik)-loglik2)<(*tol)){ flag = 0; } iter = iter + 1; } (*convergence) = 0; if(iter<(*iteration)){ (*convergence) = 1; } for (l=0;l<(*g);l++){ printf("%d %lf \n", l, pi[l]); } for (l=0;l<(*pi_group);l++){ printf("%d %lf \n", l, alpha[l]); } for (l=0;l<(*g)*(*p);l++){ printf("%d %lf %lf \n", l, mu[l],sigma[l]); } } /*no restriction on the null*/ else{ /*initialization*/ flag=1; (*loglik) = 0; for(h=1;h<=(*n);h++){ for(i=0;i<(*p);i++){ id[i]=0; } fpcd[h-1]=fpcdpart1_k(n, g, p, &h, pi, x, mu, sigma)+nest_of_fpcdpart2_k(n, g, p, &h, j, pi_group, id, id_group, alpha, x, mu, sigma); logLg[h-1] = log(fpcd[h-1]); (*loglik)= (*loglik) + logLg[h-1]; printf("%d %lf %lf \n", h, fpcd[h-1], logLg[h-1]); } /*printf("%lf \n", *loglik);/ /*iteration*/ iter = 0; while(flag>0 & iter<(*iteration)){ printf("%d \n",iter); /*M step for pi*/ temppi_den=0; for(l=0;l