cname<-"c:\\Temp\\" root<-cname red.fmat<-numeric() red.bmat<-numeric() gre.fmat<-numeric() gre.bmat<-numeric() fname1<-paste(root,"ReplicatesDataHE16.xls",sep="") f1.dat<-importData(fname1,type="EXCEL",startRow=1,colNameRow=1) f1.colnames<-names(f1.dat) row.ids<-c(1:ncol(f1.dat)) cy5.colf<-row.ids[f1.colnames == "F635.Median"] cy5.colb<-row.ids[f1.colnames == "B635.Median"] cy3.colf<-row.ids[f1.colnames == "F532.Median"] cy3.colb<-row.ids[f1.colnames == "B532.Median"] flagrow<-c(1:nrow(f1.dat)) flagrow<-flagrow[f1.dat$Flags != 0] #table(f1.dat$Flags) #length(flagrow) #f1.dat[flagrow,c(cy5.colf,cy5.colb,cy3.colf,cy3.colb)]<-NA red.f<-f1.dat[,cy5.colf] red.b<-f1.dat[,cy5.colb] gre.f<-f1.dat[,cy3.colf] gre.b<-f1.dat[,cy3.colb] par(pty="s") qqplot(log2(gre.f),log2(red.f),pch=".",xlim=range(5,15),ylim=range(5,15)) abline(0,1) qqplot(log2(gre.b),log2(red.b),pch=".",xlim=range(6,12),ylim=range(6,12)) abline(0,1) par(pty="m") blocklist<-f1.dat$Block+(f1.dat$Half-1)*48+(f1.dat$Rep-1)*96 # # Note: Here the "F" measure is log2(F/B)=log2(F)-log2(B) # # par(mfrow=c(2,2)) boxplot(split(log2(red.f/red.b),blocklist),cex=0.5,ylim=range(-1,9),xlab="BLOCK") title("RED F") boxplot(split(log2(gre.f/gre.b),blocklist),cex=0.5,ylim=range(-1,9),xlab="BLOCK") title("GRE F") boxplot(split(log2(red.b),blocklist),cex=0.5,ylim=range(6,9),xlab="BLOCK") title("RED B") boxplot(split(log2(gre.b),blocklist),cex=0.5,ylim=range(6,9),xlab="BLOCK") title("GRE B") par(mfrow=c(1,1)) boxplot(split(log2(red.f/gre.f)-log2(red.b/gre.b),blocklist),cex=0.5,ylim=range(-2,2),xlab="BLOCK") title("log(RED F/GRE F)-log(RED B/GRE B)") ############################################################################### #Quantile Normalization for Each Measure separately nq<-24 pvec<-c(1:nq)/(nq+1) index<-c(1:nrow(f1.dat)) blocks<-as.numeric(names(table(blocklist))) nblocks<-length(blocks) tvec<-rep(0,nrow(f1.dat)) tmat<-matrix(0,nrow=nrow(f1.dat),ncol=4) qmat<-matrix(0,nrow=nq,ncol=nblocks) tstr1<-names(f1.dat[,7:10]) tstr<-tstr1 tstr[1]<-paste(tstr1[1],"/",tstr1[2]," (log scale) ",sep="") tstr[2]<-paste(tstr1[2]," (log scale) ",sep="") tstr[3]<-paste(tstr1[3],"/",tstr1[4]," (log scale) ",sep="") tstr[4]<-paste(tstr1[4]," (log scale) ",sep="") f1.tmp<-cbind(log2(red.f)-log2(red.b),log2(red.b),log2(gre.f)-log2(gre.b),log2(gre.b)) hist(log2(red.f)-log2(red.b),nclass=100) range(log2(red.f)-log2(red.b),na.rm=T) par(mfrow=c(2,2)) for(icolumn in 1:4){ if(icolumn == 1) icol<-1 if(icolumn == 2) icol<-3 if(icolumn == 3) icol<-2 if(icolumn == 4) icol<-4 x<-f1.tmp[,icol] xnorm<-x*0 for(iblock in 1:nblocks){ ids<-index[blocklist==iblock] tx<-x[ids] qmat[,iblock]<-quantile(tx,prob=pvec,na.rm=T) #print(c(icol,iblock,length(tx))) } qmean<-apply(qmat,1,mean) n<-ncol(qmat) for(j in 1:nblocks){ ids<-index[blocklist==j] tx<-x[ids] txnorm<-tx #print(c(icol,j,1)) txnorm[tx < qmat[1,j] & !is.na(tx) ] <- tx[tx < qmat[1,j] & !is.na(tx)] - (qmat[1,j]-qmean[1]) for(iq in 2:length(qmean)){ #print(c(icol,j,iq)) lower<-qmean[iq-1] upper<-qmean[iq] txnorm[tx >= qmat[iq-1,j] & tx < qmat[iq,j] & !is.na(tx)]<-lower+(upper-lower)*(tx[tx >= qmat[iq-1,j] & tx < qmat[iq,j] & !is.na(tx)]-qmat[iq-1,j])/(qmat[iq,j]-qmat[iq-1,j]) } txnorm[tx >= qmat[length(qmean),j] & !is.na(tx)] <- tx[tx >= qmat[length(qmean),j] & !is.na(tx)]-(qmat[length(qmean),j]-qmean[length(qmean)]) xnorm[ids]<-txnorm } yr<-range(0,9) if(icolumn > 2) yr<-range(6.5,8) boxplot(split(as.vector(xnorm),blocklist),ylim=yr) title(tstr[icol]) tmat[,icol]<-xnorm } par(mfrow=c(1,1)) trans.mat<-tmat boxplot(split(0.5*(trans.mat[,1]+trans.mat[,3]),blocklist),ylim=range(-1,9)) title("A (Unnormalized)") boxplot(split(trans.mat[,1]-trans.mat[,3],blocklist),ylim=range(-2.5,2.5)) title("M (Unnormalized)") ############################################################################### #Quantile Normalizing R to G nq<-24 pvec<-c(1:nq)/(nq+1) index<-c(1:nrow(trans.mat)) blocks<-as.numeric(names(table(blocklist))) nblocks<-length(blocks) tvec<-rep(0,nrow(trans.mat)) tgr.mat<-matrix(0,nrow=nrow(trans.mat),ncol=4) qmat<-matrix(0,nrow=nq,ncol=nblocks) f1.tmp<-trans.mat xr<-f1.tmp[,1] xg<-f1.tmp[,3] xrnorm<-xr*0 xgnorm<-xg*0 qmeang<-quantile(xg,prob=pvec,na.rm=T) qmeanr<-quantile(xr,prob=pvec,na.rm=T) n<-length(qmeang) txg<-xg txgnorm<-txg txr<-xr txrnorm<-txr txgnorm[txg < qmeang[1] & !is.na(txg) ] <- txg[txg < qmeang[1] & !is.na(txg)] - (qmeang[1]-qmeang[1]) txrnorm[txr < qmeanr[1] & !is.na(txr) ] <- txr[txr < qmeanr[1] & !is.na(txr)] - (qmeanr[1]-qmeang[1]) for(iq in 2:length(qmeang)){ lower<-qmeang[iq-1] upper<-qmeang[iq] txgnorm[txg >= qmeang[iq-1] & txg < qmeang[iq] & !is.na(txg)]<-lower+(upper-lower)*(txg[txg >= qmeang[iq-1] & txg < qmeang[iq] & !is.na(txg)]-qmeang[iq-1])/(qmeang[iq]-qmeang[iq-1]) txrnorm[txr >= qmeanr[iq-1] & txr < qmeanr[iq] & !is.na(txr)]<-lower+(upper-lower)*(txr[txr >= qmeanr[iq-1] & txr < qmeanr[iq] & !is.na(txr)]-qmeanr[iq-1])/(qmeanr[iq]-qmeanr[iq-1]) } txgnorm[txg >= qmeang[n] & !is.na(txg)] <- txg[txg >= qmeang[n] & !is.na(txg)]-(qmeang[n]-qmeang[n]) txrnorm[txr >= qmeanr[n] & !is.na(txr)] <- txr[txr >= qmeanr[n] & !is.na(txr)]-(qmeanr[n]-qmeang[n]) tgr.mat<-cbind(txrnorm,trans.mat[,2],txgnorm,trans.mat[,4]) par(mfrow=c(2,2)) yr<-range(0,9) if(icolumn > 2) yr<-range(6.5,8) boxplot(split(as.vector(txrnorm),blocklist),ylim=yr) title(tstr[1]) boxplot(split(as.vector(txgnorm),blocklist),ylim=yr) title(tstr[3]) am.mat<-cbind(0.5*(txgnorm+txrnorm),(txrnorm-txgnorm)) boxplot(split(am.mat[,1],blocklist),ylim=range(-1,9)) title("A (G-normalized)") boxplot(split(am.mat[,2],blocklist),ylim=range(-2.5,2.5)) title("M (G-normalized)") par(mfrow=c(1,1)) plot(am.mat) abline(h=0) title("M vs A (G-normalized only)") ############################################################################### #Quantile Normalization for AM nq<-24 pvec<-c(1:nq)/(nq+1) index<-c(1:nrow(f1.dat)) tvec<-rep(0,nrow(f1.dat)) amat<-matrix(0,nrow=nrow(f1.dat),ncol=2) qmat<-matrix(0,nrow=nq,ncol=nblocks) for(icol in 1:2){ x<-am.mat[,icol] xnorm<-x*0 for(iblock in 1:nblocks){ ids<-index[blocklist==iblock] tx<-x[ids] qmat[,iblock]<-quantile(tx,prob=pvec,na.rm=T) #print(c(icol,iblock,length(tx))) } qmean<-apply(qmat,1,mean) n<-ncol(qmat) for(j in 1:nblocks){ ids<-index[blocklist==j] tx<-x[ids] txnorm<-tx #print(c(icol,j,1)) txnorm[tx < qmat[1,j] & !is.na(tx)] <- tx[tx < qmat[1,j] & !is.na(tx)] - (qmat[1,j]-qmean[1]) for(iq in 2:length(qmean)){ #print(c(icol,j,iq)) lower<-qmean[iq-1] upper<-qmean[iq] txnorm[tx >= qmat[iq-1,j] & tx < qmat[iq,j] & !is.na(tx)]<-lower+(upper-lower)*(tx[tx >= qmat[iq-1,j] & tx < qmat[iq,j] & !is.na(tx)]-qmat[iq-1,j])/(qmat[iq,j]-qmat[iq-1,j]) } #print(c(icol,iblock,nq+1)) txnorm[tx >= qmat[length(qmean),j] & !is.na(tx)] <- tx[tx >= qmat[length(qmean),j] & !is.na(tx)]-(qmat[length(qmean),j]-qmean[length(qmean)]) xnorm[ids]<-txnorm } yr<-range(0,15) tstr<-"A (G then Quantile Normalized)" if(icol == 2){ tstr<-"M (G then Quantile Normalized)" yr<-range(-2.5,2.5) } boxplot(split(as.vector(xnorm),blocklist),ylim=yr) title(tstr) amat[,icol]<-xnorm } am.mat<-amat ############################################################################### #Final Quantile Normalization for M by A plot(am.mat) abline(h=0) title("M vs A (with no correction)") nq<-24 pvec<-c(1:nq)/(nq+1) index<-c(1:nrow(f1.dat)) tvec<-rep(0,nrow(f1.dat)) nxblocks<-50 qmat<-matrix(0,nrow=nq,ncol=nxblocks) xpvec<-c(1:nxblocks)/nxblocks xa<-am.mat[,1] xbr<-quantile(xa,prob=xpvec,na.rm=T) xbr<-min(xa,na.rm=T)+(max(xa,na.rm=T)-min(xa,na.rm=T))*c(0:nxblocks)/nxblocks xblocklist<-as.numeric(cut(xa,breaks=xbr,include.lowest=T)) x<-am.mat[,2] bid<-as.numeric(names(table(xblocklist))) #for(i in 1:length(bid)){ # tx<-x[xblocklist == i] # hist(tx) #} bcount<-as.numeric(table(xblocklist)) boxplot(split(x,xblocklist)) abline(h=0) xnorm<-x*0 for(iblock in 1:nxblocks){ ids<-index[xblocklist==iblock & !is.na(xblocklist)] tx<-x[ids] print(length(tx)) if(length(tx) > nq){ qmat[,iblock]<-quantile(tx,prob=pvec,na.rm=T) } else{ qmat[,iblock]<-rep(NA,nq) } } qmean<-apply(qmat,1,function(x){return(sum(x[!is.na(x)]*bcount[!is.na(x)])/sum(bcount[!is.na(x)]))}) n<-ncol(qmat) for(j in 1:nxblocks){ ids<-index[xblocklist==j & !is.na(xblocklist)] tx<-x[ids] txnorm<-tx txnorm[tx < qmat[1,j] & !is.na(tx)] <- tx[tx < qmat[1,j] & !is.na(tx)] - (qmat[1,j]-qmean[1]) for(iq in 2:length(qmean)){ lower<-qmean[iq-1] upper<-qmean[iq] txnorm[tx >= qmat[iq-1,j] & tx < qmat[iq,j] & !is.na(tx)]<-lower+(upper-lower)*(tx[tx >= qmat[iq-1,j] & tx < qmat[iq,j] & !is.na(tx)]-qmat[iq-1,j])/(qmat[iq,j]-qmat[iq-1,j]) } txnorm[tx >= qmat[length(qmean),j] & !is.na(tx)] <- tx[tx >= qmat[length(qmean),j] & !is.na(tx)]-(qmat[length(qmean),j]-qmean[length(qmean)]) xnorm[ids]<-txnorm } #plot(am.mat[,1],xnorm) #abline(h=0) #title("M vs A (No correction)") am.mat[,2]<-xnorm tstr<-"M (G, Quantile, A Normalized)" yr<-range(-2.5,2.5) boxplot(split(xnorm,xblocklist),ylim=yr) title(tstr) abline(h=0) plot(am.mat) abline(h=0) abline(v=xbr) abline(h=median(am.mat[,2],na.rm=T),lty=2) title("M vs A (with GQA-correction)") ################################################################################################## #t-testing mrep.mat<-matrix(am.mat[,2],ncol=4,byrow=T) mrep.names<-matrix(f1.dat$ID,ncol=4,byrow=T) rep.means<-apply(mrep.mat,1,function(x){return(mean(x,na.rm=T))}) rep.sd<-apply(mrep.mat,1,function(x){return(sqrt(var(x,na.method="omit")))}) length(rep.means[!is.na(rep.means)]) #length(rep.means) length(rep.sd[!is.na(rep.sd)]) rep.tstats<-rep.means/rep.sd #sort(rep.tstats) hist(rep.tstats,breaks=c(-5000,-1000,-5,-4,-3,-2,-1,0,1,2,3,4,5,1000,5000),plot=F) alpha<-0.05 naive.critval<-qt(1-alpha/2,3) bonferroni.critval<-qt(1-alpha/(nrow(mrep.mat)*2),3) length(rep.tstats[abs(rep.tstats) > naive.critval & !is.na(rep.tstats)]) length(rep.tstats[abs(rep.tstats) > bonferroni.critval & !is.na(rep.tstats)])