####################################### # Analysis of Study Data # ####################################### ################################################################# #Reading in data from a file #Read in the data from a file using the function importData cohort.data <- importData("h:\\M3S12\\DataFiles\\cohort.xls",type="EXCEL") #Here the data are being #read in from a file kept at h:\\M3S12\DataFiles\\cohort.xls called cohort.xls. #This function reads the data in from the file to a "data frame" called cohort.data #On the pull-down menus, select Data -> Select Data and then #on the dialog box, use the selection box Existing data to select #cohort.data, then click on OK. This brings up a data frame #datasheet display for inspection. #The data frame cohort.data contains two columns called #exposure (the exposure status) and diseased (the disease status) #0 means "NO" and 1 means "YES" ##################################################################### #To construct a 2x2 table summary: we will create a matrix with 2 rows and #2 columns to contain the cross-classification counts. #To count the number of exposed individuals, there are several #alternative methods table(cohort.data$exposure) table(cohort.data[,1]) length(cohort.data[cohort.data[,1]==0,1]) length(cohort.data[cohort.data[,1]==1,1]) #To construct the 2 x 2 table for the cohort data #use table as a cross-ckassifying function. table(cohort.data[,2],cohort.data[,1]) #You should get the following table # 0 1 #0 1718 2356 #1 282 644 #which means # - 1718 not exposed and not diseased, # - 282 not exposed and diseased # - 2356 exposed and not diseased # - 644 exposed and diseased #Now put these data into a 2x2 matrix given in lectures as; there are two ways x.cohort <- matrix(0,nrow=2,ncol=2) x.cohort[1,1] <- 644 x.cohort[2,1] <- 2356 x.cohort[1,2] <- 282 x.cohort[2,2] <- 1718 #or x.cohort <- matrix(c(644,2356,282,1718),nrow=2,byrow=F) #or x.cohort <- rev(table(cohort.data$diseased[cohort.data$exposure == 1])) x.cohort <- cbind(x.cohort,rev(table(cohort.data$diseased[cohort.data$exposure == 0]))) #or x.cohort <- rev(table(cohort.data$exposure[cohort.data$diseased == 1])) x.cohort <- rbind(x.cohort,rev(table(cohort.data$exposure[cohort.data$diseased == 0]))) ###################################################################### #Numerical studies #First some calculation functions #Function pi.est computes the estimate of Pi from a vector #x where x[1] = "Number of cases", x[2] = "Number of non-cases" pi.est<-function(x){x[1]/(x[1]+x[2])} #Function relative.risk computes the relative risk (or risk ratio) estimate pi1/pi0 #that is P(F|E)/P(F|E') relative.risk<-function(x){(x[1,1]/(x[1,1]+x[2,1]))/(x[1,2]/(x[1,2]+x[2,2]))} #Function odds.ratio.cases computes the odds on exposure in cases #that is P(E|F)/P(E'|F) odds.ratio.cases<-function(x){x[1,1]/x[1,2]} #Function odds.ratio.controls computes the odds on exposure in controls #that is P(E|F')/P(E'|F') odds.ratio.controls<-function(x){x[2,1]/x[2,2]} #Function odds.ratio computes the estimate #pi1/(1-pi1) #------------- #pi0/(1-pi0) odds.ratio<-function(x){(x[1,1]*x[2,2]/(x[1,2]*x[2,1]))} #Cohort data table from lectures/assessed work x.cohort<-cbind(c(101,46028-101),c(159,34594-159)) #Case control data table x.casecontrol<-cbind(c(101,554),c(159,446)) #Column 1 in the Cohort print(x.cohort[,1]) print(pi.est(x.cohort[,1])) #Column 2 in the Cohort print(x.cohort[,2]) print(pi.est(x.cohort[,2])) print(relative.risk(x.cohort)) print(odds.ratio.cases(x.cohort)) print(odds.ratio.controls(x.cohort)) print(odds.ratio(x.cohort)) print(odds.ratio(x.casecontrol))