#######################################
#      Analysis of Study Data         #
#######################################


#################################################################
#Reading in data from a file

#Read in the data from a file using the function importData
cohort.data <- importData("h:\\M3S12\\DataFiles\\cohort.xls",type="EXCEL")

#Here the data are being 
#read in from a file kept at h:\\M3S12\DataFiles\\cohort.xls called cohort.xls.
#This function reads the data in from the file to a "data frame" called cohort.data

#On the pull-down menus, select Data -> Select Data and then
#on the dialog box, use the selection box Existing data to select
#cohort.data, then click on OK.  This brings up a data frame
#datasheet display for inspection.

#The data frame cohort.data contains two columns called 
#exposure (the exposure status) and diseased (the disease status)
#0 means "NO" and 1 means "YES"

#####################################################################

#To construct a 2x2 table summary:  we will create a matrix with 2 rows and
#2 columns to contain the cross-classification counts.

#To count the number of exposed individuals, there are several
#alternative methods

table(cohort.data$exposure)

table(cohort.data[,1])

length(cohort.data[cohort.data[,1]==0,1])
length(cohort.data[cohort.data[,1]==1,1])

#To construct the 2 x 2 table for the cohort data
#use table as a cross-ckassifying function.

table(cohort.data[,2],cohort.data[,1])

#You should get the following table
#     0    1 
#0 1718 2356
#1  282  644

#which means
# - 1718 not exposed and not diseased,
# - 282 not exposed and diseased
# - 2356 exposed and not diseased
# - 644 exposed and diseased 

#Now put these data into a 2x2 matrix given in lectures as; there are two ways

x.cohort <- matrix(0,nrow=2,ncol=2)
x.cohort[1,1] <- 644
x.cohort[2,1] <- 2356
x.cohort[1,2] <- 282
x.cohort[2,2] <- 1718

#or

x.cohort <- matrix(c(644,2356,282,1718),nrow=2,byrow=F)

#or

x.cohort <- rev(table(cohort.data$diseased[cohort.data$exposure == 1]))
x.cohort <- cbind(x.cohort,rev(table(cohort.data$diseased[cohort.data$exposure == 0])))

#or

x.cohort <- rev(table(cohort.data$exposure[cohort.data$diseased == 1]))
x.cohort <- rbind(x.cohort,rev(table(cohort.data$exposure[cohort.data$diseased == 0])))


######################################################################
#Numerical studies

#First some calculation functions
#Function pi.est computes the estimate of Pi from a vector 
#x where x[1] = "Number of cases", x[2] = "Number of non-cases"

pi.est<-function(x){x[1]/(x[1]+x[2])}

#Function relative.risk computes the relative risk (or risk ratio) estimate pi1/pi0
#that is P(F|E)/P(F|E') 
relative.risk<-function(x){(x[1,1]/(x[1,1]+x[2,1]))/(x[1,2]/(x[1,2]+x[2,2]))}

#Function odds.ratio.cases computes the odds on exposure in cases
#that is P(E|F)/P(E'|F) 
odds.ratio.cases<-function(x){x[1,1]/x[1,2]}

#Function odds.ratio.controls computes the odds on exposure in controls
#that is P(E|F')/P(E'|F') 
odds.ratio.controls<-function(x){x[2,1]/x[2,2]}

#Function odds.ratio computes the estimate 
#pi1/(1-pi1)
#-------------
#pi0/(1-pi0)
odds.ratio<-function(x){(x[1,1]*x[2,2]/(x[1,2]*x[2,1]))}

#Cohort data table from lectures/assessed work
x.cohort<-cbind(c(101,46028-101),c(159,34594-159))

#Case control data table
x.casecontrol<-cbind(c(101,554),c(159,446))

#Column 1 in the Cohort
print(x.cohort[,1])
print(pi.est(x.cohort[,1]))

#Column 2 in the Cohort
print(x.cohort[,2])
print(pi.est(x.cohort[,2]))

print(relative.risk(x.cohort))
print(odds.ratio.cases(x.cohort))
print(odds.ratio.controls(x.cohort))

print(odds.ratio(x.cohort))
print(odds.ratio(x.casecontrol))