#######################################
#         Hypothesis Testing          #
#######################################



#This script is for investigation of the statistical techniques we have been studying
#
#The exercises below investigate
#
#1. One sample tests
#2. Two sample tests
#3. One-way and Two-way ANOVA
#4. Tests for Binomial Samples
#5. 2 x 2 Tables
#

#1.BEGIN------------------------------------------------------------------------------------
#1.  One sample tests
#Set the seed for the random number generation
set.seed(102)

#True Values
mu_10
sigma_1

#Sample size
n_6

#Simulate some data
x_rnorm(n,mu,sigma)

#Z-test statistic for a test of H0: mu = c0 (in a two-sided test)
#Set c0

c0_10

#Calculate the test statistic
z_(mean(x)-c0)/(sigma/sqrt(n))
print(z)

#Set the significance level
alpha_0.05

#Calculate the critical values
CR1_qnorm(alpha/2,mean=0,sd=1)
CR2_qnorm(1-alpha/2,mean=0,sd=1)
print(c(CR1,CR2))  #Should be -1.96,+1.96


#Complete the test !
if( z < CR1 || z > CR2){
	print(paste("Z = ",round(z,3)," - REJECT H0 !"))
}
else{
	print(paste("Z = ",round(z,3)," - DO NOT REJECT H0 !"))
}

#Compute the p-value

p_pnorm(-abs(z))*2
print(paste("P-value is",round(p,6)))

#EXERCISES
#(a) Repeat the above, changing the number in set.seed
#(b) Change c0
#(c) Change n

 
#Now do the same for the T-statistic

set.seed(102)

#True Values
mu_10
sigma_1

#Sample size
n_6

#Simulate some data
x_rnorm(n,mu,sigma)

#T-test statistic for a test of H0: mu = c0 (in a two-sided test)
#Set c0

c0_10

#Calculate the test statistic
#The sample standard deviation s
s_sqrt(var(x))

#The test statistic
t_(mean(x)-c0)/(s/sqrt(n))
print(t)

#Set the significance level
alpha_0.05

#Calculate the critical values
CR1_qt(alpha/2,df=n-1)
CR2_qt(1-alpha/2,df=n-1)
print(c(CR1,CR2)) 


#Complete the test !
if( t < CR1 || t > CR2){
	print(paste("T = ",round(t,3)," - REJECT H0 !"))
}
else{
	print(paste("T = ",round(t,3)," - DO NOT REJECT H0 !"))
}



#EXERCISES
#(a) Repeat the above, changing the number in set.seed
#(b) Change c0
#(c) Change n

#Compute the p-value

p_pt(-abs(t),df=n-1)*2
print(paste("P-value is",round(p,6)))



#The Special SPLUS functions for one sample t-testing

#t.test(x, y=NULL, alternative="two.sided", mu=0, paired=F,
#       var.equal=T, conf.level=.95)

t.test(x,mu=c0,alternative="two.sided",conf.level=1-alpha)




#1.END------------------------------------------------------------------------------

#2.BEGIN------------------------------------------------------------------------------------
#1.  Two sample tests
#Set the seed for the random number generation
set.seed(102)

#True Values
mu1_10
sigma1_1

mu2_15
sigma2_1

#Sample sizes
n1_8
n2_10

#Simulate some data
x1_rnorm(n1,mu1,sigma1)
x2_rnorm(n2,mu2,sigma2)

#Z-test statistic for a test of H0: mu1 = mu2 (in a two-sided test)

#Calculate the test statistic
z_(mean(x1)-mean(x2))/(sigma1/sqrt(n1)+sigma2/sqrt(n2))
print(z)

#Set the significance level
alpha_0.05

#Calculate the critical values
CR1_qnorm(alpha/2,mean=0,sd=1)
CR2_qnorm(1-alpha/2,mean=0,sd=1)
print(c(CR1,CR2))  #Should be -1.96,+1.96


#Complete the test !
if( z < CR1 || z > CR2){
	print(paste("Z = ",round(z,3)," - REJECT H0 !"))
}
else{
	print(paste("Z = ",round(z,3)," - DO NOT REJECT H0 !"))
}

#Compute the p-value

p_pnorm(-abs(z))*2
print(paste("P-value is",round(p,6)))

#EXERCISES
#(a) Repeat the above, changing the number in set.seed
#(b) Change mu1 or mu2
#(c) Change n1 or n2

 
#Now do the same for the T-statistic

set.seed(102)

#True Values
mu1_10
sigma1_1

mu2_15
sigma2_1

#Sample sizes
n1_8
n2_10

#Simulate some data
x1_rnorm(n1,mu1,sigma1)
x2_rnorm(n2,mu2,sigma2)

#T-test statistic for a test of H0: mu1 = mu2 (in a two-sided test)

#Calculate the test statistic
#The pooled sample standard deviation s
s_sqrt(((n1-1)*var(x1)+(n2-1)*var(x2))/(n1+n2-2))

#The test statistic
t_(mean(x1)-mean(x2))/(s*(1/sqrt(n1)+1/sqrt(n2)))
print(t)

#Set the significance level
alpha_0.05

#Calculate the critical values
CR1_qt(alpha/2,df=n1+n2-2)
CR2_qt(1-alpha/2,df=n1+n2-2)
print(c(CR1,CR2)) 


#Complete the test !
if( t < CR1 || t > CR2){
	print(paste("T = ",round(t,3)," - REJECT H0 !"))
}
else{
	print(paste("T = ",round(t,3)," - DO NOT REJECT H0 !"))
}

#Compute the p-value

p_pt(-abs(t),df=n1+n2-2)*2
print(paste("P-value is",round(p,6)))


#EXERCISES
#(a) Repeat the above, changing the number in set.seed
#(b) Change mu1,mu2
#(c) Change n1,n2


#The Special SPLUS functions for one sample t-testing

t.test(x1,x2,alternative="two.sided",var.equal=T,conf.level=1-alpha)


#2.END------------------------------------------------------------------------------

#3.BEGIN------------------------------------------------------------------------------------
#ANOVA
#Set the seed for the random number generation
set.seed(102)

#ONE-WAY ANOVA : True values 

#Number of Columns
K_5

#Column means
mu_c(10,10,10,10,20)

sigma_1

#Sample sizes
n_c(6,7,10,5,3)

#Simulate some data into a matrix
column_numeric()
y_numeric()
for(i in 1:K){
	column_c(column,rep(i,n[i]))
	y_c(y,rnorm(n[i],mu[i],sigma))	
}

#Make a "data frame"
test.data_as.data.frame(cbind(column,y))

#Do the ANOVA: output the ANOVA table
#The Special SPLUS functions for doing ANOVA is "aov"

summary(aov(y ~factor(column),data=test.data))



#EXERCISES
#(a) Repeat the above, changing the number in set.seed
#(b) Change the mu vector
#(c) Change the ns

#------------------------------------------------------------------------------------
#TWO-WAY ANOVA
#Set the seed for the random number generation
set.seed(102)

#TWO-WAY ANOVA : True values 

#Number of Columns
K_5
L_3

#Column means
mu_c(10,10,10,10,20)
delta_c(5,5,5)

sigma_1

#Sample sizes
n_6

#Simulate some data into a matrix
column_numeric()
row_numeric()
y_numeric()
for(i in 1:K){
	for(j in 1:L){
		column_c(column,rep(i,n))
		row_c(row,rep(j,n))
		y_c(y,rnorm(n,mu[i]+delta[j],sigma))	
	}
}

#Make a "data frame"
test.data_as.data.frame(cbind(column,row,y))

#Do the ANOVA: output the ANOVA table
#The Special SPLUS functions for doing ANOVA is "aov"

summary(aov(y ~factor(column)+factor(row),data=test.data))



#EXERCISES
#(a) Repeat the above, changing the number in set.seed
#(b) Change the mu and delta vectors
#(c) Change the ns




#3.END------------------------------------------------------------------------------

#4.BEGIN------------------------------------------------------------------------------------
#BINOMIAL SAMPLES
#Set the seed for the random number generation
set.seed(102)

#Sample size
n_20

#Parameter
theta_0.6

#Simulate some data into a matrix
x_rbinom(1,n,theta)

#Test of H0: theta = p0
#Set p0

p0_0.5

#Set the significance level
alpha_0.05

#Calculate the critical values
CR1_qbinom(alpha/2,n,p0)
CR2_qbinom(1-alpha/2,n,p0)
print(c(CR1,CR2))  

#Complete the test !
if( x < CR1 || x > CR2){
	print(paste("X = ",round(x,3)," - REJECT H0 !"))
}
else{
	print(paste("X = ",round(x,3)," - DO NOT REJECT H0 !"))
}

#Compute the p-value
if(x < n*p0){
	p_pbinom(x,n,p0)
}
else{
	p_1-pbinom(x,n,p0)

}
print(paste("P-value is",round(p,6)))




#EXERCISES
#(a) Repeat the above, changing the number in set.seed
#(b) Change the n and theta values
#(c) Change p0

#The special function
#binom.test(x, n, p=0.5, alternative="two.sided")

binom.test(x,n,p=p0, alternative="less") 


#4.END------------------------------------------------------------------------------

#5.BEGIN------------------------------------------------------------------------------------
#2 X 2 tables
#Set the seed for the random number generation
set.seed(102)

#Sample size
n_200

#Number of Rows, Columns
n.rows_2
n.columns_2

#Simulate some data into a two classification vectors
x_sample(c(1:n.rows),size=n,rep=T)
y_sample(c(1:n.columns),size=n,rep=T)
table(x,y)

obs.table_table(x,y)


#Test of H0: independence between row/column classifications

#Set the significance level
alpha_0.05

#Degrees of freedom
d.f_(n.rows-1)*(n.columns-1)
print(d.f)

#Compute the fitted ("expected") values

fitted.table_matrix(nrow=n.rows,ncol=n.columns)
for(i in 1:n.rows){
	for(j in 1:n.columns){
		fitted.table[i,j]_sum(obs.table[i,])*sum(obs.table[,j])/sum(obs.table)
	}
}
print(fitted.table)

#Compute the test statistic q

q_sum((obs.table-fitted.table)^2/fitted.table)
print(q)

#Calculate the one sided critical value
CR_qchisq(1-alpha,df=d.f)
print(CR)  

#Complete the test !
if( q > CR){
	print(paste("Q = ",round(q,3)," - REJECT H0 !"))
}
else{
	print(paste("Q = ",round(q,3)," - DO NOT REJECT H0 !"))
}

#Compute the p-value
p_1-pchisq(q,df=d.f)
print(paste("P-value is",round(p,6)))

#EXERCISES
#(a) Repeat the above, changing the number in set.seed
#(b) Change the n-value

#The special function
#chisq.test(x, y=NULL, correct=T) 

chisq.test(obs.table)

#Fisher's Exact test
#fisher.test(x, y=NULL, node.stack.dim=1001, value.stack.dim=10000, hybrid=F) 

fisher.test(x,y)





#5.END------------------------------------------------------------------------------