####################################### # Hypothesis Testing # ####################################### #This script is for investigation of the statistical techniques we have been studying # #The exercises below investigate # #1. One sample tests #2. Two sample tests #3. One-way and Two-way ANOVA #4. Tests for Binomial Samples #5. 2 x 2 Tables # #1.BEGIN------------------------------------------------------------------------------------ #1. One sample tests #Set the seed for the random number generation set.seed(102) #True Values mu_10 sigma_1 #Sample size n_6 #Simulate some data x_rnorm(n,mu,sigma) #Z-test statistic for a test of H0: mu = c0 (in a two-sided test) #Set c0 c0_10 #Calculate the test statistic z_(mean(x)-c0)/(sigma/sqrt(n)) print(z) #Set the significance level alpha_0.05 #Calculate the critical values CR1_qnorm(alpha/2,mean=0,sd=1) CR2_qnorm(1-alpha/2,mean=0,sd=1) print(c(CR1,CR2)) #Should be -1.96,+1.96 #Complete the test ! if( z < CR1 || z > CR2){ print(paste("Z = ",round(z,3)," - REJECT H0 !")) } else{ print(paste("Z = ",round(z,3)," - DO NOT REJECT H0 !")) } #Compute the p-value p_pnorm(-abs(z))*2 print(paste("P-value is",round(p,6))) #EXERCISES #(a) Repeat the above, changing the number in set.seed #(b) Change c0 #(c) Change n #Now do the same for the T-statistic set.seed(102) #True Values mu_10 sigma_1 #Sample size n_6 #Simulate some data x_rnorm(n,mu,sigma) #T-test statistic for a test of H0: mu = c0 (in a two-sided test) #Set c0 c0_10 #Calculate the test statistic #The sample standard deviation s s_sqrt(var(x)) #The test statistic t_(mean(x)-c0)/(s/sqrt(n)) print(t) #Set the significance level alpha_0.05 #Calculate the critical values CR1_qt(alpha/2,df=n-1) CR2_qt(1-alpha/2,df=n-1) print(c(CR1,CR2)) #Complete the test ! if( t < CR1 || t > CR2){ print(paste("T = ",round(t,3)," - REJECT H0 !")) } else{ print(paste("T = ",round(t,3)," - DO NOT REJECT H0 !")) } #EXERCISES #(a) Repeat the above, changing the number in set.seed #(b) Change c0 #(c) Change n #Compute the p-value p_pt(-abs(t),df=n-1)*2 print(paste("P-value is",round(p,6))) #The Special SPLUS functions for one sample t-testing #t.test(x, y=NULL, alternative="two.sided", mu=0, paired=F, # var.equal=T, conf.level=.95) t.test(x,mu=c0,alternative="two.sided",conf.level=1-alpha) #1.END------------------------------------------------------------------------------ #2.BEGIN------------------------------------------------------------------------------------ #1. Two sample tests #Set the seed for the random number generation set.seed(102) #True Values mu1_10 sigma1_1 mu2_15 sigma2_1 #Sample sizes n1_8 n2_10 #Simulate some data x1_rnorm(n1,mu1,sigma1) x2_rnorm(n2,mu2,sigma2) #Z-test statistic for a test of H0: mu1 = mu2 (in a two-sided test) #Calculate the test statistic z_(mean(x1)-mean(x2))/(sigma1/sqrt(n1)+sigma2/sqrt(n2)) print(z) #Set the significance level alpha_0.05 #Calculate the critical values CR1_qnorm(alpha/2,mean=0,sd=1) CR2_qnorm(1-alpha/2,mean=0,sd=1) print(c(CR1,CR2)) #Should be -1.96,+1.96 #Complete the test ! if( z < CR1 || z > CR2){ print(paste("Z = ",round(z,3)," - REJECT H0 !")) } else{ print(paste("Z = ",round(z,3)," - DO NOT REJECT H0 !")) } #Compute the p-value p_pnorm(-abs(z))*2 print(paste("P-value is",round(p,6))) #EXERCISES #(a) Repeat the above, changing the number in set.seed #(b) Change mu1 or mu2 #(c) Change n1 or n2 #Now do the same for the T-statistic set.seed(102) #True Values mu1_10 sigma1_1 mu2_15 sigma2_1 #Sample sizes n1_8 n2_10 #Simulate some data x1_rnorm(n1,mu1,sigma1) x2_rnorm(n2,mu2,sigma2) #T-test statistic for a test of H0: mu1 = mu2 (in a two-sided test) #Calculate the test statistic #The pooled sample standard deviation s s_sqrt(((n1-1)*var(x1)+(n2-1)*var(x2))/(n1+n2-2)) #The test statistic t_(mean(x1)-mean(x2))/(s*(1/sqrt(n1)+1/sqrt(n2))) print(t) #Set the significance level alpha_0.05 #Calculate the critical values CR1_qt(alpha/2,df=n1+n2-2) CR2_qt(1-alpha/2,df=n1+n2-2) print(c(CR1,CR2)) #Complete the test ! if( t < CR1 || t > CR2){ print(paste("T = ",round(t,3)," - REJECT H0 !")) } else{ print(paste("T = ",round(t,3)," - DO NOT REJECT H0 !")) } #Compute the p-value p_pt(-abs(t),df=n1+n2-2)*2 print(paste("P-value is",round(p,6))) #EXERCISES #(a) Repeat the above, changing the number in set.seed #(b) Change mu1,mu2 #(c) Change n1,n2 #The Special SPLUS functions for one sample t-testing t.test(x1,x2,alternative="two.sided",var.equal=T,conf.level=1-alpha) #2.END------------------------------------------------------------------------------ #3.BEGIN------------------------------------------------------------------------------------ #ANOVA #Set the seed for the random number generation set.seed(102) #ONE-WAY ANOVA : True values #Number of Columns K_5 #Column means mu_c(10,10,10,10,20) sigma_1 #Sample sizes n_c(6,7,10,5,3) #Simulate some data into a matrix column_numeric() y_numeric() for(i in 1:K){ column_c(column,rep(i,n[i])) y_c(y,rnorm(n[i],mu[i],sigma)) } #Make a "data frame" test.data_as.data.frame(cbind(column,y)) #Do the ANOVA: output the ANOVA table #The Special SPLUS functions for doing ANOVA is "aov" summary(aov(y ~factor(column),data=test.data)) #EXERCISES #(a) Repeat the above, changing the number in set.seed #(b) Change the mu vector #(c) Change the ns #------------------------------------------------------------------------------------ #TWO-WAY ANOVA #Set the seed for the random number generation set.seed(102) #TWO-WAY ANOVA : True values #Number of Columns K_5 L_3 #Column means mu_c(10,10,10,10,20) delta_c(5,5,5) sigma_1 #Sample sizes n_6 #Simulate some data into a matrix column_numeric() row_numeric() y_numeric() for(i in 1:K){ for(j in 1:L){ column_c(column,rep(i,n)) row_c(row,rep(j,n)) y_c(y,rnorm(n,mu[i]+delta[j],sigma)) } } #Make a "data frame" test.data_as.data.frame(cbind(column,row,y)) #Do the ANOVA: output the ANOVA table #The Special SPLUS functions for doing ANOVA is "aov" summary(aov(y ~factor(column)+factor(row),data=test.data)) #EXERCISES #(a) Repeat the above, changing the number in set.seed #(b) Change the mu and delta vectors #(c) Change the ns #3.END------------------------------------------------------------------------------ #4.BEGIN------------------------------------------------------------------------------------ #BINOMIAL SAMPLES #Set the seed for the random number generation set.seed(102) #Sample size n_20 #Parameter theta_0.6 #Simulate some data into a matrix x_rbinom(1,n,theta) #Test of H0: theta = p0 #Set p0 p0_0.5 #Set the significance level alpha_0.05 #Calculate the critical values CR1_qbinom(alpha/2,n,p0) CR2_qbinom(1-alpha/2,n,p0) print(c(CR1,CR2)) #Complete the test ! if( x < CR1 || x > CR2){ print(paste("X = ",round(x,3)," - REJECT H0 !")) } else{ print(paste("X = ",round(x,3)," - DO NOT REJECT H0 !")) } #Compute the p-value if(x < n*p0){ p_pbinom(x,n,p0) } else{ p_1-pbinom(x,n,p0) } print(paste("P-value is",round(p,6))) #EXERCISES #(a) Repeat the above, changing the number in set.seed #(b) Change the n and theta values #(c) Change p0 #The special function #binom.test(x, n, p=0.5, alternative="two.sided") binom.test(x,n,p=p0, alternative="less") #4.END------------------------------------------------------------------------------ #5.BEGIN------------------------------------------------------------------------------------ #2 X 2 tables #Set the seed for the random number generation set.seed(102) #Sample size n_200 #Number of Rows, Columns n.rows_2 n.columns_2 #Simulate some data into a two classification vectors x_sample(c(1:n.rows),size=n,rep=T) y_sample(c(1:n.columns),size=n,rep=T) table(x,y) obs.table_table(x,y) #Test of H0: independence between row/column classifications #Set the significance level alpha_0.05 #Degrees of freedom d.f_(n.rows-1)*(n.columns-1) print(d.f) #Compute the fitted ("expected") values fitted.table_matrix(nrow=n.rows,ncol=n.columns) for(i in 1:n.rows){ for(j in 1:n.columns){ fitted.table[i,j]_sum(obs.table[i,])*sum(obs.table[,j])/sum(obs.table) } } print(fitted.table) #Compute the test statistic q q_sum((obs.table-fitted.table)^2/fitted.table) print(q) #Calculate the one sided critical value CR_qchisq(1-alpha,df=d.f) print(CR) #Complete the test ! if( q > CR){ print(paste("Q = ",round(q,3)," - REJECT H0 !")) } else{ print(paste("Q = ",round(q,3)," - DO NOT REJECT H0 !")) } #Compute the p-value p_1-pchisq(q,df=d.f) print(paste("P-value is",round(p,6))) #EXERCISES #(a) Repeat the above, changing the number in set.seed #(b) Change the n-value #The special function #chisq.test(x, y=NULL, correct=T) chisq.test(obs.table) #Fisher's Exact test #fisher.test(x, y=NULL, node.stack.dim=1001, value.stack.dim=10000, hybrid=F) fisher.test(x,y) #5.END------------------------------------------------------------------------------