#Sheet 1: Solutions to Exercises #1. Compute vector p[1],p[2],... where p[x]=Prob[X = x-1] p_dgeom(c(0:10),0.6) print(p) #First element of p is p[1]; this corresponds to Prob[X=0] #Hence print(paste("Pr[X=5] =",p[6],"if X ~ Geometric(0.6)")) #2. Use dpois(15,9) - first argument is x=15 , second argument is lambda=9 paste("Pr[X=15] =",dpois(15,9),"if X ~ Poisson(9)") #Note: use the "round" function to reduce the number of decimal places paste("Pr[X=15] =",round(dpois(15,9),6),"to 6 d.p. if X ~ Poisson(9)") #3. Use pbinom(12,20,0.6): pbinom does the cumulative prob. for x=12, n=20, theta = 0.6 paste("Pr[X <= 12] =",round(pbinom(12,20,0.6),6),"to 6 d.p. if X ~ Binomial(20,0.6)") #4. Use 1-ppois: ppois gives P[X <= x], you want P[X > x] for x=20 paste("Pr[X > 20] =",round(1-ppois(20,15),6),"to 6 d.p. if X ~ Poisson(15)") #5. Use pbinom evaluated at two different x values, x=45 and x=30 paste("Pr[30 < X <= 45] =",round(pbinom(45,100,0.35),6)-round(pbinom(30,100,0.35),6),"to 6 d.p. if X ~ Binomial(100,0.35)") #6. Use dpois for vector x=[0,1,2,...,20] x_c(0:20) y_dpois(x,8) plot(x,y) #or just plot(c(0:20),dpois(c(0:20),8)) #If you want to label the axes, use "xlab" and "ylab" #If you want a title, use "title" plot(c(0:20),dpois(c(0:20),8),xlab="x",ylab="Prob.") title("P.M.F. of Poisson(8)") #7. Use dgamma on a fine grid of points x_c(0:1000)/100 #x ranges from 0 to 10 in steps of 0.001 y_dgamma(x,5,2) plot(x,y) #to plot a line, use the option type = "l" plot(x,y,type="l",xlab="x",ylab="Prob. Density") #8. Use dnorm on a fine grid of points x_c(0:1000)/25-20 #x ranges from -20 to 20 in steps of 0.04 y_dnorm(x,-5,5) #NOTE: the final argument to dnorm is the standard deviation plot(x,y,type="l",xlab="x",ylab="Prob. Density") #9. Use pnorm x_6*c(0:1000)/1000-3 #x ranges from -3 to 3 in steps of 0.006 y_pnorm(x,0,1) plot(x,y,type="l",xlab="x",ylab="Cumulative Prob.") #10. Use rnorm to generate the data, hist to draw a histogram x_rnorm(5000,0,1) hist(x) axes() #To specify the bins, use breaks hist(x,breaks=8*c(0:50)/50-4) axes() #For Y=X^2 y_x^2 hist(y,breaks=c(0:50)/2.5) axes() ######################################################################### #To generate a more realistic sequence, you take in the data and use it #to estimate the probabilities of each individual base #For independent generation brca2_scan("c:\\OldE\\BioinformaticsMSc\\Data\\z74739.txt") #Display the nucleotide counts table(brca2) pvec_table(brca2)/length(brca2) #For a sequence of length 2000, use rep=T to indicate that the values are replaced #after each value in the sequence is sampled. new.brca2_sample(c(1:4),size=2000,prob=pvec,rep=T) #For dependent sampling, first use the method from p4 of sheet 1 to estimate the #Transition matrix brca2.mat_matrix(0,4,4) brca2.pmat_matrix(0,4,4) brca2.totals_table(brca2) for(i in 2:length(brca2)){ brca2.mat[brca2[i-1],brca2[i]]_brca2.mat[brca2[i-1],brca2[i]]+1 } for(i in 1:4){brca2.pmat[i,]_brca2.mat[i,]/sum(brca2.mat[i,])} #Then carefully do the simulation: we will store the value in vector new.brca2 #First, set the first element. new.brca2[1]_0 #Now loop through to element 2000 using the rows of brca2.pmat as the distribution #in the "prob" argument of the sample function for(i in 2:2000){ irow_new.brca2[i-1] pvec_brca2.pmat[irow,] new.brca2[i]_sample(c(1:4),size=1,prob=pvec) } ################################################################################