#Sheet 1: Solutions to Exercises
#1. Compute vector p[1],p[2],... where p[x]=Prob[X = x-1]
p_dgeom(c(0:10),0.6)
print(p)
#First element of p is p[1]; this corresponds to Prob[X=0]
#Hence
print(paste("Pr[X=5] =",p[6],"if X ~ Geometric(0.6)"))

#2. Use dpois(15,9) - first argument is x=15 , second argument is lambda=9
paste("Pr[X=15] =",dpois(15,9),"if X ~ Poisson(9)")

#Note: use the "round" function to reduce the number of decimal places
paste("Pr[X=15] =",round(dpois(15,9),6),"to 6 d.p. if X ~ Poisson(9)")

#3. Use pbinom(12,20,0.6):  pbinom does the cumulative prob. for x=12, n=20, theta = 0.6
paste("Pr[X <= 12] =",round(pbinom(12,20,0.6),6),"to 6 d.p. if X ~ Binomial(20,0.6)")

#4. Use 1-ppois: ppois gives P[X <= x], you want P[X > x] for x=20 
paste("Pr[X > 20] =",round(1-ppois(20,15),6),"to 6 d.p. if X ~ Poisson(15)")

#5. Use pbinom evaluated at two different x values, x=45 and x=30
paste("Pr[30 < X <= 45] =",round(pbinom(45,100,0.35),6)-round(pbinom(30,100,0.35),6),"to 6 d.p. if X ~ Binomial(100,0.35)")

#6. Use dpois for vector x=[0,1,2,...,20]
x_c(0:20)
y_dpois(x,8)
plot(x,y)

#or just
plot(c(0:20),dpois(c(0:20),8))

#If you want to label the axes, use "xlab" and "ylab"
#If you want a title, use "title"
plot(c(0:20),dpois(c(0:20),8),xlab="x",ylab="Prob.")
title("P.M.F. of Poisson(8)")


#7. Use dgamma on a fine grid of points
x_c(0:1000)/100
#x ranges from 0 to 10 in steps of 0.001
y_dgamma(x,5,2)
plot(x,y)

#to plot a line, use the option type = "l" 
plot(x,y,type="l",xlab="x",ylab="Prob. Density")

#8. Use dnorm on a fine grid of points
x_c(0:1000)/25-20
#x ranges from -20 to 20 in steps of 0.04
y_dnorm(x,-5,5)
#NOTE: the final argument to dnorm is the standard deviation
plot(x,y,type="l",xlab="x",ylab="Prob. Density")

#9.  Use pnorm
x_6*c(0:1000)/1000-3
#x ranges from -3 to 3 in steps of 0.006
y_pnorm(x,0,1)
plot(x,y,type="l",xlab="x",ylab="Cumulative Prob.")

#10. Use rnorm to generate the data, hist to draw a histogram
x_rnorm(5000,0,1)
hist(x)
axes()
#To specify the bins, use breaks
hist(x,breaks=8*c(0:50)/50-4)
axes()

#For Y=X^2
y_x^2
hist(y,breaks=c(0:50)/2.5)
axes()

#########################################################################

#To generate a more realistic sequence, you take in the data and use it 
#to estimate the probabilities of each individual base

#For independent generation

brca2_scan("c:\\OldE\\BioinformaticsMSc\\Data\\z74739.txt")
#Display the nucleotide counts
table(brca2)
pvec_table(brca2)/length(brca2)

#For a sequence of length 2000, use rep=T to indicate that the values are replaced
#after each value in the sequence is sampled.
new.brca2_sample(c(1:4),size=2000,prob=pvec,rep=T)

#For dependent sampling, first use the method from p4 of sheet 1 to estimate the
#Transition matrix

brca2.mat_matrix(0,4,4)
brca2.pmat_matrix(0,4,4)
brca2.totals_table(brca2)
for(i in 2:length(brca2)){
	brca2.mat[brca2[i-1],brca2[i]]_brca2.mat[brca2[i-1],brca2[i]]+1
}
for(i in 1:4){brca2.pmat[i,]_brca2.mat[i,]/sum(brca2.mat[i,])}
	
	
#Then carefully do the simulation: we will store the value in vector new.brca2
#First, set the first element.
new.brca2[1]_0
#Now loop through to element 2000 using the rows of brca2.pmat as the distribution
#in the "prob" argument of the sample function

for(i in 2:2000){
	irow_new.brca2[i-1]
	pvec_brca2.pmat[irow,]
	new.brca2[i]_sample(c(1:4),size=1,prob=pvec)
}
################################################################################