#! /usr/bin/env python
## student_grade_inference_stan_ppc.py
# Posterior predictive checking (PPC) for the student-grade Stan model:
# fit the model, pull replicated datasets X_rep drawn in Stan, and compare
# a test statistic T on the replicates against its observed value.
import stan
import numpy as np
import matplotlib.pyplot as plt

# Simulate data
from student_grade_simulation import sample_student_grades

n, p = 30, 5
X, mu, sigma = sample_student_grades(n, p)
sm_data = {'n': n, 'p': p, 'tau': 0.5, 'a': 1, 'b': 0.5, 'X': X}

# Initialise stan object (model code is read from file; data bound at build)
with open('student_grade_model_ppc.stan', 'r', newline='') as f:
    sm = stan.build(f.read(), sm_data, random_seed=1)

# Select the number of MCMC chains and iterations, then sample
chains, samples, burn = 4, 10000, 1000
fit = sm.sample(num_chains=chains, num_samples=samples, num_warmup=burn,
                save_warmup=False)


def T(x):
    """Test statistic: variance of the per-student average scores.

    ``x`` is an (n_students, n_scores) array; each row is averaged and
    the variance of those row means is returned.
    """
    return np.var(np.mean(x, axis=1))


t_obs = T(X)  # value of the test statistic for the observed data

# Replicated datasets: one (n, p) draw per posterior sample per chain.
x_rep = fit['X_rep'].reshape(n, p, samples, chains)
t_rep = [[T(x_rep[:, :, i, j]) for i in range(samples)]
         for j in range(chains)]


# Plot posterior predictive distributions of T from each chain
def posterior_predictive_plots(t_rep, true_val):
    """Histogram the replicated test statistics, one panel per chain.

    ``t_rep`` is a sequence of per-chain sequences of T values;
    ``true_val`` (the observed statistic) is marked with a dashed
    vertical line in every panel.
    """
    # len() replaces the deprecated np.matrix(...).shape[0] chain count.
    nc = len(t_rep)
    # squeeze=False keeps axs 2-D so indexing works even when nc == 1.
    fig, axs = plt.subplots(1, nc, figsize=(10, 3),
                            constrained_layout=True, squeeze=False)
    fig.canvas.manager.set_window_title('Posterior predictive')
    for j in range(nc):
        ax = axs[0, j]
        ax.autoscale(enable=True, axis='x', tight=True)
        ax.set_title('Chain ' + str(j + 1))
        ax.hist(np.array(t_rep[j]), 200, density=True)
        ax.axvline(true_val, color='c', lw=2, linestyle='--')
    plt.show()


posterior_predictive_plots(t_rep, t_obs)

# Calculate and print posterior predictive p-values for T.
# The explicit asarray() makes the elementwise comparison independent of
# whether t_obs is a NumPy scalar (list >= float alone would TypeError).
print("Posterior predictive p-values from variance of means:")
print([np.mean(np.asarray(t_rep[j]) >= t_obs) for j in range(chains)])

# Print posterior predictive p-values for lhd calculated in Stan
print("Posterior predictive p-values from likelihood:")
print(np.mean(fit['ppp'].reshape(samples, chains), axis=0))