#! /usr/bin/env python
## student_grade_inference_stan_ppc.py
# Posterior predictive checking (PPC) for the student-grade Stan model:
# fit the model, pull replicated datasets X_rep drawn in Stan, and compare
# a test statistic T on the replicates against its observed value.
import stan
import numpy as np
import matplotlib.pyplot as plt

# Simulate data
from student_grade_simulation import sample_student_grades

n, p = 30, 5
X, mu, sigma = sample_student_grades(n, p)
sm_data = {'n': n, 'p': p, 'tau': 0.5, 'a': 1, 'b': 0.5, 'X': X}

# Initialise stan object (model code is read from file; data bound at build)
with open('student_grade_model_ppc.stan', 'r', newline='') as f:
    sm = stan.build(f.read(), sm_data, random_seed=1)

# Select the number of MCMC chains and iterations, then sample
chains, samples, burn = 4, 10000, 1000
fit = sm.sample(num_chains=chains, num_samples=samples, num_warmup=burn,
                save_warmup=False)


def T(x):
    """Test statistic: variance of the per-student average scores.

    ``x`` is an (n_students, n_scores) array; each row is averaged and
    the variance of those row means is returned.
    """
    return np.var(np.mean(x, axis=1))


t_obs = T(X)  # value of the test statistic for the observed data

# Replicated datasets: one (n, p) draw per posterior sample per chain.
x_rep = fit['X_rep'].reshape(n, p, samples, chains)
t_rep = [[T(x_rep[:, :, i, j]) for i in range(samples)]
         for j in range(chains)]


# Plot posterior predictive distributions of T from each chain
def posterior_predictive_plots(t_rep, true_val):
    """Histogram the replicated test statistics, one panel per chain.

    ``t_rep`` is a sequence of per-chain sequences of T values;
    ``true_val`` (the observed statistic) is marked with a dashed
    vertical line in every panel.
    """
    # len() replaces the deprecated np.matrix(...).shape[0] chain count.
    nc = len(t_rep)
    # squeeze=False keeps axs 2-D so indexing works even when nc == 1.
    fig, axs = plt.subplots(1, nc, figsize=(10, 3),
                            constrained_layout=True, squeeze=False)
    fig.canvas.manager.set_window_title('Posterior predictive')
    for j in range(nc):
        ax = axs[0, j]
        ax.autoscale(enable=True, axis='x', tight=True)
        ax.set_title('Chain ' + str(j + 1))
        ax.hist(np.array(t_rep[j]), 200, density=True)
        ax.axvline(true_val, color='c', lw=2, linestyle='--')
    plt.show()


posterior_predictive_plots(t_rep, t_obs)

# Calculate and print posterior predictive p-values for T.
# The explicit asarray() makes the elementwise comparison independent of
# whether t_obs is a NumPy scalar (list >= float alone would TypeError).
print("Posterior predictive p-values from variance of means:")
print([np.mean(np.asarray(t_rep[j]) >= t_obs) for j in range(chains)])

# Print posterior predictive p-values for lhd calculated in Stan
print("Posterior predictive p-values from likelihood:")
print(np.mean(fit['ppp'].reshape(samples, chains), axis=0))