## bayesian_histogram.py

import numpy as np
from scipy.special import gammaln
from collections import Counter

def log_prior(m,theta=.01):
    """Return the log prior probability of using m bins.

    The value is log(theta) + (m - 1) * log(1 - theta), i.e. the log of a
    geometric probability mass function on m = 1, 2, ... with success
    probability theta.

    Args:
        m: number of bins.
        theta: success probability of the geometric prior.

    Returns:
        The log prior as a NumPy float.
    """
    extra_bins = m - 1
    log_stop = np.log(theta)
    log_continue = np.log(1 - theta)
    return log_stop + extra_bins * log_continue

def log_likelihood(x,m,a=0.0,b=1.0,alpha=10):
    """Log marginal likelihood of the data under an m-bin histogram on [a, b].

    The interval is split into m equal-width bins and each bin carries a
    pseudo-count of alpha/m. Only non-empty bins contribute gamma terms,
    because an empty bin's term gammaln(alpha/m + 0) - gammaln(alpha/m) is
    zero. No term that depends only on alpha and len(x) is included, so the
    result is suitable for comparing different m on the same data.

    Args:
        x: observations; any sequence convertible to a 1-D float array.
        m: number of bins (positive integer).
        a: lower end of the histogram range.
        b: upper end of the histogram range.
        alpha: total pseudo-count shared equally between the m bins.

    Returns:
        A tuple (ml, c): ml is the log marginal likelihood and c is a Counter
        mapping bin index to the number of observations in that bin.
    """
    # Accept lists/tuples as well as arrays; the shift below needs an array.
    x = np.asarray(x, dtype=float)
    span = b - a
    last_bin = m - 1
    # An observation equal to b would otherwise land in bin m, one past the
    # last valid bin; clamp it into bin m-1, the same rule density() applies
    # when it looks a position up. Values above b are clamped the same way.
    c = Counter(min(int(x_i*m/span), last_bin) for x_i in x - a) #bin counts n_j
    prior_count = alpha/m
    ml = len(x)*np.log(m/span) - len(c)*gammaln(prior_count)
    ml += sum(gammaln(prior_count + c[i]) for i in c)
    return(ml,c)

def density(t,a,b,m,c,n,alpha):
    """Evaluate the smoothed m-bin histogram density at position t.

    The bin containing t contributes its count plus a pseudo-count of
    alpha * bin_width; this is divided by the total mass alpha*(b-a) + n and
    by the bin width to turn a bin probability into a density.

    Args:
        t: position at which to evaluate the density.
        a: lower end of the histogram range.
        b: upper end of the histogram range.
        m: number of equal-width bins.
        c: mapping from bin index to observation count, indexed as c[ind].
        n: total number of observations.
        alpha: pseudo-count weight.

    Returns:
        The density value at t.
    """
    # NOTE(review): the pseudo-count mass here is alpha*(b-a), whereas
    # log_likelihood uses alpha/m per bin; the two agree only when b-a == 1.
    span = b - a
    width = span / m
    # Positions at or beyond b are assigned to the last bin.
    ind = int((t - a) / span * m)
    if ind > m - 1:
        ind = m - 1
    smoothed_count = alpha * width + c[ind]
    total_mass = alpha * span + n
    return smoothed_count / total_mass / width

def model_average(x,max_m=100,grid=100,a=0.0,b=1.0,alpha=10,theta=.01):
    """Average histogram densities over bin counts 1..max_m by posterior weight.

    For every m the unnormalised log posterior is
    log_likelihood(x, m, ...) + log_prior(m, theta). Weights are stored
    relative to the largest log posterior seen so far, and everything
    accumulated is rescaled whenever a new maximum appears, so the
    exponentials never overflow.

    Args:
        x: observations; any sequence convertible to a 1-D float array.
        max_m: largest number of bins to consider (must be >= 1).
        grid: number of evenly spaced points on [a, b] at which the averaged
            density is evaluated. A single-point grid is placed at a.
        a: lower end of the histogram range.
        b: upper end of the histogram range.
        alpha: pseudo-count weight passed to log_likelihood and density.
        theta: parameter passed to log_prior.

    Returns:
        A tuple (ave_density, best_m, best_ctr, post):
        ave_density is the model-averaged density at the grid points,
        best_m is the bin count with the highest posterior,
        best_ctr is the bin-count Counter returned by log_likelihood for best_m,
        post is the normalised posterior over m, indexed 0..max_m with
        entry 0 always zero.

    Raises:
        ValueError: if max_m < 1, or if no m produced a log posterior greater
            than -inf (so no best model exists).
    """
    if max_m < 1:
        raise ValueError("max_m must be at least 1")
    # Accept lists/tuples as well as arrays; downstream code computes x - a.
    x = np.asarray(x, dtype=float)
    n = len(x)
    best_post = -float('inf')
    best_m = None
    best_ctr = None
    sum_probs = 0
    ave_density = np.zeros(grid)
    probs = np.zeros(max_m+1)
    # max(grid-1, 1) avoids a 0/0 (NaN grid point) when grid == 1 and leaves
    # the spacing unchanged for grid >= 2.
    grd = a+(b-a)*np.arange(grid)/float(max(grid-1, 1))
    for m in range(1,max_m+1):
        log_lhd,ctr = log_likelihood(x,m,a,b,alpha)
        log_post = log_lhd + log_prior(m,theta)
        if log_post > best_post:
            # New maximum: rescale everything accumulated so far so that all
            # weights stay relative to the current best log posterior.
            renormalise = np.exp(best_post-log_post)
            sum_probs *= renormalise
            probs[:m] *= renormalise
            ave_density *= renormalise
            best_post = log_post
            best_m = m
            best_ctr = ctr
        probs[m] = np.exp(log_post-best_post)
        sum_probs += probs[m]
        densities = np.array([density(g_i,a,b,m,ctr,n,alpha) for g_i in grd])
        ave_density += probs[m] * densities

    if best_ctr is None:
        raise ValueError("no bin count produced a finite log posterior")
    ave_density /= sum_probs
    return(ave_density, best_m, best_ctr, probs/sum_probs)
