The objective of this article is to build the Markov chain Monte Carlo (MCMC) algorithms of Gibbs sampling and Metropolis-Hastings by hand, using only simple mathematical modules.
#load packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy
from scipy.stats import gamma
from scipy.stats import norm
from random import choices
import math
import datetime
#matplotlib plot in jupyter
%matplotlib inline
Univariate Gaussian distribution: $$ f(x) = \dfrac{1}{\sqrt{2 \pi \sigma^2}} \exp\left( - \dfrac{(x-\mu)^2}{2 \sigma^2} \right) $$ We assume $\sigma^2=1$, so $$ f(x) = \dfrac{1}{\sqrt{2 \pi }} \exp\left( - \dfrac{(x-\mu)^2}{2 } \right) $$ The Jeffreys prior for the mean is $$ \pi(\mu ) \propto \dfrac{1}{\sigma}, $$ which is constant here since $\sigma$ is known.
and the posterior is $$ \pi(\mu|\sigma^2, Y) \propto \exp\left( -\dfrac{1}{2} \dfrac{( \mu - \bar{x} )^2}{1/n} \right), $$ hence the posterior is a Gaussian$\left( \bar{x},\frac{1}{n} \right)$ distribution.
Calculations for the above statement: $$ \pi(\mu|\sigma^2, Y) \propto \dfrac{1}{ (2 \pi )^{n/2} } \exp\left( -\dfrac{1}{2} \sum_{i=1}^{n} (x_i-\mu)^2 \right) \dfrac{1}{\sigma} \propto \exp\left( -\dfrac{1}{2} \sum_{i=1}^{n} (x_i^2- 2 \mu x_i + \mu^2 ) \right) = $$ $$ \exp\left( -\dfrac{1}{2} \left( \sum_{i=1}^{n} x_i^2- 2 \mu \sum_{i=1}^{n} x_i + n \mu^2 \right) \right) = \exp\left( -\dfrac{1}{2} \sum_{i=1}^{n} x_i^2 + \mu n \bar{x} -\dfrac{1}{2} n \mu^2 \right) \propto \exp\left( \mu n \bar{x} -\dfrac{1}{2} n \mu^2 \right) $$ Let us complete the square inside the exponential (multiplying by the constant $\exp(-\frac{1}{2} n \bar{x}^2)$, which does not affect proportionality): $$ \propto \exp\left( -\dfrac{1}{2} n \bar{x}^2 + \mu n \bar{x} -\dfrac{1}{2} n \mu^2 \right) = \exp\left( -\dfrac{n}{2} ( \bar{x}^2 - 2 \mu \bar{x} + \mu^2 ) \right) = \exp\left( -\dfrac{1}{2} \dfrac{( \mu - \bar{x} )^2}{1/n} \right) $$
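As a quick numerical sanity check (a minimal sketch; the sample and grid below are purely illustrative and separate from the example that follows), we can verify that the kernel $\exp(\mu n \bar{x} - \frac{1}{2} n \mu^2)$ is proportional to a $N(\bar{x}, 1/n)$ density:
np.random.seed(0)
x = np.random.normal(loc=2, scale=1, size=10) #illustrative sample
n, xbar = len(x), np.mean(x)
mu_grid = np.linspace(xbar - 2, xbar + 2, 200)
kernel = np.exp(mu_grid*n*xbar - 0.5*n*mu_grid**2) #unnormalized posterior
dens = norm.pdf(mu_grid, loc=xbar, scale=(1/n)**.5) #closed-form N(xbar, 1/n)
ratio = kernel/dens
print(np.allclose(ratio, ratio[0])) #True: the ratio is constant over the grid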
## generate data ###############################################
"""
consider a problem where we want to estimate the mean of a Gaussian
distribution
DGP: mean=2; variance=1; n=10 number of observations
"""
np.random.seed(seed=123)
rv1 = np.random.normal(loc=2, scale=1, size=10)#rv1 is our random sample.
rv3 = pd.DataFrame(rv1) #change format to a data frame
rv3.shape
rv3.head()
"""
for this problem we assume that we know the variance, variance =1
the objective is to estimate the mean, for this we'll use
the Ordinary Monte Carlo method
"""
m1 = np.mean(rv1)
m1
Note that the mean of the true distribution is $\mu=2$.
## Ordinary Monte Carlo #######################################
#create a simulation of size 1000
np.random.seed(seed=321)
rv2 = np.random.normal(loc=m1, scale=(1/10)**.5, size=1000) #posterior is N(m1, 1/n) with n=10
fig = plt.figure()
ax = fig.add_subplot(111)
ax.hist(rv2, bins=100)
plt.show()
#estimation for the mean
np.mean(rv2)
#estimation for the 90% credible interval
np.quantile(rv2,[.05,.95])
The graph above is the histogram of draws from the posterior distribution of $\mu$.
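Since this posterior is also available in closed form, we can compare the Monte Carlo estimates above with the exact $N(\bar{x}, 1/n)$ values (a small check added here; `n1` is just the size of `rv1`):
#closed-form comparison: posterior is N(xbar, 1/n)
n1 = len(rv1)
print(m1) #exact posterior mean equals the sample mean
print(norm.ppf([.05,.95], loc=m1, scale=(1/n1)**.5)) #exact 90% credible interval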
Now suppose that the true posterior is a Gamma$(3,3)$ distribution, but we don't know that and cannot sample from it directly.
We'll use a Gaussian distribution as the proposal in the sampling step.
##Sampling Importance Resampling (SIR) #####################################
#suppose the posterior has a Gamma(3,3) shape, but we don't know that
#use a Gaussian proposal in the sampling step
#loc1, scale1 are from the Gaussian
loc1=0
scale1=500 #scale is standard deviation
norm.mean(loc=loc1, scale=scale1)
norm.var(loc=loc1, scale=scale1)
norm.std(loc=loc1, scale=scale1)
np.random.seed(seed=123)
rv1 = np.random.normal(loc=loc1, scale=scale1, size=10000)
rv1[0:10]
np.mean(rv1)
np.std(rv1)
np.var(rv1)
np.max(rv1)
np.min(rv1)
b1, b2 = -5,5
x = np.linspace(b1,b2,100)
y = norm.pdf(x=x, loc=loc1, scale=scale1)
fig, ax = plt.subplots(1, 1) #it creates another plot
ax.plot(x, y,'b-', lw=2, alpha=0.6, label='Gaussian pdf')
#Gamma(3,3)
a = 3
b = 3
scale2=1/b
b1, b2 = gamma.ppf(q=[0.01,0.99], a=a, scale=scale2)
x = np.linspace(b1,b2, 100)
y = gamma.pdf(x=x, a=a, scale=scale2)
#fig, ax = plt.subplots(1, 1) #it creates another plot
ax.plot(x, y,'r-', lw=2, alpha=0.6, label='gamma pdf')
We can see from the figure that the Gaussian density is very flat, because its mass is spread over a wide area. Ideally, the first sampling step should cover a region close to where most of the posterior mass lies (say, between its 1% and 99% quantiles). But we know neither the posterior's shape nor where its mass is. Idea: we could turn this into an iterative process, progressively narrowing in on the region that holds most of the posterior mass.
#our Normal proposal has sd=500; what are its .01 and .99 percentiles?
norm.ppf(q=[.01,.99],loc=loc1, scale=scale1)
#p1: densities of the Gaussian proposal at the sampled points
p1 = norm.pdf(x=rv1,loc=loc1, scale=scale1)
#g1: densities of the true posterior Gamma(3,3) at the sampled points
g1 = gamma.pdf(x=rv1, a=a, scale=scale2)
#"w" weights for the resampling step
w=g1/p1
len1 = len(w)
fig, ax = plt.subplots(1, 1)
ax.plot( w,'r-', lw=2, alpha=0.6, label='graph of weights "w"')
#peaks show where the weights were high
#show the largest weights
y1=np.sort(w)
y2 = y1[-200:] #the 200 largest weights
fig, ax = plt.subplots(1, 1)
ax.plot(y2,'r-', lw=2, alpha=0.6, label='')
#number of points with weight above 0.0001, out of 10000
np.sum(w>0.0001)
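A standard importance-sampling diagnostic, not used in the original example but helpful here, is the effective sample size $\mathrm{ESS} = (\sum_i w_i)^2 / \sum_i w_i^2$; a value far below 10000 confirms that a few draws carry almost all the weight:
#effective sample size of the importance weights
ess = np.sum(w)**2/np.sum(w**2)
print(ess) #far below 10000 when the proposal matches the target poorly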
#make data frame with weights and value from first draw rv1
df1 = pd.DataFrame({"sample":rv1,
"weights":w})
df1.head()
#keep only lines that have weights higher than 0.0001
#rvc1 random vector clean 1
rvc1=rv1[w>0.0001]
len(rvc1)
#wc1 weights clean 1
wc1 = w[w>0.0001]
#resampling with replacement, using the weights
dd1 = choices(population=rvc1, #values to be drawn
              weights=wc1, #weights for the draws
              k=1000) #number of draws
#plot histogram of estimated posterior
fig = plt.figure()
ax = fig.add_subplot(111)
ax.hist(dd1, bins=100,color='b',density=True)
ax.grid(True)
plt.show()
#plot histogram of estimated posterior (SIR)
#and true posterior
fig = plt.figure()
ax = fig.add_subplot(111)
ax.hist(dd1, bins=100,color='b',density=True)
ax.plot(x, y,'r-', lw=3, alpha=1, label='Gamma(3,3) pdf')
ax.grid(True)
plt.show()
Compare the moments generated by the SIR method with the true moments.
The true mean is $3/3 = 1$.
The true variance is $3/3^2 = 1/3 \approx 0.33$.
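These values follow from the Gamma$(a,b)$ moments with shape $a=3$ and rate $b=3$: mean $a/b=1$ and variance $a/b^2=1/3$. A one-line check with `scipy.stats.gamma`, which is parameterized by scale $=1/b$:
#true moments of Gamma(3,3): mean=1, variance=1/3
gamma.stats(a=3, scale=1/3, moments='mv')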
#SIR posterior mean, it should be close to 1
np.mean(dd1)
#SIR posterior variance, it should be close to .33
np.var(dd1)
Idea: a random variable has a Gaussian distribution. The mean's prior is also Gaussian, and the variance's prior is an inverted gamma, so that finding the conditional posteriors is easy. Build a Gibbs sampler from this.\
Let $x$ have a Gaussian distribution. The prior of $\mu$ is Gaussian, $ \mu \sim N(0, 10^6)$; the large variance makes this prior nearly uninformative. The prior for the variance is $ \sigma^2 \sim IG(2.001,10)$, so the prior distribution of $ \sigma^2 $ has mean $\approx 10$ and variance $\approx 99800.3$.
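We can confirm these prior moments numerically, using the inverse-gamma formulas mean $=\beta/(\alpha-1)$ and variance $=\beta^2/((\alpha-1)^2(\alpha-2))$:
#prior moments of IG(2.001,10): mean ~9.99, variance ~99800.3
scipy.stats.invgamma.stats(a=2.001, scale=10, moments='mv')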
Gaussian distribution $$ f(x)= \frac{1}{\sigma \sqrt{2 \pi}} \exp \left( -\frac{(x-\mu)^2}{2 \sigma^2} \right) $$
Inverse Gamma distribution $$ f(x) = \frac{\beta^\alpha}{\Gamma(\alpha) } x^{-(\alpha+1)} \exp \left( -\frac{\beta}{x} \right) $$
Likelihood: $$ L(\mu, \sigma| X) = \prod_{i=1}^{n} \left[ \frac{1}{\sigma \sqrt{2 \pi}} \exp \left( -\frac{(x_i-\mu)^2}{2 \sigma^2} \right) \right] $$
$$ \Longrightarrow L(\mu, \sigma| X) = \frac{1}{( \sigma \sqrt{2 \pi} )^n } \exp \left( -\frac{ 1}{2 \sigma^2} \sum_i (x_i-\mu)^2 \right) $$
Posterior:
\begin{equation*} \begin{split} \pi(\mu, \sigma^2| X) \propto & \left[ \frac{1}{( \sigma \sqrt{2 \pi} )^n } \exp \left( -\frac{ 1}{2 \sigma^2} \sum_i (x_i-\mu)^2 \right) \right] \\ & \left[ \frac{1}{10^{3} \sqrt{2 \pi}} \exp \left( -\frac{\mu^2}{2 \cdot 10^6} \right) \right]\\ & \left[ \frac{10^{2.001}}{\Gamma(2.001) } (\sigma^2)^{-(2.001+1)} \exp \left( -\frac{10}{\sigma^2} \right) \right] \end{split} \end{equation*}
Drop everything that is constant:
\begin{equation*} \begin{split} \Longrightarrow \pi(\mu, \sigma^2| X) \propto & \left[ \frac{1}{\sigma^n } \exp \left( -\frac{ 1}{2 \sigma^2} \sum_i (x_i-\mu)^2 \right) \right] \\ & \left[\exp \left( -\frac{\mu^2}{2 \cdot 10^6} \right) \right]\\ & \left[ (\sigma^2)^{-(3.001)} \exp \left( -\frac{10}{\sigma^2} \right) \right] \end{split} \end{equation*}\begin{equation*} \begin{split} \Longrightarrow \pi(\mu, \sigma^2| X) \propto & \frac{1}{\sigma^n } \exp \left( -\frac{ 1}{2 \sigma^2} \sum_i (x_i-\mu)^2 \right) \exp \left( -\frac{\mu^2}{2 \cdot 10^6} \right)\\ & (\sigma^2)^{-(3.001)} \exp \left( -\frac{10}{\sigma^2} \right) \end{split} \end{equation*}let's work with $ \sum_i (x_i-\mu)^2 $ $$ \sum_i (x_i^2- 2 x_i \mu + \mu^2) = \sum_i x_i^2 - 2 \mu \sum_i x_i + n \mu^2 $$ hence
\begin{equation*} \begin{split} \Longrightarrow \pi(\mu, \sigma^2| X) \propto & \quad \sigma^{-6.002-n} \exp \left( -\frac{ 1}{2 \sigma^2} \left(\sum_i x_i^2 - 2 \mu \sum_i x_i + n \mu^2 \right) -\frac{\mu^2}{2 \cdot 10^6} -\frac{10}{\sigma^2} \right) \end{split} \end{equation*}
Now condition on $\sigma^2$ and eliminate every factor that is constant in $\mu$: $$ \begin{split} \Longrightarrow \pi(\mu | \sigma^2, X) \propto & \quad \exp \left( -\frac{ 1}{2 \sigma^2} \left( - 2 \mu \sum_i x_i + n \mu^2 \right) -\frac{\mu^2}{2 \cdot 10^6} \right) \end{split} $$
$$ \begin{split} \Longrightarrow \pi(\mu | \sigma^2, X) \propto & \quad \exp \left( \frac{ \mu \sum_i x_i}{ \sigma^2} -\frac{ n \mu^2}{2 \sigma^2} -\frac{\mu^2}{2 \cdot 10^6} \right) \end{split} $$$$ \begin{split} \Longrightarrow \pi(\mu | \sigma^2, X) \propto & \quad \exp \left( \frac{ \mu \sum_i x_i}{ \sigma^2} -\frac{ n \mu^2 10^6 }{2 \sigma^2 10^6} -\frac{\mu^2 \sigma^2 }{2 \sigma^2 10^6} \right) \end{split} $$$$ \begin{split} \Longrightarrow \pi(\mu | \sigma^2, X) \propto & \quad \exp \left( \frac{ \mu \sum_i x_i}{ \sigma^2} -\frac{ ( n 10^6 + \sigma^2 )\mu^2 }{2 \sigma^2 10^6} \right) \end{split} $$for simplicity set $a = \frac{ ( n 10^6 + \sigma^2 )}{2 \sigma^2 10^6} $ and $ b = \frac{ \sum_i x_i}{ \sigma^2} $
then we have $$ \pi(\mu | \sigma^2, X) \propto \exp \left( \mu b - a \mu^2 \right) = \exp \left( -( a \mu^2 - \mu b ) \right) $$ Since multiplying by $e^{-c}$ for any constant $c$ only changes the proportionality constant, we can write $$ \pi(\mu | \sigma^2, X) \propto \exp \left( -(c -\mu b + a \mu^2)\right) $$ Let's analyze $ c -\mu b + a \mu^2 $; for a perfect square we need $$ (\sqrt{a} \mu - \sqrt{c} )^2 = a \mu^2 - 2\sqrt{a} \sqrt{c} \mu + c = c - \mu b + a \mu^2, $$ hence $ b = 2\sqrt{a} \sqrt{c} $, so $$ \frac{ \sum_i x_i}{ \sigma^2} = 2\sqrt{\frac{ n 10^6 + \sigma^2 }{2 \sigma^2 10^6}} \sqrt{c} $$ Solving for $c$: $$ \left( \frac{ \sum_i x_i}{ \sigma^2} \right)^2 = 4 \left( \frac{ n 10^6 + \sigma^2 }{2 \sigma^2 10^6} \right) c = \left( \frac{ 2 ( n 10^6 + \sigma^2 )}{\sigma^2 10^6} \right) c $$ $$ \Longrightarrow c = \frac{( \sum_i x_i)^2}{ \sigma^4} \left( \frac{\sigma^2 10^6}{ 2 ( n 10^6 + \sigma^2 )} \right) = \frac{( \sum_i x_i)^2 \, 10^6 }{ 2 \sigma^2 ( n 10^6 + \sigma^2 )} $$ Hence $$ \pi(\mu | \sigma^2, X) \propto \exp \left( -(c -\mu b + a \mu^2)\right) = \exp \left( -(\sqrt{a} \mu - \sqrt{c})^2 \right) $$
Let us analyze $ -(\sqrt{a} \mu - \sqrt{c})^2 $:
$$ - \left( \frac{\sqrt{a}}{\sqrt{a}} \right)^2 (\sqrt{a} \mu - \sqrt{c})^2 = $$$$- \left( \frac{\sqrt{a}}{\sqrt{a}} (\sqrt{a} \mu - \sqrt{c}) \right)^2 = $$$$- \left( \sqrt{a} \left( \mu - \frac{\sqrt{c}}{\sqrt{a}} \right) \right)^2 = - a \left( \mu - \frac{\sqrt{c}}{\sqrt{a}} \right)^2 $$then
$$ \Longrightarrow \pi(\mu | \sigma^2, X) \propto \quad \exp \left( -(\sqrt{a} \mu - \sqrt{c})^2 \right) $$$$ = \exp \left( - a \left( \mu - \frac{\sqrt{c}}{\sqrt{a}} \right)^2 \right) $$This is a Gaussian$ \left(\frac{\sqrt{c}}{\sqrt{a}}, \frac{1}{2 a} \right)$.
Hence
$$ \pi(\mu | \sigma^2, X) \sim N\left(\frac{\sqrt{c}}{\sqrt{a}}, \frac{1}{2 a} \right) $$Where $a = \frac{ ( n 10^6 + \sigma^2 )}{2 \sigma^2 10^6} $ and $ c = \frac{( \sum_i x_i)^2 10^6 }{ \sigma^2 2 ( n 10^6 + \sigma^2 )} $\
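To make the result concrete, here is a minimal sketch of the Gibbs draw this implies (`draw_mu` is our illustrative name, not part of the sampler built below):
def draw_mu(sigma_sq, x):
    #one draw from pi(mu | sigma^2, X) = N(sqrt(c)/sqrt(a), 1/(2a))
    n = len(x)
    a = (n*10**6 + sigma_sq)/(2*sigma_sq*10**6)
    c = (np.sum(x)**2*10**6)/(sigma_sq*2*(n*10**6 + sigma_sq))
    #note: sqrt(c/a) assumes sum(x) > 0, as in this example;
    #equivalently the mean is np.sum(x)*10**6/(n*10**6 + sigma_sq)
    return np.random.normal(loc=(c/a)**.5, scale=(1/(2*a))**.5)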
Let's condition on $\mu$, and find the conditional posterior for $\sigma^2$: $$ \Longrightarrow \pi(\mu, \sigma^2| X) \propto \quad \sigma^{-6.002-n} \exp \left( -\frac{ 1}{2 \sigma^2} \sum_i (x_i-\mu)^2 -\frac{\mu^2}{2 \cdot 10^6} -\frac{10}{\sigma^2} \right) $$ $$ \Longrightarrow \pi(\sigma^2|\mu, X) \propto \quad \sigma^{-6.002-n} \exp \left( -\frac{ 1}{2 \sigma^2} \sum_i (x_i-\mu)^2 -\frac{20}{2 \sigma^2} \right) $$ $$ \Longrightarrow \pi(\sigma^2|\mu, X) \propto \quad \sigma^{-6.002-n} \exp \left(\frac{ -\sum_i (x_i-\mu)^2 -20 }{2 \sigma^2} \right) $$
$$ \Longrightarrow \pi(\sigma^2|\mu, X) \propto \quad (\sigma^2)^{\frac{ -6.002-n}{2}} \exp \left(\frac{ -\sum_i (x_i-\mu)^2 -20 }{2 \sigma^2} \right) $$
which is the kernel of an inverted gamma distribution;
let's define $ -( \alpha + 1) = \frac{ -6.002-n}{2} $ and $- \beta = \frac{ -\sum_i (x_i-\mu)^2 -20 }{2} $, hence
$ \alpha = \frac{6.002+n}{2} -1 $ and
$ \beta = \frac{ \sum_i (x_i-\mu)^2 +20 }{2}$
Hence the conditional posterior for $\sigma^2$ is $$ \pi(\sigma^2|\mu, X) \sim \text{IG} \left( \frac{6.002+n}{2} -1, \frac{ \sum_i (x_i-\mu)^2 +20 }{2} \right) $$
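And the companion draw for $\sigma^2$ (again a sketch with our own helper name), using `scipy.stats.invgamma`, whose `a` argument is the shape $\alpha$ and `scale` is $\beta$:
def draw_sigma_sq(mu, x):
    #one draw from pi(sigma^2 | mu, X) = IG(alpha, beta)
    n = len(x)
    alpha = (6.002 + n)/2 - 1 #shape
    beta = (np.sum((x - mu)**2) + 20)/2 #scale
    return scipy.stats.invgamma.rvs(a=alpha, scale=beta)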
"""
Idea: a random variable have a Gaussian distribution. Mean is also Gaussian.
Variance have a inverted Gamma so that finding the posterior is easier.
Make a Gibbs sampler from this.
"""
#Build the true process, Gaussian with mean=4 and sd=2
#mean=4
#var=4 (sd=2)
np.random.seed(seed=123)
#rs1 is our random sample 1
rs1 = np.random.normal(loc=4, scale=2, size=10000)
#scale is standard deviation
# building loop for chains in Gibbs sampler #
#generate initial random values
chainSize = 10000 #size of chain
#create empty arrays to hold the chains
mu=np.full(chainSize, np.nan)
sigma_sq=np.full(chainSize, np.nan)
mu[0] = 1
sigma_sq[0] = 1
#generate the chain
for ii in list(range(1, chainSize)):
    #Monte Carlo step
    #--draw from the conditional posterior of mu
    #assign values for a1 and c1; nobs is the data sample size, not the chain size
    nobs = len(rs1)
    a1 = (nobs*10**6+sigma_sq[ii-1])/(2*sigma_sq[ii-1]*10**6)
    c1 = ((np.sum(rs1))**2*10**6)/(sigma_sq[ii-1]*2*(nobs*10**6+sigma_sq[ii-1]))
    m_gauss = (c1/a1)**.5 #conditional mean, sqrt(c/a)
    v_gauss = 1/(2*a1) #conditional variance, 1/(2a)
    mu[ii] = np.random.normal(loc=m_gauss, scale=v_gauss**.5)
    #--draw from the conditional posterior of sigma_sq
    a_ig = (6.002+nobs)/2 - 1 #shape alpha
    b_ig = (np.sum((rs1-mu[ii])**2)+20)/2 #scale beta, conditioning on the new mu
    sigma_sq[ii] = scipy.stats.invgamma.rvs(a=a_ig, loc=0, scale=b_ig)
#chain graph of mu
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(mu,'r-', lw=2, alpha=0.6)
plt.show()
#chain graph of sigma_sq
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(sigma_sq,'b-', lw=2, alpha=0.6)
plt.show()
#transform np.array into series
mu_pd = pd.Series(mu)
sigma_sq_pd = pd.Series(sigma_sq)
#statistics of mu
mu_pd.describe()
np.median(mu_pd)
#the mean is close to the expected
# it should be around 4
#statistics of sigma_sq
sigma_sq_pd.describe()
np.median(sigma_sq_pd)
#the mean should be around 4, the true variance;
#with n=10000 the data dominate the IG(2.001,10) prior,
#whose mean (about 10) is far from the truth
#mu's histogram
fig = plt.figure()
ax = fig.add_subplot(111)
ax.grid(True)
ax.hist(mu, bins=100,color='b',density=True)
plt.show()
#sigma_sq's histogram
fig = plt.figure()
ax = fig.add_subplot(111)
ax.grid(True)
ax.hist(sigma_sq, bins=100,color='r',density=True)
plt.show()
In this example we use Example 7.3 of the book *Uma Introducao aos Metodos Bayesianos Aplicados a Analise de Dados* by Achcar et al. The problem starts with a new piece of software under development, and we need to estimate how many bugs it contains. Tests run over fixed time periods, and we count how many bugs occur in each period.
The following list shows the number of bugs observed in each successive period: $$ [27,16,11,10,11,7,2,5,3,1,4,7,2,5,5,6,0,5,1,12,1,2,1,1] $$
We assume that this process follows a Poisson distribution: $$ f(m_i) = \frac{e^{- \lambda_i} \lambda_i^{m_i}}{m_i !} $$ where $ \lambda_i = \lambda_a k_1^i $, with $ 0 < k_1 < 1 $ and $ \lambda_a > 0 $.
With $n$ observed periods, the likelihood function is: $$ L \propto \lambda_a^{d_1} k_1^{d_2} \exp \left( -\lambda_a \sum_{i=1}^{n} k_1^{i} \right) $$ where $ d_1 = \sum m_i $ and $ d_2 = \sum i \cdot m_i $.
We assume prior independence and $$ \lambda_a \sim \text{Gamma}(b_1,b_2) $$ $$ k_1 \sim \text{Beta}(e_1,e_2) $$ Where $b_1=16; b_2=0.8; e_1=2.5; e_2=0.6$ are given.
The posterior distribution is $$ \pi( \lambda_a, k_1 | m ) \propto \lambda_a^{d_1+b_1-1} \exp \left[ - \left(b_2 + \sum_{i=1}^{n} k_1^i \right) \lambda_a \right] k_1^{d_2+e_1-1}(1-k_1)^{e_2-1} $$
The full conditional distributions are: $$\lambda_a | k_1, m \sim \text{Gamma}\left( d_1+b_1, \; b_2+\sum k_1^i \right)$$ $$ \pi(k_1| \lambda_a, m) \propto k_1^{d_2+e_1-1}(1-k_1)^{e_2-1} \exp \left( - \lambda_a \sum_{i=1}^{n} k_1^i \right) $$ The first is a standard Gamma (with rate parameter $b_2+\sum k_1^i$); the second has no standard form, so it must be sampled with a Metropolis-Hastings step.
We can rewrite the second expression as: $$ \pi(k_1| \lambda_a, m) \propto k_1^{e_1-1}(1-k_1)^{e_2-1} \, \psi(\lambda_a, k_1) $$ where $$ \psi(\lambda_a, k_1) = \exp \left( d_2 \ln (k_1) - \lambda_a \sum_{i=1}^{n} k_1^i \right) $$
In the Metropolis-Hastings algorithm we use a Beta$(e_1,e_2)$ proposal to simulate the candidate $k_1^{(t)}$, where $t$ indexes the MCMC chain. Because the proposal matches the Beta factors of the target, those factors cancel and only the $\psi$ ratio remains in the acceptance probability.
We decide whether the candidate $k_1^{(t)}$ is accepted using the acceptance probability
$$ p^{(t)} = \min \left( 1, \frac{\psi(\lambda_a^{(t)}, k_1^{(t)})}{\psi(\lambda_a^{(t)}, k_1^{(t-1)})} \right); $$ with probability $ p^{(t)}$ we accept $k_1^{(t)}$; otherwise we keep the old value $k_1^{(t-1)}$.
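In code, this step is easiest to do in log space to avoid overflow in $\psi$ (a minimal sketch under the notation above; `log_psi` and `mh_step` are our helper names, not from the book or the code below):
def log_psi(lam, k1, n, d2):
    #log of psi(lambda_a, k_1) = d2*ln(k1) - lambda_a * sum_{i=1}^{n} k1**i
    return d2*np.log(k1) - lam*(k1*(1 - k1**n)/(1 - k1))

def mh_step(k_old, lam, n, d2, e1, e2):
    #propose from the Beta(e1, e2) prior; its factors cancel in the ratio
    k_new = scipy.stats.beta.rvs(e1, e2)
    #acceptance probability min(1, psi(new)/psi(old)), computed in log space
    ratio = np.exp(min(0.0, log_psi(lam, k_new, n, d2) - log_psi(lam, k_old, n, d2)))
    return k_new if np.random.uniform() < ratio else k_old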
The maximum likelihood estimates are $ \hat{\lambda}_a = 18.88 $ and $ \hat{k}_1 = 0.89 $.
#Metropolis-Hastings | create functions#
#define variables and the sample from the problem
#vec3 lists the observed bug counts in period order
vec3 = np.array([27,16,11,10,11,7,2,5,3,1,4,7,2,5,5,6,0,5,1,12,1,2,1,1])
mm = len(vec3) #number of observed periods
d1 = np.sum(vec3)
vec4 = vec3*np.arange(1, mm+1) #i * m_i
d2 = np.sum(vec4)
b1 = 16 #all the following hyperparameters
b2 = 0.8 #are given by the example's text
e1 = 2.5
e2 = 0.6
#define a function that calculates the sum of a geometric progression,
#sum_{i=1}^{n} k1**i (assumes 0 < k1 < 1)
def SumGeoProg(k1,n):
    SGP = k1*(1-k1**n)/(1-k1) #closed form of the sum
    return SGP
#a test
SumGeoProg(k1=.5,n=3)
#it's working, it should be 0.875
#psi ratio function for the Metropolis-Hastings step
def Psi_ratio(k_oneUnit1, k_oneUnit2, lambda0Unit):
    #k_oneUnit1 is the current value, k_oneUnit2 the proposed value
    #problem: the exponent can be around e.g. -50000, and exp(-50000)
    #underflows to 0; solution: subtract a constant inside the exp,
    #which cancels in the ratio and keeps the denominator nonzero
    ins1 = d2*np.log(k_oneUnit1)-lambda0Unit*SumGeoProg(k_oneUnit1,mm)
    psi1 = np.exp(ins1 - ins1) #denominator, normalized to exactly 1
    ins1_2 = d2*np.log(k_oneUnit2)-lambda0Unit*SumGeoProg(k_oneUnit2,mm)
    psi2 = np.exp(ins1_2 - ins1) #numerator, relative to the denominator
    #if psi2 overflows to inf, cap it at 1;
    #this makes no difference after min(ratio, 1)
    if psi2 == float("inf"):
        psi2 = 1
    psi_ratio = psi2/psi1
    return psi_ratio
#most of the ratios should fall near 0 or 1
#test
Psi_ratio(.11,.1,6)
#example software performance #
nn = 50000 #chain size
lambda0 = np.full(nn, np.nan)
k_one = np.full(nn, np.nan)
#assign initial values
lambda0[0] = 1
k_one[0] = .999
#generate the chain
for ii in list(range(1, nn)):
    #print(datetime.datetime.now()) #show loop time
    #print(ii/nn)
    #Monte Carlo step
    #Gibbs step for the conditional of lambda0
    shape = d1+b1
    rate = b2+SumGeoProg(k_one[ii-1],mm)
    #np.random.gamma takes a scale parameter, the inverse of the rate
    lambda0[ii] = np.random.gamma(shape, 1/rate)
    #Metropolis-Hastings step for k_one, with a Beta(e1,e2) proposal
    k_temp = scipy.stats.beta.rvs(e1, e2)
    psi_ratio = Psi_ratio(k_one[ii-1], k_temp, lambda0[ii])
    prob1 = min(psi_ratio, 1.)
    result1 = scipy.stats.bernoulli.rvs(prob1, size=1)
    if result1==1:
        k_one[ii] = k_temp
    else:
        k_one[ii] = k_one[ii-1]
#build graphs of chains for software performance -
#chain graph of k_one
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot(k_one,'b-', lw=2, alpha=0.6)
plt.show()
#chain graph of lambda0
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot(lambda0,'r-', lw=2, alpha=0.6)
plt.show()
#create data frame with those two np.arrays
chDf = pd.DataFrame( {"k_one":k_one, "lambda0":lambda0})
#apply burn-in and keep every 5th observation of the chain
#burn-in is 25% of the chain
burnin= int(nn/4)
nn2 = nn-burnin
chDf2 = chDf.iloc[burnin:nn,:]
#thin the chain to reduce autocorrelation
list1 = list(range(0,nn2,5))
chDf3 = chDf2.iloc[list1,:]
chDf3.shape
#graphs of cleaned chains
#chain graph of k_one
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot(chDf3["k_one"],'b-', lw=2, alpha=0.6)
plt.show()
#chain graph of lambda0
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(chDf3["lambda0"],'r-', lw=2, alpha=0.6)
plt.show()
#histogram of k_one
fig = plt.figure()
ax = fig.add_subplot(111)
ax.grid(True)
ax.hist(chDf3["k_one"], bins=100,color='b',density=True)
plt.show()
#histogram of lambda0
fig = plt.figure()
ax = fig.add_subplot(111)
ax.grid(True)
ax.hist(chDf3["lambda0"], bins=100,color='r',density=True)
plt.show()
#summary statistics
#for k_one
chDf3["k_one"].describe()
#median
np.median(chDf3["k_one"])
#summary statistics
#for lambda0
chDf3["lambda0"].describe()
#median
np.median(chDf3["lambda0"])
With the Gibbs step for $\lambda_a$ drawn on the correct scale (NumPy's `np.random.gamma` takes a scale parameter, the inverse of the rate $b_2+\sum k_1^i$) and the Beta$(e_1,e_2)$ proposal, the posterior means should land close to the maximum likelihood estimates, $\lambda_a=18.88$ and $k_1=0.89$.