Bayesian Modelling#

Bayesian probability#
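In the Bayesian view, probability quantifies a degree of belief about an unknown quantity. A prior belief $p(\theta)$ is combined with the likelihood of the observed data $p(x \mid \theta)$ to give the posterior belief via Bayes' theorem:

$$ p(\theta \mid x) = \frac{p(x \mid \theta)\, p(\theta)}{p(x)} $$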

Example: estimating the probability of heads in a coin toss experiment#
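We toss a biased coin with unknown heads probability $p$ and record the outcomes $x_1, \dots, x_n \in \{0, 1\}$. With a $\mathrm{Beta}(\alpha, \beta)$ prior on $p$ and a Bernoulli likelihood, the posterior is again a Beta distribution (the Beta prior is conjugate to the Bernoulli likelihood):

$$ p \mid x_{1:n} \sim \mathrm{Beta}\Big(\alpha + \sum_{i=1}^{n} x_i,\; \beta + n - \sum_{i=1}^{n} x_i\Big) $$

The cells below use exactly this closed-form update: `post_alpha` and `post_beta` are the updated parameters, recomputed for growing subsets of the data and for several choices of prior.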

# Generate a sequence of binary random variables (coin tosses)
# with true heads probability p
from scipy.stats import bernoulli, beta
p = 0.3
r = bernoulli.rvs(p, size=400)
r[:10]
array([0, 0, 0, 1, 0, 0, 0, 1, 0, 0])
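For comparison with the Bayesian estimates below, the maximum-likelihood estimate of $p$ for Bernoulli data is simply the fraction of heads in the sample; a minimal sketch (the exact value depends on the random draw above):

# Maximum-likelihood estimate of p: the empirical frequency of heads
p_mle = r.mean()
p_mle  # close to the true p = 0.3 for 400 tosses, up to sampling noise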
import matplotlib.pyplot as plt
import numpy as np

# Prior distribution: uniform prior, Beta(1, 1)
prior_alpha = 1
prior_beta = 1

# Plot the posterior distribution as more and more data points are included
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(17, 17))
N = np.linspace(0, len(r), 9)
p_grid = np.linspace(0, 1, 1000)
for i in range(9):
    n = int(N[i])
    i_x, i_y = i // 3, i % 3
    r_trunc = r[:n]
    # Conjugate update: Beta(alpha + #heads, beta + #tails)
    post_alpha = prior_alpha + np.sum(r_trunc)
    post_beta = prior_beta + len(r_trunc) - np.sum(r_trunc)
    mean = beta.mean(post_alpha, post_beta)
    std = beta.std(post_alpha, post_beta)
    ax[i_x, i_y].plot(p_grid, beta.pdf(p_grid, post_alpha, post_beta))
    ax[i_x, i_y].set_title(f"N = {n}, est = {np.round(mean, 3)} +/- {np.round(std, 3)}")
[Figure: 3x3 grid of posterior Beta densities under the uniform Beta(1, 1) prior as N grows from 0 to 400; each panel title reports N and the posterior mean +/- standard deviation.]
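The general posterior mean under a $\mathrm{Beta}(\alpha, \beta)$ prior is

$$ \mathbb{E}[p \mid x_{1:n}] = \frac{\alpha + \sum_{i} x_i}{\alpha + \beta + n}, $$

so with the flat Beta(1, 1) prior the estimate $(1 + \sum_i x_i)/(2 + n)$ is essentially the empirical frequency of heads once $n$ is moderately large, and the posterior concentrates around the true value $p = 0.3$.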
import matplotlib.pyplot as plt
import numpy as np

# Prior distribution: (approximately) non-informative prior, Beta(eps, eps) with eps close to 0
prior_alpha = 0.000001
prior_beta = 0.000001

# Plot the posterior distribution as more and more data points are included
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(17, 17))
N = np.linspace(0, len(r), 9)
p_grid = np.linspace(0, 1, 1000)
for i in range(9):
    n = int(N[i])
    i_x, i_y = i // 3, i % 3
    r_trunc = r[:n]
    # Conjugate update: Beta(alpha + #heads, beta + #tails)
    post_alpha = prior_alpha + np.sum(r_trunc)
    post_beta = prior_beta + len(r_trunc) - np.sum(r_trunc)
    mean = beta.mean(post_alpha, post_beta)
    std = beta.std(post_alpha, post_beta)
    ax[i_x, i_y].plot(p_grid, beta.pdf(p_grid, post_alpha, post_beta))
    ax[i_x, i_y].set_title(f"N = {n}, est = {np.round(mean, 3)} +/- {np.round(std, 3)}")
[Figure: the same 3x3 grid of posterior densities under the (approximately) non-informative prior.]
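As $\alpha, \beta \to 0$ the posterior mean tends to $\sum_i x_i / n$, so this prior essentially reproduces the maximum-likelihood estimate (the $N = 0$ panel carries no information and defaults to a mean of 0.5).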
import matplotlib.pyplot as plt
import numpy as np

# Prior distribution: confident but wrong prior, Beta(30, 30), centred on p = 0.5
prior_alpha = 30
prior_beta = 30

# Plot the posterior distribution as more and more data points are included
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(17, 17))
N = np.linspace(0, len(r), 9)
p_grid = np.linspace(0, 1, 1000)
for i in range(9):
    n = int(N[i])
    i_x, i_y = i // 3, i % 3
    r_trunc = r[:n]
    # Conjugate update: Beta(alpha + #heads, beta + #tails)
    post_alpha = prior_alpha + np.sum(r_trunc)
    post_beta = prior_beta + len(r_trunc) - np.sum(r_trunc)
    mean = beta.mean(post_alpha, post_beta)
    std = beta.std(post_alpha, post_beta)
    ax[i_x, i_y].plot(p_grid, beta.pdf(p_grid, post_alpha, post_beta))
    ax[i_x, i_y].set_title(f"N = {n}, est = {np.round(mean, 3)} +/- {np.round(std, 3)}")
[Figure: the same 3x3 grid of posterior densities under the confident but wrong Beta(30, 30) prior.]
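The Beta(30, 30) prior acts like 60 pseudo-observations split evenly between heads and tails, so the posterior mean

$$ \mathbb{E}[p \mid x_{1:n}] = \frac{30 + \sum_i x_i}{60 + n} $$

stays pulled towards 0.5 until $n$ is large compared to 60; the data eventually override the misplaced prior, but only slowly.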
import matplotlib.pyplot as plt
import numpy as np

# Prior distribution: confident and correct prior, Beta(18, 42), with mean 18/60 = 0.3
prior_alpha = 18
prior_beta = 42

# Plot the posterior distribution as more and more data points are included
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(17, 17))
N = np.linspace(0, len(r), 9)
p_grid = np.linspace(0, 1, 1000)
for i in range(9):
    n = int(N[i])
    i_x, i_y = i // 3, i % 3
    r_trunc = r[:n]
    # Conjugate update: Beta(alpha + #heads, beta + #tails)
    post_alpha = prior_alpha + np.sum(r_trunc)
    post_beta = prior_beta + len(r_trunc) - np.sum(r_trunc)
    mean = beta.mean(post_alpha, post_beta)
    std = beta.std(post_alpha, post_beta)
    ax[i_x, i_y].plot(p_grid, beta.pdf(p_grid, post_alpha, post_beta))
    ax[i_x, i_y].set_title(f"N = {n}, est = {np.round(mean, 3)} +/- {np.round(std, 3)}")
[Figure: the same 3x3 grid of posterior densities under the correct confident Beta(18, 42) prior.]
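Once the posterior parameters are known, other summaries follow directly from `scipy.stats.beta`. A minimal sketch, assuming the full sample `r` and the uniform Beta(1, 1) prior from above, for an equal-tailed 95% credible interval:

# 95% equal-tailed credible interval under the uniform Beta(1, 1) prior,
# using all 400 tosses
post_alpha = 1 + np.sum(r)
post_beta = 1 + len(r) - np.sum(r)
lower, upper = beta.interval(0.95, post_alpha, post_beta)
np.round(lower, 3), np.round(upper, 3)  # an interval that should cover the true p = 0.3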