Hands-on Python implementation of probabilistic reasoning and Bayesian methods
Python + NumPy + Matplotlib = Perfect Bayesian Toolkit
Clean syntax, powerful libraries, and visual results make Python ideal for probabilistic programming and Bayesian inference.
• NumPy arrays for probability distributions
• Conditional probability tables with pandas
• Bayes' rule implementation in pure Python
• Visualization of Bayesian updating
• Complete Bayesian inference pipelines
NumPy arrays naturally represent probability distributions. Use np.array() for discrete probabilities and np.random for sampling.
# Import NumPy
import numpy as np

# Discrete probability distribution (coin flip): index 0 = heads, 1 = tails
coin_probs = np.full(2, 0.5)
print(f"Coin probabilities: {coin_probs}")
print(f"Sum (must = 1.0): {coin_probs.sum()}")

# Fair 6-sided die: every face is equally likely
die_probs = np.ones(6) / 6
print(f"Die probabilities: {die_probs}")

# Expected value: each face value weighted by its probability
values = np.arange(1, 7)
expected_value = (values * die_probs).sum()
print(f"Expected value of die: {expected_value}")
Coin probabilities: [0.5 0.5] Sum (must = 1.0): 1.0 Die probabilities: [0.16666667 0.16666667 0.16666667 0.16666667 0.16666667 0.16666667] Expected value of die: 3.5
• np.array([p1, p2, ...]) - Create probability array
• array.sum() - Check normalization (should = 1.0)
• np.random.choice(values, p=probs) - Sample from distribution
• (values * probs).sum() - Calculate expected value
Use pandas DataFrames for conditional probability tables. Rows = conditions, columns = outcomes.
import pandas as pd
import numpy as np

# Conditional probability table: P(Test Result | Disease)
# Each row is a disease status; each column is a test outcome.
diagnosis_table = pd.DataFrame(
    [[0.95, 0.05],
     [0.05, 0.95]],
    index=['Disease', 'No Disease'],
    columns=['Positive', 'Negative'],
)
print("Conditional Probability Table:")
print(diagnosis_table)
print(f"\nRow sums (should = 1.0): {diagnosis_table.sum(axis=1).values}")

# Prior beliefs before any test result is observed
prior_disease = 0.01  # P(Disease) = 1%
prior_no_disease = 0.99  # P(No Disease) = 99%
print(f"\nPrior P(Disease): {prior_disease}")
print(f"Prior P(No Disease): {prior_no_disease}")
Conditional Probability Table:
Positive Negative
Disease 0.95 0.05
No Disease 0.05 0.95
Row sums (should = 1.0): [1. 1.]
Prior P(Disease): 0.01
Prior P(No Disease): 0.99
• pd.DataFrame(data, index=conditions) - Create probability table
• df.sum(axis=1) - Check row normalization
• df.loc[row, col] - Access specific probabilities
Bayes' Theorem: P(H|E) = P(E|H) × P(H) / P(E)
In code: posterior = likelihood × prior / evidence
import numpy as np
def bayes_theorem(prior, likelihood, evidence=None, likelihood_complement=None):
    """
    Apply Bayes' theorem: P(H|E) = P(E|H) * P(H) / P(E).

    Args:
        prior: P(H) - prior probability of the hypothesis.
        likelihood: P(E|H) - probability of the evidence given the hypothesis.
        evidence: P(E) - total probability of the evidence. If omitted, it is
            derived from `likelihood_complement` via the law of total probability.
        likelihood_complement: P(E|not H) - probability of the evidence given
            the hypothesis is false. Used only when `evidence` is None.

    Returns:
        posterior: P(H|E) - probability of the hypothesis given the evidence.

    Raises:
        ValueError: if neither `evidence` nor `likelihood_complement` is given.
    """
    if evidence is None:
        # BUG FIX: the old fallback used evidence = likelihood * prior, which
        # forced the posterior to 1.0 for every input. Derive P(E) properly:
        # P(E) = P(E|H) * P(H) + P(E|not H) * P(not H)
        if likelihood_complement is None:
            raise ValueError(
                "Provide either `evidence` or `likelihood_complement` "
                "so P(E) can be computed."
            )
        evidence = likelihood * prior + likelihood_complement * (1 - prior)
    posterior = (likelihood * prior) / evidence
    return posterior
# Medical diagnosis example
prior_disease = 0.01   # P(Disease)
likelihood = 0.95      # P(Positive|Disease)
false_positive = 0.05  # P(Positive|No Disease)

# Evidence P(Positive) from the law of total probability:
# P(E) = P(E|H) * P(H) + P(E|not H) * P(not H)
evidence = likelihood * prior_disease + false_positive * (1 - prior_disease)

# Posterior belief after observing a positive test
posterior = bayes_theorem(prior_disease, likelihood, evidence)

print(f"Prior P(Disease): {prior_disease}")
print(f"Likelihood P(Positive|Disease): {likelihood}")
print(f"Evidence P(Positive): {evidence:.4f}")
print(f"Posterior P(Disease|Positive): {posterior:.4f}")
print(f"Interpretation: {posterior*100:.1f}% chance of disease given positive test")
Prior P(Disease): 0.01 Likelihood P(Positive|Disease): 0.95 Evidence P(Positive): 0.0590 Posterior P(Disease|Positive): 0.1610 Interpretation: 16.1% chance of disease given positive test
• posterior = (likelihood * prior) / evidence - Core formula
Matplotlib visualizes probability distributions, Bayesian updating, and statistical concepts.
import matplotlib.pyplot as plt
import numpy as np

# Bayesian updating example: Medical diagnosis
prior = 0.01           # Initial belief
likelihood = 0.95      # Test accuracy
false_positive = 0.05

# Posterior after a positive test, via Bayes' rule
evidence = (likelihood * prior) + (false_positive * (1 - prior))
posterior = (likelihood * prior) / evidence

# Side-by-side bar charts: belief before vs. after seeing the evidence
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
panels = [
    (axes[0], prior, 'Before Test (Prior Belief)'),
    (axes[1], posterior, 'After Positive Test (Posterior Belief)'),
]
for ax, p_disease, title in panels:
    ax.bar(['Disease', 'No Disease'], [p_disease, 1 - p_disease],
           color=['#dc3545', '#32D583'], alpha=0.7)
    ax.set_title(title)
    ax.set_ylabel('Probability')
    ax.set_ylim(0, 1)

plt.tight_layout()
plt.show()

print(f"Prior belief: {prior*100}% chance of disease")
print(f"Posterior belief: {posterior*100:.1f}% chance of disease")
print("Evidence completely changed our belief!")
Prior belief: 1.0% chance of disease Posterior belief: 16.1% chance of disease Evidence completely changed our belief!
Bayesian updating: Prior → Posterior after seeing evidence
• plt.bar() - Compare probabilities before/after
• plt.plot() - Show probability distributions
• plt.hist() - Visualize sampling distributions
• plt.subplot() - Compare multiple scenarios
Complete pipeline: Define priors → Collect evidence → Update beliefs → Visualize results
import numpy as np
import pandas as pd
class BayesianSpamFilter:
    """Tiny naive-Bayes spam classifier over a hard-coded keyword vocabulary."""

    def __init__(self):
        # Prior: 40% of emails are spam
        self.prior_spam = 0.4
        self.prior_ham = 0.6
        # Likelihood table: P(word|class)
        self.likelihood = {
            'FREE': {'spam': 0.8, 'ham': 0.1},
            'WIN': {'spam': 0.7, 'ham': 0.05},
            'CLICK': {'spam': 0.65, 'ham': 0.15}
        }

    def calculate_posterior(self, words):
        """
        Calculate posterior probabilities for spam vs ham.

        Args:
            words: list of words in the email (words not in the
                likelihood table are ignored)

        Returns:
            dict with posterior probabilities, the evidence, and the
            per-class likelihoods
        """
        # Naive-Bayes likelihoods: product of P(word|class) over known words
        p_words_given_spam = 1.0
        p_words_given_ham = 1.0
        for word in words:
            word_probs = self.likelihood.get(word)
            if word_probs is not None:
                p_words_given_spam *= word_probs['spam']
                p_words_given_ham *= word_probs['ham']

        # Evidence P(words) via the law of total probability
        evidence = (p_words_given_spam * self.prior_spam
                    + p_words_given_ham * self.prior_ham)

        # Bayes' rule applied per class
        return {
            'spam': (p_words_given_spam * self.prior_spam) / evidence,
            'ham': (p_words_given_ham * self.prior_ham) / evidence,
            'evidence': evidence,
            'spam_likelihood': p_words_given_spam,
            'ham_likelihood': p_words_given_ham
        }
# Test the spam filter.
# Renamed `filter` -> `spam_filter`: the original name shadowed the
# built-in filter() function.
spam_filter = BayesianSpamFilter()
test_email = ['FREE', 'WIN', 'CLICK']
result = spam_filter.calculate_posterior(test_email)
print("Spam Filter Results:")
print(f"Email words: {test_email}")
print(f"Posterior P(Spam): {result['spam']:.4f}")
print(f"Posterior P(Ham): {result['ham']:.4f}")
print(f"Classification: {'SPAM' if result['spam'] > 0.5 else 'HAM'}")
Spam Filter Results: Email words: ['FREE', 'WIN', 'CLICK'] Posterior P(Spam): 0.9969 Posterior P(Ham): 0.0031 Classification: SPAM
• np.array([p1, p2]) - Probability distributions
• pd.DataFrame() - Conditional probability tables
• likelihood * prior / evidence - Bayes' theorem
• plt.bar() - Visualize belief changes
• np.random.choice() - Sample from distributions
NumPy arrays represent distributions
Pandas tables structure conditional probabilities
Matplotlib plots visualize belief updates
Python functions implement Bayes' theorem