Lecture Week 2 Fri 10/11#

Visualizing random variables#

import numpy as np
# Simulate 10000 rolls of a six-sided die: randint's upper bound (7) is
# exclusive, so the samples are integers from 1 to 6.
x = np.random.randint(1, 7,10000)
import matplotlib.pyplot as plt
# Six unit-width bins with edges at 0.5, 1.5, ..., 6.5, so each bar is
# centred on exactly one face value.
plt.hist(x,bins=6, range=(0.5,6.5), edgecolor='black')
(array([1674., 1734., 1625., 1729., 1695., 1543.]),
 array([0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5]),
 <BarContainer object of 6 artists>)
../_images/155253d7739610458eb82a4205ff92e3f8f5761a00a7ece5561dfd12b0a8e28f.png
import seaborn
# discrete=True makes seaborn use unit-width bins centred on the integer
# values, so no manual bin edges are needed.
seaborn.histplot(x, discrete=True)
<Axes: ylabel='Count'>
../_images/9c36ae8c9729d9176458bb2a82cfb757b7dd62e4e401f46dc19bf669c9064186.png
# 10000 samples from the continuous uniform distribution on [0, 1).
x = np.random.uniform(0,1,10000)
# Four equal-width bins over [0, 1]; each should hold roughly a quarter
# of the samples.
plt.hist(x, bins=4,range=(0,1),edgecolor='black')
(array([2471., 2453., 2498., 2578.]),
 array([0.  , 0.25, 0.5 , 0.75, 1.  ]),
 <BarContainer object of 4 artists>)
../_images/6f3eda32bc3531dbb0e663f2037cf8a740a6625e280075d712b78a1fb49acf9d.png
# 10000 samples from the standard normal distribution (mean 0, variance 1).
x = np.random.randn(10000)
# No bins/range given, so the default binning is used (the output below
# shows 10 equal-width bins spanning the sampled values).
plt.hist(x, edgecolor='black')
(array([1.000e+01, 9.200e+01, 5.810e+02, 1.874e+03, 3.010e+03, 2.794e+03,
        1.316e+03, 2.860e+02, 3.400e+01, 3.000e+00]),
 array([-3.90184365, -3.08955506, -2.27726646, -1.46497786, -0.65268927,
         0.15959933,  0.97188793,  1.78417652,  2.59646512,  3.40875371,
         4.22104231]),
 <BarContainer object of 10 artists>)
../_images/d7e61a253fbdfa05f451db72e865f8317bd6562a905dafa3f37384869405e85c.png

Estimating probability#

Estimate probability by relative frequency

Run the random experiment N times

P(event A happens) ≈ (number of times A happens) / (total number of trials N)

# If X is standard normal,
# i.e. X is drawn from np.random.randn,
# what is the probability that X > 2?
# Start with a small sample size so the individual draws can be inspected.
N = 10
x = np.random.randn(N)
print(x)
[-1.91741214 -1.55955806 -0.39938613 -0.15342849 -1.47172459  0.34478221
  0.99400821 -1.6822069  -0.27688327 -0.30929252]
# Comparing an array with a scalar gives an element-wise array of True/False.
# In arithmetic on a boolean array, True counts as 1 and False as 0.
x>2
array([False, False, False, False, False, False, False, False, False,
       False])
# Count how many entries are True: summing a boolean array adds 1 per True.
np.sum(x>2)
0
# Relative frequency: (number of samples with x > 2) / (number of samples N).
np.sum(x>2)/N
0.0
# Same estimate of P(X > 2), this time with an explicit for loop
# and a larger sample.
N = 10000
x = np.random.randn(N)
count = 0
# Iterate over the samples directly; no index bookkeeping is needed.
for value in x:
    if value > 2:
        count += 1
# Relative frequency of the event X > 2 (the exact probability is about 0.0228).
print(count/N)
0.0224
# Throw two dice N times and estimate, by relative frequency,
# the probability that their sum is at least 7.
N = 1000000
d1 = np.random.randint(1, 7, size=N)  # first die: integers 1..6
d2 = np.random.randint(1, 7, size=N)  # second die, drawn separately
dsum = d1 + d2
# Number of trials whose total is >= 7, divided by the number of trials
# (the exact probability is 21/36, about 0.5833).
hits = np.sum(dsum >= 7)
hits / N
0.583966
# Sample mean of one die; the expected value of a fair die is 3.5.
np.mean(d1)
3.499787
# Sample mean of the sum of two dice; the expected value is 3.5 + 3.5 = 7.
np.mean(d1+d2)
7.002721