Week 7: Discrete bivariate distributions

Back to main page

Range and histogram of a joint pmf

The range and the histogram of the joint pmf of rolling a dice and recording the larger value as $X$ and the smaller value as $Y$. The blue dot is the point $(\mu_X,\mu_Y)$ and it is the center of the joint distribution.

# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(6,6))

xmean=0
for x in range(1,7):
    xmean = xmean + x*(1/36+2*(6-x)/36)  

ymean=0
for y in range(1,7):
    ymean = ymean + y*(1/36+2*(y-1)/36)   


x_range = []
y_range = []
z_val = []
for y in range(1,7):
    plt.scatter(y,y, color="red", edgecolor="black",alpha=0.3)
    x_range.append(y)
    y_range.append(y)
    z_val.append(2/36)
    for x in range(1,y):
        x_range.append(x)
        y_range.append(y)
        z_val.append(1/36)
        plt.scatter(x,y, color="red", edgecolor="black",alpha=0.6)
        
        
plt.scatter(xmean, ymean, s=100 )        
plt.xlabel("x")
plt.ylabel("y")

plt.show();
# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

# setup the figure and axes
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111, projection='3d')

x_range = []
y_range = []
z_val = []
for y in range(1,7):
    x_range.append(y)
    y_range.append(y)
    z_val.append(1/36)
    for x in range(1,y):
        x_range.append(x)
        y_range.append(y)
        z_val.append(2/36)
        
x_cord = np.array(x_range)
y_cord = np.array(y_range)
z_cord = np.zeros(y_cord.shape)
x_width = np.ones(y_cord.shape)*0.8
y_width = np.ones(y_cord.shape)*0.8
z_height = np.array(z_val)        
ax.bar3d(x_cord-0.5, y_cord-0.5, z_cord, x_width, y_width, z_height, shade=True, alpha=1, color='#039be5', edgecolor='black')
ax.view_init(20,-90-25)

plt.show();

The range of the bivariate hypergeometric distribution

# nbi:hide_in
import numpy as np

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import comb
plt.figure(figsize=(10,10)) 


N=50
K=15
L=15
n=20


x_range = []
y_range = []
for x in range(K+1):
    for y in range(L+1):
        if (N-K-L>=n-x-y) & (n-x-y>=0):
            x_range.append(x)
            y_range.append(y)
            alpha = comb(K, x, exact=True) *comb(L, y, exact=True)* comb(N-K-L, n-x-y, exact=True) / comb(N, n, exact=True)
            plt.scatter(x,y, edgecolor="blue", color=(1,0,0,alpha*9))
            
plt.xticks(np.arange(0,K+1,1))
plt.yticks(np.arange(0,L+1,1))
plt.title("N={}, K={}, L={}".format(N,K,L))
plt.xlabel("x")
plt.ylabel("y")
plt.show();

The range of the trinomial distribution

# nbi:hide_in
import numpy as np

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import comb
plt.figure(figsize=(6,6)) 


n=10
pS=0.3
pF=0.25
pI=0.45

x_range = []
y_range = []
for x in range(n):
    for y in range(1,n-x+1):
        alpha = comb(n, x, exact=True) *comb(n-x, y) * pS**x * pF**y * pI**(n-x-y)
        plt.scatter(x,y, edgecolor="blue", color=(1,0,0,alpha*10))

plt.xticks(np.arange(0,n,1))
plt.yticks(np.arange(0,n,1))
plt.title(r"n={}, $p_S=${}, $p_F=${}, $p_I=${}".format(n,pS,pF, pI))
plt.xlabel("x")
plt.ylabel("y")

plt.show();
# nbi:hide_in
import numpy as np

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import comb
plt.figure(figsize=(6,6)) 


n=15
pS=6/10
pI=3/10
pF=1/10

x_range = []
y_range = []
for x in range(n):
    for y in range(1,n-x+1):
        alpha = comb(n, x, exact=True) *comb(n-x, y, exact=True) * pS**x * pI**y * pF**(n-x-y)
        plt.scatter(x,y, edgecolor="blue", color=(1,0,0,alpha*10))

plt.xticks(np.arange(0,n,1))
plt.yticks(np.arange(0,n,1))
plt.title(r"n={}, $p_S=${}, $p_I=${}, $p_F=${}".format(n,pS,pI, pF))
plt.xlabel("x")
plt.ylabel("y")

plt.show();

Least squares line fits a line into the distribution

# nbi:hide_in

import numpy as np

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import comb
plt.figure(figsize=(6,6)) 


N=50
K=15
L=25
n=20

meanx=n*K/N
meany=n*L/N
sigmax=np.sqrt(n*(K/N)*(1-K/N)*(N-n)/(N-1)) 
sigmay=np.sqrt(n*(L/N)*(1-L/N)*(N-n)/(N-1)) 
cov=-n*(N-n)*K*L/((N-1)*N**2)
rho = cov/(sigmax*sigmay)

x_range = []
y_range = []
for x in range(K+1):
    for y in range(L+1):
        if (N-K-L>=n-x-y) & (n-x-y>=0):
            x_range.append(x)
            y_range.append(y)
            alpha = comb(K, x, exact=True) *comb(L, y, exact=True)* comb(N-K-L, n-x-y, exact=True) / comb(N, n, exact=True)
            plt.scatter(x,y, edgecolor=(0,0,1,alpha*10), color=(1,0,0,alpha*10))
            
            
xval=np.linspace(0, K, 100)
yval = rho*sigmay/sigmax*(xval-meanx)+meany
plt.plot(xval,yval, color="red", linewidth=2)
plt.scatter(meanx, meany, s=100 )                    
plt.xticks(np.arange(0,K+1,1))
plt.yticks(np.arange(0,L+1,1))
plt.title("Linear regression line for bivariate hypergeometric")
plt.xlabel("x")
plt.ylabel("y")
plt.figtext(0.8,0.8, " N={}\n K={}\n L={}".format(N,K,L), ha="left", va="top",
            backgroundcolor=(0.1, 0.1, 1, 0.15), fontsize="large")
plt.show();