In [3]:
using LinearAlgebra
using Distributions
using Random
using Statistics
using Plots
In [1]:
Σ = [3. 0.; 0. 2.] # variance-covariance matrix
Out[1]:
2×2 Array{Float64,2}:
 3.0  0.0
 0.0  2.0
In [4]:
C = cholesky(Σ) # Cholesky decomposition
P = Matrix(C.L)
P*P'
Out[4]:
2×2 Array{Float64,2}:
 3.0  0.0
 0.0  2.0
In [5]:
Random.seed!(1234)
Z = randn(1000,2);
In [6]:
D = P*Z'
#var(D[1,:])
#var(D[2,:])
Out[6]:
2×1000 Array{Float64,2}:
  1.50229  -1.56187   -0.856462  -1.56389   …   1.1876    1.24195  -0.220175
 -1.42722  -0.672214  -1.07486   -0.798631     -0.241095  1.17267  -0.636479
In [7]:
β = [2,5]    # β_0 = 2, β_1 = 5
X = hcat(ones(1000), D[1,:])
ϵ = D[2,:];
In [8]:
y = X*β + ϵ;
In [9]:
@time inv(X'*X)*X'*y # It is not very numerically efficient
  0.414244 seconds (917.16 k allocations: 44.742 MiB, 4.17% gc time)
Out[9]:
2-element Array{Float64,1}:
 2.0428301785237126
 5.015823553450844 
In [10]:
# F = qr(X)  # QR decomposition. More numerically efficient.
# Q,R = Matrix(F.Q), Matrix(F.R)
@time inv(Matrix(qr(X).R))*Matrix(qr(X).Q)'*y # Compare the time and allocations with the results above
  0.214174 seconds (295.05 k allocations: 14.277 MiB, 4.40% gc time)
Out[10]:
2-element Array{Float64,1}:
 2.042830178523713 
 5.0158235534508435
In [11]:
# Put everything above in a function
# Note: set seeds to make your results replicable
function β_ols(Σ,βs,seed=1234)
    C = cholesky(Σ)
    P = Matrix(C.L)
    Random.seed!(seed)
    Z = Matrix(randn(1000,2)')
    D = P*Z
    X = hcat(ones(1000),D[1,:])
    ϵ = D[2,:]
    F = qr(X)
    Q,R = Matrix(F.Q), Matrix(F.R)
    y = X*β + ϵ
    β_ols = inv(R)*Q'*y
    return β_ols
end
Out[11]:
β_ols (generic function with 2 methods)
In [12]:
β_ols(Σ,β,2421)   # Test if the functiton works
Out[12]:
2-element Array{Float64,1}:
 2.0336774706161136
 5.019591243787603 
In [13]:
seed = 123
β_set = []
for i in 1:10000
    push!(β_set,  β_ols(Σ, β,seed))
    seed +=1
end
In [14]:
β_set = hcat(β_set...) # convert the 10000 pairs of estimates into matrix form
Out[14]:
2×10000 Array{Float64,2}:
 2.04803  1.92401  2.03222  2.02809  …  2.05606  2.01724  2.00886  2.00395
 5.00802  5.00458  4.97281  4.98928     4.99891  4.98685  4.99324  5.03577
In [15]:
mean(β_set[2,:])
mean(β_set[1,:])
Out[15]:
1.9997938152916503
In [16]:
histogram(β_set[1,:],bins=30)  # Plot a histogram of β_0^hat
Out[16]:
1.9 2.0 2.1 2.2 0 500 1000 1500 y1
In [ ]: