Example: GMM Estimation
Risk Premia Estimation using GMM¶
Start by importing the modules and functions needed
from numpy import hstack, ones, array, mat, tile, reshape, squeeze, eye, asmatrix
from numpy.linalg import inv
from pandas import read_csv, Series
from scipy.linalg import kron
from scipy.optimize import fmin_bfgs
import numpy as np
import statsmodels.api as sm
Next a callable function is used to produce iteration-by-iteration output when using the non-linear optimizer.
iteration = 0
lastValue = 0
functionCount = 0
def iter_print(params):
global iteration, lastValue, functionCount
iteration += 1
print('Func value: {0:}, Iteration: {1:}, Function Count: {2:}'.format(lastValue, iteration, functionCount))
The GMM objective, which is minimized, is defined next.
def gmm_objective(params, pRets, fRets, Winv, out=False):
global lastValue, functionCount
T,N = pRets.shape
T,K = fRets.shape
beta = squeeze(array(params[:(N*K)]))
lam = squeeze(array(params[(N*K):]))
beta = reshape(beta,(N,K))
lam = reshape(lam,(K,1))
betalam = beta @ lam
expectedRet = fRets @ beta.T
e = pRets - expectedRet
instr = tile(fRets,N)
moments1 = kron(e,ones((1,K)))
moments1 = moments1 * instr
moments2 = pRets - betalam.T
moments = hstack((moments1,moments2))
avgMoment = moments.mean(axis=0)
J = T * mat(avgMoment) * mat(Winv) * mat(avgMoment).T
J = J[0,0]
lastValue = J
functionCount += 1
if not out:
return J
else:
return J, moments
The G
matrix, which is the derivative of the GMM moments with respect to the parameters, is defined.
def gmm_G(params, pRets, fRets):
T,N = pRets.shape
T,K = fRets.shape
beta = squeeze(array(params[:(N*K)]))
lam = squeeze(array(params[(N*K):]))
beta = reshape(beta,(N,K))
lam = reshape(lam,(K,1))
G = np.zeros((N*K+K,N*K+N))
ffp = (fRets.T @ fRets) / T
G[:(N*K),:(N*K)]=kron(eye(N),ffp)
G[:(N*K),(N*K):] = kron(eye(N),-lam)
G[(N*K):,(N*K):] = -beta.T
return G
Next, the data is imported and a subset of the test portfolios is selected to make the estimation faster.
data = read_csv('FamaFrench.csv')
# Split using both named colums and ix for larger blocks
dates = data['date'].values
factors = data[['VWMe','SMB','HML']].values
riskfree = data['RF'].values
portfolios = data.iloc[:,5:].values
T,N = portfolios.shape
portfolios = portfolios[:,np.arange(0,N,2)]
T,N = portfolios.shape
excessRet = portfolios - np.reshape(riskfree,(T,1))
K = np.size(factors,1)
Starting values for the factor loadings and rick premia are estimated using OLS and simple means.
betas = []
for i in range(N):
res = sm.OLS(excessRet[:,i],sm.add_constant(factors)).fit()
betas.append(res.params[1:])
avgReturn = excessRet.mean(axis=0)
avgReturn.shape = N,1
betas = array(betas)
res = sm.OLS(avgReturn, betas).fit()
riskPremia = res.params
The starting values are computed the first step estimates are found using the non-linear optimizer. The initial weighting matrix is just the identify matrix.
riskPremia.shape = 3
startingVals = np.concatenate((betas.flatten(),riskPremia))
Winv = np.eye(N*(K+1))
args = (excessRet, factors, Winv)
iteration = 0
functionCount = 0
step1opt = fmin_bfgs(gmm_objective, startingVals, args=args, callback=iter_print)
Here we look at the risk premia estimates from the first step (inefficient) estimates.
premia = step1opt[-3:]
premia = Series(premia,index=['VWMe', 'SMB', 'HML'])
print('Annualized Risk Premia (First step)')
print(12 * premia)
Next the first step estimates are used to estimate the moment conditions which are in-turn used to estimate the optimal weighting matrix for the moment conditions. This is then used as an input for the 2nd-step estimates.
out = gmm_objective(step1opt, excessRet, factors, Winv, out=True)
S = np.cov(out[1].T)
Winv2 = inv(S)
args = (excessRet, factors, Winv2)
iteration = 0
functionCount = 0
step2opt = fmin_bfgs(gmm_objective, step1opt, args=args, callback=iter_print)
Finally the VCV of the parameter estimates is computed.
out = gmm_objective(step2opt, excessRet, factors, Winv2, out=True)
G = gmm_G(step2opt, excessRet, factors)
S = np.cov(out[1].T)
vcv = inv(G @ inv(S) @ G.T)/T
The annualized risk premia and their associated t-stats.
premia = step2opt[-3:]
premia = Series(premia,index=['VWMe', 'SMB', 'HML'])
premia_vcv = vcv[-3:,-3:]
print('Annualized Risk Premia')
print(12 * premia)
premia_stderr = np.diag(premia_vcv)
premia_stderr = Series(premia_stderr,index=['VWMe', 'SMB', 'HML'])
print('T-stats')
print(premia / premia_stderr)