Source code for linearmodels.shared.covariance
from __future__ import annotations
from numpy import (
any as npany,
arange,
argsort,
cumsum,
lexsort,
r_,
unique,
where,
zeros,
)
import linearmodels.typing.data
[docs]
def group_debias_coefficient(clusters: linearmodels.typing.data.IntArray) -> float:
r"""
Compute the group debiasing scale.
Parameters
----------
clusters : ndarray
One-dimensional array containing cluster group membership.
Returns
-------
float
The scale to debias.
Notes
-----
The debiasing coefficient is defined
.. math::
`\frac{g}{g-1}\frac{n-1}{n}`
where g is the number of groups and n is the sample size.
"""
n = clusters.shape[0]
ngroups = unique(clusters).shape[0]
return (ngroups / (ngroups - 1)) * ((n - 1) / n)
def cluster_union(
clusters: linearmodels.typing.data.IntArray,
) -> linearmodels.typing.data.IntArray:
"""
Compute a set of clusters that is nested within 2 clusters
Parameters
----------
clusters : ndarray
A nobs by 2 array of integer values of cluster group membership.
Returns
-------
ndarray
A nobs array of integer cluster group memberships
"""
sort_keys = lexsort(clusters.T)
locs = arange(clusters.shape[0])
lex_sorted = clusters[sort_keys]
sorted_locs = locs[sort_keys]
diff = npany(lex_sorted[1:] != lex_sorted[:-1], 1)
union = cumsum(r_[0, diff])
resort_locs = argsort(sorted_locs)
return union[resort_locs]
[docs]
def cov_cluster(
z: linearmodels.typing.data.Float64Array,
clusters: linearmodels.typing.data.AnyArray,
) -> linearmodels.typing.data.Float64Array:
"""
Core cluster covariance estimator
Parameters
----------
z : ndarray
n by k mean zero data array
clusters : ndarray
n by 1 array
Returns
-------
ndarray
k by k cluster asymptotic covariance
"""
num_clusters = len(unique(clusters))
sort_args = argsort(clusters)
clusters = clusters[sort_args]
locs = where(r_[True, clusters[:-1] != clusters[1:], True])[0]
z = z[sort_args]
n, k = z.shape
s = zeros((k, k))
for i in range(num_clusters):
st, en = locs[i], locs[i + 1]
z_bar = z[st:en].sum(axis=0)[:, None]
s += z_bar @ z_bar.T
s /= n
return s
[docs]
def cov_kernel(
z: linearmodels.typing.data.Float64Array, w: linearmodels.typing.data.Float64Array
) -> linearmodels.typing.data.Float64Array:
"""
Core kernel covariance estimator
Parameters
----------
z : ndarray
n by k mean zero data array
w : ndarray
m by 1
Returns
-------
ndarray
k by k kernel asymptotic covariance
"""
k = len(w)
n = z.shape[0]
if k > n:
raise ValueError(
"Length of w ({}) is larger than the number "
"of elements in z ({})".format(k, n)
)
s = z.T @ z
for i in range(1, len(w)):
op = z[i:].T @ z[:-i]
s += w[i] * (op + op.T)
s /= n
return s