Source code for linearmodels.system.gmm

"""
Covariance and weight estimation for GMM IV estimators
"""

from __future__ import annotations

from collections.abc import Sequence
from typing import cast

import numpy
from numpy import array, empty, ndarray, repeat, sqrt, zeros_like

from linearmodels.asset_pricing.covariance import _HACMixin
from linearmodels.iv.covariance import kernel_optimal_bandwidth
from linearmodels.shared.utility import AttrDict
from linearmodels.system._utility import blocked_inner_prod
import linearmodels.typing.data



[docs]
class HomoskedasticWeightMatrix:
    r"""
    Homoskedastic (unadjusted) weight estimation

    Parameters
    ----------
    center : bool
        Flag indicating whether to center the moment conditions by subtracting
        the mean before computing the weight matrix.
    debiased : bool
        Flag indicating whether to use small-sample adjustments

    Notes
    -----
    The weight matrix estimator is

    .. math::

      Z'(\Sigma \otimes I_N)Z

    where :math:`Z` is a block diagonal matrix containing both the exogenous
    regressors and instruments and :math:`\Sigma` is the covariance of the
    model residuals.

    ``center`` has no effect on this estimator since it is always centered.
    """

    def __init__(self, center: bool = False, debiased: bool = False) -> None:
        self._center = center
        self._debiased = debiased
        self._bandwidth: float | None = 0
        self._name = "Homoskedastic (Unadjusted) Weighting"
        self._config = AttrDict(center=center, debiased=debiased)

    def __str__(self) -> str:
        out = self._name
        extra = []
        for key in self._str_extra:
            extra.append(": ".join([str(key), str(self._str_extra[key])]))
        if extra:
            out += " (" + ", ".join(extra) + ")"
        return out

    def __repr__(self) -> str:
        return self.__str__() + f", id: {hex(id(self))}"

    @property
    def _str_extra(self) -> AttrDict:
        return AttrDict(Debiased=self._debiased, Center=self._center)


[docs]
    def sigma(
        self,
        eps: linearmodels.typing.data.Float64Array,
        x: Sequence[linearmodels.typing.data.Float64Array],
    ) -> linearmodels.typing.data.Float64Array:
        """
        Estimate residual covariance.

        Parameters
        ----------
        eps : ndarray
            The residuals from the system of equations.
        x : list[ndarray]
            A list of the regressor matrices for each equation in the system.

        Returns
        -------
        ndarray
            The estimated covariance matrix of the residuals.
        """
        nobs = eps.shape[0]
        eps = eps - eps.mean(0)
        sigma = eps.T @ eps / nobs
        scale = 1.0
        if self._debiased:
            k = array([a.shape[1] for a in x])[:, None]
            k = sqrt(k)
            scale = nobs / (nobs - k @ k.T)
        sigma *= scale

        return sigma



[docs]
    def weight_matrix(
        self,
        x: Sequence[linearmodels.typing.data.Float64Array],
        z: Sequence[linearmodels.typing.data.Float64Array],
        eps: linearmodels.typing.data.Float64Array,
        *,
        sigma: numpy.ndarray,
    ) -> linearmodels.typing.data.Float64Array:
        """
        Construct a GMM weight matrix for a model.

        Parameters
        ----------
        x : list[ndarray]
            List of containing model regressors for each equation in the system
        z : list[ndarray]
            List of containing instruments for each equation in the system
        eps : ndarray
            Model errors (nobs by neqn)
        sigma : ndarray
            Fixed covariance of model errors. If None, estimated from eps.

        Returns
        -------
        ndarray
            Covariance of GMM moment conditions.
        """
        nobs = z[0].shape[0]
        w = cast(ndarray, blocked_inner_prod(z, sigma) / nobs)
        return w


    @property
    def config(self) -> AttrDict:
        """
        Weight estimator configuration

        Returns
        -------
        AttrDict
            Dictionary containing weight estimator configuration information
        """
        return self._config




[docs]
class HeteroskedasticWeightMatrix(HomoskedasticWeightMatrix):
    r"""
    Heteroskedasticity robust weight estimation

    Parameters
    ----------
    center : bool
        Flag indicating whether to center the moment conditions by subtracting
        the mean before computing the weight matrix.
    debiased : bool
        Flag indicating whether to use small-sample adjustments

    Notes
    -----
    The weight matrix estimator is

    .. math::

      W   & = n^{-1}\sum_{i=1}^{n}g'_ig_i \\
      g_i & = (z_{1i}\epsilon_{1i},z_{2i}\epsilon_{2i},\ldots,z_{ki}\epsilon_{ki})

    where :math:`g_i` is the vector of scores across all equations for
    observation i.  :math:`z_{ji}` is the vector of instruments for equation
    j and :math:`\epsilon_{ji}` is the error for equation j for observation
    i.  This form allows for heteroskedasticity and arbitrary cross-sectional
    dependence between the moment conditions.
    """

    def __init__(self, center: bool = False, debiased: bool = False) -> None:
        super().__init__(center, debiased)
        self._name = "Heteroskedastic (Robust) Weighting"


[docs]
    def weight_matrix(
        self,
        x: Sequence[linearmodels.typing.data.Float64Array],
        z: Sequence[linearmodels.typing.data.Float64Array],
        eps: linearmodels.typing.data.Float64Array,
        *,
        sigma: numpy.ndarray | None = None,
    ) -> linearmodels.typing.data.Float64Array:
        """
        Construct a GMM weight matrix for a model.

        Parameters
        ----------
        x : list[ndarray]
            Model regressors (exog and endog), (nobs by nvar)
        z : list[ndarray]
            Model instruments (exog and instruments), (nobs by ninstr)
        eps : ndarray
            Model errors (nobs by 1)
        sigma : ndarray
            Fixed covariance of model errors. If None, estimated from eps.

        Returns
        -------
        ndarray
            Covariance of GMM moment conditions.
        """
        nobs = x[0].shape[0]
        k = len(x)
        k_total = sum(map(lambda a: a.shape[1], z))
        ze = empty((nobs, k_total))
        loc = 0
        for i in range(k):
            e = eps[:, [i]]
            zk = z[i].shape[1]
            ze[:, loc : loc + zk] = z[i] * e
            loc += zk
        mu = ze.mean(axis=0) if self._center else 0
        ze -= mu
        w = ze.T @ ze / nobs
        scale = self._debias_scale(nobs, x, z)
        w *= scale

        return w


    def _debias_scale(
        self,
        nobs: int,
        x: Sequence[linearmodels.typing.data.Float64Array],
        z: Sequence[linearmodels.typing.data.Float64Array],
    ) -> linearmodels.typing.data.Float64Array:
        nvar = array([a.shape[1] for a in x])
        ninstr = array([a.shape[1] for a in z])
        nvar = repeat(nvar, ninstr)
        if not self._debiased:
            nvar = zeros_like(nvar)
        nvar = cast(linearmodels.typing.data.Float64Array, sqrt(nvar))[:, None]
        scale = nobs / (nobs - nvar @ nvar.T)
        return scale




[docs]
class KernelWeightMatrix(HeteroskedasticWeightMatrix, _HACMixin):
    r"""
    Heteroskedasticity robust weight estimation

    Parameters
    ----------
    center : bool
        Flag indicating whether to center the moment conditions by subtracting
        the mean before computing the weight matrix.
    debiased : bool
        Flag indicating whether to use small-sample adjustments
    kernel : str
        Name of kernel to use.  Supported kernels include:

        * "bartlett", "newey-west" : Bartlett's kernel
        * "parzen", "gallant" : Parzen's kernel
        * "qs", "quadratic-spectral", "andrews" : Quadratic spectral kernel

    bandwidth : float
        Bandwidth to use for the kernel.  If not provided the optimal
        bandwidth will be estimated.
    optimal_bw : bool
        Flag indicating whether to estimate the optimal bandwidth, when
        bandwidth is None.  If False, nobs - 2 is used


    Notes
    -----
    The weight matrix estimator is

    .. math::

      W & = \hat{\Gamma}_0+\sum_{i=1}^{n-1} w_i (\hat{\Gamma}_i+\hat{\Gamma}_i^\prime)
      \hat{\Gamma}_j & = n^{-1}\sum_{i=1}^{n-j} g'_ig_{i+j} \\
      g_i & = (z_{1i}\epsilon_{1i},z_{2i}\epsilon_{2i},\ldots,z_{ki}\epsilon_{ki})

    where :math:`g_i` is the vector of scores across all equations for
    observation i and :math:`w_j` are the kernel weights which depend on the
    selected kernel and bandwidth.  :math:`z_{ji}` is the vector of instruments
    for equation j and :math:`\epsilon_{ji}` is the error for equation j for
    observation i.  This form allows for heteroskedasticity and autocorrelation
    between the moment conditions.
    """

    def __init__(
        self,
        center: bool = False,
        debiased: bool = False,
        kernel: str = "bartlett",
        bandwidth: float | None = None,
        optimal_bw: bool = False,
    ) -> None:
        _HACMixin.__init__(self, kernel, bandwidth)
        super().__init__(center, debiased)
        self._name = "Kernel (HAC) Weighting"
        self._check_kernel(kernel)
        self._check_bandwidth(bandwidth)
        self._predefined_bw = self._bandwidth
        self._optimal_bw = optimal_bw


[docs]
    def weight_matrix(
        self,
        x: Sequence[linearmodels.typing.data.Float64Array],
        z: Sequence[linearmodels.typing.data.Float64Array],
        eps: linearmodels.typing.data.Float64Array,
        *,
        sigma: numpy.ndarray | None = None,
    ) -> linearmodels.typing.data.Float64Array:
        """
        Construct a GMM weight matrix for a model.

        Parameters
        ----------
        x : list[ndarray]
            Model regressors (exog and endog)
        z : list[ndarray]
            Model instruments (exog and instruments)
        eps : ndarray
            Model errors (nobs by nequation)
        sigma : ndarray
            Fixed covariance of model errors. If None, estimated from eps.

        Returns
        -------
        ndarray
            Covariance of GMM moment conditions.
        """
        nobs = x[0].shape[0]
        k = len(x)
        k_total = sum(map(lambda a: a.shape[1], z))
        ze = empty((nobs, k_total))
        loc = 0
        for i in range(k):
            e = eps[:, [i]]
            zk = z[i].shape[1]
            ze[:, loc : loc + zk] = z[i] * e
            loc += zk
        mu = ze.mean(axis=0) if self._center else 0
        ze -= mu
        self._optimal_bandwidth(ze)
        w = self._kernel_cov(ze)
        scale = self._debias_scale(nobs, x, z)
        w *= scale

        return w


    def _optimal_bandwidth(
        self, moments: linearmodels.typing.data.Float64Array
    ) -> float:
        """Compute optimal bandwidth used in estimation if needed"""
        if self._predefined_bw is not None:
            return self._predefined_bw
        elif not self._optimal_bw:
            self._bandwidth = moments.shape[0] - 2
        else:
            m = moments / moments.std(0)[None, :]
            m = m.sum(1)
            self._bandwidth = kernel_optimal_bandwidth(m, kernel=self.kernel)
        assert self._bandwidth is not None
        return self._bandwidth

    @property
    def bandwidth(self) -> float:
        """Bandwidth used to estimate covariance of moment conditions"""
        assert self._bandwidth is not None
        return self._bandwidth

    @property
    def config(self) -> AttrDict:
        """
        Weight estimator configuration

        Returns
        -------
        AttrDict
            Dictionary containing weight estimator configuration information
        """
        out = AttrDict([(k, v) for k, v in self._config.items()])
        out["bandwidth"] = self.bandwidth
        return out