Source code for arch.univariate.base

"""
Core classes for ARCH models
"""

from abc import ABCMeta, abstractmethod
from collections.abc import Sequence
from copy import deepcopy
import datetime as dt
from functools import cached_property
from typing import Any, Callable, Optional, Union, cast
import warnings

import numpy as np
import pandas as pd
from pandas.util._decorators import deprecate_kwarg
from scipy.optimize import OptimizeResult
import scipy.stats as stats
from statsmodels.iolib.summary import Summary, fmt_2cols, fmt_params
from statsmodels.iolib.table import SimpleTable
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.numdiff import approx_fprime, approx_hess
from statsmodels.tools.tools import add_constant
from statsmodels.tsa.tsatools import lagmat

from arch.typing import (
    ArrayLike,
    ArrayLike1D,
    ArrayLike2D,
    DateLike,
    Float64Array,
    ForecastingMethod,
    Label,
    Literal,
)
from arch.univariate.distribution import Distribution, Normal
from arch.univariate.volatility import ConstantVariance, VolatilityProcess
from arch.utility.array import ensure1d
from arch.utility.exceptions import (
    ConvergenceWarning,
    DataScaleWarning,
    StartingValueWarning,
    convergence_warning,
    data_scale_warning,
    starting_value_warning,
)
from arch.utility.testing import WaldTestStatistic

try:
    from matplotlib.figure import Figure
except ImportError:
    pass


__all__ = [
    "implicit_constant",
    "ARCHModelResult",
    "ARCHModel",
    "ARCHModelForecast",
    "constraint",
    "format_float_fixed",
]

CONVERGENCE_WARNING: str = """\
WARNING: The optimizer did not indicate successful convergence. The message was {msg}.
See convergence_flag.
"""

# Callback variables
_callback_info = {"iter": 0, "llf": 0.0, "count": 0, "display": 1}


def _callback(parameters: Float64Array, *args: Any) -> None:
    """
    Callback for use in optimization

    Parameters
    ----------
    parameters : ndarray
        Parameter value (not used by function).
    *args
        Any other arguments passed to the minimizer.

    Notes
    -----
    Uses global values to track iteration, iteration display frequency,
    log likelihood and function count
    """

    _callback_info["iter"] += 1
    disp = "Iteration: {0:>6},   Func. Count: {1:>6.3g},   Neg. LLF: {2}"
    if _callback_info["iter"] % _callback_info["display"] == 0:
        print(
            disp.format(
                _callback_info["iter"], _callback_info["count"], _callback_info["llf"]
            )
        )


def constraint(a: Float64Array, b: Float64Array) -> list[dict[str, object]]:
    """
    Generate constraints from arrays

    Parameters
    ----------
    a : ndarray
        Parameter loadings
    b : ndarray
        Constraint bounds

    Returns
    -------
    constraints : list of dict
        List of inequality constraints, one for each row of a

    Notes
    -----
    Parameter constraints satisfy a.dot(parameters) - b >= 0
    """

    def factory(coeff: Float64Array, val: float) -> Callable[..., float]:
        def f(params: Float64Array, *args: Any) -> float:
            return np.dot(coeff, params) - val

        return f

    constraints = []
    for i in range(a.shape[0]):
        con = {"type": "ineq", "fun": factory(a[i], b[i])}
        constraints.append(con)

    return constraints
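
# Example: the GARCH-style constraint alpha + beta <= 1 is the single
# inequality -alpha - beta >= -1, encoded as one row of `a` and one element
# of `b` (illustrative values):
#
#   cons = constraint(np.array([[-1.0, -1.0]]), np.array([-1.0]))
#   cons[0]["fun"](np.array([0.1, 0.8]))  # 0.1 >= 0, constraint satisfied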


def format_float_fixed(x: float, max_digits: int = 10, decimal: int = 4) -> str:
    """Formats a floating point number so that if it can be well expressed
    in using a string with digits len, then it is converted simply, otherwise
    it is expressed in scientific notation"""
    # basic_format = '{:0.' + str(digits) + 'g}'
    if x == 0:
        return ("{:0." + str(decimal) + "f}").format(0.0)
    scale = np.log10(np.abs(x))
    scale = np.sign(scale) * np.ceil(np.abs(scale))
    if scale > (max_digits - 2 - decimal) or scale < -(decimal - 2):
        formatted = ("{0:" + str(max_digits) + "." + str(decimal) + "e}").format(x)
    else:
        formatted = ("{0:" + str(max_digits) + "." + str(decimal) + "f}").format(x)
    return formatted
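
# Example: values whose magnitude fits in the fixed-width budget are printed
# in fixed notation; others fall back to scientific notation (illustrative):
#
#   format_float_fixed(3.1415)       # '    3.1415'
#   format_float_fixed(123456789.0)  # '1.2346e+08'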


def implicit_constant(x: Float64Array) -> bool:
    """
    Test a matrix for an implicit constant

    Parameters
    ----------
    x : ndarray
        Array to be tested

    Returns
    -------
    constant : bool
        Flag indicating whether the array has an implicit constant - whether
        the array has a set of columns that adds to a constant value
    """
    nobs = x.shape[0]
    rank = np.linalg.matrix_rank(np.hstack((np.ones((nobs, 1)), x)))
    return rank == x.shape[1]
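
# Example: two dummy columns that sum to one in every row contain an implicit
# constant even though neither column is itself constant (illustrative):
#
#   x = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]])
#   implicit_constant(x)  # True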


class ARCHModel(metaclass=ABCMeta):
    """
    Abstract base class for mean models in ARCH processes.  Specifies the
    conditional mean process.

    All public methods that raise NotImplementedError should be overridden by
    any subclass.  Private methods that raise NotImplementedError are
    optional to override but recommended where applicable.
    """

    def __init__(
        self,
        y: Optional[ArrayLike] = None,
        volatility: Optional[VolatilityProcess] = None,
        distribution: Optional[Distribution] = None,
        hold_back: Optional[int] = None,
        rescale: Optional[bool] = None,
    ) -> None:
        self._name = "ARCHModel"
        self._is_pandas = isinstance(y, (pd.DataFrame, pd.Series))
        if y is not None:
            self._y_series = cast(pd.Series, ensure1d(y, "y", series=True))
        else:
            self._y_series = cast(
                pd.Series, ensure1d(np.empty((0,)), "y", series=True)
            )
        self._y = np.ascontiguousarray(self._y_series)
        if not np.all(np.isfinite(self._y)):
            raise ValueError(
                "NaN or inf values found in y. y must contain only finite values."
            )
        self._y_original = y

        self._fit_indices: list[int] = [0, int(self._y.shape[0])]
        self._fit_y = self._y

        self.hold_back: Optional[int] = hold_back
        self._hold_back = 0 if hold_back is None else hold_back

        self.rescale: Optional[bool] = rescale
        self.scale: float = 1.0

        self._backcast: Union[float, Float64Array, None] = None
        self._var_bounds: Optional[Float64Array] = None

        if isinstance(volatility, VolatilityProcess):
            self._volatility = volatility
        elif volatility is None:
            self._volatility = ConstantVariance()
        else:
            raise TypeError("volatility must inherit from VolatilityProcess")

        if isinstance(distribution, Distribution):
            self._distribution = distribution
        elif distribution is None:
            self._distribution = Normal()
        else:
            raise TypeError("distribution must inherit from Distribution")

    @property
    def name(self) -> str:
        """The name of the model."""
        return self._name

    def constraints(self) -> tuple[Float64Array, Float64Array]:
        """
        Construct linear constraint arrays for use in non-linear optimization

        Returns
        -------
        a : ndarray
            Number of constraints by number of parameters loading array
        b : ndarray
            Number of constraints array of lower bounds

        Notes
        -----
        Parameters satisfy a.dot(parameters) - b >= 0
        """
        return np.empty((0, self.num_params)), np.empty(0)

    def bounds(self) -> list[tuple[float, float]]:
        """
        Construct bounds for parameters to use in non-linear optimization

        Returns
        -------
        bounds : list (2-tuple of float)
            Bounds for parameters to use in estimation.
        """
        num_params = self.num_params
        return [(-np.inf, np.inf)] * num_params

    @property
    def y(self) -> Optional[ArrayLike]:
        """Returns the dependent variable"""
        return self._y_original

    @property
    def volatility(self) -> VolatilityProcess:
        """Gets or sets the volatility process

        Volatility processes must be a subclass of VolatilityProcess
        """
        return self._volatility

    @volatility.setter
    def volatility(self, value: VolatilityProcess) -> None:
        if not isinstance(value, VolatilityProcess):
            raise ValueError("Must subclass VolatilityProcess")
        self._volatility = value

    @property
    def distribution(self) -> Distribution:
        """Gets or sets the error distribution

        Distributions must be a subclass of Distribution
        """
        return self._distribution

    @distribution.setter
    def distribution(self, value: Distribution) -> None:
        if not isinstance(value, Distribution):
            raise ValueError("Must subclass Distribution")
        self._distribution = value

    def _check_scale(self, resids: Float64Array) -> None:
        check = self.rescale in (None, True)
        if not check:
            return
        orig_scale = scale = resids.var()
        rescale = 1.0
        while not 0.1 <= scale < 10000.0 and scale > 0:
            if scale < 1.0:
                rescale *= 10
            else:
                rescale /= 10
            scale = orig_scale * rescale**2
        if rescale == 1.0:
            return
        if self.rescale is None:
            warnings.warn(
                data_scale_warning.format(orig_scale, rescale), DataScaleWarning
            )
            return
        self.scale = rescale

    @abstractmethod
    def _scale_changed(self) -> None:
        """
        Called when the scale has changed.  This allows the model to update
        any values that are affected by the scale changes, e.g., any logged
        values.
        """

    def _r2(self, params: ArrayLike1D) -> Optional[float]:
        """
        Computes the model r-squared.  Optional to override.  Must match
        signature.
        """
        raise NotImplementedError("Subclasses optionally may provide.")

    @abstractmethod
    def _fit_no_arch_normal_errors_params(self) -> Float64Array:
        """
        Must be overridden with a closed form estimator that returns the
        parameters only
        """

    @abstractmethod
    def _fit_no_arch_normal_errors(
        self, cov_type: Literal["robust", "classic"] = "robust"
    ) -> "ARCHModelResult":
        """
        Must be overridden with a closed form estimator
        """

    @staticmethod
    def _static_gaussian_loglikelihood(resids: Float64Array) -> float:
        nobs = resids.shape[0]
        sigma2 = resids.dot(resids) / nobs
        loglikelihood = -0.5 * nobs * np.log(2 * np.pi)
        loglikelihood -= 0.5 * nobs * np.log(sigma2)
        loglikelihood -= 0.5 * nobs
        return loglikelihood

    def _fit_parameterless_model(
        self,
        cov_type: Literal["robust", "classic"],
        backcast: Union[float, Float64Array],
    ) -> "ARCHModelResult":
        """
        When models have no parameters, fill in the return values

        Returns
        -------
        results : ARCHModelResult
            Model result from parameterless model
        """
        y = self._fit_y
        # Fake convergence results, see GH #87
        opt = OptimizeResult({"status": 0, "message": ""})

        params = np.empty(0)
        param_cov = np.empty((0, 0))
        first_obs, last_obs = self._fit_indices
        resids_final = np.full_like(self._y, np.nan)
        resids_final[first_obs:last_obs] = y

        var_bounds = self.volatility.variance_bounds(y)
        vol = np.zeros_like(y)
        self.volatility.compute_variance(params, y, vol, backcast, var_bounds)
        vol = cast(Float64Array, np.sqrt(vol))

        # Reshape resids and vol
        vol_final = np.empty_like(self._y, dtype=np.double)
        vol_final.fill(np.nan)
        vol_final[first_obs:last_obs] = vol

        names = self._all_parameter_names()
        r2 = self._r2(params)
        fit_start, fit_stop = self._fit_indices
        loglikelihood = -1.0 * self._loglikelihood(
            params, vol**2 * np.ones(fit_stop - fit_start), backcast, var_bounds
        )
        assert isinstance(r2, float)
        return ARCHModelResult(
            params,
            param_cov,
            r2,
            resids_final,
            vol_final,
            cov_type,
            self._y_series,
            names,
            loglikelihood,
            self._is_pandas,
            opt,
            fit_start,
            fit_stop,
            deepcopy(self),
        )

    def _loglikelihood(
        self,
        parameters: Float64Array,
        sigma2: Float64Array,
        backcast: Union[float, Float64Array],
        var_bounds: Float64Array,
        individual: bool = False,
    ) -> Union[float, Float64Array]:
        """
        Computes the log-likelihood using the entire model

        Parameters
        ----------
        parameters : ndarray
            Stacked mean, volatility and distribution parameters
        sigma2 : ndarray
            Array used to store the computed conditional variances
        backcast : {float, ndarray}
            Value to use for pre-sample observations
        var_bounds : ndarray
            Bounds on the conditional variances
        individual : bool, optional
            Flag indicating whether to return the array of individual
            negative log-likelihoods instead of their sum

        Returns
        -------
        neg_llf : float
            Negative of model loglikelihood
        """
        # Parse parameters
        _callback_info["count"] += 1

        # 1. Resids
        mp, vp, dp = self._parse_parameters(parameters)
        resids = np.asarray(self.resids(mp), dtype=float)

        # 2. Compute sigma2 using VolatilityModel
        sigma2 = self.volatility.compute_variance(
            vp, resids, sigma2, backcast, var_bounds
        )
        # 3. Compute log likelihood using Distribution
        llf = self.distribution.loglikelihood(dp, resids, sigma2, individual)

        if not individual:
            _callback_info["llf"] = llf_f = -float(llf)
            return llf_f

        return cast(np.ndarray, -llf)

    def _all_parameter_names(self) -> list[str]:
        """Returns a list containing all parameter names from the mean model,
        volatility model and distribution"""

        names = self.parameter_names()
        names.extend(self.volatility.parameter_names())
        names.extend(self.distribution.parameter_names())

        return names

    def _parse_parameters(
        self,
        x: Union[ArrayLike1D, Sequence[float]],
    ) -> tuple[Float64Array, Float64Array, Float64Array]:
        """Return the parameters of each model in a tuple"""
        x = np.asarray(x, dtype=float)
        km, kv = int(self.num_params), int(self.volatility.num_params)
        return x[:km], x[km : km + kv], x[km + kv :]

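    # Example: for a constant-mean GARCH(1,1) model with normal errors, the
    # stacked vector [mu, omega, alpha, beta] is split by _parse_parameters
    # into mp = [mu], vp = [omega, alpha, beta] and dp = [] (illustrative).
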
    def fix(
        self,
        params: Union[ArrayLike1D, Sequence[float]],
        first_obs: Union[int, DateLike, None] = None,
        last_obs: Union[int, DateLike, None] = None,
    ) -> "ARCHModelFixedResult":
        """
        Allows an ARCHModelFixedResult to be constructed from fixed
        parameters.

        Parameters
        ----------
        params : {ndarray, Series}
            User specified parameters to use when generating the result. Must
            have the correct number of parameters for a given choice of mean
            model, volatility model and distribution.
        first_obs : {int, str, datetime, Timestamp}
            First observation to use when fixing model
        last_obs : {int, str, datetime, Timestamp}
            Last observation to use when fixing model

        Returns
        -------
        results : ARCHModelFixedResult
            Object containing model results

        Notes
        -----
        Parameters are not checked against model-specific constraints.
        """
        v = self.volatility

        self._adjust_sample(first_obs, last_obs)
        resids = np.asarray(self.resids(self.starting_values()), dtype=float)
        sigma2 = np.zeros_like(resids)
        backcast = v.backcast(resids)
        self._backcast = backcast
        var_bounds = v.variance_bounds(resids)

        params = ensure1d(params, "params", False)
        loglikelihood = -1.0 * self._loglikelihood(
            params, sigma2, backcast, var_bounds
        )
        assert isinstance(loglikelihood, float)

        mp, vp, dp = self._parse_parameters(params)

        resids = np.asarray(self.resids(mp), dtype=float)
        vol = np.zeros_like(resids)
        self.volatility.compute_variance(vp, resids, vol, backcast, var_bounds)
        vol = np.asarray(np.sqrt(vol))

        names = self._all_parameter_names()
        # Reshape resids and vol
        first_obs, last_obs = self._fit_indices
        resids_final = np.full_like(self._y, np.nan, dtype=np.double)
        resids_final[first_obs:last_obs] = resids

        vol_final = np.empty_like(self._y, dtype=np.double)
        vol_final.fill(np.nan)
        vol_final[first_obs:last_obs] = vol

        model_copy = deepcopy(self)
        return ARCHModelFixedResult(
            params,
            resids_final,
            vol_final,
            self._y_series,
            names,
            loglikelihood,
            self._is_pandas,
            model_copy,
        )

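    # Example: evaluating, rather than estimating, a default constant-mean
    # GARCH(1,1) model at user-specified values [mu, omega, alpha, beta]
    # (illustrative; `returns` is a placeholder series):
    #
    #   from arch import arch_model
    #   res = arch_model(returns).fix([0.0, 0.05, 0.1, 0.85])
    #   print(res.summary())
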
    @abstractmethod
    def _adjust_sample(
        self,
        first_obs: Union[int, DateLike, None],
        last_obs: Union[int, DateLike, None],
    ) -> None:
        """
        Performs sample adjustment for estimation

        Parameters
        ----------
        first_obs : {int, str, datetime, datetime64, Timestamp}
            First observation to use when estimating model
        last_obs : {int, str, datetime, datetime64, Timestamp}
            Last observation to use when estimating model

        Notes
        -----
        Adjusted sample must follow Python semantics of first_obs:last_obs
        """

    def fit(
        self,
        update_freq: int = 1,
        disp: Union[bool, Literal["off"], Literal["final"]] = "final",
        starting_values: Optional[ArrayLike1D] = None,
        cov_type: Literal["robust", "classic"] = "robust",
        show_warning: bool = True,
        first_obs: Union[int, DateLike, None] = None,
        last_obs: Union[int, DateLike, None] = None,
        tol: Optional[float] = None,
        options: Optional[dict[str, Any]] = None,
        backcast: Union[float, Float64Array, None] = None,
    ) -> "ARCHModelResult":
        r"""
        Estimate model parameters

        Parameters
        ----------
        update_freq : int, optional
            Frequency of iteration updates.  Output is generated every
            `update_freq` iterations.  Set to 0 to disable iterative output.
        disp : {bool, "off", "final"}
            Either 'final' to print optimization result or 'off' to display
            nothing.  If using a boolean, False is "off" and True is "final"
        starting_values : ndarray, optional
            Array of starting values to use.  If not provided, starting
            values are constructed by the model components.
        cov_type : str, optional
            Estimation method of parameter covariance.  Supported options are
            'robust', which does not assume the Information Matrix Equality
            holds, and 'classic' which does.  In the ARCH literature,
            'robust' corresponds to the Bollerslev-Wooldridge covariance
            estimator.
        show_warning : bool, optional
            Flag indicating whether convergence warnings should be shown.
        first_obs : {int, str, datetime, Timestamp}
            First observation to use when estimating model
        last_obs : {int, str, datetime, Timestamp}
            Last observation to use when estimating model
        tol : float, optional
            Tolerance for termination.
        options : dict, optional
            Options to pass to `scipy.optimize.minimize`.  Valid entries
            include 'ftol', 'eps', 'disp', and 'maxiter'.
        backcast : {float, ndarray}, optional
            Value to use as backcast.  Should be a measure of
            :math:`\sigma^2_0` since model-specific non-linear
            transformations are applied to the value before computing the
            variance recursions.

        Returns
        -------
        results : ARCHModelResult
            Object containing model results

        Notes
        -----
        A ConvergenceWarning is raised if SciPy's optimizer indicates
        difficulty finding the optimum.

        Parameters are optimized using SLSQP.
        """
        if self._y_original is None:
            raise RuntimeError("Cannot estimate model without data.")

        # 1. Check if ARCH or non-normal dist.  If no ARCH and normal,
        # use closed form
        v, d = self.volatility, self.distribution
        offsets = np.array((self.num_params, v.num_params, d.num_params), dtype=int)
        total_params = sum(offsets)
        # Closed form is applicable when model has no parameters
        # or when distribution is normal and variance is constant
        has_closed_form = (
            v.closed_form and d.num_params == 0 and isinstance(v, ConstantVariance)
        )

        self._adjust_sample(first_obs, last_obs)
        resids = np.asarray(self.resids(self.starting_values()), dtype=float)
        self._check_scale(resids)
        if self.scale != 1.0:
            # Scale changed, rescale data and reset model
            self._y = cast(np.ndarray, self.scale * np.asarray(self._y_original))
            self._scale_changed()
            self._adjust_sample(first_obs, last_obs)
            resids = np.asarray(self.resids(self.starting_values()), dtype=float)

        if backcast is None:
            backcast = v.backcast(resids)
        else:
            assert backcast is not None
            backcast = v.backcast_transform(backcast)

        if has_closed_form:
            try:
                return self._fit_no_arch_normal_errors(cov_type=cov_type)
            except NotImplementedError:
                pass

        assert backcast is not None
        if total_params == 0:
            return self._fit_parameterless_model(cov_type=cov_type, backcast=backcast)

        sigma2 = np.zeros_like(resids)
        self._backcast = backcast
        sv_volatility = v.starting_values(resids)
        self._var_bounds = var_bounds = v.variance_bounds(resids)
        v.compute_variance(sv_volatility, resids, sigma2, backcast, var_bounds)
        std_resids = resids / np.sqrt(sigma2)

        # 2. Construct constraint matrices from all models and distribution
        constraints = (
            self.constraints(),
            self.volatility.constraints(),
            self.distribution.constraints(),
        )
        num_cons = []
        for c in constraints:
            assert c is not None
            num_cons.append(c[0].shape[0])
        num_constraints = np.array(num_cons, dtype=int)
        num_params = offsets.sum()
        a = np.zeros((int(num_constraints.sum()), int(num_params)))
        b = np.zeros(int(num_constraints.sum()))
        for i, c in enumerate(constraints):
            assert c is not None
            r_en = num_constraints[: i + 1].sum()
            c_en = offsets[: i + 1].sum()
            r_st = r_en - num_constraints[i]
            c_st = c_en - offsets[i]
            if r_en - r_st > 0:
                a[r_st:r_en, c_st:c_en] = c[0]
                b[r_st:r_en] = c[1]

        bounds = self.bounds()
        bounds.extend(v.bounds(resids))
        bounds.extend(d.bounds(std_resids))

        # 3. Construct starting values from all models
        sv = starting_values
        if starting_values is not None:
            assert sv is not None
            sv = ensure1d(sv, "starting_values")
            valid = sv.shape[0] == num_params
            if a.shape[0] > 0:
                satisfies_constraints = a.dot(sv) - b >= 0
                valid = valid and satisfies_constraints.all()
            for i, bound in enumerate(bounds):
                valid = valid and bound[0] <= sv[i] <= bound[1]
            if not valid:
                warnings.warn(starting_value_warning, StartingValueWarning)
                starting_values = None

        if starting_values is None:
            sv = np.hstack(
                [self.starting_values(), sv_volatility, d.starting_values(std_resids)]
            )

        # 4. Estimate models using constrained optimization
        _callback_info["count"], _callback_info["iter"] = 0, 0
        if not isinstance(disp, str):
            disp = bool(disp)
            disp = "off" if not disp else "final"
        if update_freq <= 0 or disp == "off":
            _callback_info["display"] = 2**31
        else:
            _callback_info["display"] = update_freq
        disp_flag = True if disp == "final" else False

        func = self._loglikelihood
        args = (sigma2, backcast, var_bounds)
        ineq_constraints = constraint(a, b)

        from scipy.optimize import minimize

        options = {} if options is None else options
        options.setdefault("disp", disp_flag)
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                "Values in x were outside bounds during a minimize step",
                RuntimeWarning,
            )
            opt = minimize(
                func,
                sv,
                args=args,
                method="SLSQP",
                bounds=bounds,
                constraints=ineq_constraints,
                tol=tol,
                callback=_callback,
                options=options,
            )

        if show_warning:
            warnings.filterwarnings("always", "", ConvergenceWarning)
        else:
            warnings.filterwarnings("ignore", "", ConvergenceWarning)

        if opt.status != 0 and show_warning:
            warnings.warn(
                convergence_warning.format(code=opt.status, string_message=opt.message),
                ConvergenceWarning,
            )

        # 5. Return results
        params = opt.x
        loglikelihood = -1.0 * opt.fun

        mp, vp, dp = self._parse_parameters(params)

        resids = np.asarray(self.resids(mp), dtype=float)
        vol = np.zeros_like(resids)
        self.volatility.compute_variance(vp, resids, vol, backcast, var_bounds)
        vol = cast(Float64Array, np.sqrt(vol))

        try:
            r2 = self._r2(mp)
        except NotImplementedError:
            r2 = np.nan

        names = self._all_parameter_names()
        # Reshape resids and vol
        first_obs, last_obs = self._fit_indices
        resids_final = np.empty_like(self._y, dtype=np.double)
        resids_final.fill(np.nan)
        resids_final[first_obs:last_obs] = resids

        vol_final = np.empty_like(self._y, dtype=np.double)
        vol_final.fill(np.nan)
        vol_final[first_obs:last_obs] = vol

        fit_start, fit_stop = self._fit_indices
        model_copy = deepcopy(self)
        assert isinstance(r2, float)
        return ARCHModelResult(
            params,
            None,
            r2,
            resids_final,
            vol_final,
            cov_type,
            self._y_series,
            names,
            loglikelihood,
            self._is_pandas,
            opt,
            fit_start,
            fit_stop,
            model_copy,
        )

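    # Example: typical estimation through the arch_model entry point
    # (illustrative; `returns` is a placeholder series):
    #
    #   from arch import arch_model
    #   am = arch_model(returns)  # constant mean, GARCH(1,1), normal errors
    #   res = am.fit(update_freq=5, disp="off", cov_type="robust")
    #   print(res.summary())
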
    @abstractmethod
    def parameter_names(self) -> list[str]:
        """List of parameter names

        Returns
        -------
        names : list (str)
            List of variable names for the mean model
        """

    def starting_values(self) -> Float64Array:
        """
        Returns starting values for the mean model, often the same as the
        values returned from fit

        Returns
        -------
        sv : ndarray
            Starting values
        """
        return self._fit_no_arch_normal_errors_params()

    @cached_property
    @abstractmethod
    def num_params(self) -> int:
        """
        Number of parameters in the model
        """

    @abstractmethod
    def simulate(
        self,
        params: Union[ArrayLike1D, Sequence[float]],
        nobs: int,
        burn: int = 500,
        initial_value: Optional[float] = None,
        x: Optional[ArrayLike] = None,
        initial_value_vol: Optional[float] = None,
    ) -> pd.DataFrame:
        pass

    @abstractmethod
    def resids(
        self,
        params: Float64Array,
        y: Optional[ArrayLike1D] = None,
        regressors: Optional[ArrayLike2D] = None,
    ) -> ArrayLike1D:
        """
        Compute model residuals

        Parameters
        ----------
        params : ndarray
            Model parameters
        y : ndarray, optional
            Alternative values to use when computing model residuals
        regressors : ndarray, optional
            Alternative regressor values to use when computing model
            residuals

        Returns
        -------
        resids : ndarray
            Model residuals
        """

    def compute_param_cov(
        self,
        params: Float64Array,
        backcast: Union[float, Float64Array, None] = None,
        robust: bool = True,
    ) -> Float64Array:
        """
        Computes parameter covariances using numerical derivatives.

        Parameters
        ----------
        params : ndarray
            Model parameters
        backcast : float
            Value to use for pre-sample observations
        robust : bool, optional
            Flag indicating whether to use robust standard errors (True) or
            classic MLE (False)
        """
        resids = np.asarray(self.resids(self.starting_values()), dtype=float)
        var_bounds = self.volatility.variance_bounds(resids)
        nobs = resids.shape[0]
        if backcast is None and self._backcast is None:
            backcast = self.volatility.backcast(resids)
            self._backcast = backcast
        elif backcast is None:
            backcast = self._backcast

        kwargs = {
            "sigma2": np.zeros_like(resids),
            "backcast": backcast,
            "var_bounds": var_bounds,
            "individual": False,
        }

        hess = approx_hess(params, self._loglikelihood, kwargs=kwargs)
        hess /= nobs
        inv_hess = np.linalg.inv(hess)
        if robust:
            kwargs["individual"] = True
            scores = approx_fprime(
                params, self._loglikelihood, kwargs=kwargs
            )  # type: np.ndarray
            score_cov = np.cov(scores.T)
            return inv_hess.dot(score_cov).dot(inv_hess) / nobs
        else:
            return inv_hess / nobs

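    # The robust branch above is the Bollerslev-Wooldridge sandwich
    # estimator: with H the average Hessian of the log-likelihood and S the
    # covariance of the per-observation scores, the parameter covariance is
    # estimated as inv(H).dot(S).dot(inv(H)) / nobs.
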
    @abstractmethod
    def forecast(
        self,
        params: ArrayLike1D,
        horizon: int = 1,
        start: Union[int, DateLike, None] = None,
        align: Literal["origin", "target"] = "origin",
        method: ForecastingMethod = "analytic",
        simulations: int = 1000,
        rng: Optional[Callable[[Union[int, tuple[int, ...]]], Float64Array]] = None,
        random_state: Optional[np.random.RandomState] = None,
        *,
        reindex: bool = False,
        x: Union[dict[Label, ArrayLike], ArrayLike, None] = None,
    ) -> "ARCHModelForecast":
        """
        Construct forecasts from estimated model

        Parameters
        ----------
        params : {ndarray, Series}
            Parameters required to forecast.  Must be identical in shape to
            the parameters computed by fitting the model.
        horizon : int, optional
            Number of steps to forecast
        start : {int, datetime, Timestamp, str}, optional
            An integer, datetime or str indicating the first observation to
            produce the forecast for.  Datetimes can only be used with pandas
            inputs that have a datetime index.  Strings must be convertible
            to a date time, such as '1945-01-01'.
        align : str, optional
            Either 'origin' or 'target'.  When set to 'origin', the t-th row
            of forecasts contains the forecasts for t+1, t+2, ..., t+h.  When
            set to 'target', the t-th row contains the 1-step ahead forecast
            from time t-1, the 2-step from time t-2, ..., and the h-step from
            time t-h.  'target' simplifies computing forecast errors since
            the realization and h-step forecast are aligned.
        method : {'analytic', 'simulation', 'bootstrap'}
            Method to use when producing the forecast.  The default is
            analytic.  The method only affects the variance forecast
            generation.  Not all volatility models support all methods.  In
            particular, volatility models that do not evolve in squares such
            as EGARCH or TARCH do not support the 'analytic' method for
            horizons > 1.
        simulations : int
            Number of simulations to run when computing the forecast using
            either simulation or bootstrap.
        rng : callable, optional
            Custom random number generator to use in simulation-based
            forecasts.  Must produce random samples using the syntax
            `rng(size)` where size is the 2-element tuple
            (simulations, horizon).
        random_state : RandomState, optional
            NumPy RandomState instance to use when method is 'bootstrap'
        reindex : bool, optional
            Whether to reindex the forecasts to have the same dimension as
            the series being forecast.  Prior to 4.18 this was the default.
            As of 4.19 this became optional.

            .. versionchanged:: 6.2

               The default has been changed to False.
        x : {dict[label, array_like], array_like}
            Values to use for exogenous regressors if any are included in the
            model.  Three formats are accepted:

            * 2-d array-like: This format can be used when there is a single
              exogenous variable.  The input must have shape
              (nforecast, horizon) or (nobs, horizon) where nforecast is the
              number of forecasting periods and nobs is the original shape of
              y.  For example, if a single series of forecasts are made from
              the end of the sample with a horizon of 10, then the input can
              be (1, 10).  Alternatively, if the original data had 1000
              observations, then the input can be (1000, 10), and only the
              final row is used to produce forecasts.  When using an
              (nobs, horizon) array, the values must be aligned so that all
              values in row t are out-of-sample at time-t.
            * A dictionary of 2-d array-like: This format is identical to the
              previous except that the dictionary keys must match the names
              of the exog variables.  Requires that the exog variables were
              passed as a pandas DataFrame.
            * A 3-d NumPy array (or equivalent): In this format, each panel
              (0th axis) is a 2-d array that must have shape
              (nforecast, horizon) or (nobs, horizon).  The array x[j]
              corresponds to the j-th column of the exogenous variables.

            Due to the complexity required to accommodate all scenarios,
            please see the example notebook that demonstrates the valid
            formats for x, and discusses alignment.

            .. versionadded:: 4.19

        Returns
        -------
        arch.univariate.base.ARCHModelForecast
            Container for forecasts.  Key properties are ``mean``,
            ``variance`` and ``residual_variance``.

        Examples
        --------
        >>> import pandas as pd
        >>> from arch import arch_model
        >>> am = arch_model(None, mean='HAR', lags=[1,5,22], vol='Constant')
        >>> sim_data = am.simulate([0.1,0.4,0.3,0.2,1.0], 250)
        >>> sim_data.index = pd.date_range('2000-01-01', periods=250)
        >>> am = arch_model(sim_data['data'], mean='HAR', lags=[1,5,22], vol='Constant')
        >>> res = am.fit()
        >>> fig = res.hedgehog_plot()

        Notes
        -----
        The most basic 1-step ahead forecast will return a vector with the
        same length as the original data, where the t-th value will be the
        time-t forecast for time t + 1.  When the horizon is > 1, and when
        using the default value for `align`, the forecast value in position
        [t, h] is the time-t, h+1 step ahead forecast.

        If model contains exogenous variables (model.x is not None), then
        only 1-step ahead forecasts are available.  Using horizon > 1 will
        produce a warning and all columns, except the first, will be
        nan-filled.

        If `align` is 'origin', forecast[t, h] contains the forecast made
        using y[:t] (that is, up to but not including t) for horizon h + 1.
        For example, y[100, 2] contains the 3-step ahead forecast using the
        first 100 data points, which will correspond to the realization
        y[100 + 2].  If `align` is 'target', then the same forecast is in
        location [102, 2], so that it is aligned with the observation to use
        when evaluating, but still in the same column.
        """

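    # Example: forecasting when the mean model includes one exogenous
    # regressor, using the (nforecast, horizon) format for `x` (illustrative;
    # assumes a fitted result `res` from such a model):
    #
    #   x_future = np.zeros((1, 1))  # assumed next-period regressor value
    #   fcast = res.forecast(horizon=1, x=x_future, reindex=False)

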
class _SummaryRepr:
    """Base class for returning summary as repr and str"""

    def summary(self) -> Summary:
        raise NotImplementedError("Subclasses must implement")

    def __repr__(self) -> str:
        out = self.__str__() + "\n"
        out += self.__class__.__name__
        out += f", id: {hex(id(self))}"
        return out

    def __str__(self) -> str:
        return self.summary().as_text()


class ARCHModelFixedResult(_SummaryRepr):
    """
    Results for fixed parameters for an ARCHModel model

    Parameters
    ----------
    params : ndarray
        Estimated parameters
    resid : ndarray
        Residuals from model.  Residuals have same shape as original data and
        contain nan-values in locations not used in estimation
    volatility : ndarray
        Conditional volatility from model
    dep_var : Series
        Dependent variable
    names : list (str)
        Model parameter names
    loglikelihood : float
        Loglikelihood at specified parameters
    is_pandas : bool
        Whether the original input was pandas
    model : ARCHModel
        The model object used to estimate the parameters
    """

    def __init__(
        self,
        params: Float64Array,
        resid: Float64Array,
        volatility: Float64Array,
        dep_var: pd.Series,
        names: list[str],
        loglikelihood: float,
        is_pandas: bool,
        model: ARCHModel,
    ) -> None:
        self._params = params
        self._resid = resid
        self._is_pandas = is_pandas
        self._model = model
        self._datetime = dt.datetime.now()
        self._dep_var = dep_var
        self._dep_name = str(dep_var.name)
        self._names = list(names)
        self._loglikelihood = loglikelihood
        self._nobs = self.model._fit_y.shape[0]
        self._index = dep_var.index
        self._volatility = volatility

    def summary(self) -> Summary:
        """
        Constructs a summary of the results from a fit model.

        Returns
        -------
        summary : Summary instance
            Object that contains tables and facilitates export to text, html
            or latex
        """
        # Summary layout
        # 1. Overall information
        # 2. Mean parameters
        # 3. Volatility parameters
        # 4. Distribution parameters
        # 5. Notes

        model = self.model
        model_name = model.name + " - " + model.volatility.name

        # Summary Header
        top_left = [
            ("Dep. Variable:", self._dep_name),
            ("Mean Model:", model.name),
            ("Vol Model:", model.volatility.name),
            ("Distribution:", model.distribution.name),
            ("Method:", "User-specified Parameters"),
            ("", ""),
            ("Date:", self._datetime.strftime("%a, %b %d %Y")),
            ("Time:", self._datetime.strftime("%H:%M:%S")),
        ]

        top_right = [
            ("R-squared:", "--"),
            ("Adj. R-squared:", "--"),
            ("Log-Likelihood:", f"{self.loglikelihood:#10.6g}"),
            ("AIC:", f"{self.aic:#10.6g}"),
            ("BIC:", f"{self.bic:#10.6g}"),
            ("No. Observations:", f"{self._nobs}"),
            ("", ""),
            ("", ""),
        ]

        title = model_name + " Model Results"
        stubs = []
        vals = []
        for stub, val in top_left:
            stubs.append(stub)
            vals.append([val])
        table = SimpleTable(vals, txt_fmt=fmt_2cols, title=title, stubs=stubs)

        # create summary table instance
        smry = Summary()
        # Top Table
        # Parameter table
        fmt = fmt_2cols
        fmt["data_fmts"][1] = "%18s"

        top_right = [("%-21s" % (" " + k), v) for k, v in top_right]
        stubs = []
        vals = []
        for stub, val in top_right:
            stubs.append(stub)
            vals.append([val])
        table.extend_right(SimpleTable(vals, stubs=stubs))
        smry.tables.append(table)

        stubs = list(self._names)
        header = ["coef"]
        param_table_data = [
            [format_float_fixed(param, 10, 4)] for param in self.params
        ]

        mc = self.model.num_params
        vc = self.model.volatility.num_params
        dc = self.model.distribution.num_params
        counts = (mc, vc, dc)
        titles = ("Mean Model", "Volatility Model", "Distribution")
        total = 0
        for title, count in zip(titles, counts):
            if count == 0:
                continue
            table_data = param_table_data[total : total + count]
            table_stubs = stubs[total : total + count]
            total += count
            table = SimpleTable(
                table_data,
                stubs=table_stubs,
                txt_fmt=fmt_params,
                headers=header,
                title=title,
            )
            smry.tables.append(table)

        extra_text = [
            "Results generated with user-specified parameters.",
            "Std. errors not available when the model is not estimated.",
        ]
        smry.add_extra_txt(extra_text)
        return smry

    @cached_property
    def model(self) -> ARCHModel:
        """
        Model instance used to produce the fit
        """
        return self._model

    @cached_property
    def loglikelihood(self) -> float:
        """Model loglikelihood"""
        return self._loglikelihood

    @cached_property
    def aic(self) -> float:
        """Akaike Information Criteria

        -2 * loglikelihood + 2 * num_params"""
        return -2 * self.loglikelihood + 2 * self.num_params

    @cached_property
    def num_params(self) -> int:
        """Number of parameters in model"""
        return len(self.params)

    @cached_property
    def bic(self) -> float:
        """
        Schwarz/Bayesian Information Criteria

        -2 * loglikelihood + log(nobs) * num_params
        """
        return -2 * self.loglikelihood + np.log(self.nobs) * self.num_params

    @cached_property
    def params(self) -> pd.Series:
        """Model Parameters"""
        return pd.Series(self._params, index=self._names, name="params")

    @cached_property
    def conditional_volatility(self) -> Union[Float64Array, pd.Series]:
        """
        Estimated conditional volatility

        Returns
        -------
        conditional_volatility : {ndarray, Series}
            nobs element array containing the conditional volatility (square
            root of conditional variance).  The values are aligned with the
            input data so that the value in the t-th position is the
            volatility of the t-th error, which is computed using time-(t-1)
            information.
        """
        if self._is_pandas:
            return pd.Series(self._volatility, name="cond_vol", index=self._index)
        else:
            return self._volatility

    @cached_property
    def nobs(self) -> int:
        """
        Number of data points used to estimate model
        """
        return self._nobs

    @cached_property
    def resid(self) -> Union[Float64Array, pd.Series]:
        """
        Model residuals
        """
        if self._is_pandas:
            return pd.Series(self._resid, name="resid", index=self._index)
        else:
            return self._resid

    @cached_property
    def std_resid(self) -> Union[Float64Array, pd.Series]:
        """
        Residuals standardized by conditional volatility
        """
        std_res = self.resid / self.conditional_volatility
        if isinstance(std_res, pd.Series):
            std_res.name = "std_resid"
        return std_res

    def plot(
        self, annualize: Optional[str] = None, scale: Optional[float] = None
    ) -> "Figure":
        """
        Plot standardized residuals and conditional volatility

        Parameters
        ----------
        annualize : str, optional
            String containing frequency of data that indicates the plot
            should contain annualized volatility.  Supported values are 'D'
            (daily), 'W' (weekly) and 'M' (monthly), which scale variance by
            252, 52, and 12, respectively.
        scale : float, optional
            Value to use when scaling returns to annualize.  If scale is
            provided, annualize is ignored and the value in scale is used.

        Returns
        -------
        fig : figure
            Handle to the figure

        Examples
        --------
        >>> from arch import arch_model
        >>> am = arch_model(None)
        >>> sim_data = am.simulate([0.0, 0.01, 0.07, 0.92], 2520)
        >>> am = arch_model(sim_data['data'])
        >>> res = am.fit(update_freq=0, disp='off')
        >>> fig = res.plot()

        Produce a plot with annualized volatility

        >>> fig = res.plot(annualize='D')

        Override the usual scale of 252 to use 360 for an asset that trades
        most days of the year

        >>> fig = res.plot(scale=360)
        """
        from matplotlib.axes import Axes
        from matplotlib.pyplot import figure

        def _set_tight_x(axis: Axes, index: pd.Index) -> None:
            try:
                axis.set_xlim(index[0], index[-1])
            except ValueError:
                pass

        fig = figure()
        ax = fig.add_subplot(2, 1, 1)
        ax.plot(self._index.values, self.resid / self.conditional_volatility)
        ax.set_title("Standardized Residuals")
        ax.axes.xaxis.set_ticklabels([])
        _set_tight_x(ax, self._index)

        ax = fig.add_subplot(2, 1, 2)
        vol = self.conditional_volatility
        title = "Annualized Conditional Volatility"
        if scale is not None:
            vol = vol * np.sqrt(scale)
        elif annualize is not None:
            scales = {"D": 252, "W": 52, "M": 12}
            if annualize in scales:
                vol = vol * np.sqrt(scales[annualize])
            else:
                raise ValueError("annualize not recognized")
        else:
            title = "Conditional Volatility"

        ax.plot(self._index.values, vol)
        _set_tight_x(ax, self._index)
        ax.set_title(title)

        return fig

    def forecast(
        self,
        params: Optional[ArrayLike1D] = None,
        horizon: int = 1,
        start: Union[int, DateLike, None] = None,
        align: Literal["origin", "target"] = "origin",
        method: ForecastingMethod = "analytic",
        simulations: int = 1000,
        rng: Optional[Callable[[Union[int, tuple[int, ...]]], Float64Array]] = None,
        random_state: Optional[np.random.RandomState] = None,
        *,
        reindex: bool = False,
        x: Union[dict[Label, ArrayLike], ArrayLike, None] = None,
    ) -> "ARCHModelForecast":
        """
        Construct forecasts from estimated model

        Parameters
        ----------
        params : ndarray, optional
            Alternative parameters to use.  If not provided, the parameters
            estimated when fitting the model are used.  Must be identical in
            shape to the parameters computed by fitting the model.
        horizon : int, optional
            Number of steps to forecast
        start : {int, datetime, Timestamp, str}, optional
            An integer, datetime or str indicating the first observation to
            produce the forecast for.  Datetimes can only be used with pandas
            inputs that have a datetime index.  Strings must be convertible
            to a date time, such as '1945-01-01'.
        align : str, optional
            Either 'origin' or 'target'.  When set to 'origin', the t-th row
            of forecasts contains the forecasts for t+1, t+2, ..., t+h.  When
            set to 'target', the t-th row contains the 1-step ahead forecast
            from time t-1, the 2-step from time t-2, ..., and the h-step from
            time t-h.  'target' simplifies computing forecast errors since
            the realization and h-step forecast are aligned.
        method : {'analytic', 'simulation', 'bootstrap'}, optional
            Method to use when producing the forecast.  The default is
            analytic.  The method only affects the variance forecast
            generation.  Not all volatility models support all methods.  In
            particular, volatility models that do not evolve in squares such
            as EGARCH or TARCH do not support the 'analytic' method for
            horizons > 1.
        simulations : int, optional
            Number of simulations to run when computing the forecast using
            either simulation or bootstrap.
        rng : callable, optional
            Custom random number generator to use in simulation-based
            forecasts.  Must produce random samples using the syntax
            `rng(size)` where size is the 2-element tuple
            (simulations, horizon).
        random_state : RandomState, optional
            NumPy RandomState instance to use when method is 'bootstrap'
        reindex : bool, optional
            Whether to reindex the forecasts to have the same dimension as
            the series being forecast.

            .. versionchanged:: 6.2

               The default has been changed to False.
        x : {dict[label, array_like], array_like}
            Values to use for exogenous regressors if any are included in the
            model.  Three formats are accepted:

            * 2-d array-like: This format can be used when there is a single
              exogenous variable.  The input must have shape
              (nforecast, horizon) or (nobs, horizon) where nforecast is the
              number of forecasting periods and nobs is the original shape of
              y.  For example, if a single series of forecasts are made from
              the end of the sample with a horizon of 10, then the input can
              be (1, 10).  Alternatively, if the original data had 1000
              observations, then the input can be (1000, 10), and only the
              final row is used to produce forecasts.
            * A dictionary of 2-d array-like: This format is identical to the
              previous except that the dictionary keys must match the names
              of the exog variables.  Requires that the exog variables were
              passed as a pandas DataFrame.
            * A 3-d NumPy array (or equivalent): In this format, each panel
              (0th axis) is a 2-d array that must have shape
              (nforecast, horizon) or (nobs, horizon).  The array x[j]
              corresponds to the j-th column of the exogenous variables.

            Due to the complexity required to accommodate all scenarios,
            please see the example notebook that demonstrates the valid
            formats for x.

            .. versionadded:: 4.19

        Returns
        -------
        arch.univariate.base.ARCHModelForecast
            Container for forecasts.  Key properties are ``mean``,
            ``variance`` and ``residual_variance``.

        Notes
        -----
        The most basic 1-step ahead forecast will return a vector with the
        same length as the original data, where the t-th value will be the
        time-t forecast for time t + 1.  When the horizon is > 1, and when
        using the default value for `align`, the forecast value in position
        [t, h] is the time-t, h+1 step ahead forecast.

        If model contains exogenous variables (`model.x is not None`), then
        only 1-step ahead forecasts are available.  Using horizon > 1 will
        produce a warning and all columns, except the first, will be
        nan-filled.

        If `align` is 'origin', forecast[t, h] contains the forecast made
        using y[:t] (that is, up to but not including t) for horizon h + 1.
        For example, y[100, 2] contains the 3-step ahead forecast using the
        first 100 data points, which will correspond to the realization
        y[100 + 2].  If `align` is 'target', then the same forecast is in
        location [102, 2], so that it is aligned with the observation to use
        when evaluating, but still in the same column.
        """
        if params is None:
            params = self._params
        else:
            if (
                params.size != np.array(self._params).size
                or params.ndim != self._params.ndim
            ):
                raise ValueError("params have incorrect dimensions")
        if not isinstance(horizon, (int, np.integer)) or horizon < 1:
            raise ValueError("horizon must be an integer >= 1.")
        return self.model.forecast(
            params,
            horizon,
            start,
            align,
            method,
            simulations,
            rng,
            random_state,
            reindex=reindex,
            x=x,
        )

    @deprecate_kwarg("type", "plot_type")
    def hedgehog_plot(
        self,
        params: Optional[ArrayLike1D] = None,
        horizon: int = 10,
        step: int = 10,
        start: Union[int, DateLike, None] = None,
        plot_type: Literal["volatility", "mean"] = "volatility",
        method: ForecastingMethod = "analytic",
        simulations: int = 1000,
    ) -> "Figure":
        """
        Plot forecasts from estimated model

        Parameters
        ----------
        params : {ndarray, Series}
            Alternative parameters to use.  If not provided, the parameters
            computed by fitting the model are used.  Must be 1-d and
            identical in shape to the parameters computed by fitting the
            model.
        horizon : int, optional
            Number of steps to forecast
        step : int, optional
            Non-negative number of forecasts to skip between spines
        start : int, datetime or str, optional
            An integer, datetime or str indicating the first observation to
            produce the forecast for.  Datetimes can only be used with pandas
            inputs that have a datetime index.  Strings must be convertible
            to a date time, such as '1945-01-01'.  If not provided, the start
            is set to the earliest forecastable date.
        plot_type : {'volatility', 'mean'}
            Quantity to plot, the forecast volatility or the forecast mean
        method : {'analytic', 'simulation', 'bootstrap'}
            Method to use when producing the forecast.  The default is
            analytic.  The method only affects the variance forecast
            generation.  Not all volatility models support all methods.  In
            particular, volatility models that do not evolve in squares such
            as EGARCH or TARCH do not support the 'analytic' method for
            horizons > 1.
        simulations : int
            Number of simulations to run when computing the forecast using
            either simulation or bootstrap.

        Returns
        -------
        fig : figure
            Handle to the figure

        Examples
        --------
        >>> import pandas as pd
        >>> from arch import arch_model
        >>> am = arch_model(None, mean='HAR', lags=[1,5,22], vol='Constant')
        >>> sim_data = am.simulate([0.1,0.4,0.3,0.2,1.0], 250)
        >>> sim_data.index = pd.date_range('2000-01-01', periods=250)
        >>> am = arch_model(sim_data['data'], mean='HAR', lags=[1,5,22], vol='Constant')
        >>> res = am.fit()
        >>> fig = res.hedgehog_plot(plot_type='mean')
        """
        import matplotlib.pyplot as plt

        plot_mean = plot_type.lower() == "mean"
        if start is None:
            invalid_start = True
            start = 0
            while invalid_start:
                try:
                    forecasts = self.forecast(
                        params,
                        horizon,
                        start,
                        method=method,
                        simulations=simulations,
                    )
                    invalid_start = False
                except ValueError:
                    start += 1
        else:
            forecasts = self.forecast(
                params,
                horizon,
                start,
                method=method,
                simulations=simulations,
            )

        fig, ax = plt.subplots(1, 1)
        use_date = isinstance(self._dep_var.index, pd.DatetimeIndex)
        plot_fn = ax.plot_date if use_date else ax.plot
        x_values = np.array(self._dep_var.index)
        if plot_mean:
            y_values = np.asarray(self._dep_var)
        else:
            y_values = np.asarray(self.conditional_volatility)

        plot_fn(x_values, y_values, linestyle="-", marker="")
        first_obs = np.min(np.where(np.logical_not(np.isnan(forecasts.mean)))[0])
        spines = []
        t = forecasts.mean.shape[0]
        for i in range(first_obs, t, step):
            if i + horizon + 1 > x_values.shape[0]:
                continue
            temp_x = x_values[i : i + horizon + 1]
            if plot_mean:
                spine_data = np.asarray(forecasts.mean.iloc[i], dtype=float)
            else:
                spine_data = np.asarray(
                    np.sqrt(forecasts.variance.iloc[i]), dtype=float
                )
            temp_y = np.hstack([y_values[i], spine_data])
            line = plot_fn(temp_x, temp_y, linewidth=3, linestyle="-", marker="")
            spines.append(line)
        color = spines[0][0].get_color()
        for spine in spines[1:]:
            spine[0].set_color(color)
        plot_title = "Mean" if plot_mean else "Volatility"
        ax.set_title(self._dep_name + " " + plot_title + " Forecast Hedgehog Plot")

        return fig

    def arch_lm_test(
        self, lags: Optional[int] = None, standardized: bool = False
    ) -> WaldTestStatistic:
        """
        ARCH LM test for conditional heteroskedasticity

        Parameters
        ----------
        lags : int, optional
            Number of lags to include in the model.  If not specified, uses
            ceil(12 * (nobs / 100) ** (1 / 4)).
        standardized : bool, optional
            Flag indicating to test the model residuals divided by their
            conditional standard deviations.  If False, directly tests the
            estimated residuals.

        Returns
        -------
        result : WaldTestStatistic
            Result of ARCH-LM test
        """
        resids = self.resid
        if standardized:
            resids = resids / np.asarray(self.conditional_volatility)
        # GH 599: drop missing observations
        resids = resids[~np.isnan(resids)]
        nobs = resids.shape[0]
        resid2 = resids**2
        lags = (
            int(np.ceil(12.0 * np.power(nobs / 100.0, 1 / 4.0)))
            if lags is None
            else lags
        )
        lags = max(min(resids.shape[0] // 2 - 1, lags), 1)
        if resid2.shape[0] < 3:
            raise ValueError("Test requires at least 3 non-nan observations")
        lag, lead = lagmat(resid2, lags, "both", "sep", False)
        lag = add_constant(lag)
        res = OLS(lead, lag).fit()
        stat = nobs * res.rsquared
        test_type = "R" if not standardized else "Standardized r"
        null = f"{test_type}esiduals are homoskedastic."
        alt = f"{test_type}esiduals are conditionally heteroskedastic."
        assert isinstance(lags, int)
        return WaldTestStatistic(
            stat, df=lags, null=null, alternative=alt, name="ARCH-LM Test"
        )

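    # Example: testing standardized residuals for remaining conditional
    # heteroskedasticity after fitting (illustrative; assumes a fitted
    # result `res`):
    #
    #   lm = res.arch_lm_test(lags=12, standardized=True)
    #   print(lm)  # WaldTestStatistic with stat, p-value and null

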
class ARCHModelResult(ARCHModelFixedResult):
    """
    Results from estimation of an ARCHModel model

    Parameters
    ----------
    params : ndarray
        Estimated parameters
    param_cov : {ndarray, None}
        Estimated variance-covariance matrix of params.  If None, the
        parameter covariance is computed from the model when first accessed
        from the result
    r2 : float
        Model R-squared
    resid : ndarray
        Residuals from model.  Residuals have same shape as original data and
        contain nan-values in locations not used in estimation
    volatility : ndarray
        Conditional volatility from model
    cov_type : str
        String describing the covariance estimator used
    dep_var : Series
        Dependent variable
    names : list (str)
        Model parameter names
    loglikelihood : float
        Loglikelihood at estimated parameters
    is_pandas : bool
        Whether the original input was pandas
    optim_output : OptimizeResult
        Result of log-likelihood optimization
    fit_start : int
        Integer index of the first observation used to fit the model
    fit_stop : int
        Integer index of the last observation used to fit the model using
        slice notation `fit_start:fit_stop`
    model : ARCHModel
        The model object used to estimate the parameters
    """

    def __init__(
        self,
        params: Float64Array,
        param_cov: Optional[Float64Array],
        r2: float,
        resid: Float64Array,
        volatility: Float64Array,
        cov_type: str,
        dep_var: pd.Series,
        names: list[str],
        loglikelihood: float,
        is_pandas: bool,
        optim_output: OptimizeResult,
        fit_start: int,
        fit_stop: int,
        model: ARCHModel,
    ) -> None:
        super().__init__(
            params, resid, volatility, dep_var, names, loglikelihood, is_pandas, model
        )
        self._fit_indices = (fit_start, fit_stop)
        self._param_cov = param_cov
        self._r2 = r2
        self.cov_type: str = cov_type
        self._optim_output = optim_output

    @cached_property
    def scale(self) -> float:
        """
        The scale applied to the original data before estimating the model.

        If scale=1.0, the data have not been rescaled.  Otherwise, the model
        parameters have been estimated on scale * y.
        """
        return self.model.scale

    def conf_int(self, alpha: float = 0.05) -> pd.DataFrame:
        """
        Parameter confidence intervals

        Parameters
        ----------
        alpha : float, optional
            Size (prob.) to use when constructing the confidence interval.

        Returns
        -------
        ci : DataFrame
            Array where the ith row contains the confidence interval for the
            ith parameter
        """
        cv = stats.norm.ppf(1.0 - alpha / 2.0)
        se = self.std_err
        params = self.params

        return pd.DataFrame(
            np.vstack((params - cv * se, params + cv * se)).T,
            columns=["lower", "upper"],
            index=self._names,
        )

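    # The interval is the standard normal approximation
    # params +/- z_{1 - alpha/2} * std_err, so conf_int(0.05) uses
    # z = stats.norm.ppf(0.975), approximately 1.96.
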
    def summary(self) -> Summary:
        """
        Constructs a summary of the results from a fit model.

        Returns
        -------
        summary : Summary instance
            Object that contains tables and facilitates export to text, html
            or latex
        """
        # Summary layout
        # 1. Overall information
        # 2. Mean parameters
        # 3. Volatility parameters
        # 4. Distribution parameters
        # 5. Notes

        model = self.model
        model_name = model.name + " - " + model.volatility.name

        # Summary Header
        top_left = [
            ("Dep. Variable:", self._dep_name),
            ("Mean Model:", model.name),
            ("Vol Model:", model.volatility.name),
            ("Distribution:", model.distribution.name),
            ("Method:", "Maximum Likelihood"),
            ("", ""),
            ("Date:", self._datetime.strftime("%a, %b %d %Y")),
            ("Time:", self._datetime.strftime("%H:%M:%S")),
        ]

        top_right = [
            ("R-squared:", f"{self.rsquared:#8.3f}"),
            ("Adj. R-squared:", f"{self.rsquared_adj:#8.3f}"),
            ("Log-Likelihood:", f"{self.loglikelihood:#10.6g}"),
            ("AIC:", f"{self.aic:#10.6g}"),
            ("BIC:", f"{self.bic:#10.6g}"),
            ("No. Observations:", f"{self._nobs}"),
            ("Df Residuals:", f"{self.nobs - self.model.num_params}"),
            ("Df Model:", f"{self.model.num_params}"),
        ]

        title = model_name + " Model Results"
        stubs = []
        vals = []
        for stub, val in top_left:
            stubs.append(stub)
            vals.append([val])
        table = SimpleTable(vals, txt_fmt=fmt_2cols, title=title, stubs=stubs)

        # create summary table instance
        smry = Summary()
        # Top Table
        # Parameter table
        fmt = fmt_2cols
        fmt["data_fmts"][1] = "%18s"

        top_right = [("%-21s" % (" " + k), v) for k, v in top_right]
        stubs = []
        vals = []
        for stub, val in top_right:
            stubs.append(stub)
            vals.append([val])
        table.extend_right(SimpleTable(vals, stubs=stubs))
        smry.tables.append(table)

        conf_int = np.asarray(self.conf_int())
        conf_int_str = []
        for c in conf_int:
            conf_int_str.append(
                "["
                + format_float_fixed(c[0], 7, 3)
                + ","
                + format_float_fixed(c[1], 7, 3)
                + "]"
            )

        stubs = list(self._names)
        header = ["coef", "std err", "t", "P>|t|", "95.0% Conf. Int."]
        table_vals = (
            np.asarray(self.params),
            np.asarray(self.std_err),
            np.asarray(self.tvalues),
            np.asarray(self.pvalues),
            pd.Series(conf_int_str),
        )
        # (0,0) is a dummy format
        formats = [(10, 4), (9, 3), (9, 3), (9, 3), (0, 0)]
        param_table_data = []
        for pos in range(len(table_vals[0])):
            row = []
            for i, table_val in enumerate(table_vals):
                val = table_val[pos]
                if isinstance(val, (np.double, float)):
                    converted = format_float_fixed(val, *formats[i])
                else:
                    converted = val
                row.append(converted)
            param_table_data.append(row)

        mc = self.model.num_params
        vc = self.model.volatility.num_params
        dc = self.model.distribution.num_params
        counts = (mc, vc, dc)
        titles = ("Mean Model", "Volatility Model", "Distribution")
        total = 0
        for title, count in zip(titles, counts):
            if count == 0:
                continue

            table_data = param_table_data[total : total + count]
            table_stubs = stubs[total : total + count]
            total += count
            table = SimpleTable(
                table_data,
                stubs=table_stubs,
                txt_fmt=fmt_params,
                headers=header,
                title=title,
            )
            smry.tables.append(table)

        extra_text = ["Covariance estimator: " + self.cov_type]

        if self.convergence_flag:
            string_message = self._optim_output.message
            extra_text.append(CONVERGENCE_WARNING.format(msg=string_message))

        smry.add_extra_txt(extra_text)
        return smry

    @cached_property
    def param_cov(self) -> pd.DataFrame:
        """Parameter covariance"""
        if self._param_cov is not None:
            param_cov = self._param_cov
        else:
            params = np.asarray(self.params)
            if self.cov_type == "robust":
                param_cov = self.model.compute_param_cov(params)
            else:
                param_cov = self.model.compute_param_cov(params, robust=False)
        return pd.DataFrame(param_cov, columns=self._names, index=self._names)

    @cached_property
    def rsquared(self) -> float:
        """
        R-squared
        """
        return self._r2

    @cached_property
    def fit_start(self) -> int:
        """Start of sample used to estimate parameters"""
        return self._fit_indices[0]

    @cached_property
    def fit_stop(self) -> int:
        """End of sample used to estimate parameters"""
        return self._fit_indices[1]

    @cached_property
    def rsquared_adj(self) -> float:
        """
        Degree of freedom adjusted R-squared
        """
        return 1 - (
            (1 - self.rsquared) * (self.nobs - 1) / (self.nobs - self.model.num_params)
        )

    @cached_property
    def pvalues(self) -> pd.Series:
        """
        Array of p-values for the t-statistics
        """
        pvals = np.asarray(stats.norm.sf(np.abs(self.tvalues)) * 2, dtype=float)
        return pd.Series(pvals, index=self._names, name="pvalues")

    @cached_property
    def std_err(self) -> pd.Series:
        """
        Array of parameter standard errors
        """
        se = np.asarray(np.sqrt(np.diag(self.param_cov)), dtype=float)
        return pd.Series(se, index=self._names, name="std_err")

    @cached_property
    def tvalues(self) -> pd.Series:
        """
        Array of t-statistics testing the null that the coefficients are 0
        """
        tvalues = self.params / self.std_err
        tvalues.name = "tvalues"
        return tvalues

    @cached_property
    def convergence_flag(self) -> int:
        """
        scipy.optimize.minimize result flag
        """
        return self._optim_output.status

    @property
    def optimization_result(self) -> OptimizeResult:
        """
        Information about the convergence of the loglikelihood optimization

        Returns
        -------
        optim_result : OptimizeResult
            Result from numerical optimization of the log-likelihood.
        """
        return self._optim_output


def _align_forecast(
    f: pd.DataFrame, align: Literal["origin", "target"]
) -> pd.DataFrame:
    if align == "origin":
        return f
    elif align in ("target", "horizon"):
        for i, col in enumerate(f):
            f[col] = f[col].shift(i + 1)
        return f
    else:
        raise ValueError("Unknown alignment")


def _format_forecasts(
    values: Float64Array, index: Union[list[Label], pd.Index], start_index: int
) -> pd.DataFrame:
    horizon = values.shape[1]
    format_str = "{0:>0" + str(int(np.ceil(np.log10(horizon + 0.5)))) + "}"
    columns = ["h." + format_str.format(h + 1) for h in range(horizon)]
    forecasts = pd.DataFrame(
        values, index=index[start_index:], columns=columns, dtype="float"
    )
    return forecasts

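# Example: _format_forecasts names columns 'h.1', 'h.2', ... and zero-pads
# when horizon >= 10 ('h.01', ..., 'h.10') so the columns sort
# lexicographically (illustrative).

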
class ARCHModelForecastSimulation:
    """
    Container for simulation- or bootstrap-based forecasts from an ARCH Model

    Parameters
    ----------
    index : {list, Index}
        Index aligned to dimension 0 of the simulation paths
    values : ndarray, optional
        Simulated values of the process
    residuals : ndarray, optional
        Simulated residuals used to produce the values
    variances : ndarray, optional
        Simulated variances of the values
    residual_variances : ndarray, optional
        Simulated variances of the residuals
    """

    def __init__(
        self,
        index: Union[list[Label], pd.Index],
        values: Optional[Float64Array],
        residuals: Optional[Float64Array],
        variances: Optional[Float64Array],
        residual_variances: Optional[Float64Array],
    ) -> None:
        self._index = pd.Index(index)
        self._values = values
        self._residuals = residuals
        self._variances = variances
        self._residual_variances = residual_variances

    @property
    def index(self) -> pd.Index:
        """The index aligned to dimension 0 of the simulation paths"""
        return self._index

    @property
    def values(self) -> Optional[Float64Array]:
        """The values of the process"""
        return self._values

    @property
    def residuals(self) -> Optional[Float64Array]:
        """Simulated residuals used to produce the values"""
        return self._residuals

    @property
    def variances(self) -> Optional[Float64Array]:
        """Simulated variances of the values"""
        return self._variances

    @property
    def residual_variances(self) -> Optional[Float64Array]:
        """Simulated variance of the residuals"""
        return self._residual_variances


def _reindex(
    a: Optional[Float64Array], idx: Union[list[Label], pd.Index]
) -> Optional[Float64Array]:
    if a is None:
        return a
    assert a is not None
    actual = len(idx)
    obs = a.shape[0]
    if actual > obs:
        addition = np.full((actual - obs,) + a.shape[1:], np.nan)
        a = np.concatenate([addition, a])
    return a


class ARCHModelForecast:
    """
    Container for forecasts from an ARCH Model

    Parameters
    ----------
    index : {list, ndarray}
    mean : ndarray
    variance : ndarray
    residual_variance : ndarray
    simulated_paths : ndarray, optional
    simulated_variances : ndarray, optional
    simulated_residual_variances : ndarray, optional
    simulated_residuals : ndarray, optional
    align : {'origin', 'target'}
    """

    def __init__(
        self,
        index: Union[list[Label], pd.Index],
        start_index: int,
        mean: Float64Array,
        variance: Float64Array,
        residual_variance: Float64Array,
        simulated_paths: Optional[Float64Array] = None,
        simulated_variances: Optional[Float64Array] = None,
        simulated_residual_variances: Optional[Float64Array] = None,
        simulated_residuals: Optional[Float64Array] = None,
        align: Literal["origin", "target"] = "origin",
        *,
        reindex: bool = False,
    ) -> None:
        mean_df = _format_forecasts(mean, index, start_index)
        variance_df = _format_forecasts(variance, index, start_index)
        residual_variance_df = _format_forecasts(residual_variance, index, start_index)
        if reindex:
            mean_df = mean_df.reindex(index)
            variance_df = variance_df.reindex(index)
            residual_variance_df = residual_variance_df.reindex(index)

        self._mean = _align_forecast(mean_df, align=align)
        self._variance = _align_forecast(variance_df, align=align)
        self._residual_variance = _align_forecast(residual_variance_df, align=align)

        if reindex:
            sim_index = index
            simulated_paths = _reindex(simulated_paths, index)
            simulated_residuals = _reindex(simulated_residuals, index)
            simulated_variances = _reindex(simulated_variances, index)
            simulated_residual_variances = _reindex(
                simulated_residual_variances, index
            )
        else:
            sim_index = index[start_index:]
        self._sim = ARCHModelForecastSimulation(
            sim_index,
            simulated_paths,
            simulated_residuals,
            simulated_variances,
            simulated_residual_variances,
        )

    @property
    def mean(self) -> pd.DataFrame:
        """Forecast values for the conditional mean of the process"""
        return self._mean

    @property
    def variance(self) -> pd.DataFrame:
        """Forecast values for the conditional variance of the process"""
        return self._variance

    @property
    def residual_variance(self) -> pd.DataFrame:
        """Forecast values for the conditional variance of the residuals"""
        return self._residual_variance

    @property
    def simulations(self) -> "ARCHModelForecastSimulation":
        """
        Detailed simulation results if using a simulation-based method

        Returns
        -------
        ARCHModelForecastSimulation
            Container for simulation results
        """
        return self._sim
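

# Example: accessing the final row of multi-step forecasts from a fitted
# result (illustrative; assumes a fitted result `res`):
#
#   fcast = res.forecast(horizon=5, reindex=False)
#   fcast.mean.iloc[-1]      # h.1 ... h.5 mean forecasts
#   fcast.variance.iloc[-1]  # h.1 ... h.5 variance forecasts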