Source code for mizarlabs.structural_breaks.sdfc
"""
Explosiveness tests: Chow-Type Dickey-Fuller Test
"""
from typing import Tuple
import numpy as np
import pandas as pd
from numba import jit
from numba import njit
from numba import prange
@jit(parallel=True, nopython=True)
def get_dfc_array(
    indices: np.ndarray, y_diff: np.ndarray, y_lag: np.ndarray
) -> np.ndarray:
    """Compute one Chow-Type Dickey-Fuller t-value per candidate break index.

    For every entry of ``indices`` a 0/1 indicator is built that is zero for
    all rows before that position and one from that position onward. The
    lagged series is multiplied by the indicator, ``y_diff`` is regressed on
    the result, and the t-value is the estimated coefficient divided by the
    square root of its estimated variance.

    :param indices: Positions (row offsets into ``y_lag``) to iterate over
    :type indices: np.ndarray
    :param y_diff: Differenced time series, used as regression target
    :type y_diff: np.ndarray
    :param y_lag: Lagged time series, used as regressor after masking
    :type y_lag: np.ndarray
    :return: Array with one t-value per entry of ``indices``.
    :rtype: np.ndarray
    """
    # The [0, 0] lookups below imply 2-D regression outputs, i.e. y_diff and
    # y_lag are expected as column arrays (the visible caller reshapes them
    # to (-1, 1)).
    n_candidates = len(indices)
    t_values = np.empty_like(indices, dtype=np.float64)
    # Iterations are independent of each other, hence the parallel range.
    for k in prange(n_candidates):
        break_pos = indices[k]
        # D_t* indicator: zero before the candidate break t*, one afterwards.
        regime_flag = np.ones_like(y_lag)
        regime_flag[:break_pos] = 0
        masked_lag = y_lag * regime_flag
        coef, coef_var = get_beta_and_beta_var(masked_lag, y_diff)
        t_values[k] = coef[0, 0] / np.sqrt(coef_var[0, 0])
    return t_values
@njit
def get_beta_and_beta_var(
    X: np.ndarray, y: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Returns the OLS estimates of the coefficients and the variance of the coefficients.

    The coefficients are computed as ``(X'X)^-1 X'y``. Their variance is the
    residual sum of squares divided by the degrees of freedom
    (``X.shape[0] - X.shape[1]``), multiplied by ``(X'X)^-1``.

    :param X: Matrix with features values, one row per observation
    :type X: np.ndarray
    :param y: Array with target values.
    :type y: np.ndarray
    :return: Tuple with the coefficients and the variance
             of the coefficients estimate.
    :rtype: Tuple[np.ndarray, np.ndarray]
    """
    Xy = np.dot(X.T, y)
    XX = np.dot(X.T, X)
    # NOTE(review): the inversion presumably fails when X'X is singular
    # (e.g. an all-zero regressor) -- confirm how callers want that handled.
    XX_inv = np.linalg.inv(XX)
    # Hand np.dot a contiguous copy of the inverse.
    beta_hat = np.dot(np.ascontiguousarray(XX_inv), Xy)
    err = y - np.dot(X, beta_hat)
    # Residual variance estimate (RSS / degrees of freedom) scaled by (X'X)^-1.
    beta_hat_var = np.dot(err.T, err) / (X.shape[0] - X.shape[1]) * XX_inv
    return beta_hat, beta_hat_var
class SupremumDickeyFullerChowStatTest:
    """Chow-Type Dickey-Fuller explosiveness test over a price-like series."""

    def __init__(self, min_num_samples: int = 20):
        """
        Chow-Type Dickey-Fuller Test statistics as
        described on page 251-252 in Advances in Financial
        Machine Learning by Marcos Lopez de Prado.

        :param min_num_samples: min. no. of samples for the dummy variable
                                in the test specification to ensure enough
                                ones and zeros, defaults to 20
        :type min_num_samples: int, optional
        """
        self.min_num_samples = min_num_samples

    def run(self, series_to_test: pd.Series) -> pd.Series:
        """
        Compute the test statistic for every admissible break position.

        Break positions range from ``min_num_samples`` up to (but excluding)
        ``len(series_to_test) - min_num_samples``. The first difference and
        the one-step lag of the series (missing values dropped) are passed as
        float64 column arrays to ``get_dfc_array``.

        :param series_to_test: Series to run the test on
        :type series_to_test: pd.Series
        :return: Test statistics, labelled with the index values of
                 ``series_to_test`` at the evaluated break positions.
        :rtype: pd.Series
        """
        margin = self.min_num_samples
        upper_bound = series_to_test.shape[0] - margin
        candidate_positions = np.arange(margin, upper_bound)

        first_differences = series_to_test.diff().dropna()
        lagged_levels = series_to_test.shift(1).dropna()

        def as_float_column(values):
            # The compiled routine receives contiguous float64 column arrays.
            return np.ascontiguousarray(values.reshape(-1, 1), dtype=np.float64)

        statistics = get_dfc_array(
            candidate_positions,
            as_float_column(first_differences.values),
            as_float_column(lagged_levels.values),
        )

        labels = series_to_test.index.values[margin:upper_bound]
        return pd.Series(statistics, index=labels)