Source code for mizarlabs.transformers.microstructural_features.first_generation

import numpy as np
import pandas as pd
from mizarlabs import static
from mizarlabs.transformers.utils import check_missing_columns
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin


[docs]class RollMeasure(BaseEstimator, TransformerMixin):
    """
    Implement the roll measure which gives the estimate of effective bid-ask spread.

    See page 282 of Advances in Financial Machine Learning by Marcos Lopez de
    Prado for additional information.

    :param window: The window size
    :type window: int
    :param close_column_name: The name of the close column
    :type close_column_name: str, optional
    """

    def __init__(self, window: int = 20, close_column_name: str = static.CLOSE):
        self.window = window
        self.close_column_name = close_column_name

[docs]    def fit(self, X, y=None, **fit_params):
        return self

[docs]    def transform(self, X: pd.DataFrame) -> np.ndarray:
        check_missing_columns(X, [self.close_column_name])

        close_diff = X[self.close_column_name].diff()
        close_diff_lag = close_diff.shift(1)
        return (
            2 * np.sqrt(abs(close_diff.rolling(window=self.window).cov(close_diff_lag)))
        ).values.reshape(-1, 1)


[docs]class RollImpact(RollMeasure):
    """
    Derivate from Roll Measure which takes into account dollar volume traded.

    See page 282 of Advances in Financial Machine Learning by Marcos Lopez de
    Prado for additional information.

    :param window: The window size
    :type window: int
    :param close_column_name: The name of the close column
    :type close_column_name: str, optional
    :param quote_asset_volume_column_name: The name quote asset column column
    :type quote_asset_volume_column_name: str, optional
    """

    def __init__(
        self,
        window: int = 20,
        close_column_name: str = static.CLOSE,
        quote_asset_volume_column_name: str = static.QUOTE_ASSET_VOLUME,
    ):
        super().__init__(window, close_column_name)
        self.quote_asset_volume_column_name = quote_asset_volume_column_name

[docs]    def fit(self, X, y=None, **fit_params):
        return self

[docs]    def transform(self, X: pd.DataFrame) -> np.ndarray:
        roll_measure = super().transform(X)
        return roll_measure / X[self.quote_asset_volume_column_name].values.reshape(
            -1, 1
        )


[docs]class ParkinsonVolatility(BaseEstimator, TransformerMixin):
    """
    High low volatility estimator developed by Parkinson (1980).

    :param window: The window size
    :type window: int
    :param high_column_name: The name of the high column
    :type high_column_name: str, optional
    :param low_column_name: The name of the low column
    :type low_column_name: str, optional
    """

    def __init__(
        self,
        window: int = 20,
        high_column_name: str = static.HIGH,
        low_column_name: str = static.LOW,
    ):
        self.window = window
        self.high_column_name = high_column_name
        self.low_column_name = low_column_name

[docs]    def fit(self, X, y=None, **fit_params):
        return self

[docs]    def transform(self, X: pd.DataFrame) -> np.ndarray:
        check_missing_columns(X, [self.high_column_name, self.low_column_name])

        parkinson_constant = 1 / (4 * np.log(2))

        return (
            (
                parkinson_constant
                * np.square(np.log(X[self.high_column_name] / X[self.low_column_name]))
            )
            .rolling(self.window)
            .mean()
        ).values.reshape(-1, 1)


def _get_beta(high: pd.Series, low: pd.Series, window: int) -> pd.Series:
    """
    Get beta estimate from Corwin-Schultz algorithm (p.285, Snippet 19.1).

    :param high: Series containing high prices
    :rtype high: pd.Series
    :param low: Series containing low prices
    :type low: pd.Series
    :param window: The window size of the estimator
    :rtype window: int
    :return: Beta estimates
    :rtype: pd.Series
    """
    ret = np.log(high / low)
    high_low_ret = ret ** 2
    beta = high_low_ret.rolling(window=2).sum()
    beta = beta.rolling(window=window).mean()
    return beta


def _get_gamma(high: pd.Series, low: pd.Series) -> pd.Series:
    """
    Get gamma estimate from Corwin-Schultz algorithm (p.285, Snippet 19.1).

    :param high: Series containing high prices
    :rtype high: pd.Series
    :param low: Series containing low prices
    :type low: pd.Series
    :return: Gamma estimates
    :rtype: pd.Series
    """
    high_max = high.rolling(window=2).max()
    low_min = low.rolling(window=2).min()
    gamma = np.log(high_max / low_min) ** 2
    return gamma


def _get_alpha(beta: pd.Series, gamma: pd.Series) -> pd.Series:
    """
    Get alpha from Corwin-_get_betaSchultz algorithm, (p.285, Snippet 19.1).

    :param high: Series containing high prices
    :rtype high: pd.Series
    :param low: Series containing low prices
    :type low: pd.Series
    :return: Alpha estimates
    :rtype: pd.Series
    """
    den = 3 - 2 * 2 ** 0.5
    alpha = (2 ** 0.5 - 1) * (beta ** 0.5) / den
    alpha -= (gamma / den) ** 0.5
    alpha[alpha < 0] = 0  # Set negative alphas to 0 (see p.727 of paper)
    return alpha


[docs]class CorwinSchultzSpread(BaseEstimator, TransformerMixin):
    """
    Corwin Schultz spread estimator.

    See page 284 of Advances in Financial Machine Learning by Marcos Lopez de
    Prado for additional information.

    :param window: The window size
    :type window: int
    :param high_column_name: The name of the high column
    :type high_column_name_get_beta: str, optional
    :param low_column_name: The name of the low column
    :type low_column_name: str, optional
    """

    def __init__(
        self,
        window: int = 20,
        high_column_name: str = static.HIGH,
        low_column_name: str = static.LOW,
    ):
        self.window = window
        self.high_column_name = high_column_name
        self.low_column_name = low_column_name

[docs]    def fit(self, X, y=None, **fit_params):
        return self

[docs]    def transform(self, X: pd.DataFrame) -> np.ndarray:

        check_missing_columns(X, [self.high_column_name, self.low_column_name])

        beta = _get_beta(X[self.high_column_name], X[self.low_column_name], self.window)
        gamma = _get_gamma(X[self.high_column_name], X[self.low_column_name])
        alpha = _get_alpha(beta, gamma)
        spread = 2 * (np.exp(alpha) - 1) / (1 + np.exp(alpha))

        return spread.values.reshape(-1, 1)


[docs]class BeckersParkinsonVolatility(BaseEstimator, TransformerMixin):
    """
    Get Bekker-Parkinson volatility from gamma and beta in Corwin-Schultz algorithm, (p.286, Snippet 19.2).

    See page 284 of Advances in Financial Machine Learning by Marcos Lopez de
    Prado for additional information.

    :param window: The window size
    :type window: int
    :param high_column_name: The name of the high column
    :type high_column_name: str, optional
    :param low_column_name: The name of the low column
    :type low_column_name: str, optional
    """

    def __init__(
        self,
        window: int = 20,
        high_column_name: str = static.HIGH,
        low_column_name: str = static.LOW,
    ):
        self.window = window
        self.high_column_name = high_column_name
        self.low_column_name = low_column_name

[docs]    def fit(self, X, y=None, **fit_params):
        return self

[docs]    def transform(self, X: pd.DataFrame) -> np.ndarray:

        check_missing_columns(X, [self.high_column_name, self.low_column_name])

        beta = _get_beta(X[self.high_column_name], X[self.low_column_name], self.window)
        gamma = _get_gamma(X[self.high_column_name], X[self.low_column_name])

        k2 = (8 / np.pi) ** 0.5
        den = 3 - 2 * 2 ** 0.5
        sigma = (2 ** -0.5 - 1) * beta ** 0.5 / (k2 * den)
        sigma += (gamma / (k2 ** 2 * den)) ** 0.5
        sigma[sigma < 0] = 0
        return sigma.values.reshape(-1, 1)
Source code for mizarlabs.transformers.microstructural_features.first_generation

MizarLabs

Navigation

Related Topics