Source code for mizarlabs.transformers.sampling.down_sampling

import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin


[docs]class CUSUMFilter(BaseEstimator, TransformerMixin): """ Downsamples a timeseries to filter out non significant value changing samples. :param threshold: Sets the threshold to trigger a sample, setting the threshold higher will result in less samples being selected. :type threshold: float """ def __init__(self, threshold: float): self._threshold = threshold
[docs] def transform(self, X: pd.Series) -> pd.DatetimeIndex: """ Returns a pandas DatetimeIndex indicating which samples have been selected by the CUSUM filter. :param X: Time series with time indices. :type X: pd.Series :return: DatetimeIndex indicating which samples have been selected. :rtype: pd.DatetimeIndex """ filtered_indices = [] pos_threshold = 0 neg_threshold = 0 diff = X.diff() for index in diff.index[1:]: pos_threshold, neg_threshold = ( max(0, pos_threshold + diff.loc[index]), min(0, neg_threshold + diff.loc[index]), ) if neg_threshold < -self._threshold: neg_threshold = 0 filtered_indices.append(index) elif pos_threshold > self._threshold: pos_threshold = 0 filtered_indices.append(index) return pd.DatetimeIndex(filtered_indices)