Source code for tsaug._augmenter.drift

from typing import Callable, List, Optional, Tuple, Union

import numpy as np
from scipy.interpolate import CubicSpline

from .base import _Augmenter, _default_seed

class Drift(_Augmenter):
    """
    Drift the value of time series.

    The augmenter drifts the value of time series from its original values
    randomly and smoothly. The extent of drifting is controlled by the maximal
    drift and the number of drift points.

    Parameters
    ----------
    max_drift : float or tuple, optional
        The maximal amount of drift added to a time series.

        - If float, all series (all channels if `per_channel` is True) are
          drifted with the same maximum.
        - If tuple, the maximal drift added to a time series (a channel if
          `per_channel` is True) is sampled from this interval randomly.

        Default: 0.5.

    n_drift_points : int or list, optional
        The number of time points a new drifting trend is defined in a series.

        - If int, all series (all channels if `per_channel` is True) have the
          same number of drift points.
        - If list, the number of drift points defined in a series (a channel
          if `per_channel` is True) is sampled from this list randomly.
          Duplicated entries in the list are collapsed before sampling.

        Default: 3.

    kind : str, optional
        How the drift is applied to the original time series. It must be
        either 'additive' or 'multiplicative'. Default: 'additive'.

    per_channel : bool, optional
        Whether to sample independent drifting trends for each channel in a
        time series or to use the same drifting trends for all channels in a
        time series. Default: True.

    normalize : bool, optional
        Whether the drifting trend is added to the normalized time series. If
        True, the drift of each channel is scaled by the value range
        (max - min) of that channel. Only used when `kind` is 'additive'.
        Default: True.

    repeats : int, optional
        The number of times a series is augmented. If greater than one, a
        series will be augmented so many times independently. This parameter
        can also be set by operator `*`. Default: 1.

    prob : float, optional
        The probability of a series is augmented. It must be in (0.0, 1.0].
        This parameter can also be set by operator `@`. Default: 1.0.

    seed : int, optional
        The random seed. Default: None.

    """

    def __init__(
        self,
        max_drift: Union[float, Tuple[float, float]] = 0.5,
        n_drift_points: Union[int, List[int]] = 3,
        kind: str = "additive",
        per_channel: bool = True,
        normalize: bool = True,
        repeats: int = 1,
        prob: float = 1.0,
        seed: Optional[int] = _default_seed,
    ):
        # Each assignment goes through the validating property setter below.
        self.max_drift = max_drift
        self.n_drift_points = n_drift_points
        self.kind = kind
        self.per_channel = per_channel
        self.normalize = normalize
        super().__init__(repeats=repeats, prob=prob, seed=seed)

    @classmethod
    def _get_param_name(cls) -> Tuple[str, ...]:
        """Return the names of the parameters specific to this augmenter."""
        return (
            "max_drift",
            "n_drift_points",
            "kind",
            "per_channel",
            "normalize",
        )

    @property
    def max_drift(self) -> Union[float, Tuple[float, float]]:
        """Maximal drift: a non-negative number or a (low, high) interval."""
        return self._max_drift

    @max_drift.setter
    def max_drift(self, v: Union[float, Tuple[float, float]]) -> None:
        MAX_DRIFT_ERROR_MSG = (
            "Parameter `max_drift` must be a non-negative number "
            "or a 2-tuple of non-negative numbers representing an interval. "
        )
        if not isinstance(v, (float, int)):
            if isinstance(v, tuple):
                if len(v) != 2:
                    raise ValueError(MAX_DRIFT_ERROR_MSG)
                if (not isinstance(v[0], (float, int))) or (
                    not isinstance(v[1], (float, int))
                ):
                    raise TypeError(MAX_DRIFT_ERROR_MSG)
                if v[0] > v[1]:
                    raise ValueError(MAX_DRIFT_ERROR_MSG)
                if (v[0] < 0.0) or (v[1] < 0.0):
                    raise ValueError(MAX_DRIFT_ERROR_MSG)
            else:
                raise TypeError(MAX_DRIFT_ERROR_MSG)
        elif v < 0.0:
            raise ValueError(MAX_DRIFT_ERROR_MSG)
        self._max_drift = v

    @property
    def n_drift_points(self) -> Union[int, List[int]]:
        """Number of drift points: a positive int or a list of them."""
        return self._n_drift_points

    @n_drift_points.setter
    def n_drift_points(self, n: Union[int, List[int]]) -> None:
        N_DRIFT_POINTS_ERROR_MSG = (
            "Parameter `n_drift_points` must be a positive integer "
            "or a list of positive integers."
        )
        if not isinstance(n, int):
            if isinstance(n, list):
                if len(n) == 0:
                    raise ValueError(N_DRIFT_POINTS_ERROR_MSG)
                if not all(isinstance(nn, int) for nn in n):
                    raise TypeError(N_DRIFT_POINTS_ERROR_MSG)
                if not all(nn > 0 for nn in n):
                    raise ValueError(N_DRIFT_POINTS_ERROR_MSG)
            else:
                raise TypeError(N_DRIFT_POINTS_ERROR_MSG)
        elif n <= 0:
            raise ValueError(N_DRIFT_POINTS_ERROR_MSG)
        self._n_drift_points = n

    @property
    def per_channel(self) -> bool:
        """Whether every channel gets its own independent drifting trend."""
        return self._per_channel

    @per_channel.setter
    def per_channel(self, p: bool) -> None:
        if not isinstance(p, bool):
            raise TypeError("Parameter `per_channel` must be boolean.")
        self._per_channel = p

    @property
    def normalize(self) -> bool:
        """Whether additive drift is scaled by each channel's value range."""
        return self._normalize

    @normalize.setter
    def normalize(self, p: bool) -> None:
        if not isinstance(p, bool):
            raise TypeError("Parameter `normalize` must be boolean.")
        self._normalize = p

    @property
    def kind(self) -> str:
        """How the drift is applied: 'additive' or 'multiplicative'."""
        return self._kind

    @kind.setter
    def kind(self, k: str) -> None:
        if not isinstance(k, str):
            raise TypeError(
                "Parameter `kind` must be either 'additive' or 'multiplicative'."
            )
        if k not in ("additive", "multiplicative"):
            raise ValueError(
                "Parameter `kind` must be either 'additive' or 'multiplicative'."
            )
        self._kind = k

    def _augment_core(
        self, X: np.ndarray, Y: Optional[np.ndarray]
    ) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Apply a random smooth drift to every series in `X`.

        Parameters
        ----------
        X : numpy array
            Time series to be augmented. It is unpacked as a 3-dim array of
            shape (N, T, C).

        Y : numpy array or None
            Optional companion array. It is not drifted; a copy is returned.

        Returns
        -------
        tuple (numpy array, numpy array or None)
            The drifted series and a copy of `Y` (None if `Y` is None).

        """
        N, T, C = X.shape
        rand = np.random.RandomState(self.seed)

        # One trend per channel, or a single trend shared by all channels.
        n_channels = C if self.per_channel else 1
        n_trends = N * n_channels

        if isinstance(self.n_drift_points, int):
            n_drift_points = {self.n_drift_points}
        else:
            n_drift_points = set(self.n_drift_points)

        # map series to n_drift_points
        ind = rand.choice(len(n_drift_points), n_trends)
        drift = np.zeros((n_trends, T))
        for i, n in enumerate(n_drift_points):
            mask = ind == i
            if not mask.any():
                continue
            # Random-walk anchors (n interior points plus both ends) that the
            # spline interpolates into a smooth trend over the T time points.
            anchors = np.cumsum(
                rand.normal(size=(mask.sum(), n + 2)), axis=1
            )  # type: np.ndarray
            interp_funcs = CubicSpline(
                np.linspace(0, T, n + 2), anchors, axis=1
            )  # type: Callable
            drift[mask, :] = interp_funcs(np.arange(T))

        # Explicit channel count instead of -1, which numpy cannot infer for
        # zero-size arrays.
        drift = drift.reshape((N, n_channels, T)).swapaxes(1, 2)
        # Re-base every trend so that it starts at zero drift.
        drift = drift - drift[:, :1, :]
        # Normalize every trend to a peak magnitude of one. A trend that is
        # identically zero (always the case when T == 1) has a zero peak;
        # dividing by it would produce NaN, so leave such a trend at zero.
        peak = np.abs(drift).max(axis=1, keepdims=True)
        peak = np.where(peak == 0, 1.0, peak)
        drift = drift / peak

        if isinstance(self.max_drift, (float, int)):
            drift = drift * self.max_drift
        else:
            drift = drift * rand.uniform(
                low=self.max_drift[0],
                high=self.max_drift[1],
                size=(N, 1, n_channels),
            )

        if self.kind == "additive":
            if self.normalize:
                # Scale the drift by each channel's value range.
                X_aug = X + drift * (
                    X.max(axis=1, keepdims=True)
                    - X.min(axis=1, keepdims=True)
                )
            else:
                X_aug = X + drift
        else:
            X_aug = X * (1 + drift)

        if Y is not None:
            Y_aug = Y.copy()
        else:
            Y_aug = None

        return X_aug, Y_aug