# Source code for pydrobert.speech.pre

# Copyright 2021 Sean Robertson

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Classes for pre-processing speech signals"""


import abc
from typing import Optional
import warnings

import numpy as np

from pydrobert.speech.alias import AliasedFactory

# Public API: the names exported by a star-import of this module.
__all__ = [
    "PreProcessor",
    "Dither",
    "Preemphasize",
]


# Text of the DeprecationWarning issued whenever a preprocessor's ``apply`` is
# called with an explicit ``axis`` argument.
_AXIS_DEP_MSG = (
    "Specifying axis in preprocessor.apply is deprecated. Preprocessors "
    "should be applied to 1D signals only."
)


class PreProcessor(AliasedFactory):
    """A container for pre-processing signals with a transform

    Subclasses implement :func:`apply`, which performs the actual transformation.
    """

    @abc.abstractmethod
    def apply(
        self, signal: np.ndarray, axis: Optional[int] = None, in_place: bool = False
    ) -> np.ndarray:
        """Applies the transformation to a signal tensor

        Consult the class documentation for more details on what the transformation
        is.

        Parameters
        ----------
        signal
            The signal to transform.
        axis
            Deprecated. The axis to apply the transform to.
        in_place
            Whether it is okay to modify `signal` (:obj:`True`) or whether a copy
            should be made (:obj:`False`)

        Returns
        -------
        out : np.ndarray
            The transformed features
        """
        ...
class Dither(PreProcessor):
    """Add random noise to a signal tensor

    Noise is drawn from a zero-mean normal distribution with standard deviation
    `coeff` and added to the signal.

    The default axis of `apply` has been set to None, which will generate random noise
    for each coefficient. This is likely the desired behaviour. Setting axis to an
    integer (deprecated) will draw one value per index of that axis and broadcast it
    over all remaining axes.

    Intermediate values are calculated as 64-bit floats. The result is cast back to the
    input data type.

    Parameters
    ----------
    coeff
        Standard deviation of dither
    """

    coeff: float  #:

    aliases = {"dither", "dithering"}  #:

    def __init__(self, coeff: float = 1.0):
        super().__init__()
        self.coeff = coeff

    def apply(
        self, signal: np.ndarray, axis: Optional[int] = None, in_place: bool = False
    ) -> np.ndarray:
        """Add normally-distributed noise to `signal`

        Parameters
        ----------
        signal
            The signal to dither.
        axis
            Deprecated. If set and `signal` has more than one dimension, noise is
            drawn only along this axis and broadcast over the others.
        in_place
            If :obj:`True` and `signal` is already 64-bit float, `signal` itself is
            modified and returned. Otherwise the noise is added to a 64-bit float
            copy.

        Returns
        -------
        out : np.ndarray
            The dithered signal, with the same dtype as `signal`

        Warnings
        --------
        Issues a :class:`DeprecationWarning` whenever `axis` is not :obj:`None`.
        """
        if axis is not None:
            # stacklevel=2 attributes the warning to the caller's line rather than
            # to this library module.
            warnings.warn(_AXIS_DEP_MSG, DeprecationWarning, stacklevel=2)
        signal_dtype = signal.dtype
        if not in_place or signal_dtype != np.float64:
            # Work on a float64 copy; the caller's array is left untouched.
            signal = signal.astype(np.float64)
        if axis is None or signal.ndim <= 1:
            # Independent noise for every element.
            noise_shape = signal.shape
        else:
            # Singleton everywhere except `axis`, so the draw broadcasts.
            noise_shape = [1] * signal.ndim
            noise_shape[axis] = signal.shape[axis]
        signal += np.random.normal(0, self.coeff, noise_shape)
        return signal.astype(signal_dtype, copy=False)
class Preemphasize(PreProcessor):
    """Attenuate the low frequencies of a signal by taking sample differences

    The following transformation is applied along the target axis

    ::

        new[i] = old[i] - coeff * old[i-1] for i >= 1
        new[0] = old[0]

    This is essentially a convolution with a Haar wavelet for positive `coeff`. It
    emphasizes high frequencies.

    Intermediate values are calculated as 64-bit floats. The result is cast back to the
    input data type.

    Parameters
    ----------
    coeff
        Preemphasis coefficient
    """

    coeff: float  #:

    aliases = {"preemphasize", "preemphasis", "preemph"}  #:

    def __init__(self, coeff: float = 0.97):
        super().__init__()
        self.coeff = coeff

    def apply(
        self, signal: np.ndarray, axis: Optional[int] = None, in_place: bool = False
    ) -> np.ndarray:
        """Subtract the scaled previous sample from every sample but the first

        Parameters
        ----------
        signal
            The signal to preemphasize.
        axis
            Deprecated. The axis along which differences are taken. :obj:`None`
            behaves like the last axis.
        in_place
            If :obj:`True` and `signal` is already 64-bit float, `signal` itself is
            modified and returned. Otherwise the computation happens on a 64-bit
            float copy.

        Returns
        -------
        out : np.ndarray
            The preemphasized signal, with the same dtype as `signal`

        Warnings
        --------
        Issues a :class:`DeprecationWarning` whenever `axis` is not :obj:`None`.
        """
        if axis is not None:
            # stacklevel=2 attributes the warning to the caller's line rather than
            # to this library module.
            warnings.warn(_AXIS_DEP_MSG, DeprecationWarning, stacklevel=2)
        signal_dtype = signal.dtype
        if not in_place or signal_dtype != np.float64:
            # Work on a float64 copy; the caller's array is left untouched.
            signal = signal.astype(np.float64)
        move_axis = axis not in {-1, None}
        if move_axis:
            # Bring the target axis last so one slice expression covers all cases.
            signal = np.moveaxis(signal, axis, -1)
        # The right-hand side is evaluated into a temporary first, so every
        # difference uses the *old* neighbouring sample.
        signal[..., 1:] -= self.coeff * signal[..., :-1]
        if move_axis:
            signal = np.moveaxis(signal, -1, axis)
        return signal.astype(signal_dtype, copy=False)