# Copyright 2021 Sean Robertson
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes for pre-processing speech signals"""
import abc
from typing import Optional
import warnings
import numpy as np
from pydrobert.speech.alias import AliasedFactory
# Public names exported by a star-import of this module
__all__ = ["PreProcessor", "Dither", "Preemphasize"]

# Text of the DeprecationWarning issued when a caller passes ``axis`` to ``apply``
_AXIS_DEP_MSG = (
    "Specifying axis in preprocessor.apply is deprecated. Preprocessors should be "
    "applied to 1D signals only."
)
class PreProcessor(AliasedFactory):
    """A container for pre-processing signals with a transform

    Subclasses implement :func:`apply`, which receives a signal and returns the
    transformed signal.
    """

    @abc.abstractmethod
    def apply(
        self, signal: np.ndarray, axis: Optional[int] = None, in_place: bool = False
    ) -> np.ndarray:
        """Applies the transformation to a signal tensor

        Consult the class documentation for more details on what the transformation is.

        Parameters
        ----------
        signal
            The signal to transform
        axis
            Deprecated. The axis to apply the transform to.
        in_place
            Whether it is okay to modify `signal` (:obj:`True`) or whether a copy should
            be made (:obj:`False`)

        Returns
        -------
        out : np.ndarray
            The transformed features
        """
        ...
class Dither(PreProcessor):
    """Add random noise to a signal tensor

    Zero-mean normally distributed noise with standard deviation `coeff` is added
    to the signal.

    The default axis of `apply` has been set to None, which will generate random noise
    for each coefficient. This is likely the desired behaviour. Setting axis to an
    integer is deprecated (a :class:`DeprecationWarning` is issued); for signals with
    more than one dimension it draws one noise value per index along that axis and
    broadcasts it over the remaining axes.

    Intermediate values are calculated as 64-bit floats. The result is cast back to the
    input data type.

    Parameters
    ----------
    coeff
        Standard deviation of dither
    """

    coeff: float  #:
    aliases = {"dither", "dithering"}  #:

    def __init__(self, coeff: float = 1.0):
        super().__init__()
        self.coeff = coeff

    def apply(
        self, signal: np.ndarray, axis: Optional[int] = None, in_place: bool = False
    ) -> np.ndarray:
        if axis is not None:
            # stacklevel=2 attributes the warning to the code calling apply()
            warnings.warn(_AXIS_DEP_MSG, DeprecationWarning, stacklevel=2)
        signal_dtype = signal.dtype
        # astype copies, so the caller's array is only touched when in_place is set
        # and the array is already float64
        if not in_place or signal_dtype != np.float64:
            signal = signal.astype(np.float64)
        if axis is None or signal.ndim <= 1:
            # independent noise for every element (axis is ignored for 0-D/1-D input)
            noise_shape = signal.shape
        else:
            # singleton everywhere except `axis` so the noise broadcasts
            noise_shape = [1] * signal.ndim
            noise_shape[axis] = signal.shape[axis]
        signal += np.random.normal(0, self.coeff, noise_shape)
        # copy=False avoids a second copy when the dtype is already float64
        return signal.astype(signal_dtype, copy=False)
class Preemphasize(PreProcessor):
    """Attenuate the low frequencies of a signal by taking sample differences

    The following transformation is applied along the target axis (the last axis
    unless the deprecated `axis` argument of `apply` says otherwise)

    ::

        new[i] = old[i] - coeff * old[i-1] for i >= 1
        new[0] = old[0]

    This is essentially a convolution with a Haar wavelet for positive `coeff`. It
    emphasizes high frequencies.

    Intermediate values are calculated as 64-bit floats. The result is cast back to the
    input data type. A 0-dimensional signal holds a single sample and is returned
    unchanged.

    Parameters
    ----------
    coeff
        Preemphasis coefficient
    """

    coeff: float  #:
    aliases = {"preemphasize", "preemphasis", "preemph"}  #:

    def __init__(self, coeff: float = 0.97):
        super().__init__()
        self.coeff = coeff

    def apply(
        self, signal: np.ndarray, axis: Optional[int] = None, in_place: bool = False
    ) -> np.ndarray:
        if axis is not None:
            # stacklevel=2 attributes the warning to the code calling apply()
            warnings.warn(_AXIS_DEP_MSG, DeprecationWarning, stacklevel=2)
        signal_dtype = signal.dtype
        # astype copies, so the caller's array is only touched when in_place is set
        # and the array is already float64
        if not in_place or signal_dtype != np.float64:
            signal = signal.astype(np.float64)
        if signal.ndim == 0:
            # a lone sample has no predecessor to subtract; slicing would raise
            return signal.astype(signal_dtype, copy=False)
        needs_move = axis not in {-1, None}
        if needs_move:
            # moveaxis yields a view, so the update below still lands in `signal`
            signal = np.moveaxis(signal, axis, -1)
        # the right-hand side is materialized as a temporary before the in-place
        # subtraction, so every sample sees the *old* value of its predecessor
        signal[..., 1:] -= self.coeff * signal[..., :-1]
        if needs_move:
            signal = np.moveaxis(signal, -1, axis)
        return signal.astype(signal_dtype, copy=False)