# Copyright 2021 Sean Robertson
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Scaling functions
Scaling functions transform a scalar in the frequency domain to some other real domain
(the "scale" domain). The scaling functions should be invertible. Their primary purpose
is to define the bandwidths of filters in :mod:`pydrobert.speech.filters`.
"""
import abc
import numpy as np
from pydrobert.speech.alias import AliasedFactory
# Names exported by ``from <module> import *``: the abstract ScalingFunction
# interface plus the concrete scaling classes defined in this module.
__all__ = [
"BarkScaling",
"LinearScaling",
"MelScaling",
"OctaveScaling",
"ScalingFunction",
]
class ScalingFunction(AliasedFactory):
    """Converts a frequency to some scale and back again

    Concrete scaling functions provide both directions of the conversion:
    :func:`hertz_to_scale` and :func:`scale_to_hertz`.
    """

    @abc.abstractmethod
    def scale_to_hertz(self, scale: float) -> float:
        """Convert a scale value to a frequency (in Hertz)"""

    @abc.abstractmethod
    def hertz_to_scale(self, hertz: float) -> float:
        """Convert a frequency (in Hertz) to a scale value"""
class LinearScaling(ScalingFunction):
    """Linear scaling between high and low scales/frequencies

    The scale of a frequency is its offset from `low_hz` multiplied by `slope_hz`.

    Parameters
    ----------
    low_hz
        The frequency (in Hertz) corresponding to scale 0.
    slope_hz
        The increase in scale corresponding to a 1 Hertz increase in frequency.
        Must be nonzero, since :func:`scale_to_hertz` divides by it.
    """

    low_hz: float  #:
    slope_hz: float  #:
    aliases = {"linear", "uniform"}  #:

    def __init__(self, low_hz: float, slope_hz: float = 1.0):
        self.low_hz = low_hz
        self.slope_hz = slope_hz

    def scale_to_hertz(self, scale: float) -> float:
        """Convert scale to frequency (in Hertz)"""
        # inverse of hertz_to_scale: undo the slope, then restore the offset
        return scale / self.slope_hz + self.low_hz

    def hertz_to_scale(self, hertz: float) -> float:
        """Convert frequency (in Hertz) to scale"""
        return (hertz - self.low_hz) * self.slope_hz
class OctaveScaling(ScalingFunction):
    """Uniform scaling in log2 domain from low frequency

    The scale is the base-2 logarithm of the ratio between a frequency and `low_hz`.

    Parameters
    ----------
    low_hz
        The positive frequency (in Hertz) corresponding to scale 0. Frequencies below
        this value should never be queried.

    Raises
    ------
    ValueError
        If `low_hz` is zero or negative.
    """

    low_hz: float  #:
    aliases = {"octave"}  #:

    def __init__(self, low_hz: float):
        if low_hz <= 0:
            raise ValueError("low_hz must be positive")
        self.low_hz = low_hz

    def scale_to_hertz(self, scale: float) -> float:
        """Convert scale to frequency (in Hertz)"""
        # the reference frequency is floored at 1e-10 Hz before use
        reference_hz = max(1e-10, self.low_hz)
        return (2 ** scale) * reference_hz

    def hertz_to_scale(self, hertz: float) -> float:
        """Convert frequency (in Hertz) to scale"""
        # same 1e-10 Hz floor as scale_to_hertz so the two stay inverses
        reference_hz = max(1e-10, self.low_hz)
        return np.log2(hertz / reference_hz)
class MelScaling(ScalingFunction):
    r"""Psychoacoustic scaling function

    Based on the experiment in [stevens1937]_ wherein participants adjusted a second
    tone until it was half the pitch of the first. The functional approximation to the
    scale is implemented with the formula from [oshaughnessy1987]_ (being honest, from
    `Wikipedia <https://en.wikipedia.org/wiki/Mel_scale>`__):

    .. math::

        s = 1127 \ln \left(1 + \frac{f}{700} \right)

    Where :math:`s` is the scale and :math:`f` is the frequency in Hertz.
    """

    aliases = {"mel"}  #:

    def scale_to_hertz(self, scale: float) -> float:
        """Convert scale to frequency (in Hertz)"""
        # solve s = 1127 ln(1 + f / 700) for f
        ratio = np.exp(scale / 1127.0)
        return 700.0 * (ratio - 1.0)

    def hertz_to_scale(self, hertz: float) -> float:
        """Convert frequency (in Hertz) to scale"""
        ratio = 1 + hertz / 700.0
        return 1127.0 * np.log(ratio)
class BarkScaling(ScalingFunction):
    r"""Psychoacoustic scaling function

    Based on a collection of experiments briefly mentioned in [zwicker1961]_ involving
    masking to determine critical bands. The functional approximation to the scale is
    implemented with the formula from [traunmuller1990]_ (being honest, from `Wikipedia
    <https://en.wikipedia.org/wiki/Bark_scale>`__):

    .. math::

        s = \begin{cases}
            z + 0.15(2 - z) & \mbox{if }z < 2 \\
            z + 0.22(z - 20.1) & \mbox{if }z > 20.1 \\
            z & \mbox{otherwise}
        \end{cases}

    where

    .. math::

        z = 26.81f/(1960 + f) - 0.53

    Where :math:`s` is the scale and :math:`f` is the frequency in Hertz.
    """

    aliases = {"bark"}  #:

    def scale_to_hertz(self, scale: float) -> float:
        """Convert scale to frequency (in Hertz)"""
        # First undo the low-end / high-end corrections to recover z ...
        if scale < 2:
            z = (20.0 * scale - 6.0) / 17.0  # inverse of s = z + 0.15(2 - z)
        elif scale > 20.1:
            z = (50.0 * scale + 221.1) / 61.0  # inverse of s = z + 0.22(z - 20.1)
        else:
            z = scale
        # ... then solve z = 26.81f/(1960 + f) - 0.53 for f.
        return 1960.0 * (z + 0.53) / (26.28 - z)

    def hertz_to_scale(self, hertz: float) -> float:
        """Convert frequency (in Hertz) to scale"""
        z = 26.81 * hertz / (1960.0 + hertz) - 0.53
        if z < 2:
            return z + 0.15 * (2.0 - z)
        if z > 20.1:
            return z + 0.22 * (z - 20.1)
        return z