Skip to content

Commit

Permalink
Correct approach to keeping noise_gen at min ampl
Browse files Browse the repository at this point in the history
  • Loading branch information
kcoul committed Feb 10, 2022
1 parent 199d4aa commit 85eb774
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 168 deletions.
28 changes: 2 additions & 26 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,8 @@ set(audio_processing__aec3
"audio_processing/aec3/erl_estimator.h"
"audio_processing/aec3/erle_estimator.cc"
"audio_processing/aec3/erle_estimator.h"
"audio_processing/aec3/false_comfort_noise_generator.cc"
"audio_processing/aec3/false_comfort_noise_generator.h"
"audio_processing/aec3/fft_buffer.cc"
"audio_processing/aec3/fft_buffer.h"
"audio_processing/aec3/fft_data.h"
Expand Down Expand Up @@ -1134,32 +1136,6 @@ if("${CMAKE_VS_PLATFORM_NAME}" STREQUAL "x86")
)
endif()



























set(PROJECT_NAME demo)

################################################################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,152 +29,165 @@

namespace webrtc {

namespace {
namespace {

// Table of sqrt(2) * sin(2*pi*i/32).
// Lookup table: kSqrt2Sin[i] == sqrt(2) * sin(2 * pi * i / 32), i = 0..31.
// Laid out one quadrant (8 entries) per row. Because a quarter period is
// 8 entries, kSqrt2Sin[(i + 8) & 31] yields the matching sqrt(2) * cos value.
constexpr float kSqrt2Sin[32] = {
    // i = 0..7
    0.0000000f, 0.2758994f, 0.5411961f, 0.7856950f,
    1.0000000f, 1.1758756f, 1.3065630f, 1.3870398f,
    // i = 8..15
    1.4142136f, 1.3870398f, 1.3065630f, 1.1758756f,
    1.0000000f, 0.7856950f, 0.5411961f, 0.2758994f,
    // i = 16..23
    0.0000000f, -0.2758994f, -0.5411961f, -0.7856950f,
    -1.0000000f, -1.1758756f, -1.3065630f, -1.3870398f,
    // i = 24..31
    -1.4142136f, -1.3870398f, -1.3065630f, -1.1758756f,
    -1.0000000f, -0.7856950f, -0.5411961f, -0.2758994f};

// Fills `lower_band_noise` and `upper_band_noise` with random-phase noise in
// the frequency domain.
//
// `N2` is the spectrum whose element-wise square root shapes the lower-band
// noise. The upper-band noise uses one flat level derived from the upper half
// of that square-root spectrum. `seed` points at the pseudo-random generator
// state, which is advanced once for every generated bin.
void GenerateComfortNoise(Aec3Optimization optimization,
                          const std::array<float, kFftLengthBy2Plus1>& N2,
                          uint32_t* seed,
                          FftData* lower_band_noise,
                          FftData* upper_band_noise) {
  // Square-root spectrum of the input.
  std::array<float, kFftLengthBy2Plus1> magnitude = N2;
  aec3::VectorMath(optimization).Sqrt(magnitude);

  // Upper-band level: sum of the upper half of the square-root spectrum,
  // scaled by 1 / (kFftLengthBy2Plus1 / 2 + 1).
  constexpr int kUpperHalfStart = kFftLengthBy2Plus1 / 2;
  constexpr float kUpperHalfScale = 1.f / (kFftLengthBy2Plus1 / 2 + 1);
  const float upper_band_level =
      kUpperHalfScale *
      std::accumulate(magnitude.begin() + kUpperHalfStart, magnitude.end(),
                      0.f);

  // The first and last real bins carry no noise.
  lower_band_noise->re[0] = 0.f;
  lower_band_noise->re[kFftLengthBy2] = 0.f;
  upper_band_noise->re[0] = 0.f;
  upper_band_noise->re[kFftLengthBy2] = 0.f;

  // The analysis and synthesis windowing lose power when cross-fading noise
  // frames that are completely uncorrelated (random phase), which is why the
  // table values carry a factor sqrt(2). Speech does not need this because
  // its overlapping input frames are strongly correlated.
  constexpr int kTableIndexMask = 32 - 1;
  constexpr int kQuarterPeriod = 8;
  for (size_t bin = 1; bin < kFftLengthBy2; ++bin) {
    // Advance the generator, keeping the state to 31 bits.
    *seed = (*seed * 69069 + 1) & (0x80000000 - 1);
    // The top 5 of those 31 bits select a random phase a = 2*pi*index/32.
    const int phase_index = *seed >> 26;

    // sqrt(2) * sin(a)
    const float sin_term = kSqrt2Sin[phase_index];
    // sqrt(2) * cos(a) = sqrt(2) * sin(a + pi/2)
    const float cos_term =
        kSqrt2Sin[(phase_index + kQuarterPeriod) & kTableIndexMask];

    // Lower band: spectrally shaped by the square-root spectrum.
    const float bin_magnitude = magnitude[bin];
    lower_band_noise->re[bin] = bin_magnitude * sin_term;
    lower_band_noise->im[bin] = bin_magnitude * cos_term;

    // Upper band: the same phase at the flat level.
    upper_band_noise->re[bin] = upper_band_level * sin_term;
    upper_band_noise->im[bin] = upper_band_level * cos_term;
  }
}

} // namespace

// Creates a generator for `num_capture_channels` capture channels.
//
// The random seed starts at the fixed value 42. Every per-channel spectrum is
// given a uniform starting value: 0 for the initial-phase noise estimate and
// for the smoothed capture spectrum, and 1e6 for the noise estimate N2_.
// NOTE(review): the 1e6 start is presumably a deliberately high value for the
// estimate to decrease from; confirm against Compute().
FalseComfortNoiseGenerator::FalseComfortNoiseGenerator(
    Aec3Optimization optimization,
    size_t num_capture_channels)
    : optimization_(optimization),
      seed_(42),
      num_capture_channels_(num_capture_channels),
      N2_initial_(
          std::make_unique<std::vector<std::array<float, kFftLengthBy2Plus1>>>(
              num_capture_channels_)),
      Y2_smoothed_(num_capture_channels_),
      N2_(num_capture_channels_) {
  // N2_initial_ holds exactly one spectrum per capture channel.
  for (auto& initial_spectrum : *N2_initial_) {
    initial_spectrum.fill(0.f);
  }
  for (size_t channel = 0; channel != num_capture_channels_; ++channel) {
    N2_[channel].fill(1.0e6f);
    Y2_smoothed_[channel].fill(0.f);
  }
}

// Out-of-line defaulted destructor; no custom teardown is performed here.
FalseComfortNoiseGenerator::~FalseComfortNoiseGenerator() = default;

void FalseComfortNoiseGenerator::Compute(
bool saturated_capture,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
capture_spectrum,
rtc::ArrayView<FftData> lower_band_noise,
rtc::ArrayView<FftData> upper_band_noise) {
const auto& Y2 = capture_spectrum;

if (!saturated_capture) {
// Smooth Y2.
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
std::transform(Y2_smoothed_[ch].begin(), Y2_smoothed_[ch].end(),
Y2[ch].begin(), Y2_smoothed_[ch].begin(),
[](float a, float b) { return a + 0.1f * (b - a); });
}
// Lookup table: kSqrt2Sin[i] == sqrt(2) * sin(2 * pi * i / 32), i = 0..31.
// Laid out one quadrant (8 entries) per row. Because a quarter period is
// 8 entries, kSqrt2Sin[(i + 8) & 31] yields the matching sqrt(2) * cos value.
constexpr float kSqrt2Sin[32] = {
    // i = 0..7
    0.0000000f, 0.2758994f, 0.5411961f, 0.7856950f,
    1.0000000f, 1.1758756f, 1.3065630f, 1.3870398f,
    // i = 8..15
    1.4142136f, 1.3870398f, 1.3065630f, 1.1758756f,
    1.0000000f, 0.7856950f, 0.5411961f, 0.2758994f,
    // i = 16..23
    0.0000000f, -0.2758994f, -0.5411961f, -0.7856950f,
    -1.0000000f, -1.1758756f, -1.3065630f, -1.3870398f,
    // i = 24..31
    -1.4142136f, -1.3870398f, -1.3065630f, -1.1758756f,
    -1.0000000f, -0.7856950f, -0.5411961f, -0.2758994f};

// Fills `lower_band_noise` and `upper_band_noise` with random-phase noise in
// the frequency domain.
//
// `N2` is the spectrum whose element-wise square root shapes the lower-band
// noise. The upper-band noise uses one flat level derived from the upper half
// of that square-root spectrum. `seed` points at the pseudo-random generator
// state, which is advanced once for every generated bin.
void GenerateComfortNoise(Aec3Optimization optimization,
                          const std::array<float, kFftLengthBy2Plus1>& N2,
                          uint32_t* seed,
                          FftData* lower_band_noise,
                          FftData* upper_band_noise) {
  // Square-root spectrum of the input.
  std::array<float, kFftLengthBy2Plus1> magnitude = N2;
  aec3::VectorMath(optimization).Sqrt(magnitude);

  // Upper-band level: sum of the upper half of the square-root spectrum,
  // scaled by 1 / (kFftLengthBy2Plus1 / 2 + 1).
  constexpr int kUpperHalfStart = kFftLengthBy2Plus1 / 2;
  constexpr float kUpperHalfScale = 1.f / (kFftLengthBy2Plus1 / 2 + 1);
  const float upper_band_level =
      kUpperHalfScale *
      std::accumulate(magnitude.begin() + kUpperHalfStart, magnitude.end(),
                      0.f);

  // The first and last real bins carry no noise.
  lower_band_noise->re[0] = 0.f;
  lower_band_noise->re[kFftLengthBy2] = 0.f;
  upper_band_noise->re[0] = 0.f;
  upper_band_noise->re[kFftLengthBy2] = 0.f;

  // The analysis and synthesis windowing lose power when cross-fading noise
  // frames that are completely uncorrelated (random phase), which is why the
  // table values carry a factor sqrt(2). Speech does not need this because
  // its overlapping input frames are strongly correlated.
  constexpr int kTableIndexMask = 32 - 1;
  constexpr int kQuarterPeriod = 8;
  for (size_t bin = 1; bin < kFftLengthBy2; ++bin) {
    // Advance the generator, keeping the state to 31 bits.
    *seed = (*seed * 69069 + 1) & (0x80000000 - 1);
    // The top 5 of those 31 bits select a random phase a = 2*pi*index/32.
    const int phase_index = *seed >> 26;

    // sqrt(2) * sin(a)
    const float sin_term = kSqrt2Sin[phase_index];
    // sqrt(2) * cos(a) = sqrt(2) * sin(a + pi/2)
    const float cos_term =
        kSqrt2Sin[(phase_index + kQuarterPeriod) & kTableIndexMask];

    // Lower band: spectrally shaped by the square-root spectrum.
    const float bin_magnitude = magnitude[bin];
    lower_band_noise->re[bin] = bin_magnitude * sin_term;
    lower_band_noise->im[bin] = bin_magnitude * cos_term;

    // Upper band: the same phase at the flat level.
    upper_band_noise->re[bin] = upper_band_level * sin_term;
    upper_band_noise->im[bin] = upper_band_level * cos_term;
  }
}

if (N2_counter_ > 50) {
// Update N2 from Y2_smoothed.
} // namespace

FalseComfortNoiseGenerator::FalseComfortNoiseGenerator(Aec3Optimization optimization,
size_t num_capture_channels)
: optimization_(optimization),
seed_(42),
num_capture_channels_(num_capture_channels),
N2_initial_(
std::make_unique<std::vector<std::array<float, kFftLengthBy2Plus1>>>(
num_capture_channels_)),
Y2_smoothed_(num_capture_channels_),
N2_(num_capture_channels_) {
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
std::transform(N2_[ch].begin(), N2_[ch].end(), Y2_smoothed_[ch].begin(),
N2_[ch].begin(), [](float a, float b) {
return b < a ? (0.9f * b + 0.1f * a) * 1.0002f
: a * 1.0002f;
});
(*N2_initial_)[ch].fill(0.f);
Y2_smoothed_[ch].fill(0.f);
N2_[ch].fill(1.0e6f);
}
}

if (N2_initial_) {
if (++N2_counter_ == 1000) {
N2_initial_.reset();
} else {
// Compute the N2_initial from N2.
// Out-of-line defaulted destructor; no custom teardown is performed here.
FalseComfortNoiseGenerator::~FalseComfortNoiseGenerator() = default;

void FalseComfortNoiseGenerator::Compute(
bool saturated_capture,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
capture_spectrum,
rtc::ArrayView<FftData> lower_band_noise,
rtc::ArrayView<FftData> upper_band_noise) {
const auto& Y2 = capture_spectrum;

if (!saturated_capture) {
// Smooth Y2.
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
std::transform(N2_[ch].begin(), N2_[ch].end(),
(*N2_initial_)[ch].begin(), (*N2_initial_)[ch].begin(),
[](float a, float b) {
return a > b ? b + 0.001f * (a - b) : a;
});
std::transform(Y2_smoothed_[ch].begin(), Y2_smoothed_[ch].end(),
Y2[ch].begin(), Y2_smoothed_[ch].begin(),
[](float a, float b) { return a + 0.1f * (b - a); });
}
}
}

// Limit the noise to a floor matching a WGN input of -96 dBFS.
constexpr float kNoiseFloor = 17.1267f;
if (N2_counter_ > 50) {
// Update N2 from Y2_smoothed.
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
std::transform(N2_[ch].begin(), N2_[ch].end(), Y2_smoothed_[ch].begin(),
N2_[ch].begin(), [](float a, float b) {
return b < a ? (0.9f * b + 0.1f * a) * 1.0002f
: a * 1.0002f;
});
}
}

for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
for (auto& n : N2_[ch]) {
n = std::max(n, kNoiseFloor);
}
if (N2_initial_) {
for (auto& n : (*N2_initial_)[ch]) {
n = std::max(n, kNoiseFloor);
if (N2_initial_) {
if (++N2_counter_ == 1000) {
N2_initial_.reset();
} else {
// Compute the N2_initial from N2.
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
std::transform(N2_[ch].begin(), N2_[ch].end(),
(*N2_initial_)[ch].begin(), (*N2_initial_)[ch].begin(),
[](float a, float b) {
return a > b ? b + 0.001f * (a - b) : a;
});
}
}
}

// Limit the noise to a floor matching a WGN input of -96 dBFS.
constexpr float kNoiseFloor = 17.1267f;

for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
for (auto& n : N2_[ch]) {
n = kNoiseFloor;
}
if (N2_initial_) {
for (auto& n : (*N2_initial_)[ch]) {
n = kNoiseFloor;
}
}
}
//Original problematic code kept here for posterity
/*
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
for (auto& n : N2_[ch]) {
n = std::max(n, kNoiseFloor);
}
if (N2_initial_) {
for (auto& n : (*N2_initial_)[ch]) {
n = std::max(n, kNoiseFloor);
}
}
}
*/
}
}
}

// Choose N2 estimate to use.
const auto& N2 = N2_initial_ ? (*N2_initial_) : N2_;
// Choose N2 estimate to use.
const auto& N2 = N2_initial_ ? (*N2_initial_) : N2_;

for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
//GenerateComfortNoise(optimization_, N2[ch], &seed_, &lower_band_noise[ch],
// &upper_band_noise[ch]);
}
}
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
GenerateComfortNoise(optimization_, N2[ch], &seed_, &lower_band_noise[ch],
&upper_band_noise[ch]);
}
}

} // namespace webrtc
Loading

0 comments on commit 85eb774

Please sign in to comment.