Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge Upstream #2

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
14 changes: 13 additions & 1 deletion README
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
RNNoise is a noise suppression library based on a recurrent neural network.
A description of the algorithm is provided in the following paper:

J.-M. Valin, A Hybrid DSP/Deep Learning Approach to Real-Time Full-Band Speech
Enhancement, Proceedings of IEEE Multimedia Signal Processing (MMSP) Workshop,
arXiv:1709.08243, 2018.
https://arxiv.org/pdf/1709.08243.pdf

An interactive demo is available at: https://jmvalin.ca/demo/rnnoise/

To compile, just type:
% ./autogen.sh
Expand All @@ -12,6 +20,10 @@ While it is meant to be used as a library, a simple command-line tool is
provided as an example. It operates on RAW 16-bit (machine endian) mono
PCM files sampled at 48 kHz. It can be used as:

./examples/rnnoise_demo <number of channels> <maximum attenuation> < input.raw > output.raw
./examples/rnnoise_demo <noisy speech> <output denoised>

The output is also a 16-bit raw PCM file.

The latest version of the source is available from
https://gitlab.xiph.org/xiph/rnnoise . The github repository
is a convenience copy.
49 changes: 49 additions & 0 deletions include/rnnoise.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@

#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

#ifndef RNNOISE_EXPORT
# if defined(WIN32)
Expand All @@ -48,18 +51,64 @@
typedef struct DenoiseState DenoiseState;
typedef struct RNNModel RNNModel;

/**
* Return the size of DenoiseState
*/
RNNOISE_EXPORT int rnnoise_get_size();

/**
* Return the number of samples processed by rnnoise_process_frame at a time
*/
RNNOISE_EXPORT int rnnoise_get_frame_size();

/**
* Initializes a pre-allocated DenoiseState
*
* If model is NULL the default model is used.
*
* See: rnnoise_create() and rnnoise_model_from_file()
*/
RNNOISE_EXPORT int rnnoise_init(DenoiseState *st, RNNModel *model);

/**
* Allocate and initialize a DenoiseState
*
* If model is NULL the default model is used.
*
* The returned pointer MUST be freed with rnnoise_destroy().
*/
RNNOISE_EXPORT DenoiseState *rnnoise_create(RNNModel *model);

/**
* Free a DenoiseState produced by rnnoise_create.
*
* The optional custom model must be freed by rnnoise_model_free() after.
*/
RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st);

/**
* Denoise a frame of samples
*
* in and out must be at least rnnoise_get_frame_size() large.
*/
RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in);

/**
* Load a model from a file
*
* It must be deallocated with rnnoise_model_free()
*/
RNNOISE_EXPORT RNNModel *rnnoise_model_from_file(FILE *f);

/**
* Free a custom model
*
* It must be called after all the DenoiseStates referring to it are freed.
*/
RNNOISE_EXPORT void rnnoise_model_free(RNNModel *model);

#ifdef __cplusplus
}
#endif

#endif
113 changes: 3 additions & 110 deletions src/celt_lpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#include "common.h"
#include "pitch.h"

void _celt_lpc(
void rnn_lpc(
opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */
const opus_val32 *ac, /* in: [0...p] autocorrelation values */
int p
Expand Down Expand Up @@ -88,114 +88,7 @@ int p
}


void celt_fir(
const opus_val16 *x,
const opus_val16 *num,
opus_val16 *y,
int N,
int ord)
{
int i,j;
opus_val16 rnum[ord];
for(i=0;i<ord;i++)
rnum[i] = num[ord-i-1];
for (i=0;i<N-3;i+=4)
{
opus_val32 sum[4];
sum[0] = SHL32(EXTEND32(x[i ]), SIG_SHIFT);
sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
xcorr_kernel(rnum, x+i-ord, sum, ord);
y[i ] = ROUND16(sum[0], SIG_SHIFT);
y[i+1] = ROUND16(sum[1], SIG_SHIFT);
y[i+2] = ROUND16(sum[2], SIG_SHIFT);
y[i+3] = ROUND16(sum[3], SIG_SHIFT);
}
for (;i<N;i++)
{
opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
for (j=0;j<ord;j++)
sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
y[i] = ROUND16(sum, SIG_SHIFT);
}
}

void celt_iir(const opus_val32 *_x,
const opus_val16 *den,
opus_val32 *_y,
int N,
int ord,
opus_val16 *mem)
{
#ifdef SMALL_FOOTPRINT
int i,j;
for (i=0;i<N;i++)
{
opus_val32 sum = _x[i];
for (j=0;j<ord;j++)
{
sum -= MULT16_16(den[j],mem[j]);
}
for (j=ord-1;j>=1;j--)
{
mem[j]=mem[j-1];
}
mem[0] = SROUND16(sum, SIG_SHIFT);
_y[i] = sum;
}
#else
int i,j;
celt_assert((ord&3)==0);
opus_val16 rden[ord];
opus_val16 y[N+ord];
for(i=0;i<ord;i++)
rden[i] = den[ord-i-1];
for(i=0;i<ord;i++)
y[i] = -mem[ord-i-1];
for(;i<N+ord;i++)
y[i]=0;
for (i=0;i<N-3;i+=4)
{
/* Unroll by 4 as if it were an FIR filter */
opus_val32 sum[4];
sum[0]=_x[i];
sum[1]=_x[i+1];
sum[2]=_x[i+2];
sum[3]=_x[i+3];
xcorr_kernel(rden, y+i, sum, ord);

/* Patch up the result to compensate for the fact that this is an IIR */
y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT);
_y[i ] = sum[0];
sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]);
y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
_y[i+1] = sum[1];
sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]);
y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
_y[i+2] = sum[2];

sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]);
y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
_y[i+3] = sum[3];
}
for (;i<N;i++)
{
opus_val32 sum = _x[i];
for (j=0;j<ord;j++)
sum -= MULT16_16(rden[j],y[i+j]);
y[i+ord] = SROUND16(sum,SIG_SHIFT);
_y[i] = sum;
}
for(i=0;i<ord;i++)
mem[i] = _y[N-i-1];
#endif
}

int _celt_autocorr(
int rnn_autocorr(
const opus_val16 *x, /* in: [0...n-1] samples x */
opus_val32 *ac, /* out: [0...lag-1] ac values */
const opus_val16 *window,
Expand Down Expand Up @@ -247,7 +140,7 @@ int _celt_autocorr(
shift = 0;
}
#endif
celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1);
rnn_pitch_xcorr(xptr, xptr, ac, fastN, lag+1);
for (k=0;k<=lag;k++)
{
for (i = k+fastN, d = 0; i < n; i++)
Expand Down
20 changes: 3 additions & 17 deletions src/celt_lpc.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,9 @@

#define LPC_ORDER 24

void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);

void celt_fir(
const opus_val16 *x,
const opus_val16 *num,
opus_val16 *y,
int N,
int ord);

void celt_iir(const opus_val32 *x,
const opus_val16 *den,
opus_val32 *y,
int N,
int ord,
opus_val16 *mem);

int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
void rnn_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);

int rnn_autocorr(const opus_val16 *x, opus_val32 *ac,
const opus_val16 *window, int overlap, int lag, int n);

#endif /* PLC_H */
24 changes: 14 additions & 10 deletions src/denoise.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ struct DenoiseState {
RNNState rnn;
};

void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
static void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
int i;
float sum[NB_BANDS] = {0};
for (i=0;i<NB_BANDS-1;i++)
Expand All @@ -122,7 +122,7 @@ void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
}
}

void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
static void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
int i;
float sum[NB_BANDS] = {0};
for (i=0;i<NB_BANDS-1;i++)
Expand All @@ -147,7 +147,7 @@ void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *
}
}

void interp_band_gain(float *g, const float *bandE) {
static void interp_band_gain(float *g, const float *bandE) {
int i;
memset(g, 0, FREQ_SIZE);
for (i=0;i<NB_BANDS-1;i++)
Expand All @@ -168,7 +168,7 @@ CommonState common;
static void check_init() {
int i;
if (common.init) return;
common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
common.kfft = rnn_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
for (i=0;i<FRAME_SIZE;i++)
common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
for (i=0;i<NB_BANDS;i++) {
Expand Down Expand Up @@ -218,7 +218,7 @@ static void forward_transform(kiss_fft_cpx *out, const float *in) {
x[i].r = in[i];
x[i].i = 0;
}
opus_fft(common.kfft, x, y, 0);
rnn_fft(common.kfft, x, y, 0);
for (i=0;i<FREQ_SIZE;i++) {
out[i] = y[i];
}
Expand All @@ -236,7 +236,7 @@ static void inverse_transform(float *out, const kiss_fft_cpx *in) {
x[i].r = x[WINDOW_SIZE - i].r;
x[i].i = -x[WINDOW_SIZE - i].i;
}
opus_fft(common.kfft, x, y, 0);
rnn_fft(common.kfft, x, y, 0);
/* output in reverse order for IFFT. */
out[0] = WINDOW_SIZE*y[0].r;
for (i=1;i<WINDOW_SIZE;i++) {
Expand All @@ -257,6 +257,10 @@ int rnnoise_get_size() {
return sizeof(DenoiseState);
}

int rnnoise_get_frame_size() {
return FRAME_SIZE;
}

int rnnoise_init(DenoiseState *st, RNNModel *model) {
memset(st, 0, sizeof(*st));
if (model)
Expand Down Expand Up @@ -321,12 +325,12 @@ static int compute_frame_features(DenoiseState *st, kiss_fft_cpx *X, kiss_fft_cp
RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE);
RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE);
pre[0] = &st->pitch_buf[0];
pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
rnn_pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
rnn_pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index);
pitch_index = PITCH_MAX_PERIOD-pitch_index;

gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
gain = rnn_remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain);
st->last_period = pitch_index;
st->last_gain = gain;
Expand Down Expand Up @@ -415,7 +419,7 @@ static void biquad(float *y, float mem[2], const float *x, const float *b, const
}
}

void pitch_filter(kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const float *Ep,
static void pitch_filter(kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const float *Ep,
const float *Exp, const float *g) {
int i;
float r[NB_BANDS];
Expand Down
Loading