diff --git a/README b/README index 88fc79cd..957b3fff 100644 --- a/README +++ b/README @@ -1,4 +1,12 @@ RNNoise is a noise suppression library based on a recurrent neural network. +A description of the algorithm is provided in the following paper: + +J.-M. Valin, A Hybrid DSP/Deep Learning Approach to Real-Time Full-Band Speech +Enhancement, Proceedings of IEEE Multimedia Signal Processing (MMSP) Workshop, +arXiv:1709.08243, 2018. +https://arxiv.org/pdf/1709.08243.pdf + +An interactive demo is available at: https://jmvalin.ca/demo/rnnoise/ To compile, just type: % ./autogen.sh @@ -12,6 +20,10 @@ While it is meant to be used as a library, a simple command-line tool is provided as an example. It operates on RAW 16-bit (machine endian) mono PCM files sampled at 48 kHz. It can be used as: -./examples/rnnoise_demo < input.raw > output.raw +./examples/rnnoise_demo <noisy speech> <output denoised> The output is also a 16-bit raw PCM file. + +The latest version of the source is available from +https://gitlab.xiph.org/xiph/rnnoise . The github repository +is a convenience copy. diff --git a/include/rnnoise.h b/include/rnnoise.h index 67f0b060..c4215d96 100644 --- a/include/rnnoise.h +++ b/include/rnnoise.h @@ -30,6 +30,9 @@ #include <stdio.h> +#ifdef __cplusplus +extern "C" { +#endif #ifndef RNNOISE_EXPORT # if defined(WIN32) @@ -48,18 +51,64 @@ typedef struct DenoiseState DenoiseState; typedef struct RNNModel RNNModel; +/** + * Return the size of DenoiseState + */ RNNOISE_EXPORT int rnnoise_get_size(); +/** + * Return the number of samples processed by rnnoise_process_frame at a time + */ +RNNOISE_EXPORT int rnnoise_get_frame_size(); + +/** + * Initializes a pre-allocated DenoiseState + * + * If model is NULL the default model is used. + * + * See: rnnoise_create() and rnnoise_model_from_file() + */ RNNOISE_EXPORT int rnnoise_init(DenoiseState *st, RNNModel *model); +/** + * Allocate and initialize a DenoiseState + * + * If model is NULL the default model is used. 
+ * + * The returned pointer MUST be freed with rnnoise_destroy(). + */ RNNOISE_EXPORT DenoiseState *rnnoise_create(RNNModel *model); +/** + * Free a DenoiseState produced by rnnoise_create. + * + * The optional custom model must be freed by rnnoise_model_free() after. + */ RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st); +/** + * Denoise a frame of samples + * + * in and out must be at least rnnoise_get_frame_size() large. + */ RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in); +/** + * Load a model from a file + * + * It must be deallocated with rnnoise_model_free() + */ RNNOISE_EXPORT RNNModel *rnnoise_model_from_file(FILE *f); +/** + * Free a custom model + * + * It must be called after all the DenoiseStates referring to it are freed. + */ RNNOISE_EXPORT void rnnoise_model_free(RNNModel *model); +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/celt_lpc.c b/src/celt_lpc.c index 521351e9..000924b3 100644 --- a/src/celt_lpc.c +++ b/src/celt_lpc.c @@ -34,7 +34,7 @@ #include "common.h" #include "pitch.h" -void _celt_lpc( +void rnn_lpc( opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */ const opus_val32 *ac, /* in: [0...p] autocorrelation values */ int p @@ -88,114 +88,7 @@ int p } -void celt_fir( - const opus_val16 *x, - const opus_val16 *num, - opus_val16 *y, - int N, - int ord) -{ - int i,j; - opus_val16 rnum[ord]; - for(i=0;i=1;j--) - { - mem[j]=mem[j-1]; - } - mem[0] = SROUND16(sum, SIG_SHIFT); - _y[i] = sum; - } -#else - int i,j; - celt_assert((ord&3)==0); - opus_val16 rden[ord]; - opus_val16 y[N+ord]; - for(i=0;ipitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE); RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE); pre[0] = &st->pitch_buf[0]; - pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1); - pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE, + rnn_pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1); + 
rnn_pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE, PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index); pitch_index = PITCH_MAX_PERIOD-pitch_index; - gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD, + gain = rnn_remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD, PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain); st->last_period = pitch_index; st->last_gain = gain; @@ -415,7 +419,7 @@ static void biquad(float *y, float mem[2], const float *x, const float *b, const } } -void pitch_filter(kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const float *Ep, +static void pitch_filter(kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const float *Ep, const float *Exp, const float *g) { int i; float r[NB_BANDS]; diff --git a/src/kiss_fft.c b/src/kiss_fft.c index 922dacc6..02cafdb8 100644 --- a/src/kiss_fft.c +++ b/src/kiss_fft.c @@ -39,7 +39,7 @@ #define CUSTOM_MODES /* The guts header contains all the multiplication and addition macros that are defined for - complex numbers. It also delares the kf_ internal functions. + complex numbers. It also declares the kf_ internal functions. */ static void kf_bfly2( @@ -420,7 +420,7 @@ static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft) #endif } -int opus_fft_alloc_arch_c(kiss_fft_state *st) { +int rnn_fft_alloc_arch_c(kiss_fft_state *st) { (void)st; return 0; } @@ -431,7 +431,7 @@ int opus_fft_alloc_arch_c(kiss_fft_state *st) { * The return value is a contiguous block of memory. As such, * It can be freed with free(). 
* */ -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, +kiss_fft_state *rnn_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch) { kiss_fft_state *st=NULL; @@ -483,29 +483,29 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, compute_bitrev_table(0, bitrev, 1,1, st->factors,st); /* Initialize architecture specific fft parameters */ - if (opus_fft_alloc_arch(st, arch)) + if (rnn_fft_alloc_arch(st, arch)) goto fail; } return st; fail: - opus_fft_free(st, arch); + rnn_fft_free(st, arch); return NULL; } -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch) +kiss_fft_state *rnn_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch) { - return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch); + return rnn_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch); } -void opus_fft_free_arch_c(kiss_fft_state *st) { +void rnn_fft_free_arch_c(kiss_fft_state *st) { (void)st; } -void opus_fft_free(const kiss_fft_state *cfg, int arch) +void rnn_fft_free(const kiss_fft_state *cfg, int arch) { if (cfg) { - opus_fft_free_arch((kiss_fft_state *)cfg, arch); + rnn_fft_free_arch((kiss_fft_state *)cfg, arch); opus_free((opus_int16*)cfg->bitrev); if (cfg->shift < 0) opus_free((kiss_twiddle_cpx*)cfg->twiddles); @@ -515,7 +515,7 @@ void opus_fft_free(const kiss_fft_state *cfg, int arch) #endif /* CUSTOM_MODES */ -void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) +void rnn_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) { int m2, m; int p; @@ -563,7 +563,7 @@ void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) } } -void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +void rnn_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) { int i; opus_val16 scale; @@ -582,11 +582,11 @@ void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *f fout[st->bitrev[i]].r = 
SHR32(MULT16_32_Q16(scale, x.r), scale_shift); fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); } - opus_fft_impl(st, fout); + rnn_fft_impl(st, fout); } -void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +void rnn_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) { int i; celt_assert2 (fin != fout, "In-place FFT not supported"); @@ -595,7 +595,7 @@ void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx * fout[st->bitrev[i]] = fin[i]; for (i=0;i<st->nfft;i++) fout[i].i = -fout[i].i; - opus_fft_impl(st, fout); + rnn_fft_impl(st, fout); for (i=0;i<st->nfft;i++) fout[i].i = -fout[i].i; } diff --git a/src/kiss_fft.h b/src/kiss_fft.h index b2fe9a47..c9bc0065 100644 --- a/src/kiss_fft.h +++ b/src/kiss_fft.h @@ -128,9 +128,9 @@ typedef struct kiss_fft_state{ * buffer size in *lenmem. * */ -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch); +kiss_fft_state *rnn_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch); -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch); +kiss_fft_state *rnn_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch); /** * opus_fft(cfg,in_out_buf) @@ -142,17 +142,17 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch); * Note that each element is complex and can be accessed like f[k].r and f[k].i * */ -void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); -void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); +void rnn_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); +void rnn_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); -void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); -void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); +void 
rnn_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); +void rnn_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); -void opus_fft_free(const kiss_fft_state *cfg, int arch); +void rnn_fft_free(const kiss_fft_state *cfg, int arch); -void opus_fft_free_arch_c(kiss_fft_state *st); -int opus_fft_alloc_arch_c(kiss_fft_state *st); +void rnn_fft_free_arch_c(kiss_fft_state *st); +int rnn_fft_alloc_arch_c(kiss_fft_state *st); #if !defined(OVERRIDE_OPUS_FFT) /* Is run-time CPU detection enabled on this platform? */ @@ -181,17 +181,17 @@ extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, #else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ -#define opus_fft_alloc_arch(_st, arch) \ - ((void)(arch), opus_fft_alloc_arch_c(_st)) +#define rnn_fft_alloc_arch(_st, arch) \ + ((void)(arch), rnn_fft_alloc_arch_c(_st)) -#define opus_fft_free_arch(_st, arch) \ - ((void)(arch), opus_fft_free_arch_c(_st)) +#define rnn_fft_free_arch(_st, arch) \ + ((void)(arch), rnn_fft_free_arch_c(_st)) -#define opus_fft(_cfg, _fin, _fout, arch) \ - ((void)(arch), opus_fft_c(_cfg, _fin, _fout)) +#define rnn_fft(_cfg, _fin, _fout, arch) \ + ((void)(arch), rnn_fft_c(_cfg, _fin, _fout)) -#define opus_ifft(_cfg, _fin, _fout, arch) \ - ((void)(arch), opus_ifft_c(_cfg, _fin, _fout)) +#define rnn_ifft(_cfg, _fin, _fout, arch) \ + ((void)(arch), rnn_ifft_c(_cfg, _fin, _fout)) #endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ #endif /* end if !defined(OVERRIDE_OPUS_FFT) */ diff --git a/src/pitch.c b/src/pitch.c index bd101a6c..806399bd 100644 --- a/src/pitch.c +++ b/src/pitch.c @@ -37,9 +37,6 @@ #include "pitch.h" #include "common.h" -//#include "modes.h" -//#include "stack_alloc.h" -//#include "mathops.h" #include "celt_lpc.h" #include "math.h" @@ -145,7 +142,7 @@ static void celt_fir5(const opus_val16 *x, } -void pitch_downsample(celt_sig *x[], opus_val16 *x_lp, +void rnn_pitch_downsample(celt_sig *x[], opus_val16 *x_lp, int len, int C) 
{ int i; @@ -180,7 +177,7 @@ void pitch_downsample(celt_sig *x[], opus_val16 *x_lp, x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift); } - _celt_autocorr(x_lp, ac, NULL, 0, + rnn_autocorr(x_lp, ac, NULL, 0, 4, len>>1); /* Noise floor -40 dB */ @@ -200,7 +197,7 @@ void pitch_downsample(celt_sig *x[], opus_val16 *x_lp, #endif } - _celt_lpc(lpc, ac, 4); + rnn_lpc(lpc, ac, 4); for (i=0;i<4;i++) { tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp); @@ -215,7 +212,7 @@ void pitch_downsample(celt_sig *x[], opus_val16 *x_lp, celt_fir5(x_lp, lpc2, x_lp, len>>1, mem); } -void celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, +void rnn_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) { @@ -280,7 +277,7 @@ void celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, #endif } -void pitch_search(const opus_val16 *x_lp, opus_val16 *y, +void rnn_pitch_search(const opus_val16 *x_lp, opus_val16 *y, int len, int max_pitch, int *pitch) { int i, j; @@ -329,7 +326,7 @@ void pitch_search(const opus_val16 *x_lp, opus_val16 *y, #ifdef FIXED_POINT maxcorr = #endif - celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2); + rnn_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2); find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch #ifdef FIXED_POINT @@ -420,7 +417,7 @@ static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy #endif static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; -opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, +opus_val16 rnn_remove_doubling(opus_val16 *x, int maxperiod, int minperiod, int N, int *T0_, int prev_period, opus_val16 prev_gain) { int k, i, T, T0; diff --git a/src/pitch.h b/src/pitch.h index 9bd31b49..d87fd9d4 100644 --- a/src/pitch.h +++ b/src/pitch.h @@ -38,13 +38,13 @@ //#include "cpu_support.h" #include "arch.h" -void pitch_downsample(celt_sig *x[], opus_val16 *x_lp, +void 
rnn_pitch_downsample(celt_sig *x[], opus_val16 *x_lp, int len, int C); -void pitch_search(const opus_val16 *x_lp, opus_val16 *y, +void rnn_pitch_search(const opus_val16 *x_lp, opus_val16 *y, int len, int max_pitch, int *pitch); -opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, +opus_val16 rnn_remove_doubling(opus_val16 *x, int maxperiod, int minperiod, int N, int *T0, int prev_period, opus_val16 prev_gain); @@ -143,7 +143,7 @@ static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, return xy; } -void celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, +void rnn_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch); #endif diff --git a/src/rnn.c b/src/rnn.c index c54958eb..d36b5506 100644 --- a/src/rnn.c +++ b/src/rnn.c @@ -76,7 +76,7 @@ static OPUS_INLINE float relu(float x) return x < 0 ? 0 : x; } -void compute_dense(const DenseLayer *layer, float *output, const float *input) +static void compute_dense(const DenseLayer *layer, float *output, const float *input) { int i, j; int N, M; @@ -106,7 +106,7 @@ void compute_dense(const DenseLayer *layer, float *output, const float *input) } } -void compute_gru(const GRULayer *gru, float *state, const float *input) +static void compute_gru(const GRULayer *gru, float *state, const float *input) { int i, j; int N, M; diff --git a/src/rnn.h b/src/rnn.h index 10329f55..edb82e3b 100644 --- a/src/rnn.h +++ b/src/rnn.h @@ -60,10 +60,6 @@ typedef struct { typedef struct RNNState RNNState; -void compute_dense(const DenseLayer *layer, float *output, const float *input); - -void compute_gru(const GRULayer *gru, float *state, const float *input); - void compute_rnn(RNNState *rnn, float *gains, float *vad, const float *input); -#endif /* _MLP_H_ */ +#endif /* RNN_H_ */