vscera · tahnik · Nov 28, 2020 · Jan 18, 2021 · Jul 14, 2020 · Jan 22, 2021
diff --git a/README b/README
@@ -1,4 +1,12 @@
 RNNoise is a noise suppression library based on a recurrent neural network.
+A description of the algorithm is provided in the following paper:
+
+J.-M. Valin, A Hybrid DSP/Deep Learning Approach to Real-Time Full-Band Speech
+Enhancement, Proceedings of IEEE Multimedia Signal Processing (MMSP) Workshop,
+arXiv:1709.08243, 2018.
+https://arxiv.org/pdf/1709.08243.pdf
+
+An interactive demo is available at: https://jmvalin.ca/demo/rnnoise/
 
 To compile, just type:
 % ./autogen.sh
@@ -12,6 +20,10 @@ While it is meant to be used as a library, a simple command-line tool is
 provided as an example. It operates on RAW 16-bit (machine endian) mono
 PCM files sampled at 48 kHz. It can be used as:
 
-./examples/rnnoise_demo <number of channels> <maximum attenuation> < input.raw > output.raw
+./examples/rnnoise_demo <noisy speech> <output denoised>
 
 The output is also a 16-bit raw PCM file.
+
+The latest version of the source is available from
+https://gitlab.xiph.org/xiph/rnnoise .  The GitHub repository
+is a convenience copy.
diff --git a/include/rnnoise.h b/include/rnnoise.h
@@ -30,6 +30,9 @@
 
 #include <stdio.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 #ifndef RNNOISE_EXPORT
 # if defined(WIN32)
@@ -48,18 +51,64 @@
 typedef struct DenoiseState DenoiseState;
 typedef struct RNNModel RNNModel;
 
+/**
+ * Return the size of DenoiseState in bytes
+ */
 RNNOISE_EXPORT int rnnoise_get_size();
 
+/**
+ * Return the number of samples processed by rnnoise_process_frame at a time
+ */
+RNNOISE_EXPORT int rnnoise_get_frame_size();
+
+/**
+ * Initialize a pre-allocated DenoiseState
+ *
+ * If model is NULL the default model is used.
+ *
+ * See: rnnoise_create() and rnnoise_model_from_file()
+ */
 RNNOISE_EXPORT int rnnoise_init(DenoiseState *st, RNNModel *model);
 
+/**
+ * Allocate and initialize a DenoiseState
+ *
+ * If model is NULL the default model is used.
+ *
+ * The returned pointer MUST be freed with rnnoise_destroy().
+ */
 RNNOISE_EXPORT DenoiseState *rnnoise_create(RNNModel *model);
 
+/**
+ * Free a DenoiseState produced by rnnoise_create.
+ *
+ * If a custom model was used, it must be freed with rnnoise_model_free() afterwards.
+ */
 RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st);
 
+/**
+ * Denoise a frame of samples
+ *
+ * in and out must each hold at least rnnoise_get_frame_size() samples.
+ */
 RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in);
 
+/**
+ * Load a model from a file
+ *
+ * It must be deallocated with rnnoise_model_free()
+ */
 RNNOISE_EXPORT RNNModel *rnnoise_model_from_file(FILE *f);
 
+/**
+ * Free a custom model
+ *
+ * It must only be called after all the DenoiseStates referring to it have been freed.
+ */
 RNNOISE_EXPORT void rnnoise_model_free(RNNModel *model);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif
diff --git a/src/celt_lpc.c b/src/celt_lpc.c
@@ -34,7 +34,7 @@
 #include "common.h"
 #include "pitch.h"
 
-void _celt_lpc(
+void rnn_lpc(
       opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
 const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
 int          p
@@ -88,114 +88,7 @@ int          p
 }
 
 
-void celt_fir(
-         const opus_val16 *x,
-         const opus_val16 *num,
-         opus_val16 *y,
-         int N,
-         int ord)
-{
-   int i,j;
-   opus_val16 rnum[ord];
-   for(i=0;i<ord;i++)
-      rnum[i] = num[ord-i-1];
-   for (i=0;i<N-3;i+=4)
-   {
-      opus_val32 sum[4];
-      sum[0] = SHL32(EXTEND32(x[i  ]), SIG_SHIFT);
-      sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
-      sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
-      sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
-      xcorr_kernel(rnum, x+i-ord, sum, ord);
-      y[i  ] = ROUND16(sum[0], SIG_SHIFT);
-      y[i+1] = ROUND16(sum[1], SIG_SHIFT);
-      y[i+2] = ROUND16(sum[2], SIG_SHIFT);
-      y[i+3] = ROUND16(sum[3], SIG_SHIFT);
-   }
-   for (;i<N;i++)
-   {
-      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
-      for (j=0;j<ord;j++)
-         sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
-      y[i] = ROUND16(sum, SIG_SHIFT);
-   }
-}
-
-void celt_iir(const opus_val32 *_x,
-         const opus_val16 *den,
-         opus_val32 *_y,
-         int N,
-         int ord,
-         opus_val16 *mem)
-{
-#ifdef SMALL_FOOTPRINT
-   int i,j;
-   for (i=0;i<N;i++)
-   {
-      opus_val32 sum = _x[i];
-      for (j=0;j<ord;j++)
-      {
-         sum -= MULT16_16(den[j],mem[j]);
-      }
-      for (j=ord-1;j>=1;j--)
-      {
-         mem[j]=mem[j-1];
-      }
-      mem[0] = SROUND16(sum, SIG_SHIFT);
-      _y[i] = sum;
-   }
-#else
-   int i,j;
-   celt_assert((ord&3)==0);
-   opus_val16 rden[ord];
-   opus_val16 y[N+ord];
-   for(i=0;i<ord;i++)
-      rden[i] = den[ord-i-1];
-   for(i=0;i<ord;i++)
-      y[i] = -mem[ord-i-1];
-   for(;i<N+ord;i++)
-      y[i]=0;
-   for (i=0;i<N-3;i+=4)
-   {
-      /* Unroll by 4 as if it were an FIR filter */
-      opus_val32 sum[4];
-      sum[0]=_x[i];
-      sum[1]=_x[i+1];
-      sum[2]=_x[i+2];
-      sum[3]=_x[i+3];
-      xcorr_kernel(rden, y+i, sum, ord);
-
-      /* Patch up the result to compensate for the fact that this is an IIR */
-      y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
-      _y[i  ] = sum[0];
-      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
-      y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
-      _y[i+1] = sum[1];
-      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
-      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
-      y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
-      _y[i+2] = sum[2];
-
-      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
-      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
-      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
-      y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
-      _y[i+3] = sum[3];
-   }
-   for (;i<N;i++)
-   {
-      opus_val32 sum = _x[i];
-      for (j=0;j<ord;j++)
-         sum -= MULT16_16(rden[j],y[i+j]);
-      y[i+ord] = SROUND16(sum,SIG_SHIFT);
-      _y[i] = sum;
-   }
-   for(i=0;i<ord;i++)
-      mem[i] = _y[N-i-1];
-#endif
-}
-
-int _celt_autocorr(
+int rnn_autocorr(
                    const opus_val16 *x,   /*  in: [0...n-1] samples x   */
                    opus_val32       *ac,  /* out: [0...lag-1] ac values */
                    const opus_val16       *window,
@@ -247,7 +140,7 @@ int _celt_autocorr(
          shift = 0;
    }
 #endif
-   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1);
+   rnn_pitch_xcorr(xptr, xptr, ac, fastN, lag+1);
    for (k=0;k<=lag;k++)
    {
       for (i = k+fastN, d = 0; i < n; i++)

diff --git a/src/celt_lpc.h b/src/celt_lpc.h
@@ -37,23 +37,9 @@
 
 #define LPC_ORDER 24
 
-void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
-
-void celt_fir(
-         const opus_val16 *x,
-         const opus_val16 *num,
-         opus_val16 *y,
-         int N,
-         int ord);
-
-void celt_iir(const opus_val32 *x,
-         const opus_val16 *den,
-         opus_val32 *y,
-         int N,
-         int ord,
-         opus_val16 *mem);
-
-int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
+void rnn_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
+
+int rnn_autocorr(const opus_val16 *x, opus_val32 *ac,
          const opus_val16 *window, int overlap, int lag, int n);
 
 #endif /* PLC_H */
diff --git a/src/denoise.c b/src/denoise.c
@@ -97,7 +97,7 @@ struct DenoiseState {
   RNNState rnn;
 };
 
-void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
+static void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
   int i;
   float sum[NB_BANDS] = {0};
   for (i=0;i<NB_BANDS-1;i++)
@@ -122,7 +122,7 @@ void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
   }
 }
 
-void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
+static void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
   int i;
   float sum[NB_BANDS] = {0};
   for (i=0;i<NB_BANDS-1;i++)
@@ -147,7 +147,7 @@ void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *
   }
 }
 
-void interp_band_gain(float *g, const float *bandE) {
+static void interp_band_gain(float *g, const float *bandE) {
   int i;
   memset(g, 0, FREQ_SIZE);
   for (i=0;i<NB_BANDS-1;i++)
@@ -168,7 +168,7 @@ CommonState common;
 static void check_init() {
   int i;
   if (common.init) return;
-  common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
+  common.kfft = rnn_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
   for (i=0;i<FRAME_SIZE;i++)
     common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
   for (i=0;i<NB_BANDS;i++) {
@@ -218,7 +218,7 @@ static void forward_transform(kiss_fft_cpx *out, const float *in) {
     x[i].r = in[i];
     x[i].i = 0;
   }
-  opus_fft(common.kfft, x, y, 0);
+  rnn_fft(common.kfft, x, y, 0);
   for (i=0;i<FREQ_SIZE;i++) {
     out[i] = y[i];
   }
@@ -236,7 +236,7 @@ static void inverse_transform(float *out, const kiss_fft_cpx *in) {
     x[i].r = x[WINDOW_SIZE - i].r;
     x[i].i = -x[WINDOW_SIZE - i].i;
   }
-  opus_fft(common.kfft, x, y, 0);
+  rnn_fft(common.kfft, x, y, 0);
   /* output in reverse order for IFFT. */
   out[0] = WINDOW_SIZE*y[0].r;
   for (i=1;i<WINDOW_SIZE;i++) {
@@ -257,6 +257,10 @@ int rnnoise_get_size() {
   return sizeof(DenoiseState);
 }
 
+int rnnoise_get_frame_size() {
+  return FRAME_SIZE; /* number of samples rnnoise_process_frame() handles per call */
+}
+
 int rnnoise_init(DenoiseState *st, RNNModel *model) {
   memset(st, 0, sizeof(*st));
   if (model)
@@ -321,12 +325,12 @@ static int compute_frame_features(DenoiseState *st, kiss_fft_cpx *X, kiss_fft_cp
   RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE);
   RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE);
   pre[0] = &st->pitch_buf[0];
-  pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
-  pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
+  rnn_pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
+  rnn_pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
                PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index);
   pitch_index = PITCH_MAX_PERIOD-pitch_index;
 
-  gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
+  gain = rnn_remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
           PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain);
   st->last_period = pitch_index;
   st->last_gain = gain;
@@ -415,7 +419,7 @@ static void biquad(float *y, float mem[2], const float *x, const float *b, const
   }
 }
 
-void pitch_filter(kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const float *Ep,
+static void pitch_filter(kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const float *Ep,
                   const float *Exp, const float *g) {
   int i;
   float r[NB_BANDS];