Skip to content

Commit

Permalink
Limit the maximum segment length for VAD. (#990)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Jun 12, 2024
1 parent aac8684 commit 208da78
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 0 deletions.
16 changes: 16 additions & 0 deletions sherpa-onnx/csrc/silero-vad-model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,14 @@ class SileroVadModel::Impl {

// Returns the stored minimum speech duration, expressed as a sample count.
int32_t MinSpeechDurationSamples() const {
  return min_speech_samples_;
}

// Updates the minimum silence duration used by the model.
//
// `s` is multiplied by sample_rate_ and the product is stored in
// min_silence_samples_, so `s` is presumably a duration in seconds.
void SetMinSilenceDuration(float s) {
  const auto num_samples = sample_rate_ * s;
  min_silence_samples_ = num_samples;
}

// Overwrites the threshold held in the silero_vad section of config_.
void SetThreshold(float threshold) {
  auto &vad_config = config_.silero_vad;
  vad_config.threshold = threshold;
}

private:
void Init(void *model_data, size_t model_data_length) {
sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
Expand Down Expand Up @@ -306,4 +314,12 @@ int32_t SileroVadModel::MinSpeechDurationSamples() const {
return impl_->MinSpeechDurationSamples();
}

void SileroVadModel::SetMinSilenceDuration(float s) {
impl_->SetMinSilenceDuration(s);
}

void SileroVadModel::SetThreshold(float threshold) {
impl_->SetThreshold(threshold);
}

} // namespace sherpa_onnx
3 changes: 3 additions & 0 deletions sherpa-onnx/csrc/silero-vad-model.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ class SileroVadModel : public VadModel {
int32_t MinSilenceDurationSamples() const override;
int32_t MinSpeechDurationSamples() const override;

// Changes the minimum silence duration at runtime.
// NOTE(review): `s` appears to be in seconds (the implementation multiplies
// it by the sample rate) — confirm.
void SetMinSilenceDuration(float s) override;
// Changes the threshold stored in the model's silero_vad configuration.
void SetThreshold(float threshold) override;

private:
class Impl;
std::unique_ptr<Impl> impl_;
Expand Down
2 changes: 2 additions & 0 deletions sherpa-onnx/csrc/vad-model.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ class VadModel {

virtual int32_t MinSilenceDurationSamples() const = 0;
virtual int32_t MinSpeechDurationSamples() const = 0;
// Changes the minimum silence duration used by the model.
// NOTE(review): `s` is presumably a duration in seconds — confirm against
// the concrete implementations.
virtual void SetMinSilenceDuration(float s) = 0;
// Changes the threshold used by the model.
virtual void SetThreshold(float threshold) = 0;
};

} // namespace sherpa_onnx
Expand Down
12 changes: 12 additions & 0 deletions sherpa-onnx/csrc/voice-activity-detector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ class VoiceActivityDetector::Impl {
#endif

void AcceptWaveform(const float *samples, int32_t n) {
if (buffer_.Size() > max_utterance_length_) {
model_->SetMinSilenceDuration(new_min_silence_duration_s_);
model_->SetThreshold(new_threshold_);
} else {
model_->SetMinSilenceDuration(config_.silero_vad.min_silence_duration);
model_->SetThreshold(config_.silero_vad.threshold);
}

int32_t window_size = model_->WindowSize();

// note n is usually window_size and there is no need to use
Expand Down Expand Up @@ -114,6 +122,10 @@ class VoiceActivityDetector::Impl {
CircularBuffer buffer_;
std::vector<float> last_;

// When buffer_ holds more than this many samples, AcceptWaveform() switches
// the model to the two overrides declared below; otherwise it applies
// min_silence_duration and threshold from config_.silero_vad.
// NOTE(review): 16000 * 20 looks like 20 seconds at a 16 kHz sample rate —
// confirm this matches the configured sample rate.
int max_utterance_length_ = 16000 * 20; // in samples
// Minimum silence duration passed to the model once the limit is exceeded.
float new_min_silence_duration_s_ = 0.1;
// Threshold passed to the model once the limit is exceeded.
// NOTE(review): presumably chosen above 1 so the current segment gets
// closed — confirm against the model's probability range.
float new_threshold_ = 1.10;

int32_t start_ = -1;
};

Expand Down

0 comments on commit 208da78

Please sign in to comment.