Skip to content

Commit

Permalink
refactor listener module into speech_processor
Browse files Browse the repository at this point in the history
  • Loading branch information
Jef808 committed Mar 2, 2024
1 parent 26d3564 commit 120cea3
Show file tree
Hide file tree
Showing 8 changed files with 257 additions and 223 deletions.
127 changes: 0 additions & 127 deletions echo-crafter.el

This file was deleted.

15 changes: 10 additions & 5 deletions echo_crafter/listener/listener_with_wake_word.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,11 @@
import traceback
from typing import List

#from .handle_intent import IntentHandler
from echo_crafter.logger import setup_logger
from echo_crafter.config import Config

from echo_crafter.listener.utils import (
Intent,
microphone
)
from echo_crafter.types import Intent
from echo_crafter.listener.utils import microphone

logger = setup_logger(__name__)

Expand All @@ -24,10 +22,17 @@ def on_wake_word_detected() -> None:
"""Play a ding sound to indicate that the wake word was detected."""
play_sound(Config['TRANSCRIPT_BEGIN_WAV'])

def send_to_keyboard(content):
"""Send the content to the keyboard."""
subprocess.Popen(
['xdotool', 'type', '--clearmodifiers', '--delay', '0', content]
)


def on_intent_inferred(intent_obj: Intent) -> None:
"""Log the inferred intent and slots."""
logger.info("Intent inferred: %s", json.dumps(intent_obj))
#IntentHandler()
print(json.dumps(intent_obj, indent=2))
play_sound(Config['TRANSCRIPT_SUCCESS_WAV'])

Expand Down
58 changes: 0 additions & 58 deletions echo_crafter/listener/socket_read.py

This file was deleted.

113 changes: 113 additions & 0 deletions echo_crafter/speech_processor/process_speech.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import threading
import time
from collections import deque
from contextlib import ExitStack
from echo_crafter.config import Config
from echo_crafter.logger import setup_logger
from .resources import *

logger = setup_logger(__name__)


class VoiceCommander:
"""Voice Commander class."""

def __init__(self, *,
on_wake_word,
on_intent_success,
on_intent_failure,
wake_word="computer",
wake_word_sensitivity=0.5,
intent_sensitivity=0.7,
end_of_speech_timeout=1.3,
):
self.pv = {}
with ExitStack() as stack:
self.pv['cobra'](stack.enter_context(create_cobra()))
self.pv['porcupine'](stack.enter_context(create_porcupine(wake_word=wake_word, sensitivity=wake_word_sensitivity)))
self.pv['rhino'](stack.enter_context(create_rhino(context_file=Config['RHINO_CONTEXT_FILE'], sensitivity=intent_sensitivity)))
self.pv['leopard'](stack.enter_context(create_leopard(model_file=Config['LEOPARD_MODEL_FILE'])))
self.pv['recorder'](stack.enter_context(create_recorder(frame_length=Config['FRAME_LENGTH'])))

stack.pop_all()

super().__init__()
self.on_wake_word = on_wake_word
self.on_intent_success = on_intent_success
self.on_intent_failure = on_intent_failure
self.buffer = deque([], maxlen=Config['BUFFER_SIZE'])
self.state = 'idle'
self.frames_processed_for_eos = 0

def __enter__(self):
return self

def __exit__(self, exc_type, exc_value, traceback):
if exc_type:
logger.error(f"An error occurred: {exc_value}", exc_info=True)
return False

def start(self):
"""Process the audio frames."""
pcm_frame = self.pv['recorder'].read()
self.process_frame(pcm_frame)

def get_next_frame(self):
if self.state in ['recognizing_intent', 'search_for_endpoint']:
if len(self.buffer) > self.frames_processed_for_eos:
pcm_frame = self.state[self.frames_processed_for_eos]
else:
pcm_frame = self.pv['rhino'].read()

def process_frame(self, pcm_frame):
if self.is_buffering:
self.buffer.append(pcm_frame)

if self.state == 'idle':
time.sleep(0.08) # Implement: Adjust according to needs
elif self.state == 'wait_for_wake_word':
keyword = self.pv['porcupine'].process(pcm_frame)
if keyword >= 0:
self.set_state('recognizing_intent')
self.buffer.append(pcm_frame)

# TODO: Figure out if this helps
self.pv['recorder'].stop()
self.pv['recorder'].start()
elif self.state == 'recognizing_intent':
is_finalized = self.pv['rhino'].process(pcm_frame)
if is_finalized:
inference = self.pv['rhino'].get_intent()
if inference.is_understood:
self.on_intent_success({"intent": inference.intent, "slots": inference.slots})
self.set_state('finished')
else:
self.on_intent_failure()
self.set_state('search_for_endpoint')
elif self.state == 'wait_for_eos':
if self._detect_end_of_speech():
self.set_state('finished')

def _detect_end_of_speech(self, buffer):
# Implement: Detect end of speech based on the audio frames
while buffer:
pcm_frame = buffer.popleft()
voice_probability = self.pv['cobra'].process(pcm_frame)
if voice_probability > 0.3:
return False
return True

def set_state(self, state):
self.state = state
if state == 'recognizing_intent':
self.is_buffering = True
elif state == 'finished':
self.reset()

def reset(self):
# Implement: Reset state and buffer
self.pv.reset()

def is_buffer_full(self):
# Implement based on a predefined limit or buffer property
return len(self.buffer) == self.buffer.maxlen
Loading

0 comments on commit 120cea3

Please sign in to comment.