-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
replace speech-to-text with speech-to-intent
- Loading branch information
Showing
7 changed files
with
142 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,67 +1,75 @@ | ||
"""Listen for a wake word and transcribe speech until endpoint is detected.""" | ||
|
||
import json | ||
import subprocess | ||
import traceback | ||
import socket | ||
from contextlib import contextmanager | ||
from typing import List | ||
|
||
from echo_crafter.logger import setup_logger | ||
from echo_crafter.config import Config | ||
|
||
from echo_crafter.listener.utils import ( | ||
Intent, | ||
microphone | ||
) | ||
from echo_crafter.logger import setup_logger | ||
from echo_crafter.config import Config | ||
|
||
def play_sound(wav_file): | ||
logger = setup_logger(__name__) | ||
|
||
def play_sound(wav_file) -> None: | ||
"""Play a ding sound to indicate that the wake word was detected.""" | ||
subprocess.Popen(["aplay", "-q", wav_file]) | ||
subprocess.Popen(['aplay', wav_file]) | ||
|
||
|
||
def wake_word_callback(): | ||
def on_wake_word_detected() -> None: | ||
"""Play a ding sound to indicate that the wake word was detected.""" | ||
play_sound(Config['TRANSCRIPT_BEGIN_WAV']) | ||
|
||
|
||
@contextmanager | ||
def create_transcription_callback(): | ||
"""Connect to the transcription socket and send it all partial transcripts.""" | ||
def on_intent_inferred(intent_obj: Intent) -> None: | ||
"""Log the inferred intent and slots.""" | ||
logger.info("Intent inferred: %s", json.dumps(intent_obj)) | ||
print(json.dumps(intent_obj, indent=2)) | ||
play_sound(Config['TRANSCRIPT_SUCCESS_WAV']) | ||
|
||
client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) | ||
try: | ||
client.connect(Config['SOCKET_PATH']) | ||
|
||
def callback(partial_transcript): | ||
"""Send the partial transcript to the active window.""" | ||
client.sendall((partial_transcript).encode()) | ||
partial_transcripts: List[str] = [] | ||
|
||
yield callback | ||
|
||
finally: | ||
client.close() | ||
def on_partial_transcript(partial_transcript: str) -> None: | ||
"""Send the partial transcript to the active window.""" | ||
partial_transcripts.append(partial_transcript) | ||
subprocess.Popen( | ||
['xdotool', 'type', '--clearmodifiers', '--delay', '0', partial_transcript] | ||
) | ||
|
||
|
||
def transcription_success_callback(): | ||
"""Play a ding sound to indicate that the final transcript was received.""" | ||
play_sound(Config['TRANSCRIPT_SUCCESS_WAV']) | ||
def on_final_transcript() -> None: | ||
"""Log the accumulated partial transcripts""" | ||
final_transcript = ''.join(partial_transcripts) | ||
partial_transcripts.clear() | ||
logger.info("Final transcript: %s", final_transcript) | ||
|
||
|
||
def main(): | ||
"""Upon detection of a wake word, transcribe speech until endpoint is detected.""" | ||
logger = setup_logger() | ||
logger = setup_logger(__name__) | ||
|
||
with microphone() as mic: | ||
try: | ||
while True: | ||
with create_transcription_callback() as transcription_callback: | ||
mic.wait_for_wake_word(wake_word_callback) | ||
|
||
mic.process_and_transmit_utterance(transcription_callback, transcription_success_callback) | ||
mic.wait_for_wake_word(on_wake_word_detected) | ||
mic.infer_intent(on_intent_inferred) | ||
#mic.process_and_transmit_utterance(on_partial_transcript, on_final_transcript) | ||
|
||
except KeyboardInterrupt: | ||
pass | ||
mic.set_is_recording(False) | ||
|
||
except Exception as e: | ||
logger.error("An error occured %s", e) | ||
logger.error(traceback.format_exc()) | ||
|
||
finally: | ||
mic.set_is_recording(False) | ||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,11 @@ | ||
from .microphone import microphone | ||
from .sockets import socket_connection | ||
from .microphone import * | ||
from .sockets import * | ||
from .types import * | ||
|
||
__all__ = [ | ||
'Intent', | ||
'Slot', | ||
'MicrophoneCallbacks', | ||
'socket_connection', | ||
'microphone' | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from enum import StrEnum | ||
from typing import Any, NamedTuple, Tuple, List, Callable, TypeVar, Type | ||
|
||
AudioFrame = List[int] | ||
|
||
class IntentName(StrEnum): | ||
"""Intent names for the voice commands.""" | ||
|
||
UNKNOWN = "unknown" | ||
GET_SCRIPT = "getScript" | ||
ANSWER_QUESTION = "answerQuestion" | ||
TRANSCRIBE_TO_KEYBOARD = "simplyTranscribe" | ||
FOCUS_WINDOW = "focusWindow" | ||
OPEN_WINDOW = "openWindow" | ||
SET_VOLUME = "setVolume" | ||
CANCEL = "cancel" | ||
|
||
|
||
class Slot(StrEnum): | ||
"""Slot names for the intent's named parameters.""" | ||
|
||
PROGRAMMING_LANGUAGE = "programmingLanguage" | ||
PROMPT_TYPE = "promptType" | ||
WINDOW_NAME = "windowName" | ||
VOLUME_SETTING = "volumeSetting" | ||
|
||
|
||
class Intent(NamedTuple):
    """An inferred intent together with its slots.

    As a named tuple it unpacks positionally as ``(intent, slots)``.
    """

    # Which intent was recognized.
    intent: IntentName
    # NOTE(review): annotated as a list of slot *names* only; if slot values
    # are also needed, confirm against whatever constructs Intent objects.
    slots: List[Slot]
|
||
|
||
class MicrophoneCallbacks(NamedTuple):
    """Bundle of the callbacks a microphone listener can be given.

    Every callback's return type is ``Any``; the annotations place no
    constraint on what a callback returns.
    """

    # Takes no arguments.
    on_wake_word: Callable[[], Any]
    # Receives the inferred Intent.
    on_intent: Callable[[Intent], Any]
    # Receives one partial transcript string.
    on_partial_transcript: Callable[[str], Any]
    # Takes no arguments.
    on_final_transcript: Callable[[], Any]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters