Skip to content

Commit

Permalink
only use partial transcripts for latency
Browse files Browse the repository at this point in the history
  • Loading branch information
Jef808 committed Dec 12, 2023
1 parent 7585b01 commit 591ddc4
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 27 deletions.
2 changes: 1 addition & 1 deletion echo-crafter.el
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
(process-send-eof process))))

(defun openai-prompt-process-sentinel (process signal)
"Handle the process state changes for PROCESS upon receiving SIGNAL."
"Handle the process state change for PROCESS upon receiving SIGNAL."
(when (memq (process-status process) '(exit signal))
(let ((exit-status (process-exit-status process))
(transcript-data (with-current-buffer microphone-stream-buffer (buffer-string))))
Expand Down
89 changes: 63 additions & 26 deletions speech-to-text/speech_reco.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@
# Audio capture configuration.
DEFAULT_DEVICE = p.get_default_input_device_info()
DEFAULT_DEVICE_INDEX = DEFAULT_DEVICE['index']
# Fixed at 16000; the device's own default sample rate (commented out) is not used.
SAMPLE_RATE = 16000 # int(DEFAULT_DEVICE['defaultSampleRate'])
FRAMES_PER_BUFFER = SAMPLE_RATE // 5 # 3200
# The later assignment wins: half of SAMPLE_RATE per buffer.
FRAMES_PER_BUFFER = int(SAMPLE_RATE / 2) # 8000
# Duration of one buffer (0.5 with the 8000-frame value); in seconds,
# assuming SAMPLE_RATE is expressed in frames per second.
LATENCY = FRAMES_PER_BUFFER / SAMPLE_RATE
FORMAT = pyaudio.paInt16
CHANNELS = 1

##############################
# # Termination logic config #
##############################
Expand Down Expand Up @@ -94,7 +95,7 @@ def setup(self, filepath):
for message in self._buffer:
self.write(message)
self._buffer = []
for data in self._buffer:
for data in self._wav_buffer:
self.record(data)
self._wav_buffer = []

Expand All @@ -115,24 +116,25 @@ def record(self, data):

_LOGGER = Logger()
ws = None
FINAL_TRANSCRIPTS = []
PARTIAL_TRANSCRIPT = ""
TRANSCRIPT = []

# Set up the signal handler responsible for terminating the program at the end
def signal_handler(sig, frame):
#global _PYAUDIO_END_TIME
global _SHOULD_BE_RUNNING
global _AAI_SESSION_END_REQUEST_TIME

#_PYAUDIO_END_TIME = time.time()
time.sleep(0.5)
_SHOULD_BE_RUNNING = False
print("Sending terminate_session to aai", file=sys.stderr)

_AAI_SESSION_END_REQUEST_TIME = time.time()
if ws is not None:
try:
time.sleep(FRAMES_PER_BUFFER / SAMPLE_RATE)
ws.send(json.dumps({"terminate_session": True}))
except Exception as e:
print("Error while sending terminate_session: {e}", file=sys.stderr)
print(f"Error while sending terminate_session: {e}", file=sys.stderr)
p.terminate()

# Register the signal handler
Expand All @@ -157,7 +159,7 @@ def send_data(ws, in_data, frame_count, pyaudio_buffer_time, pyaudio_current_tim
}

# Print all data for logging
_LOGGER.write(log_data)
# _LOGGER.write(log_data)
_LOGGER.record(in_data)

if _AAI_SESSION_START_TIME is None:
Expand Down Expand Up @@ -197,14 +199,13 @@ def pyaudio_callback(in_data, frame_count, time_info, status):
#################################################################
def on_open(ws):
    """Websocket "open" callback: record the wall-clock time of the event.

    Args:
        ws: The websocket the callback was invoked for (unused).
    """
    # NOTE(review): this global is spelled with three S's, unlike the other
    # _AAI_SESSION_* timestamps -- confirm whether anything reads it.
    global _AAI_SESSSION_BEGIN_TIME
    opened_at = time.time()
    _AAI_SESSSION_BEGIN_TIME = opened_at

def on_close(ws, ec, err):
    """Websocket "close" callback: keep the pending transcript and release audio.

    Args:
        ws: The websocket being closed (unused).
        ec: Close status passed by the websocket library (unused).
        err: Close message passed by the websocket library (unused).
    """
    print("closing websocket connection", file=sys.stderr)

    # A partial transcript that was never confirmed would otherwise be lost.
    pending = PARTIAL_TRANSCRIPT
    if pending:
        TRANSCRIPT.append(pending)

    # Nothing is released when the stream already reports itself stopped.
    if stream.is_stopped():
        return
    stream.close()
    p.terminate()

Expand All @@ -216,22 +217,54 @@ def on_close(ws, ec, err):
def _end_session(ws):
    """Record the session end time once, then close the websocket."""
    global _AAI_SESSION_END_TIME

    if _AAI_SESSION_END_TIME is None:
        _AAI_SESSION_END_TIME = time.time()
    ws.close()


def on_message(ws, msg):
    """Websocket "message" callback: update the transcript state from one message.

    Every decoded message is logged. Then, depending on its ``message_type``:

    * ``SessionBegins``: record the session start time and point the logger
      at a per-session path.
    * ``SessionTerminated``: record the end time (if not already set) and
      close the websocket.
    * ``FinalTranscript``: commit the pending partial transcript; the final
      text itself is only logged.
    * anything else with non-empty text: a text identical to the pending
      partial is committed to ``TRANSCRIPT``; a different text replaces the
      pending partial.

    Once termination has been requested (``_AAI_SESSION_END_REQUEST_TIME`` is
    set), committing a transcript also ends the session.

    Args:
        ws: The websocket the message arrived on.
        msg: The raw JSON message text.
    """
    global _AAI_SESSION_START_TIME
    global PARTIAL_TRANSCRIPT

    payload = json.loads(msg)
    message_type = payload['message_type']

    _LOGGER.write({"source": "ASSEMBLYAI", **payload})

    if message_type == 'SessionBegins':
        _AAI_SESSION_START_TIME = time.time()
        _LOGGER.setup(f"/home/jfa/projects/echo-crafter/logs/{payload['session_id']}")
        return

    if message_type == 'SessionTerminated':
        # Return here: this message is not expected to carry a 'text' field,
        # so falling through to the transcript handling would fail.
        _end_session(ws)
        return

    text = payload.get('text')
    if not text:
        return

    end_requested = _AAI_SESSION_END_REQUEST_TIME is not None

    if message_type == "FinalTranscript":
        _LOGGER.write({"FINAL_TRANSCRIPT": text, "created": payload['created']})
        # The pending partial is what gets committed, not the final text.
        if PARTIAL_TRANSCRIPT:
            TRANSCRIPT.append(PARTIAL_TRANSCRIPT)
            PARTIAL_TRANSCRIPT = ""
        if end_requested:
            _end_session(ws)
        return

    if text == PARTIAL_TRANSCRIPT:
        # The same text arrived twice in a row: treat it as settled.
        TRANSCRIPT.append(text)
        PARTIAL_TRANSCRIPT = ""
        if end_requested:
            _end_session(ws)
    else:
        PARTIAL_TRANSCRIPT = text

    _LOGGER.write({"PARTIAL_TRANSCRIPT": text, "created": payload['created']})

########################
# Retrieve credentials #
Expand Down Expand Up @@ -259,11 +292,12 @@ def on_message(ws, msg):
except Exception as e:
print(f"Error while opening the pyaudio stream: {e}", file=sys.stderr)
p.terminate()
sys.exit(2)
sys.exit(7)

def on_error(ws, *err):
    """Websocket "error" callback: log the error and echo it to stderr.

    Args:
        ws: The websocket that reported the error (unused).
        *err: Error value(s) from the websocket library, forwarded
            positionally to the logger and printed as a tuple.
    """
    _LOGGER.write(*err)
    report = f"Error: {err}"
    print(report, file=sys.stderr)

########################
# Set up the websocket #
########################
Expand All @@ -277,8 +311,9 @@ def on_error(ws, *err):
on_open=on_open)
except Exception as e:
print(f"Error while initiating the websocket: {e}", file=sys.stderr)
stream.close()
p.terminate()
if not stream.is_stopped():
stream.close()
p.terminate()

#############################################################################
# Run until the program receives SIGINT, in which case it gracefully exits  #
Expand All @@ -288,20 +323,22 @@ def on_error(ws, *err):
_AAI_SESSION_START_REQUEST_TIME = time.time()
ec = ws.run_forever()

time_to_end = _AAI_SESSION_END_TIME - _AAI_SESSION_END_REQUEST_TIME
if ec and not stream.is_stopped():
stream.close()
p.terminate()

time_to_wrap_up = _AAI_SESSION_END_TIME - _AAI_SESSION_END_REQUEST_TIME
transcript = ' '.join(TRANSCRIPT)
_LOGGER.write("{SUMMARY: "
f"SESSION_START: {_PYAUDIO_START_TIME}, "
f"AAI_SESSION_REQUEST: {_AAI_SESSION_START_REQUEST_TIME}, "
f"AAI_SESSION_START: {_AAI_SESSION_START_TIME}, "
f"AAI_SESSION_END_REQUEST: {_AAI_SESSION_END_REQUEST_TIME}, "
f"AAI_SESSION_END: {_AAI_SESSION_END_TIME}"
"}")
_LOGGER.write(f"TIME_TO_WRAP_UP: {time_to_end}")
transcript = ' '.join(transcript['text'] for transcript in FINAL_TRANSCRIPTS)

_LOGGER.write(f"TIME_TO_WRAP_UP: {time_to_wrap_up}")
_LOGGER.write(json.dumps({"transcript": transcript}))

print(transcript)

sys.exit(ec)
sys.exit(ec)

0 comments on commit 591ddc4

Please sign in to comment.