Skip to content

Commit

Permalink
fix issue where text messages were not being considered correctly wh…
Browse files Browse the repository at this point in the history
…en audio is enabled
  • Loading branch information
pablomarin committed Jan 3, 2025
1 parent 307da24 commit 56ba596
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 40 deletions.
8 changes: 3 additions & 5 deletions 15-FastAPI-API.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 9,
"id": "21f0382f-3960-46ed-8f68-77ac735c90c2",
"metadata": {
"tags": []
Expand All @@ -365,14 +365,12 @@
"[Tool Start] Starting documents_retrieval\n",
"\n",
"[Tool End] Done documents_retrieval\n",
"The scene where Joey asks Rachel to marry him occurs in a moment of misunderstanding. Joey, feeling a special bond with Rachel and concerned about her being a single mom, offers to marry her. He says, \"Rachel Green will you marry me?\" Rachel is surprised and responds, \"What?\" Joey insists, \"I want you to know you're not gonna be alone in this\" [[source]](https://blobstorageixqo5iaqmpzwc.blob.core.windows.net/friends/s08/e02/c11.txt?sv=2022-11-02&ss=b&srt=sco&sp=rltfx&se=2026-01-02T09:04:19Z&st=2025-01-02T01:04:19Z&spr=https&sig=q%2FjY9R25rdc%2BIH1iiq1uPIBm82xECsN9d%2B2ftdM1SJI%3D).\n",
"\n",
"Rachel, touched by Joey's gesture, declines the proposal, saying, \"Oh you're so sweet. You're so-so sweet, honey. But I'm not, I'm not looking for a husband\" [[source]](https://blobstorageixqo5iaqmpzwc.blob.core.windows.net/friends/s08/e02/c11.txt?sv=2022-11-02&ss=b&srt=sco&sp=rltfx&se=2026-01-02T09:04:19Z&st=2025-01-02T01:04:19Z&spr=https&sig=q%2FjY9R25rdc%2BIH1iiq1uPIBm82xECsN9d%2B2ftdM1SJI%3D)."
"The scene where Joey wears all of Chandler's clothes is a humorous moment from the show \"Friends.\" Joey, in retaliation for Chandler hiding his clothes, decides to wear everything Chandler owns. He walks into the room wearing multiple layers of Chandler's clothing and exclaims, \"Look at me! I'm Chandler! Could I be wearing any more clothes?\" He even jokes about going commando, saying, \"Maybe if I wasn't going commando...\" The scene is made even funnier by Joey's exaggerated movements and the sheer volume of clothes he's wearing, which makes it difficult for him to move comfortably [[source]](https://blobstorageixqo5iaqmpzwc.blob.core.windows.net/friends/s06/e20/c09.txt?sv=2022-11-02&ss=b&srt=sco&sp=rltfx&se=2026-01-02T09:04:19Z&st=2025-01-02T01:04:19Z&spr=https&sig=q%2FjY9R25rdc%2BIH1iiq1uPIBm82xECsN9d%2B2ftdM1SJI%3D)."
]
}
],
"source": [
"stream_question = \"@docsearch, how is the scene where joey asks rachel to marry\"\n",
"stream_question = \"@docsearch, describe the scene where Joey wears all of Chandler's clothes\"\n",
"call_stream(stream_question, thread_id=random_session_id)"
]
},
Expand Down
88 changes: 53 additions & 35 deletions apps/frontend/app/pages/3_FastAPI_Chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,30 @@
import os
import sys

# Import STT and TTS functions from audio_utils.py
# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
# Import STT (speech-to-text) and TTS (text-to-speech) functions from audio_utils.py
try:
from audio_utils import (
speech_to_text_from_bytes as speech_to_text,
text_to_speech,
)
except Exception as e:
# Add the path four levels up
# If local import fails, add the path four levels up and import from there
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../')))
from common.audio_utils import (
speech_to_text_from_bytes as speech_to_text,
text_to_speech,
)


import streamlit as st

from app import model_name, api_url, get_env_var
from app import (
model_name,
api_url,
get_env_var,
)
from langchain_core.messages import AIMessage, HumanMessage
from helpers.streamlit_helpers import (
configure_page,
Expand All @@ -35,26 +41,28 @@
from audio_recorder_streamlit import audio_recorder

# -----------------------------------------------------------------------------
# Configuration
# Page Configuration
# -----------------------------------------------------------------------------
page_title = get_env_var("AGENT_PAGE_TITLE", default_value="AI Agent", required=True)
configure_page(page_title, "💬")

logger = get_logger(__name__)
logger.info(f"Page configured with title: {page_title}")

# -----------------------------------------------------------------------------
# Session IDs and Chat History
# Initialize Session IDs and Chat History
# -----------------------------------------------------------------------------
session_id, user_id = get_or_create_ids()
initialize_chat_history(model_name)

# -----------------------------------------------------------------------------
# Sidebar with optional voice input
# Sidebar (Voice Input Option)
# -----------------------------------------------------------------------------
with st.sidebar:
st.header("Voice Input")
voice_enabled = st.checkbox("Enable Voice Capabilities")

# If voice is enabled, provide audio recorder
audio_bytes = None
if voice_enabled:
audio_bytes = audio_recorder(
Expand All @@ -68,48 +76,60 @@
logger.info("Audio recorded from user microphone.")

# -----------------------------------------------------------------------------
# Display existing chat messages
# Display Existing Chat Messages
# -----------------------------------------------------------------------------
display_chat_history()
logger.debug("Displayed existing chat history.")


# -----------------------------------------------------------------------------
# Handle User Input (Text & Audio)
# -----------------------------------------------------------------------------
user_query = st.chat_input("Type your message here...")

# Track whether a new user message was added
new_user_message = False

# Handle audio input
if audio_bytes:
transcript = speech_to_text(audio_bytes)
logger.debug(f"Transcript from STT: {transcript}")
if transcript:
st.session_state.chat_history.append(HumanMessage(content=transcript))
# Text query from the st.chat_input
user_query = st.chat_input("Type your message here...")
typed_query = user_query.strip() if user_query else None

# 1) If voice is enabled, we allow typed OR voice input
if voice_enabled:
if typed_query:
# A typed query takes priority if present
st.session_state.chat_history.append(HumanMessage(content=typed_query))
with st.chat_message("Human"):
st.write(transcript)
logger.info("Transcript added to chat history.")
st.markdown(typed_query)
logger.info("User typed query added to chat history: %s", typed_query)
new_user_message = True
elif audio_bytes:
# Only if there's no typed input, process recorded audio
transcript = speech_to_text(audio_bytes)
logger.debug(f"Transcript from STT: {transcript}")
if transcript:
st.session_state.chat_history.append(HumanMessage(content=transcript))
with st.chat_message("Human"):
st.write(transcript)
logger.info("Transcript added to chat history.")
new_user_message = True

# 2) If voice is disabled, we only process typed input
else:
if typed_query:
st.session_state.chat_history.append(HumanMessage(content=typed_query))
with st.chat_message("Human"):
st.markdown(typed_query)
logger.info("User typed query added to chat history: %s", typed_query)
new_user_message = True

# Handle text input (st.chat_input)
if user_query is not None and user_query.strip() and not new_user_message:
st.session_state.chat_history.append(HumanMessage(content=user_query))
with st.chat_message("Human"):
st.markdown(user_query)
logger.info("User query added to chat history: %s", user_query)
new_user_message = True

# -----------------------------------------------------------------------------
# Generate AI response if the last message is from a Human
# Generate AI Response (If We Have a New User Message)
# -----------------------------------------------------------------------------
if new_user_message and not isinstance(st.session_state.chat_history[-1], AIMessage):
if new_user_message:
# The last message is now from a Human; let's call the AI
with st.chat_message("AI"):
try:
logger.info("Sending request to SSE /stream endpoint with user query.")
user_text = st.session_state.chat_history[-1].content

logger.info("Sending request to SSE /stream endpoint with user query.")

# Stream the AI response using your SSE consumption function
ai_response = st.write_stream(
consume_api(api_url, user_text, session_id, user_id)
)
Expand All @@ -123,7 +143,7 @@
if ai_response:
st.session_state.chat_history.append(AIMessage(content=ai_response))

# Voice Output (if enabled)
# If voice is enabled, convert AI response text to speech and auto-play
if voice_enabled:
try:
audio_file_path = text_to_speech(ai_response)
Expand All @@ -134,5 +154,3 @@
logger.info("Temporary audio file removed.")
except Exception as ex:
logger.error(f"Error generating or playing audio: {ex}", exc_info=True)


Binary file modified apps/frontend/frontend.zip
Binary file not shown.

0 comments on commit 56ba596

Please sign in to comment.