Merge pull request #806 from wotey/wotey/4o-issues

Update to fix CSV data issue caused by a bad prompt
microsoft · Aug 1, 2024 · ddda05a
2 parents 164a0af + 65c45c5
commit ddda05a
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 9 deletions.
diff --git a/app/backend/app.py b/app/backend/app.py
@@ -720,7 +720,7 @@ async def posttd(csv: UploadFile = File(...)):
         global dffinal
             # Read the file into a pandas DataFrame
         content = await csv.read()
-        df = pd.read_csv(StringIO(content.decode('latin-1')))
+        df = pd.read_csv(StringIO(content.decode('utf-8-sig')))
 
         dffinal = df
         # Process the DataFrame...
@@ -732,11 +732,12 @@ async def posttd(csv: UploadFile = File(...)):
     #return {"filename": csv.filename}
 @app.get("/process_td_agent_response")
 async def process_td_agent_response(retries=3, delay=1000, question: Optional[str] = None):
+    save_df(dffinal)
     if question is None:
         raise HTTPException(status_code=400, detail="Question is required")
     for i in range(retries):
         try:
-            results = td_agent_response(question)
+            results = td_agent_response(question,dffinal)
             return results
         except AttributeError as ex:
             log.exception(f"Exception in /process_tabular_data_agent_response:{str(ex)}")

diff --git a/app/backend/approaches/tabulardataassistant.py b/app/backend/approaches/tabulardataassistant.py
@@ -69,9 +69,11 @@ def get_image_data(image_path):
 
 def save_chart(query):
     temp_dir = tempfile.gettempdir()
-    q_s = f'''You are a assistant to help analyze CSV file data and are a dataframe ally. You analyze every row, addressing all queries with unwavering precision.
-    You DO NOT answer based on subset of dataframe or top 5 or based on head() output. You need to look at all rows and then answer questions. Data is case insensitive.
-    If any charts or graphs or plots were created save them in the {temp_dir} directory. Remember, you can handle both singular and plural forms of queries. 
+    q_s = f'''You are a assistant to help analyze CSV data that is placed in a dataframe and are a dataframe ally. You analyze every row, addressing all queries with unwavering precision.
+    You DO NOT answer based on subset of the dataframe or top 5 or based on head() output. Do not create an example dataframe. Use the dataframe provided to you. You need to look at all rows and then answer questions based on the entire dataframe and ensure the input to any tool is valid. Data is case insensitive.
+    Normalize column names by converting them to lowercase and replacing spaces with underscores to handle discrepancies in column naming conventions.
+    If any charts or graphs or plots were created save them in the {temp_dir} directory. Make sure the output of the result includes the final result and not just the chart or graph. Put the charts in the {temp_dir} directory and not the final output.
+    Remember, you can handle both singular and plural forms of queries. 
     
     For example:
     - If you ask \'How many thinkpads do we have?\' or \'How many thinkpad do we have?\', you will address both forms in the same manner.
@@ -128,7 +130,7 @@ def process_agent_scratch_pad(question, df):
             raise ValueError()
 
 #Function to stream final output       
-def process_agent_response(question):
+def process_agent_response(question, df):
     question = save_chart(question)
 
     chat = AzureChatOpenAI(
@@ -138,7 +140,7 @@ def process_agent_response(question):
     deployment_name=OPENAI_DEPLOYMENT_NAME)  
 
 
-    pdagent = create_pandas_dataframe_agent(chat, dffinal, verbose=True,agent_type=AgentType.OPENAI_FUNCTIONS, allow_dangerous_code=True, agent_executor_kwargs={"handle_parsing_errors": True})
+    pdagent = create_pandas_dataframe_agent(chat, df, verbose=True,agent_type=AgentType.OPENAI_FUNCTIONS, allow_dangerous_code=True, agent_executor_kwargs={"handle_parsing_errors": True})
     for chunk in pdagent.stream({"input": question}):
         if "output" in chunk:
             output = f'Final Output: ```{chunk["output"]}```'

diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt
@@ -1,7 +1,7 @@
 #### Any version change made here should also be made and tested for the enrichment and function apps in /functions and /app/enrichment
 azure-identity==1.16.1
 Flask==2.3.2
-langchain==0.2.6
+langchain==0.2.9
 azure-mgmt-cognitiveservices==13.5.0
 openai==1.35.8
 # azure-search-documents==11.4.0
@@ -25,4 +25,4 @@ wikipedia==1.4.0
 langchain-openai == 0.1.14
 pytest==8.2.1
 python-dotenv==1.0.1
-langchain-community==0.2.6 
+langchain-community==0.2.9