Skip to content

Commit

Permalink
Merge branch 'vNext-Dev' into wotey/citations
Browse files Browse the repository at this point in the history
  • Loading branch information
dayland authored Aug 21, 2024
2 parents 646ba1c + 98165a3 commit 9ec00e2
Show file tree
Hide file tree
Showing 103 changed files with 1,062 additions and 1,233 deletions.
9 changes: 4 additions & 5 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@
"request": "launch",
"program": "debug_tests.py",
"args": [
"--storage_account_connection_str",
"${env:STORAGE_ACCOUNT_CONNECTION_STR},",
"--storage_account_url",
"${env:AZURE_BLOB_STORAGE_ENDPOINT},",
"--search_service_endpoint",
"${env:SEARCH_SERVICE_ENDPOINT}",
"--search_index",
Expand All @@ -98,10 +98,9 @@
"60"
],
"env": {
"STORAGE_ACCOUNT_CONNECTION_STR": "${env:BLOB_CONNECTION_STRING}",
"storage_account_url": "${env:AZURE_BLOB_STORAGE_ENDPOINT}",
"SEARCH_SERVICE_ENDPOINT": "${env:AZURE_SEARCH_SERVICE_ENDPOINT}",
"SEARCH_INDEX": "${env:AZURE_SEARCH_INDEX}",
"SEARCH_KEY": "${env:AZURE_SEARCH_SERVICE_KEY}"
"SEARCH_INDEX": "${env:AZURE_SEARCH_INDEX}"
},
"cwd": "${workspaceFolder}/tests",
"envFile": "${workspaceFolder}/scripts/environments/infrastructure.debug.env",
Expand Down
76 changes: 45 additions & 31 deletions README.md

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions SECURITY.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Microsoft takes the security of our software products and services seriously, wh

If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.

## Reporting Security Issues
## Reporting security issues

**Please do not report security vulnerabilities through public GitHub issues.**

Expand All @@ -30,11 +30,11 @@ This information will help us triage your report more quickly.

If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.

## Providing Feedback
## Providing feedback

Please refer to the [Contributing](./CONTRIBUTING.md) guidelines for acceptable methods to provide feedback for issues which are not security related.

## Preferred Languages
## Preferred languages

We prefer all communications to be in English.

Expand Down
10 changes: 6 additions & 4 deletions SUPPORT.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@

## How to file issues and get help

This project uses GitHub Issues to track bugs and feature requests. Please search the existing
This project uses [GitHub Issues](https://github.com/microsoft/PubSec-Info-Assistant/issues) to track bugs and feature requests. Please search the existing
issues before filing new issues to avoid duplicates. For new issues, file your bug or
feature request as a new Issue.
feature request as a new Issue.

For help and questions about using this project, please use the [Discussion](https://github.com/microsoft/PubSec-Info-Assistant/discussions) forums on our GitHub Repo page.
Please provide as much information as possible when filing an issue (please redact any sensitive information).

For help and questions about using this project, please use the [Discussion](https://github.com/microsoft/PubSec-Info-Assistant/discussions) forums on our GitHub repo page.

For customer support deploying this accelerator, please reach out to your local Microsoft representative or email the [Industry Solutions Accelerator Team](mailto:[email protected]).

## Providing Feedback
## Providing feedback

Please refer to the [Contributing](./CONTRIBUTING.md) guidelines for acceptable methods to provide feedback for issues which are not security related.

Expand Down
117 changes: 50 additions & 67 deletions app/backend/app.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from io import StringIO
from typing import Optional
import asyncio
#from sse_starlette.sse import EventSourceResponse
#from starlette.responses import StreamingResponse
from starlette.responses import Response
import logging
import os
import json
import urllib.parse
import pandas as pd
import pydantic
from datetime import datetime, time, timedelta
from datetime import datetime, timedelta
from fastapi.staticfiles import StaticFiles
from fastapi import FastAPI, File, HTTPException, Request, UploadFile
from fastapi.responses import RedirectResponse, StreamingResponse
Expand All @@ -23,19 +21,12 @@
from approaches.chatwebretrieveread import ChatWebRetrieveRead
from approaches.gpt_direct_approach import GPTDirectApproach
from approaches.approach import Approaches
from azure.core.credentials import AzureKeyCredential
from azure.identity import DefaultAzureCredential, AzureAuthorityHosts
from azure.identity import ManagedIdentityCredential, AzureAuthorityHosts, DefaultAzureCredential, get_bearer_token_provider
from azure.mgmt.cognitiveservices import CognitiveServicesManagementClient
from azure.search.documents import SearchClient
from azure.storage.blob import (
AccountSasPermissions,
BlobServiceClient,
ResourceTypes,
generate_account_sas,
)
from azure.storage.blob import BlobServiceClient, generate_container_sas, ContainerSasPermissions
from approaches.mathassistant import(
generate_response,
process_agent_scratch_pad,
process_agent_response,
stream_agent_responses
)
Expand All @@ -45,9 +36,8 @@
process_agent_response as td_agent_response,
process_agent_scratch_pad as td_agent_scratch_pad,
get_images_in_temp

)
from shared_code.status_log import State, StatusClassification, StatusLog, StatusQueryLevel
from shared_code.status_log import State, StatusClassification, StatusLog
from azure.cosmos import CosmosClient


Expand All @@ -56,12 +46,10 @@
ENV = {
"AZURE_BLOB_STORAGE_ACCOUNT": None,
"AZURE_BLOB_STORAGE_ENDPOINT": None,
"AZURE_BLOB_STORAGE_KEY": None,
"AZURE_BLOB_STORAGE_CONTAINER": "content",
"AZURE_BLOB_STORAGE_UPLOAD_CONTAINER": "upload",
"AZURE_SEARCH_SERVICE": "gptkb",
"AZURE_SEARCH_SERVICE_ENDPOINT": None,
"AZURE_SEARCH_SERVICE_KEY": None,
"AZURE_SEARCH_INDEX": "gptkbindex",
"USE_SEMANTIC_RERANKER": "true",
"AZURE_OPENAI_SERVICE": "myopenai",
Expand All @@ -75,7 +63,6 @@
"EMBEDDING_DEPLOYMENT_NAME": "",
"AZURE_OPENAI_EMBEDDINGS_MODEL_NAME": "",
"AZURE_OPENAI_EMBEDDINGS_VERSION": "",
"AZURE_OPENAI_SERVICE_KEY": None,
"AZURE_SUBSCRIPTION_ID": None,
"AZURE_ARM_MANAGEMENT_API": "https://management.azure.com",
"CHAT_WARNING_BANNER_TEXT": "",
Expand All @@ -85,15 +72,13 @@
"KB_FIELDS_SOURCEFILE": "file_uri",
"KB_FIELDS_CHUNKFILE": "chunk_file",
"COSMOSDB_URL": None,
"COSMOSDB_KEY": None,
"COSMOSDB_LOG_DATABASE_NAME": "statusdb",
"COSMOSDB_LOG_CONTAINER_NAME": "statuscontainer",
"QUERY_TERM_LANGUAGE": "English",
"TARGET_EMBEDDINGS_MODEL": "BAAI/bge-small-en-v1.5",
"ENRICHMENT_APPSERVICE_URL": "enrichment",
"TARGET_TRANSLATION_LANGUAGE": "en",
"AZURE_AI_ENDPOINT": None,
"AZURE_AI_KEY": None,
"AZURE_AI_LOCATION": "",
"BING_SEARCH_ENDPOINT": "https://api.bing.microsoft.com/",
"BING_SEARCH_KEY": "",
Expand All @@ -102,8 +87,9 @@
"ENABLE_UNGROUNDED_CHAT": "false",
"ENABLE_MATH_ASSISTANT": "false",
"ENABLE_TABULAR_DATA_ASSISTANT": "false",
"ENABLE_MULTIMEDIA": "false",
"MAX_CSV_FILE_SIZE": "7"
"MAX_CSV_FILE_SIZE": "7",
"LOCAL_DEBUG": "false",
"AZURE_AI_CREDENTIAL_DOMAIN": "cognitiveservices.azure.com"
}

for key, value in ENV.items():
Expand Down Expand Up @@ -137,34 +123,38 @@ class StatusResponse(pydantic.BaseModel):
else:
AUTHORITY = AzureAuthorityHosts.AZURE_PUBLIC_CLOUD
openai.api_version = "2024-02-01"
# Use the current user identity to authenticate with Azure OpenAI, Cognitive Search and Blob Storage (no secrets needed,
# just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the
# keys for each service
# If you encounter a blocking error during a DefaultAzureCredential resolution, you can exclude the problematic credential by using a parameter (ex. exclude_shared_token_cache_credential=True)
azure_credential = DefaultAzureCredential(authority=AUTHORITY)
# When debugging in VSCode, use the current user identity to authenticate with Azure OpenAI,
# Cognitive Search and Blob Storage (no secrets needed, just use 'az login' locally)
# Use managed identity when deployed on Azure.
# If you encounter a blocking error during a DefaultAzureCredential resolution, you can exclude
# the problematic credential by using a parameter (ex. exclude_shared_token_cache_credential=True)
if ENV["LOCAL_DEBUG"] == "true":
azure_credential = DefaultAzureCredential(authority=AUTHORITY)
else:
azure_credential = ManagedIdentityCredential(authority=AUTHORITY)
# Comment these two lines out if using keys, set your API key in the OPENAI_API_KEY environment variable instead
# openai.api_type = "azure_ad"
# openai_token = azure_credential.get_token("https://cognitiveservices.azure.com/.default")
openai.api_key = ENV["AZURE_OPENAI_SERVICE_KEY"]
openai.api_type = "azure_ad"
token_provider = get_bearer_token_provider(azure_credential, f'https://{ENV["AZURE_AI_CREDENTIAL_DOMAIN"]}/.default')
openai.azure_ad_token_provider = token_provider
#openai.api_key = ENV["AZURE_OPENAI_SERVICE_KEY"]

# Setup StatusLog to allow access to CosmosDB for logging
statusLog = StatusLog(
ENV["COSMOSDB_URL"],
ENV["COSMOSDB_KEY"],
azure_credential,
ENV["COSMOSDB_LOG_DATABASE_NAME"],
ENV["COSMOSDB_LOG_CONTAINER_NAME"]
)

azure_search_key_credential = AzureKeyCredential(ENV["AZURE_SEARCH_SERVICE_KEY"])
# Set up clients for Cognitive Search and Storage
search_client = SearchClient(
endpoint=ENV["AZURE_SEARCH_SERVICE_ENDPOINT"],
index_name=ENV["AZURE_SEARCH_INDEX"],
credential=azure_search_key_credential,
credential=azure_credential,
)
blob_client = BlobServiceClient(
account_url=ENV["AZURE_BLOB_STORAGE_ENDPOINT"],
credential=ENV["AZURE_BLOB_STORAGE_KEY"],
credential=azure_credential,
)
blob_container = blob_client.get_container_client(ENV["AZURE_BLOB_STORAGE_CONTAINER"])

Expand Down Expand Up @@ -202,7 +192,6 @@ class StatusResponse(pydantic.BaseModel):
Approaches.ReadRetrieveRead: ChatReadRetrieveReadApproach(
search_client,
ENV["AZURE_OPENAI_ENDPOINT"],
ENV["AZURE_OPENAI_SERVICE_KEY"],
ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"],
ENV["KB_FIELDS_SOURCEFILE"],
ENV["KB_FIELDS_CONTENT"],
Expand All @@ -217,8 +206,8 @@ class StatusResponse(pydantic.BaseModel):
ENV["ENRICHMENT_APPSERVICE_URL"],
ENV["TARGET_TRANSLATION_LANGUAGE"],
ENV["AZURE_AI_ENDPOINT"],
ENV["AZURE_AI_KEY"],
ENV["AZURE_AI_LOCATION"],
token_provider,
str_to_bool.get(ENV["USE_SEMANTIC_RERANKER"])
),
Approaches.ChatWebRetrieveRead: ChatWebRetrieveRead(
Expand All @@ -227,20 +216,23 @@ class StatusResponse(pydantic.BaseModel):
ENV["TARGET_TRANSLATION_LANGUAGE"],
ENV["BING_SEARCH_ENDPOINT"],
ENV["BING_SEARCH_KEY"],
str_to_bool.get(ENV["ENABLE_BING_SAFE_SEARCH"])
),
str_to_bool.get(ENV["ENABLE_BING_SAFE_SEARCH"]),
ENV["AZURE_OPENAI_ENDPOINT"],
token_provider
),
Approaches.CompareWorkWithWeb: CompareWorkWithWeb(
model_name,
ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"],
ENV["TARGET_TRANSLATION_LANGUAGE"],
ENV["BING_SEARCH_ENDPOINT"],
ENV["BING_SEARCH_KEY"],
str_to_bool.get(ENV["ENABLE_BING_SAFE_SEARCH"])
),
str_to_bool.get(ENV["ENABLE_BING_SAFE_SEARCH"]),
ENV["AZURE_OPENAI_ENDPOINT"],
token_provider
),
Approaches.CompareWebWithWork: CompareWebWithWork(
search_client,
ENV["AZURE_OPENAI_ENDPOINT"],
ENV["AZURE_OPENAI_SERVICE_KEY"],
ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"],
ENV["KB_FIELDS_SOURCEFILE"],
ENV["KB_FIELDS_CONTENT"],
Expand All @@ -255,13 +247,12 @@ class StatusResponse(pydantic.BaseModel):
ENV["ENRICHMENT_APPSERVICE_URL"],
ENV["TARGET_TRANSLATION_LANGUAGE"],
ENV["AZURE_AI_ENDPOINT"],
ENV["AZURE_AI_KEY"],
ENV["AZURE_AI_LOCATION"],
token_provider,
str_to_bool.get(ENV["USE_SEMANTIC_RERANKER"])
),
Approaches.GPTDirect: GPTDirectApproach(
ENV["AZURE_OPENAI_SERVICE"],
ENV["AZURE_OPENAI_SERVICE_KEY"],
token_provider,
ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"],
ENV["QUERY_TERM_LANGUAGE"],
model_name,
Expand Down Expand Up @@ -349,23 +340,17 @@ async def get_blob_client_url():
Returns:
dict: A dictionary containing the URL with the SAS token.
"""
sas_token = generate_account_sas(
ENV["AZURE_BLOB_STORAGE_ACCOUNT"],
ENV["AZURE_BLOB_STORAGE_KEY"],
resource_types=ResourceTypes(object=True, service=True, container=True),
permission=AccountSasPermissions(
read=True,
write=True,
list=True,
delete=False,
add=True,
create=True,
update=True,
process=False,
),
expiry=datetime.utcnow() + timedelta(hours=1),
# Obtain the user delegation key
user_delegation_key = blob_client.get_user_delegation_key(key_start_time=datetime.utcnow(), key_expiry_time=datetime.utcnow() + timedelta(hours=2))

sas_token = generate_container_sas(account_name=blob_client.account_name,
container_name=ENV["AZURE_BLOB_STORAGE_UPLOAD_CONTAINER"],
permission=ContainerSasPermissions(read=True, write=True, delete=False, list=True, tag=True),
user_delegation_key=user_delegation_key,
expiry=datetime.utcnow() + timedelta(hours=2)
)
return {"url": f"{blob_client.url}?{sas_token}"}

return {"url": f"{blob_client.url}upload?{sas_token}"}

@app.post("/getalluploadstatus")
async def get_all_upload_status(request: Request):
Expand Down Expand Up @@ -393,7 +378,7 @@ async def get_all_upload_status(request: Request):
# retrieve tags for each file
# Initialize an empty list to hold the tags
items = []
cosmos_client = CosmosClient(url=statusLog._url, credential=statusLog._key)
cosmos_client = CosmosClient(url=statusLog._url, credential=azure_credential, consistency_level='Session')
database = cosmos_client.get_database_client(statusLog._database_name)
container = database.get_container_client(statusLog._container_name)
query_string = "SELECT DISTINCT VALUE t FROM c JOIN t IN c.tags"
Expand Down Expand Up @@ -531,7 +516,7 @@ async def get_tags(request: Request):
try:
# Initialize an empty list to hold the tags
items = []
cosmos_client = CosmosClient(url=statusLog._url, credential=statusLog._key)
cosmos_client = CosmosClient(url=statusLog._url, credential=azure_credential, consistency_level='Session')
database = cosmos_client.get_database_client(statusLog._database_name)
container = database.get_container_client(statusLog._container_name)
query_string = "SELECT DISTINCT VALUE t FROM c JOIN t IN c.tags"
Expand Down Expand Up @@ -723,7 +708,7 @@ async def posttd(csv: UploadFile = File(...)):
# Process the DataFrame...
save_df(df)
except Exception as ex:
raise HTTPException(status_code=500, detail=str(ex)) from ex
raise HTTPException(status_code=500, detail=str(ex)) from ex


#return {"filename": csv.filename}
Expand Down Expand Up @@ -756,7 +741,7 @@ async def process_td_agent_response(retries=3, delay=1000, question: Optional[st
async def getTdAnalysis(retries=3, delay=1, question: Optional[str] = None):
global dffinal
if question is None:
raise HTTPException(status_code=400, detail="Question is required")
raise HTTPException(status_code=400, detail="Question is required")

for i in range(retries):
try:
Expand Down Expand Up @@ -848,7 +833,7 @@ async def stream_agent_response(question: str):
results = process_agent_response(question)
except Exception as e:
print(f"Error processing agent response: {e}")
raise HTTPException(status_code=500, detail=str(e))
raise HTTPException(status_code=500, detail=str(e)) from e
return results


Expand All @@ -863,14 +848,12 @@ async def get_feature_flags():
- "ENABLE_UNGROUNDED_CHAT": Flag indicating whether ungrounded chat is enabled.
- "ENABLE_MATH_ASSISTANT": Flag indicating whether the math assistant is enabled.
- "ENABLE_TABULAR_DATA_ASSISTANT": Flag indicating whether the tabular data assistant is enabled.
- "ENABLE_MULTIMEDIA": Flag indicating whether multimedia is enabled.
"""
response = {
"ENABLE_WEB_CHAT": str_to_bool.get(ENV["ENABLE_WEB_CHAT"]),
"ENABLE_UNGROUNDED_CHAT": str_to_bool.get(ENV["ENABLE_UNGROUNDED_CHAT"]),
"ENABLE_MATH_ASSISTANT": str_to_bool.get(ENV["ENABLE_MATH_ASSISTANT"]),
"ENABLE_TABULAR_DATA_ASSISTANT": str_to_bool.get(ENV["ENABLE_TABULAR_DATA_ASSISTANT"]),
"ENABLE_MULTIMEDIA": str_to_bool.get(ENV["ENABLE_MULTIMEDIA"]),
}
return response

Expand Down
Loading

0 comments on commit 9ec00e2

Please sign in to comment.