diff --git a/.vscode/launch.json b/.vscode/launch.json index be3394284..84a61a14f 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -86,8 +86,8 @@ "request": "launch", "program": "debug_tests.py", "args": [ - "--storage_account_connection_str", - "${env:STORAGE_ACCOUNT_CONNECTION_STR},", + "--storage_account_url", + "${env:AZURE_BLOB_STORAGE_ENDPOINT}", "--search_service_endpoint", "${env:SEARCH_SERVICE_ENDPOINT}", "--search_index", @@ -98,10 +98,9 @@ "60" ], "env": { - "STORAGE_ACCOUNT_CONNECTION_STR": "${env:BLOB_CONNECTION_STRING}", + "storage_account_url": "${env:AZURE_BLOB_STORAGE_ENDPOINT}", "SEARCH_SERVICE_ENDPOINT": "${env:AZURE_SEARCH_SERVICE_ENDPOINT}", - "SEARCH_INDEX": "${env:AZURE_SEARCH_INDEX}", - "SEARCH_KEY": "${env:AZURE_SEARCH_SERVICE_KEY}" + "SEARCH_INDEX": "${env:AZURE_SEARCH_INDEX}" }, "cwd": "${workspaceFolder}/tests", "envFile": "${workspaceFolder}/scripts/environments/infrastructure.debug.env", diff --git a/README.md b/README.md index d4200b964..a80617b89 100644 --- a/README.md +++ b/README.md @@ -1,33 +1,34 @@ -# Information Assistant Accelerator +# Information Assistant (IA) accelerator > [!IMPORTANT] > As of November 15, 2023, Azure Cognitive Search has been renamed to Azure AI Search. Azure Cognitive Services have also been renamed to Azure AI Services. 
## Table of Contents -- [Response Generation Approaches](#response-generation-approaches) +- [Response generation approaches](#response-generation-approaches) - [Features](#features) - [Azure account requirements](#azure-account-requirements) -- [Azure Deployment](./docs/deployment/deployment.md) - - [GitHub Codespaces Setup](./docs/deployment/deployment.md#development-environment-configuration) - - [Cost Estimation](./docs/deployment/deployment.md#sizing-estimator) +- [Azure deployment](./docs/deployment/deployment.md) + - [GitHub Codespaces setup](./docs/deployment/deployment.md#development-environment-configuration) + - [Cost estimation](./docs/deployment/deployment.md#sizing-estimator) - [Configuring ENV parameters](./docs/deployment/deployment.md#configure-env-files) - [Authenticating to Azure](./docs/deployment/deployment.md#log-into-azure-using-the-azure-cli) - [Deploying to Azure](./docs/deployment/deployment.md#deploy-and-configure-azure-resources) - - [Troubleshooting Common Issues](./docs/deployment/troubleshooting.md) - - [Considerations for Production Adoption](./docs/deployment/considerations_production.md) -- [Secure-Mode Deployment](./docs/secure_deployment/secure_deployment.md) + - [Troubleshooting common issues](./docs/deployment/troubleshooting.md) + - [Considerations for production adoption](./docs/deployment/considerations_production.md) +- [Secure-mode deployment](./docs/secure_deployment/secure_deployment.md) - [Enabling optional features](./docs/features/optional_features.md) - [Using the app](/docs/deployment/using_ia_first_time.md) - [Responsible AI](#responsible-ai) - [Transparency Note](#transparency-note) - [Content Safety](#content-safety) - [Data Collection Notice](#data-collection-notice) +- [Shared responsibility and customer responsibilities](#shared-responsibility-and-customer-responsibilities) - [Resources](#resources) - - [Known Issues](./docs/knownissues.md) - - [Functional Tests](./tests/README.md) + - [Known 
issues](./docs/knownissues.md) + - [Functional tests](./tests/README.md) - [Navigating the source code](#navigating-the-source-code) - - [Architectural Decisions](/docs/features/architectural_decisions.md) + - [Architectural decisions](/docs/features/architectural_decisions.md) - [References](#references) - [Trademarks](#trademarks) - [Code of Conduct](#code-of-conduct) @@ -36,23 +37,23 @@ [![Open in GitHub Codespaces](https://img.shields.io/static/v1?style=for-the-badge&label=GitHub+Codespaces&message=Open&color=brightgreen&logo=github)](https://github.com/codespaces/new?hide_repo_select=true&ref=main&repo=601652366&machine=basicLinux32gb&devcontainer_path=.devcontainer%2Fdevcontainer.json&location=eastus) -This industry accelerator showcases integration between Azure and OpenAI's large language models. It leverages Azure AI Search for data retrieval and ChatGPT-style Q&A interactions. Using the Retrieval Augmented Generation (RAG) design pattern with Azure OpenAI's GPT models, it provides a natural language interaction to discover relevant responses to user queries. Azure AI Search simplifies data ingestion, transformation, indexing, and multilingual translation. +Information Assistant (IA) is an industry accelerator that showcases integration between Azure and OpenAI's large language models. It leverages Azure AI Search for data retrieval and ChatGPT-style Q&A interactions. Using the Retrieval Augmented Generation (RAG) design pattern with Azure OpenAI's GPT models, it provides a natural language interaction to discover relevant responses to user queries. Azure AI Search simplifies data ingestion, transformation, indexing, and multilingual translation. The accelerator adapts prompts based on the model type for enhanced performance. Users can customize settings like temperature and persona for personalized AI interactions. It offers features like explainable thought processes, referenceable citations, and direct content for verification. 
Please [see this video](https://aka.ms/InfoAssist/video) for use cases that may be achievable with this accelerator. -# Response Generation Approaches +# Response generation approaches -## Work(Grounded) -It utilizes a retrieval-augmented generation (RAG) pattern to generate responses grounded in specific data sourced from your own dataset. By combining retrieval of relevant information with generative capabilities, It can produce responses that are not only contextually relevant but also grounded in verified data. The RAG pipeline accesses your dataset to retrieve relevant information before generating responses, ensuring accuracy and reliability. Additionally, each response includes a citation to the document chunk from which the answer is derived, providing transparency and allowing users to verify the source. This approach is particularly advantageous in domains where precision and factuality are paramount. Users can trust that the responses generated are based on reliable data sources, enhancing the credibility and usefulness of the application. Specific information on our Grounded (RAG) can be found in [RAG](docs/features/cognitive_search.md#azure-ai-search-integration) +## Work (Grounded) +It utilizes a retrieval-augmented generation (RAG) pattern to generate responses grounded in specific data sourced from your own dataset. By combining retrieval of relevant information with generative capabilities, it can produce responses that are not only contextually relevant but also grounded in verified data. The RAG pipeline accesses your dataset to retrieve relevant information before generating responses, ensuring accuracy and reliability. Additionally, each response includes a citation to the document chunk from which the answer is derived, providing transparency and allowing users to verify the source. This approach is particularly advantageous in domains where precision and factuality are paramount. 
Users can trust that the responses generated are based on reliable data sources, enhancing the credibility and usefulness of the application. Specific information on our Grounded (RAG) can be found in [RAG](docs/features/cognitive_search.md#azure-ai-search-integration). ## Ungrounded It leverages the capabilities of a large language model (LLM) to generate responses in an ungrounded manner, without relying on external data sources or retrieval-augmented generation techniques. The LLM has been trained on a vast corpus of text data, enabling it to generate coherent and contextually relevant responses solely based on the input provided. This approach allows for open-ended and creative generation, making it suitable for tasks such as ideation, brainstorming, and exploring hypothetical scenarios. It's important to note that the generated responses are not grounded in specific factual data and should be evaluated critically, especially in domains where accuracy and verifiability are paramount. ## Work and Web -It offers 3 response options: one generated through our retrieval-augmented generation (RAG) pipeline, and the other grounded in content directly from the web. When users opt for the RAG response, they receive a grounded answer sourced from your data, complete with citations to document chunks for transparency and verification. Conversely, selecting the web response provides access to a broader range of sources, potentially offering more diverse perspectives. Each web response is grounded in content from the web accompanied by citations of web links, allowing users to explore the original sources for further context and validation. Upon request, It can also generate a final response that compares and contrasts both responses. This comparative analysis allows users to make informed decisions based on the reliability, relevance, and context of the information provided. 
-Specific information about our Grounded and Web can be found in [Web](/docs/features/features.md#bing-search-and-compare) +It offers 2 response options: one generated through our retrieval-augmented generation (RAG) pipeline, and the other grounded in content directly from the web. When users opt for the RAG response, they receive a grounded answer sourced from their data, complete with citations to document chunks for transparency and verification. Conversely, selecting the web response provides access to a broader range of sources, potentially offering more diverse perspectives. Each web response is grounded in content from the web accompanied by citations of web links, allowing users to explore the original sources for further context and validation. Upon request, it can also generate a final response that compares and contrasts both responses. This comparative analysis allows users to make informed decisions based on the reliability, relevance, and context of the information provided. +Specific information about our Work and Web can be found in [Web](/docs/features/features.md#bing-search-and-compare). ## Assistants It generates response by using LLM as a reasoning engine. The key strength lies in agent's ability to autonomously reason about tasks, decompose them into steps, and determine the appropriate tools and data sources to leverage, all without the need for predefined task definitions or rigid workflows. This approach allows for a dynamic and adaptive response generation process without predefining set of tasks. It harnesses the capabilities of LLM to understand natural language queries and generate responses tailored to specific tasks. These Agents are being released in preview mode as we continue to evaluate and mitigate the potential risks associated with autonomous reasoning, such as misuse of external tools, lack of transparency, biased outputs, privacy concerns, and remote code execution vulnerabilities. 
With future releases, we plan to work to enhance the safety and robustness of these autonomous reasoning capabilities. Specific information on our preview agents can be found in [Assistants](/docs/features/features.md#autonomous-reasoning-with-assistants-agents). @@ -69,11 +70,11 @@ The IA Accelerator contains several features, many of which have their own docum For a detailed review see our [Features](./docs/features/features.md) page. -### Process Flow for Work(Grounded), Ungrounded, and Work and Web +### Process flow for Work (Grounded), Ungrounded, and Work and Web ![Process Flow for Chat](/docs/process_flow_chat.png) -### Process Flow for Assistants +### Process flow for Assistants ![Process Flow for Assistants](/docs/process_flow_agent.png) @@ -101,7 +102,7 @@ For a detailed review see our [Features](./docs/features/features.md) page. * **Azure account permissions**: * Your Azure account must have `Microsoft.Authorization/roleAssignments/write` permissions, such as [Role Based Access Control Administrator](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#role-based-access-control-administrator-preview), [User Access Administrator](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#user-access-administrator), or [Owner](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#owner) on the subscription. * Your Azure account also needs `Microsoft.Resources/deployments/write` permissions on the subscription level. - * Your Azure account also needs `microsoft.directory/applications/create` and `microsoft.directory/servicePrincipals/create`, such as [Application Administrator](https://learn.microsoft.com/en-us/entra/identity/role-based-access-control/permissions-reference#application-administrator) Entra built-in role. 
+ * Your Azure account also needs `microsoft.directory/applications/create` and `microsoft.directory/servicePrincipals/create`, such as [Application Administrator](https://learn.microsoft.com/entra/identity/role-based-access-control/permissions-reference#application-administrator) Entra built-in role. * **To have accepted the Azure AI Services Responsible AI Notice** for your subscription. If you have not manually accepted this notice please follow our guide at [Accepting Azure AI Service Responsible AI Notice](./docs/deployment/accepting_responsible_ai_notice.md). * (Optional) Have [Visual Studio Code](https://code.visualstudio.com/) installed on your development machine. If your Azure tenant and subscription have conditional access policies or device policies required, you may need to open your GitHub Codespaces in VS Code to satisfy the required polices. @@ -115,17 +116,17 @@ You may choose to **[view the deployment and usage click-through guides](https:/ ## Responsible AI -The Information Assistant (IA) Accelerator and Microsoft are committed to the advancement of AI driven by ethical principles that put people first. +The Information Assistant (IA) accelerator and Microsoft are committed to the advancement of AI driven by ethical principles that put people first. ### Transparency Note -**Read our [Transparency Note](/docs/transparency.md)** +**Read our [Transparency Note](/docs/transparency.md).** -Find out more with Microsoft's [Responsible AI resources](https://www.microsoft.com/en-us/ai/responsible-ai) +Find out more with Microsoft's [Responsible AI resources](https://www.microsoft.com/ai/responsible-ai). ### Content Safety -Content safety is provided through Azure OpenAI service. The Azure OpenAI Service includes a content filtering system that runs alongside the core AI models. 
This system uses an ensemble of classification models to detect four categories of potentially harmful content (violence, hate, sexual, and self-harm) at four severity levels (safe, low, medium, high).These 4 categories may not be sufficient for all use cases, especially for minors. Please read our [Transparency Note](/docs/transparency.md) +Content safety is provided through Azure OpenAI service. The Azure OpenAI Service includes a content filtering system that runs alongside the core AI models. This system uses an ensemble of classification models to detect four categories of potentially harmful content (violence, hate, sexual, and self-harm) at four severity levels (safe, low, medium, high). These 4 categories may not be sufficient for all use cases, especially for minors. Please read our [Transparency Note](/docs/transparency.md). By default, the content filters are set to filter out prompts and completions that are detected as medium or high severity for those four harm categories. Content labeled as low or safe severity is not filtered. @@ -135,7 +136,7 @@ The filtering configuration can be customized at the resource level, allowing cu This provides controls for Azure customers to tailor the content filtering behavior to their needs while aiming to prevent potentially harmful generated content and any copyright violations from public content. -Instructions on how to configure content filters via Azure OpenAI Studio can be found here +Learn how to [configure content filters via Azure OpenAI Studio (preview)](https://learn.microsoft.com/azure/ai-services/openai/how-to/content-filters#configuring-content-filters-via-azure-openai-studio-preview). ## Data Collection Notice @@ -153,7 +154,7 @@ To disable data collection, follow the instructions in the [Configure ENV files] ## Resources -### Navigating the Source Code +### Navigating the source code This project has the following structure: @@ -183,14 +184,27 @@ README.md | Starting point for this repo. 
It covers overviews of the Accelerator - [Azure AI Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) - [Azure OpenAI Service](https://learn.microsoft.com/azure/cognitive-services/openai/overview) -### Trademarks +## Shared responsibility and customer responsibilities -This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft’s Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party’s policies. +To ensure your data is secure and your privacy controls are addressed, we recommend that you follow a set of best practices when deploying into Azure: -### Code of Conduct +- [Azure security best practices and patterns](https://learn.microsoft.com/azure/security/fundamentals/best-practices-and-patterns) +- [Microsoft Services in Cybersecurity](https://learn.microsoft.com/azure/security/fundamentals/cyber-services) + +Protecting your data also requires that all aspects of your security and compliance program include your cloud infrastructure and data. The following guidance can help you to secure your deployment. + +## Trademarks + +This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft trademarks or logos is subject to and must follow [Microsoft’s Trademark & Brand Guidelines](https://www.microsoft.com/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party’s policies. + +## Microsoft Legal Notice + +**Notice**. 
The Information Assistant accelerator (the "IA") is PROVIDED "AS-IS," "WITH ALL FAULTS," AND "AS AVAILABLE," AND ARE EXCLUDED FROM THE SERVICE LEVEL AGREEMENTS AND LIMITED WARRANTY. The IA may employ lesser or different privacy and security measures than those typically present in Azure Services. Unless otherwise noted, The IA should not be used to process Personal Data or other data that is subject to legal or regulatory compliance requirements. The following terms in the DPA do not apply to the IA: Processing of Personal Data, GDPR, Data Security, and HIPAA Business Associate. We may change or discontinue the IA at any time without notice. The IA (1) is not designed, intended, or made available as legal services, (2) is not intended to substitute for professional legal counsel or judgment, and (3) should not be used in place of consulting with a qualified professional legal professional for your specific needs. Microsoft makes no warranty that the IA is accurate, up-to-date, or complete. You are wholly responsible for ensuring your own compliance with all applicable laws and regulations. + +## Code of Conduct This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. -### Reporting Security Issues +## Reporting security issues -For security concerns, please see [Security Guidelines](./SECURITY.md) \ No newline at end of file +For security concerns, please see [Security Guidelines](./SECURITY.md). 
\ No newline at end of file diff --git a/SECURITY.md b/SECURITY.md index 2364913c7..cbbd35b30 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,7 +6,7 @@ Microsoft takes the security of our software products and services seriously, wh If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. -## Reporting Security Issues +## Reporting security issues **Please do not report security vulnerabilities through public GitHub issues.** @@ -30,11 +30,11 @@ This information will help us triage your report more quickly. If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. -## Providing Feedback +## Providing feedback Please refer to the [Contributing](./CONTRIBUTING.md) guidelines for acceptable methods to provide feedback for issues which are not security related. -## Preferred Languages +## Preferred languages We prefer all communications to be in English. diff --git a/SUPPORT.md b/SUPPORT.md index 5a63fa282..58d87dab4 100644 --- a/SUPPORT.md +++ b/SUPPORT.md @@ -2,15 +2,17 @@ ## How to file issues and get help -This project uses GitHub Issues to track bugs and feature requests. Please search the existing +This project uses [GitHub Issues](https://github.com/microsoft/PubSec-Info-Assistant/issues) to track bugs and feature requests. Please search the existing issues before filing new issues to avoid duplicates. For new issues, file your bug or -feature request as a new Issue. +feature request as a new Issue. -For help and questions about using this project, please use the [Discussion](https://github.com/microsoft/PubSec-Info-Assistant/discussions) forums on our GitHub Repo page. 
+Please provide as much information as possible when filing an issue (please redact any sensitive information). + +For help and questions about using this project, please use the [Discussion](https://github.com/microsoft/PubSec-Info-Assistant/discussions) forums on our GitHub repo page. For customer support deploying this accelerator, please reach out to your local Microsoft representative or email the [Industry Solutions Accelerator Team](mailto:isat-support@microsoft.com). -## Providing Feedback +## Providing feedback Please refer to the [Contributing](./CONTRIBUTING.md) guidelines for acceptable methods to provide feedback which are not security related. diff --git a/app/backend/app.py b/app/backend/app.py index f17b5b937..6886b2cf3 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -1,18 +1,16 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. + from io import StringIO from typing import Optional import asyncio -#from sse_starlette.sse import EventSourceResponse -#from starlette.responses import StreamingResponse -from starlette.responses import Response import logging import os import json import urllib.parse import pandas as pd import pydantic -from datetime import datetime, time, timedelta +from datetime import datetime, timedelta from fastapi.staticfiles import StaticFiles from fastapi import FastAPI, File, HTTPException, Request, UploadFile from fastapi.responses import RedirectResponse, StreamingResponse @@ -23,19 +21,12 @@ from approaches.chatwebretrieveread import ChatWebRetrieveRead from approaches.gpt_direct_approach import GPTDirectApproach from approaches.approach import Approaches -from azure.core.credentials import AzureKeyCredential -from azure.identity import DefaultAzureCredential, AzureAuthorityHosts +from azure.identity import ManagedIdentityCredential, AzureAuthorityHosts, DefaultAzureCredential, get_bearer_token_provider from azure.mgmt.cognitiveservices import CognitiveServicesManagementClient from 
azure.search.documents import SearchClient -from azure.storage.blob import ( - AccountSasPermissions, - BlobServiceClient, - ResourceTypes, - generate_account_sas, -) +from azure.storage.blob import BlobServiceClient, generate_container_sas, ContainerSasPermissions from approaches.mathassistant import( generate_response, - process_agent_scratch_pad, process_agent_response, stream_agent_responses ) @@ -45,9 +36,8 @@ process_agent_response as td_agent_response, process_agent_scratch_pad as td_agent_scratch_pad, get_images_in_temp - ) -from shared_code.status_log import State, StatusClassification, StatusLog, StatusQueryLevel +from shared_code.status_log import State, StatusClassification, StatusLog from azure.cosmos import CosmosClient @@ -56,12 +46,10 @@ ENV = { "AZURE_BLOB_STORAGE_ACCOUNT": None, "AZURE_BLOB_STORAGE_ENDPOINT": None, - "AZURE_BLOB_STORAGE_KEY": None, "AZURE_BLOB_STORAGE_CONTAINER": "content", "AZURE_BLOB_STORAGE_UPLOAD_CONTAINER": "upload", "AZURE_SEARCH_SERVICE": "gptkb", "AZURE_SEARCH_SERVICE_ENDPOINT": None, - "AZURE_SEARCH_SERVICE_KEY": None, "AZURE_SEARCH_INDEX": "gptkbindex", "USE_SEMANTIC_RERANKER": "true", "AZURE_OPENAI_SERVICE": "myopenai", @@ -75,7 +63,6 @@ "EMBEDDING_DEPLOYMENT_NAME": "", "AZURE_OPENAI_EMBEDDINGS_MODEL_NAME": "", "AZURE_OPENAI_EMBEDDINGS_VERSION": "", - "AZURE_OPENAI_SERVICE_KEY": None, "AZURE_SUBSCRIPTION_ID": None, "AZURE_ARM_MANAGEMENT_API": "https://management.azure.com", "CHAT_WARNING_BANNER_TEXT": "", @@ -85,7 +72,6 @@ "KB_FIELDS_SOURCEFILE": "file_uri", "KB_FIELDS_CHUNKFILE": "chunk_file", "COSMOSDB_URL": None, - "COSMOSDB_KEY": None, "COSMOSDB_LOG_DATABASE_NAME": "statusdb", "COSMOSDB_LOG_CONTAINER_NAME": "statuscontainer", "QUERY_TERM_LANGUAGE": "English", @@ -93,7 +79,6 @@ "ENRICHMENT_APPSERVICE_URL": "enrichment", "TARGET_TRANSLATION_LANGUAGE": "en", "AZURE_AI_ENDPOINT": None, - "AZURE_AI_KEY": None, "AZURE_AI_LOCATION": "", "BING_SEARCH_ENDPOINT": "https://api.bing.microsoft.com/", "BING_SEARCH_KEY": "", @@ 
-102,8 +87,9 @@ "ENABLE_UNGROUNDED_CHAT": "false", "ENABLE_MATH_ASSISTANT": "false", "ENABLE_TABULAR_DATA_ASSISTANT": "false", - "ENABLE_MULTIMEDIA": "false", - "MAX_CSV_FILE_SIZE": "7" + "MAX_CSV_FILE_SIZE": "7", + "LOCAL_DEBUG": "false", + "AZURE_AI_CREDENTIAL_DOMAIN": "cognitiveservices.azure.com" } for key, value in ENV.items(): @@ -137,34 +123,38 @@ class StatusResponse(pydantic.BaseModel): else: AUTHORITY = AzureAuthorityHosts.AZURE_PUBLIC_CLOUD openai.api_version = "2024-02-01" -# Use the current user identity to authenticate with Azure OpenAI, Cognitive Search and Blob Storage (no secrets needed, -# just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the -# keys for each service -# If you encounter a blocking error during a DefaultAzureCredntial resolution, you can exclude the problematic credential by using a parameter (ex. exclude_shared_token_cache_credential=True) -azure_credential = DefaultAzureCredential(authority=AUTHORITY) +# When debugging in VSCode, use the current user identity to authenticate with Azure OpenAI, +# Cognitive Search and Blob Storage (no secrets needed, just use 'az login' locally) +# Use managed identity when deployed on Azure. +# If you encounter a blocking error during a DefaultAzureCredential resolution, you can exclude +# the problematic credential by using a parameter (ex. 
exclude_shared_token_cache_credential=True) +if ENV["LOCAL_DEBUG"] == "true": + azure_credential = DefaultAzureCredential(authority=AUTHORITY) +else: + azure_credential = ManagedIdentityCredential(authority=AUTHORITY) # Comment these two lines out if using keys, set your API key in the OPENAI_API_KEY environment variable instead -# openai.api_type = "azure_ad" -# openai_token = azure_credential.get_token("https://cognitiveservices.azure.com/.default") -openai.api_key = ENV["AZURE_OPENAI_SERVICE_KEY"] +openai.api_type = "azure_ad" +token_provider = get_bearer_token_provider(azure_credential, f'https://{ENV["AZURE_AI_CREDENTIAL_DOMAIN"]}/.default') +openai.azure_ad_token_provider = token_provider +#openai.api_key = ENV["AZURE_OPENAI_SERVICE_KEY"] # Setup StatusLog to allow access to CosmosDB for logging statusLog = StatusLog( ENV["COSMOSDB_URL"], - ENV["COSMOSDB_KEY"], + azure_credential, ENV["COSMOSDB_LOG_DATABASE_NAME"], ENV["COSMOSDB_LOG_CONTAINER_NAME"] ) -azure_search_key_credential = AzureKeyCredential(ENV["AZURE_SEARCH_SERVICE_KEY"]) # Set up clients for Cognitive Search and Storage search_client = SearchClient( endpoint=ENV["AZURE_SEARCH_SERVICE_ENDPOINT"], index_name=ENV["AZURE_SEARCH_INDEX"], - credential=azure_search_key_credential, + credential=azure_credential, ) blob_client = BlobServiceClient( account_url=ENV["AZURE_BLOB_STORAGE_ENDPOINT"], - credential=ENV["AZURE_BLOB_STORAGE_KEY"], + credential=azure_credential, ) blob_container = blob_client.get_container_client(ENV["AZURE_BLOB_STORAGE_CONTAINER"]) @@ -202,7 +192,6 @@ class StatusResponse(pydantic.BaseModel): Approaches.ReadRetrieveRead: ChatReadRetrieveReadApproach( search_client, ENV["AZURE_OPENAI_ENDPOINT"], - ENV["AZURE_OPENAI_SERVICE_KEY"], ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"], ENV["KB_FIELDS_SOURCEFILE"], ENV["KB_FIELDS_CONTENT"], @@ -217,8 +206,8 @@ class StatusResponse(pydantic.BaseModel): ENV["ENRICHMENT_APPSERVICE_URL"], ENV["TARGET_TRANSLATION_LANGUAGE"], ENV["AZURE_AI_ENDPOINT"], - 
ENV["AZURE_AI_KEY"], ENV["AZURE_AI_LOCATION"], + token_provider, str_to_bool.get(ENV["USE_SEMANTIC_RERANKER"]) ), Approaches.ChatWebRetrieveRead: ChatWebRetrieveRead( @@ -227,20 +216,23 @@ class StatusResponse(pydantic.BaseModel): ENV["TARGET_TRANSLATION_LANGUAGE"], ENV["BING_SEARCH_ENDPOINT"], ENV["BING_SEARCH_KEY"], - str_to_bool.get(ENV["ENABLE_BING_SAFE_SEARCH"]) - ), + str_to_bool.get(ENV["ENABLE_BING_SAFE_SEARCH"]), + ENV["AZURE_OPENAI_ENDPOINT"], + token_provider + ), Approaches.CompareWorkWithWeb: CompareWorkWithWeb( model_name, ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"], ENV["TARGET_TRANSLATION_LANGUAGE"], ENV["BING_SEARCH_ENDPOINT"], ENV["BING_SEARCH_KEY"], - str_to_bool.get(ENV["ENABLE_BING_SAFE_SEARCH"]) - ), + str_to_bool.get(ENV["ENABLE_BING_SAFE_SEARCH"]), + ENV["AZURE_OPENAI_ENDPOINT"], + token_provider + ), Approaches.CompareWebWithWork: CompareWebWithWork( search_client, ENV["AZURE_OPENAI_ENDPOINT"], - ENV["AZURE_OPENAI_SERVICE_KEY"], ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"], ENV["KB_FIELDS_SOURCEFILE"], ENV["KB_FIELDS_CONTENT"], @@ -255,13 +247,12 @@ class StatusResponse(pydantic.BaseModel): ENV["ENRICHMENT_APPSERVICE_URL"], ENV["TARGET_TRANSLATION_LANGUAGE"], ENV["AZURE_AI_ENDPOINT"], - ENV["AZURE_AI_KEY"], ENV["AZURE_AI_LOCATION"], + token_provider, str_to_bool.get(ENV["USE_SEMANTIC_RERANKER"]) ), Approaches.GPTDirect: GPTDirectApproach( - ENV["AZURE_OPENAI_SERVICE"], - ENV["AZURE_OPENAI_SERVICE_KEY"], + token_provider, ENV["AZURE_OPENAI_CHATGPT_DEPLOYMENT"], ENV["QUERY_TERM_LANGUAGE"], model_name, @@ -349,23 +340,17 @@ async def get_blob_client_url(): Returns: dict: A dictionary containing the URL with the SAS token. 
""" - sas_token = generate_account_sas( - ENV["AZURE_BLOB_STORAGE_ACCOUNT"], - ENV["AZURE_BLOB_STORAGE_KEY"], - resource_types=ResourceTypes(object=True, service=True, container=True), - permission=AccountSasPermissions( - read=True, - write=True, - list=True, - delete=False, - add=True, - create=True, - update=True, - process=False, - ), - expiry=datetime.utcnow() + timedelta(hours=1), + # Obtain the user delegation key + user_delegation_key = blob_client.get_user_delegation_key(key_start_time=datetime.utcnow(), key_expiry_time=datetime.utcnow() + timedelta(hours=2)) + + sas_token = generate_container_sas(account_name=blob_client.account_name, + container_name=ENV["AZURE_BLOB_STORAGE_UPLOAD_CONTAINER"], + permission=ContainerSasPermissions(read=True, write=True, delete=False, list=True, tag=True), + user_delegation_key=user_delegation_key, + expiry=datetime.utcnow() + timedelta(hours=2) ) - return {"url": f"{blob_client.url}?{sas_token}"} + + return {"url": f"{blob_client.url}upload?{sas_token}"} @app.post("/getalluploadstatus") async def get_all_upload_status(request: Request): @@ -393,7 +378,7 @@ async def get_all_upload_status(request: Request): # retrieve tags for each file # Initialize an empty list to hold the tags items = [] - cosmos_client = CosmosClient(url=statusLog._url, credential=statusLog._key) + cosmos_client = CosmosClient(url=statusLog._url, credential=azure_credential, consistency_level='Session') database = cosmos_client.get_database_client(statusLog._database_name) container = database.get_container_client(statusLog._container_name) query_string = "SELECT DISTINCT VALUE t FROM c JOIN t IN c.tags" @@ -531,7 +516,7 @@ async def get_tags(request: Request): try: # Initialize an empty list to hold the tags items = [] - cosmos_client = CosmosClient(url=statusLog._url, credential=statusLog._key) + cosmos_client = CosmosClient(url=statusLog._url, credential=azure_credential, consistency_level='Session') database = 
cosmos_client.get_database_client(statusLog._database_name) container = database.get_container_client(statusLog._container_name) query_string = "SELECT DISTINCT VALUE t FROM c JOIN t IN c.tags" @@ -723,7 +708,7 @@ async def posttd(csv: UploadFile = File(...)): # Process the DataFrame... save_df(df) except Exception as ex: - raise HTTPException(status_code=500, detail=str(ex)) from ex + raise HTTPException(status_code=500, detail=str(ex)) from ex #return {"filename": csv.filename} @@ -756,7 +741,7 @@ async def process_td_agent_response(retries=3, delay=1000, question: Optional[st async def getTdAnalysis(retries=3, delay=1, question: Optional[str] = None): global dffinal if question is None: - raise HTTPException(status_code=400, detail="Question is required") + raise HTTPException(status_code=400, detail="Question is required") for i in range(retries): try: @@ -848,7 +833,7 @@ async def stream_agent_response(question: str): results = process_agent_response(question) except Exception as e: print(f"Error processing agent response: {e}") - raise HTTPException(status_code=500, detail=str(e)) + raise HTTPException(status_code=500, detail=str(e)) from e return results @@ -863,14 +848,12 @@ async def get_feature_flags(): - "ENABLE_UNGROUNDED_CHAT": Flag indicating whether ungrounded chat is enabled. - "ENABLE_MATH_ASSISTANT": Flag indicating whether the math assistant is enabled. - "ENABLE_TABULAR_DATA_ASSISTANT": Flag indicating whether the tabular data assistant is enabled. - - "ENABLE_MULTIMEDIA": Flag indicating whether multimedia is enabled. 
""" response = { "ENABLE_WEB_CHAT": str_to_bool.get(ENV["ENABLE_WEB_CHAT"]), "ENABLE_UNGROUNDED_CHAT": str_to_bool.get(ENV["ENABLE_UNGROUNDED_CHAT"]), "ENABLE_MATH_ASSISTANT": str_to_bool.get(ENV["ENABLE_MATH_ASSISTANT"]), "ENABLE_TABULAR_DATA_ASSISTANT": str_to_bool.get(ENV["ENABLE_TABULAR_DATA_ASSISTANT"]), - "ENABLE_MULTIMEDIA": str_to_bool.get(ENV["ENABLE_MULTIMEDIA"]), } return response diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 0fc9e777b..ff9633083 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -6,20 +6,18 @@ import logging import urllib.parse from datetime import datetime, timedelta -from typing import Any, AsyncGenerator, Coroutine, Sequence +from typing import Any, Sequence import openai -from openai import AzureOpenAI from openai import AsyncAzureOpenAI from approaches.approach import Approach from azure.search.documents import SearchClient from azure.search.documents.models import RawVectorQuery from azure.search.documents.models import QueryType from azure.storage.blob import ( - AccountSasPermissions, + BlobSasPermissions, BlobServiceClient, - ResourceTypes, - generate_account_sas, + generate_blob_sas, ) from text import nonewlines from core.modelhelper import get_token_limit @@ -88,7 +86,6 @@ def __init__( self, search_client: SearchClient, oai_endpoint: str, - oai_service_key: str, chatgpt_deployment: str, source_file_field: str, content_field: str, @@ -103,10 +100,9 @@ def __init__( enrichment_appservice_uri: str, target_translation_language: str, azure_ai_endpoint:str, - azure_ai_key:str, azure_ai_location:str, + azure_ai_token_provider:str, use_semantic_reranker: bool - ): self.search_client = search_client self.chatgpt_deployment = chatgpt_deployment @@ -122,20 +118,19 @@ def __init__( self.escaped_target_model = re.sub(r'[^a-zA-Z0-9_\-.]', '_', target_embedding_model) 
self.target_translation_language=target_translation_language self.azure_ai_endpoint=azure_ai_endpoint - self.azure_ai_key=azure_ai_key self.azure_ai_location=azure_ai_location + self.azure_ai_token_provider=azure_ai_token_provider self.oai_endpoint=oai_endpoint self.embedding_service_url = enrichment_appservice_uri self.use_semantic_reranker=use_semantic_reranker openai.api_base = oai_endpoint openai.api_type = 'azure' - openai.api_key = oai_service_key openai.api_version = "2024-02-01" self.client = AsyncAzureOpenAI( - azure_endpoint = openai.api_base, - api_key=openai.api_key, + azure_endpoint = openai.api_base, + azure_ad_token_provider=azure_ai_token_provider, api_version=openai.api_version) @@ -233,10 +228,10 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any] response = requests.post(url, json=data,headers=headers,timeout=60) if response.status_code == 200: response_data = response.json() - embedded_query_vector =response_data.get('data') + embedded_query_vector =response_data.get('data') else: # Generate an error message if the embedding generation fails - log.error(f"Error generating embedding:: {response.status_code}") + log.error(f"Error generating embedding:: {response.status_code} - {response.text}") yield json.dumps({"error": "Error generating embedding"}) + "\n" return # Go no further except Exception as e: @@ -429,7 +424,7 @@ def detect_language(self, text: str) -> str: try: api_detect_endpoint = f"{self.azure_ai_endpoint}language/:analyze-text?api-version=2023-04-01" headers = { - 'Ocp-Apim-Subscription-Key': self.azure_ai_key, + 'Authorization': f'Bearer {self.azure_ai_token_provider()}', 'Content-type': 'application/json', 'Ocp-Apim-Subscription-Region': self.azure_ai_location } @@ -452,7 +447,7 @@ def detect_language(self, text: str) -> str: detected_language = response.json()["results"]["documents"][0]["detectedLanguage"]["iso6391Name"] return detected_language else: - raise Exception(f"Error detecting language: 
{response.status_code}") + raise Exception(f"Error detecting language: {response.status_code} - {response.text}") except Exception as e: raise Exception(f"An error occurred during language detection: {str(e)}") from e @@ -460,7 +455,7 @@ def translate_response(self, response: str, target_language: str) -> str: """ Function to translate the response to target language""" api_translate_endpoint = f"{self.azure_ai_endpoint}translator/text/v3.0/translate?api-version=3.0" headers = { - 'Ocp-Apim-Subscription-Key': self.azure_ai_key, + 'Authorization': f'Bearer {self.azure_ai_token_provider()}', 'Content-type': 'application/json', 'Ocp-Apim-Subscription-Region': self.azure_ai_location } @@ -479,20 +474,20 @@ def translate_response(self, response: str, target_language: str) -> str: def get_source_file_with_sas(self, source_file: str) -> str: """ Function to return the source file with a SAS token""" try: - sas_token = generate_account_sas( - self.blob_client.account_name, - self.blob_client.credential.account_key, - resource_types=ResourceTypes(object=True, service=True, container=True), - permission=AccountSasPermissions( - read=True, - write=True, - list=True, - delete=False, - add=True, - create=True, - update=True, - process=False, - ), + separator = "/" + file_path_w_name_no_cont = separator.join( + source_file.split(separator)[4:]) + container_name = separator.join( + source_file.split(separator)[3:4]) + # Obtain the user delegation key + user_delegation_key = self.blob_client.get_user_delegation_key(key_start_time=datetime.utcnow(), key_expiry_time=datetime.utcnow() + timedelta(hours=2)) + + sas_token = generate_blob_sas( + account_name=self.blob_client.account_name, + container_name=container_name, + blob_name=file_path_w_name_no_cont, + user_delegation_key=user_delegation_key, + permission=BlobSasPermissions(read=True), expiry=datetime.utcnow() + timedelta(hours=1), ) return source_file + "?" 
+ sas_token diff --git a/app/backend/approaches/chatwebretrieveread.py b/app/backend/approaches/chatwebretrieveread.py index 34da368e8..f51f25c18 100644 --- a/app/backend/approaches/chatwebretrieveread.py +++ b/app/backend/approaches/chatwebretrieveread.py @@ -74,7 +74,15 @@ class ChatWebRetrieveRead(Approach): citations = {} approach_class = "" - def __init__(self, model_name: str, chatgpt_deployment: str, query_term_language: str, bing_search_endpoint: str, bing_search_key: str, bing_safe_search: bool): + def __init__(self, model_name: str, + chatgpt_deployment: str, + query_term_language: str, + bing_search_endpoint: str, + bing_search_key: str, + bing_safe_search: bool, + oai_endpoint: str, + azure_ai_token_provider:str + ): self.name = "ChatBingSearch" self.model_name = model_name self.chatgpt_deployment = chatgpt_deployment @@ -84,14 +92,14 @@ def __init__(self, model_name: str, chatgpt_deployment: str, query_term_language self.bing_search_key = bing_search_key self.bing_safe_search = bing_safe_search - # openai.api_base = oai_endpoint + openai.api_base = oai_endpoint openai.api_type = 'azure' openai.api_version = "2024-02-01" self.client = AsyncAzureOpenAI( azure_endpoint = openai.api_base , - api_key=openai.api_key, + azure_ad_token_provider=azure_ai_token_provider, api_version=openai.api_version) diff --git a/app/backend/approaches/comparewebwithwork.py b/app/backend/approaches/comparewebwithwork.py index 685040e2d..9f8698215 100644 --- a/app/backend/approaches/comparewebwithwork.py +++ b/app/backend/approaches/comparewebwithwork.py @@ -46,7 +46,6 @@ def __init__( self, search_client: SearchClient, oai_service_name: str, - oai_service_key: str, chatgpt_deployment: str, source_file_field: str, content_field: str, @@ -61,8 +60,8 @@ def __init__( enrichment_appservice_url: str, target_translation_language: str, azure_ai_endpoint:str, - azure_ai_key:str, azure_ai_location: str, + azure_ai_token_provider: str, use_semantic_reranker: bool ): self.search_client = 
search_client @@ -78,10 +77,9 @@ def __init__( self.escaped_target_model = re.sub(r'[^a-zA-Z0-9_\-.]', '_', target_embedding_model) self.target_translation_language=target_translation_language self.azure_ai_endpoint=azure_ai_endpoint - self.azure_ai_key=azure_ai_key self.azure_ai_location = azure_ai_location + self.azure_ai_token_provider=azure_ai_token_provider self.oai_service_name = oai_service_name - self.oai_service_key = oai_service_key self.model_name = model_name self.model_version = model_version self.enrichment_appservice_url = enrichment_appservice_url @@ -93,7 +91,7 @@ def __init__( self.client = AsyncAzureOpenAI( azure_endpoint = openai.api_base, - api_key=openai.api_key, + azure_ad_token_provider=azure_ai_token_provider, api_version=openai.api_version) async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any], web_citation_lookup: dict[str, Any], thought_chain: dict[str, Any]) -> Any: @@ -110,7 +108,6 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any] chat_rrr_approach = ChatReadRetrieveReadApproach( self.search_client, self.oai_service_name, - self.oai_service_key, self.chatgpt_deployment, self.source_file_field, self.content_field, @@ -126,7 +123,7 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any] self.target_translation_language, self.azure_ai_endpoint, self.azure_ai_location, - self.azure_ai_key, + self.azure_ai_token_provider, self.use_semantic_reranker ) rrr_response = chat_rrr_approach.run(history, overrides, {}, thought_chain) diff --git a/app/backend/approaches/compareworkwithweb.py b/app/backend/approaches/compareworkwithweb.py index c8ba26267..6268b543c 100644 --- a/app/backend/approaches/compareworkwithweb.py +++ b/app/backend/approaches/compareworkwithweb.py @@ -39,7 +39,15 @@ class CompareWorkWithWeb(Approach): web_citations = {} - def __init__(self, model_name: str, chatgpt_deployment: str, query_term_language: str, bing_search_endpoint: str, 
bing_search_key: str, bing_safe_search: bool): + def __init__(self, model_name: str, + chatgpt_deployment: str, + query_term_language: str, + bing_search_endpoint: str, + bing_search_key: str, + bing_safe_search: bool, + oai_endpoint: str, + azure_ai_token_provider:str + ): """ Initializes the CompareWorkWithWeb approach. @@ -59,14 +67,16 @@ def __init__(self, model_name: str, chatgpt_deployment: str, query_term_language self.bing_search_endpoint = bing_search_endpoint self.bing_search_key = bing_search_key self.bing_safe_search = bing_safe_search + self.oai_endpoint = oai_endpoint + self.azure_ai_token_provider = azure_ai_token_provider # openai.api_base = oai_endpoint openai.api_type = 'azure' openai.api_version = "2024-02-01" self.client = AsyncAzureOpenAI( - azure_endpoint = openai.api_base, - api_key=openai.api_key, + azure_endpoint = openai.api_base, + azure_ad_token_provider=azure_ai_token_provider, api_version=openai.api_version) async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any], work_citation_lookup: dict[str, Any], thought_chain: dict[str, Any]) -> Any: @@ -81,7 +91,14 @@ async def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any] Any: The result of the comparative analysis. 
""" # Step 1: Call bing Search Approach for a Bing LLM Response and Citations - chat_bing_search = ChatWebRetrieveRead(self.model_name, self.chatgpt_deployment, self.query_term_language, self.bing_search_endpoint, self.bing_search_key, self.bing_safe_search) + chat_bing_search = ChatWebRetrieveRead(self.model_name, + self.chatgpt_deployment, + self.query_term_language, + self.bing_search_endpoint, + self.bing_search_key, + self.bing_safe_search, + self.oai_endpoint, + self.azure_ai_token_provider) bing_search_response = chat_bing_search.run(history, overrides, {}, thought_chain) content = "" diff --git a/app/backend/approaches/gpt_direct_approach.py b/app/backend/approaches/gpt_direct_approach.py index a2ea18470..bf8fed16f 100644 --- a/app/backend/approaches/gpt_direct_approach.py +++ b/app/backend/approaches/gpt_direct_approach.py @@ -66,8 +66,7 @@ class GPTDirectApproach(Approach): def __init__( self, - oai_service_name: str, - oai_service_key: str, + azure_openai_token_provider: str, chatgpt_deployment: str, query_term_language: str, model_name: str, @@ -79,19 +78,16 @@ def __init__( self.chatgpt_token_limit = get_token_limit(model_name) openai.api_base = azure_openai_endpoint - openai.api_type = 'azure' - openai.api_key = oai_service_key + openai.api_type = "azure_ad" + openai.azure_ad_token_provider = azure_openai_token_provider + openai.api_version = "2024-02-01" self.model_name = model_name self.model_version = model_version - - openai.api_type = 'azure' - openai.api_version = "2024-02-01" - self.client = AsyncAzureOpenAI( azure_endpoint = openai.api_base, - api_key=openai.api_key, + azure_ad_token_provider=azure_openai_token_provider, api_version=openai.api_version) # def run(self, history: list[dict], overrides: dict) -> any: diff --git a/app/backend/approaches/mathassistant.py b/app/backend/approaches/mathassistant.py index 44ebe978d..f03e8e444 100644 --- a/app/backend/approaches/mathassistant.py +++ b/app/backend/approaches/mathassistant.py @@ -3,49 
+3,36 @@ #Turn warnings off #from st_pages import Page, show_pages, add_page_title -import warnings -warnings.filterwarnings('ignore') import os -# import openai +import warnings from dotenv import load_dotenv +from langchain_openai import AzureChatOpenAI +from langchain.agents import initialize_agent, load_tools, AgentType +from langchain.prompts import ChatPromptTemplate +from azure.identity import ManagedIdentityCredential, AzureAuthorityHosts, DefaultAzureCredential, get_bearer_token_provider -#-------------------------------------------------------------------------- -#variables needed for testing -OPENAI_API_TYPE = "azure" -OPENAI_API_VERSION = "2024-02-01" -OPENAI_API_BASE = " " -OPENAI_API_KEY = " " -OPENAI_DEPLOYMENT_NAME = " " -MODEL_NAME = " " -AZURE_OPENAI_ENDPOINT = ' ' -AZURE_OPENAI_SERVICE_KEY = ' ' - -os.environ["OPENAI_API_TYPE"] = OPENAI_API_TYPE -os.environ["OPENAI_API_VERSION"] = OPENAI_API_VERSION - - +warnings.filterwarnings('ignore') load_dotenv() - -azure_openai_chatgpt_deployment = os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT") - -deployment_name = azure_openai_chatgpt_deployment -OPENAI_DEPLOYMENT_NAME = deployment_name - OPENAI_API_BASE = os.environ.get("AZURE_OPENAI_ENDPOINT") -OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_SERVICE_KEY") -OPENAI_DEPLOYMENT_NAME = azure_openai_chatgpt_deployment +OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT") -from langchain_openai import AzureChatOpenAI -from langchain.agents import initialize_agent, load_tools, AgentType -from langchain.prompts import ChatPromptTemplate +if os.environ.get("AZURE_OPENAI_AUTHORITY_HOST") == "AzureUSGovernment": + AUTHORITY = AzureAuthorityHosts.AZURE_GOVERNMENT +else: + AUTHORITY = AzureAuthorityHosts.AZURE_PUBLIC_CLOUD +if os.environ.get("LOCAL_DEBUG") == "true": + azure_credential = DefaultAzureCredential(authority=AUTHORITY) +else: + azure_credential = ManagedIdentityCredential(authority=AUTHORITY) +token_provider = 
get_bearer_token_provider(azure_credential, f'https://{os.environ.get("AZURE_AI_CREDENTIAL_DOMAIN")}/.default') model = AzureChatOpenAI( - api_key= OPENAI_API_KEY, + azure_ad_token_provider=token_provider, azure_endpoint=OPENAI_API_BASE, - openai_api_version=OPENAI_API_VERSION , - deployment_name=OPENAI_DEPLOYMENT_NAME) + openai_api_version="2024-02-01" , + deployment_name=OPENAI_DEPLOYMENT_NAME) #-------------------------------------------------------------------------------------------------------------------------------------------------- # Addition of custom tools @@ -192,12 +179,7 @@ def process_agent_response( question): #Function to process clues -def generate_response(question): - model = AzureChatOpenAI( - api_key= OPENAI_API_KEY, - azure_endpoint=OPENAI_API_BASE, - openai_api_version=OPENAI_API_VERSION , - deployment_name=OPENAI_DEPLOYMENT_NAME) +def generate_response(question): prompt_template = ChatPromptTemplate.from_template(template=prompt) messages = prompt_template.format_messages( question=question diff --git a/app/backend/approaches/tabulardataassistant.py b/app/backend/approaches/tabulardataassistant.py index a577419e3..12ff446f2 100644 --- a/app/backend/approaches/tabulardataassistant.py +++ b/app/backend/approaches/tabulardataassistant.py @@ -4,53 +4,38 @@ import base64 import os import glob -import re import warnings -from PIL import Image import io -import pandas as pd +import tempfile +from dotenv import load_dotenv +from PIL import Image from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent from langchain.agents.agent_types import AgentType from langchain_openai import AzureChatOpenAI -from langchain.agents import load_tools -import matplotlib.pyplot as plt -import tempfile -warnings.filterwarnings('ignore') -from dotenv import load_dotenv - - - -#-------------------------------------------------------------------------- -#variables needed for testing -OPENAI_API_TYPE = "azure" -OPENAI_API_VERSION = 
"2024-02-01" -OPENAI_API_BASE = " " -OPENAI_API_KEY = " " -OPENAI_DEPLOYMENT_NAME = " " -MODEL_NAME = " " -AZURE_OPENAI_ENDPOINT = ' ' -AZURE_OPENAI_SERVICE_KEY = ' ' - -os.environ["OPENAI_API_TYPE"] = OPENAI_API_TYPE -os.environ["OPENAI_API_VERSION"] = OPENAI_API_VERSION - +from azure.identity import ManagedIdentityCredential, AzureAuthorityHosts, DefaultAzureCredential, get_bearer_token_provider +warnings.filterwarnings('ignore') load_dotenv() -#Environment variables when integrated into the app -#_________________________________________________________________________ - - - -azure_openai_chatgpt_deployment = os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT") -deployment_name = azure_openai_chatgpt_deployment -OPENAI_DEPLOYMENT_NAME = deployment_name OPENAI_API_BASE = os.environ.get("AZURE_OPENAI_ENDPOINT") -OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_SERVICE_KEY") +OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT") +if os.environ.get("AZURE_OPENAI_AUTHORITY_HOST") == "AzureUSGovernment": + AUTHORITY = AzureAuthorityHosts.AZURE_GOVERNMENT +else: + AUTHORITY = AzureAuthorityHosts.AZURE_PUBLIC_CLOUD -# Page title +if os.environ.get("LOCAL_DEBUG") == "true": + azure_credential = DefaultAzureCredential(authority=AUTHORITY) +else: + azure_credential = ManagedIdentityCredential(authority=AUTHORITY) +token_provider = get_bearer_token_provider(azure_credential, f'https://{os.environ.get("AZURE_AI_CREDENTIAL_DOMAIN")}/.default') +model = AzureChatOpenAI( + azure_ad_token_provider=token_provider, + azure_endpoint=OPENAI_API_BASE, + openai_api_version="2024-02-01" , + deployment_name=OPENAI_DEPLOYMENT_NAME) dffinal = None pdagent = None @@ -99,11 +84,6 @@ def save_df(dff): # function to stream agent response def process_agent_scratch_pad(question, df): - chat = AzureChatOpenAI( - api_key= OPENAI_API_KEY, - azure_endpoint=OPENAI_API_BASE, - openai_api_version=OPENAI_API_VERSION , - deployment_name=OPENAI_DEPLOYMENT_NAME) question = save_chart(question) # This 
agent relies on access to a python repl tool which can execute arbitrary code. @@ -112,7 +92,7 @@ def process_agent_scratch_pad(question, df): # which can lead to data breaches, data loss, or other security incidents. You must opt in # to use this functionality by setting allow_dangerous_code=True. # https://api.python.langchain.com/en/latest/agents/langchain_experimental.agents.agent_toolkits.pandas.base.create_pandas_dataframe_agent.html - pdagent = create_pandas_dataframe_agent(chat, df, verbose=True,agent_type=AgentType.OPENAI_FUNCTIONS,allow_dangerous_code=True , agent_executor_kwargs={"handle_parsing_errors": True}) + pdagent = create_pandas_dataframe_agent(model, df, verbose=True,agent_type=AgentType.OPENAI_FUNCTIONS,allow_dangerous_code=True , agent_executor_kwargs={"handle_parsing_errors": True}) for chunk in pdagent.stream({"input": question}): if "actions" in chunk: for action in chunk["actions"]: @@ -132,15 +112,13 @@ def process_agent_scratch_pad(question, df): #Function to stream final output def process_agent_response(question, df): question = save_chart(question) - - chat = AzureChatOpenAI( - api_key= OPENAI_API_KEY, - azure_endpoint=OPENAI_API_BASE, - openai_api_version=OPENAI_API_VERSION , - deployment_name=OPENAI_DEPLOYMENT_NAME) - - - pdagent = create_pandas_dataframe_agent(chat, df, verbose=True,agent_type=AgentType.OPENAI_FUNCTIONS, allow_dangerous_code=True, agent_executor_kwargs={"handle_parsing_errors": True}) + + pdagent = create_pandas_dataframe_agent(model, + df, + verbose=True, + agent_type=AgentType.OPENAI_FUNCTIONS, + allow_dangerous_code=True, + agent_executor_kwargs={"handle_parsing_errors": True}) for chunk in pdagent.stream({"input": question}): if "output" in chunk: output = f'Final Output: ```{chunk["output"]}```' diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index 9c53b74f9..a38c9c79d 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -6,8 +6,8 @@ 
azure-mgmt-cognitiveservices==13.5.0 openai==1.35.8 # azure-search-documents==11.4.0 azure-search-documents==11.4.0b11 -azure-storage-blob==12.16.0 -azure-cosmos == 4.3.1 +azure-storage-blob==12.20.0 +azure-cosmos == 4.7.0 tiktoken == 0.7.0 fastapi == 0.109.1 fastapi-utils == 0.2.1 diff --git a/app/backend/testsuite.py b/app/backend/testsuite.py index 9a1c56e9c..241aabfa8 100644 --- a/app/backend/testsuite.py +++ b/app/backend/testsuite.py @@ -2,6 +2,7 @@ import re import pytest from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient +from azure.identity import DefaultAzureCredential import os from fastapi.testclient import TestClient from dotenv import load_dotenv @@ -13,6 +14,8 @@ load_dotenv(dotenv_path=f'../../scripts/environments/infrastructure.debug.env') +azure_credentials = DefaultAzureCredential() + from app import app client = TestClient(app) @@ -210,10 +213,10 @@ def test_work_compare_web_chat_api(): assert "Satya" in content or "I am not sure." in content -def test_get_blob_client_url(): - response = client.get("/getblobclienturl") +def test_get_blob_client(): + response = client.get("/getblobclient") assert response.status_code == 200 - assert "blob.core.windows.net" in response.json()["url"] + assert "blob.core.windows.net" in response.json()["client"].url def test_get_all_upload_status(): response = client.post("/getalluploadstatus", json={ @@ -306,16 +309,14 @@ def test_get_feature_flags(): "ENABLE_UNGROUNDED_CHAT": os.getenv("ENABLE_UNGROUNDED_CHAT") == "true", "ENABLE_MATH_ASSISTANT": os.getenv("ENABLE_MATH_ASSISTANT") == "true", "ENABLE_TABULAR_DATA_ASSISTANT": os.getenv("ENABLE_TABULAR_DATA_ASSISTANT") == "true", - "ENABLE_MULTIMEDIA": os.getenv("ENABLE_MULTIMEDIA") == "true", } assert response.json() == expected_response def test_upload_blob(): - account_name = os.getenv("AZURE_BLOB_STORAGE_ACCOUNT") - account_key = os.getenv("AZURE_BLOB_STORAGE_KEY") + storage_account_url=os.getenv("BLOB_STORAGE_ACCOUNT_ENDPOINT") 
container_name = os.getenv("AZURE_BLOB_STORAGE_UPLOAD_CONTAINER") - blob_service_client = BlobServiceClient(account_url=f"https://{account_name}.blob.core.windows.net", credential=account_key) + blob_service_client = BlobServiceClient(account_url=storage_account_url, credential=azure_credentials) # Create a container client container_client = blob_service_client.get_container_client(container_name) diff --git a/app/enrichment/app.py b/app/enrichment/app.py index 6487b96e8..70f1706aa 100644 --- a/app/enrichment/app.py +++ b/app/enrichment/app.py @@ -14,7 +14,7 @@ import random from azure.storage.queue import QueueClient, TextBase64EncodePolicy from azure.search.documents import SearchClient -from azure.core.credentials import AzureKeyCredential +from azure.identity import ManagedIdentityCredential, DefaultAzureCredential, get_bearer_token_provider, AzureAuthorityHosts from data_model import (EmbeddingResponse, ModelInfo, ModelListResponse, StatusResponse) from fastapi import FastAPI, HTTPException @@ -32,32 +32,30 @@ # === ENV Setup === ENV = { - "AZURE_BLOB_STORAGE_KEY": None, "EMBEDDINGS_QUEUE": None, "LOG_LEVEL": "DEBUG", # Will be overwritten by LOG_LEVEL in Environment "DEQUEUE_MESSAGE_BATCH_SIZE": 1, "AZURE_BLOB_STORAGE_ACCOUNT": None, "AZURE_BLOB_STORAGE_CONTAINER": None, "AZURE_BLOB_STORAGE_ENDPOINT": None, + "AZURE_QUEUE_STORAGE_ENDPOINT": None, "AZURE_BLOB_STORAGE_UPLOAD_CONTAINER": None, "COSMOSDB_URL": None, - "COSMOSDB_KEY": None, "COSMOSDB_LOG_DATABASE_NAME": None, "COSMOSDB_LOG_CONTAINER_NAME": None, "MAX_EMBEDDING_REQUEUE_COUNT": 5, "EMBEDDING_REQUEUE_BACKOFF": 60, "AZURE_OPENAI_SERVICE": None, - "AZURE_OPENAI_SERVICE_KEY": None, "AZURE_OPENAI_ENDPOINT": None, "AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME": None, "AZURE_SEARCH_INDEX": None, - "AZURE_SEARCH_SERVICE_KEY": None, "AZURE_SEARCH_SERVICE": None, - "BLOB_CONNECTION_STRING": None, "TARGET_EMBEDDINGS_MODEL": None, "EMBEDDING_VECTOR_SIZE": None, "AZURE_SEARCH_SERVICE_ENDPOINT": None, - 
"AZURE_BLOB_STORAGE_ENDPOINT": None + "LOCAL_DEBUG": "false", + "AZURE_AI_CREDENTIAL_DOMAIN": None, + "AZURE_OPENAI_AUTHORITY_HOST": None } for key, value in ENV.items(): @@ -67,16 +65,33 @@ elif value is None: raise ValueError(f"Environment variable {key} not set") -search_creds = AzureKeyCredential(ENV["AZURE_SEARCH_SERVICE_KEY"]) - openai.api_base = ENV["AZURE_OPENAI_ENDPOINT"] openai.api_type = "azure" -openai.api_key = ENV["AZURE_OPENAI_SERVICE_KEY"] +if ENV["AZURE_OPENAI_AUTHORITY_HOST"] == "AzureUSGovernment": + AUTHORITY = AzureAuthorityHosts.AZURE_GOVERNMENT +else: + AUTHORITY = AzureAuthorityHosts.AZURE_PUBLIC_CLOUD openai.api_version = "2024-02-01" +# When debugging in VSCode, use the current user identity to authenticate with Azure OpenAI, +# Cognitive Search and Blob Storage (no secrets needed, just use 'az login' locally) +# Use managed identity when deployed on Azure. +# If you encounter a blocking error during a DefaultAzureCredntial resolution, you can exclude +# the problematic credential by using a parameter (ex. 
exclude_shared_token_cache_credential=True) +if ENV["LOCAL_DEBUG"] == "true": + azure_credential = DefaultAzureCredential(authority=AUTHORITY) +else: + azure_credential = ManagedIdentityCredential(authority=AUTHORITY) +# Comment these two lines out if using keys, set your API key in the OPENAI_API_KEY environment variable instead +openai.api_type = "azure_ad" +token_provider = get_bearer_token_provider(azure_credential, + f'https://{ENV["AZURE_AI_CREDENTIAL_DOMAIN"]}/.default') +openai.azure_ad_token_provider = token_provider +#openai.api_key = ENV["AZURE_OPENAI_SERVICE_KEY"] + client = AzureOpenAI( - azure_endpoint = openai.api_base, - api_key=openai.api_key, + azure_endpoint = openai.api_base, + azure_ad_token_provider=token_provider, api_version=openai.api_version) class AzOAIEmbedding(object): @@ -118,10 +133,10 @@ def encode(self, texts) -> None: utilities_helper = UtilitiesHelper( azure_blob_storage_account=ENV["AZURE_BLOB_STORAGE_ACCOUNT"], azure_blob_storage_endpoint=ENV["AZURE_BLOB_STORAGE_ENDPOINT"], - azure_blob_storage_key=ENV["AZURE_BLOB_STORAGE_KEY"], + credential=azure_credential ) -statusLog = StatusLog(ENV["COSMOSDB_URL"], ENV["COSMOSDB_KEY"], ENV["COSMOSDB_LOG_DATABASE_NAME"], ENV["COSMOSDB_LOG_CONTAINER_NAME"]) +statusLog = StatusLog(ENV["COSMOSDB_URL"], azure_credential, ENV["COSMOSDB_LOG_DATABASE_NAME"], ENV["COSMOSDB_LOG_CONTAINER_NAME"]) # === API Setup === start_time = datetime.now() @@ -256,7 +271,7 @@ def index_sections(chunks): """ search_client = SearchClient(endpoint=ENV["AZURE_SEARCH_SERVICE_ENDPOINT"], index_name=ENV["AZURE_SEARCH_INDEX"], - credential=search_creds) + credential=azure_credential) results = search_client.upload_documents(documents=chunks) succeeded = sum([1 for r in results if r.succeeded]) @@ -279,16 +294,13 @@ def get_tags(blob_path): # Remove the container prefix path_parts = blob_path.split('/') blob_path = '/'.join(path_parts[1:]) - - blob_service_client = 
BlobServiceClient.from_connection_string(ENV["BLOB_CONNECTION_STRING"]) - # container_client = blob_service_client.get_container_client(ENV["AZURE_BLOB_STORAGE_CONTAINER"]) + + blob_service_client = BlobServiceClient(ENV["AZURE_BLOB_STORAGE_ENDPOINT"], + credential=azure_credential) blob_client = blob_service_client.get_blob_client( container=ENV["AZURE_BLOB_STORAGE_UPLOAD_CONTAINER"], blob=blob_path) - - # blob_client = container_client.get_blob_client( - # blob_client = container_client.get_blob_client(container_client=container_client, blob=blob_path) blob_properties = blob_client.get_blob_properties() tags = blob_properties.metadata.get("tags") if tags != '' and tags is not None: @@ -308,9 +320,9 @@ def poll_queue() -> None: log.debug("Skipping poll_queue call, models not yet loaded") return - queue_client = QueueClient.from_connection_string( - conn_str=ENV["BLOB_CONNECTION_STRING"], queue_name=ENV["EMBEDDINGS_QUEUE"] - ) + queue_client = QueueClient(account_url=ENV["AZURE_QUEUE_STORAGE_ENDPOINT"], + queue_name=ENV["EMBEDDINGS_QUEUE"], + credential=azure_credential) log.debug("Polling embeddings queue for messages...") response = queue_client.receive_messages(max_messages=int(ENV["DEQUEUE_MESSAGE_BATCH_SIZE"])) @@ -336,7 +348,8 @@ def poll_queue() -> None: statusLog.upsert_document(blob_path, f'Embeddings process started with model {target_embeddings_model}', StatusClassification.INFO, State.PROCESSING) file_name, file_extension, file_directory = utilities_helper.get_filename_and_extension(blob_path) chunk_folder_path = file_directory + file_name + file_extension - blob_service_client = BlobServiceClient.from_connection_string(ENV["BLOB_CONNECTION_STRING"]) + blob_service_client = BlobServiceClient(ENV["AZURE_BLOB_STORAGE_ENDPOINT"], + credential=azure_credential) container_client = blob_service_client.get_container_client(ENV["AZURE_BLOB_STORAGE_CONTAINER"]) index_chunks = [] @@ -432,10 +445,10 @@ def poll_queue() -> None: if requeue_count <= 
int(ENV["MAX_EMBEDDING_REQUEUE_COUNT"]): message_json['embeddings_queued_count'] = requeue_count # Requeue with a random backoff within limits - queue_client = QueueClient.from_connection_string( - ENV["BLOB_CONNECTION_STRING"], - ENV["EMBEDDINGS_QUEUE"], - message_encode_policy=TextBase64EncodePolicy()) + queue_client = QueueClient(account_url=ENV["AZURE_QUEUE_STORAGE_ENDPOINT"], + queue_name=ENV["EMBEDDINGS_QUEUE"], + credential=azure_credential, + message_encode_policy=TextBase64EncodePolicy()) message_string = json.dumps(message_json) max_seconds = int(ENV["EMBEDDING_REQUEUE_BACKOFF"]) * (requeue_count**2) backoff = random.randint( diff --git a/app/enrichment/requirements.txt b/app/enrichment/requirements.txt index 3cffb9cb5..e5587e720 100644 --- a/app/enrichment/requirements.txt +++ b/app/enrichment/requirements.txt @@ -10,7 +10,8 @@ uvicorn == 0.23.2 azure-storage-queue == 12.6.0 azure-storage-blob==12.16.0 azure.search.documents==11.4.0b11 -azure-cosmos == 4.3.1 -azure-core == 1.26.4 +azure-cosmos == 4.7.0 +azure-core == 1.30.2 +azure-identity==1.16.1 tenacity == 8.2.3 openai==1.17.0 diff --git a/app/frontend/package.json b/app/frontend/package.json index 3987602b0..f1df48085 100644 --- a/app/frontend/package.json +++ b/app/frontend/package.json @@ -9,7 +9,7 @@ "watch": "tsc && vite build --watch" }, "dependencies": { - "@azure/storage-blob": "^12.13.0", + "@azure/storage-blob": "^12.24.0", "@fluentui/react": "^8.110.7", "@fluentui/react-icons": "^2.0.195", "@react-spring/web": "^9.7.1", @@ -43,6 +43,6 @@ "prettier": "^2.8.3", "typescript": "^4.9.3", "vite": "^5.0.10", - "vite-plugin-node-polyfills": "^0.22.0" + "vite-plugin-node-polyfills": "^0.2.0" } } diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index 34a13191e..a518dce30 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -214,6 +214,5 @@ export type GetFeatureFlagsResponse = { ENABLE_UNGROUNDED_CHAT: boolean; ENABLE_MATH_ASSISTANT: boolean; 
ENABLE_TABULAR_DATA_ASSISTANT: boolean; - ENABLE_MULTIMEDIA: boolean; error?: string; } \ No newline at end of file diff --git a/app/frontend/src/components/FolderPicker/FolderPicker.tsx b/app/frontend/src/components/FolderPicker/FolderPicker.tsx index 1fff39175..c392e609e 100644 --- a/app/frontend/src/components/FolderPicker/FolderPicker.tsx +++ b/app/frontend/src/components/FolderPicker/FolderPicker.tsx @@ -18,7 +18,7 @@ import { ITextFieldStyleProps, ITextFieldStyles, TextField } from '@fluentui/rea import { ILabelStyles, ILabelStyleProps } from '@fluentui/react/lib/Label'; import { IIconProps } from '@fluentui/react'; import { IButtonProps } from '@fluentui/react/lib/Button'; -import { BlobServiceClient } from "@azure/storage-blob"; +import { ContainerClient } from "@azure/storage-blob"; import { getBlobClientUrl } from "../../api"; import styles from "./FolderPicker.module.css"; @@ -86,8 +86,7 @@ export const FolderPicker = ({allowFolderCreation, onSelectedKeyChange, preSelec async function fetchBlobFolderData() { try { const blobClientUrl = await getBlobClientUrl(); - const blobServiceClient = new BlobServiceClient(blobClientUrl); - var containerClient = blobServiceClient.getContainerClient("upload"); + var containerClient = new ContainerClient(blobClientUrl); const delimiter = "/"; const prefix = ""; var newOptions: IComboBoxOption[] = allowNewFolders ? [] : [ diff --git a/app/frontend/src/components/filepicker/file-picker.tsx b/app/frontend/src/components/filepicker/file-picker.tsx index d4ca1bf05..596377b55 100644 --- a/app/frontend/src/components/filepicker/file-picker.tsx +++ b/app/frontend/src/components/filepicker/file-picker.tsx @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. 
-import { BlobServiceClient } from "@azure/storage-blob"; +import { ContainerClient } from "@azure/storage-blob"; import classNames from "classnames"; import { nanoid } from "nanoid"; import { useCallback, useEffect, useMemo, useState } from "react"; @@ -51,32 +51,33 @@ const FilePicker = ({folderPath, tags}: Props) => { // create an instance of the BlobServiceClient const blobClientUrl = await getBlobClientUrl(); - const blobServiceClient = new BlobServiceClient(blobClientUrl); - - const containerClient = blobServiceClient.getContainerClient("upload"); + + const containerClient = new ContainerClient(blobClientUrl); var counter = 1; files.forEach(async (indexedFile: any) => { // add each file into Azure Blob Storage var file = indexedFile.file as File; var filePath = (folderPath == "") ? file.name : folderPath + "/" + file.name; - const blobClient = containerClient.getBlockBlobClient(filePath); // set mimetype as determined from browser with file upload control const options = { blobHTTPHeaders: { blobContentType: file.type }, metadata: { tags: tags.map(encodeURIComponent).join(",") } }; - - // upload file - blobClient.uploadData(file, options); - //write status to log - var logEntry: StatusLogEntry = { - path: "upload/"+filePath, - status: "File uploaded from browser to Azure Blob Storage", - status_classification: StatusLogClassification.Info, - state: StatusLogState.Uploaded + try { + // upload file + await containerClient.uploadBlockBlob(filePath, file, file.size, options) + //write status to log + var logEntry: StatusLogEntry = { + path: "upload/"+filePath, + status: "File uploaded from browser to Azure Blob Storage", + status_classification: StatusLogClassification.Info, + state: StatusLogState.Uploaded + } + await logStatus(logEntry); + } + catch (error) { + console.log("Unable to upload file"+filePath+" : Error: "+error); } - await logStatus(logEntry); - setProgress((counter/files.length) * 100); counter++; }); diff --git a/app/frontend/vite.config.ts 
b/app/frontend/vite.config.ts index abd3b6854..45a21c59c 100644 --- a/app/frontend/vite.config.ts +++ b/app/frontend/vite.config.ts @@ -2,6 +2,7 @@ import { defineConfig } from "vite"; import react from "@vitejs/plugin-react"; import postcssNesting from 'postcss-nesting'; import { nodePolyfills } from 'vite-plugin-node-polyfills' +import rollupNodePolyFill from 'rollup-plugin-node-polyfills' // https://vitejs.dev/config/ export default defineConfig({ @@ -9,7 +10,12 @@ export default defineConfig({ build: { outDir: "../backend/static", emptyOutDir: true, - sourcemap: true + sourcemap: true, + rollupOptions: { + plugins: [ + rollupNodePolyFill() + ] + } }, server: { proxy: { @@ -23,5 +29,11 @@ export default defineConfig({ postcssNesting ], }, + }, + resolve: { + alias: { + buffer: 'rollup-plugin-node-polyfills/polyfills/buffer-es6', + process: 'rollup-plugin-node-polyfills/polyfills/process-es6' + } } }); diff --git a/docs/deployment/deployment.md b/docs/deployment/deployment.md index 9906e2921..8a39f8057 100644 --- a/docs/deployment/deployment.md +++ b/docs/deployment/deployment.md @@ -50,8 +50,6 @@ Variable | Required | Description --- | --- | --- LOCATION | Yes | The location (West Europe is the default). The Terraform templates use this value. To get a list of all the current Azure regions you can run `az account list-locations -o table`. The value here needs to be the *Name* value and not *Display Name*. WORKSPACE | Yes | The workspace name (use something simple and unique to you). This will appended to infoasst-????? as the name of the resource group created in your subscription. -SUBSCRIPTION_ID | Yes | The GUID that represents the Azure Subscription you want the Accelerator to be deployed into. This can be obtained from the *Subscription* blade in the Azure Portal. -TENANT_ID | Yes | The GUID that represents the Azure Active Directory Tenant for the Subscription you want the accelerator to be deployed into. 
This can be obtained from the *Tenant Info* blade in the Azure Portal. AZURE_ENVIRONMENT | Yes | This will determine the Azure cloud environment the deployment will target. Information Assistant currently supports AzureCloud and AzureUSGovernment. Info available at [Azure cloud environments](https://docs.microsoft.com/en-us/cli/azure/manage-clouds-azure-cli?toc=/cli/azure/toc.json&bc=/cli/azure/breadcrumb/toc.json). If you are targeting "AzureUSGovernment" please see our [sovereign deployment support documentation](/docs/deployment/enable_sovereign_deployment.md). SECURE_MODE | Yes | Defaults to `false`. This feature flag will determine if the Information Assistant deploys its Azure Infrastructure in a secure mode or not.
:warning: Before enabling secure mode please read the extra instructions on [Enabling Secure Deployment](/docs/secure_deployment/secure_deployment.md) ENABLE_WEB_CHAT | Yes | Defaults to `false`. This feature flag will enable the ability to use Web Search results as a data source for generating answers from the LLM. This feature will also deploy a Bing v7 Search instance in Azure to retrieve web results from, however Bing v7 Search is not available in AzureUSGovernment regions, so this feature flag is **NOT** compatible with `AZURE_ENVIRONMENT=AzureUSGovernment`. @@ -67,7 +65,6 @@ SKIP_PLAN_CHECK | No | If this value is set to 1, then the Terraform deployment USE_EXISTING_AOAI | Yes | Defaults to false. Set this value to "true" if you want to use an existing Azure Open AI service instance in your subscription. This can be useful when there are limits to the number of AOAI instances you can have in one subscription. When the value is set to "false", Terraform will create a new Azure Open AI service instance in your resource group. AZURE_OPENAI_RESOURCE_GROUP | No | If you have set **USE_EXISTING_AOAI** to "true" then use this parameter to provide the name of the resource group that hosts the Azure Open AI service instance in your subscription. AZURE_OPENAI_SERVICE_NAME | No | If you have set **USE_EXISTING_AOAI** to "true" then use this parameter to provide the name of the Azure Open AI service instance in your subscription. -AZURE_OPENAI_SERVICE_KEY | No | If you have set **USE_EXISTING_AOAI** to "true" then use this parameter to provide the Key for the Azure Open AI service instance in your subscription. AZURE_OPENAI_CHATGPT_DEPLOYMENT | No | If you have set **USE_EXISTING_AOAI** to "true" then use this parameter to provide the name of a deployment of the "gpt-35-turbo" model in the Azure Open AI service instance in your subscription. USE_AZURE_OPENAI_EMBEDDINGS | Yes | Defaults to "true".
When set to "true" this value indicates to Information Assistant to use Azure OpenAI models for embedding text values. If set to "false", Information Assistant will use the open source language model that is provided in the values below. AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME| No | If you have set **USE_AZURE_OPENAI_EMBEDDINGS** to "true" then use this parameter to provide the name of a deployment of the "text-embedding-ada-002" model in the Azure Open AI service instance in your subscription. diff --git a/docs/deployment/setting_up_sandbox_environment.md b/docs/deployment/setting_up_sandbox_environment.md index a57522c6b..895cd989e 100644 --- a/docs/deployment/setting_up_sandbox_environment.md +++ b/docs/deployment/setting_up_sandbox_environment.md @@ -49,7 +49,7 @@ To set up an Azure DevOps CI/CD pipeline for deploying code from a GitHub reposi SUBSCRIPTION_ID | The ID of the subscription that should be deployed to. TENANT_ID | The ID of the tenant that should be deployed to. CONTAINER_REGISTRY_ADDRESS | Azure Container Registry where the Info Assistant development container will be cached during pipeline runs - AZURE_OPENAI_SERVICE_NAME
AZURE_OPENAI_SERVICE_KEY
AZURE_OPENAI_CHATGPT_DEPLOYMENT
AZURE_OPENAI_GPT_DEPLOYMENT | It is recommended to point the pipeline to an existing installation of Azure OpenAI. These values will be used to target that instance. + AZURE_OPENAI_SERVICE_NAME
AZURE_OPENAI_CHATGPT_DEPLOYMENT
AZURE_OPENAI_GPT_DEPLOYMENT | It is recommended to point the pipeline to an existing installation of Azure OpenAI. These values will be used to target that instance. environment | The environment name that matches an environment variable file located in `./scripts/environments`. For example if the pipeline parameter is set to "demo" there needs to be a corresponding file at `/scripts/environment/demo.env` TF_BACKEND_ACCESS_KEY | Terraform is used to create Infrastructure as Code. This is the key to the Terraform State in a Storage Account. TF_BACKEND_CONTAINER | Terraform is used to create Infrastructure as Code. This is the container that the Terraform State is stored within a Storage Account. diff --git a/docs/function_debug.md b/docs/function_debug.md index 30491540b..df8336b80 100644 --- a/docs/function_debug.md +++ b/docs/function_debug.md @@ -20,16 +20,12 @@ Next you will need to create local configuration values that are used by the fun "BLOB_STORAGE_ACCOUNT_LOG_CONTAINER_NAME": "logs", "COSMOSDB_LOG_CONTAINER_NAME": "statuscontainer", "COSMOSDB_LOG_DATABASE_NAME": "statusdb", - "COSMOSDB_KEY": "", "COSMOSDB_URL": "", "AZURE_FORM_RECOGNIZER_ENDPOINT": "", - "AZURE_FORM_RECOGNIZER_KEY": "", "BLOB_STORAGE_ACCOUNT": "", "BLOB_STORAGE_ACCOUNT_OUTPUT_CONTAINER_NAME": "content", - "AZURE_BLOB_STORAGE_KEY": "", "CHUNK_TARGET_SIZE": "750", "FR_API_VERSION": "2023-02-28-preview", - "BLOB_CONNECTION_STRING": "", "CHUNK_TARGET_SIZE": "750", "FR_API_VERSION": "2022-08-31", "MAX_POLLING_REQUEUE_COUNT": "10", diff --git a/docs/process_flow_chat.drawio.png b/docs/process_flow_chat.drawio.png index 5e30eec4d..4bd9aaf25 100644 Binary files a/docs/process_flow_chat.drawio.png and b/docs/process_flow_chat.drawio.png differ diff --git a/docs/process_flow_chat.png b/docs/process_flow_chat.png index ac2916f46..dcfe17eb2 100644 Binary files a/docs/process_flow_chat.png and b/docs/process_flow_chat.png differ diff --git a/docs/secure_deployment/secure_deployment.md 
b/docs/secure_deployment/secure_deployment.md index b028c4bb3..90fb4e557 100644 --- a/docs/secure_deployment/secure_deployment.md +++ b/docs/secure_deployment/secure_deployment.md @@ -8,7 +8,6 @@ > * Using an existing Azure OpenAI Services > * Web chat (secure endpoints for Bing API services are not yet available) > * SharePoint connector (secure endpoints for Azure Logic Apps and SharePoint connector for Logic Apps are not yet available) -> * Multimedia (secure endpoints for Azure Video Indexer services are not yet available) > >Secure mode requires a DDOS Protection Plan for Virtual Network Protection. There is a limit of 1 DDOS protection plan for a subscription in a region. You can reuse an existing DDOS plan in your tenant or Info Assistant can deploy one for you. > @@ -138,7 +137,6 @@ To perform a secure deployment, follow these steps: export ENABLE_WEB_CHAT=false export USE_EXISTING_AOAI=false export ENABLE_SHAREPOINT_CONNECTOR=false - export ENABLE_MULTIMEDIA=false ``` *Note: Secure mode is blocked when using an existing Azure OpenAI service. We have blocked this scenario to prevent updating a shared instance of Azure OpenAI that may be in use by other workloads* diff --git a/docs/status_log.md b/docs/status_log.md index 4b43924dd..d384c0ff7 100644 --- a/docs/status_log.md +++ b/docs/status_log.md @@ -10,7 +10,7 @@ Currently the status logger provides a class, StatusLog, with the following func - **encode_document_id** - this function is used to generate the id from the file name by the upsert_document function initially. It can also be called to retrieve the encoded id of a file if you pass in the file name. The id is used as the partition key. - **read_documents** - This function returns status documents from Cosmos DB for you to use. 
You can specify optional query parameters, such as document id (the document path) or an integer representing how many minutes from now the processing should have started, or if you wish to receive verbose or concise details. -Finally you will need to supply 4 properties to the class before you can call the above functions. These are COSMOSDB_URL, COSMOSDB_KEY, COSMOSDB_LOG_DATABASE_NAME and COSMOSDB_LOG_CONTAINER_NAME. The resulting json includes verbose status updates but also a snapshot status for the end user UI, specifically the state, state_description and state_timestamp. These values are just select high level state snapshots, including 'Processing', 'Error' and 'Complete'. +Finally you will need to supply 4 properties to the class before you can call the above functions. These are COSMOSDB_URL, azure_credential, COSMOSDB_LOG_DATABASE_NAME and COSMOSDB_LOG_CONTAINER_NAME. The resulting json includes verbose status updates but also a snapshot status for the end user UI, specifically the state, state_description and state_timestamp. These values are just select high level state snapshots, including 'Processing', 'Error' and 'Complete'. 
````json { diff --git a/functions/FileDeletion/__init__.py b/functions/FileDeletion/__init__.py index 1f7194914..6c654cea5 100644 --- a/functions/FileDeletion/__init__.py +++ b/functions/FileDeletion/__init__.py @@ -6,26 +6,42 @@ from datetime import datetime, timezone from itertools import islice import azure.functions as func -from azure.core.credentials import AzureKeyCredential from azure.search.documents import SearchClient from azure.storage.blob import BlobServiceClient +from azure.identity import ManagedIdentityCredential, AzureAuthorityHosts, DefaultAzureCredential, get_bearer_token_provider from shared_code.status_log import State, StatusClassification, StatusLog -blob_connection_string = os.environ["BLOB_CONNECTION_STRING"] +azure_blob_storage_endpoint = os.environ["BLOB_STORAGE_ACCOUNT_ENDPOINT"] blob_storage_account_upload_container_name = os.environ[ "BLOB_STORAGE_ACCOUNT_UPLOAD_CONTAINER_NAME"] blob_storage_account_output_container_name = os.environ[ "BLOB_STORAGE_ACCOUNT_OUTPUT_CONTAINER_NAME"] azure_search_service_endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"] azure_search_index = os.environ["AZURE_SEARCH_INDEX"] -azure_search_service_key = os.environ["AZURE_SEARCH_SERVICE_KEY"] cosmosdb_url = os.environ["COSMOSDB_URL"] -cosmosdb_key = os.environ["COSMOSDB_KEY"] cosmosdb_log_database_name = os.environ["COSMOSDB_LOG_DATABASE_NAME"] cosmosdb_log_container_name = os.environ["COSMOSDB_LOG_CONTAINER_NAME"] +local_debug = os.environ["LOCAL_DEBUG"] or "false" +azure_ai_credential_domain = os.environ["AZURE_AI_CREDENTIAL_DOMAIN"] +azure_openai_authority_host = os.environ["AZURE_OPENAI_AUTHORITY_HOST"] + +if azure_openai_authority_host == "AzureUSGovernment": + AUTHORITY = AzureAuthorityHosts.AZURE_GOVERNMENT +else: + AUTHORITY = AzureAuthorityHosts.AZURE_PUBLIC_CLOUD + +# When debugging in VSCode, use the current user identity to authenticate with Azure OpenAI, +# Cognitive Search and Blob Storage (no secrets needed, just use 'az login' locally) +# 
Use managed identity when deployed on Azure. +# If you encounter a blocking error during a DefaultAzureCredential resolution, you can exclude +# the problematic credential by using a parameter (ex. exclude_shared_token_cache_credential=True) +if local_debug == "true": + azure_credential = DefaultAzureCredential(authority=AUTHORITY) +else: + azure_credential = ManagedIdentityCredential(authority=AUTHORITY) status_log = StatusLog(cosmosdb_url, - cosmosdb_key, + azure_credential, cosmosdb_log_database_name, cosmosdb_log_container_name) @@ -55,23 +71,6 @@ def get_deleted_blobs(blob_service_client: BlobServiceClient) -> list: deleted_blobs.append(blob.name) return deleted_blobs - -def purge_soft_deleted_blob(blob_service_client: BlobServiceClient) -> list: - '''Creates and returns a list of file paths that are soft-deleted.''' - # Create Uploaded Container Client and list all blobs, including deleted blobs - upload_container_client = blob_service_client.get_container_client( - blob_storage_account_upload_container_name) - temp_list = upload_container_client.list_blobs(include="deleted") - - deleted_blobs = [] - # Pull out the soft-deleted blob names - for blob in temp_list: - if blob.deleted: - logging.debug("\t Deleted Blob name: %s", blob.name) - deleted_blobs.append(blob.name) - return deleted_blobs - - def delete_content_blobs(blob_service_client: BlobServiceClient, deleted_blob: str) -> dict: '''Deletes blobs in the content container that correspond to a given soft-deleted blob from the upload container.
Returns a list of deleted @@ -99,7 +98,7 @@ def delete_search_entries(deleted_content_blobs: dict) -> None: Search index.''' search_client = SearchClient(azure_search_service_endpoint, azure_search_index, - AzureKeyCredential(azure_search_service_key)) + azure_credential) search_id_list_to_delete = [] for file_path in deleted_content_blobs.keys(): @@ -131,7 +130,7 @@ def main(mytimer: func.TimerRequest) -> None: logging.info('Python timer trigger function ran at %s', utc_timestamp) # Create Blob Service Client - blob_service_client = BlobServiceClient.from_connection_string(blob_connection_string) + blob_service_client = BlobServiceClient(account_url=azure_blob_storage_endpoint, credential=azure_credential) deleted_blobs = get_deleted_blobs(blob_service_client) diff --git a/functions/FileFormRecPollingPDF/__init__.py b/functions/FileFormRecPollingPDF/__init__.py index 6e041df6e..43b30ff91 100644 --- a/functions/FileFormRecPollingPDF/__init__.py +++ b/functions/FileFormRecPollingPDF/__init__.py @@ -1,18 +1,18 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-import azure.functions as func -from azure.storage.queue import QueueClient, TextBase64EncodePolicy import logging import os import json -import requests -from azure.storage.queue import QueueClient, TextBase64EncodePolicy -from shared_code.status_log import StatusLog, State, StatusClassification -from shared_code.utilities import Utilities, MediaType import random from collections import namedtuple import time +import azure.functions as func +from azure.storage.queue import QueueClient, TextBase64EncodePolicy +from azure.identity import ManagedIdentityCredential, AzureAuthorityHosts, DefaultAzureCredential, get_bearer_token_provider +import requests +from shared_code.status_log import StatusLog, State, StatusClassification +from shared_code.utilities import Utilities, MediaType from requests.exceptions import RequestException from tenacity import retry, stop_after_attempt, wait_fixed @@ -21,19 +21,16 @@ def string_to_bool(s): azure_blob_storage_account = os.environ["BLOB_STORAGE_ACCOUNT"] azure_blob_storage_endpoint = os.environ["BLOB_STORAGE_ACCOUNT_ENDPOINT"] +azure_queue_storage_endpoint = os.environ["AZURE_QUEUE_STORAGE_ENDPOINT"] azure_blob_drop_storage_container = os.environ["BLOB_STORAGE_ACCOUNT_UPLOAD_CONTAINER_NAME"] azure_blob_content_storage_container = os.environ["BLOB_STORAGE_ACCOUNT_OUTPUT_CONTAINER_NAME"] -azure_blob_storage_key = os.environ["AZURE_BLOB_STORAGE_KEY"] -azure_blob_connection_string = os.environ["BLOB_CONNECTION_STRING"] azure_blob_log_storage_container = os.environ["BLOB_STORAGE_ACCOUNT_LOG_CONTAINER_NAME"] CHUNK_TARGET_SIZE = int(os.environ["CHUNK_TARGET_SIZE"]) FR_API_VERSION = os.environ["FR_API_VERSION"] # ALL or Custom page numbers for multi-page documents(PDF/TIFF). Input the page numbers and/or # ranges of pages you want to get in the result. For a range of pages, use a hyphen, like pages="1-3, 5-6". # Separate each page number or range with a comma. 
-azure_blob_connection_string = os.environ["BLOB_CONNECTION_STRING"] cosmosdb_url = os.environ["COSMOSDB_URL"] -cosmosdb_key = os.environ["COSMOSDB_KEY"] cosmosdb_log_database_name = os.environ["COSMOSDB_LOG_DATABASE_NAME"] cosmosdb_log_container_name = os.environ["COSMOSDB_LOG_CONTAINER_NAME"] non_pdf_submit_queue = os.environ["NON_PDF_SUBMIT_QUEUE"] @@ -41,7 +38,6 @@ def string_to_bool(s): pdf_submit_queue = os.environ["PDF_SUBMIT_QUEUE"] text_enrichment_queue = os.environ["TEXT_ENRICHMENT_QUEUE"] endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"] -FR_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"] api_version = os.environ["FR_API_VERSION"] max_submit_requeue_count = int(os.environ["MAX_SUBMIT_REQUEUE_COUNT"]) max_polling_requeue_count = int(os.environ["MAX_POLLING_REQUEUE_COUNT"]) @@ -49,18 +45,35 @@ def string_to_bool(s): polling_backoff = int(os.environ["POLLING_BACKOFF"]) max_read_attempts = int(os.environ["MAX_READ_ATTEMPTS"]) enableDevCode = string_to_bool(os.environ["ENABLE_DEV_CODE"]) +local_debug = os.environ["LOCAL_DEBUG"] +azure_ai_credential_domain = os.environ["AZURE_AI_CREDENTIAL_DOMAIN"] +azure_openai_authority_host = os.environ["AZURE_OPENAI_AUTHORITY_HOST"] function_name = "FileFormRecPollingPDF" -utilities = Utilities(azure_blob_storage_account, azure_blob_storage_endpoint, azure_blob_drop_storage_container, azure_blob_content_storage_container, azure_blob_storage_key) FR_MODEL = "prebuilt-layout" +if azure_openai_authority_host == "AzureUSGovernment": + AUTHORITY = AzureAuthorityHosts.AZURE_GOVERNMENT +else: + AUTHORITY = AzureAuthorityHosts.AZURE_PUBLIC_CLOUD +# When debugging in VSCode, use the current user identity to authenticate with Azure OpenAI, +# Cognitive Search and Blob Storage (no secrets needed, just use 'az login' locally) +# Use managed identity when deployed on Azure. +# If you encounter a blocking error during a DefaultAzureCredential resolution, you can exclude +# the problematic credential by using a parameter (ex.
exclude_shared_token_cache_credential=True) +if local_debug == "true": + azure_credential = DefaultAzureCredential(authority=AUTHORITY) +else: + azure_credential = ManagedIdentityCredential(authority=AUTHORITY) +token_provider = get_bearer_token_provider(azure_credential, f'https://{os.environ["AZURE_AI_CREDENTIAL_DOMAIN"]}/.default') +utilities = Utilities(azure_blob_storage_account, azure_blob_storage_endpoint, azure_blob_drop_storage_container, azure_blob_content_storage_container, azure_credential) def main(msg: func.QueueMessage) -> None: try: - statusLog = StatusLog(cosmosdb_url, cosmosdb_key, cosmosdb_log_database_name, cosmosdb_log_container_name) + statusLog = StatusLog(cosmosdb_url, azure_credential, cosmosdb_log_database_name, cosmosdb_log_container_name) # Receive message from the queue message_body = msg.get_body().decode('utf-8') message_json = json.loads(message_body) @@ -74,7 +87,8 @@ def main(msg: func.QueueMessage) -> None: # Construct and submit the polling message to FR headers = { - 'Ocp-Apim-Subscription-Key': FR_key + "Content-Type": "application/json", + 'Authorization': f'Bearer {token_provider()}' } params = { @@ -105,7 +119,10 @@ def main(msg: func.QueueMessage) -> None: statusLog.upsert_document(blob_name, f'{function_name} - Chunking complete, {chunk_count} chunks created.', StatusClassification.DEBUG) # submit message to the enrichment queue to continue processing - queue_client = QueueClient.from_connection_string(azure_blob_connection_string, queue_name=text_enrichment_queue, message_encode_policy=TextBase64EncodePolicy()) + queue_client = QueueClient(account_url=azure_queue_storage_endpoint, + queue_name=text_enrichment_queue, + credential=azure_credential, + message_encode_policy=TextBase64EncodePolicy()) message_json["text_enrichment_queued_count"] = 1 message_string = json.dumps(message_json) queue_client.send_message(message_string) @@ -119,7 +136,10 @@ def main(msg: func.QueueMessage) -> None: queued_count += 1
message_json['polling_queue_count'] = queued_count statusLog.upsert_document(blob_name, f"{function_name} - FR has not completed processing, requeuing. Polling back off of attempt {queued_count} of {max_polling_requeue_count} for {backoff} seconds", StatusClassification.DEBUG, State.QUEUED) - queue_client = QueueClient.from_connection_string(azure_blob_connection_string, queue_name=pdf_polling_queue, message_encode_policy=TextBase64EncodePolicy()) + queue_client = QueueClient(account_url=azure_queue_storage_endpoint, + queue_name=pdf_polling_queue, + credential=azure_credential, + message_encode_policy=TextBase64EncodePolicy()) message_json_str = json.dumps(message_json) queue_client.send_message(message_json_str, visibility_timeout=backoff) else: @@ -128,7 +148,10 @@ def main(msg: func.QueueMessage) -> None: # unexpected status returned by FR, such as internal capacity overload, so requeue if submit_queued_count < max_submit_requeue_count: statusLog.upsert_document(blob_name, f'{function_name} - unhandled response from Form Recognizer- code: {response.status_code} status: {response_status} - text: {response.text}. 
Document will be resubmitted', StatusClassification.ERROR) - queue_client = QueueClient.from_connection_string(azure_blob_connection_string, pdf_submit_queue, message_encode_policy=TextBase64EncodePolicy()) + queue_client = QueueClient(account_url=azure_queue_storage_endpoint, + queue_name=pdf_submit_queue, + credential=azure_credential, + message_encode_policy=TextBase64EncodePolicy()) submit_queued_count += 1 message_json["submit_queued_count"] = submit_queued_count message_string = json.dumps(message_json) diff --git a/functions/FileFormRecPollingPDF/function.json b/functions/FileFormRecPollingPDF/function.json index 8bbc1e5d6..a81487751 100644 --- a/functions/FileFormRecPollingPDF/function.json +++ b/functions/FileFormRecPollingPDF/function.json @@ -6,7 +6,7 @@ "type": "queueTrigger", "direction": "in", "queueName": "pdf-polling-queue", - "connection": "BLOB_CONNECTION_STRING" + "connection": "AzureStorageConnection1" } ] } \ No newline at end of file diff --git a/functions/FileFormRecSubmissionPDF/__init__.py b/functions/FileFormRecSubmissionPDF/__init__.py index 0e7d739a3..f776ec731 100644 --- a/functions/FileFormRecSubmissionPDF/__init__.py +++ b/functions/FileFormRecSubmissionPDF/__init__.py @@ -8,43 +8,60 @@ import azure.functions as func import requests from azure.storage.queue import QueueClient, TextBase64EncodePolicy +from azure.identity import ManagedIdentityCredential, AzureAuthorityHosts, DefaultAzureCredential, get_bearer_token_provider from shared_code.status_log import State, StatusClassification, StatusLog from shared_code.utilities import Utilities azure_blob_storage_account = os.environ["BLOB_STORAGE_ACCOUNT"] azure_blob_storage_endpoint = os.environ["BLOB_STORAGE_ACCOUNT_ENDPOINT"] +azure_queue_storage_endpoint = os.environ["AZURE_QUEUE_STORAGE_ENDPOINT"] azure_blob_drop_storage_container = os.environ[ "BLOB_STORAGE_ACCOUNT_UPLOAD_CONTAINER_NAME" ] azure_blob_content_storage_container = os.environ[ "BLOB_STORAGE_ACCOUNT_OUTPUT_CONTAINER_NAME" 
] -azure_blob_storage_key = os.environ["AZURE_BLOB_STORAGE_KEY"] -azure_blob_connection_string = os.environ["BLOB_CONNECTION_STRING"] cosmosdb_url = os.environ["COSMOSDB_URL"] -cosmosdb_key = os.environ["COSMOSDB_KEY"] cosmosdb_log_database_name = os.environ["COSMOSDB_LOG_DATABASE_NAME"] cosmosdb_log_container_name = os.environ["COSMOSDB_LOG_CONTAINER_NAME"] pdf_polling_queue = os.environ["PDF_POLLING_QUEUE"] pdf_submit_queue = os.environ["PDF_SUBMIT_QUEUE"] endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"] -FR_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"] api_version = os.environ["FR_API_VERSION"] max_submit_requeue_count = int(os.environ["MAX_SUBMIT_REQUEUE_COUNT"]) poll_queue_submit_backoff = int(os.environ["POLL_QUEUE_SUBMIT_BACKOFF"]) pdf_submit_queue_backoff = int(os.environ["PDF_SUBMIT_QUEUE_BACKOFF"]) +local_debug = os.environ["LOCAL_DEBUG"] +azure_ai_credential_domain = os.environ["AZURE_AI_CREDENTIAL_DOMAIN"] +azure_openai_authority_host = os.environ["AZURE_OPENAI_AUTHORITY_HOST"] +FUNCTION_NAME = "FileFormRecSubmissionPDF" +FR_MODEL = "prebuilt-layout" + +if azure_openai_authority_host == "AzureUSGovernment": + AUTHORITY = AzureAuthorityHosts.AZURE_GOVERNMENT +else: + AUTHORITY = AzureAuthorityHosts.AZURE_PUBLIC_CLOUD + +# When debugging in VSCode, use the current user identity to authenticate with Azure OpenAI, +# Cognitive Search and Blob Storage (no secrets needed, just use 'az login' locally) +# Use managed identity when deployed on Azure. +# If you encounter a blocking error during a DefaultAzureCredential resolution, you can exclude +# the problematic credential by using a parameter (ex.
exclude_shared_token_cache_credential=True) +if local_debug == "true": + azure_credential = DefaultAzureCredential(authority=AUTHORITY) +else: + azure_credential = ManagedIdentityCredential(authority=AUTHORITY) +token_provider = get_bearer_token_provider(azure_credential, f'https://{os.environ["AZURE_AI_CREDENTIAL_DOMAIN"]}/.default') + utilities = Utilities( azure_blob_storage_account, azure_blob_storage_endpoint, azure_blob_drop_storage_container, azure_blob_content_storage_container, - azure_blob_storage_key, + azure_credential, ) -FUNCTION_NAME = "FileFormRecSubmissionPDF" -FR_MODEL = "prebuilt-layout" - def main(msg: func.QueueMessage) -> None: '''This function is triggered by a message in the pdf-submit-queue. @@ -56,7 +73,7 @@ def main(msg: func.QueueMessage) -> None: blob_path = message_json["blob_name"] try: statusLog = StatusLog( - cosmosdb_url, cosmosdb_key, cosmosdb_log_database_name, cosmosdb_log_container_name + cosmosdb_url, azure_credential, cosmosdb_log_database_name, cosmosdb_log_container_name ) # Receive message from the queue @@ -84,7 +101,7 @@ def main(msg: func.QueueMessage) -> None: # Construct and submit the message to FR headers = { "Content-Type": "application/json", - "Ocp-Apim-Subscription-Key": FR_key, + 'Authorization': f'Bearer {token_provider()}' } params = {"api-version": api_version} @@ -108,11 +125,10 @@ def main(msg: func.QueueMessage) -> None: result_id = response.headers.get("apim-request-id") message_json["FR_resultId"] = result_id message_json["polling_queue_count"] = 1 - queue_client = QueueClient.from_connection_string( - azure_blob_connection_string, - queue_name=pdf_polling_queue, - message_encode_policy=TextBase64EncodePolicy(), - ) + queue_client = QueueClient(account_url=azure_queue_storage_endpoint, + queue_name=pdf_polling_queue, + credential=azure_credential, + message_encode_policy=TextBase64EncodePolicy()) message_json_str = json.dumps(message_json) queue_client.send_message( message_json_str,
visibility_timeout=poll_queue_submit_backoff @@ -139,11 +155,10 @@ def main(msg: func.QueueMessage) -> None: f"{FUNCTION_NAME} - Throttled on PDF submission to FR, requeuing. Back off of {backoff} seconds", StatusClassification.DEBUG, ) - queue_client = QueueClient.from_connection_string( - azure_blob_connection_string, - queue_name=pdf_submit_queue, - message_encode_policy=TextBase64EncodePolicy(), - ) + queue_client = QueueClient(account_url=azure_queue_storage_endpoint, + queue_name=pdf_submit_queue, + credential=azure_credential, + message_encode_policy=TextBase64EncodePolicy()) message_json_str = json.dumps(message_json) queue_client.send_message(message_json_str, visibility_timeout=backoff) statusLog.upsert_document( diff --git a/functions/FileFormRecSubmissionPDF/function.json b/functions/FileFormRecSubmissionPDF/function.json index 7e187de44..41639b032 100644 --- a/functions/FileFormRecSubmissionPDF/function.json +++ b/functions/FileFormRecSubmissionPDF/function.json @@ -6,7 +6,7 @@ "type": "queueTrigger", "direction": "in", "queueName": "pdf-submit-queue", - "connection": "BLOB_CONNECTION_STRING" + "connection": "AzureStorageConnection1" } ] } \ No newline at end of file diff --git a/functions/FileLayoutParsingOther/__init__.py b/functions/FileLayoutParsingOther/__init__.py index d9c8a0b8f..8e35dfd99 100644 --- a/functions/FileLayoutParsingOther/__init__.py +++ b/functions/FileLayoutParsingOther/__init__.py @@ -9,6 +9,7 @@ import azure.functions as func from azure.storage.blob import generate_blob_sas from azure.storage.queue import QueueClient, TextBase64EncodePolicy +from azure.identity import ManagedIdentityCredential, AzureAuthorityHosts, DefaultAzureCredential, get_bearer_token_provider from shared_code.status_log import StatusLog, State, StatusClassification from shared_code.utilities import Utilities, MediaType @@ -16,13 +17,11 @@ azure_blob_storage_account = os.environ["BLOB_STORAGE_ACCOUNT"] azure_blob_storage_endpoint = 
os.environ["BLOB_STORAGE_ACCOUNT_ENDPOINT"] +azure_queue_storage_endpoint = os.environ["AZURE_QUEUE_STORAGE_ENDPOINT"] azure_blob_drop_storage_container = os.environ["BLOB_STORAGE_ACCOUNT_UPLOAD_CONTAINER_NAME"] azure_blob_content_storage_container = os.environ["BLOB_STORAGE_ACCOUNT_OUTPUT_CONTAINER_NAME"] -azure_blob_storage_key = os.environ["AZURE_BLOB_STORAGE_KEY"] -azure_blob_connection_string = os.environ["BLOB_CONNECTION_STRING"] azure_blob_log_storage_container = os.environ["BLOB_STORAGE_ACCOUNT_LOG_CONTAINER_NAME"] cosmosdb_url = os.environ["COSMOSDB_URL"] -cosmosdb_key = os.environ["COSMOSDB_KEY"] cosmosdb_log_database_name = os.environ["COSMOSDB_LOG_DATABASE_NAME"] cosmosdb_log_container_name = os.environ["COSMOSDB_LOG_CONTAINER_NAME"] non_pdf_submit_queue = os.environ["NON_PDF_SUBMIT_QUEUE"] @@ -30,11 +29,29 @@ pdf_submit_queue = os.environ["PDF_SUBMIT_QUEUE"] text_enrichment_queue = os.environ["TEXT_ENRICHMENT_QUEUE"] CHUNK_TARGET_SIZE = int(os.environ["CHUNK_TARGET_SIZE"]) +local_debug = os.environ["LOCAL_DEBUG"] +azure_ai_credential_domain = os.environ["AZURE_AI_CREDENTIAL_DOMAIN"] +azure_openai_authority_host = os.environ["AZURE_OPENAI_AUTHORITY_HOST"] - -utilities = Utilities(azure_blob_storage_account, azure_blob_storage_endpoint, azure_blob_drop_storage_container, azure_blob_content_storage_container, azure_blob_storage_key) function_name = "FileLayoutParsingOther" +if azure_openai_authority_host == "AzureUSGovernment": + AUTHORITY = AzureAuthorityHosts.AZURE_GOVERNMENT +else: + AUTHORITY = AzureAuthorityHosts.AZURE_PUBLIC_CLOUD + +# When debugging in VSCode, use the current user identity to authenticate with Azure OpenAI, +# Cognitive Search and Blob Storage (no secrets needed, just use 'az login' locally) +# Use managed identity when deployed on Azure. +# If you encounter a blocking error during a DefaultAzureCredntial resolution, you can exclude +# the problematic credential by using a parameter (ex. 
exclude_shared_token_cache_credential=True) +if local_debug == "true": + azure_credential = DefaultAzureCredential(authority=AUTHORITY) +else: + azure_credential = ManagedIdentityCredential(authority=AUTHORITY) + +utilities = Utilities(azure_blob_storage_account, azure_blob_storage_endpoint, azure_blob_drop_storage_container, azure_blob_content_storage_container, azure_credential) + class UnstructuredError(Exception): pass @@ -114,7 +131,7 @@ def PartitionFile(file_extension: str, file_url: str): def main(msg: func.QueueMessage) -> None: try: - statusLog = StatusLog(cosmosdb_url, cosmosdb_key, cosmosdb_log_database_name, cosmosdb_log_container_name) + statusLog = StatusLog(cosmosdb_url, azure_credential, cosmosdb_log_database_name, cosmosdb_log_container_name) logging.info('Python queue trigger function processed a queue item: %s', msg.get_body().decode('utf-8')) @@ -189,7 +206,10 @@ def main(msg: func.QueueMessage) -> None: statusLog.upsert_document(blob_name, f'{function_name} - chunking stored.', StatusClassification.DEBUG) # submit message to the text enrichment queue to continue processing - queue_client = QueueClient.from_connection_string(azure_blob_connection_string, queue_name=text_enrichment_queue, message_encode_policy=TextBase64EncodePolicy()) + queue_client = QueueClient(account_url=azure_queue_storage_endpoint, + queue_name=text_enrichment_queue, + credential=azure_credential, + message_encode_policy=TextBase64EncodePolicy()) message_json["text_enrichment_queued_count"] = 1 message_string = json.dumps(message_json) queue_client.send_message(message_string) diff --git a/functions/FileLayoutParsingOther/function.json b/functions/FileLayoutParsingOther/function.json index eb220c214..a40410fcc 100644 --- a/functions/FileLayoutParsingOther/function.json +++ b/functions/FileLayoutParsingOther/function.json @@ -6,7 +6,7 @@ "type": "queueTrigger", "direction": "in", "queueName": "non-pdf-submit-queue", - "connection": "BLOB_CONNECTION_STRING" + "connection": 
"AzureStorageConnection1" } ] } \ No newline at end of file diff --git a/functions/FileUploadedFunc/__init__.py b/functions/FileUploadedFunc/__init__.py index 75328fc62..fdb3d9f34 100644 --- a/functions/FileUploadedFunc/__init__.py +++ b/functions/FileUploadedFunc/__init__.py @@ -8,17 +8,15 @@ import time from shared_code.status_log import StatusLog, State, StatusClassification import azure.functions as func -from azure.storage.blob import BlobServiceClient, generate_blob_sas +from azure.storage.blob import BlobServiceClient from azure.storage.queue import QueueClient, TextBase64EncodePolicy +from azure.identity import ManagedIdentityCredential, AzureAuthorityHosts, DefaultAzureCredential, get_bearer_token_provider from azure.search.documents import SearchClient -from azure.core.credentials import AzureKeyCredential from shared_code.utilities_helper import UtilitiesHelper from urllib.parse import unquote -azure_blob_connection_string = os.environ["BLOB_CONNECTION_STRING"] cosmosdb_url = os.environ["COSMOSDB_URL"] -cosmosdb_key = os.environ["COSMOSDB_KEY"] cosmosdb_log_database_name = os.environ["COSMOSDB_LOG_DATABASE_NAME"] cosmosdb_log_container_name = os.environ["COSMOSDB_LOG_CONTAINER_NAME"] non_pdf_submit_queue = os.environ["NON_PDF_SUBMIT_QUEUE"] @@ -29,25 +27,38 @@ max_seconds_hide_on_upload = int(os.environ["MAX_SECONDS_HIDE_ON_UPLOAD"]) azure_blob_content_container = os.environ["BLOB_STORAGE_ACCOUNT_OUTPUT_CONTAINER_NAME"] azure_blob_endpoint = os.environ["BLOB_STORAGE_ACCOUNT_ENDPOINT"] -azure_blob_key = os.environ["AZURE_BLOB_STORAGE_KEY"] +azure_queue_endpoint = os.environ["AZURE_QUEUE_STORAGE_ENDPOINT"] azure_blob_upload_container = os.environ["BLOB_STORAGE_ACCOUNT_UPLOAD_CONTAINER_NAME"] azure_storage_account = os.environ["BLOB_STORAGE_ACCOUNT"] azure_search_service_endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"] azure_search_service_index = os.environ["AZURE_SEARCH_INDEX"] -azure_search_service_key = os.environ["AZURE_SEARCH_SERVICE_KEY"] - 
+local_debug = os.environ["LOCAL_DEBUG"] +azure_ai_credential_domain = os.environ["AZURE_AI_CREDENTIAL_DOMAIN"] +azure_openai_authority_host = os.environ["AZURE_OPENAI_AUTHORITY_HOST"] +if azure_openai_authority_host == "AzureUSGovernment": + AUTHORITY = AzureAuthorityHosts.AZURE_GOVERNMENT +else: + AUTHORITY = AzureAuthorityHosts.AZURE_PUBLIC_CLOUD +# When debugging in VSCode, use the current user identity to authenticate with Azure OpenAI, +# Cognitive Search and Blob Storage (no secrets needed, just use 'az login' locally) +# Use managed identity when deployed on Azure. +# If you encounter a blocking error during a DefaultAzureCredntial resolution, you can exclude +# the problematic credential by using a parameter (ex. exclude_shared_token_cache_credential=True) +if local_debug == "true": + azure_credential = DefaultAzureCredential(authority=AUTHORITY) +else: + azure_credential = ManagedIdentityCredential(authority=AUTHORITY) function_name = "FileUploadedFunc" utilities_helper = UtilitiesHelper( azure_blob_storage_account=azure_storage_account, azure_blob_storage_endpoint=azure_blob_endpoint, - azure_blob_storage_key=azure_blob_key, + credential=azure_credential ) -statusLog = StatusLog(cosmosdb_url, cosmosdb_key, cosmosdb_log_database_name, cosmosdb_log_container_name) - +statusLog = StatusLog(cosmosdb_url, azure_credential, cosmosdb_log_database_name, cosmosdb_log_container_name) def get_tags_and_upload_to_cosmos(blob_service_client, blob_path): """ Gets the tags from the blob metadata and uploads them to cosmos db""" @@ -110,10 +121,8 @@ def main(myblob: func.InputStream): } message_string = json.dumps(message) - blob_client = BlobServiceClient( - account_url=azure_blob_endpoint, - credential=azure_blob_key, - ) + blob_client = BlobServiceClient(azure_blob_endpoint, + credential=azure_credential) myblob_filename = myblob.name.split("/", 1)[1] # Check if the blob has been marked as 'do not process' and abort if so @@ -143,7 +152,7 @@ def main(myblob: 
func.InputStream): # instantiate the search sdk elements search_client = SearchClient(azure_search_service_endpoint, azure_search_service_index, - AzureKeyCredential(azure_search_service_key)) + azure_credential) search_id_list_to_delete = [] # Iterate through the blobs and delete each one from blob and the search index @@ -158,12 +167,15 @@ def main(myblob: func.InputStream): logging.debug("No items to delete from AI Search index.") # write tags to cosmos db once per file/message - blob_service_client = BlobServiceClient.from_connection_string(azure_blob_connection_string) + blob_service_client = BlobServiceClient(azure_blob_endpoint, credential=azure_credential) upload_container_client = blob_service_client.get_container_client(azure_blob_upload_container) - tag_list = get_tags_and_upload_to_cosmos(upload_container_client, myblob.name) + get_tags_and_upload_to_cosmos(upload_container_client, myblob.name) # Queue message with a random backoff so as not to put the next function under unnecessary load - queue_client = QueueClient.from_connection_string(azure_blob_connection_string, queue_name, message_encode_policy=TextBase64EncodePolicy()) + queue_client = QueueClient(account_url=azure_queue_endpoint, + queue_name=queue_name, + credential=azure_credential, + message_encode_policy=TextBase64EncodePolicy()) backoff = random.randint(1, max_seconds_hide_on_upload) queue_client.send_message(message_string, visibility_timeout = backoff) statusLog.upsert_document(myblob.name, f'{function_name} - {file_extension} file sent to submit queue. 
Visible in {backoff} seconds', StatusClassification.DEBUG, State.QUEUED) diff --git a/functions/FileUploadedFunc/function.json b/functions/FileUploadedFunc/function.json index cd8bd4e7f..68776e21d 100644 --- a/functions/FileUploadedFunc/function.json +++ b/functions/FileUploadedFunc/function.json @@ -6,7 +6,7 @@ "type": "blobTrigger", "direction": "in", "path": "upload", - "connection": "BLOB_CONNECTION_STRING" + "connection": "AzureStorageConnection1" } ], "retry": { diff --git a/functions/ImageEnrichment/__init__.py b/functions/ImageEnrichment/__init__.py index 0c5c67c62..b9ba06247 100644 --- a/functions/ImageEnrichment/__init__.py +++ b/functions/ImageEnrichment/__init__.py @@ -5,13 +5,13 @@ import azure.functions as func import requests from azure.storage.blob import BlobServiceClient +from azure.core.credentials import AzureKeyCredential +from azure.identity import ManagedIdentityCredential, DefaultAzureCredential, get_bearer_token_provider, AzureAuthorityHosts from shared_code.status_log import State, StatusClassification, StatusLog from shared_code.utilities import Utilities, MediaType from azure.search.documents import SearchClient -from azure.core.credentials import AzureKeyCredential from datetime import datetime - azure_blob_storage_account = os.environ["BLOB_STORAGE_ACCOUNT"] azure_blob_drop_storage_container = os.environ[ "BLOB_STORAGE_ACCOUNT_UPLOAD_CONTAINER_NAME" @@ -20,53 +20,64 @@ "BLOB_STORAGE_ACCOUNT_OUTPUT_CONTAINER_NAME" ] azure_blob_storage_endpoint = os.environ["BLOB_STORAGE_ACCOUNT_ENDPOINT"] -azure_blob_storage_key = os.environ["AZURE_BLOB_STORAGE_KEY"] -azure_blob_connection_string = os.environ["BLOB_CONNECTION_STRING"] azure_blob_content_storage_container = os.environ[ "BLOB_STORAGE_ACCOUNT_OUTPUT_CONTAINER_NAME" ] azure_blob_content_storage_container = os.environ[ "BLOB_STORAGE_ACCOUNT_OUTPUT_CONTAINER_NAME" ] +# Authentication settings +azure_authority_host = os.environ["AZURE_OPENAI_AUTHORITY_HOST"] +local_debug = 
os.environ.get("LOCAL_DEBUG", False) # Cosmos DB cosmosdb_url = os.environ["COSMOSDB_URL"] -cosmosdb_key = os.environ["COSMOSDB_KEY"] cosmosdb_log_database_name = os.environ["COSMOSDB_LOG_DATABASE_NAME"] cosmosdb_log_container_name = os.environ["COSMOSDB_LOG_CONTAINER_NAME"] # Cognitive Services -cognitive_services_key = os.environ["AZURE_AI_KEY"] -cognitive_services_endpoint = os.environ["AZURE_AI_ENDPOINT"] -cognitive_services_account_location = os.environ["AZURE_AI_LOCATION"] +azure_ai_key = os.environ["AZURE_AI_KEY"] +azure_ai_endpoint = os.environ["AZURE_AI_ENDPOINT"] +azure_ai_location = os.environ["AZURE_AI_LOCATION"] +azure_ai_credential_domain = os.environ["AZURE_AI_CREDENTIAL_DOMAIN"] # Search Service AZURE_SEARCH_SERVICE_ENDPOINT = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT") AZURE_SEARCH_INDEX = os.environ.get("AZURE_SEARCH_INDEX") or "gptkbindex" -SEARCH_CREDS = AzureKeyCredential(os.environ.get("AZURE_SEARCH_SERVICE_KEY")) + +if azure_authority_host == "AzureUSGovernment": + AUTHORITY = AzureAuthorityHosts.AZURE_GOVERNMENT +else: + AUTHORITY = AzureAuthorityHosts.AZURE_PUBLIC_CLOUD +if local_debug: + azure_credential = DefaultAzureCredential(authority=AUTHORITY) +else: + azure_credential = ManagedIdentityCredential(authority=AUTHORITY) +token_provider = get_bearer_token_provider(azure_credential, + f'https://{azure_ai_credential_domain}/.default') # Translation params for OCR'd text targetTranslationLanguage = os.environ["TARGET_TRANSLATION_LANGUAGE"] API_DETECT_ENDPOINT = ( - f"{cognitive_services_endpoint}language/:analyze-text?api-version=2023-04-01" + f"{azure_ai_endpoint}language/:analyze-text?api-version=2023-04-01" ) API_TRANSLATE_ENDPOINT = ( - f"{cognitive_services_endpoint}translator/text/v3.0/translate?api-version=3.0" + f"{azure_ai_endpoint}translator/text/v3.0/translate?api-version=3.0" ) MAX_CHARS_FOR_DETECTION = 1000 translator_api_headers = { - "Ocp-Apim-Subscription-Key": cognitive_services_key, + "Ocp-Apim-Subscription-Key": 
azure_ai_key, "Content-type": "application/json", - "Ocp-Apim-Subscription-Region": cognitive_services_account_location, + "Ocp-Apim-Subscription-Region": azure_ai_location, } # Note that "caption" and "denseCaptions" are only supported in Azure GPU regions (East US, France Central, # Korea Central, North Europe, Southeast Asia, West Europe, West US). Remove "caption" and "denseCaptions" # from the list below if your Computer Vision key is not from one of those regions. -if cognitive_services_account_location in [ +if azure_ai_location in [ "eastus", "francecentral", "koreacentral", @@ -76,10 +87,17 @@ "westus", ]: GPU_REGION = True - VISION_ENDPOINT = f"{cognitive_services_endpoint}computervision/imageanalysis:analyze?api-version=2023-04-01-preview&features=caption,denseCaptions,objects,tags,read&gender-neutral-caption=true" + VISION_ENDPOINT = f"{azure_ai_endpoint}computervision/imageanalysis:analyze?api-version=2023-04-01-preview&features=caption,denseCaptions,objects,tags,read&gender-neutral-caption=true" else: GPU_REGION = False - VISION_ENDPOINT = f"{cognitive_services_endpoint}computervision/imageanalysis:analyze?api-version=2023-04-01-preview&features=objects,tags,read&gender-neutral-caption=true" + VISION_ENDPOINT = f"{azure_ai_endpoint}computervision/imageanalysis:analyze?api-version=2023-04-01-preview&features=objects,tags,read&gender-neutral-caption=true" + +vision_api_headers = { + "Ocp-Apim-Subscription-Key": azure_ai_key, + "Content-type": "application/octet-stream", + "Accept": "application/json", + "Ocp-Apim-Subscription-Region": azure_ai_location, +} FUNCTION_NAME = "ImageEnrichment" @@ -88,7 +106,7 @@ azure_blob_storage_endpoint=azure_blob_storage_endpoint, azure_blob_drop_storage_container=azure_blob_drop_storage_container, azure_blob_content_storage_container=azure_blob_content_storage_container, - azure_blob_storage_key=azure_blob_storage_key + azure_credential=azure_credential ) @@ -141,7 +159,7 @@ def main(msg: func.QueueMessage) -> None: 
blob_uri = message_json["blob_uri"] try: statusLog = StatusLog( - cosmosdb_url, cosmosdb_key, cosmosdb_log_database_name, cosmosdb_log_container_name + cosmosdb_url, azure_credential, cosmosdb_log_database_name, cosmosdb_log_container_name ) logging.info( "Python queue trigger function processed a queue item: %s", @@ -156,13 +174,19 @@ def main(msg: func.QueueMessage) -> None: ) # Run the image through the Computer Vision service - file_name, file_extension, file_directory = utilities.get_filename_and_extension(blob_path) - blob_path_plus_sas = utilities.get_blob_and_sas(blob_path) - - data = {"url": f"{blob_path_plus_sas}"} + file_name, file_extension, file_directory = utilities.get_filename_and_extension( + blob_path) + path = blob_path.split("/", 1)[1] + + blob_service_client = BlobServiceClient(account_url=azure_blob_storage_endpoint, + credential=azure_credential) + blob_client = blob_service_client.get_blob_client(container=azure_blob_drop_storage_container, + blob=path) + image_data = blob_client.download_blob().readall() + files = {"file": image_data} response = requests.post(VISION_ENDPOINT, - headers=translator_api_headers, - json=data) + headers=vision_api_headers, + data=image_data) if response.status_code == 200: result = response.json() @@ -228,7 +252,8 @@ def main(msg: func.QueueMessage) -> None: # Detect language output_text = "" - detected_language, detection_confidence = detect_language(complete_ocr_text) + detected_language, detection_confidence = detect_language( + complete_ocr_text) text_image_summary += f"Raw OCR Text - Detected language: {detected_language}, Confidence: {detection_confidence}\n" if detected_language != targetTranslationLanguage: @@ -283,12 +308,15 @@ def main(msg: func.QueueMessage) -> None: ) try: - file_name, file_extension, file_directory = utilities.get_filename_and_extension(blob_path) - + file_name, file_extension, file_directory = utilities.get_filename_and_extension( + blob_path) + # Get the tags from metadata on the 
blob path = file_directory + file_name + file_extension - blob_service_client = BlobServiceClient.from_connection_string(azure_blob_connection_string) - blob_client = blob_service_client.get_blob_client(container=azure_blob_drop_storage_container, blob=path) + blob_service_client = BlobServiceClient( + account_url=azure_blob_storage_endpoint, credential=azure_credential) + blob_client = blob_service_client.get_blob_client( + container=azure_blob_drop_storage_container, blob=path) blob_properties = blob_client.get_blob_properties() tags = blob_properties.metadata.get("tags") if tags is not None: @@ -302,9 +330,11 @@ def main(msg: func.QueueMessage) -> None: statusLog.update_document_tags(blob_path, tags_list) # Only one chunk per image currently. - chunk_file=utilities.build_chunk_filepath(file_directory, file_name, file_extension, '0') + chunk_file = utilities.build_chunk_filepath( + file_directory, file_name, file_extension, '0') - index_section(index_content, file_name, file_directory[:-1], statusLog.encode_document_id(chunk_file), chunk_file, blob_path, blob_uri, tags_list) + index_section(index_content, file_name, file_directory[:-1], statusLog.encode_document_id( + chunk_file), chunk_file, blob_path, blob_uri, tags_list) statusLog.upsert_document( blob_path, @@ -320,7 +350,6 @@ def main(msg: func.QueueMessage) -> None: State.ERROR, ) - statusLog.save_document(blob_path) @@ -345,7 +374,7 @@ def index_section(index_content, file_name, file_directory, chunk_id, chunk_file batch.append(index_chunk) search_client = SearchClient(endpoint=AZURE_SEARCH_SERVICE_ENDPOINT, - index_name=AZURE_SEARCH_INDEX, - credential=SEARCH_CREDS) + index_name=AZURE_SEARCH_INDEX, + credential=azure_credential) search_client.upload_documents(documents=batch) diff --git a/functions/ImageEnrichment/function.json b/functions/ImageEnrichment/function.json index d59dec889..5b04da35b 100644 --- a/functions/ImageEnrichment/function.json +++ b/functions/ImageEnrichment/function.json @@ -6,7 +6,7 
@@ "type": "queueTrigger", "direction": "in", "queueName": "image-enrichment-queue", - "connection": "BLOB_CONNECTION_STRING" + "connection": "AzureStorageConnection1" } ] } \ No newline at end of file diff --git a/functions/TextEnrichment/__init__.py b/functions/TextEnrichment/__init__.py index 47b7c6dda..94751eb2b 100644 --- a/functions/TextEnrichment/__init__.py +++ b/functions/TextEnrichment/__init__.py @@ -1,6 +1,7 @@ import logging import azure.functions as func from azure.storage.queue import QueueClient, TextBase64EncodePolicy +from azure.identity import ManagedIdentityCredential, AzureAuthorityHosts, DefaultAzureCredential, get_bearer_token_provider from azure.storage.blob import BlobServiceClient from shared_code.utilities import Utilities import os @@ -14,45 +15,62 @@ azure_blob_storage_account = os.environ["BLOB_STORAGE_ACCOUNT"] azure_blob_storage_endpoint = os.environ["BLOB_STORAGE_ACCOUNT_ENDPOINT"] +azure_queue_storage_endpoint = os.environ["AZURE_QUEUE_STORAGE_ENDPOINT"] azure_blob_drop_storage_container = os.environ[ "BLOB_STORAGE_ACCOUNT_UPLOAD_CONTAINER_NAME" ] azure_blob_content_storage_container = os.environ[ "BLOB_STORAGE_ACCOUNT_OUTPUT_CONTAINER_NAME" ] -azure_blob_storage_key = os.environ["AZURE_BLOB_STORAGE_KEY"] -azure_blob_connection_string = os.environ["BLOB_CONNECTION_STRING"] azure_blob_content_storage_container = os.environ["BLOB_STORAGE_ACCOUNT_OUTPUT_CONTAINER_NAME"] azure_blob_storage_endpoint = os.environ["BLOB_STORAGE_ACCOUNT_ENDPOINT"] cosmosdb_url = os.environ["COSMOSDB_URL"] -cosmosdb_key = os.environ["COSMOSDB_KEY"] cosmosdb_log_database_name = os.environ["COSMOSDB_LOG_DATABASE_NAME"] cosmosdb_log_container_name = os.environ["COSMOSDB_LOG_CONTAINER_NAME"] text_enrichment_queue = os.environ["TEXT_ENRICHMENT_QUEUE"] -enrichmentKey = os.environ["AZURE_AI_KEY"] -enrichmentEndpoint = os.environ["AZURE_AI_ENDPOINT"] +azure_ai_endpoint = os.environ["AZURE_AI_ENDPOINT"] +azure_ai_key = os.environ["AZURE_AI_KEY"] 
targetTranslationLanguage = os.environ["TARGET_TRANSLATION_LANGUAGE"] max_requeue_count = int(os.environ["MAX_ENRICHMENT_REQUEUE_COUNT"]) enrichment_backoff = int(os.environ["ENRICHMENT_BACKOFF"]) azure_blob_content_storage_container = os.environ["BLOB_STORAGE_ACCOUNT_OUTPUT_CONTAINER_NAME"] queueName = os.environ["EMBEDDINGS_QUEUE"] -endpoint_region = os.environ["AZURE_AI_LOCATION"] +azure_ai_location = os.environ["AZURE_AI_LOCATION"] +local_debug = os.environ["LOCAL_DEBUG"] +azure_ai_credential_domain = os.environ["AZURE_AI_CREDENTIAL_DOMAIN"] +azure_openai_authority_host = os.environ["AZURE_OPENAI_AUTHORITY_HOST"] FUNCTION_NAME = "TextEnrichment" MAX_CHARS_FOR_DETECTION = 1000 +if azure_openai_authority_host == "AzureUSGovernment": + AUTHORITY = AzureAuthorityHosts.AZURE_GOVERNMENT +else: + AUTHORITY = AzureAuthorityHosts.AZURE_PUBLIC_CLOUD + +# When debugging in VSCode, use the current user identity to authenticate with Azure OpenAI, +# Cognitive Search and Blob Storage (no secrets needed, just use 'az login' locally) +# Use managed identity when deployed on Azure. +# If you encounter a blocking error during a DefaultAzureCredntial resolution, you can exclude +# the problematic credential by using a parameter (ex. 
exclude_shared_token_cache_credential=True) +if local_debug == "true": + azure_credential = DefaultAzureCredential(authority=AUTHORITY) +else: + azure_credential = ManagedIdentityCredential(authority=AUTHORITY) +token_provider = get_bearer_token_provider(azure_credential, f'https://{azure_ai_credential_domain}/.default') + utilities = Utilities( azure_blob_storage_account, azure_blob_storage_endpoint, azure_blob_drop_storage_container, azure_blob_content_storage_container, - azure_blob_storage_key, + azure_credential, ) statusLog = StatusLog( - cosmosdb_url, cosmosdb_key, cosmosdb_log_database_name, cosmosdb_log_container_name -) + cosmosdb_url, azure_credential, cosmosdb_log_database_name, cosmosdb_log_container_name +) def main(msg: func.QueueMessage) -> None: '''This function is triggered by a message in the text-enrichment-queue. @@ -60,8 +78,8 @@ def main(msg: func.QueueMessage) -> None: the target language, it will translate the chunks to the target language.''' try: - apiTranslateEndpoint = f"{enrichmentEndpoint}translator/text/v3.0/translate?api-version=3.0" - apiLanguageEndpoint = f"{enrichmentEndpoint}language/:analyze-text?api-version=2023-04-01" + apiTranslateEndpoint = f"{azure_ai_endpoint}translator/text/v3.0/translate?api-version=3.0" + apiLanguageEndpoint = f"{azure_ai_endpoint}language/:analyze-text?api-version=2023-04-01" message_body = msg.get_body().decode("utf-8") message_json = json.loads(message_body) @@ -83,7 +101,10 @@ def main(msg: func.QueueMessage) -> None: # Detect language of the document chunk_content = '' - blob_service_client = BlobServiceClient.from_connection_string(azure_blob_connection_string) + blob_service_client = BlobServiceClient( + account_url=azure_blob_storage_endpoint, + credential=azure_credential, + ) container_client = blob_service_client.get_container_client(azure_blob_content_storage_container) # Iterate over the chunks in the container, retrieving up to the max number of chars required chunk_list = 
container_client.list_blobs(name_starts_with=chunk_folder_path) @@ -104,9 +125,9 @@ def main(msg: func.QueueMessage) -> None: # detect language headers = { - 'Ocp-Apim-Subscription-Key': enrichmentKey, + "Ocp-Apim-Subscription-Key": azure_ai_key, 'Content-type': 'application/json', - 'Ocp-Apim-Subscription-Region': endpoint_region + 'Ocp-Apim-Subscription-Region': azure_ai_location } data = { @@ -160,11 +181,7 @@ def main(msg: func.QueueMessage) -> None: for field in fields_to_enrich: translate_and_set(field, chunk_dict, headers, params, message_json, detected_language, targetTranslationLanguage, apiTranslateEndpoint) - # Extract entities for index - enrich_headers = { - 'Ocp-Apim-Subscription-Key': enrichmentKey, - 'Content-type': 'application/json' - } + # Extract entities for index target_content = chunk_dict['translated_title'] + " " + chunk_dict['translated_subtitle'] + " " + chunk_dict['translated_section'] + " " + chunk_dict['translated_content'] enrich_data = { "kind": "EntityRecognition", @@ -181,7 +198,7 @@ def main(msg: func.QueueMessage) -> None: ] } } - response = requests.post(apiLanguageEndpoint, headers=enrich_headers, json=enrich_data, params=params) + response = requests.post(apiLanguageEndpoint, headers=headers, json=enrich_data, params=params) try: entities = response.json()['results']['documents'][0]['entities'] except: @@ -192,10 +209,6 @@ def main(msg: func.QueueMessage) -> None: chunk_dict[f"entities"] = entities_collection # Extract key phrases for index - enrich_headers = { - 'Ocp-Apim-Subscription-Key': enrichmentKey, - 'Content-type': 'application/json' - } target_content = chunk_dict['translated_title'] + " " + chunk_dict['translated_subtitle'] + " " + chunk_dict['translated_section'] + " " + chunk_dict['translated_content'] enrich_data = { "kind": "KeyPhraseExtraction", @@ -212,7 +225,7 @@ def main(msg: func.QueueMessage) -> None: ] } } - response = requests.post(apiLanguageEndpoint, headers=enrich_headers, json=enrich_data, 
params=params) + response = requests.post(apiLanguageEndpoint, headers=headers, json=enrich_data, params=params) try: key_phrases = response.json()['results']['documents'][0]['keyPhrases'] except: @@ -225,7 +238,10 @@ def main(msg: func.QueueMessage) -> None: block_blob_client.upload_blob(json_str, overwrite=True) # Queue message to embeddings queue for downstream processing - queue_client = QueueClient.from_connection_string(azure_blob_connection_string, queueName, message_encode_policy=TextBase64EncodePolicy()) + queue_client = QueueClient(account_url=azure_queue_storage_endpoint, + queue_name=queueName, + credential=azure_credential, + message_encode_policy=TextBase64EncodePolicy()) embeddings_queue_backoff = random.randint(1, 60) message_string = json.dumps(message_json) queue_client.send_message(message_string, visibility_timeout = embeddings_queue_backoff) @@ -295,11 +311,10 @@ def requeue(response, message_json): ) queued_count += 1 message_json["text_enrichment_queued_count"] = queued_count - queue_client = QueueClient.from_connection_string( - azure_blob_connection_string, - queue_name=text_enrichment_queue, - message_encode_policy=TextBase64EncodePolicy(), - ) + queue_client = QueueClient(account_url=azure_queue_storage_endpoint, + queue_name=text_enrichment_queue, + credential=azure_credential, + message_encode_policy=TextBase64EncodePolicy()) message_json_str = json.dumps(message_json) queue_client.send_message(message_json_str, visibility_timeout=backoff) statusLog.upsert_document( diff --git a/functions/TextEnrichment/function.json b/functions/TextEnrichment/function.json index 4bcea1ca5..a53d80939 100644 --- a/functions/TextEnrichment/function.json +++ b/functions/TextEnrichment/function.json @@ -6,7 +6,7 @@ "type": "queueTrigger", "direction": "in", "queueName": "text-enrichment-queue", - "connection": "BLOB_CONNECTION_STRING" + "connection": "AzureStorageConnection1" } ] } \ No newline at end of file diff --git a/functions/requirements.txt 
b/functions/requirements.txt index ae80a8363..03f044a21 100644 --- a/functions/requirements.txt +++ b/functions/requirements.txt @@ -5,16 +5,17 @@ #### Any version change made here should also be made and tested for the web apps in /app/backend and /app/enrichment azure-functions == 1.17.0 -tiktoken==0.4.0 +tiktoken==0.7.0 azure.ai.formrecognizer==3.2.1 -azure-storage-blob==12.16.0 +azure-storage-blob==12.20.0 azure-core == 1.30.2 lxml == 4.9.2 -azure-cosmos == 4.3.1 +azure-cosmos == 4.7.0 azure-storage-queue == 12.6.0 nltk == 3.8.1 tenacity == 8.2.3 unstructured[csv,doc,docx,email,html,md,msg,ppt,pptx,text,xlsx,xml] == 0.12.5 pyoo == 1.4 azure-search-documents == 11.4.0b11 -beautifulsoup4 == 4.12.2 \ No newline at end of file +beautifulsoup4 == 4.12.2 +azure-identity==1.17.1 \ No newline at end of file diff --git a/functions/shared_code/status_log.py b/functions/shared_code/status_log.py index b6445426a..f83aa5466 100644 --- a/functions/shared_code/status_log.py +++ b/functions/shared_code/status_log.py @@ -38,13 +38,13 @@ class StatusQueryLevel(Enum): class StatusLog: """ Class for logging status of various processes to Cosmos DB""" - def __init__(self, url, key, database_name, container_name): + def __init__(self, url, azure_credential, database_name, container_name): """ Constructor function """ self._url = url - self._key = key + self.azure_credential = azure_credential self._database_name = database_name self._container_name = container_name - self.cosmos_client = CosmosClient(url=self._url, credential=self._key) + self.cosmos_client = CosmosClient(url=self._url, credential=self.azure_credential, consistency_level='Session') self._log_document = {} # Select a database (will create it if it doesn't exist) @@ -305,7 +305,7 @@ def get_stack_trace(self): trc = 'Traceback (most recent call last):\n' stackstr = trc + ''.join(traceback.format_list(stack)) if exc is not None: - stackstr += ' ' + traceback.format_exc().lstrip(trc) + stackstr += ' ' + 
traceback.format_exc().lstrip() return stackstr def get_all_tags(self): diff --git a/functions/shared_code/utilities.py b/functions/shared_code/utilities.py index 45adfd7a7..d98eeed25 100644 --- a/functions/shared_code/utilities.py +++ b/functions/shared_code/utilities.py @@ -70,16 +70,16 @@ def __init__(self, azure_blob_storage_endpoint, azure_blob_drop_storage_container, azure_blob_content_storage_container, - azure_blob_storage_key + azure_credential ): self.azure_blob_storage_account = azure_blob_storage_account self.azure_blob_storage_endpoint = azure_blob_storage_endpoint self.azure_blob_drop_storage_container = azure_blob_drop_storage_container self.azure_blob_content_storage_container = azure_blob_content_storage_container - self.azure_blob_storage_key = azure_blob_storage_key + self.azure_credential = azure_credential self.utilities_helper = UtilitiesHelper(azure_blob_storage_account, azure_blob_storage_endpoint, - azure_blob_storage_key) + azure_credential) def write_blob(self, output_container, content, output_filename, folder_set=""): """ Function to write a generic blob """ @@ -87,7 +87,8 @@ def write_blob(self, output_container, content, output_filename, folder_set=""): # Get path and file name minus the root container blob_service_client = BlobServiceClient( self.azure_blob_storage_endpoint, - self.azure_blob_storage_key) + credential=self.azure_credential + ) block_blob_client = blob_service_client.get_blob_client( container=output_container, blob=f'{folder_set}{output_filename}') block_blob_client.upload_blob(content, overwrite=True) @@ -107,50 +108,6 @@ def get_blob_and_sas(self, blob_path): """ Function to retrieve the uri and sas token for a given blob in azure storage""" return self.utilities_helper.get_blob_and_sas(blob_path) - # def table_to_html(self, table): - # """ Function to take an output FR table json structure and convert to HTML """ - # header_processing_complete = False - # table_html = "" - # rows = [sorted([cell for cell in 
table["cells"] if cell["rowIndex"] == i], - # key=lambda cell: cell["columnIndex"]) for i in range(table["rowCount"])] - # for row_cells in rows: - # is_row_a_header = False - # row_html = "" - # for cell in row_cells: - # tag = "td" - # #if hasattr(cell, 'kind'): - # if 'kind' in cell: - # if (cell["kind"] == "columnHeader" or cell["kind"] == "rowHeader"): - # tag = "th" - # if (cell["kind"] == "columnHeader"): - # is_row_a_header = True - # else: - # # we have encountered a cell that isn't tagged as a header, - # # so assume we have now rerached regular table cells - # header_processing_complete = True - # cell_spans = "" - # #if hasattr(cell, 'columnSpan'): - # if 'columnSpan' in cell: - # if cell["columnSpan"] > 1: - # cell_spans += f" colSpan={cell['columnSpan']}" - # #if hasattr(cell, 'rowSpan'): - # if 'rowSpan' in cell: - # if cell["rowSpan"] > 1: - # cell_spans += f" rowSpan={cell['rowSpan']}" - # row_html += f"<{tag}{cell_spans}>{html.escape(cell['content'])}" - # row_html += "" - - # if is_row_a_header and header_processing_complete == False: - # row_html = "" + row_html + "" - # table_html += row_html - # table_html += "
" - # return table_html - - - - - - def table_to_html(self, table): """ Function to take an output FR table json structure and convert to HTML """ table_html = "" @@ -370,7 +327,7 @@ def write_chunk(self, myblob_name, myblob_uri, file_number, chunk_size, chunk_te file_name, file_extension, file_directory = self.get_filename_and_extension(myblob_name) blob_service_client = BlobServiceClient( self.azure_blob_storage_endpoint, - self.azure_blob_storage_key) + credential=self.azure_credential) json_str = json.dumps(chunk_output, indent=2, ensure_ascii=False) block_blob_client = blob_service_client.get_blob_client( container=self.azure_blob_content_storage_container, diff --git a/functions/shared_code/utilities_helper.py b/functions/shared_code/utilities_helper.py index 2958697c1..e3e6551f4 100644 --- a/functions/shared_code/utilities_helper.py +++ b/functions/shared_code/utilities_helper.py @@ -5,18 +5,18 @@ import logging import urllib.parse from datetime import datetime, timedelta -from azure.storage.blob import generate_blob_sas, BlobSasPermissions +from azure.storage.blob import generate_blob_sas, BlobSasPermissions, BlobServiceClient class UtilitiesHelper: """ Helper class for utility functions""" def __init__(self, azure_blob_storage_account, azure_blob_storage_endpoint, - azure_blob_storage_key + credential ): self.azure_blob_storage_account = azure_blob_storage_account self.azure_blob_storage_endpoint = azure_blob_storage_endpoint - self.azure_blob_storage_key = azure_blob_storage_key + self.blob_service_client = BlobServiceClient(azure_blob_storage_endpoint, credential=credential) def get_filename_and_extension(self, path): """ Function to return the file name & type""" @@ -40,12 +40,15 @@ def get_blob_and_sas(self, blob_path): container_name = separator.join( blob_path.split(separator)[0:1]) + # Obtain the user delegation key + user_delegation_key = self.blob_service_client.get_user_delegation_key(key_start_time=datetime.utcnow(), 
key_expiry_time=datetime.utcnow() + timedelta(hours=12)) + # Gen SAS token sas_token = generate_blob_sas( account_name=self.azure_blob_storage_account, container_name=container_name, blob_name=file_path_w_name_no_cont, - account_key=self.azure_blob_storage_key, + user_delegation_key=user_delegation_key, permission=BlobSasPermissions(read=True), expiry=datetime.utcnow() + timedelta(hours=1) ) diff --git a/infra/arm_templates/kv_secret/kv_secret.template.json b/infra/arm_templates/kv_secret/kv_secret.template.json index c27d690cc..bb204c703 100644 --- a/infra/arm_templates/kv_secret/kv_secret.template.json +++ b/infra/arm_templates/kv_secret/kv_secret.template.json @@ -16,6 +16,9 @@ }, "expiration": { "type": "string" + }, + "contentType": { + "type": "string" } }, "resources": [ @@ -29,7 +32,8 @@ "attributes": { "enabled": true, "exp": "[parameters('expiration')]" - } + }, + "contentType": "[parameters('contentType')]" } } ] diff --git a/infra/arm_templates/video_indexer/avi.template.json b/infra/arm_templates/video_indexer/avi.template.json deleted file mode 100644 index 251401870..000000000 --- a/infra/arm_templates/video_indexer/avi.template.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "$schema": "${arm_template_schema_mgmt_api}/schemas/2019-04-01/deploymentTemplate.json#", - "contentVersion": "1.0.0.0", - "parameters": { - "name": { - "type": "string", - "metadata" : { - "description" : "The name of the AVAM resource" - }, - "defaultValue": "avam-account" - }, - "managedIdentityId": { - "type": "string", - "metadata" : { - "description" : "The managed identity used to grant access to the Azure Storage account" - }, - "defaultValue": "" - }, - "storageServicesResourceId": { - "type": "string", - "metadata" : { - "description" : "The Storage Account Id for storing uploaded videos. 
The Account needs to be created prior to the creation of this template" - }, - "defaultValue" : "" - }, - "tags": { - "type": "object", - "metadata" : { - "description" : "Arm Template Tags" - }, - "defaultValue": {} - }, - "apiVersion": { - "type": "string", - "metadata": { - "description": "Video indexer api version" - } - } - }, - "resources": [ - { - "type": "Microsoft.VideoIndexer/accounts", - "apiVersion": "[parameters('apiVersion')]", - "name": "[parameters('name')]", - - "location": "[resourceGroup().location]", - "tags": "[parameters('tags')]", - "identity": { - "type": "UserAssigned", - "userAssignedIdentities": { - "[parameters('managedIdentityId')]": {} - } - }, - "properties": { - "storageServices": { - "resourceId": "[parameters('storageServicesResourceId')]", - "userAssignedIdentity": "[parameters('managedIdentityId')]" - } - } - } - ], - "outputs": { - "avam_id": { - "value": "[reference(resourceId('Microsoft.VideoIndexer/accounts',parameters('name'))).accountId]", - "type": "string" - } - } -} \ No newline at end of file diff --git a/infra/core/ai/bingSearch/bingSearch.tf b/infra/core/ai/bingSearch/bingSearch.tf index 0ca882ddf..e980e1c87 100644 --- a/infra/core/ai/bingSearch/bingSearch.tf +++ b/infra/core/ai/bingSearch/bingSearch.tf @@ -35,4 +35,5 @@ module "bing_search_key" { alias = "bingkey" tags = var.tags kv_secret_expiration = var.kv_secret_expiration + contentType = "application/vnd.bag-StrongEncPasswordString" } \ No newline at end of file diff --git a/infra/core/ai/cogServices/cogServices.tf b/infra/core/ai/cogServices/cogServices.tf index 413918750..0214bb34b 100644 --- a/infra/core/ai/cogServices/cogServices.tf +++ b/infra/core/ai/cogServices/cogServices.tf @@ -7,10 +7,9 @@ resource "azurerm_cognitive_account" "cognitiveService" { tags = var.tags custom_subdomain_name = var.name public_network_access_enabled = var.is_secure_mode ? false : true - local_auth_enabled = var.is_secure_mode ? 
false : true } -module "search_service_key" { +module "cog_service_key" { source = "../../security/keyvaultSecret" arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api key_vault_name = var.key_vault_name @@ -20,6 +19,7 @@ module "search_service_key" { alias = "aisvckey" tags = var.tags kv_secret_expiration = var.kv_secret_expiration + contentType = "application/vnd.bag-StrongEncPasswordString" } data "azurerm_subnet" "subnet" { diff --git a/infra/core/ai/docintelligence/docintelligence.tf b/infra/core/ai/docintelligence/docintelligence.tf index 6e3caea19..a75bbc7f4 100644 --- a/infra/core/ai/docintelligence/docintelligence.tf +++ b/infra/core/ai/docintelligence/docintelligence.tf @@ -1,4 +1,4 @@ -resource "azurerm_cognitive_account" "formRecognizerAccount" { +resource "azurerm_cognitive_account" "docIntelligenceAccount" { name = var.name location = var.location resource_group_name = var.resourceGroupName @@ -8,18 +8,9 @@ resource "azurerm_cognitive_account" "formRecognizerAccount" { public_network_access_enabled = var.is_secure_mode ? false : true local_auth_enabled = var.is_secure_mode ? false : true tags = var.tags -} - -module "docIntelligenceKey" { - source = "../../security/keyvaultSecret" - arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api - resourceGroupName = var.resourceGroupName - key_vault_name = var.key_vault_name - secret_name = "AZURE-FORM-RECOGNIZER-KEY" - secret_value = azurerm_cognitive_account.formRecognizerAccount.primary_access_key - alias = "docintkey" - tags = var.tags - kv_secret_expiration = var.kv_secret_expiration + identity { + type = "SystemAssigned" + } } data "azurerm_subnet" "subnet" { @@ -29,7 +20,7 @@ data "azurerm_subnet" "subnet" { resource_group_name = var.resourceGroupName } -resource "azurerm_private_endpoint" "formPrivateEndpoint" { +resource "azurerm_private_endpoint" "docintPrivateEndpoint" { count = var.is_secure_mode ? 
1 : 0 name = "${var.name}-private-endpoint" location = var.location @@ -40,7 +31,7 @@ resource "azurerm_private_endpoint" "formPrivateEndpoint" { private_service_connection { name = "cognitiveAccount" is_manual_connection = false - private_connection_resource_id = azurerm_cognitive_account.formRecognizerAccount.id + private_connection_resource_id = azurerm_cognitive_account.docIntelligenceAccount.id subresource_names = ["account"] } diff --git a/infra/core/ai/docintelligence/outputs.tf b/infra/core/ai/docintelligence/outputs.tf index f7994f894..8244d24d5 100644 --- a/infra/core/ai/docintelligence/outputs.tf +++ b/infra/core/ai/docintelligence/outputs.tf @@ -1,15 +1,19 @@ output "formRecognizerAccountName" { - value = azurerm_cognitive_account.formRecognizerAccount.name + value = azurerm_cognitive_account.docIntelligenceAccount.name } output "formRecognizerAccountEndpoint" { - value = azurerm_cognitive_account.formRecognizerAccount.endpoint + value = azurerm_cognitive_account.docIntelligenceAccount.endpoint } output "formRecognizerAccount" { - value = azurerm_cognitive_account.formRecognizerAccount.id + value = azurerm_cognitive_account.docIntelligenceAccount.id } output "formPrivateEndpoint" { - value = var.is_secure_mode ? azurerm_private_endpoint.formPrivateEndpoint[0].id : null + value = var.is_secure_mode ? 
azurerm_private_endpoint.docintPrivateEndpoint[0].id : null +} + +output "docIntelligenceIdentity" { + value = azurerm_cognitive_account.docIntelligenceAccount.identity[0].principal_id } \ No newline at end of file diff --git a/infra/core/ai/docintelligence/variables.tf b/infra/core/ai/docintelligence/variables.tf index f040b0e81..2c74f3820 100644 --- a/infra/core/ai/docintelligence/variables.tf +++ b/infra/core/ai/docintelligence/variables.tf @@ -58,9 +58,4 @@ variable "subnet_name" { variable "arm_template_schema_mgmt_api" { type = string -} - -variable "kv_secret_expiration" { - type = string - description = "The value for key vault secret expiration in seconds since 1970-01-01T00:00:00Z" } \ No newline at end of file diff --git a/infra/core/ai/openaiservices/openaiservices.tf b/infra/core/ai/openaiservices/openaiservices.tf index 2afa5dac7..2aec40e03 100644 --- a/infra/core/ai/openaiservices/openaiservices.tf +++ b/infra/core/ai/openaiservices/openaiservices.tf @@ -87,16 +87,4 @@ resource "azurerm_private_endpoint" "openaiPrivateEndpoint" { private_dns_zone_ids = var.private_dns_zone_ids } -} - -module "openaiServiceKeySecret" { - source = "../../security/keyvaultSecret" - key_vault_name = var.key_vault_name - secret_name = "AZURE-OPENAI-SERVICE-KEY" - secret_value = var.useExistingAOAIService ? 
var.openaiServiceKey : azurerm_cognitive_account.openaiAccount[0].primary_access_key - arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api - resourceGroupName = var.resourceGroupName - tags = var.tags - alias = "openaikey" - kv_secret_expiration = var.kv_secret_expiration } \ No newline at end of file diff --git a/infra/core/ai/openaiservices/variables.tf b/infra/core/ai/openaiservices/variables.tf index 68666cc3c..c1d110d33 100644 --- a/infra/core/ai/openaiservices/variables.tf +++ b/infra/core/ai/openaiservices/variables.tf @@ -45,11 +45,6 @@ variable "useExistingAOAIService" { default = false } -variable "openaiServiceKey" { - description = "The OpenAI service key" - type = string -} - variable "is_secure_mode" { type = bool default = false @@ -87,11 +82,6 @@ variable "arm_template_schema_mgmt_api" { type = string } -variable "kv_secret_expiration" { - type = string - description = "The value for key vault secret expiration in seconds since 1970-01-01T00:00:00Z" -} - variable "logAnalyticsWorkspaceResourceId" { type = string } \ No newline at end of file diff --git a/infra/core/db/cosmosdb.tf b/infra/core/db/cosmosdb.tf index b72d90ac5..c6fdc615a 100644 --- a/infra/core/db/cosmosdb.tf +++ b/infra/core/db/cosmosdb.tf @@ -47,6 +47,10 @@ resource "azurerm_cosmosdb_account" "cosmosdb_account" { location = var.location failover_priority = 0 } + + capabilities { + name = "EnableServerless" + } } resource "azurerm_cosmosdb_sql_database" "log_database" { @@ -61,23 +65,7 @@ resource "azurerm_cosmosdb_sql_container" "log_container" { account_name = azurerm_cosmosdb_account.cosmosdb_account.name database_name = azurerm_cosmosdb_sql_database.log_database.name - partition_key_path = "/file_name" - - autoscale_settings { - max_throughput = var.autoscaleMaxThroughput - } -} - -module "cosmos_db_key" { - source = "../security/keyvaultSecret" - resourceGroupName = var.resourceGroupName - arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api - key_vault_name 
= var.key_vault_name - secret_name = "COSMOSDB-KEY" - secret_value = azurerm_cosmosdb_account.cosmosdb_account.primary_key - alias = "cosmoskey" - tags = var.tags - kv_secret_expiration = var.kv_secret_expiration + partition_key_paths = ["/file_name"] } data "azurerm_subnet" "subnet" { diff --git a/infra/core/db/outputs.tf b/infra/core/db/outputs.tf index 08c26613b..4a421f562 100644 --- a/infra/core/db/outputs.tf +++ b/infra/core/db/outputs.tf @@ -12,4 +12,12 @@ output "CosmosDBLogContainerName" { output "privateEndpointId" { value = var.is_secure_mode ? azurerm_private_endpoint.cosmosPrivateEndpoint[0].id : null +} + +output "id" { + value = azurerm_cosmosdb_account.cosmosdb_account.id +} + +output "name" { + value = azurerm_cosmosdb_account.cosmosdb_account.name } \ No newline at end of file diff --git a/infra/core/db/variables.tf b/infra/core/db/variables.tf index 5b91a4c3b..5de215019 100644 --- a/infra/core/db/variables.tf +++ b/infra/core/db/variables.tf @@ -84,9 +84,4 @@ variable "subnet_name" { variable "arm_template_schema_mgmt_api" { type = string -} - -variable "kv_secret_expiration" { - type = string - description = "The value for key vault secret expiration in seconds since 1970-01-01T00:00:00Z" } \ No newline at end of file diff --git a/infra/core/host/enrichmentapp/enrichmentapp.tf b/infra/core/host/enrichmentapp/enrichmentapp.tf index 4e54f346a..3a68ee418 100644 --- a/infra/core/host/enrichmentapp/enrichmentapp.tf +++ b/infra/core/host/enrichmentapp/enrichmentapp.tf @@ -105,13 +105,6 @@ resource "azurerm_linux_web_app" "enrichmentapp" { "SCM_DO_BUILD_DURING_DEPLOYMENT" = lower(tostring(var.scmDoBuildDuringDeployment)) "ENABLE_ORYX_BUILD" = tostring(var.enableOryxBuild) "APPLICATIONINSIGHTS_CONNECTION_STRING" = var.applicationInsightsConnectionString - "AZURE_SEARCH_SERVICE_KEY" = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/AZURE-SEARCH-SERVICE-KEY)" - "COSMOSDB_KEY" = 
"@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/COSMOSDB-KEY)" - "AZURE_AI_KEY" = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/AZURE-AI-KEY)" - "AZURE_BLOB_STORAGE_KEY" = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/AZURE-BLOB-STORAGE-KEY)" - "BLOB_CONNECTION_STRING" = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/BLOB-CONNECTION-STRING)" - "AZURE_STORAGE_CONNECTION_STRING" = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/BLOB-CONNECTION-STRING)" - "AZURE_OPENAI_SERVICE_KEY" = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/AZURE-OPENAI-SERVICE-KEY)" "KEY_EXPIRATION_DATE" = timeadd(timestamp(), "4320h") # Added expiration date setting for keys "WEBSITE_PULL_IMAGE_OVER_VNET" = var.is_secure_mode ? "true" : "false" "WEBSITES_PORT" = "6000" diff --git a/infra/core/host/functions/functions.tf b/infra/core/host/functions/functions.tf index 2b3e4af2a..65772604d 100644 --- a/infra/core/host/functions/functions.tf +++ b/infra/core/host/functions/functions.tf @@ -81,6 +81,7 @@ data "azurerm_storage_account" "existing_sa" { name = var.blobStorageAccountName resource_group_name = var.resourceGroupName } + // Create function app resource resource "azurerm_linux_function_app" "function_app" { name = var.name @@ -88,65 +89,74 @@ resource "azurerm_linux_function_app" "function_app" { resource_group_name = var.resourceGroupName service_plan_id = azurerm_service_plan.funcServicePlan.id storage_account_name = var.blobStorageAccountName - storage_account_access_key = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/AZURE-BLOB-STORAGE-KEY)" + storage_account_access_key = data.azurerm_storage_account.existing_sa.primary_access_key + #storage_uses_managed_identity = true https_only = true tags = var.tags public_network_access_enabled = var.is_secure_mode ? false : true virtual_network_subnet_id = var.is_secure_mode ? 
var.subnetIntegration_id : null + content_share_force_disabled = true site_config { application_stack { docker { - image_name = "${var.container_registry}/functionapp" - image_tag = "latest" - registry_url = "https://${var.container_registry}" - registry_username = var.container_registry_admin_username - registry_password = var.container_registry_admin_password + image_name = "${var.container_registry}/functionapp" + image_tag = "latest" + registry_url = "https://${var.container_registry}" + registry_username = var.container_registry_admin_username + registry_password = var.container_registry_admin_password } } - container_registry_use_managed_identity = true - always_on = true - http2_enabled = true - ftps_state = var.is_secure_mode ? "Disabled" : var.ftpsState + container_registry_use_managed_identity = true + always_on = true + http2_enabled = true + ftps_state = "Disabled" cors { - allowed_origins = concat([var.azure_portal_domain, "https://ms.portal.azure.com"], var.allowedOrigins) + allowed_origins = concat([var.azure_portal_domain, "https://ms.portal.azure.com"], var.allowedOrigins) } - application_insights_connection_string = var.appInsightsConnectionString - application_insights_key = var.appInsightsInstrumentationKey + vnet_route_all_enabled = var.is_secure_mode ? true : false } - connection_string { - name = "BLOB_CONNECTION_STRING" - type = "Custom" - value = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/BLOB-CONNECTION-STRING)" + identity { + type = "SystemAssigned" } - + app_settings = { - WEBSITE_VNET_ROUTE_ALL = "1" - WEBSITE_CONTENTOVERVNET = var.is_secure_mode ? "1" : "0" + # Network realated settings for secure mode + WEBSITE_PULL_IMAGE_OVER_VNET = var.is_secure_mode ? 
"true" : "false" + SCM_DO_BUILD_DURING_DEPLOYMENT = "false" ENABLE_ORYX_BUILD = "false" - AzureWebJobsStorage = "DefaultEndpointsProtocol=https;AccountName=${var.blobStorageAccountName};EndpointSuffix=${var.endpointSuffix};AccountKey=${data.azurerm_storage_account.existing_sa.primary_access_key}" - WEBSITE_CONTENTAZUREFILECONNECTIONSTRING = "DefaultEndpointsProtocol=https;AccountName=${var.blobStorageAccountName};EndpointSuffix=${var.endpointSuffix};AccountKey=${data.azurerm_storage_account.existing_sa.primary_access_key}" - WEBSITE_CONTENTSHARE = "funcfileshare" + #Set all connections to use Managed Identity instead of connection strings + AzureWebJobsStorage = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/AZURE-STORAGE-CONNECTION-STRING)" + # These will need to be enabled one Azure Functions has support for Managed Identity + #AzureWebJobsStorage__blobServiceUri = "https://${var.blobStorageAccountName}.blob.${var.endpointSuffix}" + #AzureWebJobsStorage__queueServiceUri = "https://${var.blobStorageAccountName}.queue.${var.endpointSuffix}" + #AzureWebJobsStorage__tableServiceUri = "https://${var.blobStorageAccountName}.table.${var.endpointSuffix}" + #AzureWebJobsSecretStorageKeyVaultUri = data.azurerm_key_vault.existing.vault_uri + #AzureWebJobsSecretStorageType = "keyvault" + + AzureStorageConnection1__blobServiceUri = "https://${var.blobStorageAccountName}.blob.${var.endpointSuffix}" + AzureStorageConnection1__queueServiceUri = "https://${var.blobStorageAccountName}.queue.${var.endpointSuffix}" + AzureStorageConnection1__tableServiceUri = "https://${var.blobStorageAccountName}.table.${var.endpointSuffix}" + FUNCTIONS_WORKER_RUNTIME = var.runtime FUNCTIONS_EXTENSION_VERSION = "~4" WEBSITE_NODE_DEFAULT_VERSION = "~14" APPLICATIONINSIGHTS_CONNECTION_STRING = var.appInsightsConnectionString APPINSIGHTS_INSTRUMENTATIONKEY = var.appInsightsInstrumentationKey + # Environment variables used by custom Python code BLOB_STORAGE_ACCOUNT = 
var.blobStorageAccountName BLOB_STORAGE_ACCOUNT_ENDPOINT = var.blobStorageAccountEndpoint BLOB_STORAGE_ACCOUNT_UPLOAD_CONTAINER_NAME = var.blobStorageAccountUploadContainerName BLOB_STORAGE_ACCOUNT_OUTPUT_CONTAINER_NAME = var.blobStorageAccountOutputContainerName BLOB_STORAGE_ACCOUNT_LOG_CONTAINER_NAME = var.blobStorageAccountLogContainerName - AZURE_BLOB_STORAGE_KEY = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/AZURE-BLOB-STORAGE-KEY)" + AZURE_QUEUE_STORAGE_ENDPOINT = var.queueStorageAccountEndpoint CHUNK_TARGET_SIZE = var.chunkTargetSize TARGET_PAGES = var.targetPages FR_API_VERSION = var.formRecognizerApiVersion AZURE_FORM_RECOGNIZER_ENDPOINT = var.formRecognizerEndpoint - AZURE_FORM_RECOGNIZER_KEY = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/AZURE-FORM-RECOGNIZER-KEY)" - BLOB_CONNECTION_STRING = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/BLOB-CONNECTION-STRING)" COSMOSDB_URL = var.CosmosDBEndpointURL COSMOSDB_LOG_DATABASE_NAME = var.CosmosDBLogDatabaseName COSMOSDB_LOG_CONTAINER_NAME = var.CosmosDBLogContainerName @@ -173,15 +183,11 @@ resource "azurerm_linux_function_app" "function_app" { ENRICHMENT_BACKOFF = var.enrichmentBackoff ENABLE_DEV_CODE = tostring(var.enableDevCode) EMBEDDINGS_QUEUE = var.EMBEDDINGS_QUEUE - AZURE_SEARCH_SERVICE_KEY = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/AZURE-SEARCH-SERVICE-KEY)" - COSMOSDB_KEY = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/COSMOSDB-KEY)" AZURE_SEARCH_SERVICE_ENDPOINT = var.azureSearchServiceEndpoint AZURE_SEARCH_INDEX = var.azureSearchIndex - WEBSITE_PULL_IMAGE_OVER_VNET = var.is_secure_mode ? 
"true" : "false" - } - - identity { - type = "SystemAssigned" + AZURE_AI_CREDENTIAL_DOMAIN = var.azure_ai_credential_domain + AZURE_OPENAI_AUTHORITY_HOST = var.azure_environment + LOCAL_DEBUG = "false" } } @@ -231,7 +237,9 @@ resource "azurerm_key_vault_access_policy" "policy" { secret_permissions = [ "Get", - "List" + "List", + "Set", + "Delete" ] } diff --git a/infra/core/host/functions/outputs.tf b/infra/core/host/functions/outputs.tf index 0bc42fa88..0a287708c 100644 --- a/infra/core/host/functions/outputs.tf +++ b/infra/core/host/functions/outputs.tf @@ -2,19 +2,34 @@ output "function_app_name" { value = azurerm_linux_function_app.function_app.name } -output "function_app_identity_principal_id" { +output "name" { + value = azurerm_service_plan.funcServicePlan.name +} + +output "subnet_integration_id" { + value = var.subnetIntegration_id +} + +output "identityPrincipalId" { value = azurerm_linux_function_app.function_app.identity.0.principal_id } +output "AzureWebJobsStorage__accountName" { + value = var.blobStorageAccountName +} -# output "id" { -# value = azurerm_service_plan.funcServicePlan.id -# } +output "AzureWebJobsStorage__blobServiceUri" { + value = "https://${var.blobStorageAccountName}.blob.${var.endpointSuffix}" +} -output "name" { - value = azurerm_service_plan.funcServicePlan.name +output "STORAGE_CONNECTION_STRING__accountName" { + value = var.blobStorageAccountName } -output "subnet_integration_id" { - value = var.subnetIntegration_id -} \ No newline at end of file +output "STORAGE_CONNECTION_STRING__queueServiceUri" { + value = "https://${var.blobStorageAccountName}.queue.${var.endpointSuffix}" +} + +output "STORAGE_CONNECTION_STRING__blobServiceUri" { + value = "https://${var.blobStorageAccountName}.blob.${var.endpointSuffix}" +} \ No newline at end of file diff --git a/infra/core/host/functions/variables.tf b/infra/core/host/functions/variables.tf index 723deb773..38aa7904e 100644 --- a/infra/core/host/functions/variables.tf +++ 
b/infra/core/host/functions/variables.tf @@ -88,6 +88,11 @@ variable "blobStorageAccountLogContainerName" { type = string } +variable "queueStorageAccountEndpoint" { + description = "Azure Queue Storage Account Endpoint" + type = string +} + variable "chunkTargetSize" { description = "Chunk Target Size" type = string @@ -274,11 +279,6 @@ variable "managedIdentity" { default = false } -variable "ftpsState" { - type = string - default = "FtpsOnly" -} - variable "azure_portal_domain" { type = string } @@ -315,3 +315,7 @@ variable "logAnalyticsWorkspaceResourceId" { variable "azure_environment" { type = string } + +variable "azure_ai_credential_domain" { + type = string +} diff --git a/infra/core/host/webapp/variables.tf b/infra/core/host/webapp/variables.tf index 017e39d14..4ac219f1a 100644 --- a/infra/core/host/webapp/variables.tf +++ b/infra/core/host/webapp/variables.tf @@ -131,7 +131,7 @@ variable "vnet_name" { type = string } -variable "subnet_id" { +variable "subnet_name" { type = string } diff --git a/infra/core/host/webapp/webapp.tf b/infra/core/host/webapp/webapp.tf index 6b74dcebc..96b2cafa7 100644 --- a/infra/core/host/webapp/webapp.tf +++ b/infra/core/host/webapp/webapp.tf @@ -114,12 +114,7 @@ resource "azurerm_linux_web_app" "app_service" { "SCM_DO_BUILD_DURING_DEPLOYMENT" = lower(tostring(var.scmDoBuildDuringDeployment)) "ENABLE_ORYX_BUILD" = lower(tostring(var.enableOryxBuild)) "APPLICATIONINSIGHTS_CONNECTION_STRING" = var.applicationInsightsConnectionString - "AZURE_SEARCH_SERVICE_KEY" = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/AZURE-SEARCH-SERVICE-KEY)" - "COSMOSDB_KEY" = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/COSMOSDB-KEY)" "BING_SEARCH_KEY" = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/BINGSEARCH-KEY)" - "AZURE_BLOB_STORAGE_KEY" = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/AZURE-BLOB-STORAGE-KEY)" - "AZURE_AI_KEY" = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/AZURE-AI-KEY)" - 
"AZURE_OPENAI_SERVICE_KEY" = "@Microsoft.KeyVault(SecretUri=${var.keyVaultUri}secrets/AZURE-OPENAI-SERVICE-KEY)" "WEBSITE_PULL_IMAGE_OVER_VNET" = var.is_secure_mode ? "true" : "false" "WEBSITES_PORT" = "6000" "WEBSITES_CONTAINER_START_TIME_LIMIT" = "1600" @@ -250,12 +245,19 @@ resource "azurerm_key_vault_access_policy" "policy" { ] } +data "azurerm_subnet" "subnet" { + count = var.is_secure_mode ? 1 : 0 + name = var.subnet_name + virtual_network_name = var.vnet_name + resource_group_name = var.resourceGroupName +} + resource "azurerm_private_endpoint" "backendPrivateEndpoint" { count = var.is_secure_mode ? 1 : 0 name = "${var.name}-private-endpoint" location = var.location resource_group_name = var.resourceGroupName - subnet_id = var.subnet_id + subnet_id = data.azurerm_subnet.subnet[0].id tags = var.tags custom_network_interface_name = "infoasstwebnic" diff --git a/infra/core/network/network/output.tf b/infra/core/network/network/output.tf index 676617de2..159202654 100644 --- a/infra/core/network/network/output.tf +++ b/infra/core/network/network/output.tf @@ -71,6 +71,10 @@ output "snetIntegration_id" { value = data.azurerm_subnet.integration.id } +output "snetIntegration_name" { + value = data.azurerm_subnet.integration.name +} + output "snetSearch_name" { value = data.azurerm_subnet.aiSearch.name } diff --git a/infra/core/network/network/variables.tf b/infra/core/network/network/variables.tf index 3e88d047f..d3b8a35d2 100644 --- a/infra/core/network/network/variables.tf +++ b/infra/core/network/network/variables.tf @@ -70,10 +70,6 @@ variable "snetSearchServiceCIDR" { type = string } -variable "snetAzureVideoIndexerCIDR" { - type = string -} - variable "snetBingServiceCIDR" { type = string } diff --git a/infra/core/search/search-services.tf b/infra/core/search/search-services.tf index 1ffeec9a7..b0afd64dd 100644 --- a/infra/core/search/search-services.tf +++ b/infra/core/search/search-services.tf @@ -6,7 +6,7 @@ resource "azurerm_search_service" "search" { sku 
= var.sku["name"] tags = var.tags public_network_access_enabled = var.is_secure_mode ? false : true - local_authentication_enabled = var.is_secure_mode ? false : true + local_authentication_enabled = false replica_count = 1 partition_count = 1 semantic_search_sku = var.semanticSearch @@ -16,18 +16,6 @@ resource "azurerm_search_service" "search" { } } -module "search_service_key" { - source = "../security/keyvaultSecret" - key_vault_name = var.key_vault_name - resourceGroupName = var.resourceGroupName - secret_name = "AZURE-SEARCH-SERVICE-KEY" - secret_value = azurerm_search_service.search.primary_key - arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api - alias = "searchkey" - tags = var.tags - kv_secret_expiration = var.kv_secret_expiration -} - data "azurerm_subnet" "subnet" { count = var.is_secure_mode ? 1 : 0 name = var.subnet_name diff --git a/infra/core/search/variables.tf b/infra/core/search/variables.tf index 9b3ec3fa2..3e0cba828 100644 --- a/infra/core/search/variables.tf +++ b/infra/core/search/variables.tf @@ -62,9 +62,4 @@ variable "private_dns_zone_ids" { variable "arm_template_schema_mgmt_api" { type = string -} - -variable "kv_secret_expiration" { - type = string - description = "The value for key vault secret expiration in seconds since 1970-01-01T00:00:00Z" } \ No newline at end of file diff --git a/infra/core/security/keyvault/keyvault.tf b/infra/core/security/keyvault/keyvault.tf index 964d4ce79..87ace8edc 100644 --- a/infra/core/security/keyvault/keyvault.tf +++ b/infra/core/security/keyvault/keyvault.tf @@ -44,18 +44,6 @@ resource "azurerm_key_vault_access_policy" "infoasst" { ] } -module "spClientKeySecret" { - source = "../keyvaultSecret" - resourceGroupName = var.resourceGroupName - arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api - key_vault_name = azurerm_key_vault.kv.name - secret_name = "AZURE-CLIENT-SECRET" - secret_value = var.spClientSecret - tags = var.tags - alias = "clientsecret" - kv_secret_expiration = 
var.kv_secret_expiration -} - data "azurerm_subnet" "subnet" { count = var.is_secure_mode ? 1 : 0 name = var.subnet_name diff --git a/infra/core/security/keyvault/variables.tf b/infra/core/security/keyvault/variables.tf index 0ad56e79e..9671fc795 100644 --- a/infra/core/security/keyvault/variables.tf +++ b/infra/core/security/keyvault/variables.tf @@ -57,9 +57,4 @@ variable "azure_keyvault_domain" { variable "arm_template_schema_mgmt_api" { type = string -} - -variable "kv_secret_expiration" { - type = string - description = "The value for key vault secret expiration in seconds since 1970-01-01T00:00:00Z" } \ No newline at end of file diff --git a/infra/core/security/keyvaultSecret/keyvaultSecret.tf b/infra/core/security/keyvaultSecret/keyvaultSecret.tf index 2e5dcc803..eddb86a50 100644 --- a/infra/core/security/keyvaultSecret/keyvaultSecret.tf +++ b/infra/core/security/keyvaultSecret/keyvaultSecret.tf @@ -18,6 +18,7 @@ resource "azurerm_resource_group_template_deployment" "kv_secret" { "value" = { value = "${var.secret_value}" }, "tags" = { value = var.tags }, "expiration" = { value = var.kv_secret_expiration }, + "contentType" = { value = var.contentType }, }) template_content = data.template_file.workflow.template # The filemd5 forces this to run when the file is changed diff --git a/infra/core/security/keyvaultSecret/variables.tf b/infra/core/security/keyvaultSecret/variables.tf index fae0ad97c..cdf3bdc1e 100644 --- a/infra/core/security/keyvaultSecret/variables.tf +++ b/infra/core/security/keyvaultSecret/variables.tf @@ -31,4 +31,8 @@ variable "alias" { variable "kv_secret_expiration" { type = string description = "The value for key vault secret expiration in seconds since 1970-01-01T00:00:00Z" +} + +variable "contentType" { + type = string } \ No newline at end of file diff --git a/infra/core/storage/outputs.tf b/infra/core/storage/outputs.tf index 6fd1c7240..6788bc538 100644 --- a/infra/core/storage/outputs.tf +++ b/infra/core/storage/outputs.tf @@ -2,10 
+2,14 @@ output "name" { value = azurerm_storage_account.storage.name } -output "primary_endpoints" { +output "primary_blob_endpoint" { value = azurerm_storage_account.storage.primary_blob_endpoint } +output "primary_queue_endpoint" { + value = azurerm_storage_account.storage.primary_queue_endpoint +} + output "storage_account_id" { value = azurerm_storage_account.storage.id } diff --git a/infra/core/storage/storage-account.tf b/infra/core/storage/storage-account.tf index 4d2db92c2..90fb7c4f4 100644 --- a/infra/core/storage/storage-account.tf +++ b/infra/core/storage/storage-account.tf @@ -15,7 +15,7 @@ resource "azurerm_storage_account" "storage" { enable_https_traffic_only = true public_network_access_enabled = var.is_secure_mode ? false : true allow_nested_items_to_be_public = false - shared_access_key_enabled = var.is_secure_mode ? false : true + shared_access_key_enabled = true #var.is_secure_mode ? false : true # This will need to be enabled once the Azure Functions can support Entra ID auth network_rules { default_action = var.is_secure_mode ? "Deny" : "Allow" @@ -183,6 +183,19 @@ resource "azurerm_resource_group_template_deployment" "queue" { deployment_mode = "Incremental" } +module "storage_connection_string" { + source = "../security/keyvaultSecret" + resourceGroupName = var.resourceGroupName + arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api + key_vault_name = var.key_vault_name + secret_name = "AZURE-STORAGE-CONNECTION-STRING" + secret_value = azurerm_storage_account.storage.primary_blob_connection_string + tags = var.tags + alias = "blobconnstring" + kv_secret_expiration = var.kv_secret_expiration + contentType = "application/vnd.ms-StorageConnectionString" +} + data "azurerm_subnet" "subnet" { count = var.is_secure_mode ? 
1 : 0 name = var.subnet_name @@ -279,30 +292,6 @@ resource "azurerm_private_endpoint" "queuePrivateEndpoint" { } } -module "storage_connection_string" { - source = "../security/keyvaultSecret" - resourceGroupName = var.resourceGroupName - arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api - key_vault_name = var.key_vault_name - secret_name = "BLOB-CONNECTION-STRING" - secret_value = azurerm_storage_account.storage.primary_connection_string - tags = var.tags - alias = "blobConn" - kv_secret_expiration = var.kv_secret_expiration -} - -module "storage_key" { - source = "../security/keyvaultSecret" - resourceGroupName = var.resourceGroupName - arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api - key_vault_name = var.key_vault_name - secret_name = "AZURE-BLOB-STORAGE-KEY" - secret_value = azurerm_storage_account.storage.primary_access_key - tags = var.tags - alias = "blobkey" - kv_secret_expiration = var.kv_secret_expiration -} - // Only create the config blob if we are not in secure mode as SharePoint integration is not supported in secure mode resource "azurerm_storage_blob" "config" { depends_on = [ azurerm_resource_group_template_deployment.container ] diff --git a/infra/core/videoindexer/variables.tf b/infra/core/videoindexer/variables.tf deleted file mode 100644 index ffe18e438..000000000 --- a/infra/core/videoindexer/variables.tf +++ /dev/null @@ -1,29 +0,0 @@ -variable "resource_group_name" { - type = string -} - -variable "location" { - type = string -} - -variable "random_string" { - type = string -} - -variable "tags" {} - -variable "azuread_service_principal_object_id" { - type = string -} - -variable "subscription_id" { - type = string -} - -variable "arm_template_schema_mgmt_api" { - type = string -} - -variable "video_indexer_api_version" { - type = string -} diff --git a/infra/core/videoindexer/vi.tf b/infra/core/videoindexer/vi.tf deleted file mode 100644 index 3dde8e9de..000000000 --- a/infra/core/videoindexer/vi.tf +++ 
/dev/null @@ -1,73 +0,0 @@ -locals { - arm_file_path = "arm_templates/video_indexer/avi.template.json" -} - -# Create a media services instance -resource "azurerm_storage_account" "media_storage" { - location = var.location - resource_group_name = var.resource_group_name - tags = var.tags - - account_tier = "Standard" - account_replication_type = "LRS" - name = "infoasststoremedia${var.random_string}" - enable_https_traffic_only = true - allow_nested_items_to_be_public = false -} - -# Create the VI instance via ARM Template -data "template_file" "workflow" { - template = file(local.arm_file_path) - vars = { - arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api - } -} - -resource "azurerm_user_assigned_identity" "vi" { - resource_group_name = var.resource_group_name - location = var.location - name = "infoasst-ua-ident-${var.random_string}" -} - -resource "azurerm_role_assignment" "vi_storageaccount_mi_access" { - scope = azurerm_storage_account.media_storage.id - role_definition_name = "Storage Blob Data Contributor" - principal_id = azurerm_user_assigned_identity.vi.principal_id -} - -resource "azurerm_resource_group_template_deployment" "vi" { - depends_on = [azurerm_role_assignment.vi_storageaccount_mi_access] - resource_group_name = var.resource_group_name - parameters_content = jsonencode({ - "name" = { value = "infoasst-avi-${var.random_string}" }, - "managedIdentityId" = { value = azurerm_user_assigned_identity.vi.id }, - "storageServicesResourceId" = { value = azurerm_storage_account.media_storage.id }, - "tags" = { value = var.tags }, - "apiVersion" = { value = var.video_indexer_api_version } - }) - template_content = data.template_file.workflow.template - # The filemd5 forces this to run when the file is changed - # this ensures the keys are up-to-date - name = "avi-${filemd5(local.arm_file_path)}" - deployment_mode = "Incremental" -} - -output "account_id" { - value = 
jsondecode(azurerm_resource_group_template_deployment.vi.output_content).avam_id.value -} - -output "media_storage_account_name" { - value = azurerm_storage_account.media_storage.name -} - -output "media_storage_account_id" { - value = azurerm_storage_account.media_storage.id -} - -output "vi_name" { - value = "infoasst-avi-${var.random_string}" -} - -output "vi_id" { - value = "/subscriptions/${var.subscription_id}/resourceGroups/${var.resource_group_name}/providers/Microsoft.VideoIndexer/accounts/infoasst-avi-${var.random_string}" -} \ No newline at end of file diff --git a/infra/main.tf b/infra/main.tf index 3c3bea04d..b7682d2e4 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -2,9 +2,9 @@ locals { tags = { ProjectName = "Information Assistant", BuildNumber = var.buildNumber } azure_roles = jsondecode(file("${path.module}/azure_roles.json")) selected_roles = ["CognitiveServicesOpenAIUser", - "StorageBlobDataReader", - "StorageBlobDataContributor", - "SearchIndexDataReader", + "CognitiveServicesUser", + "StorageBlobDataOwner", + "StorageQueueDataContributor", "SearchIndexDataContributor"] } @@ -63,7 +63,6 @@ module "network" { snetEnrichmentCIDR = var.enrichment_app_CIDR snetIntegrationCIDR = var.integration_CIDR snetSearchServiceCIDR = var.search_service_CIDR - snetAzureVideoIndexerCIDR = var.azure_video_indexer_CIDR snetBingServiceCIDR = var.bing_service_CIDR snetAzureOpenAICIDR = var.azure_openAI_CIDR snetACRCIDR = var.acr_CIDR @@ -264,7 +263,6 @@ module "kvModule" { depends_on = [ module.entraObjects, module.privateDnsZoneKeyVault[0] ] azure_keyvault_domain = var.azure_keyvault_domain arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api - kv_secret_expiration = var.kv_secret_expiration } module "enrichmentApp" { @@ -281,7 +279,7 @@ module "enrichmentApp" { kind = "linux" reserved = true resourceGroupName = azurerm_resource_group.rg.name - storageAccountId = 
"/subscriptions/${var.subscriptionId}/resourceGroups/${azurerm_resource_group.rg.name}/providers/Microsoft.Storage/storageAccounts/${module.storage.name}/services/queue/queues/${var.embeddingsQueue}" + storageAccountId = "/subscriptions/${data.azurerm_client_config.current.subscription_id}/resourceGroups/${azurerm_resource_group.rg.name}/providers/Microsoft.Storage/storageAccounts/${module.storage.name}/services/queue/queues/${var.embeddingsQueue}" scmDoBuildDuringDeployment = false enableOryxBuild = false managedIdentity = true @@ -310,7 +308,8 @@ module "enrichmentApp" { AZURE_BLOB_STORAGE_ACCOUNT = module.storage.name AZURE_BLOB_STORAGE_CONTAINER = var.contentContainerName AZURE_BLOB_STORAGE_UPLOAD_CONTAINER = var.uploadContainerName - AZURE_BLOB_STORAGE_ENDPOINT = module.storage.primary_endpoints + AZURE_BLOB_STORAGE_ENDPOINT = module.storage.primary_blob_endpoint + AZURE_QUEUE_STORAGE_ENDPOINT = module.storage.primary_queue_endpoint COSMOSDB_URL = module.cosmosdb.CosmosDBEndpointURL COSMOSDB_LOG_DATABASE_NAME = module.cosmosdb.CosmosDBLogDatabaseName COSMOSDB_LOG_CONTAINER_NAME = module.cosmosdb.CosmosDBLogContainerName @@ -324,6 +323,8 @@ module "enrichmentApp" { TARGET_EMBEDDINGS_MODEL = var.useAzureOpenAIEmbeddings ? "azure-openai_${var.azureOpenAIEmbeddingDeploymentName}" : var.sentenceTransformersModelName EMBEDDING_VECTOR_SIZE = var.useAzureOpenAIEmbeddings ? 
1536 : var.sentenceTransformerEmbeddingVectorSize AZURE_SEARCH_SERVICE_ENDPOINT = module.searchServices.endpoint + AZURE_AI_CREDENTIAL_DOMAIN = var.azure_ai_private_link_domain + AZURE_OPENAI_AUTHORITY_HOST = var.azure_openai_authority_host } } @@ -353,9 +354,9 @@ module "webapp" { applicationInsightsConnectionString = module.logging.applicationInsightsConnectionString keyVaultUri = module.kvModule.keyVaultUri keyVaultName = module.kvModule.keyVaultName - tenantId = var.tenantId + tenantId = data.azurerm_client_config.current.tenant_id is_secure_mode = var.is_secure_mode - subnet_id = var.is_secure_mode ? module.network[0].snetApp_id : null + subnet_name = var.is_secure_mode ? module.network[0].snetApp_name : null vnet_name = var.is_secure_mode ? module.network[0].vnet_name : null snetIntegration_id = var.is_secure_mode ? module.network[0].snetIntegration_id : null private_dns_zone_ids = var.is_secure_mode ? [module.privateDnsZoneApp[0].privateDnsZoneResourceId] : null @@ -371,7 +372,7 @@ module "webapp" { appSettings = { APPLICATIONINSIGHTS_CONNECTION_STRING = module.logging.applicationInsightsConnectionString AZURE_BLOB_STORAGE_ACCOUNT = module.storage.name - AZURE_BLOB_STORAGE_ENDPOINT = module.storage.primary_endpoints + AZURE_BLOB_STORAGE_ENDPOINT = module.storage.primary_blob_endpoint AZURE_BLOB_STORAGE_CONTAINER = var.contentContainerName AZURE_BLOB_STORAGE_UPLOAD_CONTAINER = var.uploadContainerName AZURE_OPENAI_SERVICE = var.useExistingAOAIService ? 
var.azureOpenAIServiceName : module.openaiServices.name @@ -408,8 +409,8 @@ module "webapp" { ENABLE_UNGROUNDED_CHAT = var.enableUngroundedChat ENABLE_MATH_ASSISTANT = var.enableMathAssitant ENABLE_TABULAR_DATA_ASSISTANT = var.enableTabularDataAssistant - ENABLE_MULTIMEDIA = var.enableMultimedia MAX_CSV_FILE_SIZE = var.maxCsvFileSize + AZURE_AI_CREDENTIAL_DOMAIN = var.azure_ai_private_link_domain } aadClientId = module.entraObjects.azure_ad_web_app_client_id @@ -438,10 +439,11 @@ module "functions" { appInsightsConnectionString = module.logging.applicationInsightsConnectionString appInsightsInstrumentationKey = module.logging.applicationInsightsInstrumentationKey blobStorageAccountName = module.storage.name - blobStorageAccountEndpoint = module.storage.primary_endpoints + blobStorageAccountEndpoint = module.storage.primary_blob_endpoint blobStorageAccountOutputContainerName = var.contentContainerName blobStorageAccountUploadContainerName = var.uploadContainerName - blobStorageAccountLogContainerName = var.functionLogsContainerName + blobStorageAccountLogContainerName = var.functionLogsContainerName + queueStorageAccountEndpoint = module.storage.primary_queue_endpoint formRecognizerEndpoint = module.aiDocIntelligence.formRecognizerAccountEndpoint CosmosDBEndpointURL = module.cosmosdb.CosmosDBEndpointURL CosmosDBLogDatabaseName = module.cosmosdb.CosmosDBLogDatabaseName @@ -485,6 +487,7 @@ module "functions" { container_registry_admin_password = module.acr.admin_password container_registry_id = module.acr.acr_id azure_environment = var.azure_environment + azure_ai_credential_domain = var.azure_ai_private_link_domain } module "openaiServices" { @@ -493,7 +496,6 @@ module "openaiServices" { location = var.location tags = local.tags resourceGroupName = azurerm_resource_group.rg.name - openaiServiceKey = var.azureOpenAIServiceKey useExistingAOAIService = var.useExistingAOAIService is_secure_mode = var.is_secure_mode subnet_name = var.is_secure_mode ? 
module.network[0].snetAzureOpenAI_name : null @@ -502,7 +504,6 @@ module "openaiServices" { private_dns_zone_ids = var.is_secure_mode ? [module.privateDnsZoneAzureOpenAi[0].privateDnsZoneResourceId] : null arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api key_vault_name = module.kvModule.keyVaultName - kv_secret_expiration = var.kv_secret_expiration logAnalyticsWorkspaceResourceId = module.logging.logAnalyticsId deployments = [ @@ -544,7 +545,6 @@ module "aiDocIntelligence" { vnet_name = var.is_secure_mode ? module.network[0].vnet_name : null private_dns_zone_ids = var.is_secure_mode ? [module.privateDnsZoneAzureAi[0].privateDnsZoneResourceId] : null arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api - kv_secret_expiration = var.kv_secret_expiration } module "cognitiveServices" { @@ -577,7 +577,6 @@ module "searchServices" { private_dns_zone_ids = var.is_secure_mode ? [module.privateDnsZoneSearchService[0].privateDnsZoneResourceId] : null arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api key_vault_name = module.kvModule.keyVaultName - kv_secret_expiration = var.kv_secret_expiration } module "cosmosdb" { @@ -594,7 +593,6 @@ module "cosmosdb" { vnet_name = var.is_secure_mode ? module.network[0].vnet_name : null private_dns_zone_ids = var.is_secure_mode ? [module.privateDnsZoneCosmosDb[0].privateDnsZoneResourceId] : null arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api - kv_secret_expiration = var.kv_secret_expiration } module "acr"{ @@ -627,27 +625,13 @@ module "sharepoint" { ] } -// Video Indexer is not supported in secure mode -module "video_indexer" { - count = var.is_secure_mode ? 0 : var.enableMultimedia ? 
1 : 0 - source = "./core/videoindexer" - location = azurerm_resource_group.rg.location - resource_group_name = azurerm_resource_group.rg.name - subscription_id = data.azurerm_client_config.current.subscription_id - random_string = random_string.random.result - tags = local.tags - azuread_service_principal_object_id = module.entraObjects.azure_ad_web_app_client_id - arm_template_schema_mgmt_api = var.arm_template_schema_mgmt_api - video_indexer_api_version = var.video_indexer_api_version -} - module "azMonitor" { source = "./core/logging/monitor" logAnalyticsName = module.logging.logAnalyticsName location = var.location logWorkbookName = "infoasst-lw-${random_string.random.result}" resourceGroupName = azurerm_resource_group.rg.name - componentResource = "/subscriptions/${var.subscriptionId}/resourceGroups/${azurerm_resource_group.rg.name}/providers/Microsoft.OperationalInsights/workspaces/${module.logging.logAnalyticsName}" + componentResource = "/subscriptions/${data.azurerm_client_config.current.subscription_id}/resourceGroups/${azurerm_resource_group.rg.name}/providers/Microsoft.OperationalInsights/workspaces/${module.logging.logAnalyticsName}" } // Bing Search is not supported in US Government or Secure Mode @@ -676,13 +660,21 @@ module "userRoles" { resourceGroupId = azurerm_resource_group.rg.id } +resource "azurerm_cosmosdb_sql_role_assignment" "user_cosmosdb_data_contributor" { + resource_group_name = azurerm_resource_group.rg.name + account_name = module.cosmosdb.name + role_definition_id = "/subscriptions/${data.azurerm_client_config.current.subscription_id}/resourceGroups/${azurerm_resource_group.rg.name}/providers/Microsoft.DocumentDB/databaseAccounts/${module.cosmosdb.name}/sqlRoleDefinitions/00000000-0000-0000-0000-000000000002" #Cosmos DB Built-in Data Contributor + principal_id = data.azurerm_client_config.current.object_id + scope = module.cosmosdb.id +} + data "azurerm_resource_group" "existing" { count = var.useExistingAOAIService ? 
1 : 0 name = var.azureOpenAIResourceGroup } # # // SYSTEM IDENTITY ROLES -module "openAiRoleBackend" { +module "webApp_OpenAiRole" { source = "./core/security/role" scope = var.useExistingAOAIService ? data.azurerm_resource_group.existing[0].id : azurerm_resource_group.rg.id @@ -693,18 +685,84 @@ module "openAiRoleBackend" { resourceGroupId = azurerm_resource_group.rg.id } -module "storageRoleBackend" { +module "enrichmentApp_OpenAiRole" { + source = "./core/security/role" + + scope = var.useExistingAOAIService ? data.azurerm_resource_group.existing[0].id : azurerm_resource_group.rg.id + principalId = module.enrichmentApp.identityPrincipalId + roleDefinitionId = local.azure_roles.CognitiveServicesOpenAIUser + principalType = "ServicePrincipal" + subscriptionId = data.azurerm_client_config.current.subscription_id + resourceGroupId = azurerm_resource_group.rg.id +} + +module "webApp_CognitiveServicesUser" { source = "./core/security/role" scope = azurerm_resource_group.rg.id principalId = module.webapp.identityPrincipalId - roleDefinitionId = local.azure_roles.StorageBlobDataReader + roleDefinitionId = local.azure_roles.CognitiveServicesUser + principalType = "ServicePrincipal" + subscriptionId = data.azurerm_client_config.current.subscription_id + resourceGroupId = azurerm_resource_group.rg.id +} + +module "functionApp_CognitiveServicesUser" { + source = "./core/security/role" + + scope = azurerm_resource_group.rg.id + principalId = module.functions.identityPrincipalId + roleDefinitionId = local.azure_roles.CognitiveServicesUser + principalType = "ServicePrincipal" + subscriptionId = data.azurerm_client_config.current.subscription_id + resourceGroupId = azurerm_resource_group.rg.id +} + +module "enrichmentApp_CognitiveServicesUser" { + source = "./core/security/role" + + scope = azurerm_resource_group.rg.id + principalId = module.enrichmentApp.identityPrincipalId + roleDefinitionId = local.azure_roles.CognitiveServicesUser + principalType = "ServicePrincipal" + 
subscriptionId = data.azurerm_client_config.current.subscription_id + resourceGroupId = azurerm_resource_group.rg.id +} + +module "enrichmentApp_StorageQueueDataContributor" { + source = "./core/security/role" + + scope = azurerm_resource_group.rg.id + principalId = module.enrichmentApp.identityPrincipalId + roleDefinitionId = local.azure_roles.StorageQueueDataContributor + principalType = "ServicePrincipal" + subscriptionId = data.azurerm_client_config.current.subscription_id + resourceGroupId = azurerm_resource_group.rg.id +} + +module "functionApp_StorageQueueDataContributor" { + source = "./core/security/role" + + scope = azurerm_resource_group.rg.id + principalId = module.functions.identityPrincipalId + roleDefinitionId = local.azure_roles.StorageQueueDataContributor + principalType = "ServicePrincipal" + subscriptionId = data.azurerm_client_config.current.subscription_id + resourceGroupId = azurerm_resource_group.rg.id +} + +module "webApp_StorageBlobDataContributor" { + source = "./core/security/role" + + scope = azurerm_resource_group.rg.id + principalId = module.webapp.identityPrincipalId + roleDefinitionId = local.azure_roles.StorageBlobDataContributor principalType = "ServicePrincipal" subscriptionId = data.azurerm_client_config.current.subscription_id resourceGroupId = azurerm_resource_group.rg.id } -module "searchRoleBackend" { +module "webApp_SearchIndexDataReader" { source = "./core/security/role" scope = azurerm_resource_group.rg.id @@ -715,26 +773,93 @@ module "searchRoleBackend" { resourceGroupId = azurerm_resource_group.rg.id } -module "storageRoleFunc" { +module "functionApp_SearchIndexDataReader" { source = "./core/security/role" scope = azurerm_resource_group.rg.id - principalId = module.functions.function_app_identity_principal_id - roleDefinitionId = local.azure_roles.StorageBlobDataReader + principalId = module.functions.identityPrincipalId + roleDefinitionId = local.azure_roles.SearchIndexDataReader + principalType = "ServicePrincipal" + 
subscriptionId = data.azurerm_client_config.current.subscription_id + resourceGroupId = azurerm_resource_group.rg.id +} + +module "encrichmentApp_SearchIndexDataReader" { + source = "./core/security/role" + + scope = azurerm_resource_group.rg.id + principalId = module.enrichmentApp.identityPrincipalId + roleDefinitionId = local.azure_roles.SearchIndexDataContributor + principalType = "ServicePrincipal" + subscriptionId = data.azurerm_client_config.current.subscription_id + resourceGroupId = azurerm_resource_group.rg.id +} + +module "fuctionApp_StorageBlobDataOwner" { + source = "./core/security/role" + + scope = azurerm_resource_group.rg.id + principalId = module.functions.identityPrincipalId + roleDefinitionId = local.azure_roles.StorageBlobDataOwner + principalType = "ServicePrincipal" + subscriptionId = data.azurerm_client_config.current.subscription_id + resourceGroupId = azurerm_resource_group.rg.id +} + +module "enrichmentApp_StorageBlobDataOwner" { + source = "./core/security/role" + + scope = azurerm_resource_group.rg.id + principalId = module.enrichmentApp.identityPrincipalId + roleDefinitionId = local.azure_roles.StorageBlobDataOwner + principalType = "ServicePrincipal" + subscriptionId = data.azurerm_client_config.current.subscription_id + resourceGroupId = azurerm_resource_group.rg.id +} + +module "fuctionApp_StorageAccountContributor" { + source = "./core/security/role" + + scope = azurerm_resource_group.rg.id + principalId = module.functions.identityPrincipalId + roleDefinitionId = local.azure_roles.StorageAccountContributor principalType = "ServicePrincipal" subscriptionId = data.azurerm_client_config.current.subscription_id resourceGroupId = azurerm_resource_group.rg.id } -module "aviRoleBackend" { - source = "./core/security/role" - count = var.enableMultimedia ? 
1 : 0 - scope = module.video_indexer[0].vi_id - principalId = module.webapp.identityPrincipalId - roleDefinitionId = local.azure_roles.Contributor - principalType = "ServicePrincipal" - subscriptionId = data.azurerm_client_config.current.subscription_id - resourceGroupId = azurerm_resource_group.rg.id +resource "azurerm_cosmosdb_sql_role_assignment" "webApp_cosmosdb_data_contributor" { + resource_group_name = azurerm_resource_group.rg.name + account_name = module.cosmosdb.name + role_definition_id = "/subscriptions/${data.azurerm_client_config.current.subscription_id}/resourceGroups/${azurerm_resource_group.rg.name}/providers/Microsoft.DocumentDB/databaseAccounts/${module.cosmosdb.name}/sqlRoleDefinitions/00000000-0000-0000-0000-000000000002" #Cosmos DB Built-in Data Contributor + principal_id = module.webapp.identityPrincipalId + scope = module.cosmosdb.id +} + +resource "azurerm_cosmosdb_sql_role_assignment" "functionApp_cosmosdb_data_contributor" { + resource_group_name = azurerm_resource_group.rg.name + account_name = module.cosmosdb.name + role_definition_id = "/subscriptions/${data.azurerm_client_config.current.subscription_id}/resourceGroups/${azurerm_resource_group.rg.name}/providers/Microsoft.DocumentDB/databaseAccounts/${module.cosmosdb.name}/sqlRoleDefinitions/00000000-0000-0000-0000-000000000002" #Cosmos DB Built-in Data Contributor + principal_id = module.functions.identityPrincipalId + scope = module.cosmosdb.id +} + +resource "azurerm_cosmosdb_sql_role_assignment" "enrichmentApp_cosmosdb_data_contributor" { + resource_group_name = azurerm_resource_group.rg.name + account_name = module.cosmosdb.name + role_definition_id = "/subscriptions/${data.azurerm_client_config.current.subscription_id}/resourceGroups/${azurerm_resource_group.rg.name}/providers/Microsoft.DocumentDB/databaseAccounts/${module.cosmosdb.name}/sqlRoleDefinitions/00000000-0000-0000-0000-000000000002" #Cosmos DB Built-in Data Contributor + principal_id = 
module.enrichmentApp.identityPrincipalId + scope = module.cosmosdb.id +} + +module "docIntel_StorageBlobDataReader" { + source = "./core/security/role" + scope = azurerm_resource_group.rg.id + principalId = module.aiDocIntelligence.docIntelligenceIdentity + roleDefinitionId = local.azure_roles.StorageBlobDataReader + principalType = "ServicePrincipal" + subscriptionId = data.azurerm_client_config.current.subscription_id + resourceGroupId = azurerm_resource_group.rg.id } # // MANAGEMENT SERVICE PRINCIPAL ROLES diff --git a/infra/outputs.tf b/infra/outputs.tf index 63c2e322d..627fc18b3 100644 --- a/infra/outputs.tf +++ b/infra/outputs.tf @@ -23,10 +23,6 @@ output "AZURE_STORAGE_ACCOUNT" { value = module.storage.name } -output "AZURE_STORAGE_ACCOUNT_ENDPOINT" { - value = module.storage.primary_endpoints -} - output "AZURE_STORAGE_CONTAINER" { value = var.contentContainerName } @@ -116,11 +112,15 @@ output "ENABLE_DEV_CODE" { } output "AZURE_SUBSCRIPTION_ID" { - value = var.subscriptionId + value = data.azurerm_client_config.current.subscription_id } output "BLOB_STORAGE_ACCOUNT_ENDPOINT" { - value = module.storage.primary_endpoints + value = module.storage.primary_blob_endpoint +} + +output "AZURE_QUEUE_STORAGE_ENDPOINT" { + value = module.storage.primary_queue_endpoint } output "EMBEDDING_VECTOR_SIZE" { @@ -151,7 +151,7 @@ output "ENRICHMENT_APPSERVICE_URL" { value = module.enrichmentApp.uri } -output "DEPLOYMENT_KEYVAULT_NAME" { +output "AZURE_KEYVAULT_NAME" { value = module.kvModule.keyVaultName } @@ -202,4 +202,32 @@ output "CONTAINER_REGISTRY_PASSWORD" { output "DNS_PRIVATE_RESOLVER_IP" { value = var.is_secure_mode ? 
module.network[0].dns_private_resolver_ip : "" +} + +output "AZURE_AI_CREDENTIAL_DOMAIN" { + value = var.azure_ai_private_link_domain +} + +output "FUNC_AzureWebJobsStorage__accountName" { + value = module.functions.AzureWebJobsStorage__accountName +} + +output "FUNC_AzureWebJobsStorage__blobServiceUri" { + value = module.functions.AzureWebJobsStorage__blobServiceUri +} + +output "FUNC_STORAGE_CONNECTION_STRING__accountName" { + value = module.functions.STORAGE_CONNECTION_STRING__accountName +} + +output "FUNC_STORAGE_CONNECTION_STRING__queueServiceUri" { + value = module.functions.STORAGE_CONNECTION_STRING__queueServiceUri +} + +output "FUNC_STORAGE_CONNECTION_STRING__blobServiceUri" { + value = module.functions.STORAGE_CONNECTION_STRING__blobServiceUri +} + +output "AZURE_OPENAI_AUTHORITY_HOST" { + value = var.azure_openai_authority_host } \ No newline at end of file diff --git a/infra/providers.tf b/infra/providers.tf index b2923ae5e..5b367f3e8 100644 --- a/infra/providers.tf +++ b/infra/providers.tf @@ -3,7 +3,7 @@ terraform { required_providers { azurerm = { source = "hashicorp/azurerm" - version = "~> 3.93.0" + version = "~> 3.113.0" } azuread = { source = "hashicorp/azuread" diff --git a/infra/variables.tf b/infra/variables.tf index 1c54e999c..2b456d32c 100644 --- a/infra/variables.tf +++ b/infra/variables.tf @@ -13,16 +13,6 @@ variable "resourceGroupName" { default = "" } -variable "tenantId" { - type = string - default = "" -} - -variable "subscriptionId" { - type = string - default = "" -} - variable "buildNumber" { type = string default = "local" @@ -73,11 +63,6 @@ variable "enableSharePointConnector" { type = bool default = false } - -variable "enableMultimedia" { - type = bool - default = false -} //// //// Variables that can vary based on the Azure environment being targeted @@ -158,10 +143,6 @@ variable "azure_ai_document_intelligence_domain" { type = string } -variable "azure_ai_videoindexer_domain" { - type = string -} - variable 
"azure_bing_search_domain" { type = string } @@ -212,11 +193,6 @@ variable "azureOpenAIResourceGroup" { type = string } -variable "azureOpenAIServiceKey" { - type = string - sensitive = true -} - variable "openAIServiceName" { type = string default = "" @@ -479,11 +455,6 @@ variable "functionsAppSkuTier" { default = "Standard" } -variable "videoIndexerName" { - type = string - default = "" -} - variable "searchServicesName" { type = string default = "" diff --git a/pipelines/devcontainer-ci.env b/pipelines/devcontainer-ci.env index b6698038b..3edfdf142 100644 --- a/pipelines/devcontainer-ci.env +++ b/pipelines/devcontainer-ci.env @@ -13,7 +13,6 @@ AD_MGMTAPP_CLIENT_ID AD_MGMTAPP_CLIENT_SECRET AD_MGMT_SERVICE_PRINCIPAL_ID AZURE_OPENAI_SERVICE_NAME -AZURE_OPENAI_SERVICE_KEY AZURE_OPENAI_RESOURCE_GROUP AZURE_OPENAI_CHATGPT_DEPLOYMENT AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME diff --git a/pipelines/templates/make-command.yml b/pipelines/templates/make-command.yml index f02410f3d..143e4f44d 100644 --- a/pipelines/templates/make-command.yml +++ b/pipelines/templates/make-command.yml @@ -34,7 +34,6 @@ steps: ARM_SUBSCRIPTION_ID: $(SUBSCRIPTION_ID) AD_WEBAPP_CLIENT_ID: $(WEBAPP_CLIENT_ID) AZURE_OPENAI_SERVICE_NAME: $(AZURE_OPENAI_SERVICE_NAME) - AZURE_OPENAI_SERVICE_KEY: $(AZURE_OPENAI_SERVICE_KEY) AZURE_OPENAI_RESOURCE_GROUP: $(AZURE_OPENAI_RESOURCE_GROUP) AZURE_OPENAI_CHATGPT_DEPLOYMENT: $(AZURE_OPENAI_CHATGPT_DEPLOYMENT) AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME: $(AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME) diff --git a/scripts/build.sh b/scripts/build.sh index ad4de017e..f1c62d6e0 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -31,14 +31,14 @@ npm run build # copy the shared_code files from functions to the webapp cd ../backend mkdir -p ./shared_code -cp ../../functions/shared_code/status_log.py ./shared_code -cp ../../functions/shared_code/__init__.py ./shared_code +cp -u ../../functions/shared_code/status_log.py ./shared_code +cp -u 
../../functions/shared_code/__init__.py ./shared_code cd $DIR # zip the enrichment app content from app/enrichments to the .artifacts folders cd ${ENRICHMENT_ROOT_PATH} mkdir -p ./shared_code -cp ../../functions/shared_code/status_log.py ./shared_code -cp ../../functions/shared_code/utilities_helper.py ./shared_code +cp -u ../../functions/shared_code/status_log.py ./shared_code +cp -u ../../functions/shared_code/utilities_helper.py ./shared_code echo "Successfully prepared enrichment app code" echo -e "\n" \ No newline at end of file diff --git a/scripts/deploy-search-indexes.sh b/scripts/deploy-search-indexes.sh index 1bc912e42..945e00625 100755 --- a/scripts/deploy-search-indexes.sh +++ b/scripts/deploy-search-indexes.sh @@ -24,14 +24,13 @@ fi search_url="${AZURE_SEARCH_SERVICE_ENDPOINT}" -# Get the Search Admin Key -search_key=$(az search admin-key show --resource-group $RESOURCE_GROUP_NAME --service-name $AZURE_SEARCH_SERVICE --query primaryKey -o tsv) -export AZURE_SEARCH_ADMIN_KEY=$search_key +# Obtain an access token for Azure Search +access_token=$(az account get-access-token --resource $TF_VAR_azure_search_scope --query accessToken -o tsv) # Fetch existing index definition if it exists index_vector_json=$(cat ${DIR}/../azure_search/create_vector_index.json | envsubst | tr -d "\n" | tr -d "\r") index_vector_name=$(echo $index_vector_json | jq -r .name ) -existing_index=$(curl -s --header "api-key: $AZURE_SEARCH_ADMIN_KEY" $search_url/indexes/$index_vector_name?api-version=2024-05-01-preview) +existing_index=$(curl -s --header "Authorization: Bearer $access_token" $search_url/indexes/$index_vector_name?api-version=2024-05-01-preview) if [[ "$existing_index" != *"No index with the name"* ]]; then existing_dimensions=$(echo "$existing_index" | jq -r '.fields | map(select(.name == "contentVector")) | .[0].dimensions') @@ -46,7 +45,7 @@ if [[ "$existing_index" != *"No index with the name"* ]]; then exit 0 else echo "Deleting the existing index 
$existing_index_name..." - curl -X DELETE --header "api-key: $AZURE_SEARCH_ADMIN_KEY" $search_url/indexes/$existing_index_name?api-version=2024-05-01-preview + curl -X DELETE --header "Authorization: Bearer $access_token" $search_url/indexes/$existing_index_name?api-version=2024-05-01-preview echo "Index $index_vector_name deleted." fi fi @@ -54,7 +53,7 @@ fi # Create vector index echo "Creating index $index_vector_name ..." -curl -s -X PUT --header "Content-Type: application/json" --header "api-key: $AZURE_SEARCH_ADMIN_KEY" --data "$index_vector_json" $search_url/indexes/$index_vector_name?api-version=2024-05-01-preview +curl -s -X PUT --header "Content-Type: application/json" --header "Authorization: Bearer $access_token" --data "$index_vector_json" $search_url/indexes/$index_vector_name?api-version=2024-05-01-preview echo -e "\n" echo "Successfully deployed $index_vector_name." diff --git a/scripts/environments/AzureEnvironments/AzureCloud.env b/scripts/environments/AzureEnvironments/AzureCloud.env index 0cecbfc84..1c93db4c6 100644 --- a/scripts/environments/AzureEnvironments/AzureCloud.env +++ b/scripts/environments/AzureEnvironments/AzureCloud.env @@ -1,6 +1,7 @@ export TF_VAR_arm_template_schema_mgmt_api="https://schema.management.azure.com" export TF_VAR_azure_portal_domain="https://portal.azure.com" export TF_VAR_azure_search_domain="search.windows.net" +export TF_VAR_azure_search_scope="https://search.azure.com" export TF_VAR_use_semantic_reranker=true export TF_VAR_azure_storage_domain="core.windows.net" export TF_VAR_azure_openai_domain="openai.azure.com" @@ -9,7 +10,6 @@ export TF_VAR_azure_sts_issuer_domain="sts.windows.net" export TF_VAR_azure_websites_domain="azurewebsites.net" export TF_VAR_azure_access_token_domain="login.microsoftonline.com" export TF_VAR_azure_arm_management_api="https://management.azure.com" -export TF_VAR_azure_avam_domain="https://www.videoindexer.ai" export TF_VAR_azure_keyvault_domain="vaultcore.azure.net" export 
TF_VAR_cosmosdb_domain="documents.azure.com" export TF_VAR_azure_monitor_domain="monitor.azure.com" @@ -17,7 +17,6 @@ export TF_VAR_azure_monitor_oms_domain="oms.opinsights.azure.com" export TF_VAR_azure_monitor_ods_domain="ods.opinsights.azure.com" export TF_VAR_azure_automation_domain="azure-automation.net" export TF_VAR_azure_ai_document_intelligence_domain="cognitiveservices.azure.com" -export TF_VAR_azure_ai_videoindexer_domain="api.videoindexer.ai" export TF_VAR_azure_bing_search_domain="api.bing.microsoft.com" export TF_VAR_azure_ai_private_link_domain="cognitiveservices.azure.com" export TF_VAR_azure_acr_domain="azurecr.io" \ No newline at end of file diff --git a/scripts/environments/AzureEnvironments/AzureUSGovernment.env b/scripts/environments/AzureEnvironments/AzureUSGovernment.env index 07bb3067f..f55fd21d1 100644 --- a/scripts/environments/AzureEnvironments/AzureUSGovernment.env +++ b/scripts/environments/AzureEnvironments/AzureUSGovernment.env @@ -1,6 +1,7 @@ export TF_VAR_arm_template_schema_mgmt_api="https://schema.management.usgovcloudapi.net" export TF_VAR_azure_portal_domain="https://portal.azure.us" export TF_VAR_azure_search_domain="search.azure.us" +export TF_VAR_azure_search_scope="https://search.azure.us" export TF_VAR_use_semantic_reranker=false export TF_VAR_azure_storage_domain="core.usgovcloudapi.net" export TF_VAR_azure_openai_domain="openai.azure.us" @@ -9,7 +10,6 @@ export TF_VAR_azure_sts_issuer_domain="login.microsoftonline.us" export TF_VAR_azure_websites_domain="azurewebsites.us" export TF_VAR_azure_access_token_domain="login.microsoftonline.us" export TF_VAR_azure_arm_management_api="https://management.usgovcloudapi.net" -export TF_VAR_azure_avam_domain="https://videoindexer.ai.azure.us" export TF_VAR_azure_keyvault_domain="vaultcore.usgovcloudapi.net" export TF_VAR_cosmosdb_domain="documents.azure.us" export TF_VAR_azure_monitor_domain="monitor.azure.us" @@ -17,7 +17,6 @@ export 
TF_VAR_azure_monitor_oms_domain="oms.opinsights.azure.us" export TF_VAR_azure_monitor_ods_domain="ods.opinsights.azure.us" export TF_VAR_azure_automation_domain="azure-automation.us" export TF_VAR_azure_ai_document_intelligence_domain="cognitiveservices.azure.us" -export TF_VAR_azure_ai_videoindexer_domain="api.videoindexer.ai.azure.us" export TF_VAR_azure_bing_search_domain="" #blank as Bing Search in not available in Azure Government export TF_VAR_azure_ai_private_link_domain="cognitiveservices.azure.us" export TF_VAR_azure_acr_domain="azurecr.us" diff --git a/scripts/environments/local.env.example b/scripts/environments/local.env.example index f1c04277f..2aec77f83 100644 --- a/scripts/environments/local.env.example +++ b/scripts/environments/local.env.example @@ -5,8 +5,6 @@ # This is set by the Azure Pipeline for other environments. export LOCATION="westeurope" # Required export WORKSPACE="myworkspace" # Required -export SUBSCRIPTION_ID="" # Required -export TENANT_ID="" # Required # ---------------------------------------------------------- # The following values determine the features that are enabled in the deployment. @@ -59,7 +57,7 @@ export REQUIRE_WEBSITE_SECURITY_MEMBERSHIP=false # Required # with Microsoft's recommended guardrails for Azure Key Vault policy. We have NOT included automatic secret rotation in this deployment. See # https://learn.microsoft.com/en-us/azure/key-vault/keys/how-to-configure-key-rotation for more information on enabling cryptographic key auto-rotation. # The following setting will set the secret expiration to the current day plus the number of days specified. -export SECRET_EXPIRATION_DAYS=120 # Required +export SECRET_EXPIRATION_DAYS=730 # Required # Uncomment this if you want to avoid the "are you sure?" 
prompt when applying TF changes # export SKIP_PLAN_CHECK=1 @@ -70,7 +68,6 @@ export SECRET_EXPIRATION_DAYS=120 # Required export USE_EXISTING_AOAI=false # Required export AZURE_OPENAI_RESOURCE_GROUP="" export AZURE_OPENAI_SERVICE_NAME="" -export AZURE_OPENAI_SERVICE_KEY="" export AZURE_OPENAI_CHATGPT_DEPLOYMENT="" # Choose your preferred text embedding model from below options of closed source and open source models.: @@ -141,9 +138,6 @@ export CUSTOMER_USAGE_ATTRIBUTION_ID="7a01ff74-15c2-4fec-9f14-63db7d3d6131" # Leave application title blank for the default name export APPLICATION_TITLE="" -# Video Indexer API Version used in ARM deployment of Azure Video Indexer -export VIDEO_INDEXER_API_VERSION="2024-01-01" - # Enable capabilities under development. This should be set to false export ENABLE_DEV_CODE=false diff --git a/scripts/environments/shared-ia-dev.env b/scripts/environments/shared-ia-dev.env index b8595e90f..d03bfe5c2 100644 --- a/scripts/environments/shared-ia-dev.env +++ b/scripts/environments/shared-ia-dev.env @@ -42,10 +42,6 @@ export ENABLE_SHAREPOINT_CONNECTOR=true # The forward slash for at the beginning of the folder is required i.e. "/Shared Documents" # Specifying the root folder of "/Shared Documents" will crawl all your documents in your sharepoint site export SHAREPOINT_TO_SYNC='' -# Update to "true" if you want to deploy the solution with the ability to use the Multimedia feature. -# This feature will allow users to use the Azure Video Indexer feature to process video and audio files. -# Defaults to false, as this is a future feature. 
-export ENABLE_MULTIMEDIA=false # ---------------------------------------------------------- # End of feature flags # ---------------------------------------------------------- @@ -91,9 +87,6 @@ export CUSTOMER_USAGE_ATTRIBUTION_ID="" export ENABLE_DEV_CODE=false -# Video Indexer API Version used in ARM deployment of Azure Video Indexer -export VIDEO_INDEXER_API_VERSION="2024-01-01" - export PASSWORD_LIFETIME=365 export ENABLE_DDOS_PROTECTION_PLAN=false \ No newline at end of file diff --git a/scripts/environments/shared-ia.env b/scripts/environments/shared-ia.env index d94dc80a9..ed626265e 100644 --- a/scripts/environments/shared-ia.env +++ b/scripts/environments/shared-ia.env @@ -42,10 +42,6 @@ export ENABLE_SHAREPOINT_CONNECTOR=true # The forward slash for at the beginning of the folder is required i.e. "/Shared Documents" # Specifying the root folder of "/Shared Documents" will crawl all your documents in your sharepoint site export SHAREPOINT_TO_SYNC='' -# Update to "true" if you want to deploy the solution with the ability to use the Multimedia feature. -# This feature will allow users to use the Azure Video Indexer feature to process video and audio files. -# Defaults to false, as this is a future feature. 
-export ENABLE_MULTIMEDIA=false # ---------------------------------------------------------- # End of feature flags # ---------------------------------------------------------- @@ -91,9 +87,6 @@ export CUSTOMER_USAGE_ATTRIBUTION_ID="" export ENABLE_DEV_CODE=false -# Video Indexer API Version used in ARM deployment of Azure Video Indexer -export VIDEO_INDEXER_API_VERSION="2024-01-01" - export PASSWORD_LIFETIME=365 export ENABLE_DDOS_PROTECTION_PLAN=false \ No newline at end of file diff --git a/scripts/environments/tmp-ia.env b/scripts/environments/tmp-ia.env index ca32f8c6d..c3ce41fcb 100644 --- a/scripts/environments/tmp-ia.env +++ b/scripts/environments/tmp-ia.env @@ -42,10 +42,6 @@ export ENABLE_SHAREPOINT_CONNECTOR=true # The forward slash for at the beginning of the folder is required i.e. "/Shared Documents" # Specifying the root folder of "/Shared Documents" will crawl all your documents in your sharepoint site export SHAREPOINT_TO_SYNC='' -# Update to "true" if you want to deploy the solution with the ability to use the Multimedia feature. -# This feature will allow users to use the Azure Video Indexer feature to process video and audio files. -# Defaults to false, as this is a future feature. 
-export ENABLE_MULTIMEDIA=false # ---------------------------------------------------------- # End of feature flags # ---------------------------------------------------------- @@ -91,9 +87,6 @@ export CUSTOMER_USAGE_ATTRIBUTION_ID="" export ENABLE_DEV_CODE=false -# Video Indexer API Version used in ARM deployment of Azure Video Indexer -export VIDEO_INDEXER_API_VERSION="2024-01-01" - export PASSWORD_LIFETIME=365 export ENABLE_DDOS_PROTECTION_PLAN=false \ No newline at end of file diff --git a/scripts/environments/usgov-ia.env b/scripts/environments/usgov-ia.env index 68a4ef0b6..d6836c01a 100644 --- a/scripts/environments/usgov-ia.env +++ b/scripts/environments/usgov-ia.env @@ -42,10 +42,6 @@ export ENABLE_SHAREPOINT_CONNECTOR=true # The forward slash for at the beginning of the folder is required i.e. "/Shared Documents" # Specifying the root folder of "/Shared Documents" will crawl all your documents in your sharepoint site export SHAREPOINT_TO_SYNC='' -# Update to "true" if you want to deploy the solution with the ability to use the Multimedia feature. -# This feature will allow users to use the Azure Video Indexer feature to process video and audio files. -# Defaults to false, as this is a future feature. 
-export ENABLE_MULTIMEDIA=false # ---------------------------------------------------------- # End of feature flags # ---------------------------------------------------------- @@ -90,9 +86,6 @@ export CUSTOMER_USAGE_ATTRIBUTION_ID="" export ENABLE_DEV_CODE=false -# Video Indexer API Version used in ARM deployment of Azure Video Indexer -export VIDEO_INDEXER_API_VERSION="2024-01-01" - export PASSWORD_LIFETIME=365 export ENABLE_DDOS_PROTECTION_PLAN=false \ No newline at end of file diff --git a/scripts/extract-content.py b/scripts/extract-content.py index bbbbc8f73..6ad399f06 100755 --- a/scripts/extract-content.py +++ b/scripts/extract-content.py @@ -13,8 +13,7 @@ from azure.identity import DefaultAzureCredential from azure.keyvault.secrets import SecretClient from azure.search.documents import SearchClient -from azure.core.credentials import AzureKeyCredential -from azure.storage.blob import BlobServiceClient, ContainerClient +from azure.storage.blob import BlobServiceClient from azure.storage.blob import generate_container_sas, ContainerSasPermissions from datetime import datetime, timedelta import urllib.parse @@ -25,7 +24,6 @@ skip_upload_container = False skip_content_container = False - # Helper function for getting the appropriate Azure CLI Vault URL def get_keyvault_url(keyvault_name, resource_group=None): """ Return vault url @@ -161,25 +159,18 @@ def get_storage_account_endpoint(storage_account_name): old_cosmosdb_url = f'https://infoasst-cosmos-{old_random_text}.documents.azure.com:443/' old_cosmosdb_key = old_secret_client.get_secret('COSMOSDB-KEY').value old_search_endpoint = f'https://infoasst-search-{old_random_text}.search.windows.net' -old_blob_connection_string = old_secret_client.get_secret('BLOB-CONNECTION-STRING').value -old_search_key = old_secret_client.get_secret('AZURE-SEARCH-SERVICE-KEY').value old_azure_blob_storage_account = f"infoasststore{old_random_text}" -old_azure_blob_storage_key = 
old_secret_client.get_secret('AZURE-BLOB-STORAGE-KEY').value old_azure_blob_storage_endpoint = get_storage_account_endpoint(old_azure_blob_storage_account) -new_search_key = new_secret_client.get_secret('AZURE-SEARCH-SERVICE-KEY').value new_search_endpoint = f'https://infoasst-search-{new_random_text}.search.windows.net' new_cosmosdb_url = f'https://infoasst-cosmos-{new_random_text}.documents.azure.com:443/' -new_cosmosdb_key = new_secret_client.get_secret('COSMOSDB-KEY').value -new_blob_connection_string = new_secret_client.get_secret('BLOB-CONNECTION-STRING').value new_azure_blob_storage_account = f"infoasststore{new_random_text}" -new_azure_blob_storage_key = new_secret_client.get_secret('AZURE-BLOB-STORAGE-KEY').value new_azure_blob_storage_endpoint = get_storage_account_endpoint(new_azure_blob_storage_account) index_name = 'vector-index' -old_search_client = SearchClient(endpoint=old_search_endpoint, index_name=index_name, credential=AzureKeyCredential(old_search_key)) -new_search_client = SearchClient(endpoint=new_search_endpoint, index_name=index_name, credential=AzureKeyCredential(new_search_key)) +old_search_client = SearchClient(endpoint=old_search_endpoint, index_name=index_name, credential=credential) +new_search_client = SearchClient(endpoint=new_search_endpoint, index_name=index_name, credential=credential) error_guidance = 'If you re-run the process, you can skip sections that completed successfully by setting the corresponding skip flag to True. 
Read more details her' @@ -188,7 +179,7 @@ def get_storage_account_endpoint(storage_account_name): # Migrate Search if skip_search_index == False: print(f.renderText('Search Index')) - blob_service_client = BlobServiceClient.from_connection_string(old_blob_connection_string) + blob_service_client = BlobServiceClient(old_azure_blob_storage_endpoint, credential=credential) container_name = "content" container_client = blob_service_client.get_container_client(container_name) @@ -293,12 +284,12 @@ def get_storage_account_endpoint(storage_account_name): max_item_count = 1 # Get old status docs - old_cosmos_client = CosmosClient(old_cosmosdb_url, old_cosmosdb_key) + old_cosmos_client = CosmosClient(old_cosmosdb_url, old_cosmosdb_key, consistency_level='Session') old_status_database = old_cosmos_client.get_database_client('statusdb') old_status_container = old_status_database.get_container_client('statuscontainer') old_tags_database = old_cosmos_client.get_database_client('tagdb') old_tags_container = old_tags_database.get_container_client('tagcontainer') - new_cosmos_client = CosmosClient(new_cosmosdb_url, new_cosmosdb_key) + new_cosmos_client = CosmosClient(new_cosmosdb_url, DefaultAzureCredential(), consistency_level='Session') new_status_database = new_cosmos_client.get_database_client('statusdb') new_status_container = new_status_database.get_container_client('statuscontainer') @@ -377,9 +368,9 @@ def get_storage_account_endpoint(storage_account_name): download_and_install_azcopy() upload_container_error = "" container_name = "upload" - old_blob_service_client = BlobServiceClient.from_connection_string(old_blob_connection_string) + old_blob_service_client = BlobServiceClient(old_azure_blob_storage_endpoint, credential=credential) old_container_client = old_blob_service_client.get_container_client(container_name) - new_blob_service_client = BlobServiceClient.from_connection_string(new_blob_connection_string) + new_blob_service_client = 
BlobServiceClient(new_azure_blob_storage_endpoint, credential=credential) new_container_client = new_blob_service_client.get_container_client(container_name) file_count = 0 @@ -388,21 +379,27 @@ def get_storage_account_endpoint(storage_account_name): blobs_processed_count = 0 for blob in blob_list: + # Obtain the user delegation key + old_user_delegation_key = old_blob_service_client.get_user_delegation_key(key_start_time=datetime.utcnow(), key_expiry_time=datetime.utcnow() + timedelta(hours=12)) + # Generate SAS token for old blob old_sas_token = generate_container_sas( account_name=old_azure_blob_storage_account, container_name=container_name, - account_key=old_azure_blob_storage_key, + user_delegation_key=old_user_delegation_key, permission=ContainerSasPermissions(read=True, write=False, delete=False, list=True), # Adjust permissions as needed - expiry=datetime.utcnow() + timedelta(hours=12) + expiry=datetime.utcnow() + timedelta(hours=12) ) source_url = f"https://{old_azure_blob_storage_account}.blob.core.windows.net/{container_name}/{urllib.parse.quote(blob.name)}?{old_sas_token}" + # Obtain the user delegation key + new_user_delegation_key = new_blob_service_client.get_user_delegation_key(key_start_time=datetime.utcnow(), key_expiry_time=datetime.utcnow() + timedelta(hours=12)) + # Generate SAS token for new blob new_sas_token = generate_container_sas( account_name=new_azure_blob_storage_account, container_name=container_name, - account_key=new_azure_blob_storage_key, + user_delegation_key=new_user_delegation_key, permission=ContainerSasPermissions(read=True, write=True, delete=False, list=True), # Adjust permissions as needed expiry=datetime.utcnow() + timedelta(hours=12) ) @@ -434,9 +431,9 @@ def get_storage_account_endpoint(storage_account_name): print(f.renderText('Storage content container')) container_name = "content" content_container_error = "" - old_blob_service_client = BlobServiceClient.from_connection_string(old_blob_connection_string) + 
old_blob_service_client = BlobServiceClient(old_azure_blob_storage_endpoint, credential=credential) old_container_client = old_blob_service_client.get_container_client(container_name) - new_blob_service_client = BlobServiceClient.from_connection_string(new_blob_connection_string) + new_blob_service_client = BlobServiceClient(new_azure_blob_storage_endpoint, credential=credential) new_container_client = new_blob_service_client.get_container_client(container_name) chunks_processed_count = 0 diff --git a/scripts/functional-tests.sh b/scripts/functional-tests.sh index b01c06e62..c38b09cc9 100755 --- a/scripts/functional-tests.sh +++ b/scripts/functional-tests.sh @@ -29,10 +29,9 @@ BASE_PATH=$(realpath "$DIR/..") # Pipeline functional test python run_tests.py \ - --storage_account_connection_str "${BLOB_CONNECTION_STRING}" \ + --storage_account_url "${BLOB_STORAGE_ACCOUNT_ENDPOINT}" \ --search_service_endpoint "${AZURE_SEARCH_SERVICE_ENDPOINT}" \ --search_index "${AZURE_SEARCH_INDEX}" \ - --search_key "${AZURE_SEARCH_SERVICE_KEY}" \ --wait_time_seconds 60 \ --file_extensions "docx" "pdf" "html" "jpg" "png" "csv" "md" "pptx" "txt" "xlsx" "xml" diff --git a/scripts/inf-import-state.sh b/scripts/inf-import-state.sh index bb7127d51..552975224 100755 --- a/scripts/inf-import-state.sh +++ b/scripts/inf-import-state.sh @@ -335,21 +335,6 @@ module_path="module.entraObjects.azuread_service_principal.aad_mgmt_sp[0]" import_resource_if_needed $module_path "$sp_id" -# # Video Indexer -# echo -# figlet "Video Indexer" -# # Pelase note: we do not import vi state as a hotfix was pushed to main to not deploy vi due to -# # changes in the service in azure. 
-# name="infoasststoremedia$random_text" -# providers="/providers/Microsoft.Storage/storageAccounts/$name" -# module_path="module.video_indexer.azurerm_storage_account.media_storage" -# import_resource_if_needed $module_path "$resourceId$providers" -# name="infoasst-ua-ident-$random_text" -# providers="/providers/Microsoft.ManagedIdentity/userAssignedIdentities/$name" -# module_path="module.video_indexer.azurerm_user_assigned_identity.vi" -# import_resource_if_needed $module_path "$resourceId$providers" - - # Form Recognizer echo figlet "Form Recognizer" @@ -357,10 +342,6 @@ name="infoasst-fr-$random_text" providers="/providers/Microsoft.CognitiveServices/accounts/$name" module_path="module.formrecognizer.azurerm_cognitive_account.formRecognizerAccount" import_resource_if_needed "$module_path" "$resourceId$providers" -secret_id=$(get_secret "AZURE-FORM-RECOGNIZER-KEY") -module_path="module.formrecognizer.azurerm_key_vault_secret.docIntelligenceKey" -import_resource_if_needed "$module_path" "$secret_id" - # Cognitive Services echo @@ -373,7 +354,6 @@ secret_id=$(get_secret "AZURE-AI-KEY") module_path="module.cognitiveServices.azurerm_key_vault_secret.search_service_key" import_resource_if_needed "$module_path" "$secret_id" - # Key Vault echo figlet "Key Vault" @@ -381,9 +361,6 @@ keyVaultId="infoasst-kv-$random_text" providers="/providers/Microsoft.KeyVault/vaults/$keyVaultId" module_path="module.kvModule.azurerm_key_vault.kv" import_resource_if_needed "$module_path" "$resourceId$providers" -secret_id=$(get_secret "AZURE-CLIENT-SECRET") -module_path="module.kvModule.azurerm_key_vault_secret.spClientKeySecret" -import_resource_if_needed "$module_path" "$secret_id" current_user_id=$(az ad signed-in-user show --query id -o tsv) providers="/providers/Microsoft.KeyVault/vaults/$keyVaultId/objectId/$current_user_id" module_path="module.kvModule.azurerm_key_vault_access_policy.infoasst" @@ -492,14 +469,6 @@ import_resource_if_needed "$module_path[5]" "$url" 
url="https://$name.queue.core.windows.net/embeddings-queue" import_resource_if_needed "$module_path[6]" "$url" -secret_id=$(get_secret "BLOB-CONNECTION-STRING") -module_path="module.storage.azurerm_key_vault_secret.storage_connection_string" -import_resource_if_needed "$module_path" "$secret_id" -secret_id=$(get_secret "AZURE-BLOB-STORAGE-KEY") -module_path="module.storage.azurerm_key_vault_secret.storage_key" -import_resource_if_needed "$module_path" "$secret_id" - - # Cosmos DB echo figlet "Cosmos DB" @@ -513,9 +482,6 @@ import_resource_if_needed "$module_path" "$resourceId$providers" providers="/providers/Microsoft.DocumentDB/databaseAccounts/$name/sqlDatabases/statusdb/containers/statuscontainer" module_path="module.cosmosdb.azurerm_cosmosdb_sql_container.log_container" import_resource_if_needed "$module_path" "$resourceId$providers" -secret_id=$(get_secret "COSMOSDB-KEY") -module_path="module.cosmosdb.azurerm_key_vault_secret.cosmos_db_key" -import_resource_if_needed "$module_path" "$secret_id" # Search Service @@ -525,10 +491,6 @@ name="infoasst-search-$random_text" providers="/providers/Microsoft.Search/searchServices/$name" module_path="module.searchServices.azurerm_search_service.search" import_resource_if_needed "$module_path" "$resourceId$providers" -secret_id=$(get_secret "AZURE-SEARCH-SERVICE-KEY") -module_path="module.searchServices.azurerm_key_vault_secret.search_service_key" -import_resource_if_needed "$module_path" "$secret_id" - # Output log on imported services echo diff --git a/scripts/json-to-env.function.debug.sh b/scripts/json-to-env.function.debug.sh index 382a6c377..2c9d4af27 100755 --- a/scripts/json-to-env.function.debug.sh +++ b/scripts/json-to-env.function.debug.sh @@ -17,11 +17,11 @@ fi secrets="{" # Name of your Key Vault -keyVaultName=$(cat inf_output.json | jq -r .DEPLOYMENT_KEYVAULT_NAME.value) +keyVaultName=$(cat inf_output.json | jq -r .AZURE_KEYVAULT_NAME.value) # Names of your secrets -secretNames=("AZURE-SEARCH-SERVICE-KEY" 
"AZURE-BLOB-STORAGE-KEY" "BLOB-CONNECTION-STRING" "COSMOSDB-KEY" "AZURE-FORM-RECOGNIZER-KEY" "AZURE-AI-KEY") -azWebJobSecretName="BLOB-CONNECTION-STRING" +secretNames=("AZURE-AI-KEY" "AZURE-STORAGE-CONNECTION-STRING") +azWebJobSecretName="AZURE-STORAGE-CONNECTION-STRING" azWebJobVarName="AzureWebJobsStorage" # Retrieve and export each secret @@ -40,7 +40,7 @@ secrets+="}" secrets="${secrets%,}" jq -r --arg secrets "$secrets" ' - [ + [ { "path": "AZURE_STORAGE_ACCOUNT", "env_var": "BLOB_STORAGE_ACCOUNT" @@ -86,8 +86,24 @@ jq -r --arg secrets "$secrets" ' "env_var": "COSMOSDB_LOG_CONTAINER_NAME" }, { - "path": "AzureWebJobsStorage", - "env_var": "AzureWebJobsStorage" + "path": "FUNC_AzureWebJobsStorage__accountName", + "env_var": "AzureWebJobsStorage__accountName" + }, + { + "path": "FUNC_AzureWebJobsStorage__blobServiceUri", + "env_var": "AzureWebJobsStorage__blobServiceUri" + }, + { + "path": "FUNC_STORAGE_CONNECTION_STRING__accountName", + "env_var": "AzureStorageConnection1__accountName" + }, + { + "path": "FUNC_STORAGE_CONNECTION_STRING__queueServiceUri", + "env_var": "AzureStorageConnection1__queueServiceUri" + }, + { + "path": "FUNC_STORAGE_CONNECTION_STRING__blobServiceUri", + "env_var": "AzureStorageConnection1__blobServiceUri" }, { "path": "AZURE_AI_ENDPOINT", @@ -109,6 +125,10 @@ jq -r --arg secrets "$secrets" ' "path": "BLOB_STORAGE_ACCOUNT_ENDPOINT", "env_var": "BLOB_STORAGE_ACCOUNT_ENDPOINT" }, + { + "path": "AZURE_QUEUE_STORAGE_ENDPOINT", + "env_var": "AZURE_QUEUE_STORAGE_ENDPOINT" + }, { "path": "AZURE_LOCATION", "env_var": "AZURE_AI_LOCATION" @@ -121,13 +141,17 @@ jq -r --arg secrets "$secrets" ' "path": "AZURE_SEARCH_SERVICE_ENDPOINT", "env_var": "AZURE_SEARCH_SERVICE_ENDPOINT" }, - { - "path": "DEPLOYMENT_KEYVAULT_NAME", - "env_var": "DEPLOYMENT_KEYVAULT_NAME" - }, { "path": "AZURE_AI_LOCATION", "env_var": "AZURE_AI_LOCATION" + }, + { + "path": "AZURE_AI_CREDENTIAL_DOMAIN", + "env_var": "AZURE_AI_CREDENTIAL_DOMAIN" + }, + { + "path": 
"AZURE_OPENAI_AUTHORITY_HOST", + "env_var": "AZURE_OPENAI_AUTHORITY_HOST" } ] as $env_vars_to_extract @@ -148,7 +172,7 @@ jq -r --arg secrets "$secrets" ' | reduce .[] as $item ({}; .[$item.key] = $item.value) | - {"IsEncrypted": false, "Values": (. + {"FUNCTIONS_WORKER_RUNTIME": "python", + {"IsEncrypted": false, "Values": (. + {"FUNCTIONS_WORKER_RUNTIME": "python", "AzureWebJobs.parse_html_w_form_rec.Disabled": "true", "MAX_SECONDS_HIDE_ON_UPLOAD": "30", "MAX_SUBMIT_REQUEUE_COUNT": "10", @@ -168,6 +192,7 @@ jq -r --arg secrets "$secrets" ' "EMBEDDINGS_QUEUE": "embeddings-queue", "TEXT_ENRICHMENT_QUEUE": "text-enrichment-queue", "IMAGE_ENRICHMENT_QUEUE": "image-enrichment-queue", + "LOCAL_DEBUG": "true", } + ($secrets | fromjson) )} diff --git a/scripts/json-to-env.sh b/scripts/json-to-env.sh index c64037922..0778cab44 100755 --- a/scripts/json-to-env.sh +++ b/scripts/json-to-env.sh @@ -102,10 +102,6 @@ jq -r ' "path": "ENRICHMENT_APPSERVICE_NAME", "env_var": "ENRICHMENT_APPSERVICE_NAME" }, - { - "path": "DEPLOYMENT_KEYVAULT_NAME", - "env_var": "DEPLOYMENT_KEYVAULT_NAME" - }, { "path": "MAX_CSV_FILE_SIZE", "env_var": "MAX_CSV_FILE_SIZE" @@ -149,25 +145,4 @@ jq -r ' | .[] ' | sed "s/\"/'/g" # replace double quote with single quote to handle special chars - -if [ -n "${IN_AUTOMATION}" ]; then - if [ -n "${AZURE_ENVIRONMENT}" ] && [[ $AZURE_ENVIRONMENT == "AzureUSGovernment" ]]; then - az cloud set --name AZUReUSGovernment > /dev/null 2>&1 - fi - - az login --service-principal -u "$ARM_CLIENT_ID" -p "$ARM_CLIENT_SECRET" --tenant "$ARM_TENANT_ID" > /dev/null 2>&1 - az account set -s "$ARM_SUBSCRIPTION_ID" > /dev/null 2>&1 -fi - -# Name of your Key Vault -keyVaultName=$(cat inf_output.json | jq -r .DEPLOYMENT_KEYVAULT_NAME.value) -# Names of your secrets -secretNames=("AZURE-SEARCH-SERVICE-KEY" "AZURE-BLOB-STORAGE-KEY" "BLOB-CONNECTION-STRING" "COSMOSDB-KEY" "AZURE-OPENAI-SERVICE-KEY") - -# Retrieve and export each secret -for secretName in "${secretNames[@]}"; do - 
secretValue=$(az keyvault secret show --name $secretName --vault-name $keyVaultName --query value -o tsv) - envVarName=$(echo $secretName | tr '-' '_') - echo export $envVarName=\'$secretValue\' -done \ No newline at end of file diff --git a/scripts/json-to-env.webapp.debug.sh b/scripts/json-to-env.webapp.debug.sh index 9840862b8..bd6c3a89c 100755 --- a/scripts/json-to-env.webapp.debug.sh +++ b/scripts/json-to-env.webapp.debug.sh @@ -88,6 +88,10 @@ jq -r ' "path": "BLOB_STORAGE_ACCOUNT_ENDPOINT", "env_var": "AZURE_BLOB_STORAGE_ENDPOINT" }, + { + "path": "AZURE_QUEUE_STORAGE_ENDPOINT", + "env_var": "AZURE_QUEUE_STORAGE_ENDPOINT" + }, { "path": "TARGET_EMBEDDINGS_MODEL", "env_var": "TARGET_EMBEDDINGS_MODEL" @@ -112,10 +116,6 @@ jq -r ' "path": "ENRICHMENT_APPSERVICE_URL", "env_var": "ENRICHMENT_APPSERVICE_URL" }, - { - "path": "DEPLOYMENT_KEYVAULT_NAME", - "env_var": "DEPLOYMENT_KEYVAULT_NAME" - }, { "path": "AZURE_OPENAI_CHATGPT_MODEL_NAME", "env_var": "AZURE_OPENAI_CHATGPT_MODEL_NAME" @@ -151,6 +151,10 @@ jq -r ' { "path": "AZURE_AI_LOCATION", "env_var": "AZURE_AI_LOCATION" + }, + { + "path": "AZURE_AI_CREDENTIAL_DOMAIN", + "env_var": "AZURE_AI_CREDENTIAL_DOMAIN" } ] as $env_vars_to_extract @@ -186,7 +190,7 @@ jq -r ' echo "ENABLE_UNGROUNDED_CHAT=$ENABLE_UNGROUNDED_CHAT" echo "ENABLE_MATH_ASSISTANT=$ENABLE_MATH_ASSISTANT" echo "ENABLE_TABULAR_DATA_ASSISTANT=$ENABLE_TABULAR_DATA_ASSISTANT" - echo "ENABLE_MULTIMEDIA=$ENABLE_MULTIMEDIA" + echo "LOCAL_DEBUG=true" if [ -n "${IN_AUTOMATION}" ]; then if [ -n "${AZURE_ENVIRONMENT}" ] && [[ "$AZURE_ENVIRONMENT" == "AzureUSGovernment" ]]; then @@ -198,13 +202,13 @@ if [ -n "${IN_AUTOMATION}" ]; then fi # Name of your Key Vault -keyVaultName=$(cat inf_output.json | jq -r .DEPLOYMENT_KEYVAULT_NAME.value) +keyVaultName=$(cat inf_output.json | jq -r .AZURE_KEYVAULT_NAME.value) # Names of your secrets if [ -n "${SECURE_MODE}" ]; then - secretNames=("AZURE-SEARCH-SERVICE-KEY" "AZURE-BLOB-STORAGE-KEY" "BLOB-CONNECTION-STRING" 
"COSMOSDB-KEY" "AZURE-OPENAI-SERVICE-KEY" "AZURE-CLIENT-SECRET" "AZURE-AI-KEY") + secretNames=("AZURE-AI-KEY") else - secretNames=("AZURE-SEARCH-SERVICE-KEY" "AZURE-BLOB-STORAGE-KEY" "BLOB-CONNECTION-STRING" "COSMOSDB-KEY" "BINGSEARCH-KEY" "AZURE-OPENAI-SERVICE-KEY" "AZURE-CLIENT-SECRET" "AZURE-AI-KEY") + secretNames=("BINGSEARCH-KEY" "AZURE-AI-KEY") fi @@ -213,4 +217,4 @@ for secretName in "${secretNames[@]}"; do secretValue=$(az keyvault secret show --name $secretName --vault-name $keyVaultName --query value -o tsv) envVarName=$(echo $secretName | tr '-' '_') echo $envVarName=\'$secretValue\' -done \ No newline at end of file +done diff --git a/scripts/load-env.sh b/scripts/load-env.sh index d7db3bcb7..ddd2f2763 100755 --- a/scripts/load-env.sh +++ b/scripts/load-env.sh @@ -76,12 +76,6 @@ if [[ $SECURE_MODE == true && $USE_EXISTING_AOAI == true ]]; then exit 1 fi -if [[ $SECURE_MODE == true && $ENABLE_MULTIMEDIA == true ]]; then - echo -e "\n" - echo -e "Multimedia feature is not available in secure mode. 
Check your values for SECURE_MODE and ENABLE_MULTIMEDIA.\e[0m\n" - exit 1 -fi - #SharePoint if [[ $SECURE_MODE == true && $ENABLE_SHAREPOINT_CONNECTOR == true ]]; then echo -e "\n" diff --git a/scripts/merge-databases.py b/scripts/merge-databases.py index ba0b78f17..729607529 100755 --- a/scripts/merge-databases.py +++ b/scripts/merge-databases.py @@ -75,8 +75,8 @@ def get_keyvault_url(keyvault_name, resource_group=None): key_vault_name = f'infoasst-kv-{old_random_text}' key_vault_url = get_keyvault_url(key_vault_name) -sClient = SecretClient(vault_url=key_vault_url, credential=credential) -cosmosdb_key = sClient.get_secret('COSMOSDB-KEY') +sClient = SecretClient(vault_url=key_vault_url, credential=credential) +cosmosdb_key = sClient.get_secret('COSMOSDB-KEY') # ************************************************************************* @@ -84,7 +84,7 @@ def get_keyvault_url(keyvault_name, resource_group=None): # ************************************************************************* # Migrate Cosmos DB tags from the old tags container and database to the # status container and database as these have now been merged -client = CosmosClient(cosmosdb_url, cosmosdb_key.value) +client = CosmosClient(cosmosdb_url, cosmosdb_key.value, consistency_level='Session') try: # Get old status docs diff --git a/scripts/prepare-tf-variables.sh b/scripts/prepare-tf-variables.sh index 05e25b728..dc35493ad 100755 --- a/scripts/prepare-tf-variables.sh +++ b/scripts/prepare-tf-variables.sh @@ -8,12 +8,9 @@ ENV_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" # To maintain backward compatibility, we need to convert some of the variables to TF_VAR_ format export TF_VAR_environmentName=$WORKSPACE export TF_VAR_location=$LOCATION -export TF_VAR_tenantId=$TENANT_ID -export TF_VAR_subscriptionId=$SUBSCRIPTION_ID export TF_VAR_useExistingAOAIService=$USE_EXISTING_AOAI export TF_VAR_azureOpenAIResourceGroup=$AZURE_OPENAI_RESOURCE_GROUP export 
TF_VAR_azureOpenAIServiceName=$AZURE_OPENAI_SERVICE_NAME -export TF_VAR_azureOpenAIServiceKey=$AZURE_OPENAI_SERVICE_KEY export TF_VAR_chatGptDeploymentName=$AZURE_OPENAI_CHATGPT_DEPLOYMENT export TF_VAR_chatGptModelName=$AZURE_OPENAI_CHATGPT_MODEL_NAME export TF_VAR_chatGptModelVersion=$AZURE_OPENAI_CHATGPT_MODEL_VERSION diff --git a/scripts/terraform-init.sh b/scripts/terraform-init.sh index d8b7859c7..bcc8f5376 100755 --- a/scripts/terraform-init.sh +++ b/scripts/terraform-init.sh @@ -39,24 +39,6 @@ function finish { } trap finish EXIT -# Default to user az cli if not set -# if [ -z $ARM_SUBSCRIPTION_ID ] || [ -z $ARM_TENANT_ID ]; -# then -# printf "$YELLOW\nCredentials for terraform not provided. Do you want to continue using your az login? (Y/n)$RESET\n" -# read answer -# if [[ "$answer" == "Y" ]]; -# then -# export ARM_SUBSCRIPTION_ID=$(az account show --query id --output tsv) -# export ARM_TENANT_ID=$(az account show --query tenantId --output tsv) - -# echo "Using subscription id: $ARM_SUBSCRIPTION_ID" -# echo "Using tenant id: $ARM_TENANT_ID" -# fi - -# fi - - - if [ -n "${IN_AUTOMATION}" ] then if [ -n "${AZURE_ENVIRONMENT}" ] && [[ "$AZURE_ENVIRONMENT" == "AzureUSGovernment" ]]; then diff --git a/scripts/tf-dependencies.json b/scripts/tf-dependencies.json index 092fee67a..a755030d8 100644 --- a/scripts/tf-dependencies.json +++ b/scripts/tf-dependencies.json @@ -685,7 +685,6 @@ "module.logging.azurerm_application_insights.applicationInsights", "module.logging.azurerm_log_analytics_workspace.logAnalytics", "module.searchServices.azurerm_search_service.search", - "module.storage.azurerm_key_vault_secret.storage_connection_string", "module.storage.azurerm_key_vault_secret.storage_key", "module.storage.azurerm_storage_account.storage", "module.storage.azurerm_storage_blob.config", @@ -725,7 +724,6 @@ "module.logging.azurerm_application_insights.applicationInsights", "module.logging.azurerm_log_analytics_workspace.logAnalytics", 
"module.searchServices.azurerm_search_service.search", - "module.storage.azurerm_key_vault_secret.storage_connection_string", "module.storage.azurerm_key_vault_secret.storage_key", "module.storage.azurerm_storage_account.storage", "module.storage.azurerm_storage_blob.config", @@ -759,7 +757,6 @@ "module.kvModule.azurerm_key_vault.kv", "module.kvModule.azurerm_key_vault_secret.spClientKeySecret", "module.kvModule.data.azurerm_client_config.current", - "module.storage.azurerm_key_vault_secret.storage_connection_string", "module.storage.azurerm_key_vault_secret.storage_key", "module.storage.azurerm_storage_account.storage", "module.storage.azurerm_storage_blob.config", @@ -792,7 +789,6 @@ "module.kvModule.azurerm_key_vault.kv", "module.kvModule.azurerm_key_vault_secret.spClientKeySecret", "module.kvModule.data.azurerm_client_config.current", - "module.storage.azurerm_key_vault_secret.storage_connection_string", "module.storage.azurerm_key_vault_secret.storage_key", "module.storage.azurerm_storage_account.storage", "module.storage.azurerm_storage_blob.config", @@ -1058,26 +1054,6 @@ } ] }, - { - "mode": "managed", - "type": "azurerm_key_vault_secret", - "name": "storage_connection_string", - "module": "module.storage", - "provider": "provider[\"registry.terraform.io/hashicorp/azurerm\"]", - "instances": [ - { - "dependencies": [ - "azurerm_resource_group.rg", - "data.azurerm_client_config.current", - "module.kvModule.azurerm_key_vault.kv", - "module.kvModule.data.azurerm_client_config.current", - "module.storage.azurerm_storage_account.storage", - "random_string.random" - ], - "index_key": null - } - ] - }, { "mode": "managed", "type": "azurerm_key_vault_secret", @@ -1321,7 +1297,6 @@ "module.logging.azurerm_application_insights.applicationInsights", "module.logging.azurerm_log_analytics_workspace.logAnalytics", "module.searchServices.azurerm_search_service.search", - "module.storage.azurerm_key_vault_secret.storage_connection_string", 
"module.storage.azurerm_key_vault_secret.storage_key", "module.storage.azurerm_storage_account.storage", "module.storage.azurerm_storage_blob.config", diff --git a/tests/debug_tests.py b/tests/debug_tests.py index 353b156b8..78b3b6ad7 100644 --- a/tests/debug_tests.py +++ b/tests/debug_tests.py @@ -4,17 +4,15 @@ import subprocess import os -BLOB_CONNECTION_STRING = os.environ.get("BLOB_CONNECTION_STRING") +STORAGE_ACCOUNT_URL = os.environ.get("AZURE_BLOB_STORAGE_ENDPOINT") AZURE_SEARCH_SERVICE_ENDPOINT = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT") AZURE_SEARCH_INDEX = os.environ.get("AZURE_SEARCH_INDEX") -AZURE_SEARCH_SERVICE_KEY = os.environ.get("AZURE_SEARCH_SERVICE_KEY") ENRICHMENT_APPSERVICE_NAME = os.environ.get("ENRICHMENT_APPSERVICE_NAME") AZURE_WEBSITE_DOMAIN = os.environ.get("TF_VAR_azure_websites_domain") or "false" -subprocess.call(['python', 'run_tests.py', '--storage_account_connection_str', BLOB_CONNECTION_STRING, \ +subprocess.call(['python', 'run_tests.py', '--storage_account_url', STORAGE_ACCOUNT_URL, \ '--search_service_endpoint', AZURE_SEARCH_SERVICE_ENDPOINT, \ '--search_index', AZURE_SEARCH_INDEX, \ - '--search_key', AZURE_SEARCH_SERVICE_KEY, \ '--wait_time_seconds', '60', \ '--file_extensions', 'docx', 'pdf', 'html', 'jpg', 'png', 'csv', 'md', 'pptx', 'txt', 'xlsx', 'xml']) diff --git a/tests/run_tests.py b/tests/run_tests.py index e9e89c9b3..0c416a143 100644 --- a/tests/run_tests.py +++ b/tests/run_tests.py @@ -8,12 +8,13 @@ import base64 import os import time -from datetime import datetime, timedelta, timezone from rich.console import Console import rich.traceback from azure.storage.blob import BlobServiceClient +from azure.identity import DefaultAzureCredential from azure.search.documents import SearchClient -from azure.core.credentials import AzureKeyCredential + +azure_credential = DefaultAzureCredential() rich.traceback.install() console = Console() @@ -49,9 +50,9 @@ def parse_arguments(): """ parser = argparse.ArgumentParser() 
parser.add_argument( - "--storage_account_connection_str", + "--storage_account_url", required=True, - help="Storage account connection string (set in extract-env)") + help="Storage account endpoint string (set in extract-env)") parser.add_argument( "--search_service_endpoint", required=True, @@ -60,10 +61,6 @@ def parse_arguments(): "--search_index", required=True, help="Azure Search Index") - parser.add_argument( - "--search_key", - required=True, - help="Azure Search Key") parser.add_argument( "--wait_time_seconds", required=False, @@ -141,14 +138,13 @@ def main(blob_service_client, wait_time_seconds, test_file_names): raise ex # Check Search Index for specific content uploaded by test -def check_index(search_service_endpoint, search_index, search_key ): +def check_index(search_service_endpoint, search_index): """Function to check the index for specific content uploaded by the test""" try: - azure_search_key_credential = AzureKeyCredential(search_key) search_client = SearchClient( endpoint=search_service_endpoint, index_name=search_index, - credential=azure_search_key_credential, + credential=azure_credential, ) console.print("Begining index search") for extension, query in search_queries.items(): @@ -171,17 +167,16 @@ def check_index(search_service_endpoint, search_index, search_key ): console.log(f'[red]❌ {ex}[/red]') raise ex -def cleanup_after_test(blob_service_client, search_service_endpoint, search_index, search_key, test_file_names): +def cleanup_after_test(blob_service_client, search_service_endpoint, search_index, test_file_names): """Function to cleanup after tests""" console.print("Cleaning up after tests...") upload_container_client = blob_service_client.get_container_client(UPLOAD_CONTAINER_NAME) output_container_client = blob_service_client.get_container_client(OUTPUT_CONTAINER_NAME) - azure_search_key_credential = AzureKeyCredential(search_key) search_client = SearchClient( endpoint=search_service_endpoint, index_name=search_index, - 
credential=azure_search_key_credential, + credential=azure_credential, ) # Cleanup upload container @@ -226,16 +221,15 @@ def get_files_by_extension(folder_path, extensions): if __name__ == '__main__': args = parse_arguments() try: - storage_blob_service_client = BlobServiceClient.from_connection_string( - args.storage_account_connection_str) + storage_blob_service_client = BlobServiceClient( + args.storage_account_url, credential=azure_credential) # Get a list of files with specified extensions in the test_data folder test_file_names = get_files_by_extension(FILE_PATH, args.file_extensions) main(storage_blob_service_client, args.wait_time_seconds, test_file_names) - check_index(args.search_service_endpoint, args.search_index, args.search_key) + check_index(args.search_service_endpoint, args.search_index) finally: cleanup_after_test(storage_blob_service_client, args.search_service_endpoint, args.search_index, - args.search_key, test_file_names)