Sync vNext with Main #404

Closed · wants to merge 5 commits
5 changes: 2 additions & 3 deletions app/enrichment/app.py
@@ -320,8 +320,7 @@ def poll_queue() -> None:
i = 0
for chunk in chunks:

statusLog.update_document_state( blob_path, f"Indexing {i+1}/{len(chunks)}", State.INDEXING)
# statusLog.update_document_state( blob_path, f"Indexing {i+1}/{len(chunks)}", State.PROCESSING
statusLog.update_document_state( blob_path, f"Indexing {i+1}/{len(chunks)}")
# open the file and extract the content
blob_path_plus_sas = utilities_helper.get_blob_and_sas(
ENV["AZURE_BLOB_STORAGE_CONTAINER"] + '/' + chunk.name)
@@ -403,7 +402,7 @@ def poll_queue() -> None:
backoff = random.randint(
int(ENV["EMBEDDING_REQUEUE_BACKOFF"]) * requeue_count, max_seconds)
queue_client.send_message(message_string, visibility_timeout=backoff)
statusLog.upsert_document(blob_path, f'Message requeued to embeddings queue, attempt {str(requeue_count)}. Visible in {str(backoff)} seconds. Error: {str(error)}.',
statusLog.upsert_document(blob_path, f'Message requed to embeddings queue, attempt {str(requeue_count)}. Visible in {str(backoff)} seconds. Error: {str(error)}.',
StatusClassification.ERROR,
State.QUEUED)
else:
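
The requeue hunk above computes a randomized visibility timeout that grows with each retry attempt. Below is a minimal runnable sketch of that pattern; the stub queue client and the `max_seconds` cap are assumptions for illustration, standing in for the Azure Storage queue client used in app.py.

```python
import random

# Sketch of the requeue-with-backoff pattern in the hunk above.
# EMBEDDING_REQUEUE_BACKOFF comes from the diff; max_seconds is an assumed cap.
ENV = {"EMBEDDING_REQUEUE_BACKOFF": "60"}
max_seconds = 3600  # assumed upper bound on the visibility delay

def requeue(queue_client, message_string: str, requeue_count: int) -> int:
    """Requeue a message with a randomized, linearly growing backoff."""
    # Lower bound grows with each attempt; this assumes the product stays
    # below max_seconds, as random.randint requires a <= b.
    backoff = random.randint(
        int(ENV["EMBEDDING_REQUEUE_BACKOFF"]) * requeue_count, max_seconds)
    # The message stays invisible to consumers for `backoff` seconds.
    queue_client.send_message(message_string, visibility_timeout=backoff)
    return backoff

class _StubQueue:
    """Hypothetical stand-in for the real queue client."""
    def send_message(self, content, visibility_timeout=None):
        print(f"requeued for {visibility_timeout}s: {content}")

print(requeue(_StubQueue(), "upload/contract.pdf", requeue_count=2))
```
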
6 changes: 1 addition & 5 deletions app/frontend/src/api/models.ts
@@ -84,13 +84,10 @@ export type GetUploadStatusRequest = {
export const enum FileState {
All = "ALL",
Processing = "PROCESSING",
Indexing = "INDEXING",
Skipped = "SKIPPED",
Queued = "QUEUED",
Complete = "COMPLETE",
Error = "ERROR",
THROTTLED = "THROTTLED",
UPLOADED = "UPLOADED"
Error = "ERROR"
}


@@ -138,7 +135,6 @@ export const enum StatusLogClassification {
// shared code (functions/shared_code/status_log.py)
export const enum StatusLogState {
Processing = "Processing",
Indexing = "Indexing",
Skipped = "Skipped",
Queued = "Queued",
Complete = "Complete",
30 changes: 9 additions & 21 deletions app/frontend/src/components/FileStatus/DocumentsDetailList.tsx
@@ -100,19 +100,19 @@ export const DocumentsDetailList = ({ items, onFilesSorted}: Props) => {
ariaLabel: 'Column operations for state, Press to sort by states',
onColumnClick: onColumnClick,
data: 'string',
// onRender: (item: IDocument) => (
// <TooltipHost content={`${item.state_description} `}>
// <span>{item.state}</span>
// </TooltipHost>
// ),
onRender: (item: IDocument) => (
<TooltipHost content={`${item.state_description} `}>
<span>{item.state}</span>
</TooltipHost>
),
isPadded: true,
},
{
key: 'column4',
name: 'Submitted On',
fieldName: 'upload_timestamp',
minWidth: 90,
maxWidth: 120,
minWidth: 70,
maxWidth: 90,
isResizable: true,
isCollapsible: true,
ariaLabel: 'Column operations for submitted on date, Press to sort by submitted date',
@@ -127,8 +127,8 @@
key: 'column5',
name: 'Last Updated',
fieldName: 'modified_timestamp',
minWidth: 90,
maxWidth: 120,
minWidth: 70,
maxWidth: 90,
isResizable: true,
isSorted: true,
isSortedDescending: false,
@@ -142,18 +142,6 @@
return <span>{item.modified_timestamp}</span>;
},
},
{
key: 'column6',
name: 'Status Detail',
fieldName: 'state_description',
minWidth: 90,
maxWidth: 200,
isResizable: true,
isCollapsible: true,
ariaLabel: 'Column operations for status detail',
data: 'string',
onColumnClick: onColumnClick
}
]);

return (
3 changes: 0 additions & 3 deletions app/frontend/src/components/FileStatus/FileStatus.tsx
@@ -29,11 +29,8 @@ const dropdownFileStateOptions = [
{ key: FileState.Complete, text: 'Completed' },
{ key: FileState.Error, text: 'Error' },
{ key: FileState.Processing, text: 'Processing' },
{ key: FileState.Indexing, text: 'Indexing' },
{ key: FileState.Queued, text: 'Queued' },
{ key: FileState.Skipped, text: 'Skipped'},
{ key: FileState.UPLOADED, text: 'Uploaded'},
{ key: FileState.THROTTLED, text: 'Throttled'},
];

interface Props {
Binary file removed docs/images/frontend-watch.png
Binary file removed docs/images/vite-debug.png
Binary file removed docs/images/webapp-backend.png
25 changes: 1 addition & 24 deletions docs/knownissues.md
@@ -101,33 +101,10 @@ InvalidApiSetId - The account type 'OpenAI' is either invalid or unavailable in
### Solution:
Deploy Azure OpenAI Service only in the supported regions. Review the local.env file and update the location as per supported models and [region availability](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#model-summary-table-and-region-availability)


## Error: jq parse error: Expected value before ','

If you see a jq parse error while deploying, one of the makefile scripts that extracts environment variables has failed to find a value it expects to be there. The related files are main.parameters.json, which holds the variables output by Bicep during the infrastructure create, and the env file used at build and deploy time.

### Solution:
To resolve, carefully check your deployment .env file for any missing but required values. There are rare times when ARM has issues and output values are not written; in that case, double-check your configuration and rerun the ```make deploy``` and/or ```make extract-env``` command so that the bicep outputs can be written again.
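
As a rough companion to that check, the sketch below scans a deployment env file for required keys that are missing or empty; the key names and the local.env path are assumptions for the example, not the real list the makefile uses.

```python
from pathlib import Path

# Hypothetical helper: report required keys that are missing or empty in a
# deployment env file. The key names here are assumed for illustration only.
REQUIRED_KEYS = ["LOCATION", "WORKSPACE", "SUBSCRIPTION_ID"]

def missing_env_values(env_path: str) -> list[str]:
    values = {}
    for line in Path(env_path).read_text().splitlines():
        line = line.strip()
        # Skip blanks and comments; keep simple KEY=VALUE pairs.
        if line and not line.startswith("#") and "=" in line:
            key, _, value = line.partition("=")
            values[key.strip()] = value.strip().strip('"')
    return [key for key in REQUIRED_KEYS if not values.get(key)]

if Path("local.env").exists():
    print(missing_env_values("local.env"))  # e.g. ['WORKSPACE']
```
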

## Error: Creation of new Media Service accounts are not allowed as the resource has been deprecated

### Solution:
Media Services is scheduled for retirement on 30th June 2024; this is the [guide](https://learn.microsoft.com/en-us/azure/media-services/latest/azure-media-services-retirement). On deeper investigation, Video Indexer, which is the service we use that sits on top of Media Services, will switch away from this before the end date:

```
Is Azure Video Indexer being retired?
No, Azure Video Indexer isn't part of the Media Services retirement. Although Video Indexer currently relies on a Media Services account as part of its workflow, this dependency will be eliminated before Media Services is retired on June 30, 2024. See the following for more [impact of Media Services retirement for Video Indexer](https://aka.ms/vi-ams-retirement-announcement)
```

As of today, Video Indexer still requires a Media Services account to be created, and so we can't remove it from the bicep deployment. We will need to assess closer to the date whether VI works without the service, and we can then remove the dependency.

The error is interesting as it seems to indicate the media service cannot be created. This is not the case; it does work in regions where VI and Media Services are available. I have updated this to an enhancement and we will add a ticket to the board to action this when VI can be deployed without this supporting service.

## Error: Token limit often exceeded with PDF files

### Solution:

The root of this is table processing. If a table is greater than our target token count for a chunk, the limit is not respected. Essentially, tables are not chunked but treated as units. We have added a task to our board to split tables by chunk size and repeat the table header rows in each chunk.

When we switched to using unstructured.io for non-PDF documents, we were aware of the same issue there. They were planning on adding this feature, so we need to make the change in our code, follow up with unstructured to confirm whether this has been fixed, and update that path as well.

This issue has been updated to an enhancement.
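
To make the proposed fix concrete, here is a rough sketch of splitting a table into token-bounded chunks while repeating the header row at the top of each chunk. The whitespace-based token count and the limits are simplifications standing in for a real tokenizer; this is not the actual chunker in the repo.

```python
# Rough sketch of the proposed table-chunking fix: pack rows into chunks
# under a token budget, repeating the header row in every chunk.

def count_tokens(text: str) -> int:
    return len(text.split())  # crude stand-in for a real tokenizer


def chunk_table(header: str, rows: list[str], max_tokens: int) -> list[str]:
    chunks, current = [], [header]
    budget = max_tokens - count_tokens(header)
    used = 0
    for row in rows:
        cost = count_tokens(row)
        # Flush when the next row would exceed the budget; the len() guard
        # keeps an oversized single row from producing empty chunks.
        if used + cost > budget and len(current) > 1:
            chunks.append("\n".join(current))
            current, used = [header], 0
        current.append(row)
        used += cost
    chunks.append("\n".join(current))
    return chunks


header = "| name | size |"
rows = [f"| file{i}.pdf | {i} MB |" for i in range(6)]
for chunk in chunk_table(header, rows, max_tokens=12):
    print(chunk, "\n---")
```
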
Binary file modified docs/process_flow.drawio.png
18 changes: 5 additions & 13 deletions docs/webapp_debug.md
@@ -6,23 +6,15 @@ The app consists of two layers, namely the frontend user interface components an

To debug the webapp, both frontend and backend, first set breakpoints in your code under the frontend and/or backend. Select the 'Run & Debug' tab from the sidebar in VS Code. Select Python: Flask from the dropdown and hit run. This will initiate local debugging of the backend code.

Next verify you have a virtual environment created, which should be seen as a folder called .venv under the root of your workspace. If this doesn't exist, you can create one by following these steps:
![backend debugging](/docs/images/webapp_debug_1.png)

1. Open the command palette (Ctrl+Shift+P)
1. Select the command Python: Create Environment
1. Next select Venv
1. Now select the latest version of Python from the list
1. Finally enter check marks next to all requirements.txt files listed and hit OK
Next, you will need to initiate debugging of the frontend code. To do this, select 'Vite: Debug' from the dropdown and hit run.

This will initiate frontend running and debugging. A browser will open and show the web app running under localhost:5000. Next, proceed to interact with the web app by asking a question. In the VS Code interface, your code will hit the breakpoints, frontend or backend, and you will be able to view variables, trace logic, etc. You can switch between the two running debuggers by selecting frontend or backend (flask or vite) from the debug dropdown.

Now initiate debugging of the frontend code by selecting 'Frontend: watch' and then hitting run.
![backend debugging](/docs/images/frontend-watch.png)
![frontend debugging](/docs/images/webapp_debug_2.png)

Finally hit Vite: Debug
![backend debugging](/docs/images/vite-debug.png)
This will initiate frontend running and debugging. A browser will open and show the web app running under localhost:5000. Next, proceed to interact with the web app by asking a question. In the VS Code interface, your code will hit the breakpoints, frontend or backend, and you will be able to view variables, trace logic, etc. You can switch between the two running debuggers by selecting frontend or backend (flask or vite) from the debug dropdown.

A browser will open and show the web app running under localhost:5000. Next, proceed to interact with the web app by asking a question. In the VS Code interface, your code will hit the breakpoints, frontend or backend, and you will be able to view variables, trace logic, etc. You can switch between the two running debuggers by selecting frontend or backend (flask or vite) from the debug dropdown.
![frontend debugging](/docs/images/webapp_debug_3.png)

## Known Issues

2 changes: 1 addition & 1 deletion functions/TextEnrichment/__init__.py
@@ -227,7 +227,7 @@ def main(msg: func.QueueMessage) -> None:

statusLog.upsert_document(
blob_path,
f"{FUNCTION_NAME} - Text enrichment is complete, message sent to embeddings queue",
f"{FUNCTION_NAME} - Text enrichment is complete",
StatusClassification.DEBUG,
State.QUEUED,
)
23 changes: 7 additions & 16 deletions functions/shared_code/status_log.py
@@ -13,7 +13,6 @@
class State(Enum):
""" Enum for state of a process """
PROCESSING = "Processing"
INDEXING = "Indexing"
SKIPPED = "Skipped"
QUEUED = "Queued"
COMPLETE = "Complete"
@@ -156,9 +155,6 @@ def upsert_document(self, document_path, status, status_classification: StatusCl
if json_document['state'] != state.value:
json_document['state'] = state.value
json_document['state_timestamp'] = str(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

# Update state description with latest status
json_document['state_description'] = status

# Append a new item to the array
status_updates = json_document["status_updates"]
@@ -180,7 +176,7 @@ def upsert_document(self, document_path, status, status_classification: StatusCl
"file_name": base_name,
"state": str(state.value),
"start_timestamp": str(datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
"state_description": status,
"state_description": "",
"state_timestamp": str(datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
"status_updates": [
{
@@ -198,7 +194,7 @@ def upsert_document(self, document_path, status, status_classification: StatusCl
"file_name": base_name,
"state": str(state.value),
"start_timestamp": str(datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
"state_description": status,
"state_description": "",
"state_timestamp": str(datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
"status_updates": [
{
@@ -212,34 +208,29 @@

#self.container.upsert_item(body=json_document)
self._log_document[document_id] = json_document


def update_document_state(self, document_path, status, state=State.PROCESSING):

def update_document_state(self, document_path, state_str):
"""Updates the state of the document in the storage"""
try:
document_id = self.encode_document_id(document_path)
logging.info(f"{state_str} DocumentID - {document_id}")
logging.info(f"{status} DocumentID - {document_id}")
document_id = self.encode_document_id(document_path)
if self._log_document.get(document_id, "") != "":
json_document = self._log_document[document_id]

json_document['state'] = state.value
json_document['state_description'] = status
json_document['state'] = state_str
json_document['state_timestamp'] = str(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
self.save_document(document_path)
self._log_document[document_id] = json_document
else:
logging.warning(f"Document with ID {document_id} not found.")
except Exception as err:
logging.error(f"An error occurred while updating the document state: {str(err)}")

logging.error(f"An error occurred while updating the document state: {str(err)}")

def save_document(self, document_path):
"""Saves the document in the storage"""
document_id = self.encode_document_id(document_path)
self.container.upsert_item(body=self._log_document[document_id])
self._log_document[document_id] = ""


def get_stack_trace(self):
""" Returns the stack trace of the current exception"""
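
Since the hunks above rework how document state is recorded, here is a minimal in-memory sketch of the pattern: documents keyed by an encoded path, each carrying a state string and a state timestamp. The encoding scheme and the stand-in class are assumptions for illustration; the real class in the diff persists to Cosmos DB via container.upsert_item.

```python
import base64
import logging
from datetime import datetime

class StatusLogSketch:
    """In-memory stand-in for the Cosmos DB-backed status log in the diff."""

    def __init__(self):
        self._log_document = {}

    def encode_document_id(self, document_path: str) -> str:
        # Assumed scheme for the example; the real encoding may differ.
        return base64.urlsafe_b64encode(document_path.encode()).decode()

    def update_document_state(self, document_path: str, state_str: str) -> None:
        """Mirror of the two-argument update_document_state variant in the diff."""
        document_id = self.encode_document_id(document_path)
        doc = self._log_document.setdefault(document_id, {"state_description": ""})
        # Record the new state and when it changed.
        doc["state"] = state_str
        doc["state_timestamp"] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        logging.info("%s DocumentID - %s", state_str, document_id)

log = StatusLogSketch()
log.update_document_state("upload/contract.pdf", "Indexing 1/5")
print(log._log_document)
```
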
1 change: 1 addition & 0 deletions scripts/inf-create.sh
@@ -87,6 +87,7 @@ if [ -n "${IN_AUTOMATION}" ]; then
echo "Please create the Azure AD objects using the script at /scripts/create-ad-objs-for-deployment.sh and set the AD_WEBAPP_CLIENT_ID pipeline variable in Azure DevOps."
exit 1
fi
aadWebSPId=$ARM_SERVICE_PRINCIPAL_ID
aadMgmtAppId=$AD_MGMTAPP_CLIENT_ID
aadMgmtAppSecret=$AD_MGMTAPP_CLIENT_SECRET
aadMgmtSPId=$AD_MGMT_SERVICE_PRINCIPAL_ID