diff --git a/app/enrichment/app.py b/app/enrichment/app.py
index 47969d6d2..d4f9ed297 100644
--- a/app/enrichment/app.py
+++ b/app/enrichment/app.py
@@ -320,8 +320,7 @@ def poll_queue() -> None:
i = 0
for chunk in chunks:
- statusLog.update_document_state( blob_path, f"Indexing {i+1}/{len(chunks)}", State.INDEXING)
- # statusLog.update_document_state( blob_path, f"Indexing {i+1}/{len(chunks)}", State.PROCESSING
+ statusLog.update_document_state( blob_path, f"Indexing {i+1}/{len(chunks)}")
# open the file and extract the content
blob_path_plus_sas = utilities_helper.get_blob_and_sas(
ENV["AZURE_BLOB_STORAGE_CONTAINER"] + '/' + chunk.name)
@@ -403,7 +402,7 @@ def poll_queue() -> None:
backoff = random.randint(
int(ENV["EMBEDDING_REQUEUE_BACKOFF"]) * requeue_count, max_seconds)
queue_client.send_message(message_string, visibility_timeout=backoff)
- statusLog.upsert_document(blob_path, f'Message requed to embeddings queue, attempt {str(requeue_count)}. Visible in {str(backoff)} seconds. Error: {str(error)}.',
+ statusLog.upsert_document(blob_path, f'Message requeued to embeddings queue, attempt {str(requeue_count)}. Visible in {str(backoff)} seconds. Error: {str(error)}.',
StatusClassification.ERROR,
State.QUEUED)
else:
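
The requeue path in the hunk above computes a randomized visibility timeout that grows with the attempt count. A minimal sketch of that backoff pattern, with hypothetical defaults standing in for `ENV["EMBEDDING_REQUEUE_BACKOFF"]` and the hunk's `max_seconds`:

```python
import random

# Sketch of the requeue backoff in the hunk above. base_seconds stands in
# for ENV["EMBEDDING_REQUEUE_BACKOFF"]; both default values are hypothetical.
def compute_backoff(requeue_count: int,
                    base_seconds: int = 60,
                    max_seconds: int = 600) -> int:
    # Lower bound grows with each attempt; clamp so randint's bounds stay ordered.
    low = min(base_seconds * requeue_count, max_seconds)
    return random.randint(low, max_seconds)

for attempt in range(1, 4):
    print(f"attempt {attempt}: message visible again in {compute_backoff(attempt)}s")
```
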
diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts
index fea94c928..6db38afd7 100644
--- a/app/frontend/src/api/models.ts
+++ b/app/frontend/src/api/models.ts
@@ -84,13 +84,10 @@ export type GetUploadStatusRequest = {
export const enum FileState {
All = "ALL",
Processing = "PROCESSING",
- Indexing = "INDEXING",
Skipped = "SKIPPED",
Queued = "QUEUED",
Complete = "COMPLETE",
- Error = "ERROR",
- THROTTLED = "THROTTLED",
- UPLOADED = "UPLOADED"
+ Error = "ERROR"
}
@@ -138,7 +135,6 @@ export const enum StatusLogClassification {
// shared code (functions/shared_code/status_log.py)
export const enum StatusLogState {
Processing = "Processing",
- Indexing = "Indexing",
Skipped = "Skipped",
Queued = "Queued",
Complete = "Complete",
diff --git a/app/frontend/src/components/FileStatus/DocumentsDetailList.tsx b/app/frontend/src/components/FileStatus/DocumentsDetailList.tsx
index 984d0c6e7..2bbcbdd5e 100644
--- a/app/frontend/src/components/FileStatus/DocumentsDetailList.tsx
+++ b/app/frontend/src/components/FileStatus/DocumentsDetailList.tsx
@@ -100,19 +100,19 @@ export const DocumentsDetailList = ({ items, onFilesSorted}: Props) => {
ariaLabel: 'Column operations for state, Press to sort by states',
onColumnClick: onColumnClick,
data: 'string',
- // onRender: (item: IDocument) => (
- //     <span>
- //         {item.state}
- //     </span>
- // ),
+ onRender: (item: IDocument) => (
+     <span>
+         {item.state}
+     </span>
+ ),
isPadded: true,
},
{
key: 'column4',
name: 'Submitted On',
fieldName: 'upload_timestamp',
- minWidth: 90,
- maxWidth: 120,
+ minWidth: 70,
+ maxWidth: 90,
isResizable: true,
isCollapsible: true,
ariaLabel: 'Column operations for submitted on date, Press to sort by submitted date',
@@ -127,8 +127,8 @@ export const DocumentsDetailList = ({ items, onFilesSorted}: Props) => {
key: 'column5',
name: 'Last Updated',
fieldName: 'modified_timestamp',
- minWidth: 90,
- maxWidth: 120,
+ minWidth: 70,
+ maxWidth: 90,
isResizable: true,
isSorted: true,
isSortedDescending: false,
@@ -142,18 +142,6 @@ export const DocumentsDetailList = ({ items, onFilesSorted}: Props) => {
return <span>{item.modified_timestamp}</span>;
},
},
- {
- key: 'column6',
- name: 'Status Detail',
- fieldName: 'state_description',
- minWidth: 90,
- maxWidth: 200,
- isResizable: true,
- isCollapsible: true,
- ariaLabel: 'Column operations for status detail',
- data: 'string',
- onColumnClick: onColumnClick
- }
]);
return (
diff --git a/app/frontend/src/components/FileStatus/FileStatus.tsx b/app/frontend/src/components/FileStatus/FileStatus.tsx
index 32173e7c7..055582af8 100644
--- a/app/frontend/src/components/FileStatus/FileStatus.tsx
+++ b/app/frontend/src/components/FileStatus/FileStatus.tsx
@@ -29,11 +29,8 @@ const dropdownFileStateOptions = [
{ key: FileState.Complete, text: 'Completed' },
{ key: FileState.Error, text: 'Error' },
{ key: FileState.Processing, text: 'Processing' },
- { key: FileState.Indexing, text: 'Indexing' },
{ key: FileState.Queued, text: 'Queued' },
{ key: FileState.Skipped, text: 'Skipped'},
- { key: FileState.UPLOADED, text: 'Uploaded'},
- { key: FileState.THROTTLED, text: 'Throttled'},
];
interface Props {
diff --git a/docs/images/frontend-watch.png b/docs/images/frontend-watch.png
deleted file mode 100644
index ba90d30a6..000000000
Binary files a/docs/images/frontend-watch.png and /dev/null differ
diff --git a/docs/images/vite-debug.png b/docs/images/vite-debug.png
deleted file mode 100644
index 30157b96d..000000000
Binary files a/docs/images/vite-debug.png and /dev/null differ
diff --git a/docs/images/webapp-backend.png b/docs/images/webapp-backend.png
deleted file mode 100644
index a31137758..000000000
Binary files a/docs/images/webapp-backend.png and /dev/null differ
diff --git a/docs/knownissues.md b/docs/knownissues.md
index 4a1e41ef0..0cbccc52b 100644
--- a/docs/knownissues.md
+++ b/docs/knownissues.md
@@ -101,33 +101,10 @@ InvalidApiSetId - The account type 'OpenAI' is either invalid or unavailable in
### Solution:
Deploy Azure OpenAI Service only in the supported regions. Review the local.env file and update the location as per supported models and [region availability](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#model-summary-table-and-region-availability)
+
## Error: jq parse error: Expected value before ','
If you see a jq parse error while doing deployments, it means one of the makefile scripts that extract environment variables is failing to find a value it expects to be there. The files involved are the main.parameters.json file, which holds the variables output by bicep during infrastructure create, and the env file used at build and deploy time.
### Solution:
To resolve, carefully check your deployment .env file for any missing but required values. On rare occasions ARM has issues and output values are not written. In that case, double-check your configuration and rerun the ```make deploy``` and/or ```make extract-env``` command so that the bicep outputs can be written again
-
-## Error: Creation of new Media Service accounts are not allowed as the resource has been deprecated
-
-### Solution:
-Media Services is scheduled for 30th June 2024. This is the [guide](https://learn.microsoft.com/en-us/azure/media-services/latest/azure-media-services-retirement). On deeper investigation Video Indexer, which is the service we use that sits on top of Media Services, will switch away from this before the end date....
-
-```
-Is Azure Video Indexer being retired?
-No, Azure Video Indexer isn't part of the Media Services retirement. Although Video Indexer currently relies on a Media Services account as part of its workflow, this dependency will be eliminated before Media Services is retired on June 30, 2024. See the following for more [impact of Media Services retirement for Video Indexer](https://aka.ms/vi-ams-retirement-announcement)
-```
-
-As of today, Video Indexer still requires a Media Services service to be created, and so we can't remove it from bicep deployment. We will need to assess closer to the date if VI is working without the service and we can then remove the dependency.
-
-The error is interesting as it seems to indicate the media service cannot be created. This is not the case, it does work in regions where VI and Media Services are available. I have updated this to an enhancement and we will add a ticket to the board to action this when VI can be deployed without this supporting service.
-
-## Error: Token limit often exceeded with PDF files
-
-### Solution:
-
-The root of this is table processing. If a table is greater than our target token count for a chunk, this is not respected.Essentially tables are not chunked, but treated as units. We have added a task to our board to split tables by chunk size and repeat the table header rows in each chunk..
-
-When we switched to using unstructured.io for non-PDF documents, we were aware of the same issue there. They were planning on adding this feature. So, we need to make the change in our code, and follow up with unstructured to confirm if this has been fixed and update that path also.
-
-This issue has been updated to an enhancement.
\ No newline at end of file
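
The jq parse error described above generally traces back to a value that never made it into main.parameters.json. A minimal pre-flight check, assuming the `parameters.<name>.value` layout that bicep outputs produce; the file path and required-key list are hypothetical:

```python
import json
import sys

# Hypothetical pre-flight check for the jq parse error above: confirm each
# required bicep output landed in main.parameters.json before the makefile
# scripts try to extract it. Path and key names are illustrative only.
REQUIRED_KEYS = ["location", "environmentName"]

with open("infra/main.parameters.json", encoding="utf-8") as fh:
    parameters = json.load(fh).get("parameters", {})

missing = [key for key in REQUIRED_KEYS
           if not parameters.get(key, {}).get("value")]
if missing:
    sys.exit(f"Missing bicep output values {missing}; rerun 'make extract-env'")
```
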
diff --git a/docs/process_flow.drawio.png b/docs/process_flow.drawio.png
index 08bd7d5ea..b69d6f7d2 100644
Binary files a/docs/process_flow.drawio.png and b/docs/process_flow.drawio.png differ
diff --git a/docs/webapp_debug.md b/docs/webapp_debug.md
index fc31d2d84..e661033a7 100644
--- a/docs/webapp_debug.md
+++ b/docs/webapp_debug.md
@@ -6,23 +6,15 @@ The app consists of two layers, namely the frontend user interface components an
To debug the webapp, both frontend and backend, first set breakpoints in your code under the frontend and/or backend. Select the 'Run & Debug' tab from the sidebar in VS Code. Select Python: Flask from the dropdown and hit run. This will initiate local debugging of the backend code.
-Next verify you have a virtual environment created, which should be seen as a folder called .venv under the root of your workspace. If this doesn't exists you can create one by following these steps:
+![backend debugging](/docs/images/webapp_debug_1.png)
-1. Opening the command palette (Ctrl+Shift+P)
-1. Select the command Python: Create Environment
-1. Next select Venv
-1. Now select the latest version of Python from the list
-1. Finally enter check marks next to all requirements.txt files listed and hit OK
+Next, you will need to initiate debugging of the frontend code. To do this, select 'Vite: Debug' from the dropdown and hit run.
-This will initiate frontend running and debugging. A browser will open and show the web app running under localhost:5000. Next proceed to interact with the web app, by asking a question. In the VS Code interface, your code will hit the breakpoints, frontend or backend, and you will be able to view variable, trace logic etc. You can switch between the two running debuggers by selecting frontend or backend (flask or vite) from the debug dropdown.
-
-Now initiate debugging of the front end code by selecting 'Frontend: watch' and then hitting run
-![backend debugging](/docs/images/frontend-watch.png)
+![frontend debugging](/docs/images/webapp_debug_2.png)
-Finally hit Vite: Debug
-![backend debugging](/docs/images/vite-debug.png)
+This will initiate frontend running and debugging. A browser will open and show the web app running under localhost:5000. Next, proceed to interact with the web app by asking a question. In the VS Code interface, your code will hit the breakpoints, frontend or backend, and you will be able to view variables, trace logic, etc. You can switch between the two running debuggers by selecting frontend or backend (flask or vite) from the debug dropdown.
-A browser will open and show the web app running under localhost:5000. Next proceed to interact with the web app, by asking a question. In the VS Code interface, you code will hit the breakpoints, frontend or backend, and you will be able to view variable, trace logic etc. You can switch between the two running debuggers by selecting frontend or backend (flask or vite) from the debug dropdown.
+![frontend debugging](/docs/images/webapp_debug_3.png)
## Known Issues
diff --git a/functions/TextEnrichment/__init__.py b/functions/TextEnrichment/__init__.py
index 7f96246f4..e0df082e2 100644
--- a/functions/TextEnrichment/__init__.py
+++ b/functions/TextEnrichment/__init__.py
@@ -227,7 +227,7 @@ def main(msg: func.QueueMessage) -> None:
statusLog.upsert_document(
blob_path,
- f"{FUNCTION_NAME} - Text enrichment is complete, message sent to embeddings queue",
+ f"{FUNCTION_NAME} - Text enrichment is complete",
StatusClassification.DEBUG,
State.QUEUED,
)
diff --git a/functions/shared_code/status_log.py b/functions/shared_code/status_log.py
index 3adf87c78..2fcd47b16 100644
--- a/functions/shared_code/status_log.py
+++ b/functions/shared_code/status_log.py
@@ -13,7 +13,6 @@
class State(Enum):
""" Enum for state of a process """
PROCESSING = "Processing"
- INDEXING = "Indexing"
SKIPPED = "Skipped"
QUEUED = "Queued"
COMPLETE = "Complete"
@@ -156,9 +155,6 @@ def upsert_document(self, document_path, status, status_classification: StatusCl
if json_document['state'] != state.value:
json_document['state'] = state.value
json_document['state_timestamp'] = str(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
-
- # Update state description with latest status
- json_document['state_description'] = status
# Append a new item to the array
status_updates = json_document["status_updates"]
@@ -180,7 +176,7 @@ def upsert_document(self, document_path, status, status_classification: StatusCl
"file_name": base_name,
"state": str(state.value),
"start_timestamp": str(datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
- "state_description": status,
+ "state_description": "",
"state_timestamp": str(datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
"status_updates": [
{
@@ -198,7 +194,7 @@ def upsert_document(self, document_path, status, status_classification: StatusCl
"file_name": base_name,
"state": str(state.value),
"start_timestamp": str(datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
- "state_description": status,
+ "state_description": "",
"state_timestamp": str(datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
"status_updates": [
{
@@ -212,34 +208,29 @@ def upsert_document(self, document_path, status, status_classification: StatusCl
#self.container.upsert_item(body=json_document)
self._log_document[document_id] = json_document
-
-
- def update_document_state(self, document_path, status, state=State.PROCESSING):
+
+ def update_document_state(self, document_path, state_str):
"""Updates the state of the document in the storage"""
try:
document_id = self.encode_document_id(document_path)
logging.info(f"{state_str} DocumentID - {document_id}")
- logging.info(f"{status} DocumentID - {document_id}")
+ document_id = self.encode_document_id(document_path)
if self._log_document.get(document_id, "") != "":
json_document = self._log_document[document_id]
-
- json_document['state'] = state.value
- json_document['state_description'] = status
+ json_document['state'] = state_str
json_document['state_timestamp'] = str(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
self.save_document(document_path)
self._log_document[document_id] = json_document
else:
logging.warning(f"Document with ID {document_id} not found.")
except Exception as err:
- logging.error(f"An error occurred while updating the document state: {str(err)}")
-
+ logging.error(f"An error occurred while updating the document state: {str(err)}")
def save_document(self, document_path):
"""Saves the document in the storage"""
document_id = self.encode_document_id(document_path)
self.container.upsert_item(body=self._log_document[document_id])
self._log_document[document_id] = ""
-
def get_stack_trace(self):
""" Returns the stack trace of the current exception"""
diff --git a/scripts/inf-create.sh b/scripts/inf-create.sh
index 4793b102d..1cc9c2de0 100755
--- a/scripts/inf-create.sh
+++ b/scripts/inf-create.sh
@@ -87,6 +87,7 @@ if [ -n "${IN_AUTOMATION}" ]; then
echo "Please create the Azure AD objects using the script at /scripts/create-ad-objs-for-deployment.sh and set the AD_WEBAPP_CLIENT_ID pipeline variable in Azure DevOps."
exit 1
fi
+ aadWebSPId=$ARM_SERVICE_PRINCIPAL_ID
aadMgmtAppId=$AD_MGMTAPP_CLIENT_ID
aadMgmtAppSecret=$AD_MGMTAPP_CLIENT_SECRET
aadMgmtSPId=$AD_MGMT_SERVICE_PRINCIPAL_ID