Remove Vision SDK in favor of REST APIs

microsoft · Aug 6, 2024 · 6fe06ce · 6fe06ce
1 parent 8ec9cbe
commit 6fe06ce
Show file tree

Hide file tree

Showing 2 changed files with 40 additions and 60 deletions.
diff --git a/functions/ImageEnrichment/__init__.py b/functions/ImageEnrichment/__init__.py
@@ -2,7 +2,6 @@
 import logging
 import os
 
-import azure.ai.vision as visionsdk
 import azure.functions as func
 import requests
 from azure.storage.blob import BlobServiceClient
@@ -64,15 +63,8 @@
     "Ocp-Apim-Subscription-Region": cognitive_services_account_location,
 }
 
-# Vision SDK
-vision_service_options = visionsdk.VisionServiceOptions(
-    endpoint=cognitive_services_endpoint, key=cognitive_services_key
-)
-
-analysis_options = visionsdk.ImageAnalysisOptions()
-
-# Note that "CAPTION" and "DENSE_CAPTIONS" are only supported in Azure GPU regions (East US, France Central,
-# Korea Central, North Europe, Southeast Asia, West Europe, West US). Remove "CAPTION" and "DENSE_CAPTIONS"
+# Note that "caption" and "denseCaptions" are only supported in Azure GPU regions (East US, France Central,
+# Korea Central, North Europe, Southeast Asia, West Europe, West US). Remove "caption" and "denseCaptions"
 # from the list below if your Computer Vision key is not from one of those regions.
 
 if cognitive_services_account_location in [
@@ -85,27 +77,13 @@
     "westus",
 ]:
     GPU_REGION = True
-    analysis_options.features = (
-        visionsdk.ImageAnalysisFeature.CAPTION
-        | visionsdk.ImageAnalysisFeature.DENSE_CAPTIONS
-        | visionsdk.ImageAnalysisFeature.OBJECTS
-        | visionsdk.ImageAnalysisFeature.TEXT
-        | visionsdk.ImageAnalysisFeature.TAGS
-    )
+    VISION_ENDPOINT = f"{cognitive_services_endpoint}computervision/imageanalysis:analyze?api-version=2023-04-01-preview&features=caption,denseCaptions,objects,tags,read&gender-neutral-caption=true"
 else:
     GPU_REGION = False
-    analysis_options.features = (
-        visionsdk.ImageAnalysisFeature.OBJECTS
-        | visionsdk.ImageAnalysisFeature.TEXT
-        | visionsdk.ImageAnalysisFeature.TAGS
-    )
-
-analysis_options.model_version = "latest"
-
+    VISION_ENDPOINT = f"{cognitive_services_endpoint}computervision/imageanalysis:analyze?api-version=2023-04-01-preview&features=objects,tags,read&gender-neutral-caption=true"
 
 FUNCTION_NAME = "ImageEnrichment"
 
-
 utilities = Utilities(
     azure_blob_storage_account=azure_blob_storage_account,
     azure_blob_storage_endpoint=azure_blob_storage_endpoint,
@@ -171,67 +149,70 @@ def main(msg: func.QueueMessage) -> None:
         file_name, file_extension, file_directory  = utilities.get_filename_and_extension(blob_path)
         blob_path_plus_sas = utilities.get_blob_and_sas(blob_path)
 
-        vision_source = visionsdk.VisionSource(url=blob_path_plus_sas)
-        image_analyzer = visionsdk.ImageAnalyzer(
-            vision_service_options, vision_source, analysis_options
-        )
-        result = image_analyzer.analyze()
-
-        text_image_summary = ""
-        index_content = ""
-        complete_ocr_text = None
+        data = {"url": f"{blob_path_plus_sas}"}
+        response = requests.post(VISION_ENDPOINT, 
+                                 headers=translator_api_headers, 
+                                 json=data)
+
+        if response.status_code == 200:
+            result = response.json()
+            text_image_summary = ""
+            index_content = ""
+            complete_ocr_text = None
 
-        if result.reason == visionsdk.ImageAnalysisResultReason.ANALYZED:
             if GPU_REGION:
-                if result.caption is not None:
+                if result["captionResult"] is not None:
                     text_image_summary += "Caption:\n"
                     text_image_summary += "\t'{}', Confidence {:.4f}\n".format(
-                        result.caption.content, result.caption.confidence
+                        result["captionResult"]["text"], result["captionResult"]["confidence"]
                     )
-                    index_content += "Caption: {}\n ".format(result.caption.content)
+                    index_content += "Caption: {}\n ".format(result["captionResult"]["text"])
 
-                if result.dense_captions is not None:
+                if result["denseCaptionsResult"] is not None:
                     text_image_summary += "Dense Captions:\n"
                     index_content += "DeepCaptions: "
-                    for caption in result.dense_captions:
+                    for caption in result["denseCaptionsResult"]["values"]:
                         text_image_summary += "\t'{}', Confidence: {:.4f}\n".format(
-                            caption.content, caption.confidence
+                            caption["text"], caption["confidence"]
                         )
-                        index_content += "{}\n ".format(caption.content)
+                        index_content += "{}\n ".format(caption["text"])
 
-            if result.objects is not None:
+            if result["objectsResult"] is not None:
                 text_image_summary += "Objects:\n"
                 index_content += "Descriptions: "
-                for object_detection in result.objects:
+                for object_detection in result["objectsResult"]["values"]:
                     text_image_summary += "\t'{}', Confidence: {:.4f}\n".format(
-                        object_detection.name, object_detection.confidence
+                        object_detection["name"], object_detection["confidence"]
                     )
-                    index_content += "{}\n ".format(object_detection.name)
+                    index_content += "{}\n ".format(object_detection["name"])
 
-            if result.tags is not None:
+            if result["tagsResult"] is not None:
                 text_image_summary += "Tags:\n"
-                for tag in result.tags:
+                for tag in result["tagsResult"]["values"]:
                     text_image_summary += "\t'{}', Confidence {:.4f}\n".format(
-                        tag.name, tag.confidence
+                        tag["name"], tag["confidence"]
                     )
-                    index_content += "{}\n ".format(tag.name)
+                    index_content += "{}\n ".format(tag["name"])
 
-            if result.text is not None:
+            if result["readResult"] is not None:
                 text_image_summary += "Raw OCR Text:\n"
                 complete_ocr_text = ""
-                for line in result.text.lines:
-                    complete_ocr_text += "{}\n".format(line.content)
+                for line in result["readResult"]["pages"][0]["words"]:
+                    complete_ocr_text += "{}\n".format(line["content"])
                 text_image_summary += complete_ocr_text
 
-        else:
-            error_details = visionsdk.ImageAnalysisErrorDetails.from_result(result)
-
+        else: 
+            logging.error("%s - Image analysis failed for %s: %s",
+                          FUNCTION_NAME,
+                          blob_path,
+                          str(response.json()))
             statusLog.upsert_document(
                 blob_path,
-                f"{FUNCTION_NAME} - Image analysis failed: {error_details.error_code} {error_details.error_code} {error_details.message}",
+                f"{FUNCTION_NAME} - Image analysis failed: {str(response.json())}",
                 StatusClassification.ERROR,
                 State.ERROR,
             )
+            raise requests.exceptions.HTTPError(response.json())
 
         if complete_ocr_text not in [None, ""]:
             # Detect language

diff --git a/functions/requirements.txt b/functions/requirements.txt
@@ -8,13 +8,12 @@ azure-functions == 1.17.0
 tiktoken==0.4.0
 azure.ai.formrecognizer==3.2.1
 azure-storage-blob==12.16.0
-azure-core == 1.26.4
+azure-core == 1.30.2
 lxml == 4.9.2
 azure-cosmos == 4.3.1
 azure-storage-queue == 12.6.0
 nltk == 3.8.1
 tenacity == 8.2.3
-azure-ai-vision == 0.15.1b1
 unstructured[csv,doc,docx,email,html,md,msg,ppt,pptx,text,xlsx,xml] == 0.12.5
 pyoo == 1.4
 azure-search-documents == 11.4.0b11