-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added image processing tool using openai gpt-4o-mini multimodal model
- Loading branch information
Showing
3 changed files
with
116 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from openai import OpenAI | ||
import models | ||
|
||
def process_image(query: str, image_urls: list, model_name="gpt-4o-mini", api_key=None):
    """Send a text query plus a set of images to an OpenAI chat model.

    Args:
        query: Text placed first in the user message.
        image_urls: Iterable of URL strings; each becomes one ``image_url``
            content part following the text part.
        model_name: Model identifier passed to the chat completions call.
        api_key: OpenAI API key. When falsy, the key is looked up through
            ``models.get_api_key("openai")``.

    Returns:
        The ``content`` of the first choice's message in the response.

    Raises:
        ValueError: If no API key is supplied and none can be looked up.
    """
    resolved_key = api_key if api_key else models.get_api_key("openai")
    if not resolved_key:
        raise ValueError("The image processing tool requires an openai api key.")

    openai_client = OpenAI(api_key=resolved_key)

    # A single user turn: the question first, then one part per image.
    content_parts = [{"type": "text", "text": query}]
    for url in image_urls:
        content_parts.append({"type": "image_url", "image_url": {"url": url}})

    completion = openai_client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": content_parts}],
    )

    # Only the text of the first choice is handed back to the caller.
    return completion.choices[0].message.content
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
from agent import Agent | ||
from python.helpers.tool import Tool, Response | ||
from python.helpers import files | ||
from python.helpers.process_image import process_image | ||
from python.helpers.print_style import PrintStyle | ||
import base64 | ||
import os | ||
|
||
class ImageProcessingTool(Tool):
    """Tool that runs a text query against a list of images.

    Image locations are read from ``self.args["image_paths"]``. Entries that
    start with ``http://`` or ``https://`` are forwarded unchanged; every other
    entry is treated as a local file and inlined as a base64 ``data:`` URL
    before the whole list is handed to ``process_image``.
    """

    # Prefixes that mark an entry as a remote URL rather than a local file.
    _REMOTE_PREFIXES = ("http://", "https://")
    # MIME type used when the file name does not map to an image type.
    _DEFAULT_MIME_TYPE = "image/jpeg"

    @staticmethod
    def encode_image(image_path):
        """Return the contents of ``image_path`` as base64 text.

        Declared as a static method because it takes no ``self``; this keeps
        ``ImageProcessingTool.encode_image(path)`` working and also makes it
        callable on an instance.
        """
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    @classmethod
    def _to_data_url(cls, image_path):
        """Return a base64 ``data:`` URL for the local file ``image_path``.

        Raises:
            FileNotFoundError: If ``image_path`` does not exist.
        """
        # Local import: mimetypes is not among the file's top-level imports.
        import mimetypes

        if not os.path.exists(image_path):
            raise FileNotFoundError(f"The local file '{image_path}' not found.")

        # Derive the MIME type from the file name instead of labelling every
        # file as JPEG; fall back to the default for unknown or non-image types.
        mime_type, _ = mimetypes.guess_type(image_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = cls._DEFAULT_MIME_TYPE

        return f"data:{mime_type};base64,{cls.encode_image(image_path)}"

    def execute(self, query: str, **kwargs):
        """Resolve the configured image paths and query the model about them.

        Args:
            query: Text question forwarded to ``process_image``.
            **kwargs: Accepted and ignored.

        Returns:
            A ``Response`` whose message is the value returned by
            ``process_image`` and whose ``break_loop`` is ``False``.

        Raises:
            ValueError: If ``image_paths`` is empty, not a list, or contains a
                non-string entry.
            FileNotFoundError: If a local path does not exist.
        """
        image_paths = self.args["image_paths"]

        if not image_paths or not isinstance(image_paths, list):
            raise ValueError("The image_paths is either empty, None, or not a valid list of strings.")

        processed_image_paths = []
        for image_path in image_paths:
            if not isinstance(image_path, str):
                # Fail with the documented error rather than an AttributeError.
                raise ValueError("The image_paths is either empty, None, or not a valid list of strings.")
            if not image_path.startswith(self._REMOTE_PREFIXES):
                image_path = self._to_data_url(image_path)
            processed_image_paths.append(image_path)

        content = process_image(query, processed_image_paths)

        # NOTE(review): a call to self.agent.handle_intervention(content) was
        # commented out at this point in the original; confirm whether it
        # should be restored.
        return Response(message=content, break_loop=False)