Skip to content

Commit

Permalink
fixed some comments
Browse files Browse the repository at this point in the history
  • Loading branch information
kevintruong committed Nov 22, 2023
1 parent 977ee69 commit b411af8
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 142 deletions.
2 changes: 2 additions & 0 deletions src/monkey_patch/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class MonkeyPatchException(Exception):
    """Base exception for errors raised by the monkey_patch package."""
2 changes: 1 addition & 1 deletion src/monkey_patch/function_modeler.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def _load_function_config(self, func_hash, function_description):
"""

config, default = self.data_worker._load_function_config(func_hash)
if default and self.check_for_finetunes:
if default and self.check_for_finetunes and default.get('finetune_support', True):
finetuned, finetune_config = self._check_for_finetunes(function_description)
if finetuned:
config = finetune_config
Expand Down
75 changes: 75 additions & 0 deletions src/monkey_patch/language_models/api_model_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from monkey_patch.exception import MonkeyPatchException
from monkey_patch.language_models.bedrock_api import Bedrock_API
from monkey_patch.language_models.openai_api import Openai_API


class ApiModelFactory:
    """Factory that maps a backend name ('openai' or 'bedrock') to its API
    client, model/token-limit table, teacher models, distilled model, and
    finetune capability.

    All lookups raise MonkeyPatchException for an unknown backend name.
    (Bug fix: the exceptions were previously constructed but never raised,
    so unknown backends silently returned None.)
    """

    @classmethod
    def get_model(cls, api_model_name: str):
        """Return a dict mapping the backend name to an instantiated API client.

        Raises:
            MonkeyPatchException: if the backend is not supported.
        """
        if api_model_name == 'openai':
            return {"openai": Openai_API()}
        elif api_model_name == 'bedrock':
            return {"bedrock": Bedrock_API()}
        raise MonkeyPatchException(f"Unsupported api model: {api_model_name}")

    @classmethod
    def get_all_model_info(cls, api_model_name: str, generation_length):
        """Return the backend's models with their usable prompt-token limits.

        Each entry's "token_limit" is the model's context window minus
        ``generation_length``, i.e. the budget left for the prompt.

        Raises:
            MonkeyPatchException: if the backend is not supported.
        """
        if api_model_name == 'bedrock':
            return {
                "anthropic.claude-instant-v1": {
                    "token_limit": 100000 - generation_length,
                    "type": "bedrock"
                },
                "anthropic.claude-v2": {
                    "token_limit": 100000 - generation_length,
                    "type": "bedrock"
                }
            }  # models and token counts
        elif api_model_name == 'openai':
            return {
                "gpt-4": {
                    "token_limit": 8192 - generation_length,
                    "type": "openai"
                },
                "gpt-4-32k": {
                    "token_limit": 32768 - generation_length,
                    "type": "openai"
                }
            }  # models and token counts
        raise MonkeyPatchException(f"Unsupported api model: {api_model_name}")

    @classmethod
    def get_teacher_model(cls, api_model_name: str):
        """Return the list of model names usable as teacher models.

        Raises:
            MonkeyPatchException: if the backend is not supported.
        """
        if api_model_name == 'bedrock':
            return [
                'anthropic.claude-v2'
            ]
        elif api_model_name == 'openai':
            return [
                "gpt-4",
                "gpt-4-32k"
            ]
        raise MonkeyPatchException(f"Unsupported api model: {api_model_name}")

    @classmethod
    def is_finetune_support(cls, api_model_name: str):
        """Return True if the backend supports finetuning, else False.

        Bug fix: removed a stray bare ``lo`` statement that raised
        NameError whenever the 'bedrock' branch was taken.

        Raises:
            MonkeyPatchException: if the backend is not supported.
        """
        if api_model_name == 'bedrock':
            return False
        elif api_model_name == 'openai':
            return True
        raise MonkeyPatchException(f"Unsupported api model: {api_model_name}")

    @classmethod
    def get_distilled_model(cls, api_model_name: str):
        """Return the default distilled model name for the backend.

        Returns an empty string for 'openai' (no default distilled model).

        Raises:
            MonkeyPatchException: if the backend is not supported.
        """
        if api_model_name == 'bedrock':
            return 'anthropic.claude-instant-v1'
        elif api_model_name == 'openai':
            return ""
        raise MonkeyPatchException(f"Unsupported api model: {api_model_name}")
85 changes: 20 additions & 65 deletions src/monkey_patch/language_models/bedrock_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,15 @@
import json

import boto3 as boto3
import openai
import botocore
import time
# import abstract base class

from monkey_patch.exception import MonkeyPatchException
from monkey_patch.language_models.llm_api_abc import LLM_Api
import os

OPENAI_URL = "https://api.openai.com/v1/chat/completions"
import requests

AWS_REGION = os.environ.get("AWS_DEFAULT_REGION", "us-east-1")
bedrock = boto3.client(
service_name='bedrock',
region_name=AWS_REGION,
)
bedrock_runtime = boto3.client(
service_name='bedrock-runtime',
region_name=AWS_REGION,

)


class PromptTemplate:

Expand All @@ -40,10 +29,15 @@ class Bedrock_API(LLM_Api):
def __init__(self) -> None:
# initialise the abstract base class
super().__init__()
self.bedrock_runtime = boto3.client(
service_name='bedrock-runtime',
region_name=os.environ.get("AWS_DEFAULT_REGION", "us-east-1")
)

def generate(self, model, system_message, prompt, **kwargs):
"""
The main generation function, given the args, kwargs, function_modeler, function description and model type, generate a response and check if the datapoint can be saved to the finetune dataset
The main generation function, given the args, kwargs, function_modeler, function description and model type,
generate a response and check if the datapoint can be saved to the finetune dataset
"""

# check if api key is not none
Expand All @@ -54,7 +48,8 @@ def generate(self, model, system_message, prompt, **kwargs):
frequency_penalty = kwargs.get("frequency_penalty", 0)
presence_penalty = kwargs.get("presence_penalty", 0)
body = json.dumps({
"prompt": AnthropicClaudePromptTemplate().prompt_gen(system_prompt=system_message, user_prompt=prompt),
"prompt": AnthropicClaudePromptTemplate().prompt_gen(system_prompt=system_message,
user_prompt=prompt),
"max_tokens_to_sample": 4096,
"temperature": temperature,
"top_k": 250,
Expand All @@ -65,60 +60,20 @@ def generate(self, model, system_message, prompt, **kwargs):
"anthropic_version": "bedrock-2023-05-31"
})

response = bedrock_runtime.invoke_model(
body=body,
modelId=model,
contentType="application/json",
accept="application/json")
return json.loads(response.get('body').read().decode())['completion']

params = {
"model": model,
"temperature": temperature,
"max_tokens": 512,
"top_p": top_p,
"frequency_penalty": frequency_penalty,
"presence_penalty": presence_penalty,
}
messages = [
{
"role": "system",
"content": system_message
},
{
"role": "user",
"content": prompt
}
]
params["messages"] = messages

counter = 0
choice = None
# initiate response so exception logic doesnt error out when checking for error in response
response = {}
while counter < 5:
try:
openai_headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}
response = requests.post(
OPENAI_URL, headers=openai_headers, json=params, timeout=50
)
response = response.json()
choice = response["choices"][0]["message"]["content"].strip("'")
break
response = self.bedrock_runtime.invoke_model(body=body,
modelId=model,
contentType="application/json",
accept="application/json")
return json.loads(response.get('body').read().decode())['completion']

except botocore.exceptions.ClientError as error:
raise MonkeyPatchException("boto3 ")
except Exception:
if ("error" in response and
"code" in response["error"] and
response["error"]["code"] == 'invalid_api_key'):
raise Exception(f"The supplied OpenAI API key {self.api_key} is invalid")

time.sleep(1 + 3 * counter)
counter += 1
continue

if not choice:
raise Exception("OpenAI API failed to generate a response")

return choice
raise MonkeyPatchException(f"Bedrock: Model {model} API failed to generate a response")
69 changes: 3 additions & 66 deletions src/monkey_patch/language_models/language_modeler.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,8 @@
from monkey_patch.language_models.bedrock_api import Bedrock_API
from monkey_patch.language_models.openai_api import Openai_API
from monkey_patch.language_models.api_model_factory import ApiModelFactory
from monkey_patch.models.language_model_output import LanguageModelOutput
from monkey_patch.utils import approximate_token_count


class ApiModelFactory:
@classmethod
def get_model(cls, api_model_name: str):
if api_model_name == 'openai':
return {"openai": Openai_API()}
elif api_model_name == 'bedrock':
return {"bedrock": Bedrock_API()}

@classmethod
def get_all_model_info(cls, api_model_name: str, generation_length):
if api_model_name == 'bedrock':
return {
"anthropic.claude-instant-v1": {
"token_limit": 100000 - generation_length,
"type": "bedrock"
},
"anthropic.claude-v2": {
"token_limit": 100000 - generation_length,
"type": "bedrock"
}
} # models and token counts
if api_model_name == 'openai':
return {
"gpt-4": {
"token_limit": 8192 - self.generation_length,
"type": "openai"
},
"gpt-4-32k": {
"token_limit": 32768 - self.generation_length,
"type": "openai"
}
} # models and token counts

@classmethod
def get_teacher_model(cls, api_model_name: str):
if api_model_name == 'bedrock':
return [
'anthropic.claude-v2'
]
elif api_model_name == 'openai':
return [
"gpt-4",
"gpt-4-32k"
]
else:
return [
"gpt-4",
"gpt-4-32k"
]

@classmethod
def get_distilled_model(cls, api_model_name: str):
if api_model_name == 'bedrock':
return 'anthropic.claude-instant-v1'

elif api_model_name == 'openai':
return ""
else:
return ""


class LanguageModel(object):
def __init__(self, generation_token_limit=512, api_model='openai') -> None:
self.instruction = "You are given below a function description and input data. The function description of what the function must carry out can be found in the Function section, with input and output type hints. The input data can be found in Input section. Using the function description, apply the function to the Input and return a valid output type, that is acceptable by the output_class_definition and output_class_hint. Return None if you can't apply the function to the input or if the output is optional and the correct output is None.\nINCREDIBLY IMPORTANT: Only output a JSON-compatible string in the correct response format."
Expand All @@ -74,9 +12,8 @@ def __init__(self, generation_token_limit=512, api_model='openai') -> None:
self.api_models = ApiModelFactory.get_model(api_model)
self.repair_instruction = "Below are an outputs of a function applied to inputs, which failed type validation. The input to the function is brought out in the INPUT section and function description is brought out in the FUNCTION DESCRIPTION section. Your task is to apply the function to the input and return a correct output in the right type. The FAILED EXAMPLES section will show previous outputs of this function applied to the data, which failed type validation and hence are wrong outputs. Using the input and function description output the accurate output following the output_class_definition and output_type_hint attributes of the function description, which define the output type. Make sure the output is an accurate function output and in the correct type. Return None if you can't apply the function to the input or if the output is optional and the correct output is None."
self.generation_length = generation_token_limit
self.models = ApiModelFactory.get_all_model_info(
api_model,
self.generation_length)
self.models = ApiModelFactory.get_all_model_info(api_model,
self.generation_length)

def generate(self, args, kwargs, function_modeler, function_description, llm_parameters={}):
"""
Expand Down
26 changes: 16 additions & 10 deletions src/monkey_patch/trackers/buffered_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from appdirs import user_data_dir

from monkey_patch.bloom_filter import BloomFilter, optimal_bloom_filter_params
from monkey_patch.language_models.language_modeler import ApiModelFactory
from monkey_patch.language_models.api_model_factory import ApiModelFactory
from monkey_patch.trackers.dataset_worker import DatasetWorker
import json

Expand Down Expand Up @@ -37,15 +37,21 @@ def __init__(self, name, level=15):

super().__init__(name, level)

self.default_function_config = {"distilled_model": ApiModelFactory.get_distilled_model(os.getenv('API_MODEL')),
"current_model_stats": {
"trained_on_datapoints": 0,
"running_faults": []},
"last_training_run": {"trained_on_datapoints": 0},
"current_training_run": {},
"teacher_models": ApiModelFactory.get_teacher_model(os.getenv('API_MODEL')),
# currently supported teacher models
"nr_of_training_runs": 0}
self.default_function_config = {
"distilled_model": ApiModelFactory.get_distilled_model(os.getenv('API_MODEL')),
"current_model_stats": {
"trained_on_datapoints": 0,
"running_faults": []
},
"last_training_run": {
"trained_on_datapoints": 0
},
"current_training_run": {},
"teacher_models": ApiModelFactory.get_teacher_model(os.getenv('API_MODEL')),
# currently supported teacher models
"nr_of_training_runs": 0,
"finetune_support": ApiModelFactory.is_finetune_support(os.getenv('API_MODEL')),
}

def _get_log_directory(self):

Expand Down

0 comments on commit b411af8

Please sign in to comment.