NVIDIA: Tool calling, structured output, vlm models #7331

Open · wants to merge 13 commits into base: main
103 changes: 100 additions & 3 deletions docs/my-website/docs/providers/nvidia_nim.md
@@ -13,15 +13,15 @@ https://docs.api.nvidia.com/nim/reference/
## API Key
```python
# env variable
os.environ['NVIDIA_NIM_API_KEY']
os.environ['NVIDIA_API_KEY']
```

## Sample Usage
```python
from litellm import completion
import os

os.environ['NVIDIA_NIM_API_KEY'] = ""
os.environ['NVIDIA_API_KEY'] = ""
response = completion(
model="nvidia_nim/meta/llama3-70b-instruct",
messages=[
@@ -45,7 +45,7 @@ print(response)
from litellm import completion
import os

os.environ['NVIDIA_NIM_API_KEY'] = ""
os.environ['NVIDIA_API_KEY'] = ""
response = completion(
model="nvidia_nim/meta/llama3-70b-instruct",
messages=[
@@ -67,6 +67,103 @@ for chunk in response:
print(chunk)
```

## **Function/Tool Calling**

```python
from litellm import completion
import os

# set env
os.environ['NVIDIA_API_KEY'] = ""

tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]

response = completion(
model="nvidia/meta/llama-3.1-70b-instruct",
messages=messages,
tools=tools,
tool_choice="auto",
)
# Add any assertions here to check the response args
print(response)
assert isinstance(response.choices[0].message.tool_calls[0].function.name, str)
assert isinstance(
response.choices[0].message.tool_calls[0].function.arguments, str
)

```
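
If the model decides to call the tool, the call is returned in `tool_calls`. Below is a minimal sketch of the standard OpenAI-style follow-up loop; the weather result is a hard-coded placeholder rather than a real `get_current_weather` implementation:

```python
import json

# Inspect the tool call the model requested
tool_call = response.choices[0].message.tool_calls[0]
args = json.loads(tool_call.function.arguments)
print(tool_call.function.name, args)

# Append the assistant turn that requested the tool, then the tool result
messages.append(response.choices[0].message)
messages.append(
    {
        "role": "tool",
        "tool_call_id": tool_call.id,
        "content": json.dumps({"temperature": "72F", "unit": "fahrenheit"}),  # placeholder result
    }
)

# Ask the model to produce a final answer using the tool output
final = completion(
    model="nvidia/meta/llama-3.1-70b-instruct",
    messages=messages,
)
print(final.choices[0].message.content)
```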

### Forcing Tool Use

If you want the LLM to use a specific tool to answer the user's question, you can specify that tool in the `tool_choice` field like so:

```python
response = completion(
    model="nvidia/meta/llama-3.1-70b-instruct",
    messages=messages,
    tools=tools,
    tool_choice={"type": "function", "function": {"name": "get_current_weather"}},
)
```
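
`tool_choice` also accepts the standard OpenAI string values; for example, `"none"` disables tool use for a request (assuming the NVIDIA endpoint honours it the way other OpenAI-compatible backends do):

```python
response = completion(
    model="nvidia/meta/llama-3.1-70b-instruct",
    messages=messages,
    tools=tools,
    tool_choice="none",  # model must answer without calling any tool
)
```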

## Usage - Vision

```python
import os
import litellm

# set env
os.environ['NVIDIA_API_KEY'] = ""

def encode_image(image_path):
    import base64

    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


image_path = "nvidia-picasso.jpg"
# Getting the base64 string
base64_image = encode_image(image_path)
response = litellm.completion(
model="nvidia/microsoft/phi-3-vision-128k-instruct",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "Whats in this image?"},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64," + base64_image
},
},
],
}
],
)
print(f"\nResponse: {response}")
```
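
A publicly reachable HTTPS URL can also be passed in `image_url` instead of a base64 data URI — a minimal sketch with a hypothetical image URL, assuming the endpoint accepts remote URLs like other OpenAI-compatible vision backends:

```python
response = litellm.completion(
    model="nvidia/microsoft/phi-3-vision-128k-instruct",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                {
                    "type": "image_url",
                    # hypothetical URL, replace with a real image
                    "image_url": {"url": "https://example.com/sample.jpg"},
                },
            ],
        }
    ],
)
print(response.choices[0].message.content)
```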

## Usage - embedding

80 changes: 76 additions & 4 deletions litellm/__init__.py
@@ -470,6 +470,7 @@ def identify(event_details):
azure_models: List = []
anyscale_models: List = []
cerebras_models: List = []
nvidia_models: List = []
galadriel_models: List = []
sambanova_models: List = []

@@ -578,6 +579,8 @@ def add_known_models():
anyscale_models.append(key)
elif value.get("litellm_provider") == "cerebras":
cerebras_models.append(key)
elif value.get("litellm_provider") == "nvidia":
nvidia_models.append(key)
elif value.get("litellm_provider") == "galadriel":
galadriel_models.append(key)
elif value.get("litellm_provider") == "sambanova_models":
@@ -609,6 +612,7 @@ def add_known_models():
"anyscale",
"mistral",
"groq",
"nvidia",
"nvidia_nim",
"cerebras",
"sambanova",
@@ -846,9 +850,76 @@ def add_known_models():
+ cerebras_models
+ galadriel_models
+ sambanova_models
+ nvidia_models
)


class LlmProviders(str, Enum):
OPENAI = "openai"
OPENAI_LIKE = "openai_like" # embedding only
JINA_AI = "jina_ai"
XAI = "xai"
CUSTOM_OPENAI = "custom_openai"
TEXT_COMPLETION_OPENAI = "text-completion-openai"
COHERE = "cohere"
COHERE_CHAT = "cohere_chat"
CLARIFAI = "clarifai"
ANTHROPIC = "anthropic"
REPLICATE = "replicate"
HUGGINGFACE = "huggingface"
TOGETHER_AI = "together_ai"
OPENROUTER = "openrouter"
VERTEX_AI = "vertex_ai"
VERTEX_AI_BETA = "vertex_ai_beta"
PALM = "palm"
GEMINI = "gemini"
AI21 = "ai21"
BASETEN = "baseten"
AZURE = "azure"
AZURE_TEXT = "azure_text"
AZURE_AI = "azure_ai"
SAGEMAKER = "sagemaker"
SAGEMAKER_CHAT = "sagemaker_chat"
BEDROCK = "bedrock"
VLLM = "vllm"
NLP_CLOUD = "nlp_cloud"
PETALS = "petals"
OOBABOOGA = "oobabooga"
OLLAMA = "ollama"
OLLAMA_CHAT = "ollama_chat"
DEEPINFRA = "deepinfra"
PERPLEXITY = "perplexity"
ANYSCALE = "anyscale"
MISTRAL = "mistral"
GROQ = "groq"
NVIDIA = "nvidia"
NVIDIA_NIM = "nvidia_nim"
CEREBRAS = "cerebras"
AI21_CHAT = "ai21_chat"
VOLCENGINE = "volcengine"
CODESTRAL = "codestral"
TEXT_COMPLETION_CODESTRAL = "text-completion-codestral"
DEEPSEEK = "deepseek"
SAMBANOVA = "sambanova"
MARITALK = "maritalk"
VOYAGE = "voyage"
CLOUDFLARE = "cloudflare"
XINFERENCE = "xinference"
FIREWORKS_AI = "fireworks_ai"
FRIENDLIAI = "friendliai"
WATSONX = "watsonx"
WATSONX_TEXT = "watsonx_text"
TRITON = "triton"
PREDIBASE = "predibase"
DATABRICKS = "databricks"
EMPOWER = "empower"
GITHUB = "github"
CUSTOM = "custom"
LITELLM_PROXY = "litellm_proxy"
HOSTED_VLLM = "hosted_vllm"
LM_STUDIO = "lm_studio"


provider_list: List[Union[LlmProviders, str]] = list(LlmProviders)


@@ -895,6 +966,7 @@ def add_known_models():
"azure": azure_models,
"anyscale": anyscale_models,
"cerebras": cerebras_models,
"nvidia": nvidia_models,
"galadriel": galadriel_models,
"sambanova": sambanova_models,
}
@@ -1114,11 +1186,11 @@ def add_known_models():

openAIGPTAudioConfig = OpenAIGPTAudioConfig()

from .llms.nvidia_nim.chat import NvidiaNimConfig
from .llms.nvidia_nim.embed import NvidiaNimEmbeddingConfig
from .llms.nvidia.chat import NvidiaConfig
from .llms.nvidia.embed import NvidiaEmbeddingConfig

nvidiaNimConfig = NvidiaNimConfig()
nvidiaNimEmbeddingConfig = NvidiaNimEmbeddingConfig()
nvidiaConfig = NvidiaConfig()
nvidiaEmbeddingConfig = NvidiaEmbeddingConfig()

from .llms.cerebras.chat import CerebrasConfig
from .llms.sambanova.chat import SambanovaConfig
33 changes: 22 additions & 11 deletions litellm/litellm_core_utils/get_llm_provider_logic.py
@@ -118,6 +118,25 @@ def get_llm_provider( # noqa: PLR0915
if _is_non_openai_azure_model(model):
custom_llm_provider = "openai"
return model, custom_llm_provider, dynamic_api_key, api_base
elif model.split("/", 1)[0] in ["nvidia", "nvidia_nim"] or model in litellm.nvidia_models:
api_base = (
api_base
or get_secret("NVIDIA_API_BASE")
or get_secret("NVIDIA_BASE_URL")
or get_secret("NVIDIA_NIM_API_BASE")
or "https://integrate.api.nvidia.com/v1"
) # type: ignore
dynamic_api_key = api_key or get_secret_str("NVIDIA_API_KEY") or get_secret_str("NVIDIA_NIM_API_KEY")
custom_llm_provider = "nvidia"
if model.split("/", 1)[0] in ["nvidia", "nvidia_nim"]:
model = model.split("/", 1)[1]

if model not in litellm.nvidia_models:
raise Exception(
f"Model not found. You passed model={model}, custom_llm_provider={custom_llm_provider}.",
"Check available models using `NvidiaConfig().available_models()` "
)
return model, custom_llm_provider, dynamic_api_key, api_base

### Handle cases when custom_llm_provider is set to cohere/command-r-plus but it should use cohere_chat route
model, custom_llm_provider = handle_cohere_chat_model_custom_llm_provider(
@@ -141,7 +160,7 @@
# check if llm provider part of model name
if (
model.split("/", 1)[0] in litellm.provider_list
and model.split("/", 1)[0] not in litellm.model_list
and model.split("/", 1)[1] not in litellm.model_list
and len(model.split("/"))
> 1 # handle edge case where user passes in `litellm --model mistral` https://github.com/BerriAI/litellm/issues/1351
):
@@ -185,8 +204,8 @@ def get_llm_provider( # noqa: PLR0915
custom_llm_provider = "groq"
dynamic_api_key = get_secret_str("GROQ_API_KEY")
elif endpoint == "https://integrate.api.nvidia.com/v1":
custom_llm_provider = "nvidia_nim"
dynamic_api_key = get_secret_str("NVIDIA_NIM_API_KEY")
custom_llm_provider = "nvidia"
dynamic_api_key = get_secret_str("NVIDIA_API_KEY") or get_secret_str("NVIDIA_NIM_API_KEY")
elif endpoint == "https://api.cerebras.ai/v1":
custom_llm_provider = "cerebras"
dynamic_api_key = get_secret_str("CEREBRAS_API_KEY")
@@ -417,14 +436,6 @@ def _get_openai_compatible_provider_info( # noqa: PLR0915
) = litellm.GroqChatConfig()._get_openai_compatible_provider_info(
api_base, api_key
)
elif custom_llm_provider == "nvidia_nim":
# nvidia_nim is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
api_base = (
api_base
or get_secret("NVIDIA_NIM_API_BASE")
or "https://integrate.api.nvidia.com/v1"
) # type: ignore
dynamic_api_key = api_key or get_secret_str("NVIDIA_NIM_API_KEY")
elif custom_llm_provider == "cerebras":
api_base = (
api_base or get_secret("CEREBRAS_API_BASE") or "https://api.cerebras.ai/v1"
6 changes: 3 additions & 3 deletions litellm/litellm_core_utils/get_supported_openai_params.py
@@ -43,11 +43,11 @@ def get_supported_openai_params( # noqa: PLR0915
)
else:
return litellm.FireworksAIConfig().get_supported_openai_params(model=model)
elif custom_llm_provider == "nvidia_nim":
elif custom_llm_provider == "nvidia":
if request_type == "chat_completion":
return litellm.nvidiaNimConfig.get_supported_openai_params(model=model)
return litellm.nvidiaConfig.get_supported_openai_params(model=model)
elif request_type == "embeddings":
return litellm.nvidiaNimEmbeddingConfig.get_supported_openai_params()
return litellm.nvidiaEmbeddingConfig.get_supported_openai_params()
elif custom_llm_provider == "cerebras":
return litellm.CerebrasConfig().get_supported_openai_params(model=model)
elif custom_llm_provider == "xai":