NVIDIA: Tool calling, structured output, vlm models #7331

Open · wants to merge 13 commits into base: main
103 changes: 100 additions & 3 deletions docs/my-website/docs/providers/nvidia_nim.md
@@ -13,15 +13,15 @@ https://docs.api.nvidia.com/nim/reference/
## API Key
```python
# env variable
os.environ['NVIDIA_NIM_API_KEY']
os.environ['NVIDIA_API_KEY']
```

## Sample Usage
```python
from litellm import completion
import os

os.environ['NVIDIA_NIM_API_KEY'] = ""
os.environ['NVIDIA_API_KEY'] = ""
response = completion(
model="nvidia_nim/meta/llama3-70b-instruct",
messages=[
@@ -45,7 +45,7 @@ print(response)
from litellm import completion
import os

os.environ['NVIDIA_NIM_API_KEY'] = ""
os.environ['NVIDIA_API_KEY'] = ""
response = completion(
model="nvidia_nim/meta/llama3-70b-instruct",
messages=[
@@ -67,6 +67,103 @@ for chunk in response:
print(chunk)
```

## **Function/Tool Calling**

```python
from litellm import completion
import os

# set env
os.environ['NVIDIA_API_KEY'] = ""

tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]

response = completion(
model="nvidia/meta/llama-3.1-70b-instruct",
messages=messages,
tools=tools,
tool_choice="auto",
)
# Add any assertions here to check the response args
print(response)
assert isinstance(response.choices[0].message.tool_calls[0].function.name, str)
assert isinstance(
response.choices[0].message.tool_calls[0].function.arguments, str
)

```
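
If the model decides to call the tool, the call is returned in `tool_calls`. Below is a minimal sketch of the standard OpenAI-style follow-up loop; the weather result is a hard-coded placeholder rather than a real `get_current_weather` implementation:

```python
import json

# Inspect the tool call the model requested
tool_call = response.choices[0].message.tool_calls[0]
args = json.loads(tool_call.function.arguments)
print(tool_call.function.name, args)

# Append the assistant turn that requested the tool, then the tool result
messages.append(response.choices[0].message)
messages.append(
    {
        "role": "tool",
        "tool_call_id": tool_call.id,
        "content": json.dumps({"temperature": "72F", "unit": "fahrenheit"}),  # placeholder result
    }
)

# Ask the model to produce a final answer using the tool output
final = completion(
    model="nvidia/meta/llama-3.1-70b-instruct",
    messages=messages,
)
print(final.choices[0].message.content)
```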

### Forcing Tool Use

If you want the LLM to use a specific tool to answer the user's question, you can specify that tool in the `tool_choice` field like so:

```python
response = completion(
    model="nvidia/meta/llama-3.1-70b-instruct",
    messages=messages,
    tools=tools,
    tool_choice={"type": "function", "function": {"name": "get_current_weather"}},
)
```
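
`tool_choice` also accepts the standard OpenAI string values; for example, `"none"` disables tool use for a request (assuming the NVIDIA endpoint honours it the way other OpenAI-compatible backends do):

```python
response = completion(
    model="nvidia/meta/llama-3.1-70b-instruct",
    messages=messages,
    tools=tools,
    tool_choice="none",  # model must answer without calling any tool
)
```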

## Usage - Vision

```python
import os
import litellm

# set env
os.environ['NVIDIA_API_KEY'] = ""

def encode_image(image_path):
    import base64

    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


image_path = "nvidia-picasso.jpg"
# Getting the base64 string
base64_image = encode_image(image_path)
response = litellm.completion(
model="nvidia/microsoft/phi-3-vision-128k-instruct",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "Whats in this image?"},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64," + base64_image
},
},
],
}
],
)
print(f"\nResponse: {response}")
```
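
A publicly reachable HTTPS URL can also be passed in `image_url` instead of a base64 data URI — a minimal sketch with a hypothetical image URL, assuming the endpoint accepts remote URLs like other OpenAI-compatible vision backends:

```python
response = litellm.completion(
    model="nvidia/microsoft/phi-3-vision-128k-instruct",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                {
                    "type": "image_url",
                    # hypothetical URL, replace with a real image
                    "image_url": {"url": "https://example.com/sample.jpg"},
                },
            ],
        }
    ],
)
print(response.choices[0].message.content)
```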

## Usage - embedding

80 changes: 76 additions & 4 deletions litellm/__init__.py
@@ -470,6 +470,7 @@ def identify(event_details):
azure_models: List = []
anyscale_models: List = []
cerebras_models: List = []
nvidia_models: List = []
galadriel_models: List = []
sambanova_models: List = []

@@ -578,6 +579,8 @@ def add_known_models():
anyscale_models.append(key)
elif value.get("litellm_provider") == "cerebras":
cerebras_models.append(key)
elif value.get("litellm_provider") == "nvidia":
nvidia_models.append(key)
elif value.get("litellm_provider") == "galadriel":
galadriel_models.append(key)
elif value.get("litellm_provider") == "sambanova_models":
@@ -609,6 +612,7 @@ def add_known_models():
"anyscale",
"mistral",
"groq",
"nvidia",
"nvidia_nim",
"cerebras",
"sambanova",
@@ -846,9 +850,76 @@ def add_known_models():
+ cerebras_models
+ galadriel_models
+ sambanova_models
+ nvidia_models
)


class LlmProviders(str, Enum):
OPENAI = "openai"
OPENAI_LIKE = "openai_like" # embedding only
JINA_AI = "jina_ai"
XAI = "xai"
CUSTOM_OPENAI = "custom_openai"
TEXT_COMPLETION_OPENAI = "text-completion-openai"
COHERE = "cohere"
COHERE_CHAT = "cohere_chat"
CLARIFAI = "clarifai"
ANTHROPIC = "anthropic"
REPLICATE = "replicate"
HUGGINGFACE = "huggingface"
TOGETHER_AI = "together_ai"
OPENROUTER = "openrouter"
VERTEX_AI = "vertex_ai"
VERTEX_AI_BETA = "vertex_ai_beta"
PALM = "palm"
GEMINI = "gemini"
AI21 = "ai21"
BASETEN = "baseten"
AZURE = "azure"
AZURE_TEXT = "azure_text"
AZURE_AI = "azure_ai"
SAGEMAKER = "sagemaker"
SAGEMAKER_CHAT = "sagemaker_chat"
BEDROCK = "bedrock"
VLLM = "vllm"
NLP_CLOUD = "nlp_cloud"
PETALS = "petals"
OOBABOOGA = "oobabooga"
OLLAMA = "ollama"
OLLAMA_CHAT = "ollama_chat"
DEEPINFRA = "deepinfra"
PERPLEXITY = "perplexity"
ANYSCALE = "anyscale"
MISTRAL = "mistral"
GROQ = "groq"
NVIDIA = "nvidia"
NVIDIA_NIM = "nvidia_nim"
CEREBRAS = "cerebras"
AI21_CHAT = "ai21_chat"
VOLCENGINE = "volcengine"
CODESTRAL = "codestral"
TEXT_COMPLETION_CODESTRAL = "text-completion-codestral"
DEEPSEEK = "deepseek"
SAMBANOVA = "sambanova"
MARITALK = "maritalk"
VOYAGE = "voyage"
CLOUDFLARE = "cloudflare"
XINFERENCE = "xinference"
FIREWORKS_AI = "fireworks_ai"
FRIENDLIAI = "friendliai"
WATSONX = "watsonx"
WATSONX_TEXT = "watsonx_text"
TRITON = "triton"
PREDIBASE = "predibase"
DATABRICKS = "databricks"
EMPOWER = "empower"
GITHUB = "github"
CUSTOM = "custom"
LITELLM_PROXY = "litellm_proxy"
HOSTED_VLLM = "hosted_vllm"
LM_STUDIO = "lm_studio"


provider_list: List[Union[LlmProviders, str]] = list(LlmProviders)


@@ -895,6 +966,7 @@ def add_known_models():
"azure": azure_models,
"anyscale": anyscale_models,
"cerebras": cerebras_models,
"nvidia": nvidia_models,
"galadriel": galadriel_models,
"sambanova": sambanova_models,
}
@@ -1114,11 +1186,11 @@ def add_known_models():

openAIGPTAudioConfig = OpenAIGPTAudioConfig()

from .llms.nvidia_nim.chat import NvidiaNimConfig
from .llms.nvidia_nim.embed import NvidiaNimEmbeddingConfig
from .llms.nvidia.chat import NvidiaConfig
from .llms.nvidia.embed import NvidiaEmbeddingConfig

nvidiaNimConfig = NvidiaNimConfig()
nvidiaNimEmbeddingConfig = NvidiaNimEmbeddingConfig()
nvidiaConfig = NvidiaConfig()
nvidiaEmbeddingConfig = NvidiaEmbeddingConfig()

from .llms.cerebras.chat import CerebrasConfig
from .llms.sambanova.chat import SambanovaConfig
33 changes: 22 additions & 11 deletions litellm/litellm_core_utils/get_llm_provider_logic.py
@@ -118,6 +118,25 @@ def get_llm_provider( # noqa: PLR0915
if _is_non_openai_azure_model(model):
custom_llm_provider = "openai"
return model, custom_llm_provider, dynamic_api_key, api_base
elif model.split("/", 1)[0] in ["nvidia", "nvidia_nim"] or model in litellm.nvidia_models:
api_base = (
api_base
or get_secret("NVIDIA_API_BASE")
or get_secret("NVIDIA_BASE_URL")
or get_secret("NVIDIA_NIM_API_BASE")
or "https://integrate.api.nvidia.com/v1"
) # type: ignore
dynamic_api_key = api_key or get_secret_str("NVIDIA_API_KEY") or get_secret_str("NVIDIA_NIM_API_KEY")
custom_llm_provider = "nvidia"
if model.split("/", 1)[0] in ["nvidia", "nvidia_nim"]:
model = model.split("/", 1)[1]

if model not in litellm.nvidia_models:
raise Exception(
f"Model not found. You passed model={model}, custom_llm_provider={custom_llm_provider}.",
"Check available models using `NvidiaConfig().available_models()` "
)
return model, custom_llm_provider, dynamic_api_key, api_base

### Handle cases when custom_llm_provider is set to cohere/command-r-plus but it should use cohere_chat route
model, custom_llm_provider = handle_cohere_chat_model_custom_llm_provider(
@@ -141,7 +160,7 @@
# check if llm provider part of model name
if (
model.split("/", 1)[0] in litellm.provider_list
and model.split("/", 1)[0] not in litellm.model_list
and model.split("/", 1)[1] not in litellm.model_list
and len(model.split("/"))
> 1 # handle edge case where user passes in `litellm --model mistral` https://github.com/BerriAI/litellm/issues/1351
):
@@ -185,8 +204,8 @@ def get_llm_provider( # noqa: PLR0915
custom_llm_provider = "groq"
dynamic_api_key = get_secret_str("GROQ_API_KEY")
elif endpoint == "https://integrate.api.nvidia.com/v1":
custom_llm_provider = "nvidia_nim"
dynamic_api_key = get_secret_str("NVIDIA_NIM_API_KEY")
custom_llm_provider = "nvidia"
dynamic_api_key = get_secret_str("NVIDIA_API_KEY") or get_secret_str("NVIDIA_NIM_API_KEY")
elif endpoint == "https://api.cerebras.ai/v1":
custom_llm_provider = "cerebras"
dynamic_api_key = get_secret_str("CEREBRAS_API_KEY")
@@ -417,14 +436,6 @@ def _get_openai_compatible_provider_info( # noqa: PLR0915
) = litellm.GroqChatConfig()._get_openai_compatible_provider_info(
api_base, api_key
)
elif custom_llm_provider == "nvidia_nim":
# nvidia_nim is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
api_base = (
api_base
or get_secret("NVIDIA_NIM_API_BASE")
or "https://integrate.api.nvidia.com/v1"
) # type: ignore
dynamic_api_key = api_key or get_secret_str("NVIDIA_NIM_API_KEY")
elif custom_llm_provider == "cerebras":
api_base = (
api_base or get_secret("CEREBRAS_API_BASE") or "https://api.cerebras.ai/v1"
6 changes: 3 additions & 3 deletions litellm/litellm_core_utils/get_supported_openai_params.py
@@ -43,11 +43,11 @@ def get_supported_openai_params( # noqa: PLR0915
)
else:
return litellm.FireworksAIConfig().get_supported_openai_params(model=model)
elif custom_llm_provider == "nvidia_nim":
elif custom_llm_provider == "nvidia":
if request_type == "chat_completion":
return litellm.nvidiaNimConfig.get_supported_openai_params(model=model)
return litellm.nvidiaConfig.get_supported_openai_params(model=model)
elif request_type == "embeddings":
return litellm.nvidiaNimEmbeddingConfig.get_supported_openai_params()
return litellm.nvidiaEmbeddingConfig.get_supported_openai_params()
elif custom_llm_provider == "cerebras":
return litellm.CerebrasConfig().get_supported_openai_params(model=model)
elif custom_llm_provider == "xai":