Support budget/rate limit tiers for keys #7429

Merged · 22 commits · Dec 27, 2024
f97ae2b
feat(proxy/utils.py): get associated litellm budget from db in combin…
krrishdholakia Dec 26, 2024
794bd0d
feat(proxy/_types.py): update the value of key-level tpm/rpm/model ma…
krrishdholakia Dec 26, 2024
5cd6a75
docs(rate_limit_tiers.md): add doc on setting rate limit / budget tiers
krrishdholakia Dec 26, 2024
d361ff3
feat(key_management_endpoints.py): return litellm_budget_table value …
krrishdholakia Dec 26, 2024
8152a22
fix(key_management_endpoints.py): document 'budget_id' param in `/key…
krrishdholakia Dec 26, 2024
209bb01
docs(key_management_endpoints.py): document budget_id usage
krrishdholakia Dec 26, 2024
660a26c
refactor(budget_management_endpoints.py): refactor budget endpoints i…
krrishdholakia Dec 26, 2024
cba650e
docs(test_api_docs.py): add budget endpoints to ci/cd doc test + add …
krrishdholakia Dec 26, 2024
27c9de8
fix(customer_endpoints.py): use new pydantic obj name
krrishdholakia Dec 26, 2024
bb06583
docs(user_management_heirarchy.md): add simple doc explaining teams/k…
krrishdholakia Dec 26, 2024
1c1335c
Litellm dev 12 26 2024 p2 (#7432)
krrishdholakia Dec 26, 2024
4555397
fix: test
krrishdholakia Dec 26, 2024
77aa969
fix(parallel_request_limiter.py): enforce tpm/rpm limits on key from …
krrishdholakia Dec 26, 2024
05ed0e0
fix: fix linting errors
krrishdholakia Dec 27, 2024
3be1f67
test: fix test
krrishdholakia Dec 27, 2024
fbc0379
fix: remove unused import
krrishdholakia Dec 27, 2024
c2b9dd1
test: update test
krrishdholakia Dec 27, 2024
fac8ab3
docs(customer_endpoints.py): document new model_max_budget param
krrishdholakia Dec 27, 2024
4967cbf
test: specify unique key alias
krrishdholakia Dec 27, 2024
396c0fe
docs(budget_management_endpoints.py): document new model_max_budget p…
krrishdholakia Dec 27, 2024
3a18e28
test: fix test
krrishdholakia Dec 27, 2024
c7026ba
test: fix tests
krrishdholakia Dec 27, 2024
11 changes: 10 additions & 1 deletion docs/my-website/docs/providers/azure.md
@@ -4,6 +4,16 @@ import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Azure OpenAI

## Overview

| Property | Details |
|-------|-------|
| Description | Azure OpenAI Service provides REST API access to OpenAI's powerful language models including o1, o1-mini, GPT-4o, GPT-4o mini, GPT-4 Turbo with Vision, GPT-4, GPT-3.5-Turbo, and Embeddings model series |
| Provider Route on LiteLLM | `azure/` |
| Supported Operations | [`/chat/completions`](#azure-openai-chat-completion-models), [`/completions`](#azure-instruct-models), [`/embeddings`](../embedding/supported_embedding#azure-openai-embedding-models), [`/audio/speech`](#azure-text-to-speech-tts), [`/audio/transcriptions`](../audio_transcription), `/fine_tuning`, [`/batches`](#azure-batches-api), `/files`, [`/images`](../image_generation#azure-openai-image-generation-models) |
| Link to Provider Doc | [Azure OpenAI ↗](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview) |

## API Keys, Params
`api_key`, `api_base`, `api_version`, etc. can be passed directly to `litellm.completion`, or set as module-level params (e.g. `litellm.api_key`)
```python
@@ -889,7 +899,6 @@ Expected Response:
{"data":[{"id":"batch_R3V...}
```


## Advanced
### Azure API Load-Balancing

4 changes: 2 additions & 2 deletions docs/my-website/docs/proxy/customers.md
@@ -2,11 +2,11 @@ import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# 🙋‍♂️ Customers
# 🙋‍♂️ Customers / End-User Budgets

Track spend, set budgets for your customers.

## Tracking Customer Credit
## Tracking Customer Spend

### 1. Make LLM API call w/ Customer ID

68 changes: 68 additions & 0 deletions docs/my-website/docs/proxy/rate_limit_tiers.md
@@ -0,0 +1,68 @@
# ✨ Budget / Rate Limit Tiers

Create tiers with different budgets and rate limits, making it easy to manage different users and their usage.

:::info

This is a LiteLLM Enterprise feature.

Get a 7 day free trial + get in touch [here](https://litellm.ai/#trial).

See pricing [here](https://litellm.ai/#pricing).

:::


## 1. Create a budget

```bash
curl -L -X POST 'http://0.0.0.0:4000/budget/new' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{
"budget_id": "my-test-tier",
"rpm_limit": 0
}'
```

## 2. Assign budget to a key

```bash
curl -L -X POST 'http://0.0.0.0:4000/key/generate' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{
"budget_id": "my-test-tier"
}'
```

Expected Response:

```json
{
"key": "sk-...",
"budget_id": "my-test-tier",
"litellm_budget_table": {
"budget_id": "my-test-tier",
"rpm_limit": 0
}
}
```

## 3. Check if budget is enforced on key

Use the key returned in step 2 as the bearer token:

```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-...' \
-d '{
"model": "<REPLACE_WITH_MODEL_NAME_FROM_CONFIG.YAML>",
"messages": [
{"role": "user", "content": "hi my email is ishaan"}
]
}'
```
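Step 3 should be rejected, since the `my-test-tier` budget sets `rpm_limit: 0`. The enforcement idea can be sketched as a minimal sliding-window limiter (illustrative only — this is not the proxy's actual `parallel_request_limiter` implementation):

```python
import time
from collections import deque
from typing import Optional

class KeyRateLimiter:
    """Toy per-key RPM limiter: allow at most rpm_limit requests per 60s window."""

    def __init__(self, rpm_limit: int):
        self.rpm_limit = rpm_limit
        self.request_times: deque = deque()

    def allow(self, now: Optional[float] = None) -> bool:
        now = time.monotonic() if now is None else now
        # Evict requests that fell out of the 60-second window
        while self.request_times and now - self.request_times[0] >= 60:
            self.request_times.popleft()
        if len(self.request_times) >= self.rpm_limit:
            return False  # the proxy would surface this as an HTTP 429
        self.request_times.append(now)
        return True

limiter = KeyRateLimiter(rpm_limit=0)  # the tier created in step 1
print(limiter.allow(now=0.0))  # → False: every request on this key is rejected
```

A key on a tier with `rpm_limit: 0` is always rejected; a positive limit admits that many requests per rolling minute.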


## [API Reference](https://litellm-api.up.railway.app/#/budget%20management)

13 changes: 13 additions & 0 deletions docs/my-website/docs/proxy/user_management_heirarchy.md
@@ -0,0 +1,13 @@
import Image from '@theme/IdealImage';


# User Management Hierarchy

<Image img={require('../../img/litellm_user_heirarchy.png')} style={{ width: '100%', maxWidth: '4000px' }} />

LiteLLM supports a hierarchy of users, teams, organizations, and budgets.

- Organizations can have multiple teams. [API Reference](https://litellm-api.up.railway.app/#/organization%20management)
- Teams can have multiple users. [API Reference](https://litellm-api.up.railway.app/#/team%20management)
- Users can have multiple keys. [API Reference](https://litellm-api.up.railway.app/#/budget%20management)
- Keys can belong to either a team or a user. [API Reference](https://litellm-api.up.railway.app/#/end-user%20management)
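The four levels above can be sketched with plain dataclasses (field names here are illustrative assumptions, not LiteLLM's actual schema):

```python
from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class Key:
    token: str
    team_id: Optional[str] = None   # a key belongs to a team...
    user_id: Optional[str] = None   # ...or to an individual user

@dataclass
class User:
    user_id: str
    keys: List[Key] = field(default_factory=list)

@dataclass
class Team:
    team_id: str
    users: List[User] = field(default_factory=list)

@dataclass
class Organization:
    org_id: str
    teams: List[Team] = field(default_factory=list)

# One org → one team → one user → one key
org = Organization(
    "org-1",
    teams=[Team("ml-team", users=[User("ishaan", keys=[Key("sk-...", user_id="ishaan")])])],
)
print(org.teams[0].users[0].keys[0].user_id)  # → ishaan
```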
Binary file added docs/my-website/img/litellm_user_heirarchy.png
23 changes: 18 additions & 5 deletions docs/my-website/sidebars.js
@@ -51,7 +51,7 @@ const sidebars = {
{
type: "category",
label: "Architecture",
items: ["proxy/architecture", "proxy/db_info", "router_architecture"],
items: ["proxy/architecture", "proxy/db_info", "router_architecture", "proxy/user_management_heirarchy"],
},
{
type: "link",
@@ -99,8 +99,13 @@ const sidebars = {
},
{
type: "category",
label: "Spend Tracking + Budgets",
items: ["proxy/cost_tracking", "proxy/users", "proxy/custom_pricing", "proxy/team_budgets", "proxy/billing", "proxy/customers"],
label: "Spend Tracking",
items: ["proxy/cost_tracking", "proxy/custom_pricing", "proxy/billing",],
},
{
type: "category",
label: "Budgets + Rate Limits",
items: ["proxy/users", "proxy/rate_limit_tiers", "proxy/team_budgets", "proxy/customers"],
},
{
type: "link",
@@ -135,9 +140,17 @@
"oidc"
]
},
{
type: "category",
label: "Create Custom Plugins",
description: "Modify requests, responses, and more",
items: [
"proxy/call_hooks",
"proxy/rules",
]
},
"proxy/caching",
"proxy/call_hooks",
"proxy/rules",

]
},
{
12 changes: 9 additions & 3 deletions litellm/fine_tuning/main.py
@@ -19,12 +19,16 @@
import litellm
from litellm._logging import verbose_logger
from litellm.llms.azure.fine_tuning.handler import AzureOpenAIFineTuningAPI
from litellm.llms.openai.fine_tuning.handler import FineTuningJob, OpenAIFineTuningAPI
from litellm.llms.openai.fine_tuning.handler import OpenAIFineTuningAPI
from litellm.llms.vertex_ai.fine_tuning.handler import VertexFineTuningAPI
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import FineTuningJobCreate, Hyperparameters
from litellm.types.llms.openai import (
FineTuningJob,
FineTuningJobCreate,
Hyperparameters,
)
from litellm.types.router import *
from litellm.utils import supports_httpx_timeout
from litellm.utils import client, supports_httpx_timeout

####### ENVIRONMENT VARIABLES ###################
openai_fine_tuning_apis_instance = OpenAIFineTuningAPI()
@@ -33,6 +37,7 @@
#################################################


@client
async def acreate_fine_tuning_job(
model: str,
training_file: str,
@@ -86,6 +91,7 @@ async def acreate_fine_tuning_job(
raise e


@client
def create_fine_tuning_job(
model: str,
training_file: str,
8 changes: 6 additions & 2 deletions litellm/integrations/prometheus.py
@@ -633,8 +633,12 @@ def _set_virtual_key_rate_limit_metrics(
)
remaining_tokens_variable_name = f"litellm-key-remaining-tokens-{model_group}"

remaining_requests = metadata.get(remaining_requests_variable_name, sys.maxsize)
remaining_tokens = metadata.get(remaining_tokens_variable_name, sys.maxsize)
remaining_requests = (
metadata.get(remaining_requests_variable_name, sys.maxsize) or sys.maxsize
)
remaining_tokens = (
metadata.get(remaining_tokens_variable_name, sys.maxsize) or sys.maxsize
)

self.litellm_remaining_api_key_requests_for_model.labels(
user_api_key, user_api_key_alias, model_group
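The `or sys.maxsize` guard above matters because `dict.get` only applies its default when the key is absent — a key that is present with value `None` slips through. A minimal repro:

```python
import sys

metadata = {"litellm-key-remaining-requests-gpt-4": None}  # key present, value None
name = "litellm-key-remaining-requests-gpt-4"

plain = metadata.get(name, sys.maxsize)                   # default NOT applied → None
guarded = metadata.get(name, sys.maxsize) or sys.maxsize  # coerced to sys.maxsize

print(plain is None, guarded == sys.maxsize)  # → True True
```

One caveat of the `or` idiom: a legitimate remaining count of `0` is also falsy and would likewise be coerced to `sys.maxsize`.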
3 changes: 3 additions & 0 deletions litellm/litellm_core_utils/litellm_logging.py
@@ -37,6 +37,7 @@
from litellm.types.llms.openai import (
AllMessageValues,
Batch,
FineTuningJob,
HttpxBinaryResponseContent,
)
from litellm.types.rerank import RerankResponse
@@ -760,6 +761,7 @@ def _response_cost_calculator(
HttpxBinaryResponseContent,
RerankResponse,
Batch,
FineTuningJob,
],
cache_hit: Optional[bool] = None,
) -> Optional[float]:
@@ -877,6 +879,7 @@ def _success_handler_helper_fn(
or isinstance(result, HttpxBinaryResponseContent) # tts
or isinstance(result, RerankResponse)
or isinstance(result, Batch)
or isinstance(result, FineTuningJob)
):
## RESPONSE COST ##
self.model_call_details["response_cost"] = (
48 changes: 34 additions & 14 deletions litellm/proxy/_types.py
@@ -12,6 +12,7 @@
from litellm.types.router import RouterErrors, UpdateRouterConfig
from litellm.types.utils import (
EmbeddingResponse,
GenericBudgetConfigType,
ImageResponse,
LiteLLMPydanticObjectBase,
ModelResponse,
@@ -614,37 +615,39 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase):
rpm_limit: Optional[int] = None
budget_duration: Optional[str] = None
allowed_cache_controls: Optional[list] = []
soft_budget: Optional[float] = None
config: Optional[dict] = {}
permissions: Optional[dict] = {}
model_max_budget: Optional[dict] = (
{}
) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}

model_config = ConfigDict(protected_namespaces=())
send_invite_email: Optional[bool] = None
model_rpm_limit: Optional[dict] = None
model_tpm_limit: Optional[dict] = None
guardrails: Optional[List[str]] = None
blocked: Optional[bool] = None
aliases: Optional[dict] = {}


class _GenerateKeyRequest(GenerateRequestBase):
class KeyRequestBase(GenerateRequestBase):
key: Optional[str] = None


class GenerateKeyRequest(_GenerateKeyRequest):
budget_id: Optional[str] = None
tags: Optional[List[str]] = None
enforced_params: Optional[List[str]] = None


class GenerateKeyResponse(_GenerateKeyRequest):
class GenerateKeyRequest(KeyRequestBase):
soft_budget: Optional[float] = None
send_invite_email: Optional[bool] = None


class GenerateKeyResponse(KeyRequestBase):
key: str # type: ignore
key_name: Optional[str] = None
expires: Optional[datetime]
user_id: Optional[str] = None
token_id: Optional[str] = None
litellm_budget_table: Optional[Any] = None

@model_validator(mode="before")
@classmethod
@@ -669,7 +672,7 @@ def set_model_info(cls, values):
return values


class UpdateKeyRequest(GenerateKeyRequest):
class UpdateKeyRequest(KeyRequestBase):
# Note: the defaults of all Params here MUST BE NONE
# else they will get overwritten
key: str # type: ignore
@@ -765,7 +768,7 @@ class DeleteUserRequest(LiteLLMPydanticObjectBase):
AllowedModelRegion = Literal["eu", "us"]


class BudgetNew(LiteLLMPydanticObjectBase):
class BudgetNewRequest(LiteLLMPydanticObjectBase):
budget_id: Optional[str] = Field(default=None, description="The unique budget id.")
max_budget: Optional[float] = Field(
default=None,
@@ -788,6 +791,10 @@ class BudgetNew(LiteLLMPydanticObjectBase):
default=None,
description="Max duration budget should be set for (e.g. '1hr', '1d', '28d')",
)
model_max_budget: Optional[GenericBudgetConfigType] = Field(
default=None,
description="Max budget for each model (e.g. {'gpt-4o': {'max_budget': '0.0000001', 'budget_duration': '1d', 'tpm_limit': 1000, 'rpm_limit': 1000}})",
)


class BudgetRequest(LiteLLMPydanticObjectBase):
@@ -805,11 +812,11 @@ class CustomerBase(LiteLLMPydanticObjectBase):
allowed_model_region: Optional[AllowedModelRegion] = None
default_model: Optional[str] = None
budget_id: Optional[str] = None
litellm_budget_table: Optional[BudgetNew] = None
litellm_budget_table: Optional[BudgetNewRequest] = None
blocked: bool = False


class NewCustomerRequest(BudgetNew):
class NewCustomerRequest(BudgetNewRequest):
"""
Create a new customer, allocate a budget to them
"""
@@ -1426,6 +1433,19 @@ class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
# Time stamps
last_refreshed_at: Optional[float] = None # last time joint view was pulled from db

def __init__(self, **kwargs):
# Handle litellm_budget_table_* keys
for key, value in list(kwargs.items()):
if key.startswith("litellm_budget_table_") and value is not None:
# Extract the corresponding attribute name
attr_name = key.replace("litellm_budget_table_", "")
# Check if the value is None and set the corresponding attribute
if getattr(self, attr_name, None) is None:
kwargs[attr_name] = value

# Initialize the superclass
super().__init__(**kwargs)


class UserAPIKeyAuth(
LiteLLM_VerificationTokenView
@@ -2194,9 +2214,9 @@ class ProviderBudgetResponseObject(LiteLLMPydanticObjectBase):
Configuration for a single provider's budget settings
"""

budget_limit: float # Budget limit in USD for the time period
time_period: str # Time period for budget (e.g., '1d', '30d', '1mo')
spend: float = 0.0 # Current spend for this provider
budget_limit: Optional[float] # Budget limit in USD for the time period
time_period: Optional[str] # Time period for budget (e.g., '1d', '30d', '1mo')
spend: Optional[float] = 0.0 # Current spend for this provider
budget_reset_at: Optional[str] = None # When the current budget period resets


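The new `__init__` on `LiteLLM_VerificationTokenView` backfills unset key-level fields from `litellm_budget_table_`-prefixed columns produced by a DB join. The prefix-stripping logic in isolation (a standalone sketch, not the pydantic model itself):

```python
def backfill_from_budget_table(row: dict) -> dict:
    """Copy litellm_budget_table_<attr> values into <attr> when <attr> is unset.

    Mirrors the prefix handling in LiteLLM_VerificationTokenView.__init__ for
    rows produced by a SQL join against the budget table.
    """
    out = dict(row)
    for key, value in row.items():
        if key.startswith("litellm_budget_table_") and value is not None:
            attr = key[len("litellm_budget_table_"):]
            if out.get(attr) is None:  # only fill attributes that are unset
                out[attr] = value
    return out

row = {"rpm_limit": None, "litellm_budget_table_rpm_limit": 100}
print(backfill_from_budget_table(row)["rpm_limit"])  # → 100
```

Values already set on the row win; the budget-table column is only a fallback.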
9 changes: 9 additions & 0 deletions litellm/proxy/auth/auth_utils.py
@@ -418,6 +418,12 @@ def get_key_model_rpm_limit(user_api_key_dict: UserAPIKeyAuth) -> Optional[dict]
if user_api_key_dict.metadata:
if "model_rpm_limit" in user_api_key_dict.metadata:
return user_api_key_dict.metadata["model_rpm_limit"]
elif user_api_key_dict.model_max_budget:
model_rpm_limit: Dict[str, Any] = {}
for model, budget in user_api_key_dict.model_max_budget.items():
if "rpm_limit" in budget and budget["rpm_limit"] is not None:
model_rpm_limit[model] = budget["rpm_limit"]
return model_rpm_limit

return None

@@ -426,6 +432,9 @@ def get_key_model_tpm_limit(user_api_key_dict: UserAPIKeyAuth) -> Optional[dict]
if user_api_key_dict.metadata:
if "model_tpm_limit" in user_api_key_dict.metadata:
return user_api_key_dict.metadata["model_tpm_limit"]
elif user_api_key_dict.model_max_budget:
model_tpm_limit: Dict[str, Any] = {}
for model, budget in user_api_key_dict.model_max_budget.items():
if "tpm_limit" in budget and budget["tpm_limit"] is not None:
model_tpm_limit[model] = budget["tpm_limit"]
return model_tpm_limit

return None

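With this PR, a key's `model_max_budget` (shape documented on `BudgetNewRequest.model_max_budget`) doubles as a fallback source for per-model rpm limits. The extraction added to `get_key_model_rpm_limit` can be shown standalone (a sketch, decoupled from `UserAPIKeyAuth`):

```python
from typing import Any, Dict, Optional

def extract_model_rpm_limits(model_max_budget: Optional[Dict[str, Any]]) -> Optional[dict]:
    """Pull per-model rpm limits out of a model_max_budget config.

    Mirrors the fallback branch added to get_key_model_rpm_limit.
    """
    if not model_max_budget:
        return None
    limits: Dict[str, Any] = {}
    for model, budget in model_max_budget.items():
        if budget.get("rpm_limit") is not None:
            limits[model] = budget["rpm_limit"]
    return limits

cfg = {"gpt-4o": {"max_budget": 0.0000001, "budget_duration": "1d", "tpm_limit": 1000, "rpm_limit": 1000}}
print(extract_model_rpm_limits(cfg))  # → {'gpt-4o': 1000}
```

Models whose budget entry has no `rpm_limit` are simply omitted from the result.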