diff --git a/docs/my-website/docs/providers/azure.md b/docs/my-website/docs/providers/azure.md
index 44e9024ac1d8..97a8ff10e631 100644
--- a/docs/my-website/docs/providers/azure.md
+++ b/docs/my-website/docs/providers/azure.md
@@ -4,6 +4,16 @@ import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
 # Azure OpenAI
+
+## Overview
+
+| Property | Details |
+|-------|-------|
+| Description | Azure OpenAI Service provides REST API access to OpenAI's powerful language models, including the o1, o1-mini, GPT-4o, GPT-4o mini, GPT-4 Turbo with Vision, GPT-4, GPT-3.5-Turbo, and Embeddings model series |
+| Provider Route on LiteLLM | `azure/` |
+| Supported Operations | [`/chat/completions`](#azure-openai-chat-completion-models), [`/completions`](#azure-instruct-models), [`/embeddings`](../embedding/supported_embedding#azure-openai-embedding-models), [`/audio/speech`](#azure-text-to-speech-tts), [`/audio/transcriptions`](../audio_transcription), `/fine_tuning`, [`/batches`](#azure-batches-api), `/files`, [`/images`](../image_generation#azure-openai-image-generation-models) |
+| Link to Provider Doc | [Azure OpenAI ↗](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview) |
+
 ## API Keys, Params
 api_key, api_base, api_version etc can be passed directly to `litellm.completion` - see here or set as `litellm.api_key` params see here
 ```python
@@ -889,7 +899,6 @@ Expected Response:
 {"data":[{"id":"batch_R3V...}
 ```
 
-
 ## Advanced
 
 ### Azure API Load-Balancing
diff --git a/docs/my-website/docs/proxy/customers.md b/docs/my-website/docs/proxy/customers.md
index ba9ecd83dde6..2035b24f3a6a 100644
--- a/docs/my-website/docs/proxy/customers.md
+++ b/docs/my-website/docs/proxy/customers.md
@@ -2,11 +2,11 @@ import Image from '@theme/IdealImage';
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# 🙋‍♂️ Customers
+# 🙋‍♂️ Customers / End-User Budgets
 
 Track spend, set budgets for your customers.
 
-## Tracking Customer Credit
+## Tracking Customer Spend
 
 ### 1. Make LLM API call w/ Customer ID
diff --git a/docs/my-website/docs/proxy/rate_limit_tiers.md b/docs/my-website/docs/proxy/rate_limit_tiers.md
new file mode 100644
index 000000000000..e7dc075bd1dd
--- /dev/null
+++ b/docs/my-website/docs/proxy/rate_limit_tiers.md
@@ -0,0 +1,68 @@
+# ✨ Budget / Rate Limit Tiers
+
+Create tiers with different budgets and rate limits, making it easy to manage different users and their usage.
+
+:::info
+
+This is a LiteLLM Enterprise feature.
+
+Get a 7-day free trial + get in touch [here](https://litellm.ai/#trial).
+
+See pricing [here](https://litellm.ai/#pricing).
+
+:::
+
+
+## 1. Create a budget
+
+```bash
+curl -L -X POST 'http://0.0.0.0:4000/budget/new' \
+-H 'Authorization: Bearer sk-1234' \
+-H 'Content-Type: application/json' \
+-d '{
+    "budget_id": "my-test-tier",
+    "rpm_limit": 0
+}'
+```
+
+## 2. Assign budget to a key
+
+```bash
+curl -L -X POST 'http://0.0.0.0:4000/key/generate' \
+-H 'Authorization: Bearer sk-1234' \
+-H 'Content-Type: application/json' \
+-d '{
+    "budget_id": "my-test-tier"
+}'
+```
+
+Expected Response:
+
+```json
+{
+    "key": "sk-...",
+    "budget_id": "my-test-tier",
+    "litellm_budget_table": {
+        "budget_id": "my-test-tier",
+        "rpm_limit": 0
+    }
+}
+```
+
+## 3. Check if budget is enforced on key
+
+```bash
+# 👈 use the KEY from step 2 as the Authorization bearer token
+curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-...' \
+-d '{
+    "model": "",
+    "messages": [
+        {"role": "user", "content": "hi my email is ishaan"}
+    ]
+}'
+```
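+
+Expected Response:
+
+Since the `my-test-tier` budget sets `"rpm_limit": 0`, the request should be rejected with an HTTP 429 rate-limit error. The exact message varies by LiteLLM version; the body is shaped roughly like this (illustrative values):
+
+```json
+{
+    "error": {
+        "message": "Crossed RPM limit (rpm_limit=0) for this key's tier",
+        "type": "rate_limit_error",
+        "param": null,
+        "code": "429"
+    }
+}
+```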
+
+## [API Reference](https://litellm-api.up.railway.app/#/budget%20management)
+
diff --git a/docs/my-website/docs/proxy/user_management_heirarchy.md b/docs/my-website/docs/proxy/user_management_heirarchy.md
new file mode 100644
index 000000000000..5f3e83ae350f
--- /dev/null
+++ b/docs/my-website/docs/proxy/user_management_heirarchy.md
@@ -0,0 +1,13 @@
+import Image from '@theme/IdealImage';
+
+
+# User Management Hierarchy
+
+<Image img={require('../../img/litellm_user_heirarchy.png')} />
+
+LiteLLM supports a hierarchy of users, teams, organizations, and budgets. A minimal end-to-end setup is sketched after the list below.
+
+- Organizations can have multiple teams. [API Reference](https://litellm-api.up.railway.app/#/organization%20management)
+- Teams can have multiple users. [API Reference](https://litellm-api.up.railway.app/#/team%20management)
+- Users can have multiple keys. [API Reference](https://litellm-api.up.railway.app/#/budget%20management)
+- Keys can belong to either a team or a user. [API Reference](https://litellm-api.up.railway.app/#/end-user%20management)
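+
+A minimal sketch of that hierarchy using the management API (aliases and IDs below are placeholders; `sk-1234` is an assumed admin key):
+
+```bash
+# 1. Create an organization
+curl -X POST 'http://0.0.0.0:4000/organization/new' \
+-H 'Authorization: Bearer sk-1234' -H 'Content-Type: application/json' \
+-d '{"organization_alias": "my-org"}'
+
+# 2. Create a team under that organization
+curl -X POST 'http://0.0.0.0:4000/team/new' \
+-H 'Authorization: Bearer sk-1234' -H 'Content-Type: application/json' \
+-d '{"team_alias": "my-team", "organization_id": "<org-id-from-step-1>"}'
+
+# 3. Create a key for a user on that team
+curl -X POST 'http://0.0.0.0:4000/key/generate' \
+-H 'Authorization: Bearer sk-1234' -H 'Content-Type: application/json' \
+-d '{"user_id": "my-user", "team_id": "<team-id-from-step-2>"}'
+```
diff --git a/docs/my-website/img/litellm_user_heirarchy.png b/docs/my-website/img/litellm_user_heirarchy.png
new file mode 100644
index 000000000000..63dba72c21d1
Binary files /dev/null and b/docs/my-website/img/litellm_user_heirarchy.png differ
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 8f1ecfd8118f..6e32d57655d0 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -51,7 +51,7 @@ const sidebars = {
     {
       type: "category",
       label: "Architecture",
-      items: ["proxy/architecture", "proxy/db_info", "router_architecture"],
+      items: ["proxy/architecture", "proxy/db_info", "router_architecture", "proxy/user_management_heirarchy"],
     },
     {
       type: "link",
@@ -99,8 +99,13 @@ const sidebars = {
     },
     {
       type: "category",
-      label: "Spend Tracking + Budgets",
-      items: ["proxy/cost_tracking", "proxy/users", "proxy/custom_pricing", "proxy/team_budgets", "proxy/billing", "proxy/customers"],
+      label: "Spend Tracking",
+      items: ["proxy/cost_tracking", "proxy/custom_pricing", "proxy/billing",],
+    },
+    {
+      type: "category",
+      label: "Budgets + Rate Limits",
+      items: ["proxy/users", "proxy/rate_limit_tiers", "proxy/team_budgets", "proxy/customers"],
     },
     {
       type: "link",
@@ -135,9 +140,17 @@ const sidebars = {
         "oidc"
       ]
     },
+    {
+      type: "category",
+      label: "Create Custom Plugins",
+      description: "Modify requests, responses, and more",
+      items: [
+        "proxy/call_hooks",
+        "proxy/rules",
+      ]
+    },
     "proxy/caching",
-    "proxy/call_hooks",
-    "proxy/rules",
+
   ]
 },
 {
diff --git a/litellm/fine_tuning/main.py b/litellm/fine_tuning/main.py
index eace2f64a4b3..7672ad43a9be 100644
--- a/litellm/fine_tuning/main.py
+++ b/litellm/fine_tuning/main.py
@@ -19,12 +19,16 @@
 import litellm
 from litellm._logging import verbose_logger
 from litellm.llms.azure.fine_tuning.handler import AzureOpenAIFineTuningAPI
-from litellm.llms.openai.fine_tuning.handler import FineTuningJob, OpenAIFineTuningAPI
+from litellm.llms.openai.fine_tuning.handler import OpenAIFineTuningAPI
 from litellm.llms.vertex_ai.fine_tuning.handler import VertexFineTuningAPI
 from litellm.secret_managers.main import get_secret_str
-from litellm.types.llms.openai import FineTuningJobCreate, Hyperparameters
+from litellm.types.llms.openai import (
+    FineTuningJob,
+    FineTuningJobCreate,
+    Hyperparameters,
+)
 from litellm.types.router import *
-from litellm.utils import supports_httpx_timeout
+from litellm.utils import client, supports_httpx_timeout
 
 ####### ENVIRONMENT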
VARIABLES ################### openai_fine_tuning_apis_instance = OpenAIFineTuningAPI() @@ -33,6 +37,7 @@ ################################################# +@client async def acreate_fine_tuning_job( model: str, training_file: str, @@ -86,6 +91,7 @@ async def acreate_fine_tuning_job( raise e +@client def create_fine_tuning_job( model: str, training_file: str, diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index e7e4a8cdb2b8..5454c5fcb01b 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -633,8 +633,12 @@ def _set_virtual_key_rate_limit_metrics( ) remaining_tokens_variable_name = f"litellm-key-remaining-tokens-{model_group}" - remaining_requests = metadata.get(remaining_requests_variable_name, sys.maxsize) - remaining_tokens = metadata.get(remaining_tokens_variable_name, sys.maxsize) + remaining_requests = ( + metadata.get(remaining_requests_variable_name, sys.maxsize) or sys.maxsize + ) + remaining_tokens = ( + metadata.get(remaining_tokens_variable_name, sys.maxsize) or sys.maxsize + ) self.litellm_remaining_api_key_requests_for_model.labels( user_api_key, user_api_key_alias, model_group diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index ee769bb7ebbf..0736aa2da5a6 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -37,6 +37,7 @@ from litellm.types.llms.openai import ( AllMessageValues, Batch, + FineTuningJob, HttpxBinaryResponseContent, ) from litellm.types.rerank import RerankResponse @@ -760,6 +761,7 @@ def _response_cost_calculator( HttpxBinaryResponseContent, RerankResponse, Batch, + FineTuningJob, ], cache_hit: Optional[bool] = None, ) -> Optional[float]: @@ -877,6 +879,7 @@ def _success_handler_helper_fn( or isinstance(result, HttpxBinaryResponseContent) # tts or isinstance(result, RerankResponse) or isinstance(result, Batch) + or isinstance(result, FineTuningJob) ): ## RESPONSE COST ## self.model_call_details["response_cost"] = ( diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 277c31acc685..a226462ff7d7 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -12,6 +12,7 @@ from litellm.types.router import RouterErrors, UpdateRouterConfig from litellm.types.utils import ( EmbeddingResponse, + GenericBudgetConfigType, ImageResponse, LiteLLMPydanticObjectBase, ModelResponse, @@ -614,7 +615,6 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase): rpm_limit: Optional[int] = None budget_duration: Optional[str] = None allowed_cache_controls: Optional[list] = [] - soft_budget: Optional[float] = None config: Optional[dict] = {} permissions: Optional[dict] = {} model_max_budget: Optional[dict] = ( @@ -622,7 +622,6 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase): ) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {} model_config = ConfigDict(protected_namespaces=()) - send_invite_email: Optional[bool] = None model_rpm_limit: Optional[dict] = None model_tpm_limit: Optional[dict] = None guardrails: Optional[List[str]] = None @@ -630,21 +629,25 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase): aliases: Optional[dict] = {} -class _GenerateKeyRequest(GenerateRequestBase): +class KeyRequestBase(GenerateRequestBase): key: Optional[str] = None - - -class GenerateKeyRequest(_GenerateKeyRequest): + budget_id: Optional[str] = None tags: Optional[List[str]] = None enforced_params: Optional[List[str]] = None -class 
GenerateKeyResponse(_GenerateKeyRequest): +class GenerateKeyRequest(KeyRequestBase): + soft_budget: Optional[float] = None + send_invite_email: Optional[bool] = None + + +class GenerateKeyResponse(KeyRequestBase): key: str # type: ignore key_name: Optional[str] = None expires: Optional[datetime] user_id: Optional[str] = None token_id: Optional[str] = None + litellm_budget_table: Optional[Any] = None @model_validator(mode="before") @classmethod @@ -669,7 +672,7 @@ def set_model_info(cls, values): return values -class UpdateKeyRequest(GenerateKeyRequest): +class UpdateKeyRequest(KeyRequestBase): # Note: the defaults of all Params here MUST BE NONE # else they will get overwritten key: str # type: ignore @@ -765,7 +768,7 @@ class DeleteUserRequest(LiteLLMPydanticObjectBase): AllowedModelRegion = Literal["eu", "us"] -class BudgetNew(LiteLLMPydanticObjectBase): +class BudgetNewRequest(LiteLLMPydanticObjectBase): budget_id: Optional[str] = Field(default=None, description="The unique budget id.") max_budget: Optional[float] = Field( default=None, @@ -788,6 +791,10 @@ class BudgetNew(LiteLLMPydanticObjectBase): default=None, description="Max duration budget should be set for (e.g. '1hr', '1d', '28d')", ) + model_max_budget: Optional[GenericBudgetConfigType] = Field( + default=None, + description="Max budget for each model (e.g. {'gpt-4o': {'max_budget': '0.0000001', 'budget_duration': '1d', 'tpm_limit': 1000, 'rpm_limit': 1000}})", + ) class BudgetRequest(LiteLLMPydanticObjectBase): @@ -805,11 +812,11 @@ class CustomerBase(LiteLLMPydanticObjectBase): allowed_model_region: Optional[AllowedModelRegion] = None default_model: Optional[str] = None budget_id: Optional[str] = None - litellm_budget_table: Optional[BudgetNew] = None + litellm_budget_table: Optional[BudgetNewRequest] = None blocked: bool = False -class NewCustomerRequest(BudgetNew): +class NewCustomerRequest(BudgetNewRequest): """ Create a new customer, allocate a budget to them """ @@ -1426,6 +1433,19 @@ class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken): # Time stamps last_refreshed_at: Optional[float] = None # last time joint view was pulled from db + def __init__(self, **kwargs): + # Handle litellm_budget_table_* keys + for key, value in list(kwargs.items()): + if key.startswith("litellm_budget_table_") and value is not None: + # Extract the corresponding attribute name + attr_name = key.replace("litellm_budget_table_", "") + # Check if the value is None and set the corresponding attribute + if getattr(self, attr_name, None) is None: + kwargs[attr_name] = value + + # Initialize the superclass + super().__init__(**kwargs) + class UserAPIKeyAuth( LiteLLM_VerificationTokenView @@ -2194,9 +2214,9 @@ class ProviderBudgetResponseObject(LiteLLMPydanticObjectBase): Configuration for a single provider's budget settings """ - budget_limit: float # Budget limit in USD for the time period - time_period: str # Time period for budget (e.g., '1d', '30d', '1mo') - spend: float = 0.0 # Current spend for this provider + budget_limit: Optional[float] # Budget limit in USD for the time period + time_period: Optional[str] # Time period for budget (e.g., '1d', '30d', '1mo') + spend: Optional[float] = 0.0 # Current spend for this provider budget_reset_at: Optional[str] = None # When the current budget period resets diff --git a/litellm/proxy/auth/auth_utils.py b/litellm/proxy/auth/auth_utils.py index f73e045075bd..c1091d500f61 100644 --- a/litellm/proxy/auth/auth_utils.py +++ b/litellm/proxy/auth/auth_utils.py @@ -418,6 +418,12 @@ def 
get_key_model_rpm_limit(user_api_key_dict: UserAPIKeyAuth) -> Optional[dict] if user_api_key_dict.metadata: if "model_rpm_limit" in user_api_key_dict.metadata: return user_api_key_dict.metadata["model_rpm_limit"] + elif user_api_key_dict.model_max_budget: + model_rpm_limit: Dict[str, Any] = {} + for model, budget in user_api_key_dict.model_max_budget.items(): + if "rpm_limit" in budget and budget["rpm_limit"] is not None: + model_rpm_limit[model] = budget["rpm_limit"] + return model_rpm_limit return None @@ -426,6 +432,9 @@ def get_key_model_tpm_limit(user_api_key_dict: UserAPIKeyAuth) -> Optional[dict] if user_api_key_dict.metadata: if "model_tpm_limit" in user_api_key_dict.metadata: return user_api_key_dict.metadata["model_tpm_limit"] + elif user_api_key_dict.model_max_budget: + if "tpm_limit" in user_api_key_dict.model_max_budget: + return user_api_key_dict.model_max_budget["tpm_limit"] return None diff --git a/litellm/proxy/hooks/model_max_budget_limiter.py b/litellm/proxy/hooks/model_max_budget_limiter.py index 5d5e56e014fd..3befca851639 100644 --- a/litellm/proxy/hooks/model_max_budget_limiter.py +++ b/litellm/proxy/hooks/model_max_budget_limiter.py @@ -9,8 +9,8 @@ from litellm.router_strategy.budget_limiter import RouterBudgetLimiting from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import ( + BudgetConfig, GenericBudgetConfigType, - GenericBudgetInfo, StandardLoggingPayload, ) @@ -42,12 +42,8 @@ async def is_key_within_model_budget( _model_max_budget = user_api_key_dict.model_max_budget internal_model_max_budget: GenericBudgetConfigType = {} - # case each element in _model_max_budget to GenericBudgetInfo for _model, _budget_info in _model_max_budget.items(): - internal_model_max_budget[_model] = GenericBudgetInfo( - time_period=_budget_info.get("time_period"), - budget_limit=float(_budget_info.get("budget_limit")), - ) + internal_model_max_budget[_model] = BudgetConfig(**_budget_info) verbose_proxy_logger.debug( "internal_model_max_budget %s", @@ -65,7 +61,10 @@ async def is_key_within_model_budget( return True # check if current model is within budget - if _current_model_budget_info.budget_limit > 0: + if ( + _current_model_budget_info.max_budget + and _current_model_budget_info.max_budget > 0 + ): _current_spend = await self._get_virtual_key_spend_for_model( user_api_key_hash=user_api_key_dict.token, model=model, @@ -73,12 +72,13 @@ async def is_key_within_model_budget( ) if ( _current_spend is not None - and _current_spend > _current_model_budget_info.budget_limit + and _current_model_budget_info.max_budget is not None + and _current_spend > _current_model_budget_info.max_budget ): raise litellm.BudgetExceededError( message=f"LiteLLM Virtual Key: {user_api_key_dict.token}, key_alias: {user_api_key_dict.key_alias}, exceeded budget for model={model}", current_cost=_current_spend, - max_budget=_current_model_budget_info.budget_limit, + max_budget=_current_model_budget_info.max_budget, ) return True @@ -87,7 +87,7 @@ async def _get_virtual_key_spend_for_model( self, user_api_key_hash: Optional[str], model: str, - key_budget_config: GenericBudgetInfo, + key_budget_config: BudgetConfig, ) -> Optional[float]: """ Get the current spend for a virtual key for a model @@ -98,7 +98,7 @@ async def _get_virtual_key_spend_for_model( """ # 1. 
model: directly look up `model` - virtual_key_model_spend_cache_key = f"{VIRTUAL_KEY_SPEND_CACHE_KEY_PREFIX}:{user_api_key_hash}:{model}:{key_budget_config.time_period}" + virtual_key_model_spend_cache_key = f"{VIRTUAL_KEY_SPEND_CACHE_KEY_PREFIX}:{user_api_key_hash}:{model}:{key_budget_config.budget_duration}" _current_spend = await self.dual_cache.async_get_cache( key=virtual_key_model_spend_cache_key, ) @@ -106,7 +106,7 @@ async def _get_virtual_key_spend_for_model( if _current_spend is None: # 2. If 1, does not exist, check if passed as {custom_llm_provider}/model # if "/" in model, remove first part before "/" - eg. openai/o1-preview -> o1-preview - virtual_key_model_spend_cache_key = f"{VIRTUAL_KEY_SPEND_CACHE_KEY_PREFIX}:{user_api_key_hash}:{self._get_model_without_custom_llm_provider(model)}:{key_budget_config.time_period}" + virtual_key_model_spend_cache_key = f"{VIRTUAL_KEY_SPEND_CACHE_KEY_PREFIX}:{user_api_key_hash}:{self._get_model_without_custom_llm_provider(model)}:{key_budget_config.budget_duration}" _current_spend = await self.dual_cache.async_get_cache( key=virtual_key_model_spend_cache_key, ) @@ -114,7 +114,7 @@ async def _get_virtual_key_spend_for_model( def _get_request_model_budget_config( self, model: str, internal_model_max_budget: GenericBudgetConfigType - ) -> Optional[GenericBudgetInfo]: + ) -> Optional[BudgetConfig]: """ Get the budget config for the request model @@ -175,8 +175,8 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti virtual_key = standard_logging_payload.get("metadata").get("user_api_key_hash") model = standard_logging_payload.get("model") if virtual_key is not None: - budget_config = GenericBudgetInfo(time_period="1d", budget_limit=0.1) - virtual_spend_key = f"{VIRTUAL_KEY_SPEND_CACHE_KEY_PREFIX}:{virtual_key}:{model}:{budget_config.time_period}" + budget_config = BudgetConfig(time_period="1d", budget_limit=0.1) + virtual_spend_key = f"{VIRTUAL_KEY_SPEND_CACHE_KEY_PREFIX}:{virtual_key}:{model}:{budget_config.budget_duration}" virtual_start_time_key = f"virtual_key_budget_start_time:{virtual_key}" await self._increment_spend_for_key( budget_config=budget_config, diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py index b1a2716a4e84..656e2f880494 100644 --- a/litellm/proxy/hooks/parallel_request_limiter.py +++ b/litellm/proxy/hooks/parallel_request_limiter.py @@ -317,7 +317,6 @@ async def async_pre_call_hook( # noqa: PLR0915 _tpm_limit_for_key_model = get_key_model_tpm_limit(user_api_key_dict) _rpm_limit_for_key_model = get_key_model_rpm_limit(user_api_key_dict) - if _model is not None: if _tpm_limit_for_key_model: @@ -325,6 +324,7 @@ async def async_pre_call_hook( # noqa: PLR0915 if _rpm_limit_for_key_model: rpm_limit_for_model = _rpm_limit_for_key_model.get(_model) + if current is None: new_val = { "current_requests": 1, @@ -485,6 +485,7 @@ async def async_log_success_event( # noqa: PLR0915 ) try: self.print_verbose("INSIDE parallel request limiter ASYNC SUCCESS LOGGING") + global_max_parallel_requests = kwargs["litellm_params"]["metadata"].get( "global_max_parallel_requests", None ) @@ -495,6 +496,9 @@ async def async_log_success_event( # noqa: PLR0915 user_api_key_team_id = kwargs["litellm_params"]["metadata"].get( "user_api_key_team_id", None ) + user_api_key_model_max_budget = kwargs["litellm_params"]["metadata"].get( + "user_api_key_model_max_budget", None + ) user_api_key_end_user_id = kwargs.get("user") user_api_key_metadata = ( @@ -568,6 +572,7 @@ 
async def async_log_success_event( # noqa: PLR0915 and ( "model_rpm_limit" in user_api_key_metadata or "model_tpm_limit" in user_api_key_metadata + or user_api_key_model_max_budget is not None ) ): request_count_api_key = ( diff --git a/litellm/proxy/management_endpoints/budget_management_endpoints.py b/litellm/proxy/management_endpoints/budget_management_endpoints.py new file mode 100644 index 000000000000..20aa1c6bbf00 --- /dev/null +++ b/litellm/proxy/management_endpoints/budget_management_endpoints.py @@ -0,0 +1,287 @@ +""" +BUDGET MANAGEMENT + +All /budget management endpoints + +/budget/new +/budget/info +/budget/update +/budget/delete +/budget/settings +/budget/list +""" + +#### BUDGET TABLE MANAGEMENT #### +from fastapi import APIRouter, Depends, HTTPException + +from litellm.proxy._types import * +from litellm.proxy.auth.user_api_key_auth import user_api_key_auth +from litellm.proxy.utils import jsonify_object + +router = APIRouter() + + +@router.post( + "/budget/new", + tags=["budget management"], + dependencies=[Depends(user_api_key_auth)], +) +async def new_budget( + budget_obj: BudgetNewRequest, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Create a new budget object. Can apply this to teams, orgs, end-users, keys. + + Parameters: + - budget_duration: Optional[str] - Budget reset period ("30d", "1h", etc.) + - budget_id: Optional[str] - The id of the budget. If not provided, a new id will be generated. + - max_budget: Optional[float] - The max budget for the budget. + - soft_budget: Optional[float] - The soft budget for the budget. + - max_parallel_requests: Optional[int] - The max number of parallel requests for the budget. + - tpm_limit: Optional[int] - The tokens per minute limit for the budget. + - rpm_limit: Optional[int] - The requests per minute limit for the budget. + - model_max_budget: Optional[dict] - Specify max budget for a given model. Example: {"openai/gpt-4o-mini": {"max_budget": 100.0, "budget_duration": "1d", "tpm_limit": 100000, "rpm_limit": 100000}} + """ + from litellm.proxy.proxy_server import litellm_proxy_admin_name, prisma_client + + if prisma_client is None: + raise HTTPException( + status_code=500, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + budget_obj_json = budget_obj.model_dump(exclude_none=True) + budget_obj_jsonified = jsonify_object(budget_obj_json) # json dump any dictionaries + response = await prisma_client.db.litellm_budgettable.create( + data={ + **budget_obj_jsonified, # type: ignore + "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name, + "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name, + } # type: ignore + ) + + return response + + +@router.post( + "/budget/update", + tags=["budget management"], + dependencies=[Depends(user_api_key_auth)], +) +async def update_budget( + budget_obj: BudgetNewRequest, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Update an existing budget object. + + Parameters: + - budget_duration: Optional[str] - Budget reset period ("30d", "1h", etc.) + - budget_id: Optional[str] - The id of the budget. If not provided, a new id will be generated. + - max_budget: Optional[float] - The max budget for the budget. + - soft_budget: Optional[float] - The soft budget for the budget. + - max_parallel_requests: Optional[int] - The max number of parallel requests for the budget. + - tpm_limit: Optional[int] - The tokens per minute limit for the budget. 
+ - rpm_limit: Optional[int] - The requests per minute limit for the budget. + - model_max_budget: Optional[dict] - Specify max budget for a given model. Example: {"openai/gpt-4o-mini": {"max_budget": 100.0, "budget_duration": "1d", "tpm_limit": 100000, "rpm_limit": 100000}} + """ + from litellm.proxy.proxy_server import litellm_proxy_admin_name, prisma_client + + if prisma_client is None: + raise HTTPException( + status_code=500, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + if budget_obj.budget_id is None: + raise HTTPException(status_code=400, detail={"error": "budget_id is required"}) + + response = await prisma_client.db.litellm_budgettable.update( + where={"budget_id": budget_obj.budget_id}, + data={ + **budget_obj.model_dump(exclude_none=True), # type: ignore + "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name, + }, # type: ignore + ) + + return response + + +@router.post( + "/budget/info", + tags=["budget management"], + dependencies=[Depends(user_api_key_auth)], +) +async def info_budget(data: BudgetRequest): + """ + Get the budget id specific information + + Parameters: + - budgets: List[str] - The list of budget ids to get information for + """ + from litellm.proxy.proxy_server import prisma_client + + if prisma_client is None: + raise HTTPException(status_code=500, detail={"error": "No db connected"}) + + if len(data.budgets) == 0: + raise HTTPException( + status_code=400, + detail={ + "error": f"Specify list of budget id's to query. Passed in={data.budgets}" + }, + ) + response = await prisma_client.db.litellm_budgettable.find_many( + where={"budget_id": {"in": data.budgets}}, + ) + + return response + + +@router.get( + "/budget/settings", + tags=["budget management"], + dependencies=[Depends(user_api_key_auth)], +) +async def budget_settings( + budget_id: str, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Get list of configurable params + current value for a budget item + description of each field + + Used on Admin UI. 
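+
+    Example request (sketch; assumes an admin key `sk-1234` and a budget created via `/budget/new`):
+    ```
+    curl -X GET 'http://0.0.0.0:4000/budget/settings?budget_id=my-test-tier' \
+    -H 'Authorization: Bearer sk-1234'
+    ```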
+ + Query Parameters: + - budget_id: str - The budget id to get information for + """ + from litellm.proxy.proxy_server import prisma_client + + if prisma_client is None: + raise HTTPException( + status_code=400, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN: + raise HTTPException( + status_code=400, + detail={ + "error": "{}, your role={}".format( + CommonProxyErrors.not_allowed_access.value, + user_api_key_dict.user_role, + ) + }, + ) + + ## get budget item from db + db_budget_row = await prisma_client.db.litellm_budgettable.find_first( + where={"budget_id": budget_id} + ) + + if db_budget_row is not None: + db_budget_row_dict = db_budget_row.model_dump(exclude_none=True) + else: + db_budget_row_dict = {} + + allowed_args = { + "max_parallel_requests": {"type": "Integer"}, + "tpm_limit": {"type": "Integer"}, + "rpm_limit": {"type": "Integer"}, + "budget_duration": {"type": "String"}, + "max_budget": {"type": "Float"}, + "soft_budget": {"type": "Float"}, + } + + return_val = [] + + for field_name, field_info in BudgetNewRequest.model_fields.items(): + if field_name in allowed_args: + + _stored_in_db = True + + _response_obj = ConfigList( + field_name=field_name, + field_type=allowed_args[field_name]["type"], + field_description=field_info.description or "", + field_value=db_budget_row_dict.get(field_name, None), + stored_in_db=_stored_in_db, + field_default_value=field_info.default, + ) + return_val.append(_response_obj) + + return return_val + + +@router.get( + "/budget/list", + tags=["budget management"], + dependencies=[Depends(user_api_key_auth)], +) +async def list_budget( + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """List all the created budgets in proxy db. 
Used on Admin UI.""" + from litellm.proxy.proxy_server import prisma_client + + if prisma_client is None: + raise HTTPException( + status_code=400, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN: + raise HTTPException( + status_code=400, + detail={ + "error": "{}, your role={}".format( + CommonProxyErrors.not_allowed_access.value, + user_api_key_dict.user_role, + ) + }, + ) + + response = await prisma_client.db.litellm_budgettable.find_many() + + return response + + +@router.post( + "/budget/delete", + tags=["budget management"], + dependencies=[Depends(user_api_key_auth)], +) +async def delete_budget( + data: BudgetDeleteRequest, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Delete budget + + Parameters: + - id: str - The budget id to delete + """ + from litellm.proxy.proxy_server import prisma_client + + if prisma_client is None: + raise HTTPException( + status_code=500, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + + if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN: + raise HTTPException( + status_code=400, + detail={ + "error": "{}, your role={}".format( + CommonProxyErrors.not_allowed_access.value, + user_api_key_dict.user_role, + ) + }, + ) + + response = await prisma_client.db.litellm_budgettable.delete( + where={"budget_id": data.id} + ) + + return response diff --git a/litellm/proxy/management_endpoints/customer_endpoints.py b/litellm/proxy/management_endpoints/customer_endpoints.py index 47bc7f61653c..976ff8581f48 100644 --- a/litellm/proxy/management_endpoints/customer_endpoints.py +++ b/litellm/proxy/management_endpoints/customer_endpoints.py @@ -131,11 +131,11 @@ async def unblock_user(data: BlockUsers): return {"blocked_users": litellm.blocked_user_list} -def new_budget_request(data: NewCustomerRequest) -> Optional[BudgetNew]: +def new_budget_request(data: NewCustomerRequest) -> Optional[BudgetNewRequest]: """ Return a new budget object if new budget params are passed. """ - budget_params = BudgetNew.model_fields.keys() + budget_params = BudgetNewRequest.model_fields.keys() budget_kv_pairs = {} # Get the actual values from the data object using getattr @@ -147,7 +147,7 @@ def new_budget_request(data: NewCustomerRequest) -> Optional[BudgetNew]: budget_kv_pairs[field_name] = value if budget_kv_pairs: - return BudgetNew(**budget_kv_pairs) + return BudgetNewRequest(**budget_kv_pairs) return None @@ -182,6 +182,7 @@ async def new_end_user( - budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). - tpm_limit: Optional[int] - [Not Implemented Yet] Specify tpm limit for a given customer (Tokens per minute) - rpm_limit: Optional[int] - [Not Implemented Yet] Specify rpm limit for a given customer (Requests per minute) + - model_max_budget: Optional[dict] - [Not Implemented Yet] Specify max budget for a given model. Example: {"openai/gpt-4o-mini": {"max_budget": 100.0, "budget_duration": "1d"}} - max_parallel_requests: Optional[int] - [Not Implemented Yet] Specify max parallel requests for a given customer. - soft_budget: Optional[float] - [Not Implemented Yet] Get alerts when customer crosses given budget, doesn't block requests. 
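+
+    Example request (sketch; the endpoint is also mounted at `/end_user/new`, and `sk-1234` is a placeholder admin key):
+    ```
+    curl -X POST 'http://0.0.0.0:4000/customer/new' \
+    -H 'Authorization: Bearer sk-1234' \
+    -H 'Content-Type: application/json' \
+    -d '{"user_id": "my-customer-id", "max_budget": 10.0, "budget_duration": "30d"}'
+    ```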
@@ -271,7 +272,7 @@ async def new_end_user( _user_data = data.dict(exclude_none=True) for k, v in _user_data.items(): - if k not in BudgetNew.model_fields.keys(): + if k not in BudgetNewRequest.model_fields.keys(): new_end_user_obj[k] = v ## WRITE TO DB ## diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py index 402e8fbb891b..caf48e4342d4 100644 --- a/litellm/proxy/management_endpoints/key_management_endpoints.py +++ b/litellm/proxy/management_endpoints/key_management_endpoints.py @@ -40,7 +40,7 @@ ) from litellm.secret_managers.main import get_secret from litellm.types.utils import ( - GenericBudgetInfo, + BudgetConfig, PersonalUIKeyGenerationConfig, TeamUIKeyGenerationConfig, ) @@ -238,6 +238,7 @@ async def generate_key_fn( # noqa: PLR0915 - key: Optional[str] - User defined key value. If not set, a 16-digit unique sk-key is created for you. - team_id: Optional[str] - The team id of the key - user_id: Optional[str] - The user id of the key + - budget_id: Optional[str] - The budget id associated with the key. Created by calling `/budget/new`. - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models) - aliases: Optional[dict] - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models - config: Optional[dict] - any key-specific configs, overrides config in config.yaml @@ -249,7 +250,7 @@ async def generate_key_fn( # noqa: PLR0915 - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" } - guardrails: Optional[List[str]] - List of active guardrails for the key - permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false} - - model_max_budget: Optional[Dict[str, GenericBudgetInfo]] - Model-specific budgets {"gpt-4": {"budget_limit": 0.0005, "time_period": "30d"}}}. IF null or {} then no model specific budget. + - model_max_budget: Optional[Dict[str, BudgetConfig]] - Model-specific budgets {"gpt-4": {"budget_limit": 0.0005, "time_period": "30d"}}}. IF null or {} then no model specific budget. - model_rpm_limit: Optional[dict] - key-specific model rpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific rpm limit. - model_tpm_limit: Optional[dict] - key-specific model tpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific tpm limit. - allowed_cache_controls: Optional[list] - List of allowed cache control values. Example - ["no-cache", "no-store"]. See all values - https://docs.litellm.ai/docs/proxy/caching#turn-on--off-caching-per-request @@ -376,7 +377,7 @@ async def generate_key_fn( # noqa: PLR0915 ) # TODO: @ishaan-jaff: Migrate all budget tracking to use LiteLLM_BudgetTable - _budget_id = None + _budget_id = data.budget_id if prisma_client is not None and data.soft_budget is not None: # create the Budget Row for the LiteLLM Verification Token budget_row = LiteLLM_BudgetTable( @@ -547,14 +548,15 @@ async def update_key_fn( - key_alias: Optional[str] - User-friendly key alias - user_id: Optional[str] - User ID associated with key - team_id: Optional[str] - Team ID associated with key + - budget_id: Optional[str] - The budget id associated with the key. 
Created by calling `/budget/new`. - models: Optional[list] - Model_name's a user is allowed to call - tags: Optional[List[str]] - Tags for organizing keys (Enterprise only) - enforced_params: Optional[List[str]] - List of enforced params for the key (Enterprise only). [Docs](https://docs.litellm.ai/docs/proxy/enterprise#enforce-required-params-for-llm-requests) - spend: Optional[float] - Amount spent by key - max_budget: Optional[float] - Max budget for key - - model_max_budget: Optional[Dict[str, GenericBudgetInfo]] - Model-specific budgets {"gpt-4": {"budget_limit": 0.0005, "time_period": "30d"}} + - model_max_budget: Optional[Dict[str, BudgetConfig]] - Model-specific budgets {"gpt-4": {"budget_limit": 0.0005, "time_period": "30d"}} - budget_duration: Optional[str] - Budget reset period ("30d", "1h", etc.) - - soft_budget: Optional[float] - Soft budget limit (warning vs. hard stop). Will trigger a slack alert when this soft budget is reached. + - soft_budget: Optional[float] - [TODO] Soft budget limit (warning vs. hard stop). Will trigger a slack alert when this soft budget is reached. - max_parallel_requests: Optional[int] - Rate limit for parallel requests - metadata: Optional[dict] - Metadata for key. Example {"team": "core-infra", "app": "app2"} - tpm_limit: Optional[int] - Tokens per minute limit @@ -592,7 +594,7 @@ async def update_key_fn( ) try: - data_json: dict = data.model_dump(exclude_unset=True) + data_json: dict = data.model_dump(exclude_unset=True, exclude_none=True) key = data_json.pop("key") # get the row from db if prisma_client is None: @@ -1135,6 +1137,9 @@ async def generate_key_helper_fn( # noqa: PLR0915 data=key_data, table_name="key" ) key_data["token_id"] = getattr(create_key_response, "token", None) + key_data["litellm_budget_table"] = getattr( + create_key_response, "litellm_budget_table", None + ) except Exception as e: verbose_proxy_logger.error( "litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occured - {}".format( @@ -1247,7 +1252,7 @@ async def regenerate_key_fn( - tags: Optional[List[str]] - Tags for organizing keys (Enterprise only) - spend: Optional[float] - Amount spent by key - max_budget: Optional[float] - Max budget for key - - model_max_budget: Optional[Dict[str, GenericBudgetInfo]] - Model-specific budgets {"gpt-4": {"budget_limit": 0.0005, "time_period": "30d"}} + - model_max_budget: Optional[Dict[str, BudgetConfig]] - Model-specific budgets {"gpt-4": {"budget_limit": 0.0005, "time_period": "30d"}} - budget_duration: Optional[str] - Budget reset period ("30d", "1h", etc.) - soft_budget: Optional[float] - Soft budget limit (warning vs. hard stop). Will trigger a slack alert when this soft budget is reached. - max_parallel_requests: Optional[int] - Rate limit for parallel requests @@ -1956,7 +1961,7 @@ def validate_model_max_budget(model_max_budget: Optional[Dict]) -> None: # /CRUD endpoints can pass budget_limit as a string, so we need to convert it to a float if "budget_limit" in _budget_info: _budget_info["budget_limit"] = float(_budget_info["budget_limit"]) - GenericBudgetInfo(**_budget_info) + BudgetConfig(**_budget_info) except Exception as e: raise ValueError( f"Invalid model_max_budget: {str(e)}. 
Example of valid model_max_budget: https://docs.litellm.ai/docs/proxy/users" diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index f65c9fe6a386..39c9cd588190 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -178,6 +178,9 @@ def generate_feedback_box(): from litellm.proxy.hooks.proxy_failure_handler import _PROXY_failure_handler from litellm.proxy.hooks.proxy_track_cost_callback import _PROXY_track_cost_callback from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request +from litellm.proxy.management_endpoints.budget_management_endpoints import ( + router as budget_management_router, +) from litellm.proxy.management_endpoints.customer_endpoints import ( router as customer_router, ) @@ -5531,238 +5534,6 @@ async def supported_openai_params(model: str): ) -#### BUDGET TABLE MANAGEMENT #### - - -@router.post( - "/budget/new", - tags=["budget management"], - dependencies=[Depends(user_api_key_auth)], -) -async def new_budget( - budget_obj: BudgetNew, - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), -): - """ - Create a new budget object. Can apply this to teams, orgs, end-users, keys. - """ - global prisma_client - - if prisma_client is None: - raise HTTPException( - status_code=500, - detail={"error": CommonProxyErrors.db_not_connected_error.value}, - ) - - response = await prisma_client.db.litellm_budgettable.create( - data={ - **budget_obj.model_dump(exclude_none=True), # type: ignore - "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name, - "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name, - } # type: ignore - ) - - return response - - -@router.post( - "/budget/update", - tags=["budget management"], - dependencies=[Depends(user_api_key_auth)], -) -async def update_budget( - budget_obj: BudgetNew, - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), -): - """ - Create a new budget object. Can apply this to teams, orgs, end-users, keys. - """ - global prisma_client - - if prisma_client is None: - raise HTTPException( - status_code=500, - detail={"error": CommonProxyErrors.db_not_connected_error.value}, - ) - if budget_obj.budget_id is None: - raise HTTPException(status_code=400, detail={"error": "budget_id is required"}) - - response = await prisma_client.db.litellm_budgettable.update( - where={"budget_id": budget_obj.budget_id}, - data={ - **budget_obj.model_dump(exclude_none=True), # type: ignore - "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name, - }, # type: ignore - ) - - return response - - -@router.post( - "/budget/info", - tags=["budget management"], - dependencies=[Depends(user_api_key_auth)], -) -async def info_budget(data: BudgetRequest): - """ - Get the budget id specific information - """ - global prisma_client - - if prisma_client is None: - raise HTTPException(status_code=500, detail={"error": "No db connected"}) - - if len(data.budgets) == 0: - raise HTTPException( - status_code=400, - detail={ - "error": f"Specify list of budget id's to query. 
Passed in={data.budgets}" - }, - ) - response = await prisma_client.db.litellm_budgettable.find_many( - where={"budget_id": {"in": data.budgets}}, - ) - - return response - - -@router.get( - "/budget/settings", - tags=["budget management"], - dependencies=[Depends(user_api_key_auth)], -) -async def budget_settings( - budget_id: str, - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), -): - """ - Get list of configurable params + current value for a budget item + description of each field - - Used on Admin UI. - """ - if prisma_client is None: - raise HTTPException( - status_code=400, - detail={"error": CommonProxyErrors.db_not_connected_error.value}, - ) - - if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN: - raise HTTPException( - status_code=400, - detail={ - "error": "{}, your role={}".format( - CommonProxyErrors.not_allowed_access.value, - user_api_key_dict.user_role, - ) - }, - ) - - ## get budget item from db - db_budget_row = await prisma_client.db.litellm_budgettable.find_first( - where={"budget_id": budget_id} - ) - - if db_budget_row is not None: - db_budget_row_dict = db_budget_row.model_dump(exclude_none=True) - else: - db_budget_row_dict = {} - - allowed_args = { - "max_parallel_requests": {"type": "Integer"}, - "tpm_limit": {"type": "Integer"}, - "rpm_limit": {"type": "Integer"}, - "budget_duration": {"type": "String"}, - "max_budget": {"type": "Float"}, - "soft_budget": {"type": "Float"}, - } - - return_val = [] - - for field_name, field_info in BudgetNew.model_fields.items(): - if field_name in allowed_args: - - _stored_in_db = True - - _response_obj = ConfigList( - field_name=field_name, - field_type=allowed_args[field_name]["type"], - field_description=field_info.description or "", - field_value=db_budget_row_dict.get(field_name, None), - stored_in_db=_stored_in_db, - field_default_value=field_info.default, - ) - return_val.append(_response_obj) - - return return_val - - -@router.get( - "/budget/list", - tags=["budget management"], - dependencies=[Depends(user_api_key_auth)], -) -async def list_budget( - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), -): - """List all the created budgets in proxy db. 
Used on Admin UI.""" - if prisma_client is None: - raise HTTPException( - status_code=400, - detail={"error": CommonProxyErrors.db_not_connected_error.value}, - ) - - if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN: - raise HTTPException( - status_code=400, - detail={ - "error": "{}, your role={}".format( - CommonProxyErrors.not_allowed_access.value, - user_api_key_dict.user_role, - ) - }, - ) - - response = await prisma_client.db.litellm_budgettable.find_many() - - return response - - -@router.post( - "/budget/delete", - tags=["budget management"], - dependencies=[Depends(user_api_key_auth)], -) -async def delete_budget( - data: BudgetDeleteRequest, - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), -): - """Delete budget""" - global prisma_client - - if prisma_client is None: - raise HTTPException( - status_code=500, - detail={"error": CommonProxyErrors.db_not_connected_error.value}, - ) - - if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN: - raise HTTPException( - status_code=400, - detail={ - "error": "{}, your role={}".format( - CommonProxyErrors.not_allowed_access.value, - user_api_key_dict.user_role, - ) - }, - ) - - response = await prisma_client.db.litellm_budgettable.delete( - where={"budget_id": data.id} - ) - - return response - - #### MODEL MANAGEMENT #### @@ -8856,3 +8627,4 @@ def cleanup_router_config_variables(): app.include_router(ui_crud_endpoints_router) app.include_router(openai_files_router) app.include_router(team_callback_router) +app.include_router(budget_management_router) diff --git a/litellm/proxy/spend_tracking/spend_management_endpoints.py b/litellm/proxy/spend_tracking/spend_management_endpoints.py index 6af8593bd7dc..81968f9e0a9b 100644 --- a/litellm/proxy/spend_tracking/spend_management_endpoints.py +++ b/litellm/proxy/spend_tracking/spend_management_endpoints.py @@ -2533,8 +2533,8 @@ async def provider_budgets() -> ProviderBudgetResponse: _provider ) provider_budget_response_object = ProviderBudgetResponseObject( - budget_limit=_budget_info.budget_limit, - time_period=_budget_info.time_period, + budget_limit=_budget_info.max_budget, + time_period=_budget_info.budget_duration, spend=_provider_spend, budget_reset_at=_provider_budget_ttl, ) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index c93652f60c94..896a04cd8dd9 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -1018,6 +1018,19 @@ def on_backoff(details): print_verbose(f"Backing off... 
this was attempt #{details['tries']}") +def jsonify_object(data: dict) -> dict: + db_data = copy.deepcopy(data) + + for k, v in db_data.items(): + if isinstance(v, dict): + try: + db_data[k] = json.dumps(v) + except Exception: + # This avoids Prisma retrying this 5 times, and making 5 clients + db_data[k] = "failed-to-serialize-json" + return db_data + + class PrismaClient: user_list_transactons: dict = {} end_user_list_transactons: dict = {} @@ -1503,25 +1516,31 @@ async def get_data( # noqa: PLR0915 ) sql_query = f""" - SELECT - v.*, - t.spend AS team_spend, - t.max_budget AS team_max_budget, - t.tpm_limit AS team_tpm_limit, - t.rpm_limit AS team_rpm_limit, - t.models AS team_models, - t.metadata AS team_metadata, - t.blocked AS team_blocked, - t.team_alias AS team_alias, - t.metadata AS team_metadata, - t.members_with_roles AS team_members_with_roles, - tm.spend AS team_member_spend, - m.aliases as team_model_aliases - FROM "LiteLLM_VerificationToken" AS v - LEFT JOIN "LiteLLM_TeamTable" AS t ON v.team_id = t.team_id - LEFT JOIN "LiteLLM_TeamMembership" AS tm ON v.team_id = tm.team_id AND tm.user_id = v.user_id - LEFT JOIN "LiteLLM_ModelTable" m ON t.model_id = m.id - WHERE v.token = '{token}' + SELECT + v.*, + t.spend AS team_spend, + t.max_budget AS team_max_budget, + t.tpm_limit AS team_tpm_limit, + t.rpm_limit AS team_rpm_limit, + t.models AS team_models, + t.metadata AS team_metadata, + t.blocked AS team_blocked, + t.team_alias AS team_alias, + t.metadata AS team_metadata, + t.members_with_roles AS team_members_with_roles, + tm.spend AS team_member_spend, + m.aliases AS team_model_aliases, + -- Added comma to separate b.* columns + b.max_budget AS litellm_budget_table_max_budget, + b.tpm_limit AS litellm_budget_table_tpm_limit, + b.rpm_limit AS litellm_budget_table_rpm_limit, + b.model_max_budget as litellm_budget_table_model_max_budget + FROM "LiteLLM_VerificationToken" AS v + LEFT JOIN "LiteLLM_TeamTable" AS t ON v.team_id = t.team_id + LEFT JOIN "LiteLLM_TeamMembership" AS tm ON v.team_id = tm.team_id AND tm.user_id = v.user_id + LEFT JOIN "LiteLLM_ModelTable" m ON t.model_id = m.id + LEFT JOIN "LiteLLM_BudgetTable" AS b ON v.budget_id = b.budget_id + WHERE v.token = '{token}' """ print_verbose("sql_query being made={}".format(sql_query)) @@ -1634,6 +1653,7 @@ async def insert_data( # noqa: PLR0915 "create": {**db_data}, # type: ignore "update": {}, # don't do anything if it already exists }, + include={"litellm_budget_table": True}, ) verbose_proxy_logger.info("Data Inserted into Keys Table") return new_verification_token diff --git a/litellm/router.py b/litellm/router.py index 3cd1ef4c2f65..7aa2528504b4 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -98,7 +98,6 @@ CustomRoutingStrategyBase, Deployment, DeploymentTypedDict, - GenericBudgetConfigType, LiteLLM_Params, ModelGroupInfo, OptionalPreCallChecks, @@ -111,6 +110,7 @@ RoutingStrategy, ) from litellm.types.services import ServiceTypes +from litellm.types.utils import GenericBudgetConfigType from litellm.types.utils import ModelInfo as ModelMapInfo from litellm.types.utils import StandardLoggingPayload from litellm.utils import ( diff --git a/litellm/router_strategy/budget_limiter.py b/litellm/router_strategy/budget_limiter.py index 2d20e19a8e9b..920f6c0881ee 100644 --- a/litellm/router_strategy/budget_limiter.py +++ b/litellm/router_strategy/budget_limiter.py @@ -33,14 +33,10 @@ _get_prometheus_logger_from_callbacks, ) from litellm.types.llms.openai import AllMessageValues -from litellm.types.router import ( - 
DeploymentTypedDict, - GenericBudgetConfigType, - GenericBudgetInfo, - LiteLLM_Params, - RouterErrors, -) -from litellm.types.utils import BudgetConfig, StandardLoggingPayload +from litellm.types.router import DeploymentTypedDict, LiteLLM_Params, RouterErrors +from litellm.types.utils import BudgetConfig +from litellm.types.utils import BudgetConfig as GenericBudgetInfo +from litellm.types.utils import GenericBudgetConfigType, StandardLoggingPayload DEFAULT_REDIS_SYNC_INTERVAL = 1 @@ -170,17 +166,19 @@ def _filter_out_deployments_above_budget( provider = self._get_llm_provider_for_deployment(deployment) if provider in provider_configs: config = provider_configs[provider] + if config.max_budget is None: + continue current_spend = spend_map.get( - f"provider_spend:{provider}:{config.time_period}", 0.0 + f"provider_spend:{provider}:{config.budget_duration}", 0.0 ) self._track_provider_remaining_budget_prometheus( provider=provider, spend=current_spend, - budget_limit=config.budget_limit, + budget_limit=config.max_budget, ) - if current_spend >= config.budget_limit: - debug_msg = f"Exceeded budget for provider {provider}: {current_spend} >= {config.budget_limit}" + if config.max_budget and current_spend >= config.max_budget: + debug_msg = f"Exceeded budget for provider {provider}: {current_spend} >= {config.max_budget}" deployment_above_budget_info += f"{debug_msg}\n" is_within_budget = False continue @@ -194,30 +192,32 @@ def _filter_out_deployments_above_budget( if model_id in deployment_configs: config = deployment_configs[model_id] current_spend = spend_map.get( - f"deployment_spend:{model_id}:{config.time_period}", 0.0 + f"deployment_spend:{model_id}:{config.budget_duration}", 0.0 ) - if current_spend >= config.budget_limit: - debug_msg = f"Exceeded budget for deployment model_name: {_model_name}, litellm_params.model: {_litellm_model_name}, model_id: {model_id}: {current_spend} >= {config.budget_limit}" + if config.max_budget and current_spend >= config.max_budget: + debug_msg = f"Exceeded budget for deployment model_name: {_model_name}, litellm_params.model: {_litellm_model_name}, model_id: {model_id}: {current_spend} >= {config.budget_duration}" verbose_router_logger.debug(debug_msg) deployment_above_budget_info += f"{debug_msg}\n" is_within_budget = False continue - # Check tag budget if self.tag_budget_config and is_within_budget: for _tag in request_tags: _tag_budget_config = self._get_budget_config_for_tag(_tag) if _tag_budget_config: _tag_spend = spend_map.get( - f"tag_spend:{_tag}:{_tag_budget_config.time_period}", 0.0 + f"tag_spend:{_tag}:{_tag_budget_config.budget_duration}", + 0.0, ) - if _tag_spend >= _tag_budget_config.budget_limit: - debug_msg = f"Exceeded budget for tag='{_tag}', tag_spend={_tag_spend}, tag_budget_limit={_tag_budget_config.budget_limit}" + if ( + _tag_budget_config.max_budget + and _tag_spend >= _tag_budget_config.max_budget + ): + debug_msg = f"Exceeded budget for tag='{_tag}', tag_spend={_tag_spend}, tag_budget_limit={_tag_budget_config.max_budget}" verbose_router_logger.debug(debug_msg) deployment_above_budget_info += f"{debug_msg}\n" is_within_budget = False continue - if is_within_budget: potential_deployments.append(deployment) @@ -247,10 +247,13 @@ async def _async_get_cache_keys_for_router_budget_limiting( provider = self._get_llm_provider_for_deployment(deployment) if provider is not None: budget_config = self._get_budget_config_for_provider(provider) - if budget_config is not None: + if ( + budget_config is not None + and 
budget_config.budget_duration is not None + ): provider_configs[provider] = budget_config cache_keys.append( - f"provider_spend:{provider}:{budget_config.time_period}" + f"provider_spend:{provider}:{budget_config.budget_duration}" ) # Check deployment budgets @@ -261,7 +264,7 @@ async def _async_get_cache_keys_for_router_budget_limiting( if budget_config is not None: deployment_configs[model_id] = budget_config cache_keys.append( - f"deployment_spend:{model_id}:{budget_config.time_period}" + f"deployment_spend:{model_id}:{budget_config.budget_duration}" ) # Check tag budgets if self.tag_budget_config: @@ -272,7 +275,7 @@ async def _async_get_cache_keys_for_router_budget_limiting( _tag_budget_config = self._get_budget_config_for_tag(_tag) if _tag_budget_config: cache_keys.append( - f"tag_spend:{_tag}:{_tag_budget_config.time_period}" + f"tag_spend:{_tag}:{_tag_budget_config.budget_duration}" ) return cache_keys, provider_configs, deployment_configs @@ -365,7 +368,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti if budget_config: # increment spend for provider spend_key = ( - f"provider_spend:{custom_llm_provider}:{budget_config.time_period}" + f"provider_spend:{custom_llm_provider}:{budget_config.budget_duration}" ) start_time_key = f"provider_budget_start_time:{custom_llm_provider}" await self._increment_spend_for_key( @@ -378,9 +381,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti deployment_budget_config = self._get_budget_config_for_deployment(model_id) if deployment_budget_config: # increment spend for specific deployment id - deployment_spend_key = ( - f"deployment_spend:{model_id}:{deployment_budget_config.time_period}" - ) + deployment_spend_key = f"deployment_spend:{model_id}:{deployment_budget_config.budget_duration}" deployment_start_time_key = f"deployment_budget_start_time:{model_id}" await self._increment_spend_for_key( budget_config=deployment_budget_config, @@ -395,7 +396,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti _tag_budget_config = self._get_budget_config_for_tag(_tag) if _tag_budget_config: _tag_spend_key = ( - f"tag_spend:{_tag}:{_tag_budget_config.time_period}" + f"tag_spend:{_tag}:{_tag_budget_config.budget_duration}" ) _tag_start_time_key = f"tag_budget_start_time:{_tag}" await self._increment_spend_for_key( @@ -412,8 +413,11 @@ async def _increment_spend_for_key( start_time_key: str, response_cost: float, ): + if budget_config.budget_duration is None: + return + current_time = datetime.now(timezone.utc).timestamp() - ttl_seconds = duration_in_seconds(budget_config.time_period) + ttl_seconds = duration_in_seconds(budget_config.budget_duration) budget_start = await self._get_or_set_budget_start_time( start_time_key=start_time_key, @@ -529,21 +533,23 @@ async def _sync_in_memory_spend_with_redis(self): for provider, config in self.provider_budget_config.items(): if config is None: continue - cache_keys.append(f"provider_spend:{provider}:{config.time_period}") + cache_keys.append( + f"provider_spend:{provider}:{config.budget_duration}" + ) if self.deployment_budget_config is not None: for model_id, config in self.deployment_budget_config.items(): if config is None: continue cache_keys.append( - f"deployment_spend:{model_id}:{config.time_period}" + f"deployment_spend:{model_id}:{config.budget_duration}" ) if self.tag_budget_config is not None: for tag, config in self.tag_budget_config.items(): if config is None: continue - 
cache_keys.append(f"tag_spend:{tag}:{config.time_period}") + cache_keys.append(f"tag_spend:{tag}:{config.budget_duration}") # Batch fetch current spend values from Redis redis_values = await self.dual_cache.redis_cache.async_batch_get_cache( @@ -635,7 +641,7 @@ async def _get_current_provider_spend(self, provider: str) -> Optional[float]: if budget_config is None: return None - spend_key = f"provider_spend:{provider}:{budget_config.time_period}" + spend_key = f"provider_spend:{provider}:{budget_config.budget_duration}" if self.dual_cache.redis_cache: # use Redis as source of truth since that has spend across all instances @@ -652,7 +658,7 @@ async def _get_current_provider_budget_reset_at( if budget_config is None: return None - spend_key = f"provider_spend:{provider}:{budget_config.time_period}" + spend_key = f"provider_spend:{provider}:{budget_config.budget_duration}" if self.dual_cache.redis_cache: ttl_seconds = await self.dual_cache.redis_cache.async_get_ttl(spend_key) else: @@ -672,9 +678,13 @@ async def _init_provider_budget_in_cache( - provider_budget_start_time:{provider} - stores the start time of the budget window """ - spend_key = f"provider_spend:{provider}:{budget_config.time_period}" + + spend_key = f"provider_spend:{provider}:{budget_config.budget_duration}" start_time_key = f"provider_budget_start_time:{provider}" - ttl_seconds = duration_in_seconds(budget_config.time_period) + ttl_seconds: Optional[int] = None + if budget_config.budget_duration is not None: + ttl_seconds = duration_in_seconds(budget_config.budget_duration) + budget_start = await self.dual_cache.async_get_cache(start_time_key) if budget_start is None: budget_start = datetime.now(timezone.utc).timestamp() diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index e8cda0aae4a4..b63099f08185 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -30,6 +30,7 @@ ChatCompletionPredictionContentParam, ) from openai.types.embedding import Embedding as OpenAIEmbedding +from openai.types.fine_tuning.fine_tuning_job import FineTuningJob from pydantic import BaseModel, Field from typing_extensions import Dict, Required, TypedDict, override diff --git a/litellm/types/router.py b/litellm/types/router.py index e5d6511359eb..f1a1f444805f 100644 --- a/litellm/types/router.py +++ b/litellm/types/router.py @@ -11,8 +11,6 @@ from pydantic import BaseModel, ConfigDict, Field from typing_extensions import Required, TypedDict -from litellm.types.utils import GenericBudgetConfigType, GenericBudgetInfo - from ..exceptions import RateLimitError from .completion import CompletionRequest from .embedding import EmbeddingRequest diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 934b56e4867c..957ce3ff5b91 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -1694,17 +1694,25 @@ class StandardKeyGenerationConfig(TypedDict, total=False): personal_key_generation: PersonalUIKeyGenerationConfig -class GenericBudgetInfo(BaseModel): - time_period: str # e.g., '1d', '30d' - budget_limit: float +class BudgetConfig(BaseModel): + max_budget: Optional[float] = None + budget_duration: Optional[str] = None + tpm_limit: Optional[int] = None + rpm_limit: Optional[int] = None + def __init__(self, **data: Any) -> None: + # Map time_period to budget_duration if present + if "time_period" in data: + data["budget_duration"] = data.pop("time_period") -GenericBudgetConfigType = Dict[str, GenericBudgetInfo] + # Map budget_limit to max_budget if present + if "budget_limit" in data: + 
data["max_budget"] = data.pop("budget_limit") + super().__init__(**data) -class BudgetConfig(BaseModel): - max_budget: float - budget_duration: str + +GenericBudgetConfigType = Dict[str, BudgetConfig] class LlmProviders(str, Enum): diff --git a/litellm/utils.py b/litellm/utils.py index e7d7f97ee07e..72f5f80933ad 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1234,6 +1234,7 @@ def _is_async_request( or kwargs.get("arerank", False) is True or kwargs.get("_arealtime", False) is True or kwargs.get("acreate_batch", False) is True + or kwargs.get("acreate_fine_tuning_job", False) is True or is_pass_through is True ): return True diff --git a/tests/batches_tests/test_fine_tuning_api.py b/tests/batches_tests/test_fine_tuning_api.py index 893849b3b5e9..cc53f599faf3 100644 --- a/tests/batches_tests/test_fine_tuning_api.py +++ b/tests/batches_tests/test_fine_tuning_api.py @@ -1,7 +1,7 @@ import os import sys import traceback - +import json import pytest sys.path.insert( @@ -14,7 +14,7 @@ litellm.num_retries = 0 import asyncio import logging - +from typing import Optional import openai from test_openai_batches_and_files import load_vertex_ai_credentials @@ -24,10 +24,27 @@ FineTuningJobCreate, VertexFineTuningAPI, ) +from litellm.integrations.custom_logger import CustomLogger +from litellm.types.utils import StandardLoggingPayload vertex_finetune_api = VertexFineTuningAPI() +class TestCustomLogger(CustomLogger): + def __init__(self): + super().__init__() + self.standard_logging_object: Optional[StandardLoggingPayload] = None + + async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): + print( + "Success event logged with kwargs=", + kwargs, + "and response_obj=", + response_obj, + ) + self.standard_logging_object = kwargs["standard_logging_object"] + + def test_create_fine_tune_job(): try: verbose_logger.setLevel(logging.DEBUG) @@ -89,6 +106,8 @@ def test_create_fine_tune_job(): @pytest.mark.asyncio async def test_create_fine_tune_jobs_async(): try: + custom_logger = TestCustomLogger() + litellm.callbacks = ["datadog", custom_logger] verbose_logger.setLevel(logging.DEBUG) file_name = "openai_batch_completions.jsonl" _current_dir = os.path.dirname(os.path.abspath(__file__)) @@ -113,6 +132,16 @@ async def test_create_fine_tune_jobs_async(): assert create_fine_tuning_response.id is not None assert create_fine_tuning_response.model == "gpt-3.5-turbo-0125" + await asyncio.sleep(2) + _logged_standard_logging_object = custom_logger.standard_logging_object + assert _logged_standard_logging_object is not None + print( + "custom_logger.standard_logging_object=", + json.dumps(_logged_standard_logging_object, indent=4), + ) + assert _logged_standard_logging_object["model"] == "gpt-3.5-turbo-0125" + assert _logged_standard_logging_object["id"] == create_fine_tuning_response.id + # list fine tuning jobs print("listing ft jobs") ft_jobs = await litellm.alist_fine_tuning_jobs(limit=2) diff --git a/tests/documentation_tests/test_api_docs.py b/tests/documentation_tests/test_api_docs.py index edab7c1d3434..2faac371c391 100644 --- a/tests/documentation_tests/test_api_docs.py +++ b/tests/documentation_tests/test_api_docs.py @@ -172,6 +172,11 @@ def main(): "delete_organization", "list_organization", "user_update", + "new_budget", + "info_budget", + "update_budget", + "delete_budget", + "list_budget", ] # directory = "../../litellm/proxy/management_endpoints" # LOCAL directory = "./litellm/proxy/management_endpoints" diff --git a/tests/local_testing/test_router_budget_limiter.py 
diff --git a/tests/local_testing/test_router_budget_limiter.py b/tests/local_testing/test_router_budget_limiter.py
index 8ca1f4e7677c..8d4948f8f99f 100644
--- a/tests/local_testing/test_router_budget_limiter.py
+++ b/tests/local_testing/test_router_budget_limiter.py
@@ -14,15 +14,13 @@
 from litellm.router_strategy.budget_limiter import RouterBudgetLimiting
 from litellm.types.router import (
     RoutingStrategy,
-    GenericBudgetConfigType,
-    GenericBudgetInfo,
 )
+from litellm.types.utils import GenericBudgetConfigType, BudgetConfig
 from litellm.caching.caching import DualCache, RedisCache
 import logging
 from litellm._logging import verbose_router_logger
 import litellm
 from datetime import timezone, timedelta
-from litellm.types.utils import BudgetConfig

 verbose_router_logger.setLevel(logging.DEBUG)

@@ -67,8 +65,8 @@ async def test_provider_budgets_e2e_test():
     cleanup_redis()
     # Modify for test
     provider_budget_config: GenericBudgetConfigType = {
-        "openai": GenericBudgetInfo(time_period="1d", budget_limit=0.000000000001),
-        "azure": GenericBudgetInfo(time_period="1d", budget_limit=100),
+        "openai": BudgetConfig(time_period="1d", budget_limit=0.000000000001),
+        "azure": BudgetConfig(time_period="1d", budget_limit=100),
     }

     router = Router(
@@ -215,8 +213,8 @@ async def test_get_budget_config_for_provider():
     """
     cleanup_redis()
     config = {
-        "openai": GenericBudgetInfo(time_period="1d", budget_limit=100),
-        "anthropic": GenericBudgetInfo(time_period="7d", budget_limit=500),
+        "openai": BudgetConfig(budget_duration="1d", max_budget=100),
+        "anthropic": BudgetConfig(budget_duration="7d", max_budget=500),
     }

     provider_budget = RouterBudgetLimiting(
@@ -226,13 +224,13 @@ async def test_get_budget_config_for_provider():
     # Test existing providers
     openai_config = provider_budget._get_budget_config_for_provider("openai")
     assert openai_config is not None
-    assert openai_config.time_period == "1d"
-    assert openai_config.budget_limit == 100
+    assert openai_config.budget_duration == "1d"
+    assert openai_config.max_budget == 100

     anthropic_config = provider_budget._get_budget_config_for_provider("anthropic")
     assert anthropic_config is not None
-    assert anthropic_config.time_period == "7d"
-    assert anthropic_config.budget_limit == 500
+    assert anthropic_config.budget_duration == "7d"
+    assert anthropic_config.max_budget == 500

     # Test non-existent provider
     assert provider_budget._get_budget_config_for_provider("unknown") is None
@@ -254,15 +252,15 @@ async def test_prometheus_metric_tracking():
         provider_budget = RouterBudgetLimiting(
             dual_cache=DualCache(),
             provider_budget_config={
-                "openai": GenericBudgetInfo(time_period="1d", budget_limit=100)
+                "openai": BudgetConfig(budget_duration="1d", max_budget=100)
             },
         )

         litellm._async_success_callback = [mock_prometheus]

         provider_budget_config: GenericBudgetConfigType = {
-            "openai": GenericBudgetInfo(time_period="1d", budget_limit=0.000000000001),
-            "azure": GenericBudgetInfo(time_period="1d", budget_limit=100),
+            "openai": BudgetConfig(budget_duration="1d", max_budget=0.000000000001),
+            "azure": BudgetConfig(budget_duration="1d", max_budget=100),
         }

         router = Router(
@@ -442,8 +440,8 @@ async def test_sync_in_memory_spend_with_redis():
     """
     cleanup_redis()
     provider_budget_config = {
-        "openai": GenericBudgetInfo(time_period="1d", budget_limit=100),
-        "anthropic": GenericBudgetInfo(time_period="1d", budget_limit=200),
+        "openai": BudgetConfig(time_period="1d", budget_limit=100),
+        "anthropic": BudgetConfig(time_period="1d", budget_limit=200),
    }

     provider_budget = RouterBudgetLimiting(
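These test updates swap the removed `GenericBudgetInfo` for `BudgetConfig` imported from `litellm.types.utils`. A sketch of how a `Router` might be configured with the new type outside the tests; model names, API credentials, and the exact limits here are placeholders, not values from the source.

```python
from litellm import Router
from litellm.types.utils import BudgetConfig, GenericBudgetConfigType

# Cap OpenAI spend per day and Azure spend per week; the budget limiter
# routes requests away from any provider whose window budget is exhausted.
provider_budget_config: GenericBudgetConfigType = {
    "openai": BudgetConfig(budget_duration="1d", max_budget=100),
    "azure": BudgetConfig(budget_duration="7d", max_budget=500),
}

router = Router(
    model_list=[
        {
            "model_name": "gpt-4o",
            "litellm_params": {"model": "openai/gpt-4o"},
        },
        {
            "model_name": "gpt-4o",
            # placeholder Azure deployment; fill in real credentials
            "litellm_params": {
                "model": "azure/gpt-4o",
                "api_base": "https://my-endpoint.openai.azure.com",
                "api_key": "azure-api-key",
            },
        },
    ],
    provider_budget_config=provider_budget_config,
)
```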
@@ -497,7 +495,7 @@ async def test_get_current_provider_spend():
     provider_budget = RouterBudgetLimiting(
         dual_cache=DualCache(),
         provider_budget_config={
-            "openai": GenericBudgetInfo(time_period="1d", budget_limit=100),
+            "openai": BudgetConfig(time_period="1d", budget_limit=100),
         },
     )
@@ -538,8 +536,8 @@ async def test_get_current_provider_budget_reset_at():
             )
         ),
         provider_budget_config={
-            "openai": GenericBudgetInfo(time_period="1d", budget_limit=100),
-            "vertex_ai": GenericBudgetInfo(time_period="1h", budget_limit=100),
+            "openai": BudgetConfig(budget_duration="1d", max_budget=100),
+            "vertex_ai": BudgetConfig(budget_duration="1h", max_budget=100),
         },
     )
diff --git a/tests/proxy_admin_ui_tests/test_key_management.py b/tests/proxy_admin_ui_tests/test_key_management.py
index 9104d7b9a778..2314d67c6181 100644
--- a/tests/proxy_admin_ui_tests/test_key_management.py
+++ b/tests/proxy_admin_ui_tests/test_key_management.py
@@ -777,3 +777,68 @@ async def test_user_info_as_proxy_admin(prisma_client):

     assert user_info_response.keys is not None
     assert len(user_info_response.keys) > 0, "Expected at least one key in response"
+
+
+@pytest.mark.asyncio
+async def test_key_update_with_model_specific_params(prisma_client):
+    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    await litellm.proxy.proxy_server.prisma_client.connect()
+
+    from litellm.proxy.management_endpoints.key_management_endpoints import (
+        update_key_fn,
+    )
+    from litellm.proxy._types import UpdateKeyRequest
+
+    new_key = await generate_key_fn(
+        data=GenerateKeyRequest(models=["gpt-4"]),
+        user_api_key_dict=UserAPIKeyAuth(
+            user_role=LitellmUserRoles.PROXY_ADMIN,
+            api_key="sk-1234",
+            user_id="1234",
+        ),
+    )
+
+    generated_key = new_key.key
+    token_hash = new_key.token_id
+    print(generated_key)
+
+    request = Request(scope={"type": "http"})
+    request._url = URL(url="/update/key")
+
+    args = {
+        "key_alias": f"test-key_{uuid.uuid4()}",
+        "duration": None,
+        "models": ["all-team-models"],
+        "spend": 0,
+        "max_budget": None,
+        "user_id": "default_user_id",
+        "team_id": None,
+        "max_parallel_requests": None,
+        "metadata": {
+            "model_tpm_limit": {"fake-openai-endpoint": 10},
+            "model_rpm_limit": {"fake-openai-endpoint": 0},
+        },
+        "tpm_limit": None,
+        "rpm_limit": None,
+        "budget_duration": None,
+        "allowed_cache_controls": [],
+        "soft_budget": None,
+        "config": {},
+        "permissions": {},
+        "model_max_budget": {},
+        "send_invite_email": None,
+        "model_rpm_limit": None,
+        "model_tpm_limit": None,
+        "guardrails": None,
+        "blocked": None,
+        "aliases": {},
+        "key": token_hash,
+        "budget_id": None,
+        "key_name": "sk-...2GWA",
+        "expires": None,
+        "token_id": token_hash,
+        "litellm_budget_table": None,
+        "token": token_hash,
+    }
+    await update_key_fn(request=request, data=UpdateKeyRequest(**args))
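`test_key_update_with_model_specific_params` drives `update_key_fn` directly with per-model `model_tpm_limit` / `model_rpm_limit` values stored in the key's metadata. Against a running proxy, the same update would go over HTTP; a hedged sketch, assuming the proxy's `/key/update` route, a local deployment on port 4000, and placeholder keys:

```python
import requests  # assumes the `requests` package is installed

# Placeholder admin key and key-to-update; swap in real values before use.
ADMIN_KEY = "sk-1234"
KEY_TO_UPDATE = "sk-..."

resp = requests.post(
    "http://0.0.0.0:4000/key/update",
    headers={"Authorization": f"Bearer {ADMIN_KEY}"},
    json={
        "key": KEY_TO_UPDATE,
        # per-model limits live in metadata, mirroring the test above
        "metadata": {
            "model_tpm_limit": {"fake-openai-endpoint": 10},
            "model_rpm_limit": {"fake-openai-endpoint": 0},
        },
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json())
```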
diff --git a/tests/proxy_unit_tests/test_proxy_utils.py b/tests/proxy_unit_tests/test_proxy_utils.py
index 5413bfce73ee..3b3da7b98201 100644
--- a/tests/proxy_unit_tests/test_proxy_utils.py
+++ b/tests/proxy_unit_tests/test_proxy_utils.py
@@ -1,6 +1,7 @@
 import asyncio
 import os
 import sys
+from typing import Any, Dict
 from unittest.mock import Mock
 from litellm.proxy.utils import _get_redoc_url, _get_docs_url
 import json
@@ -1104,3 +1105,89 @@ def test_proxy_config_state_post_init_callback_call():

     config = pc.get_config_state()
     assert config["litellm_settings"]["default_team_settings"][0]["team_id"] == "test"
+
+
+@pytest.mark.parametrize(
+    "associated_budget_table, expected_user_api_key_auth_key, expected_user_api_key_auth_value",
+    [
+        (
+            {
+                "litellm_budget_table_max_budget": None,
+                "litellm_budget_table_tpm_limit": None,
+                "litellm_budget_table_rpm_limit": 1,
+                "litellm_budget_table_model_max_budget": None,
+            },
+            "rpm_limit",
+            1,
+        ),
+        (
+            {},
+            None,
+            None,
+        ),
+        (
+            {
+                "litellm_budget_table_max_budget": None,
+                "litellm_budget_table_tpm_limit": None,
+                "litellm_budget_table_rpm_limit": None,
+                "litellm_budget_table_model_max_budget": {"gpt-4o": 100},
+            },
+            "model_max_budget",
+            {"gpt-4o": 100},
+        ),
+    ],
+)
+def test_litellm_verification_token_view_response_with_budget_table(
+    associated_budget_table,
+    expected_user_api_key_auth_key,
+    expected_user_api_key_auth_value,
+):
+    from litellm.proxy._types import LiteLLM_VerificationTokenView
+
+    args: Dict[str, Any] = {
+        "token": "78b627d4d14bc3acf5571ae9cb6834e661bc8794d1209318677387add7621ce1",
+        "key_name": "sk-...if_g",
+        "key_alias": None,
+        "soft_budget_cooldown": False,
+        "spend": 0.011441999999999997,
+        "expires": None,
+        "models": [],
+        "aliases": {},
+        "config": {},
+        "user_id": None,
+        "team_id": "test",
+        "permissions": {},
+        "max_parallel_requests": None,
+        "metadata": {},
+        "blocked": None,
+        "tpm_limit": None,
+        "rpm_limit": None,
+        "max_budget": None,
+        "budget_duration": None,
+        "budget_reset_at": None,
+        "allowed_cache_controls": [],
+        "model_spend": {},
+        "model_max_budget": {},
+        "budget_id": "my-test-tier",
+        "created_at": "2024-12-26T02:28:52.615+00:00",
+        "updated_at": "2024-12-26T03:01:51.159+00:00",
+        "team_spend": 0.012134999999999998,
+        "team_max_budget": None,
+        "team_tpm_limit": None,
+        "team_rpm_limit": None,
+        "team_models": [],
+        "team_metadata": {},
+        "team_blocked": False,
+        "team_alias": None,
+        "team_members_with_roles": [{"role": "admin", "user_id": "default_user_id"}],
+        "team_member_spend": None,
+        "team_model_aliases": None,
+        "team_member": None,
+        **associated_budget_table,
+    }
+    resp = LiteLLM_VerificationTokenView(**args)
+    if expected_user_api_key_auth_key is not None:
+        assert (
+            getattr(resp, expected_user_api_key_auth_key)
+            == expected_user_api_key_auth_value
+        )
diff --git a/tests/proxy_unit_tests/test_unit_test_max_model_budget_limiter.py b/tests/proxy_unit_tests/test_unit_test_max_model_budget_limiter.py
index 82d85cf2f7db..fc8373a17468 100644
--- a/tests/proxy_unit_tests/test_unit_test_max_model_budget_limiter.py
+++ b/tests/proxy_unit_tests/test_unit_test_max_model_budget_limiter.py
@@ -13,7 +13,7 @@
 import litellm
 import json
-from litellm.types.utils import GenericBudgetInfo
+from litellm.types.utils import BudgetConfig as GenericBudgetInfo
 import os
 import sys
 from datetime import datetime
@@ -56,13 +56,13 @@ def test_get_request_model_budget_config(budget_limiter):
     config = budget_limiter._get_request_model_budget_config(
         model="gpt-4", internal_model_max_budget=internal_budget
     )
-    assert config.budget_limit == 100.0
+    assert config.max_budget == 100.0

     # Test model with provider
     config = budget_limiter._get_request_model_budget_config(
         model="openai/gpt-4", internal_model_max_budget=internal_budget
     )
-    assert config.budget_limit == 100.0
+    assert config.max_budget == 100.0

     # Test non-existent model
     config = budget_limiter._get_request_model_budget_config(