From 25ddacb7912b89483a6ac775d7e32170e3d08831 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 16:23:34 -0800 Subject: [PATCH 01/22] fix _select_model_name_for_cost_calc docstring --- litellm/cost_calculator.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 6f176e12c380..45bd24aab3cc 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -373,8 +373,7 @@ def _select_model_name_for_cost_calc( 1. If custom pricing is true, return received model name 2. If base_model is set (e.g. for azure models), return that 3. If completion response has model set return that - 4. Check if compl - 4. If model is passed in return that + 4. Check if model is passed in return that """ return_model: Optional[str] = None region_name: Optional[str] = None From 44039182284eede08a363f31c1a220a71087510c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 17:40:36 -0800 Subject: [PATCH 02/22] add keepttl to in memory cache --- litellm/caching/in_memory_cache.py | 35 +++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py index 9fca96922636..525e93e7b71d 100644 --- a/litellm/caching/in_memory_cache.py +++ b/litellm/caching/in_memory_cache.py @@ -57,13 +57,25 @@ def evict_cache(self): # One of the most common causes of memory leaks in Python is the retention of objects that are no longer being used. # This can occur when an object is referenced by another object, but the reference is never removed. - def set_cache(self, key, value, **kwargs): + def set_cache(self, key, value, keepttl: bool = False, **kwargs): + """ + Set cache value + + Args: + key: str + value: Any + keepttl: bool. if True, retain the time to live associated with the key. (keepttl is a Redis parameter. we use the same parameter name as Redis) + **kwargs: + """ if len(self.cache_dict) >= self.max_size_in_memory: # only evict when cache is full self.evict_cache() self.cache_dict[key] = value - if "ttl" in kwargs and kwargs["ttl"] is not None: + + if keepttl and key in self.ttl_dict: + pass + elif "ttl" in kwargs and kwargs["ttl"] is not None: self.ttl_dict[key] = time.time() + kwargs["ttl"] else: self.ttl_dict[key] = time.time() + self.default_ttl @@ -71,12 +83,25 @@ def set_cache(self, key, value, **kwargs): async def async_set_cache(self, key, value, **kwargs): self.set_cache(key=key, value=value, **kwargs) - async def async_set_cache_pipeline(self, cache_list, ttl=None, **kwargs): + async def async_set_cache_pipeline( + self, cache_list, ttl: Optional[float] = None, keepttl: bool = False, **kwargs + ): + """ + Use in-memory cache for bulk write operations + + Args: + cache_list: List[Tuple[Any, Any]] + ttl: Optional[float] = None + keepttl: bool = False. if True, retain the time to live associated with the key. (keepttl is a Redis parameter. we use the same parameter name as Redis) + **kwargs: + """ for cache_key, cache_value in cache_list: if ttl is not None: - self.set_cache(key=cache_key, value=cache_value, ttl=ttl) + self.set_cache( + key=cache_key, value=cache_value, ttl=ttl, keepttl=keepttl + ) else: - self.set_cache(key=cache_key, value=cache_value) + self.set_cache(key=cache_key, value=cache_value, keepttl=keepttl) async def async_set_cache_sadd(self, key, value: List, ttl: Optional[float]): """ From c029d91b14577fbb425fefdf10275f0ee61bd55c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 17:41:27 -0800 Subject: [PATCH 03/22] add keepttl to redis and dual cache --- litellm/caching/dual_cache.py | 31 +++++++++++++++++++++++++------ litellm/caching/redis_cache.py | 24 ++++++++++++++++++++---- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/litellm/caching/dual_cache.py b/litellm/caching/dual_cache.py index f4abc6f15392..0e54774f0bf4 100644 --- a/litellm/caching/dual_cache.py +++ b/litellm/caching/dual_cache.py @@ -299,16 +299,22 @@ async def async_batch_get_cache( except Exception: verbose_logger.error(traceback.format_exc()) - async def async_set_cache(self, key, value, local_only: bool = False, **kwargs): + async def async_set_cache( + self, key, value, local_only: bool = False, keepttl: bool = False, **kwargs + ): print_verbose( f"async set cache: cache key: {key}; local_only: {local_only}; value: {value}" ) try: if self.in_memory_cache is not None: - await self.in_memory_cache.async_set_cache(key, value, **kwargs) + await self.in_memory_cache.async_set_cache( + key, value, keepttl=keepttl, **kwargs + ) if self.redis_cache is not None and local_only is False: - await self.redis_cache.async_set_cache(key, value, **kwargs) + await self.redis_cache.async_set_cache( + key, value, keepttl=keepttl, **kwargs + ) except Exception as e: verbose_logger.exception( f"LiteLLM Cache: Excepton async add_cache: {str(e)}" @@ -316,10 +322,20 @@ async def async_set_cache(self, key, value, local_only: bool = False, **kwargs): # async_batch_set_cache async def async_set_cache_pipeline( - self, cache_list: list, local_only: bool = False, **kwargs + self, + cache_list: list, + local_only: bool = False, + keepttl: bool = False, + **kwargs, ): """ Batch write values to the cache + + Args: + cache_list: list + local_only: bool = False + keepttl: bool. if True, retain the time to live associated with the key. (keepttl is a Redis parameter. we use the same parameter name as Redis) + **kwargs: """ print_verbose( f"async batch set cache: cache keys: {cache_list}; local_only: {local_only}" @@ -327,12 +343,15 @@ async def async_set_cache_pipeline( try: if self.in_memory_cache is not None: await self.in_memory_cache.async_set_cache_pipeline( - cache_list=cache_list, **kwargs + cache_list=cache_list, keepttl=keepttl, **kwargs ) if self.redis_cache is not None and local_only is False: await self.redis_cache.async_set_cache_pipeline( - cache_list=cache_list, ttl=kwargs.pop("ttl", None), **kwargs + cache_list=cache_list, + ttl=kwargs.pop("ttl", None), + keepttl=keepttl, + **kwargs, ) except Exception as e: verbose_logger.exception( diff --git a/litellm/caching/redis_cache.py b/litellm/caching/redis_cache.py index fa0002fe6231..055a4407dad4 100644 --- a/litellm/caching/redis_cache.py +++ b/litellm/caching/redis_cache.py @@ -348,7 +348,11 @@ async def async_set_cache(self, key, value, **kwargs): ) async def _pipeline_helper( - self, pipe: pipeline, cache_list: List[Tuple[Any, Any]], ttl: Optional[float] + self, + pipe: pipeline, + cache_list: List[Tuple[Any, Any]], + ttl: Optional[float], + keepttl: bool = False, ) -> List: ttl = self.get_ttl(ttl=ttl) # Iterate through each key-value pair in the cache_list and set them in the pipeline. @@ -362,16 +366,26 @@ async def _pipeline_helper( _td: Optional[timedelta] = None if ttl is not None: _td = timedelta(seconds=ttl) - pipe.set(cache_key, json_cache_value, ex=_td) + pipe.set(cache_key, json_cache_value, ex=_td, keepttl=keepttl) # Execute the pipeline and return the results. results = await pipe.execute() return results async def async_set_cache_pipeline( - self, cache_list: List[Tuple[Any, Any]], ttl: Optional[float] = None, **kwargs + self, + cache_list: List[Tuple[Any, Any]], + ttl: Optional[float] = None, + keepttl: bool = False, + **kwargs, ): """ Use Redis Pipelines for bulk write operations + + Args: + cache_list: List[Tuple[Any, Any]] + ttl: Optional[float] = None + keepttl: bool. Redis parameter. If True, retain the time to live associated with the key. + **kwargs: """ # don't waste a network request if there's nothing to set if len(cache_list) == 0: @@ -388,7 +402,9 @@ async def async_set_cache_pipeline( try: async with _redis_client as redis_client: async with redis_client.pipeline(transaction=True) as pipe: - results = await self._pipeline_helper(pipe, cache_list, ttl) + results = await self._pipeline_helper( + pipe=pipe, cache_list=cache_list, ttl=ttl, keepttl=keepttl + ) print_verbose(f"pipeline results: {results}") # Optionally, you could process 'results' to make sure that all set operations were successful. From f8625156281b3c796736d49944b037c06ed9a273 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 17:42:40 -0800 Subject: [PATCH 04/22] use keep ttl param for set pipeleine and auth check --- litellm/proxy/auth/auth_checks.py | 3 +-- litellm/proxy/proxy_server.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py index 45e8c844c4f2..3c93ab485de2 100644 --- a/litellm/proxy/auth/auth_checks.py +++ b/litellm/proxy/auth/auth_checks.py @@ -493,7 +493,7 @@ async def _cache_management_object( user_api_key_cache: DualCache, proxy_logging_obj: Optional[ProxyLogging], ): - await user_api_key_cache.async_set_cache(key=key, value=value) + await user_api_key_cache.async_set_cache(key=key, value=value, keepttl=True) async def _cache_team_object( @@ -720,7 +720,6 @@ async def get_key_object( cached_key_obj: Optional[UserAPIKeyAuth] = await user_api_key_cache.async_get_cache( key=key ) - if cached_key_obj is not None: if isinstance(cached_key_obj, dict): return UserAPIKeyAuth(**cached_key_obj) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 4210d6035c55..9ac07ace33f9 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1247,8 +1247,8 @@ async def _update_team_cache(): asyncio.create_task( user_api_key_cache.async_set_cache_pipeline( cache_list=values_to_update_in_cache, - ttl=60, litellm_parent_otel_span=parent_otel_span, + keepttl=True, ) ) From fc022ff24ed4533b4d63212de571e8d67753c657 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 18:09:06 -0800 Subject: [PATCH 05/22] use keep ttl=True for _cache_management_object --- litellm/caching/dual_cache.py | 4 +++- litellm/caching/in_memory_cache.py | 13 +++++++++---- litellm/proxy/auth/auth_checks.py | 7 ++++++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/litellm/caching/dual_cache.py b/litellm/caching/dual_cache.py index 0e54774f0bf4..ceabef1ecc17 100644 --- a/litellm/caching/dual_cache.py +++ b/litellm/caching/dual_cache.py @@ -61,7 +61,9 @@ def __init__( ) -> None: super().__init__() # If in_memory_cache is not provided, use the default InMemoryCache - self.in_memory_cache = in_memory_cache or InMemoryCache() + self.in_memory_cache = in_memory_cache or InMemoryCache( + default_ttl=default_in_memory_ttl + ) # If redis_cache is not provided, use the default RedisCache self.redis_cache = redis_cache self.last_redis_batch_access_time = LimitedSizeOrderedDict( diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py index 525e93e7b71d..7e871edb9d8a 100644 --- a/litellm/caching/in_memory_cache.py +++ b/litellm/caching/in_memory_cache.py @@ -12,24 +12,29 @@ import time from typing import List, Optional +import litellm + from .base_cache import BaseCache +IN_MEMORY_CACHE_DEFAULT_TTL = 600 +IN_MEMORY_CACHE_MAX_SIZE = 200 + class InMemoryCache(BaseCache): def __init__( self, max_size_in_memory: Optional[int] = 200, default_ttl: Optional[ - int - ] = 600, # default ttl is 10 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute + float + ] = None, # default ttl is 10 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute ): """ max_size_in_memory [int]: Maximum number of items in cache. done to prevent memory leaks. Use 200 items as a default """ self.max_size_in_memory = ( - max_size_in_memory or 200 + max_size_in_memory or IN_MEMORY_CACHE_MAX_SIZE ) # set an upper bound of 200 items in-memory - self.default_ttl = default_ttl or 600 + self.default_ttl = default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL # in-memory cache self.cache_dict: dict = {} diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py index 3c93ab485de2..3d3343c60cb9 100644 --- a/litellm/proxy/auth/auth_checks.py +++ b/litellm/proxy/auth/auth_checks.py @@ -493,7 +493,11 @@ async def _cache_management_object( user_api_key_cache: DualCache, proxy_logging_obj: Optional[ProxyLogging], ): - await user_api_key_cache.async_set_cache(key=key, value=value, keepttl=True) + await user_api_key_cache.async_set_cache( + key=key, + value=value, + keepttl=True, + ) async def _cache_team_object( @@ -720,6 +724,7 @@ async def get_key_object( cached_key_obj: Optional[UserAPIKeyAuth] = await user_api_key_cache.async_get_cache( key=key ) + if cached_key_obj is not None: if isinstance(cached_key_obj, dict): return UserAPIKeyAuth(**cached_key_obj) From 6695dd028bedda5deef8f5dd9ee044fc5d764635 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 18:16:06 -0800 Subject: [PATCH 06/22] fix linting --- litellm/caching/in_memory_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py index 7e871edb9d8a..1b6d8cfb02de 100644 --- a/litellm/caching/in_memory_cache.py +++ b/litellm/caching/in_memory_cache.py @@ -16,7 +16,7 @@ from .base_cache import BaseCache -IN_MEMORY_CACHE_DEFAULT_TTL = 600 +IN_MEMORY_CACHE_DEFAULT_TTL = 600.0 IN_MEMORY_CACHE_MAX_SIZE = 200 From 24133d5b2e2c5a7596889fe43004a6b1e5397e31 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 18:16:28 -0800 Subject: [PATCH 07/22] fix in memory cache --- litellm/caching/in_memory_cache.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py index 1b6d8cfb02de..aa40c4a4333d 100644 --- a/litellm/caching/in_memory_cache.py +++ b/litellm/caching/in_memory_cache.py @@ -12,8 +12,6 @@ import time from typing import List, Optional -import litellm - from .base_cache import BaseCache IN_MEMORY_CACHE_DEFAULT_TTL = 600.0 From 536f91d8be738ad8170096383e74201305601bf5 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 18:44:02 -0800 Subject: [PATCH 08/22] add e2e rpm setting on keys --- .../example_config_yaml/otel_test_config.yaml | 5 + litellm/proxy/proxy_config.yaml | 15 +-- .../test_multi_instance_test.py | 123 ++++++++++++++++++ 3 files changed, 132 insertions(+), 11 deletions(-) create mode 100644 tests/multi_instance_tests/test_multi_instance_test.py diff --git a/litellm/proxy/example_config_yaml/otel_test_config.yaml b/litellm/proxy/example_config_yaml/otel_test_config.yaml index fae3ee3daec0..da87c17e4bdb 100644 --- a/litellm/proxy/example_config_yaml/otel_test_config.yaml +++ b/litellm/proxy/example_config_yaml/otel_test_config.yaml @@ -31,6 +31,11 @@ model_list: api_key: fake-key model_info: supports_vision: True + - model_name: fake-openai-endpoint-all-users + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 1a90d9090ed3..fe8452edebdf 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -1,14 +1,7 @@ model_list: - - model_name: gpt-4o + - model_name: fake-openai-endpoint-all-users litellm_params: - model: openai/gpt-4o - api_key: os.environ/OPENAI_API_KEY + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ -litellm_settings: - tag_budget_config: - product:chat-bot: # (Tag) - max_budget: 0.000000000001 # (USD) - budget_duration: 1d # (Duration) - product:chat-bot-2: # (Tag) - max_budget: 100 # (USD) - budget_duration: 1d # (Duration) \ No newline at end of file diff --git a/tests/multi_instance_tests/test_multi_instance_test.py b/tests/multi_instance_tests/test_multi_instance_test.py new file mode 100644 index 000000000000..e264cbc35784 --- /dev/null +++ b/tests/multi_instance_tests/test_multi_instance_test.py @@ -0,0 +1,123 @@ +import pytest +import asyncio +import aiohttp +from typing import Optional, List, Union + + +async def generate_key(session, rpm_limit: int, port: int = 4000): + url = f"http://0.0.0.0:{port}/key/generate" + headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"} + data = {"rpm_limit": rpm_limit} + + async with session.post(url, headers=headers, json=data) as response: + status = response.status + response_text = await response.text() + + print(response_text) + print() + + if status != 200: + raise Exception(f"Request did not return a 200 status code: {status}") + + return await response.json() + + +async def update_key(session, key: str, rpm_limit: int, port: int = 4000): + url = f"http://0.0.0.0:{port}/key/update" + headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"} + data = {"key": key, "rpm_limit": rpm_limit} + + async with session.post(url, headers=headers, json=data) as response: + status = response.status + response_text = await response.text() + + print(response_text) + print() + + if status != 200: + raise Exception(f"Request did not return a 200 status code: {status}") + + return await response.json() + + +async def chat_completion(session, key: str, port: int = 4000): + url = f"http://0.0.0.0:{port}/chat/completions" + headers = { + "Authorization": f"Bearer {key}", + "Content-Type": "application/json", + } + data = { + "model": "fake-openai-endpoint-all-users", + "messages": [{"role": "user", "content": "Hello!"}], + } + + async with session.post(url, headers=headers, json=data) as response: + return response.status + + +async def simulated_traffic_for_key(session, key: str, port: int = 4000): + print(f"simulating traffic for key {key} on port {port}") + for i in range(100): + print(f"simulating traffic - chat completion number {i}, port {port}") + await chat_completion(session, key, port=port) + await asyncio.sleep(0.1) + + +@pytest.mark.asyncio +async def test_multi_instance_key_management(): + """ + Test key management across multiple LiteLLM instances: + 1. Create key on instance 1 (port 4000) with RPM=1 + 2. Test key on instance 2 (port 4001) - expect only 1/5 requests to succeed + 3. Update key on instance 1 to RPM=1000 + 4. Test key on instance 2 - expect all 5 requests to succeed + """ + async with aiohttp.ClientSession() as session: + # Create key on instance 1 with RPM=1 + key_response = await generate_key(session, rpm_limit=1, port=4000) + test_key = key_response["key"] + print("created key with rpm limit 1") + + # Test key on instance 2 with 5 requests + print("running 5 requests on instance 2. Expecting 1 to succeed") + statuses = [] + for _ in range(5): + status = await chat_completion(session, test_key, port=4001) + statuses.append(status) + await asyncio.sleep(0.1) # Small delay between requests + + # Expect only first request to succeed + success_count = sum(1 for status in statuses if status == 200) + print( + f"statuses count of 5 /chat/completions: {statuses}. EXPECTED 1 SUCCESS (200)" + ) + assert success_count == 1, f"Expected 1 successful request, got {success_count}" + + # Update key on instance 1 to RPM=1000 + await update_key(session, test_key, rpm_limit=1000, port=4000) + print("updated key on instance 1 to rpm limit 1000") + print("simulating /chat/completion straffic for 60 seconds") + + # create task to simulate traffic for key + asyncio.create_task(simulated_traffic_for_key(session, test_key, port=4000)) + asyncio.create_task(simulated_traffic_for_key(session, test_key, port=4001)) + + # wait for 60 seconds for traffic to propagate + await asyncio.sleep(60) # Wait for key update to propagate + + print("\n\n Done simulating traffic for 60 seconds \n\n") + print("\n\n Now testing if key has new rpm_limit=1000 \n\n") + print("running 5 requests on instance 2. Expecting 5 to succeed") + # Test key again on instance 2 with 5 requests + statuses = [] + for _ in range(5): + status = await chat_completion(session, test_key, port=4001) + statuses.append(status) + await asyncio.sleep(0.1) + print(f"status of 5 /chat/completions: {statuses}. Expecting 200 for all 5") + + # Expect all requests to succeed + success_count = sum(1 for status in statuses if status == 200) + assert ( + success_count == 5 + ), f"Expected 5 successful requests, got {success_count}" From 62b3e19d492d6eb833c62a4dbcb6c6d2d9403ddb Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 18:47:26 -0800 Subject: [PATCH 09/22] edit ci/cd --- .circleci/config.yml | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c31a07a22b00..a7f492c89fc4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -975,7 +975,7 @@ jobs: # Store test results - store_test_results: path: test-results - proxy_logging_guardrails_model_info_tests: + proxy_e2e_tests_advanced: machine: image: ubuntu-2204:2023.10.1 resource_class: xlarge @@ -1087,6 +1087,47 @@ jobs: python -m pytest -vv tests/otel_tests -x --junitxml=test-results/junit.xml --durations=5 no_output_timeout: 120m # Clean up first container + - run: + name: Run Docker container + # intentionally give bad redis credentials here + # the OTEL test - should get this as a trace + command: | + docker run -d \ + -p 4000:4000 \ + -e DATABASE_URL=$PROXY_DATABASE_URL \ + -e REDIS_HOST=$REDIS_HOST \ + -e REDIS_PASSWORD=$REDIS_PASSWORD \ + -e REDIS_PORT=$REDIS_PORT \ + -e LITELLM_MASTER_KEY="sk-1234" \ + -e OPENAI_API_KEY=$OPENAI_API_KEY \ + -e LITELLM_LICENSE=$LITELLM_LICENSE \ + -e OTEL_EXPORTER="in_memory" \ + -e APORIA_API_BASE_2=$APORIA_API_BASE_2 \ + -e APORIA_API_KEY_2=$APORIA_API_KEY_2 \ + -e APORIA_API_BASE_1=$APORIA_API_BASE_1 \ + -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ + -e AWS_REGION_NAME=$AWS_REGION_NAME \ + -e APORIA_API_KEY_1=$APORIA_API_KEY_1 \ + -e COHERE_API_KEY=$COHERE_API_KEY \ + -e GCS_FLUSH_INTERVAL="1" \ + --name my-app \ + -v $(pwd)/litellm/proxy/example_config_yaml/otel_test_config.yaml:/app/config.yaml \ + -v $(pwd)/litellm/proxy/example_config_yaml/custom_guardrail.py:/app/custom_guardrail.py \ + my-app:latest \ + --config /app/config.yaml \ + --port 4001 \ + --detailed_debug \ + - run: + name: Wait for app to be ready + command: dockerize -wait http://localhost:4001 -timeout 5m + - run: + name: Run tests + command: | + pwd + ls + python -m pytest -vv tests/multi_instance_tests -x --junitxml=test-results/junit.xml --durations=5 + no_output_timeout: 120m - run: name: Stop and remove first container command: | From d252dd60ede02fbf86589200af819d89cbdce754 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 18:49:41 -0800 Subject: [PATCH 10/22] fix testing --- .circleci/config.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a7f492c89fc4..7278beefe7c6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1093,7 +1093,7 @@ jobs: # the OTEL test - should get this as a trace command: | docker run -d \ - -p 4000:4000 \ + -p 4001:4001 \ -e DATABASE_URL=$PROXY_DATABASE_URL \ -e REDIS_HOST=$REDIS_HOST \ -e REDIS_PASSWORD=$REDIS_PASSWORD \ @@ -1111,10 +1111,10 @@ jobs: -e APORIA_API_KEY_1=$APORIA_API_KEY_1 \ -e COHERE_API_KEY=$COHERE_API_KEY \ -e GCS_FLUSH_INTERVAL="1" \ - --name my-app \ + --name my-app-2 \ -v $(pwd)/litellm/proxy/example_config_yaml/otel_test_config.yaml:/app/config.yaml \ -v $(pwd)/litellm/proxy/example_config_yaml/custom_guardrail.py:/app/custom_guardrail.py \ - my-app:latest \ + my-app-2:latest \ --config /app/config.yaml \ --port 4001 \ --detailed_debug \ @@ -1133,6 +1133,8 @@ jobs: command: | docker stop my-app docker rm my-app + docker stop my-app-2 + docker rm my-app-2 # Second Docker Container Run with Different Config # NOTE: We intentionally pass a "bad" license here. We need to ensure proxy starts and serves request even with bad license From abae707fbba82a858c4342568a08b8eb009a0c15 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 18:50:34 -0800 Subject: [PATCH 11/22] fix proxy_logging_guardrails_model_info_tests --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7278beefe7c6..3ce3170805c5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -975,7 +975,7 @@ jobs: # Store test results - store_test_results: path: test-results - proxy_e2e_tests_advanced: + proxy_logging_guardrails_model_info_tests: machine: image: ubuntu-2204:2023.10.1 resource_class: xlarge From c372a4695704e39e48fa44d8f38cb8cc0d6b308a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 18:53:57 -0800 Subject: [PATCH 12/22] test_redis_caching_ttl_pipeline --- tests/local_testing/test_caching.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/local_testing/test_caching.py b/tests/local_testing/test_caching.py index 18f7700c7529..c496c2bbc883 100644 --- a/tests/local_testing/test_caching.py +++ b/tests/local_testing/test_caching.py @@ -2382,8 +2382,12 @@ async def test_redis_caching_ttl_pipeline(): # Verify that the set method was called on the mock Redis instance mock_set.assert_has_calls( [ - call.set("test_key1", '"test_value1"', ex=expected_timedelta), - call.set("test_key2", '"test_value2"', ex=expected_timedelta), + call.set( + "test_key1", '"test_value1"', ex=expected_timedelta, keepttl=False + ), + call.set( + "test_key2", '"test_value2"', ex=expected_timedelta, keepttl=False + ), ] ) From 383e90f67d4ed804e7a7ddaf4b397d5434cd290b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 18:56:10 -0800 Subject: [PATCH 13/22] config.yaml updates ci/cd --- .circleci/config.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3ce3170805c5..8bebc1c19ab3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1088,7 +1088,7 @@ jobs: no_output_timeout: 120m # Clean up first container - run: - name: Run Docker container + name: Run Docker container (Instance 2 Port 4001) # intentionally give bad redis credentials here # the OTEL test - should get this as a trace command: | @@ -1119,17 +1119,17 @@ jobs: --port 4001 \ --detailed_debug \ - run: - name: Wait for app to be ready + name: Wait for app to be ready (instance 2 port 4001) command: dockerize -wait http://localhost:4001 -timeout 5m - run: - name: Run tests + name: Run Multi Instance Tests command: | pwd ls python -m pytest -vv tests/multi_instance_tests -x --junitxml=test-results/junit.xml --durations=5 no_output_timeout: 120m - run: - name: Stop and remove first container + name: Stop and remove first container (Cleanup) command: | docker stop my-app docker rm my-app @@ -1139,7 +1139,7 @@ jobs: # Second Docker Container Run with Different Config # NOTE: We intentionally pass a "bad" license here. We need to ensure proxy starts and serves request even with bad license - run: - name: Run Second Docker container + name: Run 3rd Docker container (Restart docker container - ensure it starts) command: | docker run -d \ -p 4000:4000 \ From eca338b548e2f6d5e88d6e564ff20dd9ced5b5b0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 18:58:13 -0800 Subject: [PATCH 14/22] fix default ttl setting --- litellm/caching/in_memory_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py index aa40c4a4333d..e780fca78ade 100644 --- a/litellm/caching/in_memory_cache.py +++ b/litellm/caching/in_memory_cache.py @@ -32,7 +32,7 @@ def __init__( self.max_size_in_memory = ( max_size_in_memory or IN_MEMORY_CACHE_MAX_SIZE ) # set an upper bound of 200 items in-memory - self.default_ttl = default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL + self.default_ttl: float = default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL # in-memory cache self.cache_dict: dict = {} From cf1325931355206676508341c3f91a7f6f1b3006 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 19:02:23 -0800 Subject: [PATCH 15/22] udpate config.yml --- .circleci/config.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8bebc1c19ab3..1ad39489f5e9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1034,7 +1034,7 @@ jobs: name: Build Docker image command: docker build -t my-app:latest -f ./docker/Dockerfile.database . - run: - name: Run Docker container + name: Run Docker container (Instance 1 port 4000) # intentionally give bad redis credentials here # the OTEL test - should get this as a trace command: | @@ -1065,7 +1065,7 @@ jobs: --port 4000 \ --detailed_debug \ - run: - name: Install curl and dockerize + name: Install curl and dockerize (Instance 1 port 4000) command: | sudo apt-get update sudo apt-get install -y curl @@ -1073,14 +1073,14 @@ jobs: sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz sudo rm dockerize-linux-amd64-v0.6.1.tar.gz - run: - name: Start outputting logs + name: Start outputting logs (Instance 1 port 4000) command: docker logs -f my-app background: true - run: - name: Wait for app to be ready + name: Wait for app to be ready (Instance 1 port 4000) command: dockerize -wait http://localhost:4000 -timeout 5m - run: - name: Run tests + name: Run tests (Instance 1 port 4000) command: | pwd ls From a39e386aeeaed54c347cd46a8cbeb089846275f1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 19:07:17 -0800 Subject: [PATCH 16/22] update config.yml --- .circleci/config.yml | 208 ++++++++++++++++++++++++++++++++----------- 1 file changed, 157 insertions(+), 51 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 1ad39489f5e9..2aef17257362 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1034,7 +1034,7 @@ jobs: name: Build Docker image command: docker build -t my-app:latest -f ./docker/Dockerfile.database . - run: - name: Run Docker container (Instance 1 port 4000) + name: Run Docker container # intentionally give bad redis credentials here # the OTEL test - should get this as a trace command: | @@ -1065,7 +1065,7 @@ jobs: --port 4000 \ --detailed_debug \ - run: - name: Install curl and dockerize (Instance 1 port 4000) + name: Install curl and dockerize command: | sudo apt-get update sudo apt-get install -y curl @@ -1073,14 +1073,14 @@ jobs: sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz sudo rm dockerize-linux-amd64-v0.6.1.tar.gz - run: - name: Start outputting logs (Instance 1 port 4000) + name: Start outputting logs command: docker logs -f my-app background: true - run: - name: Wait for app to be ready (Instance 1 port 4000) + name: Wait for app to be ready command: dockerize -wait http://localhost:4000 -timeout 5m - run: - name: Run tests (Instance 1 port 4000) + name: Run tests command: | pwd ls @@ -1088,58 +1088,15 @@ jobs: no_output_timeout: 120m # Clean up first container - run: - name: Run Docker container (Instance 2 Port 4001) - # intentionally give bad redis credentials here - # the OTEL test - should get this as a trace - command: | - docker run -d \ - -p 4001:4001 \ - -e DATABASE_URL=$PROXY_DATABASE_URL \ - -e REDIS_HOST=$REDIS_HOST \ - -e REDIS_PASSWORD=$REDIS_PASSWORD \ - -e REDIS_PORT=$REDIS_PORT \ - -e LITELLM_MASTER_KEY="sk-1234" \ - -e OPENAI_API_KEY=$OPENAI_API_KEY \ - -e LITELLM_LICENSE=$LITELLM_LICENSE \ - -e OTEL_EXPORTER="in_memory" \ - -e APORIA_API_BASE_2=$APORIA_API_BASE_2 \ - -e APORIA_API_KEY_2=$APORIA_API_KEY_2 \ - -e APORIA_API_BASE_1=$APORIA_API_BASE_1 \ - -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ - -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ - -e AWS_REGION_NAME=$AWS_REGION_NAME \ - -e APORIA_API_KEY_1=$APORIA_API_KEY_1 \ - -e COHERE_API_KEY=$COHERE_API_KEY \ - -e GCS_FLUSH_INTERVAL="1" \ - --name my-app-2 \ - -v $(pwd)/litellm/proxy/example_config_yaml/otel_test_config.yaml:/app/config.yaml \ - -v $(pwd)/litellm/proxy/example_config_yaml/custom_guardrail.py:/app/custom_guardrail.py \ - my-app-2:latest \ - --config /app/config.yaml \ - --port 4001 \ - --detailed_debug \ - - run: - name: Wait for app to be ready (instance 2 port 4001) - command: dockerize -wait http://localhost:4001 -timeout 5m - - run: - name: Run Multi Instance Tests - command: | - pwd - ls - python -m pytest -vv tests/multi_instance_tests -x --junitxml=test-results/junit.xml --durations=5 - no_output_timeout: 120m - - run: - name: Stop and remove first container (Cleanup) + name: Stop and remove first container command: | docker stop my-app docker rm my-app - docker stop my-app-2 - docker rm my-app-2 # Second Docker Container Run with Different Config # NOTE: We intentionally pass a "bad" license here. We need to ensure proxy starts and serves request even with bad license - run: - name: Run 3rd Docker container (Restart docker container - ensure it starts) + name: Run Second Docker container command: | docker run -d \ -p 4000:4000 \ @@ -1295,6 +1252,154 @@ jobs: - store_test_results: path: test-results + multi_instance_proxy_tests: + machine: + image: ubuntu-2204:2023.10.1 + resource_class: xlarge + working_directory: ~/project + steps: + - checkout + - run: + name: Install Docker CLI + command: | + sudo apt-get update + sudo apt-get install -y docker-ce docker-ce-cli containerd.io + - run: + name: Install Python 3.9 + command: | + curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh + bash miniconda.sh -b -p $HOME/miniconda + export PATH="$HOME/miniconda/bin:$PATH" + conda init bash + source ~/.bashrc + conda create -n myenv python=3.9 -y + conda activate myenv + python --version + - run: + name: Install Python Dependencies + command: | + pip install "pytest==7.3.1" + pip install "pytest-asyncio==0.21.1" + pip install aiohttp + python -m pip install --upgrade pip + python -m pip install -r .circleci/requirements.txt + pip install "pytest==7.3.1" + pip install "pytest-retry==1.6.3" + pip install "pytest-mock==3.12.0" + pip install "pytest-asyncio==0.21.1" + pip install mypy + pip install "google-generativeai==0.3.2" + pip install "google-cloud-aiplatform==1.43.0" + pip install pyarrow + pip install "boto3==1.34.34" + pip install "aioboto3==12.3.0" + pip install langchain + pip install "langfuse>=2.0.0" + pip install "logfire==0.29.0" + pip install numpydoc + pip install prisma + pip install fastapi + pip install jsonschema + pip install "httpx==0.24.1" + pip install "gunicorn==21.2.0" + pip install "anyio==3.7.1" + pip install "aiodynamo==23.10.1" + pip install "asyncio==3.4.3" + pip install "PyGithub==1.59.1" + pip install "openai==1.54.0" + - run: + name: Build Docker image + command: docker build -t my-app:latest -f ./docker/Dockerfile.database . + - run: + name: Run First Proxy Instance (Port 4000) + command: | + docker run -d \ + -p 4000:4000 \ + -e DATABASE_URL=$PROXY_DATABASE_URL \ + -e REDIS_HOST=$REDIS_HOST \ + -e REDIS_PASSWORD=$REDIS_PASSWORD \ + -e REDIS_PORT=$REDIS_PORT \ + -e LITELLM_MASTER_KEY="sk-1234" \ + -e OPENAI_API_KEY=$OPENAI_API_KEY \ + -e LITELLM_LICENSE=$LITELLM_LICENSE \ + -e OTEL_EXPORTER="in_memory" \ + -e APORIA_API_BASE_2=$APORIA_API_BASE_2 \ + -e APORIA_API_KEY_2=$APORIA_API_KEY_2 \ + -e APORIA_API_BASE_1=$APORIA_API_BASE_1 \ + -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ + -e AWS_REGION_NAME=$AWS_REGION_NAME \ + -e APORIA_API_KEY_1=$APORIA_API_KEY_1 \ + -e COHERE_API_KEY=$COHERE_API_KEY \ + -e GCS_FLUSH_INTERVAL="1" \ + --name proxy-instance-1 \ + -v $(pwd)/litellm/proxy/example_config_yaml/otel_test_config.yaml:/app/config.yaml \ + -v $(pwd)/litellm/proxy/example_config_yaml/custom_guardrail.py:/app/custom_guardrail.py \ + my-app:latest \ + --config /app/config.yaml \ + --port 4000 \ + --detailed_debug + - run: + name: Run Second Proxy Instance (Port 4001) + command: | + docker run -d \ + -p 4001:4001 \ + -e DATABASE_URL=$PROXY_DATABASE_URL \ + -e REDIS_HOST=$REDIS_HOST \ + -e REDIS_PASSWORD=$REDIS_PASSWORD \ + -e REDIS_PORT=$REDIS_PORT \ + -e LITELLM_MASTER_KEY="sk-1234" \ + -e OPENAI_API_KEY=$OPENAI_API_KEY \ + -e LITELLM_LICENSE=$LITELLM_LICENSE \ + -e OTEL_EXPORTER="in_memory" \ + -e APORIA_API_BASE_2=$APORIA_API_BASE_2 \ + -e APORIA_API_KEY_2=$APORIA_API_KEY_2 \ + -e APORIA_API_BASE_1=$APORIA_API_BASE_1 \ + -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ + -e AWS_REGION_NAME=$AWS_REGION_NAME \ + -e APORIA_API_KEY_1=$APORIA_API_KEY_1 \ + -e COHERE_API_KEY=$COHERE_API_KEY \ + -e GCS_FLUSH_INTERVAL="1" \ + --name proxy-instance-2 \ + -v $(pwd)/litellm/proxy/example_config_yaml/otel_test_config.yaml:/app/config.yaml \ + -v $(pwd)/litellm/proxy/example_config_yaml/custom_guardrail.py:/app/custom_guardrail.py \ + my-app:latest \ + --config /app/config.yaml \ + --port 4001 \ + --detailed_debug + - run: + name: Install curl and dockerize + command: | + sudo apt-get update + sudo apt-get install -y curl + sudo wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz + sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz + sudo rm dockerize-linux-amd64-v0.6.1.tar.gz + - run: + name: Start outputting logs for first instance + command: docker logs -f proxy-instance-1 + background: true + - run: + name: Start outputting logs for second instance + command: docker logs -f proxy-instance-2 + background: true + - run: + name: Wait for both instances to be ready + command: | + dockerize -wait http://localhost:4000 -timeout 5m + dockerize -wait http://localhost:4001 -timeout 5m + - run: + name: Run Multi-Instance Tests + command: | + pwd + ls + python -m pytest -vv tests/multi_instance_tests -x --junitxml=test-results/junit.xml --durations=5 + no_output_timeout: 120m + # Store test results + - store_test_results: + path: test-results + upload-coverage: docker: - image: cimg/python:3.9 @@ -1443,6 +1548,8 @@ jobs: pip install "httpx==0.24.1" pip install "anyio==3.7.1" pip install "asyncio==3.4.3" + pip install "PyGithub==1.59.1" + pip install "openai==1.54.0 " - run: name: Install Playwright Browsers command: | @@ -1688,4 +1795,3 @@ workflows: branches: only: - main - From 774d7049bf214c26ab7bb53a96ed90d41290f9c4 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 19:08:21 -0800 Subject: [PATCH 17/22] update config.yml --- .circleci/config.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2aef17257362..199248d60f88 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1548,8 +1548,6 @@ jobs: pip install "httpx==0.24.1" pip install "anyio==3.7.1" pip install "asyncio==3.4.3" - pip install "PyGithub==1.59.1" - pip install "openai==1.54.0 " - run: name: Install Playwright Browsers command: | From caec8a1104e9286b0099a5ba822870880faf45fc Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 19:09:26 -0800 Subject: [PATCH 18/22] update config.yml --- .circleci/config.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 199248d60f88..f709bd3b7c9d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1730,6 +1730,12 @@ workflows: only: - main - /litellm_.*/ + - multi_instance_proxy_tests: + filters: + branches: + only: + - main + - /litellm_.*/ - upload-coverage: requires: - llm_translation_testing @@ -1771,6 +1777,7 @@ workflows: requires: - local_testing - build_and_test + - multi_instance_proxy_tests - load_testing - test_bad_database_url - llm_translation_testing From ce0c8135303752422334b62fe210974201ddfe66 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 19:15:56 -0800 Subject: [PATCH 19/22] fix linting warnings --- litellm/caching/in_memory_cache.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py index e780fca78ade..c29990a05cac 100644 --- a/litellm/caching/in_memory_cache.py +++ b/litellm/caching/in_memory_cache.py @@ -10,12 +10,12 @@ import json import time -from typing import List, Optional +from typing import List, Optional, Union from .base_cache import BaseCache -IN_MEMORY_CACHE_DEFAULT_TTL = 600.0 -IN_MEMORY_CACHE_MAX_SIZE = 200 +IN_MEMORY_CACHE_DEFAULT_TTL: int = 600 +IN_MEMORY_CACHE_MAX_SIZE: int = 200 class InMemoryCache(BaseCache): @@ -23,7 +23,7 @@ def __init__( self, max_size_in_memory: Optional[int] = 200, default_ttl: Optional[ - float + Union[int, float] ] = None, # default ttl is 10 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute ): """ @@ -32,7 +32,7 @@ def __init__( self.max_size_in_memory = ( max_size_in_memory or IN_MEMORY_CACHE_MAX_SIZE ) # set an upper bound of 200 items in-memory - self.default_ttl: float = default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL + self.default_ttl: float = float(default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL) # in-memory cache self.cache_dict: dict = {} From c19ec3757c9196fe64a91ef8890a621decf3a472 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 19:16:43 -0800 Subject: [PATCH 20/22] fix config --- litellm/proxy/example_config_yaml/otel_test_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/proxy/example_config_yaml/otel_test_config.yaml b/litellm/proxy/example_config_yaml/otel_test_config.yaml index da87c17e4bdb..05cb8f87337d 100644 --- a/litellm/proxy/example_config_yaml/otel_test_config.yaml +++ b/litellm/proxy/example_config_yaml/otel_test_config.yaml @@ -31,7 +31,7 @@ model_list: api_key: fake-key model_info: supports_vision: True - - model_name: fake-openai-endpoint-all-users + - model_name: fake-openai-endpoint-all-users litellm_params: model: openai/fake api_key: fake-key From b85db568ec775eadcf5f11c52e6d3da9891167be Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 20:12:48 -0800 Subject: [PATCH 21/22] fix linting --- litellm/caching/in_memory_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py index c29990a05cac..d6c933f5cafc 100644 --- a/litellm/caching/in_memory_cache.py +++ b/litellm/caching/in_memory_cache.py @@ -32,7 +32,7 @@ def __init__( self.max_size_in_memory = ( max_size_in_memory or IN_MEMORY_CACHE_MAX_SIZE ) # set an upper bound of 200 items in-memory - self.default_ttl: float = float(default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL) + self.default_ttl: int = int(default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL) # in-memory cache self.cache_dict: dict = {} From 5d6d9f9acac7b4be61fadde8f8eee3edd7d1fccd Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 17 Dec 2024 20:17:42 -0800 Subject: [PATCH 22/22] fix pytest --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f709bd3b7c9d..3f16ed1a35fe 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1394,7 +1394,7 @@ jobs: command: | pwd ls - python -m pytest -vv tests/multi_instance_tests -x --junitxml=test-results/junit.xml --durations=5 + python -m pytest -vv tests/multi_instance_tests -x -s -v --junitxml=test-results/junit.xml --durations=5 no_output_timeout: 120m # Store test results - store_test_results: