From 25ddacb7912b89483a6ac775d7e32170e3d08831 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 16:23:34 -0800
Subject: [PATCH 01/22] fix _select_model_name_for_cost_calc docstring

---
 litellm/cost_calculator.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index 6f176e12c380..45bd24aab3cc 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -373,8 +373,7 @@ def _select_model_name_for_cost_calc(
     1. If custom pricing is true, return received model name
     2. If base_model is set (e.g. for azure models), return that
     3. If completion response has model set return that
-    4. Check if compl
-    4. If model is passed in return that
+    4. If model is passed in, return that
     """
     return_model: Optional[str] = None
     region_name: Optional[str] = None

From 44039182284eede08a363f31c1a220a71087510c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 17:40:36 -0800
Subject: [PATCH 02/22] add keepttl to in memory cache

---
 litellm/caching/in_memory_cache.py | 35 +++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py
index 9fca96922636..525e93e7b71d 100644
--- a/litellm/caching/in_memory_cache.py
+++ b/litellm/caching/in_memory_cache.py
@@ -57,13 +57,25 @@ def evict_cache(self):
                 # One of the most common causes of memory leaks in Python is the retention of objects that are no longer being used.
                 # This can occur when an object is referenced by another object, but the reference is never removed.
 
-    def set_cache(self, key, value, **kwargs):
+    def set_cache(self, key, value, keepttl: bool = False, **kwargs):
+        """
+        Set cache value
+
+        Args:
+            key: str
+            value: Any
+            keepttl: bool. if True, retain the time to live associated with the key. (keepttl is a Redis parameter. we use the same parameter name as Redis)
+            **kwargs:
+        """
         if len(self.cache_dict) >= self.max_size_in_memory:
             # only evict when cache is full
             self.evict_cache()
 
         self.cache_dict[key] = value
-        if "ttl" in kwargs and kwargs["ttl"] is not None:
+
+        if keepttl and key in self.ttl_dict:
+            pass
+        elif "ttl" in kwargs and kwargs["ttl"] is not None:
             self.ttl_dict[key] = time.time() + kwargs["ttl"]
         else:
             self.ttl_dict[key] = time.time() + self.default_ttl
@@ -71,12 +83,25 @@ def set_cache(self, key, value, **kwargs):
     async def async_set_cache(self, key, value, **kwargs):
         self.set_cache(key=key, value=value, **kwargs)
 
-    async def async_set_cache_pipeline(self, cache_list, ttl=None, **kwargs):
+    async def async_set_cache_pipeline(
+        self, cache_list, ttl: Optional[float] = None, keepttl: bool = False, **kwargs
+    ):
+        """
+        Use in-memory cache for bulk write operations
+
+        Args:
+            cache_list: List[Tuple[Any, Any]]
+            ttl: Optional[float] = None
+            keepttl: bool = False. if True, retain the time to live associated with the key. (keepttl is a Redis parameter. we use the same parameter name as Redis)
+            **kwargs:
+        """
         for cache_key, cache_value in cache_list:
             if ttl is not None:
-                self.set_cache(key=cache_key, value=cache_value, ttl=ttl)
+                self.set_cache(
+                    key=cache_key, value=cache_value, ttl=ttl, keepttl=keepttl
+                )
             else:
-                self.set_cache(key=cache_key, value=cache_value)
+                self.set_cache(key=cache_key, value=cache_value, keepttl=keepttl)
 
     async def async_set_cache_sadd(self, key, value: List, ttl: Optional[float]):
         """

From c029d91b14577fbb425fefdf10275f0ee61bd55c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 17:41:27 -0800
Subject: [PATCH 03/22] add keepttl to redis and dual cache

---
 litellm/caching/dual_cache.py  | 31 +++++++++++++++++++++++++------
 litellm/caching/redis_cache.py | 24 ++++++++++++++++++++----
 2 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/litellm/caching/dual_cache.py b/litellm/caching/dual_cache.py
index f4abc6f15392..0e54774f0bf4 100644
--- a/litellm/caching/dual_cache.py
+++ b/litellm/caching/dual_cache.py
@@ -299,16 +299,22 @@ async def async_batch_get_cache(
         except Exception:
             verbose_logger.error(traceback.format_exc())
 
-    async def async_set_cache(self, key, value, local_only: bool = False, **kwargs):
+    async def async_set_cache(
+        self, key, value, local_only: bool = False, keepttl: bool = False, **kwargs
+    ):
         print_verbose(
             f"async set cache: cache key: {key}; local_only: {local_only}; value: {value}"
         )
         try:
             if self.in_memory_cache is not None:
-                await self.in_memory_cache.async_set_cache(key, value, **kwargs)
+                await self.in_memory_cache.async_set_cache(
+                    key, value, keepttl=keepttl, **kwargs
+                )
 
             if self.redis_cache is not None and local_only is False:
-                await self.redis_cache.async_set_cache(key, value, **kwargs)
+                await self.redis_cache.async_set_cache(
+                    key, value, keepttl=keepttl, **kwargs
+                )
         except Exception as e:
             verbose_logger.exception(
                 f"LiteLLM Cache: Excepton async add_cache: {str(e)}"
@@ -316,10 +322,20 @@ async def async_set_cache(self, key, value, local_only: bool = False, **kwargs):
 
     # async_batch_set_cache
     async def async_set_cache_pipeline(
-        self, cache_list: list, local_only: bool = False, **kwargs
+        self,
+        cache_list: list,
+        local_only: bool = False,
+        keepttl: bool = False,
+        **kwargs,
     ):
         """
         Batch write values to the cache
+
+        Args:
+            cache_list: list
+            local_only: bool = False
+            keepttl: bool. if True, retain the time to live associated with the key. (keepttl is a Redis parameter. we use the same parameter name as Redis)
+            **kwargs:
         """
         print_verbose(
             f"async batch set cache: cache keys: {cache_list}; local_only: {local_only}"
@@ -327,12 +343,15 @@ async def async_set_cache_pipeline(
         try:
             if self.in_memory_cache is not None:
                 await self.in_memory_cache.async_set_cache_pipeline(
-                    cache_list=cache_list, **kwargs
+                    cache_list=cache_list, keepttl=keepttl, **kwargs
                 )
 
             if self.redis_cache is not None and local_only is False:
                 await self.redis_cache.async_set_cache_pipeline(
-                    cache_list=cache_list, ttl=kwargs.pop("ttl", None), **kwargs
+                    cache_list=cache_list,
+                    ttl=kwargs.pop("ttl", None),
+                    keepttl=keepttl,
+                    **kwargs,
                 )
         except Exception as e:
             verbose_logger.exception(
diff --git a/litellm/caching/redis_cache.py b/litellm/caching/redis_cache.py
index fa0002fe6231..055a4407dad4 100644
--- a/litellm/caching/redis_cache.py
+++ b/litellm/caching/redis_cache.py
@@ -348,7 +348,11 @@ async def async_set_cache(self, key, value, **kwargs):
                 )
 
     async def _pipeline_helper(
-        self, pipe: pipeline, cache_list: List[Tuple[Any, Any]], ttl: Optional[float]
+        self,
+        pipe: pipeline,
+        cache_list: List[Tuple[Any, Any]],
+        ttl: Optional[float],
+        keepttl: bool = False,
     ) -> List:
         ttl = self.get_ttl(ttl=ttl)
         # Iterate through each key-value pair in the cache_list and set them in the pipeline.
@@ -362,16 +366,26 @@ async def _pipeline_helper(
             _td: Optional[timedelta] = None
             if ttl is not None:
                 _td = timedelta(seconds=ttl)
-            pipe.set(cache_key, json_cache_value, ex=_td)
+            pipe.set(cache_key, json_cache_value, ex=_td, keepttl=keepttl)
         # Execute the pipeline and return the results.
         results = await pipe.execute()
         return results
 
     async def async_set_cache_pipeline(
-        self, cache_list: List[Tuple[Any, Any]], ttl: Optional[float] = None, **kwargs
+        self,
+        cache_list: List[Tuple[Any, Any]],
+        ttl: Optional[float] = None,
+        keepttl: bool = False,
+        **kwargs,
     ):
         """
         Use Redis Pipelines for bulk write operations
+
+        Args:
+            cache_list: List[Tuple[Any, Any]]
+            ttl: Optional[float] = None
+            keepttl: bool. Redis parameter. If True, retain the time to live associated with the key.
+            **kwargs:
         """
         # don't waste a network request if there's nothing to set
         if len(cache_list) == 0:
@@ -388,7 +402,9 @@ async def async_set_cache_pipeline(
         try:
             async with _redis_client as redis_client:
                 async with redis_client.pipeline(transaction=True) as pipe:
-                    results = await self._pipeline_helper(pipe, cache_list, ttl)
+                    results = await self._pipeline_helper(
+                        pipe=pipe, cache_list=cache_list, ttl=ttl, keepttl=keepttl
+                    )
 
             print_verbose(f"pipeline results: {results}")
             # Optionally, you could process 'results' to make sure that all set operations were successful.

From f8625156281b3c796736d49944b037c06ed9a273 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 17:42:40 -0800
Subject: [PATCH 04/22] use keepttl param for set pipeline and auth check

---
 litellm/proxy/auth/auth_checks.py | 3 +--
 litellm/proxy/proxy_server.py     | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py
index 45e8c844c4f2..3c93ab485de2 100644
--- a/litellm/proxy/auth/auth_checks.py
+++ b/litellm/proxy/auth/auth_checks.py
@@ -493,7 +493,7 @@ async def _cache_management_object(
     user_api_key_cache: DualCache,
     proxy_logging_obj: Optional[ProxyLogging],
 ):
-    await user_api_key_cache.async_set_cache(key=key, value=value)
+    await user_api_key_cache.async_set_cache(key=key, value=value, keepttl=True)
 
 
 async def _cache_team_object(
@@ -720,7 +720,6 @@ async def get_key_object(
     cached_key_obj: Optional[UserAPIKeyAuth] = await user_api_key_cache.async_get_cache(
         key=key
     )
-
     if cached_key_obj is not None:
         if isinstance(cached_key_obj, dict):
             return UserAPIKeyAuth(**cached_key_obj)
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 4210d6035c55..9ac07ace33f9 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1247,8 +1247,8 @@ async def _update_team_cache():
     asyncio.create_task(
         user_api_key_cache.async_set_cache_pipeline(
             cache_list=values_to_update_in_cache,
-            ttl=60,
             litellm_parent_otel_span=parent_otel_span,
+            keepttl=True,
         )
     )
 

From fc022ff24ed4533b4d63212de571e8d67753c657 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 18:09:06 -0800
Subject: [PATCH 05/22] use keep ttl=True for _cache_management_object

---
 litellm/caching/dual_cache.py      |  4 +++-
 litellm/caching/in_memory_cache.py | 13 +++++++++----
 litellm/proxy/auth/auth_checks.py  |  7 ++++++-
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/litellm/caching/dual_cache.py b/litellm/caching/dual_cache.py
index 0e54774f0bf4..ceabef1ecc17 100644
--- a/litellm/caching/dual_cache.py
+++ b/litellm/caching/dual_cache.py
@@ -61,7 +61,9 @@ def __init__(
     ) -> None:
         super().__init__()
         # If in_memory_cache is not provided, use the default InMemoryCache
-        self.in_memory_cache = in_memory_cache or InMemoryCache()
+        self.in_memory_cache = in_memory_cache or InMemoryCache(
+            default_ttl=default_in_memory_ttl
+        )
         # If redis_cache is not provided, use the default RedisCache
         self.redis_cache = redis_cache
         self.last_redis_batch_access_time = LimitedSizeOrderedDict(
diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py
index 525e93e7b71d..7e871edb9d8a 100644
--- a/litellm/caching/in_memory_cache.py
+++ b/litellm/caching/in_memory_cache.py
@@ -12,24 +12,29 @@
 import time
 from typing import List, Optional
 
+import litellm
+
 from .base_cache import BaseCache
 
+IN_MEMORY_CACHE_DEFAULT_TTL = 600
+IN_MEMORY_CACHE_MAX_SIZE = 200
+
 
 class InMemoryCache(BaseCache):
     def __init__(
         self,
         max_size_in_memory: Optional[int] = 200,
         default_ttl: Optional[
-            int
-        ] = 600,  # default ttl is 10 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute
+            float
+        ] = None,  # default ttl is 10 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute
     ):
         """
         max_size_in_memory [int]: Maximum number of items in cache. done to prevent memory leaks. Use 200 items as a default
         """
         self.max_size_in_memory = (
-            max_size_in_memory or 200
+            max_size_in_memory or IN_MEMORY_CACHE_MAX_SIZE
         )  # set an upper bound of 200 items in-memory
-        self.default_ttl = default_ttl or 600
+        self.default_ttl = default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL
 
         # in-memory cache
         self.cache_dict: dict = {}
diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py
index 3c93ab485de2..3d3343c60cb9 100644
--- a/litellm/proxy/auth/auth_checks.py
+++ b/litellm/proxy/auth/auth_checks.py
@@ -493,7 +493,11 @@ async def _cache_management_object(
     user_api_key_cache: DualCache,
     proxy_logging_obj: Optional[ProxyLogging],
 ):
-    await user_api_key_cache.async_set_cache(key=key, value=value, keepttl=True)
+    await user_api_key_cache.async_set_cache(
+        key=key,
+        value=value,
+        keepttl=True,
+    )
 
 
 async def _cache_team_object(
@@ -720,6 +724,7 @@ async def get_key_object(
     cached_key_obj: Optional[UserAPIKeyAuth] = await user_api_key_cache.async_get_cache(
         key=key
     )
+
     if cached_key_obj is not None:
         if isinstance(cached_key_obj, dict):
             return UserAPIKeyAuth(**cached_key_obj)

From 6695dd028bedda5deef8f5dd9ee044fc5d764635 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 18:16:06 -0800
Subject: [PATCH 06/22] fix linting

---
 litellm/caching/in_memory_cache.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py
index 7e871edb9d8a..1b6d8cfb02de 100644
--- a/litellm/caching/in_memory_cache.py
+++ b/litellm/caching/in_memory_cache.py
@@ -16,7 +16,7 @@
 
 from .base_cache import BaseCache
 
-IN_MEMORY_CACHE_DEFAULT_TTL = 600
+IN_MEMORY_CACHE_DEFAULT_TTL = 600.0
 IN_MEMORY_CACHE_MAX_SIZE = 200
 
 

From 24133d5b2e2c5a7596889fe43004a6b1e5397e31 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 18:16:28 -0800
Subject: [PATCH 07/22] fix in memory cache

---
 litellm/caching/in_memory_cache.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py
index 1b6d8cfb02de..aa40c4a4333d 100644
--- a/litellm/caching/in_memory_cache.py
+++ b/litellm/caching/in_memory_cache.py
@@ -12,8 +12,6 @@
 import time
 from typing import List, Optional
 
-import litellm
-
 from .base_cache import BaseCache
 
 IN_MEMORY_CACHE_DEFAULT_TTL = 600.0

From 536f91d8be738ad8170096383e74201305601bf5 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 18:44:02 -0800
Subject: [PATCH 08/22] add e2e rpm setting on keys

---
 .../example_config_yaml/otel_test_config.yaml |   5 +
 litellm/proxy/proxy_config.yaml               |  15 +--
 .../test_multi_instance_test.py               | 123 ++++++++++++++++++
 3 files changed, 132 insertions(+), 11 deletions(-)
 create mode 100644 tests/multi_instance_tests/test_multi_instance_test.py

diff --git a/litellm/proxy/example_config_yaml/otel_test_config.yaml b/litellm/proxy/example_config_yaml/otel_test_config.yaml
index fae3ee3daec0..da87c17e4bdb 100644
--- a/litellm/proxy/example_config_yaml/otel_test_config.yaml
+++ b/litellm/proxy/example_config_yaml/otel_test_config.yaml
@@ -31,6 +31,11 @@ model_list:
      api_key: fake-key
    model_info:
      supports_vision: True
+  - model_name: fake-openai-endpoint-all-users
+   litellm_params:
+     model: openai/fake
+     api_key: fake-key
+     api_base: https://exampleopenaiendpoint-production.up.railway.app/
 
 
 
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 1a90d9090ed3..fe8452edebdf 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,14 +1,7 @@
 model_list:
-  - model_name: gpt-4o
+  - model_name: fake-openai-endpoint-all-users
     litellm_params:
-      model: openai/gpt-4o
-      api_key: os.environ/OPENAI_API_KEY
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
 
-litellm_settings:
-  tag_budget_config:
-    product:chat-bot: # (Tag)
-      max_budget: 0.000000000001 # (USD)
-      budget_duration: 1d # (Duration)
-    product:chat-bot-2: # (Tag)
-      max_budget: 100 # (USD)
-      budget_duration: 1d # (Duration)
\ No newline at end of file
diff --git a/tests/multi_instance_tests/test_multi_instance_test.py b/tests/multi_instance_tests/test_multi_instance_test.py
new file mode 100644
index 000000000000..e264cbc35784
--- /dev/null
+++ b/tests/multi_instance_tests/test_multi_instance_test.py
@@ -0,0 +1,123 @@
+import pytest
+import asyncio
+import aiohttp
+from typing import Optional, List, Union
+
+
+async def generate_key(session, rpm_limit: int, port: int = 4000):
+    url = f"http://0.0.0.0:{port}/key/generate"
+    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
+    data = {"rpm_limit": rpm_limit}
+
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(response_text)
+        print()
+
+        if status != 200:
+            raise Exception(f"Request did not return a 200 status code: {status}")
+
+        return await response.json()
+
+
+async def update_key(session, key: str, rpm_limit: int, port: int = 4000):
+    url = f"http://0.0.0.0:{port}/key/update"
+    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
+    data = {"key": key, "rpm_limit": rpm_limit}
+
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(response_text)
+        print()
+
+        if status != 200:
+            raise Exception(f"Request did not return a 200 status code: {status}")
+
+        return await response.json()
+
+
+async def chat_completion(session, key: str, port: int = 4000):
+    url = f"http://0.0.0.0:{port}/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
+    data = {
+        "model": "fake-openai-endpoint-all-users",
+        "messages": [{"role": "user", "content": "Hello!"}],
+    }
+
+    async with session.post(url, headers=headers, json=data) as response:
+        return response.status
+
+
+async def simulated_traffic_for_key(session, key: str, port: int = 4000):
+    print(f"simulating traffic for key {key} on port {port}")
+    for i in range(100):
+        print(f"simulating traffic - chat completion number {i}, port {port}")
+        await chat_completion(session, key, port=port)
+        await asyncio.sleep(0.1)
+
+
+@pytest.mark.asyncio
+async def test_multi_instance_key_management():
+    """
+    Test key management across multiple LiteLLM instances:
+    1. Create key on instance 1 (port 4000) with RPM=1
+    2. Test key on instance 2 (port 4001) - expect only 1/5 requests to succeed
+    3. Update key on instance 1 to RPM=1000
+    4. Test key on instance 2 - expect all 5 requests to succeed
+    """
+    async with aiohttp.ClientSession() as session:
+        # Create key on instance 1 with RPM=1
+        key_response = await generate_key(session, rpm_limit=1, port=4000)
+        test_key = key_response["key"]
+        print("created key with rpm limit 1")
+
+        # Test key on instance 2 with 5 requests
+        print("running 5 requests on instance 2. Expecting 1 to succeed")
+        statuses = []
+        for _ in range(5):
+            status = await chat_completion(session, test_key, port=4001)
+            statuses.append(status)
+            await asyncio.sleep(0.1)  # Small delay between requests
+
+        # Expect only first request to succeed
+        success_count = sum(1 for status in statuses if status == 200)
+        print(
+            f"statuses count of 5 /chat/completions: {statuses}. EXPECTED 1 SUCCESS (200)"
+        )
+        assert success_count == 1, f"Expected 1 successful request, got {success_count}"
+
+        # Update key on instance 1 to RPM=1000
+        await update_key(session, test_key, rpm_limit=1000, port=4000)
+        print("updated key on instance 1 to rpm limit 1000")
+        print("simulating /chat/completion straffic for 60 seconds")
+
+        # create task to simulate traffic for key
+        asyncio.create_task(simulated_traffic_for_key(session, test_key, port=4000))
+        asyncio.create_task(simulated_traffic_for_key(session, test_key, port=4001))
+
+        # wait for 60 seconds for traffic to propagate
+        await asyncio.sleep(60)  # Wait for key update to propagate
+
+        print("\n\n Done simulating traffic for 60 seconds \n\n")
+        print("\n\n Now testing if key has new rpm_limit=1000 \n\n")
+        print("running 5 requests on instance 2. Expecting 5 to succeed")
+        # Test key again on instance 2 with 5 requests
+        statuses = []
+        for _ in range(5):
+            status = await chat_completion(session, test_key, port=4001)
+            statuses.append(status)
+            await asyncio.sleep(0.1)
+        print(f"status of 5 /chat/completions: {statuses}. Expecting 200 for all 5")
+
+        # Expect all requests to succeed
+        success_count = sum(1 for status in statuses if status == 200)
+        assert (
+            success_count == 5
+        ), f"Expected 5 successful requests, got {success_count}"

From 62b3e19d492d6eb833c62a4dbcb6c6d2d9403ddb Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 18:47:26 -0800
Subject: [PATCH 09/22] edit ci/cd

---
 .circleci/config.yml | 43 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index c31a07a22b00..a7f492c89fc4 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -975,7 +975,7 @@ jobs:
       # Store test results
       - store_test_results:
           path: test-results
-  proxy_logging_guardrails_model_info_tests:
+  proxy_e2e_tests_advanced:
     machine:
       image: ubuntu-2204:2023.10.1
     resource_class: xlarge
@@ -1087,6 +1087,47 @@ jobs:
             python -m pytest -vv tests/otel_tests -x --junitxml=test-results/junit.xml --durations=5
           no_output_timeout: 120m
            # Clean up first container
+      - run:
+          name: Run Docker container
+          # intentionally give bad redis credentials here
+          # the OTEL test - should get this as a trace
+          command: |
+            docker run -d \
+              -p 4000:4000 \
+              -e DATABASE_URL=$PROXY_DATABASE_URL \
+              -e REDIS_HOST=$REDIS_HOST \
+              -e REDIS_PASSWORD=$REDIS_PASSWORD \
+              -e REDIS_PORT=$REDIS_PORT \
+              -e LITELLM_MASTER_KEY="sk-1234" \
+              -e OPENAI_API_KEY=$OPENAI_API_KEY \
+              -e LITELLM_LICENSE=$LITELLM_LICENSE \
+              -e OTEL_EXPORTER="in_memory" \
+              -e APORIA_API_BASE_2=$APORIA_API_BASE_2 \
+              -e APORIA_API_KEY_2=$APORIA_API_KEY_2 \
+              -e APORIA_API_BASE_1=$APORIA_API_BASE_1 \
+              -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
+              -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
+              -e AWS_REGION_NAME=$AWS_REGION_NAME \
+              -e APORIA_API_KEY_1=$APORIA_API_KEY_1 \
+              -e COHERE_API_KEY=$COHERE_API_KEY \
+              -e GCS_FLUSH_INTERVAL="1" \
+              --name my-app \
+              -v $(pwd)/litellm/proxy/example_config_yaml/otel_test_config.yaml:/app/config.yaml \
+              -v $(pwd)/litellm/proxy/example_config_yaml/custom_guardrail.py:/app/custom_guardrail.py \
+              my-app:latest \
+              --config /app/config.yaml \
+              --port 4001 \
+              --detailed_debug \
+      - run:
+          name: Wait for app to be ready
+          command: dockerize -wait http://localhost:4001 -timeout 5m
+      - run:
+          name: Run tests
+          command: |
+            pwd
+            ls
+            python -m pytest -vv tests/multi_instance_tests -x --junitxml=test-results/junit.xml --durations=5
+          no_output_timeout: 120m
       - run:
           name: Stop and remove first container
           command: |

From d252dd60ede02fbf86589200af819d89cbdce754 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 18:49:41 -0800
Subject: [PATCH 10/22] fix testing

---
 .circleci/config.yml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index a7f492c89fc4..7278beefe7c6 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1093,7 +1093,7 @@ jobs:
           # the OTEL test - should get this as a trace
           command: |
             docker run -d \
-              -p 4000:4000 \
+              -p 4001:4001 \
               -e DATABASE_URL=$PROXY_DATABASE_URL \
               -e REDIS_HOST=$REDIS_HOST \
               -e REDIS_PASSWORD=$REDIS_PASSWORD \
@@ -1111,10 +1111,10 @@ jobs:
               -e APORIA_API_KEY_1=$APORIA_API_KEY_1 \
               -e COHERE_API_KEY=$COHERE_API_KEY \
               -e GCS_FLUSH_INTERVAL="1" \
-              --name my-app \
+              --name my-app-2 \
               -v $(pwd)/litellm/proxy/example_config_yaml/otel_test_config.yaml:/app/config.yaml \
               -v $(pwd)/litellm/proxy/example_config_yaml/custom_guardrail.py:/app/custom_guardrail.py \
-              my-app:latest \
+              my-app-2:latest \
               --config /app/config.yaml \
               --port 4001 \
               --detailed_debug \
@@ -1133,6 +1133,8 @@ jobs:
           command: |
             docker stop my-app
             docker rm my-app
+            docker stop my-app-2
+            docker rm my-app-2
 
       # Second Docker Container Run with Different Config
       # NOTE: We intentionally pass a "bad" license here. We need to ensure proxy starts and serves request even with bad license

From abae707fbba82a858c4342568a08b8eb009a0c15 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 18:50:34 -0800
Subject: [PATCH 11/22] fix proxy_logging_guardrails_model_info_tests

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 7278beefe7c6..3ce3170805c5 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -975,7 +975,7 @@ jobs:
       # Store test results
       - store_test_results:
           path: test-results
-  proxy_e2e_tests_advanced:
+  proxy_logging_guardrails_model_info_tests:
     machine:
       image: ubuntu-2204:2023.10.1
     resource_class: xlarge

From c372a4695704e39e48fa44d8f38cb8cc0d6b308a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 18:53:57 -0800
Subject: [PATCH 12/22] test_redis_caching_ttl_pipeline

---
 tests/local_testing/test_caching.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/local_testing/test_caching.py b/tests/local_testing/test_caching.py
index 18f7700c7529..c496c2bbc883 100644
--- a/tests/local_testing/test_caching.py
+++ b/tests/local_testing/test_caching.py
@@ -2382,8 +2382,12 @@ async def test_redis_caching_ttl_pipeline():
         # Verify that the set method was called on the mock Redis instance
         mock_set.assert_has_calls(
             [
-                call.set("test_key1", '"test_value1"', ex=expected_timedelta),
-                call.set("test_key2", '"test_value2"', ex=expected_timedelta),
+                call.set(
+                    "test_key1", '"test_value1"', ex=expected_timedelta, keepttl=False
+                ),
+                call.set(
+                    "test_key2", '"test_value2"', ex=expected_timedelta, keepttl=False
+                ),
             ]
         )
 

From 383e90f67d4ed804e7a7ddaf4b397d5434cd290b Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 18:56:10 -0800
Subject: [PATCH 13/22] config.yaml updates ci/cd

---
 .circleci/config.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 3ce3170805c5..8bebc1c19ab3 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1088,7 +1088,7 @@ jobs:
           no_output_timeout: 120m
            # Clean up first container
       - run:
-          name: Run Docker container
+          name: Run Docker container (Instance 2 Port 4001)
           # intentionally give bad redis credentials here
           # the OTEL test - should get this as a trace
           command: |
@@ -1119,17 +1119,17 @@ jobs:
               --port 4001 \
               --detailed_debug \
       - run:
-          name: Wait for app to be ready
+          name: Wait for app to be ready (instance 2 port 4001)
           command: dockerize -wait http://localhost:4001 -timeout 5m
       - run:
-          name: Run tests
+          name: Run Multi Instance Tests
           command: |
             pwd
             ls
             python -m pytest -vv tests/multi_instance_tests -x --junitxml=test-results/junit.xml --durations=5
           no_output_timeout: 120m
       - run:
-          name: Stop and remove first container
+          name: Stop and remove first container (Cleanup)
           command: |
             docker stop my-app
             docker rm my-app
@@ -1139,7 +1139,7 @@ jobs:
       # Second Docker Container Run with Different Config
       # NOTE: We intentionally pass a "bad" license here. We need to ensure proxy starts and serves request even with bad license
       - run:
-          name: Run Second Docker container
+          name: Run 3rd Docker container (Restart docker container - ensure it starts)
           command: |
             docker run -d \
               -p 4000:4000 \

From eca338b548e2f6d5e88d6e564ff20dd9ced5b5b0 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 18:58:13 -0800
Subject: [PATCH 14/22] fix default ttl setting

---
 litellm/caching/in_memory_cache.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py
index aa40c4a4333d..e780fca78ade 100644
--- a/litellm/caching/in_memory_cache.py
+++ b/litellm/caching/in_memory_cache.py
@@ -32,7 +32,7 @@ def __init__(
         self.max_size_in_memory = (
             max_size_in_memory or IN_MEMORY_CACHE_MAX_SIZE
         )  # set an upper bound of 200 items in-memory
-        self.default_ttl = default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL
+        self.default_ttl: float = default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL
 
         # in-memory cache
         self.cache_dict: dict = {}

From cf1325931355206676508341c3f91a7f6f1b3006 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 19:02:23 -0800
Subject: [PATCH 15/22] update config.yml

---
 .circleci/config.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 8bebc1c19ab3..1ad39489f5e9 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1034,7 +1034,7 @@ jobs:
           name: Build Docker image
           command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
       - run:
-          name: Run Docker container
+          name: Run Docker container (Instance 1 port 4000)
           # intentionally give bad redis credentials here
           # the OTEL test - should get this as a trace
           command: |
@@ -1065,7 +1065,7 @@ jobs:
               --port 4000 \
               --detailed_debug \
       - run:
-          name: Install curl and dockerize
+          name: Install curl and dockerize (Instance 1 port 4000)
           command: |
             sudo apt-get update
             sudo apt-get install -y curl
@@ -1073,14 +1073,14 @@ jobs:
             sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
             sudo rm dockerize-linux-amd64-v0.6.1.tar.gz
       - run:
-          name: Start outputting logs
+          name: Start outputting logs (Instance 1 port 4000)
           command: docker logs -f my-app
           background: true
       - run:
-          name: Wait for app to be ready
+          name: Wait for app to be ready (Instance 1 port 4000)
           command: dockerize -wait http://localhost:4000 -timeout 5m
       - run:
-          name: Run tests
+          name: Run tests (Instance 1 port 4000)
           command: |
             pwd
             ls

From a39e386aeeaed54c347cd46a8cbeb089846275f1 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 19:07:17 -0800
Subject: [PATCH 16/22] update config.yml

---
 .circleci/config.yml | 208 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 157 insertions(+), 51 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 1ad39489f5e9..2aef17257362 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1034,7 +1034,7 @@ jobs:
           name: Build Docker image
           command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
       - run:
-          name: Run Docker container (Instance 1 port 4000)
+          name: Run Docker container
           # intentionally give bad redis credentials here
           # the OTEL test - should get this as a trace
           command: |
@@ -1065,7 +1065,7 @@ jobs:
               --port 4000 \
               --detailed_debug \
       - run:
-          name: Install curl and dockerize (Instance 1 port 4000)
+          name: Install curl and dockerize
           command: |
             sudo apt-get update
             sudo apt-get install -y curl
@@ -1073,14 +1073,14 @@ jobs:
             sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
             sudo rm dockerize-linux-amd64-v0.6.1.tar.gz
       - run:
-          name: Start outputting logs (Instance 1 port 4000)
+          name: Start outputting logs
           command: docker logs -f my-app
           background: true
       - run:
-          name: Wait for app to be ready (Instance 1 port 4000)
+          name: Wait for app to be ready
           command: dockerize -wait http://localhost:4000 -timeout 5m
       - run:
-          name: Run tests (Instance 1 port 4000)
+          name: Run tests
           command: |
             pwd
             ls
@@ -1088,58 +1088,15 @@ jobs:
           no_output_timeout: 120m
            # Clean up first container
       - run:
-          name: Run Docker container (Instance 2 Port 4001)
-          # intentionally give bad redis credentials here
-          # the OTEL test - should get this as a trace
-          command: |
-            docker run -d \
-              -p 4001:4001 \
-              -e DATABASE_URL=$PROXY_DATABASE_URL \
-              -e REDIS_HOST=$REDIS_HOST \
-              -e REDIS_PASSWORD=$REDIS_PASSWORD \
-              -e REDIS_PORT=$REDIS_PORT \
-              -e LITELLM_MASTER_KEY="sk-1234" \
-              -e OPENAI_API_KEY=$OPENAI_API_KEY \
-              -e LITELLM_LICENSE=$LITELLM_LICENSE \
-              -e OTEL_EXPORTER="in_memory" \
-              -e APORIA_API_BASE_2=$APORIA_API_BASE_2 \
-              -e APORIA_API_KEY_2=$APORIA_API_KEY_2 \
-              -e APORIA_API_BASE_1=$APORIA_API_BASE_1 \
-              -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
-              -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-              -e AWS_REGION_NAME=$AWS_REGION_NAME \
-              -e APORIA_API_KEY_1=$APORIA_API_KEY_1 \
-              -e COHERE_API_KEY=$COHERE_API_KEY \
-              -e GCS_FLUSH_INTERVAL="1" \
-              --name my-app-2 \
-              -v $(pwd)/litellm/proxy/example_config_yaml/otel_test_config.yaml:/app/config.yaml \
-              -v $(pwd)/litellm/proxy/example_config_yaml/custom_guardrail.py:/app/custom_guardrail.py \
-              my-app-2:latest \
-              --config /app/config.yaml \
-              --port 4001 \
-              --detailed_debug \
-      - run:
-          name: Wait for app to be ready (instance 2 port 4001)
-          command: dockerize -wait http://localhost:4001 -timeout 5m
-      - run:
-          name: Run Multi Instance Tests
-          command: |
-            pwd
-            ls
-            python -m pytest -vv tests/multi_instance_tests -x --junitxml=test-results/junit.xml --durations=5
-          no_output_timeout: 120m
-      - run:
-          name: Stop and remove first container (Cleanup)
+          name: Stop and remove first container
           command: |
             docker stop my-app
             docker rm my-app
-            docker stop my-app-2
-            docker rm my-app-2
 
       # Second Docker Container Run with Different Config
       # NOTE: We intentionally pass a "bad" license here. We need to ensure proxy starts and serves request even with bad license
       - run:
-          name: Run 3rd Docker container (Restart docker container - ensure it starts)
+          name: Run Second Docker container
           command: |
             docker run -d \
               -p 4000:4000 \
@@ -1295,6 +1252,154 @@ jobs:
       - store_test_results:
           path: test-results
 
+  multi_instance_proxy_tests:
+    machine:
+      image: ubuntu-2204:2023.10.1
+    resource_class: xlarge
+    working_directory: ~/project
+    steps:
+      - checkout
+      - run:
+          name: Install Docker CLI
+          command: |
+            sudo apt-get update
+            sudo apt-get install -y docker-ce docker-ce-cli containerd.io
+      - run:
+          name: Install Python 3.9
+          command: |
+            curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
+            bash miniconda.sh -b -p $HOME/miniconda
+            export PATH="$HOME/miniconda/bin:$PATH"
+            conda init bash
+            source ~/.bashrc
+            conda create -n myenv python=3.9 -y
+            conda activate myenv
+            python --version
+      - run:
+          name: Install Python Dependencies
+          command: |
+            pip install "pytest==7.3.1"
+            pip install "pytest-asyncio==0.21.1"
+            pip install aiohttp
+            python -m pip install --upgrade pip
+            python -m pip install -r .circleci/requirements.txt
+            pip install "pytest==7.3.1"
+            pip install "pytest-retry==1.6.3"
+            pip install "pytest-mock==3.12.0"
+            pip install "pytest-asyncio==0.21.1"
+            pip install mypy
+            pip install "google-generativeai==0.3.2"
+            pip install "google-cloud-aiplatform==1.43.0"
+            pip install pyarrow
+            pip install "boto3==1.34.34"
+            pip install "aioboto3==12.3.0"
+            pip install langchain
+            pip install "langfuse>=2.0.0"
+            pip install "logfire==0.29.0"
+            pip install numpydoc
+            pip install prisma            
+            pip install fastapi            
+            pip install jsonschema   
+            pip install "httpx==0.24.1"
+            pip install "gunicorn==21.2.0"
+            pip install "anyio==3.7.1"
+            pip install "aiodynamo==23.10.1"
+            pip install "asyncio==3.4.3"
+            pip install "PyGithub==1.59.1"
+            pip install "openai==1.54.0"
+      - run:
+          name: Build Docker image
+          command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
+      - run:
+          name: Run First Proxy Instance (Port 4000)
+          command: |
+            docker run -d \
+              -p 4000:4000 \
+              -e DATABASE_URL=$PROXY_DATABASE_URL \
+              -e REDIS_HOST=$REDIS_HOST \
+              -e REDIS_PASSWORD=$REDIS_PASSWORD \
+              -e REDIS_PORT=$REDIS_PORT \
+              -e LITELLM_MASTER_KEY="sk-1234" \
+              -e OPENAI_API_KEY=$OPENAI_API_KEY \
+              -e LITELLM_LICENSE=$LITELLM_LICENSE \
+              -e OTEL_EXPORTER="in_memory" \
+              -e APORIA_API_BASE_2=$APORIA_API_BASE_2 \
+              -e APORIA_API_KEY_2=$APORIA_API_KEY_2 \
+              -e APORIA_API_BASE_1=$APORIA_API_BASE_1 \
+              -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
+              -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
+              -e AWS_REGION_NAME=$AWS_REGION_NAME \
+              -e APORIA_API_KEY_1=$APORIA_API_KEY_1 \
+              -e COHERE_API_KEY=$COHERE_API_KEY \
+              -e GCS_FLUSH_INTERVAL="1" \
+              --name proxy-instance-1 \
+              -v $(pwd)/litellm/proxy/example_config_yaml/otel_test_config.yaml:/app/config.yaml \
+              -v $(pwd)/litellm/proxy/example_config_yaml/custom_guardrail.py:/app/custom_guardrail.py \
+              my-app:latest \
+              --config /app/config.yaml \
+              --port 4000 \
+              --detailed_debug
+      - run:
+          name: Run Second Proxy Instance (Port 4001)
+          command: |
+            docker run -d \
+              -p 4001:4001 \
+              -e DATABASE_URL=$PROXY_DATABASE_URL \
+              -e REDIS_HOST=$REDIS_HOST \
+              -e REDIS_PASSWORD=$REDIS_PASSWORD \
+              -e REDIS_PORT=$REDIS_PORT \
+              -e LITELLM_MASTER_KEY="sk-1234" \
+              -e OPENAI_API_KEY=$OPENAI_API_KEY \
+              -e LITELLM_LICENSE=$LITELLM_LICENSE \
+              -e OTEL_EXPORTER="in_memory" \
+              -e APORIA_API_BASE_2=$APORIA_API_BASE_2 \
+              -e APORIA_API_KEY_2=$APORIA_API_KEY_2 \
+              -e APORIA_API_BASE_1=$APORIA_API_BASE_1 \
+              -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
+              -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
+              -e AWS_REGION_NAME=$AWS_REGION_NAME \
+              -e APORIA_API_KEY_1=$APORIA_API_KEY_1 \
+              -e COHERE_API_KEY=$COHERE_API_KEY \
+              -e GCS_FLUSH_INTERVAL="1" \
+              --name proxy-instance-2 \
+              -v $(pwd)/litellm/proxy/example_config_yaml/otel_test_config.yaml:/app/config.yaml \
+              -v $(pwd)/litellm/proxy/example_config_yaml/custom_guardrail.py:/app/custom_guardrail.py \
+              my-app:latest \
+              --config /app/config.yaml \
+              --port 4001 \
+              --detailed_debug
+      - run:
+          name: Install curl and dockerize
+          command: |
+            sudo apt-get update
+            sudo apt-get install -y curl
+            sudo wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz
+            sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
+            sudo rm dockerize-linux-amd64-v0.6.1.tar.gz
+      - run:
+          name: Start outputting logs for first instance
+          command: docker logs -f proxy-instance-1
+          background: true
+      - run:
+          name: Start outputting logs for second instance
+          command: docker logs -f proxy-instance-2
+          background: true
+      - run:
+          name: Wait for both instances to be ready
+          command: |
+            dockerize -wait http://localhost:4000 -timeout 5m
+            dockerize -wait http://localhost:4001 -timeout 5m
+      - run:
+          name: Run Multi-Instance Tests
+          command: |
+            pwd
+            ls
+            python -m pytest -vv tests/multi_instance_tests -x --junitxml=test-results/junit.xml --durations=5
+          no_output_timeout: 120m
+      # Store test results
+      - store_test_results:
+          path: test-results
+
   upload-coverage:
     docker:
       - image: cimg/python:3.9
@@ -1443,6 +1548,8 @@ jobs:
             pip install "httpx==0.24.1"
             pip install "anyio==3.7.1"
             pip install "asyncio==3.4.3"
+            pip install "PyGithub==1.59.1"
+            pip install "openai==1.54.0 "
       - run:
           name: Install Playwright Browsers
           command: |
@@ -1688,4 +1795,3 @@ workflows:
             branches:
               only:
                 - main
-      

From 774d7049bf214c26ab7bb53a96ed90d41290f9c4 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 19:08:21 -0800
Subject: [PATCH 17/22] update config.yml

---
 .circleci/config.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 2aef17257362..199248d60f88 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1548,8 +1548,6 @@ jobs:
             pip install "httpx==0.24.1"
             pip install "anyio==3.7.1"
             pip install "asyncio==3.4.3"
-            pip install "PyGithub==1.59.1"
-            pip install "openai==1.54.0 "
       - run:
           name: Install Playwright Browsers
           command: |

From caec8a1104e9286b0099a5ba822870880faf45fc Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 19:09:26 -0800
Subject: [PATCH 18/22] update config.yml

---
 .circleci/config.yml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 199248d60f88..f709bd3b7c9d 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1730,6 +1730,12 @@ workflows:
               only:
                 - main
                 - /litellm_.*/
+      - multi_instance_proxy_tests:
+          filters:
+            branches:
+              only:
+                - main
+                - /litellm_.*/
       - upload-coverage:
           requires:
             - llm_translation_testing
@@ -1771,6 +1777,7 @@ workflows:
           requires:
             - local_testing
             - build_and_test
+            - multi_instance_proxy_tests
             - load_testing
             - test_bad_database_url
             - llm_translation_testing

From ce0c8135303752422334b62fe210974201ddfe66 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 19:15:56 -0800
Subject: [PATCH 19/22] fix linting warnings

---
 litellm/caching/in_memory_cache.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py
index e780fca78ade..c29990a05cac 100644
--- a/litellm/caching/in_memory_cache.py
+++ b/litellm/caching/in_memory_cache.py
@@ -10,12 +10,12 @@
 
 import json
 import time
-from typing import List, Optional
+from typing import List, Optional, Union
 
 from .base_cache import BaseCache
 
-IN_MEMORY_CACHE_DEFAULT_TTL = 600.0
-IN_MEMORY_CACHE_MAX_SIZE = 200
+IN_MEMORY_CACHE_DEFAULT_TTL: int = 600
+IN_MEMORY_CACHE_MAX_SIZE: int = 200
 
 
 class InMemoryCache(BaseCache):
@@ -23,7 +23,7 @@ def __init__(
         self,
         max_size_in_memory: Optional[int] = 200,
         default_ttl: Optional[
-            float
+            Union[int, float]
         ] = None,  # default ttl is 10 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute
     ):
         """
@@ -32,7 +32,7 @@ def __init__(
         self.max_size_in_memory = (
             max_size_in_memory or IN_MEMORY_CACHE_MAX_SIZE
         )  # set an upper bound of 200 items in-memory
-        self.default_ttl: float = default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL
+        self.default_ttl: float = float(default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL)
 
         # in-memory cache
         self.cache_dict: dict = {}

From c19ec3757c9196fe64a91ef8890a621decf3a472 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 19:16:43 -0800
Subject: [PATCH 20/22] fix config

---
 litellm/proxy/example_config_yaml/otel_test_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/proxy/example_config_yaml/otel_test_config.yaml b/litellm/proxy/example_config_yaml/otel_test_config.yaml
index da87c17e4bdb..05cb8f87337d 100644
--- a/litellm/proxy/example_config_yaml/otel_test_config.yaml
+++ b/litellm/proxy/example_config_yaml/otel_test_config.yaml
@@ -31,7 +31,7 @@ model_list:
      api_key: fake-key
    model_info:
      supports_vision: True
-  - model_name: fake-openai-endpoint-all-users
+ - model_name: fake-openai-endpoint-all-users
    litellm_params:
      model: openai/fake
      api_key: fake-key

From b85db568ec775eadcf5f11c52e6d3da9891167be Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 20:12:48 -0800
Subject: [PATCH 21/22] fix linting

---
 litellm/caching/in_memory_cache.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/caching/in_memory_cache.py b/litellm/caching/in_memory_cache.py
index c29990a05cac..d6c933f5cafc 100644
--- a/litellm/caching/in_memory_cache.py
+++ b/litellm/caching/in_memory_cache.py
@@ -32,7 +32,7 @@ def __init__(
         self.max_size_in_memory = (
             max_size_in_memory or IN_MEMORY_CACHE_MAX_SIZE
         )  # set an upper bound of 200 items in-memory
-        self.default_ttl: float = float(default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL)
+        self.default_ttl: int = int(default_ttl or IN_MEMORY_CACHE_DEFAULT_TTL)
 
         # in-memory cache
         self.cache_dict: dict = {}

From 5d6d9f9acac7b4be61fadde8f8eee3edd7d1fccd Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 17 Dec 2024 20:17:42 -0800
Subject: [PATCH 22/22] fix pytest

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index f709bd3b7c9d..3f16ed1a35fe 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1394,7 +1394,7 @@ jobs:
           command: |
             pwd
             ls
-            python -m pytest -vv tests/multi_instance_tests -x --junitxml=test-results/junit.xml --durations=5
+            python -m pytest -vv tests/multi_instance_tests -x -s -v --junitxml=test-results/junit.xml --durations=5
           no_output_timeout: 120m
       # Store test results
       - store_test_results: