diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py
index 5aa26ced46dd..4bb12a3e8c9c 100644
--- a/litellm/llms/ollama_chat.py
+++ b/litellm/llms/ollama_chat.py
@@ -571,13 +571,28 @@ async def ollama_acompletion(
         model_response.created = int(time.time())
         model_response.model = "ollama_chat/" + data["model"]
-        prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=data["messages"]))  # type: ignore
-        completion_tokens = response_json.get(
-            "eval_count",
-            litellm.token_counter(
-                text=response_json["message"]["content"], count_response_tokens=True
-            ),
-        )
+        prompt_tokens = response_json.get("prompt_eval_count", 0)
+        if prompt_tokens == 0:  # Only calculate if Ollama doesn't provide it
+            try:
+                prompt_tokens = litellm.token_counter(messages=data["messages"])
+            except (ValueError, TypeError, AttributeError) as e:
+                verbose_logger.debug(f"Error counting prompt tokens: {str(e)}")
+                prompt_tokens = 0  # Fallback if token counting fails
+
+        completion_tokens = response_json.get("eval_count", 0)
+        if completion_tokens == 0:
+            try:
+                # For function calls the content may be a dict (since Ollama 0.5.0); serialize it before counting tokens
+                response_text = (
+                    response_json["message"]["content"]
+                    if isinstance(response_json["message"]["content"], str)
+                    else json.dumps(response_json["message"]["content"])
+                )
+                completion_tokens = litellm.token_counter(text=response_text, count_response_tokens=True)
+            except (ValueError, TypeError, KeyError, json.JSONDecodeError) as e:
+                verbose_logger.debug(f"Error counting completion tokens: {str(e)}")
+                completion_tokens = 0
+
         setattr(
             model_response,
             "usage",
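
For reference, below is a minimal standalone sketch of the fallback behaviour this patch introduces: provider-supplied counts (prompt_eval_count / eval_count) are used when present, and local token counting only runs when they are missing or zero. The helper names (count_tokens, usage_from_response) and the sample responses are illustrative stand-ins, not litellm or Ollama APIs; in the real code path litellm.token_counter fills the counting role.

import json


def count_tokens(text: str) -> int:
    # Crude stand-in for a real tokenizer: whitespace word count.
    return len(text.split())


def usage_from_response(response_json: dict, request_messages: list) -> tuple:
    """Prefer Ollama's own counts; fall back to local counting if they are absent."""
    prompt_tokens = response_json.get("prompt_eval_count", 0)
    if prompt_tokens == 0:
        try:
            prompt_tokens = count_tokens(json.dumps(request_messages))
        except (ValueError, TypeError):
            prompt_tokens = 0

    completion_tokens = response_json.get("eval_count", 0)
    if completion_tokens == 0:
        try:
            content = response_json["message"]["content"]
            # Tool/function-call responses may carry structured content,
            # so serialize non-strings before counting.
            text = content if isinstance(content, str) else json.dumps(content)
            completion_tokens = count_tokens(text)
        except (ValueError, TypeError, KeyError):
            completion_tokens = 0

    return prompt_tokens, completion_tokens


# Response missing eval counts (e.g. a tool-call turn): fallback counting kicks in.
resp = {"message": {"content": {"name": "get_weather", "arguments": {"city": "Paris"}}}}
print(usage_from_response(resp, [{"role": "user", "content": "What is the weather in Paris?"}]))

# Response with provider-supplied counts: they are used as-is.
resp = {"prompt_eval_count": 12, "eval_count": 34, "message": {"content": "Sunny."}}
print(usage_from_response(resp, []))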