Add robust token counter with 0 default on failure
TensorTemplar committed Dec 23, 2024
1 parent 8b1ea40 commit a52de48
Showing 1 changed file with 22 additions and 7 deletions.
litellm/llms/ollama_chat.py: 29 changes (22 additions & 7 deletions)
@@ -571,13 +571,28 @@ async def ollama_acompletion(

         model_response.created = int(time.time())
         model_response.model = "ollama_chat/" + data["model"]
-        prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=data["messages"]))  # type: ignore
-        completion_tokens = response_json.get(
-            "eval_count",
-            litellm.token_counter(
-                text=response_json["message"]["content"], count_response_tokens=True
-            ),
-        )
+        prompt_tokens = response_json.get("prompt_eval_count", 0)
+        if prompt_tokens == 0:  # Only calculate if Ollama doesn't provide it
+            try:
+                prompt_tokens = litellm.token_counter(messages=data["messages"])
+            except (ValueError, TypeError, AttributeError) as e:
+                verbose_logger.debug(f"Error counting prompt tokens: {str(e)}")
+                prompt_tokens = 0  # Fallback if token counting fails
+
+        completion_tokens = response_json.get("eval_count", 0)
+        if completion_tokens == 0:
+            try:
+                # For function calls, the content might be a JSON string, since Ollama 0.5.0
+                response_text = (
+                    response_json["message"]["content"]
+                    if isinstance(response_json["message"]["content"], str)
+                    else json.dumps(response_json["message"]["content"])
+                )
+                completion_tokens = litellm.token_counter(text=response_text, count_response_tokens=True)
+            except (ValueError, TypeError, KeyError, json.JSONDecodeError) as e:
+                verbose_logger.debug(f"Error counting completion tokens: {str(e)}")
+                completion_tokens = 0
+
         setattr(
             model_response,
             "usage",
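Beyond the added error handling, the key behavioral fix is that dict.get evaluates its default argument eagerly: the old one-liners called litellm.token_counter on every response, even when Ollama already supplied prompt_eval_count or eval_count, and any exception raised inside the counter propagated to the caller. Below is a minimal, self-contained sketch of the difference; count_tokens is a hypothetical stand-in for litellm.token_counter, and the payloads are made up for illustration.

import json

def count_tokens(messages):
    # Hypothetical stand-in for litellm.token_counter; assume it can
    # raise on an unexpected message format.
    raise ValueError("unsupported message format")

response_json = {"prompt_eval_count": 42, "message": {"content": "hi"}}

# Old pattern: dict.get evaluates its default eagerly, so count_tokens()
# runs (and raises) even though "prompt_eval_count" is present.
try:
    prompt_tokens = response_json.get("prompt_eval_count", count_tokens([]))
except ValueError:
    prompt_tokens = None
print(prompt_tokens)  # None: the eager default raised before the count was read

# New pattern: only fall back when the server omits the count, and swallow
# counter errors with a 0 default instead of failing the whole request.
prompt_tokens = response_json.get("prompt_eval_count", 0)
if prompt_tokens == 0:
    try:
        prompt_tokens = count_tokens([])
    except (ValueError, TypeError, AttributeError):
        prompt_tokens = 0
print(prompt_tokens)  # 42: the server-provided count is used directly

# The completion side additionally normalizes non-string content (e.g. a
# function-call payload) into a JSON string before counting:
content = {"name": "get_weather", "arguments": {"city": "Berlin"}}
response_text = content if isinstance(content, str) else json.dumps(content)
print(response_text)  # {"name": "get_weather", "arguments": {"city": "Berlin"}}

One trade-off worth noting: under this scheme a legitimate zero count (an empty completion) is indistinguishable from a missing one, so the fallback counter may run on genuinely empty responses; with a 0 default on failure, that is harmless.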
