Skip to content

Commit

Permalink
Add validate_token() method to TokenParser
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Nov 27, 2024
1 parent 3962ef3 commit d53d9f0
Showing 1 changed file with 20 additions and 0 deletions.
20 changes: 20 additions & 0 deletions parser/src/tokenparser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,12 +254,32 @@ impl TokenParser {
Ok(())
}

/// Checks whether the parser can accept the single token `token`.
///
/// The token is decoded to its raw bytes with `tok_trie().decode_raw`, and those
/// bytes are handed to the parser's `validate_bytes`. The token is considered
/// valid only when every decoded byte is reported as valid.
///
/// # Errors
///
/// Propagates any error returned by `check_initialized` (presumably raised when
/// the parser has not been initialized yet -- confirm against its definition).
///
/// # Panics
///
/// Panics if `validate_bytes` reports more valid bytes than were passed in.
pub fn validate_token(&mut self, token: TokenId) -> Result<bool> {
    // The argument names the calling method (presumably used in the error
    // message), so it must be this method's own name rather than a sibling's.
    self.check_initialized("validate_token")?;
    let bytes = self.tok_trie().decode_raw(&[token]);
    let n_valid = self.parser.validate_bytes(&bytes);
    // The parser can never validate more bytes than it was given.
    assert!(n_valid <= bytes.len());
    Ok(n_valid == bytes.len())
}

/// Returns how many of the passed tokens can be accepted by the parser.
/// It does not tokenize forced bytes, so it will accept non-canonical tokenizations.
/// If called with more than one token, it may ignore max_tokens constraints.
pub fn validate_tokens_raw(&mut self, tokens: &[TokenId]) -> Result<usize> {
self.check_initialized("validate_tokens_raw")?;

if tokens.is_empty() {
return Ok(0);
}

if tokens.len() == 1 {
return if self.validate_token(tokens[0])? {
Ok(1)
} else {
Ok(0)
};
}

let bytes = self.tok_trie().decode_raw(tokens);
let n_valid = self.parser.validate_bytes(&bytes);
assert!(n_valid <= bytes.len());
Expand Down

0 comments on commit d53d9f0

Please sign in to comment.