Skip to content

Commit

Permalink
add bytes_allowed method to Parser for token validation
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Nov 23, 2024
1 parent 4931fc3 commit 216db56
Showing 1 changed file with 32 additions and 0 deletions.
32 changes: 32 additions & 0 deletions parser/src/earley/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,33 @@ impl ParserState {
}
}

/// Returns `true` when `tok_bytes` is acceptable as the next bytes for this parser.
///
/// The check has two phases:
/// 1. Any bytes already stored in `self.bytes` beyond index `self.byte_to_token_idx.len()`
///    must agree with the leading bytes of `tok_bytes` (as far as the two overlap).
///    NOTE(review): these look like bytes that are recorded but not yet attributed
///    to a token - confirm against the rest of the file.
/// 2. Whatever is left of `tok_bytes` after that overlap is pushed byte by byte through
///    a `ParserRecognizer` inside `run_speculative`; the first rejected byte yields `false`.
///
/// An empty remainder (including an empty `tok_bytes`) is accepted without running
/// the recognizer at all.
pub fn bytes_allowed(&mut self, shared: &mut SharedState, tok_bytes: &[u8]) -> bool {
    self.assert_definitive();

    // Number of already-stored bytes past the applied position that the token overlaps.
    let start = self.byte_to_token_idx.len();
    let pending = self.bytes.len().saturating_sub(start);
    let overlap = pending.min(tok_bytes.len());

    let (head, rest) = tok_bytes.split_at(overlap);
    // Guard on `overlap > 0` so no slice index is formed when there is nothing to compare.
    if overlap > 0 && self.bytes[start..start + overlap] != *head {
        return false;
    }

    if rest.is_empty() {
        return true;
    }

    self.run_speculative(|state| {
        let mut recognizer = ParserRecognizer { shared, state };
        // `all` stops at the first byte the recognizer refuses.
        rest.iter().all(|&byte| recognizer.try_push_byte(byte))
    })
}

// apply_tokens() "pushes" the bytes in 'tokens' into the lexer and parser. It is a top-level
// method in this file. It is well below llguidance's top-level methods, but in the llguidance
// LLInterpreter interface, it is called indirectly via the commit_token() method.
Expand Down Expand Up @@ -2001,6 +2028,11 @@ impl Parser {
r
}

/// Locks the shared state and asks the parser state whether `tok_bytes` is allowed.
///
/// Panics if the lock on `self.shared` cannot be acquired (the `unwrap` on `lock()`).
pub fn bytes_allowed(&mut self, tok_bytes: &[u8]) -> bool {
    let mut guard = self.shared.lock().unwrap();
    let shared_state = &mut *guard;
    self.state.bytes_allowed(shared_state, tok_bytes)
}

/// Forwards to `filter_max_tokens` on the underlying parser state.
pub fn filter_max_tokens(&mut self) {
self.state.filter_max_tokens();
}
Expand Down

0 comments on commit 216db56

Please sign in to comment.