Skip to content

Commit

Permalink
track more parser stats
Browse files — browse the repository at this point in the history
  • Loading branch information
mmoskal committed Nov 14, 2024
1 parent 62a1403 commit f49791c
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 8 deletions.
21 changes: 19 additions & 2 deletions parser/src/earley/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use std::{

use anyhow::{bail, ensure, Result};
use derivre::{RegexAst, StateID};
use instant::Instant;
use serde::{Deserialize, Serialize};
use toktrie::{Recognizer, SimpleVob, SpecialToken, TokEnv, TokTrie, TokenId};

Expand Down Expand Up @@ -61,8 +62,8 @@ pub struct ParserStats {
pub num_lex_errors: usize,
pub num_lexemes: usize,
pub all_items: usize,

pub lexer_cost: u64,
pub compute_time_us: u64,
}

impl ParserStats {
Expand All @@ -74,8 +75,21 @@ impl ParserStats {
num_lexemes: self.num_lexemes - previous.num_lexemes,
num_lex_errors: self.num_lex_errors - previous.num_lex_errors,
all_items: self.all_items - previous.all_items,

lexer_cost: self.lexer_cost - previous.lexer_cost,
compute_time_us: self.compute_time_us - previous.compute_time_us,
}
}

/// Field-wise maximum of two stat snapshots.
///
/// Each counter in the result is the larger of the corresponding counters
/// in `self` and `other`; neither input is modified. Used to maintain a
/// running "peak per step" record across many parser steps.
pub fn max(&self, other: &ParserStats) -> ParserStats {
    use std::cmp::max;
    ParserStats {
        rows: max(self.rows, other.rows),
        definitive_bytes: max(self.definitive_bytes, other.definitive_bytes),
        lexer_ops: max(self.lexer_ops, other.lexer_ops),
        num_lexemes: max(self.num_lexemes, other.num_lexemes),
        num_lex_errors: max(self.num_lex_errors, other.num_lex_errors),
        all_items: max(self.all_items, other.all_items),
        lexer_cost: max(self.lexer_cost, other.lexer_cost),
        compute_time_us: max(self.compute_time_us, other.compute_time_us),
    }
}
}
Expand Down Expand Up @@ -407,6 +421,7 @@ impl ParserState {
computer: &dyn BiasComputer,
start: &[u8],
) -> SimpleVob {
let t0 = Instant::now();
let dfa = &mut shared.lexer.dfa;
dfa.set_fuel(self.limits.step_lexer_fuel);
dfa.set_max_states(self.limits.max_lexer_states);
Expand Down Expand Up @@ -438,6 +453,8 @@ impl ParserState {
set.allow_token(computer.trie().eos_token());
}

self.stats.compute_time_us += t0.elapsed().as_micros() as u64;

set
}

Expand Down
22 changes: 18 additions & 4 deletions parser/src/tokenparser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ pub struct TokenParser {
pub logger: Logger,
pub limits: ParserLimits,
pub bias_computer: Arc<dyn BiasComputer>,
last_step_stats: ParserStats,
max_step_stats: ParserStats,
pending_bogus_backtrack: u32,
// sampling any of these will pop the parser stack:
pop_tokens: Option<SimpleVob>,
Expand Down Expand Up @@ -93,6 +95,8 @@ impl TokenParser {
inference_caps,
limits,
pending_bogus_backtrack: 0,
max_step_stats: ParserStats::default(),
last_step_stats: ParserStats::default(),
mid_process_start_time,
mid_process_was_accepting: false,
no_bias_this_mid_process: false,
Expand Down Expand Up @@ -133,6 +137,14 @@ impl TokenParser {
self.parser.stats()
}

/// Borrow the parser statistics recorded for the most recent bias-computation
/// step. Presumably overwritten on every step — confirm against the caller
/// that sets `last_step_stats` in the step path.
pub fn last_step_stats(&self) -> &ParserStats {
&self.last_step_stats
}

/// Borrow the running field-wise maximum of per-step parser statistics,
/// i.e. the peak value each counter has reached over all steps so far.
pub fn max_step_stats(&self) -> &ParserStats {
&self.max_step_stats
}

/// Number of LLM tokens consumed so far (length of the internal token list).
pub fn num_tokens(&self) -> usize {
self.llm_tokens.len()
}
Expand Down Expand Up @@ -633,7 +645,6 @@ impl TokenParser {
return StepResult::noop();
}

let pre = instant::Instant::now();
let pre_stats = self.parser.stats().clone();
let mut set = self
.parser
Expand All @@ -643,7 +654,9 @@ impl TokenParser {
let err = format!("lexer error: {}", err);
return self.stop(&err, StopReason::LexerTooComplex);
}
self.last_bias_time = pre.elapsed();
self.last_bias_time = Duration::from_micros(p_stats.compute_time_us);
self.last_step_stats = p_stats.clone();
self.max_step_stats = self.max_step_stats.max(&p_stats);

if inner_accepting {
let mut all_accepting = true;
Expand Down Expand Up @@ -682,9 +695,10 @@ impl TokenParser {

infoln!(
self,
"step-stats: {:?}; {} lex fuel; {}",
start_time.elapsed(),
"step-stats: {}us; {} lex fuel; {} items; {}",
start_time.elapsed().as_micros(),
p_stats.lexer_cost,
p_stats.all_items,
self.parser.lexer_stats(),
);

Expand Down
10 changes: 8 additions & 2 deletions sample_parser/src/sample_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,15 @@ fn main() {
black_box(constraint.temperature);
let sampled_token = tokens[idx];

let p_stats = constraint.parser.last_step_stats();
println!(
"SAMPLE {}: {} {}",
"SAMPLE {}: {} {}; stats: {} lex, {} rows, {} us",
idx,
sampled_token,
tok_env.tok_trie().token_dbg(sampled_token)
tok_env.tok_trie().token_dbg(sampled_token),
p_stats.lexer_cost,
p_stats.all_items,
p_stats.compute_time_us,
);
Some(sampled_token)
} else {
Expand Down Expand Up @@ -130,6 +134,8 @@ fn main() {
send_output(&constraint.flush_logs());
// the stop reason should be likely also sent to the user
println!("Stop reason: {:?}", constraint.parser.stop_reason());

println!("Max step stats: {:?}", constraint.parser.max_step_stats());
}

fn read_file_to_string(filename: &str) -> String {
Expand Down

0 comments on commit f49791c

Please sign in to comment.