Skip to content

Commit

Permalink
Comments on lowest_match_inner() (#56)
Browse files Browse the repository at this point in the history
Co-authored-by: Jeffrey Kegler <[email protected]>
  • Loading branch information
v-jkegler and Jeffrey Kegler authored Nov 19, 2024
1 parent d623c7c commit d86df7d
Showing 1 changed file with 33 additions and 3 deletions.
36 changes: 33 additions & 3 deletions parser/src/earley/regexvec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,36 +201,66 @@ impl RegexVec {
}

// Pick the lowest (i.e. first, or best) matching lexeme in 'state'.
//
// * The first lazy lexeme that matches wins outright and is returned as soon
//   as it is seen.
// * Otherwise, if every lexeme in the state is a greedy match that has reached
//   its end of lexeme, the first such greedy lexeme is returned.
// * In every other case there is no settled choice yet and 'None' is returned.
//
// The result is the pair (lexeme index, 'possible_lookahead_len' of its
// expression).
fn lowest_match_inner(&mut self, state: StateID) -> Option<(usize, usize)> {
    // "End of lexeme" is reported by the regex machinery as "end of input"
    // (EOI), hence the ForcedEOI check below.
    //
    // 'greedy_all_done' stays true only while every lexeme visited so far is
    // a match sitting at its end of lexeme. It starts out true, vacuously.
    let mut greedy_all_done = true;

    // First greedy lexeme found at its end of lexeme; none so far.
    let mut first_greedy: Option<(usize, usize)> = None;

    for (idx, e) in iter_state(&self.rx_sets, state) {
        // A non-nullable derivative means this lexeme does not match here,
        // so it cannot be at its end of lexeme either.
        if !self.exprs.is_nullable(e) {
            greedy_all_done = false;
            continue;
        }

        // A matching lazy lexeme short-circuits everything: it is the lowest
        // match, whatever the greedy lexemes before it looked like.
        if self.lazy[idx] {
            return Some((idx, self.exprs.possible_lookahead_len(e)));
        }

        // From here on 'e' is a matching greedy lexeme. Once the flag has
        // been cleared no greedy result is possible any more, so there is
        // nothing left to record for this lexeme.
        if !greedy_all_done {
            continue;
        }

        if self.next_byte.next_byte(&self.exprs, e) != NextByte::ForcedEOI {
            // This greedy lexeme matches but is not forced to stop here, so
            // not all greedy lexemes are at their end of lexeme.
            greedy_all_done = false;
        } else if first_greedy.is_none() {
            // Remember only the first greedy lexeme at its end of lexeme.
            first_greedy = Some((idx, self.exprs.possible_lookahead_len(e)));
        }
    }

    if !greedy_all_done {
        // Greedy matching still has possibilities open, so no lexeme has
        // been settled on yet.
        return None;
    }

    // No lazy lexeme matched and every lexeme is a greedy one at its end of
    // lexeme: hand back the first one recorded (if any).
    first_greedy
}
Expand Down

0 comments on commit d86df7d

Please sign in to comment.