Skip to content

Commit

Permalink
Comment counted segment logic (#49)
Browse files Browse the repository at this point in the history
* Work on commenting counted sequence logic

* Work on commenting counted sequence logic

---------

Co-authored-by: Jeffrey Kegler <[email protected]>
  • Loading branch information
v-jkegler and Jeffrey Kegler authored Nov 12, 2024
1 parent 54242ee commit 5114648
Showing 1 changed file with 85 additions and 2 deletions.
87 changes: 85 additions & 2 deletions parser/src/grammar_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,72 +267,155 @@ impl GrammarBuilder {
p
}

// this tries to keep grammar size O(log(n))
// at_most() creates a rule which accepts at most 'n' copies
// of element 'elt'.

// The first-time reader of at_most() might want to consult
// the comments for repeat_exact(), where similar logic is
// used in a simpler form.
//
// at_most() recursively factors the sequence into K-size pieces,
// in an attempt to keep grammar size O(log(n)).
fn at_most(&mut self, elt: NodeRef, n: usize) -> NodeRef {
    // Builds a rule accepting between 0 and 'n' copies of 'elt', inclusive.
    // Large counts are split into blocks of K copies and handled
    // recursively, which is intended to keep the grammar size O(log(n)).
    match n {
        // No copies allowed: only the empty rule matches.
        0 => self.empty(),
        // Zero or one copy is exactly an optional 'elt'.
        1 => self.optional(elt),
        // Small bound (below 3*K): enumerate every exact length 0..=n
        // and offer them as alternatives.
        _ if n < 3 * K => {
            let mut alternatives = Vec::with_capacity(n + 1);
            for len in 0..=n {
                alternatives.push(self.simple_repeat(elt, len));
            }
            self.select(&alternatives)
        }
        // Large bound: write n = quotient*K + remainder and cover the
        // lengths in two ranges, using a block of K copies as a unit.
        _ => {
            let quotient = n / K;
            let remainder = n % K;

            // One block = 'elt' repeated K times.
            let block = self.simple_repeat(elt, K);

            // Range 1: lengths strictly below quotient*K.
            // At most (quotient - 1) whole blocks ...
            let few_blocks = self.at_most(block, quotient - 1);
            // ... followed by at most K-1 loose copies of 'elt', which
            // fills in every length between block multiples.
            let loose = self.at_most(elt, K - 1);
            let below = self.join(&[few_blocks, loose]);

            // Range 2: lengths from quotient*K up to 'n', inclusive
            // (remainder + 1 distinct lengths).
            // Exactly 'quotient' blocks give the minimum length ...
            let all_blocks = self.repeat_exact(block, quotient);
            // ... followed by at most 'remainder' loose copies; the
            // remainder is always less than K.
            let tail = self.at_most(elt, remainder);
            let upper = self.join(&[all_blocks, tail]);

            // Every length in 0..=n falls in exactly one of the two
            // ranges, so the result is their alternation.
            self.select(&[upper, below])
        }
    }
}

// simple_repeat() "simply" repeats the element ('elt') 'n' times.
// Here "simple" means we do not factor into K-size pieces, so that
// time will be O(n). The intent is that simple_repeat() only be
// called for small 'n'.
fn simple_repeat(&mut self, elt: NodeRef, n: usize) -> NodeRef {
    // Concatenate 'n' copies of 'elt' directly, with no factoring into
    // K-sized blocks; the joined list has one entry per copy (none when
    // 'n' is 0).
    let copies = vec![elt; n];
    self.join(&copies)
}

// this tries to keep grammar size O(log(n))
// Repeat element 'elt' exactly 'n' times, using factoring
// in an attempt to keep grammar size O(log(n)).
fn repeat_exact(&mut self, elt: NodeRef, n: usize) -> NodeRef {
    // Builds a rule accepting exactly 'n' copies of 'elt'.

    // Small counts (at most 2*K) are written out directly.
    if n <= 2 * K {
        return self.simple_repeat(elt, n);
    }

    // Large counts are factored into blocks of K copies, in an attempt
    // to keep the number of rules O(log(n)).
    // One block = 'elt' repeated K times.
    let block = self.simple_repeat(elt, K);

    // Repeat the block n/K times. This call factors again when n/K is
    // itself large, so the construction is recursive.
    let blocks = self.repeat_exact(block, n / K);

    // 'blocks' covers (n/K)*K copies of 'elt', which is n%K short of
    // 'n'. Put the missing n%K single copies first, then the blocks,
    // and concatenate everything.
    let mut parts = vec![elt; n % K];
    parts.push(blocks);
    self.join(&parts)
}

// at_least() accepts a sequence of at least 'n' copies of
// element 'elt'.
fn at_least(&mut self, elt: NodeRef, n: usize) -> NodeRef {
    // Builds a rule accepting 'n' or more copies of 'elt'.
    // The unbounded tail is created first in every case.
    let tail = self.zero_or_more(elt);
    match n {
        // With no minimum, at_least() is just zero_or_more().
        0 => tail,
        // Otherwise: exactly 'n' copies (factored by repeat_exact()),
        // followed by zero or more further copies.
        _ => {
            let head = self.repeat_exact(elt, n);
            self.join(&[head, tail])
        }
    }
}

// Create a rule which accepts from 'min' to 'max' copies of element
// 'elt', inclusive.
pub fn repeat(&mut self, elt: NodeRef, min: usize, max: Option<usize>) -> NodeRef {
if max.is_none() {
// If no 'max', what we want is equivalent to a rule accepting at least
// 'min' elements.
return self.at_least(elt, min);
}
let max = max.unwrap();
assert!(min <= max);
if min == max {
// Where 'min' is equal to 'max', what we want is equivalent to a rule
// repeating element 'elt' exactly 'min' times.
self.repeat_exact(elt, min)
} else if min == 0 {
// If 'min' is zero, what we want is equivalent to a rule accepting at most
// 'max' elements.
self.at_most(elt, max)
} else {
// In the general case, what we want is equivalent to
// a rule accepting a fixed-size block of length 'min',
// followed by a rule accepting at most 'd' elements,
// where 'd' is the difference between 'min' and 'max'
let d = max - min;
let common = self.repeat_exact(elt, min);
let extra = self.at_most(elt, d);
Expand Down

0 comments on commit 5114648

Please sign in to comment.