From 5114648456ae6ba06ea0d5da7f1c1d22b6c0a06f Mon Sep 17 00:00:00 2001 From: v-jkegler Date: Tue, 12 Nov 2024 12:11:46 -0500 Subject: [PATCH] Comment counted segment logic (#49) * Work on commenting counted sequence logic * Work on commenting counted sequence logic --------- Co-authored-by: Jeffrey Kegler --- parser/src/grammar_builder.rs | 87 ++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/parser/src/grammar_builder.rs b/parser/src/grammar_builder.rs index 1ab2d38..79c2f98 100644 --- a/parser/src/grammar_builder.rs +++ b/parser/src/grammar_builder.rs @@ -267,72 +267,155 @@ impl GrammarBuilder { p } - // this tries to keep grammar size O(log(n)) + // at_most() creates a rule which accepts at most 'n' copies + // of element 'elt'. + + // The first-time reader of at_most() might want to consult + // the comments for repeat_exact(), where similar logic is + // used in a simpler form. + // + // at_most() recursively factors the sequence into K-size pieces, + // in an attempt to keep grammar size O(log(n)). fn at_most(&mut self, elt: NodeRef, n: usize) -> NodeRef { if n == 0 { + // If the max ('n') is 0, an empty rule self.empty() } else if n == 1 { + // If 'n' is 1, an optional rule of length 1 self.optional(elt) } else if n < 3 * K { + // If 'n' is below a fixed number (currently 12), + // the rule is a choice of all the rules of fixed length + // from 0 to 'n'. let options = (0..=n) .map(|k| self.simple_repeat(elt, k)) .collect::<Vec<_>>(); self.select(&options) } else { + // At or above that fixed number (again, currently 12), + // we "factor" the sequence into K-sized pieces. + // Let 'elt_k' be a k-element --- the repetition + // of 'K' copies of the element ('elt'). let elt_k = self.simple_repeat(elt, K); + // First we deal with the sequences of length less than + // (n/K)*K. + // 'elt_max_nk' is all the sequences of k-elements + // of length less than n/K. 
let elt_max_nk = self.at_most(elt_k, (n / K) - 1); + // There may be up to K-1 elements not accounted for by the sequences + // of k-elements in 'elt_max_nk'. The choices in 'elt_max_k' + // account for these "remainders". let elt_max_k = self.at_most(elt, K - 1); let elt_max_nk = self.join(&[elt_max_nk, elt_max_k]); + // Next we deal with the sequences of length between + // (n/K)*K and 'n', inclusive. It is integer arithmetic, so there + // will be (n%K)+1 such lengths. + // Here we call n/K the quotient and n%K the remainder. + // 'elt_nk' repeats the k-element exactly the quotient + // number of times, to ensure all our sequences are of + // length at least (n/K)*K. let elt_nk = self.repeat_exact(elt_k, n / K); + // 'left' repeats 'elt' at most the remainder number + // of times. The remainder is always less than K. let left = self.at_most(elt, n % K); + // Join 'elt_nk' and 'left' into 'elt_n'. + // 'elt_nk' is a constant-sized piece, + // which ensures all the sequences of 'elt' in 'elt_n' + // will be of length at least (n/K)*K. + // 'left' will be a choice of rules which + // produce at most K-1 copies of 'elt'. let elt_n = self.join(&[elt_nk, left]); + // We have accounted for all the sequences of less than + // (n/K)*K elements in 'elt_max_nk'. We have accounted + // for all the sequences of length between (n/K)*K elements and n elements + // (inclusive) in 'elt_n'. Clearly, the sequences of length at most 'n' + // are the alternation of 'elt_max_nk' and 'elt_n'. self.select(&[elt_n, elt_max_nk]) } } + // simple_repeat() "simply" repeats the element ('elt') 'n' times. + // Here "simple" means we do not factor into K-size pieces, so that + // time will be O(n). The intent is that simple_repeat() only be + // called for small 'n'. 
fn simple_repeat(&mut self, elt: NodeRef, n: usize) -> NodeRef { let elt_n = (0..n).map(|_| elt).collect::<Vec<_>>(); self.join(&elt_n) } - // this tries to keep grammar size O(log(n)) + // Repeat element 'elt' exactly 'n' times, using factoring + // in an attempt to keep grammar size O(log(n)). fn repeat_exact(&mut self, elt: NodeRef, n: usize) -> NodeRef { if n > 2 * K { + // For large 'n', try to keep the number of rules O(log(n)) + // by "factoring" the sequence into K-sized pieces. + + // Create a K-element -- 'elt' repeated 'K' times. let elt_k = self.simple_repeat(elt, K); + + // Repeat the K-element n/K times. The repetition + // is itself factored, so that the process is + // recursive. let inner = self.repeat_exact(elt_k, n / K); + + // 'inner' will be an 'elt'-sequence + // of length ((n/K)*K), which is n-((n/K)*K), or n%K, + // short of what we want. We create 'elt_left' to contain + // the n%K additional items we need, and concatenate it + // with 'inner' to form our result. let left = n % K; let mut elt_left = (0..left).map(|_| elt).collect::<Vec<_>>(); elt_left.push(inner); self.join(&elt_left) } else { + // For small 'n' (currently, 8 or less), simply + // repeat 'elt' 'n' times. self.simple_repeat(elt, n) } } + // at_least() accepts a sequence of at least 'n' copies of + // element 'elt'. fn at_least(&mut self, elt: NodeRef, n: usize) -> NodeRef { let z = self.zero_or_more(elt); if n == 0 { + // If n==0, at_least() is equivalent to zero_or_more(). z } else { + // If n>0, the first sequence is a factored repetition of + // exactly 'n' copies of 'elt', ... let r = self.repeat_exact(elt, n); + // ... followed by zero or more copies of 'elt' self.join(&[r, z]) } } + // Create a rule which accepts from 'min' to 'max' copies of element + // 'elt', inclusive. pub fn repeat(&mut self, elt: NodeRef, min: usize, max: Option<usize>) -> NodeRef { if max.is_none() { + // If no 'max', what we want is equivalent to a rule accepting at least + // 'min' elements. 
return self.at_least(elt, min); } let max = max.unwrap(); assert!(min <= max); if min == max { + // Where 'min' is equal to 'max', what we want is equivalent to a rule + // repeating element 'elt' exactly 'min' times. self.repeat_exact(elt, min) } else if min == 0 { + // If 'min' is zero, what we want is equivalent to a rule accepting at most + // 'max' elements. self.at_most(elt, max) } else { + // In the general case, what we want is equivalent to + // a rule accepting a fixed-size block of length 'min', + // followed by a rule accepting at most 'd' elements, + // where 'd' is the difference between 'min' and 'max' let d = max - min; let common = self.repeat_exact(elt, min); let extra = self.at_most(elt, d);