From d3246a6e84a2b5e40c036db2aea469f92694bf13 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 21 Nov 2024 16:45:06 -0800 Subject: [PATCH] add lark and regex compilers to the python api --- python/llguidance/__init__.py | 4 ++- python/llguidance/_lib.pyi | 33 +++++++++++++++++++++++++ rust/src/py.rs | 46 +++++++++++++++++++++++++++++++---- 3 files changed, 77 insertions(+), 6 deletions(-) diff --git a/python/llguidance/__init__.py b/python/llguidance/__init__.py index 0726170..4c33248 100644 --- a/python/llguidance/__init__.py +++ b/python/llguidance/__init__.py @@ -1,9 +1,11 @@ -from ._lib import LLTokenizer, LLInterpreter, JsonCompiler +from ._lib import LLTokenizer, LLInterpreter, JsonCompiler, LarkCompiler, RegexCompiler from ._tokenizer import TokenizerWrapper __all__ = [ "LLTokenizer", "LLInterpreter", "JsonCompiler", + "LarkCompiler", + "RegexCompiler", "TokenizerWrapper", ] diff --git a/python/llguidance/_lib.pyi b/python/llguidance/_lib.pyi index 7a290bb..d37fcda 100644 --- a/python/llguidance/_lib.pyi +++ b/python/llguidance/_lib.pyi @@ -159,3 +159,36 @@ class JsonCompiler: """ Compile the JSON representation of the AG2 grammar/constraint. """ + +class LarkCompiler: + def __new__( + cls, + ) -> "LarkCompiler": + """ + Create a new Lark compiler. + """ + + def compile( + self, + lark: str, + ) -> str: + """ + Compile the JSON representation of the AG2 grammar/constraint. + """ + +class RegexCompiler: + def __new__( + cls, + ) -> "RegexCompiler": + """ + Create a new Regex compiler. + """ + + def compile( + self, + regex: str, + stop_regex: Optional[str] = None, + ) -> str: + """ + Compile the JSON representation of the AG2 grammar/constraint. 
+ """ diff --git a/rust/src/py.rs b/rust/src/py.rs index 11fd191..5c67730 100644 --- a/rust/src/py.rs +++ b/rust/src/py.rs @@ -1,12 +1,12 @@ use std::fmt::Display; use std::{borrow::Cow, sync::Arc}; -use llguidance_parser::api::ParserLimits; +use llguidance_parser::api::{GrammarWithLexer, ParserLimits}; use llguidance_parser::toktrie::{ self, InferenceCapabilities, TokRxInfo, TokTrie, TokenId, TokenizerEnv, }; use llguidance_parser::{api::TopLevelGrammar, output::ParserOutput, TokenParser}; -use llguidance_parser::{Constraint, JsonCompileOptions, Logger}; +use llguidance_parser::{lark_to_llguidance, Constraint, GrammarBuilder, JsonCompileOptions, Logger}; use pyo3::{exceptions::PyValueError, prelude::*}; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -272,17 +272,53 @@ impl JsonCompiler { key_separator: self.key_separator.clone(), whitespace_flexible: self.whitespace_flexible, }; - let tlg = compile_options.json_to_llg(schema).map_err(val_error)?; - let grammar = &tlg.grammars[0]; - Ok(serde_json::to_string(grammar).map_err(val_error)?) 
+ let grammar = compile_options.json_to_llg(schema).map_err(val_error)?; + serde_json::to_string(&grammar).map_err(val_error) } } +#[derive(Clone)] +#[pyclass] +struct LarkCompiler {} + +#[pymethods] +impl LarkCompiler { + #[new] + fn py_new() -> Self { + LarkCompiler {} + } + fn compile(&self, lark: &str) -> PyResult<String> { + let grammar = lark_to_llguidance(lark).map_err(val_error)?; + serde_json::to_string(&grammar).map_err(val_error) + } +} + +#[derive(Clone)] +#[pyclass] +struct RegexCompiler {} + +#[pymethods] +impl RegexCompiler { + #[new] + fn py_new() -> Self { + RegexCompiler {} + } + fn compile(&self, regex: &str, stop_regex: Option<&str>) -> PyResult<String> { + let mut builder = GrammarBuilder::new(); + builder.add_grammar(GrammarWithLexer::default()); + let noderef = builder.gen_rx(regex, stop_regex.unwrap_or("")); + builder.set_start_node(noderef); + let grammar = builder.finalize().map_err(val_error)?; + serde_json::to_string(&grammar).map_err(val_error) + } } pub(crate) fn init(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::<LLTokenizer>()?; m.add_class::<LLInterpreter>()?; m.add_class::<JsonCompiler>()?; + m.add_class::<LarkCompiler>()?; + m.add_class::<RegexCompiler>()?; Ok(()) }