Skip to content

Commit

Permalink
add lark and regex compilers to the python api
Browse files Browse the repository at this point in the history
  • Loading branch information
hudson-ai committed Nov 22, 2024
1 parent 5c0705a commit d3246a6
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 6 deletions.
4 changes: 3 additions & 1 deletion python/llguidance/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from ._lib import LLTokenizer, LLInterpreter, JsonCompiler
from ._lib import LLTokenizer, LLInterpreter, JsonCompiler, LarkCompiler, RegexCompiler
from ._tokenizer import TokenizerWrapper

# Names exported by `from llguidance import *`; each entry corresponds to one
# of the names imported at the top of this module.
__all__ = [
"LLTokenizer",
"LLInterpreter",
"JsonCompiler",
"LarkCompiler",
"RegexCompiler",
"TokenizerWrapper",
]
33 changes: 33 additions & 0 deletions python/llguidance/_lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,36 @@ class JsonCompiler:
"""
Compile the JSON representation of the AG2 grammar/constraint.
"""

class LarkCompiler:
    """
    Compiler that converts Lark grammar source into a serialized grammar.
    """

    def __new__(
        cls,
    ) -> "LarkCompiler":
        """
        Create a new Lark compiler.

        The compiler takes no configuration.
        """

    def compile(
        self,
        lark: str,
    ) -> str:
        """
        Compile a Lark grammar into the JSON representation of the
        AG2 grammar/constraint.

        Args:
            lark: Source text of the grammar, written in Lark syntax.

        Returns:
            The compiled grammar serialized as a JSON string.

        Raises:
            An exception if the grammar cannot be converted or serialized
            (presumably ValueError -- confirm against the native module).
        """

class RegexCompiler:
    """
    Compiler that converts a regular expression into a serialized grammar.
    """

    def __new__(
        cls,
    ) -> "RegexCompiler":
        """
        Create a new Regex compiler.

        The compiler takes no configuration.
        """

    def compile(
        self,
        regex: str,
        stop_regex: Optional[str] = None,
    ) -> str:
        """
        Compile a regular expression into the JSON representation of the
        AG2 grammar/constraint.

        Args:
            regex: The regular expression the generated text must match.
            stop_regex: Optional stop pattern. When omitted (None), the
                native implementation substitutes an empty string.

        Returns:
            The compiled grammar serialized as a JSON string.

        Raises:
            An exception if the grammar cannot be built or serialized
            (presumably ValueError -- confirm against the native module).
        """
46 changes: 41 additions & 5 deletions rust/src/py.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use std::fmt::Display;
use std::{borrow::Cow, sync::Arc};

use llguidance_parser::api::ParserLimits;
use llguidance_parser::api::{GrammarWithLexer, ParserLimits};
use llguidance_parser::toktrie::{
self, InferenceCapabilities, TokRxInfo, TokTrie, TokenId, TokenizerEnv,
};
use llguidance_parser::{api::TopLevelGrammar, output::ParserOutput, TokenParser};
use llguidance_parser::{Constraint, JsonCompileOptions, Logger};
use llguidance_parser::{lark_to_llguidance, Constraint, GrammarBuilder, JsonCompileOptions, Logger};
use pyo3::{exceptions::PyValueError, prelude::*};
use serde::{Deserialize, Serialize};
use serde_json::Value;
Expand Down Expand Up @@ -272,17 +272,53 @@ impl JsonCompiler {
key_separator: self.key_separator.clone(),
whitespace_flexible: self.whitespace_flexible,
};
let tlg = compile_options.json_to_llg(schema).map_err(val_error)?;
let grammar = &tlg.grammars[0];
Ok(serde_json::to_string(grammar).map_err(val_error)?)
let grammar = compile_options.json_to_llg(schema).map_err(val_error)?;
serde_json::to_string(&grammar).map_err(val_error)
}
}

// Stateless compiler class exposed to Python; it converts Lark grammar text
// into a JSON-serialized grammar.
#[derive(Clone)]
#[pyclass]
struct LarkCompiler {}

#[pymethods]
impl LarkCompiler {
    // Constructor invoked when Python instantiates the class; there is no
    // state to initialize.
    #[new]
    fn py_new() -> Self {
        Self {}
    }

    // Converts `lark` with `lark_to_llguidance` and serializes the result with
    // serde_json. A failure in either step is mapped through `val_error`.
    fn compile(&self, lark: &str) -> PyResult<String> {
        let top_level = lark_to_llguidance(lark).map_err(val_error)?;
        let json = serde_json::to_string(&top_level).map_err(val_error)?;
        Ok(json)
    }
}

#[derive(Clone)]
#[pyclass]
struct RegexCompiler {}

#[pymethods]
impl RegexCompiler {
#[new]
fn py_new() -> Self {
RegexCompiler {}
}
fn compile(&self, regex: &str, stop_regex: Option<&str>) -> PyResult<String> {
let mut builder = GrammarBuilder::new();
builder.add_grammar(GrammarWithLexer::default());
let noderef = builder.gen_rx(regex, stop_regex.unwrap_or(""));
builder.set_start_node(noderef);
let grammar = builder.finalize().map_err(val_error)?;
serde_json::to_string(&grammar).map_err(val_error)
}
}

/// Registers the Python-visible classes defined in this file on module `m`.
///
/// Classes are added in a fixed order; the first registration that fails
/// aborts the function and its error is returned to the caller.
pub(crate) fn init(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<LLTokenizer>()?;
    m.add_class::<LLInterpreter>()?;
    m.add_class::<JsonCompiler>()?;
    m.add_class::<LarkCompiler>()?;
    // The last registration's result is the function's result.
    m.add_class::<RegexCompiler>()
}

Expand Down

0 comments on commit d3246a6

Please sign in to comment.