memory and iter limit added
deadsoul44 committed Oct 28, 2024
1 parent e5f933a commit a94ef73
Showing 14 changed files with 118 additions and 49 deletions.
Cargo.toml (8 changes: 4 additions & 4 deletions)
@@ -1,6 +1,6 @@
[package]
name = "perpetual"
version = "0.5.2"
version = "0.6.0"
edition = "2021"
authors = ["Mutlu Simsek <[email protected]>"]
homepage = "https://perpetual-ml.com"
@@ -21,9 +21,9 @@ codegen-units = 1

[dependencies]
rayon = "1.8"
thiserror = "1.0.64"
serde_json = { version = "1.0.129", features = ["float_roundtrip"] }
serde = { version = "1.0.209", features = ["derive"] }
thiserror = "1.0.65"
serde_json = { version = "1.0.132", features = ["float_roundtrip"] }
serde = { version = "1.0.213", features = ["derive"] }
approx = "0.5"
log = "0.4"
rand = "0.8.5"
README.md (2 changes: 1 addition & 1 deletion)
@@ -61,7 +61,7 @@ pip install perpetual
To use in a Rust project, add the following to your Cargo.toml file to get the package from [crates.io](https://crates.io/crates/perpetual).

```toml
perpetual = "0.5.2"
perpetual = "0.6.0"
```

## Paper
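The README hunk above covers installation of both the Python package (`pip install perpetual`) and the Rust crate. As a companion to the Cargo.toml snippet, here is a minimal, hypothetical Python quick-start; the `PerpetualBooster` import path, the no-argument constructor, and `predict` are assumptions based on the python-package sources in this commit, and the arrays are placeholders rather than a real dataset.

```python
# Hypothetical quick-start, not taken from the repository.
# Import path, no-argument constructor, and predict() are assumptions based on
# python-package/python/perpetual/booster.py; the budget argument is documented there.
import numpy as np
from perpetual import PerpetualBooster

X = np.random.rand(1_000, 10)                     # placeholder feature matrix
y = np.random.randint(0, 2, 1_000).astype(float)  # placeholder binary target

model = PerpetualBooster()        # objective left at its default
model.fit(X, y, budget=0.5)       # no hyperparameter tuning needed
print(model.predict(X)[:5])
```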
benches/perpetual_benchmarks.rs (12 changes: 9 additions & 3 deletions)
@@ -152,9 +152,11 @@ pub fn tree_benchmarks(c: &mut Criterion) {
.fit(
black_box(&data),
black_box(&y),
black_box(0.3),
black_box(None),
black_box(None),
black_box(None),
black_box(None),
black_box(0.3),
black_box(None),
black_box(None),
black_box(None),
@@ -169,9 +171,11 @@ pub fn tree_benchmarks(c: &mut Criterion) {
.fit(
black_box(&data),
black_box(&y),
black_box(0.3),
black_box(None),
black_box(None),
black_box(None),
black_box(None),
black_box(0.3),
black_box(None),
black_box(None),
black_box(None),
@@ -180,7 +184,9 @@
})
});
let mut booster = PerpetualBooster::default();
booster.fit(&data, &y, None, None, 0.1, None, None, None).unwrap();
booster
.fit(&data, &y, 0.1, None, None, None, None, None, None, None)
.unwrap();
booster_train.bench_function("Predict Booster", |b| {
b.iter(|| booster.predict(black_box(&data), false))
});
examples/cal_housing.rs (13 changes: 12 additions & 1 deletion)
@@ -123,7 +123,18 @@ fn main() -> Result<(), Box<dyn Error>> {
.set_num_threads(Some(*num_threads));

let now = SystemTime::now();
model.fit(&matrix_train, &y_train, None, None, *budget, None, None, None)?;
model.fit(
&matrix_train,
&y_train,
*budget,
None,
None,
None,
None,
None,
None,
None,
)?;
println!("now.elapsed: {:?}", now.elapsed().unwrap().as_secs_f32());

let trees = model.get_prediction_trees();
examples/cover_types.rs (2 changes: 1 addition & 1 deletion)
@@ -148,7 +148,7 @@ fn main() -> Result<(), Box<dyn Error>> {
.map(|y| if (*y as i32) == i { 1.0 } else { 0.0 })
.collect();

model.fit(&matrix_train, &y_tr, None, None, *budget, None, None, None)?;
model.fit(&matrix_train, &y_tr, *budget, None, None, None, None, None, None, None)?;
println!("Completed fitting model number: {}", i);

let trees = model.get_prediction_trees();
examples/titanic.rs (2 changes: 1 addition & 1 deletion)
@@ -51,7 +51,7 @@ fn main() -> Result<(), Box<dyn Error>> {
// the relevant `set_` methods for any parameters you would like to
// adjust.
let mut model = PerpetualBooster::default().set_objective(Objective::LogLoss);
model.fit(&matrix, &y, None, None, *budget, None, None, None)?;
model.fit(&matrix, &y, *budget, None, None, None, None, None, None, None)?;

println!("Model prediction: {:?} ...", &model.predict(&matrix, true)[0..10]);

python-package/Cargo.toml (4 changes: 2 additions & 2 deletions)
@@ -1,6 +1,6 @@
[package]
name = "py-perpetual"
version = "0.5.2"
version = "0.6.0"
edition = "2021"
authors = ["Mutlu Simsek <[email protected]>"]
homepage = "https://perpetual-ml.com"
@@ -19,7 +19,7 @@ crate-type = ["cdylib", "rlib"]

[dependencies]
pyo3 = { version = "0.22.5", features = ["extension-module"] }
perpetual_rs = {package="perpetual", version = "0.5.2", path = "../" }
perpetual_rs = {package="perpetual", version = "0.6.0", path = "../" }
numpy = "0.22.0"
ndarray = "0.16.1"
serde_plain = { version = "1.0" }
python-package/pyproject.toml (2 changes: 1 addition & 1 deletion)
@@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "perpetual"
version = "0.5.2"
version = "0.6.0"
description = "A self-generalizing gradient boosting machine which doesn't need hyperparameter optimization"
license = { file = "LICENSE" }
keywords = [
python-package/python/perpetual/booster.py (11 changes: 10 additions & 1 deletion)
@@ -157,6 +157,8 @@ def fit(
reset: Union[bool, None] = None,
categorical_features: Union[Iterable[int], Iterable[str], str, None] = "auto",
timeout: Union[float, None] = None,
iteration_limit: Union[int, None] = None,
memory_limit: Union[float, None] = None,
) -> Self:
"""Fit the gradient booster on a provided dataset.
@@ -168,12 +170,17 @@
training the model. If None is passed, a weight of 1 will be used for every record.
Defaults to None.
budget: a positive number for fitting budget. Increasing this number will more
likely result in increased accuracy.
likely result in more boosting rounds and increased predictive power.
alpha: only used in quantile regression.
reset: whether to reset the model or continue training.
categorical_features: The names or indices for categorical features.
`auto` for Polars or Pandas categorical data type.
timeout: optional fit timeout in seconds
iteration_limit: optional limit for the number of boosting rounds. The default value is 1000 boosting rounds.
The algorithm automatically stops before hitting this limit in most cases.
If you want to experiment with a very high budget (>2.0), you can also increase this limit.
memory_limit: optional limit for memory allocation in GB. If not set, memory is allocated based on
available memory and the algorithm's requirements.
"""

features_, flat_data, rows, cols, categorical_features_, cat_mapping = (
@@ -247,6 +254,8 @@
reset=reset,
categorical_features=categorical_features_, # type: ignore
timeout=timeout,
iteration_limit=iteration_limit,
memory_limit=memory_limit,
)

return self
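The docstring above introduces `iteration_limit` and `memory_limit` as the new user-facing knobs. A short, hedged sketch of how the new keyword arguments might be passed from Python; the import path and the feature/target arrays are assumptions, while the keyword names and their meanings come from the docstring added in this commit.

```python
# Sketch only: import path and data are assumptions; budget, iteration_limit,
# and memory_limit follow the docstring added in this commit.
import numpy as np
from perpetual import PerpetualBooster

X = np.random.rand(5_000, 20)                     # placeholder feature matrix
y = np.random.randint(0, 2, 5_000).astype(float)  # placeholder binary target

model = PerpetualBooster()
model.fit(
    X,
    y,
    budget=2.0,            # higher budget -> more boosting rounds
    iteration_limit=2000,  # raise the 1000-round default for high-budget runs
    memory_limit=4.0,      # cap allocations at roughly 4 GB
)
```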
python-package/src/booster.rs (6 changes: 5 additions & 1 deletion)
@@ -141,6 +141,8 @@ impl PerpetualBooster {
reset: Option<bool>,
categorical_features: Option<HashSet<usize>>,
timeout: Option<f32>,
iteration_limit: Option<usize>,
memory_limit: Option<f32>,
) -> PyResult<()> {
let flat_data = flat_data.as_slice()?;
let data = Matrix::new(flat_data, rows, cols);
@@ -156,12 +158,14 @@
match self.booster.fit(
&data,
y,
budget,
sample_weight_,
alpha,
budget,
reset,
categorical_features,
timeout,
iteration_limit,
memory_limit,
) {
Ok(m) => Ok(m),
Err(e) => Err(PyValueError::new_err(e.to_string())),
python-package/src/multi_output.rs (6 changes: 5 additions & 1 deletion)
@@ -160,6 +160,8 @@ impl MultiOutputBooster {
reset: Option<bool>,
categorical_features: Option<HashSet<usize>>,
timeout: Option<f32>,
iteration_limit: Option<usize>,
memory_limit: Option<f32>,
) -> PyResult<()> {
let flat_data = flat_data.as_slice()?;
let data = Matrix::new(flat_data, rows, cols);
@@ -178,12 +180,14 @@
match self.booster.fit(
&data,
&y_data,
budget,
sample_weight_,
alpha,
budget,
reset,
categorical_features,
timeout,
iteration_limit,
memory_limit,
) {
Ok(m) => Ok(m),
Err(e) => Err(PyValueError::new_err(e.to_string())),