Skip to content

Commit

Permalink
Merge pull request #22 from perpetual-ml/json_dump
Browse files (browse the repository at this point in the history)
json dump before fit
  • Loading branch information
deadsoul44 authored Oct 30, 2024
2 parents ccfdd49 + 5d6b545 commit e4a4ad2
Show file tree
Hide file tree
Showing 10 changed files with 225 additions and 12 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "perpetual"
version = "0.6.0"
version = "0.6.1"
edition = "2021"
authors = ["Mutlu Simsek <[email protected]>"]
homepage = "https://perpetual-ml.com"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ pip install perpetual
To use in a Rust project, add the following to your Cargo.toml file to get the package from [crates.io](https://crates.io/crates/perpetual).

```toml
perpetual = "0.6.0"
perpetual = "0.6.1"
```

## Paper
Expand Down
4 changes: 2 additions & 2 deletions python-package/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "py-perpetual"
version = "0.6.0"
version = "0.6.1"
edition = "2021"
authors = ["Mutlu Simsek <[email protected]>"]
homepage = "https://perpetual-ml.com"
Expand All @@ -19,7 +19,7 @@ crate-type = ["cdylib", "rlib"]

[dependencies]
pyo3 = { version = "0.22.5", features = ["extension-module"] }
perpetual_rs = {package="perpetual", version = "0.6.0", path = "../" }
perpetual_rs = {package="perpetual", version = "0.6.1", path = "../" }
numpy = "0.22.0"
ndarray = "0.16.1"
serde_plain = { version = "1.0" }
Expand Down
4 changes: 2 additions & 2 deletions python-package/examples/categorical_data_titanic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "py310",
"display_name": "py311",
"language": "python",
"name": "python3"
},
Expand All @@ -278,7 +278,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.20"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
6 changes: 3 additions & 3 deletions python-package/examples/openml.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -105,7 +105,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -128,7 +128,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
Expand Down
197 changes: 197 additions & 0 deletions python-package/examples/santander.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "967af9d9",
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"execution": {
"iopub.execute_input": "2024-10-21T07:01:07.130508Z",
"iopub.status.busy": "2024-10-21T07:01:07.130061Z",
"iopub.status.idle": "2024-10-21T07:01:08.048111Z",
"shell.execute_reply": "2024-10-21T07:01:08.046970Z"
},
"papermill": {
"duration": 0.926499,
"end_time": "2024-10-21T07:01:08.050965",
"exception": false,
"start_time": "2024-10-21T07:01:07.124466",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from perpetual import PerpetualBooster\n",
"from sklearn.metrics import roc_auc_score\n",
"from sklearn.model_selection import KFold\n",
"from autogluon.tabular import TabularPredictor"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c257f8fc",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-21T07:01:28.223537Z",
"iopub.status.busy": "2024-10-21T07:01:28.222764Z",
"iopub.status.idle": "2024-10-21T07:01:34.667262Z",
"shell.execute_reply": "2024-10-21T07:01:34.666013Z"
},
"papermill": {
"duration": 6.453134,
"end_time": "2024-10-21T07:01:34.670004",
"exception": false,
"start_time": "2024-10-21T07:01:28.216870",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"X_train = pd.read_csv('../../resources/santander-train.csv', index_col=0)\n",
"y_train = X_train.pop('TARGET')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e0590d0a",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-21T07:01:34.680697Z",
"iopub.status.busy": "2024-10-21T07:01:34.680290Z",
"iopub.status.idle": "2024-10-21T07:01:34.689412Z",
"shell.execute_reply": "2024-10-21T07:01:34.688210Z"
},
"papermill": {
"duration": 0.017414,
"end_time": "2024-10-21T07:01:34.691792",
"exception": false,
"start_time": "2024-10-21T07:01:34.674378",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"X_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "22eba1d7",
"metadata": {},
"outputs": [],
"source": [
"cv = KFold(shuffle=True, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "921f491f",
"metadata": {},
"outputs": [],
"source": [
"scores = []\n",
"for train, test in cv.split(X_train, y_train):\n",
" model = PerpetualBooster(objective=\"LogLoss\")\n",
" model.fit(X_train.iloc[train], y_train.iloc[train], budget=1.0)\n",
" probabilities = model.predict_proba(X_train.iloc[test])\n",
" score = roc_auc_score(y_train.iloc[test], probabilities[:, 1])\n",
" scores.append(score)\n",
" print(model.number_of_trees)\n",
"print(np.mean(scores))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a04e569",
"metadata": {
"execution": {
"iopub.execute_input": "2024-10-21T07:01:34.702317Z",
"iopub.status.busy": "2024-10-21T07:01:34.701880Z",
"iopub.status.idle": "2024-10-21T07:02:04.983918Z",
"shell.execute_reply": "2024-10-21T07:02:04.982720Z"
},
"papermill": {
"duration": 30.294535,
"end_time": "2024-10-21T07:02:04.990727",
"exception": false,
"start_time": "2024-10-21T07:01:34.696192",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"scores = []\n",
"X_train['TARGET'] = y_train\n",
"for train, test in cv.split(X_train, y_train):\n",
" model = TabularPredictor(label=\"TARGET\", verbosity=0)\n",
" model.fit(X_train.iloc[train])\n",
" probabilities = model.predict_proba(X_train.iloc[test])\n",
" score = roc_auc_score(y_train.iloc[test], probabilities.to_numpy()[:, 1])\n",
" print(score)\n",
" scores.append(score)\n",
"print(np.mean(scores))"
]
}
],
"metadata": {
"kaggle": {
"accelerator": "none",
"dataSources": [
{
"databundleVersionId": 860641,
"sourceId": 4986,
"sourceType": "competition"
}
],
"dockerImageVersionId": 30786,
"isGpuEnabled": false,
"isInternetEnabled": true,
"language": "python",
"sourceType": "notebook"
},
"kernelspec": {
"display_name": "py311",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
},
"papermill": {
"default_parameters": {},
"duration": 62.454609,
"end_time": "2024-10-21T07:02:06.520206",
"environment_variables": {},
"exception": null,
"input_path": "__notebook__.ipynb",
"output_path": "__notebook__.ipynb",
"parameters": {},
"start_time": "2024-10-21T07:01:04.065597",
"version": "2.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 1 addition & 1 deletion python-package/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "perpetual"
version = "0.6.0"
version = "0.6.1"
description = "A self-generalizing gradient boosting machine which doesn't need hyperparameter optimization"
license = { file = "LICENSE" }
keywords = [
Expand Down
14 changes: 14 additions & 0 deletions python-package/python/perpetual/booster.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,20 @@ def __init__(
self.log_iterations = log_iterations
self.feature_importance_method = feature_importance_method

booster = CratePerpetualBooster(
objective=self.objective,
num_threads=self.num_threads,
monotone_constraints=dict(),
force_children_to_bound_parent=self.force_children_to_bound_parent,
missing=self.missing,
allow_missing_splits=self.allow_missing_splits,
create_missing_branch=self.create_missing_branch,
terminate_missing_features=set(),
missing_node_treatment=self.missing_node_treatment,
log_iterations=self.log_iterations,
)
self.booster = cast(BoosterType, booster)

def fit(
self,
X,
Expand Down
2 changes: 2 additions & 0 deletions python-package/tests/test_booster.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,8 @@ def test_booster_metadata(
f64_model_path = tmp_path / "modelf64_sl.json"
X, y = X_y
model = PerpetualBooster(objective="SquaredLoss")
save_func(model, f64_model_path)
model.json_dump()
model.fit(X, y)
preds = model.predict(X)
save_func(model, f64_model_path)
Expand Down
4 changes: 2 additions & 2 deletions python-package/tests/test_multi_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ def test_multi_output():
y = np.array(X.pop("Cover_Type"))
X_test = pd.read_csv("../resources/cover_types_test.csv", index_col=False)
y_test = np.array(X_test.pop("Cover_Type"))
model = PerpetualBooster(num_threads=1)
model.fit(X, y)
model = PerpetualBooster()
model.fit(X, y, iteration_limit=40)
pred_test = model.predict(X_test)
proba_test = model.predict_proba(X_test)
log_odds_test = model.predict_log_proba(X_test)
Expand Down

0 comments on commit e4a4ad2

Please sign in to comment.