GitHub - rsharp-lang/enigma: R-sharp machine learning toolkit

Use this Machine Learning Package

require(enigma);

imports ["learning", "model", "activateFunction"] from "enigma";

Classification Example

Below are some examples of solving a classification problem. First, inspect the dataset used for the one-class classification problem:

# Load the `bezdekIris` dataset and print a per-column summary of it
# (the summary table is shown in the comment below).
data("bezdekIris");

print(dataset::description(bezdekIris));
#                D1         D2         D3         D4            class
# --------------------------------------------------------------------
# <mode>   <double>   <double>   <double>   <double>         <string>
# min           4.3          2          1        0.1    "Iris-setosa"
# max           7.9        4.4        6.9        2.5 "Iris-virginica"
# width         3.6        2.4        5.9        2.4              "3"
# mean      5.84333    3.05733      3.758    1.19933    "Iris-setosa"
# sd      0.0676113  0.0355883   0.144136  0.0622364             "NA"
# rsd     0.0115707  0.0116403  0.0383544  0.0518925             "NA"

# Use the class labels, made unique, as row names
# (e.g. "Iris-setosa", "Iris-setosa_1", ... in the table below).
rownames(bezdekIris) = unique.names(bezdekIris$class);

# Convert to a feature set, encode the `class` column with `to_factors`
# (one 0/1 indicator column per class, see the table below), and turn the
# result back into a data frame.
data = bezdekIris 
|> toFeatureSet() 
|> dataset::encoding(class = to_factors) 
|> as.data.frame()
;
#                     D1       D2       D3       D4 class.Iris-setosa class.Iris-versicolor class.Iris-virginica
# ---------------------------------------------------------------------------------------------------------------
# <mode>        <double> <double> <double> <double>         <integer>             <integer>            <integer>
# Iris-setosa        5.1      3.5      1.4      0.2                 1                     0                    0
# Iris-setosa_1      4.9        3      1.4      0.2                 1                     0                    0
# Iris-setosa_2      4.7      3.2      1.3      0.2                 1                     0                    0
# Iris-setosa_3      4.6      3.1      1.5      0.2                 1                     0                    0
# Iris-setosa_4        5      3.6      1.4      0.2                 1                     0                    0
# Iris-setosa_5      5.4      3.9      1.7      0.4                 1                     0                    0

#  [ reached 'max' / getOption("max.print") -- omitted 144 rows ]

# Build a two-class label vector: "Iris-setosa" where the indicator column
# equals 1, "Other" for every remaining row.
i = data[, "class.Iris-setosa"] == 1;
v = rep("", length(i));
v[i] = "Iris-setosa";
v[!i] = "Other";

# Overwrite the 0/1 indicator column with the string labels.
data[, "class.Iris-setosa"] = v;

To use the SVM classifier:

# SVM classifier: features D1..D4, label column `class.Iris-setosa`.
# `learn(...)` receives the SVM training parameters; `solve(data)` is then
# called with the same data frame that was fed in.
tensor(model = model::svm)
|> feed(data, features = ["D1","D2","D3","D4"])
|> output(labels = ["class.Iris-setosa"])
|> learn(
    degree = 3,
    gamma = 0.5,
    coefficient0 = 0,
    nu = 0.5,
    cacheSize = 40,
    c = 1,
    EPS = 0.001,
    P = 0.1,
    shrinking = TRUE,
    probability = FALSE
)
|> solve(data)
;

To use the xgboost classifier:

# xgboost classifier: features D1..D4, label column `class.Iris-setosa`,
# trained with the "logloss" loss and the "auc" cost, which is maximized.
# `solve(data)` is then called with the same data frame that was fed in.
tensor(model = model::xgboost)
|> feed(data, features = ["D1","D2","D3","D4"])
|> output(labels = ["class.Iris-setosa"])
|> learn(
    loss = "logloss",
    cost = "auc",
    num_boost_round = 10,
    early_stopping_rounds = 10,
    maximize = TRUE,
    eta = 0.3,
    max_depth = 7,
    scale_pos_weight = 1,
    rowsample = 0.8,
    colsample = 0.8,
    min_child_weight = 1,
    min_sample_split = 5,
    lambda = 1,
    gamma = 0,
    num_thread = 8
)
|> solve(data)
;

To use the ANN (Back Propagation Neural Network) classifier:

# ANN (back-propagation network) classifier: features D1..D4, three hidden
# layers of 50, 500 and 5 units, and one output per class indicator column.
# Hidden and output layers both use a sigmoid activation with alpha = 2.0.
#
# NOTE(review): the three output labels are the 0/1 indicator columns produced
# by `dataset::encoding`. If `class.Iris-setosa` has been overwritten with the
# string labels "Iris-setosa"/"Other" (as in the relabelling step of the
# classification example), re-create the encoded data frame first — confirm.
tensor(model = model::ANN)
|> feed(data, features = ["D1","D2","D3","D4"])
|> hidden_layer([50, 500, 5], activate = activateFunction::sigmoid(alpha = 2.0))
|> output(
    labels = ["class.Iris-setosa","class.Iris-versicolor","class.Iris-virginica"],
    activate = activateFunction::sigmoid(alpha = 2.0)
)
|> learn(
    parallel = TRUE,
    learn.rate = 0.01,
    truncate = -1,
    threshold = 0.01,
    softmax = FALSE,
    max.epochs = 10000
)
|> solve(data)
;

Regression Example

First, we prepare a demo dataset for the regression modelling problem:

# Generate the demo test dataset:
#   x - the integers 1..1000
#   y - x^1.25 plus random noise drawn with `runif`
#   z - the ratio y / x (used as the regression target in the examples below)
let x = 1:1000;
let y = x ^ 1.25 + runif(n = length(x));
let z = y / x;

# Row names are the string form of x.
data = data.frame(x, y, z, row.names = as.character(x));
#                x        y        z
# -----------------------------------
# <mode> <integer> <double> <double>
# 1              1  1.99925  1.99925
# 2              2  3.34409  1.67205
# 3              3  4.40403  1.46801
# 4              4   5.8122  1.45305
# 5              5   7.9491  1.58982
# 6              6  10.1006  1.68343

#  [ reached 'max' / getOption("max.print") -- omitted 994 rows ]

Here are some examples of solving the regression problem with the enigma package:

1. xgboost regression

# xgboost regression: predict `z` from `x` and `y` with the "squareloss" loss
# and the "mse" cost. The result of `solve(data)` is stored in `test`; as the
# table further below shows, it carries a "z(predicts)" column next to the inputs.
#
# NOTE(review): `maximize = TRUE` together with `cost = "mse"` looks inverted
# (a lower MSE is better) — confirm against the xgboost `learn()` parameters.
test = tensor(model = model::xgboost)
|> feed(data, features = ["x", "y"])
|> output(labels = "z")
|> learn(
    loss = "squareloss", 
    cost = "mse",
    num_boost_round = 10,
    early_stopping_rounds = 10,
    maximize = TRUE,
    eta = 0.3,
    max_depth = 7,
    scale_pos_weight = 1,
    rowsample = 0.8,
    colsample = 0.8,
    min_child_weight = 1,
    min_sample_split = 5,
    lambda = 1,
    gamma = 0,
    num_thread = 8
)
|> solve(data)
;

# Absolute prediction error per row, then reorder the rows by that error
# (smallest errors first, as in the table below).
test[, "errors"] = abs(test$z - test[, "z(predicts)"]);
i = order(test$errors);
test = test[i, ];
#                x        y        z z(predicts)       errors
# ------------------------------------------------------------
# <mode> <integer> <double> <double>    <double>     <double>
# 918          918  5053.48  5.50488     5.50487  3.06194E-06
# 696          696  3574.91  5.13637     5.13636  5.30553E-06
# 607          607  3012.96   4.9637     4.96369  7.10683E-06
# 826          826  4428.27  5.36111     5.36112  1.16761E-05
# 810          810  4322.19  5.33603     5.33604  1.23223E-05
# 879          879  4786.52  5.44542     5.44543  1.36714E-05

#  [ reached 'max' / getOption("max.print") -- omitted 994 rows ]

2. SVR regression

To solve an SVR regression problem, just replace the tensor model with the svr function:

# SVR regression: same feed/output/solve pipeline as the xgboost regression
# example, with `model::svr` as the model and SVM-style training parameters.
# The result of `solve(data)` is stored in `test`.
test = tensor(model = model::svr)
|> feed(data, features = ["x", "y"])
|> output(labels = "z")
|> learn(
    degree = 3,
    gamma = 0.5,
    coefficient0 = 0,
    nu = 0.5,
    cacheSize = 40,
    c = 1,
    EPS = 0.001,
    P = 0.1,
    shrinking = TRUE,
    probability = FALSE
)
|> solve(data)
;

Visual Regression Test Error

# Plot a histogram of the absolute prediction errors of a regression run.
# The regression examples keep the `solve()` result in `test`, and its
# prediction column is named "z(predicts)", so both are read from there.
test[, "errors"] = abs(test[, "z(predicts)"] - test$z);

print(test, max.print = 6);

# NOTE(review): `file` (passed to `bitmap`) and `title` (passed to `ggtitle`)
# are not defined in this snippet — assign them before running.
bitmap(file = file, size = [2700, 1800]) {
    ggplot(test, aes(x = "errors"), padding = "padding:250px 400px 200px 250px;")
    + geom_histogram(bins = 250, color = "steelblue", range = [0, 1])
    + ggtitle(title)
    + scale_x_continuous(labels = "F2")
    + scale_y_continuous(labels = "F0")
    + theme_default()
    ;
}

Name		Name	Last commit message	Last commit date
Latest commit History 246 Commits
.github		.github
.pkg		.pkg
.vscode		.vscode
@export		@export
R		R
data		data
llvm		llvm
man		man
src		src
test		test
tools		tools
vignettes		vignettes
.Rbuildignore		.Rbuildignore
.gitignore		.gitignore
DESCRIPTION		DESCRIPTION
LICENSE		LICENSE
NAMESPACE		NAMESPACE
README.md		README.md
enigma.Rproj		enigma.Rproj

License

rsharp-lang/enigma

Folders and files

Latest commit

History

Repository files navigation

Use this Machine Learning Package

Classification Example

Regression Example

1. xgboost regression

2. SVR regression

Visual Regression Test Error

About

Topics

Resources

License

Stars

Watchers

Forks

Languages