Skip to content

Commit

Permalink
Merge pull request #250 from RubixML/2.2
Browse files Browse the repository at this point in the history
2.2
  • Loading branch information
andrewdalpino authored Oct 1, 2022
2 parents e7b63e1 + 0c967d8 commit b813162
Show file tree
Hide file tree
Showing 97 changed files with 1,580 additions and 188 deletions.
3 changes: 1 addition & 2 deletions .php-cs-fixer.dist.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,7 @@
'no_spaces_around_offset' => true,
'no_superfluous_phpdoc_tags' => false,
'no_superfluous_elseif' => true,
'no_trailing_comma_in_list_call' => true,
'no_trailing_comma_in_singleline_array' => true,
'no_trailing_comma_in_singleline' => true,
'no_unneeded_control_parentheses' => true,
'no_unneeded_curly_braces' => true,
'no_unset_cast' => true,
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
- 2.2.0
- Added Image Rotator transformer
- Added One Vs Rest ensemble classifier
- Added variance and range to the Dataset `describe()` report
- Added Gower distance kernel
- Added `types()` method to Dataset
- Concatenator now accepts an iterator of iterators

- 2.1.1
- Do not consider unset properties when determining revision

Expand Down
62 changes: 62 additions & 0 deletions benchmarks/Classifiers/OneVsRestBench.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?php

namespace Rubix\ML\Benchmarks\Classifiers;

use Rubix\ML\Classifiers\OneVsRest;
use Rubix\ML\Datasets\Generators\Blob;
use Rubix\ML\Classifiers\LogisticRegression;
use Rubix\ML\NeuralNet\Optimizers\Stochastic;
use Rubix\ML\Datasets\Generators\Agglomerate;

/**
 * Benchmarks training and inference of a One Vs Rest ensemble that wraps a
 * Logistic Regression base learner, using a generated three-class dataset.
 *
 * @Groups({"Classifiers"})
 * @BeforeMethods({"setUp"})
 */
class OneVsRestBench
{
    protected const TRAINING_SIZE = 2500;

    protected const TESTING_SIZE = 10000;

    /**
     * The dataset the estimator is trained on.
     *
     * @var \Rubix\ML\Datasets\Labeled
     */
    protected $training;

    /**
     * The dataset the trained estimator makes predictions on.
     *
     * @var \Rubix\ML\Datasets\Labeled
     */
    protected $testing;

    /**
     * The estimator under test.
     *
     * @var \Rubix\ML\Classifiers\OneVsRest
     */
    protected $estimator;

    /**
     * Generate fresh training and testing sets and instantiate the estimator.
     */
    public function setUp() : void
    {
        // One blob per class label.
        $blobs = [
            'Iris-setosa' => new Blob([5.0, 3.42, 1.46, 0.24], [0.35, 0.38, 0.17, 0.1]),
            'Iris-versicolor' => new Blob([5.94, 2.77, 4.26, 1.33], [0.51, 0.31, 0.47, 0.2]),
            'Iris-virginica' => new Blob([6.59, 2.97, 5.55, 2.03], [0.63, 0.32, 0.55, 0.27]),
        ];

        $agglomerate = new Agglomerate($blobs);

        $this->training = $agglomerate->generate(self::TRAINING_SIZE);
        $this->testing = $agglomerate->generate(self::TESTING_SIZE);

        $base = new LogisticRegression(64, new Stochastic(0.001));

        $this->estimator = new OneVsRest($base);
    }

    /**
     * Train on the training set, then predict the testing set.
     *
     * @Subject
     * @Iterations(5)
     * @OutputTimeUnit("seconds", precision=3)
     */
    public function trainPredict() : void
    {
        $estimator = $this->estimator;

        $estimator->train($this->training);

        $estimator->predict($this->testing);
    }
}
68 changes: 68 additions & 0 deletions benchmarks/Kernels/Distance/GowerBench.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
<?php

namespace Rubix\ML\Benchmarks\Kernels\Distance;

use Rubix\ML\Datasets\Generators\Blob;
use Rubix\ML\Kernels\Distance\Gower;
use Rubix\ML\Transformers\LambdaFunction;

/**
 * Benchmarks the Gower distance kernel on samples that mix continuous values,
 * missing values (NAN), and categorical values.
 *
 * @Groups({"DistanceKernels"})
 * @BeforeMethods({"setUp"})
 */
class GowerBench
{
    protected const NUM_SAMPLES = 10000;

    /**
     * The left-hand operands of each distance computation.
     *
     * @var list<list<float|string>>
     */
    protected $aSamples;

    /**
     * The right-hand operands of each distance computation.
     *
     * @var list<list<float|string>>
     */
    protected $bSamples;

    /**
     * The kernel under test.
     *
     * @var \Rubix\ML\Kernels\Distance\Gower
     */
    protected $kernel;

    /**
     * Generate two sets of 8-feature samples, punch random holes in columns 4 and 5,
     * turn columns 6 and 7 into categorical features, and instantiate the kernel.
     */
    public function setUp() : void
    {
        $generator = new Blob([0, 0, 0, 0, 0, 0, 0, 0], 5.0);

        // The sample MUST be taken by reference: LambdaFunction hands each sample to the
        // callback by reference and ignores its return value, so without the `&` the
        // edits below are made to a copy and silently thrown away.
        $dropValues = new LambdaFunction(function (&$sample) {
            // Replace the value with NAN in roughly 1 of 6 and 1 of 11 samples respectively.
            $sample[4] = rand(0, 5) === 0 ? NAN : $sample[4];
            $sample[5] = rand(0, 10) === 0 ? NAN : $sample[5];
        });

        // By reference for the same reason as above.
        $discretize = new LambdaFunction(function (&$sample) {
            $sample[6] = $sample[6] > 0.0 ? 'over' : 'under';
            $sample[7] = abs($sample[7]) > 0.5 ? 'big' : 'small';
        });

        $this->aSamples = $generator->generate(self::NUM_SAMPLES)
            ->apply($dropValues)
            ->apply($discretize)
            ->samples();

        $this->bSamples = $generator->generate(self::NUM_SAMPLES)
            ->apply($dropValues)
            ->apply($discretize)
            ->samples();

        $this->kernel = new Gower(5.0);
    }

    /**
     * Compute the distance between each pair of samples taken from the two sets.
     *
     * @Subject
     * @Iterations(5)
     * @OutputTimeUnit("milliseconds", precision=3)
     */
    public function compute() : void
    {
        array_map([$this->kernel, 'compute'], $this->aSamples, $this->bSamples);
    }
}
15 changes: 13 additions & 2 deletions benchmarks/Kernels/Distance/SafeEuclideanBench.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use Rubix\ML\Datasets\Generators\Blob;
use Rubix\ML\Kernels\Distance\SafeEuclidean;
use Rubix\ML\Transformers\LambdaFunction;

/**
* @Groups({"DistanceKernels"})
Expand Down Expand Up @@ -32,8 +33,18 @@ public function setUp() : void
{
$generator = new Blob([0, 0, 0, 0, 0, 0, 0, 0], 5.0);

$this->aSamples = $generator->generate(self::NUM_SAMPLES)->samples();
$this->bSamples = $generator->generate(self::NUM_SAMPLES)->samples();
// The sample MUST be taken by reference: LambdaFunction hands each sample to the
// callback by reference and ignores its return value, so without the `&` the NAN
// substitutions are made to a copy and the benchmark never sees a missing value.
$dropValues = new LambdaFunction(function (&$sample) {
    // Replace the value with NAN in roughly 1 of 6 and 1 of 11 samples respectively.
    $sample[4] = rand(0, 5) === 0 ? NAN : $sample[4];
    $sample[5] = rand(0, 10) === 0 ? NAN : $sample[5];
});

$this->aSamples = $generator->generate(self::NUM_SAMPLES)
->apply($dropValues)
->samples();

$this->bSamples = $generator->generate(self::NUM_SAMPLES)
->apply($dropValues)
->samples();

$this->kernel = new SafeEuclidean();
}
Expand Down
107 changes: 107 additions & 0 deletions benchmarks/Kernels/Distance/SparseCosineBench.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
<?php

namespace Rubix\ML\Benchmarks\Kernels\Distance;

use Tensor\Matrix;
use Rubix\ML\Kernels\Distance\SparseCosine;

/**
 * Benchmarks the Sparse Cosine distance kernel on dense, partially masked,
 * and heavily masked sample sets.
 *
 * @Groups({"DistanceKernels"})
 */
class SparseCosineBench
{
    protected const NUM_SAMPLES = 10000;

    /**
     * The left-hand operands of each distance computation.
     *
     * @var list<list<float>>
     */
    protected $aSamples;

    /**
     * The right-hand operands of each distance computation.
     *
     * @var list<list<float>>
     */
    protected $bSamples;

    /**
     * The kernel under test.
     *
     * @var \Rubix\ML\Kernels\Distance\SparseCosine
     */
    protected $kernel;

    /**
     * Instantiate the kernel. Runs before every subject.
     */
    public function setUp() : void
    {
        $this->kernel = new SparseCosine();
    }

    /**
     * Populate both sample sets with unmasked gaussian values.
     */
    public function setUpDense() : void
    {
        $a = Matrix::gaussian(self::NUM_SAMPLES, 8);
        $b = Matrix::gaussian(self::NUM_SAMPLES, 8);

        $this->aSamples = $a->asArray();
        $this->bSamples = $b->asArray();
    }

    /**
     * @Subject
     * @Iterations(5)
     * @BeforeMethods({"setUp", "setUpDense"})
     * @OutputTimeUnit("milliseconds", precision=3)
     */
    public function computeDense() : void
    {
        $compute = [$this->kernel, 'compute'];

        array_map($compute, $this->aSamples, $this->bSamples);
    }

    /**
     * Populate both sample sets with gaussian values masked at a 0.5 threshold.
     */
    public function setUpSparse() : void
    {
        $this->aSamples = $this->maskedSamples(0.5);
        $this->bSamples = $this->maskedSamples(0.5);
    }

    /**
     * @Subject
     * @Iterations(5)
     * @BeforeMethods({"setUp", "setUpSparse"})
     * @OutputTimeUnit("milliseconds", precision=3)
     */
    public function computeSparse() : void
    {
        $compute = [$this->kernel, 'compute'];

        array_map($compute, $this->aSamples, $this->bSamples);
    }

    /**
     * Populate both sample sets with gaussian values masked at a 0.9 threshold.
     */
    public function setUpVerySparse() : void
    {
        $this->aSamples = $this->maskedSamples(0.9);
        $this->bSamples = $this->maskedSamples(0.9);
    }

    /**
     * @Subject
     * @Iterations(5)
     * @BeforeMethods({"setUp", "setUpVerySparse"})
     * @OutputTimeUnit("milliseconds", precision=3)
     */
    public function computeVerySparse() : void
    {
        $compute = [$this->kernel, 'compute'];

        array_map($compute, $this->aSamples, $this->bSamples);
    }

    /**
     * Draw a random mask, then a gaussian matrix, and return their element-wise product.
     * The mask is drawn first so the order of random draws matches the inline version.
     *
     * NOTE(review): presumably Matrix::rand() is uniform on [0, 1), making the threshold
     * the approximate fraction of zeroed elements - confirm against the Tensor docs.
     *
     * @param float $threshold the value a mask element must exceed to keep its sample value
     * @return list<list<float>>
     */
    private function maskedSamples(float $threshold) : array
    {
        $mask = Matrix::rand(self::NUM_SAMPLES, 8)->greater($threshold);

        $values = Matrix::gaussian(self::NUM_SAMPLES, 8);

        return $values->multiply($mask)->asArray();
    }
}
1 change: 1 addition & 0 deletions docs/choosing-an-estimator.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Classifiers are supervised learners that predict a categorical *class* label. Th
| [Logit Boost](classifiers/logit-boost.md) | High || ||| Categorical, Continuous |
| [Multilayer Perceptron](classifiers/multilayer-perceptron.md) | High ||| || Continuous |
| [Naive Bayes](classifiers/naive-bayes.md) | Medium ||| | | Categorical |
| [One Vs Rest](classifiers/one-vs-rest.md) | Medium || | | | Depends on base learner |
| [Radius Neighbors](classifiers/radius-neighbors.md) | Medium || | | | Depends on distance kernel |
| [Random Forest](classifiers/random-forest.md) | High || || | Categorical, Continuous |
| [Softmax Classifier](classifiers/softmax-classifier.md) | Low ||| || Continuous |
Expand Down
25 changes: 25 additions & 0 deletions docs/classifiers/one-vs-rest.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/Classifiers/OneVsRest.php">[source]</a></span>

# One Vs Rest
One Vs Rest is an ensemble learner that trains one binary classifier per class, each of which learns to distinguish its class from all of the other classes. The final class prediction is the class whose binary classifier returned the highest probability. One of the features of One Vs Rest is that it allows you to build a multiclass classifier out of an ensemble of otherwise binary classifiers.

**Interfaces:** [Estimator](../estimator.md), [Learner](../learner.md), [Probabilistic](../probabilistic.md), [Parallel](../parallel.md), [Persistable](../persistable.md)

**Data Type Compatibility:** Depends on the base learner

## Parameters
| # | Name | Default | Type | Description |
|---|---|---|---|---|
| 1 | base | | Learner\|Probabilistic | The base classifier. |

## Example
```php
use Rubix\ML\Classifiers\OneVsRest;
use Rubix\ML\Classifiers\LogisticRegression;
use Rubix\ML\NeuralNet\Optimizers\Stochastic;

$estimator = new OneVsRest(new LogisticRegression(64, new Stochastic(0.001)));
```

## Additional Methods
This estimator does not have any additional methods.
5 changes: 5 additions & 0 deletions docs/datasets/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ echo "$m x $n";
```

## Data Types
Return the data types for each column in the data table:
```php
public types() : Rubix\ML\DataType[]
```

Return the data types for each feature column:
```php
public featureTypes() : Rubix\ML\DataType[]
Expand Down
1 change: 1 addition & 0 deletions docs/exploring-data.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ echo $report;
"standard deviation": 3.129252661934191,
"skewness": -0.4481030843690633,
"kurtosis": -1.1330702741786107,
"range": 9.0,
"min": -5,
"25%": -1.375,
"median": 0.8,
Expand Down
24 changes: 24 additions & 0 deletions docs/extractors/column-filter.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/Extractors/ColumnFilter.php">[source]</a></span>

# Column Filter

**Interfaces:** [Extractor](api.md)

## Parameters
| # | Name | Default | Type | Description |
|---|---|---|---|---|
| 1 | iterator | | Traversable | The base iterator. |
| 2 | keys | | array | The string and/or integer keys of the columns to filter from the table. |

## Example
```php
use Rubix\ML\Extractors\ColumnFilter;
use Rubix\ML\Extractors\CSV;

$extractor = new ColumnFilter(new CSV('example.csv', true), [
'texture', 'class',
]);
```

## Additional Methods
This extractor does not have any additional methods.
1 change: 1 addition & 0 deletions docs/extractors/column-picker.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
An extractor that wraps another iterator and selects and reorders the columns of the data table according to the keys specified by the user. The key of a column may either be a string or a column number (integer) depending on the way the columns are indexed in the base iterator.

**Interfaces:** [Extractor](api.md)

## Parameters
| # | Name | Default | Type | Description |
|---|---|---|---|---|
Expand Down
4 changes: 2 additions & 2 deletions docs/extractors/concatenator.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/Extractors/Concatenator.php">[source]</a></span>

# Concatenator
Concatenates multiple iterators by joining the tail of one with the head of another.
Combines multiple iterators by concatenating the output of one iterator with the output of the next iterator in the series.

**Interfaces:** [Extractor](api.md)

## Parameters
| # | Name | Default | Type | Description |
|---|---|---|---|---|
| 1 | iterators | | array | The iterators to concatenate together. |
| 1 | iterators | | iterable | The iterators to concatenate together. |

## Example
```php
Expand Down
Loading

0 comments on commit b813162

Please sign in to comment.