From 405b643f2dda1dbb7287800d6c59723b1f692452 Mon Sep 17 00:00:00 2001
From: Chris Fusting
Date: Wed, 11 Oct 2017 15:00:57 -0400
Subject: [PATCH 1/2] Added hall_of_fame attribute such that users can retrieve the k best scores and parameters from a run.

---
 evolutionary_search/cv.py | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/evolutionary_search/cv.py b/evolutionary_search/cv.py
index d02e258..c736879 100644
--- a/evolutionary_search/cv.py
+++ b/evolutionary_search/cv.py
@@ -173,6 +173,9 @@ class EvolutionaryAlgorithmSearchCV(BaseSearchCV):
         parameter in range [ind1_parameter, ind2_parameter]. Of course it is
         correct only when parameters of some value is sorted.
 
+    n_hall_of_fame : int, default=1
+        Number of individuals to retain in the hall of fame.
+
     n_jobs : int, default=1
         Number of jobs to run in parallel.
 
@@ -246,7 +249,8 @@ class EvolutionaryAlgorithmSearchCV(BaseSearchCV):
                                            gene_mutation_prob=0.10,
                                            gene_crossover_prob=0.5,
                                            tournament_size=3,
-                                           generations_number=10)
+                                           generations_number=10,
+                                           n_hall_of_fame=5)
         cv.fit(X, y)
 
 
@@ -277,13 +281,16 @@ class EvolutionaryAlgorithmSearchCV(BaseSearchCV):
     all_logbooks_: list of the deap.tools.Logbook objects, indexed by params (len 1 if params is not a list).
         With the statistics of the evolution.
 
+    hall_of_fame: list of tuples
+        (score, parameters) for the best individuals.
+
     """
     def __init__(self, estimator, params, scoring=None, cv=4,
                  refit=True, verbose=False, population_size=50,
                  gene_mutation_prob=0.1, gene_crossover_prob=0.5,
                  tournament_size=3, generations_number=10, gene_type=None,
-                 n_jobs=1, iid=True, pre_dispatch='2*n_jobs', error_score='raise',
-                 fit_params={}):
+                 n_hall_of_fame=1, n_jobs=1, iid=True, pre_dispatch='2*n_jobs',
+                 error_score='raise', fit_params={}):
         super(EvolutionaryAlgorithmSearchCV, self).__init__(
             estimator=estimator, scoring=scoring, fit_params=fit_params,
             n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
@@ -296,6 +303,8 @@ def __init__(self, estimator, params, scoring=None, cv=4,
         self.gene_crossover_prob = gene_crossover_prob
         self.tournament_size = tournament_size
         self.gene_type = gene_type
+        self.n_hall_of_fame = n_hall_of_fame
+        self.hall_of_fame = None
         self.all_history_, self.all_logbooks_ = [], []
         self._cv_results = None
         self.best_score_ = None
@@ -404,7 +413,7 @@ def _fit(self, X, y, parameter_dict):
         toolbox.register("select", tools.selTournament, tournsize=self.tournament_size)
 
         pop = toolbox.population(n=self.population_size)
-        hof = tools.HallOfFame(1)
+        hof = tools.HallOfFame(self.n_hall_of_fame)
 
         # Stats
         stats = tools.Statistics(lambda ind: ind.fitness.values)
@@ -429,8 +438,10 @@ def _fit(self, X, y, parameter_dict):
         # Save History
         self.all_history_.append(hist)
         self.all_logbooks_.append(logbook)
-        current_best_score_ = hof[0].fitness.values[0]
-        current_best_params_ = _individual_to_params(hof[0], name_values)
+
+        def get_best_score_and_params(ind):
+            return ind.fitness.values[0], _individual_to_params(ind, name_values)
+        current_best_score_, current_best_params_ = get_best_score_and_params(hof[0])
         if self.verbose:
             print("Best individual is: %s\nwith fitness: %s" % (
                 current_best_params_, current_best_score_))
@@ -448,3 +459,5 @@ def _fit(self, X, y, parameter_dict):
 
         self.best_score_ = current_best_score_
         self.best_params_ = current_best_params_
+
+        self.hall_of_fame = list(map(get_best_score_and_params, hof))

From 23402766d3b8c3f1e292f49d23e91dc7cd50acc1 Mon Sep 17 00:00:00 2001
From: Chris Fusting
Date: Thu, 12 Oct 2017 15:57:55 -0400
Subject: [PATCH 2/2] Added hall_of_fame to maximize and updated tests.

---
 evolutionary_search/optimize.py | 18 +++++++++++++-----
 test.py                         |  9 ++++++---
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/evolutionary_search/optimize.py b/evolutionary_search/optimize.py
index 4b80f96..ced4384 100644
--- a/evolutionary_search/optimize.py
+++ b/evolutionary_search/optimize.py
@@ -25,7 +25,7 @@ def _evalFunction(func, individual, name_values, verbose=0, error_score='raise',
 def maximize(func, parameter_dict, args={},
              verbose=False, population_size=50,
              gene_mutation_prob=0.1, gene_crossover_prob=0.5,
-             tournament_size=3, generations_number=10, gene_type=None,
+             tournament_size=3, generations_number=10, gene_type=None, n_hall_of_fame=1,
              n_jobs=1, pre_dispatch='2*n_jobs', error_score='raise'):
     """ Same as _fit in EvolutionarySearchCV but without fitting data.
     More similar to scipy.optimize.
@@ -45,6 +45,11 @@ def maximize(func, parameter_dict, args={},
     logbook: deap.tools.Logbook object.
         Includes the statistics of the evolution.
 
+
+    archive: dict
+        hall_of_fame: list of tuples
+            (score, parameters) for the best individuals.
+
     """
     _check_param_grid(parameter_dict)
 
@@ -75,7 +80,7 @@ def maximize(func, parameter_dict, args={},
     # Tools
     pop = toolbox.population(n=population_size)
-    hof = tools.HallOfFame(1)
+    hof = tools.HallOfFame(n_hall_of_fame)
     stats = tools.Statistics(lambda ind: ind.fitness.values)
     stats.register("avg", np.nanmean)
     stats.register("min", np.nanmin)
     stats.register("max", np.nanmax)
@@ -95,8 +100,9 @@ def maximize(func, parameter_dict, args={},
                                        ngen=generations_number, stats=stats,
                                        halloffame=hof, verbose=verbose)
 
-    current_best_score_ = hof[0].fitness.values[0]
-    current_best_params_ = _individual_to_params(hof[0], name_values)
+    def get_best_score_and_params(ind):
+        return ind.fitness.values[0], _individual_to_params(ind, name_values)
+    current_best_score_, current_best_params_ = get_best_score_and_params(hof[0])
 
     # Generate score_cache with real parameters
     _, individuals, each_scores = zip(*[(idx, indiv, np.mean(indiv.fitness.values))
@@ -114,4 +120,6 @@ def maximize(func, parameter_dict, args={},
         pool.close()
         pool.join()
 
-    return current_best_params_, current_best_score_, score_results, hist, logbook
+    hall_of_fame = list(map(get_best_score_and_params, hof))
+
+    return current_best_params_, current_best_score_, score_results, hist, logbook, {'hall_of_fame': hall_of_fame}
diff --git a/test.py b/test.py
index 6dc5664..d5596ca 100644
--- a/test.py
+++ b/test.py
@@ -6,9 +6,11 @@
 import unittest
 import random
 
+
 def func(x, y, m=1., z=False):
     return m * (np.exp(-(x**2 + y**2)) + float(z))
 
+
 def readme(n_jobs=1):
     data = sklearn.datasets.load_digits()
     X = data["data"]
@@ -34,6 +36,7 @@ def readme(n_jobs=1):
     cv.fit(X, y)
     return cv
 
+
 class TestEvolutionarySearch(unittest.TestCase):
 
     def test_cv(self):
@@ -44,7 +47,7 @@ def try_with_params(**kwargs):
             self.assertIsNotNone(cv_results_, msg="cv_results is None.")
             self.assertNotEqual(cv_results_, {}, msg="cv_results is empty.")
             self.assertAlmostEqual(cv.best_score_, 1., delta=.05,
-                               msg="Did not find the best score. Returned: {}".format(cv.best_score_))
Returned: {}".format(cv.best_score_)) try_with_params(n_jobs=1) try_with_params(n_jobs=4) @@ -56,8 +59,8 @@ def test_optimize(self): args = {'m': 1.} def try_with_params(**max_args): - best_params, best_score, score_results = maximize(func, param_grid, - args, verbose=True, **max_args) + best_params, best_score, score_results, history, logbook, archive = maximize(func, param_grid, + args, verbose=True, **max_args) print("Score Results:\n{}".format(score_results)) self.assertEqual(best_params, {'x': 0., 'y': 0., 'z': True}) self.assertEqual(best_score, 2.)