diff --git a/evolutionary_search/cv.py b/evolutionary_search/cv.py
index 74d6a9f..eda081d 100644
--- a/evolutionary_search/cv.py
+++ b/evolutionary_search/cv.py
@@ -174,6 +174,9 @@ class EvolutionaryAlgorithmSearchCV(BaseSearchCV):
         parameter in range [ind1_parameter, ind2_parameter]. Of course it is correct only when
         parameters of some value is sorted.
 
+    n_hall_of_fame : int, default=1
+        Number of top-scoring individuals to keep in the hall of fame across all generations.
+
     n_jobs : int or map function, default=1
         Number of jobs to run in parallel.
         Also accepts custom parallel map functions from Pool or SCOOP.
@@ -246,7 +249,8 @@ class EvolutionaryAlgorithmSearchCV(BaseSearchCV):
                                        gene_mutation_prob=0.10,
                                        gene_crossover_prob=0.5,
                                        tournament_size=3,
-                                       generations_number=10)
+                                       generations_number=10,
+                                       n_hall_of_fame=5)
     cv.fit(X, y)
 
@@ -277,13 +281,16 @@ class EvolutionaryAlgorithmSearchCV(BaseSearchCV):
     all_logbooks_: list of the deap.tools.Logbook objects, indexed by params (len 1 if params is not a list).
         With the statistics of the evolution.
 
+    hall_of_fame: list of tuples
+        (score, parameters) for the n_hall_of_fame best individuals, best first.
+
     """
     def __init__(self, estimator, params, scoring=None, cv=4,
                  refit=True, verbose=False, population_size=50,
                  gene_mutation_prob=0.1, gene_crossover_prob=0.5,
                  tournament_size=3, generations_number=10, gene_type=None,
-                 n_jobs=1, iid=True, error_score='raise',
-                 fit_params={}):
+                 n_hall_of_fame=1, n_jobs=1, iid=True, pre_dispatch='2*n_jobs',
+                 error_score='raise', fit_params={}):
         super(EvolutionaryAlgorithmSearchCV, self).__init__(
             estimator=estimator, scoring=scoring, fit_params=fit_params,
             iid=iid, refit=refit, cv=cv, verbose=verbose,
@@ -296,6 +303,8 @@ def __init__(self, estimator, params, scoring=None, cv=4,
         self.gene_crossover_prob = gene_crossover_prob
         self.tournament_size = tournament_size
         self.gene_type = gene_type
+        self.n_hall_of_fame = n_hall_of_fame
+        self.hall_of_fame = None
         self.all_history_, self.all_logbooks_ = [], []
         self._cv_results = None
         self.best_score_ = None
@@ -423,7 +432,7 @@ def _fit(self, X, y, parameter_dict):
         toolbox.register("select", tools.selTournament, tournsize=self.tournament_size)
 
         pop = toolbox.population(n=self.population_size)
-        hof = tools.HallOfFame(1)
+        hof = tools.HallOfFame(self.n_hall_of_fame)
 
         # Stats
         stats = tools.Statistics(lambda ind: ind.fitness.values)
@@ -448,8 +457,10 @@ def _fit(self, X, y, parameter_dict):
         # Save History
         self.all_history_.append(hist)
         self.all_logbooks_.append(logbook)
-        current_best_score_ = hof[0].fitness.values[0]
-        current_best_params_ = _individual_to_params(hof[0], name_values)
+
+        def get_best_score_and_params(ind):
+            return ind.fitness.values[0], _individual_to_params(ind, name_values)
+        current_best_score_, current_best_params_ = get_best_score_and_params(hof[0])
         if self.verbose:
             print("Best individual is: %s\nwith fitness: %s" % (
                 current_best_params_, current_best_score_))
@@ -468,3 +479,5 @@ def _fit(self, X, y, parameter_dict):
 
         self.best_score_ = current_best_score_
         self.best_params_ = current_best_params_
+
+        self.hall_of_fame = list(map(get_best_score_and_params, hof))
diff --git a/evolutionary_search/optimize.py b/evolutionary_search/optimize.py
index caa02c3..519a6b3 100644
--- a/evolutionary_search/optimize.py
+++ b/evolutionary_search/optimize.py
@@ -28,10 +28,10 @@ def compile():
     creator.create("Individual", list, fitness=creator.FitnessMax)
 
 def maximize(func, parameter_dict, args={},
-             verbose=False, population_size=50,
-             gene_mutation_prob=0.1, gene_crossover_prob=0.5,
-             tournament_size=3, generations_number=10, gene_type=None,
-             n_jobs=1, error_score='raise'):
+             verbose=False, population_size=50,
+             gene_mutation_prob=0.1, gene_crossover_prob=0.5,
+             tournament_size=3, generations_number=10, gene_type=None, n_hall_of_fame=1,
+             n_jobs=1, pre_dispatch='2*n_jobs', error_score='raise'):
     """ Same as _fit in EvolutionarySearchCV but without fitting data. More similar to scipy.optimize.
 
     Parameters
@@ -56,6 +56,11 @@ def maximize(func, parameter_dict, args={},
     logbook: deap.tools.Logbook object.
         Includes the statistics of the evolution.
 
+    archive: dict
+        hall_of_fame: list of tuples
+            (score, parameters) for the best individuals, best first.
+
     """
     toolbox = base.Toolbox()
 
@@ -108,7 +113,7 @@ def maximize(func, parameter_dict, args={},
     # Tools
     pop = toolbox.population(n=population_size)
-    hof = tools.HallOfFame(1)
+    hof = tools.HallOfFame(n_hall_of_fame)
     stats = tools.Statistics(lambda ind: ind.fitness.values)
     stats.register("avg", np.nanmean)
     stats.register("min", np.nanmin)
@@ -128,8 +133,9 @@ def maximize(func, parameter_dict, args={},
                                        ngen=generations_number, stats=stats,
                                        halloffame=hof, verbose=verbose)
 
-    current_best_score_ = hof[0].fitness.values[0]
-    current_best_params_ = _individual_to_params(hof[0], name_values)
+    def get_best_score_and_params(ind):
+        return ind.fitness.values[0], _individual_to_params(ind, name_values)
+    current_best_score_, current_best_params_ = get_best_score_and_params(hof[0])
 
     # Generate score_cache with real parameters
     _, individuals, each_scores = zip(*[(idx, indiv, np.mean(indiv.fitness.values))
@@ -148,4 +154,6 @@ def maximize(func, parameter_dict, args={},
         pool.close()
         pool.join()
 
-    return current_best_params_, current_best_score_, score_results, hist, logbook
+    hall_of_fame = list(map(get_best_score_and_params, hof))
+
+    return current_best_params_, current_best_score_, score_results, hist, logbook, {'hall_of_fame': hall_of_fame}
diff --git a/test.py b/test.py
index 47e3264..b494dd1 100644
--- a/test.py
+++ b/test.py
@@ -6,6 +6,7 @@ import unittest
 import random
 
+
 def func(x, y, m=1., z=False):
     return m * (np.exp(-(x**2 + y**2)) + float(z))
 
@@ -33,6 +34,7 @@ def readme():
     cv.fit(X, y)
     return cv
 
+
 class TestEvolutionarySearch(unittest.TestCase):
 
     def test_cv(self):
@@ -43,7 +45,7 @@ def try_with_params(**kwargs):
             self.assertIsNotNone(cv_results_, msg="cv_results is None.")
             self.assertNotEqual(cv_results_, {}, msg="cv_results is empty.")
             self.assertAlmostEqual(cv.best_score_, 1., delta=.05,
-                           msg="Did not find the best score. Returned: {}".format(cv.best_score_))
+                                   msg="Did not find the best score. Returned: {}".format(cv.best_score_))
 
         try_with_params()
 
@@ -53,10 +55,15 @@ def test_optimize(self):
         param_grid = {'x': [-1., 0., 1.], 'y': [-1., 0., 1.], 'z': [True, False]}
         args = {'m': 1.}
-        best_params, best_score, score_results, _, _ = maximize(func, param_grid, args, verbose=True)
-        print("Score Results:\n{}".format(score_results))
-        self.assertEqual(best_params, {'x': 0., 'y': 0., 'z': True})
-        self.assertEqual(best_score, 2.)
+        def try_with_params(**max_args):
+            best_params, best_score, score_results, history, logbook, archive = maximize(
+                func, param_grid, args, verbose=True, **max_args)
+            print("Score Results:\n{}".format(score_results))
+            self.assertEqual(best_params, {'x': 0., 'y': 0., 'z': True})
+            self.assertEqual(best_score, 2.)
+
+        try_with_params(n_jobs=1)
+        try_with_params(n_jobs=4)
 
 if __name__ == "__main__":
     unittest.main()
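
Usage sketch for the cv.py side of this patch (illustrative, not part of the diff): the digits/SVC setup below mirrors the sklearn-deap README example; only the n_hall_of_fame argument and the hall_of_fame attribute come from this change.

# Example only: exercising n_hall_of_fame on the CV search.
# Dataset, estimator, and grid values are illustrative choices.
import sklearn.datasets
from sklearn.svm import SVC
from evolutionary_search import EvolutionaryAlgorithmSearchCV

data = sklearn.datasets.load_digits()
X, y = data["data"], data["target"]
paramgrid = {"kernel": ["rbf"],
             "C": [1e-2, 1e-1, 1., 10., 100.],
             "gamma": [1e-4, 1e-3, 1e-2, 1e-1]}

cv = EvolutionaryAlgorithmSearchCV(estimator=SVC(),
                                   params=paramgrid,
                                   scoring="accuracy",
                                   cv=2,
                                   population_size=10,
                                   generations_number=5,
                                   n_hall_of_fame=5)  # new in this patch
cv.fit(X, y)

# hall_of_fame is filled in by _fit() with (score, parameters) tuples, best first.
for score, params in cv.hall_of_fame:
    print("%0.4f %s" % (score, params))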
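A matching sketch for the optimize.py side, reusing func from test.py above; the only new pieces are the n_hall_of_fame keyword and the extra archive dict that maximize() now returns.

# Example only: the extended maximize() return value.
import numpy as np
from evolutionary_search import maximize

def func(x, y, m=1., z=False):
    # Toy objective from test.py: maximum of 2.0 at x=0, y=0, z=True.
    return m * (np.exp(-(x**2 + y**2)) + float(z))

param_grid = {'x': [-1., 0., 1.], 'y': [-1., 0., 1.], 'z': [True, False]}

best_params, best_score, score_results, history, logbook, archive = \
    maximize(func, param_grid, args={'m': 1.}, n_hall_of_fame=3)

# archive['hall_of_fame'] holds (score, parameters) tuples, best first;
# its first entry corresponds to (best_score, best_params).
for score, params in archive['hall_of_fame']:
    print(score, params)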