From e2b819f73563b419cd10aab9ce663be1d001b9ae Mon Sep 17 00:00:00 2001
From: Sergiy Matusevych
Date: Mon, 20 May 2024 14:33:34 -0700
Subject: [PATCH] Update mlos_core API to support multi-factor optimization (#730)

* [x] Pass multi-column DataFrame instead of Sequence to `BaseOptimizer.register()` and other methods that deal with scores
* [x] Update mlos_bench `MlosCoreOptimizer` to support the new mlos_core API
* [x] Update unit tests to work with the new API
* [x] Add unit tests for end-to-end multi-target optimization

Merge after ~#726~

---------

Co-authored-by: Brian Kroth
---
 .../mlos_bench/optimizers/base_optimizer.py   |   2 +
 .../optimizers/mlos_core_optimizer.py         |  53 ++++---
 mlos_core/mlos_core/optimizers/__init__.py    |   6 +-
 .../bayesian_optimizers/smac_optimizer.py     |  24 +++-
 .../mlos_core/optimizers/flaml_optimizer.py   |  28 ++--
 mlos_core/mlos_core/optimizers/optimizer.py   |  66 +++++----
 .../mlos_core/optimizers/random_optimizer.py  |   4 +-
 .../optimizers/bayesian_optimizers_test.py    |  10 +-
 .../tests/optimizers/one_hot_test.py          |   1 +
 .../optimizers/optimizer_multiobj_test.py     |  97 +++++++++++++
 .../tests/optimizers/optimizer_test.py        | 129 +++++++++++-------
 11 files changed, 305 insertions(+), 115 deletions(-)
 create mode 100644 mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py

diff --git a/mlos_bench/mlos_bench/optimizers/base_optimizer.py b/mlos_bench/mlos_bench/optimizers/base_optimizer.py
index 51cbf9694f..911c624315 100644
--- a/mlos_bench/mlos_bench/optimizers/base_optimizer.py
+++ b/mlos_bench/mlos_bench/optimizers/base_optimizer.py
@@ -325,6 +325,8 @@ def _get_scores(self, status: Status,
 
         if not status.is_succeeded():
             assert scores is None
+            # TODO: Be more flexible with values used for failed trials (not just +inf).
+            # Issue: https://github.com/microsoft/MLOS/issues/523
             return {opt_target: float("inf") for opt_target in self._opt_targets}
 
         assert scores is not None
diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
index 6b3221a38f..7747035c13 100644
--- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
+++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
@@ -46,11 +46,6 @@ def __init__(self,
                  service: Optional[Service] = None):
         super().__init__(tunables, config, global_config, service)
 
-        # TODO: Remove after implementing multi-target optimization in mlos_core
-        if len(self._opt_targets) != 1:
-            raise NotImplementedError(f"Multi-target optimization is not supported: {self}")
-        (self._opt_target, self._opt_sign) = list(self._opt_targets.items())[0]
-
         opt_type = getattr(OptimizerType, self._config.pop(
             'optimizer_type', DEFAULT_OPTIMIZER_TYPE.name))
 
@@ -79,6 +74,7 @@ def __init__(self,
 
         self._opt: BaseOptimizer = OptimizerFactory.create(
             parameter_space=self.config_space,
+            optimization_targets=list(self._opt_targets),
             optimizer_type=opt_type,
             optimizer_kwargs=self._config,
             space_adapter_type=space_adapter_type,
@@ -99,26 +95,43 @@ def bulk_register(self,
                       configs: Sequence[dict],
                       scores: Sequence[Optional[Dict[str, TunableValue]]],
                       status: Optional[Sequence[Status]] = None) -> bool:
+
         if not super().bulk_register(configs, scores, status):
             return False
+
         df_configs = self._to_df(configs)  # Impute missing values, if necessary
-        df_scores = pd.Series(
-            [self._extract_target(score) for score in scores],
-            dtype=float) * self._opt_sign
+
+        df_scores = self._adjust_signs_df(
+            pd.DataFrame([{} if score is None else score for score in scores]))
+
+        opt_targets = list(self._opt_targets)
         if status is not None:
+            # Select only the completed trials, set scores for failed trials to +inf.
             df_status = pd.Series(status)
-            df_scores[df_status != Status.SUCCEEDED] = float("inf")
+            # TODO: Be more flexible with values used for failed trials (not just +inf).
+            # Issue: https://github.com/microsoft/MLOS/issues/523
+            df_scores.loc[df_status != Status.SUCCEEDED, opt_targets] = float("inf")
             df_status_completed = df_status.apply(Status.is_completed)
             df_configs = df_configs[df_status_completed]
             df_scores = df_scores[df_status_completed]
-        self._opt.register(df_configs, df_scores)
+
+        # TODO: Specify (in the config) which metrics to pass to the optimizer.
+        # Issue: https://github.com/microsoft/MLOS/issues/745
+        self._opt.register(df_configs, df_scores[opt_targets].astype(float))
+
         if _LOG.isEnabledFor(logging.DEBUG):
             (score, _) = self.get_best_observation()
             _LOG.debug("Warm-up END: %s :: %s", self, score)
+
         return True
 
-    def _extract_target(self, scores: Optional[Dict[str, TunableValue]]) -> Optional[TunableValue]:
-        return None if scores is None else scores[self._opt_target]
+    def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame:
+        """
+        In-place adjust the signs of the scores for the MINIMIZATION problem.
+        """
+        for (opt_target, opt_dir) in self._opt_targets.items():
+            df_scores[opt_target] *= opt_dir
+        return df_scores
 
     def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
         """
@@ -175,21 +188,21 @@ def suggest(self) -> TunableGroups:
 
     def register(self, tunables: TunableGroups, status: Status,
                  score: Optional[Dict[str, TunableValue]] = None) -> Optional[Dict[str, float]]:
-        registered_score = super().register(tunables, status, score)  # With _opt_sign applied
+        registered_score = super().register(tunables, status, score)  # Sign-adjusted for MINIMIZATION
        if status.is_completed():
             assert registered_score is not None
             df_config = self._to_df([tunables.get_param_values()])
             _LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config)
-            self._opt.register(df_config, pd.Series([registered_score[self._opt_target]], dtype=float))
+            # TODO: Specify (in the config) which metrics to pass to the optimizer.
+            # Issue: https://github.com/microsoft/MLOS/issues/745
+            self._opt.register(df_config, pd.DataFrame([registered_score], dtype=float))
         return registered_score
 
     def get_best_observation(self) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
-        df_config = self._opt.get_best_observation()
+        (df_config, df_score, _df_context) = self._opt.get_best_observations()
         if len(df_config) == 0:
             return (None, None)
         params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
-        _LOG.debug("Best observation: %s", params)
-        score = params.pop("score")
-        assert score is not None
-        score = float(score) * self._opt_sign  # mlos_core always uses the `score` column
-        return ({self._opt_target: score}, self._tunables.copy().assign(params))
+        scores = self._adjust_signs_df(df_score).iloc[0].to_dict()
+        _LOG.debug("Best observation: %s score: %s", params, scores)
+        return (scores, self._tunables.copy().assign(params))
diff --git a/mlos_core/mlos_core/optimizers/__init__.py b/mlos_core/mlos_core/optimizers/__init__.py
index 93256ac451..b00a9e8eb1 100644
--- a/mlos_core/mlos_core/optimizers/__init__.py
+++ b/mlos_core/mlos_core/optimizers/__init__.py
@@ -7,7 +7,7 @@
 """
 
 from enum import Enum
-from typing import Optional, TypeVar
+from typing import List, Optional, TypeVar
 
 import ConfigSpace
 
@@ -62,6 +62,7 @@ class OptimizerFactory:
     @staticmethod
     def create(*,
                parameter_space: ConfigSpace.ConfigurationSpace,
+               optimization_targets: List[str],
               optimizer_type: OptimizerType = DEFAULT_OPTIMIZER_TYPE,
               optimizer_kwargs: Optional[dict] = None,
               space_adapter_type: SpaceAdapterType = SpaceAdapterType.IDENTITY,
@@ -74,6 +75,8 @@ def create(*,
         ----------
         parameter_space : ConfigSpace.ConfigurationSpace
             Input configuration space.
+        optimization_targets : List[str]
+            The names of the optimization targets to minimize.
         optimizer_type : OptimizerType
             Optimizer class as defined by Enum.
         optimizer_kwargs : Optional[dict]
@@ -102,6 +105,7 @@ def create(*,
 
         optimizer: ConcreteOptimizer = optimizer_type.value(
             parameter_space=parameter_space,
+            optimization_targets=optimization_targets,
             space_adapter=space_adapter,
             **optimizer_kwargs
         )
diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py
index e072394853..aedbb31d29 100644
--- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py
+++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py
@@ -29,6 +29,7 @@ class SmacOptimizer(BaseBayesianOptimizer):
 
     def __init__(self, *,  # pylint: disable=too-many-locals
                  parameter_space: ConfigSpace.ConfigurationSpace,
+                 optimization_targets: List[str],
                  space_adapter: Optional[BaseSpaceAdapter] = None,
                  seed: Optional[int] = 0,
                  run_name: Optional[str] = None,
@@ -46,6 +47,9 @@ def __init__(self, *,  # pylint: disable=too-many-locals
         parameter_space : ConfigSpace.ConfigurationSpace
             The parameter space to optimize.
 
+        optimization_targets : List[str]
+            The names of the optimization targets to minimize.
+
         space_adapter : BaseSpaceAdapter
             The space adapter class to employ for parameter space transformations.
@@ -86,6 +90,7 @@ def __init__(self, *,  # pylint: disable=too-many-locals
         """
         super().__init__(
             parameter_space=parameter_space,
+            optimization_targets=optimization_targets,
             space_adapter=space_adapter,
         )
 
@@ -125,6 +130,7 @@ def __init__(self, *,  # pylint: disable=too-many-locals
 
         scenario: Scenario = Scenario(
             self.optimizer_parameter_space,
+            objectives=self._optimization_targets,
             name=run_name,
             output_directory=Path(output_directory),
             deterministic=True,
@@ -186,6 +192,10 @@ def __init__(self, *,  # pylint: disable=too-many-locals
             intensifier=intensifier,
             random_design=random_design,
             config_selector=config_selector,
+            multi_objective_algorithm=Optimizer_Smac.get_multi_objective_algorithm(
+                scenario,
+                # objective_weights=[1, 2],  # TODO: pass weights as constructor args
+            ),
             overwrite=True,
             logging_level=False,  # Use the existing logger
         )
@@ -228,7 +238,8 @@ def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None
         # -- this is planned to be fixed in some future release: https://github.com/automl/SMAC3/issues/946
         raise RuntimeError('This function should never be called.')
 
-    def _register(self, configurations: pd.DataFrame, scores: pd.Series, context: Optional[pd.DataFrame] = None) -> None:
+    def _register(self, configurations: pd.DataFrame,
+                  scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
         """Registers the given configurations and scores.
 
         Parameters
@@ -236,7 +247,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.Series, context: Op
         configurations : pd.DataFrame
             Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
-        scores : pd.Series
+        scores : pd.DataFrame
             Scores from running the configurations. The index is the same as the index of the configurations.
         context : pd.DataFrame
@@ -248,10 +259,11 @@ def _register(self, configurations: pd.DataFrame, scores: pd.Series, context: Op
             warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
 
         # Register each trial (one-by-one)
-        for config, score in zip(self._to_configspace_configs(configurations), scores.tolist()):
+        for (config, (_i, score)) in zip(self._to_configspace_configs(configurations), scores.iterrows()):
             # Retrieve previously generated TrialInfo (returned by .ask()) or create new TrialInfo instance
-            info: TrialInfo = self.trial_info_map.get(config, TrialInfo(config=config, seed=self.base_optimizer.scenario.seed))
-            value: TrialValue = TrialValue(cost=score, time=0.0, status=StatusType.SUCCESS)
+            info: TrialInfo = self.trial_info_map.get(
+                config, TrialInfo(config=config, seed=self.base_optimizer.scenario.seed))
+            value = TrialValue(cost=list(score.astype(float)), time=0.0, status=StatusType.SUCCESS)
             self.base_optimizer.tell(info, value, save=False)
 
         # Save optimizer once we register all configs
@@ -293,7 +305,7 @@ def surrogate_predict(self, configurations: pd.DataFrame, context: Optional[pd.D
         if context is not None:
             warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
         if self._space_adapter and not isinstance(self._space_adapter, IdentityAdapter):
-            raise NotImplementedError()
+            raise NotImplementedError("Space adapter not supported for surrogate_predict.")
 
         # pylint: disable=protected-access
         if len(self._observations) <= self.base_optimizer._initial_design._n_configs:
diff --git a/mlos_core/mlos_core/optimizers/flaml_optimizer.py b/mlos_core/mlos_core/optimizers/flaml_optimizer.py
index 07fa5fe3a2..59d6439f38 100644
--- a/mlos_core/mlos_core/optimizers/flaml_optimizer.py
+++ b/mlos_core/mlos_core/optimizers/flaml_optimizer.py
@@ -6,7 +6,7 @@
 Contains the FlamlOptimizer class.
 """
 
-from typing import Dict, NamedTuple, Optional, Union
+from typing import Dict, List, NamedTuple, Optional, Union
 from warnings import warn
 
 import ConfigSpace
@@ -32,17 +32,22 @@ class FlamlOptimizer(BaseOptimizer):
 
     def __init__(self, *,
                  parameter_space: ConfigSpace.ConfigurationSpace,
+                 optimization_targets: List[str],
                  space_adapter: Optional[BaseSpaceAdapter] = None,
                  low_cost_partial_config: Optional[dict] = None,
                  seed: Optional[int] = None):
         """
-        Create an MLOS wrapper class for FLAML.
+        Create an MLOS wrapper for FLAML.
 
         Parameters
         ----------
         parameter_space : ConfigSpace.ConfigurationSpace
             The parameter space to optimize.
 
+        optimization_targets : List[str]
+            The names of the optimization targets to minimize.
+            For FLAML it must be a list with a single element, e.g., `["score"]`.
+
         space_adapter : BaseSpaceAdapter
             The space adapter class to employ for parameter space transformations.
@@ -55,9 +60,14 @@ def __init__(self, *,
         """
         super().__init__(
             parameter_space=parameter_space,
+            optimization_targets=optimization_targets,
             space_adapter=space_adapter,
         )
 
+        if len(self._optimization_targets) != 1:
+            raise ValueError("FLAML does not support multi-target optimization")
+        self._flaml_optimization_target = self._optimization_targets[0]
+
         # Per upstream documentation, it is recommended to set the seed for
         # flaml at the start of its operation globally.
         if seed is not None:
@@ -72,7 +82,7 @@ def __init__(self, *,
         self.evaluated_samples: Dict[ConfigSpace.Configuration, EvaluatedSample] = {}
         self._suggested_config: Optional[dict]
 
-    def _register(self, configurations: pd.DataFrame, scores: pd.Series,
+    def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame,
                   context: Optional[pd.DataFrame] = None) -> None:
         """Registers the given configurations and scores.
 
@@ -81,7 +91,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.Series,
         configurations : pd.DataFrame
             Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
-        scores : pd.Series
+        scores : pd.DataFrame
             Scores from running the configurations. The index is the same as the index of the configurations.
         context : None
@@ -89,7 +99,8 @@ def _register(self, configurations: pd.DataFrame, scores: pd.Series,
         """
         if context is not None:
             warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
-        for (_, config), score in zip(configurations.astype('O').iterrows(), scores):
+        for (_, config), score in zip(configurations.astype('O').iterrows(),
+                                      scores[self._flaml_optimization_target]):
             cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration(
                 self.optimizer_parameter_space, values=config.to_dict())
             if cs_config in self.evaluated_samples:
@@ -140,7 +151,7 @@ def _target_function(self, config: dict) -> Union[dict, None]:
         """
         cs_config = normalize_config(self.optimizer_parameter_space, config)
         if cs_config in self.evaluated_samples:
-            return {'score': self.evaluated_samples[cs_config].score}
+            return {self._flaml_optimization_target: self.evaluated_samples[cs_config].score}
 
         self._suggested_config = dict(cs_config)  # Cleaned-up version of the config
         return None  # Returning None stops the process
@@ -156,7 +167,8 @@ def _get_next_config(self) -> dict:
         Returns
         -------
         result: dict
-            Dictionary with a single key, `score`, if config already evaluated; `None` otherwise.
+            A dictionary with a single key that is equal to the name of the optimization target,
+            if the config has already been evaluated; `None` otherwise.
 
         Raises
         ------
@@ -182,7 +194,7 @@ def _get_next_config(self) -> dict:
                 self._target_function,
                 config=self.flaml_parameter_space,
                 mode='min',
-                metric='score',
+                metric=self._flaml_optimization_target,
                 points_to_evaluate=points_to_evaluate,
                 evaluated_rewards=evaluated_rewards,
                 num_samples=len(points_to_evaluate) + 1,
diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py
index d4e8759e2a..bd26619754 100644
--- a/mlos_core/mlos_core/optimizers/optimizer.py
+++ b/mlos_core/mlos_core/optimizers/optimizer.py
@@ -26,6 +26,7 @@ class BaseOptimizer(metaclass=ABCMeta):
 
     def __init__(self, *,
                  parameter_space: ConfigSpace.ConfigurationSpace,
+                 optimization_targets: List[str],
                  space_adapter: Optional[BaseSpaceAdapter] = None):
         """
         Create a new instance of the base optimizer.
@@ -34,6 +35,8 @@ def __init__(self, *,
         ----------
         parameter_space : ConfigSpace.ConfigurationSpace
             The parameter space to optimize.
+        optimization_targets : List[str]
+            The names of the optimization targets to minimize.
         space_adapter : BaseSpaceAdapter
             The space adapter class to employ for parameter space transformations.
""" @@ -44,8 +47,9 @@ def __init__(self, *, if space_adapter is not None and space_adapter.orig_parameter_space != parameter_space: raise ValueError("Given parameter space differs from the one given to space adapter") + self._optimization_targets = optimization_targets self._space_adapter: Optional[BaseSpaceAdapter] = space_adapter - self._observations: List[Tuple[pd.DataFrame, pd.Series, Optional[pd.DataFrame]]] = [] + self._observations: List[Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]] = [] self._has_context: Optional[bool] = None self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame]]] = [] @@ -57,7 +61,7 @@ def space_adapter(self) -> Optional[BaseSpaceAdapter]: """Get the space adapter instance (if any).""" return self._space_adapter - def register(self, configurations: pd.DataFrame, scores: pd.Series, + def register(self, configurations: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: """Wrapper method, which employs the space adapter (if any), before registering the configurations and scores. @@ -65,13 +69,15 @@ def register(self, configurations: pd.DataFrame, scores: pd.Series, ---------- configurations : pd.DataFrame Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. - scores : pd.Series + scores : pd.DataFrame Scores from running the configurations. The index is the same as the index of the configurations. context : pd.DataFrame Not Yet Implemented. """ # Do some input validation. + assert set(scores.columns) == set(self._optimization_targets), \ + "Mismatched optimization targets." assert self._has_context is None or self._has_context ^ (context is None), \ "Context must always be added or never be added." assert len(configurations) == len(scores), \ @@ -91,7 +97,7 @@ def register(self, configurations: pd.DataFrame, scores: pd.Series, return self._register(configurations, scores, context) @abstractmethod - def _register(self, configurations: pd.DataFrame, scores: pd.Series, + def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: """Registers the given configurations and scores. @@ -99,7 +105,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.Series, ---------- configurations : pd.DataFrame Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. - scores : pd.Series + scores : pd.DataFrame Scores from running the configurations. The index is the same as the index of the configurations. context : pd.DataFrame @@ -172,41 +178,45 @@ def register_pending(self, configurations: pd.DataFrame, """ pass # pylint: disable=unnecessary-pass # pragma: no cover - def get_observations(self) -> pd.DataFrame: - """Returns the observations as a dataframe. + def get_observations(self) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: + """ + Returns the observations as a triplet of DataFrames (config, score, context). Returns ------- - observations : pd.DataFrame - Dataframe of observations. The columns are parameter names and "score" for the score, each row is an observation. + observations : Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]] + A triplet of (config, score, context) DataFrames of observations. 
""" if len(self._observations) == 0: raise ValueError("No observations registered yet.") - configs = pd.concat([config for config, _, _ in self._observations]) - scores = pd.concat([score for _, score, _ in self._observations]) - try: - contexts = pd.concat([context for _, _, context in self._observations if context is not None]) - except ValueError: - contexts = None - configs["score"] = scores - if contexts is not None: - # configs = pd.concat([configs, contexts], axis=1) - # Not reachable for now - raise NotImplementedError() - return configs - - def get_best_observation(self) -> pd.DataFrame: - """Returns the best observation so far as a dataframe. + configs = pd.concat([config for config, _, _ in self._observations]).reset_index(drop=True) + scores = pd.concat([score for _, score, _ in self._observations]).reset_index(drop=True) + contexts = pd.concat([pd.DataFrame() if context is None else context + for _, _, context in self._observations]).reset_index(drop=True) + return (configs, scores, contexts if len(contexts.columns) > 0 else None) + + def get_best_observations(self, n_max: int = 1) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: + """ + Get the N best observations so far as a triplet of DataFrames (config, score, context). + Default is N=1. The columns are ordered in ASCENDING order of the optimization targets. + The function uses `pandas.DataFrame.nsmallest(..., keep="first")` method under the hood. + + Parameters + ---------- + n_max : int + Maximum number of best observations to return. Default is 1. Returns ------- - best_observation : pd.DataFrame - Dataframe with a single row containing the best observation. The columns are parameter names and "score" for the score. + observations : Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]] + A triplet of best (config, score, context) DataFrames of best observations. """ if len(self._observations) == 0: raise ValueError("No observations registered yet.") - observations = self.get_observations() - return observations.nsmallest(1, columns='score') + (configs, scores, contexts) = self.get_observations() + idx = scores.nsmallest(n_max, columns=self._optimization_targets, keep="first").index + return (configs.loc[idx], scores.loc[idx], + None if contexts is None else contexts.loc[idx]) def cleanup(self) -> None: """ diff --git a/mlos_core/mlos_core/optimizers/random_optimizer.py b/mlos_core/mlos_core/optimizers/random_optimizer.py index 4b023ee8df..f81092a65d 100644 --- a/mlos_core/mlos_core/optimizers/random_optimizer.py +++ b/mlos_core/mlos_core/optimizers/random_optimizer.py @@ -24,7 +24,7 @@ class RandomOptimizer(BaseOptimizer): The parameter space to optimize. """ - def _register(self, configurations: pd.DataFrame, scores: pd.Series, + def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: """Registers the given configurations and scores. @@ -35,7 +35,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.Series, configurations : pd.DataFrame Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. - scores : pd.Series + scores : pd.DataFrame Scores from running the configurations. The index is the same as the index of the configurations. 
         context : None
diff --git a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py
index 70265be54e..69ce4f8dff 100644
--- a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py
+++ b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py
@@ -29,13 +29,17 @@ def test_context_not_implemented_warning(configuration_space: CS.ConfigurationSp
     """
     if kwargs is None:
         kwargs = {}
-    optimizer = optimizer_class(parameter_space=configuration_space, **kwargs)
+    optimizer = optimizer_class(
+        parameter_space=configuration_space,
+        optimization_targets=['score'],
+        **kwargs
+    )
     suggestion = optimizer.suggest()
     scores = pd.DataFrame({'score': [1]})
     context = pd.DataFrame([["something"]])
 
-    # test context not implemented errors
+
     with pytest.raises(UserWarning):
-        optimizer.register(suggestion, scores['score'], context=context)
+        optimizer.register(suggestion, scores, context=context)
 
     with pytest.raises(UserWarning):
         optimizer.suggest(context=context)
diff --git a/mlos_core/mlos_core/tests/optimizers/one_hot_test.py b/mlos_core/mlos_core/tests/optimizers/one_hot_test.py
index 46d549ae54..0a9a6ed3c5 100644
--- a/mlos_core/mlos_core/tests/optimizers/one_hot_test.py
+++ b/mlos_core/mlos_core/tests/optimizers/one_hot_test.py
@@ -75,6 +75,7 @@ def optimizer(configuration_space: CS.ConfigurationSpace) -> BaseOptimizer:
     """
     return SmacOptimizer(
         parameter_space=configuration_space,
+        optimization_targets=['score'],
     )
diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py
new file mode 100644
index 0000000000..888d07ff54
--- /dev/null
+++ b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py
@@ -0,0 +1,97 @@
+#
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+#
+"""
+Test multi-target optimization.
+"""
+
+import logging
+import pytest
+
+import pandas as pd
+import numpy as np
+import ConfigSpace as CS
+
+from mlos_core.optimizers import OptimizerType, OptimizerFactory
+
+from mlos_core.tests import SEED
+
+
+_LOG = logging.getLogger(__name__)
+_LOG.setLevel(logging.DEBUG)
+
+
+def test_multi_target_opt() -> None:
+    """
+    Toy multi-target optimization problem to test the optimizers with
+    mixed numeric types to ensure that original dtypes are retained.
+    """
+    max_iterations = 10
+
+    def objective(point: pd.DataFrame) -> pd.DataFrame:
+        # mix of hyperparameters, optimal is to select the highest possible
+        return pd.DataFrame({
+            "score": point.x + point.y,
+            "other_score": point.x ** 2 + point.y ** 2,
+        })
+
+    input_space = CS.ConfigurationSpace(seed=SEED)
+    # add a mix of numeric datatypes
+    input_space.add_hyperparameter(
+        CS.UniformIntegerHyperparameter(name='x', lower=0, upper=5))
+    input_space.add_hyperparameter(
+        CS.UniformFloatHyperparameter(name='y', lower=0.0, upper=5.0))
+
+    optimizer = OptimizerFactory.create(
+        parameter_space=input_space,
+        optimization_targets=['score', 'other_score'],
+        optimizer_type=OptimizerType.SMAC,
+        optimizer_kwargs={
+            # Test with default config.
+            'use_default_config': True,
+            # 'n_random_init': 10,
+        },
+    )
+
+    with pytest.raises(ValueError, match="No observations"):
+        optimizer.get_best_observations()
+
+    with pytest.raises(ValueError, match="No observations"):
+        optimizer.get_observations()
+
+    for _ in range(max_iterations):
+        suggestion = optimizer.suggest()
+        assert isinstance(suggestion, pd.DataFrame)
+        assert set(suggestion.columns) == {'x', 'y'}
+        # Check suggestion values are the expected dtype
+        assert isinstance(suggestion.x.iloc[0], np.integer)
+        assert isinstance(suggestion.y.iloc[0], np.floating)
+        # Check that suggestion is in the space
+        test_configuration = CS.Configuration(
+            optimizer.parameter_space, suggestion.astype('O').iloc[0].to_dict())
+        # Raises an error if outside of configuration space
+        test_configuration.is_valid_configuration()
+        # Test registering the suggested configuration with a score.
+        observation = objective(suggestion)
+        assert isinstance(observation, pd.DataFrame)
+        assert set(observation.columns) == {'score', 'other_score'}
+        optimizer.register(suggestion, observation)
+
+    (best_config, best_score, best_context) = optimizer.get_best_observations()
+    assert isinstance(best_config, pd.DataFrame)
+    assert isinstance(best_score, pd.DataFrame)
+    assert best_context is None
+    assert set(best_config.columns) == {'x', 'y'}
+    assert set(best_score.columns) == {'score', 'other_score'}
+    assert best_config.shape == (1, 2)
+    assert best_score.shape == (1, 2)
+
+    (all_configs, all_scores, all_contexts) = optimizer.get_observations()
+    assert isinstance(all_configs, pd.DataFrame)
+    assert isinstance(all_scores, pd.DataFrame)
+    assert all_contexts is None
+    assert set(all_configs.columns) == {'x', 'y'}
+    assert set(all_scores.columns) == {'score', 'other_score'}
+    assert all_configs.shape == (max_iterations, 2)
+    assert all_scores.shape == (max_iterations, 2)
diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py
index 6fd6298694..67c7eddf3b 100644
--- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py
+++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py
@@ -14,7 +14,6 @@
 
 import pandas as pd
 import numpy as np
-import numpy.typing as npt
 import ConfigSpace as CS
 
 from mlos_core.optimizers import (
@@ -40,7 +39,11 @@ def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace
     """
     if kwargs is None:
         kwargs = {}
-    optimizer = optimizer_class(parameter_space=configuration_space, **kwargs)
+    optimizer = optimizer_class(
+        parameter_space=configuration_space,
+        optimization_targets=['score'],
+        **kwargs
+    )
 
     assert optimizer is not None
     assert optimizer.parameter_space is not None
@@ -64,6 +67,7 @@ def test_basic_interface_toy_problem(configuration_space: CS.ConfigurationSpace,
     """
     Toy problem to test the optimizers.
     """
+    # pylint: disable=too-many-locals
     max_iterations = 20
     if kwargs is None:
         kwargs = {}
@@ -72,15 +76,19 @@ def test_basic_interface_toy_problem(configuration_space: CS.ConfigurationSpace,
         # To avoid having to train more than 25 model iterations, we set a lower number of max iterations.
         kwargs['max_trials'] = max_iterations * 2
 
-    def objective(x: pd.Series) -> npt.ArrayLike:   # pylint: disable=invalid-name
-        ret: npt.ArrayLike = (6 * x - 2)**2 * np.sin(12 * x - 4)
-        return ret
+    def objective(x: pd.Series) -> pd.DataFrame:
+        return pd.DataFrame({"score": (6 * x - 2)**2 * np.sin(12 * x - 4)})
+
     # Emukit doesn't allow specifying a random state, so we set the global seed.
     np.random.seed(SEED)
 
-    optimizer = optimizer_class(parameter_space=configuration_space, **kwargs)
+    optimizer = optimizer_class(
+        parameter_space=configuration_space,
+        optimization_targets=['score'],
+        **kwargs
+    )
 
     with pytest.raises(ValueError, match="No observations"):
-        optimizer.get_best_observation()
+        optimizer.get_best_observations()
 
     with pytest.raises(ValueError, match="No observations"):
         optimizer.get_observations()
@@ -88,31 +96,40 @@ def objective(x: pd.Series) -> npt.ArrayLike:   # pylint: disable=invalid-name
     for _ in range(max_iterations):
         suggestion = optimizer.suggest()
         assert isinstance(suggestion, pd.DataFrame)
-        assert (suggestion.columns == ['x', 'y', 'z']).all()
+        assert set(suggestion.columns) == {'x', 'y', 'z'}
         # check that suggestion is in the space
         configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict())
         # Raises an error if outside of configuration space
         configuration.is_valid_configuration()
         observation = objective(suggestion['x'])
-        assert isinstance(observation, pd.Series)
+        assert isinstance(observation, pd.DataFrame)
         optimizer.register(suggestion, observation)
 
-    best_observation = optimizer.get_best_observation()
-    assert isinstance(best_observation, pd.DataFrame)
-    assert (best_observation.columns == ['x', 'y', 'z', 'score']).all()
-    assert best_observation['score'].iloc[0] < -5
-
-    all_observations = optimizer.get_observations()
-    assert isinstance(all_observations, pd.DataFrame)
-    assert all_observations.shape == (20, 4)
-    assert (all_observations.columns == ['x', 'y', 'z', 'score']).all()
+    (best_config, best_score, best_context) = optimizer.get_best_observations()
+    assert isinstance(best_config, pd.DataFrame)
+    assert isinstance(best_score, pd.DataFrame)
+    assert best_context is None
+    assert set(best_config.columns) == {'x', 'y', 'z'}
+    assert set(best_score.columns) == {'score'}
+    assert best_config.shape == (1, 3)
+    assert best_score.shape == (1, 1)
+    assert best_score.score.iloc[0] < -5
+
+    (all_configs, all_scores, all_contexts) = optimizer.get_observations()
+    assert isinstance(all_configs, pd.DataFrame)
+    assert isinstance(all_scores, pd.DataFrame)
+    assert all_contexts is None
+    assert set(all_configs.columns) == {'x', 'y', 'z'}
+    assert set(all_scores.columns) == {'score'}
+    assert all_configs.shape == (20, 3)
+    assert all_scores.shape == (20, 1)
 
     # It would be better to put this into bayesian_optimizer_test but then we'd have to refit the model
     if isinstance(optimizer, BaseBayesianOptimizer):
-        pred_best = optimizer.surrogate_predict(best_observation[['x', 'y', 'z']])
+        pred_best = optimizer.surrogate_predict(best_config)
         assert pred_best.shape == (1,)
 
-        pred_all = optimizer.surrogate_predict(all_observations[['x', 'y', 'z']])
+        pred_all = optimizer.surrogate_predict(all_configs)
         assert pred_all.shape == (20,)
@@ -145,11 +162,13 @@ def test_create_optimizer_with_factory_method(configuration_space: CS.Configurat
     if optimizer_type is None:
         optimizer = OptimizerFactory.create(
             parameter_space=configuration_space,
+            optimization_targets=['score'],
             optimizer_kwargs=kwargs,
         )
     else:
         optimizer = OptimizerFactory.create(
             parameter_space=configuration_space,
+            optimization_targets=['score'],
             optimizer_type=optimizer_type,
             optimizer_kwargs=kwargs,
         )
@@ -179,17 +198,15 @@ def test_optimizer_with_llamatune(optimizer_type: OptimizerType, kwargs: Optiona
     """
     Toy problem to test the optimizers with llamatune space adapter.
""" - # pylint: disable=too-complex - # pylint: disable=too-many-statements - # pylint: disable=too-many-locals + # pylint: disable=too-complex,disable=too-many-statements,disable=too-many-locals num_iters = 50 if kwargs is None: kwargs = {} - def objective(point: pd.DataFrame) -> pd.Series: + def objective(point: pd.DataFrame) -> pd.DataFrame: # Best value can be reached by tuning an 1-dimensional search space - ret: pd.Series = np.sin(point['x'] * point['y']) - assert ret.hasnans is False + ret = pd.DataFrame({"score": np.sin(point.x * point.y)}) + assert ret.score.hasnans is False return ret input_space = CS.ConfigurationSpace(seed=1234) @@ -218,6 +235,7 @@ def objective(point: pd.DataFrame) -> pd.Series: llamatune_optimizer: BaseOptimizer = OptimizerFactory.create( parameter_space=input_space, + optimization_targets=['score'], optimizer_type=optimizer_type, optimizer_kwargs=llamatune_optimizer_kwargs, space_adapter_type=SpaceAdapterType.LLAMATUNE, @@ -226,6 +244,7 @@ def objective(point: pd.DataFrame) -> pd.Series: # Initialize an optimizer that uses the original space optimizer: BaseOptimizer = OptimizerFactory.create( parameter_space=input_space, + optimization_targets=['score'], optimizer_type=optimizer_type, optimizer_kwargs=optimizer_kwargs, ) @@ -261,27 +280,38 @@ def objective(point: pd.DataFrame) -> pd.Series: llamatune_optimizer.register(suggestion, observation) # Retrieve best observations - best_observation = optimizer.get_best_observation() - llamatune_best_observation = llamatune_optimizer.get_best_observation() + best_observation = optimizer.get_best_observations() + llamatune_best_observation = llamatune_optimizer.get_best_observations() - for best_obv in (best_observation, llamatune_best_observation): - assert isinstance(best_obv, pd.DataFrame) - assert (best_obv.columns == ['x', 'y', 'score']).all() + for (best_config, best_score, best_context) in (best_observation, llamatune_best_observation): + assert isinstance(best_config, pd.DataFrame) + assert isinstance(best_score, pd.DataFrame) + assert best_context is None + assert set(best_config.columns) == {'x', 'y'} + assert set(best_score.columns) == {'score'} + + (best_config, best_score, _context) = best_observation + (llamatune_best_config, llamatune_best_score, _context) = llamatune_best_observation # LlamaTune's optimizer score should better (i.e., lower) than plain optimizer's one, or close to that - assert best_observation['score'].iloc[0] > llamatune_best_observation['score'].iloc[0] or \ - best_observation['score'].iloc[0] + 1e-3 > llamatune_best_observation['score'].iloc[0] + assert best_score.score.iloc[0] > llamatune_best_score.score.iloc[0] or \ + best_score.score.iloc[0] + 1e-3 > llamatune_best_score.score.iloc[0] # Retrieve and check all observations - for all_obvs in (optimizer.get_observations(), llamatune_optimizer.get_observations()): - assert isinstance(all_obvs, pd.DataFrame) - assert all_obvs.shape == (num_iters, 3) - assert (all_obvs.columns == ['x', 'y', 'score']).all() + for (all_configs, all_scores, all_contexts) in ( + optimizer.get_observations(), llamatune_optimizer.get_observations()): + assert isinstance(all_configs, pd.DataFrame) + assert isinstance(all_scores, pd.DataFrame) + assert all_contexts is None + assert set(all_configs.columns) == {'x', 'y'} + assert set(all_scores.columns) == {'score'} + assert len(all_configs) == num_iters + assert len(all_scores) == num_iters # .surrogate_predict method not currently implemented if space adapter is employed if isinstance(llamatune_optimizer, 
         with pytest.raises(NotImplementedError):
-            llamatune_optimizer.surrogate_predict(llamatune_best_observation[['x', 'y']])
+            llamatune_optimizer.surrogate_predict(llamatune_best_config)
 
 
 # Dynamically determine all of the optimizers we have implemented.
@@ -315,10 +345,9 @@ def test_mixed_numerics_type_input_space_types(optimizer_type: Optional[Optimize
     if kwargs is None:
         kwargs = {}
 
-    def objective(point: pd.DataFrame) -> pd.Series:
+    def objective(point: pd.DataFrame) -> pd.DataFrame:
         # mix of hyperparameters, optimal is to select the highest possible
-        ret: pd.Series = point["x"] + point["y"]
-        return ret
+        return pd.DataFrame({"score": point["x"] + point["y"]})
 
     input_space = CS.ConfigurationSpace(seed=SEED)
     # add a mix of numeric datatypes
@@ -328,17 +357,19 @@ def objective(point: pd.DataFrame) -> pd.Series:
     if optimizer_type is None:
         optimizer = OptimizerFactory.create(
             parameter_space=input_space,
+            optimization_targets=['score'],
             optimizer_kwargs=kwargs,
         )
     else:
         optimizer = OptimizerFactory.create(
             parameter_space=input_space,
+            optimization_targets=['score'],
             optimizer_type=optimizer_type,
             optimizer_kwargs=kwargs,
         )
 
     with pytest.raises(ValueError, match="No observations"):
-        optimizer.get_best_observation()
+        optimizer.get_best_observations()
 
     with pytest.raises(ValueError, match="No observations"):
         optimizer.get_observations()
@@ -356,11 +387,15 @@ def objective(point: pd.DataFrame) -> pd.Series:
         test_configuration.is_valid_configuration()
         # Test registering the suggested configuration with a score.
         observation = objective(suggestion)
-        assert isinstance(observation, pd.Series)
+        assert isinstance(observation, pd.DataFrame)
         optimizer.register(suggestion, observation)
 
-    best_observation = optimizer.get_best_observation()
-    assert isinstance(best_observation, pd.DataFrame)
+    (best_config, best_score, best_context) = optimizer.get_best_observations()
+    assert isinstance(best_config, pd.DataFrame)
+    assert isinstance(best_score, pd.DataFrame)
+    assert best_context is None
 
-    all_observations = optimizer.get_observations()
-    assert isinstance(all_observations, pd.DataFrame)
+    (all_configs, all_scores, all_contexts) = optimizer.get_observations()
+    assert isinstance(all_configs, pd.DataFrame)
+    assert isinstance(all_scores, pd.DataFrame)
+    assert all_contexts is None
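
Example usage of the updated multi-objective API (a minimal sketch for illustration only, not part of the patch; the search space and target names are borrowed from the `optimizer_multiobj_test.py` test above):

```python
# Minimal sketch of the new multi-target mlos_core workflow.
import ConfigSpace as CS
import pandas as pd

from mlos_core.optimizers import OptimizerFactory, OptimizerType

input_space = CS.ConfigurationSpace(seed=1234)
input_space.add_hyperparameter(CS.UniformIntegerHyperparameter(name='x', lower=0, upper=5))
input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='y', lower=0.0, upper=5.0))

# NEW: declare the score column(s) to minimize up front.
optimizer = OptimizerFactory.create(
    parameter_space=input_space,
    optimization_targets=['score', 'other_score'],
    optimizer_type=OptimizerType.SMAC,
)

for _ in range(10):
    suggestion = optimizer.suggest()
    # NEW: scores are a multi-column DataFrame (one column per target)
    # instead of a single-column Series.
    scores = pd.DataFrame({
        'score': suggestion.x + suggestion.y,
        'other_score': suggestion.x ** 2 + suggestion.y ** 2,
    })
    optimizer.register(suggestion, scores)

# NEW: observations come back as a (config, score, context) triplet of DataFrames.
(best_config, best_score, _context) = optimizer.get_best_observations()
print(best_config, best_score)
```

Note that mlos_core treats all targets as minimization targets; mlos_bench flips the sign of maximization metrics before registering them, as `_adjust_signs_df()` above shows.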