From d1a4658250d4503ff16e47be687d48fbc31db642 Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Mon, 1 Jul 2024 17:13:58 -0500 Subject: [PATCH] Add Metadata to optimizers (#770) Adds metadata to the response from suggest, and allows it to be passed into register. This is in support of adding multi-fidelity support (#751) --------- Co-authored-by: Brian Kroth Co-authored-by: Brian Kroth --- .../optimizers/mlos_core_optimizer.py | 2 +- .../bayesian_optimizers/smac_optimizer.py | 18 +++++++++---- .../mlos_core/optimizers/flaml_optimizer.py | 19 ++++++++++---- mlos_core/mlos_core/optimizers/optimizer.py | 26 ++++++++++++++----- .../mlos_core/optimizers/random_optimizer.py | 18 +++++++++---- .../optimizers/bayesian_optimizers_test.py | 2 +- .../optimizers/optimizer_multiobj_test.py | 3 ++- .../tests/optimizers/optimizer_test.py | 21 ++++++++------- 8 files changed, 74 insertions(+), 35 deletions(-) diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py index 8e7c75a0d5..e0235f76b9 100644 --- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py @@ -180,7 +180,7 @@ def suggest(self) -> TunableGroups: tunables = super().suggest() if self._start_with_defaults: _LOG.info("Use default values for the first trial") - df_config = self._opt.suggest(defaults=self._start_with_defaults) + df_config, _metadata = self._opt.suggest(defaults=self._start_with_defaults) self._start_with_defaults = False _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config) return tunables.assign( diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index 7bc7740a61..aa948b8125 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -9,7 +9,7 @@ from logging import
warning from pathlib import Path -from typing import Dict, List, Optional, Union, TYPE_CHECKING +from typing import Dict, List, Optional, Tuple, Union, TYPE_CHECKING from tempfile import TemporaryDirectory from warnings import warn @@ -242,7 +242,7 @@ def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None raise RuntimeError('This function should never be called.') def _register(self, *, configs: pd.DataFrame, - scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: + scores: pd.DataFrame, context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: """Registers the given configs and scores. Parameters @@ -255,6 +255,9 @@ def _register(self, *, configs: pd.DataFrame, context : pd.DataFrame Not Yet Implemented. + + metadata : pd.DataFrame + Not Yet Implemented. """ from smac.runhistory import StatusType, TrialInfo, TrialValue # pylint: disable=import-outside-toplevel @@ -272,7 +275,7 @@ def _register(self, *, configs: pd.DataFrame, # Save optimizer once we register all configs self.base_optimizer.optimizer.save() - def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """Suggests a new configuration. Parameters @@ -284,6 +287,9 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: ------- configuration : pd.DataFrame Pandas dataframe with a single row. Column names are the parameter names. + + metadata : Optional[pd.DataFrame] + Not yet implemented. 
""" if TYPE_CHECKING: from smac.runhistory import TrialInfo # pylint: disable=import-outside-toplevel,unused-import @@ -297,9 +303,11 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: assert trial.config.config_space == self.optimizer_parameter_space self.trial_info_map[trial.config] = trial config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys())) - return config_df + return config_df, None - def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: + def register_pending(self, *, configs: pd.DataFrame, + context: Optional[pd.DataFrame] = None, + metadata: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() def surrogate_predict(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: diff --git a/mlos_core/mlos_core/optimizers/flaml_optimizer.py b/mlos_core/mlos_core/optimizers/flaml_optimizer.py index a58e74af02..4f478db2bf 100644 --- a/mlos_core/mlos_core/optimizers/flaml_optimizer.py +++ b/mlos_core/mlos_core/optimizers/flaml_optimizer.py @@ -6,7 +6,7 @@ Contains the FlamlOptimizer class. """ -from typing import Dict, List, NamedTuple, Optional, Union +from typing import Dict, List, NamedTuple, Optional, Tuple, Union from warnings import warn import ConfigSpace @@ -86,7 +86,7 @@ def __init__(self, *, # pylint: disable=too-many-arguments self._suggested_config: Optional[dict] def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: """Registers the given configs and scores. Parameters @@ -99,9 +99,15 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context : None Not Yet Implemented. + + metadata : None + Not Yet Implemented. 
""" if context is not None: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) + if metadata is not None: + warn(f"Not Implemented: Ignoring metadata {list(metadata.columns)}", UserWarning) + for (_, config), (_, score) in zip(configs.astype('O').iterrows(), scores.iterrows()): cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration( self.optimizer_parameter_space, values=config.to_dict()) @@ -112,7 +118,7 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, score=float(np.average(score.astype(float), weights=self._objective_weights)), ) - def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """Suggests a new configuration. Sampled at random using ConfigSpace. @@ -126,14 +132,17 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: ------- configuration : pd.DataFrame Pandas dataframe with a single row. Column names are the parameter names. + + metadata : None + Not implemented. 
""" if context is not None: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) config: dict = self._get_next_config() - return pd.DataFrame(config, index=[0]) + return pd.DataFrame(config, index=[0]), None def register_pending(self, *, configs: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() def _target_function(self, config: dict) -> Union[dict, None]: diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index a72a4e1eb8..8fcf592a6c 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -69,7 +69,7 @@ def space_adapter(self) -> Optional[BaseSpaceAdapter]: return self._space_adapter def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: """Wrapper method, which employs the space adapter (if any), before registering the configs and scores. Parameters @@ -81,8 +81,12 @@ def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context : pd.DataFrame Not Yet Implemented. + + metadata : Optional[pd.DataFrame] + Not Yet Implemented. """ # Do some input validation. + assert metadata is None or isinstance(metadata, pd.DataFrame) assert set(scores.columns) == set(self._optimization_targets), \ "Mismatched optimization targets." 
assert self._has_context is None or self._has_context ^ (context is None), \ @@ -105,7 +109,7 @@ def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, @abstractmethod def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: """Registers the given configs and scores. Parameters @@ -120,7 +124,8 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, """ pass # pylint: disable=unnecessary-pass # pragma: no cover - def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> pd.DataFrame: + def suggest(self, *, context: Optional[pd.DataFrame] = None, + defaults: bool = False) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """ Wrapper method, which employs the space adapter (if any), after suggesting a new configuration. @@ -139,10 +144,11 @@ def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = Fa """ if defaults: configuration = config_to_dataframe(self.parameter_space.get_default_configuration()) + metadata = None if self.space_adapter is not None: configuration = self.space_adapter.inverse_transform(configuration) else: - configuration = self._suggest(context=context) + configuration, metadata = self._suggest(context=context) assert len(configuration) == 1, \ "Suggest must return a single configuration." assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), \ @@ -151,10 +157,10 @@ def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = Fa configuration = self._space_adapter.transform(configuration) assert set(configuration.columns).issubset(set(self.parameter_space)), \ "Space adapter produced a configuration that does not match the expected parameter space." 
- return configuration + return configuration, metadata @abstractmethod - def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """Suggests a new configuration. Parameters @@ -166,12 +172,16 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: ------- configuration : pd.DataFrame Pandas dataframe with a single row. Column names are the parameter names. + + metadata : Optional[pd.DataFrame] + The metadata associated with the given configuration used for evaluations. """ pass # pylint: disable=unnecessary-pass # pragma: no cover @abstractmethod def register_pending(self, *, configs: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, + metadata: Optional[pd.DataFrame] = None) -> None: """Registers the given configs as "pending". That is it say, it has been suggested by the optimizer, and an experiment trial has been started. This can be useful for executing multiple trials in parallel, retry logic, etc. @@ -182,6 +192,8 @@ def register_pending(self, *, configs: pd.DataFrame, Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. context : pd.DataFrame Not Yet Implemented. + metadata : Optional[pd.DataFrame] + Not Yet Implemented. """ pass # pylint: disable=unnecessary-pass # pragma: no cover diff --git a/mlos_core/mlos_core/optimizers/random_optimizer.py b/mlos_core/mlos_core/optimizers/random_optimizer.py index 8893b456ac..0af785ef20 100644 --- a/mlos_core/mlos_core/optimizers/random_optimizer.py +++ b/mlos_core/mlos_core/optimizers/random_optimizer.py @@ -6,7 +6,7 @@ Contains the RandomOptimizer class. 
""" -from typing import Optional +from typing import Optional, Tuple from warnings import warn import pandas as pd @@ -25,7 +25,7 @@ class RandomOptimizer(BaseOptimizer): """ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: """Registers the given configs and scores. Doesn't do anything on the RandomOptimizer except storing configs for logging. @@ -40,12 +40,17 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame, context : None Not Yet Implemented. + + metadata : None + Not Yet Implemented. """ if context is not None: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) + if metadata is not None: + warn(f"Not Implemented: Ignoring metadata {list(metadata.columns)}", UserWarning) # should we pop them from self.pending_observations? - def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: + def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: """Suggests a new configuration. Sampled at random using ConfigSpace. @@ -59,13 +64,16 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: ------- configuration : pd.DataFrame Pandas dataframe with a single row. Column names are the parameter names. + + metadata : None + Not implemented. """ if context is not None: # not sure how that works here? 
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) - return pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0]) + return pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0]), None def register_pending(self, *, configs: pd.DataFrame, - context: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() # self._pending_observations.append((configs, context)) diff --git a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py index 037e85ef73..c1aaa710ac 100644 --- a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py +++ b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py @@ -34,7 +34,7 @@ def test_context_not_implemented_warning(configuration_space: CS.ConfigurationSp optimization_targets=['score'], **kwargs ) - suggestion = optimizer.suggest() + suggestion, _metadata = optimizer.suggest() scores = pd.DataFrame({'score': [1]}) context = pd.DataFrame([["something"]]) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py index e3c053fa5b..22263b4c1d 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py @@ -84,8 +84,9 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: optimizer.get_observations() for _ in range(max_iterations): - suggestion = optimizer.suggest() + suggestion, metadata = optimizer.suggest() assert isinstance(suggestion, pd.DataFrame) + assert metadata is None or isinstance(metadata, pd.DataFrame) assert set(suggestion.columns) == {'x', 'y'} # Check suggestion values are the expected dtype assert isinstance(suggestion.x.iloc[0], np.integer) diff --git 
a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 49ff691635..8231e59feb 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -48,7 +48,7 @@ def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace assert optimizer.parameter_space is not None - suggestion = optimizer.suggest() + suggestion, metadata = optimizer.suggest() assert suggestion is not None myrepr = repr(optimizer) @@ -56,7 +56,7 @@ def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace # pending not implemented with pytest.raises(NotImplementedError): - optimizer.register_pending(configs=suggestion) + optimizer.register_pending(configs=suggestion, metadata=metadata) @pytest.mark.parametrize(('optimizer_class', 'kwargs'), [ @@ -94,8 +94,9 @@ def objective(x: pd.Series) -> pd.DataFrame: optimizer.get_observations() for _ in range(max_iterations): - suggestion = optimizer.suggest() + suggestion, metadata = optimizer.suggest() assert isinstance(suggestion, pd.DataFrame) + assert metadata is None or isinstance(metadata, pd.DataFrame) assert set(suggestion.columns) == {'x', 'y', 'z'} # check that suggestion is in the space configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict()) @@ -103,7 +104,7 @@ def objective(x: pd.Series) -> pd.DataFrame: configuration.is_valid_configuration() observation = objective(suggestion['x']) assert isinstance(observation, pd.DataFrame) - optimizer.register(configs=suggestion, scores=observation) + optimizer.register(configs=suggestion, scores=observation, metadata=metadata) (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame) @@ -268,16 +269,16 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: _LOG.debug("Optimizer is done with random init.") # loop for optimizer - suggestion = 
optimizer.suggest() + suggestion, metadata = optimizer.suggest() observation = objective(suggestion) - optimizer.register(configs=suggestion, scores=observation) + optimizer.register(configs=suggestion, scores=observation, metadata=metadata) # loop for llamatune-optimizer - suggestion = llamatune_optimizer.suggest() + suggestion, metadata = llamatune_optimizer.suggest() _x, _y = suggestion['x'].iloc[0], suggestion['y'].iloc[0] assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3., rel=1e-3) # optimizer explores 1-dimensional space observation = objective(suggestion) - llamatune_optimizer.register(configs=suggestion, scores=observation) + llamatune_optimizer.register(configs=suggestion, scores=observation, metadata=metadata) # Retrieve best observations best_observation = optimizer.get_best_observations() @@ -375,7 +376,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: optimizer.get_observations() for _ in range(max_iterations): - suggestion = optimizer.suggest() + suggestion, metadata = optimizer.suggest() assert isinstance(suggestion, pd.DataFrame) assert (suggestion.columns == ['x', 'y']).all() # Check suggestion values are the expected dtype @@ -388,7 +389,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: # Test registering the suggested configuration with a score. observation = objective(suggestion) assert isinstance(observation, pd.DataFrame) - optimizer.register(configs=suggestion, scores=observation) + optimizer.register(configs=suggestion, scores=observation, metadata=metadata) (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame)