diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py index dbfc770243..a5e9fe526c 100644 --- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py @@ -22,6 +22,7 @@ from mlos_bench.services.base_service import Service from mlos_bench.tunables.tunable import TunableValue from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_core.optimizers.observations import Observation from mlos_core.optimizers import ( DEFAULT_OPTIMIZER_TYPE, BaseOptimizer, @@ -128,7 +129,7 @@ def bulk_register( # TODO: Specify (in the config) which metrics to pass to the optimizer. # Issue: https://github.com/microsoft/MLOS/issues/745 - self._opt.register(configs=df_configs, scores=df_scores) + self._opt.register(observation=Observation(config=df_configs, performance=df_scores)) if _LOG.isEnabledFor(logging.DEBUG): (score, _) = self.get_best_observation() @@ -198,10 +199,12 @@ def suggest(self) -> TunableGroups: tunables = super().suggest() if self._start_with_defaults: _LOG.info("Use default values for the first trial") - df_config, _metadata = self._opt.suggest(defaults=self._start_with_defaults) + suggestion = self._opt.suggest(defaults=self._start_with_defaults) self._start_with_defaults = False - _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config) - return tunables.assign(configspace_data_to_tunable_values(df_config.loc[0].to_dict())) + _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, suggestion.config) + return tunables.assign( + configspace_data_to_tunable_values(suggestion.config.loc[0].to_dict()) + ) def register( self, @@ -221,15 +224,19 @@ def register( # TODO: Specify (in the config) which metrics to pass to the optimizer. # Issue: https://github.com/microsoft/MLOS/issues/745 self._opt.register( - configs=df_config, - scores=pd.DataFrame([registered_score], dtype=float), + observation=Observation( + config=df_config, + performance=pd.DataFrame([registered_score], dtype=float), + ) ) return registered_score def get_best_observation( self, ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]: - (df_config, df_score, _df_context) = self._opt.get_best_observations() + (df_config, df_score, _df_context, _metadata) = ( + self._opt.get_best_observations().to_legacy() + ) if len(df_config) == 0: return (None, None) params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict()) diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index 611dc04044..5742a220fc 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -18,6 +18,7 @@ import numpy.typing as npt import pandas as pd +from mlos_core.optimizers.observations import Observation, Suggestion from mlos_core.optimizers.bayesian_optimizers.bayesian_optimizer import ( BaseBayesianOptimizer, ) @@ -272,29 +273,15 @@ def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None def _register( self, *, - configs: pd.DataFrame, - scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None, - metadata: Optional[pd.DataFrame] = None, + observation: Observation, ) -> None: """ Registers the given configs and scores. Parameters ---------- - configs : pd.DataFrame - Dataframe of configs / parameters. 
The columns are parameter names and - the rows are the configs. - - scores : pd.DataFrame - Scores from running the configs. The index is the same as the index of - the configs. - - context : pd.DataFrame - Not Yet Implemented. - - metadata: pd.DataFrame - Not Yet Implemented. + observation: Observation + The observation to register. """ from smac.runhistory import ( # pylint: disable=import-outside-toplevel StatusType, @@ -302,12 +289,16 @@ def _register( TrialValue, ) - if context is not None: - warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) + if observation.context is not None: + warn( + f"Not Implemented: Ignoring context {list(observation.context.columns)}", + UserWarning, + ) # Register each trial (one-by-one) for config, (_i, score) in zip( - self._to_configspace_configs(configs=configs), scores.iterrows() + self._to_configspace_configs(configs=observation.config), + observation.performance.iterrows(), ): # Retrieve previously generated TrialInfo (returned by .ask()) or create # new TrialInfo instance @@ -325,7 +316,7 @@ def _suggest( self, *, context: Optional[pd.DataFrame] = None, - ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: + ) -> Suggestion: """ Suggests a new configuration. @@ -357,15 +348,9 @@ def _suggest( config_df = pd.DataFrame( [trial.config], columns=list(self.optimizer_parameter_space.keys()) ) - return config_df, None + return Suggestion(config=config_df, context=context, metadata=None) - def register_pending( - self, - *, - configs: pd.DataFrame, - context: Optional[pd.DataFrame] = None, - metadata: Optional[pd.DataFrame] = None, - ) -> None: + def register_pending(self, *, suggestion: Suggestion) -> None: raise NotImplementedError() def surrogate_predict( @@ -383,11 +368,14 @@ def surrogate_predict( raise NotImplementedError("Space adapter not supported for surrogate_predict.") # pylint: disable=protected-access - if len(self._observations) <= self.base_optimizer._initial_design._n_configs: + if ( + sum(len(o.config.index) for o in self._observations) + <= self.base_optimizer._initial_design._n_configs + ): raise RuntimeError( "Surrogate model can make predictions *only* after " "all initial points have been evaluated " - f"{len(self._observations)} <= {self.base_optimizer._initial_design._n_configs}" + f"{sum(len(o.config.index) for o in self._observations)} <= {self.base_optimizer._initial_design._n_configs}" ) if self.base_optimizer._config_selector._model is None: raise RuntimeError("Surrogate model is not yet trained") @@ -440,7 +428,8 @@ def _to_configspace_configs(self, *, configs: pd.DataFrame) -> List[ConfigSpace. configs : list List of ConfigSpace configs. 
""" + values = [config.to_dict() for (_, config) in configs.astype("O").iterrows()] return [ - ConfigSpace.Configuration(self.optimizer_parameter_space, values=config.to_dict()) - for (_, config) in configs.astype("O").iterrows() + ConfigSpace.Configuration(self.optimizer_parameter_space, values=value) + for value in values ] diff --git a/mlos_core/mlos_core/optimizers/flaml_optimizer.py b/mlos_core/mlos_core/optimizers/flaml_optimizer.py index 50def8bc80..eb25912cb1 100644 --- a/mlos_core/mlos_core/optimizers/flaml_optimizer.py +++ b/mlos_core/mlos_core/optimizers/flaml_optimizer.py @@ -11,6 +11,7 @@ import numpy as np import pandas as pd +from mlos_core.optimizers.observations import Observation, Suggestion from mlos_core.optimizers.optimizer import BaseOptimizer from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter from mlos_core.util import normalize_config @@ -92,38 +93,29 @@ def __init__( self.evaluated_samples: Dict[ConfigSpace.Configuration, EvaluatedSample] = {} self._suggested_config: Optional[dict] - def _register( - self, - *, - configs: pd.DataFrame, - scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None, - metadata: Optional[pd.DataFrame] = None, - ) -> None: + def _register(self, *, observation: Observation) -> None: """ Registers the given configs and scores. Parameters ---------- - configs : pd.DataFrame - Dataframe of configs / parameters. The columns are parameter names and - the rows are the configs. - - scores : pd.DataFrame - Scores from running the configs. The index is the same as the index of the configs. - - context : None - Not Yet Implemented. - - metadata : None - Not Yet Implemented. + observation: Observation + The observation to register. """ - if context is not None: - warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) - if metadata is not None: - warn(f"Not Implemented: Ignoring metadata {list(metadata.columns)}", UserWarning) + if observation.context is not None: + warn( + f"Not Implemented: Ignoring context {list(observation.context.columns)}", + UserWarning, + ) + if observation.metadata is not None: + warn( + f"Not Implemented: Ignoring metadata {list(observation.metadata.columns)}", + UserWarning, + ) - for (_, config), (_, score) in zip(configs.astype("O").iterrows(), scores.iterrows()): + for (_, config), (_, score) in zip( + observation.config.astype("O").iterrows(), observation.performance.iterrows() + ): cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration( self.optimizer_parameter_space, values=config.to_dict() ) @@ -138,7 +130,7 @@ def _suggest( self, *, context: Optional[pd.DataFrame] = None, - ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: + ) -> Suggestion: """ Suggests a new configuration. @@ -151,24 +143,15 @@ def _suggest( Returns ------- - configuration : pd.DataFrame - Pandas dataframe with a single row. Column names are the parameter names. - - metadata : None - Not implemented. + suggestion: Suggestion + The suggestion to evaluate. 
""" if context is not None: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) config: dict = self._get_next_config() - return pd.DataFrame(config, index=[0]), None + return Suggestion(config=pd.DataFrame(config, index=[0]), context=context) - def register_pending( - self, - *, - configs: pd.DataFrame, - context: Optional[pd.DataFrame] = None, - metadata: Optional[pd.DataFrame] = None, - ) -> None: + def register_pending(self, *, suggestion: Suggestion) -> None: raise NotImplementedError() def _target_function(self, config: dict) -> Union[dict, None]: diff --git a/mlos_core/mlos_core/optimizers/observations.py b/mlos_core/mlos_core/optimizers/observations.py new file mode 100644 index 0000000000..5d87ea9e25 --- /dev/null +++ b/mlos_core/mlos_core/optimizers/observations.py @@ -0,0 +1,230 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +from typing import Any, Iterator, List, Optional, Tuple + +import pandas as pd + + +def compare_optional_series(left: Optional[pd.Series], right: Optional[pd.Series]) -> bool: + if left is None and right is not None: + return False + if left is not None and right is None: + return False + elif left is not None and right is not None: + if not left.equals(right): + return False + return True + + +def compare_optional_dataframe( + left: Optional[pd.DataFrame], right: Optional[pd.DataFrame] +) -> bool: + if left is None and right is not None: + return False + if left is not None and right is None: + return False + elif left is not None and right is not None: + if not left.equals(right): + return False + return True + + +class Observation: + """ + A single observation of a configuration's performance. + + Attributes + ---------- + config : pd.DataFrame + Pandas dataframe with a single row. Column names are the parameter names. + performance : Optional[pd.Series] + The performance metrics observed. + context : Optional[pd.Series] + The context in which the configuration was evaluated. + Not Yet Implemented. + metadata: Optional[pd.Series] + The metadata in which the configuration was evaluated + Not Yet Implemented. + """ + + def __init__( + self, + *, + config: pd.DataFrame, + performance: pd.DataFrame, + context: Optional[pd.DataFrame] = None, + metadata: Optional[pd.DataFrame] = None, + ): + self.config = config + self.performance = performance + self.context = context + self.metadata = metadata + + def __repr__(self) -> str: + return f"Observation(config={self.config}, performance={self.performance}, context={self.context}, metadata={self.metadata})" + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Observation): + return False + + if not self.config.equals(other.config): + return False + if not self.performance.equals(other.performance): + return False + if not compare_optional_dataframe(self.context, other.context): + return False + if not compare_optional_dataframe(self.metadata, other.metadata): + return False + + return True + + def __ne__(self, other: Any) -> bool: + return not self.__eq__(other) + + +class Suggestion: + """ + A single suggestion for a configuration. + + Attributes + ---------- + config : pd.DataFrame + The suggested configuration. + """ + + def __init__( + self, + *, + config: pd.DataFrame, + context: Optional[pd.DataFrame] = None, + metadata: Optional[pd.DataFrame] = None, + ): + self.config = config + self.context = context + self.metadata = metadata + + def evaluate(self, performance: pd.DataFrame) -> Observation: + """ + Completes the suggestion. 
+ + Parameters + ---------- + performance : pd.Series + The performance metrics observed. + + Returns + ------- + Observation + The observation of the suggestion. + """ + + assert len(performance) == len( + self.config + ), "Performance must have the same length as the config" + + return Observation( + config=self.config, + performance=performance, + context=self.context, + metadata=self.metadata, + ) + + def __repr__(self) -> str: + return ( + f"Suggestion(config={self.config}, context={self.context}, metadata={self.metadata})" + ) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Suggestion): + return False + + if not self.config.equals(other.config): + return False + if not compare_optional_dataframe(self.context, other.context): + return False + if not compare_optional_dataframe(self.metadata, other.metadata): + return False + + return True + + def __ne__(self, other: Any) -> bool: + return not self.__eq__(other) + + +class Observations: + """ + A collection of observations. + + Attributes + ---------- + observations : List[Observation] + The list of observations. + """ + + def __init__(self, observations: List[Observation] = []): + self.observations = observations + + def append(self, observation: Observation) -> None: + """ + Appends an observation to the collection. + + Parameters + ---------- + Observation : observation + The observation to append. + """ + + self.observations.append(observation) + + def __iter__(self) -> Iterator[Observation]: + return iter(self.observations) + + def to_legacy( + self, + ) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame], Optional[pd.DataFrame]]: + """ + Hack to allow for iteration over the observations. + + Returns + ------- + Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame], Optional[pd.DataFrame] + Legacy access pattern + """ + + configs: pd.DataFrame = pd.concat([o.config for o in self.observations]).reset_index( + drop=True + ) + scores: pd.DataFrame = pd.concat([o.performance for o in self.observations]).reset_index( + drop=True + ) + contexts: pd.DataFrame = pd.concat( + [pd.DataFrame() if o.context is None else o.context for o in self.observations] + ).reset_index(drop=True) + metadata: pd.DataFrame = pd.concat( + [pd.DataFrame() if o.metadata is None else o.metadata for o in self.observations] + ).reset_index(drop=True) + + return ( + configs, + scores, + contexts if len(contexts.columns) > 0 else None, + metadata if len(metadata.columns) > 0 else None, + ) + + def __repr__(self) -> str: + return f"Observations(observations={self.observations})" + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Observations): + return False + if len(self.observations) != len(other.observations): + return False + for self_observation, other_observation in zip(self.observations, other.observations): + if self_observation != other_observation: + return False + return True + + def __ne__(self, other: Any) -> bool: + return not self.__eq__(other) diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index a0026a9155..2c608a6613 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -13,6 +13,7 @@ import numpy.typing as npt import pandas as pd +from mlos_core.optimizers.observations import Observation, Observations, Suggestion from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter from mlos_core.util import config_to_dataframe @@ -58,9 +59,9 @@ def __init__( raise ValueError("Number of weights 
must match the number of optimization targets") self._space_adapter: Optional[BaseSpaceAdapter] = space_adapter - self._observations: List[Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]] = [] + self._observations: Observations = Observations() self._has_context: Optional[bool] = None - self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame]]] = [] + self._pending_observations: List[Suggestion] = [] def __repr__(self) -> str: return f"{self.__class__.__name__}(space_adapter={self.space_adapter})" @@ -70,78 +71,53 @@ def space_adapter(self) -> Optional[BaseSpaceAdapter]: """Get the space adapter instance (if any).""" return self._space_adapter - def register( - self, - *, - configs: pd.DataFrame, - scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None, - metadata: Optional[pd.DataFrame] = None, - ) -> None: + def register(self, *, observation: Observation) -> None: """ Wrapper method, which employs the space adapter (if any), before registering the configs and scores. Parameters ---------- - configs : pd.DataFrame - Dataframe of configs / parameters. The columns are parameter names and - the rows are the configs. - scores : pd.DataFrame - Scores from running the configs. The index is the same as the index of the configs. - - context : pd.DataFrame - Not Yet Implemented. - - metadata : Optional[pd.DataFrame] - Not Yet Implemented. + observation: Observation + A list of observations to register. """ # Do some input validation. - assert metadata is None or isinstance(metadata, pd.DataFrame) - assert set(scores.columns) == set( + assert observation.metadata is None or isinstance(observation.metadata, pd.DataFrame) + assert set(observation.performance.columns) == set( self._optimization_targets ), "Mismatched optimization targets." assert self._has_context is None or self._has_context ^ ( - context is None + observation.context is None ), "Context must always be added or never be added." - assert len(configs) == len(scores), "Mismatched number of configs and scores." - if context is not None: - assert len(configs) == len(context), "Mismatched number of configs and context." - assert configs.shape[1] == len( + assert len(observation.config) == len( + observation.performance + ), "Mismatched number of configs and scores." + if observation.context is not None: + assert len(observation.config) == len( + observation.context + ), "Mismatched number of configs and context." + assert observation.config.shape[1] == len( self.parameter_space.values() ), "Mismatched configuration shape." - self._observations.append((configs, scores, context)) - self._has_context = context is not None + self._observations.append(observation) + self._has_context = observation.context is not None if self._space_adapter: - configs = self._space_adapter.inverse_transform(configs) + configs = self._space_adapter.inverse_transform(observation.config) assert configs.shape[1] == len( self.optimizer_parameter_space.values() ), "Mismatched configuration shape after inverse transform." - return self._register(configs=configs, scores=scores, context=context) + return self._register(observation=observation) @abstractmethod - def _register( - self, - *, - configs: pd.DataFrame, - scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None, - metadata: Optional[pd.DataFrame] = None, - ) -> None: + def _register(self, *, observation: Observation) -> None: """ Registers the given configs and scores. Parameters ---------- - configs : pd.DataFrame - Dataframe of configs / parameters. 
The columns are parameter names and - the rows are the configs. - scores : pd.DataFrame - Scores from running the configs. The index is the same as the index of the configs. - - context : pd.DataFrame - Not Yet Implemented. + observation: Observation + The observation to register """ pass # pylint: disable=unnecessary-pass # pragma: no cover @@ -150,7 +126,7 @@ def suggest( *, context: Optional[pd.DataFrame] = None, defaults: bool = False, - ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: + ) -> Suggestion: """ Wrapper method, which employs the space adapter (if any), after suggesting a new configuration. @@ -173,27 +149,29 @@ def suggest( metadata = None if self.space_adapter is not None: configuration = self.space_adapter.inverse_transform(configuration) + suggestion = Suggestion(config=configuration, metadata=metadata, context=context) else: - configuration, metadata = self._suggest(context=context) - assert len(configuration) == 1, "Suggest must return a single configuration." - assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), ( + suggestion = self._suggest(context=context) + assert len(suggestion.config) == 1, "Suggest must return a single configuration." + assert set(suggestion.config.columns).issubset(set(self.optimizer_parameter_space)), ( "Optimizer suggested a configuration that does " "not match the expected parameter space." ) + if self._space_adapter: - configuration = self._space_adapter.transform(configuration) - assert set(configuration.columns).issubset(set(self.parameter_space)), ( + suggestion.config = self._space_adapter.transform(suggestion.config) + assert set(suggestion.config.columns).issubset(set(self.parameter_space)), ( "Space adapter produced a configuration that does " "not match the expected parameter space." ) - return configuration, metadata + return suggestion @abstractmethod def _suggest( self, *, context: Optional[pd.DataFrame] = None, - ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: + ) -> Suggestion: """ Suggests a new configuration. @@ -216,9 +194,7 @@ def _suggest( def register_pending( self, *, - configs: pd.DataFrame, - context: Optional[pd.DataFrame] = None, - metadata: Optional[pd.DataFrame] = None, + suggestion: Suggestion, ) -> None: """ Registers the given configs as "pending". That is it say, it has been suggested @@ -227,17 +203,12 @@ def register_pending( Parameters ---------- - configs : pd.DataFrame - Dataframe of configs / parameters. The columns are parameter names and - the rows are the configs. - context : pd.DataFrame - Not Yet Implemented. - metadata : Optional[pd.DataFrame] - Not Yet Implemented. + suggestion: Suggestion + The suggestion to register as pending. """ pass # pylint: disable=unnecessary-pass # pragma: no cover - def get_observations(self) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: + def get_observations(self) -> Observations: """ Returns the observations as a triplet of DataFrames (config, score, context). @@ -246,23 +217,16 @@ def get_observations(self) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.Data observations : Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]] A triplet of (config, score, context) DataFrames of observations. 
""" - if len(self._observations) == 0: + if sum(len(o.config.index) for o in self._observations) == 0: raise ValueError("No observations registered yet.") - configs = pd.concat([config for config, _, _ in self._observations]).reset_index(drop=True) - scores = pd.concat([score for _, score, _ in self._observations]).reset_index(drop=True) - contexts = pd.concat( - [ - pd.DataFrame() if context is None else context - for _, _, context in self._observations - ] - ).reset_index(drop=True) - return (configs, scores, contexts if len(contexts.columns) > 0 else None) + + return self._observations def get_best_observations( self, *, n_max: int = 1, - ) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: + ) -> Observations: """ Get the N best observations so far as a triplet of DataFrames (config, score, context). Default is N=1. The columns are ordered in ASCENDING order of the @@ -279,11 +243,23 @@ def get_best_observations( observations : Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]] A triplet of best (config, score, context) DataFrames of best observations. """ - if len(self._observations) == 0: + if sum(len(o.config.index) for o in self._observations) == 0: raise ValueError("No observations registered yet.") - (configs, scores, contexts) = self.get_observations() + + configs, scores, contexts, metadata = self._observations.to_legacy() idx = scores.nsmallest(n_max, columns=self._optimization_targets, keep="first").index - return (configs.loc[idx], scores.loc[idx], None if contexts is None else contexts.loc[idx]) + + return Observations( + observations=[ + Observation( + config=configs.loc[[i]], + performance=scores.loc[[i]], + context=None if contexts is None else contexts.loc[[i]], + metadata=None if metadata is None else metadata.loc[[i]], + ) + for i in idx + ] + ) def cleanup(self) -> None: """ diff --git a/mlos_core/mlos_core/optimizers/random_optimizer.py b/mlos_core/mlos_core/optimizers/random_optimizer.py index 661a48a373..6a575a58b2 100644 --- a/mlos_core/mlos_core/optimizers/random_optimizer.py +++ b/mlos_core/mlos_core/optimizers/random_optimizer.py @@ -9,6 +9,7 @@ import pandas as pd +from mlos_core.optimizers.observations import Observation, Suggestion from mlos_core.optimizers.optimizer import BaseOptimizer @@ -23,14 +24,7 @@ class RandomOptimizer(BaseOptimizer): The parameter space to optimize. """ - def _register( - self, - *, - configs: pd.DataFrame, - scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None, - metadata: Optional[pd.DataFrame] = None, - ) -> None: + def _register(self, *, observation: Observation) -> None: """ Registers the given configs and scores. @@ -38,30 +32,26 @@ def _register( Parameters ---------- - configs : pd.DataFrame - Dataframe of configs / parameters. The columns are parameter names and - the rows are the configs. - - scores : pd.DataFrame - Scores from running the configs. The index is the same as the index of the configs. - - context : None - Not Yet Implemented. - - metadata : None - Not Yet Implemented. + observation: Observation + The observation to register. 
""" - if context is not None: - warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) - if metadata is not None: - warn(f"Not Implemented: Ignoring context {list(metadata.columns)}", UserWarning) + if observation.context is not None: + warn( + f"Not Implemented: Ignoring context {list(observation.context.columns)}", + UserWarning, + ) + if observation.metadata is not None: + warn( + f"Not Implemented: Ignoring context {list(observation.metadata.columns)}", + UserWarning, + ) # should we pop them from self.pending_observations? def _suggest( self, *, context: Optional[pd.DataFrame] = None, - ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: + ) -> Suggestion: """ Suggests a new configuration. @@ -74,26 +64,20 @@ def _suggest( Returns ------- - configuration : pd.DataFrame - Pandas dataframe with a single row. Column names are the parameter names. - - metadata : None - Not implemented. + suggestion: Suggestion + The suggestion to evaluate. """ if context is not None: # not sure how that works here? warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) - return ( - pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0]), - None, + return Suggestion( + config=pd.DataFrame( + dict(self.optimizer_parameter_space.sample_configuration()), + index=[0], + ), + context=context, ) - def register_pending( - self, - *, - configs: pd.DataFrame, - context: Optional[pd.DataFrame] = None, - metadata: Optional[pd.DataFrame] = None, - ) -> None: + def register_pending(self, *, suggestion: Suggestion) -> None: raise NotImplementedError() # self._pending_observations.append((configs, context)) diff --git a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py index 65f0d9ab92..900ccc1281 100644 --- a/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py +++ b/mlos_core/mlos_core/tests/optimizers/bayesian_optimizers_test.py @@ -10,6 +10,7 @@ import pandas as pd import pytest +from mlos_core.optimizers.bayesian_optimizers.smac_optimizer import SmacOptimizer from mlos_core.optimizers import BaseOptimizer, OptimizerType from mlos_core.optimizers.bayesian_optimizers import BaseBayesianOptimizer @@ -36,16 +37,19 @@ def test_context_not_implemented_warning( optimization_targets=["score"], **kwargs, ) - suggestion, _metadata = optimizer.suggest() - scores = pd.DataFrame({"score": [1]}) - context = pd.DataFrame([["something"]]) + suggestion = optimizer.suggest() + observation = suggestion.evaluate(performance=pd.DataFrame({"score": [1]})) + observation.context = pd.DataFrame([["something"]]) with pytest.raises(UserWarning): - optimizer.register(configs=suggestion, scores=scores, context=context) + optimizer.register(observation=observation) with pytest.raises(UserWarning): - optimizer.suggest(context=context) + optimizer.suggest(context=pd.DataFrame([["something"]])) - if isinstance(optimizer, BaseBayesianOptimizer): + if isinstance(optimizer, SmacOptimizer): + with pytest.raises(RuntimeError): + optimizer.surrogate_predict(configs=suggestion.config, context=suggestion.context) + elif isinstance(optimizer, BaseBayesianOptimizer): with pytest.raises(UserWarning): - optimizer.surrogate_predict(configs=suggestion, context=context) + optimizer.surrogate_predict(configs=suggestion.config, context=suggestion.context) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py 
index bd04203b05..170bf86a75 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py @@ -12,6 +12,7 @@ import pandas as pd import pytest +from mlos_core.optimizers.observations import Suggestion from mlos_core.optimizers import BaseOptimizer, OptimizerType from mlos_core.tests import SEED @@ -93,35 +94,41 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: optimizer.get_observations() for _ in range(max_iterations): - suggestion, metadata = optimizer.suggest() - assert isinstance(suggestion, pd.DataFrame) - assert metadata is None or isinstance(metadata, pd.DataFrame) - assert set(suggestion.columns) == {"x", "y"} + suggestion = optimizer.suggest() + assert isinstance(suggestion, Suggestion) + assert isinstance(suggestion.config, pd.DataFrame) + assert suggestion.metadata is None or isinstance(suggestion.metadata, pd.DataFrame) + assert set(suggestion.config.columns) == {"x", "y"} # Check suggestion values are the expected dtype - assert isinstance(suggestion.x.iloc[0], np.integer) - assert isinstance(suggestion.y.iloc[0], np.floating) + assert isinstance(suggestion.config.x.iloc[0], np.integer) + assert isinstance(suggestion.config.y.iloc[0], np.floating) # Check that suggestion is in the space test_configuration = CS.Configuration( - optimizer.parameter_space, suggestion.astype("O").iloc[0].to_dict() + optimizer.parameter_space, suggestion.config.astype("O").iloc[0].to_dict() ) # Raises an error if outside of configuration space test_configuration.is_valid_configuration() # Test registering the suggested configuration with a score. - observation = objective(suggestion) + observation = objective(suggestion.config) assert isinstance(observation, pd.DataFrame) assert set(observation.columns) == {"main_score", "other_score"} - optimizer.register(configs=suggestion, scores=observation) + optimizer.register(observation=suggestion.evaluate(observation)) - (best_config, best_score, best_context) = optimizer.get_best_observations() + (best_config, best_score, best_context, _best_metadata) = ( + optimizer.get_best_observations().to_legacy() + ) assert isinstance(best_config, pd.DataFrame) assert isinstance(best_score, pd.DataFrame) assert best_context is None - assert set(best_config.columns) == {"x", "y"} + t = set(best_config.columns) + assert t == {"x", "y"} assert set(best_score.columns) == {"main_score", "other_score"} assert best_config.shape == (1, 2) assert best_score.shape == (1, 2) - (all_configs, all_scores, all_contexts) = optimizer.get_observations() + (all_configs, all_scores, all_contexts, _all_metadata) = ( + optimizer.get_observations().to_legacy() + ) assert isinstance(all_configs, pd.DataFrame) assert isinstance(all_scores, pd.DataFrame) assert all_contexts is None diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index fce083150f..a01185aaa9 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -13,6 +13,7 @@ import pandas as pd import pytest +from mlos_core.optimizers.observations import Observations, Suggestion from mlos_core.optimizers import ( BaseOptimizer, ConcreteOptimizer, @@ -53,15 +54,15 @@ def test_create_optimizer_and_suggest( assert optimizer.parameter_space is not None - suggestion, metadata = optimizer.suggest() - assert suggestion is not None + suggestion = optimizer.suggest() + assert suggestion.config is not None 
myrepr = repr(optimizer) assert myrepr.startswith(optimizer_class.__name__) # pending not implemented with pytest.raises(NotImplementedError): - optimizer.register_pending(configs=suggestion, metadata=metadata) + optimizer.register_pending(suggestion=suggestion) @pytest.mark.parametrize( @@ -105,19 +106,24 @@ def objective(x: pd.Series) -> pd.DataFrame: optimizer.get_observations() for _ in range(max_iterations): - suggestion, metadata = optimizer.suggest() - assert isinstance(suggestion, pd.DataFrame) - assert metadata is None or isinstance(metadata, pd.DataFrame) - assert set(suggestion.columns) == {"x", "y", "z"} + suggestion = optimizer.suggest() + assert isinstance(suggestion, Suggestion) + assert isinstance(suggestion.config, pd.DataFrame) + assert suggestion.metadata is None or isinstance(suggestion.metadata, pd.DataFrame) + assert set(suggestion.config.columns) == {"x", "y", "z"} # check that suggestion is in the space - configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict()) + configuration = CS.Configuration( + optimizer.parameter_space, suggestion.config.iloc[0].to_dict() + ) # Raises an error if outside of configuration space configuration.is_valid_configuration() - observation = objective(suggestion["x"]) + observation = objective(suggestion.config["x"]) assert isinstance(observation, pd.DataFrame) - optimizer.register(configs=suggestion, scores=observation, metadata=metadata) + optimizer.register(observation=suggestion.evaluate(performance=observation)) - (best_config, best_score, best_context) = optimizer.get_best_observations() + (best_config, best_score, best_context, _metadata) = ( + optimizer.get_best_observations().to_legacy() + ) assert isinstance(best_config, pd.DataFrame) assert isinstance(best_score, pd.DataFrame) assert best_context is None @@ -127,7 +133,7 @@ def objective(x: pd.Series) -> pd.DataFrame: assert best_score.shape == (1, 1) assert best_score.score.iloc[0] < -5 - (all_configs, all_scores, all_contexts) = optimizer.get_observations() + (all_configs, all_scores, all_contexts, _metadata) = optimizer.get_observations().to_legacy() assert isinstance(all_configs, pd.DataFrame) assert isinstance(all_scores, pd.DataFrame) assert all_contexts is None @@ -291,31 +297,37 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: _LOG.debug("Optimizer is done with random init.") # loop for optimizer - suggestion, metadata = optimizer.suggest() - observation = objective(suggestion) - optimizer.register(configs=suggestion, scores=observation, metadata=metadata) + suggestion = optimizer.suggest() + observation = objective(suggestion.config) + optimizer.register(observation=suggestion.evaluate(observation)) # loop for llamatune-optimizer - suggestion, metadata = llamatune_optimizer.suggest() - _x, _y = suggestion["x"].iloc[0], suggestion["y"].iloc[0] + suggestion = llamatune_optimizer.suggest() + _x, _y = suggestion.config["x"].iloc[0], suggestion.config["y"].iloc[0] # optimizer explores 1-dimensional space assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3.0, rel=1e-3) - observation = objective(suggestion) - llamatune_optimizer.register(configs=suggestion, scores=observation, metadata=metadata) + observation = objective(suggestion.config) + llamatune_optimizer.register(observation=suggestion.evaluate(observation)) # Retrieve best observations best_observation = optimizer.get_best_observations() llamatune_best_observation = llamatune_optimizer.get_best_observations() - for best_config, best_score, best_context in 
(best_observation, llamatune_best_observation): + for best_config, best_score, best_context, best_metadata in ( + best_observation.to_legacy(), + llamatune_best_observation.to_legacy(), + ): assert isinstance(best_config, pd.DataFrame) assert isinstance(best_score, pd.DataFrame) assert best_context is None + assert best_metadata is None assert set(best_config.columns) == {"x", "y"} assert set(best_score.columns) == {"score"} - (best_config, best_score, _context) = best_observation - (llamatune_best_config, llamatune_best_score, _context) = llamatune_best_observation + (best_config, best_score, _context, _metadata) = best_observation.to_legacy() + (llamatune_best_config, llamatune_best_score, _context, _metadata) = ( + llamatune_best_observation.to_legacy() + ) # LlamaTune's optimizer score should better (i.e., lower) than plain optimizer's # one, or close to that @@ -325,17 +337,16 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: ) # Retrieve and check all observations - for all_configs, all_scores, all_contexts in ( - optimizer.get_observations(), - llamatune_optimizer.get_observations(), - ): + for obs in [optimizer.get_observations(), llamatune_optimizer.get_observations()]: + assert isinstance(obs, Observations) + all_configs, all_scores, all_contexts, _metadata = obs.to_legacy() assert isinstance(all_configs, pd.DataFrame) assert isinstance(all_scores, pd.DataFrame) assert all_contexts is None assert set(all_configs.columns) == {"x", "y"} assert set(all_scores.columns) == {"score"} - assert len(all_configs) == num_iters - assert len(all_scores) == num_iters + assert len(all_configs.index) == num_iters + assert len(all_scores.index) == num_iters # .surrogate_predict method not currently implemented if space adapter is employed if isinstance(llamatune_optimizer, BaseBayesianOptimizer): @@ -411,29 +422,32 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: optimizer.get_observations() for _ in range(max_iterations): - suggestion, metadata = optimizer.suggest() - assert isinstance(suggestion, pd.DataFrame) - assert (suggestion.columns == ["x", "y"]).all() + suggestion = optimizer.suggest() + assert isinstance(suggestion, Suggestion) + assert isinstance(suggestion.config, pd.DataFrame) + assert (suggestion.config.columns == ["x", "y"]).all() # Check suggestion values are the expected dtype - assert isinstance(suggestion["x"].iloc[0], np.integer) - assert isinstance(suggestion["y"].iloc[0], np.floating) + assert isinstance(suggestion.config["x"].iloc[0], np.integer) + assert isinstance(suggestion.config["y"].iloc[0], np.floating) # Check that suggestion is in the space test_configuration = CS.Configuration( - optimizer.parameter_space, suggestion.astype("O").iloc[0].to_dict() + optimizer.parameter_space, suggestion.config.astype("O").iloc[0].to_dict() ) # Raises an error if outside of configuration space test_configuration.is_valid_configuration() # Test registering the suggested configuration with a score. 
- observation = objective(suggestion) + observation = objective(suggestion.config) assert isinstance(observation, pd.DataFrame) - optimizer.register(configs=suggestion, scores=observation, metadata=metadata) + optimizer.register(observation=suggestion.evaluate(observation)) - (best_config, best_score, best_context) = optimizer.get_best_observations() + (best_config, best_score, best_context, _metadata) = ( + optimizer.get_best_observations().to_legacy() + ) assert isinstance(best_config, pd.DataFrame) assert isinstance(best_score, pd.DataFrame) assert best_context is None - (all_configs, all_scores, all_contexts) = optimizer.get_observations() + (all_configs, all_scores, all_contexts, _metadata) = optimizer.get_observations().to_legacy() assert isinstance(all_configs, pd.DataFrame) assert isinstance(all_scores, pd.DataFrame) assert all_contexts is None diff --git a/test.csv b/test.csv new file mode 100644 index 0000000000..8a27835dba --- /dev/null +++ b/test.csv @@ -0,0 +1,101 @@ +,x,y +0,1.8663263131194956,1.3131832170213436 +1,2.3560757511413075,2.3560757511413075 +2,2.3399274243564108,0.8177778158479249 +3,0.8293927654292901,0.8293927654292901 +4,2.405616532605058,2.8744180610511156 +5,2.6277979042262842,2.6277979042262842 +6,1.0734518098736001,1.502985376570376 +7,2.050388805516409,2.050388805516409 +8,2.1381060809487007,1.1107522643711847 +9,1.6835885581968748,1.6835885581968748 +10,1.509249495923429,0.041305348772046724 +11,2.318479864837122,2.318479864837122 +12,2.6479235719083496,1.0946579517041168 +13,1.846188535300481,1.846188535300481 +14,0.22614372492892965,1.1064720180059235 +15,2.7994203059475646,2.7994203059475646 +16,1.954134429679732,1.1916077331784627 +17,2.3661904288222364,2.3661904288222364 +18,0.9505083665066137,1.7042959578782075 +19,2.6073821686836776,2.6073821686836776 +20,1.308520271687038,2.4064429262404774 +21,0.4313004735436937,0.4313004735436937 +22,2.112782913355006,2.1137439245687175 +23,0.6563763170222657,0.6563763170222657 +24,2.7746028858466953,1.3264222662125298 +25,2.7279478769174177,2.7279478769174177 +26,0.1794276683395557,0.5528612514414409 +27,0.1420658364045454,0.1420658364045454 +28,2.0246428307469904,1.7838743398033465 +29,1.5999304889962516,1.5999304889962516 +30,0.12997218808441047,1.6842992401901937 +31,0.989005336862745,0.989005336862745 +32,1.5089004993378552,0.33568295272321147 +33,1.8215811186554536,1.8215811186554536 +34,1.697833929151594,0.02029218597000837 +35,1.8523251264128913,1.8523251264128913 +36,2.7363686592994627,2.3715723991711 +37,2.9762443985650844,2.9762443985650844 +38,2.8764052864585996,2.3758924058749193 +39,0.8557528800735293,0.8557528800735293 +40,1.874750115917733,1.4342813870120237 +41,0.5870255359976947,0.5870255359976947 +42,1.1469523560945194,0.16162105543870975 +43,1.3549452247825773,1.3549452247825773 +44,2.9460142245658636,0.37182810146088896 +45,0.3581426937787452,0.3581426937787452 +46,2.2155691684300405,1.7619109003919537 +47,1.4148976029611033,1.4148976029611033 +48,0.3213804515815989,0.6876556963818538 +49,2.699895584510026,2.699895584510026 +50,1.2502606134080796,1.6075549875948476 +51,0.018625549761388194,0.018625549761388194 +52,0.9019251173109034,1.3106795165268306 +53,1.8364469911972727,1.8364469911972727 +54,2.754594226141719,1.877210009887606 +55,2.1179926952453196,2.1179926952453196 +56,0.4495011479697817,2.2381902274101497 +57,2.4930209773006133,2.4930209773006133 +58,1.9011773068529374,1.3149296433672826 +59,0.4577183240235161,0.4577183240235161 +60,1.7052288457415705,1.5846728327551816 
+61,2.8542862912607796,2.8542862912607796 +62,1.4410775355300482,1.507678690147651 +63,1.6106345787732292,1.6106345787732292 +64,2.457606201192475,0.17134691426657966 +65,2.0082652292236465,2.0082652292236465 +66,2.3013498851384173,2.1243460859328116 +67,2.39060155117559,2.39060155117559 +68,1.6732824852823485,2.897509595976383 +69,0.44147069967899155,0.44147069967899155 +70,0.08894100160624674,1.7816804778743154 +71,0.34219709622798744,0.34219709622798744 +72,2.8524295502523667,0.9771222432760417 +73,0.5808560704613316,0.5808560704613316 +74,1.3734349466922826,2.7612077132792634 +75,2.637207484544027,2.637207484544027 +76,0.7578472651395907,1.0440263786080384 +77,0.5477661947409262,0.5477661947409262 +78,2.7053881541129763,2.1195844895153924 +79,2.179975384686422,2.179975384686422 +80,2.700263510429123,2.3374914023079727 +81,1.7974643418128773,1.7974643418128773 +82,0.8733757346936245,0.45418579322229635 +83,1.0055239774483047,1.0055239774483047 +84,1.9726553314734583,0.2200276308978545 +85,0.16501918621867095,0.16501918621867095 +86,0.9695844417635293,1.7714454133889586 +87,2.5616957013769253,2.5616957013769253 +88,0.861187275000027,0.5192016804443764 +89,0.40206361799652846,0.40206361799652846 +90,2.9839614859328836,0.5384936084081755 +91,0.9526404690815875,0.9526404690815875 +92,1.7048742139773216,0.028045723500733533 +93,2.7019458634652755,2.7019458634652755 +94,2.931724292767761,1.6706840374105045 +95,0.2543215301825704,0.2543215301825704 +96,0.9990073971873003,2.185286029109016 +97,0.42730612002545165,0.42730612002545165 +98,1.6574068184926365,0.8191297790510568 +99,2.923485414261779,2.923485414261779
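
Illustrative usage sketch of the Observation / Suggestion API introduced by the hunks above (a minimal round trip, not part of the patch itself). The RandomOptimizer construction and the ConfigSpace setup below are assumptions made for illustration only; the key point is that suggest() now returns a Suggestion, register() takes an Observation built via Suggestion.evaluate(), and the get_*_observations() methods return an Observations collection with a to_legacy() escape hatch.

import ConfigSpace as CS
import pandas as pd

from mlos_core.optimizers.random_optimizer import RandomOptimizer

# One-dimensional search space (classic ConfigSpace add_hyperparameter API assumed here).
parameter_space = CS.ConfigurationSpace(seed=1234)
parameter_space.add_hyperparameter(CS.UniformFloatHyperparameter("x", lower=0.0, upper=3.0))

optimizer = RandomOptimizer(
    parameter_space=parameter_space,
    optimization_targets=["score"],
)

# suggest() now returns a Suggestion instead of a (config_df, metadata) tuple.
suggestion = optimizer.suggest()
x = suggestion.config["x"].iloc[0]

# Suggestion.evaluate() pairs the suggested config with its measured performance,
# producing the Observation that register() now expects.
score = pd.DataFrame({"score": [x**2]})
optimizer.register(observation=suggestion.evaluate(performance=score))

# get_observations()/get_best_observations() return an Observations collection;
# to_legacy() recovers the old (configs, scores, contexts, metadata) DataFrame tuple.
configs, scores, contexts, metadata = optimizer.get_best_observations().to_legacy()
print(configs, scores)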