diff --git a/.vscode/settings.json b/.vscode/settings.json index 2c8098f9d9a..776ba120ac2 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -17,7 +17,6 @@ // See Also: // - https://github.com/microsoft/vscode/issues/2809#issuecomment-1544387883 // - mlos_bench/config/schemas/README.md - { "fileMatch": [ "mlos_bench/mlos_bench/tests/config/schemas/environments/test-cases/**/*.jsonc", @@ -136,8 +135,7 @@ // See Also .vscode/launch.json for environment variable args to pytest during debug sessions. // For the rest, see setup.cfg "python.testing.pytestArgs": [ - "--log-level=DEBUG", "." ], "python.testing.unittestEnabled": false -} +} \ No newline at end of file diff --git a/mlos_bench/mlos_bench/config/environments/apps/redis/redis.jsonc b/mlos_bench/mlos_bench/config/environments/apps/redis/redis.jsonc index dc44871abd4..c81d2c18e16 100644 --- a/mlos_bench/mlos_bench/config/environments/apps/redis/redis.jsonc +++ b/mlos_bench/mlos_bench/config/environments/apps/redis/redis.jsonc @@ -45,11 +45,6 @@ "trial_id", "mountPoint" ], - "shell_env_params": [ - "mountPoint", - "experiment_id", - "trial_id" - ], "setup": [ "$mountPoint/$experiment_id/$trial_id/scripts/setup-workload.sh", "$mountPoint/$experiment_id/$trial_id/scripts/setup-app.sh" diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index a2c5b5d5e72..15d05d22363 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -19,7 +19,6 @@ import numpy as np import numpy.typing as npt import pandas as pd -from mlos_core.mlos_core.optimizers.utils import filter_kwargs, to_metadata from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter from mlos_core.spaces.adapters.identity_adapter import IdentityAdapter from smac import HyperparameterOptimizationFacade as Optimizer_Smac @@ -45,7 +44,6 @@ def 
__init__( *, # pylint: disable=too-many-locals parameter_space: ConfigSpace.ConfigurationSpace, optimization_targets: List[str], - objective_weights: Optional[List[float]] = None, space_adapter: Optional[BaseSpaceAdapter] = None, seed: Optional[int] = 0, run_name: Optional[str] = None, @@ -71,9 +69,6 @@ def __init__( optimization_targets : List[str] The names of the optimization targets to minimize. - objective_weights : Optional[List[float]] - Optional list of weights of optimization targets. - space_adapter : BaseSpaceAdapter The space adapter class to employ for parameter space transformations. @@ -126,12 +121,11 @@ def __init__( **kwargs: Additional arguments to be passed to the - facade, scenario, and intensifier + scenario, and intensifier """ super().__init__( parameter_space=parameter_space, optimization_targets=optimization_targets, - objective_weights=objective_weights, space_adapter=space_adapter, ) @@ -140,7 +134,7 @@ def __init__( # Store for TrialInfo instances returned by .ask() self.trial_info_df: pd.DataFrame = pd.DataFrame( - columns=["Configuration", "Metadata", "TrialInfo", "TrialValue"] + columns=["Configuration", "Context", "TrialInfo", "TrialValue"] ) # The default when not specified is to use a known seed (0) to keep results reproducible. # However, if a `None` seed is explicitly provided, we let a random seed be produced by SMAC. 
@@ -172,7 +166,7 @@ def __init__( n_trials=max_trials, seed=seed or -1, # if -1, SMAC will generate a random seed internally n_workers=1, # Use a single thread for evaluating trials - **filter_kwargs(Scenario, **kwargs), + **SmacOptimizer._filter_kwargs(Scenario, **kwargs), ) config_selector: ConfigSelector = facade.get_config_selector( @@ -183,7 +177,7 @@ def __init__( intensifier_instance = facade.get_intensifier(scenario) else: intensifier_instance = intensifier( - scenario, **filter_kwargs(intensifier, **kwargs) + scenario, **SmacOptimizer._filter_kwargs(intensifier, **kwargs) ) # TODO: When bulk registering prior configs to rewarm the optimizer, @@ -235,11 +229,9 @@ def __init__( intensifier=intensifier_instance, random_design=random_design, config_selector=config_selector, - multi_objective_algorithm=Optimizer_Smac.get_multi_objective_algorithm( - scenario, objective_weights=self._objective_weights), overwrite=True, logging_level=False, # Use the existing logger - **filter_kwargs(facade, **kwargs), + **SmacOptimizer._filter_kwargs(facade, **kwargs), ) self.lock = threading.Lock() @@ -265,7 +257,33 @@ def n_random_init(self) -> int: return self.base_optimizer._initial_design._n_configs @staticmethod - + def _filter_kwargs(function: Callable, **kwargs: Any) -> Dict[str, Any]: + """ + Filters arguments provided in the kwargs dictionary to be restricted to the arguments legal for + the called function. + + Parameters + ---------- + function : Callable + function over which we filter kwargs for. 
+ kwargs: + kwargs that we are filtering for the target function + + Returns + ------- + dict + kwargs with the non-legal argument filtered out + """ + sig = inspect.signature(function) + filter_keys = [ + param.name + for param in sig.parameters.values() + if param.kind == param.POSITIONAL_OR_KEYWORD + ] + filtered_dict = { + filter_key: kwargs[filter_key] for filter_key in filter_keys & kwargs.keys() + } + return filtered_dict @staticmethod def _dummy_target_func( @@ -297,8 +315,7 @@ def _dummy_target_func( raise RuntimeError('This function should never be called.') def _register(self, configurations: pd.DataFrame, - scores: pd.DataFrame, context: Optional[pd.DataFrame] = None, - metadata: Optional[pd.DataFrame] = None) -> None: + scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: """Registers the given configurations and scores. Parameters @@ -309,24 +326,18 @@ def _register(self, configurations: pd.DataFrame, scores : pd.DataFrame Scores from running the configurations. The index is the same as the index of the configurations. - metadata : pd.DataFrame - Metadata of the request that is being registered. - context : pd.DataFrame - Not Yet Implemented. + Context of the request that is being registered. 
""" - if context is not None: - warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) - with self.lock: # Register each trial (one-by-one) - metadatas: Union[List[pd.Series], List[None]] = to_metadata(metadata) or [ + contexts: Union[List[pd.Series], List[None]] = _to_context(context) or [ None for _ in scores # type: ignore[misc] ] for config, score, ctx in zip( self._to_configspace_configs(configurations), scores.values.tolist(), - metadatas, + contexts, ): value: TrialValue = TrialValue( cost=score, time=0.0, status=StatusType.SUCCESS @@ -339,7 +350,7 @@ def _register(self, configurations: pd.DataFrame, matching = ( self.trial_info_df["Configuration"] == config ) & pd.Series( - [df_ctx.equals(ctx) for df_ctx in self.trial_info_df["Metadata"]] + [df_ctx.equals(ctx) for df_ctx in self.trial_info_df["Context"]] ) # make a new entry @@ -392,8 +403,8 @@ def _suggest( configuration : pd.DataFrame Pandas dataframe with a single row. Column names are the parameter names. - metadata : pd.DataFrame - Pandas dataframe with a single row containing the metadata. + context : pd.DataFrame + Pandas dataframe with a single row containing the context. Column names are the budget, seed, and instance of the evaluation, if valid. 
""" with self.lock: @@ -408,17 +419,17 @@ def _suggest( self.optimizer_parameter_space.check_configuration(trial.config) assert trial.config.config_space == self.optimizer_parameter_space - config_df = _extract_config(trial) - metadata_df = _extract_metadata(trial) + config_df = self._extract_config(trial) + context_df = SmacOptimizer._extract_context(trial) self.trial_info_df.loc[len(self.trial_info_df.index)] = [ trial.config, - metadata_df.iloc[0], + context_df.iloc[0], trial, None, ] - return config_df, metadata_df + return config_df, context_df def register_pending(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() @@ -482,6 +493,31 @@ def _to_configspace_configs(self, configurations: pd.DataFrame) -> List[ConfigSp for (_, config) in configurations.astype('O').iterrows() ] + @staticmethod + def _extract_context(trial: TrialInfo) -> pd.DataFrame: + """Convert TrialInfo to a DataFrame. + + Parameters + ---------- + trial : TrialInfo + The trial to extract. + + Returns + ------- + context : pd.DataFrame + Pandas dataframe with a single row containing the context. + Column names are the budget and instance of the evaluation, if valid. + """ + return pd.DataFrame( + [[trial.instance, trial.seed, trial.budget]], + columns=["instance", "seed", "budget"], + ) + + def _extract_config(self, trial: TrialInfo) -> pd.DataFrame: + return pd.DataFrame( + [trial.config], columns=list(self.optimizer_parameter_space.keys()) + ) + def get_observations_full(self) -> pd.DataFrame: """Returns the observations as a dataframe with additional info. 
@@ -510,18 +546,18 @@ def get_best_observation(self) -> pd.DataFrame: max_budget = np.nan budgets = [ - metadata["budget"].max() - for _, _, metadata in self._observations - if metadata is not None + context["budget"].max() + for _, _, context in self._observations + if context is not None ] if len(budgets) > 0: max_budget = max(budgets) if max_budget is not np.nan: observations = [ - (config, score, metadata) - for config, score, metadata in self._observations - if metadata is not None and metadata["budget"].max() == max_budget + (config, score, context) + for config, score, context in self._observations + if context is not None and context["budget"].max() == max_budget ] configs = pd.concat([config for config, _, _ in observations]) @@ -530,39 +566,8 @@ def get_best_observation(self) -> pd.DataFrame: return configs.nsmallest(1, columns="score") -def _extract_metadata(trial: TrialInfo) -> pd.DataFrame: - """Convert TrialInfo to a metadata DataFrame. - Parameters - ---------- - trial : TrialInfo - The trial to extract. - - Returns - ------- - metadata : pd.DataFrame - Pandas dataframe with a single row containing the metadata. - Column names are the budget and instance of the evaluation, if valid. - """ - return pd.DataFrame( - [[trial.instance, trial.seed, trial.budget]], - columns=["instance", "seed", "budget"], - ) - -def _extract_config(self, trial: TrialInfo) -> pd.DataFrame: - """Convert TrialInfo to a config DataFrame. - - Parameters - ---------- - trial : TrialInfo - The trial to extract. - - Returns - ------- - config : pd.DataFrame - Pandas dataframe with a single row containing the config. 
- Column names are config parameters - """ - return pd.DataFrame( - [trial.config], columns=list(self.optimizer_parameter_space.keys()) - ) \ No newline at end of file +def _to_context(contexts: Optional[pd.DataFrame]) -> Optional[List[pd.Series]]: + if contexts is None: + return None + return [idx_series[1] for idx_series in contexts.iterrows()] diff --git a/mlos_core/mlos_core/optimizers/flaml_optimizer.py b/mlos_core/mlos_core/optimizers/flaml_optimizer.py index 723bc4332f7..098ad855dc3 100644 --- a/mlos_core/mlos_core/optimizers/flaml_optimizer.py +++ b/mlos_core/mlos_core/optimizers/flaml_optimizer.py @@ -30,13 +30,9 @@ class FlamlOptimizer(BaseOptimizer): Wrapper class for FLAML Optimizer: A fast library for AutoML and tuning. """ - # The name of an internal objective attribute that is calculated as a weighted average of the user provided objective metrics. - _METRIC_NAME = "FLAML_score" - def __init__(self, *, # pylint: disable=too-many-arguments parameter_space: ConfigSpace.ConfigurationSpace, optimization_targets: List[str], - objective_weights: Optional[List[float]] = None, space_adapter: Optional[BaseSpaceAdapter] = None, low_cost_partial_config: Optional[dict] = None, seed: Optional[int] = None): @@ -50,9 +46,7 @@ def __init__(self, *, # pylint: disable=too-many-arguments optimization_targets : List[str] The names of the optimization targets to minimize. - - objective_weights : Optional[List[float]] - Optional list of weights of optimization targets. + For FLAML it must be a list with a single element, e.g., `["score"]`. space_adapter : BaseSpaceAdapter The space adapter class to employ for parameter space transformations. 
@@ -67,10 +61,13 @@ def __init__(self, *, # pylint: disable=too-many-arguments super().__init__( parameter_space=parameter_space, optimization_targets=optimization_targets, - objective_weights=objective_weights, space_adapter=space_adapter, ) + if len(self._optimization_targets) != 1: + raise ValueError("FLAML does not support multi-target optimization") + self._flaml_optimization_target = self._optimization_targets[0] + # Per upstream documentation, it is recommended to set the seed for # flaml at the start of its operation globally. if seed is not None: @@ -86,7 +83,7 @@ def __init__(self, *, # pylint: disable=too-many-arguments self._suggested_config: Optional[dict] def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None) -> None: """Registers the given configurations and scores. Parameters @@ -96,24 +93,20 @@ def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, scores : pd.DataFrame Scores from running the configurations. The index is the same as the index of the configurations. + context : None Not Yet Implemented. - metadata : None - Not Yet Implemented. 
""" if context is not None: warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) - if metadata is not None: - warn(f"Not Implemented: Ignoring metadata {list(metadata.columns)}", UserWarning) - for (_, config), (_, score) in zip(configurations.astype('O').iterrows(), scores.iterrows()): + for (_, config), score in zip(configurations.astype('O').iterrows(), + scores[self._flaml_optimization_target]): cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration( self.optimizer_parameter_space, values=config.to_dict()) if cs_config in self.evaluated_samples: warn(f"Configuration {config} was already registered", UserWarning) - self.evaluated_samples[cs_config] = EvaluatedSample( - config=config.to_dict(), - score=float(np.average(score.astype(float), weights=self._objective_weights)), - ) + + self.evaluated_samples[cs_config] = EvaluatedSample(config=config.to_dict(), score=score) def _suggest( self, context: Optional[pd.DataFrame] = None @@ -156,11 +149,11 @@ def _target_function(self, config: dict) -> Union[dict, None]: Returns ------- result: Union[dict, None] - Dictionary with a single key, `FLAML_score`, if config already evaluated; `None` otherwise. + Dictionary with a single key, `score`, if config already evaluated; `None` otherwise. 
""" cs_config = normalize_config(self.optimizer_parameter_space, config) if cs_config in self.evaluated_samples: - return {self._METRIC_NAME: self.evaluated_samples[cs_config].score} + return {self._flaml_optimization_target: self.evaluated_samples[cs_config].score} self._suggested_config = dict(cs_config) # Cleaned-up version of the config return None # Returning None stops the process @@ -203,7 +196,7 @@ def _get_next_config(self) -> dict: self._target_function, config=self.flaml_parameter_space, mode='min', - metric=self._METRIC_NAME, + metric=self._flaml_optimization_target, points_to_evaluate=points_to_evaluate, evaluated_rewards=evaluated_rewards, num_samples=len(points_to_evaluate) + 1, diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index c0ff6e4aa46..be3ac76dd0b 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -27,7 +27,6 @@ class BaseOptimizer(metaclass=ABCMeta): def __init__(self, *, parameter_space: ConfigSpace.ConfigurationSpace, optimization_targets: List[str], - objective_weights: Optional[List[float]] = None, space_adapter: Optional[BaseSpaceAdapter] = None): """ Create a new instance of the base optimizer. @@ -38,8 +37,6 @@ def __init__(self, *, The parameter space to optimize. optimization_targets : List[str] The names of the optimization targets to minimize. - objective_weights : Optional[List[float]] - Optional list of weights of optimization targets. space_adapter : BaseSpaceAdapter The space adapter class to employ for parameter space transformations. 
""" @@ -51,16 +48,12 @@ def __init__(self, *, raise ValueError("Given parameter space differs from the one given to space adapter") self._optimization_targets = optimization_targets - self._objective_weights = objective_weights - if objective_weights is not None and len(objective_weights) != len(optimization_targets): - raise ValueError("Number of weights must match the number of optimization targets") - self._space_adapter: Optional[BaseSpaceAdapter] = space_adapter - self._observations: List[Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame], Optional[pd.DataFrame]]] = [] + self._observations: List[Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]] = [] self._has_context: Optional[bool] = None - self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame], Optional[pd.DataFrame]]] = [] + self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame]]] = [] self.delayed_config: Optional[pd.DataFrame] = None - self.delayed_metadata: Optional[pd.DataFrame] = None + self.delayed_context: Optional[pd.DataFrame] = None def __repr__(self) -> str: return f"{self.__class__.__name__}(space_adapter={self.space_adapter})" @@ -71,7 +64,7 @@ def space_adapter(self) -> Optional[BaseSpaceAdapter]: return self._space_adapter def register(self, configurations: pd.DataFrame, scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None) -> None: """Wrapper method, which employs the space adapter (if any), before registering the configurations and scores. Parameters @@ -80,10 +73,9 @@ def register(self, configurations: pd.DataFrame, scores: pd.DataFrame, Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. scores : pd.DataFrame Scores from running the configurations. The index is the same as the index of the configurations. + context : pd.DataFrame - Not implemented yet. 
- metadata : pd.DataFrame - Implementation depends on instance (e.g., saved optimizer state to return). + Not Yet Implemented. """ # Do some input validation. assert set(scores.columns) >= set(self._optimization_targets), "Mismatched optimization targets." @@ -94,23 +86,20 @@ def register(self, configurations: pd.DataFrame, scores: pd.DataFrame, if context is not None: assert len(configurations) == len(context), \ "Mismatched number of configurations and context." - if metadata is not None: - assert len(configurations) == len(metadata), \ - "Mismatched number of configurations and metadata." assert configurations.shape[1] == len(self.parameter_space.values()), \ "Mismatched configuration shape." - self._observations.append((configurations, scores, context, metadata)) + self._observations.append((configurations, scores, context)) self._has_context = context is not None if self._space_adapter: configurations = self._space_adapter.inverse_transform(configurations) assert configurations.shape[1] == len(self.optimizer_parameter_space.values()), \ "Mismatched configuration shape after inverse transform." - return self._register(configurations, scores, context, metadata) + return self._register(configurations, scores, context) @abstractmethod def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None) -> None: """Registers the given configurations and scores. Parameters @@ -119,10 +108,9 @@ def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. scores : pd.DataFrame Scores from running the configurations. The index is the same as the index of the configurations. + context : pd.DataFrame - Not implemented yet. - metadata : pd.DataFrame - Implementaton depends on instance. + Not Yet Implemented. 
""" pass # pylint: disable=unnecessary-pass # pragma: no cover @@ -144,25 +132,26 @@ def suggest( ------- configuration : pd.DataFrame Pandas dataframe with a single row. Column names are the parameter names. - metadata : pd.DataFrame - Pandas dataframe with a single row containing the metadata. + + context : pd.DataFrame + Pandas dataframe with a single row containing the context. Column names are the budget, seed, and instance of the evaluation, if valid. """ if defaults: - self.delayed_config, self.delayed_metadata = self._suggest(context) + self.delayed_config, self.delayed_context = self._suggest(context) configuration: pd.DataFrame = config_to_dataframe( self.parameter_space.get_default_configuration() ) - metadata = self.delayed_metadata + context = self.delayed_context if self.space_adapter is not None: configuration = self.space_adapter.inverse_transform(configuration) else: if self.delayed_config is None: - configuration, metadata = self._suggest(metadata) + configuration, context = self._suggest(context) else: - configuration, metadata = self.delayed_config, self.delayed_metadata - self.delayed_config, self.delayed_metadata = None, None + configuration, context = self.delayed_config, self.delayed_context + self.delayed_config, self.delayed_context = None, None assert len(configuration) == 1, \ "Suggest must return a single configuration." assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), \ @@ -171,7 +160,7 @@ def suggest( configuration = self._space_adapter.transform(configuration) assert set(configuration.columns).issubset(set(self.parameter_space)), \ "Space adapter produced a configuration that does not match the expected parameter space." - return configuration, metadata + return configuration, context @abstractmethod def _suggest( @@ -188,16 +177,12 @@ def _suggest( ------- configuration : pd.DataFrame Pandas dataframe with a single row. Column names are the parameter names. 
- - metadata : pd.DataFrame - Pandas dataframe with a single row containing the metadata. - Column names are the budget, seed, and instance of the evaluation, if valid. """ pass # pylint: disable=unnecessary-pass # pragma: no cover @abstractmethod def register_pending(self, configurations: pd.DataFrame, - context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None) -> None: """Registers the given configurations as "pending". That is it say, it has been suggested by the optimizer, and an experiment trial has been started. This can be useful for executing multiple trials in parallel, retry logic, etc. @@ -207,34 +192,30 @@ def register_pending(self, configurations: pd.DataFrame, configurations : pd.DataFrame Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. context : pd.DataFrame - Not implemented yet. - metadata : pd.DataFrame - Implementaton depends on instance. + Not Yet Implemented. """ pass # pylint: disable=unnecessary-pass # pragma: no cover - def get_observations(self) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame], Optional[pd.DataFrame]]: + def get_observations(self) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: """ - Returns the observations as a triplet of DataFrames (config, score, context, metadata). + Returns the observations as a triplet of DataFrames (config, score, context). Returns ------- observations : Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]] - A triplet of (config, score, metadata) DataFrames of observations. + A triplet of (config, score, context) DataFrames of observations. 
""" if len(self._observations) == 0: raise ValueError("No observations registered yet.") - configs = pd.concat([config for config, _, _, _ in self._observations]).reset_index(drop=True) - scores = pd.concat([score for _, score, _, _ in self._observations]).reset_index(drop=True) + configs = pd.concat([config for config, _, _ in self._observations]).reset_index(drop=True) + scores = pd.concat([score for _, score, _ in self._observations]).reset_index(drop=True) contexts = pd.concat([pd.DataFrame() if context is None else context - for _, _, context, _ in self._observations]).reset_index(drop=True) - metadatas = pd.concat([pd.DataFrame() if metadata is None else metadata - for _, _, _, metadata in self._observations]).reset_index(drop=True) - return (configs, scores, contexts, metadatas if len(metadatas.columns) > 0 else None) + for _, _, context in self._observations]).reset_index(drop=True) + return (configs, scores, contexts if len(contexts.columns) > 0 else None) - def get_best_observations(self, n_max: int = 1) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame], Optional[pd.DataFrame]]: + def get_best_observations(self, n_max: int = 1) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: """ - Get the N best observations so far as a triplet of DataFrames (config, score, metadata). + Get the N best observations so far as a triplet of DataFrames (config, score, context). Default is N=1. The columns are ordered in ASCENDING order of the optimization targets. The function uses `pandas.DataFrame.nsmallest(..., keep="first")` method under the hood. @@ -245,15 +226,15 @@ def get_best_observations(self, n_max: int = 1) -> Tuple[pd.DataFrame, pd.DataFr Returns ------- - observations : Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame], Optional[pd.DataFrame]] - A triplet of best (config, score, context, metadata) DataFrames of best observations. 
+ observations : Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]] + A triplet of best (config, score, context) DataFrames of best observations. """ if len(self._observations) == 0: raise ValueError("No observations registered yet.") - (configs, scores, contexts, metadatas) = self.get_observations() + (configs, scores, contexts) = self.get_observations() idx = scores.nsmallest(n_max, columns=self._optimization_targets, keep="first").index return (configs.loc[idx], scores.loc[idx], - None if contexts is None else contexts.loc[idx], None if metadatas is None else metadatas.loc[idx]) + None if contexts is None else contexts.loc[idx]) def cleanup(self) -> None: """ diff --git a/mlos_core/mlos_core/optimizers/random_optimizer.py b/mlos_core/mlos_core/optimizers/random_optimizer.py index b1acec0d56c..8129f65b2ee 100644 --- a/mlos_core/mlos_core/optimizers/random_optimizer.py +++ b/mlos_core/mlos_core/optimizers/random_optimizer.py @@ -25,7 +25,7 @@ class RandomOptimizer(BaseOptimizer): """ def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, - context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None) -> None: """Registers the given configurations and scores. Doesn't do anything on the RandomOptimizer except storing configurations for logging. @@ -34,14 +34,15 @@ def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame, ---------- configurations : pd.DataFrame Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. + scores : pd.DataFrame Scores from running the configurations. The index is the same as the index of the configurations. + context : None - Metadata is ignored for random_optimizer. - metadata : None - Metadata is ignored for random_optimizer. + Not Yet Implemented. 
""" - pass + if context is not None: + warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning) # should we pop them from self.pending_observations? def _suggest( @@ -61,8 +62,8 @@ def _suggest( configuration : pd.DataFrame Pandas dataframe with a single row. Column names are the parameter names. - metadata : pd.DataFrame - Pandas dataframe with a single row containing the metadata. + context : pd.DataFrame + Pandas dataframe with a single row containing the context. Column names are the budget, seed, and instance of the evaluation, if valid. """ if context is not None: @@ -71,6 +72,6 @@ def _suggest( return pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0]), None def register_pending(self, configurations: pd.DataFrame, - metadata: Optional[pd.DataFrame] = None) -> None: + context: Optional[pd.DataFrame] = None) -> None: raise NotImplementedError() - # self._pending_observations.append((configurations, metadata)) + # self._pending_observations.append((configurations, context)) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py index 4f23ece41b8..2b1a11dae5d 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_multiobj_test.py @@ -7,49 +7,22 @@ """ import logging -from typing import List, Optional, Type - import pytest import pandas as pd import numpy as np import ConfigSpace as CS -from mlos_core.optimizers import OptimizerType, BaseOptimizer +from mlos_core.optimizers import OptimizerType, OptimizerFactory from mlos_core.tests import SEED + _LOG = logging.getLogger(__name__) +_LOG.setLevel(logging.DEBUG) -@pytest.mark.parametrize(('optimizer_class', 'kwargs'), [ - *[(member.value, {}) for member in OptimizerType], -]) -def test_multi_target_opt_wrong_weights(optimizer_class: Type[BaseOptimizer], kwargs: dict) -> None: - """ - Make sure that 
the optimizer raises an error if the number of objective weights - does not match the number of optimization targets. - """ - with pytest.raises(ValueError): - optimizer_class( - parameter_space=CS.ConfigurationSpace(seed=SEED), - optimization_targets=['main_score', 'other_score'], - objective_weights=[1], - **kwargs - ) - - -@pytest.mark.parametrize(('objective_weights'), [ - [2, 1], - [0.5, 0.5], - None, -]) -@pytest.mark.parametrize(('optimizer_class', 'kwargs'), [ - *[(member.value, {}) for member in OptimizerType], -]) -def test_multi_target_opt(objective_weights: Optional[List[float]], - optimizer_class: Type[BaseOptimizer], - kwargs: dict) -> None: +def test_multi_target_opt() -> None: """ Toy multi-target optimization problem to test the optimizers with mixed numeric types to ensure that original dtypes are retained. @@ -59,7 +32,7 @@ def test_multi_target_opt(objective_weights: Optional[List[float]], def objective(point: pd.DataFrame) -> pd.DataFrame: # mix of hyperparameters, optimal is to select the highest possible return pd.DataFrame({ - "main_score": point.x + point.y, + "score": point.x + point.y, "other_score": point.x ** 2 + point.y ** 2, }) @@ -70,11 +43,15 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: input_space.add_hyperparameter( CS.UniformFloatHyperparameter(name='y', lower=0.0, upper=5.0)) - optimizer = optimizer_class( + optimizer = OptimizerFactory.create( parameter_space=input_space, - optimization_targets=['main_score', 'other_score'], - objective_weights=objective_weights, - **kwargs, + optimization_targets=['score', 'other_score'], + optimizer_type=OptimizerType.SMAC, + optimizer_kwargs={ + # Test with default config. 
+ 'use_default_config': True, + # 'n_random_init': 10, + }, ) with pytest.raises(ValueError, match="No observations"): @@ -84,7 +61,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: optimizer.get_observations() for _ in range(max_iterations): - suggestion, metadata = optimizer.suggest() + suggestion, context = optimizer.suggest() assert isinstance(suggestion, pd.DataFrame) assert set(suggestion.columns) == {'x', 'y'} # Check suggestion values are the expected dtype @@ -98,31 +75,22 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: # Test registering the suggested configuration with a score. observation = objective(suggestion) assert isinstance(observation, pd.DataFrame) - assert set(observation.columns) == {'main_score', 'other_score'} - optimizer.register(suggestion, observation, context=None, metadata=metadata) + assert set(observation.columns) == {'score', 'other_score'} + optimizer.register(suggestion, observation, context) - (best_config, best_score, best_metadata, best_context) = optimizer.get_best_observations() + (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame) assert isinstance(best_score, pd.DataFrame) - if optimizer_class is OptimizerType.SMAC: - assert isinstance(best_metadata, pd.DataFrame) or best_metadata is None - else: - assert best_metadata is None - assert best_context is None assert set(best_config.columns) == {'x', 'y'} - assert set(best_score.columns) == {'main_score', 'other_score'} + assert set(best_score.columns) == {'score', 'other_score'} assert best_config.shape == (1, 2) assert best_score.shape == (1, 2) - (all_configs, all_scores, all_metadata, best_context) = optimizer.get_observations() + (all_configs, all_scores, all_contexts) = optimizer.get_observations() assert isinstance(all_configs, pd.DataFrame) assert isinstance(all_scores, pd.DataFrame) - if optimizer_class is OptimizerType.SMAC: - assert isinstance(all_metadata, pd.DataFrame) or all_metadata 
is None - else: - assert all_metadata is None - assert best_context is None + assert isinstance(all_contexts, pd.DataFrame) or all_contexts is None assert set(all_configs.columns) == {'x', 'y'} - assert set(all_scores.columns) == {'main_score', 'other_score'} + assert set(all_scores.columns) == {'score', 'other_score'} assert all_configs.shape == (max_iterations, 2) assert all_scores.shape == (max_iterations, 2) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 68889701f3c..14dec95f5b8 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -48,7 +48,7 @@ def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace assert optimizer.parameter_space is not None - suggestion, _ = optimizer.suggest() + suggestion, context = optimizer.suggest() assert suggestion is not None myrepr = repr(optimizer) @@ -94,7 +94,7 @@ def objective(x: pd.Series) -> pd.DataFrame: optimizer.get_observations() for _ in range(max_iterations): - suggestion, metadata = optimizer.suggest() + suggestion, context = optimizer.suggest() assert isinstance(suggestion, pd.DataFrame) assert set(suggestion.columns) == {'x', 'y', 'z'} # check that suggestion is in the space @@ -103,9 +103,9 @@ def objective(x: pd.Series) -> pd.DataFrame: configuration.is_valid_configuration() observation = objective(suggestion['x']) assert isinstance(observation, pd.DataFrame) - optimizer.register(suggestion, observation, context=None, metadata=metadata) + optimizer.register(suggestion, observation, context) - (best_config, best_score, _, _) = optimizer.get_best_observations() + (best_config, best_score, best_context) = optimizer.get_best_observations() assert isinstance(best_config, pd.DataFrame) assert isinstance(best_score, pd.DataFrame) assert set(best_config.columns) == {'x', 'y', 'z'} @@ -114,7 +114,7 @@ def objective(x: pd.Series) -> 
pd.DataFrame: assert best_score.shape == (1, 1) assert best_score.score.iloc[0] < -5 - (all_configs, all_scores, _, _) = optimizer.get_observations() + (all_configs, all_scores, all_contexts) = optimizer.get_observations() assert isinstance(all_configs, pd.DataFrame) assert isinstance(all_scores, pd.DataFrame) assert set(all_configs.columns) == {'x', 'y', 'z'} @@ -266,36 +266,36 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: _LOG.debug("Optimizer is done with random init.") # loop for optimizer - suggestion, metadata = optimizer.suggest() + suggestion, context = optimizer.suggest() observation = objective(suggestion) - optimizer.register(suggestion, observation, context=None, metadata=metadata) + optimizer.register(suggestion, observation, context) # loop for llamatune-optimizer - suggestion, metadata = llamatune_optimizer.suggest() + suggestion, context = llamatune_optimizer.suggest() _x, _y = suggestion['x'].iloc[0], suggestion['y'].iloc[0] assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3., rel=1e-3) # optimizer explores 1-dimensional space observation = objective(suggestion) - llamatune_optimizer.register(suggestion, observation, context=None, metadata=metadata) + llamatune_optimizer.register(suggestion, observation, context) # Retrieve best observations best_observation = optimizer.get_best_observations() llamatune_best_observation = llamatune_optimizer.get_best_observations() - for (best_config, best_score, _, _) in (best_observation, llamatune_best_observation): + for (best_config, best_score, best_context) in (best_observation, llamatune_best_observation): assert isinstance(best_config, pd.DataFrame) assert isinstance(best_score, pd.DataFrame) assert set(best_config.columns) == {'x', 'y'} assert set(best_score.columns) == {'score'} - (best_config, best_score, _, _) = best_observation - (llamatune_best_config, llamatune_best_score, _metadata) = llamatune_best_observation + (best_config, best_score, _context) = best_observation + 
(llamatune_best_config, llamatune_best_score, _context) = llamatune_best_observation # LlamaTune's optimizer score should better (i.e., lower) than plain optimizer's one, or close to that assert best_score.score.iloc[0] > llamatune_best_score.score.iloc[0] or \ best_score.score.iloc[0] + 1e-3 > llamatune_best_score.score.iloc[0] # Retrieve and check all observations - for (all_configs, all_scores, _, _) in ( + for (all_configs, all_scores, all_contexts) in ( optimizer.get_observations(), llamatune_optimizer.get_observations()): assert isinstance(all_configs, pd.DataFrame) assert isinstance(all_scores, pd.DataFrame) @@ -393,7 +393,4 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: (all_configs, all_scores, all_contexts) = optimizer.get_observations() assert isinstance(all_configs, pd.DataFrame) assert isinstance(all_scores, pd.DataFrame) - if optimizer_type is OptimizerType.SMAC: - assert isinstance(all_contexts, pd.DataFrame) or all_contexts is None - else: - assert all_contexts is None + assert isinstance(all_contexts, pd.DataFrame) or all_contexts is None