Update mlos_core API to support multi-factor optimization (#730)
* [x] Pass multi-column DataFrame instead of Sequence to
`BaseOptimizer.register()` and other methods that deal with scores (see the sketch after this list)
* [x] Update mlos_bench `MlosCoreOptimizer` to support the new mlos_core
API
* [x] Update unit tests to work with the new API
* [x] Add unit tests for end-to-end multi-target optimization 
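
A minimal sketch of the new score format (illustrative target names and values, not code from this PR):

```python
import pandas as pd

# One row per configuration, one column per optimization target,
# instead of a flat Sequence of single-valued scores.
configs = pd.DataFrame([{"x": 1}, {"x": 2}])
scores = pd.DataFrame([{"latency": 12.3, "cost": 0.45},
                       {"latency": 9.8, "cost": 0.61}])
# optimizer.register(configs, scores)  # `optimizer` created via OptimizerFactory.create()
```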

Merge after ~#726~

---------

Co-authored-by: Brian Kroth <[email protected]>
motus and bpkroth authored May 20, 2024
1 parent 6d8854b commit e2b819f
Showing 11 changed files with 305 additions and 115 deletions.
2 changes: 2 additions & 0 deletions mlos_bench/mlos_bench/optimizers/base_optimizer.py
@@ -325,6 +325,8 @@ def _get_scores(self, status: Status,

if not status.is_succeeded():
assert scores is None
# TODO: Be more flexible with values used for failed trials (not just +inf).
# Issue: https://github.com/microsoft/MLOS/issues/523
return {opt_target: float("inf") for opt_target in self._opt_targets}

assert scores is not None
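
For reference, a standalone sketch of what that comprehension produces for a failed trial (hypothetical target names and signs):

```python
# With two configured targets, a failed trial registers as all-+inf,
# i.e., the worst possible value for minimization.
_opt_targets = {"latency": 1, "throughput": -1}   # name -> sign, illustrative
failed_scores = {opt_target: float("inf") for opt_target in _opt_targets}
print(failed_scores)   # {'latency': inf, 'throughput': inf}
```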
53 changes: 33 additions & 20 deletions mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
@@ -46,11 +46,6 @@ def __init__(self,
service: Optional[Service] = None):
super().__init__(tunables, config, global_config, service)

# TODO: Remove after implementing multi-target optimization in mlos_core
if len(self._opt_targets) != 1:
raise NotImplementedError(f"Multi-target optimization is not supported: {self}")
(self._opt_target, self._opt_sign) = list(self._opt_targets.items())[0]

opt_type = getattr(OptimizerType, self._config.pop(
'optimizer_type', DEFAULT_OPTIMIZER_TYPE.name))

@@ -79,6 +74,7 @@ def __init__(self,

self._opt: BaseOptimizer = OptimizerFactory.create(
parameter_space=self.config_space,
optimization_targets=list(self._opt_targets),
optimizer_type=opt_type,
optimizer_kwargs=self._config,
space_adapter_type=space_adapter_type,
@@ -99,26 +95,43 @@ def bulk_register(self,
configs: Sequence[dict],
scores: Sequence[Optional[Dict[str, TunableValue]]],
status: Optional[Sequence[Status]] = None) -> bool:

if not super().bulk_register(configs, scores, status):
return False

df_configs = self._to_df(configs) # Impute missing values, if necessary
df_scores = pd.Series(
[self._extract_target(score) for score in scores],
dtype=float) * self._opt_sign

df_scores = self._adjust_signs_df(
pd.DataFrame([{} if score is None else score for score in scores]))

opt_targets = list(self._opt_targets)
if status is not None:
# Select only the completed trials, set scores for failed trials to +inf.
df_status = pd.Series(status)
df_scores[df_status != Status.SUCCEEDED] = float("inf")
# TODO: Be more flexible with values used for failed trials (not just +inf).
# Issue: https://github.com/microsoft/MLOS/issues/523
df_scores.loc[df_status != Status.SUCCEEDED, opt_targets] = float("inf")
df_status_completed = df_status.apply(Status.is_completed)
df_configs = df_configs[df_status_completed]
df_scores = df_scores[df_status_completed]
self._opt.register(df_configs, df_scores)

# TODO: Specify (in the config) which metrics to pass to the optimizer.
# Issue: https://github.com/microsoft/MLOS/issues/745
self._opt.register(df_configs, df_scores[opt_targets].astype(float))

if _LOG.isEnabledFor(logging.DEBUG):
(score, _) = self.get_best_observation()
_LOG.debug("Warm-up END: %s :: %s", self, score)

return True
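
A standalone sketch of the masking logic above, using plain pandas with made-up target values and the status simplified to a boolean mask:

```python
import pandas as pd

# Three trials: the second one failed and carries no scores.
df_scores = pd.DataFrame([{"latency": 10.0}, {}, {"latency": 7.5}])
succeeded = pd.Series([True, False, True])

# Failed trials get +inf for every optimization target ...
df_scores.loc[~succeeded, ["latency"]] = float("inf")
# ... and only the completed trials would then be passed on to mlos_core.
print(df_scores)
```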

def _extract_target(self, scores: Optional[Dict[str, TunableValue]]) -> Optional[TunableValue]:
return None if scores is None else scores[self._opt_target]
def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame:
"""
Adjust the signs of the scores in place for the MINIMIZATION problem.
"""
for (opt_target, opt_dir) in self._opt_targets.items():
df_scores[opt_target] *= opt_dir
return df_scores
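
A self-contained illustration of the sign adjustment (hypothetical targets; 1 = minimize, -1 = maximize):

```python
import pandas as pd

opt_targets = {"latency": 1, "throughput": -1}   # illustrative name -> direction map
df_scores = pd.DataFrame([{"latency": 10.0, "throughput": 200.0}])
for (opt_target, opt_dir) in opt_targets.items():
    df_scores[opt_target] *= opt_dir
print(df_scores)   # latency stays 10.0; throughput becomes -200.0 so it can be minimized
```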

def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
"""
@@ -175,21 +188,21 @@ def suggest(self) -> TunableGroups:

def register(self, tunables: TunableGroups, status: Status,
score: Optional[Dict[str, TunableValue]] = None) -> Optional[Dict[str, float]]:
registered_score = super().register(tunables, status, score) # With _opt_sign applied
registered_score = super().register(tunables, status, score) # Sign-adjusted for MINIMIZATION
if status.is_completed():
assert registered_score is not None
df_config = self._to_df([tunables.get_param_values()])
_LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config)
self._opt.register(df_config, pd.Series([registered_score[self._opt_target]], dtype=float))
# TODO: Specify (in the config) which metrics to pass to the optimizer.
# Issue: https://github.com/microsoft/MLOS/issues/745
self._opt.register(df_config, pd.DataFrame([registered_score], dtype=float))
return registered_score
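
A sketch of the one-row DataFrame handed to mlos_core for a single trial (values made up and already sign-adjusted):

```python
import pandas as pd

registered_score = {"latency": 12.3, "throughput": -200.0}   # hypothetical, sign-adjusted
df_score = pd.DataFrame([registered_score], dtype=float)
print(df_score)   # one row, one column per optimization target
```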

def get_best_observation(self) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
df_config = self._opt.get_best_observation()
(df_config, df_score, _df_context) = self._opt.get_best_observations()
if len(df_config) == 0:
return (None, None)
params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
_LOG.debug("Best observation: %s", params)
score = params.pop("score")
assert score is not None
score = float(score) * self._opt_sign # mlos_core always uses the `score` column
return ({self._opt_target: score}, self._tunables.copy().assign(params))
scores = self._adjust_signs_df(df_score).iloc[0].to_dict()
_LOG.debug("Best observation: %s score: %s", params, scores)
return (scores, self._tunables.copy().assign(params))
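
A hedged consumer-side sketch of the new return shape (not runnable on its own: `optimizer` stands for an already-configured `MlosCoreOptimizer`):

```python
# `optimizer` is a hypothetical, already-configured MlosCoreOptimizer instance.
(best_scores, best_tunables) = optimizer.get_best_observation()
if best_scores is None:
    print("No completed trials registered yet.")
else:
    print(best_scores)     # e.g. {"latency": 12.3, "throughput": 200.0} -- one entry per target
    print(best_tunables)   # the TunableGroups that produced those scores
```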
6 changes: 5 additions & 1 deletion mlos_core/mlos_core/optimizers/__init__.py
@@ -7,7 +7,7 @@
"""

from enum import Enum
from typing import Optional, TypeVar
from typing import List, Optional, TypeVar

import ConfigSpace

@@ -62,6 +62,7 @@ class OptimizerFactory:
@staticmethod
def create(*,
parameter_space: ConfigSpace.ConfigurationSpace,
optimization_targets: List[str],
optimizer_type: OptimizerType = DEFAULT_OPTIMIZER_TYPE,
optimizer_kwargs: Optional[dict] = None,
space_adapter_type: SpaceAdapterType = SpaceAdapterType.IDENTITY,
@@ -74,6 +75,8 @@ def create(*,
----------
parameter_space : ConfigSpace.ConfigurationSpace
Input configuration space.
optimization_targets : List[str]
The names of the optimization targets to minimize.
optimizer_type : OptimizerType
Optimizer class as defined by Enum.
optimizer_kwargs : Optional[dict]
@@ -102,6 +105,7 @@ def create(*,

optimizer: ConcreteOptimizer = optimizer_type.value(
parameter_space=parameter_space,
optimization_targets=optimization_targets,
space_adapter=space_adapter,
**optimizer_kwargs
)
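
A minimal end-to-end sketch of the updated factory call (toy parameter space and illustrative target names; exact signatures may differ between releases):

```python
import ConfigSpace as CS

from mlos_core.optimizers import OptimizerFactory, OptimizerType

space = CS.ConfigurationSpace(seed=1234)
space.add_hyperparameter(CS.UniformIntegerHyperparameter("x", lower=0, upper=100))

opt = OptimizerFactory.create(
    parameter_space=space,
    optimization_targets=["latency", "cost"],   # the new required argument
    optimizer_type=OptimizerType.SMAC,
)
```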
mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py
@@ -29,6 +29,7 @@ class SmacOptimizer(BaseBayesianOptimizer):

def __init__(self, *, # pylint: disable=too-many-locals
parameter_space: ConfigSpace.ConfigurationSpace,
optimization_targets: List[str],
space_adapter: Optional[BaseSpaceAdapter] = None,
seed: Optional[int] = 0,
run_name: Optional[str] = None,
@@ -46,6 +47,9 @@ def __init__(self, *, # pylint: disable=too-many-locals
parameter_space : ConfigSpace.ConfigurationSpace
The parameter space to optimize.
optimization_targets : List[str]
The names of the optimization targets to minimize.
space_adapter : BaseSpaceAdapter
The space adapter class to employ for parameter space transformations.
@@ -86,6 +90,7 @@ def __init__(self, *, # pylint: disable=too-many-locals
"""
super().__init__(
parameter_space=parameter_space,
optimization_targets=optimization_targets,
space_adapter=space_adapter,
)

@@ -125,6 +130,7 @@ def __init__(self, *, # pylint: disable=too-many-locals

scenario: Scenario = Scenario(
self.optimizer_parameter_space,
objectives=self._optimization_targets,
name=run_name,
output_directory=Path(output_directory),
deterministic=True,
@@ -186,6 +192,10 @@ def __init__(self, *, # pylint: disable=too-many-locals
intensifier=intensifier,
random_design=random_design,
config_selector=config_selector,
multi_objective_algorithm=Optimizer_Smac.get_multi_objective_algorithm(
scenario,
# objective_weights=[1, 2], # TODO: pass weights as constructor args
),
overwrite=True,
logging_level=False, # Use the existing logger
)
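
For reference, a standalone sketch of the underlying SMAC multi-objective wiring (toy space, illustrative objective names and weights; not the exact code in this file):

```python
from ConfigSpace import ConfigurationSpace
from smac import HyperparameterOptimizationFacade, Scenario

cs = ConfigurationSpace({"x": (0.0, 1.0)})
scenario = Scenario(cs, objectives=["latency", "cost"], n_trials=10)
multi_objective_algorithm = HyperparameterOptimizationFacade.get_multi_objective_algorithm(
    scenario,
    objective_weights=[1, 1],   # equal weights here; omit for SMAC's default behavior
)
```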
@@ -228,15 +238,16 @@ def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None
# -- this is planned to be fixed in a future release: https://github.com/automl/SMAC3/issues/946
raise RuntimeError('This function should never be called.')

def _register(self, configurations: pd.DataFrame, scores: pd.Series, context: Optional[pd.DataFrame] = None) -> None:
def _register(self, configurations: pd.DataFrame,
scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configurations and scores.
Parameters
----------
configurations : pd.DataFrame
Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
scores : pd.Series
scores : pd.DataFrame
Scores from running the configurations. The index is the same as the index of the configurations.
context : pd.DataFrame
@@ -248,10 +259,11 @@ def _register(self, configurations: pd.DataFrame, scores: pd.Series, context: Op
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)

# Register each trial (one-by-one)
for config, score in zip(self._to_configspace_configs(configurations), scores.tolist()):
for (config, (_i, score)) in zip(self._to_configspace_configs(configurations), scores.iterrows()):
# Retrieve previously generated TrialInfo (returned by .ask()) or create new TrialInfo instance
info: TrialInfo = self.trial_info_map.get(config, TrialInfo(config=config, seed=self.base_optimizer.scenario.seed))
value: TrialValue = TrialValue(cost=score, time=0.0, status=StatusType.SUCCESS)
info: TrialInfo = self.trial_info_map.get(
config, TrialInfo(config=config, seed=self.base_optimizer.scenario.seed))
value = TrialValue(cost=list(score.astype(float)), time=0.0, status=StatusType.SUCCESS)
self.base_optimizer.tell(info, value, save=False)

# Save optimizer once we register all configs
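
A small sketch of the multi-objective `TrialValue` shape (costs are made up and ordered like the Scenario objectives):

```python
from smac.runhistory import StatusType, TrialValue

# cost is now a list with one entry per objective instead of a single float.
value = TrialValue(cost=[12.3, 0.45], time=0.0, status=StatusType.SUCCESS)
print(value.cost)   # [12.3, 0.45]
```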
@@ -293,7 +305,7 @@ def surrogate_predict(self, configurations: pd.DataFrame, context: Optional[pd.D
if context is not None:
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
if self._space_adapter and not isinstance(self._space_adapter, IdentityAdapter):
raise NotImplementedError()
raise NotImplementedError("Space adapter not supported for surrogate_predict.")

# pylint: disable=protected-access
if len(self._observations) <= self.base_optimizer._initial_design._n_configs:
28 changes: 20 additions & 8 deletions mlos_core/mlos_core/optimizers/flaml_optimizer.py
@@ -6,7 +6,7 @@
Contains the FlamlOptimizer class.
"""

from typing import Dict, NamedTuple, Optional, Union
from typing import Dict, List, NamedTuple, Optional, Union
from warnings import warn

import ConfigSpace
@@ -32,17 +32,22 @@ class FlamlOptimizer(BaseOptimizer):

def __init__(self, *,
parameter_space: ConfigSpace.ConfigurationSpace,
optimization_targets: List[str],
space_adapter: Optional[BaseSpaceAdapter] = None,
low_cost_partial_config: Optional[dict] = None,
seed: Optional[int] = None):
"""
Create an MLOS wrapper class for FLAML.
Create an MLOS wrapper for FLAML.
Parameters
----------
parameter_space : ConfigSpace.ConfigurationSpace
The parameter space to optimize.
optimization_targets : List[str]
The names of the optimization targets to minimize.
For FLAML it must be a list with a single element, e.g., `["score"]`.
space_adapter : BaseSpaceAdapter
The space adapter class to employ for parameter space transformations.
@@ -55,9 +60,14 @@ def __init__(self, *,
"""
super().__init__(
parameter_space=parameter_space,
optimization_targets=optimization_targets,
space_adapter=space_adapter,
)

if len(self._optimization_targets) != 1:
raise ValueError("FLAML does not support multi-target optimization")
self._flaml_optimization_target = self._optimization_targets[0]

# Per upstream documentation, it is recommended to set the seed for
# flaml at the start of its operation globally.
if seed is not None:
@@ -72,7 +82,7 @@ def __init__(self, *,
self.evaluated_samples: Dict[ConfigSpace.Configuration, EvaluatedSample] = {}
self._suggested_config: Optional[dict]
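
A minimal sketch of the single-target constraint (toy space; the module path and constructor arguments are taken from this diff):

```python
import ConfigSpace as CS

from mlos_core.optimizers.flaml_optimizer import FlamlOptimizer

space = CS.ConfigurationSpace(seed=42)
space.add_hyperparameter(CS.UniformFloatHyperparameter("x", lower=0.0, upper=1.0))

flaml_opt = FlamlOptimizer(parameter_space=space, optimization_targets=["score"])
# FlamlOptimizer(parameter_space=space, optimization_targets=["a", "b"])  # raises ValueError
```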

def _register(self, configurations: pd.DataFrame, scores: pd.Series,
def _register(self, configurations: pd.DataFrame, scores: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configurations and scores.
@@ -81,15 +91,16 @@ def _register(self, configurations: pd.DataFrame, scores: pd.Series,
configurations : pd.DataFrame
Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
scores : pd.Series
scores : pd.DataFrame
Scores from running the configurations. The index is the same as the index of the configurations.
context : None
Not Yet Implemented.
"""
if context is not None:
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
for (_, config), score in zip(configurations.astype('O').iterrows(), scores):
for (_, config), score in zip(configurations.astype('O').iterrows(),
scores[self._flaml_optimization_target]):
cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration(
self.optimizer_parameter_space, values=config.to_dict())
if cs_config in self.evaluated_samples:
@@ -140,7 +151,7 @@ def _target_function(self, config: dict) -> Union[dict, None]:
"""
cs_config = normalize_config(self.optimizer_parameter_space, config)
if cs_config in self.evaluated_samples:
return {'score': self.evaluated_samples[cs_config].score}
return {self._flaml_optimization_target: self.evaluated_samples[cs_config].score}

self._suggested_config = dict(cs_config) # Cleaned-up version of the config
return None # Returning None stops the process
@@ -156,7 +167,8 @@ def _get_next_config(self) -> dict:
Returns
-------
result: dict
Dictionary with a single key, `score`, if config already evaluated; `None` otherwise.
A dictionary with a single key equal to the name of the optimization target,
if the config has already been evaluated; `None` otherwise.
Raises
------
@@ -182,7 +194,7 @@ def _get_next_config(self) -> dict:
self._target_function,
config=self.flaml_parameter_space,
mode='min',
metric='score',
metric=self._flaml_optimization_target,
points_to_evaluate=points_to_evaluate,
evaluated_rewards=evaluated_rewards,
num_samples=len(points_to_evaluate) + 1,
