Skip to content

Commit

Permalink
Add Metadata to optimizers (#770)
Browse files Browse the repository at this point in the history
Adds metadata to the response from suggest, and allows it to be passed into register.

This is in support of adding multi-fidelity support (#751)

---------

Co-authored-by: Brian Kroth <[email protected]>
Co-authored-by: Brian Kroth <[email protected]>
  • Loading branch information
3 people authored Jul 1, 2024
1 parent 5338631 commit d1a4658
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 35 deletions.
2 changes: 1 addition & 1 deletion mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def suggest(self) -> TunableGroups:
tunables = super().suggest()
if self._start_with_defaults:
_LOG.info("Use default values for the first trial")
df_config = self._opt.suggest(defaults=self._start_with_defaults)
df_config, _metadata = self._opt.suggest(defaults=self._start_with_defaults)
self._start_with_defaults = False
_LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config)
return tunables.assign(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from logging import warning
from pathlib import Path
from typing import Dict, List, Optional, Union, TYPE_CHECKING
from typing import Dict, List, Optional, Tuple, Union, TYPE_CHECKING
from tempfile import TemporaryDirectory
from warnings import warn

Expand Down Expand Up @@ -242,7 +242,7 @@ def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None
raise RuntimeError('This function should never be called.')

def _register(self, *, configs: pd.DataFrame,
scores: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
scores: pd.DataFrame, context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configs and scores.
Parameters
Expand All @@ -255,6 +255,9 @@ def _register(self, *, configs: pd.DataFrame,
context : pd.DataFrame
Not Yet Implemented.
metadata: pd.DataFrame
Not Yet Implemented.
"""
from smac.runhistory import StatusType, TrialInfo, TrialValue # pylint: disable=import-outside-toplevel

Expand All @@ -272,7 +275,7 @@ def _register(self, *, configs: pd.DataFrame,
# Save optimizer once we register all configs
self.base_optimizer.optimizer.save()

def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
"""Suggests a new configuration.
Parameters
Expand All @@ -284,6 +287,9 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
-------
configuration : pd.DataFrame
Pandas dataframe with a single row. Column names are the parameter names.
metadata : Optional[pd.DataFrame]
Not yet implemented.
"""
if TYPE_CHECKING:
from smac.runhistory import TrialInfo # pylint: disable=import-outside-toplevel,unused-import
Expand All @@ -297,9 +303,11 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
assert trial.config.config_space == self.optimizer_parameter_space
self.trial_info_map[trial.config] = trial
config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys()))
return config_df
return config_df, None

def register_pending(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None:
def register_pending(self, *, configs: pd.DataFrame,
context: Optional[pd.DataFrame] = None,
metadata: Optional[pd.DataFrame] = None) -> None:
raise NotImplementedError()

def surrogate_predict(self, *, configs: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray:
Expand Down
19 changes: 14 additions & 5 deletions mlos_core/mlos_core/optimizers/flaml_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
Contains the FlamlOptimizer class.
"""

from typing import Dict, List, NamedTuple, Optional, Union
from typing import Dict, List, NamedTuple, Optional, Tuple, Union
from warnings import warn

import ConfigSpace
Expand Down Expand Up @@ -86,7 +86,7 @@ def __init__(self, *, # pylint: disable=too-many-arguments
self._suggested_config: Optional[dict]

def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configs and scores.
Parameters
Expand All @@ -99,9 +99,15 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
context : None
Not Yet Implemented.
metadata : None
Not Yet Implemented.
"""
if context is not None:
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
if metadata is not None:
warn(f"Not Implemented: Ignoring metadata {list(metadata.columns)}", UserWarning)

for (_, config), (_, score) in zip(configs.astype('O').iterrows(), scores.iterrows()):
cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration(
self.optimizer_parameter_space, values=config.to_dict())
Expand All @@ -112,7 +118,7 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
score=float(np.average(score.astype(float), weights=self._objective_weights)),
)

def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
"""Suggests a new configuration.
Sampled at random using ConfigSpace.
Expand All @@ -126,14 +132,17 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
-------
configuration : pd.DataFrame
Pandas dataframe with a single row. Column names are the parameter names.
metadata : None
Not implemented.
"""
if context is not None:
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
config: dict = self._get_next_config()
return pd.DataFrame(config, index=[0])
return pd.DataFrame(config, index=[0]), None

def register_pending(self, *, configs: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
raise NotImplementedError()

def _target_function(self, config: dict) -> Union[dict, None]:
Expand Down
26 changes: 19 additions & 7 deletions mlos_core/mlos_core/optimizers/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def space_adapter(self) -> Optional[BaseSpaceAdapter]:
return self._space_adapter

def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
"""Wrapper method, which employs the space adapter (if any), before registering the configs and scores.
Parameters
Expand All @@ -81,8 +81,12 @@ def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
context : pd.DataFrame
Not Yet Implemented.
metadata : Optional[pd.DataFrame]
Not Yet Implemented.
"""
# Do some input validation.
assert metadata is None or isinstance(metadata, pd.DataFrame)
assert set(scores.columns) == set(self._optimization_targets), \
"Mismatched optimization targets."
assert self._has_context is None or self._has_context ^ (context is None), \
Expand All @@ -105,7 +109,7 @@ def register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,

@abstractmethod
def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configs and scores.
Parameters
Expand All @@ -120,7 +124,8 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
"""
pass # pylint: disable=unnecessary-pass # pragma: no cover

def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> pd.DataFrame:
def suggest(self, *, context: Optional[pd.DataFrame] = None,
defaults: bool = False) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
"""
Wrapper method, which employs the space adapter (if any), after suggesting a new configuration.
Expand All @@ -139,10 +144,11 @@ def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = Fa
"""
if defaults:
configuration = config_to_dataframe(self.parameter_space.get_default_configuration())
metadata = None
if self.space_adapter is not None:
configuration = self.space_adapter.inverse_transform(configuration)
else:
configuration = self._suggest(context=context)
configuration, metadata = self._suggest(context=context)
assert len(configuration) == 1, \
"Suggest must return a single configuration."
assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), \
Expand All @@ -151,10 +157,10 @@ def suggest(self, *, context: Optional[pd.DataFrame] = None, defaults: bool = Fa
configuration = self._space_adapter.transform(configuration)
assert set(configuration.columns).issubset(set(self.parameter_space)), \
"Space adapter produced a configuration that does not match the expected parameter space."
return configuration
return configuration, metadata

@abstractmethod
def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
"""Suggests a new configuration.
Parameters
Expand All @@ -166,12 +172,16 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
-------
configuration : pd.DataFrame
Pandas dataframe with a single row. Column names are the parameter names.
metadata : Optional[pd.DataFrame]
The metadata associated with the given configuration used for evaluations.
"""
pass # pylint: disable=unnecessary-pass # pragma: no cover

@abstractmethod
def register_pending(self, *, configs: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
context: Optional[pd.DataFrame] = None,
metadata: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configs as "pending".
That is it say, it has been suggested by the optimizer, and an experiment trial has been started.
This can be useful for executing multiple trials in parallel, retry logic, etc.
Expand All @@ -182,6 +192,8 @@ def register_pending(self, *, configs: pd.DataFrame,
Dataframe of configs / parameters. The columns are parameter names and the rows are the configs.
context : pd.DataFrame
Not Yet Implemented.
metadata : Optional[pd.DataFrame]
Not Yet Implemented.
"""
pass # pylint: disable=unnecessary-pass # pragma: no cover

Expand Down
18 changes: 13 additions & 5 deletions mlos_core/mlos_core/optimizers/random_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
Contains the RandomOptimizer class.
"""

from typing import Optional
from typing import Optional, Tuple
from warnings import warn

import pandas as pd
Expand All @@ -25,7 +25,7 @@ class RandomOptimizer(BaseOptimizer):
"""

def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
"""Registers the given configs and scores.
Doesn't do anything on the RandomOptimizer except storing configs for logging.
Expand All @@ -40,12 +40,17 @@ def _register(self, *, configs: pd.DataFrame, scores: pd.DataFrame,
context : None
Not Yet Implemented.
metadata : None
Not Yet Implemented.
"""
if context is not None:
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
if metadata is not None:
warn(f"Not Implemented: Ignoring context {list(metadata.columns)}", UserWarning)
# should we pop them from self.pending_observations?

def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
"""Suggests a new configuration.
Sampled at random using ConfigSpace.
Expand All @@ -59,13 +64,16 @@ def _suggest(self, *, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
-------
configuration : pd.DataFrame
Pandas dataframe with a single row. Column names are the parameter names.
metadata : None
Not implemented.
"""
if context is not None:
# not sure how that works here?
warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
return pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0])
return pd.DataFrame(dict(self.optimizer_parameter_space.sample_configuration()), index=[0]), None

def register_pending(self, *, configs: pd.DataFrame,
context: Optional[pd.DataFrame] = None) -> None:
context: Optional[pd.DataFrame] = None, metadata: Optional[pd.DataFrame] = None) -> None:
raise NotImplementedError()
# self._pending_observations.append((configs, context))
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_context_not_implemented_warning(configuration_space: CS.ConfigurationSp
optimization_targets=['score'],
**kwargs
)
suggestion = optimizer.suggest()
suggestion, _metadata = optimizer.suggest()
scores = pd.DataFrame({'score': [1]})
context = pd.DataFrame([["something"]])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,9 @@ def objective(point: pd.DataFrame) -> pd.DataFrame:
optimizer.get_observations()

for _ in range(max_iterations):
suggestion = optimizer.suggest()
suggestion, metadata = optimizer.suggest()
assert isinstance(suggestion, pd.DataFrame)
assert metadata is None or isinstance(metadata, pd.DataFrame)
assert set(suggestion.columns) == {'x', 'y'}
# Check suggestion values are the expected dtype
assert isinstance(suggestion.x.iloc[0], np.integer)
Expand Down
21 changes: 11 additions & 10 deletions mlos_core/mlos_core/tests/optimizers/optimizer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@ def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace

assert optimizer.parameter_space is not None

suggestion = optimizer.suggest()
suggestion, metadata = optimizer.suggest()
assert suggestion is not None

myrepr = repr(optimizer)
assert myrepr.startswith(optimizer_class.__name__)

# pending not implemented
with pytest.raises(NotImplementedError):
optimizer.register_pending(configs=suggestion)
optimizer.register_pending(configs=suggestion, metadata=metadata)


@pytest.mark.parametrize(('optimizer_class', 'kwargs'), [
Expand Down Expand Up @@ -94,16 +94,17 @@ def objective(x: pd.Series) -> pd.DataFrame:
optimizer.get_observations()

for _ in range(max_iterations):
suggestion = optimizer.suggest()
suggestion, metadata = optimizer.suggest()
assert isinstance(suggestion, pd.DataFrame)
assert metadata is None or isinstance(metadata, pd.DataFrame)
assert set(suggestion.columns) == {'x', 'y', 'z'}
# check that suggestion is in the space
configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict())
# Raises an error if outside of configuration space
configuration.is_valid_configuration()
observation = objective(suggestion['x'])
assert isinstance(observation, pd.DataFrame)
optimizer.register(configs=suggestion, scores=observation)
optimizer.register(configs=suggestion, scores=observation, metadata=metadata)

(best_config, best_score, best_context) = optimizer.get_best_observations()
assert isinstance(best_config, pd.DataFrame)
Expand Down Expand Up @@ -268,16 +269,16 @@ def objective(point: pd.DataFrame) -> pd.DataFrame:
_LOG.debug("Optimizer is done with random init.")

# loop for optimizer
suggestion = optimizer.suggest()
suggestion, metadata = optimizer.suggest()
observation = objective(suggestion)
optimizer.register(configs=suggestion, scores=observation)
optimizer.register(configs=suggestion, scores=observation, metadata=metadata)

# loop for llamatune-optimizer
suggestion = llamatune_optimizer.suggest()
suggestion, metadata = llamatune_optimizer.suggest()
_x, _y = suggestion['x'].iloc[0], suggestion['y'].iloc[0]
assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3., rel=1e-3) # optimizer explores 1-dimensional space
observation = objective(suggestion)
llamatune_optimizer.register(configs=suggestion, scores=observation)
llamatune_optimizer.register(configs=suggestion, scores=observation, metadata=metadata)

# Retrieve best observations
best_observation = optimizer.get_best_observations()
Expand Down Expand Up @@ -375,7 +376,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame:
optimizer.get_observations()

for _ in range(max_iterations):
suggestion = optimizer.suggest()
suggestion, metadata = optimizer.suggest()
assert isinstance(suggestion, pd.DataFrame)
assert (suggestion.columns == ['x', 'y']).all()
# Check suggestion values are the expected dtype
Expand All @@ -388,7 +389,7 @@ def objective(point: pd.DataFrame) -> pd.DataFrame:
# Test registering the suggested configuration with a score.
observation = objective(suggestion)
assert isinstance(observation, pd.DataFrame)
optimizer.register(configs=suggestion, scores=observation)
optimizer.register(configs=suggestion, scores=observation, metadata=metadata)

(best_config, best_score, best_context) = optimizer.get_best_observations()
assert isinstance(best_config, pd.DataFrame)
Expand Down

0 comments on commit d1a4658

Please sign in to comment.