Use scheduler in dummy runs (#861)
Refactors tests in preparation for testing #720.

Adds `status()` output for MockEnv as well, though with a pending FIXME
on improving the Status type in the status output that we'll address in
a future PR.
bpkroth authored Oct 1, 2024
1 parent fcc49aa commit 1f6a787
Showing 7 changed files with 117 additions and 108 deletions.
52 changes: 39 additions & 13 deletions mlos_bench/mlos_bench/environments/mock_env.py
@@ -7,7 +7,7 @@
import logging
import random
from datetime import datetime
from typing import Dict, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple

import numpy

@@ -61,11 +61,26 @@ def __init__( # pylint: disable=too-many-arguments
service=service,
)
seed = int(self.config.get("mock_env_seed", -1))
self._random = random.Random(seed or None) if seed >= 0 else None
self._run_random = random.Random(seed or None) if seed >= 0 else None
self._status_random = random.Random(seed or None) if seed >= 0 else None
self._range = self.config.get("mock_env_range")
self._metrics = self.config.get("mock_env_metrics", ["score"])
self._is_ready = True

def _produce_metrics(self, rand: Optional[random.Random]) -> Dict[str, TunableValue]:
# Simple convex function of all tunable parameters.
score = numpy.mean(
numpy.square([self._normalized(tunable) for (tunable, _group) in self._tunable_params])
)

# Add noise and shift the benchmark value from [0, 1] to a given range.
noise = rand.gauss(0, self._NOISE_VAR) if rand else 0
score = numpy.clip(score + noise, 0, 1)
if self._range:
score = self._range[0] + score * (self._range[1] - self._range[0])

return {metric: score for metric in self._metrics}

def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]:
"""
Produce mock benchmark data for one experiment.
@@ -82,19 +97,30 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]:
(status, timestamp, _) = result = super().run()
if not status.is_ready():
return result
metrics = self._produce_metrics(self._run_random)
return (Status.SUCCEEDED, timestamp, metrics)

# Simple convex function of all tunable parameters.
score = numpy.mean(
numpy.square([self._normalized(tunable) for (tunable, _group) in self._tunable_params])
)

# Add noise and shift the benchmark value from [0, 1] to a given range.
noise = self._random.gauss(0, self._NOISE_VAR) if self._random else 0
score = numpy.clip(score + noise, 0, 1)
if self._range:
score = self._range[0] + score * (self._range[1] - self._range[0])
return (Status.SUCCEEDED, timestamp, {metric: score for metric in self._metrics})

def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]:
"""
Produce mock benchmark status telemetry for one experiment.

Returns
-------
(benchmark_status, timestamp, telemetry) : (Status, datetime, list)
3-tuple of (benchmark status, timestamp, telemetry) values.
`timestamp` is UTC time stamp of the status; it's current time by default.
`telemetry` is a list (maybe empty) of (timestamp, metric, value) triplets.
"""
(status, timestamp, _) = result = super().status()
if not status.is_ready():
return result
metrics = self._produce_metrics(self._status_random)
return (
# FIXME: this causes issues if we report RUNNING instead of READY
Status.READY,
timestamp,
[(timestamp, metric, score) for (metric, score) in metrics.items()],
)

@staticmethod
def _normalized(tunable: Tunable) -> float:
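For context, a minimal usage sketch of the updated MockEnv (not part of this commit): it builds the environment the same way the reworked fixtures below do, but the tunable group, parameter, and seed values here are made up for illustration, and the setup/context-manager calls simply follow the usual Environment protocol used elsewhere in the test suite.

from mlos_bench.environments.mock_env import MockEnv
from mlos_bench.tunables.tunable_groups import TunableGroups

# Hypothetical covariant tunable group, for illustration only.
tunable_groups = TunableGroups({
    "dummy-params": {
        "cost": 1,
        "params": {
            "dummy-param": {"type": "int", "range": [0, 100], "default": 50},
        },
    },
})

env = MockEnv(
    name="Test MockEnv",
    config={
        "tunable_params": ["dummy-params"],
        "mock_env_seed": 42,          # seeds the now-separate run/status RNGs
        "mock_env_range": [60, 120],  # rescale the [0, 1] score into this range
        "mock_env_metrics": ["score"],
    },
    tunables=tunable_groups,
)

with env:
    assert env.setup(tunable_groups)
    (run_status, _ts, metrics) = env.run()    # e.g., (Status.SUCCEEDED, timestamp, {"score": ...})
    (status, ts, telemetry) = env.status()    # e.g., (Status.READY, timestamp, [(ts, "score", ...)])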
5 changes: 5 additions & 0 deletions mlos_bench/mlos_bench/storage/base_storage.py
@@ -215,6 +215,11 @@ def description(self) -> str:
"""Get the Experiment's description."""
return self._description

@property
def root_env_config(self) -> str:
"""Get the Experiment's root Environment config file path."""
return self._root_env_config

@property
def tunables(self) -> TunableGroups:
"""Get the Experiment's tunables."""
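This new read-only property is what lets the reworked fixtures below point a scheduler at the experiment's own root environment config (see `root_env_config=exp.root_env_config` further down). A tiny hypothetical helper, just to show the accessor shape; it is not part of the commit:

from mlos_bench.storage.sql.storage import SqlStorage

def describe_experiment(exp: SqlStorage.Experiment) -> str:
    # `experiment_id` already existed; `root_env_config` is the accessor added here.
    return f"{exp.experiment_id}: root env config at {exp.root_env_config}"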
5 changes: 0 additions & 5 deletions mlos_bench/mlos_bench/tests/storage/conftest.py
@@ -15,11 +15,6 @@
exp_storage = sql_storage_fixtures.exp_storage
exp_no_tunables_storage = sql_storage_fixtures.exp_no_tunables_storage
mixed_numerics_exp_storage = sql_storage_fixtures.mixed_numerics_exp_storage
exp_storage_with_trials = sql_storage_fixtures.exp_storage_with_trials
exp_no_tunables_storage_with_trials = sql_storage_fixtures.exp_no_tunables_storage_with_trials
mixed_numerics_exp_storage_with_trials = (
sql_storage_fixtures.mixed_numerics_exp_storage_with_trials
)
exp_data = sql_storage_fixtures.exp_data
exp_no_tunables_data = sql_storage_fixtures.exp_no_tunables_data
mixed_numerics_exp_data = sql_storage_fixtures.mixed_numerics_exp_data
149 changes: 63 additions & 86 deletions mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
@@ -4,16 +4,14 @@
#
"""Test fixtures for mlos_bench storage."""

from datetime import datetime
from random import random
from random import seed as rand_seed
from typing import Generator, Optional
from typing import Generator

import pytest
from pytz import UTC

from mlos_bench.environments.status import Status
from mlos_bench.environments.mock_env import MockEnv
from mlos_bench.optimizers.mock_optimizer import MockOptimizer
from mlos_bench.schedulers.sync_scheduler import SyncScheduler
from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_bench.storage.sql.storage import SqlStorage
from mlos_bench.tests import SEED
@@ -107,120 +105,99 @@ def mixed_numerics_exp_storage(


def _dummy_run_exp(
storage: SqlStorage,
exp: SqlStorage.Experiment,
tunable_name: Optional[str],
) -> SqlStorage.Experiment:
"""Generates data by doing a simulated run of the given experiment."""
# Add some trials to that experiment.
# Note: we're just fabricating some made up function for the ML libraries to try and learn.
base_score = 10.0
if tunable_name:
tunable = exp.tunables.get_tunable(tunable_name)[0]
assert isinstance(tunable.default, int)
(tunable_min, tunable_max) = tunable.range
tunable_range = tunable_max - tunable_min
) -> ExperimentData:
"""
Generates data by doing a simulated run of the given experiment.

Parameters
----------
storage : SqlStorage
The storage object to use.
exp : SqlStorage.Experiment
The experiment to "run".
Note: this particular object won't be updated, but a new one will be created
from its metadata.

Returns
-------
ExperimentData
The data generated by the simulated run.
"""
# pylint: disable=too-many-locals

rand_seed(SEED)

env = MockEnv(
name="Test Env",
config={
"tunable_params": list(exp.tunables.get_covariant_group_names()),
"mock_env_seed": SEED,
"mock_env_range": [60, 120],
"mock_env_metrics": ["score"],
},
tunables=exp.tunables,
)

opt = MockOptimizer(
tunables=exp.tunables,
config={
"optimization_targets": exp.opt_targets,
"seed": SEED,
# This should be the default, so we leave it omitted for now to test the default.
# But the test logic relies on this (e.g., trial 1 is config 1 is the
# default values for the tunable params)
# "start_with_defaults": True,
},
)
assert opt.start_with_defaults
for config_i in range(CONFIG_COUNT):
tunables = opt.suggest()
for repeat_j in range(CONFIG_TRIAL_REPEAT_COUNT):
trial = exp.new_trial(
tunables=tunables.copy(),
config={
"trial_number": config_i * CONFIG_TRIAL_REPEAT_COUNT + repeat_j + 1,
**{
f"opt_{key}_{i}": val
for (i, opt_target) in enumerate(exp.opt_targets.items())
for (key, val) in zip(["target", "direction"], opt_target)
},
},
)
if exp.tunables:
assert trial.tunable_config_id == config_i + 1
else:
assert trial.tunable_config_id == 1
if tunable_name:
tunable_value = float(tunables.get_tunable(tunable_name)[0].numerical_value)
tunable_value_norm = base_score * (tunable_value - tunable_min) / tunable_range
else:
tunable_value_norm = 0
timestamp = datetime.now(UTC)
trial.update_telemetry(
status=Status.RUNNING,
timestamp=timestamp,
metrics=[
(timestamp, "some-metric", tunable_value_norm + random() / 100),
],
)
trial.update(
Status.SUCCEEDED,
timestamp,
metrics={
# Give some variance on the score.
# And some influence from the tunable value.
"score": tunable_value_norm
+ random() / 100
},
)
return exp


scheduler = SyncScheduler(
# All config values can be overridden from global config
config={
"experiment_id": exp.experiment_id,
"trial_id": exp.trial_id,
"config_id": -1,
"trial_config_repeat_count": CONFIG_TRIAL_REPEAT_COUNT,
"max_trials": CONFIG_COUNT * CONFIG_TRIAL_REPEAT_COUNT,
},
global_config={},
environment=env,
optimizer=opt,
storage=storage,
root_env_config=exp.root_env_config,
)

# Add some trial data to that experiment by "running" it.
with scheduler:
scheduler.start()
scheduler.teardown()

return storage.experiments[exp.experiment_id]


@pytest.fixture
def exp_storage_with_trials(exp_storage: SqlStorage.Experiment) -> SqlStorage.Experiment:
"""Test fixture for Experiment using in-memory SQLite3 storage."""
return _dummy_run_exp(exp_storage, tunable_name="kernel_sched_latency_ns")


@pytest.fixture
def exp_no_tunables_storage_with_trials(
exp_no_tunables_storage: SqlStorage.Experiment,
) -> SqlStorage.Experiment:
"""Test fixture for Experiment using in-memory SQLite3 storage."""
assert not exp_no_tunables_storage.tunables
return _dummy_run_exp(exp_no_tunables_storage, tunable_name=None)


@pytest.fixture
def mixed_numerics_exp_storage_with_trials(
mixed_numerics_exp_storage: SqlStorage.Experiment,
) -> SqlStorage.Experiment:
"""Test fixture for Experiment using in-memory SQLite3 storage."""
tunable = next(iter(mixed_numerics_exp_storage.tunables))[0]
return _dummy_run_exp(mixed_numerics_exp_storage, tunable_name=tunable.name)


@pytest.fixture
def exp_data(
storage: SqlStorage,
exp_storage_with_trials: SqlStorage.Experiment,
exp_storage: SqlStorage.Experiment,
) -> ExperimentData:
"""Test fixture for ExperimentData."""
return storage.experiments[exp_storage_with_trials.experiment_id]
return _dummy_run_exp(storage, exp_storage)


@pytest.fixture
def exp_no_tunables_data(
storage: SqlStorage,
exp_no_tunables_storage_with_trials: SqlStorage.Experiment,
exp_no_tunables_storage: SqlStorage.Experiment,
) -> ExperimentData:
"""Test fixture for ExperimentData with no tunable configs."""
return storage.experiments[exp_no_tunables_storage_with_trials.experiment_id]
return _dummy_run_exp(storage, exp_no_tunables_storage)


@pytest.fixture
def mixed_numerics_exp_data(
storage: SqlStorage,
mixed_numerics_exp_storage_with_trials: SqlStorage.Experiment,
mixed_numerics_exp_storage: SqlStorage.Experiment,
) -> ExperimentData:
"""Test fixture for ExperimentData with mixed numerical tunable types."""
return storage.experiments[mixed_numerics_exp_storage_with_trials.experiment_id]
return _dummy_run_exp(storage, mixed_numerics_exp_storage)
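Since the *_with_trials fixtures are gone, tests now consume the ExperimentData objects returned above directly. A hedged sketch of such a consumer, using only attributes that appear elsewhere in this diff and assuming CONFIG_COUNT is exported from mlos_bench.tests.storage alongside CONFIG_TRIAL_REPEAT_COUNT:

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_bench.tests.storage import CONFIG_COUNT, CONFIG_TRIAL_REPEAT_COUNT

def check_dummy_run(exp_data: ExperimentData) -> None:
    # The scheduler repeats each suggested config CONFIG_TRIAL_REPEAT_COUNT times,
    # so the simulated run produces CONFIG_COUNT * CONFIG_TRIAL_REPEAT_COUNT trials.
    assert len(exp_data.trials) == CONFIG_COUNT * CONFIG_TRIAL_REPEAT_COUNT
    for trial_id, trial in exp_data.trials.items():
        assert trial.trial_id == trial_id
        assert list(trial.results_dict.keys()) == ["score"]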
4 changes: 2 additions & 2 deletions mlos_bench/mlos_bench/tests/storage/trial_data_test.py
@@ -20,9 +20,9 @@ def test_exp_trial_data(exp_data: ExperimentData) -> None:
assert trial.trial_id == trial_id
assert trial.tunable_config_id == expected_config_id
assert trial.status == Status.SUCCEEDED
assert trial.metadata_dict["trial_number"] == trial_id
assert trial.metadata_dict["repeat_i"] == 1
assert list(trial.results_dict.keys()) == ["score"]
assert trial.results_dict["score"] == pytest.approx(0.0, abs=0.1)
assert trial.results_dict["score"] == pytest.approx(73.27, 0.01)
assert isinstance(trial.ts_start, datetime)
assert isinstance(trial.ts_end, datetime)
# Note: tests for telemetry are in test_update_telemetry()
@@ -4,7 +4,10 @@
#
"""Unit tests for loading the TunableConfigData."""

from math import ceil

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_bench.tests.storage import CONFIG_TRIAL_REPEAT_COUNT
from mlos_bench.tunables.tunable_groups import TunableGroups


@@ -27,10 +30,14 @@ def test_trial_metadata(exp_data: ExperimentData) -> None:
"""Check expected return values for TunableConfigData metadata."""
assert exp_data.objectives == {"score": "min"}
for trial_id, trial in exp_data.trials.items():
assert trial.tunable_config_id == ceil(trial_id / CONFIG_TRIAL_REPEAT_COUNT)
assert trial.metadata_dict == {
# Only the first CONFIG_TRIAL_REPEAT_COUNT set should be the defaults.
"is_defaults": str(trial_id <= CONFIG_TRIAL_REPEAT_COUNT),
"opt_target_0": "score",
"opt_direction_0": "min",
"trial_number": trial_id,
"optimizer": "MockOptimizer",
"repeat_i": ((trial_id - 1) % CONFIG_TRIAL_REPEAT_COUNT) + 1,
}


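The ceil/modulo bookkeeping above is plain arithmetic: repeated trials are bucketed onto config ids in order. A quick worked example, using an illustrative repeat count of 2 (the real constant lives in mlos_bench.tests.storage):

from math import ceil

CONFIG_TRIAL_REPEAT_COUNT = 2  # illustrative value only

for trial_id in range(1, 7):
    config_id = ceil(trial_id / CONFIG_TRIAL_REPEAT_COUNT)
    repeat_i = ((trial_id - 1) % CONFIG_TRIAL_REPEAT_COUNT) + 1
    print(f"trial {trial_id} -> config {config_id}, repeat {repeat_i}")
# trials 1,2 -> config 1; trials 3,4 -> config 2; trials 5,6 -> config 3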
1 change: 0 additions & 1 deletion mlos_viz/mlos_viz/tests/conftest.py
@@ -11,7 +11,6 @@

storage = sql_storage_fixtures.storage
exp_storage = sql_storage_fixtures.exp_storage
exp_storage_with_trials = sql_storage_fixtures.exp_storage_with_trials
exp_data = sql_storage_fixtures.exp_data

tunable_groups_config = tunable_groups_fixtures.tunable_groups_config
