From 1f6a787c48452252094d41e02ee6d3ec73deb817 Mon Sep 17 00:00:00 2001
From: Brian Kroth
Date: Tue, 1 Oct 2024 16:44:13 -0500
Subject: [PATCH] Use scheduler in dummy runs (#861)

Refactors tests in preparation for testing #720.

Adds `status()` output for MockEnv as well, with a pending FIXME on
improving the Status type in the status output; we'll address that in a
future PR.
---
 .../mlos_bench/environments/mock_env.py       |  52 ++++--
 mlos_bench/mlos_bench/storage/base_storage.py |   5 +
 .../mlos_bench/tests/storage/conftest.py      |   5 -
 .../mlos_bench/tests/storage/sql/fixtures.py  | 149 ++++++++----------
 .../tests/storage/trial_data_test.py          |   4 +-
 .../tests/storage/tunable_config_data_test.py |   9 +-
 mlos_viz/mlos_viz/tests/conftest.py           |   1 -
 7 files changed, 117 insertions(+), 108 deletions(-)

diff --git a/mlos_bench/mlos_bench/environments/mock_env.py b/mlos_bench/mlos_bench/environments/mock_env.py
index 765deb05b3..6d3309f35b 100644
--- a/mlos_bench/mlos_bench/environments/mock_env.py
+++ b/mlos_bench/mlos_bench/environments/mock_env.py
@@ -7,7 +7,7 @@
 import logging
 import random
 from datetime import datetime
-from typing import Dict, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import numpy
 
@@ -61,11 +61,26 @@ def __init__(  # pylint: disable=too-many-arguments
             service=service,
         )
         seed = int(self.config.get("mock_env_seed", -1))
-        self._random = random.Random(seed or None) if seed >= 0 else None
+        self._run_random = random.Random(seed or None) if seed >= 0 else None
+        self._status_random = random.Random(seed or None) if seed >= 0 else None
         self._range = self.config.get("mock_env_range")
         self._metrics = self.config.get("mock_env_metrics", ["score"])
         self._is_ready = True
 
+    def _produce_metrics(self, rand: Optional[random.Random]) -> Dict[str, TunableValue]:
+        # Simple convex function of all tunable parameters.
+        score = numpy.mean(
+            numpy.square([self._normalized(tunable) for (tunable, _group) in self._tunable_params])
+        )
+
+        # Add noise and shift the benchmark value from [0, 1] to a given range.
+        noise = rand.gauss(0, self._NOISE_VAR) if rand else 0
+        score = numpy.clip(score + noise, 0, 1)
+        if self._range:
+            score = self._range[0] + score * (self._range[1] - self._range[0])
+
+        return {metric: score for metric in self._metrics}
+
     def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]:
         """
         Produce mock benchmark data for one experiment.
@@ -82,19 +97,30 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]:
         (status, timestamp, _) = result = super().run()
         if not status.is_ready():
             return result
+        metrics = self._produce_metrics(self._run_random)
+        return (Status.SUCCEEDED, timestamp, metrics)
 
-        # Simple convex function of all tunable parameters.
-        score = numpy.mean(
-            numpy.square([self._normalized(tunable) for (tunable, _group) in self._tunable_params])
-        )
-
-        # Add noise and shift the benchmark value from [0, 1] to a given range.
-        noise = self._random.gauss(0, self._NOISE_VAR) if self._random else 0
-        score = numpy.clip(score + noise, 0, 1)
-        if self._range:
-            score = self._range[0] + score * (self._range[1] - self._range[0])
+    def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]:
+        """
+        Produce mock benchmark status telemetry for one experiment.
 
-        return (Status.SUCCEEDED, timestamp, {metric: score for metric in self._metrics})
+        Returns
+        -------
+        (benchmark_status, timestamp, telemetry) : (Status, datetime, list)
+            3-tuple of (benchmark status, timestamp, telemetry) values.
+            `timestamp` is the UTC timestamp of the status; by default it is the current time.
+            `telemetry` is a list (possibly empty) of (timestamp, metric, value) triplets.
+        """
+        (status, timestamp, _) = result = super().status()
+        if not status.is_ready():
+            return result
+        metrics = self._produce_metrics(self._status_random)
+        return (
+            # FIXME: this causes issues if we report RUNNING instead of READY
+            Status.READY,
+            timestamp,
+            [(timestamp, metric, score) for (metric, score) in metrics.items()],
+        )
 
     @staticmethod
     def _normalized(tunable: Tunable) -> float:
diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py
index d3e9b6583d..867c4e0bc0 100644
--- a/mlos_bench/mlos_bench/storage/base_storage.py
+++ b/mlos_bench/mlos_bench/storage/base_storage.py
@@ -215,6 +215,11 @@ def description(self) -> str:
         """Get the Experiment's description."""
         return self._description
 
+    @property
+    def root_env_config(self) -> str:
+        """Get the Experiment's root Environment config file path."""
+        return self._root_env_config
+
     @property
     def tunables(self) -> TunableGroups:
         """Get the Experiment's tunables."""
diff --git a/mlos_bench/mlos_bench/tests/storage/conftest.py b/mlos_bench/mlos_bench/tests/storage/conftest.py
index 52b0fdcd53..a143705282 100644
--- a/mlos_bench/mlos_bench/tests/storage/conftest.py
+++ b/mlos_bench/mlos_bench/tests/storage/conftest.py
@@ -15,11 +15,6 @@
 exp_storage = sql_storage_fixtures.exp_storage
 exp_no_tunables_storage = sql_storage_fixtures.exp_no_tunables_storage
 mixed_numerics_exp_storage = sql_storage_fixtures.mixed_numerics_exp_storage
-exp_storage_with_trials = sql_storage_fixtures.exp_storage_with_trials
-exp_no_tunables_storage_with_trials = sql_storage_fixtures.exp_no_tunables_storage_with_trials
-mixed_numerics_exp_storage_with_trials = (
-    sql_storage_fixtures.mixed_numerics_exp_storage_with_trials
-)
 exp_data = sql_storage_fixtures.exp_data
 exp_no_tunables_data = sql_storage_fixtures.exp_no_tunables_data
 mixed_numerics_exp_data = sql_storage_fixtures.mixed_numerics_exp_data
diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
index 8a9065e436..4e92d9ab9d 100644
--- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
+++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
@@ -4,16 +4,14 @@
 #
 """Test fixtures for mlos_bench storage."""
 
-from datetime import datetime
-from random import random
 from random import seed as rand_seed
-from typing import Generator, Optional
+from typing import Generator
 
 import pytest
-from pytz import UTC
 
-from mlos_bench.environments.status import Status
+from mlos_bench.environments.mock_env import MockEnv
 from mlos_bench.optimizers.mock_optimizer import MockOptimizer
+from mlos_bench.schedulers.sync_scheduler import SyncScheduler
 from mlos_bench.storage.base_experiment_data import ExperimentData
 from mlos_bench.storage.sql.storage import SqlStorage
 from mlos_bench.tests import SEED
@@ -107,22 +105,45 @@ def mixed_numerics_exp_storage(
 
 
 def _dummy_run_exp(
+    storage: SqlStorage,
     exp: SqlStorage.Experiment,
-    tunable_name: Optional[str],
-) -> SqlStorage.Experiment:
-    """Generates data by doing a simulated run of the given experiment."""
-    # Add some trials to that experiment.
-    # Note: we're just fabricating some made up function for the ML libraries to try and learn.
-    base_score = 10.0
-    if tunable_name:
-        tunable = exp.tunables.get_tunable(tunable_name)[0]
-        assert isinstance(tunable.default, int)
-        (tunable_min, tunable_max) = tunable.range
-        tunable_range = tunable_max - tunable_min
+) -> ExperimentData:
+    """
+    Generates data by doing a simulated run of the given experiment.
+
+    Parameters
+    ----------
+    storage : SqlStorage
+        The storage object to use.
+    exp : SqlStorage.Experiment
+        The experiment to "run".
+        Note: this particular object won't be updated, but a new one will be created
+        from its metadata.
+
+    Returns
+    -------
+    ExperimentData
+        The data generated by the simulated run.
+    """
+    # pylint: disable=too-many-locals
     rand_seed(SEED)
+
+    env = MockEnv(
+        name="Test Env",
+        config={
+            "tunable_params": list(exp.tunables.get_covariant_group_names()),
+            "mock_env_seed": SEED,
+            "mock_env_range": [60, 120],
+            "mock_env_metrics": ["score"],
+        },
+        tunables=exp.tunables,
+    )
+
     opt = MockOptimizer(
         tunables=exp.tunables,
         config={
+            "optimization_targets": exp.opt_targets,
             "seed": SEED,
             # This should be the default, so we leave it omitted for now to test the default.
             # But the test logic relies on this (e.g., trial 1 is config 1 is the
@@ -130,97 +151,53 @@
             # "start_with_defaults": True,
         },
     )
-    assert opt.start_with_defaults
-    for config_i in range(CONFIG_COUNT):
-        tunables = opt.suggest()
-        for repeat_j in range(CONFIG_TRIAL_REPEAT_COUNT):
-            trial = exp.new_trial(
-                tunables=tunables.copy(),
-                config={
-                    "trial_number": config_i * CONFIG_TRIAL_REPEAT_COUNT + repeat_j + 1,
-                    **{
-                        f"opt_{key}_{i}": val
-                        for (i, opt_target) in enumerate(exp.opt_targets.items())
-                        for (key, val) in zip(["target", "direction"], opt_target)
-                    },
-                },
-            )
-            if exp.tunables:
-                assert trial.tunable_config_id == config_i + 1
-            else:
-                assert trial.tunable_config_id == 1
-            if tunable_name:
-                tunable_value = float(tunables.get_tunable(tunable_name)[0].numerical_value)
-                tunable_value_norm = base_score * (tunable_value - tunable_min) / tunable_range
-            else:
-                tunable_value_norm = 0
-            timestamp = datetime.now(UTC)
-            trial.update_telemetry(
-                status=Status.RUNNING,
-                timestamp=timestamp,
-                metrics=[
-                    (timestamp, "some-metric", tunable_value_norm + random() / 100),
-                ],
-            )
-            trial.update(
-                Status.SUCCEEDED,
-                timestamp,
-                metrics={
-                    # Give some variance on the score.
-                    # And some influence from the tunable value.
-                    "score": tunable_value_norm
-                    + random() / 100
-                },
-            )
-    return exp
-
-
-@pytest.fixture
-def exp_storage_with_trials(exp_storage: SqlStorage.Experiment) -> SqlStorage.Experiment:
-    """Test fixture for Experiment using in-memory SQLite3 storage."""
-    return _dummy_run_exp(exp_storage, tunable_name="kernel_sched_latency_ns")
-
-
-@pytest.fixture
-def exp_no_tunables_storage_with_trials(
-    exp_no_tunables_storage: SqlStorage.Experiment,
-) -> SqlStorage.Experiment:
-    """Test fixture for Experiment using in-memory SQLite3 storage."""
-    assert not exp_no_tunables_storage.tunables
-    return _dummy_run_exp(exp_no_tunables_storage, tunable_name=None)
+    scheduler = SyncScheduler(
+        # All config values can be overridden from global config
+        config={
+            "experiment_id": exp.experiment_id,
+            "trial_id": exp.trial_id,
+            "config_id": -1,
+            "trial_config_repeat_count": CONFIG_TRIAL_REPEAT_COUNT,
+            "max_trials": CONFIG_COUNT * CONFIG_TRIAL_REPEAT_COUNT,
+        },
+        global_config={},
+        environment=env,
+        optimizer=opt,
+        storage=storage,
+        root_env_config=exp.root_env_config,
+    )
 
+    # Add some trial data to that experiment by "running" it.
+    with scheduler:
+        scheduler.start()
+        scheduler.teardown()
 
-@pytest.fixture
-def mixed_numerics_exp_storage_with_trials(
-    mixed_numerics_exp_storage: SqlStorage.Experiment,
-) -> SqlStorage.Experiment:
-    """Test fixture for Experiment using in-memory SQLite3 storage."""
-    tunable = next(iter(mixed_numerics_exp_storage.tunables))[0]
-    return _dummy_run_exp(mixed_numerics_exp_storage, tunable_name=tunable.name)
+    return storage.experiments[exp.experiment_id]
 
 
 @pytest.fixture
 def exp_data(
     storage: SqlStorage,
-    exp_storage_with_trials: SqlStorage.Experiment,
+    exp_storage: SqlStorage.Experiment,
 ) -> ExperimentData:
     """Test fixture for ExperimentData."""
-    return storage.experiments[exp_storage_with_trials.experiment_id]
+    return _dummy_run_exp(storage, exp_storage)
 
 
 @pytest.fixture
 def exp_no_tunables_data(
     storage: SqlStorage,
-    exp_no_tunables_storage_with_trials: SqlStorage.Experiment,
+    exp_no_tunables_storage: SqlStorage.Experiment,
 ) -> ExperimentData:
     """Test fixture for ExperimentData with no tunable configs."""
-    return storage.experiments[exp_no_tunables_storage_with_trials.experiment_id]
+    return _dummy_run_exp(storage, exp_no_tunables_storage)
 
 
 @pytest.fixture
 def mixed_numerics_exp_data(
     storage: SqlStorage,
-    mixed_numerics_exp_storage_with_trials: SqlStorage.Experiment,
+    mixed_numerics_exp_storage: SqlStorage.Experiment,
 ) -> ExperimentData:
     """Test fixture for ExperimentData with mixed numerical tunable types."""
-    return storage.experiments[mixed_numerics_exp_storage_with_trials.experiment_id]
+    return _dummy_run_exp(storage, mixed_numerics_exp_storage)
diff --git a/mlos_bench/mlos_bench/tests/storage/trial_data_test.py b/mlos_bench/mlos_bench/tests/storage/trial_data_test.py
index 9fe59b426b..ea513eace2 100644
--- a/mlos_bench/mlos_bench/tests/storage/trial_data_test.py
+++ b/mlos_bench/mlos_bench/tests/storage/trial_data_test.py
@@ -20,9 +20,9 @@ def test_exp_trial_data(exp_data: ExperimentData) -> None:
     assert trial.trial_id == trial_id
     assert trial.tunable_config_id == expected_config_id
     assert trial.status == Status.SUCCEEDED
-    assert trial.metadata_dict["trial_number"] == trial_id
+    assert trial.metadata_dict["repeat_i"] == 1
     assert list(trial.results_dict.keys()) == ["score"]
-    assert trial.results_dict["score"] == pytest.approx(0.0, abs=0.1)
+    assert trial.results_dict["score"] == pytest.approx(73.27, 0.01)
     assert isinstance(trial.ts_start, datetime)
     assert isinstance(trial.ts_end, datetime)
     # Note: tests for telemetry are in test_update_telemetry()
diff --git a/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py b/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py
index 755fc0205a..8721bbe451 100644
--- a/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py
+++ b/mlos_bench/mlos_bench/tests/storage/tunable_config_data_test.py
@@ -4,7 +4,10 @@
 #
 """Unit tests for loading the TunableConfigData."""
 
+from math import ceil
+
 from mlos_bench.storage.base_experiment_data import ExperimentData
+from mlos_bench.tests.storage import CONFIG_TRIAL_REPEAT_COUNT
 from mlos_bench.tunables.tunable_groups import TunableGroups
 
 
@@ -27,10 +30,14 @@ def test_trial_metadata(exp_data: ExperimentData) -> None:
     """Check expected return values for TunableConfigData metadata."""
     assert exp_data.objectives == {"score": "min"}
     for trial_id, trial in exp_data.trials.items():
+        assert trial.tunable_config_id == ceil(trial_id / CONFIG_TRIAL_REPEAT_COUNT)
         assert trial.metadata_dict == {
+            # Only the first CONFIG_TRIAL_REPEAT_COUNT set of trials should be the defaults.
+            "is_defaults": str(trial_id <= CONFIG_TRIAL_REPEAT_COUNT),
             "opt_target_0": "score",
             "opt_direction_0": "min",
-            "trial_number": trial_id,
+            "optimizer": "MockOptimizer",
+            "repeat_i": ((trial_id - 1) % CONFIG_TRIAL_REPEAT_COUNT) + 1,
         }
diff --git a/mlos_viz/mlos_viz/tests/conftest.py b/mlos_viz/mlos_viz/tests/conftest.py
index 228609ba09..9299ebb377 100644
--- a/mlos_viz/mlos_viz/tests/conftest.py
+++ b/mlos_viz/mlos_viz/tests/conftest.py
@@ -11,7 +11,6 @@
 storage = sql_storage_fixtures.storage
 exp_storage = sql_storage_fixtures.exp_storage
-exp_storage_with_trials = sql_storage_fixtures.exp_storage_with_trials
 exp_data = sql_storage_fixtures.exp_data
 
 tunable_groups_config = tunable_groups_fixtures.tunable_groups_config
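
For reviewers who want to poke at the new behavior outside of the test fixtures, here is a minimal, hypothetical sketch (not part of the patch) that drives the refactored MockEnv directly. The tunable group config, names, and seed value below are invented for illustration, and the behavior noted in the comments follows from the logic in the patch above:

from mlos_bench.environments.mock_env import MockEnv
from mlos_bench.tunables.tunable_groups import TunableGroups

# A made-up single-parameter tunable group to give MockEnv something to score.
tunables = TunableGroups(
    {
        "group": {
            "cost": 1,
            "params": {
                "x": {"type": "int", "range": [0, 100], "default": 50},
            },
        },
    }
)

env = MockEnv(
    name="Sketch Env",
    config={
        "tunable_params": ["group"],
        "mock_env_seed": 42,  # seed >= 0 => deterministic noise (see __init__ above)
        "mock_env_range": [60, 120],  # shift the [0, 1] score into [60, 120]
        "mock_env_metrics": ["score"],
    },
    tunables=tunables,
)

with env as env_context:
    assert env_context.setup(tunables)
    # run() draws noise from _run_random and returns SUCCEEDED with {"score": ...}.
    (status, _ts, output) = env_context.run()
    print(status, output)
    # status() draws from the separate _status_random stream and returns READY
    # (rather than RUNNING, per the FIXME) plus [(timestamp, "score", ...)] telemetry.
    (status, _ts, telemetry) = env_context.status()
    print(status, telemetry)

Keeping run() and status() on separate seeded RNG streams means interleaved status() polls do not perturb the run() scores, which is what lets trial_data_test.py assert an exact value like pytest.approx(73.27, 0.01).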
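
Similarly, the trial-to-config arithmetic asserted in tunable_config_data_test.py can be sanity-checked in isolation. A small worked example follows, using an assumed repeat count of 3 purely for illustration (the tests import the real CONFIG_TRIAL_REPEAT_COUNT from mlos_bench.tests.storage):

from math import ceil

CONFIG_TRIAL_REPEAT_COUNT = 3  # assumed value, for illustration only

for trial_id in range(1, 7):  # 1-based trial ids, as in the tests
    config_id = ceil(trial_id / CONFIG_TRIAL_REPEAT_COUNT)
    repeat_i = ((trial_id - 1) % CONFIG_TRIAL_REPEAT_COUNT) + 1
    is_defaults = trial_id <= CONFIG_TRIAL_REPEAT_COUNT
    print(trial_id, config_id, repeat_i, is_defaults)

Trials 1-3 map to config 1 (the defaults) with repeat_i 1-3; trials 4-6 map to config 2, and so on, matching the ceil() and modulo assertions in the test.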