Use scheduler in dummy runs (#861)
Refactors tests in preparation for testing #720.

Adds `status()` output for MockEnv as well, though with a pending FIXME
on improving the Status type in the status output that we'll address in
a future PR.
bpkroth authored Oct 1, 2024
1 parent fcc49aa commit 1f6a787
Showing 7 changed files with 117 additions and 108 deletions.
52 changes: 39 additions & 13 deletions mlos_bench/mlos_bench/environments/mock_env.py
@@ -7,7 +7,7 @@
import logging
import random
from datetime import datetime
from typing import Dict, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple

import numpy

@@ -61,11 +61,26 @@ def __init__( # pylint: disable=too-many-arguments
service=service,
)
seed = int(self.config.get("mock_env_seed", -1))
self._random = random.Random(seed or None) if seed >= 0 else None
self._run_random = random.Random(seed or None) if seed >= 0 else None
self._status_random = random.Random(seed or None) if seed >= 0 else None
self._range = self.config.get("mock_env_range")
self._metrics = self.config.get("mock_env_metrics", ["score"])
self._is_ready = True

def _produce_metrics(self, rand: Optional[random.Random]) -> Dict[str, TunableValue]:
# Simple convex function of all tunable parameters.
score = numpy.mean(
numpy.square([self._normalized(tunable) for (tunable, _group) in self._tunable_params])
)

# Add noise and shift the benchmark value from [0, 1] to a given range.
noise = rand.gauss(0, self._NOISE_VAR) if rand else 0
score = numpy.clip(score + noise, 0, 1)
if self._range:
score = self._range[0] + score * (self._range[1] - self._range[0])

return {metric: score for metric in self._metrics}

def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]:
"""
Produce mock benchmark data for one experiment.
@@ -82,19 +97,30 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]:
(status, timestamp, _) = result = super().run()
if not status.is_ready():
return result
metrics = self._produce_metrics(self._run_random)
return (Status.SUCCEEDED, timestamp, metrics)

# Simple convex function of all tunable parameters.
score = numpy.mean(
numpy.square([self._normalized(tunable) for (tunable, _group) in self._tunable_params])
)

# Add noise and shift the benchmark value from [0, 1] to a given range.
noise = self._random.gauss(0, self._NOISE_VAR) if self._random else 0
score = numpy.clip(score + noise, 0, 1)
if self._range:
score = self._range[0] + score * (self._range[1] - self._range[0])
return (Status.SUCCEEDED, timestamp, {metric: score for metric in self._metrics})

def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]:
"""
Produce mock benchmark status telemetry for one experiment.

Returns
-------
(benchmark_status, timestamp, telemetry) : (Status, datetime, list)
3-tuple of (benchmark status, timestamp, telemetry) values.
`timestamp` is UTC time stamp of the status; it's current time by default.
`telemetry` is a list (maybe empty) of (timestamp, metric, value) triplets.
"""
(status, timestamp, _) = result = super().status()
if not status.is_ready():
return result
metrics = self._produce_metrics(self._status_random)
return (
# FIXME: this causes issues if we report RUNNING instead of READY
Status.READY,
timestamp,
[(timestamp, metric, score) for (metric, score) in metrics.items()],
)

@staticmethod
def _normalized(tunable: Tunable) -> float:
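For context, a minimal usage sketch of the updated MockEnv (not part of this commit): it builds the environment the same way the reworked fixtures below do, but the tunable group, parameter, and seed values here are made up for illustration, and the setup/context-manager calls simply follow the usual Environment protocol used elsewhere in the test suite.

from mlos_bench.environments.mock_env import MockEnv
from mlos_bench.tunables.tunable_groups import TunableGroups

# Hypothetical covariant tunable group, for illustration only.
tunable_groups = TunableGroups({
    "dummy-params": {
        "cost": 1,
        "params": {
            "dummy-param": {"type": "int", "range": [0, 100], "default": 50},
        },
    },
})

env = MockEnv(
    name="Test MockEnv",
    config={
        "tunable_params": ["dummy-params"],
        "mock_env_seed": 42,          # seeds the now-separate run/status RNGs
        "mock_env_range": [60, 120],  # rescale the [0, 1] score into this range
        "mock_env_metrics": ["score"],
    },
    tunables=tunable_groups,
)

with env:
    assert env.setup(tunable_groups)
    (run_status, _ts, metrics) = env.run()    # e.g., (Status.SUCCEEDED, timestamp, {"score": ...})
    (status, ts, telemetry) = env.status()    # e.g., (Status.READY, timestamp, [(ts, "score", ...)])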
5 changes: 5 additions & 0 deletions mlos_bench/mlos_bench/storage/base_storage.py
@@ -215,6 +215,11 @@ def description(self) -> str:
"""Get the Experiment's description."""
return self._description

@property
def root_env_config(self) -> str:
"""Get the Experiment's root Environment config file path."""
return self._root_env_config

@property
def tunables(self) -> TunableGroups:
"""Get the Experiment's tunables."""
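This new read-only property is what lets the reworked fixtures below point a scheduler at the experiment's own root environment config (see `root_env_config=exp.root_env_config` further down). A tiny hypothetical helper, just to show the accessor shape; it is not part of the commit:

from mlos_bench.storage.sql.storage import SqlStorage

def describe_experiment(exp: SqlStorage.Experiment) -> str:
    # `experiment_id` already existed; `root_env_config` is the accessor added here.
    return f"{exp.experiment_id}: root env config at {exp.root_env_config}"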
5 changes: 0 additions & 5 deletions mlos_bench/mlos_bench/tests/storage/conftest.py
@@ -15,11 +15,6 @@
exp_storage = sql_storage_fixtures.exp_storage
exp_no_tunables_storage = sql_storage_fixtures.exp_no_tunables_storage
mixed_numerics_exp_storage = sql_storage_fixtures.mixed_numerics_exp_storage
exp_storage_with_trials = sql_storage_fixtures.exp_storage_with_trials
exp_no_tunables_storage_with_trials = sql_storage_fixtures.exp_no_tunables_storage_with_trials
mixed_numerics_exp_storage_with_trials = (
sql_storage_fixtures.mixed_numerics_exp_storage_with_trials
)
exp_data = sql_storage_fixtures.exp_data
exp_no_tunables_data = sql_storage_fixtures.exp_no_tunables_data
mixed_numerics_exp_data = sql_storage_fixtures.mixed_numerics_exp_data
149 changes: 63 additions & 86 deletions mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
@@ -4,16 +4,14 @@
#
"""Test fixtures for mlos_bench storage."""

from datetime import datetime
from random import random
from random import seed as rand_seed
from typing import Generator, Optional
from typing import Generator

import pytest
from pytz import UTC

from mlos_bench.environments.status import Status
from mlos_bench.environments.mock_env import MockEnv
from mlos_bench.optimizers.mock_optimizer import MockOptimizer
from mlos_bench.schedulers.sync_scheduler import SyncScheduler
from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_bench.storage.sql.storage import SqlStorage
from mlos_bench.tests import SEED
@@ -107,120 +105,99 @@ def mixed_numerics_exp_storage(


def _dummy_run_exp(
storage: SqlStorage,
exp: SqlStorage.Experiment,
tunable_name: Optional[str],
) -> SqlStorage.Experiment:
"""Generates data by doing a simulated run of the given experiment."""
# Add some trials to that experiment.
# Note: we're just fabricating some made up function for the ML libraries to try and learn.
base_score = 10.0
if tunable_name:
tunable = exp.tunables.get_tunable(tunable_name)[0]
assert isinstance(tunable.default, int)
(tunable_min, tunable_max) = tunable.range
tunable_range = tunable_max - tunable_min
) -> ExperimentData:
"""
Generates data by doing a simulated run of the given experiment.

Parameters
----------
storage : SqlStorage
The storage object to use.
exp : SqlStorage.Experiment
The experiment to "run".
Note: this particular object won't be updated, but a new one will be created
from its metadata.

Returns
-------
ExperimentData
The data generated by the simulated run.
"""
# pylint: disable=too-many-locals

rand_seed(SEED)

env = MockEnv(
name="Test Env",
config={
"tunable_params": list(exp.tunables.get_covariant_group_names()),
"mock_env_seed": SEED,
"mock_env_range": [60, 120],
"mock_env_metrics": ["score"],
},
tunables=exp.tunables,
)

opt = MockOptimizer(
tunables=exp.tunables,
config={
"optimization_targets": exp.opt_targets,
"seed": SEED,
# This should be the default, so we leave it omitted for now to test the default.
# But the test logic relies on this (e.g., trial 1 is config 1 is the
# default values for the tunable params)
# "start_with_defaults": True,
},
)
assert opt.start_with_defaults
for config_i in range(CONFIG_COUNT):
tunables = opt.suggest()
for repeat_j in range(CONFIG_TRIAL_REPEAT_COUNT):
trial = exp.new_trial(
tunables=tunables.copy(),
config={
"trial_number": config_i * CONFIG_TRIAL_REPEAT_COUNT + repeat_j + 1,
**{
f"opt_{key}_{i}": val
for (i, opt_target) in enumerate(exp.opt_targets.items())
for (key, val) in zip(["target", "direction"], opt_target)
},
},
)
if exp.tunables:
assert trial.tunable_config_id == config_i + 1
else:
assert trial.tunable_config_id == 1
if tunable_name:
tunable_value = float(tunables.get_tunable(tunable_name)[0].numerical_value)
tunable_value_norm = base_score * (tunable_value - tunable_min) / tunable_range
else:
tunable_value_norm = 0
timestamp = datetime.now(UTC)
trial.update_telemetry(
status=Status.RUNNING,
timestamp=timestamp,
metrics=[
(timestamp, "some-metric", tunable_value_norm + random() / 100),
],
)
trial.update(
Status.SUCCEEDED,
timestamp,
metrics={
# Give some variance on the score.
# And some influence from the tunable value.
"score": tunable_value_norm
+ random() / 100
},
)
return exp


scheduler = SyncScheduler(
# All config values can be overridden from global config
config={
"experiment_id": exp.experiment_id,
"trial_id": exp.trial_id,
"config_id": -1,
"trial_config_repeat_count": CONFIG_TRIAL_REPEAT_COUNT,
"max_trials": CONFIG_COUNT * CONFIG_TRIAL_REPEAT_COUNT,
},
global_config={},
environment=env,
optimizer=opt,
storage=storage,
root_env_config=exp.root_env_config,
)

# Add some trial data to that experiment by "running" it.
with scheduler:
scheduler.start()
scheduler.teardown()

return storage.experiments[exp.experiment_id]


@pytest.fixture
def exp_storage_with_trials(exp_storage: SqlStorage.Experiment) -> SqlStorage.Experiment:
"""Test fixture for Experiment using in-memory SQLite3 storage."""
return _dummy_run_exp(exp_storage, tunable_name="kernel_sched_latency_ns")


@pytest.fixture
def exp_no_tunables_storage_with_trials(
exp_no_tunables_storage: SqlStorage.Experiment,
) -> SqlStorage.Experiment:
"""Test fixture for Experiment using in-memory SQLite3 storage."""
assert not exp_no_tunables_storage.tunables
return _dummy_run_exp(exp_no_tunables_storage, tunable_name=None)


@pytest.fixture
def mixed_numerics_exp_storage_with_trials(
mixed_numerics_exp_storage: SqlStorage.Experiment,
) -> SqlStorage.Experiment:
"""Test fixture for Experiment using in-memory SQLite3 storage."""
tunable = next(iter(mixed_numerics_exp_storage.tunables))[0]
return _dummy_run_exp(mixed_numerics_exp_storage, tunable_name=tunable.name)


@pytest.fixture
def exp_data(
storage: SqlStorage,
exp_storage_with_trials: SqlStorage.Experiment,
exp_storage: SqlStorage.Experiment,
) -> ExperimentData:
"""Test fixture for ExperimentData."""
return storage.experiments[exp_storage_with_trials.experiment_id]
return _dummy_run_exp(storage, exp_storage)


@pytest.fixture
def exp_no_tunables_data(
storage: SqlStorage,
exp_no_tunables_storage_with_trials: SqlStorage.Experiment,
exp_no_tunables_storage: SqlStorage.Experiment,
) -> ExperimentData:
"""Test fixture for ExperimentData with no tunable configs."""
return storage.experiments[exp_no_tunables_storage_with_trials.experiment_id]
return _dummy_run_exp(storage, exp_no_tunables_storage)


@pytest.fixture
def mixed_numerics_exp_data(
storage: SqlStorage,
mixed_numerics_exp_storage_with_trials: SqlStorage.Experiment,
mixed_numerics_exp_storage: SqlStorage.Experiment,
) -> ExperimentData:
"""Test fixture for ExperimentData with mixed numerical tunable types."""
return storage.experiments[mixed_numerics_exp_storage_with_trials.experiment_id]
return _dummy_run_exp(storage, mixed_numerics_exp_storage)
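Since the *_with_trials fixtures are gone, tests now consume the ExperimentData objects returned above directly. A hedged sketch of such a consumer, using only attributes that appear elsewhere in this diff and assuming CONFIG_COUNT is exported from mlos_bench.tests.storage alongside CONFIG_TRIAL_REPEAT_COUNT:

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_bench.tests.storage import CONFIG_COUNT, CONFIG_TRIAL_REPEAT_COUNT

def check_dummy_run(exp_data: ExperimentData) -> None:
    # The scheduler repeats each suggested config CONFIG_TRIAL_REPEAT_COUNT times,
    # so the simulated run produces CONFIG_COUNT * CONFIG_TRIAL_REPEAT_COUNT trials.
    assert len(exp_data.trials) == CONFIG_COUNT * CONFIG_TRIAL_REPEAT_COUNT
    for trial_id, trial in exp_data.trials.items():
        assert trial.trial_id == trial_id
        assert list(trial.results_dict.keys()) == ["score"]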
4 changes: 2 additions & 2 deletions mlos_bench/mlos_bench/tests/storage/trial_data_test.py
@@ -20,9 +20,9 @@ def test_exp_trial_data(exp_data: ExperimentData) -> None:
assert trial.trial_id == trial_id
assert trial.tunable_config_id == expected_config_id
assert trial.status == Status.SUCCEEDED
assert trial.metadata_dict["trial_number"] == trial_id
assert trial.metadata_dict["repeat_i"] == 1
assert list(trial.results_dict.keys()) == ["score"]
assert trial.results_dict["score"] == pytest.approx(0.0, abs=0.1)
assert trial.results_dict["score"] == pytest.approx(73.27, 0.01)
assert isinstance(trial.ts_start, datetime)
assert isinstance(trial.ts_end, datetime)
# Note: tests for telemetry are in test_update_telemetry()
@@ -4,7 +4,10 @@
#
"""Unit tests for loading the TunableConfigData."""

from math import ceil

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_bench.tests.storage import CONFIG_TRIAL_REPEAT_COUNT
from mlos_bench.tunables.tunable_groups import TunableGroups


@@ -27,10 +30,14 @@ def test_trial_metadata(exp_data: ExperimentData) -> None:
"""Check expected return values for TunableConfigData metadata."""
assert exp_data.objectives == {"score": "min"}
for trial_id, trial in exp_data.trials.items():
assert trial.tunable_config_id == ceil(trial_id / CONFIG_TRIAL_REPEAT_COUNT)
assert trial.metadata_dict == {
# Only the first CONFIG_TRIAL_REPEAT_COUNT set should be the defaults.
"is_defaults": str(trial_id <= CONFIG_TRIAL_REPEAT_COUNT),
"opt_target_0": "score",
"opt_direction_0": "min",
"trial_number": trial_id,
"optimizer": "MockOptimizer",
"repeat_i": ((trial_id - 1) % CONFIG_TRIAL_REPEAT_COUNT) + 1,
}


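The ceil/modulo bookkeeping above is plain arithmetic: repeated trials are bucketed onto config ids in order. A quick worked example, using an illustrative repeat count of 2 (the real constant lives in mlos_bench.tests.storage):

from math import ceil

CONFIG_TRIAL_REPEAT_COUNT = 2  # illustrative value only

for trial_id in range(1, 7):
    config_id = ceil(trial_id / CONFIG_TRIAL_REPEAT_COUNT)
    repeat_i = ((trial_id - 1) % CONFIG_TRIAL_REPEAT_COUNT) + 1
    print(f"trial {trial_id} -> config {config_id}, repeat {repeat_i}")
# trials 1,2 -> config 1; trials 3,4 -> config 2; trials 5,6 -> config 3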
1 change: 0 additions & 1 deletion mlos_viz/mlos_viz/tests/conftest.py
@@ -11,7 +11,6 @@

storage = sql_storage_fixtures.storage
exp_storage = sql_storage_fixtures.exp_storage
exp_storage_with_trials = sql_storage_fixtures.exp_storage_with_trials
exp_data = sql_storage_fixtures.exp_data

tunable_groups_config = tunable_groups_fixtures.tunable_groups_config
