Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make optimizer a fixture for 1-hot encoding/decoding tests #737

Merged
merged 1 commit into from
May 13, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 27 additions & 20 deletions mlos_core/mlos_core/tests/optimizers/one_hot_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import numpy.typing as npt
import ConfigSpace as CS

from mlos_core.optimizers import SmacOptimizer
from mlos_core.optimizers import BaseOptimizer, SmacOptimizer

# pylint: disable=protected-access,redefined-outer-name

Expand Down Expand Up @@ -68,79 +68,86 @@ def one_hot_series() -> npt.NDArray:
])


def test_to_1hot_data_frame(configuration_space: CS.ConfigurationSpace,
data_frame: pd.DataFrame, one_hot_data_frame: npt.NDArray) -> None:
@pytest.fixture
def optimizer(configuration_space: CS.ConfigurationSpace) -> BaseOptimizer:
"""
Test fixture for the optimizer. Use it to test one-hot encoding/decoding.

Builds a SmacOptimizer with the `configuration_space` fixture passed as its
`parameter_space`; no other constructor arguments are supplied. Tests that
exercise `_to_1hot` / `_from_1hot` request this fixture by naming an
`optimizer` parameter.
"""
return SmacOptimizer(
parameter_space=configuration_space,
)


def test_to_1hot_data_frame(optimizer: BaseOptimizer,
data_frame: pd.DataFrame,
one_hot_data_frame: npt.NDArray) -> None:
"""
Toy problem to test one-hot encoding of dataframe.

Encodes `data_frame` with `optimizer._to_1hot` and checks that the result
approximately equals the expected `one_hot_data_frame` array.
"""
optimizer = SmacOptimizer(parameter_space=configuration_space)
assert optimizer._to_1hot(data_frame) == pytest.approx(one_hot_data_frame)


def test_to_1hot_series(configuration_space: CS.ConfigurationSpace,
def test_to_1hot_series(optimizer: BaseOptimizer,
series: pd.Series, one_hot_series: npt.NDArray) -> None:
"""
Toy problem to test one-hot encoding of series.

Encodes `series` with `optimizer._to_1hot` and checks that the result
approximately equals the expected `one_hot_series` array.
"""
optimizer = SmacOptimizer(parameter_space=configuration_space)
assert optimizer._to_1hot(series) == pytest.approx(one_hot_series)


def test_from_1hot_data_frame(configuration_space: CS.ConfigurationSpace,
data_frame: pd.DataFrame, one_hot_data_frame: npt.NDArray) -> None:
def test_from_1hot_data_frame(optimizer: BaseOptimizer,
data_frame: pd.DataFrame,
one_hot_data_frame: npt.NDArray) -> None:
"""
Toy problem to test one-hot decoding of dataframe.

Decodes `one_hot_data_frame` with `optimizer._from_1hot` and checks that the
result, converted with `to_dict()`, is exactly equal to `data_frame.to_dict()`.
"""
optimizer = SmacOptimizer(parameter_space=configuration_space)
assert optimizer._from_1hot(one_hot_data_frame).to_dict() == data_frame.to_dict()


def test_from_1hot_series(configuration_space: CS.ConfigurationSpace,
series: pd.Series, one_hot_series: npt.NDArray) -> None:
def test_from_1hot_series(optimizer: BaseOptimizer,
series: pd.Series,
one_hot_series: npt.NDArray) -> None:
"""
Toy problem to test one-hot decoding of series.

Decodes `one_hot_series` with `optimizer._from_1hot`, checks that the decoded
object has exactly one row, and then checks that this row (as a dict) equals
`series.to_dict()`.
"""
optimizer = SmacOptimizer(parameter_space=configuration_space)
one_hot_df = optimizer._from_1hot(one_hot_series)
assert one_hot_df.shape[0] == 1, f"Unexpected number of rows ({one_hot_df.shape[0]} != 1)"
assert one_hot_df.iloc[0].to_dict() == series.to_dict()


def test_round_trip_data_frame(configuration_space: CS.ConfigurationSpace, data_frame: pd.DataFrame) -> None:
def test_round_trip_data_frame(optimizer: BaseOptimizer, data_frame: pd.DataFrame) -> None:
"""
Round-trip test for one-hot-encoding and then decoding a data frame.

Passes `data_frame` through `_to_1hot` and then `_from_1hot`. Column `x` is
compared approximately (via `pytest.approx`); columns `y` and `z` must match
the input element-wise with `==`.
"""
optimizer = SmacOptimizer(parameter_space=configuration_space)
df_round_trip = optimizer._from_1hot(optimizer._to_1hot(data_frame))
assert df_round_trip.x.to_numpy() == pytest.approx(data_frame.x)
assert (df_round_trip.y == data_frame.y).all()
assert (df_round_trip.z == data_frame.z).all()


def test_round_trip_series(configuration_space: CS.ConfigurationSpace, series: pd.DataFrame) -> None:
def test_round_trip_series(optimizer: BaseOptimizer, series: pd.Series) -> None:
"""
Round-trip test for one-hot-encoding and then decoding a series.

Passes `series` through `_to_1hot` and then `_from_1hot`. Field `x` of the
decoded result is compared approximately (via `pytest.approx`); fields `y`
and `z` must match the input with `==`.
"""
optimizer = SmacOptimizer(parameter_space=configuration_space)
series_round_trip = optimizer._from_1hot(optimizer._to_1hot(series))
assert series_round_trip.x.to_numpy() == pytest.approx(series.x)
assert (series_round_trip.y == series.y).all()
assert (series_round_trip.z == series.z).all()


def test_round_trip_reverse_data_frame(configuration_space: CS.ConfigurationSpace, one_hot_data_frame: npt.NDArray) -> None:
def test_round_trip_reverse_data_frame(optimizer: BaseOptimizer,
one_hot_data_frame: npt.NDArray) -> None:
"""
Round-trip test for one-hot-decoding and then encoding of a numpy array.

Passes `one_hot_data_frame` through `_from_1hot` and then `_to_1hot`, and
checks that the result approximately equals the original array.
"""
optimizer = SmacOptimizer(parameter_space=configuration_space)
round_trip = optimizer._to_1hot(optimizer._from_1hot(one_hot_data_frame))
assert round_trip == pytest.approx(one_hot_data_frame)


def test_round_trip_reverse_series(configuration_space: CS.ConfigurationSpace, one_hot_series: npt.NDArray) -> None:
def test_round_trip_reverse_series(optimizer: BaseOptimizer,
one_hot_series: npt.NDArray) -> None:
"""
Round-trip test for one-hot-decoding and then encoding of a numpy array.

Passes `one_hot_series` through `_from_1hot` and then `_to_1hot`, and checks
that the result approximately equals the original array.
"""
optimizer = SmacOptimizer(parameter_space=configuration_space)
round_trip = optimizer._to_1hot(optimizer._from_1hot(one_hot_series))
assert round_trip == pytest.approx(one_hot_series)
Loading