From 7cf42f949ad4250a60254cbdd63cee483169629c Mon Sep 17 00:00:00 2001 From: Andrii Ieroshenko Date: Wed, 15 May 2024 14:24:46 -0700 Subject: [PATCH] Use pytest temporary folders and fixtures to create test file hierarchy at test time (#516) * make test db a fixture that returns a session, use jp_data_dir fixture * Use job_id returned by the db * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * use fixture for root dir * create test dirs by copying test files from static dir into temp dirs instead of uploading test dirs for execution manager test * create test dirs by copying test files from static dir into temp dirs instead of uploading test dirs for scheduler tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * create test dirs by copying test files from static dir into temp dirs instead of uploading test dirs for job files manager tests * make static_test_files_dir fixture session-scoped * add return type annotations to fixtures where return type is not clear * explicitly close db session to ensure all db connections are released * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Assert tmp_path and similar as Path type --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- conftest.py | 84 +++++++++----- .../job-1 => static}/helloworld-1.html | 0 .../job-1 => static}/helloworld-1.ipynb | 0 .../job-1 => static}/helloworld.ipynb | 0 .../job-2 => static}/helloworld.tar.gz | Bin .../job-5/a/b => static}/helloworld.txt | 0 .../job-5 => static}/import-helloworld.ipynb | 0 .../job-4 => static}/output_side_effect.txt | 0 .../job-4 => static}/side_effects.ipynb | 0 .../tests/test_execution_manager.py | 75 +++++++----- .../tests/test_job_files_manager.py | 107 +++++++++++------- jupyter_scheduler/tests/test_scheduler.py | 75 ++++++------ 12 files changed, 209 insertions(+), 132 deletions(-) rename jupyter_scheduler/tests/{test_staging_dir/job-1 => static}/helloworld-1.html (100%) rename jupyter_scheduler/tests/{test_staging_dir/job-1 => static}/helloworld-1.ipynb (100%) rename jupyter_scheduler/tests/{test_staging_dir/job-1 => static}/helloworld.ipynb (100%) rename jupyter_scheduler/tests/{test_staging_dir/job-2 => static}/helloworld.tar.gz (100%) rename jupyter_scheduler/tests/{test_root_dir/job-5/a/b => static}/helloworld.txt (100%) rename jupyter_scheduler/tests/{test_root_dir/job-5 => static}/import-helloworld.ipynb (100%) rename jupyter_scheduler/tests/{test_staging_dir/job-4 => static}/output_side_effect.txt (100%) rename jupyter_scheduler/tests/{test_staging_dir/job-4 => static}/side_effects.ipynb (100%) diff --git a/conftest.py b/conftest.py index 71915074f..87cebe694 100644 --- a/conftest.py +++ b/conftest.py @@ -1,27 +1,73 @@ -import os from pathlib import Path import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import Session, sessionmaker -from jupyter_scheduler.orm import create_session, create_tables +from jupyter_scheduler.orm import Base from jupyter_scheduler.scheduler import Scheduler from jupyter_scheduler.tests.mocks import MockEnvironmentManager -pytest_plugins = ("jupyter_server.pytest_plugin",) +pytest_plugins = ("jupyter_server.pytest_plugin", "pytest_jupyter.jupyter_server") -HERE = Path(__file__).parent.resolve() -DB_FILE_PATH = f"{HERE}/jupyter_scheduler/tests/testdb.sqlite" -DB_URL = f"sqlite:///{DB_FILE_PATH}" -TEST_ROOT_DIR = f"{HERE}/jupyter_scheduler/tests/test_root_dir" +@pytest.fixture(scope="session") +def static_test_files_dir() -> Path: + return Path(__file__).parent.resolve() / "jupyter_scheduler" / "tests" / "static" @pytest.fixture -def jp_server_config(jp_server_config): +def jp_scheduler_root_dir(tmp_path: Path) -> Path: + root_dir = tmp_path / "workspace_root" + root_dir.mkdir() + return root_dir + + +@pytest.fixture +def jp_scheduler_output_dir(jp_scheduler_root_dir: Path) -> Path: + output_dir = jp_scheduler_root_dir / "jobs" + output_dir.mkdir() + return output_dir + + +@pytest.fixture +def jp_scheduler_staging_dir(jp_data_dir: Path) -> Path: + staging_area = jp_data_dir / "scheduler_staging_area" + staging_area.mkdir() + return staging_area + + +@pytest.fixture +def jp_scheduler_db_url(jp_scheduler_staging_dir: Path) -> str: + db_file_path = jp_scheduler_staging_dir / "scheduler.sqlite" + return f"sqlite:///{db_file_path}" + + +@pytest.fixture +def jp_scheduler_db(jp_scheduler_db_url): + engine = create_engine(jp_scheduler_db_url, echo=False) + Base.metadata.create_all(engine) + Session = sessionmaker(bind=engine) + session = Session() + yield session + session.close() + + +@pytest.fixture +def jp_scheduler(jp_scheduler_db_url, jp_scheduler_root_dir, jp_scheduler_db): + return Scheduler( + db_url=jp_scheduler_db_url, + root_dir=str(jp_scheduler_root_dir), + environments_manager=MockEnvironmentManager(), + ) + + +@pytest.fixture +def jp_server_config(jp_scheduler_db_url, jp_server_config): return { "ServerApp": {"jpserver_extensions": {"jupyter_scheduler": True}}, "SchedulerApp": { - "db_url": DB_URL, + "db_url": jp_scheduler_db_url, "drop_tables": True, "environment_manager_class": "jupyter_scheduler.tests.mocks.MockEnvironmentManager", }, @@ -30,23 +76,3 @@ def jp_server_config(jp_server_config): }, "Scheduler": {"task_runner_class": "jupyter_scheduler.tests.mocks.MockTaskRunner"}, } - - -@pytest.fixture(autouse=True) -def setup_db(): - create_tables(DB_URL, True) - yield - if os.path.exists(DB_FILE_PATH): - os.remove(DB_FILE_PATH) - - -@pytest.fixture -def jp_scheduler_db(): - return create_session(DB_URL) - - -@pytest.fixture -def jp_scheduler(): - return Scheduler( - db_url=DB_URL, root_dir=str(TEST_ROOT_DIR), environments_manager=MockEnvironmentManager() - ) diff --git a/jupyter_scheduler/tests/test_staging_dir/job-1/helloworld-1.html b/jupyter_scheduler/tests/static/helloworld-1.html similarity index 100% rename from jupyter_scheduler/tests/test_staging_dir/job-1/helloworld-1.html rename to jupyter_scheduler/tests/static/helloworld-1.html diff --git a/jupyter_scheduler/tests/test_staging_dir/job-1/helloworld-1.ipynb b/jupyter_scheduler/tests/static/helloworld-1.ipynb similarity index 100% rename from jupyter_scheduler/tests/test_staging_dir/job-1/helloworld-1.ipynb rename to jupyter_scheduler/tests/static/helloworld-1.ipynb diff --git a/jupyter_scheduler/tests/test_staging_dir/job-1/helloworld.ipynb b/jupyter_scheduler/tests/static/helloworld.ipynb similarity index 100% rename from jupyter_scheduler/tests/test_staging_dir/job-1/helloworld.ipynb rename to jupyter_scheduler/tests/static/helloworld.ipynb diff --git a/jupyter_scheduler/tests/test_staging_dir/job-2/helloworld.tar.gz b/jupyter_scheduler/tests/static/helloworld.tar.gz similarity index 100% rename from jupyter_scheduler/tests/test_staging_dir/job-2/helloworld.tar.gz rename to jupyter_scheduler/tests/static/helloworld.tar.gz diff --git a/jupyter_scheduler/tests/test_root_dir/job-5/a/b/helloworld.txt b/jupyter_scheduler/tests/static/helloworld.txt similarity index 100% rename from jupyter_scheduler/tests/test_root_dir/job-5/a/b/helloworld.txt rename to jupyter_scheduler/tests/static/helloworld.txt diff --git a/jupyter_scheduler/tests/test_root_dir/job-5/import-helloworld.ipynb b/jupyter_scheduler/tests/static/import-helloworld.ipynb similarity index 100% rename from jupyter_scheduler/tests/test_root_dir/job-5/import-helloworld.ipynb rename to jupyter_scheduler/tests/static/import-helloworld.ipynb diff --git a/jupyter_scheduler/tests/test_staging_dir/job-4/output_side_effect.txt b/jupyter_scheduler/tests/static/output_side_effect.txt similarity index 100% rename from jupyter_scheduler/tests/test_staging_dir/job-4/output_side_effect.txt rename to jupyter_scheduler/tests/static/output_side_effect.txt diff --git a/jupyter_scheduler/tests/test_staging_dir/job-4/side_effects.ipynb b/jupyter_scheduler/tests/static/side_effects.ipynb similarity index 100% rename from jupyter_scheduler/tests/test_staging_dir/job-4/side_effects.ipynb rename to jupyter_scheduler/tests/static/side_effects.ipynb diff --git a/jupyter_scheduler/tests/test_execution_manager.py b/jupyter_scheduler/tests/test_execution_manager.py index a9393eb9c..66546be38 100644 --- a/jupyter_scheduler/tests/test_execution_manager.py +++ b/jupyter_scheduler/tests/test_execution_manager.py @@ -1,43 +1,60 @@ +import shutil from pathlib import Path +from typing import Tuple import pytest -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker -from conftest import DB_URL from jupyter_scheduler.executors import DefaultExecutionManager from jupyter_scheduler.orm import Job -JOB_ID = "69856f4e-ce94-45fd-8f60-3a587457fce7" -NOTEBOOK_NAME = "side_effects.ipynb" -SIDE_EFECT_FILE_NAME = "output_side_effect.txt" -NOTEBOOK_DIR = Path(__file__).resolve().parent / "test_staging_dir" / "job-4" -NOTEBOOK_PATH = NOTEBOOK_DIR / NOTEBOOK_NAME -SIDE_EFFECT_FILE = NOTEBOOK_DIR / SIDE_EFECT_FILE_NAME +@pytest.fixture +def staging_dir_with_side_effects( + static_test_files_dir, jp_scheduler_staging_dir +) -> Tuple[Path, Path]: + notebook_file_path = static_test_files_dir / "side_effects.ipynb" + side_effect_file_path = static_test_files_dir / "output_side_effect.txt" + job_staging_dir = jp_scheduler_staging_dir / "job-4" + + job_staging_dir.mkdir() + shutil.copy2(notebook_file_path, job_staging_dir) + shutil.copy2(side_effect_file_path, job_staging_dir) + + return (notebook_file_path, side_effect_file_path) @pytest.fixture -def load_job(jp_scheduler_db): - with jp_scheduler_db() as session: - job = Job( - runtime_environment_name="abc", - input_filename=NOTEBOOK_NAME, - job_id=JOB_ID, - ) - session.add(job) - session.commit() - - -def test_add_side_effects_files(jp_scheduler_db, load_job): +def side_effects_job_record(staging_dir_with_side_effects, jp_scheduler_db) -> str: + notebook_name = staging_dir_with_side_effects[0].name + job = Job( + runtime_environment_name="abc", + input_filename=notebook_name, + ) + jp_scheduler_db.add(job) + jp_scheduler_db.commit() + + return job.job_id + + +def test_add_side_effects_files( + side_effects_job_record, + staging_dir_with_side_effects, + jp_scheduler_root_dir, + jp_scheduler_db_url, + jp_scheduler_db, +): + job_id = side_effects_job_record + staged_notebook_file_path = staging_dir_with_side_effects[0] + staged_notebook_dir = staged_notebook_file_path.parent + side_effect_file_name = staging_dir_with_side_effects[1].name + manager = DefaultExecutionManager( - job_id=JOB_ID, - root_dir=str(NOTEBOOK_DIR), - db_url=DB_URL, - staging_paths={"input": str(NOTEBOOK_PATH)}, + job_id=job_id, + root_dir=jp_scheduler_root_dir, + db_url=jp_scheduler_db_url, + staging_paths={"input": staged_notebook_file_path}, ) - manager.add_side_effects_files(str(NOTEBOOK_DIR)) + manager.add_side_effects_files(staged_notebook_dir) - with jp_scheduler_db() as session: - job = session.query(Job).filter(Job.job_id == JOB_ID).one() - assert SIDE_EFECT_FILE_NAME in job.packaged_files + job = jp_scheduler_db.query(Job).filter(Job.job_id == job_id).one() + assert side_effect_file_name in job.packaged_files diff --git a/jupyter_scheduler/tests/test_job_files_manager.py b/jupyter_scheduler/tests/test_job_files_manager.py index e6fcb5d6d..66a9727b7 100644 --- a/jupyter_scheduler/tests/test_job_files_manager.py +++ b/jupyter_scheduler/tests/test_job_files_manager.py @@ -60,65 +60,88 @@ async def test_copy_from_staging(): ) -HERE = Path(__file__).parent.resolve() -OUTPUTS_DIR = os.path.join(HERE, "test_files_output") +@pytest.fixture +def staging_dir_with_notebook_job(static_test_files_dir, jp_scheduler_staging_dir): + staging_dir = jp_scheduler_staging_dir / "job-1" + job_filenames = ["helloworld-1.ipynb", "helloworld-1.html", "helloworld.ipynb"] + + staged_job_files = [] + staging_dir.mkdir() + for job_filename in job_filenames: + staged_job_file = shutil.copy2(static_test_files_dir / job_filename, staging_dir) + staged_job_files.append(staged_job_file) + + return staged_job_files @pytest.fixture -def clear_outputs_dir(): - yield - shutil.rmtree(OUTPUTS_DIR) - # rmtree() is not synchronous; wait until it has finished running - while os.path.isdir(OUTPUTS_DIR): - time.sleep(0.01) - - -@pytest.mark.parametrize( - "output_formats, output_filenames, staging_paths, output_dir, redownload", - [ - ( - ["ipynb", "html"], - { +def staging_dir_with_tar_job(static_test_files_dir, jp_scheduler_staging_dir): + staging_dir = jp_scheduler_staging_dir / "job-2" + job_tar_file = static_test_files_dir / "helloworld.tar.gz" + + staging_dir.mkdir() + staged_tar_file = shutil.copy2(job_tar_file, staging_dir) + + return staged_tar_file + + +@pytest.fixture +def downloader_parameters( + staging_dir_with_notebook_job, + staging_dir_with_tar_job, + request, + jp_scheduler_output_dir, +): + job_1_ipynb_file_path, job_1_html_file_path, job_1_input_file_path = ( + staging_dir_with_notebook_job + ) + job_2_tar_file_path = staging_dir_with_tar_job + index = request.param + parameters = [ + { + "output_formats": ["ipynb", "html"], + "output_filenames": { "ipynb": "job-1/helloworld-out.ipynb", "html": "job-1/helloworld-out.html", "input": "job-1/helloworld-input.ipynb", }, - { - "ipynb": os.path.join(HERE, "test_staging_dir", "job-1", "helloworld-1.ipynb"), - "html": os.path.join(HERE, "test_staging_dir", "job-1", "helloworld-1.html"), - "input": os.path.join(HERE, "test_staging_dir", "job-1", "helloworld.ipynb"), + "staging_paths": { + "ipynb": job_1_ipynb_file_path, + "html": job_1_html_file_path, + "input": job_1_input_file_path, }, - OUTPUTS_DIR, - False, - ), - ( - ["ipynb", "html"], - { + "output_dir": jp_scheduler_output_dir, + "redownload": False, + }, + { + "output_formats": ["ipynb", "html"], + "output_filenames": { "ipynb": "job-2/helloworld-1.ipynb", "html": "job-2/helloworld-1.html", "input": "job-2/helloworld.ipynb", }, - { - "tar.gz": os.path.join(HERE, "test_staging_dir", "job-2", "helloworld.tar.gz"), + "staging_paths": { + "tar.gz": job_2_tar_file_path, "ipynb": "job-2/helloworld-1.ipynb", "html": "job-2/helloworld-1.html", "input": "job-2/helloworld.ipynb", }, - OUTPUTS_DIR, - False, - ), - ], -) -def test_downloader_download( - clear_outputs_dir, output_formats, output_filenames, staging_paths, output_dir, redownload -): - downloader = Downloader( - output_formats=output_formats, - output_filenames=output_filenames, - staging_paths=staging_paths, - output_dir=output_dir, - redownload=redownload, + "output_dir": jp_scheduler_output_dir, + "redownload": False, + }, + ] + return parameters[index] + + +@pytest.mark.parametrize("downloader_parameters", [0, 1], indirect=True) +def test_downloader_download(downloader_parameters): + output_formats, output_filenames, staging_paths, output_dir = ( + downloader_parameters["output_formats"], + downloader_parameters["output_filenames"], + downloader_parameters["staging_paths"], + downloader_parameters["output_dir"], ) + downloader = Downloader(**downloader_parameters) downloader.download() assert os.path.exists(output_dir) diff --git a/jupyter_scheduler/tests/test_scheduler.py b/jupyter_scheduler/tests/test_scheduler.py index fe67504bf..54b90d5da 100644 --- a/jupyter_scheduler/tests/test_scheduler.py +++ b/jupyter_scheduler/tests/test_scheduler.py @@ -1,5 +1,7 @@ """Tests for scheduler""" +import shutil +from pathlib import Path from unittest import mock from unittest.mock import patch @@ -16,6 +18,20 @@ from jupyter_scheduler.orm import Job, JobDefinition +@pytest.fixture +def root_dir_with_input_folder(static_test_files_dir, jp_scheduler_root_dir): + notebook_file_path = static_test_files_dir / "import-helloworld.ipynb" + dependency_file_path = static_test_files_dir / "helloworld.txt" + job_root_dir = jp_scheduler_root_dir / "job-5" + dependency_root_dir = job_root_dir / "a" / "b" + + dependency_root_dir.mkdir(parents=True) + shutil.copy2(notebook_file_path, job_root_dir) + shutil.copy2(dependency_file_path, dependency_root_dir) + + return Path(job_root_dir.name) / notebook_file_path.name + + def test_create_job_definition(jp_scheduler): with patch("jupyter_scheduler.scheduler.fsspec") as mock_fsspec: with patch("jupyter_scheduler.scheduler.Scheduler.file_exists") as mock_file_exists: @@ -40,10 +56,10 @@ def test_create_job_definition(jp_scheduler): assert "hello world" == definition.name -def test_create_job_definition_with_input_folder(jp_scheduler): +def test_create_job_definition_with_input_folder(jp_scheduler, root_dir_with_input_folder): job_definition_id = jp_scheduler.create_job_definition( CreateJobDefinition( - input_uri="job-5/import-helloworld.ipynb", + input_uri=str(root_dir_with_input_folder), runtime_environment_name="default", name="import hello world", output_formats=["ipynb"], @@ -61,10 +77,10 @@ def test_create_job_definition_with_input_folder(jp_scheduler): assert "a/b/helloworld.txt" in definition.packaged_files -def test_create_job_with_input_folder(jp_scheduler): +def test_create_job_with_input_folder(jp_scheduler, root_dir_with_input_folder): job_id = jp_scheduler.create_job( CreateJob( - input_uri="job-5/import-helloworld.ipynb", + input_uri=str(root_dir_with_input_folder), runtime_environment_name="default", name="import hello world", output_formats=["ipynb"], @@ -127,11 +143,10 @@ def test_create_job_with_input_folder(jp_scheduler): @pytest.fixture def load_job_definitions(jp_scheduler_db): - with jp_scheduler_db() as session: - session.add(JobDefinition(**job_definition_1)) - session.add(JobDefinition(**job_definition_2)) - session.add(JobDefinition(**job_definition_3)) - session.commit() + jp_scheduler_db.add(JobDefinition(**job_definition_1)) + jp_scheduler_db.add(JobDefinition(**job_definition_2)) + jp_scheduler_db.add(JobDefinition(**job_definition_3)) + jp_scheduler_db.commit() @pytest.mark.parametrize( @@ -182,14 +197,13 @@ def test_pause_jobs(jp_scheduler, load_job_definitions, jp_scheduler_db): with patch("jupyter_scheduler.scheduler.Scheduler.task_runner") as mock_task_runner: jp_scheduler.update_job_definition(job_definition_id, UpdateJobDefinition(active=False)) - with jp_scheduler_db() as session: - active = ( - session.query(JobDefinition.active) - .filter(JobDefinition.job_definition_id == job_definition_id) - .one() - .active - ) - assert not active + active = ( + jp_scheduler_db.query(JobDefinition.active) + .filter(JobDefinition.job_definition_id == job_definition_id) + .one() + .active + ) + assert not active def test_resume_jobs(jp_scheduler, load_job_definitions, jp_scheduler_db): @@ -197,14 +211,13 @@ def test_resume_jobs(jp_scheduler, load_job_definitions, jp_scheduler_db): with patch("jupyter_scheduler.scheduler.Scheduler.task_runner") as mock_task_runner: jp_scheduler.update_job_definition(job_definition_id, UpdateJobDefinition(active=True)) - with jp_scheduler_db() as session: - active = ( - session.query(JobDefinition.active) - .filter(JobDefinition.job_definition_id == job_definition_id) - .one() - .active - ) - assert active + active = ( + jp_scheduler_db.query(JobDefinition.active) + .filter(JobDefinition.job_definition_id == job_definition_id) + .one() + .active + ) + assert active def test_update_job_definition(jp_scheduler, load_job_definitions, jp_scheduler_db): @@ -217,15 +230,13 @@ def test_update_job_definition(jp_scheduler, load_job_definitions, jp_scheduler_ ) jp_scheduler.update_job_definition(job_definition_id, update) - with jp_scheduler_db() as session: - definition = session.get(JobDefinition, job_definition_id) - assert schedule == definition.schedule - assert timezone == definition.timezone + definition = jp_scheduler_db.get(JobDefinition, job_definition_id) + assert schedule == definition.schedule + assert timezone == definition.timezone def test_delete_job_definition(jp_scheduler, load_job_definitions, jp_scheduler_db): job_definition_id = job_definition_1["job_definition_id"] jp_scheduler.delete_job_definition(job_definition_id) - with jp_scheduler_db() as session: - definition = session.get(JobDefinition, job_definition_id) - assert not definition + definition = jp_scheduler_db.get(JobDefinition, job_definition_id) + assert not definition