Skip to content

Commit

Permalink
feat(sessions): add allowlist for interactive session images (reanahu…
Browse files Browse the repository at this point in the history
  • Loading branch information
mdonadoni committed May 27, 2024
1 parent 13d1c5d commit 4f0fc6c
Show file tree
Hide file tree
Showing 7 changed files with 254 additions and 25 deletions.
59 changes: 56 additions & 3 deletions reana_workflow_controller/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,60 @@ def _env_vars_dict_to_k8s_list(env_vars):
)
"""Common to all workflow engines environment variables for debug mode."""

JUPYTER_INTERACTIVE_SESSION_DEFAULT_IMAGE = (
"docker.io/jupyter/scipy-notebook:notebook-6.4.5"

def _parse_interactive_sessions_environments(env_var):
    """Normalise the configuration of interactive session environments.

    :param env_var: Dictionary mapping each interactive session type to its
        configuration, i.e. the decoded value of the
        ``REANA_INTERACTIVE_SESSIONS_ENVIRONMENTS`` environment variable.
        Each configuration may provide ``recommended`` (list of dictionaries
        with ``image`` and optionally ``name``) and ``allow_custom``.
    :return: Dictionary with the same keys, where each value has exactly the
        ``allow_custom`` and ``recommended`` keys. ``allow_custom`` defaults
        to ``False``, and every recommended entry has both ``name`` and
        ``image`` (the image is used as name when no name is given).
    """
    config = {}
    for type_, type_config in env_var.items():
        # A session type configured as `null` is treated like an empty
        # configuration, which gives the most restrictive result (no
        # recommended images, custom images not allowed) instead of crashing.
        type_config = type_config or {}

        recommended = []
        for recommended_item in type_config.get("recommended") or []:
            # `null` entries are handled like entries without an image
            image = (recommended_item or {}).get("image")
            if not image:
                # an entry without image cannot be offered to users
                continue
            name = recommended_item.get("name") or image
            recommended.append({"name": name, "image": image})

        config[type_] = {
            "allow_custom": type_config.get("allow_custom", False),
            "recommended": recommended,
        }
    return config


REANA_INTERACTIVE_SESSIONS_ENVIRONMENTS = _parse_interactive_sessions_environments(
json.loads(os.getenv("REANA_INTERACTIVE_SESSIONS_ENVIRONMENTS", "{}"))
)
"""Default image for Jupyter based interactive session deployments."""
"""Allowed and recommended environments to be used for interactive sessions.
This is a dictionary where keys are the type of the interactive session.
For each session type, a list of recommended Docker images is provided (`recommended`)
and whether custom images are allowed (`allow_custom`).
Example:
{
"jupyter": {
"recommended": [
{
"name": "Jupyter SciPy Notebook 6.4.5",
"image": "docker.io/jupyter/scipy-notebook:notebook-6.4.5"
}
],
"allow_custom": true
}
}
"""

# Derived from REANA_INTERACTIVE_SESSIONS_ENVIRONMENTS: only the image
# references are kept, collected in one set per session type.
REANA_INTERACTIVE_SESSIONS_RECOMMENDED_IMAGES = {
type_: {recommended["image"] for recommended in config["recommended"]}
for type_, config in REANA_INTERACTIVE_SESSIONS_ENVIRONMENTS.items()
}
"""Set of recommended images for each interactive session type."""

# The first recommended image of each session type is taken as its default.
# `next(..., {})` yields an empty dict when nothing is recommended, so that
# `.get("image")` returns `None` in that case.
REANA_INTERACTIVE_SESSIONS_DEFAULT_IMAGES = {
type_: next(iter(config["recommended"]), {}).get("image")
for type_, config in REANA_INTERACTIVE_SESSIONS_ENVIRONMENTS.items()
}
"""Default image for each interactive session type, can be `None`."""

JUPYTER_INTERACTIVE_SESSION_DEFAULT_PORT = 8888
"""Default port for Jupyter based interactive session deployments."""
Expand Down Expand Up @@ -219,3 +269,6 @@ def _env_vars_dict_to_k8s_list(env_vars):
The job controller needs to clean up all the running jobs before the end of the grace period.
"""

CONTAINER_IMAGE_ALIAS_PREFIXES = ["docker.io/", "docker.io/library/", "library/"]
"""Prefixes that can be removed from container image references to generate valid image aliases."""
8 changes: 3 additions & 5 deletions reana_workflow_controller/k8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
)

from reana_workflow_controller.config import ( # isort:skip
JUPYTER_INTERACTIVE_SESSION_DEFAULT_IMAGE,
JUPYTER_INTERACTIVE_SESSION_DEFAULT_PORT,
REANA_INGRESS_ANNOTATIONS,
REANA_INGRESS_CLASS_NAME,
Expand Down Expand Up @@ -250,11 +249,11 @@ def build_interactive_jupyter_deployment_k8s_objects(
deployment_name,
workspace,
access_path,
image,
access_token=None,
cvmfs_repos=None,
owner_id=None,
workflow_id=None,
image=None,
):
"""Build the Kubernetes specification for a Jupyter NB interactive session.
Expand All @@ -270,14 +269,13 @@ def build_interactive_jupyter_deployment_k8s_objects(
/me Traefik won't send the request to the interactive session
(``/1234/me``) but to the root path (``/me``) giving most probably
a ``404``.
:param image: Jupyter Notebook image to use, i.e.
``jupyter/tensorflow-notebook`` to enable ``tensorflow``.
:param cvmfs_repos: List of CVMFS repos to make available.
:param owner_id: Owner of the interactive session.
:param workflow_id: UUID of the workflow to which the interactive
session belongs to.
:param image: Jupyter Notebook image to use, i.e.
``jupyter/tensorflow-notebook`` to enable ``tensorflow``.
"""
image = image or JUPYTER_INTERACTIVE_SESSION_DEFAULT_IMAGE
cvmfs_repos = cvmfs_repos or []
port = JUPYTER_INTERACTIVE_SESSION_DEFAULT_PORT
deployment_builder = InteractiveDeploymentK8sBuilder(
Expand Down
2 changes: 1 addition & 1 deletion reana_workflow_controller/rest/workflows_session.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of REANA.
# Copyright (C) 2020, 2021 CERN.
# Copyright (C) 2020, 2021, 2024 CERN.
#
# REANA is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
Expand Down
85 changes: 78 additions & 7 deletions reana_workflow_controller/workflow_run_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import json
import logging
import os
from typing import List, Optional

from flask import current_app
from kubernetes import client
Expand Down Expand Up @@ -59,10 +60,14 @@
)

from reana_workflow_controller.config import ( # isort:skip
CONTAINER_IMAGE_ALIAS_PREFIXES,
IMAGE_PULL_SECRETS,
JOB_CONTROLLER_CONTAINER_PORT,
JOB_CONTROLLER_ENV_VARS,
JOB_CONTROLLER_SHUTDOWN_ENDPOINT,
REANA_INTERACTIVE_SESSIONS_DEFAULT_IMAGES,
REANA_INTERACTIVE_SESSIONS_ENVIRONMENTS,
REANA_INTERACTIVE_SESSIONS_RECOMMENDED_IMAGES,
REANA_RUNTIME_BATCH_TERMINATION_GRACE_PERIOD,
REANA_KUBERNETES_JOBS_MAX_USER_MEMORY_LIMIT,
REANA_KUBERNETES_JOBS_MEMORY_LIMIT,
Expand All @@ -81,6 +86,66 @@
)


def _container_image_aliases(
    image: str, prefixes=CONTAINER_IMAGE_ALIAS_PREFIXES
) -> List[str]:
    """Compute the candidate aliases of a container image reference.

    The original reference always comes first. Then, for each prefix (in
    order), one more alias is produced: the reference without the prefix if
    it starts with it, otherwise the reference with the prefix prepended.

    Prepending a prefix to a reference that is already fully qualified gives
    an alias that is not a valid image reference; such aliases are returned
    anyway.

    Example: with the prefixes ``docker.io/``, ``docker.io/library/`` and
    ``library/``, the aliases of `docker.io/library/ubuntu:24.04` are:

    - `docker.io/library/ubuntu:24.04`
    - `library/ubuntu:24.04`
    - `ubuntu:24.04`
    - `library/docker.io/library/ubuntu:24.04` (not valid)

    :param image: Container image reference.
    :param prefixes: Prefixes to be stripped from / prepended to ``image``.
    :return: List of aliases, starting with ``image`` itself.
    """
    variants = [
        image[len(prefix) :] if image.startswith(prefix) else prefix + image
        for prefix in prefixes
    ]
    return [image, *variants]


def _validate_interactive_session_image(type_: str, user_image: Optional[str]) -> str:
    """Return the image to be used for an interactive session, if allowed.

    When no image is requested, the default image of the session type is
    used. If custom images are not allowed for the session type, the image
    (or one of its aliases) has to be in the set of recommended images; in
    that case the matching recommended reference is returned.

    :param type_: Type of the interactive session.
    :param user_image: Image requested by the user, can be `None`.
    :return: Container image reference to be used.
    :raises REANAInteractiveSessionError: If the session type has no
        environment configuration, if no image can be determined, or if the
        image is not allowed.
    """
    if type_ not in REANA_INTERACTIVE_SESSIONS_ENVIRONMENTS:
        raise REANAInteractiveSessionError(
            f"Missing environment configuration for {type_}."
        )

    session_config = REANA_INTERACTIVE_SESSIONS_ENVIRONMENTS[type_]
    # might be an empty set
    allowlist = REANA_INTERACTIVE_SESSIONS_RECOMMENDED_IMAGES[type_]
    # might be `None`
    fallback_image = REANA_INTERACTIVE_SESSIONS_DEFAULT_IMAGES[type_]

    image = user_image if user_image else fallback_image
    if not image:
        raise REANAInteractiveSessionError("Container image must be specified.")

    if session_config["allow_custom"]:
        # any image is accepted, no need to look at the recommended ones
        return image

    # Usually at most one alias matches; more than one can match only when
    # the recommended images contain several aliases of the same image, in
    # which case the first one wins.
    matching = [
        alias for alias in _container_image_aliases(image) if alias in allowlist
    ]
    chosen = matching[0] if matching else None
    if not chosen:
        raise REANAInteractiveSessionError(
            f"Custom container image {image} is not allowed."
        )
    return chosen


class WorkflowRunManager:
"""Interface which specifies how to manage workflow runs."""

Expand Down Expand Up @@ -316,21 +381,26 @@ def start_batch_workflow_run(
logging.error(msg, exc_info=True)
raise e

def start_interactive_session(self, interactive_session_type, **kwargs):
def start_interactive_session(self, interactive_session_type, image=None, **kwargs):
"""Start an interactive workflow run.
:param interactive_session_type: One of the available interactive
session types.
:param image: Docker image to use for the interactive session.
:return: Relative path to access the interactive session.
"""
if interactive_session_type not in InteractiveSessionType.__members__:
raise REANAInteractiveSessionError(
f"Interactive type {interactive_session_type} does not exist."
)

validated_image = _validate_interactive_session_image(
interactive_session_type, image
)

action_completed = True
kubernetes_objects = None
try:
if interactive_session_type not in InteractiveSessionType.__members__:
raise REANAInteractiveSessionError(
"Interactive type {} does not exist.".format(
interactive_session_type
)
)
access_path = self._generate_interactive_workflow_path()
workflow_run_name = self._workflow_run_name_generator("session")
kubernetes_objects = build_interactive_k8s_objects[
Expand All @@ -339,6 +409,7 @@ def start_interactive_session(self, interactive_session_type, **kwargs):
workflow_run_name,
self.workflow.workspace_path,
access_path,
validated_image,
access_token=self.workflow.get_owner_access_token(),
cvmfs_repos=self.retrieve_required_cvmfs_repos(),
owner_id=self.workflow.owner_id,
Expand Down
30 changes: 29 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of REANA.
# Copyright (C) 2017, 2018, 2019, 2020, 2021, 2022 CERN.
# Copyright (C) 2017, 2018, 2019, 2020, 2021, 2022, 2024 CERN.
#
# REANA is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
Expand All @@ -25,6 +25,11 @@
)
from sqlalchemy_utils import create_database, database_exists, drop_database

from reana_workflow_controller.config import (
REANA_INTERACTIVE_SESSIONS_DEFAULT_IMAGES,
REANA_INTERACTIVE_SESSIONS_ENVIRONMENTS,
REANA_INTERACTIVE_SESSIONS_RECOMMENDED_IMAGES,
)
from reana_workflow_controller.factory import create_app


Expand Down Expand Up @@ -124,3 +129,26 @@ def sample_serial_workflow_with_retention_rule(session, sample_serial_workflow_i
session.query(WorkspaceRetentionAuditLog).delete()
session.delete(rule)
session.commit()


@pytest.fixture()
def interactive_session_environments(monkeypatch):
    """Patch the Jupyter session configuration with two recommended images.

    Custom images are not allowed and the first recommended image is the
    default one. The three configuration dictionaries are patched together
    so that they stay consistent with each other.
    """
    recommended = [
        {"image": "docker_image_1", "name": "image name 1"},
        {"image": "docker_image_2", "name": "image name 2"},
    ]
    default_image = recommended[0]["image"]
    recommended_images = {item["image"] for item in recommended}

    monkeypatch.setitem(
        REANA_INTERACTIVE_SESSIONS_ENVIRONMENTS,
        "jupyter",
        {"recommended": recommended, "allow_custom": False},
    )
    monkeypatch.setitem(
        REANA_INTERACTIVE_SESSIONS_DEFAULT_IMAGES, "jupyter", default_image
    )
    monkeypatch.setitem(
        REANA_INTERACTIVE_SESSIONS_RECOMMENDED_IMAGES, "jupyter", recommended_images
    )
14 changes: 8 additions & 6 deletions tests/test_views.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of REANA.
# Copyright (C) 2017, 2018, 2019, 2020, 2021, 2022 CERN.
# Copyright (C) 2017, 2018, 2019, 2020, 2021, 2022, 2024 CERN.
#
# REANA is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
Expand Down Expand Up @@ -1477,7 +1477,9 @@ def test_get_workspace_diff(
assert "# File" in response_data["workspace_listing"]


def test_create_interactive_session(app, default_user, sample_serial_workflow_in_db):
def test_create_interactive_session(
app, default_user, sample_serial_workflow_in_db, interactive_session_environments
):
"""Test create interactive session."""
wrm = WorkflowRunManager(sample_serial_workflow_in_db)
expected_data = {"path": wrm._generate_interactive_workflow_path()}
Expand All @@ -1501,7 +1503,7 @@ def test_create_interactive_session(app, default_user, sample_serial_workflow_in


def test_create_interactive_session_unknown_type(
app, default_user, sample_serial_workflow_in_db
app, default_user, sample_serial_workflow_in_db, interactive_session_environments
):
"""Test create interactive session for unknown interactive type."""
with app.test_client() as client:
Expand All @@ -1510,18 +1512,18 @@ def test_create_interactive_session_unknown_type(
url_for(
"workflows_session.open_interactive_session",
workflow_id_or_name=sample_serial_workflow_in_db.id_,
interactive_session_type="terminl",
interactive_session_type="terminal",
),
query_string={"user": default_user.id_},
)
assert res.status_code == 404


def test_create_interactive_session_custom_image(
app, default_user, sample_serial_workflow_in_db
app, default_user, sample_serial_workflow_in_db, interactive_session_environments
):
"""Create an interactive session with custom image."""
custom_image = "test/image"
custom_image = "docker_image_2"
interactive_session_configuration = {"image": custom_image}
with app.test_client() as client:
# create workflow
Expand Down
Loading

0 comments on commit 4f0fc6c

Please sign in to comment.