Skip to content

Commit

Permalink
Supporting specifying user-specific maximum device attachment allowed
Browse files Browse the repository at this point in the history
* Added flag "--max-devices-for-user"
* Bug fix in enforcement rule "max_devices_per_user"
  • Loading branch information
razrotenberg committed Jun 21, 2023
1 parent 6c8b92c commit d7835ab
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 10 deletions.
4 changes: 4 additions & 0 deletions docs/source/usage/enforcement.rst
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,7 @@ Use the flag :code:`--max-devices-per-user <value>` to control how many devices

If a Linux user is using more devices than the specified value, some of that user's environments are detached to free up resources.
Processes from the detached environments that are running on the detached devices are terminated.

This flag can be combined with :code:`--max-devices-for-user <username>=<maximum>` to specify user-specific values.

For example, passing :code:`--max-devices-per-user 1 --max-devices-for-user john=3 paul=2` limits every user to a single device, except for John and Paul, who are allowed to use 3 and 2 devices respectively.
22 changes: 21 additions & 1 deletion genv/cli/enforce.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,22 @@ def add_enforcement(
help="maximum allowed attached devices for each user",
)

def max_devices_for_user(value: str):
    """
    Parse a single "username=maximum" command-line spec.

    Intended to be used as an argparse ``type`` callable.

    :param value: Raw argument text, e.g. ``john=3``.
    :return: A ``(username, maximum)`` tuple with ``maximum`` converted to int.
    :raises argparse.ArgumentTypeError: If the text is not exactly one
        ``username=maximum`` pair, the username is empty, or the maximum is
        not an integer.
    """
    try:
        # Unpacking raises ValueError unless there is exactly one "=".
        username, maximum = value.split("=")

        if not username:
            # An empty username cannot identify a user; reject it instead of
            # silently creating a rule keyed by "".
            raise ValueError("empty username")

        return username, int(maximum)
    except ValueError:
        # Only ValueError can be raised above; suppress it so the user sees
        # just the argparse-formatted message.
        raise argparse.ArgumentTypeError(f"not a valid spec: {value}") from None

enforcements.add_argument(
"--max-devices-for-user",
nargs="+",
help="per-user specification of maximum allowed attached devices",
metavar="username=maximum",
type=max_devices_for_user,
)


async def run(args: argparse.Namespace) -> None:
"""
Expand All @@ -90,7 +106,11 @@ async def run(args: argparse.Namespace) -> None:

if args.max_devices_per_user is not None:
genv.enforce.rules.max_devices_per_user(
survey, maximum=args.max_devices_per_user
survey,
maximum=args.max_devices_per_user,
maximum_for_user=(
dict(args.max_devices_for_user) if args.max_devices_for_user else {}
),
)

with genv.utils.global_lock():
Expand Down
30 changes: 25 additions & 5 deletions genv/cli/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import shutil
import sys
from typing import Iterable, NoReturn, Optional
from typing import Iterable, NoReturn, Optional, Tuple

import genv

Expand Down Expand Up @@ -129,6 +129,7 @@ async def do_enforce(
env_devices: bool,
env_memory: bool,
max_devices_per_user: Optional[int],
max_devices_for_user: Optional[Iterable[Tuple[str, int]]],
) -> None:
"""
Enforce GPU usage on multiple hosts.
Expand Down Expand Up @@ -156,7 +157,11 @@ async def do_enforce(

if max_devices_per_user is not None:
genv.enforce.rules.max_devices_per_user(
*surveys, maximum=max_devices_per_user
*surveys,
maximum=max_devices_per_user,
maximum_for_user=(
dict(max_devices_for_user) if max_devices_for_user else {}
),
)

reports = [survey.report for survey in surveys]
Expand Down Expand Up @@ -414,6 +419,22 @@ def add_enforcement(
help="maximum allowed attached devices for each user",
)

def max_devices_for_user(value: str):
    """
    Parse a single "username=maximum" command-line spec.

    Intended to be used as an argparse ``type`` callable.

    :param value: Raw argument text, e.g. ``john=3``.
    :return: A ``(username, maximum)`` tuple with ``maximum`` converted to int.
    :raises argparse.ArgumentTypeError: If the text is not exactly one
        ``username=maximum`` pair, the username is empty, or the maximum is
        not an integer.
    """
    try:
        # Unpacking raises ValueError unless there is exactly one "=".
        username, maximum = value.split("=")

        if not username:
            # An empty username cannot identify a user; reject it instead of
            # silently creating a rule keyed by "".
            raise ValueError("empty username")

        return username, int(maximum)
    except ValueError:
        # Only ValueError can be raised above; suppress it so the user sees
        # just the argparse-formatted message.
        raise argparse.ArgumentTypeError(f"not a valid spec: {value}") from None

enforcements.add_argument(
"--max-devices-for-user",
nargs="+",
help="per-user specification of maximum allowed attached devices",
metavar="username=maximum",
type=max_devices_for_user,
)

def envs(parser):
parser.add_argument(
"--no-header",
Expand Down Expand Up @@ -497,9 +518,7 @@ async def run(args: argparse.Namespace) -> None:
else:
hostnames = args.hostnames.split(",")

hosts = [
genv.remote.Host(hostname, args.timeout) for hostname in hostnames
]
hosts = [genv.remote.Host(hostname, args.timeout) for hostname in hostnames]

config = genv.remote.Config(hosts, args.throw_on_error, args.quiet)

Expand All @@ -515,6 +534,7 @@ async def run(args: argparse.Namespace) -> None:
args.env_devices,
args.env_memory,
args.max_devices_per_user,
args.max_devices_for_user,
)
elif args.command == "envs":
await do_envs(
Expand Down
12 changes: 8 additions & 4 deletions genv/enforce/rules/max_devices_per_user.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from typing import Dict

from genv.entities.enforce import Survey


def max_devices_per_user(*surveys: Survey, maximum: int) -> None:
def max_devices_per_user(
*surveys: Survey, maximum: int, maximum_for_user: Dict[str, int] = {}
) -> None:
"""
Enforce maximum devices per user.
"""
Expand All @@ -17,10 +21,10 @@ def max_devices_per_user(*surveys: Survey, maximum: int) -> None:

attached = sum(len(snapshot.devices) for snapshot in snapshots)

if attached <= maximum:
return
over = attached - maximum_for_user.get(username, maximum)

over = attached - maximum
if over <= 0:
continue

if all(survey.hostname for survey in surveys):
hosts = len([snapshot for snapshot in snapshots if len(snapshot.envs) > 0])
Expand Down

0 comments on commit d7835ab

Please sign in to comment.