allow for partial access to huge & largemem nodes (#234)
johrstrom authored Feb 24, 2023
1 parent 01603cf commit f9b7138
Showing 6 changed files with 144 additions and 32 deletions.
24 changes: 20 additions & 4 deletions apps.awesim.org/apps/bc_desktop/owens.yml.erb
@@ -82,8 +82,24 @@ attributes:
       - **hugemem** - (*48 cores*) This Owens node has 1.5TB of available RAM
         as well as 48 cores. There are 16 of these nodes on Owens.
     options:
-      - [ "any", "any" ]
-      - [ "vis", "vis" ]
-      - [ "gpu", "gpu-any" ]
-      - [ "hugemem", "hugemem" ]
+      - [
+          "any", "any",
+          data-min-cores: 1,
+          data-max-cores: 28,
+        ]
+      - [
+          "vis", "vis",
+          data-min-cores: 1,
+          data-max-cores: 28,
+        ]
+      - [
+          "gpu", "gpu-any",
+          data-min-cores: 1,
+          data-max-cores: 28,
+        ]
+      - [
+          "hugemem", "hugemem",
+          data-min-cores: 4,
+          data-max-cores: 48,
+        ]
 submit: submit/slurm.yml.erb
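The new `data-min-cores` / `data-max-cores` entries end up as data attributes on each option of the node type select, where the form's JavaScript can presumably use them to limit the cores field to the range a given node type supports. As a rough illustration of the effect only (the hash and method below are hypothetical, not code from this commit), the same per-node-type clamping could be written as:

```ruby
# Hypothetical illustration of the Owens core limits encoded in the options
# above; the real enforcement presumably happens in the browser via the
# data-min-cores / data-max-cores option attributes.
OWENS_CORE_LIMITS = {
  "any"     => 1..28,
  "vis"     => 1..28,
  "gpu-any" => 1..28,
  "hugemem" => 4..48,
}.freeze

# Clamp a requested core count into the allowed range for a node type.
def clamp_cores(node_type, requested)
  range = OWENS_CORE_LIMITS.fetch(node_type, 1..28)
  requested.to_i.clamp(range.min, range.max)
end

clamp_cores("hugemem", 2)  #=> 4 -- hugemem is now partially allocatable, but never below 4 cores
```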
58 changes: 50 additions & 8 deletions apps.awesim.org/apps/bc_desktop/pitzer.yml.erb
@@ -87,12 +87,54 @@ attributes:
       [detailed information on the Pitzer cluster]: https://www.osc.edu/resources/technical_support/supercomputers/pitzer
       [NVIDIA Tesla V100 GPUs]: https://www.nvidia.com/en-us/data-center/v100/
     options:
-      - [ "any", "any" ]
-      - [ "48 core", "any-48core" ]
-      - [ "any gpu", "gpu-any"]
-      - [ "48 core with gpu", "gpu-48core" ]
-      - [ "densegpu", "densegpu" ]
-      - [ "visualization node", "vis"]
-      - [ "largemem", "largemem" ]
-      - [ "hugemem", "hugemem" ]
+      - [
+          "any", "any",
+          data-min-cores: 1,
+          data-max-cores: 80,
+        ]
+      - [
+          "40 core", "any-40core",
+          data-min-cores: 1,
+          data-max-cores: 40,
+        ]
+      - [
+          "48 core", "any-48core",
+          data-min-cores: 1,
+          data-max-cores: 48,
+        ]
+      - [
+          "any gpu", "gpu-any",
+          data-min-cores: 1,
+          data-max-cores: 48,
+        ]
+      - [
+          "40 core with gpu", "gpu-40core",
+          data-min-cores: 1,
+          data-max-cores: 40,
+        ]
+      - [
+          "48 core with gpu", "gpu-48core",
+          data-min-cores: 1,
+          data-max-cores: 48,
+        ]
+      - [
+          "densegpu", "densegpu",
+          data-min-cores: 1,
+          data-max-cores: 48,
+        ]
+      - [
+          "visualization node", "vis",
+          data-min-cores: 1,
+          data-max-cores: 48,
+        ]
+      - [
+          "largemem", "largemem",
+          data-min-cores: 24,
+          data-max-cores: 48,
+        ]
+      - [
+          "hugemem", "hugemem",
+          data-min-cores: 20,
+          data-max-cores: 80,
+        ]
 submit: submit/slurm.yml.erb
6 changes: 2 additions & 4 deletions apps.awesim.org/apps/bc_desktop/submit/slurm.yml.erb
@@ -31,8 +31,6 @@
 end
 end
 
-hugemem_cpus = cluster == 'pitzer' ? 80 : 48
-
 slurm_args = case node_type
 # 'any' case handled by scheduler, this is just a quick short circuit
 when "any"
@@ -57,10 +55,10 @@
 # using partitions here is easier than specifying memory requests
 when "largemem"
   partition = bc_num_slots.to_i > 1 ? "largemem-parallel" : "largemem"
-  base_slurm_args + p20_node + ["--partition", partition ]
+  base_slurm_args + tasks_per_node + ["--partition", partition ]
 when "hugemem"
   partition = bc_num_slots.to_i > 1 ? "hugemem-parallel" : "hugemem"
-  base_slurm_args + [ "--ntasks-per-node", "#{hugemem_cpus}", "--partition", partition ]
+  base_slurm_args + tasks_per_node + ["--partition", partition ]
 
 else
   base_slurm_args
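Both the largemem and hugemem branches now append a shared `tasks_per_node` array instead of the removed `p20_node` and hard-coded `hugemem_cpus` values, so a job can request only part of one of these nodes. The definition of `tasks_per_node` lies outside this hunk; a minimal sketch of what it presumably builds, with `requested_cores` standing in for the new cores form value (an assumption, not shown in the diff):

```ruby
# Sketch only: build --ntasks-per-node from the user's requested core count
# instead of hard-coding a full node (80 on Pitzer, 48 on Owens).
requested_cores = 4  # e.g. a partial hugemem request
tasks_per_node  = requested_cores.to_i > 0 ? ["--ntasks-per-node", requested_cores.to_s] : []
# tasks_per_node #=> ["--ntasks-per-node", "4"]
```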
24 changes: 20 additions & 4 deletions ondemand.osc.edu/apps/bc_desktop/owens.yml.erb
@@ -82,8 +82,24 @@ attributes:
       - **hugemem** - (*48 cores*) This Owens node has 1.5TB of available RAM
         as well as 48 cores. There are 16 of these nodes on Owens.
     options:
-      - [ "any", "any" ]
-      - [ "vis", "vis" ]
-      - [ "gpu", "gpu-any" ]
-      - [ "hugemem", "hugemem" ]
+      - [
+          "any", "any",
+          data-min-cores: 1,
+          data-max-cores: 28,
+        ]
+      - [
+          "vis", "vis",
+          data-min-cores: 1,
+          data-max-cores: 28,
+        ]
+      - [
+          "gpu", "gpu-any",
+          data-min-cores: 1,
+          data-max-cores: 28,
+        ]
+      - [
+          "hugemem", "hugemem",
+          data-min-cores: 4,
+          data-max-cores: 48,
+        ]
 submit: submit/slurm.yml.erb
58 changes: 50 additions & 8 deletions ondemand.osc.edu/apps/bc_desktop/pitzer.yml.erb
@@ -87,12 +87,54 @@ attributes:
       [detailed information on the Pitzer cluster]: https://www.osc.edu/resources/technical_support/supercomputers/pitzer
       [NVIDIA Tesla V100 GPUs]: https://www.nvidia.com/en-us/data-center/v100/
     options:
-      - [ "any", "any" ]
-      - [ "48 core", "any-48core" ]
-      - [ "any gpu", "gpu-any"]
-      - [ "48 core with gpu", "gpu-48core" ]
-      - [ "densegpu", "densegpu" ]
-      - [ "visualization node", "vis"]
-      - [ "largemem", "largemem" ]
-      - [ "hugemem", "hugemem" ]
+      - [
+          "any", "any",
+          data-min-cores: 1,
+          data-max-cores: 80,
+        ]
+      - [
+          "40 core", "any-40core",
+          data-min-cores: 1,
+          data-max-cores: 40,
+        ]
+      - [
+          "48 core", "any-48core",
+          data-min-cores: 1,
+          data-max-cores: 48,
+        ]
+      - [
+          "any gpu", "gpu-any",
+          data-min-cores: 1,
+          data-max-cores: 48,
+        ]
+      - [
+          "40 core with gpu", "gpu-40core",
+          data-min-cores: 1,
+          data-max-cores: 40,
+        ]
+      - [
+          "48 core with gpu", "gpu-48core",
+          data-min-cores: 1,
+          data-max-cores: 48,
+        ]
+      - [
+          "densegpu", "densegpu",
+          data-min-cores: 1,
+          data-max-cores: 48,
+        ]
+      - [
+          "visualization node", "vis",
+          data-min-cores: 1,
+          data-max-cores: 48,
+        ]
+      - [
+          "largemem", "largemem",
+          data-min-cores: 24,
+          data-max-cores: 48,
+        ]
+      - [
+          "hugemem", "hugemem",
+          data-min-cores: 20,
+          data-max-cores: 80,
+        ]
 submit: submit/slurm.yml.erb
6 changes: 2 additions & 4 deletions ondemand.osc.edu/apps/bc_desktop/submit/slurm.yml.erb
@@ -31,8 +31,6 @@
 end
 end
 
-hugemem_cpus = cluster == 'pitzer' ? 80 : 48
-
 slurm_args = case node_type
 # 'any' case handled by scheduler, this is just a quick short circuit
 when "any"
@@ -57,10 +55,10 @@
 # using partitions here is easier than specifying memory requests
 when "largemem"
   partition = bc_num_slots.to_i > 1 ? "largemem-parallel" : "largemem"
-  base_slurm_args + p20_node + ["--partition", partition ]
+  base_slurm_args + tasks_per_node + ["--partition", partition ]
 when "hugemem"
   partition = bc_num_slots.to_i > 1 ? "hugemem-parallel" : "hugemem"
-  base_slurm_args + [ "--ntasks-per-node", "#{hugemem_cpus}", "--partition", partition ]
+  base_slurm_args + tasks_per_node + ["--partition", partition ]
 
 else
   base_slurm_args
