Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test longrun #856

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
304 changes: 10 additions & 294 deletions .buildkite/longruns/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,6 @@ env:
timeout_in_minutes: 1440

steps:
# Step "init_cpu_env": instantiates and precompiles the Julia environments used
# by the later jobs, then downloads the artifacts.
- label: "init :computer:"
key: "init_cpu_env"
command:
# Print the Julia depot path. "$$" is Buildkite's escape, so the variable is
# expanded by the job's shell rather than at pipeline upload.
- "echo $$JULIA_DEPOT_PATH"

# Echoed lines starting with "---" open a collapsible section in the Buildkite log.
- echo "--- Instantiate AMIP env"
# Instantiate, precompile, then print the status of the experiments/ClimaEarth project.
- "julia --project=experiments/ClimaEarth/ -e 'using Pkg; Pkg.instantiate(;verbose=true)'"
- "julia --project=experiments/ClimaEarth/ -e 'using Pkg; Pkg.precompile()'"
- "julia --project=experiments/ClimaEarth/ -e 'using Pkg; Pkg.status()'"

- echo "--- Download artifacts"
# Same three Pkg steps for the separate `artifacts` project, followed by the download script.
- "julia --project=artifacts -e 'using Pkg; Pkg.instantiate(;verbose=true)'"
- "julia --project=artifacts -e 'using Pkg; Pkg.precompile()'"
- "julia --project=artifacts -e 'using Pkg; Pkg.status()'"
- "julia --project=artifacts artifacts/download_artifacts.jl"

agents:
# 8 CPUs per task; JULIA_NUM_PRECOMPILE_TASKS below is set to the same value.
slurm_cpus_per_task: 8
env:
JULIA_NUM_PRECOMPILE_TASKS: 8
JULIA_MAX_NUM_PRECOMPILE_FILES: 50

# Step "init_cpu_env_clima": environment setup that runs on the `clima` queue.
# NOTE(review): the "Expand All" diff hunk markers below hide part of this step;
# only the visible lines are described here.
- label: "init clima :computer:"
key: "init_cpu_env_clima"
Expand All @@ -45,6 +24,7 @@ steps:

- echo "--- Instantiate AMIP env"
- "julia --project=experiments/ClimaEarth/ -e 'using Pkg; Pkg.instantiate(;verbose=true)'"
# Disabled command: would Pkg.develop the repository root and add RRTMGP.jl from
# the `zs/checknan` branch into the experiments/ClimaEarth environment.
#- "julia --project=experiments/ClimaEarth/ -e 'using Pkg; Pkg.develop(;path=\".\"); Pkg.add(url = \"https://github.com/CliMA/RRTMGP.jl.git\", rev = \"zs/checknan\");'"
- "julia --project=experiments/ClimaEarth/ -e 'using Pkg; Pkg.precompile()'"
- "julia --project=experiments/ClimaEarth/ -e 'using Pkg; Pkg.status()'"

Expand All @@ -57,298 +37,34 @@ steps:
agents:
queue: clima
modules: common
# One GPU and 8 CPUs per task are requested for this init step.
slurm_gpus: 1
slurm_cpus_per_task: 8
env:
JULIA_NUM_PRECOMPILE_TASKS: 8
JULIA_MAX_NUM_PRECOMPILE_FILES: 50

# Barrier: the steps above must finish before the test groups below start.
- wait

# Group of single-task slabplanet runs. Every step below calls run_amip.jl with
# its own config file and job_id, requests 1 node / 1 task with 16G per CPU, and
# is soft_fail (a failing job does not fail the build).
# $CONFIG_PATH is presumably set in the pipeline-level env block above this view — TODO confirm.
- group: "Coupler integration and conservation tests"

steps:

# Integration tests - the expected results were also confirmed locally
- label: "Slabplanet_aqua: nocouple"
key: "slabplanet_aqua_atmos_sf_nocouple" # SF at each Atmos stage, no coupling, prescribed SST from atmos
command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_aqua_atmos_sf_nocouple.yml --job_id slabplanet_aqua_atmos_sf_nocouple"
artifact_paths: "experiments/ClimaEarth/output/slabplanet_aqua/slabplanet_aqua_atmos_sf_nocouple_artifacts/*"
env:
# Empty handle; NOTE(review): presumably consumed by the build_history tool used in the reporting group — confirm.
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 1
slurm_nodes: 1
slurm_mem_per_cpu: 16G
soft_fail: true

- label: "Slabplanet_aqua: couple"
key: "slabplanet_aqua_atmos_sf_couple" # SF at each Atmos stage, coupling, prescribed SST from coupler - identical results to the above confirm 1) initial conditions in Atmos are unchanged compared to the slab, 2) coupling not introducing variability when constant surface
command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_aqua_atmos_sf_couple.yml --job_id slabplanet_aqua_atmos_sf_couple"
artifact_paths: "experiments/ClimaEarth/output/slabplanet_aqua/slabplanet_aqua_atmos_sf_couple_artifacts/*"
env:
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 1
slurm_nodes: 1
slurm_mem_per_cpu: 16G
soft_fail: true

- label: "Slabplanet_aqua: coupler fluxes"
key: "slabplanet_aqua_coupler_sf" # SF at each coupler timestep, constant ocean - comparing to the above runs, this tests the sensitivity of less frequent flux calculation
command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_aqua_coupler_sf.yml --job_id slabplanet_aqua_coupler_sf"
artifact_paths: "experiments/ClimaEarth/output/slabplanet_aqua/slabplanet_aqua_coupler_sf_artifacts/*"
env:
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 1
slurm_nodes: 1
slurm_mem_per_cpu: 16G
soft_fail: true

- label: "Slabplanet_aqua: coupler fluxes, evolving ocean"
key: "slabplanet_aqua_coupler_sf_evolve_ocn" # SF at each coupler timestep, evolving ocean - comparing to the above run, tests the sensitivity of evolving ocean
command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_aqua_coupler_sf_evolve_ocn.yml --job_id slabplanet_aqua_coupler_sf_evolve_ocn"
artifact_paths: "experiments/ClimaEarth/output/slabplanet_aqua/slabplanet_aqua_coupler_sf_evolve_ocn_artifacts/*"
env:
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 1
slurm_nodes: 1
slurm_mem_per_cpu: 16G
soft_fail: true

# NOTE(review): the inline comment on `key` below says "evolving ocean" while the
# label says "evolving bucket"; the comment looks copied from the previous step — confirm.
- label: "Slabplanet_terra: coupler fluxes, evolving bucket"
key: "slabplanet_terra" # SF at each coupler timestep, evolving ocean - comparing to the above run, tests the sensitivity of evolving bucket
command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_terra.yml --job_id slabplanet_terra"
artifact_paths: "experiments/ClimaEarth/output/slabplanet_terra/slabplanet_terra_artifacts/*"
env:
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 1
slurm_nodes: 1
slurm_mem_per_cpu: 16G
soft_fail: true

# This step's artifacts directory is the one the "Slack report: Slabplanet" step
# uploads total_energy_bucket.png and total_water_bucket.png from.
- label: "Slabplanet: coupler fluxes, evolving ocean and land"
key: "slabplanet_coupler_sf_evolve_ocn"
command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_coupler_sf_evolve_ocn.yml --job_id slabplanet_coupler_sf_evolve_ocn"
artifact_paths: "experiments/ClimaEarth/output/slabplanet/slabplanet_coupler_sf_evolve_ocn_artifacts/*"
env:
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 1
slurm_nodes: 1
slurm_mem_per_cpu: 16G
soft_fail: true

# Group of MPI runs on idealized surfaces. Every step launches julia through
# `srun` with CLIMACOMMS_CONTEXT="MPI" on 4 nodes x 16 tasks per node, 16G per CPU,
# and is soft_fail (a failing job does not fail the build).
- group: "Current target tests: idealized surfaces"

steps:

# Atmosphere-only driver (test/component_model_tests/climaatmos_standalone/atmos_driver.jl)
# rather than run_amip.jl. This step has no `key`, and its artifact path is
# relative to the job_id directory instead of experiments/ClimaEarth/output/.
- label: "ClimaAtmos standalone target"
command:
- srun julia --project=experiments/ClimaEarth/ test/component_model_tests/climaatmos_standalone/atmos_driver.jl --config_file test/component_model_tests/climaatmos_standalone/longrun_aquaplanet_rhoe_equil_55km_nz63_clearsky_tvinsol_0M_slabocean.yml --job_id longrun_aquaplanet_rhoe_equil_55km_nz63_clearsky_tvinsol_0M_slabocean
artifact_paths: "longrun_aquaplanet_rhoe_equil_55km_nz63_clearsky_tvinsol_0M_slabocean/*"
env:
BUILD_HISTORY_HANDLE: ""
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_ntasks_per_node: 16
slurm_nodes: 4
slurm_mem_per_cpu: 16G
soft_fail: true

# The remaining steps all call run_amip.jl; each differs only in config file,
# job_id and output directory.
- label: "TARGET IDEALIZED: new target aqua - fixed ocean T, nocouple, atmos flux calc"
key: "slabplanet_aqua_target_nocouple"
command: "srun julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_aqua_target_nocouple.yml --job_id slabplanet_aqua_target_nocouple"
artifact_paths: "experiments/ClimaEarth/output/slabplanet_aqua/slabplanet_aqua_target_nocouple_artifacts/*"
env:
CLIMACOMMS_CONTEXT: "MPI"
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 16
slurm_nodes: 4
slurm_mem_per_cpu: 16G
soft_fail: true

- label: "TARGET IDEALIZED: new target aqua - fixed ocean T, coupler flux calc"
key: "slabplanet_aqua_target"
command: "srun julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_aqua_target.yml --job_id slabplanet_aqua_target"
artifact_paths: "experiments/ClimaEarth/output/slabplanet_aqua/slabplanet_aqua_target_artifacts/*"
env:
CLIMACOMMS_CONTEXT: "MPI"
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 16
slurm_nodes: 4
slurm_mem_per_cpu: 16G
soft_fail: true

- label: "TARGET IDEALIZED: new target aqua - evolving slab ocean T"
key: "slabplanet_aqua_target_evolve_ocn"
command: "srun julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_aqua_target_evolve_ocn.yml --job_id slabplanet_aqua_target_evolve_ocn"
artifact_paths: "experiments/ClimaEarth/output/slabplanet_aqua/slabplanet_aqua_target_evolve_ocn_artifacts/*"
env:
CLIMACOMMS_CONTEXT: "MPI"
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 16
slurm_nodes: 4
slurm_mem_per_cpu: 16G
soft_fail: true

- label: "TARGET IDEALIZED: new target slab - fixed ocean T, bucket"
key: "slabplanet_target"
command: "srun julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_target.yml --job_id slabplanet_target"
artifact_paths: "experiments/ClimaEarth/output/slabplanet/slabplanet_target_artifacts/*"
env:
CLIMACOMMS_CONTEXT: "MPI"
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 16
slurm_nodes: 4
slurm_mem_per_cpu: 16G
soft_fail: true

- label: "TARGET IDEALIZED: new target slab - evolving slab ocean T, bucket"
key: "slabplanet_target_evolve_ocn"
command: "srun julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/slabplanet_target_evolve_ocn.yml --job_id slabplanet_target_evolve_ocn"
artifact_paths: "experiments/ClimaEarth/output/slabplanet/slabplanet_target_evolve_ocn_artifacts/*"
env:
CLIMACOMMS_CONTEXT: "MPI"
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 16
slurm_nodes: 4
slurm_mem_per_cpu: 16G
soft_fail: true


# Group of CPU MPI AMIP runs. Every step launches run_amip.jl through `srun` with
# CLIMACOMMS_CONTEXT="MPI" on 4 nodes x 16 tasks per node and is soft_fail.
- group: "Current target tests: AMIP surfaces"

steps:

- label: "MPI AMIP FINE: new target amip"
key: "amip_target"
command: "srun julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/amip_target.yml --job_id amip_target"
artifact_paths: "experiments/ClimaEarth/output/amip/amip_target_artifacts/*"
env:
CLIMACOMMS_CONTEXT: "MPI"
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 16
slurm_nodes: 4
slurm_mem_per_cpu: 16G
soft_fail: true

- label: "MPI AMIP FINE: new target amip: topo"
key: "amip_target_topo"
command: "srun julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/amip_target_topo.yml --job_id amip_target_topo"
artifact_paths: "experiments/ClimaEarth/output/amip/amip_target_topo_artifacts/*"
env:
CLIMACOMMS_CONTEXT: "MPI"
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 16
slurm_nodes: 4
slurm_mem_per_cpu: 16G
soft_fail: true

# Same layout as the two steps above, but this one requests 20G per CPU instead of 16G.
- label: "MPI AMIP FINE: new target amip: topo + diagedmf"
key: "amip_target_topo_diagedmf_cpu"
command: "srun julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/amip_target_topo_diagedmf_cpu.yml --job_id amip_target_topo_diagedmf_cpu"
artifact_paths: "experiments/ClimaEarth/output/amip/amip_target_topo_diagedmf_cpu_artifacts/*"
env:
CLIMACOMMS_CONTEXT: "MPI"
BUILD_HISTORY_HANDLE: ""
agents:
slurm_ntasks_per_node: 16
slurm_nodes: 4
slurm_mem_per_cpu: 20G
soft_fail: true

# Group of AMIP runs that set CLIMACOMMS_DEVICE="CUDA".
- group: "Current target tests on GPU: AMIP surface"

steps:

# Reuses the amip_target_topo.yml config of the CPU step "amip_target_topo" under
# a separate job_id (amip_target_topo_gpu), so output goes to its own artifacts
# directory. Requests one GPU and 16GB of memory; soft_fail keeps a failure from
# failing the build.
- label: "GPU AMIP FINE: new target amip: topo"
key: "amip_target_topo_gpu"
command: "srun julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/amip_target_topo.yml --job_id amip_target_topo_gpu"
artifact_paths: "experiments/ClimaEarth/output/amip/amip_target_topo_gpu_artifacts/*"
env:
CLIMACOMMS_CONTEXT: "MPI"
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_mem: 16GB
soft_fail: true

# Step "amip_target_topo_diagedmf_1m": GPU AMIP run (topo + diagedmf + 1M) that
# restarts from a previous build's output via --restart_dir / --restart_t.
# This is the post-change side of the diff hunk only: the superseded
# "amip_target_topo_diagedmf_gpu" label/key/command/artifact_paths and the old
# `slurm_ntasks: 4` were dropped so that no key is defined twice in the step.
- label: "GPU AMIP FINE: new target amip: topo + diagedmf + 1M"
key: "amip_target_topo_diagedmf_1m"
command:
- echo "--- Run simulation"
# "$$" defers expansion of RESTART_DIR / RESTART_T (set in `env` below) to job
# runtime. srun reserves 4 CPUs per task while julia is started with 3 threads.
- "srun --cpu-bind=threads --cpus-per-task=4 julia --threads=3 --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/amip_target_topo_diagedmf_1m.yml --restart_dir $$RESTART_DIR --restart_t $$RESTART_T --job_id amip_target_topo_diagedmf_1m"
artifact_paths: "experiments/ClimaEarth/output/amip/amip_target_topo_diagedmf_1m_artifacts/*"
timeout_in_minutes: 1440
env:
CLIMACOMMS_CONTEXT: "MPI"
CLIMACOMMS_DEVICE: "CUDA"
# Hard-coded path into the output of build 783 on the cluster scratch space.
RESTART_DIR: "/scratch/clima/slurm-buildkite/climacoupler-longruns/783/climacoupler-longruns/experiments/ClimaEarth/output/amip/amip_target_topo_diagedmf_1m_artifacts/"
# NOTE(review): presumably seconds (2592000 s = 30 days) — confirm against run_amip.jl.
RESTART_T: 2592000
agents:
queue: clima
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
slurm_ntasks: 1
slurm_mem: 30GB
modules: common
# A failure of this job does not fail the build.
soft_fail: true

# DYAMOND AMIP: 1 day (convection resolving)
# Runs run_amip.jl directly (no srun) on the `clima` queue with one GPU and 20GB
# of memory. Only CLIMACOMMS_DEVICE is set; no CLIMACOMMS_CONTEXT is given here.
- label: "GPU AMIP SUPERFINE: dyamond_target"
key: "longrun_amip_dyamond_gpu"
command:
- echo "--- Run simulation"
- "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/longrun_amip_dyamond.yml --job_id longrun_amip_dyamond_gpu"
artifact_paths: "experiments/ClimaEarth/output/amip/longrun_amip_dyamond_gpu_artifacts/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
queue: clima
slurm_mem: 20GB
slurm_gpus: 1
modules: common
# A failure of this job does not fail the build.
soft_fail: true

# Barrier: the simulation steps above must finish before reporting starts.
- wait

# Reporting group: builds the job-performance history page and posts it, plus
# selected plots from earlier jobs, to the #coupler-report Slack channel.
- group: "Job analysis and reporting"

steps:

# plot job performance history
- label: ":chart_with_downwards_trend: build history"
command:
- build_history main # name of branch to plot
artifact_paths:
- "build_history.html"

# build_history.html must exist before the Slack steps below try to upload it.
- wait

- label: ":envelope: Slack report: build_history"
command:
- |
slack-upload -c "#coupler-report" -f build_history.html -m html -n build_history -x ":rocket: Interactive overall job performance history (download the attached file and view in browser) :rocket:"

# Uploads the energy and water conservation plots written by the
# slabplanet_coupler_sf_evolve_ocn job.
- label: ":envelope: Slack report: Slabplanet"
command:
- slack-upload -c "#coupler-report" -f experiments/ClimaEarth/output/slabplanet/slabplanet_coupler_sf_evolve_ocn_artifacts/total_energy_bucket.png -m png -n slab_coarse -x "Slabplanet energy conservation"
- slack-upload -c "#coupler-report" -f experiments/ClimaEarth/output/slabplanet/slabplanet_coupler_sf_evolve_ocn_artifacts/total_water_bucket.png -m png -n slab_coarse_w -x "Slabplanet water conservation"

# Uploads amip_ncep.png, then every bias*.png found in the
# amip_target_topo_diagedmf_gpu artifacts directory.
- label: ":envelope: Slack report: target AMIP"
command:
- slack-upload -c "#coupler-report" -f experiments/ClimaEarth/output/amip/amip_target_topo_diagedmf_gpu_artifacts/amip_ncep.png -m png -n amip_fine -x "300d Target AMIP v NCEP Last Month Mean"
# NUL-delimited find/read loop so file names with spaces or newlines are handled;
# each file is uploaded under its basename without the .png suffix. "$$" keeps the
# shell variables from being interpolated at pipeline upload.
- |
find experiments/ClimaEarth/output/amip/amip_target_topo_diagedmf_gpu_artifacts/ -type f -name 'bias*.png' -print0 | while IFS= read -r -d '' file; do
slack-upload -c "#coupler-report" -f "$$file" -m png -n "$$(basename "$$file" .png)" -x "$$(basename "$$file" .png)"
done
agents:
queue: clima
modules: common
33 changes: 33 additions & 0 deletions config/longrun_configs/amip_target_topo_diagedmf_1m.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Run configuration added as config/longrun_configs/amip_target_topo_diagedmf_1m.yml,
# the file passed via --config_file by the "amip_target_topo_diagedmf_1m" pipeline step.
FLOAT_TYPE: "Float32"
albedo_model: "CouplerAlbedo"
anim: false
# Atmosphere settings are read from a separate config file.
atmos_config_file: "config/longrun_configs/amip_target_diagedmf.yml"
# dt carries its unit in the string; dt_cpl is a bare number
# (presumably seconds, matching dt — TODO confirm).
dt: "120secs"
dt_cpl: 120
dt_save_state_to_disk: "30days"
dt_save_to_sol: "30days"
energy_check: false
# NOTE(review): the unit of hourly_checkpoint_dt is not stated here (presumably hours) — confirm.
hourly_checkpoint: true
hourly_checkpoint_dt: 24
land_albedo_type: "map_temporal"
mode_name: "amip"
mono_surface: false
netcdf_output_at_levels: true
output_default_diagnostics: true
precip_model: "1M"
# Quoted so the date stays a string instead of being parsed as an integer.
start_date: "20100131"
surface_setup: "PrescribedSurface"
t_end: "90days"
topo_smoothing: true
topography: "Earth"
coupler_toml_file: "toml/amip_target_topo_diagedmf_1m.toml"
turb_flux_partition: "CombinedStateFluxesMOST"
use_reference_state: false
# NOTE(review): `diagnostics:` appears on two consecutive lines. Indentation is
# lost in this view, so it is either a nested key or an accidental duplicate —
# confirm against the raw file.
diagnostics:
diagnostics:
# Three diagnostic sets, each written with a period of 1 day.
- short_name: [ts, ta, thetaa, ha, pfull, rhoa, ua, va, wa, hur, hus, cl, clw, cli, hussfc, evspsbl, pr]
period: 1days
- short_name: [rsdt, rsut, rlut, rsds, rsus, rlds, rlus]
period: 1days
- short_name: [arup, waup, taup, thetaaup, haup, husup, hurup, clwup, cliup, waen, tke, lmix]
period: 1days
Loading
Loading