Update the default machine type and accelerator
PiperOrigin-RevId: 666415414
vertex-mg-bot authored and copybara-github committed Aug 22, 2024
1 parent c38fd04 commit 830c954
Showing 2 changed files with 13 additions and 23 deletions.

File 1 of 2:
@@ -323,18 +323,18 @@
"# @markdown 1. If `max_steps>0`, it will precedence over `epochs`. One can set a small `max_steps` value to quickly check the pipeline.\n",
"# @markdown 1. With the default setting, training takes between 1.5 ~ 2 hours.\n",
"\n",
"TRAIN_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-peft-train:20240724_0936_RC00\"\n",
"\n",
"\n",
"# The Llama 3.1 base model.\n",
"MODEL_ID = \"meta-llama/Meta-Llama-3.1-8B-Instruct\" # @param [\"meta-llama/Meta-Llama-3.1-8B\", \"meta-llama/Meta-Llama-3.1-8B-Instruct\", \"meta-llama/Meta-Llama-3.1-70B\", \"meta-llama/Meta-Llama-3.1-70B-Instruct\"] {isTemplate:true}\n",
"if LOAD_MODEL_FROM == \"Google Cloud\":\n",
" base_model_id = os.path.join(MODEL_BUCKET, MODEL_ID.split(\"/\")[-1])\n",
"else:\n",
" base_model_id = MODEL_ID\n",
"\n",
"# The pre-built training docker image.\n",
"TRAIN_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-peft-train:20240724_0936_RC00\"\n",
"\n",
"# The accelerator to use.\n",
"accelerator_type = \"NVIDIA_L4\" # @param [\"NVIDIA_L4\", \"NVIDIA_A100_80GB\"]\n",
"accelerator_type = \"NVIDIA_A100_80GB\" # @param [\"NVIDIA_A100_80GB\"]\n",
"\n",
"# Batch size for finetuning.\n",
"per_device_train_batch_size = 1 # @param{type:\"integer\"}\n",
@@ -362,25 +362,13 @@
"logging_steps = save_steps\n",
"\n",
"# Worker pool spec.\n",
"machine_type = None\n",
"if \"8b\" in MODEL_ID.lower():\n",
" if accelerator_type == \"NVIDIA_L4\":\n",
" accelerator_count = 4\n",
" machine_type = \"g2-standard-48\"\n",
" else:\n",
" raise ValueError(\n",
" f\"Recommended machine settings not found for: {accelerator_type}. To use another accelerator, edit this code block to pass in an appropriate `machine_type`, `accelerator_type`, and `accelerator_count` to the deploy_model_vllm function by clicking `Show Code` and then modifying the code.\"\n",
" )\n",
"elif \"70b\" in MODEL_ID.lower():\n",
" if accelerator_type == \"NVIDIA_A100_80GB\":\n",
" accelerator_count = 4\n",
" machine_type = \"a2-ultragpu-4g\"\n",
" else:\n",
" raise ValueError(\n",
" f\"Recommended machine settings not found for: {accelerator_type}. To use another accelerator, edit this code block to pass in an appropriate `machine_type`, `accelerator_type`, and `accelerator_count` to the deploy_model_vllm function by clicking `Show Code` and then modifying the code.\"\n",
" )\n",
"if accelerator_type == \"NVIDIA_A100_80GB\":\n",
" accelerator_count = 4\n",
" machine_type = \"a2-ultragpu-4g\"\n",
"else:\n",
" raise ValueError(f\"Unsupported model ID or GCS path: {MODEL_ID}.\")\n",
" raise ValueError(\n",
" f\"Recommended machine settings not found for: {accelerator_type}. To use another accelerator, edit this code block to pass in an appropriate `machine_type`, `accelerator_type`, and `accelerator_count` to the deploy_model_vllm function by clicking `Show Code` and then modifying the code.\"\n",
" )\n",
"\n",
"replica_count = 1\n",
"\n",
@@ -600,6 +588,7 @@
"\n",
" return model, endpoint\n",
"\n",
"\n",
"models[\"vllm_gpu\"], endpoints[\"vllm_gpu\"] = deploy_model_vllm(\n",
" model_name=common_util.get_job_name_with_datetime(prefix=\"llama3_1-vllm-serve\"),\n",
" model_id=merged_model_output_dir,\n",
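This hunk only adds a second blank line before the deploy_model_vllm call. Once that call returns, the deployed endpoint is typically exercised with the SDK's predict method; the following rough usage sketch assumes the vLLM serving container's prompt/max_tokens instance schema, which is not shown in this diff:

# Rough usage sketch; instance fields are assumed, adjust to the serving container.
instances = [
    {
        "prompt": "What is PEFT finetuning?",  # hypothetical prompt
        "max_tokens": 128,
        "temperature": 0.7,
    }
]
response = endpoints["vllm_gpu"].predict(instances=instances)
for prediction in response.predictions:
    print(prediction)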

File 2 of 2:
@@ -306,7 +306,7 @@
"train_precision = \"float16\"\n",
"\n",
"# Worker pool spec for 4bit finetuning.\n",
"accelerator_type = \"NVIDIA_A100_80GB\" # @param[\"NVIDIA_A100_80GB\", \"NVIDIA_L4\"]\n",
"accelerator_type = \"NVIDIA_A100_80GB\" # @param[\"NVIDIA_A100_80GB\"]\n",
"\n",
"if accelerator_type == \"NVIDIA_L4\":\n",
" accelerator_count = 4\n",
@@ -532,6 +532,7 @@
"\n",
" return model, endpoint\n",
"\n",
"\n",
"models[\"vllm_gpu\"], endpoints[\"vllm_gpu\"] = deploy_model_vllm(\n",
" model_name=common_util.get_job_name_with_datetime(prefix=\"mistral-vllm-serve\"),\n",
" model_id=merged_model_output_dir,\n",
