Update the default machine type and accelerator
PiperOrigin-RevId: 666415414
vertex-mg-bot authored and copybara-github committed Aug 22, 2024
1 parent c38fd04 commit 830c954
Showing 2 changed files with 13 additions and 23 deletions.

File 1 of 2:
@@ -323,18 +323,18 @@
"# @markdown 1. If `max_steps>0`, it will precedence over `epochs`. One can set a small `max_steps` value to quickly check the pipeline.\n",
"# @markdown 1. With the default setting, training takes between 1.5 ~ 2 hours.\n",
"\n",
"TRAIN_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-peft-train:20240724_0936_RC00\"\n",
"\n",
"\n",
"# The Llama 3.1 base model.\n",
"MODEL_ID = \"meta-llama/Meta-Llama-3.1-8B-Instruct\" # @param [\"meta-llama/Meta-Llama-3.1-8B\", \"meta-llama/Meta-Llama-3.1-8B-Instruct\", \"meta-llama/Meta-Llama-3.1-70B\", \"meta-llama/Meta-Llama-3.1-70B-Instruct\"] {isTemplate:true}\n",
"if LOAD_MODEL_FROM == \"Google Cloud\":\n",
" base_model_id = os.path.join(MODEL_BUCKET, MODEL_ID.split(\"/\")[-1])\n",
"else:\n",
" base_model_id = MODEL_ID\n",
"\n",
"# The pre-built training docker image.\n",
"TRAIN_DOCKER_URI = \"us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-peft-train:20240724_0936_RC00\"\n",
"\n",
"# The accelerator to use.\n",
"accelerator_type = \"NVIDIA_L4\" # @param [\"NVIDIA_L4\", \"NVIDIA_A100_80GB\"]\n",
"accelerator_type = \"NVIDIA_A100_80GB\" # @param [\"NVIDIA_A100_80GB\"]\n",
"\n",
"# Batch size for finetuning.\n",
"per_device_train_batch_size = 1 # @param{type:\"integer\"}\n",
@@ -362,25 +362,13 @@
"logging_steps = save_steps\n",
"\n",
"# Worker pool spec.\n",
"machine_type = None\n",
"if \"8b\" in MODEL_ID.lower():\n",
" if accelerator_type == \"NVIDIA_L4\":\n",
" accelerator_count = 4\n",
" machine_type = \"g2-standard-48\"\n",
" else:\n",
" raise ValueError(\n",
" f\"Recommended machine settings not found for: {accelerator_type}. To use another accelerator, edit this code block to pass in an appropriate `machine_type`, `accelerator_type`, and `accelerator_count` to the deploy_model_vllm function by clicking `Show Code` and then modifying the code.\"\n",
" )\n",
"elif \"70b\" in MODEL_ID.lower():\n",
" if accelerator_type == \"NVIDIA_A100_80GB\":\n",
" accelerator_count = 4\n",
" machine_type = \"a2-ultragpu-4g\"\n",
" else:\n",
" raise ValueError(\n",
" f\"Recommended machine settings not found for: {accelerator_type}. To use another accelerator, edit this code block to pass in an appropriate `machine_type`, `accelerator_type`, and `accelerator_count` to the deploy_model_vllm function by clicking `Show Code` and then modifying the code.\"\n",
" )\n",
"if accelerator_type == \"NVIDIA_A100_80GB\":\n",
" accelerator_count = 4\n",
" machine_type = \"a2-ultragpu-4g\"\n",
"else:\n",
" raise ValueError(f\"Unsupported model ID or GCS path: {MODEL_ID}.\")\n",
" raise ValueError(\n",
" f\"Recommended machine settings not found for: {accelerator_type}. To use another accelerator, edit this code block to pass in an appropriate `machine_type`, `accelerator_type`, and `accelerator_count` to the deploy_model_vllm function by clicking `Show Code` and then modifying the code.\"\n",
" )\n",
"\n",
"replica_count = 1\n",
"\n",
@@ -600,6 +588,7 @@
"\n",
" return model, endpoint\n",
"\n",
"\n",
"models[\"vllm_gpu\"], endpoints[\"vllm_gpu\"] = deploy_model_vllm(\n",
" model_name=common_util.get_job_name_with_datetime(prefix=\"llama3_1-vllm-serve\"),\n",
" model_id=merged_model_output_dir,\n",
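This hunk only adds a second blank line before the deploy_model_vllm call. Once that call returns, the deployed endpoint is typically exercised with the SDK's predict method; the following rough usage sketch assumes the vLLM serving container's prompt/max_tokens instance schema, which is not shown in this diff:

# Rough usage sketch; instance fields are assumed, adjust to the serving container.
instances = [
    {
        "prompt": "What is PEFT finetuning?",  # hypothetical prompt
        "max_tokens": 128,
        "temperature": 0.7,
    }
]
response = endpoints["vllm_gpu"].predict(instances=instances)
for prediction in response.predictions:
    print(prediction)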

File 2 of 2:
@@ -306,7 +306,7 @@
"train_precision = \"float16\"\n",
"\n",
"# Worker pool spec for 4bit finetuning.\n",
"accelerator_type = \"NVIDIA_A100_80GB\" # @param[\"NVIDIA_A100_80GB\", \"NVIDIA_L4\"]\n",
"accelerator_type = \"NVIDIA_A100_80GB\" # @param[\"NVIDIA_A100_80GB\"]\n",
"\n",
"if accelerator_type == \"NVIDIA_L4\":\n",
" accelerator_count = 4\n",
@@ -532,6 +532,7 @@
"\n",
" return model, endpoint\n",
"\n",
"\n",
"models[\"vllm_gpu\"], endpoints[\"vllm_gpu\"] = deploy_model_vllm(\n",
" model_name=common_util.get_job_name_with_datetime(prefix=\"mistral-vllm-serve\"),\n",
" model_id=merged_model_output_dir,\n",
