From 12f1208d46b3409079227f071b27f28db773b627 Mon Sep 17 00:00:00 2001 From: Chester Li Date: Thu, 12 Dec 2024 11:15:24 +0800 Subject: [PATCH 01/13] [k8s] Add validation for pod_config #4206 Check pod_config when run 'sky check k8s' by using k8s api --- sky/provision/kubernetes/utils.py | 43 +++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py index 7442c9be7a6..2c062ad987f 100644 --- a/sky/provision/kubernetes/utils.py +++ b/sky/provision/kubernetes/utils.py @@ -866,6 +866,15 @@ def check_credentials(context: Optional[str], _, exec_msg = is_kubeconfig_exec_auth(context) + # Check whether pod_config is valid + pod_config = skypilot_config.get_nested(('kubernetes', 'pod_config'), + default_value={}, + override_configs={}) + if pod_config: + _, pod_msg = _check_pod_config(context, pod_config) + if pod_msg: + return False, pod_msg + # We now check if GPUs are available and labels are set correctly on the # cluster, and if not we return hints that may help debug any issues. # This early check avoids later surprises for user when they try to run @@ -891,6 +900,40 @@ def check_credentials(context: Optional[str], else: return True, None +def _check_pod_config( + context: Optional[str] = None, pod_config: Optional[Any] = None) \ + -> Tuple[bool, Optional[str]]: + """Check if the pod_config is a valid pod config + + Using create_namespaced_pod api with dry_run to check the pod_config + is valid or not. + + Returns: + bool: True if pod_config is valid. + str: Error message about why the pod_config is invalid, None otherwise. + """ + try: + namespace = get_kube_config_context_namespace(context) + kubernetes.core_api(context).create_namespaced_pod( + namespace, + body=pod_config, + dry_run='All', + field_validation='Strict', + _request_timeout=kubernetes.API_TIMEOUT) + except kubernetes.api_exception() as e: + error_msg = '' + if e.body: + # get detail error message from api_exception + exception_body = json.loads(e.body) + error_msg = exception_body.get('message') + else: + error_msg = str(e) + return False, f'Invalid pod_config: {error_msg}' + except Exception as e: # pylint: disable=broad-except + return False, ('An error occurred: ' + f'{common_utils.format_exception(e, use_bracket=True)}') + return True, None + def is_kubeconfig_exec_auth( context: Optional[str] = None) -> Tuple[bool, Optional[str]]: From 64bb66ac1521270b030a21a28b5fc3c4b3503441 Mon Sep 17 00:00:00 2001 From: Chester Li Date: Fri, 13 Dec 2024 16:41:44 +0800 Subject: [PATCH 02/13] update: check pod_config when launch check merged pod_config during launch using k8s api --- sky/backends/backend_utils.py | 4 ++++ sky/provision/kubernetes/utils.py | 28 ++++++++++++---------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index a3651bdba9a..95c2296b0b9 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -916,6 +916,10 @@ def write_cluster_config( tmp_yaml_path, cluster_config_overrides=to_provision.cluster_config_overrides) kubernetes_utils.combine_metadata_fields(tmp_yaml_path) + valid, message = kubernetes_utils.check_pod_config(tmp_yaml_path) + if not valid: + raise exceptions.InvalidCloudConfigs( + f'There are invalid config in pod_config, deatil: {message}') if dryrun: # If dryrun, return the unfinished tmp yaml path. diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py index 2c062ad987f..11101c2ac95 100644 --- a/sky/provision/kubernetes/utils.py +++ b/sky/provision/kubernetes/utils.py @@ -866,15 +866,6 @@ def check_credentials(context: Optional[str], _, exec_msg = is_kubeconfig_exec_auth(context) - # Check whether pod_config is valid - pod_config = skypilot_config.get_nested(('kubernetes', 'pod_config'), - default_value={}, - override_configs={}) - if pod_config: - _, pod_msg = _check_pod_config(context, pod_config) - if pod_msg: - return False, pod_msg - # We now check if GPUs are available and labels are set correctly on the # cluster, and if not we return hints that may help debug any issues. # This early check avoids later surprises for user when they try to run @@ -900,9 +891,8 @@ def check_credentials(context: Optional[str], else: return True, None -def _check_pod_config( - context: Optional[str] = None, pod_config: Optional[Any] = None) \ - -> Tuple[bool, Optional[str]]: + +def check_pod_config(cluster_yaml_path: str) -> Tuple[bool, Optional[str]]: """Check if the pod_config is a valid pod config Using create_namespaced_pod api with dry_run to check the pod_config @@ -912,13 +902,19 @@ def _check_pod_config( bool: True if pod_config is valid. str: Error message about why the pod_config is invalid, None otherwise. """ + with open(cluster_yaml_path, 'r', encoding='utf-8') as f: + yaml_content = f.read() + yaml_obj = yaml.safe_load(yaml_content) + pod_config = \ + yaml_obj['available_node_types']['ray_head_default']['node_config'] try: - namespace = get_kube_config_context_namespace(context) - kubernetes.core_api(context).create_namespaced_pod( + # This ok to use None context here as we only test the pod is valid + # won't do any change in the cluster + namespace = get_kube_config_context_namespace(None) + kubernetes.core_api().create_namespaced_pod( namespace, body=pod_config, dry_run='All', - field_validation='Strict', _request_timeout=kubernetes.API_TIMEOUT) except kubernetes.api_exception() as e: error_msg = '' @@ -928,7 +924,7 @@ def _check_pod_config( error_msg = exception_body.get('message') else: error_msg = str(e) - return False, f'Invalid pod_config: {error_msg}' + return False, error_msg except Exception as e: # pylint: disable=broad-except return False, ('An error occurred: ' f'{common_utils.format_exception(e, use_bracket=True)}') From a0f29e5585476f49ac7db85e5d67caef0860448f Mon Sep 17 00:00:00 2001 From: Chester Li Date: Fri, 13 Dec 2024 17:00:25 +0800 Subject: [PATCH 03/13] fix test --- sky/provision/kubernetes/utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py index 11101c2ac95..495147d3caf 100644 --- a/sky/provision/kubernetes/utils.py +++ b/sky/provision/kubernetes/utils.py @@ -907,11 +907,11 @@ def check_pod_config(cluster_yaml_path: str) -> Tuple[bool, Optional[str]]: yaml_obj = yaml.safe_load(yaml_content) pod_config = \ yaml_obj['available_node_types']['ray_head_default']['node_config'] + context = yaml_obj['provider'].get('context', + get_current_kube_config_context_name()) try: - # This ok to use None context here as we only test the pod is valid - # won't do any change in the cluster - namespace = get_kube_config_context_namespace(None) - kubernetes.core_api().create_namespaced_pod( + namespace = get_kube_config_context_namespace(context) + kubernetes.core_api(context).create_namespaced_pod( namespace, body=pod_config, dry_run='All', @@ -925,6 +925,8 @@ def check_pod_config(cluster_yaml_path: str) -> Tuple[bool, Optional[str]]: else: error_msg = str(e) return False, error_msg + except ValueError as e: + return False, common_utils.format_exception(e) except Exception as e: # pylint: disable=broad-except return False, ('An error occurred: ' f'{common_utils.format_exception(e, use_bracket=True)}') From 47e724da33537f87c1caa8986d7a93285e7c32a5 Mon Sep 17 00:00:00 2001 From: Chester Li Date: Fri, 13 Dec 2024 17:16:18 +0800 Subject: [PATCH 04/13] ignore check failed when test with dryrun if there is no kube config in env, ignore ValueError when launch with dryrun. For now, we don't support check schema offline. --- sky/backends/backend_utils.py | 3 ++- sky/provision/kubernetes/utils.py | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 95c2296b0b9..62d965e3b79 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -916,7 +916,8 @@ def write_cluster_config( tmp_yaml_path, cluster_config_overrides=to_provision.cluster_config_overrides) kubernetes_utils.combine_metadata_fields(tmp_yaml_path) - valid, message = kubernetes_utils.check_pod_config(tmp_yaml_path) + valid, message = kubernetes_utils.check_pod_config( + tmp_yaml_path, dryrun) if not valid: raise exceptions.InvalidCloudConfigs( f'There are invalid config in pod_config, deatil: {message}') diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py index 495147d3caf..8cd46cf7323 100644 --- a/sky/provision/kubernetes/utils.py +++ b/sky/provision/kubernetes/utils.py @@ -892,7 +892,8 @@ def check_credentials(context: Optional[str], return True, None -def check_pod_config(cluster_yaml_path: str) -> Tuple[bool, Optional[str]]: +def check_pod_config(cluster_yaml_path: str, dryrun: bool) \ + -> Tuple[bool, Optional[str]]: """Check if the pod_config is a valid pod config Using create_namespaced_pod api with dry_run to check the pod_config @@ -926,6 +927,11 @@ def check_pod_config(cluster_yaml_path: str) -> Tuple[bool, Optional[str]]: error_msg = str(e) return False, error_msg except ValueError as e: + if dryrun: + logger.debug('ignore ValueError as there is no kube config ' + 'in the enviroment with dry_run. ' + 'For now we don\'t support check pod_config offline.') + return True, None return False, common_utils.format_exception(e) except Exception as e: # pylint: disable=broad-except return False, ('An error occurred: ' From 699961d599ba561fea0ae3defc8f945686b1d388 Mon Sep 17 00:00:00 2001 From: Chester Li Date: Wed, 25 Dec 2024 12:36:32 +0800 Subject: [PATCH 05/13] use deserialize api to check pod_config schema --- sky/backends/backend_utils.py | 5 ++- sky/provision/kubernetes/utils.py | 67 +++++++++++++++---------------- tests/test_config.py | 54 +++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 37 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 62d965e3b79..143876a59ab 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -916,8 +916,9 @@ def write_cluster_config( tmp_yaml_path, cluster_config_overrides=to_provision.cluster_config_overrides) kubernetes_utils.combine_metadata_fields(tmp_yaml_path) - valid, message = kubernetes_utils.check_pod_config( - tmp_yaml_path, dryrun) + yaml_obj = common_utils.read_yaml(tmp_yaml_path) + pod_config = yaml_obj['available_node_types']['ray_head_default']['node_config'] + valid, message = kubernetes_utils.check_pod_config(pod_config) if not valid: raise exceptions.InvalidCloudConfigs( f'There are invalid config in pod_config, deatil: {message}') diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py index 8cd46cf7323..d0a0e87860a 100644 --- a/sky/provision/kubernetes/utils.py +++ b/sky/provision/kubernetes/utils.py @@ -892,51 +892,48 @@ def check_credentials(context: Optional[str], return True, None -def check_pod_config(cluster_yaml_path: str, dryrun: bool) \ +def check_pod_config(pod_config: dict) \ -> Tuple[bool, Optional[str]]: """Check if the pod_config is a valid pod config - Using create_namespaced_pod api with dry_run to check the pod_config - is valid or not. + Using deserialize api to check the pod_config is valid or not. Returns: bool: True if pod_config is valid. str: Error message about why the pod_config is invalid, None otherwise. """ - with open(cluster_yaml_path, 'r', encoding='utf-8') as f: - yaml_content = f.read() - yaml_obj = yaml.safe_load(yaml_content) - pod_config = \ - yaml_obj['available_node_types']['ray_head_default']['node_config'] - context = yaml_obj['provider'].get('context', - get_current_kube_config_context_name()) + errors = [] + api_client = kubernetes.api_client() + + # Used for kubernetes api_client deserialize function, the function will use + # data attr, the detail ref: + # https://github.com/kubernetes-client/python/blob/master/kubernetes/client/api_client.py#L244 + class InnerResponse(): + + def __init__(self, data: dict): + self.data = json.dumps(data) + try: - namespace = get_kube_config_context_namespace(context) - kubernetes.core_api(context).create_namespaced_pod( - namespace, - body=pod_config, - dry_run='All', - _request_timeout=kubernetes.API_TIMEOUT) - except kubernetes.api_exception() as e: - error_msg = '' - if e.body: - # get detail error message from api_exception - exception_body = json.loads(e.body) - error_msg = exception_body.get('message') - else: - error_msg = str(e) - return False, error_msg - except ValueError as e: - if dryrun: - logger.debug('ignore ValueError as there is no kube config ' - 'in the enviroment with dry_run. ' - 'For now we don\'t support check pod_config offline.') - return True, None - return False, common_utils.format_exception(e) + # Validate metadata if present + if 'metadata' in pod_config: + try: + value = InnerResponse(pod_config['metadata']) + api_client.deserialize( + value, kubernetes.kubernetes.client.V1ObjectMeta) + except ValueError as e: + errors.append(f'Invalid metadata: {str(e)}') + # Validate spec if present + if 'spec' in pod_config: + try: + value = InnerResponse(pod_config['spec']) + api_client.deserialize(value, + kubernetes.kubernetes.client.V1PodSpec) + except ValueError as e: + errors.append(f'Invalid spec: {str(e)}') + return len(errors) == 0, '.'.join(errors) except Exception as e: # pylint: disable=broad-except - return False, ('An error occurred: ' - f'{common_utils.format_exception(e, use_bracket=True)}') - return True, None + errors.append(f'Validation error: {str(e)}') + return False, '.'.join(errors) def is_kubeconfig_exec_auth( diff --git a/tests/test_config.py b/tests/test_config.py index 5789214dc61..b9f3d480d1b 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -7,6 +7,7 @@ import sky from sky import skypilot_config +import sky.exceptions from sky.skylet import constants from sky.utils import common_utils from sky.utils import kubernetes_enums @@ -98,6 +99,39 @@ def _create_task_yaml_file(task_file_path: pathlib.Path) -> None: run: echo 'Running...' """)) +def _create_invalid_config_yaml_file(task_file_path: pathlib.Path) -> None: + task_file_path.write_text( + textwrap.dedent(f"""\ + experimental: + config_overrides: + docker: + run_options: + - -v /tmp:/tmp + kubernetes: + pod_config: + metadata: + labels: + test-key: test-value + annotations: + abc: def + spec: + containers: + - name: + imagePullSecrets: + - name: my-secret-2 + provision_timeout: 100 + gcp: + managed_instance_group: + run_duration: {RUN_DURATION_OVERRIDE} + nvidia_gpus: + disable_ecc: true + resources: + image_id: docker:ubuntu:latest + + setup: echo 'Setting up...' + run: echo 'Running...' + """)) + def test_nested_config(monkeypatch) -> None: """Test that the nested config works.""" @@ -334,6 +368,26 @@ def test_k8s_config_with_override(monkeypatch, tmp_path, 'imagePullSecrets'][0]['name'] == 'my-secret-2' assert cluster_pod_config['spec']['runtimeClassName'] == 'nvidia' +def test_k8s_config_with_invalid_config(monkeypatch, tmp_path, + enable_all_clouds) -> None: + config_path = tmp_path / 'config.yaml' + _create_config_file(config_path) + monkeypatch.setattr(skypilot_config, 'CONFIG_PATH', config_path) + + _reload_config() + task_path = tmp_path / 'task.yaml' + _create_invalid_config_yaml_file(task_path) + task = sky.Task.from_yaml(task_path) + + # Test Kubernetes pod_config invalid + cluster_name = 'test-kubernetes-config-with-override' + task.set_resources_override({'cloud': sky.Kubernetes()}) + exception = None + try: + sky.launch(task, cluster_name=cluster_name, dryrun=True) + except sky.exceptions.ResourcesUnavailableError as e: + exception = e + assert not exception def test_gcp_config_with_override(monkeypatch, tmp_path, enable_all_clouds) -> None: From 164f436d6fbae5401383c62c841441bd54bf9310 Mon Sep 17 00:00:00 2001 From: Chester Li Date: Thu, 26 Dec 2024 10:04:33 +0800 Subject: [PATCH 06/13] test --- sky/backends/backend_utils.py | 3 ++- tests/test_config.py | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 143876a59ab..bd22f8466ea 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -917,7 +917,8 @@ def write_cluster_config( cluster_config_overrides=to_provision.cluster_config_overrides) kubernetes_utils.combine_metadata_fields(tmp_yaml_path) yaml_obj = common_utils.read_yaml(tmp_yaml_path) - pod_config = yaml_obj['available_node_types']['ray_head_default']['node_config'] + pod_config = yaml_obj['available_node_types']['ray_head_default'][ + 'node_config'] valid, message = kubernetes_utils.check_pod_config(pod_config) if not valid: raise exceptions.InvalidCloudConfigs( diff --git a/tests/test_config.py b/tests/test_config.py index b9f3d480d1b..756e1fd6cf6 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -99,6 +99,7 @@ def _create_task_yaml_file(task_file_path: pathlib.Path) -> None: run: echo 'Running...' """)) + def _create_invalid_config_yaml_file(task_file_path: pathlib.Path) -> None: task_file_path.write_text( textwrap.dedent(f"""\ @@ -368,6 +369,7 @@ def test_k8s_config_with_override(monkeypatch, tmp_path, 'imagePullSecrets'][0]['name'] == 'my-secret-2' assert cluster_pod_config['spec']['runtimeClassName'] == 'nvidia' + def test_k8s_config_with_invalid_config(monkeypatch, tmp_path, enable_all_clouds) -> None: config_path = tmp_path / 'config.yaml' @@ -380,7 +382,7 @@ def test_k8s_config_with_invalid_config(monkeypatch, tmp_path, task = sky.Task.from_yaml(task_path) # Test Kubernetes pod_config invalid - cluster_name = 'test-kubernetes-config-with-override' + cluster_name = 'test_k8s_config_with_invalid_config' task.set_resources_override({'cloud': sky.Kubernetes()}) exception = None try: @@ -389,6 +391,7 @@ def test_k8s_config_with_invalid_config(monkeypatch, tmp_path, exception = e assert not exception + def test_gcp_config_with_override(monkeypatch, tmp_path, enable_all_clouds) -> None: config_path = tmp_path / 'config.yaml' From 0471b4cee0352b72919c94166fd6fd681800bab4 Mon Sep 17 00:00:00 2001 From: Chester Li Date: Thu, 26 Dec 2024 10:15:59 +0800 Subject: [PATCH 07/13] create another api_client with no kubeconfig --- sky/provision/kubernetes/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py index d0a0e87860a..04337c5afc9 100644 --- a/sky/provision/kubernetes/utils.py +++ b/sky/provision/kubernetes/utils.py @@ -903,7 +903,9 @@ def check_pod_config(pod_config: dict) \ str: Error message about why the pod_config is invalid, None otherwise. """ errors = [] - api_client = kubernetes.api_client() + # This api_client won't be used to send any requests, so there is no need to + # load kubeconfig + api_client = kubernetes.kubernetes.client.ApiClient() # Used for kubernetes api_client deserialize function, the function will use # data attr, the detail ref: From 98a1d842f622bc8e17a84a30de9597e79a5f12d1 Mon Sep 17 00:00:00 2001 From: Chester Li Date: Thu, 26 Dec 2024 10:30:42 +0800 Subject: [PATCH 08/13] test --- tests/test_config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 756e1fd6cf6..55860f3cd4d 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -384,12 +384,12 @@ def test_k8s_config_with_invalid_config(monkeypatch, tmp_path, # Test Kubernetes pod_config invalid cluster_name = 'test_k8s_config_with_invalid_config' task.set_resources_override({'cloud': sky.Kubernetes()}) - exception = None + exception_occurred = False try: sky.launch(task, cluster_name=cluster_name, dryrun=True) - except sky.exceptions.ResourcesUnavailableError as e: - exception = e - assert not exception + except sky.exceptions.ResourcesUnavailableError: + exception_occurred = True + assert exception_occurred def test_gcp_config_with_override(monkeypatch, tmp_path, From 7e501f391fb008721bbb6347808200df219a42cd Mon Sep 17 00:00:00 2001 From: Chester Li Date: Thu, 26 Dec 2024 10:48:14 +0800 Subject: [PATCH 09/13] update error message --- sky/backends/backend_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index bd22f8466ea..072e60af411 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -922,7 +922,7 @@ def write_cluster_config( valid, message = kubernetes_utils.check_pod_config(pod_config) if not valid: raise exceptions.InvalidCloudConfigs( - f'There are invalid config in pod_config, deatil: {message}') + f'Invalid pod_config. Deatil: {message}') if dryrun: # If dryrun, return the unfinished tmp yaml path. From ef562da1466b13f027c660987fef3ffd3f4912cd Mon Sep 17 00:00:00 2001 From: Chester Li Date: Tue, 31 Dec 2024 10:32:55 +0800 Subject: [PATCH 10/13] update test --- sky/backends/backend_utils.py | 2 +- tests/test_config.py | 17 ++++------------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 072e60af411..74cd9fabb58 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -922,7 +922,7 @@ def write_cluster_config( valid, message = kubernetes_utils.check_pod_config(pod_config) if not valid: raise exceptions.InvalidCloudConfigs( - f'Invalid pod_config. Deatil: {message}') + f'Invalid pod_config. Deatils: {message}') if dryrun: # If dryrun, return the unfinished tmp yaml path. diff --git a/tests/test_config.py b/tests/test_config.py index 55860f3cd4d..d3d18a8dffe 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -102,12 +102,9 @@ def _create_task_yaml_file(task_file_path: pathlib.Path) -> None: def _create_invalid_config_yaml_file(task_file_path: pathlib.Path) -> None: task_file_path.write_text( - textwrap.dedent(f"""\ + textwrap.dedent("""\ experimental: config_overrides: - docker: - run_options: - - -v /tmp:/tmp kubernetes: pod_config: metadata: @@ -120,14 +117,6 @@ def _create_invalid_config_yaml_file(task_file_path: pathlib.Path) -> None: - name: imagePullSecrets: - name: my-secret-2 - provision_timeout: 100 - gcp: - managed_instance_group: - run_duration: {RUN_DURATION_OVERRIDE} - nvidia_gpus: - disable_ecc: true - resources: - image_id: docker:ubuntu:latest setup: echo 'Setting up...' run: echo 'Running...' @@ -387,7 +376,9 @@ def test_k8s_config_with_invalid_config(monkeypatch, tmp_path, exception_occurred = False try: sky.launch(task, cluster_name=cluster_name, dryrun=True) - except sky.exceptions.ResourcesUnavailableError: + except sky.exceptions.ResourcesUnavailableError as e: + assert 'Invalid pod_config. Deatils: Invalid spec: \ + Invalid value for `name`, must not be `None`' in str(e) exception_occurred = True assert exception_occurred From 2b5dba1eba3de434e3a589e6a90d003e28badf34 Mon Sep 17 00:00:00 2001 From: Chester Li Date: Tue, 31 Dec 2024 10:42:28 +0800 Subject: [PATCH 11/13] test --- tests/test_config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index d3d18a8dffe..566f824a3a9 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -377,8 +377,10 @@ def test_k8s_config_with_invalid_config(monkeypatch, tmp_path, try: sky.launch(task, cluster_name=cluster_name, dryrun=True) except sky.exceptions.ResourcesUnavailableError as e: - assert 'Invalid pod_config. Deatils: Invalid spec: \ - Invalid value for `name`, must not be `None`' in str(e) + expect_error_message = ( + 'Invalid pod_config. Deatils: ' + 'Invalid spec: Invalid value for `name`, must not be `None`') + assert expect_error_message in str(e) exception_occurred = True assert exception_occurred From 81029d71b7ca7b42fdb03617e53bff33267eba01 Mon Sep 17 00:00:00 2001 From: Chester Li Date: Tue, 31 Dec 2024 10:48:36 +0800 Subject: [PATCH 12/13] test --- tests/test_config.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 566f824a3a9..d3eaeb261bc 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -376,11 +376,7 @@ def test_k8s_config_with_invalid_config(monkeypatch, tmp_path, exception_occurred = False try: sky.launch(task, cluster_name=cluster_name, dryrun=True) - except sky.exceptions.ResourcesUnavailableError as e: - expect_error_message = ( - 'Invalid pod_config. Deatils: ' - 'Invalid spec: Invalid value for `name`, must not be `None`') - assert expect_error_message in str(e) + except sky.exceptions.ResourcesUnavailableError: exception_occurred = True assert exception_occurred From d349c379c9b48b7dda09f60d9291782b0691519f Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 3 Jan 2025 09:07:38 -0800 Subject: [PATCH 13/13] Update sky/backends/backend_utils.py --- sky/backends/backend_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 74cd9fabb58..07cd294787a 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -922,7 +922,7 @@ def write_cluster_config( valid, message = kubernetes_utils.check_pod_config(pod_config) if not valid: raise exceptions.InvalidCloudConfigs( - f'Invalid pod_config. Deatils: {message}') + f'Invalid pod_config. Details: {message}') if dryrun: # If dryrun, return the unfinished tmp yaml path.