You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Launching Dataflow job preprocess-taxifeatures-190401-055608 ... hang on
CalledProcessError                        Traceback (most recent call last)
<ipython-input> in <module>()
----> 1 preprocess(50*100, 'DataflowRunner')
2 #change first arg to None to preprocess full dataset
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/pipeline.pyc in __exit__(self, exc_type, exc_val, exc_tb)
181 def _current_transform(self):
182 """Returns the transform currently on the top of the stack."""
--> 183 return self.transforms_stack[-1]
184
185 def _root_transform(self):
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/pipeline.pyc in run(self, test_runner_api)
174 @property
175 @deprecated(since='First stable release',
--> 176 extra_message='References to .options'
177 ' will not be supported')
178 def options(self):
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/dataflow_runner.pyc in run(self, pipeline)
250
251 TODO(BEAM-115): Once Python SDK is compatible with the new Runner API,
--> 252 we could directly replace the coder instead of mutating the element type.
253 """
254 def visit_transform(self, transform_node):
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/utils/retry.pyc in wrapper(*args, **kwargs)
166 the make_http_request() call below will be retried 16 times with exponential
167 backoff and fuzzing of the delay interval (default settings).
--> 168
169 from apache_beam.utils import retry
170 # ...
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/apiclient.pyc in create_job(self, job)
423 else:
424 self.environment_version = _LEGACY_ENVIRONMENT_MAJOR_VERSION
--> 425
426 if self.google_cloud_options.no_auth:
427 credentials = None
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/apiclient.pyc in create_job_description(self, job)
446 def _gcs_file_copy(self, from_path, to_path):
447 to_folder, to_name = os.path.split(to_path)
--> 448 with open(from_path, 'rb') as f:
449 self.stage_file(to_folder, to_name, f)
450
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc in stage_job_resources(options, file_copy, build_setup_args, temp_dir, populate_requirements_cache)
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc in _stage_beam_sdk_tarball(sdk_remote_location, staged_path, temp_dir)
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc in _download_pypi_sdk_package(temp_dir)
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/utils/processes.pyc in check_call(*args, **kwargs)
42
43
---> 44 def check_call(*args, **kwargs):
45 if force_shell:
46 kwargs['shell'] = True
/usr/local/envs/py2env/lib/python2.7/subprocess.pyc in check_call(*popenargs, **kwargs)
188 if cmd is None:
189 cmd = popenargs[0]
--> 190 raise CalledProcessError(retcode, cmd)
191 return 0
192
CalledProcessError: Command '['/usr/local/envs/py2env/bin/python', '-m', 'pip', 'install', '--download', '/tmp/tmp2Su2Vy', 'google-cloud-dataflow==2.0.0', '--no-binary', ':all:', '--no-deps']' returned non-zero exit status 2
How do I solve this issue?
The text was updated successfully, but these errors were encountered:
Launching Dataflow job preprocess-taxifeatures-190401-055608 ... hang on
CalledProcessError                        Traceback (most recent call last)
<ipython-input> in <module>()
----> 1 preprocess(50*100, 'DataflowRunner')
2 #change first arg to None to preprocess full dataset
<ipython-input> in preprocess(EVERY_N, RUNNER)
50 p | 'read_{}'.format(phase) >> beam.io.Read(beam.io.BigQuerySource(query=query))
51 | 'tocsv_{}'.format(phase) >> beam.Map(to_csv)
---> 52 | 'write_{}'.format(phase) >> beam.io.Write(beam.io.WriteToText(outfile))
53 )
54 print("Done")
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/pipeline.pyc in __exit__(self, exc_type, exc_val, exc_tb)
181 def _current_transform(self):
182 """Returns the transform currently on the top of the stack."""
--> 183 return self.transforms_stack[-1]
184
185 def _root_transform(self):
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/pipeline.pyc in run(self, test_runner_api)
174 @property
175 @deprecated(since='First stable release',
--> 176 extra_message='References to .options'
177 ' will not be supported')
178 def options(self):
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/dataflow_runner.pyc in run(self, pipeline)
250
251 TODO(BEAM-115): Once Python SDK is compatible with the new Runner API,
--> 252 we could directly replace the coder instead of mutating the element type.
253 """
254 def visit_transform(self, transform_node):
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/utils/retry.pyc in wrapper(*args, **kwargs)
166 the make_http_request() call below will be retried 16 times with exponential
167 backoff and fuzzing of the delay interval (default settings).
--> 168
169 from apache_beam.utils import retry
170 # ...
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/apiclient.pyc in create_job(self, job)
423 else:
424 self.environment_version = _LEGACY_ENVIRONMENT_MAJOR_VERSION
--> 425
426 if self.google_cloud_options.no_auth:
427 credentials = None
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/apiclient.pyc in create_job_description(self, job)
446 def _gcs_file_copy(self, from_path, to_path):
447 to_folder, to_name = os.path.split(to_path)
--> 448 with open(from_path, 'rb') as f:
449 self.stage_file(to_folder, to_name, f)
450
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc in stage_job_resources(options, file_copy, build_setup_args, temp_dir, populate_requirements_cache)
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc in _stage_beam_sdk_tarball(sdk_remote_location, staged_path, temp_dir)
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc in _download_pypi_sdk_package(temp_dir)
/usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/utils/processes.pyc in check_call(*args, **kwargs)
42
43
---> 44 def check_call(*args, **kwargs):
45 if force_shell:
46 kwargs['shell'] = True
/usr/local/envs/py2env/lib/python2.7/subprocess.pyc in check_call(*popenargs, **kwargs)
188 if cmd is None:
189 cmd = popenargs[0]
--> 190 raise CalledProcessError(retcode, cmd)
191 return 0
192
CalledProcessError: Command '['/usr/local/envs/py2env/bin/python', '-m', 'pip', 'install', '--download', '/tmp/tmp2Su2Vy', 'google-cloud-dataflow==2.0.0', '--no-binary', ':all:', '--no-deps']' returned non-zero exit status 2
How do I solve this issue?
The text was updated successfully, but these errors were encountered: