Skip to content

Commit

Permalink
scheduler: expose scheduling errors to users
Browse files Browse the repository at this point in the history
Closes #643
  • Loading branch information
mdonadoni committed Nov 15, 2023
1 parent 34131ba commit 0d6df22
Showing 1 changed file with 14 additions and 10 deletions.
24 changes: 14 additions & 10 deletions reana_server/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,18 @@ def _fail_workflow(self, workflow_id: str, logs: str = "") -> None:
logs=logs,
)

def _retry_submission(self, workflow_id: str, workflow_submission: Dict) -> None:
def _retry_submission(
self, workflow_id: str, workflow_submission: Dict, reason: Optional[str] = None
) -> None:
retry_count = workflow_submission.get("retry_count", 0)

if retry_count >= REANA_SCHEDULER_REQUEUE_COUNT:
error_message = (
f"Workflow {workflow_submission['workflow_id_or_name']} failed to schedule after "
f"{retry_count} retries. Giving up."
)
if reason:
error_message += f"\nReason: {reason}"
logging.error(error_message)
self._fail_workflow(workflow_id, logs=error_message)
else:
Expand Down Expand Up @@ -203,11 +208,11 @@ def on_message(self, body, message):
)

except HTTPBadGateway as api_e:
logging.error(
"Workflow failed to start because RWC got an error while calling"
f"an external service (i.e. DB):\n {api_e}",
exc_info=True,
error = (
"Workflow failed to start because reana-workflow-controller got an error while calling"
f"an external service (i.e. database):\n{api_e}"
)
logging.error(error, exc_info=True)
except HTTPNotFound as not_found_e:
# if workflow is not found, we cannot retry or report an error to workflow logs
retry = False
Expand All @@ -234,14 +239,13 @@ def on_message(self, body, message):
)
self._fail_workflow(workflow_id, logs=error_message)
except Exception as e:
logging.error(
f"Something went wrong while calling RWC:\n {e}", exc_info=True
)
error = f"Something went wrong while calling reana-workflow-controller:\n{e}"
logging.error(error, exc_info=True)
finally:
sleep(REANA_SCHEDULER_REQUEUE_SLEEP)
if not started and retry:
message.reject()
self._retry_submission(workflow_id, workflow_submission_copy)
self._retry_submission(workflow_id, workflow_submission_copy, error)
else:
message.ack()
else:
Expand All @@ -251,4 +255,4 @@ def on_message(self, body, message):
)
sleep(REANA_SCHEDULER_REQUEUE_SLEEP)
message.reject()
self._retry_submission(workflow_id, workflow_submission_copy)
self._retry_submission(workflow_id, workflow_submission_copy, error)

0 comments on commit 0d6df22

Please sign in to comment.