Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

json-schema spec for the workflow #562

Open
wants to merge 16 commits into
base: v3-dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions json-schema/exampleWorkflow.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
{
"schemaVersion": "0.0.1",
"name": "Example workflow",
"parameters": {
"ENV": "production",
"BUCKET": "mybucket"
},
"schedule": "0 6 * * *",
"timezone": "UTC",
"tasks": [
{
"node_id": "T1",
"name": "Initial Data Prep",
"input_uri": "/data/input1.csv",
"depends_on": [],
"runtime_environment_name": "python-env",
"runtime_environment_parameters": {
"python_version": "3.9"
},
"output_formats": [
"html",
"pdf"
],
"parameters": {
"cleanup": "true"
},
"tags": [
"data-prep"
],
"compute_type": "small",
"package_input_folder": false
},
{
"node_id": "T2",
"name": "Combine Data",
"input_uri": "/data/combined_input.csv",
"depends_on": [
{
"node_id": "T1"
},
{
"node_id": "T3"
}
],
"runtime_environment_name": "python-env",
"runtime_environment_parameters": {
"python_version": "3.9"
},
"output_formats": [
"notebook"
],
"parameters": {
"merge_mode": "full-outer"
},
"tags": [
"combine",
"processing"
],
"compute_type": "medium",
"package_input_folder": false
},
{
"node_id": "T3",
"name": "Preprocess Data",
"input_uri": "/data/raw_data.csv",
"depends_on": [],
"runtime_environment_name": "python-env",
"runtime_environment_parameters": {},
"output_formats": [],
"parameters": {
"normalize": "true"
},
"tags": [
"preprocess"
],
"compute_type": "large",
"package_input_folder": true
},
{
"node_id": "T4",
"name": "Analysis",
"input_uri": "/data/analysis_input.csv",
"depends_on": [
{
"node_id": "T2"
}
],
"runtime_environment_name": "r-env",
"runtime_environment_parameters": {
"r_version": "4.2"
},
"output_formats": [
"html"
],
"parameters": {
"analysis_type": "timeseries"
},
"tags": [
"analysis",
"R"
],
"compute_type": "medium",
"package_input_folder": false
},
{
"node_id": "T5",
"name": "Final Output",
"input_uri": "/data/final_result.csv",
"depends_on": [],
"runtime_environment_name": "python-env",
"runtime_environment_parameters": {},
"output_formats": [],
"parameters": {},
"tags": [
"final"
],
"compute_type": "small",
"package_input_folder": false
}
]
}
121 changes: 121 additions & 0 deletions json-schema/workflow.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"version": "0.0.1",
"title": "Workflow",
"type": "object",
"properties": {
"schemaVersion": {
"type": "string",
"default": "0.0.1"
},
"tasks": {
"type": "array",
"items": {
"$ref": "#/definitions/Task"
},
"description": "Workflow tasks."
},
"name": {
"type": "string",
"description": "The name of the workflow."
},
"parameters": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional parameters for the workflow."
},
"schedule": {
"type": "string",
"description": "Optional schedule in cron format."
},
"timezone": {
"type": "string",
"description": "Timezone for the schedule."
}
},
"required": [
"tasks",
"name"
],
"definitions": {
"Task": {
"type": "object",
"properties": {
"input_uri": {
"type": "string",
"description": "The URI of the input file."
},
"runtime_environment_name": {
"type": "string",
"description": "Name of the runtime environment."
},
"runtime_environment_parameters": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Parameters for the runtime environment."
},
"output_formats": {
"type": "array",
"items": {
"type": "string"
}
},
"parameters": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Task-specific parameters."
},
"tags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Tags for categorizing the task."
},
"name": {
"type": "string",
"description": "Name of the task."
},
"compute_type": {
"type": "string",
"description": "Type of compute resource to use."
},
"package_input_folder": {
"type": "boolean",
"description": "Whether to package the input folder."
},
"depends_on": {
"type": "array",
"description": "Upstream dependencies: one object per task this task depends on, each identifying that task by its DAG node_id.",
"items": {
"type": "object",
"properties": {
"node_id": {
"type": "string"
}
},
"required": [
"node_id"
]
}
},
"node_id": {
"type": "string",
"description": "DAG node ID of this task."
}
},
"required": [
"input_uri",
"name",
"node_id",
"depends_on"
]
}
}
}
Loading