Orquesta CI #4721

Workflow file for this run

.github/workflows/orquesta-integration-tests.yaml at b05f0cf

	# We run orquesta integration tests as part of a separate workflow.
	# Orquesta tests have a lot of race conditions which result in intermittent failures and timeouts.
	# Using a separate workflow allows us to re-run just this workflow / job on failure instead of
	# wasting time and resources by needing to re-run all the jobs.
	name: Orquesta CI

	# Events that start this workflow: pushes to release branches / version tags, pull requests
	# targeting those branches, and a nightly schedule.
	on:
	  push:
	    branches:
	      # only on merges to master branch
	      - master
	      # and version branches, which only include minor versions (eg: v3.4)
	      - v[0-9]+.[0-9]+
	    tags:
	      # also version tags, which include bugfix releases (eg: v3.4.0)
	      - v[0-9]+.[0-9]+.[0-9]+
	  pull_request:
	    # The activity-type filter key is `types` (plural); `type` is not a documented key.
	    # `synchronize` is listed explicitly so that new commits pushed to an open PR still
	    # trigger a run.
	    # NOTE(review): this list was previously `[opened, reopened, edited]` under the key
	    # `type`. Add `edited` back here if runs on PR title / body / base-branch edits are
	    # actually wanted - TODO confirm.
	    types: [opened, reopened, synchronize]
	    branches:
	      # Only for PRs targeting those branches
	      - master
	      - v[0-9]+.[0-9]+
	  schedule:
	    # run every night at midnight
	    - cron: '0 0 * * *'

	jobs:
	  # TODO: Fix the required checks!
	  # When the pre_job triggers and skips builds, it prevents merging the PR because
	  # the required checks are reported as skipped instead of passed.
	  # Special job which automatically cancels old runs for the same branch, prevents runs for the
	  # same file set which has already passed, etc.
	  pre_job:
	    name: Skip Duplicate Jobs Pre Job
	    runs-on: ubuntu-20.04
	    outputs:
	      # Exposed to dependent jobs as needs.pre_job.outputs.should_skip
	      should_skip: ${{ steps.skip_check.outputs.should_skip }}
	    steps:
	      - id: skip_check
	        uses: fkirc/skip-duplicate-actions@4c656bbdb6906310fa6213604828008bc28fe55d  # v3.3.0
	        with:
	          cancel_others: 'true'
	          github_token: ${{ github.token }}

	  # Runs the orquesta integration test suite once per matrix entry (one entry per Python
	  # version) against the mongo, rabbitmq and redis service containers declared below.
	  integration-tests:
	    needs: pre_job
	    # NOTE: We always want to run job on master since we run some additional checks there (code
	    # coverage, etc)
	    # if: ${{ needs.pre_job.outputs.should_skip != 'true' || github.ref == 'refs/heads/master' }}
	    name: '${{ matrix.name }} - Python ${{ matrix.python-version-short }}'
	    runs-on: ubuntu-20.04
	    strategy:
	      # Let the other matrix entry finish even if one of them fails.
	      fail-fast: false
	      matrix:
	        # NOTE: We need to use full Python version as part of Python deps cache key otherwise
	        # setup virtualenv step will fail.
	        include:
	          - name: 'Integration Tests (Orquesta)'
	            task: 'ci-orquesta'
	            nosetests_node_total: 1
	            nosetests_node_index: 0
	            python-version-short: '3.8'
	            python-version: '3.8.10'
	          - name: 'Integration Tests (Orquesta)'
	            task: 'ci-orquesta'
	            nosetests_node_total: 1
	            nosetests_node_index: 0
	            python-version-short: '3.9'
	            python-version: '3.9.14'
	    services:
	      mongo:
	        image: mongo:7.0
	        ports:
	          - 27017:27017

	      rabbitmq:
	        image: rabbitmq:3.8-management
	        options: >-
	          --name rabbitmq
	        ports:
	          - 5671:5671/tcp  # AMQP SSL port
	          - 5672:5672/tcp  # AMQP standard port
	          - 15672:15672/tcp  # Management: HTTP, CLI

	      redis:
	        # Docker Hub image
	        image: redis
	        # Set health checks to wait until redis has started
	        options: >-
	          --name "redis"
	          --health-cmd "redis-cli ping"
	          --health-interval 10s
	          --health-timeout 5s
	          --health-retries 5
	        ports:
	          - 6379:6379/tcp

	    env:
	      TASK: '${{ matrix.task }}'
	      NODE_TOTAL: '${{ matrix.nosetests_node_total }}'
	      NODE_INDEX: '${{ matrix.nosetests_node_index }}'

	      # We need to explicitly specify terminal width otherwise some CLI tests fail on container
	      # environments where small terminal size is used.
	      COLUMNS: '120'

	      # CI st2.conf (with ST2_CI_USER user instead of stanley)
	      ST2_CONF: 'conf/st2.ci.conf'

	      # Tell StackStorm that we are indeed in CI mode, previously we hard coded a Travis specific
	      # environment variable in our test code, making it a PITA when we switch CI providers.
	      # Now, we simply set this environment variable here in the CI portion of our testing and
	      # it avoids any CI provider type lock-in.
	      ST2_CI: 'true'

	      # Name of the user who is running the CI (on GitHub Actions this is 'runner')
	      ST2_CI_USER: 'runner'

	      # GitHub is juggling how to set vars for multiple shells. Protect our PATH assumptions.
	      PATH: /home/runner/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
	    steps:
	      - name: Checkout repository
	        uses: actions/checkout@v4
	      - name: Custom Environment Setup
	        run: |
	          ./scripts/github/setup-environment.sh
	      - name: 'Set up Python (${{ matrix.python-version }}) and Cache Deps'
	        uses: ./.github/actions/setup-python
	        with:
	          python-version: '${{ matrix.python-version }}'
	      - name: Cache and Install APT Dependencies
	        uses: ./.github/actions/apt-packages
	      - name: Install virtualenv
	        run: |
	          ./scripts/github/install-virtualenv.sh
	      - name: Install requirements
	        run: |
	          ./scripts/ci/install-requirements.sh
	      - name: Setup Integration Tests
	        run: |
	          # prep a ci-specific dev conf file that uses runner instead of stanley
	          # this user is the username of the user in GitHub actions, used for SSH, etc during
	          # integration tests (important)
	          cp conf/st2.dev.conf "${ST2_CONF}" ; sed -i -e "s/stanley/${ST2_CI_USER}/" "${ST2_CONF}"

	          sudo -E ./scripts/ci/add-itest-user-key.sh
	      - name: Permissions Workaround
	        # NOTE(review): ST2_CI_REPO_PATH is not defined anywhere in this workflow file;
	        # presumably it is exported by an earlier step (e.g. setup-environment.sh) - confirm.
	        run: |
	          echo "$ST2_CI_REPO_PATH"
	          sudo ST2_CI_REPO_PATH="${ST2_CI_REPO_PATH}" scripts/ci/permissions-workaround.sh
	      - name: Print versions
	        run: |
	          ./scripts/ci/print-versions.sh
	      - name: make
	        # Upper bound for the whole retry loop below: up to MAX_ATTEMPTS runs of at most 10
	        # minutes each, plus the delays between them.
	        timeout-minutes: 41
	        env:
	          MAX_ATTEMPTS: '3'
	          RETRY_DELAY: '5'
	        # use: script -e -c to print colors
	        run: |
	          # There is a race in some orquesta integration tests so they tend to fail quite often.
	          # To avoid needing to re-run the whole workflow in such a case, we retry this
	          # specific step. This saves us a bunch of time manually re-running the whole workflow.
	          # TODO: Try to identify problematic tests (iirc mostly orquesta ones) and only retry /
	          # re-run those.
	          # Disable errexit so a failed attempt falls through to the retry logic instead of
	          # aborting the step.
	          set +e
	          for i in $(seq 1 "${MAX_ATTEMPTS}"); do
	            echo "Attempt: ${i}/${MAX_ATTEMPTS}"
	            script -e -c "timeout 10m make ${TASK}" && exit 0
	            exit_code=$?
	            echo "Command failed / timed out (exit_code=${exit_code}), will retry in ${RETRY_DELAY} seconds..."
	            sleep "${RETRY_DELAY}"
	          done
	          set -e
	          echo "Failed after ${MAX_ATTEMPTS} attempts, failing the job."
	          exit 1
	      - name: Compress Service Logs Before upload
	        if: ${{ failure() }}
	        run: |
	          ./tools/launchdev.sh stop # stop st2 before collecting logs
	          tar cvzpf logs.tar.gz logs/*
	      - name: Upload StackStorm services Logs
	        if: ${{ failure() }}
	        uses: actions/upload-artifact@v4
	        with:
	          name: logs-py${{ matrix.python-version }}
	          path: logs.tar.gz
	          retention-days: 7

	  slack-notification:
	    name: Slack notification for failed master builds
	    # Run even when integration-tests failed, so the failure can be reported.
	    if: always()
	    needs:
	      - integration-tests
	    runs-on: ubuntu-20.04
	    steps:
	      - name: Workflow conclusion
	        # this step creates an environment variable WORKFLOW_CONCLUSION and is the most reliable way to check the status of previous jobs
	        uses: technote-space/workflow-conclusion-action@v2
	      - name: CI Run Failure Slack Notification
	        if: ${{ env.WORKFLOW_CONCLUSION == 'failure' && github.ref == 'refs/heads/master' }}
	        env:
	          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
	        uses: voxmedia/github-action-slack-notify-build@v1
	        with:
	          channel: development
	          status: FAILED
	          color: danger

	      # HELPER FOR FUTURE DEVELOPERS:
	      # If your GitHub Actions job is failing and you need to debug it, by default there is
	      # no way to SSH into the container.
	      # The step below can be uncommented and will stop here and allow you to SSH in.
	      # When this step is reached, simply refresh the GitHub Actions output for this build
	      # and this SSH command will be printed every 5 seconds to the output.
	      # Once you are done debugging in your SSH session, simply: touch /continue
	      # and this will continue the build.
	      #
	      # - name: Setup tmate session for debugging failed jobs (allows SSH into the container)
	      #   uses: mxschmitt/action-tmate@v3
	      #   if: "${{ failure() }}"
	      #

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Orquesta CI #4721

Workflow file

Orquesta CI #4721

Jobs

Run details

Workflow file for this run