Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Post CI health reports to Slack #27

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 2 additions & 23 deletions .github/workflows/optimize.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,35 +13,14 @@ jobs:
sync:
runs-on: ubuntu-latest
env:
GITHUB_TOKEN: ${{ secrets.PERSONAL_GITHUB_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.AWS_REGION }}
TRINO_VERSION: 445
steps:
- uses: actions/checkout@v4
- name: Start Trino
run: |
CONTAINER_ID=$(docker run \
-v $(pwd)/catalog/trinocicd.properties:/etc/trino/catalog/trinocicd.properties \
-v $(pwd)/sql:/sql \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
-e AWS_REGION \
-p 8080:8080 \
--name trino \
-d \
trinodb/trino:$TRINO_VERSION)
SERVER_IP=$(docker inspect --format '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$CONTAINER_ID")
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
echo "SERVER_IP=$SERVER_IP" >> $GITHUB_ENV
i=0
until docker inspect "${CONTAINER_ID}" --format "{{json .State.Health.Status }}" | grep -q '"healthy"'; do
if [[ $((i++)) -ge 10 ]]; then
echo "🚨 Too many retries waiting for Trino to start"
exit 1
fi
sleep 10
done
run: ./bin/run-trino.sh
- name: Optimize tables
run: |
docker exec \
Expand Down
115 changes: 79 additions & 36 deletions .github/workflows/reports.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,53 +16,96 @@ jobs:
run:
runs-on: ubuntu-latest
env:
GITHUB_TOKEN: ${{ secrets.PERSONAL_GITHUB_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.AWS_REGION }}
TRINO_VERSION: 445
TRINO_GIT_VERSION: "0.74"
steps:
- uses: actions/checkout@v4
- name: Install dependencies
run: |
sudo apt update
sudo pip install ansi2html
curl -fLOsS https://github.com/nineinchnick/trino-git/releases/download/v$TRINO_GIT_VERSION/trino-git-$TRINO_GIT_VERSION.zip
unzip trino-git-$TRINO_GIT_VERSION.zip
- name: Start Trino
run: |
CONTAINER_ID=$(docker run \
-v $(pwd)/trino-git-$TRINO_GIT_VERSION:/usr/lib/trino/plugin/git \
-v $(pwd)/catalog/git.properties:/etc/trino/catalog/git.properties \
-v $(pwd)/catalog/trinocicd.properties:/etc/trino/catalog/trinocicd.properties \
-v $(pwd)/hive-cache:/opt/hive-cache \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
-e AWS_REGION \
-p 8080:8080 \
--name trino \
-d \
trinodb/trino:$TRINO_VERSION)
SERVER_IP=$(docker inspect --format '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$CONTAINER_ID")
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
echo "SERVER_IP=$SERVER_IP" >> $GITHUB_ENV
until docker logs trino 2>&1 | grep --quiet --fixed-strings --max-count=1 "SERVER STARTED"; do sleep 1 ; done
- name: Execute queries
run: |
./bin/reports.sh reports/ci-cd/index.md "Trino CI/CD Reports" sql/ci-cd/{health,success-ratio-per-day,runs-queue-time-per-day,runs-duration-per-day,runs-job-cumulative-duration-per-day,jobs-duration}.sql
./bin/reports.sh reports/flaky/index.md "Trino Flaky Tests" sql/flaky/{jobs,jobs-week,tests,classes}.sql
./bin/reports.sh reports/pr/index.md "Trino PR Reports" sql/pr/{idents,burndown,authors-per-month,changes-per-month,prs-per-author,reviewers-per-pr,reviews-per-author-assoc,top-reviewers,top-authors,top-mergers,sith-lords,mergers-authors,reviewers-top-authors,time-to-merge,time-to-merge-per-size,avg-time-to-merge,time-to-first-review,avg-time-to-first-review,reviewer-responsiveness,author-responsiveness,open-pr-age,awaiting-review,inactivity-on-prs,abandoned-prs,running-prs,stale-prs}.sql
- name: Commit report
run: |
# pull in case someone pushed commits during reports generation, which can take a while
# if there are conflicts, this will fail
git pull --ff-only
git config user.name 'GitHub Automation'
git config user.email ''
git add --all reports/
git commit -m "Automated report"
git push
run: ./bin/run-trino.sh
- name: Dump Trino logs
if: always()
run: |
docker logs trino
- name: Generate Slack message
id: message
run: |
output=$(docker exec trino \
java -Dorg.jline.terminal.dumb=true -jar /usr/bin/trino \
trino://localhost:8080/trinocicd/v2 \
--file /sql/ci-cd/health.sql \
--output-format=VERTICAL | tail -n+2)

EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
echo "health<<$EOF" >> $GITHUB_OUTPUT
echo '```'"$output"'```' >> $GITHUB_OUTPUT
echo "$EOF" >> $GITHUB_OUTPUT

output=$(docker exec trino \
java -Dorg.jline.terminal.dumb=true -jar /usr/bin/trino \
trino://localhost:8080/trinocicd/v2 \
--file /sql/ci-cd/failing-jobs.sql \
--output-format=CSV_UNQUOTED)

EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
echo "failing-jobs<<$EOF" >> $GITHUB_OUTPUT
echo "$output" >> $GITHUB_OUTPUT
echo "$EOF" >> $GITHUB_OUTPUT
- name: Post to a Slack channel
id: slack
uses: slackapi/[email protected]
with:
channel-id: 'tmp-reports'
payload: |
{
"text": "CI health reports",
"blocks": [
{
"type": "header",
"text": {
"type": "plain_text",
"text": "CI health"
}
},
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": ${{ toJSON(steps.message.outputs.health) }}
}
},
{
"type": "divider"
},
{
"type": "header",
"text": {
"type": "plain_text",
"text": "Failing jobs"
}
},
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": ${{ toJSON(steps.message.outputs.failing-jobs) }}
}
},
{
"type": "context",
"elements": [
{
"type": "mrkdwn",
"text": "See the complete <https://trinodb.github.io/reports/reports/flaky/|flaky jobs report>"
}
]
}
]
}
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
37 changes: 1 addition & 36 deletions .github/workflows/sync.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ jobs:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.AWS_REGION }}
TRINO_VERSION: 445
TRINO_REST_VERSION: "0.144"
GITHUB_OWNER: trinodb
GITHUB_REPO: trino
Expand All @@ -28,45 +27,11 @@ jobs:
SYNC_TABLES: runs,jobs,steps,check_suites,check_runs,check_run_annotations,pulls,pull_commits,pull_stats,review_comments,reviews,issues,issue_comments,commits,teams,members
steps:
- uses: actions/checkout@v4
- name: Download trino-rest
run: |
curl -fLOsS https://github.com/nineinchnick/trino-rest/releases/download/v$TRINO_REST_VERSION/trino-rest-github-$TRINO_REST_VERSION.zip
unzip trino-rest-github-$TRINO_REST_VERSION.zip
- name: Start Trino
run: |
cat <<EOF >config.properties
coordinator=true
node-scheduler.include-coordinator=true
http-server.http.port=8080
discovery.uri=http://localhost:8080
query.max-memory-per-node=4086929818B
EOF
CONTAINER_ID=$(docker run \
-v $(pwd)/config.properties:/etc/trino/config.properties \
-v $(pwd)/trino-rest-github-$TRINO_REST_VERSION:/usr/lib/trino/plugin/github \
-v $(pwd)/catalog/github.properties:/etc/trino/catalog/github.properties \
-v $(pwd)/catalog/trinocicd.properties:/etc/trino/catalog/trinocicd.properties \
-v $(pwd)/hive-cache:/opt/hive-cache \
-v $(pwd)/sql:/sql \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
-e AWS_REGION \
-e GITHUB_TOKEN \
-p 8080:8080 \
--name trino \
-d \
trinodb/trino:$TRINO_VERSION)
./bin/run-trino.sh
SERVER_IP=$(docker inspect --format '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$CONTAINER_ID")
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
echo "SERVER_IP=$SERVER_IP" >> $GITHUB_ENV
i=0
until docker inspect "${CONTAINER_ID}" --format "{{json .State.Health.Status }}" | grep -q '"healthy"'; do
if [[ $((i++)) -ge 10 ]]; then
echo "🚨 Too many retries waiting for Trino to start"
exit 1
fi
sleep 10
done
- name: Run Sync
run: |
docker run \
Expand Down
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,10 @@ dist
.idea

# custom Trino connectors
/trino-rest-github-*
/trino-git-*
/bin/trino-rest-github-*
/bin/trino-git-*
/bin/config.properties
/bin/hive-cache

# Jekyll
_site
Expand Down
76 changes: 76 additions & 0 deletions bin/run-trino.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env bash
#
# Start a Trino container with additional connectors and catalogs
# configured and wait for it to be ready.
#
# Required environment variables (forwarded into the container):
#   AWS_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, GITHUB_TOKEN
#
# Files created next to this script:
#   trino-git-<version>/, trino-rest-github-<version>/ (downloaded plugins),
#   config.properties and hive-cache/.
#
# Exits non-zero when a required command or variable is missing, when a
# download or `docker run` fails, or when the container does not report
# a "healthy" status within the retry budget.

set -euo pipefail

for cmd in curl unzip docker; do
    if ! command -v "$cmd" >/dev/null; then
        echo >&2 "Missing the $cmd command"
        exit 1
    fi
done

# ${VAR:-} expands to an empty string when VAR is unset. A plain "$VAR" would
# make `set -u` abort with "unbound variable" before the message below is shown.
if [ -z "${AWS_REGION:-}" ] || [ -z "${AWS_ACCESS_KEY_ID:-}" ] || [ -z "${AWS_SECRET_ACCESS_KEY:-}" ] || [ -z "${GITHUB_TOKEN:-}" ]; then
    echo >&2 "Following environmental variables need to be set: AWS_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, GITHUB_TOKEN"
    exit 1
fi

CONTAINER_NAME=trino
TRINO_VERSION=455
TRINO_GIT_VERSION=0.83
TRINO_REST_VERSION=0.154

# Work from the directory holding this script so downloads and generated
# files land in the same place regardless of the caller's working directory.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
cd "$SCRIPT_DIR" || exit 1

# Download each plugin only when its unpacked directory is not already present.
if [ ! -d "trino-git-$TRINO_GIT_VERSION" ]; then
    curl -fLOsS "https://github.com/nineinchnick/trino-git/releases/download/v$TRINO_GIT_VERSION/trino-git-$TRINO_GIT_VERSION.zip"
    unzip "trino-git-$TRINO_GIT_VERSION.zip"
fi

if [ ! -d "trino-rest-github-$TRINO_REST_VERSION" ]; then
    curl -fLOsS "https://github.com/nineinchnick/trino-rest/releases/download/v$TRINO_REST_VERSION/trino-rest-github-$TRINO_REST_VERSION.zip"
    unzip "trino-rest-github-$TRINO_REST_VERSION.zip"
fi

# Server configuration mounted into the container as /etc/trino/config.properties.
cat <<EOF >config.properties
coordinator=true
node-scheduler.include-coordinator=true
http-server.http.port=8080
discovery.uri=http://localhost:8080
query.max-memory-per-node=4086929818B
EOF

# The cache directory is bind-mounted into the container; it is made
# world-writable, presumably so the container's user can write to it.
mkdir -p "$SCRIPT_DIR/hive-cache"
chmod 777 "$SCRIPT_DIR/hive-cache"
docker run \
    -v "$SCRIPT_DIR/config.properties:/etc/trino/config.properties" \
    -v "$SCRIPT_DIR/trino-git-$TRINO_GIT_VERSION:/usr/lib/trino/plugin/git" \
    -v "$SCRIPT_DIR/trino-rest-github-$TRINO_REST_VERSION:/usr/lib/trino/plugin/github" \
    -v "$SCRIPT_DIR/../catalog/git.properties:/etc/trino/catalog/git.properties" \
    -v "$SCRIPT_DIR/../catalog/github.properties:/etc/trino/catalog/github.properties" \
    -v "$SCRIPT_DIR/../catalog/trinocicd.properties:/etc/trino/catalog/trinocicd.properties" \
    -v "$SCRIPT_DIR/hive-cache:/opt/hive-cache" \
    -v "$SCRIPT_DIR/../sql:/sql" \
    -e AWS_ACCESS_KEY_ID \
    -e AWS_SECRET_ACCESS_KEY \
    -e AWS_REGION \
    -e GITHUB_TOKEN \
    -p 8080:8080 \
    --name "$CONTAINER_NAME" \
    -d \
    "trinodb/trino:$TRINO_VERSION"

# Poll the container's health status every 10 seconds and give up once the
# retry counter reaches 10. The assignment sits inside the `until` condition,
# so a failing `docker inspect` is retried instead of tripping `set -e`.
i=0
until status=$(docker inspect "$CONTAINER_NAME" --format "{{json .State.Health.Status }}") && echo "$status" | grep -q '"healthy"'; do
    if [[ $((i++)) -ge 10 ]]; then
        echo >&2 "🚨 Too many retries waiting for Trino to start"
        exit 1
    fi
    echo >&2 "Status is: $status, sleeping 10 seconds"
    sleep 10
done

echo >&2 "Status is: $status, connect to trino://localhost:8080/trinocicd/v2"
7 changes: 4 additions & 3 deletions catalog/trinocicd.properties
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ hive.s3.aws-access-key=${ENV:AWS_ACCESS_KEY_ID}
hive.s3.aws-secret-key=${ENV:AWS_SECRET_ACCESS_KEY}
hive.non-managed-table-writes-enabled=true
hive.storage-format=ORC
#hive.cache.enabled=true
#hive.cache.location=/opt/hive-cache
#hive.cache.start-server-on-coordinator=true
fs.cache.enabled=true
fs.cache.directories=/opt/hive-cache
fs.cache.max-sizes=500MB
fs.cache.ttl=1d
45 changes: 45 additions & 0 deletions sql/ci-cd/failing-jobs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
-- Recently failing jobs
-- Lists failed jobs on master branch in last 24h
WITH
-- Completed "ci" runs of trinodb/trino on the master branch created within
-- the last day, each with a link to its run page formatted as <url|label>.
master_runs_last_day AS (
    SELECT
        r.id
      , r.check_suite_id
      , format('<https://github.com/trinodb/trino/actions/runs/%d|%d>', r.id, r.run_number) AS details_link
      , r.conclusion
    FROM runs r
    WHERE r.owner = 'trinodb' AND r.repo = 'trino' AND r.name = 'ci'
        -- do not include pull requests, to filter out PR runs from master branch in forks
        AND r.head_branch = 'master' AND r.event != 'pull_request' AND r.status = 'completed'
        AND r.created_at > now() - interval '1' day
)
-- Single-row total of those runs; the denominator of the failure percentage.
, run_totals AS (
    SELECT count(*) AS num_runs
    FROM master_runs_last_day
)
-- For every job name that did not succeed (and was not skipped) in a
-- non-successful run: the number of such runs and the links to them.
, failing_jobs AS (
    SELECT
        cr.name
      , count(DISTINCT mr.id) AS num_failed_runs
      , array_agg(DISTINCT mr.details_link ORDER BY mr.details_link) AS failed_runs
    FROM master_runs_last_day mr
    JOIN check_runs cr ON cr.check_suite_id = mr.check_suite_id
    WHERE mr.conclusion != 'success'
        AND cr.conclusion NOT IN ('success', 'skipped')
    GROUP BY cr.name
)

-- One bullet per failing job: name, share of recent runs it failed in,
-- the raw counts, and the list of run links on the following line.
SELECT
    format(
        '• %s - %.1f%% (%d/%d)%n %s'
      , fj.name
      , 100e0 * fj.num_failed_runs / t.num_runs
      , fj.num_failed_runs
      , t.num_runs
      , array_join(fj.failed_runs, ', ')
    ) AS "Jobs"
FROM failing_jobs fj
CROSS JOIN run_totals t
ORDER BY fj.num_failed_runs DESC, fj.name
;
Loading