Skip to content

Commit

Permalink
fixup! Post CI health reports to Slack
Browse files Browse the repository at this point in the history
  • Loading branch information
nineinchnick committed Sep 3, 2024
1 parent 6274c23 commit d71446b
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 75 deletions.
40 changes: 29 additions & 11 deletions .github/workflows/reports.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,27 @@ jobs:
- name: Generate Slack message
id: message
run: |
for name in health failing-jobs; do
output=$(docker exec trino \
java -Dorg.jline.terminal.dumb=true -jar /usr/bin/trino \
trino://localhost:8080/trinocicd/v2 \
--file /sql/ci-cd/$name.sql \
--output-format=ALIGNED)
output=$(docker exec trino \
java -Dorg.jline.terminal.dumb=true -jar /usr/bin/trino \
trino://localhost:8080/trinocicd/v2 \
--file /sql/ci-cd/health.sql \
--output-format=VERTICAL | tail -n+2)
EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
echo "$name<<$EOF" >> $GITHUB_OUTPUT
echo '```'"$output"'```' >> $GITHUB_OUTPUT
echo "$EOF" >> $GITHUB_OUTPUT
done
EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
echo "health<<$EOF" >> $GITHUB_OUTPUT
echo '```'"$output"'```' >> $GITHUB_OUTPUT
echo "$EOF" >> $GITHUB_OUTPUT
output=$(docker exec trino \
java -Dorg.jline.terminal.dumb=true -jar /usr/bin/trino \
trino://localhost:8080/trinocicd/v2 \
--file /sql/ci-cd/failing-jobs.sql \
--output-format=CSV_UNQUOTED)
EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
echo "failing-jobs<<$EOF" >> $GITHUB_OUTPUT
echo "$output" >> $GITHUB_OUTPUT
echo "$EOF" >> $GITHUB_OUTPUT
- name: Post to a Slack channel
id: slack
uses: slackapi/[email protected]
Expand Down Expand Up @@ -86,6 +95,15 @@ jobs:
"type": "mrkdwn",
"text": ${{ toJSON(steps.message.outputs.failing-jobs) }}
}
},
{
"type": "context",
"elements": [
{
"type": "mrkdwn",
"text": "See the complete <https://trinodb.github.io/reports/reports/flaky/|flaky jobs report>"
}
]
}
]
}
Expand Down
41 changes: 13 additions & 28 deletions sql/ci-cd/failing-jobs.sql
Original file line number Diff line number Diff line change
@@ -1,60 +1,45 @@
-- Recently failing jobs
-- Lists failed jobs on master branch in last 24h
WITH
-- ascii_bar(value): render a ratio as a horizontal text bar up to 20 cells wide.
-- The built-in bar() function renders colored output and draws whole-cell boxes only,
-- so it has less precision within the same width; this version finishes the bar with
-- a one-eighth-step partial block instead.
--   value   ratio to draw; clamped to [0, 1] before use
--   returns varchar made of full blocks followed by exactly one glyph from the
--           nine-entry array below (a space when the remainder is zero)
-- NOTE(review): the block glyphs were missing from the string literals (all were '');
-- they are restored here as U+2588 and the left one-eighth..seven-eighths blocks - confirm
-- against the intended rendering.
-- NOTE(review): for values that are not exact multiples of 1 / max_width the "- 1" below
-- appears to draw one full cell fewer than floor(max_width * value) - confirm intended.
FUNCTION ascii_bar(value double)
RETURNS varchar
DETERMINISTIC
BEGIN
    DECLARE max_width double DEFAULT 20;
    DECLARE clamped_value double;
    -- clamp first so the array subscript below always stays within 1..9
    SET clamped_value = greatest(0, least(1, value));
    RETURN array_join(
        -- whole cells
        repeat('█',
            greatest(0, CAST(floor(max_width * clamped_value) AS integer) - 1)), '')
        -- trailing cell: remainder within one cell, scaled to eighths, 1-based subscript
        || ARRAY[' ', '▏', '▎', '▍', '▌', '▋', '▊', '▉', '█'][cast((clamped_value % (1e0 / max_width)) * max_width * 8 + 1 as int)];
END
WITH
recent_master_runs AS (
SELECT
id
, check_suite_id
, 'https://github.com/trinodb/trino/actions/runs/' || cast(id as varchar) AS details_url
, format('<https://github.com/trinodb/trino/actions/runs/%d|%d>', id, run_number) AS details_link
, conclusion
FROM runs
WHERE owner = 'trinodb' AND repo = 'trino' AND name = 'ci'
-- only include push events, not pull requests, to filter out PR runs from master branch in forks
AND head_branch = 'master' AND event = 'push' AND status = 'completed'
-- do not include pull requests, to filter out PR runs from master branch in forks
AND head_branch = 'master' AND event != 'pull_request' AND status = 'completed'
AND created_at > now() - interval '1' day
)
, failed_recent_master_runs AS (
SELECT
id
, check_suite_id
, details_url
, details_link
FROM recent_master_runs
WHERE conclusion != 'success'
)
, failed_jobs AS (
SELECT
jobs.name
, count(DISTINCT runs.id) AS num_failed_runs
, array_agg(anno.title || chr(10) || anno.message ORDER BY anno.message) AS errors
, array_agg(DISTINCT runs.details_url ORDER BY runs.details_url) AS failed_runs
, array_agg(DISTINCT runs.details_link ORDER BY runs.details_link) AS failed_runs
FROM failed_recent_master_runs runs
JOIN check_runs jobs ON jobs.check_suite_id = runs.check_suite_id AND jobs.conclusion NOT IN ('success', 'skipped')
LEFT JOIN check_run_annotations anno ON anno.check_run_id = jobs.id
GROUP BY jobs.name
)

SELECT
name AS "Job name"
, ascii_bar(1e0 * num_failed_runs / (SELECT count(*) FROM recent_master_runs)) AS "Failure ratio chart"
, round(100e0 * num_failed_runs / (SELECT count(*) FROM recent_master_runs), 1) AS "Failure percent"
, num_failed_runs AS "Number of failed runs"
-- whole report must be under 3k characters to fit into a Slack notification
--, errors AS "Error messages"
, failed_runs AS "Run URLs"
format(
'• %s - %.1f%% (%d/%d)%n %s'
, name
, 100e0 * num_failed_runs / (SELECT count(*) FROM recent_master_runs)
, num_failed_runs
, (SELECT count(*) FROM recent_master_runs)
, array_join(failed_runs, ', ')
) AS "Jobs"
FROM failed_jobs
ORDER BY num_failed_runs DESC, name
;
89 changes: 53 additions & 36 deletions sql/ci-cd/health.sql
Original file line number Diff line number Diff line change
@@ -1,46 +1,63 @@
-- CI workflow health
WITH
-- ascii_bar(value): render a ratio as a horizontal text bar up to 20 cells wide.
-- The built-in bar() function renders colored output and draws whole-cell boxes only,
-- so it has less precision within the same width; this version finishes the bar with
-- a one-eighth-step partial block instead.
--   value   ratio to draw; clamped to [0, 1] before use
--   returns varchar made of full blocks followed by exactly one glyph from the
--           nine-entry array below (a space when the remainder is zero)
-- NOTE(review): the block glyphs were missing from the string literals (all were '');
-- they are restored here as U+2588 and the left one-eighth..seven-eighths blocks - confirm
-- against the intended rendering.
-- NOTE(review): for values that are not exact multiples of 1 / max_width the "- 1" below
-- appears to draw one full cell fewer than floor(max_width * value) - confirm intended.
FUNCTION ascii_bar(value double)
RETURNS varchar
DETERMINISTIC
BEGIN
    DECLARE max_width double DEFAULT 20;
    DECLARE clamped_value double;
    -- clamp first so the array subscript below always stays within 1..9
    SET clamped_value = greatest(0, least(1, value));
    RETURN array_join(
        -- whole cells
        repeat('█',
            greatest(0, CAST(floor(max_width * clamped_value) AS integer) - 1)), '')
        -- trailing cell: remainder within one cell, scaled to eighths, 1-based subscript
        || ARRAY[' ', '▏', '▎', '▍', '▌', '▋', '▊', '▉', '█'][cast((clamped_value % (1e0 / max_width)) * max_width * 8 + 1 as int)];
END
-- Percentage of successful runs of the `ci` workflow on the master branch.
-- vertical_bar(value, day): render one day's ratio as a single column glyph.
--   value   ratio to draw; callers are expected to pass a value in [0.0, 1.0]
--           (it is not clamped here, so anything outside that range would
--           subscript past the nine-entry array)
--   day     calendar day the value belongs to; only used to detect weekends
--   returns a one-character varchar
-- NOTE(review): the glyphs were missing from the string literals (all were '');
-- the lower one-eighth..full blocks are restored below, and the weekend placeholder
-- is reconstructed as a light shade - confirm both against the intended rendering.
WITH FUNCTION vertical_bar(value DOUBLE, day DATE)
RETURNS VARCHAR
DETERMINISTIC
RETURN CASE
    -- for weekends, if missing or zero, grey it out
    WHEN value = 0 AND day_of_week(day) IN (6,7) THEN '░'
    -- map [0.0, 1.0] to [1, 9]
    ELSE ARRAY[' ', '▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'][cast(value * 8 + 1 as int)]
END
WITH
runs AS (
SELECT
CASE head_branch
WHEN 'master' THEN head_branch
ELSE '[other]'
END AS branch
, created_at
date(created_at) AS created_at
, conclusion
, count(*) AS num_runs
, count(*) FILTER (WHERE conclusion = 'success') AS num_success
FROM runs
WHERE owner = 'trinodb' AND repo = 'trino'
AND name = 'ci' AND created_at >= CURRENT_DATE - INTERVAL '30' DAY
WHERE owner = 'trinodb' AND repo = 'trino' AND name = 'ci'
AND head_branch = 'master' AND event != 'pull_request' AND status = 'completed'
AND created_at >= CURRENT_DATE - INTERVAL '7' DAY
GROUP BY date(created_at), conclusion
)
, days AS (
SELECT seq.day
FROM (SELECT min(created_at) AS first_day , max(created_at) AS last_day FROM runs) range
CROSS JOIN UNNEST (sequence(range.first_day, range.last_day)) seq(day)
)
, daily AS (
SELECT
created_at
, conclusion
, num_runs
, num_success
, 1e0 * num_success / num_runs AS ratio
FROM days
LEFT JOIN runs ON runs.created_at = days.day
)
, intervals(days, label) AS (
VALUES
(INTERVAL '1' DAY, '1 day')
, (INTERVAL '3' DAY, '3 days')
, (INTERVAL '7' DAY, '7 days')
, (INTERVAL '30' DAY, '30 days')
, summary AS (
SELECT
sum(num_runs) AS num_runs
, sum(num_success) AS num_success
, 1e0 * sum(num_success) / sum(num_runs) AS ratio
, array_join(array_agg(vertical_bar(coalesce(ratio, 0), created_at) ORDER BY created_at DESC), '') AS chart
FROM daily
)
, latest AS (
SELECT
num_runs
, num_success
, 1e0 * num_success / num_runs AS ratio
FROM runs
ORDER BY created_at DESC
LIMIT 1
)
SELECT
branch AS "Branch"
, intervals.label AS "Interval"
, ascii_bar(1e0 * count(1) FILTER (WHERE conclusion = 'success') / count(1)) AS "Success ratio chart"
, round(100e0 * count(1) FILTER (WHERE conclusion = 'success') / count(1), 1) AS "Success percent"
, count(1) FILTER (WHERE created_at > now() - intervals.days) AS "Number of runs"
FROM intervals
JOIN runs ON runs.created_at > now() - intervals.days
GROUP BY branch, intervals.days, intervals.label
ORDER BY branch DESC, intervals.days
format('%.1f%% (%d/%d)', 100e0 * latest.ratio, latest.num_success, latest.num_runs) AS "Today"
, format('%.1f%% (%d/%d)', 100e0 * summary.ratio, summary.num_success, summary.num_runs) AS "Weekly"
, summary.chart AS "Daily (desc)"
FROM summary
CROSS JOIN latest
;
1 change: 1 addition & 0 deletions sql/flaky/jobs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ WITH report_configuration AS (
AND jobs.conclusion IS NOT NULL -- ignore partially ingested information
AND jobs.conclusion != 'skipped'
AND jobs.conclusion != 'cancelled'
AND jobs.name NOT LIKE 'check-commit%' -- these include a commit SHA and are always unique
)
, analyzed_job_runs AS (
-- When using "Re-run failed jobs", previously successful jobs appear as successful, which could lead to
Expand Down

0 comments on commit d71446b

Please sign in to comment.