Merge pull request #25 from wunderio/feature/logs
Print events and logs for failing pods during deployment
Jancis authored Oct 27, 2022
2 parents c8ab1a7 + 1685780 commit 045671c
Showing 3 changed files with 263 additions and 65 deletions.
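In rough outline, the generated deploy and validate scripts patched below gain the failure-reporting pattern sketched here. This is a distilled, standalone rendition rather than the literal generated script: the variable values are placeholders for what fmt.Sprintf fills in, the jq dependency is assumed to be present on the CI runner, and the small defensive || true on the pod query is mine.

    #!/usr/bin/env bash
    # Standalone sketch of the failure reporting added in this commit (placeholder values).
    set -Eeuo pipefail

    RELEASE_NAME='example-release'
    NAMESPACE='example-namespace'
    DEPLOYMENT_TIMEOUT='15m'   # used as the --since window for container logs

    show_failing_pods() {
      echo ""
      # Pods in the release (cronjob pods excluded) whose containers are not all ready;
      # "<none>" also catches pods that have no container statuses yet.
      failed_pods=$(kubectl get pod -l "release=$RELEASE_NAME,cronjob!=true" -n "$NAMESPACE" \
        -o custom-columns="POD:metadata.name,STATE:status.containerStatuses[*].ready" --no-headers \
        | grep -E "<none>|false" | grep -Eo '^[^ ]+') || true
      if [[ -n "$failed_pods" ]]; then
        echo "Failing pods:"
        while IFS= read -r pod; do
          echo "---- ${NAMESPACE} / ${pod} ----"
          # Abnormal events for the pod: failed scheduling, image pulls, probes, ...
          kubectl get events --field-selector "involvedObject.name=${pod},type!=Normal" \
            --show-kind=true --ignore-not-found=true --namespace "${NAMESPACE}"
          # Recent logs from every container that is not ready.
          containers=$(kubectl get pod "${pod}" -n "${NAMESPACE}" -o json \
            | jq -r '.status.containerStatuses[]? | select(.ready == false) | .name')
          for container in ${containers}; do
            kubectl logs "${pod}" --prefix=true --since="${DEPLOYMENT_TIMEOUT}" \
              --namespace "${NAMESPACE}" -c "${container}"
          done
        done <<< "$failed_pods"
      fi
    }

    # Any failing command in the rest of the script now triggers the report.
    trap show_failing_pods ERR

The real scripts keep this function next to the helm upgrade call and also invoke it explicitly on timeout and on a failed upgrade, as the hunks below show.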
164 changes: 114 additions & 50 deletions cmd/ciReleaseDeploy.go
@@ -167,8 +167,8 @@ var ciReleaseDeployCmd = &cobra.Command{

// helm release
command = fmt.Sprintf(`
set -euo pipefail
set -Eeuo pipefail
RELEASE_NAME='%s'
CHART_NAME='%s'
CHART_REPOSITORY='%s'
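The only change in this first hunk is set -euo pipefail becoming set -Eeuo pipefail. The added -E (errtrace) makes the ERR trap installed further down inherited by shell functions, command substitutions and subshells; without it, a failure inside a function would abort the script without running show_failing_pods. A minimal illustration (hypothetical, not part of the commit):

    #!/usr/bin/env bash
    # Run as-is and the trap fires for the failure inside the function; change
    # -Eeuo to -euo and the ERR trap is no longer inherited by the function,
    # so the script exits without printing the message.
    set -Eeuo pipefail
    trap 'echo "ERR trap fired"' ERR

    fails_inside() {
        false   # any failing command inside a function body
    }
    fails_inside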
@@ -183,7 +183,39 @@ var ciReleaseDeployCmd = &cobra.Command{
NAMESPACE='%s'
SILTA_CONFIG='%s'
EXTRA_HELM_FLAGS='%s'
# Detect pods in FAILED state
function show_failing_pods() {
echo ""
failed_pods=$(kubectl get pod -l "release=$RELEASE_NAME,cronjob!=true" -n "$NAMESPACE" -o custom-columns="POD:metadata.name,STATE:status.containerStatuses[*].ready" --no-headers | grep -E "<none>|false" | grep -Eo '^[^ ]+')
if [[ ! -z "$failed_pods" ]] ; then
echo "Failing pods:"
while IFS= read -r pod; do
echo "---- ${NAMESPACE} / ${pod} ----"
echo "* Events"
kubectl get events --field-selector involvedObject.name=${pod},type!=Normal --show-kind=true --ignore-not-found=true --namespace ${NAMESPACE}
echo ""
echo "* Logs"
containers=$(kubectl get pods "${pod}" --namespace "${NAMESPACE}" -o json | jq -r 'try .status | .containerStatuses[] | select(.ready == false).name')
if [[ ! -z "$containers" ]] ; then
for container in ${containers}; do
kubectl logs "${pod}" --prefix=true --since="${DEPLOYMENT_TIMEOUT}" --namespace "${NAMESPACE}" -c "${container}"
done
else
echo "no logs found"
fi
echo "----"
done <<< "$failed_pods"
false
else
true
fi
}
trap show_failing_pods ERR
helm upgrade --install "${RELEASE_NAME}" "${CHART_NAME}" \
--repo "${CHART_REPOSITORY}" \
${EXTRA_CHART_VERSION} \
@@ -216,7 +248,7 @@ var ciReleaseDeployCmd = &cobra.Command{

// helm release
command = fmt.Sprintf(`
set -euo pipefail
set -Eeuo pipefail
RELEASE_NAME='%s'
CHART_NAME='%s'
@@ -240,17 +272,36 @@ var ciReleaseDeployCmd = &cobra.Command{
# Detect pods in FAILED state
function show_failing_pods() {
failed_pods=$(kubectl get pod -l "release=$RELEASE_NAME,cronjob!=true" -n "$NAMESPACE" --no-headers | grep -Ev '([0-9]+)/\1' | grep -Eo '^[^ ]+')
echo ""
failed_pods=$(kubectl get pod -l "release=$RELEASE_NAME,cronjob!=true" -n "$NAMESPACE" -o custom-columns="POD:metadata.name,STATE:status.containerStatuses[*].ready" --no-headers | grep -E "<none>|false" | grep -Eo '^[^ ]+')
if [[ ! -z "$failed_pods" ]] ; then
echo "Failing pods:"
echo "$failed_pods"
echo ""
echo "Please check logs for the pods above"
while IFS= read -r pod; do
echo "---- ${NAMESPACE} / ${pod} ----"
echo "* Events"
kubectl get events --field-selector involvedObject.name=${pod},type!=Normal --show-kind=true --ignore-not-found=true --namespace ${NAMESPACE}
echo ""
echo "* Logs"
containers=$(kubectl get pods "${pod}" --namespace "${NAMESPACE}" -o json | jq -r 'try .status | .containerStatuses[] | select(.ready == false).name')
if [[ ! -z "$containers" ]] ; then
for container in ${containers}; do
kubectl logs "${pod}" --prefix=true --since="${DEPLOYMENT_TIMEOUT}" --namespace "${NAMESPACE}" -c "${container}"
done
else
echo "no logs found"
fi
echo "----"
done <<< "$failed_pods"
false
else
true
fi
false
}
trap show_failing_pods ERR
helm upgrade --install "${RELEASE_NAME}" "${CHART_NAME}" \
--repo "${CHART_REPOSITORY}" \
${EXTRA_CHART_VERSION} \
@@ -279,30 +330,27 @@ var ciReleaseDeployCmd = &cobra.Command{
LOGS_SHOWN=false
while true; do
if [ $LOGS_SHOWN == false ] && kubectl get pod -l job-name="${RELEASE_NAME}-post-release" -n "${NAMESPACE}" --ignore-not-found | grep -qE "Running|Completed" ; then
echo ""
echo "Deployment log:"
kubectl logs "job/${RELEASE_NAME}-post-release" -n "${NAMESPACE}" -f --timestamps=true || true
LOGS_SHOWN=true
echo ""
echo "Post-release log:"
kubectl logs "job/${RELEASE_NAME}-post-release" -n "${NAMESPACE}" -f --timestamps=true || true
LOGS_SHOWN=true
fi
# Helm command is complete.
if ! ps -p "$pid" > /dev/null; then
if grep -q BackoffLimitExceeded helm-output.log ; then
# Don't show BackoffLimitExceeded, it confuses everyone.
show_failing_pods
echo "The post-release job failed, see log output above."
else
echo "Helm output:"
cat helm-output.log
fi
wait $pid
break
wait $pid
if grep -q "UPGRADE FAILED" helm-output.log ; then
show_failing_pods
fi
break
fi
if [ $TIME_WAITING -gt 300 ]; then
echo "Timeout waiting for resources."
show_failing_pods
exit 1
echo "Timeout waiting for resources."
show_failing_pods
exit 1
fi
echo "."
@@ -314,9 +362,9 @@ var ciReleaseDeployCmd = &cobra.Command{
# Get all deployments and statefulsets in the release and check the status of each one.
statefulsets=$(kubectl get statefulset -n "$NAMESPACE" -l "release=${RELEASE_NAME}" -o name)
if [ ! -z "$statefulsets" ]; then
echo "$statefulsets" | xargs -n 1 kubectl rollout status -n "$NAMESPACE"
echo "$statefulsets" | xargs -n 1 kubectl rollout status -n "$NAMESPACE" --timeout 5m
fi
kubectl get deployment -n "$NAMESPACE" -l "release=${RELEASE_NAME}" -o name | xargs -n 1 kubectl rollout status -n "$NAMESPACE"
kubectl get deployment -n "$NAMESPACE" -l "release=${RELEASE_NAME}" -o name | xargs -n 1 kubectl rollout status -n "$NAMESPACE" --timeout 5m
`,
releaseName, chartName, chartRepository, chartVersionOverride,
siltaEnvironmentName, branchname,
@@ -423,8 +471,8 @@ var ciReleaseDeployCmd = &cobra.Command{

// TODO: rewrite the timeout handling and log printing after helm release
command = fmt.Sprintf(`
set -euo pipefail
set -Eeuo pipefail
RELEASE_NAME='%s'
CHART_NAME='%s'
CHART_REPOSITORY='%s'
@@ -451,17 +499,36 @@ var ciReleaseDeployCmd = &cobra.Command{
# Detect pods in FAILED state
function show_failing_pods() {
failed_pods=$(kubectl get pod -l "release=$RELEASE_NAME,cronjob!=true" -n "$NAMESPACE" --no-headers | grep -Ev '([0-9]+)/\1' | grep -Eo '^[^ ]+')
echo ""
failed_pods=$(kubectl get pod -l "release=$RELEASE_NAME,cronjob!=true" -n "$NAMESPACE" -o custom-columns="POD:metadata.name,STATE:status.containerStatuses[*].ready" --no-headers | grep -E "<none>|false" | grep -Eo '^[^ ]+')
if [[ ! -z "$failed_pods" ]] ; then
echo "Failing pods:"
echo "$failed_pods"
echo ""
echo "Please check logs for the pods above"
while IFS= read -r pod; do
echo "---- ${NAMESPACE} / ${pod} ----"
echo "* Events"
kubectl get events --field-selector involvedObject.name=${pod},type!=Normal --show-kind=true --ignore-not-found=true --namespace ${NAMESPACE}
echo ""
echo "* Logs"
containers=$(kubectl get pods "${pod}" --namespace "${NAMESPACE}" -o json | jq -r 'try .status | .containerStatuses[] | select(.ready == false).name')
if [[ ! -z "$containers" ]] ; then
for container in ${containers}; do
kubectl logs "${pod}" --prefix=true --since="${DEPLOYMENT_TIMEOUT}" --namespace "${NAMESPACE}" -c "${container}"
done
else
echo "no logs found"
fi
echo "----"
done <<< "$failed_pods"
false
else
true
fi
false
}
trap show_failing_pods ERR
helm upgrade --install "${RELEASE_NAME}" "${CHART_NAME}" \
--repo "${CHART_REPOSITORY}" \
${EXTRA_CHART_VERSION} \
@@ -494,30 +561,27 @@ var ciReleaseDeployCmd = &cobra.Command{
LOGS_SHOWN=false
while true; do
if [ $LOGS_SHOWN == false ] && kubectl get pod -l job-name="${RELEASE_NAME}-post-release" -n "${NAMESPACE}" --ignore-not-found | grep -qE "Running|Completed" ; then
echo ""
echo "Deployment log:"
kubectl logs "job/${RELEASE_NAME}-post-release" -n "${NAMESPACE}" -f --timestamps=true || true
LOGS_SHOWN=true
echo ""
echo "Post-release log:"
kubectl logs "job/${RELEASE_NAME}-post-release" -n "${NAMESPACE}" -f --timestamps=true || true
LOGS_SHOWN=true
fi
# Helm command is complete.
if ! ps -p "$pid" > /dev/null; then
if grep -q BackoffLimitExceeded helm-output.log ; then
# Don't show BackoffLimitExceeded, it confuses everyone.
show_failing_pods
echo "The post-release job failed, see log output above."
else
echo "Helm output:"
cat helm-output.log
fi
wait $pid
break
wait $pid
if grep -q "UPGRADE FAILED" helm-output.log ; then
show_failing_pods
fi
break
fi
if [ $TIME_WAITING -gt 300 ]; then
echo "Timeout waiting for resources."
show_failing_pods
exit 1
echo "Timeout waiting for resources."
show_failing_pods
exit 1
fi
echo "."
@@ -529,9 +593,9 @@ var ciReleaseDeployCmd = &cobra.Command{
# Get all deployments and statefulsets in the release and check the status of each one.
statefulsets=$(kubectl get statefulset -n "$NAMESPACE" -l "release=${RELEASE_NAME}" -o name)
if [ ! -z "$statefulsets" ]; then
echo "$statefulsets" | xargs -n 1 kubectl rollout status -n "$NAMESPACE"
echo "$statefulsets" | xargs -n 1 kubectl rollout status -n "$NAMESPACE" --timeout 5m
fi
kubectl get deployment -n "$NAMESPACE" -l "release=${RELEASE_NAME}" -o name | xargs -n 1 kubectl rollout status -n "$NAMESPACE"
kubectl get deployment -n "$NAMESPACE" -l "release=${RELEASE_NAME}" -o name | xargs -n 1 kubectl rollout status -n "$NAMESPACE" --timeout 5m
`,
releaseName, chartName, chartRepository, chartVersionOverride,
siltaEnvironmentName, branchname,
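Read together, the later hunks of cmd/ciReleaseDeploy.go leave the post-install wait looking roughly like the sketch below. It is a fragment meant to be read against the diff, not a standalone script: the backgrounded helm invocation, pid, and the TIME_WAITING bookkeeping are not visible in the hunks above and are assumptions about the surrounding script, while the messages and grep checks are taken from the diff.

    # Assumed context (not shown in the hunks above):
    #   helm upgrade --install "${RELEASE_NAME}" ... > helm-output.log 2>&1 &
    #   pid=$!
    LOGS_SHOWN=false
    TIME_WAITING=0
    while true; do
      # Stream the post-release job log once its pod is running or finished
      # (the label changed from "Deployment log" to "Post-release log").
      if [ $LOGS_SHOWN == false ] && kubectl get pod -l job-name="${RELEASE_NAME}-post-release" \
          -n "${NAMESPACE}" --ignore-not-found | grep -qE "Running|Completed"; then
        echo "Post-release log:"
        kubectl logs "job/${RELEASE_NAME}-post-release" -n "${NAMESPACE}" -f --timestamps=true || true
        LOGS_SHOWN=true
      fi

      # Helm has exited: print its output (hiding the confusing BackoffLimitExceeded
      # message), pick up its exit status with wait, and dump failing pods if the
      # upgrade failed.
      if ! ps -p "$pid" > /dev/null; then
        if grep -q BackoffLimitExceeded helm-output.log; then
          echo "The post-release job failed, see log output above."
        else
          echo "Helm output:"
          cat helm-output.log
        fi
        wait $pid
        if grep -q "UPGRADE FAILED" helm-output.log; then
          show_failing_pods
        fi
        break
      fi

      # Give up once the counter passes 300 and report failing pods.
      if [ $TIME_WAITING -gt 300 ]; then
        echo "Timeout waiting for resources."
        show_failing_pods
        exit 1
      fi
      echo "."
      sleep 5                              # assumed poll interval
      TIME_WAITING=$((TIME_WAITING + 5))   # assumed bookkeeping
    done

    # Rollout checks now give up after five minutes instead of waiting indefinitely.
    kubectl get deployment -n "$NAMESPACE" -l "release=${RELEASE_NAME}" -o name \
      | xargs -n 1 kubectl rollout status -n "$NAMESPACE" --timeout 5m

cmd/ciReleaseValidate.go (below) picks up the same set -E change plus the show_failing_pods function and ERR trap around its helm --dry-run.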
38 changes: 35 additions & 3 deletions cmd/ciReleaseValidate.go
@@ -114,8 +114,8 @@ var ciReleaseValidateCmd = &cobra.Command{

// TODO: rewrite the timeout handling and log printing after helm release
command := fmt.Sprintf(`
set -euo pipefail
set -Eeuo pipefail
RELEASE_NAME='%s'
CHART_NAME='%s'
CHART_REPOSITORY='%s'
@@ -127,7 +127,39 @@ var ciReleaseValidateCmd = &cobra.Command{
EXTRA_NOAUTHIPS='%s'
EXTRA_VPCNATIVE='%s'
EXTRA_CLUSTERTYPE='%s'
# Detect pods in FAILED state
function show_failing_pods() {
echo ""
failed_pods=$(kubectl get pod -l "release=$RELEASE_NAME,cronjob!=true" -n "$NAMESPACE" -o custom-columns="POD:metadata.name,STATE:status.containerStatuses[*].ready" --no-headers | grep -E "<none>|false" | grep -Eo '^[^ ]+')
if [[ ! -z "$failed_pods" ]] ; then
echo "Failing pods:"
while IFS= read -r pod; do
echo "---- ${NAMESPACE} / ${pod} ----"
echo "* Events"
kubectl get events --field-selector involvedObject.name=${pod},type!=Normal --show-kind=true --ignore-not-found=true --namespace ${NAMESPACE}
echo ""
echo "* Logs"
containers=$(kubectl get pods "${pod}" --namespace "${NAMESPACE}" -o json | jq -r 'try .status | .containerStatuses[] | select(.ready == false).name')
if [[ ! -z "$containers" ]] ; then
for container in ${containers}; do
kubectl logs "${pod}" --prefix=true --since="${DEPLOYMENT_TIMEOUT}" --namespace "${NAMESPACE}" -c "${container}"
done
else
echo "no logs found"
fi
echo "----"
done <<< "$failed_pods"
false
else
true
fi
}
trap show_failing_pods ERR
helm upgrade --dry-run --install "${RELEASE_NAME}" "${CHART_NAME}" \
--repo "${CHART_REPOSITORY}" \
${EXTRA_CHART_VERSION} \