From 30150a8728e96f5ca83a7166b75005360c6efca6 Mon Sep 17 00:00:00 2001
From: "D. Berge"
Date: Thu, 21 Aug 2025 15:33:05 +0200
Subject: [PATCH] Kill runner if it hangs around for too long.

This gives the import processes a chance to run.
---
 bin/runner.sh | 46 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/bin/runner.sh b/bin/runner.sh
index 663496b..191abe1 100755
--- a/bin/runner.sh
+++ b/bin/runner.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+# Maximum runtime in seconds before killing an overdue instance (15 minutes)
+MAX_RUNTIME_SECONDS=$((15 * 60))
 
 DOUGAL_ROOT=${DOUGAL_ROOT:-$(dirname "$0")/..}
 
@@ -80,8 +82,9 @@ function run () {
   # DESCRIPTION=""
   SERVICE="deferred_imports"
 
-  $BINDIR/send_alert.py -t "$TITLE" -s "$SERVICE" -l "critical" \
-    -O "$(cat $STDOUTLOG)" -E "$(cat $STDERRLOG)"
+# Disable GitLab alerts. They're just not very practical
+#  $BINDIR/send_alert.py -t "$TITLE" -s "$SERVICE" -l "critical" \
+#    -O "$(cat $STDOUTLOG)" -E "$(cat $STDERRLOG)"
 
   exit 2
 }
@@ -97,14 +100,37 @@ function cleanup () {
 }
 
 if [[ -f $LOCKFILE ]]; then
-  PID=$(cat "$LOCKFILE")
-  if pgrep -F "$LOCKFILE"; then
-    print_warning $(printf "The previous process is still running (%d)" $PID)
-    exit 1
-  else
-    rm "$LOCKFILE"
-    print_warning $(printf "Previous process (%d) not found. Must have died unexpectedly" $PID)
-  fi
+    PID=$(cat "$LOCKFILE")
+    if kill -0 "$PID" 2>/dev/null; then # Check if process is running
+        # Get elapsed time in [[DD-]HH:]MM:SS format and convert to seconds
+        ELAPSED_STR=$(ps -p "$PID" -o etime= | tr -d '[:space:]')
+        if [ -n "$ELAPSED_STR" ]; then
+            # Fields are counted from the right; $(NF-k) must be parenthesised, as $NF-k means ($NF)-k
+            ELAPSED_SECONDS=$(echo "$ELAPSED_STR" | awk -F'[-:]' '{
+                seconds = 0
+                if (NF == 4) { seconds += $1 * 86400 } # Days
+                if (NF >= 3) { seconds += $(NF-2) * 3600 } # Hours
+                if (NF >= 2) { seconds += $(NF-1) * 60 } # Minutes
+                seconds += $NF # Seconds
+                print seconds
+            }')
+            if [ "$ELAPSED_SECONDS" -gt "$MAX_RUNTIME_SECONDS" ]; then
+                # Kill the overdue process (SIGTERM; use -9 for SIGKILL if needed)
+                kill "$PID" 2>/dev/null
+                print_warning $(printf "Killed overdue process (%d) that ran for %s (%d seconds)" "$PID" "$ELAPSED_STR" "$ELAPSED_SECONDS")
+                rm "$LOCKFILE"
+            else
+                print_warning $(printf "Previous process is still running (%d) for %s (%d seconds)" "$PID" "$ELAPSED_STR" "$ELAPSED_SECONDS")
+                exit 1
+            fi
+        else
+            print_warning $(printf "Could not retrieve elapsed time for process (%d)" "$PID")
+            exit 1
+        fi
+    else
+        rm "$LOCKFILE"
+        print_warning $(printf "Previous process (%d) not found. Must have died unexpectedly" "$PID")
+    fi
 fi
 
 echo "$$" > "$LOCKFILE" || {