mirror of
https://gitlab.com/wgp/dougal/software.git
synced 2025-12-06 05:47:07 +00:00
Kill runner if it hangs around for too long.
This gives the import processes a chance to run.
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Maximum runtime in seconds before killing an overdue instance (e.g., 10 minutes)
|
||||
MAX_RUNTIME_SECONDS=$((15 * 60))
|
||||
|
||||
DOUGAL_ROOT=${DOUGAL_ROOT:-$(dirname "$0")/..}
|
||||
|
||||
@@ -80,8 +82,9 @@ function run () {
|
||||
# DESCRIPTION=""
|
||||
SERVICE="deferred_imports"
|
||||
|
||||
$BINDIR/send_alert.py -t "$TITLE" -s "$SERVICE" -l "critical" \
|
||||
-O "$(cat $STDOUTLOG)" -E "$(cat $STDERRLOG)"
|
||||
# Disable GitLab alerts. They're just not very practical
|
||||
# $BINDIR/send_alert.py -t "$TITLE" -s "$SERVICE" -l "critical" \
|
||||
# -O "$(cat $STDOUTLOG)" -E "$(cat $STDERRLOG)"
|
||||
|
||||
exit 2
|
||||
}
|
||||
@@ -97,14 +100,37 @@ function cleanup () {
|
||||
}
|
||||
|
||||
if [[ -f $LOCKFILE ]]; then
|
||||
PID=$(cat "$LOCKFILE")
|
||||
if pgrep -F "$LOCKFILE"; then
|
||||
print_warning $(printf "The previous process is still running (%d)" $PID)
|
||||
exit 1
|
||||
else
|
||||
rm "$LOCKFILE"
|
||||
print_warning $(printf "Previous process (%d) not found. Must have died unexpectedly" $PID)
|
||||
fi
|
||||
PID=$(cat "$LOCKFILE")
|
||||
if kill -0 "$PID" 2>/dev/null; then # Check if process is running
|
||||
# Get elapsed time in D-HH:MM:SS format and convert to seconds
|
||||
ELAPSED_STR=$(ps -p "$PID" -o etime= | tr -d '[:space:]')
|
||||
if [ -n "$ELAPSED_STR" ]; then
|
||||
# Convert D-HH:MM:SS to seconds
|
||||
ELAPSED_SECONDS=$(echo "$ELAPSED_STR" | awk -F'[-:]' '{
|
||||
seconds = 0
|
||||
if (NF == 4) { seconds += $1 * 86400 } # Days
|
||||
if (NF >= 3) { seconds += $NF-2 * 3600 } # Hours
|
||||
if (NF >= 2) { seconds += $NF-1 * 60 } # Minutes
|
||||
seconds += $NF # Seconds
|
||||
print seconds
|
||||
}')
|
||||
if [ "$ELAPSED_SECONDS" -gt "$MAX_RUNTIME_SECONDS" ]; then
|
||||
# Kill the overdue process (SIGTERM; use -9 for SIGKILL if needed)
|
||||
kill "$PID" 2>/dev/null
|
||||
print_warning $(printf "Killed overdue process (%d) that ran for %s (%d seconds)" "$PID" "$ELAPSED_STR" "$ELAPSED_SECONDS")
|
||||
rm "$LOCKFILE"
|
||||
else
|
||||
print_warning $(printf "Previous process is still running (%d) for %s (%d seconds)" "$PID" "$ELAPSED_STR" "$ELAPSED_SECONDS")
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
print_warning $(printf "Could not retrieve elapsed time for process (%d)" "$PID")
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
rm "$LOCKFILE"
|
||||
print_warning $(printf "Previous process (%d) not found. Must have died unexpectedly" "$PID")
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "$$" > "$LOCKFILE" || {
|
||||
|
||||
Reference in New Issue
Block a user