mirror of
https://gitlab.com/wgp/dougal/software.git
synced 2025-12-06 09:37:08 +00:00
Kill runner if it hangs around for too long.
This gives the import processes a chance to run.
This commit is contained in:
@@ -1,5 +1,7 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Maximum runtime in seconds before killing an overdue instance (e.g., 15 minutes)
|
||||||
|
MAX_RUNTIME_SECONDS=$((15 * 60))
|
||||||
|
|
||||||
DOUGAL_ROOT=${DOUGAL_ROOT:-$(dirname "$0")/..}
|
DOUGAL_ROOT=${DOUGAL_ROOT:-$(dirname "$0")/..}
|
||||||
|
|
||||||
@@ -80,8 +82,9 @@ function run () {
|
|||||||
# DESCRIPTION=""
|
# DESCRIPTION=""
|
||||||
SERVICE="deferred_imports"
|
SERVICE="deferred_imports"
|
||||||
|
|
||||||
$BINDIR/send_alert.py -t "$TITLE" -s "$SERVICE" -l "critical" \
|
# Disable GitLab alerts. They're just not very practical
|
||||||
-O "$(cat $STDOUTLOG)" -E "$(cat $STDERRLOG)"
|
# $BINDIR/send_alert.py -t "$TITLE" -s "$SERVICE" -l "critical" \
|
||||||
|
# -O "$(cat $STDOUTLOG)" -E "$(cat $STDERRLOG)"
|
||||||
|
|
||||||
exit 2
|
exit 2
|
||||||
}
|
}
|
||||||
@@ -97,14 +100,37 @@ function cleanup () {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if [[ -f $LOCKFILE ]]; then
|
if [[ -f $LOCKFILE ]]; then
|
||||||
PID=$(cat "$LOCKFILE")
|
PID=$(cat "$LOCKFILE")
|
||||||
if pgrep -F "$LOCKFILE"; then
|
if kill -0 "$PID" 2>/dev/null; then # Check if process is running
|
||||||
print_warning $(printf "The previous process is still running (%d)" $PID)
|
# Get elapsed time in D-HH:MM:SS format and convert to seconds
|
||||||
exit 1
|
ELAPSED_STR=$(ps -p "$PID" -o etime= | tr -d '[:space:]')
|
||||||
else
|
if [ -n "$ELAPSED_STR" ]; then
|
||||||
rm "$LOCKFILE"
|
# Convert D-HH:MM:SS to seconds
|
||||||
print_warning $(printf "Previous process (%d) not found. Must have died unexpectedly" $PID)
|
ELAPSED_SECONDS=$(echo "$ELAPSED_STR" | awk -F'[-:]' '{
|
||||||
fi
|
seconds = 0
|
||||||
|
if (NF == 4) { seconds += $1 * 86400 } # Days
|
||||||
|
if (NF >= 3) { seconds += $(NF-2) * 3600 } # Hours: third field from the end; parentheses needed, $NF-2 would parse as ($NF)-2
|
||||||
|
if (NF >= 2) { seconds += $(NF-1) * 60 } # Minutes: second field from the end; parentheses needed, $NF-1 would parse as ($NF)-1
|
||||||
|
seconds += $NF # Seconds
|
||||||
|
print seconds
|
||||||
|
}')
|
||||||
|
if [ "$ELAPSED_SECONDS" -gt "$MAX_RUNTIME_SECONDS" ]; then
|
||||||
|
# Kill the overdue process (SIGTERM; use -9 for SIGKILL if needed)
|
||||||
|
kill "$PID" 2>/dev/null
|
||||||
|
print_warning $(printf "Killed overdue process (%d) that ran for %s (%d seconds)" "$PID" "$ELAPSED_STR" "$ELAPSED_SECONDS")
|
||||||
|
rm "$LOCKFILE"
|
||||||
|
else
|
||||||
|
print_warning $(printf "Previous process is still running (%d) for %s (%d seconds)" "$PID" "$ELAPSED_STR" "$ELAPSED_SECONDS")
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
print_warning $(printf "Could not retrieve elapsed time for process (%d)" "$PID")
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
rm "$LOCKFILE"
|
||||||
|
print_warning $(printf "Previous process (%d) not found. Must have died unexpectedly" "$PID")
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "$$" > "$LOCKFILE" || {
|
echo "$$" > "$LOCKFILE" || {
|
||||||
|
|||||||
Reference in New Issue
Block a user