feat: add health check script (cron */5, 3 fails = alert)

This commit is contained in:
FXY
2026-06-12 00:39:35 +08:00
parent df3c0bc287
commit 406af3ad45

58
scripts/health_check.sh Executable file
View File

@@ -0,0 +1,58 @@
#!/bin/bash
# health_check.sh
#
# Liveness probe for a freqtrade bot, meant to be run from cron every
# 5 minutes. Two checks are performed on each run:
#   1. a running Docker container whose name matches "freqtrade" exists;
#   2. the newest "Bot heartbeat" line in the freqtrade log is recent.
# If either check fails, a consecutive-failure counter stored in HEALTH_FILE
# is incremented. Once it reaches MAX_FAILS an alert is pushed through the
# ServerChan (Server酱) HTTP API and the counter is reset. A fully healthy
# run clears the counter.
#
# Every setting below may be overridden from the environment; the defaults
# are the values the script originally hard-coded.

set -u

# NOTE(review): /tmp is world-writable and this name is predictable. The
# file content is validated before use (see read_fail_count), but moving the
# state file to a private directory would be safer.
HEALTH_FILE="${HEALTH_FILE:-/tmp/freqtrade_health}"
MAX_FAILS="${MAX_FAILS:-3}"
# NOTE(review): a push key committed to version control should be treated as
# leaked. Rotate it and supply SENDKEY via the environment; the literal
# fallback is kept only so existing cron entries keep working.
SENDKEY="${SENDKEY:-SCT361386TEDBtHzmIJ7wEoumDj5M1L8OZ}"
LOG_FILE="${LOG_FILE:-/home/ubuntu/freqtrade/user_data/logs/freqtrade.log}"
# Maximum accepted age of the newest heartbeat line, in seconds.
HEARTBEAT_MAX_AGE=300

# Succeeds when at least one running container matches the name filter.
# The filter can match several containers, so the result is reduced to a
# yes/no answer instead of a count.
docker_running() {
  local ids
  ids=$(docker ps --filter "name=freqtrade" --filter "status=running" -q)
  [[ -n "$ids" ]]
}

# Succeeds when the newest "Bot heartbeat" line among the last 100 log lines
# carries a timestamp less than HEARTBEAT_MAX_AGE seconds old.
# NOTE(review): `date -d` parses the timestamp in this host's local time
# zone; this assumes the log is written in the same zone - confirm.
heartbeat_fresh() {
  local last_hb hb_ts now_ts age

  [[ -f "$LOG_FILE" ]] || return 1

  last_hb=$(tail -n 100 -- "$LOG_FILE" \
    | grep "Bot heartbeat" \
    | tail -n 1 \
    | grep -oP '^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
  [[ -n "$last_hb" ]] || return 1

  # An unparsable timestamp counts as a missing heartbeat.
  hb_ts=$(date -d "$last_hb" +%s 2>/dev/null) || return 1
  [[ "$hb_ts" =~ ^[0-9]+$ ]] || return 1

  now_ts=$(date +%s)
  age=$((now_ts - hb_ts))
  ((age < HEARTBEAT_MAX_AGE))
}

# Prints the stored consecutive-failure count, or 0 when the state file is
# missing or does not hold a plain decimal number. The content must never
# reach an arithmetic expansion unvalidated: bash evaluates such text as an
# expression, which can execute embedded command substitutions.
read_fail_count() {
  local raw=""

  if [[ -f "$HEALTH_FILE" ]]; then
    # read returns non-zero on a missing trailing newline but still fills raw.
    IFS= read -r raw < "$HEALTH_FILE" || true
  fi

  if [[ "$raw" =~ ^[0-9]{1,9}$ ]]; then
    # 10# forces base 10 so a value such as "08" is not parsed as octal.
    printf '%s\n' "$((10#$raw))"
  else
    printf '0\n'
  fi
}

# Pushes the alert to ServerChan.
# Arguments: $1 - docker check result (1 = ok, 0 = failed)
#            $2 - heartbeat check result (1 = ok, 0 = failed)
#            $3 - consecutive failure count
# Returns:   curl's exit status; non-zero on transport errors, timeouts and
#            HTTP status >= 400 (because of -f).
send_alert() {
  local docker_ok=$1 heartbeat_ok=$2 fails=$3
  local docker_txt="已停止" heartbeat_txt="异常"
  local title="⚠️ Freqtrade 异常告警"
  local desp

  if ((docker_ok == 1)); then docker_txt="运行中"; fi
  if ((heartbeat_ok == 1)); then heartbeat_txt="正常"; fi

  # Build the Markdown body with real newlines; "\n" inside double quotes
  # would be sent as a literal backslash followed by 'n'.
  local -a lines=(
    "## Freqtrade 异常告警"
    ""
    "**时间**: $(date '+%Y-%m-%d %H:%M:%S')"
    ""
    "**Docker**: ${docker_txt}"
    "**心跳**: ${heartbeat_txt}"
    "**连续检查失败**: ${fails}次"
    ""
    "请 SSH 检查:"
    '```'
    "docker logs freqtrade --tail 30"
    '```'
  )
  desp=$(printf '%s\n' "${lines[@]}")

  # --data-urlencode keeps spaces, '#', '*', newlines and non-ASCII text
  # intact in the form body (and implies POST); --max-time stops a hung
  # connection from piling up overlapping cron runs.
  curl -fsS --max-time 15 \
    "https://sctapi.ftqq.com/${SENDKEY}.send" \
    --data-urlencode "title=${title}" \
    --data-urlencode "desp=${desp}" > /dev/null
}

# Runs both checks, updates the failure counter and alerts when needed.
main() {
  local docker_ok=0 heartbeat_ok=0 fails

  if docker_running; then docker_ok=1; fi
  if heartbeat_fresh; then heartbeat_ok=1; fi

  # Healthy again: drop the consecutive-failure counter.
  if ((docker_ok == 1 && heartbeat_ok == 1)); then
    rm -f -- "$HEALTH_FILE"
    return 0
  fi

  fails=$(($(read_fail_count) + 1))
  printf '%s\n' "$fails" > "$HEALTH_FILE"

  if ((fails >= MAX_FAILS)); then
    if send_alert "$docker_ok" "$heartbeat_ok" "$fails"; then
      echo "告警已推送"
      # Restart the count so the next alert needs MAX_FAILS fresh failures.
      rm -f -- "$HEALTH_FILE"
    else
      # Keep the counter so the next cron run retries the push.
      printf '%s\n' "告警推送失败,保留失败计数以便下次重试" >&2
      return 1
    fi
  fi
}

main "$@"