#!/bin/bash
# health_check.sh
#
# Liveness probe for a freqtrade bot, intended to be run from cron every
# 5 minutes. A run counts as failed when either
#   1. no running Docker container whose name matches "freqtrade" exists, or
#   2. the newest "Bot heartbeat" line within the last 100 lines of the log
#      is missing, unparsable, or at least HEARTBEAT_MAX_AGE seconds old.
# After MAX_FAILS consecutive failed runs an alert is pushed through
# ServerChan (sctapi.ftqq.com) and the failure counter is reset.
#
# Optional environment overrides (defaults match the historical values):
#   HEALTH_FILE  file holding the consecutive-failure counter
#   MAX_FAILS    failures required before an alert is pushed
#   SENDKEY      ServerChan send key
#   LOG_FILE     freqtrade log file to scan for heartbeats
#
# `set -e` is deliberately not used: a failing probe is an expected outcome
# here and must be counted, not abort the script.
set -u

# NOTE(review): predictable name in world-writable /tmp. The content is
# validated before use (see read_fail_count), but a private state directory
# would be safer. Default kept for backward compatibility.
HEALTH_FILE="${HEALTH_FILE:-/tmp/freqtrade_health}"
MAX_FAILS="${MAX_FAILS:-3}"
# SECURITY(review): this key is committed to version control. Rotate it and
# provide SENDKEY via the environment; the literal remains only so existing
# cron entries keep working.
SENDKEY="${SENDKEY:-SCT361386TEDBtHzmIJ7wEoumDj5M1L8OZ}"
LOG_FILE="${LOG_FILE:-/home/ubuntu/freqtrade/user_data/logs/freqtrade.log}"
# A heartbeat must be younger than this many seconds to count as alive.
HEARTBEAT_MAX_AGE=300

#######################################
# Check whether a freqtrade container is running.
# The docker "name" filter is a substring match, so several containers may
# match; any non-empty result counts as running.
# Returns: 0 if at least one matching container is running, non-zero
#          otherwise (including when docker itself fails).
#######################################
container_running() {
  local ids
  ids=$(docker ps --filter "name=freqtrade" --filter "status=running" -q) \
    || return 1
  [[ -n "$ids" ]]
}

#######################################
# Print the epoch time of the newest heartbeat in the log tail.
# Arguments: $1 - path to the log file
# Outputs:   epoch seconds on stdout
# Returns:   non-zero if the file is missing, no heartbeat line is found in
#            the last 100 lines, or its timestamp cannot be parsed.
#######################################
last_heartbeat_epoch() {
  local log_file=$1
  local line
  local stamp_re='^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}'

  [[ -f "$log_file" ]] || return 1
  line=$(tail -n 100 -- "$log_file" | grep -F -- "Bot heartbeat" | tail -n 1)
  [[ "$line" =~ $stamp_re ]] || return 1
  # NOTE(review): date parses the stamp in the local timezone of this host;
  # confirm the log is written in the same timezone.
  date -d "${BASH_REMATCH[0]}" +%s 2>/dev/null
}

#######################################
# Decide whether the newest heartbeat is recent enough.
# Arguments: $1 - path to the log file
#            $2 - maximum allowed age in seconds (exclusive)
# Returns:   0 if a heartbeat younger than $2 seconds exists, 1 otherwise.
#######################################
heartbeat_fresh() {
  local log_file=$1 max_age=$2
  local hb_ts now_ts

  hb_ts=$(last_heartbeat_epoch "$log_file") || return 1
  # Guard the arithmetic below against an empty or malformed value.
  [[ "$hb_ts" =~ ^-?[0-9]+$ ]] || return 1
  now_ts=$(date +%s)
  (( now_ts - hb_ts < max_age ))
}

#######################################
# Read the stored consecutive-failure counter.
# The file content is never evaluated arithmetically unless it consists of
# digits only; bash arithmetic would otherwise execute command substitutions
# embedded in it.
# Arguments: $1 - path to the counter file
# Outputs:   the counter in base 10, or 0 if the file is missing, empty or
#            not a plain non-negative integer.
#######################################
read_fail_count() {
  local state_file=$1
  local raw=""

  if [[ -f "$state_file" ]]; then
    IFS= read -r raw < "$state_file" || true
  fi
  if [[ "$raw" =~ ^[0-9]+$ ]]; then
    # 10# keeps values with leading zeros from being read as octal.
    printf '%d\n' "$((10#$raw))"
  else
    printf '0\n'
  fi
}

#######################################
# Build the Markdown alert body with real line breaks.
# Arguments: $1 - 1 if the container is running, 0 otherwise
#            $2 - 1 if the heartbeat is fresh, 0 otherwise
#            $3 - number of consecutive failed checks
# Outputs:   the message on stdout
#######################################
build_alert_body() {
  local docker_ok=$1 heartbeat_ok=$2 fails=$3
  local docker_state="已停止"
  local heartbeat_state="异常"

  if (( docker_ok >= 1 )); then
    docker_state="运行中"
  fi
  if (( heartbeat_ok >= 1 )); then
    heartbeat_state="正常"
  fi

  printf '%s\n' \
    "## Freqtrade 异常告警" \
    "" \
    "**时间**: $(date '+%Y-%m-%d %H:%M:%S')" \
    "" \
    "**Docker**: ${docker_state}" \
    "**心跳**: ${heartbeat_state}" \
    "**连续检查失败**: ${fails}次" \
    "" \
    "请 SSH 检查:" \
    '```' \
    "docker logs freqtrade --tail 30" \
    '```'
}

#######################################
# Push an alert through ServerChan.
# Globals:   SENDKEY (read)
# Arguments: $1 - title, $2 - Markdown body
# Returns:   curl's exit status; non-zero on transport errors, timeouts and
#            HTTP status >= 400. Errors reported only inside the response
#            body are not inspected.
#######################################
send_alert() {
  local title=$1 body=$2

  # --data-urlencode keeps emoji, spaces, '#', '*' and newlines intact in the
  # form body; the timeout stops a hung request from piling up cron jobs.
  curl -sS -f --max-time 15 \
    --data-urlencode "title=${title}" \
    --data-urlencode "desp=${body}" \
    "https://sctapi.ftqq.com/${SENDKEY}.send" > /dev/null
}

#######################################
# Run both probes, update the failure counter and alert when it reaches
# MAX_FAILS.
# Globals:   HEALTH_FILE, MAX_FAILS, LOG_FILE, HEARTBEAT_MAX_AGE (read)
# Returns:   non-zero only when an alert was due but could not be pushed.
#######################################
main() {
  local docker_ok=0 heartbeat_ok=0 fails

  if container_running; then
    docker_ok=1
  fi
  if heartbeat_fresh "$LOG_FILE" "$HEARTBEAT_MAX_AGE"; then
    heartbeat_ok=1
  fi

  # Healthy again: clear the failure counter.
  if (( docker_ok == 1 && heartbeat_ok == 1 )); then
    rm -f -- "$HEALTH_FILE"
    return 0
  fi

  fails=$(( $(read_fail_count "$HEALTH_FILE") + 1 ))
  printf '%s\n' "$fails" > "$HEALTH_FILE"

  if (( fails < MAX_FAILS )); then
    return 0
  fi

  # Threshold reached: push the alert. The counter is reset only after a
  # successful push, so a failed push is retried on the next run.
  if send_alert "⚠️ Freqtrade 异常告警" \
    "$(build_alert_body "$docker_ok" "$heartbeat_ok" "$fails")"; then
    echo "告警已推送"
    rm -f -- "$HEALTH_FILE"
  else
    echo "告警推送失败" >&2
    return 1
  fi
}

main "$@"