mirror of
https://github.com/myronblair/jarvis
synced 2026-06-30 17:50:23 -05:00
ecbc2e09a5
1. agent.py: shell allow-check reads cfg, not server payload (RCE fix) 2. webhook.php: move WEBHOOK_SECRET to gitignored config.php; rotate secret 3. agent.py: replace recursive main() with while loop (RecursionError fix) 4. jarvis-deploy.sh: push force-revert to GitHub on syntax fail (loop fix) 5. agent.py: self_update() verifies SHA-256 before exec (integrity fix) 6. agent.php: remove JARVIS_IP from browser-action bypass (auth fix) 7. jarvis-watchdog.sh: escape SQL vars in alert() to prevent injection 8. jarvis-deploy.sh: atomic mv instead of cat+truncate (TOCTOU fix) Also: distribute jarvis-agent.py.sha256 alongside agent for integrity checks
119 lines
5.8 KiB
Bash
Executable File
119 lines
5.8 KiB
Bash
Executable File
#!/bin/bash
|
|
# JARVIS Self-Healing Watchdog — runs every 5 min via root cron
|
|
# Checks: lsws, mysql, redis, JARVIS HTTP, disk, memory
|
|
# Auto-heals: restarts failed services, restarts offline Proxmox VM agents
|
|
# Logs to: /home/jarvis.orbishosting.com/logs/watchdog.log
|
|
|
|
# File that log() appends watchdog messages to.
LOG=/home/jarvis.orbishosting.com/logs/watchdog.log

# MySQL client command prefix. Callers append exactly one SQL string as the
# final argument (-s: silent output, -e: execute the given statement).
# It is expanded unquoted on purpose so that it word-splits into argv, which
# means the option-file path below must not contain whitespace.
#
# When a readable client option file exists (path overridable through
# JARVIS_DB_CNF) the credentials are read from it, which keeps the password
# out of the process list. Otherwise the legacy inline credentials are used,
# so existing installations keep working unchanged.
# NOTE(review): the inline password is committed to source control — rotate
# it, move it into the option file, then delete the fallback branch.
JARVIS_DB_CNF=${JARVIS_DB_CNF:-/root/.jarvis-watchdog.cnf}
if [[ -r "$JARVIS_DB_CNF" ]]; then
    # --defaults-extra-file has to be the first option on the command line.
    MYSQL="mysql --defaults-extra-file=$JARVIS_DB_CNF jarvis_db -se"
else
    MYSQL="mysql -u jarvis_user -pJ4rv1s_Pr0t0c0l_2026! jarvis_db -se"
fi
|
|
# Print the current local time as "YYYY-MM-DD HH:MM:SS" on stdout.
TS() {
    local fmt='+%Y-%m-%d %H:%M:%S'
    date "$fmt"
}
|
|
|
|
# Append one line of the form "[<output of TS>] <message>" to the file in $LOG.
#   $1 - message text
log() {
    local stamp
    stamp=$(TS)
    printf '%s\n' "[${stamp}] $1" >> "$LOG"
}
|
|
|
|
# Escape a value so it can be placed inside a single-quoted MySQL string.
# Backslashes are doubled first (MySQL treats "\" as an escape character in
# its default SQL mode), then every single quote is doubled (''). The result
# can neither terminate the literal early nor escape its closing quote.
# The previous expression rewrote ' as \'' which closed the literal early.
#   $1     - raw text
#   stdout - escaped text (no newline is appended)
sql_esc() {
    printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e "s/'/''/g"
}
|
|
|
|
# Record a watchdog alert in the alerts table.
#   $1 - alert type; also forms the source_key "watchdog:<type>"
#   $2 - title
#   $3 - message
#   $4 - severity (optional, defaults to "warning")
# Every value goes through sql_esc before being placed in the statement.
# The row is written with INSERT IGNORE and auto_resolve=1; whether a repeated
# alert is dropped depends on the table's unique keys, which are not visible
# in this script.
# Returns the mysql client's exit status. A failure is additionally written
# to the watchdog log instead of being silently discarded.
alert() {
    local type="$1" title="$2" msg="$3" sev="${4:-warning}"
    local e_type e_title e_msg e_sev
    local rc=0

    e_type=$(sql_esc "$type")
    e_title=$(sql_esc "$title")
    e_msg=$(sql_esc "$msg")
    e_sev=$(sql_esc "$sev")

    # $MYSQL is left unquoted so it splits into command + options; the
    # client's stderr is discarded exactly as before.
    $MYSQL "INSERT IGNORE INTO alerts (alert_type,title,message,severity,source_key,auto_resolve)
            VALUES ('$e_type','$e_title','$e_msg','$e_sev','watchdog:$e_type',1);" 2>/dev/null || rc=$?

    if (( rc != 0 )); then
        log "ERROR: could not record alert '$type' ($title) — mysql exit $rc"
    fi
    return "$rc"
}
|
|
|
|
# Mark every still-open alert with source_key "watchdog:<key>" as resolved.
#   $1 - key suffix (escaped with sql_esc before use)
# Returns the mysql client's exit status. A failure is additionally written
# to the watchdog log instead of being silently discarded.
resolve() {
    local e_key
    local rc=0

    e_key=$(sql_esc "$1")

    # $MYSQL is left unquoted so it splits into command + options; the
    # client's stderr is discarded exactly as before.
    $MYSQL "UPDATE alerts SET resolved=1,resolved_at=NOW()
            WHERE source_key='watchdog:$e_key' AND resolved=0;" 2>/dev/null || rc=$?

    if (( rc != 0 )); then
        log "ERROR: could not resolve alert 'watchdog:$1' — mysql exit $rc"
    fi
    return "$rc"
}
|
|
|
|
# ── Service health ─────────────────────────────────────────────────────────────
# For each unit: when it is not active, restart it, re-check, and raise an
# alert describing the outcome; when it is active, clear any open alert.
# The alert type is per service ("service_down_<unit>") so that the
# source_key written by alert() is the same key resolve() clears. Previously
# alerts were stored under "service_down" while resolve() looked for
# "service_down_<unit>", so they were never auto-resolved.
for SVC in lsws mysql redis; do
    if systemctl is-active --quiet "$SVC"; then
        resolve "service_down_$SVC"
        continue
    fi

    log "HEAL: $SVC is down — restarting"
    systemctl restart "$SVC"

    if systemctl is-active --quiet "$SVC"; then
        log "HEAL: $SVC restarted successfully"
        alert "service_down_$SVC" "$SVC restarted" "JARVIS watchdog restarted $SVC which was stopped." "warning"
    else
        log "ERROR: $SVC failed to restart"
        alert "service_down_$SVC" "$SVC failed to restart" "$SVC is down and could not be restarted automatically." "critical"
    fi
done
|
|
|
|
# ── JARVIS HTTP self-check ─────────────────────────────────────────────────────
# Request the API endpoint (TLS verification off, 10 s limit) and keep only
# the status code. A 5xx code, an empty value, or "000" counts as a failure:
# curl prints 000 for %{http_code} when no HTTP response was received
# (connection refused, timeout, TLS failure). Previously 000 fell through to
# the healthy branch and resolved the alert while the site was unreachable.
HTTP_CODE=$(curl -sk -o /dev/null -w "%{http_code}" --max-time 10 https://jarvis.orbishosting.com/api.php 2>/dev/null)

if [[ -z "$HTTP_CODE" || "$HTTP_CODE" == "000" || "$HTTP_CODE" == 5* ]]; then
    log "HEAL: JARVIS HTTP returned $HTTP_CODE — restarting lsws"
    systemctl restart lsws
    alert "jarvis_http" "JARVIS HTTP error — restarted OLS" "JARVIS returned HTTP $HTTP_CODE. OpenLiteSpeed was restarted." "critical"
else
    resolve "jarvis_http"
fi
|
|
|
|
# ── Disk usage ─────────────────────────────────────────────────────────────────
# Percentage used on the root filesystem. -P forces the POSIX one-line-per-
# filesystem layout so a long device name cannot wrap and shift the columns.
DISK_PCT=$(df -P / | awk 'NR==2 { sub(/%/, "", $5); print $5 }')

if [[ ! "$DISK_PCT" =~ ^[0-9]+$ ]]; then
    # Without this guard a non-numeric value made both numeric tests error out
    # and fall through to the branch that resolves the disk alerts.
    log "ERROR: could not determine root filesystem usage (got '${DISK_PCT}')"
elif [ "$DISK_PCT" -ge 90 ]; then
    log "ALERT: Disk at ${DISK_PCT}% (critical)"
    alert "disk_critical" "Disk ${DISK_PCT}% full on DO server" "Root filesystem is ${DISK_PCT}% full. Immediate cleanup required." "critical"
elif [ "$DISK_PCT" -ge 80 ]; then
    log "WARN: Disk at ${DISK_PCT}%"
    alert "disk_warning" "Disk ${DISK_PCT}% full on DO server" "Root filesystem is ${DISK_PCT}% full." "warning"
else
    # Below 80%: clear both disk alerts with a single statement.
    $MYSQL "UPDATE alerts SET resolved=1,resolved_at=NOW() WHERE source_key IN ('watchdog:disk_critical','watchdog:disk_warning') AND resolved=0;" 2>/dev/null
fi
|
|
|
|
# ── Memory usage ──────────────────────────────────────────────────────────────
# Used share = (MemTotal - MemAvailable) / MemTotal, values taken from
# /proc/meminfo. Both values are validated first: a missing MemAvailable
# line or a zero total would otherwise abort the arithmetic expansion.
MEM_TOTAL=$(awk '$1 == "MemTotal:" { print $2; exit }' /proc/meminfo)
MEM_AVAIL=$(awk '$1 == "MemAvailable:" { print $2; exit }' /proc/meminfo)

if [[ "$MEM_TOTAL" =~ ^[0-9]+$ && "$MEM_AVAIL" =~ ^[0-9]+$ ]] && (( MEM_TOTAL > 0 )); then
    MEM_PCT=$(( (MEM_TOTAL - MEM_AVAIL) * 100 / MEM_TOTAL ))
    if [ "$MEM_PCT" -ge 90 ]; then
        log "ALERT: Memory at ${MEM_PCT}%"
        alert "mem_critical" "Memory ${MEM_PCT}% used on DO server" "DO server memory is ${MEM_PCT}% used." "critical"
    else
        # The alert is stored with auto_resolve=1 but nothing ever cleared it;
        # resolve it once usage is back under the threshold, like the other checks.
        resolve "mem_critical"
    fi
else
    log "ERROR: could not read MemTotal/MemAvailable from /proc/meminfo"
fi
|
|
|
|
# ── Offline agent auto-restart (Proxmox VMs only) ─────────────────────────────
# Map: agent_id → "proxmox_ip vmid"
declare -A AGENT_PVE=(
    ["ollama_vm"]="10.48.200.90 210"
    ["ha_vm"]="10.48.200.90 101"
    ["networkbackup_vm"]="10.48.200.91 302"
)

# Linux agents flagged offline whose last_seen is more than 5 minutes old.
OFFLINE=$($MYSQL "SELECT agent_id FROM registered_agents
                  WHERE status='offline' AND last_seen < DATE_SUB(NOW(), INTERVAL 5 MINUTE)
                  AND agent_type='linux';" 2>/dev/null)

# Split the result on newlines only. Iterating the unquoted string would also
# glob-expand and word-split agent ids that come straight from the database.
mapfile -t OFFLINE_IDS <<< "$OFFLINE"

for AID in "${OFFLINE_IDS[@]}"; do
    [[ -n "$AID" ]] || continue   # an empty result still yields one blank entry

    # Check if we have a Proxmox mapping for this agent. The match is a
    # substring test in either direction, as before.
    # NOTE(review): a very short agent id can match an unrelated key — confirm
    # whether an exact match is intended.
    for KEY in "${!AGENT_PVE[@]}"; do
        if [[ "$AID" == *"$KEY"* || "$KEY" == *"$AID"* ]]; then
            read -r -a PVE_INFO <<< "${AGENT_PVE[$KEY]}"
            PVE_IP="${PVE_INFO[0]}"
            VMID="${PVE_INFO[1]}"

            log "HEAL: Attempting to restart jarvis-agent on $AID (VM $VMID @ $PVE_IP)"

            # NOTE(review): the root SSH password is hard-coded and passed on
            # the command line, and host-key checking is disabled. Switch to
            # key-based auth with a pinned known_hosts entry and rotate this
            # password.
            sshpass -p 'Joker1974!!!' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 \
                root@"$PVE_IP" \
                "qm guest exec $VMID -- systemctl restart jarvis-agent" 2>/dev/null
            SSH_RC=$?

            log "HEAL: Restart command sent to $AID (exit: $SSH_RC)"

            # Report what actually happened: previously a failed ssh was still
            # recorded as a successful auto-restart.
            if (( SSH_RC == 0 )); then
                alert "agent_offline" "Auto-restarted agent: $AID" \
                    "Agent $AID was offline. JARVIS watchdog sent restart command via Proxmox." "warning"
            else
                log "ERROR: restart command for $AID failed (exit: $SSH_RC)"
                alert "agent_offline" "Agent restart failed: $AID" \
                    "Agent $AID is offline and the restart command via Proxmox failed (exit $SSH_RC)." "critical"
            fi
            break
        fi
    done
done
|
|
|
|
# ── Log rotation (trim to the newest 500 lines once a log exceeds 1000) ───────

# Trim FILE to its last KEEP lines when it holds more than MAX lines.
#   $1 - log file path (a missing file is not an error)
#   $2 - line-count threshold, default 1000
#   $3 - number of trailing lines to keep, default 500
# The trimmed copy is written to a mktemp file next to the log, given the
# log's owner and mode, and then renamed over it. A fixed "<file>.tmp" name
# written by root could be pre-created as a symlink by whoever can write to
# the log directory, and a plain rename left the log with root's ownership.
# Returns 0 when nothing had to be done or the trim succeeded, non-zero
# otherwise (the temp file is removed and the log is left untouched).
rotate_log() {
    local file="$1" max="${2:-1000}" keep="${3:-500}"
    local count tmp

    [ -f "$file" ] || return 0
    count=$(wc -l < "$file") || return
    [ "$count" -gt "$max" ] || return 0

    tmp=$(mktemp "${file}.XXXXXX") || return
    if tail -n "$keep" "$file" > "$tmp" \
        && chown --reference="$file" "$tmp" \
        && chmod --reference="$file" "$tmp" \
        && mv -f "$tmp" "$file"; then
        return 0
    fi
    rm -f "$tmp"
    return 1
}

for LOGFILE in "$LOG" /home/jarvis.orbishosting.com/logs/deploy.log /home/jarvis.orbishosting.com/logs/cron.log; do
    rotate_log "$LOGFILE" 1000 500
done
|