mirror of
https://github.com/myronblair/jarvis
synced 2026-06-30 17:50:23 -05:00
Autonomous systems: watchdog, smart deploy, site health, auto-heal, agent installer
- deploy/jarvis-watchdog.sh: self-healing watchdog (every 5 min)
  * monitors lsws/mysql/redis, restarts on failure
  * JARVIS HTTP self-check, restarts OLS on 5xx
  * disk/memory alerts inserted to DB
  * offline Proxmox VM agents restarted via qm guest exec
  * log rotation (1000 line cap)
- deploy/jarvis-deploy.sh: smart deploy with PHP validation
  * php8.3 syntax check on every changed .php file
  * auto-reverts git commit + inserts critical alert on syntax error
  * reloads OLS after JARVIS deploys
- api/endpoints/facts_collector.php: site health monitoring
  * curls all 7 managed sites every 3 min
  * stores up/down status in kb_facts
- api/endpoints/alerts.php: auto-heal + site alerts
  * dispatches restart_service commands when services down on agents
  * generates alerts from kb_facts site health data
- public_html/install-agent.sh: one-liner Linux agent installer
  * installs deps, downloads agent, registers with JARVIS, sets up systemd
- public_html/webhook.php: fixed infra deploy path to /opt/infra
This commit is contained in:
@@ -87,15 +87,56 @@ function refresh_agent_alerts(): void {
|
||||
}
|
||||
}
|
||||
|
||||
// Services down
|
||||
// Services down — alert AND dispatch auto-restart command
|
||||
foreach (($d['services'] ?? []) as $svc) {
|
||||
if (($svc['status'] ?? '') === 'active') continue;
|
||||
if (($svc['status'] ?? '') === 'unknown') continue; // not watched/installed
|
||||
if (($svc['status'] ?? '') === 'unknown') continue;
|
||||
$svcName = $svc['service'] ?? '';
|
||||
$key = 'agent:' . $id . ':svc:' . $svcName;
|
||||
upsert_alert($key, 'warning', 'Service Down: ' . $svcName . ' on ' . $hn,
|
||||
$svcName . ' is ' . ($svc['status'] ?? 'inactive') . ' on ' . $hn . '.');
|
||||
$still_active[$key] = true;
|
||||
// Auto-dispatch restart if no pending command already queued
|
||||
$pending = JarvisDB::query(
|
||||
"SELECT id FROM agent_commands WHERE agent_id=? AND command_type='restart_service'
|
||||
AND status IN ('pending','delivered') AND created_at > DATE_SUB(NOW(), INTERVAL 10 MINUTE)
|
||||
AND JSON_EXTRACT(command_data,'$.service')=?",
|
||||
[$id, $svcName]
|
||||
);
|
||||
if (empty($pending)) {
|
||||
JarvisDB::query(
|
||||
"INSERT INTO agent_commands (agent_id, command_type, command_data, status)
|
||||
VALUES (?,?,?,?)",
|
||||
[$id, 'restart_service', json_encode(['service' => $svcName]), 'pending']
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Site health alerts from kb_facts ──────────────────────────────────────
|
||||
$siteKeys = ['jarvis','tomsjavajive','epictravelexp','parkersling','orbishosting','orbisportal','tomtomgames'];
|
||||
$siteNames = [
|
||||
'jarvis' => 'jarvis.orbishosting.com',
|
||||
'tomsjavajive' => 'tomsjavajive.com',
|
||||
'epictravelexp'=> 'epictravelexpeditions.com',
|
||||
'parkersling' => 'parkerslingshot.epictravelexpeditions.com',
|
||||
'orbishosting' => 'orbishosting.com',
|
||||
'orbisportal' => 'orbis.orbishosting.com',
|
||||
'tomtomgames' => 'tomtomgames.com',
|
||||
];
|
||||
$siteFacts = JarvisDB::query(
|
||||
"SELECT fact_key, fact_value FROM kb_facts WHERE category='sites'
|
||||
AND updated_at > DATE_SUB(NOW(), INTERVAL 10 MINUTE)"
|
||||
);
|
||||
foreach ($siteFacts as $sf) {
|
||||
$skey = $sf['fact_key'];
|
||||
$status = $sf['fact_value'];
|
||||
$domain = $siteNames[$skey] ?? $skey;
|
||||
if ($status !== 'up') {
|
||||
$alertKey = 'site:' . $skey . ':down';
|
||||
upsert_alert($alertKey, 'critical', 'Site Down: ' . $domain,
|
||||
$domain . ' returned status ' . $status . '. Site may be unreachable.');
|
||||
$still_active[$alertKey] = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user