mirror of
https://github.com/myronblair/jarvis
synced 2026-06-30 17:50:23 -05:00
Autonomous systems: watchdog, smart deploy, site health, auto-heal, agent installer
- deploy/jarvis-watchdog.sh: self-healing watchdog (every 5 min)
  * monitors lsws/mysql/redis, restarts on failure
  * JARVIS HTTP self-check, restarts OLS on 5xx
  * disk/memory alerts inserted into the DB
  * offline Proxmox VM agents restarted via qm guest exec
  * log rotation (1000-line cap)
- deploy/jarvis-deploy.sh: smart deploy with PHP validation
  * php8.3 syntax check on every changed .php file
  * auto-reverts the git commit and inserts a critical alert on syntax error
  * reloads OLS after JARVIS deploys
- api/endpoints/facts_collector.php: site health monitoring
  * curls all 7 managed sites every 3 min
  * stores up/down status in kb_facts
- api/endpoints/alerts.php: auto-heal + site alerts
  * dispatches restart_service commands when services are down on agents
  * generates alerts from kb_facts site health data
- public_html/install-agent.sh: one-liner Linux agent installer
  * installs deps, downloads agent, registers with JARVIS, sets up systemd
- public_html/webhook.php: fixed infra deploy path to /opt/infra
This commit is contained in:
@@ -87,15 +87,56 @@ function refresh_agent_alerts(): void {
|
||||
}
|
||||
}
|
||||
|
||||
// Services down
|
||||
// Services down — alert AND dispatch auto-restart command
|
||||
foreach (($d['services'] ?? []) as $svc) {
|
||||
if (($svc['status'] ?? '') === 'active') continue;
|
||||
if (($svc['status'] ?? '') === 'unknown') continue; // not watched/installed
|
||||
if (($svc['status'] ?? '') === 'unknown') continue;
|
||||
$svcName = $svc['service'] ?? '';
|
||||
$key = 'agent:' . $id . ':svc:' . $svcName;
|
||||
upsert_alert($key, 'warning', 'Service Down: ' . $svcName . ' on ' . $hn,
|
||||
$svcName . ' is ' . ($svc['status'] ?? 'inactive') . ' on ' . $hn . '.');
|
||||
$still_active[$key] = true;
|
||||
// Auto-dispatch restart if no pending command already queued
|
||||
$pending = JarvisDB::query(
|
||||
"SELECT id FROM agent_commands WHERE agent_id=? AND command_type='restart_service'
|
||||
AND status IN ('pending','delivered') AND created_at > DATE_SUB(NOW(), INTERVAL 10 MINUTE)
|
||||
AND JSON_EXTRACT(command_data,'$.service')=?",
|
||||
[$id, $svcName]
|
||||
);
|
||||
if (empty($pending)) {
|
||||
JarvisDB::query(
|
||||
"INSERT INTO agent_commands (agent_id, command_type, command_data, status)
|
||||
VALUES (?,?,?,?)",
|
||||
[$id, 'restart_service', json_encode(['service' => $svcName]), 'pending']
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Site health alerts from kb_facts ──────────────────────────────────────
|
||||
$siteKeys = ['jarvis','tomsjavajive','epictravelexp','parkersling','orbishosting','orbisportal','tomtomgames'];
|
||||
$siteNames = [
|
||||
'jarvis' => 'jarvis.orbishosting.com',
|
||||
'tomsjavajive' => 'tomsjavajive.com',
|
||||
'epictravelexp'=> 'epictravelexpeditions.com',
|
||||
'parkersling' => 'parkerslingshot.epictravelexpeditions.com',
|
||||
'orbishosting' => 'orbishosting.com',
|
||||
'orbisportal' => 'orbis.orbishosting.com',
|
||||
'tomtomgames' => 'tomtomgames.com',
|
||||
];
|
||||
$siteFacts = JarvisDB::query(
|
||||
"SELECT fact_key, fact_value FROM kb_facts WHERE category='sites'
|
||||
AND updated_at > DATE_SUB(NOW(), INTERVAL 10 MINUTE)"
|
||||
);
|
||||
foreach ($siteFacts as $sf) {
|
||||
$skey = $sf['fact_key'];
|
||||
$status = $sf['fact_value'];
|
||||
$domain = $siteNames[$skey] ?? $skey;
|
||||
if ($status !== 'up') {
|
||||
$alertKey = 'site:' . $skey . ':down';
|
||||
upsert_alert($alertKey, 'critical', 'Site Down: ' . $domain,
|
||||
$domain . ' returned status ' . $status . '. Site may be unreachable.');
|
||||
$still_active[$alertKey] = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -278,6 +278,39 @@ function collect_all(): array {
|
||||
$results['ollama'] = 'error: ' . $e->getMessage();
|
||||
}
|
||||
|
||||
// ── Site Health ───────────────────────────────────────────────────────
|
||||
try {
|
||||
$sites = [
|
||||
'jarvis' => 'https://jarvis.orbishosting.com',
|
||||
'tomsjavajive' => 'https://tomsjavajive.com',
|
||||
'epictravelexp'=> 'https://epictravelexpeditions.com',
|
||||
'parkersling' => 'https://parkerslingshot.epictravelexpeditions.com',
|
||||
'orbishosting' => 'https://orbishosting.com',
|
||||
'orbisportal' => 'https://orbis.orbishosting.com',
|
||||
'tomtomgames' => 'https://tomtomgames.com',
|
||||
];
|
||||
$down = [];
|
||||
foreach ($sites as $key => $url) {
|
||||
$ch = curl_init($url);
|
||||
curl_setopt_array($ch, [
|
||||
CURLOPT_RETURNTRANSFER => true,
|
||||
CURLOPT_FOLLOWLOCATION => true,
|
||||
CURLOPT_TIMEOUT => 10,
|
||||
CURLOPT_CONNECTTIMEOUT => 5,
|
||||
CURLOPT_NOBODY => true,
|
||||
]);
|
||||
curl_exec($ch);
|
||||
$code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
||||
curl_close($ch);
|
||||
$status = ($code >= 200 && $code < 400) ? 'up' : "down-$code";
|
||||
KBEngine::storeFact('sites', $key, $status, $url, 180);
|
||||
if ($status !== 'up') $down[] = "$key($code)";
|
||||
}
|
||||
$results['sites'] = empty($down) ? 'all up' : 'DOWN: ' . implode(', ', $down);
|
||||
} catch (Exception $e) {
|
||||
$results['sites'] = 'error: ' . $e->getMessage();
|
||||
}
|
||||
|
||||
return $results;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user