mirror of
https://github.com/myronblair/jarvis
synced 2026-06-30 17:50:23 -05:00
dc55e6c45b
- 4-tier chat: HA control → Ollama → Groq → Claude - Push-based agent system with heartbeat/metrics - Network monitoring, alerts, Proxmox, Home Assistant - Windows + Linux agent installers - Stats cache cron, facts collector, KB engine
178 lines
7.8 KiB
PHP
178 lines
7.8 KiB
PHP
<?php
|
|
/**
|
|
* JARVIS Alerts API
|
|
* GET /api/alerts — return active alerts (auto-generates agent alerts first)
|
|
* POST /api/alerts/resolve — resolve an alert by id
|
|
* POST /api/alerts — manually create an alert
|
|
*/
|
|
|
|
// ── Auto-generate alerts from agent data ─────────────────────────────────────
|
|
|
|
/**
 * Re-evaluate agent health and synchronise the result into the alerts table.
 *
 * Raises (through upsert_alert) one alert for every agent that is offline or
 * has not been seen for 3 minutes, and one alert per breached CPU / memory /
 * disk / service condition found in each agent's newest 'system' metric sample
 * (samples older than 5 minutes are ignored). Afterwards, open auto-resolvable
 * agent alerts whose key was not raised during this run are marked resolved.
 *
 * Alert keys have the form "agent:<agent_id>:<condition>".
 */
function refresh_agent_alerts(): void {
    // Thresholds, in percent.
    $CPU_WARN  = 85;
    $CPU_CRIT  = 95;
    $MEM_WARN  = 85;
    $MEM_CRIT  = 95;
    $DISK_WARN = 88;
    $DISK_CRIT = 95;

    // Keys of every condition that is still present; used as a set (key => true).
    $still_active = [];

    // ── Offline agents ────────────────────────────────────────────────────────
    $offline = JarvisDB::query(
        "SELECT agent_id, hostname FROM registered_agents
         WHERE status='offline' OR last_seen < DATE_SUB(NOW(), INTERVAL 3 MINUTE)"
    );
    // `?: []` mirrors how the route code treats a falsy query result.
    foreach (($offline ?: []) as $ag) {
        $key = 'agent:' . $ag['agent_id'] . ':offline';
        upsert_alert($key, 'critical', 'Agent Offline: ' . $ag['hostname'],
            'JARVIS Agent on ' . $ag['hostname'] . ' is not responding. Last contact was more than 3 minutes ago.');
        $still_active[$key] = true;
    }

    // ── Metric-based alerts ───────────────────────────────────────────────────
    // Newest 'system' sample per agent, provided it is at most 5 minutes old.
    $latest = JarvisDB::query(
        "SELECT m.agent_id, m.metric_data, a.hostname
         FROM agent_metrics m
         JOIN registered_agents a ON a.agent_id = m.agent_id
         WHERE m.metric_type = 'system'
           AND (m.agent_id, m.recorded_at) IN (
               SELECT agent_id, MAX(recorded_at) FROM agent_metrics
               WHERE metric_type = 'system'
               GROUP BY agent_id
           )
           AND m.recorded_at > DATE_SUB(NOW(), INTERVAL 5 MINUTE)"
    );

    foreach (($latest ?: []) as $row) {
        $d = json_decode($row['metric_data'] ?? '{}', true);
        if (!is_array($d)) {
            // Malformed or non-object payload: nothing to evaluate for this agent.
            continue;
        }
        $hn = $row['hostname'];
        $id = $row['agent_id'];

        // CPU
        $cpu = (float)($d['cpu_percent'] ?? 0);
        if ($cpu >= $CPU_WARN) {
            $key = 'agent:' . $id . ':cpu_high';
            $sev = $cpu >= $CPU_CRIT ? 'critical' : 'warning';
            upsert_alert($key, $sev, 'High CPU: ' . $hn,
                round($cpu, 1) . '% CPU utilization on ' . $hn . '. Sustained high load detected.');
            $still_active[$key] = true;
        }

        // Memory
        $mem_pct = (float)($d['memory']['percent'] ?? 0);
        if ($mem_pct >= $MEM_WARN) {
            $key = 'agent:' . $id . ':mem_high';
            $sev = $mem_pct >= $MEM_CRIT ? 'critical' : 'warning';
            upsert_alert($key, $sev, 'High Memory: ' . $hn,
                round($mem_pct, 1) . '% memory used on ' . $hn .
                ' (' . round((float)($d['memory']['used_mb'] ?? 0)) . '/' .
                round((float)($d['memory']['total_mb'] ?? 0)) . ' MB).');
            $still_active[$key] = true;
        }

        // Disk: one alert per mount point at or above the warning threshold.
        $disks = $d['disk'] ?? [];
        foreach ((is_array($disks) ? $disks : []) as $disk) {
            $pct = (int)($disk['percent'] ?? 0);
            if ($pct < $DISK_WARN) {
                continue;
            }
            $mount = $disk['mount'] ?? '/';
            // Slashes in the mount path are replaced so the key stays a flat token.
            $key = 'agent:' . $id . ':disk:' . str_replace('/', '_', $mount);
            $sev = $pct >= $DISK_CRIT ? 'critical' : 'warning';
            upsert_alert($key, $sev, 'Disk Full: ' . $hn . ' ' . $mount,
                $mount . ' is ' . $pct . '% full on ' . $hn .
                ' (' . ($disk['used'] ?? '?') . ' of ' . ($disk['size'] ?? '?') . ' used).');
            $still_active[$key] = true;
        }

        // Services: anything that is neither 'active' nor 'unknown' counts as down.
        $services = $d['services'] ?? [];
        foreach ((is_array($services) ? $services : []) as $svc) {
            $status = $svc['status'] ?? null;
            if ($status === 'active' || $status === 'unknown') {
                continue; // 'unknown' = not watched/installed
            }
            $svcName = $svc['service'] ?? '';
            $key = 'agent:' . $id . ':svc:' . $svcName;
            upsert_alert($key, 'warning', 'Service Down: ' . $svcName . ' on ' . $hn,
                $svcName . ' is ' . ($status ?? 'inactive') . ' on ' . $hn . '.');
            $still_active[$key] = true;
        }
    }

    // ── Auto-resolve alerts whose condition has cleared ────────────────────────
    // Only alerts keyed 'agent:…' are considered: those are the keys this
    // function produces, so an auto-resolvable alert raised under a different
    // key is never closed here just because this run did not re-raise it.
    // NOTE(review): no other auto_resolve producer is visible in this file —
    // confirm none relies on this sweep.
    $open_auto = JarvisDB::query(
        "SELECT id, source_key FROM alerts
         WHERE resolved=0 AND auto_resolve=1 AND source_key LIKE 'agent:%'"
    );

    $stale_ids = [];
    foreach (($open_auto ?: []) as $row) {
        if (!isset($still_active[$row['source_key']])) {
            $stale_ids[] = $row['id'];
        }
    }

    if ($stale_ids) {
        // Resolve everything stale in one statement instead of one UPDATE per row.
        $placeholders = implode(',', array_fill(0, count($stale_ids), '?'));
        JarvisDB::query(
            'UPDATE alerts SET resolved=1, resolved_at=NOW() WHERE id IN (' . $placeholders . ')',
            $stale_ids
        );
    }
}
|
|
|
|
/**
 * Create or refresh the open alert identified by a source key.
 *
 * - No unresolved alert with this key: insert a new auto-resolvable alert of
 *   type 'agent'.
 * - Severity changed (e.g. warning → critical): rewrite severity, title and
 *   message and reset created_at to NOW().
 * - Same severity but different title/message: refresh the text only, leaving
 *   created_at untouched, so live readings embedded in the message do not go
 *   stale.
 *
 * @param string $key   Source key of the alert (stored in alerts.source_key).
 * @param string $sev   Severity label to store.
 * @param string $title Short headline.
 * @param string $msg   Detail text.
 */
function upsert_alert(string $key, string $sev, string $title, string $msg): void {
    $existing = JarvisDB::query(
        'SELECT id, severity, title, message FROM alerts WHERE source_key=? AND resolved=0 LIMIT 1',
        [$key]
    );

    if (!$existing) {
        JarvisDB::query(
            'INSERT INTO alerts (alert_type, title, message, severity, source_key, auto_resolve) VALUES (?,?,?,?,?,1)',
            ['agent', $title, $msg, $sev, $key]
        );
        return;
    }

    $current = $existing[0];

    if ($current['severity'] !== $sev) {
        // Severity transition: also resets created_at.
        JarvisDB::query(
            'UPDATE alerts SET severity=?, title=?, message=?, created_at=NOW() WHERE id=?',
            [$sev, $title, $msg, $current['id']]
        );
        return;
    }

    if ($current['title'] !== $title || $current['message'] !== $msg) {
        // Same severity, new wording: keep created_at as it is.
        JarvisDB::query(
            'UPDATE alerts SET title=?, message=? WHERE id=?',
            [$title, $msg, $current['id']]
        );
    }
}
|
|
|
|
// ── Route ─────────────────────────────────────────────────────────────────────
|
|
|
|
// Request dispatch. $method, $action and $data are not defined in this file;
// presumably the including router sets them — verify against the caller.
if ($method === 'GET') {
    // Refresh agent alerts at most once per 60 seconds. The Unix timestamp of
    // the last refresh is kept in kb_facts (category 'agent', key 'alert_refresh').
    $last_refresh = JarvisDB::query("SELECT fact_value FROM kb_facts WHERE category='agent' AND fact_key='alert_refresh' LIMIT 1");
    $last_ts = !empty($last_refresh) ? (int)$last_refresh[0]['fact_value'] : 0;
    $now = time();
    if ($now - $last_ts >= 60) {
        // The timestamp is written before the refresh runs.
        JarvisDB::query(
            "INSERT INTO kb_facts (category, fact_key, fact_value, host) VALUES ('agent', 'alert_refresh', ?, 'local')
             ON DUPLICATE KEY UPDATE fact_value=VALUES(fact_value), updated_at=NOW()",
            [$now]
        );
        refresh_agent_alerts();
    }

    // Rank severities explicitly so the order does not depend on how the
    // severity column is typed: a plain `ORDER BY severity DESC` on a string
    // column sorts alphabetically (warning, info, critical) and, combined with
    // LIMIT 30, could drop critical alerts. Unrecognised severities sort last.
    // NOTE(review): column type is not visible here — confirm against the schema.
    $alerts = JarvisDB::query(
        "SELECT * FROM alerts WHERE resolved=0
         ORDER BY CASE severity
                      WHEN 'critical' THEN 0
                      WHEN 'warning'  THEN 1
                      WHEN 'info'     THEN 2
                      ELSE 3
                  END,
                  created_at DESC
         LIMIT 30"
    );
    $alerts = $alerts ?: [];
    echo json_encode(['alerts' => $alerts, 'count' => count($alerts)]);

} elseif ($method === 'POST' && ($action === 'resolve' || ($data['action'] ?? '') === 'resolve')) {
    $id = (int)($data['id'] ?? 0);
    if ($id <= 0) {
        // A missing or non-numeric id would match no row; report it instead of
        // answering success for an update that did nothing.
        http_response_code(400);
        echo json_encode(['success' => false, 'error' => 'Missing or invalid alert id']);
    } else {
        JarvisDB::query('UPDATE alerts SET resolved=1, resolved_at=NOW() WHERE id=?', [$id]);
        echo json_encode(['success' => true]);
    }

} elseif ($method === 'POST') {
    // Manual alert creation; absent fields fall back to the defaults below.
    JarvisDB::query(
        'INSERT INTO alerts (alert_type, title, message, severity) VALUES (?,?,?,?)',
        [$data['type'] ?? 'system', $data['title'] ?? 'Alert', $data['message'] ?? '', $data['severity'] ?? 'info']
    );
    echo json_encode(['success' => true]);
}
|