[orbis] Weekly backup 2026-06-09 — 52 files changed, 2700 insertions(+)

This commit is contained in:
DO Server Backup
2026-06-09 03:53:55 +00:00
parent 5b1f83b1ea
commit 34e2485b9a
52 changed files with 2700 additions and 0 deletions
+17
View File
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Sync the do-server-config checkout in /opt and hand control to its backup.sh.
#
# SECURITY: the GitHub token is no longer embedded in this script. It is only
# needed for the initial clone and is read from $GITHUB_PAT or from a
# root-only file (chmod 600). Any token that was previously committed here
# must be treated as compromised and revoked.
REPO_DIR="/opt/do-server-config"
REPO_HOST_PATH="github.com/myronblair/do-server-config.git"
PAT_FILE="${GITHUB_PAT_FILE:-/root/.config/do-server-config/github_pat}"

# Set the commit identity used by the backup commits in the current repo.
configure_identity() {
    git config user.email "backup@orbishosting.com"
    git config user.name "DO Server Backup"
}

if [[ -d "$REPO_DIR/.git" ]]; then
    cd "$REPO_DIR" || { echo "ERROR: cannot cd to $REPO_DIR" >&2; exit 1; }
    configure_identity
    # Best effort: a failed pull must not prevent the backup from running.
    git pull --rebase origin main -q 2>/dev/null || true
else
    PAT="${GITHUB_PAT:-}"
    if [[ -z "$PAT" && -r "$PAT_FILE" ]]; then
        PAT="$(<"$PAT_FILE")"
    fi
    if [[ -z "$PAT" ]]; then
        echo "ERROR: no GitHub token: set GITHUB_PAT or create $PAT_FILE" >&2
        exit 1
    fi
    git clone "https://${PAT}@${REPO_HOST_PATH}" "$REPO_DIR" \
        || { echo "ERROR: git clone failed" >&2; exit 1; }
    cd "$REPO_DIR" || { echo "ERROR: cannot cd to $REPO_DIR" >&2; exit 1; }
    configure_identity
fi
exec bash "$REPO_DIR/backup.sh"
+477
View File
@@ -0,0 +1,477 @@
#!/usr/bin/env python3
"""
JARVIS Agent — lightweight system monitor for Linux machines.
Registers with JARVIS, reports metrics, and executes commands.
Install: sudo bash /opt/jarvis-agent/install.sh
Config: /etc/jarvis-agent/config.json
Logs: journalctl -u jarvis-agent -f
"""
import json
import os
import platform
import socket
import subprocess
import sys
import time
import urllib.request
import urllib.error
import uuid
from datetime import datetime
from pathlib import Path
# JSON configuration file read by load_config(); the agent exits if it is missing.
CONFIG_PATH = "/etc/jarvis-agent/config.json"
# JSON file where save_state() persists the api_key / agent_id obtained at registration.
STATE_PATH = "/var/lib/jarvis-agent/state.json"
# NOTE(review): the HTTP helpers send a hard-coded "JARVIS-Agent/1.0" User-Agent
# rather than this value — confirm whether that is intentional.
AGENT_VERSION = "2.3" # bumped on each release
# ── Config helpers ────────────────────────────────────────────────────────────
def load_config() -> dict:
    """Return the agent configuration parsed from CONFIG_PATH.

    Prints an error and terminates the process with exit status 1 when the
    file does not exist.
    """
    if os.path.exists(CONFIG_PATH):
        with open(CONFIG_PATH) as cfg_file:
            return json.load(cfg_file)
    print(f"[ERROR] Config not found at {CONFIG_PATH}. Run the installer first.", flush=True)
    sys.exit(1)
def load_state() -> dict:
    """Return the persisted agent state from STATE_PATH, or {} if no file exists."""
    if not os.path.exists(STATE_PATH):
        return {}
    with open(STATE_PATH) as state_file:
        return json.load(state_file)
def save_state(state: dict):
    """Persist *state* as indented JSON at STATE_PATH.

    The data is written to a sibling temporary file and then moved into place
    with os.replace(), so a crash mid-write can never leave a truncated
    state file that would make the next load_state() fail. The temporary file
    is created with mode 0600 because the state contains the agent's API key.
    """
    target = Path(STATE_PATH)
    target.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = target.with_name(target.name + ".tmp")
    fd = os.open(tmp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        json.dump(state, f, indent=2)
    os.replace(tmp_path, target)
# ── HTTP helpers ──────────────────────────────────────────────────────────────
import ssl as _ssl
def _make_ssl_ctx(verify: bool) -> _ssl.SSLContext | None:
if not verify:
ctx = _ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = _ssl.CERT_NONE
return ctx
return None
_host_header: str = "" # set from config at startup
def api_post(url: str, payload: dict, headers: dict = {}, timeout: int = 15,
ssl_verify: bool = True) -> dict:
body = json.dumps(payload).encode()
req = urllib.request.Request(url, data=body, method="POST")
req.add_header("Content-Type", "application/json")
req.add_header("User-Agent", "JARVIS-Agent/1.0")
if _host_header:
req.add_header("Host", _host_header)
for k, v in headers.items():
req.add_header(k, v)
try:
ctx = _make_ssl_ctx(ssl_verify)
with urllib.request.urlopen(req, timeout=timeout, context=ctx) as resp:
return json.loads(resp.read().decode())
except urllib.error.HTTPError as e:
return {"error": f"HTTP {e.code}: {e.read().decode()[:200]}"}
except Exception as e:
return {"error": str(e)}
def api_get(url: str, headers: dict = {}, timeout: int = 10,
ssl_verify: bool = True) -> dict:
req = urllib.request.Request(url)
req.add_header("User-Agent", "JARVIS-Agent/1.0")
if _host_header:
req.add_header("Host", _host_header)
for k, v in headers.items():
req.add_header(k, v)
try:
ctx = _make_ssl_ctx(ssl_verify)
with urllib.request.urlopen(req, timeout=timeout, context=ctx) as resp:
return json.loads(resp.read().decode())
except Exception as e:
return {"error": str(e)}
# ── Registration ──────────────────────────────────────────────────────────────
def get_local_ip() -> str:
    """Return the local IPv4 address used for outbound traffic, or "unknown".

    A UDP socket is "connected" to a public address purely so the kernel
    selects the outgoing interface; its local address is then read back.
    The socket is closed by the context manager even when connect() fails.
    """
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(("8.8.8.8", 80))
            return s.getsockname()[0]
    except Exception:
        return "unknown"
def detect_capabilities(cfg: dict) -> list:
    """Return the capability names this host supports.

    "metrics" and "commands" are always present; each optional capability is
    added when at least one of its marker paths exists on disk. *cfg* is
    accepted for interface compatibility and is not consulted.
    """
    probes = (
        ("proxmox", ("/usr/bin/pvesh", "/usr/sbin/pveversion")),
        ("docker", ("/usr/bin/docker", "/usr/local/bin/docker")),
        ("ollama", ("/usr/local/bin/ollama", "/usr/bin/ollama")),
        ("homeassistant", ("/etc/homeassistant", "/config/configuration.yaml")),
    )
    found = ["metrics", "commands"]
    found.extend(
        name for name, markers in probes
        if any(os.path.exists(marker) for marker in markers)
    )
    return found
def register(cfg: dict, state: dict) -> str:
    """Register this agent with JARVIS and persist the returned credentials.

    Args:
        cfg: Agent configuration; reads jarvis_url, registration_key and the
            optional hostname, agent_type, agent_id and ssl_verify entries.
        state: Mutable state dict; on success api_key and agent_id are stored
            in it and written via save_state().

    Returns:
        The API key, or "" when registration failed (the caller retries).
    """
    hostname = cfg.get("hostname", socket.gethostname())
    agent_type = cfg.get("agent_type", "linux")
    ip = get_local_ip()
    capabilities = detect_capabilities(cfg)
    agent_id = cfg.get("agent_id", f"{hostname}_{socket.gethostname()[:8]}")
    ssl_verify = bool(cfg.get("ssl_verify", True))
    # Strip the trailing slash exactly as main() does, otherwise a configured
    # "https://host/" produces "https://host//api/agent/register".
    base_url = cfg["jarvis_url"].rstrip("/")
    registration_key = cfg.get("registration_key")
    if not registration_key:
        # Report and let the caller's retry loop run instead of dying on KeyError.
        print("[ERROR] Registration failed: 'registration_key' missing from config", flush=True)
        return ""
    print(f"[JARVIS] Registering as '{agent_id}' ({agent_type}) from {ip}...", flush=True)
    result = api_post(
        f"{base_url}/api/agent/register",
        {
            "hostname": hostname,
            "agent_type": agent_type,
            "ip_address": ip,
            "capabilities": capabilities,
            "agent_id": agent_id,
        },
        headers={"X-Registration-Key": registration_key},
        ssl_verify=ssl_verify,
    )
    if "error" in result:
        print(f"[ERROR] Registration failed: {result['error']}", flush=True)
        return ""
    api_key = result.get("api_key", "")
    if api_key:
        state["api_key"] = api_key
        # The server may assign a different id than the one we proposed.
        state["agent_id"] = result.get("agent_id", agent_id)
        save_state(state)
        print(f"[JARVIS] Registered. agent_id={state['agent_id']}", flush=True)
    return api_key
# ── Metrics collection ────────────────────────────────────────────────────────
def read_cpu_percent() -> float:
    """Return CPU busy percentage derived from the aggregate line of /proc/stat.

    Uses the cumulative counters as they stand (no delta between samples).
    Returns 0.0 when the file cannot be read or parsed.
    """
    try:
        with open("/proc/stat") as stat_file:
            first_line = stat_file.readline()
        counters = [int(tok) for tok in first_line.split()[1:]]
        idle_ticks = counters[3]
        all_ticks = sum(counters)
        if not all_ticks:
            return 0.0
        return round((1 - idle_ticks / all_ticks) * 100, 1)
    except Exception:
        return 0.0
_last_cpu = None


def get_cpu_percent() -> float:
    """Return CPU busy percentage since the previous call.

    Each call samples /proc/stat and remembers (idle, total) in the module
    global _last_cpu. The first call has nothing to compare against and
    returns 0.0, as does any read or parse failure.
    """
    global _last_cpu
    try:
        with open("/proc/stat") as stat_file:
            counters = [int(tok) for tok in stat_file.readline().split()[1:]]
        idle_now = counters[3] + counters[4]  # idle + iowait
        total_now = sum(counters)
        usage = 0.0
        if _last_cpu:
            idle_delta = idle_now - _last_cpu[0]
            total_delta = total_now - _last_cpu[1]
            if total_delta:
                usage = round((1 - idle_delta / total_delta) * 100, 1)
        _last_cpu = (idle_now, total_now)
        return usage
    except Exception:
        return 0.0
def get_memory() -> dict:
    """Return memory usage read from /proc/meminfo.

    The result has total_mb, used_mb, free_mb (taken from MemAvailable) and
    percent; "used" is MemTotal minus MemAvailable. Returns {} on any error.
    """
    wanted = ("MemTotal:", "MemAvailable:", "MemFree:", "Buffers:", "Cached:")
    try:
        kb = {}
        with open("/proc/meminfo") as meminfo:
            for row in meminfo:
                cols = row.split()
                label = cols[0]
                if label in wanted:
                    kb[label.rstrip(":")] = int(cols[1])
        total_kb = kb.get("MemTotal", 0)
        avail_kb = kb.get("MemAvailable", 0)
        used_kb = total_kb - avail_kb
        pct = round(used_kb / total_kb * 100, 1) if total_kb else 0
        return {
            "total_mb": round(total_kb / 1024, 1),
            "used_mb": round(used_kb / 1024, 1),
            "free_mb": round(avail_kb / 1024, 1),
            "percent": pct,
        }
    except Exception:
        return {}
def get_disk() -> list:
    """Return usage for mounted filesystems as reported by ``df -h``.

    Each entry is a dict with mount, size, used, avail and percent (without
    the "%" sign). Pseudo filesystems mounted under /sys, /proc, /dev/pts,
    /run and /snap are skipped. Returns whatever was collected (possibly [])
    if df is unavailable or times out; this is deliberately best effort.
    """
    skip_prefixes = ("/sys", "/proc", "/dev/pts", "/run", "/snap")
    disks = []
    try:
        result = subprocess.run(
            ["df", "-h", "--output=source,fstype,size,used,avail,pcent,target"],
            capture_output=True, text=True, timeout=5)
        for line in result.stdout.strip().split("\n")[1:]:
            # maxsplit=6 keeps a mount point that contains spaces in one piece
            parts = line.split(None, 6)
            if len(parts) < 7:
                continue
            mount = parts[6].strip()
            if mount.startswith(skip_prefixes):
                continue
            disks.append({
                "mount": mount,
                "size": parts[2],
                "used": parts[3],
                "avail": parts[4],
                "percent": parts[5].rstrip("%"),
            })
    except Exception:
        pass
    return disks
def get_uptime() -> dict:
    """Return system uptime from /proc/uptime.

    The result has seconds, days, hours, minutes and a "human" string such as
    "3d 4h 12m". Returns {} when the file cannot be read or parsed.
    """
    try:
        with open("/proc/uptime") as uptime_file:
            raw = float(uptime_file.read().split()[0])
        whole = int(raw)
        days, remainder = divmod(whole, 86400)
        hours, remainder = divmod(remainder, 3600)
        minutes = remainder // 60
        return {
            "seconds": whole,
            "days": days,
            "hours": hours,
            "minutes": minutes,
            "human": f"{days}d {hours}h {minutes}m",
        }
    except Exception:
        return {}
def get_services(cfg: dict) -> list:
    """Return ``systemctl is-active`` status for each watched service.

    The service list comes from cfg["watch_services"], falling back to a
    built-in default list. A service whose check raises (systemctl missing,
    timeout) is reported with status "unknown".
    """
    default_watch = ["ollama", "homeassistant", "mysql", "nginx", "apache2"]
    report = []
    for unit in cfg.get("watch_services", default_watch):
        try:
            probe = subprocess.run(["systemctl", "is-active", unit],
                                   capture_output=True, text=True, timeout=3)
            state = probe.stdout.strip()
        except Exception:
            state = "unknown"
        report.append({"service": unit, "status": state})
    return report
def get_load() -> list:
    """Return the 1, 5 and 15 minute load averages from /proc/loadavg.

    Falls back to [0, 0, 0] when the file cannot be read or parsed.
    """
    try:
        with open("/proc/loadavg") as loadavg_file:
            one, five, fifteen = loadavg_file.read().split()[:3]
        return [float(one), float(five), float(fifteen)]
    except Exception:
        return [0, 0, 0]
def collect_metrics(cfg: dict) -> dict:
    """Collect one snapshot of system metrics.

    Blocks for about one second: get_cpu_percent() reports usage between two
    samples, so a priming sample is taken, then the real one after a sleep.

    Args:
        cfg: Agent configuration; "hostname" overrides the system hostname and
            the dict is passed on to get_services().

    Returns:
        A dict with hostname, cpu_percent, memory, disk, uptime, load,
        services, platform and a UTC "timestamp" in ISO format with "Z" suffix.
    """
    # Function-scope import: datetime.utcnow() is deprecated since Python 3.12,
    # and the module only imports `datetime` itself at file level.
    from datetime import timezone

    # First reading for CPU delta
    get_cpu_percent()
    time.sleep(1)
    return {
        "hostname": cfg.get("hostname", socket.gethostname()),
        "cpu_percent": get_cpu_percent(),
        "memory": get_memory(),
        "disk": get_disk(),
        "uptime": get_uptime(),
        "load": get_load(),
        "services": get_services(cfg),
        "platform": platform.system(),
        # tzinfo is dropped so the string keeps its previous shape (no "+00:00")
        "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
    }
# ── Proxmox metrics ───────────────────────────────────────────────────────────
def collect_proxmox_metrics(cfg: dict) -> dict | None:
try:
result = subprocess.run(
["pvesh", "get", "/nodes/pve/status", "--output-format", "json"],
capture_output=True, text=True, timeout=10
)
node_status = json.loads(result.stdout)
vms_result = subprocess.run(
["pvesh", "get", "/nodes/pve/qemu", "--output-format", "json"],
capture_output=True, text=True, timeout=10
)
vms = json.loads(vms_result.stdout)
return {"node": node_status, "vms": vms}
except Exception as e:
return {"error": str(e)}
# ── Command execution ─────────────────────────────────────────────────────────
def execute_command(cmd: dict) -> dict:
cmd_type = cmd.get("command_type", "")
cmd_data = cmd.get("command_data", {})
try:
if cmd_type == "restart_service":
svc = cmd_data.get("service", "")
if not svc or "/" in svc:
return {"success": False, "error": "Invalid service name"}
r = subprocess.run(["systemctl", "restart", svc], capture_output=True, text=True, timeout=30)
return {"success": r.returncode == 0, "stdout": r.stdout, "stderr": r.stderr}
elif cmd_type == "get_logs":
svc = cmd_data.get("service", "")
lines = min(int(cmd_data.get("lines", 50)), 200)
if not svc or "/" in svc:
return {"success": False, "error": "Invalid service name"}
r = subprocess.run(["journalctl", "-u", svc, "-n", str(lines), "--no-pager"],
capture_output=True, text=True, timeout=15)
return {"success": True, "output": r.stdout}
elif cmd_type == "ping":
host = cmd_data.get("host", "8.8.8.8")
r = subprocess.run(["ping", "-c", "3", "-W", "2", host], capture_output=True, text=True, timeout=15)
return {"success": r.returncode == 0, "output": r.stdout}
elif cmd_type == "update":
updated = self_update(cfg)
return {"success": True, "updated": updated}
elif cmd_type == "shell":
# Guard reads LOCAL config, not the server-supplied payload
if not cfg.get("allow_shell_commands", False):
return {"success": False, "error": "Shell commands not enabled in agent config"}
cmd_str = cmd_data.get("command", "")
r = subprocess.run(cmd_str, shell=True, capture_output=True, text=True, timeout=30)
return {"success": True, "stdout": r.stdout[:2000], "stderr": r.stderr[:500]}
else:
return {"success": False, "error": f"Unknown command type: {cmd_type}"}
except subprocess.TimeoutExpired:
return {"success": False, "error": "Command timed out"}
except Exception as e:
return {"success": False, "error": str(e)}
# ── Main loop ─────────────────────────────────────────────────────────────────
def main():
    """Run the agent: register if needed, then heartbeat/poll forever.

    Each loop iteration sends a heartbeat, executes any commands the server
    returned and reports their results, runs the periodic self-update check,
    pushes metrics when poll_interval has elapsed, then sleeps for
    heartbeat_every seconds. Errors inside an iteration are logged and the
    loop continues.
    """
    global _host_header
    cfg = load_config()
    state = load_state()
    jarvis_url = cfg["jarvis_url"].rstrip("/")
    ssl_verify = bool(cfg.get("ssl_verify", True))
    _host_header = cfg.get("host_header", "")
    poll_interval = int(cfg.get("poll_interval", 30))
    heartbeat_every = int(cfg.get("heartbeat_every", 10))

    # Register if no API key yet — loop (not recurse) to avoid stack overflow
    api_key = state.get("api_key", "")
    while not api_key:
        api_key = register(cfg, state)
        if not api_key:
            print("[ERROR] Could not register with JARVIS. Retrying in 60s...", flush=True)
            time.sleep(60)

    headers = {"X-Agent-Key": api_key}
    # Zero timestamps make the first iteration run both the update check and
    # the metrics push immediately.
    last_metrics = 0
    last_update_chk = 0
    update_interval = int(cfg.get("update_check_hours", 24)) * 3600
    print(f"[JARVIS] Agent v{AGENT_VERSION} running. Polling {jarvis_url} every {heartbeat_every}s.", flush=True)

    while True:
        now = time.time()
        try:
            # Heartbeat + get commands
            hb = api_post(f"{jarvis_url}/api/agent/heartbeat", {}, headers, ssl_verify=ssl_verify)
            if "error" in hb:
                print(f"[WARN] Heartbeat failed: {hb['error']}", flush=True)
            else:
                for cmd in hb.get("commands") or []:
                    # Isolate each command: one malformed entry (missing
                    # "command_type"/"id") must not drop the rest of the batch.
                    try:
                        print(f"[CMD] Executing: {cmd['command_type']}", flush=True)
                        result = execute_command(cmd)
                        api_post(f"{jarvis_url}/api/agent/command_result",
                                 {"command_id": cmd["id"], "success": result.get("success", False), "result": result},
                                 headers, ssl_verify=ssl_verify)
                    except Exception as e:
                        print(f"[ERROR] Command handling failed: {e}", flush=True)

            # Self-update check (every update_interval seconds, default 24h)
            if now - last_update_chk >= update_interval:
                last_update_chk = now
                self_update(cfg)  # restarts process if update found

            # Push metrics every poll_interval seconds
            if now - last_metrics >= poll_interval:
                metrics = collect_metrics(cfg)
                api_post(f"{jarvis_url}/api/agent/metrics",
                         {"type": "system", "data": metrics}, headers, ssl_verify=ssl_verify)
                # Proxmox metrics if available
                if "proxmox" in detect_capabilities(cfg):
                    px = collect_proxmox_metrics(cfg)
                    if px:
                        api_post(f"{jarvis_url}/api/agent/metrics",
                                 {"type": "proxmox", "data": px}, headers, ssl_verify=ssl_verify)
                last_metrics = now
        except Exception as e:
            print(f"[ERROR] Loop error: {e}", flush=True)
        time.sleep(heartbeat_every)
# ── Self-update ────────────────────────────────────────────────────────────────
def self_update(cfg: dict) -> bool:
    """Check the JARVIS server for a newer copy of this script and install it.

    The candidate is downloaded from cfg["update_url"] (default:
    <jarvis_url>/agent/jarvis-agent.py). If <update_url>.sha256 can be
    fetched, the download must match it or the update is aborted. Before
    anything is replaced the payload must be non-empty and compile as Python,
    so an empty reply or an HTML error page can never overwrite the agent.
    The new file is written next to the script and moved into place
    atomically, then the process re-executes itself.

    NOTE(review): when no .sha256 file can be fetched the update is still
    applied unverified (behaviour kept from the original). Consider failing
    closed once every deployment publishes a hash.

    Returns:
        False when no update was applied (nothing configured, identical
        content, verification failure or any error). On a successful update
        os.execv() replaces the process, so the function does not return.
    """
    import hashlib
    jarvis_url = cfg.get("jarvis_url", "").rstrip("/")
    default_update_url = f"{jarvis_url}/agent/jarvis-agent.py" if jarvis_url else ""
    update_url = cfg.get("update_url", default_update_url)
    if not update_url:
        return False
    script_path = os.path.abspath(__file__)

    def _fetch(url: str, timeout: int) -> bytes:
        """Download *url* with the agent's standard headers."""
        req = urllib.request.Request(url)
        req.add_header("User-Agent", "JARVIS-Agent/1.0")
        if _host_header:
            req.add_header("Host", _host_header)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return resp.read()

    try:
        # Download expected hash first; its absence is tolerated (see NOTE above)
        try:
            expected_hash = _fetch(update_url + ".sha256", 10).decode().strip().split()[0].lower()
        except Exception:
            expected_hash = None
        # Download new script
        new_content = _fetch(update_url, 30)
        if not new_content.strip():
            print("[JARVIS] Update payload is empty — aborting", flush=True)
            return False
        # Verify hash if available — abort if mismatch
        if expected_hash:
            actual_hash = hashlib.sha256(new_content).hexdigest()
            if actual_hash != expected_hash:
                print(f"[JARVIS] Update hash mismatch (expected {expected_hash[:16]}… got {actual_hash[:16]}…) — aborting", flush=True)
                return False
        with open(script_path, "rb") as f:
            current = f.read()
        if new_content == current:
            return False
        # Syntax check only (nothing is executed); SyntaxError lands in the
        # outer handler and leaves the running script untouched.
        compile(new_content, script_path, "exec")
        if not expected_hash:
            print("[JARVIS] No .sha256 available — applying update without integrity check", flush=True)
        print(f"[JARVIS] Update verified — replacing {script_path} and restarting...", flush=True)
        tmp_path = script_path + ".new"
        with open(tmp_path, "wb") as f:
            f.write(new_content)
        # Keep the original permission bits (e.g. the executable flag)
        os.chmod(tmp_path, os.stat(script_path).st_mode & 0o7777)
        os.replace(tmp_path, script_path)
        os.execv(sys.executable, [sys.executable] + sys.argv)
        return True  # only reached if execv is stubbed out
    except Exception as e:
        print(f"[JARVIS] Self-update check failed: {e}", flush=True)
        return False
# Start the agent loop only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
+97
View File
@@ -0,0 +1,97 @@
#!/bin/bash
# JARVIS Full Backup — websites + databases
# Runs daily via cron, also triggered on-demand from admin portal
BACKUP_DIR="/var/backups/jarvis"
LOCK_FILE="$BACKUP_DIR/backup.lock"
LOG_FILE="$BACKUP_DIR/backup.log"
MANIFEST="$BACKUP_DIR/manifest.json"
KEEP_DAYS=7
DB_USER="root"
# SECURITY: the database password is no longer stored in this script or passed
# on the command line (where it is visible in `ps`). mysqldump reads it from a
# root-only option file (chmod 600) of the form:
#   [client]
#   password=...
# The password that used to be committed here must be rotated.
MYSQL_DEFAULTS_FILE="${MYSQL_DEFAULTS_FILE:-/root/.my.cnf}"
DATABASES="jarvis_db toms_tjj_db tomt_ttg_db epic_parkersling epic_epic_db parker_db"
SITES=(
    "/home/epictravelexpeditions.com/public_html"
    "/home/epictravelexpeditions.com/parkerslingshot"
    "/home/jarvis.orbishosting.com/public_html"
    "/home/orbishosting.com/public_html"
    "/home/orbis.orbishosting.com/public_html"
    "/home/parkerslingshotrentals.com/public_html"
    "/home/tomsjavajive.com/public_html"
    "/home/tomtomgames.com/public_html"
)
# NOTE(review): archives contain full database dumps but are created with the
# default umask — confirm who needs read access before tightening permissions.
mkdir -p "$BACKUP_DIR"

# Prevent concurrent runs; a lock older than one hour is considered stale.
if [ -f "$LOCK_FILE" ]; then
    AGE=$(( $(date +%s) - $(stat -c %Y "$LOCK_FILE") ))
    if [ "$AGE" -lt 3600 ]; then
        echo "$(date): Backup already running (lock age ${AGE}s)" >> "$LOG_FILE"
        exit 1
    fi
    rm -f "$LOCK_FILE"
fi

TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S)
# noclobber makes lock creation atomic: if another run created the lock between
# the check above and this line, the redirect fails instead of overwriting it.
if ! ( set -o noclobber; echo "$TIMESTAMP" > "$LOCK_FILE" ) 2>/dev/null; then
    echo "$(date): Backup already running (lock taken concurrently)" >> "$LOG_FILE"
    exit 1
fi
WORK_DIR=""
# Single quotes: variables are expanded when the trap fires, not when it is set.
trap 'rm -f "$LOCK_FILE"; [ -n "$WORK_DIR" ] && rm -rf "$WORK_DIR"' EXIT

WORK_DIR=$(mktemp -d) || { echo "$(date): mktemp failed" >> "$LOG_FILE"; exit 1; }
BACKUP_NAME="jarvis_backup_${TIMESTAMP}"
FINAL_FILE="$BACKUP_DIR/${BACKUP_NAME}.tar.gz"
# '>' is intentional: the log holds only the most recent run.
echo "$(date): Starting backup $TIMESTAMP" > "$LOG_FILE"
mkdir -p "$WORK_DIR/sql" "$WORK_DIR/sites"

# ── SQL Dumps ────────────────────────────────────────────────────────────────
echo "$(date): Dumping databases..." >> "$LOG_FILE"
if [ ! -r "$MYSQL_DEFAULTS_FILE" ]; then
    echo "$(date): WARNING — MySQL option file $MYSQL_DEFAULTS_FILE not readable; dumps will fail" >> "$LOG_FILE"
fi
for DB in $DATABASES; do
    # --defaults-extra-file must be the first option; stderr goes to the log so
    # the real failure reason is kept.
    if mysqldump --defaults-extra-file="$MYSQL_DEFAULTS_FILE" -u"$DB_USER" \
            --single-transaction --quick \
            "$DB" 2>>"$LOG_FILE" > "$WORK_DIR/sql/${DB}.sql"; then
        SIZE=$(du -sh "$WORK_DIR/sql/${DB}.sql" | cut -f1)
        echo "$(date): $DB — $SIZE" >> "$LOG_FILE"
    else
        echo "$(date): $DB — FAILED (may not exist)" >> "$LOG_FILE"
        rm -f "$WORK_DIR/sql/${DB}.sql"
    fi
done
# ── Website Files ────────────────────────────────────────────────────────────
# One tarball per site directory that exists; missing sites are skipped.
echo "$(date): Archiving website files..." >> "$LOG_FILE"
for SITE in "${SITES[@]}"; do
    if [ -d "$SITE" ]; then
        SLUG=$(echo "$SITE" | sed 's|/home/||;s|/|_|g')
        SITE_ARCHIVE="$WORK_DIR/sites/${SLUG}.tar.gz"
        tar -czf "$SITE_ARCHIVE" -C "$(dirname "$SITE")" "$(basename "$SITE")" 2>/dev/null
        TAR_RC=$?
        # GNU tar exits 1 when files changed while being read (normal on a
        # live site); anything above that is a real failure.
        if [ "$TAR_RC" -le 1 ] && [ -s "$SITE_ARCHIVE" ]; then
            SIZE=$(du -sh "$SITE_ARCHIVE" | cut -f1)
            echo "$(date): $SITE — $SIZE" >> "$LOG_FILE"
        else
            echo "$(date): $SITE — FAILED (tar exit $TAR_RC)" >> "$LOG_FILE"
            rm -f "$SITE_ARCHIVE"
        fi
    fi
done
# ── Final Archive ────────────────────────────────────────────────────────────
# Bundle the sql/ and sites/ staging directories into one dated tarball.
echo "$(date): Creating final archive..." >> "$LOG_FILE"
if ! tar -czf "$FINAL_FILE" -C "$WORK_DIR" sql sites 2>>"$LOG_FILE"; then
    # Do not leave a partial archive behind or report success; the EXIT trap
    # still removes the lock and the staging directory.
    echo "$(date): FAILED to create $FINAL_FILE" >> "$LOG_FILE"
    rm -f "$FINAL_FILE"
    exit 1
fi
FINAL_SIZE=$(du -sh "$FINAL_FILE" | cut -f1)
FINAL_BYTES=$(stat -c %s "$FINAL_FILE")
echo "$(date): Done — $FINAL_FILE ($FINAL_SIZE)" >> "$LOG_FILE"
# ── Cleanup old backups ───────────────────────────────────────────────────────
# Prune BEFORE writing the manifest so it never lists files that were just deleted.
find "$BACKUP_DIR" -name "jarvis_backup_*.tar.gz" -mtime +"$KEEP_DAYS" -delete

# ── Update Manifest ───────────────────────────────────────────────────────────
# JSON list of remaining archives, newest first (ls -t order).
BACKUPS_JSON="["
FIRST=1
while IFS= read -r F; do
    [ -f "$F" ] || continue
    FNAME=$(basename "$F")
    FSIZE=$(stat -c %s "$F")
    FDATE=$(stat -c %y "$F" | cut -d. -f1)
    [ $FIRST -eq 0 ] && BACKUPS_JSON+=","
    BACKUPS_JSON+="{\"file\":\"$FNAME\",\"size\":$FSIZE,\"date\":\"$FDATE\"}"
    FIRST=0
done < <(ls -t "$BACKUP_DIR"/jarvis_backup_*.tar.gz 2>/dev/null)
BACKUPS_JSON+="]"
# Write to a temp file and rename so readers never see a half-written manifest.
echo "{\"backups\":$BACKUPS_JSON,\"updated\":\"$(date -Iseconds)\"}" > "${MANIFEST}.tmp" \
    && mv -f "${MANIFEST}.tmp" "$MANIFEST"
echo "$(date): Backup complete. Kept last $KEEP_DAYS days." >> "$LOG_FILE"
+84
View File
@@ -0,0 +1,84 @@
#!/bin/bash
# JARVIS Auto-Deploy Runner — processes GitHub webhook queue every minute.
# Validates PHP syntax before deploying; auto-reverts on bad code.
# Restarts OLS after JARVIS deploys to pick up PHP changes.
# Queue file: one repository path per line, consumed by the loop below.
# NOTE(review): /tmp is normally world-writable, so any local user could create
# or append to this file and have this script run git in a path of their
# choosing — consider moving the queue to a directory only the webhook writer
# and this runner can write to.
QUEUE=/tmp/jarvis-deploy-queue.txt
# Deploy log; git output and log() messages are appended here.
LOG=/home/jarvis.orbishosting.com/logs/deploy.log
# PHP binary used for `php -l` syntax checks.
PHP=/usr/bin/php8.3
# Print the current local time as "YYYY-MM-DD HH:MM:SS".
TS() {
    date '+%Y-%m-%d %H:%M:%S'
}

# Append one timestamped message ($1) to the deploy log.
log() {
    printf '[%s] %s\n' "$(TS)" "$1" >> "$LOG"
}
# Nothing to do unless the queue is a regular, non-empty file.
[ -f "$QUEUE" ] && [ -s "$QUEUE" ] || exit 0

# Claim the queue with an atomic rename: entries written by the webhook after
# this point land in a fresh queue file and are picked up by the next run,
# instead of being lost between a read and a truncate.
PROCESSING="${QUEUE}.processing"
if ! mv "$QUEUE" "$PROCESSING" 2>/dev/null; then
    exit 0
fi
SNAPSHOT=$(<"$PROCESSING")
rm -f "$PROCESSING"
# SECURITY: MySQL credentials are read from a root-only option file (chmod 600,
# "[client]" section with password=...) instead of being embedded here and
# exposed on the command line. The previously committed password must be rotated.
MYSQL_DEFAULTS_FILE="${JARVIS_MYSQL_CNF:-/root/.jarvis-mysql.cnf}"

# Process every queued repository path: fetch, pull, lint changed PHP files,
# and revert (locally and on GitHub) when a syntax error is found.
while IFS= read -r path; do
    [ -z "$path" ] && continue
    if [ ! -d "$path/.git" ]; then
        log "SKIP $path — not a git repo"
        continue
    fi
    log "Deploying $path"
    cd "$path" || continue

    BEFORE=$(git rev-parse HEAD 2>/dev/null)
    if [ -z "$BEFORE" ]; then
        # Without a known starting commit there is nothing safe to revert to.
        log "SKIP $path — cannot resolve HEAD"
        continue
    fi
    if ! git fetch origin main >> "$LOG" 2>&1; then
        log "ERROR: git fetch failed for $path — skipping"
        continue
    fi
    REMOTE=$(git rev-parse origin/main 2>/dev/null)
    if [ "$BEFORE" = "$REMOTE" ]; then
        log "Already up to date: $path"
        continue
    fi
    if ! git pull origin main >> "$LOG" 2>&1; then
        # Previously a failed pull fell through and was logged as "Deploy OK".
        log "ERROR: git pull failed for $path — see output above"
        continue
    fi
    AFTER=$(git rev-parse HEAD 2>/dev/null)
    CHANGED=$(git diff --name-only "$BEFORE" "$AFTER" 2>/dev/null)

    # PHP syntax validation — check every changed .php file
    SYNTAX_OK=true
    BAD_FILE=""
    while IFS= read -r f; do
        [[ "$f" != *.php ]] && continue
        [ ! -f "$f" ] && continue
        if ! $PHP -l "$f" > /dev/null 2>&1; then
            SYNTAX_OK=false
            BAD_FILE="$f"
            break
        fi
    done <<< "$CHANGED"

    if [ "$SYNTAX_OK" = false ]; then
        log "SYNTAX ERROR in $BAD_FILE — reverting locally and pushing revert to GitHub"
        git reset --hard "$BEFORE" >> "$LOG" 2>&1
        # Push the revert so GitHub matches the live server — prevents infinite re-deploy loop
        # NOTE(review): this rewrites history on main; confirm that is acceptable.
        git push --force origin HEAD:main >> "$LOG" 2>&1
        PUSH_EXIT=$?
        if [ $PUSH_EXIT -ne 0 ]; then
            log "WARNING: Force-push of revert failed (exit $PUSH_EXIT) — bad commit still on GitHub"
        fi
        # Insert alert into JARVIS DB. Backslashes are doubled first, then
        # single quotes, so the file name cannot break out of the SQL literal.
        BAD_ESCAPED=$(printf '%s' "$BAD_FILE" | sed -e 's/\\/\\\\/g' -e "s/'/''/g")
        mysql --defaults-extra-file="$MYSQL_DEFAULTS_FILE" -u jarvis_user jarvis_db -se \
            "INSERT INTO alerts (alert_type,title,message,severity)
             VALUES ('deploy_fail','Deploy reverted: syntax error',
             'PHP syntax error in $BAD_ESCAPED. Commit $AFTER was reverted and force-pushed to GitHub.','critical');" 2>/dev/null
        log "Reverted. Bad commit: $AFTER"
        continue
    fi

    log "Deploy OK ($BEFORE -> $AFTER): $path"
    log "Changed: $(echo "$CHANGED" | tr '\n' ' ')"
    # Restart OLS after any JARVIS deploy to pick up PHP changes
    if [[ "$path" == *"jarvis"* ]]; then
        systemctl reload lsws 2>/dev/null || systemctl restart lsws 2>/dev/null
        log "OLS reloaded for JARVIS deploy"
    fi
done <<< "$SNAPSHOT"
+118
View File
@@ -0,0 +1,118 @@
#!/bin/bash
# JARVIS Self-Healing Watchdog — runs every 5 min via root cron
# Checks: lsws, mysql, redis, JARVIS HTTP, disk, memory
# Auto-heals: restarts failed services, restarts offline Proxmox VM agents
# Logs to: /home/jarvis.orbishosting.com/logs/watchdog.log
LOG=/home/jarvis.orbishosting.com/logs/watchdog.log
# SECURITY: the MySQL password is read from a root-only option file (chmod 600,
# "[client]" section with password=...) rather than being embedded here and
# exposed in the process list. The previously committed password must be rotated.
# $MYSQL is expanded unquoted as a command prefix, so the path must not contain
# spaces; --defaults-extra-file has to stay the first option.
MYSQL_CNF="${JARVIS_MYSQL_CNF:-/root/.jarvis-mysql.cnf}"
MYSQL="mysql --defaults-extra-file=$MYSQL_CNF -u jarvis_user jarvis_db -se"
# Print the current local time as "YYYY-MM-DD HH:MM:SS".
TS() {
    date '+%Y-%m-%d %H:%M:%S'
}

# Append one timestamped message ($1) to the watchdog log.
log() {
    printf '[%s] %s\n' "$(TS)" "$1" >> "$LOG"
}
# Escape a value for use inside a single-quoted MySQL string literal.
# Backslashes are doubled first, then each single quote is doubled ('').
# The previous version emitted \'' for a quote: MySQL read \' as the escaped
# quote and the following ' as the END of the literal, breaking the statement
# (and allowing injection) for any value containing a quote.
sql_esc() {
    printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e "s/'/''/g"
}
# Record an alert row.
#   $1 alert type (also used to build the source_key "watchdog:<type>")
#   $2 title   $3 message   $4 severity (default "warning")
# INSERT IGNORE is used, so a row rejected by a uniqueness constraint is
# silently skipped; MySQL errors are discarded.
alert() {
    local kind="$1" heading="$2" body="$3" level="${4:-warning}"
    local q_kind q_heading q_body q_level sql
    q_kind=$(sql_esc "$kind")
    q_heading=$(sql_esc "$heading")
    q_body=$(sql_esc "$body")
    q_level=$(sql_esc "$level")
    sql="INSERT IGNORE INTO alerts (alert_type,title,message,severity,source_key,auto_resolve)
VALUES ('$q_kind','$q_heading','$q_body','$q_level','watchdog:$q_kind',1);"
    $MYSQL "$sql" 2>/dev/null
}
# Mark every open alert whose source_key is "watchdog:<$1>" as resolved.
# MySQL errors are discarded.
resolve() {
    local q_key sql
    q_key=$(sql_esc "$1")
    sql="UPDATE alerts SET resolved=1,resolved_at=NOW()
WHERE source_key='watchdog:$q_key' AND resolved=0;"
    $MYSQL "$sql" 2>/dev/null
}
# ── Service health ─────────────────────────────────────────────────────────────
# Restart any core service that is not active and raise an alert; clear the
# alert once the service is healthy again.
for SVC in lsws mysql redis; do
    # Per-service alert type so the source_key written by alert()
    # ("watchdog:service_down_<svc>") is exactly the key resolve() clears.
    # Previously alerts were stored under "watchdog:service_down" while
    # resolve() targeted "watchdog:service_down_<svc>", so they never cleared.
    ALERT_KEY="service_down_$SVC"
    if systemctl is-active --quiet "$SVC"; then
        resolve "$ALERT_KEY"
        continue
    fi
    log "HEAL: $SVC is down — restarting"
    systemctl restart "$SVC"
    if systemctl is-active --quiet "$SVC"; then
        log "HEAL: $SVC restarted successfully"
        alert "$ALERT_KEY" "$SVC restarted" "JARVIS watchdog restarted $SVC which was stopped." "warning"
    else
        log "ERROR: $SVC failed to restart"
        alert "$ALERT_KEY" "$SVC failed to restart" "$SVC is down and could not be restarted automatically." "critical"
    fi
done
# ── JARVIS HTTP self-check ─────────────────────────────────────────────────────
# Probe the JARVIS endpoint; restart OpenLiteSpeed on a 5xx or when no HTTP
# response was received at all.
HTTP_CODE=$(curl -sk -o /dev/null -w "%{http_code}" --max-time 10 https://jarvis.orbishosting.com/api.php 2>/dev/null)
# curl prints 000 when the transfer produced no HTTP response (connection
# refused, timeout, TLS failure). That case used to be treated as healthy.
if [[ "$HTTP_CODE" == 5* || "$HTTP_CODE" == "000" || -z "$HTTP_CODE" ]]; then
    log "HEAL: JARVIS HTTP returned $HTTP_CODE — restarting lsws"
    systemctl restart lsws
    alert "jarvis_http" "JARVIS HTTP error — restarted OLS" "JARVIS returned HTTP $HTTP_CODE. OpenLiteSpeed was restarted." "critical"
else
    resolve "jarvis_http"
fi
# ── Disk usage ─────────────────────────────────────────────────────────────────
# Alert at >= 80% (warning) and >= 90% (critical) usage of the root filesystem.
# -P forces one line per filesystem so a long device name cannot wrap and
# shift the awk columns.
DISK_PCT=$(df -P / | awk 'NR==2{print $5}' | tr -d '%')
if ! [[ "$DISK_PCT" =~ ^[0-9]+$ ]]; then
    # Guard: an empty or non-numeric value would make the -ge tests error out.
    log "WARN: could not determine disk usage (got '$DISK_PCT')"
elif [ "$DISK_PCT" -ge 90 ]; then
    log "ALERT: Disk at ${DISK_PCT}% (critical)"
    alert "disk_critical" "Disk ${DISK_PCT}% full on DO server" "Root filesystem is ${DISK_PCT}% full. Immediate cleanup required." "critical"
elif [ "$DISK_PCT" -ge 80 ]; then
    log "WARN: Disk at ${DISK_PCT}%"
    alert "disk_warning" "Disk ${DISK_PCT}% full on DO server" "Root filesystem is ${DISK_PCT}% full." "warning"
else
    $MYSQL "UPDATE alerts SET resolved=1,resolved_at=NOW() WHERE source_key IN ('watchdog:disk_critical','watchdog:disk_warning') AND resolved=0;" 2>/dev/null
fi
# ── Memory usage ──────────────────────────────────────────────────────────────
# Alert when used memory (MemTotal - MemAvailable) reaches 90%; clear the alert
# once usage drops below the threshold, like the other checks do.
MEM_TOTAL=$(awk '/^MemTotal:/{print $2}' /proc/meminfo)
MEM_AVAIL=$(awk '/^MemAvailable:/{print $2}' /proc/meminfo)
if [[ "$MEM_TOTAL" =~ ^[0-9]+$ && "$MEM_AVAIL" =~ ^[0-9]+$ && "$MEM_TOTAL" -gt 0 ]]; then
    MEM_PCT=$(( (MEM_TOTAL - MEM_AVAIL) * 100 / MEM_TOTAL ))
    if [ "$MEM_PCT" -ge 90 ]; then
        log "ALERT: Memory at ${MEM_PCT}%"
        alert "mem_critical" "Memory ${MEM_PCT}% used on DO server" "DO server memory is ${MEM_PCT}% used." "critical"
    else
        resolve "mem_critical"
    fi
else
    # Avoids a division by zero / arithmetic error when meminfo is unreadable.
    log "WARN: could not read memory usage from /proc/meminfo"
fi
# ── Offline agent auto-restart (Proxmox VMs only) ─────────────────────────────
# Map: agent_id → "proxmox_host vmid"
declare -A AGENT_PVE=(
    ["ollama_vm"]="orbisne.fortiddns.com 210"
    ["ha_vm"]="orbisne.fortiddns.com 101"
    ["networkbackup_vm"]="10.48.200.91 302"
)
# SECURITY: the Proxmox root password is no longer embedded in this script or
# passed with `sshpass -p` (visible in the process list). It is read from a
# root-only file (chmod 600). The previously committed password must be
# changed. Key-based authentication would be preferable to sshpass altogether.
PVE_PASS_FILE="${JARVIS_PVE_PASS_FILE:-/root/.jarvis-pve-pass}"

OFFLINE=$($MYSQL "SELECT agent_id FROM registered_agents
    WHERE status='offline' AND last_seen < DATE_SUB(NOW(), INTERVAL 5 MINUTE)
    AND agent_type='linux';" 2>/dev/null)
for AID in $OFFLINE; do
    # Check if we have a Proxmox mapping for this agent (substring match either way)
    for KEY in "${!AGENT_PVE[@]}"; do
        if [[ "$AID" == *"$KEY"* ]] || [[ "$KEY" == *"$AID"* ]]; then
            read -r PVE_IP VMID <<< "${AGENT_PVE[$KEY]}"
            if [ ! -r "$PVE_PASS_FILE" ]; then
                log "ERROR: $PVE_PASS_FILE not readable — cannot restart agent on $AID"
                break
            fi
            log "HEAL: Attempting to restart jarvis-agent on $AID (VM $VMID @ $PVE_IP)"
            # accept-new trusts a host on first contact but refuses a CHANGED
            # host key, unlike StrictHostKeyChecking=no.
            sshpass -f "$PVE_PASS_FILE" ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 \
                root@"$PVE_IP" \
                "qm guest exec $VMID -- systemctl restart jarvis-agent" 2>/dev/null
            SSH_RC=$?
            log "HEAL: Restart command sent to $AID (exit: $SSH_RC)"
            alert "agent_offline" "Auto-restarted agent: $AID" \
                "Agent $AID was offline. JARVIS watchdog sent restart command via Proxmox." "warning"
            break
        fi
    done
done
# ── Log rotation ──────────────────────────────────────────────────────────────
# When a log grows beyond 1000 lines it is cut back to its newest 500 lines.
for LOGFILE in "$LOG" /home/jarvis.orbishosting.com/logs/deploy.log /home/jarvis.orbishosting.com/logs/cron.log; do
    if [ ! -f "$LOGFILE" ]; then
        continue
    fi
    LINES=$(wc -l < "$LOGFILE")
    [ "$LINES" -gt 1000 ] || continue
    # Replace the log only if the trimmed copy was written successfully.
    if tail -500 "$LOGFILE" > "${LOGFILE}.tmp"; then
        mv "${LOGFILE}.tmp" "$LOGFILE"
    fi
done
+35
View File
@@ -0,0 +1,35 @@
#!/bin/bash
# Back up the tomtomgames.com site files and database into one dated zip and
# keep only the seven most recent archives.
BACKUP_DIR="/home/tomtomgames.com/backups"
SITE_DIR="/home/tomtomgames.com/public_html"
DB_NAME="tomt_ttg_db"
DB_USER="tomt_ttg_user"
# SECURITY: the database password is read from an owner-only option file
# (chmod 600, "[client]" section with password=...) instead of being stored
# here and passed on the command line. The previously committed password must
# be rotated.
DB_DEFAULTS_FILE="${TTG_MYSQL_CNF:-/home/tomtomgames.com/.ttg-backup.cnf}"
DATE=$(date +%Y-%m-%d_%H-%M-%S)
ZIP_FILE="${BACKUP_DIR}/ttg_backup_${DATE}.zip"

# Timestamped log line on stdout.
say() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"; }

mkdir -p "$BACKUP_DIR"
# mktemp gives an unpredictable name created with mode 0600, instead of a
# guessable file in /tmp.
SQL_FILE=$(mktemp "/tmp/ttg_db_${DATE}.XXXXXX.sql") || { say "ERROR: mktemp failed"; exit 1; }
trap 'rm -f "$SQL_FILE"' EXIT

say "Starting backup..."
# stderr is deliberately NOT redirected into the dump: with "2>&1" mysqldump's
# warnings and errors were written into the SQL file, corrupting it and
# defeating the non-empty check below.
/usr/bin/mysqldump --defaults-extra-file="$DB_DEFAULTS_FILE" -u "$DB_USER" "$DB_NAME" > "$SQL_FILE"
if [ $? -ne 0 ] || [ ! -s "$SQL_FILE" ]; then
    say "ERROR: Database export failed"
    exit 1
fi
say "Database exported ($(du -sh "$SQL_FILE" | cut -f1))"

/usr/bin/zip -r "$ZIP_FILE" "$SITE_DIR" "$SQL_FILE" -x "*/backups/*" > /dev/null 2>&1
RC=$?
rm -f "$SQL_FILE"
if [ $RC -ne 0 ] || [ ! -f "$ZIP_FILE" ]; then
    say "ERROR: Archive creation failed"
    rm -f "$ZIP_FILE"
    exit 1
fi
say "Archive created: $(basename "$ZIP_FILE") ($(du -sh "$ZIP_FILE" | cut -f1))"

# Keep the 7 newest archives; everything from the 8th onward is removed.
ls -t "${BACKUP_DIR}"/ttg_backup_*.zip 2>/dev/null | tail -n +8 | while IFS= read -r old; do
    rm -f "$old"
    say "Pruned: $(basename "$old")"
done
say "Backup complete."