diff --git a/llamacpp-monitor/config/service.env b/llamacpp-monitor/config/service.env
new file mode 100644
index 0000000..98f8323
--- /dev/null
+++ b/llamacpp-monitor/config/service.env
@@ -0,0 +1,11 @@
+# Service settings specific to this server
+# (can also override anything in the template_info.env file)
+
+# URL of the llama.cpp server to monitor
+LLAMA_SERVER_URL="http://localhost:8080"
+
+# Port to serve the monitor dashboard on
+MONITOR_PORT=80
+
+# Server Settings
+SSH_USER="root"
diff --git a/llamacpp-monitor/install.sh b/llamacpp-monitor/install.sh
new file mode 100755
index 0000000..06ccb07
--- /dev/null
+++ b/llamacpp-monitor/install.sh
@@ -0,0 +1,16 @@
#!/bin/bash
# Install the llamacpp-monitor service: pull the image, replace any existing
# container, then delegate to start.sh to launch the new one.
source "${AGENT_PATH}/common.sh"
_check_required_env_vars "CONTAINER_NAME" "IMAGE_REGISTRY" "IMAGE_REPO" "IMAGE_TAG"

# Resolve this template's directory so start.sh is found regardless of the
# caller's working directory (the original `bash ./start.sh` was CWD-relative).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "Pulling image ${IMAGE_REGISTRY}/${IMAGE_REPO}:${IMAGE_TAG}..."
docker pull "${IMAGE_REGISTRY}/${IMAGE_REPO}:${IMAGE_TAG}" || _die "Failed to pull image ${IMAGE_REGISTRY}/${IMAGE_REPO}:${IMAGE_TAG}"

echo "Stopping and removing any existing container..."
_stop_container "$CONTAINER_NAME"
_remove_container "$CONTAINER_NAME" || _die "Failed to remove container ${CONTAINER_NAME}"

echo "Starting container..."
bash "${SCRIPT_DIR}/start.sh" || _die "Failed to start container ${CONTAINER_NAME}"

echo "Installation complete for service ${CONTAINER_NAME}."
# NOTE(review): HOST_NAME and MONITOR_PORT are not in the required-vars check
# above, so this URL may print with empty fields — confirm the agent always
# exports them before install.
echo "Dashboard available at http://${HOST_NAME}:${MONITOR_PORT}"
diff --git a/llamacpp-monitor/logs.sh b/llamacpp-monitor/logs.sh
new file mode 100755
index 0000000..2c31a20
--- /dev/null
+++ b/llamacpp-monitor/logs.sh
@@ -0,0 +1,8 @@
#!/bin/bash
# Stream the service container's logs until the user interrupts with Ctrl+C.
source "${AGENT_PATH}/common.sh"
_check_required_env_vars "CONTAINER_NAME"

echo "Showing logs for ${CONTAINER_NAME}... (Press Ctrl+C to stop)"
_grey_start
# Quoted to survive word-splitting/globbing in unusual container names.
_get_container_logs "$CONTAINER_NAME"
_grey_end
diff --git a/llamacpp-monitor/monitor.py b/llamacpp-monitor/monitor.py
new file mode 100644
index 0000000..bb13b35
--- /dev/null
+++ b/llamacpp-monitor/monitor.py
@@ -0,0 +1,465 @@
+#!/usr/bin/env python3
+"""llama.cpp monitor - lightweight diagnostics dashboard."""
+
+import http.server
+import json
+import os
+import re
+import subprocess
+import time
+import urllib.error
+import urllib.request
+
# Base URL of the llama.cpp server whose /health, /slots, /v1/models and
# /metrics endpoints this dashboard polls (override via environment).
LLAMA_SERVER_URL = os.environ.get("LLAMA_SERVER_URL", "http://localhost:8080")
# TCP port the dashboard itself listens on; default 80 matches service.env.
MONITOR_PORT = int(os.environ.get("MONITOR_PORT", "80"))
+
+
def get_gpu_stats():
    """Query nvidia-smi for the first GPU's utilization/memory/thermal stats.

    Returns:
        dict: ``{"available": True, ...numeric fields..., "name": str}`` on
        success, or ``{"available": False}`` when nvidia-smi is missing,
        times out, fails, or emits unparseable output.
    """
    try:
        result = subprocess.run(
            ["nvidia-smi",
             "--query-gpu=utilization.gpu,memory.used,memory.total,temperature.gpu,power.draw,power.limit,fan.speed,name",
             "--format=csv,noheader,nounits"],
            capture_output=True, text=True, timeout=5
        )
        if result.returncode == 0:
            # nvidia-smi prints one CSV line per GPU; this dashboard reports
            # only the first GPU. Splitting the whole stdout on commas (as the
            # previous code did) merged fields across GPUs on multi-GPU hosts.
            first_line = result.stdout.strip().splitlines()[0]
            # maxsplit=7 keeps a product name that itself contains commas intact.
            parts = [p.strip() for p in first_line.split(",", 7)]
            return {
                "available": True,
                "utilization": float(parts[0]),
                "memory_used": float(parts[1]),
                "memory_total": float(parts[2]),
                "temperature": float(parts[3]),
                "power_draw": float(parts[4]),
                "power_limit": float(parts[5]),
                "fan_speed": float(parts[6]),
                "name": parts[7],
            }
    except Exception:
        # Best-effort probe: no NVIDIA driver, timeout, or a parse error all
        # simply mean "no GPU stats" rather than a monitor failure.
        pass
    return {"available": False}
+
+
def fetch_json(path):
    """GET ``LLAMA_SERVER_URL + path`` and decode the JSON body.

    Returns the parsed object, or ``None`` on any network/decode error.
    """
    try:
        # Context manager closes the connection even if json.loads raises;
        # the previous code leaked the response object on decode errors.
        with urllib.request.urlopen(f"{LLAMA_SERVER_URL}{path}", timeout=3) as resp:
            return json.loads(resp.read())
    except Exception:
        return None
+
+
def fetch_text(path):
    """GET ``LLAMA_SERVER_URL + path`` and return the decoded body as text.

    Returns the response text, or ``None`` on any network/decode error.
    """
    try:
        # Context manager guarantees the response is closed on all paths.
        with urllib.request.urlopen(f"{LLAMA_SERVER_URL}{path}", timeout=3) as resp:
            return resp.read().decode()
    except Exception:
        return None
+
+
def parse_prometheus(text):
    """Parse Prometheus exposition text into ``{metric_name: float_value}``.

    Label sets are ignored (the bare metric name is the key), comment lines
    are skipped, and non-numeric samples are dropped. Returns ``{}`` for
    empty or ``None`` input.
    """
    if not text:
        return {}
    parsed = {}
    for raw_line in text.strip().split("\n"):
        # "#" lines are HELP/TYPE metadata, not samples.
        if raw_line.startswith("#"):
            continue
        match = re.match(r'^([\w:]+)(?:\{[^}]*\})?\s+([\d.eE+-]+)', raw_line)
        if match is None:
            continue
        name, value_text = match.groups()
        try:
            parsed[name] = float(value_text)
        except ValueError:
            continue
    return parsed
+
+
def get_all_stats():
    """Collect one snapshot of everything the dashboard displays."""
    # Gather each source in turn (GPU first, then the llama.cpp endpoints),
    # timestamping the snapshot once all fetches have completed.
    gpu_info = get_gpu_stats()
    health_info = fetch_json("/health")
    slot_info = fetch_json("/slots")
    model_info = fetch_json("/v1/models")
    metric_map = parse_prometheus(fetch_text("/metrics"))

    snapshot = {
        "timestamp": time.time(),
        "gpu": gpu_info,
        "health": health_info,
        "slots": slot_info,
        "model": model_info,
        "metrics": metric_map,
        "llama_url": LLAMA_SERVER_URL,
    }
    return snapshot
+
+
# NOTE(review): this template appears truncated/garbled — the HTML tags, CSS,
# and the script that should poll /api/stats and populate the "--" placeholder
# values are missing from this copy. Recover the full dashboard markup from the
# original source before relying on it; served as-is it renders bare text.
DASHBOARD_HTML = r"""




llama.cpp Monitor





 llama.cpp Monitor
 ...












GPU Memory


 Used
 --MiB
 / --MiB










GPU Vitals


 Temperature
 --°C



 Power
 --W
 / --W


 Fan
 --%


 GPU
 --






Model


--Model

--Parameters

--Size

--Context (train)

--GPU Layers












"""
+
+
class Handler(http.server.BaseHTTPRequestHandler):
    """Serves the dashboard page, the JSON stats API, and a health probe."""

    def _reply(self, status, content_type, payload, extra_headers=()):
        # Shared response path: status line, Content-Type, any extra headers,
        # then the body bytes.
        self.send_response(status)
        self.send_header("Content-Type", content_type)
        for header_name, header_value in extra_headers:
            self.send_header(header_name, header_value)
        self.end_headers()
        self.wfile.write(payload)

    def do_GET(self):
        if self.path in ("/", "/index.html"):
            self._reply(200, "text/html; charset=utf-8", DASHBOARD_HTML.encode())
        elif self.path == "/api/stats":
            body = json.dumps(get_all_stats()).encode()
            # no-cache so the dashboard's polling always sees fresh numbers.
            self._reply(200, "application/json", body,
                        (("Cache-Control", "no-cache"),))
        elif self.path == "/health":
            self._reply(200, "application/json", b'{"status":"ok"}')
        else:
            self.send_response(404)
            self.end_headers()

    def log_message(self, fmt, *args):
        # Intentionally quiet: periodic /api/stats polling would flood stdout.
        pass
+
+
def main():
    """Run the monitor's HTTP server on all interfaces until interrupted."""
    httpd = http.server.HTTPServer(("0.0.0.0", MONITOR_PORT), Handler)
    print(f"llama.cpp monitor listening on port {MONITOR_PORT}")
    print(f" llama server: {LLAMA_SERVER_URL}")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl+C is the normal shutdown path; fall through to close the socket.
        pass
    httpd.server_close()


if __name__ == "__main__":
    main()
diff --git a/llamacpp-monitor/ports.sh b/llamacpp-monitor/ports.sh
new file mode 100755
index 0000000..1ee2dcc
--- /dev/null
+++ b/llamacpp-monitor/ports.sh
@@ -0,0 +1,5 @@
#!/bin/bash
# Print the host port this service exposes (consumed by the agent tooling).
source "${AGENT_PATH}/common.sh"
_check_required_env_vars "MONITOR_PORT"

# Quoted so the output is emitted verbatim, without word-splitting/globbing.
echo "$MONITOR_PORT"
diff --git a/llamacpp-monitor/start.sh b/llamacpp-monitor/start.sh
new file mode 100755
index 0000000..bc0c389
--- /dev/null
+++ b/llamacpp-monitor/start.sh
@@ -0,0 +1,31 @@
#!/bin/bash
# Launch the monitor container. The stock python image runs monitor.py,
# which is bind-mounted read-only from this template directory.
source "${AGENT_PATH}/common.sh"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
_check_required_env_vars "CONTAINER_NAME" "LLAMA_SERVER_URL" "MONITOR_PORT" "IMAGE_REGISTRY" "IMAGE_REPO" "IMAGE_TAG"

# NOTE(review): the command is assembled as a single string because
# _create_and_start_container appears to take one; values containing spaces
# would break word-splitting here — confirm the agent never supplies such
# values (or switch the helper to array arguments).
DOCKER_RUN_CMD="docker run -d \
    --restart unless-stopped \
    --name ${CONTAINER_NAME} \
    -p ${MONITOR_PORT}:${MONITOR_PORT} \
    -e LLAMA_SERVER_URL=${LLAMA_SERVER_URL} \
    -e MONITOR_PORT=${MONITOR_PORT} \
    -v ${SCRIPT_DIR}/monitor.py:/app/monitor.py:ro \
    ${IMAGE_REGISTRY}/${IMAGE_REPO}:${IMAGE_TAG} \
    python3 /app/monitor.py"

echo "Starting container ${CONTAINER_NAME}..."

if ! _create_and_start_container "$DOCKER_RUN_CMD" "$CONTAINER_NAME"; then
    # Surface the container's own output before dying, when it got far
    # enough to exist.
    if _is_container_exists "$CONTAINER_NAME"; then
        echo "Attempting to get logs from failed container..."
        _get_container_logs "$CONTAINER_NAME"
    fi
    _die "Failed to start container ${CONTAINER_NAME}"
fi

# "Started" is not "stayed up": catch containers that exit immediately.
if ! _is_container_running "$CONTAINER_NAME"; then
    _get_container_logs "$CONTAINER_NAME"
    _die "Container ${CONTAINER_NAME} is not running after start attempt"
fi

echo "Service ${CONTAINER_NAME} started on port ${MONITOR_PORT}."
diff --git a/llamacpp-monitor/status.sh b/llamacpp-monitor/status.sh
new file mode 100755
index 0000000..fedafa7
--- /dev/null
+++ b/llamacpp-monitor/status.sh
@@ -0,0 +1,11 @@
#!/bin/bash
# Health check: the container must be running AND the monitor must answer
# its own /health endpoint.
source "${AGENT_PATH}/common.sh"
_check_required_env_vars "CONTAINER_NAME" "MONITOR_PORT"

_is_container_running "$CONTAINER_NAME" || _die "Service is not running - did not find container $CONTAINER_NAME."

# -s silences progress, -f makes HTTP errors exit non-zero.
curl -sf "http://localhost:${MONITOR_PORT}/health" > /dev/null \
    || _die "Service is not healthy - no response from monitor on port ${MONITOR_PORT}."

echo "Service ${CONTAINER_NAME} is healthy."
exit 0
diff --git a/llamacpp-monitor/stop.sh b/llamacpp-monitor/stop.sh
new file mode 100755
index 0000000..86f043d
--- /dev/null
+++ b/llamacpp-monitor/stop.sh
@@ -0,0 +1,7 @@
#!/bin/bash
# Stop the service container (the container is kept so it can be restarted).
source "${AGENT_PATH}/common.sh"
_check_required_env_vars "CONTAINER_NAME"

echo "Stopping service ${CONTAINER_NAME}..."
# Quoted to survive word-splitting/globbing in unusual container names.
_stop_container "$CONTAINER_NAME" || _die "Failed to stop container ${CONTAINER_NAME}"
echo "Service ${CONTAINER_NAME} stopped."
diff --git a/llamacpp-monitor/template_info.env b/llamacpp-monitor/template_info.env
new file mode 100644
index 0000000..849fa4d
--- /dev/null
+++ b/llamacpp-monitor/template_info.env
@@ -0,0 +1,17 @@
+# DO NOT EDIT THIS FILE FOR YOUR SERVICE!
+# This file is replaced from the template whenever there is an update.
+# Edit the service.env file to make changes.
+
+# Template to use - always required!
+TEMPLATE=llamacpp-monitor
+REQUIRES_HOST_ROOT=false
+REQUIRES_DOCKER=true
+REQUIRES_DOCKER_ROOT=true
+
+# Container settings
+CONTAINER_NAME="llamacpp-monitor"
+
+# Image settings
+IMAGE_REGISTRY="docker.io"
+IMAGE_REPO="python"
+IMAGE_TAG="3.12-slim"
diff --git a/llamacpp-monitor/uninstall.sh b/llamacpp-monitor/uninstall.sh
new file mode 100755
index 0000000..ef207b4
--- /dev/null
+++ b/llamacpp-monitor/uninstall.sh
@@ -0,0 +1,7 @@
#!/bin/bash
# Uninstall the service by removing its container entirely.
source "${AGENT_PATH}/common.sh"
_check_required_env_vars "CONTAINER_NAME"

echo "Uninstalling service ${CONTAINER_NAME}..."
# Quoted to survive word-splitting/globbing in unusual container names.
_remove_container "$CONTAINER_NAME" || _die "Failed to remove container ${CONTAINER_NAME}"
echo "Service ${CONTAINER_NAME} uninstalled."