diff --git a/llamacpp-monitor/config/service.env b/llamacpp-monitor/config/service.env
new file mode 100644
index 0000000..98f8323
--- /dev/null
+++ b/llamacpp-monitor/config/service.env
@@ -0,0 +1,11 @@
+# Service settings specific to this server
+# (can also override anything in the template_info.env file)
+
+# URL of the llama.cpp server to monitor
+# NOTE(review): start.sh hands this value to the monitor container as an
+# environment variable, so "localhost" is presumably resolved inside that
+# container rather than on the host - confirm the server is reachable there.
+LLAMA_SERVER_URL="http://localhost:8080"
+
+# Port to serve the monitor dashboard on
+MONITOR_PORT=80
+
+# Server Settings
+SSH_USER="root"
diff --git a/llamacpp-monitor/install.sh b/llamacpp-monitor/install.sh
new file mode 100755
index 0000000..06ccb07
--- /dev/null
+++ b/llamacpp-monitor/install.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Install the service: pull the image, remove any existing container with the
+# same name, then start a fresh one through start.sh.
+source "${AGENT_PATH}/common.sh"
+_check_required_env_vars "CONTAINER_NAME" "IMAGE_REGISTRY" "IMAGE_REPO" "IMAGE_TAG"
+
+echo "Pulling image ${IMAGE_REGISTRY}/${IMAGE_REPO}:${IMAGE_TAG}..."
+docker pull "$IMAGE_REGISTRY/$IMAGE_REPO:$IMAGE_TAG" || _die "Failed to pull image $IMAGE_REGISTRY/$IMAGE_REPO:$IMAGE_TAG"
+
+echo "Stopping and removing any existing container..."
+# The result of the stop is not checked; only a failed removal aborts.
+_stop_container $CONTAINER_NAME
+_remove_container $CONTAINER_NAME || _die "Failed to remove container ${CONTAINER_NAME}"
+
+echo "Starting container..."
+# start.sh is looked up relative to the current working directory.
+bash ./start.sh || _die "Failed to start container ${CONTAINER_NAME}"
+
+echo "Installation complete for service ${CONTAINER_NAME}."
+# NOTE(review): HOST_NAME and MONITOR_PORT are used here but are not in the
+# _check_required_env_vars list above - presumably supplied by the agent and
+# service.env; confirm.
+echo "Dashboard available at http://${HOST_NAME}:${MONITOR_PORT}"
diff --git a/llamacpp-monitor/logs.sh b/llamacpp-monitor/logs.sh
new file mode 100755
index 0000000..2c31a20
--- /dev/null
+++ b/llamacpp-monitor/logs.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Print the container's logs between the grey-colour start/end helpers.
+source "${AGENT_PATH}/common.sh"
+_check_required_env_vars "CONTAINER_NAME"
+
+echo "Showing logs for ${CONTAINER_NAME}... 
(Press Ctrl+C to stop)"
+_grey_start
+_get_container_logs $CONTAINER_NAME
+_grey_end
diff --git a/llamacpp-monitor/monitor.py b/llamacpp-monitor/monitor.py
new file mode 100644
index 0000000..bb13b35
--- /dev/null
+++ b/llamacpp-monitor/monitor.py
@@ -0,0 +1,465 @@
+#!/usr/bin/env python3
+"""llama.cpp monitor - lightweight diagnostics dashboard."""
+
+import http.server
+import json
+import os
+import re
+import subprocess
+import time
+import urllib.error
+import urllib.request
+
+LLAMA_SERVER_URL = os.environ.get("LLAMA_SERVER_URL", "http://localhost:8080")
+MONITOR_PORT = int(os.environ.get("MONITOR_PORT", "80"))
+
+# One Prometheus sample line: metric name, optional {labels}, numeric value.
+_PROM_SAMPLE_RE = re.compile(r'^([\w:]+)(?:\{[^}]*\})?\s+([\d.eE+-]+)')
+
+
+def get_gpu_stats():
+    """Return a snapshot of the first GPU reported by ``nvidia-smi``.
+
+    Returns:
+        A dict with ``"available": True`` plus ``utilization``,
+        ``memory_used``, ``memory_total``, ``temperature``, ``power_draw``,
+        ``power_limit`` and ``fan_speed`` as floats and ``name`` as a string.
+        ``{"available": False}`` is returned when nvidia-smi cannot be run,
+        times out, exits non-zero, or prints a row that cannot be parsed
+        (for example a field reported as ``[N/A]``).
+    """
+    unavailable = {"available": False}
+    try:
+        result = subprocess.run(
+            ["nvidia-smi",
+             "--query-gpu=utilization.gpu,memory.used,memory.total,temperature.gpu,power.draw,power.limit,fan.speed,name",
+             "--format=csv,noheader,nounits"],
+            capture_output=True, text=True, timeout=5
+        )
+    except (OSError, ValueError, subprocess.SubprocessError):
+        # Binary missing / not executable, undecodable output, or timeout.
+        return unavailable
+    if result.returncode != 0:
+        return unavailable
+
+    rows = result.stdout.strip().splitlines()
+    if not rows:
+        return unavailable
+    # nvidia-smi prints one CSV row per GPU. Only the first row is used;
+    # splitting the whole output on commas would glue the next GPU's first
+    # column onto this GPU's name. maxsplit keeps the name column intact.
+    parts = [p.strip() for p in rows[0].split(",", 7)]
+    try:
+        return {
+            "available": True,
+            "utilization": float(parts[0]),
+            "memory_used": float(parts[1]),
+            "memory_total": float(parts[2]),
+            "temperature": float(parts[3]),
+            "power_draw": float(parts[4]),
+            "power_limit": float(parts[5]),
+            "fan_speed": float(parts[6]),
+            "name": parts[7],
+        }
+    except (ValueError, IndexError):
+        # Non-numeric field or fewer columns than requested.
+        return unavailable
+
+
+def fetch_json(path):
+    """GET ``LLAMA_SERVER_URL + path`` and return the decoded JSON body.
+
+    Returns ``None`` on any failure (connection, HTTP error, timeout after
+    3 seconds, invalid JSON) so the dashboard can show missing data.
+    """
+    try:
+        # The context manager closes the HTTP response even if parsing fails.
+        with urllib.request.urlopen(f"{LLAMA_SERVER_URL}{path}", timeout=3) as resp:
+            return json.loads(resp.read())
+    except Exception:
+        # Deliberate best effort: an unreachable server is a normal state.
+        return None
+
+
+def fetch_text(path):
+    """GET ``LLAMA_SERVER_URL + path`` and return the body decoded as text.
+
+    Returns ``None`` on any failure (connection, HTTP error, timeout after
+    3 seconds, undecodable body).
+    """
+    try:
+        with urllib.request.urlopen(f"{LLAMA_SERVER_URL}{path}", timeout=3) as resp:
+            return resp.read().decode()
+    except Exception:
+        # Deliberate best effort: an unreachable server is a normal state.
+        return None
+
+
+def parse_prometheus(text):
+    """Parse Prometheus text exposition into ``{metric_name: float}``.
+
+    Lines starting with ``#`` and lines that do not look like a sample are
+    skipped. Labels are discarded, so when several samples share a metric
+    name the last one wins. Returns ``{}`` for empty or ``None`` input.
+    """
+    if not text:
+        return {}
+    metrics = {}
+    for line in text.strip().split("\n"):
+        if line.startswith("#"):
+            continue
+        sample = _PROM_SAMPLE_RE.match(line)
+        if sample is None:
+            continue
+        name, raw_value = sample.groups()
+        try:
+            metrics[name] = float(raw_value)
+        except ValueError:
+            # The value pattern also matches junk such as "-"; skip it.
+            continue
+    return metrics
+
+
+def get_all_stats():
+    """Collect GPU stats and llama.cpp server data into one dict.
+
+    Queries nvidia-smi and the server's ``/health``, ``/slots``,
+    ``/v1/models`` and ``/metrics`` endpoints one after another. Sources
+    that fail show up as ``None`` (JSON endpoints), ``{}`` (metrics) or
+    ``{"available": False}`` (GPU).
+    """
+    gpu = get_gpu_stats()
+    health = fetch_json("/health")
+    slots = fetch_json("/slots")
+    model = fetch_json("/v1/models")
+    metrics = parse_prometheus(fetch_text("/metrics"))
+
+    return {
+        "timestamp": time.time(),
+        "gpu": gpu,
+        "health": health,
+        "slots": slots,
+        "model": model,
+        "metrics": metrics,
+        "llama_url": LLAMA_SERVER_URL,
+    }
+
+
+DASHBOARD_HTML = r""" + + + + +llama.cpp Monitor + + + + +

+ llama.cpp Monitor + ... + + +

+ +
+ +
+

GPU Utilization

+
+
+ Load + --% +
+
+
+
+
+ + +
+

GPU Memory

+
+
+ Used + --MiB + / --MiB +
+
+
+
+
+ + +
+

GPU Vitals

+
+
+ Temperature + --°C +
+
+
+ Power + --W + / --W +
+
+ Fan + --% +
+
+ GPU + -- +
+
+
+ + +
+

Model

+
+
--Model
+
--Parameters
+
--Size
+
--Context (train)
+
--GPU Layers
+
+
+ + +
+

Throughput

+
+
--
Prompt tok/s
+
--
Generate tok/s
+
--
Prompt Tokens (total)
+
--
Gen Tokens (total)
+
--
Requests
+
--
KV Cache Used
+
+
+ + +
+

Slots

+
+
Waiting for data...
+
+
+
+ + + + +"""
+
+
+class Handler(http.server.BaseHTTPRequestHandler):
+    """Serve the dashboard page, the JSON stats API and a liveness probe."""
+
+    def _reply(self, status, body=b"", content_type=None, extra_headers=()):
+        """Send a complete response with an explicit Content-Length."""
+        self.send_response(status)
+        if content_type is not None:
+            self.send_header("Content-Type", content_type)
+        for header_name, header_value in extra_headers:
+            self.send_header(header_name, header_value)
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        if body:
+            self.wfile.write(body)
+
+    def do_GET(self):
+        """Route GET requests; unknown paths get an empty 404."""
+        # Match on the path only so "/api/stats?t=123" is not a 404.
+        route = self.path.split("?", 1)[0]
+        if route in ("/", "/index.html"):
+            self._reply(200, DASHBOARD_HTML.encode(), "text/html; charset=utf-8")
+        elif route == "/api/stats":
+            payload = json.dumps(get_all_stats()).encode()
+            self._reply(200, payload, "application/json",
+                        (("Cache-Control", "no-cache"),))
+        elif route == "/health":
+            self._reply(200, b'{"status":"ok"}', "application/json")
+        else:
+            self._reply(404)
+
+    def log_message(self, fmt, *args):
+        pass  # suppress request logging
+
+
+def main():
+    """Run the monitor HTTP server until interrupted or terminated."""
+    import signal  # local import keeps the module's import block unchanged
+
+    def _raise_interrupt(signum, frame):
+        raise KeyboardInterrupt
+
+    # As PID 1 in a container the process gets no default SIGTERM action,
+    # so "docker stop" would wait for its kill timeout without a handler.
+    signal.signal(signal.SIGTERM, _raise_interrupt)
+
+    # Threaded server: a slow /api/stats (several sequential upstream
+    # timeouts) must not block the /health probe or other dashboard clients.
+    server = http.server.ThreadingHTTPServer(("0.0.0.0", MONITOR_PORT), Handler)
+    # flush so the lines reach the container log despite block buffering.
+    print(f"llama.cpp monitor listening on port {MONITOR_PORT}", flush=True)
+    print(f" llama server: {LLAMA_SERVER_URL}", flush=True)
+    try:
+        server.serve_forever()
+    except KeyboardInterrupt:
+        pass
+    finally:
+        # Release the listening socket on every exit path.
+        server.server_close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/llamacpp-monitor/ports.sh b/llamacpp-monitor/ports.sh
new file mode 100755
index 0000000..1ee2dcc
--- /dev/null
+++ b/llamacpp-monitor/ports.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# Print the port the monitor dashboard is served on.
+source "${AGENT_PATH}/common.sh"
+_check_required_env_vars "MONITOR_PORT"
+
+echo $MONITOR_PORT
diff --git a/llamacpp-monitor/start.sh b/llamacpp-monitor/start.sh
new file mode 100755
index 0000000..bc0c389
--- /dev/null
+++ b/llamacpp-monitor/start.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Start the monitor container: monitor.py from this script's directory is
+# bind-mounted read-only into the image and run with python3.
+source "${AGENT_PATH}/common.sh"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+_check_required_env_vars "CONTAINER_NAME" "LLAMA_SERVER_URL" "MONITOR_PORT" "IMAGE_REGISTRY" "IMAGE_REPO" "IMAGE_TAG"
+
+# NOTE(review): no --network option is passed, so a LLAMA_SERVER_URL that
+# points at "localhost" is resolved inside the container, not on the Docker
+# host - confirm the llama.cpp server is reachable from there.
+# NOTE(review): monitor.py shells out to nvidia-smi; presumably the stock
+# python image has neither that binary nor GPU access - confirm.
+DOCKER_RUN_CMD="docker run -d \
+    --restart unless-stopped \
+    --name ${CONTAINER_NAME} \
+    -p ${MONITOR_PORT}:${MONITOR_PORT} \
+ 
-e LLAMA_SERVER_URL=${LLAMA_SERVER_URL} \
+    -e MONITOR_PORT=${MONITOR_PORT} \
+    -v ${SCRIPT_DIR}/monitor.py:/app/monitor.py:ro \
+    ${IMAGE_REGISTRY}/${IMAGE_REPO}:${IMAGE_TAG} \
+    python3 /app/monitor.py"
+
+echo "Starting container ${CONTAINER_NAME}..."
+
+# On a failed start, dump the container's logs (if it was created) first.
+if ! _create_and_start_container "$DOCKER_RUN_CMD" "$CONTAINER_NAME"; then
+    if _is_container_exists "$CONTAINER_NAME"; then
+        echo "Attempting to get logs from failed container..."
+        _get_container_logs "$CONTAINER_NAME"
+    fi
+    _die "Failed to start container ${CONTAINER_NAME}"
+fi
+
+# The start helper succeeding is not enough; verify the container is running.
+if ! _is_container_running "$CONTAINER_NAME"; then
+    _get_container_logs "$CONTAINER_NAME"
+    _die "Container ${CONTAINER_NAME} is not running after start attempt"
+fi
+
+echo "Service ${CONTAINER_NAME} started on port ${MONITOR_PORT}."
diff --git a/llamacpp-monitor/status.sh b/llamacpp-monitor/status.sh
new file mode 100755
index 0000000..fedafa7
--- /dev/null
+++ b/llamacpp-monitor/status.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Health check: the container must be running and the monitor's /health
+# endpoint must answer on MONITOR_PORT.
+source "${AGENT_PATH}/common.sh"
+_check_required_env_vars "CONTAINER_NAME" "MONITOR_PORT"
+
+_is_container_running "$CONTAINER_NAME" || _die "Service is not running - did not find container $CONTAINER_NAME."
+
+# --max-time bounds the probe so a hung monitor cannot stall this script.
+curl -sf --max-time 5 "http://localhost:${MONITOR_PORT}/health" > /dev/null \
+    || _die "Service is not healthy - no response from monitor on port ${MONITOR_PORT}."
+
+echo "Service ${CONTAINER_NAME} is healthy."
+exit 0
diff --git a/llamacpp-monitor/stop.sh b/llamacpp-monitor/stop.sh
new file mode 100755
index 0000000..86f043d
--- /dev/null
+++ b/llamacpp-monitor/stop.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Stop the service's container; abort via _die if the stop helper fails.
+source "${AGENT_PATH}/common.sh"
+_check_required_env_vars "CONTAINER_NAME"
+
+echo "Stopping service ${CONTAINER_NAME}..."
+_stop_container "$CONTAINER_NAME" || _die "Failed to stop container ${CONTAINER_NAME}"
+echo "Service ${CONTAINER_NAME} stopped."
diff --git a/llamacpp-monitor/template_info.env b/llamacpp-monitor/template_info.env
new file mode 100644
index 0000000..849fa4d
--- /dev/null
+++ b/llamacpp-monitor/template_info.env
@@ -0,0 +1,17 @@
+# DO NOT EDIT THIS FILE FOR YOUR SERVICE!
+# This file is replaced from the template whenever there is an update.
+# Edit the service.env file to make changes.
+
+# Template to use - always required!
+TEMPLATE=llamacpp-monitor
+REQUIRES_HOST_ROOT=false
+REQUIRES_DOCKER=true
+REQUIRES_DOCKER_ROOT=true
+
+# Container settings
+CONTAINER_NAME="llamacpp-monitor"
+
+# Image settings
+# Combined as ${IMAGE_REGISTRY}/${IMAGE_REPO}:${IMAGE_TAG} by install.sh and
+# start.sh; monitor.py is run with this image's python3.
+IMAGE_REGISTRY="docker.io"
+IMAGE_REPO="python"
+IMAGE_TAG="3.12-slim"
diff --git a/llamacpp-monitor/uninstall.sh b/llamacpp-monitor/uninstall.sh
new file mode 100755
index 0000000..ef207b4
--- /dev/null
+++ b/llamacpp-monitor/uninstall.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Uninstall the service by removing its container; abort via _die if the
+# removal helper fails.
+source "${AGENT_PATH}/common.sh"
+_check_required_env_vars "CONTAINER_NAME"
+
+echo "Uninstalling service ${CONTAINER_NAME}..."
+_remove_container $CONTAINER_NAME || _die "Failed to remove container ${CONTAINER_NAME}"
+echo "Service ${CONTAINER_NAME} uninstalled."