diff --git a/llamacpp-monitor/config/service.env b/llamacpp-monitor/config/service.env
deleted file mode 100644
index 98f8323..0000000
--- a/llamacpp-monitor/config/service.env
+++ /dev/null
@@ -1,11 +0,0 @@
-# Service settings specific to this server
-# (can also override anything in the template_info.env file)
-
-# URL of the llama.cpp server to monitor
-LLAMA_SERVER_URL="http://localhost:8080"
-
-# Port to serve the monitor dashboard on
-MONITOR_PORT=80
-
-# Server Settings
-SSH_USER="root"
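
As a quick sanity check before installing, the llama.cpp endpoint configured above can be probed directly. A minimal sketch (it assumes only that the target exposes llama.cpp's standard `/health` endpoint, the same one `monitor.py` below polls):

```python
import os
import urllib.request

# Same default as monitor.py uses when the variable is unset
url = os.environ.get("LLAMA_SERVER_URL", "http://localhost:8080")
try:
    with urllib.request.urlopen(f"{url}/health", timeout=3) as resp:
        print(f"{url} reachable (HTTP {resp.status})")
except Exception as exc:
    print(f"{url} not reachable: {exc}")
```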
diff --git a/llamacpp-monitor/install.sh b/llamacpp-monitor/install.sh
deleted file mode 100755
index 06ccb07..0000000
--- a/llamacpp-monitor/install.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-source "${AGENT_PATH}/common.sh"
-_check_required_env_vars "CONTAINER_NAME" "IMAGE_REGISTRY" "IMAGE_REPO" "IMAGE_TAG" "MONITOR_PORT" "HOST_NAME"
-
-echo "Pulling image ${IMAGE_REGISTRY}/${IMAGE_REPO}:${IMAGE_TAG}..."
-docker pull "$IMAGE_REGISTRY/$IMAGE_REPO:$IMAGE_TAG" || _die "Failed to pull image $IMAGE_REGISTRY/$IMAGE_REPO:$IMAGE_TAG"
-
-echo "Stopping and removing any existing container..."
-_stop_container "$CONTAINER_NAME"
-_remove_container "$CONTAINER_NAME" || _die "Failed to remove container ${CONTAINER_NAME}"
-
-echo "Starting container..."
-bash ./start.sh || _die "Failed to start container ${CONTAINER_NAME}"
-
-echo "Installation complete for service ${CONTAINER_NAME}."
-echo "Dashboard available at http://${HOST_NAME}:${MONITOR_PORT}"
diff --git a/llamacpp-monitor/logs.sh b/llamacpp-monitor/logs.sh
deleted file mode 100755
index 2c31a20..0000000
--- a/llamacpp-monitor/logs.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-source "${AGENT_PATH}/common.sh"
-_check_required_env_vars "CONTAINER_NAME"
-
-echo "Showing logs for ${CONTAINER_NAME}... (Press Ctrl+C to stop)"
-_grey_start
-_get_container_logs "$CONTAINER_NAME"
-_grey_end
diff --git a/llamacpp-monitor/monitor.py b/llamacpp-monitor/monitor.py
deleted file mode 100644
index bb13b35..0000000
--- a/llamacpp-monitor/monitor.py
+++ /dev/null
@@ -1,465 +0,0 @@
-#!/usr/bin/env python3
-"""llama.cpp monitor - lightweight diagnostics dashboard."""
-
-import http.server
-import json
-import os
-import re
-import subprocess
-import time
-import urllib.error
-import urllib.request
-
-LLAMA_SERVER_URL = os.environ.get("LLAMA_SERVER_URL", "http://localhost:8080")
-MONITOR_PORT = int(os.environ.get("MONITOR_PORT", "80"))
-
-
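-# Query GPU telemetry through nvidia-smi's CSV interface; returns
-# {"available": False} on CPU-only hosts or when the query fails.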
-def get_gpu_stats():
- try:
- result = subprocess.run(
- ["nvidia-smi",
- "--query-gpu=utilization.gpu,memory.used,memory.total,temperature.gpu,power.draw,power.limit,fan.speed,name",
- "--format=csv,noheader,nounits"],
- capture_output=True, text=True, timeout=5
- )
- if result.returncode == 0:
-            # nvidia-smi emits one CSV line per GPU, e.g.
-            # "42, 8192, 24576, 65, 180.50, 450.00, 55, NVIDIA GeForce RTX 4090";
-            # only the first GPU is reported on multi-GPU hosts.
-            parts = [p.strip() for p in result.stdout.strip().splitlines()[0].split(",")]
- return {
- "available": True,
- "utilization": float(parts[0]),
- "memory_used": float(parts[1]),
- "memory_total": float(parts[2]),
- "temperature": float(parts[3]),
- "power_draw": float(parts[4]),
- "power_limit": float(parts[5]),
- "fan_speed": float(parts[6]),
- "name": parts[7],
- }
- except Exception:
- pass
- return {"available": False}
-
-
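-# Thin urllib helpers: any network or parse error collapses to None, so a
-# dead llama.cpp server degrades the dashboard instead of crashing it.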
-def fetch_json(path):
-    try:
-        with urllib.request.urlopen(f"{LLAMA_SERVER_URL}{path}", timeout=3) as resp:
-            return json.loads(resp.read())
-    except Exception:
-        return None
-
-
-def fetch_text(path):
-    try:
-        with urllib.request.urlopen(f"{LLAMA_SERVER_URL}{path}", timeout=3) as resp:
-            return resp.read().decode()
-    except Exception:
-        return None
-
-
-def parse_prometheus(text):
- if not text:
- return {}
- metrics = {}
- for line in text.strip().split("\n"):
- if line.startswith("#"):
- continue
- m = re.match(r'^([\w:]+)(?:\{[^}]*\})?\s+([\d.eE+-]+)', line)
- if m:
- key, val = m.group(1), m.group(2)
- try:
- metrics[key] = float(val)
- except ValueError:
- pass
- return metrics
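-# Example: parse_prometheus("llamacpp:tokens_predicted_total 42\n")
-# -> {"llamacpp:tokens_predicted_total": 42.0}; "#" comment lines and
-# any {label="..."} selectors are ignored.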
-
-
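-# Aggregate everything the dashboard needs into one JSON-serializable dict;
-# each sub-fetch degrades to None (or {} for the Prometheus metrics) when
-# the llama.cpp server is unreachable, so this endpoint never hard-fails.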
-def get_all_stats():
- gpu = get_gpu_stats()
- health = fetch_json("/health")
- slots = fetch_json("/slots")
- model = fetch_json("/v1/models")
- metrics = parse_prometheus(fetch_text("/metrics"))
-
- return {
- "timestamp": time.time(),
- "gpu": gpu,
- "health": health,
- "slots": slots,
- "model": model,
- "metrics": metrics,
- "llama_url": LLAMA_SERVER_URL,
- }
-
-
-DASHBOARD_HTML = r"""<!DOCTYPE html>
-<html lang="en">
-<head>
-<meta charset="utf-8">
-<title>llama.cpp Monitor</title>
-</head>
-<body>
-
-<header>
-  <h1>llama.cpp Monitor</h1>
-  <span id="status">...</span>
-</header>
-
-<section class="card">
-  <h2>GPU Memory</h2>
-  <div class="stat">
-    <span class="label">Used</span>
-    <span id="mem-used">--</span>MiB
-    <span class="dim">/ <span id="mem-total">--</span>MiB</span>
-  </div>
-</section>
-
-<section class="card">
-  <h2>GPU Vitals</h2>
-  <div class="stat">
-    <span class="label">Temperature</span>
-    <span id="temp">--</span>&deg;C
-  </div>
-  <div class="stat">
-    <span class="label">Power</span>
-    <span id="power">--</span>W
-    <span class="dim">/ <span id="power-limit">--</span>W</span>
-  </div>
-  <div class="stat">
-    <span class="label">Fan</span>
-    <span id="fan">--</span>%
-  </div>
-  <div class="stat">
-    <span class="label">GPU</span>
-    <span id="gpu-name">--</span>
-  </div>
-</section>
-
-<section class="card">
-  <h2>Model</h2>
-  <div class="stat"><span class="label">Model</span> <span id="model-name">--</span></div>
-  <div class="stat"><span class="label">Parameters</span> <span id="model-params">--</span></div>
-  <div class="stat"><span class="label">Size</span> <span id="model-size">--</span></div>
-  <div class="stat"><span class="label">Context (train)</span> <span id="model-ctx">--</span></div>
-  <div class="stat"><span class="label">GPU Layers</span> <span id="model-layers">--</span></div>
-</section>
-
-<script>
-function set(id, v) {
-  document.getElementById(id).textContent = (v === undefined || v === null) ? "--" : v;
-}
-async function refresh() {
-  try {
-    const s = await (await fetch("/api/stats")).json();
-    set("status", s.health ? "online" : "llama.cpp unreachable");
-    if (s.gpu.available) {
-      set("mem-used", Math.round(s.gpu.memory_used));
-      set("mem-total", Math.round(s.gpu.memory_total));
-      set("temp", s.gpu.temperature);
-      set("power", s.gpu.power_draw);
-      set("power-limit", s.gpu.power_limit);
-      set("fan", s.gpu.fan_speed);
-      set("gpu-name", s.gpu.name);
-    }
-    if (s.model && s.model.data && s.model.data.length) {
-      set("model-name", s.model.data[0].id);
-    }
-  } catch (e) {
-    set("status", "monitor error");
-  }
-}
-refresh();
-setInterval(refresh, 2000);
-</script>
-
-</body>
-</html>
-"""
-
-
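-# Minimal router: "/" serves the inline dashboard, "/api/stats" the
-# aggregated JSON, and "/health" a liveness probe used by status.sh.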
-class Handler(http.server.BaseHTTPRequestHandler):
- def do_GET(self):
- if self.path == "/" or self.path == "/index.html":
- self.send_response(200)
- self.send_header("Content-Type", "text/html; charset=utf-8")
- self.end_headers()
- self.wfile.write(DASHBOARD_HTML.encode())
- elif self.path == "/api/stats":
- stats = get_all_stats()
- self.send_response(200)
- self.send_header("Content-Type", "application/json")
- self.send_header("Cache-Control", "no-cache")
- self.end_headers()
- self.wfile.write(json.dumps(stats).encode())
- elif self.path == "/health":
- self.send_response(200)
- self.send_header("Content-Type", "application/json")
- self.end_headers()
- self.wfile.write(b'{"status":"ok"}')
- else:
- self.send_response(404)
- self.end_headers()
-
- def log_message(self, fmt, *args):
- pass # suppress request logging
-
-
-def main():
- server = http.server.HTTPServer(("0.0.0.0", MONITOR_PORT), Handler)
- print(f"llama.cpp monitor listening on port {MONITOR_PORT}")
- print(f" llama server: {LLAMA_SERVER_URL}")
- try:
- server.serve_forever()
- except KeyboardInterrupt:
- pass
- server.server_close()
-
-
-if __name__ == "__main__":
- main()
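
Since `/api/stats` is plain JSON, anything else on the host can poll it, not just the inline dashboard. A minimal sketch of a one-shot poll (assumes the monitor is reachable on localhost at the default MONITOR_PORT of 80):

```python
import json
import urllib.request

with urllib.request.urlopen("http://localhost:80/api/stats", timeout=5) as resp:
    stats = json.load(resp)

gpu = stats["gpu"]
if gpu["available"]:
    print(f"{gpu['name']}: {gpu['utilization']:.0f}% util, "
          f"{gpu['memory_used']:.0f}/{gpu['memory_total']:.0f} MiB, "
          f"{gpu['temperature']:.0f} C")
else:
    print("monitor is up, but no GPU is visible to it")
```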
diff --git a/llamacpp-monitor/ports.sh b/llamacpp-monitor/ports.sh
deleted file mode 100755
index 1ee2dcc..0000000
--- a/llamacpp-monitor/ports.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/bash
-source "${AGENT_PATH}/common.sh"
-_check_required_env_vars "MONITOR_PORT"
-
-echo "$MONITOR_PORT"
diff --git a/llamacpp-monitor/start.sh b/llamacpp-monitor/start.sh
deleted file mode 100755
index b1c2e7e..0000000
--- a/llamacpp-monitor/start.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-source "${AGENT_PATH}/common.sh"
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-_check_required_env_vars "CONTAINER_NAME" "LLAMA_SERVER_URL" "MONITOR_PORT" "IMAGE_REGISTRY" "IMAGE_REPO" "IMAGE_TAG"
-
-# Build GPU device mounts if nvidia-smi is available on the host
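-# (the device nodes and /usr/lib/nvidia mount assume a standard NVIDIA
-# driver layout; hosts with a different layout may need these adjusted)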
-GPU_ARGS=""
-if command -v nvidia-smi &>/dev/null; then
- GPU_ARGS="--device /dev/nvidia0:/dev/nvidia0 \
- --device /dev/nvidiactl:/dev/nvidiactl \
- --device /dev/nvidia-uvm:/dev/nvidia-uvm \
- -v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi:ro \
- -v /usr/lib/nvidia:/usr/lib/nvidia:ro \
- -e LD_LIBRARY_PATH=/usr/lib/nvidia"
-fi
-
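-# --network host lets the monitor reach a llama.cpp server bound to the
-# host's localhost, and exposes MONITOR_PORT without a -p mapping.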
-DOCKER_RUN_CMD="docker run -d \
- --restart unless-stopped \
- --name ${CONTAINER_NAME} \
- --network host \
- ${GPU_ARGS} \
- -e LLAMA_SERVER_URL=${LLAMA_SERVER_URL} \
- -e MONITOR_PORT=${MONITOR_PORT} \
- -v ${SCRIPT_DIR}/monitor.py:/app/monitor.py:ro \
- ${IMAGE_REGISTRY}/${IMAGE_REPO}:${IMAGE_TAG} \
- python3 /app/monitor.py"
-
-echo "Starting container ${CONTAINER_NAME}..."
-
-if ! _create_and_start_container "$DOCKER_RUN_CMD" "$CONTAINER_NAME"; then
-    if _is_container_exists "$CONTAINER_NAME"; then
-        echo "Attempting to get logs from failed container..."
-        _get_container_logs "$CONTAINER_NAME"
- fi
- _die "Failed to start container ${CONTAINER_NAME}"
-fi
-
-if ! _is_container_running "$CONTAINER_NAME"; then
-    _get_container_logs "$CONTAINER_NAME"
- _die "Container ${CONTAINER_NAME} is not running after start attempt"
-fi
-
-echo "Service ${CONTAINER_NAME} started on port ${MONITOR_PORT}."
diff --git a/llamacpp-monitor/status.sh b/llamacpp-monitor/status.sh
deleted file mode 100755
index fedafa7..0000000
--- a/llamacpp-monitor/status.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-source "${AGENT_PATH}/common.sh"
-_check_required_env_vars "CONTAINER_NAME" "MONITOR_PORT"
-
-_is_container_running "$CONTAINER_NAME" || _die "Service is not running - did not find container ${CONTAINER_NAME}."
-
-curl -sf http://localhost:${MONITOR_PORT}/health > /dev/null \
- || _die "Service is not healthy - no response from monitor on port ${MONITOR_PORT}."
-
-echo "Service ${CONTAINER_NAME} is healthy."
-exit 0
diff --git a/llamacpp-monitor/stop.sh b/llamacpp-monitor/stop.sh
deleted file mode 100755
index 86f043d..0000000
--- a/llamacpp-monitor/stop.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-source "${AGENT_PATH}/common.sh"
-_check_required_env_vars "CONTAINER_NAME"
-
-echo "Stopping service ${CONTAINER_NAME}..."
-_stop_container "$CONTAINER_NAME" || _die "Failed to stop container ${CONTAINER_NAME}"
-echo "Service ${CONTAINER_NAME} stopped."
diff --git a/llamacpp-monitor/template_info.env b/llamacpp-monitor/template_info.env
deleted file mode 100644
index 849fa4d..0000000
--- a/llamacpp-monitor/template_info.env
+++ /dev/null
@@ -1,17 +0,0 @@
-# DO NOT EDIT THIS FILE FOR YOUR SERVICE!
-# This file is replaced from the template whenever there is an update.
-# Edit the service.env file to make changes.
-
-# Template to use - always required!
-TEMPLATE=llamacpp-monitor
-REQUIRES_HOST_ROOT=false
-REQUIRES_DOCKER=true
-REQUIRES_DOCKER_ROOT=true
-
-# Container settings
-CONTAINER_NAME="llamacpp-monitor"
-
-# Image settings
-IMAGE_REGISTRY="docker.io"
-IMAGE_REPO="python"
-IMAGE_TAG="3.12-slim"
diff --git a/llamacpp-monitor/uninstall.sh b/llamacpp-monitor/uninstall.sh
deleted file mode 100755
index ef207b4..0000000
--- a/llamacpp-monitor/uninstall.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-source "${AGENT_PATH}/common.sh"
-_check_required_env_vars "CONTAINER_NAME"
-
-echo "Uninstalling service ${CONTAINER_NAME}..."
-_remove_container "$CONTAINER_NAME" || _die "Failed to remove container ${CONTAINER_NAME}"
-echo "Service ${CONTAINER_NAME} uninstalled."