diff --git a/llamacpp-monitor/config/service.env b/llamacpp-monitor/config/service.env deleted file mode 100644 index 98f8323..0000000 --- a/llamacpp-monitor/config/service.env +++ /dev/null @@ -1,11 +0,0 @@ -# Service settings specific to this server -# (can also override anything in the template_info.env file) - -# URL of the llama.cpp server to monitor -LLAMA_SERVER_URL="http://localhost:8080" - -# Port to serve the monitor dashboard on -MONITOR_PORT=80 - -# Server Settings -SSH_USER="root" diff --git a/llamacpp-monitor/install.sh b/llamacpp-monitor/install.sh deleted file mode 100755 index 06ccb07..0000000 --- a/llamacpp-monitor/install.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -source "${AGENT_PATH}/common.sh" -_check_required_env_vars "CONTAINER_NAME" "IMAGE_REGISTRY" "IMAGE_REPO" "IMAGE_TAG" - -echo "Pulling image ${IMAGE_REGISTRY}/${IMAGE_REPO}:${IMAGE_TAG}..." -docker pull "$IMAGE_REGISTRY/$IMAGE_REPO:$IMAGE_TAG" || _die "Failed to pull image $IMAGE_REGISTRY/$IMAGE_REPO:$IMAGE_TAG" - -echo "Stopping and removing any existing container..." -_stop_container $CONTAINER_NAME -_remove_container $CONTAINER_NAME || _die "Failed to remove container ${CONTAINER_NAME}" - -echo "Starting container..." -bash ./start.sh || _die "Failed to start container ${CONTAINER_NAME}" - -echo "Installation complete for service ${CONTAINER_NAME}." -echo "Dashboard available at http://${HOST_NAME}:${MONITOR_PORT}" diff --git a/llamacpp-monitor/logs.sh b/llamacpp-monitor/logs.sh deleted file mode 100755 index 2c31a20..0000000 --- a/llamacpp-monitor/logs.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -source "${AGENT_PATH}/common.sh" -_check_required_env_vars "CONTAINER_NAME" - -echo "Showing logs for ${CONTAINER_NAME}... (Press Ctrl+C to stop)" -_grey_start -_get_container_logs $CONTAINER_NAME -_grey_end diff --git a/llamacpp-monitor/monitor.py b/llamacpp-monitor/monitor.py deleted file mode 100644 index bb13b35..0000000 --- a/llamacpp-monitor/monitor.py +++ /dev/null @@ -1,465 +0,0 @@ -#!/usr/bin/env python3 -"""llama.cpp monitor - lightweight diagnostics dashboard.""" - -import http.server -import json -import os -import re -import subprocess -import time -import urllib.error -import urllib.request - -LLAMA_SERVER_URL = os.environ.get("LLAMA_SERVER_URL", "http://localhost:8080") -MONITOR_PORT = int(os.environ.get("MONITOR_PORT", "80")) - - -def get_gpu_stats(): - try: - result = subprocess.run( - ["nvidia-smi", - "--query-gpu=utilization.gpu,memory.used,memory.total,temperature.gpu,power.draw,power.limit,fan.speed,name", - "--format=csv,noheader,nounits"], - capture_output=True, text=True, timeout=5 - ) - if result.returncode == 0: - parts = [p.strip() for p in result.stdout.strip().split(",")] - return { - "available": True, - "utilization": float(parts[0]), - "memory_used": float(parts[1]), - "memory_total": float(parts[2]), - "temperature": float(parts[3]), - "power_draw": float(parts[4]), - "power_limit": float(parts[5]), - "fan_speed": float(parts[6]), - "name": parts[7], - } - except Exception: - pass - return {"available": False} - - -def fetch_json(path): - try: - req = urllib.request.urlopen(f"{LLAMA_SERVER_URL}{path}", timeout=3) - return json.loads(req.read()) - except Exception: - return None - - -def fetch_text(path): - try: - req = urllib.request.urlopen(f"{LLAMA_SERVER_URL}{path}", timeout=3) - return req.read().decode() - except Exception: - return None - - -def parse_prometheus(text): - if not text: - return {} - metrics = {} - for line in text.strip().split("\n"): - if line.startswith("#"): - continue - m = re.match(r'^([\w:]+)(?:\{[^}]*\})?\s+([\d.eE+-]+)', line) - if m: - key, val = m.group(1), m.group(2) - try: - metrics[key] = float(val) - except ValueError: - pass - return metrics - - -def get_all_stats(): - gpu = get_gpu_stats() - health = fetch_json("/health") - slots = fetch_json("/slots") - model = fetch_json("/v1/models") - metrics = parse_prometheus(fetch_text("/metrics")) - - return { - "timestamp": time.time(), - "gpu": gpu, - "health": health, - "slots": slots, - "model": model, - "metrics": metrics, - "llama_url": LLAMA_SERVER_URL, - } - - -DASHBOARD_HTML = r""" - - - - -llama.cpp Monitor - - - - -

- llama.cpp Monitor - ... - - -

- -
- -
-

GPU Utilization

-
-
- Load - --% -
-
-
-
-
- - -
-

GPU Memory

-
-
- Used - --MiB - / --MiB -
-
-
-
-
- - -
-

GPU Vitals

-
-
- Temperature - --°C -
-
-
- Power - --W - / --W -
-
- Fan - --% -
-
- GPU - -- -
-
-
- - -
-

Model

-
-
--Model
-
--Parameters
-
--Size
-
--Context (train)
-
--GPU Layers
-
-
- - -
-

Throughput

-
-
--
Prompt tok/s
-
--
Generate tok/s
-
--
Prompt Tokens (total)
-
--
Gen Tokens (total)
-
--
Requests
-
--
KV Cache Used
-
-
- - -
-

Slots

-
-
Waiting for data...
-
-
-
- - - - -""" - - -class Handler(http.server.BaseHTTPRequestHandler): - def do_GET(self): - if self.path == "/" or self.path == "/index.html": - self.send_response(200) - self.send_header("Content-Type", "text/html; charset=utf-8") - self.end_headers() - self.wfile.write(DASHBOARD_HTML.encode()) - elif self.path == "/api/stats": - stats = get_all_stats() - self.send_response(200) - self.send_header("Content-Type", "application/json") - self.send_header("Cache-Control", "no-cache") - self.end_headers() - self.wfile.write(json.dumps(stats).encode()) - elif self.path == "/health": - self.send_response(200) - self.send_header("Content-Type", "application/json") - self.end_headers() - self.wfile.write(b'{"status":"ok"}') - else: - self.send_response(404) - self.end_headers() - - def log_message(self, fmt, *args): - pass # suppress request logging - - -def main(): - server = http.server.HTTPServer(("0.0.0.0", MONITOR_PORT), Handler) - print(f"llama.cpp monitor listening on port {MONITOR_PORT}") - print(f" llama server: {LLAMA_SERVER_URL}") - try: - server.serve_forever() - except KeyboardInterrupt: - pass - server.server_close() - - -if __name__ == "__main__": - main() diff --git a/llamacpp-monitor/ports.sh b/llamacpp-monitor/ports.sh deleted file mode 100755 index 1ee2dcc..0000000 --- a/llamacpp-monitor/ports.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -source "${AGENT_PATH}/common.sh" -_check_required_env_vars "MONITOR_PORT" - -echo $MONITOR_PORT diff --git a/llamacpp-monitor/start.sh b/llamacpp-monitor/start.sh deleted file mode 100755 index b1c2e7e..0000000 --- a/llamacpp-monitor/start.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -source "${AGENT_PATH}/common.sh" -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -_check_required_env_vars "CONTAINER_NAME" "LLAMA_SERVER_URL" "MONITOR_PORT" "IMAGE_REGISTRY" "IMAGE_REPO" "IMAGE_TAG" - -# Build GPU device mounts if nvidia-smi is available on the host -GPU_ARGS="" -if command -v nvidia-smi &>/dev/null; then - GPU_ARGS="--device /dev/nvidia0:/dev/nvidia0 \ - --device /dev/nvidiactl:/dev/nvidiactl \ - --device /dev/nvidia-uvm:/dev/nvidia-uvm \ - -v /usr/bin/nvidia-smi:/usr/bin/nvidia-smi:ro \ - -v /usr/lib/nvidia:/usr/lib/nvidia:ro \ - -e LD_LIBRARY_PATH=/usr/lib/nvidia" -fi - -DOCKER_RUN_CMD="docker run -d \ - --restart unless-stopped \ - --name ${CONTAINER_NAME} \ - --network host \ - ${GPU_ARGS} \ - -e LLAMA_SERVER_URL=${LLAMA_SERVER_URL} \ - -e MONITOR_PORT=${MONITOR_PORT} \ - -v ${SCRIPT_DIR}/monitor.py:/app/monitor.py:ro \ - ${IMAGE_REGISTRY}/${IMAGE_REPO}:${IMAGE_TAG} \ - python3 /app/monitor.py" - -echo "Starting container ${CONTAINER_NAME}..." - -if ! _create_and_start_container "$DOCKER_RUN_CMD" "$CONTAINER_NAME"; then - if _is_container_exists $CONTAINER_NAME; then - echo "Attempting to get logs from failed container..." - _get_container_logs $CONTAINER_NAME - fi - _die "Failed to start container ${CONTAINER_NAME}" -fi - -if ! _is_container_running "$CONTAINER_NAME"; then - _get_container_logs $CONTAINER_NAME - _die "Container ${CONTAINER_NAME} is not running after start attempt" -fi - -echo "Service ${CONTAINER_NAME} started on port ${MONITOR_PORT}." diff --git a/llamacpp-monitor/status.sh b/llamacpp-monitor/status.sh deleted file mode 100755 index fedafa7..0000000 --- a/llamacpp-monitor/status.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -source "${AGENT_PATH}/common.sh" -_check_required_env_vars "CONTAINER_NAME" "MONITOR_PORT" - -_is_container_running $CONTAINER_NAME || _die "Service is not running - did not find container $CONTAINER_NAME." - -curl -sf http://localhost:${MONITOR_PORT}/health > /dev/null \ - || _die "Service is not healthy - no response from monitor on port ${MONITOR_PORT}." - -echo "Service ${CONTAINER_NAME} is healthy." -exit 0 diff --git a/llamacpp-monitor/stop.sh b/llamacpp-monitor/stop.sh deleted file mode 100755 index 86f043d..0000000 --- a/llamacpp-monitor/stop.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -source "${AGENT_PATH}/common.sh" -_check_required_env_vars "CONTAINER_NAME" - -echo "Stopping service ${CONTAINER_NAME}..." -_stop_container $CONTAINER_NAME || _die "Failed to stop container ${CONTAINER_NAME}" -echo "Service ${CONTAINER_NAME} stopped." diff --git a/llamacpp-monitor/template_info.env b/llamacpp-monitor/template_info.env deleted file mode 100644 index 849fa4d..0000000 --- a/llamacpp-monitor/template_info.env +++ /dev/null @@ -1,17 +0,0 @@ -# DO NOT EDIT THIS FILE FOR YOUR SERVICE! -# This file is replaced from the template whenever there is an update. -# Edit the service.env file to make changes. - -# Template to use - always required! -TEMPLATE=llamacpp-monitor -REQUIRES_HOST_ROOT=false -REQUIRES_DOCKER=true -REQUIRES_DOCKER_ROOT=true - -# Container settings -CONTAINER_NAME="llamacpp-monitor" - -# Image settings -IMAGE_REGISTRY="docker.io" -IMAGE_REPO="python" -IMAGE_TAG="3.12-slim" diff --git a/llamacpp-monitor/uninstall.sh b/llamacpp-monitor/uninstall.sh deleted file mode 100755 index ef207b4..0000000 --- a/llamacpp-monitor/uninstall.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -source "${AGENT_PATH}/common.sh" -_check_required_env_vars "CONTAINER_NAME" - -echo "Uninstalling service ${CONTAINER_NAME}..." -_remove_container $CONTAINER_NAME || _die "Failed to remove container ${CONTAINER_NAME}" -echo "Service ${CONTAINER_NAME} uninstalled."