mirror of
https://github.com/jokob-sk/NetAlertX.git
synced 2026-03-31 07:12:23 -07:00
feat: add health check endpoint and related schemas with tests
This commit is contained in:
@@ -41,6 +41,7 @@ from .nettools_endpoint import ( # noqa: E402 [flake8 lint suppression]
|
||||
from .dbquery_endpoint import read_query, write_query, update_query, delete_query # noqa: E402 [flake8 lint suppression]
|
||||
from .sync_endpoint import handle_sync_post, handle_sync_get # noqa: E402 [flake8 lint suppression]
|
||||
from .logs_endpoint import clean_log # noqa: E402 [flake8 lint suppression]
|
||||
from .health_endpoint import get_health_status # noqa: E402 [flake8 lint suppression]
|
||||
from models.user_events_queue_instance import UserEventsQueueInstance # noqa: E402 [flake8 lint suppression]
|
||||
|
||||
from models.event_instance import EventInstance # noqa: E402 [flake8 lint suppression]
|
||||
@@ -86,6 +87,7 @@ from .openapi.schemas import ( # noqa: E402 [flake8 lint suppression]
|
||||
RecentEventsResponse, LastEventsResponse,
|
||||
NetworkTopologyResponse,
|
||||
InternetInfoResponse, NetworkInterfacesResponse,
|
||||
HealthCheckResponse,
|
||||
CreateEventRequest, CreateSessionRequest,
|
||||
DeleteSessionRequest, CreateNotificationRequest,
|
||||
SyncPushRequest, SyncPullResponse,
|
||||
@@ -1930,6 +1932,33 @@ def check_auth(payload=None):
|
||||
if request.method == "GET":
|
||||
return jsonify({"success": True, "message": "Authentication check successful"}), 200
|
||||
|
||||
|
||||
# --------------------------
|
||||
# Health endpoint
|
||||
# --------------------------
|
||||
@app.route("/health", methods=["GET"])
@validate_request(
    operation_id="check_health",
    summary="System Health Check",
    description="Retrieve system vitality metrics including database size, memory pressure, system load, disk usage, and CPU temperature.",
    response_model=HealthCheckResponse,
    tags=["system", "health"],
    auth_callable=is_authorized
)
def check_health(payload=None):
    """
    Serve GET /health: report the current system health metrics as JSON.

    Args:
        payload: Not used by this handler (presumably supplied by the
            validate_request decorator - confirm against its contract).

    Returns:
        tuple: (JSON response, HTTP status). On success the body is
        {"success": True} merged with the dict from get_health_status() and
        the status is 200. On any exception the body carries
        success=False, a fixed error string and the exception text, with
        status 500.
    """
    try:
        metrics = get_health_status()
        # Serialisation stays inside the try so that a failure while building
        # the response is reported through the same 500 path.
        success_body = {"success": True, **metrics}
        return jsonify(success_body), 200
    except Exception as e:
        mylog("none", [f"[health] Error retrieving health status: {e}"])
        failure_body = {
            "success": False,
            "error": "Failed to retrieve health status",
            "message": str(e)
        }
        return jsonify(failure_body), 500
|
||||
|
||||
|
||||
# --------------------------
|
||||
# Background Server Start
|
||||
# --------------------------
|
||||
|
||||
147
server/api_server/health_endpoint.py
Normal file
147
server/api_server/health_endpoint.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""Health check endpoint for NetAlertX system vitality monitoring."""
|
||||
|
||||
import os
|
||||
import psutil
|
||||
from pathlib import Path
|
||||
|
||||
from const import dbPath, dataPath
|
||||
from logger import mylog
|
||||
|
||||
|
||||
# ===============================================================================
|
||||
# Database Vitality
|
||||
# ===============================================================================
|
||||
|
||||
def get_db_size_mb():
    """
    Calculate total database size in MB (app.db + app.db-wal).

    Each file is stat'ed on its own, and a file that is missing simply
    contributes 0 bytes. This avoids the check-then-use race of calling
    exists() before stat(): if a file vanished between the two calls (the
    -wal file may be removed while the application is running), the whole
    result would otherwise collapse to 0.0 even though the main database
    file is present.

    Returns:
        float: Combined size in MB rounded to 2 decimals; 0.0 if neither file
        exists or if an unexpected error occurs (the error is logged).
    """
    try:
        size_bytes = 0
        for candidate in (Path(dbPath), Path(f"{dbPath}-wal")):
            try:
                size_bytes += candidate.stat().st_size
            except (FileNotFoundError, NotADirectoryError):
                # Same outcome as a failed exists() check: absent file, 0 bytes.
                continue

        return round(size_bytes / (1024 * 1024), 2)
    except Exception as e:
        mylog("verbose", [f"[health] Error calculating DB size: {e}"])
        return 0.0
|
||||
|
||||
|
||||
# ===============================================================================
|
||||
# Memory Pressure
|
||||
# ===============================================================================
|
||||
|
||||
def get_mem_usage_pct():
    """
    Report how much of total memory is in use, as a whole-number percentage.

    The value is psutil's virtual_memory().used divided by .total, scaled to
    percent, truncated to an int and clamped into the 0-100 range.

    Returns:
        int: Memory usage percentage (0-100), or -1 if the reading fails
        (the error is logged).
    """
    try:
        mem = psutil.virtual_memory()
        used_fraction = mem.used / mem.total
        percent = int(used_fraction * 100)

        # Clamp to the documented 0-100 range.
        if percent < 0:
            return 0
        if percent > 100:
            return 100
        return percent
    except Exception as e:
        mylog("verbose", [f"[health] Error calculating memory usage: {e}"])
        return -1
|
||||
|
||||
|
||||
# ===============================================================================
|
||||
# System Stress
|
||||
# ===============================================================================
|
||||
|
||||
def get_load_avg_1m():
    """
    Read the system's 1-minute load average.

    Returns:
        float: First element of os.getloadavg(), rounded to 2 decimals,
        or -1.0 if the value cannot be obtained (the error is logged).
    """
    try:
        # os.getloadavg() yields (1m, 5m, 15m); only the first is reported.
        one_minute = os.getloadavg()[0]
        return round(one_minute, 2)
    except Exception as e:
        mylog("verbose", [f"[health] Error getting load average: {e}"])
        return -1.0
|
||||
|
||||
|
||||
# ===============================================================================
|
||||
# Disk Headroom
|
||||
# ===============================================================================
|
||||
|
||||
def get_storage_pct():
    """
    Report disk usage of the filesystem that holds dataPath.

    Usage is (f_blocks - f_bfree) / f_blocks from os.statvfs(dataPath),
    expressed as a truncated integer percentage and clamped to 0-100. A
    filesystem reporting zero total size yields 0.

    Returns:
        int: Disk usage percentage (0-100), or -1 if the filesystem cannot be
        queried (the error is logged).
    """
    try:
        fs = os.statvfs(dataPath)
        block_size = fs.f_frsize
        capacity = fs.f_blocks * block_size

        if capacity > 0:
            consumed = (fs.f_blocks - fs.f_bfree) * block_size
            pct = int((consumed / capacity) * 100)
        else:
            # Avoid dividing by zero on a filesystem with no reported blocks.
            pct = 0

        return min(max(pct, 0), 100)  # Clamp to 0-100
    except Exception as e:
        mylog("verbose", [f"[health] Error calculating storage usage: {e}"])
        return -1
|
||||
|
||||
|
||||
# ===============================================================================
|
||||
# Thermal Health
|
||||
# ===============================================================================
|
||||
|
||||
def get_cpu_temp():
    """
    Read a CPU temperature from psutil's hardware sensors, if any exist.

    The first reading of the 'coretemp' sensor group is preferred. If that
    group is absent or empty, the first reading of the first non-empty sensor
    group is used instead. The value is truncated to an int.

    Returns:
        int: Temperature as reported by the sensor's `current` field, or None
        if no sensor data is available or reading fails (the error is logged).
    """
    try:
        sensors = psutil.sensors_temperatures()
        if not sensors:
            return None

        # Prefer 'coretemp' (Intel) when it has at least one reading.
        preferred = sensors.get("coretemp")
        if preferred:
            return int(preferred[0].current)

        # Otherwise fall back to the first sensor group that has data.
        first_populated = next(
            (readings for readings in sensors.values() if readings),
            None,
        )
        if first_populated is None:
            return None
        return int(first_populated[0].current)
    except Exception as e:
        mylog("verbose", [f"[health] Error reading CPU temperature: {e}"])
        return None
|
||||
|
||||
|
||||
# ===============================================================================
|
||||
# Aggregator
|
||||
# ===============================================================================
|
||||
|
||||
def get_health_status():
    """
    Gather every health metric into one dictionary.

    The collectors are invoked in the order listed, and the resulting dict
    keeps that key order.

    Returns:
        dict: Keys db_size_mb, mem_usage_pct, load_1m, storage_pct and
        cpu_temp, each mapped to the value returned by its collector.
    """
    collectors = (
        ("db_size_mb", get_db_size_mb),
        ("mem_usage_pct", get_mem_usage_pct),
        ("load_1m", get_load_avg_1m),
        ("storage_pct", get_storage_pct),
        ("cpu_temp", get_cpu_temp),
    )
    return {metric: collect() for metric, collect in collectors}
|
||||
@@ -651,6 +651,34 @@ class NetworkInterfacesResponse(BaseResponse):
|
||||
interfaces: Dict[str, Any] = Field(..., description="Details about network interfaces.")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# HEALTH CHECK SCHEMAS
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class HealthCheckResponse(BaseResponse):
    """
    System health check with vitality metrics.

    The percentage fields accept -1 as well as 0-100: get_mem_usage_pct() and
    get_storage_pct() return -1 when a reading cannot be taken, and the schema
    must admit that sentinel so an error reading is still a valid response.
    """
    model_config = ConfigDict(
        extra="allow",
        json_schema_extra={
            "examples": [{
                "success": True,
                "db_size_mb": 125.45,
                "mem_usage_pct": 65,
                "load_1m": 2.15,
                "storage_pct": 42,
                "cpu_temp": 58
            }]
        }
    )

    db_size_mb: float = Field(..., description="Database size in MB (app.db + app.db-wal)")
    # Lower bound is -1 (not 0) to allow the collector's error sentinel.
    mem_usage_pct: int = Field(..., ge=-1, le=100, description="Memory usage percentage (0-100), or -1 if unavailable")
    # get_load_avg_1m() reports -1.0 on error; no range constraint is applied here.
    load_1m: float = Field(..., description="1-minute load average")
    # Lower bound is -1 (not 0) to allow the collector's error sentinel.
    storage_pct: int = Field(..., ge=-1, le=100, description="Disk usage percentage of /data mount (0-100), or -1 if unavailable")
    cpu_temp: Optional[int] = Field(None, description="CPU temperature in Celsius (nullable if unavailable)")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# EVENTS SCHEMAS
|
||||
# =============================================================================
|
||||
|
||||
Reference in New Issue
Block a user