Final touches on devcontainer

2026-04-15 06:31:44 -07:00 · 2025-10-19 00:39:26 +00:00
parent 62536e4bfb
commit 80c1459442
23 changed files with 845 additions and 77 deletions
--- a/install/production-filesystem/entrypoint.sh
+++ b/install/production-filesystem/entrypoint.sh
@@ -1,9 +1,43 @@
 #!/bin/sh

-# entrypoint.sh - Main container entrypoint script for NetAlertX
-
-#make this red
+################################################################################
+# NetAlertX Container Entrypoint
+################################################################################
+#
+# Purpose: Main entrypoint script for NetAlertX Docker containers
+#
+# Responsibilities:
+#   1. Display NetAlertX banner and container startup info
+#   2. Run pre-startup health checks
+#   3. Initialize required directories and log files
+#   4. Start and monitor core services (crond, php-fpm, nginx, Python backend)
+#   5. Handle service failures and graceful shutdown
+#   6. Manage process signals (INT, TERM) for clean container termination
+#
+# Environment Variables:
+#   - ENVIRONMENT: Container environment type (debian or alpine). The crond scheduler
+#                  is started only when this is set, non-empty, and not "debian".
+#   - NETALERTX_DEBUG: If set to 1, services won't auto-shutdown on failure;
+#                      container will wait for all to exit naturally (development mode).
+#   - NETALERTX_PLUGINS_LOG: Directory path for plugin logs
+#   - SYSTEM_SERVICES_RUN_LOG: Directory path for service runtime logs
+#   - SYSTEM_SERVICES_RUN_TMP: Directory path for service temporary files
+#   - LOG_DB_IS_LOCKED: File path for database lock status
+#   - LOG_EXECUTION_QUEUE: File path for execution queue log
+#
+# Exit Codes:
+#   - 0: Graceful shutdown (unlikely in production)
+#   - 143: Caught signal (INT/TERM)
+#   - Non-zero: Service failure status code
+#
+# Service Monitoring:
+#   In production mode (NETALERTX_DEBUG != 1), if any service exits, all services
+#   are terminated and the container exits with the failed service's status code.
+#   This ensures container restart policies can properly reinitialize the stack.
+#
+################################################################################

+# Banner display
 printf '
 \033[1;31m
 _   _      _    ___  _           _  __   __
@@ -19,17 +53,31 @@ printf '

 set -u

-# Run all checks at container startup.
-for script in /services/check-*.sh; do
+# Run all pre-startup checks; the directory is quoted (no word-splitting), the glob is not
+for script in "${SYSTEM_SERVICES_SCRIPTS}"/check-*.sh; do
 	sh "$script"
 done

+# Update vendor data (MAC address OUI database) in the background
+# This happens concurrently with service startup to avoid blocking container readiness
+"${SYSTEM_SERVICES_SCRIPTS}/update_vendors.sh" &


-SERVICES=""
-FAILED_NAME=""
-FAILED_STATUS=0

+# Service management state variables
+SERVICES=""                 # Space-separated list of active services in format "pid:name"
+FAILED_NAME=""              # Name of service that failed (used for error reporting)
+FAILED_STATUS=0             # Exit status code from failed service or signal
+
+################################################################################
+# is_pid_active() - Check if a process is alive and not in zombie/dead state
+################################################################################
+# Arguments:
+#   $1: Process ID to check
+# Returns:
+#   0 (success): Process is alive and healthy
+#   1 (failure): Process is dead, zombie, or PID is empty
+################################################################################
 is_pid_active() {
    pid="$1"
    [ -z "${pid}" ] && return 1
@@ -51,6 +99,10 @@ is_pid_active() {
 }

 add_service() {
+    # Start a new service script and track it for monitoring
+    # Arguments:
+    #   $1: Path to service startup script (e.g., /services/start-backend.sh)
+    #   $2: Human-readable service name (for logging and error reporting)
    script="$1"
    name="$2"
    "$script" &
@@ -58,6 +110,13 @@ add_service() {
    SERVICES="${SERVICES} ${pid}:${name}"
 }

+################################################################################
+# remove_service() - Remove a service from the active services list
+################################################################################
+# Arguments:
+#   $1: Process ID to remove
+# Updates: SERVICES variable to exclude the specified PID
+################################################################################
 remove_service() {
    target_pid="$1"
    updated=""
@@ -70,6 +129,16 @@ remove_service() {
    SERVICES="${updated}"
 }

+################################################################################
+# shutdown_services() - Gracefully stop all active services
+################################################################################
+# Process:
+#   1. Send SIGTERM to all active services (time to clean up)
+#   2. Wait for all services to fully terminate
+# Notes:
+#   - Tolerates services that are already dead
+#   - Uses 'wait' to reap zombie processes
+################################################################################
 shutdown_services() {
    for entry in ${SERVICES}; do
        pid="${entry%%:*}"
@@ -86,6 +155,15 @@ shutdown_services() {
    echo "All services stopped."
 }

+################################################################################
+# handle_exit() - Terminate all services and exit container
+################################################################################
+# Process:
+#   1. Log failure information if a service exited abnormally
+#   2. Shut down all remaining services gracefully
+#   3. Exit container with recorded status code
+# Note: Used when a monitored service fails or signal is caught
+################################################################################
 handle_exit() {
    if [ -n "${FAILED_NAME}" ]; then
        echo "Service ${FAILED_NAME} exited with status ${FAILED_STATUS}."
@@ -94,6 +172,15 @@ handle_exit() {
    exit "${FAILED_STATUS}"
 }

+################################################################################
+# on_signal() - Handle container signals (INT, TERM) for graceful shutdown
+################################################################################
+# Signals handled: SIGINT (Ctrl+C), SIGTERM (docker stop)
+# Process:
+#   1. Set exit status to 143 (128 + 15, standard SIGTERM code)
+#   2. Trigger full shutdown sequence
+# Note: Registered via 'trap' command below
+################################################################################
 on_signal() {
    echo "Caught signal, shutting down services..."
    FAILED_NAME="signal"
@@ -101,34 +188,53 @@ on_signal() {
    handle_exit
 }

-/services/update_vendors.sh &
-
+# Register signal handlers for graceful shutdown
 trap on_signal INT TERM

-[ ! -d "${NETALERTX_PLUGINS_LOG}" ] && mkdir -p "${NETALERTX_PLUGINS_LOG}"
-[ ! -d "${SYSTEM_SERVICES_RUN_LOG}" ] && mkdir -p "${SYSTEM_SERVICES_RUN_LOG}"
-[ ! -d "${SYSTEM_SERVICES_RUN_TMP}" ] && mkdir -p "${SYSTEM_SERVICES_RUN_TMP}"
-[ ! -f "${LOG_DB_IS_LOCKED}" ] && touch "${LOG_DB_IS_LOCKED}"
-[ ! -f "${LOG_EXECUTION_QUEUE}" ] && touch "${LOG_EXECUTION_QUEUE}"

+
+################################################################################
+# Service Startup Section
+################################################################################
+# Start services based on environment configuration
+
+# Start crond only when ENVIRONMENT is set, non-empty, and not "debian" (e.g. Alpine);
+# Debian typically uses systemd or other schedulers
 if [ "${ENVIRONMENT:-}" ] && [ "${ENVIRONMENT:-}" != "debian" ]; then
    add_service "/services/start-crond.sh" "crond"
 fi
-add_service "/services/start-php-fpm.sh" "php-fpm83"
-add_service "/services/start-nginx.sh" "nginx"
-add_service "/services/start-backend.sh" "python3"

+# Start core frontend and backend services
+# Order: PHP-FPM (application server), nginx (web server), then Python backend
+add_service "${SYSTEM_SERVICES}/start-php-fpm.sh" "php-fpm83"
+add_service "${SYSTEM_SERVICES}/start-nginx.sh" "nginx"
+add_service "${SYSTEM_SERVICES}/start-backend.sh" "python3"

-# if NETALERTX_DEBUG=1 then we will not kill any services if one fails. We will just wait for all to exit.
+################################################################################
+# Development Mode Debug Switch
+################################################################################
+# If NETALERTX_DEBUG=1, skip the automatic shutdown of the other services when one fails
+# Useful for devcontainer debugging where individual services need to be debugged
 if [ "${NETALERTX_DEBUG:-0}" -eq 1 ]; then
 	echo "NETALERTX_DEBUG is set to 1, will not shut down other services if one fails."
 	wait
 	exit $?
 fi

-
-# If any service fails, we will shut down all others and exit with the same status.
-# This improves reliability in production environments by reinitializing the entire stack if one service fails.
+################################################################################
+# Service Monitoring Loop (Production Mode)
+################################################################################
+# This loop is only reached when NETALERTX_DEBUG is not 1:
+#   - DEBUG OFF (production): Any service exit stops all services and exits the container
+#   - DEBUG ON: The script has already waited for all services and exited above
+#
+# Loop Process:
+#   1. Check each active service every 10 seconds
+#   2. If service is not active, wait for it and capture exit status
+#   3. Log failure and terminate all other services
+#   4. Exit container with failed service's status code
+#   5. This enables Docker restart policies to reinitialize the stack
+################################################################################
 while [ -n "${SERVICES}" ]; do
    for entry in ${SERVICES}; do
        pid="${entry%%:*}"
@@ -147,6 +253,8 @@ while [ -n "${SERVICES}" ]; do
    sleep 10
 done

+# If we exit the loop with no service failures, set status to 1 (error)
+# This should not happen in normal operation
 if [ "${FAILED_STATUS}" -eq 0 ] && [ "${FAILED_NAME}" != "signal" ]; then
    FAILED_STATUS=1
 fi