Files
NetAlertX/install/production-filesystem/entrypoint.sh
2025-11-23 01:27:51 +00:00

346 lines
13 KiB
Bash
Executable File

#!/bin/sh
################################################################################
# NetAlertX Container Entrypoint
################################################################################
#
# Purpose: Main entrypoint script for NetAlertX Docker containers
#
# Responsibilities:
# 1. Display NetAlertX banner and container startup info
# 2. Run pre-startup health checks
# 3. Initialize required directories and log files
# 4. Start and monitor core services (crond, php-fpm, nginx, Python backend)
# 5. Handle service failures and graceful shutdown
# 6. Manage process signals (INT, TERM) for clean container termination
#
# Environment Variables:
# - ENVIRONMENT: Container environment type (debian or alpine). If not "debian",
# crond scheduler service will be started.
# - NETALERTX_DEBUG: If set to 1, services won't auto-shutdown on failure;
# container will wait for all to exit naturally (development mode).
# - NETALERTX_PLUGINS_LOG: Directory path for plugin logs
# - SYSTEM_SERVICES_RUN_LOG: Directory path for service runtime logs
# - SYSTEM_SERVICES_RUN_TMP: Directory path for service temporary files
# - LOG_DB_IS_LOCKED: File path for database lock status
# - LOG_EXECUTION_QUEUE: File path for execution queue log
#
# Exit Codes:
# - 0: Graceful shutdown (unlikely in production)
# - 143: Caught signal (INT/TERM)
# - Non-zero: Service failure status code
#
# Service Monitoring:
# In production mode (NETALERTX_DEBUG != 1), if any service exits, all services
# are terminated and the container exits with the failed service's status code.
# This ensures container restart policies can properly reinitialize the stack.
#
################################################################################
# Allow direct command execution (e.g., `docker run -it netalertx bash`).
if [ "$#" -gt 0 ]; then
case "$1" in
bash|/bin/bash|sh|/bin/sh)
exec "$@"
;;
esac
fi
# Banner display
RED='\033[1;31m'
GREY='\033[90m'
RESET='\033[0m'
printf "${RED}"
echo '
_ _ _ ___ _ _ __ __
| \ | | | | / _ \| | | | \ \ / /
| \| | ___| |_/ /_\ \ | ___ _ __| |_ \ V /
| . |/ _ \ __| _ | |/ _ \ __| __|/ \
| |\ | __/ |_| | | | | __/ | | |_/ /^\ \
\_| \_/\___|\__\_| |_/_|\___|_| \__\/ \/
'
printf "\033[0m"
echo ' Network intruder and presence detector.
https://netalertx.com
'
set -u
FAILED_STATUS=""
echo "Startup pre-checks"
for script in ${ENTRYPOINT_CHECKS}/*; do
if [ -n "${SKIP_TESTS:-}" ]; then
echo "Skipping startup checks as SKIP_TESTS is set."
break
fi
script_name=$(basename "$script" | sed 's/^[0-9]*-//;s/\.(sh|py)$//;s/-/ /g')
echo "--> ${script_name} "
if [ -n "${SKIP_STARTUP_CHECKS:-}" ] && echo "${SKIP_STARTUP_CHECKS}" | grep -q "\b${script_name}\b"; then
printf "${GREY}skip${RESET}\n"
continue
fi
"$script"
NETALERTX_DOCKER_ERROR_CHECK=$?
if [ ${NETALERTX_DOCKER_ERROR_CHECK} -eq 1 ]; then
>&2 printf "%s" "${RED}"
>&2 cat <<EOF
══════════════════════════════════════════════════════════════════════════════
❌ NetAlertX startup aborted: critical failure in ${script_name}.
https://github.com/jokob-sk/NetAlertX/blob/main/docs/docker-troubleshooting/troubleshooting.md
══════════════════════════════════════════════════════════════════════════════
EOF
>&2 printf "%s" "${RESET}"
if [ "${NETALERTX_DEBUG:-0}" -eq 1 ]; then
FAILED_STATUS="1"
echo "NETALERTX_DEBUG=1, continuing despite critical failure in ${script_name}."
else
exit 1
fi
elif [ ${NETALERTX_DOCKER_ERROR_CHECK} -ne 0 ]; then
# fail but continue checks so user can see all issues
FAILED_STATUS="${NETALERTX_DOCKER_ERROR_CHECK}"
echo "${script_name}: FAILED with ${FAILED_STATUS}"
echo "Failure detected in: ${script}"
# Continue to next check instead of exiting immediately
fi
done
if [ -n "${FAILED_STATUS}" ]; then
echo "Container startup checks failed with exit code ${FAILED_STATUS}."
if [ "${NETALERTX_DEBUG:-0}" -eq 1 ]; then
echo "NETALERTX_DEBUG=1, continuing despite failed pre-checks."
else
exit "${FAILED_STATUS}"
fi
fi
# Set APP_CONF_OVERRIDE based on GRAPHQL_PORT if not already set
if [ -n "${GRAPHQL_PORT:-}" ] && [ -z "${APP_CONF_OVERRIDE:-}" ]; then
export APP_CONF_OVERRIDE='{"GRAPHQL_PORT":"'"${GRAPHQL_PORT}"'"}'
echo "Setting APP_CONF_OVERRIDE to $APP_CONF_OVERRIDE"
fi
# Exit after checks if in check-only mode (for testing)
if [ "${NETALERTX_CHECK_ONLY:-0}" -eq 1 ]; then
exit 0
fi
# Update vendor data (MAC address OUI database) in the background
# This happens concurrently with service startup to avoid blocking container readiness
bash ${SYSTEM_SERVICES_SCRIPTS}/update_vendors.sh &
# Service management state variables
SERVICES="" # Space-separated list of active services in format "pid:name"
FAILED_NAME="" # Name of service that failed (used for error reporting)
################################################################################
# is_pid_active() - Check if a process is alive and not in zombie/dead state
################################################################################
# Arguments:
# $1: Process ID to check
# Returns:
# 0 (success): Process is alive and healthy
# 1 (failure): Process is dead, zombie, or PID is empty
################################################################################
is_pid_active() {
pid="$1"
[ -z "${pid}" ] && return 1
if ! kill -0 "${pid}" 2>/dev/null; then
return 1
fi
if [ -r "/proc/${pid}/status" ]; then
state_line=$(grep '^State:' "/proc/${pid}/status" 2>/dev/null || true)
case "${state_line}" in
*"(zombie)"*|*"(dead)"*)
return 1
;;
esac
fi
return 0
}
add_service() {
# Start a new service script and track it for monitoring
# Arguments:
# $1: Path to service startup script (e.g., /services/start-backend.sh)
# $2: Human-readable service name (for logging and error reporting)
script="$1"
name="$2"
"$script" &
pid=$!
SERVICES="${SERVICES} ${pid}:${name}"
}
################################################################################
# remove_service() - Remove a service from the active services list
################################################################################
# Arguments:
# $1: Process ID to remove
# Updates: SERVICES variable to exclude the specified PID
################################################################################
remove_service() {
target_pid="$1"
updated=""
for entry in ${SERVICES}; do
pid="${entry%%:*}"
[ -z "${pid}" ] && continue
[ "${pid}" = "${target_pid}" ] && continue
updated="${updated} ${entry}"
done
SERVICES="${updated}"
}
################################################################################
# shutdown_services() - Gracefully stop all active services
################################################################################
# Process:
# 1. Send SIGTERM to all active services (time to clean up)
# 2. Wait for all services to fully terminate
# Notes:
# - Tolerates services that are already dead
# - Uses 'wait' to reap zombie processes
################################################################################
shutdown_services() {
for entry in ${SERVICES}; do
pid="${entry%%:*}"
[ -z "${pid}" ] && continue
if is_pid_active "${pid}"; then
kill "${pid}" 2>/dev/null || true
fi
done
for entry in ${SERVICES}; do
pid="${entry%%:*}"
[ -z "${pid}" ] && continue
wait "${pid}" 2>/dev/null || true
done
echo "All services stopped."
}
################################################################################
# handle_exit() - Terminate all services and exit container
################################################################################
# Process:
# 1. Log failure information if a service exited abnormally
# 2. Shut down all remaining services gracefully
# 3. Exit container with recorded status code
# Note: Used when a monitored service fails or signal is caught
################################################################################
handle_exit() {
if [ -n "${FAILED_NAME}" ]; then
echo "Service ${FAILED_NAME} exited with status ${FAILED_STATUS}."
fi
shutdown_services
exit "${FAILED_STATUS}"
}
################################################################################
# on_signal() - Handle container signals (INT, TERM) for graceful shutdown
################################################################################
# Signals handled: SIGINT (Ctrl+C), SIGTERM (docker stop)
# Process:
# 1. Set exit status to 143 (128 + 15, standard SIGTERM code)
# 2. Trigger full shutdown sequence
# Note: Registered via 'trap' command below
################################################################################
on_signal() {
echo "Caught signal, shutting down services..."
FAILED_NAME="signal"
FAILED_STATUS=143
handle_exit
}
# Register signal handlers for graceful shutdown
trap on_signal INT TERM
################################################################################
# Service Startup Section
################################################################################
# Start services based on environment configuration
# Only start crond scheduler on Alpine (non-Debian) environments
# Debian typically uses systemd or other schedulers
if [ "${ENVIRONMENT:-}" ] && [ "${ENVIRONMENT:-}" != "debian" ]; then
add_service "/services/start-cron.sh" "supercronic"
fi
# Start core frontend and backend services
# Order: web server, application server, then Python backend
add_service "${SYSTEM_SERVICES}/start-php-fpm.sh" "php-fpm83"
add_service "${SYSTEM_SERVICES}/start-nginx.sh" "nginx"
add_service "${SYSTEM_SERVICES}/start-backend.sh" "python3"
################################################################################
# Development Mode Debug Switch
################################################################################
# If NETALERTX_DEBUG=1, skip automatic service restart on failure
# Useful for devcontainer debugging where individual services need to be debugged
if [ "${NETALERTX_DEBUG:-0}" -eq 1 ]; then
echo "NETALERTX_DEBUG is set to 1, will not shut down other services if one fails."
fi
################################################################################
# Service Monitoring Loop (Production Mode)
################################################################################
# Behavior depends on NETALERTX_DEBUG setting:
# - DEBUG OFF (production): Any service failure triggers full container restart
# - DEBUG ON: Services can fail individually; container waits for natural exit
#
# Loop Process:
# 1. Check each active service every 10 seconds
# 2. If service is not active, wait for it and capture exit status
# 3. Log failure and terminate all other services
# 4. Exit container with failed service's status code
# 5. This enables Docker restart policies to reinitialize the stack
################################################################################
while [ -n "${SERVICES}" ]; do
for entry in ${SERVICES}; do
pid="${entry%%:*}"
name="${entry#*:}"
[ -z "${pid}" ] && continue
if ! is_pid_active "${pid}"; then
wait "${pid}" 2>/dev/null
status=$?
# Handle intentional backend restart
if [ "${name}" = "python3" ] && [ -f "/tmp/backend_restart_pending" ]; then
echo "🔄 Backend restart requested via marker file."
rm -f "/tmp/backend_restart_pending"
remove_service "${pid}"
add_service "${SYSTEM_SERVICES}/start-backend.sh" "python3"
continue
fi
FAILED_STATUS=$status
FAILED_NAME="${name}"
remove_service "${pid}"
if [ "${NETALERTX_DEBUG:-0}" -eq 1 ]; then
echo "⚠️ Service ${name} exited with status ${status}. Debug mode active - continuing."
else
handle_exit
fi
fi
done
sleep 10
done
# If we exit the loop with no service failures, set status to 1 (error)
# This should not happen in normal operation
if [ "${FAILED_STATUS}" -eq 0 ] && [ "${FAILED_NAME}" != "signal" ]; then
FAILED_STATUS=1
fi