#!/usr/bin/env bash
#===============================================================================
# Cortex Installer — All-in-one setup for target Jetson devices
#
# Downloads the latest Cortex build from S3 (via license key) and sets up
# everything needed to run the surgical video platform on a fresh Jetson Thor.
#
# What it does:
#   1. Installs runtime dependencies (apt packages)
#   2. Redeems license key → downloads build artifacts via pre-signed S3 URLs
#   3. Extracts binaries/libs to /opt/verus/
#   4. Extracts SDK libs (Holoscan, CUDA, TensorRT)
#   5. Configures dynamic linker (ldconfig)
#   6. Configures NvSciIpc endpoints, permissions, directories
#   7. Installs systemd services
#   8. Sets up IoT Core (X.509 certificate for cloud connectivity)
#   9. Installs remote desktop (x11vnc + websockify)
#
# Usage:
#   sudo ./install_cortex.sh --key VTXS-XXXX-XXXX-XXXX    # Fresh install
#   sudo ./install_cortex.sh --update --key VTXS-...       # Update app only
#   sudo ./install_cortex.sh --status                      # Show install status
#   sudo ./install_cortex.sh --uninstall                   # Remove everything
#
# Prerequisites:
#   - Jetson Thor with JetPack 7
#   - License key from the Verus portal (my.verussurgical.com)
#   - Internet connectivity, curl, jq
#
#===============================================================================

# Strict mode: abort on command failure, unset variables, and failed pipeline stages.
set -euo pipefail

# !!! UPDATE these on every change to this script !!!
INSTALLER_VERSION="1.3.2"
INSTALLER_DATE="2026-04-17"

printf 'Cortex Installer v%s (%s)\n' "${INSTALLER_VERSION}" "${INSTALLER_DATE}"

# ── Self-update: check S3 for a newer installer (requires enrolled device) ──
# Uses IoT credential provider to authenticate — skips if not enrolled.
if [[ "${CORTEX_NO_SELF_UPDATE:-}" != "1" ]] && command -v curl &>/dev/null; then
    _SELF_UPDATE_DONE=false
    # Read IoT connection settings from the first device.conf carrying a cert.
    # Every grep is guarded with `|| true`: a conf file missing one of these
    # keys would otherwise fail the pipeline and, under `set -euo pipefail`,
    # silently abort the whole installer.
    for _conf in /opt/verus/etc/device.conf "$HOME/.config/verus/device.conf"; do
        [[ -f "$_conf" ]] || continue
        _SU_CERT=$(grep '^iot_cert_path=' "$_conf" 2>/dev/null | cut -d= -f2 || true)
        _SU_KEY=$(grep '^iot_key_path=' "$_conf" 2>/dev/null | cut -d= -f2 || true)
        _SU_CA=$(grep '^iot_ca_path=' "$_conf" 2>/dev/null | cut -d= -f2 || true)
        _SU_CRED_EP=$(grep '^iot_credential_endpoint=' "$_conf" 2>/dev/null | cut -d= -f2 || true)
        _SU_ROLE=$(grep '^iot_role_alias=' "$_conf" 2>/dev/null | cut -d= -f2 || true)
        [[ -n "$_SU_CERT" ]] && [[ -f "$_SU_CERT" ]] && break
    done
    if [[ -n "${_SU_CERT:-}" ]] && [[ -f "${_SU_CERT}" ]] && [[ -n "${_SU_CRED_EP:-}" ]]; then
        # Fetch temp credentials via the IoT credential provider.
        _SU_CREDS=$(curl -s --cert "$_SU_CERT" --key "$_SU_KEY" --cacert "$_SU_CA" \
            "https://${_SU_CRED_EP}/role-aliases/${_SU_ROLE}/credentials" 2>/dev/null || true)
        # `|| true` again: grep -o exits non-zero when a field is absent
        # (e.g. an auth error response), which must not abort under set -e.
        _SU_AK=$(echo "$_SU_CREDS" | grep -o '"accessKeyId":"[^"]*"' | cut -d'"' -f4 || true)
        _SU_SK=$(echo "$_SU_CREDS" | grep -o '"secretAccessKey":"[^"]*"' | cut -d'"' -f4 || true)
        _SU_ST=$(echo "$_SU_CREDS" | grep -o '"sessionToken":"[^"]*"' | cut -d'"' -f4 || true)
        if [[ -n "$_SU_AK" ]]; then
            S3_INSTALLER_URL="https://verus-cortex-releases.s3.eu-central-1.amazonaws.com/latest/install_cortex.sh"
            # Fetch only the head of the remote script to read INSTALLER_VERSION.
            # NOTE: that assignment sits past the ~1.7 KB header comment, so the
            # range must comfortably cover it — the previous 0-512 byte window
            # never reached it and the version probe always came back empty.
            REMOTE_VERSION=$(AWS_ACCESS_KEY_ID=$_SU_AK AWS_SECRET_ACCESS_KEY=$_SU_SK AWS_SESSION_TOKEN=$_SU_ST \
                curl -sf --aws-sigv4 "aws:amz:eu-central-1:s3" --range 0-4095 "$S3_INSTALLER_URL" 2>/dev/null \
                | grep '^INSTALLER_VERSION=' | head -1 | cut -d'"' -f2 || true)
            if [[ -n "$REMOTE_VERSION" ]] && [[ "$REMOTE_VERSION" != "$INSTALLER_VERSION" ]]; then
                echo "Updating installer: v${INSTALLER_VERSION} → v${REMOTE_VERSION}"
                SELF_PATH="$(realpath "$0")"
                # Download to a sibling temp file; only replace ourselves after
                # a successful, non-empty download, then re-exec the new script
                # (with self-update disabled to avoid any update loop).
                if AWS_ACCESS_KEY_ID=$_SU_AK AWS_SECRET_ACCESS_KEY=$_SU_SK AWS_SESSION_TOKEN=$_SU_ST \
                    curl -sfSL --aws-sigv4 "aws:amz:eu-central-1:s3" -o "${SELF_PATH}.new" "$S3_INSTALLER_URL" \
                    && [[ -s "${SELF_PATH}.new" ]]; then
                    chmod +x "${SELF_PATH}.new"
                    mv "${SELF_PATH}.new" "$SELF_PATH"
                    echo "Installer updated. Re-running..."
                    CORTEX_NO_SELF_UPDATE=1 exec "$SELF_PATH" "$@"
                fi
                rm -f "${SELF_PATH}.new"
            fi
        fi
    fi
fi

# ANSI escape sequences for colorized log output (interpreted by `echo -e`).
NC='\033[0m'         # reset
BOLD='\033[1m'
RED='\033[0;31m'     # errors
GREEN='\033[0;32m'   # success
YELLOW='\033[1;33m'  # warnings
BLUE='\033[0;34m'    # info
CYAN='\033[0;36m'    # step headers

# ── Configuration ──────────────────────────────────────────────────────────────
# Release artifact source (S3 + redeem API)
S3_BUCKET="verus-cortex-releases"
S3_PREFIX="s3://${S3_BUCKET}/latest"
AWS_PROFILE="cortex-device-user"
AWS_REGION="eu-central-1"
ARCH="aarch64"
API_URL="https://euxkl75icd.execute-api.eu-central-1.amazonaws.com/prod"

# On-device install layout
INSTALL_PREFIX="/opt/verus"
DEVICE_CONF_DIR="/opt/verus/etc"
DEVICE_CONF="${DEVICE_CONF_DIR}/device.conf"
HOST_HOLOSCAN="/opt/nvidia/holoscan"
HOST_CUDA_LIBS="${INSTALL_PREFIX}/lib/cuda"
HOST_TENSORRT_LIBS="${INSTALL_PREFIX}/lib/tensorrt"
SYSTEMD_DIR="/etc/systemd/system"
LD_CONF_FILE="/etc/ld.so.conf.d/cortex.conf"
NVSCIIPC_CFG="/etc/nvsciipc.cfg"

# Staging area for in-flight downloads (wiped at the start of every run)
DOWNLOAD_DIR="/tmp/cortex-install"
# Persistent cache of verified tarballs (survives across installer runs).
# Files here are only trusted when their SHA256 matches the manifest — a mismatch
# is treated as a stale cache and the tarball is re-downloaded.
CACHE_DIR="/var/cache/cortex-install"

# Tarball names (must match publish_release.sh)
APP_TARBALL="cortex-latest-${ARCH}.tar.gz"
SDK_TARBALL="cortex-sdk-latest-${ARCH}.tar.gz"
MODELS_TARBALL="cortex-models-latest-${ARCH}.tar.gz"

# Logging helpers — consistently prefixed, colorized output.
# `printf '%b'` interprets the backslash escapes stored in the color
# variables, matching what `echo -e` did. Errors go to stderr.
log()      { printf '%b\n' "${BLUE}[INSTALL]${NC} $*"; }
log_ok()   { printf '%b\n' "${GREEN}[INSTALL] ✓${NC} $*"; }
log_warn() { printf '%b\n' "${YELLOW}[INSTALL] !${NC} $*"; }
log_err()  { printf '%b\n' "${RED}[INSTALL] ✗${NC} $*" >&2; }
log_step() { printf '\n%b\n' "${CYAN}[INSTALL]${NC} ${BOLD}$*${NC}"; }

#-------------------------------------------------------------------------------
# Parse arguments
#-------------------------------------------------------------------------------
MODE="full"        # full, update, status, uninstall
INSTALL_KEY=""
SKIP_DEPS=false
SKIP_SDK=false
SKIP_MODELS=false
BUILD_ENGINES=false
NEEDS_REBOOT=false
# Initialize optional flags up front so later reads of $SETUP_KIOSK /
# $FORCE_UPDATE don't trip `set -u` when the flags were never passed.
SETUP_KIOSK=false
FORCE_UPDATE=false
DEPLOY_USER="${SUDO_USER:-$(whoami)}"
DEPLOY_GROUP="$(id -gn "$DEPLOY_USER")"

while [[ $# -gt 0 ]]; do
    case "$1" in
        --key)
            # Value-taking option: fail with a clear message instead of the
            # cryptic "$2: unbound variable" that set -u would produce.
            [[ $# -ge 2 ]] || { log_err "--key requires a value"; exit 1; }
            INSTALL_KEY="$2"; shift 2 ;;
        --update)         MODE=update; SKIP_DEPS=true; SKIP_SDK=true; SKIP_MODELS=true; shift ;;
        --update-full)    MODE=update; shift ;;
        --status)         MODE=status; shift ;;
        --uninstall)      MODE=uninstall; shift ;;
        --skip-deps)      SKIP_DEPS=true; shift ;;
        --skip-sdk)       SKIP_SDK=true; shift ;;
        --skip-models)    SKIP_MODELS=true; shift ;;
        --skip-services)  log_warn "--skip-services is deprecated (services are always installed)"; shift ;;
        --build-engines)  BUILD_ENGINES=true; shift ;;
        --kiosk)          SETUP_KIOSK=true; shift ;;
        --force)          FORCE_UPDATE=true; shift ;;
        # Use $CACHE_DIR (single source of truth, defined above) rather than a
        # duplicated literal; `:?` guards against an empty variable turning
        # this into `rm -rf /`.
        --clear-cache)    rm -rf "${CACHE_DIR:?}" && echo "Cache cleared." ; shift ;;
        --user)
            [[ $# -ge 2 ]] || { log_err "--user requires a value"; exit 1; }
            DEPLOY_USER="$2"; DEPLOY_GROUP="$(id -gn "$2")"; shift 2 ;;
        -h|--help)
            cat << 'EOF'
Cortex Installer — All-in-one setup for target devices

Usage: sudo ./scripts/install_cortex.sh --key VTXS-XXXX-XXXX-XXXX [OPTIONS]

Modes:
    --key KEY         Install using a license key (no AWS CLI needed)
    --update --key    Quick update: app binaries only (skips deps, SDK, models)
    --update-full     Full update: re-downloads and re-installs everything
    --status          Show current installation status
    --uninstall       Remove all Cortex artifacts and services

Options:
    --skip-deps       Skip apt dependency installation
    --skip-sdk        Skip SDK libs download (Holoscan/CUDA/TensorRT/FFmpeg)
    --skip-models     Skip AI model download
    --build-engines   Build TensorRT engines after install (slow, ~5-10min)
    --kiosk           Set up kiosk mode (auto-login, locked desktop, single app)
    --force           Re-install even if already up to date
    --clear-cache     Wipe the local tarball cache (/var/cache/cortex-install/)
    --user USER       Set the runtime user (default: $SUDO_USER or current user)
    -h, --help        Show this help

Examples:
    First-time setup with license key:
        sudo ./install_cortex.sh --key VTXS-A3B7-K9M2-X4P1

    Quick update (app binaries only):
        sudo ./install_cortex.sh --update --key VTXS-A3B7-K9M2-X4P1

    Full update (deps + SDK + models + app):
        sudo ./install_cortex.sh --update-full --key VTXS-A3B7-K9M2-X4P1

After install:
    /opt/verus/bin/run_multiprocess.sh
EOF
            exit 0
            ;;
        *) log_err "Unknown option: $1"; exit 1 ;;
    esac
done

#-------------------------------------------------------------------------------
# Status
#-------------------------------------------------------------------------------
if [[ "$MODE" == "status" ]]; then
    echo -e "${BOLD}Cortex Installation Status${NC}"
    echo "─────────────────────────────────────────"

    # Installed version — dump the manifest, indented for readability.
    if [[ -f "${INSTALL_PREFIX}/manifest.json" ]]; then
        echo -e "Manifest:        ${GREEN}found${NC}"
        # Read the file directly instead of piping `cat` into the loop.
        while IFS= read -r line; do
            echo "  $line"
        done < "${INSTALL_PREFIX}/manifest.json"
    else
        echo -e "Manifest:        ${YELLOW}not found${NC}"
    fi

    # Binaries. Count via find: the previous `ls cortex* | wc -l` left the
    # glob unexpanded when no binaries existed, so `ls` failed — and under
    # `set -e` + `pipefail` that aborted the entire status report.
    if [[ -d "${INSTALL_PREFIX}/bin" ]]; then
        bins=$(find "${INSTALL_PREFIX}/bin" -maxdepth 1 -name 'cortex*' 2>/dev/null | wc -l)
        echo -e "Install prefix:  ${GREEN}${INSTALL_PREFIX}${NC} (${bins} binaries)"
        for bin in "${INSTALL_PREFIX}/bin/"cortex*; do
            # The -x guard also skips the literal pattern when nothing matched.
            [[ -x "$bin" ]] && echo "  $(basename "$bin")"
        done
    else
        echo -e "Install prefix:  ${RED}not installed${NC}"
    fi

    # Libraries
    if [[ -d "${INSTALL_PREFIX}/lib" ]]; then
        libs=$(find "${INSTALL_PREFIX}/lib" -name "*.so*" -type f 2>/dev/null | wc -l)
        echo -e "Verus libs:      ${GREEN}${libs} .so files${NC}"
    fi

    # SDK libs — label:path pairs, padded so the counts line up.
    for label_path in "Holoscan:${HOST_HOLOSCAN}/lib" "CUDA:${HOST_CUDA_LIBS}" "TensorRT:${HOST_TENSORRT_LIBS}"; do
        label="${label_path%%:*}"
        path="${label_path#*:}"
        if [[ -d "$path" ]]; then
            count=$(find "$path" -name "*.so*" -type f 2>/dev/null | wc -l)
            echo -e "  ${label}:$(printf '%*s' $((12 - ${#label})) '')${GREEN}${count} libs${NC}"
        else
            echo -e "  ${label}:$(printf '%*s' $((12 - ${#label})) '')${YELLOW}not installed${NC}"
        fi
    done

    # TensorRT engines
    echo "AI models:"
    for model_dir in "${INSTALL_PREFIX}/models/oob" "${INSTALL_PREFIX}/models/tool_segmentation"; do
        name=$(basename "$model_dir")
        if [[ -d "$model_dir" ]]; then
            has_onnx=$(find "$model_dir" -name "*.onnx" 2>/dev/null | wc -l)
            has_engine=$(find "$model_dir" -name "*.engine" 2>/dev/null | wc -l)
            echo -e "  ${name}: ${GREEN}${has_onnx} onnx, ${has_engine} engine${NC}"
        else
            echo -e "  ${name}: ${YELLOW}not found${NC}"
        fi
    done

    # LD config
    if [[ -f "$LD_CONF_FILE" ]]; then
        echo -e "LD config:       ${GREEN}${LD_CONF_FILE}${NC}"
    else
        echo -e "LD config:       ${YELLOW}not installed${NC}"
    fi

    # NvSciIpc
    if [[ -f "$NVSCIIPC_CFG" ]] && grep -q "nvscistream_102" "$NVSCIIPC_CFG" 2>/dev/null; then
        echo -e "NvSciIpc:        ${GREEN}endpoints configured${NC}"
    else
        echo -e "NvSciIpc:        ${YELLOW}not configured${NC}"
    fi

    # systemd services
    echo "systemd services:"
    for svc in cortex-casemgmt cortex-uploader x11vnc websockify; do
        if [[ -f "${SYSTEMD_DIR}/${svc}.service" ]]; then
            status=$(systemctl is-active "$svc" 2>/dev/null || echo "inactive")
            enabled=$(systemctl is-enabled "$svc" 2>/dev/null || echo "disabled")
            echo -e "  ${svc}: ${GREEN}installed${NC} (${status}, ${enabled})"
        else
            echo -e "  ${svc}: ${YELLOW}not installed${NC}"
        fi
    done

    # IoT Core
    IOT_CERT_DIR="${INSTALL_PREFIX}/certs"
    if [[ -f "${IOT_CERT_DIR}/device.pem.crt" ]]; then
        IOT_THING=""
        for conf in "${DEVICE_CONF}" "/home/${SUDO_USER:-$(whoami)}/.config/verus/device.conf"; do
            [[ -f "$conf" ]] && IOT_THING=$(grep '^iot_thing_name=' "$conf" 2>/dev/null | cut -d= -f2) && break
        done
        echo -e "IoT Core:        ${GREEN}certificate present${NC} (thing: ${IOT_THING:-unknown})"
        # Check cert expiry. `|| true`: an unreadable/garbled certificate must
        # not abort the report under set -e/pipefail.
        if command -v openssl &>/dev/null; then
            CERT_EXPIRY=$(openssl x509 -enddate -noout -in "${IOT_CERT_DIR}/device.pem.crt" 2>/dev/null | cut -d= -f2 || true)
            [[ -n "$CERT_EXPIRY" ]] && echo -e "  Cert expires:  ${CERT_EXPIRY}"
        fi
    else
        echo -e "IoT Core:        ${YELLOW}not provisioned${NC}"
    fi

    # Device config. `|| true` on each grep: a conf missing these keys would
    # otherwise fail the pipeline and kill the script under set -e/pipefail.
    if [[ -f "${DEVICE_CONF}" ]]; then
        DEVICE_ID=$(grep '^device_id=' "${DEVICE_CONF}" 2>/dev/null | cut -d= -f2 || true)
        TENANT_ID=$(grep '^tenant_id=' "${DEVICE_CONF}" 2>/dev/null | cut -d= -f2 || true)
        echo -e "Device ID:       ${GREEN}${DEVICE_ID:-not set}${NC}"
        echo -e "Tenant:          ${GREEN}${TENANT_ID:-not set}${NC}"
    else
        echo -e "Device config:   ${YELLOW}not found${NC}"
    fi

    # Tarball cache
    if [[ -d "${CACHE_DIR}" ]]; then
        cache_size=$(du -sh "${CACHE_DIR}" 2>/dev/null | cut -f1)
        cache_count=$(find "${CACHE_DIR}" -maxdepth 1 -type f 2>/dev/null | wc -l)
        echo -e "Tarball cache:   ${GREEN}${CACHE_DIR}${NC} (${cache_count} files, ${cache_size})"
    else
        echo -e "Tarball cache:   ${YELLOW}empty${NC}"
    fi

    exit 0
fi

#-------------------------------------------------------------------------------
# Uninstall
#-------------------------------------------------------------------------------
if [[ "$MODE" == "uninstall" ]]; then
    log_step "Uninstalling Cortex"

    # Stop and remove all services
    for svc in cortex-casemgmt cortex-uploader cortex-ui cortex-video-core x11vnc websockify vnc-rotate.timer; do
        if systemctl is-active "$svc" &>/dev/null; then
            log "Stopping ${svc}..."
            systemctl stop "$svc" || true
        fi
        # Unit files may be "<name>.service" or a full unit name (the
        # vnc-rotate.timer entry) — check and remove both forms.
        if [[ -f "${SYSTEMD_DIR}/${svc}.service" ]] || [[ -f "${SYSTEMD_DIR}/${svc}" ]]; then
            systemctl disable "$svc" 2>/dev/null || true
            rm -f "${SYSTEMD_DIR}/${svc}.service" "${SYSTEMD_DIR}/${svc}"
        fi
    done
    systemctl daemon-reload 2>/dev/null || true
    log_ok "Services stopped and removed"

    # Kill running cortex processes
    pkill -f "bin/cortex_" 2>/dev/null || true

    # Remove install prefix (/opt/verus — includes binaries, libs, certs, config)
    [[ -d "$INSTALL_PREFIX" ]] && rm -rf "$INSTALL_PREFIX" && log_ok "Removed ${INSTALL_PREFIX}"

    # Remove user-writable config. Resolve the home directory from the passwd
    # database rather than `eval echo ~user` — eval on a variable is fragile
    # and an injection hazard. If the user is unknown, skip quietly.
    REAL_HOME=$(getent passwd "$DEPLOY_USER" | cut -d: -f6 || true)
    if [[ -n "$REAL_HOME" ]]; then
        rm -rf "${REAL_HOME}/.config/verus" 2>/dev/null && log_ok "Removed user config"
    fi

    # Remove Holoscan libs (only if we deployed them)
    if [[ -d "${HOST_HOLOSCAN}/lib" ]]; then
        rm -rf "${HOST_HOLOSCAN}/lib"
        # Best-effort: only remove the parent dirs if they are now empty.
        rmdir "${HOST_HOLOSCAN}" 2>/dev/null || true
        rmdir "$(dirname "${HOST_HOLOSCAN}")" 2>/dev/null || true
        log_ok "Removed Holoscan libs"
    fi

    # Remove LD config and refresh the linker cache
    if [[ -f "$LD_CONF_FILE" ]]; then
        rm -f "$LD_CONF_FILE"
        ldconfig
        log_ok "Removed ${LD_CONF_FILE}"
    fi

    # Clean up sockets
    rm -f /tmp/cortex-*.sock 2>/dev/null || true

    log_ok "Uninstall complete"
    echo ""
    echo -e "  ${YELLOW}Note: NvSciIpc config in ${NVSCIIPC_CFG} was not modified.${NC}"
    echo -e "  ${YELLOW}Note: IoT Thing and certificate in AWS were not deleted.${NC}"
    echo -e "  ${YELLOW}      Delete them in the AWS Console if decommissioning this device.${NC}"
    exit 0
fi

#-------------------------------------------------------------------------------
# Pre-flight checks
#-------------------------------------------------------------------------------
log_step "Pre-flight checks"

# Root is required: we write to /opt, /etc and manage systemd units.
if [[ $EUID -ne 0 ]]; then
    log_err "This script must be run with sudo"
    exit 1
fi

# Download mode: --key (license key via API) or --update (IoT/AWS CLI)
# --update + --key is valid: re-uses the license key for pre-signed URLs (recommended)
USE_LICENSE_KEY=false
if [[ -n "$INSTALL_KEY" ]]; then
    USE_LICENSE_KEY=true
    # Validate key format (charset matches API: no 0/O/1/I)
    if [[ ! "$INSTALL_KEY" =~ ^VTXS-[A-HJ-NP-Z2-9]{4}-[A-HJ-NP-Z2-9]{4}-[A-HJ-NP-Z2-9]{4}$ ]]; then
        log_err "Invalid key format. Expected: VTXS-XXXX-XXXX-XXXX (uppercase, no 0/O/1/I)"
        exit 1
    fi
    # Install curl and jq if missing (needed for license key flow)
    MISSING_TOOLS=()
    command -v curl &>/dev/null || MISSING_TOOLS+=(curl)
    command -v jq &>/dev/null || MISSING_TOOLS+=(jq)
    if [[ ${#MISSING_TOOLS[@]} -gt 0 ]]; then
        log "Installing ${MISSING_TOOLS[*]}..."
        apt-get update -qq && apt-get install -y --no-install-recommends "${MISSING_TOOLS[@]}" || {
            log_err "Failed to install ${MISSING_TOOLS[*]}. Install manually: sudo apt-get install ${MISSING_TOOLS[*]}"
            exit 1
        }
    fi
    log_ok "License key mode (curl + jq available)"
elif [[ "$MODE" == "update" ]]; then
    # Update mode: download latest build using IoT credentials or IAM profile
    # Check for IoT cert (preferred — no IAM user needed)
    UPDATE_IOT_CERT=""
    UPDATE_IOT_KEY=""
    UPDATE_IOT_CA=""
    UPDATE_IOT_CRED_ENDPOINT=""
    UPDATE_IOT_ROLE_ALIAS=""
    # `|| true` on every grep: a conf file missing one of these keys would
    # otherwise fail the pipeline and abort the script under set -e/pipefail.
    for conf in "${DEVICE_CONF}" "/home/${DEPLOY_USER}/.config/verus/device.conf"; do
        [[ -f "$conf" ]] || continue
        UPDATE_IOT_CERT=$(grep '^iot_cert_path=' "$conf" 2>/dev/null | cut -d= -f2 || true)
        UPDATE_IOT_KEY=$(grep '^iot_key_path=' "$conf" 2>/dev/null | cut -d= -f2 || true)
        UPDATE_IOT_CA=$(grep '^iot_ca_path=' "$conf" 2>/dev/null | cut -d= -f2 || true)
        UPDATE_IOT_CRED_ENDPOINT=$(grep '^iot_credential_endpoint=' "$conf" 2>/dev/null | cut -d= -f2 || true)
        UPDATE_IOT_ROLE_ALIAS=$(grep '^iot_role_alias=' "$conf" 2>/dev/null | cut -d= -f2 || true)
        [[ -n "$UPDATE_IOT_CERT" ]] && break
    done

    if [[ -n "$UPDATE_IOT_CERT" ]] && [[ -f "$UPDATE_IOT_CERT" ]] && [[ -n "$UPDATE_IOT_CRED_ENDPOINT" ]]; then
        # Fetch temp credentials via IoT Credential Provider
        for cmd in curl jq; do
            if ! command -v "$cmd" &>/dev/null; then
                log_err "$cmd is required for IoT credential fetch"
                exit 1
            fi
        done
        # `|| true`: a failed fetch must fall through to the friendly error
        # below instead of dying silently under set -e.
        IOT_CREDS=$(curl -s --cert "$UPDATE_IOT_CERT" --key "$UPDATE_IOT_KEY" --cacert "$UPDATE_IOT_CA" \
            "https://${UPDATE_IOT_CRED_ENDPOINT}/role-aliases/${UPDATE_IOT_ROLE_ALIAS}/credentials" 2>/dev/null || true)
        # Assign and export separately: `export VAR=$(cmd)` masks jq/pipeline
        # failures (ShellCheck SC2155). Invalid/empty JSON yields empty values,
        # which the check below turns into an actionable error.
        AWS_ACCESS_KEY_ID=$(echo "$IOT_CREDS" | jq -r '.credentials.accessKeyId // empty' 2>/dev/null || true)
        AWS_SECRET_ACCESS_KEY=$(echo "$IOT_CREDS" | jq -r '.credentials.secretAccessKey // empty' 2>/dev/null || true)
        AWS_SESSION_TOKEN=$(echo "$IOT_CREDS" | jq -r '.credentials.sessionToken // empty' 2>/dev/null || true)
        export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN
        if [[ -n "$AWS_ACCESS_KEY_ID" ]]; then
            AWS_PROFILE=""  # clear profile so aws CLI uses env vars
            log_ok "IoT credentials valid (update via credential provider)"
        else
            log_err "Failed to get IoT credentials for update."
            echo "  Re-run with: --update --key VTXS-XXXX-XXXX-XXXX"
            exit 1
        fi
    else
        log_err "No IoT certificate found on this device."
        echo "  Re-run with: --update --key VTXS-XXXX-XXXX-XXXX"
        exit 1
    fi
else
    log_err "No license key provided."
    echo "  Fresh install:  sudo ./scripts/install_cortex.sh --key VTXS-XXXX-XXXX-XXXX"
    echo "  Update:         sudo ./scripts/install_cortex.sh --update --key VTXS-XXXX-XXXX-XXXX"
    echo "  Get a key from your admin on the Verus portal."
    exit 1
fi

#-------------------------------------------------------------------------------
# Step 1: Install runtime dependencies
#-------------------------------------------------------------------------------
# $SKIP_DEPS holds the literal string "true" or "false", so `! $SKIP_DEPS`
# executes the matching shell builtin — i.e. "if deps are NOT skipped".
if ! $SKIP_DEPS; then
    log_step "Step 1/9: Installing runtime dependencies"

    # CRITICAL: services won't start without these. Abort if any fail.
    CRITICAL_PKGS=(
        # Tools
        curl jq
        # Remote access
        openssh-server
        # gRPC + Protobuf runtime (all services speak gRPC)
        libgrpc++1.51t64 libprotobuf32t64
        # FFmpeg codec deps (FFmpeg 7.x libs are bundled in the app tarball)
        libx264-164 libx265-199 libmp3lame0 libopus0 libvpx9
        # Image
        libjpeg-turbo8
        # Database
        libsqlite3-0 sqlite3
        # TLS
        libssl3t64
        # Qt6 core + QML (needed by cortex_core_ui)
        libqt6core6 libqt6gui6 libqt6quick6 libqt6qml6
        libqt6quickcontrols2-6 libqt6opengl6
        qml6-module-qtquick qml6-module-qtquick-controls
        qml6-module-qtquick-layouts qml6-module-qtquick-window
        qml6-module-qtqml-workerscript
        qml6-module-qtquick-templates
        # IoT Secure Tunneling
        python3-websocket
    )

    # OPTIONAL: nice-to-have. Install best-effort so one flaky package
    # (e.g. v4l2loopback-dkms on a mismatched kernel) doesn't block the install.
    OPTIONAL_PKGS=(
        # NvSciStream — may be missing on minimal JetPack images
        nvidia-l4t-nvsci
        # Qt6 WebEngine (teleconferencing)
        libqt6webenginequick6 qml6-module-qtwebengine
        # Qt6 Multimedia (video playback)
        qml6-module-qtmultimedia
        # v4l2loopback (teleconf virtual webcam) — DKMS fails on custom kernels
        v4l2loopback-dkms v4l2loopback-utils
        # trtexec CLI (for rebuilding TensorRT engines on device)
        tensorrt-dev
    )

    # Refresh the package index first; a failure here means nothing below
    # can install, so fail fast with a pointer to the usual suspects.
    if ! apt-get update -qq; then
        log_err "apt-get update failed — check /etc/apt/sources.list and network"
        exit 1
    fi

    # Install critical packages atomically. If the transaction fails, retry
    # one-by-one so the user sees exactly which packages are the problem.
    log "Installing ${#CRITICAL_PKGS[@]} critical packages..."
    if ! apt-get install -y --no-install-recommends "${CRITICAL_PKGS[@]}"; then
        log_warn "Batch install failed — retrying per-package to pinpoint the culprit"
        for pkg in "${CRITICAL_PKGS[@]}"; do
            if ! dpkg -s "$pkg" &>/dev/null; then
                apt-get install -y --no-install-recommends "$pkg" \
                    || log_err "  failed: $pkg"
            fi
        done
    fi

    # Verify every critical package is actually present post-install.
    missing_critical=()
    for pkg in "${CRITICAL_PKGS[@]}"; do
        dpkg -s "$pkg" &>/dev/null || missing_critical+=("$pkg")
    done
    if [[ ${#missing_critical[@]} -gt 0 ]]; then
        log_err "Install cannot continue — these critical packages are missing:"
        printf '    %s\n' "${missing_critical[@]}"
        echo ""
        echo "  Common causes:"
        echo "    • Stale apt cache: run 'sudo apt-get update' and retry"
        echo "    • Universe repo disabled (Ubuntu): 'sudo add-apt-repository universe'"
        echo "    • Held or conflicting package: check 'apt-mark showhold' and 'apt list --upgradable'"
        echo "    • DKMS build failure from another package in the same transaction"
        echo ""
        echo "  Fix the apt error above, then re-run this installer."
        exit 1
    fi
    log_ok "Critical dependencies verified (${#CRITICAL_PKGS[@]} packages)"

    # Optional packages — install one by one, log but don't abort on failure.
    log "Installing ${#OPTIONAL_PKGS[@]} optional packages (best-effort)..."
    opt_skipped=()
    for pkg in "${OPTIONAL_PKGS[@]}"; do
        if dpkg -s "$pkg" &>/dev/null; then continue; fi
        if apt-get install -y --no-install-recommends "$pkg" &>/dev/null; then
            log_ok "  + $pkg"
        else
            log_warn "  - $pkg (apt error; related features may not work)"
            opt_skipped+=("$pkg")
        fi
    done
    if [[ ${#opt_skipped[@]} -gt 0 ]]; then
        log_warn "Skipped ${#opt_skipped[@]} optional package(s): ${opt_skipped[*]}"
    fi
    log_ok "Runtime dependencies installed"

    # v4l2loopback: load on boot + set permissions
    echo 'options v4l2loopback devices=1 video_nr=20 card_label="Cortex Scope" exclusive_caps=1' \
        | tee /etc/modprobe.d/v4l2loopback.conf >/dev/null
    echo 'v4l2loopback' | tee /etc/modules-load.d/v4l2loopback.conf >/dev/null
    # Load the module now as well — the modules-load config above only takes
    # effect on the next boot. Best-effort: DKMS may not have built the module.
    if ! lsmod | grep -q v4l2loopback; then
        modprobe v4l2loopback devices=1 video_nr=20 card_label="Cortex Scope" exclusive_caps=1 || true
    fi
    echo 'KERNEL=="video20", MODE="0666"' | tee /etc/udev/rules.d/99-cortex-v4l2loopback.rules >/dev/null

    # Disable USB autosuspend for UltraSemi USB3 webcam (345f:2130)
    # The kernel suspends idle USB devices after 2s by default, and UVC cameras
    # often fail to wake properly — causing phantom disconnects.
    echo 'ACTION=="add", ATTR{idVendor}=="345f", ATTR{idProduct}=="2130", TEST=="power/control", ATTR{power/control}="on"' \
        | tee /etc/udev/rules.d/99-usb-webcam-no-suspend.rules >/dev/null
    log_ok "USB webcam autosuspend disabled (udev rule for 345f:2130)"

    # Reload udev so the new rules apply without a reboot (best-effort).
    udevadm control --reload-rules 2>/dev/null || true
    udevadm trigger 2>/dev/null || true
    log_ok "v4l2loopback configured (/dev/video20, auto-load on boot)"
else
    log_step "Step 1/9: Skipping full deps — checking essentials"

    # Even on --update, ensure critical packages are present.
    # New features may require packages that weren't in the original install.
    ESSENTIAL_PKGS=(openssh-server python3-websocket sqlite3 curl jq)
    MISSING_PKGS=()
    for pkg in "${ESSENTIAL_PKGS[@]}"; do
        if ! dpkg -s "$pkg" &>/dev/null; then
            MISSING_PKGS+=("$pkg")
        fi
    done
    if [[ ${#MISSING_PKGS[@]} -gt 0 ]]; then
        log "Installing missing essentials: ${MISSING_PKGS[*]}"
        apt-get update -qq
        apt-get install -y --no-install-recommends "${MISSING_PKGS[@]}" || {
            log_warn "Some essential packages failed to install"
        }
    fi
fi

#-------------------------------------------------------------------------------
# Step 2: Download artifacts
#-------------------------------------------------------------------------------
log_step "Step 2/9: Downloading artifacts"

# Start from a clean staging directory (stale partial downloads from a
# previously failed run must not be picked up) and make sure the persistent
# tarball cache exists.
rm -rf "$DOWNLOAD_DIR"
mkdir -p "$CACHE_DIR" "$DOWNLOAD_DIR"

# Helper: verify a file's SHA256 matches the expected "sha256:<hex>" string.
#
# Arguments: PATH EXPECTED
# Returns:   0 on match; non-zero on missing file, empty expectation, hashing
#            failure, or mismatch.
sha_matches() {
    local path="$1" expected="$2"
    [[ -f "$path" ]] || return 1
    [[ -n "$expected" ]] || return 1
    # Declare and assign separately so a sha256sum failure isn't masked by
    # `local` always returning 0 (ShellCheck SC2155).
    local actual
    actual="sha256:$(sha256sum "$path" | cut -d' ' -f1)" || return 1
    [[ "$actual" == "$expected" ]]
}

# Helper: download a file into the persistent cache, verify its SHA256,
# and copy it into the staging download dir. When a cached tarball with a
# matching checksum already exists, the download is skipped.
#
# Arguments: URL DEST_PATH EXPECTED_SHA LABEL
# Exits the installer on download failure or checksum mismatch.
download_and_verify() {
    local url="$1" dest="$2" expected_sha="$3" label="$4"
    local filename; filename=$(basename "$dest")
    # Cache entries are keyed by destination filename. (This previously
    # contained a corrupted `$(unknown)` substitution, which resolved the
    # cache path to the bare directory and broke caching entirely.)
    local cached="${CACHE_DIR}/${filename}"

    # Cache hit — reuse and skip network.
    if [[ -n "$expected_sha" ]] && sha_matches "$cached" "$expected_sha"; then
        cp -f "$cached" "$dest"
        local fsize; fsize=$(du -sh "$dest" | cut -f1)
        log_ok "Cache hit for ${label} (${fsize}) — skipping download"
        return 0
    fi

    # Cache miss or stale cache — drop the stale copy, fetch fresh.
    [[ -f "$cached" ]] && rm -f "$cached"
    log "Downloading ${label}..."
    if ! curl -fSL --progress-bar -o "$cached" "$url"; then
        log_err "Failed to download ${label}"
        rm -f "$cached"
        exit 1
    fi

    # Verify when the manifest supplied a checksum; a mismatch is fatal and
    # the poisoned cache entry is removed.
    if [[ -n "$expected_sha" ]]; then
        if ! sha_matches "$cached" "$expected_sha"; then
            # Declare/assign separately so a sha256sum failure isn't masked
            # by `local` (ShellCheck SC2155).
            local got
            got="sha256:$(sha256sum "$cached" | cut -d' ' -f1)"
            log_err "Checksum mismatch for ${label}"
            log_err "  Expected: ${expected_sha}"
            log_err "  Got:      ${got}"
            rm -f "$cached"
            exit 1
        fi
        log_ok "Checksum verified for ${label}"
    fi

    cp -f "$cached" "$dest"
    local fsize; fsize=$(du -sh "$dest" | cut -f1)
    log_ok "Downloaded ${label} (${fsize})"
}

# Helper: is the remote version already installed?
# Returns 0 when remote == local (same git commit) and prints both versions;
# returns 1 otherwise. Honors --force: when FORCE_UPDATE is set, always
# reports "not installed" so the caller proceeds with a re-install (this was
# documented here before but never actually implemented).
is_same_version_installed() {
    local remote_manifest="$1"
    [[ "${FORCE_UPDATE:-false}" == "true" ]] && return 1
    [[ -f "$remote_manifest" ]] || return 1
    [[ -f "${INSTALL_PREFIX}/manifest.json" ]] || return 1
    command -v jq &>/dev/null || return 1
    local remote_ver remote_commit local_ver local_commit
    remote_ver=$(jq -r '.version // empty' "$remote_manifest")
    remote_commit=$(jq -r '.git_commit // empty' "$remote_manifest")
    local_ver=$(jq -r '.version // empty' "${INSTALL_PREFIX}/manifest.json")
    local_commit=$(jq -r '.git_commit // empty' "${INSTALL_PREFIX}/manifest.json")
    # The git commit is the authoritative identity; bail if the remote lacks one.
    [[ -n "$remote_commit" ]] || return 1
    [[ "$remote_commit" == "$local_commit" ]] || return 1
    echo -e "  Installed: ${GREEN}${local_ver:-unknown}${NC} (${local_commit})"
    echo -e "  Available: ${GREEN}${remote_ver:-unknown}${NC} (${remote_commit})"
    return 0
}

# Verify L4T compatibility between the downloaded build and this device:
# a major-version mismatch is fatal; a minor-version difference only warns.
# Silently passes when the manifest, jq, or /etc/nv_tegra_release is absent.
check_platform_compatibility() {
    [[ -f "${DOWNLOAD_DIR}/manifest.json" ]] || return 0
    command -v jq &>/dev/null || return 0

    local build_l4t
    build_l4t=$(jq -r '.l4t_version // empty' "${DOWNLOAD_DIR}/manifest.json")
    if [[ -z "$build_l4t" || "$build_l4t" == "unknown" ]]; then
        return 0
    fi

    # Device-side L4T version, e.g. "38.1.0", parsed from /etc/nv_tegra_release.
    local device_l4t
    device_l4t=$(sed -n 's/^# R\([0-9]*\).*REVISION: \([0-9.]*\).*/\1.\2/p' /etc/nv_tegra_release 2>/dev/null || echo "")
    [[ -n "$device_l4t" ]] || return 0

    # Major version (e.g. "38" from "38.1.0") is a hard requirement.
    local build_major="${build_l4t%%.*}"
    local device_major="${device_l4t%%.*}"
    if [[ "$device_major" != "$build_major" ]]; then
        log_err "Platform mismatch: build is for L4T R${build_major} but this device runs L4T R${device_major}"
        echo "  SDK libs and TensorRT engines are not compatible across major L4T versions."
        exit 1
    fi

    # Same major: exact match is a clean pass, otherwise warn about engines.
    if [[ "$device_l4t" == "$build_l4t" ]]; then
        log_ok "Platform check passed (L4T ${device_l4t})"
    else
        log_warn "L4T minor version differs: build=${build_l4t}, device=${device_l4t}"
        echo "  This is usually fine. TensorRT engines may need rebuilding (run --build-engines after install)."
    fi
}

if $USE_LICENSE_KEY; then
    # ── License key mode: redeem key via API, download with pre-signed URLs ──
    log "Redeeming license key..."
    HTTP_CODE=$(curl -s -o /tmp/cortex-redeem-response.json -w "%{http_code}" \
        -X POST "${API_URL}/install-keys/redeem" \
        -H "Content-Type: application/json" \
        -d "{\"key\": \"${INSTALL_KEY}\", \"hostname\": \"$(hostname -s)\"}")
    REDEEM_RESPONSE=$(cat /tmp/cortex-redeem-response.json 2>/dev/null || echo "")
    rm -f /tmp/cortex-redeem-response.json

    if [[ "$HTTP_CODE" != "200" ]]; then
        ERR_MSG=$(echo "$REDEEM_RESPONSE" | jq -r '.error // empty' 2>/dev/null)
        if [[ -z "$ERR_MSG" ]]; then
            log_err "License key validation failed (HTTP ${HTTP_CODE})"
        else
            log_err "License key validation failed: ${ERR_MSG}"
        fi
        exit 1
    fi

    # Verify response is valid JSON with expected fields
    if ! echo "$REDEEM_RESPONSE" | jq -e '.status == "ok" and .tenant_id and .downloads.app.url' &>/dev/null; then
        log_err "Invalid response from server — missing required fields"
        exit 1
    fi

    # Extract fields
    TENANT_ID=$(echo "$REDEEM_RESPONSE" | jq -r '.tenant_id')
    TENANT_NAME=$(echo "$REDEEM_RESPONSE" | jq -r '.tenant_name // .tenant_id')
    log_ok "Key valid — tenant: ${TENANT_NAME} (${TENANT_ID})"

    APP_URL=$(echo "$REDEEM_RESPONSE" | jq -r '.downloads.app.url')
    APP_SHA=$(echo "$REDEEM_RESPONSE" | jq -r '.downloads.app.checksum // empty')
    SDK_URL=$(echo "$REDEEM_RESPONSE" | jq -r '.downloads.sdk.url // empty')
    SDK_SHA=$(echo "$REDEEM_RESPONSE" | jq -r '.downloads.sdk.checksum // empty')
    MODELS_URL=$(echo "$REDEEM_RESPONSE" | jq -r '.downloads.models.url // empty')
    MODELS_SHA=$(echo "$REDEEM_RESPONSE" | jq -r '.downloads.models.checksum // empty')
    MANIFEST_URL=$(echo "$REDEEM_RESPONSE" | jq -r '.downloads.manifest.url // empty')

    # Extract IoT config (provisioned server-side by Lambda)
    IOT_THING_NAME=$(echo "$REDEEM_RESPONSE" | jq -r '.iot.thing_name // empty')
    IOT_CERT_PEM=$(echo "$REDEEM_RESPONSE" | jq -r '.iot.certificate_pem // empty')
    IOT_PRIVATE_KEY=$(echo "$REDEEM_RESPONSE" | jq -r '.iot.private_key // empty')
    IOT_ENDPOINT=$(echo "$REDEEM_RESPONSE" | jq -r '.iot.endpoint // empty')
    IOT_CRED_ENDPOINT=$(echo "$REDEEM_RESPONSE" | jq -r '.iot.credential_endpoint // empty')
    IOT_ROLE_ALIAS=$(echo "$REDEEM_RESPONSE" | jq -r '.iot.role_alias // empty')
    IOT_CA_URL=$(echo "$REDEEM_RESPONSE" | jq -r '.iot.ca_pem_url // empty')
    if [[ -n "$IOT_THING_NAME" ]] && [[ -n "$IOT_CERT_PEM" ]]; then
        log_ok "IoT certificate received from server (thing: ${IOT_THING_NAME})"
    else
        log_warn "No IoT config in server response — will try CLI provisioning in Step 8"
        log_warn "If CLI provisioning also fails, run install again with the cortex-device-provisioning AWS profile configured"
    fi

    # Download manifest first (needed for version check + platform compat check)
    if [[ -n "$MANIFEST_URL" ]]; then
        curl -sfSL -o "${DOWNLOAD_DIR}/manifest.json" "$MANIFEST_URL" 2>/dev/null || true
    fi

    # Version check: if the installed commit already matches the remote
    # manifest, skip download entirely (unless --force). This makes re-running
    # the installer against the same release cheap.
    if [[ "$MODE" != "update" ]] && is_same_version_installed "${DOWNLOAD_DIR}/manifest.json"; then
        if [[ "${FORCE_UPDATE:-false}" != "true" ]]; then
            log_ok "Already on the latest release — nothing to download."
            echo "  Use --force to re-install the same version."
            exit 0
        fi
        log "Forcing re-install of same version"
    fi

    # Platform check before downloading tarballs
    check_platform_compatibility

    # Download app tarball (always)
    download_and_verify "$APP_URL" "${DOWNLOAD_DIR}/${APP_TARBALL}" "$APP_SHA" "app"

    # Download SDK tarball
    if ! $SKIP_SDK && [[ -n "$SDK_URL" ]]; then
        download_and_verify "$SDK_URL" "${DOWNLOAD_DIR}/${SDK_TARBALL}" "$SDK_SHA" "SDK"
    fi

    # Download models tarball
    if ! $SKIP_MODELS && [[ -n "$MODELS_URL" ]]; then
        download_and_verify "$MODELS_URL" "${DOWNLOAD_DIR}/${MODELS_TARBALL}" "$MODELS_SHA" "models"
    fi

else
    # ── AWS CLI mode (--update on enrolled device) ──
    # Build profile/credential args — empty if using IoT env vars
    AWS_ARGS="--region $AWS_REGION"
    [[ -n "$AWS_PROFILE" ]] && AWS_ARGS="--profile $AWS_PROFILE $AWS_ARGS"

    AWS_CLI_BIN=""
    for p in /usr/local/bin/aws /usr/bin/aws /home/*/.local/bin/aws; do
        [[ -x "$p" ]] && AWS_CLI_BIN="$p" && break
    done
    [[ -z "$AWS_CLI_BIN" ]] && { log_err "AWS CLI not found"; exit 1; }

    log "Downloading manifest..."
    "$AWS_CLI_BIN" s3 cp "${S3_PREFIX}/manifest.json" "${DOWNLOAD_DIR}/manifest.json" \
        $AWS_ARGS 2>/dev/null || true

    if [[ -f "${DOWNLOAD_DIR}/manifest.json" ]]; then
        remote_commit=$(jq -r '.git_commit // empty' "${DOWNLOAD_DIR}/manifest.json")
        remote_date=$(jq -r '.build_date // empty' "${DOWNLOAD_DIR}/manifest.json")
        local_commit=""
        local_date=""
        if [[ -f "${INSTALL_PREFIX}/manifest.json" ]]; then
            local_commit=$(jq -r '.git_commit // empty' "${INSTALL_PREFIX}/manifest.json")
            local_date=$(jq -r '.build_date // empty' "${INSTALL_PREFIX}/manifest.json")
        fi

        if [[ -n "$local_commit" ]]; then
            echo -e "  Installed: ${CYAN}${local_commit}${NC}  (${local_date:-unknown})"
        else
            echo -e "  Installed: ${YELLOW}none${NC}"
        fi
        echo -e "  Available: ${CYAN}${remote_commit}${NC}  (${remote_date:-unknown})"

        if [[ "$remote_commit" == "$local_commit" ]] && [[ -n "$local_commit" ]]; then
            log_ok "Already up to date (${local_commit})"
            if [[ "${FORCE_UPDATE:-false}" != "true" ]]; then
                echo "  Use --force to re-install the same version."
                exit 0
            fi
            log "Forcing re-install of same version"
        fi
    fi

    # Platform check before downloading tarballs
    check_platform_compatibility

    # Helper for the aws-cli path: cache-aware s3 cp with checksum verify
    # pulled from manifest.json.
    aws_download_and_verify() {
        local tarball="$1" label="$2"
        local dest="${DOWNLOAD_DIR}/${tarball}"
        local cached="${CACHE_DIR}/${tarball}"
        local expected_sha=""
        if [[ -f "${DOWNLOAD_DIR}/manifest.json" ]]; then
            expected_sha=$(jq -r --arg t "$tarball" '.checksums[$t] // empty' "${DOWNLOAD_DIR}/manifest.json")
        fi

        if [[ -n "$expected_sha" ]] && sha_matches "$cached" "$expected_sha"; then
            cp -f "$cached" "$dest"
            log_ok "Cache hit for ${label} — skipping download"
            return 0
        fi

        [[ -f "$cached" ]] && rm -f "$cached"
        log "Downloading ${label} (${tarball})..."
        "$AWS_CLI_BIN" s3 cp "${S3_PREFIX}/${tarball}" "$cached" $AWS_ARGS || {
            log_err "Failed to download ${tarball}"; rm -f "$cached"; exit 1;
        }
        if [[ -n "$expected_sha" ]] && ! sha_matches "$cached" "$expected_sha"; then
            log_err "Checksum mismatch for ${label}"
            rm -f "$cached"; exit 1;
        fi
        cp -f "$cached" "$dest"
        local fsize; fsize=$(du -sh "$dest" | cut -f1)
        log_ok "Downloaded ${label} (${fsize})"
    }

    aws_download_and_verify "${APP_TARBALL}" "app"

    if ! $SKIP_SDK; then
        aws_download_and_verify "${SDK_TARBALL}" "SDK"
    fi

    if ! $SKIP_MODELS; then
        if "$AWS_CLI_BIN" s3 ls "${S3_PREFIX}/${MODELS_TARBALL}" $AWS_ARGS &>/dev/null; then
            aws_download_and_verify "${MODELS_TARBALL}" "models"
        else
            log_warn "No models tarball found in S3, skipping"
        fi
    else
        log "Skipping models download (--skip-models)"
    fi
fi  # end download mode branch

# (platform check was moved into check_platform_compatibility, called above)

#-------------------------------------------------------------------------------
# Step 3: Extract app artifacts
#-------------------------------------------------------------------------------
log_step "Step 3/9: Installing app to ${INSTALL_PREFIX}/"

# Stop and disable running services before overwriting binaries
# (disable prevents Restart=always from restarting mid-install).
# Check is-active as well as is-enabled: a manually started (active but
# disabled) unit would otherwise survive this loop, and systemd's
# Restart=always would respawn it right after the pkill below.
for svc in cortex-casemgmt cortex-uploader cortex-ui cortex-video-core; do
    if systemctl is-enabled "$svc" &>/dev/null || systemctl is-active "$svc" &>/dev/null; then
        log "Stopping ${svc}..."
        systemctl stop "$svc" 2>/dev/null || true
        systemctl disable "$svc" 2>/dev/null || true
    fi
done
systemctl daemon-reload 2>/dev/null || true
# Also kill any interactively-launched cortex processes (pkill -f matches
# the full command line, so anything run from */bin/cortex_* is caught)
pkill -f "bin/cortex_" 2>/dev/null || true
sleep 1  # brief grace so killed processes release open files before overwrite

mkdir -p "$INSTALL_PREFIX"

# Clean old binaries and libs before installing (prevents stale files from
# lingering across releases; share/ goes too since the tarball recreates it)
if [[ -d "${INSTALL_PREFIX}/bin" ]] || [[ -d "${INSTALL_PREFIX}/lib" ]]; then
    log "Removing old binaries and libs..."
    rm -rf "${INSTALL_PREFIX}/bin" "${INSTALL_PREFIX}/lib" "${INSTALL_PREFIX}/share"
fi

# Extract app (bin, lib, share)
tar -xzf "${DOWNLOAD_DIR}/${APP_TARBALL}" -C "$INSTALL_PREFIX"
# The trailing `|| true` keeps set -e/pipefail from aborting the install when
# a tree is absent from the tarball: find exits non-zero on a missing
# directory, which would otherwise kill the script right after a successful
# extraction (the count simply reads 0 instead).
bin_count=$(find "${INSTALL_PREFIX}/bin" -type f -executable 2>/dev/null | wc -l || true)
lib_count=$(find "${INSTALL_PREFIX}/lib" -name "*.so*" -type f 2>/dev/null | wc -l || true)
log_ok "Installed ${bin_count} binaries, ${lib_count} libs to ${INSTALL_PREFIX}/"

# Copy manifest alongside the install so future runs can compare versions
[[ -f "${DOWNLOAD_DIR}/manifest.json" ]] && cp "${DOWNLOAD_DIR}/manifest.json" "${INSTALL_PREFIX}/manifest.json"

# Write tenant binding from license key (only on fresh install, not --update)
if $USE_LICENSE_KEY && [[ "$MODE" != "update" ]]; then
    mkdir -p "${DEVICE_CONF_DIR}"
    # device_id: use IoT thing name if available (provisioned by Lambda), else hostname
    DEVICE_ID="${IOT_THING_NAME:-cortex-$(hostname -s | tr -dc 'a-zA-Z0-9_-')}"
    cat > "${DEVICE_CONF}" << CONFEOF
# Cortex device configuration — written by license key installer
device_id=${DEVICE_ID}
tenant_id=${TENANT_ID}
tenant_name=${TENANT_NAME}
aws_region=eu-central-1
api_url=${API_URL}
s3_bucket_cases=verus-case-assets
s3_bucket_releases=verus-cortex-releases
installed_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)
install_key=${INSTALL_KEY}
CONFEOF
    # The file embeds the license key — restrict it to the owner. Without
    # this, the default umask (022) leaves the key world-readable.
    chmod 600 "${DEVICE_CONF}"
    chown "${DEPLOY_USER}:${DEPLOY_GROUP}" "${DEVICE_CONF}"
    log_ok "Wrote ${DEVICE_CONF} (tenant: ${TENANT_NAME})"
fi

#-------------------------------------------------------------------------------
# Step 4: Extract SDK libs (if downloaded)
#-------------------------------------------------------------------------------
if [[ -f "${DOWNLOAD_DIR}/${SDK_TARBALL}" ]]; then
    log_step "Step 4/9: Installing SDK libraries"

    # Unpack into a scratch dir, then move/copy each SDK family into place.
    # NOTE(review): fixed, predictable /tmp path — mktemp -d would be safer
    # on multi-user systems.
    SDK_EXTRACT="/tmp/cortex-sdk-extract"
    rm -rf "$SDK_EXTRACT"
    mkdir -p "$SDK_EXTRACT"
    tar -xzf "${DOWNLOAD_DIR}/${SDK_TARBALL}" -C "$SDK_EXTRACT"

    # Holoscan SDK libs — replace the whole lib/ tree wholesale via mv
    if [[ -d "${SDK_EXTRACT}/holoscan-lib" ]]; then
        mkdir -p "${HOST_HOLOSCAN}"
        rm -rf "${HOST_HOLOSCAN}/lib"
        mv "${SDK_EXTRACT}/holoscan-lib" "${HOST_HOLOSCAN}/lib"
        count=$(find "${HOST_HOLOSCAN}/lib" -name "*.so*" -type f 2>/dev/null | wc -l)
        log_ok "Holoscan SDK: ${count} libs → ${HOST_HOLOSCAN}/lib/"
    fi

    # CUDA libs — cp -a preserves the .so symlink chains
    if [[ -d "${SDK_EXTRACT}/cuda-lib" ]]; then
        mkdir -p "${HOST_CUDA_LIBS}"
        cp -a "${SDK_EXTRACT}/cuda-lib/"* "${HOST_CUDA_LIBS}/"
        count=$(find "${HOST_CUDA_LIBS}" -type f -o -type l 2>/dev/null | wc -l)
        log_ok "CUDA: ${count} libs → ${HOST_CUDA_LIBS}/"
    fi

    # TensorRT libs
    if [[ -d "${SDK_EXTRACT}/tensorrt-lib" ]]; then
        mkdir -p "${HOST_TENSORRT_LIBS}"
        cp -a "${SDK_EXTRACT}/tensorrt-lib/"* "${HOST_TENSORRT_LIBS}/"
        count=$(find "${HOST_TENSORRT_LIBS}" -type f -o -type l 2>/dev/null | wc -l)
        log_ok "TensorRT: ${count} libs → ${HOST_TENSORRT_LIBS}/"
    fi

    # FFmpeg 7.x libs (bundled because target apt only has FFmpeg 6.x).
    # Guard on the directory first: `find` on a missing path exits non-zero,
    # and under set -euo pipefail the previously-unguarded count pipeline
    # aborted the whole install for SDK tarballs without an ffmpeg-lib/ tree.
    if [[ -d "${SDK_EXTRACT}/ffmpeg-lib" ]]; then
        ffmpeg_count=$(find "${SDK_EXTRACT}/ffmpeg-lib" -type f -o -type l 2>/dev/null | wc -l)
        if [[ $ffmpeg_count -gt 0 ]]; then
            cp -a "${SDK_EXTRACT}/ffmpeg-lib/"* "${INSTALL_PREFIX}/lib/"
            log_ok "FFmpeg 7.x: ${ffmpeg_count} libs → ${INSTALL_PREFIX}/lib/"
        fi
    fi

    rm -rf "$SDK_EXTRACT"
else
    log_step "Step 4/9: Skipping SDK install (not downloaded)"
fi

# Extract models (if downloaded)
if [[ -f "${DOWNLOAD_DIR}/${MODELS_TARBALL}" ]]; then
    log "Installing AI models..."
    # tar -xzf preserves symlinks by default. The wake word tarball ships
    # verus_latest.onnx as a symlink to the current production model; the
    # cortex_app_verus binary loads it unchanged across releases.
    tar -xzf "${DOWNLOAD_DIR}/${MODELS_TARBALL}" -C "$INSTALL_PREFIX"
    log_ok "Models installed to ${INSTALL_PREFIX}/models/"

    # Verify the wake word symlink survived extraction
    WAKE_LATEST="${INSTALL_PREFIX}/models/verus_wake/verus_latest.onnx"
    if [[ -d "${INSTALL_PREFIX}/models/verus_wake" ]]; then
        if [[ -L "${WAKE_LATEST}" && -e "${WAKE_LATEST}" ]]; then
            log_ok "Wake word: verus_latest.onnx -> $(readlink "${WAKE_LATEST}")"
        elif [[ -f "${WAKE_LATEST}" ]]; then
            log "Wake word: verus_latest.onnx (regular file, not symlink)"
        else
            # Fallback: if only versioned files are present, point latest at
            # the newest by mtime. Uses a glob + -nt comparison instead of
            # the old `ls -t | head -1`, which (a) parsed ls output and
            # (b) aborted the script under set -euo pipefail when no
            # verus_v*.onnx existed (failing ls in a pipeline).
            LATEST_VERUS=""
            for _verus_onnx in "${INSTALL_PREFIX}/models/verus_wake/"verus_v*.onnx; do
                [[ -f "${_verus_onnx}" ]] || continue
                if [[ -z "${LATEST_VERUS}" || "${_verus_onnx}" -nt "${LATEST_VERUS}" ]]; then
                    LATEST_VERUS="${_verus_onnx}"
                fi
            done
            if [[ -n "${LATEST_VERUS}" ]]; then
                ln -sf "$(basename "${LATEST_VERUS}")" "${WAKE_LATEST}"
                log_ok "Created verus_latest.onnx -> $(basename "${LATEST_VERUS}")"
            fi
        fi
    fi

    # Build TensorRT engines from ONNX — engines are GPU/TensorRT-version specific
    # and must be rebuilt on each target device. Prefer the script next to
    # this installer, fall back to the installed bin/ tree.
    BUILD_MODELS=""
    for p in "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/build_models.sh" "${INSTALL_PREFIX}/bin/build_models.sh"; do
        [[ -x "$p" ]] && BUILD_MODELS="$p" && break
    done
    if [[ -n "$BUILD_MODELS" ]]; then
        log_step "Building TensorRT engines (this may take several minutes)..."
        "$BUILD_MODELS" --rebuild || log_warn "Engine build had errors — services will retry at startup"
    else
        log_warn "build_models.sh not found — engines will be built on first service start"
    fi

    # Remove ONNX files after engines are built (saves ~200MB). Only delete
    # an ONNX when its matching .engine actually exists next to it.
    MODELS_PREFIX="${INSTALL_PREFIX}/models"
    for model_dir in "${MODELS_PREFIX}/oob" "${MODELS_PREFIX}/tool_segmentation" "${MODELS_PREFIX}/sr"; do
        [[ -d "$model_dir" ]] || continue
        for onnx in "$model_dir"/*.onnx; do
            [[ -f "$onnx" ]] || continue
            engine="${onnx%.onnx}.engine"
            if [[ -f "$engine" ]]; then
                rm -f "$onnx"
                log "Removed $(basename "$onnx") (engine built successfully)"
            fi
        done
    done
fi

#-------------------------------------------------------------------------------
# Step 5: Configure dynamic linker
#-------------------------------------------------------------------------------
log_step "Step 5/9: Configuring dynamic linker"

# Directories the runtime linker must search: app libs, the SDK trees
# installed in Step 4, then the JetPack CUDA target libs.
LIBRARY_PATHS=(
    "${INSTALL_PREFIX}/lib"
    "${HOST_HOLOSCAN}/lib"
    "${HOST_CUDA_LIBS}"
    "${HOST_TENSORRT_LIBS}"
    "/usr/local/cuda/targets/sbsa-linux/lib"
)

# One path per line. A plain redirect replaces the former
# `| tee "$LD_CONF_FILE" >/dev/null` — the script already runs as root,
# so tee's sudo-style indirection bought nothing.
printf '%s\n' "${LIBRARY_PATHS[@]}" > "$LD_CONF_FILE"
ldconfig
log_ok "Wrote ${LD_CONF_FILE} and ran ldconfig"

#-------------------------------------------------------------------------------
# Step 6: Configure host (NvSciIpc, permissions, directories)
#-------------------------------------------------------------------------------
log_step "Step 6/9: Configuring host"

# Recording storage directory — created on first install; on later runs
# ownership is repaired if it drifted.
if [[ ! -d /var/recordings ]]; then
    mkdir -p /var/recordings
    chown "${DEPLOY_USER}:${DEPLOY_GROUP}" /var/recordings
    log_ok "Created /var/recordings (owner: ${DEPLOY_USER})"
elif [[ "$(stat -c '%U' /var/recordings 2>/dev/null)" != "${DEPLOY_USER}" ]]; then
    # Compare the actual owner instead of the old `! -w` test: the installer
    # runs as root, for whom -w is effectively always true, so the previous
    # check could never detect a directory owned by the wrong user.
    chown -R "${DEPLOY_USER}:${DEPLOY_GROUP}" /var/recordings
    log_ok "Fixed /var/recordings ownership"
fi

# CAP_SYS_NICE for real-time scheduling (avoids needing sudo at runtime)
if [[ -x "${INSTALL_PREFIX}/bin/cortex_core_video" ]]; then
    setcap cap_sys_nice=ep "${INSTALL_PREFIX}/bin/cortex_core_video" 2>/dev/null && \
        log_ok "Granted CAP_SYS_NICE to cortex_core_video" || \
        log_warn "Could not set capabilities (setcap not available)"
fi

# NvSciIpc endpoints for multi-process video.
# Uses a version marker to detect stale configs and re-provision the full set.
# Bump NVSCI_CFG_VERSION when endpoints change so --update picks up additions.
NVSCI_CFG_VERSION="cortex-nvsci-v3"
if [[ -f "$NVSCIIPC_CFG" ]]; then
    if grep -q "$NVSCI_CFG_VERSION" "$NVSCIIPC_CFG"; then
        log_ok "NvSciIpc endpoints up to date (${NVSCI_CFG_VERSION})"
    else
        # Remove any previous Cortex endpoint block before re-adding.
        # Two range deletes cover both historical block-header spellings.
        sed -i '/# Cortex compositor/,/# --- end cortex endpoints ---/d' "$NVSCIIPC_CFG"
        sed -i '/# Cortex source/,/# --- end cortex endpoints ---/d' "$NVSCIIPC_CFG"
        # Also remove individual stale entries that predate the block markers
        # (the 10x/11x, 20x/21x, 30x/31x, 40x/41x stream pairs)
        sed -i '/nvscistream_10[0-9]/d; /nvscistream_11[01]/d' "$NVSCIIPC_CFG"
        sed -i '/nvscistream_20[0-9]/d; /nvscistream_21[01]/d' "$NVSCIIPC_CFG"
        sed -i '/nvscistream_30[0-9]/d; /nvscistream_31[01]/d' "$NVSCIIPC_CFG"
        sed -i '/nvscistream_40[0-9]/d; /nvscistream_41[01]/d' "$NVSCIIPC_CFG"
        sed -i '/Cortex source capture endpoints/d' "$NVSCIIPC_CFG"
        # Remove stale compositor entries (0-9 range, only the ones we own).
        # The trailing space in each pattern keeps e.g. nvscistream_60 or
        # nvscistream_80 from matching.
        sed -i '/^INTER_PROCESS.*nvscistream_0 /d; /^INTER_PROCESS.*nvscistream_6 /d; /^INTER_PROCESS.*nvscistream_8 /d' "$NVSCIIPC_CFG"

        # Append the current endpoint set. Each line declares an endpoint
        # pair plus two numeric channel parameters (16, 24576).
        # NOTE(review): confirm the parameter semantics (frame count /
        # frame size?) against the NvSciIpc configuration docs.
        cat >> "$NVSCIIPC_CFG" << NVSCIEOF
# Cortex NvSciStream endpoints — ${NVSCI_CFG_VERSION}
# Compositor → downstream services (recorder, teleconf, NDI stream)
INTER_PROCESS   nvscistream_0        nvscistream_1     16  24576
INTER_PROCESS   nvscistream_6        nvscistream_7     16  24576
INTER_PROCESS   nvscistream_8        nvscistream_9     16  24576
# Source A (100-111): display, rec, deid, tseg, tele, ndi
INTER_PROCESS   nvscistream_100      nvscistream_101   16  24576
INTER_PROCESS   nvscistream_102      nvscistream_103   16  24576
INTER_PROCESS   nvscistream_104      nvscistream_105   16  24576
INTER_PROCESS   nvscistream_106      nvscistream_107   16  24576
INTER_PROCESS   nvscistream_108      nvscistream_109   16  24576
INTER_PROCESS   nvscistream_110      nvscistream_111   16  24576
# Source B (200-211)
INTER_PROCESS   nvscistream_200      nvscistream_201   16  24576
INTER_PROCESS   nvscistream_202      nvscistream_203   16  24576
INTER_PROCESS   nvscistream_204      nvscistream_205   16  24576
INTER_PROCESS   nvscistream_206      nvscistream_207   16  24576
INTER_PROCESS   nvscistream_208      nvscistream_209   16  24576
INTER_PROCESS   nvscistream_210      nvscistream_211   16  24576
# Source C (300-311)
INTER_PROCESS   nvscistream_300      nvscistream_301   16  24576
INTER_PROCESS   nvscistream_302      nvscistream_303   16  24576
INTER_PROCESS   nvscistream_304      nvscistream_305   16  24576
INTER_PROCESS   nvscistream_306      nvscistream_307   16  24576
INTER_PROCESS   nvscistream_308      nvscistream_309   16  24576
INTER_PROCESS   nvscistream_310      nvscistream_311   16  24576
# Source D (400-411)
INTER_PROCESS   nvscistream_400      nvscistream_401   16  24576
INTER_PROCESS   nvscistream_402      nvscistream_403   16  24576
INTER_PROCESS   nvscistream_404      nvscistream_405   16  24576
INTER_PROCESS   nvscistream_406      nvscistream_407   16  24576
INTER_PROCESS   nvscistream_408      nvscistream_409   16  24576
INTER_PROCESS   nvscistream_410      nvscistream_411   16  24576
# --- end cortex endpoints ---
NVSCIEOF

        log_ok "Provisioned NvSciIpc endpoints (0-9, 100-411) [${NVSCI_CFG_VERSION}]"
        echo -e "  ${YELLOW}NOTE: Reboot required for NvSciIpc kernel module to load new endpoints${NC}"
        NEEDS_REBOOT=true
    fi
else
    log_warn "${NVSCIIPC_CFG} not found (nvidia-l4t-nvsci not installed?)"
fi

#-------------------------------------------------------------------------------
# Step 7: Install systemd services
#-------------------------------------------------------------------------------
log_step "Step 7/9: Installing systemd services"
{
    # Shared linker path for both units — must mirror the Step 5 ld.so.conf
    # entries so the services resolve the same libraries.
    LD_LIB_PATH="${INSTALL_PREFIX}/lib:${HOST_HOLOSCAN}/lib:${HOST_CUDA_LIBS}:${HOST_TENSORRT_LIBS}:/usr/local/cuda/targets/sbsa-linux/lib"

    # cortex-casemgmt.service — always-on case database + gRPC server.
    # The heredoc is unquoted, so ${DEPLOY_USER}, ${LD_LIB_PATH}, etc. are
    # expanded at install time and written literally into the unit file.
    tee "${SYSTEMD_DIR}/cortex-casemgmt.service" >/dev/null << SVCEOF
[Unit]
Description=Cortex Case Management
After=network.target
Wants=network-online.target

[Service]
Type=simple
User=${DEPLOY_USER}
Group=${DEPLOY_GROUP}

Environment="LD_LIBRARY_PATH=${LD_LIB_PATH}"
WorkingDirectory=${INSTALL_PREFIX}/bin
ExecStart=${INSTALL_PREFIX}/bin/cortex_core_casemgmt

Restart=always
RestartSec=2s
StartLimitBurst=5
StartLimitIntervalSec=300

PrivateTmp=no

StandardOutput=journal
StandardError=journal
SyslogIdentifier=cortex-casemgmt

[Install]
WantedBy=multi-user.target
SVCEOF
    log_ok "Installed cortex-casemgmt.service"

    # cortex-uploader.service — always-on cloud sync (case upload, heartbeat).
    # Ordered after casemgmt so the gRPC server is up first.
    tee "${SYSTEMD_DIR}/cortex-uploader.service" >/dev/null << SVCEOF
[Unit]
Description=Cortex Case Uploader
After=network-online.target cortex-casemgmt.service
Wants=network-online.target cortex-casemgmt.service

[Service]
Type=simple
User=${DEPLOY_USER}
Group=${DEPLOY_GROUP}

Environment="LD_LIBRARY_PATH=${LD_LIB_PATH}"
WorkingDirectory=${INSTALL_PREFIX}/bin
ExecStart=${INSTALL_PREFIX}/bin/cortex_app_uploader

Restart=always
RestartSec=5s
StartLimitBurst=5
StartLimitIntervalSec=300

PrivateTmp=no

StandardOutput=journal
StandardError=journal
SyslogIdentifier=cortex-uploader

[Install]
WantedBy=multi-user.target
SVCEOF
    log_ok "Installed cortex-uploader.service"

    # Remove old video-core/ui services if they exist (replaced by run_multiprocess.sh)
    for old_svc in cortex-video-core cortex-ui; do
        if [[ -f "${SYSTEMD_DIR}/${old_svc}.service" ]]; then
            systemctl disable "${old_svc}" 2>/dev/null || true
            rm -f "${SYSTEMD_DIR}/${old_svc}.service"
            log_ok "Removed legacy ${old_svc}.service"
        fi
    done

    # enable/restart are best-effort (|| true): e.g. inside a chroot or
    # container systemctl may be unavailable, and that should not abort
    # the remaining install steps.
    systemctl daemon-reload
    systemctl enable cortex-casemgmt cortex-uploader 2>/dev/null || true
    systemctl restart cortex-casemgmt cortex-uploader 2>/dev/null || true
    log_ok "systemd services enabled and started"
}

#-------------------------------------------------------------------------------
# Step 8/9: Set up IoT Core (certificate + config)
#-------------------------------------------------------------------------------
# IoT provisioning happens in two ways:
#   a) License key install: Lambda provisions cert + thing server-side, returns
#      cert/key in the redeem response. No AWS CLI needed on device.
#   b) Manual/fallback: Use cortex-device-provisioning IAM user to call AWS CLI.
#
# After provisioning, the device uses ONLY its X.509 cert for all AWS access
# (via IoT Credential Provider). No IAM access keys remain on the device.

log_step "Step 8/9: Setting up IoT Core connectivity"
{  # Step 8 block

    IOT_CERT_DIR="${INSTALL_PREFIX}/certs"

    # An existing device cert means the device is already enrolled — skip.
    if [[ ! -f "${IOT_CERT_DIR}/device.pem.crt" ]]; then
        mkdir -p "${IOT_CERT_DIR}"

        # Method A: Check if license key redeem already provided IoT config
        if [[ -n "${IOT_THING_NAME:-}" ]] && [[ -n "${IOT_CERT_PEM:-}" ]]; then
            # Cert + key were returned by Lambda during license key redemption.
            # NOTE(review): the private key is written before chmod 600, so
            # there is a brief window at default umask perms — consider
            # setting a restrictive umask (or pre-creating with chmod) first.
            printf '%s\n' "$IOT_CERT_PEM" > "${IOT_CERT_DIR}/device.pem.crt"
            printf '%s\n' "$IOT_PRIVATE_KEY" > "${IOT_CERT_DIR}/device.private.key"
            # NOTE(review): curl without -f — an HTTP error page would be
            # saved as the CA file; verify download success if this CA is
            # security-critical.
            curl -s -o "${IOT_CERT_DIR}/AmazonRootCA1.pem" "${IOT_CA_URL:-https://www.amazontrust.com/repository/AmazonRootCA1.pem}"
            chmod 600 "${IOT_CERT_DIR}/device.private.key"
            chown -R "${DEPLOY_USER}:${DEPLOY_GROUP}" "${IOT_CERT_DIR}"

            # Write IoT config to device.conf — strip any stale iot_* lines
            # first so re-provisioning never leaves duplicate keys behind.
            mkdir -p "${DEVICE_CONF_DIR}"
            [[ -f "${DEVICE_CONF}" ]] && sed -i '/^iot_/d' "${DEVICE_CONF}"
            cat >> "${DEVICE_CONF}" << IOTEOF

# IoT Core
iot_endpoint=${IOT_ENDPOINT:-iot.verussurgical.com}
iot_thing_name=${IOT_THING_NAME}
iot_cert_path=${IOT_CERT_DIR}/device.pem.crt
iot_key_path=${IOT_CERT_DIR}/device.private.key
iot_ca_path=${IOT_CERT_DIR}/AmazonRootCA1.pem
iot_credential_endpoint=${IOT_CRED_ENDPOINT:-cj9ykke9k8lm6.credentials.iot.eu-central-1.amazonaws.com}
iot_role_alias=${IOT_ROLE_ALIAS:-cortex-device-s3-alias}
IOTEOF
            log_ok "IoT configured from license key (thing: ${IOT_THING_NAME})"

        else
            # Method B: Use cortex-device-provisioning IAM user via AWS CLI.
            # Locate the aws binary (system paths, then any user's ~/.local).
            AWS_CLI=""
            for p in /usr/local/bin/aws /usr/bin/aws /home/*/.local/bin/aws; do
                [[ -x "$p" ]] && AWS_CLI="$p" && break
            done

            if [[ -z "$AWS_CLI" ]]; then
                log_warn "AWS CLI not found — skipping IoT setup"
            else
                # Credentials live under the deploy user's home, not root's,
                # even though this script runs as root.
                REAL_HOME=$(eval echo "~${DEPLOY_USER}")
                AWS_CREDS_FILE="${REAL_HOME}/.aws/credentials"

                # Use the provisioning-only profile (minimal permissions);
                # the teleconf profile is a legacy fallback.
                IOT_PROFILE=""
                for profile in cortex-device-provisioning cortex-teleconf-dev; do
                    if grep -q "\[${profile}\]" "$AWS_CREDS_FILE" 2>/dev/null; then
                        IOT_PROFILE="$profile"
                        break
                    fi
                done

                if [[ -z "$IOT_PROFILE" ]]; then
                    log_warn "No provisioning AWS profile found — skipping IoT setup"
                else
                    # Thing name: reuse the bound device_id when present so
                    # re-provisioning attaches to the same IoT thing.
                    THING_NAME=""
                    [[ -f "${DEVICE_CONF}" ]] && THING_NAME=$(grep '^device_id=' "${DEVICE_CONF}" 2>/dev/null | cut -d= -f2)
                    [[ -z "$THING_NAME" ]] && THING_NAME="cortex-$(hostname -s | tr -dc 'a-zA-Z0-9_-')"

                    log "Provisioning IoT thing: ${THING_NAME} (profile: ${IOT_PROFILE})"

                    # Point the CLI at the deploy user's credentials for the
                    # duration of provisioning (unset again below).
                    export AWS_SHARED_CREDENTIALS_FILE="$AWS_CREDS_FILE"
                    export AWS_CONFIG_FILE="${REAL_HOME}/.aws/config"

                    # Creates key pair + cert in one call; the JSON response
                    # (captured for the ARN) is the only output we parse.
                    CERT_JSON=$("$AWS_CLI" --profile "$IOT_PROFILE" iot create-keys-and-certificate \
                        --set-as-active \
                        --certificate-pem-outfile "${IOT_CERT_DIR}/device.pem.crt" \
                        --public-key-outfile "${IOT_CERT_DIR}/device.public.key" \
                        --private-key-outfile "${IOT_CERT_DIR}/device.private.key" \
                        --region eu-central-1 2>/dev/null) || true

                    # Grep-based extraction keeps this path independent of jq.
                    CERT_ARN=$(echo "$CERT_JSON" | grep -o '"certificateArn"[[:space:]]*:[[:space:]]*"[^"]*"' | grep -o '"arn:[^"]*"' | tr -d '"')

                    if [[ -f "${IOT_CERT_DIR}/device.pem.crt" ]] && [[ -n "$CERT_ARN" ]]; then
                        chmod 600 "${IOT_CERT_DIR}/device.private.key"
                        chown -R "${DEPLOY_USER}:${DEPLOY_GROUP}" "${IOT_CERT_DIR}"
                        # NOTE(review): curl without -f — an HTTP error page
                        # would be saved as the CA file; verify if critical.
                        curl -s -o "${IOT_CERT_DIR}/AmazonRootCA1.pem" https://www.amazontrust.com/repository/AmazonRootCA1.pem

                        # create-thing / attach-* are idempotent best-effort:
                        # re-running against an existing thing must not abort.
                        "$AWS_CLI" --profile "$IOT_PROFILE" iot create-thing \
                            --thing-name "${THING_NAME}" --thing-type-name cortex-device \
                            --region eu-central-1 2>/dev/null || true
                        "$AWS_CLI" --profile "$IOT_PROFILE" iot attach-policy \
                            --policy-name cortex-device-policy --target "${CERT_ARN}" \
                            --region eu-central-1 2>/dev/null || true
                        "$AWS_CLI" --profile "$IOT_PROFILE" iot attach-thing-principal \
                            --thing-name "${THING_NAME}" --principal "${CERT_ARN}" \
                            --region eu-central-1 2>/dev/null || true

                        # Persist IoT settings; strip stale iot_* lines first
                        mkdir -p "${DEVICE_CONF_DIR}"
                        [[ -f "${DEVICE_CONF}" ]] && sed -i '/^iot_/d' "${DEVICE_CONF}"
                        cat >> "${DEVICE_CONF}" << IOTEOF

# IoT Core
iot_endpoint=iot.verussurgical.com
iot_thing_name=${THING_NAME}
iot_cert_path=${IOT_CERT_DIR}/device.pem.crt
iot_key_path=${IOT_CERT_DIR}/device.private.key
iot_ca_path=${IOT_CERT_DIR}/AmazonRootCA1.pem
iot_credential_endpoint=cj9ykke9k8lm6.credentials.iot.eu-central-1.amazonaws.com
iot_role_alias=cortex-device-s3-alias
IOTEOF
                        log_ok "IoT provisioned via CLI (thing: ${THING_NAME})"
                    else
                        log_warn "IoT certificate generation failed"
                        # Remove partial output so the next run retries cleanly
                        rm -f "${IOT_CERT_DIR}/device.pem.crt" "${IOT_CERT_DIR}/device.public.key" \
                              "${IOT_CERT_DIR}/device.private.key" 2>/dev/null
                    fi

                    unset AWS_SHARED_CREDENTIALS_FILE AWS_CONFIG_FILE
                fi
            fi
        fi
    else
        log_ok "IoT certificate already exists"
    fi
}  # end Step 8 block

#-------------------------------------------------------------------------------
# Step 9/9: Install remote desktop (x11vnc + websockify)
#-------------------------------------------------------------------------------
log_step "Step 9/9: Installing remote desktop services (x11vnc + websockify)"
# The VNC setup helper may sit next to this installer or inside the
# installed bin/ tree — run the first executable candidate found.
INSTALL_VNC=""
_vnc_script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
for _vnc_candidate in "${_vnc_script_dir}/install_vnc.sh" "${INSTALL_PREFIX}/bin/install_vnc.sh"; do
    if [[ -x "$_vnc_candidate" ]]; then
        INSTALL_VNC="$_vnc_candidate"
        break
    fi
done
if [[ -z "$INSTALL_VNC" ]]; then
    log_warn "install_vnc.sh not found — remote desktop not set up"
else
    "$INSTALL_VNC"
    log_ok "Remote desktop services installed"
fi

#-------------------------------------------------------------------------------
# Optional: Build TensorRT engines
#-------------------------------------------------------------------------------
if $BUILD_ENGINES; then
    log_step "Building TensorRT engines (this may take 5-10 minutes)..."
    # Locate build_models.sh: prefer the copy next to this installer,
    # fall back to the installed bin/ tree.
    BUILD_MODELS=""
    _bm_script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    for _bm_candidate in "${_bm_script_dir}/build_models.sh" "${INSTALL_PREFIX}/bin/build_models.sh"; do
        if [[ -x "$_bm_candidate" ]]; then
            BUILD_MODELS="$_bm_candidate"
            break
        fi
    done
    if [[ -z "$BUILD_MODELS" ]]; then
        log_warn "build_models.sh not found — run it manually after install"
    else
        "$BUILD_MODELS"
    fi
fi

#-------------------------------------------------------------------------------
# Optional: Kiosk mode
#-------------------------------------------------------------------------------
# Turns the device into a single-app appliance: GDM auto-logs the deploy user
# into a gnome-kiosk session that runs only the Cortex pipeline, with GNOME
# keybindings/lockscreen disabled via dconf system defaults.
if ${SETUP_KIOSK:-false}; then
    log_step "Setting up kiosk mode..."

    # Install gnome-kiosk-script-session — lightweight single-app compositor
    # that replaces gnome-shell. Runs ~/.local/bin/gnome-kiosk-script as the
    # only application. No shell chrome, no taskbar, no Activities.
    if ! dpkg -s gnome-kiosk-script-session &>/dev/null; then
        log "Installing gnome-kiosk-script-session..."
        apt-get install -y -qq gnome-kiosk-script-session
    fi

    # Create the kiosk launch script at the path gnome-kiosk-script-session
    # expects. Resolve the deploy user's home via getent rather than
    # 'eval echo ~user': eval on a variable is a shell-injection hazard and
    # breaks on usernames containing metacharacters.
    KIOSK_USER_HOME=$(getent passwd "$DEPLOY_USER" | cut -d: -f6)
    if [[ -z "$KIOSK_USER_HOME" ]]; then
        log_warn "Could not resolve home dir for ${DEPLOY_USER}; assuming /home/${DEPLOY_USER}"
        KIOSK_USER_HOME="/home/${DEPLOY_USER}"
    fi
    KIOSK_SCRIPT="${KIOSK_USER_HOME}/.local/bin/gnome-kiosk-script"
    mkdir -p "$(dirname "$KIOSK_SCRIPT")"
    cat > "$KIOSK_SCRIPT" << 'KIOSKEOF'
#!/usr/bin/env bash
# Cortex Kiosk — launched by gnome-kiosk-script-session as the single app.
# Runs the multiprocess pipeline. When the Qt UI quits (quit button),
# the pipeline stops and the session ends (returns to GDM login).

export PATH="/opt/verus/bin:/usr/local/bin:/usr/bin:/bin"

# Suppress GNOME notifications and prevent app-not-responding dialog
gsettings set org.gnome.desktop.notifications show-banners false 2>/dev/null || true
gsettings set org.gnome.mutter check-alive-timeout 30000 2>/dev/null || true

# Disable screen lock and idle timeout
gsettings set org.gnome.desktop.screensaver lock-enabled false 2>/dev/null || true
gsettings set org.gnome.desktop.session idle-delay 0 2>/dev/null || true

# Run the pipeline — blocks until user quits via the UI
/opt/verus/bin/run_multiprocess.sh --source v4l2

# When the pipeline exits, end the session (returns to GDM)
KIOSKEOF
    chmod +x "$KIOSK_SCRIPT"
    chown "${DEPLOY_USER}:${DEPLOY_GROUP}" "$KIOSK_SCRIPT"
    chown "${DEPLOY_USER}:${DEPLOY_GROUP}" "$(dirname "$KIOSK_SCRIPT")"

    # Lock down keyboard shortcuts and desktop via dconf system-wide defaults
    mkdir -p /etc/dconf/db/local.d /etc/dconf/db/local.d/locks /etc/dconf/profile
    cat > /etc/dconf/db/local.d/01-cortex-kiosk << 'EOF'
[org/gnome/desktop/lockdown]
disable-command-line=true
disable-log-out=true

[org/gnome/desktop/notifications]
show-banners=false

[org/gnome/desktop/screensaver]
lock-enabled=false

[org/gnome/desktop/session]
idle-delay=uint32 0

[org/gnome/desktop/wm/keybindings]
close=@as []
switch-applications=@as []
switch-windows=@as []
panel-main-menu=@as []
minimize=@as []
maximize=@as []

[org/gnome/shell/keybindings]
toggle-overview=@as []

[org/gnome/settings-daemon/plugins/media-keys]
logout=@as []
EOF

    # Lock these keys so users can't override them
    cat > /etc/dconf/db/local.d/locks/cortex-kiosk << 'EOF'
/org/gnome/desktop/lockdown/disable-command-line
/org/gnome/desktop/lockdown/disable-log-out
/org/gnome/desktop/wm/keybindings/close
/org/gnome/desktop/wm/keybindings/switch-applications
/org/gnome/shell/keybindings/toggle-overview
EOF

    # dconf needs a profile to read local.d. Create the profile only if it is
    # missing; if one exists without our line, append instead of overwriting so
    # any site-specific databases already listed there are preserved.
    if [[ ! -f /etc/dconf/profile/user ]]; then
        cat > /etc/dconf/profile/user << 'EOF'
user-db:user
system-db:local
EOF
    elif ! grep -q "system-db:local" /etc/dconf/profile/user; then
        echo "system-db:local" >> /etc/dconf/profile/user
    fi
    dconf update 2>/dev/null || true

    # Configure GDM for auto-login into the kiosk session
    GDM_CONF="/etc/gdm3/custom.conf"
    if [[ -f "$GDM_CONF" ]]; then
        # Rewrite (possibly commented) auto-login keys inside the [daemon] section
        sed -i '/^\[daemon\]/,/^\[/ {
            s/^#*AutomaticLoginEnable=.*/AutomaticLoginEnable=true/
            s/^#*AutomaticLogin=.*/AutomaticLogin='"$DEPLOY_USER"'/
        }' "$GDM_CONF"
        # If AutomaticLoginEnable not present anywhere, insert it under [daemon]
        if ! grep -q "AutomaticLoginEnable" "$GDM_CONF"; then
            sed -i '/^\[daemon\]/a AutomaticLoginEnable=true\nAutomaticLogin='"$DEPLOY_USER"'' "$GDM_CONF"
        fi
    fi

    # Set the default session for the user
    # Use xorg variant since WaylandEnable=false on these devices
    ACCOUNTSSERVICE_DIR="/var/lib/AccountsService/users"
    mkdir -p "$ACCOUNTSSERVICE_DIR"
    cat > "${ACCOUNTSSERVICE_DIR}/${DEPLOY_USER}" << 'EOF'
[User]
Session=gnome-kiosk-script-xorg
SystemAccount=false
EOF

    log_ok "Kiosk mode configured"
    log "  Auto-login: $DEPLOY_USER → Cortex Kiosk session"
    log "  Admin escape: reboot, click gear icon at GDM, select 'Ubuntu'"
    # NOTE: the previous hint pointed at /usr/share/xsessions/cortex-kiosk.desktop,
    # a file this installer never creates. Point at what we actually configured.
    log "  Disable kiosk: set AutomaticLoginEnable=false in ${GDM_CONF} and remove ${ACCOUNTSSERVICE_DIR}/${DEPLOY_USER}"
    NEEDS_REBOOT=true
fi

#-------------------------------------------------------------------------------
# Cleanup
#-------------------------------------------------------------------------------
# Only remove the ephemeral staging dir — $CACHE_DIR holds verified tarballs
# that the next install run can reuse to skip multi-GB downloads.
# ${DOWNLOAD_DIR:?} aborts loudly if the variable is ever unset/empty, so this
# can never degenerate into an unanchored 'rm -rf'; '--' guards against a
# value that starts with a dash being parsed as an option.
rm -rf -- "${DOWNLOAD_DIR:?DOWNLOAD_DIR is unset — refusing to rm -rf}"
log_ok "Cleaned up staging dir (cache preserved at ${CACHE_DIR})"

#-------------------------------------------------------------------------------
# Summary
#-------------------------------------------------------------------------------
# Final banner: where things were installed, which binaries landed, and
# whether a reboot is still required.
echo ""
echo -e "${BOLD}════════════════════════════════════════════${NC}"
echo -e "${BOLD}  Cortex Installation Complete${NC}"
echo -e "${BOLD}════════════════════════════════════════════${NC}"
echo ""
echo -e "  Install prefix:  ${GREEN}${INSTALL_PREFIX}${NC}"

# List installed cortex* executables (glob may be literal if nothing matched,
# so each candidate is re-checked with -x before printing).
if [[ -d "${INSTALL_PREFIX}/bin" ]]; then
    echo "  Binaries:"
    for exe in "${INSTALL_PREFIX}/bin/"cortex*; do
        if [[ -x "$exe" ]]; then
            echo "    $(basename "$exe")"
        fi
    done
fi

echo ""

# NvSciIpc endpoint changes take effect only at boot — surface this loudly.
if $NEEDS_REBOOT; then
    echo -e "  ${YELLOW}${BOLD}⚠  REBOOT REQUIRED${NC}"
    echo -e "  ${YELLOW}NvSciIpc endpoints were added. The kernel module${NC}"
    echo -e "  ${YELLOW}reads /etc/nvsciipc.cfg only at boot.${NC}"
    echo -e "  ${YELLOW}Run: sudo reboot${NC}"
    echo ""
fi

# Verify services are running: a green check for active units, a yellow
# bang for units that are enabled but currently stopped; units that are
# neither active nor enabled print nothing.
for service in cortex-casemgmt cortex-uploader; do
    if systemctl is-active "$service" &>/dev/null; then
        echo -e "  ${GREEN}✓${NC} ${service} running"
    else
        if systemctl is-enabled "$service" &>/dev/null; then
            echo -e "  ${YELLOW}!${NC} ${service} enabled but not running"
        fi
    fi
done
echo ""

# Operator cheat-sheet: how to inspect services, start the pipeline, and tail logs.
echo -e "  ${BOLD}Background services (auto-start on boot):${NC}"
echo "    systemctl status cortex-casemgmt cortex-uploader"
echo ""
echo -e "  ${BOLD}Start the video pipeline + UI:${NC}"
echo "    ${INSTALL_PREFIX}/bin/run_multiprocess.sh"
echo ""
echo -e "  ${BOLD}View logs:${NC}"
echo "    journalctl -u cortex-casemgmt -f"
echo "    journalctl -u cortex-uploader -f"

# Remind the operator to build TensorRT engines if the marker engine file is
# absent. ([[ -f ]] emits nothing on stderr, so the former '2>/dev/null'
# redirection here was dead weight — removed.)
if [[ ! -f "${INSTALL_PREFIX}/models/oob/anonymization_model.engine" ]]; then
    echo ""
    echo -e "  ${YELLOW}TensorRT engines not built yet. Run:${NC}"
    echo "    sudo ./scripts/build_models.sh"
fi
