#!/bin/bash
# ai_jail.sh - Minimal mount namespace jail for Softaculous Code with AI
#
# Usage: ai_jail.sh <homedir> [--] <command...>
#
# Strategy:
#   - Resolve Softaculous base directory from script location (portable across panels)
#   - Verify the caller is the owner of the requested home directory
#   - Create a per-user temp directory under <basedir>/tmp/jail/<username>/
#   - Create private user, mount and PID namespaces (unshare --user --map-root-user --mount --pid --fork --propagation private)
#   - After mount setup, drop ALL capabilities using capsh
#   - The process runs as UID 0 inside but has NO capabilities (powerless root)
#   - On the host, files written appear as the real user (mapped via --map-root-user)
#   - Save the real home by bind-mounting it to a reference in the per-user temp dir
#   - Mount a tmpfs over the parent dir to hide other users' homes
#   - Bind-mount only the current user's home back into the isolated parent directory
#   - Block other home prefixes, sensitive system files and directories
#   - Execute the command with all capabilities dropped

# Split the positional parameters: the first is the home directory to jail,
# whatever follows (minus one optional leading "--") is the command to run.
HOMEDIR="$1"
shift

case "$1" in
    --) shift ;;
esac

# A home directory and at least one command word are both mandatory.
[ -n "$HOMEDIR" ] && [ $# -gt 0 ] || {
    echo "Usage: ai_jail.sh <homedir> [-- <command...>]" >&2
    exit 1
}

# Canonicalise the requested path: try realpath -m, then readlink -f, and
# fall back to the value as given when neither tool produces a result.
HOMEDIR=$(realpath -m "$HOMEDIR" 2>/dev/null || readlink -f "$HOMEDIR" 2>/dev/null || echo "$HOMEDIR")

# An empty result or the filesystem root is never an acceptable jail target.
case "$HOMEDIR" in
    "" | /)
        echo "Refusing to jail root directory" >&2
        exit 1
        ;;
esac

# The target has to be an existing directory.
[ -d "$HOMEDIR" ] || {
    echo "Home directory does not exist: $HOMEDIR" >&2
    exit 1
}

# SECURITY: Refuse to continue when the host has user namespaces disabled.
# The original note for this check says AlmaLinux/CloudLinux v9 default the
# limit to 0, so it must be raised there.
# Only evaluated on the first invocation (AI_JAIL_NS not yet set); an
# unreadable sysctl file is treated the same as a limit of 0.
if [ -z "${AI_JAIL_NS:-}" ]; then
    MAX_NS=$(cat /proc/sys/user/max_user_namespaces 2>/dev/null || echo 0)
    # A non-numeric value makes the test itself fail (silenced), which lets
    # the script continue.
    if [ "$MAX_NS" -eq 0 ] 2>/dev/null; then
        {
            echo "Error: user.max_user_namespaces is 0. User namespaces are disabled."
            echo "Please run: sysctl user.max_user_namespaces=15000"
        } >&2
        exit 1
    fi
fi

# SECURITY: Verify the caller is the owner of the requested home directory.
# This check runs only on the first invocation (outside the namespace).
# Inside the namespace, id -u returns 0 due to --map-root-user.
#
# Ownership is compared by numeric UID rather than by display name, and every
# lookup that comes back empty is treated as a failure, so the check can
# never pass just because two lookups failed in the same way.
CALLER_UID=$(id -u)
CALLER_NAME=$(id -un 2>/dev/null || echo "")
if [ -z "${AI_JAIL_NS:-}" ]; then
    # First invocation - outside the namespace.
    # The caller's name becomes a path component of the per-user jail
    # directory, so an unresolved identity must stop the script here.
    if [ -z "$CALLER_UID" ] || [ -z "$CALLER_NAME" ]; then
        echo "Error: Cannot determine the calling user." >&2
        exit 1
    fi

    HOME_OWNER_UID=$(stat -c '%u' "$HOMEDIR" 2>/dev/null)
    if [ -z "$HOME_OWNER_UID" ] || [ "$HOME_OWNER_UID" != "$CALLER_UID" ]; then
        echo "Permission denied: $HOMEDIR is not owned by $CALLER_NAME" >&2
        exit 1
    fi

    # The jail must not be run as root on the host
    if [ "$CALLER_UID" -eq 0 ]; then
        echo "Error: Jail must not be run as root. Run as the actual user." >&2
        exit 1
    fi
fi

# Work out where this script is installed. BIN_DIR is the absolute directory
# containing the script, AI_BASEDIR is its parent, and the jail's scratch
# area is expected at <AI_BASEDIR>/tmp/jail.
script_dir=$(dirname "$0")
BIN_DIR=$(cd "$script_dir" && pwd)
AI_BASEDIR=$(cd "$BIN_DIR/.." && pwd)
AI_TMPDIR="$AI_BASEDIR/tmp/jail"

# The scratch area is not created here; it has to exist already.
[ -d "$AI_TMPDIR" ] || {
    echo "Error: Jail temp directory not found: $AI_TMPDIR" >&2
    echo "Please ensure the Softaculous installer has created this directory." >&2
    exit 1
}

# Split the home path into the directory that contains it and its own name.
HOMEBASE=$(basename "$HOMEDIR")
HOMEPARENT=$(dirname "$HOMEDIR")

# First invocation (AI_JAIL_NS unset): prepare the per-user scratch directory
# and re-execute this script inside fresh user, mount and PID namespaces.
# The exec replaces the current process, so nothing below this block runs in
# the outer invocation.
if [ -z "${AI_JAIL_NS:-}" ]; then
    AI_USER_DIR="$AI_TMPDIR/$CALLER_NAME"
    if [ ! -d "$AI_USER_DIR" ]; then
        mkdir -p "$AI_USER_DIR" 2>/dev/null || {
            echo "Error: Cannot create user jail directory: $AI_USER_DIR" >&2
            exit 1
        }
        # Best effort: tighten ownership and permissions of the new directory.
        chown "$CALLER_NAME" "$AI_USER_DIR" 2>/dev/null || true
        chmod 700 "$AI_USER_DIR" 2>/dev/null || true
    fi

    # SECURITY: the parent directory holds one sub-directory per user and is
    # presumably writable by other accounts. A directory that already existed
    # is only trusted when it is a real directory (not a symlink) owned by the
    # caller; otherwise another account could control where the temporary
    # files and the home reference mount are created.
    if [ -L "$AI_USER_DIR" ] || [ ! -O "$AI_USER_DIR" ]; then
        echo "Error: User jail directory is not owned by $CALLER_NAME: $AI_USER_DIR" >&2
        exit 1
    fi

    # Hand the real identity and the pre-computed paths to the inner
    # invocation through the environment; inside the namespace id(1) reports
    # UID 0 because of --map-root-user.
    exec unshare --user --map-root-user --mount --pid --fork --propagation private \
        env AI_JAIL_NS=1 AI_REAL_CALLER="$CALLER_NAME" AI_REAL_UID="$CALLER_UID" \
        AI_USER_DIR="$AI_USER_DIR" \
        AI_HOMEPARENT="$HOMEPARENT" AI_HOMEBASE="$HOMEBASE" \
        "$0" "$HOMEDIR" -- "$@"
fi

# Second invocation: prefer the values exported by the outer invocation and
# fall back to the locally computed ones when a variable is missing or empty.
[ -z "${AI_REAL_CALLER:-}" ] || CALLER_NAME=$AI_REAL_CALLER
[ -z "${AI_REAL_UID:-}" ] || CALLER_UID=$AI_REAL_UID
: "${AI_USER_DIR:=$AI_TMPDIR/$CALLER_NAME}"

HOMEPARENT=${AI_HOMEPARENT:-}
[ -n "$HOMEPARENT" ] || HOMEPARENT=$(dirname "$HOMEDIR")

HOMEBASE=${AI_HOMEBASE:-}
[ -n "$HOMEBASE" ] || HOMEBASE=$(basename "$HOMEDIR")

# Abort jail setup: print "Error: <message>" to stderr, release the temporary
# reference mount and its directory if they were created, and exit 1.
# Arguments: $1 - message describing the step that failed
ai_jail_abort() {
    echo "Error: $1" >&2
    if [ -n "${SAVED:-}" ]; then
        umount "$SAVED" 2>/dev/null
        rmdir "$SAVED" 2>/dev/null
    fi
    exit 1
}

# The steps below are the core of the isolation. Each one is checked: if any
# of them fails the command must not run, because it would either still see
# the sibling home directories or run inside an empty, throw-away home.

# Save a bind-mount reference to the real home directory.
SAVED=$(mktemp -d "$AI_USER_DIR/ref_XXXXXX") \
    || ai_jail_abort "Cannot create reference directory under $AI_USER_DIR"
mount --bind "$HOMEDIR" "$SAVED" \
    || ai_jail_abort "Cannot bind-mount $HOMEDIR to $SAVED"

# Mount a tmpfs over the parent directory, hiding all other users' homes
mount -t tmpfs -o size=1m,mode=0755 ai_jail_tmpfs "$HOMEPARENT" \
    || ai_jail_abort "Cannot mount tmpfs over $HOMEPARENT"

# Create mount point and bind-mount the saved home back
mkdir -p "$HOMEPARENT/$HOMEBASE" \
    || ai_jail_abort "Cannot create mount point $HOMEPARENT/$HOMEBASE"
mount --bind "$SAVED" "$HOMEPARENT/$HOMEBASE" \
    || ai_jail_abort "Cannot bind-mount home back to $HOMEPARENT/$HOMEBASE"

# Block other common home directory prefixes by covering each with an empty,
# inaccessible tmpfs (best effort: mount errors are ignored).
#
# A prefix is skipped when the jailed home is that prefix or lives anywhere
# beneath it. Comparing only against the direct parent would cover a nested
# home such as /home2/group/user with the blocking tmpfs and hide it again.
# /home is part of the list so that it is hidden for users whose home lives
# under one of the other prefixes.
for prefix in /home /home2 /home3 /home4 /var/www/vhosts; do
    [ -d "$prefix" ] || continue
    case "$HOMEDIR/" in
        "$prefix"/*) continue ;;
    esac
    mount -t tmpfs -o size=1m,mode=0000 ai_jail_block "$prefix" 2>/dev/null
done

# Replace /etc/passwd and /etc/group with filtered copies. A line is kept
# when its third field (UID or GID) is below 100 or its name is root, nobody
# or the calling user. The copy is written to the per-user directory,
# bind-mounted over the real file and then unlinked; the mount keeps the
# content reachable.
for db in passwd group; do
    filtered=$(mktemp "$AI_USER_DIR/${db}_XXXXXX")
    awk -F: -v user="$CALLER_NAME" \
        '$3 < 100 || $1 == "root" || $1 == "nobody" || $1 == user' \
        "/etc/$db" > "$filtered"
    mount --bind "$filtered" "/etc/$db"
    rm -f "$filtered"
done

# Hide account-database backups, the sub-id maps and the shadow files by
# bind-mounting /dev/null over each one that exists. Failures are ignored.
for f in /etc/passwd- /etc/group- /etc/shadow- /etc/gshadow- \
    /etc/subuid /etc/subuid- /etc/subgid /etc/subgid- \
    /etc/shadow /etc/gshadow; do
    if [ -e "$f" ]; then
        mount --bind /dev/null "$f" 2>/dev/null
    fi
done

# Hide the SSH configuration directory behind an empty mode-0000 tmpfs.
if [ -d /etc/ssh ]; then
    mount -t tmpfs -o size=1m,mode=0000 ai_jail_block /etc/ssh 2>/dev/null
fi

# Cover the system log tree and the mail spools with empty mode-0000 tmpfs
# mounts. /var/log comes first; the listed sub-directories are then only
# mounted over if they are still visible as directories afterwards.
for d in /var/log /var/log/anaconda /var/log/chrony /var/log/cron \
    /var/log/cups /var/log/exim /var/log/firewalld /var/log/gdm \
    /var/log/imunify360 /var/log/mail /var/log/private /var/log/samba \
    /var/log/sssd \
    /var/mail /var/spool/mail; do
    if [ -d "$d" ]; then
        mount -t tmpfs -o size=1m,mode=0000 ai_jail_block "$d" 2>/dev/null
    fi
done

# Network and host identity files: replaced by /dev/null when present.
for f in /etc/hosts /etc/resolv.conf /etc/fstab /etc/crontab /etc/machine-id /etc/hostname; do
    if [ -e "$f" ]; then
        mount --bind /dev/null "$f" 2>/dev/null
    fi
done

# Security, package-manager, init and cron configuration directories:
# covered by an empty mode-0000 tmpfs when present.
for d in /etc/security /etc/pam.d /etc/selinux /etc/NetworkManager \
    /etc/yum.repos.d /etc/apt /etc/systemd \
    /etc/cron.d /etc/cron.daily /etc/cron.hourly /etc/cron.weekly /etc/cron.monthly; do
    if [ -d "$d" ]; then
        mount -t tmpfs -o size=1m,mode=0000 ai_jail_block "$d" 2>/dev/null
    fi
done

# DNS and database server configuration: files first, then directories.
for f in /etc/named.conf /etc/my.cnf; do
    if [ -e "$f" ]; then
        mount --bind /dev/null "$f" 2>/dev/null
    fi
done
for d in /etc/mysql /var/named; do
    if [ -d "$d" ]; then
        mount -t tmpfs -o size=1m,mode=0000 ai_jail_block "$d" 2>/dev/null
    fi
done

# Mount a fresh procfs so that /proc reflects the new PID namespace instead
# of the host's process list. Best effort: a failure is ignored.
mount -t proc proc /proc 2>/dev/null || :

# Hide host information files under /proc behind /dev/null.
for f in /proc/version /proc/cpuinfo /proc/meminfo /proc/cmdline /proc/loadavg /proc/uptime; do
    if [ -e "$f" ]; then
        mount --bind /dev/null "$f" 2>/dev/null
    fi
done

# Hide the sysctl tree behind an empty mode-0000 tmpfs.
if [ -d /proc/sys ]; then
    mount -t tmpfs -o size=1m,mode=0000 ai_jail_block /proc/sys 2>/dev/null
fi

# The temporary reference is no longer needed once the home has been
# bind-mounted into place: detach it and remove its directory (best effort).
umount "$SAVED" 2>/dev/null || :
rmdir "$SAVED" 2>/dev/null || :

# Start the command from the jailed home, or from /tmp if that fails.
if ! cd "$HOMEDIR" 2>/dev/null; then
    cd /tmp
fi

# Capabilities handed to "capsh --drop". Because of --map-root-user the
# process is UID 0 inside the namespace while mapping to the real user on
# the host; removing every capability is meant to leave that namespace-local
# root without special privileges.
# The names are kept one per array element and joined with commas into the
# single string stored in CAPS_DROP.
caps_list=(
    cap_chown cap_dac_override cap_dac_read_search cap_fowner cap_fsetid
    cap_kill cap_setgid cap_setuid cap_setpcap cap_linux_immutable
    cap_net_bind_service cap_net_broadcast cap_net_admin cap_net_raw
    cap_ipc_lock cap_ipc_owner cap_sys_module cap_sys_rawio
    cap_sys_chroot cap_sys_ptrace cap_sys_pacct cap_sys_admin
    cap_sys_boot cap_sys_nice cap_sys_resource cap_sys_time
    cap_sys_tty_config cap_mknod cap_lease cap_audit_write
    cap_audit_control cap_setfcap cap_mac_override cap_mac_admin
    cap_syslog cap_wake_alarm cap_block_suspend cap_audit_read
    cap_perfmon cap_bpf cap_checkpoint_restore
)
# IFS is changed only inside the command substitution's subshell.
CAPS_DROP=$(IFS=,; printf '%s' "${caps_list[*]}")

# Build the string passed to "capsh ... -c": every argument is shell-quoted
# with printf %q and prefixed by a single space, so the result re-parses
# into the original words.
CMD_ARGS=""
for arg; do
    printf -v quoted_arg '%q' "$arg"
    CMD_ARGS+=" $quoted_arg"
done

# Run the command through capsh so it starts with the capabilities listed in
# CAPS_DROP removed.
#
# SECURITY: fail closed when capsh is unavailable. Running the command
# directly would leave it with every capability of namespace-local root,
# which is enough to unmount the masks set up above, so the jail would only
# be cosmetic.
if ! command -v capsh >/dev/null 2>&1; then
    echo "Error: capsh not found; refusing to run the command without dropping capabilities." >&2
    echo "Please install the libcap utilities that provide capsh." >&2
    exit 1
fi

exec capsh --drop="$CAPS_DROP" -- -c "$CMD_ARGS"