Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion scripts/install-k8s.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# $env:SERVERS = "1" default: 1 (control-plane nodes)
# $env:AGENTS = "1" default: 1 (worker nodes)
# $env:K8S_VERSION = "v1.29.4-k3s1" default: latest
# $env:HOST_DATA_DIR = "C:\data" default: $env:USERPROFILE\.tracebloc
# $env:HOST_DATA_DIR = "C:\data" default: $env:USERPROFILE\.tracebloc (LOCAL disk; no NFS/UNC)
# $env:CLIENT_ENV = "dev" optional; if not set, CLIENT_ENV is not added to env in values
# =============================================================================

Expand Down Expand Up @@ -1409,6 +1409,21 @@ function Get-PfFreeGb {
} catch { return $null }
}

# "network" if $HOST_DATA_DIR is on a UNC path or a mapped network drive
# (Win32_LogicalDisk DriveType 4); "local" otherwise; $null if undeterminable
# (e.g. non-Windows under Pester - tests mock this). Mirrors preflight.sh
# _pf_storage_type: MySQL/InnoDB corrupts or crash-loops on network storage.
function Get-PfFsType {
    # Classifies the storage behind $HOST_DATA_DIR (read from the enclosing scope).
    # Returns "network", "local", or $null when any step throws.
    try {
        # A leading double backslash marks a UNC path (\\server\share).
        $isUnc = $HOST_DATA_DIR -like '\\*'
        if ($isUnc) { return "network" }

        # Drive qualifier such as "Z:"; empty when the path carries none.
        $drive = Split-Path -Qualifier $HOST_DATA_DIR -ErrorAction SilentlyContinue
        if (-not $drive) { return "local" }

        # -ErrorAction Stop turns a failed CIM query into an exception for the catch below.
        $disk = Get-CimInstance Win32_LogicalDisk -Filter "DeviceID='$drive'" -ErrorAction Stop

        # Win32_LogicalDisk DriveType 4 = network drive; anything else counts as local.
        if ($disk.DriveType -eq 4) {
            return "network"
        } else {
            return "local"
        }
    } catch {
        return $null
    }
}

# Memory/CPU as the container runtime sees it (the Docker Desktop / WSL2 VM budget,
# which is what the pods actually get — smaller than the host). $null if the daemon
# is down or the value is junk, so callers fall back to the host (CIM) reader.
Expand Down Expand Up @@ -1488,6 +1503,22 @@ function Test-Preflight {
elseif ($disk -lt $warnDiskGb) { Warn "Disk: $disk GB free - recommended >= $warnDiskGb GB; images + data may fill it." }
else { Ok "Disk: $disk GB free" }

# Network-FS guard: MySQL/InnoDB corrupts or crash-loops on NFS/CIFS/SMB. Fail
# fast instead of a cryptic CrashLoopBackOff ~20 min in. (Mirrors preflight.sh.)
$fs = Get-PfFsType
if ($null -eq $fs) { Info "Storage: filesystem type undetermined; assuming local." }
elseif ($fs -eq "network") {
if ($env:TRACEBLOC_ALLOW_NETWORK_FS) {
Warn "Storage: $HOST_DATA_DIR is on a network filesystem - proceeding (TRACEBLOC_ALLOW_NETWORK_FS set); the client database may corrupt or crash-loop."
} else {
Write-PfFail "Storage: $HOST_DATA_DIR is on a network filesystem - the tracebloc client database (MySQL/InnoDB) corrupts or crash-loops on network storage."
$hardFail++
Hint "Fix: point HOST_DATA_DIR at a LOCAL disk (the default $env:USERPROFILE\.tracebloc is local)."
Hint " (or set `$env:TRACEBLOC_ALLOW_NETWORK_FS=1 to proceed anyway - not recommended for the database.)"
}
}
else { Ok "Storage: $HOST_DATA_DIR local disk" }

Info "Checking outbound connectivity to required services..."
$backendHost = (Get-BackendUrl) -replace '^https?://','' -replace '/$',''
$criticals = @(
Expand Down
2 changes: 2 additions & 0 deletions scripts/lib/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,8 @@ Advanced configuration (environment variables):
AGENTS Worker nodes (default: 1)
K8S_VERSION k3s image tag (default: v1.29.4-k3s1)
HOST_DATA_DIR Persistent data directory (default: ~/.tracebloc)
Must be on a LOCAL disk — NFS/CIFS/SMB is rejected (the database
corrupts on network storage). TRACEBLOC_ALLOW_NETWORK_FS=1 overrides.

Windows:
irm https://raw.githubusercontent.com/tracebloc/client/main/scripts/install.ps1 | iex
Expand Down
59 changes: 59 additions & 0 deletions scripts/lib/preflight.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# Escape hatches:
# TRACEBLOC_SKIP_PREFLIGHT=1 skip all checks
# TRACEBLOC_ALLOW_ARM64=1 proceed on arm64 despite amd64-only images
# TRACEBLOC_ALLOW_NETWORK_FS=1 proceed when HOST_DATA_DIR is on NFS/CIFS/SMB (DB may corrupt)
# PF_MIN_MEM_GB / PF_MIN_CPU / PF_MIN_DISK_GB lower the hard floors (CI / odd sites)
#
# This file is side-effect-safe to source (defaults + function defs only).
Expand Down Expand Up @@ -58,6 +59,32 @@ _pf_probe_url() {
# Free space in KB on the filesystem holding $1.
_pf_free_kb() { df -Pk "$1" 2>/dev/null | awk 'NR==2 {print $4}'; }

# Filesystem type holding $1, lower-cased (e.g. ext4, xfs, apfs, overlay, nfs,
# nfs4, cifs, smbfs), or empty if undeterminable. $1 may not exist yet at
# preflight, so walk up to the nearest existing parent. Tries findmnt (util-linux,
# bind-mount aware), then GNU `stat -f` (Linux only — BSD/macOS `stat -f` means
# "format string", not filesystem), then df+mount (portable, incl. macOS).
_pf_fstype() {
  # Print the lower-cased filesystem type holding path $1, or nothing when it
  # cannot be determined.
  # Globals:   OS (read; falls back to `uname -s` when unset or empty)
  # Arguments: $1 - path to inspect (need not exist yet)
  # Outputs:   fs type on stdout, no trailing newline
  # Returns:   0
  local p="$1" parent t="" mp os

  # The data dir may not exist at preflight time: climb to the nearest
  # existing ancestor and inspect that instead.
  while [[ -n "$p" && ! -e "$p" ]]; do
    parent="$(dirname "$p")"
    [[ "$parent" == "$p" ]] && break
    p="$parent"
  done
  [[ -z "$p" || ! -e "$p" ]] && return 0

  # 1) findmnt (util-linux) — resolves the mount that actually backs the path.
  if has findmnt; then
    t="$(findmnt -nro FSTYPE --target "$p" 2>/dev/null | head -1)"
  fi

  # 2) GNU `stat -f -c %T`. Must be skipped on Darwin, where `stat -f` takes a
  #    format string. Derive the OS ourselves if the caller did not set $OS so
  #    the guard cannot be bypassed (and `set -u` cannot trip on it).
  os="${OS:-$(uname -s 2>/dev/null || true)}"
  if [[ -z "$t" && "$os" != "Darwin" ]]; then
    t="$(stat -f -c '%T' "$p" 2>/dev/null)"
  fi

  # 3) df + mount. `df -P` keeps each filesystem on one line; the mount point
  #    is everything after the "NN%" capacity column, so mount points that
  #    contain spaces (e.g. "/Volumes/My Share") survive. If no capacity
  #    column is recognisable, fall back to the last field.
  if [[ -z "$t" ]] && has df; then
    mp="$(df -P "$p" 2>/dev/null | awk '
      NR > 1 {
        if (match($0, /[0-9]+%[ \t]+\//)) { mp = substr($0, RSTART + RLENGTH - 1) }
        else if ($NF ~ /^\//)             { mp = $NF }
      }
      END { if (mp != "") print mp }')"
    # Understand both `mount` layouts:
    #   BSD/macOS: "<dev> on <mp> (<type>, opts...)"
    #   Linux:     "<dev> on <mp> type <type> (opts...)"
    [[ -n "$mp" ]] && t="$(mount 2>/dev/null | awk -v m="$mp" '
      index($0, " on " m " (") > 0 {
        sub(/.* \(/, ""); sub(/[,)].*/, ""); print; exit
      }
      {
        pre = " on " m " type "
        i = index($0, pre)
        if (i > 0) {
          rest = substr($0, i + length(pre))
          sub(/[ (].*/, "", rest)
          print rest
          exit
        }
      }')"
  fi

  printf '%s' "$t" | tr '[:upper:]' '[:lower:]'
}

# Memory/CPU as the CONTAINER RUNTIME sees it (the budget the pods actually get).
# On Docker Desktop / Colima / WSL2 this is the VM's allocation — smaller than the
# host and the number that matters (a 36 GB Mac can cap its Docker VM at 4 GB). Echo
Expand Down Expand Up @@ -255,6 +282,37 @@ _pf_disk() {
return 0
}

# Network-filesystem guard for HOST_DATA_DIR. MySQL/InnoDB corrupts or crash-loops
# on NFS/CIFS/SMB (broken POSIX locking + unsafe O_DIRECT/fsync), and the chart's
# root chown init-container is blocked by NFS root_squash — so a network data dir
# fails ~20 min in with a cryptic CrashLoopBackOff. Catch it in seconds here.
_pf_storage_type() {
  # Preflight check: reject a HOST_DATA_DIR that lives on a network filesystem.
  # Globals:   HOST_DATA_DIR, HOME (read); TRACEBLOC_ALLOW_NETWORK_FS (read,
  #            any non-empty value downgrades the failure to a warning);
  #            PF_HARD_FAIL (incremented on rejection).
  # Outputs:   one status line via info/warn/success/_pf_fail_line, plus hints
  #            on rejection.
  # Returns:   always 0 — failure is signalled through PF_HARD_FAIL.
  local target fstype
  target="${HOST_DATA_DIR:-$HOME/.tracebloc}"
  fstype="$(_pf_fstype "$target")"

  # An unknown type is not treated as an error: assume local and move on.
  if [[ -z "$fstype" ]]; then
    info "Storage: ${target} — filesystem type undetermined; assuming local."
    return 0
  fi

  case "$fstype" in
    # Mount-table names (findmnt / mount) followed by the aliases GNU
    # `stat -f -c %T` prints for the same filesystems: v9fs (= 9p),
    # gfs/gfs2 (= gfs, gfs2), novell (= ncpfs). Without the aliases the guard
    # is bypassed whenever the reader falls back to stat.
    nfs|nfs3|nfs4|nfsd|cifs|smb|smbfs|smb2|smb3|afpfs|9p|ncpfs|gfs|gfs2|ocfs2|lustre|\
    glusterfs|fuse.glusterfs|ceph|fuse.ceph|beegfs|fuse.sshfs|fuse.s3fs|davfs|fuse.davfs|webdav|fuse.rclone|\
    v9fs|gfs/gfs2|novell)
      # Explicit opt-in: warn loudly but let the install proceed.
      if [[ -n "${TRACEBLOC_ALLOW_NETWORK_FS:-}" ]]; then
        warn "Storage: ${target} is on a network filesystem (${fstype}) — proceeding (TRACEBLOC_ALLOW_NETWORK_FS set); the client database may corrupt or crash-loop on network storage."
        return 0
      fi
      _pf_fail_line "Storage: ${target} is on a network filesystem (${fstype}) — the tracebloc client database (MySQL/InnoDB) corrupts or crash-loops on network storage, and NFS root_squash blocks data-dir setup."
      PF_HARD_FAIL=$(( ${PF_HARD_FAIL:-0} + 1 ))
      hint "Fix: point HOST_DATA_DIR at a LOCAL disk — the default ~/.tracebloc is local:"
      hint "       HOST_DATA_DIR=\"\$HOME/.tracebloc\" ./install-k8s.sh"
      hint "     (or set TRACEBLOC_ALLOW_NETWORK_FS=1 to proceed anyway — not recommended for the database.)"
      ;;
    *)
      # Anything else (ext4, xfs, apfs, overlay, tmpfs, ...) is accepted.
      success "Storage: ${target} (${fstype})"
      ;;
  esac
  return 0
}

_pf_connectivity() {
info "Checking outbound connectivity to required services..."
# Can't probe without curl — and on the direct ./install-k8s.sh path the
Expand Down Expand Up @@ -330,6 +388,7 @@ run_preflight() {
_pf_cpu || true
_pf_memory || true
_pf_disk || true
_pf_storage_type || true
_pf_connectivity || true

if [[ "$PF_HARD_FAIL" -gt 0 ]]; then
Expand Down
33 changes: 32 additions & 1 deletion scripts/tests/install-k8s.Tests.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,9 @@ Describe "Test-Preflight" {
Mock Err { throw "preflight-failed" } # Err exits; make it throwable to assert
Mock Get-PfCpu { 4 }; Mock Get-PfMemGb { 8 }; Mock Get-PfFreeGb { 50 }
Mock Get-WindowsArch { "amd64" }
Mock Get-PfFsType { "local" }
}
AfterEach { $env:TRACEBLOC_SKIP_PREFLIGHT = $null; $env:TRACEBLOC_ALLOW_ARM64 = $null }
AfterEach { $env:TRACEBLOC_SKIP_PREFLIGHT = $null; $env:TRACEBLOC_ALLOW_ARM64 = $null; $env:TRACEBLOC_ALLOW_NETWORK_FS = $null }

It "healthy environment -> does not throw" {
Mock Test-PfUrl { "ok" }
Expand Down Expand Up @@ -493,6 +494,36 @@ Describe "Test-Preflight" {
{ Test-Preflight } | Should -Not -Throw
$env:PF_MIN_MEM_GB = $null
}
# Get-PfFsType is mocked to report "network" and no override variable is set,
# so Test-Preflight is expected to throw (the Describe's setup mocks Err to throw).
It "network filesystem (HOST_DATA_DIR on NFS/UNC) -> fails (Err throws)" {
Mock Test-PfUrl { "ok" }; Mock Get-PfFsType { "network" }
{ Test-Preflight } | Should -Throw
}
# Same "network" mock, but with the TRACEBLOC_ALLOW_NETWORK_FS override set:
# Test-Preflight must not throw. The env var is cleared by the Describe's AfterEach.
It "network filesystem + TRACEBLOC_ALLOW_NETWORK_FS -> does not throw" {
Mock Test-PfUrl { "ok" }; Mock Get-PfFsType { "network" }
$env:TRACEBLOC_ALLOW_NETWORK_FS = "1"
{ Test-Preflight } | Should -Not -Throw
}
# Get-PfFsType returning $null (type could not be determined) must be tolerated:
# Test-Preflight is expected not to throw.
It "undetermined filesystem type -> does not throw (assume local)" {
Mock Test-PfUrl { "ok" }; Mock Get-PfFsType { $null }
{ Test-Preflight } | Should -Not -Throw
}
}

# Unit tests for the Get-PfFsType reader itself. The whole group is skipped when
# $IsWindows is false. Each case assigns $HOST_DATA_DIR inside the It block and
# calls Get-PfFsType, which reads that variable.
Describe "Get-PfFsType" -Skip:(-not $IsWindows) {
# A path starting with two backslashes is classified without any CIM lookup.
It "UNC path -> network" {
$HOST_DATA_DIR = "\\nas\share\tracebloc"
Get-PfFsType | Should -Be "network"
}
# Get-CimInstance is mocked to return a disk object with DriveType 4.
It "mapped network drive (DriveType 4) -> network" {
$HOST_DATA_DIR = "Z:\tracebloc"
Mock Get-CimInstance { [pscustomobject]@{ DriveType = 4 } }
Get-PfFsType | Should -Be "network"
}
# Get-CimInstance is mocked to return a disk object with DriveType 3.
It "local fixed disk (DriveType 3) -> local" {
$HOST_DATA_DIR = "C:\tracebloc"
Mock Get-CimInstance { [pscustomobject]@{ DriveType = 3 } }
Get-PfFsType | Should -Be "local"
}
}

Describe "Get-Pf* runtime (Docker VM) view preference" {
Expand Down
67 changes: 67 additions & 0 deletions scripts/tests/preflight.bats
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ setup() {
# Default-safe stubs (a healthy amd64 box); individual tests override.
_pf_probe_url() { echo ok; }
_pf_free_kb() { echo $((50 * 1024 * 1024)); } # 50 GB
_pf_fstype() { echo ext4; } # local disk (storage check passes)
_pf_total_mem_kb() { echo $((8 * 1024 * 1024)); } # 8 GB
_pf_ncpu() { echo 4; }
_pf_runtime_mem_kb() { echo ""; } # daemon "down" in tests → selectors/src use host
Expand Down Expand Up @@ -282,3 +283,69 @@ setup() {
[[ "$output" == *"Skipping connectivity"* ]]
PF_HARD_FAIL=0; _pf_connectivity >/dev/null 2>&1; [ "$PF_HARD_FAIL" -eq 0 ]
}

# ── _pf_storage_type (network-FS guard for HOST_DATA_DIR) ────────────────────
# _pf_fstype is stubbed per-test; the storage check must reject network FSes but
# pass anything local — including overlay/tmpfs, which is what CI runners use.
@test "_pf_storage_type: local ext4 -> success, no hard fail" {
# Stub the reader so the check sees a local ext4 filesystem.
_pf_fstype() { echo ext4; }
# First call via `run` to capture output: the status line must name the type.
run _pf_storage_type; [[ "$output" == *"ext4"* ]]
# Second call made directly, so the PF_HARD_FAIL counter it may change is visible here.
PF_HARD_FAIL=0; _pf_storage_type >/dev/null; [ "$PF_HARD_FAIL" -eq 0 ]
}

@test "_pf_storage_type: overlay (CI/containers) -> success, never blocked" {
# overlay is not in the rejected list, so the counter must stay at 0.
_pf_fstype() { echo overlay; }
PF_HARD_FAIL=0; _pf_storage_type >/dev/null; [ "$PF_HARD_FAIL" -eq 0 ]
}

@test "_pf_storage_type: NFS -> hard fail naming the cause + local-path hint" {
# Stub the reader to report nfs and no override is exported in this test.
_pf_fstype() { echo nfs; }
run _pf_storage_type
# The captured output must name the filesystem type and mention HOST_DATA_DIR.
[[ "$output" == *"network filesystem (nfs)"* ]]
[[ "$output" == *"HOST_DATA_DIR"* ]]
# Direct call (not via `run`) so the counter increment is observable: exactly one hard failure.
PF_HARD_FAIL=0; _pf_storage_type >/dev/null 2>&1; [ "$PF_HARD_FAIL" -eq 1 ]
}

@test "_pf_storage_type: NFS4 -> hard fail" {
# nfs4 is listed separately from nfs in the reject list; it must count one hard failure.
_pf_fstype() { echo nfs4; }
PF_HARD_FAIL=0; _pf_storage_type >/dev/null 2>&1; [ "$PF_HARD_FAIL" -eq 1 ]
}

@test "_pf_storage_type: CIFS -> hard fail" {
# cifs must be rejected: one hard failure recorded.
_pf_fstype() { echo cifs; }
PF_HARD_FAIL=0; _pf_storage_type >/dev/null 2>&1; [ "$PF_HARD_FAIL" -eq 1 ]
}

@test "_pf_storage_type: fuse.sshfs -> hard fail (covers fuse.* network mounts)" {
# Exercises one dotted fuse.<name> entry (fuse.sshfs) from the reject list.
_pf_fstype() { echo fuse.sshfs; }
PF_HARD_FAIL=0; _pf_storage_type >/dev/null 2>&1; [ "$PF_HARD_FAIL" -eq 1 ]
}

@test "_pf_storage_type: NFS + TRACEBLOC_ALLOW_NETWORK_FS -> warn, no hard fail" {
# With the override exported, nfs must produce the "proceeding" warning instead of a failure.
_pf_fstype() { echo nfs; }; export TRACEBLOC_ALLOW_NETWORK_FS=1
run _pf_storage_type; [[ "$output" == *"proceeding"* ]]
# Direct call: the hard-fail counter must remain 0.
PF_HARD_FAIL=0; _pf_storage_type >/dev/null; [ "$PF_HARD_FAIL" -eq 0 ]
# Remove the override again before the test ends.
unset TRACEBLOC_ALLOW_NETWORK_FS
}

@test "_pf_storage_type: undetermined fstype -> no hard fail (assume local)" {
# An empty type from the reader takes the "undetermined" branch, which must not count as a failure.
_pf_fstype() { echo ""; }
PF_HARD_FAIL=0; _pf_storage_type >/dev/null; [ "$PF_HARD_FAIL" -eq 0 ]
}

# ── _pf_fstype reader (re-source for the real function) ──────────────────────
@test "_pf_fstype: lower-cases output and walks to the nearest existing parent" {
# Re-source the library so the real _pf_fstype replaces the stub installed by setup().
source "${BATS_TEST_DIRNAME}/../lib/preflight.sh"
# Make findmnt the only tool that `has` reports as available.
has() { [[ "$1" == "findmnt" ]]; } # only findmnt 'present'
findmnt() { echo NFS4; } # upper-case, ignores args
# The path does not exist; a non-empty result shows an existing ancestor was found,
# and "nfs4" shows the stub's upper-case output was lower-cased.
run _pf_fstype "${BATS_TEST_TMPDIR}/does/not/exist/yet"
[ "$output" = "nfs4" ]
}

@test "_pf_fstype: real reader on this host -> a token or empty, never crashes" {
# Re-source the library so the real _pf_fstype replaces the stub installed by setup().
source "${BATS_TEST_DIRNAME}/../lib/preflight.sh"
# _pf_fstype compares $OS against "Darwin" to decide whether to use `stat -f -c`; set it from the host.
OS="$(uname -s)"
run _pf_fstype /
# Smoke test only: exit status 0 and output that is empty or a single lower-case token.
[ "$status" -eq 0 ]
[[ -z "$output" || "$output" =~ ^[a-z0-9._/]+$ ]]
}
Loading