diff --git a/.github/workflows/container-build.yml b/.github/workflows/container-build.yml new file mode 100644 index 000000000..4322772b5 --- /dev/null +++ b/.github/workflows/container-build.yml @@ -0,0 +1,52 @@ +name: Build and Push Controller Container + +on: + push: + branches: [ master ] + pull_request: + paths: + - 'container_files/controller/**' + +concurrency: + group: container-build + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Source branch checkout + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set LNST_SRC + run: | + if [ "${{ github.event_name }}" = "pull_request" ]; then + LNST_SRC="${{ github.event.pull_request.head.repo.clone_url }}@${{ github.event.pull_request.head.sha }}" + else + LNST_SRC="${{ github.server_url }}/${{ github.repository }}.git@${{ github.sha }}" + fi + echo "LNST_SRC=$LNST_SRC" + echo "LNST_SRC=$LNST_SRC" >> $GITHUB_ENV + + - name: Login to Quay.io + if: github.event_name == 'push' + run: podman login -u ${{ secrets.QUAY_USER }} -p ${{ secrets.QUAY_PASS }} quay.io + + - name: Build multi-arch container + run: | + podman manifest create quay.io/lnst/lnst-controller:latest + podman build \ + --platform linux/amd64,linux/arm64 \ + --build-arg LNST_SRC=${{ env.LNST_SRC }} \ + --manifest quay.io/lnst/lnst-controller:latest \ + container_files/controller + + - name: Push multi-arch manifest + if: github.event_name == 'push' + run: | + podman manifest push quay.io/lnst/lnst-controller:latest + podman manifest push quay.io/lnst/lnst-controller:latest quay.io/lnst/lnst-controller:${{ github.sha }} diff --git a/container_files/controller/Dockerfile b/container_files/controller/Dockerfile index 9deb7f4a8..4fe74b4de 100644 --- a/container_files/controller/Dockerfile +++ b/container_files/controller/Dockerfile @@ -13,14 +13,23 @@ RUN dnf install -y initscripts \ libnl3 \ lksctp-tools-devel \ git \ - libnl3-devel + libnl3-devel \ + jq \ + 
sshpass \ + openssh-clients RUN mkdir -p /root/.lnst -COPY . /lnst -COPY container_files/controller/pool /root/.lnst/pool +# LNST_SRC: git URL used to clone LNST source, accepts "repo_url" or "repo_url@ref" (branch/tag/commit) +ARG LNST_SRC=https://github.com/LNST-project/lnst.git +RUN repo="$LNST_SRC" && \ + ref="" && \ + case "$repo" in *@*) ref="${repo##*@}"; repo="${repo%@*}" ;; esac && \ + git clone "$repo" /lnst && \ + if [ -n "$ref" ]; then cd /lnst && git checkout "$ref"; fi +ENV LNST_SRC=${LNST_SRC} ENV UV_PROJECT_ENVIRONMENT=/root/lnst_venv -RUN cd /lnst && uv sync +RUN cd /lnst && uv sync --extra trex # UV_PROJECT_ENVIRONMENT places the venv outside /lnst # to prevent conflicts when user mounts host-machine's # lnst dir to /lnst diff --git a/container_files/controller/container_runner.py b/container_files/controller/container_runner.py index bb8ab448f..2923c79b8 100644 --- a/container_files/controller/container_runner.py +++ b/container_files/controller/container_runner.py @@ -1,17 +1,26 @@ +import json import os +import shutil +import ssl import sys import traceback -from typing import Any, Type, Optional +import zipfile +from functools import reduce +from typing import Any +from urllib.request import urlopen from lnst.Recipes.ENRT import * -from lnst.Controller.Recipe import BaseRecipe +from lnst.Controller.Recipe import BaseRecipe, export_recipe_run from lnst.Controller.Controller import Controller from lnst.Controller.RecipeResults import ResultLevel, ResultType from lnst.Controller.MachineMapper import ContainerMapper from lnst.Controller.ContainerPoolManager import ContainerPoolManager from lnst.Controller.RunSummaryFormatters import * -from lnst.Controller.RunSummaryFormatters.RunSummaryFormatter import RunSummaryFormatter + +RESULTS_DIR = "/root/.lnst/results" +POOL_DIR = "/root/.lnst/pool" +TEST_DB = os.getenv("TEST_DB", "/lnst/container_files/controller/test_db.json") class ContainerRunner: @@ -22,7 +31,6 @@ class ContainerRunner: * DEBUG: Set 
to 1 to enable debug mode * RECIPE: Name of the recipe class to run * RECIPE_PARAMS: Parameters to pass to the recipe class - * FORMATTERS: List of formatters to use * MULTIMATCH: Set to 1 to enable multimatch mode Agents in containers-specific environment variables: @@ -33,14 +41,30 @@ class ContainerRunner: def __init__(self) -> None: self._controller = Controller(**self._parse_controller_params()) - self._recipe_params: dict[str, Any] = self._parse_recipe_params() - if not os.getenv("RECIPE"): - raise ValueError("RECIPE environment variable is not set") - self._recipe_cls: Type[BaseRecipe] = eval(os.getenv("RECIPE", "")) - self._recipe: Optional[BaseRecipe] = None + if os.getenv("RECIPE"): + self._test_db = [ + { + "recipe_name": os.getenv("RECIPE", ""), + "params": self._parse_recipe_params(), + }, + ] + else: + self._test_db = self._load_test_db() + + + @staticmethod + def _load_test_db() -> list[dict[str, Any]]: + uri = TEST_DB + if "://" not in uri: + uri = f"file://{uri}" - self._formatters: list[Type[RunSummaryFormatter]] = self._parse_formatters() + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + + with urlopen(uri, context=ctx) as resp: + return json.load(resp) def _parse_controller_params(self) -> dict: params = { @@ -68,41 +92,162 @@ def _parse_recipe_params(self) -> dict[str, Any]: return params - def _parse_formatters(self) -> list[Type[RunSummaryFormatter]]: - return [ - eval(formatter) - for formatter in os.getenv("FORMATTERS", "").split(";") - if formatter - ] + def _export_results(self, recipe, result_dir): + log_dir = f"{result_dir}/logs" + + # Export human-readable result summary (with debug output) + hr_fmt = HumanReadableRunSummaryFormatter(level=ResultLevel.DEBUG) + try: + with open(os.path.join(log_dir, "result_summary.log"), "w") as f: + for run in recipe.runs: + f.write(hr_fmt.format_run(run)) + f.write("\n") + except Exception: + print("Failed to export result_summary.log:", 
file=sys.stderr) + traceback.print_exc(file=sys.stderr) + + # Export per-host log files from log_dir + for run in recipe.runs: + if not run.log_dir or not os.path.isdir(run.log_dir): + continue + try: + shutil.copytree(run.log_dir, log_dir, dirs_exist_ok=True) + except Exception: + print("Failed to copy log_dir:", file=sys.stderr) + traceback.print_exc(file=sys.stderr) + + # Export JSON results and LRC files per run + json_fmt = JsonRunSummaryFormatter(pretty=True) + for i, run in enumerate(recipe.runs): + # LRC export + lrc_filename = f"run-data-{i}.lrc" + try: + export_recipe_run(run, export_dir=result_dir, name=lrc_filename) + except Exception: + print(f"Failed to export {lrc_filename}:", file=sys.stderr) + traceback.print_exc(file=sys.stderr) + + # JSON export + json_filename = f"run-data-{i}.json" + with open(os.path.join(result_dir, json_filename), "w") as f: + try: + f.write(json_fmt.format_run(run)) + except Exception as exc: + exception_result = { + "result": "FAIL", + "type": "exception", + "message": str(exc), + } + json.dump([exception_result], f, indent=4) + + def _zip_results(self): + zip_path = os.path.join(RESULTS_DIR, "results.zip") + with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: + for root, _dirs, files in os.walk(RESULTS_DIR): + for fname in files: + fpath = os.path.join(root, fname) + if fpath == zip_path: + continue + arcname = os.path.relpath(fpath, RESULTS_DIR) + zf.write(fpath, arcname) def run(self) -> ResultType: - """Initialize recipe class with parameters provided in `RECIPE_PARAMS` - and execute. Function returns overall result. + """Execute all tests from test_db sequentially. + + Each test is independent -- a failure in one test does not prevent + subsequent tests from running. A summary is printed at the end. 
""" overall_result = ResultType.PASS + results: list[tuple[str, ResultType]] = [] + + for i, test in enumerate(self._test_db): + print(f"\n{'=' * 60}") + recipe_name = test["recipe_name"] + test_id = test.get("uuid", f"{i}_{recipe_name}") + + recipe = None + exc_info = None + result_dir = f"{RESULTS_DIR}/{test_id}" + log_dir = f"{result_dir}/logs" + os.makedirs(log_dir, exist_ok=True) + try: + recipe_cls = eval(recipe_name) + recipe = recipe_cls(**test.get("params", {})) + self._controller.run( + recipe, multimatch=bool(os.getenv("MULTIMATCH", False)) + ) - try: - self._recipe = self._recipe_cls(**self._recipe_params) - self._controller.run( - self._recipe, multimatch=bool(os.getenv("MULTIMATCH", False)) - ) - except Exception: - print("LNST Controller crashed with an exception:", file=sys.stderr) - traceback.print_exc(file=sys.stderr) - exit(ResultType.FAIL) - - for formatter in self._formatters: - fmt = formatter(level=ResultLevel.IMPORTANT) - for run in self._recipe.runs: - print(fmt.format_run(run)) - overall_result = ResultType.max_severity( - overall_result, run.overall_result + test_result = reduce( + ResultType.max_severity, + (run.overall_result for run in recipe.runs), + ResultType.PASS, ) + except Exception: + print( + f"Test {recipe_name} crashed with an exception:", + file=sys.stderr, + ) + traceback.print_exc(file=sys.stderr) + test_result = ResultType.FAIL + exc_info = traceback.format_exc() + + if recipe is not None: + try: + self._export_results(recipe, result_dir) + except Exception: + print( + f"Failed to export results for {recipe_name}:", + file=sys.stderr, + ) + traceback.print_exc(file=sys.stderr) + + if exc_info is not None: + with open(os.path.join(log_dir, "crash.log"), "w") as f: + f.write(exc_info) + + results.append((test_id, test_result)) + overall_result = ResultType.max_severity(overall_result, test_result) + + print(f"\n{'=' * 60}") + print("Test Summary:") + print(f"{'=' * 60}") + for test_id, result in results: + status = "PASS" 
if result == ResultType.PASS else "FAIL" + print(f" {test_id}: {status}") + print(f"\nOverall result: {'PASS' if overall_result == ResultType.PASS else 'FAIL'}") return overall_result +def _check_dir_access(path): + """Check if a directory exists and is accessible, warn about SELinux if not.""" + if not os.path.isdir(path): + print(f"Directory {path} does not exist or is not a directory.", file=sys.stderr) + return False + + try: + os.listdir(path) + except PermissionError: + print( + f"Permission denied accessing {path}. " + "If this directory is a mounted volume, SELinux may be " + "preventing access. Try running the container with " + "--security-opt label=disable", + file=sys.stderr, + ) + return False + return True + + if __name__ == "__main__": + if not _check_dir_access(POOL_DIR) or not _check_dir_access(RESULTS_DIR): + sys.exit(1) runner = ContainerRunner() - exit_code = 0 if runner.run() == ResultType.PASS else 1 + try: + exit_code = 0 if runner.run() == ResultType.PASS else 1 + except Exception: + traceback.print_exc(file=sys.stderr) + exit_code = 1 + finally: + runner._zip_results() exit(exit_code) diff --git a/container_files/controller/create_pool.py b/container_files/controller/create_pool.py new file mode 100755 index 000000000..c9d2c865a --- /dev/null +++ b/container_files/controller/create_pool.py @@ -0,0 +1,99 @@ +#!/bin/python + +import json +import argparse +from pathlib import Path +import xml.etree.ElementTree as ET +from xml.dom import minidom + +def main(): + parser = argparse.ArgumentParser(description="Generate an LNST Machine pool from test environment description") + + parser.add_argument( + '--test-environment-description', + type=load_TED, + required=True, + help="Path to the JSON or YAML test environment description file" + ) + parser.add_argument( + '-o', '--output', + type=Path, + required=True, + help="Directory where to output Machine pool files", + ) + + args = parser.parse_args() + + pool_path = 
create_machine_pool(args.test_environment_description, args.output) + print(pool_path) + + +def create_machine_pool(test_environment_description, pool_path): + # create a clean pool directory + try: + pool_path.mkdir(parents=True, exist_ok=True) + except OSError as e: + raise SystemExit(f"Failed to create pool directory {pool_path}: {e}") + for item in pool_path.iterdir(): + if item.is_file(): + item.unlink() + + for machine in test_environment_description: + test_system_name = machine['test_system_name'] + root = ET.Element("agentmachine") + + params_node = ET.SubElement(root, "params") + add_param(params_node, "hostname", machine['hostname']) + add_param(params_node, "rpc_port", "9999") + + interfaces_node = ET.SubElement(root, "interfaces") + + for i, mac_address in enumerate(machine['test_nic_hw_addrs']): + eth_node = ET.SubElement(interfaces_node, "eth", { + "label": "net1", + "id": f"eth{i}", + }) + + # Nested + eth_params = ET.SubElement(eth_node, "params") + add_param(eth_params, "hwaddr", mac_address) + + # Prettify the XML string + xml_str = ET.tostring(root, encoding='utf-8') + pretty_xml = minidom.parseString(xml_str).toprettyxml(indent=" ") + + # Save to file + save_path = Path(pool_path) / f"{test_system_name}_agent.xml" + save_path.write_text(pretty_xml) + print(f"Generated XML at: {save_path}") + + return pool_path + +def add_param(parent, name, value): + """Helper to create nodes.""" + ET.SubElement(parent, "param", {"name": name, "value": str(value)}) + +def load_TED(file_path): + """Reads a JSON or YAML file and returns a Python dict.""" + path = Path(file_path) + if not path.exists(): + raise argparse.ArgumentTypeError(f"File {file_path} does not exist.") + + ext = path.suffix.lower() + + with open(file_path, 'r') as f: + if ext in ['.yaml', '.yml']: + try: + import yaml + except ImportError: + raise argparse.ArgumentTypeError( + "PyYAML is required for YAML files. 
Install it with: pip install pyyaml" + ) + return yaml.safe_load(f) + elif ext == '.json': + return json.load(f) + else: + raise argparse.ArgumentTypeError("File must be .json, .yaml, or .yml") + +if __name__ == "__main__": + main() diff --git a/container_files/controller/entrypoint.sh b/container_files/controller/entrypoint.sh index 1df3dde54..47d823f65 100755 --- a/container_files/controller/entrypoint.sh +++ b/container_files/controller/entrypoint.sh @@ -1,2 +1,90 @@ -#!/bin/sh -exec /root/lnst_venv/bin/python /lnst/container_files/controller/container_runner.py +#!/bin/bash +set -euo pipefail + +PYTHON_PATH=/root/lnst_venv/bin/python +POOL_DIR="/root/.lnst/pool" +TED_FILE="/lnst/container_files/controller/pool/test_environment.json" +SETUP_AGENTS_SCRIPT="/lnst/container_files/controller/setup_agent.sh" + +# --------------------------------------------------------------------------- +# Phase 1 -- Pool generation +# --------------------------------------------------------------------------- +mkdir -p "$POOL_DIR" + +if ls "$POOL_DIR"/*.xml >/dev/null 2>&1; then + echo "Pool XML files already present in $POOL_DIR, skipping pool generation." +elif [ -f "$TED_FILE" ]; then + echo "No pool XML files found in $POOL_DIR, generating from $TED_FILE..." + "$PYTHON_PATH" /lnst/container_files/controller/create_pool.py \ + --test-environment-description "$TED_FILE" \ + -o "$POOL_DIR" +else + echo "ERROR: No .xml files found in $POOL_DIR and no test environment description found at $TED_FILE" >&2 + exit 1 +fi + +# --------------------------------------------------------------------------- +# Phase 2 -- Remote agent setup +# --------------------------------------------------------------------------- + +# Parses host entry $1 from $TED_FILE and populates the array named by $2 +# with the ssh command tokens. Sets $hostname and $ssh_port in caller scope. 
+build_ssh_cmd() { + local idx="$1" + local -n _cmd="$2" + + hostname=$(jq -r ".[$idx].hostname" "$TED_FILE" | xargs) + ssh_port=$(jq -r ".[$idx].ssh_port" "$TED_FILE" | xargs) + local username=$(jq -r ".[$idx].username" "$TED_FILE" | xargs) + local password=$(jq -r ".[$idx].password // empty" "$TED_FILE" | xargs) + + _cmd=() + if [[ -n "$password" ]]; then + _cmd+=("sshpass" "-p" "$password") + fi + _cmd+=("ssh" "-o" "StrictHostKeyChecking=no" "-o" "UserKnownHostsFile=/dev/null" + "-o" "ConnectTimeout=10" "-p" "$ssh_port" "${username}@${hostname}") +} + +if [[ ! -f "$TED_FILE" ]]; then + echo "WARNING: $TED_FILE not found -- skipping remote agent setup." >&2 +else + echo "Setting up agents on remote hosts defined in $TED_FILE..." + host_count=$(jq 'length' "$TED_FILE") + + for ((i = 0; i < host_count; i++)); do + build_ssh_cmd "$i" ssh_cmd + + echo "=== Setting up agent on $hostname (port $ssh_port) ===" + echo "SSH command: ${ssh_cmd[*]}" + + # Run setup_agents.sh remotely (piped over stdin) + if ! "${ssh_cmd[@]}" "bash -s -- 'git+${LNST_SRC}'" < "$SETUP_AGENTS_SCRIPT"; then + echo "ERROR: setup_agent.sh failed on $hostname" >&2 + exit 1 + fi + + # Verify expected NICs exist on the remote host + mapfile -t expected_macs < <(jq -r ".[$i].test_nic_hw_addrs[]" "$TED_FILE") + + remote_macs=$("${ssh_cmd[@]}" 'cat /sys/class/net/*/address' 2>/dev/null || true) + + for mac in "${expected_macs[@]}"; do + mac_lower=$(echo "$mac" | tr '[:upper:]' '[:lower:]') + + if ! 
echo "$remote_macs" | tr '[:upper:]' '[:lower:]' | grep -qF "$mac_lower"; then + available=$(echo "$remote_macs" | sort -u | paste -sd ', ' -) + echo "ERROR: NIC with MAC $mac not found on $hostname" >&2 + echo " Available MACs: $available" >&2 + exit 1 + fi + done + + echo "=== Agent setup complete on $hostname ===" + done +fi + +# --------------------------------------------------------------------------- +# Phase 3 -- Run controller +# --------------------------------------------------------------------------- +exec "$PYTHON_PATH" /lnst/container_files/controller/container_runner.py diff --git a/container_files/controller/pool/.gitkeep b/container_files/controller/pool/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/container_files/controller/pool/test_environment.example.json b/container_files/controller/pool/test_environment.example.json new file mode 100644 index 000000000..6c4531556 --- /dev/null +++ b/container_files/controller/pool/test_environment.example.json @@ -0,0 +1,22 @@ +[ + { + "test_system_name": "host1", + "hostname": "host.example.com", + "ssh_port": 22, + "username": "root", + "password": "root", + "test_nic_hw_addrs": [ + "00:00:00:00:00:00" + ] + }, + { + "test_system_name": "host2", + "hostname": "host2.example.com", + "ssh_port": 22, + "username": "root", + "password": "root", + "test_nic_hw_addrs": [ + "00:00:00:00:00:01" + ] + } +] diff --git a/container_files/controller/setup_agent.sh b/container_files/controller/setup_agent.sh new file mode 100644 index 000000000..9653ada6c --- /dev/null +++ b/container_files/controller/setup_agent.sh @@ -0,0 +1,86 @@ +#!/bin/bash +set -euo pipefail + +LNST_SRC="${1:-git+https://github.com/LNST-project/lnst.git}" + +UV_VERSION="${UV_VERSION:-0.11.6}" +export UV_PROJECT_ENVIRONMENT="/opt/lnst" +export UV_PYTHON_INSTALL_DIR="/opt/uv/python" +export UV_PYTHON="3.13" + +# --- Validation --- + +if [[ $EUID -ne 0 ]]; then + echo "Error: This script must be run as root (use sudo)." 
+ exit 1 +fi + +if systemctl is-active --quiet lnst-agent 2>/dev/null && ss -tlnp | grep -q ':9999 '; then + echo "lnst-agent is already running and listening on port 9999." + systemctl --no-pager status lnst-agent + echo "To reinstall, run: systemctl stop lnst-agent && $0 $*" + exit 0 +fi + +echo "Installing LNST agent from: $LNST_SRC" + +# --- Install system packages --- + +echo "Installing system packages..." +dnf install -y \ + python3-devel \ + git \ + gcc \ + libnl3-devel \ + iproute-tc \ + tcpdump \ + iperf3 \ + NetworkManager-config-server \ + +# --- Install uv --- + +echo "Installing uv..." +curl -LsSf "https://astral.sh/uv/${UV_VERSION}/install.sh" | sh +export PATH="/root/.local/bin:$PATH" + +# --- Install lnst via uv --- + +echo "Installing lnst package..." +uv venv --python "$UV_PYTHON" "$UV_PROJECT_ENVIRONMENT" +uv pip install --python "$UV_PROJECT_ENVIRONMENT/bin/python" "$LNST_SRC" + +# --- Verify lnst-agent binary --- + +if [[ ! -x "$UV_PROJECT_ENVIRONMENT/bin/lnst-agent" ]]; then + echo "Error: lnst-agent not found after installation." + exit 1 +fi + +echo "lnst-agent installed at: $UV_PROJECT_ENVIRONMENT/bin/lnst-agent" + +# --- Set up systemd service --- + +LNST_COMMIT=$(echo "$LNST_SRC" | sed 's/.*@//') +LNST_REPO=$(echo "$LNST_SRC" | sed 's/^git+//; s/@.*//') +curl -LsSf "${LNST_REPO%.git}/raw/${LNST_COMMIT}/install/lnst-agent.service" \ + -o /usr/lib/systemd/system/lnst-agent.service +systemctl daemon-reload +systemctl enable lnst-agent +systemctl start lnst-agent + +# --- Verify agent is running --- + +echo "" +echo "Waiting for lnst-agent to listen on port 9999..." +for i in $(seq 1 10); do + if ss -tlnp | grep -q ':9999 '; then + echo "LNST agent installed and listening on port 9999." + systemctl --no-pager status lnst-agent + exit 0 + fi + sleep 1 +done + +echo "Error: lnst-agent is not listening on port 9999." 
+systemctl --no-pager status lnst-agent || true +exit 1 diff --git a/container_files/controller/test_db.json b/container_files/controller/test_db.json new file mode 100644 index 000000000..f0b62e9ae --- /dev/null +++ b/container_files/controller/test_db.json @@ -0,0 +1,42 @@ +[ + { + "uuid": "b930216b-a455-4bc0-bb99-69fe2a420596", + "recipe_name": "SimpleNetworkRecipe", + "params": { + "perf_tool_cpu": [6], + "dev_intr_cpu": [0], + "perf_parallel_processes": 1, + "offload_combinations": [ + {"gro": "on", "gso": "on", "tso": "on", "tx": "on", "rx": "on"} + ], + "perf_duration": 60, + "ip_versions": ["ipv4"], + "perf_tests": ["tcp_stream"], + "perf_msg_sizes": [131072], + "rx_pause_frames": false, + "tx_pause_frames": false, + "perf_iterations": 1, + "net_ipv4": "192.168.220.0/24" + } + }, + { + "uuid": "5af341af-daf0-4755-9749-3c8741c66b3e", + "recipe_name": "SimpleNetworkRecipe", + "params": { + "perf_tool_cpu": [6], + "dev_intr_cpu": [0], + "perf_parallel_processes": 1, + "offload_combinations": [ + {"gro": "on", "gso": "on", "tso": "on", "tx": "on", "rx": "on"} + ], + "perf_duration": 60, + "ip_versions": ["ipv6"], + "perf_tests": ["tcp_stream"], + "perf_msg_sizes": [131072], + "rx_pause_frames": false, + "tx_pause_frames": false, + "perf_iterations": 1, + "net_ipv6": "fd00:0:b100::/64" + } + } +] diff --git a/docs/source/extensions.rst b/docs/source/extensions.rst index 8a5686bc7..53686b8a4 100644 --- a/docs/source/extensions.rst +++ b/docs/source/extensions.rst @@ -183,19 +183,103 @@ following parameters as environment variables to controller container: It expects that you use CNI as network backend for Podman. -Using baremetal agents +Test environment description +```````````````````````````` + +Instead of manually creating machine pool XMLs, you can provide a test environment +description file (``test_environment.json``). The controller generates pool XMLs +from it at startup. 
+ +The file must be located at ``/lnst/container_files/controller/pool/test_environment.json`` +inside the container (see ``test_environment.example.json`` for reference): + +.. code-block:: bash + + podman run -v /path/to/test_environment.json:/lnst/container_files/controller/pool/test_environment.json:ro ... + +Format: + +.. code-block:: json + + [ + { + "test_system_name": "host1", + "hostname": "machine1.example.com", + "ssh_port": 22, + "username": "root", + "password": "", + "test_nic_hw_addrs": [ + "00:00:5e:00:53:01" + ] + }, + { + "test_system_name": "host2", + "hostname": "machine2.example.com", + "ssh_port": 22, + "username": "root", + "password": "", + "test_nic_hw_addrs": [ + "00:00:5e:00:53:02" + ] + } + ] + +Fields: + +* ``test_system_name`` — unique identifier for the machine (used in pool XML filename) +* ``hostname`` — FQDN or IP of the remote host +* ``ssh_port`` — SSH port +* ``username`` — SSH username +* ``password`` — SSH password (leave empty for key-based authentication) +* ``test_nic_hw_addrs`` — list of MAC addresses of NICs used for testing (verified at startup) + + +.. _automatic-agent-setup: + +Automatic agent setup `````````````````````` -Firstly, you need to prepare machine XMLs if you decide to run agents on baremetal -machines (see :ref:`machines-pool`). Instead of putting them into `~/.lnst/pool` -directory, you need to put them into `container_files/controller/pool` directory. -Machine XMLs are copied to container during build process from -`container_files/controller/pool`. -Podman doesn't support copying files located outside of build context, so you -need to put it to LNST project directory. + +When a test environment description file is present, the controller container +automatically sets up LNST agents on remote hosts at startup. For each host +defined in ``test_environment.json``, the container: + +1. Connects via SSH using credentials from the test environment description +2. 
Runs ``setup_agent.sh`` remotely — installs LNST agent, its dependencies, + and starts the ``lnst-agent`` systemd service + +The agent is installed using the same LNST version as the controller (determined +by the ``LNST_SRC`` build argument). If the agent is already running on a host, +the setup is skipped. + +SSH authentication uses either the password from the test environment description +file or SSH keys mounted into the container (e.g. ``-v machine_keys/:/root/.ssh:ro``). .. note:: - To avoid having to deal with pool files you can simply mount your `~/.lnst/pool` directory - to `/root/.lnst/pool/` in the container (read-only access is sufficient). + Remote hosts must be reachable from the container via SSH. If using the default + network mode, you may need ``--network=host`` for the container. + +Using baremetal agents +`````````````````````` +If you decide to run agents on baremetal machines, you can either provide +a test environment description (see above) or prepare machine pool XMLs manually +(see :ref:`machines-pool`) and mount them into the container at runtime. + +Mount your pool directory to ``/root/.lnst/pool/`` in the container (read-only +access is sufficient): + +.. code-block:: bash + + podman run -v /path/to/pool:/root/.lnst/pool:ro ... lnst_controller + +.. warning:: + If SELinux is enforcing on the host, mounted volumes may not be accessible from inside the + container. The container runner will detect this on startup and print an error message. + To fix this, you can: + + * add the ``:z`` or ``:Z`` suffix to the volume mount (e.g. ``-v /host/path:/container/path:z``) + to let Podman relabel the directory automatically + * relabel the host directory manually with ``chcon -Rt svirt_sandbox_file_t /host/path`` + * disable SELinux label confinement for the container with ``--security-opt label=disable`` Build and run controller @@ -205,17 +289,36 @@ Build the controller image: .. 
code-block:: bash - cd your_lnst_project_directory - podman build . -t lnst_controller -f container_files/controller/Dockerfile + podman build -t lnst_controller -f container_files/controller/Dockerfile . + +The image clones LNST from ``https://github.com/LNST-project/lnst.git`` into +``/lnst`` inside the container and installs its dependencies. + +Pinning LNST version ++++++++++++++++++++++ + +By default, the image clones the latest version of LNST. To pin to a specific +git reference, use the ``LNST_SRC`` build argument: + +.. code-block:: bash -This will copy pool files to `/root/.lnst/pool/` in container and LNST from -`your_lnst_project_directory` to `/lnst` in container. + podman build --build-arg LNST_SRC="https://github.com/LNST-project/lnst.git@REF" \ + -t lnst_controller -f container_files/controller/Dockerfile . + +Where ``REF`` can be a commit hash, branch name, or tag. + +The same ``LNST_SRC`` value is also used to install the matching LNST version on +remote agents (see `Automatic agent setup`_). + +.. note:: + If you want to use a local copy of LNST (e.g. during development), you can mount + your project directory to ``/lnst`` in the container. The LNST virtual environment is + located outside of ``/lnst/``, so if your changes require reinstallation of LNST + and/or its dependencies, you need to rebuild the image. .. note:: - If you want to avoid rebuilding the image every time you change your LNST project (e.g. during - development), you can mount `your_lnst_project_directory` to `/lnst` in container. The LNST's - virtual environment is located outside of `/lnst/` directory, so if your changes requires - reinstallation fo LNST and/or its dependencies, you need to rebuild the image. + The machine pool is **not** baked into the image. You must mount it at runtime + (see `Using baremetal agents`_ above). 
Before running the container, you need to provide environment variables: @@ -229,11 +332,85 @@ Before running the container, you need to provide environment variables: `RECIPE`, `RECIPE_PARAMS` and `FORMATTERS` are parsed using Python's `eval` function, which is a security risk. Make sure you trust the source of these variables. +Using the test database ++++++++++++++++++++++++ + +Instead of specifying ``RECIPE`` and ``RECIPE_PARAMS`` environment variables, you can +define a list of tests in ``container_files/controller/test_db.json``. When the ``RECIPE`` +environment variable is **not** set, the container runner will automatically execute all +tests defined in ``test_db.json`` in order. + +The location of the test database can be overridden with the ``TEST_DB`` environment +variable. This accepts a local file path or an HTTP(S) URL: + +.. code-block:: bash + + # Local file (default) + podman run -e TEST_DB=/path/to/my_tests.json ... + + # Remote URL + podman run -e TEST_DB=https://server/tests.json ... + +Each entry in the JSON array is an object with the following keys: + +* ``uuid`` -- (optional) unique identifier for the test. When set, the results directory for this test uses the UUID as its name instead of the default ``{index}_{recipe_name}`` format. +* ``recipe_name`` -- string name of the recipe class (loaded from ``lnst.Recipes.ENRT``) +* ``params`` -- object of parameters to pass to the recipe constructor + +Example ``test_db.json``: + +.. code-block:: json + + [ + { + "uuid": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "recipe_name": "SimpleNetworkRecipe", + "params": { + "perf_iterations": 1, + "perf_duration": 10, + "driver": "ice" + } + }, + { + "recipe_name": "BondRecipe", + "params": { + "bonding_mode": "active-backup", + "miimon_value": 5, + "driver": "ice" + } + } + ] + +Tests run sequentially and each test is independent -- a failure in one test does not +prevent subsequent tests from running. 
A summary of pass/fail results is printed at the +end of the run. + +To use the test database, ``RECIPE`` and ``RECIPE_PARAMS`` must not be set. + +Exporting results ++++++++++++++++++ + +Results are automatically exported to ``/root/.lnst/results/`` inside the container. +Each test gets its own subdirectory, named after its ``uuid`` or ``{index}_{recipe_name}`` (e.g. ``0_SimpleNetworkRecipe/``), containing: + +* ``logs/result_summary.log`` -- human-readable result summary with debug-level output +* ``run-data-{i}.json`` -- JSON-formatted results for each recipe run +* ``run-data-{i}.lrc`` -- pickled/compressed run data for each recipe run + +At the end of execution, all result directories are zipped into ``results.zip`` +inside the results directory. + +To access results on the host, mount a volume to ``/root/.lnst/results/``: + +.. code-block:: bash + + podman run -e DEBUG=1 -v /host/path/to/results:/root/.lnst/results --rm --name lnst_controller lnst_controller + Now, you can run the controller: .. code-block:: bash - podman run -e RECIPE=SimpleNetworkRecipe -e RECIPE_PARAMS="perf_iterations=1;perf_duration=10" -e DEBUG=1 --rm --name lnst_controller lnst_controller + podman run -e RECIPE=SimpleNetworkRecipe -e RECIPE_PARAMS="perf_iterations=1;perf_duration=10" -e DEBUG=1 -v /path/to/pool:/root/.lnst/pool:ro --rm --name lnst_controller lnst_controller .. note:: @@ -248,7 +425,7 @@ Or you can run more complex recipes: .. 
code-block:: bash - podman run -e RECIPE=XDPDropRecipe -e RECIPE_PARAMS="perf_iterations=1;perf_tool_cpu=[0,1];multi_dev_interrupt_config={'host1':{'eth0':{'cpus':[0],'policy':'round-robin'}}}" --rm --name lnst_controller lnst_controller + podman run -e RECIPE=XDPDropRecipe -e RECIPE_PARAMS="perf_iterations=1;perf_tool_cpu=[0,1];multi_dev_interrupt_config={'host1':{'eth0':{'cpus':[0],'policy':'round-robin'}}}" -v /path/to/pool:/root/.lnst/pool:ro --rm --name lnst_controller lnst_controller Classes documentation diff --git a/lnst/Agent/Agent.py b/lnst/Agent/Agent.py index 41cecc4d8..e5619493f 100644 --- a/lnst/Agent/Agent.py +++ b/lnst/Agent/Agent.py @@ -987,6 +987,7 @@ def run(self): try: if self._server_handler.get_ctl_sock() is None: self._log_ctl.cancel_connection() + self._methods.machine_cleanup() try: logging.info("Waiting for connection.") self._server_handler.accept_connection()