diff --git a/scripts/firedrake-run-split-tests b/scripts/firedrake-run-split-tests
index 9164168e07..752d2efabe 100755
--- a/scripts/firedrake-run-split-tests
+++ b/scripts/firedrake-run-split-tests
@@ -14,6 +14,14 @@ Usage:
     * is the number of different jobs
     * are additional arguments that are passed to pytest
 
+  The following environment variables can be used to configure the
+  outer process-tree timeout for each split job:
+
+    * FIREDRAKE_RUN_SPLIT_TESTS_TIMEOUT: maximum wall time for each job
+      (default: 1800s)
+    * FIREDRAKE_RUN_SPLIT_TESTS_KILL_AFTER: grace period before forcibly
+      killing a timed-out job (default: 60s)
+
 Example:
 
   firedrake-run-split-tests 3 4 tests/unit --verbose
@@ -28,7 +36,8 @@ Requires:
   * pytest
   * pytest-split
   * mpi-pytest
-  * GNU parallel"
+  * GNU parallel
+  * GNU timeout"
 
 # Print out help message with no arguments or "-h" or "--help"
 if [[ "$#" -eq "0" ]] || [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then
@@ -39,6 +48,13 @@ fi
 num_procs=$1
 num_jobs=$2
 extra_args=${@:3}
+job_timeout=${FIREDRAKE_RUN_SPLIT_TESTS_TIMEOUT:-1800s}
+kill_after=${FIREDRAKE_RUN_SPLIT_TESTS_KILL_AFTER:-60s}
+
+if ! command -v timeout >/dev/null 2>&1; then
+    echo "GNU timeout is required" >&2
+    exit 1
+fi
 
 if [ $num_procs = 1 ]; then
     # Cannot use mpiexec -n 1 because this can sometimes hang with
@@ -58,10 +74,11 @@ set -x
 
 # This incantation:
 # * Runs pytest under GNU parallel using the right number of jobs
+# * Applies an outer timeout to the whole pytest/mpiexec process tree
 # * Uses tee to pipe stdout+stderr to both stdout and a log file
 # * Writes pytest's exit code to a file called jobN.errcode (for later inspection)
 parallel --line-buffer --tag \
-    "${pytest_cmd} 2>&1 | tee ${log_file_prefix}{#}.log; \
+    "timeout --kill-after=${kill_after} ${job_timeout} ${pytest_cmd} 2>&1 | tee ${log_file_prefix}{#}.log; \
     echo \${PIPESTATUS[0]} > job{#}.errcode" \
     ::: $(seq ${num_jobs})
 