#!/bin/sh
#
# Launch a CWL workflow from a pipeline checkout with toil-cwl-runner.
#
# This part of the script defines the helper functions, parses the command
# line with getopt(1) and validates the result. After the options, two
# positional arguments are expected: the workflow name (resolved to
# <pipeline>/workflows/<name>.cwl) and the input directory.
#
# Shell options are enabled with `set` instead of on the shebang line so that
# they still apply when the script is started as `sh <script>`.
set -eu

PROGRAM="${0##*/}"

# Print the command-line synopsis to stdout and exit with status 0.
usage() {
  echo "Usage: ${PROGRAM} [-r|--restart] [-h|--help] [-c|--container <container>] [--scratch <dir>] [--outdir <dir>] [--batch_system <system>] (-f <input file>) (-p <pipeline dir>) <workflow> <input dir>"
  exit 0
}

# Print "Error: <message>" to stderr and exit with status 1.
# Arguments: the message words; they are joined with single spaces.
error() {
  echo "Error: $*" >&2
  exit 1
}

# Register a container image for the run.
# Arguments: $1 - path to an existing container image file.
# Globals:   CWL_SINGULARITY_CACHE - kept if already set, otherwise set to the
#            directory holding the image; exported for child processes.
# Side effects: unless the image is already called vlbi-cwl.sif, symlinks
#            named vlbi-cwl.sif and vlbi-cwl_latest.sif pointing at the image
#            are (re)created inside the cache directory.
set_container() {
  [ -f "${1}" ] || error "Container ${1} does not exist."

  # Work with an absolute path: a symlink stores its target verbatim, so a
  # relative path would be resolved against the cache directory and dangle.
  container_dir=$(cd -- "$(dirname -- "${1}")" && pwd) \
    || error "Cannot resolve the directory of container ${1}."
  container_name="${1##*/}"
  container_path="${container_dir}/${container_name}"

  # Default the cache to the directory that contains the image.
  CWL_SINGULARITY_CACHE="${CWL_SINGULARITY_CACHE:-${container_dir}}"
  # Exported so that the workflow runner started later can see the value.
  export CWL_SINGULARITY_CACHE

  # `!=` rather than `! ... ==`: the latter is not understood by POSIX test.
  if [ "${container_name}" != "vlbi-cwl.sif" ]; then
    ln -sf "${container_path}" "${CWL_SINGULARITY_CACHE}/vlbi-cwl.sif"
    ln -sf "${container_path}" "${CWL_SINGULARITY_CACHE}/vlbi-cwl_latest.sif"
  fi
}

# Normalise the command line. getopt prints its own diagnostic for unknown
# options; the eval/set idiom is the documented way to consume its output.
opts=$(getopt -o rhf:c:p: \
  --long restart,help,container:,scratch:,outdir:,batch_system: \
  -n 'pilot' -- "$@") || error "Invalid arguments; see ${PROGRAM} --help."
eval set -- "$opts"

RESTART=""
SCRATCH=""
BATCH_SYSTEM="slurm"

while true; do
  case "$1" in
    -c | --container) set_container "${2}"; shift 2 ;;
    -f) INPUT_FILE="${2}"; shift 2 ;;
    -h | --help) usage ;;
    -p) PIPELINE="${2}"; shift 2 ;;
    -r | --restart) RESTART="--restart"; shift ;;
    --scratch) SCRATCH="${2}"; shift 2 ;;
    --outdir) OUTDIR="${2}"; shift 2 ;;
    --batch_system) BATCH_SYSTEM="${2}"; shift 2 ;;
    # getopt always terminates the option list with `--`.
    --) shift; break ;;
    *) error "Unexpected argument: ${1}" ;;
  esac
done

# The `${VAR:-}` forms below keep `set -u` from aborting with a bare
# "unbound variable" message before the descriptive error can be printed.
[ -n "${PIPELINE:-}" ] || error "Missing pipeline directory (-p)."
[ "$#" -ge 2 ] \
  || error "Expected a workflow name and an input directory; see ${PROGRAM} --help."

# TODO: clean this up
WORKFLOW="${PIPELINE}/workflows/${1}.cwl"
WORKFLOW_NAME=$(basename -- "${WORKFLOW%.cwl}")
# realpath may fail when parent directories are missing; fall back to the
# path as given so the message is never empty.
[ -f "${WORKFLOW}" ] \
  || error "$(realpath -- "${WORKFLOW}" 2>/dev/null || printf '%s' "${WORKFLOW}") is invalid."

INPUT_DIR="${2}"
[ -d "${INPUT_DIR}" ] || error "Input directory does not exist."

[ -n "${INPUT_FILE:-}" ] || error "Missing input file."
[ -f "${INPUT_FILE}" ] || error "Invalid input file."

[ -n "${CWL_SINGULARITY_CACHE:-}" ] \
  || error "\$CWL_SINGULARITY_CACHE is not set or no container has been specified."
# ---------------------------------------------------------------------------
# Derive the paths used by the run, create the directories, assemble the
# toil-cwl-runner command line, execute it and collect run statistics.
# Reads WORKFLOW, WORKFLOW_NAME, INPUT_DIR, INPUT_FILE, PIPELINE,
# BATCH_SYSTEM and, optionally, OUTDIR, SCRATCH and RESTART.
# Exits with the exit status of toil-cwl-runner.
# ---------------------------------------------------------------------------

# Everything is placed under --outdir, or under the input directory when no
# output directory was given.
RUN_ROOT="${OUTDIR:-$INPUT_DIR}"

PIPELINE_LOG="${HOME}/${WORKFLOW_NAME}.log"
TMP_OUTDIR="${RUN_ROOT}/toil/tmp/tmp/"
JOB_LOG_DIR="${RUN_ROOT}/toil/logs/"
# Uses the same fallback as the other paths: a bare ${OUTDIR} aborts under
# `set -u` when --outdir is absent, or resolves to /toil/logs when empty.
BATCH_LOG_DIR="${TOIL_BATCH_LOGS_DIR:-${RUN_ROOT}/toil/logs}"
WORK_DIR="${RUN_ROOT}/toil/work/"
OUTPUT_DIR="${RUN_ROOT}/${WORKFLOW_NAME}_results"
JOBSTORE_DIR="${RUN_ROOT}/toil/${WORKFLOW_NAME}_job/"
# NOTE(review): with --outdir the statistics go to <outdir>/stats, without it
# to <results>/stats. Kept as is; confirm that the asymmetry is intended.
STATS_DIR="${OUTDIR:-$OUTPUT_DIR}/stats"

# The job store directory is deliberately not created here.
mkdir -p "$JOB_LOG_DIR" "$BATCH_LOG_DIR" "$WORK_DIR" "$OUTPUT_DIR" \
  "$STATS_DIR" "$TMP_OUTDIR"

# Optional scratch location handed to the runner via --tmpdir-prefix.
TMPDIR_PREFIX=""
if [ -n "${SCRATCH:-}" ]; then
  TMPDIR_PREFIX="${SCRATCH}/tmp_${WORKFLOW_NAME}/"
  mkdir -p "${SCRATCH}/tmp_${WORKFLOW_NAME}"
fi

# Print information relevant for the run
cat << EOF
The following will be used in the run:
Pipeline                        ┃ ${WORKFLOW}
Input file                      ┃ ${INPUT_FILE}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━━
Output directory                ┃ ${OUTPUT_DIR}
Log directory                   ┃ ${JOB_LOG_DIR}
Jobstore directory              ┃ ${JOBSTORE_DIR}
Intermediate output directory   ┃ ${TMP_OUTDIR}
Pipeline statistics directory   ┃ ${STATS_DIR}
EOF

# Build the command as the positional parameters so that every argument stays
# a single word even when a path contains whitespace (POSIX sh has no arrays).
# The original positional arguments are no longer needed at this point.
set -- toil-cwl-runner
if [ -n "${RESTART:-}" ]; then
  set -- "$@" "${RESTART}"
fi
if [ -n "${TMPDIR_PREFIX}" ]; then
  set -- "$@" --tmpdir-prefix "${TMPDIR_PREFIX}"
fi
set -- "$@" \
  --singularity \
  --clean never \
  --retryCount 0 \
  --disableCaching \
  --logFile "${PIPELINE_LOG}" \
  --writeLogs "${JOB_LOG_DIR}" \
  --stats \
  --clusterStats "${STATS_DIR}" \
  --batchSystem "${BATCH_SYSTEM}" \
  --batchLogsDir "${BATCH_LOG_DIR}" \
  --tmp-outdir-prefix "${TMP_OUTDIR}" \
  --workDir "${WORK_DIR}" \
  --outdir "${OUTPUT_DIR}" \
  --jobStore "${JOBSTORE_DIR}" \
  --bypass-file-store \
  "${WORKFLOW}" \
  "${INPUT_FILE}"

# Flat, human-readable copy of the command; used for display only.
TOIL_COMMAND="$*"

DEFAULT_SLURM_ARGS="-p cosma5 -A durham -t 72:00:00"
export TOIL_SLURM_ARGS="${TOIL_SLURM_ARGS:-$DEFAULT_SLURM_ARGS}"
# Note the meaning of these SLURM options:
# -N # number of nodes
# -c # number of cores; available memory is tied to this if not specified separately
# -p # partition (queue);
# -A # project
# -t # runtime in d-hh:mm:ss format

# Values already present in the environment take precedence over the defaults.
# The assignments are quoted because some shells word-split `export VAR=$x`.
export APPTAINERENV_PREPEND_PATH="${APPTAINERENV_PREPEND_PATH:-${PIPELINE}/scripts}"
# \$PYTHONPATH is kept literal on purpose: it is not expanded by this script.
export APPTAINERENV_PYTHONPATH="${APPTAINERENV_PYTHONPATH:-${PIPELINE}/scripts:\$PYTHONPATH}"
export APPTAINER_BIND="${APPTAINER_BIND:-${HOME},${INPUT_DIR},${OUTPUT_DIR}}"

# Show the environment and command in a form that can be pasted into a shell.
printf "env APPTAINERENV_PREPEND_PATH='%s' APPTAINERENV_PYTHONPATH='%s' APPTAINER_BIND='%s' TOIL_SLURM_ARGS='%s' %s\n" \
  "$APPTAINERENV_PREPEND_PATH" \
  "$APPTAINERENV_PYTHONPATH" \
  "$APPTAINER_BIND" \
  "$TOIL_SLURM_ARGS" \
  "${TOIL_COMMAND}"

# The four variables above are exported, so the runner inherits them without
# an explicit `env` prefix. Capture the status instead of letting `set -e`
# abort, so that the statistics below are still collected after a failure.
STATUS=0
"$@" > "${OUTPUT_DIR}/${WORKFLOW_NAME}.out" || STATUS=$?

# Statistics are best effort: a failure here must not mask the run's status.
toil stats --raw "${JOBSTORE_DIR}" > "${STATS_DIR}/${WORKFLOW_NAME}.stats.json" || true
toil stats --pretty "${JOBSTORE_DIR}" > "${STATS_DIR}/${WORKFLOW_NAME}.stats.txt" || true

# printf instead of `echo -e`, which is not portable under /bin/sh.
printf '\nThe pipeline was run using\n\n%s\n\n' "${TOIL_COMMAND}"

if [ "${STATUS}" -eq 0 ]; then
  printf '\nPipeline finished successfully.\n\n'
else
  printf '\nPipeline failed with exit status %s.\n\n' "${STATUS}"
fi

exit "${STATUS}"