nf-core · imsarath · Jun 12, 2026 · Jun 12, 2026
diff --git a/modules/nf-core/gridss/preprocess/environment.yml b/modules/nf-core/gridss/preprocess/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gridss=2.13.2 # keep in sync with the container tags in main.nf
diff --git a/modules/nf-core/gridss/preprocess/main.nf b/modules/nf-core/gridss/preprocess/main.nf
@@ -0,0 +1,81 @@
+// Runs the GRIDSS `preprocess` step on one BAM file and emits the resulting
+// `*.gridss.working` directory.
+//
+// Inputs:
+//   meta,  bam, bai   - sample map, BAM file and its index
+//   meta2, fasta      - reference map and FASTA, passed to `--reference`
+//   meta3, fasta_fai  - FASTA index; staged only, never named on the command line
+//   meta4, bwa_index  - directory with the BWA index files; may be empty (`[]`)
+// Outputs:
+//   preprocess_dir    - [ meta, *.gridss.working directory ]
+//   versions_gridss   - [ process name, 'gridss', version ] on the `versions` topic
+process GRIDSS_PREPROCESS {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gridss:2.13.2--h50ea8bc_3':
+        'quay.io/biocontainers/gridss:2.13.2--h50ea8bc_3' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fasta_fai)
+    tuple val(meta4), path(bwa_index)
+
+    output:
+    tuple val(meta), path("*.gridss.working"), emit: preprocess_dir
+    tuple val("${task.process}"), val('gridss'), eval("CallVariants --version 2>&1 | sed 's/-gridss\$//'"), topic: versions, emit: versions_gridss
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // Symlink the BWA index files into the task directory. Emitted only when an
+    // index was staged; with an empty input the find/ln command has nothing to link.
+    def link_bwa_index = bwa_index ? "ln -s \$(find -L ${bwa_index} -regex '.*\\.\\(amb\\|ann\\|pac\\|gridsscache\\|sa\\|bwt\\|img\\|alt\\)') ./" : ''
+    // JVM heap: task memory minus 1 GB, but never below 1 GB. Falls back to 3 GB
+    // when the task has no memory setting, instead of failing on a null value.
+    def heap_gb = 3
+    if (!task.memory) {
+        log.info '[GRIDSS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        heap_gb = Math.max(task.memory.toGiga() - 1, 1)
+    }
+
+    """
+    ${link_bwa_index}
+
+    gridss \\
+        --threads ${task.cpus} \\
+        --steps preprocess \\
+        --jvmheap ${heap_gb}g \\
+        --otherjvmheap ${heap_gb}g \\
+        --reference ${fasta} \\
+        ${args} \\
+        ${bam}
+    """
+
+    stub:
+    // NOTE(review): the stub names the directory and its files after `prefix`, while
+    // the module test of the real run expects files named after the input BAM
+    // (`test.paired_end.sorted.bam.*`) - confirm whether the stub should mirror that.
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    mkdir -p ${prefix}.gridss.working/
+
+    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.cigar_metrics
+    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.computesamtags.changes.tsv
+    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.coverage.blacklist.bed
+    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.idsv_metrics
+    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.insert_size_histogram.pdf
+    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.insert_size_metrics
+    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.mapq_metrics
+    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.sv.bam
+    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.sv.bam.csi
+    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.tag_metrics
+    """
+}
diff --git a/modules/nf-core/gridss/preprocess/meta.yml b/modules/nf-core/gridss/preprocess/meta.yml
@@ -0,0 +1,100 @@
+name: "gridss_preprocess" # <tool>_<subcommand>, matching the gridss/preprocess module directory
+description: Run the preprocess step of GRIDSS to extract multiple Picard
+  metrics (insert size, MAPQ, CIGAR, IDSV, tag and coverage metrics) from an
+  input BAM file prior to assembly and variant calling.
+keywords: # search terms for this module
+  - gridss
+  - preprocess
+  - structural variants
+  - bam
+tools: # the single tool wrapped by this module
+  - gridss:
+      description: "GRIDSS: the Genomic Rearrangement IDentification Software Suite"
+      homepage: "https://github.com/PapenfussLab/gridss"
+      documentation: "https://github.com/PapenfussLab/gridss/wiki/GRIDSS-Documentation"
+      tool_dev_url: "https://github.com/PapenfussLab/gridss"
+      doi: "10.1186/s13059-021-02423-x"
+      licence:
+        - "GPL v3"
+      identifier: biotools:gridss
+input: # one entry per input channel, in the order declared in main.nf
+  - - meta: # channel 1: sample map, BAM file and BAM index
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test' ]
+    - bam:
+        type: file
+        description: Input BAM file
+        pattern: "*.bam"
+        ontologies:
+          - edam: http://edamontology.org/format_2572
+    - bai:
+        type: file
+        description: Index of the input BAM file
+        pattern: "*.bai"
+        ontologies: []
+  - - meta2: # channel 2: reference FASTA, passed to --reference in main.nf
+        type: map
+        description: |
+          Groovy Map containing reference information
+    - fasta:
+        type: file
+        description: The reference fasta
+        pattern: "*.{fa,fna,fasta}"
+        ontologies: []
+  - - meta3: # channel 3: FASTA index, staged next to the reference
+        type: map
+        description: |
+          Groovy Map containing reference information
+    - fasta_fai:
+        type: file
+        description: The index of the reference fasta
+        pattern: "*.fai"
+        ontologies: []
+  - - meta4: # channel 4: BWA index directory, symlinked into the task dir by main.nf
+        type: map
+        description: |
+          Groovy Map containing reference information
+    - bwa_index:
+        type: directory
+        description: The BWA index created from the reference fasta, will be
+          generated by Gridss in the setupreference step
+output: # keyed by the `emit:` names used in main.nf
+  preprocess_dir: # [ meta, working directory ]
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*.gridss.working":
+          type: directory
+          description: The working directory produced by the GRIDSS preprocess
+            step containing Picard metrics (insert size, MAPQ, CIGAR, IDSV, tag,
+            coverage) and SV-relevant reads used by downstream GRIDSS steps
+          pattern: "*.gridss.working"
+  versions_gridss: # [ process, tool, version ]; keys mirror the output tuple in main.nf
+    - - ${task.process}:
+          type: string
+          description: The process
+      - gridss:
+          type: string
+          description: The tool name
+      - CallVariants --version 2>&1 | sed 's/-gridss\$//':
+          type: eval
+          description: The expression to obtain the version of the tool
+topics: # channel topics the module publishes to
+  versions: # same tuple as versions_gridss above
+    - - ${task.process}:
+          type: string
+          description: The process
+      - gridss:
+          type: string
+          description: The tool name
+      - CallVariants --version 2>&1 | sed 's/-gridss\$//':
+          type: eval
+          description: The expression to obtain the version of the tool
+authors: # original contributors
+  - "@imsarath"
+maintainers: # current points of contact
+  - "@imsarath"
diff --git a/modules/nf-core/gridss/preprocess/tests/main.nf.test b/modules/nf-core/gridss/preprocess/tests/main.nf.test
@@ -0,0 +1,107 @@
+nextflow_process { // nf-test suite for GRIDSS_PREPROCESS: one real run and one stub run
+
+    name "Test Process GRIDSS_PREPROCESS"
+    script "../main.nf"
+    config "./nextflow.config" // sets the BWA_INDEX prefix to "genome.fasta"
+    process "GRIDSS_PREPROCESS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gridss"
+    tag "gridss/preprocess"
+    tag "bwa/index" // BWA_INDEX is run in the setup block below
+
+    setup { // runs BWA_INDEX so the real-run test can consume its index
+
+        run("BWA_INDEX") {
+            script "../../../bwa/index/main.nf"
+            process {
+                """
+                input[0] = [ [id:'fasta'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ]
+                """
+            }
+        }
+    }
+
+    test("human - bam - bwa") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
+                ]
+                input[1] = [ [id:'fasta'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = [ [id:'fasta_fai'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                input[3] = BWA_INDEX.out.index
+                """
+            }
+        }
+
+        then {
+            def workdir = process.out.preprocess_dir[0][1] // path of the emitted *.gridss.working directory
+            def prefix  = "test.paired_end.sorted.bam" // files inside the directory are named after the input BAM
+            // Picard `# Started on:` and R's PDF `/CreationDate` add per-run timestamps,
+            // and `sv.bam`/`sv.bam.csi` BGZF blocks differ across conda/docker — snapshot only stable parts.
+            // Presence of `sv.bam` and `sv.bam.csi` is covered by the directory listing snapshot.
+            def stripPicardHeaderMd5 = { f -> "${file(f).name}:md5,${listToMD5(path(f).readLines().findAll { !it.startsWith("#") })}" } // "<file name>:md5,<hash of the lines not starting with #>"
+
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.preprocess_dir.collect { meta, dir -> [meta, file(dir).list().findAll { it.startsWith(prefix) }.sort()] }, // sorted file names only, no content
+                    path("${workdir}/${prefix}.computesamtags.changes.tsv"),
+                    path("${workdir}/${prefix}.coverage.blacklist.bed"),
+                    stripPicardHeaderMd5("${workdir}/${prefix}.cigar_metrics"),
+                    stripPicardHeaderMd5("${workdir}/${prefix}.idsv_metrics"),
+                    stripPicardHeaderMd5("${workdir}/${prefix}.insert_size_metrics"),
+                    stripPicardHeaderMd5("${workdir}/${prefix}.mapq_metrics"),
+                    stripPicardHeaderMd5("${workdir}/${prefix}.tag_metrics"),
+                    process.out.findAll { key, val -> key.startsWith("versions") } // every versions* output channel
+                ).match() }
+            )
+        }
+    }
+
+    test("human - bam - stub") {
+
+        options "-stub" // execute the process `stub:` block instead of `script:`
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
+                ]
+                input[1] = [ [id:'fasta'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = [ [id:'fasta_fai'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                input[3] = [ [], [] ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.preprocess_dir, // stub files are empty, so the full channel is snapshotted
+                    process.out.findAll { key, val -> key.startsWith("versions") } // every versions* output channel
+                ).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/gridss/preprocess/tests/main.nf.test.snap b/modules/nf-core/gridss/preprocess/tests/main.nf.test.snap
@@ -0,0 +1,83 @@
+{
+    "human - bam - stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    [
+                        "test.gridss.targeted.bam.cigar_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.gridss.targeted.bam.computesamtags.changes.tsv:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.gridss.targeted.bam.coverage.blacklist.bed:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.gridss.targeted.bam.idsv_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.gridss.targeted.bam.insert_size_histogram.pdf:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.gridss.targeted.bam.insert_size_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.gridss.targeted.bam.mapq_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.gridss.targeted.bam.sv.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.gridss.targeted.bam.sv.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.gridss.targeted.bam.tag_metrics:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ]
+            ],
+            {
+                "versions_gridss": [
+                    [
+                        "GRIDSS_PREPROCESS",
+                        "gridss",
+                        "2.13.2"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-06-12T14:30:07.433688807",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "26.04.3"
+        }
+    },
+    "human - bam - bwa": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    [
+                        "test.paired_end.sorted.bam.cigar_metrics",
+                        "test.paired_end.sorted.bam.computesamtags.changes.tsv",
+                        "test.paired_end.sorted.bam.coverage.blacklist.bed",
+                        "test.paired_end.sorted.bam.idsv_metrics",
+                        "test.paired_end.sorted.bam.insert_size_histogram.pdf",
+                        "test.paired_end.sorted.bam.insert_size_metrics",
+                        "test.paired_end.sorted.bam.mapq_metrics",
+                        "test.paired_end.sorted.bam.sv.bam",
+                        "test.paired_end.sorted.bam.sv.bam.csi",
+                        "test.paired_end.sorted.bam.tag_metrics"
+                    ]
+                ]
+            ],
+            "test.paired_end.sorted.bam.computesamtags.changes.tsv:md5,ce0a87ccee35f990cd878e12a8a84bae",
+            "test.paired_end.sorted.bam.coverage.blacklist.bed:md5,63a1da1606bf23357ad6b1b166c21651",
+            "test.paired_end.sorted.bam.cigar_metrics:md5,29b9a700b9da9f2cb23e199e8c15af31",
+            "test.paired_end.sorted.bam.idsv_metrics:md5,eeee010100dcea8b9f9eeeac44f8d142",
+            "test.paired_end.sorted.bam.insert_size_metrics:md5,4a7d860f1073a82c093373bdbdbd6bf3",
+            "test.paired_end.sorted.bam.mapq_metrics:md5,dff79a95a8f9b40de520ef438c9fab59",
+            "test.paired_end.sorted.bam.tag_metrics:md5,486bf2bab6f506dd20f98e7fa05ae39c",
+            {
+                "versions_gridss": [
+                    [
+                        "GRIDSS_PREPROCESS",
+                        "gridss",
+                        "2.13.2"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-06-12T14:29:56.150511142",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "26.04.3"
+        }
+    }
+}
diff --git a/modules/nf-core/gridss/preprocess/tests/nextflow.config b/modules/nf-core/gridss/preprocess/tests/nextflow.config
@@ -0,0 +1,6 @@
+process {
+    withName: 'BWA_INDEX' {
+        // GRIDSS needs the BWA index files prefixed with the full reference file name, i.e. "genome.fasta"
+        ext.prefix = { 'genome.fasta' }
+    }
+}