diff --git a/modules/nf-core/gridss/preprocess/environment.yml b/modules/nf-core/gridss/preprocess/environment.yml
new file mode 100644
index 000000000000..2b33728daea5
--- /dev/null
+++ b/modules/nf-core/gridss/preprocess/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::gridss=2.13.2"
diff --git a/modules/nf-core/gridss/preprocess/main.nf b/modules/nf-core/gridss/preprocess/main.nf
new file mode 100644
index 000000000000..7412a0e77232
--- /dev/null
+++ b/modules/nf-core/gridss/preprocess/main.nf
@@ -0,0 +1,63 @@
+process GRIDSS_PREPROCESS {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gridss:2.13.2--h50ea8bc_3':
+        'quay.io/biocontainers/gridss:2.13.2--h50ea8bc_3' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fasta_fai)
+    tuple val(meta4), path(bwa_index)
+
+    output:
+    tuple val(meta), path("*.gridss.working"), emit: preprocess_dir
+    tuple val("${task.process}"), val('gridss'), eval("CallVariants --version 2>&1 | sed 's/-gridss\$//'"), topic: versions, emit: versions_gridss
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // Leave 1 GB of headroom below the task allocation, but never request less than a 1 GB heap.
+    // With no memory directive the heap flags are omitted, so GRIDSS applies its own defaults.
+    def heap_gb = task.memory ? Math.max(task.memory.toGiga() - 1, 1) : null
+    def heap_args = heap_gb ? "--jvmheap ${heap_gb}g --otherjvmheap ${heap_gb}g" : ''
+
+    // Symlink the BWA index files into the task directory, next to the staged reference FASTA.
+    """
+    ln -s \$(find -L ${bwa_index} -regex '.*\\.\\(amb\\|ann\\|pac\\|gridsscache\\|sa\\|bwt\\|img\\|alt\\)') ./
+
+    gridss \\
+        --threads ${task.cpus} \\
+        --steps preprocess \\
+        ${heap_args} \\
+        --reference ${fasta} \\
+        ${args} \\
+        ${bam}
+
+    """
+
+    stub:
+    // The real run names the working directory contents after the input BAM, not after
+    // task.ext.prefix, so the stub mirrors that layout to stay compatible with downstream steps.
+    def workdir = "${bam}.gridss.working"
+
+    """
+    mkdir -p ${workdir}
+
+    touch ${workdir}/${bam}.cigar_metrics
+    touch ${workdir}/${bam}.computesamtags.changes.tsv
+    touch ${workdir}/${bam}.coverage.blacklist.bed
+    touch ${workdir}/${bam}.idsv_metrics
+    touch ${workdir}/${bam}.insert_size_histogram.pdf
+    touch ${workdir}/${bam}.insert_size_metrics
+    touch ${workdir}/${bam}.mapq_metrics
+    touch ${workdir}/${bam}.sv.bam
+    touch ${workdir}/${bam}.sv.bam.csi
+    touch ${workdir}/${bam}.tag_metrics
+    """
+}
diff --git a/modules/nf-core/gridss/preprocess/meta.yml b/modules/nf-core/gridss/preprocess/meta.yml
new file mode 100644
index 000000000000..b61e8ac98df8
--- /dev/null
+++ b/modules/nf-core/gridss/preprocess/meta.yml
@@ -0,0 +1,100 @@
+name: "gridss_preprocess"
+description: Run the preprocess step of GRIDSS to extract multiple Picard
+  metrics (insert size, MAPQ, CIGAR, IDSV, tag and coverage metrics) from an
+  input BAM file prior to assembly and variant calling.
+keywords:
+  - gridss
+  - preprocess
+  - structural variants
+  - bam
+tools:
+  - gridss:
+      description: "GRIDSS: the Genomic Rearrangement IDentification Software Suite"
+      homepage: "https://github.com/PapenfussLab/gridss"
+      documentation: "https://github.com/PapenfussLab/gridss/wiki/GRIDSS-Documentation"
+      tool_dev_url: "https://github.com/PapenfussLab/gridss"
+      doi: "10.1186/s13059-021-02423-x"
+      licence:
+        - "GPL v3"
+      identifier: biotools:gridss
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test' ]
+    - bam:
+        type: file
+        description: Input BAM file
+        pattern: "*.bam"
+        ontologies:
+          - edam: http://edamontology.org/format_2572
+    - bai:
+        type: file
+        description: Index of the input BAM file
+        pattern: "*.bai"
+        ontologies: []
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing reference information
+    - fasta:
+        type: file
+        description: The reference fasta
+        pattern: "*.{fa,fna,fasta}"
+        ontologies: []
+  - - meta3:
+        type: map
+        description: |
+          Groovy Map containing reference information
+    - fasta_fai:
+        type: file
+        description: The index of the reference fasta
+        pattern: "*.fai"
+        ontologies: []
+  - - meta4:
+        type: map
+        description: |
+          Groovy Map containing reference information
+    - bwa_index:
+        type: directory
+        description: The BWA index created from the reference fasta, will be
+          generated by Gridss in the setupreference step
+output:
+  preprocess_dir:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "*.gridss.working":
+          type: directory
+          description: The working directory produced by the GRIDSS preprocess
+            step containing Picard metrics (insert size, MAPQ, CIGAR, IDSV, tag,
+            coverage) and SV-relevant reads used by downstream GRIDSS steps
+          pattern: "*.gridss.working"
+  versions_gridss:
+    - - ${task.process}:
+          type: string
+          description: The process
+      - gridss:
+          type: string
+          description: The tool name
+      - CallVariants --version 2>&1 | sed 's/-gridss\$//':
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process
+      - gridss:
+          type: string
+          description: The tool name
+      - CallVariants --version 2>&1 | sed 's/-gridss\$//':
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@imsarath"
+maintainers:
+  - "@imsarath"
diff --git a/modules/nf-core/gridss/preprocess/tests/main.nf.test b/modules/nf-core/gridss/preprocess/tests/main.nf.test
new file mode 100644
index 000000000000..9dba41f0f6dd
--- /dev/null
+++ b/modules/nf-core/gridss/preprocess/tests/main.nf.test
@@ -0,0 +1,107 @@
+nextflow_process {
+
+    name "Test Process GRIDSS_PREPROCESS"
+    script "../main.nf"
+    config "./nextflow.config"
+    process "GRIDSS_PREPROCESS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gridss"
+    tag "gridss/preprocess"
+    tag "bwa/index"
+
+    setup {
+
+        run("BWA_INDEX") {
+            script "../../../bwa/index/main.nf"
+            process {
+                """
+                input[0] = [ [id:'fasta'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ]
+                """
+            }
+        }
+    }
+
+    test("human - bam - bwa") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
+                ]
+                input[1] = [ [id:'fasta'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = [ [id:'fasta_fai'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                input[3] = BWA_INDEX.out.index
+                """
+            }
+        }
+
+        then {
+            def workdir = process.out.preprocess_dir[0][1]
+            def prefix = "test.paired_end.sorted.bam"
+            // Picard `# Started on:` and R's PDF `/CreationDate` add per-run timestamps,
+            // and `sv.bam`/`sv.bam.csi` BGZF blocks differ across conda/docker — snapshot only stable parts.
+            // Presence of `sv.bam` and `sv.bam.csi` is covered by the directory listing snapshot.
+            def stripPicardHeaderMd5 = { f -> "${file(f).name}:md5,${listToMD5(path(f).readLines().findAll { !it.startsWith("#") })}" }
+
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.preprocess_dir.collect { meta, dir -> [meta, file(dir).list().findAll { it.startsWith(prefix) }.sort()] },
+                    path("${workdir}/${prefix}.computesamtags.changes.tsv"),
+                    path("${workdir}/${prefix}.coverage.blacklist.bed"),
+                    stripPicardHeaderMd5("${workdir}/${prefix}.cigar_metrics"),
+                    stripPicardHeaderMd5("${workdir}/${prefix}.idsv_metrics"),
+                    stripPicardHeaderMd5("${workdir}/${prefix}.insert_size_metrics"),
+                    stripPicardHeaderMd5("${workdir}/${prefix}.mapq_metrics"),
+                    stripPicardHeaderMd5("${workdir}/${prefix}.tag_metrics"),
+                    process.out.findAll { key, val -> key.startsWith("versions") }
+                ).match() }
+            )
+        }
+    }
+
+    test("human - bam - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
+                ]
+                input[1] = [ [id:'fasta'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ]
+                input[2] = [ [id:'fasta_fai'],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                input[3] = [ [], [] ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.preprocess_dir,
+                    process.out.findAll { key, val -> key.startsWith("versions") }
+                ).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/gridss/preprocess/tests/main.nf.test.snap b/modules/nf-core/gridss/preprocess/tests/main.nf.test.snap
new file mode 100644
index 000000000000..07af763a0d3b
--- /dev/null
+++ b/modules/nf-core/gridss/preprocess/tests/main.nf.test.snap
@@ -0,0 +1,83 @@
+{
+    "human - bam - stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    [
+                        "test.paired_end.sorted.bam.cigar_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.paired_end.sorted.bam.computesamtags.changes.tsv:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.paired_end.sorted.bam.coverage.blacklist.bed:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.paired_end.sorted.bam.idsv_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.paired_end.sorted.bam.insert_size_histogram.pdf:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.paired_end.sorted.bam.insert_size_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.paired_end.sorted.bam.mapq_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.paired_end.sorted.bam.sv.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.paired_end.sorted.bam.sv.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test.paired_end.sorted.bam.tag_metrics:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ]
+            ],
+            {
+                "versions_gridss": [
+                    [
+                        "GRIDSS_PREPROCESS",
+                        "gridss",
+                        "2.13.2"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-06-12T14:30:07.433688807",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "26.04.3"
+        }
+    },
+    "human - bam - bwa": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    [
+                        "test.paired_end.sorted.bam.cigar_metrics",
+                        "test.paired_end.sorted.bam.computesamtags.changes.tsv",
+                        "test.paired_end.sorted.bam.coverage.blacklist.bed",
+                        "test.paired_end.sorted.bam.idsv_metrics",
+                        "test.paired_end.sorted.bam.insert_size_histogram.pdf",
+                        "test.paired_end.sorted.bam.insert_size_metrics",
+                        "test.paired_end.sorted.bam.mapq_metrics",
+                        "test.paired_end.sorted.bam.sv.bam",
+                        "test.paired_end.sorted.bam.sv.bam.csi",
+                        "test.paired_end.sorted.bam.tag_metrics"
+                    ]
+                ]
+            ],
+            "test.paired_end.sorted.bam.computesamtags.changes.tsv:md5,ce0a87ccee35f990cd878e12a8a84bae",
+            "test.paired_end.sorted.bam.coverage.blacklist.bed:md5,63a1da1606bf23357ad6b1b166c21651",
+            "test.paired_end.sorted.bam.cigar_metrics:md5,29b9a700b9da9f2cb23e199e8c15af31",
+            "test.paired_end.sorted.bam.idsv_metrics:md5,eeee010100dcea8b9f9eeeac44f8d142",
+            "test.paired_end.sorted.bam.insert_size_metrics:md5,4a7d860f1073a82c093373bdbdbd6bf3",
+            "test.paired_end.sorted.bam.mapq_metrics:md5,dff79a95a8f9b40de520ef438c9fab59",
+            "test.paired_end.sorted.bam.tag_metrics:md5,486bf2bab6f506dd20f98e7fa05ae39c",
+            {
+                "versions_gridss": [
+                    [
+                        "GRIDSS_PREPROCESS",
+                        "gridss",
+                        "2.13.2"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-06-12T14:29:56.150511142",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "26.04.3"
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gridss/preprocess/tests/nextflow.config b/modules/nf-core/gridss/preprocess/tests/nextflow.config
new file mode 100644
index 000000000000..f20e530a9467
--- /dev/null
+++ b/modules/nf-core/gridss/preprocess/tests/nextflow.config
@@ -0,0 +1,6 @@
+process {
+    withName: BWA_INDEX {
+        // GRIDSS requires the BWA index to be prefixed with full name - "genome.fasta"
+        ext.prefix = { "genome.fasta" }
+    }
+}