Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/gridss/preprocess/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment used by the GRIDSS preprocess module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Pinned GRIDSS release; same version (2.13.2) as the container images
  # referenced by the module's main.nf.
  - bioconda::gridss=2.13.2
59 changes: 59 additions & 0 deletions modules/nf-core/gridss/preprocess/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Runs only the `preprocess` step of GRIDSS on a single BAM file.
//
// Inputs:
//   meta,  bam, bai  - sample metadata map, BAM file and its index
//   meta2, fasta     - reference metadata map and reference FASTA
//   meta3, fasta_fai - reference metadata map and FASTA index
//   meta4, bwa_index - reference metadata map and directory holding the BWA index files
//
// Outputs:
//   preprocess_dir   - tuple of meta and the `*.gridss.working` directory found in the task directory
//   versions_gridss  - process name, tool name and the version reported by `CallVariants --version`,
//                      also published on the `versions` topic
process GRIDSS_PREPROCESS {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gridss:2.13.2--h50ea8bc_3':
        'quay.io/biocontainers/gridss:2.13.2--h50ea8bc_3' }"

    input:
    tuple val(meta), path(bam), path(bai)
    tuple val(meta2), path(fasta)
    tuple val(meta3), path(fasta_fai)
    tuple val(meta4), path(bwa_index)

    output:
    tuple val(meta), path("*.gridss.working"), emit: preprocess_dir
    tuple val("${task.process}"), val('gridss'), eval("CallVariants --version 2>&1 | sed 's/-gridss\$//'"), topic: versions, emit: versions_gridss

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Declared to follow the usual module layout; not referenced by the command below.
    // NOTE(review): the real-run test output is named after the input BAM, not after `prefix` - confirm
    // that `task.ext.prefix` is meant to have no effect here.
    def prefix = task.ext.prefix ?: "${meta.id}"
    // JVM heap in GB: task memory minus 1 GB of headroom, but never below 1 GB
    // (a 1 GB task would otherwise request an invalid `0g` heap). When no memory
    // directive is set, `task.memory` is null, so fall back to a fixed default
    // instead of failing on `null.toGiga()`.
    def heap_gb = 3
    if (!task.memory) {
        log.info '[GRIDSS preprocess] Available memory not known - defaulting to 3GB of JVM heap. Specify process memory requirements to change this.'
    } else {
        heap_gb = Math.max(task.memory.toGiga() - 1, 1)
    }

    """
    ln -s \$(find -L ${bwa_index} -regex '.*\\.\\(amb\\|ann\\|pac\\|gridsscache\\|sa\\|bwt\\|img\\|alt\\)') ./

    gridss \\
        --threads ${task.cpus} \\
        --steps preprocess \\
        --jvmheap ${heap_gb}g \\
        --otherjvmheap ${heap_gb}g \\
        --reference ${fasta} \\
        ${args} \\
        ${bam}
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"

    // NOTE(review): these placeholder names are built from `prefix` plus `.gridss.targeted.bam`,
    // while the real-run test snapshot lists files named after the input BAM
    // (e.g. `test.paired_end.sorted.bam.cigar_metrics`). Left unchanged because the
    // committed stub snapshot pins the current names - confirm which naming downstream steps expect.
    """
    mkdir -p ${prefix}.gridss.working/

    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.cigar_metrics
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.computesamtags.changes.tsv
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.coverage.blacklist.bed
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.idsv_metrics
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.insert_size_histogram.pdf
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.insert_size_metrics
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.mapq_metrics
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.sv.bam
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.sv.bam.csi
    touch ${prefix}.gridss.working/${prefix}.gridss.targeted.bam.tag_metrics
    """
}
100 changes: 100 additions & 0 deletions modules/nf-core/gridss/preprocess/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for GRIDSS_PREPROCESS: describes the tool, the four input
# tuples and the emitted channels declared in the module's main.nf.
name: gridss_preprocess
description: >-
  Run the preprocess step of GRIDSS to extract multiple Picard
  metrics (insert size, MAPQ, CIGAR, IDSV, tag and coverage metrics) from an
  input BAM file prior to assembly and variant calling.
keywords:
  - gridss
  - preprocess
  - structural variants
  - bam
tools:
  - gridss:
      description: "GRIDSS: the Genomic Rearrangement IDentification Software Suite"
      homepage: https://github.com/PapenfussLab/gridss
      documentation: https://github.com/PapenfussLab/gridss/wiki/GRIDSS-Documentation
      tool_dev_url: https://github.com/PapenfussLab/gridss
      doi: "10.1186/s13059-021-02423-x"
      licence:
        - GPL v3
      identifier: biotools:gridss
input:
  # Input 0: sample metadata, BAM and BAM index
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. [ id:'test' ]
    - bam:
        type: file
        description: Input BAM file
        pattern: "*.bam"
        ontologies:
          - edam: http://edamontology.org/format_2572
    - bai:
        type: file
        description: Index of the input BAM file
        pattern: "*.bai"
        ontologies: []
  # Input 1: reference FASTA
  - - meta2:
        type: map
        description: |
          Groovy Map containing reference information
    - fasta:
        type: file
        description: The reference fasta
        pattern: "*.{fa,fna,fasta}"
        ontologies: []
  # Input 2: reference FASTA index
  - - meta3:
        type: map
        description: |
          Groovy Map containing reference information
    - fasta_fai:
        type: file
        description: The index of the reference fasta
        pattern: "*.fai"
        ontologies: []
  # Input 3: directory with the BWA index files
  - - meta4:
        type: map
        description: |
          Groovy Map containing reference information
    - bwa_index:
        type: directory
        description: >-
          The BWA index created from the reference fasta, will be
          generated by Gridss in the setupreference step
output:
  preprocess_dir:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test' ]
      - "*.gridss.working":
          type: directory
          description: >-
            The working directory produced by the GRIDSS preprocess
            step containing Picard metrics (insert size, MAPQ, CIGAR, IDSV, tag,
            coverage) and SV-relevant reads used by downstream GRIDSS steps
          pattern: "*.gridss.working"
  # Version tuple: process name, tool name, version-reporting command
  versions_gridss:
    - - ${task.process}:
          type: string
          description: The process
      - gridss:
          type: string
          description: The tool name
      - CallVariants --version 2>&1 | sed 's/-gridss\$//':
          type: eval
          description: The expression to obtain the version of the tool
topics:
  # Same version tuple, as published on the `versions` topic
  versions:
    - - ${task.process}:
          type: string
          description: The process
      - gridss:
          type: string
          description: The tool name
      - CallVariants --version 2>&1 | sed 's/-gridss\$//':
          type: eval
          description: The expression to obtain the version of the tool
authors:
  - "@imsarath"
maintainers:
  - "@imsarath"
107 changes: 107 additions & 0 deletions modules/nf-core/gridss/preprocess/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// nf-test suite for GRIDSS_PREPROCESS: one real run against a BWA index built in
// `setup`, and one `-stub` run that needs no index.
nextflow_process {

    name "Test Process GRIDSS_PREPROCESS"
    script "../main.nf"
    config "./nextflow.config"
    process "GRIDSS_PREPROCESS"

    tag "modules"
    tag "modules_nfcore"
    tag "gridss"
    tag "gridss/preprocess"
    tag "bwa/index"

    // Build the BWA index once; the real-run test consumes it as input[3].
    setup {

        run("BWA_INDEX") {
            script "../../../bwa/index/main.nf"
            process {
                """
                input[0] = [ [id:'fasta'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
                ]
                """
            }
        }
    }

    test("human - bam - bwa") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
                ]
                input[1] = [ [id:'fasta'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
                ]
                input[2] = [ [id:'fasta_fai'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
                ]
                input[3] = BWA_INDEX.out.index
                """
            }
        }

        then {
            // Emitted working directory and the file-name stem shared by its contents.
            def outDir  = process.out.preprocess_dir[0][1]
            def bamName = "test.paired_end.sorted.bam"

            // Only run-independent content is snapshotted:
            //  - Picard metrics carry a `# Started on:` header and R's PDF a `/CreationDate`,
            //    both of which change on every run;
            //  - the BGZF blocks of `sv.bam` / `sv.bam.csi` differ between conda and docker.
            // The existence of `sv.bam` and `sv.bam.csi` is still checked through the
            // directory listing below.

            // "<file name>:md5,<md5 of all lines not starting with '#'>"
            def headerlessMd5 = { f ->
                def body = path(f).readLines().findAll { line -> !line.startsWith("#") }
                "${file(f).name}:md5,${listToMD5(body)}"
            }

            // [meta, sorted names of the entries in the working directory that start with the BAM name]
            def listing = process.out.preprocess_dir.collect { meta, dir ->
                [meta, file(dir).list().findAll { entry -> entry.startsWith(bamName) }.sort()]
            }

            assertAll(
                { assert process.success },
                { assert snapshot(
                    listing,
                    path("${outDir}/${bamName}.computesamtags.changes.tsv"),
                    path("${outDir}/${bamName}.coverage.blacklist.bed"),
                    headerlessMd5("${outDir}/${bamName}.cigar_metrics"),
                    headerlessMd5("${outDir}/${bamName}.idsv_metrics"),
                    headerlessMd5("${outDir}/${bamName}.insert_size_metrics"),
                    headerlessMd5("${outDir}/${bamName}.mapq_metrics"),
                    headerlessMd5("${outDir}/${bamName}.tag_metrics"),
                    process.out.findAll { k, v -> k.startsWith("versions") }
                ).match() }
            )
        }
    }

    test("human - bam - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
                ]
                input[1] = [ [id:'fasta'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
                ]
                input[2] = [ [id:'fasta_fai'],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
                ]
                input[3] = [ [], [] ]
                """
            }
        }

        then {
            // The stub only touches empty placeholder files, so the whole directory can be snapshotted.
            assertAll(
                { assert process.success },
                { assert snapshot(
                    process.out.preprocess_dir,
                    process.out.findAll { k, v -> k.startsWith("versions") }
                ).match() }
            )
        }
    }
}
83 changes: 83 additions & 0 deletions modules/nf-core/gridss/preprocess/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
{
"human - bam - stub": {
"content": [
[
[
{
"id": "test"
},
[
"test.gridss.targeted.bam.cigar_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.computesamtags.changes.tsv:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.coverage.blacklist.bed:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.idsv_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.insert_size_histogram.pdf:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.insert_size_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.mapq_metrics:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.sv.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.sv.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e",
"test.gridss.targeted.bam.tag_metrics:md5,d41d8cd98f00b204e9800998ecf8427e"
]
]
],
{
"versions_gridss": [
[
"GRIDSS_PREPROCESS",
"gridss",
"2.13.2"
]
]
}
],
"timestamp": "2026-06-12T14:30:07.433688807",
"meta": {
"nf-test": "0.9.4",
"nextflow": "26.04.3"
}
},
"human - bam - bwa": {
"content": [
[
[
{
"id": "test"
},
[
"test.paired_end.sorted.bam.cigar_metrics",
"test.paired_end.sorted.bam.computesamtags.changes.tsv",
"test.paired_end.sorted.bam.coverage.blacklist.bed",
"test.paired_end.sorted.bam.idsv_metrics",
"test.paired_end.sorted.bam.insert_size_histogram.pdf",
"test.paired_end.sorted.bam.insert_size_metrics",
"test.paired_end.sorted.bam.mapq_metrics",
"test.paired_end.sorted.bam.sv.bam",
"test.paired_end.sorted.bam.sv.bam.csi",
"test.paired_end.sorted.bam.tag_metrics"
]
]
],
"test.paired_end.sorted.bam.computesamtags.changes.tsv:md5,ce0a87ccee35f990cd878e12a8a84bae",
"test.paired_end.sorted.bam.coverage.blacklist.bed:md5,63a1da1606bf23357ad6b1b166c21651",
"test.paired_end.sorted.bam.cigar_metrics:md5,29b9a700b9da9f2cb23e199e8c15af31",
"test.paired_end.sorted.bam.idsv_metrics:md5,eeee010100dcea8b9f9eeeac44f8d142",
"test.paired_end.sorted.bam.insert_size_metrics:md5,4a7d860f1073a82c093373bdbdbd6bf3",
"test.paired_end.sorted.bam.mapq_metrics:md5,dff79a95a8f9b40de520ef438c9fab59",
"test.paired_end.sorted.bam.tag_metrics:md5,486bf2bab6f506dd20f98e7fa05ae39c",
{
"versions_gridss": [
[
"GRIDSS_PREPROCESS",
"gridss",
"2.13.2"
]
]
}
],
"timestamp": "2026-06-12T14:29:56.150511142",
"meta": {
"nf-test": "0.9.4",
"nextflow": "26.04.3"
}
}
}
6 changes: 6 additions & 0 deletions modules/nf-core/gridss/preprocess/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Test-only configuration for the GRIDSS_PREPROCESS nf-test suite.
process {
    withName: 'BWA_INDEX' {
        // GRIDSS requires the BWA index files to carry the full reference file
        // name as their prefix, so name the index "genome.fasta" to match the
        // test reference.
        ext.prefix = { 'genome.fasta' }
    }
}