diff --git a/modules/local/fastqvalidator/environment.yml b/modules/local/fastqvalidator/environment.yml new file mode 100644 index 00000000..32bc330d --- /dev/null +++ b/modules/local/fastqvalidator/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # TODO nf-core: List required Conda package(s). + # Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + # For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + - "YOUR-TOOL-HERE" diff --git a/modules/local/fastqvalidator/main.nf b/modules/local/fastqvalidator/main.nf new file mode 100644 index 00000000..43bfbef3 --- /dev/null +++ b/modules/local/fastqvalidator/main.nf @@ -0,0 +1,55 @@ +process FASTQ_VALIDATOR { + tag "${sampleName}" + // tag "$meta.id" + label 'process_medium' + + input: + tuple val(sampleName), path(sampleRead) + val ready + + + // tuple val(meta), path(sampleRead) + + + + output: + path("*.fastq_report.csv") , emit: fastq_report + tuple val(sampleName), path(sampleRead) , emit: reads + + // tuple val(meta), path("*.bam") , emit: bam + // path "versions.yml" , emit: versions + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + fastqvalidator \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqvalidator: \$(fastqvalidator --version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${sampleRead.simpleName}.check.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqvalidator: \$(fastqvalidator --version) + END_VERSIONS + """ +} diff --git a/modules/local/fastqvalidator/meta.yml b/modules/local/fastqvalidator/meta.yml new file mode 100644 index 00000000..ff660de6 --- /dev/null +++ b/modules/local/fastqvalidator/meta.yml @@ -0,0 +1,68 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "fastqvalidator" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort + - example + - genomics +tools: + - "fastqvalidator": + ## TODO nf-core: Add a description and other details for the software below + description: "" + homepage: "" + documentation: "" + tool_dev_url: "" + doi: "" + licence: + identifier: biotools:fastqvalidator + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + - edam: "http://edamontology.org/format_2573" # CRAM + - edam: "http://edamontology.org/format_3462" # SAM + +## TODO nf-core: Add a description of all of the variables used as output +output: + - bam: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + ## TODO nf-core: Delete / customise this example output + - "*.bam": + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + - edam: "http://edamontology.org/format_2573" # CRAM + - edam: "http://edamontology.org/format_3462" # SAM + + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@abhi18av" +maintainers: + - "@abhi18av" diff --git a/modules/local/fastqvalidator/tests/main.nf.test b/modules/local/fastqvalidator/tests/main.nf.test new file mode 100644 index 00000000..fdbed697 --- /dev/null +++ b/modules/local/fastqvalidator/tests/main.nf.test @@ -0,0 +1,73 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core modules test fastqvalidator +nextflow_process { + + name "Test Process FASTQVALIDATOR" + script "../main.nf" + process "FASTQVALIDATOR" + + tag "modules" + tag "modules_" + tag "fastqvalidator" + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used + test("sarscov2 - bam") { + + // TODO nf-core: If you are created a test for a chained module + // (the module requires running more than one process to generate the required output) + // add the 'setup' method here. + // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules). + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples. + ) + } + + } + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix. + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + // TODO nf-core: define inputs of the process here. Example: + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + //TODO nf-core: Add all required assertions to verify the test output. + ) + } + + } + +} diff --git a/modules/nf-core/bcftools/merge/environment.yml b/modules/nf-core/bcftools/merge/environment.yml new file mode 100644 index 00000000..55748860 --- /dev/null +++ b/modules/nf-core/bcftools/merge/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.21 diff --git a/modules/nf-core/bcftools/merge/main.nf b/modules/nf-core/bcftools/merge/main.nf new file mode 100644 index 00000000..bcca1036 --- /dev/null +++ b/modules/nf-core/bcftools/merge/main.nf @@ -0,0 +1,74 @@ +process BCFTOOLS_MERGE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/5a/5acacb55c52bec97c61fd34ffa8721fce82ce823005793592e2a80bf71632cd0/data': + 'community.wave.seqera.io/library/bcftools:1.21--4335bec1d7b44d11' }" + + input: + tuple val(meta), path(vcfs), path(tbis) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(bed) + + output: + tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: vcf + tuple val(meta), path("*.{csi,tbi}") , emit: index, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def input = (vcfs.collect().size() > 1) ? vcfs.sort{ it.name } : vcfs + def regions = bed ? "--regions-file $bed" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + """ + bcftools merge \\ + $args \\ + $regions \\ + --threads $task.cpus \\ + --output ${prefix}.${extension} \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : "" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/merge/meta.yml b/modules/nf-core/bcftools/merge/meta.yml new file mode 100644 index 00000000..2cf09a1d --- /dev/null +++ b/modules/nf-core/bcftools/merge/meta.yml @@ -0,0 +1,96 @@ +name: bcftools_merge +description: Merge VCF files +keywords: + - variant calling + - merge + - VCF +tools: + - merge: + description: | + Merge VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: file + description: | + List containing 2 or more vcf files + e.g. [ 'file1.vcf', 'file2.vcf' ] + - tbis: + type: file + description: | + List containing the tbi index files corresponding to the vcfs input files + e.g. [ 'file1.vcf.tbi', 'file2.vcf.tbi' ] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: "(Optional) The fasta reference file (only necessary for the `--gvcf + FILE` parameter)" + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: "(Optional) The fasta reference file index (only necessary for + the `--gvcf FILE` parameter)" + pattern: "*.fai" + - - meta4: + type: map + description: | + Groovy Map containing bed information + e.g. [ id:'genome' ] + - bed: + type: file + description: "(Optional) The bed regions to merge on" + pattern: "*.bed" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{bcf,vcf}{,.gz}": + type: file + description: merged output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{csi,tbi}": + type: file + description: index of merged output + pattern: "*.{csi,tbi}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@nvnieuwk" + - "@ramprasadn" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@nvnieuwk" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/merge/tests/bcf.config b/modules/nf-core/bcftools/merge/tests/bcf.config new file mode 100644 index 00000000..4467d07d --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/bcf.config @@ -0,0 +1,3 @@ +process { + ext.args = '--output-type u --no-version' +} diff --git a/modules/nf-core/bcftools/merge/tests/bcf_gz.config b/modules/nf-core/bcftools/merge/tests/bcf_gz.config new file mode 100644 index 00000000..280de8db --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/bcf_gz.config @@ -0,0 +1,3 @@ +process { + ext.args = '--output-type b --no-version' +} diff --git a/modules/nf-core/bcftools/merge/tests/main.nf.test b/modules/nf-core/bcftools/merge/tests/main.nf.test new file mode 100644 index 00000000..3995fc1a --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/main.nf.test @@ -0,0 +1,853 @@ +nextflow_process { + + name "Test Process BCFTOOLS_MERGE" + script "../main.nf" + process "BCFTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/merge" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf output") { + + config "./vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).md5, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output") { + + config "./vcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - bcf output") { + + config "./bcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("bcf") }, + { assert snapshot( + file(process.out.vcf.get(0).get(1)).name, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - bcf.gz output") { + + config "./bcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("bcf.gz") }, + { assert snapshot( + file(process.out.vcf.get(0).get(1)).name, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert process.out.index.get(0).get(1).endsWith("csi") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + file(process.out.index.get(0).get(1)).name, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - csi index") { + + config "./vcf_gz_index_csi.config" + + when { + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert process.out.index.get(0).get(1).endsWith("csi") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + file(process.out.index.get(0).get(1)).name, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - tbi index") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert process.out.index.get(0).get(1).endsWith("tbi") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + file(process.out.index.get(0).get(1)).name, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], bed") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).md5, + process.out.versions, + ).match() } + ) + } + + } + + test("homo_sapiens - [vcf, tbi], fasta, fai, bed - vcf.gz output") { + + config "./nextflow.gvcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ] + input[2] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] + ] + input[3] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - one sample") { + + config "./nextflow.config" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).md5, + process.out.versions, + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf output - stub") { + + options "-stub" + config "./vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - stub") { + + options "-stub" + config "./vcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - bcf output - stub") { + + options "-stub" + config "./bcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("bcf") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - bcf.gz output - stub") { + + options "-stub" + config "./bcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("bcf.gz") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - index - stub") { + + options "-stub" + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert process.out.index.get(0).get(1).endsWith("csi") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - csi index - stub") { + + options "-stub" + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert process.out.index.get(0).get(1).endsWith("csi") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - tbi index - stub") { + + options "-stub" + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert process.out.index.get(0).get(1).endsWith("tbi") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], bed - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [vcf, tbi], fasta, fai, bed - vcf.gz output - stub") { + + options "-stub" + config "./nextflow.gvcf.config" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi', checkIfExists: true), + ] + ] + input[1] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ] + input[2] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] + ] + input[3] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf.gz") }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - one sample - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.vcf.get(0).get(1).endsWith("vcf") }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/bcftools/merge/tests/main.nf.test.snap b/modules/nf-core/bcftools/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..5f2ac39e --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/main.nf.test.snap @@ -0,0 +1,607 @@ +{ + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - tbi index": { + "content": [ + "e0de448dc8e712956a03ce68d79a0b3a", + "test.vcf.gz.tbi", + [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:36:57.808346534" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:37:26.710546205" + }, + "sarscov2 - [vcf, tbi], [], [], bed": { + "content": [ + "febdcfb851dcfc83d8248520830aef10", + [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:37:02.283952426" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ], + "index": [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:37:47.351607982" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf output": { + "content": [ + "57bb84274f336465d0a0946b532093b0", + [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:36:29.065115273" + }, + "sarscov2 - [vcf, tbi], [], [], [] - bcf.gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:37:42.876870714" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - tbi index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ], + "index": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:37:56.095127856" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:37:30.988068166" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - csi index": { + "content": [ + "e0de448dc8e712956a03ce68d79a0b3a", + "test.vcf.gz.csi", + [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:36:53.522123382" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output": { + "content": [ + "e0de448dc8e712956a03ce68d79a0b3a", + [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:36:33.357373822" + }, + "sarscov2 - [vcf, tbi], [], [], bed - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:38:00.615531476" + }, + "sarscov2 - [vcf, tbi], [], [], [] - bcf output": { + "content": [ + "test.bcf", + [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:36:37.68065343" + }, + "sarscov2 - [vcf, tbi], [], [], [] - bcf output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:37:35.402926588" + }, + "sarscov2 - [vcf, tbi], [], [], [] - one sample - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:38:13.018206068" + }, + "homo_sapiens - [vcf, tbi], fasta, fai, bed - vcf.gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:38:08.718741834" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - index": { + "content": [ + "e0de448dc8e712956a03ce68d79a0b3a", + "test.vcf.gz.csi", + [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:36:49.114421247" + }, + "homo_sapiens - [vcf, tbi], fasta, fai, bed - vcf.gz output": { + "content": [ + "645b7f7f9131bfe350a9ec3cf82c17fe", + [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:37:13.928323894" + }, + "sarscov2 - [vcf, tbi], [], [], [] - one sample": { + "content": [ + "2a374cf02f0c32cf607646167e7f153b", + [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:37:18.074920523" + }, + "sarscov2 - [vcf, tbi], [], [], [] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:37:22.322283619" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf.gz output - csi index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ], + "index": [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:37:51.791248219" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": [ + "e0de448dc8e712956a03ce68d79a0b3a", + [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:36:21.537368529" + }, + "sarscov2 - [vcf, tbi], [], [], [] - bcf.gz output": { + "content": [ + "test.bcf.gz", + [ + "versions.yml:md5,ba7e9c7697d692c11c76006d21a8411f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T13:36:41.94604183" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/merge/tests/nextflow.config b/modules/nf-core/bcftools/merge/tests/nextflow.config new file mode 100644 index 00000000..c3f0b715 --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BCFTOOLS_MERGE { + ext.args = '--force-samples --force-single --no-version' + } +} diff --git a/modules/nf-core/bcftools/merge/tests/nextflow.gvcf.config b/modules/nf-core/bcftools/merge/tests/nextflow.gvcf.config new file mode 100644 index 00000000..8c457b71 --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/nextflow.gvcf.config @@ -0,0 +1,5 @@ +process { + withName: BCFTOOLS_MERGE { + ext.args = { "--force-samples --no-version --output-type z --gvcf $fasta" } + } +} diff --git a/modules/nf-core/bcftools/merge/tests/vcf.config b/modules/nf-core/bcftools/merge/tests/vcf.config new file mode 100644 index 00000000..759222e5 --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/vcf.config @@ -0,0 +1,3 @@ +process { + ext.args = '--output-type v --no-version' +} diff --git a/modules/nf-core/bcftools/merge/tests/vcf_gz.config b/modules/nf-core/bcftools/merge/tests/vcf_gz.config new file mode 100644 index 00000000..8b6ad8b4 --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/vcf_gz.config @@ -0,0 +1,3 @@ +process { + ext.args = '--output-type z --no-version' +} diff --git a/modules/nf-core/bcftools/merge/tests/vcf_gz_index.config b/modules/nf-core/bcftools/merge/tests/vcf_gz_index.config new file mode 100644 index 00000000..9f1e9b1d --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/vcf_gz_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/merge/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/merge/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..8308ee1a --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/vcf_gz_index_csi.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/merge/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/merge/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..9be4075b --- /dev/null +++ b/modules/nf-core/bcftools/merge/tests/vcf_gz_index_tbi.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/modules/nf-core/bcftools/view/environment.yml b/modules/nf-core/bcftools/view/environment.yml new file mode 100644 index 00000000..55748860 --- /dev/null +++ b/modules/nf-core/bcftools/view/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.21 diff --git a/modules/nf-core/bcftools/view/main.nf b/modules/nf-core/bcftools/view/main.nf new file mode 100644 index 00000000..5ef5d1bc --- /dev/null +++ b/modules/nf-core/bcftools/view/main.nf @@ -0,0 +1,75 @@ +process BCFTOOLS_VIEW { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/5a/5acacb55c52bec97c61fd34ffa8721fce82ce823005793592e2a80bf71632cd0/data': + 'community.wave.seqera.io/library/bcftools:1.21--4335bec1d7b44d11' }" + + input: + tuple val(meta), path(vcf), path(index) + path(regions) + path(targets) + path(samples) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + """ + bcftools view \\ + --output ${prefix}.${extension} \\ + ${regions_file} \\ + ${targets_file} \\ + ${samples_file} \\ + $args \\ + --threads $task.cpus \\ + ${vcf} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def stub_index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && stub_index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${stub_index}" : "" + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/view/meta.yml b/modules/nf-core/bcftools/view/meta.yml new file mode 100644 index 00000000..aa7785f1 --- /dev/null +++ b/modules/nf-core/bcftools/view/meta.yml @@ -0,0 +1,88 @@ +name: bcftools_view +description: View, subset and filter VCF or BCF files by position and filtering expression. + Convert between VCF and BCF +keywords: + - variant calling + - view + - bcftools + - VCF +tools: + - view: + description: | + View, subset and filter VCF or BCF files by position and filtering expression. Convert between VCF and BCF + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be inspected. + e.g. 'file.vcf' + - index: + type: file + description: | + The tab index for the VCF file to be inspected. + e.g. 'file.tbi' + - - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. + e.g. 'file.vcf' + - - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + e.g. 'file.vcf' + - - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: VCF normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" +maintainers: + - "@abhi18av" diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test b/modules/nf-core/bcftools/view/tests/main.nf.test new file mode 100644 index 00000000..1e60c50d --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/main.nf.test @@ -0,0 +1,298 @@ +nextflow_process { + + name "Test Process BCFTOOLS_VIEW" + script "../main.nf" + process "BCFTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/view" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test.snap b/modules/nf-core/bcftools/view/tests/main.nf.test.snap new file mode 100644 index 00000000..93d9adb6 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/main.nf.test.snap @@ -0,0 +1,333 @@ +{ + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,dcf736f3b92fe7943416d890ed15521e" + ], + "csi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,dcf736f3b92fe7943416d890ed15521e" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T14:12:56.166195802" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + + ], + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi" + ] + ], + [ + "versions.yml:md5,dcf736f3b92fe7943416d890ed15521e" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T14:12:33.048845065" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,dcf736f3b92fe7943416d890ed15521e" + ], + "csi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,dcf736f3b92fe7943416d890ed15521e" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T14:12:52.003188711" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,dcf736f3b92fe7943416d890ed15521e" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,dcf736f3b92fe7943416d890ed15521e" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T14:13:03.303865495" + }, + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out.vcf:md5,1bcbd0eff25d316ba915d06463aab17b" + ] + ], + [ + "versions.yml:md5,dcf736f3b92fe7943416d890ed15521e" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T14:12:40.727725444" + }, + "sarscov2 - [vcf, tbi], [], [], [] - stub": { + "content": [ + "out.vcf", + [ + "versions.yml:md5,dcf736f3b92fe7943416d890ed15521e" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T14:12:47.850438431" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi" + ] + ], + [ + + ], + [ + "versions.yml:md5,dcf736f3b92fe7943416d890ed15521e" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T14:12:21.639961317" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + "versions.yml:md5,dcf736f3b92fe7943416d890ed15521e" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T14:12:14.557692028" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi" + ] + ], + [ + + ], + [ + "versions.yml:md5,dcf736f3b92fe7943416d890ed15521e" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T14:12:25.812993907" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/view/tests/nextflow.config b/modules/nf-core/bcftools/view/tests/nextflow.config new file mode 100644 index 00000000..932e3ba6 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--no-version --output-type v' +} diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index.config new file mode 100644 index 00000000..7dd696ee --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..aebffb6f --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..b192ae7d --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/modules/nf-core/bwamem2/mem/environment.yml b/modules/nf-core/bwamem2/mem/environment.yml new file mode 100644 index 00000000..c069e281 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/environment.yml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda depName=bioconda/bwa-mem2 + - bwa-mem2=2.2.1 + # renovate: datasource=conda depName=bioconda/htslib + - htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools + - samtools=1.21 diff --git a/modules/nf-core/bwamem2/mem/main.nf b/modules/nf-core/bwamem2/mem/main.nf new file mode 100644 index 00000000..eab662a8 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/main.nf @@ -0,0 +1,83 @@ +process BWAMEM2_MEM { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9a/9ac054213e67b3c9308e409b459080bbe438f8fd6c646c351bc42887f35a42e7/data' : + 'community.wave.seqera.io/library/bwa-mem2_htslib_samtools:e1f420694f8e42bd' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + val sort_bam + + output: + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram, optional:true + tuple val(meta), path("*.crai") , emit: crai, optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 'sort' : 'view' + + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa-mem2 \\ + mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools $samtools_command $args2 -@ $task.cpus ${reference} -o ${prefix}.${extension} - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + + """ + touch ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwamem2/mem/meta.yml b/modules/nf-core/bwamem2/mem/meta.yml new file mode 100644 index 00000000..d17e0dbd --- /dev/null +++ b/modules/nf-core/bwamem2/mem/meta.yml @@ -0,0 +1,129 @@ +name: bwamem2_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA-mem2 is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/bwa-mem2/bwa-mem2 + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["MIT"] + identifier: "biotools:bwa-mem2" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: + - edam: "http://edamontology.org/data_2044" # Sequence + - edam: "http://edamontology.org/format_1930" # FASTQ + - - meta2: + type: map + description: | + Groovy Map containing reference/index information + e.g. [ id:'test' ] + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{0132,amb,ann,bwt.2bit.64,pac}" + ontologies: + - edam: "http://edamontology.org/data_3210" # Genome index + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fa,fasta,fna}" + ontologies: + - edam: "http://edamontology.org/data_2044" # Sequence + - edam: "http://edamontology.org/format_1929" # FASTA + - - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sam": + type: file + description: Output SAM file containing read alignments + pattern: "*.{sam}" + ontologies: + - edam: "http://edamontology.org/format_2573" # SAM + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Output CRAM file containing read alignments + pattern: "*.{cram}" + ontologies: + - edam: "http://edamontology.org/format_3462" # CRAM + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: Index file for CRAM file + pattern: "*.{crai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Index file for BAM file + pattern: "*.{csi}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@matthdsm" +maintainers: + - "@maxulysse" + - "@matthdsm" diff --git a/modules/nf-core/bwamem2/mem/tests/main.nf.test b/modules/nf-core/bwamem2/mem/tests/main.nf.test new file mode 100644 index 00000000..9e0ab14a --- /dev/null +++ b/modules/nf-core/bwamem2/mem/tests/main.nf.test @@ -0,0 +1,179 @@ +nextflow_process { + + name "Test Process BWAMEM2_MEM" + script "../main.nf" + process "BWAMEM2_MEM" + + tag "modules" + tag "modules_nfcore" + tag "bwamem2" + tag "bwamem2/mem" + tag "bwamem2/index" + + setup { + run("BWAMEM2_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + test("sarscov2 - fastq, index, fasta, false") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap b/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap new file mode 100644 index 00000000..ec90701f --- /dev/null +++ b/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap @@ -0,0 +1,129 @@ +{ + "sarscov2 - [fastq1, fastq2], index, fasta, false": { + "content": [ + "9505760d66e1d5a5d34ab79a98228c6", + "57aeef88ed701a8ebc8e2f0a381b2a6", + [ + "versions.yml:md5,ca1b1dcf82b92fb0751816fca16a477a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-03-27T10:57:52.782442426" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + "versions.yml:md5,ca1b1dcf82b92fb0751816fca16a477a" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + + ], + "versions": [ + "versions.yml:md5,ca1b1dcf82b92fb0751816fca16a477a" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-03-27T10:58:37.140104324" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true": { + "content": [ + "e0c38d5772ca5f4d5d9999f4477e933c", + "af8628d9df18b2d3d4f6fd47ef2bb872", + [ + "versions.yml:md5,ca1b1dcf82b92fb0751816fca16a477a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-03-27T10:58:19.047052261" + }, + "sarscov2 - fastq, index, fasta, false": { + "content": [ + "58d05395bbb819e929885bde415947ae", + "798439cbd7fd81cbcc5078022dc5479d", + [ + "versions.yml:md5,ca1b1dcf82b92fb0751816fca16a477a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-03-27T10:56:53.456559296" + }, + "sarscov2 - fastq, index, fasta, true": { + "content": [ + "276189f6f003f99a87664e74fad2893d", + "94fcf617f5b994584c4e8d4044e16b4f", + [ + "versions.yml:md5,ca1b1dcf82b92fb0751816fca16a477a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-03-27T10:57:21.949711746" + } +} \ No newline at end of file diff --git a/modules/nf-core/delly/call/environment.yml b/modules/nf-core/delly/call/environment.yml new file mode 100644 index 00000000..db3a41ec --- /dev/null +++ b/modules/nf-core/delly/call/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::delly=1.3.3 diff --git a/modules/nf-core/delly/call/main.nf b/modules/nf-core/delly/call/main.nf new file mode 100644 index 00000000..e4453e14 --- /dev/null +++ b/modules/nf-core/delly/call/main.nf @@ -0,0 +1,69 @@ +process DELLY_CALL { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/delly:1.3.3--h4d20210_0' : + 'biocontainers/delly:1.3.3--h4d20210_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(vcf), path(vcf_index), path(exclude_bed) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.{bcf,vcf.gz}") , emit: bcf + tuple val(meta), path("*.{csi,tbi}") , emit: csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "bcf" + + def exclude = exclude_bed ? "--exclude ${exclude_bed}" : "" + + def bcf_output = suffix == "bcf" ? "--outfile ${prefix}.bcf" : "" + def vcf_output = suffix == "vcf" ? "| bgzip ${args2} --threads ${task.cpus} --stdout > ${prefix}.vcf.gz && tabix ${prefix}.vcf.gz" : "" + + def genotype = vcf ? "--vcffile ${vcf}" : "" + + """ + delly \\ + call \\ + ${args} \\ + ${bcf_output} \\ + --genome ${fasta} \\ + ${genotype} \\ + ${exclude} \\ + ${input} \\ + ${vcf_output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + delly: \$( echo \$(delly --version 2>&1) | sed 's/^.*Delly version: v//; s/ using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "bcf" + + def bcf_output = suffix == "bcf" ? "touch ${prefix}.bcf && touch ${prefix}.bcf.csi" : "" + def vcf_output = suffix == "vcf" ? "echo '' | gzip > ${prefix}.vcf.gz && touch ${prefix}.vcf.gz.tbi" : "" + + """ + ${bcf_output} + ${vcf_output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + delly: \$( echo \$(delly --version 2>&1) | sed 's/^.*Delly version: v//; s/ using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/delly/call/meta.yml b/modules/nf-core/delly/call/meta.yml new file mode 100644 index 00000000..ef7c238d --- /dev/null +++ b/modules/nf-core/delly/call/meta.yml @@ -0,0 +1,97 @@ +name: delly_call +description: Call structural variants +keywords: + - genome + - structural + - variants + - bcf +tools: + - delly: + description: Structural variant discovery by integrated paired-end and split-read + analysis + homepage: https://github.com/dellytools/delly + documentation: https://github.com/dellytools/delly/blob/master/README.md + doi: "10.1093/bioinformatics/bts378" + licence: ["BSD-3-Clause"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment must be sorted, indexed, and duplicate + marked + pattern: "*.{bam,cram}" + - input_index: + type: file + description: Index of the BAM/CRAM file + pattern: "*.{bai,crai}" + - vcf: + type: file + description: A BCF/VCF file to genotype with Delly. If this is supplied, the + variant calling will be skipped + pattern: "*.{vcf.gz,bcf}" + - vcf_index: + type: file + description: The index of the BCF/VCF file + pattern: "*.{tbi,csi}" + - exclude_bed: + type: file + description: An optional bed file containing regions to exclude from the called + VCF + pattern: "*.bed" + - - meta2: + type: map + description: | + Groovy Map containing fasta information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing fasta index information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: Index of reference fasta file to identify split-reads + pattern: "*.fai" +output: + - bcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{bcf,vcf.gz}": + type: file + description: Called variants in BCF/VCF format. Specify either "bcf" or "vcf" + in ext.suffix to define the output type + pattern: "*.{bcf,vcf.gz}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{csi,tbi}": + type: file + description: A generated csi index that matches the bcf output (not generated + for vcf files) + pattern: "*.{bcf.csi}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@projectoriented" + - "@nvnieuwk" +maintainers: + - "@projectoriented" + - "@nvnieuwk" diff --git a/modules/nf-core/delly/call/tests/main.nf.test b/modules/nf-core/delly/call/tests/main.nf.test new file mode 100644 index 00000000..80cad20d --- /dev/null +++ b/modules/nf-core/delly/call/tests/main.nf.test @@ -0,0 +1,187 @@ +nextflow_process { + + name "Test Process DELLY_CALL" + script "../main.nf" + process "DELLY_CALL" + + tag "modules" + tag "modules_nfcore" + tag "delly" + tag "delly/call" + + test("homo_sapiens - bam, bai, [], [], [], fasta, fai") { + + config "./vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai", checkIfExists:true), + [], + [], + [] + ] + input[1] = [ + [ id:'fasta' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta", checkIfExists:true) + ] + input[2] = [ + [ id:'fai' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai", checkIfExists:true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("bam") } + ) + } + + } + + test("homo_sapiens - cram, crai, [], [], [], fasta, fai") { + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai", checkIfExists:true), + [], + [], + [] + ] + input[1] = [ + [ id:'fasta' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta", checkIfExists:true) + ] + input[2] = [ + [ id:'fai' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai", checkIfExists:true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("cram") } + ) + } + + } + + test("homo_sapiens - cram, crai, [], [], bed, fasta, fai") { + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai", checkIfExists:true), + [], + [], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed", checkIfExists:true) + ] + input[1] = [ + [ id:'fasta' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta", checkIfExists:true) + ] + input[2] = [ + [ id:'fai' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai", checkIfExists:true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("bed") } + ) + } + + } + + test("homo_sapiens - cram, crai, vcf, tbi, [], fasta, fai") { + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi", checkIfExists:true), + [] + ] + input[1] = [ + [ id:'fasta' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta", checkIfExists:true) + ] + input[2] = [ + [ id:'fai' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai", checkIfExists:true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("genotype") } + ) + } + + } + + test("homo_sapiens - bam, bai, [], [], [], fasta, fai - stub") { + + config "./vcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam", checkIfExists:true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai", checkIfExists:true), + [], + [], + [] + ] + input[1] = [ + [ id:'fasta' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta", checkIfExists:true) + ] + input[2] = [ + [ id:'fai' ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai", checkIfExists:true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("stub") } + ) + } + + } + +} diff --git a/modules/nf-core/delly/call/tests/main.nf.test.snap b/modules/nf-core/delly/call/tests/main.nf.test.snap new file mode 100644 index 00000000..00e30c32 --- /dev/null +++ b/modules/nf-core/delly/call/tests/main.nf.test.snap @@ -0,0 +1,247 @@ +{ + "bed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bcf:md5,7ef113f0b714693c1f74b128b9cebcbc" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.bcf.csi:md5,c198abfc14584c5ac69c004057927e0b" + ] + ], + "2": [ + "versions.yml:md5,e5013091be9667ff33229211b68b9eac" + ], + "bcf": [ + [ + { + "id": "test" + }, + "test.bcf:md5,7ef113f0b714693c1f74b128b9cebcbc" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test.bcf.csi:md5,c198abfc14584c5ac69c004057927e0b" + ] + ], + "versions": [ + "versions.yml:md5,e5013091be9667ff33229211b68b9eac" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-13T14:04:34.259674112" + }, + "cram": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bcf:md5,7ef113f0b714693c1f74b128b9cebcbc" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.bcf.csi:md5,c198abfc14584c5ac69c004057927e0b" + ] + ], + "2": [ + "versions.yml:md5,e5013091be9667ff33229211b68b9eac" + ], + "bcf": [ + [ + { + "id": "test" + }, + "test.bcf:md5,7ef113f0b714693c1f74b128b9cebcbc" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test.bcf.csi:md5,c198abfc14584c5ac69c004057927e0b" + ] + ], + "versions": [ + "versions.yml:md5,e5013091be9667ff33229211b68b9eac" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-13T14:04:20.066199685" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e5013091be9667ff33229211b68b9eac" + ], + "bcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e5013091be9667ff33229211b68b9eac" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-13T14:05:05.154510065" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,b48079aeb9f9f99d7f89139de74fef91" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,4cb176febbc8c26d717a6c6e67b9c905" + ] + ], + "2": [ + "versions.yml:md5,e5013091be9667ff33229211b68b9eac" + ], + "bcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,b48079aeb9f9f99d7f89139de74fef91" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,4cb176febbc8c26d717a6c6e67b9c905" + ] + ], + "versions": [ + "versions.yml:md5,e5013091be9667ff33229211b68b9eac" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-13T14:04:04.701522324" + }, + "genotype": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bcf:md5,7ef113f0b714693c1f74b128b9cebcbc" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.bcf.csi:md5,c198abfc14584c5ac69c004057927e0b" + ] + ], + "2": [ + "versions.yml:md5,e5013091be9667ff33229211b68b9eac" + ], + "bcf": [ + [ + { + "id": "test" + }, + "test.bcf:md5,7ef113f0b714693c1f74b128b9cebcbc" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test.bcf.csi:md5,c198abfc14584c5ac69c004057927e0b" + ] + ], + "versions": [ + "versions.yml:md5,e5013091be9667ff33229211b68b9eac" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-13T14:04:49.032777903" + } +} \ No newline at end of file diff --git a/modules/nf-core/delly/call/tests/vcf.config b/modules/nf-core/delly/call/tests/vcf.config new file mode 100644 index 00000000..22e93313 --- /dev/null +++ b/modules/nf-core/delly/call/tests/vcf.config @@ -0,0 +1,5 @@ +process { + withName: "DELLY_CALL" { + ext.suffix = "vcf" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqutils/info/environment.yml b/modules/nf-core/fastqutils/info/environment.yml new file mode 100644 index 00000000..39607cc2 --- /dev/null +++ b/modules/nf-core/fastqutils/info/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::fastq_utils=0.25.2" diff --git a/modules/nf-core/fastqutils/info/main.nf b/modules/nf-core/fastqutils/info/main.nf new file mode 100644 index 00000000..fc40dc93 --- /dev/null +++ b/modules/nf-core/fastqutils/info/main.nf @@ -0,0 +1,47 @@ +process FASTQUTILS_INFO { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastq_utils:0.25.2--h96c455f_2': + 'biocontainers/fastq_utils:0.25.2--h96c455f_2' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.txt"), emit: info + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + fastq_info \\ + $args \\ + ${reads} + + echo "fastq_utils fastq_info ran and found no issues with ${reads}" > ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastq_utils: \$(fastq_info -h 2>&1 | head -n 1 | sed 's/^fastq_utils //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastq_utils: \$(fastq_info -h 2>&1 | head -n 1 | sed 's/^fastq_utils //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastqutils/info/meta.yml b/modules/nf-core/fastqutils/info/meta.yml new file mode 100644 index 00000000..0f230285 --- /dev/null +++ b/modules/nf-core/fastqutils/info/meta.yml @@ -0,0 +1,53 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "fastqutils_info" +description: Performs quality control of FASTQ files +keywords: + - sort + - example + - genomics +tools: + - "fastqutils": + description: "Validation and manipulation of FASTQ files, scRNA-seq barcode pre-processing and UMI quantification." + homepage: "https://github.com/nunofonseca/fastq_utils" + documentation: "https://github.com/nunofonseca/fastq_utils" + tool_dev_url: "https://github.com/nunofonseca/fastq_utils" + licence: ["GPL v3"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + pattern: "*.{fastq,fastq.gz}" + ontologies: + - edam: "http://edamontology.org/format_25722" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + +output: + - info: + - meta: + type: map + description: Groovy Map containing sample information + - "*.txt": + type: file + description: Contains info if process ran successfully + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + # The tool will throw errors upon broken fastq files + +authors: + - "@famosab" +maintainers: + - "@famosab" diff --git a/modules/nf-core/fastqutils/info/tests/main.nf.test b/modules/nf-core/fastqutils/info/tests/main.nf.test new file mode 100644 index 00000000..45c4a2b9 --- /dev/null +++ b/modules/nf-core/fastqutils/info/tests/main.nf.test @@ -0,0 +1,109 @@ +nextflow_process { + + name "Test Process FASTQUTILS_INFO" + script "../main.nf" + process "FASTQUTILS_INFO" + + tag "modules" + tag "modules_nfcore" + tag "fastqutils" + tag "fastqutils/info" + + test("human - fastq - truncated") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_truncated_clean.fastq', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert snapshot( + process.errorReport.toString().contains("file truncated") + ).match() } + ) + } + + } + + test("human - fastq - quality-mismatch") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_quality_mismatch.fastq', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert snapshot( + process.errorReport.toString().contains("sequence and quality don't have the same length 52!=47") + ).match() } + ) + } + + } + + test("sarscov2 - fastq.gz - pe") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.info[0][1]).readLines() + ).match() } + ) + } + + } + + test("sarscov2 - fastq.gz - se - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.versions[0]).yaml + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/fastqutils/info/tests/main.nf.test.snap b/modules/nf-core/fastqutils/info/tests/main.nf.test.snap new file mode 100644 index 00000000..2009c745 --- /dev/null +++ b/modules/nf-core/fastqutils/info/tests/main.nf.test.snap @@ -0,0 +1,100 @@ +{ + "human - fastq - truncated": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-05-14T13:03:18.807484" + }, + "human - fastq - quality-mismatch": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-05-14T13:03:23.348794" + }, + "sarscov2 - fastq.gz - se - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,9d1e1938a5b8b9a3259c23820cd03cdc" + ], + "info": [ + [ + { + "id": "test", + "single_end": true + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,9d1e1938a5b8b9a3259c23820cd03cdc" + ] + }, + { + "FASTQUTILS_INFO": { + "fastq_utils": "0.25.2" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-05-14T13:03:34.142376" + }, + "sarscov2 - fastq.gz - pe": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.txt:md5,63adccb1bf3b3dc697010c44d6706060" + ] + ], + "1": [ + "versions.yml:md5,9d1e1938a5b8b9a3259c23820cd03cdc" + ], + "info": [ + [ + { + "id": "test", + "single_end": true + }, + "test.txt:md5,63adccb1bf3b3dc697010c44d6706060" + ] + ], + "versions": [ + "versions.yml:md5,9d1e1938a5b8b9a3259c23820cd03cdc" + ] + }, + [ + "fastq_utils fastq_info ran and found no issues with test_1.fastq.gz test_2.fastq.gz" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-05-14T13:03:28.959803" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/applybqsr/environment.yml b/modules/nf-core/gatk4/applybqsr/environment.yml new file mode 100644 index 00000000..b562b72c --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/applybqsr/main.nf b/modules/nf-core/gatk4/applybqsr/main.nf new file mode 100644 index 00000000..979cb3a7 --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/main.nf @@ -0,0 +1,62 @@ +process GATK4_APPLYBQSR { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) + path fasta + path fai + path dict + + output: + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.cram" + def interval_command = intervals ? "--intervals $intervals" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + ApplyBQSR \\ + --input $input \\ + --output ${prefix} \\ + --reference $fasta \\ + --bqsr-recal-file $bqsr_table \\ + $interval_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}.cram" + """ + touch ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/applybqsr/meta.yml b/modules/nf-core/gatk4/applybqsr/meta.yml new file mode 100644 index 00000000..65d9c9e9 --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/meta.yml @@ -0,0 +1,83 @@ +name: gatk4_applybqsr +description: Apply base quality score recalibration (BQSR) to a bam file +keywords: + - bam + - base quality score recalibration + - bqsr + - cram + - gatk4 +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - bqsr_table: + type: file + description: Recalibration table from gatk4_baserecalibrator + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Recalibrated BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Recalibrated CRAM file + pattern: "*.{cram}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@yocra3" + - "@FriederikeHanssen" +maintainers: + - "@yocra3" + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/applybqsr/tests/main.nf.test b/modules/nf-core/gatk4/applybqsr/tests/main.nf.test new file mode 100644 index 00000000..89864a14 --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/tests/main.nf.test @@ -0,0 +1,192 @@ +nextflow_process { + + name "Test Process GATK4_APPLYBQSR" + script "../main.nf" + process "GATK4_APPLYBQSR" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/applybqsr" + + test("sarscov2 - bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/gatk/test.baserecalibrator.table', checkIfExists: true), + [] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam in, cram out") { + + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/gatk/test.baserecalibrator.table', checkIfExists: true), + [] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.cram[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2 - bam - intervals") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/gatk/test.baserecalibrator.table', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - cram") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert snapshot(file(process.out.cram[0][1]).name).match("test.cram") } + ) + } + } + + test("sarscov2 - cram - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("sarscov2 - bam - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/gatk/test.baserecalibrator.table', checkIfExists: true), + [] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/applybqsr/tests/main.nf.test.snap b/modules/nf-core/gatk4/applybqsr/tests/main.nf.test.snap new file mode 100644 index 00000000..9466d048 --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/tests/main.nf.test.snap @@ -0,0 +1,193 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,91928754768510a59bde637403ee6a94" + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + + ], + "versions": [ + "versions.yml:md5,91928754768510a59bde637403ee6a94" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-15T07:01:35.048141874" + }, + "sarscov2 - bam - intervals": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,51259943bca01c92b20fb5b7ac09a246" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,91928754768510a59bde637403ee6a94" + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,51259943bca01c92b20fb5b7ac09a246" + ] + ], + "cram": [ + + ], + "versions": [ + "versions.yml:md5,91928754768510a59bde637403ee6a94" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-15T07:00:30.908035862" + }, + "sarscov2 - cram": { + "content": [ + [ + "versions.yml:md5,91928754768510a59bde637403ee6a94" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-15T07:00:59.930550712" + }, + "sarscov2 - bam in, cram out": { + "content": [ + [ + "versions.yml:md5,91928754768510a59bde637403ee6a94" + ], + "test.cram" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-15T07:00:01.611711705" + }, + "test.cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-15T07:00:59.941506312" + }, + "sarscov2 - cram - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,91928754768510a59bde637403ee6a94" + ], + "bam": [ + + ], + "cram": [ + [ + { + "id": "test" + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,91928754768510a59bde637403ee6a94" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-15T07:01:14.787020044" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,213acb451007a0098a6a6a360fa68d72" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,91928754768510a59bde637403ee6a94" + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,213acb451007a0098a6a6a360fa68d72" + ] + ], + "cram": [ + + ], + "versions": [ + "versions.yml:md5,91928754768510a59bde637403ee6a94" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-15T06:59:29.61561398" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/applybqsr/tests/nextflow.config b/modules/nf-core/gatk4/applybqsr/tests/nextflow.config new file mode 100644 index 00000000..c86b1b37 --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GATK4_APPLYBQSR { + ext.prefix = { "${meta.id}.bam" } + } +} diff --git a/modules/nf-core/gatk4/applybqsr/tests/nextflow_cram.config b/modules/nf-core/gatk4/applybqsr/tests/nextflow_cram.config new file mode 100644 index 00000000..cbab9b71 --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/tests/nextflow_cram.config @@ -0,0 +1,5 @@ +process { + withName: GATK4_APPLYBQSR { + ext.prefix = { "${meta.id}.cram" } + } +} diff --git a/modules/nf-core/gatk4/applyvqsr/environment.yml b/modules/nf-core/gatk4/applyvqsr/environment.yml new file mode 100644 index 00000000..b562b72c --- /dev/null +++ b/modules/nf-core/gatk4/applyvqsr/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/applyvqsr/main.nf b/modules/nf-core/gatk4/applyvqsr/main.nf new file mode 100644 index 00000000..c8ea3da5 --- /dev/null +++ b/modules/nf-core/gatk4/applyvqsr/main.nf @@ -0,0 +1,63 @@ +process GATK4_APPLYVQSR { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(vcf), path(vcf_tbi), path(recal), path(recal_index), path(tranches) + path fasta + path fai + path dict + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference_command = fasta ? "--reference $fasta" : '' + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK ApplyVQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + ApplyVQSR \\ + --variant ${vcf} \\ + --output ${prefix}.vcf.gz \\ + $reference_command \\ + --tranches-file $tranches \\ + --recal-file $recal \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/applyvqsr/meta.yml b/modules/nf-core/gatk4/applyvqsr/meta.yml new file mode 100644 index 00000000..ceedff62 --- /dev/null +++ b/modules/nf-core/gatk4/applyvqsr/meta.yml @@ -0,0 +1,91 @@ +name: gatk4_applyvqsr +description: | + Apply a score cutoff to filter variants based on a recalibration table. + AplyVQSR performs the second pass in a two-stage process called Variant Quality Score Recalibration (VQSR). + Specifically, it applies filtering to the input variants based on the recalibration table produced + in the first step by VariantRecalibrator and a target sensitivity value. +keywords: + - gatk4 + - variant quality score recalibration + - vcf + - vqsr +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: file + description: VCF file to be recalibrated, this should be the same file as used + for the first stage VariantRecalibrator. + pattern: "*.vcf" + - vcf_tbi: + type: file + description: tabix index for the input vcf file. + pattern: "*.vcf.tbi" + - recal: + type: file + description: Recalibration file produced when the input vcf was run through + VariantRecalibrator in stage 1. + pattern: "*.recal" + - recal_index: + type: file + description: Index file for the recalibration file. + pattern: ".recal.idx" + - tranches: + type: file + description: Tranches file produced when the input vcf was run through VariantRecalibrator + in stage 1. + pattern: ".tranches" + - - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - vcf: + - meta: + type: file + description: compressed vcf file containing the recalibrated variants. + pattern: "*.vcf.gz" + - "*.vcf.gz": + type: file + description: compressed vcf file containing the recalibrated variants. + pattern: "*.vcf.gz" + - tbi: + - meta: + type: file + description: Index of recalibrated vcf file. + pattern: "*vcf.gz.tbi" + - "*.tbi": + type: file + description: Index of recalibrated vcf file. + pattern: "*vcf.gz.tbi" + - versions: + - versions.yml: + type: file + description: File containing software versions. + pattern: "versions.yml" +authors: + - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/applyvqsr/tests/allelspecificity.config b/modules/nf-core/gatk4/applyvqsr/tests/allelspecificity.config new file mode 100644 index 00000000..ac368bd0 --- /dev/null +++ b/modules/nf-core/gatk4/applyvqsr/tests/allelspecificity.config @@ -0,0 +1,5 @@ +process { + withName: GATK4_APPLYVQSR { + ext.args = '--mode SNP --truth-sensitivity-filter-level 99.0 -AS' + } +} diff --git a/modules/nf-core/gatk4/applyvqsr/tests/main.nf.test b/modules/nf-core/gatk4/applyvqsr/tests/main.nf.test new file mode 100644 index 00000000..104eb7f0 --- /dev/null +++ b/modules/nf-core/gatk4/applyvqsr/tests/main.nf.test @@ -0,0 +1,106 @@ +nextflow_process { + + name "Test Process GATK4_APPLYVQSR" + script "../main.nf" + process "GATK4_APPLYVQSR" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/applyvqsr" + + test("human - vcf") { + + config "./no-allelspecificity.config" + + when { + process { + """ + input[0] = [ [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal.idx', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.tranches', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") }, + { assert snapshot(file(process.out.tbi.get(0).get(1)).name).match() } + ) + } + + } + + test("human - vcf - allele-specific") { + + config "./allelspecificity.config" + + when { + process { + """ + input[0] = [ [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.recal', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.recal.idx', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2_allele_specific.tranches', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions_allelspecific") }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") }, + { assert snapshot(file(process.out.tbi.get(0).get(1)).name).match() } + ) + } + + } + + test("human - vcf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal.idx', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.tranches', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/applyvqsr/tests/main.nf.test.snap b/modules/nf-core/gatk4/applyvqsr/tests/main.nf.test.snap new file mode 100644 index 00000000..1f3e97cb --- /dev/null +++ b/modules/nf-core/gatk4/applyvqsr/tests/main.nf.test.snap @@ -0,0 +1,95 @@ +{ + "human - vcf": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-05-15T13:31:50.727658" + }, + "human - vcf - allele-specific": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-05-15T13:29:42.331816" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,198986205018e3e6cdd651680a5e8cdd" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:33:41.980566137" + }, + "human - vcf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,198986205018e3e6cdd651680a5e8cdd" + ], + "tbi": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,198986205018e3e6cdd651680a5e8cdd" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:34:08.561335315" + }, + "versions_allelspecific": { + "content": [ + [ + "versions.yml:md5,198986205018e3e6cdd651680a5e8cdd" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:33:56.418381574" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/applyvqsr/tests/no-allelspecificity.config b/modules/nf-core/gatk4/applyvqsr/tests/no-allelspecificity.config new file mode 100644 index 00000000..c08c918e --- /dev/null +++ b/modules/nf-core/gatk4/applyvqsr/tests/no-allelspecificity.config @@ -0,0 +1,5 @@ +process { + withName: GATK4_APPLYVQSR { + ext.args = '--mode SNP --truth-sensitivity-filter-level 99.0' + } +} diff --git a/modules/nf-core/gatk4/baserecalibrator/environment.yml b/modules/nf-core/gatk4/baserecalibrator/environment.yml new file mode 100644 index 00000000..b562b72c --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/baserecalibrator/main.nf b/modules/nf-core/gatk4/baserecalibrator/main.nf new file mode 100644 index 00000000..493533c7 --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/main.nf @@ -0,0 +1,63 @@ +process GATK4_BASERECALIBRATOR { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + tuple val(meta5), path(known_sites) + tuple val(meta6), path(known_sites_tbi) + + output: + tuple val(meta), path("*.table"), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? "--intervals $intervals" : "" + def sites_command = known_sites.collect{"--known-sites $it"}.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BaseRecalibrator \\ + --input $input \\ + --output ${prefix}.table \\ + --reference $fasta \\ + $interval_command \\ + $sites_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.table + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/baserecalibrator/meta.yml b/modules/nf-core/gatk4/baserecalibrator/meta.yml new file mode 100644 index 00000000..ba280e61 --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/meta.yml @@ -0,0 +1,105 @@ +name: gatk4_baserecalibrator +description: Generate recalibration table for Base Quality Score Recalibration (BQSR) +keywords: + - base quality score recalibration + - table + - bqsr + - gatk4 + - sort +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - known_sites: + type: file + description: VCF files with known sites for indels / snps + pattern: "*.vcf.gz" + - - meta6: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - known_sites_tbi: + type: file + description: Tabix index of the known_sites + pattern: "*.vcf.gz.tbi" +output: + - table: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.table": + type: file + description: Recalibration table from BaseRecalibrator + pattern: "*.{table}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/baserecalibrator/tests/main.nf.test b/modules/nf-core/gatk4/baserecalibrator/tests/main.nf.test new file mode 100644 index 00000000..97c1c1ec --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/tests/main.nf.test @@ -0,0 +1,250 @@ +nextflow_process { + + name "Test Process GATK4_BASERECALIBRATOR" + script "../main.nf" + process "GATK4_BASERECALIBRATOR" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/baserecalibrator" + + test("sarscov2 - bam") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ]) + input[1] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + input[2] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + input[3] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true)]) + input[4] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)]) + input[5] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - bam - intervals") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ]) + input[1] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + input[2] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + input[3] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true)]) + input[4] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)]) + input[5] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - multiple sites") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ]) + input[1] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + input[2] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + input[3] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true)]) + input[4] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ]) + input[5] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + [] + ]) + input[1] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)]) + input[2] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)]) + input[3] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)]) + input[4] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true)]) + input[5] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - bam - stub") { + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ]) + input[1] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + input[2] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + input[3] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true)]) + input[4] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)]) + input[5] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - bam - intervals - stub") { + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ]) + input[1] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + input[2] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + input[3] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true)]) + input[4] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)]) + input[5] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - multiple sites - stub") { + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ]) + input[1] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + input[2] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + input[3] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true)]) + input[4] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ]) + input[5] = Channel.of([ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - cram - stub") { + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + [] + ]) + input[1] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)]) + input[2] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)]) + input[3] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)]) + input[4] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true)]) + input[5] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gatk4/baserecalibrator/tests/main.nf.test.snap b/modules/nf-core/gatk4/baserecalibrator/tests/main.nf.test.snap new file mode 100644 index 00000000..efdf46b3 --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/tests/main.nf.test.snap @@ -0,0 +1,266 @@ +{ + "homo_sapiens - cram": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.table:md5,35d89a3811aa31711fc9815b6b80e6ec" + ] + ], + "1": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ], + "table": [ + [ + { + "id": "test" + }, + "test.table:md5,35d89a3811aa31711fc9815b6b80e6ec" + ] + ], + "versions": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-23T15:37:29.36262858" + }, + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.table:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ], + "table": [ + [ + { + "id": "test" + }, + "test.table:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:36:20.634357503" + }, + "sarscov2 - bam - intervals": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.table:md5,9ecb5f00a2229291705addc09c0ec231" + ] + ], + "1": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ], + "table": [ + [ + { + "id": "test" + }, + "test.table:md5,9ecb5f00a2229291705addc09c0ec231" + ] + ], + "versions": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:35:55.223087741" + }, + "sarscov2 - bam - multiple sites": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.table:md5,e2e43abdc0c943c1a54dae816d0b9ea7" + ] + ], + "1": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ], + "table": [ + [ + { + "id": "test" + }, + "test.table:md5,e2e43abdc0c943c1a54dae816d0b9ea7" + ] + ], + "versions": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:36:10.08523788" + }, + "homo_sapiens - cram - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.table:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ], + "table": [ + [ + { + "id": "test" + }, + "test.table:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-23T15:38:23.045092592" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.table:md5,e2e43abdc0c943c1a54dae816d0b9ea7" + ] + ], + "1": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ], + "table": [ + [ + { + "id": "test" + }, + "test.table:md5,e2e43abdc0c943c1a54dae816d0b9ea7" + ] + ], + "versions": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:35:41.89476551" + }, + "sarscov2 - bam - intervals - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.table:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ], + "table": [ + [ + { + "id": "test" + }, + "test.table:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-23T15:37:56.722728282" + }, + "sarscov2 - bam - multiple sites - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.table:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ], + "table": [ + [ + { + "id": "test" + }, + "test.table:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1150a8b62c614926eeedda9d4bc8f651" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-23T15:38:09.797767675" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/combinegvcfs/environment.yml b/modules/nf-core/gatk4/combinegvcfs/environment.yml new file mode 100644 index 00000000..b562b72c --- /dev/null +++ b/modules/nf-core/gatk4/combinegvcfs/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/combinegvcfs/main.nf b/modules/nf-core/gatk4/combinegvcfs/main.nf new file mode 100644 index 00000000..1528ac73 --- /dev/null +++ b/modules/nf-core/gatk4/combinegvcfs/main.nf @@ -0,0 +1,59 @@ +process GATK4_COMBINEGVCFS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(vcf), path(vcf_idx) + path fasta + path fai + path dict + + output: + tuple val(meta), path("*.combined.g.vcf.gz"), emit: combined_gvcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = vcf.collect{"--variant $it"}.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK COMBINEGVCFS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CombineGVCFs \\ + $input_list \\ + --output ${prefix}.combined.g.vcf.gz \\ + --reference ${fasta} \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.combined.g.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/combinegvcfs/meta.yml b/modules/nf-core/gatk4/combinegvcfs/meta.yml new file mode 100644 index 00000000..c4fd646c --- /dev/null +++ b/modules/nf-core/gatk4/combinegvcfs/meta.yml @@ -0,0 +1,73 @@ +name: gatk4_combinegvcfs +description: Combine per-sample gVCF files produced by HaplotypeCaller into a multi-sample + gVCF file +keywords: + - gvcf + - gatk4 + - vcf + - combinegvcfs + - short variant discovery +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform + at the Broad Institute, the toolkit offers a wide variety of tools with a primary + focus on variant discovery and genotyping. Its powerful processing engine and + high-performance computing features make it capable of taking on projects of + any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593911-CombineGVCFs + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - vcf: + type: file + description: Compressed VCF files + pattern: "*.vcf.gz" + - vcf_idx: + type: file + description: VCF Index file + pattern: "*.vcf.gz.idx" + - - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - fai: + type: file + description: FASTA index file + pattern: "*.fasta.fai" + - - dict: + type: file + description: FASTA dictionary file + pattern: "*.dict" +output: + - combined_gvcf: + - meta: + type: file + description: Compressed Combined GVCF file + pattern: "*.combined.g.vcf.gz" + - "*.combined.g.vcf.gz": + type: file + description: Compressed Combined GVCF file + pattern: "*.combined.g.vcf.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@sateeshperi" + - "@mjcipriano" + - "@hseabolt" + - "@maxulysse" +maintainers: + - "@sateeshperi" + - "@mjcipriano" + - "@hseabolt" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/combinegvcfs/tests/main.nf.test b/modules/nf-core/gatk4/combinegvcfs/tests/main.nf.test new file mode 100644 index 00000000..df2d5b4e --- /dev/null +++ b/modules/nf-core/gatk4/combinegvcfs/tests/main.nf.test @@ -0,0 +1,65 @@ +nextflow_process { + + name "Test Process GATK4_COMBINEGVCFS" + script "../main.nf" + process "GATK4_COMBINEGVCFS" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/combinegvcfs" + + test("test_gatk4_combinegvcfs") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + // File has a timestamp in it and is in binary format, need to compare only a portion of the file (after header) + { assert snapshot(path(process.out.combined_gvcf[0][1]).linesGzip[102..-1], + process.out.versions + ).match() } + ) + } + } + + test("test_gatk4_combinegvcfs - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/combinegvcfs/tests/main.nf.test.snap b/modules/nf-core/gatk4/combinegvcfs/tests/main.nf.test.snap new file mode 100644 index 00000000..00aea2bd --- /dev/null +++ b/modules/nf-core/gatk4/combinegvcfs/tests/main.nf.test.snap @@ -0,0 +1,181 @@ +{ + "test_gatk4_combinegvcfs - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.combined.g.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,aca96fc758ea3451abf185e1470788b5" + ], + "combined_gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.combined.g.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,aca96fc758ea3451abf185e1470788b5" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:47:56.847188152" + }, + "test_gatk4_combinegvcfs": { + "content": [ + [ + "chr22\t1\t.\tA\t\t.\t.\tEND=1951\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t1952\t.\tT\t\t.\t.\tEND=1953\tGT:DP:GQ:MIN_DP:PL\t./.:1:3:1:0,3,25", + "chr22\t1954\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:1:0:1:0,0,0", + "chr22\t1955\t.\tT\t\t.\t.\tEND=1956\tGT:DP:GQ:MIN_DP:PL\t./.:1:3:1:0,3,33", + "chr22\t1957\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,68", + "chr22\t1958\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:1:3:1:0,3,34", + "chr22\t1959\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:3:9:3:0,9,101", + "chr22\t1960\t.\tA\t\t.\t.\tEND=1961\tGT:DP:GQ:MIN_DP:PL\t./.:3:6:2:0,6,58", + "chr22\t1962\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:2:0:2:0,0,0", + "chr22\t1963\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:6:15:6:0,15,225", + "chr22\t1964\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:6:18:6:0,18,197", + "chr22\t1965\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:11:33:11:0,33,335", + "chr22\t1966\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:25:75:25:0,75,687", + "chr22\t1967\t.\tG\t\t.\t.\tEND=1981\tGT:DP:GQ:MIN_DP:PL\t./.:107:99:35:0,105,934", + "chr22\t1982\trs7287620\tA\tG,\t.\t.\tBaseQRankSum=0.00;DP=211;ExcessHet=3.01;MQRankSum=0.00;RAW_MQandDP=759600,211;ReadPosRankSum=1.09\tGT:AD:DP:GQ:PL:SB\t./.:155,30,0:185:99:184,0,3246,648,3335,3984:77,78,15,15", + "chr22\t1983\t.\tA\t\t.\t.\tEND=2102\tGT:DP:GQ:MIN_DP:PL\t./.:476:99:209:0,120,1800", + "chr22\t2103\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:206:93:206:0,93,1395", + "chr22\t2104\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:194:48:194:0,48,720", + "chr22\t2105\t.\tC\t\t.\t.\tEND=2106\tGT:DP:GQ:MIN_DP:PL\t./.:177:30:171:0,30,450", + "chr22\t2107\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:156:24:156:0,24,360", + "chr22\t2108\t.\tT\t\t.\t.\tEND=2110\tGT:DP:GQ:MIN_DP:PL\t./.:110:21:96:0,21,315", + "chr22\t2111\t.\tC\t\t.\t.\tEND=2113\tGT:DP:GQ:MIN_DP:PL\t./.:47:15:29:0,15,225", + "chr22\t2114\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:14:12:14:0,12,180", + "chr22\t2115\t.\tA\t\t.\t.\tEND=2117\tGT:DP:GQ:MIN_DP:PL\t./.:10:6:8:0,6,90", + "chr22\t2118\t.\tC\t\t.\t.\tEND=2122\tGT:DP:GQ:MIN_DP:PL\t./.:7:0:3:0,0,0", + "chr22\t2123\t.\tC\tG,\t.\t.\tBaseQRankSum=0.967;DP=4;ExcessHet=3.01;MQRankSum=0.00;RAW_MQandDP=14400,4;ReadPosRankSum=0.967\tGT:AD:DP:GQ:PL:SB\t./.:1,2,0:3:1:41,0,1,43,7,50:0,1,1,1", + "chr22\t2124\t.\tG\t\t.\t.\tEND=2716\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t2717\t.\tT\t\t.\t.\tEND=2721\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + "chr22\t2722\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:10:30:10:0,30,247", + "chr22\t2723\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:12:36:12:0,36,297", + "chr22\t2724\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:18:54:18:0,54,445", + "chr22\t2725\t.\tA\t\t.\t.\tEND=2836\tGT:DP:GQ:MIN_DP:PL\t./.:250:99:36:0,108,890", + "chr22\t2837\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:116:72:116:0,72,1080", + "chr22\t2838\t.\tA\t\t.\t.\tEND=2907\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t2908\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:8:24:8:0,24,260", + "chr22\t2909\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:16:48:16:0,48,546", + "chr22\t2910\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:21:63:21:0,63,678", + "chr22\t2911\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:28:84:28:0,84,894", + "chr22\t2912\t.\tC\t\t.\t.\tEND=3119\tGT:DP:GQ:MIN_DP:PL\t./.:848:99:33:0,99,1061", + "chr22\t3120\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:190:54:190:0,54,810", + "chr22\t3121\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:178:39:178:0,39,585", + "chr22\t3122\t.\tC\t\t.\t.\tEND=3124\tGT:DP:GQ:MIN_DP:PL\t./.:147:15:137:0,15,225", + "chr22\t3125\t.\tG\t\t.\t.\tEND=3126\tGT:DP:GQ:MIN_DP:PL\t./.:115:9:110:0,9,135", + "chr22\t3127\t.\tT\t\t.\t.\tEND=3139\tGT:DP:GQ:MIN_DP:PL\t./.:5:6:3:0,6,90", + "chr22\t3140\t.\tA\tG,\t.\t.\tDP=2;ExcessHet=3.01;RAW_MQandDP=7200,2\tGT:AD:DP:GQ:PL:SB\t./.:0,2,0:2:6:49,6,0,49,6,49:0,0,1,1", + "chr22\t3141\t.\tG\t\t.\t.\tEND=3144\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + "chr22\t3145\t.\tT\t\t.\t.\tEND=3190\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t3191\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + "chr22\t3192\t.\tA\t\t.\t.\tEND=3196\tGT:DP:GQ:MIN_DP:PL\t./.:6:18:6:0,18,148", + "chr22\t3197\t.\tT\t\t.\t.\tEND=3200\tGT:DP:GQ:MIN_DP:PL\t./.:8:24:8:0,24,196", + "chr22\t3201\t.\tT\t\t.\t.\tEND=3207\tGT:DP:GQ:MIN_DP:PL\t./.:12:36:12:0,36,297", + "chr22\t3208\t.\tA\t\t.\t.\tEND=3209\tGT:DP:GQ:MIN_DP:PL\t./.:14:42:14:0,42,346", + "chr22\t3210\t.\tT\t\t.\t.\tEND=3217\tGT:DP:GQ:MIN_DP:PL\t./.:16:48:16:0,48,396", + "chr22\t3218\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:18:54:18:0,54,445", + "chr22\t3219\t.\tA\t\t.\t.\tEND=3265\tGT:DP:GQ:MIN_DP:PL\t./.:20:60:20:0,60,493", + "chr22\t3266\trs4008589\tT\tC,\t.\t.\tDP=20;ExcessHet=3.01;RAW_MQandDP=72000,20\tGT:AD:DP:GQ:PL:SB\t./.:0,20,0:20:60:495,60,0,495,60,495:0,0,10,10", + "chr22\t3267\t.\tT\t\t.\t.\tEND=3302\tGT:DP:GQ:MIN_DP:PL\t./.:20:60:20:0,60,493", + "chr22\t3303\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:20:54:20:0,54,810", + "chr22\t3304\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:20:42:20:0,42,630", + "chr22\t3305\t.\tT\t\t.\t.\tEND=3308\tGT:DP:GQ:MIN_DP:PL\t./.:20:36:20:0,36,540", + "chr22\t3309\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:20:24:20:0,24,360", + "chr22\t3310\t.\tA\t\t.\t.\tEND=3317\tGT:DP:GQ:MIN_DP:PL\t./.:16:18:12:0,18,270", + "chr22\t3318\t.\tG\t\t.\t.\tEND=3320\tGT:DP:GQ:MIN_DP:PL\t./.:8:0:6:0,0,0", + "chr22\t3321\t.\tT\t\t.\t.\tEND=3351\tGT:DP:GQ:MIN_DP:PL\t./.:1:3:1:0,3,24", + "chr22\t3352\t.\tT\t\t.\t.\tEND=3354\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,58", + "chr22\t3355\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:2:0:2:0,0,10", + "chr22\t3356\t.\tT\t\t.\t.\tEND=3365\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,47", + "chr22\t3366\t.\tT\t\t.\t.\tEND=3373\tGT:DP:GQ:MIN_DP:PL\t./.:3:9:3:0,9,83", + "chr22\t3374\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:4:12:4:0,12,135", + "chr22\t3375\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:4:0:4:0,0,19", + "chr22\t3376\t.\tA\t\t.\t.\tEND=3377\tGT:DP:GQ:MIN_DP:PL\t./.:4:12:4:0,12,124", + "chr22\t3378\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:4:0:4:0,0,80", + "chr22\t3379\t.\tG\t\t.\t.\tEND=3381\tGT:DP:GQ:MIN_DP:PL\t./.:4:12:4:0,12,122", + "chr22\t3382\t.\tC\t\t.\t.\tEND=3385\tGT:DP:GQ:MIN_DP:PL\t./.:5:15:5:0,15,146", + "chr22\t3386\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:5:0:5:0,0,112", + "chr22\t3387\t.\tA\t\t.\t.\tEND=3390\tGT:DP:GQ:MIN_DP:PL\t./.:7:21:7:0,21,193", + "chr22\t3391\t.\tA\t\t.\t.\tEND=3392\tGT:DP:GQ:MIN_DP:PL\t./.:9:27:9:0,27,284", + "chr22\t3393\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:3:9:0,3,237", + "chr22\t3394\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:27:9:0,27,260", + "chr22\t3395\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:2:9:0,2,225", + "chr22\t3396\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:0:9:0,0,28", + "chr22\t3397\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:3:9:0,3,228", + "chr22\t3398\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:27:9:0,27,286", + "chr22\t3399\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:3:9:0,3,237", + "chr22\t3400\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:4:9:0,4,223", + "chr22\t3401\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:27:9:0,27,285", + "chr22\t3402\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:3:9:0,3,206", + "chr22\t3403\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:4:9:0,4,238", + "chr22\t3404\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:27:9:0,27,305", + "chr22\t3405\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:12:2:12:0,2,295", + "chr22\t3406\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:19:0:19:0,0,259", + "chr22\t3407\t.\tT\t\t.\t.\tEND=3408\tGT:DP:GQ:MIN_DP:PL\t./.:22:42:22:0,42,577", + "chr22\t3409\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:22:43:22:0,43,605", + "chr22\t3410\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:22:66:22:0,66,642", + "chr22\t3411\t.\tT\t\t.\t.\tEND=3412\tGT:DP:GQ:MIN_DP:PL\t./.:23:46:23:0,46,629", + "chr22\t3413\t.\tT\tG,TG,\t.\t.\tBaseQRankSum=-1.318e+00;DP=17;ExcessHet=3.01;MQRankSum=0.00;RAW_MQandDP=61200,17;ReadPosRankSum=2.37\tGT:AD:DP:GQ:PL:SB\t./.:8,5,2,0:15:44:86,0,108,44,85,250,107,136,225,271:6,2,6,1", + "chr22\t3414\t.\tG\t\t.\t.\tEND=3417\tGT:DP:GQ:MIN_DP:PL\t./.:17:51:17:0,51,478", + "chr22\t3418\t.\tC\t\t.\t.\tEND=3420\tGT:DP:GQ:MIN_DP:PL\t./.:18:54:18:0,54,506", + "chr22\t3421\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:19:57:19:0,57,522", + "chr22\t3422\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:28:84:28:0,84,811", + "chr22\t3423\t.\tA\t\t.\t.\tEND=3631\tGT:DP:GQ:MIN_DP:PL\t./.:1099:99:51:0,120,1800", + "chr22\t3632\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:99:96:99:0,96,1440", + "chr22\t3633\t.\tT\t\t.\t.\tEND=3634\tGT:DP:GQ:MIN_DP:PL\t./.:90:51:85:0,51,765", + "chr22\t3635\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:74:45:74:0,45,675", + "chr22\t3636\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:69:39:69:0,39,585", + "chr22\t3637\t.\tA\t\t.\t.\tEND=3644\tGT:DP:GQ:MIN_DP:PL\t./.:44:3:17:0,3,45", + "chr22\t3645\t.\tC\t\t.\t.\tEND=4465\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t4466\t.\tA\t\t.\t.\tEND=4487\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + "chr22\t4488\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t4489\t.\tT\t\t.\t.\tEND=4493\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + "chr22\t4494\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t4495\t.\tA\t\t.\t.\tEND=4498\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,48", + "chr22\t4499\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:6:18:6:0,18,148", + "chr22\t4500\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:10:30:10:0,30,247", + "chr22\t4501\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:14:42:14:0,42,346", + "chr22\t4502\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:24:72:24:0,72,593", + "chr22\t4503\t.\tG\t\t.\t.\tEND=4507\tGT:DP:GQ:MIN_DP:PL\t./.:32:90:30:0,90,742", + "chr22\t4508\t.\tG\t\t.\t.\tEND=4575\tGT:DP:GQ:MIN_DP:PL\t./.:60:99:34:0,102,841", + "chr22\t4576\t.\tT\t\t.\t.\tEND=4578\tGT:DP:GQ:MIN_DP:PL\t./.:48:90:48:0,90,1350", + "chr22\t4579\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:48:84:48:0,84,1260", + "chr22\t4580\t.\tA\t\t.\t.\tEND=4582\tGT:DP:GQ:MIN_DP:PL\t./.:42:54:40:0,54,810", + "chr22\t4583\t.\tG\t\t.\t.\tEND=4584\tGT:DP:GQ:MIN_DP:PL\t./.:38:48:36:0,48,720", + "chr22\t4585\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:34:42:34:0,42,630", + "chr22\t4586\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:32:24:32:0,24,360", + "chr22\t4587\t.\tA\t\t.\t.\tEND=4597\tGT:DP:GQ:MIN_DP:PL\t./.:16:12:4:0,12,99", + "chr22\t4598\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + "chr22\t4599\t.\tG\t\t.\t.\tEND=4604\tGT:DP:GQ:MIN_DP:PL\t./.:4:12:4:0,12,98", + "chr22\t4605\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t4606\t.\tT\t\t.\t.\tEND=4607\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + "chr22\t4608\t.\tA\t\t.\t.\tEND=40001\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0" + ], + [ + "versions.yml:md5,aca96fc758ea3451abf185e1470788b5" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T10:47:45.34645647" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/genotypegvcfs/environment.yml b/modules/nf-core/gatk4/genotypegvcfs/environment.yml new file mode 100644 index 00000000..b562b72c --- /dev/null +++ b/modules/nf-core/gatk4/genotypegvcfs/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/genotypegvcfs/main.nf b/modules/nf-core/gatk4/genotypegvcfs/main.nf new file mode 100644 index 00000000..dc2813a3 --- /dev/null +++ b/modules/nf-core/gatk4/genotypegvcfs/main.nf @@ -0,0 +1,68 @@ +process GATK4_GENOTYPEGVCFS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(input), path(gvcf_index), path(intervals), path(intervals_index) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + tuple val(meta5), path(dbsnp) + tuple val(meta6), path(dbsnp_tbi) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_command = input.name.endsWith(".vcf") || input.name.endsWith(".vcf.gz") ? "$input" : "gendb://$input" + def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" + def interval_command = intervals ? "--intervals $intervals" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + GenotypeGVCFs \\ + --variant $input_command \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $interval_command \\ + $dbsnp_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + echo | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/genotypegvcfs/meta.yml b/modules/nf-core/gatk4/genotypegvcfs/meta.yml new file mode 100644 index 00000000..0c1fe491 --- /dev/null +++ b/modules/nf-core/gatk4/genotypegvcfs/meta.yml @@ -0,0 +1,117 @@ +name: gatk4_genotypegvcfs +description: | + Perform joint genotyping on one or more samples pre-called with HaplotypeCaller. +keywords: + - gatk4 + - genotype + - gvcf + - joint genotyping +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["BSD-3-clause"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: | + gVCF(.gz) file or a GenomicsDB + pattern: "*.{vcf,vcf.gz}" + - gvcf_index: + type: file + description: | + index of gvcf file, or empty when providing GenomicsDB + pattern: "*.{idx,tbi}" + - intervals: + type: file + description: Interval file with the genomic regions included in the library + (optional) + - intervals_index: + type: file + description: Interval index file (optional) + - - meta2: + type: map + description: | + Groovy Map containing fasta information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference fasta file + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing fai information + e.g. [ id:'test' ] + - fai: + type: file + description: Reference fasta index file + pattern: "*.fai" + - - meta4: + type: map + description: | + Groovy Map containing dict information + e.g. [ id:'test' ] + - dict: + type: file + description: Reference fasta sequence dict file + pattern: "*.dict" + - - meta5: + type: map + description: | + Groovy Map containing dbsnp information + e.g. [ id:'test' ] + - dbsnp: + type: file + description: dbSNP VCF file + pattern: "*.vcf.gz" + - - meta6: + type: map + description: | + Groovy Map containing dbsnp tbi information + e.g. [ id:'test' ] + - dbsnp_tbi: + type: file + description: dbSNP VCF index file + pattern: "*.tbi" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: Genotyped VCF file + pattern: "*.vcf.gz" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Tbi index for VCF file + pattern: "*.vcf.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@santiagorevale" + - "@maxulysse" +maintainers: + - "@santiagorevale" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/genotypegvcfs/tests/main.nf.test b/modules/nf-core/gatk4/genotypegvcfs/tests/main.nf.test new file mode 100644 index 00000000..25bc2d38 --- /dev/null +++ b/modules/nf-core/gatk4/genotypegvcfs/tests/main.nf.test @@ -0,0 +1,285 @@ +nextflow_process { + + name "Test Process GATK4_GENOTYPEGVCFS" + script "../main.nf" + process "GATK4_GENOTYPEGVCFS" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/genotypegvcfs" + tag "untar" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + test("homo_sapiens - [gvcf, idx, [], []], fasta, fai, dict, [], []") { + + when { + process { + """ + input[0] = [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx', checkIfExists: true), + [], + [] + ] + input[1] = [ + [id:"fasta"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true) + ] + input[2] = [ + [id:"fai"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true) + ] + input[3] = [ + [id:"dict"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists:true) + ] + input[4] = [[],[]] + input[5] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { [it[0], path(it[1]).vcf.variantsMD5] }, + process.out.tbi.collect { [it[0], file(it[1]).name] }, + process.out.versions + ).match() } + ) + } + + } + + test("homo_sapiens - [gvcf_gz, tbi, [], []], fasta, fai, dict, dbsnp, dbsnp_tbi") { + + when { + process { + """ + input[0] = [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + [], + [] + ] + input[1] = [ + [id:"fasta"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true) + ] + input[2] = [ + [id:"fai"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true) + ] + input[3] = [ + [id:"dict"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists:true) + ] + input[4] = [ + [id:"dbsnp"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists:true) + ] + input[5] = [ + [id:"dbsnp"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists:true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { [it[0], path(it[1]).vcf.variantsMD5] }, + process.out.tbi.collect { [it[0], file(it[1]).name] }, + process.out.versions + ).match() } + ) + } + + } + + test("homo_sapiens - [gvcf_gz, tbi, bed, []], fasta, fai, dict, [], []") { + + when { + process { + """ + input[0] = [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists:true), + [] + ] + input[1] = [ + [id:"fasta"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true) + ] + input[2] = [ + [id:"fai"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true) + ] + input[3] = [ + [id:"dict"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists:true) + ] + input[4] = [[],[]] + input[5] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { [it[0], path(it[1]).vcf.variantsMD5] }, + process.out.tbi.collect { [it[0], file(it[1]).name] }, + process.out.versions + ).match() } + ) + } + + } + + test("homo_sapiens - [gendb, [], [], []], fasta, fai, dict, [], []") { + + when { + process { + """ + input[0] = UNTAR.out.untar.map { meta, gendb -> [ meta, gendb, [], [], []] } + input[1] = [ + [id:"fasta"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true) + ] + input[2] = [ + [id:"fai"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true) + ] + input[3] = [ + [id:"dict"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists:true) + ] + input[4] = [[],[]] + input[5] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { [it[0], path(it[1]).vcf.variantsMD5] }, + process.out.tbi.collect { [it[0], file(it[1]).name] }, + process.out.versions + ).match() } + ) + } + + } + + test("homo_sapiens - [gendb, bed, [], []], fasta, fai, dict, [], []") { + + when { + process { + """ + input[0] = UNTAR.out.untar.map { meta, gendb -> + [ + meta, + gendb, + [], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists:true), + [] + ] + } + input[1] = [ + [id:"fasta"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true) + ] + input[2] = [ + [id:"fai"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true) + ] + input[3] = [ + [id:"dict"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists:true) + ] + input[4] = [[],[]] + input[5] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { [it[0], path(it[1]).vcf.variantsMD5] }, + process.out.tbi.collect { [it[0], file(it[1]).name] }, + process.out.versions + ).match() } + ) + } + + } + + test("homo_sapiens - [gvcf, idx, [], []], fasta, fai, dict, [], [] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx', checkIfExists: true), + [], + [] + ] + input[1] = [ + [id:"fasta"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true) + ] + input[2] = [ + [id:"fai"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true) + ] + input[3] = [ + [id:"dict"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists:true) + ] + input[4] = [[],[]] + input[5] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/genotypegvcfs/tests/main.nf.test.snap b/modules/nf-core/gatk4/genotypegvcfs/tests/main.nf.test.snap new file mode 100644 index 00000000..30e2dc1c --- /dev/null +++ b/modules/nf-core/gatk4/genotypegvcfs/tests/main.nf.test.snap @@ -0,0 +1,191 @@ +{ + "homo_sapiens - [gendb, [], [], []], fasta, fai, dict, [], []": { + "content": [ + [ + [ + { + "id": "test" + }, + "1ab95fbc5ec55b208f3001572bec54fa" + ] + ], + [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi" + ] + ], + [ + "versions.yml:md5,fb1ca56ba52eb3fcda70dbef401f06b9" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:16:45.625453031" + }, + "homo_sapiens - [gvcf, idx, [], []], fasta, fai, dict, [], []": { + "content": [ + [ + [ + { + "id": "test" + }, + "1ab95fbc5ec55b208f3001572bec54fa" + ] + ], + [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi" + ] + ], + [ + "versions.yml:md5,fb1ca56ba52eb3fcda70dbef401f06b9" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:15:55.296562046" + }, + "homo_sapiens - [gvcf_gz, tbi, bed, []], fasta, fai, dict, [], []": { + "content": [ + [ + [ + { + "id": "test" + }, + "1ab95fbc5ec55b208f3001572bec54fa" + ] + ], + [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi" + ] + ], + [ + "versions.yml:md5,fb1ca56ba52eb3fcda70dbef401f06b9" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:16:30.471742609" + }, + "homo_sapiens - [gvcf_gz, tbi, [], []], fasta, fai, dict, dbsnp, dbsnp_tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "9b7d476515e07e5486633c42abd86cc" + ] + ], + [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi" + ] + ], + [ + "versions.yml:md5,fb1ca56ba52eb3fcda70dbef401f06b9" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:16:13.266019505" + }, + "homo_sapiens - [gvcf, idx, [], []], fasta, fai, dict, [], [] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,fb1ca56ba52eb3fcda70dbef401f06b9" + ], + "tbi": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,fb1ca56ba52eb3fcda70dbef401f06b9" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:17:19.342242664" + }, + "homo_sapiens - [gendb, bed, [], []], fasta, fai, dict, [], []": { + "content": [ + [ + [ + { + "id": "test" + }, + "1ab95fbc5ec55b208f3001572bec54fa" + ] + ], + [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi" + ] + ], + [ + "versions.yml:md5,fb1ca56ba52eb3fcda70dbef401f06b9" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:17:01.475664058" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/haplotypecaller/environment.yml b/modules/nf-core/gatk4/haplotypecaller/environment.yml new file mode 100644 index 00000000..b562b72c --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/haplotypecaller/main.nf b/modules/nf-core/gatk4/haplotypecaller/main.nf new file mode 100644 index 00000000..1ef76789 --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/main.nf @@ -0,0 +1,77 @@ +process GATK4_HAPLOTYPECALLER { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals), path(dragstr_model) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + tuple val(meta5), path(dbsnp) + tuple val(meta6), path(dbsnp_tbi) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.tbi") , optional:true, emit: tbi + tuple val(meta), path("*.realigned.bam"), optional:true, emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" + def interval_command = intervals ? "--intervals $intervals" : "" + def dragstr_command = dragstr_model ? "--dragstr-params-path $dragstr_model" : "" + def bamout_command = args.contains("--bam-writer-type") ? "--bam-output ${prefix.replaceAll('.g\\s*$', '')}.realigned.bam" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + HaplotypeCaller \\ + --input $input \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + --native-pair-hmm-threads ${task.cpus} \\ + $dbsnp_command \\ + $interval_command \\ + $dragstr_command \\ + $bamout_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bamout_command = args.contains("--bam-writer-type") ? "--bam-output ${prefix.replaceAll('.g\\s*$', '')}.realigned.bam" : "" + + def stub_realigned_bam = bamout_command ? "touch ${prefix.replaceAll('.g\\s*$', '')}.realigned.bam" : "" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + ${stub_realigned_bam} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/haplotypecaller/meta.yml b/modules/nf-core/gatk4/haplotypecaller/meta.yml new file mode 100644 index 00000000..9d4a05e9 --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/meta.yml @@ -0,0 +1,124 @@ +name: gatk4_haplotypecaller +description: Call germline SNPs and indels via local re-assembly of haplotypes +keywords: + - gatk4 + - haplotype + - haplotypecaller +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - dragstr_model: + type: file + description: Text file containing the DragSTR model of the used BAM/CRAM file + (optional) + pattern: "*.txt" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_reference' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_reference' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "fasta.fai" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_reference' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - - meta5: + type: map + description: | + Groovy Map containing dbsnp information + e.g. [ id:'test_dbsnp' ] + - dbsnp: + type: file + description: VCF file containing known sites (optional) + - - meta6: + type: map + description: | + Groovy Map containing dbsnp information + e.g. [ id:'test_dbsnp' ] + - dbsnp_tbi: + type: file + description: VCF index of dbsnp (optional) +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Index of VCF file + pattern: "*.vcf.gz.tbi" + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.realigned.bam": + type: file + description: Assembled haplotypes and locally realigned reads + pattern: "*.realigned.bam" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@suzannejin" + - "@FriederikeHanssen" +maintainers: + - "@suzannejin" + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/haplotypecaller/tests/main.nf.test b/modules/nf-core/gatk4/haplotypecaller/tests/main.nf.test new file mode 100644 index 00000000..18d35f49 --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/tests/main.nf.test @@ -0,0 +1,154 @@ +// nf-core modules test gatk4/haplotypecaller +nextflow_process { + + name "Test Process GATK4_HAPLOTYPECALLER" + script "../main.nf" + process "GATK4_HAPLOTYPECALLER" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/haplotypecaller" + + test("homo_sapiens - [bam, bai] - fasta - fai - dict") { + + when { + process { + """ + input[0] = [ + [ id:'test_bam' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [], + [] + ] + input[1] = [ [ id:'test_fa' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + input[2] = [ [ id:'test_fai' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] + input[3] = [ [ id:'test_dict' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + file(process.out.tbi[0][1]).name, + process.out.versions + ).match() + } + ) + } + + } + + test("homo_sapiens - [cram, crai] - fasta - fai - dict") { + + when { + process { + """ + input[0] = [ + [ id:'test_cram' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + [], + [] + ] + input[1] = [ [ id:'test_fa' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + input[2] = [ [ id:'test_fai' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] + input[3] = [ [ id:'test_dict' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + file(process.out.tbi[0][1]).name, + process.out.versions + ).match() + } + ) + } + + } + + test("homo_sapiens - [cram, crai] - fasta - fai - dict - sites - sites_tbi") { + + when { + process { + """ + input[0] = [ + [ id:'test_cram_sites' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + [], + [] + ] + input[1] = [ [ id:'test_fa' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + input[2] = [ [ id:'test_fai' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] + input[3] = [ [ id:'test_dict' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) ] + input[4] = [ [ id:'test_sites' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true) ] + input[5] = [ [ id:'test_sites_tbi' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + file(process.out.tbi[0][1]).name, + process.out.versions + ).match() + } + ) + } + + } + + test("homo_sapiens - [cram, crai, dragstr_model] - fasta - fai - dict - sites - sites_tbi") { + + when { + process { + """ + input[0] = [ + [ id:'test_cram_sites_dragstr' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/test_paired_end_sorted_dragstrmodel.txt', checkIfExists: true) + ] + input[1] = [ [ id:'test_fa' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + input[2] = [ [ id:'test_fai' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] + input[3] = [ [ id:'test_dict' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) ] + input[4] = [ [ id:'test_sites' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true) ] + input[5] = [ [ id:'test_sites_tbi' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + file(process.out.tbi[0][1]).name, + process.out.versions + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/haplotypecaller/tests/main.nf.test.snap b/modules/nf-core/gatk4/haplotypecaller/tests/main.nf.test.snap new file mode 100644 index 00000000..e3ba2bdf --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/tests/main.nf.test.snap @@ -0,0 +1,58 @@ +{ + "homo_sapiens - [cram, crai] - fasta - fai - dict": { + "content": [ + "test_cram.vcf.gz", + "test_cram.vcf.gz.tbi", + [ + "versions.yml:md5,2528212fed975c92514cf6641379e878" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:20:32.392608448" + }, + "homo_sapiens - [cram, crai] - fasta - fai - dict - sites - sites_tbi": { + "content": [ + "test_cram_sites.vcf.gz", + "test_cram_sites.vcf.gz.tbi", + [ + "versions.yml:md5,2528212fed975c92514cf6641379e878" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:20:49.012555768" + }, + "homo_sapiens - [bam, bai] - fasta - fai - dict": { + "content": [ + "test_bam.vcf.gz", + "test_bam.vcf.gz.tbi", + [ + "versions.yml:md5,2528212fed975c92514cf6641379e878" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:20:09.342159899" + }, + "homo_sapiens - [cram, crai, dragstr_model] - fasta - fai - dict - sites - sites_tbi": { + "content": [ + "test_cram_sites_dragstr.vcf.gz", + "test_cram_sites_dragstr.vcf.gz.tbi", + [ + "versions.yml:md5,2528212fed975c92514cf6641379e878" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:21:06.551678783" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/indexfeaturefile/environment.yml b/modules/nf-core/gatk4/indexfeaturefile/environment.yml new file mode 100644 index 00000000..b562b72c --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/indexfeaturefile/main.nf b/modules/nf-core/gatk4/indexfeaturefile/main.nf new file mode 100644 index 00000000..6993537f --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/main.nf @@ -0,0 +1,41 @@ +process GATK4_INDEXFEATUREFILE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(feature_file) + + output: + tuple val(meta), path("*.{tbi,idx}"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK IndexFeatureFile] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + IndexFeatureFile \\ + --input $feature_file \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/indexfeaturefile/meta.yml b/modules/nf-core/gatk4/indexfeaturefile/meta.yml new file mode 100644 index 00000000..cfc717d2 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/meta.yml @@ -0,0 +1,46 @@ +name: gatk4_indexfeaturefile +description: Creates an index for a feature file, e.g. VCF or BED file. +keywords: + - feature + - gatk4 + - index + - indexfeaturefile +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["BSD-3-clause"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - feature_file: + type: file + description: VCF/BED file + pattern: "*.{vcf,vcf.gz,bed,bed.gz}" +output: + - index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{tbi,idx}": + type: file + description: Index for VCF/BED file + pattern: "*.{tbi,idx}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@santiagorevale" +maintainers: + - "@santiagorevale" diff --git a/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test new file mode 100644 index 00000000..994606f1 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test @@ -0,0 +1,105 @@ +// nf-core modules test gatk4/indexfeaturefile +nextflow_process { + + name "Test Process GATK4_INDEXFEATUREFILE" + script "../main.nf" + process "GATK4_INDEXFEATUREFILE" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/indexfeaturefile" + + test("test_gatk4_indexfeaturefile_bed") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert snapshot(file(process.out.index.get(0).get(1)).name).match("geneome.bed.idx") }, + ) + } + + } + + test("test_gatk4_indexfeaturefile_bed_gz") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(file(process.out.index.get(0).get(1)).name).match("genome.bed.gz.tbi") }, + ) + } + + } + + test("test_gatk4_indexfeaturefile_vcf") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert snapshot(file(process.out.index.get(0).get(1)).name).match("test.genome.vcf.idx") }, + ) + } + + } + + test("test_gatk4_indexfeaturefile_vcf_gz") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(file(process.out.index.get(0).get(1)).name).match("test.genome.vcf.gz.tbi") }, + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap new file mode 100644 index 00000000..788b9290 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap @@ -0,0 +1,132 @@ +{ + "genome.bed.gz.tbi": { + "content": [ + "genome.bed.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:03.068725" + }, + "test_gatk4_indexfeaturefile_vcf": { + "content": [ + [ + "versions.yml:md5,3a4023239bad8505fdf37002915f8a7f" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:22:21.619530121" + }, + "geneome.bed.idx": { + "content": [ + "genome.bed.idx" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:56:46.988441" + }, + "test.genome.vcf.gz.tbi": { + "content": [ + "test.genome.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:51.898472" + }, + "test_gatk4_indexfeaturefile_bed_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.bed.gz.tbi:md5,4bc51e2351a6e83f20e13be75861f941" + ] + ], + "1": [ + "versions.yml:md5,3a4023239bad8505fdf37002915f8a7f" + ], + "index": [ + [ + { + "id": "test" + }, + "genome.bed.gz.tbi:md5,4bc51e2351a6e83f20e13be75861f941" + ] + ], + "versions": [ + "versions.yml:md5,3a4023239bad8505fdf37002915f8a7f" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:22:06.537858152" + }, + "test_gatk4_indexfeaturefile_vcf_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.genome.vcf.gz.tbi:md5,fedd68eaddf8d31257853d9da8325bd3" + ] + ], + "1": [ + "versions.yml:md5,3a4023239bad8505fdf37002915f8a7f" + ], + "index": [ + [ + { + "id": "test" + }, + "test.genome.vcf.gz.tbi:md5,fedd68eaddf8d31257853d9da8325bd3" + ] + ], + "versions": [ + "versions.yml:md5,3a4023239bad8505fdf37002915f8a7f" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:22:37.41693965" + }, + "test.genome.vcf.idx": { + "content": [ + "test.genome.vcf.idx" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:20.624337" + }, + "test_gatk4_indexfeaturefile_bed": { + "content": [ + [ + "versions.yml:md5,3a4023239bad8505fdf37002915f8a7f" + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T11:21:52.805375486" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/markduplicates/environment.yml b/modules/nf-core/gatk4/markduplicates/environment.yml new file mode 100644 index 00000000..352d20f8 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/environment.yml @@ -0,0 +1,12 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 \ No newline at end of file diff --git a/modules/nf-core/gatk4/markduplicates/main.nf b/modules/nf-core/gatk4/markduplicates/main.nf new file mode 100644 index 00000000..f4bd896b --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/main.nf @@ -0,0 +1,86 @@ +process GATK4_MARKDUPLICATES { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/92/927ff9bb80d65b425cbe752db6648a84043feff6e8ca90e60f9ff6ddbe8938d5/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel_htslib_samtools:c1e4292d6ee27439'}" + + input: + tuple val(meta), path(bam) + path fasta + path fasta_fai + + output: + tuple val(meta), path("*cram"), emit: cram, optional: true + tuple val(meta), path("*bam"), emit: bam, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.metrics"), emit: metrics + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.bam" + + // If the extension is CRAM, then change it to BAM + prefix_bam = prefix.tokenize('.')[-1] == 'cram' ? "${prefix.substring(0, prefix.lastIndexOf('.'))}.bam" : prefix + + def input_list = bam.collect { "--INPUT ${it}" }.join(' ') + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + + // Using samtools and not Markduplicates to compress to CRAM speeds up computation: + // https://medium.com/@acarroll.dna/looking-at-trade-offs-in-compression-levels-for-genomics-tools-eec2834e8b94 + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MarkDuplicates \\ + ${input_list} \\ + --OUTPUT ${prefix_bam} \\ + --METRICS_FILE ${prefix}.metrics \\ + --TMP_DIR . \\ + ${reference} \\ + ${args} + + # If cram files are wished as output, the run samtools for conversion + if [[ ${prefix} == *.cram ]]; then + samtools view -Ch -T ${fasta} -o ${prefix} ${prefix_bam} + rm ${prefix_bam} + samtools index ${prefix} + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.bam" + prefix_no_suffix = task.ext.prefix ? prefix.tokenize('.')[0] : "${meta.id}" + """ + touch ${prefix_no_suffix}.bam + touch ${prefix_no_suffix}.cram + touch ${prefix_no_suffix}.cram.crai + touch ${prefix_no_suffix}.bai + touch ${prefix}.metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/markduplicates/meta.yml b/modules/nf-core/gatk4/markduplicates/meta.yml new file mode 100644 index 00000000..4772c5f3 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/meta.yml @@ -0,0 +1,102 @@ +name: gatk4_markduplicates +description: This tool locates and tags duplicate reads in a BAM or SAM file, where + duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - bam + - gatk4 + - markduplicates + - sort +tools: + - gatk4: + description: Developed in the Data Sciences Platform at the Broad Institute, the + toolkit offers a wide variety of tools with a primary focus on variant discovery + and genotyping. Its powerful processing engine and high-performance computing + features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - - fasta: + type: file + description: Fasta file + pattern: "*.{fasta}" + - - fasta_fai: + type: file + description: Fasta index file + pattern: "*.{fai}" +output: + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*cram": + type: file + description: Marked duplicates CRAM file + pattern: "*.{cram}" + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*bam": + type: file + description: Marked duplicates BAM file + pattern: "*.{bam}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file + pattern: "*.{cram.crai}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM index file + pattern: "*.{bam.bai}" + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.metrics": + type: file + description: Duplicate metrics file generated by GATK + pattern: "*.{metrics.txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/markduplicates/tests/bam.config b/modules/nf-core/gatk4/markduplicates/tests/bam.config new file mode 100644 index 00000000..0bbfbac3 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/bam.config @@ -0,0 +1,8 @@ +process { + + withName: GATK4_MARKDUPLICATES { + ext.args = '--CREATE_INDEX true' + ext.prefix = { "${meta.id}.bam" } + } + +} diff --git a/modules/nf-core/gatk4/markduplicates/tests/cram.config b/modules/nf-core/gatk4/markduplicates/tests/cram.config new file mode 100644 index 00000000..04a9b074 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/cram.config @@ -0,0 +1,8 @@ +process { + + withName: GATK4_MARKDUPLICATES { + ext.args = '--CREATE_INDEX true' + ext.prefix = { "${meta.id}.cram" } + } + +} diff --git a/modules/nf-core/gatk4/markduplicates/tests/main.nf.test b/modules/nf-core/gatk4/markduplicates/tests/main.nf.test new file mode 100644 index 00000000..bbcf74db --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process GATK4_MARKDUPLICATES" + script "../main.nf" + process "GATK4_MARKDUPLICATES" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/markduplicates" + + test("sarscov2 - bam") { + config "./bam.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.bam).match("bam") }, + { assert snapshot(process.out.bai).match("bai") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert snapshot(file(process.out.metrics[0][1]).name).match("test.metrics") } + ) + } + } + + test("homo_sapiens - multiple bam") { + config "./bam.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ] + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.bam).match("multi bam") }, + { assert snapshot(process.out.bai).match("multi bai") }, + { assert snapshot(process.out.versions).match("multi versions") }, + { assert snapshot(file(process.out.metrics[0][1]).name).match("multi test.metrics") } + ) + } + + } + + test("homo_sapiens - multiple cram") { + config "./cram.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("multi cram")}, + { assert snapshot(file(process.out.crai[0][1]).name).match("multi crai") }, + { assert snapshot(process.out.versions).match("multi cram versions") }, + { assert snapshot(file(process.out.metrics[0][1]).name).match("multi cram test.metrics") } + ) + } + + } + + test("stub") { + config "./bam.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [] + ] + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/markduplicates/tests/main.nf.test.snap b/modules/nf-core/gatk4/markduplicates/tests/main.nf.test.snap new file mode 100644 index 00000000..227ce698 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/tests/main.nf.test.snap @@ -0,0 +1,160 @@ +{ + "multi bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d4c87a668273b589b59cf88b5a00bceb" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.03.1" + }, + "timestamp": "2025-05-06T19:13:35.646267615" + }, + "multi crai": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:43:37.780426007" + }, + "multi bai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bai:md5,2b1603773979ad06f4dbcbaa90e93e8c" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.03.1" + }, + "timestamp": "2025-05-06T19:13:35.773636892" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,836be8dcb8e596b2e5eeb2d667db0832" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.03.1" + }, + "timestamp": "2025-05-06T19:37:41.921624379" + }, + "multi test.metrics": { + "content": [ + "test.bam.metrics" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:43:11.732892667" + }, + "bai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bai:md5,fd1c3cc02f4e223d32eedeed842b86c7" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.03.1" + }, + "timestamp": "2025-05-06T19:13:21.104275114" + }, + "multi cram versions": { + "content": [ + [ + "versions.yml:md5,836be8dcb8e596b2e5eeb2d667db0832" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.03.1" + }, + "timestamp": "2025-05-06T19:38:08.855960769" + }, + "multi versions": { + "content": [ + [ + "versions.yml:md5,836be8dcb8e596b2e5eeb2d667db0832" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.03.1" + }, + "timestamp": "2025-05-06T19:37:55.051564286" + }, + "multi cram test.metrics": { + "content": [ + "test.cram.metrics" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:43:37.798977444" + }, + "multi cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:43:37.771137858" + }, + "bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,5542c553af351a43b97e84181c51a84e" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.03.1" + }, + "timestamp": "2025-05-06T19:13:21.060566406" + }, + "test.metrics": { + "content": [ + "test.bam.metrics" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-12T17:42:39.672508385" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/mergevcfs/environment.yml b/modules/nf-core/gatk4/mergevcfs/environment.yml new file mode 100644 index 00000000..b562b72c --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/mergevcfs/main.nf b/modules/nf-core/gatk4/mergevcfs/main.nf new file mode 100644 index 00000000..1752f48a --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/main.nf @@ -0,0 +1,60 @@ +process GATK4_MERGEVCFS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(vcf) + tuple val(meta2), path(dict) + + output: + tuple val(meta), path('*.vcf.gz'), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = vcf.collect{ "--INPUT $it"}.join(' ') + def reference_command = dict ? "--SEQUENCE_DICTIONARY $dict" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK MergeVcfs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MergeVcfs \\ + $input_list \\ + --OUTPUT ${prefix}.vcf.gz \\ + $reference_command \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/mergevcfs/meta.yml b/modules/nf-core/gatk4/mergevcfs/meta.yml new file mode 100644 index 00000000..b4f61d78 --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/meta.yml @@ -0,0 +1,64 @@ +name: gatk4_mergevcfs +description: Merges several vcf files +keywords: + - gatk4 + - merge + - vcf +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: Two or more VCF files + pattern: "*.{vcf,vcf.gz}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - dict: + type: file + description: Optional Sequence Dictionary as input + pattern: "*.dict" +output: + - vcf: + - meta: + type: file + description: merged vcf file + pattern: "*.vcf.gz" + - "*.vcf.gz": + type: file + description: merged vcf file + pattern: "*.vcf.gz" + - tbi: + - meta: + type: file + description: index files for the merged vcf files + pattern: "*.tbi" + - "*.tbi": + type: file + description: index files for the merged vcf files + pattern: "*.tbi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" +maintainers: + - "@kevinmenden" diff --git a/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test b/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test new file mode 100644 index 00000000..343fff68 --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test @@ -0,0 +1,90 @@ +nextflow_process { + + name "Test Process GATK4_MERGEVCFS" + script "../main.nf" + process "GATK4_MERGEVCFS" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/mergevcfs" + + test("test_gatk4_mergevcfs") { + when { + process { + """ + input[0] = [ [ id:'test' ], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz', checkIfExists: true) ]] + input[1] = [ [], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + process.out.versions, + file(process.out.vcf.get(0).get(1)).name, + file(process.out.tbi.get(0).get(1)).name + ).match("test_gatk4_mergevcfs") + }, + ) + } + + } + + test("test_gatk4_mergevcfs_no_dict") { + when { + process { + """ + input[0] = [ [ id:'test' ], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz', checkIfExists: true) ]] + input[1] = [ [],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + process.out.versions, + file(process.out.vcf.get(0).get(1)).name, + file(process.out.tbi.get(0).get(1)).name + ).match("test_gatk4_mergevcfs_no_dict") + }, + ) + } + + } + + test("test_gatk4_mergevcfs_no_dict_stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz', checkIfExists: true) ]] + input[1] = [ [],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + process.out.versions, + file(process.out.vcf.get(0).get(1)).name, + file(process.out.tbi.get(0).get(1)).name + ).match("test_gatk4_mergevcfs_no_dict_stub") + }, + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test.snap b/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test.snap new file mode 100644 index 00000000..5fab6dca --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test.snap @@ -0,0 +1,44 @@ +{ + "test_gatk4_mergevcfs_no_dict_stub": { + "content": [ + [ + "versions.yml:md5,829a91432cc945f3227b1355587d25e0" + ], + "test.vcf.gz", + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T08:29:25.813956308" + }, + "test_gatk4_mergevcfs": { + "content": [ + [ + "versions.yml:md5,829a91432cc945f3227b1355587d25e0" + ], + "test.vcf.gz", + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T08:28:42.580265083" + }, + "test_gatk4_mergevcfs_no_dict": { + "content": [ + [ + "versions.yml:md5,829a91432cc945f3227b1355587d25e0" + ], + "test.vcf.gz", + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T08:29:12.795329336" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/selectvariants/environment.yml b/modules/nf-core/gatk4/selectvariants/environment.yml new file mode 100644 index 00000000..b562b72c --- /dev/null +++ b/modules/nf-core/gatk4/selectvariants/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/selectvariants/main.nf b/modules/nf-core/gatk4/selectvariants/main.nf new file mode 100644 index 00000000..f93888e6 --- /dev/null +++ b/modules/nf-core/gatk4/selectvariants/main.nf @@ -0,0 +1,58 @@ +process GATK4_SELECTVARIANTS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(vcf), path(vcf_idx), path (intervals) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi") , emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval = intervals ? "--intervals ${intervals}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK SelectVariants] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + SelectVariants \\ + --variant $vcf \\ + --output ${prefix}.vcf.gz \\ + $interval \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/selectvariants/meta.yml b/modules/nf-core/gatk4/selectvariants/meta.yml new file mode 100644 index 00000000..5fd9c6f3 --- /dev/null +++ b/modules/nf-core/gatk4/selectvariants/meta.yml @@ -0,0 +1,68 @@ +name: gatk4_selectvariants +description: Select a subset of variants from a VCF file +keywords: + - gatk4 + - selectvariants + - vcf +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036362532-SelectVariants + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: VCF(.gz) file + pattern: "*.{vcf,vcf.gz}" + - vcf_idx: + type: list + description: VCF file index + pattern: "*.{idx,tbi}" + - intervals: + type: file + description: One or more genomic intervals over which to operate + pattern: ".intervals" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: Compressed VCF file + pattern: "*.selectvariants.vcf.gz" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz.tbi": + type: file + description: Tabix index file + pattern: "*.vcf.gz.tbi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@mjcipriano" + - "@ramprasadn" +maintainers: + - "@mjcipriano" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/selectvariants/tests/main.nf.test b/modules/nf-core/gatk4/selectvariants/tests/main.nf.test new file mode 100644 index 00000000..d8a97d2c --- /dev/null +++ b/modules/nf-core/gatk4/selectvariants/tests/main.nf.test @@ -0,0 +1,64 @@ +nextflow_process { + + name "Test Process GATK4_SELECTVARIANTS" + script "modules/nf-core/gatk4/selectvariants/main.nf" + process "GATK4_SELECTVARIANTS" + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/selectvariants" + + test("selectvariants - vcf input") { + + when { + params { + // define parameters here. Example: + // outdir = "tests/results" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2")} + ) + } + + + } + + test("selectvariants - gz input") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2")} + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/variantrecalibrator/environment.yml b/modules/nf-core/gatk4/variantrecalibrator/environment.yml new file mode 100644 index 00000000..b562b72c --- /dev/null +++ b/modules/nf-core/gatk4/variantrecalibrator/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/variantrecalibrator/main.nf b/modules/nf-core/gatk4/variantrecalibrator/main.nf new file mode 100644 index 00000000..3c6048f4 --- /dev/null +++ b/modules/nf-core/gatk4/variantrecalibrator/main.nf @@ -0,0 +1,71 @@ +process GATK4_VARIANTRECALIBRATOR { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(vcf), path(tbi) // input vcf and tbi of variants to recalibrate + path resource_vcf // resource vcf + path resource_tbi // resource tbi + val labels // string (or list of strings) containing dedicated resource labels already formatted with '--resource:' tag + path fasta + path fai + path dict + + output: + tuple val(meta), path("*.recal") , emit: recal + tuple val(meta), path("*.idx") , emit: idx + tuple val(meta), path("*.tranches"), emit: tranches + tuple val(meta), path("*plots.R") , emit: plots, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference_command = fasta ? "--reference $fasta " : '' + def labels_command = labels.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK VariantRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + VariantRecalibrator \\ + --variant $vcf \\ + --output ${prefix}.recal \\ + --tranches-file ${prefix}.tranches \\ + $reference_command \\ + --tmp-dir . \\ + $labels_command \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.recal + touch ${prefix}.idx + touch ${prefix}.tranches + touch ${prefix}plots.R + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/variantrecalibrator/meta.yml b/modules/nf-core/gatk4/variantrecalibrator/meta.yml new file mode 100644 index 00000000..72fcfd60 --- /dev/null +++ b/modules/nf-core/gatk4/variantrecalibrator/meta.yml @@ -0,0 +1,112 @@ +name: gatk4_variantrecalibrator +description: | + Build a recalibration model to score variant quality for filtering purposes. + It is highly recommended to follow GATK best practices when using this module, + the gaussian mixture model requires a large number of samples to be used for the + tool to produce optimal results. For example, 30 samples for exome data. For more details see + https://gatk.broadinstitute.org/hc/en-us/articles/4402736812443-Which-training-sets-arguments-should-I-use-for-running-VQSR- +keywords: + - gatk4 + - recalibration model + - variantrecalibrator +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - vcf: + type: file + description: input vcf file containing the variants to be recalibrated + pattern: "*.vcf.gz" + - tbi: + type: file + description: tbi file matching with -vcf + pattern: "*.vcf.gz.tbi" + - - resource_vcf: + type: file + description: all resource vcf files that are used with the corresponding '--resource' + label + pattern: "*.vcf.gz" + - - resource_tbi: + type: file + description: all resource tbi files that are used with the corresponding '--resource' + label + pattern: "*.vcf.gz.tbi" + - - labels: + type: string + description: necessary arguments for GATK VariantRecalibrator. Specified to + directly match the resources provided. More information can be found at + https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator + - - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - fai: + type: file + description: Index of reference fasta file + pattern: "fasta.fai" + - - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - recal: + - meta: + type: file + description: Output recal file used by ApplyVQSR + pattern: "*.recal" + - "*.recal": + type: file + description: Output recal file used by ApplyVQSR + pattern: "*.recal" + - idx: + - meta: + type: file + description: Index file for the recal output file + pattern: "*.idx" + - "*.idx": + type: file + description: Index file for the recal output file + pattern: "*.idx" + - tranches: + - meta: + type: file + description: Output tranches file used by ApplyVQSR + pattern: "*.tranches" + - "*.tranches": + type: file + description: Output tranches file used by ApplyVQSR + pattern: "*.tranches" + - plots: + - meta: + type: file + description: Optional output rscript file to aid in visualization of the input + data and learned model. + pattern: "*plots.R" + - "*plots.R": + type: file + description: Optional output rscript file to aid in visualization of the input + data and learned model. + pattern: "*plots.R" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GCJMackenzie" + - "@nickhsmith" +maintainers: + - "@GCJMackenzie" + - "@nickhsmith" diff --git a/modules/nf-core/gatk4/variantrecalibrator/tests/AS.config b/modules/nf-core/gatk4/variantrecalibrator/tests/AS.config new file mode 100644 index 00000000..eadb336e --- /dev/null +++ b/modules/nf-core/gatk4/variantrecalibrator/tests/AS.config @@ -0,0 +1,7 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GATK4_VARIANTRECALIBRATOR { + ext.args = '-mode SNP -an QD -an MQ -an FS -AS' + } +} diff --git a/modules/nf-core/gatk4/variantrecalibrator/tests/main.nf.test b/modules/nf-core/gatk4/variantrecalibrator/tests/main.nf.test new file mode 100644 index 00000000..2ce4b46e --- /dev/null +++ b/modules/nf-core/gatk4/variantrecalibrator/tests/main.nf.test @@ -0,0 +1,167 @@ +nextflow_process { + + name "Test Process GATK4_VARIANTRECALIBRATOR" + script "../main.nf" + process "GATK4_VARIANTRECALIBRATOR" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/variantrecalibrator" + + test("homo sapiens - vcf - allele specificity") { + config "./AS.config" + when { + process { + """ + input_vcf = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi", checkIfExists: true) + ] + + resources_vcf = [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/1000G_omni2.5.hg38.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/1000G_phase1.snps.hg38.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz", checkIfExists: true) + ] + resources_tbi = [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz.tbi", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/1000G_omni2.5.hg38.vcf.gz.tbi", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/1000G_phase1.snps.hg38.vcf.gz.tbi", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi", checkIfExists: true) + ] + labels = [ + '--resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz', + '--resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz', + '--resource:1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz', + '--resource:dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz' + ] + + fasta = file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta", checkIfExists: true) + fai = file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai", checkIfExists: true) + dict = file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.dict", checkIfExists: true) + + input = [input_vcf, resources_vcf, resources_tbi, labels, fasta, fai, dict] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.recal[0][1]).name, + file(process.out.idx[0][1]).name, + process.out.tranches, + process.out.plots, + ).match() } + ) + } + + } + + test("homo sapiens - vcf - no allele specificity") { + config "./noAS.config" + when { + process { + """ + input_vcf = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi", checkIfExists: true) + ] + + resources_vcf = [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/1000G_omni2.5.hg38.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/1000G_phase1.snps.hg38.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz", checkIfExists: true) + ] + resources_tbi = [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz.tbi", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/1000G_omni2.5.hg38.vcf.gz.tbi", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/1000G_phase1.snps.hg38.vcf.gz.tbi", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi", checkIfExists: true) + ] + labels = [ + '--resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz', + '--resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz', + '--resource:1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz', + '--resource:dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz' + ] + + fasta = file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta", checkIfExists: true) + fai = file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai", checkIfExists: true) + dict = file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.dict", checkIfExists: true) + + input = [input_vcf, resources_vcf, resources_tbi, labels, fasta, fai, dict] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.recal[0][1]).name, + file(process.out.idx[0][1]).name, + process.out.tranches, + process.out.plots, + ).match() } + ) + } + + } + + test("homo sapiens - vcf - stub") { + + options "-stub" + + when { + process { + """ + input_vcf = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi", checkIfExists: true) + ] + + resources_vcf = [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/1000G_omni2.5.hg38.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/1000G_phase1.snps.hg38.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz", checkIfExists: true) + ] + resources_tbi = [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz.tbi", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/1000G_omni2.5.hg38.vcf.gz.tbi", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/1000G_phase1.snps.hg38.vcf.gz.tbi", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi", checkIfExists: true) + ] + labels = [ + '--resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz', + '--resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz', + '--resource:1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz', + '--resource:dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz' + ] + + fasta = file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta", checkIfExists: true) + fai = file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai", checkIfExists: true) + dict = file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/chr21/sequence/genome.dict", checkIfExists: true) + + input = [input_vcf, resources_vcf, resources_tbi, labels, fasta, fai, dict] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/variantrecalibrator/tests/main.nf.test.snap b/modules/nf-core/gatk4/variantrecalibrator/tests/main.nf.test.snap new file mode 100644 index 00000000..62882f25 --- /dev/null +++ b/modules/nf-core/gatk4/variantrecalibrator/tests/main.nf.test.snap @@ -0,0 +1,133 @@ +{ + "homo sapiens - vcf - allele specificity": { + "content": [ + [ + "versions.yml:md5,fb9c98d8bd2f8335179f7c037c63bb0d" + ], + "test.recal", + "test.recal.idx", + [ + [ + { + "id": "test" + }, + "test.tranches:md5,ad52fa69325c758f458a30ee5b43d6b5" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-06T10:19:03.416258473" + }, + "homo sapiens - vcf - no allele specificity": { + "content": [ + [ + "versions.yml:md5,fb9c98d8bd2f8335179f7c037c63bb0d" + ], + "test.recal", + "test.recal.idx", + [ + [ + { + "id": "test" + }, + "test.tranches:md5,c029e52fd63a893e1154cc9144a19eeb" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-06T10:26:53.032044124" + }, + "homo sapiens - vcf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.recal:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.idx:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.tranches:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "testplots.R:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,fb9c98d8bd2f8335179f7c037c63bb0d" + ], + "idx": [ + [ + { + "id": "test" + }, + "test.idx:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "plots": [ + [ + { + "id": "test" + }, + "testplots.R:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "recal": [ + [ + { + "id": "test" + }, + "test.recal:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tranches": [ + [ + { + "id": "test" + }, + "test.tranches:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,fb9c98d8bd2f8335179f7c037c63bb0d" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-06T10:14:43.858797367" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/variantrecalibrator/tests/noAS.config b/modules/nf-core/gatk4/variantrecalibrator/tests/noAS.config new file mode 100644 index 00000000..69efa09e --- /dev/null +++ b/modules/nf-core/gatk4/variantrecalibrator/tests/noAS.config @@ -0,0 +1,8 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: GATK4_VARIANTRECALIBRATOR { + ext.args = '-mode SNP -an QD -an MQ -an FS -an SOR' + } +} diff --git a/modules/nf-core/gatk4/variantstotable/environment.yml b/modules/nf-core/gatk4/variantstotable/environment.yml new file mode 100644 index 00000000..b562b72c --- /dev/null +++ b/modules/nf-core/gatk4/variantstotable/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/variantstotable/main.nf b/modules/nf-core/gatk4/variantstotable/main.nf new file mode 100644 index 00000000..3dbb40af --- /dev/null +++ b/modules/nf-core/gatk4/variantstotable/main.nf @@ -0,0 +1,63 @@ +process GATK4_VARIANTSTOTABLE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(vcf), path(tbi), path(arguments_file), path(include_intervals), path(exclude_intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*.tsv"), emit: table + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?:'' + def prefix = task.ext.prefix ?: "${meta.id}" + def arguments_file_arg = arguments_file ? "--arguments_file ${arguments_file}" : "" + def include_intervals_arg = include_intervals ? "-L ${include_intervals}" : "" + def exclude_intervals_arg = exclude_intervals ? "-XL ${exclude_intervals}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK VariantsToTable] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + VariantsToTable \\ + ${args} \\ + --variant $vcf \\ + --output ${prefix}.tsv \\ + --reference $fasta \\ + --tmp-dir . \\ + $arguments_file_arg \\ + $include_intervals_arg \\ + $exclude_intervals_arg + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/variantstotable/meta.yml b/modules/nf-core/gatk4/variantstotable/meta.yml new file mode 100644 index 00000000..28ee5708 --- /dev/null +++ b/modules/nf-core/gatk4/variantstotable/meta.yml @@ -0,0 +1,90 @@ +name: gatk4_variantstotable +description: Extract fields from a VCF file to a tab-delimited table +keywords: + - filter + - gatk4 + - table + - vcf +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing VCF information. Attribute `gatk_args` can be used to add arguments to gatk. + e.g. [ id:'test', gatk_args:'-F CHROM -F POS -F TYPE -GF AD'] + - vcf: + type: file + description: VCF file + pattern: "*.{vcf,vcf.gz}" + - tbi: + type: file + description: Index of VCF file. + pattern: "*.{idx,tbi}" + - arguments_file: + type: file + description: "optional GATK arguments file" + pattern: "*.{txt,list,args,arguments}" + - include_intervals: + type: file + description: "optional GATK region file" + pattern: "*.{bed,bed.gz,interval,interval_list}" + - exclude_intervals: + type: file + description: "optional GATK exclude region file" + pattern: "*.{bed,bed.gz,interval,interval_list}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Fasta file of reference genome + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of fasta file + pattern: "*.fasta.fai" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - dict: + type: file + description: Sequence dictionary of fastea file + pattern: "*.dict" +output: + - table: + - meta: + type: file + description: GATK output + pattern: "*.tsv" + - "*.tsv": + type: file + description: GATK output + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@lindenb" +maintainers: + - "@lindenb" diff --git a/modules/nf-core/gatk4/variantstotable/tests/main.nf.test b/modules/nf-core/gatk4/variantstotable/tests/main.nf.test new file mode 100644 index 00000000..e6eb8207 --- /dev/null +++ b/modules/nf-core/gatk4/variantstotable/tests/main.nf.test @@ -0,0 +1,91 @@ +nextflow_process { + + name "Test Process GATK4_VARIANTSTOTABLE" + script "../main.nf" + process "GATK4_VARIANTSTOTABLE" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/variantstotable" + + test("test1_gatk4_variant_to_table") { + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx', checkIfExists: true), + [], + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test1_gatk4_variant_to_table - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx', checkIfExists: true), + [], + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + +} diff --git a/modules/nf-core/gatk4/variantstotable/tests/main.nf.test.snap b/modules/nf-core/gatk4/variantstotable/tests/main.nf.test.snap new file mode 100644 index 00000000..c6633577 --- /dev/null +++ b/modules/nf-core/gatk4/variantstotable/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "test1_gatk4_variant_to_table": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tsv:md5,fd8256465233e335beb509b0a8b43536" + ] + ], + "1": [ + "versions.yml:md5,b2bbb958a41e6a7ae2053052f4c4ae9f" + ], + "table": [ + [ + { + "id": "test" + }, + "test.tsv:md5,fd8256465233e335beb509b0a8b43536" + ] + ], + "versions": [ + "versions.yml:md5,b2bbb958a41e6a7ae2053052f4c4ae9f" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T12:11:35.809914981" + }, + "test1_gatk4_variant_to_table - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,b2bbb958a41e6a7ae2053052f4c4ae9f" + ], + "table": [ + [ + { + "id": "test" + }, + "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b2bbb958a41e6a7ae2053052f4c4ae9f" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-10-31T12:11:47.652764791" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/variantstotable/tests/nextflow.config b/modules/nf-core/gatk4/variantstotable/tests/nextflow.config new file mode 100644 index 00000000..0c5fde34 --- /dev/null +++ b/modules/nf-core/gatk4/variantstotable/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: GATK4_VARIANTSTOTABLE { + ext.args = "-F CHROM -F POS -F TYPE -GF AD" + } +} diff --git a/modules/nf-core/iqtree/environment.yml b/modules/nf-core/iqtree/environment.yml new file mode 100644 index 00000000..44d01313 --- /dev/null +++ b/modules/nf-core/iqtree/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::iqtree=2.4.0 diff --git a/modules/nf-core/iqtree/main.nf b/modules/nf-core/iqtree/main.nf new file mode 100644 index 00000000..46f0502c --- /dev/null +++ b/modules/nf-core/iqtree/main.nf @@ -0,0 +1,131 @@ +process IQTREE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/iqtree:2.4.0--h503566f_0' : + 'biocontainers/iqtree:2.4.0--h503566f_0' }" + + input: + tuple val(meta), path(alignment), path(tree) + path(tree_te) + path(lmclust) + path(mdef) + path(partitions_equal) + path(partitions_proportional) + path(partitions_unlinked) + path(guide_tree) + path(sitefreq_in) + path(constraint_tree) + path(trees_z) + path(suptree) + path(trees_rf) + + output: + tuple val(meta), path("*.treefile") , emit: phylogeny , optional: true + tuple val(meta), path("*.iqtree") , emit: report , optional: true + tuple val(meta), path("*.mldist") , emit: mldist , optional: true + tuple val(meta), path("*.lmap.svg") , emit: lmap_svg , optional: true + tuple val(meta), path("*.lmap.eps") , emit: lmap_eps , optional: true + tuple val(meta), path("*.lmap.quartetlh"), emit: lmap_quartetlh, optional: true + tuple val(meta), path("*.sitefreq") , emit: sitefreq_out , optional: true + tuple val(meta), path("*.ufboot") , emit: bootstrap , optional: true + tuple val(meta), path("*.state") , emit: state , optional: true + tuple val(meta), path("*.contree") , emit: contree , optional: true + tuple val(meta), path("*.nex") , emit: nex , optional: true + tuple val(meta), path("*.splits") , emit: splits , optional: true + tuple val(meta), path("*.suptree") , emit: suptree , optional: true + tuple val(meta), path("*.alninfo") , emit: alninfo , optional: true + tuple val(meta), path("*.partlh") , emit: partlh , optional: true + tuple val(meta), path("*.siteprob") , emit: siteprob , optional: true + tuple val(meta), path("*.sitelh") , emit: sitelh , optional: true + tuple val(meta), path("*.treels") , emit: treels , optional: true + tuple val(meta), path("*.rate ") , emit: rate , optional: true + tuple val(meta), path("*.mlrate") , emit: mlrate , optional: true + tuple val(meta), path("GTRPMIX.nex") , emit: exch_matrix , optional: true + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def alignment_arg = alignment ? "-s ${alignment.join(',')}" : '' + def tree_arg = tree ? "-t $tree" : '' + def tree_te_arg = tree_te ? "-te $tree_te" : '' + def lmclust_arg = lmclust ? "-lmclust $lmclust" : '' + def mdef_arg = mdef ? "-mdef $mdef" : '' + def partitions_equal_arg = partitions_equal ? "-q $partitions_equal" : '' + def partitions_proportional_arg = partitions_proportional ? "-spp $partitions_proportional" : '' + def partitions_unlinked_arg = partitions_unlinked ? "-sp $partitions_unlinked" : '' + def guide_tree_arg = guide_tree ? "-ft $guide_tree" : '' + def sitefreq_in_arg = sitefreq_in ? "-fs $sitefreq_in" : '' + def constraint_tree_arg = constraint_tree ? "-g $constraint_tree" : '' + def trees_z_arg = trees_z ? "-z $trees_z" : '' + def suptree_arg = suptree ? "-sup $suptree" : '' + def trees_rf_arg = trees_rf ? "-rf $trees_rf" : '' + def prefix = task.ext.prefix ?: meta.id + def memory = task.memory.toString().replaceAll(' ', '') + """ + iqtree \\ + $args \\ + $alignment_arg \\ + $tree_arg \\ + $tree_te_arg \\ + $lmclust_arg \\ + $mdef_arg \\ + $partitions_equal_arg \\ + $partitions_proportional_arg \\ + $partitions_unlinked_arg \\ + $guide_tree_arg \\ + $sitefreq_in_arg \\ + $constraint_tree_arg \\ + $trees_z_arg \\ + $suptree_arg \\ + $trees_rf\\ + -pre $prefix \\ + -nt AUTO \\ + -ntmax $task.cpus \\ + -mem $memory \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + iqtree: \$(echo \$(iqtree -version 2>&1) | sed 's/^IQ-TREE multicore version //;s/ .*//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: meta.id + """ + touch "${prefix}.treefile" + touch "${prefix}.iqtree" + touch "${prefix}.mldist" + touch "${prefix}.lmap.svg" + touch "${prefix}.lmap.eps" + touch "${prefix}.lmap.quartetlh" + touch "${prefix}.sitefreq" + touch "${prefix}.ufboot" + touch "${prefix}.state" + touch "${prefix}.contree" + touch "${prefix}.nex" + touch "${prefix}.splits" + touch "${prefix}.suptree" + touch "${prefix}.alninfo" + touch "${prefix}.partlh" + touch "${prefix}.siteprob" + touch "${prefix}.sitelh" + touch "${prefix}.treels" + touch "${prefix}.rate" + touch "${prefix}.mlrate" + touch "GTRPMIX.nex" + touch "${prefix}.log" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + iqtree: \$(echo \$(iqtree -version 2>&1) | sed 's/^IQ-TREE multicore version //;s/ .*//') + END_VERSIONS + """ + +} diff --git a/modules/nf-core/iqtree/meta.yml b/modules/nf-core/iqtree/meta.yml new file mode 100644 index 00000000..768cc215 --- /dev/null +++ b/modules/nf-core/iqtree/meta.yml @@ -0,0 +1,358 @@ +name: iqtree +description: Produces a Newick format phylogeny from a multiple sequence alignment + using the maximum likelihood algorithm. Capable of bacterial genome size alignments. +keywords: + - phylogeny + - newick + - maximum likelihood +tools: + - iqtree: + description: Efficient phylogenomic software by maximum likelihood. + homepage: http://www.iqtree.org + documentation: http://www.iqtree.org/doc + tool_dev_url: https://github.com/iqtree/iqtree2 + doi: 10.1093/molbev/msaa015 + licence: ["GPL v2-or-later"] + identifier: biotools:iqtree +input: + - - meta: + type: map + description: | + Groovy map containing sample information for the + alignment/tree file, e.g. [ id: 'test' ] + - alignment: + type: file + description: "One or more input alignment file in PHYLIP, FASTA, NEXUS, CLUSTAL + or MSF format or a directory of such files (-s)" + pattern: "*.{fasta,fas,fa,mfa,phy,aln,nex,nexus,msf}" + - tree: + type: file + description: "File containing one or multiple phylogenetic trees (-t): - Single + tree used e.g. as starting tree for tree search - Set of trees used e.g. for + distance computation, consensus tree construction" + pattern: "*.{tre,tree,treefile,newick,nwk,nex,nexus}" + - - tree_te: + type: file + description: "File containing single phylogenetic tree (-te) Use cases: - fixed + user tree to skip tree search - ancestral sequence reconstruction" + pattern: "*.{tre,tree,treefile,newick,nwk,nex,nexus}" + - - lmclust: + type: file + description: NEXUS file containing taxon clusters for quartet mapping analysis + (-lmclust) + pattern: "*.nex{us}" + - - mdef: + type: file + description: NEXUS model file defining new models (-mdef) + pattern: "*.nex{us}" + - - partitions_equal: + type: file + description: Partition file for edge-equal partition model, all partitions share + same set of branch lengths (-q) + pattern: "*.{nex,nexus,tre,tree,treefile}" + - - partitions_proportional: + type: file + description: Partition file for edge-equal partition model, all partitions share + same set of branch lengths (-spp) + pattern: "*.{nex,nexus,tre,tree,treefile}" + - - partitions_unlinked: + type: file + description: Partition file for edge-equal partition model, all partitions share + same set of branch lengths (-sp) + pattern: "*.{nex,nexus,tre,tree,treefile}" + - - guide_tree: + type: file + description: File containing guide tree for inference of site frequency profiles + (-ft) + pattern: "*.{nex,nexus,tre,tree,treefile}" + - - sitefreq_in: + type: file + description: Site frequency file (-fs) + pattern: "*.sitefreq" + - - constraint_tree: + type: file + description: File containing opological constraint tree in NEWICK format. The + constraint tree can be a multifurcating tree and need not to include all taxa. + (-g) + pattern: "*.{nwk,newick}" + - - trees_z: + type: file + description: File containing a set of trees for which log-likelihoods should + be computed (-z) + - - suptree: + type: file + description: File containing input “target” tree, support values are extracted + from trees passed via -t, and mapped onto the target tree (-sup) + - - trees_rf: + type: file + description: "File containing a second tree set (-rf). Used for computing the + distance to the primary tree set (`tree`)" + pattern: "*.{tre,tree,treefile,newick,nwk,nex,nexus}" +output: + - phylogeny: + - meta: + type: file + description: A phylogeny in Newick format + pattern: "*.{treefile}" + - "*.treefile": + type: file + description: A phylogeny in Newick format + pattern: "*.{treefile}" + - report: + - meta: + type: file + description: | + Main report file containing computational + results as well as a textual visualization + of the final tree + pattern: "*.{iqtree}" + - "*.iqtree": + type: file + description: | + Main report file containing computational + results as well as a textual visualization + of the final tree + pattern: "*.{iqtree}" + - mldist: + - meta: + type: file + description: | + File containing the pairwise maximum + likelihood distances as a matrix + pattern: "*.{mldist}" + - "*.mldist": + type: file + description: | + File containing the pairwise maximum + likelihood distances as a matrix + pattern: "*.{mldist}" + - lmap_svg: + - meta: + type: file + description: | + File containing likelihood mapping analysis + results in .svg format (-lmap/-lmclust) + pattern: "*.lmap.svg" + - "*.lmap.svg": + type: file + description: | + File containing likelihood mapping analysis + results in .svg format (-lmap/-lmclust) + pattern: "*.lmap.svg" + - lmap_eps: + - meta: + type: file + description: | + File containing likelihood mapping analysis + results in .eps format (-lmap/-lmclust) + pattern: "*.lmap.eps" + - "*.lmap.eps": + type: file + description: | + File containing likelihood mapping analysis + results in .eps format (-lmap/-lmclust) + pattern: "*.lmap.eps" + - lmap_quartetlh: + - meta: + type: file + description: | + File containing quartet log-likelihoods (-wql) + pattern: "*.lmap.quartetlh" + - "*.lmap.quartetlh": + type: file + description: | + File containing quartet log-likelihoods (-wql) + pattern: "*.lmap.quartetlh" + - sitefreq_out: + - meta: + type: file + description: | + File containing site frequency profiles (-ft) + pattern: "*.sitefreq" + - "*.sitefreq": + type: file + description: | + File containing site frequency profiles (-ft) + pattern: "*.sitefreq" + - bootstrap: + - meta: + type: file + description: | + File containing all bootstrap trees (-wbt/-wbtl) + pattern: "*.ufboot" + - "*.ufboot": + type: file + description: | + File containing all bootstrap trees (-wbt/-wbtl) + pattern: "*.ufboot" + - state: + - meta: + type: file + description: | + File containing ancestral sequences for all + nodes of the tree by empirical Bayesian method (-asr) + pattern: "*.{state}" + - "*.state": + type: file + description: | + File containing ancestral sequences for all + nodes of the tree by empirical Bayesian method (-asr) + pattern: "*.{state}" + - contree: + - meta: + type: file + description: | + File containing consensus tree (-con/-bb) + pattern: "*.{contree}" + - "*.contree": + type: file + description: | + File containing consensus tree (-con/-bb) + pattern: "*.{contree}" + - nex: + - meta: + type: file + description: | + File containing consensus network (-net/-bb) + pattern: "*.{nex}" + - "*.nex": + type: file + description: | + File containing consensus network (-net/-bb) + pattern: "*.{nex}" + - splits: + - meta: + type: file + description: | + File containing consensus network in star-dot format (-wsplits) + pattern: "*.{splits}" + - "*.splits": + type: file + description: | + File containing consensus network in star-dot format (-wsplits) + pattern: "*.{splits}" + - suptree: + - meta: + type: file + description: | + File containing tree with assigned support + values based on supplied "target" tree (-sup) + pattern: "*.{suptree}" + - "*.suptree": + type: file + description: | + File containing tree with assigned support + values based on supplied "target" tree (-sup) + pattern: "*.{suptree}" + - alninfo: + - meta: + type: file + description: | + File containing alignment site statistics (-alninfo) + pattern: "*.{alninfo}" + - "*.alninfo": + type: file + description: | + File containing alignment site statistics (-alninfo) + pattern: "*.{alninfo}" + - partlh: + - meta: + type: file + description: | + File containing partition log-likelihoods (-wpl) + pattern: "*.{partlh}" + - "*.partlh": + type: file + description: | + File containing partition log-likelihoods (-wpl) + pattern: "*.{partlh}" + - siteprob: + - meta: + type: file + description: | + File containing site posterior probabilities (-wspr/-wspm/-wspmr) + pattern: "*.{siteprob}" + - "*.siteprob": + type: file + description: | + File containing site posterior probabilities (-wspr/-wspm/-wspmr) + pattern: "*.{siteprob}" + - sitelh: + - meta: + type: file + description: | + File containing site log-likelihoods (-wsl/-wslr/-wslm/-wslmr) + pattern: "*.{sitelh}" + - "*.sitelh": + type: file + description: | + File containing site log-likelihoods (-wsl/-wslr/-wslm/-wslmr) + pattern: "*.{sitelh}" + - treels: + - meta: + type: file + description: | + File containing all locally optimal trees (-wt) + pattern: "*.{treels}" + - "*.treels": + type: file + description: | + File containing all locally optimal trees (-wt) + pattern: "*.{treels}" + - rate: + - meta: + type: file + description: | + File containing inferred site-specific + evolutionary rates (-wsr) + pattern: "*.{rate}" + - "*.rate ": + type: file + description: | + File containing inferred site-specific + evolutionary rates (-wsr) + pattern: "*.{rate}" + - mlrate: + - meta: + type: file + description: | + File containing site-specific substitution + rates determined by maximum likelihood (--mlrate) + pattern: "*.{mlrate}" + - "*.mlrate": + type: file + description: | + File containing site-specific substitution + rates determined by maximum likelihood (--mlrate) + pattern: "*.{mlrate}" + - exch_matrix: + - meta: + type: file + description: File containing the exchangeability matrix obtained from the optimization + (--link-exchange-rates) + pattern: "GTRPMIX.nex" + - GTRPMIX.nex: + type: file + description: File containing the exchangeability matrix obtained from the optimization + (--link-exchange-rates) + pattern: "GTRPMIX.nex" + - log: + - meta: + type: file + description: Log file of entire run + pattern: "*.{log}" + - "*.log": + type: file + description: Log file of entire run + pattern: "*.{log}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@avantonder" + - "@aunderwo" +maintainers: + - "@avantonder" + - "@aunderwo" diff --git a/modules/nf-core/iqtree/tests/iqtree_bootstrap.config b/modules/nf-core/iqtree/tests/iqtree_bootstrap.config new file mode 100644 index 00000000..28cc6624 --- /dev/null +++ b/modules/nf-core/iqtree/tests/iqtree_bootstrap.config @@ -0,0 +1,5 @@ +process { + withName: "IQTREE" { + ext.args = "-bb 1000 -wbt -wsplits -m LG" + } +} diff --git a/modules/nf-core/iqtree/tests/iqtree_optional.config b/modules/nf-core/iqtree/tests/iqtree_optional.config new file mode 100644 index 00000000..104861f2 --- /dev/null +++ b/modules/nf-core/iqtree/tests/iqtree_optional.config @@ -0,0 +1,5 @@ +process { + withName: "IQTREE" { + ext.args = "-m C10 -asr -alninfo -wpl -wspr -wsl -wsr --mlrate -lmap 150 -wql --mlrate" + } +} diff --git a/modules/nf-core/iqtree/tests/iqtree_treels.config b/modules/nf-core/iqtree/tests/iqtree_treels.config new file mode 100644 index 00000000..adfe8fb3 --- /dev/null +++ b/modules/nf-core/iqtree/tests/iqtree_treels.config @@ -0,0 +1,5 @@ +process { + withName: "IQTREE" { + ext.args = "-m LG -wt" + } +} diff --git a/modules/nf-core/iqtree/tests/main.nf.test b/modules/nf-core/iqtree/tests/main.nf.test new file mode 100644 index 00000000..bc70a7a0 --- /dev/null +++ b/modules/nf-core/iqtree/tests/main.nf.test @@ -0,0 +1,323 @@ +nextflow_process { + + name "Test Process IQTREE" + script "../main.nf" + process "IQTREE" + + tag "modules" + tag "modules_nfcore" + tag "iqtree" + + test("setoxin - basic") { + + when { + process { + """ + input[0] = [ [ id: "test" ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists:true), + [] ] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + input[6] = [] + input[7] = [] + input[8] = [] + input[9] = [] + input[10] = [] + input[11] = [] + input[12] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.phylogeny.get(0).get(1)).readLines().first().contains("1apf") }, + { assert process.out.lmap_svg == []}, + { assert process.out.lmap_eps == []}, + { assert process.out.lmap_quartetlh == []}, + { assert process.out.sitefreq_out == []}, + { assert process.out.bootstrap == []}, + { assert process.out.state == []}, + { assert process.out.contree == []}, + { assert process.out.nex == []}, + { assert process.out.splits == []}, + { assert process.out.suptree == []}, + { assert process.out.alninfo == []}, + { assert process.out.partlh == []}, + { assert process.out.siteprob == []}, + { assert process.out.sitelh == []}, + { assert process.out.treels == []}, + { assert process.out.rate == []}, + { assert process.out.mlrate == []}, + { assert process.out.exch_matrix == []}, + { assert snapshot( path(process.out.mldist.get(0).get(1)).readLines().first(), + path(process.out.report.get(0).get(1)).readLines().first(), + path(process.out.log.get(0).get(1)).readLines().first(), + process.out.versions ).match() } + ) + } + } + +test("setoxin - treels") { + + config "./iqtree_treels.config" + + when { + process { + """ + input[0] = [ [ id: "test" ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists:true), + [] ] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + input[6] = [] + input[7] = [] + input[8] = [] + input[9] = [] + input[10] = [] + input[11] = [] + input[12] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.phylogeny.get(0).get(1)).readLines().first().contains("1apf") }, + { assert path(process.out.treels.get(0).get(1)).readLines().first().contains("1apf") }, + { assert process.out.lmap_svg == []}, + { assert process.out.lmap_eps == []}, + { assert process.out.lmap_quartetlh == []}, + { assert process.out.sitefreq_out == []}, + { assert process.out.bootstrap == []}, + { assert process.out.state == []}, + { assert process.out.contree == []}, + { assert process.out.nex == []}, + { assert process.out.splits == []}, + { assert process.out.suptree == []}, + { assert process.out.alninfo == []}, + { assert process.out.partlh == []}, + { assert process.out.siteprob == []}, + { assert process.out.sitelh == []}, + { assert process.out.rate == []}, + { assert process.out.mlrate == []}, + { assert process.out.exch_matrix == []}, + { assert snapshot( path(process.out.mldist.get(0).get(1)).readLines().first(), + path(process.out.report.get(0).get(1)).readLines().first(), + path(process.out.log.get(0).get(1)).readLines().first(), + process.out.versions ).match() } + ) + } + } + + + test("setoxin - bootstrap") { + + config "./iqtree_bootstrap.config" + + when { + process { + """ + input[0] = [ [ id: "test" ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists:true), + [] ] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + input[6] = [] + input[7] = [] + input[8] = [] + input[9] = [] + input[10] = [] + input[11] = [] + input[12] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.phylogeny.get(0).get(1)).readLines().first().contains("1apf") }, + { assert path(process.out.bootstrap.get(0).get(1)).readLines().first().contains("1apf") }, + { assert path(process.out.contree.get(0).get(1)).readLines().first().contains("1apf") }, + { assert path(process.out.splits.get(0).get(1)).readLines().first().contains("*") }, + { assert process.out.lmap_svg == []}, + { assert process.out.lmap_eps == []}, + { assert process.out.lmap_quartetlh == []}, + { assert process.out.sitefreq_out == []}, + { assert process.out.state == []}, + { assert process.out.suptree == []}, + { assert process.out.alninfo == []}, + { assert process.out.partlh == []}, + { assert process.out.siteprob == []}, + { assert process.out.sitelh == []}, + { assert process.out.treels == []}, + { assert process.out.rate == []}, + { assert process.out.mlrate == []}, + { assert process.out.exch_matrix == []}, + { assert snapshot( path(process.out.nex.get(0).get(1)).readLines()[0..12], + path(process.out.mldist.get(0).get(1)).readLines().first(), + path(process.out.report.get(0).get(1)).readLines().first(), + path(process.out.log.get(0).get(1)).readLines().first(), + process.out.versions ).match() } + ) + } + } + + test("hydrogenase - sup") { + + when { + process { + """ + input[0] = [ [ id: "test" ], + [], + file("https://raw.githubusercontent.com/nf-core/test-datasets/phyloplace/testdata/PF14720_seed.ft.LGCAT.newick", checkIfExists:true)] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + input[6] = [] + input[7] = [] + input[8] = [] + input[9] = [] + input[10] = [] + input[11] = Channel.of( + file("https://raw.githubusercontent.com/nf-core/test-datasets/phyloplace/testdata/PF14720_seed.ft.LGCAT.newick", checkIfExists:true) + .text) + .collectFile(name: 'hydrogenase.newick') + input[12] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.log.get(0).get(1)).readLines().first().contains("IQ-TREE") }, + { assert process.out.phylogeny == []}, + { assert process.out.report == []}, + { assert process.out.mldist == []}, + { assert process.out.lmap_svg == []}, + { assert process.out.lmap_eps == []}, + { assert process.out.lmap_quartetlh == []}, + { assert process.out.sitefreq_out == []}, + { assert process.out.bootstrap == []}, + { assert process.out.state == []}, + { assert process.out.contree == []}, + { assert process.out.nex == []}, + { assert process.out.splits == []}, + { assert process.out.alninfo == []}, + { assert process.out.partlh == []}, + { assert process.out.siteprob == []}, + { assert process.out.sitelh == []}, + { assert process.out.treels == []}, + { assert process.out.rate == []}, + { assert process.out.mlrate == []}, + { assert process.out.exch_matrix == []}, + { assert snapshot( path(process.out.log.get(0).get(1)).readLines().first(), + process.out.suptree, + process.out.versions ).match() } + ) + } + } + + test("hydrogenase - optional") { + + config "./iqtree_optional.config" + + when { + process { + """ + input[0] = [ [ id: "test" ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/phyloplace/testdata/PF14720_seed.alnfaa", checkIfExists:true), + [] ] + input[1] = file("https://raw.githubusercontent.com/nf-core/test-datasets/phyloplace/testdata/PF14720_seed.ft.LGCAT.newick", checkIfExists:true) + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + input[6] = [] + input[7] = [] + input[8] = [] + input[9] = [] + input[10] = [] + input[11] = [] + input[12] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.phylogeny.get(0).get(1)).readLines().first().contains("E1QGS5_DESB2/226-309") }, + { assert process.out.mldist == []}, + { assert process.out.sitefreq_out == []}, + { assert process.out.bootstrap == []}, + { assert process.out.contree == []}, + { assert process.out.nex == []}, + { assert process.out.suptree == []}, + { assert process.out.partlh == []}, + { assert process.out.exch_matrix == []}, + { assert snapshot( path(process.out.lmap_eps.get(0).get(1)).readLines()[0..4], + path(process.out.lmap_svg.get(0).get(1)).readLines()[0..5], + path(process.out.lmap_quartetlh.get(0).get(1)).readLines().first(), + path(process.out.mlrate.get(0).get(1)).readLines()[0..5], + path(process.out.report.get(0).get(1)).readLines().first(), + path(process.out.log.get(0).get(1)).readLines().first(), + process.out.treels, + process.out.alninfo, + process.out.siteprob, + process.out.sitelh, + process.out.rate, + process.out.state, + process.out.splits, + process.out.versions ).match() } + ) + } + } + + test("stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id: "test" ] , [], []] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + input[6] = [] + input[7] = [] + input[8] = [] + input[9] = [] + input[10] = [] + input[11] = [] + input[12] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( process.out ).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/iqtree/tests/main.nf.test.snap b/modules/nf-core/iqtree/tests/main.nf.test.snap new file mode 100644 index 00000000..96e2b65e --- /dev/null +++ b/modules/nf-core/iqtree/tests/main.nf.test.snap @@ -0,0 +1,527 @@ +{ + "setoxin - bootstrap": { + "content": [ + [ + "#nexus", + "", + "BEGIN Taxa;", + "DIMENSIONS ntax=5;", + "TAXLABELS", + "[1] '1apf'", + "[2] '1ahl'", + "[3] '1atx'", + "[4] '1sh1'", + "[5] '1bds'", + ";", + "END; [Taxa]", + "" + ], + "5", + "IQ-TREE 2.4.0 built Feb 12 2025", + "IQ-TREE multicore version 2.4.0 for Linux x86 64-bit built Feb 12 2025", + [ + "versions.yml:md5,e7cd33418cd75cafb5b8eeb03d681f63" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-29T11:11:57.926210656" + }, + "hydrogenase - sup": { + "content": [ + "IQ-TREE multicore version 2.4.0 for Linux x86 64-bit built Feb 12 2025", + [ + [ + { + "id": "test" + }, + "test.suptree:md5,4ad04b2105448a802d5eff346af67786" + ] + ], + [ + "versions.yml:md5,e7cd33418cd75cafb5b8eeb03d681f63" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-29T11:12:04.199205515" + }, + "setoxin - treels": { + "content": [ + "5", + "IQ-TREE 2.4.0 built Feb 12 2025", + "IQ-TREE multicore version 2.4.0 for Linux x86 64-bit built Feb 12 2025", + [ + "versions.yml:md5,e7cd33418cd75cafb5b8eeb03d681f63" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-29T11:11:47.332511976" + }, + "setoxin - basic": { + "content": [ + "5", + "IQ-TREE 2.4.0 built Feb 12 2025", + "IQ-TREE multicore version 2.4.0 for Linux x86 64-bit built Feb 12 2025", + [ + "versions.yml:md5,e7cd33418cd75cafb5b8eeb03d681f63" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-29T11:11:37.672175583" + }, + "hydrogenase - optional": { + "content": [ + [ + "%!PS-Adobe-3.0 EPSF-3.0", + "%%BoundingBox: 60 210 550 650", + "%%Pages: 1", + "%%Creator: IQ-TREE/TREE-PUZZLE", + "%%Title: Likelihood Mapping Analysis" + ], + [ + "", + "", + " versions.yml + "${task.process}": + ismapper: \$( echo \$( ismap --version 2>&1 ) | sed 's/^.*ismap //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p results/${prefix} + + touch results/${prefix}/${prefix}_left_final.fastq + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ismapper: \$( echo \$( ismap --version 2>&1 ) | sed 's/^.*ismap //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/ismapper/meta.yml b/modules/nf-core/ismapper/meta.yml new file mode 100644 index 00000000..bb413e7a --- /dev/null +++ b/modules/nf-core/ismapper/meta.yml @@ -0,0 +1,54 @@ +name: ismapper +description: Identify insertion sites positions in bacterial genomes +keywords: + - fastq + - insertion + - bacteria +tools: + - ismapper: + description: A mapping-based tool for identification of the site and orientation + of IS insertions in bacterial genomes. + homepage: https://github.com/jhawkey/IS_mapper + documentation: https://github.com/jhawkey/IS_mapper + tool_dev_url: https://github.com/jhawkey/IS_mapper + doi: "10.1186/s12864-015-1860-2" + licence: ["BSD-3-Clause"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: A set of paired-end FASTQ files + pattern: "*.{fastq.gz,fq.gz}" + - reference: + type: file + description: Reference genome in GenBank format + pattern: "*.{gbk}" + - query: + type: file + description: Insertion sequences to query in FASTA format + pattern: "*.{fasta,fa}" +output: + - results: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - results/*: + type: directory + description: Directory containing ISMapper result files + pattern: "*/*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/ismapper/tests/main.nf.test b/modules/nf-core/ismapper/tests/main.nf.test new file mode 100644 index 00000000..961f887d --- /dev/null +++ b/modules/nf-core/ismapper/tests/main.nf.test @@ -0,0 +1,88 @@ +import groovy.io.FileType + +nextflow_process { + + name "Test Process ISMAPPER" + script "../main.nf" + process "ISMAPPER" + + tag "modules" + tag "modules_nfcore" + tag "ismapper" + + test("test-ismapper") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + file("https://github.com/jhawkey/IS_mapper/raw/master/test/inputs/S_suis_P17.gbk", checkIfExists: true), + file("https://github.com/jhawkey/IS_mapper/raw/master/test/inputs/ISSsu3.fasta", checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { + def all_files = [] + + file(process.out.results[0][1]).eachFileRecurse (FileType.FILES) { file -> + all_files << file + } + + def all_file_names = all_files.collect { it.name }.toSorted() + + def stable_file_names = [ + 'test__AM946016.1_table.txt' + ] + + def stable_files = all_files.findAll { it.name in stable_file_names }.toSorted() + + assert snapshot( + all_file_names, + stable_files, + process.out.versions[0] + ).match() + } + ) + } + } + + test("test-ismapper-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + file("https://github.com/jhawkey/IS_mapper/raw/master/test/inputs/S_suis_P17.gbk", checkIfExists: true), + file("https://github.com/jhawkey/IS_mapper/raw/master/test/inputs/ISSsu3.fasta", checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/ismapper/tests/main.nf.test.snap b/modules/nf-core/ismapper/tests/main.nf.test.snap new file mode 100644 index 00000000..51318d3e --- /dev/null +++ b/modules/nf-core/ismapper/tests/main.nf.test.snap @@ -0,0 +1,67 @@ +{ + "test-ismapper": { + "content": [ + [ + "test_ISSsu3_left_final.fastq", + "test_ISSsu3_right_final.fastq", + "test__AM946016.1_closest.bed", + "test__AM946016.1_intersect.bed", + "test__AM946016.1_table.txt", + "test_left_AM946016.1_finalcov.bed", + "test_left_AM946016.1_merged.sorted.bed", + "test_left_AM946016.1_unpaired.bed", + "test_right_AM946016.1_finalcov.bed", + "test_right_AM946016.1_merged.sorted.bed", + "test_right_AM946016.1_unpaired.bed" + ], + [ + "test__AM946016.1_table.txt:md5,9e05cda3990cb841db2bfb6e6e04a1f5" + ], + "versions.yml:md5,bbe2280116459026bfc2304b2b6c0f5f" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-02T14:33:02.03683" + }, + "test-ismapper-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_left_final.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,bbe2280116459026bfc2304b2b6c0f5f" + ], + "results": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_left_final.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,bbe2280116459026bfc2304b2b6c0f5f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-02T14:31:56.815545" + } +} \ No newline at end of file diff --git a/modules/nf-core/lofreq/call/environment.yml b/modules/nf-core/lofreq/call/environment.yml new file mode 100644 index 00000000..4ade529a --- /dev/null +++ b/modules/nf-core/lofreq/call/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::lofreq=2.1.5 diff --git a/modules/nf-core/lofreq/call/main.nf b/modules/nf-core/lofreq/call/main.nf new file mode 100644 index 00000000..bfe8f213 --- /dev/null +++ b/modules/nf-core/lofreq/call/main.nf @@ -0,0 +1,50 @@ +process LOFREQ_CALL { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/lofreq:2.1.5--py38h588ecb2_4' : + 'biocontainers/lofreq:2.1.5--py38h588ecb2_4' }" + + input: + tuple val(meta), path(bam), path(intervals) + path fasta + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def options_intervals = intervals ? "-l ${intervals}" : "" + """ + lofreq \\ + call \\ + $args \\ + $options_intervals \\ + -f $fasta \\ + -o ${prefix}.vcf.gz \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lofreq: \$(echo \$(lofreq version 2>&1) | sed 's/^version: //; s/ *commit.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo | gzip > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lofreq: \$(echo \$(lofreq version 2>&1) | sed 's/^version: //; s/ *commit.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/lofreq/call/meta.yml b/modules/nf-core/lofreq/call/meta.yml new file mode 100644 index 00000000..d2895b8d --- /dev/null +++ b/modules/nf-core/lofreq/call/meta.yml @@ -0,0 +1,53 @@ +name: lofreq_call +description: Lofreq subcommand to call low frequency variants from alignments +keywords: + - variant calling + - low frequency variant calling + - lofreq + - lofreq/call +tools: + - lofreq: + description: A fast and sensitive variant-caller for inferring SNVs and indels + from next-generation sequencing data + homepage: https://csb5.github.io/lofreq/ + documentation: https://csb5.github.io/lofreq/commands/ + doi: "10.1093/nar/gks918 " + licence: ["MIT"] + identifier: biotools:lofreq +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM input file + pattern: "*.{bam}" + - intervals: + type: file + description: BED file containing target regions for variant calling + pattern: "*.{bed}" + - - fasta: + type: file + description: The reference fasta file +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: VCF output file + pattern: "*.{vcf}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@bjohnnyd" +maintainers: + - "@bjohnnyd" diff --git a/modules/nf-core/lofreq/call/tests/main.nf.test b/modules/nf-core/lofreq/call/tests/main.nf.test new file mode 100644 index 00000000..208df18d --- /dev/null +++ b/modules/nf-core/lofreq/call/tests/main.nf.test @@ -0,0 +1,94 @@ + +nextflow_process { + + name "Test Process LOFREQ_CALL" + script "../main.nf" + process "LOFREQ_CALL" + + tag "modules" + tag "modules_nfcore" + tag "lofreq" + tag "lofreq/call" + + test("test-lofreq-call") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.vcf[0][1]).vcf.summary, + process.out.versions + ).match() + } + ) + } + } + + test("test-lofreq-call-intervals") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.vcf[0][1]).vcf.summary, + process.out.versions + ).match() + } + ) + } + } + + test("test-lofreq-call-intervals-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/lofreq/call/tests/main.nf.test.snap b/modules/nf-core/lofreq/call/tests/main.nf.test.snap new file mode 100644 index 00000000..4689298a --- /dev/null +++ b/modules/nf-core/lofreq/call/tests/main.nf.test.snap @@ -0,0 +1,63 @@ +{ + "test-lofreq-call": { + "content": [ + "VcfFile [chromosomes=[], sampleCount=0, variantCount=0, phased=true, phasedAutodetect=true]", + [ + "versions.yml:md5,15df54aa4144c3622e5033b9cc762407" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-30T21:57:46.175949" + }, + "test-lofreq-call-intervals-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,15df54aa4144c3622e5033b9cc762407" + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,15df54aa4144c3622e5033b9cc762407" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-30T21:56:44.869711" + }, + "test-lofreq-call-intervals": { + "content": [ + "VcfFile [chromosomes=[], sampleCount=0, variantCount=0, phased=true, phasedAutodetect=true]", + [ + "versions.yml:md5,15df54aa4144c3622e5033b9cc762407" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-30T21:57:50.722934" + } +} \ No newline at end of file diff --git a/modules/nf-core/lofreq/filter/environment.yml b/modules/nf-core/lofreq/filter/environment.yml new file mode 100644 index 00000000..4ade529a --- /dev/null +++ b/modules/nf-core/lofreq/filter/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::lofreq=2.1.5 diff --git a/modules/nf-core/lofreq/filter/main.nf b/modules/nf-core/lofreq/filter/main.nf new file mode 100644 index 00000000..a5b64952 --- /dev/null +++ b/modules/nf-core/lofreq/filter/main.nf @@ -0,0 +1,46 @@ +process LOFREQ_FILTER { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/lofreq:2.1.5--py38h588ecb2_4' : + 'biocontainers/lofreq:2.1.5--py38h588ecb2_4' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + lofreq \\ + filter \\ + $args \\ + -i $vcf \\ + -o ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lofreq: \$(echo \$(lofreq version 2>&1) | sed 's/^version: //; s/ *commit.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lofreq: \$(echo \$(lofreq version 2>&1) | sed 's/^version: //; s/ *commit.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/lofreq/filter/meta.yml b/modules/nf-core/lofreq/filter/meta.yml new file mode 100644 index 00000000..f73d440e --- /dev/null +++ b/modules/nf-core/lofreq/filter/meta.yml @@ -0,0 +1,48 @@ +name: lofreq_filter +description: Lofreq subcommand to remove variants with low coverage or strand bias + potential +keywords: + - variant calling + - low frequency variant calling + - filtering + - lofreq + - lofreq/filter +tools: + - lofreq: + description: A fast and sensitive variant-caller for inferring SNVs and indels + from next-generation sequencing data + homepage: https://csb5.github.io/lofreq/ + documentation: https://csb5.github.io/lofreq/commands/ + doi: "10.1093/nar/gks918 " + licence: ["MIT"] + identifier: biotools:lofreq +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF input file + pattern: "*.{vcf}" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.gz": + type: file + description: VCF filtered output file + pattern: "*.{vcf}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@bjohnnyd" +maintainers: + - "@bjohnnyd" diff --git a/modules/nf-core/lofreq/filter/tests/main.nf.test b/modules/nf-core/lofreq/filter/tests/main.nf.test new file mode 100644 index 00000000..bff32a89 --- /dev/null +++ b/modules/nf-core/lofreq/filter/tests/main.nf.test @@ -0,0 +1,74 @@ +nextflow_process { + + name "Test Process LOFREQ_FILTER" + script "../main.nf" + process "LOFREQ_FILTER" + + tag "modules" + tag "modules_nfcore" + tag "lofreq" + tag "lofreq/filter" + + test("sarscov2 - vcf") { + when { + process { + """ + input[0] = [ + [ id:'testvcf' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - vcf.gz") { + when { + process { + """ + input[0] = [ + [ id:'testvcf' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - vcf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'testvcf' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} \ No newline at end of file diff --git a/modules/nf-core/lofreq/filter/tests/main.nf.test.snap b/modules/nf-core/lofreq/filter/tests/main.nf.test.snap new file mode 100644 index 00000000..74f46e71 --- /dev/null +++ b/modules/nf-core/lofreq/filter/tests/main.nf.test.snap @@ -0,0 +1,101 @@ +{ + "sarscov2 - vcf.gz": { + "content": [ + { + "0": [ + [ + { + "id": "testvcf" + }, + "testvcf.vcf.gz:md5,f5d1d25be974a4dc1cbb7103a4c0c33c" + ] + ], + "1": [ + "versions.yml:md5,efcc2f31cb8b2b23438358460b0643b1" + ], + "vcf": [ + [ + { + "id": "testvcf" + }, + "testvcf.vcf.gz:md5,f5d1d25be974a4dc1cbb7103a4c0c33c" + ] + ], + "versions": [ + "versions.yml:md5,efcc2f31cb8b2b23438358460b0643b1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-24T10:06:37.482906518" + }, + "sarscov2 - vcf": { + "content": [ + { + "0": [ + [ + { + "id": "testvcf" + }, + "testvcf.vcf.gz:md5,f5d1d25be974a4dc1cbb7103a4c0c33c" + ] + ], + "1": [ + "versions.yml:md5,efcc2f31cb8b2b23438358460b0643b1" + ], + "vcf": [ + [ + { + "id": "testvcf" + }, + "testvcf.vcf.gz:md5,f5d1d25be974a4dc1cbb7103a4c0c33c" + ] + ], + "versions": [ + "versions.yml:md5,efcc2f31cb8b2b23438358460b0643b1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-24T10:06:31.229732594" + }, + "sarscov2 - vcf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "testvcf" + }, + "testvcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,efcc2f31cb8b2b23438358460b0643b1" + ], + "vcf": [ + [ + { + "id": "testvcf" + }, + "testvcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,efcc2f31cb8b2b23438358460b0643b1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-24T10:06:46.494604414" + } +} \ No newline at end of file diff --git a/modules/nf-core/lofreq/indelqual/environment.yml b/modules/nf-core/lofreq/indelqual/environment.yml new file mode 100644 index 00000000..4ade529a --- /dev/null +++ b/modules/nf-core/lofreq/indelqual/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::lofreq=2.1.5 diff --git a/modules/nf-core/lofreq/indelqual/main.nf b/modules/nf-core/lofreq/indelqual/main.nf new file mode 100644 index 00000000..d73a8d6e --- /dev/null +++ b/modules/nf-core/lofreq/indelqual/main.nf @@ -0,0 +1,48 @@ +process LOFREQ_INDELQUAL { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/lofreq:2.1.5--py38h588ecb2_4' : + 'biocontainers/lofreq:2.1.5--py38h588ecb2_4' }" + + input: + tuple val(meta), path(bam) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + lofreq indelqual \\ + $args \\ + -f $fasta \\ + -o ${prefix}.bam \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lofreq: \$(echo \$(lofreq version 2>&1) | sed 's/^version: //; s/ *commit.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lofreq: \$(echo \$(lofreq version 2>&1) | sed 's/^version: //; s/ *commit.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/lofreq/indelqual/meta.yml b/modules/nf-core/lofreq/indelqual/meta.yml new file mode 100644 index 00000000..00788b11 --- /dev/null +++ b/modules/nf-core/lofreq/indelqual/meta.yml @@ -0,0 +1,57 @@ +name: lofreq_indelqual +description: Inserts indel qualities in a BAM file +keywords: + - variant calling + - variants + - bam + - indel + - qualities +tools: + - lofreq: + description: Lofreq is a fast and sensitive variant-caller for inferring SNVs + and indels from next-generation sequencing data. It's indelqual programme inserts + indel qualities in a BAM file + homepage: https://csb5.github.io/lofreq/ + doi: "10.1093/nar/gks918" + licence: ["MIT"] + identifier: biotools:lofreq +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - - meta2: + type: map + description: | + Groovy Map containing sample information about the reference fasta + e.g. [ id:'reference' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fasta,fa}" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: BAM file with indel qualities inserted into it + pattern: "*.{bam}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kaurravneet4123" +maintainers: + - "@kaurravneet4123" + - "@krannich479" diff --git a/modules/nf-core/lofreq/indelqual/tests/main.nf.test b/modules/nf-core/lofreq/indelqual/tests/main.nf.test new file mode 100644 index 00000000..3771f22f --- /dev/null +++ b/modules/nf-core/lofreq/indelqual/tests/main.nf.test @@ -0,0 +1,71 @@ +nextflow_process { + + name "Test Process LOFREQ_INDELQUAL" + script "../main.nf" + process "LOFREQ_INDELQUAL" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "lofreq" + tag "lofreq/indelqual" + + test("sarscov2 - bam") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + def bam = file(process.out.bam.get(0).get(1)) + assert bam.exists() + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/lofreq/indelqual/tests/main.nf.test.snap b/modules/nf-core/lofreq/indelqual/tests/main.nf.test.snap new file mode 100644 index 00000000..4090efd3 --- /dev/null +++ b/modules/nf-core/lofreq/indelqual/tests/main.nf.test.snap @@ -0,0 +1,49 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.indelqual.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,822d596299afbb3ae6db66bfa9b77b3e" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.indelqual.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,822d596299afbb3ae6db66bfa9b77b3e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-19T13:31:30.184343961" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,822d596299afbb3ae6db66bfa9b77b3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-19T13:30:40.688426508" + } +} \ No newline at end of file diff --git a/modules/nf-core/lofreq/indelqual/tests/nextflow.config b/modules/nf-core/lofreq/indelqual/tests/nextflow.config new file mode 100644 index 00000000..b2eaec18 --- /dev/null +++ b/modules/nf-core/lofreq/indelqual/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: LOFREQ_INDELQUAL { + ext.args = '--dindel' + ext.prefix = { "${meta.id}.indelqual" } + } +} diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 00000000..7019a72e --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" + """ + touch ${input}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 00000000..db8df0d5 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,71 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file +output: + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 00000000..ca34fb5c --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,140 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi") { + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..72d65e81 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,250 @@ +{ + "csi - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:25.261127166" + }, + "crai - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:12.653194876" + }, + "bai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:01.854932651" + }, + "csi": { + "content": [ + "test.paired_end.sorted.bam.csi", + [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:51.485364222" + }, + "crai": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:40.518873972" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:21.184050361" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ b/modules/nf-core/samtools/merge/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf new file mode 100644 index 00000000..631b2025 --- /dev/null +++ b/modules/nf-core/samtools/merge/main.nf @@ -0,0 +1,63 @@ +process SAMTOOLS_MERGE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input_files, stageAs: "?/*") + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) + + output: + tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam + tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai") , optional:true, emit: crai + path "versions.yml" , emit: versions + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def reference = fasta ? "--reference ${fasta}" : "" + """ + # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). + samtools \\ + merge \\ + --threads ${task.cpus-1} \\ + $args \\ + ${reference} \\ + ${prefix}.${file_type} \\ + $input_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def index_type = file_type == "bam" ? "csi" : "crai" + def index = args.contains("--write-index") ? "touch ${prefix}.${index_type}" : "" + """ + touch ${prefix}.${file_type} + ${index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml new file mode 100644 index 00000000..99b740b1 --- /dev/null +++ b/modules/nf-core/samtools/merge/meta.yml @@ -0,0 +1,113 @@ +name: samtools_merge +description: Merge BAM or CRAM file +keywords: + - merge + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of the reference file the CRAM was created with (optional) + pattern: "*.fai" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - gzi: + type: file + description: Index of the compressed reference file the CRAM was created with (optional) + pattern: "*.gzi" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: CRAM file + pattern: "*.{cram}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/merge/tests/index.config b/modules/nf-core/samtools/merge/tests/index.config new file mode 100644 index 00000000..8c5668cf --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test b/modules/nf-core/samtools/merge/tests/main.nf.test new file mode 100644 index 00000000..50d2a3ff --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test @@ -0,0 +1,181 @@ +nextflow_process { + + name "Test Process SAMTOOLS_MERGE" + script "../main.nf" + process "SAMTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/merge" + + test("bams") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_bam") }, + { assert snapshot(process.out.cram).match("bams_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_csi") }, + { assert snapshot(process.out.crai).match("bams_crai") }, + { assert snapshot(process.out.versions).match("bams_versions") } + ) + } + } + + test("crams_fastq") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("crams_fastq_cram") }, + { assert snapshot(process.out.bam).match("crams_fastq_bam") }, + { assert snapshot(file(process.out.crai[0][1]).name).match("crams_fastq_crai") }, + { assert snapshot(process.out.csi).match("crams_fastq_csi") }, + { assert snapshot(process.out.versions).match("crams_fastq_versions") } + ) + } + } + + test("crams_fastq_gz") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.gz', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.gz.fai', checkIfExists: true) + ]) + input[3] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.gz.gzi', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("crams_fastq_gz_cram") }, + { assert snapshot(process.out.bam).match("crams_fastq_gz_bam") }, + { assert snapshot(file(process.out.crai[0][1]).name).match("crams_fastq_gz_crai") }, + { assert snapshot(process.out.csi).match("crams_fastq_gz_csi") }, + { assert snapshot(process.out.versions).match("crams_fastq_gz_versions") } + ) + } + } + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("bams_stub") { + + config "./index.config" + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_stub_bam") }, + { assert snapshot(process.out.cram).match("bams_stub_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_stub_csi") }, + { assert snapshot(process.out.crai).match("bams_stub_crai") }, + { assert snapshot(process.out.versions).match("bams_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..0b837cab --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -0,0 +1,284 @@ +{ + "bams_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:40:44.797676421" + }, + "bams_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:40:44.783173052" + }, + "bams_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:40:44.792460597" + }, + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:41:55.974418321" + }, + "crams_fastq_gz_crai": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:50:33.529727229" + }, + "crams_fastq_gz_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:50:33.513173841" + }, + "crams_fastq_gz_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:50:33.539712108" + }, + "crams_fastq_gz_versions": { + "content": [ + [ + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:50:33.545307164" + }, + "bams_versions": { + "content": [ + [ + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:40:44.800270761" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:41:55.996124261" + }, + "crams_fastq_versions": { + "content": [ + [ + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:50:07.563858152" + }, + "bams_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:42:10.763406881" + }, + "crams_fastq_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:50:07.547955833" + }, + "crams_fastq_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:50:07.532545756" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:41:56.002249949" + }, + "crams_fastq_crai": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:50:07.554334614" + }, + "bams_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:40:44.795206814" + }, + "bams_stub_csi": { + "content": [ + "test.csi" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:42:10.770631442" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:41:55.989829888" + }, + "bams_stub_versions": { + "content": [ + [ + "versions.yml:md5,d51d18a97513e370e43f0c891c51dfc4" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:42:10.784506816" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:41:55.983466113" + }, + "crams_fastq_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:50:07.559375147" + }, + "bams_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:42:10.752478012" + }, + "bams_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:42:10.777646961" + }, + "crams_fastq_gz_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-05-21T16:50:33.523135774" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf new file mode 100644 index 00000000..4443948b --- /dev/null +++ b/modules/nf-core/samtools/stats/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + --threads ${task.cpus} \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 00000000..77b020f7 --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,66 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" +output: + - stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.stats": + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test new file mode 100644 index 00000000..5bc89309 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,113 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/stats" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } +} diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file mode 100644 index 00000000..df507be7 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,142 @@ +{ + "cram": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,a27fe55e49a341f92379bb20a65c6a06" + ] + ], + "1": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,a27fe55e49a341f92379bb20a65c6a06" + ] + ], + "versions": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:29:16.767396182" + }, + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:29:29.721580274" + }, + "cram - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:29:53.567964304" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d53a2584376d78942839e9933a34d11b" + ] + ], + "1": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d53a2584376d78942839e9933a34d11b" + ] + ], + "versions": [ + "versions.yml:md5,15b91d8c0e0440332e0fe4df80957043" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:28:50.73610604" + } +} \ No newline at end of file diff --git a/modules/nf-core/snpdists/environment.yml b/modules/nf-core/snpdists/environment.yml new file mode 100644 index 00000000..6ea1558b --- /dev/null +++ b/modules/nf-core/snpdists/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::snp-dists=0.8.2 diff --git a/modules/nf-core/snpdists/main.nf b/modules/nf-core/snpdists/main.nf new file mode 100644 index 00000000..9e8e2b60 --- /dev/null +++ b/modules/nf-core/snpdists/main.nf @@ -0,0 +1,33 @@ +process SNPDISTS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/snp-dists:0.8.2--h5bf99c6_0' : + 'biocontainers/snp-dists:0.8.2--h5bf99c6_0' }" + + input: + tuple val(meta), path(alignment) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + snp-dists \\ + $args \\ + $alignment > ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpdists: \$(snp-dists -v 2>&1 | sed 's/snp-dists //;') + END_VERSIONS + """ +} diff --git a/modules/nf-core/snpdists/meta.yml b/modules/nf-core/snpdists/meta.yml new file mode 100644 index 00000000..612638f9 --- /dev/null +++ b/modules/nf-core/snpdists/meta.yml @@ -0,0 +1,45 @@ +name: snpdists +description: Pairwise SNP distance matrix from a FASTA sequence alignment +keywords: + - snp + - dist + - distance + - matrix +tools: + - snpdists: + description: Convert a FASTA alignment to SNP distance matrix + homepage: https://github.com/tseemann/snp-dists + documentation: https://github.com/tseemann/snp-dists + tool_dev_url: https://github.com/tseemann/snp-dists + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - alignment: + type: file + description: The input FASTA sequence alignment file + pattern: "*.{fasta,fasta.gz}" +output: + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tsv": + type: file + description: The output TSV file containing SNP distance matrix + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" +maintainers: + - "@abhi18av" diff --git a/modules/nf-core/snpdists/tests/main.nf.test b/modules/nf-core/snpdists/tests/main.nf.test new file mode 100644 index 00000000..f1d7c7f6 --- /dev/null +++ b/modules/nf-core/snpdists/tests/main.nf.test @@ -0,0 +1,32 @@ + +nextflow_process { + + name "Test Process SNPDISTS" + script "../main.nf" + process "SNPDISTS" + + tag "modules" + tag "modules_nfcore" + tag "snpdists" + + test("test-snpdists") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/informative_sites.fas', checkIfExists: true) ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/snpdists/tests/main.nf.test.snap b/modules/nf-core/snpdists/tests/main.nf.test.snap new file mode 100644 index 00000000..b38ede9d --- /dev/null +++ b/modules/nf-core/snpdists/tests/main.nf.test.snap @@ -0,0 +1,37 @@ +{ + "test-snpdists": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,0018e5ec43990eb16abe2411fff4e47e" + ] + ], + "1": [ + "versions.yml:md5,62490c6d1f42c5d2eebeea9fcf78f619" + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,0018e5ec43990eb16abe2411fff4e47e" + ] + ], + "versions": [ + "versions.yml:md5,62490c6d1f42c5d2eebeea9fcf78f619" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-28T17:00:19.004853" + } +} \ No newline at end of file diff --git a/modules/nf-core/snpeff/download/environment.yml b/modules/nf-core/snpeff/download/environment.yml new file mode 100644 index 00000000..48236b6f --- /dev/null +++ b/modules/nf-core/snpeff/download/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::snpeff=5.1 diff --git a/modules/nf-core/snpeff/download/main.nf b/modules/nf-core/snpeff/download/main.nf new file mode 100644 index 00000000..e4225933 --- /dev/null +++ b/modules/nf-core/snpeff/download/main.nf @@ -0,0 +1,54 @@ +process SNPEFF_DOWNLOAD { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_2' : + 'biocontainers/snpeff:5.1--hdfd78af_2' }" + + input: + tuple val(meta), val(snpeff_db) + + output: + tuple val(meta), path('snpeff_cache'), emit: cache + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def avail_mem = 6144 + if (!task.memory) { + log.info '[snpEff] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + snpEff \\ + -Xmx${avail_mem}M \\ + download ${snpeff_db} \\ + -dataDir \${PWD}/snpeff_cache \\ + ${args} + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS + """ + + stub: + """ + mkdir -p snpeff_cache/${snpeff_db} + + touch snpeff_cache/${snpeff_db}/sequence.I.bin + touch snpeff_cache/${snpeff_db}/sequence.bin + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS + """ +} diff --git a/modules/nf-core/snpeff/download/meta.yml b/modules/nf-core/snpeff/download/meta.yml new file mode 100644 index 00000000..a3211fc7 --- /dev/null +++ b/modules/nf-core/snpeff/download/meta.yml @@ -0,0 +1,49 @@ +name: snpeff_download +description: Genetic variant annotation and functional effect prediction toolbox +keywords: + - annotation + - effect prediction + - snpeff + - variant + - vcf +tools: + - snpeff: + description: | + SnpEff is a variant annotation and effect prediction tool. + It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). + homepage: https://pcingola.github.io/SnpEff/ + documentation: https://pcingola.github.io/SnpEff/se_introduction/ + licence: ["MIT"] + identifier: biotools:snpeff +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - genome: + type: file + description: Reference genome in FASTA format + pattern: "*.{fasta,fna,fa}" + - cache_version: + type: string + description: Version of the snpEff cache to download +output: + - cache: + - meta: + type: file + description: | + snpEff cache + - snpeff_cache: + type: file + description: | + snpEff cache + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/snpeff/download/tests/main.nf.test b/modules/nf-core/snpeff/download/tests/main.nf.test new file mode 100644 index 00000000..ef547c6f --- /dev/null +++ b/modules/nf-core/snpeff/download/tests/main.nf.test @@ -0,0 +1,51 @@ + +nextflow_process { + + name "Test Process SNPEFF_DOWNLOAD" + script "../main.nf" + process "SNPEFF_DOWNLOAD" + + tag "modules" + tag "modules_nfcore" + tag "snpeff" + tag "snpeff/download" + + test("test-snpeff-download") { + + when { + process { + """ + input[0] = [ [ id:"WBcel235.105" ], "WBcel235.105" ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test-snpeff-download-stub") { + options '-stub' + when { + process { + """ + input[0] = [ [ id:"WBcel235.105" ], "WBcel235.105" ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/snpeff/download/tests/main.nf.test.snap b/modules/nf-core/snpeff/download/tests/main.nf.test.snap new file mode 100644 index 00000000..5bccdd8a --- /dev/null +++ b/modules/nf-core/snpeff/download/tests/main.nf.test.snap @@ -0,0 +1,100 @@ +{ + "test-snpeff-download-stub": { + "content": [ + { + "0": [ + [ + { + "id": "WBcel235.105" + }, + [ + [ + "sequence.I.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "sequence.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "1": [ + "versions.yml:md5,5fc7ed9f548eccf5fac9fdefc12ef56e" + ], + "cache": [ + [ + { + "id": "WBcel235.105" + }, + [ + [ + "sequence.I.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "sequence.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "versions": [ + "versions.yml:md5,5fc7ed9f548eccf5fac9fdefc12ef56e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-19T12:48:45.183665736" + }, + "test-snpeff-download": { + "content": [ + { + "0": [ + [ + { + "id": "WBcel235.105" + }, + [ + [ + "sequence.I.bin:md5,2fd1694bd91cf7952cbad8cfed161e53", + "sequence.II.bin:md5,bacedbdea89508e108223767fa260a4c", + "sequence.III.bin:md5,444118a9fb9d0a03c37e86094d8e52a9", + "sequence.IV.bin:md5,ff756628faa0b71cd65495668c3d82b5", + "sequence.V.bin:md5,d6ad5476162ac45829f719dd4ee3f4e7", + "sequence.X.bin:md5,b79bec6cc8f96b8373dac56bab5d0a6c", + "sequence.bin:md5,ec2bc2ae81755ab90fcf1848bc7ce41f", + "snpEffectPredictor.bin:md5,1d99251d0405f0a42913ed8b5b2c2fa7" + ] + ] + ] + ], + "1": [ + "versions.yml:md5,5fc7ed9f548eccf5fac9fdefc12ef56e" + ], + "cache": [ + [ + { + "id": "WBcel235.105" + }, + [ + [ + "sequence.I.bin:md5,2fd1694bd91cf7952cbad8cfed161e53", + "sequence.II.bin:md5,bacedbdea89508e108223767fa260a4c", + "sequence.III.bin:md5,444118a9fb9d0a03c37e86094d8e52a9", + "sequence.IV.bin:md5,ff756628faa0b71cd65495668c3d82b5", + "sequence.V.bin:md5,d6ad5476162ac45829f719dd4ee3f4e7", + "sequence.X.bin:md5,b79bec6cc8f96b8373dac56bab5d0a6c", + "sequence.bin:md5,ec2bc2ae81755ab90fcf1848bc7ce41f", + "snpEffectPredictor.bin:md5,1d99251d0405f0a42913ed8b5b2c2fa7" + ] + ] + ] + ], + "versions": [ + "versions.yml:md5,5fc7ed9f548eccf5fac9fdefc12ef56e" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T14:27:47.123555" + } +} \ No newline at end of file diff --git a/modules/nf-core/snpeff/snpeff/environment.yml b/modules/nf-core/snpeff/snpeff/environment.yml new file mode 100644 index 00000000..48236b6f --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::snpeff=5.1 diff --git a/modules/nf-core/snpeff/snpeff/main.nf b/modules/nf-core/snpeff/snpeff/main.nf new file mode 100644 index 00000000..a59758b6 --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/main.nf @@ -0,0 +1,65 @@ +process SNPEFF_SNPEFF { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_2' : + 'biocontainers/snpeff:5.1--hdfd78af_2' }" + + input: + tuple val(meta), path(vcf) + val db + tuple val(meta2), path(cache) + + output: + tuple val(meta), path("*.ann.vcf"), emit: vcf + tuple val(meta), path("*.csv"), emit: report + tuple val(meta), path("*.html"), emit: summary_html + tuple val(meta), path("*.genes.txt"), emit: genes_txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def avail_mem = 6144 + if (!task.memory) { + log.info '[snpEff] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + def prefix = task.ext.prefix ?: "${meta.id}" + def cache_command = cache ? "-dataDir \${PWD}/${cache}" : "" + """ + snpEff \\ + -Xmx${avail_mem}M \\ + $db \\ + $args \\ + -csvStats ${prefix}.csv \\ + $cache_command \\ + $vcf \\ + > ${prefix}.ann.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.ann.vcf + touch ${prefix}.csv + touch ${prefix}.html + touch ${prefix}.genes.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS + """ + +} diff --git a/modules/nf-core/snpeff/snpeff/meta.yml b/modules/nf-core/snpeff/snpeff/meta.yml new file mode 100644 index 00000000..ef3d495a --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/meta.yml @@ -0,0 +1,90 @@ +name: snpeff_snpeff +description: Genetic variant annotation and functional effect prediction toolbox +keywords: + - annotation + - effect prediction + - snpeff + - variant + - vcf +tools: + - snpeff: + description: | + SnpEff is a variant annotation and effect prediction tool. + It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). + homepage: https://pcingola.github.io/SnpEff/ + documentation: https://pcingola.github.io/SnpEff/se_introduction/ + licence: ["MIT"] + identifier: biotools:snpeff +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + vcf to annotate + - - db: + type: string + description: | + which db to annotate with + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - cache: + type: file + description: | + path to snpEff cache (optional) +output: + - vcf: + - meta: + type: file + description: | + annotated vcf + pattern: "*.ann.vcf" + - "*.ann.vcf": + type: file + description: | + annotated vcf + pattern: "*.ann.vcf" + - report: + - meta: + type: file + description: snpEff report csv file + pattern: "*.csv" + - "*.csv": + type: file + description: snpEff report csv file + pattern: "*.csv" + - summary_html: + - meta: + type: file + description: snpEff summary statistics in html file + pattern: "*.html" + - "*.html": + type: file + description: snpEff summary statistics in html file + pattern: "*.html" + - genes_txt: + - meta: + type: file + description: txt (tab separated) file having counts of the number of variants + affecting each transcript and gene + pattern: "*.genes.txt" + - "*.genes.txt": + type: file + description: txt (tab separated) file having counts of the number of variants + affecting each transcript and gene + pattern: "*.genes.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/snpeff/snpeff/tests/main.nf.test b/modules/nf-core/snpeff/snpeff/tests/main.nf.test new file mode 100644 index 00000000..661e9672 --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process SNPEFF_SNPEFF" + script "../main.nf" + process "SNPEFF_SNPEFF" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "modules_snpeff" + tag "snpeff" + tag "snpeff/download" + tag "snpeff/snpeff" + + test("test_SNPEFF_SNPEFF") { + + setup { + run("SNPEFF_DOWNLOAD") { + script "../../download/main.nf" + process { + """ + input[0] = Channel.of([[id:params.snpeff_db], params.snpeff_db]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ]) + input[1] = params.snpeff_db + input[2] = SNPEFF_DOWNLOAD.out.cache + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.report[0][1]).exists() }, + { assert path(process.out.summary_html[0][1]).exists() }, + { assert path(process.out.vcf[0][1]).exists() }, + { assert snapshot(process.out.genes_txt).match("genes_txt") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_SNPEFF_SNPEFF - stub") { + + options "-stub" + + setup { + run("SNPEFF_DOWNLOAD") { + script "../../download/main.nf" + process { + """ + input[0] = Channel.of([[id:params.snpeff_db], params.snpeff_db]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ]) + input[1] = params.snpeff_db + input[2] = SNPEFF_DOWNLOAD.out.cache + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } +} diff --git a/modules/nf-core/snpeff/snpeff/tests/main.nf.test.snap b/modules/nf-core/snpeff/snpeff/tests/main.nf.test.snap new file mode 100644 index 00000000..9fc0c9f5 --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/tests/main.nf.test.snap @@ -0,0 +1,112 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,25d44a118d558b331d51ec00be0d997c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-03-18T17:37:18.879477" + }, + "genes_txt": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.genes.txt:md5,130536bf0237d7f3f746d32aaa32840a" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-03-18T17:37:18.874822" + }, + "test_SNPEFF_SNPEFF - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.ann.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.genes.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,25d44a118d558b331d51ec00be0d997c" + ], + "genes_txt": [ + [ + { + "id": "test" + }, + "test.genes.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "report": [ + [ + { + "id": "test" + }, + "test.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "summary_html": [ + [ + { + "id": "test" + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.ann.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,25d44a118d558b331d51ec00be0d997c" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-03-05T14:55:15.626663647" + } +} \ No newline at end of file diff --git a/modules/nf-core/snpeff/snpeff/tests/nextflow.config b/modules/nf-core/snpeff/snpeff/tests/nextflow.config new file mode 100644 index 00000000..a950a047 --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + snpeff_db = "WBcel235.105" +} diff --git a/modules/nf-core/snpsites/environment.yml b/modules/nf-core/snpsites/environment.yml new file mode 100644 index 00000000..d6bb02c0 --- /dev/null +++ b/modules/nf-core/snpsites/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::snp-sites=2.5.1 diff --git a/modules/nf-core/snpsites/main.nf b/modules/nf-core/snpsites/main.nf new file mode 100644 index 00000000..c0cc6081 --- /dev/null +++ b/modules/nf-core/snpsites/main.nf @@ -0,0 +1,50 @@ +process SNPSITES { + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/snp-sites:2.5.1--hed695b0_0' : + 'biocontainers/snp-sites:2.5.1--hed695b0_0' }" + + input: + path alignment + + output: + path "*.fas" , emit: fasta + path "*.sites.txt" , emit: constant_sites + path "versions.yml" , emit: versions + env CONSTANT_SITES, emit: constant_sites_string + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + snp-sites \\ + $alignment \\ + $args \\ + > filtered_alignment.fas + + echo \$(snp-sites -C $alignment) > constant.sites.txt + + CONSTANT_SITES=\$(cat constant.sites.txt) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpsites: \$(snp-sites -V 2>&1 | sed 's/snp-sites //') + END_VERSIONS + """ + stub: + """ + touch filtered_alignment.fas + touch constant.sites.txt + CONSTANT_SITES=\$(cat constant.sites.txt) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpsites: \$(snp-sites -V 2>&1 | sed 's/snp-sites //') + END_VERSIONS + """ + +} diff --git a/modules/nf-core/snpsites/meta.yml b/modules/nf-core/snpsites/meta.yml new file mode 100644 index 00000000..eafac8e8 --- /dev/null +++ b/modules/nf-core/snpsites/meta.yml @@ -0,0 +1,43 @@ +name: snpsites +description: Rapidly extracts SNPs from a multi-FASTA alignment. +keywords: + - SNPs + - invariant + - constant +tools: + - snpsites: + description: Rapidly extracts SNPs from a multi-FASTA alignment. + homepage: https://www.sanger.ac.uk/tool/snp-sites/ + documentation: https://github.com/sanger-pathogens/snp-sites + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - alignment: + type: file + description: fasta alignment file + pattern: "*.{fasta,fas,fa,aln}" +output: + - fasta: + - "*.fas": + type: file + description: Variant fasta file + pattern: "*.{fas}" + - constant_sites: + - "*.sites.txt": + type: file + description: Text file containing counts of constant sites + pattern: "*.{sites.txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + - constant_sites_string: + - CONSTANT_SITES: + type: integer + description: Value with the number of constant sites + pattern: "*.{sites.txt}" +authors: + - "@avantonder" +maintainers: + - "@avantonder" diff --git a/modules/nf-core/snpsites/tests/main.nf.test b/modules/nf-core/snpsites/tests/main.nf.test new file mode 100644 index 00000000..60ae4b9a --- /dev/null +++ b/modules/nf-core/snpsites/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + + name "Test Process SNPSITES" + script "../main.nf" + process "SNPSITES" + + tag "modules" + tag "modules_nfcore" + tag "snpsites" + + test("sarscov2 - all_sites_fas") { + + when { + process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/all_sites.fas', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - all_sites_fas - stub") { + options "-stub" + when { + process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/all_sites.fas', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/snpsites/tests/main.nf.test.snap b/modules/nf-core/snpsites/tests/main.nf.test.snap new file mode 100644 index 00000000..71522e3b --- /dev/null +++ b/modules/nf-core/snpsites/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - all_sites_fas - stub": { + "content": [ + { + "0": [ + "filtered_alignment.fas:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "constant.sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "2": [ + "versions.yml:md5,af6942a02036a7ff2eadb6ecf344c619" + ], + "3": [ + "" + ], + "constant_sites": [ + "constant.sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "constant_sites_string": [ + "" + ], + "fasta": [ + "filtered_alignment.fas:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,af6942a02036a7ff2eadb6ecf344c619" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T15:20:57.589176616" + }, + "sarscov2 - all_sites_fas": { + "content": [ + { + "0": [ + "filtered_alignment.fas:md5,f96c7513003e878e16fa9eac9fcda0f4" + ], + "1": [ + "constant.sites.txt:md5,8b9b226e3787f7baaefce07405af22c9" + ], + "2": [ + "versions.yml:md5,af6942a02036a7ff2eadb6ecf344c619" + ], + "3": [ + "8789,5363,5747,9456" + ], + "constant_sites": [ + "constant.sites.txt:md5,8b9b226e3787f7baaefce07405af22c9" + ], + "constant_sites_string": [ + "8789,5363,5747,9456" + ], + "fasta": [ + "filtered_alignment.fas:md5,f96c7513003e878e16fa9eac9fcda0f4" + ], + "versions": [ + "versions.yml:md5,af6942a02036a7ff2eadb6ecf344c619" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T15:15:41.760289537" + } +} \ No newline at end of file diff --git a/modules/nf-core/tbprofiler/profile/environment.yml b/modules/nf-core/tbprofiler/profile/environment.yml new file mode 100644 index 00000000..04b40ecd --- /dev/null +++ b/modules/nf-core/tbprofiler/profile/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::tb-profiler=3.0.8 diff --git a/modules/nf-core/tbprofiler/profile/main.nf b/modules/nf-core/tbprofiler/profile/main.nf new file mode 100644 index 00000000..4d5f432a --- /dev/null +++ b/modules/nf-core/tbprofiler/profile/main.nf @@ -0,0 +1,41 @@ +process TBPROFILER_PROFILE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/tb-profiler:3.0.8--pypyh5e36f6f_0' : + 'biocontainers/tb-profiler:3.0.8--pypyh5e36f6f_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("bam/*.bam") , emit: bam + tuple val(meta), path("results/*.csv") , emit: csv, optional: true + tuple val(meta), path("results/*.json"), emit: json + tuple val(meta), path("results/*.txt") , emit: txt, optional: true + tuple val(meta), path("vcf/*.vcf.gz") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def input_reads = meta.single_end ? "--read1 $reads" : "--read1 ${reads[0]} --read2 ${reads[1]}" + """ + tb-profiler \\ + profile \\ + $args \\ + --prefix ${prefix} \\ + --threads $task.cpus \\ + $input_reads + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tbprofiler: \$( echo \$(tb-profiler --version 2>&1) | sed 's/TBProfiler version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tbprofiler/profile/meta.yml b/modules/nf-core/tbprofiler/profile/meta.yml new file mode 100644 index 00000000..dd0e0b4a --- /dev/null +++ b/modules/nf-core/tbprofiler/profile/meta.yml @@ -0,0 +1,86 @@ +name: tbprofiler_profile +description: A tool to detect resistance and lineages of M. tuberculosis genomes +keywords: + - Mycobacterium tuberculosis + - resistance + - serotype +tools: + - tbprofiler: + description: Profiling tool for Mycobacterium tuberculosis to detect drug resistance + and lineage from WGS data + homepage: https://github.com/jodyphelan/TBProfiler + documentation: https://jodyphelan.gitbook.io/tb-profiler/ + tool_dev_url: https://github.com/jodyphelan/TBProfiler + doi: "10.1186/s13073-019-0650-x" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: FASTQ file + pattern: "*.{fastq.gz,fq.gz}" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam/*.bam: + type: file + description: BAM file with alignment details + pattern: "*.bam" + - csv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - results/*.csv: + type: file + description: Optional CSV formatted result file of resistance and strain type + pattern: "*.csv" + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - results/*.json: + type: file + description: JSON formatted result file of resistance and strain type + pattern: "*.json" + - txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - results/*.txt: + type: file + description: Optional text file of resistance and strain type + pattern: "*.txt" + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf/*.vcf.gz: + type: file + description: VCF with variant info again reference genomes + pattern: "*.vcf" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/tbprofiler/profile/tests/main.nf.test b/modules/nf-core/tbprofiler/profile/tests/main.nf.test new file mode 100644 index 00000000..6bd9c8b8 --- /dev/null +++ b/modules/nf-core/tbprofiler/profile/tests/main.nf.test @@ -0,0 +1,78 @@ + +nextflow_process { + + name "Test Process TBPROFILER_PROFILE" + script "../main.nf" + process "TBPROFILER_PROFILE" + + tag "modules" + tag "modules_nfcore" + tag "tbprofiler" + tag "tbprofiler/profile" + + test("test-tbprofiler-profile-illumina") { + config "./nextflow.illumina.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.csv, + file(process.out.json[0][1]).name, // unstable + process.out.txt, + path(process.out.vcf[0][1]).vcf.summary, + process.out.versions + ).match() + } + ) + } + } + + test("test-tbprofiler-profile-nanopore") { + config "./nextflow.nanopore.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.csv, + file(process.out.json[0][1]).name, // unstable + process.out.txt, + path(process.out.vcf[0][1]).vcf.summary, + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/tbprofiler/profile/tests/main.nf.test.snap b/modules/nf-core/tbprofiler/profile/tests/main.nf.test.snap new file mode 100644 index 00000000..8c59c20a --- /dev/null +++ b/modules/nf-core/tbprofiler/profile/tests/main.nf.test.snap @@ -0,0 +1,44 @@ +{ + "test-tbprofiler-profile-illumina": { + "content": [ + "643637f42d74111b557bded525fecf95", + [ + + ], + "test.results.json", + [ + + ], + "VcfFile [chromosomes=[], sampleCount=1, variantCount=0, phased=true, phasedAutodetect=true]", + [ + "versions.yml:md5,7e204ef6bca3bc02ae72295e980040c2" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T13:48:51.627166" + }, + "test-tbprofiler-profile-nanopore": { + "content": [ + "46da99abc50c7fa4a7fbbf853dca9e11", + [ + + ], + "test.results.json", + [ + + ], + "VcfFile [chromosomes=[], sampleCount=1, variantCount=0, phased=true, phasedAutodetect=true]", + [ + "versions.yml:md5,7e204ef6bca3bc02ae72295e980040c2" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T13:49:17.330177" + } +} \ No newline at end of file diff --git a/modules/nf-core/tbprofiler/profile/tests/nextflow.illumina.config b/modules/nf-core/tbprofiler/profile/tests/nextflow.illumina.config new file mode 100644 index 00000000..d941b4bb --- /dev/null +++ b/modules/nf-core/tbprofiler/profile/tests/nextflow.illumina.config @@ -0,0 +1,5 @@ +process { + withName: TBPROFILER_PROFILE { + ext.args = '--platform illumina' + } +} diff --git a/modules/nf-core/tbprofiler/profile/tests/nextflow.nanopore.config b/modules/nf-core/tbprofiler/profile/tests/nextflow.nanopore.config new file mode 100644 index 00000000..9316e104 --- /dev/null +++ b/modules/nf-core/tbprofiler/profile/tests/nextflow.nanopore.config @@ -0,0 +1,5 @@ +process { + withName: TBPROFILER_PROFILE { + ext.args = '--platform nanopore' + } +}