diff --git a/resources/analysisTools/indelCallingWorkflow/annotate_with_VEP.sh b/resources/analysisTools/indelCallingWorkflow/annotate_with_VEP.sh
new file mode 100755
index 0000000..1539ede
--- /dev/null
+++ b/resources/analysisTools/indelCallingWorkflow/annotate_with_VEP.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+## Annotate the variants in a VCF with VEP, parse the annotations and
+## write the result bgzip-compressed.
+##
+## To run
+## LOCAL: bash annotate_with_VEP.sh snvs_{pid}_somatic_snvs_conf_8_to_10.vcf snvs_{pid}_somatic_snvs_conf_8_to_10.VEP.vcf.gz
+##
+## Reads from the environment: PERL_BINARY, VEP_SW_PATH, VEP_FORKS, VEP_FA_INDEX,
+## VEP_CACHE_BASE, PYTHON_BINARY, TOOL_PARSE_VEP, BGZIP_BINARY
+
+## The exit status of a pipeline is that of its last command (bgzip) unless
+## pipefail is set; callers check $? to detect a failed VEP or parser run.
+set -o pipefail
+
+vep_species="homo_sapiens"
+vep_assembly="GRCh37"
+## VEP's --format names the format of the input file; VCF output comes from --vcf
+vep_input_format="vcf"
+
+input_vcf=${1:?input VCF path is required}
+output_vcf=${2:?output VCF path is required}
+threads=${VEP_FORKS}
+
+## Annotate the high confidence variants
+## Parse for the functional consequences
+## NOTE: the *_BINARY and tool path variables are left unquoted as before
+${PERL_BINARY} ${VEP_SW_PATH} \
+  --input_file "$input_vcf" \
+  --species "$vep_species" \
+  --assembly "$vep_assembly" \
+  --output_file STDOUT \
+  --format "$vep_input_format" \
+  --fork "$threads" \
+  --fasta "${VEP_FA_INDEX}" \
+  --everything \
+  --vcf \
+  --cache \
+  --offline \
+  --force_overwrite \
+  --no_stats \
+  --dir_cache "${VEP_CACHE_BASE}" \
+  | ${PYTHON_BINARY} ${TOOL_PARSE_VEP} | ${BGZIP_BINARY} -f > "$output_vcf"
\ No newline at end of file
diff --git a/resources/analysisTools/indelCallingWorkflow/filter_vcf.sh b/resources/analysisTools/indelCallingWorkflow/filter_vcf.sh
index cc8f8c6..4bce1bc 100755
--- a/resources/analysisTools/indelCallingWorkflow/filter_vcf.sh
+++ b/resources/analysisTools/indelCallingWorkflow/filter_vcf.sh
@@ -22,6 +22,16 @@ source ${TOOL_ANALYZE_BAM_HEADER} getRefGenomeAndChrPrefixFromHeader ${FILENAME_TUMOR_BAM} # Sets CHR_PREFIX and REFERENCE_GENOME
+########################################## VEP annotation #######################################
+## Run VEP on the somatic high confidence SNVs
+${TOOL_ANNOTATE_VEP} ${FILENAME_VCF} ${FILENAME_VCF}.tmp
+[[ "$?" 
!= 0 ]] && echo "There was a non-zero exit code in VEP annotation" && exit 8 + +# Overwrite the original VCF file with the VEP annotated one +# NOTE: If there is an error here, one has to rerun the Annotation and DeepAnnotation steps +mv ${FILENAME_VCF}.tmp ${FILENAME_VCF} && ${TABIX_BINARY} -f -p vcf ${FILENAME_VCF} +[[ "$?" != 0 ]] && echo "There was a non-zero exit code in VEP annotation" && exit 9 + ########################################## Filter ############################################### outputFilenamePrefix=${FILENAME_VCF%.vcf.gz} diff --git a/resources/analysisTools/indelCallingWorkflow/indelCalling.sh b/resources/analysisTools/indelCallingWorkflow/indelCalling.sh index 3f62350..e8c6eef 100755 --- a/resources/analysisTools/indelCallingWorkflow/indelCalling.sh +++ b/resources/analysisTools/indelCallingWorkflow/indelCalling.sh @@ -49,7 +49,7 @@ ${PLATYPUS_BINARY} callVariants \ --verbosity=1 \ --bufferSize=${PLATYPUS_BUFFER_SIZE} \ --maxReads=${PLATYPUS_MAX_READS} \ - --minFlank=0 \ + --minFlank=0 \ ${PLATYPUS_PARAMS} [[ $? -gt 0 ]] && echo "Error during platypus indel calling." && exit 1 diff --git a/resources/analysisTools/indelCallingWorkflow/parse_VEP_annotations.py b/resources/analysisTools/indelCallingWorkflow/parse_VEP_annotations.py new file mode 100755 index 0000000..642347b --- /dev/null +++ b/resources/analysisTools/indelCallingWorkflow/parse_VEP_annotations.py @@ -0,0 +1,147 @@ +""" +parse_VEP_annotations.py + +This script parses VEP annotations from the input and writes the parsed output to STDOUT. +It contains functions to parse VEP format, gene consequences and HGVSc annotations, and format transcript information. + +Usage: + cat VEP_annotated.vcf | python parse_VEP_annotations.py > VEP_annotated_parsed.vcf + +""" + +import sys + +def parse_vep_annotations(): + """ + Parses VEP annotations from the input and writes the parsed output to STDOUT. + + This function reads input from `sys.stdin` line by line and processes each line. 
+ If a line starts with "#" and contains "##INFO= 0: + transcript_info = "" + for gene in gene_consequence_hgvsc_ds: + for consequence in gene_consequence_hgvsc_ds[gene]: + transcript_info += "{0}|{1}({2});".format(gene, consequence, ','.join(gene_consequence_hgvsc_ds[gene][consequence])) + line = line + "\t" + transcript_info + "\n" + else: + line = line + "\t.\n" + return line + + +if __name__ == "__main__": + + # Parse VEP annotations and write to STDOUT + parse_vep_annotations() diff --git a/resources/configurationFiles/analysisIndelCalling.xml b/resources/configurationFiles/analysisIndelCalling.xml index 268bbf7..f02c426 100755 --- a/resources/configurationFiles/analysisIndelCalling.xml +++ b/resources/configurationFiles/analysisIndelCalling.xml @@ -160,6 +160,12 @@ --> + + + + + + @@ -270,10 +276,10 @@ - - - - + + + +