[Swift-commit] r6881 - in SwiftApps: . gatk
ketan at ci.uchicago.edu
ketan at ci.uchicago.edu
Mon Aug 19 12:03:40 CDT 2013
Author: ketan
Date: 2013-08-19 12:03:40 -0500 (Mon, 19 Aug 2013)
New Revision: 6881
Added:
SwiftApps/gatk/
SwiftApps/gatk/gatk.swift
Log:
gatk swift skeleton
Added: SwiftApps/gatk/gatk.swift
===================================================================
--- SwiftApps/gatk/gatk.swift (rev 0)
+++ SwiftApps/gatk/gatk.swift 2013-08-19 17:03:40 UTC (rev 6881)
@@ -0,0 +1,735 @@
+
+app () bwa (){ // SKELETON, not yet valid Swift app syntax: no inputs/outputs are declared; the body is a pasted command template for bwa_wrapper.py (apparently Galaxy/Cheetah: #if/#end if directives, $placeholders) that still has to be mapped onto Swift app arguments.
+
+ bwa_wrapper.py
+ --threads="4"
+
+ #if $input1.ext == "fastqillumina":
+ --illumina1.3
+ #end if
+
+ ## reference source
+ --fileSource=$genomeSource.refGenomeSource
+ #if $genomeSource.refGenomeSource == "history":
+ ##build index on the fly
+ --ref="${genomeSource.ownFile}"
+ --dbkey=$dbkey
+ #else:
+ ##use precomputed indexes
+ --ref="${ filter( lambda x: str( x[0] ) == str( $genomeSource.indices ), $__app__.tool_data_tables[ 'bwa_indexes' ].get_fields() )[0][-1] }"
+ --do_not_build_index
+ #end if
+
+ ## input file(s)
+ --input1=$paired.input1
+ #if $paired.sPaired == "paired":
+ --input2=$paired.input2
+ #end if
+
+ ## output file
+ --output=$output
+
+ ## run parameters
+ --genAlignType=$paired.sPaired
+ --params=$params.source_select
+ #if $params.source_select != "pre_set":
+ --maxEditDist=$params.maxEditDist
+ --fracMissingAligns=$params.fracMissingAligns
+ --maxGapOpens=$params.maxGapOpens
+ --maxGapExtens=$params.maxGapExtens
+ --disallowLongDel=$params.disallowLongDel
+ --disallowIndel=$params.disallowIndel
+ --seed=$params.seed
+ --maxEditDistSeed=$params.maxEditDistSeed
+ --mismatchPenalty=$params.mismatchPenalty
+ --gapOpenPenalty=$params.gapOpenPenalty
+ --gapExtensPenalty=$params.gapExtensPenalty
+ --suboptAlign=$params.suboptAlign
+ --noIterSearch=$params.noIterSearch
+ --outputTopN=$params.outputTopN
+ --outputTopNDisc=$params.outputTopNDisc
+ --maxInsertSize=$params.maxInsertSize
+ --maxOccurPairing=$params.maxOccurPairing
+ #if $params.readGroup.specReadGroup == "yes"
+ --rgid="$params.readGroup.rgid"
+ --rgcn="$params.readGroup.rgcn"
+ --rgds="$params.readGroup.rgds"
+ --rgdt="$params.readGroup.rgdt"
+ --rgfo="$params.readGroup.rgfo"
+ --rgks="$params.readGroup.rgks"
+ --rglb="$params.readGroup.rglb"
+ --rgpg="$params.readGroup.rgpg"
+ --rgpi="$params.readGroup.rgpi"
+ --rgpl="$params.readGroup.rgpl"
+ --rgpu="$params.readGroup.rgpu"
+ --rgsm="$params.readGroup.rgsm"
+ #end if
+ #end if
+
+ ## suppress output SAM header
+ --suppressHeader=$suppressHeader
+} // end app bwa
+
+app () addorrep (){ // SKELETON: no inputs/outputs declared; body is a pasted picard_wrapper.py command template that passes read-group options (--rg-lb/pl/pu/sm/id, plus --rg-cn/--rg-ds when rgOpts == "full") and the AddOrReplaceReadGroups.jar path.
+
+ picard_wrapper.py
+ --input="$inputFile"
+ --rg-lb="$rglb"
+ --rg-pl="$rgpl"
+ --rg-pu="$rgpu"
+ --rg-sm="$rgsm"
+ --rg-id="$rgid"
+ --rg-opts=${readGroupOpts.rgOpts}
+ #if $readGroupOpts.rgOpts == "full"
+ --rg-cn="$readGroupOpts.rgcn"
+ --rg-ds="$readGroupOpts.rgds"
+ #end if
+ --output-format=$outputFormat
+ --output=$outFile
+ -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/AddOrReplaceReadGroups.jar"
+} // end app addorrep
+
+
+app () markdup (){ // SKELETON: no inputs/outputs declared; body is a pasted picard_wrapper.py command template for --picard-cmd="MarkDuplicates". The output placeholder is chosen by $outputFormat (sam/bam) and $remDups; -j names only the jars directory here (no jar file, unlike the sibling Picard apps) -- presumably intentional, confirm against the wrapper.
+ picard_wrapper.py
+ --input="$input_file"
+ --remove-dups="$remDups"
+ --read-regex="$readRegex"
+ --opt-dup-dist="$optDupeDist"
+ --output-format=$outputFormat
+ --output-txt=$outMetrics
+ #if str( $outputFormat ) == "sam"
+ #if str( $remDups ) == "true"
+ --output-sam=$outFileSamRemoved
+ #else
+ --output-sam=$outFileSamMarked
+ #end if
+ #else if str( $outputFormat ) == "bam"
+ #if str( $remDups ) == "true"
+ --output-sam=$outFileBamRemoved
+ #else
+ --output-sam=$outFileBamMarked
+ #end if
+ #end if
+ -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/"
+ --picard-cmd="MarkDuplicates"
+} // end app markdup
+
+app () reordersambam (){ // SKELETON: no inputs/outputs declared; body is a pasted picard_wrapper.py command template using ReorderSam.jar. The reference is looked up in the 'picard_indexes' data table when indexSource == "built-in", otherwise it is passed via --ref-file with species/build/truncate options.
+ picard_wrapper.py
+ --input=$inputFile
+ #if $source.indexSource == "built-in"
+ --ref="${ filter( lambda x: str( x[0] ) == str( $source.ref ), $__app__.tool_data_tables[ 'picard_indexes' ].get_fields() )[0][-1] }"
+ #else
+ --ref-file=$refFile
+ --species-name=$source.speciesName
+ --build-name=$source.buildName
+ --trunc-names=$source.truncateSeqNames
+ #end if
+ --allow-inc-dict-concord=$allowIncDictConcord
+ --allow-contig-len-discord=$allowContigLenDiscord
+ --output-format=$outputFormat
+ --output=$outFile
+ -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/ReorderSam.jar"
+} // end app reordersambam
+
+app () realignertargetreator (){ // SKELETON: no inputs/outputs declared; body is a pasted gatk_wrapper.py command template running GenomeAnalysisTK.jar -T "RealignerTargetCreator" with -o ${output_interval}. NOTE(review): the app name looks like a typo for "realignertargetcreator"; left unchanged so existing references still resolve.
+ gatk_wrapper.py
+ --max_jvm_heap_fraction "1"
+ --stdout "${output_log}"
+ -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
+ -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
+ -p 'java
+ -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
+ -T "RealignerTargetCreator"
+ -o "${output_interval}"
+ -et "NO_ET" ##ET no phone home
+ --num_threads 4 ##hard coded, for now
+ ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
+ #if $reference_source.reference_source_selector != "history":
+ -R "${reference_source.ref_file.fields.path}"
+ #end if
+ '
+ #set $rod_binding_names = dict()
+ #for $rod_binding in $rod_bind:
+ #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
+ #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
+ #else
+ #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
+ #end if
+ #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
+ -d "-known:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
+ #end for
+
+ ##start standard gatk options
+ #if $gatk_param_type.gatk_param_type_selector == "advanced":
+ #for $pedigree in $gatk_param_type.pedigree:
+ -p '--pedigree "${pedigree.pedigree_file}"'
+ #end for
+ #for $pedigree_string in $gatk_param_type.pedigree_string_repeat:
+ -p '--pedigreeString "${pedigree_string.pedigree_string}"'
+ #end for
+ -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"'
+ #for $read_filter in $gatk_param_type.read_filter:
+ -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}"
+ ###raise Exception( str( dir( $read_filter ) ) )
+ #for $name, $param in $read_filter.read_filter_type.iteritems():
+ #if $name not in [ "__current_case__", "read_filter_type_selector" ]:
+ #if hasattr( $param.input, 'truevalue' ):
+ ${param}
+ #else:
+ --${name} "${param}"
+ #end if
+ #end if
+ #end for
+ '
+ #end for
+ #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ):
+ -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}"
+ #end for
+
+ #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ):
+ -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}"
+ #end for
+
+ -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"'
+
+ -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"'
+ #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE":
+ -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"'
+ #end if
+ -p '
+ --baq "${gatk_param_type.baq}"
+ --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}"
+ ${gatk_param_type.use_original_qualities}
+ --defaultBaseQualities "${gatk_param_type.default_base_qualities}"
+ --validation_strictness "${gatk_param_type.validation_strictness}"
+ --interval_merging "${gatk_param_type.interval_merging}"
+ ${gatk_param_type.disable_experimental_low_memory_sharding}
+ ${gatk_param_type.non_deterministic_random_seed}
+ '
+ #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ):
+ #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file":
+ -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}"
+ #else
+ -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"'
+ #end if
+ #end for
+ #end if
+
+ #if $reference_source.reference_source_selector == "history":
+ -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input"
+ #end if
+ ##end standard gatk options
+ ##start analysis specific options
+ #if $analysis_param_type.analysis_param_type_selector == "advanced":
+ -p '
+ --minReadsAtLocus "${analysis_param_type.minReadsAtLocus}"
+ --windowSize "${analysis_param_type.windowSize}"
+ --mismatchFraction "${analysis_param_type.mismatchFraction}"
+ --maxIntervalSize "${analysis_param_type.maxIntervalSize}"
+ '
+ #end if
+} // end app realignertargetreator
+
+app () indelrealigner (){ // SKELETON: no inputs/outputs declared; body is a pasted gatk_wrapper.py command template running GenomeAnalysisTK.jar -T "IndelRealigner" with -o ${output_bam}; it also passes -targetIntervals ${target_intervals} and always adds --disable_bam_indexing.
+ gatk_wrapper.py
+ --max_jvm_heap_fraction "1"
+ --stdout "${output_log}"
+ -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
+ -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
+ -p 'java
+ -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
+ -T "IndelRealigner"
+ -o "${output_bam}"
+ -et "NO_ET" ##ET no phone home
+ ##--num_threads 4 ##hard coded, for now
+ ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
+ #if $reference_source.reference_source_selector != "history":
+ -R "${reference_source.ref_file.fields.path}"
+ #end if
+ -LOD "${lod_threshold}"
+ ${knowns_only}
+ '
+
+ #set $rod_binding_names = dict()
+ #for $rod_binding in $rod_bind:
+ #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
+ #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
+ #else
+ #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
+ #end if
+ #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
+ -d "-known:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
+ #end for
+
+ ##start standard gatk options
+ #if $gatk_param_type.gatk_param_type_selector == "advanced":
+ #for $pedigree in $gatk_param_type.pedigree:
+ -p '--pedigree "${pedigree.pedigree_file}"'
+ #end for
+ #for $pedigree_string in $gatk_param_type.pedigree_string_repeat:
+ -p '--pedigreeString "${pedigree_string.pedigree_string}"'
+ #end for
+ -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"'
+ #for $read_filter in $gatk_param_type.read_filter:
+ -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}"
+ ###raise Exception( str( dir( $read_filter ) ) )
+ #for $name, $param in $read_filter.read_filter_type.iteritems():
+ #if $name not in [ "__current_case__", "read_filter_type_selector" ]:
+ #if hasattr( $param.input, 'truevalue' ):
+ ${param}
+ #else:
+ --${name} "${param}"
+ #end if
+ #end if
+ #end for
+ '
+ #end for
+ #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ):
+ -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}"
+ #end for
+
+ #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ):
+ -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}"
+ #end for
+
+ -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"'
+
+ -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"'
+ #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE":
+ -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"'
+ #end if
+ -p '
+ --baq "${gatk_param_type.baq}"
+ --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}"
+ ${gatk_param_type.use_original_qualities}
+ --defaultBaseQualities "${gatk_param_type.default_base_qualities}"
+ --validation_strictness "${gatk_param_type.validation_strictness}"
+ --interval_merging "${gatk_param_type.interval_merging}"
+ ${gatk_param_type.disable_experimental_low_memory_sharding}
+ ${gatk_param_type.non_deterministic_random_seed}
+ '
+ #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ):
+ #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file":
+ -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}"
+ #else
+ -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"'
+ #end if
+ #end for
+ #end if
+ #if $reference_source.reference_source_selector == "history":
+ -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input"
+ #end if
+ ##end standard gatk options
+ ##start analysis specific options
+ -d "-targetIntervals" "${target_intervals}" "${target_intervals.ext}" "gatk_target_intervals"
+ -p '
+ --disable_bam_indexing
+ '
+ #if $analysis_param_type.analysis_param_type_selector == "advanced":
+ -p '
+ --entropyThreshold "${analysis_param_type.entropy_threshold}"
+ ${analysis_param_type.simplify_bam}
+ --consensusDeterminationModel "${analysis_param_type.consensus_determination_model}"
+ --maxIsizeForMovement "${analysis_param_type.max_insert_size_for_movement}"
+ --maxPositionalMoveAllowed "${analysis_param_type.max_positional_move_allowed}"
+ --maxConsensuses "${analysis_param_type.max_consensuses}"
+ --maxReadsForConsensuses "${analysis_param_type.max_reads_for_consensuses}"
+ --maxReadsForRealignment "${analysis_param_type.max_reads_for_realignment}"
+ ${analysis_param_type.no_original_alignment_tags}
+ '
+ #end if
+} // end app indelrealigner
+
+app () countcovariates (){ // SKELETON: no inputs/outputs declared; body is a pasted gatk_wrapper.py command template running GenomeAnalysisTK.jar -T "CountCovariates" with --recal_file ${output_recal}; it appends --run_without_dbsnp_potentially_ruining_quality when no rod binding of type 'dbsnp' was seen in $rod_bind.
+ gatk_wrapper.py
+ --max_jvm_heap_fraction "1"
+ --stdout "${output_log}"
+ -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
+ -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
+ -p 'java
+ -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
+ -T "CountCovariates"
+ --num_threads 4 ##hard coded, for now
+ -et "NO_ET" ##ET no phone home
+ ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
+ #if $reference_source.reference_source_selector != "history":
+ -R "${reference_source.ref_file.fields.path}"
+ #end if
+ --recal_file "${output_recal}"
+ ${standard_covs}
+ #if str( $covariates ) != "None":
+ #for $cov in str( $covariates ).split( ',' ):
+ -cov "${cov}"
+ #end for
+ #end if
+ '
+
+ #set $snp_dataset_provided = False
+ #set $rod_binding_names = dict()
+ #for $rod_binding in $rod_bind:
+ #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
+ #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
+ #else
+ #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
+ #end if
+ #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'dbsnp':
+ #set $snp_dataset_provided = True
+ #end if
+ #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
+ -d "--knownSites:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
+ #end for
+
+ ##start standard gatk options
+ #if $gatk_param_type.gatk_param_type_selector == "advanced":
+ #for $pedigree in $gatk_param_type.pedigree:
+ -p '--pedigree "${pedigree.pedigree_file}"'
+ #end for
+ #for $pedigree_string in $gatk_param_type.pedigree_string_repeat:
+ -p '--pedigreeString "${pedigree_string.pedigree_string}"'
+ #end for
+ -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"'
+ #for $read_filter in $gatk_param_type.read_filter:
+ -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}"
+ ###raise Exception( str( dir( $read_filter ) ) )
+ #for $name, $param in $read_filter.read_filter_type.iteritems():
+ #if $name not in [ "__current_case__", "read_filter_type_selector" ]:
+ #if hasattr( $param.input, 'truevalue' ):
+ ${param}
+ #else:
+ --${name} "${param}"
+ #end if
+ #end if
+ #end for
+ '
+ #end for
+ #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ):
+ -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}"
+ #end for
+
+ #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ):
+ -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}"
+ #end for
+
+ -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"'
+
+ -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"'
+ #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE":
+ -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"'
+ #end if
+ -p '
+ --baq "${gatk_param_type.baq}"
+ --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}"
+ ${gatk_param_type.use_original_qualities}
+ --defaultBaseQualities "${gatk_param_type.default_base_qualities}"
+ --validation_strictness "${gatk_param_type.validation_strictness}"
+ --interval_merging "${gatk_param_type.interval_merging}"
+ ${gatk_param_type.disable_experimental_low_memory_sharding}
+ ${gatk_param_type.non_deterministic_random_seed}
+ '
+ #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ):
+ #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file":
+ -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}"
+ #else
+ -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"'
+ #end if
+ #end for
+ #end if
+ #if str( $reference_source.reference_source_selector ) == "history":
+ -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input"
+ #end if
+ ##end standard gatk options
+
+ ##start analysis specific options
+ #if $analysis_param_type.analysis_param_type_selector == "advanced":
+ -p '
+ #if $analysis_param_type.default_read_group_type.default_read_group_type_selector == "set":
+ --default_read_group "${analysis_param_type.default_read_group_type.default_read_group}"
+ #end if
+ #if str( $analysis_param_type.default_platform ) != "default":
+ --default_platform "${analysis_param_type.default_platform}"
+ #end if
+ #if str( $analysis_param_type.force_read_group_type.force_read_group_type_selector ) == "set":
+ --force_read_group "${analysis_param_type.force_read_group_type.force_read_group}"
+ #end if
+ #if str( $analysis_param_type.force_platform ) != "default":
+ --force_platform "${analysis_param_type.force_platform}"
+ #end if
+ ${analysis_param_type.exception_if_no_tile}
+ #if str( $analysis_param_type.solid_options_type.solid_options_type_selector ) == "set":
+ #if str( $analysis_param_type.solid_options_type.solid_recal_mode ) != "default":
+ --solid_recal_mode "${analysis_param_type.solid_options_type.solid_recal_mode}"
+ #end if
+ #if str( $analysis_param_type.solid_options_type.solid_nocall_strategy ) != "default":
+ --solid_nocall_strategy "${analysis_param_type.solid_options_type.solid_nocall_strategy}"
+ #end if
+ #end if
+ --window_size_nqs "${analysis_param_type.window_size_nqs}"
+ --homopolymer_nback "${analysis_param_type.homopolymer_nback}"
+ '
+ #end if
+ #if not $snp_dataset_provided:
+ -p '--run_without_dbsnp_potentially_ruining_quality'
+ #end if
+} // end app countcovariates
+
+app () tablerecalibrate (){ // SKELETON: no inputs/outputs declared; body is a pasted gatk_wrapper.py command template running GenomeAnalysisTK.jar -T "TableRecalibration" with --recal_file ${input_recal}, -o ${output_bam} and --disable_bam_indexing; unlike the sibling GATK apps it has no $rod_bind loop.
+ gatk_wrapper.py
+ --max_jvm_heap_fraction "1"
+ --stdout "${output_log}"
+ -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input"
+ -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index
+ -p 'java
+ -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
+ -T "TableRecalibration"
+ -o "${output_bam}"
+ -et "NO_ET" ##ET no phone home
+ ##--num_threads 4 ##hard coded, for now
+ ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
+ #if $reference_source.reference_source_selector != "history":
+ -R "${reference_source.ref_file.fields.path}"
+ #end if
+ --recal_file "${input_recal}"
+ --disable_bam_indexing
+ '
+ ##start standard gatk options
+ #if $gatk_param_type.gatk_param_type_selector == "advanced":
+ #for $pedigree in $gatk_param_type.pedigree:
+ -p '--pedigree "${pedigree.pedigree_file}"'
+ #end for
+ #for $pedigree_string in $gatk_param_type.pedigree_string_repeat:
+ -p '--pedigreeString "${pedigree_string.pedigree_string}"'
+ #end for
+ -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"'
+ #for $read_filter in $gatk_param_type.read_filter:
+ -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}"
+ ###raise Exception( str( dir( $read_filter ) ) )
+ #for $name, $param in $read_filter.read_filter_type.iteritems():
+ #if $name not in [ "__current_case__", "read_filter_type_selector" ]:
+ #if hasattr( $param.input, 'truevalue' ):
+ ${param}
+ #else:
+ --${name} "${param}"
+ #end if
+ #end if
+ #end for
+ '
+ #end for
+ #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ):
+ -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}"
+ #end for
+
+ #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ):
+ -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}"
+ #end for
+
+ -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"'
+
+ -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"'
+ #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE":
+ -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"'
+ #end if
+ -p '
+ --baq "${gatk_param_type.baq}"
+ --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}"
+ ${gatk_param_type.use_original_qualities}
+ --defaultBaseQualities "${gatk_param_type.default_base_qualities}"
+ --validation_strictness "${gatk_param_type.validation_strictness}"
+ --interval_merging "${gatk_param_type.interval_merging}"
+ ${gatk_param_type.disable_experimental_low_memory_sharding}
+ ${gatk_param_type.non_deterministic_random_seed}
+ '
+ #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ):
+ #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file":
+ -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}"
+ #else
+ -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"'
+ #end if
+ #end for
+ #end if
+
+ #if str( $reference_source.reference_source_selector ) == "history":
+ -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input"
+ #end if
+ ##end standard gatk options
+
+ ##start analysis specific options
+ #if $analysis_param_type.analysis_param_type_selector == "advanced":
+ -p '
+ #if $analysis_param_type.default_read_group_type.default_read_group_type_selector == "set":
+ --default_read_group "${analysis_param_type.default_read_group_type.default_read_group}"
+ #end if
+ #if str( $analysis_param_type.default_platform ) != "default":
+ --default_platform "${analysis_param_type.default_platform}"
+ #end if
+ #if str( $analysis_param_type.force_read_group_type.force_read_group_type_selector ) == "set":
+ --force_read_group "${analysis_param_type.force_read_group_type.force_read_group}"
+ #end if
+ #if str( $analysis_param_type.force_platform ) != "default":
+ --force_platform "${analysis_param_type.force_platform}"
+ #end if
+ ${analysis_param_type.exception_if_no_tile}
+ #if str( $analysis_param_type.solid_options_type.solid_options_type_selector ) == "set":
+ #if str( $analysis_param_type.solid_options_type.solid_recal_mode ) != "default":
+ --solid_recal_mode "${analysis_param_type.solid_options_type.solid_recal_mode}"
+ #end if
+ #if str( $analysis_param_type.solid_options_type.solid_nocall_strategy ) != "default":
+ --solid_nocall_strategy "${analysis_param_type.solid_options_type.solid_nocall_strategy}"
+ #end if
+ #end if
+ ${analysis_param_type.simplify_bam}
+ --preserve_qscores_less_than "${analysis_param_type.preserve_qscores_less_than}"
+ --smoothing "${analysis_param_type.smoothing}"
+ --max_quality_score "${analysis_param_type.max_quality_score}"
+ --window_size_nqs "${analysis_param_type.window_size_nqs}"
+ --homopolymer_nback "${analysis_param_type.homopolymer_nback}"
+ ${analysis_param_type.do_not_write_original_quals}
+ '
+ #end if
+} // end app tablerecalibrate
+
+app () unifiedgenotyper (){ // SKELETON: no inputs/outputs declared; body is a pasted gatk_wrapper.py command template running GenomeAnalysisTK.jar -T "UnifiedGenotyper" with --out ${output_vcf} and --metrics_file ${output_metrics}; it emits one -I/bam_index pair per entry of $reference_source.input_bams. NOTE(review): ${analysis_param_type.multiallelic} near the end sits outside any -p '...' group -- confirm that is intended.
+ gatk_wrapper.py
+ --max_jvm_heap_fraction "1"
+ --stdout "${output_log}"
+ #for $i, $input_bam in enumerate( $reference_source.input_bams ):
+ -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}"
+ -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index
+ #end for
+ -p 'java
+ -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk/GenomeAnalysisTK.jar"
+ -T "UnifiedGenotyper"
+ --num_threads 4 ##hard coded, for now
+ --out "${output_vcf}"
+ --metrics_file "${output_metrics}"
+ -et "NO_ET" ##ET no phone home
+ ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout
+ #if $reference_source.reference_source_selector != "history":
+ -R "${reference_source.ref_file.fields.path}"
+ #end if
+ --genotype_likelihoods_model "${genotype_likelihoods_model}"
+ --standard_min_confidence_threshold_for_calling "${standard_min_confidence_threshold_for_calling}"
+ --standard_min_confidence_threshold_for_emitting "${standard_min_confidence_threshold_for_emitting}"
+ '
+ #set $rod_binding_names = dict()
+ #for $rod_binding in $rod_bind:
+ #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom':
+ #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name
+ #else
+ #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector
+ #end if
+ #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1
+ -d "--dbsnp:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}"
+ #end for
+
+ ##start standard gatk options
+ #if $gatk_param_type.gatk_param_type_selector == "advanced":
+ #for $pedigree in $gatk_param_type.pedigree:
+ -p '--pedigree "${pedigree.pedigree_file}"'
+ #end for
+ #for $pedigree_string in $gatk_param_type.pedigree_string_repeat:
+ -p '--pedigreeString "${pedigree_string.pedigree_string}"'
+ #end for
+ -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"'
+ #for $read_filter in $gatk_param_type.read_filter:
+ -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}"
+ ###raise Exception( str( dir( $read_filter ) ) )
+ #for $name, $param in $read_filter.read_filter_type.iteritems():
+ #if $name not in [ "__current_case__", "read_filter_type_selector" ]:
+ #if hasattr( $param.input, 'truevalue' ):
+ ${param}
+ #else:
+ --${name} "${param}"
+ #end if
+ #end if
+ #end for
+ '
+ #end for
+ #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ):
+ -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}"
+ #end for
+
+ #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ):
+ -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}"
+ #end for
+
+ -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"'
+
+ -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"'
+ #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE":
+ -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"'
+ #end if
+ -p '
+ --baq "${gatk_param_type.baq}"
+ --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}"
+ ${gatk_param_type.use_original_qualities}
+ --defaultBaseQualities "${gatk_param_type.default_base_qualities}"
+ --validation_strictness "${gatk_param_type.validation_strictness}"
+ --interval_merging "${gatk_param_type.interval_merging}"
+ ${gatk_param_type.disable_experimental_low_memory_sharding}
+ ${gatk_param_type.non_deterministic_random_seed}
+ '
+ #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ):
+ #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file":
+ -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}"
+ #else
+ -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"'
+ #end if
+ #end for
+ #end if
+
+ #if $reference_source.reference_source_selector == "history":
+ -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input"
+ #end if
+ ##end standard gatk options
+ ##start analysis specific options
+ #if $analysis_param_type.analysis_param_type_selector == "advanced":
+ -p '
+ --p_nonref_model "${analysis_param_type.p_nonref_model}"
+ --heterozygosity "${analysis_param_type.heterozygosity}"
+ --pcr_error_rate "${analysis_param_type.pcr_error_rate}"
+ --genotyping_mode "${analysis_param_type.genotyping_mode_type.genotyping_mode}"
+ #if str( $analysis_param_type.genotyping_mode_type.genotyping_mode ) == 'GENOTYPE_GIVEN_ALLELES':
+ --alleles "${analysis_param_type.genotyping_mode_type.input_alleles_rod}"
+ #end if
+ --output_mode "${analysis_param_type.output_mode}"
+ ${analysis_param_type.compute_SLOD}
+ --min_base_quality_score "${analysis_param_type.min_base_quality_score}"
+ --max_deletion_fraction "${analysis_param_type.max_deletion_fraction}"
+ --max_alternate_alleles "${analysis_param_type.max_alternate_alleles}"
+ --min_indel_count_for_genotyping "${analysis_param_type.min_indel_count_for_genotyping}"
+ --indel_heterozygosity "${analysis_param_type.indel_heterozygosity}"
+ --indelGapContinuationPenalty "${analysis_param_type.indelGapContinuationPenalty}"
+ --indelGapOpenPenalty "${analysis_param_type.indelGapOpenPenalty}"
+ --indelHaplotypeSize "${analysis_param_type.indelHaplotypeSize}"
+ ${analysis_param_type.doContextDependentGapPenalties}
+ #if str( $analysis_param_type.annotation ) != "None":
+ #for $annotation in str( $analysis_param_type.annotation.fields.gatk_value ).split( ','):
+ --annotation "${annotation}"
+ #end for
+ #end if
+ #for $additional_annotation in $analysis_param_type.additional_annotations:
+ --annotation "${additional_annotation.additional_annotation_name}"
+ #end for
+ #if str( $analysis_param_type.group ) != "None":
+ #for $group in str( $analysis_param_type.group ).split( ','):
+ --group "${group}"
+ #end for
+ #end if
+ #if str( $analysis_param_type.exclude_annotations ) != "None":
+ #for $annotation in str( $analysis_param_type.exclude_annotations.fields.gatk_value ).split( ','):
+ --excludeAnnotation "${annotation}"
+ #end for
+ #end if
+ '
+## #if str( $analysis_param_type.snpEff_rod_bind_type.snpEff_rod_bind_type_selector ) == 'set_snpEff':
+## -p '--annotation "SnpEff"'
+## -d "--snpEffFile:${analysis_param_type.snpEff_rod_bind_type.snpEff_rod_name},%(file_type)s" "${analysis_param_type.snpEff_rod_bind_type.snpEff_input_rod}" "${analysis_param_type.snpEff_rod_bind_type.snpEff_input_rod.ext}" "input_snpEff_${analysis_param_type.snpEff_rod_bind_type.snpEff_rod_name}"
+## #else:
+## -p '--excludeAnnotation "SnpEff"'
+## #end if
+ ${analysis_param_type.multiallelic}
+ #end if
+
+} // end app unifiedgenotyper
More information about the Swift-commit
mailing list