PageRenderTime 56ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/seqware-webservice/src/test/resources/net/sourceforge/seqware/common/metadata/GATKRecalibrationAndVariantCalling_1.3.16.ftl

https://github.com/lazycrazyowl/seqware
Freemarker Template | 1244 lines | 1080 code | 141 blank | 23 comment | 132 complexity | 064ebcfaf7c1a68a9b77cfa6859319ae MD5 | raw file
Possible License(s): GPL-3.0

Large files are truncated, but you can click here to view the full file

  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <adag xmlns="http://pegasus.isi.edu/schema/DAX" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pegasus.isi.edu/schema/DAX http://pegasus.isi.edu/schema/dax-3.2.xsd" version="3.2" count="1" index="0" name="GATKRecalibrationAndVariantCalling_1.3.16">
  3. <#--
  4. DESCRIPTION:
  5. This workflow is designed to take a BAM file, break it down by chromosome, perform realignment, recalibration,
  6. duplicate flagging, and variant calling for small indels and SNVs. The result is a VCF file for SNVs and indels both filtered
  7. and un-filtered.
  8. This workflow is designed to work with GATK version 1.3.16.
  9. Docs:
  10. * http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v2
  11. * http://www.broadinstitute.org/gsa/wiki/index.php/The_Genome_Analysis_Toolkit
  12. TODO:
  13. * need to add WGS support where needed, see ./postProcessing/gatk/oicr_gatk_helper.sh
  14. * need to add annotation steps?
  15. -->
  16. <!-- the relative path -->
  17. <#assign workflow_name = "GATKRecalibrationAndVariantCalling/1.x.x"/>
  18. <!-- MACRO requires_dir(dir): emits one env-namespace profile, GRIDSTART_PREJOB, whose value invokes the bundled pegasus-dirmanager with "-c -d" on the given dir. NOTE(review): the macro itself neither defines a job nor stages a binary; confirm callers rely only on the profile. -->
  19. <#macro requires_dir dir>
  20. <profile namespace="env" key="GRIDSTART_PREJOB">/${workflow_bundle_dir}/${workflow_name}/bin/pegasus-dirmanager -c -d ${dir}</profile>
  21. </#macro>
  22. <!-- VARS -->
  23. <#-- Set seqware version -->
  24. <#assign seqware_version = "0.10.0"/>
  25. <#assign workflow_version = "1.3.16"/>
  26. <#assign inputs="${bam_inputs}"/>
  27. <#-- Set relative paths for files within the run-->
  28. <#assign bin_dir = "bin"/>
  29. <#assign data_dir = "data"/>
  30. <#assign accession_dir = "accessions"/>
  31. <#assign tmp_dir = "tmp"/>
  32. <!-- parent accessions -->
  33. <#assign parentAccessions = "${parent_accession}"/>
  34. <!-- EXECUTABLES INCLUDED WITH BUNDLE -->
  35. <executable namespace="seqware" name="runner" version="${seqware_version}"
  36. arch="x86_64" os="linux" installed="true" >
  37. <!-- the path to the tool that actually runs a given module -->
  38. <pfn url="file:///${workflow_bundle_dir}/${workflow_name}/bin/seqware-java-wrapper.sh" site="${seqware_cluster}"/>
  39. </executable>
  40. <executable namespace="pegasus" name="dirmanager" version="${seqware_version}"
  41. arch="x86_64" os="linux" installed="true" >
  42. <!-- the path to the tool that creates directories -->
  43. <pfn url="file:///${workflow_bundle_dir}/${workflow_name}/bin/pegasus-dirmanager" site="${seqware_cluster}"/>
  44. </executable>
  45. <!-- PROVISION -->
  46. <!-- Part 1: Job definitions -->
  47. <!-- Pre Job: make directories -->
  48. <job id="IDPRE0.1" namespace="pegasus" name="dirmanager" version="${seqware_version}">
  49. <argument>
  50. -c -d ${bin_dir}
  51. </argument>
  52. <profile namespace="globus" key="jobtype">condor</profile>
  53. <profile namespace="globus" key="count">1</profile>
  54. <profile namespace="globus" key="maxmemory">500</profile>
  55. </job>
  56. <!-- Pre Job: make directories -->
  57. <job id="IDPRE0.2" namespace="pegasus" name="dirmanager" version="${seqware_version}">
  58. <argument>
  59. -c -d ${data_dir}
  60. </argument>
  61. <profile namespace="globus" key="jobtype">condor</profile>
  62. <profile namespace="globus" key="count">1</profile>
  63. <profile namespace="globus" key="maxmemory">500</profile>
  64. </job>
  65. <!-- Pre Job: make directories -->
  66. <job id="IDPRE0.3" namespace="pegasus" name="dirmanager" version="${seqware_version}">
  67. <argument>
  68. -c -d ${accession_dir}
  69. </argument>
  70. <profile namespace="globus" key="jobtype">condor</profile>
  71. <profile namespace="globus" key="count">1</profile>
  72. <profile namespace="globus" key="maxmemory">500</profile>
  73. </job>
  74. <!-- Pre Job: make directories -->
  75. <job id="IDPRE0.4" namespace="pegasus" name="dirmanager" version="${seqware_version}">
  76. <argument>
  77. -c -d ${tmp_dir}
  78. </argument>
  79. <profile namespace="globus" key="jobtype">condor</profile>
  80. <profile namespace="globus" key="count">1</profile>
  81. <profile namespace="globus" key="maxmemory">500</profile>
  82. </job>
  83. <!-- Jobs for making data and scripts available -->
  84. <!-- Pre Job: makes seqware perl scripts available -->
  85. <job id="IDPRE1" namespace="seqware" name="runner" version="${seqware_version}">
  86. <argument>
  87. -Xmx1000M
  88. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  89. net.sourceforge.seqware.pipeline.runner.Runner
  90. --no-metadata
  91. --module net.sourceforge.seqware.pipeline.modules.utilities.ProvisionDependenciesBundle
  92. --
  93. --input-file ${workflow_bundle_dir}/${workflow_name}/dependencies/noarch/seqware-pipeline-perl-bin.noarch.zip
  94. --output-dir ${bin_dir}
  95. </argument>
  96. <profile namespace="globus" key="jobtype">condor</profile>
  97. <profile namespace="globus" key="count">1</profile>
  98. <profile namespace="globus" key="maxmemory">2000</profile>
  99. </job>
  100. <!-- Pre Job: makes annovar scripts available-->
  101. <job id="IDPRE2" namespace="seqware" name="runner" version="${seqware_version}">
  102. <argument>
  103. -Xmx1000M
  104. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  105. net.sourceforge.seqware.pipeline.runner.Runner
  106. --no-metadata
  107. --module net.sourceforge.seqware.pipeline.modules.utilities.ProvisionDependenciesBundle
  108. --
  109. --input-file ${workflow_bundle_dir}/${workflow_name}/dependencies/noarch/annovar-20110506.noarch.zip
  110. --output-dir ${bin_dir}
  111. </argument>
  112. <profile namespace="globus" key="jobtype">condor</profile>
  113. <profile namespace="globus" key="count">1</profile>
  114. <profile namespace="globus" key="maxmemory">2000</profile>
  115. </job>
  116. <!-- TODO: Update to latest version -->
  117. <!-- Pre Job: makes GATK available -->
  118. <job id="IDPRE3" namespace="seqware" name="runner" version="${seqware_version}">
  119. <argument>
  120. -Xmx1000M
  121. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  122. net.sourceforge.seqware.pipeline.runner.Runner
  123. --no-metadata
  124. --module net.sourceforge.seqware.pipeline.modules.utilities.ProvisionDependenciesBundle
  125. --
  126. --input-file ${workflow_bundle_dir}/${workflow_name}/dependencies/noarch/GenomeAnalysisTK-1.3.16.g6a5d5e7.noarch.zip
  127. --output-dir ${bin_dir}
  128. </argument>
  129. <profile namespace="globus" key="jobtype">condor</profile>
  130. <profile namespace="globus" key="count">1</profile>
  131. <profile namespace="globus" key="maxmemory">2000</profile>
  132. </job>
  133. <!-- Pre Job: makes picard available -->
  134. <job id="IDPRE4" namespace="seqware" name="runner" version="${seqware_version}">
  135. <argument>
  136. -Xmx1000M
  137. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  138. net.sourceforge.seqware.pipeline.runner.Runner
  139. --no-metadata
  140. --module net.sourceforge.seqware.pipeline.modules.utilities.ProvisionDependenciesBundle
  141. --
  142. --input-file ${workflow_bundle_dir}/${workflow_name}/dependencies/noarch/picard-tools-1.48.noarch.zip
  143. --output-dir ${bin_dir}
  144. </argument>
  145. <profile namespace="globus" key="jobtype">condor</profile>
  146. <profile namespace="globus" key="count">1</profile>
  147. <profile namespace="globus" key="maxmemory">2000</profile>
  148. </job>
  149. <!-- Pre Job: makes samtools available -->
  150. <job id="IDPRE5" namespace="seqware" name="runner" version="${seqware_version}">
  151. <argument>
  152. -Xmx1000M
  153. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  154. net.sourceforge.seqware.pipeline.runner.Runner
  155. --no-metadata
  156. --module net.sourceforge.seqware.pipeline.modules.utilities.ProvisionDependenciesBundle
  157. --
  158. --input-file ${workflow_bundle_dir}/${workflow_name}/dependencies/x86_64/samtools-0.1.17.x86_64.zip
  159. --output-dir ${bin_dir}
  160. </argument>
  161. <profile namespace="globus" key="jobtype">condor</profile>
  162. <profile namespace="globus" key="count">1</profile>
  163. <profile namespace="globus" key="maxmemory">2000</profile>
  164. </job>
  165. <!-- MODULE CALLS -->
  166. <!--
  167. Overview:
  168. * want to do a merge of all the Bam files and then do target and realignment
  169. * realign per chr
  170. * then merge everything
  171. * collapse at that point
  172. -->
  173. <!-- BEGIN LOOP: provision input BAM files and then index them -->
  174. <#list inputs?split(",") as input>
  175. <#list input?split("/") as tmp>
  176. <#assign basename = tmp/>
  177. </#list>
  178. <!-- Job: figure out if the input is a URL and, if it is, correctly download it to a staging area; otherwise link to it -->
  179. <job id="ID0.${input_index}" namespace="seqware" name="runner" version="${seqware_version}">
  180. <argument>
  181. -Xmx1000M
  182. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  183. net.sourceforge.seqware.pipeline.runner.Runner
  184. --no-metadata
  185. --module net.sourceforge.seqware.pipeline.modules.utilities.ProvisionFiles
  186. --
  187. --input-file ${input}
  188. --output-dir ${data_dir}
  189. </argument>
  190. <profile namespace="globus" key="jobtype">condor</profile>
  191. <profile namespace="globus" key="count">1</profile>
  192. <profile namespace="globus" key="maxmemory">2000</profile>
  193. </job>
  194. <!-- TODO: this needs to output in name sorted for the fixmate step, need to test -->
  195. <!-- Here's an example filtering based on quality too: samtools view -b -F 4 ./bam/110316_I580_00038_612RG_LT_s_1_sequence_SE_novoalign.sam.sorted.bam | samtools view -b -F 256 - | samtools view -b -q 30 - > tmp/110316_I580_00038_612RG_LT_s_1_sequence_SE_novoalign.sam.sorted.bam -->
  196. <!-- Job: Samtools filter out unmapped and multi-hit -->
  197. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  198. <#assign algo = "SamtoolsUniqueHitFilter"/>
  199. <job id="ID1.${input_index}" namespace="seqware" name="runner" version="${seqware_version}">
  200. <argument>
  201. -Xmx1000M
  202. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  203. net.sourceforge.seqware.pipeline.runner.Runner
  204. --${metadata}
  205. <#list parentAccessions?split(",") as pa>
  206. --metadata-parent-accession ${pa}
  207. </#list>
  208. --metadata-processing-accession-file ${accession_dir}/${algo}_${input_index}_accession
  209. --metadata-output-file-prefix ${output_prefix}
  210. --metadata-workflow-run-accession ${workflow_run_accession}
  211. --module ${module}
  212. --
  213. --gcr-algorithm ${algo}
  214. --gcr-command ${bin_dir}/${samtools} view -b -F 4 ${data_dir}/${basename} | ${bin_dir}/${samtools} view -b -F 256 - | ${bin_dir}/${samtools} sort -n -m ${samtools_slots_memory_gigabytes - 2}000000000 - ${data_dir}/${basename}.filtered.namesorted
  215. </argument>
  216. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  217. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  218. <profile namespace="globus" key="jobtype">condor</profile>
  219. <profile namespace="globus" key="count">${samtools_slots}</profile>
  220. <profile namespace="globus" key="maxmemory">${samtools_slots_memory_gigabytes}000</profile>
  221. </job>
  222. <!-- Job: FixMateInfo -->
  223. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  224. <#assign parentAlgo = "${algo}"/>
  225. <#assign algo = "PicardFixMateInformation"/>
  226. <job id="ID10.${input_index}" namespace="seqware" name="runner" version="${seqware_version}">
  227. <argument>
  228. -Xmx1000M
  229. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  230. net.sourceforge.seqware.pipeline.runner.Runner
  231. --${metadata}
  232. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${input_index}_accession
  233. --metadata-processing-accession-file ${accession_dir}/${algo}_${input_index}_accession
  234. --metadata-output-file-prefix ${output_prefix}
  235. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  236. --module ${module}
  237. --
  238. --gcr-algorithm ${algo}
  239. --gcr-command ${java}
  240. -Xmx${picard_fixmate_mem}g
  241. -jar ${bin_dir}/${picardfixmate}
  242. INPUT=${data_dir}/${basename}.filtered.namesorted.bam
  243. OUTPUT=${data_dir}/${basename}.filtered.fixmate.sorted.bam
  244. VALIDATION_STRINGENCY=SILENT TMP_DIR=${tmp_dir}
  245. SORT_ORDER=coordinate
  246. </argument>
  247. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  248. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  249. <profile namespace="globus" key="jobtype">condor</profile>
  250. <profile namespace="globus" key="count">${picard_slots}</profile>
  251. <profile namespace="globus" key="maxmemory">${picard_fixmate_mem + 4}000</profile>
  252. </job>
  253. <!-- Job: index each filtered/fixmate input BAM. Its metadata parent is the PicardFixMateInformation step of the same input, which wrote the BAM being indexed. The processing accession file has no per-input suffix: the per-chromosome RealignerTargetCreator job reads IndexBam1_accession as its parent. -->
  254. <#assign parentAlgo = "PicardFixMateInformation" algo = "IndexBam1"/>
  255. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  256. <job id="ID15.${input_index}" namespace="seqware" name="runner" version="${seqware_version}">
  257. <argument>
  258. -Xmx1000M
  259. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  260. net.sourceforge.seqware.pipeline.runner.Runner
  261. --${metadata}
  262. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${input_index}_accession
  263. --metadata-processing-accession-file ${accession_dir}/${algo}_accession
  264. --metadata-output-file-prefix ${output_prefix}
  265. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  266. --module ${module}
  267. --
  268. --gcr-algorithm ${algo}
  269. --gcr-command ${java}
  270. -Xmx${picard_index_bam_mem}g
  271. -jar ${bin_dir}/${picardindex}
  272. INPUT=${data_dir}/${basename}.filtered.fixmate.sorted.bam
  273. </argument>
  274. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  275. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  276. <profile namespace="globus" key="jobtype">condor</profile>
  277. <profile namespace="globus" key="count">${picard_slots}</profile>
  278. <profile namespace="globus" key="maxmemory">${picard_index_bam_mem + 4}000</profile>
  279. </job>
  280. </#list>
  281. <!-- END LOOP: provision input BAM files and then index them -->
  282. <!-- BEGIN LOOP: by chromosome -->
  283. <#list chr_sizes?split(",") as chr_size>
  284. <#assign chrArr = chr_size?split(":")/>
  285. <#assign chr = chrArr[0]/>
  286. <#assign size = chrArr[1]?number/>
  287. <!-- Job: RealignerTargetCreator, emitted once per chromosome. It passes every filtered/fixmate input BAM with -I and writes gatk.CHR.intervals in the data dir. The scheduler maxmemory request is derived from the same variable as the JVM -Xmx value so the two cannot drift apart. -->
  288. <#assign parentAlgo = "IndexBam1"/>
  289. <#assign algo = "RealignerTargetCreator"/>
  290. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  291. <job id="ID20.${chr_size_index}" namespace="seqware" name="runner" version="${seqware_version}">
  292. <argument>
  293. -Xmx1000M
  294. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  295. net.sourceforge.seqware.pipeline.runner.Runner
  296. --${metadata}
  297. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_accession
  298. --metadata-processing-accession-file ${accession_dir}/${algo}_${chr}_accession
  299. --metadata-output-file-prefix ${output_prefix}
  300. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  301. --module ${module}
  302. --
  303. --gcr-algorithm ${algo}
  304. --gcr-command ${java}
  305. -Xmx${gatk_realign_target_creator_mem}g
  306. -Djava.io.tmpdir=${tmp_dir}
  307. -jar ${bin_dir}/GenomeAnalysisTK-1.3-16-g6a5d5e7/GenomeAnalysisTK.jar
  308. -T RealignerTargetCreator
  309. -R ${ref_fasta}
  310. -o ${data_dir}/gatk.${chr}.intervals -L ${chr} -known ${gatk_dbsnp_vcf} -et NO_ET
  311. <#list inputs?split(",") as input>
  312. <#-- Set the basename from input name -->
  313. <#list input?split("/") as tmp>
  314. <#assign basename = tmp/>
  315. </#list>
  316. -I ${data_dir}/${basename}.filtered.fixmate.sorted.bam
  317. </#list>
  318. </argument>
  319. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  320. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  321. <profile namespace="globus" key="jobtype">condor</profile>
  322. <profile namespace="globus" key="count">${gatk_slots}</profile>
  323. <profile namespace="globus" key="maxmemory">${gatk_realign_target_creator_mem + 4}000</profile>
  324. </job>
  325. <!-- Job: IndelRealigner -->
  326. <!-- NOTE: I had to remove -targetNotSorted to get this to work -->
  327. <#assign parentAlgo = "RealignerTargetCreator"/>
  328. <#assign algo = "IndelRealigner"/>
  329. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  330. <job id="ID30.${chr_size_index}" namespace="seqware" name="runner" version="${seqware_version}">
  331. <argument>
  332. -Xmx1000M
  333. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  334. net.sourceforge.seqware.pipeline.runner.Runner
  335. --${metadata}
  336. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  337. --metadata-processing-accession-file ${accession_dir}/${algo}_${chr}_accession
  338. --metadata-output-file-prefix ${output_prefix}
  339. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  340. --module ${module}
  341. --
  342. --gcr-algorithm ${algo}
  343. --gcr-command ${java}
  344. -Xmx${gatk_indel_realigner_mem}g
  345. -Djava.io.tmpdir=${tmp_dir}
  346. -jar ${bin_dir}/GenomeAnalysisTK-1.3-16-g6a5d5e7/GenomeAnalysisTK.jar
  347. -R ${ref_fasta}
  348. -T IndelRealigner
  349. -L ${chr}
  350. -targetIntervals ${data_dir}/gatk.${chr}.intervals
  351. -o ${data_dir}/gatk.realigned.${chr}.bam
  352. -compress 0
  353. -et NO_ET
  354. <#list inputs?split(",") as input>
  355. <#-- Set the basename to the last path segment of the input (nothing is stripped from the name) -->
  356. <#list input?split("/") as tmp>
  357. <#assign basename = tmp/>
  358. </#list>
  359. -I ${data_dir}/${basename}.filtered.fixmate.sorted.bam
  360. </#list>
  361. </argument>
  362. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  363. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  364. <profile namespace="globus" key="jobtype">condor</profile>
  365. <profile namespace="globus" key="count">${gatk_slots}</profile>
  366. <profile namespace="globus" key="maxmemory">${gatk_indel_realigner_mem + 4}000</profile>
  367. </job>
  368. <!-- Job: sort the realigned per-chromosome BAM by query name for the FixMates step, which requires query-name order rather than coordinate order, see http://picard.svn.sourceforge.net/viewvc/picard/tags/1.48/src/java/net/sf/picard/sam/FixMateInformation.java?revision=1005&view=markup . The JVM heap is sized from picard_sort_mem, the same variable the maxmemory request below is derived from. -->
  369. <#assign parentAlgo = "IndelRealigner"/>
  370. <#assign algo = "RealignedBAMResorting"/>
  371. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  372. <job id="ID35.${chr_size_index}" namespace="seqware" name="runner" version="${seqware_version}">
  373. <argument>
  374. -Xmx1000M
  375. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  376. net.sourceforge.seqware.pipeline.runner.Runner
  377. --${metadata}
  378. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  379. --metadata-processing-accession-file ${accession_dir}/${algo}_${chr}_accession
  380. --metadata-output-file-prefix ${output_prefix}
  381. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  382. --module ${module}
  383. --
  384. --gcr-algorithm ${algo}
  385. --gcr-command ${java}
  386. -Xmx${picard_sort_mem}g
  387. -jar ${bin_dir}/${picardsort}
  388. INPUT=${data_dir}/gatk.realigned.${chr}.bam
  389. OUTPUT=${data_dir}/gatk.realigned.${chr}.queryname-sorted.bam
  390. SORT_ORDER=queryname
  391. </argument>
  392. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  393. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  394. <profile namespace="globus" key="jobtype">condor</profile>
  395. <profile namespace="globus" key="count">${picard_slots}</profile>
  396. <profile namespace="globus" key="maxmemory">${picard_sort_mem + 4}000</profile>
  397. </job>
  398. <!-- Job: FixMates -->
  399. <#assign parentAlgo = "${algo}"/>
  400. <#assign algo = "FixMates"/>
  401. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  402. <job id="ID40.${chr_size_index}" namespace="seqware" name="runner" version="${seqware_version}">
  403. <argument>
  404. -Xmx1000M
  405. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  406. net.sourceforge.seqware.pipeline.runner.Runner
  407. --${metadata}
  408. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  409. --metadata-processing-accession-file ${accession_dir}/${algo}_${chr}_accession
  410. --metadata-output-file-prefix ${output_prefix}
  411. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  412. --module ${module}
  413. --
  414. --gcr-algorithm ${algo}
  415. --gcr-command ${java}
  416. -Xmx${picard_fixmate_mem}g
  417. -Djava.io.tmpdir=${tmp_dir}
  418. -jar ${bin_dir}/${picardfixmate}
  419. INPUT=${data_dir}/gatk.realigned.${chr}.queryname-sorted.bam
  420. OUTPUT=${data_dir}/gatk.realigned.${chr}.fixmate.bam
  421. SO=coordinate
  422. </argument>
  423. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  424. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  425. <profile namespace="globus" key="jobtype">condor</profile>
  426. <profile namespace="globus" key="count">${picard_slots}</profile>
  427. <profile namespace="globus" key="maxmemory">${picard_fixmate_mem + 4}000</profile>
  428. </job>
  429. <!-- Job: Index BAM: Can't we just have picard tools above do this? -->
  430. <#assign parentAlgo = "${algo}"/>
  431. <#assign algo = "IndexBam2"/>
  432. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  433. <job id="ID50.${chr_size_index}" namespace="seqware" name="runner" version="${seqware_version}">
  434. <argument>
  435. -Xmx1000M
  436. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  437. net.sourceforge.seqware.pipeline.runner.Runner
  438. --${metadata}
  439. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  440. --metadata-processing-accession-file ${accession_dir}/${algo}_${chr}_accession
  441. --metadata-output-file-prefix ${output_prefix}
  442. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  443. --module ${module}
  444. --
  445. --gcr-algorithm ${algo}
  446. --gcr-command ${java}
  447. -Xmx${picard_index_bam_mem}g
  448. -Djava.io.tmpdir=${tmp_dir}
  449. -jar ${bin_dir}/${picardindex}
  450. INPUT=${data_dir}/gatk.realigned.${chr}.fixmate.bam
  451. </argument>
  452. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  453. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  454. <profile namespace="globus" key="jobtype">condor</profile>
  455. <profile namespace="globus" key="count">${picard_slots}</profile>
  456. <profile namespace="globus" key="maxmemory">${picard_index_bam_mem + 4}000</profile>
  457. </job>
  458. <!-- Job: MarkDuplicates -->
  459. <#assign parentAlgo = "${algo}"/>
  460. <#assign algo = "PicardMarkDuplicates"/>
  461. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  462. <job id="ID55.${chr_size_index}" namespace="seqware" name="runner" version="${seqware_version}">
  463. <argument>
  464. -Xmx1000M
  465. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  466. net.sourceforge.seqware.pipeline.runner.Runner
  467. --${metadata}
  468. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  469. --metadata-processing-accession-file ${accession_dir}/${algo}_accession
  470. --metadata-output-file-prefix ${output_prefix}
  471. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  472. --module ${module}
  473. --
  474. --gcr-algorithm ${algo}
  475. --gcr-command ${java}
  476. -Xmx${picard_mark_dup_mem}g
  477. -jar ${bin_dir}/${picardmarkdups}
  478. INPUT=${data_dir}/gatk.realigned.${chr}.fixmate.bam
  479. OUTPUT=${data_dir}/gatk.realigned.${chr}.fixmate.markdups.bam
  480. CREATE_INDEX=true
  481. METRICS_FILE=${data_dir}/gatk.realigned.${chr}.fixmate.markdups.metrics
  482. VALIDATION_STRINGENCY=SILENT TMP_DIR=${tmp_dir}
  483. </argument>
  484. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  485. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  486. <profile namespace="globus" key="jobtype">condor</profile>
  487. <profile namespace="globus" key="count">${picard_slots}</profile>
  488. <profile namespace="globus" key="maxmemory">${picard_mark_dup_mem + 4}000</profile>
  489. </job>
  490. </#list>
  491. <!-- END LOOP: by chromosome -->
  492. <!-- TODO: Rob points out that CountCovariates actually only works on a per-lane basis, so we could split by RG_ID (by lane) -->
  493. <!-- Job: Count Covariates -->
  494. <#assign parentAlgo = "PicardMarkDuplicates"/>
  495. <#assign algo = "CountCovariates"/>
  496. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  497. <job id="ID60" namespace="seqware" name="runner" version="${seqware_version}">
  498. <argument>
  499. -Xmx1000M
  500. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  501. net.sourceforge.seqware.pipeline.runner.Runner
  502. --${metadata}
  503. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_accession
  504. --metadata-processing-accession-file ${accession_dir}/${algo}_accession
  505. --metadata-output-file-prefix ${output_prefix}
  506. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  507. --module ${module}
  508. --
  509. --gcr-algorithm ${algo}
  510. --gcr-command ${java}
  511. -Xmx${gatk_count_covariate_mem}g
  512. -Djava.io.tmpdir=${tmp_dir}
  513. -jar ${bin_dir}/GenomeAnalysisTK-1.3-16-g6a5d5e7/GenomeAnalysisTK.jar
  514. -l INFO
  515. -R ${ref_fasta}
  516. -knownSites ${gatk_dbsnp_vcf}
  517. <#list chr_sizes?split(",") as chr_size>
  518. <#assign chrArr = chr_size?split(":")/>
  519. <#assign chr = chrArr[0]/>
  520. <#assign size = chrArr[1]?number/>
  521. -I ${data_dir}/gatk.realigned.${chr}.fixmate.markdups.bam
  522. </#list>
  523. -T CountCovariates -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov DinucCovariate
  524. -recalFile ${data_dir}/recal_data.csv -nt 8 -et NO_ET
  525. </argument>
  526. <!-- TODO: merge, do this on the combined bam files instead -->
  527. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  528. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  529. <profile namespace="globus" key="jobtype">condor</profile>
  530. <profile namespace="globus" key="count">${gatk_slots}</profile>
  531. <profile namespace="globus" key="maxmemory">${gatk_count_covariate_mem + 4}000</profile>
  532. </job>
  533. <!-- BEGIN LOOP: by chromosome -->
  534. <#list chr_sizes?split(",") as chr_size>
  535. <#assign chrArr = chr_size?split(":")/>
  536. <#assign chr = chrArr[0]/>
  537. <#assign size = chrArr[1]?number/>
  538. <!-- Job: Table Recal -->
  539. <#assign parentAlgo = "CountCovariates"/>
  540. <#assign algo = "TableRecalibration"/>
  541. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  542. <job id="ID70.${chr_size_index}" namespace="seqware" name="runner" version="${seqware_version}">
  543. <argument>
  544. -Xmx1000M
  545. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  546. net.sourceforge.seqware.pipeline.runner.Runner
  547. --${metadata}
  548. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_accession
  549. --metadata-processing-accession-file ${accession_dir}/${algo}_${chr}_accession
  550. --metadata-output-file-prefix ${output_prefix}
  551. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  552. --module ${module}
  553. --
  554. --gcr-algorithm ${algo}
  555. --gcr-command ${java}
  556. -Xmx${gatk_count_covariate_mem}g
  557. -Djava.io.tmpdir=${tmp_dir}
  558. -jar ${bin_dir}/GenomeAnalysisTK-1.3-16-g6a5d5e7/GenomeAnalysisTK.jar
  559. --preserve_qscores_less_than 2 -T TableRecalibration -l INFO -R ${ref_fasta}
  560. -I ${data_dir}/gatk.realigned.${chr}.fixmate.markdups.bam
  561. --out ${data_dir}/gatk.realigned.recal.${chr}.bam -recalFile ${data_dir}/recal_data.csv -L ${chr} -et NO_ET
  562. </argument>
  563. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  564. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  565. <profile namespace="globus" key="jobtype">condor</profile>
  566. <profile namespace="globus" key="count">${gatk_slots}</profile>
  567. <profile namespace="globus" key="maxmemory">${gatk_count_covariate_mem + 4}000</profile>
  568. </job>
  569. <!-- Job: Index BAM (ID80.N, one per chromosome). Runs the Picard jar named by the picardindex variable with INPUT set to the recalibrated per-chromosome BAM (gatk.realigned.recal.CHR.bam) that the TableRecalibration job writes. Recorded under the algorithm name IndexBam3, with the per-chromosome TableRecalibration accession file as its parent. NOTE(review): no OUTPUT argument is passed, so the location of the index is left to the tool's default; presumably it lands next to the BAM. Confirm. -->
  570. <#assign parentAlgo = "TableRecalibration"/>
  571. <#assign algo = "IndexBam3"/>
  572. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  573. <job id="ID80.${chr_size_index}" namespace="seqware" name="runner" version="${seqware_version}">
  574. <argument>
  575. -Xmx1000M
  576. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  577. net.sourceforge.seqware.pipeline.runner.Runner
  578. --${metadata}
  579. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  580. --metadata-processing-accession-file ${accession_dir}/${algo}_${chr}_accession
  581. --metadata-output-file-prefix ${output_prefix}
  582. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  583. --module ${module}
  584. --
  585. --gcr-algorithm ${algo}
  586. --gcr-command ${java}
  587. -Xmx${picard_index_bam_mem}g
  588. -Djava.io.tmpdir=${tmp_dir}
  589. -jar ${bin_dir}/${picardindex}
  590. INPUT=${data_dir}/gatk.realigned.recal.${chr}.bam
  591. </argument>
  592. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  593. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  594. <profile namespace="globus" key="jobtype">condor</profile>
  595. <profile namespace="globus" key="count">${picard_slots}</profile>
  596. <profile namespace="globus" key="maxmemory">${picard_index_bam_mem + 4}000</profile>
  597. </job>
  598. <!-- Job: Unified Genotyper SNP caller (ID90.N, one per chromosome). Runs GATK UnifiedGenotyper on the recalibrated per-chromosome BAM, limited to the current chromosome with -L and given the dbSNP VCF through -D. Writes gatk.realigned.recal.bam.snps.raw.CHR.vcf plus a matching .metrics file. The confidence thresholds are hard-coded in the argument list (stand_call_conf 30, stand_emit_conf 1.0). Parent accession is the per-chromosome IndexBam3 accession file. -->
  599. <!-- FIXME: it seems that only -nt 1 can be used (so the thread count is pinned to 1 below), according to http://getsatisfaction.com/gsa/topics/genotyper_error_unable_to_create_basicfeaturereader_using_feature_file -->
  600. <#assign parentAlgo = "IndexBam3"/>
  601. <#assign algo = "GATKUnifiedGenotyperSNV"/>
  602. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  603. <job id="ID90.${chr_size_index}" namespace="seqware" name="runner" version="${seqware_version}">
  604. <argument>
  605. -Xmx1000M
  606. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  607. net.sourceforge.seqware.pipeline.runner.Runner
  608. --${metadata}
  609. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  610. --metadata-processing-accession-file ${accession_dir}/${algo}_${chr}_accession
  611. --metadata-output-file-prefix ${output_prefix}
  612. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  613. --module ${module}
  614. --
  615. --gcr-algorithm ${algo}
  616. --gcr-command ${java}
  617. -Xmx${gatk_unified_genotyper_mem}g
  618. -Djava.io.tmpdir=${tmp_dir}
  619. -jar ${bin_dir}/GenomeAnalysisTK-1.3-16-g6a5d5e7/GenomeAnalysisTK.jar
  620. -R ${ref_fasta}
  621. -T UnifiedGenotyper
  622. -I ${data_dir}/gatk.realigned.recal.${chr}.bam
  623. -D ${gatk_dbsnp_vcf}
  624. -o ${data_dir}/gatk.realigned.recal.bam.snps.raw.${chr}.vcf
  625. -stand_call_conf 30 -stand_emit_conf 1.0 -metrics ${data_dir}/gatk.realigned.recal.bam.snps.raw.${chr}.metrics
  626. -nt 1 -L ${chr} -et NO_ET
  627. </argument>
  628. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  629. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  630. <profile namespace="globus" key="jobtype">condor</profile>
  631. <profile namespace="globus" key="count">${gatk_slots}</profile>
  632. <profile namespace="globus" key="maxmemory">${gatk_unified_genotyper_mem + 4}000</profile>
  633. </job>
  634. <!-- TODO: move this to the SomaticIndelDetector and use "unpaired" to parameterize, "minFraction .3", "minCoverage 4" -->
  635. <!-- see http://www.broadinstitute.org/gsa/wiki/index.php/Somatic_Indel_Detector -->
  636. <!-- Job: Indel caller (ID100.N, one per chromosome). Runs GATK UnifiedGenotyper with -glm INDEL on the recalibrated per-chromosome BAM and writes gatk.realigned.recal.bam.indels.raw.CHR.vcf plus a matching .metrics file. Thresholds are hard-coded in the argument list (stand_emit_conf 10, stand_call_conf 50, dcov 1000). Its parent is IndexBam3, the same parent used by the SNV caller job (ID90). -->
  637. <#assign parentAlgo = "IndexBam3"/>
  638. <#assign algo = "GATKUnifiedGenotyperIndel"/>
  639. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  640. <job id="ID100.${chr_size_index}" namespace="seqware" name="runner" version="${seqware_version}">
  641. <argument>
  642. -Xmx1000M
  643. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  644. net.sourceforge.seqware.pipeline.runner.Runner
  645. --${metadata}
  646. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  647. --metadata-processing-accession-file ${accession_dir}/${algo}_${chr}_accession
  648. --metadata-output-file-prefix ${output_prefix}
  649. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  650. --module ${module}
  651. --
  652. --gcr-algorithm ${algo}
  653. --gcr-command ${java}
  654. -Xmx${gatk_indel_genotyper_mem}g
  655. -Djava.io.tmpdir=${tmp_dir}
  656. -jar ${bin_dir}/GenomeAnalysisTK-1.3-16-g6a5d5e7/GenomeAnalysisTK.jar
  657. -T UnifiedGenotyper
  658. -l INFO
  659. -R ${ref_fasta}
  660. -I ${data_dir}/gatk.realigned.recal.${chr}.bam
  661. -o ${data_dir}/gatk.realigned.recal.bam.indels.raw.${chr}.vcf
  662. -glm INDEL -G Standard -stand_emit_conf 10 -stand_call_conf 50 -dcov 1000
  663. -metrics ${data_dir}/gatk.realigned.recal.bam.indels.raw.${chr}.metrics
  664. -D ${gatk_dbsnp_vcf}
  665. -L ${chr} -et NO_ET
  666. </argument>
  667. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  668. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  669. <profile namespace="globus" key="jobtype">condor</profile>
  670. <profile namespace="globus" key="count">${gatk_slots}</profile>
  671. <profile namespace="globus" key="maxmemory">${gatk_indel_genotyper_mem + 4}000</profile>
  672. </job>
  673. <!-- Job: Indel Filter (ID110.N, one per chromosome). Invokes bin/sw_module_gatk_indel_filter.sh through bash with eight positional arguments, in this order: java binary, gatk_variant_filter_mem, tmp dir, GATK jar, reference FASTA, raw indel VCF for the chromosome, filtered indel VCF for the chromosome, chromosome name. NOTE(review): the role of each position is inferred from the value passed here; the script itself is not visible, so verify the order against it before changing anything. Parent accession is the per-chromosome GATKUnifiedGenotyperIndel accession file. -->
  674. <#assign parentAlgo = "GATKUnifiedGenotyperIndel"/>
  675. <#assign algo = "GATKUnifiedGenotyperIndelFilter"/>
  676. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  677. <job id="ID110.${chr_size_index}" namespace="seqware" name="runner" version="${seqware_version}">
  678. <argument>
  679. -Xmx1000M
  680. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  681. net.sourceforge.seqware.pipeline.runner.Runner
  682. --${metadata}
  683. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  684. --metadata-processing-accession-file ${accession_dir}/${algo}_${chr}_accession
  685. --metadata-output-file-prefix ${output_prefix}
  686. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  687. --module ${module}
  688. --
  689. --gcr-algorithm ${algo}
  690. --gcr-command bash
  691. ${workflow_bundle_dir}/${workflow_name}/bin/sw_module_gatk_indel_filter.sh
  692. ${java}
  693. ${gatk_variant_filter_mem}
  694. ${tmp_dir}
  695. ${bin_dir}/GenomeAnalysisTK-1.3-16-g6a5d5e7/GenomeAnalysisTK.jar
  696. ${ref_fasta}
  697. ${data_dir}/gatk.realigned.recal.bam.indels.raw.${chr}.vcf
  698. ${data_dir}/gatk.realigned.recal.bam.indels.filtered.${chr}.vcf
  699. ${chr}
  700. </argument>
  701. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  702. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  703. <profile namespace="globus" key="jobtype">condor</profile>
  704. <profile namespace="globus" key="count">${gatk_slots}</profile>
  705. <profile namespace="globus" key="maxmemory">${gatk_variant_filter_mem + 4}000</profile>
  706. </job>
  707. <!-- Job: SNV filter (ID120.N, one per chromosome). Invokes bin/sw_module_gatk_snv_filter.sh through bash with nine positional arguments: java binary, gatk_variant_filter_mem, tmp dir, GATK jar, reference FASTA, raw SNV VCF, filtered SNV VCF, chromosome name, and finally the raw indel VCF for the same chromosome. Because of that last argument this job reads a file written by the indel caller (ID100) even though its metadata parent is GATKUnifiedGenotyperSNV. NOTE(review): confirm that the dependency section of this workflow (not visible here) orders this job after ID100, and verify the positional order against the script. -->
  708. <!-- FIXME: will need a WGS version -->
  709. <#assign parentAlgo = "GATKUnifiedGenotyperSNV"/>
  710. <#assign algo = "GATKUnifiedGenotyperSNVFilter"/>
  711. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  712. <job id="ID120.${chr_size_index}" namespace="seqware" name="runner" version="${seqware_version}">
  713. <argument>
  714. -Xmx1000M
  715. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  716. net.sourceforge.seqware.pipeline.runner.Runner
  717. --${metadata}
  718. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  719. --metadata-processing-accession-file ${accession_dir}/${algo}_${chr}_accession
  720. --metadata-output-file-prefix ${output_prefix}
  721. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  722. --module ${module}
  723. --
  724. --gcr-algorithm ${algo}
  725. --gcr-command bash
  726. ${workflow_bundle_dir}/${workflow_name}/bin/sw_module_gatk_snv_filter.sh
  727. ${java}
  728. ${gatk_variant_filter_mem}
  729. ${tmp_dir}
  730. ${bin_dir}/GenomeAnalysisTK-1.3-16-g6a5d5e7/GenomeAnalysisTK.jar
  731. ${ref_fasta}
  732. ${data_dir}/gatk.realigned.recal.bam.snps.raw.${chr}.vcf
  733. ${data_dir}/gatk.realigned.recal.bam.snps.filtered.${chr}.vcf
  734. ${chr}
  735. ${data_dir}/gatk.realigned.recal.bam.indels.raw.${chr}.vcf
  736. </argument>
  737. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  738. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  739. <profile namespace="globus" key="jobtype">condor</profile>
  740. <profile namespace="globus" key="count">${gatk_slots}</profile>
  741. <profile namespace="globus" key="maxmemory">${gatk_variant_filter_mem + 4}000</profile>
  742. </job>
  743. </#list>
  744. <!-- END LOOP: by chromosome -->
  745. <!-- Job: merge SNV (ID130, defined once, outside the per-chromosome loop). Calls bin/sw_module_merge_GATK_VCF.pl via perl to combine the raw per-chromosome SNV VCFs into gatk.realigned.recal.bam.snps.raw.merged.vcf. chr_sizes is split on commas into chr:size pairs; the first list emits one parent accession file option per chromosome (GATKUnifiedGenotyperSNV), the second emits one vcf-input-file option per chromosome. The size value is parsed in both lists but is not used by this job. Resource profiles are fixed: 1 slot, maxmemory 2000. -->
  746. <#assign parentAlgo = "GATKUnifiedGenotyperSNV"/>
  747. <#assign algo = "MergeRawSNV"/>
  748. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  749. <job id="ID130" namespace="seqware" name="runner" version="${seqware_version}">
  750. <argument>
  751. -Xmx1000M
  752. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  753. net.sourceforge.seqware.pipeline.runner.Runner
  754. --${metadata}
  755. <#list chr_sizes?split(",") as chr_size>
  756. <#assign chrArr = chr_size?split(":")/>
  757. <#assign chr = chrArr[0]/>
  758. <#assign size = chrArr[1]?number/>
  759. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  760. </#list>
  761. --metadata-processing-accession-file ${accession_dir}/${algo}_accession
  762. --metadata-output-file-prefix ${output_prefix}
  763. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  764. --module ${module}
  765. --
  766. --gcr-algorithm ${algo}
  767. --gcr-command ${perl}
  768. ${workflow_bundle_dir}/${workflow_name}/bin/sw_module_merge_GATK_VCF.pl
  769. <#list chr_sizes?split(",") as chr_size>
  770. <#assign chrArr = chr_size?split(":")/>
  771. <#assign chr = chrArr[0]/>
  772. <#assign size = chrArr[1]?number/>
  773. --vcf-input-file ${data_dir}/gatk.realigned.recal.bam.snps.raw.${chr}.vcf
  774. </#list>
  775. --vcf-output-file ${data_dir}/gatk.realigned.recal.bam.snps.raw.merged.vcf
  776. </argument>
  777. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  778. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  779. <profile namespace="globus" key="jobtype">condor</profile>
  780. <profile namespace="globus" key="count">1</profile>
  781. <profile namespace="globus" key="maxmemory">2000</profile>
  782. </job>
  783. <!-- TODO: add an annotation step for things like overlaps with indel -->
  784. <!-- Job: merge filtered SNV (ID140, defined once, outside the per-chromosome loop). Calls bin/sw_module_merge_GATK_VCF.pl via perl to combine the filtered per-chromosome SNV VCFs into gatk.realigned.recal.bam.snps.filtered.merged.vcf. The first list over chr_sizes emits one parent accession file option per chromosome (GATKUnifiedGenotyperSNVFilter); the second emits one vcf-input-file option per chromosome. The size value is parsed in both lists but is not used by this job. -->
  785. <#assign parentAlgo = "GATKUnifiedGenotyperSNVFilter"/>
  786. <#assign algo = "MergeFilteredSNV"/>
  787. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  788. <job id="ID140" namespace="seqware" name="runner" version="${seqware_version}">
  789. <argument>
  790. -Xmx1000M
  791. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  792. net.sourceforge.seqware.pipeline.runner.Runner
  793. --${metadata}
  794. <#list chr_sizes?split(",") as chr_size>
  795. <#assign chrArr = chr_size?split(":")/>
  796. <#assign chr = chrArr[0]/>
  797. <#assign size = chrArr[1]?number/>
  798. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  799. </#list>
  800. --metadata-processing-accession-file ${accession_dir}/${algo}_accession
  801. --metadata-output-file-prefix ${output_prefix}
  802. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  803. --module ${module}
  804. --
  805. --gcr-algorithm ${algo}
  806. --gcr-command ${perl}
  807. ${workflow_bundle_dir}/${workflow_name}/bin/sw_module_merge_GATK_VCF.pl
  808. <#list chr_sizes?split(",") as chr_size>
  809. <#assign chrArr = chr_size?split(":")/>
  810. <#assign chr = chrArr[0]/>
  811. <#assign size = chrArr[1]?number/>
  812. --vcf-input-file ${data_dir}/gatk.realigned.recal.bam.snps.filtered.${chr}.vcf
  813. </#list>
  814. --vcf-output-file ${data_dir}/gatk.realigned.recal.bam.snps.filtered.merged.vcf
  815. </argument>
  816. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  817. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  818. <profile namespace="globus" key="jobtype">condor</profile>
  819. <profile namespace="globus" key="count">1</profile>
  820. <profile namespace="globus" key="maxmemory">2000</profile>
  821. </job>
  822. <!-- Job: merge indels (ID150, defined once, outside the per-chromosome loop). Calls bin/sw_module_merge_GATK_VCF.pl via perl to combine the raw per-chromosome indel VCFs into gatk.realigned.recal.bam.indels.raw.merged.vcf. The first list over chr_sizes emits one parent accession file option per chromosome (GATKUnifiedGenotyperIndel); the second emits one vcf-input-file option per chromosome. The size value is parsed in both lists but is not used by this job. -->
  823. <#assign parentAlgo = "GATKUnifiedGenotyperIndel"/>
  824. <#assign algo = "MergeRawIndel"/>
  825. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  826. <job id="ID150" namespace="seqware" name="runner" version="${seqware_version}">
  827. <argument>
  828. -Xmx1000M
  829. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  830. net.sourceforge.seqware.pipeline.runner.Runner
  831. --${metadata}
  832. <#list chr_sizes?split(",") as chr_size>
  833. <#assign chrArr = chr_size?split(":")/>
  834. <#assign chr = chrArr[0]/>
  835. <#assign size = chrArr[1]?number/>
  836. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  837. </#list>
  838. --metadata-processing-accession-file ${accession_dir}/${algo}_accession
  839. --metadata-output-file-prefix ${output_prefix}
  840. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  841. --module ${module}
  842. --
  843. --gcr-algorithm ${algo}
  844. --gcr-command ${perl}
  845. ${workflow_bundle_dir}/${workflow_name}/bin/sw_module_merge_GATK_VCF.pl
  846. <#list chr_sizes?split(",") as chr_size>
  847. <#assign chrArr = chr_size?split(":")/>
  848. <#assign chr = chrArr[0]/>
  849. <#assign size = chrArr[1]?number/>
  850. --vcf-input-file ${data_dir}/gatk.realigned.recal.bam.indels.raw.${chr}.vcf
  851. </#list>
  852. --vcf-output-file ${data_dir}/gatk.realigned.recal.bam.indels.raw.merged.vcf
  853. </argument>
  854. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  855. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  856. <profile namespace="globus" key="jobtype">condor</profile>
  857. <profile namespace="globus" key="count">1</profile>
  858. <profile namespace="globus" key="maxmemory">2000</profile>
  859. </job>
  860. <!-- Job: merge filtered Indel (ID160, defined once, outside the per-chromosome loop). Calls bin/sw_module_merge_GATK_VCF.pl via perl to combine the filtered per-chromosome indel VCFs into gatk.realigned.recal.bam.indels.filtered.merged.vcf. The first list over chr_sizes emits one parent accession file option per chromosome (GATKUnifiedGenotyperIndelFilter); the second emits one vcf-input-file option per chromosome. The size value is parsed in both lists but is not used by this job. -->
  861. <#assign parentAlgo = "GATKUnifiedGenotyperIndelFilter"/>
  862. <#assign algo = "MergeFilteredIndel"/>
  863. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  864. <job id="ID160" namespace="seqware" name="runner" version="${seqware_version}">
  865. <argument>
  866. -Xmx1000M
  867. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  868. net.sourceforge.seqware.pipeline.runner.Runner
  869. --${metadata}
  870. <#list chr_sizes?split(",") as chr_size>
  871. <#assign chrArr = chr_size?split(":")/>
  872. <#assign chr = chrArr[0]/>
  873. <#assign size = chrArr[1]?number/>
  874. --metadata-parent-accession-file ${accession_dir}/${parentAlgo}_${chr}_accession
  875. </#list>
  876. --metadata-processing-accession-file ${accession_dir}/${algo}_accession
  877. --metadata-output-file-prefix ${output_prefix}
  878. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  879. --module ${module}
  880. --
  881. --gcr-algorithm ${algo}
  882. --gcr-command ${perl}
  883. ${workflow_bundle_dir}/${workflow_name}/bin/sw_module_merge_GATK_VCF.pl
  884. <#list chr_sizes?split(",") as chr_size>
  885. <#assign chrArr = chr_size?split(":")/>
  886. <#assign chr = chrArr[0]/>
  887. <#assign size = chrArr[1]?number/>
  888. --vcf-input-file ${data_dir}/gatk.realigned.recal.bam.indels.filtered.${chr}.vcf
  889. </#list>
  890. --vcf-output-file ${data_dir}/gatk.realigned.recal.bam.indels.filtered.merged.vcf
  891. </argument>
  892. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  893. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  894. <profile namespace="globus" key="jobtype">condor</profile>
  895. <profile namespace="globus" key="count">1</profile>
  896. <profile namespace="globus" key="maxmemory">2000</profile>
  897. </job>
  898. <!-- TODO: need to add an annotation step that works off of the filtered SNV VCF file and the filtered Indel VCF file as inputs that does a filter to annotate these files where indels and snvs overlap -->
  899. <!-- Job: merge raw variants (ID170). Merges the outputs of ID130 (merged raw SNV VCF) and ID150 (merged raw indel VCF) with bin/sw_module_merge_GATK_VCF.pl and writes the combined VCF under output_dir rather than data_dir; the same path is registered through the gcr-output-file option with type text/vcf-4. parentAlgo is not reassigned for this job: both parent accession files (MergeRawIndel, MergeRawSNV) are named literally. The requires_dir macro call at the end adds a GRIDSTART_PREJOB profile that runs pegasus-dirmanager on the output directory. NOTE(review): the directory name spells GATKREcalibrationAndVariantCalling with a capital E after GATKR; it is spelled the same way in all three places in this job, so correcting it would change output paths. Confirm whether anything downstream depends on it. -->
  900. <#assign algo = "MergeRawVariants"/>
  901. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  902. <job id="ID170" namespace="seqware" name="runner" version="${seqware_version}">
  903. <argument>
  904. -Xmx1000M
  905. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/seqware-pipeline-${seqware_version}.jar
  906. net.sourceforge.seqware.pipeline.runner.Runner
  907. --${metadata}
  908. --metadata-parent-accession-file ${accession_dir}/MergeRawIndel_accession
  909. --metadata-parent-accession-file ${accession_dir}/MergeRawSNV_accession
  910. --metadata-processing-accession-file ${accession_dir}/${algo}_accession
  911. --metadata-output-file-prefix ${output_prefix}
  912. --metadata-workflow-run-ancestor-accession ${workflow_run_accession}
  913. --module ${module}
  914. --
  915. --gcr-algorithm ${algo}
  916. --gcr-output-file ${algo}:text/vcf-4:${output_dir}/seqware-${seqware_version}_GATKREcalibrationAndVariantCalling-${workflow_version}/${random}/${identifier}.gatk.realigned.recal.bam.variants.raw.merged.vcf
  917. --gcr-command ${perl}
  918. ${workflow_bundle_dir}/${workflow_name}/bin/sw_module_merge_GATK_VCF.pl
  919. --vcf-input-file ${data_dir}/gatk.realigned.recal.bam.snps.raw.merged.vcf
  920. --vcf-input-file ${data_dir}/gatk.realigned.recal.bam.indels.raw.merged.vcf
  921. --vcf-output-file ${output_dir}/seqware-${seqware_version}_GATKREcalibrationAndVariantCalling-${workflow_version}/${random}/${identifier}.gatk.realigned.recal.bam.variants.raw.merged.vcf
  922. </argument>
  923. <!-- See http://www.globus.org/api/c-globus-4.0.3/globus_gram_job_manager/html/globus_job_manager_rsl.html -->
  924. <!-- See http://pegasus.isi.edu/wms/docs/3.0/advanced_concepts_profiles.php#id2738647 -->
  925. <profile namespace="globus" key="jobtype">condor</profile>
  926. <profile namespace="globus" key="count">1</profile>
  927. <profile namespace="globus" key="maxmemory">2000</profile>
  928. <@requires_dir "${output_dir}/seqware-${seqware_version}_GATKREcalibrationAndVariantCalling-${workflow_version}/${random}"/>
  929. </job>
  930. <!-- Job: merge filtered variants (from jobs ID140 and ID160) -->
  931. <#assign algo = "MergeFilteredVariants"/>
  932. <#assign module = "net.sourceforge.seqware.pipeline.modules.GenericCommandRunner"/>
  933. <job id="ID180" namespace="seqware" name="runner" version="${seqware_version}">
  934. <argument>
  935. -Xmx1000M
  936. -classpath ${workflow_bundle_dir}/${workflow_name}/classes:${workflow_bundle_dir}/${workflow_name}/lib/

Large files files are truncated, but you can click here to view the full file