/tools/indels/indel_sam2interval.xml

https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 139 lines · 133 code · 6 blank · 0 comment · 0 complexity · 724de95e9dfbf156255c14e272e66eac MD5 · raw file

  1. <tool id="indel_sam2interval" name="Extract indels" version="1.0.0">
  2. <description>from SAM</description>
  3. <command interpreter="python">
  4. indel_sam2interval.py
  5. --input=$input1
  6. --include_base=$include_base
  7. --collapse=$collapse
  8. --int_out=$output1
  9. #if $ins_out.include_ins_out == "true"
  10. --bed_ins_out=$output2
  11. #else
  12. --bed_ins_out="None"
  13. #end if
  14. #if $del_out.include_del_out == "true"
  15. --bed_del_out=$output3
  16. #else
  17. --bed_del_out="None"
  18. #end if
  19. </command>
  20. <inputs>
  21. <param format="sam" name="input1" type="data" label="Select dataset to convert" />
  22. <param name="include_base" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Include the relevant base(s) for each insertion (and a dash (-) for deletions)" />
  23. <param name="collapse" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Collapse repeated locations onto single line with counts" />
  24. <conditional name="ins_out">
  25. <param name="include_ins_out" type="select" label="Include insertions output bed file?">
  26. <option value="true">Yes</option>
  27. <option value="false">No</option>
  28. </param>
  29. <when value="true" />
  30. <when value="false" />
  31. </conditional>
  32. <conditional name="del_out">
  33. <param name="include_del_out" type="select" label="Include deletions output bed file?">
  34. <option value="true">Yes</option>
  35. <option value="false">No</option>
  36. </param>
  37. <when value="true" />
  38. <when value="false" />
  39. </conditional>
  40. </inputs>
  41. <outputs>
  42. <data format="interval" name="output1" />
  43. <data format="bed" name="output2">
  44. <filter>ins_out[ "include_ins_out" ] == "true"</filter>
  45. </data>
  46. <data format="bed" name="output3">
  47. <filter>del_out[ "include_del_out" ] == "true"</filter>
  48. </data>
  49. </outputs>
  50. <tests>
  51. <test>
  52. <param name="input1" value="indel_sam2interval_in1.sam" ftype="sam"/>
  53. <param name="include_base" value="true"/>
  54. <param name="collapse" value="true"/>
  55. <param name="include_ins_out" value="true" />
  56. <param name="include_del_out" value="true" />
  57. <output name="output1" file="indel_sam2interval_out1.interval" ftype="interval"/>
  58. <output name="output2" file="indel_sam2interval_out2.bed" ftype="bed"/>
  59. <output name="output3" file="indel_sam2interval_out3.bed" ftype="bed"/>
  60. </test>
  61. </tests>
  62. <help>
  63. **What it does**
  64. Given a SAM file containing indels, converts them to an interval file with a column indicating whether each indel is an insertion or a deletion, and can optionally also create a BED file for each type (one for insertions, one for deletions). The interval file can be combined with other similar files to create a table useful for analysis with the Indel Analysis Table tool. The BED files can be useful for visualizing the reads.
  65. -----
  66. **Example**
  67. Suppose you have the following mapping results::
  68. r327 16 chrM 11 37 8M1D10M * 0 0 CTTACCAGATAGTCATCA -+&lt;2;?@BA@?-,.+4=4 XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:41^C35
  69. r457 0 chr1 14 37 14M * 0 0 ACCTGACAGATATC =/DF;?@1A@?-,. XT:A:U NM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
  70. r501 16 chrM 6 23 7M1I13M * 0 0 TCTGTGCCTACCAGACATTCA +=$2;?@BA@?-,.+4=4=4A XT:A:U NM:i:3 X0:i:1 X1:i:1 XM:i:2 XO:i:1 XG:i:1 MD:Z:28C36G9 XA:Z:chrM,+134263658,14M1I61M,4;
  71. r1288 16 chrM 8 37 11M1I7M * 0 0 TCACTTACCTGTACACACA /*F2;?@%A@?-,.+4=4= XT:A:U NM:i:4 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:2T0T1A69
  72. r1902 0 chr1 4 37 7M2D18M * 0 0 AGTCTCTTACCTGACGGTTATGA &lt;2;?@BA@?-,.+4=4=4AA663 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:2 MD:Z:17^CA58A0
  73. r2204 16 chrM 9 0 19M * 0 0 CTGGTACCTGACAGGTATC 2;?@BA@?-,.+4=4=4AA XT:A:R NM:i:1 X0:i:2 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:0T75 XA:Z:chrM,-564927,76M,1;
  74. r2314 16 chrM 6 37 10M2D8M * 0 0 TCACTCTTACGTCTGA &lt;2;?@BA@?-,.+4=4 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:2 MD:Z:25A5^CA45
  75. r3001 0 chrM 13 37 3M1D5M2I7M * 0 0 TACAGTCACCCTCATCA &lt;2;?@BA/(@?-,$&amp; XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:2 MD:Z:17^CA58A0
  76. r3218 0 chr1 13 37 8M1D7M * 0 0 TACAGTCACTCATCA &lt;2;?@BA/(@?-,$&amp; XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:2 MD:Z:17^CA58A0
  77. r4767 16 chr2 3 37 15M2I7M * 0 0 CAGACTCTCTTACCAAAGACAGAC &lt;2;?@BA/(@?-,.+4=4=4AA66 XT:A:U NM:i:4 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:2T1A4T65
  78. r5333 0 chrM 5 37 17M1D8M * 0 0 GTCTCTCATACCAGACAACGGCAT FB3$@BA/(@?-,.+4=4=4AA66 XT:A:U NM:i:4 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:45C10^C0C5C13
  79. r6690 16 chrM 7 23 20M * 0 0 CTCTCTTACCAGACAGACAT 2;?@BA/(@?-,.+4=4=4A XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76 XA:Z:chrM,-568532,76M,1;
  80. r7211 0 chrM 7 37 24M * 0 0 CGACAGAGACAAAATAACATTTAA //&lt;2;?@BA@?-,.+4=442;;6: XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:2 XO:i:1 XG:i:1 MD:Z:73G0G0
  81. r7899 69 * 0 0 * * 0 0 CTGCGTGTTGGTGTCTACTGGGGT #%#'##$#$##&amp;%#%$$$%#%#'#
  82. r9192 133 * 0 0 * * 0 0 GTGCGTCGGGGAGGGTGCTGTCGG ######%#$%#$$###($###&amp;&amp;%
  83. r9922 16 chrM 4 0 7M3I9M * 0 0 CCAGACATTTGAAATCAGG F/D4=44^D++26632;;6 XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
  84. r9987 16 chrM 4 0 9M1I18M * 0 0 AGGTTCTCATTACCTGACACTCATCTTG G/AD6"/+4=4426632;;6:&lt;2;?@BA XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
  85. r10145 16 chr1 16 0 5M2D7M * 0 0 CACATTGTTGTA G//+4=44=4AA XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
  86. r10324 16 chrM 15 0 6M1D5M * 0 0 CCGTTCTACTTG A@??8.G//+4= XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
  87. r12331 16 chrM 17 0 4M2I6M * 0 0 AGTCGAATACGTG 632;;6:&lt;2;?@B XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
  88. r12914 16 chr2 24 0 4M3I3M * 0 0 ACTACCCCAA G//+4=42,. XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
  89. r13452 16 chrM 13 0 3M1D11M * 0 0 TACGTCACTCATCA IIIABCCCICCCCI XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
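  90. The following three files will be produced, listed here one after another in this order (the Interval file with 15 rows, then the Insertions BED with 7 rows, then the Deletions BED with 8 rows)::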
  91. chr1 11 13 D - 1
  92. chr1 21 22 D - 1
  93. chr1 21 23 D - 1
  94. chr2 18 19 I AA 1
  95. chr2 28 29 I CCC 1
  96. chrM 11 12 I TTT 1
  97. chrM 13 14 I C 1
  98. chrM 13 14 I T 1
  99. chrM 16 17 D - 1
  100. chrM 16 18 D - 1
  101. chrM 19 20 D - 1
  102. chrM 19 20 I T 1
  103. chrM 21 22 D - 1
  104. chrM 21 22 I GA 1
  105. chrM 22 23 D - 1
  106. chr2 18 19
  107. chr2 28 29
  108. chrM 11 12
  109. chrM 13 14
  110. chrM 13 14
  111. chrM 19 20
  112. chrM 21 22
  113. chr1 11 13
  114. chr1 21 22
  115. chr1 21 23
  116. chrM 16 17
  117. chrM 16 18
  118. chrM 19 20
  119. chrM 21 22
  120. chrM 22 23
  121. For more information on SAM, please consult the `SAM format description`__.
  122. .. __: http://www.ncbi.nlm.nih.gov/pubmed/19505943
  123. </help>
  124. </tool>