PageRenderTime 25ms CodeModel.GetById 17ms app.highlight 3ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/indels/indel_sam2interval.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 139 lines | 133 code | 6 blank | 0 comment | 0 complexity | 724de95e9dfbf156255c14e272e66eac MD5 | raw file
  1<tool id="indel_sam2interval" name="Extract indels" version="1.0.0">
  2  <description>from SAM</description>
  3  <command interpreter="python">
  4    indel_sam2interval.py
  5      --input=$input1
  6      --include_base=$include_base
  7      --collapse=$collapse
  8      --int_out=$output1
  9      #if $ins_out.include_ins_out == "true"
 10        --bed_ins_out=$output2
 11      #else
 12        --bed_ins_out="None"
 13      #end if
 14      #if $del_out.include_del_out == "true"
 15        --bed_del_out=$output3
 16      #else
 17        --bed_del_out="None"
 18      #end if
 19  </command>
 20  <inputs>
 21    <param format="sam" name="input1" type="data" label="Select dataset to convert" />
 22    <param name="include_base" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Include the relevant base(s) for each insertion (and a dash (-) for deletions)" />
 23    <param name="collapse" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Collapse repeated locations onto single line with counts" />
 24    <conditional name="ins_out">
 25      <param name="include_ins_out" type="select" label="Include insertions output bed file?">
 26        <option value="true">Yes</option>
 27        <option value="false">No</option>
 28      </param>
 29      <when value="true" />
 30      <when value="false" />
 31    </conditional>
 32    <conditional name="del_out">
 33      <param name="include_del_out" type="select" label="Include deletions output bed file?">
 34        <option value="true">Yes</option>
 35        <option value="false">No</option>
 36      </param>
 37      <when value="true" />
 38      <when value="false" />
 39    </conditional>
 40  </inputs>
 41  <outputs>
 42    <data format="interval" name="output1" />
 43    <data format="bed" name="output2">
 44      <filter>ins_out[ "include_ins_out" ] == "true"</filter>
 45    </data>
 46    <data format="bed" name="output3">
 47      <filter>del_out[ "include_del_out" ] == "true"</filter>
 48    </data>
 49  </outputs>
 50  <tests>
 51    <test>
 52      <param name="input1" value="indel_sam2interval_in1.sam" ftype="sam"/>
 53      <param name="include_base" value="true"/>
 54      <param name="collapse" value="true"/>
 55      <param name="include_ins_out" value="true" />
 56      <param name="include_del_out" value="true" />
 57      <output name="output1" file="indel_sam2interval_out1.interval" ftype="interval"/>
 58      <output name="output2" file="indel_sam2interval_out2.bed" ftype="bed"/>
 59      <output name="output3" file="indel_sam2interval_out3.bed" ftype="bed"/>
 60    </test>
 61  </tests>
 62  <help>
 63
 64**What it does**
 65
 66Given a SAM file containing indels, converts them to an interval file with a column indicating whether each one is an insertion or a deletion. Optionally, it can also create a BED file for each type (one for insertions, one for deletions). The interval file can be combined with other such interval files to create a table suitable for analysis with the Indel Analysis Table tool. The BED files can be useful for visualizing the reads.
 67
 68-----
 69
 70**Example**
 71
 72Suppose you have the following mapping results::
 73
 74 r327     16   chrM   11   37      8M1D10M   *   0   0             CTTACCAGATAGTCATCA   -+&lt;2;?@BA@?-,.+4=4             XT:A:U  NM:i:1  X0:i:1  X1:i:0  XM:i:0  XO:i:1  XG:i:1  MD:Z:41^C35
 75 r457      0   chr1   14   37          14M   *   0   0                 ACCTGACAGATATC   =/DF;?@1A@?-,.                 XT:A:U  NM:i:0  X0:i:1  X1:i:0  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 76 r501     16   chrM    6   23      7M1I13M   *   0   0          TCTGTGCCTACCAGACATTCA   +=$2;?@BA@?-,.+4=4=4A          XT:A:U  NM:i:3  X0:i:1  X1:i:1  XM:i:2  XO:i:1  XG:i:1  MD:Z:28C36G9        XA:Z:chrM,+134263658,14M1I61M,4;
 77 r1288    16   chrM    8   37      11M1I7M   *   0   0            TCACTTACCTGTACACACA   /*F2;?@%A@?-,.+4=4=            XT:A:U  NM:i:4  X0:i:1  X1:i:0  XM:i:3  XO:i:1  XG:i:1  MD:Z:2T0T1A69
 78 r1902     0   chr1    4   37      7M2D18M   *   0   0        AGTCTCTTACCTGACGGTTATGA   &lt;2;?@BA@?-,.+4=4=4AA663        XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:17^CA58A0
 79 r2204    16   chrM    9    0          19M   *   0   0            CTGGTACCTGACAGGTATC   2;?@BA@?-,.+4=4=4AA            XT:A:R  NM:i:1  X0:i:2  X1:i:0  XM:i:1  XO:i:0  XG:i:0  MD:Z:0T75           XA:Z:chrM,-564927,76M,1;
 80 r2314    16   chrM    6   37      10M2D8M   *   0   0               TCACTCTTACGTCTGA   &lt;2;?@BA@?-,.+4=4               XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:25A5^CA45
 81 r3001     0   chrM   13   37   3M1D5M2I7M   *   0   0              TACAGTCACCCTCATCA   &lt;2;?@BA/(@?-,$&amp;                XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:17^CA58A0
 82 r3218     0   chr1   13   37       8M1D7M   *   0   0                TACAGTCACTCATCA   &lt;2;?@BA/(@?-,$&amp;                XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:17^CA58A0
 83 r4767    16   chr2    3   37      15M2I7M   *   0   0       CAGACTCTCTTACCAAAGACAGAC   &lt;2;?@BA/(@?-,.+4=4=4AA66       XT:A:U  NM:i:4  X0:i:1  X1:i:0  XM:i:3  XO:i:1  XG:i:1  MD:Z:2T1A4T65
 84 r5333     0   chrM    5   37      17M1D8M   *   0   0       GTCTCTCATACCAGACAACGGCAT   FB3$@BA/(@?-,.+4=4=4AA66       XT:A:U  NM:i:4  X0:i:1  X1:i:0  XM:i:3  XO:i:1  XG:i:1  MD:Z:45C10^C0C5C13
 85 r6690    16   chrM    7   23          20M   *   0   0           CTCTCTTACCAGACAGACAT   2;?@BA/(@?-,.+4=4=4A           XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76             XA:Z:chrM,-568532,76M,1;
 86 r7211     0   chrM    7   37          24M   *   0   0       CGACAGAGACAAAATAACATTTAA   //&lt;2;?@BA@?-,.+4=442;;6:       XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:2  XO:i:1  XG:i:1  MD:Z:73G0G0
 87 r7899    69      *    0    0            *   *   0   0       CTGCGTGTTGGTGTCTACTGGGGT   #%#'##$#$##&amp;%#%$$$%#%#'#
 88 r9192   133      *    0    0            *   *   0   0       GTGCGTCGGGGAGGGTGCTGTCGG   ######%#$%#$$###($###&amp;&amp;%
 89 r9922    16   chrM    4    0       7M3I9M   *   0   0            CCAGACATTTGAAATCAGG   F/D4=44^D++26632;;6            XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 90 r9987    16   chrM    4    0      9M1I18M   *   0   0   AGGTTCTCATTACCTGACACTCATCTTG   G/AD6"/+4=4426632;;6:&lt;2;?@BA   XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 91 r10145   16   chr1   16    0       5M2D7M   *   0   0                   CACATTGTTGTA   G//+4=44=4AA                   XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 92 r10324   16   chrM   15    0       6M1D5M   *   0   0                   CCGTTCTACTTG   A@??8.G//+4=                   XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 93 r12331   16   chrM   17    0       4M2I6M   *   0   0                  AGTCGAATACGTG   632;;6:&lt;2;?@B                  XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 94 r12914   16   chr2   24    0       4M3I3M   *   0   0                     ACTACCCCAA   G//+4=42,.                     XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 95 r13452   16   chrM   13    0      3M1D11M   *   0   0                 TACGTCACTCATCA   IIIABCCCICCCCI                 XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 96
 97
 98The following three files will be produced, shown here in order and separated by blank lines (interval, insertions BED, and deletions BED)::
 99
100 chr1   11   13   D     -   1
101 chr1   21   22   D     -   1
102 chr1   21   23   D     -   1
103 chr2   18   19   I    AA   1
104 chr2   28   29   I   CCC   1
105 chrM   11   12   I   TTT   1
106 chrM   13   14   I     C   1
107 chrM   13   14   I     T   1
108 chrM   16   17   D     -   1
109 chrM   16   18   D     -   1
110 chrM   19   20   D     -   1
111 chrM   19   20   I     T   1
112 chrM   21   22   D     -   1
113 chrM   21   22   I    GA   1
114 chrM   22   23   D     -   1
115
116 chr2   18   19
117 chr2   28   29
118 chrM   11   12
119 chrM   13   14
120 chrM   13   14
121 chrM   19   20
122 chrM   21   22
123
124 chr1   11   13
125 chr1   21   22
126 chr1   21   23
127 chrM   16   17
128 chrM   16   18
129 chrM   19   20
130 chrM   21   22
131 chrM   22   23
132
133For more information on SAM, please consult the `SAM format description`__.
134
135.. __: http://www.ncbi.nlm.nih.gov/pubmed/19505943
136
137
138  </help>
139</tool>