/tools/sr_mapping/PerM.xml
XML | 368 lines | 311 code | 33 blank | 24 comment | 0 complexity | fc1ac66d5359f331235a69c8b2b79ba6 MD5 | raw file
<tool id="PerM" name="Map with PerM" version="1.0.0">
  <description>for SOLiD and Illumina</description>
  <!-- works with PerM version 0.2.6 -->
  <!--
    The command below is a Cheetah template that assembles the PerM command line from the
    inputs: reference (history fasta or built-in index), single or paired reads, read format
    (csfastq for color space, fastq for base space), mismatch threshold, the optional "full"
    parameter set, and the optional unmapped-reads output (-u).
    PerM's console output is piped through tr/grep/sed so that only the summary lines
    matching Reads, Sub0, Pairs or single are kept.
  -->
  <command>
PerM
#if $s.sourceOfRef.refSource == "history":
  $s.sourceOfRef.ref
#else:
  $s.sourceOfRef.index.value
#end if
#if $s.mate.singleOrPairs == "single":
  $s.mate.reads
#else:
  -1 $s.mate.reads1 -2 $s.mate.reads2
  -U $s.mate.upperbound
  -L $s.mate.lowerbound
  $s.mate.excludeAmbiguousPairs
#end if
#if $s.space == "color":
  --readFormat "csfastq"
#else:
  --readFormat "fastq"
#end if
#if $int($str($valAlign)) >= 0:
  -v $valAlign
#end if
#if $align.options == "full":
  --seed $align.seed
  -$align.alignments
  #if $str($align.delimiter) != "None":
    --delimiter $align.delimiter
  #end if
  -T $align.sTrimL
  $align.includeReadsWN
  $align.statsOnly
  $align.ignoreQS
#end if
#if $str($bUnmappedRead) == "true" and $s.space == "color":
  -u $unmappedReadOutCS
#elif $str($bUnmappedRead) == "true" and $s.space == "base":
  -u $unmappedReadOut
#end if
-o $output --outputFormat sam --noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
  </command>
  <inputs>
    <!--
      The "color" and "base" branches mirror each other. They differ in the .loc file that
      lists the built-in indexes and in the read datatype (fastqcssanger vs fastqsanger),
      plus minor label wording.
    -->
    <conditional name="s">
      <param name="space" label="Is your data color space (SOLiD) or base space (Illumina)?" type="select">
        <option value="color">Color space</option>
        <option value="base">Base space</option>
      </param>
      <when value="color">
        <conditional name="sourceOfRef">
          <param name="refSource" label="Will you provide your own reference file from the history or use a built-in index?" type="select">
            <option value="indexed">Built-in index</option>
            <option value="history">Fasta file from history</option>
          </param>
          <when value="indexed">
            <param name="index" type="select" label="Select a reference genome (with seed and read length)" help="if your genome of interest is not listed - contact Galaxy team">
              <options from_file="perm_color_index.loc">
                <column name="value" index="1" />
                <column name="name" index="0" />
              </options>
            </param>
          </when>
          <when value="history">
            <param name="ref" format="fasta" type="data" label="Reference" />
          </when>
        </conditional>
        <conditional name="mate">
          <param name="singleOrPairs" label="Mate-paired?" type="select">
            <option value="single">Single-end</option>
            <option value="paired">Mate pairs</option>
          </param>
          <when value="single">
            <param format="fastqcssanger" name="reads" type="data" label="Reads" />
          </when>
          <when value="paired">
            <param name="reads1" format="fastqcssanger" label="Forward FASTQ file" type="data" />
            <param name="reads2" format="fastqcssanger" label="Reverse FASTQ file" type="data" />
            <param label="Upperbound of pairs separation (-U)" name="upperbound" type="integer" size="8" value="100000" />
            <param label="Lowerbound of pairs separation (-L)" name="lowerbound" type="integer" size="8" value="0" />
            <param label="Exclude ambiguous pairs (-e)" name="excludeAmbiguousPairs" type="boolean" checked="false" truevalue="-e" falsevalue="" />
          </when>
        </conditional>
      </when>
      <when value="base">
        <conditional name="sourceOfRef">
          <param name="refSource" label="Will you provide your own reference file from the history or use a built-in index?" type="select">
            <option value="indexed">Built-in index</option>
            <option value="history">Fasta file from history</option>
          </param>
          <when value="indexed">
            <param name="index" type="select" label="Select a reference genome with seed and read length" help="if your genome of interest is not listed - contact Galaxy team">
              <options from_file="perm_base_index.loc">
                <column name="value" index="1" />
                <column name="name" index="0" />
              </options>
            </param>
          </when>
          <when value="history">
            <param name="ref" format="fasta" type="data" label="Reference" />
          </when>
        </conditional>
        <conditional name="mate">
          <param name="singleOrPairs" label="Mate-paired?" type="select">
            <option value="single">Single-end</option>
            <option value="paired">Mate pairs</option>
          </param>
          <when value="single">
            <param format="fastqsanger" name="reads" type="data" label="Reads" />
          </when>
          <when value="paired">
            <param name="reads1" format="fastqsanger" label="Forward FASTQ file" type="data" />
            <param name="reads2" format="fastqsanger" label="Reverse FASTQ file" type="data" />
            <param label="Upperbound of pairs separation (-U)" name="upperbound" type="integer" size="8" value="100000" />
            <param label="Lowerbound of pairs separation (-L)" name="lowerbound" type="integer" size="8" value="0" />
            <param label="Exclude ambiguous pairs (-e)" name="excludeAmbiguousPairs" type="boolean" checked="false" truevalue="-e" falsevalue="" />
          </when>
        </conditional>
      </when>
    </conditional>
    <!-- The command only emits -v when this value is >= 0. -->
    <param label="Maximum number of mismatches permitted in one end of full read (-v)" name="valAlign" type="integer" size="5" value="2" />
    <conditional name="align">
      <param help="Use default setting or specify full parameters list" label="PerM settings to use" name="options" type="select">
        <option value="preSet">Commonly used</option>
        <option value="full">Full parameter list</option>
      </param>
      <when value="preSet"/>
      <when value="full">
        <!-- The selected value (A, B or E) is emitted as a single-dash flag by the command. -->
        <param label="Whether or not to report all valid alignments per read (-A/-B/-E)" name="alignments" type="select">
          <option value="A">Report all valid alignments</option>
          <option value="B">Report the best alignments in terms of number of mismatches</option>
          <option value="E">Report only uniquely mapped reads</option>
        </param>
        <param label="Choose the seed full sensitive to different number of mismatches (--seed)" name="seed" type="select" >
          <option value="F2">2 mismatches</option>
          <option value="S11">1 SNP + 1 color error</option>
          <option value="F3">3 mismatches</option>
          <option value="F4">4 mismatches</option>
        </param>
        <!-- The value "None" is a sentinel: the command omits the delimiter option for it. -->
        <param label="Choose the delimiter to identify read name (--delimiter)" name="delimiter" type="select">
          <option value="None">Tab/Space/Comma</option>
          <option value=":">Colon</option>
          <option value="_">Underscore</option>
        </param>
        <param label="Use the first n bases of each read for alignment (-T)" name="sTrimL" type="integer" size="5" value="50" />
        <param name="includeReadsWN" type="boolean" checked="true" truevalue="--includeReadsWN" falsevalue="" label="Include reads with 'N' or '.' by encoding '.' as 3, 'N' as 'A' (--includeReadsWN)" />
        <param name="statsOnly" type="boolean" checked="false" truevalue="--statsOnly" falsevalue="" label="output mapping stats only. Don't output alignments (--statsOnly)" />
        <param name="ignoreQS" type="boolean" checked="false" truevalue="--ignoreQS" falsevalue="" label="Ignore quality scores (--ignoreQS)" />
      </when>
    </conditional> <!-- options -->
    <param name="bUnmappedRead" type="select" label="Output the unmapped reads (-u)">
      <option value="true">Yes</option>
      <option value="false">No</option>
    </param>
  </inputs>
  <outputs>
    <data name="output" format="sam"/>
    <!-- Exactly one of the two unmapped-read outputs is kept, chosen by the data space. -->
    <data name="unmappedReadOut" format="fastqsanger">
      <filter>bUnmappedRead == "true" and s["space"] == "base"</filter>
    </data>
    <data name="unmappedReadOutCS" format="fastqcssanger">
      <filter>bUnmappedRead == "true" and s["space"] == "color"</filter>
    </data>
  </outputs>
  <tests>
    <test>
      <!--
      PerM command:
      PerM test-data/phiX.fasta 50 +seed F3 -m -s phiX_F3_50.index +readFormat .fastq
      PerM phiX_F3_50.index -1 test-data/perm_in1.fastqsanger -2 test-data/perm_in2.fastqsanger -U 100000 -L 0 -e +readFormat .fastq -v 0 +seed F2 -A -T 50 +includeReadsWN -o perm_out1.sam +outputFormat sam +noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
      You need to replace the + with 2 dashes.
      -->
      <param name="space" value="base" />
      <param name="refSource" value="indexed" />
      <param name="index" value="phiX_F3_50" />
      <param name="singleOrPairs" value="paired" />
      <param name="reads1" value="perm_in1.fastqsanger" ftype="fastqsanger" />
      <param name="reads2" value="perm_in2.fastqsanger" ftype="fastqsanger" />
      <param name="upperbound" value="100000" />
      <param name="lowerbound" value="0" />
      <param name="excludeAmbiguousPairs" value="true" />
      <param name="valAlign" value="0" />
      <param name="options" value="full" />
      <param name="alignments" value="A" />
      <param name="seed" value="F2" />
      <param name="delimiter" value="None" />
      <param name="sTrimL" value="50" />
      <param name="includeReadsWN" value="true" />
      <param name="statsOnly" value="false" />
      <param name="ignoreQS" value="false" />
      <param name="bUnmappedRead" value="false" />
      <output name="output" file="perm_out1.sam" ftype="sam" />
    </test>
    <test>
      <!--
      PerM command:
      PerM test-data/chr_m.fasta test-data/perm_in3.fastqsanger +readFormat .fastq -v 2 -u perm_out3.fastqsanger -o perm_out2.sam +outputFormat sam +noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
      You need to replace the + with 2 dashes.
      -->
      <param name="space" value="base" />
      <param name="refSource" value="history" />
      <param name="ref" value="chr_m.fasta" ftype="fasta" />
      <param name="singleOrPairs" value="single" />
      <param name="reads" value="perm_in3.fastqsanger" ftype="fastqsanger" />
      <param name="valAlign" value="2" />
      <param name="options" value="preSet" />
      <param name="bUnmappedRead" value="true" />
      <output name="output" file="perm_out2.sam" ftype="sam" />
      <output name="unmappedReadOut" file="perm_out3.fastqsanger" ftype="fastqsanger" />
    </test>
    <test>
      <!--
      PerM command:
      PerM test-data/phiX.fasta test-data/perm_in4.fastqcssanger +readFormat .csfastq -v 1 -o perm_out4.sam +outputFormat sam +noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
      You need to replace the + with 2 dashes.
      -->
      <param name="space" value="color" />
      <param name="refSource" value="history" />
      <param name="ref" value="phiX.fasta" ftype="fasta" />
      <param name="singleOrPairs" value="single" />
      <param name="reads" value="perm_in4.fastqcssanger" ftype="fastqcssanger" />
      <param name="valAlign" value="1" />
      <param name="options" value="preSet" />
      <param name="bUnmappedRead" value="false" />
      <output name="output" file="perm_out4.sam" ftype="sam" />
    </test>
    <test>
      <!--
      PerM command:
      PerM equCab2.fasta 50 +seed F4 -m -s equCab2_F3_50.index +readFormat .csfastq
      PerM equCab2_F3_50.index -1 test-data/perm_in5.fastqcssanger -2 test-data/perm_in6.fastqcssanger -U 90000 -L 10000 +readFormat .csfastq -v 3 -o perm_out5.sam +outputFormat sam +noSamHeader | tr '\r' '\n' | tr -cd "[:print:]\t\n " | grep "Reads\|Sub0\|Pairs\|single" | sed 's/.*Reads:,//' | sed 's/\/.*dat,_ Sub0/Sub0/'
      You need to replace the + with 2 dashes.
      The reference used to build the index (equCab2.fasta in the command above) needs to be supplied.
      -->
      <param name="space" value="color" />
      <param name="refSource" value="indexed" />
      <param name="index" value="equCab2_chrM_F3_50" />
      <param name="singleOrPairs" value="paired" />
      <param name="reads1" value="perm_in5.fastqcssanger" ftype="fastqcssanger" />
      <param name="reads2" value="perm_in6.fastqcssanger" ftype="fastqcssanger" />
      <param name="upperbound" value="90000" />
      <param name="lowerbound" value="10000" />
      <param name="excludeAmbiguousPairs" value="false" />
      <param name="valAlign" value="3" />
      <param name="options" value="preSet" />
      <param name="bUnmappedRead" value="false" />
      <output name="output" file="perm_out5.sam" ftype="sam" />
    </test>
  </tests>
  <help>
**What it does**

PerM is a short read aligner designed to be ultrafast with long SOLiD reads to the whole genome or transcriptions. PerM can be fully sensitive to alignments with up to four mismatches and highly sensitive to a higher number of mismatches.

**Development team**

PerM is developed by Ting Chen's group, Center of Excellence in Genomic Sciences at the University of Southern California. If you have any questions, please email yanghoch at usc.edu or check the `project page`__.

.. __: http://code.google.com/p/perm/

**Citation**

PerM: Efficient mapping of short sequencing reads with periodic full sensitive spaced seeds. Bioinformatics, 2009, 25 (19): 2514-2521.

**Input**

The input files are read files and a reference. Users can use the pre-indexed reference in Galaxy or upload their own reference.

The uploaded reference file should be in the fasta format. Multiple sequences like transcriptions should be concatenated together separated by a header line that starts with the ">" character.

Reads files must be in either fastqsanger or fastqcssanger format to use in PerM. However, there are several possible starting formats that can be converted to one of those two: fastq (any type), color-space fastq, fasta, csfasta, or csfasta+qualsolid.

An uploaded base-space fastq file MUST be checked/transformed with FASTQGroomer tools in Galaxy to be converted to the fastqsanger format (this is true even if the original file is in Sanger format).

Uploaded fasta and csfasta without quality score files can be transformed to fastqsanger by the FASTQGroomer, with pseudo quality scores added.

An uploaded csfasta + qual pair can also be transformed into fastqcssanger by solid2fastq.

**Outputs**

The output mapping result is in SAM format, and has the following columns::

    Column      Description
  ----------    --------------------------------------------------------
   1    QNAME   Query (pair) NAME
   2    FLAG    bitwise FLAG
   3    RNAME   Reference sequence NAME
   4    POS     1-based leftmost POSition/coordinate of clipped sequence
   5    MAPQ    MAPping Quality (Phred-scaled)
   6    CIGAR   extended CIGAR string
   7    MRNM    Mate Reference sequence NaMe ('=' if same as RNAME)
   8    MPOS    1-based Mate POSition
   9    ISIZE   Inferred insert SIZE
  10    SEQ     query SEQuence on the same strand as the reference
  11    QUAL    query QUALity (ASCII-33 gives the Phred base quality)
  12    OPT     variable OPTional fields in the format TAG:VTYPE:VALUE
  12.1  NM      Number of mismatches (SOLiD-specific)
  12.2  CS      Reads in color space (SOLiD-specific)
  12.3  CQ      Bases quality in color space (SOLiD-specific)

The flags are as follows::

    Flag  Description
  ------  -------------------------------------
  0x0001  the read is paired in sequencing
  0x0002  the read is mapped in a proper pair
  0x0004  the query sequence itself is unmapped
  0x0008  the mate is unmapped
  0x0010  strand of the query (1 for reverse)
  0x0020  strand of the mate
  0x0040  the read is the first read in a pair
  0x0080  the read is the second read in a pair
  0x0100  the alignment is not primary

Here is some sample output::

  Qname          FLAG  Rname  POS     MAPQ  CIGAR  MRNM  MPOS  ISIZE  SEQ                                  QUAL                                 NM      CS                                        CQ
  491_28_332_F3  16    ref-1  282734  255   35M    *     0     0      AGTCAAACTCCGAATGCCAATGACTTATCCTTAGG  #%%%%%%%!!%%%!!%%%%%%%%!!%%%%%%%%%%  NM:i:3  CS:Z:C0230202330012130103100230121001212  CQ:Z:###################################
  491_28_332_F3  16    ref-1  269436  255   35M    *     0     0      AGTCAAACTCCGAATGCCAATGACTTATCCTTAGG  #%%%%%%%!!%%%!!%%%%%%%%!!%%%%%%%%%%  NM:i:3  CS:Z:C0230202330012130103100230121001212  CQ:Z:###################################

The user can choose whether to produce an optional output containing the unmapped reads in fastqsanger or fastqcssanger format. The default is to produce it.

**PerM parameter list**

Below is a list of PerM command line options for PerM. Not all of these are relevant to Galaxy's implementation, but are included for completeness.

The command for single-end::

  PerM [ref_or_index] [read] [options]

The command for paired-end::

  PerM [ref_or_index] -1 [read1] -2 [read2] [options]

The command-line options::

  -A                Output all alignments within the given mismatch threshold, end-to-end.
  -B                Output best alignments in terms of mismatches in the given mismatch threshold. [Default]
  -E                Output only the uniquely mapped reads in the given mismatch threshold.
  -m                Create the reference index, without reusing the saved index.
  -s PATH           Save the reference index to accelerate the mapping in the future. If PATH is not specified, the default path will be used.
  -v INT            Where INT is the number of mismatches allowed in one end. [Default=2]
  -T INT            Where INT is the length to truncate read length to, so 30 means use only first 30 bases (signals). Leave blank if the full read is meant to be used.
  -o PATH           Where PATH is for output the mapping of one read set. PerM's output are in .mapping or .sam format, determined by the ext name of PATH. Ex: -o out.sam will output in SAM format; -o out.mapping will output in .mapping format.
  -d PATH           Where PATH is the directory for multiple read sets.
  -u PATH           Print the fastq file of those unmapped reads to the file in PATH.
  --noSamHeader     Print no SAM header so it is convenient to concatenate multiple SAM output files.
  --includeReadsWN  Encodes N or "." with A or 3, respectively.
  --statsOnly       Output the mapping statistics in stdout only, without saving alignments to files.
  --ignoreQS        Ignore the quality scores in fastq or QUAL files.
  --seed {F2 | S11 | F3 | F4}                        Specify the seed pattern, which has a specific full sensitivity. Check the algorithm page (link below) for seed patterns to balance the sensitivity and running time.
  --readFormat {fasta | fastq | csfasta | csfastq}   Read in reads in the specified format, instead of guessing according to the extension name.
  --delimiter CHAR  Which is a character used as the delimiter to separate the read id, and the additional info in the line with ">" in fasta or csfasta.

Paired reads options::

  -e       Exclude ambiguous paired.
  -L INT   Mate-paired separate lower bound.
  -U INT   Mate-paired separate upper bound.
  -1 PATH  The forward reads file path.
  -2 PATH  The reversed reads file path.

See the PerM `algorithm page`__ for information on algorithms and seeds.

.. __: http://code.google.com/p/perm/wiki/Algorithms
  </help>
</tool>