<!--
  /tools/samtools/sam_pileup.xml
  https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 184 lines · 150 code · 23 blank · 11 comment · 0 complexity · b6f48c9b40e2f1594d0136e1bced194c MD5 · raw file
-->
<!--
  Galaxy tool definition "sam_pileup": generates a pileup dataset from a BAM
  dataset by running the sam_pileup.py wrapper script (see the help section
  below for the two output flavours).
-->
<tool id="sam_pileup" name="Generate pileup" version="1.1.1">
  <description>from BAM dataset</description>
  <requirements>
    <requirement type="package">samtools</requirement>
  </requirements>
  <!--
    Command template for sam_pileup.py, run with the python interpreter.
    The #if / #else / #end if directives pick the values that depend on the
    two conditionals declared under "inputs": when a branch is not selected,
    its options are passed as the literal string "None" so that every option
    is always present on the command line.
    NOTE(review): input1 is declared inside the refOrHistory conditional but
    is referenced here without that prefix; confirm this resolves as intended.
  -->
  <command interpreter="python">
    sam_pileup.py
      --input1=$input1
      --output=$output1
      --ref=$refOrHistory.reference
      #if $refOrHistory.reference == "history":
        --ownFile=$refOrHistory.ownFile
      #else:
        --ownFile="None"
      #end if
      --dbkey=${input1.metadata.dbkey}
      --indexDir=${GALAXY_DATA_INDEX_DIR}
      --bamIndex=${input1.metadata.bam_index}
      --lastCol=$lastCol
      --indels=$indels
      --mapCap=$mapCap
      --consensus=$c.consensus
      #if $c.consensus == "yes":
        --theta=$c.theta
        --hapNum=$c.hapNum
        --fraction=$c.fraction
        --phredProb=$c.phredProb
      #else:
        --theta="None"
        --hapNum="None"
        --fraction="None"
        --phredProb="None"
      #end if
  </command>
  <inputs>
    <!-- Reference source: a built-in index or a FASTA dataset from the history. -->
    <conditional name="refOrHistory">
      <param name="reference" type="select"
             label="Will you select a reference genome from your history or use a built-in index?">
        <option value="indexed">Use a built-in index</option>
        <option value="history">Use one from the history</option>
      </param>
      <when value="indexed">
        <param name="input1" type="data" format="bam"
               label="Select the BAM file to generate the pileup file for">
          <validator type="unspecified_build" />
          <!--
            Looks up the dataset's dbkey in column 1 of sam_fa_indices.loc,
            considering only lines that start with "index".
          -->
          <validator type="dataset_metadata_in_file"
                     filename="sam_fa_indices.loc"
                     metadata_name="dbkey"
                     metadata_column="1"
                     message="Sequences are not currently available for the specified build."
                     line_startswith="index" />
        </param>
      </when>
      <when value="history">
        <param name="input1" type="data" format="bam"
               label="Select the BAM file to generate the pileup file for" />
        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey"
               label="Select a reference genome" />
      </when>
    </conditional>
    <param name="lastCol" type="select"
           label="Whether or not to print the mapping quality as the last column"
           help="Makes the output easier to parse, but is space inefficient">
      <option value="no">Do not print the mapping quality as the last column</option>
      <option value="yes">Print the mapping quality as the last column</option>
    </param>
    <param name="indels" type="select"
           label="Whether or not to print only output pileup lines containing indels">
      <option value="no">Print all lines</option>
      <option value="yes">Print only lines containing indels</option>
    </param>
    <param name="mapCap" type="integer" value="60" label="Where to cap mapping quality" />
    <!-- Consensus calling: the four model parameters only exist in the "yes" branch. -->
    <conditional name="c">
      <param name="consensus" type="select" label="Call consensus according to MAQ model?">
        <option selected="true" value="no">No</option>
        <option value="yes">Yes</option>
      </param>
      <when value="no" />
      <when value="yes">
        <param name="theta" type="float" value="0.85"
               label="Theta parameter (error dependency coefficient) in the MAQ consensus calling model" />
        <!-- min="2" enforces the lower bound that the help text already states. -->
        <param name="hapNum" type="integer" value="2" min="2"
               label="Number of haplotypes in the sample"
               help="Greater than or equal to 2" />
        <param name="fraction" type="float" value="0.001"
               label="Expected fraction of differences between a pair of haplotypes" />
        <param name="phredProb" type="integer" value="40"
               label="Phred probability of an indel in sequencing/prep" />
      </when>
    </conditional>
  </inputs>
  <outputs>
    <data format="tabular" name="output1" label="${tool.name} on ${on_string}: converted pileup" />
  </outputs>
  <tests>
    <!-- Test 1: reference taken from the history, no consensus calling. -->
    <test>
      <!--
      Bam to pileup command:
      samtools faidx chr_m.fasta
      samtools pileup -M 60 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out1.pileup
      chr_m.fasta is the prefix of the index
      -->
      <param name="reference" value="history" />
      <param name="input1" value="sam_pileup_in1.bam" ftype="bam" />
      <param name="ownFile" value="chr_m.fasta" ftype="fasta" dbkey="equCab2" />
      <param name="lastCol" value="no" />
      <param name="indels" value="no" />
      <param name="mapCap" value="60" />
      <param name="consensus" value="no" />
      <output name="output1" file="sam_pileup_out1.pileup" />
    </test>
    <!-- Test 2: built-in index, consensus calling with the default model parameters. -->
    <test>
      <!--
      Bam to pileup command:
      samtools pileup -M 60 -c -T 0.85 -N 2 -r 0.001 -I 40 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out2.pileup
      chr_m.fasta is the prefix of the index
      -->
      <param name="reference" value="indexed" />
      <param name="input1" value="sam_pileup_in1.bam" ftype="bam" dbkey="equCab2" />
      <param name="lastCol" value="no" />
      <param name="indels" value="no" />
      <param name="mapCap" value="60" />
      <param name="consensus" value="yes" />
      <param name="theta" value="0.85" />
      <param name="hapNum" value="2" />
      <param name="fraction" value="0.001" />
      <param name="phredProb" value="40" />
      <output name="output1" file="sam_pileup_out2.pileup" />
    </test>
  </tests>
  <help>

**What it does**

Uses SAMTools_' pileup command to produce a pileup dataset from a provided BAM dataset. It generates two types of pileup datasets depending on the specified options. If *Call consensus according to MAQ model?* option is set to **No**, the tool produces simple pileup. If the option is set to **Yes**, a ten column pileup dataset with consensus is generated. Both types of datasets are briefly summarized below.

.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml

------

**Types of pileup datasets**

The description of pileup format below is largely based on information that can be found on SAMTools Pileup_ documentation page. The 6- and 10-column variants are described below.

.. _Pileup: http://samtools.sourceforge.net/pileup.shtml

**Six column pileup**::

       1    2  3  4        5        6
    ---------------------------------
    chrM  412  A  2       .,       II
    chrM  413  G  4     ..t,     IIIH
    chrM  414  C  4     ...a     III2
    chrM  415  C  4     TTTt     III7

where::

     Column Definition
    ------- ----------------------------
          1 Chromosome
          2 Position (1-based)
          3 Reference base at that position
          4 Coverage (# reads aligning over that position)
          5 Bases within reads where (see Galaxy wiki for more info)
          6 Quality values (phred33 scale, see Galaxy wiki for more)

**Ten column pileup**

The `ten-column` (consensus_) pileup incorporates additional consensus information generated with *-c* option of *samtools pileup* command::

       1    2  3  4   5   6   7  8        9       10
    ------------------------------------------------
    chrM  412  A  A  75   0  25  2       .,       II
    chrM  413  G  G  72   0  25  4     ..t,     IIIH
    chrM  414  C  C  75   0  25  4     ...a     III2
    chrM  415  C  T  75  75  25  4     TTTt     III7

where::

     Column Definition
    ------- --------------------------------------------------------
          1 Chromosome
          2 Position (1-based)
          3 Reference base at that position
          4 Consensus bases
          5 Consensus quality
          6 SNP quality
          7 Maximum mapping quality
          8 Coverage (# reads aligning over that position)
          9 Bases within reads where (see Galaxy wiki for more info)
         10 Quality values (phred33 scale, see Galaxy wiki for more)

.. _consensus: http://samtools.sourceforge.net/cns0.shtml

  </help>
</tool>