/tools/peakcalling/ma2c.xml

https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 381 lines · 354 code · 26 blank · 1 comment · 0 complexity · 77ed750e8f11286909c3735dc63b3067 MD5 · raw file

  1. <tool name="MA2C" id="peakcalling_ma2c">
  2. <description>Peak Calling for ChIP-chip (Nimblegen)</description>
     <!-- The job runs /bin/bash on the path substituted for $shscript, i.e. the rendered "shscript" configfile declared under <configfiles>. -->
  3. <command interpreter="command">/bin/bash $shscript</command>
  4. <inputs>
     <!-- Form parameters; the names declared here are the $variables read by the tag_file and shscript configfile templates. -->
  5. <repeat name="replicates" title="Replicate">
     <!-- One treatment/control pair per replicate; both templates loop over $replicates. -->
  6. <param name="tfile" type="data" label="Treatment file"/>
  7. <param name="cfile" type="data" label="Control file"/>
  8. </repeat>
  9. <param name="ndf" type="data" label="NDF file"/>
     <!-- The POS dataset is optional; both templates test for "None" before using it. -->
  10. <param name="pos" type="data" label="POS file" optional="true"/>
     <!-- Peak-detection settings are text fields guarded by in_range validators; the tag_file template copies them as typed into its [peak detection] section. -->
  11. <param name="bandwidth" type="text" label="Bandwidth" value="300">
  12. <validator type="in_range" max="1000" min="100" message="Bandwidth is out of range, Bandwidth has to be between 100 to 1000" />
  13. </param>
  14. <param name="max_gap" type="text" label="Max Gap" value="250">
  15. <validator type="in_range" max="1000" min="100" message="Max_Gap is out of range, Max_Gap has to be between 100 to 1000" />
  16. </param>
  17. <param name="min_probes" type="text" label="Min Probes" value="5">
  18. <validator type="in_range" max="10" min="2" message="Min_Probes is out of range, Min_Probes has to be between 2 to 10" />
  19. </param>
     <!-- Both branches below define a param called "value", so the template reads $threshold.value whichever method is selected; only the default and the allowed range differ. -->
  20. <conditional name="threshold">
  21. <param name="method1" type="select" label="Threshold method">
  22. <option value="Pvalue">Pvalue</option>
  23. <option value="FDR">FDR</option>
  24. </param>
  25. <when value="Pvalue">
  26. <param name="value" type="text" label="Value (e.g. P=0.00001)" value="0.00001">
  27. <validator type="in_range" max="1" min="0" message="Pvalue is out of range, Pvalue has to be between 0 to 1" />
  28. </param>
  29. </when>
  30. <when value="FDR">
  31. <param name="value" type="text" label="Value (e.g. FDR=5 for 5%)" value="5">
  32. <validator type="in_range" max="100" min="0" message="FDR is out of range, FDR has to be between 0 to 100" />
  33. </param>
  34. </when>
  35. </conditional>
  36. <conditional name="normalization">
  37. <param name="method2" type="select" label="Normalization Method">
  38. <option value="Robust">Robust</option>
  39. <option value="Simple">Simple</option>
  40. </param>
  41. <when value="Robust">
  42. <!-- NOTE: the C value is only offered here, and the tag_file template only writes "C = ..." when method2 is Robust. -->
  43. <param name="cvalue" type="text" label="C value" value="2">
  44. <validator type="in_range" max="5" min="2" message="The C value for the robust normalization is out of range, the parameter has to be between 2 to 5" />
  45. </param>
  46. </when>
     <!-- Simple normalization takes no extra parameters. -->
  47. <when value="Simple"></when>
  48. </conditional>
  49. </inputs>
  50. <configfiles>
  51. <configfile name="tag_file">
      ## Cheetah template for the MA2C tag file; the shscript template renames the rendered file to *.tag and hands it to ma2c.
  52. [sample]
      ## Accumulate three space-separated lists with one entry per replicate:
      ## tmp = treatment paths, tmp2 = control paths, tmp3 = a fixed placeholder chip id.
      ## Every entry is followed by a single space, so each list ends with a trailing space.
  53. #set $tmp = ""
  54. #set $tmp2 = ""
  55. #set $tmp3 = ""
      ## The loop index i is not used in the body.
  56. #for $i, $rep in enumerate($replicates)
  57. #set $tmp = $tmp + str($rep.tfile) + " "
  58. #set $tmp2 = $tmp2 + str($rep.cfile) + " "
  59. #set $tmp3 = $tmp3 + "chip_id_foo "
  60. #end for
  61. IP_FILE = $tmp
  62. INPUT_FILE = $tmp2
  63. NDF_FILE = $ndf
  64. ## POS_FILE is written only when the optional POS dataset was supplied (its name is not the string "None").
  65. #if $pos.name != "None":
  66. POS_FILE = $pos
  67. #end if
      ## DESIGN_ID and CHIP_ID carry fixed placeholder strings, not values taken from the inputs.
  68. DESIGN_ID = design_id_foo
  69. CHIP_ID = $tmp3
  70. [peak detection]
      ## The values below are substituted exactly as entered in the form.
  71. METHOD = $threshold.method1
  72. BANDWIDTH = $bandwidth
  73. MAX_GAP = $max_gap
  74. MIN_PROBES = $min_probes
  75. THRESHOLD = $threshold.value
  76. [normalization]
  77. METHOD = $normalization.method2
  78. ## The C line is emitted only for Robust normalization; the Simple branch defines no cvalue parameter.
  79. #if $normalization.method2 == "Robust":
  80. C = $normalization.cvalue
  81. #end if
  82. </configfile>
  83. <configfile name="shscript">
  84. #!/bin/bash
      ## Cheetah-rendered driver script: rejects oversized inputs, runs ma2c on the tag file,
      ## then moves the peak BED and the unpacked score wiggle onto the output datasets.
      ## gt, ad and dollar hold the greater-than, ampersand and dollar characters, built with chr();
      ## presumably this avoids XML escaping and Cheetah treating the dollar sign as a placeholder (confirm).
  85. #set $gt = chr(62)
  86. #set $ad = chr(38)
  87. #set $dollar = chr (36)
      ## Size guard per replicate: du -b prints the byte count first and awk keeps that first column.
      ## The limit 1048576000 bytes equals 1000 MiB and is reported to the user as 1G.
  88. #for $i, $rep in enumerate($replicates)
  89. #set $tmp1 = str($rep.tfile)
  90. #set $tmp2 = str($rep.cfile)
  91. tfilesize=`du -b $tmp1 | awk '{print ${dollar}1}'`
  92. cfilesize=`du -b $tmp2 | awk '{print ${dollar}1}'`
  93. if [[ ${dollar}tfilesize -gt 1048576000 ]];then
  94. echo "treatment file is too big! 1G is the maximum!" ${gt}${ad}2
      ## Exit non-zero: a bare exit would return the status of the echo above (0) and hide the failure from the exit code.
  95. exit 1;
  96. fi
  97. if [[ ${dollar}cfilesize -gt 1048576000 ]];then
  98. echo "control file is too big! 1G is the maximum!" ${gt}${ad}2
  99. exit 1;
  100. fi
  101. #end for
      ## Same size guard for the NDF file.
  102. ndfsize=`du -b $ndf | awk '{print ${dollar}1}'`
  103. if [[ ${dollar}ndfsize -gt 1048576000 ]];then
  104. echo "ndf file is too big! 1G is the maximum!" ${gt}${ad}2
  105. exit 1;
  106. fi
      ## The POS file is optional; it is size-checked only when the substituted value is not the string None.
  107. if [ $pos != "None" ];then
  108. possize=`du -b $pos | awk '{print ${dollar}1}'`
  109. if [[ ${dollar}possize -gt 1048576000 ]];then
  110. echo "pos file is too big! 1G is the maximum!" ${gt}${ad}2
  111. exit 1;
  112. fi
  113. fi
      ## Rename the rendered tag file to carry a .tag suffix, then run ma2c with stdout and stderr captured in the log dataset.
      ## NOTE(review): ma2c is given only the basename, so this relies on the tag file living in the current working directory; confirm.
  114. mv $tag_file ${tag_file}.tag
  115. ma2c `basename ${tag_file}`.tag ${ad}${gt} $log
      ## Results are picked up from MA2C_Output/ under names derived from the tag file basename.
  116. mv MA2C_Output/`basename ${tag_file}`_peaks.bed $bedoutput
  117. gunzip MA2C_Output/`basename ${tag_file}`_MA2Cscore.wig.gz
  118. mv MA2C_Output/`basename ${tag_file}`_MA2Cscore.wig $wigoutput
  119. </configfile>
  120. </configfiles>
  121. <outputs>
       <!-- Filled by the shscript template: bedoutput gets MA2C_Output/..._peaks.bed, wigoutput gets the gunzipped ..._MA2Cscore.wig, and log gets the combined stdout/stderr of the ma2c run. -->
  122. <data format="bed" name="bedoutput" />
  123. <data format="wig" name="wigoutput" />
  124. <data format="txt" name="log" label="MA2C job log" />
  125. </outputs>
  126. <tests>
  127. <test maxseconds="3600" name="TreatmentFile">
       <!-- Pvalue threshold 0.001, Robust normalization with C=2; expected files live under ma2c_1/. Each expectation is bound to the declared output it checks (bedoutput, wigoutput, log), since the tool has no output named "output". -->
  128. <param name="tfile" value="ma2c_treatment_ce4.pair" />
  129. <param name="cfile" value="ma2c_control_ce4.pair" />
  130. <param name="ndf" value="ma2c_ndf_ce4.ndf" />
  131. <param name="pos" value="ma2c_pos_ce4.pos" />
  132. <param name="bandwidth" value="300" />
  133. <param name="max_gap" value="250" />
  134. <param name="min_probes" value="5" />
  135. <param name="method1" value="Pvalue" />
  136. <param name="value" value="0.001" />
  137. <param name="method2" value="Robust" />
  138. <param name="cvalue" value="2" />
  139. <output name="bedoutput" file="ma2c_1/sample_peaks.bed" />
  140. <output name="wigoutput" file="ma2c_1/sample_MA2Cscore.wig" lines_diff = "2" />
  141. <output name="log" file="ma2c_1/sample_log.bed" lines_diff = "100"/>
  142. </test>
  143. <test maxseconds="3600" name="ControlFile">
       <!-- Pvalue threshold 0.001, Robust normalization with C=2; expected files live under ma2c_2/. Each expectation is bound to the declared output it checks (bedoutput, wigoutput, log), since the tool has no output named "output". -->
  144. <param name="tfile" value="ma2c_treatment_ce4.pair" />
  145. <param name="cfile" value="ma2c_control_ce4.pair" />
  146. <param name="ndf" value="ma2c_ndf_ce4.ndf" />
  147. <param name="pos" value="ma2c_pos_ce4.pos" />
  148. <param name="bandwidth" value="300" />
  149. <param name="max_gap" value="250" />
  150. <param name="min_probes" value="5" />
  151. <param name="method1" value="Pvalue" />
  152. <param name="value" value="0.001" />
  153. <param name="method2" value="Robust" />
  154. <param name="cvalue" value="2" />
  155. <output name="bedoutput" file="ma2c_2/sample_peaks.bed" />
  156. <output name="wigoutput" file="ma2c_2/sample_MA2Cscore.wig" lines_diff = "2" />
  157. <output name="log" file="ma2c_2/sample_log.bed" lines_diff = "100"/>
  158. </test>
  159. <test maxseconds="3600" name="NDFFile">
       <!-- Pvalue threshold 0.001, Robust normalization with C=2; expected files live under ma2c_3/. Each expectation is bound to the declared output it checks (bedoutput, wigoutput, log), since the tool has no output named "output". -->
  160. <param name="tfile" value="ma2c_treatment_ce4.pair" />
  161. <param name="cfile" value="ma2c_control_ce4.pair" />
  162. <param name="ndf" value="ma2c_ndf_ce4.ndf" />
  163. <param name="pos" value="ma2c_pos_ce4.pos" />
  164. <param name="bandwidth" value="300" />
  165. <param name="max_gap" value="250" />
  166. <param name="min_probes" value="5" />
  167. <param name="method1" value="Pvalue" />
  168. <param name="value" value="0.001" />
  169. <param name="method2" value="Robust" />
  170. <param name="cvalue" value="2" />
  171. <output name="bedoutput" file="ma2c_3/sample_peaks.bed" />
  172. <output name="wigoutput" file="ma2c_3/sample_MA2Cscore.wig" lines_diff = "2" />
  173. <output name="log" file="ma2c_3/sample_log.bed" lines_diff = "100"/>
  174. </test>
  175. <test maxseconds="3600" name="POSFile">
       <!-- Pvalue threshold 0.001, Robust normalization with C=2; expected files live under ma2c_4/. Each expectation is bound to the declared output it checks (bedoutput, wigoutput, log), since the tool has no output named "output". -->
  176. <param name="tfile" value="ma2c_treatment_ce4.pair" />
  177. <param name="cfile" value="ma2c_control_ce4.pair" />
  178. <param name="ndf" value="ma2c_ndf_ce4.ndf" />
  179. <param name="pos" value="ma2c_pos_ce4.pos" />
  180. <param name="bandwidth" value="300" />
  181. <param name="max_gap" value="250" />
  182. <param name="min_probes" value="5" />
  183. <param name="method1" value="Pvalue" />
  184. <param name="value" value="0.001" />
  185. <param name="method2" value="Robust" />
  186. <param name="cvalue" value="2" />
  187. <output name="bedoutput" file="ma2c_4/sample_peaks.bed" />
  188. <output name="wigoutput" file="ma2c_4/sample_MA2Cscore.wig" lines_diff = "2" />
  189. <output name="log" file="ma2c_4/sample_log.bed" lines_diff = "100"/>
  190. </test>
  191. <test maxseconds="3600" name="BandWidth">
       <!-- Pvalue threshold 0.001, Robust normalization with C=2; expected files live under ma2c_5/. Each expectation is bound to the declared output it checks (bedoutput, wigoutput, log), since the tool has no output named "output". -->
  192. <param name="tfile" value="ma2c_treatment_ce4.pair" />
  193. <param name="cfile" value="ma2c_control_ce4.pair" />
  194. <param name="ndf" value="ma2c_ndf_ce4.ndf" />
  195. <param name="pos" value="ma2c_pos_ce4.pos" />
  196. <param name="bandwidth" value="300" />
  197. <param name="max_gap" value="250" />
  198. <param name="min_probes" value="5" />
  199. <param name="method1" value="Pvalue" />
  200. <param name="value" value="0.001" />
  201. <param name="method2" value="Robust" />
  202. <param name="cvalue" value="2" />
  203. <output name="bedoutput" file="ma2c_5/sample_peaks.bed" />
  204. <output name="wigoutput" file="ma2c_5/sample_MA2Cscore.wig" lines_diff = "2" />
  205. <output name="log" file="ma2c_5/sample_log.bed" lines_diff = "100"/>
  206. </test>
  207. <test maxseconds="3600" name="MaxGap">
       <!-- Pvalue threshold 0.001, Robust normalization with C=2; expected files live under ma2c_6/. Each expectation is bound to the declared output it checks (bedoutput, wigoutput, log), since the tool has no output named "output". -->
  208. <param name="tfile" value="ma2c_treatment_ce4.pair" />
  209. <param name="cfile" value="ma2c_control_ce4.pair" />
  210. <param name="ndf" value="ma2c_ndf_ce4.ndf" />
  211. <param name="pos" value="ma2c_pos_ce4.pos" />
  212. <param name="bandwidth" value="300" />
  213. <param name="max_gap" value="250" />
  214. <param name="min_probes" value="5" />
  215. <param name="method1" value="Pvalue" />
  216. <param name="value" value="0.001" />
  217. <param name="method2" value="Robust" />
  218. <param name="cvalue" value="2" />
  219. <output name="bedoutput" file="ma2c_6/sample_peaks.bed" />
  220. <output name="wigoutput" file="ma2c_6/sample_MA2Cscore.wig" lines_diff = "2" />
  221. <output name="log" file="ma2c_6/sample_log.bed" lines_diff = "100"/>
  222. </test>
  223. <test maxseconds="3600" name="MinProbes">
       <!-- Pvalue threshold 0.001, Robust normalization with C=2; expected files live under ma2c_7/. Each expectation is bound to the declared output it checks (bedoutput, wigoutput, log), since the tool has no output named "output". -->
  224. <param name="tfile" value="ma2c_treatment_ce4.pair" />
  225. <param name="cfile" value="ma2c_control_ce4.pair" />
  226. <param name="ndf" value="ma2c_ndf_ce4.ndf" />
  227. <param name="pos" value="ma2c_pos_ce4.pos" />
  228. <param name="bandwidth" value="300" />
  229. <param name="max_gap" value="250" />
  230. <param name="min_probes" value="5" />
  231. <param name="method1" value="Pvalue" />
  232. <param name="value" value="0.001" />
  233. <param name="method2" value="Robust" />
  234. <param name="cvalue" value="2" />
  235. <output name="bedoutput" file="ma2c_7/sample_peaks.bed" />
  236. <output name="wigoutput" file="ma2c_7/sample_MA2Cscore.wig" lines_diff = "2" />
  237. <output name="log" file="ma2c_7/sample_log.bed" lines_diff = "100"/>
  238. </test>
  239. <test maxseconds="3600" name="Pvalue">
       <!-- Pvalue threshold 0.001, Robust normalization with C=2; expected files live under ma2c_8/. Each expectation is bound to the declared output it checks (bedoutput, wigoutput, log), since the tool has no output named "output". -->
  240. <param name="tfile" value="ma2c_treatment_ce4.pair" />
  241. <param name="cfile" value="ma2c_control_ce4.pair" />
  242. <param name="ndf" value="ma2c_ndf_ce4.ndf" />
  243. <param name="pos" value="ma2c_pos_ce4.pos" />
  244. <param name="bandwidth" value="300" />
  245. <param name="max_gap" value="250" />
  246. <param name="min_probes" value="5" />
  247. <param name="method1" value="Pvalue" />
  248. <param name="value" value="0.001" />
  249. <param name="method2" value="Robust" />
  250. <param name="cvalue" value="2" />
  251. <output name="bedoutput" file="ma2c_8/sample_peaks.bed" />
  252. <output name="wigoutput" file="ma2c_8/sample_MA2Cscore.wig" lines_diff = "2" />
  253. <output name="log" file="ma2c_8/sample_log.bed" lines_diff = "100"/>
  254. </test>
  255. <test maxseconds="3600" name="FDR">
       <!-- FDR threshold of 5, Robust normalization with C=2; expected files live under ma2c_9/. Each expectation is bound to the declared output it checks (bedoutput, wigoutput, log), since the tool has no output named "output". -->
  256. <param name="tfile" value="ma2c_treatment_ce4.pair" />
  257. <param name="cfile" value="ma2c_control_ce4.pair" />
  258. <param name="ndf" value="ma2c_ndf_ce4.ndf" />
  259. <param name="pos" value="ma2c_pos_ce4.pos" />
  260. <param name="bandwidth" value="300" />
  261. <param name="max_gap" value="250" />
  262. <param name="min_probes" value="5" />
  263. <param name="method1" value="FDR" />
  264. <param name="value" value="5" />
  265. <param name="method2" value="Robust" />
  266. <param name="cvalue" value="2" />
  267. <output name="bedoutput" file="ma2c_9/sample_peaks.bed" />
  268. <output name="wigoutput" file="ma2c_9/sample_MA2Cscore.wig" lines_diff = "2" />
  269. <output name="log" file="ma2c_9/sample_log.bed" lines_diff = "100"/>
  270. </test>
  271. <test maxseconds="3600" name="Robust">
       <!-- Pvalue threshold 0.001, Robust normalization with C=2; expected files live under ma2c_10/. Each expectation is bound to the declared output it checks (bedoutput, wigoutput, log), since the tool has no output named "output". -->
  272. <param name="tfile" value="ma2c_treatment_ce4.pair" />
  273. <param name="cfile" value="ma2c_control_ce4.pair" />
  274. <param name="ndf" value="ma2c_ndf_ce4.ndf" />
  275. <param name="pos" value="ma2c_pos_ce4.pos" />
  276. <param name="bandwidth" value="300" />
  277. <param name="max_gap" value="250" />
  278. <param name="min_probes" value="5" />
  279. <param name="method1" value="Pvalue" />
  280. <param name="value" value="0.001" />
  281. <param name="method2" value="Robust" />
  282. <param name="cvalue" value="2" />
  283. <output name="bedoutput" file="ma2c_10/sample_peaks.bed" />
  284. <output name="wigoutput" file="ma2c_10/sample_MA2Cscore.wig" lines_diff = "2" />
  285. <output name="log" file="ma2c_10/sample_log.bed" lines_diff = "100"/>
  286. </test>
  287. <test maxseconds="3600" name="Simple_1">
       <!-- Pvalue threshold 0.001, Simple normalization with no C value; expected files live under ma2c_11/. Each expectation is bound to the declared output it checks (bedoutput, wigoutput, log), since the tool has no output named "output". -->
  288. <param name="tfile" value="ma2c_treatment_ce4.pair" />
  289. <param name="cfile" value="ma2c_control_ce4.pair" />
  290. <param name="ndf" value="ma2c_ndf_ce4.ndf" />
  291. <param name="pos" value="ma2c_pos_ce4.pos" />
  292. <param name="bandwidth" value="300" />
  293. <param name="max_gap" value="250" />
  294. <param name="min_probes" value="5" />
  295. <param name="method1" value="Pvalue" />
  296. <param name="value" value="0.001" />
  297. <param name="method2" value="Simple" />
  298. <output name="bedoutput" file="ma2c_11/sample_peaks.bed" />
  299. <output name="wigoutput" file="ma2c_11/sample_MA2Cscore.wig" lines_diff = "2" />
  300. <output name="log" file="ma2c_11/sample_log.bed" lines_diff = "100"/>
  301. </test>
  302. <test maxseconds="3600" name="Simple_2">
       <!-- Pvalue threshold 0.001, Simple normalization; expected files live under ma2c_12/. Each expectation is bound to the declared output it checks (bedoutput, wigoutput, log), since the tool has no output named "output". -->
       <!-- NOTE(review): cvalue=231 is supplied although the Simple branch declares no cvalue param; presumably this checks that the value is ignored. Confirm. -->
  303. <param name="tfile" value="ma2c_treatment_ce4.pair" />
  304. <param name="cfile" value="ma2c_control_ce4.pair" />
  305. <param name="ndf" value="ma2c_ndf_ce4.ndf" />
  306. <param name="pos" value="ma2c_pos_ce4.pos" />
  307. <param name="bandwidth" value="300" />
  308. <param name="max_gap" value="250" />
  309. <param name="min_probes" value="5" />
  310. <param name="method1" value="Pvalue" />
  311. <param name="value" value="0.001" />
  312. <param name="method2" value="Simple" />
  313. <param name="cvalue" value="231" />
  314. <output name="bedoutput" file="ma2c_12/sample_peaks.bed" />
  315. <output name="wigoutput" file="ma2c_12/sample_MA2Cscore.wig" lines_diff = "2" />
  316. <output name="log" file="ma2c_12/sample_log.bed" lines_diff = "100"/>
  317. </test>
  318. </tests>
  319. <help>
  320. This tool performs peak calling for ChIP-chip (Nimblegen) data. MA2C
  321. was developed in Xiaole Shirley Liu's lab by Jun Song, and rewritten
  322. in Python by Tao Liu. The original Java version was published in Genome
  323. Biology (PubMed: 17727723). The version deployed here is pMA2C 1.1.3.
  324. .. class:: infomark
  325. **TIP:** Please first upload your treatment and control files using the **Upload File from your computer** tool.
  326. .. class:: warningmark
  327. **NEED IMPROVEMENT**
  328. -----
  329. **Parameters**
  330. - **Replicates** click *Add new Replicate* button to choose NimbleGen
  331. pair data files from history.
  332. - **Treatment file** The input file for ChIP/treatment channel chosen from the
  333. history.
  334. - **Control file** The input file for input/control channel chosen
  335. from the history.
  336. - **NDF file** is the NimbleGen design file.
  337. - **POS file** is the optional NimbleGen POS (probe position) file.
  338. - **Bandwidth** is the bandwidth to detect peaks.
  339. - **Max Gap** is the maximum gap allowed for joining two significant
  340. probes to call peak.
  341. - **Min Probes** is the minimum number of probes required in the sliding window
  342. centered at each probe; a probe having fewer probes than this
  343. required number in its window will be ignored in the analysis.
  344. - **Threshold method** is the criterion used for detecting
  345. ChIP-enriched regions. Can be Pvalue or FDR (an MA2C score threshold is not offered in this form).
  346. - **Value** is the cutoff used in the threshold method.
  347. - **Normalization Method** is the normalization method. Choices are
  348. Robust or Simple normalization.
  349. - **C value** is the parameter only for Robust normalization method.
  350. -----
  351. **Outputs**
  352. - **BED file** for peak locations in BED format
  353. - **WIGGLE file** for MA2C scores in WIGGLE format
  354. </help>
  355. </tool>