/tools/new_operations/join.xml

https://bitbucket.org/cistrome/cistrome-harvard/ · XML · 117 lines · 95 code · 22 blank · 0 comment · 0 complexity · 95a7f0a3c9f93f08ce844b18b3f90020 MD5 · raw file

  1. <tool id="gops_join_1" name="Join">
  2. <description>the intervals of two datasets side-by-side</description>
  <!-- Command template: runs gops_join.py through the Python interpreter, passing the two input datasets, the output dataset, the chrom/start/end/strand column metadata of each input (-1 for input1, -2 for input2), the minimum overlap (-m) and the fill mode (-f). -->
  3. <command interpreter="python">gops_join.py $input1 $input2 $output -1 ${input1.metadata.chromCol},${input1.metadata.startCol},${input1.metadata.endCol},${input1.metadata.strandCol} -2 ${input2.metadata.chromCol},${input2.metadata.startCol},${input2.metadata.endCol},${input2.metadata.strandCol} -m $min -f $fill</command>
  4. <inputs>
  <!-- The form parameters; their labels read as one sentence: "Join [input1] with [input2] with min overlap [min]". -->
  <!-- input1 and input2: the two datasets to join; both are restricted to the interval format. -->
  5. <param format="interval" name="input1" type="data" help="First dataset">
  6. <label>Join</label>
  7. </param>
  8. <param format="interval" name="input2" type="data" help="Second dataset">
  9. <label>with</label>
  10. </param>
  <!-- min: integer minimum overlap, default 1; the help text gives the unit as bp. -->
  11. <param name="min" size="4" type="integer" value="1" help="(bp)">
  12. <label>with min overlap</label>
  13. </param>
  <!-- fill: which unjoined records are kept; the chosen value is passed to the script as -f. -->
  <!-- NOTE(review): the values look swapped against the labels ("right" is labelled as keeping the first dataset, "left" the second). Presumably the value names the side that is padded with "." rather than the side that is kept; confirm against gops_join.py before changing anything. -->
  14. <param name="fill" type="select" label="Return">
  15. <option value="none">Only records that are joined (INNER JOIN)</option>
  16. <option value="right">All records of first dataset (fill null with ".")</option>
  17. <option value="left">All records of second dataset (fill null with ".")</option>
  18. <option value="both">All records of both datasets (fill nulls with ".")</option>
  19. </param>
  20. </inputs>
  21. <outputs>
  <!-- Single output dataset, declared as interval format with input1 named as its metadata source. -->
  22. <data format="interval" name="output" metadata_source="input1" />
  23. </outputs>
  <!-- External code file attached to the tool; its contents are not visible in this listing. -->
  24. <code file="operation_filter.py"/>
  25. <tests>
  <!-- Six test cases, all joining 1.bed with 2.bed. The first four use min=1 and cover each fill value (none, right, left, both); the last two repeat fill=none with min=500 and fill=both with min=100. Each test names a gops-join-*.dat file for the output. -->
  26. <test>
  27. <param name="input1" value="1.bed" />
  28. <param name="input2" value="2.bed" />
  29. <param name="min" value="1" />
  30. <param name="fill" value="none" />
  31. <output name="output" file="gops-join-none.dat" />
  32. </test>
  33. <test>
  34. <param name="input1" value="1.bed" />
  35. <param name="input2" value="2.bed" />
  36. <param name="min" value="1" />
  37. <param name="fill" value="right" />
  38. <output name="output" file="gops-join-right.dat" />
  39. </test>
  40. <test>
  41. <param name="input1" value="1.bed" />
  42. <param name="input2" value="2.bed" />
  43. <param name="min" value="1" />
  44. <param name="fill" value="left" />
  45. <output name="output" file="gops-join-left.dat" />
  46. </test>
  47. <test>
  48. <param name="input1" value="1.bed" />
  49. <param name="input2" value="2.bed" />
  50. <param name="min" value="1" />
  51. <param name="fill" value="both" />
  52. <output name="output" file="gops-join-both.dat" />
  53. </test>
  <!-- Larger minimum-overlap thresholds. -->
  54. <test>
  55. <param name="input1" value="1.bed" />
  56. <param name="input2" value="2.bed" />
  57. <param name="min" value="500" />
  58. <param name="fill" value="none" />
  59. <output name="output" file="gops-join-none-500.dat" />
  60. </test>
  61. <test>
  62. <param name="input1" value="1.bed" />
  63. <param name="input2" value="2.bed" />
  64. <param name="min" value="100" />
  65. <param name="fill" value="both" />
  66. <output name="output" file="gops-join-both-100.dat" />
  67. </test>
  68. </tests>
  69. <help>
  70. .. class:: infomark
  71. **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in interval format. Use "edit attributes" to set chromosome, start, end, and strand columns.
  72. -----
  73. **Screencasts!**
  74. See Galaxy Interval Operation Screencasts_ (right click to open this link in another window).
  75. .. _Screencasts: http://wiki.g2.bx.psu.edu/Learn/Interval%20Operations
  76. -----
  77. **Syntax**
  78. - **with min overlap** specifies the minimum overlap (in bp) between intervals that allows them to be joined.
  79. - **Return only records that are joined** returns only the records of the first dataset that join to a record in the second dataset. This is analogous to an INNER JOIN.
  80. - **Return all records of first dataset (fill null with &quot;.&quot;)** returns all intervals of the first dataset, and any intervals that do not join an interval from the second dataset are filled in with a period(.). This is analogous to a LEFT JOIN.
  81. - **Return all records of second dataset (fill null with &quot;.&quot;)** returns all intervals of the second dataset, and any intervals that do not join an interval from the first dataset are filled in with a period(.). This is analogous to a RIGHT JOIN. **Note that this may produce an invalid interval file, since a period(.) is not a valid chrom, start, end or strand.**
  82. - **Return all records of both datasets (fill nulls with &quot;.&quot;)** returns all records from both datasets, and fills on either the right or left with periods. This is analogous to a FULL OUTER JOIN. **Note that this may produce an invalid interval file, since a period(.) is not a valid chrom, start, end or strand.**
  83. -----
  84. **Examples**
  85. .. image:: ${static_path}/operation_icons/gops_joinRecordsList.gif
  86. Only records that are joined (inner join):
  87. .. image:: ${static_path}/operation_icons/gops_joinInner.gif
  88. All records of first dataset:
  89. .. image:: ${static_path}/operation_icons/gops_joinLeftOuter.gif
  90. All records of second dataset:
  91. .. image:: ${static_path}/operation_icons/gops_joinRightOuter.gif
  92. All records of both datasets:
  93. .. image:: ${static_path}/operation_icons/gops_joinFullOuter.gif
  94. </help>
  95. </tool>