/static/gmaj/docs/sample.gmaj

https://bitbucket.org/cistrome/cistrome-harvard/ · Unknown · 239 lines · 233 code · 6 blank · 0 comment · 0 complexity · 02ceb12c0df73c9579ee05bdb29de612 MD5 · raw file

  1. #:gmaj
  2. #----------------------------------------------------------------
  3. # This file specifies input parameters for a Gmaj dataset.
  4. # See below for explanatory comments.
  5. #----------------------------------------------------------------
  6. title = "My favorite genomic region"
  7. datapath = /home/cathy/mydata/favreg/
  8. alignfile = tba.maf mlagan.maf
  9. refseq = any
  10. reconorg = none
  11. tabext = .gff .gtf .bed .ct .trk
  12. nowarn = maf_version repeat_type_missing
  13. skipotherseq = false
  14. seq 0:
  15. seqname = human.chr11 hg17.chr11 human
  16. exons = human.exons.bed chr11
  17. repeats = human.repeats
  18. links = human.links
  19. underlays = human.exons.bed chr11 exons
  20. #underlays.1 = human-mouse.underlays
  21. #underlays.2 = human-rat.underlays
  22. highlights = human.highlights
  23. offset = 4730995
  24. seq 1:
  25. seqname = mouse.chr7
  26. exons = mouse.exons
  27. repeats = mouse.repeats
  28. links = mouse.links
  29. underlays = mouse.underlays
  30. #underlays.0 = mouse-human.underlays
  31. #underlays.2 = mouse-rat.underlays
  32. highlights = mouse.highlights
  33. offset = 0
  34. seq 2:
  35. seqname = rat.chr1
  36. exons = rat.exons
  37. repeats = rat.repeats
  38. links = rat.links
  39. underlays = rat.underlays
  40. #underlays.0 = rat-human.underlays
  41. #underlays.1 = rat-mouse.underlays
  42. highlights = rat.highlights
  43. offset = 0
  44. #----------------------------------------------------------------
  45. # This file specifies input parameters for Gmaj, including the
  46. # names of all data files. You can omit this file and just give
  47. # Gmaj the name of your alignment file directly, but then you
  48. # don't get the opportunity to provide annotations, offsets,
  49. # additional alignment files, or other optional features.
  50. #
  51. # Syntax:
  52. #
  53. # Each key=value(s) pair must reside on its own single, separate
  54. # line. (Note that although the '=' was formerly optional, it
  55. # is now required.) Other than that, the format is fairly loose.
  56. # Even the order of lines is arbitrary, except that "seq N:"
  57. # defines the current sequence until it is superseded by a new
  58. # "seq N:" line. Values containing spaces must be enclosed in
  59. # double quotes. Embedded quotes in such strings can be escaped
  60. # with '\', but there is no way to escape the backslash: quoted
  61. # values should not end with '\' (insert a space before the
  62. # final quote if necessary). Lines with missing values are
  63. # skipped. A '#' at the beginning of a line marks a comment
  64. # that will be ignored, except for the identifier tag "#:gmaj"
  65. # at the top, which is mandatory.
  66. #
  67. # Required Fields:
  68. #
  69. # At least one alignfile is required. You do not have to provide
  70. # a section for every sequence (by default they will still be
  71. # displayed), but for each sequence you do mention, the "seq N:"
  72. # line and the seqname field are also required. Everything else
  73. # is optional.
  74. #
  75. # File Names and Locations:
  76. #
  77. # Filenames can be relative or absolute (fully qualified paths).
  78. # Gmaj will look for relative names in the following locations:
  79. #
  80. # 1. the separately specified "bundle" file (if any)
  81. # 2. the "datapath" specified here (if any)
  82. # 3. the same directory as this parameters file
  83. #
  84. # If you are using Gmaj's "bundle" feature, you must refer to
  85. # the files located in the bundle by their plain filenames,
  86. # without any path.
  87. #
  88. # Title:
  89. #
  90. # This string will be used as the title for the Gmaj windows.
  91. # Typically it describes the alignment data, including the name
  92. # of the locus. It does not control the applet button's label,
  93. # however, because the applet has not read this file yet;
  94. # instead there is a separate applet parameter for that.
  95. #
  96. # Reference Sequence:
  97. #
  98. # The refseq field identifies the reference sequence used in the
  99. # alignments. The default value "any" means that the alignments
  100. # were generated by a sequence-symmetric program such as TBA, so
  101. # the user should be allowed to select the reference sequence
  102. # interactively. Otherwise, the value must match the appropriate
  103. # sequence name from the MAF files (including the contig name, if
  104. # applicable).
  105. #
  106. # Reconstructed Sequence:
  107. #
  108. # If the alignment files include score rows for an ancestral
  109. # reconstruction, the reconorg field identifies which organism
  110. # these scores apply to. The default value "none" means Gmaj
  111. # will ignore the scores; otherwise the value must match the
  112. # species prefix of the appropriate sequence names from the MAF
  113. # files. Contig name extensions (e.g. ".chrX") are omitted, as
  114. # the scores can apply to any contig for that organism. A score
  115. # can be supplied only once for each base in the ancestral
  116. # genome.
  117. #
  118. # Tabular File Extensions:
  119. #
  120. # The tabext field specifies which filename extensions should
  121. # be treated as generic, tab-delimited formats (GFF/GTF/BED)
  122. # instead of the old PipMaker-style formats. The default list
  123. # is ".gff .gtf .bed .ct .trk". Note that it doesn't actually
  124. # matter which of these is used for a particular file, just
  125. # whether it is in the list.
  126. #
  127. # Warning Suppression:
  128. #
  129. # The nowarn field lists keywords for particular warning
  130. # messages that should not be displayed. This is especially
  131. # useful for applets, when the administrator has seen the
  132. # warning, checked the data, and determined that everything
  133. # is OK and the end user does not need to see the warning.
  134. # The keyword for each suppressible message is displayed at
  135. # the bottom of the message.
  136. #
  137. # Ignoring Sequences:
  138. #
  139. # The skipotherseq field specifies whether sequences that appear
  140. # in the MAF files but are not mentioned here should be ignored.
  141. # If so, these rows are simply skipped; no adjustments are made
  142. # to remove all-gap columns or join adjacent blocks, and empty
  143. # blocks are kept to preserve the MAF files' block numbering.
  144. # This feature is useful for saving memory, and for reducing the
  145. # number of pips when some species have many aligning contigs.
  146. # The default value is false, so all sequences are displayed.
  147. #
  148. # Sequence Numbers and Sequence Names:
  149. #
  150. # The seqname field serves to match up the parameter entries with
  151. # the sequence name in each row of the MAF alignments (including
  152. # the contig name, if applicable). The sequence number assigns
  153. # the display order, and is also used to identify the secondary
  154. # sequence for plot-specific underlays (see below).
  155. #
  156. # Multiple values can be given for each seqname keyword; in this
  157. # case the first is the primary name to be used for display, and
  158. # the rest are aliases for it. This is useful when two MAF files
  159. # use different names for the same sequences, or simply for
  160. # changing the display labels. Alias resolution is applied to
  161. # MAF seqnames, the refseq field, and the initzoom parameter, but
  162. # not to the reconorg field or annotation files. All primary and
  163. # alias names must be unique (except in the special case of
  164. # pairwise self-alignments).
  165. #
  166. # Sequence numbers start with 0 and must turn out to be
  167. # consecutive, after Gmaj fills in any gaps you leave with the
  168. # MAF sequences you don't mention here. Thus by default, if
  169. # the alignment files include ten sequences, the valid sequence
  170. # numbers would be 0-9, and Gmaj will assign any that you omit
  171. # (in the order it encounters them, which is affected by file
  172. # bundling). However, if you set skipotherseq = true, then you
  173. # must assign consecutive numbers because Gmaj will not assign
  174. # any.
  175. #
  176. # File Specification Modifiers:
  177. #
  178. # The generic, tabular annotation formats (GFF/GTF/BED) allow
  179. # entries for several sequences to be combined in one file,
  180. # since they can be distinguished by the "seqname" or "chrom"
  181. # column. However, in this case Gmaj will expect the column
  182. # value to match the seqname from the MAF alignments. If it
  183. # does not (e.g. if the MAF files include a species prefix but
  184. # the annotation file omits it), you can add a sequence
  185. # designation after the filename to tell Gmaj what to look for
  186. # in the annotation file.
  187. #
  188. # Gmaj has special support for annotation data that represents
  189. # exons or repeats (namely adding exon numbers and inferring
  190. # UTRs, or finding the PipMaker repeat category). For the exons
  191. # and repeats panels this is automatic, but you can also invoke
  192. # it explicitly for files used as linkbars, underlays, or text
  193. # highlights by adding a type hint of "exons" or "repeats" after
  194. # the filename. This only works if the file is in a generic
  195. # (GFF/GTF/BED) format and contains the appropriate type of data
  196. # (genes/exons or repeats).
  197. #
  198. # Underlays and Highlights:
  199. #
  200. # Gmaj allows you to specify color underlays independently for
  201. # each plot, i.e. for each combination of reference and
  202. # secondary sequences. Thus in the "seq 1:" section, the
  203. # "underlays.0" entry specifies the underlay file to be used
  204. # when sequence 1 is the reference and sequence 0 is the second
  205. # sequence. Note that there is, e.g., no "underlays.1" entry in
  206. # the "seq 1:" section, since we do not usually have plots
  207. # aligning sequences with themselves.
  208. #
  209. # However, specifying a quadratic number of files quickly becomes
  210. # burdensome as the number of sequences grows. For the common
  211. # case where the same underlay file is used for most or all of a
  212. # particular reference sequence's plots, the plain "underlays"
  213. # entry (without a number) provides a default for that reference
  214. # sequence. This can still be overridden as needed by numbered
  215. # entries for special plots.
  216. #
  217. # The highlights file specifies colors for a particular row of
  218. # the text display, so there is only one for each sequence. If
  219. # you omit it, Gmaj will build default highlights based on the
  220. # exons file (if you provided one).
  221. #
  222. # Offsets:
  223. #
  224. # The offset parameter is used for display purposes only. It
  225. # specifies an adjustment to be added to all position labels and
  226. # displayed references for a particular sequence. For example,
  227. # this allows positions to be labeled with respect to some larger
  228. # region. However, note that all annotations must still be
  229. # specified relative to the sequences referred to in the MAF
  230. # files.
  231. #
  232. #----------------------------------------------------------------
  233. # Cathy Riemer, June 2008