/static/gmaj/docs/sample.gmaj
https://bitbucket.org/cistrome/cistrome-harvard/ · Unknown · 239 lines · 233 code · 6 blank · 0 comment · 0 complexity · 02ceb12c0df73c9579ee05bdb29de612 MD5 · raw file
- #:gmaj
- #----------------------------------------------------------------
- # This file specifies input parameters for a Gmaj dataset.
- # See below for explanatory comments.
- #----------------------------------------------------------------
- # Title string for the Gmaj windows (quoted because it contains spaces).
- title = "My favorite genomic region"
- # Directory in which Gmaj looks for the relative filenames given below.
- datapath = /home/cathy/mydata/favreg/
- # Alignment files; at least one is required.
- alignfile = tba.maf mlagan.maf
- # "any" (the default) lets the user select the reference sequence interactively.
- refseq = any
- # "none" (the default) makes Gmaj ignore ancestral-reconstruction score rows.
- reconorg = none
- # Filename extensions treated as generic tab-delimited formats (the default list).
- tabext = .gff .gtf .bed .ct .trk
- # Keywords of warning messages that should not be displayed.
- nowarn = maf_version repeat_type_missing
- # false (the default): MAF sequences not mentioned in this file are still displayed.
- skipotherseq = false
- # Sequence 0: the number sets the display order, and this line makes it the
- # current sequence until the next "seq N:" line.
- seq 0:
- # The first name is the primary display name; the others are aliases for it.
- seqname = human.chr11 hg17.chr11 human
- # "chr11" is a sequence designation: the name to look for in the annotation file.
- exons = human.exons.bed chr11
- repeats = human.repeats
- links = human.links
- # Default underlay file for all plots that use this sequence as the reference.
- # The trailing "exons" is a type hint requesting Gmaj's special exon support.
- underlays = human.exons.bed chr11 exons
- # Disabled examples of plot-specific overrides: "underlays.N" applies when
- # sequence N is the secondary sequence.
- #underlays.1 = human-mouse.underlays
- #underlays.2 = human-rat.underlays
- # Colors for this sequence's row of the text display.
- highlights = human.highlights
- # Display-only adjustment added to this sequence's position labels.
- offset = 4730995
- # Sequence 1: second in the display order.
- seq 1:
- # Must match the sequence name used in the rows of the MAF alignments.
- seqname = mouse.chr7
- exons = mouse.exons
- repeats = mouse.repeats
- links = mouse.links
- # Default underlay file for all plots that use this sequence as the reference.
- underlays = mouse.underlays
- # Disabled examples of plot-specific overrides: "underlays.N" applies when
- # sequence N is the secondary sequence. There is no "underlays.1" here,
- # since a sequence is not usually plotted against itself.
- #underlays.0 = mouse-human.underlays
- #underlays.2 = mouse-rat.underlays
- # Colors for this sequence's row of the text display.
- highlights = mouse.highlights
- # Zero offset: nothing is added to the displayed position labels.
- offset = 0
- # Sequence 2: third in the display order.
- seq 2:
- # Must match the sequence name used in the rows of the MAF alignments.
- seqname = rat.chr1
- exons = rat.exons
- repeats = rat.repeats
- links = rat.links
- # Default underlay file for all plots that use this sequence as the reference.
- underlays = rat.underlays
- # Disabled examples of plot-specific overrides: "underlays.N" applies when
- # sequence N is the secondary sequence.
- #underlays.0 = rat-human.underlays
- #underlays.1 = rat-mouse.underlays
- # Colors for this sequence's row of the text display.
- highlights = rat.highlights
- # Zero offset: nothing is added to the displayed position labels.
- offset = 0
- #----------------------------------------------------------------
- # This file specifies input parameters for Gmaj, including the
- # names of all data files. You can omit this file and just give
- # Gmaj the name of your alignment file directly, but then you
- # don't get the opportunity to provide annotations, offsets,
- # additional alignment files, or other optional features.
- #
- # Syntax:
- #
- # Each key=value(s) pair must reside on its own single, separate
- # line. (Note that although the '=' was formerly optional, it
- # is now required.) Other than that, the format is fairly loose.
- # Even the order of lines is arbitrary, except that "seq N:"
- # defines the current sequence until it is superseded by a new
- # "seq N:" line. Values containing spaces must be enclosed in
- # double quotes. Embedded quotes in such strings can be escaped
- # with '\', but there is no way to escape the backslash itself,
- # so quoted values must not end with '\' (insert a space before
- # the final quote if necessary). Lines with missing values are
- # skipped. A '#' at the beginning of a line marks a comment
- # that will be ignored, except for the identifier tag "#:gmaj"
- # at the top, which is mandatory.
- #
- # Required Fields:
- #
- # At least one alignfile is required. You do not have to provide
- # a section for every sequence (by default they will still be
- # displayed), but for each sequence you do mention, the "seq N:"
- # line and the seqname field are also required. Everything else
- # is optional.
- #
- # File Names and Locations:
- #
- # Filenames can be relative or absolute (fully qualified paths).
- # Gmaj will look for relative names in the following locations:
- #
- # 1. the separately specified "bundle" file (if any)
- # 2. the "datapath" specified here (if any)
- # 3. the same directory as this parameters file
- #
- # If you are using Gmaj's "bundle" feature, you must refer to
- # the files located in the bundle by their plain filenames,
- # without any path.
- #
- # Title:
- #
- # This string will be used as the title for the Gmaj windows.
- # Typically it describes the alignment data, including the name
- # of the locus. It does not control the applet button's label,
- # however, because the applet has not read this file yet;
- # instead there is a separate applet parameter for that.
- #
- # Reference Sequence:
- #
- # The refseq field identifies the reference sequence used in the
- # alignments. The default value "any" means that the alignments
- # were generated by a sequence-symmetric program such as TBA, so
- # the user should be allowed to select the reference sequence
- # interactively. Otherwise, the value must match the appropriate
- # sequence name from the MAF files (including the contig name, if
- # applicable).
- #
- # Reconstructed Sequence:
- #
- # If the alignment files include score rows for an ancestral
- # reconstruction, the reconorg field identifies which organism
- # these scores apply to. The default value "none" means Gmaj
- # will ignore the scores; otherwise the value must match the
- # species prefix of the appropriate sequence names from the MAF
- # files. Contig name extensions (e.g. ".chrX") are omitted, as
- # the scores can apply to any contig for that organism. A score
- # can be supplied only once for each base in the ancestral
- # genome.
- #
- # Tabular File Extensions:
- #
- # The tabext field specifies which filename extensions should
- # be treated as generic, tab-delimited formats (GFF/GTF/BED)
- # instead of the old PipMaker-style formats. The default list
- # is ".gff .gtf .bed .ct .trk". Note that it doesn't actually
- # matter which of these is used for a particular file, just
- # whether it is in the list.
- #
- # Warning Suppression:
- #
- # The nowarn field lists keywords for particular warning
- # messages that should not be displayed. This is especially
- # useful for applets, when the administrator has seen the
- # warning, checked the data, and determined that everything
- # is OK and the end user does not need to see the warning.
- # The keyword for each suppressible message is displayed at
- # the bottom of the message.
- #
- # Ignoring Sequences:
- #
- # The skipotherseq field specifies whether sequences that appear
- # in the MAF files but are not mentioned here should be ignored.
- # If so, these rows are simply skipped; no adjustments are made
- # to remove all-gap columns or join adjacent blocks, and empty
- # blocks are kept to preserve the MAF files' block numbering.
- # This feature is useful for saving memory, and for reducing the
- # number of pips when some species have many aligning contigs.
- # The default value is false, so all sequences are displayed.
- #
- # Sequence Numbers and Sequence Names:
- #
- # The seqname field serves to match up the parameter entries with
- # the sequence name in each row of the MAF alignments (including
- # the contig name, if applicable). The sequence number assigns
- # the display order, and is also used to identify the secondary
- # sequence for plot-specific underlays (see below).
- #
- # Multiple values can be given for each seqname keyword; in this
- # case the first is the primary name to be used for display, and
- # the rest are aliases for it. This is useful when two MAF files
- # use different names for the same sequences, or simply for
- # changing the display labels. Alias resolution is applied to
- # MAF seqnames, the refseq field, and the initzoom parameter, but
- # not to the reconorg field or annotation files. All primary and
- # alias names must be unique (except in the special case of
- # pairwise self-alignments).
- #
- # Sequence numbers start with 0 and must turn out to be
- # consecutive, after Gmaj fills in any gaps you leave with the
- # MAF sequences you don't mention here. Thus by default, if
- # the alignment files include ten sequences, the valid sequence
- # numbers would be 0-9, and Gmaj will assign any that you omit
- # (in the order it encounters them, which is affected by file
- # bundling). However, if you set skipotherseq = true, then you
- # must assign consecutive numbers yourself, because Gmaj will
- # not fill in any gaps.
- #
- # File Specification Modifiers:
- #
- # The generic, tabular annotation formats (GFF/GTF/BED) allow
- # entries for several sequences to be combined in one file,
- # since they can be distinguished by the "seqname" or "chrom"
- # column. However, in this case Gmaj will expect the column
- # value to match the seqname from the MAF alignments. If it
- # does not (e.g. if the MAF files include a species prefix but
- # the annotation file omits it), you can add a sequence
- # designation after the filename to tell Gmaj what to look for
- # in the annotation file (e.g. "exons = human.exons.bed chr11").
- #
- # Gmaj has special support for annotation data that represents
- # exons or repeats (namely adding exon numbers and inferring
- # UTRs, or finding the PipMaker repeat category). For the exons
- # and repeats panels this is automatic, but you can also invoke
- # it explicitly for files used as linkbars, underlays, or text
- # highlights by adding a type hint of "exons" or "repeats" after
- # the filename (following the sequence designation, if any, as
- # in "underlays = human.exons.bed chr11 exons"). This only works
- # if the file is in a generic (GFF/GTF/BED) format and contains
- # the appropriate type of data (genes/exons or repeats).
- #
- # Underlays and Highlights:
- #
- # Gmaj allows you to specify color underlays independently for
- # each plot, i.e. for each combination of reference and
- # secondary sequences. Thus in the "seq 1:" section, the
- # "underlays.0" entry specifies the underlay file to be used
- # when sequence 1 is the reference and sequence 0 is the second
- # sequence. Note that there is, for example, no "underlays.1"
- # entry in the "seq 1:" section, since we do not usually have
- # plots aligning sequences with themselves.
- #
- # However, specifying a quadratic number of files quickly becomes
- # burdensome as the number of sequences grows. For the common
- # case where the same underlay file is used for most or all of a
- # particular reference sequence's plots, the plain "underlays"
- # entry (without a number) provides a default for that reference
- # sequence. This can still be overridden as needed by numbered
- # entries for special plots.
- #
- # The highlights file specifies colors for a particular row of
- # the text display, so there is only one for each sequence. If
- # you omit it, Gmaj will build default highlights based on the
- # exons file (if you provided one).
- #
- # Offsets:
- #
- # The offset parameter is used for display purposes only. It
- # specifies an adjustment to be added to all position labels and
- # displayed references for a particular sequence. For example,
- # this allows positions to be labeled with respect to some larger
- # region. However, note that all annotations must still be
- # specified relative to the sequences referred to in the MAF
- # files.
- #
- #----------------------------------------------------------------
- # Cathy Riemer, June 2008