PageRenderTime 21ms CodeModel.GetById 13ms app.highlight 3ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/data_source/upload.xml

https://bitbucket.org/cistrome/cistrome-harvard/
XML | 255 lines | 171 code | 75 blank | 9 comment | 0 complexity | 3cb3aa83d7b58ae6088f56cd1ce5c631 MD5 | raw file
  1<?xml version="1.0"?>
  2
  3<tool name="Upload File" id="upload1" version="1.1.4" workflow_compatible="false">
  4  <description>
  5    from your computer  
  6  </description> <!-- Free-text description of the tool. NOTE(review): presumably displayed next to the tool name in the Galaxy UI; confirm. -->
  7  <action module="galaxy.tools.actions.upload" class="UploadToolAction"/> <!-- Names the class (UploadToolAction in module galaxy.tools.actions.upload) used as this tool's action. NOTE(review): presumably replaces the default tool action so uploads get special handling; confirm. -->
  8  <requirements>
  9      <requirement type="package">samtools</requirement> <!-- Declares the samtools package as a requirement. NOTE(review): presumably needed when upload.py handles BAM files; confirm. -->
 10  </requirements>
 11  <command interpreter="python">
 12      upload.py $GALAXY_ROOT_DIR $GALAXY_DATATYPES_CONF_FILE $paramfile
 13    #set $outnum = 0
 14    #while $varExists('output%i' % $outnum):
 15        #set $output = $getVar('output%i' % $outnum)
 16        #set $outnum += 1
 17        #set $file_name = $output.file_name
 18        ## FIXME: This is not future-proof for other uses of external_filename (other than for use by the library upload's "link data" feature)
 19        #if $output.dataset.dataset.external_filename:
 20            #set $file_name = "None"
 21        #end if
 22        ${output.dataset.dataset.id}:${output.files_path}:${file_name}
 23    #end while
 24  </command> <!-- The template above runs upload.py with $GALAXY_ROOT_DIR, $GALAXY_DATATYPES_CONF_FILE and $paramfile, then appends one "dataset id:files_path:file_name" argument for each consecutively numbered variable output0, output1, ... that exists (the loop stops at the first missing index). file_name is replaced by the literal string "None" when the dataset has an external_filename set. This comment is kept outside the command element so it cannot end up in the generated command line. -->
 25  <inputs nginx_upload="true"> <!-- Form inputs for the upload tool: a file format selector, the per-dataset upload group, a genome build selector, and a metadata conditional keyed on the chosen format. -->
 26    <param name="file_type" type="select" label="File Format" help="Which format? If for expression data, choose cel.zip or xys.zip. See help below"> <!-- Options come from the datatypes registry (upload_file_formats); transform_lines makes each entry serve as both name and value, the list is sorted on column 0 (name), and an "Auto-detect" entry with value "auto" is added at index 0. -->
 27      <options from_parameter="tool.app.datatypes_registry.upload_file_formats" transform_lines="[ &quot;%s%s%s&quot; % ( line, self.separator, line ) for line in obj ]">
 28        <column name="value" index="1"/>
 29        <column name="name" index="0"/>
 30        <filter type="sort_by" column="0"/>
 31        <filter type="add_value" name="Auto-detect" value="auto" index="0"/>
 32      </options>
 33    </param>
 34    <param name="async_datasets" type="hidden" value="None"/> <!-- NOTE(review): hidden field defaulting to the string "None"; presumably filled in by the asynchronous upload client code; confirm. -->
 35    <upload_dataset name="files" title="Specify Files for Dataset" file_type_name="file_type" metadata_ref="files_metadata"> <!-- Per-dataset upload inputs; file_type_name and metadata_ref name the file_type param above and the files_metadata conditional below. -->
 36      <param name="file_data" type="file" size="30" label="File (Please avoid Windows format text file)" ajax-upload="true" help="TIP1: Due to browser limitations, uploading files larger than 2GB is guaranteed to fail.  To upload large files, use the URL method (below) or ASPERA (please read the instruction). TIP2: If you want to upload expression data, please read the instruction and specify cel.zip or xys.zip for file format.">
 37        <validator type="expression" message="You will need to reselect the file you specified (%s)." substitute_value_in_message="True">not ( ( isinstance( value, unicode ) or isinstance( value, str ) ) and value != "" )</validator> <!-- use validator to post message to user about needing to reselect the file, since most browsers won't accept the value attribute for file inputs -->
 38      </param>
 39      <param name="url_paste" type="text" area="true" size="5x35" label="URL/Text" help="Here you may specify a list of URLs (one per line) or paste the contents of a file."/> 
 40      <param name="ftp_files" type="ftpfile" label="Files uploaded via ASPERA"/> <!-- Per its label, selects files that were uploaded via ASPERA (see the ASPERA section of the help text). -->
 41      <!-- Swap the following parameter for the select one that follows to
 42           enable the to_posix_lines option in the Web GUI. See Bitbucket
 43           Pull Request 171 for more information. -->
 44      <param name="to_posix_lines" type="hidden" value="Yes" />
 45      <!--
 46      <param name="to_posix_lines" type="select" display="checkboxes" multiple="True" label="Convert universal line endings to Posix line endings" help="Turn this option off if you upload a gzip, bz2 or zip archive which contains a binary file." value="Yes"> 
 47        <option value="Yes" selected="true">Yes</option>
 48      </param>
 49      -->
 50      <param name="space_to_tab" type="select" display="checkboxes" multiple="True" label="Convert spaces to tabs" help="Use this option if you are entering intervals by hand."> 
 51        <option value="Yes">Yes</option>
 52      </param>
 53      <param name="NAME" type="hidden" help="Name for dataset in upload"></param> <!-- Hidden field; per its help text it carries the name for the uploaded dataset. -->
 54    </upload_dataset>
 55    <param name="dbkey" type="genomebuild" label="Genome" />
 56    <conditional name="files_metadata" title="Specify metadata" value_from="self:app.datatypes_registry.get_upload_metadata_params" value_ref="file_type" value_ref_in_group="False" /> <!-- No cases are declared inline; value_from names get_upload_metadata_params on the datatypes registry and value_ref ties the conditional to file_type. NOTE(review): presumably the cases are generated per datatype at load time; confirm. -->
 57    <!-- <param name="other_dbkey" type="text" label="Or user-defined Genome" /> -->
 58  </inputs>
 59  <help>
 60**ASPERA upload**
 61
 62To upload large files using the ASPERA solution, email our Cistrome admin team (cistrome-bugs@jimmy.harvard.edu). Our team will send you a confirmation email with the username and password for the ASPERA web interface at http://cistrome.dfci.harvard.edu/aspera/user/. After logging in, go to the folder named after your account and upload your files into that folder. The login information for our ASPERA site may change when necessary; if it does, contact us again to get the new login. Please remember to return to this page to import the files into Cistrome after the upload is complete. The ASPERA upload folder is cleaned every Friday morning.
 63
 64-----
 65
 66**Expression data upload**
 67
 68**File** should be a .zip archive containing .CEL or .XYS files, plus a .TXT pheno file. Please use correct file extensions in your zip archive.
 69
 70By setting the file format to cel.zip or xys.zip, Cistrome allows you to upload sets of Affymetrix .CEL or NimbleGen .XYS files. The file you upload must be a ZIP archive with a .zip extension containing at least two .cel or .xys files (not both) and exactly one tab-delimited .txt phenotype file. The phenotype file should describe the data set and must contain a "Sample" column with the CEL/XYS file names, a "Group" column with values 0 and 1, and a "Key" column describing the members of the sample and control groups.
 71
 72Example of phenotype file::
 73
 74    Sample	Key	Group
 75    Down-Syndrome-Heart-1.CEL	Down Syndrome	0
 76    Down-Syndrome-Heart-2.CEL	Down Syndrome	0
 77    Normal-Heart-1.CEL	Normal	1
 78    Normal-Heart-2.CEL	Normal	1
 79
 80Please make sure that:

 81* The case of the header line is the same as in this example.
 82* Please use TAB to separate the columns.
 83* Please do NOT leave SPACE at the beginning or end of each line.
 84* Also please do NOT leave SPACE at the end of the pheno file.
 85
 86The .zip archive will be shown in your Galaxy history after it is loaded. It can be used as input to the Gene Expression Index tool.
 87
 88-----
 89
 90**Auto-detect**
 91
 92The system will attempt to detect Axt, Fasta, Fastqsolexa, Gff, Gff3, Html, Lav, Maf, Tabular, Wiggle, Bed and Interval (Bed with headers) formats. If your file is not detected properly as one of the known formats, it most likely means that it has some format problems (e.g., different number of columns on different rows). You can still coerce the system to set your data to the format you think it should be.  You can also upload compressed files, which will automatically be decompressed. 
 93
 94-----
 95
 96**Ab1**
 97
 98A binary sequence file in 'ab1' format with a '.ab1' file extension.  You must manually select this 'File Format' when uploading the file.
 99
100-----
101
102**Axt**
103
104blastz pairwise alignment format.  Each alignment block in an axt file contains three lines: a summary line and 2 sequence lines.  Blocks are separated from one another by blank lines.  The summary line contains chromosomal position and size information about the alignment. It consists of 9 required fields.
105
106-----
107
108**Bam**
109
110A binary file compressed in the BGZF format with a '.bam' file extension.
111
112-----
113
114**Bed**
115
116* Tab delimited format (tabular)
117* Does not require header line
118* Contains 3 required fields:
119
120  - chrom - The name of the chromosome (e.g. chr3, chrY, chr2_random) or contig (e.g. ctgY1).
121  - chromStart - The starting position of the feature in the chromosome or contig. The first base in a chromosome is numbered 0.
122  - chromEnd - The ending position of the feature in the chromosome or contig. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99.
123
124* May contain 9 additional optional BED fields:
125
126  - name - Defines the name of the BED line. This label is displayed to the left of the BED line in the Genome Browser window when the track is open to full display mode or directly to the left of the item in pack mode.
127  - score - A score between 0 and 1000. If the track line useScore attribute is set to 1 for this annotation data set, the score value will determine the level of gray in which this feature is displayed (higher numbers = darker gray).
128  - strand - Defines the strand - either '+' or '-'.
129  - thickStart - The starting position at which the feature is drawn thickly (for example, the start codon in gene displays).
130  - thickEnd - The ending position at which the feature is drawn thickly (for example, the stop codon in gene displays).
131  - itemRgb - An RGB value of the form R,G,B (e.g. 255,0,0). If the track line itemRgb attribute is set to "On", this RGB value will determine the display color of the data contained in this BED line. NOTE: It is recommended that a simple color scheme (eight colors or fewer) be used with this attribute to avoid overwhelming the color resources of the Genome Browser and your Internet browser.
132  - blockCount - The number of blocks (exons) in the BED line.
133  - blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount.
134  - blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount.
135
136* Example::
137
138    chr22 1000 5000 cloneA 960 + 1000 5000 0 2 567,488, 0,3512
139    chr22 2000 6000 cloneB 900 - 2000 6000 0 2 433,399, 0,3601
140
141-----
142
143**Fasta**
144
145A sequence in FASTA format consists of a single-line description, followed by lines of sequence data.  The first character of the description line is a greater-than (">") symbol in the first column.  All lines should be shorter than 80 characters::
146
147    >sequence1
148    atgcgtttgcgtgc
149    gtcggtttcgttgc
150    >sequence2
151    tttcgtgcgtatag
152    tggcgcggtga
153
154-----
155
156**FastqSolexa**
157
158FastqSolexa is the Illumina (Solexa) variant of the Fastq format, which stores sequences and quality scores in a single file::
159
160    @seq1  
161    GACAGCTTGGTTTTTAGTGAGTTGTTCCTTTCTTT  
162    +seq1  
163    hhhhhhhhhhhhhhhhhhhhhhhhhhPW@hhhhhh  
164    @seq2  
165    GCAATGACGGCAGCAATAAACTCAACAGGTGCTGG  
166    +seq2  
167    hhhhhhhhhhhhhhYhhahhhhWhAhFhSIJGChO
168    
169Or:: 
170
171    @seq1
172    GAATTGATCAGGACATAGGACAACTGTAGGCACCAT
173    +seq1
174    40 40 40 40 35 40 40 40 25 40 40 26 40 9 33 11 40 35 17 40 40 33 40 7 9 15 3 22 15 30 11 17 9 4 9 4
175    @seq2
176    GAGTTCTCGTCGCCTGTAGGCACCATCAATCGTATG
177    +seq2
178    40 15 40 17 6 36 40 40 40 25 40 9 35 33 40 14 14 18 15 17 19 28 31 4 24 18 27 14 15 18 2 8 12 8 11 9
179    
180-----
181
182**Gff**
183
184GFF lines have nine required fields that must be tab-separated.
185
186-----
187
188**Gff3**
189
190The GFF3 format addresses the most common extensions to GFF, while preserving backward compatibility with previous formats.
191
192-----
193
194**Interval (Genomic Intervals)**
195
196- Tab delimited format (tabular)
197- File must start with a definition line in the following format (columns may be in any order)::
198
199    #CHROM START END STRAND
200
201- CHROM - The name of the chromosome (e.g. chr3, chrY, chr2_random) or contig (e.g. ctgY1).
202- START - The starting position of the feature in the chromosome or contig. The first base in a chromosome is numbered 0.
203- END - The ending position of the feature in the chromosome or contig. The END base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as START=0, END=100, and span the bases numbered 0-99.
204- STRAND - Defines the strand - either '+' or '-'.
205
206- Example::
207
208    #CHROM START END   STRAND NAME COMMENT
209    chr1   10    100   +      exon myExon
210    chrX   1000  10050 -      gene myGene
211
212-----
213
214**Lav**
215
216Lav is the primary output format for BLASTZ.  The first line of a .lav file begins with #:lav.
217
218-----
219
220**MAF**
221
222TBA and multiz multiple alignment format.  The first line of a .maf file begins with ##maf. This word is followed by white-space-separated "variable=value" pairs. There should be no white space surrounding the "=".
223
224-----
225
226**Scf**
227
228A binary sequence file in 'scf' format with a '.scf' file extension.  You must manually select this 'File Format' when uploading the file.
229
230-----
231
232**Sff**
233
234A binary file in 'Standard Flowgram Format' with a '.sff' file extension.
235
236-----
237
238**Tabular (tab delimited)**
239
240Any data in tab delimited format (tabular)
241
242-----
243
244**Wig**
245
246The wiggle format is line-oriented.  Wiggle data is preceded by a track definition line, which adds a number of options for controlling the default display of this track.
247
248-----
249
250**Other text type**
251
252Any text file
253
254  </help>
255</tool>