<!-- Source file: /tools/multivariate_stats/pca.xml
     Repository: https://bitbucket.org/cistrome/cistrome-harvard/ -->
  1<tool id="pca1" name="Principal Component Analysis" version="1.0.2">
  2  <description> </description>
  3  <command interpreter="python">
  4    pca.py 
  5      $input1
  6      $var_cols
  7      $methodChoice.method
  8      $out_file1
  9      $out_file2
 10      #if $methodChoice.method == "svd":
 11      $methodChoice.scale
 12      #end if
 13  </command>
 14  <inputs>
 15    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
 16    <param name="var_cols" label="Select columns containing input variables " type="data_column" data_ref="input1" numerical="True" multiple="true" >
 17        <validator type="no_options" message="Please select at least one column."/>
 18    </param>
 19    <conditional name="methodChoice">
 20        <param name="method" type="select" label="Method" help="The correlation matrix can only be used if there are no constant variables">
 21            <option value="cor" selected="true">Eigenvectors of Correlation (princomp)</option>
 22            <option value="cov">Eigenvectors of Covariance (princomp)</option>
 23            <option value="svd">Singular Value Decomposition (prcomp)</option>
 24        </param>
 25        <when value="cor" />
 26        <when value="cov" />
 27        <when value="svd">
 28            <param name="scale" type="select" label="Centering and Scaling" help="Can be used to center and/or scale variables">
 29                <option value="none" selected="true">None</option>
 30                <option value="center">Center only</option>
 31                <option value="scale">Scale only</option>
 32                <option value="both">Center and Scale</option>
 33            </param>        
 34        </when>
 35    </conditional>
 36  </inputs>
 37  <outputs>
 38    <data format="input" name="out_file1" metadata_source="input1" />
 39    <data format="pdf" name="out_file2" />
 40  </outputs>
 41  <requirements>
 42    <requirement type="python-module">rpy</requirement>
 43  </requirements>
 44  <tests>
 45    <test>
 46        <param name="input1" value="iris.tabular"/>
 47        <param name="var_cols" value="1,2,3,4"/>
 48        <param name="method" value="cor"/>
 49        <output name="out_file1" file="pca_out1.tabular"/>
 50        <output name="out_file2" file="pca_out2.pdf"/>
 51    </test>
 52    <test>
 53        <param name="input1" value="iris.tabular"/>
 54        <param name="var_cols" value="1,2,3,4"/>
 55        <param name="method" value="cov"/>
 56        <output name="out_file1" file="pca_out3.tabular"/>
 57        <output name="out_file2" file="pca_out4.pdf"/>
 58    </test>
 59    <test>
 60        <param name="input1" value="iris.tabular"/>
 61        <param name="var_cols" value="1,2,3,4"/>
 62        <param name="method" value="svd"/>
 63        <param name="scale" value="both"/>
 64        <output name="out_file1" file="pca_out5.tabular"/>
 65        <output name="out_file2" file="pca_out6.pdf"/>
 66    </test>
 67  </tests>
 68  <help>
 69
 70
 71.. class:: infomark
 72
 73**TIP:** If your data is not TAB delimited, use *Edit Datasets-&gt;Convert characters*
 74
 75-----
 76
 77.. class:: infomark
 78
 79**What it does**
 80
 81This tool performs Principal Component Analysis on the given numeric input data using functions from R statistical package - 'princomp' function (for Eigenvector based solution) and 'prcomp' function (for Singular value decomposition based solution). It outputs two files, one containing the summary statistics of PCA, and the other containing biplots of the observations and principal components.   
 82
 83*R Development Core Team (2009). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. ISBN 3-900051-07-0, URL http://www.R-project.org.*
 84
 85-----
 86
 87.. class:: warningmark
 88
 89**Note**
 90
 91- This tool currently treats all variables as continuous numeric variables. Running the tool on categorical variables might result in incorrect results. Rows containing non-numeric (or missing) data in any of the chosen columns will be skipped from the analysis.
 92
 93- The summary statistics in the output are described below:
 94
 95  - Std. deviation: Standard deviations of the principal components
 96  - Loadings: a list of eigen-vectors/variable loadings
 97  - Scores: Scores of the input data on the principal components
 98
 99  </help>
100</tool>