PageRenderTime 39ms CodeModel.GetById 13ms app.highlight 18ms RepoModel.GetById 1ms app.codeStats 1ms

/doc/build_dict-module.html

http://github.com/apresta/tagger
HTML | 284 lines | 243 code | 19 blank | 22 comment | 0 complexity | 840942380625f2cbec724e73c3024a26 MD5 | raw file
  1<?xml version="1.0" encoding="ascii"?>
  2<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  3          "DTD/xhtml1-transitional.dtd">
  4<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  5<head>
  6  <title>build_dict</title>
  7  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  8  <script type="text/javascript" src="epydoc.js"></script>
  9</head>
 10
 11<body bgcolor="white" text="black" link="blue" vlink="#204080"
 12      alink="#204080">
  13<!-- ==================== NAVIGATION BAR (top of page): Trees, Indices, Help and project homepage links ==================== -->
  14<table class="navbar" border="0" width="100%" cellpadding="0"
  15       bgcolor="#a0c0ff" cellspacing="0">
  16  <tr valign="middle">
  17
  18  <!-- Tree link: points at module-tree.html -->
  19      <th>&nbsp;&nbsp;&nbsp;<a
  20        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
  21
  22  <!-- Index link: points at identifier-index.html -->
  23      <th>&nbsp;&nbsp;&nbsp;<a
  24        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
  25
  26  <!-- Help link: points at help.html -->
  27      <th>&nbsp;&nbsp;&nbsp;<a
  28        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
  29
  30  <!-- Project homepage: right-aligned cell wrapping a nested one-cell table; target="_top" makes the link replace the top-level window instead of loading inside a frame -->
  31      <th class="navbar" align="right" width="100%">
  32        <table border="0" cellpadding="0" cellspacing="0">
  33          <tr><th class="navbar" align="center"
  34            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
  35          </tr></table></th>
  36  </tr>
  37</table>
 38<table width="100%" cellpadding="0" cellspacing="0">
 39  <tr valign="top">
 40    <td width="100%">
 41      <span class="breadcrumbs">
 42        Module&nbsp;build_dict
 43      </span>
 44    </td>
 45    <td>
 46      <table cellpadding="0" cellspacing="0">
 47        <!-- hide/show private -->
 48        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
 49    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
 50        <tr><td align="right"><span class="options"
 51            >[<a href="frames.html" target="_top">frames</a
 52            >]&nbsp;|&nbsp;<a href="build_dict-module.html"
 53            target="_top">no&nbsp;frames</a>]</span></td></tr>
 54      </table>
 55    </td>
 56  </tr>
 57</table>
  58<!-- ==================== MODULE DESCRIPTION: page heading, link to the module source page (build_dict-pysrc.html) and the command-line usage text ==================== -->
  59<h1 class="epydoc">Module build_dict</h1><p class="nomargin-top"><span class="codelink"><a href="build_dict-pysrc.html">source&nbsp;code</a></span></p>
  60<p>Usage: build_dict.py -o &lt;output file&gt; -s &lt;stopwords file&gt; 
  61  &lt;list of files&gt;</p>
 62
  63<!-- ==================== FUNCTIONS: summary table with one row per function (signature plus a link to its source) ==================== -->
  64<a name="section-Functions"></a>
  65<table class="summary" border="1" cellpadding="3"
  66       cellspacing="0" width="100%" bgcolor="white">
  67<tr bgcolor="#70b0f0" class="table-header"> <!-- Header row: section title on the left, "hide private" toggle on the right -->
  68  <td colspan="2" class="table-header">
  69    <table border="0" cellpadding="0" cellspacing="0" width="100%">
  70      <tr valign="top">
  71        <td align="left"><span class="table-header">Functions</span></td>
  72        <td align="right" valign="top"
  73         ><span class="options">[<a href="#section-Functions"
  74         class="privatelink" onclick="toggle_private();"
  75         >hide private</a>]</span></td>
  76      </tr>
  77    </table>
  78  </td>
  79</tr>
  80<tr> <!-- Summary row: build_dict; the signature links to the #build_dict anchor on this page and the row also shows the return description -->
  81    <td width="15%" align="right" valign="top" class="summary">
  82      <span class="summary-type">&nbsp;</span>
  83    </td><td class="summary">
  84      <table width="100%" cellpadding="0" cellspacing="0" border="0">
  85        <tr>
  86          <td><span class="summary-sig"><a href="build_dict-module.html#build_dict" class="summary-sig-name">build_dict</a>(<span class="summary-sig-arg">corpus</span>,
  87        <span class="summary-sig-arg">stopwords</span>=<span class="summary-sig-default">None</span>,
  88        <span class="summary-sig-arg">measure</span>=<span class="summary-sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>)</span><br />
  89      Returns:
  90      a dictionary of weights in the interval [0,1]</td>
  91          <td align="right" valign="top">
  92            <span class="codelink"><a href="build_dict-pysrc.html#build_dict">source&nbsp;code</a></span>
  93            
  94          </td>
  95        </tr>
  96      </table>
  97      
  98    </td>
  99  </tr>
 100<tr> <!-- Summary row: build_dict_from_files; the signature links to the #build_dict_from_files anchor on this page -->
 101    <td width="15%" align="right" valign="top" class="summary">
 102      <span class="summary-type">&nbsp;</span>
 103    </td><td class="summary">
 104      <table width="100%" cellpadding="0" cellspacing="0" border="0">
 105        <tr>
 106          <td><span class="summary-sig"><a href="build_dict-module.html#build_dict_from_files" class="summary-sig-name">build_dict_from_files</a>(<span class="summary-sig-arg">output_file</span>,
 107        <span class="summary-sig-arg">corpus_files</span>,
 108        <span class="summary-sig-arg">stopwords_file</span>=<span class="summary-sig-default">None</span>,
 109        <span class="summary-sig-arg">reader</span>=<span class="summary-sig-default">SimpleReader()</span>,
 110        <span class="summary-sig-arg">stemmer</span>=<span class="summary-sig-default">Stemmer()</span>,
 111        <span class="summary-sig-arg">measure</span>=<span class="summary-sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>,
 112        <span class="summary-sig-arg">verbose</span>=<span class="summary-sig-default">False</span>)</span></td>
 113          <td align="right" valign="top">
 114            <span class="codelink"><a href="build_dict-pysrc.html#build_dict_from_files">source&nbsp;code</a></span>
 115            
 116          </td>
 117        </tr>
 118      </table>
 119      
 120    </td>
 121  </tr>
 122</table>
 123<!-- ==================== VARIABLES: summary table listing module-level variables with their values ==================== -->
 124<a name="section-Variables"></a>
 125<table class="summary" border="1" cellpadding="3"
 126       cellspacing="0" width="100%" bgcolor="white">
 127<tr bgcolor="#70b0f0" class="table-header"> <!-- Header row: section title on the left, "hide private" toggle on the right -->
 128  <td colspan="2" class="table-header">
 129    <table border="0" cellpadding="0" cellspacing="0" width="100%">
 130      <tr valign="top">
 131        <td align="left"><span class="table-header">Variables</span></td>
 132        <td align="right" valign="top"
 133         ><span class="options">[<a href="#section-Variables"
 134         class="privatelink" onclick="toggle_private();"
 135         >hide private</a>]</span></td>
 136      </tr>
 137    </table>
 138  </td>
 139</tr>
 140<tr> <!-- Variable row: __package__, shown with the value None; the named anchor makes the row linkable as #__package__ -->
 141    <td width="15%" align="right" valign="top" class="summary">
 142      <span class="summary-type">&nbsp;</span>
 143    </td><td class="summary">
 144        <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="None">None</code>
 145    </td>
 146  </tr>
 147</table>
 148<!-- ==================== FUNCTION DETAILS: header-only table; the per-function detail tables follow it as separate div blocks ==================== -->
 149<a name="section-FunctionDetails"></a>
 150<table class="details" border="1" cellpadding="3"
 151       cellspacing="0" width="100%" bgcolor="white">
 152<tr bgcolor="#70b0f0" class="table-header"> <!-- Header row: section title on the left, "hide private" toggle on the right -->
 153  <td colspan="2" class="table-header">
 154    <table border="0" cellpadding="0" cellspacing="0" width="100%">
 155      <tr valign="top">
 156        <td align="left"><span class="table-header">Function Details</span></td>
 157        <td align="right" valign="top"
 158         ><span class="options">[<a href="#section-FunctionDetails"
 159         class="privatelink" onclick="toggle_private();"
 160         >hide private</a>]</span></td>
 161      </tr>
 162    </table>
 163  </td>
 164</tr>
 165</table>
 166<a name="build_dict"></a> <!-- Anchor targeted by the build_dict link in the Functions summary table -->
 167<div>
 168<table class="details" border="1" cellpadding="3"
 169       cellspacing="0" width="100%" bgcolor="white">
 170<tr><td>
 171  <table width="100%" cellpadding="0" cellspacing="0" border="0"> <!-- Signature on the left, link to the source listing on the right -->
 172  <tr valign="top"><td>
 173  <h3 class="epydoc"><span class="sig"><span class="sig-name">build_dict</span>(<span class="sig-arg">corpus</span>,
 174        <span class="sig-arg">stopwords</span>=<span class="sig-default">None</span>,
 175        <span class="sig-arg">measure</span>=<span class="sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>)</span>
 176  </h3>
 177  </td><td align="right" valign="top"
 178    ><span class="codelink"><a href="build_dict-pysrc.html#build_dict">source&nbsp;code</a></span>&nbsp;
 179    </td>
 180  </tr></table>
 181  
 182  
 183  <dl class="fields"> <!-- Documented fields: one list item per parameter, then the return value -->
 184    <dt>Parameters:</dt>
 185    <dd><ul class="nomargin-top">
 186        <li><strong class="pname"><code>corpus</code></strong> - a list of documents, represented as lists of (stemmed) words</li>
 187        <li><strong class="pname"><code>stopwords</code></strong> - the list of (stemmed) words that should have zero weight</li>
 188        <li><strong class="pname"><code>measure</code></strong> - the measure used to compute the weights ('IDF' i.e. 'inverse 
 189          document frequency' or 'ICF' i.e. 'inverse collection frequency';
 190          defaults to 'IDF')</li>
 191    </ul></dd>
 192    <dt>Returns:</dt>
 193        <dd>a dictionary of weights in the interval [0,1]</dd>
 194  </dl>
 195</td></tr></table>
 196</div>
 197<a name="build_dict_from_files"></a> <!-- Anchor targeted by the build_dict_from_files link in the Functions summary table -->
 198<div>
 199<table class="details" border="1" cellpadding="3"
 200       cellspacing="0" width="100%" bgcolor="white">
 201<tr><td>
 202  <table width="100%" cellpadding="0" cellspacing="0" border="0"> <!-- Signature on the left, link to the source listing on the right -->
 203  <tr valign="top"><td>
 204  <h3 class="epydoc"><span class="sig"><span class="sig-name">build_dict_from_files</span>(<span class="sig-arg">output_file</span>,
 205        <span class="sig-arg">corpus_files</span>,
 206        <span class="sig-arg">stopwords_file</span>=<span class="sig-default">None</span>,
 207        <span class="sig-arg">reader</span>=<span class="sig-default">SimpleReader()</span>,
 208        <span class="sig-arg">stemmer</span>=<span class="sig-default">Stemmer()</span>,
 209        <span class="sig-arg">measure</span>=<span class="sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>,
 210        <span class="sig-arg">verbose</span>=<span class="sig-default">False</span>)</span>
 211  </h3>
 212  </td><td align="right" valign="top"
 213    ><span class="codelink"><a href="build_dict-pysrc.html#build_dict_from_files">source&nbsp;code</a></span>&nbsp;
 214    </td>
 215  </tr></table>
 216  
 217  
 218  <dl class="fields"> <!-- Documented fields: one list item per parameter; no return value is documented for this function -->
 219    <dt>Parameters:</dt>
 220    <dd><ul class="nomargin-top">
 221        <li><strong class="pname"><code>output_file</code></strong> - the binary stream where the dictionary should be saved</li>
 222        <li><strong class="pname"><code>corpus_files</code></strong> - a list of streams with words to process</li>
 223        <li><strong class="pname"><code>stopwords_file</code></strong> - a stream containing a list of stopwords</li>
 224        <li><strong class="pname"><code>reader</code></strong> - the <a href="tagger.Reader-class.html" class="link">Reader</a> 
 225          object to be used</li>
 226        <li><strong class="pname"><code>stemmer</code></strong> - the <a href="tagger.Stemmer-class.html" class="link">Stemmer</a> 
 227          object to be used</li>
 228        <li><strong class="pname"><code>measure</code></strong> - the measure used to compute the weights ('IDF' i.e. 'inverse 
 229          document frequency' or 'ICF' i.e. 'inverse collection frequency';
 230          defaults to 'IDF')</li>
 231        <li><strong class="pname"><code>verbose</code></strong> - whether information on the progress should be printed on screen</li>
 232    </ul></dd>
 233  </dl>
 234</td></tr></table>
 235</div>
236<br />
 237<!-- ==================== NAVIGATION BAR (bottom of page): same Trees, Indices, Help and project homepage links as the bar at the top ==================== -->
 238<table class="navbar" border="0" width="100%" cellpadding="0"
 239       bgcolor="#a0c0ff" cellspacing="0">
 240  <tr valign="middle">
 241
 242  <!-- Tree link: points at module-tree.html -->
 243      <th>&nbsp;&nbsp;&nbsp;<a
 244        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
 245
 246  <!-- Index link: points at identifier-index.html -->
 247      <th>&nbsp;&nbsp;&nbsp;<a
 248        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
 249
 250  <!-- Help link: points at help.html -->
 251      <th>&nbsp;&nbsp;&nbsp;<a
 252        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
 253
 254  <!-- Project homepage: right-aligned cell wrapping a nested one-cell table; target="_top" makes the link replace the top-level window instead of loading inside a frame -->
 255      <th class="navbar" align="right" width="100%">
 256        <table border="0" cellpadding="0" cellspacing="0">
 257          <tr><th class="navbar" align="center"
 258            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
 259          </tr></table></th>
 260  </tr>
 261</table>
262<table border="0" cellpadding="0" cellspacing="0" width="100%%">
263  <tr>
264    <td align="left" class="footer">
265    Generated by Epydoc 3.0.1 on Fri May 13 11:13:02 2011
266    </td>
267    <td align="right" class="footer">
268      <a target="mainFrame" href="http://epydoc.sourceforge.net"
269        >http://epydoc.sourceforge.net</a>
270    </td>
271  </tr>
272</table>
273
 274<script type="text/javascript">
 275  <!--
 276  // Private objects are initially displayed (because if
 277  // javascript is turned off then we want them to be
 278  // visible); but by default, we want to hide them.  So hide
 279  // them unless we have a cookie that says to show them.
 280  checkCookie(); // not defined in this file; presumably provided by epydoc.js, which the head section loads (confirm)
 281  // -->
 282</script>
283</body>
284</html>