PageRenderTime 28ms CodeModel.GetById 16ms app.highlight 8ms RepoModel.GetById 1ms app.codeStats 0ms

/doc/tagger.extras-module.html

http://github.com/apresta/tagger
HTML | 290 lines | 253 code | 14 blank | 23 comment | 0 complexity | 22069097a1db4fb09438a100c8d2947a MD5 | raw file
  1<?xml version="1.0" encoding="ascii"?>
  2<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  3          "DTD/xhtml1-transitional.dtd">
  4<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  5<head>
  6  <title>tagger.extras</title>
  7  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  8  <script type="text/javascript" src="epydoc.js"></script>
  9</head>
 10
 11<body bgcolor="white" text="black" link="blue" vlink="#204080"
 12      alink="#204080">
 13<!-- ==================== NAVIGATION BAR ==================== -->
 14<table class="navbar" border="0" width="100%" cellpadding="0"
 15       bgcolor="#a0c0ff" cellspacing="0">
 16  <tr valign="middle">
 17
 18  <!-- Tree link -->
 19      <th>&nbsp;&nbsp;&nbsp;<a
 20        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
 21
 22  <!-- Index link -->
 23      <th>&nbsp;&nbsp;&nbsp;<a
 24        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
 25
 26  <!-- Help link -->
 27      <th>&nbsp;&nbsp;&nbsp;<a
 28        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
 29
 30  <!-- Project homepage -->
 31      <th class="navbar" align="right" width="100%">
 32        <table border="0" cellpadding="0" cellspacing="0">
 33          <tr><th class="navbar" align="center"
 34            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
 35          </tr></table></th>
 36  </tr>
 37</table>
 38<table width="100%" cellpadding="0" cellspacing="0">
 39  <tr valign="top">
 40    <td width="100%">
 41      <span class="breadcrumbs">
 42        Package&nbsp;tagger ::
 43        Module&nbsp;extras
 44      </span>
 45    </td>
 46    <td>
 47      <table cellpadding="0" cellspacing="0">
 48        <!-- hide/show private -->
 49        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
 50    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
 51        <tr><td align="right"><span class="options"
 52            >[<a href="frames.html" target="_top">frames</a
 53            >]&nbsp;|&nbsp;<a href="tagger.extras-module.html"
 54            target="_top">no&nbsp;frames</a>]</span></td></tr>
 55      </table>
 56    </td>
 57  </tr>
 58</table>
 59<!-- ==================== MODULE DESCRIPTION ==================== -->
 60<h1 class="epydoc">Module extras</h1><p class="nomargin-top"><span class="codelink"><a href="tagger.extras-pysrc.html">source&nbsp;code</a></span></p>
 61<!-- ==================== CLASSES ==================== -->
 62<a name="section-Classes"></a>
 63<table class="summary" border="1" cellpadding="3"
 64       cellspacing="0" width="100%" bgcolor="white">
 65<tr bgcolor="#70b0f0" class="table-header">
 66  <td colspan="2" class="table-header">
 67    <table border="0" cellpadding="0" cellspacing="0" width="100%">
 68      <tr valign="top">
 69        <td align="left"><span class="table-header">Classes</span></td>
 70        <td align="right" valign="top"
 71         ><span class="options">[<a href="#section-Classes"
 72         class="privatelink" onclick="toggle_private();"
 73         >hide private</a>]</span></td>
 74      </tr>
 75    </table>
 76  </td>
 77</tr>
 78<tr>
 79    <td width="15%" align="right" valign="top" class="summary">
 80      <span class="summary-type">&nbsp;</span>
 81    </td><td class="summary">
 82        <a href="tagger.extras.UnicodeReader-class.html" class="summary-name">UnicodeReader</a><br />
 83      Reader subclass that converts Unicode strings to a close ASCII 
 84        representation
 85    </td>
 86  </tr>
 87<tr>
 88    <td width="15%" align="right" valign="top" class="summary">
 89      <span class="summary-type">&nbsp;</span>
 90    </td><td class="summary">
 91        <a href="tagger.extras.HTMLReader-class.html" class="summary-name">HTMLReader</a><br />
 92      Reader subclass that can parse HTML code from the input
 93    </td>
 94  </tr>
 95<tr>
 96    <td width="15%" align="right" valign="top" class="summary">
 97      <span class="summary-type">&nbsp;</span>
 98    </td><td class="summary">
 99        <a href="tagger.extras.SimpleReader-class.html" class="summary-name">SimpleReader</a><br />
100      Reader subclass that doesn't perform any advanced analysis of the 
101        text
102    </td>
103  </tr>
104<tr>
105    <td width="15%" align="right" valign="top" class="summary">
106      <span class="summary-type">&nbsp;</span>
107    </td><td class="summary">
108        <a href="tagger.extras.FastStemmer-class.html" class="summary-name">FastStemmer</a><br />
109      Stemmer subclass that uses a much faster, but less correct 
110        algorithm
111    </td>
112  </tr>
113<tr>
114    <td width="15%" align="right" valign="top" class="summary">
115      <span class="summary-type">&nbsp;</span>
116    </td><td class="summary">
117        <a href="tagger.extras.NaiveRater-class.html" class="summary-name">NaiveRater</a><br />
118      Rater subclass that just ranks single-word tags by their frequency 
119        and weight
120    </td>
121  </tr>
122</table>
123<!-- ==================== FUNCTIONS ==================== -->
124<a name="section-Functions"></a>
125<table class="summary" border="1" cellpadding="3"
126       cellspacing="0" width="100%" bgcolor="white">
127<tr bgcolor="#70b0f0" class="table-header">
128  <td colspan="2" class="table-header">
129    <table border="0" cellpadding="0" cellspacing="0" width="100%">
130      <tr valign="top">
131        <td align="left"><span class="table-header">Functions</span></td>
132        <td align="right" valign="top"
133         ><span class="options">[<a href="#section-Functions"
134         class="privatelink" onclick="toggle_private();"
135         >hide private</a>]</span></td>
136      </tr>
137    </table>
138  </td>
139</tr>
140<tr>
141    <td width="15%" align="right" valign="top" class="summary">
142      <span class="summary-type">&nbsp;</span>
143    </td><td class="summary">
144      <table width="100%" cellpadding="0" cellspacing="0" border="0">
145        <tr>
146          <td><span class="summary-sig"><a href="tagger.extras-module.html#build_dict_from_nltk" class="summary-sig-name">build_dict_from_nltk</a>(<span class="summary-sig-arg">output_file</span>,
147        <span class="summary-sig-arg">corpus</span>=<span class="summary-sig-default">None</span>,
148        <span class="summary-sig-arg">stopwords</span>=<span class="summary-sig-default">None</span>,
149        <span class="summary-sig-arg">stemmer</span>=<span class="summary-sig-default">Stemmer()</span>,
150        <span class="summary-sig-arg">measure</span>=<span class="summary-sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>,
151        <span class="summary-sig-arg">verbose</span>=<span class="summary-sig-default">False</span>)</span></td>
152          <td align="right" valign="top">
153            <span class="codelink"><a href="tagger.extras-pysrc.html#build_dict_from_nltk">source&nbsp;code</a></span>
154            
155          </td>
156        </tr>
157      </table>
158      
159    </td>
160  </tr>
161</table>
162<!-- ==================== VARIABLES ==================== -->
163<a name="section-Variables"></a>
164<table class="summary" border="1" cellpadding="3"
165       cellspacing="0" width="100%" bgcolor="white">
166<tr bgcolor="#70b0f0" class="table-header">
167  <td colspan="2" class="table-header">
168    <table border="0" cellpadding="0" cellspacing="0" width="100%">
169      <tr valign="top">
170        <td align="left"><span class="table-header">Variables</span></td>
171        <td align="right" valign="top"
172         ><span class="options">[<a href="#section-Variables"
173         class="privatelink" onclick="toggle_private();"
174         >hide private</a>]</span></td>
175      </tr>
176    </table>
177  </td>
178</tr>
179<tr>
180    <td width="15%" align="right" valign="top" class="summary">
181      <span class="summary-type">&nbsp;</span>
182    </td><td class="summary">
183        <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="'tagger'"><code class="variable-quote">'</code><code class="variable-string">tagger</code><code class="variable-quote">'</code></code>
184    </td>
185  </tr>
186</table>
187<!-- ==================== FUNCTION DETAILS ==================== -->
188<a name="section-FunctionDetails"></a>
189<table class="details" border="1" cellpadding="3"
190       cellspacing="0" width="100%" bgcolor="white">
191<tr bgcolor="#70b0f0" class="table-header">
192  <td colspan="2" class="table-header">
193    <table border="0" cellpadding="0" cellspacing="0" width="100%">
194      <tr valign="top">
195        <td align="left"><span class="table-header">Function Details</span></td>
196        <td align="right" valign="top"
197         ><span class="options">[<a href="#section-FunctionDetails"
198         class="privatelink" onclick="toggle_private();"
199         >hide private</a>]</span></td>
200      </tr>
201    </table>
202  </td>
203</tr>
204</table>
205<a name="build_dict_from_nltk"></a>
206<div>
207<table class="details" border="1" cellpadding="3"
208       cellspacing="0" width="100%" bgcolor="white">
209<tr><td>
210  <table width="100%" cellpadding="0" cellspacing="0" border="0">
211  <tr valign="top"><td>
212  <h3 class="epydoc"><span class="sig"><span class="sig-name">build_dict_from_nltk</span>(<span class="sig-arg">output_file</span>,
213        <span class="sig-arg">corpus</span>=<span class="sig-default">None</span>,
214        <span class="sig-arg">stopwords</span>=<span class="sig-default">None</span>,
215        <span class="sig-arg">stemmer</span>=<span class="sig-default">Stemmer()</span>,
216        <span class="sig-arg">measure</span>=<span class="sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>,
217        <span class="sig-arg">verbose</span>=<span class="sig-default">False</span>)</span>
218  </h3>
219  </td><td align="right" valign="top"
220    ><span class="codelink"><a href="tagger.extras-pysrc.html#build_dict_from_nltk">source&nbsp;code</a></span>&nbsp;
221    </td>
222  </tr></table>
223  
224  
225  <dl class="fields">
226    <dt>Parameters:</dt>
227    <dd><ul class="nomargin-top">
228        <li><strong class="pname"><code>output_file</code></strong> - the name of the file where the dictionary should be saved</li>
229        <li><strong class="pname"><code>corpus</code></strong> - the NLTK corpus to use (defaults to nltk.corpus.reuters)</li>
230        <li><strong class="pname"><code>stopwords</code></strong> - a list of (not stemmed) stopwords (defaults to 
231          nltk.corpus.reuters.words('stopwords'))</li>
232        <li><strong class="pname"><code>stemmer</code></strong> - the <a href="tagger.tagger.Stemmer-class.html" 
233          class="link">Stemmer</a> object to be used</li>
234        <li><strong class="pname"><code>measure</code></strong> - the measure used to compute the weights ('IDF' i.e. 'inverse 
235          document frequency' or 'ICF' i.e. 'inverse collection frequency';
236          defaults to 'IDF')</li>
237        <li><strong class="pname"><code>verbose</code></strong> - whether information on the progress should be printed on screen</li>
238    </ul></dd>
239  </dl>
240</td></tr></table>
241</div>
242<br />
243<!-- ==================== NAVIGATION BAR ==================== -->
244<table class="navbar" border="0" width="100%" cellpadding="0"
245       bgcolor="#a0c0ff" cellspacing="0">
246  <tr valign="middle">
247
248  <!-- Tree link -->
249      <th>&nbsp;&nbsp;&nbsp;<a
250        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
251
252  <!-- Index link -->
253      <th>&nbsp;&nbsp;&nbsp;<a
254        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
255
256  <!-- Help link -->
257      <th>&nbsp;&nbsp;&nbsp;<a
258        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
259
260  <!-- Project homepage -->
261      <th class="navbar" align="right" width="100%">
262        <table border="0" cellpadding="0" cellspacing="0">
263          <tr><th class="navbar" align="center"
264            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
265          </tr></table></th>
266  </tr>
267</table>
268<table border="0" cellpadding="0" cellspacing="0" width="100%">
269  <tr>
270    <td align="left" class="footer">
271    Generated by Epydoc 3.0.1 on Wed Jun  8 01:57:42 2011
272    </td>
273    <td align="right" class="footer">
274      <a target="mainFrame" href="http://epydoc.sourceforge.net"
275        >http://epydoc.sourceforge.net</a>
276    </td>
277  </tr>
278</table>
279
280<script type="text/javascript">
281  <!--
282  // Private objects are initially displayed (because if
283  // javascript is turned off then we want them to be
284  // visible); but by default, we want to hide them.  So hide
285  // them unless we have a cookie that says to show them.
286  checkCookie();
287  // -->
288</script>
289</body>
290</html>