PageRenderTime 39ms CodeModel.GetById 25ms app.highlight 8ms RepoModel.GetById 2ms app.codeStats 0ms

/doc/extras-module.html

http://github.com/apresta/tagger
HTML | 289 lines | 252 code | 14 blank | 23 comment | 0 complexity | c6e66c421033e70eaf6e11becfe0f48b MD5 | raw file
  1<?xml version="1.0" encoding="ascii"?>
  2<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  3          "DTD/xhtml1-transitional.dtd">
  4<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  5<head>
  6  <title>extras</title>
  7  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  8  <script type="text/javascript" src="epydoc.js"></script>
  9</head>
 10
 11<body bgcolor="white" text="black" link="blue" vlink="#204080"
 12      alink="#204080">
 13<!-- ==================== NAVIGATION BAR ==================== -->
 14<table class="navbar" border="0" width="100%" cellpadding="0"
 15       bgcolor="#a0c0ff" cellspacing="0">
 16  <tr valign="middle">
 17
 18  <!-- Tree link -->
 19      <th>&nbsp;&nbsp;&nbsp;<a
 20        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
 21
 22  <!-- Index link -->
 23      <th>&nbsp;&nbsp;&nbsp;<a
 24        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
 25
 26  <!-- Help link -->
 27      <th>&nbsp;&nbsp;&nbsp;<a
 28        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
 29
 30  <!-- Project homepage -->
 31      <th class="navbar" align="right" width="100%">
 32        <table border="0" cellpadding="0" cellspacing="0">
 33          <tr><th class="navbar" align="center"
 34            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
 35          </tr></table></th>
 36  </tr>
 37</table>
 38<table width="100%" cellpadding="0" cellspacing="0">
 39  <tr valign="top">
 40    <td width="100%">
 41      <span class="breadcrumbs">
 42        Module&nbsp;extras
 43      </span>
 44    </td>
 45    <td>
 46      <table cellpadding="0" cellspacing="0">
 47        <!-- hide/show private -->
 48        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
 49    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
 50        <tr><td align="right"><span class="options"
 51            >[<a href="frames.html" target="_top">frames</a
 52            >]&nbsp;|&nbsp;[<a href="extras-module.html"
 53            target="_top">no&nbsp;frames</a>]</span></td></tr>
 54      </table>
 55    </td>
 56  </tr>
 57</table>
 58<!-- ==================== MODULE DESCRIPTION ==================== -->
 59<h1 class="epydoc">Module extras</h1><p class="nomargin-top"><span class="codelink"><a href="extras-pysrc.html">source&nbsp;code</a></span></p>
 60<!-- ==================== CLASSES ==================== -->
 61<a name="section-Classes"></a>
 62<table class="summary" border="1" cellpadding="3"
 63       cellspacing="0" width="100%" bgcolor="white">
 64<tr bgcolor="#70b0f0" class="table-header">
 65  <td colspan="2" class="table-header">
 66    <table border="0" cellpadding="0" cellspacing="0" width="100%">
 67      <tr valign="top">
 68        <td align="left"><span class="table-header">Classes</span></td>
 69        <td align="right" valign="top"
 70         ><span class="options">[<a href="#section-Classes"
 71         class="privatelink" onclick="toggle_private();"
 72         >hide private</a>]</span></td>
 73      </tr>
 74    </table>
 75  </td>
 76</tr>
 77<tr>
 78    <td width="15%" align="right" valign="top" class="summary">
 79      <span class="summary-type">&nbsp;</span>
 80    </td><td class="summary">
 81        <a href="extras.UnicodeReader-class.html" class="summary-name">UnicodeReader</a><br />
 82      Reader subclass that converts Unicode strings to a close ASCII 
 83        representation
 84    </td>
 85  </tr>
 86<tr>
 87    <td width="15%" align="right" valign="top" class="summary">
 88      <span class="summary-type">&nbsp;</span>
 89    </td><td class="summary">
 90        <a href="extras.HTMLReader-class.html" class="summary-name">HTMLReader</a><br />
 91      Reader subclass that can parse HTML code from the input
 92    </td>
 93  </tr>
 94<tr>
 95    <td width="15%" align="right" valign="top" class="summary">
 96      <span class="summary-type">&nbsp;</span>
 97    </td><td class="summary">
 98        <a href="extras.SimpleReader-class.html" class="summary-name">SimpleReader</a><br />
 99      Reader subclass that doesn't perform any advanced analysis of the 
100        text
101    </td>
102  </tr>
103<tr>
104    <td width="15%" align="right" valign="top" class="summary">
105      <span class="summary-type">&nbsp;</span>
106    </td><td class="summary">
107        <a href="extras.FastStemmer-class.html" class="summary-name">FastStemmer</a><br />
108      Stemmer subclass that uses a much faster, but less correct 
109        algorithm
110    </td>
111  </tr>
112<tr>
113    <td width="15%" align="right" valign="top" class="summary">
114      <span class="summary-type">&nbsp;</span>
115    </td><td class="summary">
116        <a href="extras.NaiveRater-class.html" class="summary-name">NaiveRater</a><br />
117      Rater subclass that just ranks single-word tags by their frequency 
118        and weight
119    </td>
120  </tr>
121</table>
122<!-- ==================== FUNCTIONS ==================== -->
123<a name="section-Functions"></a>
124<table class="summary" border="1" cellpadding="3"
125       cellspacing="0" width="100%" bgcolor="white">
126<tr bgcolor="#70b0f0" class="table-header">
127  <td colspan="2" class="table-header">
128    <table border="0" cellpadding="0" cellspacing="0" width="100%">
129      <tr valign="top">
130        <td align="left"><span class="table-header">Functions</span></td>
131        <td align="right" valign="top"
132         ><span class="options">[<a href="#section-Functions"
133         class="privatelink" onclick="toggle_private();"
134         >hide private</a>]</span></td>
135      </tr>
136    </table>
137  </td>
138</tr>
139<tr>
140    <td width="15%" align="right" valign="top" class="summary">
141      <span class="summary-type">&nbsp;</span>
142    </td><td class="summary">
143      <table width="100%" cellpadding="0" cellspacing="0" border="0">
144        <tr>
145          <td><span class="summary-sig"><a href="extras-module.html#build_dict_from_nltk" class="summary-sig-name">build_dict_from_nltk</a>(<span class="summary-sig-arg">output_file</span>,
146        <span class="summary-sig-arg">corpus</span>=<span class="summary-sig-default">None</span>,
147        <span class="summary-sig-arg">stopwords</span>=<span class="summary-sig-default">None</span>,
148        <span class="summary-sig-arg">stemmer</span>=<span class="summary-sig-default">Stemmer()</span>,
149        <span class="summary-sig-arg">measure</span>=<span class="summary-sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>,
150        <span class="summary-sig-arg">verbose</span>=<span class="summary-sig-default">False</span>)</span></td>
151          <td align="right" valign="top">
152            <span class="codelink"><a href="extras-pysrc.html#build_dict_from_nltk">source&nbsp;code</a></span>
153            
154          </td>
155        </tr>
156      </table>
157      
158    </td>
159  </tr>
160</table>
161<!-- ==================== VARIABLES ==================== -->
162<a name="section-Variables"></a>
163<table class="summary" border="1" cellpadding="3"
164       cellspacing="0" width="100%" bgcolor="white">
165<tr bgcolor="#70b0f0" class="table-header">
166  <td colspan="2" class="table-header">
167    <table border="0" cellpadding="0" cellspacing="0" width="100%">
168      <tr valign="top">
169        <td align="left"><span class="table-header">Variables</span></td>
170        <td align="right" valign="top"
171         ><span class="options">[<a href="#section-Variables"
172         class="privatelink" onclick="toggle_private();"
173         >hide private</a>]</span></td>
174      </tr>
175    </table>
176  </td>
177</tr>
178<tr>
179    <td width="15%" align="right" valign="top" class="summary">
180      <span class="summary-type">&nbsp;</span>
181    </td><td class="summary">
182        <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="None">None</code>
183    </td>
184  </tr>
185</table>
186<!-- ==================== FUNCTION DETAILS ==================== -->
187<a name="section-FunctionDetails"></a>
188<table class="details" border="1" cellpadding="3"
189       cellspacing="0" width="100%" bgcolor="white">
190<tr bgcolor="#70b0f0" class="table-header">
191  <td colspan="2" class="table-header">
192    <table border="0" cellpadding="0" cellspacing="0" width="100%">
193      <tr valign="top">
194        <td align="left"><span class="table-header">Function Details</span></td>
195        <td align="right" valign="top"
196         ><span class="options">[<a href="#section-FunctionDetails"
197         class="privatelink" onclick="toggle_private();"
198         >hide private</a>]</span></td>
199      </tr>
200    </table>
201  </td>
202</tr>
203</table>
204<a name="build_dict_from_nltk"></a>
205<div>
206<table class="details" border="1" cellpadding="3"
207       cellspacing="0" width="100%" bgcolor="white">
208<tr><td>
209  <table width="100%" cellpadding="0" cellspacing="0" border="0">
210  <tr valign="top"><td>
211  <h3 class="epydoc"><span class="sig"><span class="sig-name">build_dict_from_nltk</span>(<span class="sig-arg">output_file</span>,
212        <span class="sig-arg">corpus</span>=<span class="sig-default">None</span>,
213        <span class="sig-arg">stopwords</span>=<span class="sig-default">None</span>,
214        <span class="sig-arg">stemmer</span>=<span class="sig-default">Stemmer()</span>,
215        <span class="sig-arg">measure</span>=<span class="sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>,
216        <span class="sig-arg">verbose</span>=<span class="sig-default">False</span>)</span>
217  </h3>
218  </td><td align="right" valign="top"
219    ><span class="codelink"><a href="extras-pysrc.html#build_dict_from_nltk">source&nbsp;code</a></span>&nbsp;
220    </td>
221  </tr></table>
222  
223  
224  <dl class="fields">
225    <dt>Parameters:</dt>
226    <dd><ul class="nomargin-top">
227        <li><strong class="pname"><code>output_file</code></strong> - the binary stream where the dictionary should be saved</li>
228        <li><strong class="pname"><code>corpus</code></strong> - the NLTK corpus to use (defaults to nltk.corpus.reuters)</li>
229        <li><strong class="pname"><code>stopwords</code></strong> - a list of (not stemmed) stopwords (defaults to 
230          nltk.corpus.reuters.words('stopwords'))</li>
231        <li><strong class="pname"><code>stemmer</code></strong> - the <a href="tagger.Stemmer-class.html" class="link">Stemmer</a> 
232          object to be used</li>
233        <li><strong class="pname"><code>measure</code></strong> - the measure used to compute the weights ('IDF' i.e. 'inverse 
234          document frequency' or 'ICF' i.e. 'inverse collection frequency';
235          defaults to 'IDF')</li>
236        <li><strong class="pname"><code>verbose</code></strong> - whether information on the progress should be printed on screen</li>
237    </ul></dd>
238  </dl>
239</td></tr></table>
240</div>
241<br />
242<!-- ==================== NAVIGATION BAR ==================== -->
243<table class="navbar" border="0" width="100%" cellpadding="0"
244       bgcolor="#a0c0ff" cellspacing="0">
245  <tr valign="middle">
246
247  <!-- Tree link -->
248      <th>&nbsp;&nbsp;&nbsp;<a
249        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
250
251  <!-- Index link -->
252      <th>&nbsp;&nbsp;&nbsp;<a
253        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
254
255  <!-- Help link -->
256      <th>&nbsp;&nbsp;&nbsp;<a
257        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
258
259  <!-- Project homepage -->
260      <th class="navbar" align="right" width="100%">
261        <table border="0" cellpadding="0" cellspacing="0">
262          <tr><th class="navbar" align="center"
263            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
264          </tr></table></th>
265  </tr>
266</table>
267<table border="0" cellpadding="0" cellspacing="0" width="100%">
268  <tr>
269    <td align="left" class="footer">
270    Generated by Epydoc 3.0.1 on Fri May 13 11:13:02 2011
271    </td>
272    <td align="right" class="footer">
273      <a target="mainFrame" href="http://epydoc.sourceforge.net"
274        >http://epydoc.sourceforge.net</a>
275    </td>
276  </tr>
277</table>
278
279<script type="text/javascript">
280  <!--
281  // Private objects are initially displayed (because if
282  // javascript is turned off then we want them to be
283  // visible); but by default, we want to hide them.  So hide
284  // them unless we have a cookie that says to show them.
285  checkCookie();
286  // -->
287</script>
288</body>
289</html>