/doc/tagger.extras-module.html
HTML | 290 lines | 253 code | 14 blank | 23 comment | 0 complexity | 22069097a1db4fb09438a100c8d2947a MD5 | raw file
1<?xml version="1.0" encoding="ascii"?> 2<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 3 "DTD/xhtml1-transitional.dtd"> 4<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 5<head> 6 <title>tagger.extras</title> 7 <link rel="stylesheet" href="epydoc.css" type="text/css" /> 8 <script type="text/javascript" src="epydoc.js"></script> 9</head> 10 11<body bgcolor="white" text="black" link="blue" vlink="#204080" 12 alink="#204080"> 13<!-- ==================== NAVIGATION BAR ==================== --> 14<table class="navbar" border="0" width="100%" cellpadding="0" 15 bgcolor="#a0c0ff" cellspacing="0"> 16 <tr valign="middle"> 17 18 <!-- Tree link --> 19 <th> <a 20 href="module-tree.html">Trees</a> </th> 21 22 <!-- Index link --> 23 <th> <a 24 href="identifier-index.html">Indices</a> </th> 25 26 <!-- Help link --> 27 <th> <a 28 href="help.html">Help</a> </th> 29 30 <!-- Project homepage --> 31 <th class="navbar" align="right" width="100%"> 32 <table border="0" cellpadding="0" cellspacing="0"> 33 <tr><th class="navbar" align="center" 34 ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th> 35 </tr></table></th> 36 </tr> 37</table> 38<table width="100%" cellpadding="0" cellspacing="0"> 39 <tr valign="top"> 40 <td width="100%"> 41 <span class="breadcrumbs"> 42 Package tagger :: 43 Module extras 44 </span> 45 </td> 46 <td> 47 <table cellpadding="0" cellspacing="0"> 48 <!-- hide/show private --> 49 <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink" 50 onclick="toggle_private();">hide private</a>]</span></td></tr> 51 <tr><td align="right"><span class="options" 52 >[<a href="frames.html" target="_top">frames</a 53 >] | [<a href="tagger.extras-module.html" 54 target="_top">no frames</a>]</span></td></tr> 55 </table> 56 </td> 57 </tr> 58</table> 59<!-- ==================== MODULE DESCRIPTION ==================== --> 60<h1 class="epydoc">Module extras</h1><p 
class="nomargin-top"><span class="codelink"><a href="tagger.extras-pysrc.html">source code</a></span></p> 61<!-- ==================== CLASSES ==================== --> 62<a name="section-Classes"></a> 63<table class="summary" border="1" cellpadding="3" 64 cellspacing="0" width="100%" bgcolor="white"> 65<tr bgcolor="#70b0f0" class="table-header"> 66 <td colspan="2" class="table-header"> 67 <table border="0" cellpadding="0" cellspacing="0" width="100%"> 68 <tr valign="top"> 69 <td align="left"><span class="table-header">Classes</span></td> 70 <td align="right" valign="top" 71 ><span class="options">[<a href="#section-Classes" 72 class="privatelink" onclick="toggle_private();" 73 >hide private</a>]</span></td> 74 </tr> 75 </table> 76 </td> 77</tr> 78<tr> 79 <td width="15%" align="right" valign="top" class="summary"> 80 <span class="summary-type"> </span> 81 </td><td class="summary"> 82 <a href="tagger.extras.UnicodeReader-class.html" class="summary-name">UnicodeReader</a><br /> 83 Reader subclass that converts Unicode strings to a close ASCII 84 representation 85 </td> 86 </tr> 87<tr> 88 <td width="15%" align="right" valign="top" class="summary"> 89 <span class="summary-type"> </span> 90 </td><td class="summary"> 91 <a href="tagger.extras.HTMLReader-class.html" class="summary-name">HTMLReader</a><br /> 92 Reader subclass that can parse HTML code from the input 93 </td> 94 </tr> 95<tr> 96 <td width="15%" align="right" valign="top" class="summary"> 97 <span class="summary-type"> </span> 98 </td><td class="summary"> 99 <a href="tagger.extras.SimpleReader-class.html" class="summary-name">SimpleReader</a><br /> 100 Reader subclass that doesn't perform any advanced analysis of the 101 text 102 </td> 103 </tr> 104<tr> 105 <td width="15%" align="right" valign="top" class="summary"> 106 <span class="summary-type"> </span> 107 </td><td class="summary"> 108 <a href="tagger.extras.FastStemmer-class.html" class="summary-name">FastStemmer</a><br /> 109 Stemmer subclass that uses a 
much faster, but less correct 110 algorithm 111 </td> 112 </tr> 113<tr> 114 <td width="15%" align="right" valign="top" class="summary"> 115 <span class="summary-type"> </span> 116 </td><td class="summary"> 117 <a href="tagger.extras.NaiveRater-class.html" class="summary-name">NaiveRater</a><br /> 118 Rater subclass that just ranks single-word tags by their frequency 119 and weight 120 </td> 121 </tr> 122</table> 123<!-- ==================== FUNCTIONS ==================== --> 124<a name="section-Functions"></a> 125<table class="summary" border="1" cellpadding="3" 126 cellspacing="0" width="100%" bgcolor="white"> 127<tr bgcolor="#70b0f0" class="table-header"> 128 <td colspan="2" class="table-header"> 129 <table border="0" cellpadding="0" cellspacing="0" width="100%"> 130 <tr valign="top"> 131 <td align="left"><span class="table-header">Functions</span></td> 132 <td align="right" valign="top" 133 ><span class="options">[<a href="#section-Functions" 134 class="privatelink" onclick="toggle_private();" 135 >hide private</a>]</span></td> 136 </tr> 137 </table> 138 </td> 139</tr> 140<tr> 141 <td width="15%" align="right" valign="top" class="summary"> 142 <span class="summary-type"> </span> 143 </td><td class="summary"> 144 <table width="100%" cellpadding="0" cellspacing="0" border="0"> 145 <tr> 146 <td><span class="summary-sig"><a href="tagger.extras-module.html#build_dict_from_nltk" class="summary-sig-name">build_dict_from_nltk</a>(<span class="summary-sig-arg">output_file</span>, 147 <span class="summary-sig-arg">corpus</span>=<span class="summary-sig-default">None</span>, 148 <span class="summary-sig-arg">stopwords</span>=<span class="summary-sig-default">None</span>, 149 <span class="summary-sig-arg">stemmer</span>=<span class="summary-sig-default">Stemmer()</span>, 150 <span class="summary-sig-arg">measure</span>=<span class="summary-sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>, 
151 <span class="summary-sig-arg">verbose</span>=<span class="summary-sig-default">False</span>)</span></td> 152 <td align="right" valign="top"> 153 <span class="codelink"><a href="tagger.extras-pysrc.html#build_dict_from_nltk">source code</a></span> 154 155 </td> 156 </tr> 157 </table> 158 159 </td> 160 </tr> 161</table> 162<!-- ==================== VARIABLES ==================== --> 163<a name="section-Variables"></a> 164<table class="summary" border="1" cellpadding="3" 165 cellspacing="0" width="100%" bgcolor="white"> 166<tr bgcolor="#70b0f0" class="table-header"> 167 <td colspan="2" class="table-header"> 168 <table border="0" cellpadding="0" cellspacing="0" width="100%"> 169 <tr valign="top"> 170 <td align="left"><span class="table-header">Variables</span></td> 171 <td align="right" valign="top" 172 ><span class="options">[<a href="#section-Variables" 173 class="privatelink" onclick="toggle_private();" 174 >hide private</a>]</span></td> 175 </tr> 176 </table> 177 </td> 178</tr> 179<tr> 180 <td width="15%" align="right" valign="top" class="summary"> 181 <span class="summary-type"> </span> 182 </td><td class="summary"> 183 <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="'tagger'"><code class="variable-quote">'</code><code class="variable-string">tagger</code><code class="variable-quote">'</code></code> 184 </td> 185 </tr> 186</table> 187<!-- ==================== FUNCTION DETAILS ==================== --> 188<a name="section-FunctionDetails"></a> 189<table class="details" border="1" cellpadding="3" 190 cellspacing="0" width="100%" bgcolor="white"> 191<tr bgcolor="#70b0f0" class="table-header"> 192 <td colspan="2" class="table-header"> 193 <table border="0" cellpadding="0" cellspacing="0" width="100%"> 194 <tr valign="top"> 195 <td align="left"><span class="table-header">Function Details</span></td> 196 <td align="right" valign="top" 197 ><span class="options">[<a href="#section-FunctionDetails" 198 class="privatelink" 
onclick="toggle_private();" 199 >hide private</a>]</span></td> 200 </tr> 201 </table> 202 </td> 203</tr> 204</table> 205<a name="build_dict_from_nltk"></a> 206<div> 207<table class="details" border="1" cellpadding="3" 208 cellspacing="0" width="100%" bgcolor="white"> 209<tr><td> 210 <table width="100%" cellpadding="0" cellspacing="0" border="0"> 211 <tr valign="top"><td> 212 <h3 class="epydoc"><span class="sig"><span class="sig-name">build_dict_from_nltk</span>(<span class="sig-arg">output_file</span>, 213 <span class="sig-arg">corpus</span>=<span class="sig-default">None</span>, 214 <span class="sig-arg">stopwords</span>=<span class="sig-default">None</span>, 215 <span class="sig-arg">stemmer</span>=<span class="sig-default">Stemmer()</span>, 216 <span class="sig-arg">measure</span>=<span class="sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>, 217 <span class="sig-arg">verbose</span>=<span class="sig-default">False</span>)</span> 218 </h3> 219 </td><td align="right" valign="top" 220 ><span class="codelink"><a href="tagger.extras-pysrc.html#build_dict_from_nltk">source code</a></span> 221 </td> 222 </tr></table> 223 224 225 <dl class="fields"> 226 <dt>Parameters:</dt> 227 <dd><ul class="nomargin-top"> 228 <li><strong class="pname"><code>output_file</code></strong> - the name of the file where the dictionary should be saved</li> 229 <li><strong class="pname"><code>corpus</code></strong> - the NLTK corpus to use (defaults to nltk.corpus.reuters)</li> 230 <li><strong class="pname"><code>stopwords</code></strong> - a list of (not stemmed) stopwords (defaults to 231 nltk.corpus.reuters.words('stopwords'))</li> 232 <li><strong class="pname"><code>stemmer</code></strong> - the <a href="tagger.tagger.Stemmer-class.html" 233 class="link">Stemmer</a> object to be used</li> 234 <li><strong class="pname"><code>measure</code></strong> - the measure used to compute the weights ('IDF' i.e. 
'inverse 235 document frequency' or 'ICF' i.e. 'inverse collection frequency'; 236 defaults to 'IDF')</li> 237 <li><strong class="pname"><code>verbose</code></strong> - whether information on the progress should be printed on screen</li> 238 </ul></dd> 239 </dl> 240</td></tr></table> 241</div> 242<br /> 243<!-- ==================== NAVIGATION BAR ==================== --> 244<table class="navbar" border="0" width="100%" cellpadding="0" 245 bgcolor="#a0c0ff" cellspacing="0"> 246 <tr valign="middle"> 247 248 <!-- Tree link --> 249 <th> <a 250 href="module-tree.html">Trees</a> </th> 251 252 <!-- Index link --> 253 <th> <a 254 href="identifier-index.html">Indices</a> </th> 255 256 <!-- Help link --> 257 <th> <a 258 href="help.html">Help</a> </th> 259 260 <!-- Project homepage --> 261 <th class="navbar" align="right" width="100%"> 262 <table border="0" cellpadding="0" cellspacing="0"> 263 <tr><th class="navbar" align="center" 264 ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th> 265 </tr></table></th> 266 </tr> 267</table> 268<table border="0" cellpadding="0" cellspacing="0" width="100%"> 269 <tr> 270 <td align="left" class="footer"> 271 Generated by Epydoc 3.0.1 on Wed Jun 8 01:57:42 2011 272 </td> 273 <td align="right" class="footer"> 274 <a target="mainFrame" href="http://epydoc.sourceforge.net" 275 >http://epydoc.sourceforge.net</a> 276 </td> 277 </tr> 278</table> 279 280<script type="text/javascript"> 281 <!-- 282 // Private objects are initially displayed (because if 283 // javascript is turned off then we want them to be 284 // visible); but by default, we want to hide them. So hide 285 // them unless we have a cookie that says to show them. 286 checkCookie(); 287 // --> 288</script> 289</body> 290</html>