/doc/extras-module.html

http://github.com/apresta/tagger · HTML · 289 lines · 252 code · 14 blank · 23 comment · 0 complexity · c6e66c421033e70eaf6e11becfe0f48b MD5 · raw file

  1. <?xml version="1.0" encoding="ascii"?>
  2. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  3. "DTD/xhtml1-transitional.dtd">
  4. <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  5. <head>
  6. <title>extras</title>
  7. <link rel="stylesheet" href="epydoc.css" type="text/css" />
  8. <script type="text/javascript" src="epydoc.js"></script>
  9. </head>
  10. <body bgcolor="white" text="black" link="blue" vlink="#204080"
  11. alink="#204080">
  12. <!-- ==================== NAVIGATION BAR ==================== -->
  13. <table class="navbar" border="0" width="100%" cellpadding="0"
  14. bgcolor="#a0c0ff" cellspacing="0">
  15. <tr valign="middle">
  16. <!-- Tree link -->
  17. <th>&nbsp;&nbsp;&nbsp;<a
  18. href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
  19. <!-- Index link -->
  20. <th>&nbsp;&nbsp;&nbsp;<a
  21. href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
  22. <!-- Help link -->
  23. <th>&nbsp;&nbsp;&nbsp;<a
  24. href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
  25. <!-- Project homepage -->
  26. <th class="navbar" align="right" width="100%">
  27. <table border="0" cellpadding="0" cellspacing="0">
  28. <tr><th class="navbar" align="center"
  29. ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
  30. </tr></table></th>
  31. </tr>
  32. </table>
  33. <table width="100%" cellpadding="0" cellspacing="0">
  34. <tr valign="top">
  35. <td width="100%">
  36. <span class="breadcrumbs">
  37. Module&nbsp;extras
  38. </span>
  39. </td>
  40. <td>
  41. <table cellpadding="0" cellspacing="0">
  42. <!-- hide/show private -->
  43. <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
  44. onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
  45. <tr><td align="right"><span class="options"
  46. >[<a href="frames.html" target="_top">frames</a
  47. >]&nbsp;|&nbsp;[<a href="extras-module.html"
  48. target="_top">no&nbsp;frames</a>]</span></td></tr>
  49. </table>
  50. </td>
  51. </tr>
  52. </table>
  53. <!-- ==================== MODULE DESCRIPTION ==================== -->
  54. <h1 class="epydoc">Module extras</h1><p class="nomargin-top"><span class="codelink"><a href="extras-pysrc.html">source&nbsp;code</a></span></p>
  55. <!-- ==================== CLASSES ==================== -->
  56. <a name="section-Classes"></a>
  57. <table class="summary" border="1" cellpadding="3"
  58. cellspacing="0" width="100%" bgcolor="white">
  59. <tr bgcolor="#70b0f0" class="table-header">
  60. <td colspan="2" class="table-header">
  61. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  62. <tr valign="top">
  63. <td align="left"><span class="table-header">Classes</span></td>
  64. <td align="right" valign="top"
  65. ><span class="options">[<a href="#section-Classes"
  66. class="privatelink" onclick="toggle_private();"
  67. >hide private</a>]</span></td>
  68. </tr>
  69. </table>
  70. </td>
  71. </tr>
  72. <tr>
  73. <td width="15%" align="right" valign="top" class="summary">
  74. <span class="summary-type">&nbsp;</span>
  75. </td><td class="summary">
  76. <a href="extras.UnicodeReader-class.html" class="summary-name">UnicodeReader</a><br />
  77. Reader subclass that converts Unicode strings to a close ASCII
  78. representation
  79. </td>
  80. </tr>
  81. <tr>
  82. <td width="15%" align="right" valign="top" class="summary">
  83. <span class="summary-type">&nbsp;</span>
  84. </td><td class="summary">
  85. <a href="extras.HTMLReader-class.html" class="summary-name">HTMLReader</a><br />
  86. Reader subclass that can parse HTML code from the input
  87. </td>
  88. </tr>
  89. <tr>
  90. <td width="15%" align="right" valign="top" class="summary">
  91. <span class="summary-type">&nbsp;</span>
  92. </td><td class="summary">
  93. <a href="extras.SimpleReader-class.html" class="summary-name">SimpleReader</a><br />
  94. Reader subclass that doesn't perform any advanced analysis of the
  95. text
  96. </td>
  97. </tr>
  98. <tr>
  99. <td width="15%" align="right" valign="top" class="summary">
  100. <span class="summary-type">&nbsp;</span>
  101. </td><td class="summary">
  102. <a href="extras.FastStemmer-class.html" class="summary-name">FastStemmer</a><br />
  103. Stemmer subclass that uses a much faster, but less correct
  104. algorithm
  105. </td>
  106. </tr>
  107. <tr>
  108. <td width="15%" align="right" valign="top" class="summary">
  109. <span class="summary-type">&nbsp;</span>
  110. </td><td class="summary">
  111. <a href="extras.NaiveRater-class.html" class="summary-name">NaiveRater</a><br />
  112. Rater subclass that just ranks single-word tags by their frequency
  113. and weight
  114. </td>
  115. </tr>
  116. </table>
  117. <!-- ==================== FUNCTIONS ==================== -->
  118. <a name="section-Functions"></a>
  119. <table class="summary" border="1" cellpadding="3"
  120. cellspacing="0" width="100%" bgcolor="white">
  121. <tr bgcolor="#70b0f0" class="table-header">
  122. <td colspan="2" class="table-header">
  123. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  124. <tr valign="top">
  125. <td align="left"><span class="table-header">Functions</span></td>
  126. <td align="right" valign="top"
  127. ><span class="options">[<a href="#section-Functions"
  128. class="privatelink" onclick="toggle_private();"
  129. >hide private</a>]</span></td>
  130. </tr>
  131. </table>
  132. </td>
  133. </tr>
  134. <tr>
  135. <td width="15%" align="right" valign="top" class="summary">
  136. <span class="summary-type">&nbsp;</span>
  137. </td><td class="summary">
  138. <table width="100%" cellpadding="0" cellspacing="0" border="0">
  139. <tr>
  140. <td><span class="summary-sig"><a href="extras-module.html#build_dict_from_nltk" class="summary-sig-name">build_dict_from_nltk</a>(<span class="summary-sig-arg">output_file</span>,
  141. <span class="summary-sig-arg">corpus</span>=<span class="summary-sig-default">None</span>,
  142. <span class="summary-sig-arg">stopwords</span>=<span class="summary-sig-default">None</span>,
  143. <span class="summary-sig-arg">stemmer</span>=<span class="summary-sig-default">Stemmer()</span>,
  144. <span class="summary-sig-arg">measure</span>=<span class="summary-sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>,
  145. <span class="summary-sig-arg">verbose</span>=<span class="summary-sig-default">False</span>)</span></td>
  146. <td align="right" valign="top">
  147. <span class="codelink"><a href="extras-pysrc.html#build_dict_from_nltk">source&nbsp;code</a></span>
  148. </td>
  149. </tr>
  150. </table>
  151. </td>
  152. </tr>
  153. </table>
  154. <!-- ==================== VARIABLES ==================== -->
  155. <a name="section-Variables"></a>
  156. <table class="summary" border="1" cellpadding="3"
  157. cellspacing="0" width="100%" bgcolor="white">
  158. <tr bgcolor="#70b0f0" class="table-header">
  159. <td colspan="2" class="table-header">
  160. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  161. <tr valign="top">
  162. <td align="left"><span class="table-header">Variables</span></td>
  163. <td align="right" valign="top"
  164. ><span class="options">[<a href="#section-Variables"
  165. class="privatelink" onclick="toggle_private();"
  166. >hide private</a>]</span></td>
  167. </tr>
  168. </table>
  169. </td>
  170. </tr>
  171. <tr>
  172. <td width="15%" align="right" valign="top" class="summary">
  173. <span class="summary-type">&nbsp;</span>
  174. </td><td class="summary">
  175. <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="None">None</code>
  176. </td>
  177. </tr>
  178. </table>
  179. <!-- ==================== FUNCTION DETAILS ==================== -->
  180. <a name="section-FunctionDetails"></a>
  181. <table class="details" border="1" cellpadding="3"
  182. cellspacing="0" width="100%" bgcolor="white">
  183. <tr bgcolor="#70b0f0" class="table-header">
  184. <td colspan="2" class="table-header">
  185. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  186. <tr valign="top">
  187. <td align="left"><span class="table-header">Function Details</span></td>
  188. <td align="right" valign="top"
  189. ><span class="options">[<a href="#section-FunctionDetails"
  190. class="privatelink" onclick="toggle_private();"
  191. >hide private</a>]</span></td>
  192. </tr>
  193. </table>
  194. </td>
  195. </tr>
  196. </table>
  197. <a name="build_dict_from_nltk"></a>
  198. <div>
  199. <table class="details" border="1" cellpadding="3"
  200. cellspacing="0" width="100%" bgcolor="white">
  201. <tr><td>
  202. <table width="100%" cellpadding="0" cellspacing="0" border="0">
  203. <tr valign="top"><td>
  204. <h3 class="epydoc"><span class="sig"><span class="sig-name">build_dict_from_nltk</span>(<span class="sig-arg">output_file</span>,
  205. <span class="sig-arg">corpus</span>=<span class="sig-default">None</span>,
  206. <span class="sig-arg">stopwords</span>=<span class="sig-default">None</span>,
  207. <span class="sig-arg">stemmer</span>=<span class="sig-default">Stemmer()</span>,
  208. <span class="sig-arg">measure</span>=<span class="sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>,
  209. <span class="sig-arg">verbose</span>=<span class="sig-default">False</span>)</span>
  210. </h3>
  211. </td><td align="right" valign="top"
  212. ><span class="codelink"><a href="extras-pysrc.html#build_dict_from_nltk">source&nbsp;code</a></span>&nbsp;
  213. </td>
  214. </tr></table>
  215. <dl class="fields">
  216. <dt>Parameters:</dt>
  217. <dd><ul class="nomargin-top">
  218. <li><strong class="pname"><code>output_file</code></strong> - the binary stream where the dictionary should be saved</li>
  219. <li><strong class="pname"><code>corpus</code></strong> - the NLTK corpus to use (defaults to nltk.corpus.reuters)</li>
  220. <li><strong class="pname"><code>stopwords</code></strong> - a list of (not stemmed) stopwords (defaults to
  221. nltk.corpus.reuters.words('stopwords'))</li>
  222. <li><strong class="pname"><code>stemmer</code></strong> - the <a href="tagger.Stemmer-class.html" class="link">Stemmer</a>
  223. object to be used</li>
  224. <li><strong class="pname"><code>measure</code></strong> - the measure used to compute the weights ('IDF' i.e. 'inverse
  225. document frequency' or 'ICF' i.e. 'inverse collection frequency';
  226. defaults to 'IDF')</li>
  227. <li><strong class="pname"><code>verbose</code></strong> - whether information on the progress should be printed on screen</li>
  228. </ul></dd>
  229. </dl>
  230. </td></tr></table>
  231. </div>
  232. <br />
  233. <!-- ==================== NAVIGATION BAR ==================== -->
  234. <table class="navbar" border="0" width="100%" cellpadding="0"
  235. bgcolor="#a0c0ff" cellspacing="0">
  236. <tr valign="middle">
  237. <!-- Tree link -->
  238. <th>&nbsp;&nbsp;&nbsp;<a
  239. href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
  240. <!-- Index link -->
  241. <th>&nbsp;&nbsp;&nbsp;<a
  242. href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
  243. <!-- Help link -->
  244. <th>&nbsp;&nbsp;&nbsp;<a
  245. href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
  246. <!-- Project homepage -->
  247. <th class="navbar" align="right" width="100%">
  248. <table border="0" cellpadding="0" cellspacing="0">
  249. <tr><th class="navbar" align="center"
  250. ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
  251. </tr></table></th>
  252. </tr>
  253. </table>
  254. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  255. <tr>
  256. <td align="left" class="footer">
  257. Generated by Epydoc 3.0.1 on Fri May 13 11:13:02 2011
  258. </td>
  259. <td align="right" class="footer">
  260. <a target="mainFrame" href="http://epydoc.sourceforge.net"
  261. >http://epydoc.sourceforge.net</a>
  262. </td>
  263. </tr>
  264. </table>
  265. <script type="text/javascript">
  266. <!--
  267. // Private objects are initially displayed (because if
  268. // javascript is turned off then we want them to be
  269. // visible); but by default, we want to hide them. So hide
  270. // them unless we have a cookie that says to show them.
  271. checkCookie();
  272. // -->
  273. </script>
  274. </body>
  275. </html>