PageRenderTime 30ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/doc/tagger.extras-module.html

http://github.com/apresta/tagger
HTML | 290 lines | 253 code | 14 blank | 23 comment | 0 complexity | 22069097a1db4fb09438a100c8d2947a MD5 | raw file
  1. <?xml version="1.0" encoding="ascii"?>
  2. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  3. "DTD/xhtml1-transitional.dtd">
  4. <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  5. <head>
  6. <title>tagger.extras</title>
  7. <link rel="stylesheet" href="epydoc.css" type="text/css" />
  8. <script type="text/javascript" src="epydoc.js"></script>
  9. </head>
  10. <body bgcolor="white" text="black" link="blue" vlink="#204080"
  11. alink="#204080">
  12. <!-- ==================== NAVIGATION BAR ==================== -->
  13. <table class="navbar" border="0" width="100%" cellpadding="0"
  14. bgcolor="#a0c0ff" cellspacing="0">
  15. <tr valign="middle">
  16. <!-- Tree link -->
  17. <th>&nbsp;&nbsp;&nbsp;<a
  18. href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
  19. <!-- Index link -->
  20. <th>&nbsp;&nbsp;&nbsp;<a
  21. href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
  22. <!-- Help link -->
  23. <th>&nbsp;&nbsp;&nbsp;<a
  24. href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
  25. <!-- Project homepage -->
  26. <th class="navbar" align="right" width="100%">
  27. <table border="0" cellpadding="0" cellspacing="0">
  28. <tr><th class="navbar" align="center"
  29. ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
  30. </tr></table></th>
  31. </tr>
  32. </table>
  33. <table width="100%" cellpadding="0" cellspacing="0">
  34. <tr valign="top">
  35. <td width="100%">
  36. <span class="breadcrumbs">
  37. Package&nbsp;tagger ::
  38. Module&nbsp;extras
  39. </span>
  40. </td>
  41. <td>
  42. <table cellpadding="0" cellspacing="0">
  43. <!-- hide/show private -->
  44. <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
  45. onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
  46. <tr><td align="right"><span class="options"
  47. >[<a href="frames.html" target="_top">frames</a
  48. >]&nbsp;|&nbsp;[<a href="tagger.extras-module.html"
  49. target="_top">no&nbsp;frames</a>]</span></td></tr>
  50. </table>
  51. </td>
  52. </tr>
  53. </table>
  54. <!-- ==================== MODULE DESCRIPTION ==================== -->
  55. <h1 class="epydoc">Module extras</h1><p class="nomargin-top"><span class="codelink"><a href="tagger.extras-pysrc.html">source&nbsp;code</a></span></p>
  56. <!-- ==================== CLASSES ==================== -->
  57. <a name="section-Classes"></a>
  58. <table class="summary" border="1" cellpadding="3"
  59. cellspacing="0" width="100%" bgcolor="white">
  60. <tr bgcolor="#70b0f0" class="table-header">
  61. <td colspan="2" class="table-header">
  62. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  63. <tr valign="top">
  64. <td align="left"><span class="table-header">Classes</span></td>
  65. <td align="right" valign="top"
  66. ><span class="options">[<a href="#section-Classes"
  67. class="privatelink" onclick="toggle_private();"
  68. >hide private</a>]</span></td>
  69. </tr>
  70. </table>
  71. </td>
  72. </tr>
  73. <tr>
  74. <td width="15%" align="right" valign="top" class="summary">
  75. <span class="summary-type">&nbsp;</span>
  76. </td><td class="summary">
  77. <a href="tagger.extras.UnicodeReader-class.html" class="summary-name">UnicodeReader</a><br />
  78. Reader subclass that converts Unicode strings to a close ASCII
  79. representation
  80. </td>
  81. </tr>
  82. <tr>
  83. <td width="15%" align="right" valign="top" class="summary">
  84. <span class="summary-type">&nbsp;</span>
  85. </td><td class="summary">
  86. <a href="tagger.extras.HTMLReader-class.html" class="summary-name">HTMLReader</a><br />
  87. Reader subclass that can parse HTML code from the input
  88. </td>
  89. </tr>
  90. <tr>
  91. <td width="15%" align="right" valign="top" class="summary">
  92. <span class="summary-type">&nbsp;</span>
  93. </td><td class="summary">
  94. <a href="tagger.extras.SimpleReader-class.html" class="summary-name">SimpleReader</a><br />
  95. Reader subclass that doesn't perform any advanced analysis of the
  96. text
  97. </td>
  98. </tr>
  99. <tr>
  100. <td width="15%" align="right" valign="top" class="summary">
  101. <span class="summary-type">&nbsp;</span>
  102. </td><td class="summary">
  103. <a href="tagger.extras.FastStemmer-class.html" class="summary-name">FastStemmer</a><br />
  104. Stemmer subclass that uses a much faster, but less correct
  105. algorithm
  106. </td>
  107. </tr>
  108. <tr>
  109. <td width="15%" align="right" valign="top" class="summary">
  110. <span class="summary-type">&nbsp;</span>
  111. </td><td class="summary">
  112. <a href="tagger.extras.NaiveRater-class.html" class="summary-name">NaiveRater</a><br />
  113. Rater subclass that just ranks single-word tags by their frequency
  114. and weight
  115. </td>
  116. </tr>
  117. </table>
  118. <!-- ==================== FUNCTIONS ==================== -->
  119. <a name="section-Functions"></a>
  120. <table class="summary" border="1" cellpadding="3"
  121. cellspacing="0" width="100%" bgcolor="white">
  122. <tr bgcolor="#70b0f0" class="table-header">
  123. <td colspan="2" class="table-header">
  124. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  125. <tr valign="top">
  126. <td align="left"><span class="table-header">Functions</span></td>
  127. <td align="right" valign="top"
  128. ><span class="options">[<a href="#section-Functions"
  129. class="privatelink" onclick="toggle_private();"
  130. >hide private</a>]</span></td>
  131. </tr>
  132. </table>
  133. </td>
  134. </tr>
  135. <tr>
  136. <td width="15%" align="right" valign="top" class="summary">
  137. <span class="summary-type">&nbsp;</span>
  138. </td><td class="summary">
  139. <table width="100%" cellpadding="0" cellspacing="0" border="0">
  140. <tr>
  141. <td><span class="summary-sig"><a href="tagger.extras-module.html#build_dict_from_nltk" class="summary-sig-name">build_dict_from_nltk</a>(<span class="summary-sig-arg">output_file</span>,
  142. <span class="summary-sig-arg">corpus</span>=<span class="summary-sig-default">None</span>,
  143. <span class="summary-sig-arg">stopwords</span>=<span class="summary-sig-default">None</span>,
  144. <span class="summary-sig-arg">stemmer</span>=<span class="summary-sig-default">Stemmer()</span>,
  145. <span class="summary-sig-arg">measure</span>=<span class="summary-sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>,
  146. <span class="summary-sig-arg">verbose</span>=<span class="summary-sig-default">False</span>)</span></td>
  147. <td align="right" valign="top">
  148. <span class="codelink"><a href="tagger.extras-pysrc.html#build_dict_from_nltk">source&nbsp;code</a></span>
  149. </td>
  150. </tr>
  151. </table>
  152. </td>
  153. </tr>
  154. </table>
  155. <!-- ==================== VARIABLES ==================== -->
  156. <a name="section-Variables"></a>
  157. <table class="summary" border="1" cellpadding="3"
  158. cellspacing="0" width="100%" bgcolor="white">
  159. <tr bgcolor="#70b0f0" class="table-header">
  160. <td colspan="2" class="table-header">
  161. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  162. <tr valign="top">
  163. <td align="left"><span class="table-header">Variables</span></td>
  164. <td align="right" valign="top"
  165. ><span class="options">[<a href="#section-Variables"
  166. class="privatelink" onclick="toggle_private();"
  167. >hide private</a>]</span></td>
  168. </tr>
  169. </table>
  170. </td>
  171. </tr>
  172. <tr>
  173. <td width="15%" align="right" valign="top" class="summary">
  174. <span class="summary-type">&nbsp;</span>
  175. </td><td class="summary">
  176. <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="'tagger'"><code class="variable-quote">'</code><code class="variable-string">tagger</code><code class="variable-quote">'</code></code>
  177. </td>
  178. </tr>
  179. </table>
  180. <!-- ==================== FUNCTION DETAILS ==================== -->
  181. <a name="section-FunctionDetails"></a>
  182. <table class="details" border="1" cellpadding="3"
  183. cellspacing="0" width="100%" bgcolor="white">
  184. <tr bgcolor="#70b0f0" class="table-header">
  185. <td colspan="2" class="table-header">
  186. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  187. <tr valign="top">
  188. <td align="left"><span class="table-header">Function Details</span></td>
  189. <td align="right" valign="top"
  190. ><span class="options">[<a href="#section-FunctionDetails"
  191. class="privatelink" onclick="toggle_private();"
  192. >hide private</a>]</span></td>
  193. </tr>
  194. </table>
  195. </td>
  196. </tr>
  197. </table>
  198. <a name="build_dict_from_nltk"></a>
  199. <div>
  200. <table class="details" border="1" cellpadding="3"
  201. cellspacing="0" width="100%" bgcolor="white">
  202. <tr><td>
  203. <table width="100%" cellpadding="0" cellspacing="0" border="0">
  204. <tr valign="top"><td>
  205. <h3 class="epydoc"><span class="sig"><span class="sig-name">build_dict_from_nltk</span>(<span class="sig-arg">output_file</span>,
  206. <span class="sig-arg">corpus</span>=<span class="sig-default">None</span>,
  207. <span class="sig-arg">stopwords</span>=<span class="sig-default">None</span>,
  208. <span class="sig-arg">stemmer</span>=<span class="sig-default">Stemmer()</span>,
  209. <span class="sig-arg">measure</span>=<span class="sig-default"><code class="variable-quote">'</code><code class="variable-string">IDF</code><code class="variable-quote">'</code></span>,
  210. <span class="sig-arg">verbose</span>=<span class="sig-default">False</span>)</span>
  211. </h3>
  212. </td><td align="right" valign="top"
  213. ><span class="codelink"><a href="tagger.extras-pysrc.html#build_dict_from_nltk">source&nbsp;code</a></span>&nbsp;
  214. </td>
  215. </tr></table>
  216. <dl class="fields">
  217. <dt>Parameters:</dt>
  218. <dd><ul class="nomargin-top">
  219. <li><strong class="pname"><code>output_file</code></strong> - the name of the file where the dictionary should be saved</li>
  220. <li><strong class="pname"><code>corpus</code></strong> - the NLTK corpus to use (defaults to nltk.corpus.reuters)</li>
  221. <li><strong class="pname"><code>stopwords</code></strong> - a list of (not stemmed) stopwords (defaults to
  222. nltk.corpus.reuters.words('stopwords'))</li>
  223. <li><strong class="pname"><code>stemmer</code></strong> - the <a href="tagger.tagger.Stemmer-class.html"
  224. class="link">Stemmer</a> object to be used</li>
  225. <li><strong class="pname"><code>measure</code></strong> - the measure used to compute the weights ('IDF' i.e. 'inverse
  226. document frequency' or 'ICF' i.e. 'inverse collection frequency';
  227. defaults to 'IDF')</li>
  228. <li><strong class="pname"><code>verbose</code></strong> - whether information on the progress should be printed on screen</li>
  229. </ul></dd>
  230. </dl>
  231. </td></tr></table>
  232. </div>
  233. <br />
  234. <!-- ==================== NAVIGATION BAR ==================== -->
  235. <table class="navbar" border="0" width="100%" cellpadding="0"
  236. bgcolor="#a0c0ff" cellspacing="0">
  237. <tr valign="middle">
  238. <!-- Tree link -->
  239. <th>&nbsp;&nbsp;&nbsp;<a
  240. href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
  241. <!-- Index link -->
  242. <th>&nbsp;&nbsp;&nbsp;<a
  243. href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
  244. <!-- Help link -->
  245. <th>&nbsp;&nbsp;&nbsp;<a
  246. href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
  247. <!-- Project homepage -->
  248. <th class="navbar" align="right" width="100%">
  249. <table border="0" cellpadding="0" cellspacing="0">
  250. <tr><th class="navbar" align="center"
  251. ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
  252. </tr></table></th>
  253. </tr>
  254. </table>
  255. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  256. <tr>
  257. <td align="left" class="footer">
  258. Generated by Epydoc 3.0.1 on Wed Jun 8 01:57:42 2011
  259. </td>
  260. <td align="right" class="footer">
  261. <a target="mainFrame" href="http://epydoc.sourceforge.net"
  262. >http://epydoc.sourceforge.net</a>
  263. </td>
  264. </tr>
  265. </table>
  266. <script type="text/javascript">
  267. <!--
  268. // Private objects are initially displayed (because if
  269. // javascript is turned off then we want them to be
  270. // visible); but by default, we want to hide them. So hide
  271. // them unless we have a cookie that says to show them.
  272. checkCookie();
  273. // -->
  274. </script>
  275. </body>
  276. </html>