/doc/tagger.tagger.Stemmer-class.html

http://github.com/apresta/tagger · HTML · 333 lines · 288 code · 23 blank · 22 comment · 0 complexity · bbd97e80b562f54c05968b4ca666cb97 MD5 · raw file

  1. <?xml version="1.0" encoding="ascii"?>
  2. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  3. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><!-- absolute system identifier: a relative "DTD/..." path only resolves if a DTD directory is published next to this page -->
  4. <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  5. <head>
  6. <title>tagger.tagger.Stemmer</title>
  7. <link rel="stylesheet" href="epydoc.css" type="text/css" />
  8. <script type="text/javascript" src="epydoc.js"></script>
  9. </head>
  10. <body bgcolor="white" text="black" link="blue" vlink="#204080"
  11. alink="#204080">
  12. <!-- ==================== NAVIGATION BAR (top of page) ==================== -->
  13. <table class="navbar" border="0" width="100%" cellpadding="0"
  14. bgcolor="#a0c0ff" cellspacing="0">
  15. <tr valign="middle">
  16. <!-- Tree link: goes to module-tree.html -->
  17. <th>&nbsp;&nbsp;&nbsp;<a
  18. href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
  19. <!-- Index link: goes to identifier-index.html -->
  20. <th>&nbsp;&nbsp;&nbsp;<a
  21. href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
  22. <!-- Help link: goes to help.html -->
  23. <th>&nbsp;&nbsp;&nbsp;<a
  24. href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
  25. <!-- Project homepage: right-aligned cell taking the remaining width; target="_top" makes the link replace the whole window instead of loading inside a frame -->
  26. <th class="navbar" align="right" width="100%">
  27. <table border="0" cellpadding="0" cellspacing="0">
  28. <tr><th class="navbar" align="center"
  29. ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
  30. </tr></table></th>
  31. </tr>
  32. </table>
  33. <table width="100%" cellpadding="0" cellspacing="0">
  34. <tr valign="top">
  35. <td width="100%">
  36. <span class="breadcrumbs">
  37. Package&nbsp;tagger ::
  38. <a href="tagger.tagger-module.html">Module&nbsp;tagger</a> ::
  39. Class&nbsp;Stemmer
  40. </span>
  41. </td>
  42. <td>
  43. <table cellpadding="0" cellspacing="0">
  44. <!-- hide/show private: toggle_private() is not defined in this page; presumably supplied by epydoc.js (TODO confirm) -->
  45. <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
  46. onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr><!-- next row: frames / no-frames switch, rendered as one bracketed group "[frames | no frames]" -->
  47. <tr><td align="right"><span class="options"
  48. >[<a href="frames.html" target="_top">frames</a
  49. >&nbsp;|&nbsp;<a href="tagger.tagger.Stemmer-class.html"
  50. target="_top">no&nbsp;frames</a>]</span></td></tr>
  51. </table>
  52. </td>
  53. </tr>
  54. </table>
  55. <!-- ==================== CLASS DESCRIPTION: heading, clickable class hierarchy graph, class docstring ==================== -->
  56. <h1 class="epydoc">Class Stemmer</h1><p class="nomargin-top"><span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer">source&nbsp;code</a></span></p>
  57. <center>
  58. <center> <map id="class_hierarchy_for_stemmer" name="class_hierarchy_for_stemmer">
  59. <area shape="rect" id="node1" href="tagger.extras.FastStemmer-class.html" title="extras.FastStemmer" alt="extras.FastStemmer" coords="5,61,165,88"/>
  60. <area shape="rect" id="node2" href="tagger.tagger.Stemmer-class.html" title="Stemmer" alt="Stemmer" coords="44,5,127,32"/>
  61. </map><!-- each linked area carries alt text equal to its title so the links stay usable without the image -->
  62. <img src="class_hierarchy_for_stemmer.gif" alt="Class hierarchy for Stemmer" usemap="#class_hierarchy_for_stemmer" ismap="ismap" class="graph-without-title" />
  63. </center>
  64. </center>
  65. <hr />
  66. <p>Class for extracting the stem of a word</p>
  67. <p>(by default it uses a simple open-source implementation of Porter's
  68. algorithm; this can be improved a lot, so experimenting with different
  69. ones is advisable; nltk.stem provides different algorithms for many
  70. languages)</p>
  71. <!-- ==================== INSTANCE METHODS (summary table; each method name links to its entry under Method Details) ==================== -->
  72. <a name="section-InstanceMethods"></a>
  73. <table class="summary" border="1" cellpadding="3"
  74. cellspacing="0" width="100%" bgcolor="white">
  75. <tr bgcolor="#70b0f0" class="table-header">
  76. <td colspan="2" class="table-header">
  77. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  78. <tr valign="top">
  79. <td align="left"><span class="table-header">Instance Methods</span></td>
  80. <td align="right" valign="top"
  81. ><span class="options">[<a href="#section-InstanceMethods"
  82. class="privatelink" onclick="toggle_private();"
  83. >hide private</a>]</span></td>
  84. </tr>
  85. </table>
  86. </td>
  87. </tr>
  88. <tr><!-- summary row: __call__(self, tag); left cell is the (empty) type column -->
  89. <td width="15%" align="right" valign="top" class="summary">
  90. <span class="summary-type">&nbsp;</span>
  91. </td><td class="summary">
  92. <table width="100%" cellpadding="0" cellspacing="0" border="0">
  93. <tr>
  94. <td><span class="summary-sig"><a href="tagger.tagger.Stemmer-class.html#__call__" class="summary-sig-name">__call__</a>(<span class="summary-sig-arg">self</span>,
  95. <span class="summary-sig-arg">tag</span>)</span><br />
  96. Returns:
  97. the stemmed tag</td>
  98. <td align="right" valign="top">
  99. <span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.__call__">source&nbsp;code</a></span>
  100. </td>
  101. </tr>
  102. </table>
  103. </td>
  104. </tr>
  105. <tr><!-- summary row: __init__(self, stemmer=None) -->
  106. <td width="15%" align="right" valign="top" class="summary">
  107. <span class="summary-type">&nbsp;</span>
  108. </td><td class="summary">
  109. <table width="100%" cellpadding="0" cellspacing="0" border="0">
  110. <tr>
  111. <td><span class="summary-sig"><a href="tagger.tagger.Stemmer-class.html#__init__" class="summary-sig-name">__init__</a>(<span class="summary-sig-arg">self</span>,
  112. <span class="summary-sig-arg">stemmer</span>=<span class="summary-sig-default">None</span>)</span><br />
  113. Returns:
  114. a new <a href="tagger.tagger.Stemmer-class.html"
  115. class="link">Stemmer</a> object</td>
  116. <td align="right" valign="top">
  117. <span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.__init__">source&nbsp;code</a></span>
  118. </td>
  119. </tr>
  120. </table>
  121. </td>
  122. </tr>
  123. <tr><!-- summary row: preprocess(self, string) -->
  124. <td width="15%" align="right" valign="top" class="summary">
  125. <span class="summary-type">&nbsp;</span>
  126. </td><td class="summary">
  127. <table width="100%" cellpadding="0" cellspacing="0" border="0">
  128. <tr>
  129. <td><span class="summary-sig"><a href="tagger.tagger.Stemmer-class.html#preprocess" class="summary-sig-name">preprocess</a>(<span class="summary-sig-arg">self</span>,
  130. <span class="summary-sig-arg">string</span>)</span><br />
  131. Returns:
  132. the processed string</td>
  133. <td align="right" valign="top">
  134. <span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.preprocess">source&nbsp;code</a></span>
  135. </td>
  136. </tr>
  137. </table>
  138. </td>
  139. </tr>
  140. </table>
  141. <!-- ==================== CLASS VARIABLES (summary table; the title attribute of each outer code element holds the plain-text value, the nested code elements mark regex groups and operators for styling) ==================== -->
  142. <a name="section-ClassVariables"></a>
  143. <table class="summary" border="1" cellpadding="3"
  144. cellspacing="0" width="100%" bgcolor="white">
  145. <tr bgcolor="#70b0f0" class="table-header">
  146. <td colspan="2" class="table-header">
  147. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  148. <tr valign="top">
  149. <td align="left"><span class="table-header">Class Variables</span></td>
  150. <td align="right" valign="top"
  151. ><span class="options">[<a href="#section-ClassVariables"
  152. class="privatelink" onclick="toggle_private();"
  153. >hide private</a>]</span></td>
  154. </tr>
  155. </table>
  156. </td>
  157. </tr>
  158. <tr><!-- variable row: match_contractions (compiled regular expression) -->
  159. <td width="15%" align="right" valign="top" class="summary">
  160. <span class="summary-type">&nbsp;</span>
  161. </td><td class="summary">
  162. <a name="match_contractions"></a><span class="summary-name">match_contractions</span> = <code title="re.compile(r'(\w+)\'(m|re|d|ve|s|ll|t)?')">re.compile(r'<code class="re-group">(</code>\w<code class="re-op">+</code><code class="re-group">)</code>\'<code class="re-group">(</code>m<code class="re-op">|</code>re<code class="re-op">|</code>d<code class="re-op">|</code>ve<code class="re-op">|</code>s<code class="re-op">|</code>ll<code class="re-op">|</code>t<code class="re-group">)</code><code class="re-op">?</code>')</code>
  163. </td>
  164. </tr>
  165. <tr><!-- variable row: match_hyphens (compiled regular expression) -->
  166. <td width="15%" align="right" valign="top" class="summary">
  167. <span class="summary-type">&nbsp;</span>
  168. </td><td class="summary">
  169. <a name="match_hyphens"></a><span class="summary-name">match_hyphens</span> = <code title="re.compile(r'\b[-_]\b')">re.compile(r'\b<code class="re-group">[</code>-_<code class="re-group">]</code>\b')</code>
  170. </td>
  171. </tr>
  172. </table>
  173. <!-- ==================== METHOD DETAILS (this table is only the section header; each method follows in its own anchored div) ==================== -->
  174. <a name="section-MethodDetails"></a>
  175. <table class="details" border="1" cellpadding="3"
  176. cellspacing="0" width="100%" bgcolor="white">
  177. <tr bgcolor="#70b0f0" class="table-header">
  178. <td colspan="2" class="table-header">
  179. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  180. <tr valign="top">
  181. <td align="left"><span class="table-header">Method Details</span></td>
  182. <td align="right" valign="top"
  183. ><span class="options">[<a href="#section-MethodDetails"
  184. class="privatelink" onclick="toggle_private();"
  185. >hide private</a>]</span></td>
  186. </tr>
  187. </table>
  188. </td>
  189. </tr>
  190. </table>
  191. <a name="__call__"></a><!-- anchor targeted by the #__call__ link in the Instance Methods summary -->
  192. <div><!-- detail entry for __call__: signature, source link, then parameter and return fields -->
  193. <table class="details" border="1" cellpadding="3"
  194. cellspacing="0" width="100%" bgcolor="white">
  195. <tr><td>
  196. <table width="100%" cellpadding="0" cellspacing="0" border="0">
  197. <tr valign="top"><td>
  198. <h3 class="epydoc"><span class="sig"><span class="sig-name">__call__</span>(<span class="sig-arg">self</span>,
  199. <span class="sig-arg">tag</span>)</span>
  200. <br /><em class="fname">(Call operator)</em>
  201. </h3>
  202. </td><td align="right" valign="top"
  203. ><span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.__call__">source&nbsp;code</a></span>&nbsp;
  204. </td>
  205. </tr></table>
  206. <dl class="fields">
  207. <dt>Parameters:</dt>
  208. <dd><ul class="nomargin-top">
  209. <li><strong class="pname"><code>tag</code></strong> - the tag to be stemmed</li>
  210. </ul></dd>
  211. <dt>Returns:</dt>
  212. <dd>the stemmed tag</dd>
  213. </dl>
  214. </td></tr></table>
  215. </div>
  216. <a name="__init__"></a><!-- anchor targeted by the #__init__ link in the Instance Methods summary -->
  217. <div><!-- detail entry for __init__; NOTE(review): the signature shows stemmer=None while the parameter text names stemming.porter2 as the default, so None is presumably replaced inside the constructor (confirm in tagger.tagger-pysrc.html) -->
  218. <table class="details" border="1" cellpadding="3"
  219. cellspacing="0" width="100%" bgcolor="white">
  220. <tr><td>
  221. <table width="100%" cellpadding="0" cellspacing="0" border="0">
  222. <tr valign="top"><td>
  223. <h3 class="epydoc"><span class="sig"><span class="sig-name">__init__</span>(<span class="sig-arg">self</span>,
  224. <span class="sig-arg">stemmer</span>=<span class="sig-default">None</span>)</span>
  225. <br /><em class="fname">(Constructor)</em>
  226. </h3>
  227. </td><td align="right" valign="top"
  228. ><span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.__init__">source&nbsp;code</a></span>&nbsp;
  229. </td>
  230. </tr></table>
  231. <dl class="fields">
  232. <dt>Parameters:</dt>
  233. <dd><ul class="nomargin-top">
  234. <li><strong class="pname"><code>stemmer</code></strong> - an object or module with a 'stem' method (defaults to
  235. stemming.porter2)</li>
  236. </ul></dd>
  237. <dt>Returns:</dt>
  238. <dd>a new <a href="tagger.tagger.Stemmer-class.html"
  239. class="link">Stemmer</a> object</dd>
  240. </dl>
  241. </td></tr></table>
  242. </div>
  243. <a name="preprocess"></a><!-- anchor targeted by the #preprocess link in the Instance Methods summary -->
  244. <div><!-- detail entry for preprocess: signature, source link, then parameter and return fields -->
  245. <table class="details" border="1" cellpadding="3"
  246. cellspacing="0" width="100%" bgcolor="white">
  247. <tr><td>
  248. <table width="100%" cellpadding="0" cellspacing="0" border="0">
  249. <tr valign="top"><td>
  250. <h3 class="epydoc"><span class="sig"><span class="sig-name">preprocess</span>(<span class="sig-arg">self</span>,
  251. <span class="sig-arg">string</span>)</span>
  252. </h3>
  253. </td><td align="right" valign="top"
  254. ><span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.preprocess">source&nbsp;code</a></span>&nbsp;
  255. </td>
  256. </tr></table>
  257. <dl class="fields">
  258. <dt>Parameters:</dt>
  259. <dd><ul class="nomargin-top">
  260. <li><strong class="pname"><code>string</code></strong> - a string to be treated before passing it to the stemmer</li>
  261. </ul></dd>
  262. <dt>Returns:</dt>
  263. <dd>the processed string</dd>
  264. </dl>
  265. </td></tr></table>
  266. </div>
  267. <br />
  268. <!-- ==================== NAVIGATION BAR (bottom of page; same links as the bar at the top) ==================== -->
  269. <table class="navbar" border="0" width="100%" cellpadding="0"
  270. bgcolor="#a0c0ff" cellspacing="0">
  271. <tr valign="middle">
  272. <!-- Tree link: goes to module-tree.html -->
  273. <th>&nbsp;&nbsp;&nbsp;<a
  274. href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
  275. <!-- Index link: goes to identifier-index.html -->
  276. <th>&nbsp;&nbsp;&nbsp;<a
  277. href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
  278. <!-- Help link: goes to help.html -->
  279. <th>&nbsp;&nbsp;&nbsp;<a
  280. href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
  281. <!-- Project homepage: right-aligned cell taking the remaining width; target="_top" makes the link replace the whole window instead of loading inside a frame -->
  282. <th class="navbar" align="right" width="100%">
  283. <table border="0" cellpadding="0" cellspacing="0">
  284. <tr><th class="navbar" align="center"
  285. ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
  286. </tr></table></th>
  287. </tr>
  288. </table>
  289. <table border="0" cellpadding="0" cellspacing="0" width="100%"><!-- footer: generation stamp on the left, Epydoc home page link on the right -->
  290. <tr>
  291. <td align="left" class="footer">
  292. Generated by Epydoc 3.0.1 on Wed Jun 8 01:57:46 2011
  293. </td>
  294. <td align="right" class="footer">
  295. <a target="mainFrame" href="http://epydoc.sourceforge.net"
  296. >http://epydoc.sourceforge.net</a>
  297. </td>
  298. </tr>
  299. </table>
  300. <script type="text/javascript">
  301. <!--
  302. // Private objects start out visible so that they can still be
  303. // seen when JavaScript is turned off. With scripting available the
  304. // default is to hide them unless a cookie says to show them.
  305. // NOTE(review): checkCookie() is not defined here; presumably epydoc.js.
  306. checkCookie();
  307. // -->
  308. </script>
  309. </body>
  310. </html>