PageRenderTime 154ms CodeModel.GetById 17ms app.highlight 8ms RepoModel.GetById 125ms app.codeStats 1ms

/doc/tagger.tagger.Stemmer-class.html

http://github.com/apresta/tagger
HTML | 333 lines | 288 code | 23 blank | 22 comment | 0 complexity | bbd97e80b562f54c05968b4ca666cb97 MD5 | raw file
  1<?xml version="1.0" encoding="ascii"?>
  2<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  3          "DTD/xhtml1-transitional.dtd">
  4<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  5<head>
  6  <title>tagger.tagger.Stemmer</title>
   <!-- The stylesheet and script below are referenced by relative URL, so
        epydoc.css and epydoc.js are resolved against this page's own
        location. -->
   <!-- NOTE(review): toggle_private() and checkCookie() are called further
        down this page but are not defined inline; presumably epydoc.js
        provides them. Confirm before removing or deferring the script. -->
  7  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  8  <script type="text/javascript" src="epydoc.js"></script>
  9</head>
 10
 11<body bgcolor="white" text="black" link="blue" vlink="#204080"
 12      alink="#204080">
 13<!-- ==================== NAVIGATION BAR ==================== -->
   <!-- Top navigation bar: a one-row layout table with links to the module
        tree, the identifier index and the help page, followed by a
        full-width, right-aligned cell holding the project homepage link. -->
 14<table class="navbar" border="0" width="100%" cellpadding="0"
 15       bgcolor="#a0c0ff" cellspacing="0">
 16  <tr valign="middle">
 17
 18  <!-- Tree link -->
 19      <th>&nbsp;&nbsp;&nbsp;<a
 20        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
 21
 22  <!-- Index link -->
 23      <th>&nbsp;&nbsp;&nbsp;<a
 24        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
 25
 26  <!-- Help link -->
 27      <th>&nbsp;&nbsp;&nbsp;<a
 28        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
 29
 30  <!-- Project homepage -->
   <!-- The nested table's start tag is closed on the following line, right
        before the link, so no whitespace text sits between cell and link. -->
 31      <th class="navbar" align="right" width="100%">
 32        <table border="0" cellpadding="0" cellspacing="0">
 33          <tr><th class="navbar" align="center"
 34            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
 35          </tr></table></th>
 36  </tr>
 37</table>
 38<table width="100%" cellpadding="0" cellspacing="0">
 39  <tr valign="top">
   <!-- Left cell: breadcrumb trail (package :: module :: class). Only the
        module entry is a link. -->
 40    <td width="100%">
 41      <span class="breadcrumbs">
 42        Package&nbsp;tagger ::
 43        <a href="tagger.tagger-module.html">Module&nbsp;tagger</a> ::
 44        Class&nbsp;Stemmer
 45      </span>
 46    </td>
   <!-- Right cell: page options (private-member toggle and frames switch). -->
 47    <td>
 48      <table cellpadding="0" cellspacing="0">
 49        <!-- hide/show private -->
   <!-- NOTE(review): toggle_private() is not defined in this page;
        presumably it comes from epydoc.js. Confirm. -->
 50        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
 51    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
   <!-- Frames switch, rendered as "[frames | no frames]". A single pair of
        brackets encloses both links; both open in the top-level window. -->
 52        <tr><td align="right"><span class="options"
 53            >[<a href="frames.html" target="_top">frames</a
 54            >&nbsp;|&nbsp;<a href="tagger.tagger.Stemmer-class.html"
 55            target="_top">no&nbsp;frames</a>]</span></td></tr>
 56      </table>
 57    </td>
 58  </tr>
 59</table>
 60<!-- ==================== CLASS DESCRIPTION ==================== -->
 61<h1 class="epydoc">Class Stemmer</h1><p class="nomargin-top"><span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer">source&nbsp;code</a></span></p>
   <!-- Class hierarchy graph: a client-side image map. Each area covers one
        box of the image and links to that class's documentation page. The
        area alt text repeats the title so the links have an accessible
        name when the image is unavailable. -->
 63<center>  <map id="class_hierarchy_for_stemmer" name="class_hierarchy_for_stemmer">
 64<area shape="rect" id="node1" href="tagger.extras.FastStemmer-class.html" title="extras.FastStemmer" alt="extras.FastStemmer" coords="5,61,165,88"/>
 65<area shape="rect" id="node2" href="tagger.tagger.Stemmer-class.html" title="Stemmer" alt="Stemmer" coords="44,5,127,32"/>
 66</map>
   <!-- Only usemap is set: ismap selects a server-side map and applies to
        images inside a link, which this image is not. -->
 67  <img src="class_hierarchy_for_stemmer.gif" alt="Class hierarchy for Stemmer" usemap="#class_hierarchy_for_stemmer" class="graph-without-title" />
 68</center>
 70<hr />
 71<p>Class for extracting the stem of a word</p>
 72  <p>(by default it uses a simple open-source implementation of Porter's 
 73  algorithm; this can be improved a lot, so experimenting with different 
 74  ones is advisable; nltk.stem provides different algorithms for many 
 75  languages)</p>
 76
 77<!-- ==================== INSTANCE METHODS ==================== -->
   <!-- Summary table of the instance methods. After the header row, each
        row has an empty type cell and a cell holding: the signature (its
        name links to the matching detail anchor on this page), the return
        summary, and a link into the highlighted source page. -->
 78<a name="section-InstanceMethods"></a>
 79<table class="summary" border="1" cellpadding="3"
 80       cellspacing="0" width="100%" bgcolor="white">
 81<tr bgcolor="#70b0f0" class="table-header">
 82  <td colspan="2" class="table-header">
 83    <table border="0" cellpadding="0" cellspacing="0" width="100%">
 84      <tr valign="top">
 85        <td align="left"><span class="table-header">Instance Methods</span></td>
 86        <td align="right" valign="top"
 87         ><span class="options">[<a href="#section-InstanceMethods"
 88         class="privatelink" onclick="toggle_private();"
 89         >hide private</a>]</span></td>
 90      </tr>
 91    </table>
 92  </td>
 93</tr>
   <!-- Row: __call__(self, tag) -->
 94<tr>
 95    <td width="15%" align="right" valign="top" class="summary">
 96      <span class="summary-type">&nbsp;</span>
 97    </td><td class="summary">
 98      <table width="100%" cellpadding="0" cellspacing="0" border="0">
 99        <tr>
100          <td><span class="summary-sig"><a href="tagger.tagger.Stemmer-class.html#__call__" class="summary-sig-name">__call__</a>(<span class="summary-sig-arg">self</span>,
101        <span class="summary-sig-arg">tag</span>)</span><br />
102      Returns:
103      the stemmed tag</td>
104          <td align="right" valign="top">
105            <span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.__call__">source&nbsp;code</a></span>
106            
107          </td>
108        </tr>
109      </table>
110      
111    </td>
112  </tr>
   <!-- Row: __init__(self, stemmer=None) -->
113<tr>
114    <td width="15%" align="right" valign="top" class="summary">
115      <span class="summary-type">&nbsp;</span>
116    </td><td class="summary">
117      <table width="100%" cellpadding="0" cellspacing="0" border="0">
118        <tr>
119          <td><span class="summary-sig"><a href="tagger.tagger.Stemmer-class.html#__init__" class="summary-sig-name">__init__</a>(<span class="summary-sig-arg">self</span>,
120        <span class="summary-sig-arg">stemmer</span>=<span class="summary-sig-default">None</span>)</span><br />
121      Returns:
122      a new <a href="tagger.tagger.Stemmer-class.html" 
123      class="link">Stemmer</a> object</td>
124          <td align="right" valign="top">
125            <span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.__init__">source&nbsp;code</a></span>
126            
127          </td>
128        </tr>
129      </table>
130      
131    </td>
132  </tr>
   <!-- Row: preprocess(self, string) -->
133<tr>
134    <td width="15%" align="right" valign="top" class="summary">
135      <span class="summary-type">&nbsp;</span>
136    </td><td class="summary">
137      <table width="100%" cellpadding="0" cellspacing="0" border="0">
138        <tr>
139          <td><span class="summary-sig"><a href="tagger.tagger.Stemmer-class.html#preprocess" class="summary-sig-name">preprocess</a>(<span class="summary-sig-arg">self</span>,
140        <span class="summary-sig-arg">string</span>)</span><br />
141      Returns:
142      the processed string</td>
143          <td align="right" valign="top">
144            <span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.preprocess">source&nbsp;code</a></span>
145            
146          </td>
147        </tr>
148      </table>
149      
150    </td>
151  </tr>
152</table>
153<!-- ==================== CLASS VARIABLES ==================== -->
   <!-- Summary table of the class variables. Each value is shown inside a
        code element whose title attribute carries the plain-text value;
        the nested code elements (classes re-group and re-op) wrap the
        regex grouping and operator characters. Each row also defines the
        named anchor for its variable. -->
154<a name="section-ClassVariables"></a>
155<table class="summary" border="1" cellpadding="3"
156       cellspacing="0" width="100%" bgcolor="white">
157<tr bgcolor="#70b0f0" class="table-header">
158  <td colspan="2" class="table-header">
159    <table border="0" cellpadding="0" cellspacing="0" width="100%">
160      <tr valign="top">
161        <td align="left"><span class="table-header">Class Variables</span></td>
162        <td align="right" valign="top"
163         ><span class="options">[<a href="#section-ClassVariables"
164         class="privatelink" onclick="toggle_private();"
165         >hide private</a>]</span></td>
166      </tr>
167    </table>
168  </td>
169</tr>
   <!-- Row: match_contractions -->
170<tr>
171    <td width="15%" align="right" valign="top" class="summary">
172      <span class="summary-type">&nbsp;</span>
173    </td><td class="summary">
174        <a name="match_contractions"></a><span class="summary-name">match_contractions</span> = <code title="re.compile(r'(\w+)\'(m|re|d|ve|s|ll|t)?')">re.compile(r'<code class="re-group">(</code>\w<code class="re-op">+</code><code class="re-group">)</code>\'<code class="re-group">(</code>m<code class="re-op">|</code>re<code class="re-op">|</code>d<code class="re-op">|</code>ve<code class="re-op">|</code>s<code class="re-op">|</code>ll<code class="re-op">|</code>t<code class="re-group">)</code><code class="re-op">?</code>')</code>
175    </td>
176  </tr>
   <!-- Row: match_hyphens -->
177<tr>
178    <td width="15%" align="right" valign="top" class="summary">
179      <span class="summary-type">&nbsp;</span>
180    </td><td class="summary">
181        <a name="match_hyphens"></a><span class="summary-name">match_hyphens</span> = <code title="re.compile(r'\b[-_]\b')">re.compile(r'\b<code class="re-group">[</code>-_<code class="re-group">]</code>\b')</code>
182    </td>
183  </tr>
184</table>
185<!-- ==================== METHOD DETAILS ==================== -->
   <!-- Header of the method details section. This table holds only the
        section title row and is closed before the first entry; each method
        that follows is rendered in its own separate details table. -->
186<a name="section-MethodDetails"></a>
187<table class="details" border="1" cellpadding="3"
188       cellspacing="0" width="100%" bgcolor="white">
189<tr bgcolor="#70b0f0" class="table-header">
190  <td colspan="2" class="table-header">
191    <table border="0" cellpadding="0" cellspacing="0" width="100%">
192      <tr valign="top">
193        <td align="left"><span class="table-header">Method Details</span></td>
194        <td align="right" valign="top"
195         ><span class="options">[<a href="#section-MethodDetails"
196         class="privatelink" onclick="toggle_private();"
197         >hide private</a>]</span></td>
198      </tr>
199    </table>
200  </td>
201</tr>
202</table>
203<a name="__call__"></a>
   <!-- Detail entry for __call__(self, tag). The named anchor above is the
        target of the #__call__ link in the instance methods summary. The
        entry shows the signature, a link to the source listing, and the
        parameter and return descriptions. -->
204<div>
205<table class="details" border="1" cellpadding="3"
206       cellspacing="0" width="100%" bgcolor="white">
207<tr><td>
208  <table width="100%" cellpadding="0" cellspacing="0" border="0">
209  <tr valign="top"><td>
210  <h3 class="epydoc"><span class="sig"><span class="sig-name">__call__</span>(<span class="sig-arg">self</span>,
211        <span class="sig-arg">tag</span>)</span>
212    <br /><em class="fname">(Call operator)</em>
213  </h3>
214  </td><td align="right" valign="top"
215    ><span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.__call__">source&nbsp;code</a></span>&nbsp;
216    </td>
217  </tr></table>
218  
219  
220  <dl class="fields">
221    <dt>Parameters:</dt>
222    <dd><ul class="nomargin-top">
223        <li><strong class="pname"><code>tag</code></strong> - the tag to be stemmed</li>
224    </ul></dd>
225    <dt>Returns:</dt>
226        <dd>the stemmed tag</dd>
227  </dl>
228</td></tr></table>
229</div>
230<a name="__init__"></a>
   <!-- Detail entry for the constructor __init__(self, stemmer=None). The
        named anchor above is the target of the #__init__ link in the
        instance methods summary. -->
231<div>
232<table class="details" border="1" cellpadding="3"
233       cellspacing="0" width="100%" bgcolor="white">
234<tr><td>
235  <table width="100%" cellpadding="0" cellspacing="0" border="0">
236  <tr valign="top"><td>
237  <h3 class="epydoc"><span class="sig"><span class="sig-name">__init__</span>(<span class="sig-arg">self</span>,
238        <span class="sig-arg">stemmer</span>=<span class="sig-default">None</span>)</span>
239    <br /><em class="fname">(Constructor)</em>
240  </h3>
241  </td><td align="right" valign="top"
242    ><span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.__init__">source&nbsp;code</a></span>&nbsp;
243    </td>
244  </tr></table>
245  
246  
247  <dl class="fields">
248    <dt>Parameters:</dt>
249    <dd><ul class="nomargin-top">
250        <li><strong class="pname"><code>stemmer</code></strong> - an object or module with a 'stem' method (defaults to 
251          stemming.porter2)</li>
252    </ul></dd>
253    <dt>Returns:</dt>
254        <dd>a new <a href="tagger.tagger.Stemmer-class.html" 
255          class="link">Stemmer</a> object</dd>
256  </dl>
257</td></tr></table>
258</div>
259<a name="preprocess"></a>
   <!-- Detail entry for preprocess(self, string). The named anchor above is
        the target of the #preprocess link in the instance methods summary.
        Unlike the two entries before it, the heading carries no extra
        role label under the signature. -->
260<div>
261<table class="details" border="1" cellpadding="3"
262       cellspacing="0" width="100%" bgcolor="white">
263<tr><td>
264  <table width="100%" cellpadding="0" cellspacing="0" border="0">
265  <tr valign="top"><td>
266  <h3 class="epydoc"><span class="sig"><span class="sig-name">preprocess</span>(<span class="sig-arg">self</span>,
267        <span class="sig-arg">string</span>)</span>
268  </h3>
269  </td><td align="right" valign="top"
270    ><span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.preprocess">source&nbsp;code</a></span>&nbsp;
271    </td>
272  </tr></table>
273  
274  
275  <dl class="fields">
276    <dt>Parameters:</dt>
277    <dd><ul class="nomargin-top">
278        <li><strong class="pname"><code>string</code></strong> - a string to be treated before passing it to the stemmer</li>
279    </ul></dd>
280    <dt>Returns:</dt>
281        <dd>the processed string</dd>
282  </dl>
283</td></tr></table>
284</div>
285<br />
286<!-- ==================== NAVIGATION BAR ==================== -->
   <!-- Bottom navigation bar: a one-row layout table with links to the
        module tree, the identifier index and the help page, followed by a
        full-width, right-aligned cell holding the project homepage link. -->
287<table class="navbar" border="0" width="100%" cellpadding="0"
288       bgcolor="#a0c0ff" cellspacing="0">
289  <tr valign="middle">
290
291  <!-- Tree link -->
292      <th>&nbsp;&nbsp;&nbsp;<a
293        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
294
295  <!-- Index link -->
296      <th>&nbsp;&nbsp;&nbsp;<a
297        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
298
299  <!-- Help link -->
300      <th>&nbsp;&nbsp;&nbsp;<a
301        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
302
303  <!-- Project homepage -->
304      <th class="navbar" align="right" width="100%">
305        <table border="0" cellpadding="0" cellspacing="0">
306          <tr><th class="navbar" align="center"
307            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
308          </tr></table></th>
309  </tr>
310</table>
311<table border="0" cellpadding="0" cellspacing="0" width="100%">
   <!-- Page footer: generator name and build timestamp on the left, link to
        the generator's website on the right. -->
312  <tr>
313    <td align="left" class="footer">
314    Generated by Epydoc 3.0.1 on Wed Jun  8 01:57:46 2011
315    </td>
316    <td align="right" class="footer">
317      <a target="mainFrame" href="http://epydoc.sourceforge.net"
318        >http://epydoc.sourceforge.net</a>
319    </td>
320  </tr>
321</table>
322
323<script type="text/javascript">
324  <!--
325  // Private objects are initially displayed (because if
326  // javascript is turned off then we want them to be
327  // visible); but by default, we want to hide them.  So hide
328  // them unless we have a cookie that says to show them.
   // NOTE(review): checkCookie() is not defined in this page;
   // presumably it is provided by epydoc.js, which the head
   // section loads. Confirm before changing script loading.
329  checkCookie();
330  // -->
331</script>
332</body>
333</html>