/doc/tagger.tagger.Stemmer-class.html
http://github.com/apresta/tagger · HTML · 333 lines · 288 code · 23 blank · 22 comment · 0 complexity · bbd97e80b562f54c05968b4ca666cb97 MD5 · raw file
- <?xml version="1.0" encoding="ascii"?>
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
- <head>
- <title>tagger.tagger.Stemmer</title>
- <link rel="stylesheet" href="epydoc.css" type="text/css" />
- <script type="text/javascript" src="epydoc.js"></script>
- </head>
- <body bgcolor="white" text="black" link="blue" vlink="#204080"
- alink="#204080">
- <!-- ==================== NAVIGATION BAR ==================== -->
- <table class="navbar" border="0" width="100%" cellpadding="0"
- bgcolor="#a0c0ff" cellspacing="0">
- <tr valign="middle">
- <!-- Tree link -->
- <th> <a
- href="module-tree.html">Trees</a> </th>
- <!-- Index link -->
- <th> <a
- href="identifier-index.html">Indices</a> </th>
- <!-- Help link -->
- <th> <a
- href="help.html">Help</a> </th>
- <!-- Project homepage -->
- <th class="navbar" align="right" width="100%">
- <table border="0" cellpadding="0" cellspacing="0">
- <tr><th class="navbar" align="center"
- ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
- </tr></table></th>
- </tr>
- </table>
- <table width="100%" cellpadding="0" cellspacing="0">
- <tr valign="top">
- <td width="100%">
- <span class="breadcrumbs">
- Package tagger ::
- <a href="tagger.tagger-module.html">Module tagger</a> ::
- Class Stemmer
- </span>
- </td>
- <td>
- <table cellpadding="0" cellspacing="0">
- <!-- hide/show private -->
- <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
- onclick="toggle_private();">hide private</a>]</span></td></tr>
- <tr><td align="right"><span class="options"
- >[<a href="frames.html" target="_top">frames</a
- > | <a href="tagger.tagger.Stemmer-class.html"
- target="_top">no frames</a>]</span></td></tr>
- </table>
- </td>
- </tr>
- </table>
- <!-- ==================== CLASS DESCRIPTION ==================== -->
- <h1 class="epydoc">Class Stemmer</h1><p class="nomargin-top"><span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer">source code</a></span></p>
- <center>
- <center> <map id="class_hierarchy_for_stemmer" name="class_hierarchy_for_stemmer">
- <area shape="rect" id="node1" href="tagger.extras.FastStemmer-class.html" title="extras.FastStemmer" alt="extras.FastStemmer" coords="5,61,165,88"/>
- <area shape="rect" id="node2" href="tagger.tagger.Stemmer-class.html" title="Stemmer" alt="Stemmer" coords="44,5,127,32"/>
- </map>
- <img src="class_hierarchy_for_stemmer.gif" alt="Class hierarchy for Stemmer" usemap="#class_hierarchy_for_stemmer" ismap="ismap" class="graph-without-title" />
- </center>
- </center>
- <hr />
- <p>Class for extracting the stem of a word</p>
- <p>(by default it uses a simple open-source implementation of Porter's
- algorithm; this can be improved a lot, so experimenting with different
- ones is advisable; nltk.stem provides different algorithms for many
- languages)</p>
- <!-- ==================== INSTANCE METHODS ==================== -->
- <a name="section-InstanceMethods"></a>
- <table class="summary" border="1" cellpadding="3"
- cellspacing="0" width="100%" bgcolor="white">
- <tr bgcolor="#70b0f0" class="table-header">
- <td colspan="2" class="table-header">
- <table border="0" cellpadding="0" cellspacing="0" width="100%">
- <tr valign="top">
- <td align="left"><span class="table-header">Instance Methods</span></td>
- <td align="right" valign="top"
- ><span class="options">[<a href="#section-InstanceMethods"
- class="privatelink" onclick="toggle_private();"
- >hide private</a>]</span></td>
- </tr>
- </table>
- </td>
- </tr>
- <tr>
- <td width="15%" align="right" valign="top" class="summary">
- <span class="summary-type"> </span>
- </td><td class="summary">
- <table width="100%" cellpadding="0" cellspacing="0" border="0">
- <tr>
- <td><span class="summary-sig"><a href="tagger.tagger.Stemmer-class.html#__call__" class="summary-sig-name">__call__</a>(<span class="summary-sig-arg">self</span>,
- <span class="summary-sig-arg">tag</span>)</span><br />
- Returns:
- the stemmed tag</td>
- <td align="right" valign="top">
- <span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.__call__">source code</a></span>
-
- </td>
- </tr>
- </table>
-
- </td>
- </tr>
- <tr>
- <td width="15%" align="right" valign="top" class="summary">
- <span class="summary-type"> </span>
- </td><td class="summary">
- <table width="100%" cellpadding="0" cellspacing="0" border="0">
- <tr>
- <td><span class="summary-sig"><a href="tagger.tagger.Stemmer-class.html#__init__" class="summary-sig-name">__init__</a>(<span class="summary-sig-arg">self</span>,
- <span class="summary-sig-arg">stemmer</span>=<span class="summary-sig-default">None</span>)</span><br />
- Returns:
- a new <a href="tagger.tagger.Stemmer-class.html"
- class="link">Stemmer</a> object</td>
- <td align="right" valign="top">
- <span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.__init__">source code</a></span>
-
- </td>
- </tr>
- </table>
-
- </td>
- </tr>
- <tr>
- <td width="15%" align="right" valign="top" class="summary">
- <span class="summary-type"> </span>
- </td><td class="summary">
- <table width="100%" cellpadding="0" cellspacing="0" border="0">
- <tr>
- <td><span class="summary-sig"><a href="tagger.tagger.Stemmer-class.html#preprocess" class="summary-sig-name">preprocess</a>(<span class="summary-sig-arg">self</span>,
- <span class="summary-sig-arg">string</span>)</span><br />
- Returns:
- the processed string</td>
- <td align="right" valign="top">
- <span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.preprocess">source code</a></span>
-
- </td>
- </tr>
- </table>
-
- </td>
- </tr>
- </table>
- <!-- ==================== CLASS VARIABLES ==================== -->
- <a name="section-ClassVariables"></a>
- <table class="summary" border="1" cellpadding="3"
- cellspacing="0" width="100%" bgcolor="white">
- <tr bgcolor="#70b0f0" class="table-header">
- <td colspan="2" class="table-header">
- <table border="0" cellpadding="0" cellspacing="0" width="100%">
- <tr valign="top">
- <td align="left"><span class="table-header">Class Variables</span></td>
- <td align="right" valign="top"
- ><span class="options">[<a href="#section-ClassVariables"
- class="privatelink" onclick="toggle_private();"
- >hide private</a>]</span></td>
- </tr>
- </table>
- </td>
- </tr>
- <tr>
- <td width="15%" align="right" valign="top" class="summary">
- <span class="summary-type"> </span>
- </td><td class="summary">
- <a name="match_contractions"></a><span class="summary-name">match_contractions</span> = <code title="re.compile(r'(\w+)\'(m|re|d|ve|s|ll|t)?')">re.compile(r'<code class="re-group">(</code>\w<code class="re-op">+</code><code class="re-group">)</code>\'<code class="re-group">(</code>m<code class="re-op">|</code>re<code class="re-op">|</code>d<code class="re-op">|</code>ve<code class="re-op">|</code>s<code class="re-op">|</code>ll<code class="re-op">|</code>t<code class="re-group">)</code><code class="re-op">?</code>')</code>
- </td>
- </tr>
- <tr>
- <td width="15%" align="right" valign="top" class="summary">
- <span class="summary-type"> </span>
- </td><td class="summary">
- <a name="match_hyphens"></a><span class="summary-name">match_hyphens</span> = <code title="re.compile(r'\b[-_]\b')">re.compile(r'\b<code class="re-group">[</code>-_<code class="re-group">]</code>\b')</code>
- </td>
- </tr>
- </table>
- <!-- ==================== METHOD DETAILS ==================== -->
- <a name="section-MethodDetails"></a>
- <table class="details" border="1" cellpadding="3"
- cellspacing="0" width="100%" bgcolor="white">
- <tr bgcolor="#70b0f0" class="table-header">
- <td colspan="2" class="table-header">
- <table border="0" cellpadding="0" cellspacing="0" width="100%">
- <tr valign="top">
- <td align="left"><span class="table-header">Method Details</span></td>
- <td align="right" valign="top"
- ><span class="options">[<a href="#section-MethodDetails"
- class="privatelink" onclick="toggle_private();"
- >hide private</a>]</span></td>
- </tr>
- </table>
- </td>
- </tr>
- </table>
- <a name="__call__"></a>
- <div>
- <table class="details" border="1" cellpadding="3"
- cellspacing="0" width="100%" bgcolor="white">
- <tr><td>
- <table width="100%" cellpadding="0" cellspacing="0" border="0">
- <tr valign="top"><td>
- <h3 class="epydoc"><span class="sig"><span class="sig-name">__call__</span>(<span class="sig-arg">self</span>,
- <span class="sig-arg">tag</span>)</span>
- <br /><em class="fname">(Call operator)</em>
- </h3>
- </td><td align="right" valign="top"
- ><span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.__call__">source code</a></span>
- </td>
- </tr></table>
-
-
- <dl class="fields">
- <dt>Parameters:</dt>
- <dd><ul class="nomargin-top">
- <li><strong class="pname"><code>tag</code></strong> - the tag to be stemmed</li>
- </ul></dd>
- <dt>Returns:</dt>
- <dd>the stemmed tag</dd>
- </dl>
- </td></tr></table>
- </div>
- <a name="__init__"></a>
- <div>
- <table class="details" border="1" cellpadding="3"
- cellspacing="0" width="100%" bgcolor="white">
- <tr><td>
- <table width="100%" cellpadding="0" cellspacing="0" border="0">
- <tr valign="top"><td>
- <h3 class="epydoc"><span class="sig"><span class="sig-name">__init__</span>(<span class="sig-arg">self</span>,
- <span class="sig-arg">stemmer</span>=<span class="sig-default">None</span>)</span>
- <br /><em class="fname">(Constructor)</em>
- </h3>
- </td><td align="right" valign="top"
- ><span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.__init__">source code</a></span>
- </td>
- </tr></table>
-
-
- <dl class="fields">
- <dt>Parameters:</dt>
- <dd><ul class="nomargin-top">
- <li><strong class="pname"><code>stemmer</code></strong> - an object or module with a 'stem' method (defaults to
- stemming.porter2)</li>
- </ul></dd>
- <dt>Returns:</dt>
- <dd>a new <a href="tagger.tagger.Stemmer-class.html"
- class="link">Stemmer</a> object</dd>
- </dl>
- </td></tr></table>
- </div>
- <a name="preprocess"></a>
- <div>
- <table class="details" border="1" cellpadding="3"
- cellspacing="0" width="100%" bgcolor="white">
- <tr><td>
- <table width="100%" cellpadding="0" cellspacing="0" border="0">
- <tr valign="top"><td>
- <h3 class="epydoc"><span class="sig"><span class="sig-name">preprocess</span>(<span class="sig-arg">self</span>,
- <span class="sig-arg">string</span>)</span>
- </h3>
- </td><td align="right" valign="top"
- ><span class="codelink"><a href="tagger.tagger-pysrc.html#Stemmer.preprocess">source code</a></span>
- </td>
- </tr></table>
-
-
- <dl class="fields">
- <dt>Parameters:</dt>
- <dd><ul class="nomargin-top">
- <li><strong class="pname"><code>string</code></strong> - a string to be treated before passing it to the stemmer</li>
- </ul></dd>
- <dt>Returns:</dt>
- <dd>the processed string</dd>
- </dl>
- </td></tr></table>
- </div>
- <br />
- <!-- ==================== NAVIGATION BAR ==================== -->
- <table class="navbar" border="0" width="100%" cellpadding="0"
- bgcolor="#a0c0ff" cellspacing="0">
- <tr valign="middle">
- <!-- Tree link -->
- <th> <a
- href="module-tree.html">Trees</a> </th>
- <!-- Index link -->
- <th> <a
- href="identifier-index.html">Indices</a> </th>
- <!-- Help link -->
- <th> <a
- href="help.html">Help</a> </th>
- <!-- Project homepage -->
- <th class="navbar" align="right" width="100%">
- <table border="0" cellpadding="0" cellspacing="0">
- <tr><th class="navbar" align="center"
- ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
- </tr></table></th>
- </tr>
- </table>
- <table border="0" cellpadding="0" cellspacing="0" width="100%">
- <tr>
- <td align="left" class="footer">
- Generated by Epydoc 3.0.1 on Wed Jun 8 01:57:46 2011
- </td>
- <td align="right" class="footer">
- <a target="mainFrame" href="http://epydoc.sourceforge.net"
- >http://epydoc.sourceforge.net</a>
- </td>
- </tr>
- </table>
- <script type="text/javascript">
- <!--
- // Private objects are initially displayed (because if
- // javascript is turned off then we want them to be
- // visible); but by default, we want to hide them. So hide
- // them unless we have a cookie that says to show them.
- checkCookie();
- // -->
- </script>
- </body>
- </html>