PageRenderTime 34ms CodeModel.GetById 19ms app.highlight 10ms RepoModel.GetById 2ms app.codeStats 0ms

/doc/tagger.Reader-class.html

http://github.com/apresta/tagger
HTML | 299 lines | 258 code | 19 blank | 22 comment | 0 complexity | 6f28b3f54ef6d1170b8206ff6d5684ef MD5 | raw file
  1<?xml version="1.0" encoding="ascii"?>
  2<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  3          "DTD/xhtml1-transitional.dtd">
  4<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  5<head>
  6  <title>tagger.Reader</title>
  7  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  8  <script type="text/javascript" src="epydoc.js"></script>
  9</head>
 10
 11<body bgcolor="white" text="black" link="blue" vlink="#204080"
 12      alink="#204080">
 13<!-- ==================== NAVIGATION BAR ==================== -->
 14<table class="navbar" border="0" width="100%" cellpadding="0"
 15       bgcolor="#a0c0ff" cellspacing="0">
 16  <tr valign="middle">
 17
 18  <!-- Tree link -->
 19      <th>&nbsp;&nbsp;&nbsp;<a
 20        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
 21
 22  <!-- Index link -->
 23      <th>&nbsp;&nbsp;&nbsp;<a
 24        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
 25
 26  <!-- Help link -->
 27      <th>&nbsp;&nbsp;&nbsp;<a
 28        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
 29
 30  <!-- Project homepage -->
 31      <th class="navbar" align="right" width="100%">
 32        <table border="0" cellpadding="0" cellspacing="0">
 33          <tr><th class="navbar" align="center"
 34            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
 35          </tr></table></th>
 36  </tr>
 37</table>
 38<table width="100%" cellpadding="0" cellspacing="0">
 39  <tr valign="top">
 40    <td width="100%">
 41      <span class="breadcrumbs">
 42        <a href="tagger-module.html">Module&nbsp;tagger</a> ::
 43        Class&nbsp;Reader
 44      </span>
 45    </td>
 46    <td>
 47      <table cellpadding="0" cellspacing="0">
 48        <!-- hide/show private -->
 49        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
 50    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
 51        <tr><td align="right"><span class="options"
 52            >[<a href="frames.html" target="_top">frames</a
 53            >]&nbsp;|&nbsp;[<a href="tagger.Reader-class.html"
 54            target="_top">no&nbsp;frames</a>]</span></td></tr>
 55      </table>
 56    </td>
 57  </tr>
 58</table>
 59<!-- ==================== CLASS DESCRIPTION ==================== -->
 60<h1 class="epydoc">Class Reader</h1><p class="nomargin-top"><span class="codelink"><a href="tagger-pysrc.html#Reader">source&nbsp;code</a></span></p>
 61<center>
 62<center>  <map id="class_hierarchy_for_reader" name="class_hierarchy_for_reader">
 63<area shape="rect" id="node1" href="extras.HTMLReader-class.html" title="extras.HTMLReader" alt="extras.HTMLReader" coords="188,117,335,144"/>
 64<area shape="rect" id="node2" href="extras.SimpleReader-class.html" title="extras.SimpleReader" alt="extras.SimpleReader" coords="5,61,157,88"/>
 65<area shape="rect" id="node3" href="extras.UnicodeReader-class.html" title="extras.UnicodeReader" alt="extras.UnicodeReader" coords="181,61,341,88"/>
 66<area shape="rect" id="node4" href="tagger.Reader-class.html" title="Reader" alt="Reader" coords="137,5,204,32"/>
 67</map>
 68  <img src="class_hierarchy_for_reader.gif" alt="Class hierarchy for Reader" usemap="#class_hierarchy_for_reader" ismap="ismap" class="graph-without-title" />
 69</center>
 70</center>
 71<hr />
 72<p>Class for parsing a string of text to obtain tags</p>
 73  <p>(it just turns the string to lowercase and splits it according to 
 74  whitespace and punctuation, identifying proper nouns and terminal words;
 75  different rules and formats other than plain text could be used)</p>
 76
 77<!-- ==================== INSTANCE METHODS ==================== -->
 78<a name="section-InstanceMethods"></a>
 79<table class="summary" border="1" cellpadding="3"
 80       cellspacing="0" width="100%" bgcolor="white">
 81<tr bgcolor="#70b0f0" class="table-header">
 82  <td colspan="2" class="table-header">
 83    <table border="0" cellpadding="0" cellspacing="0" width="100%">
 84      <tr valign="top">
 85        <td align="left"><span class="table-header">Instance Methods</span></td>
 86        <td align="right" valign="top"
 87         ><span class="options">[<a href="#section-InstanceMethods"
 88         class="privatelink" onclick="toggle_private();"
 89         >hide private</a>]</span></td>
 90      </tr>
 91    </table>
 92  </td>
 93</tr>
 94<tr>
 95    <td width="15%" align="right" valign="top" class="summary">
 96      <span class="summary-type">&nbsp;</span>
 97    </td><td class="summary">
 98      <table width="100%" cellpadding="0" cellspacing="0" border="0">
 99        <tr>
100          <td><span class="summary-sig"><a href="tagger.Reader-class.html#__call__" class="summary-sig-name">__call__</a>(<span class="summary-sig-arg">self</span>,
101        <span class="summary-sig-arg">text</span>)</span><br />
102      Returns:
103      a list of tags respecting the order in the text</td>
104          <td align="right" valign="top">
105            <span class="codelink"><a href="tagger-pysrc.html#Reader.__call__">source&nbsp;code</a></span>
106            
107          </td>
108        </tr>
109      </table>
110      
111    </td>
112  </tr>
113<tr>
114    <td width="15%" align="right" valign="top" class="summary">
115      <span class="summary-type">&nbsp;</span>
116    </td><td class="summary">
117      <table width="100%" cellpadding="0" cellspacing="0" border="0">
118        <tr>
119          <td><span class="summary-sig"><a href="tagger.Reader-class.html#preprocess" class="summary-sig-name">preprocess</a>(<span class="summary-sig-arg">self</span>,
120        <span class="summary-sig-arg">text</span>)</span><br />
121      Returns:
122      the processed text</td>
123          <td align="right" valign="top">
124            <span class="codelink"><a href="tagger-pysrc.html#Reader.preprocess">source&nbsp;code</a></span>
125            
126          </td>
127        </tr>
128      </table>
129      
130    </td>
131  </tr>
132</table>
133<!-- ==================== CLASS VARIABLES ==================== -->
134<a name="section-ClassVariables"></a>
135<table class="summary" border="1" cellpadding="3"
136       cellspacing="0" width="100%" bgcolor="white">
137<tr bgcolor="#70b0f0" class="table-header">
138  <td colspan="2" class="table-header">
139    <table border="0" cellpadding="0" cellspacing="0" width="100%">
140      <tr valign="top">
141        <td align="left"><span class="table-header">Class Variables</span></td>
142        <td align="right" valign="top"
143         ><span class="options">[<a href="#section-ClassVariables"
144         class="privatelink" onclick="toggle_private();"
145         >hide private</a>]</span></td>
146      </tr>
147    </table>
148  </td>
149</tr>
150<tr>
151    <td width="15%" align="right" valign="top" class="summary">
152      <span class="summary-type">&nbsp;</span>
153    </td><td class="summary">
154        <a name="match_apostrophes"></a><span class="summary-name">match_apostrophes</span> = <code title="re.compile(r'`|\xe2\x80\x99')">re.compile(r'`<code class="re-op">|</code>\xe2\x80\x99')</code>
155    </td>
156  </tr>
157<tr>
158    <td width="15%" align="right" valign="top" class="summary">
159      <span class="summary-type">&nbsp;</span>
160    </td><td class="summary">
161        <a name="match_paragraphs"></a><span class="summary-name">match_paragraphs</span> = <code title="re.compile(r'[\.\?!\t\n\r\f\v]+')">re.compile(r'<code class="re-group">[</code>\.\?!\t\n\r\f\v<code class="re-group">]</code><code class="re-op">+</code>')</code>
162    </td>
163  </tr>
164<tr>
165    <td width="15%" align="right" valign="top" class="summary">
166      <span class="summary-type">&nbsp;</span>
167    </td><td class="summary">
168        <a name="match_phrases"></a><span class="summary-name">match_phrases</span> = <code title="re.compile(r'[,;:\(\)\[\]\{\}&lt;&gt;]+')">re.compile(r'<code class="re-group">[</code>,;:\(\)\[\]\{\}&lt;&gt;<code class="re-group">]</code><code class="re-op">+</code>')</code>
169    </td>
170  </tr>
171<tr>
172    <td width="15%" align="right" valign="top" class="summary">
173      <span class="summary-type">&nbsp;</span>
174    </td><td class="summary">
175        <a name="match_words"></a><span class="summary-name">match_words</span> = <code title="re.compile(r'[\w-\'_/&amp;]+')">re.compile(r'<code class="re-group">[</code>\w-\'_/&amp;<code class="re-group">]</code><code class="re-op">+</code>')</code>
176    </td>
177  </tr>
178</table>
179<!-- ==================== METHOD DETAILS ==================== -->
180<a name="section-MethodDetails"></a>
181<table class="details" border="1" cellpadding="3"
182       cellspacing="0" width="100%" bgcolor="white">
183<tr bgcolor="#70b0f0" class="table-header">
184  <td colspan="2" class="table-header">
185    <table border="0" cellpadding="0" cellspacing="0" width="100%">
186      <tr valign="top">
187        <td align="left"><span class="table-header">Method Details</span></td>
188        <td align="right" valign="top"
189         ><span class="options">[<a href="#section-MethodDetails"
190         class="privatelink" onclick="toggle_private();"
191         >hide private</a>]</span></td>
192      </tr>
193    </table>
194  </td>
195</tr>
196</table>
197<a name="__call__"></a>
198<div>
199<table class="details" border="1" cellpadding="3"
200       cellspacing="0" width="100%" bgcolor="white">
201<tr><td>
202  <table width="100%" cellpadding="0" cellspacing="0" border="0">
203  <tr valign="top"><td>
204  <h3 class="epydoc"><span class="sig"><span class="sig-name">__call__</span>(<span class="sig-arg">self</span>,
205        <span class="sig-arg">text</span>)</span>
206    <br /><em class="fname">(Call operator)</em>
207  </h3>
208  </td><td align="right" valign="top"
209    ><span class="codelink"><a href="tagger-pysrc.html#Reader.__call__">source&nbsp;code</a></span>&nbsp;
210    </td>
211  </tr></table>
212  
213  
214  <dl class="fields">
215    <dt>Parameters:</dt>
216    <dd><ul class="nomargin-top">
217        <li><strong class="pname"><code>text</code></strong> - the string of text to be tagged</li>
218    </ul></dd>
219    <dt>Returns:</dt>
220        <dd>a list of tags respecting the order in the text</dd>
221  </dl>
222</td></tr></table>
223</div>
224<a name="preprocess"></a>
225<div>
226<table class="details" border="1" cellpadding="3"
227       cellspacing="0" width="100%" bgcolor="white">
228<tr><td>
229  <table width="100%" cellpadding="0" cellspacing="0" border="0">
230  <tr valign="top"><td>
231  <h3 class="epydoc"><span class="sig"><span class="sig-name">preprocess</span>(<span class="sig-arg">self</span>,
232        <span class="sig-arg">text</span>)</span>
233  </h3>
234  </td><td align="right" valign="top"
235    ><span class="codelink"><a href="tagger-pysrc.html#Reader.preprocess">source&nbsp;code</a></span>&nbsp;
236    </td>
237  </tr></table>
238  
239  
240  <dl class="fields">
241    <dt>Parameters:</dt>
242    <dd><ul class="nomargin-top">
243        <li><strong class="pname"><code>text</code></strong> - a string containing the text document on which to perform any required 
244          transformation before splitting</li>
245    </ul></dd>
246    <dt>Returns:</dt>
247        <dd>the processed text</dd>
248  </dl>
249</td></tr></table>
250</div>
251<br />
252<!-- ==================== NAVIGATION BAR ==================== -->
253<table class="navbar" border="0" width="100%" cellpadding="0"
254       bgcolor="#a0c0ff" cellspacing="0">
255  <tr valign="middle">
256
257  <!-- Tree link -->
258      <th>&nbsp;&nbsp;&nbsp;<a
259        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
260
261  <!-- Index link -->
262      <th>&nbsp;&nbsp;&nbsp;<a
263        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
264
265  <!-- Help link -->
266      <th>&nbsp;&nbsp;&nbsp;<a
267        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
268
269  <!-- Project homepage -->
270      <th class="navbar" align="right" width="100%">
271        <table border="0" cellpadding="0" cellspacing="0">
272          <tr><th class="navbar" align="center"
273            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
274          </tr></table></th>
275  </tr>
276</table>
277<table border="0" cellpadding="0" cellspacing="0" width="100%">
278  <tr>
279    <td align="left" class="footer">
280    Generated by Epydoc 3.0.1 on Fri May 13 11:13:08 2011
281    </td>
282    <td align="right" class="footer">
283      <a target="mainFrame" href="http://epydoc.sourceforge.net"
284        >http://epydoc.sourceforge.net</a>
285    </td>
286  </tr>
287</table>
288
289<script type="text/javascript">
290  <!--
291  // Private objects are initially displayed (because if
292  // javascript is turned off then we want them to be
293  // visible); but by default, we want to hide them.  So hide
294  // them unless we have a cookie that says to show them.
295  checkCookie();
296  // -->
297</script>
298</body>
299</html>