PageRenderTime 24ms CodeModel.GetById 14ms app.highlight 5ms RepoModel.GetById 1ms app.codeStats 0ms

/doc/tagger.tagger-module.html

http://github.com/apresta/tagger
HTML | 249 lines | 217 code | 11 blank | 21 comment | 0 complexity | d3f93963216efdfea24f9d513192ebe7 MD5 | raw file
  1<?xml version="1.0" encoding="ascii"?>
  2<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  3          "DTD/xhtml1-transitional.dtd">
  4<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  5<head>
   <!-- Document head: the title is the dotted module name; presentation and
        scripting are pulled in from the sibling files epydoc.css and
        epydoc.js. NOTE(review): toggle_private() and checkCookie(), which
        are called later in this page, are not defined in this file and are
        presumably provided by epydoc.js; confirm. -->
  6  <title>tagger.tagger</title>
  7  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  8  <script type="text/javascript" src="epydoc.js"></script>
  9</head>
 10
 11<body bgcolor="white" text="black" link="blue" vlink="#204080"
 12      alink="#204080">
 13<!-- ==================== NAVIGATION BAR ==================== -->
   <!-- Top navigation bar: a single-row table with links to the module tree,
        the identifier index and the help page, followed by a full-width,
        right-aligned cell that holds the project homepage link. -->
 14<table class="navbar" border="0" width="100%" cellpadding="0"
 15       bgcolor="#a0c0ff" cellspacing="0">
 16  <tr valign="middle">
 17
 18  <!-- Tree link -->
 19      <th>&nbsp;&nbsp;&nbsp;<a
 20        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
 21
 22  <!-- Index link -->
 23      <th>&nbsp;&nbsp;&nbsp;<a
 24        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
 25
 26  <!-- Help link -->
 27      <th>&nbsp;&nbsp;&nbsp;<a
 28        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
 29
 30  <!-- Project homepage -->
   <!-- width="100%" makes this cell absorb the remaining row width, so the
        nested one-cell table ends up at the right edge. target="_top" loads
        the homepage in the full window rather than inside a frame. -->
 31      <th class="navbar" align="right" width="100%">
 32        <table border="0" cellpadding="0" cellspacing="0">
 33          <tr><th class="navbar" align="center"
 34            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
 35          </tr></table></th>
 36  </tr>
 37</table>
 38<table width="100%" cellpadding="0" cellspacing="0">
   <!-- Page header row: breadcrumb trail on the left; on the right, the
        show/hide-private toggle and the frames / no-frames view switch. -->
 39  <tr valign="top">
 40    <td width="100%">
 41      <span class="breadcrumbs">
 42        Package&nbsp;tagger ::
 43        Module&nbsp;tagger
 44      </span>
 45    </td>
 46    <td>
 47      <table cellpadding="0" cellspacing="0">
 48        <!-- hide/show private -->
   <!-- The href is a no-op; the click is handled by toggle_private(), which
        is not defined in this file (presumably in epydoc.js; confirm). -->
 49        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
 50    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
   <!-- frames / no frames: rendered as one bracketed group,
        "[frames | no frames]". Tags are closed on the following line so that
        no whitespace is rendered between brackets, links and separator. -->
 51        <tr><td align="right"><span class="options"
 52            >[<a href="frames.html" target="_top">frames</a
 53            >&nbsp;|&nbsp;<a href="tagger.tagger-module.html"
 54            target="_top">no&nbsp;frames</a>]</span></td></tr>
 55      </table>
 56    </td>
 57  </tr>
 58</table>
 59<!-- ==================== MODULE DESCRIPTION ==================== -->
 60<h1 class="epydoc">Module tagger</h1><p class="nomargin-top"><span class="codelink"><a href="tagger.tagger-pysrc.html">source&nbsp;code</a></span></p>
 61<p>====== tagger ======</p>
 62  <p>Module for extracting tags from text documents.</p>
 63  <p>Copyright (C) 2011 by Alessandro Presta</p>
 64  <h1 class="heading">Configuration</h1>
 65    <p>Dependencies: python2.7, stemming, nltk (optional), lxml (optional),
 66    tkinter (optional)</p>
 67    <p>You can install the stemming package with:</p>
 68<pre class="literalblock">
 69   $ easy_install stemming
 70</pre>
 71  <h1 class="heading">Usage</h1>
 72    <p>Tagging a text document from Python:</p>
 73<pre class="literalblock">
 74   import tagger
 75   weights = pickle.load(open('data/dict.pkl', 'rb')) # or your own dictionary
 76   myreader = tagger.Reader() # or your own reader class
 77   mystemmer = tagger.Stemmer() # or your own stemmer class
 78   myrater = tagger.Rater(weights) # or your own... (you got the idea)
 79   mytagger = tagger.Tagger(myreader, mystemmer, myrater)
 80   best_3_tags = mytagger(text_string, 3)
 81</pre>
 82    <p>Running the module as a script:</p>
 83<pre class="literalblock">
 84   $ ./tagger.py &lt;text document(s) to tag&gt;
 85</pre>
 86    <p>Example:</p>
 87<pre class="literalblock">
 88   $ ./tagger.py tests/*
 89   Loading dictionary... 
 90   Tags for  tests/bbc1.txt :
 91   ['bin laden', 'obama', 'pakistan', 'killed', 'raid']
 92   Tags for  tests/bbc2.txt :
 93   ['jo yeates', 'bristol', 'vincent tabak', 'murder', 'strangled']
 94   Tags for  tests/bbc3.txt :
 95   ['snp', 'party', 'election', 'scottish', 'labour']
 96   Tags for  tests/guardian1.txt :
 97   ['bin laden', 'al-qaida', 'killed', 'pakistan', 'al-fawwaz']
 98   Tags for  tests/guardian2.txt :
 99   ['clegg', 'tory', 'lib dem', 'party', 'coalition']
100   Tags for  tests/post1.txt :
101   ['sony', 'stolen', 'playstation network', 'hacker attack', 'lawsuit']
102   Tags for  tests/wikipedia1.txt :
103   ['universe', 'anthropic principle', 'observed', 'cosmological', 'theory']
104   Tags for  tests/wikipedia2.txt :
105   ['beetroot', 'beet', 'betaine', 'blood pressure', 'dietary nitrate']
106   Tags for  tests/wikipedia3.txt :
107   ['the lounge lizards', 'jazz', 'john lurie', 'musical', 'albums']
108</pre>
109
110<!-- ==================== CLASSES ==================== -->
   <!-- Summary table of the module's classes. After the header row, each row
        has an empty 15%-wide type cell followed by a cell containing a link
        to the class's own page and a one-line description. -->
111<a name="section-Classes"></a>
112<table class="summary" border="1" cellpadding="3"
113       cellspacing="0" width="100%" bgcolor="white">
   <!-- Header row: section title on the left, private-visibility toggle on
        the right. The toggle's href points back at this section's anchor;
        its onclick calls toggle_private(), which is not defined in this
        file (presumably in epydoc.js; confirm). -->
114<tr bgcolor="#70b0f0" class="table-header">
115  <td colspan="2" class="table-header">
116    <table border="0" cellpadding="0" cellspacing="0" width="100%">
117      <tr valign="top">
118        <td align="left"><span class="table-header">Classes</span></td>
119        <td align="right" valign="top"
120         ><span class="options">[<a href="#section-Classes"
121         class="privatelink" onclick="toggle_private();"
122         >hide private</a>]</span></td>
123      </tr>
124    </table>
125  </td>
126</tr>
127<tr>
128    <td width="15%" align="right" valign="top" class="summary">
129      <span class="summary-type">&nbsp;</span>
130    </td><td class="summary">
131        <a href="tagger.tagger.Tag-class.html" class="summary-name">Tag</a><br />
132      General class for tags (small units of text)
133    </td>
134  </tr>
135<tr>
136    <td width="15%" align="right" valign="top" class="summary">
137      <span class="summary-type">&nbsp;</span>
138    </td><td class="summary">
139        <a href="tagger.tagger.MultiTag-class.html" class="summary-name">MultiTag</a><br />
140      Class for aggregates of tags (usually next to each other in the 
141        document)
142    </td>
143  </tr>
144<tr>
145    <td width="15%" align="right" valign="top" class="summary">
146      <span class="summary-type">&nbsp;</span>
147    </td><td class="summary">
148        <a href="tagger.tagger.Reader-class.html" class="summary-name">Reader</a><br />
149      Class for parsing a string of text to obtain tags
150    </td>
151  </tr>
152<tr>
153    <td width="15%" align="right" valign="top" class="summary">
154      <span class="summary-type">&nbsp;</span>
155    </td><td class="summary">
156        <a href="tagger.tagger.Stemmer-class.html" class="summary-name">Stemmer</a><br />
157      Class for extracting the stem of a word
158    </td>
159  </tr>
160<tr>
161    <td width="15%" align="right" valign="top" class="summary">
162      <span class="summary-type">&nbsp;</span>
163    </td><td class="summary">
164        <a href="tagger.tagger.Rater-class.html" class="summary-name">Rater</a><br />
165      Class for estimating the relevance of tags
166    </td>
167  </tr>
168<tr>
169    <td width="15%" align="right" valign="top" class="summary">
170      <span class="summary-type">&nbsp;</span>
171    </td><td class="summary">
172        <a href="tagger.tagger.Tagger-class.html" class="summary-name">Tagger</a><br />
173      Master class for tagging text documents
174    </td>
175  </tr>
176</table>
177<!-- ==================== VARIABLES ==================== -->
   <!-- Summary table of module-level variables; it lists a single entry,
        __package__, together with its value. -->
178<a name="section-Variables"></a>
179<table class="summary" border="1" cellpadding="3"
180       cellspacing="0" width="100%" bgcolor="white">
   <!-- Header row: section title on the left, private-visibility toggle on
        the right. The toggle's href points back at this section's anchor;
        its onclick calls toggle_private(), which is not defined in this
        file (presumably in epydoc.js; confirm). -->
181<tr bgcolor="#70b0f0" class="table-header">
182  <td colspan="2" class="table-header">
183    <table border="0" cellpadding="0" cellspacing="0" width="100%">
184      <tr valign="top">
185        <td align="left"><span class="table-header">Variables</span></td>
186        <td align="right" valign="top"
187         ><span class="options">[<a href="#section-Variables"
188         class="privatelink" onclick="toggle_private();"
189         >hide private</a>]</span></td>
190      </tr>
191    </table>
192  </td>
193</tr>
194<tr>
195    <td width="15%" align="right" valign="top" class="summary">
196      <span class="summary-type">&nbsp;</span>
197    </td><td class="summary">
   <!-- The named anchor makes the variable addressable as #__package__. The
        outer code element's title repeats the whole value; the nested code
        elements mark up the quote characters and the string body
        separately. -->
198        <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="'tagger'"><code class="variable-quote">'</code><code class="variable-string">tagger</code><code class="variable-quote">'</code></code>
199    </td>
200  </tr>
201</table>
202<!-- ==================== NAVIGATION BAR ==================== -->
   <!-- Bottom navigation bar: same markup as the navigation bar at the top
        of the page (tree, index and help links, then the right-aligned
        project homepage link). -->
203<table class="navbar" border="0" width="100%" cellpadding="0"
204       bgcolor="#a0c0ff" cellspacing="0">
205  <tr valign="middle">
206
207  <!-- Tree link -->
208      <th>&nbsp;&nbsp;&nbsp;<a
209        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
210
211  <!-- Index link -->
212      <th>&nbsp;&nbsp;&nbsp;<a
213        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
214
215  <!-- Help link -->
216      <th>&nbsp;&nbsp;&nbsp;<a
217        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
218
219  <!-- Project homepage -->
   <!-- width="100%" makes this cell absorb the remaining row width, so the
        nested one-cell table ends up at the right edge. target="_top" loads
        the homepage in the full window rather than inside a frame. -->
220      <th class="navbar" align="right" width="100%">
221        <table border="0" cellpadding="0" cellspacing="0">
222          <tr><th class="navbar" align="center"
223            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
224          </tr></table></th>
225  </tr>
226</table>
227<table border="0" cellpadding="0" cellspacing="0" width="100%">
   <!-- Footer: generation stamp on the left, link to the Epydoc homepage on
        the right. The table spans the full page width; width takes a single
        percent sign. -->
228  <tr>
229    <td align="left" class="footer">
230    Generated by Epydoc 3.0.1 on Wed Jun  8 01:57:42 2011
231    </td>
232    <td align="right" class="footer">
233      <a target="mainFrame" href="http://epydoc.sourceforge.net"
234        >http://epydoc.sourceforge.net</a>
235    </td>
236  </tr>
237</table>
238
239<script type="text/javascript">
240  <!--
     // The HTML-comment markers wrapped around this script body are the
     // legacy idiom for hiding script text from browsers without script
     // support.
241  // Private objects are initially displayed (because if
242  // javascript is turned off then we want them to be
243  // visible); but by default, we want to hide them.  So hide
244  // them unless we have a cookie that says to show them.
     // NOTE(review): checkCookie() is not defined in this file; presumably it
     // comes from epydoc.js, loaded in the document head. Confirm.
245  checkCookie();
246  // -->
247</script>
248</body>
249</html>