PageRenderTime 30ms CodeModel.GetById 11ms app.highlight 12ms RepoModel.GetById 1ms app.codeStats 0ms

/doc/tagger-module.html

http://github.com/apresta/tagger
HTML | 248 lines | 216 code | 11 blank | 21 comment | 0 complexity | f6a9277569235f27e7f931e8be0fc266 MD5 | raw file
  1<?xml version="1.0" encoding="ascii"?>
  2<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  3          "DTD/xhtml1-transitional.dtd">
  4<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  5<head>
  6  <title>tagger</title>
  7  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  8  <script type="text/javascript" src="epydoc.js"></script>
  9</head>
 10
 11<body bgcolor="white" text="black" link="blue" vlink="#204080"
 12      alink="#204080">
 13<!-- ==================== NAVIGATION BAR ==================== -->
 14<table class="navbar" border="0" width="100%" cellpadding="0"
 15       bgcolor="#a0c0ff" cellspacing="0">
 16  <tr valign="middle">
 17
 18  <!-- Tree link -->
 19      <th>&nbsp;&nbsp;&nbsp;<a
 20        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
 21
 22  <!-- Index link -->
 23      <th>&nbsp;&nbsp;&nbsp;<a
 24        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
 25
 26  <!-- Help link -->
 27      <th>&nbsp;&nbsp;&nbsp;<a
 28        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
 29
 30  <!-- Project homepage -->
 31      <th class="navbar" align="right" width="100%">
 32        <table border="0" cellpadding="0" cellspacing="0">
 33          <tr><th class="navbar" align="center"
 34            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
 35          </tr></table></th>
 36  </tr>
 37</table>
 38<table width="100%" cellpadding="0" cellspacing="0">
 39  <tr valign="top">
 40    <td width="100%">
 41      <span class="breadcrumbs">
 42        Module&nbsp;tagger
 43      </span>
 44    </td>
 45    <td>
 46      <table cellpadding="0" cellspacing="0">
 47        <!-- hide/show private -->
 48        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
 49    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
 50        <tr><td align="right"><span class="options"
 51            >[<a href="frames.html" target="_top">frames</a
 52            >]&nbsp;|&nbsp;[<a href="tagger-module.html"
 53            target="_top">no&nbsp;frames</a>]</span></td></tr>
 54      </table>
 55    </td>
 56  </tr>
 57</table>
 58<!-- ==================== MODULE DESCRIPTION ==================== -->
 59<h1 class="epydoc">Module tagger</h1><p class="nomargin-top"><span class="codelink"><a href="tagger-pysrc.html">source&nbsp;code</a></span></p>
 60<p>====== tagger ======</p>
 61  <p>Module for extracting tags from text documents.</p>
 62  <p>Copyright (C) 2011 by Alessandro Presta</p>
 63  <h1 class="heading">Configuration</h1>
 64    <p>Dependencies: python2.7, stemming, nltk (optional), lxml (optional),
 65    tkinter (optional)</p>
 66    <p>You can install the stemming package with:</p>
 67<pre class="literalblock">
 68   $ easy_install stemming
 69</pre>
 70  <h1 class="heading">Usage</h1>
 71    <p>Tagging a text document from Python:</p>
 72<pre class="literalblock">
 73   import pickle, tagger
 74   weights = pickle.load(open('data/dict.pkl', 'rb')) # or your own dictionary
 75   myreader = tagger.Reader() # or your own reader class
 76   mystemmer = tagger.Stemmer() # or your own stemmer class
 77   myrater = tagger.Rater(weights) # or your own... (you got the idea)
 78   mytagger = tagger.Tagger(myreader, mystemmer, myrater)
 79   best_3_tags = mytagger(text_string, 3)
 80</pre>
 81    <p>Running the module as a script:</p>
 82<pre class="literalblock">
 83   $ ./tagger.py &lt;text document(s) to tag&gt;
 84</pre>
 85    <p>Example:</p>
 86<pre class="literalblock">
 87   $ ./tagger.py tests/*
 88   Loading dictionary... 
 89   Tags for  tests/bbc1.txt :
 90   ['bin laden', 'obama', 'pakistan', 'killed', 'raid']
 91   Tags for  tests/bbc2.txt :
 92   ['jo yeates', 'bristol', 'vincent tabak', 'murder', 'strangled']
 93   Tags for  tests/bbc3.txt :
 94   ['snp', 'party', 'election', 'scottish', 'labour']
 95   Tags for  tests/guardian1.txt :
 96   ['bin laden', 'al-qaida', 'killed', 'pakistan', 'al-fawwaz']
 97   Tags for  tests/guardian2.txt :
 98   ['clegg', 'tory', 'lib dem', 'party', 'coalition']
 99   Tags for  tests/post1.txt :
100   ['sony', 'stolen', 'playstation network', 'hacker attack', 'lawsuit']
101   Tags for  tests/wikipedia1.txt :
102   ['universe', 'anthropic principle', 'observed', 'cosmological', 'theory']
103   Tags for  tests/wikipedia2.txt :
104   ['beetroot', 'beet', 'betaine', 'blood pressure', 'dietary nitrate']
105   Tags for  tests/wikipedia3.txt :
106   ['the lounge lizards', 'jazz', 'john lurie', 'musical', 'albums']
107</pre>
108
109<!-- ==================== CLASSES ==================== -->
110<a name="section-Classes"></a>
111<table class="summary" border="1" cellpadding="3"
112       cellspacing="0" width="100%" bgcolor="white">
113<tr bgcolor="#70b0f0" class="table-header">
114  <td colspan="2" class="table-header">
115    <table border="0" cellpadding="0" cellspacing="0" width="100%">
116      <tr valign="top">
117        <td align="left"><span class="table-header">Classes</span></td>
118        <td align="right" valign="top"
119         ><span class="options">[<a href="#section-Classes"
120         class="privatelink" onclick="toggle_private();"
121         >hide private</a>]</span></td>
122      </tr>
123    </table>
124  </td>
125</tr>
126<tr>
127    <td width="15%" align="right" valign="top" class="summary">
128      <span class="summary-type">&nbsp;</span>
129    </td><td class="summary">
130        <a href="tagger.Tag-class.html" class="summary-name">Tag</a><br />
131      General class for tags (small units of text)
132    </td>
133  </tr>
134<tr>
135    <td width="15%" align="right" valign="top" class="summary">
136      <span class="summary-type">&nbsp;</span>
137    </td><td class="summary">
138        <a href="tagger.MultiTag-class.html" class="summary-name">MultiTag</a><br />
139      Class for aggregates of tags (usually next to each other in the 
140        document)
141    </td>
142  </tr>
143<tr>
144    <td width="15%" align="right" valign="top" class="summary">
145      <span class="summary-type">&nbsp;</span>
146    </td><td class="summary">
147        <a href="tagger.Reader-class.html" class="summary-name">Reader</a><br />
148      Class for parsing a string of text to obtain tags
149    </td>
150  </tr>
151<tr>
152    <td width="15%" align="right" valign="top" class="summary">
153      <span class="summary-type">&nbsp;</span>
154    </td><td class="summary">
155        <a href="tagger.Stemmer-class.html" class="summary-name">Stemmer</a><br />
156      Class for extracting the stem of a word
157    </td>
158  </tr>
159<tr>
160    <td width="15%" align="right" valign="top" class="summary">
161      <span class="summary-type">&nbsp;</span>
162    </td><td class="summary">
163        <a href="tagger.Rater-class.html" class="summary-name">Rater</a><br />
164      Class for estimating the relevance of tags
165    </td>
166  </tr>
167<tr>
168    <td width="15%" align="right" valign="top" class="summary">
169      <span class="summary-type">&nbsp;</span>
170    </td><td class="summary">
171        <a href="tagger.Tagger-class.html" class="summary-name">Tagger</a><br />
172      Master class for tagging text documents
173    </td>
174  </tr>
175</table>
176<!-- ==================== VARIABLES ==================== -->
177<a name="section-Variables"></a>
178<table class="summary" border="1" cellpadding="3"
179       cellspacing="0" width="100%" bgcolor="white">
180<tr bgcolor="#70b0f0" class="table-header">
181  <td colspan="2" class="table-header">
182    <table border="0" cellpadding="0" cellspacing="0" width="100%">
183      <tr valign="top">
184        <td align="left"><span class="table-header">Variables</span></td>
185        <td align="right" valign="top"
186         ><span class="options">[<a href="#section-Variables"
187         class="privatelink" onclick="toggle_private();"
188         >hide private</a>]</span></td>
189      </tr>
190    </table>
191  </td>
192</tr>
193<tr>
194    <td width="15%" align="right" valign="top" class="summary">
195      <span class="summary-type">&nbsp;</span>
196    </td><td class="summary">
197        <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="None">None</code>
198    </td>
199  </tr>
200</table>
201<!-- ==================== NAVIGATION BAR ==================== -->
202<table class="navbar" border="0" width="100%" cellpadding="0"
203       bgcolor="#a0c0ff" cellspacing="0">
204  <tr valign="middle">
205
206  <!-- Tree link -->
207      <th>&nbsp;&nbsp;&nbsp;<a
208        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
209
210  <!-- Index link -->
211      <th>&nbsp;&nbsp;&nbsp;<a
212        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
213
214  <!-- Help link -->
215      <th>&nbsp;&nbsp;&nbsp;<a
216        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
217
218  <!-- Project homepage -->
219      <th class="navbar" align="right" width="100%">
220        <table border="0" cellpadding="0" cellspacing="0">
221          <tr><th class="navbar" align="center"
222            ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
223          </tr></table></th>
224  </tr>
225</table>
226<table border="0" cellpadding="0" cellspacing="0" width="100%">
227  <tr>
228    <td align="left" class="footer">
229    Generated by Epydoc 3.0.1 on Fri May 13 11:13:02 2011
230    </td>
231    <td align="right" class="footer">
232      <a target="mainFrame" href="http://epydoc.sourceforge.net"
233        >http://epydoc.sourceforge.net</a>
234    </td>
235  </tr>
236</table>
237
238<script type="text/javascript">
239  <!--
240  // Private objects are initially displayed (because if
241  // javascript is turned off then we want them to be
242  // visible); but by default, we want to hide them.  So hide
243  // them unless we have a cookie that says to show them.
244  checkCookie();
245  // -->
246</script>
247</body>
248</html>