/doc/tagger.tagger-module.html

http://github.com/apresta/tagger · HTML · 249 lines · 217 code · 11 blank · 21 comment · 0 complexity · d3f93963216efdfea24f9d513192ebe7 MD5 · raw file

  1. <?xml version="1.0" encoding="ascii"?>
  2. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  3. "DTD/xhtml1-transitional.dtd">
  4. <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  5. <head>
  6. <title>tagger.tagger</title>
  7. <link rel="stylesheet" href="epydoc.css" type="text/css" />
  8. <script type="text/javascript" src="epydoc.js"></script>
  9. </head>
  10. <body bgcolor="white" text="black" link="blue" vlink="#204080"
  11. alink="#204080">
  12. <!-- ==================== NAVIGATION BAR ==================== -->
  13. <table class="navbar" border="0" width="100%" cellpadding="0"
  14. bgcolor="#a0c0ff" cellspacing="0">
  15. <tr valign="middle">
  16. <!-- Tree link -->
  17. <th>&nbsp;&nbsp;&nbsp;<a
  18. href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
  19. <!-- Index link -->
  20. <th>&nbsp;&nbsp;&nbsp;<a
  21. href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
  22. <!-- Help link -->
  23. <th>&nbsp;&nbsp;&nbsp;<a
  24. href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
  25. <!-- Project homepage -->
  26. <th class="navbar" align="right" width="100%">
  27. <table border="0" cellpadding="0" cellspacing="0">
  28. <tr><th class="navbar" align="center"
  29. ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
  30. </tr></table></th>
  31. </tr>
  32. </table>
  33. <table width="100%" cellpadding="0" cellspacing="0">
  34. <tr valign="top">
  35. <td width="100%">
  36. <span class="breadcrumbs">
  37. Package&nbsp;tagger ::
  38. Module&nbsp;tagger
  39. </span>
  40. </td>
  41. <td>
  42. <table cellpadding="0" cellspacing="0">
  43. <!-- hide/show private -->
  44. <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
  45. onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
  46. <tr><td align="right"><span class="options"
  47. >[<a href="frames.html" target="_top">frames</a
  48. >]&nbsp;|&nbsp;[<a href="tagger.tagger-module.html"
  49. target="_top">no&nbsp;frames</a>]</span></td></tr>
  50. </table>
  51. </td>
  52. </tr>
  53. </table>
  54. <!-- ==================== MODULE DESCRIPTION ==================== -->
  55. <h1 class="epydoc">Module tagger</h1><p class="nomargin-top"><span class="codelink"><a href="tagger.tagger-pysrc.html">source&nbsp;code</a></span></p>
  56. <p>====== tagger ======</p>
  57. <p>Module for extracting tags from text documents.</p>
  58. <p>Copyright (C) 2011 by Alessandro Presta</p>
  59. <h1 class="heading">Configuration</h1>
  60. <p>Dependencies: python2.7, stemming, nltk (optional), lxml (optional),
  61. tkinter (optional)</p>
  62. <p>You can install the stemming package with:</p>
  63. <pre class="literalblock">
  64. $ easy_install stemming
  65. </pre>
  66. <h1 class="heading">Usage</h1>
  67. <p>Tagging a text document from Python:</p>
  68. <pre class="literalblock">
  69. import pickle, tagger
  70. weights = pickle.load(open('data/dict.pkl', 'rb')) # or your own dictionary
  71. myreader = tagger.Reader() # or your own reader class
  72. mystemmer = tagger.Stemmer() # or your own stemmer class
  73. myrater = tagger.Rater(weights) # or your own... (you get the idea)
  74. mytagger = tagger.Tagger(myreader, mystemmer, myrater)
  75. best_3_tags = mytagger(text_string, 3)
  76. </pre>
  77. <p>Running the module as a script:</p>
  78. <pre class="literalblock">
  79. $ ./tagger.py &lt;text document(s) to tag&gt;
  80. </pre>
  81. <p>Example:</p>
  82. <pre class="literalblock">
  83. $ ./tagger.py tests/*
  84. Loading dictionary...
  85. Tags for tests/bbc1.txt :
  86. ['bin laden', 'obama', 'pakistan', 'killed', 'raid']
  87. Tags for tests/bbc2.txt :
  88. ['jo yeates', 'bristol', 'vincent tabak', 'murder', 'strangled']
  89. Tags for tests/bbc3.txt :
  90. ['snp', 'party', 'election', 'scottish', 'labour']
  91. Tags for tests/guardian1.txt :
  92. ['bin laden', 'al-qaida', 'killed', 'pakistan', 'al-fawwaz']
  93. Tags for tests/guardian2.txt :
  94. ['clegg', 'tory', 'lib dem', 'party', 'coalition']
  95. Tags for tests/post1.txt :
  96. ['sony', 'stolen', 'playstation network', 'hacker attack', 'lawsuit']
  97. Tags for tests/wikipedia1.txt :
  98. ['universe', 'anthropic principle', 'observed', 'cosmological', 'theory']
  99. Tags for tests/wikipedia2.txt :
  100. ['beetroot', 'beet', 'betaine', 'blood pressure', 'dietary nitrate']
  101. Tags for tests/wikipedia3.txt :
  102. ['the lounge lizards', 'jazz', 'john lurie', 'musical', 'albums']
  103. </pre>
  104. <!-- ==================== CLASSES ==================== -->
  105. <a name="section-Classes"></a>
  106. <table class="summary" border="1" cellpadding="3"
  107. cellspacing="0" width="100%" bgcolor="white">
  108. <tr bgcolor="#70b0f0" class="table-header">
  109. <td colspan="2" class="table-header">
  110. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  111. <tr valign="top">
  112. <td align="left"><span class="table-header">Classes</span></td>
  113. <td align="right" valign="top"
  114. ><span class="options">[<a href="#section-Classes"
  115. class="privatelink" onclick="toggle_private();"
  116. >hide private</a>]</span></td>
  117. </tr>
  118. </table>
  119. </td>
  120. </tr>
  121. <tr>
  122. <td width="15%" align="right" valign="top" class="summary">
  123. <span class="summary-type">&nbsp;</span>
  124. </td><td class="summary">
  125. <a href="tagger.tagger.Tag-class.html" class="summary-name">Tag</a><br />
  126. General class for tags (small units of text)
  127. </td>
  128. </tr>
  129. <tr>
  130. <td width="15%" align="right" valign="top" class="summary">
  131. <span class="summary-type">&nbsp;</span>
  132. </td><td class="summary">
  133. <a href="tagger.tagger.MultiTag-class.html" class="summary-name">MultiTag</a><br />
  134. Class for aggregates of tags (usually next to each other in the
  135. document)
  136. </td>
  137. </tr>
  138. <tr>
  139. <td width="15%" align="right" valign="top" class="summary">
  140. <span class="summary-type">&nbsp;</span>
  141. </td><td class="summary">
  142. <a href="tagger.tagger.Reader-class.html" class="summary-name">Reader</a><br />
  143. Class for parsing a string of text to obtain tags
  144. </td>
  145. </tr>
  146. <tr>
  147. <td width="15%" align="right" valign="top" class="summary">
  148. <span class="summary-type">&nbsp;</span>
  149. </td><td class="summary">
  150. <a href="tagger.tagger.Stemmer-class.html" class="summary-name">Stemmer</a><br />
  151. Class for extracting the stem of a word
  152. </td>
  153. </tr>
  154. <tr>
  155. <td width="15%" align="right" valign="top" class="summary">
  156. <span class="summary-type">&nbsp;</span>
  157. </td><td class="summary">
  158. <a href="tagger.tagger.Rater-class.html" class="summary-name">Rater</a><br />
  159. Class for estimating the relevance of tags
  160. </td>
  161. </tr>
  162. <tr>
  163. <td width="15%" align="right" valign="top" class="summary">
  164. <span class="summary-type">&nbsp;</span>
  165. </td><td class="summary">
  166. <a href="tagger.tagger.Tagger-class.html" class="summary-name">Tagger</a><br />
  167. Master class for tagging text documents
  168. </td>
  169. </tr>
  170. </table>
  171. <!-- ==================== VARIABLES ==================== -->
  172. <a name="section-Variables"></a>
  173. <table class="summary" border="1" cellpadding="3"
  174. cellspacing="0" width="100%" bgcolor="white">
  175. <tr bgcolor="#70b0f0" class="table-header">
  176. <td colspan="2" class="table-header">
  177. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  178. <tr valign="top">
  179. <td align="left"><span class="table-header">Variables</span></td>
  180. <td align="right" valign="top"
  181. ><span class="options">[<a href="#section-Variables"
  182. class="privatelink" onclick="toggle_private();"
  183. >hide private</a>]</span></td>
  184. </tr>
  185. </table>
  186. </td>
  187. </tr>
  188. <tr>
  189. <td width="15%" align="right" valign="top" class="summary">
  190. <span class="summary-type">&nbsp;</span>
  191. </td><td class="summary">
  192. <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="'tagger'"><code class="variable-quote">'</code><code class="variable-string">tagger</code><code class="variable-quote">'</code></code>
  193. </td>
  194. </tr>
  195. </table>
  196. <!-- ==================== NAVIGATION BAR ==================== -->
  197. <table class="navbar" border="0" width="100%" cellpadding="0"
  198. bgcolor="#a0c0ff" cellspacing="0">
  199. <tr valign="middle">
  200. <!-- Tree link -->
  201. <th>&nbsp;&nbsp;&nbsp;<a
  202. href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
  203. <!-- Index link -->
  204. <th>&nbsp;&nbsp;&nbsp;<a
  205. href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
  206. <!-- Help link -->
  207. <th>&nbsp;&nbsp;&nbsp;<a
  208. href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
  209. <!-- Project homepage -->
  210. <th class="navbar" align="right" width="100%">
  211. <table border="0" cellpadding="0" cellspacing="0">
  212. <tr><th class="navbar" align="center"
  213. ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
  214. </tr></table></th>
  215. </tr>
  216. </table>
  217. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  218. <tr>
  219. <td align="left" class="footer">
  220. Generated by Epydoc 3.0.1 on Wed Jun 8 01:57:42 2011
  221. </td>
  222. <td align="right" class="footer">
  223. <a target="mainFrame" href="http://epydoc.sourceforge.net"
  224. >http://epydoc.sourceforge.net</a>
  225. </td>
  226. </tr>
  227. </table>
  228. <script type="text/javascript">
  229. <!--
  230. // Private objects are initially displayed (because if
  231. // javascript is turned off then we want them to be
  232. // visible); but by default, we want to hide them. So hide
  233. // them unless we have a cookie that says to show them.
  234. checkCookie();
  235. // -->
  236. </script>
  237. </body>
  238. </html>