/doc/tagger-module.html

http://github.com/apresta/tagger · HTML · 248 lines · 216 code · 11 blank · 21 comment · 0 complexity · f6a9277569235f27e7f931e8be0fc266 MD5 · raw file

  1. <?xml version="1.0" encoding="ascii"?>
  2. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  3. "DTD/xhtml1-transitional.dtd">
  4. <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  5. <head>
  6. <title>tagger</title>
  7. <link rel="stylesheet" href="epydoc.css" type="text/css" />
  8. <script type="text/javascript" src="epydoc.js"></script>
  9. </head>
  10. <body bgcolor="white" text="black" link="blue" vlink="#204080"
  11. alink="#204080">
  12. <!-- ==================== NAVIGATION BAR ==================== -->
  13. <table class="navbar" border="0" width="100%" cellpadding="0"
  14. bgcolor="#a0c0ff" cellspacing="0">
  15. <tr valign="middle">
  16. <!-- Tree link -->
  17. <th>&nbsp;&nbsp;&nbsp;<a
  18. href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
  19. <!-- Index link -->
  20. <th>&nbsp;&nbsp;&nbsp;<a
  21. href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
  22. <!-- Help link -->
  23. <th>&nbsp;&nbsp;&nbsp;<a
  24. href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
  25. <!-- Project homepage -->
  26. <th class="navbar" align="right" width="100%">
  27. <table border="0" cellpadding="0" cellspacing="0">
  28. <tr><th class="navbar" align="center"
  29. ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
  30. </tr></table></th>
  31. </tr>
  32. </table>
  33. <table width="100%" cellpadding="0" cellspacing="0">
  34. <tr valign="top">
  35. <td width="100%">
  36. <span class="breadcrumbs">
  37. Module&nbsp;tagger
  38. </span>
  39. </td>
  40. <td>
  41. <table cellpadding="0" cellspacing="0">
  42. <!-- hide/show private -->
  43. <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
  44. onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
  45. <tr><td align="right"><span class="options"
  46. >[<a href="frames.html" target="_top">frames</a
  47. >]&nbsp;|&nbsp;[<a href="tagger-module.html"
  48. target="_top">no&nbsp;frames</a>]</span></td></tr>
  49. </table>
  50. </td>
  51. </tr>
  52. </table>
  53. <!-- ==================== MODULE DESCRIPTION ==================== -->
  54. <h1 class="epydoc">Module tagger</h1><p class="nomargin-top"><span class="codelink"><a href="tagger-pysrc.html">source&nbsp;code</a></span></p>
  55. <p>====== tagger ======</p>
  56. <p>Module for extracting tags from text documents.</p>
  57. <p>Copyright (C) 2011 by Alessandro Presta</p>
  58. <h1 class="heading">Configuration</h1>
  59. <p>Dependencies: python2.7, stemming, nltk (optional), lxml (optional),
  60. tkinter (optional)</p>
  61. <p>You can install the stemming package with:</p>
  62. <pre class="literalblock">
  63. $ easy_install stemming
  64. </pre>
  65. <h1 class="heading">Usage</h1>
  66. <p>Tagging a text document from Python:</p>
  67. <pre class="literalblock">
  68. import pickle, tagger
  69. weights = pickle.load(open('data/dict.pkl', 'rb')) # or your own dictionary
  70. myreader = tagger.Reader() # or your own reader class
  71. mystemmer = tagger.Stemmer() # or your own stemmer class
  72. myrater = tagger.Rater(weights) # or your own... (you get the idea)
  73. mytagger = tagger.Tagger(myreader, mystemmer, myrater)
  74. best_3_tags = mytagger(text_string, 3)
  75. </pre>
  76. <p>Running the module as a script:</p>
  77. <pre class="literalblock">
  78. $ ./tagger.py &lt;text document(s) to tag&gt;
  79. </pre>
  80. <p>Example:</p>
  81. <pre class="literalblock">
  82. $ ./tagger.py tests/*
  83. Loading dictionary...
  84. Tags for tests/bbc1.txt :
  85. ['bin laden', 'obama', 'pakistan', 'killed', 'raid']
  86. Tags for tests/bbc2.txt :
  87. ['jo yeates', 'bristol', 'vincent tabak', 'murder', 'strangled']
  88. Tags for tests/bbc3.txt :
  89. ['snp', 'party', 'election', 'scottish', 'labour']
  90. Tags for tests/guardian1.txt :
  91. ['bin laden', 'al-qaida', 'killed', 'pakistan', 'al-fawwaz']
  92. Tags for tests/guardian2.txt :
  93. ['clegg', 'tory', 'lib dem', 'party', 'coalition']
  94. Tags for tests/post1.txt :
  95. ['sony', 'stolen', 'playstation network', 'hacker attack', 'lawsuit']
  96. Tags for tests/wikipedia1.txt :
  97. ['universe', 'anthropic principle', 'observed', 'cosmological', 'theory']
  98. Tags for tests/wikipedia2.txt :
  99. ['beetroot', 'beet', 'betaine', 'blood pressure', 'dietary nitrate']
  100. Tags for tests/wikipedia3.txt :
  101. ['the lounge lizards', 'jazz', 'john lurie', 'musical', 'albums']
  102. </pre>
  103. <!-- ==================== CLASSES ==================== -->
  104. <a name="section-Classes"></a>
  105. <table class="summary" border="1" cellpadding="3"
  106. cellspacing="0" width="100%" bgcolor="white">
  107. <tr bgcolor="#70b0f0" class="table-header">
  108. <td colspan="2" class="table-header">
  109. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  110. <tr valign="top">
  111. <td align="left"><span class="table-header">Classes</span></td>
  112. <td align="right" valign="top"
  113. ><span class="options">[<a href="#section-Classes"
  114. class="privatelink" onclick="toggle_private();"
  115. >hide private</a>]</span></td>
  116. </tr>
  117. </table>
  118. </td>
  119. </tr>
  120. <tr>
  121. <td width="15%" align="right" valign="top" class="summary">
  122. <span class="summary-type">&nbsp;</span>
  123. </td><td class="summary">
  124. <a href="tagger.Tag-class.html" class="summary-name">Tag</a><br />
  125. General class for tags (small units of text)
  126. </td>
  127. </tr>
  128. <tr>
  129. <td width="15%" align="right" valign="top" class="summary">
  130. <span class="summary-type">&nbsp;</span>
  131. </td><td class="summary">
  132. <a href="tagger.MultiTag-class.html" class="summary-name">MultiTag</a><br />
  133. Class for aggregates of tags (usually next to each other in the
  134. document)
  135. </td>
  136. </tr>
  137. <tr>
  138. <td width="15%" align="right" valign="top" class="summary">
  139. <span class="summary-type">&nbsp;</span>
  140. </td><td class="summary">
  141. <a href="tagger.Reader-class.html" class="summary-name">Reader</a><br />
  142. Class for parsing a string of text to obtain tags
  143. </td>
  144. </tr>
  145. <tr>
  146. <td width="15%" align="right" valign="top" class="summary">
  147. <span class="summary-type">&nbsp;</span>
  148. </td><td class="summary">
  149. <a href="tagger.Stemmer-class.html" class="summary-name">Stemmer</a><br />
  150. Class for extracting the stem of a word
  151. </td>
  152. </tr>
  153. <tr>
  154. <td width="15%" align="right" valign="top" class="summary">
  155. <span class="summary-type">&nbsp;</span>
  156. </td><td class="summary">
  157. <a href="tagger.Rater-class.html" class="summary-name">Rater</a><br />
  158. Class for estimating the relevance of tags
  159. </td>
  160. </tr>
  161. <tr>
  162. <td width="15%" align="right" valign="top" class="summary">
  163. <span class="summary-type">&nbsp;</span>
  164. </td><td class="summary">
  165. <a href="tagger.Tagger-class.html" class="summary-name">Tagger</a><br />
  166. Master class for tagging text documents
  167. </td>
  168. </tr>
  169. </table>
  170. <!-- ==================== VARIABLES ==================== -->
  171. <a name="section-Variables"></a>
  172. <table class="summary" border="1" cellpadding="3"
  173. cellspacing="0" width="100%" bgcolor="white">
  174. <tr bgcolor="#70b0f0" class="table-header">
  175. <td colspan="2" class="table-header">
  176. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  177. <tr valign="top">
  178. <td align="left"><span class="table-header">Variables</span></td>
  179. <td align="right" valign="top"
  180. ><span class="options">[<a href="#section-Variables"
  181. class="privatelink" onclick="toggle_private();"
  182. >hide private</a>]</span></td>
  183. </tr>
  184. </table>
  185. </td>
  186. </tr>
  187. <tr>
  188. <td width="15%" align="right" valign="top" class="summary">
  189. <span class="summary-type">&nbsp;</span>
  190. </td><td class="summary">
  191. <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="None">None</code>
  192. </td>
  193. </tr>
  194. </table>
  195. <!-- ==================== NAVIGATION BAR ==================== -->
  196. <table class="navbar" border="0" width="100%" cellpadding="0"
  197. bgcolor="#a0c0ff" cellspacing="0">
  198. <tr valign="middle">
  199. <!-- Tree link -->
  200. <th>&nbsp;&nbsp;&nbsp;<a
  201. href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
  202. <!-- Index link -->
  203. <th>&nbsp;&nbsp;&nbsp;<a
  204. href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
  205. <!-- Help link -->
  206. <th>&nbsp;&nbsp;&nbsp;<a
  207. href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
  208. <!-- Project homepage -->
  209. <th class="navbar" align="right" width="100%">
  210. <table border="0" cellpadding="0" cellspacing="0">
  211. <tr><th class="navbar" align="center"
  212. ><a class="navbar" target="_top" href="http://github.com/apresta/tagger">tagger</a></th>
  213. </tr></table></th>
  214. </tr>
  215. </table>
  216. <table border="0" cellpadding="0" cellspacing="0" width="100%">
  217. <tr>
  218. <td align="left" class="footer">
  219. Generated by Epydoc 3.0.1 on Fri May 13 11:13:02 2011
  220. </td>
  221. <td align="right" class="footer">
  222. <a target="mainFrame" href="http://epydoc.sourceforge.net"
  223. >http://epydoc.sourceforge.net</a>
  224. </td>
  225. </tr>
  226. </table>
  227. <script type="text/javascript">
  228. <!--
  229. // Private objects are initially displayed (because if
  230. // javascript is turned off then we want them to be
  231. // visible); but by default, we want to hide them. So hide
  232. // them unless we have a cookie that says to show them.
  233. checkCookie();
  234. // -->
  235. </script>
  236. </body>
  237. </html>