PageRenderTime 420ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/unlpbot/src/net/sf/unlpbot/irc/base/uNLPBotHeap.java

http://unlpbot.googlecode.com/
Java | 379 lines | 288 code | 8 blank | 83 comment | 36 complexity | 8bae41d670a6cde06d1dccf15e11b097 MD5 | raw file
Possible License(s): LGPL-2.1
  1. /**
  2. *
  3. * Project name:
  4. * unlpbot
  5. * Module name:
  6. * unlpbot
  7. * Package name:
  8. * net.sf.unlpbot.irc.base
  9. * File name:
  10. * uNLPBotHeap.java
  11. * Created on:
  12. * 3-dic-2004
  13. * By:
  14. * gni
  15. * Email address:
  16. * gni at users.sourceforge.net
  17. *
  18. * --------------------------------------------------------------------------
  19. *
  20. * This file is part of uNLPBot project hosted on Sourceforge.net
  21. * at URL: http://sourceforge.net/projects/unlpbot/
  22. * You can visit project homepage
  23. * at URL: http://unlpbot.sourceforge.net/
  24. *
  25. * --------------------------------------------------------------------------
  26. *
  27. * This project is owned, as of 3-dic-2004, by:
  28. * Sourceforge.net User:
  29. * gni
  30. * Sourceforge.net User ID:
  31. * 1154253
  32. * Sourceforge.net User Email address:
  33. * gni@users.sourceforge.net
  34. * Sourceforge.net Member since:
  35. * 2004-11-07 09:35 UTC
  36. *
  37. * --------------------------------------------------------------------------
  38. *
  39. * Details about the project as of 3-dic-2004:
  40. * Sourceforge.net Project UNIX name: unlpbot
  41. * Sourceforge.net Project ID: 125225
  42. * Sourceforge.net Registered: 2004-11-27 15:55 UTC
  43. *
  44. * Sourceforge.net Original project description:
  45. * uNLPBot is a chatter bot based on NLP (Natural
  46. * Language Processing) theory, able to parse small but
  47. * representative subsets of english natural language and
  48. * to produce english sentences compliant to english grammar
  49. * and related to conversation threads.
  50. *
  51. * --------------------------------------------------------------------------
  52. *
  53. * This program is free software; you can redistribute it and/or modify it
  54. * under the terms of the GNU General Public License as published by the
  55. * Free Software Foundation; either version 2 of the License, or (at your
  56. * option) any later version.
  57. *
  58. * This program is distributed in the hope that it will be useful, but
  59. * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  60. * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  61. * for more details.
  62. *
  63. * You should have received a copy of the GNU General Public License along
  64. * with this program; if not, write to:
  65. * Free Software Foundation, Inc.
  66. * 59 Temple Place, Suite 330
  67. * Boston, MA 02111-1307
  68. * USA
  69. *
  70. * --------------------------------------------------------------------------
  71. * uNLPBot: Unintelligent Natural Language Processing chatter BOT
  72. * Copyright (C) 2004. All rights reserved.
  73. * Use is subject to license terms.
  74. * Initial developer(s): gni.
  75. *
  76. */
  77. package net.sf.unlpbot.irc.base;
  78. import java.lang.reflect.Method;
  79. import java.sql.Connection;
  80. import java.sql.DriverManager;
  81. import java.sql.ResultSet;
  82. import java.sql.SQLException;
  83. import com.mysql.jdbc.Statement;
  84. import java.util.*;
  85. import net.sf.snowball.SnowballProgram;
  86. import java.util.regex.*;
  87. /**
  88. * uNLPBot: Unintelligent Natural Language Processing chatter BOT<p>
  89. * Copyright (C) 2004. All rights reserved.<p>
  90. * Use is subject to license terms.<p>
  91. * Initial developer(s): gni.<p>
  92. * @author gni at users.sourceforge.net<p>
  93. */
  94. public class uNLPBotHeap {
  95. private Connection con=null;
  96. private String language=null;
  97. private int id=0;
  98. public boolean Connect() {
  99. try {
  100. this.con = DriverManager.getConnection("jdbc:mysql://"+uNLPBotGlobal.sqlhost+"/" +uNLPBotGlobal.sqldb+"?user="+uNLPBotGlobal.sqluser+"&password="+uNLPBotGlobal.sqlpass);
  101. return true;
  102. }
  103. catch (SQLException e) {
  104. System.out.println("SQL Exception: " + e.getMessage());
  105. e.printStackTrace(System.out);
  106. return false;
  107. }
  108. }
  109. public boolean Disconnect() {
  110. try {
  111. this.con.close();
  112. return true;
  113. }
  114. catch (SQLException e) {
  115. System.out.println("SQL Exception: " + e.getMessage());
  116. e.printStackTrace(System.out);
  117. return false;
  118. }
  119. }
  120. public uNLPBotHeap(String language, int id) {
  121. try {
  122. Class.forName("com.mysql.jdbc.Driver");
  123. }
  124. catch (ClassNotFoundException e) {
  125. System.out.println("Exception: Cannot Load JDBC Driver");
  126. return;
  127. }
  128. this.language = language;
  129. this.id = id;
  130. }
  131. public void MakeWordsAndStems(String message, Connection con) {
  132. this.con=con;
  133. Pattern unpattern = Pattern.compile("[^\\p{Alnum}??????]");
  134. Matcher unmatcher = unpattern.matcher(message);
  135. String cleanedmessage=unmatcher.replaceAll(" ");
  136. StringTokenizer st = new StringTokenizer(cleanedmessage);
  137. int nt = st.countTokens();
  138. String[] tokens = new String[nt];
  139. String[] stems = new String[nt];
  140. boolean m = true;
  141. int i=0;
  142. Pattern pattern = Pattern.compile("[\\p{Alnum}??????]+");
  143. while (st.hasMoreTokens() && m) {
  144. tokens[i] = st.nextToken().toLowerCase();
  145. try {
  146. Class stemClass = Class.forName("net.sf.snowball.ext." +
  147. this.language + "Stemmer");
  148. SnowballProgram stemmer = (SnowballProgram) stemClass.newInstance();
  149. Method stemMethod = stemClass.getMethod("stem", new Class[0]);
  150. stemmer.setCurrent(tokens[i]);
  151. Object [] emptyArgs = new Object[0];
  152. stemMethod.invoke(stemmer, emptyArgs);
  153. stems[i] = stemmer.getCurrent();
  154. Matcher matcher = pattern.matcher(tokens[i]);
  155. m=m && matcher.matches();
  156. i=i+1;
  157. } catch (Exception e) {}
  158. }
  159. int uresult=0;
  160. int fromid=0;
  161. int toid=0;
  162. int startword = 0;
  163. int endword = 0;
  164. for (i=0;i<nt && m;i++) {
  165. if (i==0) {
  166. startword=1;
  167. }
  168. if (i==(nt-1)) {
  169. endword=1;
  170. }
  171. if (i>0 && i<(nt-1) && i!=(nt-1)) {
  172. startword=0;
  173. endword=0;
  174. }
  175. try {
  176. Statement ustmt = (com.mysql.jdbc.Statement) this.con.createStatement();
  177. uresult = ustmt.executeUpdate("INSERT INTO `words` ( `botid`,`word`,`quantity`,`start`,`end`) \n" +
  178. "VALUES (\n" +
  179. "'"+this.id+"','"+tokens[i]+"','"+1+"','"+startword+"','"+endword+"'\n" +
  180. ");");
  181. ustmt.close();
  182. } catch (Exception e) {
  183. try {
  184. Statement ustmt = (com.mysql.jdbc.Statement) this.con.createStatement();
  185. uresult = ustmt.executeUpdate("UPDATE `words`\n" +
  186. "SET quantity=quantity+1,start=start+"+startword+",end=end+"+endword+"\n" +
  187. "WHERE `word`='"+tokens[i]+"' AND `botid`='"+this.id+"'\n" +
  188. ";");
  189. ustmt.close();
  190. } catch (Exception ee) {uresult=0;}
  191. }
  192. }
  193. int startstem=0;
  194. int endstem=0;
  195. for (i=0;i<nt && m;i++) {
  196. if (i==0) {
  197. startstem=1;
  198. }
  199. if (i==(nt-1)) {
  200. endstem=1;
  201. }
  202. if (i>0 && i<(nt-1) && i!=(nt-1)) {
  203. startstem=0;
  204. endstem=0;
  205. }
  206. try {
  207. Statement ustmt = (com.mysql.jdbc.Statement) this.con.createStatement();
  208. uresult = ustmt.executeUpdate("INSERT INTO `stems` ( `botid`,`stem`,`quantity`,`start`,`end`) \n" +
  209. "VALUES (\n" +
  210. "'"+this.id+"','"+stems[i]+"','"+1+"','"+startstem+"','"+endstem+"'\n" +
  211. ");");
  212. ustmt.close();
  213. } catch (Exception e) {
  214. try {
  215. Statement ustmt = (com.mysql.jdbc.Statement) this.con.createStatement();
  216. uresult = ustmt.executeUpdate("UPDATE `stems`\n" +
  217. "SET quantity=quantity+1,start=start+"+startstem+",end=end+"+endstem+"\n" +
  218. "WHERE `stem`='"+stems[i]+"' AND `botid`='"+this.id+"'\n" +
  219. ";");
  220. ustmt.close();
  221. } catch (Exception ee) {uresult=0;}
  222. }
  223. }
  224. }
  225. public void MakeWordsAndStems() {
  226. try {
  227. if (this.con.isClosed()) {
  228. this.Connect();
  229. }
  230. } catch (Exception e) {}
  231. try {
  232. Statement stmt = (com.mysql.jdbc.Statement) this.con.createStatement();
  233. ResultSet rs = stmt.executeQuery("SELECT * FROM botlogs WHERE action='pubmsg' AND botid='"+this.id+"'");
  234. while (rs.next()) {
  235. String message = rs.getString("content");
  236. this.MakeWordsAndStems(message.toLowerCase(),this.con);
  237. }
  238. stmt.close();
  239. } catch (Exception e) {}
  240. }
  241. public void MakeEdges(String message, Connection con) {
  242. this.con=con;
  243. Pattern unpattern = Pattern.compile("[^\\p{Alnum}??????]");
  244. Matcher unmatcher = unpattern.matcher(message);
  245. String cleanedmessage=unmatcher.replaceAll(" ");
  246. StringTokenizer st = new StringTokenizer(cleanedmessage);
  247. int nt = st.countTokens();
  248. String[] tokens = new String[nt];
  249. String[] stems = new String[nt];
  250. boolean m = true;
  251. int i=0;
  252. Pattern pattern = Pattern.compile("[\\p{Alnum}??????]+");
  253. while (st.hasMoreTokens() && m) {
  254. tokens[i] = st.nextToken().toLowerCase();
  255. try {
  256. Class stemClass = Class.forName("net.sf.snowball.ext." +
  257. this.language + "Stemmer");
  258. SnowballProgram stemmer = (SnowballProgram) stemClass.newInstance();
  259. Method stemMethod = stemClass.getMethod("stem", new Class[0]);
  260. stemmer.setCurrent(tokens[i]);
  261. Object [] emptyArgs = new Object[0];
  262. stemMethod.invoke(stemmer, emptyArgs);
  263. stems[i] = stemmer.getCurrent();
  264. Matcher matcher = pattern.matcher(tokens[i]);
  265. m=m && matcher.matches();
  266. i=i+1;
  267. } catch (Exception e) {}
  268. }
  269. int uresult=0;
  270. int fromid=0;
  271. int toid=0;
  272. for (i=0;i<nt && m;i++) {
  273. try {
  274. Statement fstmt = (com.mysql.jdbc.Statement) con.createStatement();
  275. ResultSet frs = fstmt.executeQuery("SELECT id FROM words WHERE word='"+tokens[i]+"' AND botid='"+this.id+"'");
  276. if(frs.next()) {
  277. fromid=frs.getInt("id");
  278. }
  279. frs.close();
  280. fstmt.close();
  281. } catch (Exception fe) {}
  282. if (nt>1) {
  283. try {
  284. Statement tstmt = (com.mysql.jdbc.Statement) con.createStatement();
  285. ResultSet trs = tstmt.executeQuery("SELECT id FROM words WHERE word='"+tokens[i+1]+"' AND botid='"+this.id+"'");
  286. if(trs.next()) {
  287. toid=trs.getInt("id");
  288. }
  289. trs.close();
  290. tstmt.close();
  291. } catch (Exception te) {}
  292. } else {
  293. toid=fromid;
  294. }
  295. try {
  296. Statement ustmt = (com.mysql.jdbc.Statement) this.con.createStatement();
  297. uresult = ustmt.executeUpdate("INSERT INTO `wedges` ( `botid`,`fromid`,`toid`,`quantity`) \n" +
  298. "VALUES (\n" +
  299. "'"+this.id+"','"+fromid+"','"+toid+"','"+1+"'\n" +
  300. ");");
  301. ustmt.close();
  302. } catch (Exception e) {
  303. try {
  304. Statement ustmt = (com.mysql.jdbc.Statement) this.con.createStatement();
  305. uresult = ustmt.executeUpdate("UPDATE `wedges`\n" +
  306. "SET quantity=quantity+1\n" +
  307. "WHERE `fromid`='"+fromid+"' AND `toid`='"+toid+"' AND `botid`='"+this.id+"'\n" +
  308. ";");
  309. ustmt.close();
  310. } catch (Exception ee) {uresult=0;}
  311. }
  312. }
  313. for (i=0;i<nt && m;i++) {
  314. try {
  315. Statement fstmt = (com.mysql.jdbc.Statement) con.createStatement();
  316. ResultSet frs = fstmt.executeQuery("SELECT id FROM stems WHERE stem='"+stems[i]+"'");
  317. if(frs.next()) {
  318. fromid=frs.getInt("id");
  319. }
  320. frs.close();
  321. fstmt.close();
  322. } catch (Exception fe) {}
  323. if (nt>1) {
  324. try {
  325. Statement tstmt = (com.mysql.jdbc.Statement) con.createStatement();
  326. ResultSet trs = tstmt.executeQuery("SELECT id FROM stems WHERE stem='"+stems[i+1]+"'");
  327. if(trs.next()) {
  328. toid=trs.getInt("id");
  329. }
  330. trs.close();
  331. tstmt.close();
  332. } catch (Exception te) {}
  333. } else {
  334. toid=fromid;
  335. }
  336. try {
  337. Statement ustmt = (com.mysql.jdbc.Statement) this.con.createStatement();
  338. uresult = ustmt.executeUpdate("INSERT INTO `sedges` ( `botid`,`fromid`,`toid`,`quantity`) \n" +
  339. "VALUES (\n" +
  340. "'"+this.id+"','"+fromid+"','"+toid+"','"+1+"'\n" +
  341. ");");
  342. ustmt.close();
  343. } catch (Exception e) {
  344. try {
  345. Statement ustmt = (com.mysql.jdbc.Statement) this.con.createStatement();
  346. uresult = ustmt.executeUpdate("UPDATE `sedges`\n" +
  347. "SET quantity=quantity+1\n" +
  348. "WHERE `fromid`='"+fromid+"' AND `toid`='"+toid+"' AND `botid`='"+this.id+"'\n" +
  349. ";");
  350. ustmt.close();
  351. } catch (Exception ee) {uresult=0;}
  352. }
  353. }
  354. }
  355. public void MakeEdges() {
  356. try {
  357. if (this.con.isClosed()) {
  358. this.Connect();
  359. }
  360. } catch (Exception e) {}
  361. try {
  362. Statement stmt = (com.mysql.jdbc.Statement) this.con.createStatement();
  363. ResultSet rs = stmt.executeQuery("SELECT * FROM botlogs WHERE action='pubmsg' AND botid='"+this.id+"'");
  364. while (rs.next()) {
  365. String message = rs.getString("content");
  366. this.MakeEdges(message.toLowerCase(),this.con);
  367. }
  368. stmt.close();
  369. } catch (Exception e) {}
  370. }
  371. }