PageRenderTime 51ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/v1.01/project/src/net/sf/josser/rdf/impl/Content.java

https://bitbucket.org/novelli/josser
Java | 365 lines | 251 code | 24 blank | 90 comment | 103 complexity | 7beee5de96886203e594be46518b1793 MD5 | raw file
Possible License(s): Apache-2.0, GPL-2.0, LGPL-2.1, BSD-3-Clause
  1. /*
  2. ****************************************************************************************
  3. * Copyright © Giovanni Novelli
  4. * All Rights Reserved.
  5. ****************************************************************************************
  6. *
  7. * Title: JOSSER
  8. *
  9. * Description: JOSSER - A Java tool capable of parsing DMOZ RDF dumps and exporting them to
  10. * any JDBC-compliant relational database
  11. *
  12. * Content.java
  13. *
  14. * Created on 22 October 2005, 22.00 by Giovanni Novelli
  15. *
  16. ****************************************************************************************
  17. * JOSSER is available under the terms of the GNU General Public License Version 2.
  18. *
  19. * The author does NOT allow redistribution of modifications of JOSSER under the terms
  20. * of the GNU General Public License Version 3 or any later version.
  21. *
  22. * This program is distributed in the hope that it will be useful, but WITHOUT ANY
  23. * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
  24. * PARTICULAR PURPOSE.
  25. *
  26. * For more details read file LICENSE
  27. *****************************************************************************************
  28. *
  29. * $Revision: 20 $
  30. * $Id: Content.java 20 2008-01-17 12:47:41Z gnovelli $
  31. * $HeadURL: https://josser.svn.sourceforge.net/svnroot/josser/tags/v1.01/project/src/net/sf/josser/rdf/impl/Content.java $
  32. *
  33. *****************************************************************************************
  34. */
  35. package net.sf.josser.rdf.impl;
  36. import net.sf.josser.jdbc.impl.Category;
  37. import net.sf.josser.jdbc.impl.ExternalPage;
  38. import net.sf.josser.Josser;
  39. import net.sf.josser.util.Static;
  40. /**
  41. * @author Copyright © Giovanni Novelli. All rights reserved.
  42. */
  43. public class Content extends Structure {
  44. private boolean processingExternalPage = false;
  45. private ExternalPage externalPageRow = null;
  46. private boolean processingExternalPageDescription = false;
  47. private String externalPageDescription = null;
  48. public Content(final String path) {
  49. super(path);
  50. this.setCategoryRow(new Category());
  51. this.setProcessingExternalPage(false);
  52. this.setExternalPageRow(null);
  53. this.setProcessingExternalPageDescription(false);
  54. this.setExternalPageDescription(null);
  55. this.setPhase(0);
  56. }
  57. @Override
  58. protected void processCategoryStart(final String line) {
  59. this.setProcessed(true);
  60. String Topic = null;
  61. String[] tokens = null;
  62. tokens = line.split("<Topic r:id=\"");
  63. if (tokens.length == 2) {
  64. tokens = tokens[1].split("\">");
  65. if (tokens.length == 1) {
  66. Topic = tokens[0];
  67. this.setProcessingCategory(true);
  68. this.getCategoryRow().setTopic(Topic);
  69. Static
  70. .setFiltermatching(Topic.startsWith(Josser
  71. .getTopicfilter()));
  72. if ((this.getPhase() == 0)
  73. && Topic.startsWith(Josser.getTopicfilter())) {
  74. this.setPhase(1);
  75. } else if ((this.getPhase() == 1)
  76. && !Topic.startsWith(Josser.getTopicfilter())) {
  77. this.setPhase(2);
  78. }
  79. }
  80. } else {
  81. this.setProcessed(false);
  82. }
  83. }
  84. @Override
  85. public void processCategory(final String line) {
  86. this.setProcessed(true);
  87. String[] tokens = null;
  88. if (line.startsWith(" <catid>")) {
  89. int catid = 0;
  90. tokens = line.split(" <catid>");
  91. if (tokens.length == 2) {
  92. tokens = tokens[1].split("</catid>");
  93. if (tokens.length == 1) {
  94. catid = Integer.parseInt(tokens[0]);
  95. this.getCategoryRow().setCatid(catid);
  96. }
  97. }
  98. } else if (line.startsWith("</Topic>")) {
  99. this.setProcessingCategory(false);
  100. } else if (line.startsWith(" <link r:resource=\"")) {
  101. /*
  102. * FIXME At the moment parsing is done on nodes of type ExternalPage
  103. */
  104. } else if (line.startsWith(" <link1 r:resource=\"")) {
  105. /*
  106. * FIXME At the moment parsing is done on nodes of type ExternalPage
  107. */
  108. } else if (line.startsWith(" <rss r:resource=\"")) {
  109. /*
  110. * FIXME At the moment parsing is done on nodes of type ExternalPage
  111. */
  112. } else if (line.startsWith(" <atom r:resource=\"")) {
  113. /*
  114. * FIXME At the moment parsing is done on nodes of type ExternalPage
  115. */
  116. } else if (line.startsWith(" <rss1 r:resource=\"")) {
  117. /*
  118. * FIXME At the moment parsing is done on nodes of type ExternalPage
  119. */
  120. } else if (line.startsWith(" <pdf r:resource=\"")) {
  121. /*
  122. * FIXME At the moment parsing is done on nodes of type ExternalPage
  123. */
  124. } else if (line.startsWith(" <pdf1 r:resource=\"")) {
  125. /*
  126. * FIXME At the moment parsing is done on nodes of type ExternalPage
  127. */
  128. } else {
  129. this.setProcessed(false);
  130. }
  131. }
  132. protected void processExternalPage(final String line) {
  133. this.setProcessed(true);
  134. String[] tokens = null;
  135. if (line.startsWith(" <d:Title>")) {
  136. String Title = null;
  137. tokens = line.split(" <d:Title>");
  138. if (tokens.length == 2) {
  139. tokens = tokens[1].split("</d:Title>");
  140. if (tokens.length == 1) {
  141. Title = tokens[0];
  142. this.getExternalPageRow().setTitle(Title);
  143. }
  144. }
  145. } else if (line.startsWith("</ExternalPage>")) {
  146. this.setProcessingExternalPage(false);
  147. this.getExternalPageRow().addBatch();
  148. } else if (line.startsWith(" <d:Description>")) {
  149. tokens = line.split(" <d:Description>");
  150. if (tokens.length == 2) {
  151. if (tokens[1].endsWith("</d:Description>")) {
  152. tokens = tokens[1].split("</d:Description>");
  153. if (tokens.length == 1) {
  154. this.setExternalPageDescription(tokens[0]);
  155. } else {
  156. this.setExternalPageDescription("");
  157. }
  158. this.getExternalPageRow().setDescription(
  159. this.getExternalPageDescription());
  160. } else {
  161. this.setProcessingExternalPageDescription(true);
  162. this.setExternalPageDescription(tokens[1]);
  163. }
  164. }
  165. } else if (line.endsWith(" </d:Description>")) {
  166. tokens = line.split(" </d:Description>");
  167. if (tokens.length == 2) {
  168. this.setExternalPageDescription(this
  169. .getExternalPageDescription()
  170. + tokens[0]);
  171. }
  172. this.setProcessingExternalPageDescription(false);
  173. this.getExternalPageRow().setDescription(
  174. this.getExternalPageDescription());
  175. } else if (this.isProcessingExternalPageDescription()) {
  176. this.setExternalPageDescription(this.getExternalPageDescription()
  177. + line);
  178. } else if (line.startsWith(" <topic>")) {
  179. /*
  180. * FIXME At the moment parsing of Topic is done once in nodes of
  181. * type Topic and not in nodes of type ExternalPage
  182. */
  183. } else if (line.startsWith(" <priority>")) {
  184. int priority = 0;
  185. tokens = line.split(" <priority>");
  186. if (tokens.length == 2) {
  187. tokens = tokens[1].split("</priority>");
  188. if (tokens.length == 1) {
  189. priority = Integer.parseInt(tokens[0]);
  190. this.getExternalPageRow().setPriority(priority);
  191. }
  192. }
  193. } else if (line.startsWith(" <mediadate>")) {
  194. String mediadate = null;
  195. tokens = line.split(" <mediadate>");
  196. if (tokens.length == 2) {
  197. tokens = tokens[1].split("</mediadate>");
  198. if (tokens.length == 1) {
  199. mediadate = tokens[0];
  200. this.getExternalPageRow().setMediadate(mediadate);
  201. }
  202. }
  203. } else if (line.startsWith(" <ages>")) {
  204. String ages = null;
  205. tokens = line.split(" <ages>");
  206. if (tokens.length == 2) {
  207. tokens = tokens[1].split("</ages>");
  208. if (tokens.length == 1) {
  209. ages = tokens[0];
  210. this.getExternalPageRow().setAges(ages);
  211. }
  212. }
  213. } else if (line.startsWith(" <type>")) {
  214. String type = null;
  215. tokens = line.split(" <type>");
  216. if (tokens.length == 2) {
  217. tokens = tokens[1].split("</type>");
  218. if (tokens.length == 1) {
  219. type = tokens[0];
  220. this.getExternalPageRow().setType(type);
  221. }
  222. }
  223. } else {
  224. this.setProcessed(false);
  225. }
  226. }
  227. protected void processExternalPageStart(final String line) {
  228. this.setProcessed(true);
  229. String about = null;
  230. String[] tokens = null;
  231. tokens = line.split("<ExternalPage about=\"");
  232. if (tokens.length == 2) {
  233. tokens = tokens[1].split("\">");
  234. if (tokens.length == 1) {
  235. about = tokens[0];
  236. this.setExternalPageRow(new ExternalPage());
  237. this.getExternalPageRow().setCatid(
  238. this.getCategoryRow().getCatid());
  239. this.getExternalPageRow().setLink(about);
  240. this.setProcessingExternalPage(true);
  241. } else {
  242. this.setExternalPageRow(new ExternalPage());
  243. this.getExternalPageRow().setLink("");
  244. this.setProcessingExternalPage(true);
  245. }
  246. } else {
  247. this.setProcessed(false);
  248. }
  249. }
  250. @Override
  251. public void process(final String line) {
  252. this.setProcessed(false);
  253. if (this.isProcessingCategory() || this.isProcessingExternalPage()) {
  254. if (this.isProcessingCategory()) {
  255. this.processCategory(line);
  256. } else if (this.isProcessingExternalPage()) {
  257. this.processExternalPage(line);
  258. }
  259. if (!this.isProcessed() && (line.length() > 0)) {
  260. }
  261. } else {
  262. if (!this.isProcessingExternalPage()) {
  263. if (line.startsWith("<ExternalPage about=\"")) {
  264. this.processExternalPageStart(line);
  265. }
  266. }
  267. if (!this.isProcessingCategory()) {
  268. if (line.startsWith("<Topic r:id=\"")) {
  269. this.processCategoryStart(line);
  270. }
  271. }
  272. if (!this.isProcessed() && (line.length() > 0)) {
  273. }
  274. }
  275. }
  276. @Override
  277. public int batchStore() {
  278. int result = 0;
  279. result += this.getExternalPageRow().executeBatch();
  280. return result;
  281. }
  282. @Override
  283. public int batchClear() {
  284. return this.getExternalPageRow().batchClear();
  285. }
  286. /**
  287. * @param externalPageDescription
  288. * The externalPageDescription to set.
  289. */
  290. protected void setExternalPageDescription(
  291. final String externalPageDescription) {
  292. this.externalPageDescription = externalPageDescription;
  293. }
  294. /**
  295. * @return Returns the externalPageDescription.
  296. */
  297. protected String getExternalPageDescription() {
  298. return this.externalPageDescription;
  299. }
  300. /**
  301. * @param externalPageRow
  302. * The externalPageRow to set.
  303. */
  304. protected void setExternalPageRow(final ExternalPage externalPageRow) {
  305. this.externalPageRow = externalPageRow;
  306. }
  307. /**
  308. * @return Returns the externalPageRow.
  309. */
  310. protected ExternalPage getExternalPageRow() {
  311. return this.externalPageRow;
  312. }
  313. /**
  314. * @param processingExternalPage
  315. * The processingExternalPage to set.
  316. */
  317. protected void setProcessingExternalPage(
  318. final boolean processingExternalPage) {
  319. this.processingExternalPage = processingExternalPage;
  320. }
  321. /**
  322. * @return Returns the processingExternalPage.
  323. */
  324. protected boolean isProcessingExternalPage() {
  325. return this.processingExternalPage;
  326. }
  327. /**
  328. * @param processingExternalPageDescription
  329. * The processingExternalPageDescription to set.
  330. */
  331. protected void setProcessingExternalPageDescription(
  332. final boolean processingExternalPageDescription) {
  333. this.processingExternalPageDescription = processingExternalPageDescription;
  334. }
  335. /**
  336. * @return Returns the processingExternalPageDescription.
  337. */
  338. protected boolean isProcessingExternalPageDescription() {
  339. return this.processingExternalPageDescription;
  340. }
  341. }