/androjena/src/com/hp/hpl/jena/rdf/arp/impl/XMLHandler.java

http://androjena.googlecode.com/ · Java · 538 lines · 359 code · 71 blank · 108 comment · 54 complexity · e43da18b51a44267e4a524d12051631e MD5 · raw file

  1. /*
  2. * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Hewlett-Packard Development Company, LP All rights
  3. * reserved.
  4. *
  5. * (c) Copyright 2003, Plugged In Software
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions are met: 1.
  9. * Redistributions of source code must retain the above copyright notice, this
  10. * list of conditions and the following disclaimer. 2. Redistributions in
  11. * binary form must reproduce the above copyright notice, this list of
  12. * conditions and the following disclaimer in the documentation and/or other
  13. * materials provided with the distribution. 3. The name of the author may not
  14. * be used to endorse or promote products derived from this software without
  15. * specific prior written permission.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
  18. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  19. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
  20. * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  22. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  23. * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  24. * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  25. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  26. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. *
  28. * $Id: XMLHandler.java,v 1.1 2009/06/29 08:55:38 castagna Exp $
  29. *
  30. * AUTHOR: Jeremy J. Carroll
  31. */
  32. /*
  33. * ARPFilter.java
  34. *
  35. * Created on June 21, 2001, 10:01 PM
  36. */
  37. package com.hp.hpl.jena.rdf.arp.impl;
  38. import java.net.MalformedURLException;
  39. import java.util.HashMap;
  40. import java.util.Iterator;
  41. import java.util.Map;
  42. import org.xml.sax.Attributes;
  43. import org.xml.sax.Locator;
  44. import org.xml.sax.SAXException;
  45. import org.xml.sax.SAXParseException;
  46. import com.hp.hpl.jena.iri.IRI;
  47. import com.hp.hpl.jena.iri.IRIFactory;
  48. import com.hp.hpl.jena.rdf.arp.ALiteral;
  49. import com.hp.hpl.jena.rdf.arp.ARPErrorNumbers;
  50. import com.hp.hpl.jena.rdf.arp.ARPHandlers;
  51. import com.hp.hpl.jena.rdf.arp.ARPOptions;
  52. import com.hp.hpl.jena.rdf.arp.AResource;
  53. import com.hp.hpl.jena.rdf.arp.ExtendedHandler;
  54. import com.hp.hpl.jena.rdf.arp.FatalParsingErrorException;
  55. import com.hp.hpl.jena.rdf.arp.ParseException;
  56. import com.hp.hpl.jena.rdf.arp.StatementHandler;
  57. import com.hp.hpl.jena.rdf.arp.states.Frame;
  58. import com.hp.hpl.jena.rdf.arp.states.FrameI;
  59. import com.hp.hpl.jena.rdf.arp.states.LookingForRDF;
  60. import com.hp.hpl.jena.rdf.arp.states.StartStateRDForDescription;
  61. /**
  62. * This class converts SAX events into a stream of encapsulated events suitable
  63. * for the RDF parser. In effect, this is the RDF lexer. updates by kers to
  64. * handle exporting namespace prefix maps.
  65. *
  66. * @author jjc
  67. */
  68. public class XMLHandler extends LexicalHandlerImpl implements ARPErrorNumbers,
  69. Names {
  70. boolean encodingProblems = false;
  71. protected Map<IRI, Map<String,Location>> idsUsed = new HashMap<IRI, Map<String,Location>>();
  72. protected int idsUsedCount = 0;
  73. public XMLHandler() {}
  74. public void triple(ANode s, ANode p, ANode o) {
  75. StatementHandler stmt;
  76. boolean bad=s.isTainted() || p.isTainted() || o.isTainted();
  77. if (bad) {
  78. stmt = badStatementHandler;
  79. } else {
  80. stmt = handlers.getStatementHandler();
  81. }
  82. AResourceInternal subj = (AResourceInternal) s;
  83. AResourceInternal pred = (AResourceInternal) p;
  84. if (!bad)
  85. subj.setHasBeenUsed();
  86. if (o instanceof AResource) {
  87. AResourceInternal obj = (AResourceInternal) o;
  88. if (!bad) obj.setHasBeenUsed();
  89. stmt.statement(subj, pred, obj);
  90. } else
  91. stmt.statement(subj, pred, (ALiteral) o);
  92. }
  93. // This is the current frame.
  94. FrameI frame;
  95. @Override
  96. public void startPrefixMapping(String prefix, String uri)
  97. throws SAXParseException {
  98. checkNamespaceURI(uri);
  99. handlers.getNamespaceHandler().startPrefixMapping(prefix, uri);
  100. }
  101. @Override
  102. public void endPrefixMapping(String prefix) {
  103. handlers.getNamespaceHandler().endPrefixMapping(prefix);
  104. }
  105. public Locator getLocator() {
  106. return locator;
  107. }
  108. Locator locator;
  109. @Override
  110. public void setDocumentLocator(Locator locator) {
  111. this.locator = locator;
  112. }
  113. static final private boolean DEBUG = false;
  114. @Override
  115. public void startElement(String uri, String localName, String rawName,
  116. Attributes atts) throws SAXException {
  117. if (Thread.interrupted())
  118. warning(null, ERR_INTERRUPTED, "Interrupt detected.");
  119. FrameI oldFrame = frame;
  120. frame = frame.startElement(uri, localName, rawName, atts);
  121. if (DEBUG)
  122. System.err.println("<" + rawName + "> :: "
  123. + getSimpleName(oldFrame.getClass()) + " --> "
  124. + getSimpleName(frame.getClass()));
  125. }
  126. @Override
  127. public void endElement(String uri, String localName, String rawName)
  128. throws SAXException {
  129. frame.endElement();
  130. frame = frame.getParent();
  131. frame.afterChild();
  132. if (DEBUG)
  133. System.err.println("</" + rawName + "> :: <--"
  134. + getSimpleName(frame.getClass()));
  135. }
  136. static public String getSimpleName(Class< ? extends FrameI> c) {
  137. String rslt[] = c.getName().split("\\.");
  138. return rslt[rslt.length - 1];
  139. }
  140. @Override
  141. public void characters(char ch[], int start, int length)
  142. throws SAXException {
  143. frame.characters(ch, start, length);
  144. }
  145. @Override
  146. public void ignorableWhitespace(char ch[], int start, int length)
  147. throws SAXException { // Never called.
  148. characters(ch, start, length);
  149. }
  150. void setUserData(String nodeId, Object v) {
  151. nodeIdUserData.put(nodeId, v);
  152. }
  153. Object getUserData(String nodeId) {
  154. return nodeIdUserData.get(nodeId);
  155. }
  156. @Override
  157. public void comment(char[] ch, int start, int length)
  158. throws SAXParseException {
  159. frame.comment(ch, start, length);
  160. }
  161. @Override
  162. public void processingInstruction(String target, String data)
  163. throws SAXException {
  164. frame.processingInstruction(target, data);
  165. }
  166. public void warning(Taint taintMe,int id, String msg) throws SAXParseException {
  167. if (options.getErrorMode(id) != EM_IGNORE)
  168. warning(taintMe,id, location(), msg);
  169. }
  170. void warning(Taint taintMe, int id, Location loc, String msg) throws SAXParseException {
  171. if (options.getErrorMode(id) != EM_IGNORE)
  172. warning(taintMe, id, new ParseException(id, loc, msg) {
  173. private static final long serialVersionUID = 1990910846204964756L;
  174. });
  175. }
  176. void generalError( int id, Exception e) throws SAXParseException {
  177. Location where = new Location(locator);
  178. // System.err.println(e.getMessage());
  179. warning(null, id, new ParseException(id, where, e));
  180. }
  181. void warning(Taint taintMe, int id, SAXParseException e) throws SAXParseException {
  182. try {
  183. switch (options.getErrorMode(id)) {
  184. case EM_IGNORE:
  185. break;
  186. case EM_WARNING:
  187. handlers.getErrorHandler().warning(e);
  188. break;
  189. case EM_ERROR:
  190. if (taintMe != null)
  191. taintMe.taint();
  192. handlers.getErrorHandler().error(e);
  193. break;
  194. case EM_FATAL:
  195. handlers.getErrorHandler().fatalError(e);
  196. break;
  197. }
  198. } catch (SAXParseException xx) {
  199. throw xx;
  200. } catch (SAXException ee) {
  201. throw new WrappedException(ee);
  202. }
  203. if (e instanceof ParseException && ((ParseException) e).isPromoted())
  204. throw e;
  205. if (options.getErrorMode(id) == EM_FATAL) {
  206. // If we get here, we shouldn't go on
  207. // throw an error into Jena.
  208. throw new FatalParsingErrorException();
  209. }
  210. }
  211. @Override
  212. public void error(SAXParseException e) throws SAXParseException {
  213. warning(null,ERR_SAX_ERROR, e);
  214. }
  215. @Override
  216. public void warning(SAXParseException e) throws SAXParseException {
  217. warning(null,WARN_SAX_WARNING, e);
  218. }
  219. @Override
  220. public void fatalError(SAXParseException e) throws SAXException {
  221. warning(null,ERR_SAX_FATAL_ERROR, e);
  222. // If we get here, we shouldn't go on
  223. // throw an error into Jena.
  224. throw new FatalParsingErrorException();
  225. }
  226. /**
  227. * @param v
  228. */
  229. public void endLocalScope(ANode v) {
  230. if (handlers.getExtendedHandler() != nullScopeHandler) {
  231. ARPResource bn = (ARPResource) v;
  232. if (!bn.getHasBeenUsed())
  233. return;
  234. if (bn.hasNodeID()) {
  235. // save for later end scope
  236. if (handlers.getExtendedHandler().discardNodesWithNodeID())
  237. return;
  238. String bnodeID = bn.nodeID;
  239. if (!nodeIdUserData.containsKey(bnodeID))
  240. nodeIdUserData.put(bnodeID, null);
  241. } else {
  242. handlers.getExtendedHandler().endBNodeScope(bn);
  243. }
  244. }
  245. }
  246. public void endRDF() {
  247. handlers.getExtendedHandler().endRDF();
  248. }
  249. public void startRDF() {
  250. handlers.getExtendedHandler().startRDF();
  251. }
  252. boolean ignoring(int eCode) {
  253. return options.getErrorMode(eCode) == EM_IGNORE;
  254. }
  255. public boolean isError(int eCode) {
  256. return options.getErrorMode(eCode) == EM_ERROR;
  257. }
  258. protected AbsXMLContext initialContext(String base, String lang)
  259. throws SAXParseException {
  260. return initialContextWithBase(base).withLang(this,lang);
  261. }
  262. private boolean allowRelativeReferences = false;
  263. private AbsXMLContext initialContextWithBase(String base) throws SAXParseException {
  264. allowRelativeReferences = false;
  265. if (base == null) {
  266. warning(null,IGN_NO_BASE_URI_SPECIFIED,
  267. "Base URI not specified for input file; local URI references will be in error.");
  268. return new XMLBaselessContext(this,
  269. ERR_RESOLVING_URI_AGAINST_NULL_BASE);
  270. } else if (base.equals("")) {
  271. allowRelativeReferences = true;
  272. warning(null,IGN_NO_BASE_URI_SPECIFIED,
  273. "Base URI specified as \"\"; local URI references will not be resolved.");
  274. return new XMLBaselessContext(this,
  275. WARN_RESOLVING_URI_AGAINST_EMPTY_BASE);
  276. } else {
  277. // if (base.toLowerCase().startsWith("file:")
  278. // && base.length()>5
  279. // && base.charAt(5) != '/'
  280. // ) {
  281. // System.err.print(base);
  282. // try {
  283. // base = new File(base.substring(5)).toURL().toString();
  284. // if (base.length()<=6
  285. // || base.charAt(6)!= '/')
  286. // base = "file://"+base.substring(5);
  287. // } catch (MalformedURLException e) {
  288. // // ignore, just leave it alone.
  289. // }
  290. // System.err.println(" ==> "+base);
  291. //
  292. // }
  293. return new XMLBaselessContext(this,
  294. ERR_RESOLVING_AGAINST_RELATIVE_BASE).withBase(this,base);
  295. }
  296. }
  297. /*
  298. private XMLContext initialContextWithBasex(String base)
  299. throws SAXParseException {
  300. XMLContext rslt = new XMLContext(this, base);
  301. RDFURIReference b = rslt.getURI();
  302. if (base == null) {
  303. warning(null,IGN_NO_BASE_URI_SPECIFIED,
  304. "Base URI not specified for input file; local URI references will be in error.");
  305. } else if (base.equals("")) {
  306. warning(null,IGN_NO_BASE_URI_SPECIFIED,
  307. "Base URI specified as \"\"; local URI references will not be resolved.");
  308. } else {
  309. checkBadURI(null,b);
  310. // Warnings on bad base.
  311. // if (b.isVeryBad()||b.isRelative()) {
  312. // return
  313. }
  314. return rslt;
  315. }
  316. */
  317. private ARPOptions options = ARPOptions.createNewOptions() ;
  318. private ARPHandlers handlers = ARPHandlers.createNewHandlers() ;
  319. StatementHandler getStatementHandler() {
  320. return handlers.getStatementHandler();
  321. }
  322. public ARPHandlers getHandlers() {
  323. return handlers;
  324. }
  325. public ARPOptions getOptions() {
  326. return options;
  327. }
  328. public void setOptionsWith(ARPOptions newOpts) {
  329. options = newOpts.copy();
  330. }
  331. public void setHandlersWith(ARPHandlers newHh) {
  332. handlers = ARPHandlers.createNewHandlers() ;
  333. handlers.setErrorHandler(newHh.getErrorHandler());
  334. handlers.setExtendedHandler(newHh.getExtendedHandler());
  335. handlers.setNamespaceHandler(newHh.getNamespaceHandler());
  336. handlers.setStatementHandler(newHh.getStatementHandler());
  337. }
  338. private Map<String, Object> nodeIdUserData;
  339. public void initParse(String base, String lang) throws SAXParseException {
  340. nodeIdUserData = new HashMap<String, Object>();
  341. idsUsed =
  342. ignoring(WARN_REDEFINITION_OF_ID)?
  343. null:
  344. new HashMap<IRI, Map<String,Location>>();
  345. idsUsedCount = 0;
  346. if (options.getEmbedding())
  347. frame = new LookingForRDF(this, initialContext(base, lang));
  348. else
  349. frame = new StartStateRDForDescription(this, initialContext(base,
  350. lang));
  351. }
  352. /**
  353. * This method must be always be called after parsing, e.g. in a finally
  354. * block.
  355. *
  356. */
  357. void afterParse() {
  358. while (frame != null) {
  359. frame.abort();
  360. frame = frame.getParent();
  361. }
  362. // endRDF();
  363. endBnodeScope();
  364. idsUsed = null;
  365. }
  366. void endBnodeScope() {
  367. if (handlers.getExtendedHandler() != nullScopeHandler) {
  368. Iterator<String> it = nodeIdUserData.keySet().iterator();
  369. while (it.hasNext()) {
  370. String nodeId = it.next();
  371. ARPResource bn = new ARPResource(this, nodeId);
  372. handlers.getExtendedHandler().endBNodeScope(bn);
  373. }
  374. }
  375. }
  376. public Location location() {
  377. return new Location(locator);
  378. }
  379. private IRIFactory factory = null ;
  380. IRIFactory iriFactory() {
  381. if (factory == null) {
  382. factory = options.getIRIFactory() ;
  383. if ( factory == null )
  384. factory = ARPOptions.getIRIFactoryGlobal() ;
  385. }
  386. return factory;
  387. }
  388. private void checkNamespaceURI(String uri) throws SAXParseException {
  389. ((Frame) frame).checkEncoding(null,uri);
  390. if (uri.length() != 0)
  391. {
  392. IRI u = iriFactory().create(uri);
  393. // if (u.isVeryBad()) {
  394. // warning(null,
  395. // WARN_BAD_NAMESPACE_URI,
  396. // "The namespace URI: <"
  397. // + uri
  398. // + "> is not well formed.");
  399. // return;
  400. //
  401. // }
  402. if (!u.isAbsolute()) {
  403. warning(null,
  404. WARN_RELATIVE_NAMESPACE_URI_DEPRECATED,
  405. "The namespace URI: <"
  406. + uri
  407. + "> is relative. Such use has been deprecated by the W3C, and may result in RDF interoperability failures. Use an absolute namespace URI.");
  408. }
  409. try {
  410. if (!u.toASCIIString().equals(u.toString()))
  411. warning(null,
  412. WARN_BAD_NAMESPACE_URI,
  413. "Non-ascii characters in a namespace URI may not be completely portable: <"
  414. + u.toString()
  415. + ">. Resulting RDF URI references are legal.");
  416. } catch (MalformedURLException e) {
  417. warning(null,
  418. WARN_BAD_NAMESPACE_URI,
  419. "toAscii failed for namespace URI: <"
  420. + u.toString()
  421. + ">. " + e.getMessage());
  422. }
  423. if (uri.startsWith(rdfns) && !uri.equals(rdfns))
  424. warning(null,WARN_BAD_RDF_NAMESPACE_URI, "Namespace URI ref <"
  425. + uri + "> may not be used in RDF/XML.");
  426. if (uri.startsWith(xmlns) && !uri.equals(xmlns))
  427. warning(null,WARN_BAD_XML_NAMESPACE_URI, "Namespace URI ref <"
  428. + uri + "> may not be used in RDF/XML.");
  429. }
  430. }
  431. public boolean allowRelativeURIs() {
  432. return allowRelativeReferences;
  433. }
  434. private IRI sameDocRef;
  435. public IRI sameDocRef() {
  436. if (sameDocRef==null){
  437. sameDocRef = iriFactory().create("");
  438. }
  439. return sameDocRef;
  440. }
  441. private StatementHandler badStatementHandler = nullStatementHandler;
  442. public void setBadStatementHandler(StatementHandler sh) {
  443. badStatementHandler = sh;
  444. }
  445. final public static StatementHandler nullStatementHandler =
  446. new StatementHandler() {
  447. public void statement(AResource s, AResource p, AResource o) {
  448. }
  449. public void statement(AResource s, AResource p, ALiteral o) {
  450. }
  451. };
  452. final public static ExtendedHandler nullScopeHandler = new ExtendedHandler() {
  453. public void endBNodeScope(AResource bnode) {
  454. }
  455. public void startRDF() {
  456. }
  457. public void endRDF() {
  458. }
  459. public boolean discardNodesWithNodeID() {
  460. return true;
  461. }
  462. };
  463. }