PageRenderTime 48ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/core/src/main/java/com/github/jsonldjava/core/RDFDatasetUtils.java

http://github.com/tristan/jsonld-java
Java | 537 lines | 411 code | 44 blank | 82 comment | 91 complexity | 8c1f659bf568f08112c96deb5cb32d80 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. package com.github.jsonldjava.core;
import static com.github.jsonldjava.core.JSONLDConsts.RDF_FIRST;
import static com.github.jsonldjava.core.JSONLDConsts.RDF_LANGSTRING;
import static com.github.jsonldjava.core.JSONLDConsts.RDF_NIL;
import static com.github.jsonldjava.core.JSONLDConsts.RDF_REST;
import static com.github.jsonldjava.core.JSONLDConsts.RDF_TYPE;
import static com.github.jsonldjava.core.JSONLDConsts.XSD_BOOLEAN;
import static com.github.jsonldjava.core.JSONLDConsts.XSD_DOUBLE;
import static com.github.jsonldjava.core.JSONLDConsts.XSD_INTEGER;
import static com.github.jsonldjava.core.JSONLDConsts.XSD_STRING;
import static com.github.jsonldjava.core.JSONLDUtils.isKeyword;
import static com.github.jsonldjava.core.JSONLDUtils.isList;
import static com.github.jsonldjava.core.JSONLDUtils.isObject;
import static com.github.jsonldjava.core.JSONLDUtils.isValue;
import static com.github.jsonldjava.core.Regex.HEX;

import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
  24. public class RDFDatasetUtils {
  25. /**
  26. * Creates an array of RDF triples for the given graph.
  27. *
  28. * @param graph
  29. * the graph to create RDF triples for.
  30. * @param namer
  31. * a UniqueNamer for assigning blank node names.
  32. *
  33. * @return the array of RDF triples for the given graph.
  34. */
  35. @Deprecated
  36. // use RDFDataset.graphToRDF
  37. static List<Object> graphToRDF(Map<String, Object> graph, UniqueNamer namer) {
  38. final List<Object> rval = new ArrayList<Object>();
  39. for (final String id : graph.keySet()) {
  40. final Map<String, Object> node = (Map<String, Object>) graph.get(id);
  41. final List<String> properties = new ArrayList<String>(node.keySet());
  42. Collections.sort(properties);
  43. for (String property : properties) {
  44. final Object items = node.get(property);
  45. if ("@type".equals(property)) {
  46. property = RDF_TYPE;
  47. } else if (isKeyword(property)) {
  48. continue;
  49. }
  50. for (final Object item : (List<Object>) items) {
  51. // RDF subjects
  52. final Map<String, Object> subject = new LinkedHashMap<String, Object>();
  53. if (id.indexOf("_:") == 0) {
  54. subject.put("type", "blank node");
  55. subject.put("value", namer.getName(id));
  56. } else {
  57. subject.put("type", "IRI");
  58. subject.put("value", id);
  59. }
  60. // RDF predicates
  61. final Map<String, Object> predicate = new LinkedHashMap<String, Object>();
  62. predicate.put("type", "IRI");
  63. predicate.put("value", property);
  64. // convert @list to triples
  65. if (isList(item)) {
  66. listToRDF((List<Object>) ((Map<String, Object>) item).get("@list"), namer,
  67. subject, predicate, rval);
  68. }
  69. // convert value or node object to triple
  70. else {
  71. final Object object = objectToRDF(item, namer);
  72. final Map<String, Object> tmp = new LinkedHashMap<String, Object>();
  73. tmp.put("subject", subject);
  74. tmp.put("predicate", predicate);
  75. tmp.put("object", object);
  76. rval.add(tmp);
  77. }
  78. }
  79. }
  80. }
  81. return rval;
  82. }
  83. /**
  84. * Converts a @list value into linked list of blank node RDF triples (an RDF
  85. * collection).
  86. *
  87. * @param list
  88. * the @list value.
  89. * @param namer
  90. * a UniqueNamer for assigning blank node names.
  91. * @param subject
  92. * the subject for the head of the list.
  93. * @param predicate
  94. * the predicate for the head of the list.
  95. * @param triples
  96. * the array of triples to append to.
  97. */
  98. private static void listToRDF(List<Object> list, UniqueNamer namer,
  99. Map<String, Object> subject, Map<String, Object> predicate, List<Object> triples) {
  100. final Map<String, Object> first = new LinkedHashMap<String, Object>();
  101. first.put("type", "IRI");
  102. first.put("value", RDF_FIRST);
  103. final Map<String, Object> rest = new LinkedHashMap<String, Object>();
  104. rest.put("type", "IRI");
  105. rest.put("value", RDF_REST);
  106. final Map<String, Object> nil = new LinkedHashMap<String, Object>();
  107. nil.put("type", "IRI");
  108. nil.put("value", RDF_NIL);
  109. for (final Object item : list) {
  110. final Map<String, Object> blankNode = new LinkedHashMap<String, Object>();
  111. blankNode.put("type", "blank node");
  112. blankNode.put("value", namer.getName());
  113. {
  114. final Map<String, Object> tmp = new LinkedHashMap<String, Object>();
  115. tmp.put("subject", subject);
  116. tmp.put("predicate", predicate);
  117. tmp.put("object", blankNode);
  118. triples.add(tmp);
  119. }
  120. subject = blankNode;
  121. predicate = first;
  122. final Object object = objectToRDF(item, namer);
  123. {
  124. final Map<String, Object> tmp = new LinkedHashMap<String, Object>();
  125. tmp.put("subject", subject);
  126. tmp.put("predicate", predicate);
  127. tmp.put("object", object);
  128. triples.add(tmp);
  129. }
  130. predicate = rest;
  131. }
  132. final Map<String, Object> tmp = new LinkedHashMap<String, Object>();
  133. tmp.put("subject", subject);
  134. tmp.put("predicate", predicate);
  135. tmp.put("object", nil);
  136. triples.add(tmp);
  137. }
  138. /**
  139. * Converts a JSON-LD value object to an RDF literal or a JSON-LD string or
  140. * node object to an RDF resource.
  141. *
  142. * @param item
  143. * the JSON-LD value or node object.
  144. * @param namer
  145. * the UniqueNamer to use to assign blank node names.
  146. *
  147. * @return the RDF literal or RDF resource.
  148. */
  149. private static Object objectToRDF(Object item, UniqueNamer namer) {
  150. final Map<String, Object> object = new LinkedHashMap<String, Object>();
  151. // convert value object to RDF
  152. if (isValue(item)) {
  153. object.put("type", "literal");
  154. final Object value = ((Map<String, Object>) item).get("@value");
  155. final Object datatype = ((Map<String, Object>) item).get("@type");
  156. // convert to XSD datatypes as appropriate
  157. if (value instanceof Boolean || value instanceof Number) {
  158. // convert to XSD datatype
  159. if (value instanceof Boolean) {
  160. object.put("value", value.toString());
  161. object.put("datatype", datatype == null ? XSD_BOOLEAN : datatype);
  162. } else if (value instanceof Double || value instanceof Float) {
  163. // canonical double representation
  164. final DecimalFormat df = new DecimalFormat("0.0###############E0");
  165. object.put("value", df.format(value));
  166. object.put("datatype", datatype == null ? XSD_DOUBLE : datatype);
  167. } else {
  168. final DecimalFormat df = new DecimalFormat("0");
  169. object.put("value", df.format(value));
  170. object.put("datatype", datatype == null ? XSD_INTEGER : datatype);
  171. }
  172. } else if (((Map<String, Object>) item).containsKey("@language")) {
  173. object.put("value", value);
  174. object.put("datatype", datatype == null ? RDF_LANGSTRING : datatype);
  175. object.put("language", ((Map<String, Object>) item).get("@language"));
  176. } else {
  177. object.put("value", value);
  178. object.put("datatype", datatype == null ? XSD_STRING : datatype);
  179. }
  180. }
  181. // convert string/node object to RDF
  182. else {
  183. final String id = isObject(item) ? (String) ((Map<String, Object>) item).get("@id")
  184. : (String) item;
  185. if (id.indexOf("_:") == 0) {
  186. object.put("type", "blank node");
  187. object.put("value", namer.getName(id));
  188. } else {
  189. object.put("type", "IRI");
  190. object.put("value", id);
  191. }
  192. }
  193. return object;
  194. }
  195. public static String toNQuads(RDFDataset dataset) {
  196. final List<String> quads = new ArrayList<String>();
  197. for (String graphName : dataset.graphNames()) {
  198. final List<RDFDataset.Quad> triples = dataset.getQuads(graphName);
  199. if ("@default".equals(graphName)) {
  200. graphName = null;
  201. }
  202. for (final RDFDataset.Quad triple : triples) {
  203. quads.add(toNQuad(triple, graphName));
  204. }
  205. }
  206. Collections.sort(quads);
  207. String rval = "";
  208. for (final String quad : quads) {
  209. rval += quad;
  210. }
  211. return rval;
  212. }
  213. static String toNQuad(RDFDataset.Quad triple, String graphName, String bnode) {
  214. final RDFDataset.Node s = triple.getSubject();
  215. final RDFDataset.Node p = triple.getPredicate();
  216. final RDFDataset.Node o = triple.getObject();
  217. String quad = "";
  218. // subject is an IRI or bnode
  219. if (s.isIRI()) {
  220. quad += "<" + escape(s.getValue()) + ">";
  221. }
  222. // normalization mode
  223. else if (bnode != null) {
  224. quad += bnode.equals(s.getValue()) ? "_:a" : "_:z";
  225. }
  226. // normal mode
  227. else {
  228. quad += s.getValue();
  229. }
  230. // predicate is always an IRI
  231. quad += " <" + escape(p.getValue()) + "> ";
  232. // object is IRI, bnode or literal
  233. if (o.isIRI()) {
  234. quad += "<" + escape(o.getValue()) + ">";
  235. } else if (o.isBlankNode()) {
  236. // normalization mode
  237. if (bnode != null) {
  238. quad += bnode.equals(o.getValue()) ? "_:a" : "_:z";
  239. }
  240. // normal mode
  241. else {
  242. quad += o.getValue();
  243. }
  244. } else {
  245. final String escaped = escape(o.getValue());
  246. quad += "\"" + escaped + "\"";
  247. if (RDF_LANGSTRING.equals(o.getDatatype())) {
  248. quad += "@" + o.getLanguage();
  249. } else if (!XSD_STRING.equals(o.getDatatype())) {
  250. quad += "^^<" + escape(o.getDatatype()) + ">";
  251. }
  252. }
  253. // graph
  254. if (graphName != null) {
  255. if (graphName.indexOf("_:") != 0) {
  256. quad += " <" + escape(graphName) + ">";
  257. } else if (bnode != null) {
  258. quad += " _:g";
  259. } else {
  260. quad += " " + graphName;
  261. }
  262. }
  263. quad += " .\n";
  264. return quad;
  265. }
  266. static String toNQuad(RDFDataset.Quad triple, String graphName) {
  267. return toNQuad(triple, graphName, null);
  268. }
  269. final private static Pattern UCHAR_MATCHED = Pattern.compile("\\u005C(?:([tbnrf\\\"'])|(?:u("
  270. + HEX + "{4}))|(?:U(" + HEX + "{8})))");
  271. public static String unescape(String str) {
  272. String rval = str;
  273. if (str != null) {
  274. final Matcher m = UCHAR_MATCHED.matcher(str);
  275. while (m.find()) {
  276. String uni = m.group(0);
  277. if (m.group(1) == null) {
  278. final String hex = m.group(2) != null ? m.group(2) : m.group(3);
  279. final int v = Integer.parseInt(hex, 16);// hex =
  280. // hex.replaceAll("^(?:00)+",
  281. // "");
  282. if (v > 0xFFFF) {
  283. // deal with UTF-32
  284. // Integer v = Integer.parseInt(hex, 16);
  285. final int vt = v - 0x10000;
  286. final int vh = vt >> 10;
  287. final int v1 = vt & 0x3FF;
  288. final int w1 = 0xD800 + vh;
  289. final int w2 = 0xDC00 + v1;
  290. final StringBuffer b = new StringBuffer();
  291. b.appendCodePoint(w1);
  292. b.appendCodePoint(w2);
  293. uni = b.toString();
  294. } else {
  295. uni = Character.toString((char) v);
  296. }
  297. } else {
  298. final char c = m.group(1).charAt(0);
  299. switch (c) {
  300. case 'b':
  301. uni = "\b";
  302. break;
  303. case 'n':
  304. uni = "\n";
  305. break;
  306. case 't':
  307. uni = "\t";
  308. break;
  309. case 'f':
  310. uni = "\f";
  311. break;
  312. case 'r':
  313. uni = "\r";
  314. break;
  315. case '\'':
  316. uni = "'";
  317. break;
  318. case '\"':
  319. uni = "\"";
  320. break;
  321. case '\\':
  322. uni = "\\";
  323. break;
  324. default:
  325. // do nothing
  326. continue;
  327. }
  328. }
  329. final String pat = Pattern.quote(m.group(0));
  330. final String x = Integer.toHexString(uni.charAt(0));
  331. rval = rval.replaceAll(pat, uni);
  332. }
  333. }
  334. return rval;
  335. }
  336. public static String escape(String str) {
  337. String rval = "";
  338. for (int i = 0; i < str.length(); i++) {
  339. final char hi = str.charAt(i);
  340. if (hi <= 0x8 || hi == 0xB || hi == 0xC || (hi >= 0xE && hi <= 0x1F)
  341. || (hi >= 0x7F && hi <= 0xA0) || // 0xA0 is end of
  342. // non-printable latin-1
  343. // supplement
  344. // characters
  345. ((hi >= 0x24F // 0x24F is the end of latin extensions
  346. && !Character.isHighSurrogate(hi))
  347. // TODO: there's probably a lot of other characters that
  348. // shouldn't be escaped that
  349. // fall outside these ranges, this is one example from the
  350. // json-ld tests
  351. )) {
  352. rval += String.format("\\u%04x", (int) hi);
  353. } else if (Character.isHighSurrogate(hi)) {
  354. final char lo = str.charAt(++i);
  355. final int c = (hi << 10) + lo + (0x10000 - (0xD800 << 10) - 0xDC00);
  356. rval += String.format("\\U%08x", c);
  357. } else {
  358. switch (hi) {
  359. case '\b':
  360. rval += "\\b";
  361. break;
  362. case '\n':
  363. rval += "\\n";
  364. break;
  365. case '\t':
  366. rval += "\\t";
  367. break;
  368. case '\f':
  369. rval += "\\f";
  370. break;
  371. case '\r':
  372. rval += "\\r";
  373. break;
  374. // case '\'':
  375. // rval += "\\'";
  376. // break;
  377. case '\"':
  378. rval += "\\\"";
  379. // rval += "\\u0022";
  380. break;
  381. case '\\':
  382. rval += "\\\\";
  383. break;
  384. default:
  385. // just put the char as is
  386. rval += hi;
  387. break;
  388. }
  389. }
  390. }
  391. return rval;
  392. }
  393. private static class Regex {
  394. // define partial regexes
  395. // final public static Pattern IRI =
  396. // Pattern.compile("(?:<([^:]+:[^>]*)>)");
  397. final public static Pattern IRI = Pattern.compile("(?:<([^>]*)>)");
  398. final public static Pattern BNODE = Pattern.compile("(_:(?:[A-Za-z][A-Za-z0-9]*))");
  399. final public static Pattern PLAIN = Pattern.compile("\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"");
  400. final public static Pattern DATATYPE = Pattern.compile("(?:\\^\\^" + IRI + ")");
  401. final public static Pattern LANGUAGE = Pattern.compile("(?:@([a-z]+(?:-[a-zA-Z0-9]+)*))");
  402. final public static Pattern LITERAL = Pattern.compile("(?:" + PLAIN + "(?:" + DATATYPE
  403. + "|" + LANGUAGE + ")?)");
  404. final public static Pattern WS = Pattern.compile("[ \\t]+");
  405. final public static Pattern WSO = Pattern.compile("[ \\t]*");
  406. final public static Pattern EOLN = Pattern.compile("(?:\r\n)|(?:\n)|(?:\r)");
  407. final public static Pattern EMPTY = Pattern.compile("^" + WSO + "$");
  408. // define quad part regexes
  409. final public static Pattern SUBJECT = Pattern.compile("(?:" + IRI + "|" + BNODE + ")" + WS);
  410. final public static Pattern PROPERTY = Pattern.compile(IRI.pattern() + WS.pattern());
  411. final public static Pattern OBJECT = Pattern.compile("(?:" + IRI + "|" + BNODE + "|"
  412. + LITERAL + ")" + WSO);
  413. final public static Pattern GRAPH = Pattern.compile("(?:\\.|(?:(?:" + IRI + "|" + BNODE
  414. + ")" + WSO + "\\.))");
  415. // full quad regex
  416. final public static Pattern QUAD = Pattern.compile("^" + WSO + SUBJECT + PROPERTY + OBJECT
  417. + GRAPH + WSO + "$");
  418. }
  419. /**
  420. * Parses RDF in the form of N-Quads.
  421. *
  422. * @param input
  423. * the N-Quads input to parse.
  424. *
  425. * @return an RDF dataset.
  426. */
  427. public static RDFDataset parseNQuads(String input) throws JSONLDProcessingError {
  428. // build RDF dataset
  429. final RDFDataset dataset = new RDFDataset();
  430. // split N-Quad input into lines
  431. final String[] lines = Regex.EOLN.split(input);
  432. int lineNumber = 0;
  433. for (final String line : lines) {
  434. lineNumber++;
  435. // skip empty lines
  436. if (Regex.EMPTY.matcher(line).matches()) {
  437. continue;
  438. }
  439. // parse quad
  440. final Matcher match = Regex.QUAD.matcher(line);
  441. if (!match.matches()) {
  442. throw new JSONLDProcessingError("Error while parsing N-Quads; invalid quad.")
  443. .setType(JSONLDProcessingError.Error.PARSE_ERROR).setDetail("line",
  444. lineNumber);
  445. }
  446. // get subject
  447. RDFDataset.Node subject;
  448. if (match.group(1) != null) {
  449. subject = new RDFDataset.IRI(unescape(match.group(1)));
  450. } else {
  451. subject = new RDFDataset.BlankNode(unescape(match.group(2)));
  452. }
  453. // get predicate
  454. final RDFDataset.Node predicate = new RDFDataset.IRI(unescape(match.group(3)));
  455. // get object
  456. RDFDataset.Node object;
  457. if (match.group(4) != null) {
  458. object = new RDFDataset.IRI(unescape(match.group(4)));
  459. } else if (match.group(5) != null) {
  460. object = new RDFDataset.BlankNode(unescape(match.group(5)));
  461. } else {
  462. final String language = unescape(match.group(8));
  463. final String datatype = match.group(7) != null ? unescape(match.group(7)) : match
  464. .group(8) != null ? RDF_LANGSTRING : XSD_STRING;
  465. final String unescaped = unescape(match.group(6));
  466. object = new RDFDataset.Literal(unescaped, datatype, language);
  467. }
  468. // get graph name ('@default' is used for the default graph)
  469. String name = "@default";
  470. if (match.group(9) != null) {
  471. name = unescape(match.group(9));
  472. } else if (match.group(10) != null) {
  473. name = unescape(match.group(10));
  474. }
  475. final RDFDataset.Quad triple = new RDFDataset.Quad(subject, predicate, object, name);
  476. // initialise graph in dataset
  477. if (!dataset.containsKey(name)) {
  478. final List<RDFDataset.Quad> tmp = new ArrayList<RDFDataset.Quad>();
  479. tmp.add(triple);
  480. dataset.put(name, tmp);
  481. }
  482. // add triple if unique to its graph
  483. else {
  484. final List<RDFDataset.Quad> triples = (List<RDFDataset.Quad>) dataset.get(name);
  485. if (!triples.contains(triple)) {
  486. triples.add(triple);
  487. }
  488. }
  489. }
  490. return dataset;
  491. }
  492. }