/core/src/main/java/com/github/jsonldjava/core/RDFDatasetUtils.java
Java | 537 lines | 411 code | 44 blank | 82 comment | 91 complexity | 8c1f659bf568f08112c96deb5cb32d80 MD5 | raw file
Possible License(s): BSD-3-Clause
- package com.github.jsonldjava.core;
-
- import static com.github.jsonldjava.core.JSONLDConsts.RDF_FIRST;
- import static com.github.jsonldjava.core.JSONLDConsts.RDF_LANGSTRING;
- import static com.github.jsonldjava.core.JSONLDConsts.RDF_NIL;
- import static com.github.jsonldjava.core.JSONLDConsts.RDF_REST;
- import static com.github.jsonldjava.core.JSONLDConsts.RDF_TYPE;
- import static com.github.jsonldjava.core.JSONLDConsts.XSD_BOOLEAN;
- import static com.github.jsonldjava.core.JSONLDConsts.XSD_DOUBLE;
- import static com.github.jsonldjava.core.JSONLDConsts.XSD_INTEGER;
- import static com.github.jsonldjava.core.JSONLDConsts.XSD_STRING;
- import static com.github.jsonldjava.core.JSONLDUtils.isKeyword;
- import static com.github.jsonldjava.core.JSONLDUtils.isList;
- import static com.github.jsonldjava.core.JSONLDUtils.isObject;
- import static com.github.jsonldjava.core.JSONLDUtils.isValue;
- import static com.github.jsonldjava.core.Regex.HEX;
-
- import java.text.DecimalFormat;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.LinkedHashMap;
- import java.util.List;
- import java.util.Map;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
-
- public class RDFDatasetUtils {
-
- /**
- * Creates an array of RDF triples for the given graph.
- *
- * @param graph
- * the graph to create RDF triples for.
- * @param namer
- * a UniqueNamer for assigning blank node names.
- *
- * @return the array of RDF triples for the given graph.
- */
- @Deprecated
- // use RDFDataset.graphToRDF
- static List<Object> graphToRDF(Map<String, Object> graph, UniqueNamer namer) {
- final List<Object> rval = new ArrayList<Object>();
- for (final String id : graph.keySet()) {
- final Map<String, Object> node = (Map<String, Object>) graph.get(id);
- final List<String> properties = new ArrayList<String>(node.keySet());
- Collections.sort(properties);
- for (String property : properties) {
- final Object items = node.get(property);
- if ("@type".equals(property)) {
- property = RDF_TYPE;
- } else if (isKeyword(property)) {
- continue;
- }
-
- for (final Object item : (List<Object>) items) {
- // RDF subjects
- final Map<String, Object> subject = new LinkedHashMap<String, Object>();
- if (id.indexOf("_:") == 0) {
- subject.put("type", "blank node");
- subject.put("value", namer.getName(id));
- } else {
- subject.put("type", "IRI");
- subject.put("value", id);
- }
-
- // RDF predicates
- final Map<String, Object> predicate = new LinkedHashMap<String, Object>();
- predicate.put("type", "IRI");
- predicate.put("value", property);
-
- // convert @list to triples
- if (isList(item)) {
- listToRDF((List<Object>) ((Map<String, Object>) item).get("@list"), namer,
- subject, predicate, rval);
- }
- // convert value or node object to triple
- else {
- final Object object = objectToRDF(item, namer);
- final Map<String, Object> tmp = new LinkedHashMap<String, Object>();
- tmp.put("subject", subject);
- tmp.put("predicate", predicate);
- tmp.put("object", object);
- rval.add(tmp);
- }
- }
- }
- }
-
- return rval;
- }
-
- /**
- * Converts a @list value into linked list of blank node RDF triples (an RDF
- * collection).
- *
- * @param list
- * the @list value.
- * @param namer
- * a UniqueNamer for assigning blank node names.
- * @param subject
- * the subject for the head of the list.
- * @param predicate
- * the predicate for the head of the list.
- * @param triples
- * the array of triples to append to.
- */
- private static void listToRDF(List<Object> list, UniqueNamer namer,
- Map<String, Object> subject, Map<String, Object> predicate, List<Object> triples) {
- final Map<String, Object> first = new LinkedHashMap<String, Object>();
- first.put("type", "IRI");
- first.put("value", RDF_FIRST);
- final Map<String, Object> rest = new LinkedHashMap<String, Object>();
- rest.put("type", "IRI");
- rest.put("value", RDF_REST);
- final Map<String, Object> nil = new LinkedHashMap<String, Object>();
- nil.put("type", "IRI");
- nil.put("value", RDF_NIL);
-
- for (final Object item : list) {
- final Map<String, Object> blankNode = new LinkedHashMap<String, Object>();
- blankNode.put("type", "blank node");
- blankNode.put("value", namer.getName());
-
- {
- final Map<String, Object> tmp = new LinkedHashMap<String, Object>();
- tmp.put("subject", subject);
- tmp.put("predicate", predicate);
- tmp.put("object", blankNode);
- triples.add(tmp);
- }
-
- subject = blankNode;
- predicate = first;
- final Object object = objectToRDF(item, namer);
-
- {
- final Map<String, Object> tmp = new LinkedHashMap<String, Object>();
- tmp.put("subject", subject);
- tmp.put("predicate", predicate);
- tmp.put("object", object);
- triples.add(tmp);
- }
-
- predicate = rest;
- }
- final Map<String, Object> tmp = new LinkedHashMap<String, Object>();
- tmp.put("subject", subject);
- tmp.put("predicate", predicate);
- tmp.put("object", nil);
- triples.add(tmp);
- }
-
- /**
- * Converts a JSON-LD value object to an RDF literal or a JSON-LD string or
- * node object to an RDF resource.
- *
- * @param item
- * the JSON-LD value or node object.
- * @param namer
- * the UniqueNamer to use to assign blank node names.
- *
- * @return the RDF literal or RDF resource.
- */
- private static Object objectToRDF(Object item, UniqueNamer namer) {
- final Map<String, Object> object = new LinkedHashMap<String, Object>();
-
- // convert value object to RDF
- if (isValue(item)) {
- object.put("type", "literal");
- final Object value = ((Map<String, Object>) item).get("@value");
- final Object datatype = ((Map<String, Object>) item).get("@type");
-
- // convert to XSD datatypes as appropriate
- if (value instanceof Boolean || value instanceof Number) {
- // convert to XSD datatype
- if (value instanceof Boolean) {
- object.put("value", value.toString());
- object.put("datatype", datatype == null ? XSD_BOOLEAN : datatype);
- } else if (value instanceof Double || value instanceof Float) {
- // canonical double representation
- final DecimalFormat df = new DecimalFormat("0.0###############E0");
- object.put("value", df.format(value));
- object.put("datatype", datatype == null ? XSD_DOUBLE : datatype);
- } else {
- final DecimalFormat df = new DecimalFormat("0");
- object.put("value", df.format(value));
- object.put("datatype", datatype == null ? XSD_INTEGER : datatype);
- }
- } else if (((Map<String, Object>) item).containsKey("@language")) {
- object.put("value", value);
- object.put("datatype", datatype == null ? RDF_LANGSTRING : datatype);
- object.put("language", ((Map<String, Object>) item).get("@language"));
- } else {
- object.put("value", value);
- object.put("datatype", datatype == null ? XSD_STRING : datatype);
- }
- }
- // convert string/node object to RDF
- else {
- final String id = isObject(item) ? (String) ((Map<String, Object>) item).get("@id")
- : (String) item;
- if (id.indexOf("_:") == 0) {
- object.put("type", "blank node");
- object.put("value", namer.getName(id));
- } else {
- object.put("type", "IRI");
- object.put("value", id);
- }
- }
-
- return object;
- }
-
- public static String toNQuads(RDFDataset dataset) {
- final List<String> quads = new ArrayList<String>();
- for (String graphName : dataset.graphNames()) {
- final List<RDFDataset.Quad> triples = dataset.getQuads(graphName);
- if ("@default".equals(graphName)) {
- graphName = null;
- }
- for (final RDFDataset.Quad triple : triples) {
- quads.add(toNQuad(triple, graphName));
- }
- }
- Collections.sort(quads);
- String rval = "";
- for (final String quad : quads) {
- rval += quad;
- }
- return rval;
- }
-
- static String toNQuad(RDFDataset.Quad triple, String graphName, String bnode) {
- final RDFDataset.Node s = triple.getSubject();
- final RDFDataset.Node p = triple.getPredicate();
- final RDFDataset.Node o = triple.getObject();
-
- String quad = "";
-
- // subject is an IRI or bnode
- if (s.isIRI()) {
- quad += "<" + escape(s.getValue()) + ">";
- }
- // normalization mode
- else if (bnode != null) {
- quad += bnode.equals(s.getValue()) ? "_:a" : "_:z";
- }
- // normal mode
- else {
- quad += s.getValue();
- }
-
- // predicate is always an IRI
- quad += " <" + escape(p.getValue()) + "> ";
-
- // object is IRI, bnode or literal
- if (o.isIRI()) {
- quad += "<" + escape(o.getValue()) + ">";
- } else if (o.isBlankNode()) {
- // normalization mode
- if (bnode != null) {
- quad += bnode.equals(o.getValue()) ? "_:a" : "_:z";
- }
- // normal mode
- else {
- quad += o.getValue();
- }
- } else {
- final String escaped = escape(o.getValue());
- quad += "\"" + escaped + "\"";
- if (RDF_LANGSTRING.equals(o.getDatatype())) {
- quad += "@" + o.getLanguage();
- } else if (!XSD_STRING.equals(o.getDatatype())) {
- quad += "^^<" + escape(o.getDatatype()) + ">";
- }
- }
-
- // graph
- if (graphName != null) {
- if (graphName.indexOf("_:") != 0) {
- quad += " <" + escape(graphName) + ">";
- } else if (bnode != null) {
- quad += " _:g";
- } else {
- quad += " " + graphName;
- }
- }
-
- quad += " .\n";
- return quad;
- }
-
- static String toNQuad(RDFDataset.Quad triple, String graphName) {
- return toNQuad(triple, graphName, null);
- }
-
- final private static Pattern UCHAR_MATCHED = Pattern.compile("\\u005C(?:([tbnrf\\\"'])|(?:u("
- + HEX + "{4}))|(?:U(" + HEX + "{8})))");
-
- public static String unescape(String str) {
- String rval = str;
- if (str != null) {
- final Matcher m = UCHAR_MATCHED.matcher(str);
- while (m.find()) {
- String uni = m.group(0);
- if (m.group(1) == null) {
- final String hex = m.group(2) != null ? m.group(2) : m.group(3);
- final int v = Integer.parseInt(hex, 16);// hex =
- // hex.replaceAll("^(?:00)+",
- // "");
- if (v > 0xFFFF) {
- // deal with UTF-32
- // Integer v = Integer.parseInt(hex, 16);
- final int vt = v - 0x10000;
- final int vh = vt >> 10;
- final int v1 = vt & 0x3FF;
- final int w1 = 0xD800 + vh;
- final int w2 = 0xDC00 + v1;
-
- final StringBuffer b = new StringBuffer();
- b.appendCodePoint(w1);
- b.appendCodePoint(w2);
- uni = b.toString();
- } else {
- uni = Character.toString((char) v);
- }
- } else {
- final char c = m.group(1).charAt(0);
- switch (c) {
- case 'b':
- uni = "\b";
- break;
- case 'n':
- uni = "\n";
- break;
- case 't':
- uni = "\t";
- break;
- case 'f':
- uni = "\f";
- break;
- case 'r':
- uni = "\r";
- break;
- case '\'':
- uni = "'";
- break;
- case '\"':
- uni = "\"";
- break;
- case '\\':
- uni = "\\";
- break;
- default:
- // do nothing
- continue;
- }
- }
- final String pat = Pattern.quote(m.group(0));
- final String x = Integer.toHexString(uni.charAt(0));
- rval = rval.replaceAll(pat, uni);
- }
- }
- return rval;
- }
-
- public static String escape(String str) {
- String rval = "";
- for (int i = 0; i < str.length(); i++) {
- final char hi = str.charAt(i);
- if (hi <= 0x8 || hi == 0xB || hi == 0xC || (hi >= 0xE && hi <= 0x1F)
- || (hi >= 0x7F && hi <= 0xA0) || // 0xA0 is end of
- // non-printable latin-1
- // supplement
- // characters
- ((hi >= 0x24F // 0x24F is the end of latin extensions
- && !Character.isHighSurrogate(hi))
- // TODO: there's probably a lot of other characters that
- // shouldn't be escaped that
- // fall outside these ranges, this is one example from the
- // json-ld tests
- )) {
- rval += String.format("\\u%04x", (int) hi);
- } else if (Character.isHighSurrogate(hi)) {
- final char lo = str.charAt(++i);
- final int c = (hi << 10) + lo + (0x10000 - (0xD800 << 10) - 0xDC00);
- rval += String.format("\\U%08x", c);
- } else {
- switch (hi) {
- case '\b':
- rval += "\\b";
- break;
- case '\n':
- rval += "\\n";
- break;
- case '\t':
- rval += "\\t";
- break;
- case '\f':
- rval += "\\f";
- break;
- case '\r':
- rval += "\\r";
- break;
- // case '\'':
- // rval += "\\'";
- // break;
- case '\"':
- rval += "\\\"";
- // rval += "\\u0022";
- break;
- case '\\':
- rval += "\\\\";
- break;
- default:
- // just put the char as is
- rval += hi;
- break;
- }
- }
- }
- return rval;
- }
-
- private static class Regex {
- // define partial regexes
- // final public static Pattern IRI =
- // Pattern.compile("(?:<([^:]+:[^>]*)>)");
- final public static Pattern IRI = Pattern.compile("(?:<([^>]*)>)");
- final public static Pattern BNODE = Pattern.compile("(_:(?:[A-Za-z][A-Za-z0-9]*))");
- final public static Pattern PLAIN = Pattern.compile("\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"");
- final public static Pattern DATATYPE = Pattern.compile("(?:\\^\\^" + IRI + ")");
- final public static Pattern LANGUAGE = Pattern.compile("(?:@([a-z]+(?:-[a-zA-Z0-9]+)*))");
- final public static Pattern LITERAL = Pattern.compile("(?:" + PLAIN + "(?:" + DATATYPE
- + "|" + LANGUAGE + ")?)");
- final public static Pattern WS = Pattern.compile("[ \\t]+");
- final public static Pattern WSO = Pattern.compile("[ \\t]*");
- final public static Pattern EOLN = Pattern.compile("(?:\r\n)|(?:\n)|(?:\r)");
- final public static Pattern EMPTY = Pattern.compile("^" + WSO + "$");
-
- // define quad part regexes
- final public static Pattern SUBJECT = Pattern.compile("(?:" + IRI + "|" + BNODE + ")" + WS);
- final public static Pattern PROPERTY = Pattern.compile(IRI.pattern() + WS.pattern());
- final public static Pattern OBJECT = Pattern.compile("(?:" + IRI + "|" + BNODE + "|"
- + LITERAL + ")" + WSO);
- final public static Pattern GRAPH = Pattern.compile("(?:\\.|(?:(?:" + IRI + "|" + BNODE
- + ")" + WSO + "\\.))");
-
- // full quad regex
- final public static Pattern QUAD = Pattern.compile("^" + WSO + SUBJECT + PROPERTY + OBJECT
- + GRAPH + WSO + "$");
- }
-
- /**
- * Parses RDF in the form of N-Quads.
- *
- * @param input
- * the N-Quads input to parse.
- *
- * @return an RDF dataset.
- */
- public static RDFDataset parseNQuads(String input) throws JSONLDProcessingError {
- // build RDF dataset
- final RDFDataset dataset = new RDFDataset();
-
- // split N-Quad input into lines
- final String[] lines = Regex.EOLN.split(input);
- int lineNumber = 0;
- for (final String line : lines) {
- lineNumber++;
-
- // skip empty lines
- if (Regex.EMPTY.matcher(line).matches()) {
- continue;
- }
-
- // parse quad
- final Matcher match = Regex.QUAD.matcher(line);
- if (!match.matches()) {
- throw new JSONLDProcessingError("Error while parsing N-Quads; invalid quad.")
- .setType(JSONLDProcessingError.Error.PARSE_ERROR).setDetail("line",
- lineNumber);
- }
-
- // get subject
- RDFDataset.Node subject;
- if (match.group(1) != null) {
- subject = new RDFDataset.IRI(unescape(match.group(1)));
- } else {
- subject = new RDFDataset.BlankNode(unescape(match.group(2)));
- }
-
- // get predicate
- final RDFDataset.Node predicate = new RDFDataset.IRI(unescape(match.group(3)));
-
- // get object
- RDFDataset.Node object;
- if (match.group(4) != null) {
- object = new RDFDataset.IRI(unescape(match.group(4)));
- } else if (match.group(5) != null) {
- object = new RDFDataset.BlankNode(unescape(match.group(5)));
- } else {
- final String language = unescape(match.group(8));
- final String datatype = match.group(7) != null ? unescape(match.group(7)) : match
- .group(8) != null ? RDF_LANGSTRING : XSD_STRING;
- final String unescaped = unescape(match.group(6));
- object = new RDFDataset.Literal(unescaped, datatype, language);
- }
-
- // get graph name ('@default' is used for the default graph)
- String name = "@default";
- if (match.group(9) != null) {
- name = unescape(match.group(9));
- } else if (match.group(10) != null) {
- name = unescape(match.group(10));
- }
-
- final RDFDataset.Quad triple = new RDFDataset.Quad(subject, predicate, object, name);
-
- // initialise graph in dataset
- if (!dataset.containsKey(name)) {
- final List<RDFDataset.Quad> tmp = new ArrayList<RDFDataset.Quad>();
- tmp.add(triple);
- dataset.put(name, tmp);
- }
- // add triple if unique to its graph
- else {
- final List<RDFDataset.Quad> triples = (List<RDFDataset.Quad>) dataset.get(name);
- if (!triples.contains(triple)) {
- triples.add(triple);
- }
- }
- }
-
- return dataset;
- }
- }