RDFDatasetUtils.java

/core/src/main/java/com/github/jsonldjava/core/RDFDatasetUtils.java

http://github.com/tristan/jsonld-java
Java | 537 lines | 411 code | 44 blank | 82 comment | 91 complexity | 8c1f659bf568f08112c96deb5cb32d80 MD5 | raw file
Possible License(s): BSD-3-Clause

package com.github.jsonldjava.core;



import static com.github.jsonldjava.core.JSONLDConsts.RDF_FIRST;
import static com.github.jsonldjava.core.JSONLDConsts.RDF_LANGSTRING;
import static com.github.jsonldjava.core.JSONLDConsts.RDF_NIL;
import static com.github.jsonldjava.core.JSONLDConsts.RDF_REST;
import static com.github.jsonldjava.core.JSONLDConsts.RDF_TYPE;
import static com.github.jsonldjava.core.JSONLDConsts.XSD_BOOLEAN;
import static com.github.jsonldjava.core.JSONLDConsts.XSD_DOUBLE;
import static com.github.jsonldjava.core.JSONLDConsts.XSD_INTEGER;
import static com.github.jsonldjava.core.JSONLDConsts.XSD_STRING;
import static com.github.jsonldjava.core.JSONLDUtils.isKeyword;
import static com.github.jsonldjava.core.JSONLDUtils.isList;
import static com.github.jsonldjava.core.JSONLDUtils.isObject;
import static com.github.jsonldjava.core.JSONLDUtils.isValue;
import static com.github.jsonldjava.core.Regex.HEX;

import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;



public class RDFDatasetUtils {



    /**
     * Builds the list of RDF triples described by a node map.
     * <p>
     * Every triple is a map with the keys "subject", "predicate" and "object",
     * and every term is itself a map carrying a "type" and a "value" entry.
     * The properties of each node are visited in sorted order; {@code @type}
     * is emitted with the rdf:type predicate and all other keywords are
     * skipped.
     *
     * @param graph
     *            the node map, keyed by node identifier; each value is expected
     *            to be a map from property to a list of items.
     * @param namer
     *            a UniqueNamer used to relabel blank nodes.
     *
     * @return the triples generated for the given graph.
     */
    @Deprecated
    // use RDFDataset.graphToRDF
    static List<Object> graphToRDF(Map<String, Object> graph, UniqueNamer namer) {
        final List<Object> triples = new ArrayList<Object>();

        for (final Map.Entry<String, Object> entry : graph.entrySet()) {
            final String nodeId = entry.getKey();
            final Map<String, Object> node = (Map<String, Object>) entry.getValue();

            final List<String> sortedKeys = new ArrayList<String>(node.keySet());
            Collections.sort(sortedKeys);

            for (final String key : sortedKeys) {
                // work out the predicate IRI, dropping keywords other than @type
                final String predicateIri;
                if ("@type".equals(key)) {
                    predicateIri = RDF_TYPE;
                } else if (isKeyword(key)) {
                    continue;
                } else {
                    predicateIri = key;
                }

                final List<Object> values = (List<Object>) node.get(key);
                for (final Object value : values) {
                    // subject term: blank nodes are relabelled through the namer
                    final Map<String, Object> subjectTerm = new LinkedHashMap<String, Object>();
                    final boolean blank = nodeId.startsWith("_:");
                    subjectTerm.put("type", blank ? "blank node" : "IRI");
                    subjectTerm.put("value", blank ? namer.getName(nodeId) : nodeId);

                    // predicate term: always an IRI
                    final Map<String, Object> predicateTerm = new LinkedHashMap<String, Object>();
                    predicateTerm.put("type", "IRI");
                    predicateTerm.put("value", predicateIri);

                    if (isList(value)) {
                        // an @list expands into an RDF collection
                        final List<Object> members = (List<Object>) ((Map<String, Object>) value)
                                .get("@list");
                        listToRDF(members, namer, subjectTerm, predicateTerm, triples);
                        continue;
                    }

                    // plain value or node reference: exactly one triple
                    final Object objectTerm = objectToRDF(value, namer);
                    final Map<String, Object> triple = new LinkedHashMap<String, Object>();
                    triple.put("subject", subjectTerm);
                    triple.put("predicate", predicateTerm);
                    triple.put("object", objectTerm);
                    triples.add(triple);
                }
            }
        }

        return triples;
    }



    /**
     * Expands a {@code @list} value into an RDF collection: a chain of blank
     * nodes linked by rdf:first / rdf:rest and terminated by rdf:nil.
     * <p>
     * For an empty list a single triple (subject, predicate, rdf:nil) is
     * appended.
     *
     * @param list
     *            the members of the @list value.
     * @param namer
     *            a UniqueNamer used to mint the blank node of every list cell.
     * @param subject
     *            the subject that points at the head of the list.
     * @param predicate
     *            the predicate that points at the head of the list.
     * @param triples
     *            the list the generated triples are appended to.
     */
    private static void listToRDF(List<Object> list, UniqueNamer namer,
            Map<String, Object> subject, Map<String, Object> predicate, List<Object> triples) {
        final Map<String, Object> rdfFirst = new LinkedHashMap<String, Object>();
        rdfFirst.put("type", "IRI");
        rdfFirst.put("value", RDF_FIRST);

        final Map<String, Object> rdfRest = new LinkedHashMap<String, Object>();
        rdfRest.put("type", "IRI");
        rdfRest.put("value", RDF_REST);

        final Map<String, Object> rdfNil = new LinkedHashMap<String, Object>();
        rdfNil.put("type", "IRI");
        rdfNil.put("value", RDF_NIL);

        // the (subject, predicate) pair that must point at the next cell, or
        // at rdf:nil once the members are exhausted
        Map<String, Object> linkSubject = subject;
        Map<String, Object> linkPredicate = predicate;

        for (final Object element : list) {
            final Map<String, Object> cell = new LinkedHashMap<String, Object>();
            cell.put("type", "blank node");
            cell.put("value", namer.getName());

            // link the previous position to this cell
            final Map<String, Object> linkTriple = new LinkedHashMap<String, Object>();
            linkTriple.put("subject", linkSubject);
            linkTriple.put("predicate", linkPredicate);
            linkTriple.put("object", cell);
            triples.add(linkTriple);

            // attach the member to the cell through rdf:first
            final Object member = objectToRDF(element, namer);
            final Map<String, Object> memberTriple = new LinkedHashMap<String, Object>();
            memberTriple.put("subject", cell);
            memberTriple.put("predicate", rdfFirst);
            memberTriple.put("object", member);
            triples.add(memberTriple);

            // the following cell (or rdf:nil) hangs off this one via rdf:rest
            linkSubject = cell;
            linkPredicate = rdfRest;
        }

        final Map<String, Object> terminator = new LinkedHashMap<String, Object>();
        terminator.put("subject", linkSubject);
        terminator.put("predicate", linkPredicate);
        terminator.put("object", rdfNil);
        triples.add(terminator);
    }



    /**
     * Converts a JSON-LD value object to an RDF literal, or a JSON-LD string or
     * node object to an RDF resource.
     * <p>
     * The result is a map with a "type" entry ("literal", "blank node" or
     * "IRI") and a "value" entry; literals additionally carry a "datatype" and,
     * for language-tagged strings, a "language". An explicit {@code @type} on
     * the value object always wins over the default datatype.
     * <p>
     * Numbers are formatted with fixed (US) format symbols so that the lexical
     * form does not depend on the JVM default locale; with the default-locale
     * constructor a comma-decimal locale would serialize 1.5 as "1,5E0".
     *
     * @param item
     *            the JSON-LD value or node object.
     * @param namer
     *            the UniqueNamer to use to assign blank node names.
     *
     * @return the RDF literal or RDF resource.
     */
    private static Object objectToRDF(Object item, UniqueNamer namer) {
        final Map<String, Object> object = new LinkedHashMap<String, Object>();

        // convert value object to RDF
        if (isValue(item)) {
            final Map<String, Object> valueObject = (Map<String, Object>) item;
            object.put("type", "literal");
            final Object value = valueObject.get("@value");
            final Object datatype = valueObject.get("@type");

            if (value instanceof Boolean) {
                object.put("value", value.toString());
                object.put("datatype", datatype == null ? XSD_BOOLEAN : datatype);
            } else if (value instanceof Double || value instanceof Float) {
                // canonical double representation, locale independent
                final DecimalFormat df = new DecimalFormat("0.0###############E0",
                        new DecimalFormatSymbols(Locale.US));
                object.put("value", df.format(value));
                object.put("datatype", datatype == null ? XSD_DOUBLE : datatype);
            } else if (value instanceof Number) {
                // any other number is written as an integer, locale independent
                final DecimalFormat df = new DecimalFormat("0", new DecimalFormatSymbols(
                        Locale.US));
                object.put("value", df.format(value));
                object.put("datatype", datatype == null ? XSD_INTEGER : datatype);
            } else if (valueObject.containsKey("@language")) {
                object.put("value", value);
                object.put("datatype", datatype == null ? RDF_LANGSTRING : datatype);
                object.put("language", valueObject.get("@language"));
            } else {
                object.put("value", value);
                object.put("datatype", datatype == null ? XSD_STRING : datatype);
            }
        }
        // convert string/node object to RDF
        else {
            final String id = isObject(item) ? (String) ((Map<String, Object>) item).get("@id")
                    : (String) item;
            if (id.startsWith("_:")) {
                object.put("type", "blank node");
                object.put("value", namer.getName(id));
            } else {
                object.put("type", "IRI");
                object.put("value", id);
            }
        }

        return object;
    }



    /**
     * Serializes a whole dataset as N-Quads.
     * <p>
     * Every quad of every graph is rendered with
     * {@link #toNQuad(RDFDataset.Quad, String)}; quads of the "@default" graph
     * are written without a graph label. The resulting lines are sorted
     * lexicographically and concatenated (each line already ends with a line
     * feed).
     *
     * @param dataset
     *            the dataset to serialize.
     *
     * @return the sorted N-Quads document, or an empty string for an empty
     *         dataset.
     */
    public static String toNQuads(RDFDataset dataset) {
        final List<String> quads = new ArrayList<String>();
        for (final String graphName : dataset.graphNames()) {
            // the default graph carries no graph label in the output
            final String label = "@default".equals(graphName) ? null : graphName;
            for (final RDFDataset.Quad quad : dataset.getQuads(graphName)) {
                quads.add(toNQuad(quad, label));
            }
        }
        Collections.sort(quads);

        // StringBuilder avoids re-copying the whole document for every quad
        final StringBuilder rval = new StringBuilder();
        for (final String quad : quads) {
            rval.append(quad);
        }
        return rval.toString();
    }



    /**
     * Renders a single quad as one N-Quads line, terminated by " ." and a line
     * feed.
     * <p>
     * When {@code bnode} is non-null the method runs in normalization mode:
     * a blank node subject or object is written as "_:a" if its value equals
     * {@code bnode} and as "_:z" otherwise, and a blank node graph label is
     * written as "_:g". When {@code bnode} is null blank node labels are
     * written verbatim.
     *
     * @param triple
     *            the quad to serialize.
     * @param graphName
     *            the graph label, or null to omit it.
     * @param bnode
     *            the reference blank node for normalization mode, or null for
     *            normal mode.
     *
     * @return the serialized line.
     */
    static String toNQuad(RDFDataset.Quad triple, String graphName, String bnode) {
        final RDFDataset.Node subject = triple.getSubject();
        final RDFDataset.Node predicate = triple.getPredicate();
        final RDFDataset.Node object = triple.getObject();
        final boolean normalizing = bnode != null;

        final StringBuilder line = new StringBuilder();

        // subject: IRI or blank node
        if (subject.isIRI()) {
            line.append("<").append(escape(subject.getValue())).append(">");
        } else if (normalizing) {
            line.append(bnode.equals(subject.getValue()) ? "_:a" : "_:z");
        } else {
            line.append(subject.getValue());
        }

        // predicate: always an IRI
        line.append(" <").append(escape(predicate.getValue())).append("> ");

        // object: IRI, blank node or literal
        if (object.isIRI()) {
            line.append("<").append(escape(object.getValue())).append(">");
        } else if (object.isBlankNode()) {
            if (normalizing) {
                line.append(bnode.equals(object.getValue()) ? "_:a" : "_:z");
            } else {
                line.append(object.getValue());
            }
        } else {
            line.append("\"").append(escape(object.getValue())).append("\"");
            final String datatype = object.getDatatype();
            if (RDF_LANGSTRING.equals(datatype)) {
                line.append("@").append(object.getLanguage());
            } else if (!XSD_STRING.equals(datatype)) {
                line.append("^^<").append(escape(datatype)).append(">");
            }
        }

        // optional graph label
        if (graphName != null) {
            if (!graphName.startsWith("_:")) {
                line.append(" <").append(escape(graphName)).append(">");
            } else if (normalizing) {
                line.append(" _:g");
            } else {
                line.append(" ").append(graphName);
            }
        }

        return line.append(" .\n").toString();
    }



    /**
     * Renders a single quad as one N-Quads line in normal (non-normalization)
     * mode.
     * <p>
     * Delegates to {@link #toNQuad(RDFDataset.Quad, String, String)} with a
     * null reference blank node, so blank node labels are written verbatim.
     *
     * @param triple
     *            the quad to serialize.
     * @param graphName
     *            the graph label, or null to omit it.
     *
     * @return the serialized line.
     */
    static String toNQuad(RDFDataset.Quad triple, String graphName) {

        return toNQuad(triple, graphName, null);

    }



    /**
     * Matches one escape sequence: a backslash followed by either a single
     * escape character (group 1: t, b, n, r, f, backslash, double quote or
     * apostrophe), a lower-case 'u' and four HEX digits (group 2), or an
     * upper-case 'U' and eight HEX digits (group 3).
     */
    final private static Pattern UCHAR_MATCHED = Pattern.compile("\\u005C(?:([tbnrf\\\\\"'])|(?:u("
            + HEX + "{4}))|(?:U(" + HEX + "{8})))");

    /**
     * Decodes the escape sequences recognised by {@link #UCHAR_MATCHED} in a
     * single left-to-right pass.
     * <p>
     * Each sequence is decoded exactly once and the decoded text is never
     * rescanned, so an escaped backslash followed by 'n' stays a backslash and
     * an 'n'. Decoded characters are inserted literally, which keeps '$' and
     * backslash from being interpreted as regex replacement syntax. A numeric
     * escape above the maximum Unicode code point is left untouched.
     *
     * @param str
     *            the escaped text, may be null.
     *
     * @return the decoded text, or null if {@code str} is null.
     */
    public static String unescape(String str) {
        if (str == null) {
            return null;
        }

        final Matcher m = UCHAR_MATCHED.matcher(str);
        // StringBuffer rather than StringBuilder: Matcher.appendReplacement
        // only accepts a StringBuffer on older Java versions
        final StringBuffer rval = new StringBuffer(str.length());
        while (m.find()) {
            // default: keep the matched text as it is
            String uni = m.group(0);
            if (m.group(1) == null) {
                final String hex = m.group(2) != null ? m.group(2) : m.group(3);
                // parse as long: eight hex digits may exceed Integer.MAX_VALUE
                final long codePoint = Long.parseLong(hex, 16);
                if (codePoint <= Character.MAX_CODE_POINT) {
                    // yields a surrogate pair for supplementary code points
                    uni = new String(Character.toChars((int) codePoint));
                }
            } else {
                switch (m.group(1).charAt(0)) {
                case 'b':
                    uni = "\b";
                    break;
                case 'n':
                    uni = "\n";
                    break;
                case 't':
                    uni = "\t";
                    break;
                case 'f':
                    uni = "\f";
                    break;
                case 'r':
                    uni = "\r";
                    break;
                case '\'':
                    uni = "'";
                    break;
                case '\"':
                    uni = "\"";
                    break;
                case '\\':
                    uni = "\\";
                    break;
                default:
                    // not reachable with the current pattern; keep the text
                    break;
                }
            }
            // quoteReplacement: '$' and backslash in the decoded text must not
            // be treated as group references / escapes
            m.appendReplacement(rval, Matcher.quoteReplacement(uni));
        }
        m.appendTail(rval);
        return rval.toString();
    }



    /**
     * Escapes a string for use inside an N-Quads IRI or literal.
     * <p>
     * Rules, applied per UTF-16 code unit:
     * <ul>
     * <li>a valid surrogate pair becomes a backslash, an upper-case 'U' and the
     * code point as eight lower-case hex digits;</li>
     * <li>control characters other than tab, line feed and carriage return,
     * the range 0x7F-0xA0, and everything from 0x24F upwards become a
     * backslash, a lower-case 'u' and four lower-case hex digits;</li>
     * <li>tab, line feed, carriage return, double quote and backslash get
     * their short backslash escape;</li>
     * <li>everything else is copied unchanged.</li>
     * </ul>
     * An unpaired high surrogate (at the end of the input or not followed by a
     * low surrogate) is written as a four-digit escape of the single code unit
     * instead of failing or consuming the following character.
     *
     * @param str
     *            the text to escape; must not be null.
     *
     * @return the escaped text.
     */
    public static String escape(String str) {
        final int length = str.length();
        final StringBuilder rval = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            final char hi = str.charAt(i);
            if (Character.isHighSurrogate(hi)) {
                if (i + 1 < length && Character.isLowSurrogate(str.charAt(i + 1))) {
                    // well-formed pair: consume the low surrogate as well
                    final int c = Character.toCodePoint(hi, str.charAt(++i));
                    rval.append(String.format("\\U%08x", c));
                } else {
                    // unpaired high surrogate: escape the lone code unit
                    rval.append(String.format("\\u%04x", (int) hi));
                }
            } else if (hi <= 0x8 || hi == 0xB || hi == 0xC || (hi >= 0xE && hi <= 0x1F)
                    || (hi >= 0x7F && hi <= 0xA0) // through the end of the
                                                  // non-printable latin-1
                                                  // supplement characters
                    || hi >= 0x24F) { // beyond the latin extensions
                // TODO: there's probably a lot of other characters that
                // shouldn't be escaped that fall outside these ranges
                // note: backspace (0x8) and form feed (0xC) are handled here,
                // so they are written as numeric escapes
                rval.append(String.format("\\u%04x", (int) hi));
            } else {
                switch (hi) {
                case '\n':
                    rval.append("\\n");
                    break;
                case '\t':
                    rval.append("\\t");
                    break;
                case '\r':
                    rval.append("\\r");
                    break;
                case '\"':
                    rval.append("\\\"");
                    break;
                case '\\':
                    rval.append("\\\\");
                    break;
                default:
                    // just put the char as is
                    rval.append(hi);
                    break;
                }
            }
        }
        return rval.toString();
    }



    /**
     * Regular expressions used by parseNQuads to recognise one N-Quads line.
     * <p>
     * The larger patterns are assembled by string concatenation of the smaller
     * ones, so the capture groups of the parts keep their left-to-right order
     * in {@link #QUAD}: 1 = subject IRI, 2 = subject blank node, 3 = predicate
     * IRI, 4 = object IRI, 5 = object blank node, 6 = literal text,
     * 7 = literal datatype IRI, 8 = literal language tag, 9 = graph IRI,
     * 10 = graph blank node. Changing the number of groups in any part shifts
     * these indexes.
     */
    private static class Regex {

        // define partial regexes

        // final public static Pattern IRI =

        // Pattern.compile("(?:<([^:]+:[^>]*)>)");

        // an IRI in angle brackets; captures the text between them
        final public static Pattern IRI = Pattern.compile("(?:<([^>]*)>)");

        // a blank node label: "_:" followed by a letter and then letters or
        // digits; captures the whole label including the "_:" prefix
        final public static Pattern BNODE = Pattern.compile("(_:(?:[A-Za-z][A-Za-z0-9]*))");

        // a double-quoted string in which a backslash escapes the following
        // character; captures the text between the quotes
        final public static Pattern PLAIN = Pattern.compile("\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"");

        // a datatype suffix: "^^" followed by an IRI
        final public static Pattern DATATYPE = Pattern.compile("(?:\\^\\^" + IRI + ")");

        // a language suffix: "@", lower-case letters, then optional
        // hyphen-separated alphanumeric subtags; captures the tag without "@"
        final public static Pattern LANGUAGE = Pattern.compile("(?:@([a-z]+(?:-[a-zA-Z0-9]+)*))");

        // a literal: quoted string optionally followed by a datatype or a
        // language suffix
        final public static Pattern LITERAL = Pattern.compile("(?:" + PLAIN + "(?:" + DATATYPE

                + "|" + LANGUAGE + ")?)");

        // mandatory whitespace: one or more spaces or tabs
        final public static Pattern WS = Pattern.compile("[ \\t]+");

        // optional whitespace: zero or more spaces or tabs
        final public static Pattern WSO = Pattern.compile("[ \\t]*");

        // a line terminator: CR LF, LF or CR
        final public static Pattern EOLN = Pattern.compile("(?:\r\n)|(?:\n)|(?:\r)");

        // a line consisting of optional whitespace only
        final public static Pattern EMPTY = Pattern.compile("^" + WSO + "$");



        // define quad part regexes

        // subject: IRI or blank node, followed by mandatory whitespace
        final public static Pattern SUBJECT = Pattern.compile("(?:" + IRI + "|" + BNODE + ")" + WS);

        // predicate: IRI followed by mandatory whitespace
        final public static Pattern PROPERTY = Pattern.compile(IRI.pattern() + WS.pattern());

        // object: IRI, blank node or literal, followed by optional whitespace
        final public static Pattern OBJECT = Pattern.compile("(?:" + IRI + "|" + BNODE + "|"

                + LITERAL + ")" + WSO);

        // statement end: either just "." or a graph label (IRI or blank node),
        // optional whitespace and "."
        final public static Pattern GRAPH = Pattern.compile("(?:\\.|(?:(?:" + IRI + "|" + BNODE

                + ")" + WSO + "\\.))");



        // full quad regex

        // a complete line, anchored at both ends, with optional leading and
        // trailing whitespace
        final public static Pattern QUAD = Pattern.compile("^" + WSO + SUBJECT + PROPERTY + OBJECT

                + GRAPH + WSO + "$");

    }



    /**
     * Parses an N-Quads document into an RDF dataset.
     * <p>
     * The input is split into lines; lines containing only whitespace are
     * skipped and every other line must match the full quad pattern. Quads
     * without a graph label are stored under the "@default" graph, and a quad
     * is only added to a graph if that graph does not already contain it.
     *
     * @param input
     *            the N-Quads input to parse.
     *
     * @return the parsed RDF dataset.
     *
     * @throws JSONLDProcessingError
     *             with type PARSE_ERROR and the 1-based "line" detail if a
     *             non-empty line is not a valid quad.
     */
    public static RDFDataset parseNQuads(String input) throws JSONLDProcessingError {
        final RDFDataset result = new RDFDataset();

        final String[] rows = Regex.EOLN.split(input);
        for (int index = 0; index < rows.length; index++) {
            final String row = rows[index];

            // blank lines carry no statement
            if (Regex.EMPTY.matcher(row).matches()) {
                continue;
            }

            final Matcher parts = Regex.QUAD.matcher(row);
            if (!parts.matches()) {
                // line numbers reported to the caller are 1-based
                throw new JSONLDProcessingError("Error while parsing N-Quads; invalid quad.")
                        .setType(JSONLDProcessingError.Error.PARSE_ERROR).setDetail("line",
                                index + 1);
            }

            // subject: group 1 is an IRI, group 2 a blank node
            final RDFDataset.Node subjectNode;
            if (parts.group(1) == null) {
                subjectNode = new RDFDataset.BlankNode(unescape(parts.group(2)));
            } else {
                subjectNode = new RDFDataset.IRI(unescape(parts.group(1)));
            }

            // predicate: group 3, always an IRI
            final RDFDataset.Node predicateNode = new RDFDataset.IRI(unescape(parts.group(3)));

            // object: group 4 is an IRI, group 5 a blank node, otherwise a
            // literal (6 = text, 7 = datatype, 8 = language)
            final RDFDataset.Node objectNode;
            if (parts.group(4) != null) {
                objectNode = new RDFDataset.IRI(unescape(parts.group(4)));
            } else if (parts.group(5) != null) {
                objectNode = new RDFDataset.BlankNode(unescape(parts.group(5)));
            } else {
                final String lang = unescape(parts.group(8));
                final String type;
                if (parts.group(7) != null) {
                    type = unescape(parts.group(7));
                } else if (parts.group(8) != null) {
                    type = RDF_LANGSTRING;
                } else {
                    type = XSD_STRING;
                }
                final String text = unescape(parts.group(6));
                objectNode = new RDFDataset.Literal(text, type, lang);
            }

            // graph label: group 9 is an IRI, group 10 a blank node; absent
            // labels map to the default graph
            final String graph;
            if (parts.group(9) != null) {
                graph = unescape(parts.group(9));
            } else if (parts.group(10) != null) {
                graph = unescape(parts.group(10));
            } else {
                graph = "@default";
            }

            final RDFDataset.Quad quad = new RDFDataset.Quad(subjectNode, predicateNode,
                    objectNode, graph);

            if (result.containsKey(graph)) {
                // known graph: only add quads it does not hold yet
                final List<RDFDataset.Quad> known = (List<RDFDataset.Quad>) result.get(graph);
                if (!known.contains(quad)) {
                    known.add(quad);
                }
            } else {
                // first quad of this graph: create its list
                final List<RDFDataset.Quad> fresh = new ArrayList<RDFDataset.Quad>();
                fresh.add(quad);
                result.put(graph, fresh);
            }
        }

        return result;
    }

}