/core/src/main/java/com/github/jsonldjava/impl/TurtleTripleCallback.java
Java | 375 lines | 284 code | 35 blank | 56 comment | 71 complexity | 9b1ada7f4c25525515fd3593957a06a1 MD5 | raw file
Possible License(s): BSD-3-Clause
- package com.github.jsonldjava.impl;
-
- import static com.github.jsonldjava.core.JSONLDConsts.RDF_FIRST;
- import static com.github.jsonldjava.core.JSONLDConsts.RDF_NIL;
- import static com.github.jsonldjava.core.JSONLDConsts.RDF_REST;
- import static com.github.jsonldjava.core.JSONLDConsts.XSD_BOOLEAN;
- import static com.github.jsonldjava.core.JSONLDConsts.XSD_DOUBLE;
- import static com.github.jsonldjava.core.JSONLDConsts.XSD_FLOAT;
- import static com.github.jsonldjava.core.JSONLDConsts.XSD_INTEGER;
- import static com.github.jsonldjava.core.JSONLDConsts.XSD_STRING;
-
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.Iterator;
- import java.util.LinkedHashMap;
- import java.util.LinkedHashSet;
- import java.util.List;
- import java.util.Map;
- import java.util.Map.Entry;
- import java.util.Set;
-
- import com.github.jsonldjava.core.JSONLDTripleCallback;
- import com.github.jsonldjava.core.RDFDataset;
-
- public class TurtleTripleCallback implements JSONLDTripleCallback {
-
- private static final int MAX_LINE_LENGTH = 160;
- private static final int TAB_SPACES = 4;
- private static final String COLS_KEY = "..cols.."; // this shouldn't be a
- // valid iri/bnode i
- // hope!
- final Map<String, String> availableNamespaces = new LinkedHashMap<String, String>() {
- {
- // TODO: fill with default namespaces
- }
- };
- Set<String> usedNamespaces;
-
- public TurtleTripleCallback() {
- }
-
- @Override
- public Object call(RDFDataset dataset) {
- for (final Entry<String, String> e : dataset.getNamespaces().entrySet()) {
- availableNamespaces.put(e.getValue(), e.getKey());
- }
- usedNamespaces = new LinkedHashSet<String>();
-
- final int tabs = 0;
-
- final Map<String, List<Object>> refs = new LinkedHashMap<String, List<Object>>();
- final Map<String, Map<String, List<Object>>> ttl = new LinkedHashMap<String, Map<String, List<Object>>>();
-
- for (String graphName : dataset.keySet()) {
- final List<RDFDataset.Quad> triples = dataset.getQuads(graphName);
- if ("@default".equals(graphName)) {
- graphName = null;
- }
-
- // http://www.w3.org/TR/turtle/#unlabeled-bnodes
- // TODO: implement nesting for unlabled nodes
-
- // map of what the output should look like
- // subj (or [ if bnode) > pred > obj
- // > obj (set ref if IRI)
- // > pred > obj (set ref if bnode)
- // subj > etc etc etc
-
- // subjid -> [ ref, ref, ref ]
-
- String prevSubject = "";
- String prevPredicate = "";
-
- Map<String, List<Object>> thisSubject = null;
- List<Object> thisPredicate = null;
-
- for (final RDFDataset.Quad triple : triples) {
- final String subject = triple.getSubject().getValue();
- final String predicate = triple.getPredicate().getValue();
-
- if (prevSubject.equals(subject)) {
- if (prevPredicate.equals(predicate)) {
- // nothing to do
- } else {
- // new predicate
- if (thisSubject.containsKey(predicate)) {
- thisPredicate = thisSubject.get(predicate);
- } else {
- thisPredicate = new ArrayList<Object>();
- thisSubject.put(predicate, thisPredicate);
- }
- prevPredicate = predicate;
- }
- } else {
- // new subject
- if (ttl.containsKey(subject)) {
- thisSubject = ttl.get(subject);
- } else {
- thisSubject = new LinkedHashMap<String, List<Object>>();
- ttl.put(subject, thisSubject);
- }
- if (thisSubject.containsKey(predicate)) {
- thisPredicate = thisSubject.get(predicate);
- } else {
- thisPredicate = new ArrayList<Object>();
- thisSubject.put(predicate, thisPredicate);
- }
-
- prevSubject = subject;
- prevPredicate = predicate;
- }
-
- if (triple.getObject().isLiteral()) {
- thisPredicate.add(triple.getObject());
- } else {
- final String o = triple.getObject().getValue();
- if (o.startsWith("_:")) {
- // add ref to o
- if (!refs.containsKey(o)) {
- refs.put(o, new ArrayList<Object>());
- }
- refs.get(o).add(thisPredicate);
- }
- thisPredicate.add(o);
- }
- }
- }
-
- final Map<String, List<Object>> collections = new LinkedHashMap<String, List<Object>>();
-
- final List<String> subjects = new ArrayList<String>(ttl.keySet());
- // find collections
- for (final String subj : subjects) {
- Map<String, List<Object>> preds = ttl.get(subj);
- if (preds != null && preds.containsKey(RDF_FIRST)) {
- final List<Object> col = new ArrayList<Object>();
- collections.put(subj, col);
- while (true) {
- final List<Object> first = preds.remove(RDF_FIRST);
- final Object o = first.get(0);
- col.add(o);
- // refs
- if (refs.containsKey(o)) {
- refs.get(o).remove(first);
- refs.get(o).add(col);
- }
- final String next = (String) preds.remove(RDF_REST).get(0);
- if (RDF_NIL.equals(next)) {
- // end of this list
- break;
- }
- // if collections already contains a value for "next", add
- // it to this col and break out
- if (collections.containsKey(next)) {
- col.addAll(collections.remove(next));
- break;
- }
- preds = ttl.remove(next);
- refs.remove(next);
- }
- }
- }
-
- // process refs (nesting referenced bnodes if only one reference to them
- // in the whole graph)
- for (final String id : refs.keySet()) {
- // skip items if there is more than one reference to them in the
- // graph
- if (refs.get(id).size() > 1) {
- continue;
- }
-
- // otherwise embed them into the referenced location
- Object object = ttl.remove(id);
- if (collections.containsKey(id)) {
- object = new LinkedHashMap<String, List<Object>>();
- final List<Object> tmp = new ArrayList<Object>();
- tmp.add(collections.remove(id));
- ((HashMap<String, Object>) object).put(COLS_KEY, tmp);
- }
- final List<Object> predicate = (List<Object>) refs.get(id).get(0);
- // replace the one bnode ref with the object
- predicate.set(predicate.lastIndexOf(id), object);
- }
-
- // replace the rest of the collections
- for (final String id : collections.keySet()) {
- final Map<String, List<Object>> subj = ttl.get(id);
- if (!subj.containsKey(COLS_KEY)) {
- subj.put(COLS_KEY, new ArrayList<Object>());
- }
- subj.get(COLS_KEY).add(collections.get(id));
- }
-
- // build turtle output
- final String output = generateTurtle(ttl, 0, 0, false);
-
- String prefixes = "";
- for (final String prefix : usedNamespaces) {
- final String name = availableNamespaces.get(prefix);
- prefixes += "@prefix " + name + ": <" + prefix + "> .\n";
- }
-
- return ("".equals(prefixes) ? "" : prefixes + "\n") + output;
- }
-
- private String generateObject(Object object, String sep, boolean hasNext, int indentation,
- int lineLength) {
- String rval = "";
- String obj;
- if (object instanceof String) {
- obj = getURI((String) object);
- } else if (object instanceof RDFDataset.Literal) {
- obj = ((RDFDataset.Literal) object).getValue();
- final String dt = ((RDFDataset.Literal) object).getDatatype();
- if (dt != null) {
- // TODO: this probably isn't an exclusive list of all the
- // datatype literals that can be represented as native types
- if (!(XSD_DOUBLE.equals(dt) || XSD_INTEGER.equals(dt) || XSD_FLOAT.equals(dt) || XSD_BOOLEAN
- .equals(dt))) {
- obj = "\"" + obj + "\"";
- if (!XSD_STRING.equals(dt)) {
- obj += "^^" + getURI(dt);
- }
- }
- } else {
- obj = "\"" + obj + "\"";
- final String lang = ((RDFDataset.Literal) object).getLanguage();
- if (lang != null) {
- obj += "@" + lang;
- }
- }
- } else {
- // must be an object
- final Map<String, Map<String, List<Object>>> tmp = new LinkedHashMap<String, Map<String, List<Object>>>();
- tmp.put("_:x", (Map<String, List<Object>>) object);
- obj = generateTurtle(tmp, indentation + 1, lineLength, true);
- }
-
- final int idxofcr = obj.indexOf("\n");
- // check if output will fix in the max line length (factor in comma if
- // not the last item, current line length and length to the next CR)
- if ((hasNext ? 1 : 0) + lineLength + (idxofcr != -1 ? idxofcr : obj.length()) > MAX_LINE_LENGTH) {
- rval += "\n" + tabs(indentation + 1);
- lineLength = (indentation + 1) * TAB_SPACES;
- }
- rval += obj;
- if (idxofcr != -1) {
- lineLength += (obj.length() - obj.lastIndexOf("\n"));
- } else {
- lineLength += obj.length();
- }
- if (hasNext) {
- rval += sep;
- lineLength += sep.length();
- if (lineLength < MAX_LINE_LENGTH) {
- rval += " ";
- lineLength++;
- } else {
- rval += "\n";
- }
- }
- return rval;
- }
-
- private String generateTurtle(Map<String, Map<String, List<Object>>> ttl, int indentation,
- int lineLength, boolean isObject) {
- String rval = "";
- final Iterator<String> subjIter = ttl.keySet().iterator();
- while (subjIter.hasNext()) {
- final String subject = subjIter.next();
- final Map<String, List<Object>> subjval = ttl.get(subject);
- // boolean isBlankNode = subject.startsWith("_:");
- boolean hasOpenBnodeBracket = false;
- if (subject.startsWith("_:")) {
- // only open blank node bracket the node doesn't contain any
- // collections
- if (!subjval.containsKey(COLS_KEY)) {
- rval += "[ ";
- lineLength += 2;
- hasOpenBnodeBracket = true;
- }
-
- // TODO: according to http://www.rdfabout.com/demo/validator/
- // 1) collections as objects cannot contain any predicates other
- // than rdf:first and rdf:rest
- // 2) collections cannot be surrounded with [ ]
-
- // check for collection
- if (subjval.containsKey(COLS_KEY)) {
- final List<Object> collections = subjval.remove(COLS_KEY);
- for (final Object collection : collections) {
- rval += "( ";
- lineLength += 2;
- final Iterator<Object> objIter = ((List<Object>) collection).iterator();
- while (objIter.hasNext()) {
- final Object object = objIter.next();
- rval += generateObject(object, "", objIter.hasNext(), indentation,
- lineLength);
- lineLength = rval.length() - rval.lastIndexOf("\n");
- }
- rval += " ) ";
- lineLength += 3;
- }
- }
- // check for blank node
- } else {
- rval += getURI(subject) + " ";
- lineLength += subject.length() + 1;
- }
- final Iterator<String> predIter = ttl.get(subject).keySet().iterator();
- while (predIter.hasNext()) {
- final String predicate = predIter.next();
- rval += getURI(predicate) + " ";
- lineLength += predicate.length() + 1;
- final Iterator<Object> objIter = ttl.get(subject).get(predicate).iterator();
- while (objIter.hasNext()) {
- final Object object = objIter.next();
- rval += generateObject(object, ",", objIter.hasNext(), indentation, lineLength);
- lineLength = rval.length() - rval.lastIndexOf("\n");
- }
- if (predIter.hasNext()) {
- rval += " ;\n" + tabs(indentation + 1);
- lineLength = (indentation + 1) * TAB_SPACES;
- }
- }
- if (hasOpenBnodeBracket) {
- rval += " ]";
- }
- if (!isObject) {
- rval += " .\n";
- if (subjIter.hasNext()) { // add blank space if we have another
- // object below this
- rval += "\n";
- }
- }
- }
- return rval;
- }
-
- // TODO: Assert (TAB_SPACES == 4) otherwise this needs to be edited, and
- // should fail to compile
- private String tabs(int tabs) {
- String rval = "";
- for (int i = 0; i < tabs; i++) {
- rval += " "; // using spaces for tabs
- }
- return rval;
- }
-
- /**
- * checks the URI for a prefix, and if one is found, set used prefixes to
- * true
- *
- * @param predicate
- * @return
- */
- private String getURI(String uri) {
- // check for bnode
- if (uri.startsWith("_:")) {
- // return the bnode id
- return uri;
- }
- for (final String prefix : availableNamespaces.keySet()) {
- if (uri.startsWith(prefix)) {
- usedNamespaces.add(prefix);
- // return the prefixed URI
- return availableNamespaces.get(prefix) + ":" + uri.substring(prefix.length());
- }
- }
- // return the full URI
- return "<" + uri + ">";
- }
-
- }