/core/src/main/java/com/github/jsonldjava/core/NormalizeUtils.java
Java | 573 lines | 373 code | 52 blank | 148 comment | 104 complexity | ce5a6a66b4244c818802f5a37a0d02ec MD5 | raw file
Possible License(s): BSD-3-Clause
- package com.github.jsonldjava.core;
-
- import static com.github.jsonldjava.core.RDFDatasetUtils.parseNQuads;
- import static com.github.jsonldjava.core.RDFDatasetUtils.toNQuad;
-
- import java.io.UnsupportedEncodingException;
- import java.security.MessageDigest;
- import java.security.NoSuchAlgorithmException;
- import java.util.ArrayList;
- import java.util.Collection;
- import java.util.Collections;
- import java.util.Comparator;
- import java.util.LinkedHashMap;
- import java.util.List;
- import java.util.Map;
-
- import com.github.jsonldjava.utils.JSONUtils;
-
- class NormalizeUtils {
-
- private final UniqueNamer namer;
- private final Map<String, Object> bnodes;
- private final List<Object> quads;
- private final Options options;
-
- public NormalizeUtils(List<Object> quads, Map<String, Object> bnodes, UniqueNamer namer,
- Options options) {
- this.options = options;
- this.quads = quads;
- this.bnodes = bnodes;
- this.namer = namer;
- }
-
- // generates unique and duplicate hashes for bnodes
- public Object hashBlankNodes(Collection<String> unnamed_) throws JSONLDProcessingError {
- List<String> unnamed = new ArrayList<String>(unnamed_);
- List<String> nextUnnamed = new ArrayList<String>();
- Map<String, List<String>> duplicates = new LinkedHashMap<String, List<String>>();
- Map<String, String> unique = new LinkedHashMap<String, String>();
-
- // NOTE: not using the same structure as javascript here to avoid
- // possible stack overflows
- // hash quads for each unnamed bnode
- for (int hui = 0;; hui++) {
- if (hui == unnamed.size()) {
- // done, name blank nodes
- Boolean named = false;
- List<String> hashes = new ArrayList<String>(unique.keySet());
- Collections.sort(hashes);
- for (final String hash : hashes) {
- final String bnode = unique.get(hash);
- namer.getName(bnode);
- named = true;
- }
-
- // continue to hash bnodes if a bnode was assigned a name
- if (named) {
- // this resets the initial variables, so it seems like it
- // has to go on the stack
- // but since this is the end of the function either way, it
- // might not have to
- // hashBlankNodes(unnamed);
- hui = -1;
- unnamed = nextUnnamed;
- nextUnnamed = new ArrayList<String>();
- duplicates = new LinkedHashMap<String, List<String>>();
- unique = new LinkedHashMap<String, String>();
- continue;
- }
- // name the duplicate hash bnods
- else {
- // names duplicate hash bnodes
- // enumerate duplicate hash groups in sorted order
- hashes = new ArrayList<String>(duplicates.keySet());
- Collections.sort(hashes);
-
- // process each group
- for (int pgi = 0;; pgi++) {
- if (pgi == hashes.size()) {
- // done, create JSON-LD array
- // return createArray();
- final List<String> normalized = new ArrayList<String>();
-
- // Note: At this point all bnodes in the set of RDF
- // quads have been
- // assigned canonical names, which have been stored
- // in the 'namer' object.
- // Here each quad is updated by assigning each of
- // its bnodes its new name
- // via the 'namer' object
-
- // update bnode names in each quad and serialize
- for (int cai = 0; cai < quads.size(); ++cai) {
- final Map<String, Object> quad = (Map<String, Object>) quads
- .get(cai);
- for (final String attr : new String[] { "subject", "object", "name" }) {
- if (quad.containsKey(attr)) {
- final Map<String, Object> qa = (Map<String, Object>) quad
- .get(attr);
- if (qa != null
- && "blank node".equals(qa.get("type"))
- && ((String) qa.get("value")).indexOf("_:c14n") != 0) {
- qa.put("value",
- namer.getName((String) qa.get(("value"))));
- }
- }
- }
- normalized
- .add(toNQuad(
- (RDFDataset.Quad) quad,
- quad.containsKey("name")
- && quad.get("name") != null ? (String) ((Map<String, Object>) quad
- .get("name")).get("value") : null));
- }
-
- // sort normalized output
- Collections.sort(normalized);
-
- // handle output format
- if (options.format != null) {
- if ("application/nquads".equals(options.format)) {
- String rval = "";
- for (final String n : normalized) {
- rval += n;
- }
- return rval;
- } else {
- throw new JSONLDProcessingError("Unknown output format.")
- .setType(JSONLDProcessingError.Error.UNKNOWN_FORMAT)
- .setDetail("format", options.format);
- }
- }
- String rval = "";
- for (final String n : normalized) {
- rval += n;
- }
- return parseNQuads(rval);
- }
-
- // name each group member
- final List<String> group = duplicates.get(hashes.get(pgi));
- final List<HashResult> results = new ArrayList<HashResult>();
- for (int n = 0;; n++) {
- if (n == group.size()) {
- // name bnodes in hash order
- Collections.sort(results, new Comparator<HashResult>() {
- @Override
- public int compare(HashResult a, HashResult b) {
- final int res = a.hash.compareTo(b.hash);
- return res;
- }
- });
- for (final HashResult r : results) {
- // name all bnodes in path namer in
- // key-entry order
- // Note: key-order is preserved in
- // javascript
- for (final String key : r.pathNamer.existing().keySet()) {
- namer.getName(key);
- }
- }
- // processGroup(i+1);
- break;
- } else {
- // skip already-named bnodes
- final String bnode = group.get(n);
- if (namer.isNamed(bnode)) {
- continue;
- }
-
- // hash bnode paths
- final UniqueNamer pathNamer = new UniqueNamer("_:b");
- pathNamer.getName(bnode);
-
- final HashResult result = hashPaths(bnode, bnodes, namer, pathNamer);
- results.add(result);
- }
- }
- }
- }
- }
-
- // hash unnamed bnode
- final String bnode = unnamed.get(hui);
- final String hash = hashQuads(bnode, bnodes, namer);
-
- // store hash as unique or a duplicate
- if (duplicates.containsKey(hash)) {
- duplicates.get(hash).add(bnode);
- nextUnnamed.add(bnode);
- } else if (unique.containsKey(hash)) {
- final List<String> tmp = new ArrayList<String>();
- tmp.add(unique.get(hash));
- tmp.add(bnode);
- duplicates.put(hash, tmp);
- nextUnnamed.add(unique.get(hash));
- nextUnnamed.add(bnode);
- unique.remove(hash);
- } else {
- unique.put(hash, bnode);
- }
- }
- }
-
- private static class HashResult {
- String hash;
- UniqueNamer pathNamer;
- }
-
- /**
- * Produces a hash for the paths of adjacent bnodes for a bnode,
- * incorporating all information about its subgraph of bnodes. This method
- * will recursively pick adjacent bnode permutations that produce the
- * lexicographically-least 'path' serializations.
- *
- * @param id
- * the ID of the bnode to hash paths for.
- * @param bnodes
- * the map of bnode quads.
- * @param namer
- * the canonical bnode namer.
- * @param pathNamer
- * the namer used to assign names to adjacent bnodes.
- * @param callback
- * (err, result) called once the operation completes.
- */
- private static HashResult hashPaths(String id, Map<String, Object> bnodes, UniqueNamer namer,
- UniqueNamer pathNamer) {
- try {
- // create SHA-1 digest
- final MessageDigest md = MessageDigest.getInstance("SHA-1");
-
- final Map<String, List<String>> groups = new LinkedHashMap<String, List<String>>();
- List<String> groupHashes;
- final List<Object> quads = (List<Object>) ((Map<String, Object>) bnodes.get(id))
- .get("quads");
-
- for (int hpi = 0;; hpi++) {
- if (hpi == quads.size()) {
- // done , hash groups
- groupHashes = new ArrayList<String>(groups.keySet());
- Collections.sort(groupHashes);
- for (int hgi = 0;; hgi++) {
- if (hgi == groupHashes.size()) {
- final HashResult res = new HashResult();
- res.hash = encodeHex(md.digest());
- res.pathNamer = pathNamer;
- return res;
- }
-
- // digest group hash
- final String groupHash = groupHashes.get(hgi);
- md.update(groupHash.getBytes("UTF-8"));
-
- // choose a path and namer from the permutations
- String chosenPath = null;
- UniqueNamer chosenNamer = null;
- final Permutator permutator = new Permutator(groups.get(groupHash));
- while (true) {
- Boolean contPermutation = false;
- Boolean breakOut = false;
- final List<String> permutation = permutator.next();
- UniqueNamer pathNamerCopy = pathNamer.clone();
-
- // build adjacent path
- String path = "";
- final List<String> recurse = new ArrayList<String>();
- for (final String bnode : permutation) {
- // use canonical name if available
- if (namer.isNamed(bnode)) {
- path += namer.getName(bnode);
- } else {
- // recurse if bnode isn't named in the path
- // yet
- if (!pathNamerCopy.isNamed(bnode)) {
- recurse.add(bnode);
- }
- path += pathNamerCopy.getName(bnode);
- }
-
- // skip permutation if path is already >= chosen
- // path
- if (chosenPath != null && path.length() >= chosenPath.length()
- && path.compareTo(chosenPath) > 0) {
- // return nextPermutation(true);
- if (permutator.hasNext()) {
- contPermutation = true;
- } else {
- // digest chosen path and update namer
- md.update(chosenPath.getBytes("UTF-8"));
- pathNamer = chosenNamer;
- // hash the nextGroup
- breakOut = true;
- }
- break;
- }
- }
-
- // if we should do the next permutation
- if (contPermutation) {
- continue;
- }
- // if we should stop processing this group
- if (breakOut) {
- break;
- }
-
- // does the next recursion
- for (int nrn = 0;; nrn++) {
- if (nrn == recurse.size()) {
- // return nextPermutation(false);
- if (chosenPath == null || path.compareTo(chosenPath) < 0) {
- chosenPath = path;
- chosenNamer = pathNamerCopy;
- }
- if (!permutator.hasNext()) {
- // digest chosen path and update namer
- md.update(chosenPath.getBytes("UTF-8"));
- pathNamer = chosenNamer;
- // hash the nextGroup
- breakOut = true;
- }
- break;
- }
-
- // do recursion
- final String bnode = recurse.get(nrn);
- final HashResult result = hashPaths(bnode, bnodes, namer,
- pathNamerCopy);
- path += pathNamerCopy.getName(bnode) + "<" + result.hash + ">";
- pathNamerCopy = result.pathNamer;
-
- // skip permutation if path is already >= chosen
- // path
- if (chosenPath != null && path.length() >= chosenPath.length()
- && path.compareTo(chosenPath) > 0) {
- // return nextPermutation(true);
- if (!permutator.hasNext()) {
- // digest chosen path and update namer
- md.update(chosenPath.getBytes("UTF-8"));
- pathNamer = chosenNamer;
- // hash the nextGroup
- breakOut = true;
- }
- break;
- }
- // do next recursion
- }
-
- // if we should stop processing this group
- if (breakOut) {
- break;
- }
- }
- }
- }
-
- // get adjacent bnode
- final Map<String, Object> quad = (Map<String, Object>) quads.get(hpi);
- String bnode = getAdjacentBlankNodeName((Map<String, Object>) quad.get("subject"),
- id);
- String direction = null;
- if (bnode != null) {
- // normal property
- direction = "p";
- } else {
- bnode = getAdjacentBlankNodeName((Map<String, Object>) quad.get("object"), id);
- if (bnode != null) {
- // reverse property
- direction = "r";
- }
- }
-
- if (bnode != null) {
- // get bnode name (try canonical, path, then hash)
- String name;
- if (namer.isNamed(bnode)) {
- name = namer.getName(bnode);
- } else if (pathNamer.isNamed(bnode)) {
- name = pathNamer.getName(bnode);
- } else {
- name = hashQuads(bnode, bnodes, namer);
- }
-
- // hash direction, property, end bnode name/hash
- final MessageDigest md1 = MessageDigest.getInstance("SHA-1");
- // String toHash = direction + (String) ((Map<String,
- // Object>) quad.get("predicate")).get("value") + name;
- md1.update(direction.getBytes("UTF-8"));
- md1.update(((String) ((Map<String, Object>) quad.get("predicate")).get("value"))
- .getBytes("UTF-8"));
- md1.update(name.getBytes("UTF-8"));
- final String groupHash = encodeHex(md1.digest());
- if (groups.containsKey(groupHash)) {
- groups.get(groupHash).add(bnode);
- } else {
- final List<String> tmp = new ArrayList<String>();
- tmp.add(bnode);
- groups.put(groupHash, tmp);
- }
- }
- }
- } catch (final NoSuchAlgorithmException e) {
- // TODO: i don't expect that SHA-1 is even NOT going to be
- // available?
- // look into this further
- throw new RuntimeException(e);
- } catch (final UnsupportedEncodingException e) {
- // TODO: i don't expect that UTF-8 is ever not going to be available
- // either
- throw new RuntimeException(e);
- }
- }
-
- /**
- * Hashes all of the quads about a blank node.
- *
- * @param id
- * the ID of the bnode to hash quads for.
- * @param bnodes
- * the mapping of bnodes to quads.
- * @param namer
- * the canonical bnode namer.
- *
- * @return the new hash.
- */
- private static String hashQuads(String id, Map<String, Object> bnodes, UniqueNamer namer) {
- // return cached hash
- if (((Map<String, Object>) bnodes.get(id)).containsKey("hash")) {
- return (String) ((Map<String, Object>) bnodes.get(id)).get("hash");
- }
-
- // serialize all of bnode's quads
- final List<Map<String, Object>> quads = (List<Map<String, Object>>) ((Map<String, Object>) bnodes
- .get(id)).get("quads");
- final List<String> nquads = new ArrayList<String>();
- for (int i = 0; i < quads.size(); ++i) {
- nquads.add(toNQuad((RDFDataset.Quad) quads.get(i),
- quads.get(i).get("name") != null ? (String) ((Map<String, Object>) quads.get(i)
- .get("name")).get("value") : null, id));
- }
- // sort serialized quads
- Collections.sort(nquads);
- // return hashed quads
- final String hash = sha1hash(nquads);
- ((Map<String, Object>) bnodes.get(id)).put("hash", hash);
- return hash;
- }
-
- /**
- * A helper class to sha1 hash all the strings in a collection
- *
- * @param nquads
- * @return
- */
- private static String sha1hash(Collection<String> nquads) {
- try {
- // create SHA-1 digest
- final MessageDigest md = MessageDigest.getInstance("SHA-1");
- for (final String nquad : nquads) {
- md.update(nquad.getBytes("UTF-8"));
- }
- return encodeHex(md.digest());
- } catch (final NoSuchAlgorithmException e) {
- throw new RuntimeException(e);
- } catch (final UnsupportedEncodingException e) {
- throw new RuntimeException(e);
- }
- }
-
- // TODO: this is something to optimize
- private static String encodeHex(final byte[] data) {
- String rval = "";
- for (final byte b : data) {
- rval += String.format("%02x", b);
- }
- return rval;
- }
-
- /**
- * A helper function that gets the blank node name from an RDF quad node
- * (subject or object). If the node is a blank node and its value does not
- * match the given blank node ID, it will be returned.
- *
- * @param node
- * the RDF quad node.
- * @param id
- * the ID of the blank node to look next to.
- *
- * @return the adjacent blank node name or null if none was found.
- */
- private static String getAdjacentBlankNodeName(Map<String, Object> node, String id) {
- return "blank node".equals(node.get("type"))
- && (!node.containsKey("value") || !JSONUtils.equals(node.get("value"), id)) ? (String) node
- .get("value") : null;
- }
-
- private static class Permutator {
-
- private final List<String> list;
- private boolean done;
- private final Map<String, Boolean> left;
-
- public Permutator(List<String> list) {
- this.list = (List<String>) JSONLDUtils.clone(list);
- Collections.sort(this.list);
- this.done = false;
- this.left = new LinkedHashMap<String, Boolean>();
- for (final String i : this.list) {
- this.left.put(i, true);
- }
- }
-
- /**
- * Returns true if there is another permutation.
- *
- * @return true if there is another permutation, false if not.
- */
- public boolean hasNext() {
- return !this.done;
- }
-
- /**
- * Gets the next permutation. Call hasNext() to ensure there is another
- * one first.
- *
- * @return the next permutation.
- */
- public List<String> next() {
- final List<String> rval = (List<String>) JSONLDUtils.clone(this.list);
-
- // Calculate the next permutation using Steinhaus-Johnson-Trotter
- // permutation algoritm
-
- // get largest mobile element k
- // (mobile: element is grater than the one it is looking at)
- String k = null;
- int pos = 0;
- final int length = this.list.size();
- for (int i = 0; i < length; ++i) {
- final String element = this.list.get(i);
- final Boolean left = this.left.get(element);
- if ((k == null || element.compareTo(k) > 0)
- && ((left && i > 0 && element.compareTo(this.list.get(i - 1)) > 0) || (!left
- && i < (length - 1) && element.compareTo(this.list.get(i + 1)) > 0))) {
- k = element;
- pos = i;
- }
- }
-
- // no more permutations
- if (k == null) {
- this.done = true;
- } else {
- // swap k and the element it is looking at
- final int swap = this.left.get(k) ? pos - 1 : pos + 1;
- this.list.set(pos, this.list.get(swap));
- this.list.set(swap, k);
-
- // reverse the direction of all element larger than k
- for (int i = 0; i < length; i++) {
- if (this.list.get(i).compareTo(k) > 0) {
- this.left.put(this.list.get(i), !this.left.get(this.list.get(i)));
- }
- }
- }
-
- return rval;
- }
-
- }
-
- }