PageRenderTime 51ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/core/src/main/java/com/github/jsonldjava/core/NormalizeUtils.java

http://github.com/tristan/jsonld-java
Java | 573 lines | 373 code | 52 blank | 148 comment | 104 complexity | ce5a6a66b4244c818802f5a37a0d02ec MD5 | raw file
Possible License(s): BSD-3-Clause
  1. package com.github.jsonldjava.core;
  2. import static com.github.jsonldjava.core.RDFDatasetUtils.parseNQuads;
  3. import static com.github.jsonldjava.core.RDFDatasetUtils.toNQuad;
  4. import java.io.UnsupportedEncodingException;
  5. import java.security.MessageDigest;
  6. import java.security.NoSuchAlgorithmException;
  7. import java.util.ArrayList;
  8. import java.util.Collection;
  9. import java.util.Collections;
  10. import java.util.Comparator;
  11. import java.util.LinkedHashMap;
  12. import java.util.List;
  13. import java.util.Map;
  14. import com.github.jsonldjava.utils.JSONUtils;
  15. class NormalizeUtils {
  /** Issues the canonical names that are assigned to blank nodes. */
  private final UniqueNamer namer;

  /** Blank node id mapped to its info map; the "quads" and "hash" entries are read by this class. */
  private final Map<String, Object> bnodes;

  /** The quads being normalized; blank node values inside them are rewritten in place. */
  private final List<Object> quads;

  /** Processing options; {@code format} selects the shape of the returned result. */
  private final Options options;

  /**
   * Creates a normalizer. The arguments are stored as given, without copying.
   *
   * @param quads the quads to normalize
   * @param bnodes blank node id mapped to the information recorded for that node
   * @param namer the namer that hands out canonical blank node names
   * @param options the processing options
   */
  public NormalizeUtils(List<Object> quads, Map<String, Object> bnodes, UniqueNamer namer,
      Options options) {
    this.quads = quads;
    this.bnodes = bnodes;
    this.namer = namer;
    this.options = options;
  }
  27. // generates unique and duplicate hashes for bnodes
  28. public Object hashBlankNodes(Collection<String> unnamed_) throws JSONLDProcessingError {
  29. List<String> unnamed = new ArrayList<String>(unnamed_);
  30. List<String> nextUnnamed = new ArrayList<String>();
  31. Map<String, List<String>> duplicates = new LinkedHashMap<String, List<String>>();
  32. Map<String, String> unique = new LinkedHashMap<String, String>();
  33. // NOTE: not using the same structure as javascript here to avoid
  34. // possible stack overflows
  35. // hash quads for each unnamed bnode
  36. for (int hui = 0;; hui++) {
  37. if (hui == unnamed.size()) {
  38. // done, name blank nodes
  39. Boolean named = false;
  40. List<String> hashes = new ArrayList<String>(unique.keySet());
  41. Collections.sort(hashes);
  42. for (final String hash : hashes) {
  43. final String bnode = unique.get(hash);
  44. namer.getName(bnode);
  45. named = true;
  46. }
  47. // continue to hash bnodes if a bnode was assigned a name
  48. if (named) {
  49. // this resets the initial variables, so it seems like it
  50. // has to go on the stack
  51. // but since this is the end of the function either way, it
  52. // might not have to
  53. // hashBlankNodes(unnamed);
  54. hui = -1;
  55. unnamed = nextUnnamed;
  56. nextUnnamed = new ArrayList<String>();
  57. duplicates = new LinkedHashMap<String, List<String>>();
  58. unique = new LinkedHashMap<String, String>();
  59. continue;
  60. }
  61. // name the duplicate hash bnods
  62. else {
  63. // names duplicate hash bnodes
  64. // enumerate duplicate hash groups in sorted order
  65. hashes = new ArrayList<String>(duplicates.keySet());
  66. Collections.sort(hashes);
  67. // process each group
  68. for (int pgi = 0;; pgi++) {
  69. if (pgi == hashes.size()) {
  70. // done, create JSON-LD array
  71. // return createArray();
  72. final List<String> normalized = new ArrayList<String>();
  73. // Note: At this point all bnodes in the set of RDF
  74. // quads have been
  75. // assigned canonical names, which have been stored
  76. // in the 'namer' object.
  77. // Here each quad is updated by assigning each of
  78. // its bnodes its new name
  79. // via the 'namer' object
  80. // update bnode names in each quad and serialize
  81. for (int cai = 0; cai < quads.size(); ++cai) {
  82. final Map<String, Object> quad = (Map<String, Object>) quads
  83. .get(cai);
  84. for (final String attr : new String[] { "subject", "object", "name" }) {
  85. if (quad.containsKey(attr)) {
  86. final Map<String, Object> qa = (Map<String, Object>) quad
  87. .get(attr);
  88. if (qa != null
  89. && "blank node".equals(qa.get("type"))
  90. && ((String) qa.get("value")).indexOf("_:c14n") != 0) {
  91. qa.put("value",
  92. namer.getName((String) qa.get(("value"))));
  93. }
  94. }
  95. }
  96. normalized
  97. .add(toNQuad(
  98. (RDFDataset.Quad) quad,
  99. quad.containsKey("name")
  100. && quad.get("name") != null ? (String) ((Map<String, Object>) quad
  101. .get("name")).get("value") : null));
  102. }
  103. // sort normalized output
  104. Collections.sort(normalized);
  105. // handle output format
  106. if (options.format != null) {
  107. if ("application/nquads".equals(options.format)) {
  108. String rval = "";
  109. for (final String n : normalized) {
  110. rval += n;
  111. }
  112. return rval;
  113. } else {
  114. throw new JSONLDProcessingError("Unknown output format.")
  115. .setType(JSONLDProcessingError.Error.UNKNOWN_FORMAT)
  116. .setDetail("format", options.format);
  117. }
  118. }
  119. String rval = "";
  120. for (final String n : normalized) {
  121. rval += n;
  122. }
  123. return parseNQuads(rval);
  124. }
  125. // name each group member
  126. final List<String> group = duplicates.get(hashes.get(pgi));
  127. final List<HashResult> results = new ArrayList<HashResult>();
  128. for (int n = 0;; n++) {
  129. if (n == group.size()) {
  130. // name bnodes in hash order
  131. Collections.sort(results, new Comparator<HashResult>() {
  132. @Override
  133. public int compare(HashResult a, HashResult b) {
  134. final int res = a.hash.compareTo(b.hash);
  135. return res;
  136. }
  137. });
  138. for (final HashResult r : results) {
  139. // name all bnodes in path namer in
  140. // key-entry order
  141. // Note: key-order is preserved in
  142. // javascript
  143. for (final String key : r.pathNamer.existing().keySet()) {
  144. namer.getName(key);
  145. }
  146. }
  147. // processGroup(i+1);
  148. break;
  149. } else {
  150. // skip already-named bnodes
  151. final String bnode = group.get(n);
  152. if (namer.isNamed(bnode)) {
  153. continue;
  154. }
  155. // hash bnode paths
  156. final UniqueNamer pathNamer = new UniqueNamer("_:b");
  157. pathNamer.getName(bnode);
  158. final HashResult result = hashPaths(bnode, bnodes, namer, pathNamer);
  159. results.add(result);
  160. }
  161. }
  162. }
  163. }
  164. }
  165. // hash unnamed bnode
  166. final String bnode = unnamed.get(hui);
  167. final String hash = hashQuads(bnode, bnodes, namer);
  168. // store hash as unique or a duplicate
  169. if (duplicates.containsKey(hash)) {
  170. duplicates.get(hash).add(bnode);
  171. nextUnnamed.add(bnode);
  172. } else if (unique.containsKey(hash)) {
  173. final List<String> tmp = new ArrayList<String>();
  174. tmp.add(unique.get(hash));
  175. tmp.add(bnode);
  176. duplicates.put(hash, tmp);
  177. nextUnnamed.add(unique.get(hash));
  178. nextUnnamed.add(bnode);
  179. unique.remove(hash);
  180. } else {
  181. unique.put(hash, bnode);
  182. }
  183. }
  184. }
  185. private static class HashResult {
  186. String hash;
  187. UniqueNamer pathNamer;
  188. }
  189. /**
  190. * Produces a hash for the paths of adjacent bnodes for a bnode,
  191. * incorporating all information about its subgraph of bnodes. This method
  192. * will recursively pick adjacent bnode permutations that produce the
  193. * lexicographically-least 'path' serializations.
  194. *
  195. * @param id
  196. * the ID of the bnode to hash paths for.
  197. * @param bnodes
  198. * the map of bnode quads.
  199. * @param namer
  200. * the canonical bnode namer.
  201. * @param pathNamer
  202. * the namer used to assign names to adjacent bnodes.
  203. * @param callback
  204. * (err, result) called once the operation completes.
  205. */
  206. private static HashResult hashPaths(String id, Map<String, Object> bnodes, UniqueNamer namer,
  207. UniqueNamer pathNamer) {
  208. try {
  209. // create SHA-1 digest
  210. final MessageDigest md = MessageDigest.getInstance("SHA-1");
  211. final Map<String, List<String>> groups = new LinkedHashMap<String, List<String>>();
  212. List<String> groupHashes;
  213. final List<Object> quads = (List<Object>) ((Map<String, Object>) bnodes.get(id))
  214. .get("quads");
  215. for (int hpi = 0;; hpi++) {
  216. if (hpi == quads.size()) {
  217. // done , hash groups
  218. groupHashes = new ArrayList<String>(groups.keySet());
  219. Collections.sort(groupHashes);
  220. for (int hgi = 0;; hgi++) {
  221. if (hgi == groupHashes.size()) {
  222. final HashResult res = new HashResult();
  223. res.hash = encodeHex(md.digest());
  224. res.pathNamer = pathNamer;
  225. return res;
  226. }
  227. // digest group hash
  228. final String groupHash = groupHashes.get(hgi);
  229. md.update(groupHash.getBytes("UTF-8"));
  230. // choose a path and namer from the permutations
  231. String chosenPath = null;
  232. UniqueNamer chosenNamer = null;
  233. final Permutator permutator = new Permutator(groups.get(groupHash));
  234. while (true) {
  235. Boolean contPermutation = false;
  236. Boolean breakOut = false;
  237. final List<String> permutation = permutator.next();
  238. UniqueNamer pathNamerCopy = pathNamer.clone();
  239. // build adjacent path
  240. String path = "";
  241. final List<String> recurse = new ArrayList<String>();
  242. for (final String bnode : permutation) {
  243. // use canonical name if available
  244. if (namer.isNamed(bnode)) {
  245. path += namer.getName(bnode);
  246. } else {
  247. // recurse if bnode isn't named in the path
  248. // yet
  249. if (!pathNamerCopy.isNamed(bnode)) {
  250. recurse.add(bnode);
  251. }
  252. path += pathNamerCopy.getName(bnode);
  253. }
  254. // skip permutation if path is already >= chosen
  255. // path
  256. if (chosenPath != null && path.length() >= chosenPath.length()
  257. && path.compareTo(chosenPath) > 0) {
  258. // return nextPermutation(true);
  259. if (permutator.hasNext()) {
  260. contPermutation = true;
  261. } else {
  262. // digest chosen path and update namer
  263. md.update(chosenPath.getBytes("UTF-8"));
  264. pathNamer = chosenNamer;
  265. // hash the nextGroup
  266. breakOut = true;
  267. }
  268. break;
  269. }
  270. }
  271. // if we should do the next permutation
  272. if (contPermutation) {
  273. continue;
  274. }
  275. // if we should stop processing this group
  276. if (breakOut) {
  277. break;
  278. }
  279. // does the next recursion
  280. for (int nrn = 0;; nrn++) {
  281. if (nrn == recurse.size()) {
  282. // return nextPermutation(false);
  283. if (chosenPath == null || path.compareTo(chosenPath) < 0) {
  284. chosenPath = path;
  285. chosenNamer = pathNamerCopy;
  286. }
  287. if (!permutator.hasNext()) {
  288. // digest chosen path and update namer
  289. md.update(chosenPath.getBytes("UTF-8"));
  290. pathNamer = chosenNamer;
  291. // hash the nextGroup
  292. breakOut = true;
  293. }
  294. break;
  295. }
  296. // do recursion
  297. final String bnode = recurse.get(nrn);
  298. final HashResult result = hashPaths(bnode, bnodes, namer,
  299. pathNamerCopy);
  300. path += pathNamerCopy.getName(bnode) + "<" + result.hash + ">";
  301. pathNamerCopy = result.pathNamer;
  302. // skip permutation if path is already >= chosen
  303. // path
  304. if (chosenPath != null && path.length() >= chosenPath.length()
  305. && path.compareTo(chosenPath) > 0) {
  306. // return nextPermutation(true);
  307. if (!permutator.hasNext()) {
  308. // digest chosen path and update namer
  309. md.update(chosenPath.getBytes("UTF-8"));
  310. pathNamer = chosenNamer;
  311. // hash the nextGroup
  312. breakOut = true;
  313. }
  314. break;
  315. }
  316. // do next recursion
  317. }
  318. // if we should stop processing this group
  319. if (breakOut) {
  320. break;
  321. }
  322. }
  323. }
  324. }
  325. // get adjacent bnode
  326. final Map<String, Object> quad = (Map<String, Object>) quads.get(hpi);
  327. String bnode = getAdjacentBlankNodeName((Map<String, Object>) quad.get("subject"),
  328. id);
  329. String direction = null;
  330. if (bnode != null) {
  331. // normal property
  332. direction = "p";
  333. } else {
  334. bnode = getAdjacentBlankNodeName((Map<String, Object>) quad.get("object"), id);
  335. if (bnode != null) {
  336. // reverse property
  337. direction = "r";
  338. }
  339. }
  340. if (bnode != null) {
  341. // get bnode name (try canonical, path, then hash)
  342. String name;
  343. if (namer.isNamed(bnode)) {
  344. name = namer.getName(bnode);
  345. } else if (pathNamer.isNamed(bnode)) {
  346. name = pathNamer.getName(bnode);
  347. } else {
  348. name = hashQuads(bnode, bnodes, namer);
  349. }
  350. // hash direction, property, end bnode name/hash
  351. final MessageDigest md1 = MessageDigest.getInstance("SHA-1");
  352. // String toHash = direction + (String) ((Map<String,
  353. // Object>) quad.get("predicate")).get("value") + name;
  354. md1.update(direction.getBytes("UTF-8"));
  355. md1.update(((String) ((Map<String, Object>) quad.get("predicate")).get("value"))
  356. .getBytes("UTF-8"));
  357. md1.update(name.getBytes("UTF-8"));
  358. final String groupHash = encodeHex(md1.digest());
  359. if (groups.containsKey(groupHash)) {
  360. groups.get(groupHash).add(bnode);
  361. } else {
  362. final List<String> tmp = new ArrayList<String>();
  363. tmp.add(bnode);
  364. groups.put(groupHash, tmp);
  365. }
  366. }
  367. }
  368. } catch (final NoSuchAlgorithmException e) {
  369. // TODO: i don't expect that SHA-1 is even NOT going to be
  370. // available?
  371. // look into this further
  372. throw new RuntimeException(e);
  373. } catch (final UnsupportedEncodingException e) {
  374. // TODO: i don't expect that UTF-8 is ever not going to be available
  375. // either
  376. throw new RuntimeException(e);
  377. }
  378. }
  379. /**
  380. * Hashes all of the quads about a blank node.
  381. *
  382. * @param id
  383. * the ID of the bnode to hash quads for.
  384. * @param bnodes
  385. * the mapping of bnodes to quads.
  386. * @param namer
  387. * the canonical bnode namer.
  388. *
  389. * @return the new hash.
  390. */
  391. private static String hashQuads(String id, Map<String, Object> bnodes, UniqueNamer namer) {
  392. // return cached hash
  393. if (((Map<String, Object>) bnodes.get(id)).containsKey("hash")) {
  394. return (String) ((Map<String, Object>) bnodes.get(id)).get("hash");
  395. }
  396. // serialize all of bnode's quads
  397. final List<Map<String, Object>> quads = (List<Map<String, Object>>) ((Map<String, Object>) bnodes
  398. .get(id)).get("quads");
  399. final List<String> nquads = new ArrayList<String>();
  400. for (int i = 0; i < quads.size(); ++i) {
  401. nquads.add(toNQuad((RDFDataset.Quad) quads.get(i),
  402. quads.get(i).get("name") != null ? (String) ((Map<String, Object>) quads.get(i)
  403. .get("name")).get("value") : null, id));
  404. }
  405. // sort serialized quads
  406. Collections.sort(nquads);
  407. // return hashed quads
  408. final String hash = sha1hash(nquads);
  409. ((Map<String, Object>) bnodes.get(id)).put("hash", hash);
  410. return hash;
  411. }
  412. /**
  413. * A helper class to sha1 hash all the strings in a collection
  414. *
  415. * @param nquads
  416. * @return
  417. */
  418. private static String sha1hash(Collection<String> nquads) {
  419. try {
  420. // create SHA-1 digest
  421. final MessageDigest md = MessageDigest.getInstance("SHA-1");
  422. for (final String nquad : nquads) {
  423. md.update(nquad.getBytes("UTF-8"));
  424. }
  425. return encodeHex(md.digest());
  426. } catch (final NoSuchAlgorithmException e) {
  427. throw new RuntimeException(e);
  428. } catch (final UnsupportedEncodingException e) {
  429. throw new RuntimeException(e);
  430. }
  431. }
  432. // TODO: this is something to optimize
  433. private static String encodeHex(final byte[] data) {
  434. String rval = "";
  435. for (final byte b : data) {
  436. rval += String.format("%02x", b);
  437. }
  438. return rval;
  439. }
  440. /**
  441. * A helper function that gets the blank node name from an RDF quad node
  442. * (subject or object). If the node is a blank node and its value does not
  443. * match the given blank node ID, it will be returned.
  444. *
  445. * @param node
  446. * the RDF quad node.
  447. * @param id
  448. * the ID of the blank node to look next to.
  449. *
  450. * @return the adjacent blank node name or null if none was found.
  451. */
  452. private static String getAdjacentBlankNodeName(Map<String, Object> node, String id) {
  453. return "blank node".equals(node.get("type"))
  454. && (!node.containsKey("value") || !JSONUtils.equals(node.get("value"), id)) ? (String) node
  455. .get("value") : null;
  456. }
  457. private static class Permutator {
  458. private final List<String> list;
  459. private boolean done;
  460. private final Map<String, Boolean> left;
  461. public Permutator(List<String> list) {
  462. this.list = (List<String>) JSONLDUtils.clone(list);
  463. Collections.sort(this.list);
  464. this.done = false;
  465. this.left = new LinkedHashMap<String, Boolean>();
  466. for (final String i : this.list) {
  467. this.left.put(i, true);
  468. }
  469. }
  470. /**
  471. * Returns true if there is another permutation.
  472. *
  473. * @return true if there is another permutation, false if not.
  474. */
  475. public boolean hasNext() {
  476. return !this.done;
  477. }
  478. /**
  479. * Gets the next permutation. Call hasNext() to ensure there is another
  480. * one first.
  481. *
  482. * @return the next permutation.
  483. */
  484. public List<String> next() {
  485. final List<String> rval = (List<String>) JSONLDUtils.clone(this.list);
  486. // Calculate the next permutation using Steinhaus-Johnson-Trotter
  487. // permutation algoritm
  488. // get largest mobile element k
  489. // (mobile: element is grater than the one it is looking at)
  490. String k = null;
  491. int pos = 0;
  492. final int length = this.list.size();
  493. for (int i = 0; i < length; ++i) {
  494. final String element = this.list.get(i);
  495. final Boolean left = this.left.get(element);
  496. if ((k == null || element.compareTo(k) > 0)
  497. && ((left && i > 0 && element.compareTo(this.list.get(i - 1)) > 0) || (!left
  498. && i < (length - 1) && element.compareTo(this.list.get(i + 1)) > 0))) {
  499. k = element;
  500. pos = i;
  501. }
  502. }
  503. // no more permutations
  504. if (k == null) {
  505. this.done = true;
  506. } else {
  507. // swap k and the element it is looking at
  508. final int swap = this.left.get(k) ? pos - 1 : pos + 1;
  509. this.list.set(pos, this.list.get(swap));
  510. this.list.set(swap, k);
  511. // reverse the direction of all element larger than k
  512. for (int i = 0; i < length; i++) {
  513. if (this.list.get(i).compareTo(k) > 0) {
  514. this.left.put(this.list.get(i), !this.left.get(this.list.get(i)));
  515. }
  516. }
  517. }
  518. return rval;
  519. }
  520. }
  521. }