/src/main/java/net/triptech/metahive/KeyValueIdentifier.java

https://github.com/stressfree/metahive · Java · 616 lines · 346 code · 88 blank · 182 comment · 106 complexity · 1653b99f0b729b6b224902d5b0ddbde6 MD5 · raw file

  1. /*******************************************************************************
  2. * Copyright (c) 2012 David Harrison, Triptech Ltd.
  3. * All rights reserved. This program and the accompanying materials
  4. * are made available under the terms of the GNU Public License v3.0
  5. * which accompanies this distribution, and is available at
  6. * http://www.gnu.org/licenses/gpl.html
  7. *
  8. * Contributors:
  9. * David Harrison, Triptech Ltd - initial API and implementation
  10. ******************************************************************************/
  11. package net.triptech.metahive;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import net.triptech.metahive.model.KeyValueBoolean;
import org.apache.commons.lang.StringUtils;
  20. /**
  21. * The Class KeyValueIdentifier.
  22. */
  23. public class KeyValueIdentifier {
  24. /**
  25. * Calculate the key value based on the newest value.
  26. *
  27. * @param values the values
  28. * @return the object
  29. */
  30. public static Object newest(final List<Object> values) {
  31. Object keyValue = null;
  32. if (values != null && values.size() > 0) {
  33. keyValue = values.get(values.size() - 1);
  34. }
  35. return keyValue;
  36. }
  37. /**
  38. * Calculate the key value based on the oldest value.
  39. *
  40. * @param values the values
  41. * @return the object
  42. */
  43. public static Object oldest(final List<Object> values) {
  44. Object keyValue = null;
  45. if (values != null && values.size() > 0) {
  46. keyValue = values.get(0);
  47. }
  48. return keyValue;
  49. }
  50. /**
  51. * Calculate the key value based on the most frequent (default to newest if none).
  52. *
  53. * @param values the values
  54. * @return the object
  55. */
  56. public static Object frequentDefaultNewest(final List<Object> values) {
  57. Object keyValue = frequent(values);
  58. if (keyValue == null) {
  59. keyValue = newest(values);
  60. } else {
  61. if (keyValue instanceof String && StringUtils.isBlank((String) keyValue)) {
  62. keyValue = newest(values);
  63. }
  64. }
  65. return keyValue;
  66. }
  67. /**
  68. * Calculate the key value based on the most frequent (default to oldest if none).
  69. *
  70. * @param values the values
  71. * @return the object
  72. */
  73. public static Object frequentDefaultOldest(final List<Object> values) {
  74. Object keyValue = frequent(values);
  75. if (keyValue == null) {
  76. keyValue = oldest(values);
  77. } else {
  78. if (keyValue instanceof String && StringUtils.isBlank((String) keyValue)) {
  79. keyValue = oldest(values);
  80. }
  81. }
  82. return keyValue;
  83. }
  84. /**
  85. * Calculate the key value by concatenating the values.
  86. *
  87. * @param values the values
  88. * @return the object
  89. */
  90. public static Object concat(final List<Object> values) {
  91. Object keyValue = null;
  92. TreeMap<String, Integer> valueSet = new TreeMap<String, Integer>();
  93. for (Object value : values) {
  94. if (value instanceof String) {
  95. valueSet.put((String) value, 0);
  96. }
  97. }
  98. if (valueSet.size() > 0) {
  99. StringBuilder sb = new StringBuilder();
  100. int count = valueSet.keySet().size();
  101. int counter = 1;
  102. for (String item : valueSet.keySet()) {
  103. if (sb.length() > 0) {
  104. if (counter == count) {
  105. sb.append(" and ");
  106. } else {
  107. sb.append(", ");
  108. }
  109. }
  110. sb.append(item);
  111. counter++;
  112. }
  113. keyValue = sb.toString();
  114. }
  115. return keyValue;
  116. }
  117. /**
  118. * Calculate the key value based on what value is unanimous.
  119. * If there is any conflict then the result is unclear.
  120. * This is only applicable to boolean type key values.
  121. *
  122. * @param values the values
  123. * @return the object
  124. */
  125. public static Object unclear(final List<Object> values) {
  126. KeyValueBoolean keyValue = null;
  127. boolean unclearKeyValue = false;
  128. for (Object value : values) {
  129. if (value instanceof KeyValueBoolean) {
  130. if (keyValue == null) {
  131. keyValue = (KeyValueBoolean) value;
  132. }
  133. if (keyValue != (KeyValueBoolean) value) {
  134. // Mismatch - default to unclear
  135. unclearKeyValue = true;
  136. }
  137. }
  138. }
  139. if (unclearKeyValue) {
  140. // Reset key value to unclear
  141. keyValue = KeyValueBoolean.BL_UNCLEAR;
  142. }
  143. return keyValue;
  144. }
  145. /**
  146. * Calculate the median value from the list of supplied values.
  147. * This assumes that the list of values are Double or KeyValueBoolean objects.
  148. * If no Double or KeyValueBoolean objects exist then null is returned.
  149. *
  150. * @param values the values
  151. * @return the object
  152. */
  153. public static Object median(final List<Object> values) {
  154. Object keyValue = null;
  155. if (values.size() > 0) {
  156. if (values.get(0) instanceof Double) {
  157. ArrayList<Double> sortedList = parseToSortedDoubleList(values);
  158. if (sortedList.size() > 0) {
  159. keyValue = getMedian(sortedList);
  160. }
  161. }
  162. if (values.get(0) instanceof KeyValueBoolean) {
  163. // Parse the list of doubles to a list of:
  164. // twos (true), ones (unclear), and zeros (false)
  165. List<Object> doubleValues = new ArrayList<Object>();
  166. for (Object object : values) {
  167. if (object instanceof KeyValueBoolean) {
  168. doubleValues.add(parseBooleanToDouble(object));
  169. }
  170. }
  171. ArrayList<Double> sortedList = parseToSortedDoubleList(doubleValues);
  172. if (sortedList.size() > 0) {
  173. keyValue = parseDoubleToBoolean(getMedian(sortedList));
  174. }
  175. }
  176. }
  177. return keyValue;
  178. }
  179. /**
  180. * Calculate the lower quartile value from the list of supplied values.
  181. * This assumes that the list of values are Double or KeyValueBoolean objects.
  182. * If no Double or KeyValueBoolean objects exist then null is returned.
  183. *
  184. * @param values the values
  185. * @return the object
  186. */
  187. public static Object quartileLower(final List<Object> values) {
  188. Object keyValue = null;
  189. if (values.size() > 0) {
  190. if (values.get(0) instanceof Double) {
  191. ArrayList<Double> sortedList = parseToSortedDoubleList(values);
  192. if (sortedList.size() > 0) {
  193. keyValue = getQuartileLower(sortedList);
  194. }
  195. }
  196. if (values.get(0) instanceof KeyValueBoolean) {
  197. // Parse the list of doubles to a list of:
  198. // twos (true), ones (unclear), and zeros (false)
  199. List<Object> doubleValues = new ArrayList<Object>();
  200. for (Object object : values) {
  201. if (object instanceof KeyValueBoolean) {
  202. doubleValues.add(parseBooleanToDouble(object));
  203. }
  204. }
  205. ArrayList<Double> sortedList = parseToSortedDoubleList(doubleValues);
  206. if (sortedList.size() > 0) {
  207. keyValue = parseDoubleToBoolean(getQuartileLower(sortedList));
  208. }
  209. }
  210. }
  211. return keyValue;
  212. }
  213. /**
  214. * Calculate the upper quartile value from the list of supplied values.
  215. * This assumes that the list of values are Double or KeyValueBoolean objects.
  216. * If no Double or KeyValueBoolean objects exist then null is returned.
  217. *
  218. * @param values the values
  219. * @return the object
  220. */
  221. public static Object quartileUpper(final List<Object> values) {
  222. Object keyValue = null;
  223. if (values.size() > 0) {
  224. if (values.get(0) instanceof Double) {
  225. ArrayList<Double> sortedList = parseToSortedDoubleList(values);
  226. if (sortedList.size() > 0) {
  227. keyValue = getQuartileUpper(sortedList);
  228. }
  229. }
  230. if (values.get(0) instanceof KeyValueBoolean) {
  231. // Parse the list of doubles to a list of:
  232. // twos (true), ones (unclear), and zeros (false)
  233. List<Object> doubleValues = new ArrayList<Object>();
  234. for (Object object : values) {
  235. if (object instanceof KeyValueBoolean) {
  236. doubleValues.add(parseBooleanToDouble(object));
  237. }
  238. }
  239. ArrayList<Double> sortedList = parseToSortedDoubleList(doubleValues);
  240. if (sortedList.size() > 0) {
  241. keyValue = parseDoubleToBoolean(getQuartileUpper(sortedList));
  242. }
  243. }
  244. }
  245. return keyValue;
  246. }
  247. /**
  248. * Calculate the total value from the list of supplied values.
  249. * This assumes that the list of values are Double objects.
  250. * If no Double objects exist in the values list then null is returned.
  251. *
  252. * @param values the values
  253. * @return the object
  254. */
  255. public static Object total(final List<Object> values) {
  256. Double keyValue = null;
  257. double runningTotal = 0;
  258. boolean valueSet = false;
  259. for (Object value : values) {
  260. if (value instanceof Double) {
  261. runningTotal += (Double) value;
  262. valueSet = true;
  263. }
  264. }
  265. if (valueSet) {
  266. // At least one valid Double value existed
  267. keyValue = runningTotal;
  268. }
  269. return keyValue;
  270. }
  271. /**
  272. * Calculate the average value from the list of supplied values.
  273. * This assumes that the list of values are Double objects.
  274. * If no Double objects exist in the values list then null is returned.
  275. *
  276. * @param values the values
  277. * @return the object
  278. */
  279. public static Object average(final List<Object> values) {
  280. Double keyValue = null;
  281. double runningTotal = 0;
  282. int count = 0;
  283. for (Object value : values) {
  284. if (value instanceof Double) {
  285. runningTotal += (Double) value;
  286. count++;
  287. }
  288. }
  289. if (count > 0) {
  290. // At least one valid Double value existed
  291. keyValue = runningTotal / count;
  292. }
  293. return keyValue;
  294. }
  295. /**
  296. * Calculate the highest value from the list of supplied values.
  297. * This assumes that the list of values are Double objects.
  298. * If no Double objects exist in the values list then null is returned. *
  299. *
  300. * @param values the values
  301. * @return the object
  302. */
  303. public static Object highest(final List<Object> values) {
  304. Double keyValue = null;
  305. ArrayList<Double> sortedList = parseToSortedDoubleList(values);
  306. if (sortedList.size() > 0) {
  307. keyValue = sortedList.get(sortedList.size() - 1);
  308. }
  309. return keyValue;
  310. }
  311. /**
  312. * Calculate the lowest value from the list of supplied values.
  313. * This assumes that the list of values are Double objects.
  314. * If no Double objects exist in the values list then null is returned. *
  315. *
  316. * @param values the values
  317. * @return the object
  318. */
  319. public static Object lowest(final List<Object> values) {
  320. Double keyValue = null;
  321. ArrayList<Double> sortedList = parseToSortedDoubleList(values);
  322. if (sortedList.size() > 0) {
  323. keyValue = sortedList.get(0);
  324. }
  325. return keyValue;
  326. }
  327. /**
  328. * Calculate the most frequent key value.
  329. *
  330. * @param values the values
  331. * @return the object
  332. */
  333. private static Object frequent(final List<Object> values) {
  334. Object keyValue = null;
  335. Map<String, Integer> hitCount = new HashMap<String, Integer>();
  336. Map<String, Object> originalCap = new HashMap<String, Object>();
  337. int maxHitCount = 0;
  338. if (values != null && values.size() > 0) {
  339. for (Object objValue : values) {
  340. String value = parseToString(objValue);
  341. int count = 0;
  342. if (!hitCount.containsKey(value.toUpperCase())) {
  343. originalCap.put(value.toUpperCase(), objValue);
  344. } else {
  345. count = hitCount.get(value.toUpperCase());
  346. }
  347. count++;
  348. if (count > maxHitCount) {
  349. maxHitCount = count;
  350. }
  351. hitCount.put(value.toUpperCase(), count);
  352. }
  353. }
  354. boolean keyValueSet = false;
  355. for (String valueKey : hitCount.keySet()) {
  356. int count = hitCount.get(valueKey);
  357. if (count == maxHitCount) {
  358. if (!keyValueSet) {
  359. keyValue = originalCap.get(valueKey);
  360. } else {
  361. // Invalidate the keyValue because there is a duplicate most frequent
  362. keyValue = null;
  363. }
  364. }
  365. }
  366. return keyValue;
  367. }
  368. /**
  369. * Parses the object value to a string.
  370. *
  371. * @param objValue the value as an object
  372. * @return the string
  373. */
  374. private static String parseToString(final Object objValue) {
  375. String value = "";
  376. if (objValue != null) {
  377. if (objValue instanceof String) {
  378. value = (String) objValue;
  379. }
  380. if (objValue instanceof Double) {
  381. value = String.valueOf((Double) objValue);
  382. }
  383. }
  384. return value;
  385. }
  386. /**
  387. * Parse the value list into a sorted double list.
  388. *
  389. * @param values the values
  390. * @return the array list
  391. */
  392. private static ArrayList<Double> parseToSortedDoubleList(final List<Object> values) {
  393. ArrayList<Double> list = new ArrayList<Double>();
  394. for (Object value : values) {
  395. if (value instanceof Double) {
  396. list.add((Double) value);
  397. }
  398. }
  399. Collections.sort(list);
  400. return list;
  401. }
  402. /**
  403. * Gets the median value from the sorted list of doubles.
  404. *
  405. * @param sortedList the sorted list
  406. * @return the median
  407. */
  408. private static double getMedian(final List<Double> sortedList) {
  409. double median = 0;
  410. if (sortedList.size() % 2 == 1) {
  411. median = sortedList.get((sortedList.size() + 1) / 2 - 1);
  412. } else {
  413. double lower = sortedList.get(sortedList.size() / 2 - 1);
  414. double upper = sortedList.get(sortedList.size() / 2);
  415. median = (lower + upper) / 2.0;
  416. }
  417. return median;
  418. }
  419. /**
  420. * Gets the lower quartile value.
  421. *
  422. * @param sortedList the sorted list
  423. * @return the quartile lower
  424. */
  425. private static double getQuartileLower(final List<Double> sortedList) {
  426. double quartileLower = 0;
  427. if (sortedList.size() > 3) {
  428. double median = getMedian(sortedList);
  429. quartileLower = getMedian(getValuesLessThan(sortedList, median));
  430. } else {
  431. // If less than three values return the first (lowest) value
  432. quartileLower = sortedList.get(0);
  433. }
  434. return quartileLower;
  435. }
  436. /**
  437. * Gets the upper quartile value.
  438. *
  439. * @param sortedList the sorted list
  440. * @return the quartile upper
  441. */
  442. private static double getQuartileUpper(final List<Double> sortedList) {
  443. double quartileUpper = 0;
  444. if (sortedList.size() > 3) {
  445. double median = getMedian(sortedList);
  446. quartileUpper = getMedian(getValuesGreaterThan(sortedList, median));
  447. } else {
  448. // If less than three values return the last (highest) value
  449. quartileUpper = sortedList.get(sortedList.size() - 1);
  450. }
  451. return quartileUpper;
  452. }
  453. /**
  454. * Gets the values greater than the supplied limit.
  455. *
  456. * @param values the values
  457. * @param limit the limit
  458. * @return the values greater than the supplied limit
  459. */
  460. private static List<Double> getValuesGreaterThan(final List<Double> values,
  461. final double limit) {
  462. List<Double> modValues = new ArrayList<Double>();
  463. for (double value : values) {
  464. if (value > limit || (value == limit)) {
  465. modValues.add(value);
  466. }
  467. }
  468. return modValues;
  469. }
  470. /**
  471. * Gets the values less than the supplied limit.
  472. *
  473. * @param values the values
  474. * @param limit the limit
  475. * @return the values less than the supplied limit
  476. */
  477. public static List<Double> getValuesLessThan(final List<Double> values,
  478. final double limit) {
  479. List<Double> modValues = new ArrayList<Double>();
  480. for (double value : values) {
  481. if (value < limit || (value == limit)) {
  482. modValues.add(value);
  483. }
  484. }
  485. return modValues;
  486. }
  487. /**
  488. * Parses the KeyValueBoolean to a double.
  489. *
  490. * @param bl the bl
  491. * @return the double
  492. */
  493. private static double parseBooleanToDouble(final Object object) {
  494. double value = 1;
  495. if (object instanceof KeyValueBoolean) {
  496. KeyValueBoolean bl = (KeyValueBoolean) object;
  497. if (bl == KeyValueBoolean.BL_TRUE) {
  498. value = 2;
  499. }
  500. if (bl == KeyValueBoolean.BL_UNCLEAR) {
  501. value = 1;
  502. }
  503. if (bl == KeyValueBoolean.BL_FALSE) {
  504. value = 0;
  505. }
  506. }
  507. return value;
  508. }
  509. /**
  510. * Parses the double to a KeyValueBoolean.
  511. *
  512. * @param value the value
  513. * @return the key value boolean
  514. */
  515. private static KeyValueBoolean parseDoubleToBoolean(final Double value) {
  516. KeyValueBoolean bl = KeyValueBoolean.BL_UNCLEAR;
  517. if (value == 2) {
  518. bl = KeyValueBoolean.BL_TRUE;
  519. }
  520. if (value == 1) {
  521. bl = KeyValueBoolean.BL_UNCLEAR;
  522. }
  523. if (value == 0) {
  524. bl = KeyValueBoolean.BL_FALSE;
  525. }
  526. return bl;
  527. }
  528. }