/src/main/java/com/cardence/lawshelf/cfr/CfrSectionTracker.java

https://bitbucket.org/cardence/legal-data-parser · Java · 245 lines · 200 code · 43 blank · 2 comment · 35 complexity · 93b6fb3f384695709d312661b14553a9 MD5 · raw file

  1. package com.cardence.lawshelf.cfr;
  2. import java.util.ArrayList;
  3. import java.util.HashMap;
  4. import java.util.Iterator;
  5. import java.util.List;
  6. import java.util.Map;
  7. import org.apache.commons.lang.ArrayUtils;
  8. import org.apache.commons.lang.StringUtils;
  9. import org.apache.commons.lang.math.NumberUtils;
  10. import org.springframework.beans.BeansException;
  11. import org.springframework.beans.factory.config.BeanDefinition;
  12. import org.springframework.context.ApplicationContext;
  13. import org.springframework.context.ApplicationContextAware;
  14. import org.springframework.context.annotation.Lazy;
  15. import org.springframework.context.annotation.Scope;
  16. import org.springframework.stereotype.Component;
  17. @Component
  18. @Scope(value = BeanDefinition.SCOPE_SINGLETON)
  19. @Lazy(value = false)
  20. public class CfrSectionTracker implements ApplicationContextAware {
  21. private static final List<String> HEADING_DELIMITERS = getHeadingSplitDelimiterList();
  22. private static final List<String> REFERENCE_DELIMITERS = getReferenceSplitDelimiterList();
  23. private enum SIDE {
  24. LEFT, RIGHT
  25. };
  26. private Map<String, String> childReferenceMapToParentReference;
  27. private Map<String, String> referenceMapToHeading;
  28. private Map<String, CfrSection> allCfrSections;
  29. private static ApplicationContext appctx;
  30. private static CfrSectionTracker me;
  31. public static CfrSectionTracker getInstance() {
  32. if (me == null) {
  33. me = appctx.getBean(CfrSectionTracker.class);
  34. }
  35. return me;
  36. }
  37. public void addNewChildHeadingForParentReference(CfrSection section, String parentReference) {
  38. final String childReference = getChildReference(section);
  39. this.getAllCfrSections().put(section.getHeading(), section);
  40. this.getReferenceMapToHeading().put(childReference, section.getHeading());
  41. this.mapChildReferenceToParentReference(childReference, parentReference);
  42. }
  43. private String getChildReference(CfrSection section) {
  44. try {
  45. return parseReferenceFromHeading(section.getHeading());
  46. } catch (Exception e) {
  47. return section.getCategoryLevel();
  48. }
  49. }
  50. public void mapChildReferenceToParentReference(String childReference, String parentReference) {
  51. if (StringUtils.isNotBlank(childReference) && StringUtils.isNotBlank(parentReference)) {
  52. this.getChildReferenceMapToParentReference().put(childReference, parentReference);
  53. }
  54. }
  55. public String getTitleTextForReference(String reference) {
  56. final String heading = this.getReferenceMapToHeading().get(reference);
  57. if (heading == null) {
  58. return null;
  59. } else {
  60. return parseTitleFromHeading(heading);
  61. }
  62. }
  63. public String getFullUniqueSourceReferenceForReference(String reference) {
  64. final String myCategoryLevel = parseCategoryLevelFromReference(reference);
  65. final String parentReference = this.getChildReferenceMapToParentReference().get(reference);
  66. if (parentReference == null) {
  67. return myCategoryLevel;
  68. } else {
  69. return appendCategoryLevelToSourceReference(myCategoryLevel,
  70. getFullUniqueSourceReferenceForReference(parentReference));
  71. }
  72. }
  73. public String getFullUniqueSourceReferenceForReferenceAndCategoryLevel(String reference, String categoryLevel) {
  74. final String parentReference = this.getChildReferenceMapToParentReference().get(reference);
  75. if (parentReference == null) {
  76. return categoryLevel;
  77. } else {
  78. return appendCategoryLevelToSourceReference(categoryLevel,
  79. getFullUniqueSourceReferenceForReference(parentReference));
  80. }
  81. }
  82. public String appendCategoryLevelToSourceReference(String categoryLevel, String sourceReference) {
  83. return sourceReference + "." + categoryLevel;
  84. }
  85. public String getHeadingForReference(String reference) {
  86. return this.getChildReferenceMapToParentReference().get(reference);
  87. }
  88. public static String parseCategoryTypeFromReference(String reference) {
  89. return translateCategoryTypeText(getLeftSideOfReference(reference));
  90. }
  91. public static String translateCategoryTypeText(String originalText) {
  92. if (StringUtils.isBlank(originalText)) {
  93. return originalText;
  94. }
  95. if (StringUtils.contains(originalText, "\u00A7") || StringUtils.contains(originalText, "&sect;")) {
  96. return "SECTION";
  97. }
  98. return originalText;
  99. }
  100. public static String parseCategoryLevelFromReference(String reference) {
  101. return getRightSideOfReference(reference);
  102. }
  103. public static String parseReferenceFromHeading(String heading) {
  104. return getLeftSideOfHeading(heading);
  105. }
  106. public static String parseTitleFromHeading(String heading) {
  107. return getRightSideOfHeading(heading);
  108. }
  109. private Map<String, String> getReferenceMapToHeading() {
  110. if (this.referenceMapToHeading == null) {
  111. this.referenceMapToHeading = new HashMap<String, String>();
  112. }
  113. return this.referenceMapToHeading;
  114. }
  115. private Map<String, String> getChildReferenceMapToParentReference() {
  116. if (this.childReferenceMapToParentReference == null) {
  117. this.childReferenceMapToParentReference = new HashMap<String, String>();
  118. }
  119. return this.childReferenceMapToParentReference;
  120. }
  121. private Map<String, CfrSection> getAllCfrSections() {
  122. if (this.allCfrSections == null) {
  123. this.allCfrSections = new HashMap<String, CfrSection>();
  124. }
  125. return this.allCfrSections;
  126. }
  127. private final static String getLeftSideOfHeading(String heading) {
  128. return getValueFromStringForSide(getHeadingLeftAndRightSides(heading), SIDE.LEFT);
  129. }
  130. private final static String getRightSideOfHeading(String heading) {
  131. return getValueFromStringForSide(getHeadingLeftAndRightSides(heading), SIDE.RIGHT);
  132. }
  133. private final static String getLeftSideOfReference(String reference) {
  134. return getValueFromStringForSide(getReferenceLeftAndRightSides(reference), SIDE.LEFT);
  135. }
  136. private final static String getRightSideOfReference(String reference) {
  137. return getValueFromStringForSide(getReferenceLeftAndRightSides(reference), SIDE.RIGHT);
  138. }
  139. private final static String getValueFromStringForSide(String[] stringParts, SIDE side) {
  140. int indexOfHeadingSplit = 0;
  141. switch (side) {
  142. case LEFT:
  143. indexOfHeadingSplit = 0;
  144. break;
  145. case RIGHT:
  146. indexOfHeadingSplit = 1;
  147. break;
  148. }
  149. if (stringParts == null) {
  150. return null;
  151. } else {
  152. String s = StringUtils.replace(stringParts[indexOfHeadingSplit], "[", "");
  153. return StringUtils.trimToEmpty(s);
  154. }
  155. }
  156. private final static String[] getHeadingLeftAndRightSides(String heading) {
  157. return splitStringWithDelimiters(heading, HEADING_DELIMITERS);
  158. }
  159. private final static String[] getReferenceLeftAndRightSides(String reference) {
  160. return splitStringWithDelimiters(reference, REFERENCE_DELIMITERS);
  161. }
  162. private final static String[] splitStringWithDelimiters(String stringToSplit, List<String> delimiters) {
  163. for (Iterator<String> it = delimiters.iterator(); it.hasNext();) {
  164. String delim = it.next();
  165. String[] headingParts = StringUtils.splitByWholeSeparator(stringToSplit, delim);
  166. if (headingParts != null && headingParts.length == 2) {
  167. return headingParts;
  168. } else if (headingParts.length > 2) {
  169. if (NumberUtils.isDigits("" + headingParts[1].charAt(0))) {
  170. final String firstPart = headingParts[0] + delim + headingParts[1];
  171. final String lastPart = StringUtils.join(ArrayUtils.subarray(headingParts, 2, headingParts.length),
  172. delim);
  173. return new String[] { firstPart, lastPart };
  174. } else {
  175. return new String[] { headingParts[0], StringUtils.substringAfter(stringToSplit, delim) };
  176. }
  177. }
  178. }
  179. throw new RuntimeException("Cannot split string " + stringToSplit + " into two parts");
  180. }
  181. private final static List<String> getHeadingSplitDelimiterList() {
  182. final ArrayList<String> delimList = new ArrayList<String>();
  183. // delimList.add("&ndash;"); // en dash
  184. delimList.add("\u2014"); // em dash
  185. // delimList.add("&mdash;"); // em dash
  186. delimList.add("\u2015"); // horizontal bar
  187. delimList.add("\u2012"); // figure dash
  188. delimList.add("\u2013"); // en dash
  189. delimList.add("-");
  190. delimList.add(" ["); // cfr uses this in reserved columns
  191. delimList.add("["); // cfr uses this in reserved columns
  192. return delimList;
  193. }
  194. private final static List<String> getReferenceSplitDelimiterList() {
  195. final ArrayList<String> delimList = new ArrayList<String>();
  196. delimList.add(" ");
  197. return delimList;
  198. }
  199. public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
  200. appctx = applicationContext;
  201. }
  202. }