/src/main/java/com/cardence/lawshelf/cfr/CfrSectionTracker.java
https://bitbucket.org/cardence/legal-data-parser · Java · 245 lines · 200 code · 43 blank · 2 comment · 35 complexity · 93b6fb3f384695709d312661b14553a9 MD5 · raw file
- package com.cardence.lawshelf.cfr;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.Iterator;
- import java.util.List;
- import java.util.Map;
- import org.apache.commons.lang.ArrayUtils;
- import org.apache.commons.lang.StringUtils;
- import org.apache.commons.lang.math.NumberUtils;
- import org.springframework.beans.BeansException;
- import org.springframework.beans.factory.config.BeanDefinition;
- import org.springframework.context.ApplicationContext;
- import org.springframework.context.ApplicationContextAware;
- import org.springframework.context.annotation.Lazy;
- import org.springframework.context.annotation.Scope;
- import org.springframework.stereotype.Component;
- @Component
- @Scope(value = BeanDefinition.SCOPE_SINGLETON)
- @Lazy(value = false)
- public class CfrSectionTracker implements ApplicationContextAware {
- private static final List<String> HEADING_DELIMITERS = getHeadingSplitDelimiterList();
- private static final List<String> REFERENCE_DELIMITERS = getReferenceSplitDelimiterList();
- private enum SIDE {
- LEFT, RIGHT
- };
- private Map<String, String> childReferenceMapToParentReference;
- private Map<String, String> referenceMapToHeading;
- private Map<String, CfrSection> allCfrSections;
- private static ApplicationContext appctx;
- private static CfrSectionTracker me;
- public static CfrSectionTracker getInstance() {
- if (me == null) {
- me = appctx.getBean(CfrSectionTracker.class);
- }
- return me;
- }
- public void addNewChildHeadingForParentReference(CfrSection section, String parentReference) {
- final String childReference = getChildReference(section);
- this.getAllCfrSections().put(section.getHeading(), section);
- this.getReferenceMapToHeading().put(childReference, section.getHeading());
- this.mapChildReferenceToParentReference(childReference, parentReference);
- }
- private String getChildReference(CfrSection section) {
- try {
- return parseReferenceFromHeading(section.getHeading());
- } catch (Exception e) {
- return section.getCategoryLevel();
- }
- }
- public void mapChildReferenceToParentReference(String childReference, String parentReference) {
- if (StringUtils.isNotBlank(childReference) && StringUtils.isNotBlank(parentReference)) {
- this.getChildReferenceMapToParentReference().put(childReference, parentReference);
- }
- }
- public String getTitleTextForReference(String reference) {
- final String heading = this.getReferenceMapToHeading().get(reference);
- if (heading == null) {
- return null;
- } else {
- return parseTitleFromHeading(heading);
- }
- }
- public String getFullUniqueSourceReferenceForReference(String reference) {
- final String myCategoryLevel = parseCategoryLevelFromReference(reference);
- final String parentReference = this.getChildReferenceMapToParentReference().get(reference);
- if (parentReference == null) {
- return myCategoryLevel;
- } else {
- return appendCategoryLevelToSourceReference(myCategoryLevel,
- getFullUniqueSourceReferenceForReference(parentReference));
- }
- }
- public String getFullUniqueSourceReferenceForReferenceAndCategoryLevel(String reference, String categoryLevel) {
- final String parentReference = this.getChildReferenceMapToParentReference().get(reference);
- if (parentReference == null) {
- return categoryLevel;
- } else {
- return appendCategoryLevelToSourceReference(categoryLevel,
- getFullUniqueSourceReferenceForReference(parentReference));
- }
- }
- public String appendCategoryLevelToSourceReference(String categoryLevel, String sourceReference) {
- return sourceReference + "." + categoryLevel;
- }
- public String getHeadingForReference(String reference) {
- return this.getChildReferenceMapToParentReference().get(reference);
- }
- public static String parseCategoryTypeFromReference(String reference) {
- return translateCategoryTypeText(getLeftSideOfReference(reference));
- }
- public static String translateCategoryTypeText(String originalText) {
- if (StringUtils.isBlank(originalText)) {
- return originalText;
- }
- if (StringUtils.contains(originalText, "\u00A7") || StringUtils.contains(originalText, "§")) {
- return "SECTION";
- }
- return originalText;
- }
- public static String parseCategoryLevelFromReference(String reference) {
- return getRightSideOfReference(reference);
- }
- public static String parseReferenceFromHeading(String heading) {
- return getLeftSideOfHeading(heading);
- }
- public static String parseTitleFromHeading(String heading) {
- return getRightSideOfHeading(heading);
- }
- private Map<String, String> getReferenceMapToHeading() {
- if (this.referenceMapToHeading == null) {
- this.referenceMapToHeading = new HashMap<String, String>();
- }
- return this.referenceMapToHeading;
- }
- private Map<String, String> getChildReferenceMapToParentReference() {
- if (this.childReferenceMapToParentReference == null) {
- this.childReferenceMapToParentReference = new HashMap<String, String>();
- }
- return this.childReferenceMapToParentReference;
- }
- private Map<String, CfrSection> getAllCfrSections() {
- if (this.allCfrSections == null) {
- this.allCfrSections = new HashMap<String, CfrSection>();
- }
- return this.allCfrSections;
- }
- private final static String getLeftSideOfHeading(String heading) {
- return getValueFromStringForSide(getHeadingLeftAndRightSides(heading), SIDE.LEFT);
- }
- private final static String getRightSideOfHeading(String heading) {
- return getValueFromStringForSide(getHeadingLeftAndRightSides(heading), SIDE.RIGHT);
- }
- private final static String getLeftSideOfReference(String reference) {
- return getValueFromStringForSide(getReferenceLeftAndRightSides(reference), SIDE.LEFT);
- }
- private final static String getRightSideOfReference(String reference) {
- return getValueFromStringForSide(getReferenceLeftAndRightSides(reference), SIDE.RIGHT);
- }
- private final static String getValueFromStringForSide(String[] stringParts, SIDE side) {
- int indexOfHeadingSplit = 0;
- switch (side) {
- case LEFT:
- indexOfHeadingSplit = 0;
- break;
- case RIGHT:
- indexOfHeadingSplit = 1;
- break;
- }
- if (stringParts == null) {
- return null;
- } else {
- String s = StringUtils.replace(stringParts[indexOfHeadingSplit], "[", "");
- return StringUtils.trimToEmpty(s);
- }
- }
- private final static String[] getHeadingLeftAndRightSides(String heading) {
- return splitStringWithDelimiters(heading, HEADING_DELIMITERS);
- }
- private final static String[] getReferenceLeftAndRightSides(String reference) {
- return splitStringWithDelimiters(reference, REFERENCE_DELIMITERS);
- }
- private final static String[] splitStringWithDelimiters(String stringToSplit, List<String> delimiters) {
- for (Iterator<String> it = delimiters.iterator(); it.hasNext();) {
- String delim = it.next();
- String[] headingParts = StringUtils.splitByWholeSeparator(stringToSplit, delim);
- if (headingParts != null && headingParts.length == 2) {
- return headingParts;
- } else if (headingParts.length > 2) {
- if (NumberUtils.isDigits("" + headingParts[1].charAt(0))) {
- final String firstPart = headingParts[0] + delim + headingParts[1];
- final String lastPart = StringUtils.join(ArrayUtils.subarray(headingParts, 2, headingParts.length),
- delim);
- return new String[] { firstPart, lastPart };
- } else {
- return new String[] { headingParts[0], StringUtils.substringAfter(stringToSplit, delim) };
- }
- }
- }
- throw new RuntimeException("Cannot split string " + stringToSplit + " into two parts");
- }
- private final static List<String> getHeadingSplitDelimiterList() {
- final ArrayList<String> delimList = new ArrayList<String>();
- // delimList.add("–"); // en dash
- delimList.add("\u2014"); // em dash
- // delimList.add("—"); // em dash
- delimList.add("\u2015"); // horizontal bar
- delimList.add("\u2012"); // figure dash
- delimList.add("\u2013"); // en dash
- delimList.add("-");
- delimList.add(" ["); // cfr uses this in reserved columns
- delimList.add("["); // cfr uses this in reserved columns
- return delimList;
- }
- private final static List<String> getReferenceSplitDelimiterList() {
- final ArrayList<String> delimList = new ArrayList<String>();
- delimList.add(" ");
- return delimList;
- }
- public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
- appctx = applicationContext;
- }
- }