/code/DomainTheory.java
Java | 1285 lines | 1118 code | 48 blank | 119 comment | 373 complexity | 2e58a1ba8b673ac985b961aeb6eb5c6c MD5 | raw file
Large files are truncated, but you can click here to view the full file
- import java.util.*;
- import java.io.*;
- import java.text.*;
- /** This class completes the definition of a Domain Theory in Prolog syntax. In this system, a Domain Theory is a grammar
- defining a Kinship System. This is the level at which 90% of the action takes place in Active Learning.
- <p>
- Because so many methods are defined at the Domain Theory level, the code files are broken into 4 classes:
- DT_Abstract1, DT_Abstract2, DomainTheory & Learned_DT.
- @author Gary Morris, Northern Virginia Community College garymorris2245@verizon.net
- */
- public class DomainTheory extends DT_Abstract2 {
- public static PrintWriter testOutFile; // shared writer; not referenced in the visible part of this file -- presumably diagnostic/test output (TODO confirm)
- public static boolean SIMILARITY_ON = true; // global switch; its use is not visible in this excerpt (TODO confirm meaning)
- public static boolean LGG_ONLY = false; // global switch; its use is not visible in this excerpt (TODO confirm meaning)
- public TreeMap<String, String> kinTermSymbols; // cache built by getKTSymbols(): kin term -> letter symbol
- public int priorDTMSize = 0; // number of kin terms present when kinTermSymbols was last built by getKTSymbols()
/* General constructor: records the descriptive attributes and starts with an empty theory. */
DomainTheory(String langName, String auth, String date, boolean partl) {
    this.theory = new TreeMap();   // no definitions yet
    this.languageName = langName;
    this.author = auth;
    this.createDate = date;
    this.partial = partl;
}  // end of general constructor
- /* This zero-arg constructor is for use ONLY by Serialization */
DomainTheory() {
    // Placeholder values only; the original note says this constructor is for Serialization use.
    this.theory = new TreeMap();
    this.partial = false;
    this.createDate = " ";
    this.author = " ";
    this.languageName = "";
}  // end of no-arg constructor
- /* This constructor is for cloning a DomainTheory. It is intended for use in Learning & Leave One Out testing. */
DomainTheory(DomainTheory model) {
    // Identity: same language as the model, tagged "(Adr)" when the model holds address terms.
    this.addressTerms = model.addressTerms;
    this.languageName = (model.addressTerms ? model.languageName + "(Adr)" : model.languageName);
    this.ctxt = model.ctxt;
    this.author = model.author;
    this.createDate = UDate.today();   // the copy is dated today, not with the model's date
    this.partial = model.partial;
    this.polygamyOK = model.polygamyOK;

    // The top-level theory map is copied; its entries are shared with the model.
    this.theory = new TreeMap(model.theory);

    // Everything below is shared by reference with the model (no copy is made).
    this.userDefinedProperties = model.userDefinedProperties;
    this.synonyms = model.synonyms;
    this.nonSynonyms = model.nonSynonyms;
    this.umbrellas = model.umbrellas;
    this.nonUmbrellas = model.nonUmbrellas;
    this.overlaps = model.overlaps;
    this.nonOverlaps = model.nonOverlaps;
}  // end of cloning constructor
- /* This constructor is for creating the second DT in a Context */
DomainTheory(Context contxt, boolean adr, String auth) {
    // Name comes from the context; address-term theories get the "(Adr)" suffix.
    String name = contxt.languageName;
    if (adr) {
        name = name + "(Adr)";
    }
    this.languageName = name;
    this.ctxt = contxt;
    this.addressTerms = adr;
    this.polygamyOK = contxt.polygamyPermit;
    this.author = auth;
    this.createDate = UDate.today();
    this.partial = false;
    // Start with no definitions and no undefined dyads.
    this.theory = new TreeMap();
    this.dyadsUndefined = new DyadTMap();
}  // end of 2nd_DT constructor
/* Minimal constructor: only the language name is supplied; everything else gets a placeholder. */
DomainTheory(String langName) {
    this.theory = new TreeMap();
    this.partial = false;
    this.createDate = " ";
    this.author = "";
    this.languageName = langName;
}  // end of minimal constructor
- public TreeMap<String, String> getKTSymbols() {
- Set<String> kTerms = dyadsUndefined.keySet();
- kTerms.addAll(dyadsDefined.keySet());
- int DTMsize = kTerms.size(), n = 0;
- // If there are no kin terms yet, punt.
- if (DTMsize == 0) {
- return null;
- }
- // If the number of kin terms has not changed, use existing TMap.
- if (kinTermSymbols != null && DTMsize == priorDTMSize) {
- return kinTermSymbols;
- }
- // Else, create new TMap & return it.
- priorDTMSize = DTMsize;
- kinTermSymbols = new TreeMap<String, String>();
- String[] letters = makeLetters(DTMsize);
- for (String kTm : kTerms) {
- kinTermSymbols.put(kTm, letters[n++]);
- }
- return kinTermSymbols;
- }
- public String[] makeLetters(int size) {
- String[] ltrs = new String[size];
- String[] set1 = {"", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
- "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
- "V", "W", "X", "Y", "Z"};
- String[] set2 = {"A", "B", "C", "D", "E", "F", "G", "H", "I", "J",
- "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
- "V", "W", "X", "Y", "Z"};
- int n1 = 0, n2 = 0;
- for (int i = 0; i < size; i++) {
- ltrs[i] = set1[n1] + set2[(n2++ % 26)];
- if (n2 == 26) {
- n1++;
- }
- }
- return ltrs;
- }
- public SynonymCandidate swapSingleUmbrella(Issue issue) {
- if (!(issue instanceof UmbrellaCandidate)) {
- return null;
- }
- UmbrellaCandidate cand = (UmbrellaCandidate) issue;
- if (cand.subTerms.size() != 1) {
- return null;
- }
- Iterator sub = cand.subTerms.entrySet().iterator();
- Map.Entry entry = (Map.Entry) sub.next();
- String subTerm = (String) entry.getKey();
- ArrayList<Object> supportLists = (ArrayList<Object>) entry.getValue(),
- subTermStr = (ArrayList<Object>) supportLists.get(0),
- subTermDys = (ArrayList<Object>) supportLists.get(1);
- if (!equivalentLists(cand.pcStringsCovered, subTermStr)) {
- return null;
- }
- // OK, first post this as a nonUmbrella
- if (nonUmbrellas == null) {
- nonUmbrellas = new TreeMap();
- }
- if (nonUmbrellas.get(cand.kinTerm) == null) {
- nonUmbrellas.put(cand.kinTerm, new ArrayList<Object>());
- }
- ArrayList<Object> baddies = (ArrayList<Object>) nonUmbrellas.get(cand.kinTerm);
- baddies.add(subTerm);
- // Now we need to convert this UmbrellaCandidate to a SynonymCandidate
- ArrayList<Object> qns = new ArrayList<Object>(), ktermDyads = new ArrayList<Object>();
- int order = cand.kinTerm.compareTo(subTerm);
- String termPair = (order < 0 ? cand.kinTerm + "/" + subTerm : subTerm + "/" + cand.kinTerm);
- TreeMap umbPos = null;
- if (dyadsUndefined != null) {
- umbPos = (TreeMap) dyadsUndefined.get(cand.kinTerm);
- }
- if (umbPos != null) {
- ktermDyads.addAll(harvestLeaves(umbPos));
- }
- if (dyadsDefined != null) {
- umbPos = (TreeMap) dyadsDefined.get(cand.kinTerm);
- }
- if (umbPos != null) {
- ktermDyads.addAll(harvestLeaves(umbPos));
- }
- qns.add("Could these 2 kinTerms be synonyms?");
- qns.add("Can you provide any dyads that fit one term, but not the other?");
- SynonymCandidate syn = new SynonymCandidate(cand.kinTerm, qns, termPair,
- cand.pcStringsCovered, ktermDyads, subTermDys);
- return syn;
- } // end of method swapSingleUmbrella
- public String addNoise(String kinTerm, Oracle orca, float classNoiseProbabililty, float spellNoiseProbabiliity) {
- // With probability = classNoiseProbabililty, pick an incorrect kinTerm
- // With probability = spellNoiseProbabiliity, make a 1-character activity in spelling this kinTerm
- double diceRoll = Math.random();
- int tries = 0;
- boolean keepOn = true;
- if (diceRoll <= classNoiseProbabililty) { // We'll add class (kinTerm) name noise
- ArrayList<Object> classNames = new ArrayList<Object>(theory.keySet());
- classNames.removeAll(nonTerms);
- while (keepOn) {
- int which = (int) Math.floor((Math.random() * classNames.size()));
- if (classNames.isEmpty()) {
- Context.breakpoint();
- }
- if (!((String) classNames.get(which)).equals(kinTerm)) {
- if (orca != null) {
- Oracle.NoiseRecord noiseRec = new Oracle.NoiseRecord("class", kinTerm, (String) classNames.get(which));
- orca.holder.add(noiseRec);
- }
- return (String) classNames.get(which);
- }
- if (tries++ > 20) {
- keepOn = false;
- }
- } // end of while loop
- } // end of adding class name noise
- // if we get here, no class name noise was added, so add spelling noise
- diceRoll = Math.random();
- if (diceRoll <= spellNoiseProbabiliity) {
- String noises = "xz";
- char xChar = noises.charAt(0), zChar = noises.charAt(1);
- char[] letters = kinTerm.toCharArray();
- int which = (int) Math.floor((Math.random() * letters.length));
- char victim = letters[which];
- if (victim != xChar) {
- letters[which] = xChar;
- } else {
- letters[which] = zChar;
- }
- String newTerm = new String(letters);
- if (orca != null) {
- Oracle.NoiseRecord noiseRec = new Oracle.NoiseRecord("spell", kinTerm, newTerm);
- orca.holder.add(noiseRec);
- }
- kinTerm = newTerm;
- } // end of spelling noise
- return kinTerm;
- } // end of method addNoise
- public TreeMap makeNEG(String kinTerm, TreeMap pos) {
- // At this point:
- // (a) dyads for Defined terms have already been validated & removed from dyadsUndefined
- // (b) all dyads for KNOWN synonyms have been converted to dyads for the base term
- // (c) there are no circular references in 'synonyms' or in 'umbrellas'
- // (d) we know that all overlaping pairs have the lexically-low term as key
- // POS = the subtree of dyadsUndefined for kinTerm
- // If kinTerm is not an umbrella term or subTerm, make NEG = to all dyads
- // for other terms (Defined or Undefined)
- // If kinTerm is a declared umbrella term, add to POS any dyads for known subTerms
- // (whether Defined or Undefined), and subtract them from NEG
- // If kinTerm is a declared subTerm term, subtract from NEG any dyads for the umbrella term
- // (whether Defined or Undefined)
- // If kinTerm is an overlapping term, subtract from NEG any dyads for the overlap compainion term
- // (whether Defined or Undefined)
- // 3/16/07 CHANGE: Don't add dyads for umbrella's sub-terms to umbrella's POS -- just subtract from NEG
- TreeMap neg = mergeNestedTrees(dyadsUndefined, dyadsDefined);
- neg.remove(kinTerm);
- if (umbrellas != null) {
- Iterator umbIter = umbrellas.entrySet().iterator();
- while (umbIter.hasNext()) {
- Map.Entry entry = (Map.Entry) umbIter.next();
- String umbTerm = (String) entry.getKey();
- ArrayList<Object> subTerms = (ArrayList<Object>) entry.getValue();
- if (subTerms.contains(kinTerm)) // any umbrella term encompassing kinTerm is deleted from Neg
- {
- neg.remove(umbTerm);
- } else if (umbTerm.equals(kinTerm)) // if kinTerm == umbTerm, delete its encompassed terms from Neg
- {
- for (int i = 0; i < subTerms.size(); i++) {
- neg.remove(subTerms.get(i));
- } // end of kinTerm==umbTerm
- }
- } // end of loop thru umbrella terms
- } // end of umbrellas-is-not-null
- if (overlaps != null && overlaps.get(kinTerm) != null) { // there are overlapping terms
- ArrayList<Object> oLapList = (ArrayList<Object>) overlaps.get(kinTerm);
- for (int i = 0; i < oLapList.size(); i++) {
- neg.remove(oLapList.get(i));
- }
- }
- return mergeSubTrees(neg); // NOTE: pos & neg now are both exactStr -> dyadList.
- }
- public ArrayList<Object> validateNewDyads(String kinTerm, int maxNoise) throws KSBadHornClauseException, KSNoChainOfRelations2Alter,
- KSInternalErrorException, KSConstraintInconsistency, ClassNotFoundException {
- // Confirm that all the dyads gathered for already-accepted definitions fit those definitions.
- // Detect Synonyms & Umbrellas if possible. For system-created auxiliary predicates (e.g. gen<aux>), ANY hits are noise.
- // If a few oddballs show up, refer them to User via an Anomaly.
- // if (kinTerm.equals("gagi_zhakka") || kinTerm.equals("nauwa_nani")) Context.breakpoint();
- ArrayList<Object> posHits = new ArrayList<Object>(),
- posMisses = new ArrayList<Object>(),
- bogusDyads = new ArrayList<Object>(),
- validationIssues = new ArrayList<Object>();
- if (kinTerm.indexOf("aux>") > -1) {
- return validationIssues;
- }
- KinTermDef ktd = (KinTermDef) theory.get(kinTerm);
- if (dyadsDefined == null) {
- dyadsDefined = new DyadTMap();
- }
- TreeMap defTermTM = (TreeMap) dyadsDefined.remove(kinTerm); // PC_Str => ArrayList<Object> of dyads
- TreeMap unDefTM = (TreeMap) dyadsUndefined.remove(kinTerm); // ditto
- if (defTermTM == null) {
- defTermTM = new TreeMap();
- }
- Iterator cbIter = ktd.expandedDefs.iterator();
- if (unDefTM != null) {
- while (cbIter.hasNext()) {
- ClauseBody cb = (ClauseBody) cbIter.next();
- ArrayList<Object> dyads = (ArrayList<Object>) unDefTM.get(cb.pcString);
- if (dyads != null) {
- Iterator dyIter = dyads.iterator();
- while (dyIter.hasNext()) {
- Dyad dy = (Dyad) dyIter.next();
- if (fit(cb, dy)) { // remove from unDefTM; add to defTermTM
- dyIter.remove();
- if (defTermTM.get(cb.pcString) == null) {
- defTermTM.put(cb.pcString, new ArrayList<Object>());
- }
- ((ArrayList<Object>) defTermTM.get(cb.pcString)).add(dy);
- posHits.add(dy);
- }
- } // end of while loop
- if (dyads.isEmpty()) {
- unDefTM.remove(cb.pcString); // don't leave empties behind
- }
- } // end of dyads != null
- } // end of loop thru CBs
- }
- if (!defTermTM.isEmpty()) {
- dyadsDefined.put(kinTerm, defTermTM); // put 'em back, unless empty
- }
- if (unDefTM != null && !unDefTM.isEmpty()) {
- dyadsUndefined.put(kinTerm, unDefTM);
- }
- posMisses.addAll(harvestLeaves(unDefTM));
- bogusDyads = findImposters(kinTerm, ktd.expandedDefs);
- if (posMisses.isEmpty() && bogusDyads.isEmpty()) {
- return validationIssues; // All is Well. We're done.
- }
- if (posMisses.size() > 0) { // Some "pos" dyads did not match current definition
- Iterator dyIter = posMisses.iterator();
- while (dyIter.hasNext()) {
- Dyad dy = (Dyad) dyIter.next();
- if (dy.confirmed || dy.kinTermType == 7) {
- dyIter.remove();
- } else {
- dy.challenged = true;
- }
- }
- if (posMisses.size() > 0) {
- String defArt = (posMisses.size() == 1 ? "This " : "These "),
- pl = (posMisses.size() == 1 ? "" : "s"),
- vb = (posMisses.size() == 1 ? " does" : " do"),
- phr = (posMisses.size() == 1 ? "Is this " : "Are these ");
- String question = defArt + posMisses.size() + " dyad" + pl + vb + " not match the accepted definition of "
- + kinTerm + ".\n\t\t" + phr
- + "dyad" + pl + " wrongly labeled, or is the accepted definition wrong?";
- Anomaly oops = new Anomaly(ktd, kinTerm, question, "falseNeg", posHits, posMisses, null, null);
- validationIssues.add(oops);
- }
- } // end of making Anomaly for posMisses
- if (bogusDyads.size() > 0) { // Some dyads for OTHER terms match current definition
- // Maybe they're errors. Or maybe they are a synonym or umbrella term.
- // See if there a lot or just a few per kinTerm
- TreeMap bogusTM = new TreeMap();
- for (int i = 0; i < bogusDyads.size(); i++) { // Sort 'em by kinTerm
- Dyad dy = (Dyad) bogusDyads.get(i);
- String kterm = dy.kinTerm;
- if (bogusTM.get(kterm) == null) {
- bogusTM.put(kterm, new ArrayList<Object>());
- }
- ArrayList<Object> termList = (ArrayList<Object>) bogusTM.get(kterm);
- termList.add(dy);
- } // end of loop thru dyads
- int limit = new Double(Math.ceil(posHits.size() * maxNoise / 100d)).intValue();
- Iterator iter = bogusTM.values().iterator();
- limit = Math.max(limit, MainPane.NUMBER_OF_EGOS); // NUMBER_OF_EGOS is the smallest limit allowed
- while (iter.hasNext()) { // more than 'limit' implies this is NOT noise, it's a term
- ArrayList<Object> dyLst = (ArrayList<Object>) iter.next();
- String kterm = ((Dyad) dyLst.get(0)).kinTerm;
- if (dyLst.size() > limit && !kterm.equals("no__term")) {
- bogusDyads.removeAll(dyLst);
- }
- } // end of loop thru bogusTM = dyads sorted by kinTerm
- iter = bogusDyads.iterator();
- ArrayList<Object> confirmedBogies = new ArrayList<Object>();
- while (iter.hasNext()) {
- Dyad dy = (Dyad) iter.next();
- if (dy.confirmed || dy.kinTermType == 7) {
- confirmedBogies.add(dy);
- iter.remove();
- } else {
- dy.challenged = true;
- }
- }
- // Now bogusDyads contains only a few oddball terms, not comfirmed dyads, synonyms or umbrellas (hopefully)
- if (bogusDyads.size() > 0) { // i.e. we haven't emptied it.
- for (int i = 0; i < bogusDyads.size(); i++) {
- ((Dyad) bogusDyads.get(i)).challenged = true;
- }
- String defArt = (bogusDyads.size() == 1 ? "This " : "These "),
- pl = (bogusDyads.size() == 1 ? "" : "s"),
- vb = (bogusDyads.size() == 1 ? " does" : " do"),
- ha = (bogusDyads.size() == 1 ? " has" : " have"),
- phr = (bogusDyads.size() == 1 ? "Is this " : "Are these "),
- confirmed = (confirmedBogies.isEmpty() ? "" : "\nNOTE: You previously confirmed "
- + confirmedBogies.size() + " such.");
- String question = defArt + bogusDyads.size() + " dyad" + pl + vb + " match the accepted definition of "
- + kinTerm + " but " + ha + " been given other kin term" + pl + ".\n\t\t" + phr
- + "dyad" + pl + " wrongly labeled, or should the accepted definition be revised?" + confirmed;
- Anomaly oops = new Anomaly(ktd, kinTerm, question, "falsePos", bogusDyads, posHits, null, null);
- validationIssues.add(oops);
- } // end of we-didn't-empty-it
- } // end of making Anomaly for imposters
- ctxt.featuresHaveChanged = true;
- return validationIssues;
- } // end of method validateNewDyads
- public ArrayList<Object> findImposters(String kinTerm, ArrayList<Object> componentCBs) throws KSBadHornClauseException, KSNoChainOfRelations2Alter,
- KSInternalErrorException, KSConstraintInconsistency, ClassNotFoundException {
- // We're looking for anything in dyadsUndefined that meets the definition of
- // a clause of kinTerm's definition, but represents some other term.
- // We know that will be the case for umbrella terms and their subterms.
- // those are OK -- Exclude those.
- ArrayList<Object> imposters = new ArrayList<Object>(),
- subTermsOfKinTerm = (umbrellas == null ? null : (ArrayList<Object>) umbrellas.get(kinTerm)),
- overlapsOfKinTerm = (overlaps == null ? null : (ArrayList<Object>) overlaps.get(kinTerm));
- if (kinTerm.indexOf("<aux>") > -1) {
- return imposters; // auxiliaries can't have imposters
- }
- Iterator iter = dyadsUndefined.entrySet().iterator(), cbIter;
- while (iter.hasNext()) {
- Map.Entry entry = (Map.Entry) iter.next();
- String term = (String) entry.getKey();
- TreeMap termMap = (TreeMap) entry.getValue();
- if (termMap == null || termMap.isEmpty()) {
- iter.remove(); // Don't leave empties lying around!
- } else {
- ArrayList<Object> subTermsOfTerm = (umbrellas == null ? null : (ArrayList<Object>) umbrellas.get(term)),
- overlapsOfTerm = (overlaps == null ? null : (ArrayList<Object>) overlaps.get(term));
- boolean doIt = (!kinTerm.equals(term)
- && (subTermsOfKinTerm == null || !subTermsOfKinTerm.contains(term))
- && (subTermsOfTerm == null || !subTermsOfTerm.contains(kinTerm))
- && (overlapsOfKinTerm == null || !overlapsOfKinTerm.contains(term))
- && (overlapsOfTerm == null || !overlapsOfTerm.contains(kinTerm)));
- if (doIt) { // doIt = neither term is a subterm or overlap of the other
- cbIter = componentCBs.iterator(); // (no umbrella or overlaps relationship)
- while (cbIter.hasNext()) {
- ClauseBody cb = (ClauseBody) cbIter.next();
- ArrayList<Object> potentialImposters = (ArrayList<Object>) termMap.get(cb.pcString);
- if (potentialImposters != null) { // found some
- // Now verify that these really do fit the definition of kinTerm
- for (int i = 0; i < potentialImposters.size(); i++) {
- Dyad dy = (Dyad) potentialImposters.get(i);
- if (fit(cb, dy)) {
- imposters.add(dy);
- }
- } // end of loop thru potentialImposters
- } // end of potentialImposters not null
- } // end of loop thru CBs for this kinTerm
- } // end of doIt
- } // end of term Map not null
- } // end of loop thru all dyadsUndefined
- return imposters;
- } // end of method findImposters
- public void removeChallengedDyads(ArrayList<Object> dyads) {
- // Remove from this list all dyads where challenged = true;
- Iterator iter = dyads.iterator();
- while (iter.hasNext()) {
- Dyad dyad = (Dyad) iter.next();
- if (dyad.challenged) {
- iter.remove();
- }
- } // end of loop thru dydas
- } // end of method removeChallengedDyads
- /*
- public void removeSynUmbDyads(String kinTerm, ArrayList<Object> listOfDyads) {
- // Remove from this list all dyads that are part of a proposed
- // synonym or umbrella relationship
- ArrayList<Object> seen = new ArrayList<Object>(), removem = new ArrayList<Object>();
- Iterator iter = listOfDyads.iterator();
- while (iter.hasNext()) {
- Dyad dy = (Dyad)iter.next();
- String kterm = dy.kinTerm;
- if (removem.contains(kterm)) iter.remove();
- else if (! seen.contains(kterm)) {
- seen.add(kterm);
- if (umbAlreadyPosted(kinTerm, kterm) || umbAlreadyPosted(kterm, kinTerm)) {
- removem.add(kterm);
- iter.remove();
- }else {
- int order = kinTerm.compareTo(kterm);
- String word_pair = ((order < 0) ? kinTerm + "/" + kterm : kterm + "/" + kinTerm),
- keyWord = ((order < 0) ? kinTerm : kterm);
- if (synonymAlreadyProposed(keyWord, word_pair)) {
- removem.add(kterm);
- iter.remove();
- }
- } // end of no-umbrella-so-check-for-synonyms
- } // end of not-seen-before
- } // end of loop thru dyads
- } // end of method removeSynUmbDyads
- */
- public void postAnomaliesForUser(String kinTerm, ArrayList<Object> anomalies) {
- // First merge any duplicate anomalies
- ArrayList<Issue> compactedAnomalies = new ArrayList<Issue>();
- while (anomalies.size() > 0) {
- Anomaly baseCase = (Anomaly) anomalies.remove(0);
- Iterator iter = anomalies.iterator();
- while (iter.hasNext()) {
- Anomaly nextCase = (Anomaly) iter.next();
- if (baseCase.isEquivalentTo(nextCase)) {
- baseCase.absorb(nextCase);
- iter.remove();
- }
- } // end of loop thru remaining anomalies
- compactedAnomalies.add(baseCase);
- } // end of while anomalies.size() > 0
- // Now post the compacted anomalies
- if (compactedAnomalies.size() > 0) {
- if (issuesForUser.get(kinTerm) == null) {
- issuesForUser.put(kinTerm, new ArrayList<Issue>());
- }
- issuesForUser.get(kinTerm).addAll(compactedAnomalies);
- }
- } // end of method postAnomaliesForUser
- public void detectSynonymsAndUmbrellas(String kinTerm, TreeMap prePos, TreeMap preNeg, int maxNoise)
- throws KSInternalErrorException {
- // First, exclude any pcString supported only by challenged dyads
- // if (kinTerm.equals("daddy") || kinTerm.equals("dad")) {
- // Context.breakpoint();
- // }
- TreeMap pos = new TreeMap(prePos),
- neg = new TreeMap(preNeg);
- ArrayList<Object> filteredKTermDys = new ArrayList<Object>(),
- filtered2KTermDys = new ArrayList<Object>();
- // Threshhold2 = maxNoise% * avgNmbrOfDyadsPerKinType
- int threshhold1 = Math.max(2, MainPane.NUMBER_OF_EGOS - 1),
- threshhold2 = (int) Math.floor(countLeaves(pos) * maxNoise / (100d * pos.size()));
- Iterator posIter = pos.entrySet().iterator();
- while (posIter.hasNext()) {
- ArrayList<Object> dyadList = (ArrayList<Object>) ((Map.Entry) posIter.next()).getValue(),
- tempLst = new ArrayList<Object>();
- Iterator listIter = dyadList.iterator();
- boolean killIt = true;
- int cntr = 0;
- while (listIter.hasNext()) {
- Dyad dy = (Dyad) listIter.next();
- if (!dy.challenged) {
- killIt = false;
- cntr++;
- tempLst.add(dy);
- }
- } // end of loop thru dyadList
- if (killIt) {
- posIter.remove();
- } else {
- if (cntr >= threshhold1) {
- filteredKTermDys.addAll(tempLst);
- }
- if (cntr >= threshhold2) {
- filtered2KTermDys.addAll(tempLst);
- }
- }
- } // end of loop thru pos
- Iterator negIter = neg.entrySet().iterator();
- while (negIter.hasNext()) {
- ArrayList<Object> dyadList = (ArrayList<Object>) ((Map.Entry) negIter.next()).getValue();
- Iterator listIter = dyadList.iterator();
- boolean killIt = true;
- while (listIter.hasNext() && killIt) {
- if (!((Dyad) listIter.next()).challenged) {
- killIt = false;
- }
- }
- if (killIt) {
- negIter.remove();
- }
- }
- // Let otherSet = the set of all terms in NEG having pcStrings that appear in POS
- // otherSet structure: kinTerm => List of pcStrings seen for that kinTerm
- ArrayList<Object> filteredKTermPCStr = new ArrayList<Object>(),
- filtered2KTermPCStr = new ArrayList<Object>();
- posIter = pos.entrySet().iterator();
- while (posIter.hasNext()) {
- Map.Entry entry = (Map.Entry) posIter.next();
- String pcStr = (String) entry.getKey();
- ArrayList<Object> dyadList = (ArrayList<Object>) entry.getValue();
- if (dyadList.size() >= threshhold1) {
- filteredKTermPCStr.add(pcStr);
- }
- if (dyadList.size() >= threshhold2) {
- filtered2KTermPCStr.add(pcStr);
- }
- } // end of loop thru pos for filtering
- posIter = filteredKTermPCStr.iterator();
- TreeMap filteredOtherSet = new TreeMap();
- while (posIter.hasNext()) {
- String pcStr = (String) posIter.next();
- ArrayList<Object> negDyads = (ArrayList<Object>) neg.get(pcStr);
- if (negDyads != null && negDyads.size() >= threshhold1) {
- for (int i = 0; i < negDyads.size(); i++) {
- Dyad dy = (Dyad) negDyads.get(i);
- String kterm = dy.kinTerm;
- if (!dy.challenged && filteredOtherSet.get(kterm) == null) {
- filteredOtherSet.put(kterm, new ArrayList<Object>());
- }
- } // end of loop thru negDyads
- }
- } // end of loop thru pcStrings in filteredKTermPCStr
- posIter = filtered2KTermPCStr.iterator();
- TreeMap filtered2OtherSet = new TreeMap();
- while (posIter.hasNext()) {
- String pcStr = (String) posIter.next();
- ArrayList<Object> negDyads = (ArrayList<Object>) neg.get(pcStr);
- if (negDyads != null && negDyads.size() >= threshhold2) {
- for (int i = 0; i < negDyads.size(); i++) {
- Dyad dy = (Dyad) negDyads.get(i);
- String kterm = dy.kinTerm;
- if (!dy.challenged && filtered2OtherSet.get(kterm) == null) {
- filtered2OtherSet.put(kterm, new ArrayList<Object>());
- }
- } // end of loop thru negDyads
- }
- } // end of loop thru pcStrings in filtered2KTermPCStr
- // Now analyze each term in filteredOtherSet & filtered2OtherSet.
- // Propose all synonyms, umbrellas, and overlaps.
- identifySynsEtc(filteredOtherSet, kinTerm, filteredKTermPCStr, filteredKTermDys, threshhold1, 1);
- if (threshhold2 > threshhold1) {
- identifySynsEtc(filtered2OtherSet, kinTerm, filtered2KTermPCStr, filtered2KTermDys, threshhold2, 2);
- }
- } // end of method detectSynonymsAndUmbrellas
- public void identifySynsEtc(TreeMap otherSet, String kinTerm, ArrayList<Object> kinTermPCStrings,
- ArrayList<Object> ktermDys, int threshhold, int typ)
- throws KSInternalErrorException {
- // otherSet structure: kinTerm => List of pcStrings seen for that kinTerm
- Iterator otherIter = otherSet.entrySet().iterator();
- while (otherIter.hasNext()) {
- Map.Entry otherEntry = (Map.Entry) otherIter.next();
- String term = (String) otherEntry.getKey();
- ArrayList<Object> otherTermPCStrings = (ArrayList<Object>) otherEntry.getValue(),
- otherTermDys = new ArrayList<Object>();
- if (dyadsUndefined != null && dyadsUndefined.get(term) != null) {
- TreeMap tMap = (TreeMap) dyadsUndefined.get(term);
- Iterator tMapIter = tMap.entrySet().iterator();
- while (tMapIter.hasNext()) {
- Map.Entry entry = (Map.Entry) tMapIter.next();
- String kType = (String) entry.getKey();
- ArrayList<Object> dys = (ArrayList<Object>) entry.getValue();
- if (dys.size() >= threshhold) {
- otherTermPCStrings.add(kType);
- otherTermDys.addAll(dys);
- }
- } // end of loop thru tMap
- } // end of loop thru dyadsUndefined
- if (dyadsDefined != null && dyadsDefined.get(term) != null) {
- TreeMap tMap = (TreeMap) dyadsDefined.get(term);
- Iterator tMapIter = tMap.entrySet().iterator();
- while (tMapIter.hasNext()) {
- Map.Entry entry = (Map.Entry) tMapIter.next();
- String kType = (String) entry.getKey();
- ArrayList<Object> dys = (ArrayList<Object>) entry.getValue();
- if (dys.size() >= threshhold) {
- otherTermPCStrings.add(kType);
- otherTermDys.addAll(dys);
- }
- } // end of loop thru tMap
- } // end of loop thru dyadsDefined
- if (equivalentLists(kinTermPCStrings, otherTermPCStrings)
- && !synAlreadyPosted(kinTerm, term)
- && !synRejected(kinTerm, term)) {
- proposeSynonymToUser(kinTerm, term, kinTermPCStrings, ktermDys, otherTermDys, typ);
- } else if (equivalentLists(kinTermPCStrings, otherTermPCStrings)
- && synAlreadyPosted(kinTerm, term)) {
- return;
- } else if (!(nonUmbrellas.get(kinTerm) != null && ((ArrayList<Object>) nonUmbrellas.get(kinTerm)).contains(term))
- && !umbAlreadyPosted(kinTerm, term)
- && isSupersetOf(kinTermPCStrings, otherTermPCStrings)) {
- postPotUmbrellas(kinTerm, kinTermPCStrings, term, otherTermPCStrings, otherTermDys, typ);
- } else if (!isSupersetOf(kinTermPCStrings, otherTermPCStrings)
- && !isSupersetOf(otherTermPCStrings, kinTermPCStrings)
- && !overlapProposed(kinTerm, term)) {
- ArrayList<Object> interSect = new ArrayList<Object>();
- for (int k = 0; k < otherTermPCStrings.size(); k++) {
- if (kinTermPCStrings.contains(otherTermPCStrings.get(k))) {
- interSect.add(otherTermPCStrings.get(k));
- }
- }
- if (interSect.size() > 0) {
- proposeOverlapToUser(kinTerm, term, interSect, ktermDys, otherTermDys, typ);
- }
- } // end of found-an-overlap
- } // end of loop thru otherSet
- } // end of method identifySynsEtc
- public boolean umbAlreadyPosted(String umb, String sub) {
- if (umbrellas != null && umbrellas.get(umb) != null
- && ((ArrayList<Object>) umbrellas.get(umb)).contains(sub)) {
- return true;
- }
- if (potUmbrellas == null) {
- return false;
- }
- if (potUmbrellas.get(umb) == null) {
- return false;
- }
- ArrayList<Object> umbList = (ArrayList<Object>) potUmbrellas.get(umb);
- // structure of potUmbrellas is: umbTerm -> {list of pcStrings, Quad, ... Quad }
- // where Quad = {subTerm, list of its pcStrings, list of Dyads, filterType}
- for (int i = 1; i < umbList.size(); i++) {
- ArrayList<Object> quad = (ArrayList<Object>) umbList.get(i);
- if (quad.contains(sub)) {
- return true;
- }
- }
- return false;
- } // end of method umbAlreadyPosted
- public void postPotUmbrellas(String umb, ArrayList<Object> umbStrings, String sub,
- ArrayList<Object> subStrings, ArrayList<Object> dyads, int typ) {
- if (umb.equals("no__term") || sub.equals("no__term")) {
- return;
- }
- if (potUmbrellas == null) {
- potUmbrellas = new TreeMap();
- }
- // structure of potUmbrellas is: umbTerm -> {list of pcStrings, Quad, ... Quad }
- // where Quad = {subTerm, list of its pcStrings, list of Dyads, filterType}
- // if (umb.equals("ancestor")) Context.breakpoint();
- if (potUmbrellas.get(umb) == null) {
- potUmbrellas.put(umb, new ArrayList<Object>());
- }
- ArrayList<Object> umbQuads = (ArrayList<Object>) potUmbrellas.get(umb);
- if (umbQuads.isEmpty()) {
- umbQuads.add(umbStrings);
- }
- ArrayList<Object> quad = new ArrayList<Object>();
- quad.add(sub);
- quad.add(subStrings);
- quad.add(dyads);
- quad.add(new Integer(typ));
- umbQuads.add(quad);
- } // end of method postPotUmbrellas
- public void proposeUmbrellas() {
- // Post all the proposals to issuesForUser
- Iterator umbIter = potUmbrellas.entrySet().iterator();
- while (umbIter.hasNext()) {
- Map.Entry entry = (Map.Entry) umbIter.next();
- String umbTerm = (String) entry.getKey();
- ArrayList<Object> quads = (ArrayList<Object>) entry.getValue(),
- subTerms = extractTerms(quads);
- if (!umbrellaAlreadyProposed(umbTerm) && !umbrellaAlreadyKnown(umbTerm, quads)
- && !allTermsDefined(umbTerm, subTerms)) {
- ArrayList<Object> umbPCStrings = (ArrayList<Object>) quads.remove(0),
- questions = new ArrayList<Object>();
- questions.add("The term '" + umbTerm + "' seems to be an umbrella term that covers other terms.\n"
- + "Is this correct?");
- if (issuesForUser.get(umbTerm) == null) {
- issuesForUser.put(umbTerm, new ArrayList<Issue>());
- }
- ArrayList<Issue> theIssues = issuesForUser.get(umbTerm);
- theIssues.add(new UmbrellaCandidate(umbTerm, questions, umbPCStrings, quads));
- }
- umbIter.remove();
- }
- } // end of method proposeUmbrellas
- public ArrayList<Object> extractTerms(ArrayList<Object> triples) {
- ArrayList<Object> terms = new ArrayList<Object>();
- // Element 0 of triples is not a triple
- for (int i = 1; i < triples.size(); i++) {
- terms.add(((ArrayList<Object>) triples.get(i)).get(0));
- }
- return terms;
- } // end of method extractTerms
- public boolean umbrellaAlreadyKnown(String umbTerm, ArrayList<Object> triples) {
- if (umbrellas == null || umbrellas.get(umbTerm) == null) {
- return false;
- }
- ArrayList<Object> subTerms = (ArrayList<Object>) umbrellas.get(umbTerm), propSubs = new ArrayList<Object>();
- // Element 0 of triples is not a triple
- for (int i = 1; i < triples.size(); i++) {
- propSubs.add(((ArrayList<Object>) triples.get(i)).get(0));
- }
- if (equivalentLists(subTerms, propSubs)) {
- return true;
- }
- return false;
- } // end of method umbrellaAlreadyKnown
- public boolean umbrellaAlreadyProposed(String umbTerm) {
- if (issuesForUser == null) {
- return false;
- }
- ArrayList<Issue> issues = issuesForUser.get(umbTerm);
- if (issues == null) {
- return false;
- }
- for (int i = 0; i < issues.size(); i++) {
- Issue issue = (Issue) issues.get(i);
- if (issue instanceof UmbrellaCandidate) {
- return true;
- }
- }
- return false;
- } // end of method umbrellaAlreadyProposed
- public boolean overlapProposed(String kinTerm, String term) {
- if (issuesForUser.get(term) == null) {
- return false;
- }
- Iterator iter = issuesForUser.get(term).iterator();
- while (iter.hasNext()) {
- Issue issue = (Issue) iter.next();
- if (issue instanceof OverlapCandidate) {
- OverlapCandidate cand = (OverlapCandidate) issue;
- if (cand.otherTerm.equals(kinTerm)) {
- return true;
- }
- }
- } // end of loop thru issues for 'term'
- return false;
- } // end of method overlapProposed
- public void dataSelection(String kinTerm, ArrayList<Object> ktMatches, TreeMap pos, TreeMap neg, TreeMap dataReqCounter)
- throws KSInternalErrorException, FileNotFoundException, JavaSystemException, IOException, KSNoChainOfRelations2Alter,
- KSBadHornClauseException, KSConstraintInconsistency, ClassNotFoundException, KSParsingErrorException {
- // if (kinTerm.equals("father_in_law")) Context.breakpoint();
- // Reduce the list of KTDs to a set of KT_EQCs which group the KTDs (perhaps singletons) via the Library.ktSigTree.
- TreeMap ktdToKTD_EQC = new TreeMap();
- ArrayList<Object> solidKTD_EQCmatches = makeKTD_EQCs(ktMatches, ktdToKTD_EQC),
- perfectFits = new ArrayList<Object>();
- // Analyze each KTD_EQC's hits & misses, stored on the EQC, and make a global list of CB_EQC non-Hits.
- // Also check for any perfect fits, and make list of PC_String non-Hits
- TreeMap pcStrNonHits = new TreeMap();
- ArrayList<Object> cbNonHits = analyzeKTD_EQC_HitsAndMisses(solidKTD_EQCmatches, perfectFits, neg, pcStrNonHits);
- if (solidKTD_EQCmatches.size() == 1) { // Only 1 good choice; propose it.
- proposeDefinition(kinTerm, (Library.KTD_EQC) solidKTD_EQCmatches.get(0));
- return;
- }
- // If we get here, there are multiple solidMatches (enclosers). Check for a perfect fit.
- if (perfectFits.size() > 0) {
- Library.KTD_EQC eqc = (Library.KTD_EQC) perfectFits.get(0),
- otherEQC;
- if (perfectFits.size() > 1) {
- int totalCBCount = eqc.countClauses(), otherCount;
- for (int i = 1; i < perfectFits.size(); i++) {
- otherEQC = (Library.KTD_EQC) perfectFits.get(i);
- otherCount = otherEQC.countClauses();
- if (otherCount < totalCBCount) {
- totalCBCount = otherCount;
- eqc = otherEQC;
- } // end of found-smaller-one
- } // end of loop thru eqcs
- System.out.println("\n\n\t\t***** NOTICE: More than 1 perfect fit for " + kinTerm + ". Chose smallest.");
- System.out.println("\t\t" + perfectFits);
- } // end of more-than-one-perfect-fit
- proposeDefinition(kinTerm, eqc);
- solidKTD_EQCmatches.remove(eqc);
- return;
- }
- if (dataReqCounter.get(kinTerm) != null
- && ((Counter) dataReqCounter.get(kinTerm)).total() >= Library.MAX_DRS_BEFORE_GUESS) {
- ((Counter) dataReqCounter.get(kinTerm)).zero();
- proposeDefinition(kinTerm, (Library.KTD_EQC) solidKTD_EQCmatches.get(0));
- System.out.println("\n\n\t\t***** Exceeded Data Req limit for " + kinTerm + ". Taking a Guess.");
- return;
- } // end of Data-Reqs-Aren't-Working-So-Guess
- // Compute Similarity scores for all languages in the KTD_EQCs & store in Library. Compute sum of weights.
- double wtSum = (SIMILARITY_ON ? sumSimScores(solidKTD_EQCmatches) : sumSimScoresALT(solidKTD_EQCmatches));
- TreeMap stringMisses = rankStrNonHits(pcStrNonHits, wtSum);
- if (stringMisses.size() > 0) { // found PC_Strings that can separate candidates
- // testOutFile.println("\t" + kinTerm + "\t" + countLeaves(pcStrNonHits));
- askUserForData(kinTerm, stringMisses, pos, neg);
- } else if (cbNonHits.size() > 0) {
- // Group all equivalent CBs from cbNonHits into CB_EQCs
- TreeMap unMatchedCB_EQCs = groupIntoCB_EQCs(cbNonHits, ktdToKTD_EQC, wtSum);
- // Now assign a score to each CB_EQC in unMatchedCB_EQCs and choose lowest scoring CB_EQC.
- TreeMap rankedEQCs = lowToHi(unMatchedCB_EQCs);
- if (dataReqCounter.get(kinTerm) == null) {
- dataReqCounter.put(kinTerm, new Counter());
- }
- ((Counter) dataReqCounter.get(kinTerm)).incr();
- // testOutFile.println("\t" + kinTerm + "\t" + cbNonHits.size());
- askUserForData(kinTerm, rankedEQCs, pos, neg);
- }
- } // end of method dataSelection
- public ArrayList<Object> makeKTD_EQCs(ArrayList<Object> matches, TreeMap ktdToKTD_EQC)
- throws FileNotFoundException, JavaSystemException, IOException {
- // Reduce the list of KTDs to a set of KT_EQCs which group the KTDs
- // (perhaps singletons) via the Library.ktSigTree.
- ArrayList<Object> solidKTD_EQCmatches = new ArrayList<Object>(), triple,
- noisyPos = null, noisyNeg = null;
- if (Library.ktSigCompressed == null) {
- String fileName = Library.libraryDirectory + "KTSigCompressed";
- BufferedReader file = new BufferedReader(new FileReader(fileName));
- Library.ktSigCompressed = Library.readKTSig(file);
- }
- KinTermDef ktd;
- Library.KTD_EQC eqc = null;
- Iterator iter = matches.iterator();
- while (iter.hasNext()) {
- Object obj = iter.next();
- // matches may contain KTDs (if it's solidKTMatches)
- // or it may contain triples (if it's noisyKTMatches)
- if (obj instanceof KinTermDef) {
- ktd = (KinTermDef) obj;
- } else {
- triple = (ArrayList<Object>) obj;
- ktd = (KinTermDef) triple.get(0);
- noisyPos = (ArrayList<Object>) triple.get(1);
- noisyNeg = (ArrayList<Object>) triple.get(2);
- }
- if (ktd == null || ktd.eqcSigExact == null) {
- Context.breakpoint();
- }
- ArrayList<Object> eqcList = (ArrayList<Object>) Library.ktSigCompressed.get(ktd.eqcSigExact);
- if (eqcList == null) { // null means only 1 KTD in Library has this eqcSigExact
- eqc = new Library.KTD_EQC(ktd.eqcSigExact, ktd);
- // Ck for prior rejection of this KTD_EQC
- if (ctxt.notRejected(eqc, addressTerms)) {
- solidKTD_EQCmatches.add(eqc);
- ktdToKTD_EQC.put(ktd, eqc);
- }
- } else {
- for (int i = 0; i < eqcList.size(); i++) {
- eqc = (Library.KTD_EQC) eqcList.get(i);
- if (eqc.hasMember(ktd)) {
- eqc.swapPrototype(new Library.KTD_Ptr(ktd));
- // Ck for prior rejection of this KTD_EQC
- if (ctxt.notRejected(eqc, addressTerms)) {
- if (!solidKTD_EQCmatches.contains(eqc)) {
- solidKTD_EQCmatches.add(eqc);
- }
- ktdToKTD_EQC.put(ktd, eqc);
- }
- break; // end the loop
- }
- } // end of loop thru eqcs in eqcList
- }
- if (noisyPos != null) {
- eqc.noisyPosDyads = noisyPos;
- }
- if (noisyNeg != null) {
- eqc.noisyNegDyads = noisyNeg;
- }
- } // end of loop thru ktds in matches
- return solidKTD_EQCmatches;
- } // end of method makeKTD_EQCs
- public ArrayList<Object> analyzeKTD_EQC_HitsAndMisses(ArrayList<Object> solidKTD_EQCmatches, ArrayList<Object> perfectFits,
- TreeMap neg, TreeMap pcStrNonHits)
- throws KSParsingErrorException, JavaSystemException, KSInternalErrorException, ClassNotFoundException,
- KSConstraintInconsistency, KSBadHornClauseException, KSNoChainOfRelations2Alter {
- ArrayList<Object> allNonHits = new ArrayList<Object>(); // set of all CBs in any eqc that didn't match POS or NEG
- Iterator iter = solidKTD_EQCmatches.iterator();
- while (iter.hasNext()) {
- Library.KTD_EQC eqc = (Library.KTD_EQC) iter.next();
- TreeMap eqcStringMisses = new TreeMap();
- pcStrNonHits.put(eqc, eqcStringMisses);
- // pcStrNonHits is eqc => PC_String => (typical) CB
- KinTermDef ktd = eqc.prototype.getKTD();
- eqc.misFits = new ArrayList<Object>(); // misFits = all expDef CBs that did not match POS
- eqc.baseMisFits = new ArrayList<Object>(); // baseMisFits = all un-expanded CBs with 1+ expansion missing POS
- eqc.baseFits = new ArrayList<Object>(); // baseFits = all base CBs that had at least 1 expansion hit POS
- for (int i = 0; i < ktd.expandedDefs.size(); i++) {
- ClauseBody cb = (ClauseBody) ktd.expandedDefs.get(i);
- String firstExpansion = (String) cb.expansionPath.get(0);
- int where = firstExpansion.indexOf(":") + 1;
- Integer baseCB = new Integer(firstExpansion.substring(where));
- if (cb.posHit) {
- if (!eqc.baseFits.contains(baseCB)) {
- eqc.baseFits.add(baseCB);
- }
- } else {
- eqc.misFits.add(new Integer(cb.seqNmbr));
- if (!eqc.baseMisFits.contains(baseCB)) {
- eqc.baseMisFits.add(baseCB);
- }
- ArrayList<Object> negDyads = (ArrayList<Object>) neg.get(cb.pcString);
- // cb wasn't a posHit. If it's not a negHit either, put into allNonHits
- if (negDyads == null) {
- allNonHits.add(cb);
- eqcStringMisses.put(cb.pcString, cb);
- } else {
- boolean hitOne = false;
- for (int j = 0; j < negDyads.size(); j++) {
- Dyad dy = (Dyad) negDyads.get(j);
- if (fit(cb, dy)) {
- hitOne = true;
- j = negDyads.size();
- }
- } // end of loop thru dyads
- if (!hitOne) {
- …
Large files files are truncated, but you can click here to view the full file