PageRenderTime 47ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/GATEFiles/plugins/Crowd_Sourcing/src/gate/crowdsource/ne/EntityAnnotationResultsImporter.java

https://gitlab.com/mshepherd/resumeparser
Java | 266 lines | 199 code | 42 blank | 25 comment | 25 complexity | 4152a3c61aae55ffee70501c8405ff26 MD5 | raw file
  1. /*
  2. * EntityAnnotationResultsImporter.java
  3. *
  4. * Copyright (c) 1995-2014, The University of Sheffield. See the file
  5. * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
  6. *
  7. * This file is part of GATE (see http://gate.ac.uk/), and is free
  8. * software, licenced under the GNU Library General Public License,
  9. * Version 3, June 2007 (in the distribution as file licence.html,
  10. * and also available at http://gate.ac.uk/gate/licence.html).
  11. *
  12. * $Id: EntityAnnotationResultsImporter.java 17968 2014-05-11 16:37:34Z ian_roberts $
  13. */
  14. package gate.crowdsource.ne;
  15. import static gate.crowdsource.CrowdFlowerConstants.*;
  16. import java.util.List;
  17. import org.apache.log4j.Logger;
  18. import com.google.gson.JsonArray;
  19. import com.google.gson.JsonElement;
  20. import com.google.gson.JsonObject;
  21. import gate.Annotation;
  22. import gate.AnnotationSet;
  23. import gate.Resource;
  24. import gate.Utils;
  25. import gate.creole.AbstractLanguageAnalyser;
  26. import gate.creole.ExecutionException;
  27. import gate.creole.ExecutionInterruptedException;
  28. import gate.creole.ResourceInstantiationException;
  29. import gate.creole.metadata.CreoleParameter;
  30. import gate.creole.metadata.CreoleResource;
  31. import gate.creole.metadata.Optional;
  32. import gate.creole.metadata.RunTime;
  33. import gate.crowdsource.rest.CrowdFlowerClient;
  34. import gate.util.InvalidOffsetException;
  35. @CreoleResource(name = "Entity Annotation Results Importer",
  36. comment = "Import judgments from a CrowdFlower job created by "
  37. + "the Entity Annotation Job Builder as GATE annotations.",
  38. helpURL = "http://gate.ac.uk/userguide/sec:crowd:annotation:import")
  39. public class EntityAnnotationResultsImporter
  40. extends
  41. AbstractLanguageAnalyser {
  42. private static final long serialVersionUID = 3424823295729835240L;
  43. private static final Logger log = Logger
  44. .getLogger(EntityAnnotationResultsImporter.class);
  45. private String apiKey;
  46. private Long jobId;
  47. private String resultAnnotationType;
  48. private String resultASName;
  49. private String snippetAnnotationType;
  50. private String snippetASName;
  51. private String tokenAnnotationType;
  52. private String tokenASName;
  53. protected CrowdFlowerClient crowdFlowerClient;
  54. public String getApiKey() {
  55. return apiKey;
  56. }
  57. @CreoleParameter(comment = "CrowdFlower API key")
  58. public void setApiKey(String apiKey) {
  59. this.apiKey = apiKey;
  60. }
  61. public Long getJobId() {
  62. return jobId;
  63. }
  64. @RunTime
  65. @CreoleParameter
  66. public void setJobId(Long jobId) {
  67. this.jobId = jobId;
  68. }
  69. public String getResultAnnotationType() {
  70. return resultAnnotationType;
  71. }
  72. @RunTime
  73. @CreoleParameter
  74. public void setResultAnnotationType(String resultAnnotationType) {
  75. this.resultAnnotationType = resultAnnotationType;
  76. }
  77. public String getResultASName() {
  78. return resultASName;
  79. }
  80. @Optional
  81. @RunTime
  82. @CreoleParameter(defaultValue = "crowdResults")
  83. public void setResultASName(String resultASName) {
  84. this.resultASName = resultASName;
  85. }
  86. public String getSnippetAnnotationType() {
  87. return snippetAnnotationType;
  88. }
  89. @RunTime
  90. @CreoleParameter(defaultValue = "Sentence", comment = "Annotation type " +
  91. "representing the snippets (one snippet = one unit)")
  92. public void setSnippetAnnotationType(String snippetAnnotationType) {
  93. this.snippetAnnotationType = snippetAnnotationType;
  94. }
  95. public String getSnippetASName() {
  96. return snippetASName;
  97. }
  98. @Optional
  99. @RunTime
  100. @CreoleParameter(comment = "Annotation set where the snippets can be found")
  101. public void setSnippetASName(String snippetASName) {
  102. this.snippetASName = snippetASName;
  103. }
  104. public String getTokenAnnotationType() {
  105. return tokenAnnotationType;
  106. }
  107. @RunTime
  108. @CreoleParameter(defaultValue = "Token",
  109. comment = "Annotation type representing the \"tokens\" - the atomic " +
  110. "units that workers have selected to mark entity annotations.")
  111. public void setTokenAnnotationType(String tokenAnnotationType) {
  112. this.tokenAnnotationType = tokenAnnotationType;
  113. }
  114. public String getTokenASName() {
  115. return tokenASName;
  116. }
  117. @Optional
  118. @RunTime
  119. @CreoleParameter(comment = "Annotation set where tokens can be found")
  120. public void setTokenASName(String tokenASName) {
  121. this.tokenASName = tokenASName;
  122. }
  123. @Override
  124. public Resource init() throws ResourceInstantiationException {
  125. if(apiKey == null || "".equals(apiKey)) {
  126. throw new ResourceInstantiationException("API Key must be set");
  127. }
  128. crowdFlowerClient = new CrowdFlowerClient(apiKey);
  129. return this;
  130. }
  131. @Override
  132. public void execute() throws ExecutionException {
  133. if(isInterrupted()) throw new ExecutionInterruptedException();
  134. interrupted = false;
  135. try {
  136. if(jobId == null || jobId.longValue() <= 0) {
  137. throw new ExecutionException("Job ID must be provided");
  138. }
  139. AnnotationSet tokens = getDocument().getAnnotations(tokenASName).get(tokenAnnotationType);
  140. AnnotationSet snippetAnnotations = getDocument().getAnnotations(snippetASName)
  141. .get(snippetAnnotationType);
  142. AnnotationSet resultAS = getDocument().getAnnotations(resultASName);
  143. List<Annotation> allSnippets = Utils.inDocumentOrder(snippetAnnotations);
  144. for(Annotation snippet : allSnippets) {
  145. if(isInterrupted()) throw new ExecutionInterruptedException();
  146. Object unitId = snippet.getFeatures().get(resultAnnotationType + "_unit_id");
  147. if(unitId != null) {
  148. if(!(unitId instanceof Long)) {
  149. unitId = Long.valueOf(unitId.toString());
  150. }
  151. // find any existing result annotations within the span of this snippet
  152. // so we can avoid creating another annotation from this judgment if
  153. // one already exists
  154. AnnotationSet existingResults =
  155. Utils.getContainedAnnotations(resultAS, snippet,
  156. resultAnnotationType);
  157. // tokens under this snippet
  158. List<Annotation> snippetTokens = Utils.inDocumentOrder(
  159. Utils.getContainedAnnotations(tokens, snippet));
  160. JsonArray judgments =
  161. crowdFlowerClient.getJudgments(jobId,
  162. ((Long)unitId).longValue());
  163. if(judgments != null) {
  164. for(JsonElement judgmentElt : judgments) {
  165. JsonObject judgment = judgmentElt.getAsJsonObject();
  166. JsonArray answer =
  167. judgment.getAsJsonObject("data").get("answer")
  168. .getAsJsonArray();
  169. Long judgmentId = judgment.get("id").getAsLong();
  170. Double trust = judgment.get("trust").getAsDouble();
  171. Long workerId = judgment.get("worker_id").getAsLong();
  172. if(answer.size() > 0) {
  173. // judgment says there are some entities to annotate. Look for
  174. // sequences of consecutive token indices and create one result
  175. // annotation for each such sequence
  176. int startTok = 0;
  177. int curTok = startTok;
  178. while(curTok < answer.size()) {
  179. // we've reached the end of a consecutive sequence if either
  180. // (a) we're on the last element of answer or
  181. // (b) the next element is not this+1
  182. if(curTok == answer.size() - 1
  183. || answer.get(curTok).getAsInt() != answer.get(curTok + 1).getAsInt()) {
  184. Long startOffset = snippetTokens.get(answer.get(startTok).getAsInt()).getStartNode().getOffset();
  185. Long endOffset = snippetTokens.get(answer.get(curTok).getAsInt()).getEndNode().getOffset();
  186. startTok = curTok + 1;
  187. // check whether there's already an annotation at this location for this judgment
  188. AnnotationSet existingEntities = existingResults.getContained(startOffset, endOffset);
  189. boolean found = false;
  190. for(Annotation a : existingEntities) {
  191. if(judgmentId.equals(a.getFeatures().get(JUDGMENT_ID_FEATURE_NAME))) {
  192. found = true;
  193. break;
  194. }
  195. }
  196. if(!found) {
  197. // no existing annotation found, create one
  198. try {
  199. resultAS.add(startOffset, endOffset, resultAnnotationType, Utils.featureMap(
  200. JUDGMENT_ID_FEATURE_NAME, judgmentId,
  201. "trust", trust,
  202. "worker_id", workerId));
  203. } catch(InvalidOffsetException e) {
  204. throw new ExecutionException("Invalid offset obtained from existing annotation!", e);
  205. }
  206. }
  207. }
  208. curTok++;
  209. }
  210. }
  211. }
  212. } else {
  213. log.warn("Unit " + unitId + " has no judgments");
  214. }
  215. } else {
  216. log.warn("Found " + snippetAnnotationType + " annotation with no "
  217. + UNIT_ID_FEATURE_NAME + " feature, ignoring");
  218. }
  219. }
  220. } finally {
  221. interrupted = false;
  222. }
  223. }
  224. }