PageRenderTime 39ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/ixa-pipe-parse/src/main/java/eus/ixa/ixa/pipe/parse/ConstituentParserServer.java

https://gitlab.com/neuralconcept/NLPthirdParty
Java | 195 lines | 112 code | 14 blank | 69 comment | 7 complexity | 5e0f5d86584ee3a15949ca2ec5c7a49d MD5 | raw file
  1. /*
  2. *Copyright 2015 Rodrigo Agerri
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package eus.ixa.ixa.pipe.parse;
  14. import ixa.kaflib.KAFDocument;
  15. import java.io.BufferedReader;
  16. import java.io.BufferedWriter;
  17. import java.io.IOException;
  18. import java.io.InputStreamReader;
  19. import java.io.OutputStreamWriter;
  20. import java.io.StringReader;
  21. import java.io.UnsupportedEncodingException;
  22. import java.net.ServerSocket;
  23. import java.net.Socket;
  24. import java.util.Properties;
  25. import org.jdom2.JDOMException;
  26. import com.google.common.io.Files;
  27. /**
  28. *
  29. * @author ragerri
  30. *
  31. */
  32. public class ConstituentParserServer {
  33. /**
  34. * Get dynamically the version of ixa-pipe-parse by looking at the MANIFEST
  35. * file.
  36. */
  37. private final String version = CLI.class.getPackage().getImplementationVersion();
  38. /**
  39. * Get the git commit of the ixa-pipe-parse compiled by looking at the MANIFEST
  40. * file.
  41. */
  42. private final String commit = CLI.class.getPackage().getSpecificationVersion();
  43. /**
  44. * The model.
  45. */
  46. private String model = null;
  47. /**
  48. * The annotation output format, one of NAF (default) and oneline penn treebank.
  49. */
  50. private String outputFormat = null;
  51. /**
  52. * Construct a NameFinder server.
  53. *
  54. * @param properties
  55. * the properties
  56. */
  57. public ConstituentParserServer(Properties properties) {
  58. Integer port = Integer.parseInt(properties.getProperty("port"));
  59. model = properties.getProperty("model");
  60. outputFormat = properties.getProperty("outputFormat");
  61. String kafToString;
  62. ServerSocket socketServer = null;
  63. Socket activeSocket;
  64. BufferedReader inFromClient = null;
  65. BufferedWriter outToClient = null;
  66. try {
  67. Annotate annotator = new Annotate(properties);
  68. System.out.println("-> Trying to listen port... " + port);
  69. socketServer = new ServerSocket(port);
  70. System.out.println("-> Connected and listening to port " + port);
  71. while (true) {
  72. try {
  73. activeSocket = socketServer.accept();
  74. inFromClient = new BufferedReader(new InputStreamReader(activeSocket.getInputStream(), "UTF-8"));
  75. outToClient = new BufferedWriter(new OutputStreamWriter(activeSocket.getOutputStream(), "UTF-8"));
  76. //get data from client
  77. String stringFromClient = getClientData(inFromClient);
  78. // annotate
  79. kafToString = getAnnotations(annotator, stringFromClient);
  80. } catch (JDOMException e) {
  81. kafToString = "\n-> ERROR: Badly formatted NAF document!!\n";
  82. sendDataToClient(outToClient, kafToString);
  83. continue;
  84. } catch (UnsupportedEncodingException e) {
  85. kafToString = "\n-> ERROR: UTF-8 not supported!!\n";
  86. sendDataToClient(outToClient, kafToString);
  87. continue;
  88. } catch (IOException e) {
  89. kafToString = "\n -> ERROR: Input data not correct!!\n";
  90. sendDataToClient(outToClient, kafToString);
  91. continue;
  92. }
  93. //send data to server after all exceptions and close the outToClient
  94. sendDataToClient(outToClient, kafToString);
  95. //close the resources
  96. inFromClient.close();
  97. activeSocket.close();
  98. } //end of processing block
  99. } catch (IOException e) {
  100. e.printStackTrace();
  101. System.err.println("-> IOException due to failing to create the TCP socket or to wrongly provided model path.");
  102. } finally {
  103. System.out.println("closing tcp socket...");
  104. try {
  105. socketServer.close();
  106. } catch (IOException e) {
  107. e.printStackTrace();
  108. }
  109. }
  110. }
  111. /**
  112. * Read data from the client and output to a String.
  113. * @param inFromClient the client inputstream
  114. * @return the string from the client
  115. */
  116. private String getClientData(BufferedReader inFromClient) {
  117. StringBuilder stringFromClient = new StringBuilder();
  118. try {
  119. String line;
  120. while ((line = inFromClient.readLine()) != null) {
  121. if (line.matches("<ENDOFDOCUMENT>")) {
  122. break;
  123. }
  124. stringFromClient.append(line).append("\n");
  125. if (line.matches("</NAF>")) {
  126. break;
  127. }
  128. }
  129. }catch (IOException e) {
  130. e.printStackTrace();
  131. }
  132. return stringFromClient.toString();
  133. }
  134. /**
  135. * Send data back to server after annotation.
  136. * @param outToClient the outputstream to the client
  137. * @param kafToString the string to be processed
  138. * @throws IOException if io error
  139. */
  140. private void sendDataToClient(BufferedWriter outToClient, String kafToString) throws IOException {
  141. outToClient.write(kafToString);
  142. outToClient.close();
  143. }
  144. /**
  145. * Named Entity annotator.
  146. *
  147. * @param annotator
  148. * the annotator
  149. * @param stringFromClient
  150. * the string to be annotated
  151. * @return the annotation result
  152. * @throws IOException
  153. * if io error
  154. * @throws JDOMException
  155. * if xml error
  156. */
  157. private String getAnnotations(Annotate annotator, String stringFromClient)
  158. throws IOException, JDOMException {
  159. // get a breader from the string coming from the client
  160. BufferedReader clientReader = new BufferedReader(new StringReader(
  161. stringFromClient));
  162. KAFDocument kaf = KAFDocument.createFromStream(clientReader);
  163. final KAFDocument.LinguisticProcessor newLp = kaf.addLinguisticProcessor(
  164. "constituency",
  165. "ixa-pipe-parse-" + Files.getNameWithoutExtension(model), this.version
  166. + "-" + this.commit);
  167. newLp.setBeginTimestamp();
  168. String kafToString = null;
  169. if (outputFormat.equalsIgnoreCase("oneline")) {
  170. kafToString = annotator.parseToOneline(kaf);
  171. } else {
  172. annotator.parseToKAF(kaf);
  173. newLp.setEndTimestamp();
  174. kafToString = kaf.toString();
  175. }
  176. return kafToString;
  177. }
  178. }