PageRenderTime 51ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/okapi/steps/xliffkit/src/main/java/net/sf/okapi/steps/xliffkit/opc/OPCPackageReader.java

http://okapi.googlecode.com/
Java | 289 lines | 218 code | 35 blank | 36 comment | 38 complexity | c38eed2fa803373d17e65a9b7ac3fcba MD5 | raw file
Possible License(s): LGPL-2.1, LGPL-3.0
  1. /*===========================================================================
  2. Copyright (C) 2008-2011 by the Okapi Framework contributors
  3. -----------------------------------------------------------------------------
  4. This library is free software; you can redistribute it and/or modify it
  5. under the terms of the GNU Lesser General Public License as published by
  6. the Free Software Foundation; either version 2.1 of the License, or (at
  7. your option) any later version.
  8. This library is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
  11. General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public License
  13. along with this library; if not, write to the Free Software Foundation,
  14. Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  15. See also the full LGPL text here: http://www.gnu.org/copyleft/lesser.html
  16. ===========================================================================*/
  17. package net.sf.okapi.steps.xliffkit.opc;
  18. import java.io.File;
  19. import java.io.IOException;
  20. import java.util.LinkedList;
  21. import net.sf.okapi.common.Event;
  22. import net.sf.okapi.common.EventType;
  23. import net.sf.okapi.common.IParameters;
  24. import net.sf.okapi.common.LocaleId;
  25. import net.sf.okapi.common.Util;
  26. import net.sf.okapi.common.exceptions.OkapiIOException;
  27. import net.sf.okapi.common.filters.AbstractFilter;
  28. import net.sf.okapi.common.filterwriter.IFilterWriter;
  29. import net.sf.okapi.common.resource.ITextUnit;
  30. import net.sf.okapi.common.resource.Property;
  31. import net.sf.okapi.common.resource.RawDocument;
  32. import net.sf.okapi.common.resource.StartDocument;
  33. import net.sf.okapi.common.resource.StartSubDocument;
  34. import net.sf.okapi.filters.xliff.XLIFFFilter;
  35. import net.sf.okapi.lib.beans.sessions.OkapiJsonSession;
  36. import net.sf.okapi.steps.xliffkit.reader.TextUnitMerger;
  37. import org.apache.poi.openxml4j.opc.OPCPackage;
  38. import org.apache.poi.openxml4j.opc.PackagePart;
  39. public class OPCPackageReader extends AbstractFilter {
  40. private OPCPackage pack;
  41. private OkapiJsonSession session = new OkapiJsonSession();
  42. private Event event;
  43. private LinkedList<PackagePart> coreParts = new LinkedList<PackagePart>();
  44. private PackagePart activePart;
  45. private PackagePart resourcesPart;
  46. private XLIFFFilter xliffReader;
  47. private TextUnitMerger merger;
  48. private LocaleId srcLoc;
  49. private String outputEncoding;
  50. private IFilterWriter filterWriter;
  51. private boolean generateTargets = false;
  52. private String outputPath;
  53. private boolean cacheEvents = false;
  54. private LinkedList<Event> events = new LinkedList<Event>();
  55. //private Event sde;
  56. // private LocaleId trgLoc;
  57. public OPCPackageReader(TextUnitMerger merger) {
  58. super();
  59. this.merger = merger;
  60. }
  61. @Override
  62. protected boolean isUtf8Bom() {
  63. return false;
  64. }
  65. @Override
  66. protected boolean isUtf8Encoding() {
  67. return false;
  68. }
  69. private void writeEvent(Event event) {
  70. if (!generateTargets) return;
  71. if (filterWriter == null) return;
  72. if (events == null) return;
  73. if (cacheEvents) {
  74. events.add(event);
  75. }
  76. else {
  77. while (events.size() > 0)
  78. filterWriter.handleEvent(events.poll());
  79. filterWriter.handleEvent(event);
  80. }
  81. }
  82. @Override
  83. public void close() {
  84. clearParts();
  85. session.end();
  86. try {
  87. pack.close();
  88. } catch (IOException e) {
  89. throw new OkapiIOException("OPCPackageReader: cannot close package");
  90. }
  91. }
  92. private void clearParts() {
  93. coreParts.clear();
  94. activePart = null;
  95. resourcesPart = null;
  96. }
  97. @Override
  98. public IParameters getParameters() {
  99. return null;
  100. }
  101. @Override
  102. public boolean hasNext() {
  103. return event != null;
  104. }
  105. @Override
  106. public Event next() {
  107. Event prev = event;
  108. event = deserializeEvent();
  109. return prev;
  110. }
  111. /*
  112. * Deserializes events from JSON files in OPC package
  113. * @return null if no events are available
  114. */
  115. private Event deserializeEvent() {
  116. Event event = null;
  117. if (activePart == null) {
  118. activePart = coreParts.poll();
  119. if (activePart == null)
  120. return null;
  121. else
  122. resourcesPart = OPCPackageUtil.getResourcesPart(activePart);
  123. try {
  124. if (resourcesPart != null)
  125. session.start(resourcesPart.getInputStream());
  126. } catch (IOException e) {
  127. throw new OkapiIOException("OPCPackageReader: cannot get resources from package", e);
  128. }
  129. // Create XLIFF filter for the core document
  130. if (xliffReader != null) {
  131. xliffReader.close();
  132. xliffReader = null;
  133. }
  134. xliffReader = new XLIFFFilter();
  135. try {
  136. // Here targetLocale is set to srcLoc, actual target locale is taken from the StartSubDocument's property targetLocale
  137. xliffReader.open(new RawDocument(activePart.getInputStream(), "UTF-8", srcLoc, srcLoc));
  138. } catch (IOException e) {
  139. throw new RuntimeException(String.format("OPCPackageReader: cannot open input stream for %s",
  140. activePart.getPartName().getName()), e);
  141. }
  142. }
  143. event = session.deserialize(Event.class);
  144. if (event == null) {
  145. session.end();
  146. activePart = null;
  147. return deserializeEvent(); // Recursion until all parts are tried
  148. } else
  149. switch (event.getEventType()) {
  150. case START_DOCUMENT:
  151. processStartDocument(event);
  152. break;
  153. case END_DOCUMENT:
  154. processEndDocument(event);
  155. break;
  156. case TEXT_UNIT:
  157. processTextUnit(event); // updates tu with a target from xliff
  158. break;
  159. case START_SUBDOCUMENT:
  160. case START_GROUP:
  161. case END_SUBDOCUMENT:
  162. case END_GROUP:
  163. case DOCUMENT_PART:
  164. writeEvent(event);
  165. }
  166. return event;
  167. }
  168. @Override
  169. public void open(RawDocument input) {
  170. open(input, false);
  171. }
  172. @Override
  173. public void open(RawDocument input, boolean generateSkeleton) {
  174. try {
  175. srcLoc = input.getSourceLocale();
  176. //trgLoc = input.getTargetLocale();
  177. pack = OPCPackage.open(input.getStream());
  178. } catch (Exception e) {
  179. throw new OkapiIOException("OPCPackageReader: cannot open package", e);
  180. }
  181. clearParts();
  182. coreParts.addAll(OPCPackageUtil.getCoreParts(pack));
  183. event = deserializeEvent();
  184. }
  185. @Override
  186. public void setParameters(IParameters params) {
  187. }
  188. private ITextUnit getNextXliffTu() {
  189. if (xliffReader == null)
  190. throw new RuntimeException("OPCPackageReader: xliffReader is not initialized");
  191. Event ev = null;
  192. while (xliffReader.hasNext()) {
  193. ev = xliffReader.next();
  194. if (ev == null) return null;
  195. if (ev.getEventType() == EventType.START_SUBDOCUMENT) {
  196. StartSubDocument startSubDoc = (StartSubDocument)ev.getResource();
  197. Property prop = startSubDoc.getProperty("targetLanguage");
  198. if ( prop != null ) {
  199. LocaleId trgLoc = LocaleId.fromString(prop.getValue());
  200. merger.setTrgLoc(trgLoc);
  201. filterWriter.setOptions(trgLoc, outputEncoding);
  202. cacheEvents = false;
  203. }
  204. }
  205. if (ev.getEventType() == EventType.TEXT_UNIT) {
  206. return ev.getTextUnit();
  207. }
  208. }
  209. return null;
  210. }
  211. private void processStartDocument (Event event) {
  212. // Translate src doc name for writers
  213. StartDocument startDoc = (StartDocument)event.getResource();
  214. String srcName = startDoc.getName();
  215. String partName = activePart.getPartName().toString();
  216. String outFileName = outputPath + Util.getDirectoryName(partName) + "/" + Util.getFilename(srcName, true);
  217. filterWriter = startDoc.getFilterWriter();
  218. //System.out.println(startDoc.getName());
  219. if (generateTargets) {
  220. File outputFile = new File(outFileName);
  221. Util.createDirectories(outputFile.getAbsolutePath());
  222. filterWriter.setOutput(outputFile.getAbsolutePath());
  223. //sde = event; // Store for delayed processing
  224. cacheEvents = true; // In case output locale is not known until START_SUBDOCUMENT
  225. writeEvent(event);
  226. }
  227. }
  228. private void processEndDocument (Event event) {
  229. writeEvent(event);
  230. if (generateTargets)
  231. filterWriter.close();
  232. }
  233. private void processTextUnit(Event event) {
  234. if (merger == null) return;
  235. ITextUnit tu = event.getTextUnit();
  236. ITextUnit xtu = getNextXliffTu();
  237. if (xtu == null) return;
  238. // // Set tu source from xtu source
  239. // TextContainer tc = tu.getSource(); // tu source is empty string + codes in JSON
  240. // TextFragment xtf = xtu.getSource().getUnSegmentedContentCopy();
  241. // tc.append(xtf.getCodedText());
  242. // //tu.setSource(xtu.getSource());
  243. merger.mergeTargets(tu, xtu);
  244. writeEvent(event);
  245. }
  246. public void setGeneratorOptions(String outputEncoding, String outputPath) {
  247. this.outputEncoding = outputEncoding;
  248. this.generateTargets = !Util.isEmpty(outputPath);
  249. this.outputPath = outputPath;
  250. }
  251. }