PageRenderTime 38ms CodeModel.GetById 42ms RepoModel.GetById 1ms app.codeStats 0ms

/Application/src/org/tonguetied/datatransfer/importing/ExcelLanguageCentricParser.java

http://tongue-tied.googlecode.com/
Java | 289 lines | 207 code | 14 blank | 68 comment | 43 complexity | 2e7f25beef84ca85f6e7d3970ce4c69a MD5 | raw file
Possible License(s): AGPL-3.0, EPL-1.0, LGPL-2.1, Apache-2.0, BSD-3-Clause, Unlicense, GPL-2.0, LGPL-2.0
  1. /*
  2. * Copyright 2008 The Tongue-Tied Authors
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  5. * use this file except in compliance with the License. You may obtain a copy
  6. * of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. * License for the specific language governing permissions and limitations
  14. * under the License.
  15. */
  16. package org.tonguetied.datatransfer.importing;
  17. import java.util.ArrayList;
  18. import java.util.HashMap;
  19. import java.util.List;
  20. import java.util.Map;
  21. import org.apache.log4j.Logger;
  22. import org.apache.poi.hssf.record.BOFRecord;
  23. import org.apache.poi.hssf.record.BoundSheetRecord;
  24. import org.apache.poi.hssf.record.LabelSSTRecord;
  25. import org.apache.poi.hssf.record.NumberRecord;
  26. import org.apache.poi.hssf.record.Record;
  27. import org.apache.poi.hssf.record.RowRecord;
  28. import org.apache.poi.hssf.record.SSTRecord;
  29. import org.tonguetied.datatransfer.common.FormatType;
  30. import org.tonguetied.datatransfer.importing.ImportException.ImportErrorCode;
  31. import org.tonguetied.keywordmanagement.Bundle;
  32. import org.tonguetied.keywordmanagement.Country;
  33. import org.tonguetied.keywordmanagement.Keyword;
  34. import org.tonguetied.keywordmanagement.KeywordService;
  35. import org.tonguetied.keywordmanagement.Language;
  36. import org.tonguetied.keywordmanagement.Translation;
  37. import org.tonguetied.keywordmanagement.Country.CountryCode;
  38. import org.tonguetied.keywordmanagement.Language.LanguageCode;
  39. import org.tonguetied.keywordmanagement.Translation.TranslationState;
  40. /**
  41. * This class parses excel spreadsheets in the format for
  42. * {@link FormatType#xlsLanguage}. A map of {@link Keyword}s and their
  43. * {@link Translation}s are built by processing each cell of the spread sheet.
  44. *
  45. *
  46. * @author bsion
  47. *
  48. */
  49. public class ExcelLanguageCentricParser implements ExcelParser
  50. {
  51. private SSTRecord sstrec;
  52. private List<Language> languages;
  53. private Map<String, Keyword> keywords;
  54. private Keyword keyword;
  55. private Translation baseTranslation;
  56. private int lastColOfRow;
  57. private KeywordService keywordService;
  58. private List<ImportErrorCode> errorCodes;
  59. private static final Logger logger = Logger.getLogger(ExcelLanguageCentricParser.class);
  60. /**
  61. * Create a new instance of ExcelDataParser.
  62. *
  63. * @param keywordService
  64. */
  65. public ExcelLanguageCentricParser(KeywordService keywordService)
  66. {
  67. this.languages = new ArrayList<Language>();
  68. this.keywords = new HashMap<String, Keyword>();
  69. this.keywordService = keywordService;
  70. this.errorCodes = new ArrayList<ImportErrorCode>();
  71. }
  72. /**
  73. * This method listens for incoming records and handles them as required.
  74. *
  75. * @param record The record that was found while reading.
  76. */
  77. public void processRecord(Record record)
  78. {
  79. if (record == null)
  80. {
  81. if (logger.isInfoEnabled()) logger.info("no record to process");
  82. }
  83. else
  84. {
  85. switch (record.getSid())
  86. {
  87. // the BOFRecord can represent either the beginning of a sheet
  88. // or the workbook
  89. case BOFRecord.sid:
  90. if (!(record instanceof BOFRecord))
  91. throw new ImportException("unknown excel element", null);
  92. final BOFRecord bof = (BOFRecord) record;
  93. if (bof.getType() == BOFRecord.TYPE_WORKBOOK) {
  94. if (logger.isInfoEnabled())
  95. logger.info("Processing excel workbook");
  96. // assigned to the class level member
  97. }
  98. else if (bof.getType() == BOFRecord.TYPE_WORKSHEET) {
  99. if (logger.isInfoEnabled())
  100. logger.info("recordsize = " + bof.getRecordSize() +
  101. ", required version = " +
  102. bof.getRequiredVersion());
  103. }
  104. break;
  105. case BoundSheetRecord.sid:
  106. if (!(record instanceof BoundSheetRecord))
  107. throw new ImportException("unknown excel element", null);
  108. final BoundSheetRecord bsr = (BoundSheetRecord) record;
  109. // sheets named have no impact on generating query
  110. if (logger.isDebugEnabled())
  111. logger.debug("processing sheet: "+ bsr.getSheetname());
  112. break;
  113. case RowRecord.sid:
  114. if (!(record instanceof RowRecord))
  115. throw new ImportException("unknown excel element", null);
  116. final RowRecord rowrec = (RowRecord) record;
  117. lastColOfRow = rowrec.getLastCol();
  118. // if (rowrec.getRowNumber() > 0) {
  119. // if (logger.isDebugEnabled())
  120. // logger.debug("creating new keyword instance");
  121. // keyword = new Keyword();
  122. // }
  123. break;
  124. case NumberRecord.sid:
  125. if (!(record instanceof NumberRecord))
  126. throw new ImportException("unknown excel element", null);
  127. final NumberRecord numrec = (NumberRecord) record;
  128. logger.warn("Cell [" + numrec.getRow() + "," +
  129. numrec.getColumn() +
  130. "] expecting a string value not numeric: " +
  131. numrec.getValue() + ". Ignoring value");
  132. break;
  133. case SSTRecord.sid:
  134. if (!(record instanceof SSTRecord))
  135. throw new ImportException("unknown excel element", null);
  136. // SSTRecords store a array of unique strings used in Excel.
  137. sstrec = (SSTRecord) record;
  138. if (logger.isDebugEnabled()) {
  139. logger.debug("file contains " +
  140. sstrec.getNumUniqueStrings() + " unique strings");
  141. }
  142. break;
  143. case LabelSSTRecord.sid:
  144. if (!(record instanceof LabelSSTRecord))
  145. throw new ImportException("unknown excel element", null);
  146. final LabelSSTRecord lrec = (LabelSSTRecord) record;
  147. if (lrec.getRow() == 0)
  148. {
  149. processHeader(lrec);
  150. }
  151. else
  152. {
  153. if (lrec.getColumn() == 0) {
  154. String keywordStr =
  155. sstrec.getString(lrec.getSSTIndex()).getString();
  156. loadKeyword(keywordStr);
  157. }
  158. else if (lrec.getColumn() == 1) {
  159. keyword.setContext(
  160. sstrec.getString(lrec.getSSTIndex()).getString());
  161. }
  162. else if (lrec.getColumn() == 2) {
  163. baseTranslation = new Translation();
  164. baseTranslation.setKeyword(keyword);
  165. String name = sstrec.getString(lrec.getSSTIndex()).getString();
  166. Bundle bundle = keywordService.getBundleByName(name);
  167. baseTranslation.setBundle(bundle);
  168. }
  169. else if (lrec.getColumn() == 3) {
  170. String colHeader =
  171. sstrec.getString(lrec.getSSTIndex()).getString();
  172. String[] headers = colHeader.split(":");
  173. CountryCode code = CountryCode.valueOf(headers[0]);
  174. Country country = keywordService.getCountry(code);
  175. baseTranslation.setCountry(country);
  176. }
  177. else {
  178. Language language =
  179. languages.get(lrec.getColumn()-4);
  180. String value = sstrec.getString(lrec.getSSTIndex()).getString();
  181. Translation translation =
  182. baseTranslation.deepClone();
  183. if (language.getCode() == LanguageCode.zht) {
  184. language =
  185. keywordService.getLanguage(LanguageCode.zh);
  186. Country country = keywordService.getCountry(CountryCode.TW);
  187. translation.setCountry(country);
  188. }
  189. translation.setLanguage(language);
  190. translation.setState(TranslationState.UNVERIFIED);
  191. translation.setValue(value);
  192. keyword.addTranslation(translation);
  193. // System.out.println("String cell found with value "
  194. // + sstrec.getString(lrec.getSSTIndex()));
  195. }
  196. if (isLastColumn(lrec.getColumn())) {
  197. keywords.put(keyword.getKeyword(), keyword);
  198. }
  199. }
  200. break;
  201. default:
  202. break;
  203. }
  204. }
  205. }
  206. /**
  207. * @param keywordStr
  208. */
  209. private void loadKeyword(final String keywordStr)
  210. {
  211. keyword = keywords.get(keywordStr);
  212. if (keyword == null) {
  213. if (logger.isDebugEnabled())
  214. logger.debug("creating new keyword instance");
  215. keyword = new Keyword();
  216. keyword.setKeyword(keywordStr);
  217. }
  218. }
  219. /**
  220. * Column 0 keyword
  221. * Column 1 context
  222. * Column 2 Bundle
  223. * Column 3 Country
  224. * Column 4..n Languages
  225. * @param lrec
  226. */
  227. private void processHeader(LabelSSTRecord lrec)
  228. {
  229. if (lrec.getColumn() > 3)
  230. {
  231. String colHeader = sstrec.getString(lrec.getSSTIndex()).getString();
  232. String[] headers = colHeader.split(":");
  233. LanguageCode code = LanguageCode.valueOf(headers[0]);
  234. Language language;
  235. if (LanguageCode.zht == code)
  236. {
  237. language = new Language();
  238. language.setCode(code);
  239. language.setName("Traditional Chinese");
  240. }
  241. else
  242. {
  243. language = keywordService.getLanguage(code);
  244. }
  245. languages.add(language);
  246. }
  247. }
  248. /**
  249. * Determine if the column is the last column of the row in the spreadsheet.
  250. *
  251. * @param columnNum the column number to evaluate
  252. * @return <code>true</code> if the column is the last column,
  253. * <code>false</code> otherwise
  254. */
  255. public boolean isLastColumn(final short columnNum)
  256. {
  257. return lastColOfRow-1 == columnNum;
  258. }
  259. /**
  260. * @return the list of {@link Language}s used in this file, or an empty
  261. * list if no {@link Language}s were specified
  262. */
  263. protected List<Language> getLanguages()
  264. {
  265. return this.languages;
  266. }
  267. public List<ImportErrorCode> getErrorCodes()
  268. {
  269. return errorCodes;
  270. }
  271. public Map<String, Keyword> getKeywords()
  272. {
  273. return keywords;
  274. }
  275. }