PageRenderTime 5902ms CodeModel.GetById 42ms RepoModel.GetById 1ms app.codeStats 0ms

/Application/src/org/tonguetied/datatransfer/importing/ExcelKeywordParser.java

http://tongue-tied.googlecode.com/
Java | 271 lines | 217 code | 12 blank | 42 comment | 62 complexity | 36824ce7c3465bd19ea0572a1c46eb9f MD5 | raw file
Possible License(s): AGPL-3.0, EPL-1.0, LGPL-2.1, Apache-2.0, BSD-3-Clause, Unlicense, GPL-2.0, LGPL-2.0
  1. /*
  2. * Copyright 2008 The Tongue-Tied Authors
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  5. * use this file except in compliance with the License. You may obtain a copy
  6. * of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. * License for the specific language governing permissions and limitations
  14. * under the License.
  15. */
  16. package org.tonguetied.datatransfer.importing;
  17. import java.util.ArrayList;
  18. import java.util.HashMap;
  19. import java.util.List;
  20. import java.util.Map;
  21. import org.apache.commons.lang.StringUtils;
  22. import org.apache.log4j.Logger;
  23. import org.apache.poi.hssf.record.BOFRecord;
  24. import org.apache.poi.hssf.record.BoundSheetRecord;
  25. import org.apache.poi.hssf.record.LabelSSTRecord;
  26. import org.apache.poi.hssf.record.NumberRecord;
  27. import org.apache.poi.hssf.record.Record;
  28. import org.apache.poi.hssf.record.RowRecord;
  29. import org.apache.poi.hssf.record.SSTRecord;
  30. import org.tonguetied.datatransfer.common.FormatType;
  31. import org.tonguetied.datatransfer.importing.ImportException.ImportErrorCode;
  32. import org.tonguetied.keywordmanagement.Bundle;
  33. import org.tonguetied.keywordmanagement.Country;
  34. import org.tonguetied.keywordmanagement.Keyword;
  35. import org.tonguetied.keywordmanagement.KeywordService;
  36. import org.tonguetied.keywordmanagement.Language;
  37. import org.tonguetied.keywordmanagement.Translation;
  38. import org.tonguetied.keywordmanagement.Country.CountryCode;
  39. import org.tonguetied.keywordmanagement.Language.LanguageCode;
  40. import org.tonguetied.keywordmanagement.Translation.TranslationState;
  41. /**
  42. * This class parses excel spreadsheets in the format for
  43. * {@link FormatType#xls}. A map of {@link Keyword}s and their
  44. * {@link Translation}s are built by processing each cell of the spread sheet.
  45. *
  46. * @author bsion
  47. *
  48. */
  49. public class ExcelKeywordParser implements ExcelParser
  50. {
  51. private SSTRecord sstrec;
  52. private Map<String, Keyword> keywords;
  53. private Keyword keyword;
  54. private Translation baseTranslation;
  55. private RowType rowType;
  56. private KeywordService keywordService;
  57. private List<ImportErrorCode> errorCodes;
  58. private static final Logger logger = Logger.getLogger(ExcelKeywordParser.class);
  59. /**
  60. * Create a new instance of ExcelKeywordParser.
  61. *
  62. * @param keywordService
  63. */
  64. public ExcelKeywordParser(KeywordService keywordService)
  65. {
  66. this.keywordService = keywordService;
  67. this.keywords = new HashMap<String, Keyword>();
  68. this.errorCodes = new ArrayList<ImportErrorCode>();
  69. }
  70. public Map<String, Keyword> getKeywords()
  71. {
  72. return keywords;
  73. }
  74. public List<ImportErrorCode> getErrorCodes()
  75. {
  76. return errorCodes;
  77. }
  78. public void processRecord(Record record)
  79. {
  80. if (record == null)
  81. {
  82. if (logger.isInfoEnabled()) logger.info("no record to process");
  83. }
  84. else
  85. {
  86. switch (record.getSid())
  87. {
  88. // the BOFRecord can represent either the beginning of a sheet
  89. // or the workbook
  90. case BOFRecord.sid:
  91. if (!(record instanceof BOFRecord))
  92. throw new ImportException("unknown excel element", null);
  93. final BOFRecord bof = (BOFRecord) record;
  94. if (bof.getType() == BOFRecord.TYPE_WORKBOOK) {
  95. if (logger.isInfoEnabled())
  96. logger.info("Processing excel workbook");
  97. // assigned to the class level member
  98. }
  99. else if (bof.getType() == BOFRecord.TYPE_WORKSHEET) {
  100. if (logger.isInfoEnabled())
  101. logger.info("recordsize = " + bof.getRecordSize() +
  102. ", required version = " +
  103. bof.getRequiredVersion());
  104. }
  105. break;
  106. case BoundSheetRecord.sid:
  107. if (!(record instanceof BoundSheetRecord))
  108. throw new ImportException("unknown excel element", null);
  109. final BoundSheetRecord bsr = (BoundSheetRecord) record;
  110. // sheets named have no impact on generating query
  111. if (logger.isDebugEnabled())
  112. logger.debug("processing sheet: "+ bsr.getSheetname());
  113. break;
  114. case RowRecord.sid:
  115. if (!(record instanceof RowRecord))
  116. throw new ImportException("unknown excel element", null);
  117. if (logger.isDebugEnabled())
  118. {
  119. final RowRecord rowrec = (RowRecord) record;
  120. logger.debug("processing row: " + rowrec.getRowNumber());
  121. }
  122. break;
  123. case NumberRecord.sid:
  124. if (!(record instanceof NumberRecord))
  125. throw new ImportException("unknown excel element", null);
  126. final NumberRecord numrec = (NumberRecord) record;
  127. logger.warn("Cell [" + numrec.getRow() + "," +
  128. numrec.getColumn() +
  129. "] expecting a string value not numeric: " +
  130. numrec.getValue() + ". Ignoring value");
  131. break;
  132. case SSTRecord.sid:
  133. if (!(record instanceof SSTRecord))
  134. throw new ImportException("unknown excel element", null);
  135. // SSTRecords store a array of unique strings used in Excel.
  136. sstrec = (SSTRecord) record;
  137. if (logger.isDebugEnabled()) {
  138. logger.debug("file contains " +
  139. sstrec.getNumUniqueStrings() + " unique strings");
  140. }
  141. break;
  142. case LabelSSTRecord.sid:
  143. if (!(record instanceof LabelSSTRecord))
  144. throw new ImportException("unknown excel element", null);
  145. final LabelSSTRecord lrec = (LabelSSTRecord) record;
  146. if (lrec.getRow() != 0)
  147. {
  148. if (lrec.getColumn() == 0)
  149. {
  150. evaluateRowType(lrec);
  151. }
  152. else
  153. {
  154. final String cellValue = sstrec.getString(lrec.getSSTIndex()).getString();
  155. if (lrec.getColumn() == 1)
  156. {
  157. switch (rowType)
  158. {
  159. case keyword:
  160. // there were no translations for the previous keyword, so add to keywords
  161. if (keyword != null && keyword.getTranslations().isEmpty())
  162. keywords.put(keyword.getKeyword(), keyword);
  163. loadKeyword(cellValue);
  164. break;
  165. case context:
  166. if (StringUtils.isNotBlank(cellValue))
  167. keyword.setContext(cellValue);
  168. break;
  169. default:
  170. break;
  171. }
  172. }
  173. else if (lrec.getColumn() == 2)
  174. {
  175. baseTranslation = new Translation();
  176. baseTranslation.setKeyword(keyword);
  177. final LanguageCode code =
  178. ImporterUtils.evaluateLanguageCode(cellValue, errorCodes);
  179. Language language = null;
  180. if (code != null)
  181. {
  182. language = keywordService.getLanguage(code);
  183. if (language == null)
  184. errorCodes.add(ImportErrorCode.unknownLanguage);
  185. }
  186. baseTranslation.setLanguage(language);
  187. }
  188. else if (lrec.getColumn() == 4)
  189. {
  190. final CountryCode code =
  191. ImporterUtils.evaluateCountryCode(cellValue, errorCodes);
  192. Country country = null;
  193. if (code != null)
  194. {
  195. country = keywordService.getCountry(code);
  196. if (country == null)
  197. errorCodes.add(ImportErrorCode.unknownCountry);
  198. }
  199. baseTranslation.setCountry(country);
  200. }
  201. else if (lrec.getColumn() == 6)
  202. {
  203. final Bundle bundle = keywordService.getBundleByName(cellValue);
  204. if (bundle == null)
  205. errorCodes.add(ImportErrorCode.unknownBundle);
  206. baseTranslation.setBundle(bundle);
  207. }
  208. else if (lrec.getColumn() == 7)
  209. {
  210. final TranslationState state =
  211. ImporterUtils.evaluateTranslationState(cellValue, errorCodes);
  212. baseTranslation.setState(state);
  213. }
  214. else if (lrec.getColumn() == 8)
  215. {
  216. baseTranslation.setValue(cellValue);
  217. keyword.addTranslation(baseTranslation);
  218. keywords.put(keyword.getKeyword(), keyword);
  219. }
  220. }
  221. }
  222. break;
  223. default:
  224. break;
  225. }
  226. }
  227. }
  228. /**
  229. * Determine the type of the keyword attribute for the excel row.
  230. *
  231. * @param lrec the excel cell to evaluate
  232. */
  233. private void evaluateRowType(final LabelSSTRecord lrec)
  234. {
  235. final String value =
  236. sstrec.getString(lrec.getSSTIndex()).getString();
  237. if ("Keyword".equals(value))
  238. rowType = RowType.keyword;
  239. else if ("Context".equals(value))
  240. rowType = RowType.context;
  241. }
  242. /**
  243. * @param keywordStr the keyword string to evaluate
  244. */
  245. private void loadKeyword(final String keywordStr)
  246. {
  247. keyword = keywords.get(keywordStr);
  248. if (keyword == null) {
  249. if (logger.isDebugEnabled())
  250. logger.debug("creating new keyword instance");
  251. keyword = new Keyword();
  252. keyword.setKeyword(keywordStr);
  253. }
  254. }
  255. private static enum RowType
  256. {
  257. keyword, context
  258. }
  259. }