PageRenderTime 47ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/thirdparties-extension/org.apache.poi.xwpf.converter/src/main/java/org/apache/poi/xwpf/converter/internal/xhtml/XHTMLMapper.java

https://github.com/minstrelsy/xdocreport
Java | 392 lines | 275 code | 60 blank | 57 comment | 26 complexity | d39811e023caa6309adf08cd9e453a38 MD5 | raw file
  1. /**
  2. * Copyright (C) 2011 The XDocReport Team <xdocreport@googlegroups.com>
  3. *
  4. * All rights reserved.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining
  7. * a copy of this software and associated documentation files (the
  8. * "Software"), to deal in the Software without restriction, including
  9. * without limitation the rights to use, copy, modify, merge, publish,
  10. * distribute, sublicense, and/or sell copies of the Software, and to
  11. * permit persons to whom the Software is furnished to do so, subject to
  12. * the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be
  15. * included in all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  20. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  21. * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  22. * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  23. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24. */
  25. package org.apache.poi.xwpf.converter.internal.xhtml;
  26. import static org.apache.poi.xwpf.converter.internal.XWPFRunUtils.getRStyle;
  27. import java.io.OutputStream;
  28. import java.util.logging.Level;
  29. import java.util.logging.Logger;
  30. import org.apache.poi.xwpf.converter.IURIResolver;
  31. import org.apache.poi.xwpf.converter.internal.XWPFDocumentVisitor;
  32. import org.apache.poi.xwpf.converter.internal.itext.StyleEngineForIText;
  33. import org.apache.poi.xwpf.usermodel.XWPFDocument;
  34. import org.apache.poi.xwpf.usermodel.XWPFFooter;
  35. import org.apache.poi.xwpf.usermodel.XWPFHeader;
  36. import org.apache.poi.xwpf.usermodel.XWPFParagraph;
  37. import org.apache.poi.xwpf.usermodel.XWPFPictureData;
  38. import org.apache.poi.xwpf.usermodel.XWPFRun;
  39. import org.apache.poi.xwpf.usermodel.XWPFStyle;
  40. import org.apache.poi.xwpf.usermodel.XWPFTable;
  41. import org.apache.poi.xwpf.usermodel.XWPFTableCell;
  42. import org.apache.poi.xwpf.usermodel.XWPFTableRow;
  43. import org.apache.xmlbeans.XmlCursor;
  44. import org.apache.xmlbeans.XmlObject;
  45. import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture;
  46. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBr;
  47. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDecimalNumber;
  48. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDrawing;
  49. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTEmpty;
  50. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtrRef;
  51. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
  52. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
  53. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
  54. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString;
  55. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcPr;
  56. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
  57. import fr.opensagres.xdocreport.utils.StringEscapeUtils;
  58. import fr.opensagres.xdocreport.utils.StringUtils;
  59. import fr.opensagres.xdocreport.xhtml.extension.CSSStylePropertyConstants;
  60. import fr.opensagres.xdocreport.xhtml.extension.XHTMLConstants;
  61. import fr.opensagres.xdocreport.xhtml.extension.XHTMLPageContentBuffer;
  62. public class XHTMLMapper
  63. extends XWPFDocumentVisitor<XHTMLPageContentBuffer, XHTMLMasterPage>
  64. implements XHTMLConstants, CSSStylePropertyConstants
  65. {
  66. private static final String WORD_MEDIA = "word/media/";
  67. /**
  68. * Logger for this class
  69. */
  70. private static final Logger LOGGER = Logger.getLogger( StyleEngineForIText.class.getName() );
  71. private POIXHTMLPage xhtml = null;
  72. private final IURIResolver resolver;
  73. private final int indent;
  74. private StyleEngineForXHTML styleEngine;
  75. protected OutputStream out;
  76. public XHTMLMapper( XWPFDocument document, int indent, IURIResolver resolver )
  77. throws Exception
  78. {
  79. super( document );
  80. this.resolver = resolver;
  81. this.indent = indent;
  82. styleEngine = new StyleEngineForXHTML( document, false, indent, resolver );
  83. }
  84. @Override
  85. protected XHTMLPageContentBuffer startVisitDocument( OutputStream out )
  86. throws Exception
  87. {
  88. xhtml = new POIXHTMLPage( styleEngine, indent );
  89. this.out = out;
  90. xhtml.getPageBodyContentBody().startElementNotEnclosed( DIV_ELEMENT );
  91. // HTML style
  92. StringBuilder htmlStyle = XHTMLStyleUtil.getStyle( document, defaults );
  93. setAttributStyleIfNeeded( xhtml.getPageBodyContentBody(), htmlStyle );
  94. xhtml.getPageBodyContentBody().endElementNotEnclosed();
  95. return xhtml.getPageBodyContentBody();
  96. }
  97. @Override
  98. protected void endVisitDocument()
  99. throws Exception
  100. {
  101. xhtml.getPageBodyContentBody().endElement( DIV_ELEMENT );
  102. /*
  103. * Writer writer = xhtml.getWriter(); if (writer != null) { xhtml.save(writer); } else
  104. */
  105. {
  106. // OutputStream out = xhtml.getOutputStream();
  107. xhtml.save( out );
  108. }
  109. }
  110. protected XHTMLPageContentBuffer startVisitPargraph( XWPFParagraph paragraph, XHTMLPageContentBuffer parentContainer )
  111. throws Exception
  112. {
  113. styleEngine.startVisitPargraph( paragraph, null );
  114. parentContainer.startElementNotEnclosed( P_ELEMENT );
  115. if ( paragraph.getStyleID() != null )
  116. {
  117. if ( LOGGER.isLoggable( Level.FINE ) )
  118. {
  119. LOGGER.fine( "StyleID " + paragraph.getStyleID() );
  120. }
  121. parentContainer.setAttribute( CLASS_ATTR, paragraph.getStyleID() );
  122. }
  123. // HTML style
  124. StringBuilder htmlStyle =
  125. XHTMLStyleUtil.getStyle( paragraph, super.getXWPFStyle( paragraph.getStyleID() ), defaults );
  126. setAttributStyleIfNeeded( parentContainer, htmlStyle );
  127. parentContainer.endElementNotEnclosed();
  128. return parentContainer;
  129. }
  130. @Override
  131. protected void endVisitPargraph( XWPFParagraph paragraph, XHTMLPageContentBuffer parentContainer,
  132. XHTMLPageContentBuffer paragraphContainer )
  133. throws Exception
  134. {
  135. paragraphContainer.endElement( P_ELEMENT );
  136. }
  137. @Override
  138. protected void visitEmptyRun( XHTMLPageContentBuffer paragraphContainer )
  139. throws Exception
  140. {
  141. paragraphContainer.startEndElement( BR_ELEMENT );
  142. }
  143. @Override
  144. protected void visitRun( XWPFRun run, XHTMLPageContentBuffer paragraphContainer )
  145. throws Exception
  146. {
  147. CTR ctr = run.getCTR();
  148. // HTML style
  149. CTString rStyle = getRStyle( run );
  150. XWPFStyle runStyle = super.getXWPFStyle( rStyle != null ? rStyle.getVal() : null );
  151. StringBuilder htmlStyle =
  152. XHTMLStyleUtil.getStyle( run, runStyle, super.getXWPFStyle( run.getParagraph().getStyle() ), defaults );
  153. // Grab the text and tabs of the text run
  154. // Do so in a way that preserves the ordering
  155. XmlCursor c = ctr.newCursor();
  156. c.selectPath( "./*" );
  157. while ( c.toNextSelection() )
  158. {
  159. XmlObject o = c.getObject();
  160. if ( o instanceof CTText )
  161. {
  162. CTText ctText = (CTText) o;
  163. String tagName = o.getDomNode().getNodeName();
  164. // Field Codes (w:instrText, defined in spec sec. 17.16.23)
  165. // come up as instances of CTText, but we don't want them
  166. // in the normal text output
  167. if ( !"w:instrText".equals( tagName ) )
  168. {
  169. paragraphContainer.startElementNotEnclosed( SPAN_ELEMENT );
  170. setAttributStyleIfNeeded( paragraphContainer, htmlStyle );
  171. paragraphContainer.endElementNotEnclosed();
  172. // Set the text by escaping it with HTML.
  173. paragraphContainer.setText( StringEscapeUtils.escapeHtml( ctText.getStringValue() ) );
  174. paragraphContainer.endElement( SPAN_ELEMENT );
  175. }
  176. }
  177. else if ( o instanceof CTPTab )
  178. {
  179. visitTab( paragraphContainer, (CTPTab) o );
  180. }
  181. else if ( o instanceof CTBr )
  182. {
  183. visitBR( paragraphContainer, (CTBr) o );
  184. }
  185. else if ( o instanceof CTEmpty )
  186. {
  187. // Some inline text elements get returned not as
  188. // themselves, but as CTEmpty, owing to some odd
  189. // definitions around line 5642 of the XSDs
  190. // This bit works around it, and replicates the above
  191. // rules for that case
  192. String tagName = o.getDomNode().getNodeName();
  193. if ( "w:tab".equals( tagName ) )
  194. {
  195. visitTab( paragraphContainer, null );
  196. }
  197. if ( "w:br".equals( tagName ) )
  198. {
  199. visitBR( paragraphContainer, null );
  200. }
  201. if ( "w:cr".equals( tagName ) )
  202. {
  203. visitBR( paragraphContainer, null );
  204. }
  205. }
  206. else if ( o instanceof CTDrawing )
  207. {
  208. visitDrawing( (CTDrawing) o, paragraphContainer );
  209. }
  210. }
  211. c.dispose();
  212. // super.visitPictures( run, paragraphContainer );
  213. }
  214. private void visitTab( XHTMLPageContentBuffer paragraphContainer, CTPTab o )
  215. {
  216. // TODO Auto-generated method stub
  217. }
  218. private void visitBR( XHTMLPageContentBuffer paragraphContainer, CTBr br )
  219. {
  220. paragraphContainer.startEndElement( BR_ELEMENT );
  221. }
  222. @Override
  223. protected XHTMLPageContentBuffer startVisitTable( XWPFTable table, XHTMLPageContentBuffer tableContainer )
  224. throws Exception
  225. {
  226. tableContainer.startElementNotEnclosed( TABLE_ELEMENT );
  227. // XWPFStyle tableStyle = super.getStyle(table.getStyleID());
  228. // HTML style
  229. // StringBuilder htmlStyle = XHTMLStyleUtil.getStyle(table, tableStyle, defaults);
  230. // setAttributStyleIfNeeded(xhtml.getPageBodyContentBody(), htmlStyle);
  231. tableContainer.endElementNotEnclosed();
  232. return tableContainer;
  233. }
  234. @Override
  235. protected void visitTableRow( XWPFTableRow row, XHTMLPageContentBuffer tableContainer, boolean firstRow,
  236. boolean lastRow )
  237. throws Exception
  238. {
  239. tableContainer.startElementNotEnclosed( TR_ELEMENT );
  240. tableContainer.endElementNotEnclosed();
  241. super.visitTableRow( row, tableContainer, firstRow, lastRow );
  242. tableContainer.endElement( TR_ELEMENT );
  243. }
  244. @Override
  245. protected XHTMLPageContentBuffer startVisitTableCell( XWPFTableCell tableCell,
  246. XHTMLPageContentBuffer tableContainer, boolean firstRow,
  247. boolean lastRow, boolean firstCell, boolean lastCell )
  248. {
  249. tableContainer.startElementNotEnclosed( TD_ELEMENT );
  250. CTTcPr tcPr = tableCell.getCTTc().getTcPr();
  251. // Colspan
  252. Integer colspan = null;
  253. CTDecimalNumber gridSpan = tcPr.getGridSpan();
  254. if ( gridSpan != null )
  255. {
  256. colspan = gridSpan.getVal().intValue();
  257. }
  258. if ( colspan != null )
  259. {
  260. tableContainer.setAttribute( COLSPAN_ATTR, colspan );
  261. }
  262. // HTML style
  263. StringBuilder htmlStyle = XHTMLStyleUtil.getStyle( tableCell, defaults );
  264. setAttributStyleIfNeeded( tableContainer, htmlStyle );
  265. tableContainer.endElementNotEnclosed();
  266. return tableContainer;
  267. }
  268. @Override
  269. protected void endVisitTableCell( XWPFTableCell cell, XHTMLPageContentBuffer tableContainer,
  270. XHTMLPageContentBuffer tableCellContainer )
  271. {
  272. tableContainer.endElement( TD_ELEMENT );
  273. }
  274. @Override
  275. protected void endVisitTable( XWPFTable table, XHTMLPageContentBuffer parentContainer,
  276. XHTMLPageContentBuffer tableContainer )
  277. throws Exception
  278. {
  279. tableContainer.endElement( TABLE_ELEMENT );
  280. }
  281. @Override
  282. protected void visitPicture( CTPicture picture, XHTMLPageContentBuffer parentContainer )
  283. throws Exception
  284. {
  285. parentContainer.startElementNotEnclosed( IMG_ELEMENT );
  286. String blipId = picture.getBlipFill().getBlip().getEmbed();
  287. // Src attribute
  288. XWPFPictureData pictureData = super.getPictureDataByID( blipId );
  289. if ( pictureData != null )
  290. {
  291. String src = pictureData.getFileName();
  292. if ( StringUtils.isNotEmpty( src ) )
  293. {
  294. src = resolver.resolve( WORD_MEDIA + src );
  295. parentContainer.setAttribute( SRC_ATTR, src );
  296. }
  297. }
  298. StringBuilder htmlStyle = XHTMLStyleUtil.getStyle( picture );
  299. setAttributStyleIfNeeded( parentContainer, htmlStyle );
  300. parentContainer.endElementNotEnclosed();
  301. parentContainer.endElement( IMG_ELEMENT );
  302. }
  303. private void setAttributStyleIfNeeded( XHTMLPageContentBuffer buffer, StringBuilder htmlStyle )
  304. {
  305. if ( htmlStyle.length() > 0 )
  306. {
  307. buffer.setAttribute( STYLE_ATTR, htmlStyle.toString() );
  308. }
  309. }
  310. @Override
  311. protected void visitHeader( XWPFHeader header, CTHdrFtrRef headerRef, CTSectPr sectPr, XHTMLMasterPage masterPage )
  312. throws Exception
  313. {
  314. // TODO Auto-generated method stub
  315. }
  316. @Override
  317. protected void visitFooter( XWPFFooter footer, CTHdrFtrRef footerRef, CTSectPr sectPr, XHTMLMasterPage masterPage )
  318. throws Exception
  319. {
  320. // TODO Auto-generated method stub
  321. }
  322. @Override
  323. protected void setActiveMasterPage( XHTMLMasterPage masterPage )
  324. {
  325. // TODO Auto-generated method stub
  326. }
  327. @Override
  328. protected XHTMLMasterPage createMasterPage( CTSectPr sectPr )
  329. {
  330. return new XHTMLMasterPage( sectPr );
  331. }
  332. }