PageRenderTime 133ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/thirdparties-extension/org.apache.poi.xwpf.converter.core/src/main/java/org/apache/poi/xwpf/converter/core/XWPFDocumentVisitor.java

https://github.com/minstrelsy/xdocreport
Java | 1397 lines | 996 code | 118 blank | 283 comment | 182 complexity | 2ee0d9cfda1138ee93328b854d5e99ce MD5 | raw file
  1. /**
  2. * Copyright (C) 2011-2012 The XDocReport Team <xdocreport@googlegroups.com>
  3. *
  4. * All rights reserved.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining
  7. * a copy of this software and associated documentation files (the
  8. * "Software"), to deal in the Software without restriction, including
  9. * without limitation the rights to use, copy, modify, merge, publish,
  10. * distribute, sublicense, and/or sell copies of the Software, and to
  11. * permit persons to whom the Software is furnished to do so, subject to
  12. * the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be
  15. * included in all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  20. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  21. * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  22. * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  23. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24. */
  25. package org.apache.poi.xwpf.converter.core;
  26. import java.io.IOException;
  27. import java.math.BigInteger;
  28. import java.util.ArrayList;
  29. import java.util.Collections;
  30. import java.util.HashMap;
  31. import java.util.List;
  32. import java.util.Map;
  33. import java.util.logging.Level;
  34. import java.util.logging.Logger;
  35. import org.apache.poi.openxml4j.opc.PackagePart;
  36. import org.apache.poi.xwpf.converter.core.styles.XWPFStylesDocument;
  37. import org.apache.poi.xwpf.converter.core.utils.DxaUtil;
  38. import org.apache.poi.xwpf.converter.core.utils.StringUtils;
  39. import org.apache.poi.xwpf.converter.core.utils.XWPFRunHelper;
  40. import org.apache.poi.xwpf.converter.core.utils.XWPFTableUtil;
  41. import org.apache.poi.xwpf.usermodel.BodyElementType;
  42. import org.apache.poi.xwpf.usermodel.BodyType;
  43. import org.apache.poi.xwpf.usermodel.IBody;
  44. import org.apache.poi.xwpf.usermodel.IBodyElement;
  45. import org.apache.poi.xwpf.usermodel.XWPFAbstractNum;
  46. import org.apache.poi.xwpf.usermodel.XWPFDocument;
  47. import org.apache.poi.xwpf.usermodel.XWPFFooter;
  48. import org.apache.poi.xwpf.usermodel.XWPFHeader;
  49. import org.apache.poi.xwpf.usermodel.XWPFHeaderFooter;
  50. import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
  51. import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
  52. import org.apache.poi.xwpf.usermodel.XWPFNum;
  53. import org.apache.poi.xwpf.usermodel.XWPFParagraph;
  54. import org.apache.poi.xwpf.usermodel.XWPFPictureData;
  55. import org.apache.poi.xwpf.usermodel.XWPFRun;
  56. import org.apache.poi.xwpf.usermodel.XWPFStyle;
  57. import org.apache.poi.xwpf.usermodel.XWPFTable;
  58. import org.apache.poi.xwpf.usermodel.XWPFTableCell;
  59. import org.apache.poi.xwpf.usermodel.XWPFTableRow;
  60. import org.apache.xmlbeans.XmlCursor;
  61. import org.apache.xmlbeans.XmlException;
  62. import org.apache.xmlbeans.XmlObject;
  63. import org.apache.xmlbeans.XmlTokenSource;
  64. import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObject;
  65. import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObjectData;
  66. import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture;
  67. import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTAnchor;
  68. import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline;
  69. import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTPosH;
  70. import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTPosV;
  71. import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTWrapSquare;
  72. import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STRelFromH;
  73. import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STRelFromV;
  74. import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.STWrapText;
  75. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
  76. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBr;
  77. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDecimalNumber;
  78. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDrawing;
  79. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTEmpty;
  80. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtr;
  81. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtrRef;
  82. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
  83. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTLvl;
  84. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTNumPr;
  85. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
  86. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
  87. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr;
  88. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
  89. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
  90. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
  91. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRunTrackChange;
  92. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
  93. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtCell;
  94. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentBlock;
  95. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
  96. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
  97. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
  98. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSimpleField;
  99. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSmartTagRun;
  100. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString;
  101. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyle;
  102. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTabs;
  103. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
  104. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
  105. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
  106. import org.openxmlformats.schemas.wordprocessingml.x2006.main.FtrDocument;
  107. import org.openxmlformats.schemas.wordprocessingml.x2006.main.HdrDocument;
  108. import org.openxmlformats.schemas.wordprocessingml.x2006.main.STBrType;
  109. import org.openxmlformats.schemas.wordprocessingml.x2006.main.STFldCharType;
  110. import org.openxmlformats.schemas.wordprocessingml.x2006.main.STMerge;
  111. import org.openxmlformats.schemas.wordprocessingml.x2006.main.STOnOff;
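/**
 * Visitor which walks the elements of word/document.xml, word/header*.xml and word/footer*.xml.
 *
 * @param <T> type of the container created for the document and handed to the visit methods
 * @param <O> type of the conversion options
 * @param <E> type of the master page
 */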
  119. public abstract class XWPFDocumentVisitor<T, O extends Options, E extends IXWPFMasterPage>
  120. {
/** Logger of this class. */
private static final Logger LOGGER = Logger.getLogger( XWPFDocumentVisitor.class.getName() );
/** Path prefix "word/media/"; presumably the location of the embedded pictures - TODO confirm against usage. */
protected static final String WORD_MEDIA = "word/media/";
/** The document given to the constructor; its body elements are visited by start(). */
protected final XWPFDocument document;
/** Manager created in the constructor; initialized and updated while paragraphs of the document are visited. */
private final MasterPageManager masterPageManager;
// NOTE(review): currentHeader/currentFooter are not used in the visible part of the class; presumably the
// header/footer being visited - confirm.
private XWPFHeader currentHeader;
private XWPFFooter currentFooter;
/** Styles document built by createStylesDocument(XWPFDocument) in the constructor. */
protected final XWPFStylesDocument stylesDocument;
/** Options given to the constructor. */
protected final O options;
/** Set by visitBR when a page break is met; consumed (and reset) by the next call of visitParagraph. */
private boolean pageBreakOnNextParagraph;
/**
 * Map of w:numId and ListContext. Created lazily by getListContext(int).
 */
private Map<Integer, ListContext> listContextMap;
/**
 * Creates a visitor for the given document.
 * <p>
 * The master page manager is created first (it receives this visitor instance), then the styles document is built
 * through {@link #createStylesDocument(XWPFDocument)}.
 *
 * @param document the document to visit
 * @param options the options, returned as-is by {@link #getOptions()}
 * @throws Exception declared by the signature; presumably raised when the master page manager or the styles
 *             document cannot be created - TODO confirm
 */
public XWPFDocumentVisitor( XWPFDocument document, O options )
    throws Exception
{
    this.document = document;
    this.options = options;
    this.masterPageManager = new MasterPageManager( document, this );
    // NOTE(review): createStylesDocument is overridable and is called from the constructor: an overriding
    // implementation runs before the fields of the subclass are initialized.
    this.stylesDocument = createStylesDocument( document );
}
  142. protected XWPFStylesDocument createStylesDocument( XWPFDocument document )
  143. throws XmlException, IOException
  144. {
  145. return new XWPFStylesDocument( document );
  146. }
  147. public XWPFStylesDocument getStylesDocument()
  148. {
  149. return stylesDocument;
  150. }
  151. public O getOptions()
  152. {
  153. return options;
  154. }
  155. public MasterPageManager getMasterPageManager()
  156. {
  157. return masterPageManager;
  158. }
  159. // ------------------------------ Start/End document visitor -----------
  160. /**
  161. * Main entry for visit XWPFDocument.
  162. *
  163. * @param out
  164. * @throws Exception
  165. */
  166. public void start()
  167. throws Exception
  168. {
  169. // start document
  170. T container = startVisitDocument();
  171. // Create IText, XHTML element for each XWPF elements from the w:body
  172. List<IBodyElement> bodyElements = document.getBodyElements();
  173. visitBodyElements( bodyElements, container );
  174. // end document
  175. endVisitDocument();
  176. }
/**
 * Start of visit document. Called once by {@link #start()} before any body element is visited.
 *
 * @return the container which is passed to the visit of each body element of the document
 * @throws Exception if the document cannot be started
 */
protected abstract T startVisitDocument()
    throws Exception;
/**
 * End of visit document. Called once by {@link #start()} after all body elements have been visited.
 *
 * @throws Exception if the document cannot be ended
 */
protected abstract void endVisitDocument()
    throws Exception;
  192. // ------------------------------ XWPF Elements visitor -----------
  193. protected void visitBodyElements( List<IBodyElement> bodyElements, T container )
  194. throws Exception
  195. {
  196. if ( !masterPageManager.isInitialized() )
  197. {
  198. // master page manager which hosts each <:w;sectPr declared in the word/document.xml
  199. // must be initialized. The initialization loop for each
  200. // <w:p paragraph to compute a list of <w:sectPr which contains information
  201. // about header/footer declared in the <w:headerReference/<w:footerReference
  202. masterPageManager.initialize();
  203. }
  204. String previousParagraphStyleName = null;
  205. for ( int i = 0; i < bodyElements.size(); i++ )
  206. {
  207. IBodyElement bodyElement = bodyElements.get( i );
  208. switch ( bodyElement.getElementType() )
  209. {
  210. case PARAGRAPH:
  211. XWPFParagraph paragraph = (XWPFParagraph) bodyElement;
  212. String paragraphStyleName = paragraph.getStyleID();
  213. boolean sameStyleBelow =
  214. ( paragraphStyleName != null && paragraphStyleName.equals( previousParagraphStyleName ) );
  215. visitParagraph( paragraph, i, container );
  216. break;
  217. case TABLE:
  218. previousParagraphStyleName = null;
  219. visitTable( (XWPFTable) bodyElement, i, container );
  220. break;
  221. }
  222. }
  223. }
  224. /**
  225. * Visit the given paragraph.
  226. *
  227. * @param paragraph
  228. * @param index
  229. * @param container
  230. * @throws Exception
  231. */
  232. protected void visitParagraph( XWPFParagraph paragraph, int index, T container )
  233. throws Exception
  234. {
  235. if ( isWordDocumentPartParsing() )
  236. {
  237. // header/footer is not parsing.
  238. // It's the word/document.xml which is parsing
  239. // test if the current paragraph define a <w:sectPr
  240. // to update the header/footer declared in the <w:headerReference/<w:footerReference
  241. masterPageManager.update( paragraph );
  242. }
  243. if ( pageBreakOnNextParagraph )
  244. {
  245. pageBreak();
  246. }
  247. this.pageBreakOnNextParagraph = false;
  248. ListItemContext itemContext = null;
  249. CTNumPr originalNumPr = stylesDocument.getParagraphNumPr( paragraph );
  250. CTNumPr numPr = getNumPr( originalNumPr );
  251. if ( numPr != null )
  252. {
  253. // paragraph is a numbered/bullet list
  254. // see http://msdn.microsoft.com/en-us/library/office/ee922775%28v=office.14%29.aspx
  255. // - <w:p>
  256. // - <w:pPr>
  257. // <w:pStyle w:val="style0" />
  258. // - <w:numPr>
  259. // <w:ilvl w:val="0" />
  260. // <w:numId w:val="2" />
  261. // </w:numPr>
  262. // get numbering.xml/w:num
  263. /**
  264. * <w:num w:numId="2"> <w:abstractNumId w:val="1" /> </w:num>
  265. */
  266. XWPFNum num = getXWPFNum( numPr );
  267. if ( num != null )
  268. {
  269. // get the abstractNum by usisng abstractNumId
  270. /**
  271. * <w:abstractNum w:abstractNumId="1"> <w:nsid w:val="3CBA6E67" /> <w:multiLevelType
  272. * w:val="hybridMultilevel" /> <w:tmpl w:val="7416D4FA" /> - <w:lvl w:ilvl="0" w:tplc="040C0001">
  273. * <w:start w:val="1" /> <w:numFmt w:val="bullet" /> <w:lvlText w:val="o" /> <w:lvlJc w:val="left" /> -
  274. * <w:pPr> <w:ind w:left="720" w:hanging="360" /> </w:pPr> - <w:rPr> <w:rFonts w:ascii="Symbol"
  275. * w:hAnsi="Symbol" w:hint="default" /> </w:rPr> </w:lvl>
  276. */
  277. XWPFAbstractNum abstractNum = getXWPFAbstractNum( num );
  278. // get the <w:lvl by using abstractNum and numPr level
  279. /**
  280. * <w:num w:numId="2"> <w:abstractNumId w:val="1" /> </w:num>
  281. */
  282. CTDecimalNumber ilvl = numPr.getIlvl();
  283. int level = ilvl != null ? ilvl.getVal().intValue() : 0;
  284. CTLvl lvl = abstractNum.getAbstractNum().getLvlArray( level );
  285. if ( lvl != null )
  286. {
  287. ListContext listContext = getListContext( originalNumPr.getNumId().getVal().intValue() );
  288. itemContext = listContext.addItem( lvl );
  289. }
  290. }
  291. }
  292. T paragraphContainer = startVisitParagraph( paragraph, itemContext, container );
  293. visitParagraphBody( paragraph, index, paragraphContainer );
  294. endVisitParagraph( paragraph, container, paragraphContainer );
  295. }
  296. private CTNumPr getNumPr( CTNumPr numPr )
  297. {
  298. if ( numPr != null )
  299. {
  300. XWPFNum num = getXWPFNum( numPr );
  301. if ( num != null )
  302. {
  303. // get the abstractNum by usisng abstractNumId
  304. /**
  305. * <w:abstractNum w:abstractNumId="1"> <w:nsid w:val="3CBA6E67" /> <w:multiLevelType
  306. * w:val="hybridMultilevel" /> <w:tmpl w:val="7416D4FA" /> - <w:lvl w:ilvl="0" w:tplc="040C0001">
  307. * <w:start w:val="1" /> <w:numFmt w:val="bullet" /> <w:lvlText w:val="o" /> <w:lvlJc w:val="left" /> -
  308. * <w:pPr> <w:ind w:left="720" w:hanging="360" /> </w:pPr> - <w:rPr> <w:rFonts w:ascii="Symbol"
  309. * w:hAnsi="Symbol" w:hint="default" /> </w:rPr> </w:lvl>
  310. */
  311. XWPFAbstractNum abstractNum = getXWPFAbstractNum( num );
  312. CTString numStyleLink = abstractNum.getAbstractNum().getNumStyleLink();
  313. String styleId = numStyleLink != null ? numStyleLink.getVal() : null;
  314. if ( styleId != null )
  315. {
  316. // has w:numStyleLink which reference other style
  317. /*
  318. * <w:abstractNum w:abstractNumId="0"> <w:nsid w:val="03916EF0"/> <w:multiLevelType
  319. * w:val="multilevel"/> <w:tmpl w:val="0409001D"/> <w:numStyleLink w:val="EricsListStyle"/>
  320. * </w:abstractNum>
  321. */
  322. CTStyle style = stylesDocument.getStyle( styleId );
  323. CTPPr ppr = style.getPPr();
  324. if ( ppr == null )
  325. {
  326. return null;
  327. }
  328. return getNumPr( ppr.getNumPr() );
  329. }
  330. }
  331. }
  332. return numPr;
  333. }
  334. private ListContext getListContext( int numId )
  335. {
  336. if ( listContextMap == null )
  337. {
  338. listContextMap = new HashMap<Integer, ListContext>();
  339. }
  340. ListContext listContext = listContextMap.get( numId );
  341. if ( listContext == null )
  342. {
  343. listContext = new ListContext();
  344. listContextMap.put( numId, listContext );
  345. }
  346. return listContext;
  347. }
/**
 * Start of the visit of a paragraph. Called by visitParagraph before the body of the paragraph is visited.
 *
 * @param paragraph the paragraph which is visited
 * @param itemContext the list item context when the paragraph was resolved as a list item, null otherwise
 * @param parentContainer the container which hosts the paragraph
 * @return the container passed to the visit of the paragraph body and to endVisitParagraph
 * @throws Exception if the paragraph cannot be started
 */
protected abstract T startVisitParagraph( XWPFParagraph paragraph, ListItemContext itemContext, T parentContainer )
    throws Exception;
/**
 * End of the visit of a paragraph. Called by visitParagraph after the body of the paragraph has been visited.
 *
 * @param paragraph the paragraph which was visited
 * @param parentContainer the container which hosts the paragraph
 * @param paragraphContainer the container returned by startVisitParagraph for this paragraph
 * @throws Exception if the paragraph cannot be ended
 */
protected abstract void endVisitParagraph( XWPFParagraph paragraph, T parentContainer, T paragraphContainer )
    throws Exception;
  352. protected void visitParagraphBody( XWPFParagraph paragraph, int index, T paragraphContainer )
  353. throws Exception
  354. {
  355. List<XWPFRun> runs = paragraph.getRuns();
  356. if ( runs.isEmpty() )
  357. {
  358. // a new line must be generated if :
  359. // - there is next paragraph/table
  360. // - if the body is a cell (with none vMerge) and contains just this paragraph
  361. if ( isAddNewLine( paragraph, index ) )
  362. {
  363. visitEmptyRun( paragraphContainer );
  364. }
  365. // sometimes, POI tells that run is empty
  366. // but it can be have w:r in the w:pPr
  367. // <w:p><w:pPr .. <w:r> => See the header1.xml of DocxBig.docx ,
  368. // => test if it exist w:r
  369. // CTP p = paragraph.getCTP();
  370. // CTPPr pPr = p.getPPr();
  371. // if (pPr != null) {
  372. // XmlObject[] wRuns =
  373. // pPr.selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:r");
  374. // if (wRuns != null) {
  375. // for ( int i = 0; i < wRuns.length; i++ )
  376. // {
  377. // XmlObject o = wRuns[i];
  378. // o.getDomNode().getParentNode()
  379. // if (o instanceof CTR) {
  380. // System.err.println(wRuns[i]);
  381. // }
  382. //
  383. // }
  384. // }
  385. // }
  386. // //XmlObject[] t =
  387. // o.selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
  388. // //paragraph.getCTP().get
  389. }
  390. else
  391. {
  392. // Loop for each element of <w:r, w:fldSimple
  393. // to keep the order of those elements.
  394. visitRuns( paragraph, paragraphContainer );
  395. }
  396. // Page Break
  397. // Cannot use paragraph.isPageBreak() because it throws NPE because
  398. // pageBreak.getVal() can be null.
  399. CTPPr ppr = paragraph.getCTP().getPPr();
  400. if ( ppr != null )
  401. {
  402. if ( ppr.isSetPageBreakBefore() )
  403. {
  404. CTOnOff pageBreak = ppr.getPageBreakBefore();
  405. if ( pageBreak != null
  406. && ( pageBreak.getVal() == null || pageBreak.getVal().intValue() == STOnOff.INT_TRUE ) )
  407. {
  408. pageBreak();
  409. }
  410. }
  411. }
  412. }
  413. // ------------------------ Numbering --------------
  414. protected XWPFNum getXWPFNum( CTNumPr numPr )
  415. {
  416. CTDecimalNumber numID = numPr.getNumId();
  417. if ( numID == null )
  418. {
  419. // numID can be null, ignore the numbering
  420. // see https://code.google.com/p/xdocreport/issues/detail?id=239
  421. return null;
  422. }
  423. XWPFNum num = document.getNumbering().getNum( numID.getVal() );
  424. return num;
  425. }
  426. protected XWPFAbstractNum getXWPFAbstractNum( XWPFNum num )
  427. {
  428. CTDecimalNumber abstractNumID = num.getCTNum().getAbstractNumId();
  429. XWPFAbstractNum abstractNum = document.getNumbering().getAbstractNum( abstractNumID.getVal() );
  430. return abstractNum;
  431. }
  432. /**
  433. * Returns true if the given paragraph which is empty (none <w:r> run) must generate new line and false otherwise.
  434. *
  435. * @param paragraph
  436. * @param index
  437. * @return
  438. */
  439. private boolean isAddNewLine( XWPFParagraph paragraph, int index )
  440. {
  441. // a new line must be generated if :
  442. // - there is next paragraph/table
  443. // - if the body is a cell (with none vMerge) and contains just this paragraph
  444. IBody body = paragraph.getBody();
  445. List<IBodyElement> bodyElements = body.getBodyElements();
  446. if ( body.getPartType() == BodyType.TABLECELL && bodyElements.size() == 1 )
  447. {
  448. XWPFTableCell cell = (XWPFTableCell) body;
  449. STMerge.Enum vMerge = stylesDocument.getTableCellVMerge( cell );
  450. if ( vMerge != null && vMerge.equals( STMerge.CONTINUE ) )
  451. {
  452. // here a new line must not be generated because the body is a cell (with none vMerge) and contains just
  453. // this paragraph
  454. return false;
  455. }
  456. // Loop for each cell of the row : if all cells are empty, new line must be generated otherwise none empty
  457. // line must be generated.
  458. XWPFTableRow row = cell.getTableRow();
  459. List<XWPFTableCell> cells = row.getTableCells();
  460. for ( XWPFTableCell c : cells )
  461. {
  462. if ( c.getBodyElements().size() != 1 )
  463. {
  464. return false;
  465. }
  466. IBodyElement element = c.getBodyElements().get( 0 );
  467. if ( element.getElementType() != BodyElementType.PARAGRAPH )
  468. {
  469. return false;
  470. }
  471. return ( (XWPFParagraph) element ).getRuns().size() == 0;
  472. }
  473. return true;
  474. }
  475. // here a new line must be generated if there is next paragraph/table
  476. return bodyElements.size() > index + 1;
  477. }
  478. private void visitRuns( XWPFParagraph paragraph, T paragraphContainer )
  479. throws Exception
  480. {
  481. boolean fldCharTypeParsing = false;
  482. boolean pageNumber = false;
  483. String url = null;
  484. List<XmlObject> rListAfterSeparate = null;
  485. CTP ctp = paragraph.getCTP();
  486. XmlCursor c = ctp.newCursor();
  487. c.selectPath( "child::*" );
  488. while ( c.toNextSelection() )
  489. {
  490. XmlObject o = c.getObject();
  491. if ( o instanceof CTR )
  492. {
  493. /*
  494. * Test if it's : <w:r> <w:rPr /> <w:fldChar w:fldCharType="begin" /> </w:r>
  495. */
  496. CTR r = (CTR) o;
  497. STFldCharType.Enum fldCharType = XWPFRunHelper.getFldCharType( r );
  498. if ( fldCharType != null )
  499. {
  500. if ( fldCharType.equals( STFldCharType.BEGIN ) )
  501. {
  502. process( paragraph, paragraphContainer, pageNumber, url, rListAfterSeparate );
  503. fldCharTypeParsing = true;
  504. rListAfterSeparate = new ArrayList<XmlObject>();
  505. pageNumber = false;
  506. url = null;
  507. }
  508. else if ( fldCharType.equals( STFldCharType.END ) )
  509. {
  510. process( paragraph, paragraphContainer, pageNumber, url, rListAfterSeparate );
  511. fldCharTypeParsing = false;
  512. rListAfterSeparate = null;
  513. pageNumber = false;
  514. url = null;
  515. }
  516. }
  517. else
  518. {
  519. if ( fldCharTypeParsing )
  520. {
  521. String instrText = XWPFRunHelper.getInstrText( r );
  522. if ( instrText != null )
  523. {
  524. if ( StringUtils.isNotEmpty( instrText ) )
  525. {
  526. // test if it's <w:r><w:instrText>PAGE</w:instrText></w:r>
  527. boolean instrTextPage = XWPFRunHelper.isInstrTextPage( instrText );
  528. if ( !instrTextPage )
  529. {
  530. // test if it's <w:instrText>HYPERLINK
  531. // "http://code.google.com/p/xdocrepor"</w:instrText>
  532. String instrTextHyperlink = XWPFRunHelper.getInstrTextHyperlink( instrText );
  533. if ( instrTextHyperlink != null )
  534. {
  535. url = instrTextHyperlink;
  536. }
  537. }
  538. else
  539. {
  540. pageNumber = true;
  541. }
  542. }
  543. }
  544. else
  545. {
  546. rListAfterSeparate.add( r );
  547. }
  548. }
  549. else
  550. {
  551. XWPFRun run = new XWPFRun( r, paragraph );
  552. visitRun( run, false, null, paragraphContainer );
  553. }
  554. }
  555. }
  556. else
  557. {
  558. if ( fldCharTypeParsing )
  559. {
  560. rListAfterSeparate.add( o );
  561. }
  562. else
  563. {
  564. visitRun( paragraph, o, paragraphContainer );
  565. }
  566. }
  567. }
  568. c.dispose();
  569. process( paragraph, paragraphContainer, pageNumber, url, rListAfterSeparate );
  570. fldCharTypeParsing = false;
  571. rListAfterSeparate = null;
  572. pageNumber = false;
  573. url = null;
  574. }
  575. private void process( XWPFParagraph paragraph, T paragraphContainer, boolean pageNumber, String url,
  576. List<XmlObject> rListAfterSeparate )
  577. throws Exception
  578. {
  579. if ( rListAfterSeparate != null )
  580. {
  581. for ( XmlObject oAfterSeparate : rListAfterSeparate )
  582. {
  583. if ( oAfterSeparate instanceof CTR )
  584. {
  585. CTR ctr = (CTR) oAfterSeparate;
  586. XWPFRun run = new XWPFRun( ctr, paragraph );
  587. visitRun( run, pageNumber, url, paragraphContainer );
  588. }
  589. else
  590. {
  591. visitRun( paragraph, oAfterSeparate, paragraphContainer );
  592. }
  593. }
  594. }
  595. }
  596. private void visitRun( XWPFParagraph paragraph, XmlObject o, T paragraphContainer )
  597. throws Exception
  598. {
  599. if ( o instanceof CTHyperlink )
  600. {
  601. CTHyperlink link = (CTHyperlink) o;
  602. String anchor = link.getAnchor();
  603. String href = null;
  604. // Test if the is an id for hyperlink
  605. String hyperlinkId = link.getId();
  606. if ( StringUtils.isNotEmpty( hyperlinkId ) )
  607. {
  608. XWPFHyperlink hyperlink = document.getHyperlinkByID( hyperlinkId );
  609. href = hyperlink.getURL();
  610. }
  611. for ( CTR r : link.getRList() )
  612. {
  613. XWPFRun run = new XWPFHyperlinkRun( link, r, paragraph );
  614. visitRun( run, false, href != null ? href : "#" + anchor, paragraphContainer );
  615. }
  616. }
  617. else if ( o instanceof CTSdtRun )
  618. {
  619. CTSdtContentRun run = ( (CTSdtRun) o ).getSdtContent();
  620. for ( CTR r : run.getRList() )
  621. {
  622. XWPFRun ru = new XWPFRun( r, paragraph );
  623. visitRun( ru, false, null, paragraphContainer );
  624. }
  625. }
  626. else if ( o instanceof CTRunTrackChange )
  627. {
  628. for ( CTR r : ( (CTRunTrackChange) o ).getRList() )
  629. {
  630. XWPFRun run = new XWPFRun( r, paragraph );
  631. visitRun( run, false, null, paragraphContainer );
  632. }
  633. }
  634. else if ( o instanceof CTSimpleField )
  635. {
  636. CTSimpleField simpleField = (CTSimpleField) o;
  637. String instr = simpleField.getInstr();
  638. // 1) test if it's page number
  639. // <w:fldSimple w:instr=" PAGE \* MERGEFORMAT "> <w:r> <w:rPr> <w:noProof/>
  640. // </w:rPr> <w:t>- 1 -</w:t> </w:r> </w:fldSimple>
  641. boolean fieldPageNumber = XWPFRunHelper.isInstrTextPage( instr );
  642. String fieldHref = null;
  643. if ( !fieldPageNumber )
  644. {
  645. // not page number, test if it's hyperlink :
  646. // <w:instrText>HYPERLINK "http://code.google.com/p/xdocrepor"</w:instrText>
  647. fieldHref = XWPFRunHelper.getInstrTextHyperlink( instr );
  648. }
  649. for ( CTR r : simpleField.getRList() )
  650. {
  651. XWPFRun run = new XWPFRun( r, paragraph );
  652. visitRun( run, fieldPageNumber, fieldHref, paragraphContainer );
  653. }
  654. }
  655. else if ( o instanceof CTSmartTagRun )
  656. {
  657. // Smart Tags can be nested many times.
  658. // This implementation does not preserve the tagging information
  659. // buildRunsInOrderFromXml(o);
  660. }
  661. else if ( o instanceof CTBookmark )
  662. {
  663. CTBookmark bookmark = (CTBookmark) o;
  664. visitBookmark( bookmark, paragraph, paragraphContainer );
  665. }
  666. }
/**
 * Visits an empty run. Called by visitParagraphBody for a paragraph without run which must generate a new line.
 *
 * @param paragraphContainer the container created for the paragraph
 * @throws Exception if the visit fails
 */
protected abstract void visitEmptyRun( T paragraphContainer )
    throws Exception;
  669. protected void visitRun( XWPFRun run, boolean pageNumber, String url, T paragraphContainer )
  670. throws Exception
  671. {
  672. CTR ctr = run.getCTR();
  673. // Loop for each element of <w:run text, tab, image etc
  674. // to keep the order of thoses elements.
  675. XmlCursor c = ctr.newCursor();
  676. c.selectPath( "./*" );
  677. while ( c.toNextSelection() )
  678. {
  679. XmlObject o = c.getObject();
  680. if ( o instanceof CTText )
  681. {
  682. CTText ctText = (CTText) o;
  683. String tagName = o.getDomNode().getNodeName();
  684. // Field Codes (w:instrText, defined in spec sec. 17.16.23)
  685. // come up as instances of CTText, but we don't want them
  686. // in the normal text output
  687. if ( "w:instrText".equals( tagName ) )
  688. {
  689. }
  690. else
  691. {
  692. visitText( ctText, pageNumber, paragraphContainer );
  693. }
  694. }
  695. else if ( o instanceof CTPTab )
  696. {
  697. visitTab( (CTPTab) o, paragraphContainer );
  698. }
  699. else if ( o instanceof CTBr )
  700. {
  701. visitBR( (CTBr) o, paragraphContainer );
  702. }
  703. else if ( o instanceof CTEmpty )
  704. {
  705. // Some inline text elements get returned not as
  706. // themselves, but as CTEmpty, owing to some odd
  707. // definitions around line 5642 of the XSDs
  708. // This bit works around it, and replicates the above
  709. // rules for that case
  710. String tagName = o.getDomNode().getNodeName();
  711. if ( "w:tab".equals( tagName ) )
  712. {
  713. CTTabs tabs = stylesDocument.getParagraphTabs( run.getParagraph() );
  714. visitTabs( tabs, paragraphContainer );
  715. }
  716. if ( "w:br".equals( tagName ) )
  717. {
  718. visitBR( null, paragraphContainer );
  719. }
  720. if ( "w:cr".equals( tagName ) )
  721. {
  722. visitBR( null, paragraphContainer );
  723. }
  724. }
  725. else if ( o instanceof CTDrawing )
  726. {
  727. visitDrawing( (CTDrawing) o, paragraphContainer );
  728. }
  729. }
  730. c.dispose();
  731. }
/**
 * Visits a text of a run. Called for each text child of a w:r, except the field instructions (w:instrText).
 *
 * @param ctText the text to visit
 * @param pageNumber the page number flag received by the visit of the run which hosts the text
 * @param paragraphContainer the container created for the paragraph
 * @throws Exception if the visit fails
 */
protected abstract void visitText( CTText ctText, boolean pageNumber, T paragraphContainer )
    throws Exception;
/**
 * Visits a tab of a run. Called for each CTPTab child of a w:r.
 *
 * @param o the tab to visit
 * @param paragraphContainer the container created for the paragraph
 * @throws Exception if the visit fails
 */
protected abstract void visitTab( CTPTab o, T paragraphContainer )
    throws Exception;
/**
 * Visits a w:tab child of a w:r which is surfaced as CTEmpty.
 *
 * @param tabs the tabs returned by the styles document for the paragraph of the run
 * @param paragraphContainer the container created for the paragraph
 * @throws Exception if the visit fails
 */
protected abstract void visitTabs( CTTabs tabs, T paragraphContainer )
    throws Exception;
  738. protected void visitBR( CTBr br, T paragraphContainer )
  739. throws Exception
  740. {
  741. STBrType.Enum brType = XWPFRunHelper.getBrType( br );
  742. if ( brType.equals( STBrType.PAGE ) )
  743. {
  744. pageBreakOnNextParagraph = true;
  745. }
  746. else
  747. {
  748. addNewLine( br, paragraphContainer );
  749. }
  750. }
/**
 * Visits a bookmark. Called for each CTBookmark child of a w:p.
 *
 * @param bookmark the bookmark to visit
 * @param paragraph the paragraph which hosts the bookmark
 * @param paragraphContainer the container created for the paragraph
 * @throws Exception if the visit fails
 */
protected abstract void visitBookmark( CTBookmark bookmark, XWPFParagraph paragraph, T paragraphContainer )
    throws Exception;
/**
 * Adds a new line. Called by visitBR for a break which is not a page break.
 *
 * @param br the break; can be null (visitBR is called with null for w:br/w:cr surfaced as CTEmpty)
 * @param paragraphContainer the container created for the paragraph
 * @throws Exception if the new line cannot be added
 */
protected abstract void addNewLine( CTBr br, T paragraphContainer )
    throws Exception;
/**
 * Emits a page break. Called by visitParagraph when a page break run was met before the paragraph, and by
 * visitParagraphBody when the paragraph declares w:pageBreakBefore.
 *
 * @throws Exception if the page break cannot be emitted
 */
protected abstract void pageBreak()
    throws Exception;
  757. protected void visitTable( XWPFTable table, int index, T container )
  758. throws Exception
  759. {
  760. // 1) Compute colWidth
  761. float[] colWidths = XWPFTableUtil.computeColWidths( table );
  762. T tableContainer = startVisitTable( table, colWidths, container );
  763. visitTableBody( table, colWidths, tableContainer );
  764. endVisitTable( table, container, tableContainer );
  765. }
  766. protected void visitTableBody( XWPFTable table, float[] colWidths, T tableContainer )
  767. throws Exception
  768. {
  769. // Proces Row
  770. boolean firstRow = false;
  771. boolean lastRow = false;
  772. List<XWPFTableRow> rows = table.getRows();
  773. int rowsSize = rows.size();
  774. for ( int i = 0; i < rowsSize; i++ )
  775. {
  776. firstRow = ( i == 0 );
  777. lastRow = isLastRow( i, rowsSize );
  778. XWPFTableRow row = rows.get( i );
  779. visitTableRow( row, colWidths, tableContainer, firstRow, lastRow, i, rowsSize );
  780. }
  781. }
  782. private boolean isLastRow( int rowIndex, int rowsSize )
  783. {
  784. return rowIndex == rowsSize - 1;
  785. }
/**
 * Start of the visit of a table. Called by visitTable before the rows are visited.
 *
 * @param table the table which is visited
 * @param colWidths the widths of the columns computed for the table
 * @param tableContainer despite its name, receives the container which hosts the table (visitTable passes its
 *            own container argument here)
 * @return the container passed to the visit of the table body and to endVisitTable
 * @throws Exception if the table cannot be started
 */
protected abstract T startVisitTable( XWPFTable table, float[] colWidths, T tableContainer )
    throws Exception;
/**
 * End of the visit of a table. Called by visitTable after the rows have been visited.
 *
 * @param table the table which was visited
 * @param parentContainer the container which hosts the table
 * @param tableContainer the container returned by startVisitTable for this table
 * @throws Exception if the table cannot be ended
 */
protected abstract void endVisitTable( XWPFTable table, T parentContainer, T tableContainer )
    throws Exception;
  790. protected void visitTableRow( XWPFTableRow row, float[] colWidths, T tableContainer, boolean firstRow,
  791. boolean lastRowIfNoneVMerge, int rowIndex, int rowsSize )
  792. throws Exception
  793. {
  794. boolean headerRow = stylesDocument.isTableRowHeader( row );
  795. startVisitTableRow( row, tableContainer, rowIndex, headerRow );
  796. int nbColumns = colWidths.length;
  797. // Process cell
  798. boolean firstCol = true;
  799. boolean lastCol = false;
  800. boolean lastRow = false;
  801. List<XWPFTableCell> vMergedCells = null;
  802. List<XWPFTableCell> cells = row.getTableCells();
  803. if ( nbColumns > cells.size() )
  804. {
  805. // Columns number is not equal to cells number.
  806. // POI have a bug with
  807. // <w:tr w:rsidR="00C55C20">
  808. // <w:tc>
  809. // <w:tc>...
  810. // <w:sdt>
  811. // <w:sdtContent>
  812. // <w:tc> <= this tc which is a XWPFTableCell is not included in the row.getTableCells();
  813. firstCol = true;
  814. int cellIndex = -1;
  815. CTRow ctRow = row.getCtRow();
  816. XmlCursor c = ctRow.newCursor();
  817. c.selectPath( "./*" );
  818. while ( c.toNextSelection() )
  819. {
  820. XmlObject o = c.getObject();
  821. if ( o instanceof CTTc )
  822. {
  823. CTTc tc = (CTTc) o;
  824. XWPFTableCell cell = row.getTableCell( tc );
  825. cellIndex = getCellIndex( cellIndex, cell );
  826. lastCol = ( cellIndex == nbColumns );
  827. vMergedCells = getVMergedCells( cell, rowIndex, cellIndex );
  828. if ( vMergedCells == null || vMergedCells.size() > 0 )
  829. {
  830. lastRow = isLastRow( lastRowIfNoneVMerge, rowIndex, rowsSize, vMergedCells );
  831. visitCell( cell, tableContainer, firstRow, lastRow, firstCol, lastCol, rowIndex, cellIndex,
  832. vMergedCells );
  833. }
  834. firstCol = false;
  835. }
  836. else if ( o instanceof CTSdtCell )
  837. {
  838. // Fix bug of POI
  839. CTSdtCell sdtCell = (CTSdtCell) o;
  840. List<CTTc> tcList = sdtCell.getSdtContent().getTcList();
  841. for ( CTTc ctTc : tcList )
  842. {
  843. XWPFTableCell cell = new XWPFTableCell( ctTc, row, row.getTable().getBody() );
  844. cellIndex = getCellIndex( cellIndex, cell );
  845. lastCol = ( cellIndex == nbColumns );
  846. vMergedCells = getVMergedCells( cell, rowIndex, cellIndex );
  847. if ( vMergedCells == null || vMergedCells.size() > 0 )
  848. {
  849. lastRow = isLastRow( lastRowIfNoneVMerge, rowIndex, rowsSize, vMergedCells );
  850. visitCell( cell, tableContainer, firstRow, lastRow, firstCol, lastCol, rowIndex, cellIndex,
  851. vMergedCells );
  852. }
  853. firstCol = false;
  854. }
  855. }
  856. }
  857. c.dispose();
  858. }
  859. else
  860. {
  861. // Column number is equal to cells number.
  862. for ( int i = 0; i < cells.size(); i++ )
  863. {
  864. lastCol = ( i == cells.size() - 1 );
  865. XWPFTableCell cell = cells.get( i );
  866. vMergedCells = getVMergedCells( cell, rowIndex, i );
  867. if ( vMergedCells == null || vMergedCells.size() > 0 )
  868. {
  869. lastRow = isLastRow( lastRowIfNoneVMerge, rowIndex, rowsSize, vMergedCells );
  870. visitCell( cell, tableContainer, firstRow, lastRow, firstCol, lastCol, rowIndex, i, vMergedCells );
  871. }
  872. firstCol = false;
  873. }
  874. }
  875. endVisitTableRow( row, tableContainer, firstRow, lastRow, headerRow );
  876. }
  877. private boolean isLastRow( boolean lastRowIfNoneVMerge, int rowIndex, int rowsSize, List<XWPFTableCell> vMergedCells )
  878. {
  879. if ( vMergedCells == null )
  880. {
  881. return lastRowIfNoneVMerge;
  882. }
  883. return isLastRow( rowIndex - 1 + vMergedCells.size(), rowsSize );
  884. }
  885. private int getCellIndex( int cellIndex, XWPFTableCell cell )
  886. {
  887. BigInteger gridSpan = stylesDocument.getTableCellGridSpan( cell.getCTTc().getTcPr() );
  888. if ( gridSpan != null )
  889. {
  890. cellIndex = cellIndex + gridSpan.intValue();
  891. }
  892. else
  893. {
  894. cellIndex++;
  895. }
  896. return cellIndex;
  897. }
    /**
     * Hook called by {@link #visitTableRow} before the cells of the row are visited. The default
     * implementation does nothing.
     *
     * @param row the row which is about to be visited.
     * @param tableContainer the container created for the table.
     * @param rowIndex zero-based index of the row.
     * @param headerRow true if the styles document reports the row as a table header row.
     * @throws Exception declared so that overriding visitors may fail.
     */
    protected void startVisitTableRow( XWPFTableRow row, T tableContainer, int rowIndex, boolean headerRow )
        throws Exception
    {
    }
    /**
     * Hook called by {@link #visitTableRow} once the cells of the row were visited. The default
     * implementation does nothing.
     *
     * @param row the visited row.
     * @param tableContainer the container created for the table.
     * @param firstRow true if the row is the first row of the table.
     * @param lastRow the "last row" flag computed for the last visited cell of the row.
     * @param headerRow true if the styles document reports the row as a table header row.
     * @throws Exception declared so that overriding visitors may fail.
     */
    protected void endVisitTableRow( XWPFTableRow row, T tableContainer, boolean firstRow, boolean lastRow,
                                     boolean headerRow )
        throws Exception
    {
    }
  907. protected void visitCell( XWPFTableCell cell, T tableContainer, boolean firstRow, boolean lastRow,
  908. boolean firstCol, boolean lastCol, int rowIndex, int cellIndex,
  909. List<XWPFTableCell> vMergedCells )
  910. throws Exception
  911. {
  912. T tableCellContainer =
  913. startVisitTableCell( cell, tableContainer, firstRow, lastRow, firstCol, lastCol, vMergedCells );
  914. visitTableCellBody( cell, vMergedCells, tableCellContainer );
  915. endVisitTableCell( cell, tableContainer, tableCellContainer );
  916. }
  917. private List<XWPFTableCell> getVMergedCells( XWPFTableCell cell, int rowIndex, int cellIndex )
  918. {
  919. List<XWPFTableCell> vMergedCells = null;
  920. STMerge.Enum vMerge = stylesDocument.getTableCellVMerge( cell );
  921. if ( vMerge != null )
  922. {
  923. if ( vMerge.equals( STMerge.RESTART ) )
  924. {
  925. // vMerge="restart"
  926. // Loop for each table cell of each row upon vMerge="restart" was found or cell without vMerge
  927. // was declared.
  928. vMergedCells = new ArrayList<XWPFTableCell>();
  929. vMergedCells.add( cell );
  930. XWPFTableRow row = null;
  931. XWPFTableCell c;
  932. XWPFTable table = cell.getTableRow().getTable();
  933. for ( int i = rowIndex + 1; i < table.getRows().size(); i++ )
  934. {
  935. row = table.getRow( i );
  936. c = row.getCell( cellIndex );
  937. if ( c == null )
  938. {
  939. break;
  940. }
  941. vMerge = stylesDocument.getTableCellVMerge( c );
  942. if ( vMerge != null && vMerge.equals( STMerge.CONTINUE ) )
  943. {
  944. vMergedCells.add( c );
  945. }
  946. else
  947. {
  948. return vMergedCells;
  949. }
  950. }
  951. }
  952. else
  953. {
  954. // vMerge="continue", ignore the cell because it was already processed
  955. return Collections.emptyList();
  956. }
  957. }
  958. return vMergedCells;
  959. }
  960. protected void visitTableCellBody( XWPFTableCell cell, List<XWPFTableCell> vMergeCells, T tableCellContainer )
  961. throws Exception
  962. {
  963. if ( vMergeCells != null )
  964. {
  965. for ( XWPFTableCell mergedCell : vMergeCells )
  966. {
  967. List<IBodyElement> bodyElements = mergedCell.getBodyElements();
  968. visitBodyElements( bodyElements, tableCellContainer );
  969. }
  970. }
  971. else
  972. {
  973. List<IBodyElement> bodyElements = cell.getBodyElements();
  974. visitBodyElements( bodyElements, tableCellContainer );
  975. }
  976. }
    /**
     * Hook implemented by concrete visitors, called by {@link #visitCell} before the content of the cell is
     * visited.
     *
     * @param cell the cell which is about to be visited.
     * @param tableContainer the container created for the table.
     * @param firstRow true if the cell belongs to the first row.
     * @param lastRow true if the cell ends on the last row.
     * @param firstCol true if the cell is the first visited cell of the row.
     * @param lastCol true if the cell is the last cell of the row.
     * @param vMergeCells the vertically merged cells starting with this cell, or null.
     * @return the container which is handed over to the cell body visit and to {@link #endVisitTableCell}.
     * @throws Exception if the concrete visitor fails.
     */
    protected abstract T startVisitTableCell( XWPFTableCell cell, T tableContainer, boolean firstRow, boolean lastRow,
                                              boolean firstCol, boolean lastCol, List<XWPFTableCell> vMergeCells )
        throws Exception;
    /**
     * Hook implemented by concrete visitors, called by {@link #visitCell} once the content of the cell was
     * visited.
     *
     * @param cell the visited cell.
     * @param tableContainer the container created for the table.
     * @param tableCellContainer the container returned by {@link #startVisitTableCell}.
     * @throws Exception if the concrete visitor fails.
     */
    protected abstract void endVisitTableCell( XWPFTableCell cell, T tableContainer, T tableCellContainer )
        throws Exception;
  982. protected XWPFStyle getXWPFStyle( String styleID )
  983. {
  984. if ( styleID == null )
  985. return null;
  986. else
  987. return document.getStyles().getStyle( styleID );
  988. }
  989. /**
  990. * Returns true if word/document.xml is parsing and false otherwise.
  991. *
  992. * @return true if word/document.xml is parsing and false otherwise.
  993. */
  994. protected boolean isWordDocumentPartParsing()
  995. {
  996. return currentHeader == null && currentFooter == null;
  997. }
  998. // ------------------------------ Header/Footer visitor -----------
  999. protected void visitHeaderRef( CTHdrFtrRef headerRef, CTSectPr sectPr, E masterPage )
  1000. throws Exception
  1001. {
  1002. this.currentHeader = getXWPFHeader( headerRef );
  1003. visitHeader( currentHeader, headerRef, sectPr, masterPage );
  1004. this.currentHeader = null;
  1005. }
    /**
     * Hook implemented by concrete visitors to visit a header; called by {@link #visitHeaderRef}.
     *
     * @param header the header resolved from the reference.
     * @param headerRef the header reference.
     * @param sectPr the section properties given to {@link #visitHeaderRef}.
     * @param masterPage the master page given to {@link #visitHeaderRef}.
     * @throws Exception if the concrete visitor fails.
     */
    protected abstract void visitHeader( XWPFHeader header, CTHdrFtrRef headerRef, CTSectPr sectPr, E masterPage )
        throws Exception;
  1008. protected void visitFooterRef( CTHdrFtrRef footerRef, CTSectPr sectPr, E masterPage )
  1009. throws Exception
  1010. {
  1011. this.currentFooter = getXWPFFooter( footerRef );
  1012. visitFooter( currentFooter, footerRef, sectPr, masterPage );
  1013. this.currentFooter = null;
  1014. }
    /**
     * Hook implemented by concrete visitors to visit a footer; called by {@link #visitFooterRef}.
     *
     * @param footer the footer resolved from the reference.
     * @param footerRef the footer reference.
     * @param sectPr the section properties given to {@link #visitFooterRef}.
     * @param masterPage the master page given to {@link #visitFooterRef}.
     * @throws Exception if the concrete visitor fails.
     */
    protected abstract void visitFooter( XWPFFooter footer, CTHdrFtrRef footerRef, CTSectPr sectPr, E masterPage )
        throws Exception;
  1017. /**
  1018. * Returns the list of {@link IBodyElement} of the given header/footer. We do that because
  1019. * {@link XWPFHeaderFooter#getBodyElements()} doesn't contains the // <w:sdt><w:sdtContent>
  1020. * <p
  1021. * (see JUnit Docx4j_GettingStarted, DocXperT_Output_4_3, Issue222 which defines page number in the <w:sdt. ...
  1022. *
  1023. * @param part
  1024. * @return
  1025. */
  1026. protected List<IBodyElement> getBodyElements( XWPFHeaderFooter part )
  1027. {
  1028. List<IBodyElement> bodyElements = new ArrayList<IBodyElement>();
  1029. XmlTokenSource headerFooter = part._getHdrFtr();
  1030. addBodyElements( headerFooter, part, bodyElements );
  1031. return bodyElements;
  1032. }
  1033. /**
  1034. * Add body elements from the given token source.
  1035. *
  1036. * @param source
  1037. * @param part
  1038. * @param bodyElements
  1039. */
  1040. private void addBodyElements( XmlTokenSource source, IBody part, List<IBodyElement> bodyElements )
  1041. {
  1042. // parse the document with cursor and add
  1043. // the XmlObject to its lists
  1044. XmlCursor cursor = source.newCursor();
  1045. cursor.selectPath( "./*" );
  1046. while ( cursor.toNextSelection() )
  1047. {
  1048. XmlObject o = cursor.getObject();
  1049. if ( o instanceof CTSdtBlock )
  1050. {
  1051. // <w:sdt><w:sdtContent><p...
  1052. CTSdtBlock block = (CTSdtBlock) o;
  1053. CTSdtContentBlock contentBlock = block.getSdtContent();
  1054. if ( contentBlock != null )
  1055. {
  1056. addBodyElements( contentBlock, part, bodyElements );
  1057. }
  1058. }
  1059. else if ( o instanceof CTP )
  1060. {
  1061. XWPFParagraph p = new XWPFParagraph( (CTP) o, part );
  1062. bodyElements.add( p );
  1063. }
  1064. else if ( o instanceof CTTbl )
  1065. {
  1066. XWPFTable t = new XWPFTable( (CTTbl) o, part );
  1067. bodyElements.add( t );
  1068. }
  1069. }
  1070. cursor.dispose();
  1071. }
  1072. /**
  1073. * Returns the {@link XWPFHeader} of the given header reference.
  1074. *
  1075. * @param headerref the header reference.
  1076. * @return
  1077. * @throws XmlException
  1078. * @throws IOException
  1079. */
  1080. protected XWPFHeader getXWPFHeader( CTHdrFtrRef headerRef )
  1081. throws XmlException, IOException
  1082. {
  1083. PackagePart hdrPart = document.getPartById( headerRef.getId() );
  1084. List<XWPFHeader> headers = document.getHeaderList();
  1085. for ( XWPFHeader header : headers )
  1086. {
  1087. if ( header.getPackagePart().equals( hdrPart ) )
  1088. {
  1089. // header is aleady loaded, return it.
  1090. return header;
  1091. }
  1092. }
  1093. // should never come, but load the header if needed.
  1094. HdrDocument hdrDoc = HdrDocument.Factory.parse( hdrPart.getInputStream() );
  1095. CTHdrFtr hdrFtr = hdrDoc.getHdr();
  1096. XWPFHeader hdr = new XWPFHeader( document, hdrFtr );
  1097. return hdr;
  1098. }
  1099. /**
  1100. * Returns the {@link XWPFFooter} of the given footer reference.
  1101. *
  1102. * @param footerRef the footer reference.
  1103. * @return
  1104. * @throws XmlException
  1105. * @throws IOException
  1106. */
  1107. protected XWPFFooter getXWPFFooter( CTHdrFtrRef footerRef )
  1108. throws XmlException, IOException
  1109. {
  1110. PackagePart hdrPart = document.getPartById( footerRef.getId() );
  1111. List<XWPFFooter> footers = document.getFooterList();
  1112. for ( XWPFFooter footer : footers )
  1113. {
  1114. if ( footer.getPackagePart().equals( hdrPart ) )
  1115. {
  1116. // footer is aleady loaded, return it.
  1117. return footer;
  1118. }
  1119. }
  1120. // should never come, but load the footer if needed.
  1121. FtrDocument hdrDoc = FtrDocument.Factory.parse( hdrPart.getInputStream() );
  1122. CTHdrFtr hdrFtr = hdrDoc.getFtr();
  1123. XWPFFooter ftr = new XWPFFooter( document, hdrFtr );
  1124. return ftr;
  1125. }
  1126. // ------------------------ Image --------------
  1127. protected void visitDrawing( CTDrawing drawing, T parentContainer )
  1128. throws Exception
  1129. {
  1130. List<CTInline> inlines = drawing.getInlineList();
  1131. for ( CTInline inline : inlines )
  1132. {
  1133. visitInline( inline, parentContainer );
  1134. }
  1135. List<CTAnchor> anchors = drawing.getAnchorList();
  1136. for ( CTAnchor anchor : anchors )
  1137. {
  1138. visitAnchor( anchor, parentContainer );
  1139. }
  1140. }
  1141. protected void visitAnchor( CTAnchor anchor, T parentContainer )
  1142. throws Exception
  1143. {
  1144. CTGraphicalObject graphic = anchor.getGraphic();
  1145. /*
  1146. * wp:positionH relativeFrom="column"> <wp:posOffset>-898525</wp:posOffset> </wp:positionH>
  1147. */
  1148. STRelFromH.Enum relativeFromH = null;
  1149. Float offsetX = null;
  1150. CTPosH positionH = anchor.getPositionH();
  1151. if ( positionH != null )
  1152. {
  1153. relativeFromH = positionH.getRelativeFrom();
  1154. offsetX = DxaUtil.emu2points( positionH.getPosOffset() );
  1155. }
  1156. STRelFromV.Enum relativeFromV = null;
  1157. Float offsetY = null;
  1158. CTPosV positionV = anchor.getPositionV();
  1159. if ( positionV != null )
  1160. {
  1161. relativeFromV = positionV.getRelativeFrom();
  1162. offsetY = DxaUtil.emu2points( positionV.getPosOffset() );
  1163. }
  1164. STWrapText.Enum wrapText = null;
  1165. CTWrapSquare wrapSquare = anchor.getWrapSquare();
  1166. if ( wrapSquare != null )
  1167. {
  1168. wrapText = wrapSquare.getWrapText();
  1169. }
  1170. visitGraphicalObject( parentContainer, graphic, offsetX, relativeFromH, offsetY, relativeFromV, wrapText );
  1171. }
  1172. protected void visitInline( CTInline inline, T parentContainer )
  1173. throws Exception
  1174. {
  1175. CTGraphicalObject graphic = inline.getGraphic();
  1176. visitGraphicalObject( parentContainer, graphic, null, null, null, null, null );
  1177. }
  1178. private void visitGraphicalObject( T parentContainer, CTGraphicalObject graphic, Float offsetX,
  1179. STRelFromH.Enum relativeFromH, Float offsetY, STRelFromV.Enum relativeFromV,
  1180. STWrapText.Enum wrapText )
  1181. throws Exception
  1182. {
  1183. if ( graphic != null )
  1184. {
  1185. CTGraphicalObjectData graphicData = graphic.getGraphicData();
  1186. if ( graphicData != null )
  1187. {
  1188. XmlCursor c = graphicData.newCursor();
  1189. c.selectPath( "./*" );
  1190. while ( c.toNextSelection() )
  1191. {
  1192. XmlObject o = c.getObject();
  1193. if ( o instanceof CTPicture )
  1194. {
  1195. CTPicture picture = (CTPicture) o;
  1196. // extract the picture if needed
  1197. IImageExtractor extractor = getImageExtractor();
  1198. if ( extractor != null )
  1199. {
  1200. XWPFPictureData pictureData = getPictureData( picture );
  1201. if ( pictureData != null )
  1202. {
  1203. try
  1204. {
  1205. extractor.extract( WORD_MEDIA + pictureData.getFileName(), pictureData.getData() );
  1206. }
  1207. catch ( Throwable e )
  1208. {
  1209. LOGGER.log( Level.SEVERE,
  1210. "Error while extracting the image " + pictureData.getFileName(), e );
  1211. }
  1212. }
  1213. }
  1214. // visit the picture.
  1215. visitPicture( picture, offsetX, relativeFromH, offsetY, relativeFromV, wrapText,
  1216. parentContainer );
  1217. }
  1218. }
  1219. c.dispose();
  1220. }
  1221. }
  1222. }
  1223. /**
  1224. * Returns the picture data of the given image id.
  1225. *
  1226. * @param blipId
  1227. * @return
  1228. */
  1229. protected XWPFPictureData getPictureDataByID( String blipId )
  1230. {
  1231. if ( currentHeader != null )
  1232. {
  1233. return currentHeader.getPictureDataByID( blipId );
  1234. }
  1235. if ( currentFooter != null )
  1236. {
  1237. return currentFooter.getPictureDataByID( blipId );
  1238. }
  1239. return document.getPictureDataByID( blipId );
  1240. }
    /**
     * Returns the image extractor configured in the options.
     *
     * @return the value of {@code options.getExtractor()}; callers in this class treat null as "no
     *         extraction".
     */
    protected IImageExtractor getImageExtractor()
    {
        return options.getExtractor();
    }
  1250. /**
  1251. * Returns the picture data of the given picture.
  1252. *
  1253. * @param picture
  1254. * @return
  1255. */
  1256. public XWPFPictureData getPictureData( CTPicture picture )
  1257. {
  1258. String blipId = picture.getBlipFill().getBlip().getEmbed();
  1259. return getPictureDataByID( blipId );
  1260. }
    /**
     * Hook implemented by concrete visitors to visit a picture. The position and wrapping arguments are null
     * when the picture comes from an inline element; for an anchor each of them is null when the anchor does
     * not declare it.
     *
     * @param picture the picture to visit.
     * @param offsetX horizontal offset, or null.
     * @param relativeFromH origin of the horizontal offset, or null.
     * @param offsetY vertical offset, or null.
     * @param relativeFromV origin of the vertical offset, or null.
     * @param wrapText text wrapping, or null.
     * @param parentContainer the container which receives the picture.
     * @throws Exception if the concrete visitor fails.
     */
    protected abstract void visitPicture( CTPicture picture, Float offsetX, STRelFromH.Enum relativeFromH,
                                          Float offsetY, STRelFromV.Enum relativeFromV, STWrapText.Enum wrapText,
                                          T parentContainer )
        throws Exception;
  1265. // ------------------------ Master page --------------
    /**
     * Sets the active master page. Implemented by concrete visitors.
     *
     * @param masterPage the master page which becomes the active one.
     */
    protected abstract void setActiveMasterPage( E masterPage );
    /**
     * Creates an instance of master page. Implemented by concrete visitors.
     *
     * @param sectPr the section properties the master page is created for.
     * @return the created master page.
     */
    protected abstract IXWPFMasterPage createMasterPage( CTSectPr sectPr );
  1279. }