/src/org/apache/poi/hwpf/HWPFDocument.java
Java | 1042 lines | 553 code | 126 blank | 363 comment | 64 complexity | 677c332698819342ff2ece2baf93a4ed MD5 | raw file
Possible License(s): Apache-2.0
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
- package org.apache.poi.hwpf;
- import java.io.ByteArrayInputStream;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.OutputStream;
- import java.util.Iterator;
- import org.apache.poi.hpsf.DocumentSummaryInformation;
- import org.apache.poi.hpsf.SummaryInformation;
- import org.apache.poi.hwpf.model.BookmarksTables;
- import org.apache.poi.hwpf.model.CHPBinTable;
- import org.apache.poi.hwpf.model.ComplexFileTable;
- import org.apache.poi.hwpf.model.DocumentProperties;
- import org.apache.poi.hwpf.model.EscherRecordHolder;
- import org.apache.poi.hwpf.model.FSPADocumentPart;
- import org.apache.poi.hwpf.model.FSPATable;
- import org.apache.poi.hwpf.model.FieldsTables;
- import org.apache.poi.hwpf.model.FontTable;
- import org.apache.poi.hwpf.model.ListTables;
- import org.apache.poi.hwpf.model.NoteType;
- import org.apache.poi.hwpf.model.NotesTables;
- import org.apache.poi.hwpf.model.PAPBinTable;
- import org.apache.poi.hwpf.model.PicturesTable;
- import org.apache.poi.hwpf.model.RevisionMarkAuthorTable;
- import org.apache.poi.hwpf.model.SavedByTable;
- import org.apache.poi.hwpf.model.SectionTable;
- import org.apache.poi.hwpf.model.ShapesTable;
- import org.apache.poi.hwpf.model.SinglentonTextPiece;
- import org.apache.poi.hwpf.model.StyleSheet;
- import org.apache.poi.hwpf.model.SubdocumentType;
- import org.apache.poi.hwpf.model.TextPiece;
- import org.apache.poi.hwpf.model.TextPieceTable;
- import org.apache.poi.hwpf.model.io.HWPFFileSystem;
- import org.apache.poi.hwpf.model.io.HWPFOutputStream;
- import org.apache.poi.hwpf.usermodel.Bookmarks;
- import org.apache.poi.hwpf.usermodel.BookmarksImpl;
- import org.apache.poi.hwpf.usermodel.Field;
- import org.apache.poi.hwpf.usermodel.Fields;
- import org.apache.poi.hwpf.usermodel.FieldsImpl;
- import org.apache.poi.hwpf.usermodel.HWPFList;
- import org.apache.poi.hwpf.usermodel.Notes;
- import org.apache.poi.hwpf.usermodel.NotesImpl;
- import org.apache.poi.hwpf.usermodel.OfficeDrawings;
- import org.apache.poi.hwpf.usermodel.OfficeDrawingsImpl;
- import org.apache.poi.hwpf.usermodel.Range;
- import org.apache.poi.poifs.common.POIFSConstants;
- import org.apache.poi.poifs.filesystem.DirectoryNode;
- import org.apache.poi.poifs.filesystem.DocumentEntry;
- import org.apache.poi.poifs.filesystem.Entry;
- import org.apache.poi.poifs.filesystem.EntryUtils;
- import org.apache.poi.poifs.filesystem.POIFSFileSystem;
- import org.apache.poi.util.Internal;
- /**
- *
- * This class acts as the bucket that we throw all of the Word data structures
- * into.
- *
- * @author Ryan Ackley
- */
- @SuppressWarnings("deprecation")
- public final class HWPFDocument extends HWPFDocumentCore
- {
- static final String PROPERTY_PRESERVE_BIN_TABLES = "org.apache.poi.hwpf.preserveBinTables";
- private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable";
- private static final String STREAM_DATA = "Data";
- private static final String STREAM_TABLE_0 = "0Table";
- private static final String STREAM_TABLE_1 = "1Table";
- /** table stream buffer*/
- protected byte[] _tableStream;
- /** data stream buffer*/
- protected byte[] _dataStream;
- /** Document wide Properties*/
- protected DocumentProperties _dop;
- /** Contains text of the document wrapped in a obfuscated Word data
- * structure*/
- protected ComplexFileTable _cft;
- /** Contains text buffer linked directly to single-piece document text piece */
- protected StringBuilder _text;
- /** Holds the save history for this document. */
- protected SavedByTable _sbt;
-
- /** Holds the revision mark authors for this document. */
- protected RevisionMarkAuthorTable _rmat;
- /** Holds FSBA (shape) information */
- private FSPATable _fspaHeaders;
- /** Holds FSBA (shape) information */
- private FSPATable _fspaMain;
- /** Escher Drawing Group information */
- protected EscherRecordHolder _escherRecordHolder;
- /** Holds pictures table */
- protected PicturesTable _pictures;
- /** Holds Office Art objects */
- @Deprecated
- protected ShapesTable _officeArts;
-
- /** Holds Office Art objects */
- protected OfficeDrawingsImpl _officeDrawingsHeaders;
- /** Holds Office Art objects */
- protected OfficeDrawingsImpl _officeDrawingsMain;
- /** Holds the bookmarks tables */
- protected BookmarksTables _bookmarksTables;
- /** Holds the bookmarks */
- protected Bookmarks _bookmarks;
- /** Holds the ending notes tables */
- protected NotesTables _endnotesTables = new NotesTables( NoteType.ENDNOTE );
- /** Holds the footnotes */
- protected Notes _endnotes = new NotesImpl( _endnotesTables );
- /** Holds the footnotes tables */
- protected NotesTables _footnotesTables = new NotesTables( NoteType.FOOTNOTE );
- /** Holds the footnotes */
- protected Notes _footnotes = new NotesImpl( _footnotesTables );
- /** Holds the fields PLCFs */
- protected FieldsTables _fieldsTables;
- /** Holds the fields */
- protected Fields _fields;
- protected HWPFDocument()
- {
- super();
- this._text = new StringBuilder("\r");
- }
- /**
- * This constructor loads a Word document from an InputStream.
- *
- * @param istream The InputStream that contains the Word document.
- * @throws IOException If there is an unexpected IOException from the passed
- * in InputStream.
- */
- public HWPFDocument(InputStream istream) throws IOException
- {
- //do Ole stuff
- this( verifyAndBuildPOIFS(istream) );
- }
- /**
- * This constructor loads a Word document from a POIFSFileSystem
- *
- * @param pfilesystem The POIFSFileSystem that contains the Word document.
- * @throws IOException If there is an unexpected IOException from the passed
- * in POIFSFileSystem.
- */
- public HWPFDocument(POIFSFileSystem pfilesystem) throws IOException
- {
- this(pfilesystem.getRoot());
- }
- /**
- * This constructor loads a Word document from a specific point
- * in a POIFSFileSystem, probably not the default.
- * Used typically to open embedded documents.
- *
- * @param pfilesystem The POIFSFileSystem that contains the Word document.
- * @throws IOException If there is an unexpected IOException from the passed
- * in POIFSFileSystem.
- * @deprecated Use {@link #HWPFDocument(DirectoryNode)} instead
- */
- @Deprecated
- public HWPFDocument(DirectoryNode directory, POIFSFileSystem pfilesystem) throws IOException
- {
- this(directory);
- }
-
- /**
- * This constructor loads a Word document from a specific point
- * in a POIFSFileSystem, probably not the default.
- * Used typically to open embeded documents.
- *
- * @param directory The DirectoryNode that contains the Word document.
- * @throws IOException If there is an unexpected IOException from the passed
- * in POIFSFileSystem.
- */
- public HWPFDocument(DirectoryNode directory) throws IOException
- {
- // Load the main stream and FIB
- // Also handles HPSF bits
- super(directory);
- // Is this document too old for us?
- if(_fib.getFibBase().getNFib() < 106) {
- throw new OldWordFileFormatException("The document is too old - Word 95 or older. Try HWPFOldDocument instead?");
- }
- // use the fib to determine the name of the table stream.
- String name = STREAM_TABLE_0;
- if (_fib.getFibBase().isFWhichTblStm())
- {
- name = STREAM_TABLE_1;
- }
- // Grab the table stream.
- DocumentEntry tableProps;
- try {
- tableProps =
- (DocumentEntry)directory.getEntry(name);
- } catch(FileNotFoundException fnfe) {
- throw new IllegalStateException("Table Stream '" + name + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)");
- }
- // read in the table stream.
- _tableStream = new byte[tableProps.getSize()];
- directory.createDocumentInputStream(name).read(_tableStream);
- _fib.fillVariableFields(_mainStream, _tableStream);
- // read in the data stream.
- try
- {
- DocumentEntry dataProps =
- (DocumentEntry)directory.getEntry(STREAM_DATA);
- _dataStream = new byte[dataProps.getSize()];
- directory.createDocumentInputStream(STREAM_DATA).read(_dataStream);
- }
- catch(java.io.FileNotFoundException e)
- {
- _dataStream = new byte[0];
- }
- // Get the cp of the start of text in the main stream
- // The latest spec doc says this is always zero!
- int fcMin = 0;
- //fcMin = _fib.getFcMin()
- // Start to load up our standard structures.
- _dop = new DocumentProperties(_tableStream, _fib.getFcDop(), _fib.getLcbDop() );
- _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
- TextPieceTable _tpt = _cft.getTextPieceTable();
- // Now load the rest of the properties, which need to be adjusted
- // for where text really begin
- _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt);
- _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt);
- _text = _tpt.getText();
- /*
- * in this mode we preserving PAPX/CHPX structure from file, so text may
- * miss from output, and text order may be corrupted
- */
- boolean preserveBinTables = false;
- try
- {
- preserveBinTables = Boolean.parseBoolean( System
- .getProperty( PROPERTY_PRESERVE_BIN_TABLES ) );
- }
- catch ( Exception exc )
- {
- // ignore;
- }
- if ( !preserveBinTables )
- {
- _cbt.rebuild( _cft );
- _pbt.rebuild( _text, _cft );
- }
- /*
- * Property to disable text rebuilding. In this mode changing the text
- * will lead to unpredictable behavior
- */
- boolean preserveTextTable = false;
- try
- {
- preserveTextTable = Boolean.parseBoolean( System
- .getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) );
- }
- catch ( Exception exc )
- {
- // ignore;
- }
- if ( !preserveTextTable )
- {
- _cft = new ComplexFileTable();
- _tpt = _cft.getTextPieceTable();
- final TextPiece textPiece = new SinglentonTextPiece( _text );
- _tpt.add( textPiece );
- _text = textPiece.getStringBuilder();
- }
- // Read FSPA and Escher information
- // _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(),
- // _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
- _fspaHeaders = new FSPATable( _tableStream, _fib,
- FSPADocumentPart.HEADER );
- _fspaMain = new FSPATable( _tableStream, _fib, FSPADocumentPart.MAIN );
- if (_fib.getFcDggInfo() != 0)
- {
- _escherRecordHolder = new EscherRecordHolder(_tableStream, _fib.getFcDggInfo(), _fib.getLcbDggInfo());
- } else
- {
- _escherRecordHolder = new EscherRecordHolder();
- }
- // read in the pictures stream
- _pictures = new PicturesTable(this, _dataStream, _mainStream, _fspaMain, _escherRecordHolder);
- // And the art shapes stream
- _officeArts = new ShapesTable(_tableStream, _fib);
- // And escher pictures
- _officeDrawingsHeaders = new OfficeDrawingsImpl( _fspaHeaders, _escherRecordHolder, _mainStream );
- _officeDrawingsMain = new OfficeDrawingsImpl( _fspaMain , _escherRecordHolder, _mainStream);
- _st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, _tpt, _fib.getSubdocumentTextStreamLength( SubdocumentType.MAIN));
- _ss = new StyleSheet(_tableStream, _fib.getFcStshf());
- _ft = new FontTable(_tableStream, _fib.getFcSttbfffn(), _fib.getLcbSttbfffn());
- int listOffset = _fib.getFcPlfLst();
- int lfoOffset = _fib.getFcPlfLfo();
- if ( listOffset != 0 && _fib.getLcbPlfLst() != 0 )
- {
- _lt = new ListTables( _tableStream, listOffset, _fib.getFcPlfLfo(),
- _fib.getLcbPlfLfo() );
- }
- int sbtOffset = _fib.getFcSttbSavedBy();
- int sbtLength = _fib.getLcbSttbSavedBy();
- if (sbtOffset != 0 && sbtLength != 0)
- {
- _sbt = new SavedByTable(_tableStream, sbtOffset, sbtLength);
- }
- int rmarkOffset = _fib.getFcSttbfRMark();
- int rmarkLength = _fib.getLcbSttbfRMark();
- if (rmarkOffset != 0 && rmarkLength != 0)
- {
- _rmat = new RevisionMarkAuthorTable(_tableStream, rmarkOffset, rmarkLength);
- }
- _bookmarksTables = new BookmarksTables( _tableStream, _fib );
- _bookmarks = new BookmarksImpl( _bookmarksTables );
- _endnotesTables = new NotesTables( NoteType.ENDNOTE, _tableStream, _fib );
- _endnotes = new NotesImpl( _endnotesTables );
- _footnotesTables = new NotesTables( NoteType.FOOTNOTE, _tableStream, _fib );
- _footnotes = new NotesImpl( _footnotesTables );
- _fieldsTables = new FieldsTables(_tableStream, _fib);
- _fields = new FieldsImpl(_fieldsTables);
- }
- @Internal
- public TextPieceTable getTextTable()
- {
- return _cft.getTextPieceTable();
- }
- @Internal
- @Override
- public StringBuilder getText()
- {
- return _text;
- }
- public DocumentProperties getDocProperties()
- {
- return _dop;
- }
- public Range getOverallRange() {
- return new Range(0, _text.length(), this);
- }
- /**
- * Returns the range which covers the whole of the document, but excludes
- * any headers and footers.
- */
- public Range getRange()
- {
- // // First up, trigger a full-recalculate
- // // Needed in case of deletes etc
- // getOverallRange();
- //
- // if ( getFileInformationBlock().isFComplex() )
- // {
- // /*
- // * Page 31:
- // *
- // * main document must be found by examining the piece table entries
- // * from the 0th piece table entry from the piece table entry that
- // * describes cp=fib.ccpText.
- // */
- // // TODO: review
- // return new Range( _cpSplit.getMainDocumentStart(),
- // _cpSplit.getMainDocumentEnd(), this );
- // }
- //
- // /*
- // * Page 31:
- // *
- // * "In a non-complex file, this means text of the: main document
- // begins
- // * at fib.fcMin in the file and continues through
- // * fib.fcMin+fib.ccpText."
- // */
- // int bytesStart = getFileInformationBlock().getFcMin();
- //
- // int charsStart = getTextTable().getCharIndex( bytesStart );
- // int charsEnd = charsStart
- // + getFileInformationBlock().getSubdocumentTextStreamLength(
- // SubdocumentType.MAIN );
- // it seems much simpler -- sergey
- return getRange(SubdocumentType.MAIN);
- }
- private Range getRange( SubdocumentType subdocument )
- {
- int startCp = 0;
- for ( SubdocumentType previos : SubdocumentType.ORDERED )
- {
- int length = getFileInformationBlock()
- .getSubdocumentTextStreamLength( previos );
- if ( subdocument == previos )
- return new Range( startCp, startCp + length, this );
- startCp += length;
- }
- throw new UnsupportedOperationException(
- "Subdocument type not supported: " + subdocument );
- }
- /**
- * Returns the {@link Range} which covers all the Footnotes.
- *
- * @return the {@link Range} which covers all the Footnotes.
- */
- public Range getFootnoteRange()
- {
- return getRange( SubdocumentType.FOOTNOTE );
- }
- /**
- * Returns the {@link Range} which covers all endnotes.
- *
- * @return the {@link Range} which covers all endnotes.
- */
- public Range getEndnoteRange()
- {
- return getRange( SubdocumentType.ENDNOTE );
- }
- /**
- * Returns the {@link Range} which covers all annotations.
- *
- * @return the {@link Range} which covers all annotations.
- */
- public Range getCommentsRange()
- {
- return getRange( SubdocumentType.ANNOTATION );
- }
- /**
- * Returns the {@link Range} which covers all textboxes.
- *
- * @return the {@link Range} which covers all textboxes.
- */
- public Range getMainTextboxRange()
- {
- return getRange( SubdocumentType.TEXTBOX );
- }
- /**
- * Returns the range which covers all "Header Stories".
- * A header story contains a header, footer, end note
- * separators and footnote separators.
- */
- public Range getHeaderStoryRange() {
- return getRange( SubdocumentType.HEADER );
- }
- /**
- * Returns the character length of a document.
- * @return the character length of a document
- */
- public int characterLength()
- {
- return _text.length();
- }
- /**
- * Gets a reference to the saved -by table, which holds the save history for the document.
- *
- * @return the saved-by table.
- */
- @Internal
- public SavedByTable getSavedByTable()
- {
- return _sbt;
- }
- /**
- * Gets a reference to the revision mark author table, which holds the revision mark authors for the document.
- *
- * @return the saved-by table.
- */
- @Internal
- public RevisionMarkAuthorTable getRevisionMarkAuthorTable()
- {
- return _rmat;
- }
-
- /**
- * @return PicturesTable object, that is able to extract images from this document
- */
- public PicturesTable getPicturesTable() {
- return _pictures;
- }
- @Internal
- public EscherRecordHolder getEscherRecordHolder() {
- return _escherRecordHolder;
- }
- /**
- * @return ShapesTable object, that is able to extract office are shapes
- * from this document
- * @deprecated use {@link #getOfficeDrawingsMain()} instead
- */
- @Deprecated
- @Internal
- public ShapesTable getShapesTable()
- {
- return _officeArts;
- }
- public OfficeDrawings getOfficeDrawingsHeaders()
- {
- return _officeDrawingsHeaders;
- }
- public OfficeDrawings getOfficeDrawingsMain()
- {
- return _officeDrawingsMain;
- }
- /**
- * @return user-friendly interface to access document bookmarks
- */
- public Bookmarks getBookmarks()
- {
- return _bookmarks;
- }
- /**
- * @return user-friendly interface to access document endnotes
- */
- public Notes getEndnotes()
- {
- return _endnotes;
- }
- /**
- * @return user-friendly interface to access document footnotes
- */
- public Notes getFootnotes()
- {
- return _footnotes;
- }
- /**
- * @return FieldsTables object, that is able to extract fields descriptors from this document
- * @deprecated
- */
- @Deprecated
- @Internal
- public FieldsTables getFieldsTables() {
- return _fieldsTables;
- }
- /**
- * Returns user-friendly interface to access document {@link Field}s
- *
- * @return user-friendly interface to access document {@link Field}s
- */
- public Fields getFields()
- {
- return _fields;
- }
- /**
- * Writes out the word file that is represented by an instance of this class.
- *
- * @param out The OutputStream to write to.
- * @throws IOException If there is an unexpected IOException from the passed
- * in OutputStream.
- */
- public void write(OutputStream out)
- throws IOException
- {
- // initialize our streams for writing.
- HWPFFileSystem docSys = new HWPFFileSystem();
- HWPFOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
- HWPFOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
- //HWPFOutputStream dataStream = docSys.getStream("Data");
- int tableOffset = 0;
- // FileInformationBlock fib = (FileInformationBlock)_fib.clone();
- // clear the offsets and sizes in our FileInformationBlock.
- _fib.clearOffsetsSizes();
- // determine the FileInformationBLock size
- int fibSize = _fib.getSize();
- fibSize += POIFSConstants.SMALLER_BIG_BLOCK_SIZE -
- (fibSize % POIFSConstants.SMALLER_BIG_BLOCK_SIZE);
- // preserve space for the FileInformationBlock because we will be writing
- // it after we write everything else.
- byte[] placeHolder = new byte[fibSize];
- wordDocumentStream.write(placeHolder);
- int mainOffset = wordDocumentStream.getOffset();
- // write out the StyleSheet.
- _fib.setFcStshf(tableOffset);
- _ss.writeTo(tableStream);
- _fib.setLcbStshf(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
- // get fcMin and fcMac because we will be writing the actual text with the
- // complex table.
- int fcMin = mainOffset;
- /*
- * clx (encoding of the sprm lists for a complex file and piece table
- * for a any file) Written immediately after the end of the previously
- * recorded structure. This is recorded in all Word documents
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 23 of 210
- */
-
- // write out the Complex table, includes text.
- _fib.setFcClx(tableOffset);
- _cft.writeTo(wordDocumentStream, tableStream);
- _fib.setLcbClx(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
- int fcMac = wordDocumentStream.getOffset();
- /*
- * dop (document properties record) Written immediately after the end of
- * the previously recorded structure. This is recorded in all Word
- * documents
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 23 of 210
- */
- // write out the DocumentProperties.
- _fib.setFcDop(tableOffset);
- _dop.writeTo(tableStream);
- _fib.setLcbDop(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
- /*
- * plcfBkmkf (table recording beginning CPs of bookmarks) Written
- * immediately after the sttbfBkmk, if the document contains bookmarks.
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 24 of 210
- */
- if ( _bookmarksTables != null )
- {
- _bookmarksTables.writePlcfBkmkf( _fib, tableStream );
- tableOffset = tableStream.getOffset();
- }
- /*
- * plcfBkmkl (table recording limit CPs of bookmarks) Written
- * immediately after the plcfBkmkf, if the document contains bookmarks.
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 24 of 210
- */
- if ( _bookmarksTables != null )
- {
- _bookmarksTables.writePlcfBkmkl( _fib, tableStream );
- tableOffset = tableStream.getOffset();
- }
- /*
- * plcfbteChpx (bin table for CHP FKPs) Written immediately after the
- * previously recorded table. This is recorded in all Word documents.
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 24 of 210
- */
- // write out the CHPBinTable.
- _fib.setFcPlcfbteChpx(tableOffset);
- _cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable());
- _fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
- /*
- * plcfbtePapx (bin table for PAP FKPs) Written immediately after the
- * plcfbteChpx. This is recorded in all Word documents.
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 24 of 210
- */
- // write out the PAPBinTable.
- _fib.setFcPlcfbtePapx(tableOffset);
- _pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable());
- _fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
- /*
- * plcfendRef (endnote reference position table) Written immediately
- * after the previously recorded table if the document contains endnotes
- *
- * plcfendTxt (endnote text position table) Written immediately after
- * the plcfendRef if the document contains endnotes
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 24 of 210
- */
- _endnotesTables.writeRef( _fib, tableStream );
- _endnotesTables.writeTxt( _fib, tableStream );
- tableOffset = tableStream.getOffset();
- /*
- * plcffld*** (table of field positions and statuses for annotation
- * subdocument) Written immediately after the previously recorded table,
- * if the ******* subdocument contains fields.
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 24 of 210
- */
- if ( _fieldsTables != null )
- {
- _fieldsTables.write( _fib, tableStream );
- tableOffset = tableStream.getOffset();
- }
- /*
- * plcffndRef (footnote reference position table) Written immediately
- * after the stsh if the document contains footnotes
- *
- * plcffndTxt (footnote text position table) Written immediately after
- * the plcffndRef if the document contains footnotes
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 24 of 210
- */
- _footnotesTables.writeRef( _fib, tableStream );
- _footnotesTables.writeTxt( _fib, tableStream );
- tableOffset = tableStream.getOffset();
- /*
- * plcfsed (section table) Written immediately after the previously
- * recorded table. Recorded in all Word documents
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 25 of 210
- */
- // write out the SectionTable.
- _fib.setFcPlcfsed(tableOffset);
- _st.writeTo(wordDocumentStream, tableStream);
- _fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
- // write out the list tables
- if ( _lt != null )
- {
- /*
- * plcflst (list formats) Written immediately after the end of the
- * previously recorded, if there are any lists defined in the
- * document. This begins with a short count of LSTF structures
- * followed by those LSTF structures. This is immediately followed
- * by the allocated data hanging off the LSTFs. This data consists
- * of the array of LVLs for each LSTF. (Each LVL consists of an LVLF
- * followed by two grpprls and an XST.)
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 25 of 210
- */
- _lt.writeListDataTo( _fib, tableStream );
- tableOffset = tableStream.getOffset();
- /*
- * plflfo (more list formats) Written immediately after the end of
- * the plcflst and its accompanying data, if there are any lists
- * defined in the document. This consists first of a PL of LFO
- * records, followed by the allocated data (if any) hanging off the
- * LFOs. The allocated data consists of the array of LFOLVLFs for
- * each LFO (and each LFOLVLF is immediately followed by some LVLs).
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 26 of 210
- */
- _lt.writeListOverridesTo( _fib, tableStream );
- tableOffset = tableStream.getOffset();
- }
- /*
- * sttbfBkmk (table of bookmark name strings) Written immediately after
- * the previously recorded table, if the document contains bookmarks.
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 27 of 210
- */
- if ( _bookmarksTables != null )
- {
- _bookmarksTables.writeSttbfBkmk( _fib, tableStream );
- tableOffset = tableStream.getOffset();
- }
- /*
- * sttbSavedBy (last saved by string table) Written immediately after
- * the previously recorded table.
- *
- * Microsoft Office Word 97-2007 Binary File Format (.doc)
- * Specification; Page 27 of 210
- */
- // write out the saved-by table.
- if (_sbt != null)
- {
- _fib.setFcSttbSavedBy(tableOffset);
- _sbt.writeTo(tableStream);
- _fib.setLcbSttbSavedBy(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
- }
-
- // write out the revision mark authors table.
- if (_rmat != null)
- {
- _fib.setFcSttbfRMark(tableOffset);
- _rmat.writeTo(tableStream);
- _fib.setLcbSttbfRMark(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
- }
- // write out the FontTable.
- _fib.setFcSttbfffn(tableOffset);
- _ft.writeTo(tableStream);
- _fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset);
- tableOffset = tableStream.getOffset();
- // set some variables in the FileInformationBlock.
- _fib.getFibBase().setFcMin(fcMin);
- _fib.getFibBase().setFcMac(fcMac);
- _fib.setCbMac(wordDocumentStream.getOffset());
- // make sure that the table, doc and data streams use big blocks.
- byte[] mainBuf = wordDocumentStream.toByteArray();
- if (mainBuf.length < 4096)
- {
- byte[] tempBuf = new byte[4096];
- System.arraycopy(mainBuf, 0, tempBuf, 0, mainBuf.length);
- mainBuf = tempBuf;
- }
- // Table1 stream will be used
- _fib.getFibBase().setFWhichTblStm( true );
- // write out the FileInformationBlock.
- //_fib.serialize(mainBuf, 0);
- _fib.writeTo(mainBuf, tableStream);
- byte[] tableBuf = tableStream.toByteArray();
- if (tableBuf.length < 4096)
- {
- byte[] tempBuf = new byte[4096];
- System.arraycopy(tableBuf, 0, tempBuf, 0, tableBuf.length);
- tableBuf = tempBuf;
- }
- byte[] dataBuf = _dataStream;
- if (dataBuf == null)
- {
- dataBuf = new byte[4096];
- }
- if (dataBuf.length < 4096)
- {
- byte[] tempBuf = new byte[4096];
- System.arraycopy(dataBuf, 0, tempBuf, 0, dataBuf.length);
- dataBuf = tempBuf;
- }
- // create new document preserving order of entries
- POIFSFileSystem pfs = new POIFSFileSystem();
- boolean docWritten = false;
- boolean dataWritten = false;
- boolean objectPoolWritten = false;
- boolean tableWritten = false;
- boolean propertiesWritten = false;
- for ( Iterator<Entry> iter = directory.getEntries(); iter.hasNext(); )
- {
- Entry entry = iter.next();
- if ( entry.getName().equals( STREAM_WORD_DOCUMENT ) )
- {
- if ( !docWritten )
- {
- pfs.createDocument( new ByteArrayInputStream( mainBuf ),
- STREAM_WORD_DOCUMENT );
- docWritten = true;
- }
- }
- else if ( entry.getName().equals( STREAM_OBJECT_POOL ) )
- {
- if ( !objectPoolWritten )
- {
- _objectPool.writeTo( pfs.getRoot() );
- objectPoolWritten = true;
- }
- }
- else if ( entry.getName().equals( STREAM_TABLE_0 )
- || entry.getName().equals( STREAM_TABLE_1 ) )
- {
- if ( !tableWritten )
- {
- pfs.createDocument( new ByteArrayInputStream( tableBuf ),
- STREAM_TABLE_1 );
- tableWritten = true;
- }
- }
- else if ( entry.getName().equals(
- SummaryInformation.DEFAULT_STREAM_NAME )
- || entry.getName().equals(
- DocumentSummaryInformation.DEFAULT_STREAM_NAME ) )
- {
- if ( !propertiesWritten )
- {
- writeProperties( pfs );
- propertiesWritten = true;
- }
- }
- else if ( entry.getName().equals( STREAM_DATA ) )
- {
- if ( !dataWritten )
- {
- pfs.createDocument( new ByteArrayInputStream( dataBuf ),
- STREAM_DATA );
- dataWritten = true;
- }
- }
- else
- {
- EntryUtils.copyNodeRecursively( entry, pfs.getRoot() );
- }
- }
- if ( !docWritten )
- pfs.createDocument( new ByteArrayInputStream( mainBuf ),
- STREAM_WORD_DOCUMENT );
- if ( !tableWritten )
- pfs.createDocument( new ByteArrayInputStream( tableBuf ),
- STREAM_TABLE_1 );
- if ( !propertiesWritten )
- writeProperties( pfs );
- if ( !dataWritten )
- pfs.createDocument( new ByteArrayInputStream( dataBuf ),
- STREAM_DATA );
- if ( !objectPoolWritten )
- _objectPool.writeTo( pfs.getRoot() );
- pfs.writeFilesystem( out );
- this.directory = pfs.getRoot();
- /*
- * since we updated all references in FIB and etc, using new arrays to
- * access data
- */
- this.directory = pfs.getRoot();
- this._tableStream = tableStream.toByteArray();
- this._dataStream = dataBuf;
- }
- @Internal
- public byte[] getDataStream()
- {
- return _dataStream;
- }
- @Internal
- public byte[] getTableStream()
- {
- return _tableStream;
- }
- public int registerList( HWPFList list )
- {
- if ( _lt == null )
- {
- _lt = new ListTables();
- }
- return _lt.addList( list.getListData(), list.getLFO(),
- list.getLFOData() );
- }
- public void delete(int start, int length)
- {
- Range r = new Range(start, start + length, this);
- r.delete();
- }
- }