PageRenderTime 5193ms CodeModel.GetById 0ms RepoModel.GetById 0ms app.codeStats 0ms

/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java

https://github.com/minstrelsy/SimpleAndroidDocView
Java | 368 lines | 187 code | 43 blank | 138 comment | 16 complexity | 4b50336a137f1a24d0274bce878f8610 MD5 | raw file
Possible License(s): Apache-2.0
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hwpf.model;
  16. import java.io.IOException;
  17. import java.util.ArrayList;
  18. import java.util.Arrays;
  19. import java.util.Collections;
  20. import java.util.List;
  21. import org.apache.poi.hwpf.model.io.HWPFOutputStream;
  22. import org.apache.poi.util.Internal;
  23. import org.apache.poi.util.LittleEndian;
  24. /**
  25. * Represents a PAP FKP. The style properties for paragraph and character runs
  26. * are stored in fkps. There are PAP fkps for paragraph properties and CHP fkps
  27. * for character run properties. The first part of the fkp for both CHP and PAP
  28. * fkps consists of an array of 4 byte int offsets in the main stream for that
  29. * Paragraph's or Character run's text. The ending offset is the next
  30. * value in the array. For example, if an fkp has X number of Paragraph's
  31. * stored in it then there are (x + 1) 4 byte ints in the beginning array. The
  32. * number X is determined by the last byte in a 512 byte fkp.
  33. *
  34. * CHP and PAP fkps also store the compressed styles(grpprl) that correspond to
  35. * the offsets on the front of the fkp. The offset of the grpprls is determined
  36. * differently for CHP fkps and PAP fkps.
  37. *
  38. * @author Ryan Ackley
  39. */
  40. @Internal
  41. public final class PAPFormattedDiskPage extends FormattedDiskPage {
  42. private static final int BX_SIZE = 13;
  43. private static final int FC_SIZE = 4;
  44. private ArrayList<PAPX> _papxList = new ArrayList<PAPX>();
  45. private ArrayList<PAPX> _overFlow;
  /**
   * Creates an empty page. The argument is not used; this constructor only
   * delegates to the no-argument constructor.
   *
   * @param dataStream ignored
   * @deprecated Use {@link #PAPFormattedDiskPage()} instead
   */
  @Deprecated
  public PAPFormattedDiskPage( byte[] dataStream )
  {
      this();
  }
  /**
   * Creates an empty page that holds no PAPXs yet. Use {@link #fill(List)}
   * to queue PAPXs for writing.
   */
  public PAPFormattedDiskPage()
  {
      super();
  }
  /**
   * Creates a PAPFormattedDiskPage from a 512 byte array. This constructor
   * only delegates to
   * {@link #PAPFormattedDiskPage(byte[], byte[], int, CharIndexTranslator)},
   * passing the text piece table as the translator.
   *
   * @param documentStream passed through unchanged
   * @param dataStream passed through unchanged
   * @param offset passed through unchanged
   * @param fcMin ignored
   * @param tpt used as the translator from byte to character positions
   * @deprecated Use
   *             {@link #PAPFormattedDiskPage(byte[], byte[], int, CharIndexTranslator)}
   *             instead
   */
  @Deprecated
  public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
          int offset, int fcMin, TextPieceTable tpt )
  {
      this( documentStream, dataStream, offset, tpt );
  }
  68. /**
  69. * Creates a PAPFormattedDiskPage from a 512 byte array
  70. */
  71. public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
  72. int offset, CharIndexTranslator translator )
  73. {
  74. super( documentStream, offset );
  75. for ( int x = 0; x < _crun; x++ )
  76. {
  77. int bytesStartAt = getStart( x );
  78. int bytesEndAt = getEnd( x );
  79. // int charStartAt = translator.getCharIndex( bytesStartAt );
  80. // int charEndAt = translator.getCharIndex( bytesEndAt, charStartAt
  81. // );
  82. // PAPX papx = new PAPX( charStartAt, charEndAt, getGrpprl( x ),
  83. // getParagraphHeight( x ), dataStream );
  84. // _papxList.add( papx );
  85. for ( int[] range : translator.getCharIndexRanges( bytesStartAt,
  86. bytesEndAt ) )
  87. {
  88. PAPX papx = new PAPX( range[0], range[1], getGrpprl( x ),
  89. getParagraphHeight( x ), dataStream );
  90. _papxList.add( papx );
  91. }
  92. }
  93. _fkp = null;
  94. }
  95. /**
  96. * Fills the queue for writing.
  97. *
  98. * @param filler a List of PAPXs
  99. */
  100. public void fill(List<PAPX> filler)
  101. {
  102. _papxList.addAll(filler);
  103. }
  104. /**
  105. * Used when writing out a Word docunment. This method is part of a sequence
  106. * that is necessary because there is no easy and efficient way to
  107. * determine the number PAPX's that will fit into one FKP. THe sequence is
  108. * as follows:
  109. *
  110. * fill()
  111. * toByteArray()
  112. * getOverflow()
  113. *
  114. * @return The remaining PAPXs that didn't fit into this FKP.
  115. */
  116. ArrayList<PAPX> getOverflow()
  117. {
  118. return _overFlow;
  119. }
  120. /**
  121. * Gets the PAPX at index.
  122. * @param index The index to get the PAPX for.
  123. * @return The PAPX at index.
  124. */
  125. public PAPX getPAPX(int index)
  126. {
  127. return _papxList.get(index);
  128. }
  129. public List<PAPX> getPAPXs()
  130. {
  131. return Collections.unmodifiableList( _papxList );
  132. }
  133. /**
  134. * Gets the papx grpprl for the paragraph at index in this fkp.
  135. *
  136. * @param index The index of the papx to get.
  137. * @return a papx grpprl.
  138. */
  139. protected byte[] getGrpprl(int index)
  140. {
  141. int papxOffset = 2 * LittleEndian.getUnsignedByte(_fkp, _offset + (((_crun + 1) * FC_SIZE) + (index * BX_SIZE)));
  142. int size = 2 * LittleEndian.getUnsignedByte(_fkp, _offset + papxOffset);
  143. if(size == 0)
  144. {
  145. size = 2 * LittleEndian.getUnsignedByte(_fkp, _offset + ++papxOffset);
  146. }
  147. else
  148. {
  149. size--;
  150. }
  151. byte[] papx = new byte[size];
  152. System.arraycopy(_fkp, _offset + ++papxOffset, papx, 0, size);
  153. return papx;
  154. }
  155. /**
  156. * Creates a byte array representation of this data structure. Suitable for
  157. * writing to a Word document.
  158. *
  159. * @param dataStream required if PAPX is too big to fit in FKP
  160. *
  161. * @return A byte array representing this data structure.
  162. * @throws IOException
  163. * if an I/O error occurs.
  164. */
  165. protected byte[] toByteArray( HWPFOutputStream dataStream,
  166. CharIndexTranslator translator ) throws IOException
  167. {
  168. byte[] buf = new byte[512];
  169. int size = _papxList.size();
  170. int grpprlOffset = 0;
  171. int bxOffset = 0;
  172. int fcOffset = 0;
  173. byte[] lastGrpprl = new byte[0];
  174. // total size is currently the size of one FC
  175. int totalSize = FC_SIZE;
  176. int index = 0;
  177. for ( ; index < size; index++ )
  178. {
  179. byte[] grpprl = _papxList.get( index ).getGrpprl();
  180. int grpprlLength = grpprl.length;
  181. // is grpprl huge?
  182. if ( grpprlLength > 488 )
  183. {
  184. grpprlLength = 8; // set equal to size of sprmPHugePapx grpprl
  185. }
  186. // check to see if we have enough room for an FC, a BX, and the
  187. // grpprl
  188. // and the 1 byte size of the grpprl.
  189. int addition = 0;
  190. if ( !Arrays.equals( grpprl, lastGrpprl ) )
  191. {
  192. addition = ( FC_SIZE + BX_SIZE + grpprlLength + 1 );
  193. }
  194. else
  195. {
  196. addition = ( FC_SIZE + BX_SIZE );
  197. }
  198. totalSize += addition;
  199. // if size is uneven we will have to add one so the first grpprl
  200. // falls
  201. // on a word boundary
  202. if ( totalSize > 511 + ( index % 2 ) )
  203. {
  204. totalSize -= addition;
  205. break;
  206. }
  207. // grpprls must fall on word boundaries
  208. if ( grpprlLength % 2 > 0 )
  209. {
  210. totalSize += 1;
  211. }
  212. else
  213. {
  214. totalSize += 2;
  215. }
  216. lastGrpprl = grpprl;
  217. }
  218. // see if we couldn't fit some
  219. if ( index != size )
  220. {
  221. _overFlow = new ArrayList<PAPX>();
  222. _overFlow.addAll( _papxList.subList( index, size ) );
  223. }
  224. // index should equal number of papxs that will be in this fkp now.
  225. buf[511] = (byte) index;
  226. bxOffset = ( FC_SIZE * index ) + FC_SIZE;
  227. grpprlOffset = 511;
  228. PAPX papx = null;
  229. lastGrpprl = new byte[0];
  230. for ( int x = 0; x < index; x++ )
  231. {
  232. papx = _papxList.get( x );
  233. byte[] phe = papx.getParagraphHeight().toByteArray();
  234. byte[] grpprl = papx.getGrpprl();
  235. // is grpprl huge?
  236. if ( grpprl.length > 488 )
  237. {
  238. // if so do we have storage at getHugeGrpprlOffset()
  239. // int hugeGrpprlOffset = papx.getHugeGrpprlOffset();
  240. // if ( hugeGrpprlOffset == -1 ) // then we have no storage...
  241. // {
  242. // throw new UnsupportedOperationException(
  243. // "This Paragraph has no dataStream storage." );
  244. // }
  245. // we have some storage...
  246. // get the size of the existing storage
  247. // int maxHugeGrpprlSize = LittleEndian.getUShort( dataStream,
  248. // hugeGrpprlOffset );
  249. //
  250. // if ( maxHugeGrpprlSize < grpprl.length - 2 )
  251. // { // grpprl.length-2 because we don't store the istd
  252. // throw new UnsupportedOperationException(
  253. // "This Paragraph's dataStream storage is too small." );
  254. // }
  255. // store grpprl at hugeGrpprlOffset
  256. // grpprl.length-2 because we don't store the istd
  257. // System.arraycopy( grpprl, 2, dataStream, hugeGrpprlOffset +
  258. // 2,
  259. // grpprl.length - 2 );
  260. // LittleEndian.putUShort( dataStream, hugeGrpprlOffset,
  261. // grpprl.length - 2 );
  262. byte[] hugePapx = new byte[grpprl.length - 2];
  263. System.arraycopy( grpprl, 2, hugePapx, 0, grpprl.length - 2 );
  264. int dataStreamOffset = dataStream.getOffset();
  265. dataStream.write( hugePapx );
  266. // grpprl = grpprl containing only a sprmPHugePapx2
  267. int istd = LittleEndian.getUShort( grpprl, 0 );
  268. grpprl = new byte[8];
  269. LittleEndian.putUShort( grpprl, 0, istd );
  270. LittleEndian.putUShort( grpprl, 2, 0x6646 ); // sprmPHugePapx2
  271. LittleEndian.putInt( grpprl, 4, dataStreamOffset );
  272. }
  273. boolean same = Arrays.equals( lastGrpprl, grpprl );
  274. if ( !same )
  275. {
  276. grpprlOffset -= ( grpprl.length + ( 2 - grpprl.length % 2 ) );
  277. grpprlOffset -= ( grpprlOffset % 2 );
  278. }
  279. // LittleEndian.putInt( buf, fcOffset, papx.getStartBytes() );
  280. LittleEndian.putInt( buf, fcOffset,
  281. translator.getByteIndex( papx.getStart() ) );
  282. buf[bxOffset] = (byte) ( grpprlOffset / 2 );
  283. System.arraycopy( phe, 0, buf, bxOffset + 1, phe.length );
  284. /*
  285. * refer to the section on PAPX in the spec. Places a size on the
  286. * front of the PAPX. Has to do with how the grpprl stays on word
  287. * boundaries.
  288. */
  289. if ( !same )
  290. {
  291. int copyOffset = grpprlOffset;
  292. if ( ( grpprl.length % 2 ) > 0 )
  293. {
  294. buf[copyOffset++] = (byte) ( ( grpprl.length + 1 ) / 2 );
  295. }
  296. else
  297. {
  298. buf[++copyOffset] = (byte) ( ( grpprl.length ) / 2 );
  299. copyOffset++;
  300. }
  301. System.arraycopy( grpprl, 0, buf, copyOffset, grpprl.length );
  302. lastGrpprl = grpprl;
  303. }
  304. bxOffset += BX_SIZE;
  305. fcOffset += FC_SIZE;
  306. }
  307. // LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin);
  308. LittleEndian.putInt( buf, fcOffset,
  309. translator.getByteIndex( papx.getEnd() ) );
  310. return buf;
  311. }
  312. /**
  313. * Used to get the ParagraphHeight of a PAPX at a particular index.
  314. * @param index
  315. * @return The ParagraphHeight
  316. */
  317. private ParagraphHeight getParagraphHeight(int index)
  318. {
  319. int pheOffset = _offset + 1 + (((_crun + 1) * 4) + (index * 13));
  320. ParagraphHeight phe = new ParagraphHeight(_fkp, pheOffset);
  321. return phe;
  322. }
  323. }