PageRenderTime 3751ms CodeModel.GetById 1ms RepoModel.GetById 0ms app.codeStats 0ms

/components/forks/poi/src/loci/poi/hssf/record/SSTRecord.java

http://github.com/openmicroscopy/bioformats
Java | 478 lines | 175 code | 59 blank | 244 comment | 20 complexity | b6468b53ac2bca46a92a3e5f8c327ab0 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, Apache-2.0, BSD-2-Clause, MPL-2.0-no-copyleft-exception
  1. /*
  2. * #%L
  3. * Fork of Apache Jakarta POI.
  4. * %%
  5. * Copyright (C) 2008 - 2013 Open Microscopy Environment:
  6. * - Board of Regents of the University of Wisconsin-Madison
  7. * - Glencoe Software, Inc.
  8. * - University of Dundee
  9. * %%
  10. * Licensed under the Apache License, Version 2.0 (the "License");
  11. * you may not use this file except in compliance with the License.
  12. * You may obtain a copy of the License at
  13. *
  14. * http://www.apache.org/licenses/LICENSE-2.0
  15. *
  16. * Unless required by applicable law or agreed to in writing, software
  17. * distributed under the License is distributed on an "AS IS" BASIS,
  18. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19. * See the License for the specific language governing permissions and
  20. * limitations under the License.
  21. * #L%
  22. */
  23. /* ====================================================================
  24. Licensed to the Apache Software Foundation (ASF) under one or more
  25. contributor license agreements. See the NOTICE file distributed with
  26. this work for additional information regarding copyright ownership.
  27. The ASF licenses this file to You under the Apache License, Version 2.0
  28. (the "License"); you may not use this file except in compliance with
  29. the License. You may obtain a copy of the License at
  30. http://www.apache.org/licenses/LICENSE-2.0
  31. Unless required by applicable law or agreed to in writing, software
  32. distributed under the License is distributed on an "AS IS" BASIS,
  33. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  34. See the License for the specific language governing permissions and
  35. limitations under the License.
  36. ==================================================================== */
  37. package loci.poi.hssf.record;
  38. import loci.poi.util.IntMapper;
  39. import loci.poi.util.LittleEndianConsts;
  40. import java.util.Iterator;
  41. /**
  42. * Title: Static String Table Record
  43. * <P>
  44. * Description: This holds all the strings for LabelSSTRecords.
  45. * <P>
  46. * REFERENCE: PG 389 Microsoft Excel 97 Developer's Kit (ISBN:
  47. * 1-57231-498-2)
  48. * <P>
  49. * @author Andrew C. Oliver (acoliver at apache dot org)
  50. * @author Marc Johnson (mjohnson at apache dot org)
  51. * @author Glen Stampoultzis (glens at apache.org)
  52. *
  53. * @see loci.poi.hssf.record.LabelSSTRecord
  54. * @see loci.poi.hssf.record.ContinueRecord
  55. */
  56. public class SSTRecord
  57. extends Record
  58. {
  59. private static UnicodeString EMPTY_STRING = new UnicodeString("");
  60. /** how big can an SST record be? As big as any record can be: 8228 bytes */
  61. static final int MAX_RECORD_SIZE = 8228;
  62. /** standard record overhead: two shorts (record id plus data space size)*/
  63. static final int STD_RECORD_OVERHEAD =
  64. 2 * LittleEndianConsts.SHORT_SIZE;
  65. /** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */
  66. static final int SST_RECORD_OVERHEAD =
  67. ( STD_RECORD_OVERHEAD + ( 2 * LittleEndianConsts.INT_SIZE ) );
  68. /** how much data can we stuff into an SST record? That would be _max minus the standard SST record overhead */
  69. static final int MAX_DATA_SPACE = MAX_RECORD_SIZE - SST_RECORD_OVERHEAD;
  70. /** overhead for each string includes the string's character count (a short) and the flag describing its characteristics (a byte) */
  71. static final int STRING_MINIMAL_OVERHEAD = LittleEndianConsts.SHORT_SIZE + LittleEndianConsts.BYTE_SIZE;
  72. public static final short sid = 0xfc;
  73. /** union of strings in the SST and EXTSST */
  74. private int field_1_num_strings;
  75. /** according to docs ONLY SST */
  76. private int field_2_num_unique_strings;
  77. private IntMapper field_3_strings;
  78. private SSTDeserializer deserializer;
  79. /** Offsets from the beginning of the SST record (even across continuations) */
  80. int[] bucketAbsoluteOffsets;
  81. /** Offsets relative the start of the current SST or continue record */
  82. int[] bucketRelativeOffsets;
  83. /**
  84. * default constructor
  85. */
  86. public SSTRecord()
  87. {
  88. field_1_num_strings = 0;
  89. field_2_num_unique_strings = 0;
  90. field_3_strings = new IntMapper();
  91. deserializer = new SSTDeserializer(field_3_strings);
  92. }
  93. /**
  94. * Constructs an SST record and sets its fields appropriately.
  95. *
  96. * @param in the RecordInputstream to read the record from
  97. */
  98. public SSTRecord( RecordInputStream in )
  99. {
  100. super( in );
  101. }
  102. /**
  103. * Add a string.
  104. *
  105. * @param string string to be added
  106. *
  107. * @return the index of that string in the table
  108. */
  109. public int addString( final UnicodeString string )
  110. {
  111. field_1_num_strings++;
  112. UnicodeString ucs = ( string == null ) ? EMPTY_STRING
  113. : string;
  114. int rval;
  115. int index = field_3_strings.getIndex(ucs);
  116. if ( index != -1 )
  117. {
  118. rval = index;
  119. }
  120. else
  121. {
  122. // This is a new string -- we didn't see it among the
  123. // strings we've already collected
  124. rval = field_3_strings.size();
  125. field_2_num_unique_strings++;
  126. SSTDeserializer.addToStringTable( field_3_strings, ucs );
  127. }
  128. return rval;
  129. }
  130. /**
  131. * @return number of strings
  132. */
  133. public int getNumStrings()
  134. {
  135. return field_1_num_strings;
  136. }
  137. /**
  138. * @return number of unique strings
  139. */
  140. public int getNumUniqueStrings()
  141. {
  142. return field_2_num_unique_strings;
  143. }
  144. /**
  145. * USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
  146. * METHODS MANIPULATE THE NUMBER OF STRINGS AS A SIDE EFFECT; YOUR
  147. * ATTEMPTS AT MANIPULATING THE STRING COUNT IS LIKELY TO BE VERY
  148. * WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN THIS RECORD IS
  149. * WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ THE RECORD
  150. *
  151. * @param count number of strings
  152. *
  153. */
  154. public void setNumStrings( final int count )
  155. {
  156. field_1_num_strings = count;
  157. }
  158. /**
  159. * USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
  160. * METHODS MANIPULATE THE NUMBER OF UNIQUE STRINGS AS A SIDE
  161. * EFFECT; YOUR ATTEMPTS AT MANIPULATING THE UNIQUE STRING COUNT
  162. * IS LIKELY TO BE VERY WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN
  163. * THIS RECORD IS WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ
  164. * THE RECORD
  165. *
  166. * @param count number of strings
  167. */
  168. public void setNumUniqueStrings( final int count )
  169. {
  170. field_2_num_unique_strings = count;
  171. }
  172. /**
  173. * Get a particular string by its index
  174. *
  175. * @param id index into the array of strings
  176. *
  177. * @return the desired string
  178. */
  179. public UnicodeString getString( final int id )
  180. {
  181. return (UnicodeString) field_3_strings.get( id );
  182. }
  183. public boolean isString16bit( final int id )
  184. {
  185. UnicodeString unicodeString = ( (UnicodeString) field_3_strings.get( id ) );
  186. return ( ( unicodeString.getOptionFlags() & 0x01 ) == 1 );
  187. }
  188. /**
  189. * Return a debugging string representation
  190. *
  191. * @return string representation
  192. */
  193. public String toString()
  194. {
  195. StringBuffer buffer = new StringBuffer();
  196. buffer.append( "[SST]\n" );
  197. buffer.append( " .numstrings = " )
  198. .append( Integer.toHexString( getNumStrings() ) ).append( "\n" );
  199. buffer.append( " .uniquestrings = " )
  200. .append( Integer.toHexString( getNumUniqueStrings() ) ).append( "\n" );
  201. for ( int k = 0; k < field_3_strings.size(); k++ )
  202. {
  203. UnicodeString s = (UnicodeString)field_3_strings.get( k );
  204. buffer.append( " .string_" + k + " = " )
  205. .append( s.getDebugInfo() ).append( "\n" );
  206. }
  207. buffer.append( "[/SST]\n" );
  208. return buffer.toString();
  209. }
  210. /**
  211. * @return sid
  212. */
  213. public short getSid()
  214. {
  215. return sid;
  216. }
  217. /**
  218. * @return hashcode
  219. */
  220. public int hashCode()
  221. {
  222. return field_2_num_unique_strings;
  223. }
  224. public boolean equals( Object o )
  225. {
  226. if ( ( o == null ) || ( o.getClass() != this.getClass() ) )
  227. {
  228. return false;
  229. }
  230. SSTRecord other = (SSTRecord) o;
  231. return ( ( field_1_num_strings == other
  232. .field_1_num_strings ) && ( field_2_num_unique_strings == other
  233. .field_2_num_unique_strings ) && field_3_strings
  234. .equals( other.field_3_strings ) );
  235. }
  236. /**
  237. * validate SID
  238. *
  239. * @param id the alleged SID
  240. *
  241. * @exception RecordFormatException if validation fails
  242. */
  243. protected void validateSid( final short id )
  244. throws RecordFormatException
  245. {
  246. if ( id != sid )
  247. {
  248. throw new RecordFormatException( "NOT An SST RECORD" );
  249. }
  250. }
  251. /**
  252. * Fill the fields from the data
  253. * <P>
  254. * The data consists of sets of string data. This string data is
  255. * arranged as follows:
  256. * <P>
  257. * <CODE><pre>
  258. * short string_length; // length of string data
  259. * byte string_flag; // flag specifying special string
  260. * // handling
  261. * short run_count; // optional count of formatting runs
  262. * int extend_length; // optional extension length
  263. * char[] string_data; // string data, can be byte[] or
  264. * // short[] (length of array is
  265. * // string_length)
  266. * int[] formatting_runs; // optional formatting runs (length of
  267. * // array is run_count)
  268. * byte[] extension; // optional extension (length of array
  269. * // is extend_length)
  270. * </pre></CODE>
  271. * <P>
  272. * The string_flag is bit mapped as follows:
  273. * <P>
  274. * <TABLE>
  275. * <TR>
  276. * <TH>Bit number</TH>
  277. * <TH>Meaning if 0</TH>
  278. * <TH>Meaning if 1</TH>
  279. * <TR>
  280. * <TR>
  281. * <TD>0</TD>
  282. * <TD>string_data is byte[]</TD>
  283. * <TD>string_data is short[]</TH>
  284. * <TR>
  285. * <TR>
  286. * <TD>1</TD>
  287. * <TD>Should always be 0</TD>
  288. * <TD>string_flag is defective</TH>
  289. * <TR>
  290. * <TR>
  291. * <TD>2</TD>
  292. * <TD>extension is not included</TD>
  293. * <TD>extension is included</TH>
  294. * <TR>
  295. * <TR>
  296. * <TD>3</TD>
  297. * <TD>formatting run data is not included</TD>
  298. * <TD>formatting run data is included</TH>
  299. * <TR>
  300. * <TR>
  301. * <TD>4</TD>
  302. * <TD>Should always be 0</TD>
  303. * <TD>string_flag is defective</TH>
  304. * <TR>
  305. * <TR>
  306. * <TD>5</TD>
  307. * <TD>Should always be 0</TD>
  308. * <TD>string_flag is defective</TH>
  309. * <TR>
  310. * <TR>
  311. * <TD>6</TD>
  312. * <TD>Should always be 0</TD>
  313. * <TD>string_flag is defective</TH>
  314. * <TR>
  315. * <TR>
  316. * <TD>7</TD>
  317. * <TD>Should always be 0</TD>
  318. * <TD>string_flag is defective</TH>
  319. * <TR>
  320. * </TABLE>
  321. * <P>
  322. * We can handle eating the overhead associated with bits 2 or 3
  323. * (or both) being set, but we have no idea what to do with the
  324. * associated data. The UnicodeString class can handle the byte[]
  325. * vs short[] nature of the actual string data
  326. *
  327. * @param in the RecordInputstream to read the record from
  328. */
  329. protected void fillFields( RecordInputStream in )
  330. {
  331. // this method is ALWAYS called after construction -- using
  332. // the nontrivial constructor, of course -- so this is where
  333. // we initialize our fields
  334. field_1_num_strings = in.readInt();
  335. field_2_num_unique_strings = in.readInt();
  336. field_3_strings = new IntMapper();
  337. deserializer = new SSTDeserializer(field_3_strings);
  338. deserializer.manufactureStrings( field_2_num_unique_strings, in );
  339. }
  340. /**
  341. * @return an iterator of the strings we hold. All instances are
  342. * UnicodeStrings
  343. */
  344. Iterator getStrings()
  345. {
  346. return field_3_strings.iterator();
  347. }
  348. /**
  349. * @return count of the strings we hold.
  350. */
  351. int countStrings()
  352. {
  353. return field_3_strings.size();
  354. }
  355. /**
  356. * called by the class that is responsible for writing this sucker.
  357. * Subclasses should implement this so that their data is passed back in a
  358. * byte array.
  359. *
  360. * @return size
  361. */
  362. public int serialize( int offset, byte[] data )
  363. {
  364. SSTSerializer serializer = new SSTSerializer(
  365. field_3_strings, getNumStrings(), getNumUniqueStrings() );
  366. int bytes = serializer.serialize( offset, data );
  367. bucketAbsoluteOffsets = serializer.getBucketAbsoluteOffsets();
  368. bucketRelativeOffsets = serializer.getBucketRelativeOffsets();
  369. return bytes;
  370. }
  371. public int getRecordSize()
  372. {
  373. SSTRecordSizeCalculator calculator = new SSTRecordSizeCalculator(field_3_strings);
  374. int recordSize = calculator.getRecordSize();
  375. return recordSize;
  376. }
  377. SSTDeserializer getDeserializer()
  378. {
  379. return deserializer;
  380. }
  381. /**
  382. * Creates an extended string record based on the current contents of
  383. * the current SST record. The offset within the stream to the SST record
  384. * is required because the extended string record points directly to the
  385. * strings in the SST record.
  386. * <p>
  387. * NOTE: THIS FUNCTION MUST ONLY BE CALLED AFTER THE SST RECORD HAS BEEN
  388. * SERIALIZED.
  389. *
  390. * @param sstOffset The offset in the stream to the start of the
  391. * SST record.
  392. * @return The new SST record.
  393. */
  394. public ExtSSTRecord createExtSSTRecord(int sstOffset)
  395. {
  396. if (bucketAbsoluteOffsets == null || bucketAbsoluteOffsets == null)
  397. throw new IllegalStateException("SST record has not yet been serialized.");
  398. ExtSSTRecord extSST = new ExtSSTRecord();
  399. extSST.setNumStringsPerBucket((short)8);
  400. int[] absoluteOffsets = (int[]) bucketAbsoluteOffsets.clone();
  401. int[] relativeOffsets = (int[]) bucketRelativeOffsets.clone();
  402. for ( int i = 0; i < absoluteOffsets.length; i++ )
  403. absoluteOffsets[i] += sstOffset;
  404. extSST.setBucketOffsets(absoluteOffsets, relativeOffsets);
  405. return extSST;
  406. }
  407. /**
  408. * Calculates the size in bytes of the EXTSST record as it would be if the
  409. * record was serialized.
  410. *
  411. * @return The size of the ExtSST record in bytes.
  412. */
  413. public int calcExtSSTRecordSize()
  414. {
  415. return ExtSSTRecord.getRecordSizeForStrings(field_3_strings.size());
  416. }
  417. }