PageRenderTime 101ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/components/forks/poi/src/loci/poi/hssf/record/UnicodeString.java

http://github.com/openmicroscopy/bioformats
Java | 955 lines | 584 code | 132 blank | 239 comment | 201 complexity | 8126528f53897a35370bb2189a0d623e MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, Apache-2.0, BSD-2-Clause, MPL-2.0-no-copyleft-exception
  1. /*
  2. * #%L
  3. * Fork of Apache Jakarta POI.
  4. * %%
  5. * Copyright (C) 2008 - 2013 Open Microscopy Environment:
  6. * - Board of Regents of the University of Wisconsin-Madison
  7. * - Glencoe Software, Inc.
  8. * - University of Dundee
  9. * %%
  10. * Licensed under the Apache License, Version 2.0 (the "License");
  11. * you may not use this file except in compliance with the License.
  12. * You may obtain a copy of the License at
  13. *
  14. * http://www.apache.org/licenses/LICENSE-2.0
  15. *
  16. * Unless required by applicable law or agreed to in writing, software
  17. * distributed under the License is distributed on an "AS IS" BASIS,
  18. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19. * See the License for the specific language governing permissions and
  20. * limitations under the License.
  21. * #L%
  22. */
  23. /* ====================================================================
  24. Licensed to the Apache Software Foundation (ASF) under one or more
  25. contributor license agreements. See the NOTICE file distributed with
  26. this work for additional information regarding copyright ownership.
  27. The ASF licenses this file to You under the Apache License, Version 2.0
  28. (the "License"); you may not use this file except in compliance with
  29. the License. You may obtain a copy of the License at
  30. http://www.apache.org/licenses/LICENSE-2.0
  31. Unless required by applicable law or agreed to in writing, software
  32. distributed under the License is distributed on an "AS IS" BASIS,
  33. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  34. See the License for the specific language governing permissions and
  35. limitations under the License.
  36. ==================================================================== */
  37. package loci.poi.hssf.record;
  38. import loci.poi.util.BitField;
  39. import loci.poi.util.BitFieldFactory;
  40. import loci.poi.util.LittleEndian;
  41. import loci.poi.util.HexDump;
  42. import java.util.Iterator;
  43. import java.util.List;
  44. import java.util.ArrayList;
  45. import java.util.Collections;
  46. /**
  47. * Title: Unicode String<P>
  48. * Description: Unicode String record. We implement these as a record, although
  49. * they are really just standard fields that are in several records.
  50. * It is considered more desirable than repeating it in all of them.<P>
  51. * REFERENCE: PG 264 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)<P>
  52. * @author Andrew C. Oliver
  53. * @author Marc Johnson (mjohnson at apache dot org)
  54. * @author Glen Stampoultzis (glens at apache.org)
  55. * @version 2.0-pre
  56. */
  57. public class UnicodeString
  58. implements Comparable
  59. {
  60. public final static short sid = 0xFFF;
  61. private short field_1_charCount; // = 0;
  62. private byte field_2_optionflags; // = 0;
  63. private String field_3_string; // = null;
  64. private List field_4_format_runs;
  65. private byte[] field_5_ext_rst;
  66. private static final BitField highByte = BitFieldFactory.getInstance(0x1);
  67. private static final BitField extBit = BitFieldFactory.getInstance(0x4);
  68. private static final BitField richText = BitFieldFactory.getInstance(0x8);
  69. public static class FormatRun implements Comparable {
  70. private short character;
  71. private short fontIndex;
  72. public FormatRun(short character, short fontIndex) {
  73. this.character = character;
  74. this.fontIndex = fontIndex;
  75. }
  76. public short getCharacterPos() {
  77. return character;
  78. }
  79. public short getFontIndex() {
  80. return fontIndex;
  81. }
  82. public boolean equals(Object o) {
  83. if ((o == null) || (o.getClass() != this.getClass()))
  84. {
  85. return false;
  86. }
  87. FormatRun other = ( FormatRun ) o;
  88. return ((character == other.character) && (fontIndex == other.fontIndex));
  89. }
  90. public int compareTo(Object obj) {
  91. FormatRun r = (FormatRun)obj;
  92. if ((character == r.character) && (fontIndex == r.fontIndex))
  93. return 0;
  94. if (character == r.character)
  95. return fontIndex - r.fontIndex;
  96. else return character - r.character;
  97. }
  98. public String toString() {
  99. return "character="+character+",fontIndex="+fontIndex;
  100. }
  101. }
  102. private UnicodeString() {
  103. //Used for clone method.
  104. }
  105. public UnicodeString(String str)
  106. {
  107. setString(str);
  108. }
  109. /**
  110. * construct a unicode string record and fill its fields, ID is ignored
  111. * @param in the RecordInputstream to read the record from
  112. */
  113. public UnicodeString(RecordInputStream in)
  114. {
  115. validateSid(in.getSid());
  116. fillFields(in);
  117. }
  118. public int hashCode()
  119. {
  120. int stringHash = 0;
  121. if (field_3_string != null)
  122. stringHash = field_3_string.hashCode();
  123. return field_1_charCount + stringHash;
  124. }
  125. /**
  126. * Our handling of equals is inconsistent with compareTo. The trouble is because we don't truely understand
  127. * rich text fields yet it's difficult to make a sound comparison.
  128. *
  129. * @param o The object to compare.
  130. * @return true if the object is actually equal.
  131. */
  132. public boolean equals(Object o)
  133. {
  134. if ((o == null) || (o.getClass() != this.getClass()))
  135. {
  136. return false;
  137. }
  138. UnicodeString other = ( UnicodeString ) o;
  139. //Ok lets do this in stages to return a quickly, first check the actual string
  140. boolean eq = ((field_1_charCount == other.field_1_charCount)
  141. && (field_2_optionflags == other.field_2_optionflags)
  142. && field_3_string.equals(other.field_3_string));
  143. if (!eq) return false;
  144. //Ok string appears to be equal but now lets compare formatting runs
  145. if ((field_4_format_runs == null) && (other.field_4_format_runs == null))
  146. //Strings are equal, and there are not formtting runs.
  147. return true;
  148. if (((field_4_format_runs == null) && (other.field_4_format_runs != null)) ||
  149. (field_4_format_runs != null) && (other.field_4_format_runs == null))
  150. //Strings are equal, but one or the other has formatting runs
  151. return false;
  152. //Strings are equal, so now compare formatting runs.
  153. int size = field_4_format_runs.size();
  154. if (size != other.field_4_format_runs.size())
  155. return false;
  156. for (int i=0;i<size;i++) {
  157. FormatRun run1 = (FormatRun)field_4_format_runs.get(i);
  158. FormatRun run2 = (FormatRun)other.field_4_format_runs.get(i);
  159. if (!run1.equals(run2))
  160. return false;
  161. }
  162. //Well the format runs are equal as well!, better check the ExtRst data
  163. //Which by the way we dont know how to decode!
  164. if ((field_5_ext_rst == null) && (other.field_5_ext_rst == null))
  165. return true;
  166. if (((field_5_ext_rst == null) && (other.field_5_ext_rst != null)) ||
  167. ((field_5_ext_rst != null) && (other.field_5_ext_rst == null)))
  168. return false;
  169. size = field_5_ext_rst.length;
  170. if (size != field_5_ext_rst.length)
  171. return false;
  172. //Check individual bytes!
  173. for (int i=0;i<size;i++) {
  174. if (field_5_ext_rst[i] != other.field_5_ext_rst[i])
  175. return false;
  176. }
  177. //Phew!! After all of that we have finally worked out that the strings
  178. //are identical.
  179. return true;
  180. }
  181. /**
  182. * NO OP
  183. */
  184. protected void validateSid(short id)
  185. {
  186. // included only for interface compliance
  187. }
  188. /**
  189. * @param in the RecordInputstream to read the record from
  190. */
  191. protected void fillFields(RecordInputStream in)
  192. {
  193. field_1_charCount = in.readShort();
  194. field_2_optionflags = in.readByte();
  195. int runCount = 0;
  196. int extensionLength = 0;
  197. //Read the number of rich runs if rich text.
  198. if ( isRichText() )
  199. {
  200. runCount = in.readShort();
  201. }
  202. //Read the size of extended data if present.
  203. if ( isExtendedText() )
  204. {
  205. extensionLength = in.readInt();
  206. }
  207. //Now need to get the string data.
  208. //Turn off autocontinuation so that we can catch the continue boundary
  209. in.setAutoContinue(false);
  210. StringBuffer tmpString = new StringBuffer(field_1_charCount);
  211. int stringCharCount = field_1_charCount;
  212. boolean isCompressed = ((field_2_optionflags & 1) == 0);
  213. while (stringCharCount != 0) {
  214. if (in.remaining() == 0) {
  215. if (in.isContinueNext()) {
  216. in.nextRecord();
  217. //Check if we are now reading, compressed or uncompressed unicode.
  218. byte optionflags = in.readByte();
  219. isCompressed = ((optionflags & 1) == 0);
  220. } else
  221. throw new RecordFormatException("Expected continue record.");
  222. }
  223. if (isCompressed) {
  224. //Typecast direct to char from byte with high bit set causes all ones
  225. //in the high byte of the char (which is of course incorrect)
  226. char ch = (char)( (short)0xff & (short)in.readByte() );
  227. tmpString.append(ch);
  228. } else {
  229. char ch = (char) in.readShort();
  230. tmpString.append(ch);
  231. }
  232. stringCharCount --;
  233. }
  234. field_3_string = tmpString.toString();
  235. //Turn back on autocontinuation
  236. in.setAutoContinue(true);
  237. if (isRichText() && (runCount > 0)) {
  238. field_4_format_runs = new ArrayList(runCount);
  239. for (int i=0;i<runCount;i++) {
  240. field_4_format_runs.add(new FormatRun(in.readShort(), in.readShort()));
  241. //read reserved
  242. //in.readInt();
  243. }
  244. }
  245. if (isExtendedText() && (extensionLength > 0)) {
  246. field_5_ext_rst = new byte[extensionLength];
  247. for (int i=0;i<extensionLength;i++) {
  248. field_5_ext_rst[i] = in.readByte();
  249. }
  250. }
  251. }
  252. /**
  253. * get the number of characters in the string
  254. *
  255. *
  256. * @return number of characters
  257. *
  258. */
  259. public short getCharCount()
  260. {
  261. return field_1_charCount;
  262. }
  263. /**
  264. * set the number of characters in the string
  265. * @param cc - number of characters
  266. */
  267. public void setCharCount(short cc)
  268. {
  269. field_1_charCount = cc;
  270. }
  271. /**
  272. * get the option flags which among other things return if this is a 16-bit or
  273. * 8 bit string
  274. *
  275. * @return optionflags bitmask
  276. *
  277. */
  278. public byte getOptionFlags()
  279. {
  280. return field_2_optionflags;
  281. }
  282. /**
  283. * set the option flags which among other things return if this is a 16-bit or
  284. * 8 bit string
  285. *
  286. * @param of optionflags bitmask
  287. *
  288. */
  289. public void setOptionFlags(byte of)
  290. {
  291. field_2_optionflags = of;
  292. }
  293. /**
  294. * get the actual string this contains as a java String object
  295. *
  296. *
  297. * @return String
  298. *
  299. */
  300. public String getString()
  301. {
  302. return field_3_string;
  303. }
  304. /**
  305. * set the actual string this contains
  306. * @param string the text
  307. */
  308. public void setString(String string)
  309. {
  310. field_3_string = string;
  311. setCharCount((short)field_3_string.length());
  312. // scan for characters greater than 255 ... if any are
  313. // present, we have to use 16-bit encoding. Otherwise, we
  314. // can use 8-bit encoding
  315. boolean useUTF16 = false;
  316. int strlen = string.length();
  317. for ( int j = 0; j < strlen; j++ )
  318. {
  319. if ( string.charAt( j ) > 255 )
  320. {
  321. useUTF16 = true;
  322. break;
  323. }
  324. }
  325. if (useUTF16)
  326. //Set the uncomressed bit
  327. field_2_optionflags = highByte.setByte(field_2_optionflags);
  328. else field_2_optionflags = highByte.clearByte(field_2_optionflags);
  329. }
  330. public int getFormatRunCount() {
  331. if (field_4_format_runs == null)
  332. return 0;
  333. return field_4_format_runs.size();
  334. }
  335. public FormatRun getFormatRun(int index) {
  336. if (field_4_format_runs == null)
  337. return null;
  338. if ((index < 0) || (index >= field_4_format_runs.size()))
  339. return null;
  340. return (FormatRun)field_4_format_runs.get(index);
  341. }
  342. private int findFormatRunAt(int characterPos) {
  343. int size = field_4_format_runs.size();
  344. for (int i=0;i<size;i++) {
  345. FormatRun r = (FormatRun)field_4_format_runs.get(i);
  346. if (r.character == characterPos)
  347. return i;
  348. else if (r.character > characterPos)
  349. return -1;
  350. }
  351. return -1;
  352. }
  353. /** Adds a font run to the formatted string.
  354. *
  355. * If a font run exists at the current charcter location, then it is
  356. * replaced with the font run to be added.
  357. */
  358. public void addFormatRun(FormatRun r) {
  359. if (field_4_format_runs == null)
  360. field_4_format_runs = new ArrayList();
  361. int index = findFormatRunAt(r.character);
  362. if (index != -1)
  363. field_4_format_runs.remove(index);
  364. field_4_format_runs.add(r);
  365. //Need to sort the font runs to ensure that the font runs appear in
  366. //character order
  367. Collections.sort(field_4_format_runs);
  368. //Make sure that we now say that we are a rich string
  369. field_2_optionflags = richText.setByte(field_2_optionflags);
  370. }
  371. public Iterator formatIterator() {
  372. if (field_4_format_runs != null)
  373. return field_4_format_runs.iterator();
  374. return null;
  375. }
  376. public void removeFormatRun(FormatRun r) {
  377. field_4_format_runs.remove(r);
  378. if (field_4_format_runs.size() == 0) {
  379. field_4_format_runs = null;
  380. field_2_optionflags = richText.clearByte(field_2_optionflags);
  381. }
  382. }
  383. public void clearFormatting() {
  384. field_4_format_runs = null;
  385. field_2_optionflags = richText.clearByte(field_2_optionflags);
  386. }
  387. public byte[] getExtendedRst() {
  388. return this.field_5_ext_rst;
  389. }
  390. public void setExtendedRst(byte[] ext_rst) {
  391. if (ext_rst != null)
  392. field_2_optionflags = extBit.setByte(field_2_optionflags);
  393. else field_2_optionflags = extBit.clearByte(field_2_optionflags);
  394. this.field_5_ext_rst = ext_rst;
  395. }
  396. /**
  397. * unlike the real records we return the same as "getString()" rather than debug info
  398. * @see #getDebugInfo()
  399. * @return String value of the record
  400. */
  401. public String toString()
  402. {
  403. return getString();
  404. }
  405. /**
  406. * return a character representation of the fields of this record
  407. *
  408. *
  409. * @return String of output for biffviewer etc.
  410. *
  411. */
  412. public String getDebugInfo()
  413. {
  414. StringBuffer buffer = new StringBuffer();
  415. buffer.append("[UNICODESTRING]\n");
  416. buffer.append(" .charcount = ")
  417. .append(Integer.toHexString(getCharCount())).append("\n");
  418. buffer.append(" .optionflags = ")
  419. .append(Integer.toHexString(getOptionFlags())).append("\n");
  420. buffer.append(" .string = ").append(getString()).append("\n");
  421. if (field_4_format_runs != null) {
  422. for (int i = 0; i < field_4_format_runs.size();i++) {
  423. FormatRun r = (FormatRun)field_4_format_runs.get(i);
  424. buffer.append(" .format_run"+i+" = ").append(r.toString()).append("\n");
  425. }
  426. }
  427. if (field_5_ext_rst != null) {
  428. buffer.append(" .field_5_ext_rst = ").append("\n").append(HexDump.toHex(field_5_ext_rst)).append("\n");
  429. }
  430. buffer.append("[/UNICODESTRING]\n");
  431. return buffer.toString();
  432. }
  433. private int writeContinueIfRequired(UnicodeRecordStats stats, final int requiredSize, int offset, byte[] data) {
  434. //Basic string overhead
  435. if (stats.remainingSize < requiredSize) {
  436. //Check if be are already in a continue record, if so make sure that
  437. //we go back and write out our length
  438. if (stats.lastLengthPos != -1) {
  439. short lastRecordLength = (short)(offset - stats.lastLengthPos - 2);
  440. if (lastRecordLength > 8224)
  441. throw new InternalError();
  442. LittleEndian.putShort(data, stats.lastLengthPos, lastRecordLength);
  443. }
  444. LittleEndian.putShort(data, offset, ContinueRecord.sid);
  445. offset+=2;
  446. //Record the location of the last continue legnth position, but dont write
  447. //anything there yet (since we dont know what it will be!)
  448. stats.lastLengthPos = offset;
  449. offset += 2;
  450. stats.recordSize += 4;
  451. stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
  452. }
  453. return offset;
  454. }
  455. public int serialize(UnicodeRecordStats stats, final int offset, byte [] data)
  456. {
  457. int pos = offset;
  458. //Basic string overhead
  459. pos = writeContinueIfRequired(stats, 3, pos, data);
  460. // byte[] retval = new byte[ 3 + (getString().length() * charsize)];
  461. LittleEndian.putShort(data, pos, getCharCount());
  462. pos += 2;
  463. data[ pos ] = getOptionFlags();
  464. pos += 1;
  465. stats.recordSize += 3;
  466. stats.remainingSize-= 3;
  467. if (isRichText()) {
  468. if (field_4_format_runs != null) {
  469. pos = writeContinueIfRequired(stats, 2, pos, data);
  470. LittleEndian.putShort(data, pos, (short) field_4_format_runs.size());
  471. pos += 2;
  472. stats.recordSize += 2;
  473. stats.remainingSize -= 2;
  474. }
  475. }
  476. if ( isExtendedText() )
  477. {
  478. if (this.field_5_ext_rst != null) {
  479. pos = writeContinueIfRequired(stats, 4, pos, data);
  480. LittleEndian.putInt(data, pos, field_5_ext_rst.length);
  481. pos += 4;
  482. stats.recordSize += 4;
  483. stats.remainingSize -= 4;
  484. }
  485. }
  486. int charsize = isUncompressedUnicode() ? 2 : 1;
  487. int strSize = (getString().length() * charsize);
  488. byte[] strBytes = null;
  489. try {
  490. String unicodeString = getString();
  491. if (!isUncompressedUnicode())
  492. {
  493. strBytes = unicodeString.getBytes("ISO-8859-1");
  494. }
  495. else
  496. {
  497. strBytes = unicodeString.getBytes("UTF-16LE");
  498. }
  499. }
  500. catch (Exception e) {
  501. throw new InternalError();
  502. }
  503. if (strSize != strBytes.length)
  504. throw new InternalError("That shouldnt have happened!");
  505. //Check to see if the offset occurs mid string, if so then we need to add
  506. //the byte to start with that represents the first byte of the continue record.
  507. if (strSize > stats.remainingSize) {
  508. //Ok the offset occurs half way through the string, that means that
  509. //we need an extra byte after the continue record ie we didnt finish
  510. //writing out the string the 1st time through
  511. //But hang on, how many continue records did we span? What if this is
  512. //a REALLY long string. We need to work this all out.
  513. int ammountThatCantFit = strSize;
  514. int strPos = 0;
  515. while (ammountThatCantFit > 0) {
  516. int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit);
  517. //Make sure that the ammount that cant fit takes into account
  518. //whether we are writing double byte unicode
  519. if (isUncompressedUnicode()) {
  520. //We have the '-1' here because whether this is the first record or
  521. //subsequent continue records, there is always the case that the
  522. //number of bytes in a string on doube byte boundaries is actually odd.
  523. if ( ( (ammountWritten ) % 2) == 1)
  524. ammountWritten--;
  525. }
  526. System.arraycopy(strBytes, strPos, data, pos, ammountWritten);
  527. pos += ammountWritten;
  528. strPos += ammountWritten;
  529. stats.recordSize += ammountWritten;
  530. stats.remainingSize -= ammountWritten;
  531. //Ok lets subtract what we can write
  532. ammountThatCantFit -= ammountWritten;
  533. //Each iteration of this while loop is another continue record, unless
  534. //everything now fits.
  535. if (ammountThatCantFit > 0) {
  536. //We know that a continue WILL be requied, but use this common method
  537. pos = writeContinueIfRequired(stats, ammountThatCantFit, pos, data);
  538. //The first byte after a continue mid string is the extra byte to
  539. //indicate if this run is compressed or not.
  540. data[pos] = (byte) (isUncompressedUnicode() ? 0x1 : 0x0);
  541. pos++;
  542. stats.recordSize++;
  543. stats.remainingSize --;
  544. }
  545. }
  546. } else {
  547. if (strSize > (data.length-pos))
  548. System.out.println("Hmm shouldnt happen");
  549. //Ok the string fits nicely in the remaining size
  550. System.arraycopy(strBytes, 0, data, pos, strSize);
  551. pos += strSize;
  552. stats.recordSize += strSize;
  553. stats.remainingSize -= strSize;
  554. }
  555. if (isRichText() && (field_4_format_runs != null)) {
  556. int count = field_4_format_runs.size();
  557. //This will ensure that a run does not split a continue
  558. for (int i=0;i<count;i++) {
  559. pos = writeContinueIfRequired(stats, 4, pos, data);
  560. FormatRun r = (FormatRun)field_4_format_runs.get(i);
  561. LittleEndian.putShort(data, pos, r.character);
  562. pos += 2;
  563. LittleEndian.putShort(data, pos, r.fontIndex);
  564. pos += 2;
  565. //Each run count is four bytes
  566. stats.recordSize += 4;
  567. stats.remainingSize -=4;
  568. }
  569. }
  570. if (isExtendedText() && (field_5_ext_rst != null)) {
  571. //Ok ExtRst is actually not documented, so i am going to hope
  572. //that we can actually continue on byte boundaries
  573. int ammountThatCantFit = field_5_ext_rst.length - stats.remainingSize;
  574. int extPos = 0;
  575. if (ammountThatCantFit > 0) {
  576. while (ammountThatCantFit > 0) {
  577. //So for this record we have already written
  578. int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit);
  579. System.arraycopy(field_5_ext_rst, extPos, data, pos, ammountWritten);
  580. pos += ammountWritten;
  581. extPos += ammountWritten;
  582. stats.recordSize += ammountWritten;
  583. stats.remainingSize -= ammountWritten;
  584. //Ok lets subtract what we can write
  585. ammountThatCantFit -= ammountWritten;
  586. if (ammountThatCantFit > 0) {
  587. pos = writeContinueIfRequired(stats, 1, pos, data);
  588. }
  589. }
  590. } else {
  591. //We can fit wholey in what remains.
  592. System.arraycopy(field_5_ext_rst, 0, data, pos, field_5_ext_rst.length);
  593. pos += field_5_ext_rst.length;
  594. stats.remainingSize -= field_5_ext_rst.length;
  595. stats.recordSize += field_5_ext_rst.length;
  596. }
  597. }
  598. return pos - offset;
  599. }
  600. public void setCompressedUnicode() {
  601. field_2_optionflags = highByte.setByte(field_2_optionflags);
  602. }
  603. public void setUncompressedUnicode() {
  604. field_2_optionflags = highByte.clearByte(field_2_optionflags);
  605. }
  606. private boolean isUncompressedUnicode()
  607. {
  608. return highByte.isSet(getOptionFlags());
  609. }
  610. /** Returns the size of this record, given the ammount of record space
  611. * remaining, it will also include the size of writing a continue record.
  612. */
  613. public static class UnicodeRecordStats {
  614. public int recordSize;
  615. public int remainingSize = SSTRecord.MAX_RECORD_SIZE;
  616. public int lastLengthPos = -1;
  617. }
  618. public void getRecordSize(UnicodeRecordStats stats) {
  619. //Basic string overhead
  620. if (stats.remainingSize < 3) {
  621. //Needs a continue
  622. stats.recordSize += 4;
  623. stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
  624. }
  625. stats.recordSize += 3;
  626. stats.remainingSize-= 3;
  627. //Read the number of rich runs if rich text.
  628. if ( isRichText() )
  629. {
  630. //Run count
  631. if (stats.remainingSize < 2) {
  632. //Needs a continue
  633. //Reset the available space.
  634. stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
  635. //continue record overhead
  636. stats.recordSize+=4;
  637. }
  638. stats.recordSize += 2;
  639. stats.remainingSize -=2;
  640. }
  641. //Read the size of extended data if present.
  642. if ( isExtendedText() )
  643. {
  644. //Needs a continue
  645. //extension length
  646. if (stats.remainingSize < 4) {
  647. //Reset the available space.
  648. stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
  649. //continue record overhead
  650. stats.recordSize+=4;
  651. }
  652. stats.recordSize += 4;
  653. stats.remainingSize -=4;
  654. }
  655. int charsize = isUncompressedUnicode() ? 2 : 1;
  656. int strSize = (getString().length() * charsize);
  657. //Check to see if the offset occurs mid string, if so then we need to add
  658. //the byte to start with that represents the first byte of the continue record.
  659. if (strSize > stats.remainingSize) {
  660. //Ok the offset occurs half way through the string, that means that
  661. //we need an extra byte after the continue record ie we didnt finish
  662. //writing out the string the 1st time through
  663. //But hang on, how many continue records did we span? What if this is
  664. //a REALLY long string. We need to work this all out.
  665. int ammountThatCantFit = strSize;
  666. while (ammountThatCantFit > 0) {
  667. int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit);
  668. //Make sure that the ammount that cant fit takes into account
  669. //whether we are writing double byte unicode
  670. if (isUncompressedUnicode()) {
  671. //We have the '-1' here because whether this is the first record or
  672. //subsequent continue records, there is always the case that the
  673. //number of bytes in a string on doube byte boundaries is actually odd.
  674. if ( ( (ammountWritten) % 2) == 1)
  675. ammountWritten--;
  676. }
  677. stats.recordSize += ammountWritten;
  678. stats.remainingSize -= ammountWritten;
  679. //Ok lets subtract what we can write
  680. ammountThatCantFit -= ammountWritten;
  681. //Each iteration of this while loop is another continue record, unless
  682. //everything now fits.
  683. if (ammountThatCantFit > 0) {
  684. //Reset the available space.
  685. stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
  686. //continue record overhead
  687. stats.recordSize+=4;
  688. //The first byte after a continue mid string is the extra byte to
  689. //indicate if this run is compressed or not.
  690. stats.recordSize++;
  691. stats.remainingSize --;
  692. }
  693. }
  694. } else {
  695. //Ok the string fits nicely in the remaining size
  696. stats.recordSize += strSize;
  697. stats.remainingSize -= strSize;
  698. }
  699. if (isRichText() && (field_4_format_runs != null)) {
  700. int count = field_4_format_runs.size();
  701. //This will ensure that a run does not split a continue
  702. for (int i=0;i<count;i++) {
  703. if (stats.remainingSize < 4) {
  704. //Reset the available space.
  705. stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
  706. //continue record overhead
  707. stats.recordSize+=4;
  708. }
  709. //Each run count is four bytes
  710. stats.recordSize += 4;
  711. stats.remainingSize -=4;
  712. }
  713. }
  714. if (isExtendedText() && (field_5_ext_rst != null)) {
  715. //Ok ExtRst is actually not documented, so i am going to hope
  716. //that we can actually continue on byte boundaries
  717. int ammountThatCantFit = field_5_ext_rst.length - stats.remainingSize;
  718. if (ammountThatCantFit > 0) {
  719. while (ammountThatCantFit > 0) {
  720. //So for this record we have already written
  721. int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit);
  722. stats.recordSize += ammountWritten;
  723. stats.remainingSize -= ammountWritten;
  724. //Ok lets subtract what we can write
  725. ammountThatCantFit -= ammountWritten;
  726. if (ammountThatCantFit > 0) {
  727. //Each iteration of this while loop is another continue record.
  728. //Reset the available space.
  729. stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4;
  730. //continue record overhead
  731. stats.recordSize += 4;
  732. }
  733. }
  734. } else {
  735. //We can fit wholey in what remains.
  736. stats.remainingSize -= field_5_ext_rst.length;
  737. stats.recordSize += field_5_ext_rst.length;
  738. }
  739. }
  740. }
  741. public short getSid()
  742. {
  743. return sid;
  744. }
  745. public int compareTo(Object obj)
  746. {
  747. UnicodeString str = ( UnicodeString ) obj;
  748. int result = getString().compareTo(str.getString());
  749. //As per the equals method lets do this in stages
  750. if (result != 0)
  751. return result;
  752. //Ok string appears to be equal but now lets compare formatting runs
  753. if ((field_4_format_runs == null) && (str.field_4_format_runs == null))
  754. //Strings are equal, and there are no formtting runs.
  755. return 0;
  756. if ((field_4_format_runs == null) && (str.field_4_format_runs != null))
  757. //Strings are equal, but one or the other has formatting runs
  758. return 1;
  759. if ((field_4_format_runs != null) && (str.field_4_format_runs == null))
  760. //Strings are equal, but one or the other has formatting runs
  761. return -1;
  762. //Strings are equal, so now compare formatting runs.
  763. int size = field_4_format_runs.size();
  764. if (size != str.field_4_format_runs.size())
  765. return size - str.field_4_format_runs.size();
  766. for (int i=0;i<size;i++) {
  767. FormatRun run1 = (FormatRun)field_4_format_runs.get(i);
  768. FormatRun run2 = (FormatRun)str.field_4_format_runs.get(i);
  769. result = run1.compareTo(run2);
  770. if (result != 0)
  771. return result;
  772. }
  773. //Well the format runs are equal as well!, better check the ExtRst data
  774. //Which by the way we dont know how to decode!
  775. if ((field_5_ext_rst == null) && (str.field_5_ext_rst == null))
  776. return 0;
  777. if ((field_5_ext_rst == null) && (str.field_5_ext_rst != null))
  778. return 1;
  779. if ((field_5_ext_rst != null) && (str.field_5_ext_rst == null))
  780. return -1;
  781. size = field_5_ext_rst.length;
  782. if (size != field_5_ext_rst.length)
  783. return size - field_5_ext_rst.length;
  784. //Check individual bytes!
  785. for (int i=0;i<size;i++) {
  786. if (field_5_ext_rst[i] != str.field_5_ext_rst[i])
  787. return field_5_ext_rst[i] - str.field_5_ext_rst[i];
  788. }
  789. //Phew!! After all of that we have finally worked out that the strings
  790. //are identical.
  791. return 0;
  792. }
  793. public boolean isRichText()
  794. {
  795. return richText.isSet(getOptionFlags());
  796. }
  797. public boolean isExtendedText()
  798. {
  799. return extBit.isSet(getOptionFlags());
  800. }
  801. public Object clone() {
  802. UnicodeString str = new UnicodeString();
  803. str.field_1_charCount = field_1_charCount;
  804. str.field_2_optionflags = field_2_optionflags;
  805. str.field_3_string = field_3_string;
  806. if (field_4_format_runs != null) {
  807. str.field_4_format_runs = new ArrayList();
  808. int size = field_4_format_runs.size();
  809. for (int i = 0; i < size; i++) {
  810. FormatRun r = (FormatRun) field_4_format_runs.get(i);
  811. str.field_4_format_runs.add(new FormatRun(r.character, r.fontIndex));
  812. }
  813. }
  814. if (field_5_ext_rst != null) {
  815. str.field_5_ext_rst = new byte[field_5_ext_rst.length];
  816. System.arraycopy(field_5_ext_rst, 0, str.field_5_ext_rst, 0,
  817. field_5_ext_rst.length);
  818. }
  819. return str;
  820. }
  821. }