PageRenderTime 50ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 1ms

/projects/jre-1.6.0/src/com/sun/org/apache/xml/internal/utils/FastStringBuffer.java

https://gitlab.com/essere.lab.public/qualitas.class-corpus
Java | 1294 lines | 539 code | 192 blank | 563 comment | 148 complexity | c5723d30c1cecd2da51b09867c933a69 MD5 | raw file
  1. /*
  2. * Copyright 1999-2004 The Apache Software Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. * $Id: FastStringBuffer.java,v 1.2.4.1 2005/09/15 08:15:44 suresh_emailid Exp $
  18. */
  19. package com.sun.org.apache.xml.internal.utils;
  20. /**
  21. * Bare-bones, unsafe, fast string buffer. No thread-safety, no
  22. * parameter range checking, exposed fields. Note that in typical
  23. * applications, thread-safety of a StringBuffer is a somewhat
  24. * dubious concept in any case.
  25. * <p>
  26. * Note that Stree and DTM used a single FastStringBuffer as a string pool,
  27. * by recording start and length indices within this single buffer. This
  28. * minimizes heap overhead, but of course requires more work when retrieving
  29. * the data.
  30. * <p>
  31. * FastStringBuffer operates as a "chunked buffer". Doing so
  32. * reduces the need to recopy existing information when an append
  33. * exceeds the space available; we just allocate another chunk and
  34. * flow across to it. (The array of chunks may need to grow,
  35. * admittedly, but that's a much smaller object.) Some excess
  36. * recopying may arise when we extract Strings which cross chunk
  37. * boundaries; larger chunks make that less frequent.
  38. * <p>
  39. * The size values are parameterized, to allow tuning this code. In
  40. * theory, Result Tree Fragments might want to be tuned differently
  41. * from the main document's text.
  42. * <p>
  43. * %REVIEW% An experiment in self-tuning is
  44. * included in the code (using nested FastStringBuffers to achieve
  45. * variation in chunk sizes), but this implementation has proven to
  46. * be problematic when data may be being copied from the FSB into itself.
  47. * We should either re-architect that to make this safe (if possible)
  48. * or remove that code and clean up for performance/maintainability reasons.
  49. * <p>
  50. */
  51. public class FastStringBuffer
  52. {
  53. // If nonzero, forces the initial chunk size.
  54. /**/static final int DEBUG_FORCE_INIT_BITS=0;
  55. // %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied
  56. // back into the same FSB (variable set from previous variable, for example)
  57. // and blocksize changes in mid-copy... there's risk of severe malfunction in
  58. // the read process, due to how the resizing code re-jiggers storage. Arggh.
  59. // If we want to retain the variable-size-block feature, we need to reconsider
  60. // that issue. For now, I have forced us into fixed-size mode.
  61. static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true;
  62. /** Manifest constant: Suppress leading whitespace.
  63. * This should be used when normalize-to-SAX is called for the first chunk of a
  64. * multi-chunk output, or one following unsuppressed whitespace in a previous
  65. * chunk.
  66. * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
  67. */
  68. public static final int SUPPRESS_LEADING_WS=0x01;
  69. /** Manifest constant: Suppress trailing whitespace.
  70. * This should be used when normalize-to-SAX is called for the last chunk of a
  71. * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
  72. */
  73. public static final int SUPPRESS_TRAILING_WS=0x02;
  74. /** Manifest constant: Suppress both leading and trailing whitespace.
  75. * This should be used when normalize-to-SAX is called for a complete string.
  76. * (I'm not wild about the name of this one. Ideas welcome.)
  77. * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
  78. */
  79. public static final int SUPPRESS_BOTH
  80. = SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS;
  81. /** Manifest constant: Carry trailing whitespace of one chunk as leading
  82. * whitespace of the next chunk. Used internally; I don't see any reason
  83. * to make it public right now.
  84. */
  85. private static final int CARRY_WS=0x04;
  86. /**
  87. * Field m_chunkBits sets our chunking strategy, by saying how many
  88. * bits of index can be used within a single chunk before flowing over
  89. * to the next chunk. For example, if m_chunkbits is set to 15, each
  90. * chunk can contain up to 2^15 (32K) characters
  91. */
  92. int m_chunkBits = 15;
  93. /**
  94. * Field m_maxChunkBits affects our chunk-growth strategy, by saying what
  95. * the largest permissible chunk size is in this particular FastStringBuffer
  96. * hierarchy.
  97. */
  98. int m_maxChunkBits = 15;
  99. /**
  100. * Field m_rebundleBits affects our chunk-growth strategy, by saying how
  101. * many chunks should be allocated at one size before we encapsulate them
  102. * into the first chunk of the next size up. For example, if m_rebundleBits
  103. * is set to 3, then after 8 chunks at a given size we will rebundle
  104. * them as the first element of a FastStringBuffer using a chunk size
  105. * 8 times larger (chunk size shifted left three bits).
  106. */
  107. int m_rebundleBits = 2;
  108. /**
  109. * Field m_chunkSize establishes the maximum size of one chunk of the array
  110. * as 2**chunkbits characters.
  111. * (Which may also be the minimum size if we aren't tuning for storage)
  112. */
  113. int m_chunkSize; // =1<<(m_chunkBits-1);
  114. /**
  115. * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits
  116. * worth of low-order '1' bits, useful for shift-and-mask addressing
  117. * within the chunks.
  118. */
  119. int m_chunkMask; // =m_chunkSize-1;
  120. /**
  121. * Field m_array holds the string buffer's text contents, using an
  122. * array-of-arrays. Note that this array, and the arrays it contains, may be
  123. * reallocated when necessary in order to allow the buffer to grow;
  124. * references to them should be considered to be invalidated after any
  125. * append. However, the only time these arrays are directly exposed
  126. * is in the sendSAXcharacters call.
  127. */
  128. char[][] m_array;
  129. /**
  130. * Field m_lastChunk is an index into m_array[], pointing to the last
  131. * chunk of the Chunked Array currently in use. Note that additional
  132. * chunks may actually be allocated, eg if the FastStringBuffer had
  133. * previously been truncated or if someone issued an ensureSpace request.
  134. * <p>
  135. * The insertion point for append operations is addressed by the combination
  136. * of m_lastChunk and m_firstFree.
  137. */
  138. int m_lastChunk = 0;
  139. /**
  140. * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
  141. * the first character in the Chunked Array which is not part of the
  142. * FastStringBuffer's current content. Since m_array[][] is zero-based,
  143. * the length of that content can be calculated as
  144. * (m_lastChunk<<m_chunkBits) + m_firstFree
  145. */
  146. int m_firstFree = 0;
  147. /**
  148. * Field m_innerFSB, when non-null, is a FastStringBuffer whose total
  149. * length equals m_chunkSize, and which replaces m_array[0]. This allows
  150. * building a hierarchy of FastStringBuffers, where early appends use
  151. * a smaller chunkSize (for less wasted memory overhead) but later
  152. * ones use a larger chunkSize (for less heap activity overhead).
  153. */
  154. FastStringBuffer m_innerFSB = null;
  155. /**
  156. * Construct a FastStringBuffer, with allocation policy as per parameters.
  157. * <p>
  158. * For coding convenience, I've expressed both allocation sizes in terms of
  159. * a number of bits. That's needed for the final size of a chunk,
  160. * to permit fast and efficient shift-and-mask addressing. It's less critical
  161. * for the inital size, and may be reconsidered.
  162. * <p>
  163. * An alternative would be to accept integer sizes and round to powers of two;
  164. * that really doesn't seem to buy us much, if anything.
  165. *
  166. * @param initChunkBits Length in characters of the initial allocation
  167. * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024
  168. * characters.) Later chunks will use larger allocation units, to trade off
  169. * allocation speed of large document against storage efficiency of small
  170. * ones.
  171. * @param maxChunkBits Number of character-offset bits that should be used for
  172. * addressing within a chunk. Maximum length of a chunk is 2^chunkBits
  173. * characters.
  174. * @param rebundleBits Number of character-offset bits that addressing should
  175. * advance before we attempt to take a step from initChunkBits to maxChunkBits
  176. */
  177. public FastStringBuffer(int initChunkBits, int maxChunkBits,
  178. int rebundleBits)
  179. {
  180. if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS;
  181. // %REVIEW%
  182. // Should this force to larger value, or smaller? Smaller less efficient, but if
  183. // someone requested variable mode it's because they care about storage space.
  184. // On the other hand, given the other changes I'm making, odds are that we should
  185. // adopt the larger size. Dither, dither, dither... This is just stopgap workaround
  186. // anyway; we need a permanant solution.
  187. //
  188. if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits;
  189. //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits;
  190. m_array = new char[16][];
  191. // Don't bite off more than we're prepared to swallow!
  192. if (initChunkBits > maxChunkBits)
  193. initChunkBits = maxChunkBits;
  194. m_chunkBits = initChunkBits;
  195. m_maxChunkBits = maxChunkBits;
  196. m_rebundleBits = rebundleBits;
  197. m_chunkSize = 1 << (initChunkBits);
  198. m_chunkMask = m_chunkSize - 1;
  199. m_array[0] = new char[m_chunkSize];
  200. }
  /**
   * Builds an empty FastStringBuffer with caller-chosen initial and maximum
   * chunk sizes, passing 2 as the rebundleBits value.
   *
   * @param initChunkBits log-base-2 of the initial chunk length, in
   * characters.
   * @param maxChunkBits log-base-2 of the largest permitted chunk length,
   * in characters.
   */
  public FastStringBuffer(int initChunkBits, int maxChunkBits)
  {
    this(initChunkBits, maxChunkBits, 2);
  }
  /**
   * Builds an empty FastStringBuffer with a caller-chosen initial chunk
   * size, passing 15 as maxChunkBits and 2 as rebundleBits.
   * <p>
   * ISSUE: it is undecided whether the argument should mean the initial
   * size or a fixed size. It is currently passed on as the initial size.
   *
   * @param initChunkBits log-base-2 of the initial chunk length, in
   * characters.
   */
  public FastStringBuffer(int initChunkBits)
  {
    this(initChunkBits, 15, 2);
  }
  /**
   * Builds an empty FastStringBuffer with the default allocation policy:
   * initChunkBits 10, maxChunkBits 15, rebundleBits 2.
   */
  public FastStringBuffer()
  {
    // 10 bits is 1K and 15 bits is 32K. These count characters, so the
    // memory actually allocated is twice that for UTF-16 chars.
    //
    // For reference: the original FastStringBuffer simply overallocated by
    // blocksize (default 1KB) on each buffer-growth.
    this(10, 15, 2);
  }
  236. /**
  237. * Get the length of the list. Synonym for length().
  238. *
  239. * @return the number of characters in the FastStringBuffer's content.
  240. */
  241. public final int size()
  242. {
  243. return (m_lastChunk << m_chunkBits) + m_firstFree;
  244. }
  245. /**
  246. * Get the length of the list. Synonym for size().
  247. *
  248. * @return the number of characters in the FastStringBuffer's content.
  249. */
  250. public final int length()
  251. {
  252. return (m_lastChunk << m_chunkBits) + m_firstFree;
  253. }
  254. /**
  255. * Discard the content of the FastStringBuffer, and most of the memory
  256. * that was allocated by it, restoring the initial state. Note that this
  257. * may eventually be different from setLength(0), which see.
  258. */
  259. public final void reset()
  260. {
  261. m_lastChunk = 0;
  262. m_firstFree = 0;
  263. // Recover the original chunk size
  264. FastStringBuffer innermost = this;
  265. while (innermost.m_innerFSB != null)
  266. {
  267. innermost = innermost.m_innerFSB;
  268. }
  269. m_chunkBits = innermost.m_chunkBits;
  270. m_chunkSize = innermost.m_chunkSize;
  271. m_chunkMask = innermost.m_chunkMask;
  272. // Discard the hierarchy
  273. m_innerFSB = null;
  274. m_array = new char[16][0];
  275. m_array[0] = new char[m_chunkSize];
  276. }
  277. /**
  278. * Directly set how much of the FastStringBuffer's storage is to be
  279. * considered part of its content. This is a fast but hazardous
  280. * operation. It is not protected against negative values, or values
  281. * greater than the amount of storage currently available... and even
  282. * if additional storage does exist, its contents are unpredictable.
  283. * The only safe use for our setLength() is to truncate the FastStringBuffer
  284. * to a shorter string.
  285. *
  286. * @param l New length. If l<0 or l>=getLength(), this operation will
  287. * not report an error but future operations will almost certainly fail.
  288. */
  289. public final void setLength(int l)
  290. {
  291. m_lastChunk = l >>> m_chunkBits;
  292. if (m_lastChunk == 0 && m_innerFSB != null)
  293. {
  294. // Replace this FSB with the appropriate inner FSB, truncated
  295. m_innerFSB.setLength(l, this);
  296. }
  297. else
  298. {
  299. m_firstFree = l & m_chunkMask;
  300. // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving
  301. // us pointing at the start of a chunk which has not yet been allocated. Rather than
  302. // pay the cost of dealing with that in the append loops (more scattered and more
  303. // inner-loop), we correct it here by moving to the safe side of that
  304. // line -- as we would have left the indexes had we appended up to that point.
  305. if(m_firstFree==0 && m_lastChunk>0)
  306. {
  307. --m_lastChunk;
  308. m_firstFree=m_chunkSize;
  309. }
  310. }
  311. }
  312. /**
  313. * Subroutine for the public setLength() method. Deals with the fact
  314. * that truncation may require restoring one of the innerFSBs
  315. *
  316. * NEEDSDOC @param l
  317. * NEEDSDOC @param rootFSB
  318. */
  319. private final void setLength(int l, FastStringBuffer rootFSB)
  320. {
  321. m_lastChunk = l >>> m_chunkBits;
  322. if (m_lastChunk == 0 && m_innerFSB != null)
  323. {
  324. m_innerFSB.setLength(l, rootFSB);
  325. }
  326. else
  327. {
  328. // Undo encapsulation -- pop the innerFSB data back up to root.
  329. // Inefficient, but attempts to keep the code simple.
  330. rootFSB.m_chunkBits = m_chunkBits;
  331. rootFSB.m_maxChunkBits = m_maxChunkBits;
  332. rootFSB.m_rebundleBits = m_rebundleBits;
  333. rootFSB.m_chunkSize = m_chunkSize;
  334. rootFSB.m_chunkMask = m_chunkMask;
  335. rootFSB.m_array = m_array;
  336. rootFSB.m_innerFSB = m_innerFSB;
  337. rootFSB.m_lastChunk = m_lastChunk;
  338. // Finally, truncate this sucker.
  339. rootFSB.m_firstFree = l & m_chunkMask;
  340. }
  341. }
  342. /**
  343. * Note that this operation has been somewhat deoptimized by the shift to a
  344. * chunked array, as there is no factory method to produce a String object
  345. * directly from an array of arrays and hence a double copy is needed.
  346. * By using ensureCapacity we hope to minimize the heap overhead of building
  347. * the intermediate StringBuffer.
  348. * <p>
  349. * (It really is a pity that Java didn't design String as a final subclass
  350. * of MutableString, rather than having StringBuffer be a separate hierarchy.
  351. * We'd avoid a <strong>lot</strong> of double-buffering.)
  352. *
  353. * @return the contents of the FastStringBuffer as a standard Java string.
  354. */
  355. public final String toString()
  356. {
  357. int length = (m_lastChunk << m_chunkBits) + m_firstFree;
  358. return getString(new StringBuffer(length), 0, 0, length).toString();
  359. }
  360. /**
  361. * Append a single character onto the FastStringBuffer, growing the
  362. * storage if necessary.
  363. * <p>
  364. * NOTE THAT after calling append(), previously obtained
  365. * references to m_array[][] may no longer be valid....
  366. * though in fact they should be in this instance.
  367. *
  368. * @param value character to be appended.
  369. */
  370. public final void append(char value)
  371. {
  372. char[] chunk;
  373. // We may have preallocated chunks. If so, all but last should
  374. // be at full size.
  375. boolean lastchunk = (m_lastChunk + 1 == m_array.length);
  376. if (m_firstFree < m_chunkSize) // Simplified test single-character-fits
  377. chunk = m_array[m_lastChunk];
  378. else
  379. {
  380. // Extend array?
  381. int i = m_array.length;
  382. if (m_lastChunk + 1 == i)
  383. {
  384. char[][] newarray = new char[i + 16][];
  385. System.arraycopy(m_array, 0, newarray, 0, i);
  386. m_array = newarray;
  387. }
  388. // Advance one chunk
  389. chunk = m_array[++m_lastChunk];
  390. if (chunk == null)
  391. {
  392. // Hierarchical encapsulation
  393. if (m_lastChunk == 1 << m_rebundleBits
  394. && m_chunkBits < m_maxChunkBits)
  395. {
  396. // Should do all the work of both encapsulating
  397. // existing data and establishing new sizes/offsets
  398. m_innerFSB = new FastStringBuffer(this);
  399. }
  400. // Add a chunk.
  401. chunk = m_array[m_lastChunk] = new char[m_chunkSize];
  402. }
  403. m_firstFree = 0;
  404. }
  405. // Space exists in the chunk. Append the character.
  406. chunk[m_firstFree++] = value;
  407. }
  408. /**
  409. * Append the contents of a String onto the FastStringBuffer,
  410. * growing the storage if necessary.
  411. * <p>
  412. * NOTE THAT after calling append(), previously obtained
  413. * references to m_array[] may no longer be valid.
  414. *
  415. * @param value String whose contents are to be appended.
  416. */
  417. public final void append(String value)
  418. {
  419. if (value == null)
  420. return;
  421. int strlen = value.length();
  422. if (0 == strlen)
  423. return;
  424. int copyfrom = 0;
  425. char[] chunk = m_array[m_lastChunk];
  426. int available = m_chunkSize - m_firstFree;
  427. // Repeat while data remains to be copied
  428. while (strlen > 0)
  429. {
  430. // Copy what fits
  431. if (available > strlen)
  432. available = strlen;
  433. value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
  434. m_firstFree);
  435. strlen -= available;
  436. copyfrom += available;
  437. // If there's more left, allocate another chunk and continue
  438. if (strlen > 0)
  439. {
  440. // Extend array?
  441. int i = m_array.length;
  442. if (m_lastChunk + 1 == i)
  443. {
  444. char[][] newarray = new char[i + 16][];
  445. System.arraycopy(m_array, 0, newarray, 0, i);
  446. m_array = newarray;
  447. }
  448. // Advance one chunk
  449. chunk = m_array[++m_lastChunk];
  450. if (chunk == null)
  451. {
  452. // Hierarchical encapsulation
  453. if (m_lastChunk == 1 << m_rebundleBits
  454. && m_chunkBits < m_maxChunkBits)
  455. {
  456. // Should do all the work of both encapsulating
  457. // existing data and establishing new sizes/offsets
  458. m_innerFSB = new FastStringBuffer(this);
  459. }
  460. // Add a chunk.
  461. chunk = m_array[m_lastChunk] = new char[m_chunkSize];
  462. }
  463. available = m_chunkSize;
  464. m_firstFree = 0;
  465. }
  466. }
  467. // Adjust the insert point in the last chunk, when we've reached it.
  468. m_firstFree += available;
  469. }
  470. /**
  471. * Append the contents of a StringBuffer onto the FastStringBuffer,
  472. * growing the storage if necessary.
  473. * <p>
  474. * NOTE THAT after calling append(), previously obtained
  475. * references to m_array[] may no longer be valid.
  476. *
  477. * @param value StringBuffer whose contents are to be appended.
  478. */
  479. public final void append(StringBuffer value)
  480. {
  481. if (value == null)
  482. return;
  483. int strlen = value.length();
  484. if (0 == strlen)
  485. return;
  486. int copyfrom = 0;
  487. char[] chunk = m_array[m_lastChunk];
  488. int available = m_chunkSize - m_firstFree;
  489. // Repeat while data remains to be copied
  490. while (strlen > 0)
  491. {
  492. // Copy what fits
  493. if (available > strlen)
  494. available = strlen;
  495. value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
  496. m_firstFree);
  497. strlen -= available;
  498. copyfrom += available;
  499. // If there's more left, allocate another chunk and continue
  500. if (strlen > 0)
  501. {
  502. // Extend array?
  503. int i = m_array.length;
  504. if (m_lastChunk + 1 == i)
  505. {
  506. char[][] newarray = new char[i + 16][];
  507. System.arraycopy(m_array, 0, newarray, 0, i);
  508. m_array = newarray;
  509. }
  510. // Advance one chunk
  511. chunk = m_array[++m_lastChunk];
  512. if (chunk == null)
  513. {
  514. // Hierarchical encapsulation
  515. if (m_lastChunk == 1 << m_rebundleBits
  516. && m_chunkBits < m_maxChunkBits)
  517. {
  518. // Should do all the work of both encapsulating
  519. // existing data and establishing new sizes/offsets
  520. m_innerFSB = new FastStringBuffer(this);
  521. }
  522. // Add a chunk.
  523. chunk = m_array[m_lastChunk] = new char[m_chunkSize];
  524. }
  525. available = m_chunkSize;
  526. m_firstFree = 0;
  527. }
  528. }
  529. // Adjust the insert point in the last chunk, when we've reached it.
  530. m_firstFree += available;
  531. }
  532. /**
  533. * Append part of the contents of a Character Array onto the
  534. * FastStringBuffer, growing the storage if necessary.
  535. * <p>
  536. * NOTE THAT after calling append(), previously obtained
  537. * references to m_array[] may no longer be valid.
  538. *
  539. * @param chars character array from which data is to be copied
  540. * @param start offset in chars of first character to be copied,
  541. * zero-based.
  542. * @param length number of characters to be copied
  543. */
  544. public final void append(char[] chars, int start, int length)
  545. {
  546. int strlen = length;
  547. if (0 == strlen)
  548. return;
  549. int copyfrom = start;
  550. char[] chunk = m_array[m_lastChunk];
  551. int available = m_chunkSize - m_firstFree;
  552. // Repeat while data remains to be copied
  553. while (strlen > 0)
  554. {
  555. // Copy what fits
  556. if (available > strlen)
  557. available = strlen;
  558. System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree,
  559. available);
  560. strlen -= available;
  561. copyfrom += available;
  562. // If there's more left, allocate another chunk and continue
  563. if (strlen > 0)
  564. {
  565. // Extend array?
  566. int i = m_array.length;
  567. if (m_lastChunk + 1 == i)
  568. {
  569. char[][] newarray = new char[i + 16][];
  570. System.arraycopy(m_array, 0, newarray, 0, i);
  571. m_array = newarray;
  572. }
  573. // Advance one chunk
  574. chunk = m_array[++m_lastChunk];
  575. if (chunk == null)
  576. {
  577. // Hierarchical encapsulation
  578. if (m_lastChunk == 1 << m_rebundleBits
  579. && m_chunkBits < m_maxChunkBits)
  580. {
  581. // Should do all the work of both encapsulating
  582. // existing data and establishing new sizes/offsets
  583. m_innerFSB = new FastStringBuffer(this);
  584. }
  585. // Add a chunk.
  586. chunk = m_array[m_lastChunk] = new char[m_chunkSize];
  587. }
  588. available = m_chunkSize;
  589. m_firstFree = 0;
  590. }
  591. }
  592. // Adjust the insert point in the last chunk, when we've reached it.
  593. m_firstFree += available;
  594. }
  595. /**
  596. * Append the contents of another FastStringBuffer onto
  597. * this FastStringBuffer, growing the storage if necessary.
  598. * <p>
  599. * NOTE THAT after calling append(), previously obtained
  600. * references to m_array[] may no longer be valid.
  601. *
  602. * @param value FastStringBuffer whose contents are
  603. * to be appended.
  604. */
  605. public final void append(FastStringBuffer value)
  606. {
  607. // Complicating factor here is that the two buffers may use
  608. // different chunk sizes, and even if they're the same we're
  609. // probably on a different alignment due to previously appended
  610. // data. We have to work through the source in bite-sized chunks.
  611. if (value == null)
  612. return;
  613. int strlen = value.length();
  614. if (0 == strlen)
  615. return;
  616. int copyfrom = 0;
  617. char[] chunk = m_array[m_lastChunk];
  618. int available = m_chunkSize - m_firstFree;
  619. // Repeat while data remains to be copied
  620. while (strlen > 0)
  621. {
  622. // Copy what fits
  623. if (available > strlen)
  624. available = strlen;
  625. int sourcechunk = (copyfrom + value.m_chunkSize - 1)
  626. >>> value.m_chunkBits;
  627. int sourcecolumn = copyfrom & value.m_chunkMask;
  628. int runlength = value.m_chunkSize - sourcecolumn;
  629. if (runlength > available)
  630. runlength = available;
  631. System.arraycopy(value.m_array[sourcechunk], sourcecolumn,
  632. m_array[m_lastChunk], m_firstFree, runlength);
  633. if (runlength != available)
  634. System.arraycopy(value.m_array[sourcechunk + 1], 0,
  635. m_array[m_lastChunk], m_firstFree + runlength,
  636. available - runlength);
  637. strlen -= available;
  638. copyfrom += available;
  639. // If there's more left, allocate another chunk and continue
  640. if (strlen > 0)
  641. {
  642. // Extend array?
  643. int i = m_array.length;
  644. if (m_lastChunk + 1 == i)
  645. {
  646. char[][] newarray = new char[i + 16][];
  647. System.arraycopy(m_array, 0, newarray, 0, i);
  648. m_array = newarray;
  649. }
  650. // Advance one chunk
  651. chunk = m_array[++m_lastChunk];
  652. if (chunk == null)
  653. {
  654. // Hierarchical encapsulation
  655. if (m_lastChunk == 1 << m_rebundleBits
  656. && m_chunkBits < m_maxChunkBits)
  657. {
  658. // Should do all the work of both encapsulating
  659. // existing data and establishing new sizes/offsets
  660. m_innerFSB = new FastStringBuffer(this);
  661. }
  662. // Add a chunk.
  663. chunk = m_array[m_lastChunk] = new char[m_chunkSize];
  664. }
  665. available = m_chunkSize;
  666. m_firstFree = 0;
  667. }
  668. }
  669. // Adjust the insert point in the last chunk, when we've reached it.
  670. m_firstFree += available;
  671. }
  672. /**
  673. * @return true if the specified range of characters are all whitespace,
  674. * as defined by XMLCharacterRecognizer.
  675. * <p>
  676. * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE.
  677. *
  678. * @param start Offset of first character in the range.
  679. * @param length Number of characters to send.
  680. */
  681. public boolean isWhitespace(int start, int length)
  682. {
  683. int sourcechunk = start >>> m_chunkBits;
  684. int sourcecolumn = start & m_chunkMask;
  685. int available = m_chunkSize - sourcecolumn;
  686. boolean chunkOK;
  687. while (length > 0)
  688. {
  689. int runlength = (length <= available) ? length : available;
  690. if (sourcechunk == 0 && m_innerFSB != null)
  691. chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength);
  692. else
  693. chunkOK = com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer.isWhiteSpace(
  694. m_array[sourcechunk], sourcecolumn, runlength);
  695. if (!chunkOK)
  696. return false;
  697. length -= runlength;
  698. ++sourcechunk;
  699. sourcecolumn = 0;
  700. available = m_chunkSize;
  701. }
  702. return true;
  703. }
  704. /**
  705. * @param start Offset of first character in the range.
  706. * @param length Number of characters to send.
  707. * @return a new String object initialized from the specified range of
  708. * characters.
  709. */
  710. public String getString(int start, int length)
  711. {
  712. int startColumn = start & m_chunkMask;
  713. int startChunk = start >>> m_chunkBits;
  714. if (startColumn + length < m_chunkMask && m_innerFSB == null) {
  715. return getOneChunkString(startChunk, startColumn, length);
  716. }
  717. return getString(new StringBuffer(length), startChunk, startColumn,
  718. length).toString();
  719. }
  720. protected String getOneChunkString(int startChunk, int startColumn,
  721. int length) {
  722. return new String(m_array[startChunk], startColumn, length);
  723. }
  724. /**
  725. * @param sb StringBuffer to be appended to
  726. * @param start Offset of first character in the range.
  727. * @param length Number of characters to send.
  728. * @return sb with the requested text appended to it
  729. */
  730. StringBuffer getString(StringBuffer sb, int start, int length)
  731. {
  732. return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length);
  733. }
  734. /**
  735. * Internal support for toString() and getString().
  736. * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into
  737. * and returns a StringBuffer supplied by the caller. This simplifies
  738. * m_innerFSB support.
  739. * <p>
  740. * Note that this operation has been somewhat deoptimized by the shift to a
  741. * chunked array, as there is no factory method to produce a String object
  742. * directly from an array of arrays and hence a double copy is needed.
  743. * By presetting length we hope to minimize the heap overhead of building
  744. * the intermediate StringBuffer.
  745. * <p>
  746. * (It really is a pity that Java didn't design String as a final subclass
  747. * of MutableString, rather than having StringBuffer be a separate hierarchy.
  748. * We'd avoid a <strong>lot</strong> of double-buffering.)
  749. *
  750. *
  751. * @param sb
  752. * @param startChunk
  753. * @param startColumn
  754. * @param length
  755. *
  756. * @return the contents of the FastStringBuffer as a standard Java string.
  757. */
  758. StringBuffer getString(StringBuffer sb, int startChunk, int startColumn,
  759. int length)
  760. {
  761. int stop = (startChunk << m_chunkBits) + startColumn + length;
  762. int stopChunk = stop >>> m_chunkBits;
  763. int stopColumn = stop & m_chunkMask;
  764. // Factored out
  765. //StringBuffer sb=new StringBuffer(length);
  766. for (int i = startChunk; i < stopChunk; ++i)
  767. {
  768. if (i == 0 && m_innerFSB != null)
  769. m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn);
  770. else
  771. sb.append(m_array[i], startColumn, m_chunkSize - startColumn);
  772. startColumn = 0; // after first chunk
  773. }
  774. if (stopChunk == 0 && m_innerFSB != null)
  775. m_innerFSB.getString(sb, startColumn, stopColumn - startColumn);
  776. else if (stopColumn > startColumn)
  777. sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn);
  778. return sb;
  779. }
  780. /**
  781. * Get a single character from the string buffer.
  782. *
  783. *
  784. * @param pos character position requested.
  785. * @return A character from the requested position.
  786. */
  787. public char charAt(int pos)
  788. {
  789. int startChunk = pos >>> m_chunkBits;
  790. if (startChunk == 0 && m_innerFSB != null)
  791. return m_innerFSB.charAt(pos & m_chunkMask);
  792. else
  793. return m_array[startChunk][pos & m_chunkMask];
  794. }
  795. /**
  796. * Sends the specified range of characters as one or more SAX characters()
  797. * events.
  798. * Note that the buffer reference passed to the ContentHandler may be
  799. * invalidated if the FastStringBuffer is edited; it's the user's
  800. * responsibility to manage access to the FastStringBuffer to prevent this
  801. * problem from arising.
  802. * <p>
  803. * Note too that there is no promise that the output will be sent as a
  804. * single call. As is always true in SAX, one logical string may be split
  805. * across multiple blocks of memory and hence delivered as several
  806. * successive events.
  807. *
  808. * @param ch SAX ContentHandler object to receive the event.
  809. * @param start Offset of first character in the range.
  810. * @param length Number of characters to send.
  811. * @exception org.xml.sax.SAXException may be thrown by handler's
  812. * characters() method.
  813. */
  814. public void sendSAXcharacters(
  815. org.xml.sax.ContentHandler ch, int start, int length)
  816. throws org.xml.sax.SAXException
  817. {
  818. int startChunk = start >>> m_chunkBits;
  819. int startColumn = start & m_chunkMask;
  820. if (startColumn + length < m_chunkMask && m_innerFSB == null) {
  821. ch.characters(m_array[startChunk], startColumn, length);
  822. return;
  823. }
  824. int stop = start + length;
  825. int stopChunk = stop >>> m_chunkBits;
  826. int stopColumn = stop & m_chunkMask;
  827. for (int i = startChunk; i < stopChunk; ++i)
  828. {
  829. if (i == 0 && m_innerFSB != null)
  830. m_innerFSB.sendSAXcharacters(ch, startColumn,
  831. m_chunkSize - startColumn);
  832. else
  833. ch.characters(m_array[i], startColumn, m_chunkSize - startColumn);
  834. startColumn = 0; // after first chunk
  835. }
  836. // Last, or only, chunk
  837. if (stopChunk == 0 && m_innerFSB != null)
  838. m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn);
  839. else if (stopColumn > startColumn)
  840. {
  841. ch.characters(m_array[stopChunk], startColumn,
  842. stopColumn - startColumn);
  843. }
  844. }
  845. /**
  846. * Sends the specified range of characters as one or more SAX characters()
  847. * events, normalizing the characters according to XSLT rules.
  848. *
  849. * @param ch SAX ContentHandler object to receive the event.
  850. * @param start Offset of first character in the range.
  851. * @param length Number of characters to send.
  852. * @return normalization status to apply to next chunk (because we may
  853. * have been called recursively to process an inner FSB):
  854. * <dl>
  855. * <dt>0</dt>
  856. * <dd>if this output did not end in retained whitespace, and thus whitespace
  857. * at the start of the following chunk (if any) should be converted to a
  858. * single space.
  859. * <dt>SUPPRESS_LEADING_WS</dt>
  860. * <dd>if this output ended in retained whitespace, and thus whitespace
  861. * at the start of the following chunk (if any) should be completely
  862. * suppressed.</dd>
  863. * </dd>
  864. * </dl>
  865. * @exception org.xml.sax.SAXException may be thrown by handler's
  866. * characters() method.
  867. */
  868. public int sendNormalizedSAXcharacters(
  869. org.xml.sax.ContentHandler ch, int start, int length)
  870. throws org.xml.sax.SAXException
  871. {
  872. // This call always starts at the beginning of the
  873. // string being written out, either because it was called directly or
  874. // because it was an m_innerFSB recursion. This is important since
  875. // it gives us a well-known initial state for this flag:
  876. int stateForNextChunk=SUPPRESS_LEADING_WS;
  877. int stop = start + length;
  878. int startChunk = start >>> m_chunkBits;
  879. int startColumn = start & m_chunkMask;
  880. int stopChunk = stop >>> m_chunkBits;
  881. int stopColumn = stop & m_chunkMask;
  882. for (int i = startChunk; i < stopChunk; ++i)
  883. {
  884. if (i == 0 && m_innerFSB != null)
  885. stateForNextChunk=
  886. m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
  887. m_chunkSize - startColumn);
  888. else
  889. stateForNextChunk=
  890. sendNormalizedSAXcharacters(m_array[i], startColumn,
  891. m_chunkSize - startColumn,
  892. ch,stateForNextChunk);
  893. startColumn = 0; // after first chunk
  894. }
  895. // Last, or only, chunk
  896. if (stopChunk == 0 && m_innerFSB != null)
  897. stateForNextChunk= // %REVIEW% Is this update really needed?
  898. m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn);
  899. else if (stopColumn > startColumn)
  900. {
  901. stateForNextChunk= // %REVIEW% Is this update really needed?
  902. sendNormalizedSAXcharacters(m_array[stopChunk],
  903. startColumn, stopColumn - startColumn,
  904. ch, stateForNextChunk | SUPPRESS_TRAILING_WS);
  905. }
  906. return stateForNextChunk;
  907. }
  908. static final char[] SINGLE_SPACE = {' '};
  909. /**
  910. * Internal method to directly normalize and dispatch the character array.
  911. * This version is aware of the fact that it may be called several times
  912. * in succession if the data is made up of multiple "chunks", and thus
  913. * must actively manage the handling of leading and trailing whitespace.
  914. *
  915. * Note: The recursion is due to the possible recursion of inner FSBs.
  916. *
  917. * @param ch The characters from the XML document.
  918. * @param start The start position in the array.
  919. * @param length The number of characters to read from the array.
  920. * @param handler SAX ContentHandler object to receive the event.
  921. * @param edgeTreatmentFlags How leading/trailing spaces should be handled.
  922. * This is a bitfield contining two flags, bitwise-ORed together:
  923. * <dl>
  924. * <dt>SUPPRESS_LEADING_WS</dt>
  925. * <dd>When false, causes leading whitespace to be converted to a single
  926. * space; when true, causes it to be discarded entirely.
  927. * Should be set TRUE for the first chunk, and (in multi-chunk output)
  928. * whenever the previous chunk ended in retained whitespace.</dd>
  929. * <dt>SUPPRESS_TRAILING_WS</dt>
  930. * <dd>When false, causes trailing whitespace to be converted to a single
  931. * space; when true, causes it to be discarded entirely.
  932. * Should be set TRUE for the last or only chunk.
  933. * </dd>
  934. * </dl>
  935. * @return normalization status, as in the edgeTreatmentFlags parameter:
  936. * <dl>
  937. * <dt>0</dt>
  938. * <dd>if this output did not end in retained whitespace, and thus whitespace
  939. * at the start of the following chunk (if any) should be converted to a
  940. * single space.
  941. * <dt>SUPPRESS_LEADING_WS</dt>
  942. * <dd>if this output ended in retained whitespace, and thus whitespace
  943. * at the start of the following chunk (if any) should be completely
  944. * suppressed.</dd>
  945. * </dd>
  946. * </dl>
  947. *
  948. *
  949. * @exception org.xml.sax.SAXException Any SAX exception, possibly
  950. * wrapping another exception.
  951. */
  952. static int sendNormalizedSAXcharacters(char ch[],
  953. int start, int length,
  954. org.xml.sax.ContentHandler handler,
  955. int edgeTreatmentFlags)
  956. throws org.xml.sax.SAXException
  957. {
  958. boolean processingLeadingWhitespace =
  959. ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0);
  960. boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0);
  961. boolean suppressTrailingWhitespace =
  962. ((edgeTreatmentFlags & SUPPRESS_TRAILING_WS) != 0);
  963. int currPos = start;
  964. int limit = start+length;
  965. // Strip any leading spaces first, if required
  966. if (processingLeadingWhitespace) {
  967. for (; currPos < limit
  968. && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
  969. currPos++) { }
  970. // If we've only encountered leading spaces, the
  971. // current state remains unchanged
  972. if (currPos == limit) {
  973. return edgeTreatmentFlags;
  974. }
  975. }
  976. // If we get here, there are no more leading spaces to strip
  977. while (currPos < limit) {
  978. int startNonWhitespace = currPos;
  979. // Grab a chunk of non-whitespace characters
  980. for (; currPos < limit
  981. && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
  982. currPos++) { }
  983. // Non-whitespace seen - emit them, along with a single
  984. // space for any preceding whitespace characters
  985. if (startNonWhitespace != currPos) {
  986. if (seenWhitespace) {
  987. handler.characters(SINGLE_SPACE, 0, 1);
  988. seenWhitespace = false;
  989. }
  990. handler.characters(ch, startNonWhitespace,
  991. currPos - startNonWhitespace);
  992. }
  993. int startWhitespace = currPos;
  994. // Consume any whitespace characters
  995. for (; currPos < limit
  996. && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
  997. currPos++) { }
  998. if (startWhitespace != currPos) {
  999. seenWhitespace = true;
  1000. }
  1001. }
  1002. return (seenWhitespace ? CARRY_WS : 0)
  1003. | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS);
  1004. }
  1005. /**
  1006. * Directly normalize and dispatch the character array.
  1007. *
  1008. * @param ch The characters from the XML document.
  1009. * @param start The start position in the array.
  1010. * @param length The number of characters to read from the array.
  1011. * @param handler SAX ContentHandler object to receive the event.
  1012. * @exception org.xml.sax.SAXException Any SAX exception, possibly
  1013. * wrapping another exception.
  1014. */
  1015. public static void sendNormalizedSAXcharacters(char ch[],
  1016. int start, int length,
  1017. org.xml.sax.ContentHandler handler)
  1018. throws org.xml.sax.SAXException
  1019. {
  1020. sendNormalizedSAXcharacters(ch, start, length,
  1021. handler, SUPPRESS_BOTH);
  1022. }
  1023. /**
  1024. * Sends the specified range of characters as sax Comment.
  1025. * <p>
  1026. * Note that, unlike sendSAXcharacters, this has to be done as a single
  1027. * call to LexicalHandler#comment.
  1028. *
  1029. * @param ch SAX LexicalHandler object to receive the event.
  1030. * @param start Offset of first character in the range.
  1031. * @param length Number of characters to send.
  1032. * @exception org.xml.sax.SAXException may be thrown by handler's
  1033. * characters() method.
  1034. */
  1035. public void sendSAXComment(
  1036. org.xml.sax.ext.LexicalHandler ch, int start, int length)
  1037. throws org.xml.sax.SAXException
  1038. {
  1039. // %OPT% Do it this way for now...
  1040. String comment = getString(start, length);
  1041. ch.comment(comment.toCharArray(), 0, length);
  1042. }
  1043. /**
  1044. * Copies characters from this string into the destination character
  1045. * array.
  1046. *
  1047. * @param srcBegin index of the first character in the string
  1048. * to copy.
  1049. * @param srcEnd index after the last character in the string
  1050. * to copy.
  1051. * @param dst the destination array.
  1052. * @param dstBegin the start offset in the destination array.
  1053. * @exception IndexOutOfBoundsException If any of the following
  1054. * is true:
  1055. * <ul><li><code>srcBegin</code> is negative.
  1056. * <li><code>srcBegin</code> is greater than <code>srcEnd</code>
  1057. * <li><code>srcEnd</code> is greater than the length of this
  1058. * string
  1059. * <li><code>dstBegin</code> is negative
  1060. * <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
  1061. * <code>dst.length</code></ul>
  1062. * @exception NullPointerException if <code>dst</code> is <code>null</code>
  1063. */
  1064. private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
  1065. {
  1066. // %TBD% Joe needs to write this function. Make public when implemented.
  1067. }
  /**
   * Encapsulation c'tor. The new object takes over the source's current
   * chunk array, inner buffer and chunk geometry. The source is then reset
   * to use the new object as its m_innerFSB, given a fresh chunk array, and
   * has its chunk size enlarged by m_rebundleBits bits. IT SHOULD NEVER BE
   * CALLED EXCEPT WHEN
   * source.length()==1<<(source.m_chunkBits+source.m_rebundleBits)
   *
   * @param source the FastStringBuffer whose present contents are to be
   * wrapped by this new object; it is modified by this constructor as
   * described above.
   */
  private FastStringBuffer(FastStringBuffer source)
  {
    // Take over the storage and geometry the source has been using so far.
    this.m_array = source.m_array;
    this.m_innerFSB = source.m_innerFSB;
    this.m_chunkBits = source.m_chunkBits;
    this.m_chunkSize = source.m_chunkSize;
    this.m_chunkMask = source.m_chunkMask;
    this.m_maxChunkBits = source.m_maxChunkBits;
    this.m_rebundleBits = source.m_rebundleBits;

    // These have to be adjusted because we're calling just at the time
    // when the source would be about to allocate another chunk.
    this.m_lastChunk = source.m_lastChunk - 1;
    this.m_firstFree = source.m_chunkSize;

    // Establish this capsule as the source's inner FSB, on top of a fresh
    // chunk array.
    source.m_innerFSB = this;
    source.m_array = new char[16][];

    // Since we encapsulated just as the source was about to append another
    // chunk, leave it ready to create the chunk after the innerFSB
    // -- 1, not 0.
    source.m_lastChunk = 1;
    source.m_firstFree = 0;

    // Widen the source's chunk addressing by the rebundle factor.
    source.m_chunkBits += this.m_rebundleBits;
    source.m_chunkSize = 1 << source.m_chunkBits;
    source.m_chunkMask = source.m_chunkSize - 1;
  }
  1102. }