/projects/jre-1.6.0/src/com/sun/org/apache/xml/internal/utils/FastStringBuffer.java
Java | 1294 lines | 539 code | 192 blank | 563 comment | 148 complexity | c5723d30c1cecd2da51b09867c933a69 MD5 | raw file
1/*
2 * Copyright 1999-2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/*
17 * $Id: FastStringBuffer.java,v 1.2.4.1 2005/09/15 08:15:44 suresh_emailid Exp $
18 */
19package com.sun.org.apache.xml.internal.utils;
20
21/**
22 * Bare-bones, unsafe, fast string buffer. No thread-safety, no
23 * parameter range checking, exposed fields. Note that in typical
24 * applications, thread-safety of a StringBuffer is a somewhat
25 * dubious concept in any case.
26 * <p>
27 * Note that Stree and DTM used a single FastStringBuffer as a string pool,
28 * by recording start and length indices within this single buffer. This
29 * minimizes heap overhead, but of course requires more work when retrieving
30 * the data.
31 * <p>
32 * FastStringBuffer operates as a "chunked buffer". Doing so
33 * reduces the need to recopy existing information when an append
34 * exceeds the space available; we just allocate another chunk and
35 * flow across to it. (The array of chunks may need to grow,
36 * admittedly, but that's a much smaller object.) Some excess
37 * recopying may arise when we extract Strings which cross chunk
38 * boundaries; larger chunks make that less frequent.
39 * <p>
40 * The size values are parameterized, to allow tuning this code. In
41 * theory, Result Tree Fragments might want to be tuned differently
42 * from the main document's text.
43 * <p>
44 * %REVIEW% An experiment in self-tuning is
45 * included in the code (using nested FastStringBuffers to achieve
46 * variation in chunk sizes), but this implementation has proven to
47 * be problematic when data may be being copied from the FSB into itself.
48 * We should either re-architect that to make this safe (if possible)
49 * or remove that code and clean up for performance/maintainability reasons.
50 * <p>
51 */
52public class FastStringBuffer
53{
54 // If nonzero, forces the inial chunk size.
55 /**/static final int DEBUG_FORCE_INIT_BITS=0;
56
57 // %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied
58 // back into the same FSB (variable set from previous variable, for example)
59 // and blocksize changes in mid-copy... there's risk of severe malfunction in
60 // the read process, due to how the resizing code re-jiggers storage. Arggh.
61 // If we want to retain the variable-size-block feature, we need to reconsider
62 // that issue. For now, I have forced us into fixed-size mode.
63 static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true;
64
65 /** Manifest constant: Suppress leading whitespace.
66 * This should be used when normalize-to-SAX is called for the first chunk of a
67 * multi-chunk output, or one following unsuppressed whitespace in a previous
68 * chunk.
69 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
70 */
71 public static final int SUPPRESS_LEADING_WS=0x01;
72
73 /** Manifest constant: Suppress trailing whitespace.
74 * This should be used when normalize-to-SAX is called for the last chunk of a
75 * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
76 */
77 public static final int SUPPRESS_TRAILING_WS=0x02;
78
79 /** Manifest constant: Suppress both leading and trailing whitespace.
80 * This should be used when normalize-to-SAX is called for a complete string.
81 * (I'm not wild about the name of this one. Ideas welcome.)
82 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
83 */
84 public static final int SUPPRESS_BOTH
85 = SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS;
86
87 /** Manifest constant: Carry trailing whitespace of one chunk as leading
88 * whitespace of the next chunk. Used internally; I don't see any reason
89 * to make it public right now.
90 */
91 private static final int CARRY_WS=0x04;
92
93 /**
94 * Field m_chunkBits sets our chunking strategy, by saying how many
95 * bits of index can be used within a single chunk before flowing over
96 * to the next chunk. For example, if m_chunkbits is set to 15, each
97 * chunk can contain up to 2^15 (32K) characters
98 */
99 int m_chunkBits = 15;
100
101 /**
102 * Field m_maxChunkBits affects our chunk-growth strategy, by saying what
103 * the largest permissible chunk size is in this particular FastStringBuffer
104 * hierarchy.
105 */
106 int m_maxChunkBits = 15;
107
108 /**
109 * Field m_rechunkBits affects our chunk-growth strategy, by saying how
110 * many chunks should be allocated at one size before we encapsulate them
111 * into the first chunk of the next size up. For example, if m_rechunkBits
112 * is set to 3, then after 8 chunks at a given size we will rebundle
113 * them as the first element of a FastStringBuffer using a chunk size
114 * 8 times larger (chunkBits shifted left three bits).
115 */
116 int m_rebundleBits = 2;
117
118 /**
119 * Field m_chunkSize establishes the maximum size of one chunk of the array
120 * as 2**chunkbits characters.
121 * (Which may also be the minimum size if we aren't tuning for storage)
122 */
123 int m_chunkSize; // =1<<(m_chunkBits-1);
124
125 /**
126 * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits
127 * worth of low-order '1' bits, useful for shift-and-mask addressing
128 * within the chunks.
129 */
130 int m_chunkMask; // =m_chunkSize-1;
131
132 /**
133 * Field m_array holds the string buffer's text contents, using an
134 * array-of-arrays. Note that this array, and the arrays it contains, may be
135 * reallocated when necessary in order to allow the buffer to grow;
136 * references to them should be considered to be invalidated after any
137 * append. However, the only time these arrays are directly exposed
138 * is in the sendSAXcharacters call.
139 */
140 char[][] m_array;
141
142 /**
143 * Field m_lastChunk is an index into m_array[], pointing to the last
144 * chunk of the Chunked Array currently in use. Note that additional
145 * chunks may actually be allocated, eg if the FastStringBuffer had
146 * previously been truncated or if someone issued an ensureSpace request.
147 * <p>
148 * The insertion point for append operations is addressed by the combination
149 * of m_lastChunk and m_firstFree.
150 */
151 int m_lastChunk = 0;
152
153 /**
154 * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
155 * the first character in the Chunked Array which is not part of the
156 * FastStringBuffer's current content. Since m_array[][] is zero-based,
157 * the length of that content can be calculated as
158 * (m_lastChunk<<m_chunkBits) + m_firstFree
159 */
160 int m_firstFree = 0;
161
162 /**
163 * Field m_innerFSB, when non-null, is a FastStringBuffer whose total
164 * length equals m_chunkSize, and which replaces m_array[0]. This allows
165 * building a hierarchy of FastStringBuffers, where early appends use
166 * a smaller chunkSize (for less wasted memory overhead) but later
167 * ones use a larger chunkSize (for less heap activity overhead).
168 */
169 FastStringBuffer m_innerFSB = null;
170
171 /**
172 * Construct a FastStringBuffer, with allocation policy as per parameters.
173 * <p>
174 * For coding convenience, I've expressed both allocation sizes in terms of
175 * a number of bits. That's needed for the final size of a chunk,
176 * to permit fast and efficient shift-and-mask addressing. It's less critical
177 * for the initial size, and may be reconsidered.
178 * <p>
179 * An alternative would be to accept integer sizes and round to powers of two;
180 * that really doesn't seem to buy us much, if anything.
181 *
182 * @param initChunkBits Length in characters of the initial allocation
183 * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024
184 * characters.) Later chunks will use larger allocation units, to trade off
185 * allocation speed of large document against storage efficiency of small
186 * ones.
187 * @param maxChunkBits Number of character-offset bits that should be used for
188 * addressing within a chunk. Maximum length of a chunk is 2^chunkBits
189 * characters.
190 * @param rebundleBits Number of character-offset bits that addressing should
191 * advance before we attempt to take a step from initChunkBits to maxChunkBits
192 */
193 public FastStringBuffer(int initChunkBits, int maxChunkBits,
194 int rebundleBits)
195 {
196 if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS;
197
198 // %REVIEW%
199 // Should this force to larger value, or smaller? Smaller less efficient, but if
200 // someone requested variable mode it's because they care about storage space.
201 // On the other hand, given the other changes I'm making, odds are that we should
202 // adopt the larger size. Dither, dither, dither... This is just stopgap workaround
203 // anyway; we need a permanant solution.
204 //
205 if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits;
206 //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits;
207
208 m_array = new char[16][];
209
210 // Don't bite off more than we're prepared to swallow!
211 if (initChunkBits > maxChunkBits)
212 initChunkBits = maxChunkBits;
213
214 m_chunkBits = initChunkBits;
215 m_maxChunkBits = maxChunkBits;
216 m_rebundleBits = rebundleBits;
217 m_chunkSize = 1 << (initChunkBits);
218 m_chunkMask = m_chunkSize - 1;
219 m_array[0] = new char[m_chunkSize];
220 }
221
222 /**
223 * Construct a FastStringBuffer, using a default rebundleBits value.
224 *
225 * NEEDSDOC @param initChunkBits
226 * NEEDSDOC @param maxChunkBits
227 */
public FastStringBuffer(int initChunkBits, int maxChunkBits)
{
  // Delegate to the full constructor with the default rebundleBits of 2.
  this(initChunkBits, maxChunkBits, 2);
}
232
233 /**
234 * Construct a FastStringBuffer, using default maxChunkBits and
235 * rebundleBits values.
236 * <p>
237 * ISSUE: Should this call assert initial size, or fixed size?
238 * Now configured as initial, with a default for fixed.
239 *
240 * NEEDSDOC @param initChunkBits
241 */
public FastStringBuffer(int initChunkBits)
{
  // Defaults: maxChunkBits = 15 (32K characters), rebundleBits = 2.
  this(initChunkBits, 15, 2);
}
246
247 /**
248 * Construct a FastStringBuffer, using a default allocation policy.
249 */
public FastStringBuffer()
{
  // Default policy: 10 bits (1K characters) initially, 15 bits (32K
  // characters) maximum, rebundleBits of 2. These are character counts,
  // so the memory actually allocated is doubled for UTF-16 chars.
  //
  // For reference: the original FastStringBuffer simply overallocated by
  // blocksize (default 1KB) on each buffer growth.
  this(10, 15, 2);
}
260
261 /**
262 * Get the length of the list. Synonym for length().
263 *
264 * @return the number of characters in the FastStringBuffer's content.
265 */
266 public final int size()
267 {
268 return (m_lastChunk << m_chunkBits) + m_firstFree;
269 }
270
271 /**
272 * Get the length of the list. Synonym for size().
273 *
274 * @return the number of characters in the FastStringBuffer's content.
275 */
276 public final int length()
277 {
278 return (m_lastChunk << m_chunkBits) + m_firstFree;
279 }
280
281 /**
282 * Discard the content of the FastStringBuffer, and most of the memory
283 * that was allocated by it, restoring the initial state. Note that this
284 * may eventually be different from setLength(0), which see.
285 */
286 public final void reset()
287 {
288
289 m_lastChunk = 0;
290 m_firstFree = 0;
291
292 // Recover the original chunk size
293 FastStringBuffer innermost = this;
294
295 while (innermost.m_innerFSB != null)
296 {
297 innermost = innermost.m_innerFSB;
298 }
299
300 m_chunkBits = innermost.m_chunkBits;
301 m_chunkSize = innermost.m_chunkSize;
302 m_chunkMask = innermost.m_chunkMask;
303
304 // Discard the hierarchy
305 m_innerFSB = null;
306 m_array = new char[16][0];
307 m_array[0] = new char[m_chunkSize];
308 }
309
310 /**
311 * Directly set how much of the FastStringBuffer's storage is to be
312 * considered part of its content. This is a fast but hazardous
313 * operation. It is not protected against negative values, or values
314 * greater than the amount of storage currently available... and even
315 * if additional storage does exist, its contents are unpredictable.
316 * The only safe use for our setLength() is to truncate the FastStringBuffer
317 * to a shorter string.
318 *
319 * @param l New length. If l<0 or l>=getLength(), this operation will
320 * not report an error but future operations will almost certainly fail.
321 */
322 public final void setLength(int l)
323 {
324 m_lastChunk = l >>> m_chunkBits;
325
326 if (m_lastChunk == 0 && m_innerFSB != null)
327 {
328 // Replace this FSB with the appropriate inner FSB, truncated
329 m_innerFSB.setLength(l, this);
330 }
331 else
332 {
333 m_firstFree = l & m_chunkMask;
334
335 // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving
336 // us pointing at the start of a chunk which has not yet been allocated. Rather than
337 // pay the cost of dealing with that in the append loops (more scattered and more
338 // inner-loop), we correct it here by moving to the safe side of that
339 // line -- as we would have left the indexes had we appended up to that point.
340 if(m_firstFree==0 && m_lastChunk>0)
341 {
342 --m_lastChunk;
343 m_firstFree=m_chunkSize;
344 }
345 }
346 }
347
348 /**
349 * Subroutine for the public setLength() method. Deals with the fact
350 * that truncation may require restoring one of the innerFSBs
351 *
352 * NEEDSDOC @param l
353 * NEEDSDOC @param rootFSB
354 */
355 private final void setLength(int l, FastStringBuffer rootFSB)
356 {
357
358 m_lastChunk = l >>> m_chunkBits;
359
360 if (m_lastChunk == 0 && m_innerFSB != null)
361 {
362 m_innerFSB.setLength(l, rootFSB);
363 }
364 else
365 {
366
367 // Undo encapsulation -- pop the innerFSB data back up to root.
368 // Inefficient, but attempts to keep the code simple.
369 rootFSB.m_chunkBits = m_chunkBits;
370 rootFSB.m_maxChunkBits = m_maxChunkBits;
371 rootFSB.m_rebundleBits = m_rebundleBits;
372 rootFSB.m_chunkSize = m_chunkSize;
373 rootFSB.m_chunkMask = m_chunkMask;
374 rootFSB.m_array = m_array;
375 rootFSB.m_innerFSB = m_innerFSB;
376 rootFSB.m_lastChunk = m_lastChunk;
377
378 // Finally, truncate this sucker.
379 rootFSB.m_firstFree = l & m_chunkMask;
380 }
381 }
382
383 /**
384 * Note that this operation has been somewhat deoptimized by the shift to a
385 * chunked array, as there is no factory method to produce a String object
386 * directly from an array of arrays and hence a double copy is needed.
387 * By using ensureCapacity we hope to minimize the heap overhead of building
388 * the intermediate StringBuffer.
389 * <p>
390 * (It really is a pity that Java didn't design String as a final subclass
391 * of MutableString, rather than having StringBuffer be a separate hierarchy.
392 * We'd avoid a <strong>lot</strong> of double-buffering.)
393 *
394 * @return the contents of the FastStringBuffer as a standard Java string.
395 */
396 public final String toString()
397 {
398
399 int length = (m_lastChunk << m_chunkBits) + m_firstFree;
400
401 return getString(new StringBuffer(length), 0, 0, length).toString();
402 }
403
404 /**
405 * Append a single character onto the FastStringBuffer, growing the
406 * storage if necessary.
407 * <p>
408 * NOTE THAT after calling append(), previously obtained
409 * references to m_array[][] may no longer be valid....
410 * though in fact they should be in this instance.
411 *
412 * @param value character to be appended.
413 */
414 public final void append(char value)
415 {
416
417 char[] chunk;
418
419 // We may have preallocated chunks. If so, all but last should
420 // be at full size.
421 boolean lastchunk = (m_lastChunk + 1 == m_array.length);
422
423 if (m_firstFree < m_chunkSize) // Simplified test single-character-fits
424 chunk = m_array[m_lastChunk];
425 else
426 {
427
428 // Extend array?
429 int i = m_array.length;
430
431 if (m_lastChunk + 1 == i)
432 {
433 char[][] newarray = new char[i + 16][];
434
435 System.arraycopy(m_array, 0, newarray, 0, i);
436
437 m_array = newarray;
438 }
439
440 // Advance one chunk
441 chunk = m_array[++m_lastChunk];
442
443 if (chunk == null)
444 {
445
446 // Hierarchical encapsulation
447 if (m_lastChunk == 1 << m_rebundleBits
448 && m_chunkBits < m_maxChunkBits)
449 {
450
451 // Should do all the work of both encapsulating
452 // existing data and establishing new sizes/offsets
453 m_innerFSB = new FastStringBuffer(this);
454 }
455
456 // Add a chunk.
457 chunk = m_array[m_lastChunk] = new char[m_chunkSize];
458 }
459
460 m_firstFree = 0;
461 }
462
463 // Space exists in the chunk. Append the character.
464 chunk[m_firstFree++] = value;
465 }
466
467 /**
468 * Append the contents of a String onto the FastStringBuffer,
469 * growing the storage if necessary.
470 * <p>
471 * NOTE THAT after calling append(), previously obtained
472 * references to m_array[] may no longer be valid.
473 *
474 * @param value String whose contents are to be appended.
475 */
476 public final void append(String value)
477 {
478
479 if (value == null)
480 return;
481 int strlen = value.length();
482
483 if (0 == strlen)
484 return;
485
486 int copyfrom = 0;
487 char[] chunk = m_array[m_lastChunk];
488 int available = m_chunkSize - m_firstFree;
489
490 // Repeat while data remains to be copied
491 while (strlen > 0)
492 {
493
494 // Copy what fits
495 if (available > strlen)
496 available = strlen;
497
498 value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
499 m_firstFree);
500
501 strlen -= available;
502 copyfrom += available;
503
504 // If there's more left, allocate another chunk and continue
505 if (strlen > 0)
506 {
507
508 // Extend array?
509 int i = m_array.length;
510
511 if (m_lastChunk + 1 == i)
512 {
513 char[][] newarray = new char[i + 16][];
514
515 System.arraycopy(m_array, 0, newarray, 0, i);
516
517 m_array = newarray;
518 }
519
520 // Advance one chunk
521 chunk = m_array[++m_lastChunk];
522
523 if (chunk == null)
524 {
525
526 // Hierarchical encapsulation
527 if (m_lastChunk == 1 << m_rebundleBits
528 && m_chunkBits < m_maxChunkBits)
529 {
530
531 // Should do all the work of both encapsulating
532 // existing data and establishing new sizes/offsets
533 m_innerFSB = new FastStringBuffer(this);
534 }
535
536 // Add a chunk.
537 chunk = m_array[m_lastChunk] = new char[m_chunkSize];
538 }
539
540 available = m_chunkSize;
541 m_firstFree = 0;
542 }
543 }
544
545 // Adjust the insert point in the last chunk, when we've reached it.
546 m_firstFree += available;
547 }
548
549 /**
550 * Append the contents of a StringBuffer onto the FastStringBuffer,
551 * growing the storage if necessary.
552 * <p>
553 * NOTE THAT after calling append(), previously obtained
554 * references to m_array[] may no longer be valid.
555 *
556 * @param value StringBuffer whose contents are to be appended.
557 */
558 public final void append(StringBuffer value)
559 {
560
561 if (value == null)
562 return;
563 int strlen = value.length();
564
565 if (0 == strlen)
566 return;
567
568 int copyfrom = 0;
569 char[] chunk = m_array[m_lastChunk];
570 int available = m_chunkSize - m_firstFree;
571
572 // Repeat while data remains to be copied
573 while (strlen > 0)
574 {
575
576 // Copy what fits
577 if (available > strlen)
578 available = strlen;
579
580 value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
581 m_firstFree);
582
583 strlen -= available;
584 copyfrom += available;
585
586 // If there's more left, allocate another chunk and continue
587 if (strlen > 0)
588 {
589
590 // Extend array?
591 int i = m_array.length;
592
593 if (m_lastChunk + 1 == i)
594 {
595 char[][] newarray = new char[i + 16][];
596
597 System.arraycopy(m_array, 0, newarray, 0, i);
598
599 m_array = newarray;
600 }
601
602 // Advance one chunk
603 chunk = m_array[++m_lastChunk];
604
605 if (chunk == null)
606 {
607
608 // Hierarchical encapsulation
609 if (m_lastChunk == 1 << m_rebundleBits
610 && m_chunkBits < m_maxChunkBits)
611 {
612
613 // Should do all the work of both encapsulating
614 // existing data and establishing new sizes/offsets
615 m_innerFSB = new FastStringBuffer(this);
616 }
617
618 // Add a chunk.
619 chunk = m_array[m_lastChunk] = new char[m_chunkSize];
620 }
621
622 available = m_chunkSize;
623 m_firstFree = 0;
624 }
625 }
626
627 // Adjust the insert point in the last chunk, when we've reached it.
628 m_firstFree += available;
629 }
630
631 /**
632 * Append part of the contents of a Character Array onto the
633 * FastStringBuffer, growing the storage if necessary.
634 * <p>
635 * NOTE THAT after calling append(), previously obtained
636 * references to m_array[] may no longer be valid.
637 *
638 * @param chars character array from which data is to be copied
639 * @param start offset in chars of first character to be copied,
640 * zero-based.
641 * @param length number of characters to be copied
642 */
643 public final void append(char[] chars, int start, int length)
644 {
645
646 int strlen = length;
647
648 if (0 == strlen)
649 return;
650
651 int copyfrom = start;
652 char[] chunk = m_array[m_lastChunk];
653 int available = m_chunkSize - m_firstFree;
654
655 // Repeat while data remains to be copied
656 while (strlen > 0)
657 {
658
659 // Copy what fits
660 if (available > strlen)
661 available = strlen;
662
663 System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree,
664 available);
665
666 strlen -= available;
667 copyfrom += available;
668
669 // If there's more left, allocate another chunk and continue
670 if (strlen > 0)
671 {
672
673 // Extend array?
674 int i = m_array.length;
675
676 if (m_lastChunk + 1 == i)
677 {
678 char[][] newarray = new char[i + 16][];
679
680 System.arraycopy(m_array, 0, newarray, 0, i);
681
682 m_array = newarray;
683 }
684
685 // Advance one chunk
686 chunk = m_array[++m_lastChunk];
687
688 if (chunk == null)
689 {
690
691 // Hierarchical encapsulation
692 if (m_lastChunk == 1 << m_rebundleBits
693 && m_chunkBits < m_maxChunkBits)
694 {
695
696 // Should do all the work of both encapsulating
697 // existing data and establishing new sizes/offsets
698 m_innerFSB = new FastStringBuffer(this);
699 }
700
701 // Add a chunk.
702 chunk = m_array[m_lastChunk] = new char[m_chunkSize];
703 }
704
705 available = m_chunkSize;
706 m_firstFree = 0;
707 }
708 }
709
710 // Adjust the insert point in the last chunk, when we've reached it.
711 m_firstFree += available;
712 }
713
714 /**
715 * Append the contents of another FastStringBuffer onto
716 * this FastStringBuffer, growing the storage if necessary.
717 * <p>
718 * NOTE THAT after calling append(), previously obtained
719 * references to m_array[] may no longer be valid.
720 *
721 * @param value FastStringBuffer whose contents are
722 * to be appended.
723 */
724 public final void append(FastStringBuffer value)
725 {
726
727 // Complicating factor here is that the two buffers may use
728 // different chunk sizes, and even if they're the same we're
729 // probably on a different alignment due to previously appended
730 // data. We have to work through the source in bite-sized chunks.
731 if (value == null)
732 return;
733 int strlen = value.length();
734
735 if (0 == strlen)
736 return;
737
738 int copyfrom = 0;
739 char[] chunk = m_array[m_lastChunk];
740 int available = m_chunkSize - m_firstFree;
741
742 // Repeat while data remains to be copied
743 while (strlen > 0)
744 {
745
746 // Copy what fits
747 if (available > strlen)
748 available = strlen;
749
750 int sourcechunk = (copyfrom + value.m_chunkSize - 1)
751 >>> value.m_chunkBits;
752 int sourcecolumn = copyfrom & value.m_chunkMask;
753 int runlength = value.m_chunkSize - sourcecolumn;
754
755 if (runlength > available)
756 runlength = available;
757
758 System.arraycopy(value.m_array[sourcechunk], sourcecolumn,
759 m_array[m_lastChunk], m_firstFree, runlength);
760
761 if (runlength != available)
762 System.arraycopy(value.m_array[sourcechunk + 1], 0,
763 m_array[m_lastChunk], m_firstFree + runlength,
764 available - runlength);
765
766 strlen -= available;
767 copyfrom += available;
768
769 // If there's more left, allocate another chunk and continue
770 if (strlen > 0)
771 {
772
773 // Extend array?
774 int i = m_array.length;
775
776 if (m_lastChunk + 1 == i)
777 {
778 char[][] newarray = new char[i + 16][];
779
780 System.arraycopy(m_array, 0, newarray, 0, i);
781
782 m_array = newarray;
783 }
784
785 // Advance one chunk
786 chunk = m_array[++m_lastChunk];
787
788 if (chunk == null)
789 {
790
791 // Hierarchical encapsulation
792 if (m_lastChunk == 1 << m_rebundleBits
793 && m_chunkBits < m_maxChunkBits)
794 {
795
796 // Should do all the work of both encapsulating
797 // existing data and establishing new sizes/offsets
798 m_innerFSB = new FastStringBuffer(this);
799 }
800
801 // Add a chunk.
802 chunk = m_array[m_lastChunk] = new char[m_chunkSize];
803 }
804
805 available = m_chunkSize;
806 m_firstFree = 0;
807 }
808 }
809
810 // Adjust the insert point in the last chunk, when we've reached it.
811 m_firstFree += available;
812 }
813
814 /**
815 * @return true if the specified range of characters are all whitespace,
816 * as defined by XMLCharacterRecognizer.
817 * <p>
818 * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE.
819 *
820 * @param start Offset of first character in the range.
821 * @param length Number of characters to test.
822 */
823 public boolean isWhitespace(int start, int length)
824 {
825
826 int sourcechunk = start >>> m_chunkBits;
827 int sourcecolumn = start & m_chunkMask;
828 int available = m_chunkSize - sourcecolumn;
829 boolean chunkOK;
830
831 while (length > 0)
832 {
833 int runlength = (length <= available) ? length : available;
834
835 if (sourcechunk == 0 && m_innerFSB != null)
836 chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength);
837 else
838 chunkOK = com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer.isWhiteSpace(
839 m_array[sourcechunk], sourcecolumn, runlength);
840
841 if (!chunkOK)
842 return false;
843
844 length -= runlength;
845
846 ++sourcechunk;
847
848 sourcecolumn = 0;
849 available = m_chunkSize;
850 }
851
852 return true;
853 }
854
855 /**
856 * @param start Offset of first character in the range.
857 * @param length Number of characters to send.
858 * @return a new String object initialized from the specified range of
859 * characters.
860 */
861 public String getString(int start, int length)
862 {
863 int startColumn = start & m_chunkMask;
864 int startChunk = start >>> m_chunkBits;
865 if (startColumn + length < m_chunkMask && m_innerFSB == null) {
866 return getOneChunkString(startChunk, startColumn, length);
867 }
868 return getString(new StringBuffer(length), startChunk, startColumn,
869 length).toString();
870 }
871
872 protected String getOneChunkString(int startChunk, int startColumn,
873 int length) {
874 return new String(m_array[startChunk], startColumn, length);
875 }
876
877 /**
878 * @param sb StringBuffer to be appended to
879 * @param start Offset of first character in the range.
880 * @param length Number of characters to send.
881 * @return sb with the requested text appended to it
882 */
883 StringBuffer getString(StringBuffer sb, int start, int length)
884 {
885 return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length);
886 }
887
888 /**
889 * Internal support for toString() and getString().
890 * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into
891 * and returns a StringBuffer supplied by the caller. This simplifies
892 * m_innerFSB support.
893 * <p>
894 * Note that this operation has been somewhat deoptimized by the shift to a
895 * chunked array, as there is no factory method to produce a String object
896 * directly from an array of arrays and hence a double copy is needed.
897 * By presetting length we hope to minimize the heap overhead of building
898 * the intermediate StringBuffer.
899 * <p>
900 * (It really is a pity that Java didn't design String as a final subclass
901 * of MutableString, rather than having StringBuffer be a separate hierarchy.
902 * We'd avoid a <strong>lot</strong> of double-buffering.)
903 *
904 *
905 * @param sb
906 * @param startChunk
907 * @param startColumn
908 * @param length
909 *
910 * @return sb, with the requested text appended to it.
911 */
912 StringBuffer getString(StringBuffer sb, int startChunk, int startColumn,
913 int length)
914 {
915
916 int stop = (startChunk << m_chunkBits) + startColumn + length;
917 int stopChunk = stop >>> m_chunkBits;
918 int stopColumn = stop & m_chunkMask;
919
920 // Factored out
921 //StringBuffer sb=new StringBuffer(length);
922 for (int i = startChunk; i < stopChunk; ++i)
923 {
924 if (i == 0 && m_innerFSB != null)
925 m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn);
926 else
927 sb.append(m_array[i], startColumn, m_chunkSize - startColumn);
928
929 startColumn = 0; // after first chunk
930 }
931
932 if (stopChunk == 0 && m_innerFSB != null)
933 m_innerFSB.getString(sb, startColumn, stopColumn - startColumn);
934 else if (stopColumn > startColumn)
935 sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn);
936
937 return sb;
938 }
939
940 /**
941 * Get a single character from the string buffer.
942 *
943 *
944 * @param pos character position requested.
945 * @return A character from the requested position.
946 */
947 public char charAt(int pos)
948 {
949 int startChunk = pos >>> m_chunkBits;
950
951 if (startChunk == 0 && m_innerFSB != null)
952 return m_innerFSB.charAt(pos & m_chunkMask);
953 else
954 return m_array[startChunk][pos & m_chunkMask];
955 }
956
957 /**
958 * Sends the specified range of characters as one or more SAX characters()
959 * events.
960 * Note that the buffer reference passed to the ContentHandler may be
961 * invalidated if the FastStringBuffer is edited; it's the user's
962 * responsibility to manage access to the FastStringBuffer to prevent this
963 * problem from arising.
964 * <p>
965 * Note too that there is no promise that the output will be sent as a
966 * single call. As is always true in SAX, one logical string may be split
967 * across multiple blocks of memory and hence delivered as several
968 * successive events.
969 *
970 * @param ch SAX ContentHandler object to receive the event.
971 * @param start Offset of first character in the range.
972 * @param length Number of characters to send.
973 * @exception org.xml.sax.SAXException may be thrown by handler's
974 * characters() method.
975 */
976 public void sendSAXcharacters(
977 org.xml.sax.ContentHandler ch, int start, int length)
978 throws org.xml.sax.SAXException
979 {
980
981 int startChunk = start >>> m_chunkBits;
982 int startColumn = start & m_chunkMask;
983 if (startColumn + length < m_chunkMask && m_innerFSB == null) {
984 ch.characters(m_array[startChunk], startColumn, length);
985 return;
986 }
987
988 int stop = start + length;
989 int stopChunk = stop >>> m_chunkBits;
990 int stopColumn = stop & m_chunkMask;
991
992 for (int i = startChunk; i < stopChunk; ++i)
993 {
994 if (i == 0 && m_innerFSB != null)
995 m_innerFSB.sendSAXcharacters(ch, startColumn,
996 m_chunkSize - startColumn);
997 else
998 ch.characters(m_array[i], startColumn, m_chunkSize - startColumn);
999
1000 startColumn = 0; // after first chunk
1001 }
1002
1003 // Last, or only, chunk
1004 if (stopChunk == 0 && m_innerFSB != null)
1005 m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn);
1006 else if (stopColumn > startColumn)
1007 {
1008 ch.characters(m_array[stopChunk], startColumn,
1009 stopColumn - startColumn);
1010 }
1011 }
1012
1013 /**
1014 * Sends the specified range of characters as one or more SAX characters()
1015 * events, normalizing the characters according to XSLT rules.
1016 *
1017 * @param ch SAX ContentHandler object to receive the event.
1018 * @param start Offset of first character in the range.
1019 * @param length Number of characters to send.
1020 * @return normalization status to apply to next chunk (because we may
1021 * have been called recursively to process an inner FSB):
1022 * <dl>
1023 * <dt>0</dt>
1024 * <dd>if this output did not end in retained whitespace, and thus whitespace
1025 * at the start of the following chunk (if any) should be converted to a
1026 * single space.
1027 * <dt>SUPPRESS_LEADING_WS</dt>
1028 * <dd>if this output ended in retained whitespace, and thus whitespace
1029 * at the start of the following chunk (if any) should be completely
1030 * suppressed.</dd>
1031 * </dd>
1032 * </dl>
1033 * @exception org.xml.sax.SAXException may be thrown by handler's
1034 * characters() method.
1035 */
1036 public int sendNormalizedSAXcharacters(
1037 org.xml.sax.ContentHandler ch, int start, int length)
1038 throws org.xml.sax.SAXException
1039 {
1040 // This call always starts at the beginning of the
1041 // string being written out, either because it was called directly or
1042 // because it was an m_innerFSB recursion. This is important since
1043 // it gives us a well-known initial state for this flag:
1044 int stateForNextChunk=SUPPRESS_LEADING_WS;
1045
1046 int stop = start + length;
1047 int startChunk = start >>> m_chunkBits;
1048 int startColumn = start & m_chunkMask;
1049 int stopChunk = stop >>> m_chunkBits;
1050 int stopColumn = stop & m_chunkMask;
1051
1052 for (int i = startChunk; i < stopChunk; ++i)
1053 {
1054 if (i == 0 && m_innerFSB != null)
1055 stateForNextChunk=
1056 m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
1057 m_chunkSize - startColumn);
1058 else
1059 stateForNextChunk=
1060 sendNormalizedSAXcharacters(m_array[i], startColumn,
1061 m_chunkSize - startColumn,
1062 ch,stateForNextChunk);
1063
1064 startColumn = 0; // after first chunk
1065 }
1066
1067 // Last, or only, chunk
1068 if (stopChunk == 0 && m_innerFSB != null)
1069 stateForNextChunk= // %REVIEW% Is this update really needed?
1070 m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn);
1071 else if (stopColumn > startColumn)
1072 {
1073 stateForNextChunk= // %REVIEW% Is this update really needed?
1074 sendNormalizedSAXcharacters(m_array[stopChunk],
1075 startColumn, stopColumn - startColumn,
1076 ch, stateForNextChunk | SUPPRESS_TRAILING_WS);
1077 }
1078 return stateForNextChunk;
1079 }
1080
1081 static final char[] SINGLE_SPACE = {' '};
1082
1083 /**
1084 * Internal method to directly normalize and dispatch the character array.
1085 * This version is aware of the fact that it may be called several times
1086 * in succession if the data is made up of multiple "chunks", and thus
1087 * must actively manage the handling of leading and trailing whitespace.
1088 *
1089 * Note: The recursion is due to the possible recursion of inner FSBs.
1090 *
1091 * @param ch The characters from the XML document.
1092 * @param start The start position in the array.
1093 * @param length The number of characters to read from the array.
1094 * @param handler SAX ContentHandler object to receive the event.
1095 * @param edgeTreatmentFlags How leading/trailing spaces should be handled.
1096 * This is a bitfield contining two flags, bitwise-ORed together:
1097 * <dl>
1098 * <dt>SUPPRESS_LEADING_WS</dt>
1099 * <dd>When false, causes leading whitespace to be converted to a single
1100 * space; when true, causes it to be discarded entirely.
1101 * Should be set TRUE for the first chunk, and (in multi-chunk output)
1102 * whenever the previous chunk ended in retained whitespace.</dd>
1103 * <dt>SUPPRESS_TRAILING_WS</dt>
1104 * <dd>When false, causes trailing whitespace to be converted to a single
1105 * space; when true, causes it to be discarded entirely.
1106 * Should be set TRUE for the last or only chunk.
1107 * </dd>
1108 * </dl>
1109 * @return normalization status, as in the edgeTreatmentFlags parameter:
1110 * <dl>
1111 * <dt>0</dt>
1112 * <dd>if this output did not end in retained whitespace, and thus whitespace
1113 * at the start of the following chunk (if any) should be converted to a
1114 * single space.
1115 * <dt>SUPPRESS_LEADING_WS</dt>
1116 * <dd>if this output ended in retained whitespace, and thus whitespace
1117 * at the start of the following chunk (if any) should be completely
1118 * suppressed.</dd>
1119 * </dd>
1120 * </dl>
1121 *
1122 *
1123 * @exception org.xml.sax.SAXException Any SAX exception, possibly
1124 * wrapping another exception.
1125 */
1126 static int sendNormalizedSAXcharacters(char ch[],
1127 int start, int length,
1128 org.xml.sax.ContentHandler handler,
1129 int edgeTreatmentFlags)
1130 throws org.xml.sax.SAXException
1131 {
1132 boolean processingLeadingWhitespace =
1133 ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0);
1134 boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0);
1135 boolean suppressTrailingWhitespace =
1136 ((edgeTreatmentFlags & SUPPRESS_TRAILING_WS) != 0);
1137 int currPos = start;
1138 int limit = start+length;
1139
1140 // Strip any leading spaces first, if required
1141 if (processingLeadingWhitespace) {
1142 for (; currPos < limit
1143 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1144 currPos++) { }
1145
1146 // If we've only encountered leading spaces, the
1147 // current state remains unchanged
1148 if (currPos == limit) {
1149 return edgeTreatmentFlags;
1150 }
1151 }
1152
1153 // If we get here, there are no more leading spaces to strip
1154 while (currPos < limit) {
1155 int startNonWhitespace = currPos;
1156
1157 // Grab a chunk of non-whitespace characters
1158 for (; currPos < limit
1159 && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1160 currPos++) { }
1161
1162 // Non-whitespace seen - emit them, along with a single
1163 // space for any preceding whitespace characters
1164 if (startNonWhitespace != currPos) {
1165 if (seenWhitespace) {
1166 handler.characters(SINGLE_SPACE, 0, 1);
1167 seenWhitespace = false;
1168 }
1169 handler.characters(ch, startNonWhitespace,
1170 currPos - startNonWhitespace);
1171 }
1172
1173 int startWhitespace = currPos;
1174
1175 // Consume any whitespace characters
1176 for (; currPos < limit
1177 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1178 currPos++) { }
1179
1180 if (startWhitespace != currPos) {
1181 seenWhitespace = true;
1182 }
1183 }
1184
1185 return (seenWhitespace ? CARRY_WS : 0)
1186 | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS);
1187 }
1188
1189 /**
1190 * Directly normalize and dispatch the character array.
1191 *
1192 * @param ch The characters from the XML document.
1193 * @param start The start position in the array.
1194 * @param length The number of characters to read from the array.
1195 * @param handler SAX ContentHandler object to receive the event.
1196 * @exception org.xml.sax.SAXException Any SAX exception, possibly
1197 * wrapping another exception.
1198 */
1199 public static void sendNormalizedSAXcharacters(char ch[],
1200 int start, int length,
1201 org.xml.sax.ContentHandler handler)
1202 throws org.xml.sax.SAXException
1203 {
1204 sendNormalizedSAXcharacters(ch, start, length,
1205 handler, SUPPRESS_BOTH);
1206 }
1207
1208 /**
1209 * Sends the specified range of characters as sax Comment.
1210 * <p>
1211 * Note that, unlike sendSAXcharacters, this has to be done as a single
1212 * call to LexicalHandler#comment.
1213 *
1214 * @param ch SAX LexicalHandler object to receive the event.
1215 * @param start Offset of first character in the range.
1216 * @param length Number of characters to send.
1217 * @exception org.xml.sax.SAXException may be thrown by handler's
1218 * characters() method.
1219 */
1220 public void sendSAXComment(
1221 org.xml.sax.ext.LexicalHandler ch, int start, int length)
1222 throws org.xml.sax.SAXException
1223 {
1224
1225 // %OPT% Do it this way for now...
1226 String comment = getString(start, length);
1227 ch.comment(comment.toCharArray(), 0, length);
1228 }
1229
1230 /**
1231 * Copies characters from this string into the destination character
1232 * array.
1233 *
1234 * @param srcBegin index of the first character in the string
1235 * to copy.
1236 * @param srcEnd index after the last character in the string
1237 * to copy.
1238 * @param dst the destination array.
1239 * @param dstBegin the start offset in the destination array.
1240 * @exception IndexOutOfBoundsException If any of the following
1241 * is true:
1242 * <ul><li><code>srcBegin</code> is negative.
1243 * <li><code>srcBegin</code> is greater than <code>srcEnd</code>
1244 * <li><code>srcEnd</code> is greater than the length of this
1245 * string
1246 * <li><code>dstBegin</code> is negative
1247 * <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
1248 * <code>dst.length</code></ul>
1249 * @exception NullPointerException if <code>dst</code> is <code>null</code>
1250 */
1251 private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
1252 {
1253 // %TBD% Joe needs to write this function. Make public when implemented.
1254 }
1255
/**
 * Encapsulation c'tor. The new object takes over the source's current
 * chunk array, inner buffer and sizing parameters; the source is then
 * reset so that the new object becomes its m_innerFSB, it gets a fresh
 * chunk array, and its chunk size is enlarged by m_rebundleBits bits.
 * IT SHOULD NEVER BE CALLED
 * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits)
 *
 * @param source the FastStringBuffer whose current contents are to be
 * wrapped; it is modified by this constructor as described above.
 */
private FastStringBuffer(FastStringBuffer source)
{
  // Take over the source's sizing parameters and storage as they stand
  // before the source is modified below.
  this.m_chunkBits = source.m_chunkBits;
  this.m_maxChunkBits = source.m_maxChunkBits;
  this.m_rebundleBits = source.m_rebundleBits;
  this.m_chunkSize = source.m_chunkSize;
  this.m_chunkMask = source.m_chunkMask;
  this.m_array = source.m_array;
  this.m_innerFSB = source.m_innerFSB;

  // We are invoked just as the source was about to allocate another
  // chunk, so step back to the preceding chunk and mark it as full.
  this.m_lastChunk = source.m_lastChunk - 1;
  this.m_firstFree = source.m_chunkSize;

  // Install this capsule as the source's inner FSB and give the source a
  // fresh chunk array.
  source.m_innerFSB = this;
  source.m_array = new char[16][];

  // The inner FSB stands in for chunk 0, so the source resumes ready to
  // create the chunk after it -- 1, not 0.
  source.m_lastChunk = 1;
  source.m_firstFree = 0;

  // Enlarge the source's chunk addressing and derive size and mask from
  // the new bit count.
  source.m_chunkBits += this.m_rebundleBits;
  source.m_chunkSize = 1 << source.m_chunkBits;
  source.m_chunkMask = source.m_chunkSize - 1;
}
1294}