PageRenderTime 443ms CodeModel.GetById 155ms app.highlight 217ms RepoModel.GetById 1ms app.codeStats 1ms

/projects/jre-1.6.0/src/com/sun/org/apache/xml/internal/utils/FastStringBuffer.java

https://gitlab.com/essere.lab.public/qualitas.class-corpus
Java | 1294 lines | 539 code | 192 blank | 563 comment | 148 complexity | c5723d30c1cecd2da51b09867c933a69 MD5 | raw file
   1/*
   2 * Copyright 1999-2004 The Apache Software Foundation.
   3 *
   4 * Licensed under the Apache License, Version 2.0 (the "License");
   5 * you may not use this file except in compliance with the License.
   6 * You may obtain a copy of the License at
   7 *
   8 *     http://www.apache.org/licenses/LICENSE-2.0
   9 *
  10 * Unless required by applicable law or agreed to in writing, software
  11 * distributed under the License is distributed on an "AS IS" BASIS,
  12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 * See the License for the specific language governing permissions and
  14 * limitations under the License.
  15 */
  16/*
  17 * $Id: FastStringBuffer.java,v 1.2.4.1 2005/09/15 08:15:44 suresh_emailid Exp $
  18 */
  19package com.sun.org.apache.xml.internal.utils;
  20
  21/**
  22 * Bare-bones, unsafe, fast string buffer. No thread-safety, no
  23 * parameter range checking, exposed fields. Note that in typical
  24 * applications, thread-safety of a StringBuffer is a somewhat
  25 * dubious concept in any case.
  26 * <p>
  27 * Note that Stree and DTM used a single FastStringBuffer as a string pool,
  28 * by recording start and length indices within this single buffer. This
  29 * minimizes heap overhead, but of course requires more work when retrieving
  30 * the data.
  31 * <p>
  32 * FastStringBuffer operates as a "chunked buffer". Doing so
  33 * reduces the need to recopy existing information when an append
  34 * exceeds the space available; we just allocate another chunk and
  35 * flow across to it. (The array of chunks may need to grow,
  36 * admittedly, but that's a much smaller object.) Some excess
  37 * recopying may arise when we extract Strings which cross chunk
  38 * boundaries; larger chunks make that less frequent.
  39 * <p>
  40 * The size values are parameterized, to allow tuning this code. In
  41 * theory, Result Tree Fragments might want to be tuned differently 
  42 * from the main document's text. 
  43 * <p>
  44 * %REVIEW% An experiment in self-tuning is
  45 * included in the code (using nested FastStringBuffers to achieve
  46 * variation in chunk sizes), but this implementation has proven to
  47 * be problematic when data may be being copied from the FSB into itself.
  48 * We should either re-architect that to make this safe (if possible)
  49 * or remove that code and clean up for performance/maintainability reasons.
  50 * <p>
  51 */
  52public class FastStringBuffer
  53{
  // If nonzero, forces the initial chunk size.
  55  /**/static final int DEBUG_FORCE_INIT_BITS=0;
  56  
  57  	// %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied
  58  	// back into the same FSB (variable set from previous variable, for example) 
  59  	// and blocksize changes in mid-copy... there's risk of severe malfunction in 
  60  	// the read process, due to how the resizing code re-jiggers storage. Arggh. 
  61  	// If we want to retain the variable-size-block feature, we need to reconsider 
  62  	// that issue. For now, I have forced us into fixed-size mode.
  63    static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true;
  64
  65	/** Manifest constant: Suppress leading whitespace.
  66	 * This should be used when normalize-to-SAX is called for the first chunk of a
  67	 * multi-chunk output, or one following unsuppressed whitespace in a previous
  68	 * chunk.
  69	 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
  70	 */
  71	public static final int SUPPRESS_LEADING_WS=0x01;
  72	
  73	/** Manifest constant: Suppress trailing whitespace.
  74	 * This should be used when normalize-to-SAX is called for the last chunk of a
  75	 * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
  76	 */
  77	public static final int SUPPRESS_TRAILING_WS=0x02;
  78	
  79	/** Manifest constant: Suppress both leading and trailing whitespace.
  80	 * This should be used when normalize-to-SAX is called for a complete string.
  81	 * (I'm not wild about the name of this one. Ideas welcome.)
  82	 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
  83	 */
  84	public static final int SUPPRESS_BOTH
  85		= SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS;
  86
  87	/** Manifest constant: Carry trailing whitespace of one chunk as leading 
  88	 * whitespace of the next chunk. Used internally; I don't see any reason
  89	 * to make it public right now.
  90	 */
  91	private static final int CARRY_WS=0x04;
  92
  93	/**
  94   * Field m_chunkBits sets our chunking strategy, by saying how many
  95   * bits of index can be used within a single chunk before flowing over
  96   * to the next chunk. For example, if m_chunkbits is set to 15, each
  97   * chunk can contain up to 2^15 (32K) characters  
  98   */
  99  int m_chunkBits = 15;
 100
 101  /**
 102   * Field m_maxChunkBits affects our chunk-growth strategy, by saying what
 103   * the largest permissible chunk size is in this particular FastStringBuffer
 104   * hierarchy. 
 105   */
 106  int m_maxChunkBits = 15;
 107
 108  /**
   * Field m_rebundleBits affects our chunk-growth strategy, by saying how
   * many chunks should be allocated at one size before we encapsulate them
   * into the first chunk of the next size up. For example, if m_rebundleBits
   * is set to 3, then after 8 chunks at a given size we will rebundle
   * them as the first element of a FastStringBuffer using a chunk size
   * 8 times larger (chunk size shifted left three bits).
 115   */
 116  int m_rebundleBits = 2;
 117
 118  /**
 119   * Field m_chunkSize establishes the maximum size of one chunk of the array
 120   * as 2**chunkbits characters.
 121   * (Which may also be the minimum size if we aren't tuning for storage) 
 122   */
 123  int m_chunkSize;  // =1<<(m_chunkBits-1);
 124
 125  /**
 126   * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits
 127   * worth of low-order '1' bits, useful for shift-and-mask addressing
 128   * within the chunks. 
 129   */
 130  int m_chunkMask;  // =m_chunkSize-1;
 131
 132  /**
 133   * Field m_array holds the string buffer's text contents, using an
 134   * array-of-arrays. Note that this array, and the arrays it contains, may be
 135   * reallocated when necessary in order to allow the buffer to grow;
 136   * references to them should be considered to be invalidated after any
 137   * append. However, the only time these arrays are directly exposed
 138   * is in the sendSAXcharacters call.
 139   */
 140  char[][] m_array;
 141
 142  /**
 143   * Field m_lastChunk is an index into m_array[], pointing to the last
 144   * chunk of the Chunked Array currently in use. Note that additional
 145   * chunks may actually be allocated, eg if the FastStringBuffer had
 146   * previously been truncated or if someone issued an ensureSpace request.
 147   * <p>
 148   * The insertion point for append operations is addressed by the combination
 149   * of m_lastChunk and m_firstFree.
 150   */
 151  int m_lastChunk = 0;
 152
 153  /**
 154   * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
 155   * the first character in the Chunked Array which is not part of the
 156   * FastStringBuffer's current content. Since m_array[][] is zero-based,
 157   * the length of that content can be calculated as
 158   * (m_lastChunk<<m_chunkBits) + m_firstFree 
 159   */
 160  int m_firstFree = 0;
 161
 162  /**
 163   * Field m_innerFSB, when non-null, is a FastStringBuffer whose total
 164   * length equals m_chunkSize, and which replaces m_array[0]. This allows
 165   * building a hierarchy of FastStringBuffers, where early appends use
 166   * a smaller chunkSize (for less wasted memory overhead) but later
 167   * ones use a larger chunkSize (for less heap activity overhead).
 168   */
 169  FastStringBuffer m_innerFSB = null;
 170
 171  /**
 172   * Construct a FastStringBuffer, with allocation policy as per parameters.
 173   * <p>
 174   * For coding convenience, I've expressed both allocation sizes in terms of
 175   * a number of bits. That's needed for the final size of a chunk,
 176   * to permit fast and efficient shift-and-mask addressing. It's less critical
 177   * for the inital size, and may be reconsidered.
 178   * <p>
 179   * An alternative would be to accept integer sizes and round to powers of two;
 180   * that really doesn't seem to buy us much, if anything.
 181   *
 182   * @param initChunkBits Length in characters of the initial allocation
 183   * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024
 184   * characters.) Later chunks will use larger allocation units, to trade off
 185   * allocation speed of large document against storage efficiency of small
 186   * ones.
 187   * @param maxChunkBits Number of character-offset bits that should be used for
 188   * addressing within a chunk. Maximum length of a chunk is 2^chunkBits
 189   * characters.
 190   * @param rebundleBits Number of character-offset bits that addressing should
 191   * advance before we attempt to take a step from initChunkBits to maxChunkBits
 192   */
 193  public FastStringBuffer(int initChunkBits, int maxChunkBits,
 194                          int rebundleBits)
 195  {
 196    if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS;
 197    
 198    // %REVIEW%
 199    // Should this force to larger value, or smaller? Smaller less efficient, but if
 200    // someone requested variable mode it's because they care about storage space.
 201    // On the other hand, given the other changes I'm making, odds are that we should
 202    // adopt the larger size. Dither, dither, dither... This is just stopgap workaround
 203    // anyway; we need a permanant solution.
 204    //
 205    if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits;
 206    //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits;
 207
 208    m_array = new char[16][];
 209
 210    // Don't bite off more than we're prepared to swallow!
 211    if (initChunkBits > maxChunkBits)
 212      initChunkBits = maxChunkBits;
 213
 214    m_chunkBits = initChunkBits;
 215    m_maxChunkBits = maxChunkBits;
 216    m_rebundleBits = rebundleBits;
 217    m_chunkSize = 1 << (initChunkBits);
 218    m_chunkMask = m_chunkSize - 1;
 219    m_array[0] = new char[m_chunkSize];
 220  }
 221
 222  /**
 223   * Construct a FastStringBuffer, using a default rebundleBits value.
 224   *
 225   * NEEDSDOC @param initChunkBits
 226   * NEEDSDOC @param maxChunkBits
 227   */
 228  public FastStringBuffer(int initChunkBits, int maxChunkBits)
 229  {
 230    this(initChunkBits, maxChunkBits, 2);
 231  }
 232
 233  /**
 234   * Construct a FastStringBuffer, using default maxChunkBits and
 235   * rebundleBits values.
 236   * <p>
 237   * ISSUE: Should this call assert initial size, or fixed size?
 238   * Now configured as initial, with a default for fixed.
 239   *
 240   * NEEDSDOC @param initChunkBits
 241   */
 242  public FastStringBuffer(int initChunkBits)
 243  {
 244    this(initChunkBits, 15, 2);
 245  }
 246
 247  /**
 248   * Construct a FastStringBuffer, using a default allocation policy.
 249   */
 250  public FastStringBuffer()
 251  {
 252
 253    // 10 bits is 1K. 15 bits is 32K. Remember that these are character
 254    // counts, so actual memory allocation unit is doubled for UTF-16 chars.
 255    //
 256    // For reference: In the original FastStringBuffer, we simply
 257    // overallocated by blocksize (default 1KB) on each buffer-growth.
 258    this(10, 15, 2);
 259  }
 260
 261  /**
 262   * Get the length of the list. Synonym for length().
 263   *
 264   * @return the number of characters in the FastStringBuffer's content.
 265   */
 266  public final int size()
 267  {
 268    return (m_lastChunk << m_chunkBits) + m_firstFree;
 269  }
 270
 271  /**
 272   * Get the length of the list. Synonym for size().
 273   *
 274   * @return the number of characters in the FastStringBuffer's content.
 275   */
 276  public final int length()
 277  {
 278    return (m_lastChunk << m_chunkBits) + m_firstFree;
 279  }
 280
 281  /**
 282   * Discard the content of the FastStringBuffer, and most of the memory
 283   * that was allocated by it, restoring the initial state. Note that this
 284   * may eventually be different from setLength(0), which see.
 285   */
 286  public final void reset()
 287  {
 288
 289    m_lastChunk = 0;
 290    m_firstFree = 0;
 291
 292    // Recover the original chunk size
 293    FastStringBuffer innermost = this;
 294
 295    while (innermost.m_innerFSB != null)
 296    {
 297      innermost = innermost.m_innerFSB;
 298    }
 299
 300    m_chunkBits = innermost.m_chunkBits;
 301    m_chunkSize = innermost.m_chunkSize;
 302    m_chunkMask = innermost.m_chunkMask;
 303
 304    // Discard the hierarchy
 305    m_innerFSB = null;
 306    m_array = new char[16][0];
 307    m_array[0] = new char[m_chunkSize];
 308  }
 309
 310  /**
 311   * Directly set how much of the FastStringBuffer's storage is to be
 312   * considered part of its content. This is a fast but hazardous
 313   * operation. It is not protected against negative values, or values
 314   * greater than the amount of storage currently available... and even
 315   * if additional storage does exist, its contents are unpredictable.
 316   * The only safe use for our setLength() is to truncate the FastStringBuffer
 317   * to a shorter string.
 318   *
 319   * @param l New length. If l<0 or l>=getLength(), this operation will
 320   * not report an error but future operations will almost certainly fail.
 321   */
 322  public final void setLength(int l)
 323  {
 324    m_lastChunk = l >>> m_chunkBits;
 325
 326    if (m_lastChunk == 0 && m_innerFSB != null)
 327    {
 328      // Replace this FSB with the appropriate inner FSB, truncated
 329      m_innerFSB.setLength(l, this);
 330    }
 331    else
 332    {
 333      m_firstFree = l & m_chunkMask;
 334      
 335	  // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving
 336	  // us pointing at the start of a chunk which has not yet been allocated. Rather than 
 337	  // pay the cost of dealing with that in the append loops (more scattered and more
 338	  // inner-loop), we correct it here by moving to the safe side of that
 339	  // line -- as we would have left the indexes had we appended up to that point.
 340      if(m_firstFree==0 && m_lastChunk>0)
 341      {
 342      	--m_lastChunk;
 343      	m_firstFree=m_chunkSize;
 344      }
 345    }
 346  }
 347
 348  /**
 349   * Subroutine for the public setLength() method. Deals with the fact
 350   * that truncation may require restoring one of the innerFSBs
 351   *
 352   * NEEDSDOC @param l
 353   * NEEDSDOC @param rootFSB
 354   */
 355  private final void setLength(int l, FastStringBuffer rootFSB)
 356  {
 357
 358    m_lastChunk = l >>> m_chunkBits;
 359
 360    if (m_lastChunk == 0 && m_innerFSB != null)
 361    {
 362      m_innerFSB.setLength(l, rootFSB);
 363    }
 364    else
 365    {
 366
 367      // Undo encapsulation -- pop the innerFSB data back up to root.
 368      // Inefficient, but attempts to keep the code simple.
 369      rootFSB.m_chunkBits = m_chunkBits;
 370      rootFSB.m_maxChunkBits = m_maxChunkBits;
 371      rootFSB.m_rebundleBits = m_rebundleBits;
 372      rootFSB.m_chunkSize = m_chunkSize;
 373      rootFSB.m_chunkMask = m_chunkMask;
 374      rootFSB.m_array = m_array;
 375      rootFSB.m_innerFSB = m_innerFSB;
 376      rootFSB.m_lastChunk = m_lastChunk;
 377
 378      // Finally, truncate this sucker.
 379      rootFSB.m_firstFree = l & m_chunkMask;
 380    }
 381  }
 382
 383  /**
 384   * Note that this operation has been somewhat deoptimized by the shift to a
 385   * chunked array, as there is no factory method to produce a String object
 386   * directly from an array of arrays and hence a double copy is needed.
 387   * By using ensureCapacity we hope to minimize the heap overhead of building
 388   * the intermediate StringBuffer.
 389   * <p>
 390   * (It really is a pity that Java didn't design String as a final subclass
 391   * of MutableString, rather than having StringBuffer be a separate hierarchy.
 392   * We'd avoid a <strong>lot</strong> of double-buffering.)
 393   *
 394   * @return the contents of the FastStringBuffer as a standard Java string.
 395   */
 396  public final String toString()
 397  {
 398
 399    int length = (m_lastChunk << m_chunkBits) + m_firstFree;
 400
 401    return getString(new StringBuffer(length), 0, 0, length).toString();
 402  }
 403
 404  /**
 405   * Append a single character onto the FastStringBuffer, growing the
 406   * storage if necessary.
 407   * <p>
 408   * NOTE THAT after calling append(), previously obtained
 409   * references to m_array[][] may no longer be valid....
 410   * though in fact they should be in this instance.
 411   *
 412   * @param value character to be appended.
 413   */
 414  public final void append(char value)
 415  {
 416    
 417    char[] chunk;
 418
 419    // We may have preallocated chunks. If so, all but last should
 420    // be at full size.
 421    boolean lastchunk = (m_lastChunk + 1 == m_array.length);
 422
 423    if (m_firstFree < m_chunkSize)  // Simplified test single-character-fits
 424      chunk = m_array[m_lastChunk];
 425    else
 426    {
 427
 428      // Extend array?
 429      int i = m_array.length;
 430
 431      if (m_lastChunk + 1 == i)
 432      {
 433        char[][] newarray = new char[i + 16][];
 434
 435        System.arraycopy(m_array, 0, newarray, 0, i);
 436
 437        m_array = newarray;
 438      }
 439
 440      // Advance one chunk
 441      chunk = m_array[++m_lastChunk];
 442
 443      if (chunk == null)
 444      {
 445
 446        // Hierarchical encapsulation
 447        if (m_lastChunk == 1 << m_rebundleBits
 448                && m_chunkBits < m_maxChunkBits)
 449        {
 450
 451          // Should do all the work of both encapsulating
 452          // existing data and establishing new sizes/offsets
 453          m_innerFSB = new FastStringBuffer(this);
 454        }
 455
 456        // Add a chunk.
 457        chunk = m_array[m_lastChunk] = new char[m_chunkSize];
 458      }
 459
 460      m_firstFree = 0;
 461    }
 462
 463    // Space exists in the chunk. Append the character.
 464    chunk[m_firstFree++] = value;
 465  }
 466
 467  /**
 468   * Append the contents of a String onto the FastStringBuffer,
 469   * growing the storage if necessary.
 470   * <p>
 471   * NOTE THAT after calling append(), previously obtained
 472   * references to m_array[] may no longer be valid.
 473   *
 474   * @param value String whose contents are to be appended.
 475   */
 476  public final void append(String value)
 477  {
 478
 479    if (value == null) 
 480      return;
 481    int strlen = value.length();
 482
 483    if (0 == strlen)
 484      return;
 485
 486    int copyfrom = 0;
 487    char[] chunk = m_array[m_lastChunk];
 488    int available = m_chunkSize - m_firstFree;
 489
 490    // Repeat while data remains to be copied
 491    while (strlen > 0)
 492    {
 493
 494      // Copy what fits
 495      if (available > strlen)
 496        available = strlen;
 497
 498      value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
 499                     m_firstFree);
 500
 501      strlen -= available;
 502      copyfrom += available;
 503
 504      // If there's more left, allocate another chunk and continue
 505      if (strlen > 0)
 506      {
 507
 508        // Extend array?
 509        int i = m_array.length;
 510
 511        if (m_lastChunk + 1 == i)
 512        {
 513          char[][] newarray = new char[i + 16][];
 514
 515          System.arraycopy(m_array, 0, newarray, 0, i);
 516
 517          m_array = newarray;
 518        }
 519
 520        // Advance one chunk
 521        chunk = m_array[++m_lastChunk];
 522
 523        if (chunk == null)
 524        {
 525
 526          // Hierarchical encapsulation
 527          if (m_lastChunk == 1 << m_rebundleBits
 528                  && m_chunkBits < m_maxChunkBits)
 529          {
 530
 531            // Should do all the work of both encapsulating
 532            // existing data and establishing new sizes/offsets
 533            m_innerFSB = new FastStringBuffer(this);
 534          }
 535
 536          // Add a chunk. 
 537          chunk = m_array[m_lastChunk] = new char[m_chunkSize];
 538        }
 539
 540        available = m_chunkSize;
 541        m_firstFree = 0;
 542      }
 543    }
 544
 545    // Adjust the insert point in the last chunk, when we've reached it.
 546    m_firstFree += available;
 547  }
 548
 549  /**
 550   * Append the contents of a StringBuffer onto the FastStringBuffer,
 551   * growing the storage if necessary.
 552   * <p>
 553   * NOTE THAT after calling append(), previously obtained
 554   * references to m_array[] may no longer be valid.
 555   *
 556   * @param value StringBuffer whose contents are to be appended.
 557   */
 558  public final void append(StringBuffer value)
 559  {
 560
 561    if (value == null) 
 562      return;
 563    int strlen = value.length();
 564
 565    if (0 == strlen)
 566      return;
 567
 568    int copyfrom = 0;
 569    char[] chunk = m_array[m_lastChunk];
 570    int available = m_chunkSize - m_firstFree;
 571
 572    // Repeat while data remains to be copied
 573    while (strlen > 0)
 574    {
 575
 576      // Copy what fits
 577      if (available > strlen)
 578        available = strlen;
 579
 580      value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
 581                     m_firstFree);
 582
 583      strlen -= available;
 584      copyfrom += available;
 585
 586      // If there's more left, allocate another chunk and continue
 587      if (strlen > 0)
 588      {
 589
 590        // Extend array?
 591        int i = m_array.length;
 592
 593        if (m_lastChunk + 1 == i)
 594        {
 595          char[][] newarray = new char[i + 16][];
 596
 597          System.arraycopy(m_array, 0, newarray, 0, i);
 598
 599          m_array = newarray;
 600        }
 601
 602        // Advance one chunk
 603        chunk = m_array[++m_lastChunk];
 604
 605        if (chunk == null)
 606        {
 607
 608          // Hierarchical encapsulation
 609          if (m_lastChunk == 1 << m_rebundleBits
 610                  && m_chunkBits < m_maxChunkBits)
 611          {
 612
 613            // Should do all the work of both encapsulating
 614            // existing data and establishing new sizes/offsets
 615            m_innerFSB = new FastStringBuffer(this);
 616          }
 617
 618          // Add a chunk.
 619          chunk = m_array[m_lastChunk] = new char[m_chunkSize];
 620        }
 621
 622        available = m_chunkSize;
 623        m_firstFree = 0;
 624      }
 625    }
 626
 627    // Adjust the insert point in the last chunk, when we've reached it.
 628    m_firstFree += available;
 629  }
 630
 631  /**
 632   * Append part of the contents of a Character Array onto the
 633   * FastStringBuffer,  growing the storage if necessary.
 634   * <p>
 635   * NOTE THAT after calling append(), previously obtained
 636   * references to m_array[] may no longer be valid.
 637   *
 638   * @param chars character array from which data is to be copied
 639   * @param start offset in chars of first character to be copied,
 640   * zero-based.
 641   * @param length number of characters to be copied
 642   */
 643  public final void append(char[] chars, int start, int length)
 644  {
 645
 646    int strlen = length;
 647
 648    if (0 == strlen)
 649      return;
 650
 651    int copyfrom = start;
 652    char[] chunk = m_array[m_lastChunk];
 653    int available = m_chunkSize - m_firstFree;
 654
 655    // Repeat while data remains to be copied
 656    while (strlen > 0)
 657    {
 658
 659      // Copy what fits
 660      if (available > strlen)
 661        available = strlen;
 662
 663      System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree,
 664                       available);
 665
 666      strlen -= available;
 667      copyfrom += available;
 668
 669      // If there's more left, allocate another chunk and continue
 670      if (strlen > 0)
 671      {
 672
 673        // Extend array?
 674        int i = m_array.length;
 675
 676        if (m_lastChunk + 1 == i)
 677        {
 678          char[][] newarray = new char[i + 16][];
 679
 680          System.arraycopy(m_array, 0, newarray, 0, i);
 681
 682          m_array = newarray;
 683        }
 684
 685        // Advance one chunk
 686        chunk = m_array[++m_lastChunk];
 687
 688        if (chunk == null)
 689        {
 690
 691          // Hierarchical encapsulation
 692          if (m_lastChunk == 1 << m_rebundleBits
 693                  && m_chunkBits < m_maxChunkBits)
 694          {
 695
 696            // Should do all the work of both encapsulating
 697            // existing data and establishing new sizes/offsets
 698            m_innerFSB = new FastStringBuffer(this);
 699          }
 700
 701          // Add a chunk.
 702          chunk = m_array[m_lastChunk] = new char[m_chunkSize];
 703        }
 704
 705        available = m_chunkSize;
 706        m_firstFree = 0;
 707      }
 708    }
 709
 710    // Adjust the insert point in the last chunk, when we've reached it.
 711    m_firstFree += available;
 712  }
 713
 714  /**
 715   * Append the contents of another FastStringBuffer onto
 716   * this FastStringBuffer, growing the storage if necessary.
 717   * <p>
 718   * NOTE THAT after calling append(), previously obtained
 719   * references to m_array[] may no longer be valid.
 720   *
 721   * @param value FastStringBuffer whose contents are
 722   * to be appended.
 723   */
 724  public final void append(FastStringBuffer value)
 725  {
 726
 727    // Complicating factor here is that the two buffers may use
 728    // different chunk sizes, and even if they're the same we're
 729    // probably on a different alignment due to previously appended
 730    // data. We have to work through the source in bite-sized chunks.
 731    if (value == null) 
 732      return;
 733    int strlen = value.length();
 734
 735    if (0 == strlen)
 736      return;
 737
 738    int copyfrom = 0;
 739    char[] chunk = m_array[m_lastChunk];
 740    int available = m_chunkSize - m_firstFree;
 741
 742    // Repeat while data remains to be copied
 743    while (strlen > 0)
 744    {
 745
 746      // Copy what fits
 747      if (available > strlen)
 748        available = strlen;
 749
 750      int sourcechunk = (copyfrom + value.m_chunkSize - 1)
 751                        >>> value.m_chunkBits;
 752      int sourcecolumn = copyfrom & value.m_chunkMask;
 753      int runlength = value.m_chunkSize - sourcecolumn;
 754
 755      if (runlength > available)
 756        runlength = available;
 757
 758      System.arraycopy(value.m_array[sourcechunk], sourcecolumn,
 759                       m_array[m_lastChunk], m_firstFree, runlength);
 760
 761      if (runlength != available)
 762        System.arraycopy(value.m_array[sourcechunk + 1], 0,
 763                         m_array[m_lastChunk], m_firstFree + runlength,
 764                         available - runlength);
 765
 766      strlen -= available;
 767      copyfrom += available;
 768
 769      // If there's more left, allocate another chunk and continue
 770      if (strlen > 0)
 771      {
 772
 773        // Extend array?
 774        int i = m_array.length;
 775
 776        if (m_lastChunk + 1 == i)
 777        {
 778          char[][] newarray = new char[i + 16][];
 779
 780          System.arraycopy(m_array, 0, newarray, 0, i);
 781
 782          m_array = newarray;
 783        }
 784
 785        // Advance one chunk
 786        chunk = m_array[++m_lastChunk];
 787
 788        if (chunk == null)
 789        {
 790
 791          // Hierarchical encapsulation
 792          if (m_lastChunk == 1 << m_rebundleBits
 793                  && m_chunkBits < m_maxChunkBits)
 794          {
 795
 796            // Should do all the work of both encapsulating
 797            // existing data and establishing new sizes/offsets
 798            m_innerFSB = new FastStringBuffer(this);
 799          }
 800
 801          // Add a chunk. 
 802          chunk = m_array[m_lastChunk] = new char[m_chunkSize];
 803        }
 804
 805        available = m_chunkSize;
 806        m_firstFree = 0;
 807      }
 808    }
 809
 810    // Adjust the insert point in the last chunk, when we've reached it.
 811    m_firstFree += available;
 812  }
 813
 814  /**
 815   * @return true if the specified range of characters are all whitespace,
 816   * as defined by XMLCharacterRecognizer.
 817   * <p>
 818   * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE.
 819   *
 820   * @param start Offset of first character in the range.
 821   * @param length Number of characters to send.
 822   */
 823  public boolean isWhitespace(int start, int length)
 824  {
 825
 826    int sourcechunk = start >>> m_chunkBits;
 827    int sourcecolumn = start & m_chunkMask;
 828    int available = m_chunkSize - sourcecolumn;
 829    boolean chunkOK;
 830
 831    while (length > 0)
 832    {
 833      int runlength = (length <= available) ? length : available;
 834
 835      if (sourcechunk == 0 && m_innerFSB != null)
 836        chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength);
 837      else
 838        chunkOK = com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer.isWhiteSpace(
 839          m_array[sourcechunk], sourcecolumn, runlength);
 840
 841      if (!chunkOK)
 842        return false;
 843
 844      length -= runlength;
 845
 846      ++sourcechunk;
 847
 848      sourcecolumn = 0;
 849      available = m_chunkSize;
 850    }
 851
 852    return true;
 853  }
 854
 855  /**
 856   * @param start Offset of first character in the range.
 857   * @param length Number of characters to send.
 858   * @return a new String object initialized from the specified range of
 859   * characters.
 860   */
 861  public String getString(int start, int length)
 862  {
 863    int startColumn = start & m_chunkMask;
 864    int startChunk = start >>> m_chunkBits;
 865    if (startColumn + length < m_chunkMask && m_innerFSB == null) {
 866      return getOneChunkString(startChunk, startColumn, length);
 867    }
 868    return getString(new StringBuffer(length), startChunk, startColumn,
 869                     length).toString();
 870  }
 871
 872  protected String getOneChunkString(int startChunk, int startColumn,
 873                                     int length) {
 874    return new String(m_array[startChunk], startColumn, length);
 875  }
 876
 877  /**
 878   * @param sb StringBuffer to be appended to
 879   * @param start Offset of first character in the range.
 880   * @param length Number of characters to send.
 881   * @return sb with the requested text appended to it
 882   */
 883  StringBuffer getString(StringBuffer sb, int start, int length)
 884  {
 885    return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length);
 886  }
 887
 888  /**
 889   * Internal support for toString() and getString().
 890   * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into
 891   * and returns a StringBuffer supplied by the caller. This simplifies
 892   * m_innerFSB support.
 893   * <p>
 894   * Note that this operation has been somewhat deoptimized by the shift to a
 895   * chunked array, as there is no factory method to produce a String object
 896   * directly from an array of arrays and hence a double copy is needed.
 897   * By presetting length we hope to minimize the heap overhead of building
 898   * the intermediate StringBuffer.
 899   * <p>
 900   * (It really is a pity that Java didn't design String as a final subclass
 901   * of MutableString, rather than having StringBuffer be a separate hierarchy.
 902   * We'd avoid a <strong>lot</strong> of double-buffering.)
 903   *
 904   *
 905   * @param sb
 906   * @param startChunk
 907   * @param startColumn
 908   * @param length
 909   * 
 910   * @return the contents of the FastStringBuffer as a standard Java string.
 911   */
 912  StringBuffer getString(StringBuffer sb, int startChunk, int startColumn,
 913                         int length)
 914  {
 915
 916    int stop = (startChunk << m_chunkBits) + startColumn + length;
 917    int stopChunk = stop >>> m_chunkBits;
 918    int stopColumn = stop & m_chunkMask;
 919
 920    // Factored out
 921    //StringBuffer sb=new StringBuffer(length);
 922    for (int i = startChunk; i < stopChunk; ++i)
 923    {
 924      if (i == 0 && m_innerFSB != null)
 925        m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn);
 926      else
 927        sb.append(m_array[i], startColumn, m_chunkSize - startColumn);
 928
 929      startColumn = 0;  // after first chunk
 930    }
 931
 932    if (stopChunk == 0 && m_innerFSB != null)
 933      m_innerFSB.getString(sb, startColumn, stopColumn - startColumn);
 934    else if (stopColumn > startColumn)
 935      sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn);
 936
 937    return sb;
 938  }
 939
 940  /**
 941   * Get a single character from the string buffer.
 942   *
 943   *
 944   * @param pos character position requested.
 945   * @return A character from the requested position.
 946   */
 947  public char charAt(int pos)
 948  {
 949    int startChunk = pos >>> m_chunkBits;
 950
 951    if (startChunk == 0 && m_innerFSB != null)
 952      return m_innerFSB.charAt(pos & m_chunkMask);
 953    else
 954      return m_array[startChunk][pos & m_chunkMask];
 955  }
 956
 957  /**
 958   * Sends the specified range of characters as one or more SAX characters()
 959   * events.
 960   * Note that the buffer reference passed to the ContentHandler may be
 961   * invalidated if the FastStringBuffer is edited; it's the user's
 962   * responsibility to manage access to the FastStringBuffer to prevent this
 963   * problem from arising.
 964   * <p>
 965   * Note too that there is no promise that the output will be sent as a
 966   * single call. As is always true in SAX, one logical string may be split
 967   * across multiple blocks of memory and hence delivered as several
 968   * successive events.
 969   *
 970   * @param ch SAX ContentHandler object to receive the event.
 971   * @param start Offset of first character in the range.
 972   * @param length Number of characters to send.
 973   * @exception org.xml.sax.SAXException may be thrown by handler's
 974   * characters() method.
 975   */
 976  public void sendSAXcharacters(
 977          org.xml.sax.ContentHandler ch, int start, int length)
 978            throws org.xml.sax.SAXException
 979  {
 980
 981    int startChunk = start >>> m_chunkBits;
 982    int startColumn = start & m_chunkMask;
 983    if (startColumn + length < m_chunkMask && m_innerFSB == null) {
 984        ch.characters(m_array[startChunk], startColumn, length);
 985        return;
 986    }
 987    
 988    int stop = start + length;
 989    int stopChunk = stop >>> m_chunkBits;
 990    int stopColumn = stop & m_chunkMask;
 991
 992    for (int i = startChunk; i < stopChunk; ++i)
 993    {
 994      if (i == 0 && m_innerFSB != null)
 995        m_innerFSB.sendSAXcharacters(ch, startColumn,
 996                                     m_chunkSize - startColumn);
 997      else
 998        ch.characters(m_array[i], startColumn, m_chunkSize - startColumn);
 999
1000      startColumn = 0;  // after first chunk
1001    }
1002
1003    // Last, or only, chunk
1004    if (stopChunk == 0 && m_innerFSB != null)
1005      m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn);
1006    else if (stopColumn > startColumn)
1007    {
1008      ch.characters(m_array[stopChunk], startColumn,
1009                    stopColumn - startColumn);
1010    }
1011  }
1012  
1013  /**
1014   * Sends the specified range of characters as one or more SAX characters()
1015   * events, normalizing the characters according to XSLT rules.
1016   *
1017   * @param ch SAX ContentHandler object to receive the event.
1018   * @param start Offset of first character in the range.
1019   * @param length Number of characters to send.
1020   * @return normalization status to apply to next chunk (because we may
1021   * have been called recursively to process an inner FSB):
1022   * <dl>
1023   * <dt>0</dt>
1024   * <dd>if this output did not end in retained whitespace, and thus whitespace
1025   * at the start of the following chunk (if any) should be converted to a
1026   * single space.
1027   * <dt>SUPPRESS_LEADING_WS</dt>
1028   * <dd>if this output ended in retained whitespace, and thus whitespace
1029   * at the start of the following chunk (if any) should be completely
1030   * suppressed.</dd>
1031   * </dd>
1032   * </dl>
1033   * @exception org.xml.sax.SAXException may be thrown by handler's
1034   * characters() method.
1035   */
1036  public int sendNormalizedSAXcharacters(
1037          org.xml.sax.ContentHandler ch, int start, int length)
1038            throws org.xml.sax.SAXException
1039  {
1040	// This call always starts at the beginning of the 
1041    // string being written out, either because it was called directly or
1042    // because it was an m_innerFSB recursion. This is important since
1043	// it gives us a well-known initial state for this flag:
1044	int stateForNextChunk=SUPPRESS_LEADING_WS;
1045
1046    int stop = start + length;
1047    int startChunk = start >>> m_chunkBits;
1048    int startColumn = start & m_chunkMask;
1049    int stopChunk = stop >>> m_chunkBits;
1050    int stopColumn = stop & m_chunkMask;
1051
1052    for (int i = startChunk; i < stopChunk; ++i)
1053    {
1054      if (i == 0 && m_innerFSB != null)
1055				stateForNextChunk=
1056        m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
1057                                     m_chunkSize - startColumn);
1058      else
1059				stateForNextChunk=
1060        sendNormalizedSAXcharacters(m_array[i], startColumn, 
1061                                    m_chunkSize - startColumn, 
1062																		ch,stateForNextChunk);
1063
1064      startColumn = 0;  // after first chunk
1065    }
1066
1067    // Last, or only, chunk
1068    if (stopChunk == 0 && m_innerFSB != null)
1069			stateForNextChunk= // %REVIEW% Is this update really needed?
1070      m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn);
1071    else if (stopColumn > startColumn)
1072    {
1073			stateForNextChunk= // %REVIEW% Is this update really needed?
1074      sendNormalizedSAXcharacters(m_array[stopChunk], 
1075																	startColumn, stopColumn - startColumn,
1076																	ch, stateForNextChunk | SUPPRESS_TRAILING_WS);
1077    }
1078		return stateForNextChunk;
1079  }
1080  
1081  static final char[] SINGLE_SPACE = {' '};
1082	  
1083  /**
1084   * Internal method to directly normalize and dispatch the character array.
1085   * This version is aware of the fact that it may be called several times
1086   * in succession if the data is made up of multiple "chunks", and thus
1087   * must actively manage the handling of leading and trailing whitespace.
1088   * 
1089   * Note: The recursion is due to the possible recursion of inner FSBs.
1090   *
1091   * @param ch The characters from the XML document.
1092   * @param start The start position in the array.
1093   * @param length The number of characters to read from the array.
1094   * @param handler SAX ContentHandler object to receive the event.
1095   * @param edgeTreatmentFlags How leading/trailing spaces should be handled. 
1096   * This is a bitfield contining two flags, bitwise-ORed together:
1097   * <dl>
1098   * <dt>SUPPRESS_LEADING_WS</dt>
1099   * <dd>When false, causes leading whitespace to be converted to a single
1100   * space; when true, causes it to be discarded entirely.
1101   * Should be set TRUE for the first chunk, and (in multi-chunk output)
1102   * whenever the previous chunk ended in retained whitespace.</dd>
1103   * <dt>SUPPRESS_TRAILING_WS</dt>
1104   * <dd>When false, causes trailing whitespace to be converted to a single
1105   * space; when true, causes it to be discarded entirely.
1106   * Should be set TRUE for the last or only chunk.
1107   * </dd>
1108   * </dl>
1109   * @return normalization status, as in the edgeTreatmentFlags parameter:
1110   * <dl>
1111   * <dt>0</dt>
1112   * <dd>if this output did not end in retained whitespace, and thus whitespace
1113   * at the start of the following chunk (if any) should be converted to a
1114   * single space.
1115   * <dt>SUPPRESS_LEADING_WS</dt>
1116   * <dd>if this output ended in retained whitespace, and thus whitespace
1117   * at the start of the following chunk (if any) should be completely
1118   * suppressed.</dd>
1119   * </dd>
1120   * </dl>
1121   *
1122   * 
1123   * @exception org.xml.sax.SAXException Any SAX exception, possibly
1124   *            wrapping another exception.
1125   */
1126  static int sendNormalizedSAXcharacters(char ch[], 
1127             int start, int length, 
1128             org.xml.sax.ContentHandler handler,
1129						 int edgeTreatmentFlags)
1130          throws org.xml.sax.SAXException
1131  {
1132     boolean processingLeadingWhitespace =
1133                       ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0);
1134     boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0);
1135     boolean suppressTrailingWhitespace =
1136                       ((edgeTreatmentFlags & SUPPRESS_TRAILING_WS) != 0);
1137     int currPos = start;
1138     int limit = start+length;
1139
1140     // Strip any leading spaces first, if required
1141     if (processingLeadingWhitespace) {
1142         for (; currPos < limit
1143                && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1144              currPos++) { }
1145
1146         // If we've only encountered leading spaces, the
1147         // current state remains unchanged
1148         if (currPos == limit) {
1149             return edgeTreatmentFlags;
1150         }
1151     }
1152
1153     // If we get here, there are no more leading spaces to strip
1154     while (currPos < limit) {
1155         int startNonWhitespace = currPos;
1156
1157         // Grab a chunk of non-whitespace characters
1158         for (; currPos < limit
1159                && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1160              currPos++) { }
1161
1162         // Non-whitespace seen - emit them, along with a single
1163         // space for any preceding whitespace characters
1164         if (startNonWhitespace != currPos) {
1165             if (seenWhitespace) {
1166                 handler.characters(SINGLE_SPACE, 0, 1);
1167                 seenWhitespace = false;
1168             }
1169             handler.characters(ch, startNonWhitespace,
1170                                currPos - startNonWhitespace);
1171         }
1172
1173         int startWhitespace = currPos;
1174
1175         // Consume any whitespace characters
1176         for (; currPos < limit
1177                && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1178              currPos++) { }
1179
1180         if (startWhitespace != currPos) {
1181             seenWhitespace = true;
1182         }
1183     }
1184
1185     return (seenWhitespace ? CARRY_WS : 0)
1186            | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS);
1187  }
1188
1189  /**
1190   * Directly normalize and dispatch the character array.
1191   *
1192   * @param ch The characters from the XML document.
1193   * @param start The start position in the array.
1194   * @param length The number of characters to read from the array.
1195   * @param handler SAX ContentHandler object to receive the event.
1196   * @exception org.xml.sax.SAXException Any SAX exception, possibly
1197   *            wrapping another exception.
1198   */
1199  public static void sendNormalizedSAXcharacters(char ch[], 
1200             int start, int length, 
1201             org.xml.sax.ContentHandler handler)
1202          throws org.xml.sax.SAXException
1203  {
1204		sendNormalizedSAXcharacters(ch, start, length, 
1205             handler, SUPPRESS_BOTH);
1206	}
1207		
1208	/**
1209   * Sends the specified range of characters as sax Comment.
1210   * <p>
1211   * Note that, unlike sendSAXcharacters, this has to be done as a single 
1212   * call to LexicalHandler#comment.
1213   *
1214   * @param ch SAX LexicalHandler object to receive the event.
1215   * @param start Offset of first character in the range.
1216   * @param length Number of characters to send.
1217   * @exception org.xml.sax.SAXException may be thrown by handler's
1218   * characters() method.
1219   */
1220  public void sendSAXComment(
1221          org.xml.sax.ext.LexicalHandler ch, int start, int length)
1222            throws org.xml.sax.SAXException
1223  {
1224
1225    // %OPT% Do it this way for now...
1226    String comment = getString(start, length);
1227    ch.comment(comment.toCharArray(), 0, length);
1228  }
1229
1230  /**
1231   * Copies characters from this string into the destination character
1232   * array.
1233   *
1234   * @param      srcBegin   index of the first character in the string
1235   *                        to copy.
1236   * @param      srcEnd     index after the last character in the string
1237   *                        to copy.
1238   * @param      dst        the destination array.
1239   * @param      dstBegin   the start offset in the destination array.
1240   * @exception IndexOutOfBoundsException If any of the following
1241   *            is true:
1242   *            <ul><li><code>srcBegin</code> is negative.
1243   *            <li><code>srcBegin</code> is greater than <code>srcEnd</code>
1244   *            <li><code>srcEnd</code> is greater than the length of this
1245   *                string
1246   *            <li><code>dstBegin</code> is negative
1247   *            <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
1248   *                <code>dst.length</code></ul>
1249   * @exception NullPointerException if <code>dst</code> is <code>null</code>
1250   */
1251  private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
1252  {
1253    // %TBD% Joe needs to write this function.  Make public when implemented.
1254  }
1255
1256  /**
1257   * Encapsulation c'tor. After this is called, the source FastStringBuffer
1258   * will be reset to use the new object as its m_innerFSB, and will have
1259   * had its chunk size reset appropriately. IT SHOULD NEVER BE CALLED
1260   * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits)
1261   *
1262   * NEEDSDOC @param source
1263   */
1264  private FastStringBuffer(FastStringBuffer source)
1265  {
1266
1267    // Copy existing information into new encapsulation
1268    m_chunkBits = source.m_chunkBits;
1269    m_maxChunkBits = source.m_maxChunkBits;
1270    m_rebundleBits = source.m_rebundleBits;
1271    m_chunkSize = source.m_chunkSize;
1272    m_chunkMask = source.m_chunkMask;
1273    m_array = source.m_array;
1274    m_innerFSB = source.m_innerFSB;
1275
1276    // These have to be adjusted because we're calling just at the time
1277    // when we would be about to allocate another chunk
1278    m_lastChunk = source.m_lastChunk - 1;
1279    m_firstFree = source.m_chunkSize;
1280
1281    // Establish capsule as the Inner FSB, reset chunk sizes/addressing
1282    source.m_array = new char[16][];
1283    source.m_innerFSB = this;
1284
1285    // Since we encapsulated just as we were about to append another
1286    // chunk, return ready to create the chunk after the innerFSB
1287    // -- 1, not 0.
1288    source.m_lastChunk = 1;
1289    source.m_firstFree = 0;
1290    source.m_chunkBits += m_rebundleBits;
1291    source.m_chunkSize = 1 << (source.m_chunkBits);
1292    source.m_chunkMask = source.m_chunkSize - 1;
1293  }
1294}