/* Source path: /libs/headers/gc/cord.h */
/* C++ Header | 327 lines | 56 code | 50 blank | 221 comment | 2 complexity | 49d0db4f59bb4874db24c5942ed8c2ee MD5 | raw file */
1/* 2 * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved. 3 * 4 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED 5 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. 6 * 7 * Permission is hereby granted to use or copy this program 8 * for any purpose, provided the above notices are retained on all copies. 9 * Permission to modify the code and to distribute modified code is granted, 10 * provided the above notices are retained, and a notice that the code was 11 * modified is included with the above copyright notice. 12 * 13 * Author: Hans-J. Boehm (boehm@parc.xerox.com) 14 */ 15/* Boehm, October 5, 1995 4:20 pm PDT */ 16 17/* 18 * Cords are immutable character strings. A number of operations 19 * on long cords are much more efficient than their strings.h counterpart. 20 * In particular, concatenation takes constant time independent of the length 21 * of the arguments. (Cords are represented as trees, with internal 22 * nodes representing concatenation and leaves consisting of either C 23 * strings or a functional description of the string.) 24 * 25 * The following are reasonable applications of cords. They would perform 26 * unacceptably if C strings were used: 27 * - A compiler that produces assembly language output by repeatedly 28 * concatenating instructions onto a cord representing the output file. 29 * - A text editor that converts the input file to a cord, and then 30 * performs editing operations by producing a new cord representing 31 * the file after echa character change (and keeping the old ones in an 32 * edit history) 33 * 34 * For optimal performance, cords should be built by 35 * concatenating short sections. 36 * This interface is designed for maximum compatibility with C strings. 37 * ASCII NUL characters may be embedded in cords using CORD_from_fn. 38 * This is handled correctly, but CORD_to_char_star will produce a string 39 * with embedded NULs when given such a cord. 
40 * 41 * This interface is fairly big, largely for performance reasons. 42 * The most basic constants and functions: 43 * 44 * CORD - the type of a cord; 45 * CORD_EMPTY - empty cord; 46 * CORD_len(cord) - length of a cord; 47 * CORD_cat(cord1,cord2) - concatenation of two cords; 48 * CORD_substr(cord, start, len) - substring (or subcord); 49 * CORD_pos i; CORD_FOR(i, cord) { ... CORD_pos_fetch(i) ... } - 50 * examine each character in a cord. CORD_pos_fetch(i) is the char. 51 * CORD_fetch(int i) - Retrieve i'th character (slowly). 52 * CORD_cmp(cord1, cord2) - compare two cords. 53 * CORD_from_file(FILE * f) - turn a read-only file into a cord. 54 * CORD_to_char_star(cord) - convert to C string. 55 * (Non-NULL C constant strings are cords.) 56 * CORD_printf (etc.) - cord version of printf. Use %r for cords. 57 */ 58# ifndef CORD_H 59 60# define CORD_H 61# include <stddef.h> 62# include <stdio.h> 63/* Cords have type const char *. This is cheating quite a bit, and not */ 64/* 100% portable. But it means that nonempty character string */ 65/* constants may be used as cords directly, provided the string is */ 66/* never modified in place. The empty cord is represented by, and */ 67/* can be written as, 0. */ 68 69typedef const char * CORD; 70 71/* An empty cord is always represented as nil */ 72# define CORD_EMPTY 0 73 74/* Is a nonempty cord represented as a C string? */ 75#define CORD_IS_STRING(s) (*(s) != '\0') 76 77/* Concatenate two cords. If the arguments are C strings, they may */ 78/* not be subsequently altered. */ 79CORD CORD_cat(CORD x, CORD y); 80 81/* Concatenate a cord and a C string with known length. Except for the */ 82/* empty string case, this is a special case of CORD_cat. Since the */ 83/* length is known, it can be faster. */ 84/* The string y is shared with the resulting CORD. Hence it should */ 85/* not be altered by the caller. 
*/ 86CORD CORD_cat_char_star(CORD x, const char * y, size_t leny); 87 88/* Compute the length of a cord */ 89size_t CORD_len(CORD x); 90 91/* Cords may be represented by functions defining the ith character */ 92typedef char (* CORD_fn)(size_t i, void * client_data); 93 94/* Turn a functional description into a cord. */ 95CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len); 96 97/* Return the substring (subcord really) of x with length at most n, */ 98/* starting at position i. (The initial character has position 0.) */ 99CORD CORD_substr(CORD x, size_t i, size_t n); 100 101/* Return the argument, but rebalanced to allow more efficient */ 102/* character retrieval, substring operations, and comparisons. */ 103/* This is useful only for cords that were built using repeated */ 104/* concatenation. Guarantees log time access to the result, unless */ 105/* x was obtained through a large number of repeated substring ops */ 106/* or the embedded functional descriptions take longer to evaluate. */ 107/* May reallocate significant parts of the cord. The argument is not */ 108/* modified; only the result is balanced. */ 109CORD CORD_balance(CORD x); 110 111/* The following traverse a cord by applying a function to each */ 112/* character. This is occasionally appropriate, especially where */ 113/* speed is crucial. But, since C doesn't have nested functions, */ 114/* clients of this sort of traversal are clumsy to write. Consider */ 115/* the functions that operate on cord positions instead. */ 116 117/* Function to iteratively apply to individual characters in cord. */ 118typedef int (* CORD_iter_fn)(char c, void * client_data); 119 120/* Function to apply to substrings of a cord. Each substring is a */ 121/* a C character string, not a general cord. 
*/ 122typedef int (* CORD_batched_iter_fn)(const char * s, void * client_data); 123# define CORD_NO_FN ((CORD_batched_iter_fn)0) 124 125/* Apply f1 to each character in the cord, in ascending order, */ 126/* starting at position i. If */ 127/* f2 is not CORD_NO_FN, then multiple calls to f1 may be replaced by */ 128/* a single call to f2. The parameter f2 is provided only to allow */ 129/* some optimization by the client. This terminates when the right */ 130/* end of this string is reached, or when f1 or f2 return != 0. In the */ 131/* latter case CORD_iter returns != 0. Otherwise it returns 0. */ 132/* The specified value of i must be < CORD_len(x). */ 133int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1, 134 CORD_batched_iter_fn f2, void * client_data); 135 136/* A simpler version that starts at 0, and without f2: */ 137int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data); 138# define CORD_iter(x, f1, cd) CORD_iter5(x, 0, f1, CORD_NO_FN, cd) 139 140/* Similar to CORD_iter5, but end-to-beginning. No provisions for */ 141/* CORD_batched_iter_fn. */ 142int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data); 143 144/* A simpler version that starts at the end: */ 145int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data); 146 147/* Functions that operate on cord positions. The easy way to traverse */ 148/* cords. A cord position is logically a pair consisting of a cord */ 149/* and an index into that cord. But it is much faster to retrieve a */ 150/* charcter based on a position than on an index. Unfortunately, */ 151/* positions are big (order of a few 100 bytes), so allocate them with */ 152/* caution. */ 153/* Things in cord_pos.h should be treated as opaque, except as */ 154/* described below. Also note that */ 155/* CORD_pos_fetch, CORD_next and CORD_prev have both macro and function */ 156/* definitions. The former may evaluate their argument more than once. 
*/ 157# include "private/cord_pos.h" 158 159/* 160 Visible definitions from above: 161 162 typedef <OPAQUE but fairly big> CORD_pos[1]; 163 164 * Extract the cord from a position: 165 CORD CORD_pos_to_cord(CORD_pos p); 166 167 * Extract the current index from a position: 168 size_t CORD_pos_to_index(CORD_pos p); 169 170 * Fetch the character located at the given position: 171 char CORD_pos_fetch(CORD_pos p); 172 173 * Initialize the position to refer to the given cord and index. 174 * Note that this is the most expensive function on positions: 175 void CORD_set_pos(CORD_pos p, CORD x, size_t i); 176 177 * Advance the position to the next character. 178 * P must be initialized and valid. 179 * Invalidates p if past end: 180 void CORD_next(CORD_pos p); 181 182 * Move the position to the preceding character. 183 * P must be initialized and valid. 184 * Invalidates p if past beginning: 185 void CORD_prev(CORD_pos p); 186 187 * Is the position valid, i.e. inside the cord? 188 int CORD_pos_valid(CORD_pos p); 189*/ 190# define CORD_FOR(pos, cord) \ 191 for (CORD_set_pos(pos, cord, 0); CORD_pos_valid(pos); CORD_next(pos)) 192 193 194/* An out of memory handler to call. May be supplied by client. */ 195/* Must not return. */ 196extern void (* CORD_oom_fn)(void); 197 198/* Dump the representation of x to stdout in an implementation defined */ 199/* manner. Intended for debugging only. */ 200void CORD_dump(CORD x); 201 202/* The following could easily be implemented by the client. They are */ 203/* provided in cordxtra.c for convenience. */ 204 205/* Concatenate a character to the end of a cord. */ 206CORD CORD_cat_char(CORD x, char c); 207 208/* Concatenate n cords. 
*/ 209CORD CORD_catn(int n, /* CORD */ ...); 210 211/* Return the character in CORD_substr(x, i, 1) */ 212char CORD_fetch(CORD x, size_t i); 213 214/* Return < 0, 0, or > 0, depending on whether x < y, x = y, x > y */ 215int CORD_cmp(CORD x, CORD y); 216 217/* A generalization that takes both starting positions for the */ 218/* comparison, and a limit on the number of characters to be compared. */ 219int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start, size_t len); 220 221/* Find the first occurrence of s in x at position start or later. */ 222/* Return the position of the first character of s in x, or */ 223/* CORD_NOT_FOUND if there is none. */ 224size_t CORD_str(CORD x, size_t start, CORD s); 225 226/* Return a cord consisting of i copies of (possibly NUL) c. Dangerous */ 227/* in conjunction with CORD_to_char_star. */ 228/* The resulting representation takes constant space, independent of i. */ 229CORD CORD_chars(char c, size_t i); 230# define CORD_nul(i) CORD_chars('\0', (i)) 231 232/* Turn a file into cord. The file must be seekable. Its contents */ 233/* must remain constant. The file may be accessed as an immediate */ 234/* result of this call and/or as a result of subsequent accesses to */ 235/* the cord. Short files are likely to be immediately read, but */ 236/* long files are likely to be read on demand, possibly relying on */ 237/* stdio for buffering. */ 238/* We must have exclusive access to the descriptor f, i.e. we may */ 239/* read it at any time, and expect the file pointer to be */ 240/* where we left it. Normally this should be invoked as */ 241/* CORD_from_file(fopen(...)) */ 242/* CORD_from_file arranges to close the file descriptor when it is no */ 243/* longer needed (e.g. when the result becomes inaccessible). */ 244/* The file f must be such that ftell reflects the actual character */ 245/* position in the file, i.e. the number of characters that can be */ 246/* or were read with fread. On UNIX systems this is always true. 
On */ 247/* MS Windows systems, f must be opened in binary mode. */ 248CORD CORD_from_file(FILE * f); 249 250/* Equivalent to the above, except that the entire file will be read */ 251/* and the file pointer will be closed immediately. */ 252/* The binary mode restriction from above does not apply. */ 253CORD CORD_from_file_eager(FILE * f); 254 255/* Equivalent to the above, except that the file will be read on demand.*/ 256/* The binary mode restriction applies. */ 257CORD CORD_from_file_lazy(FILE * f); 258 259/* Turn a cord into a C string. The result shares no structure with */ 260/* x, and is thus modifiable. */ 261char * CORD_to_char_star(CORD x); 262 263/* Turn a C string into a CORD. The C string is copied, and so may */ 264/* subsequently be modified. */ 265CORD CORD_from_char_star(const char *s); 266 267/* Identical to the above, but the result may share structure with */ 268/* the argument and is thus not modifiable. */ 269const char * CORD_to_const_char_star(CORD x); 270 271/* Write a cord to a file, starting at the current position. No */ 272/* trailing NULs are newlines are added. */ 273/* Returns EOF if a write error occurs, 1 otherwise. */ 274int CORD_put(CORD x, FILE * f); 275 276/* "Not found" result for the following two functions. */ 277# define CORD_NOT_FOUND ((size_t)(-1)) 278 279/* A vague analog of strchr. Returns the position (an integer, not */ 280/* a pointer) of the first occurrence of (char) c inside x at position */ 281/* i or later. The value i must be < CORD_len(x). */ 282size_t CORD_chr(CORD x, size_t i, int c); 283 284/* A vague analog of strrchr. Returns index of the last occurrence */ 285/* of (char) c inside x at position i or earlier. The value i */ 286/* must be < CORD_len(x). */ 287size_t CORD_rchr(CORD x, size_t i, int c); 288 289 290/* The following are also not primitive, but are implemented in */ 291/* cordprnt.c. 
They provide functionality similar to the ANSI C */ 292/* functions with corresponding names, but with the following */ 293/* additions and changes: */ 294/* 1. A %r conversion specification specifies a CORD argument. Field */ 295/* width, precision, etc. have the same semantics as for %s. */ 296/* (Note that %c,%C, and %S were already taken.) */ 297/* 2. The format string is represented as a CORD. */ 298/* 3. CORD_sprintf and CORD_vsprintf assign the result through the 1st */ /* argument. Unlike their ANSI C versions, there is no need to guess */ 299/* the correct buffer size. */ 300/* 4. Most of the conversions are implement through the native */ 301/* vsprintf. Hence they are usually no faster, and */ 302/* idiosyncracies of the native printf are preserved. However, */ 303/* CORD arguments to CORD_sprintf and CORD_vsprintf are NOT copied; */ 304/* the result shares the original structure. This may make them */ 305/* very efficient in some unusual applications. */ 306/* The format string is copied. */ 307/* All functions return the number of characters generated or -1 on */ 308/* error. This complies with the ANSI standard, but is inconsistent */ 309/* with some older implementations of sprintf. */ 310 311/* The implementation of these is probably less portable than the rest */ 312/* of this package. */ 313 314#ifndef CORD_NO_IO 315 316#include <stdarg.h> 317 318int CORD_sprintf(CORD * out, CORD format, ...); 319int CORD_vsprintf(CORD * out, CORD format, va_list args); 320int CORD_fprintf(FILE * f, CORD format, ...); 321int CORD_vfprintf(FILE * f, CORD format, va_list args); 322int CORD_printf(CORD format, ...); 323int CORD_vprintf(CORD format, va_list args); 324 325#endif /* CORD_NO_IO */ 326 327# endif /* CORD_H */