PageRenderTime 23ms CodeModel.GetById 9ms app.highlight 10ms RepoModel.GetById 1ms app.codeStats 1ms

/libs/headers/gc/cord.h

http://github.com/nddrylliog/ooc
C++ Header | 327 lines | 56 code | 50 blank | 221 comment | 2 complexity | 49d0db4f59bb4874db24c5942ed8c2ee MD5 | raw file
  1/* 
  2 * Copyright (c) 1993-1994 by Xerox Corporation.  All rights reserved.
  3 *
  4 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
  5 * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
  6 *
  7 * Permission is hereby granted to use or copy this program
  8 * for any purpose,  provided the above notices are retained on all copies.
  9 * Permission to modify the code and to distribute modified code is granted,
 10 * provided the above notices are retained, and a notice that the code was
 11 * modified is included with the above copyright notice.
 12 *
 13 * Author: Hans-J. Boehm (boehm@parc.xerox.com)
 14 */
 15/* Boehm, October 5, 1995 4:20 pm PDT */
 16 
 17/*
 18 * Cords are immutable character strings.  A number of operations
 19 * on long cords are much more efficient than their string.h counterparts.
 20 * In particular, concatenation takes constant time independent of the length
 21 * of the arguments.  (Cords are represented as trees, with internal
 22 * nodes representing concatenation and leaves consisting of either C
 23 * strings or a functional description of the string.)
 24 *
 25 * The following are reasonable applications of cords.  They would perform
 26 * unacceptably if C strings were used:
 27 * - A compiler that produces assembly language output by repeatedly
 28 *   concatenating instructions onto a cord representing the output file.
 29 * - A text editor that converts the input file to a cord, and then
 30 *   performs editing operations by producing a new cord representing
 31 *   the file after each character change (and keeping the old ones in an
 32 *   edit history)
 33 *
 34 * For optimal performance, cords should be built by
 35 * concatenating short sections.
 36 * This interface is designed for maximum compatibility with C strings.
 37 * ASCII NUL characters may be embedded in cords using CORD_from_fn.
 38 * This is handled correctly, but CORD_to_char_star will produce a string
 39 * with embedded NULs when given such a cord. 
 40 *
 41 * This interface is fairly big, largely for performance reasons.
 42 * The most basic constants and functions:
 43 *
 44 * CORD - the type of a cord;
 45 * CORD_EMPTY - empty cord;
 46 * CORD_len(cord) - length of a cord;
 47 * CORD_cat(cord1,cord2) - concatenation of two cords;
 48 * CORD_substr(cord, start, len) - substring (or subcord);
 49 * CORD_pos i;  CORD_FOR(i, cord) {  ... CORD_pos_fetch(i) ... } -
 50 *    examine each character in a cord.  CORD_pos_fetch(i) is the char.
 51 * CORD_fetch(cord, i) - Retrieve i'th character (slowly).
 52 * CORD_cmp(cord1, cord2) - compare two cords.
 53 * CORD_from_file(FILE * f) - turn a read-only file into a cord.
 54 * CORD_to_char_star(cord) - convert to C string.
 55 *   (Non-NULL C constant strings are cords.)
 56 * CORD_printf (etc.) - cord version of printf. Use %r for cords.
 57 */
 58# ifndef CORD_H
 59
 60# define CORD_H
 61# include <stddef.h>
 62# include <stdio.h>
 63/* Cords have type const char *.  This is cheating quite a bit, and not	*/
 64/* 100% portable.  But it means that nonempty character string		*/
 65/* constants may be used as cords directly, provided the string is	*/
 66/* never modified in place.  The empty cord is represented by, and	*/
 67/* can be written as, 0.						*/
 68
 69typedef const char * CORD;
 70
 71/* The empty cord is always represented as nil, i.e. a null CORD pointer.	*/
 72# define CORD_EMPTY 0
 73
 74/* Is a nonempty cord represented as a C string?  Dereferences s, so s must not be CORD_EMPTY. */
 75#define CORD_IS_STRING(s) (*(s) != '\0')
 76
 77/* Concatenate two cords.  If the arguments are C strings, they may 	*/
 78/* not be subsequently altered.						*/
 79CORD CORD_cat(CORD x, CORD y);
 80
 81/* Concatenate a cord and a C string with known length.  Except for the	*/
 82/* empty string case, this is a special case of CORD_cat.  Since the	*/
 83/* length is known, it can be faster.					*/
 84/* The string y is shared with the resulting CORD.  Hence it should	*/
 85/* not be altered by the caller.					*/
 86CORD CORD_cat_char_star(CORD x, const char * y, size_t leny);
 87
 88/* Compute the length of a cord */
 89size_t CORD_len(CORD x);
 90
 91/* Cords may be represented by functions defining the ith character */
 92typedef char (* CORD_fn)(size_t i, void * client_data);
 93
 94/* Turn a functional description into a cord. 	*/
 95CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len);
 96
 97/* Return the substring (subcord really) of x with length at most n,	*/
 98/* starting at position i.  (The initial character has position 0.)	*/
 99CORD CORD_substr(CORD x, size_t i, size_t n);
100
101/* Return the argument, but rebalanced to allow more efficient   	*/
102/* character retrieval, substring operations, and comparisons.		*/
103/* This is useful only for cords that were built using repeated 	*/
104/* concatenation.  Guarantees log time access to the result, unless	*/
105/* x was obtained through a large number of repeated substring ops	*/
106/* or the embedded functional descriptions take longer to evaluate.	*/
107/* May reallocate significant parts of the cord.  The argument is not	*/
108/* modified; only the result is balanced.				*/
109CORD CORD_balance(CORD x);
110
111/* The following traverse a cord by applying a function to each 	*/
112/* character.  This is occasionally appropriate, especially where	*/
113/* speed is crucial.  But, since C doesn't have nested functions,	*/
114/* clients of this sort of traversal are clumsy to write.  Consider	*/
115/* the functions that operate on cord positions instead.		*/
116
117/* Function to iteratively apply to individual characters in cord.	*/
118typedef int (* CORD_iter_fn)(char c, void * client_data);
119
120/* Function to apply to substrings of a cord.  Each substring is a	*/
121/* C character string, not a general cord.  CORD_NO_FN is its null value.	*/
122typedef int (* CORD_batched_iter_fn)(const char * s, void * client_data);
123# define CORD_NO_FN ((CORD_batched_iter_fn)0)
124
125/* Apply f1 to each character in the cord, in ascending order,		*/
126/* starting at position i. If						*/
127/* f2 is not CORD_NO_FN, then multiple calls to f1 may be replaced by	*/
128/* a single call to f2.  The parameter f2 is provided only to allow	*/
129/* some optimization by the client.  This terminates when the right	*/
130/* end of this string is reached, or when f1 or f2 return != 0.  In the	*/
131/* latter case CORD_iter5 returns != 0.  Otherwise it returns 0.	*/
132/* The specified value of i must be < CORD_len(x).			*/
133int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1,
134	       CORD_batched_iter_fn f2, void * client_data);
135
136/* A simpler version that starts at 0, and without f2 (the macro forwards to CORD_iter5):	*/
137int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data);
138# define CORD_iter(x, f1, cd) CORD_iter5(x, 0, f1, CORD_NO_FN, cd)
139
140/* Similar to CORD_iter5, but end-to-beginning.	No provisions for	*/
141/* CORD_batched_iter_fn.						*/
142int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data);
143
144/* A simpler version that starts at the end:	*/
145int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data);
146
147/* Functions that operate on cord positions.  The easy way to traverse	*/
148/* cords.  A cord position is logically a pair consisting of a cord	*/
149/* and an index into that cord.  But it is much faster to retrieve a	*/
150/* character based on a position than on an index.  Unfortunately,	*/
151/* positions are big (order of a few 100 bytes), so allocate them with	*/
152/* caution.								*/
153/* Things in cord_pos.h should be treated as opaque, except as		*/
154/* described below.  Also note that					*/
155/* CORD_pos_fetch, CORD_next and CORD_prev have both macro and function	*/
156/* definitions.  The former may evaluate their argument more than once. */
157# include "private/cord_pos.h"
158
159/*
160	Visible definitions from above:
161	
162	typedef <OPAQUE but fairly big> CORD_pos[1];
163	
164	* Extract the cord from a position:
165	CORD CORD_pos_to_cord(CORD_pos p);
166	
167	* Extract the current index from a position:
168	size_t CORD_pos_to_index(CORD_pos p);
169	
170	* Fetch the character located at the given position:
171	char CORD_pos_fetch(CORD_pos p);
172	
173	* Initialize the position to refer to the given cord and index.
174	* Note that this is the most expensive function on positions:
175	void CORD_set_pos(CORD_pos p, CORD x, size_t i);
176	
177	* Advance the position to the next character.
178	* P must be initialized and valid.
179	* Invalidates p if past end:
180	void CORD_next(CORD_pos p);
181	
182	* Move the position to the preceding character.
183	* P must be initialized and valid.
184	* Invalidates p if past beginning:
185	void CORD_prev(CORD_pos p);
186	
187	* Is the position valid, i.e. inside the cord?
188	int CORD_pos_valid(CORD_pos p);
189*/
/* Loop header that sets pos to index 0 of cord and calls CORD_next	*/
/* while CORD_pos_valid(pos) holds.  Note that pos is evaluated more	*/
/* than once.								*/
190# define CORD_FOR(pos, cord) \
191    for (CORD_set_pos(pos, cord, 0); CORD_pos_valid(pos); CORD_next(pos))
192
193			
194/* An out of memory handler to call.  May be supplied by client.	*/
195/* Must not return.							*/
196extern void (* CORD_oom_fn)(void);
197
198/* Dump the representation of x to stdout in an implementation defined	*/
199/* manner.  Intended for debugging only.				*/
200void CORD_dump(CORD x);
201
202/* The following could easily be implemented by the client.  They are	*/
203/* provided in cordxtra.c for convenience.				*/
204
205/* Concatenate a character to the end of a cord.	*/
206CORD CORD_cat_char(CORD x, char c);
207
208/* Concatenate n cords.	*/
209CORD CORD_catn(int n, /* CORD */ ...);
210
211/* Return the character in CORD_substr(x, i, 1)  	*/
212char CORD_fetch(CORD x, size_t i);
213
214/* Return < 0, 0, or > 0, depending on whether x < y, x = y, x > y	*/
215int CORD_cmp(CORD x, CORD y);
216
217/* A generalization that takes both starting positions for the 		*/
218/* comparison, and a limit on the number of characters to be compared.	*/
219int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start, size_t len);
220
221/* Find the first occurrence of s in x at position start or later.	*/
222/* Return the position of the first character of s in x, or		*/
223/* CORD_NOT_FOUND if there is none.					*/
224size_t CORD_str(CORD x, size_t start, CORD s);
225
226/* Return a cord consisting of i copies of (possibly NUL) c.  Dangerous	*/
227/* in conjunction with CORD_to_char_star.  CORD_nul(i) is i NUL chars.	*/
228/* The resulting representation takes constant space, independent of i.	*/
229CORD CORD_chars(char c, size_t i);
230# define CORD_nul(i) CORD_chars('\0', (i))
231
232/* Turn a file into cord.  The file must be seekable.  Its contents	*/
233/* must remain constant.  The file may be accessed as an immediate	*/
234/* result of this call and/or as a result of subsequent accesses to 	*/
235/* the cord.  Short files are likely to be immediately read, but	*/
236/* long files are likely to be read on demand, possibly relying on 	*/
237/* stdio for buffering.							*/
238/* We must have exclusive access to the descriptor f, i.e. we may	*/
239/* read it at any time, and expect the file pointer to be		*/
240/* where we left it.  Normally this should be invoked as		*/
241/* CORD_from_file(fopen(...))						*/
242/* CORD_from_file arranges to close the file descriptor when it is no	*/
243/* longer needed (e.g. when the result becomes inaccessible).		*/ 
244/* The file f must be such that ftell reflects the actual character	*/
245/* position in the file, i.e. the number of characters that can be 	*/
246/* or were read with fread.  On UNIX systems this is always true.  On	*/
247/* MS Windows systems, f must be opened in binary mode.			*/
248CORD CORD_from_file(FILE * f);
249
250/* Equivalent to the above, except that the entire file will be read	*/
251/* and the file pointer will be closed immediately.			*/
252/* The binary mode restriction from above does not apply.		*/
253CORD CORD_from_file_eager(FILE * f);
254
255/* Equivalent to the above, except that the file will be read on demand.*/
256/* The binary mode restriction applies.					*/
257CORD CORD_from_file_lazy(FILE * f);
258
259/* Turn a cord into a C string.	The result shares no structure with	*/
260/* x, and is thus modifiable.						*/
261char * CORD_to_char_star(CORD x);
262
263/* Turn a C string into a CORD.  The C string is copied, and so may	*/
264/* subsequently be modified.						*/
265CORD CORD_from_char_star(const char *s);
266
267/* Identical to the above, but the result may share structure with	*/
268/* the argument and is thus not modifiable.				*/
269const char * CORD_to_const_char_star(CORD x); 
270
271/* Write a cord to a file, starting at the current position.  No	*/
272/* trailing NULs or newlines are added.				*/
273/* Returns EOF if a write error occurs, 1 otherwise.			*/
274int CORD_put(CORD x, FILE * f);
275
276/* "Not found" result (the largest size_t value) for CORD_str and the following two functions.	*/
277# define CORD_NOT_FOUND ((size_t)(-1))
278
279/* A vague analog of strchr.  Returns the position (an integer, not	*/
280/* a pointer) of the first occurrence of (char) c inside x at position 	*/
281/* i or later. The value i must be < CORD_len(x).			*/
282size_t CORD_chr(CORD x, size_t i, int c);
283
284/* A vague analog of strrchr.  Returns index of the last occurrence	*/
285/* of (char) c inside x at position i or earlier. The value i		*/
286/* must be < CORD_len(x).						*/
287size_t CORD_rchr(CORD x, size_t i, int c);
288
289
290/* The following are also not primitive, but are implemented in 	*/
291/* cordprnt.c.  They provide functionality similar to the ANSI C	*/
292/* functions with corresponding names, but with the following		*/
293/* additions and changes:						*/
294/* 1. A %r conversion specification specifies a CORD argument.  Field	*/
295/*    width, precision, etc. have the same semantics as for %s.		*/
296/*    (Note that %c,%C, and %S were already taken.)			*/
297/* 2. The format string is represented as a CORD.		        */
298/* 3. CORD_sprintf and CORD_vsprintf assign the result through the 1st	*/ 	/*    argument.	Unlike their ANSI C versions, there is no need to guess	*/
299/*    the correct buffer size.						*/
300/* 4. Most of the conversions are implemented through the native 	*/
301/*    vsprintf.  Hence they are usually no faster, and 			*/
302/*    idiosyncrasies of the native printf are preserved.  However,	*/
303/*    CORD arguments to CORD_sprintf and CORD_vsprintf are NOT copied;	*/
304/*    the result shares the original structure.  This may make them	*/
305/*    very efficient in some unusual applications.			*/
306/*    The format string is copied.					*/
307/* All functions return the number of characters generated or -1 on	*/
308/* error.  This complies with the ANSI standard, but is inconsistent	*/
309/* with some older implementations of sprintf.				*/
310
311/* The implementation of these is probably less portable than the rest	*/
312/* of this package.							*/
313
314#ifndef CORD_NO_IO
315
316#include <stdarg.h>
317
318int CORD_sprintf(CORD * out, CORD format, ...);
319int CORD_vsprintf(CORD * out, CORD format, va_list args);
320int CORD_fprintf(FILE * f, CORD format, ...);
321int CORD_vfprintf(FILE * f, CORD format, va_list args);
322int CORD_printf(CORD format, ...);
323int CORD_vprintf(CORD format, va_list args);
324
325#endif /* CORD_NO_IO */
326
327# endif /* CORD_H */