PageRenderTime 34ms CodeModel.GetById 18ms app.highlight 13ms RepoModel.GetById 1ms app.codeStats 0ms

/contrib/ntp/sntp/libopts/tokenize.c

https://bitbucket.org/freebsd/freebsd-head/
C | 321 lines | 270 code | 14 blank | 37 comment | 20 complexity | bfd40498ec5947c1d8031503537e5cc1 MD5 | raw file
  1/*
  2 *  This file defines the string_tokenize interface
  3 * Time-stamp:      "2006-06-24 15:27:49 bkorb"
  4 *
  5 *  string_tokenize copyright 2005 Bruce Korb
  6 *
  7 *  string_tokenize is free software; you can redistribute it and/or
  8 *  modify it under the terms of the GNU Lesser General Public
  9 *  License as published by the Free Software Foundation; either
 10 *  version 2.1 of the License, or (at your option) any later version.
 11 *
 12 *  string_tokenize is distributed in the hope that it will be useful,
 13 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 15 *  Lesser General Public License for more details.
 16 *
 17 *  You should have received a copy of the GNU Lesser General Public
 18 *  License along with string_tokenize; if not, write to:
 19 *             The Free Software Foundation, Inc.,
 20 *             51 Franklin Street, Fifth Floor,
 21 *             Boston, MA  02110-1301, USA.
 22 */
 23#include <ctype.h>
 24#include <errno.h>
 25#include <stdlib.h>
 26
 27#define cc_t   const unsigned char
 28#define ch_t   unsigned char
 29
 30/* = = = START-STATIC-FORWARD = = = */
 31/* static forward declarations maintained by :mkfwd */
 32static void
 33copy_cooked( ch_t** ppDest, char const ** ppSrc );
 34
 35static void
 36copy_raw( ch_t** ppDest, char const ** ppSrc );
 37/* = = = END-STATIC-FORWARD = = = */
 38
 39static void
 40copy_cooked( ch_t** ppDest, char const ** ppSrc )
 41{
 42    ch_t* pDest = (ch_t*)*ppDest;
 43    const ch_t* pSrc  = (const ch_t*)(*ppSrc + 1);
 44
 45    for (;;) {
 46        ch_t ch = *(pSrc++);
 47        switch (ch) {
 48        case NUL:   *ppSrc = NULL; return;
 49        case '"':   goto done;
 50        case '\\':
 51            pSrc += ao_string_cook_escape_char( (char*)pSrc, (char*)&ch, 0x7F );
 52            if (ch == 0x7F)
 53                break;
 54            /* FALLTHROUGH */
 55
 56        default:
 57            *(pDest++) = ch;
 58        }
 59    }
 60
 61 done:
 62    *ppDest = (ch_t*)pDest; /* next spot for storing character */
 63    *ppSrc  = (char const *)pSrc;  /* char following closing quote    */
 64}
 65
 66
 67static void
 68copy_raw( ch_t** ppDest, char const ** ppSrc )
 69{
 70    ch_t* pDest = *ppDest;
 71    cc_t* pSrc  = (cc_t*) (*ppSrc + 1);
 72
 73    for (;;) {
 74        ch_t ch = *(pSrc++);
 75        switch (ch) {
 76        case NUL:   *ppSrc = NULL; return;
 77        case '\'':  goto done;
 78        case '\\':
 79            /*
 80             *  *Four* escapes are handled:  newline removal, escape char
 81             *  quoting and apostrophe quoting
 82             */
 83            switch (*pSrc) {
 84            case NUL:   *ppSrc = NULL; return;
 85            case '\r':
 86                if (*(++pSrc) == '\n')
 87                    ++pSrc;
 88                continue;
 89
 90            case '\n':
 91                ++pSrc;
 92                continue;
 93
 94            case '\'':
 95                ch = '\'';
 96                /* FALLTHROUGH */
 97
 98            case '\\':
 99                ++pSrc;
100                break;
101            }
102            /* FALLTHROUGH */
103
104        default:
105            *(pDest++) = ch;
106        }
107    }
108
109 done:
110    *ppDest = pDest; /* next spot for storing character */
111    *ppSrc  = (char const *) pSrc;  /* char following closing quote    */
112}
113
114
115/*=export_func ao_string_tokenize
116 *
117 * what: tokenize an input string
118 *
119 * arg:  + char const* + string + string to be tokenized +
120 *
121 * ret_type:  token_list_t*
122 * ret_desc:  pointer to a structure that lists each token
123 *
124 * doc:
125 *
126 * This function will convert one input string into a list of strings.
127 * The list of strings is derived by separating the input based on
128 * white space separation.  However, if the input contains either single
129 * or double quote characters, then the text after that character up to
130 * a matching quote will become the string in the list.
131 *
132 *  The returned pointer should be deallocated with @code{free(3C)} when
133 *  are done using the data.  The data are placed in a single block of
134 *  allocated memory.  Do not deallocate individual token/strings.
135 *
136 *  The structure pointed to will contain at least these two fields:
137 *  @table @samp
138 *  @item tkn_ct
139 *  The number of tokens found in the input string.
140 *  @item tok_list
141 *  An array of @code{tkn_ct + 1} pointers to substring tokens, with
142 *  the last pointer set to NULL.
143 *  @end table
144 *
145 * There are two types of quoted strings: single quoted (@code{'}) and
146 * double quoted (@code{"}).  Singly quoted strings are fairly raw in that
147 * escape characters (@code{\\}) are simply another character, except when
148 * preceding the following characters:
149 * @example
150 * @code{\\}  double backslashes reduce to one
151 * @code{'}   incorporates the single quote into the string
152 * @code{\n}  suppresses both the backslash and newline character
153 * @end example
154 *
155 * Double quote strings are formed according to the rules of string
156 * constants in ANSI-C programs.
157 *
158 * example:
159 * @example
160 *    #include <stdlib.h>
161 *    int ix;
162 *    token_list_t* ptl = ao_string_tokenize( some_string )
163 *    for (ix = 0; ix < ptl->tkn_ct; ix++)
164 *       do_something_with_tkn( ptl->tkn_list[ix] );
165 *    free( ptl );
166 * @end example
167 * Note that everything is freed with the one call to @code{free(3C)}.
168 *
169 * err:
170 *  NULL is returned and @code{errno} will be set to indicate the problem:
171 *  @itemize @bullet
172 *  @item
173 *  @code{EINVAL} - There was an unterminated quoted string.
174 *  @item
175 *  @code{ENOENT} - The input string was empty.
176 *  @item
177 *  @code{ENOMEM} - There is not enough memory.
178 *  @end itemize
179=*/
180token_list_t*
181ao_string_tokenize( char const* str )
182{
183    int max_token_ct = 1; /* allow for trailing NUL on string */
184    token_list_t* res;
185
186    if (str == NULL)  goto bogus_str;
187
188    /*
189     *  Trim leading white space.  Use "ENOENT" and a NULL return to indicate
190     *  an empty string was passed.
191     */
192    while (isspace( (ch_t)*str ))  str++;
193    if (*str == NUL) {
194    bogus_str:
195        errno = ENOENT;
196        return NULL;
197    }
198
199    /*
200     *  Take an approximate count of tokens.  If no quoted strings are used,
201     *  it will be accurate.  If quoted strings are used, it will be a little
202     *  high and we'll squander the space for a few extra pointers.
203     */
204    {
205        cc_t* pz = (cc_t*)str;
206
207        do {
208            max_token_ct++;
209            while (! isspace( *++pz ))
210                if (*pz == NUL) goto found_nul;
211            while (isspace( *pz ))  pz++;
212        } while (*pz != NUL);
213
214    found_nul:
215        ;
216    }
217
218    res = malloc( sizeof(*res) + strlen(str) + (max_token_ct * sizeof(ch_t*)) );
219    if (res == NULL) {
220        errno = ENOMEM;
221        return res;
222    }
223
224    /*
225     *  Now copy each token into the output buffer.
226     */
227    {
228        ch_t* pzDest = (ch_t*)(res->tkn_list + (max_token_ct + 1));
229        res->tkn_ct  = 0;
230
231        do  {
232            res->tkn_list[ res->tkn_ct++ ] = pzDest;
233            for (;;) {
234                int ch = (ch_t)*str;
235                if (isspace( ch )) {
236                found_white_space:
237                    while (isspace( (ch_t)*++str ))  ;
238                    break;
239                }
240
241                switch (ch) {
242                case '"':
243                    copy_cooked( &pzDest, &str );
244                    if (str == NULL) {
245                        free(res);
246                        errno = EINVAL;
247                        return NULL;
248                    }
249                    if (isspace( (ch_t)*str ))
250                        goto found_white_space;
251                    break;
252
253                case '\'':
254                    copy_raw( &pzDest, &str );
255                    if (str == NULL) {
256                        free(res);
257                        errno = EINVAL;
258                        return NULL;
259                    }
260                    if (isspace( (ch_t)*str ))
261                        goto found_white_space;
262                    break;
263
264                case NUL:
265                    goto copy_done;
266
267                default:
268                    str++;
269                    *(pzDest++) = ch;
270                }
271            } copy_done:;
272
273            /*
274             * NUL terminate the last token and see if we have any more tokens.
275             */
276            *(pzDest++) = NUL;
277        } while (*str != NUL);
278
279        res->tkn_list[ res->tkn_ct ] = NULL;
280    }
281
282    return res;
283}
284
285#ifdef TEST
286#include <stdio.h>
287#include <string.h>
288
289int
290main( int argc, char** argv )
291{
292    if (argc == 1) {
293        printf("USAGE:  %s arg [ ... ]\n", *argv);
294        return 1;
295    }
296    while (--argc > 0) {
297        char* arg = *(++argv);
298        token_list_t* p = ao_string_tokenize( arg );
299        if (p == NULL) {
300            printf( "Parsing string ``%s'' failed:\n\terrno %d (%s)\n",
301                    arg, errno, strerror( errno ));
302        } else {
303            int ix = 0;
304            printf( "Parsed string ``%s''\ninto %d tokens:\n", arg, p->tkn_ct );
305            do {
306                printf( " %3d:  ``%s''\n", ix+1, p->tkn_list[ix] );
307            } while (++ix < p->tkn_ct);
308            free(p);
309        }
310    }
311    return 0;
312}
313#endif
314
315/*
316 * Local Variables:
317 * mode: C
318 * c-file-style: "stroustrup"
319 * indent-tabs-mode: nil
320 * End:
321 * end of autoopts/tokenize.c */