PageRenderTime 451ms CodeModel.GetById 100ms app.highlight 227ms RepoModel.GetById 115ms app.codeStats 1ms

/jansson/src/load.c

http://github.com/nicolasff/webdis
C | 885 lines | 778 code | 85 blank | 22 comment | 104 complexity | 8f07c2a0caaffca1507a9a2a8ac6c2fc MD5 | raw file
  1/*
  2 * Copyright (c) 2009, 2010 Petri Lehtinen <petri@digip.org>
  3 *
  4 * Jansson is free software; you can redistribute it and/or modify
  5 * it under the terms of the MIT license. See LICENSE for details.
  6 */
  7
  8#define _GNU_SOURCE
  9#include <ctype.h>
 10#include <errno.h>
 11#include <limits.h>
 12#include <stdio.h>
 13#include <stdlib.h>
 14#include <string.h>
 15#include <stdarg.h>
 16#include <assert.h>
 17
 18#include <jansson.h>
 19#include "jansson_private.h"
 20#include "strbuffer.h"
 21#include "utf.h"
 22
 23#define TOKEN_INVALID         -1
 24#define TOKEN_EOF              0
 25#define TOKEN_STRING         256
 26#define TOKEN_INTEGER        257
 27#define TOKEN_REAL           258
 28#define TOKEN_TRUE           259
 29#define TOKEN_FALSE          260
 30#define TOKEN_NULL           261
 31
 32/* read one byte from stream, return EOF on end of file */
 33typedef int (*get_func)(void *data);
 34
 35/* return non-zero if end of file has been reached */
 36typedef int (*eof_func)(void *data);
 37
 38typedef struct {
 39    get_func get;
 40    eof_func eof;
 41    void *data;
 42    int stream_pos;
 43    char buffer[5];
 44    int buffer_pos;
 45} stream_t;
 46
 47
 48typedef struct {
 49    stream_t stream;
 50    strbuffer_t saved_text;
 51    int token;
 52    int line, column;
 53    union {
 54        char *string;
 55        json_int_t integer;
 56        double real;
 57    } value;
 58} lex_t;
 59
 60
 61/*** error reporting ***/
 62
 63static void error_set(json_error_t *error, const lex_t *lex,
 64                      const char *msg, ...)
 65{
 66    va_list ap;
 67    char msg_text[JSON_ERROR_TEXT_LENGTH];
 68
 69    int line = -1, col = -1;
 70    const char *result = msg_text;
 71
 72    if(!error)
 73        return;
 74
 75    va_start(ap, msg);
 76    vsnprintf(msg_text, JSON_ERROR_TEXT_LENGTH, msg, ap);
 77    va_end(ap);
 78
 79    if(lex)
 80    {
 81        const char *saved_text = strbuffer_value(&lex->saved_text);
 82        char msg_with_context[JSON_ERROR_TEXT_LENGTH];
 83
 84        line = lex->line;
 85
 86        if(saved_text && saved_text[0])
 87        {
 88            if(lex->saved_text.length <= 20) {
 89                snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH,
 90                         "%s near '%s'", msg_text, saved_text);
 91                result = msg_with_context;
 92            }
 93        }
 94        else
 95        {
 96            snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH,
 97                     "%s near end of file", msg_text);
 98            result = msg_with_context;
 99        }
100    }
101
102    jsonp_error_set(error, line, col, "%s", result);
103}
104
105
106/*** lexical analyzer ***/
107
108static void
109stream_init(stream_t *stream, get_func get, eof_func eof, void *data)
110{
111    stream->get = get;
112    stream->eof = eof;
113    stream->data = data;
114    stream->stream_pos = 0;
115    stream->buffer[0] = '\0';
116    stream->buffer_pos = 0;
117}
118
119static char stream_get(stream_t *stream, json_error_t *error)
120{
121    char c;
122
123    if(!stream->buffer[stream->buffer_pos])
124    {
125        stream->buffer[0] = stream->get(stream->data);
126        stream->buffer_pos = 0;
127
128        c = stream->buffer[0];
129
130        if((unsigned char)c >= 0x80 && c != (char)EOF)
131        {
132            /* multi-byte UTF-8 sequence */
133            int i, count;
134
135            count = utf8_check_first(c);
136            if(!count)
137                goto out;
138
139            assert(count >= 2);
140
141            for(i = 1; i < count; i++)
142                stream->buffer[i] = stream->get(stream->data);
143
144            if(!utf8_check_full(stream->buffer, count, NULL))
145                goto out;
146
147            stream->stream_pos += count;
148            stream->buffer[count] = '\0';
149        }
150        else {
151            stream->buffer[1] = '\0';
152            stream->stream_pos++;
153        }
154    }
155
156    return stream->buffer[stream->buffer_pos++];
157
158out:
159    error_set(error, NULL, "unable to decode byte 0x%x at position %d",
160              (unsigned char)c, stream->stream_pos);
161
162    stream->buffer[0] = EOF;
163    stream->buffer[1] = '\0';
164    stream->buffer_pos = 1;
165
166    return EOF;
167}
168
169static void stream_unget(stream_t *stream, char c)
170{
171    assert(stream->buffer_pos > 0);
172    stream->buffer_pos--;
173    assert(stream->buffer[stream->buffer_pos] == c);
174}
175
176
177static int lex_get(lex_t *lex, json_error_t *error)
178{
179    return stream_get(&lex->stream, error);
180}
181
182static int lex_eof(lex_t *lex)
183{
184    return lex->stream.eof(lex->stream.data);
185}
186
187static void lex_save(lex_t *lex, char c)
188{
189    strbuffer_append_byte(&lex->saved_text, c);
190}
191
192static int lex_get_save(lex_t *lex, json_error_t *error)
193{
194    char c = stream_get(&lex->stream, error);
195    lex_save(lex, c);
196    return c;
197}
198
199static void lex_unget_unsave(lex_t *lex, char c)
200{
201    char d;
202    stream_unget(&lex->stream, c);
203    d = strbuffer_pop(&lex->saved_text);
204    assert(c == d);
205}
206
207static void lex_save_cached(lex_t *lex)
208{
209    while(lex->stream.buffer[lex->stream.buffer_pos] != '\0')
210    {
211        lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]);
212        lex->stream.buffer_pos++;
213    }
214}
215
/*
 * Decode the four hex digits of a \uXXXX escape.
 *
 * str: points to the 'u' of the escape, followed by at least 4 valid
 *      hex digits (either case). Only those 4 digits are read.
 *
 * Returns the decoded 16-bit value (0x0000..0xFFFF).
 *
 * The digits are classified by explicit character ranges rather than
 * <ctype.h>: those functions must not be given a negative plain char,
 * and islower()/isupper() would also accept letters beyond 'f'/'F'.
 */
static int32_t decode_unicode_escape(const char *str)
{
    int i;
    int32_t value = 0;

    assert(str[0] == 'u');

    for(i = 1; i <= 4; i++) {
        const char c = str[i];
        int digit;

        if(c >= '0' && c <= '9')
            digit = c - '0';
        else if(c >= 'a' && c <= 'f')
            digit = c - 'a' + 10;
        else if(c >= 'A' && c <= 'F')
            digit = c - 'A' + 10;
        else {
            /* precondition violated: the caller passed a non-hex digit */
            assert(0);
            digit = 0;
        }

        value = (value << 4) + digit;
    }

    return value;
}
239
240static void lex_scan_string(lex_t *lex, json_error_t *error)
241{
242    char c;
243    const char *p;
244    char *t;
245    int i;
246
247    lex->value.string = NULL;
248    lex->token = TOKEN_INVALID;
249
250    c = lex_get_save(lex, error);
251
252    while(c != '"') {
253        if(c == (char)EOF) {
254            lex_unget_unsave(lex, c);
255            if(lex_eof(lex))
256                error_set(error, lex, "premature end of input");
257            goto out;
258        }
259
260        else if((unsigned char)c <= 0x1F) {
261            /* control character */
262            lex_unget_unsave(lex, c);
263            if(c == '\n')
264                error_set(error, lex, "unexpected newline", c);
265            else
266                error_set(error, lex, "control character 0x%x", c);
267            goto out;
268        }
269
270        else if(c == '\\') {
271            c = lex_get_save(lex, error);
272            if(c == 'u') {
273                c = lex_get_save(lex, error);
274                for(i = 0; i < 4; i++) {
275                    if(!isxdigit(c)) {
276                        lex_unget_unsave(lex, c);
277                        error_set(error, lex, "invalid escape");
278                        goto out;
279                    }
280                    c = lex_get_save(lex, error);
281                }
282            }
283            else if(c == '"' || c == '\\' || c == '/' || c == 'b' ||
284                    c == 'f' || c == 'n' || c == 'r' || c == 't')
285                c = lex_get_save(lex, error);
286            else {
287                lex_unget_unsave(lex, c);
288                error_set(error, lex, "invalid escape");
289                goto out;
290            }
291        }
292        else
293            c = lex_get_save(lex, error);
294    }
295
296    /* the actual value is at most of the same length as the source
297       string, because:
298         - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte
299         - a single \uXXXX escape (length 6) is converted to at most 3 bytes
300         - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair
301           are converted to 4 bytes
302    */
303    lex->value.string = malloc(lex->saved_text.length + 1);
304    if(!lex->value.string) {
305        /* this is not very nice, since TOKEN_INVALID is returned */
306        goto out;
307    }
308
309    /* the target */
310    t = lex->value.string;
311
312    /* + 1 to skip the " */
313    p = strbuffer_value(&lex->saved_text) + 1;
314
315    while(*p != '"') {
316        if(*p == '\\') {
317            p++;
318            if(*p == 'u') {
319                char buffer[4];
320                int length;
321                int32_t value;
322
323                value = decode_unicode_escape(p);
324                p += 5;
325
326                if(0xD800 <= value && value <= 0xDBFF) {
327                    /* surrogate pair */
328                    if(*p == '\\' && *(p + 1) == 'u') {
329                        int32_t value2 = decode_unicode_escape(++p);
330                        p += 5;
331
332                        if(0xDC00 <= value2 && value2 <= 0xDFFF) {
333                            /* valid second surrogate */
334                            value =
335                                ((value - 0xD800) << 10) +
336                                (value2 - 0xDC00) +
337                                0x10000;
338                        }
339                        else {
340                            /* invalid second surrogate */
341                            error_set(error, lex,
342                                      "invalid Unicode '\\u%04X\\u%04X'",
343                                      value, value2);
344                            goto out;
345                        }
346                    }
347                    else {
348                        /* no second surrogate */
349                        error_set(error, lex, "invalid Unicode '\\u%04X'",
350                                  value);
351                        goto out;
352                    }
353                }
354                else if(0xDC00 <= value && value <= 0xDFFF) {
355                    error_set(error, lex, "invalid Unicode '\\u%04X'", value);
356                    goto out;
357                }
358                else if(value == 0)
359                {
360                    error_set(error, lex, "\\u0000 is not allowed");
361                    goto out;
362                }
363
364                if(utf8_encode(value, buffer, &length))
365                    assert(0);
366
367                memcpy(t, buffer, length);
368                t += length;
369            }
370            else {
371                switch(*p) {
372                    case '"': case '\\': case '/':
373                        *t = *p; break;
374                    case 'b': *t = '\b'; break;
375                    case 'f': *t = '\f'; break;
376                    case 'n': *t = '\n'; break;
377                    case 'r': *t = '\r'; break;
378                    case 't': *t = '\t'; break;
379                    default: assert(0);
380                }
381                t++;
382                p++;
383            }
384        }
385        else
386            *(t++) = *(p++);
387    }
388    *t = '\0';
389    lex->token = TOKEN_STRING;
390    return;
391
392out:
393    free(lex->value.string);
394}
395
/*
 * Integer parser used by lex_scan_number(): strtoll when
 * JSON_INTEGER_IS_LONG_LONG is non-zero, strtol otherwise.
 */
#if JSON_INTEGER_IS_LONG_LONG
#  define json_strtoint     strtoll
#else
#  define json_strtoint     strtol
#endif
401
402static int lex_scan_number(lex_t *lex, char c, json_error_t *error)
403{
404    const char *saved_text;
405    char *end;
406    double value;
407
408    lex->token = TOKEN_INVALID;
409
410    if(c == '-')
411        c = lex_get_save(lex, error);
412
413    if(c == '0') {
414        c = lex_get_save(lex, error);
415        if(isdigit(c)) {
416            lex_unget_unsave(lex, c);
417            goto out;
418        }
419    }
420    else if(isdigit(c)) {
421        c = lex_get_save(lex, error);
422        while(isdigit(c))
423            c = lex_get_save(lex, error);
424    }
425    else {
426      lex_unget_unsave(lex, c);
427      goto out;
428    }
429
430    if(c != '.' && c != 'E' && c != 'e') {
431        json_int_t value;
432
433        lex_unget_unsave(lex, c);
434
435        saved_text = strbuffer_value(&lex->saved_text);
436
437        errno = 0;
438        value = json_strtoint(saved_text, &end, 10);
439        if(errno == ERANGE) {
440            if(value < 0)
441                error_set(error, lex, "too big negative integer");
442            else
443                error_set(error, lex, "too big integer");
444            goto out;
445        }
446
447        assert(end == saved_text + lex->saved_text.length);
448
449        lex->token = TOKEN_INTEGER;
450        lex->value.integer = value;
451        return 0;
452    }
453
454    if(c == '.') {
455        c = lex_get(lex, error);
456        if(!isdigit(c))
457            goto out;
458        lex_save(lex, c);
459
460        c = lex_get_save(lex, error);
461        while(isdigit(c))
462            c = lex_get_save(lex, error);
463    }
464
465    if(c == 'E' || c == 'e') {
466        c = lex_get_save(lex, error);
467        if(c == '+' || c == '-')
468            c = lex_get_save(lex, error);
469
470        if(!isdigit(c)) {
471            lex_unget_unsave(lex, c);
472            goto out;
473        }
474
475        c = lex_get_save(lex, error);
476        while(isdigit(c))
477            c = lex_get_save(lex, error);
478    }
479
480    lex_unget_unsave(lex, c);
481
482    saved_text = strbuffer_value(&lex->saved_text);
483    value = strtod(saved_text, &end);
484    assert(end == saved_text + lex->saved_text.length);
485
486    if(errno == ERANGE && value != 0) {
487        error_set(error, lex, "real number overflow");
488        goto out;
489    }
490
491    lex->token = TOKEN_REAL;
492    lex->value.real = value;
493    return 0;
494
495out:
496    return -1;
497}
498
499static int lex_scan(lex_t *lex, json_error_t *error)
500{
501    char c;
502
503    strbuffer_clear(&lex->saved_text);
504
505    if(lex->token == TOKEN_STRING) {
506        free(lex->value.string);
507        lex->value.string = NULL;
508    }
509
510    c = lex_get(lex, error);
511    while(c == ' ' || c == '\t' || c == '\n' || c == '\r')
512    {
513        if(c == '\n')
514            lex->line++;
515
516        c = lex_get(lex, error);
517    }
518
519    if(c == (char)EOF) {
520        if(lex_eof(lex))
521            lex->token = TOKEN_EOF;
522        else
523            lex->token = TOKEN_INVALID;
524        goto out;
525    }
526
527    lex_save(lex, c);
528
529    if(c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',')
530        lex->token = c;
531
532    else if(c == '"')
533        lex_scan_string(lex, error);
534
535    else if(isdigit(c) || c == '-') {
536        if(lex_scan_number(lex, c, error))
537            goto out;
538    }
539
540    else if(isupper(c) || islower(c)) {
541        /* eat up the whole identifier for clearer error messages */
542        const char *saved_text;
543
544        c = lex_get_save(lex, error);
545        while(isupper(c) || islower(c))
546            c = lex_get_save(lex, error);
547        lex_unget_unsave(lex, c);
548
549        saved_text = strbuffer_value(&lex->saved_text);
550
551        if(strcmp(saved_text, "true") == 0)
552            lex->token = TOKEN_TRUE;
553        else if(strcmp(saved_text, "false") == 0)
554            lex->token = TOKEN_FALSE;
555        else if(strcmp(saved_text, "null") == 0)
556            lex->token = TOKEN_NULL;
557        else
558            lex->token = TOKEN_INVALID;
559    }
560
561    else {
562        /* save the rest of the input UTF-8 sequence to get an error
563           message of valid UTF-8 */
564        lex_save_cached(lex);
565        lex->token = TOKEN_INVALID;
566    }
567
568out:
569    return lex->token;
570}
571
572static char *lex_steal_string(lex_t *lex)
573{
574    char *result = NULL;
575    if(lex->token == TOKEN_STRING)
576    {
577        result = lex->value.string;
578        lex->value.string = NULL;
579    }
580    return result;
581}
582
583static int lex_init(lex_t *lex, get_func get, eof_func eof, void *data)
584{
585    stream_init(&lex->stream, get, eof, data);
586    if(strbuffer_init(&lex->saved_text))
587        return -1;
588
589    lex->token = TOKEN_INVALID;
590    lex->line = 1;
591
592    return 0;
593}
594
595static void lex_close(lex_t *lex)
596{
597    if(lex->token == TOKEN_STRING)
598        free(lex->value.string);
599    strbuffer_close(&lex->saved_text);
600}
601
602
603/*** parser ***/
604
605static json_t *parse_value(lex_t *lex, json_error_t *error);
606
607static json_t *parse_object(lex_t *lex, json_error_t *error)
608{
609    json_t *object = json_object();
610    if(!object)
611        return NULL;
612
613    lex_scan(lex, error);
614    if(lex->token == '}')
615        return object;
616
617    while(1) {
618        char *key;
619        json_t *value;
620
621        if(lex->token != TOKEN_STRING) {
622            error_set(error, lex, "string or '}' expected");
623            goto error;
624        }
625
626        key = lex_steal_string(lex);
627        if(!key)
628            return NULL;
629
630        lex_scan(lex, error);
631        if(lex->token != ':') {
632            free(key);
633            error_set(error, lex, "':' expected");
634            goto error;
635        }
636
637        lex_scan(lex, error);
638        value = parse_value(lex, error);
639        if(!value) {
640            free(key);
641            goto error;
642        }
643
644        if(json_object_set_nocheck(object, key, value)) {
645            free(key);
646            json_decref(value);
647            goto error;
648        }
649
650        json_decref(value);
651        free(key);
652
653        lex_scan(lex, error);
654        if(lex->token != ',')
655            break;
656
657        lex_scan(lex, error);
658    }
659
660    if(lex->token != '}') {
661        error_set(error, lex, "'}' expected");
662        goto error;
663    }
664
665    return object;
666
667error:
668    json_decref(object);
669    return NULL;
670}
671
672static json_t *parse_array(lex_t *lex, json_error_t *error)
673{
674    json_t *array = json_array();
675    if(!array)
676        return NULL;
677
678    lex_scan(lex, error);
679    if(lex->token == ']')
680        return array;
681
682    while(lex->token) {
683        json_t *elem = parse_value(lex, error);
684        if(!elem)
685            goto error;
686
687        if(json_array_append(array, elem)) {
688            json_decref(elem);
689            goto error;
690        }
691        json_decref(elem);
692
693        lex_scan(lex, error);
694        if(lex->token != ',')
695            break;
696
697        lex_scan(lex, error);
698    }
699
700    if(lex->token != ']') {
701        error_set(error, lex, "']' expected");
702        goto error;
703    }
704
705    return array;
706
707error:
708    json_decref(array);
709    return NULL;
710}
711
712static json_t *parse_value(lex_t *lex, json_error_t *error)
713{
714    json_t *json;
715
716    switch(lex->token) {
717        case TOKEN_STRING: {
718            json = json_string_nocheck(lex->value.string);
719            break;
720        }
721
722        case TOKEN_INTEGER: {
723            json = json_integer(lex->value.integer);
724            break;
725        }
726
727        case TOKEN_REAL: {
728            json = json_real(lex->value.real);
729            break;
730        }
731
732        case TOKEN_TRUE:
733            json = json_true();
734            break;
735
736        case TOKEN_FALSE:
737            json = json_false();
738            break;
739
740        case TOKEN_NULL:
741            json = json_null();
742            break;
743
744        case '{':
745            json = parse_object(lex, error);
746            break;
747
748        case '[':
749            json = parse_array(lex, error);
750            break;
751
752        case TOKEN_INVALID:
753            error_set(error, lex, "invalid token");
754            return NULL;
755
756        default:
757            error_set(error, lex, "unexpected token");
758            return NULL;
759    }
760
761    if(!json)
762        return NULL;
763
764    return json;
765}
766
767static json_t *parse_json(lex_t *lex, json_error_t *error)
768{
769    lex_scan(lex, error);
770    if(lex->token != '[' && lex->token != '{') {
771        error_set(error, lex, "'[' or '{' expected");
772        return NULL;
773    }
774
775    return parse_value(lex, error);
776}
777
/* Read cursor over a NUL-terminated string; used by json_loads(). */
typedef struct
{
    const char *data;   /* the text being read */
    int pos;            /* index of the next unread byte */
} string_data_t;

/*
 * get_func for string input.
 *
 * Returns the next byte as an unsigned char converted to int, the same
 * way fgetc() does, so bytes >= 0x80 are never negative and 0xFF cannot
 * be mistaken for EOF. Returns EOF, without advancing, once the
 * terminating NUL is reached.
 */
static int string_get(void *data)
{
    string_data_t *stream = (string_data_t *)data;
    char c = stream->data[stream->pos];

    if(c == '\0')
        return EOF;

    stream->pos++;
    return (unsigned char)c;
}

/* eof_func for string input: non-zero once the terminating NUL has been
   reached. */
static int string_eof(void *data)
{
    string_data_t *stream = (string_data_t *)data;
    return (stream->data[stream->pos] == '\0');
}
803
804json_t *json_loads(const char *string, size_t flags, json_error_t *error)
805{
806    lex_t lex;
807    json_t *result;
808    (void)flags; /* unused */
809
810    string_data_t stream_data = {string, 0};
811
812    if(lex_init(&lex, string_get, string_eof, (void *)&stream_data))
813        return NULL;
814
815    jsonp_error_init(error, "<string>");
816
817    result = parse_json(&lex, error);
818    if(!result)
819        goto out;
820
821    lex_scan(&lex, error);
822    if(lex.token != TOKEN_EOF) {
823        error_set(error, &lex, "end of file expected");
824        json_decref(result);
825        result = NULL;
826    }
827
828out:
829    lex_close(&lex);
830    return result;
831}
832
833json_t *json_loadf(FILE *input, size_t flags, json_error_t *error)
834{
835    lex_t lex;
836    const char *source;
837    json_t *result;
838    (void)flags; /* unused */
839
840    if(lex_init(&lex, (get_func)fgetc, (eof_func)feof, input))
841        return NULL;
842
843    if(input == stdin)
844        source = "<stdin>";
845    else
846        source = "<stream>";
847
848    jsonp_error_init(error, source);
849
850    result = parse_json(&lex, error);
851    if(!result)
852        goto out;
853
854    lex_scan(&lex, error);
855    if(lex.token != TOKEN_EOF) {
856        error_set(error, &lex, "end of file expected");
857        json_decref(result);
858        result = NULL;
859    }
860
861out:
862    lex_close(&lex);
863    return result;
864}
865
866json_t *json_load_file(const char *path, size_t flags, json_error_t *error)
867{
868    json_t *result;
869    FILE *fp;
870
871    jsonp_error_init(error, path);
872
873    fp = fopen(path, "r");
874    if(!fp)
875    {
876        error_set(error, NULL, "unable to open %s: %s",
877                  path, strerror(errno));
878        return NULL;
879    }
880
881    result = json_loadf(fp, flags, error);
882
883    fclose(fp);
884    return result;
885}