PageRenderTime 161ms CodeModel.GetById 40ms app.highlight 69ms RepoModel.GetById 32ms app.codeStats 0ms

/c_src/encoder.c

https://github.com/ecd/jiffy
C | 840 lines | 715 code | 112 blank | 13 comment | 161 complexity | 1dbe85ab21acda35963efda461a71deb MD5 | raw file
  1// This file is part of Jiffy released under the MIT license.
  2// See the LICENSE file for more information.
  3
  4#include <assert.h>
  5#include <stdio.h>
  6#include <string.h>
  7
  8#include "erl_nif.h"
  9#include "jiffy.h"
 10
 11#define BIN_INC_SIZE 2048
 12
 13#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
 14
 15#define MAYBE_PRETTY(e)             \
 16do {                                \
 17    if(e->pretty) {                 \
 18        if(!enc_shift(e))           \
 19            return 0;               \
 20    }                               \
 21} while(0)
 22
 23#if WINDOWS || WIN32
 24#define inline __inline
 25#define snprintf  _snprintf
 26#endif
 27
 28typedef struct {
 29    ErlNifEnv*      env;
 30    jiffy_st*       atoms;
 31
 32    size_t          bytes_per_iter;
 33
 34    int             uescape;
 35    int             pretty;
 36
 37    int             shiftcnt;
 38    int             count;
 39
 40    size_t          iolen;
 41    size_t          iosize;
 42    ERL_NIF_TERM    iolist;
 43    ErlNifBinary    bin;
 44    ErlNifBinary*   curr;
 45
 46
 47    char*           p;
 48    unsigned char*  u;
 49    size_t          i;
 50} Encoder;
 51
 52
 53// String constants for pretty printing.
 54// Every string starts with its length.
 55#define NUM_SHIFTS 8
 56static char* shifts[NUM_SHIFTS] = {
 57    "\x01\n",
 58    "\x03\n  ",
 59    "\x05\n    ",
 60    "\x07\n      ",
 61    "\x09\n        ",
 62    "\x0b\n          ",
 63    "\x0d\n            ",
 64    "\x0f\n              "
 65};
 66
 67
 68Encoder*
 69enc_new(ErlNifEnv* env)
 70{
 71    jiffy_st* st = (jiffy_st*) enif_priv_data(env);
 72    Encoder* e = enif_alloc_resource(st->res_enc, sizeof(Encoder));
 73
 74    e->atoms = st;
 75    e->bytes_per_iter = DEFAULT_BYTES_PER_ITER;
 76    e->uescape = 0;
 77    e->pretty = 0;
 78    e->shiftcnt = 0;
 79    e->count = 0;
 80
 81    e->iolen = 0;
 82    e->iosize = 0;
 83    e->curr = &(e->bin);
 84    if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
 85        e->curr = NULL;
 86        enif_release_resource(e);
 87        return NULL;
 88    }
 89
 90    memset(e->curr->data, 0, e->curr->size);
 91
 92    e->p = (char*) e->curr->data;
 93    e->u = (unsigned char*) e->curr->data;
 94    e->i = 0;
 95
 96    return e;
 97}
 98
 99int
100enc_init(Encoder* e, ErlNifEnv* env)
101{
102    e->env = env;
103    return 1;
104}
105
106void
107enc_destroy(ErlNifEnv* env, void* obj)
108{
109    Encoder* e = (Encoder*) obj;
110
111    if(e->curr != NULL) {
112        enif_release_binary(e->curr);
113    }
114}
115
116ERL_NIF_TERM
117enc_error(Encoder* e, const char* msg)
118{
119    //assert(0 && msg);
120    return make_error(e->atoms, e->env, msg);
121}
122
123static inline int
124enc_ensure(Encoder* e, size_t req)
125{
126    size_t need = e->curr->size;
127    while(req >= (need - e->i)) need <<= 1;
128
129    if(need != e->curr->size) {
130        if(!enif_realloc_binary(e->curr, need)) {
131            return 0;
132        }
133        e->p = (char*) e->curr->data;
134        e->u = (unsigned char*) e->curr->data;
135    }
136
137    return 1;
138}
139
140int
141enc_result(Encoder* e, ERL_NIF_TERM* value)
142{
143    if(e->i != e->curr->size) {
144        if(!enif_realloc_binary(e->curr, e->i)) {
145            return 0;
146        }
147    }
148
149    *value = enif_make_binary(e->env, e->curr);
150    e->curr = NULL;
151    return 1;
152}
153
154int
155enc_done(Encoder* e, ERL_NIF_TERM* value)
156{
157    ERL_NIF_TERM last;
158
159    if(e->iolen == 0) {
160        return enc_result(e, value);
161    }
162
163    if(e->i > 0 ) {
164        if(!enc_result(e, &last)) {
165            return 0;
166        }
167
168        e->iolist = enif_make_list_cell(e->env, last, e->iolist);
169        e->iolen++;
170    }
171
172    *value = e->iolist;
173    return 1;
174}
175
176static inline int
177enc_unknown(Encoder* e, ERL_NIF_TERM value)
178{
179    ErlNifBinary* bin = e->curr;
180    ERL_NIF_TERM curr;
181
182    if(e->i > 0) {
183        if(!enc_result(e, &curr)) {
184            return 0;
185        }
186
187        e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
188        e->iolen++;
189    }
190
191    e->iolist = enif_make_list_cell(e->env, value, e->iolist);
192    e->iolen++;
193    
194    // Track the total number of bytes produced before
195    // splitting our IO buffer. We add 16 to this value
196    // as a rough estimate of the number of bytes that
197    // a bignum might produce when encoded.
198    e->iosize += e->i + 16;
199
200    // Reinitialize our binary for the next buffer.
201    e->curr = bin;
202    if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
203        return 0;
204    }
205
206    memset(e->curr->data, 0, e->curr->size);
207
208    e->p = (char*) e->curr->data;
209    e->u = (unsigned char*) e->curr->data;
210    e->i = 0;
211
212    return 1;
213}
214
215static inline int
216enc_literal(Encoder* e, const char* literal, size_t len)
217{
218    if(!enc_ensure(e, len)) {
219        return 0;
220    }
221
222    memcpy(&(e->p[e->i]), literal, len);
223    e->i += len;
224    e->count++;
225    return 1;
226}
227
228static inline int
229enc_string(Encoder* e, ERL_NIF_TERM val)
230{
231    ErlNifBinary bin;
232    char atom[512];
233
234    unsigned char* data;
235    size_t size;
236
237    int esc_extra = 0;
238    int ulen;
239    int uval;
240    int i;
241
242    if(enif_is_binary(e->env, val)) {
243        if(!enif_inspect_binary(e->env, val, &bin)) {
244            return 0;
245        }
246        data = bin.data;
247        size = bin.size;
248    } else if(enif_is_atom(e->env, val)) {
249        if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
250            return 0;
251        }
252        data = (unsigned char*) atom;
253        size = strlen(atom);
254    } else {
255        return 0;
256    }
257
258    i = 0;
259    while(i < size) {
260        switch((char) data[i]) {
261            case '\"':
262            case '\\':
263            case '\b':
264            case '\f':
265            case '\n':
266            case '\r':
267            case '\t':
268                esc_extra += 1;
269                i++;
270                continue;
271            default:
272                if(data[i] < 0x20) {
273                    esc_extra += 5;
274                    i++;
275                    continue;
276                } else if(data[i] < 0x80) {
277                    i++;
278                    continue;
279                }
280                ulen = utf8_validate(&(data[i]), size - i);
281                if(ulen < 0) {
282                    return 0;
283                }
284                if(e->uescape) {
285                    uval = utf8_to_unicode(&(data[i]), ulen);
286                    if(uval < 0) {
287                        return 0;
288                    }
289                    esc_extra += utf8_esc_len(uval);
290                    if(ulen < 0) {
291                        return 0;
292                    }
293                }
294                i += ulen;
295        }
296    }
297
298    if(!enc_ensure(e, size + esc_extra + 2)) {
299        return 0;
300    }
301
302    e->p[e->i++] = '\"';
303
304    i = 0;
305    while(i < size) {
306        switch((char) data[i]) {
307            case '\"':
308            case '\\':
309                e->p[e->i++] = '\\';
310                e->u[e->i++] = data[i];
311                i++;
312                continue;
313            case '\b':
314                e->p[e->i++] = '\\';
315                e->p[e->i++] = 'b';
316                i++;
317                continue;
318            case '\f':
319                e->p[e->i++] = '\\';
320                e->p[e->i++] = 'f';
321                i++;
322                continue;
323            case '\n':
324                e->p[e->i++] = '\\';
325                e->p[e->i++] = 'n';
326                i++;
327                continue;
328            case '\r':
329                e->p[e->i++] = '\\';
330                e->p[e->i++] = 'r';
331                i++;
332                continue;
333            case '\t':
334                e->p[e->i++] = '\\';
335                e->p[e->i++] = 't';
336                i++;
337                continue;
338            default:
339                if(data[i] < 0x20) {
340                    ulen = unicode_uescape(data[i], &(e->p[e->i]));
341                    if(ulen < 0) {
342                        return 0;
343                    }
344                    e->i += ulen;
345                    i++;
346                } else if((data[i] & 0x80) && e->uescape) {
347                    uval = utf8_to_unicode(&(data[i]), size-i);
348                    if(uval < 0) {
349                        return 0;
350                    }
351
352                    ulen = unicode_uescape(uval, &(e->p[e->i]));
353                    if(ulen < 0) {
354                        return 0;
355                    }
356                    e->i += ulen;
357
358                    ulen = utf8_len(uval);
359                    if(ulen < 0) {
360                        return 0;
361                    }
362                    i += ulen;
363                } else {
364                    e->u[e->i++] = data[i++];
365                }
366        }
367    }
368
369    e->p[e->i++] = '\"';
370    e->count++;
371
372    return 1;
373}
374
375static inline int
376enc_long(Encoder* e, ErlNifSInt64 val)
377{
378    if(!enc_ensure(e, 32)) {
379        return 0;
380    }
381
382#if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_))
383    snprintf(&(e->p[e->i]), 32, "%ld", val);
384#elif SIZEOF_LONG == 8
385    snprintf(&(e->p[e->i]), 32, "%ld", val);
386#else
387    snprintf(&(e->p[e->i]), 32, "%lld", val);
388#endif
389
390    e->i += strlen(&(e->p[e->i]));
391    e->count++;
392
393    return 1;
394}
395
396static inline int
397enc_double(Encoder* e, double val)
398{
399    char* start;
400    size_t len;
401
402    if(!enc_ensure(e, 32)) {
403        return 0;
404    }
405
406    start = &(e->p[e->i]);
407
408    if(!double_to_shortest(start, e->curr->size, &len, val)) {
409        return 0;
410    }
411
412    e->i += len;
413    e->count++;
414    return 1;
415}
416
417static inline int
418enc_char(Encoder* e, char c)
419{
420    if(!enc_ensure(e, 1)) {
421        return 0;
422    }
423
424    e->p[e->i++] = c;
425    return 1;
426}
427
428static int
429enc_shift(Encoder* e) {
430    int i;
431    char* shift;
432    assert(e->shiftcnt >= 0 && "Invalid shift count.");
433    shift = shifts[MIN(e->shiftcnt, NUM_SHIFTS-1)];
434
435    if(!enc_literal(e, shift + 1, *shift))
436        return 0;
437
438    // Finish the rest of this shift it's it bigger than
439    // our largest predefined constant.
440    for(i = NUM_SHIFTS - 1; i < e->shiftcnt; i++) {
441        if(!enc_literal(e, "  ", 2))
442            return 0;
443    }
444
445    return 1;
446}
447
448static inline int
449enc_start_object(Encoder* e)
450{
451    e->count++;
452    e->shiftcnt++;
453    if(!enc_char(e, '{'))
454        return 0;
455    MAYBE_PRETTY(e);
456    return 1;
457}
458
459static inline int
460enc_end_object(Encoder* e)
461{
462    e->shiftcnt--;
463    MAYBE_PRETTY(e);
464    return enc_char(e, '}');
465}
466
467static inline int
468enc_start_array(Encoder* e)
469{
470    e->count++;
471    e->shiftcnt++;
472    if(!enc_char(e, '['))
473        return 0;
474    MAYBE_PRETTY(e);
475    return 1;
476}
477
478static inline int
479enc_end_array(Encoder* e)
480{
481    e->shiftcnt--;
482    MAYBE_PRETTY(e);
483    return enc_char(e, ']');
484}
485
486static inline int
487enc_colon(Encoder* e)
488{
489    if(e->pretty)
490        return enc_literal(e, " : ", 3);
491    return enc_char(e, ':');
492}
493
494static inline int
495enc_comma(Encoder* e)
496{
497    if(!enc_char(e, ','))
498        return 0;
499    MAYBE_PRETTY(e);
500    return 1;
501}
502
#if MAP_TYPE_PRESENT
// Convert an Erlang map into the 1-tuple-of-proplist form
// `{[{Key, Value}, ...]}` that the encoder treats as a JSON object.
//
// On success the converted term is stored in `*out` and 1 is returned.
// On failure a diagnostic is printed to stderr and 0 is returned.
// The map iterator is destroyed on every path once it has been created.
int
enc_map_to_ejson(ErlNifEnv* env, ERL_NIF_TERM map, ERL_NIF_TERM* out)
{
    ErlNifMapIterator iter;
    size_t size;

    ERL_NIF_TERM list;
    ERL_NIF_TERM tuple;
    ERL_NIF_TERM key;
    ERL_NIF_TERM val;

    if(!enif_get_map_size(env, map, &size)) {
        fprintf(stderr, "bad map size\r\n");
        return 0;
    }

    list = enif_make_list(env, 0);

    // Empty map: no iterator needed.
    if(size == 0) {
        *out = enif_make_tuple1(env, list);
        return 1;
    }

    if(!enif_map_iterator_create(env, map, &iter, ERL_NIF_MAP_ITERATOR_HEAD)) {
        fprintf(stderr, "bad iterator create\r\n");
        return 0;
    }

    do {
        if(!enif_map_iterator_get_pair(env, &iter, &key, &val)) {
            fprintf(stderr, "bad get pair\r\n");
            enif_map_iterator_destroy(env, &iter);
            return 0;
        }
        tuple = enif_make_tuple2(env, key, val);
        list = enif_make_list_cell(env, tuple, list);
    } while(enif_map_iterator_next(env, &iter));

    enif_map_iterator_destroy(env, &iter);

    *out = enif_make_tuple1(env, list);
    return 1;
}
#endif
545
546ERL_NIF_TERM
547encode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
548{
549    jiffy_st* st = (jiffy_st*) enif_priv_data(env);
550    Encoder* e;
551
552    ERL_NIF_TERM opts;
553    ERL_NIF_TERM val;
554    ERL_NIF_TERM tmp_argv[3];
555
556    if(argc != 2) {
557        return enif_make_badarg(env);
558    }
559
560    e = enc_new(env);
561    if(e == NULL) {
562        return make_error(st, env, "internal_error");
563    }
564
565    tmp_argv[0] = enif_make_resource(env, e);
566    tmp_argv[1] = enif_make_list(env, 1, argv[0]);
567    tmp_argv[2] = enif_make_list(env, 0);
568
569    enif_release_resource(e);
570
571    opts = argv[1];
572    if(!enif_is_list(env, opts)) {
573        return enif_make_badarg(env);
574    }
575
576    while(enif_get_list_cell(env, opts, &val, &opts)) {
577        if(enif_compare(val, e->atoms->atom_uescape) == 0) {
578            e->uescape = 1;
579        } else if(enif_compare(val, e->atoms->atom_pretty) == 0) {
580            e->pretty = 1;
581        } else if(enif_compare(val, e->atoms->atom_force_utf8) == 0) {
582            // Ignore, handled in Erlang
583        } else if(get_bytes_per_iter(env, val, &(e->bytes_per_iter))) {
584            continue;
585        } else {
586            return enif_make_badarg(env);
587        }
588    }
589
590    return encode_iter(env, 3, tmp_argv);
591}
592
593ERL_NIF_TERM
594encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
595{
596    Encoder* e;
597    jiffy_st* st = (jiffy_st*) enif_priv_data(env);
598
599    ERL_NIF_TERM ret = 0;
600
601    ERL_NIF_TERM stack;
602    ERL_NIF_TERM curr;
603    ERL_NIF_TERM item;
604    const ERL_NIF_TERM* tuple;
605    int arity;
606    ErlNifSInt64 lval;
607    double dval;
608
609    size_t start;
610    size_t processed;
611
612    if(argc != 3) {
613        return enif_make_badarg(env);
614    } else if(!enif_get_resource(env, argv[0], st->res_enc, (void**) &e)) {
615        return enif_make_badarg(env);
616    } else if(!enif_is_list(env, argv[1])) {
617        return enif_make_badarg(env);
618    } else if(!enif_is_list(env, argv[2])) {
619        return enif_make_badarg(env);
620    }
621
622    if(!enc_init(e, env)) {
623        return enif_make_badarg(env);
624    }
625
626    stack = argv[1];
627    e->iolist = argv[2];
628
629    start = e->iosize + e->i;
630
631    while(!enif_is_empty_list(env, stack)) {
632
633        processed = (e->iosize + e->i) - start;
634        if(should_yield(processed, e->bytes_per_iter)) {
635            consume_timeslice(env, processed, e->bytes_per_iter);
636            return enif_make_tuple4(
637                    env,
638                    st->atom_iter,
639                    argv[0],
640                    stack,
641                    e->iolist
642                );
643        }
644
645        if(!enif_get_list_cell(env, stack, &curr, &stack)) {
646            ret = enc_error(e, "internal_error");
647            goto done;
648        }
649        if(enif_is_identical(curr, e->atoms->ref_object)) {
650            if(!enif_get_list_cell(env, stack, &curr, &stack)) {
651                ret = enc_error(e, "internal_error");
652                goto done;
653            }
654            if(enif_is_empty_list(env, curr)) {
655                if(!enc_end_object(e)) {
656                    ret = enc_error(e, "internal_error");
657                    goto done;
658                }
659                continue;
660            }
661            if(!enif_get_list_cell(env, curr, &item, &curr)) {
662                ret = enc_error(e, "internal_error");
663                goto done;
664            }
665            if(!enif_get_tuple(env, item, &arity, &tuple)) {
666                ret = enc_error(e, "invalid_object_pair");
667                goto done;
668            }
669            if(arity != 2) {
670                ret = enc_error(e, "invalid_object_pair");
671                goto done;
672            }
673            if(!enc_comma(e)) {
674                ret = enc_error(e, "internal_error");
675                goto done;
676            }
677            if(!enc_string(e, tuple[0])) {
678                ret = enc_error(e, "invalid_object_key");
679                goto done;
680            }
681            if(!enc_colon(e)) {
682                ret = enc_error(e, "internal_error");
683                goto done;
684            }
685            stack = enif_make_list_cell(env, curr, stack);
686            stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
687            stack = enif_make_list_cell(env, tuple[1], stack);
688        } else if(enif_is_identical(curr, e->atoms->ref_array)) {
689            if(!enif_get_list_cell(env, stack, &curr, &stack)) {
690                ret = enc_error(e, "internal_error");
691                goto done;
692            }
693            if(enif_is_empty_list(env, curr)) {
694                if(!enc_end_array(e)) {
695                    ret = enc_error(e, "internal_error");
696                    goto done;
697                }
698                continue;
699            }
700            if(!enc_comma(e)) {
701                ret = enc_error(e, "internal_error");
702                goto done;
703            }
704            if(!enif_get_list_cell(env, curr, &item, &curr)) {
705                ret = enc_error(e, "internal_error");
706                goto done;
707            }
708            stack = enif_make_list_cell(env, curr, stack);
709            stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
710            stack = enif_make_list_cell(env, item, stack);
711        } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
712            if(!enc_literal(e, "null", 4)) {
713                ret = enc_error(e, "null");
714                goto done;
715            }
716        } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
717            if(!enc_literal(e, "true", 4)) {
718                ret = enc_error(e, "true");
719                goto done;
720            }
721        } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
722            if(!enc_literal(e, "false", 5)) {
723                ret = enc_error(e, "false");
724                goto done;
725            }
726        } else if(enif_is_binary(env, curr)) {
727            if(!enc_string(e, curr)) {
728                ret = enc_error(e, "invalid_string");
729                goto done;
730            }
731        } else if(enif_is_atom(env, curr)) {
732            if(!enc_string(e, curr)) {
733                ret = enc_error(e, "invalid_string");
734                goto done;
735            }
736        } else if(enif_get_int64(env, curr, &lval)) {
737            if(!enc_long(e, lval)) {
738                ret = enc_error(e, "internal_error");
739                goto done;
740            }
741        } else if(enif_get_double(env, curr, &dval)) {
742            if(!enc_double(e, dval)) {
743                ret = enc_error(e, "internal_error");
744                goto done;
745            }
746        } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
747            if(arity != 1) {
748                ret = enc_error(e, "invalid_ejson");
749                goto done;
750            }
751            if(!enif_is_list(env, tuple[0])) {
752                ret = enc_error(e, "invalid_object");
753                goto done;
754            }
755            if(!enc_start_object(e)) {
756                ret = enc_error(e, "internal_error");
757                goto done;
758            }
759            if(enif_is_empty_list(env, tuple[0])) {
760                if(!enc_end_object(e)) {
761                    ret = enc_error(e, "internal_error");
762                    goto done;
763                }
764                continue;
765            }
766            if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
767                ret = enc_error(e, "internal_error");
768                goto done;
769            }
770            if(!enif_get_tuple(env, item, &arity, &tuple)) {
771                ret = enc_error(e, "invalid_object_member");
772                goto done;
773            }
774            if(arity != 2) {
775                ret = enc_error(e, "invalid_object_member_arity");
776                goto done;
777            }
778            if(!enc_string(e, tuple[0])) {
779                ret = enc_error(e, "invalid_object_member_key");
780                goto done;
781            }
782            if(!enc_colon(e)) {
783                ret = enc_error(e, "internal_error");
784                goto done;
785            }
786            stack = enif_make_list_cell(env, curr, stack);
787            stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
788            stack = enif_make_list_cell(env, tuple[1], stack);
789#if MAP_TYPE_PRESENT
790        } else if(enif_is_map(env, curr)) {
791            if(!enc_map_to_ejson(env, curr, &curr)) {
792                ret = enc_error(e, "internal_error");
793                goto done;
794            }
795            stack = enif_make_list_cell(env, curr, stack);
796#endif
797        } else if(enif_is_list(env, curr)) {
798            if(!enc_start_array(e)) {
799                ret = enc_error(e, "internal_error");
800                goto done;
801            }
802            if(enif_is_empty_list(env, curr)) {
803                if(!enc_end_array(e)) {
804                    ret = enc_error(e, "internal_error");
805                    goto done;
806                }
807                continue;
808            }
809            if(!enif_get_list_cell(env, curr, &item, &curr)) {
810                ret = enc_error(e, "internal_error");
811                goto done;
812            }
813            stack = enif_make_list_cell(env, curr, stack);
814            stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
815            stack = enif_make_list_cell(env, item, stack);
816        } else {
817            if(!enc_unknown(e, curr)) {
818                ret = enc_error(e, "internal_error");
819                goto done;
820            }
821        }
822    }
823
824    if(!enc_done(e, &item)) {
825        ret = enc_error(e, "internal_error");
826        goto done;
827    }
828
829    if(e->iolen == 0) {
830        ret = item;
831    } else {
832        ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
833    }
834
835done:
836    processed = (e->iosize + e->i) - start;
837    consume_timeslice(env, processed, e->bytes_per_iter);
838
839    return ret;
840}