PageRenderTime 281ms CodeModel.GetById 61ms app.highlight 182ms RepoModel.GetById 29ms app.codeStats 0ms

/mordor/string.cpp

http://github.com/mozy/mordor
C++ | 854 lines | 754 code | 88 blank | 12 comment | 161 complexity | 709add1edbe3dac740180b2e2b727250 MD5 | raw file
  1// Copyright (c) 2009 - Mozy, Inc.
  2
  3#include <algorithm>
  4
  5#include <string.h>
  6
  7#include <openssl/md5.h>
  8#include <openssl/sha.h>
  9
 10#ifdef HAVE_CONFIG_H
 11#include "autoconfig.h"
 12#ifdef HAVE_ICONV
 13#include <iconv.h>
 14#endif
 15#endif
 16
 17#include "mordor/string.h"
 18#include "mordor/util.h"
 19
 20#include "assert.h"
 21#include "exception.h"
 22
 23#ifdef MSVC
 24#pragma comment(lib, "libeay32")
 25#endif
 26
 27namespace Mordor {
 28
 29std::string
 30base64decode(const std::string &src)
 31{
 32    std::string result;
 33    result.resize(src.size() * 3 / 4);
 34    char *writeBuf = &result[0];
 35
 36    const char* ptr = src.c_str();
 37    const char* end = ptr + src.size();
 38
 39    while(ptr < end) {
 40        int i = 0;
 41        int padding = 0;
 42        int packed = 0;
 43        for(; i < 4 && ptr < end; ++i, ++ptr) {
 44            if(*ptr == '=') {
 45                ++padding;
 46                packed <<= 6;
 47                continue;
 48            }
 49
 50            // padding with "=" only
 51            if (padding > 0)
 52                return "";
 53
 54            int val = 0;
 55            if(*ptr >= 'A' && *ptr <= 'Z')
 56                val = *ptr - 'A';
 57            else if(*ptr >= 'a' && *ptr <= 'z')
 58                val = *ptr - 'a' + 26;
 59            else if(*ptr >= '0' && *ptr <= '9')
 60                val = *ptr - '0' + 52;
 61            else if(*ptr == '+')
 62                val = 62;
 63            else if(*ptr == '/')
 64                val = 63;
 65            else
 66                return ""; // invalid character
 67
 68            packed = (packed << 6) | val;
 69        }
 70        if (i != 4)
 71            return "";
 72        if (padding > 0 && ptr != end)
 73            return "";
 74        if (padding > 2)
 75            return "";
 76
 77        *writeBuf++ = (char)((packed >> 16) & 0xff);
 78        if(padding != 2)
 79            *writeBuf++ = (char)((packed >> 8) & 0xff);
 80        if(padding == 0)
 81            *writeBuf++ = (char)(packed & 0xff);
 82    }
 83
 84    result.resize(writeBuf - result.c_str());
 85    return result;
 86}
 87
 88std::string
 89base64encode(const std::string& data)
 90{
 91    return base64encode(data.c_str(), data.size());
 92}
 93
 94std::string
 95base64encode(const void* data, size_t len)
 96{
 97    const char* base64 =
 98        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 99
100    std::string ret;
101    ret.reserve(len * 4 / 3 + 2);
102
103    const unsigned char* ptr = (const unsigned char*)data;
104    const unsigned char* end = ptr + len;
105
106    while(ptr < end) {
107        unsigned int packed = 0;
108        int i = 0;
109        int padding = 0;
110        for(; i < 3 && ptr < end; ++i, ++ptr)
111            packed = (packed << 8) | *ptr;
112        if(i == 2)
113            padding = 1;
114        else if (i == 1)
115            padding = 2;
116        for(; i < 3; ++i)
117            packed <<= 8;
118
119        ret.append(1, base64[packed >> 18]);
120        ret.append(1, base64[(packed >> 12) & 0x3f]);
121        if(padding != 2)
122            ret.append(1, base64[(packed >> 6) & 0x3f]);
123        if(padding == 0)
124            ret.append(1, base64[packed & 0x3f]);
125        ret.append(padding, '=');
126    }
127
128    return ret;
129}
130
131std::string
132md5(const std::string &data)
133{
134    return hexstringFromData(md5sum(data).c_str(), MD5_DIGEST_LENGTH);
135}
136
137std::string
138sha1(const std::string &data)
139{
140    return hexstringFromData(sha1sum(data).c_str(), SHA_DIGEST_LENGTH);
141}
142
143std::string
144md5sum(const void *data, size_t len)
145{
146    MD5_CTX ctx;
147    MD5_Init(&ctx);
148    MD5_Update(&ctx, data, len);
149    std::string result;
150    result.resize(MD5_DIGEST_LENGTH);
151    MD5_Final((unsigned char*)&result[0], &ctx);
152    return result;
153}
154
155std::string
156md5sum(const std::string &data)
157{
158    return md5sum(data.c_str(), data.size());
159}
160
161std::string
162sha0sum(const void *data, size_t len)
163{
164    SHA_CTX ctx;
165    SHA_Init(&ctx);
166    SHA_Update(&ctx, data, len);
167    std::string result;
168    result.resize(SHA_DIGEST_LENGTH);
169    SHA_Final((unsigned char*)&result[0], &ctx);
170    return result;
171}
172
173std::string
174sha0sum(const std::string & data)
175{
176    return sha0sum(data.c_str(), data.length());
177}
178
179std::string
180sha1sum(const void *data, size_t len)
181{
182    SHA_CTX ctx;
183    SHA1_Init(&ctx);
184    SHA1_Update(&ctx, data, len);
185    std::string result;
186    result.resize(SHA_DIGEST_LENGTH);
187    SHA1_Final((unsigned char*)&result[0], &ctx);
188    return result;
189}
190
191std::string
192sha1sum(const std::string &data)
193{
194    return sha1sum(data.c_str(), data.size());
195}
196
// Unary functor that XORs every character it is applied to with a fixed
// mask; hmac() uses it with std::transform to build its padded keys.
struct xorStruct
{
    // The mask applied to each input character
    char m_value;

    xorStruct(char value)
        : m_value(value)
    {}

    char operator()(char in) const
    {
        return in ^ m_value;
    }
};
203
204template <class CTX,
205    int (*Init)(CTX *),
206    int (*Update)(CTX *, const void *, size_t),
207    int (*Final)(unsigned char *, CTX *),
208    unsigned int B, unsigned int L>
209std::string
210hmac(const std::string &text, const std::string &key)
211{
212    std::string keyLocal = key;
213    CTX ctx;
214    if (keyLocal.size() > B) {
215        Init(&ctx);
216        Update(&ctx, keyLocal.c_str(), keyLocal.size());
217        keyLocal.resize(L);
218        Final((unsigned char *)&keyLocal[0], &ctx);
219    }
220    keyLocal.append(B - keyLocal.size(), '\0');
221    std::string ipad = keyLocal, opad = keyLocal;
222    std::transform(ipad.begin(), ipad.end(), ipad.begin(), xorStruct(0x36));
223    std::transform(opad.begin(), opad.end(), opad.begin(), xorStruct(0x5c));
224    Init(&ctx);
225    Update(&ctx, ipad.c_str(), B);
226    Update(&ctx, text.c_str(), text.size());
227    std::string result;
228    result.resize(L);
229    Final((unsigned char *)&result[0], &ctx);
230    Init(&ctx);
231    Update(&ctx, opad.c_str(), B);
232    Update(&ctx, result.c_str(), L);
233    Final((unsigned char *)&result[0], &ctx);
234    return result;
235}
236
237std::string
238hmacMd5(const std::string &text, const std::string &key)
239{
240    return hmac<MD5_CTX,
241        &MD5_Init,
242        &MD5_Update,
243        &MD5_Final,
244        MD5_CBLOCK, MD5_DIGEST_LENGTH>
245        (text, key);
246}
247
248std::string
249hmacSha1(const std::string &text, const std::string &key)
250{
251    return hmac<SHA_CTX,
252        &SHA1_Init,
253        &SHA1_Update,
254        &SHA1_Final,
255        SHA_CBLOCK, SHA_DIGEST_LENGTH>
256        (text, key);
257}
258
259std::string
260hmacSha256(const std::string &text, const std::string &key)
261{
262    return hmac<SHA256_CTX,
263        &SHA256_Init,
264        &SHA256_Update,
265        &SHA256_Final,
266        SHA256_CBLOCK, SHA256_DIGEST_LENGTH>
267        (text, key);
268}
269
// Writes the lowercase hexadecimal form of len bytes at data into output.
//
// output must have room for 2 * len characters; no terminating NUL is
// written.
void
hexstringFromData(const void *data, size_t len, char *output)
{
    static const char digits[] = "0123456789abcdef";

    const unsigned char *in = (const unsigned char *)data;
    const unsigned char *const stop = in + len;
    for (; in != stop; ++in) {
        // High nibble first, then low nibble
        *output++ = digits[(*in >> 4) & 0xf];
        *output++ = digits[*in & 0xf];
    }
}
285
286std::string
287hexstringFromData(const void *data, size_t len)
288{
289    if (len == 0)
290        return std::string();
291    std::string result;
292    result.resize(len * 2);
293    hexstringFromData(data, len, &result[0]);
294    return result;
295}
296
297std::string
298hexstringFromData(const std::string &data)
299{
300    return hexstringFromData(data.c_str(), data.size());
301}
302
303void
304dataFromHexstring(const char *hexstring, size_t length, void *output)
305{
306    unsigned char *buf = (unsigned char *)output;
307    unsigned char byte;
308    if (length % 2 != 0)
309        MORDOR_THROW_EXCEPTION(std::invalid_argument("length"));
310    for (size_t i = 0; i < length; ++i) {
311        switch (hexstring[i]) {
312            case 'a':
313            case 'b':
314            case 'c':
315            case 'd':
316            case 'e':
317            case 'f':
318                byte = (hexstring[i] - 'a' + 10) << 4;
319                break;
320            case 'A':
321            case 'B':
322            case 'C':
323            case 'D':
324            case 'E':
325            case 'F':
326                byte = (hexstring[i] - 'A' + 10) << 4;
327                break;
328            case '0':
329            case '1':
330            case '2':
331            case '3':
332            case '4':
333            case '5':
334            case '6':
335            case '7':
336            case '8':
337            case '9':
338                byte = (hexstring[i] - '0') << 4;
339                break;
340            default:
341                MORDOR_THROW_EXCEPTION(std::invalid_argument("hexstring"));
342        }
343        ++i;
344        switch (hexstring[i]) {
345            case 'a':
346            case 'b':
347            case 'c':
348            case 'd':
349            case 'e':
350            case 'f':
351                byte |= hexstring[i] - 'a' + 10;
352                break;
353            case 'A':
354            case 'B':
355            case 'C':
356            case 'D':
357            case 'E':
358            case 'F':
359                byte |= hexstring[i] - 'A' + 10;
360                break;
361            case '0':
362            case '1':
363            case '2':
364            case '3':
365            case '4':
366            case '5':
367            case '6':
368            case '7':
369            case '8':
370            case '9':
371                byte |= hexstring[i] - '0';
372                break;
373            default:
374                MORDOR_THROW_EXCEPTION(std::invalid_argument("hexstring"));
375        }
376        *buf++ = byte;
377    }
378}
379
380std::string
381dataFromHexstring(const char *hexstring, size_t length)
382{
383    if (length % 2 != 0)
384        MORDOR_THROW_EXCEPTION(std::invalid_argument("length"));
385    if (length == 0)
386        return std::string();
387    std::string result;
388    result.resize(length / 2);
389    dataFromHexstring(hexstring, length, &result[0]);
390    return result;
391}
392
393std::string
394dataFromHexstring(const std::string &hexstring)
395{
396    return dataFromHexstring(hexstring.c_str(), hexstring.size());
397}
398
// Replaces every occurrence of the character find in str with replaceWith,
// in place.
void
replace(std::string &str, char find, char replaceWith)
{
    std::replace(str.begin(), str.end(), find, replaceWith);
}
408
// Replaces every occurrence of the character find in str with the string
// replaceWith.  Inserted text is never rescanned, so replaceWith may itself
// contain find.
//
// The result is assembled in a single pass and swapped into str, instead of
// rebuilding the whole string once per match.  Because str is only modified
// at the very end, passing str itself as replaceWith is well defined.
void
replace(std::string &str, char find, const std::string &replaceWith)
{
    size_t index = str.find(find);
    if (index == std::string::npos)
        return; // nothing to do; leave str untouched

    std::string rebuilt;
    size_t copied = 0; // start of the part of str not yet copied over
    do {
        rebuilt.append(str, copied, index - copied);
        rebuilt.append(replaceWith);
        copied = index + 1;
        index = str.find(find, copied);
    } while (index != std::string::npos);
    rebuilt.append(str, copied, std::string::npos);

    str.swap(rebuilt);
}
418
// Replaces every non-overlapping occurrence of find in str with replaceWith,
// scanning left to right.  Inserted text is never rescanned, so replaceWith
// may itself contain find.
//
// An empty find matches nowhere and leaves str unchanged (previously it
// matched at every position and the loop never terminated).
//
// The result is assembled in a single pass and swapped into str, so str is
// only modified at the very end.
void
replace(std::string &str, const std::string &find, const std::string &replaceWith)
{
    if (find.empty())
        return;

    size_t index = str.find(find);
    if (index == std::string::npos)
        return; // nothing to do; leave str untouched

    std::string rebuilt;
    size_t copied = 0; // start of the part of str not yet copied over
    do {
        rebuilt.append(str, copied, index - copied);
        rebuilt.append(replaceWith);
        copied = index + find.size();
        index = str.find(find, copied);
    } while (index != std::string::npos);
    rebuilt.append(str, copied, std::string::npos);

    str.swap(rebuilt);
}
428
429std::vector<std::string>
430split(const std::string &str, char delim, size_t max)
431{
432    MORDOR_ASSERT(max > 1);
433    std::vector<std::string> result;
434    if (str.empty())
435        return result;
436
437    size_t last = 0;
438    size_t pos = str.find(delim);
439    while (pos != std::string::npos) {
440        result.push_back(str.substr(last, pos - last));
441        last = pos + 1;
442        if (--max == 1)
443            break;
444        pos = str.find(delim, last);
445    }
446    result.push_back(str.substr(last));
447    return result;
448}
449
450std::vector<std::string>
451split(const std::string &str, const char *delims, size_t max)
452{
453    MORDOR_ASSERT(max > 1);
454    std::vector<std::string> result;
455    if (str.empty())
456        return result;
457
458    size_t last = 0;
459    size_t pos = str.find_first_of(delims);
460    while (pos != std::string::npos) {
461        result.push_back(str.substr(last, pos - last));
462        last = pos + 1;
463        if (--max == 1)
464            break;
465        pos = str.find_first_of(delims, last);
466    }
467    result.push_back(str.substr(last));
468    return result;
469}
470
471static bool endsWith(const std::string &string, const std::string &suffix)
472{
473    return string.size() >= suffix.size() &&
474        strnicmp(string.c_str() + string.size() - suffix.size(),
475            suffix.c_str(), suffix.size()) == 0;
476}
477
namespace {
// One row of the unit table consulted by stringToMicroseconds().
struct Suffix
{
    // Unit text looked for at the end of the input, e.g. "ms" or "hours"
    std::string suffix;
    // Number of microseconds represented by one such unit
    unsigned long long multiplier;
};
}
485
486unsigned long long stringToMicroseconds(const std::string &string)
487{
488    static const Suffix suffixes[] = {
489        { "microseconds", 1ull },
490        { "us", 1ull },
491        { "milliseconds", 1000ull },
492        { "ms", 1000ull },
493        { "seconds", 1000000ull },
494        { "minutes", 60 * 1000000ull },
495        { "m", 60 * 1000000ull },
496        { "hours", 60 * 60 * 1000000ull },
497        { "h", 60 * 60 * 1000000ull },
498        { "days", 24 * 60 * 60 * 1000000ull },
499        { "d", 24 * 60 * 60 * 1000000ull },
500        // s needs to go at the bottom since we're just suffix matching, and it
501        // would give a false positive for "minutes", etc.
502        { "s", 1000000ull }
503    };
504
505    std::string copy(string);
506    unsigned long long multiplier = 1ull;
507
508    // Strip leading whitespace
509    while (copy.size() > 1 && copy[0] == ' ')
510        copy = copy.substr(1);
511    // Strip trailing whitespace
512    while (copy.size() > 1 && copy[copy.size() -1] == ' ')
513        copy.resize(copy.size() - 1);
514
515    for (size_t i = 0; i < sizeof(suffixes)/sizeof(suffixes[0]); ++i) {
516        if (endsWith(copy, suffixes[i].suffix)) {
517            multiplier = suffixes[i].multiplier;
518            copy.resize(copy.size() - suffixes[i].suffix.size());
519            break;
520        }
521    }
522
523    // Strip whitespace between the number and the units
524    while (copy.size() > 1 && copy[copy.size() -1] == ' ')
525        copy.resize(copy.size() - 1);
526
527    // If there's a decimal point, use floating point arithmetic
528    if (copy.find('.') != std::string::npos)
529        return (unsigned long long)(multiplier *
530            boost::lexical_cast<double>(copy));
531    else
532        return multiplier * boost::lexical_cast<unsigned long long>(copy);
533}
534
535#ifdef WINDOWS
// Flags passed to WideCharToMultiByte (g_wcFlags) and MultiByteToWideChar
// (g_mbFlags) by the conversions below.  They start out as the
// *_ERR_INVALID_CHARS values; toUtf8() / toUtf16() reset the respective flag
// to 0 for all later calls when the API answers ERROR_INVALID_FLAGS.
// NOTE(review): presumably that error means the running Windows version does
// not support the flag -- confirm.
static DWORD g_wcFlags = WC_ERR_INVALID_CHARS;
static DWORD g_mbFlags = MB_ERR_INVALID_CHARS;
538
539std::string
540toUtf8(const utf16char *str, size_t len)
541{
542    if (len == (size_t)~0)
543        len = wcslen(str);
544    MORDOR_ASSERT(len < 0x80000000u);
545    std::string result;
546    if (len == 0)
547        return result;
548    int ret = WideCharToMultiByte(CP_UTF8, g_wcFlags, str, (int)len, NULL, 0, NULL, NULL);
549    MORDOR_ASSERT(ret >= 0);
550    if (ret == 0) {
551        if (lastError() == ERROR_INVALID_FLAGS) {
552            g_wcFlags = 0;
553            ret = WideCharToMultiByte(CP_UTF8, g_wcFlags, str, (int)len, NULL, 0, NULL, NULL);
554            MORDOR_ASSERT(ret >= 0);
555        }
556        if (ret == 0)
557            MORDOR_THROW_EXCEPTION_FROM_LAST_ERROR_API("WideCharToMultiByte");
558    }
559    result.resize(ret);
560    ret = WideCharToMultiByte(CP_UTF8, g_wcFlags, str, (int)len, &result[0], ret, NULL, NULL);
561    MORDOR_ASSERT(ret >= 0);
562    if (ret == 0)
563        MORDOR_THROW_EXCEPTION_FROM_LAST_ERROR_API("WideCharToMultiByte");
564    MORDOR_ASSERT(ret == result.size());
565
566    return result;
567}
568
569std::string
570toUtf8(const std::wstring &str)
571{
572    MORDOR_ASSERT(str.size() < 0x80000000u);
573    return toUtf8(str.c_str(), str.size());
574}
575
576utf16string
577toUtf16(const char *str, size_t len)
578{
579    if (len == (size_t)~0)
580        len = strlen(str);
581    MORDOR_ASSERT(len < 0x80000000u);
582    utf16string result;
583    if (len == 0)
584        return result;
585    int ret = MultiByteToWideChar(CP_UTF8, g_mbFlags, str, (int)len, NULL, 0);
586    MORDOR_ASSERT(ret >= 0);
587    if (ret == 0) {
588        if (lastError() == ERROR_INVALID_FLAGS) {
589            g_mbFlags = 0;
590            ret = MultiByteToWideChar(CP_UTF8, g_mbFlags, str, (int)len, NULL, 0);
591            MORDOR_ASSERT(ret >= 0);
592        }
593        if (ret == 0)
594            MORDOR_THROW_EXCEPTION_FROM_LAST_ERROR_API("MultiByteToWideChar");
595    }
596    result.resize(ret);
597    ret = MultiByteToWideChar(CP_UTF8, g_mbFlags, str, (int)len, &result[0], ret);
598    if (ret == 0)
599        MORDOR_THROW_EXCEPTION_FROM_LAST_ERROR_API("MultiByteToWideChar");
600    MORDOR_ASSERT(ret == result.size());
601
602    return result;
603}
604
605utf16string
606toUtf16(const std::string &str)
607{
608    MORDOR_ASSERT(str.size() < 0x80000000u);
609    return toUtf16(str.c_str(), str.size());
610}
611#elif defined (OSX)
612
613std::string
614toUtf8(CFStringRef string)
615{
616    const char *bytes = CFStringGetCStringPtr(string, kCFStringEncodingUTF8);
617    if (bytes)
618        return bytes;
619    std::string result;
620    CFIndex length = CFStringGetLength(string);
621    // Include extra byte for null termination
622    length = CFStringGetMaximumSizeForEncoding(length, kCFStringEncodingUTF8) + 1;
623    result.resize(length);
624    if (!CFStringGetCString(string, &result[0], length, kCFStringEncodingUTF8)) {
625        MORDOR_NOTREACHED();
626    }
627    result.resize(strlen(result.c_str()));
628    return result;
629}
630
631utf16string
632toUtf16(const char * str, size_t length)
633{
634    utf16string result;
635    if (length == 0u)
636        return result;
637    ScopedCFRef<CFStringRef> cfUtf8Str = CFStringCreateWithBytesNoCopy(NULL,
638        (const UInt8 *)str, (CFIndex)length, kCFStringEncodingUTF8, false,
639        kCFAllocatorNull);
640    if (!cfUtf8Str)
641        MORDOR_THROW_EXCEPTION(InvalidUnicodeException());
642#if MORDOR_BYTE_ORDER == MORDOR_LITTLE_ENDIAN
643    ScopedCFRef<CFDataRef> cfUtf16Data = CFStringCreateExternalRepresentation(
644        NULL, cfUtf8Str, kCFStringEncodingUTF16LE, 0);
645#elif MORDOR_BYTE_ORDER == MORDOR_BIG_ENDIAN
646    ScopedCFRef<CFDataRef> cfUtf16Data = CFStringCreateExternalRepresentation(
647        NULL, cfUtf8Str, kCFStringEncodingUTF16BE, 0);
648#endif
649    MORDOR_ASSERT(cfUtf16Data);
650    MORDOR_ASSERT(CFDataGetLength(cfUtf16Data) % sizeof(utf16char) == 0);
651    result.resize(CFDataGetLength(cfUtf16Data) / sizeof(utf16char));
652    CFDataGetBytes(cfUtf16Data, CFRangeMake(0,CFDataGetLength(cfUtf16Data)),
653        (UInt8 *)&result[0]);
654    return result;
655}
656
657utf16string
658toUtf16(const std::string &str)
659{
660    return toUtf16(str.c_str(), str.size());
661}
662
663#elif defined(HAVE_ICONV)
664
665namespace {
666
667class Iconv {
668    iconv_t m_iconv;
669public:
670    Iconv(const char* from, const char* to)
671        : m_iconv(iconv_open(to, from))
672    {
673        MORDOR_ASSERT(m_iconv != (iconv_t)-1);
674    }
675    ~Iconv() {
676        iconv_close(m_iconv);
677    }
678    size_t operator()(char** inbuf, size_t* inlen, char** outbuf, size_t* outlen) {
679        return iconv(m_iconv, inbuf, inlen, outbuf, outlen);
680    }
681};
682}
683
684utf16string
685toUtf16(const char *str, size_t len)
686{
687    utf16string result;
688    if (len == 0u)
689        return result;
690    result.resize(len);        // way enough (paired surrogate also)
691    size_t out_left = len * sizeof(utf16string::value_type);
692    char *out_buf = (char *)&result[0];
693    Iconv conv("UTF-8", "UTF-16LE");
694    size_t n = conv((char **)&str, &len, &out_buf, &out_left);
695    if (n == (size_t)-1) {
696        MORDOR_ASSERT(errno != E2BIG);
697        MORDOR_THROW_EXCEPTION(InvalidUnicodeException());
698    }
699    MORDOR_ASSERT(out_left % sizeof(utf16string::value_type) == 0);
700    result.resize(result.size() - out_left/sizeof(utf16string::value_type));
701    return result;
702}
703
704utf16string
705toUtf16(const std::string &str)
706{
707    return toUtf16(str.data(), str.size());
708}
709
710#endif
711
712std::string
713toUtf8(utf16char character)
714{
715    return toUtf8((utf32char)character);
716}
717
718std::string
719toUtf8(utf32char character)
720{
721    MORDOR_ASSERT(character <= 0x10ffff);
722    std::string result;
723    if (character <= 0x7f) {
724        result.append(1, (char)character);
725    } else if (character <= 0x7ff) {
726        result.resize(2);
727        result[0] = 0xc0 | ((character >> 6) & 0x1f);
728        result[1] = 0x80 | (character & 0x3f);
729    } else if (character <= 0xffff) {
730        result.resize(3);
731        result[0] = 0xe0 | ((character >> 12) & 0xf);
732        result[1] = 0x80 | ((character >> 6) & 0x3f);
733        result[2] = 0x80 | (character & 0x3f);
734    } else {
735        result.resize(4);
736        result[0] = 0xf0 | ((character >> 18) & 0x7);
737        result[1] = 0x80 | ((character >> 12) & 0x3f);
738        result[2] = 0x80 | ((character >> 6) & 0x3f);
739        result[3] = 0x80 | (character & 0x3f);
740    }
741    return result;
742}
743
744utf32char
745toUtf32(utf16char highSurrogate, utf16char lowSurrogate)
746{
747    MORDOR_ASSERT(isHighSurrogate(highSurrogate));
748    MORDOR_ASSERT(isLowSurrogate(lowSurrogate));
749    return ((((utf32char)highSurrogate - 0xd800) << 10) | ((utf32char)lowSurrogate - 0xdc00)) + 0x10000;
750}
751
752std::string
753toUtf8(utf16char highSurrogate, utf16char lowSurrogate)
754{
755    return toUtf8(toUtf32(highSurrogate, lowSurrogate));
756}
757
758bool isHighSurrogate(utf16char character)
759{
760    return character >= 0xd800 && character <= 0xdbff;
761}
762
763bool isLowSurrogate(utf16char character)
764{
765    return character >= 0xdc00 && character <= 0xdfff;
766}
767
// The UTF-8 validation code below is adapted almost verbatim from glib.
// For background see https://git.gnome.org/browse/glib/tree/glib/gutf8.c
// and http://en.wikipedia.org/wiki/UTF-8
typedef unsigned char guchar;
772
// Checks that str is a well-formed sequence of UTF-8 style multi-byte
// characters.
//
// Accepted: single bytes below 0x80 and 2- to 6-byte sequences whose
// continuation bytes all have the form 10xxxxxx and whose value is not
// "overlong" (i.e. could not have been written with fewer bytes).
// Rejected: stray continuation bytes, the lead bytes 0xfe / 0xff, sequences
// cut short by the end of the string, and overlong encodings.
//
// Note that, exactly as before, the legacy 5- and 6-byte forms and encoded
// surrogate values are accepted; this is a structural check only.
//
// Implemented without function-local macros so that no macro definitions
// leak into the rest of the translation unit and every return is visible.
bool
validateUtf8(const std::string &str)
{
    const unsigned char *bytes = (const unsigned char *)str.data();
    const size_t len = str.size();

    size_t pos = 0;
    while (pos < len) {
        const unsigned char lead = bytes[pos++];
        if (lead < 0x80)
            continue; // plain single-byte character

        // Classify the lead byte: number of continuation bytes expected,
        // payload bits carried by the lead byte itself, and the smallest
        // value that genuinely needs a sequence of this length.
        size_t trailing;
        unsigned int val;
        unsigned int min;
        if ((lead & 0xe0) == 0xc0) {        /* 110xxxxx */
            trailing = 1;
            val = lead & 0x1f;
            min = 1 << 7;
        } else if ((lead & 0xf0) == 0xe0) { /* 1110xxxx */
            trailing = 2;
            val = lead & 0x0f;
            min = 1 << 11;
        } else if ((lead & 0xf8) == 0xf0) { /* 11110xxx */
            trailing = 3;
            val = lead & 0x07;
            min = 1 << 16;
        } else if ((lead & 0xfc) == 0xf8) { /* 111110xx */
            trailing = 4;
            val = lead & 0x03;
            min = 1 << 21;
        } else if ((lead & 0xfe) == 0xfc) { /* 1111110x */
            trailing = 5;
            val = lead & 0x01;
            min = 1 << 26;
        } else {
            // Stray continuation byte, or 0xfe / 0xff
            return false;
        }

        for (size_t i = 0; i < trailing; ++i, ++pos) {
            if (pos >= len)
                return false; // sequence runs past the end of the string
            if ((bytes[pos] & 0xc0) != 0x80) /* 10xxxxxx */
                return false;
            val = (val << 6) | (bytes[pos] & 0x3f);
        }

        // Overlong encoding: the value would have fit in a shorter sequence
        if (val < min)
            return false;
    }
    return true;
}
839
840bool
841caseinsensitiveless::operator ()(const std::string &lhs, const std::string &rhs) const
842{
843    return stricmp(lhs.c_str(), rhs.c_str()) < 0;
844}
845
846std::ostream &operator <<(std::ostream &os, const charslice &slice)
847{
848    for (size_t i = 0; i < slice.m_len; ++i) {
849        os.put(slice.m_slice[i]);
850    }
851    return os;
852}
853
854}