/fctypes.cpp
C++ | 2621 lines | 2290 code | 85 blank | 246 comment | 214 complexity | b041340853aa66ac00698eb8c56868e9 MD5 | raw file
Possible License(s): Apache-2.0
- #include "gb-include.h"
- #include "Loop.h"
- #include "Entities.h"
- #include "UCWordIterator.h"
- #include "SafeBuf.h"
- #include "Xml.h"
- #include "XmlNode.h"
- #include "iana_charset.h"
- static bool g_clockInSync = false;
- bool g_clockNeedsUpdate = true;
- bool isClockInSync() {
- if ( g_hostdb.m_initialized && g_hostdb.m_hostId == 0 ) return true;
- return g_clockInSync;
- }
- bool print96 ( char *k ) {
- key_t *kp = (key_t *)k;
- printf("n1=0x%"XINT32" n0=0x%"XINT64"\n",(int32_t)kp->n1,(int64_t)kp->n0);
- return true;
- }
- bool print96 ( key_t *kp ) {
- printf("n1=0x%"XINT32" n0=0x%"XINT64"\n",(int32_t)kp->n1,(int64_t)kp->n0);
- return true;
- }
- bool print128 ( char *k ) {
- key128_t *kp = (key128_t *)k;
- printf("n1=0x%"XINT64" n0=0x%"XINT64"\n",(int64_t)kp->n1,(int64_t)kp->n0);
- return true;
- }
- bool print128 ( key128_t *kp ) {
- printf("n1=0x%"XINT64" n0=0x%"XINT64"\n",(int64_t)kp->n1,(int64_t)kp->n0);
- return true;
- }
- // . put all the maps here now
- // . convert "c" to lower case
- const unsigned char g_map_to_lower[] = {
- 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ,
- 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ,
- 16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 ,
- 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31 ,
- 32 , 33 , 34 , 35 , 36 , 37 , 38 , 39 ,
- 40 , 41 , 42 , 43 , 44 , 45 , 46 , 47 ,
- 48 , 49 , 50 , 51 , 52 , 53 , 54 , 55 ,
- 56 , 57 , 58 , 59 , 60 , 61 , 62 , 63 ,
- 64 , 'a','b' ,'c' ,'d' ,'e' ,'f' ,'g' ,
- 'h', 'i','j' ,'k' ,'l' ,'m' ,'n' ,'o' ,
- 'p', 'q','r' ,'s' ,'t' ,'u' ,'v' ,'w' ,
- 'x', 'y','z' , 91 , 92 ,93 ,94 ,95 ,
- 96 , 'a','b' ,'c' ,'d' ,'e' ,'f' ,'g' ,
- 'h', 'i','j' ,'k' ,'l' ,'m' ,'n' ,'o' ,
- 'p', 'q','r' ,'s' ,'t' ,'u' ,'v' ,'w' ,
- 'x', 'y','z' ,123 ,124 ,125 ,126 ,127 ,
- 128,129,130,131,132,133,134,135,
- 136,137,138,139,140,141,142,143,
- 144,145,146,147,148,149,150,151,
- 152,153,154,155,156,157,158,159,
- 160,161,162,163,164,165,166,167,
- 168,169,170,171,172,173,174,175,
- 176,177,178,179,180,181,182,183,
- 184,185,186,187,188,189,190,191,
- 224,225,226,227,228,229,230,231,
- 232,233,234,235,236,237,238,239,
- 240,241,242,243,244,245,246,247,
- 248,249,250,251,252,253,254,223,
- 224,225,226,227,228,229,230,231,
- 232,233,234,235,236,237,238,239,
- 240,241,242,243,244,245,246,247,
- 248,249,250,251,252,253,254,255
- };
- // converts ascii chars and IS_O chars to their lower case versions
- const unsigned char g_map_to_upper[] = {
- 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ,
- 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ,
- 16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 ,
- 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31 ,
- 32 , 33 , 34 , 35 , 36 , 37 , 38 , 39 ,
- 40 , 41 , 42 , 43 , 44 , 45 , 46 , 47 ,
- 48 , 49 , 50 , 51 , 52 , 53 , 54 , 55 ,
- 56 , 57 , 58 , 59 , 60 , 61 , 62 , 63 ,
- 64 , 'A','B' ,'C' ,'D' ,'E' ,'F' ,'G' ,
- 'H', 'I','J' ,'K' ,'L' ,'M' ,'N' ,'O' ,
- 'P', 'Q','R' ,'S' ,'T' ,'U' ,'V' ,'W' ,
- 'X', 'Y','Z' , 91 , 92 ,93 ,94 ,95 ,
- 96 , 'A','B' ,'C' ,'D' ,'E' ,'F' ,'G' ,
- 'H', 'I','J' ,'K' ,'L' ,'M' ,'N' ,'O' ,
- 'P', 'Q','R' ,'S' ,'T' ,'U' ,'V' ,'W' ,
- 'X', 'Y','Z' ,123 ,124 ,125 ,126 ,127 ,
- 128,129,130,131,132,133,134,135,
- 136,137,138,139,140,141,142,143,
- 144,145,146,147,148,149,150,151,
- 152,153,154,155,156,157,158,159,
- 160,161,162,163,164,165,166,167,
- 168,169,170,171,172,173,174,175,
- 176,177,178,179,180,181,182,183,
- 184,185,186,187,188,189,190,191,
- 192,193,194,195,196,197,198,199,
- 200,201,202,203,204,205,206,207,
- 208,209,210,211,212,213,214,215,
- 216,217,218,219,220,221,222,223,
- 192,193,194,195,196,197,198,199,
- 200,201,202,203,204,205,206,207,
- 208,209,210,211,212,213,214,215,
- 216,217,218,219,220,221,222,255
- };
- const unsigned char g_map_to_ascii[] = {
- 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ,
- 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ,
- 16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 ,
- 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31 ,
- 32 , 33 , 34 , 35 , 36 , 37 , 38 , 39 ,
- 40 , 41 , 42 , 43 , 44 , 45 , 46 , 47 ,
- 48 , 49 , 50 , 51 , 52 , 53 , 54 , 55 ,
- 56 , 57 , 58 , 59 , 60 , 61 , 62 , 63 ,
- 64 , 'A','B' ,'C' ,'D' ,'E' ,'F' ,'G' ,
- 'H', 'I','J' ,'K' ,'L' ,'M' ,'N' ,'O' ,
- 'P', 'Q','R' ,'S' ,'T' ,'U' ,'V' ,'W' ,
- 'X', 'Y','Z' , 91 , 92 ,93 ,94 ,95 ,
- 96 , 'a','b' ,'c' ,'d' ,'e' ,'f' ,'g' ,
- 'h', 'i','j' ,'k' ,'l' ,'m' ,'n' ,'o' ,
- 'p', 'q','r' ,'s' ,'t' ,'u' ,'v' ,'w' ,
- 'x', 'y','z' ,123 ,124 ,125 ,126 ,127 ,
- 128,129,130,131, 132,133,134,135,
- 136,137,138,139, 140,141,142,143,
- 144,145,146,147, 148,149,150,151,
- 152,153,154,155, 156,157,158,159,
- 160,161,162,'#', 'o','Y','|','S',
- 168,169,'a',171, 172,173,174,175,
- 176,177,'2','3', 180,'u',182,183,
- ' ','1','o',187, 188,189,190,'?',
- 'A','A','A','A', 'A','A','A'/*198-AE*/,'C',
- 'E','E','E','E', 'I','I','I','I',
- 'D','N','O','O', 'O','O','O','x',
- 'O','U','U','U', 'U','Y',222/*TH*/,'s'/*changed from B*/,
- 'a','a','a','a', 'a','a','a'/*230-ae*/,'c',
- 'e','e','e','e', 'i','i','i','i',
- 'd','n','o','o', 'o','o','o','/',
- 'o','u','u','u', 'u','y',254/*th*/,'y'
- };
- const char g_map_is_upper[] = {
- 0,0,0,0,0,0,0,0, // 0 -7
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,1,1,1,1,1,1,1, // 64
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,0,0,0,0,0, // 88
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 15*8
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 20*8
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1, // 192
- 1,1,1,1,1,1,1,1, // 200
- 1,1,1,1,1,1,1,0, // 208
- 1,1,1,1,1,1,1,1, // 216
- 0,0,0,0,0,0,0,0, // 224
- 0,0,0,0,0,0,0,0, // 232
- 0,0,0,0,0,0,0,0, // 240
- 0,0,0,0,0,0,0,0}; // 248
- // can this character be in an html (or xml) tag name??
- const char g_map_canBeInTagName[] = {
- 0,0,0,0,0,0,0,0, // 0 -7
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,1,0,0, // 40 -- hyphen can be in tag name
- 0,0,0,0,0,0,0,0, // 48
- 0,0,0,0,0,0,0,0, // 56
- 0,1,1,1,1,1,1,1, // 64
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,0,0,0,0,0, // 88
- 0,1,1,1,1,1,1,1, // 96
- 1,1,1,1,1,1,1,1, // 104
- 1,1,1,1,1,1,1,1, // 112
- 1,1,1,0,0,0,0,0, // 15*8 = 120
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 20*8 = 160
- 0,0,0,0,0,0,0,0, // 168
- 0,0,0,0,0,0,0,0, // 176
- 0,0,0,0,0,0,0,0, // 184
- 0,0,0,0,0,0,0,0, // 192
- 0,0,0,0,0,0,0,0, // 200
- 0,0,0,0,0,0,0,0, // 208
- 0,0,0,0,0,0,0,0, // 216
- 0,0,0,0,0,0,0,0, // 224
- 0,0,0,0,0,0,0,0, // 232
- 0,0,0,0,0,0,0,0, // 240
- 0,0,0,0,0,0,0,0}; // 248
- const char g_map_is_control [] = {
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 64
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 96
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,1, // 120, 127 = DEL
- 1,1,1,1,1,1,1,1, // 128
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,0,0,0,0,0,0,0, // 160 = backspace
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 192
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 224
- 0,0,0,0,0,0,0,0, // 232
- 0,0,0,0,0,0,0,0, // 240
- 0,0,0,0,0,0,0,0}; // 248
- // people mix windows 1252 into latin-1 so we have to be less restrictive here...
- const char g_map_is_binary[] = {
- 1,1,1,1,1,1,1,1,
- 1,0,0,1,1,0,1,1, // \t=9 \n = 10 \r = 13
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 64
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 96
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,1, // 120, 127 = DEL
- 0,1,1,1,1,1,1,1, // 128 (128 is a quote)
- 1,1,1,1,1,1,1,1, // 136
- 1,0,0,0,0,0,0,1, // 144 (145 146 147 are quotes, 148 is dash, 149 bullet,150 dash)
- 0,0,1,1,0,0,1,1, // 152 (152 & 153 are quotes, 156 & 157 are double quotes)
- 0,0,0,0,0,0,0,0, // 160 = backspace (some urls have this???)
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 192
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 224
- 0,0,0,0,0,0,0,0, // 232
- 0,0,0,0,0,0,0,0, // 240
- 0,0,0,0,0,0,0,0}; // 248
- // ' ' '\n' '\t' '\r'
- const char g_map_is_wspace[] = {
- 0,0,0,0,0,0,0,0, // 0 -7
- 0,1,1,0,0,1,0,0, // \t=9 \n = 10 \r = 13
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 1,0,0,0,0,0,0,0, // space=32
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 64
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 88
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 15*8
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 160 -- turn off 160, it might be utf8 byte
- 0,0,0,0,0,0,0,0, // 168
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 192
- 0,0,0,0,0,0,0,0, // 200
- 0,0,0,0,0,0,0,0, // 208
- 0,0,0,0,0,0,0,0, // 216
- 0,0,0,0,0,0,0,0, // 224
- 0,0,0,0,0,0,0,0, // 232
- 0,0,0,0,0,0,0,0, // 240
- 0,0,0,0,0,0,0,0}; // 248
- // '\n'
- const char g_map_is_vspace[] = {
- 0,0,0,0,0,0,0,0, // 0 -7
- 0,0,1,0,0,0,0,0, // \t=9 \n = 10
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // space=32
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 64
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 88
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 15*8
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 20*8
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1, // 192
- 1,1,1,1,1,1,1,1, // 200
- 1,1,1,1,1,1,1,0, // 208
- 1,1,1,1,1,1,1,1, // 216
- 0,0,0,0,0,0,0,0, // 224
- 0,0,0,0,0,0,0,0, // 232
- 0,0,0,0,0,0,0,0, // 240
- 0,0,0,0,0,0,0,0}; // 248
- // ' ' '\t'
- const char g_map_is_hspace[] = {
- 0,0,0,0,0,0,0,0, // 0 -7
- 0,1,0,0,0,0,0,0, // \t=9 \n = 10
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 1,0,0,0,0,0,0,0, // space=32
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 64
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 88
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 15*8
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 20*8
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1, // 192
- 1,1,1,1,1,1,1,1, // 200
- 1,1,1,1,1,1,1,0, // 208
- 1,1,1,1,1,1,1,1, // 216
- 0,0,0,0,0,0,0,0, // 224
- 0,0,0,0,0,0,0,0, // 232
- 0,0,0,0,0,0,0,0, // 240
- 0,0,0,0,0,0,0,0}; // 248
- const char g_map_is_vowel[] = {
- 0,0,0,0,0,0,0,0, // 0 -7
- 0,0,0,0,0,0,0,0, // 8-15
- 0,0,0,0,0,0,0,0, // 16-
- 0,0,0,0,0,0,0,0, // 24-
- 0,0,0,0,0,0,0,0, // 32-
- 0,0,0,0,0,0,0,0, // 40-
- 0,0,0,0,0,0,0,0, // 48-
- 0,0,0,0,0,0,0,0, // 56-
- 0,1,0,0,0,1,0,0, // 64 (A=65)
- 0,1,0,0,0,0,0,1, // 72
- 0,0,0,0,0,1,0,0, // 80
- 0,0,0,0,0,0,0,0, // 88-
- 0,1,0,0,0,1,0,0, // 96- (a=97)
- 0,1,0,0,0,0,0,1,
- 0,0,0,0,0,1,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 128
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 160
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 192
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0};
- // converts ascii chars and IS_O chars to their lower case versions
- const char g_map_is_lower[] = { // 97-122 and 224-255 (excluding 247)
- 0,0,0,0,0,0,0,0, // 0 -7
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 64
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,1,1,1,1,1,1,1, // 96
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,0,0,0,0,0, // 120
- 0,0,0,0,0,0,0,0, // 128
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 160
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 192
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1, // 224
- 1,1,1,1,1,1,1,1, // 232
- 1,1,1,1,1,1,1,0, // 240
- 1,1,1,1,1,1,1,1}; // 248
- const char g_map_is_ascii[] = { // 32 to 126
- 0,0,0,0,0,0,0,0, // 0
- 0,0,0,0,0,0,0,0, // 8
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1, // 32
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,0,
- 0,0,0,0,0,0,0,0, // 128
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0};
- // just from 0-127, used by the inlined *_utf8() functions in fctypes.h
- const char g_map_is_ascii3[] = { // 32 to 126
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1, // 32
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 0,0,0,0,0,0,0,0, // 128
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0};
- const char g_map_is_iso[] = { // 32 to 126
- 0,0,0,0,0,0,0,0, // 0
- 0,0,0,0,0,0,0,0, // 8
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 32
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 128
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 152
- 0,1,1,1,1,1,1,1, // 160
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1};
- const char g_map_is_punct[] = { // 33-47, 58-64, 91-96, 123-126, 161-191, 215,247
- 0,0,0,0,0,0,0,0, // 0
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 16
- 0,0,0,0,0,0,0,0,
- 0,1,1,1,1,1,1,1, // 32
- 1,1,1,1,1,1,1,1,
- 0,0,0,0,0,0,0,0, // 48
- 0,0,1,1,1,1,1,1,
- 1,0,0,0,0,0,0,0, // 64
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 80
- 0,0,0,1,1,1,1,1,
- 1,0,0,0,0,0,0,0, // 96
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 112
- 0,0,0,1,1,1,1,0,
- 0,0,0,0,0,0,0,0, // 128
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 144
- 0,0,0,0,0,0,0,0,
- 0,1,1,1,1,1,1,1, // 160
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1, // 176
- 1,1,1,1,1,1,1,1,
- 0,0,0,0,0,0,0,0, // 192
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,1, // 208
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 224
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,1, // 240
- 0,0,0,0,0,0,0,0}; // 248
- const char g_map_is_alnum[] = { // 48-57, 65-90,97-122,192-255(excluding 215,247)
- 0,0,0,0,0,0,0,0, // 0
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 16
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 32
- 0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1, // 48
- 1,1,0,0,0,0,0,0,
- 0,1,1,1,1,1,1,1, // 64
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1, // 80
- 1,1,1,0,0,0,0,0,
- 0,1,1,1,1,1,1,1, // 96
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1, // 112
- 1,1,1,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 128
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 144
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 160
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 176
- 0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1, // 192
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,0, // 208
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1, // 224
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,0, // 240
- 1,1,1,1,1,1,1,1};
- const char g_map_is_alpha[] = { // 65-90, 97-122, 192-255 (excluding 215, 247)
- 0,0,0,0,0,0,0,0, // 0
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 16
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 32
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 48
- 0,0,0,0,0,0,0,0,
- 0,1,1,1,1,1,1,1, // 64
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1, // 80
- 1,1,1,0,0,0,0,0,
- 0,1,1,1,1,1,1,1, // 96
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1, // 112
- 1,1,1,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 128
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 144
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 160
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 176
- 0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1, // 192
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,0, // 208
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1, // 224
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,0, // 240
- 1,1,1,1,1,1,1,1};
- const char g_map_is_digit[] = { // 48-57
- 0,0,0,0,0,0,0,0, // 0
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 16
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 32
- 0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1, // 48
- 1,1,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0};
- const char g_map_is_hex[] = { // 48-57
- 0,0,0,0,0,0,0,0, // 0
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 16
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 32
- 0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1, // 48
- 1,1,0,0,0,0,0,0, // 56
- 0,1,1,1,1,1,1,0, // 64 (65='A')
- 0,0,0,0,0,0,0,0, // 72
- 0,0,0,0,0,0,0,0, // 80
- 0,0,0,0,0,0,0,0, // 88
- 0,1,1,1,1,1,1,0, // 96 (97='a')
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0};
- // stolen from is_alnum, but turned on - and _
- const char g_map_is_tagname_char [] = { // 48-57, 65-90,97-122,192-255(excluding 215,247)
- 0,0,0,0,0,0,0,0, // 0
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 16
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 32
- 0,0,0,0,0,1,0,0, // -
- 1,1,1,1,1,1,1,1, // 48
- 1,1,1,0,0,0,0,0, // we include the : for feedburner:origlink
- 0,1,1,1,1,1,1,1, // 64
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1, // 80
- 1,1,1,0,0,0,0,1, // _
- 0,1,1,1,1,1,1,1, // 96
- 1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1, // 112
- 1,1,1,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 128
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 144
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 160
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 176
- 0,0,0,0,0,0,0,0,
- // we are no longer necessarily latin-1!!
- 0,0,0,0,0,0,0,0, // 192
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 208
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 224
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 240
- 0,0,0,0,0,0,0,0};
- const char g_map_is_tag_control_char[] = { // 48-57
- 0,0,0,0,0,0,0,0, // 0
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 16
- 0,0,0,0,0,0,0,0,
- 0,0,1,0,0,0,0,1, // 32 " and '
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0, // 48
- 0,0,0,0,1,0,1,0, // 56 < and >
- 0,0,0,0,0,0,0,0, // 64
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0};
- // when matching query terms to words/phrases in doc skip over spaces
- // or other punct so that "flypaper" in the query matches "fly paper" in the
- // doc
- /*
- const char g_map_is_match_skip[] = { // 48-57
- 0,0,0,0,0,0,0,0, // 0
- 0,1,1,0,0,0,0,0, // \t and \n
- 0,0,0,0,0,0,0,0, // 16
- 0,0,0,0,0,0,0,0,
- 1,0,0,0,0,0,0,1, // 32 space and '
- 0,0,0,0,0,1,0,0, // 40 -
- 0,0,0,0,0,0,0,0, // 48
- 0,0,0,0,0,0,0,0, // 56
- 0,0,0,0,0,0,0,0, // 64
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0};
- */
- // seems like this should be defined, but it isn't
- int32_t strnlen ( const char *s , int32_t maxLen ) {
- int32_t i ;
- for ( i = 0 ; i < maxLen ; i++ ) if ( ! s[i] ) return i;
- return i;
- }
- char *strncasestr( char *haystack, int32_t haylen, char *needle){
- int32_t matchLen = 0;
- int32_t needleLen = gbstrlen(needle);
- for (int32_t i = 0; i < haylen;i++){
- char c1 = to_lower_a(haystack[i]);
- char c2 = to_lower_a(needle[matchLen]);
- if ( c1 != c2 ){
- // no match
- matchLen = 0;
- continue;
- }
- // we matched another character
- matchLen++;
- if (matchLen < needleLen) continue;
-
- // we've matched the whole string
- return haystack + i - matchLen + 1;
- }
- return NULL;
- }
- char *strnstr2( char *haystack, int32_t haylen, char *needle){
- int32_t matchLen = 0;
- int32_t needleLen = gbstrlen(needle);
- for (int32_t i = 0; i < haylen;i++){
- char c1 = (haystack[i]);
- char c2 = (needle[matchLen]);
- if ( c1 != c2 ){
- // no match
- matchLen = 0;
- continue;
- }
- // we matched another character
- matchLen++;
- if (matchLen < needleLen) continue;
-
- // we've matched the whole string
- return haystack + i - matchLen + 1;
- }
- return NULL;
- }
- // . get the # of words in this string
- int32_t getNumWords ( char *s , int32_t len, int32_t titleVersion ) {
- int32_t wordCount = 0;
- bool inWord = false;
- for ( int32_t i = 0 ; i < len ; i++ ) {
- if ( ! is_alnum_a ( s[i] ) && s[i]!='\'' ) {
- inWord = false;
- continue;
- }
- if ( ! inWord ) {
- inWord = true;
- wordCount++;
- }
- }
- return wordCount;
- }
- // . this stores a "n" into "s" and returns the # of bytes written into "s"
- // . it also puts commas into the number
- // . it now also NULL terminates bytes written into "s"
- int32_t ulltoa ( char *s , uint64_t n ) {
- // if n is zero, it's easy
- if ( n == 0LL ) { *s++='0'; *s='\0'; return 1; }
- // a hunk is a number in [0,999]
- int32_t hunks[10];
- int32_t lastHunk = -1;
- // . get the hunks
- // . the first hunk we get is called the "lowest hunk"
- // . "lastHunk" is called the "highest hunk"
- for ( int32_t i = 0 ; i < 10 ; i++ ) {
- hunks[i] = n % 1000;
- n /= 1000;
- if ( hunks[i] != 0 ) lastHunk = i;
- }
- // remember start of buf for calculating # bytes written
- char *start = s;
- // print the hunks separated by comma
- for ( int32_t i = lastHunk ; i >= 0 ; i-- ) {
- // pad all hunks except highest hunk with zeroes
- if ( i != lastHunk ) sprintf ( s , "%03"INT32"" , hunks[i] );
- else sprintf ( s , "%"INT32"" , hunks[i] );
- s += gbstrlen(s);
- // comma after all hunks but lowest hunk
- if ( i != 0 ) *s++ = ',';
- }
- // null terminate it
- *s = '\0';
- // return # of bytes stored into "s"
- return s - start;
- }
- /*
- int32_t atol2 ( const char *s, int32_t len ) {
- char tmp[32];
- if ( len > 30 ) len = 30;
- gbmemcpy ( tmp , s , len );
- tmp [ len ] = '\0';
- return atol ( s );
- }
- */
- int32_t atol2 ( const char *s, int32_t len ) {
- // skip over spaces
- const char *end = s + len;
- while ( s < end && is_wspace_a ( *s ) ) s++;
- // return 0 if all spaces
- if ( s == end ) return 0;
- int32_t i = 0;
- int32_t val = 0;
- bool negative = false;
- if ( s[0] == '-' ) { negative = true; i++; }
- while ( i < len && is_digit(s[i]) ) val = val * 10 + ( s[i++] - '0' );
- if ( negative ) return -val;
- return val;
- }
- int64_t atoll1 ( const char *s ) {
- return atoll ( s );
- }
- int64_t atoll2 ( const char *s, int32_t len ) {
- // skip over spaces
- const char *end = s + len;
- while ( s < end && is_wspace_a ( *s ) ) s++;
- // return 0 if all spaces
- if ( s == end ) return 0;
- int32_t i = 0;
- int64_t val = 0LL;
- bool negative = false;
- if ( s[0] == '-' ) { negative = true; i++; }
- while ( i < len && is_digit(s[i]) ) val = val * 10LL + ( s[i++] - '0');
- if ( negative ) return -val;
- return val;
- }
- double atof2 ( const char *s, int32_t len ) {
- // skip over spaces
- const char *end = s + len;
- while ( s < end && is_wspace_a ( *s ) ) { s++; len--; }
- // return 0 if all spaces
- if ( s == end ) return 0;
- char tmpBuf[128];
- if ( len >= 128 ) len = 127;
- //strncpy ( dst , s , len );
- const char *p = s;
- const char *srcEnd = s + len;
- char *dst = tmpBuf;
- // remove commas
- for ( ; p < srcEnd ; p++ ) {
- // skip commas
- if ( *p == ',' ) continue;
- // otherwise store it
- *dst++ = *p;
- }
- // null term
- *dst = '\0';
- //buf[len] = '\0';
- return atof ( tmpBuf );
- }
- double atod2 ( char *s, int32_t len ) {
- // point to end
- char *end = s + len;
- // null term temp
- char c = *end;
- *end = '\0';
- // get it
- double ret = strtod ( s , NULL );
- // undo it
- *end = c;
- return ret;
- }
- bool atob ( const char *s, int32_t len ) {
- // skip over spaces
- const char *end = s + len;
- while ( s < end && is_wspace_a ( *s ) ) s++;
- // return false if all spaces
- if ( s == end ) return false;
- // parse the ascii bool value
- if ( s[0] == 't' || s[0] == 'T' ) return true;
- if ( s[0] == 'y' || s[0] == 'Y' ) return true;
- if ( ! is_digit ( *s ) || *s == '0' ) return false;
- return true;
- }
- // hexadecimal ascii to key_t
- int64_t htoint32_tint32_t ( const char *s, int32_t len ) {
- // skip over spaces
- const char *end = s + len;
- while ( s < end && is_wspace_a ( *s ) ) s++;
- // return 0 if all spaces
- if ( s == end ) return 0;
- int32_t i = 0;
- int64_t val = 0;
- while ( i < len && is_hex(s[i]) )
- val = val * 16 + htob ( s[i++] );
- return val;
- }
- // convert hex ascii string into binary at "dst"
- void hexToBin ( char *src , int32_t srcLen , char *dst ) {
- char *srcEnd = src + srcLen;
- for ( ; src && src < srcEnd ; ) {
- *dst = htob(*src++);
- *dst <<= 4;
- *dst |= htob(*src++);
- dst++;
- }
- // sanity check
- if ( src != srcEnd ) { char *xx=NULL;*xx=0; }
- }
- void binToHex ( unsigned char *src , int32_t srcLen , char *dst ) {
- unsigned char *srcEnd = src + srcLen;
- for ( ; src && src < srcEnd ; ) {
- *dst++ = btoh(*src>>4);
- *dst++ = btoh(*src&15);
- src++;
- }
- // always null term!
- *dst = '\0';
- // sanity check
- if ( src != srcEnd ) { char *xx=NULL;*xx=0; }
- }
- // . like strstr but haystack may not be NULL terminated
- // . needle, however, IS null terminated
- char *strncasestr ( char *haystack , char *needle , int32_t haystackSize ) {
- int32_t needleSize = gbstrlen(needle);
- int32_t n = haystackSize - needleSize ;
- for ( int32_t i = 0 ; i <= n ; i++ ) {
- // keep looping if first chars do not match
- if ( to_lower_a(haystack[i]) != to_lower_a(needle[0]) )
- continue;
- // if needle was only 1 char it's a match
- if ( ! needle[1] ) return &haystack[i];
- // compare the whole strings now
- if ( strncasecmp ( &haystack[i] , needle , needleSize ) == 0 )
- return &haystack[i];
- }
- return NULL;
- }
- // . like strstr but haystack may not be NULL terminated
- // . needle, however, IS null terminated
- char *strncasestr ( char *haystack , char *needle ,
- int32_t haystackSize, int32_t needleSize ) {
- int32_t n = haystackSize - needleSize ;
- for ( int32_t i = 0 ; i <= n ; i++ ) {
- // keep looping if first chars do not match
- if ( to_lower_a(haystack[i]) != to_lower_a(needle[0]) )
- continue;
- // if needle was only 1 char it's a match
- if ( ! needle[1] ) return &haystack[i];
- // compare the whole strings now
- if ( strncasecmp ( &haystack[i] , needle , needleSize ) == 0 )
- return &haystack[i];
- }
- return NULL;
- }
- char *strnstr ( char *haystack , char *needle , int32_t haystackSize ) {
- int32_t needleSize = gbstrlen(needle);
- int32_t n = haystackSize - needleSize ;
- for ( int32_t i = 0 ; i <= n ; i++ ) {
- // keep looping if first chars do not match
- if ( haystack[i] != needle[0] ) continue;
- // if needle was only 1 char it's a match
- if ( ! needle[1] ) return &haystack[i];
- // compare the whole strings now
- if ( strncmp ( &haystack[i] , needle , needleSize ) == 0 )
- return &haystack[i];
- }
- return NULL;
- }
- // independent of case
- char *gb_strcasestr ( char *haystack , char *needle ) {
- int32_t needleSize = gbstrlen(needle);
- int32_t haystackSize = gbstrlen(haystack);
- int32_t n = haystackSize - needleSize ;
- for ( int32_t i = 0 ; i <= n ; i++ ) {
- // keep looping if first chars do not match
- if ( to_lower_a(haystack[i]) != to_lower_a(needle[0]) )
- continue;
- // if needle was only 1 char it's a match
- if ( ! needle[1] ) return &haystack[i];
- // compare the whole strings now
- if ( strncasecmp ( &haystack[i] , needle , needleSize ) == 0 )
- return &haystack[i];
- }
- return NULL;
- }
- char *gb_strncasestr ( char *haystack , int32_t haystackSize , char *needle ) {
- // temp term
- char c = haystack[haystackSize];
- haystack[haystackSize] = '\0';
- char *res = gb_strcasestr ( haystack , needle );
- haystack[haystackSize] = c;
- return res;
- }
- // . convert < to < and > to >
- // . store "t" into "s"
- // . returns bytes stored into "s"
- // . NULL terminates "s" if slen > 0
- int32_t saftenTags ( char *s , int32_t slen , char *t , int32_t tlen ) {
- char *start = s ;
- // bail if slen is 0
- if ( slen <= 0 ) return 0;
- // leave a char for the \0
- char *send = s + slen - 1;
- char *tend = t + tlen;
- for ( ; t < tend && s + 4 < send ; t++ ) {
- if ( *t == '<' ) {
- *s++ = '&';
- *s++ = 'l';
- *s++ = 't';
- *s++ = ';';
- continue;
- }
- if ( *t == '>' ) {
- *s++ = '&';
- *s++ = 'g';
- *s++ = 't';
- *s++ = ';';
- continue;
- }
- *s++ = *t;
- }
- // NULL terminate "s"
- *s = '\0';
- // return # of bytes, excluding \0, stored into s
- return s - start;
- }
- // . if "doSpecial" is true, then we change <, > and & to
- // the following:
- // UnicodeData.txt:22E6;LESS-THAN BUT NOT EQUIVALENT TO;Sm;0;ON;;;;;Y;
- // UnicodeData.txt:22E7;GREATER-THAN BUT NOT EQUIVALENT TO;Sm;0;ON;;;;;Y;
- // UnicodeData.txt:E0026;TAG AMPERSAND;Cf;0;BN;;;;;N;;;;;
- // UnicodeData.txt:235E;APL FUNCTIONAL SYMBOL QUOTE QUAD;So;0;L;;;;;N;;;;;
- int32_t htmlDecode ( char *dst , char *src , int32_t srcLen , bool doSpecial ,
- int32_t niceness ) {
- if ( srcLen == 0 ) return 0;
- char *start = dst;
- char *srcEnd = src + srcLen;
- for ( ; src < srcEnd ; ) {
- // breathe
- QUICKPOLL(niceness);
- // utf8 support?
- char size = getUtf8CharSize(src);
- // all entities must start with '&'
- if ( *src != '&' ) {
- if ( size == 1 ) { *dst++ = *src++; continue; }
- gbmemcpy ( dst , src , size );
- src += size;
- dst += size;
- continue;
- //*dst++ = *src++; continue; }
- }
- // TODO: avoid doSpecial by not decoding crap in tags...
- //if ( src[0] == '<' ) {
- // // skip to tag end then!
- //
- // store decoded entity char into dst[j]
- uint32_t c;
- // "skip" is how many bytes the entities was in "src"
- int32_t skip = getEntity_a (src, srcEnd-src, &c );
- // ignore the "entity" if it was invalid
- if ( skip == 0 ) { *dst++ = *src++ ; continue; }
- // force this now always since some tags contain "
- // and it was causing the tags to be terminated too early
- // for richmondspca.org
- //if ( c == '\"' ) c = '\'';
- //if ( c == '<' ) c = '[';
- //if ( c == '>' ) c = ']';
- // . special mapping
- // . make < and > special so Xml::set() still works
- // . and make & special so we do not screw up summaries
- if ( doSpecial ) {
- // no longer use this!
- //char *xx=NULL;*xx=0;
- if ( c == '<' ) {
- // using [ and ] looks bad in event titles...
- *dst = '|';
- dst++;
- src += skip;
- continue;
- gbmemcpy(dst,"+!-",3);
- //gbmemcpy(dst,"<gb",3);
- dst += 3;
- src += skip;
- continue;
- // paragraph sign:
- //c = 0xc2b6;
- }
- if ( c == '>' ) {
- // using [ and ] looks bad in event titles...
- *dst = '|';
- dst++;
- src += skip;
- continue;
- //gbmemcpy(dst,"gb>",3);
- gbmemcpy(dst,"-!+",3);
- dst += 3;
- src += skip;
- continue;
- // high-rise hyphen:
- //c = 0xc2af;
- }
- // some tags have " in their value strings
- // so we have to preserve that!
- // use curling quote:
- //http://www.dwheeler.com/essays/quotes-test-utf-8.html
- // curling double and single quotes resp:
- // “ ” ‘ ”
- if ( c == '\"' ) {
- //c = 0x201c; // 0x235e;
- *dst = '\'';
- dst++;
- src += skip;
- continue;
- }
- //if ( c == '<' ) c = 0x22d6; // e6;
- //if ( c == '>' ) c = 0x22d7; // e7;
- // this was working ok, but just code it to an
- // ampersand. when displaying a page we can code all
- // ampersands back into & i guess! that way
- // the check for a " & " in the place name in
- // Address.cpp works out...
- //if ( c == '&' ) c = 0xff06; // full width ampersand
- }
- // . otherwise it was a legit entity
- // . store it into "dst" in utf8 format
- // . "numBytes" is how many bytes it stored into 'dst"
- int32_t numBytes = utf8Encode ( c , dst );
- // sanity check. do not eat our tail if dst == src
- if ( numBytes > skip ) { char *xx=NULL;*xx=0; }
- // advance dst ptr
- dst += numBytes;
- // skip over the encoded entity in the source string
- src += skip;
- }
- // NULL term
- *dst = '\0';
- return dst - start;
- }
- // cdata
- int32_t cdataDecode ( char *dst , char *src , int32_t niceness ) {
- if ( ! src ) return 0;
- char *start = dst;
- for ( ; *src ; ) {
- // breathe
- QUICKPOLL(niceness);
- // utf8 support?
- char size = getUtf8CharSize(src);
- // see SafeBuf::cdataEncode() we do the opposite here
- if ( src[0] != ']' ||
- src[1] != ']' ||
- src[2] != '&' ||
- src[3] != 'g' ||
- src[4] != 't' ) {
- if ( size == 1 ) { *dst++ = *src++; continue; }
- gbmemcpy ( dst , src , size );
- src += size;
- dst += size;
- continue;
- //*dst++ = *src++; continue; }
- }
- // make it ]]>
- gbmemcpy ( dst , "]]>" , 3 );
- src += 5;
- dst += 3;
- }
- // NULL term
- *dst = '\0';
- return dst - start;
- }
- // . make something safe as an form input value by translating the quotes
- // . store "t" into "s" and return bytes stored
- // . does not do bounds checking
- int32_t dequote ( char *s , char *send , char *t , int32_t tlen ) {
- char *start = s;
- char *tend = t + tlen;
- for ( ; t < tend && s < send ; t++ ) {
- if ( *t == '"' ) {
- if ( s + 5 >= send ) return 0;
- *s++ = '&';
- *s++ = '#';
- *s++ = '3';
- *s++ = '4';
- *s++ = ';';
- continue;
- }
- *s++ = *t;
- }
- // all or nothing
- if ( s + 1 >= send ) return 0;
- *s = '\0';
- return s - start;
- }
- bool dequote ( SafeBuf* sb , char *t , int32_t tlen ) {
- char *tend = t + tlen;
- for ( ; t < tend; t++ ) {
- if ( *t == '"' ) {
- sb->safeMemcpy(""", 5);
- continue;
- }
- *sb += *t;
- }
- *sb += '\0';
- return true;
- }
- //int32_t dequote ( char *s , char *t ) {
- // return dequote ( s , t , gbstrlen ( t ) );
- //}
- // . entity-ize a string so it's safe for html output
- // . store "t" into "s" and return bytes stored
- // . does bounds checking
- char *htmlEncode ( char *s , char *send , char *t , char *tend , bool pound ,
- int32_t niceness ) {
- for ( ; t < tend ; t++ ) {
- QUICKPOLL(niceness);
- if ( s + 7 >= send ) { *s = '\0'; return s; }
- if ( *t == '"' ) {
- *s++ = '&';
- *s++ = '#';
- *s++ = '3';
- *s++ = '4';
- *s++ = ';';
- continue;
- }
- if ( *t == '<' ) {
- *s++ = '&';
- *s++ = 'l';
- *s++ = 't';
- *s++ = ';';
- continue;
- }
- if ( *t == '>' ) {
- *s++ = '&';
- *s++ = 'g';
- *s++ = 't';
- *s++ = ';';
- continue;
- }
- if ( *t == '&' ) {
- *s++ = '&';
- *s++ = 'a';
- *s++ = 'm';
- *s++ = 'p';
- *s++ = ';';
- continue;
- }
- if ( *t == '#' && pound ) {
- *s++ = '&';
- *s++ = '#';
- *s++ = '0';
- *s++ = '3';
- *s++ = '5';
- *s++ = ';';
- continue;
- }
- *s++ = *t;
- }
- *s = '\0';
- return s;
- }
- // . entity-ize a string so it's safe for html output
- // . store "t" into "s" and return true on success
- bool htmlEncode ( SafeBuf* s , char *t , char *tend , bool pound ,
- int32_t niceness ) {
- for ( ; t < tend ; t++ ) {
- QUICKPOLL(niceness);
- if ( *t == '"' ) {
- s->safeMemcpy(""", 5);
- continue;
- }
- if ( *t == '<' ) {
- s->safeMemcpy("<", 4);
- continue;
- }
- if ( *t == '>' ) {
- s->safeMemcpy(">", 4);
- continue;
- }
- if ( *t == '&' ) {
- s->safeMemcpy("&", 5);
- continue;
- }
- if ( *t == '#' && pound ) {
- s->safeMemcpy("#", 6);
- continue;
- }
- // our own specially decoded entities!
- if ( *t == '+' && t[1]=='!' && t[2]=='-' ) {
- s->safeMemcpy("<",4);
- continue;
- }
- // our own specially decoded entities!
- if ( *t == '-' && t[1]=='!' && t[2]=='+' ) {
- s->safeMemcpy(">",4);
- continue;
- }
- *s += *t;
- }
- *s += '\0';
- return true;
- }
- // . convert "-->%22 , &-->%26, +-->%2b, space-->+, ?-->%3f is that it?
- // . convert so we can display as a cgi PARAMETER within a url
- // . used by HttPage2 (cached web page) to encode the query into a url
- // . used by PageRoot to do likewise
- // . returns bytes written into "d" not including terminating \0
- int32_t urlEncode ( char *d , int32_t dlen , char *s , int32_t slen, bool requestPath ) {
- char *dstart = d;
- // subtract 1 to make room for a terminating \0
- char *dend = d + dlen - 1;
- char *send = s + slen;
- for ( ; s < send && d < dend ; s++ ) {
- if ( *s == '\0' && requestPath ) {
- *d++ = *s;
- continue;
- }
- // encode if not fit for display
- if ( ! is_ascii ( *s ) ) goto encode;
- switch ( *s ) {
- case ' ': goto encode;
- case '&': goto encode;
- case '"': goto encode;
- case '+': goto encode;
- case '%': goto encode;
- case '#': goto encode;
- // encoding < and > are more for displaying on an
- // html page than sending to an http server
- case '>': goto encode;
- case '<': goto encode;
- case '?': if ( requestPath ) break;
- goto encode;
- }
- // otherwise, no need to encode
- *d++ = *s;
- continue;
- encode:
- // space to +
- if ( *s == ' ' && d + 1 < dend ) { *d++ = '+'; continue; }
- // break out if no room to encode
- if ( d + 2 >= dend ) break;
- *d++ = '%';
- // store first hex digit
- unsigned char v = ((unsigned char)*s)/16 ;
- if ( v < 10 ) v += '0';
- else v += 'A' - 10;
- *d++ = v;
- // store second hex digit
- v = ((unsigned char)*s) & 0x0f ;
- if ( v < 10 ) v += '0';
- else v += 'A' - 10;
- *d++ = v;
- }
- // NULL terminate it
- *d = '\0';
- // and return the length
- return d - dstart;
- }
- // determine the length of the encoded url, does NOT include NULL
- int32_t urlEncodeLen ( char *s , int32_t slen , bool requestPath ) {
- int32_t dLen = 0;
- char *send = s + slen;
- for ( ; s < send ; s++ ) {
- if ( *s == '\0' && requestPath ) {
- dLen++;
- continue;
- }
- // encode if not fit for display
- if ( ! is_ascii ( *s ) ) goto encode;
- switch ( *s ) {
- case ' ': goto encode;
- case '&': goto encode;
- case '"': goto encode;
- case '+': goto encode;
- case '%': goto encode;
- case '#': goto encode;
- // encoding < and > are more for displaying on an
- // html page than sending to an http server
- case '>': goto encode;
- case '<': goto encode;
- case '?': if ( requestPath ) break;
- goto encode;
- }
- // otherwise, no need to encode
- dLen++;
- continue;
- encode:
- // space to +
- if ( *s == ' ' ) { dLen++; continue; }
- // hex code
- dLen += 3; // %XX
- }
- //dLen++; // NULL TERM
- // and return the length
- return dLen;
- }
- // . decodes "s/slen" and stores into "dest"
- // . returns the number of bytes stored into "dest"
- int32_t urlDecode ( char *dest , char *s , int32_t slen ) {
- int32_t j = 0;
- for ( int32_t i = 0 ; i < slen ; i++ ) {
- if ( s[i] == '+' ) { dest[j++]=' '; continue; }
- dest[j++] = s[i];
- if ( s[i] != '%' ) continue;
- if ( i + 2 >= slen ) continue;
- // if two chars after are not hex chars, it's not an encoding
- if ( ! is_hex ( s[i+1] ) ) continue;
- if ( ! is_hex ( s[i+2] ) ) continue;
- // convert hex chars to values
- unsigned char a = htob ( s[i+1] ) * 16;
- unsigned char b = htob ( s[i+2] ) ;
- dest[j-1] = (char) (a + b);
- i += 2;
- }
- return j;
- }
- int32_t urlDecodeNoZeroes ( char *dest , char *s , int32_t slen ) {
- int32_t j = 0;
- for ( int32_t i = 0 ; i < slen ; i++ ) {
- if ( s[i] == '+' ) { dest[j++]=' '; continue; }
- dest[j++] = s[i];
- if ( s[i] != '%' ) continue;
- if ( i + 2 >= slen ) continue;
- // if two chars after are not hex chars, it's not an encoding
- if ( ! is_hex ( s[i+1] ) ) continue;
- if ( ! is_hex ( s[i+2] ) ) continue;
- // convert hex chars to values
- unsigned char a = htob ( s[i+1] ) * 16;
- unsigned char b = htob ( s[i+2] ) ;
- // NO ZEROES! fixes &content= having decoded \0's in it
- // and setting our parms
- if ( a + b == 0 ) {
- log("fctypes: urlDecodeNoZeros encountered url "
- "encoded zero. truncating http request.");
- return j;
- }
- dest[j-1] = (char) (a + b);
- i += 2;
- }
- return j;
- }
- // . like above, but only decodes chars that should not have been encoded
- // . will also encode binary chars
- int32_t urlNormCode ( char *d , int32_t dlen , char *s , int32_t slen ) {
- // save start of destination buffer for returning the length
- char *dstart = d;
- // subtract 1 for NULL termination
- char *dend = d + dlen - 1;
- char *send = s + slen;
- for ( ; s < send && d < dend ; s++ ) {
- // if its non-ascii, encode it so it displays correctly
- if ( ! is_ascii ( *s ) ) {
- // break if no room to encode it
- if ( d + 2 >= dend ) break;
- // store it encoded
- *d++ = '%';
- // store first hex digit
- unsigned char v = ((unsigned char)*s)/16 ;
- if ( v < 10 ) v += '0';
- else v += 'A' - 10;
- *d++ = v;
- // store second hex digit
- v = ((unsigned char)*s) & 0x0f ;
- if ( v < 10 ) v += '0';
- else v += 'A' - 10;
- *d++ = v;
- continue;
- }
- // store it
- *d++ = *s;
- // but it might be something encoded that should not have been
- if ( *s != '%' ) continue;
- // it requires to following chars to decode
- if ( s + 2 >= send ) continue;
- // if two chars after are not hex chars, it's not an encoding
- if ( ! is_hex ( s[1] ) ) continue;
- if ( ! is_hex ( s[2] ) ) continue;
- // convert hex chars to values
- unsigned char a = htob ( s[1] ) * 16;
- unsigned char b = htob ( s[2] ) ;
- unsigned char v = a + b;
- // don't decode if it decodes in these chars
- switch ( v ) {
- case ' ': continue;
- case '&': continue;
- case '"': continue;
- case '+': continue;
- case '%': continue;
- case '>': continue;
- case '<': continue;
- case '?': continue;
- case '=': continue;
- }
- // otherwise, it's fine to decode it
- d[-1] = (char) (a + b);
- // skip over those 2 chars as well as leading '%'
- s += 2;
- }
- // NULL terminate
- *d = '\0';
- // return length
- return d - dstart ;
- }
- // approximate # of non-punct words
- int32_t getNumWords ( char *s ) {
- int32_t count = 0;
- loop:
- // skip punct
- while ( ! is_alnum_a(*s) ) s++;
- // bail if done
- if ( !*s ) return count;
- // count a word
- count++;
- // skip word
- while ( is_alnum_a(*s) ) s++;
- // watch for ' letter punct
- if ( *s=='\'' && is_alnum_a(*(s+1)) && !is_alnum_a(*(s+2)) ) {
- // skip apostrophe
- s++;
- // skip rest of word
- while ( is_alnum_a(*s) ) s++;
- }
- goto loop;
- }
- static int64_t s_adjustment = 0;
- int64_t globalToLocalTimeMilliseconds ( int64_t global ) {
- // sanity check
- //if ( ! g_clockInSync )
- // log("gb: Converting global time but clock not in sync.");
- return global - s_adjustment;
- }
- int64_t localToGlobalTimeMilliseconds ( int64_t local ) {
- // sanity check
- //if ( ! g_clockInSync )
- // log("gb: Converting global time but clock not in sync.");
- return local + s_adjustment;
- }
- int32_t globalToLocalTimeSeconds ( int32_t global ) {
- // sanity check
- //if ( ! g_clockInSync )
- // log("gb: Converting global time but clock not in sync.");
- return global - (s_adjustment/1000);
- }
- int32_t localToGlobalTimeSeconds ( int32_t local ) {
- // sanity check
- //if ( ! g_clockInSync )
- // log("gb: Converting global time but clock not in sync.");
- return local + (s_adjustment/1000);
- }
- #include "Timedb.h"
- static char s_tafile[1024];
- static bool s_hasFileName = false;
- // returns false and sets g_errno on error
- bool setTimeAdjustmentFilename ( char *dir, char *filename ) {
- s_hasFileName = true;
- int32_t len1 = gbstrlen(dir);
- int32_t len2 = gbstrlen(filename);
- if ( len1 + len2 > 1000 ) { char *xx=NULL;*xx=0; }
- sprintf(s_tafile,"%s/%s",dir,filename);
- return true;
- }
- // returns false and sets g_errno on error
- bool loadTimeAdjustment ( ) {
- // bail if no filename to read
- if ( ! s_hasFileName ) return true;
- // read it in
- // one line in text
- int fd = open ( s_tafile , O_RDONLY );
- if ( fd < 0 ) {
- log("util: could not open %s for reading",s_tafile);
- g_errno = errno;
- return false;
- }
- char rbuf[1024];
- // read in max bytes
- int nr = read ( fd , rbuf , 1000 );
- if ( nr <= 10 || nr > 1000 ) {
- log("util: reading %s had error: %s",s_tafile,
- mstrerror(errno));
- close(fd);
- g_errno = errno;
- return false;
- }
- close(fd);
- // parse the text line
- int64_t stampTime = 0LL;
- int64_t clockAdj = 0LL;
- sscanf ( rbuf , "%"UINT64" %"INT64"", &stampTime, &clockAdj );
- // get stamp age
- int64_t local = gettimeofdayInMillisecondsLocal();
- int64_t stampAge = local - stampTime;
- // if too old forget about it
- if ( stampAge > 2*86400 ) return true;
- // update adjustment
- s_adjustment = clockAdj;
- // if stamp in file is within 2 days old, assume its still good
- // this will prevent having to rebuild a sortbydatetable
- // and really slow down loadups
- g_clockInSync = true;
- // note it
- log("util: loaded %s and put clock in sync. age=%"UINT64" adj=%"INT64"",
- s_tafile,stampAge,clockAdj);
- return true;
- }
- // . returns false and sets g_errno on error
- // . saved by Process::saveBlockingFiles1()
- bool saveTimeAdjustment ( ) {
- // fortget it if setTimeAdjustmentFilename never called
- if ( ! s_hasFileName ) return true;
- // must be in sync!
- if ( ! g_clockInSync ) return true;
- // store it
- int64_t local = gettimeofdayInMillisecondsLocal();
- char wbuf[1024];
- sprintf (wbuf,"%"UINT64" %"INT64"\n",local,s_adjustment);
- // write it out
- int fd = open ( s_tafile , O_CREAT|O_RDWR|O_TRUNC , 00666 );
- if ( fd < 0 ) {
- log("util: could not open %s for writing",s_tafile);
- g_errno = errno;
- return false;
- }
- // how many bytes to write?
- int32_t len = gbstrlen(wbuf);
- // read in max bytes
- int nw = write ( fd , wbuf , len );
- if ( nw != len ) {
- log("util: writing %s had error: %s",s_tafile,
- mstrerror(errno));
- close(fd);
- g_errno = errno;
- return false;
- }
- close(fd);
- // note it
- log("util: saved %s",s_tafile);
- // it was written ok
- return true;
- }
- // a "fake" settimeofdayInMilliseconds()
- void settimeofdayInMillisecondsGlobal ( int64_t newTime ) {
- // can't do this in sig handler
- if ( g_inSigHandler ) return;
- // this isn't async signal safe...
- struct timeval tv;
- gettimeofday ( &tv , NULL );
- int64_t now=(int64_t)(tv.tv_usec/1000)+((int64_t)tv.tv_sec)*1000;
- // bail if no change... UNLESS we need to sync clock!!
- if ( s_adjustment == newTime - now && g_clockInSync ) return;
- // log it, that way we know if there is another issue
- // with flip-flopping (before we synced with host #0 and also
- // with proxy #0)
- int64_t delta = s_adjustment - (newTime - now) ;
- if ( delta > 100 || delta < -100 )
- logf(LOG_INFO,"gb: Updating clock adjustment from "
- "%"INT64" ms to %"INT64" ms", s_adjustment , newTime - now );
- // set adjustment
- s_adjustment = newTime - now;
- // return?
- if ( g_clockInSync ) return;
- // we are now in sync
- g_clockInSync = true;
- // log it
- if ( s_hasFileName )
- logf(LOG_INFO,"gb: clock is now synced with host #0. "
- "saving to %s",s_tafile);
- else
- logf(LOG_INFO,"gb: clock is now synced with host #0.");
- // save
- saveTimeAdjustment();
- // force timedb to load now!
- //initAllSortByDateTables ( );
- }
- time_t getTimeGlobal() {
- return gettimeofdayInMillisecondsSynced() / 1000;
- }
- time_t getTimeGlobalNoCore() {
- return gettimeofdayInMillisecondsGlobalNoCore() / 1000;
- }
- time_t getTimeSynced() {
- return gettimeofdayInMillisecondsSynced() / 1000;
- }
- int64_t gettimeofdayInMillisecondsGlobal() {
- return gettimeofdayInMillisecondsSynced();
- }
- #include "Threads.h"
- int64_t gettimeofdayInMillisecondsSynced() {
- // if in a sig handler then return g_now
- //if ( g_inSigHandler ) return g_nowGlobal;
- // i find that a pthread can call this function even though
- // a signal handler is underway in the main thread!
- if ( g_inSigHandler && ! g_threads.amThread() ) {
- char *xx = NULL; *xx = 0; }
- // sanity check
- if ( ! isClockInSync() ) {
- static int s_printed = 0;
- if ( (s_printed % 100) == 0 ) {
- s_printed++;
- log("xml: clock not in sync with host #0 yet!!!!!!");
- }
- //char *xx = NULL; *xx = 0; }
- }
- int64_t now;
- // the real tiem sigalrm interrupt in Loop.cpp sets this to
- // true once per millisecond
- if ( ! g_clockNeedsUpdate ) {
- now = g_now;
- }
- else {
- //if ( ! g_clockInSync )
- // log("gb: Getting global time but clock not in sync.");
- // this isn't async signal safe...
- struct timeval tv;
- gettimeofday ( &tv , NULL );
- now = (int64_t)(tv.tv_usec/1000)+((int64_t)tv.tv_sec)*1000;
- }
- // update g_nowLocal
- if ( now > g_now ) g_now = now;
- g_clockNeedsUpdate = false;
- // adjust from Msg0x11 time adjustments
- now += s_adjustment;
- // update g_now if it is more accurate
- //if ( now > g_nowGlobal ) g_nowGlobal = now;
- return now;
- }
- int64_t gettimeofdayInMillisecondsGlobalNoCore() {
- // if in a sig handler then return g_now
- //if ( g_inSigHandler ) return g_nowGlobal;
- // i find that a pthread can call this function even though
- // a signal handler is underway in the main thread!
- if ( g_inSigHandler && ! g_threads.amThread() ) {
- char *xx = NULL; *xx = 0; }
- // sanity check
- //if ( ! g_clockInSync ) { char *xx = NULL; *xx = 0; }
- //if ( ! g_clockInSync )
- // log("gb: Getting global time but clock not in sync.");
- // this isn't async signal safe...
- struct timeval tv;
- gettimeofday ( &tv , NULL );
- int64_t now=(int64_t)(tv.tv_usec/1000)+((int64_t)tv.tv_sec)*1000;
- // update g_nowLocal
- if ( now > g_now ) g_now = now;
- // adjust from Msg0x11 time adjustments
- now += s_adjustment;
- // update g_now if it is more accurate
- //if ( now > g_nowGlobal ) g_nowGlobal = now;
- return now;
- }
- int64_t gettimeofdayInMillisecondsLocal() {
- return gettimeofdayInMilliseconds();
- }
- uint64_t gettimeofdayInMicroseconds(void) {
- struct timeval tv;
- gettimeofday(&tv, NULL);
- return(((uint64_t)tv.tv_sec * 1000000LL) + (uint64_t)tv.tv_usec);
- }
- // "local" means the time on this machine itself, NOT a timezone thing.
- int64_t gettimeofdayInMilliseconds() {
- // if in a sig handler then return g_now
- //if ( g_inSigHandler ) return g_now;
- // i find that a pthread can call this function even though
- // a signal handler is underway in the main thread!
- if ( g_inSigHandler && ! g_threads.amThread() ) {
- char *xx = NULL; *xx = 0; }
- // the real tiem sigalrm interrupt in Loop.cpp sets this to
- // true once per millisecond
- if ( ! g_clockNeedsUpdate )
- return g_now;
- g_clockNeedsUpdate = false;
- // this isn't async signal safe...
- struct timeval tv;
- //g_loop.disableTimer();
- gettimeofday ( &tv , NULL );
- //g_loop.enableTimer();
- int64_t now=(int64_t)(tv.tv_usec/1000)+((int64_t)tv.tv_sec)*1000;
- // update g_nowLocal
- if ( now > g_now ) g_now = now;
- // adjust from Msg0x11 time adjustments
- //now += s_adjustment;
- // update g_now if it is more accurate
- // . or don't, bad to update it here because it could be very different
- // from what it should be
- //if ( now > g_now ) g_now = now;
- return now;
- }
- int64_t gettimeofdayInMilliseconds_force ( ) {
- g_clockNeedsUpdate = true;
- return gettimeofdayInMilliseconds();
- }
- time_t getTime () {
- return getTimeLocal();
- }
- // . get time in seconds
- // . use this instead of call to time(NULL) cuz it uses adjustment
- time_t getTimeLocal () {
- // if in a sig handler then return g_now/1000
- //if ( g_inSigHandler ) return (time_t)(g_now / 1000);
- // i find that a pthread can call this function even though
- // a signal handler is underway in the main thread!
- if ( g_inSigHandler && ! g_threads.amThread() ) {
- char *xx = NULL; *xx = 0; }
- // get time now
- uint32_t now = gettimeofdayInMilliseconds() / 1000;
- // and adjust it
- //now += s_adjustment / 1000;
- return (time_t)now;
- }
- // . make it so we can display the ascii string on an html browser
- int32_t saftenTags2 ( char *s , int32_t slen , char *t , int32_t tlen ) {
- char *start = s ;
- // bail if slen is 0
- if ( slen <= 0 ) return 0;
- // leave a char for the \0
- char *send = s + slen - 1;
- char *tend = t + tlen;
- for ( ; t < tend && s + 6 < send ; t++ ) {
- if ( *t == '<' ) {
- *s++ = '&';
- *s++ = 'l';
- *s++ = 't';
- *s++ = ';';
- continue;
- }
- if ( *t == '>' ) {
- *s++ = '&';
- *s++ = 'g';
- *s++ = 't';
- *s++ = ';';
- continue;
- }
- if ( *t == '&' ) {
- *s++ = '&';
- *s++ = 'a';
- *s++ = 'm';
- *s++ = 'p';
- *s++ = ';';
- continue;
- }
- *s++ = *t;
- }
- // return NULL if we broke out because there was not enough room
- //if ( s + 6 >= send ) return NULL;
- // NULL terminate "s"
- *s = '\0';
- // return # of bytes, excluding \0, stored into s
- return s - start;
- }
- void getCalendarFromMs(int64_t ms,
- int32_t* days,
- int32_t* hours,
- int32_t* minutes,
- int32_t* secs,
- int32_t* msecs) {
- int32_t s = 1000;
- int32_t m = s * 60;
- int32_t h = m * 60;
- int32_t d = h * 24;
- *days = ms / d;
- int64_t tmp = ms % d;
- *hours = tmp / h;
- tmp = tmp % h;
- *minutes = tmp / m;
- tmp = tmp % m;
- *secs = tmp / s;
-
- *msecs = tmp % s;
- }
- uint32_t calculateChecksum(char *buf, int32_t bufLen){
- uint32_t sum = 0;
- for(int32_t i = 0; i < bufLen>>2;i++)
- sum += ((uint32_t*)buf)[i];
- return sum;
- }
- bool anchorIsLink( char *tag, int32_t tagLen){
- if (strncasestr(tag, tagLen, "href")) return true;
- if (strncasestr(tag, tagLen, "onclick")) return true;
- return false;
- }
- bool has_alpha_a ( char *s , char *send ) {
- for ( ; s < send ; s++ )
- if (is_alpha_a(*s)) return true;
- return false;
- }
- bool has_alpha_utf8 ( char *s , char *send ) {
- char cs = 0;
- for ( ; s < send ; s += cs ) {
- cs = getUtf8CharSize ( s );
- if ( cs == 1 ) {
- if (is_alpha_a(*s)) return true;
- continue;
- }
- if ( is_alpha_utf8(s) ) return true;
- }
- return false;
- }
- //takes an input skips leading spaces
- //puts next nonspace char* in numPtr
- //an returns the next space after that.
- char* getNextNum(char* input, char** numPtr) {
- char* p = input;
- char* nextspace;
- while(*p && isspace(*p)) p++;
- nextspace = p;
- *numPtr = p;
- while(*nextspace && !isspace(*nextspace))
- nextspace++;
- return nextspace;
- }
- #include "HttpMime.h" // CT_HTML
- // returns length of stripped content, but will set g_errno and return -1
- // on error
- int32_t stripHtml( char *content, int32_t contentLen, int32_t version, int32_t strip ) {
- if ( !strip ) {
- log( LOG_WARN, "query: html stripping not required!" );
- return contentLen;
- }
- if ( ! content )
- return 0;
- if ( contentLen == 0 )
- return 0;
- // filter content if we should
- // keep this on the big stack so "content" still references something
- Xml tmpXml;
- // . get the content as xhtml (should be NULL terminated)
- // . parse as utf8 since all we are doing is messing with
- // the tags...content manipulation comes later
- if ( ! tmpXml.set ( content , contentLen,
- false, 0, false, version , true , 0 , CT_HTML ) )
- return -1;
- //if( strip == 4 )
- // return tmpXml.getText( content, contentLen );
- // go tag by tag
- int32_t n = tmpXml.getNumNodes();
- XmlNode *nodes = tmpXml.getNodes();
- // Xml class may have converted to utf16
- content = tmpXml.getContent();
- contentLen = tmpXml.getContentLen();
- char *x = content;
- char *xend = content + contentLen;
- int32_t stackid = -1;
- int32_t stackc = 0;
- char skipIt = 0;
- // . hack COL tag to NOT require a back tag
- // . do not leave it that way as it could mess up our parsing
- //g_nodes[25].m_hasBackTag = 0;
- for ( int32_t i = 0 ; i < n ; i++ ) {
- // get id of this node
- int32_t id = nodes[i].m_nodeId;
-
- // if strip is 4, just remove the script tag
- if( strip == 4 ){
- if ( id ){
- if ( id == 83 ){
- skipIt ^= 1;
- continue;
- }
- }
- else if ( skipIt ) continue;
- goto keepit;
- }
-
- // if strip is 3, ALL tags will be removed!
- if( strip == 3 ) {
- if( id ) {
- // . we dont want anything in between:
- // - script tags (83)
- // - style tags (111)
- if ((id == 83) || (id == 111)) skipIt ^= 1;
- // save img to have alt text kept.
- if ( id == 54 ) goto keepit;
- continue;
- }
- else {
- if( skipIt ) continue;
- goto keepit;
- }
- }
- // get it
- int32_t fk;
- if ( strip == 1 ) fk = g_nodes[id].m_filterKeep1;
- else fk = g_nodes[id].m_filterKeep2;
- // if tag is <link ...> only keep it if it has
- // rel="stylesheet" or rel=stylesheet
- if ( strip == 2 && id == 62 ) { // <link> tag id
- int32_t fflen;
- char *ff = nodes[i].getFieldValue ( "rel" , &fflen );
- if ( ff && fflen == 10 &&
- strncmp(ff,"stylesheet",10) == 0 )
- goto keepit;
- }
- // just remove just the tag if this is 2
- if ( fk == 2 ) continue;
- // keep it if not in a stack
- if ( ! stackc && fk ) goto keepit;
- // if no front/back for tag, just skip it
- if ( ! nodes[i].m_hasBackTag ) continue;
- // start stack if none
- if ( stackc == 0 ) {
- // but not if this is a back tag
- if ( nodes[i].m_node[1] == '/' ) continue;
- // now start the stack
- stackid = id;
- stackc = 1;
- continue;
- }
- // skip if this tag does not match what is on stack
- if ( id != stackid ) continue;
- // if ANOTHER front tag, inc stack
- if ( nodes[i].m_node[1] != '/' ) stackc++;
- // otherwise, dec the stack count
- else stackc--;
- // . ensure not negative from excess back tags
- // . reset stackid to -1 to indicate no stack
- if ( stackc <= 0 ) { stackid= -1; stackc = 0; }
- // skip it
- continue;
- keepit:
- // replace images with their alt text
- int32_t vlen;
- char *v;
- if ( id == 54 ) {
- v = nodes[i].getFieldValue("alt", &vlen );
- // try title if no alt text
- if ( ! v )
- v = nodes[i].getFieldValue("title", &vlen );
- if ( v ) { gbmemcpy ( x, v, vlen ); x += vlen; }
- continue;
- }
- // remove background image from body,table,td tags
- if ( id == 19 || id == 93 || id == 95 ) {
- v = nodes[i].getFieldValue("background", &vlen);
- // remove background, just sabotage it
- if ( v ) v[-4] = 'x';
- }
- // store it
- gbmemcpy ( x , nodes[i].m_node , nodes[i].m_nodeLen );
- x += nodes[i].m_nodeLen;
- // sanity check
- if ( x > xend ) { char *xx=NULL;*xx=0;}
- }
- contentLen = x - content;
- content [ contentLen ] = '\0';
- // unhack COL tag
- //g_nodes[25].m_hasBackTag = 1;
- return contentLen;
- }
- bool is_urlchar(char s) {
- // [a-z0-9/:_-.?$,~=#&%+@]
- if(isalnum(s)) return true;
- if(s == '/' ||
- s == ':' ||
- s == '_' ||
- s == '-' ||
- s == '.' ||
- s == '?' ||
- s == '$' ||
- s == ',' ||
- s == '~' ||
- s == '=' ||
- s == '#' ||
- s == '&' ||
- s == '%' ||
- s == '+' ||
- s == '@') return true;
- return false;
- }
- // don't allow "> in our input boxes
- int32_t cleanInput(char *outbuf, int32_t outbufSize, char *inbuf, int32_t inbufLen){
- char *p = outbuf;
- int32_t numQuotes=0;
- int32_t lastQuote = 0;
- for (int32_t i=0;i<inbufLen;i++){
- if (p-outbuf >= outbufSize-1) break;
-
- if (inbuf[i] == '"'){
- numQuotes++;
- lastQuote = i;
- }
- // if we have an odd number of quotes and a close angle bracket
- // it could be an xss attempt
- if (inbuf[i] == '>' && (numQuotes & 1)) {
- p = outbuf+lastQuote;
- break;
- }
- *p = inbuf[i];
- p++;
- }
- *p = '\0';
- return p-outbuf;
- }
- //
- // get rid of the virtual Msg class because it screws up how we
- // serialize/deserialize every time we compile gb it seems
- //
- int32_t getMsgStoredSize ( int32_t baseSize,
- int32_t *firstSizeParm,
- int32_t *lastSizeParm ) {
- //int32_t size = (int32_t)sizeof(Msg);
- int32_t size = baseSize;//getBaseSize();
- // add up string buffer sizes
- int32_t *sizePtr = firstSizeParm;//getFirstSizeParm(); // &size_qbuf;
- int32_t *sizeEnd = lastSizeParm;//getLastSizeParm (); // &size_displayMeta
- for ( ; sizePtr <= sizeEnd ; sizePtr++ )
- size += *sizePtr;
- return size;
- }
- // . return ptr to the buffer we serialize into
- // . return NULL and set g_errno on error
- char *serializeMsg ( int32_t baseSize ,
- int32_t *firstSizeParm ,
- int32_t *lastSizeParm ,
- char **firstStrPtr ,
- void *thisPtr ,
- int32_t *retSize ,
- char *userBuf ,
- int32_t userBufSize ,
- bool makePtrsRefNewBuf ) {
- // make a buffer to serialize into
- char *buf = NULL;
- //int32_t need = getStoredSize();
- int32_t need = getMsgStoredSize(baseSize,firstSizeParm,lastSizeParm);
- // big enough?
- if ( need <= userBufSize ) buf = userBuf;
- // alloc if we should
- if ( ! buf ) buf = (char *)mmalloc ( need , "Ra" );
- // bail on error, g_errno should be set
- if ( ! buf ) return NULL;
- // set how many bytes we will serialize into
- *retSize = need;
- // copy the easy stuff
- char *p = buf;
- gbmemcpy ( p , (char *)thisPtr , baseSize );//getBaseSize() );
- p += baseSize; // getBaseSize();
- // then store the strings!
- int32_t *sizePtr = firstSizeParm;//getFirstSizeParm(); // &size_qbuf;
- int32_t *sizeEnd = lastSizeParm;//getLastSizeParm (); // &size_displayMet
- char **strPtr = firstStrPtr;//getFirstStrPtr (); // &ptr_qbuf;
- for ( ; sizePtr <= sizeEnd ; ) {
- // if we are NULL, we are a "bookmark", so
- // we alloc'd space for it, but don't copy into
- // the space until after this call toe serialize()
- if ( ! *strPtr ) goto skip;
- // sanity check -- cannot copy onto ourselves
- if ( p > *strPtr && p < *strPtr + *sizePtr ) {
- char *xx = NULL; *xx = 0; }
- // copy the string into the buffer
- gbmemcpy ( p , *strPtr , *sizePtr );
- skip:
- // . make it point into the buffer now
- // . MDW: why? that is causing problems for the re-call in
- // Msg3a, it calls this twice with the same "m_r"
- if ( makePtrsRefNewBuf ) *strPtr = p;
- // advance our destination ptr
- p += *sizePtr;
- // advance both ptrs to next string
- sizePtr++;
- strPtr++;
- }
- return buf;
- }
- char *serializeMsg2 ( void *thisPtr ,
- int32_t objSize ,
- char **firstStrPtr ,
- int32_t *firstSizeParm ,
- int32_t *retSize ) {
- // make a buffer to serialize into
- char *buf = NULL;
- int32_t baseSize = (char *)firstStrPtr - (char *)thisPtr;
- int nptrs=((char *)firstSizeParm-(char *)firstStrPtr)/sizeof(char *);
- int32_t need = baseSize;
- need += nptrs * sizeof(char *);
- need += nptrs * sizeof(int32_t);
- // tally up the string sizes
- int32_t *srcSizePtr = (int32_t *)firstSizeParm;
- char **srcStrPtr = (char **)firstStrPtr;
- int32_t totalStringSizes = 0;
- for ( int i = 0 ; i < nptrs ; i++ ) {
- if ( srcStrPtr[i] == NULL ) continue;
- totalStringSizes += srcSizePtr[i];
- }
- int32_t stringBufferOffset = need;
- need += totalStringSizes;
- // alloc if we should
- if ( ! buf ) buf = (char *)mmalloc ( need , "sm2" );
- // bail on error, g_errno should be set
- if ( ! buf ) return NULL;
- // set how many bytes we will serialize into
- *retSize = need;
- // copy everything over except strings themselves
- char *p = buf;
- gbmemcpy ( p , (char *)thisPtr , stringBufferOffset );//need );
- // point to the string buffer
- p += stringBufferOffset;
- // then store the strings!
- char **dstStrPtr = (char **)(buf + baseSize );
- int32_t *dstSizePtr = (int32_t *)(buf + baseSize+sizeof(char *)*nptrs);
- for ( int count = 0 ; count < nptrs ; count++ ) {
- // copy ptrs
- //*dstStrPtr = *srcStrPtr;
- //*dstSizePtr = *srcSizePtr;
- // if we are NULL, we are a "bookmark", so
- // we alloc'd space for it, but don't copy into
- // the space until after this call toe serialize()
- if ( ! *srcStrPtr )
- goto skip;
- // if this is valid then size can't be 0! fix upstream.
- if ( ! *srcSizePtr ) { char *xx=NULL;*xx=0; }
- // if size is 0 use gbstrlen. helps with InjectionRequest
- // where we set ptr_url or ptr_content but not size_url, etc.
- //if ( ! *srcSizePtr )
- // *srcSizePtr = gbstrlen(*strPtr);
- // sanity check -- cannot copy onto ourselves
- if ( p > *srcStrPtr && p < *srcStrPtr + *srcSizePtr ) {
- char *xx = NULL; *xx = 0; }
- // copy the string into the buffer
- gbmemcpy ( p , *srcStrPtr , *srcSizePtr );
- skip:
- // point it now into the string buffer
- *dstStrPtr = p;
- // if it is 0 length, make ptr NULL in destination
- if ( *srcSizePtr == 0 || *srcStrPtr == NULL ) {
- *dstStrPtr = NULL;
- *dstSizePtr = 0;
- }
- // advance our destination ptr
- p += *dstSizePtr;
- // advance both ptrs to next string
- srcSizePtr++;
- srcStrPtr++;
- dstSizePtr++;
- dstStrPtr++;
- }
- return buf;
- }
- // convert offsets back into ptrs
- int32_t deserializeMsg ( int32_t baseSize ,
- int32_t *firstSizeParm ,
- int32_t *lastSizeParm ,
- char **firstStrPtr ,
- char *stringBuf ) {
- // point to our string buffer
- char *p = stringBuf;//getStringBuf(); // m_buf;
- // then store the strings!
- int32_t *sizePtr = firstSizeParm;//getFirstSizeParm(); // &size_qbuf;
- int32_t *sizeEnd = lastSizeParm;//getLastSizeParm (); // &size_displayMet
- char **strPtr = firstStrPtr;//getFirstStrPtr (); // &ptr_qbuf;
- for ( ; sizePtr <= sizeEnd ; ) {
- // convert the offset to a ptr
- *strPtr = p;
- // make it NULL if size is 0 though
- if ( *sizePtr == 0 ) *strPtr = NULL;
- // sanity check
- if ( *sizePtr < 0 ) { g_errno = ECORRUPTDATA; return -1;}
- // advance our destination ptr
- p += *sizePtr;
- // advance both ptrs to next string
- sizePtr++;
- strPtr++;
- }
- // return how many bytes we processed
- return baseSize + (p - stringBuf);//getStringBuf());
- }
- bool deserializeMsg2 ( char **firstStrPtr , // ptr_url
- int32_t *firstSizeParm ) { // size_url
- int nptrs=((char *)firstSizeParm-(char *)firstStrPtr)/sizeof(char *);
- // point to our string buffer
- char *p = ((char *)firstSizeParm + sizeof(int32_t)*nptrs);
- // then store the strings!
- int32_t *sizePtr = firstSizeParm;//getFirstSizeParm(); // &size_qbuf;
- //int32_t *sizeEnd = lastSizeParm;//getLastSizeParm (); // &size_displ
- char **strPtr = firstStrPtr;//getFirstStrPtr (); // &ptr_qbuf;
- int count = 0;
- for ( ; count < nptrs ; count++ ) { // sizePtr <= sizeEnd ; ) {
- // convert the offset to a ptr
- *strPtr = p;
- // make it NULL if size is 0 though
- if ( *sizePtr == 0 ) *strPtr = NULL;
- // sanity check
- if ( *sizePtr < 0 ) return false;//{ char *xx = NULL; *xx =0; }
- // advance our destination ptr
- p += *sizePtr;
- // advance both ptrs to next string
- sizePtr++;
- strPtr++;
- }
- // return how many bytes we processed
- //return baseSize + (p - stringBuf);//getStringBuf());
- return true;
- }
- // print it to stdout for debugging Dates.cpp
- int32_t printTime ( time_t ttt ) {
- //char *s = ctime(&ttt);
- // print in UTC!
- char *s = asctime ( gmtime(&ttt) );
- // strip \n
- s[gbstrlen(s)-1] = '\0';
- fprintf(stderr,"%s UTC\n",s);
- return 0;
- }
- // this uses our local timezone which is MST, so we need to tell
- // it to use UTC somehow...
- time_t mktime_utc ( struct tm *ttt ) {
- time_t local = mktime ( ttt );
- // bad?
- if ( local < 0 ) return local;
- /*
- // sanity check
- static char s_mm = 1;
- static int32_t s_localOff;
- if ( s_mm ) {
- s_mm = 0;
- struct tm ff;
- ff.tm_mon = 0;
- ff.tm_year = 70;
- ff.tm_mday = 1;
- ff.tm_hour = 0;
- ff.tm_min = 0;
- ff.tm_sec = 0;
- int32_t qq = mktime ( &ff );
- //fprintf(stderr,"qq=%"INT32"\n",qq);
- // . set this then
- // . we subtract s_localOff to further mktime() returns to
- // get it into utc
- s_localOff = qq;
- // sanity
- if ( s_localOff != timezone ) { char *xx=NULL;*xx=0; }
- }
- */
- // see what our timezone is!
- //fprintf(stderr,"%"INT32"=tz\n",timezone);
- // mod that
- return local - timezone;
- }
- bool verifyUtf8 ( char *txt , int32_t tlen ) {
- if ( ! txt || tlen <= 0 ) return true;
- char size;
- char *p = txt;
- char *pend = txt + tlen;
- for ( ; p < pend ; p += size ) {
- size = getUtf8CharSize(p);
- // skip if ascii
- if ( ! (p[0] & 0x80) ) continue;
- // ok, it's a utf8 char, it must have both hi bits set
- if ( (p[0] & 0xc0) != 0xc0 ) return false;
- // if only one byte, we are done.. how can that be?
- if ( size == 1 ) return false;
- //if ( ! utf8IsSane ( p[0] ) ) return false;
- // successive utf8 chars must have & 0xc0 be equal to 0x80
- // but the first char it must equal 0xc0, both set
- if ( (p[1] & 0xc0) != 0x80 ) return false;
- if ( size == 2 ) continue;
- if ( (p[2] & 0xc0) != 0x80 ) return false;
- if ( size == 3 ) continue;
- if ( (p[3] & 0xc0) != 0x80 ) return false;
- }
- if ( p != pend ) return false;
- return true;
- }
- bool verifyUtf8 ( char *txt ) {
- int32_t tlen = gbstrlen(txt);
- return verifyUtf8(txt,tlen);
- }