PageRenderTime 69ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 1ms

/trunk/ThirdParty/HTMLayoutSDK/include/aux-cvt.h

#
C++ Header | 710 lines | 547 code | 44 blank | 119 comment | 48 complexity | 0cbde8644c06f6fc22d4f408604c2960 MD5 | raw file
Possible License(s): LGPL-2.0, AGPL-3.0, MIT, CC-BY-SA-3.0, GPL-2.0
  1. #ifndef __json_aux_h__
  2. #define __json_aux_h__
  3. /*
  4. * Terra Informatica Sciter and HTMLayout Engines
  5. * http://terrainformatica.com/sciter, http://terrainformatica.com/htmlayout
  6. *
  7. * basic primitives.
  8. *
  9. * The code and information provided "as-is" without
  10. * warranty of any kind, either expressed or implied.
  11. *
  12. * (C) 2003-2006, Andrew Fedoniouk (andrew@terrainformatica.com)
  13. */
  14. /**\file
  15. * \brief primitives
  16. **/
  17. /*
  18. pod::copy<T> - memcpy wrapper
  19. pod::move<T> - memmove wrapper
  20. pod::buffer<T> - dynamic buffer, string builder, etc.
  21. utf8::towcs() - utf8 to wchar_t* converter
  22. utf8::fromwcs() - wchar_t* to utf8 converter
  23. utf8::ostream - raw ASCII/UNICODE -> UTF8 converter
  24. utf8::oxstream - ASCII/UNICODE -> UTF8 converter with XML support
  25. inline bool streq(const char* s, const char* s1) - NULL safe string comparison function
  26. inline bool wcseq(const wchar* s, const wchar* s1) - NULL safe wide string comparison function
  27. inline bool streqi(const char* s, const char* s1) - the same, but case independent
  28. inline bool wcseqi(const wchar* s, const wchar* s1) - the same, but case independent
  29. w2a - helper object for const wchar_t* to const char* conversion
  30. a2w - helper object for const char* to const wchar_t* conversion
  31. w2utf - helper object for const wchar_t* to utf8 conversion
  32. utf2w - helper object for utf8 to const wchar_t* conversion
  33. t2w - const TCHAR* to const wchar_t* conversion, #definition
  34. w2t - const wchar_t* to const TCHAR* conversion, #definition
  35. itoa, itow - int to const char* converter
  36. atoi, wtoi - const char* to int converter (parser)
  37. ftoa, ftow - double to const char* converter
  38. */
  39. #pragma once
  40. #include <assert.h>
  41. #include <wchar.h>
  42. #include <string.h>
  43. #include <string>
  44. #include <stdlib.h>
  45. #include <stdio.h>
  46. // disable that warnings in VC 2005
  47. #pragma warning( push )
  48. #pragma warning(disable:4786) //identifier was truncated...
  49. #pragma warning(disable:4996) //'strcpy' was declared deprecated
  50. #pragma warning(disable:4100) //unreferenced formal parameter
  51. #ifndef byte
  52. typedef unsigned char byte;
  53. #endif
  54. //#include "aux-slice.h"
  55. // WARNING: macros below must be used only for passing parameters to functions!
  56. /*
  57. #if !defined(W2A) // wchar to multi-byte string converter (current locale)
  58. #define W2A aux::w2a
  59. #endif
  60. #if !defined(A2W) // multi-byte to wchar string converter (current locale)
  61. #define A2W aux::a2w
  62. #endif
  63. #if !defined(UTF2W) // utf-8 to wchar string converter
  64. #define UTF2W aux::utf2w
  65. #endif
  66. #if !defined(W2UTF) // wchar to utf-8 string converter
  67. #define W2UTF aux::w2utf
  68. #endif
  69. #if !defined(W2T)
  70. #if !defined(UNICODE)
  71. #define W2T(S) aux::w2a(S)
  72. #else
  73. #define W2T(S) (S)
  74. #endif
  75. #endif
  76. #if !defined(T2W)
  77. #if !defined(UNICODE)
  78. #define T2W(S) aux::a2w(S)
  79. #else
  80. #define T2W(S) (S)
  81. #endif
  82. #endif
  83. #if !defined(A2T)
  84. #if !defined(UNICODE)
  85. #define A2T(S) (S)
  86. #else
  87. #define A2T(S) aux::a2w(S)
  88. #endif
  89. #endif
  90. #if !defined(T2A)
  91. #if !defined(UNICODE)
  92. #define T2A(S) (S)
  93. #else
  94. #define T2A(S) aux::w2a(S)
  95. #endif
  96. #endif
  97. */
  98. #ifdef UNICODE
  99. #define a2t( S ) aux::a2w(S)
  100. #define t2a( S ) aux::w2a(S)
  101. #define w2t( S ) (S)
  102. #define t2w( S ) (S)
  103. #define t2i( S ) aux::wtoi(S,0)
  104. #define i2t( S ) aux::itow(S)
  105. #else
  106. #define a2t( S ) (S)
  107. #define t2a( S ) (S)
  108. #define w2t( S ) aux::w2a(S)
  109. #define t2w( S ) aux::a2w(S)
  110. #define t2i( S ) aux::atoi(S,0)
  111. #define i2t( S ) aux::itoa(S)
  112. #endif
  113. #define w2u( S ) aux::w2utf(S)
  114. #define u2w( S ) aux::utf2w(S)
  115. #define i2a( I ) aux::itoa(I)
  116. #define i2w( I ) aux::itow(I)
  117. #define a2i( S ) aux::atoi(S,0)
  118. #define w2i( S ) aux::wtoi(S,0)
  119. inline void* zalloc ( size_t sz)
  120. {
  121. void* p = malloc(sz);
  122. memset(p,0,sz);
  123. return p;
  124. }
  125. //elements in array literal
  126. #define items_in(a) (sizeof(a)/sizeof(a[0]))
  127. //chars in sting literal
  128. #define chars_in(s) (sizeof(s) / sizeof(s[0]) - 1)
  129. /**pod namespace - POD primitives. **/
  130. namespace pod
  131. {
  132. template <typename T> void copy ( T* dst, const T* src, size_t nelements)
  133. {
  134. memcpy(dst,src,nelements*sizeof(T));
  135. }
  136. template <typename T> void move ( T* dst, const T* src, size_t nelements)
  137. {
  138. memmove(dst,src,nelements*sizeof(T));
  139. }
  140. /** buffer - in-memory dynamic buffer implementation. **/
  141. template <typename T>
  142. class buffer
  143. {
  144. T* _body;
  145. size_t _allocated;
  146. size_t _size;
  147. T* reserve(size_t size)
  148. {
  149. size_t newsize = _size + size;
  150. if( newsize > _allocated )
  151. {
  152. _allocated = (_allocated * 3) / 2;
  153. if(_allocated < newsize) _allocated = newsize;
  154. T *newbody = new T[_allocated];
  155. copy(newbody,_body,_size);
  156. delete[] _body;
  157. _body = newbody;
  158. }
  159. return _body + _size;
  160. }
  161. public:
  162. buffer():_size(0) { _body = new T[_allocated = 256]; }
  163. ~buffer() { delete[] _body; }
  164. const T * data()
  165. {
  166. if(_size == _allocated) reserve(1);
  167. _body[_size] = 0; return _body;
  168. }
  169. size_t length() const { return _size; }
  170. void push(T c) { *reserve(1) = c; ++_size; }
  171. void push(const T *pc, size_t sz) { copy(reserve(sz),pc,sz); _size += sz; }
  172. void clear() { _size = 0; }
  173. };
  174. typedef buffer<byte> byte_buffer;
  175. typedef buffer<wchar_t> wchar_buffer;
  176. typedef buffer<char> char_buffer;
  177. }
  178. namespace utf8
  179. {
  180. // convert utf8 code unit sequence to wchar_t sequence
  181. inline bool towcs(const byte *utf8, size_t length, pod::wchar_buffer& outbuf)
  182. {
  183. if(!utf8 || length == 0) return true;
  184. const byte* pc = (const byte*)utf8;
  185. const byte* last = pc + length;
  186. unsigned int b;
  187. unsigned int num_errors = 0;
  188. while (pc < last)
  189. {
  190. b = *pc++;
  191. if( !b ) break; // 0 - is eos in all utf encodings
  192. if ((b & 0x80) == 0)
  193. {
  194. // 1-byte sequence: 000000000xxxxxxx = 0xxxxxxx
  195. ;
  196. }
  197. else if ((b & 0xe0) == 0xc0)
  198. {
  199. // 2-byte sequence: 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
  200. if(pc == last) { outbuf.push('?'); ++num_errors; break; }
  201. b = (b & 0x1f) << 6;
  202. b |= (*pc++ & 0x3f);
  203. }
  204. else if ((b & 0xf0) == 0xe0)
  205. {
  206. // 3-byte sequence: zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
  207. if(pc >= last - 1) { outbuf.push('?'); ++num_errors; break; }
  208. b = (b & 0x0f) << 12;
  209. b |= (*pc++ & 0x3f) << 6;
  210. b |= (*pc++ & 0x3f);
  211. if(b == 0xFEFF &&
  212. outbuf.length() == 0) // bom at start
  213. continue; // skip it
  214. }
  215. else if ((b & 0xf8) == 0xf0)
  216. {
  217. // 4-byte sequence: 11101110wwwwzzzzyy + 110111yyyyxxxxxx = 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
  218. if(pc >= last - 2) { outbuf.push('?'); break; }
  219. b = (b & 0x07) << 18;
  220. b |= (*pc++ & 0x3f) << 12;
  221. b |= (*pc++ & 0x3f) << 6;
  222. b |= (*pc++ & 0x3f);
  223. // b shall contain now full 21-bit unicode code point.
  224. assert((b & 0x1fffff) == b);
  225. if((b & 0x1fffff) != b)
  226. {
  227. outbuf.push('?');
  228. ++num_errors;
  229. continue;
  230. }
  231. //#pragma warning( suppress:4127 ) // warning C4127: conditional expression is constant
  232. if( sizeof(wchar_t) == 16 ) // Seems like Windows, wchar_t is utf16 code units sequence there.
  233. {
  234. outbuf.push( wchar_t(0xd7c0 + (b >> 10)) );
  235. outbuf.push( wchar_t(0xdc00 | (b & 0x3ff)) );
  236. }
  237. //#pragma warning( suppress:4127 ) // warning C4127: conditional expression is constant
  238. else if( sizeof(wchar_t) >= 21 ) // wchar_t is full ucs-4
  239. {
  240. outbuf.push( wchar_t(b) );
  241. }
  242. else
  243. {
  244. assert(0); // what? wchar_t is single byte here?
  245. }
  246. }
  247. else
  248. {
  249. assert(0); //bad start for UTF-8 multi-byte sequence"
  250. ++num_errors;
  251. b = '?';
  252. }
  253. outbuf.push( wchar_t(b) );
  254. }
  255. return num_errors == 0;
  256. }
  257. inline bool fromwcs(const wchar_t* wcs, size_t length, pod::byte_buffer& outbuf)
  258. {
  259. const wchar_t *pc = wcs;
  260. const wchar_t *end = pc + length;
  261. unsigned int num_errors = 0;
  262. for(unsigned int c = *pc; pc < end ; c = *(++pc))
  263. {
  264. if (c < (1 << 7))
  265. {
  266. outbuf.push(byte(c));
  267. }
  268. else if (c < (1 << 11))
  269. {
  270. outbuf.push(byte((c >> 6) | 0xc0));
  271. outbuf.push(byte((c & 0x3f) | 0x80));
  272. }
  273. else if (c < (1 << 16))
  274. {
  275. outbuf.push(byte((c >> 12) | 0xe0));
  276. outbuf.push(byte(((c >> 6) & 0x3f) | 0x80));
  277. outbuf.push(byte((c & 0x3f) | 0x80));
  278. }
  279. else if (c < (1 << 21))
  280. {
  281. outbuf.push(byte((c >> 18) | 0xf0));
  282. outbuf.push(byte(((c >> 12) & 0x3f) | 0x80));
  283. outbuf.push(byte(((c >> 6) & 0x3f) | 0x80));
  284. outbuf.push(byte((c & 0x3f) | 0x80));
  285. }
  286. else
  287. ++num_errors;
  288. }
  289. return num_errors == 0;
  290. }
  291. // UTF8 stream
  292. // class T must have two methods:
  293. // void push(unsigned char c)
  294. // void push(const unsigned char *pc, size_t sz)
  295. // bool X - true - XML markup character conversion (characters '<','>',etc).
  296. // false - no conversion at all.
  297. template <class T, bool X = true>
  298. class ostream_t : public T
  299. {
  300. public:
  301. ostream_t()
  302. {
  303. // utf8 byte order mark
  304. static unsigned char BOM[] = { 0xEF, 0xBB, 0xBF };
  305. T::push(BOM, sizeof(BOM));
  306. }
  307. // intended to handle only ascii-7 strings
  308. // use this for markup output
  309. ostream_t& operator << (const char* str)
  310. {
  311. T::push((const unsigned char*)str,strlen(str)); return *this;
  312. }
  313. ostream_t& operator << (char c)
  314. {
  315. T::push((unsigned char)c); return *this;
  316. }
  317. // use UNICODE chars for value output
  318. ostream_t& operator << (const wchar_t* wstr)
  319. {
  320. const wchar_t *pc = wstr;
  321. for(unsigned int c = *pc; c ; c = *(++pc))
  322. {
  323. if(X)
  324. switch(c)
  325. {
  326. case '<': *this << "&lt;"; continue;
  327. case '>': *this << "&gt;"; continue;
  328. case '&': *this << "&amp;"; continue;
  329. case '"': *this << "&quot;"; continue;
  330. case '\'': *this << "&apos;"; continue;
  331. }
  332. if (c < (1 << 7))
  333. {
  334. T::push (byte(c));
  335. }
  336. else if (c < (1 << 11)) {
  337. T::push (byte((c >> 6) | 0xc0));
  338. T::push (byte((c & 0x3f) | 0x80));
  339. }
  340. else if (c < (1 << 16)) {
  341. T::push (byte((c >> 12) | 0xe0));
  342. T::push (byte(((c >> 6) & 0x3f) | 0x80));
  343. T::push (byte((c & 0x3f) | 0x80));
  344. }
  345. else if (c < (1 << 21))
  346. {
  347. T::push (byte((c >> 18) | 0xf0));
  348. T::push (byte(((c >> 12) & 0x3f) | 0x80));
  349. T::push (byte(((c >> 6) & 0x3f) | 0x80));
  350. T::push (byte((c & 0x3f) | 0x80));
  351. }
  352. }
  353. return *this;
  354. }
  355. ostream_t& operator << (const std::wstring& str)
  356. {
  357. return *this << (str.c_str());
  358. }
  359. };
  360. // raw ASCII/UNICODE -> UTF8 converter
  361. typedef ostream_t<pod::byte_buffer,false> ostream;
  362. // ASCII/UNICODE -> UTF8 converter with XML support
  363. typedef ostream_t<pod::byte_buffer,true> oxstream;
  364. } // namespace utf8
  365. namespace aux
  366. {
  367. template <typename T> struct slice;
  368. template <class T>
  369. inline T
  370. limit ( T v, T minv, T maxv )
  371. {
  372. assert(minv < maxv);
  373. if (minv >= maxv)
  374. return minv;
  375. if (v > maxv) return maxv;
  376. if (v < minv) return minv;
  377. return v;
  378. }
  379. // safe string comparison
  380. inline bool streq(const char* s, const char* s1)
  381. {
  382. if( s && s1 )
  383. return strcmp(s,s1) == 0;
  384. return false;
  385. }
  386. // safe wide string comparison
  387. inline bool wcseq(const wchar_t* s, const wchar_t* s1)
  388. {
  389. if( s && s1 )
  390. return wcscmp(s,s1) == 0;
  391. return false;
  392. }
  393. // safe case independent string comparison
  394. inline bool streqi(const char* s, const char* s1)
  395. {
  396. if( s && s1 )
  397. return _stricmp(s,s1) == 0;
  398. return false;
  399. }
  400. // safe case independent wide string comparison
  401. inline bool wcseqi(const wchar_t* s, const wchar_t* s1)
  402. {
  403. if( s && s1 )
  404. return wcsicmp(s,s1) == 0;
  405. return false;
  406. }
  407. // helper convertor objects wchar_t to ACP and vice versa
  408. class w2a
  409. {
  410. char local[16];
  411. char* buffer;
  412. unsigned int n;
  413. void init(const wchar_t* wstr, unsigned int nu)
  414. {
  415. n = WideCharToMultiByte(CP_ACP,0,wstr,nu,0,0,0,0);
  416. buffer = (n < (16-1))? local:new char[n+1];
  417. WideCharToMultiByte(CP_ACP,0,wstr,nu,buffer,n,0,0);
  418. buffer[n] = 0;
  419. }
  420. public:
  421. explicit w2a(const wchar_t* wstr):buffer(0),n(0)
  422. {
  423. if(wstr)
  424. init(wstr,(unsigned int)wcslen(wstr));
  425. }
  426. explicit w2a(const std::wstring& wstr):buffer(0),n(0)
  427. {
  428. init(wstr.c_str(),(unsigned int)wstr.length());
  429. }
  430. explicit w2a(slice<wchar_t> s);
  431. ~w2a() { if(buffer != local) delete[] buffer; }
  432. unsigned int length() const { return n; }
  433. operator const char*() { return buffer; }
  434. };
  435. class a2w
  436. {
  437. wchar_t local[16];
  438. wchar_t* buffer;
  439. unsigned int nu;
  440. void init(const char* str, unsigned int n)
  441. {
  442. nu = MultiByteToWideChar(CP_THREAD_ACP,0,str,n,0,0);
  443. buffer = ( nu < (16-1) )? local: new wchar_t[nu+1];
  444. MultiByteToWideChar(CP_ACP,0,str,n,buffer,nu);
  445. buffer[nu] = 0;
  446. }
  447. public:
  448. explicit a2w(const char* str):buffer(0), nu(0)
  449. {
  450. if(str)
  451. init(str, (unsigned int)strlen(str) );
  452. }
  453. explicit a2w(slice<char> s);
  454. ~a2w() { if(buffer != local) delete[] buffer; }
  455. unsigned int length() const { return nu; }
  456. operator const wchar_t*() { return buffer; }
  457. };
  458. // helper convertor objects wchar_t to utf8 and vice versa
  459. class utf2w
  460. {
  461. pod::wchar_buffer buffer;
  462. public:
  463. explicit utf2w(const byte* utf8, size_t length = 0)
  464. {
  465. if(utf8)
  466. {
  467. if( length == 0) length = strlen((const char*)utf8);
  468. utf8::towcs(utf8, length ,buffer);
  469. }
  470. }
  471. explicit utf2w(const char* utf8, size_t length = 0)
  472. {
  473. if(utf8)
  474. {
  475. if( length == 0) length = strlen(utf8);
  476. utf8::towcs((const byte*)utf8, length ,buffer);
  477. }
  478. }
  479. ~utf2w() {}
  480. operator const wchar_t*() { return buffer.data(); }
  481. unsigned int length() const { return (unsigned int)buffer.length(); }
  482. pod::wchar_buffer& get_buffer() { return buffer; }
  483. };
  484. class w2utf
  485. {
  486. pod::byte_buffer buffer;
  487. public:
  488. explicit w2utf(const wchar_t* wstr)
  489. {
  490. if(wstr)
  491. {
  492. size_t nu = wcslen(wstr);
  493. utf8::fromwcs(wstr,nu,buffer);
  494. }
  495. }
  496. explicit w2utf(const std::wstring& str)
  497. {
  498. utf8::fromwcs(str.c_str(),str.length(),buffer);
  499. }
  500. ~w2utf() {}
  501. operator const byte*() { return buffer.data(); }
  502. operator const char*() { return (const char*)buffer.data(); }
  503. unsigned int length() const { return (unsigned int)buffer.length(); }
  504. };
  505. /** Integer to string converter.
  506. Use it as ostream << itoa(234)
  507. **/
  508. class itoa
  509. {
  510. char buffer[38];
  511. public:
  512. itoa(int n, int radix = 10)
  513. {
  514. _itoa(n,buffer,radix);
  515. }
  516. operator const char*() { return buffer; }
  517. };
  518. /** Integer to wstring converter.
  519. Use it as wostream << itow(234)
  520. **/
  521. class itow
  522. {
  523. wchar_t buffer[38];
  524. public:
  525. itow(int n, int radix = 10)
  526. {
  527. _itow(n,buffer,radix);
  528. }
  529. operator const wchar_t*() { return buffer; }
  530. };
  531. /** Float to string converter.
  532. Use it as ostream << ftoa(234.1); or
  533. Use it as ostream << ftoa(234.1,"pt"); or
  534. **/
  535. class ftoa
  536. {
  537. char buffer[64];
  538. public:
  539. ftoa(double d, const char* units = "", int fractional_digits = 1)
  540. {
  541. _snprintf(buffer, 64, "%.*f%s", fractional_digits, d, units );
  542. buffer[63] = 0;
  543. }
  544. operator const char*() { return buffer; }
  545. };
  546. /** Float to wstring converter.
  547. Use it as wostream << ftow(234.1); or
  548. Use it as wostream << ftow(234.1,"pt"); or
  549. **/
  550. class ftow
  551. {
  552. wchar_t buffer[64];
  553. public:
  554. ftow(double d, const wchar_t* units = L"", int fractional_digits = 1)
  555. {
  556. _snwprintf(buffer, 64, L"%.*f%s", fractional_digits, d, units );
  557. buffer[63] = 0;
  558. }
  559. operator const wchar_t*() { return buffer; }
  560. };
  561. /** wstring to integer parser.
  562. **/
  563. inline int wtoi(const wchar_t *s, int default_value = 0)
  564. {
  565. if( !s ) return default_value;
  566. wchar_t *lastptr;
  567. long i = wcstol( s, &lastptr, 10 );
  568. return (lastptr != s)? (int)i : default_value;
  569. }
  570. /** string to integer parser.
  571. **/
  572. inline int atoi(const char *s, int default_value = 0)
  573. {
  574. if( !s ) return default_value;
  575. char *lastptr;
  576. long i = strtol( s, &lastptr, 10 );
  577. return (lastptr != s)? (int)i : default_value;
  578. }
  579. // class T must have two methods:
  580. // void push(wchar_t c)
  581. // void push(const wchar_t *pc, size_t sz)
  582. template <class T>
  583. class ostream_t : public T
  584. {
  585. public:
  586. ostream_t() {}
  587. // intended to handle only ascii-7 strings
  588. // use this for markup output
  589. ostream_t& operator << (const char* str)
  590. {
  591. if(!str) return *this;
  592. while( *str ) T::push(*str++);
  593. return *this;
  594. }
  595. ostream_t& operator << (char c)
  596. {
  597. T::push(c); return *this;
  598. }
  599. // intended to handle only ascii-7 strings
  600. // use this for markup output
  601. ostream_t& operator << (const wchar_t* str)
  602. {
  603. if(!str || !str[0]) return *this;
  604. T::push(str,wcslen(str)); return *this;
  605. }
  606. ostream_t& operator << (wchar_t c)
  607. {
  608. T::push(c); return *this;
  609. }
  610. };
  611. // wostream - a.k.a. wstring builder - buffer for dynamic composition of wchar_t strings
  612. typedef ostream_t<pod::wchar_buffer> wostream;
  613. }
  614. #pragma warning( pop )
  615. #endif