/src/ftk_util.c

http://ftk.googlecode.com/ · C · 852 lines · 685 code · 125 blank · 42 comment · 267 complexity · ec86dbcc7755acfe23541f385ab970f6 MD5 · raw file

  1. /*
  2. * File: ftk_util.c
  3. * Author: Li XianJing <xianjimli@hotmail.com>
  4. * Brief: common used functions.
  5. *
  6. * Copyright (c) 2009 - 2010 Li XianJing <xianjimli@hotmail.com>
  7. *
  8. * Licensed under the Academic Free License version 2.1
  9. *
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License as published by
  12. * the Free Software Foundation; either version 2 of the License, or
  13. * (at your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. * GNU General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public License
  21. * along with this program; if not, write to the Free Software
  22. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  23. */
  24. /*
  25. * History:
  26. * ================================================================
  27. * 2009-10-03 Li XianJing <xianjimli@hotmail.com> created
  28. *
  29. */
  30. #include "ftk_log.h"
  31. #include "ftk_util.h"
  32. /*UTF8-related functions are copied from glib.*/
  33. #define UTF8_COMPUTE(Char, Mask, Len) \
  34. if (Char < 128) \
  35. { \
  36. Len = 1; \
  37. Mask = 0x7f; \
  38. } \
  39. else if ((Char & 0xe0) == 0xc0) \
  40. { \
  41. Len = 2; \
  42. Mask = 0x1f; \
  43. } \
  44. else if ((Char & 0xf0) == 0xe0) \
  45. { \
  46. Len = 3; \
  47. Mask = 0x0f; \
  48. } \
  49. else if ((Char & 0xf8) == 0xf0) \
  50. { \
  51. Len = 4; \
  52. Mask = 0x07; \
  53. } \
  54. else if ((Char & 0xfc) == 0xf8) \
  55. { \
  56. Len = 5; \
  57. Mask = 0x03; \
  58. } \
  59. else if ((Char & 0xfe) == 0xfc) \
  60. { \
  61. Len = 6; \
  62. Mask = 0x01; \
  63. } \
  64. else \
  65. Len = -1;
  66. #define UTF8_LENGTH(Char) \
  67. ((Char) < 0x80 ? 1 : \
  68. ((Char) < 0x800 ? 2 : \
  69. ((Char) < 0x10000 ? 3 : \
  70. ((Char) < 0x200000 ? 4 : \
  71. ((Char) < 0x4000000 ? 5 : 6)))))
  72. #define UTF8_GET(Result, Chars, Count, Mask, Len) \
  73. (Result) = (Chars)[0] & (Mask); \
  74. for ((Count) = 1; (Count) < (Len); ++(Count)) \
  75. { \
  76. if (((Chars)[(Count)] & 0xc0) != 0x80) \
  77. { \
  78. (Result) = -1; \
  79. break; \
  80. } \
  81. (Result) <<= 6; \
  82. (Result) |= ((Chars)[(Count)] & 0x3f); \
  83. }
  84. #define UNICODE_VALID(Char) \
  85. ((Char) < 0x110000 && \
  86. (((Char) & 0xFFFFF800) != 0xD800) && \
  87. ((Char) < 0xFDD0 || (Char) > 0xFDEF) && \
  88. ((Char) & 0xFFFE) != 0xFFFE)
  89. static const char utf8_skip_data[256] = {
  90. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  91. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  92. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  93. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  94. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  95. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  96. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  97. 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
  98. };
  99. const char * const g_utf8_skip = utf8_skip_data;
  100. #define g_utf8_next_char(p) (char *)((p) + g_utf8_skip[*(const unsigned char *)(p)])
  101. unsigned short utf8_get_char (const char *p, const char** next)
  102. {
  103. int i, mask = 0, len;
  104. unsigned short result;
  105. unsigned char c = (unsigned char) *p;
  106. UTF8_COMPUTE (c, mask, len);
  107. if (len == -1)
  108. return (unsigned short)-1;
  109. UTF8_GET (result, p, i, mask, len);
  110. if(next != NULL)
  111. {
  112. *next = g_utf8_next_char(p);
  113. }
  114. return result;
  115. }
  116. unsigned short utf8_get_prev_char (const char *p, const char** prev)
  117. {
  118. int i = 0;
  119. for(i = 1; i < 8; i++)
  120. {
  121. unsigned char val = p[-i];
  122. if((val & 0x80) && !(val & 0x40))
  123. {
  124. continue;
  125. }
  126. else
  127. {
  128. if(prev != NULL)
  129. {
  130. *prev = p-i;
  131. }
  132. return utf8_get_char(p-i, NULL);
  133. }
  134. }
  135. if(prev != NULL)
  136. {
  137. *prev = p;
  138. }
  139. return 0;
  140. }
  141. int utf8_count_char(const char *str, int length)
  142. {
  143. int nr = 0;
  144. const char* iter = str;
  145. return_val_if_fail(str != NULL, 0);
  146. while(utf8_get_char(iter, &iter) && (iter - str) <= (int)length)
  147. {
  148. nr++;
  149. }
  150. return nr;
  151. }
  152. int unichar_to_utf8 (unsigned short c, char* outbuf)
  153. {
  154. /* If this gets modified, also update the copy in g_string_insert_unichar() */
  155. size_t len = 0;
  156. int first;
  157. int i;
  158. if (c < 0x80)
  159. {
  160. first = 0;
  161. len = 1;
  162. }
  163. else if (c < 0x800)
  164. {
  165. first = 0xc0;
  166. len = 2;
  167. }
  168. else if (c < 0x10000)
  169. {
  170. first = 0xe0;
  171. len = 3;
  172. }
  173. else if (c < 0x200000)
  174. {
  175. first = 0xf0;
  176. len = 4;
  177. }
  178. else if (c < 0x4000000)
  179. {
  180. first = 0xf8;
  181. len = 5;
  182. }
  183. else
  184. {
  185. first = 0xfc;
  186. len = 6;
  187. }
  188. if (outbuf)
  189. {
  190. for (i = len - 1; i > 0; --i)
  191. {
  192. outbuf[i] = (c & 0x3f) | 0x80;
  193. c >>= 6;
  194. }
  195. outbuf[0] = c | first;
  196. }
  197. return len;
  198. }
  199. #define SURROGATE_VALUE(h,l) (((h) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000)
  200. char* utf16_to_utf8 (const unsigned short *str, long len, char* utf8, int out_len)
  201. {
  202. /* This function and g_utf16_to_ucs4 are almost exactly identical - The lines that differ
  203. * are marked.
  204. */
  205. const unsigned short *in;
  206. char *out;
  207. char *result = NULL;
  208. int n_bytes;
  209. unsigned short high_surrogate;
  210. return_val_if_fail (str != NULL, NULL);
  211. n_bytes = 0;
  212. in = str;
  213. high_surrogate = 0;
  214. while ((len < 0 || in - str < len) && *in)
  215. {
  216. unsigned short c = *in;
  217. unsigned short wc;
  218. if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
  219. {
  220. if (high_surrogate)
  221. {
  222. wc = SURROGATE_VALUE (high_surrogate, c);
  223. high_surrogate = 0;
  224. }
  225. else
  226. {
  227. ftk_loge("Invalid sequence in conversion input");
  228. goto err_out;
  229. }
  230. }
  231. else
  232. {
  233. if (high_surrogate)
  234. {
  235. ftk_loge("Invalid sequence in conversion input");
  236. goto err_out;
  237. }
  238. if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
  239. {
  240. high_surrogate = c;
  241. goto next1;
  242. }
  243. else
  244. wc = c;
  245. }
  246. /********** DIFFERENT for UTF8/UCS4 **********/
  247. n_bytes += UTF8_LENGTH (wc);
  248. next1:
  249. in++;
  250. }
  251. if (high_surrogate)
  252. {
  253. ftk_loge("Partial character sequence at end of input");
  254. goto err_out;
  255. }
  256. /* At this point, everything is valid, and we just need to convert
  257. */
  258. /********** DIFFERENT for UTF8/UCS4 **********/
  259. //result = g_malloc (n_bytes + 1);
  260. result = utf8;
  261. assert(out_len > n_bytes);
  262. high_surrogate = 0;
  263. out = result;
  264. in = str;
  265. while (out < result + n_bytes)
  266. {
  267. unsigned short c = *in;
  268. unsigned short wc;
  269. if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
  270. {
  271. wc = SURROGATE_VALUE (high_surrogate, c);
  272. high_surrogate = 0;
  273. }
  274. else if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
  275. {
  276. high_surrogate = c;
  277. goto next2;
  278. }
  279. else
  280. wc = c;
  281. /********** DIFFERENT for UTF8/UCS4 **********/
  282. out += unichar_to_utf8 (wc, out);
  283. next2:
  284. in++;
  285. }
  286. /********** DIFFERENT for UTF8/UCS4 **********/
  287. *out = '\0';
  288. return result;
  289. err_out:
  290. return NULL;
  291. }
  292. static int ftk_hex_to_int(char c)
  293. {
  294. if(c >= '0' && c <= '9')
  295. {
  296. return c - '0';
  297. }
  298. else if(c >= 'A' && c <= 'F')
  299. {
  300. return c - 'A' + 0x0A;
  301. }
  302. else if(c >= 'a' && c <= 'f')
  303. {
  304. return c - 'a' + 0x0a;
  305. }
  306. return 0;
  307. }
  308. static int ftk_parse_color_1(const char* value)
  309. {
  310. return ftk_hex_to_int(value[0]) * 16 + ftk_hex_to_int(value[1]);
  311. }
  312. FtkColor ftk_parse_color( const char* value)
  313. {
  314. FtkColor color = {0};
  315. return_val_if_fail(value != NULL && strlen(value) >= 8, color);
  316. color.a = ftk_parse_color_1(value);
  317. color.r = ftk_parse_color_1(value + 2);
  318. color.g = ftk_parse_color_1(value + 4);
  319. color.b = ftk_parse_color_1(value + 6);
  320. return color;
  321. }
  322. #define IS_CURRENT(path) (((path)[0] == '.') && \
  323. ((path)[1] == '/' || ((path)[1] == '\\') || ((path)[1] == '\0')))
  324. #define IS_HOME(path) (((path)[0] == '~') && \
  325. ((path)[1] == '/' || ((path)[1] == '\\') || ((path)[1] == '\0')))
  326. #define IS_PARENT(path) (((path)[0] == '.') && ((path)[1] == '.') && \
  327. ((path)[2] == '/' || ((path)[2] == '\\') || ((path)[2] == '\0') ))
  328. #define BREAK_IF_LAST(str) if((str)[0] == '\0') break;
  329. char* normalize_path(const char* path_in, char path_out[FTK_MAX_PATH+1])
  330. {
  331. int i = 0;
  332. int in_index = 0;
  333. int out_index = 0;
  334. return_val_if_fail(path_in != NULL && path_out != NULL, NULL);
  335. path_out[0] = '\0';
  336. for(in_index = 0; path_in[in_index] != '\0'; in_index++)
  337. {
  338. if(in_index == 0)
  339. {
  340. if(IS_CURRENT(path_in))
  341. {
  342. ftk_getcwd(path_out, FTK_MAX_PATH);
  343. out_index = strlen(path_out);
  344. continue;
  345. }
  346. #ifdef LINUX
  347. else if(IS_HOME(path_in))
  348. {
  349. const char* home = getenv("HOME");
  350. if(home != NULL)
  351. {
  352. ftk_strcpy(path_out, home);
  353. out_index = strlen(path_out);
  354. }
  355. continue;
  356. }
  357. else if(path_in[0] != '/')
  358. {
  359. ftk_getcwd(path_out, FTK_MAX_PATH);
  360. out_index = strlen(path_out);
  361. path_out[out_index++] = '/';
  362. path_out[out_index++] = path_in[in_index];
  363. continue;
  364. }
  365. #endif
  366. }
  367. if(path_in[in_index] == '\\' || path_in[in_index] == '/')
  368. {
  369. if(out_index == 0 || path_out[out_index - 1] != '/')
  370. {
  371. path_out[out_index++] = '/';
  372. }
  373. }
  374. else if(IS_CURRENT(path_in+in_index) || IS_HOME(path_in+in_index))
  375. {
  376. in_index++;
  377. BREAK_IF_LAST(path_in+in_index);
  378. }
  379. else if(IS_PARENT(path_in+in_index))
  380. {
  381. if(out_index > 1)
  382. {
  383. if(path_out[out_index - 1] == '/')
  384. {
  385. for(--out_index; path_out[out_index - 1] != '/'; out_index--);
  386. }
  387. else
  388. {
  389. ftk_logd("%s:%d %s is invalid path\n", __FILE__, __LINE__, path_in);
  390. in_index += 2;
  391. }
  392. }
  393. else
  394. {
  395. ftk_logd("%s:%d %s is invalid path\n", __FILE__, __LINE__, path_in);
  396. in_index += 2;
  397. }
  398. BREAK_IF_LAST(path_in+in_index);
  399. }
  400. else
  401. {
  402. path_out[out_index++] = path_in[in_index];
  403. }
  404. if(out_index >= FTK_MAX_PATH)
  405. {
  406. break;
  407. }
  408. }
  409. path_out[out_index] = '\0';
  410. for(i = 0; i < out_index; i++)
  411. {
  412. if(path_out[i] == '\\' || path_out[i] == '/')
  413. {
  414. path_out[i] = FTK_PATH_DELIM;
  415. }
  416. }
  417. return path_out;
  418. }
  419. const char* ftk_normalize_path(char path[FTK_MAX_PATH+1])
  420. {
  421. char path_out[FTK_MAX_PATH+1] = {0};
  422. return_val_if_fail(path != NULL, NULL);
  423. normalize_path(path, path_out);
  424. ftk_strncpy(path, path_out, FTK_MAX_PATH);
  425. return path;
  426. }
  427. const char* utf8_move_forward(const char* str, int nr)
  428. {
  429. int i = 0;
  430. const char* next = str;
  431. for(i = 0; i < nr; i++)
  432. {
  433. utf8_get_char(next, &next);
  434. }
  435. return next;
  436. }
  437. #ifdef USE_LINEBREAK
  438. #include "linebreak/linebreak.h"
  439. const char* ftk_line_break(const char* start, const char* end)
  440. {
  441. const char* p = end;
  442. const char* next = NULL;
  443. unsigned short c1 = 0;
  444. unsigned short c2 = 0;
  445. static int linebreak_inited = 0;
  446. if(linebreak_inited == 0)
  447. {
  448. init_linebreak();
  449. linebreak_inited = 1;
  450. }
  451. c2 = utf8_get_char(p, &next);
  452. c1 = utf8_get_prev_char(p, NULL);
  453. if(c1 != '\n' && c1 != '\r' && c2 != '\0' && c2 != '\n' && c2 != '\r')
  454. {
  455. size_t i = 0;
  456. char brks[256] = {0};
  457. size_t len = end - start + 1;
  458. assert(len < sizeof(brks));
  459. set_linebreaks_utf8((const utf8_t*)start, len, "zh", brks);
  460. i = len - 2;
  461. for(; i > 0; i--)
  462. {
  463. if(brks[i] == LINEBREAK_ALLOWBREAK || brks[i] == LINEBREAK_MUSTBREAK)
  464. {
  465. end = start + i + 1;
  466. break;
  467. }
  468. }
  469. // while((unsigned char)(*end) >= 0x80) end--;
  470. }
  471. return end;
  472. }
  473. #else
  474. int ftk_can_break(unsigned short c1, unsigned short c2)
  475. {
  476. if(c1 > 0x80 || c2 > 0x80)
  477. {
  478. return 1;
  479. }
  480. if(isdigit(c1) && isdigit(c2))
  481. {
  482. return 0;
  483. }
  484. if(isalpha(c1) && isalpha(c2))
  485. {
  486. return 0;
  487. }
  488. return 1;
  489. }
  490. const char* ftk_line_break(const char* start, const char* end)
  491. {
  492. const char* p = end;
  493. const char* next = NULL;
  494. unsigned short c1 = 0;
  495. unsigned short c2 = 0;
  496. c2 = utf8_get_char(p, &next);
  497. c1 = utf8_get_prev_char(p, NULL);
  498. if(c1 != '\n' && c1 != '\r' && c2 != '\0' && c2 != '\n' && c2 != '\r')
  499. {
  500. while(!ftk_can_break(c1, c2) && p > start)
  501. {
  502. next = p;
  503. c2 = c1;
  504. c1 = utf8_get_prev_char(next, &p);
  505. }
  506. end = p;
  507. }
  508. return end;
  509. }
  510. #endif
  511. int ftk_str2bool(const char* str)
  512. {
  513. if(str == NULL || str[0] == '0' || strcmp(str, "false") == 0 || strcmp(str, "no") == 0)
  514. {
  515. return 0;
  516. }
  517. return 1;
  518. }
  519. char* ftk_strs_cat(char* str, int len, const char* first, ...)
  520. {
  521. va_list arg;
  522. size_t dst = 0;
  523. const char* iter = first;
  524. return_val_if_fail(str != NULL && len > 0, NULL);
  525. va_start(arg, first);
  526. while(iter != NULL && dst < len)
  527. {
  528. for(; dst < len && *iter; iter++, dst++)
  529. {
  530. str[dst] = *iter;
  531. }
  532. iter = va_arg(arg, char*);
  533. }
  534. va_end(arg);
  535. if(dst < len)
  536. {
  537. str[dst] = '\0';
  538. }
  539. else
  540. {
  541. str[len-1] = '\0';
  542. }
  543. return str;
  544. }
  545. static long ftk_strtol_internal(const char* str, const char **end, int base)
  546. {
  547. int i = 0;
  548. long n = 0;
  549. char c = 0;
  550. return_val_if_fail(str != NULL && (base == 10 || base == 8 || base == 16), 0);
  551. if(base == 10)
  552. {
  553. for(i = 0; str[i] && i < 10; i++)
  554. {
  555. c = str[i];
  556. if(c < '0' || c > '9')
  557. {
  558. break;
  559. }
  560. n = n * base + c - '0';
  561. }
  562. }
  563. else if(base == 8)
  564. {
  565. for(i = 0; str[i] && i < 10; i++)
  566. {
  567. c = str[i];
  568. if(c < '0' || c > '7')
  569. {
  570. break;
  571. }
  572. n = n * base + c - '0';
  573. }
  574. }
  575. else if(base == 16)
  576. {
  577. for(i = 0; str[i] && i < 10; i++)
  578. {
  579. c = str[i];
  580. if((c >= '0' && c <= '9'))
  581. {
  582. c -= '0';
  583. }
  584. else if(c >= 'a' && c <= 'f')
  585. {
  586. c = c - 'a' + 10;
  587. }
  588. else if(c >= 'A' && c <= 'F')
  589. {
  590. c = c - 'A' + 10;
  591. }
  592. else
  593. {
  594. break;
  595. }
  596. n = n * base + c;
  597. }
  598. }
  599. if(end != NULL)
  600. {
  601. *end = str+i;
  602. }
  603. return n;
  604. }
  605. long ftk_strtol(const char* str, const char **end, int base)
  606. {
  607. long n = 0;
  608. int neg = 0;
  609. return_val_if_fail(str != NULL, 0);
  610. while(*str == ' ' || *str == '\t') str++;
  611. if(*str == '+' || *str == '-')
  612. {
  613. neg = *str == '-';
  614. str++;
  615. }
  616. n = ftk_strtol_internal(str, end, base);
  617. return neg ? -n : n;
  618. }
  619. int ftk_atoi(const char* str)
  620. {
  621. return ftk_strtol(str, NULL, 10);
  622. }
  623. double ftk_atof(const char* str)
  624. {
  625. int n = 0;
  626. int f = 0;
  627. int neg = 0;
  628. double result = 0;
  629. const char* p = NULL;
  630. return_val_if_fail(str != NULL, 0);
  631. if(str[0] == '+' || str[0] == '-')
  632. {
  633. neg = str[0] == '-';
  634. str++;
  635. }
  636. n = ftk_strtol_internal(str, &p, 10);
  637. if(p != NULL && *p == '.')
  638. {
  639. f = ftk_strtol_internal(p+1, NULL, 10);
  640. }
  641. result = f;
  642. while(result >= 1)
  643. {
  644. result = result / 10;
  645. }
  646. result = n + result;
  647. return neg ? -result : result;
  648. }
  649. static const char* ftk_itoa_simple(char* str, int len, int n, const char** end)
  650. {
  651. int i = 0;
  652. int value = n;
  653. int need_len = 0;
  654. return_val_if_fail(str != NULL && len > 2, NULL);
  655. if(n == 0)
  656. {
  657. str[0] = '0';
  658. str[1] = '\0';
  659. if(end != NULL)
  660. {
  661. *end = str + 1;
  662. }
  663. return str;
  664. }
  665. if(n < 0)
  666. {
  667. n = -n;
  668. str[0] = '-';
  669. need_len++;
  670. }
  671. value = n;
  672. while(value > 0)
  673. {
  674. value = value / 10;
  675. need_len++;
  676. }
  677. need_len++; /*for null char*/
  678. return_val_if_fail(len > (need_len), NULL);
  679. i = need_len - 2;
  680. while(n > 0)
  681. {
  682. str[i--] = (n % 10) + '0';
  683. n = n / 10;
  684. }
  685. str[need_len - 1] = '\0';
  686. if(end != NULL)
  687. {
  688. *end = str + need_len - 1;
  689. }
  690. return str;
  691. }
  692. const char* ftk_itoa(char* str, int len, int n)
  693. {
  694. return ftk_itoa_simple(str, len, n, NULL);
  695. }
  696. const char* ftk_ftoa(char* str, int len, double value)
  697. {
  698. int i = 0;
  699. char str_n[32] = {0};
  700. char str_f[32] = {0};
  701. int n = (int)value;
  702. int f = (int)((value - n) * 1000000000);
  703. ftk_itoa(str_n, sizeof(str_n), n);
  704. ftk_itoa(str_f, sizeof(str_f), f > 0 ? f : -f);
  705. if(f == 0)
  706. {
  707. strncpy(str, str_n, len);
  708. return str;
  709. }
  710. i = strlen(str_f) - 1;
  711. i = i > 6 ? 6 : i;
  712. str_f[i] = '\0';
  713. while(i > 0)
  714. {
  715. if(str_f[i] == '0')
  716. {
  717. str_f[i] = '\0';
  718. }
  719. i--;
  720. }
  721. return_val_if_fail(len > (strlen(str_n) + 1 + i), NULL);
  722. return ftk_strs_cat(str, len, str_n, ".", str_f, NULL);
  723. }
  724. char* ftk_strcpy(char* dst, const char* src)
  725. {
  726. return strcpy(dst, src);
  727. }