PageRenderTime 56ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/source/fitz/string.c

https://gitlab.com/zimumavo/mupdf
C | 509 lines | 392 code | 59 blank | 58 comment | 149 complexity | ce993610f32c4d5729362b6c2ac92ce0 MD5 | raw file
  1. #include "mupdf/fitz.h"
  /*
   * Map an upper-case ASCII letter to its lower-case counterpart.
   * Any other value (including negative ones) is returned unchanged.
   */
  static inline int
  fz_tolower(int c)
  {
      return (c >= 'A' && c <= 'Z') ? c + ('a' - 'A') : c;
  }
  /*
   * Compare two NUL-terminated strings, ignoring the case of ASCII letters.
   *
   * Returns 0 when the strings are equal under fz_tolower(), otherwise the
   * difference between the first pair of lower-cased characters that differ.
   */
  int
  fz_strcasecmp(const char *a, const char *b)
  {
      for (;; a++, b++)
      {
          int ca = fz_tolower(*a);
          int cb = fz_tolower(*b);

          if (ca != cb)
              return ca - cb;
          /* Both characters match; if they are the terminator we are done. */
          if (*a == 0)
              return 0;
      }
  }
  /*
   * Split off the next token from *stringp.
   *
   * The token ends at the first character found in delim; that character is
   * overwritten with NUL and *stringp is advanced just past it. When no
   * delimiter is left, *stringp becomes NULL. Returns the start of the token,
   * or NULL if *stringp was already NULL.
   */
  char *
  fz_strsep(char **stringp, const char *delim)
  {
      char *token = *stringp;
      char *cut;

      if (token == NULL)
          return NULL;

      cut = strpbrk(token, delim);
      if (cut != NULL)
          *cut++ = '\0';
      *stringp = cut;
      return token;
  }
  /*
   * Copy src into dst, writing at most siz bytes including the terminator.
   *
   * dst is always NUL-terminated when siz is non-zero. Returns the length of
   * src, so a return value >= siz means the copy was truncated.
   */
  size_t
  fz_strlcpy(char *dst, const char *src, size_t siz)
  {
      const char *in = src;
      char *out = dst;
      size_t room = siz;

      /* Copy bytes while there is room, keeping one byte for the NUL. */
      if (room != 0)
      {
          while (--room != 0)
          {
              if ((*out++ = *in++) == '\0')
                  break;
          }
      }

      /* Out of room: terminate dst and walk to the end of src for its length. */
      if (room == 0)
      {
          if (siz != 0)
              *out = '\0';
          while (*in++ != '\0')
              ;
      }

      /* 'in' is one past the terminator; the count excludes the NUL. */
      return (size_t)(in - src - 1);
  }
  /*
   * Append src to the NUL-terminated string in dst, where siz is the full
   * size of the dst buffer (not the space remaining).
   *
   * At most siz-1 bytes end up in dst, and the result is NUL-terminated
   * whenever a terminator fits. If dst holds no NUL within its first siz
   * bytes it is left untouched.
   *
   * Returns the length the combined string would have had without
   * truncation: min(siz, strlen(dst)) + strlen(src). A return value >= siz
   * means truncation occurred.
   */
  size_t
  fz_strlcat(char *dst, const char *src, size_t siz)
  {
      char *d = dst;
      const char *s = src;
      size_t n = siz;
      size_t dlen;

      /*
       * Find the end of dst without going past the buffer. The remaining
       * count is tested BEFORE dereferencing d, so dst[siz] is never read
       * (and dst is not read at all when siz is 0).
       */
      while (n != 0 && *d != '\0')
      {
          d++;
          n--;
      }
      dlen = (size_t)(d - dst);
      n = siz - dlen;

      /* No room at all: report the length that would have been needed. */
      if (n == 0)
          return dlen + strlen(s);

      while (*s != '\0')
      {
          /* Keep the last free byte for the terminator. */
          if (n != 1)
          {
              *d++ = *s;
              n--;
          }
          s++;
      }
      *d = '\0';

      return dlen + (size_t)(s - src); /* count does not include NUL */
  }
  /*
   * Write the directory part of path into dir, a buffer of n bytes.
   *
   *   NULL or ""        -> "."
   *   no '/' in path    -> "."
   *   only '/'s, or the only '/'s are leading ones -> "/"
   *   otherwise         -> path with its last component, and the slashes
   *                        around it, removed ("a/b/c" and "a/b/c/" -> "a/b")
   *
   * path is first copied with fz_strlcpy(), so an over-long path is
   * truncated to n-1 bytes before being analysed. Nothing is written when
   * n is 0.
   */
  void
  fz_dirname(char *dir, const char *path, size_t n)
  {
      size_t i;

      /* A zero-sized buffer cannot hold anything, not even a terminator. */
      if (n == 0)
          return;

      if (!path || !path[0])
      {
          fz_strlcpy(dir, ".", n);
          return;
      }

      fz_strlcpy(dir, path, n);

      i = strlen(dir);
      if (i == 0)
      {
          /* The copy was truncated to an empty string (n == 1). */
          fz_strlcpy(dir, ".", n);
          return;
      }

      /* Start on the last character, not on the terminator. */
      --i;

      /* Skip trailing slashes; a path made only of slashes is the root. */
      for (; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/", n); return; }
      /* Skip the last component; without any slash the directory is ".". */
      for (; dir[i] != '/'; --i) if (!i) { fz_strlcpy(dir, ".", n); return; }
      /* Skip the slashes separating it from the directory part. */
      for (; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/", n); return; }

      dir[i+1] = 0;
  }
  /* Return 1 if a is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
  static inline int ishex(int a)
  {
      if (a >= '0' && a <= '9')
          return 1;
      if (a >= 'a' && a <= 'f')
          return 1;
      return a >= 'A' && a <= 'F';
  }
  /*
   * Convert an ASCII hexadecimal digit to its value (0..15).
   * Anything that is not a hexadecimal digit yields 0.
   */
  static inline int tohex(int c)
  {
      int value = 0;

      if (c >= '0' && c <= '9')
          value = c - '0';
      else if (c >= 'a' && c <= 'f')
          value = 0xA + (c - 'a');
      else if (c >= 'A' && c <= 'F')
          value = 0xA + (c - 'A');
      return value;
  }
  /*
   * Decode %XX escapes in url, in place.
   *
   * A '%' followed by two hexadecimal digits is replaced by the byte they
   * encode; every other byte (including a '%' not followed by two hex
   * digits) is copied through unchanged. The result is never longer than
   * the input. Returns url.
   */
  char *
  fz_urldecode(char *url)
  {
      const char *in = url;
      char *out = url;

      while (*in != '\0')
      {
          int c = (unsigned char) *in++;

          if (c == '%' && ishex(in[0]) && ishex(in[1]))
          {
              int hi = tohex(in[0]);
              int lo = tohex(in[1]);

              in += 2;
              c = hi << 4 | lo;
          }
          *out++ = c;
      }
      *out = 0;

      return url;
  }
  126. void
  127. fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page)
  128. {
  129. const char *s, *p;
  130. char num[40];
  131. int i, n;
  132. int z = 0;
  133. for (i = 0; page; page /= 10)
  134. num[i++] = '0' + page % 10;
  135. num[i] = 0;
  136. s = p = strchr(fmt, '%');
  137. if (p)
  138. {
  139. ++p;
  140. while (*p >= '0' && *p <= '9')
  141. z = z * 10 + (*p++ - '0');
  142. }
  143. if (p && *p == 'd')
  144. {
  145. ++p;
  146. }
  147. else
  148. {
  149. s = p = strrchr(fmt, '.');
  150. if (!p)
  151. s = p = fmt + strlen(fmt);
  152. }
  153. if (z < 1)
  154. z = 1;
  155. while (i < z && i < sizeof num)
  156. num[i++] = '0';
  157. n = s - fmt;
  158. if (n + i + strlen(p) >= size)
  159. fz_throw(ctx, FZ_ERROR_GENERIC, "path name buffer overflow");
  160. memcpy(path, fmt, n);
  161. while (i > 0)
  162. path[n++] = num[--i];
  163. fz_strlcpy(path + n, p, size - n);
  164. }
  /* True when x ends a path element: a slash or the string terminator. */
  #define SEP(x) ((x)=='/' || (x) == 0)

  /*
   * Lexically simplify a path, in place.
   *
   * Repeated slashes collapse into one, "." elements are dropped, and each
   * ".." element removes the element before it where possible. A ".." that
   * would climb above a rooted path is discarded; leading ".." elements of a
   * relative path are kept. An empty result becomes ".". The cleaned path is
   * never longer than the input. Returns name.
   */
  char *
  fz_cleanname(char *name)
  {
      const int rooted = (name[0] == '/');
      char *const start = name + rooted;
      char *in = start;    /* beginning of the element being examined */
      char *out = start;   /* just past the last element written (no slash) */
      char *limit = start; /* ".." may not back up past this point */

      while (*in != '\0')
      {
          if (in[0] == '/')
          {
              /* empty element */
              in++;
              continue;
          }

          if (in[0] == '.' && SEP(in[1]))
          {
              /* "." element; leave the separator in case it is the NUL */
              in += 1;
              continue;
          }

          if (in[0] == '.' && in[1] == '.' && SEP(in[2]))
          {
              in += 2;
              if (out > limit)
              {
                  /* drop the previously written element */
                  do
                      --out;
                  while (out > limit && *out != '/');
              }
              else if (!rooted)
              {
                  /* "/.." is "/", but "./../" must stay ".." */
                  if (out != name)
                      *out++ = '/';
                  *out++ = '.';
                  *out++ = '.';
                  limit = out;
              }
              continue;
          }

          /* ordinary element: copy it, preceded by a slash unless first */
          if (out != start)
              *out++ = '/';
          for (;;)
          {
              *out = *in;
              if (*out == '/' || *out == 0)
                  break;
              in++;
              out++;
          }
      }

      if (out == name) /* nothing left: the path is "." */
          *out++ = '.';
      *out = '\0';

      return name;
  }
  /* Basic limits of the UTF-8 encoding as handled by this file. */
  enum
  {
      UTFmax = 4,         /* most bytes a single rune can occupy */
      Runesync = 0x80,    /* bytes below this cannot be part of a multi-byte sequence */
      Runeself = 0x80,    /* bytes below this encode themselves */
      Runeerror = 0xFFFD, /* rune produced for undecodable input */
      Runemax = 0x10FFFF, /* largest valid rune value */
  };
  224. enum
  225. {
  226. Bit1 = 7,
  227. Bitx = 6,
  228. Bit2 = 5,
  229. Bit3 = 4,
  230. Bit4 = 3,
  231. Bit5 = 2,
  232. T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
  233. Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
  234. T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
  235. T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
  236. T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
  237. T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
  238. Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
  239. Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
  240. Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
  241. Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */
  242. Maskx = (1<<Bitx)-1, /* 0011 1111 */
  243. Testx = Maskx ^ 0xFF, /* 1100 0000 */
  244. Bad = Runeerror,
  245. };
  246. int
  247. fz_chartorune(int *rune, const char *str)
  248. {
  249. int c, c1, c2, c3;
  250. long l;
  251. /*
  252. * one character sequence
  253. * 00000-0007F => T1
  254. */
  255. c = *(const unsigned char*)str;
  256. if(c < Tx) {
  257. *rune = c;
  258. return 1;
  259. }
  260. /*
  261. * two character sequence
  262. * 0080-07FF => T2 Tx
  263. */
  264. c1 = *(const unsigned char*)(str+1) ^ Tx;
  265. if(c1 & Testx)
  266. goto bad;
  267. if(c < T3) {
  268. if(c < T2)
  269. goto bad;
  270. l = ((c << Bitx) | c1) & Rune2;
  271. if(l <= Rune1)
  272. goto bad;
  273. *rune = l;
  274. return 2;
  275. }
  276. /*
  277. * three character sequence
  278. * 0800-FFFF => T3 Tx Tx
  279. */
  280. c2 = *(const unsigned char*)(str+2) ^ Tx;
  281. if(c2 & Testx)
  282. goto bad;
  283. if(c < T4) {
  284. l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
  285. if(l <= Rune2)
  286. goto bad;
  287. *rune = l;
  288. return 3;
  289. }
  290. /*
  291. * four character sequence (21-bit value)
  292. * 10000-1FFFFF => T4 Tx Tx Tx
  293. */
  294. c3 = *(const unsigned char*)(str+3) ^ Tx;
  295. if (c3 & Testx)
  296. goto bad;
  297. if (c < T5) {
  298. l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
  299. if (l <= Rune3)
  300. goto bad;
  301. *rune = l;
  302. return 4;
  303. }
  304. /*
  305. * Support for 5-byte or longer UTF-8 would go here, but
  306. * since we don't have that, we'll just fall through to bad.
  307. */
  308. /*
  309. * bad decoding
  310. */
  311. bad:
  312. *rune = Bad;
  313. return 1;
  314. }
  315. int
  316. fz_runetochar(char *str, int rune)
  317. {
  318. /* Runes are signed, so convert to unsigned for range check. */
  319. unsigned long c = (unsigned long)rune;
  320. /*
  321. * one character sequence
  322. * 00000-0007F => 00-7F
  323. */
  324. if(c <= Rune1) {
  325. str[0] = c;
  326. return 1;
  327. }
  328. /*
  329. * two character sequence
  330. * 0080-07FF => T2 Tx
  331. */
  332. if(c <= Rune2) {
  333. str[0] = T2 | (c >> 1*Bitx);
  334. str[1] = Tx | (c & Maskx);
  335. return 2;
  336. }
  337. /*
  338. * If the Rune is out of range, convert it to the error rune.
  339. * Do this test here because the error rune encodes to three bytes.
  340. * Doing it earlier would duplicate work, since an out of range
  341. * Rune wouldn't have fit in one or two bytes.
  342. */
  343. if (c > Runemax)
  344. c = Runeerror;
  345. /*
  346. * three character sequence
  347. * 0800-FFFF => T3 Tx Tx
  348. */
  349. if (c <= Rune3) {
  350. str[0] = T3 | (c >> 2*Bitx);
  351. str[1] = Tx | ((c >> 1*Bitx) & Maskx);
  352. str[2] = Tx | (c & Maskx);
  353. return 3;
  354. }
  355. /*
  356. * four character sequence (21-bit value)
  357. * 10000-1FFFFF => T4 Tx Tx Tx
  358. */
  359. str[0] = T4 | (c >> 3*Bitx);
  360. str[1] = Tx | ((c >> 2*Bitx) & Maskx);
  361. str[2] = Tx | ((c >> 1*Bitx) & Maskx);
  362. str[3] = Tx | (c & Maskx);
  363. return 4;
  364. }
  /*
   * Return the number of bytes fz_runetochar() produces for rune c.
   * The encoding itself is written to a scratch buffer and discarded.
   */
  int
  fz_runelen(int c)
  {
      char scratch[10];
      int len = fz_runetochar(scratch, c);

      return len;
  }
  371. int
  372. fz_utflen(const char *s)
  373. {
  374. int c, n, rune;
  375. n = 0;
  376. for(;;) {
  377. c = *(const unsigned char*)s;
  378. if(c < Runeself) {
  379. if(c == 0)
  380. return n;
  381. s++;
  382. } else
  383. s += fz_chartorune(&rune, s);
  384. n++;
  385. }
  386. return 0;
  387. }
  /*
   * Parse s as a float using fz_strtof().
   *
   * Returns 1 when the parsed value is NaN or when parsing reported a range
   * error with a zero result (underflow); otherwise the value limited to
   * the range [-FLT_MAX, FLT_MAX].
   */
  float fz_atof(const char *s)
  {
      float value;

      errno = 0;
      value = fz_strtof(s, NULL);

      /* Return 1.0 on underflow, as it's a small known value that won't cause a divide by 0. */
      if (isnan(value))
          return 1;
      if (errno == ERANGE && value == 0)
          return 1;

      value = fz_clamp(value, -FLT_MAX, FLT_MAX);
      return value;
  }
  /* NULL-tolerant atoi(): returns 0 for a NULL string, atoi(s) otherwise. */
  int fz_atoi(const char *s)
  {
      return s ? atoi(s) : 0;
  }
  405. fz_off_t fz_atoo(const char *s)
  406. {
  407. if (s == NULL)
  408. return 0;
  409. return fz_atoo_imp(s);
  410. }
  411. int fz_is_page_range(fz_context *ctx, const char *s)
  412. {
  413. /* TODO: check the actual syntax... */
  414. while (*s)
  415. {
  416. if ((*s < '0' || *s > '9') && *s != 'N' && *s != '-' && *s != ',')
  417. return 0;
  418. s++;
  419. }
  420. return 1;
  421. }
  422. const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n)
  423. {
  424. if (!s || !s[0])
  425. return NULL;
  426. if (s[0] == ',')
  427. s += 1;
  428. if (s[0] == 'N')
  429. {
  430. *a = n;
  431. s += 1;
  432. }
  433. else
  434. *a = strtol(s, (char**)&s, 10);
  435. if (s[0] == '-')
  436. {
  437. if (s[1] == 'N')
  438. {
  439. *b = n;
  440. s += 2;
  441. }
  442. else
  443. *b = strtol(s+1, (char**)&s, 10);
  444. }
  445. else
  446. *b = *a;
  447. *a = fz_clampi(*a, 1, n);
  448. *b = fz_clampi(*b, 1, n);
  449. return s;
  450. }