PageRenderTime 48ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/source/fitz/string.c

https://gitlab.com/koreader/mupdf-old
C | 398 lines | 296 code | 45 blank | 57 comment | 111 complexity | e337003c0012d845e2de1147381fef2e MD5 | raw file
  1. #include "mupdf/fitz.h"
  2. char *
  3. fz_strsep(char **stringp, const char *delim)
  4. {
  5. char *ret = *stringp;
  6. if (!ret) return NULL;
  7. if ((*stringp = strpbrk(*stringp, delim)) != NULL)
  8. *((*stringp)++) = '\0';
  9. return ret;
  10. }
  11. int
  12. fz_strlcpy(char *dst, const char *src, int siz)
  13. {
  14. register char *d = dst;
  15. register const char *s = src;
  16. register int n = siz;
  17. /* Copy as many bytes as will fit */
  18. if (n != 0 && --n != 0) {
  19. do {
  20. if ((*d++ = *s++) == 0)
  21. break;
  22. } while (--n != 0);
  23. }
  24. /* Not enough room in dst, add NUL and traverse rest of src */
  25. if (n == 0) {
  26. if (siz != 0)
  27. *d = '\0'; /* NUL-terminate dst */
  28. while (*s++)
  29. ;
  30. }
  31. return(s - src - 1); /* count does not include NUL */
  32. }
  33. int
  34. fz_strlcat(char *dst, const char *src, int siz)
  35. {
  36. register char *d = dst;
  37. register const char *s = src;
  38. register int n = siz;
  39. int dlen;
  40. /* Find the end of dst and adjust bytes left but don't go past end */
  41. while (*d != '\0' && n-- != 0)
  42. d++;
  43. dlen = d - dst;
  44. n = siz - dlen;
  45. if (n == 0)
  46. return dlen + strlen(s);
  47. while (*s != '\0') {
  48. if (n != 1) {
  49. *d++ = *s;
  50. n--;
  51. }
  52. s++;
  53. }
  54. *d = '\0';
  55. return dlen + (s - src); /* count does not include NUL */
  56. }
  57. void
  58. fz_dirname(char *dir, const char *path, int n)
  59. {
  60. int i;
  61. if (!path || !path[0])
  62. {
  63. fz_strlcpy(dir, ".", n);
  64. return;
  65. }
  66. fz_strlcpy(dir, path, n);
  67. i = strlen(dir);
  68. for(; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/", n); return; }
  69. for(; dir[i] != '/'; --i) if (!i) { fz_strlcpy(dir, ".", n); return; }
  70. for(; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/", n); return; }
  71. dir[i+1] = 0;
  72. }
  73. static int ishex(int a)
  74. {
  75. return (a >= 'A' && a <= 'F') ||
  76. (a >= 'a' && a <= 'f') ||
  77. (a >= '0' && a <= '9');
  78. }
  79. static int tohex(int c)
  80. {
  81. if (c >= '0' && c <= '9') return c - '0';
  82. if (c >= 'a' && c <= 'f') return c - 'a' + 0xA;
  83. if (c >= 'A' && c <= 'F') return c - 'A' + 0xA;
  84. return 0;
  85. }
  86. char *
  87. fz_urldecode(char *url)
  88. {
  89. char *s = url;
  90. char *p = url;
  91. while (*s)
  92. {
  93. int c = (unsigned char) *s++;
  94. if (c == '%' && ishex(s[0]) && ishex(s[1]))
  95. {
  96. int a = tohex(*s++);
  97. int b = tohex(*s++);
  98. *p++ = a << 4 | b;
  99. }
  100. else
  101. {
  102. *p++ = c;
  103. }
  104. }
  105. *p = 0;
  106. return url;
  107. }
  108. #define SEP(x) ((x)=='/' || (x) == 0)
  109. char *
  110. fz_cleanname(char *name)
  111. {
  112. char *p, *q, *dotdot;
  113. int rooted;
  114. rooted = name[0] == '/';
  115. /*
  116. * invariants:
  117. * p points at beginning of path element we're considering.
  118. * q points just past the last path element we wrote (no slash).
  119. * dotdot points just past the point where .. cannot backtrack
  120. * any further (no slash).
  121. */
  122. p = q = dotdot = name + rooted;
  123. while (*p)
  124. {
  125. if(p[0] == '/') /* null element */
  126. p++;
  127. else if (p[0] == '.' && SEP(p[1]))
  128. p += 1; /* don't count the separator in case it is nul */
  129. else if (p[0] == '.' && p[1] == '.' && SEP(p[2]))
  130. {
  131. p += 2;
  132. if (q > dotdot) /* can backtrack */
  133. {
  134. while(--q > dotdot && *q != '/')
  135. ;
  136. }
  137. else if (!rooted) /* /.. is / but ./../ is .. */
  138. {
  139. if (q != name)
  140. *q++ = '/';
  141. *q++ = '.';
  142. *q++ = '.';
  143. dotdot = q;
  144. }
  145. }
  146. else /* real path element */
  147. {
  148. if (q != name+rooted)
  149. *q++ = '/';
  150. while ((*q = *p) != '/' && *q != 0)
  151. p++, q++;
  152. }
  153. }
  154. if (q == name) /* empty string is really "." */
  155. *q++ = '.';
  156. *q = '\0';
  157. return name;
  158. }
  159. enum
  160. {
  161. UTFmax = 4, /* maximum bytes per rune */
  162. Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
  163. Runeself = 0x80, /* rune and UTF sequences are the same (<) */
  164. Runeerror = 0xFFFD, /* decoding error in UTF */
  165. Runemax = 0x10FFFF, /* maximum rune value */
  166. };
  167. enum
  168. {
  169. Bit1 = 7,
  170. Bitx = 6,
  171. Bit2 = 5,
  172. Bit3 = 4,
  173. Bit4 = 3,
  174. Bit5 = 2,
  175. T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
  176. Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
  177. T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
  178. T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
  179. T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
  180. T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
  181. Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
  182. Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
  183. Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
  184. Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */
  185. Maskx = (1<<Bitx)-1, /* 0011 1111 */
  186. Testx = Maskx ^ 0xFF, /* 1100 0000 */
  187. Bad = Runeerror,
  188. };
  189. int
  190. fz_chartorune(int *rune, const char *str)
  191. {
  192. int c, c1, c2, c3;
  193. long l;
  194. /*
  195. * one character sequence
  196. * 00000-0007F => T1
  197. */
  198. c = *(const unsigned char*)str;
  199. if(c < Tx) {
  200. *rune = c;
  201. return 1;
  202. }
  203. /*
  204. * two character sequence
  205. * 0080-07FF => T2 Tx
  206. */
  207. c1 = *(const unsigned char*)(str+1) ^ Tx;
  208. if(c1 & Testx)
  209. goto bad;
  210. if(c < T3) {
  211. if(c < T2)
  212. goto bad;
  213. l = ((c << Bitx) | c1) & Rune2;
  214. if(l <= Rune1)
  215. goto bad;
  216. *rune = l;
  217. return 2;
  218. }
  219. /*
  220. * three character sequence
  221. * 0800-FFFF => T3 Tx Tx
  222. */
  223. c2 = *(const unsigned char*)(str+2) ^ Tx;
  224. if(c2 & Testx)
  225. goto bad;
  226. if(c < T4) {
  227. l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
  228. if(l <= Rune2)
  229. goto bad;
  230. *rune = l;
  231. return 3;
  232. }
  233. /*
  234. * four character sequence (21-bit value)
  235. * 10000-1FFFFF => T4 Tx Tx Tx
  236. */
  237. c3 = *(const unsigned char*)(str+3) ^ Tx;
  238. if (c3 & Testx)
  239. goto bad;
  240. if (c < T5) {
  241. l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
  242. if (l <= Rune3)
  243. goto bad;
  244. *rune = l;
  245. return 4;
  246. }
  247. /*
  248. * Support for 5-byte or longer UTF-8 would go here, but
  249. * since we don't have that, we'll just fall through to bad.
  250. */
  251. /*
  252. * bad decoding
  253. */
  254. bad:
  255. *rune = Bad;
  256. return 1;
  257. }
  258. int
  259. fz_runetochar(char *str, int rune)
  260. {
  261. /* Runes are signed, so convert to unsigned for range check. */
  262. unsigned long c = (unsigned long)rune;
  263. /*
  264. * one character sequence
  265. * 00000-0007F => 00-7F
  266. */
  267. if(c <= Rune1) {
  268. str[0] = c;
  269. return 1;
  270. }
  271. /*
  272. * two character sequence
  273. * 0080-07FF => T2 Tx
  274. */
  275. if(c <= Rune2) {
  276. str[0] = T2 | (c >> 1*Bitx);
  277. str[1] = Tx | (c & Maskx);
  278. return 2;
  279. }
  280. /*
  281. * If the Rune is out of range, convert it to the error rune.
  282. * Do this test here because the error rune encodes to three bytes.
  283. * Doing it earlier would duplicate work, since an out of range
  284. * Rune wouldn't have fit in one or two bytes.
  285. */
  286. if (c > Runemax)
  287. c = Runeerror;
  288. /*
  289. * three character sequence
  290. * 0800-FFFF => T3 Tx Tx
  291. */
  292. if (c <= Rune3) {
  293. str[0] = T3 | (c >> 2*Bitx);
  294. str[1] = Tx | ((c >> 1*Bitx) & Maskx);
  295. str[2] = Tx | (c & Maskx);
  296. return 3;
  297. }
  298. /*
  299. * four character sequence (21-bit value)
  300. * 10000-1FFFFF => T4 Tx Tx Tx
  301. */
  302. str[0] = T4 | (c >> 3*Bitx);
  303. str[1] = Tx | ((c >> 2*Bitx) & Maskx);
  304. str[2] = Tx | ((c >> 1*Bitx) & Maskx);
  305. str[3] = Tx | (c & Maskx);
  306. return 4;
  307. }
  308. int
  309. fz_runelen(int c)
  310. {
  311. char str[10];
  312. return fz_runetochar(str, c);
  313. }
  314. int
  315. fz_utflen(const char *s)
  316. {
  317. int c, n, rune;
  318. n = 0;
  319. for(;;) {
  320. c = *(const unsigned char*)s;
  321. if(c < Runeself) {
  322. if(c == 0)
  323. return n;
  324. s++;
  325. } else
  326. s += fz_chartorune(&rune, s);
  327. n++;
  328. }
  329. return 0;
  330. }
  331. float fz_atof(const char *s)
  332. {
  333. float result;
  334. errno = 0;
  335. result = fz_strtof(s, NULL);
  336. if ((errno == ERANGE && result == 0) || isnan(result))
  337. /* Return 1.0 on underflow, as it's a small known value that won't cause a divide by 0. */
  338. return 1;
  339. result = fz_clamp(result, -FLT_MAX, FLT_MAX);
  340. return result;
  341. }
  342. int fz_atoi(const char *s)
  343. {
  344. if (s == NULL)
  345. return 0;
  346. return atoi(s);
  347. }
  348. fz_off_t fz_atoo(const char *s)
  349. {
  350. if (s == NULL)
  351. return 0;
  352. return fz_atoo_imp(s);
  353. }