/src/basic/escape.c

https://gitlab.com/unofficial-mirrors/systemd · C · 510 lines · 385 code · 87 blank · 38 comment · 77 complexity · 57a5e7baf282bc8bd9fd6063261f39a2 MD5 · raw file

  1. /* SPDX-License-Identifier: LGPL-2.1+ */
  2. /***
  3. This file is part of systemd.
  4. Copyright 2010 Lennart Poettering
  5. ***/
  6. #include <errno.h>
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include "alloc-util.h"
  10. #include "escape.h"
  11. #include "hexdecoct.h"
  12. #include "macro.h"
  13. #include "utf8.h"
  /* Writes the C-style escaped representation of the single character c to buf and returns the
   * number of bytes written (1, 2 or 4). No terminating NUL byte is appended. */
  size_t cescape_char(char c, char *buf) {
          char *out = buf;
          char letter;

          /* Characters that have a dedicated two-character escape sequence */
          switch (c) {
          case '\a': letter = 'a';  break;
          case '\b': letter = 'b';  break;
          case '\f': letter = 'f';  break;
          case '\n': letter = 'n';  break;
          case '\r': letter = 'r';  break;
          case '\t': letter = 't';  break;
          case '\v': letter = 'v';  break;
          case '\\': letter = '\\'; break;
          case '"':  letter = '"';  break;
          case '\'': letter = '\''; break;
          default:   letter = 0;    break;
          }

          if (letter != 0) {
                  *(out++) = '\\';
                  *(out++) = letter;
          } else if (c < ' ' || c >= 127) {
                  /* For special chars we prefer octal over hexadecimal encoding, simply
                   * because glib's g_strescape() does the same */
                  const unsigned char u = (unsigned char) c;

                  *(out++) = '\\';
                  *(out++) = octchar(u >> 6);
                  *(out++) = octchar(u >> 3);
                  *(out++) = octchar(u);
          } else
                  /* Plain printable character, passed through unchanged */
                  *(out++) = c;

          return out - buf;
  }
  72. char *cescape_length(const char *s, size_t n) {
  73. const char *f;
  74. char *r, *t;
  75. assert(s || n == 0);
  76. /* Does C style string escaping. May be reversed with
  77. * cunescape(). */
  78. r = new(char, n*4 + 1);
  79. if (!r)
  80. return NULL;
  81. for (f = s, t = r; f < s + n; f++)
  82. t += cescape_char(*f, t);
  83. *t = 0;
  84. return r;
  85. }
  /* Convenience wrapper around cescape_length() for NUL-terminated strings. */
  char *cescape(const char *s) {
          size_t len;

          assert(s);

          len = strlen(s);
          return cescape_length(s, len);
  }
  /* Unescapes a single C style escape sequence. p points to the character following the
   * backslash; length is the number of bytes available at p, or (size_t) -1 if no length limit
   * shall be enforced.
   *
   * On success the unescaped character is stored in *ret and the number of bytes consumed from
   * p is returned (1, 3, 5 or 9). *eight_bit is set to true for \xNN and \NNN sequences, i.e.
   * when the value should be copied as one literal byte instead of being encoded as UTF-8; it
   * is not written otherwise. On failure a negative errno-style value is returned. */
  int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) {
          const bool bounded = length != (size_t) -1;
          size_t i;

          assert(p);
          assert(*p);
          assert(ret);

          if (bounded && length < 1)
                  return -EINVAL;

          switch (p[0]) {

          /* Single-character escapes, each consuming exactly one byte */
          case 'a':
                  *ret = '\a';
                  return 1;
          case 'b':
                  *ret = '\b';
                  return 1;
          case 'f':
                  *ret = '\f';
                  return 1;
          case 'n':
                  *ret = '\n';
                  return 1;
          case 'r':
                  *ret = '\r';
                  return 1;
          case 't':
                  *ret = '\t';
                  return 1;
          case 'v':
                  *ret = '\v';
                  return 1;
          case '\\':
                  *ret = '\\';
                  return 1;
          case '"':
                  *ret = '"';
                  return 1;
          case '\'':
                  *ret = '\'';
                  return 1;
          case 's':
                  /* This is an extension of the XDG syntax files */
                  *ret = ' ';
                  return 1;

          case 'x': {
                  /* hexadecimal encoding: 'x' followed by exactly two hex digits */
                  int hi, lo;

                  if (bounded && length < 3)
                          return -EINVAL;

                  hi = unhexchar(p[1]);
                  if (hi < 0)
                          return -EINVAL;

                  lo = unhexchar(p[2]);
                  if (lo < 0)
                          return -EINVAL;

                  /* Don't allow NUL bytes */
                  if (hi == 0 && lo == 0)
                          return -EINVAL;

                  *ret = (hi << 4U) | lo;
                  *eight_bit = true;
                  return 3;
          }

          case 'u': {
                  /* C++11 style 16bit unicode: 'u' followed by four hex digits */
                  uint32_t value = 0;

                  if (bounded && length < 5)
                          return -EINVAL;

                  for (i = 0; i < 4; i++) {
                          int digit = unhexchar(p[1 + i]);

                          /* Stop at the first bad digit and propagate unhexchar()'s error */
                          if (digit < 0)
                                  return digit;

                          value = (value << 4U) | (uint32_t) digit;
                  }

                  /* Don't allow 0 chars */
                  if (value == 0)
                          return -EINVAL;

                  *ret = value;
                  return 5;
          }

          case 'U': {
                  /* C++11 style 32bit unicode: 'U' followed by eight hex digits */
                  uint32_t value = 0;
                  char32_t c;

                  if (bounded && length < 9)
                          return -EINVAL;

                  for (i = 0; i < 8; i++) {
                          int digit = unhexchar(p[1 + i]);

                          /* Stop at the first bad digit and propagate unhexchar()'s error */
                          if (digit < 0)
                                  return digit;

                          value = (value << 4U) | (uint32_t) digit;
                  }

                  c = value;

                  /* Don't allow 0 chars */
                  if (c == 0)
                          return -EINVAL;

                  /* Don't allow invalid code points */
                  if (!unichar_is_valid(c))
                          return -EINVAL;

                  *ret = c;
                  return 9;
          }

          case '0':
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7': {
                  /* octal encoding: exactly three octal digits, starting at p[0] */
                  char32_t value = 0;

                  if (bounded && length < 3)
                          return -EINVAL;

                  for (i = 0; i < 3; i++) {
                          int digit = unoctchar(p[i]);

                          if (digit < 0)
                                  return -EINVAL;

                          value = (value << 3U) | (uint32_t) digit;
                  }

                  /* don't allow NUL bytes */
                  if (value == 0)
                          return -EINVAL;

                  /* Don't allow bytes above 255 */
                  if (value > 255)
                          return -EINVAL;

                  *ret = value;
                  *eight_bit = true;
                  return 3;
          }

          default:
                  /* Unknown escape character */
                  return -EINVAL;
          }
  }
  239. int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
  240. char *r, *t;
  241. const char *f;
  242. size_t pl;
  243. assert(s);
  244. assert(ret);
  245. /* Undoes C style string escaping, and optionally prefixes it. */
  246. pl = strlen_ptr(prefix);
  247. r = new(char, pl+length+1);
  248. if (!r)
  249. return -ENOMEM;
  250. if (prefix)
  251. memcpy(r, prefix, pl);
  252. for (f = s, t = r + pl; f < s + length; f++) {
  253. size_t remaining;
  254. bool eight_bit = false;
  255. char32_t u;
  256. int k;
  257. remaining = s + length - f;
  258. assert(remaining > 0);
  259. if (*f != '\\') {
  260. /* A literal, copy verbatim */
  261. *(t++) = *f;
  262. continue;
  263. }
  264. if (remaining == 1) {
  265. if (flags & UNESCAPE_RELAX) {
  266. /* A trailing backslash, copy verbatim */
  267. *(t++) = *f;
  268. continue;
  269. }
  270. free(r);
  271. return -EINVAL;
  272. }
  273. k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit);
  274. if (k < 0) {
  275. if (flags & UNESCAPE_RELAX) {
  276. /* Invalid escape code, let's take it literal then */
  277. *(t++) = '\\';
  278. continue;
  279. }
  280. free(r);
  281. return k;
  282. }
  283. f += k;
  284. if (eight_bit)
  285. /* One byte? Set directly as specified */
  286. *(t++) = u;
  287. else
  288. /* Otherwise encode as multi-byte UTF-8 */
  289. t += utf8_encode_unichar(t, u);
  290. }
  291. *t = 0;
  292. *ret = r;
  293. return t - r;
  294. }
  295. int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
  296. return cunescape_length_with_prefix(s, length, NULL, flags, ret);
  297. }
  298. int cunescape(const char *s, UnescapeFlags flags, char **ret) {
  299. return cunescape_length(s, strlen(s), flags, ret);
  300. }
  301. char *xescape(const char *s, const char *bad) {
  302. char *r, *t;
  303. const char *f;
  304. /* Escapes all chars in bad, in addition to \ and all special
  305. * chars, in \xFF style escaping. May be reversed with
  306. * cunescape(). */
  307. r = new(char, strlen(s) * 4 + 1);
  308. if (!r)
  309. return NULL;
  310. for (f = s, t = r; *f; f++) {
  311. if ((*f < ' ') || (*f >= 127) ||
  312. (*f == '\\') || strchr(bad, *f)) {
  313. *(t++) = '\\';
  314. *(t++) = 'x';
  315. *(t++) = hexchar(*f >> 4);
  316. *(t++) = hexchar(*f);
  317. } else
  318. *(t++) = *f;
  319. }
  320. *t = 0;
  321. return r;
  322. }
  /* Escapes the first len bytes of s in \nnn style (three octal digits): all bytes below ' ',
   * all bytes >= 127, as well as the \ and " characters are escaped, everything else is copied
   * verbatim.
   *
   * Returns a newly allocated, NUL-terminated string that the caller has to free(), or NULL if
   * the allocation fails or the required buffer size would overflow size_t. */
  char *octescape(const char *s, size_t len) {
          char *r, *t;
          size_t i;

          assert(s || len == 0);

          /* Every input byte expands to at most four output bytes, plus the trailing NUL. Refuse
           * lengths for which that size calculation would wrap around. */
          if (len > ((size_t) -1 - 1) / 4)
                  return NULL;

          r = malloc(len * 4 + 1);
          if (!r)
                  return NULL;

          t = r;
          for (i = 0; i < len; i++) {
                  /* Work on the unsigned byte value: shifting a negative (signed) char would
                   * yield a bogus leading digit for bytes >= 0x80. */
                  unsigned char c = (unsigned char) s[i];

                  if (c < ' ' || c >= 127 || c == '\\' || c == '"') {
                          *(t++) = '\\';
                          /* Three bits per octal digit, hence the mask is 7 */
                          *(t++) = (char) ('0' + (c >> 6));
                          *(t++) = (char) ('0' + ((c >> 3) & 7));
                          *(t++) = (char) ('0' + (c & 7));
                  } else
                          *(t++) = (char) c;
          }

          *t = 0;
          return r;
  }
  /* Copies the NUL-terminated string s to t, prefixing every backslash and every character
   * listed in bad with a backslash. If escape_tab_nl is true, newline and tab are written as
   * "\n" and "\t" respectively. No NUL terminator is written; the return value points just
   * past the last byte written. */
  static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) {
          const char *in = s;

          assert(bad);

          while (*in) {
                  const char c = *(in++);

                  if (escape_tab_nl && (c == '\n' || c == '\t')) {
                          *(t++) = '\\';

                          if (c == '\n')
                                  *(t++) = 'n';
                          else
                                  *(t++) = 't';
                  } else {
                          if (c == '\\' || strchr(bad, c))
                                  *(t++) = '\\';

                          *(t++) = c;
                  }
          }

          return t;
  }
  357. char *shell_escape(const char *s, const char *bad) {
  358. char *r, *t;
  359. r = new(char, strlen(s)*2+1);
  360. if (!r)
  361. return NULL;
  362. t = strcpy_backslash_escaped(r, s, bad, false);
  363. *t = 0;
  364. return r;
  365. }
  366. char* shell_maybe_quote(const char *s, EscapeStyle style) {
  367. const char *p;
  368. char *r, *t;
  369. assert(s);
  370. /* Encloses a string in quotes if necessary to make it OK as a shell
  371. * string. Note that we treat benign UTF-8 characters as needing
  372. * escaping too, but that should be OK. */
  373. for (p = s; *p; p++)
  374. if (*p <= ' ' ||
  375. *p >= 127 ||
  376. strchr(SHELL_NEED_QUOTES, *p))
  377. break;
  378. if (!*p)
  379. return strdup(s);
  380. r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1);
  381. if (!r)
  382. return NULL;
  383. t = r;
  384. if (style == ESCAPE_BACKSLASH)
  385. *(t++) = '"';
  386. else if (style == ESCAPE_POSIX) {
  387. *(t++) = '$';
  388. *(t++) = '\'';
  389. } else
  390. assert_not_reached("Bad EscapeStyle");
  391. t = mempcpy(t, s, p - s);
  392. if (style == ESCAPE_BACKSLASH)
  393. t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE, false);
  394. else
  395. t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true);
  396. if (style == ESCAPE_BACKSLASH)
  397. *(t++) = '"';
  398. else
  399. *(t++) = '\'';
  400. *t = 0;
  401. return r;
  402. }