/usr.bin/tar/util.c

https://bitbucket.org/freebsd/freebsd-head/ · C · 563 lines · 381 code · 45 blank · 137 comment · 166 complexity · 0f299ebed0d0b44c1037b8a95357ba3d MD5 · raw file

  1. /*-
  2. * Copyright (c) 2003-2007 Tim Kientzle
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. #include "bsdtar_platform.h"
  26. __FBSDID("$FreeBSD$");
  27. #ifdef HAVE_SYS_STAT_H
  28. #include <sys/stat.h>
  29. #endif
  30. #ifdef HAVE_SYS_TYPES_H
  31. #include <sys/types.h> /* Linux doesn't define mode_t, etc. in sys/stat.h. */
  32. #endif
  33. #include <ctype.h>
  34. #ifdef HAVE_ERRNO_H
  35. #include <errno.h>
  36. #endif
  37. #ifdef HAVE_IO_H
  38. #include <io.h>
  39. #endif
  40. #ifdef HAVE_STDARG_H
  41. #include <stdarg.h>
  42. #endif
  43. #ifdef HAVE_STDINT_H
  44. #include <stdint.h>
  45. #endif
  46. #include <stdio.h>
  47. #ifdef HAVE_STDLIB_H
  48. #include <stdlib.h>
  49. #endif
  50. #ifdef HAVE_STRING_H
  51. #include <string.h>
  52. #endif
  53. #ifdef HAVE_WCTYPE_H
  54. #include <wctype.h>
  55. #else
  56. /* If we don't have wctype, we need to hack up some version of iswprint(). */
  57. #define iswprint isprint
  58. #endif
  59. #include "bsdtar.h"
  60. #include "err.h"
  61. static size_t bsdtar_expand_char(char *, size_t, char);
  62. static const char *strip_components(const char *path, int elements);
  63. #if defined(_WIN32) && !defined(__CYGWIN__)
  64. #define read _read
  65. #endif
  66. /* TODO: Hack up a version of mbtowc for platforms with no wide
  67. * character support at all. I think the following might suffice,
  68. * but it needs careful testing.
  69. * #if !HAVE_MBTOWC
  70. * #define mbtowc(wcp, p, n) ((*wcp = *p), 1)
  71. * #endif
  72. */
  /*
   * Print a printf()-style formatted string to `f`, escaping every byte
   * that is not printable in the current locale.
   *
   * The string is first formatted with vsnprintf().  We try a
   * stack-allocated buffer before falling back to malloc().  This is partly
   * performance (avoiding a call to malloc()), partly expedience (we have
   * to call vsnprintf() once anyway to learn the required size), but mostly
   * safety: this function can still print a (truncated) message when
   * malloc() fails, so it is usable for out-of-memory diagnostics.
   *
   * The formatted bytes are then decoded with mbtowc().  Multibyte
   * characters that are printable (and are not a backslash) are copied
   * through unchanged; all other bytes are rendered through
   * bsdtar_expand_char().  After the first decoding failure the remainder
   * of the string is escaped byte by byte.
   */
  void
  safe_fprintf(FILE *f, const char *fmt, ...)
  {
      char fmtbuff_stack[256]; /* Place to format the printf() string. */
      char outbuff[256]; /* Buffer for outgoing characters. */
      char *fmtbuff_heap; /* If fmtbuff_stack is too small, we use malloc */
      char *fmtbuff; /* Pointer to fmtbuff_stack or fmtbuff_heap. */
      int fmtbuff_length;
      int length, n;
      int printable;
      va_list ap;
      const char *p;
      unsigned i;
      wchar_t wc;
      char try_wc;

      /* Use a stack-allocated buffer if we can, for speed and safety. */
      fmtbuff_heap = NULL;
      fmtbuff_length = sizeof(fmtbuff_stack);
      fmtbuff = fmtbuff_stack;

      /* Try formatting into the stack buffer. */
      va_start(ap, fmt);
      length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap);
      va_end(ap);

      /* If the result was too large, allocate a buffer on the heap. */
      if (length >= fmtbuff_length) {
          fmtbuff_length = length + 1;
          fmtbuff_heap = malloc(fmtbuff_length);

          /* Reformat the result into the heap buffer if we can. */
          if (fmtbuff_heap != NULL) {
              fmtbuff = fmtbuff_heap;
              va_start(ap, fmt);
              length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap);
              va_end(ap);
          } else {
              /* Leave fmtbuff pointing to the truncated
               * string in fmtbuff_stack. */
              length = sizeof(fmtbuff_stack) - 1;
          }
      }

      if (length < 0) {
          /* vsnprintf() reported an error, so there is nothing
           * reliable to print.  Never let a negative length reach
           * mbtowc(), where it would become a huge size_t. */
          fmtbuff[0] = '\0';
          length = 0;
      } else if (length >= fmtbuff_length) {
          /* Defensive: the second pass claimed more output than the
           * buffer sized from the first pass can hold. */
          length = fmtbuff_length - 1;
      }

      /* Note: mbrtowc() has a cleaner API, but mbtowc() seems a bit
       * more portable, so we use that here instead. */
      (void)mbtowc(NULL, NULL, 1); /* Reset the shift state. */

      /* Write data, expanding unprintable characters. */
      p = fmtbuff;
      i = 0;
      try_wc = 1;
      while (*p != '\0') {
          /* Default: escape exactly one byte. */
          n = 1;
          printable = 0;

          /* Convert to wide char, test if the wide
           * char is printable in the current locale. */
          if (try_wc) {
              n = mbtowc(&wc, p, (size_t)length);
              if (n > 0) {
                  length -= n;
                  printable = (iswprint(wc) && wc != L'\\');
              } else {
                  /* After any conversion failure, don't bother
                   * trying to convert the rest.  A return of 0
                   * is treated as a failure too, because it
                   * would not advance `p`. */
                  try_wc = 0;
                  n = 1;
              }
          }

          while (n-- > 0) {
              /* One input byte produces at most four output
               * characters plus the NUL written by sprintf() in
               * bsdtar_expand_char().  Checking before every byte
               * keeps outbuff safe no matter how many bytes a
               * multibyte character occupies. */
              if (i > sizeof(outbuff) - 5) {
                  outbuff[i] = '\0';
                  fprintf(f, "%s", outbuff);
                  i = 0;
              }
              if (printable) {
                  /* Printable, copy the byte through. */
                  outbuff[i++] = *p++;
              } else {
                  /* Not printable, format the byte. */
                  i += (unsigned)bsdtar_expand_char(
                      outbuff, i, *p++);
              }
          }
      }
      outbuff[i] = '\0';
      fprintf(f, "%s", outbuff);

      /* Release the heap formatting buffer; free(NULL) is a no-op. */
      free(fmtbuff_heap);
  }
  /*
   * Store the printable-ASCII rendering of byte `c` into `buff`, starting
   * at index `offset`, and return the number of characters stored.
   *
   * A printable character other than backslash is stored unchanged (one
   * character).  Anything else is stored as a backslash followed by either
   * a C-style escape letter (two characters in total) or three octal
   * digits (four characters in total).  In the octal case sprintf() also
   * writes a terminating NUL, which is not counted in the return value, so
   * the caller must provide room for up to five bytes at `offset`.
   */
  static size_t
  bsdtar_expand_char(char *buff, size_t offset, char c)
  {
      char *out = buff + offset;
      char letter;

      /* Ordinary printable byte: pass it through untouched. */
      if (c != '\\' && isprint((unsigned char)c)) {
          *out = c;
          return (1);
      }

      /* Pick the escape letter, or '\0' when only octal will do. */
      switch (c) {
      case '\a':
          letter = 'a';
          break;
      case '\b':
          letter = 'b';
          break;
      case '\f':
          letter = 'f';
          break;
      case '\n':
          letter = 'n';
          break;
  #if '\r' != '\n'
      /* On some platforms, \n and \r are the same. */
      case '\r':
          letter = 'r';
          break;
  #endif
      case '\t':
          letter = 't';
          break;
      case '\v':
          letter = 'v';
          break;
      case '\\':
          letter = '\\';
          break;
      default:
          letter = '\0';
          break;
      }

      *out++ = '\\';
      if (letter == '\0') {
          /* Mask to 8 bits so a negative char still yields 3 digits. */
          sprintf(out, "%03o", 0xFF & (int)c);
          return (4);
      }
      *out = letter;
      return (2);
  }
  /*
   * Ask the user a yes/no question.
   *
   * The printf()-style message is written to stderr followed by
   * " (y/N)? ", and the reply is read from file descriptor 2.
   *
   * Returns 1 only when the first non-whitespace character of the reply
   * is 'y' or 'Y'; returns 0 for any other reply, for an empty reply and
   * for end-of-file.  A failed read prints a message and exits with
   * status 1.
   */
  int
  yes(const char *fmt, ...)
  {
      char answer[32];
      const char *cur;
      ssize_t nread;
      va_list args;

      /* Show the prompt. */
      va_start(args, fmt);
      vfprintf(stderr, fmt, args);
      va_end(args);
      fprintf(stderr, " (y/N)? ");
      fflush(stderr);

      /* Leave one byte free for the terminator added below. */
      nread = read(2, answer, sizeof(answer) - 1);
      if (nread < 0) {
          fprintf(stderr, "Keyboard read failed\n");
          exit(1);
      }
      if (nread == 0)
          return (0);
      answer[nread] = 0;

      /* The first non-blank character decides the answer. */
      cur = answer;
      while (*cur != '\0' && isspace((unsigned char)*cur))
          cur++;
      if (*cur == 'y' || *cur == 'Y')
          return (1);
      return (0);
  }
  227. /*-
  228. * The logic here for -C <dir> attempts to avoid
  229. * chdir() as long as possible. For example:
  230. * "-C /foo -C /bar file" needs chdir("/bar") but not chdir("/foo")
  231. * "-C /foo -C bar file" needs chdir("/foo/bar")
  232. * "-C /foo -C bar /file1" does not need chdir()
  233. * "-C /foo -C bar /file1 file2" needs chdir("/foo/bar") before file2
  234. *
  235. * The only correct way to handle this is to record a "pending" chdir
  236. * request and combine multiple requests intelligently until we
  237. * need to process a non-absolute file. set_chdir() adds the new dir
  238. * to the pending list; do_chdir() actually executes any pending chdir.
  239. *
  240. * This way, programs that build tar command lines don't have to worry
  241. * about -C with non-existent directories; such requests will only
  242. * fail if the directory must be accessed.
  243. *
  244. * TODO: Make this handle Windows paths correctly.
  245. */
  246. void
  247. set_chdir(struct bsdtar *bsdtar, const char *newdir)
  248. {
  249. if (newdir[0] == '/') {
  250. /* The -C /foo -C /bar case; dump first one. */
  251. free(bsdtar->pending_chdir);
  252. bsdtar->pending_chdir = NULL;
  253. }
  254. if (bsdtar->pending_chdir == NULL)
  255. /* Easy case: no previously-saved dir. */
  256. bsdtar->pending_chdir = strdup(newdir);
  257. else {
  258. /* The -C /foo -C bar case; concatenate */
  259. char *old_pending = bsdtar->pending_chdir;
  260. size_t old_len = strlen(old_pending);
  261. bsdtar->pending_chdir = malloc(old_len + strlen(newdir) + 2);
  262. if (old_pending[old_len - 1] == '/')
  263. old_pending[old_len - 1] = '\0';
  264. if (bsdtar->pending_chdir != NULL)
  265. sprintf(bsdtar->pending_chdir, "%s/%s",
  266. old_pending, newdir);
  267. free(old_pending);
  268. }
  269. if (bsdtar->pending_chdir == NULL)
  270. lafe_errc(1, errno, "No memory");
  271. }
  272. void
  273. do_chdir(struct bsdtar *bsdtar)
  274. {
  275. if (bsdtar->pending_chdir == NULL)
  276. return;
  277. if (chdir(bsdtar->pending_chdir) != 0) {
  278. lafe_errc(1, 0, "could not chdir to '%s'\n",
  279. bsdtar->pending_chdir);
  280. }
  281. free(bsdtar->pending_chdir);
  282. bsdtar->pending_chdir = NULL;
  283. }
  /*
   * Report whether `ch` separates path elements: '/' everywhere, and
   * additionally '\' on native Windows builds ONLY.
   */
  static int
  strip_is_sep(char ch)
  {
  #if defined(_WIN32) && !defined(__CYGWIN__)
      if (ch == '\\')
          return (1);
  #endif
      return (ch == '/');
  }

  /*
   * Drop the first `elements` path elements from `p`.
   *
   * Returns a pointer into `p` at the first character that remains, or
   * NULL when the path has fewer elements than requested or nothing is
   * left once the separators are consumed.
   */
  static const char *
  strip_components(const char *p, int elements)
  {
      /* Walk past one separator per element still to be removed. */
      for (; elements > 0; p++) {
          if (*p == '\0') {
              /* Path is too short, skip it. */
              return (NULL);
          }
          if (strip_is_sep(*p))
              elements--;
      }

      /*
       * Swallow any separators that follow.  This covers short paths
       * with extra '/' termination, and the case where the loop above
       * stopped in the middle of a duplicate "//" sequence (which
       * would otherwise turn the result into an absolute path).
       */
      while (strip_is_sep(*p))
          p++;

      return (*p == '\0' ? NULL : p);
  }
  321. /*
  322. * Handle --strip-components and any future path-rewriting options.
  323. * Returns non-zero if the pathname should not be extracted.
  324. *
  325. * TODO: Support pax-style regex path rewrites.
  326. */
  327. int
  328. edit_pathname(struct bsdtar *bsdtar, struct archive_entry *entry)
  329. {
  330. const char *name = archive_entry_pathname(entry);
  331. #if HAVE_REGEX_H
  332. char *subst_name;
  333. int r;
  334. #endif
  335. #if HAVE_REGEX_H
  336. r = apply_substitution(bsdtar, name, &subst_name, 0);
  337. if (r == -1) {
  338. lafe_warnc(0, "Invalid substitution, skipping entry");
  339. return 1;
  340. }
  341. if (r == 1) {
  342. archive_entry_copy_pathname(entry, subst_name);
  343. if (*subst_name == '\0') {
  344. free(subst_name);
  345. return -1;
  346. } else
  347. free(subst_name);
  348. name = archive_entry_pathname(entry);
  349. }
  350. if (archive_entry_hardlink(entry)) {
  351. r = apply_substitution(bsdtar, archive_entry_hardlink(entry), &subst_name, 1);
  352. if (r == -1) {
  353. lafe_warnc(0, "Invalid substitution, skipping entry");
  354. return 1;
  355. }
  356. if (r == 1) {
  357. archive_entry_copy_hardlink(entry, subst_name);
  358. free(subst_name);
  359. }
  360. }
  361. if (archive_entry_symlink(entry) != NULL) {
  362. r = apply_substitution(bsdtar, archive_entry_symlink(entry), &subst_name, 1);
  363. if (r == -1) {
  364. lafe_warnc(0, "Invalid substitution, skipping entry");
  365. return 1;
  366. }
  367. if (r == 1) {
  368. archive_entry_copy_symlink(entry, subst_name);
  369. free(subst_name);
  370. }
  371. }
  372. #endif
  373. /* Strip leading dir names as per --strip-components option. */
  374. if (bsdtar->strip_components > 0) {
  375. const char *linkname = archive_entry_hardlink(entry);
  376. name = strip_components(name, bsdtar->strip_components);
  377. if (name == NULL)
  378. return (1);
  379. if (linkname != NULL) {
  380. linkname = strip_components(linkname,
  381. bsdtar->strip_components);
  382. if (linkname == NULL)
  383. return (1);
  384. archive_entry_copy_hardlink(entry, linkname);
  385. }
  386. }
  387. /* By default, don't write or restore absolute pathnames. */
  388. if (!bsdtar->option_absolute_paths) {
  389. const char *rp, *p = name;
  390. int slashonly = 1;
  391. /* Remove leading "//./" or "//?/" or "//?/UNC/"
  392. * (absolute path prefixes used by Windows API) */
  393. if ((p[0] == '/' || p[0] == '\\') &&
  394. (p[1] == '/' || p[1] == '\\') &&
  395. (p[2] == '.' || p[2] == '?') &&
  396. (p[3] == '/' || p[3] == '\\'))
  397. {
  398. if (p[2] == '?' &&
  399. (p[4] == 'U' || p[4] == 'u') &&
  400. (p[5] == 'N' || p[5] == 'n') &&
  401. (p[6] == 'C' || p[6] == 'c') &&
  402. (p[7] == '/' || p[7] == '\\'))
  403. p += 8;
  404. else
  405. p += 4;
  406. slashonly = 0;
  407. }
  408. do {
  409. rp = p;
  410. /* Remove leading drive letter from archives created
  411. * on Windows. */
  412. if (((p[0] >= 'a' && p[0] <= 'z') ||
  413. (p[0] >= 'A' && p[0] <= 'Z')) &&
  414. p[1] == ':') {
  415. p += 2;
  416. slashonly = 0;
  417. }
  418. /* Remove leading "/../", "//", etc. */
  419. while (p[0] == '/' || p[0] == '\\') {
  420. if (p[1] == '.' && p[2] == '.' &&
  421. (p[3] == '/' || p[3] == '\\')) {
  422. p += 3; /* Remove "/..", leave "/"
  423. * for next pass. */
  424. slashonly = 0;
  425. } else
  426. p += 1; /* Remove "/". */
  427. }
  428. } while (rp != p);
  429. if (p != name && !bsdtar->warned_lead_slash) {
  430. /* Generate a warning the first time this happens. */
  431. if (slashonly)
  432. lafe_warnc(0,
  433. "Removing leading '%c' from member names",
  434. name[0]);
  435. else
  436. lafe_warnc(0,
  437. "Removing leading drive letter from "
  438. "member names");
  439. bsdtar->warned_lead_slash = 1;
  440. }
  441. /* Special case: Stripping everything yields ".". */
  442. if (*p == '\0')
  443. name = ".";
  444. else
  445. name = p;
  446. } else {
  447. /* Strip redundant leading '/' characters. */
  448. while (name[0] == '/' && name[1] == '/')
  449. name++;
  450. }
  451. /* Safely replace name in archive_entry. */
  452. if (name != archive_entry_pathname(entry)) {
  453. char *q = strdup(name);
  454. archive_entry_copy_pathname(entry, q);
  455. free(q);
  456. }
  457. return (0);
  458. }
  /*
   * It would be nice to just use printf() for formatting large numbers,
   * but the compatibility problems are quite a headache.  Hence the
   * following simple utility function.
   *
   * Formats `n0` in decimal and returns a pointer into a static buffer,
   * so the result is overwritten by the next call.
   */
  const char *
  tar_i64toa(int64_t n0)
  {
      /* 19 digits + sign + NUL fit comfortably in 24 bytes. */
      static char buff[24];
      /* Take the magnitude in unsigned arithmetic: negating INT64_MIN
       * as a signed value would overflow. */
      uint64_t n = n0 < 0 ? (uint64_t)0 - (uint64_t)n0 : (uint64_t)n0;
      char *p = buff + sizeof(buff);

      /* Fill the buffer from the end, least significant digit first. */
      *--p = '\0';
      do {
          *--p = (char)('0' + (int)(n % 10));
          n /= 10;
      } while (n > 0);
      if (n0 < 0)
          *--p = '-';
      return p;
  }
  /*
   * Step over a leading "./" when something follows it; a bare "./" is
   * returned unchanged.
   */
  static const char *
  pathcmp_skip_dot_slash(const char *path)
  {
      if (path[0] == '.' && path[1] == '/' && path[2] != '\0')
          return (path + 2);
      return (path);
  }

  /*
   * Like strcmp(), but try to be a little more aware of the fact that
   * we're comparing two paths.  Right now, it just handles leading
   * "./" and trailing '/' specially, so that "a/b/" == "./a/b"
   *
   * Returns 0 when the paths are considered equal, otherwise the
   * difference between the first pair of differing bytes, compared as
   * unsigned char.
   *
   * TODO: Make this better, so that "./a//b/./c/" == "a/b/c"
   * TODO: After this works, push it down into libarchive.
   * TODO: Publish the path normalization routines in libarchive so
   * that bsdtar can normalize paths and use fast strcmp() instead
   * of this.
   *
   * Note: This is currently only used within write.c, so should
   * not handle \ path separators.
   */
  int
  pathcmp(const char *a, const char *b)
  {
      size_t k;

      /* Skip leading './' */
      a = pathcmp_skip_dot_slash(a);
      b = pathcmp_skip_dot_slash(b);

      /* Find the first difference, or return (0) if none. */
      for (k = 0; a[k] == b[k]; k++) {
          if (a[k] == '\0')
              return (0);
      }
      a += k;
      b += k;

      /*
       * If one ends in '/' and the other one doesn't,
       * they're the same.
       */
      if ((a[0] == '/' && a[1] == '\0' && b[0] == '\0') ||
          (a[0] == '\0' && b[0] == '/' && b[1] == '\0'))
          return (0);

      /* They're really different, return the correct sign. */
      return ((int)(unsigned char)a[0] - (int)(unsigned char)b[0]);
  }