PageRenderTime 62ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/contrib/libarchive/libarchive/archive_entry_link_resolver.c

https://bitbucket.org/freebsd/freebsd-head
C | 444 lines | 324 code | 44 blank | 76 comment | 89 complexity | 3e823d42dbcf65354bf63cd584252053 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, BSD-3-Clause, JSON, LGPL-2.1, GPL-2.0, LGPL-2.0, AGPL-1.0, BSD-2-Clause, 0BSD
  1. /*-
  2. * Copyright (c) 2003-2007 Tim Kientzle
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. #include "archive_platform.h"
  26. __FBSDID("$FreeBSD$");
  27. #ifdef HAVE_SYS_STAT_H
  28. #include <sys/stat.h>
  29. #endif
  30. #ifdef HAVE_ERRNO_H
  31. #include <errno.h>
  32. #endif
  33. #include <stdio.h>
  34. #ifdef HAVE_STDLIB_H
  35. #include <stdlib.h>
  36. #endif
  37. #ifdef HAVE_STRING_H
  38. #include <string.h>
  39. #endif
  40. #include "archive.h"
  41. #include "archive_entry.h"
  42. /*
  43. * This is mostly a pretty straightforward hash table implementation.
  44. * The only interesting bit is the different strategies used to
  45. * match up links. These strategies match those used by various
  46. * archiving formats:
  47. * tar - content stored with first link, remainder refer back to it.
  48. * This requires us to match each subsequent link up with the
  49. * first appearance.
  50. * cpio - Old cpio just stored body with each link, match-ups were
  51. * implicit. This is trivial.
  52. * new cpio - New cpio only stores body with last link, match-ups
  53. * are implicit. This is actually quite tricky; see the notes
  54. * below.
  55. */
/*
 * Users pass us a format code, we translate that into a strategy here.
 * archive_entry_linkresolver_set_strategy() stores one of these values
 * in the resolver; archive_entry_linkify() switches on it.
 */
/* Later links become hardlinks to the first entry and have their size unset. */
#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0
/* Later links become hardlinks to the first entry; size is left alone. */
#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
/* Entries are passed through untouched. */
#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
/* Entries are held back so that only the last link keeps its body. */
#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
/*
 * Initial size of link cache (number of hash buckets).  Must be a
 * positive power of two because buckets are selected by masking the
 * hash with (number_buckets - 1).
 */
#define links_cache_initial_size 1024
/*
 * One record in the links cache, describing a file (matched by device
 * and inode) for which more links are still expected.  Records that
 * share a hash bucket are chained into a doubly-linked list.
 */
struct links_entry {
	/* Next record in the same bucket, or NULL at the tail. */
	struct links_entry *next;
	/* Previous record in the same bucket, or NULL at the head. */
	struct links_entry *previous;
	/* Clone of the entry that created this record (see insert_entry()). */
	struct archive_entry *canonical;
	/* Entry held back for later return by the new-cpio strategy, else NULL. */
	struct archive_entry *entry;
	/* Device number XOR inode number of the file. */
	size_t hash;
	unsigned int links; /* # links not yet seen */
};
/*
 * Link resolver state: a chained hash table of links_entry records
 * plus the strategy that archive_entry_linkify() applies.
 */
struct archive_entry_linkresolver {
	/* Array of number_buckets chain heads. */
	struct links_entry **buckets;
	/*
	 * Record most recently unlinked from the table.  It stays valid
	 * for the caller until the next find_entry()/next_entry() call,
	 * which frees it.
	 */
	struct links_entry *spare;
	/* Number of records currently linked into the table. */
	unsigned long number_entries;
	/* Length of buckets[]; always a power of two. */
	size_t number_buckets;
	/* One of the ARCHIVE_ENTRY_LINKIFY_LIKE_* values. */
	int strategy;
};
/* Selection flags for the "mode" argument of next_entry(). */
/* Accept records that still hold a deferred entry (entry != NULL). */
#define NEXT_ENTRY_DEFERRED 1
/* Accept records that hold no deferred entry (entry == NULL). */
#define NEXT_ENTRY_PARTIAL 2
/* Accept every record. */
#define NEXT_ENTRY_ALL (NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL)
/* Forward declarations of the file-local hash table helpers. */
/* Look up the record matching an entry's dev/ino and count down its links. */
static struct links_entry *find_entry(struct archive_entry_linkresolver *,
    struct archive_entry *);
/* Double the number of hash buckets and redistribute the records. */
static void grow_hash(struct archive_entry_linkresolver *);
/* Create a record for an entry and link it into the table. */
static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
    struct archive_entry *);
/* Unlink and return the first record accepted by the NEXT_ENTRY_* mode. */
static struct links_entry *next_entry(struct archive_entry_linkresolver *,
    int);
  88. struct archive_entry_linkresolver *
  89. archive_entry_linkresolver_new(void)
  90. {
  91. struct archive_entry_linkresolver *res;
  92. /* Check for positive power-of-two */
  93. if (links_cache_initial_size == 0 ||
  94. (links_cache_initial_size & (links_cache_initial_size - 1)) != 0)
  95. return (NULL);
  96. res = calloc(1, sizeof(struct archive_entry_linkresolver));
  97. if (res == NULL)
  98. return (NULL);
  99. res->number_buckets = links_cache_initial_size;
  100. res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0]));
  101. if (res->buckets == NULL) {
  102. free(res);
  103. return (NULL);
  104. }
  105. return (res);
  106. }
  107. void
  108. archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
  109. int fmt)
  110. {
  111. int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
  112. switch (fmtbase) {
  113. case ARCHIVE_FORMAT_7ZIP:
  114. case ARCHIVE_FORMAT_AR:
  115. case ARCHIVE_FORMAT_ZIP:
  116. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
  117. break;
  118. case ARCHIVE_FORMAT_CPIO:
  119. switch (fmt) {
  120. case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
  121. case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
  122. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
  123. break;
  124. default:
  125. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
  126. break;
  127. }
  128. break;
  129. case ARCHIVE_FORMAT_MTREE:
  130. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
  131. break;
  132. case ARCHIVE_FORMAT_ISO9660:
  133. case ARCHIVE_FORMAT_SHAR:
  134. case ARCHIVE_FORMAT_TAR:
  135. case ARCHIVE_FORMAT_XAR:
  136. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
  137. break;
  138. default:
  139. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
  140. break;
  141. }
  142. }
  143. void
  144. archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
  145. {
  146. struct links_entry *le;
  147. if (res == NULL)
  148. return;
  149. while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL)
  150. archive_entry_free(le->entry);
  151. free(res->buckets);
  152. free(res);
  153. }
  154. void
  155. archive_entry_linkify(struct archive_entry_linkresolver *res,
  156. struct archive_entry **e, struct archive_entry **f)
  157. {
  158. struct links_entry *le;
  159. struct archive_entry *t;
  160. *f = NULL; /* Default: Don't return a second entry. */
  161. if (*e == NULL) {
  162. le = next_entry(res, NEXT_ENTRY_DEFERRED);
  163. if (le != NULL) {
  164. *e = le->entry;
  165. le->entry = NULL;
  166. }
  167. return;
  168. }
  169. /* If it has only one link, then we're done. */
  170. if (archive_entry_nlink(*e) == 1)
  171. return;
  172. /* Directories, devices never have hardlinks. */
  173. if (archive_entry_filetype(*e) == AE_IFDIR
  174. || archive_entry_filetype(*e) == AE_IFBLK
  175. || archive_entry_filetype(*e) == AE_IFCHR)
  176. return;
  177. switch (res->strategy) {
  178. case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
  179. le = find_entry(res, *e);
  180. if (le != NULL) {
  181. archive_entry_unset_size(*e);
  182. archive_entry_copy_hardlink(*e,
  183. archive_entry_pathname(le->canonical));
  184. } else
  185. insert_entry(res, *e);
  186. return;
  187. case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
  188. le = find_entry(res, *e);
  189. if (le != NULL) {
  190. archive_entry_copy_hardlink(*e,
  191. archive_entry_pathname(le->canonical));
  192. } else
  193. insert_entry(res, *e);
  194. return;
  195. case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
  196. /* This one is trivial. */
  197. return;
  198. case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
  199. le = find_entry(res, *e);
  200. if (le != NULL) {
  201. /*
  202. * Put the new entry in le, return the
  203. * old entry from le.
  204. */
  205. t = *e;
  206. *e = le->entry;
  207. le->entry = t;
  208. /* Make the old entry into a hardlink. */
  209. archive_entry_unset_size(*e);
  210. archive_entry_copy_hardlink(*e,
  211. archive_entry_pathname(le->canonical));
  212. /* If we ran out of links, return the
  213. * final entry as well. */
  214. if (le->links == 0) {
  215. *f = le->entry;
  216. le->entry = NULL;
  217. }
  218. } else {
  219. /*
  220. * If we haven't seen it, tuck it away
  221. * for future use.
  222. */
  223. le = insert_entry(res, *e);
  224. le->entry = *e;
  225. *e = NULL;
  226. }
  227. return;
  228. default:
  229. break;
  230. }
  231. return;
  232. }
  233. static struct links_entry *
  234. find_entry(struct archive_entry_linkresolver *res,
  235. struct archive_entry *entry)
  236. {
  237. struct links_entry *le;
  238. size_t hash, bucket;
  239. dev_t dev;
  240. int64_t ino;
  241. /* Free a held entry. */
  242. if (res->spare != NULL) {
  243. archive_entry_free(res->spare->canonical);
  244. archive_entry_free(res->spare->entry);
  245. free(res->spare);
  246. res->spare = NULL;
  247. }
  248. dev = archive_entry_dev(entry);
  249. ino = archive_entry_ino64(entry);
  250. hash = (size_t)(dev ^ ino);
  251. /* Try to locate this entry in the links cache. */
  252. bucket = hash & (res->number_buckets - 1);
  253. for (le = res->buckets[bucket]; le != NULL; le = le->next) {
  254. if (le->hash == hash
  255. && dev == archive_entry_dev(le->canonical)
  256. && ino == archive_entry_ino64(le->canonical)) {
  257. /*
  258. * Decrement link count each time and release
  259. * the entry if it hits zero. This saves
  260. * memory and is necessary for detecting
  261. * missed links.
  262. */
  263. --le->links;
  264. if (le->links > 0)
  265. return (le);
  266. /* Remove it from this hash bucket. */
  267. if (le->previous != NULL)
  268. le->previous->next = le->next;
  269. if (le->next != NULL)
  270. le->next->previous = le->previous;
  271. if (res->buckets[bucket] == le)
  272. res->buckets[bucket] = le->next;
  273. res->number_entries--;
  274. /* Defer freeing this entry. */
  275. res->spare = le;
  276. return (le);
  277. }
  278. }
  279. return (NULL);
  280. }
  281. static struct links_entry *
  282. next_entry(struct archive_entry_linkresolver *res, int mode)
  283. {
  284. struct links_entry *le;
  285. size_t bucket;
  286. /* Free a held entry. */
  287. if (res->spare != NULL) {
  288. archive_entry_free(res->spare->canonical);
  289. archive_entry_free(res->spare->entry);
  290. free(res->spare);
  291. res->spare = NULL;
  292. }
  293. /* Look for next non-empty bucket in the links cache. */
  294. for (bucket = 0; bucket < res->number_buckets; bucket++) {
  295. for (le = res->buckets[bucket]; le != NULL; le = le->next) {
  296. if (le->entry != NULL &&
  297. (mode & NEXT_ENTRY_DEFERRED) == 0)
  298. continue;
  299. if (le->entry == NULL &&
  300. (mode & NEXT_ENTRY_PARTIAL) == 0)
  301. continue;
  302. /* Remove it from this hash bucket. */
  303. if (le->next != NULL)
  304. le->next->previous = le->previous;
  305. if (le->previous != NULL)
  306. le->previous->next = le->next;
  307. else
  308. res->buckets[bucket] = le->next;
  309. res->number_entries--;
  310. /* Defer freeing this entry. */
  311. res->spare = le;
  312. return (le);
  313. }
  314. }
  315. return (NULL);
  316. }
  317. static struct links_entry *
  318. insert_entry(struct archive_entry_linkresolver *res,
  319. struct archive_entry *entry)
  320. {
  321. struct links_entry *le;
  322. size_t hash, bucket;
  323. /* Add this entry to the links cache. */
  324. le = calloc(1, sizeof(struct links_entry));
  325. if (le == NULL)
  326. return (NULL);
  327. le->canonical = archive_entry_clone(entry);
  328. /* If the links cache is getting too full, enlarge the hash table. */
  329. if (res->number_entries > res->number_buckets * 2)
  330. grow_hash(res);
  331. hash = archive_entry_dev(entry) ^ archive_entry_ino64(entry);
  332. bucket = hash & (res->number_buckets - 1);
  333. /* If we could allocate the entry, record it. */
  334. if (res->buckets[bucket] != NULL)
  335. res->buckets[bucket]->previous = le;
  336. res->number_entries++;
  337. le->next = res->buckets[bucket];
  338. le->previous = NULL;
  339. res->buckets[bucket] = le;
  340. le->hash = hash;
  341. le->links = archive_entry_nlink(entry) - 1;
  342. return (le);
  343. }
  344. static void
  345. grow_hash(struct archive_entry_linkresolver *res)
  346. {
  347. struct links_entry *le, **new_buckets;
  348. size_t new_size;
  349. size_t i, bucket;
  350. /* Try to enlarge the bucket list. */
  351. new_size = res->number_buckets * 2;
  352. if (new_size < res->number_buckets)
  353. return;
  354. new_buckets = calloc(new_size, sizeof(struct links_entry *));
  355. if (new_buckets == NULL)
  356. return;
  357. for (i = 0; i < res->number_buckets; i++) {
  358. while (res->buckets[i] != NULL) {
  359. /* Remove entry from old bucket. */
  360. le = res->buckets[i];
  361. res->buckets[i] = le->next;
  362. /* Add entry to new bucket. */
  363. bucket = le->hash & (new_size - 1);
  364. if (new_buckets[bucket] != NULL)
  365. new_buckets[bucket]->previous = le;
  366. le->next = new_buckets[bucket];
  367. le->previous = NULL;
  368. new_buckets[bucket] = le;
  369. }
  370. }
  371. free(res->buckets);
  372. res->buckets = new_buckets;
  373. res->number_buckets = new_size;
  374. }
  375. struct archive_entry *
  376. archive_entry_partial_links(struct archive_entry_linkresolver *res,
  377. unsigned int *links)
  378. {
  379. struct archive_entry *e;
  380. struct links_entry *le;
  381. /* Free a held entry. */
  382. if (res->spare != NULL) {
  383. archive_entry_free(res->spare->canonical);
  384. archive_entry_free(res->spare->entry);
  385. free(res->spare);
  386. res->spare = NULL;
  387. }
  388. le = next_entry(res, NEXT_ENTRY_PARTIAL);
  389. if (le != NULL) {
  390. e = le->canonical;
  391. if (links != NULL)
  392. *links = le->links;
  393. le->canonical = NULL;
  394. } else {
  395. e = NULL;
  396. if (links != NULL)
  397. *links = 0;
  398. }
  399. return (e);
  400. }