archive_entry_link_resolver.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. /*-
  2. * Copyright (c) 2003-2007 Tim Kientzle
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. #include "archive_platform.h"
  26. __FBSDID("$FreeBSD: src/lib/libarchive/archive_entry_link_resolver.c,v 1.4 2008/09/05 06:15:25 kientzle Exp $");
  27. #ifdef HAVE_SYS_STAT_H
  28. #include <sys/stat.h>
  29. #endif
  30. #ifdef HAVE_ERRNO_H
  31. #include <errno.h>
  32. #endif
  33. #include <stdio.h>
  34. #ifdef HAVE_STDLIB_H
  35. #include <stdlib.h>
  36. #endif
  37. #ifdef HAVE_STRING_H
  38. #include <string.h>
  39. #endif
  40. #include "archive.h"
  41. #include "archive_entry.h"
  42. /*
  43. * This is mostly a pretty straightforward hash table implementation.
  44. * The only interesting bit is the different strategies used to
  45. * match up links. These strategies match those used by various
  46. * archiving formats:
  47. * tar - content stored with first link, remainder refer back to it.
  48. * This requires us to match each subsequent link up with the
  49. * first appearance.
  50. * cpio - Old cpio just stored body with each link, match-ups were
  51. * implicit. This is trivial.
  52. * new cpio - New cpio only stores body with last link, match-ups
  53. * are implicit. This is actually quite tricky; see the notes
  54. * below.
  55. */
  56. /* Users pass us a format code, we translate that into a strategy here. */
  57. #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0
  58. #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
  59. #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
  60. #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
  61. /* Initial size of link cache. */
  62. #define links_cache_initial_size 1024
  63. struct links_entry {
  64. struct links_entry *next;
  65. struct links_entry *previous;
  66. int links; /* # links not yet seen */
  67. int hash;
  68. struct archive_entry *canonical;
  69. struct archive_entry *entry;
  70. };
  71. struct archive_entry_linkresolver {
  72. struct links_entry **buckets;
  73. struct links_entry *spare;
  74. unsigned long number_entries;
  75. size_t number_buckets;
  76. int strategy;
  77. };
  78. static struct links_entry *find_entry(struct archive_entry_linkresolver *,
  79. struct archive_entry *);
  80. static void grow_hash(struct archive_entry_linkresolver *);
  81. static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
  82. struct archive_entry *);
  83. static struct links_entry *next_entry(struct archive_entry_linkresolver *);
  84. struct archive_entry_linkresolver *
  85. archive_entry_linkresolver_new(void)
  86. {
  87. struct archive_entry_linkresolver *res;
  88. size_t i;
  89. res = malloc(sizeof(struct archive_entry_linkresolver));
  90. if (res == NULL)
  91. return (NULL);
  92. memset(res, 0, sizeof(struct archive_entry_linkresolver));
  93. res->number_buckets = links_cache_initial_size;
  94. res->buckets = malloc(res->number_buckets *
  95. sizeof(res->buckets[0]));
  96. if (res->buckets == NULL) {
  97. free(res);
  98. return (NULL);
  99. }
  100. for (i = 0; i < res->number_buckets; i++)
  101. res->buckets[i] = NULL;
  102. return (res);
  103. }
  104. void
  105. archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
  106. int fmt)
  107. {
  108. int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
  109. switch (fmtbase) {
  110. case ARCHIVE_FORMAT_CPIO:
  111. switch (fmt) {
  112. case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
  113. case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
  114. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
  115. break;
  116. default:
  117. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
  118. break;
  119. }
  120. break;
  121. case ARCHIVE_FORMAT_MTREE:
  122. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
  123. break;
  124. case ARCHIVE_FORMAT_TAR:
  125. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
  126. break;
  127. default:
  128. res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
  129. break;
  130. }
  131. }
  132. void
  133. archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
  134. {
  135. struct links_entry *le;
  136. if (res == NULL)
  137. return;
  138. if (res->buckets != NULL) {
  139. while ((le = next_entry(res)) != NULL)
  140. archive_entry_free(le->entry);
  141. free(res->buckets);
  142. res->buckets = NULL;
  143. }
  144. free(res);
  145. }
  146. void
  147. archive_entry_linkify(struct archive_entry_linkresolver *res,
  148. struct archive_entry **e, struct archive_entry **f)
  149. {
  150. struct links_entry *le;
  151. struct archive_entry *t;
  152. *f = NULL; /* Default: Don't return a second entry. */
  153. if (*e == NULL) {
  154. le = next_entry(res);
  155. if (le != NULL) {
  156. *e = le->entry;
  157. le->entry = NULL;
  158. }
  159. return;
  160. }
  161. /* If it has only one link, then we're done. */
  162. if (archive_entry_nlink(*e) == 1)
  163. return;
  164. /* Directories never have hardlinks. */
  165. if (archive_entry_filetype(*e) == AE_IFDIR)
  166. return;
  167. switch (res->strategy) {
  168. case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
  169. le = find_entry(res, *e);
  170. if (le != NULL) {
  171. archive_entry_unset_size(*e);
  172. archive_entry_copy_hardlink(*e,
  173. archive_entry_pathname(le->canonical));
  174. } else
  175. insert_entry(res, *e);
  176. return;
  177. case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
  178. le = find_entry(res, *e);
  179. if (le != NULL) {
  180. archive_entry_copy_hardlink(*e,
  181. archive_entry_pathname(le->canonical));
  182. } else
  183. insert_entry(res, *e);
  184. return;
  185. case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
  186. /* This one is trivial. */
  187. return;
  188. case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
  189. le = find_entry(res, *e);
  190. if (le != NULL) {
  191. /*
  192. * Put the new entry in le, return the
  193. * old entry from le.
  194. */
  195. t = *e;
  196. *e = le->entry;
  197. le->entry = t;
  198. /* Make the old entry into a hardlink. */
  199. archive_entry_unset_size(*e);
  200. archive_entry_copy_hardlink(*e,
  201. archive_entry_pathname(le->canonical));
  202. /* If we ran out of links, return the
  203. * final entry as well. */
  204. if (le->links == 0) {
  205. *f = le->entry;
  206. le->entry = NULL;
  207. }
  208. } else {
  209. /*
  210. * If we haven't seen it, tuck it away
  211. * for future use.
  212. */
  213. le = insert_entry(res, *e);
  214. le->entry = *e;
  215. *e = NULL;
  216. }
  217. return;
  218. default:
  219. break;
  220. }
  221. return;
  222. }
  223. static struct links_entry *
  224. find_entry(struct archive_entry_linkresolver *res,
  225. struct archive_entry *entry)
  226. {
  227. struct links_entry *le;
  228. int hash, bucket;
  229. dev_t dev;
  230. ino_t ino;
  231. /* Free a held entry. */
  232. if (res->spare != NULL) {
  233. archive_entry_free(res->spare->canonical);
  234. archive_entry_free(res->spare->entry);
  235. free(res->spare);
  236. res->spare = NULL;
  237. }
  238. /* If the links cache overflowed and got flushed, don't bother. */
  239. if (res->buckets == NULL)
  240. return (NULL);
  241. dev = archive_entry_dev(entry);
  242. ino = archive_entry_ino(entry);
  243. hash = dev ^ ino;
  244. /* Try to locate this entry in the links cache. */
  245. bucket = hash % res->number_buckets;
  246. for (le = res->buckets[bucket]; le != NULL; le = le->next) {
  247. if (le->hash == hash
  248. && dev == archive_entry_dev(le->canonical)
  249. && ino == archive_entry_ino(le->canonical)) {
  250. /*
  251. * Decrement link count each time and release
  252. * the entry if it hits zero. This saves
  253. * memory and is necessary for detecting
  254. * missed links.
  255. */
  256. --le->links;
  257. if (le->links > 0)
  258. return (le);
  259. /* Remove it from this hash bucket. */
  260. if (le->previous != NULL)
  261. le->previous->next = le->next;
  262. if (le->next != NULL)
  263. le->next->previous = le->previous;
  264. if (res->buckets[bucket] == le)
  265. res->buckets[bucket] = le->next;
  266. res->number_entries--;
  267. /* Defer freeing this entry. */
  268. res->spare = le;
  269. return (le);
  270. }
  271. }
  272. return (NULL);
  273. }
  274. static struct links_entry *
  275. next_entry(struct archive_entry_linkresolver *res)
  276. {
  277. struct links_entry *le;
  278. size_t bucket;
  279. /* Free a held entry. */
  280. if (res->spare != NULL) {
  281. archive_entry_free(res->spare->canonical);
  282. free(res->spare);
  283. res->spare = NULL;
  284. }
  285. /* If the links cache overflowed and got flushed, don't bother. */
  286. if (res->buckets == NULL)
  287. return (NULL);
  288. /* Look for next non-empty bucket in the links cache. */
  289. for (bucket = 0; bucket < res->number_buckets; bucket++) {
  290. le = res->buckets[bucket];
  291. if (le != NULL) {
  292. /* Remove it from this hash bucket. */
  293. if (le->next != NULL)
  294. le->next->previous = le->previous;
  295. res->buckets[bucket] = le->next;
  296. res->number_entries--;
  297. /* Defer freeing this entry. */
  298. res->spare = le;
  299. return (le);
  300. }
  301. }
  302. return (NULL);
  303. }
  304. static struct links_entry *
  305. insert_entry(struct archive_entry_linkresolver *res,
  306. struct archive_entry *entry)
  307. {
  308. struct links_entry *le;
  309. int hash, bucket;
  310. /* Add this entry to the links cache. */
  311. le = malloc(sizeof(struct links_entry));
  312. if (le == NULL)
  313. return (NULL);
  314. memset(le, 0, sizeof(*le));
  315. le->canonical = archive_entry_clone(entry);
  316. /* If the links cache is getting too full, enlarge the hash table. */
  317. if (res->number_entries > res->number_buckets * 2)
  318. grow_hash(res);
  319. hash = archive_entry_dev(entry) ^ archive_entry_ino(entry);
  320. bucket = hash % res->number_buckets;
  321. /* If we could allocate the entry, record it. */
  322. if (res->buckets[bucket] != NULL)
  323. res->buckets[bucket]->previous = le;
  324. res->number_entries++;
  325. le->next = res->buckets[bucket];
  326. le->previous = NULL;
  327. res->buckets[bucket] = le;
  328. le->hash = hash;
  329. le->links = archive_entry_nlink(entry) - 1;
  330. return (le);
  331. }
  332. static void
  333. grow_hash(struct archive_entry_linkresolver *res)
  334. {
  335. struct links_entry *le, **new_buckets;
  336. size_t new_size;
  337. size_t i, bucket;
  338. /* Try to enlarge the bucket list. */
  339. new_size = res->number_buckets * 2;
  340. new_buckets = malloc(new_size * sizeof(struct links_entry *));
  341. if (new_buckets != NULL) {
  342. memset(new_buckets, 0,
  343. new_size * sizeof(struct links_entry *));
  344. for (i = 0; i < res->number_buckets; i++) {
  345. while (res->buckets[i] != NULL) {
  346. /* Remove entry from old bucket. */
  347. le = res->buckets[i];
  348. res->buckets[i] = le->next;
  349. /* Add entry to new bucket. */
  350. bucket = le->hash % new_size;
  351. if (new_buckets[bucket] != NULL)
  352. new_buckets[bucket]->previous =
  353. le;
  354. le->next = new_buckets[bucket];
  355. le->previous = NULL;
  356. new_buckets[bucket] = le;
  357. }
  358. }
  359. free(res->buckets);
  360. res->buckets = new_buckets;
  361. res->number_buckets = new_size;
  362. }
  363. }