
/fs/btrfs/extent-tree.c

https://github.com/mstsirkin/linux
C | 1831 lines | 1426 code | 184 blank | 221 comment | 284 complexity
  1. /*
  2. * Copyright (C) 2007 Oracle. All rights reserved.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public
  6. * License v2 as published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public
  14. * License along with this program; if not, write to the
  15. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16. * Boston, MA 021110-1307, USA.
  17. */
  18. #include <linux/sched.h>
  19. #include <linux/pagemap.h>
  20. #include <linux/writeback.h>
  21. #include <linux/blkdev.h>
  22. #include <linux/sort.h>
  23. #include <linux/rcupdate.h>
  24. #include <linux/kthread.h>
  25. #include <linux/slab.h>
  26. #include "compat.h"
  27. #include "hash.h"
  28. #include "ctree.h"
  29. #include "disk-io.h"
  30. #include "print-tree.h"
  31. #include "transaction.h"
  32. #include "volumes.h"
  33. #include "locking.h"
  34. #include "free-space-cache.h"
  35. /* control flags for do_chunk_alloc's force field
  36. * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
  37. * if we really need one.
  38. *
  39. * CHUNK_ALLOC_FORCE means it must try to allocate one
  40. *
  41. * CHUNK_ALLOC_LIMITED means to only try to allocate one
  42. * if we have very few chunks already allocated. This is
  43. * used as part of the clustering code to help make sure
  44. * we have a good pool of storage to cluster in, without
  45. * filling the FS with empty chunks
  46. *
  47. */
  48. enum {
  49. CHUNK_ALLOC_NO_FORCE = 0,
  50. CHUNK_ALLOC_FORCE = 1,
  51. CHUNK_ALLOC_LIMITED = 2,
  52. };
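/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * callers pass one of these values as the force argument of do_chunk_alloc(),
 * declared below, e.g. roughly:
 *
 *	ret = do_chunk_alloc(trans, extent_root, alloc_bytes,
 *			     BTRFS_BLOCK_GROUP_METADATA, CHUNK_ALLOC_NO_FORCE);
 *
 * CHUNK_ALLOC_FORCE would allocate a chunk unconditionally, while
 * CHUNK_ALLOC_LIMITED is what the clustering code uses to keep a small
 * pool of chunks around without filling the FS with empty ones.
 */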
  53. static int update_block_group(struct btrfs_trans_handle *trans,
  54. struct btrfs_root *root,
  55. u64 bytenr, u64 num_bytes, int alloc);
  56. static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
  57. struct btrfs_root *root,
  58. u64 bytenr, u64 num_bytes, u64 parent,
  59. u64 root_objectid, u64 owner_objectid,
  60. u64 owner_offset, int refs_to_drop,
  61. struct btrfs_delayed_extent_op *extra_op);
  62. static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
  63. struct extent_buffer *leaf,
  64. struct btrfs_extent_item *ei);
  65. static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
  66. struct btrfs_root *root,
  67. u64 parent, u64 root_objectid,
  68. u64 flags, u64 owner, u64 offset,
  69. struct btrfs_key *ins, int ref_mod);
  70. static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
  71. struct btrfs_root *root,
  72. u64 parent, u64 root_objectid,
  73. u64 flags, struct btrfs_disk_key *key,
  74. int level, struct btrfs_key *ins);
  75. static int do_chunk_alloc(struct btrfs_trans_handle *trans,
  76. struct btrfs_root *extent_root, u64 alloc_bytes,
  77. u64 flags, int force);
  78. static int find_next_key(struct btrfs_path *path, int level,
  79. struct btrfs_key *key);
  80. static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
  81. int dump_block_groups);
  82. static noinline int
  83. block_group_cache_done(struct btrfs_block_group_cache *cache)
  84. {
  85. smp_mb();
  86. return cache->cached == BTRFS_CACHE_FINISHED;
  87. }
  88. static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
  89. {
  90. return (cache->flags & bits) == bits;
  91. }
  92. static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
  93. {
  94. atomic_inc(&cache->count);
  95. }
  96. void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
  97. {
  98. if (atomic_dec_and_test(&cache->count)) {
  99. WARN_ON(cache->pinned > 0);
  100. WARN_ON(cache->reserved > 0);
  101. WARN_ON(cache->reserved_pinned > 0);
  102. kfree(cache->free_space_ctl);
  103. kfree(cache);
  104. }
  105. }
  106. /*
  107. * this adds the block group to the fs_info rb tree for the block group
  108. * cache
  109. */
  110. static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
  111. struct btrfs_block_group_cache *block_group)
  112. {
  113. struct rb_node **p;
  114. struct rb_node *parent = NULL;
  115. struct btrfs_block_group_cache *cache;
  116. spin_lock(&info->block_group_cache_lock);
  117. p = &info->block_group_cache_tree.rb_node;
  118. while (*p) {
  119. parent = *p;
  120. cache = rb_entry(parent, struct btrfs_block_group_cache,
  121. cache_node);
  122. if (block_group->key.objectid < cache->key.objectid) {
  123. p = &(*p)->rb_left;
  124. } else if (block_group->key.objectid > cache->key.objectid) {
  125. p = &(*p)->rb_right;
  126. } else {
  127. spin_unlock(&info->block_group_cache_lock);
  128. return -EEXIST;
  129. }
  130. }
  131. rb_link_node(&block_group->cache_node, parent, p);
  132. rb_insert_color(&block_group->cache_node,
  133. &info->block_group_cache_tree);
  134. spin_unlock(&info->block_group_cache_lock);
  135. return 0;
  136. }
  137. /*
  138. * This will return the block group at or after bytenr if contains is 0, else
  139. * it will return the block group that contains the bytenr
  140. */
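/*
 * Editor's note (not part of the original file): the two lookup wrappers
 * further down map directly onto the contains flag --
 * btrfs_lookup_first_block_group() passes contains == 0 and
 * btrfs_lookup_block_group() passes contains == 1.
 */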
  141. static struct btrfs_block_group_cache *
  142. block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
  143. int contains)
  144. {
  145. struct btrfs_block_group_cache *cache, *ret = NULL;
  146. struct rb_node *n;
  147. u64 end, start;
  148. spin_lock(&info->block_group_cache_lock);
  149. n = info->block_group_cache_tree.rb_node;
  150. while (n) {
  151. cache = rb_entry(n, struct btrfs_block_group_cache,
  152. cache_node);
  153. end = cache->key.objectid + cache->key.offset - 1;
  154. start = cache->key.objectid;
  155. if (bytenr < start) {
  156. if (!contains && (!ret || start < ret->key.objectid))
  157. ret = cache;
  158. n = n->rb_left;
  159. } else if (bytenr > start) {
  160. if (contains && bytenr <= end) {
  161. ret = cache;
  162. break;
  163. }
  164. n = n->rb_right;
  165. } else {
  166. ret = cache;
  167. break;
  168. }
  169. }
  170. if (ret)
  171. btrfs_get_block_group(ret);
  172. spin_unlock(&info->block_group_cache_lock);
  173. return ret;
  174. }
  175. static int add_excluded_extent(struct btrfs_root *root,
  176. u64 start, u64 num_bytes)
  177. {
  178. u64 end = start + num_bytes - 1;
  179. set_extent_bits(&root->fs_info->freed_extents[0],
  180. start, end, EXTENT_UPTODATE, GFP_NOFS);
  181. set_extent_bits(&root->fs_info->freed_extents[1],
  182. start, end, EXTENT_UPTODATE, GFP_NOFS);
  183. return 0;
  184. }
  185. static void free_excluded_extents(struct btrfs_root *root,
  186. struct btrfs_block_group_cache *cache)
  187. {
  188. u64 start, end;
  189. start = cache->key.objectid;
  190. end = start + cache->key.offset - 1;
  191. clear_extent_bits(&root->fs_info->freed_extents[0],
  192. start, end, EXTENT_UPTODATE, GFP_NOFS);
  193. clear_extent_bits(&root->fs_info->freed_extents[1],
  194. start, end, EXTENT_UPTODATE, GFP_NOFS);
  195. }
  196. static int exclude_super_stripes(struct btrfs_root *root,
  197. struct btrfs_block_group_cache *cache)
  198. {
  199. u64 bytenr;
  200. u64 *logical;
  201. int stripe_len;
  202. int i, nr, ret;
  203. if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
  204. stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
  205. cache->bytes_super += stripe_len;
  206. ret = add_excluded_extent(root, cache->key.objectid,
  207. stripe_len);
  208. BUG_ON(ret);
  209. }
  210. for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
  211. bytenr = btrfs_sb_offset(i);
  212. ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
  213. cache->key.objectid, bytenr,
  214. 0, &logical, &nr, &stripe_len);
  215. BUG_ON(ret);
  216. while (nr--) {
  217. cache->bytes_super += stripe_len;
  218. ret = add_excluded_extent(root, logical[nr],
  219. stripe_len);
  220. BUG_ON(ret);
  221. }
  222. kfree(logical);
  223. }
  224. return 0;
  225. }
  226. static struct btrfs_caching_control *
  227. get_caching_control(struct btrfs_block_group_cache *cache)
  228. {
  229. struct btrfs_caching_control *ctl;
  230. spin_lock(&cache->lock);
  231. if (cache->cached != BTRFS_CACHE_STARTED) {
  232. spin_unlock(&cache->lock);
  233. return NULL;
  234. }
  235. /* We're loading it the fast way, so we don't have a caching_ctl. */
  236. if (!cache->caching_ctl) {
  237. spin_unlock(&cache->lock);
  238. return NULL;
  239. }
  240. ctl = cache->caching_ctl;
  241. atomic_inc(&ctl->count);
  242. spin_unlock(&cache->lock);
  243. return ctl;
  244. }
  245. static void put_caching_control(struct btrfs_caching_control *ctl)
  246. {
  247. if (atomic_dec_and_test(&ctl->count))
  248. kfree(ctl);
  249. }
  250. /*
  251. * This is only called by cache_block_group; since we could have freed extents,
  252. * we need to check the pinned_extents for any extents that can't be used yet,
  253. * since their free space will be released as soon as the transaction commits.
  254. */
  255. static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
  256. struct btrfs_fs_info *info, u64 start, u64 end)
  257. {
  258. u64 extent_start, extent_end, size, total_added = 0;
  259. int ret;
  260. while (start < end) {
  261. ret = find_first_extent_bit(info->pinned_extents, start,
  262. &extent_start, &extent_end,
  263. EXTENT_DIRTY | EXTENT_UPTODATE);
  264. if (ret)
  265. break;
  266. if (extent_start <= start) {
  267. start = extent_end + 1;
  268. } else if (extent_start > start && extent_start < end) {
  269. size = extent_start - start;
  270. total_added += size;
  271. ret = btrfs_add_free_space(block_group, start,
  272. size);
  273. BUG_ON(ret);
  274. start = extent_end + 1;
  275. } else {
  276. break;
  277. }
  278. }
  279. if (start < end) {
  280. size = end - start;
  281. total_added += size;
  282. ret = btrfs_add_free_space(block_group, start, size);
  283. BUG_ON(ret);
  284. }
  285. return total_added;
  286. }
  287. static noinline void caching_thread(struct btrfs_work *work)
  288. {
  289. struct btrfs_block_group_cache *block_group;
  290. struct btrfs_fs_info *fs_info;
  291. struct btrfs_caching_control *caching_ctl;
  292. struct btrfs_root *extent_root;
  293. struct btrfs_path *path;
  294. struct extent_buffer *leaf;
  295. struct btrfs_key key;
  296. u64 total_found = 0;
  297. u64 last = 0;
  298. u32 nritems;
  299. int ret = 0;
  300. caching_ctl = container_of(work, struct btrfs_caching_control, work);
  301. block_group = caching_ctl->block_group;
  302. fs_info = block_group->fs_info;
  303. extent_root = fs_info->extent_root;
  304. path = btrfs_alloc_path();
  305. if (!path)
  306. goto out;
  307. last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
  308. /*
  309. * We don't want to deadlock with somebody trying to allocate a new
  310. * extent for the extent root while also trying to search the extent
  311. * root to add free space. So we skip locking and search the commit
  312. * root, since it's read-only
  313. */
  314. path->skip_locking = 1;
  315. path->search_commit_root = 1;
  316. path->reada = 1;
  317. key.objectid = last;
  318. key.offset = 0;
  319. key.type = BTRFS_EXTENT_ITEM_KEY;
  320. again:
  321. mutex_lock(&caching_ctl->mutex);
  322. /* need to make sure the commit_root doesn't disappear */
  323. down_read(&fs_info->extent_commit_sem);
  324. ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
  325. if (ret < 0)
  326. goto err;
  327. leaf = path->nodes[0];
  328. nritems = btrfs_header_nritems(leaf);
  329. while (1) {
  330. if (btrfs_fs_closing(fs_info) > 1) {
  331. last = (u64)-1;
  332. break;
  333. }
  334. if (path->slots[0] < nritems) {
  335. btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  336. } else {
  337. ret = find_next_key(path, 0, &key);
  338. if (ret)
  339. break;
  340. if (need_resched() ||
  341. btrfs_next_leaf(extent_root, path)) {
  342. caching_ctl->progress = last;
  343. btrfs_release_path(path);
  344. up_read(&fs_info->extent_commit_sem);
  345. mutex_unlock(&caching_ctl->mutex);
  346. cond_resched();
  347. goto again;
  348. }
  349. leaf = path->nodes[0];
  350. nritems = btrfs_header_nritems(leaf);
  351. continue;
  352. }
  353. if (key.objectid < block_group->key.objectid) {
  354. path->slots[0]++;
  355. continue;
  356. }
  357. if (key.objectid >= block_group->key.objectid +
  358. block_group->key.offset)
  359. break;
  360. if (key.type == BTRFS_EXTENT_ITEM_KEY) {
  361. total_found += add_new_free_space(block_group,
  362. fs_info, last,
  363. key.objectid);
  364. last = key.objectid + key.offset;
  365. if (total_found > (1024 * 1024 * 2)) {
  366. total_found = 0;
  367. wake_up(&caching_ctl->wait);
  368. }
  369. }
  370. path->slots[0]++;
  371. }
  372. ret = 0;
  373. total_found += add_new_free_space(block_group, fs_info, last,
  374. block_group->key.objectid +
  375. block_group->key.offset);
  376. caching_ctl->progress = (u64)-1;
  377. spin_lock(&block_group->lock);
  378. block_group->caching_ctl = NULL;
  379. block_group->cached = BTRFS_CACHE_FINISHED;
  380. spin_unlock(&block_group->lock);
  381. err:
  382. btrfs_free_path(path);
  383. up_read(&fs_info->extent_commit_sem);
  384. free_excluded_extents(extent_root, block_group);
  385. mutex_unlock(&caching_ctl->mutex);
  386. out:
  387. wake_up(&caching_ctl->wait);
  388. put_caching_control(caching_ctl);
  389. btrfs_put_block_group(block_group);
  390. }
  391. static int cache_block_group(struct btrfs_block_group_cache *cache,
  392. struct btrfs_trans_handle *trans,
  393. struct btrfs_root *root,
  394. int load_cache_only)
  395. {
  396. struct btrfs_fs_info *fs_info = cache->fs_info;
  397. struct btrfs_caching_control *caching_ctl;
  398. int ret = 0;
  399. smp_mb();
  400. if (cache->cached != BTRFS_CACHE_NO)
  401. return 0;
  402. /*
  403. * We can't do the read from on-disk cache during a commit since we need
  404. * to have the normal tree locking. Also if we are currently trying to
  405. * allocate blocks for the tree root we can't do the fast caching since
  406. * we likely hold important locks.
  407. */
  408. if (trans && (!trans->transaction->in_commit) &&
  409. (root && root != root->fs_info->tree_root)) {
  410. spin_lock(&cache->lock);
  411. if (cache->cached != BTRFS_CACHE_NO) {
  412. spin_unlock(&cache->lock);
  413. return 0;
  414. }
  415. cache->cached = BTRFS_CACHE_STARTED;
  416. spin_unlock(&cache->lock);
  417. ret = load_free_space_cache(fs_info, cache);
  418. spin_lock(&cache->lock);
  419. if (ret == 1) {
  420. cache->cached = BTRFS_CACHE_FINISHED;
  421. cache->last_byte_to_unpin = (u64)-1;
  422. } else {
  423. cache->cached = BTRFS_CACHE_NO;
  424. }
  425. spin_unlock(&cache->lock);
  426. if (ret == 1) {
  427. free_excluded_extents(fs_info->extent_root, cache);
  428. return 0;
  429. }
  430. }
  431. if (load_cache_only)
  432. return 0;
  433. caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
  434. BUG_ON(!caching_ctl);
  435. INIT_LIST_HEAD(&caching_ctl->list);
  436. mutex_init(&caching_ctl->mutex);
  437. init_waitqueue_head(&caching_ctl->wait);
  438. caching_ctl->block_group = cache;
  439. caching_ctl->progress = cache->key.objectid;
  440. /* one for caching kthread, one for caching block group list */
  441. atomic_set(&caching_ctl->count, 2);
  442. caching_ctl->work.func = caching_thread;
  443. spin_lock(&cache->lock);
  444. if (cache->cached != BTRFS_CACHE_NO) {
  445. spin_unlock(&cache->lock);
  446. kfree(caching_ctl);
  447. return 0;
  448. }
  449. cache->caching_ctl = caching_ctl;
  450. cache->cached = BTRFS_CACHE_STARTED;
  451. spin_unlock(&cache->lock);
  452. down_write(&fs_info->extent_commit_sem);
  453. list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
  454. up_write(&fs_info->extent_commit_sem);
  455. btrfs_get_block_group(cache);
  456. btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);
  457. return ret;
  458. }
  459. /*
  460. * return the block group that starts at or after bytenr
  461. */
  462. static struct btrfs_block_group_cache *
  463. btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
  464. {
  465. struct btrfs_block_group_cache *cache;
  466. cache = block_group_cache_tree_search(info, bytenr, 0);
  467. return cache;
  468. }
  469. /*
  470. * return the block group that contains the given bytenr
  471. */
  472. struct btrfs_block_group_cache *btrfs_lookup_block_group(
  473. struct btrfs_fs_info *info,
  474. u64 bytenr)
  475. {
  476. struct btrfs_block_group_cache *cache;
  477. cache = block_group_cache_tree_search(info, bytenr, 1);
  478. return cache;
  479. }
  480. static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
  481. u64 flags)
  482. {
  483. struct list_head *head = &info->space_info;
  484. struct btrfs_space_info *found;
  485. flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
  486. BTRFS_BLOCK_GROUP_METADATA;
  487. rcu_read_lock();
  488. list_for_each_entry_rcu(found, head, list) {
  489. if (found->flags & flags) {
  490. rcu_read_unlock();
  491. return found;
  492. }
  493. }
  494. rcu_read_unlock();
  495. return NULL;
  496. }
  497. /*
  498. * after adding space to the filesystem, we need to clear the full flags
  499. * on all the space infos.
  500. */
  501. void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
  502. {
  503. struct list_head *head = &info->space_info;
  504. struct btrfs_space_info *found;
  505. rcu_read_lock();
  506. list_for_each_entry_rcu(found, head, list)
  507. found->full = 0;
  508. rcu_read_unlock();
  509. }
  510. static u64 div_factor(u64 num, int factor)
  511. {
  512. if (factor == 10)
  513. return num;
  514. num *= factor;
  515. do_div(num, 10);
  516. return num;
  517. }
  518. static u64 div_factor_fine(u64 num, int factor)
  519. {
  520. if (factor == 100)
  521. return num;
  522. num *= factor;
  523. do_div(num, 100);
  524. return num;
  525. }
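/*
 * Illustrative worked example (editor's addition, not part of the original
 * file): div_factor() scales in tenths and div_factor_fine() in percent, so
 *
 *	div_factor(1000, 9)       == 900   (90% of 1000)
 *	div_factor_fine(1000, 85) == 850   (85% of 1000)
 */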
  526. u64 btrfs_find_block_group(struct btrfs_root *root,
  527. u64 search_start, u64 search_hint, int owner)
  528. {
  529. struct btrfs_block_group_cache *cache;
  530. u64 used;
  531. u64 last = max(search_hint, search_start);
  532. u64 group_start = 0;
  533. int full_search = 0;
  534. int factor = 9;
  535. int wrapped = 0;
  536. again:
  537. while (1) {
  538. cache = btrfs_lookup_first_block_group(root->fs_info, last);
  539. if (!cache)
  540. break;
  541. spin_lock(&cache->lock);
  542. last = cache->key.objectid + cache->key.offset;
  543. used = btrfs_block_group_used(&cache->item);
  544. if ((full_search || !cache->ro) &&
  545. block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
  546. if (used + cache->pinned + cache->reserved <
  547. div_factor(cache->key.offset, factor)) {
  548. group_start = cache->key.objectid;
  549. spin_unlock(&cache->lock);
  550. btrfs_put_block_group(cache);
  551. goto found;
  552. }
  553. }
  554. spin_unlock(&cache->lock);
  555. btrfs_put_block_group(cache);
  556. cond_resched();
  557. }
  558. if (!wrapped) {
  559. last = search_start;
  560. wrapped = 1;
  561. goto again;
  562. }
  563. if (!full_search && factor < 10) {
  564. last = search_start;
  565. full_search = 1;
  566. factor = 10;
  567. goto again;
  568. }
  569. found:
  570. return group_start;
  571. }
  572. /* simple helper to search for an existing extent at a given offset */
  573. int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
  574. {
  575. int ret;
  576. struct btrfs_key key;
  577. struct btrfs_path *path;
  578. path = btrfs_alloc_path();
  579. if (!path)
  580. return -ENOMEM;
  581. key.objectid = start;
  582. key.offset = len;
  583. btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
  584. ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
  585. 0, 0);
  586. btrfs_free_path(path);
  587. return ret;
  588. }
  589. /*
  590. * helper function to lookup reference count and flags of extent.
  591. *
  592. * The head node for a delayed ref is used to store the sum of all the
  593. * reference count modifications queued up in the rbtree. The head
  594. * node may also store the extent flags to set. This way you can check
  595. * to see what the reference count and extent flags would be once all of
  596. * the delayed refs are run.
  597. */
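/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * a typical caller reads both outputs, which already have the pending
 * delayed-ref modifications folded in, e.g. roughly:
 *
 *	u64 refs = 0, flags = 0;
 *
 *	ret = btrfs_lookup_extent_info(trans, root, bytenr, num_bytes,
 *				       &refs, &flags);
 *	if (!ret && (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
 *		handle_full_backref_case();   (hypothetical helper, for illustration only)
 */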
  598. int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
  599. struct btrfs_root *root, u64 bytenr,
  600. u64 num_bytes, u64 *refs, u64 *flags)
  601. {
  602. struct btrfs_delayed_ref_head *head;
  603. struct btrfs_delayed_ref_root *delayed_refs;
  604. struct btrfs_path *path;
  605. struct btrfs_extent_item *ei;
  606. struct extent_buffer *leaf;
  607. struct btrfs_key key;
  608. u32 item_size;
  609. u64 num_refs;
  610. u64 extent_flags;
  611. int ret;
  612. path = btrfs_alloc_path();
  613. if (!path)
  614. return -ENOMEM;
  615. key.objectid = bytenr;
  616. key.type = BTRFS_EXTENT_ITEM_KEY;
  617. key.offset = num_bytes;
  618. if (!trans) {
  619. path->skip_locking = 1;
  620. path->search_commit_root = 1;
  621. }
  622. again:
  623. ret = btrfs_search_slot(trans, root->fs_info->extent_root,
  624. &key, path, 0, 0);
  625. if (ret < 0)
  626. goto out_free;
  627. if (ret == 0) {
  628. leaf = path->nodes[0];
  629. item_size = btrfs_item_size_nr(leaf, path->slots[0]);
  630. if (item_size >= sizeof(*ei)) {
  631. ei = btrfs_item_ptr(leaf, path->slots[0],
  632. struct btrfs_extent_item);
  633. num_refs = btrfs_extent_refs(leaf, ei);
  634. extent_flags = btrfs_extent_flags(leaf, ei);
  635. } else {
  636. #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
  637. struct btrfs_extent_item_v0 *ei0;
  638. BUG_ON(item_size != sizeof(*ei0));
  639. ei0 = btrfs_item_ptr(leaf, path->slots[0],
  640. struct btrfs_extent_item_v0);
  641. num_refs = btrfs_extent_refs_v0(leaf, ei0);
  642. /* FIXME: this isn't correct for data */
  643. extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
  644. #else
  645. BUG();
  646. #endif
  647. }
  648. BUG_ON(num_refs == 0);
  649. } else {
  650. num_refs = 0;
  651. extent_flags = 0;
  652. ret = 0;
  653. }
  654. if (!trans)
  655. goto out;
  656. delayed_refs = &trans->transaction->delayed_refs;
  657. spin_lock(&delayed_refs->lock);
  658. head = btrfs_find_delayed_ref_head(trans, bytenr);
  659. if (head) {
  660. if (!mutex_trylock(&head->mutex)) {
  661. atomic_inc(&head->node.refs);
  662. spin_unlock(&delayed_refs->lock);
  663. btrfs_release_path(path);
  664. /*
  665. * Mutex was contended, block until it's released and try
  666. * again
  667. */
  668. mutex_lock(&head->mutex);
  669. mutex_unlock(&head->mutex);
  670. btrfs_put_delayed_ref(&head->node);
  671. goto again;
  672. }
  673. if (head->extent_op && head->extent_op->update_flags)
  674. extent_flags |= head->extent_op->flags_to_set;
  675. else
  676. BUG_ON(num_refs == 0);
  677. num_refs += head->node.ref_mod;
  678. mutex_unlock(&head->mutex);
  679. }
  680. spin_unlock(&delayed_refs->lock);
  681. out:
  682. WARN_ON(num_refs == 0);
  683. if (refs)
  684. *refs = num_refs;
  685. if (flags)
  686. *flags = extent_flags;
  687. out_free:
  688. btrfs_free_path(path);
  689. return ret;
  690. }
  691. /*
  692. * Back reference rules. Back refs have three main goals:
  693. *
  694. * 1) differentiate between all holders of references to an extent so that
  695. * when a reference is dropped we can make sure it was a valid reference
  696. * before freeing the extent.
  697. *
  698. * 2) Provide enough information to quickly find the holders of an extent
  699. * if we notice a given block is corrupted or bad.
  700. *
  701. * 3) Make it easy to migrate blocks for FS shrinking or storage pool
  702. * maintenance. This is actually the same as #2, but with a slightly
  703. * different use case.
  704. *
  705. * There are two kinds of back refs. Implicit back refs are optimized
  706. * for pointers in non-shared tree blocks. For a given pointer in a block,
  707. * back refs of this kind provide information about the block's owner tree
  708. * and the pointer's key. This information allows us to find the block by
  709. * b-tree searching. Full back refs are for pointers in tree blocks not
  710. * referenced by their owner trees. The location of the tree block is recorded
  711. * in the back refs. Actually the full back ref is generic, and can be
  712. * used in all cases where the implicit back ref is used. The major shortcoming
  713. * of full back refs is their overhead: every time a tree block gets
  714. * COWed, we have to update the back ref entries for all pointers in it.
  715. *
  716. * For a newly allocated tree block, we use implicit back refs for
  717. * pointers in it. This means most tree-related operations only involve
  718. * implicit back refs. For a tree block created in an old transaction, the
  719. * only way to drop a reference to it is to COW it. So we can detect the
  720. * event that a tree block loses its owner tree's reference and do the
  721. * back refs conversion.
  722. *
  723. * When a tree block is COW'd through a tree, there are four cases:
  724. *
  725. * The reference count of the block is one and the tree is the block's
  726. * owner tree. Nothing to do in this case.
  727. *
  728. * The reference count of the block is one and the tree is not the
  729. * block's owner tree. In this case, full back refs are used for pointers
  730. * in the block. Remove these full back refs and add implicit back refs for
  731. * every pointer in the new block.
  732. *
  733. * The reference count of the block is greater than one and the tree is
  734. * the block's owner tree. In this case, implicit back refs are used for
  735. * pointers in the block. Add full back refs for every pointer in the
  736. * block and increase the lower level extents' reference counts. The original
  737. * implicit back refs are carried over to the new block.
  738. *
  739. * The reference count of the block is greater than one and the tree is
  740. * not the block's owner tree. Add implicit back refs for every pointer in
  741. * the new block and increase the lower level extents' reference counts.
  742. *
  743. * Back Reference Key composing:
  744. *
  745. * The key objectid corresponds to the first byte in the extent,
  746. * The key type is used to differentiate between types of back refs.
  747. * There are different meanings of the key offset for different types
  748. * of back refs.
  749. *
  750. * File extents can be referenced by:
  751. *
  752. * - multiple snapshots, subvolumes, or different generations in one subvol
  753. * - different files inside a single subvolume
  754. * - different offsets inside a file (bookend extents in file.c)
  755. *
  756. * The extent ref structure for the implicit back refs has fields for:
  757. *
  758. * - objectid of the subvolume root
  759. * - objectid of the file holding the reference
  760. * - original offset in the file
  761. * - how many bookend extents
  762. *
  763. * The key offset for the implicit back refs is the hash of the first
  764. * three fields.
  765. *
  766. * The extent ref structure for the full back refs has a field for:
  767. *
  768. * - number of pointers in the tree leaf
  769. *
  770. * The key offset for the full back refs is the first byte of
  771. * the tree leaf.
  772. *
  773. * When a file extent is allocated, the implicit back refs are used
  774. * and the fields are filled in:
  775. *
  776. * (root_key.objectid, inode objectid, offset in file, 1)
  777. *
  778. * When a file extent is removed by file truncation, we find the
  779. * corresponding implicit back refs and check the following fields:
  780. *
  781. * (btrfs_header_owner(leaf), inode objectid, offset in file)
  782. *
  783. * Btree extents can be referenced by:
  784. *
  785. * - Different subvolumes
  786. *
  787. * Both the implicit back refs and the full back refs for tree blocks
  788. * only consist of a key. The key offset for the implicit back refs is the
  789. * objectid of the block's owner tree. The key offset for the full back refs
  790. * is the first byte of the parent block.
  791. *
  792. * When implicit back refs are used, information about the lowest key and
  793. * level of the tree block is required. This information is stored in the
  794. * tree block info structure.
  795. */
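/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * for an implicit data back ref the item key is composed roughly as
 *
 *	key.objectid = bytenr of the extent;
 *	key.type     = BTRFS_EXTENT_DATA_REF_KEY;
 *	key.offset   = hash_extent_data_ref(root_objectid, inode objectid,
 *					    file offset);
 *
 * while a shared (full) data back ref uses BTRFS_SHARED_DATA_REF_KEY with
 * the parent block's bytenr in key.offset, exactly as lookup_extent_data_ref()
 * sets up below.
 */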
  796. #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
  797. static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
  798. struct btrfs_root *root,
  799. struct btrfs_path *path,
  800. u64 owner, u32 extra_size)
  801. {
  802. struct btrfs_extent_item *item;
  803. struct btrfs_extent_item_v0 *ei0;
  804. struct btrfs_extent_ref_v0 *ref0;
  805. struct btrfs_tree_block_info *bi;
  806. struct extent_buffer *leaf;
  807. struct btrfs_key key;
  808. struct btrfs_key found_key;
  809. u32 new_size = sizeof(*item);
  810. u64 refs;
  811. int ret;
  812. leaf = path->nodes[0];
  813. BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
  814. btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  815. ei0 = btrfs_item_ptr(leaf, path->slots[0],
  816. struct btrfs_extent_item_v0);
  817. refs = btrfs_extent_refs_v0(leaf, ei0);
  818. if (owner == (u64)-1) {
  819. while (1) {
  820. if (path->slots[0] >= btrfs_header_nritems(leaf)) {
  821. ret = btrfs_next_leaf(root, path);
  822. if (ret < 0)
  823. return ret;
  824. BUG_ON(ret > 0);
  825. leaf = path->nodes[0];
  826. }
  827. btrfs_item_key_to_cpu(leaf, &found_key,
  828. path->slots[0]);
  829. BUG_ON(key.objectid != found_key.objectid);
  830. if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
  831. path->slots[0]++;
  832. continue;
  833. }
  834. ref0 = btrfs_item_ptr(leaf, path->slots[0],
  835. struct btrfs_extent_ref_v0);
  836. owner = btrfs_ref_objectid_v0(leaf, ref0);
  837. break;
  838. }
  839. }
  840. btrfs_release_path(path);
  841. if (owner < BTRFS_FIRST_FREE_OBJECTID)
  842. new_size += sizeof(*bi);
  843. new_size -= sizeof(*ei0);
  844. ret = btrfs_search_slot(trans, root, &key, path,
  845. new_size + extra_size, 1);
  846. if (ret < 0)
  847. return ret;
  848. BUG_ON(ret);
  849. ret = btrfs_extend_item(trans, root, path, new_size);
  850. leaf = path->nodes[0];
  851. item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
  852. btrfs_set_extent_refs(leaf, item, refs);
  853. /* FIXME: get real generation */
  854. btrfs_set_extent_generation(leaf, item, 0);
  855. if (owner < BTRFS_FIRST_FREE_OBJECTID) {
  856. btrfs_set_extent_flags(leaf, item,
  857. BTRFS_EXTENT_FLAG_TREE_BLOCK |
  858. BTRFS_BLOCK_FLAG_FULL_BACKREF);
  859. bi = (struct btrfs_tree_block_info *)(item + 1);
  860. /* FIXME: get first key of the block */
  861. memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
  862. btrfs_set_tree_block_level(leaf, bi, (int)owner);
  863. } else {
  864. btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
  865. }
  866. btrfs_mark_buffer_dirty(leaf);
  867. return 0;
  868. }
  869. #endif
  870. static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
  871. {
  872. u32 high_crc = ~(u32)0;
  873. u32 low_crc = ~(u32)0;
  874. __le64 lenum;
  875. lenum = cpu_to_le64(root_objectid);
  876. high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
  877. lenum = cpu_to_le64(owner);
  878. low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
  879. lenum = cpu_to_le64(offset);
  880. low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
  881. return ((u64)high_crc << 31) ^ (u64)low_crc;
  882. }
  883. static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
  884. struct btrfs_extent_data_ref *ref)
  885. {
  886. return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
  887. btrfs_extent_data_ref_objectid(leaf, ref),
  888. btrfs_extent_data_ref_offset(leaf, ref));
  889. }
  890. static int match_extent_data_ref(struct extent_buffer *leaf,
  891. struct btrfs_extent_data_ref *ref,
  892. u64 root_objectid, u64 owner, u64 offset)
  893. {
  894. if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
  895. btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
  896. btrfs_extent_data_ref_offset(leaf, ref) != offset)
  897. return 0;
  898. return 1;
  899. }
  900. static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
  901. struct btrfs_root *root,
  902. struct btrfs_path *path,
  903. u64 bytenr, u64 parent,
  904. u64 root_objectid,
  905. u64 owner, u64 offset)
  906. {
  907. struct btrfs_key key;
  908. struct btrfs_extent_data_ref *ref;
  909. struct extent_buffer *leaf;
  910. u32 nritems;
  911. int ret;
  912. int recow;
  913. int err = -ENOENT;
  914. key.objectid = bytenr;
  915. if (parent) {
  916. key.type = BTRFS_SHARED_DATA_REF_KEY;
  917. key.offset = parent;
  918. } else {
  919. key.type = BTRFS_EXTENT_DATA_REF_KEY;
  920. key.offset = hash_extent_data_ref(root_objectid,
  921. owner, offset);
  922. }
  923. again:
  924. recow = 0;
  925. ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
  926. if (ret < 0) {
  927. err = ret;
  928. goto fail;
  929. }
  930. if (parent) {
  931. if (!ret)
  932. return 0;
  933. #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
  934. key.type = BTRFS_EXTENT_REF_V0_KEY;
  935. btrfs_release_path(path);
  936. ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
  937. if (ret < 0) {
  938. err = ret;
  939. goto fail;
  940. }
  941. if (!ret)
  942. return 0;
  943. #endif
  944. goto fail;
  945. }
  946. leaf = path->nodes[0];
  947. nritems = btrfs_header_nritems(leaf);
  948. while (1) {
  949. if (path->slots[0] >= nritems) {
  950. ret = btrfs_next_leaf(root, path);
  951. if (ret < 0)
  952. err = ret;
  953. if (ret)
  954. goto fail;
  955. leaf = path->nodes[0];
  956. nritems = btrfs_header_nritems(leaf);
  957. recow = 1;
  958. }
  959. btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  960. if (key.objectid != bytenr ||
  961. key.type != BTRFS_EXTENT_DATA_REF_KEY)
  962. goto fail;
  963. ref = btrfs_item_ptr(leaf, path->slots[0],
  964. struct btrfs_extent_data_ref);
  965. if (match_extent_data_ref(leaf, ref, root_objectid,
  966. owner, offset)) {
  967. if (recow) {
  968. btrfs_release_path(path);
  969. goto again;
  970. }
  971. err = 0;
  972. break;
  973. }
  974. path->slots[0]++;
  975. }
  976. fail:
  977. return err;
  978. }
  979. static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
  980. struct btrfs_root *root,
  981. struct btrfs_path *path,
  982. u64 bytenr, u64 parent,
  983. u64 root_objectid, u64 owner,
  984. u64 offset, int refs_to_add)
  985. {
  986. struct btrfs_key key;
  987. struct extent_buffer *leaf;
  988. u32 size;
  989. u32 num_refs;
  990. int ret;
  991. key.objectid = bytenr;
  992. if (parent) {
  993. key.type = BTRFS_SHARED_DATA_REF_KEY;
  994. key.offset = parent;
  995. size = sizeof(struct btrfs_shared_data_ref);
  996. } else {
  997. key.type = BTRFS_EXTENT_DATA_REF_KEY;
  998. key.offset = hash_extent_data_ref(root_objectid,
  999. owner, offset);
  1000. size = sizeof(struct btrfs_extent_data_ref);
  1001. }
  1002. ret = btrfs_insert_empty_item(trans, root, path, &key, size);
  1003. if (ret && ret != -EEXIST)
  1004. goto fail;
  1005. leaf = path->nodes[0];
  1006. if (parent) {
  1007. struct btrfs_shared_data_ref *ref;
  1008. ref = btrfs_item_ptr(leaf, path->slots[0],
  1009. struct btrfs_shared_data_ref);
  1010. if (ret == 0) {
  1011. btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
  1012. } else {
  1013. num_refs = btrfs_shared_data_ref_count(leaf, ref);
  1014. num_refs += refs_to_add;
  1015. btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
  1016. }
  1017. } else {
  1018. struct btrfs_extent_data_ref *ref;
  1019. while (ret == -EEXIST) {
  1020. ref = btrfs_item_ptr(leaf, path->slots[0],
  1021. struct btrfs_extent_data_ref);
  1022. if (match_extent_data_ref(leaf, ref, root_objectid,
  1023. owner, offset))
  1024. break;
  1025. btrfs_release_path(path);
  1026. key.offset++;
  1027. ret = btrfs_insert_empty_item(trans, root, path, &key,
  1028. size);
  1029. if (ret && ret != -EEXIST)
  1030. goto fail;
  1031. leaf = path->nodes[0];
  1032. }
  1033. ref = btrfs_item_ptr(leaf, path->slots[0],
  1034. struct btrfs_extent_data_ref);
  1035. if (ret == 0) {
  1036. btrfs_set_extent_data_ref_root(leaf, ref,
  1037. root_objectid);
  1038. btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
  1039. btrfs_set_extent_data_ref_offset(leaf, ref, offset);
  1040. btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
  1041. } else {
  1042. num_refs = btrfs_extent_data_ref_count(leaf, ref);
  1043. num_refs += refs_to_add;
  1044. btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
  1045. }
  1046. }
  1047. btrfs_mark_buffer_dirty(leaf);
  1048. ret = 0;
  1049. fail:
  1050. btrfs_release_path(path);
  1051. return ret;
  1052. }
  1053. static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
  1054. struct btrfs_root *root,
  1055. struct btrfs_path *path,
  1056. int refs_to_drop)
  1057. {
  1058. struct btrfs_key key;
  1059. struct btrfs_extent_data_ref *ref1 = NULL;
  1060. struct btrfs_shared_data_ref *ref2 = NULL;
  1061. struct extent_buffer *leaf;
  1062. u32 num_refs = 0;
  1063. int ret = 0;
  1064. leaf = path->nodes[0];
  1065. btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  1066. if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
  1067. ref1 = btrfs_item_ptr(leaf, path->slots[0],
  1068. struct btrfs_extent_data_ref);
  1069. num_refs = btrfs_extent_data_ref_count(leaf, ref1);
  1070. } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
  1071. ref2 = btrfs_item_ptr(leaf, path->slots[0],
  1072. struct btrfs_shared_data_ref);
  1073. num_refs = btrfs_shared_data_ref_count(leaf, ref2);
  1074. #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
  1075. } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
  1076. struct btrfs_extent_ref_v0 *ref0;
  1077. ref0 = btrfs_item_ptr(leaf, path->slots[0],
  1078. struct btrfs_extent_ref_v0);
  1079. num_refs = btrfs_ref_count_v0(leaf, ref0);
  1080. #endif
  1081. } else {
  1082. BUG();
  1083. }
  1084. BUG_ON(num_refs < refs_to_drop);
  1085. num_refs -= refs_to_drop;
  1086. if (num_refs == 0) {
  1087. ret = btrfs_del_item(trans, root, path);
  1088. } else {
  1089. if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
  1090. btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
  1091. else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
  1092. btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
  1093. #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
  1094. else {
  1095. struct btrfs_extent_ref_v0 *ref0;
  1096. ref0 = btrfs_item_ptr(leaf, path->slots[0],
  1097. struct btrfs_extent_ref_v0);
  1098. btrfs_set_ref_count_v0(leaf, ref0, num_refs);
  1099. }
  1100. #endif
  1101. btrfs_mark_buffer_dirty(leaf);
  1102. }
  1103. return ret;
  1104. }
  1105. static noinline u32 extent_data_ref_count(struct btrfs_root *root,
  1106. struct btrfs_path *path,
  1107. struct btrfs_extent_inline_ref *iref)
  1108. {
  1109. struct btrfs_key key;
  1110. struct extent_buffer *leaf;
  1111. struct btrfs_extent_data_ref *ref1;
  1112. struct btrfs_shared_data_ref *ref2;
  1113. u32 num_refs = 0;
  1114. leaf = path->nodes[0];
  1115. btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  1116. if (iref) {
  1117. if (btrfs_extent_inline_ref_type(leaf, iref) ==
  1118. BTRFS_EXTENT_DATA_REF_KEY) {
  1119. ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
  1120. num_refs = btrfs_extent_data_ref_count(leaf, ref1);
  1121. } else {
  1122. ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
  1123. num_refs = btrfs_shared_data_ref_count(leaf, ref2);
  1124. }
  1125. } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
  1126. ref1 = btrfs_item_ptr(leaf, path->slots[0],
  1127. struct btrfs_extent_data_ref);
  1128. num_refs = btrfs_extent_data_ref_count(leaf, ref1);
  1129. } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
  1130. ref2 = btrfs_item_ptr(leaf, path->slots[0],
  1131. struct btrfs_shared_data_ref);
  1132. num_refs = btrfs_shared_data_ref_count(leaf, ref2);
  1133. #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
  1134. } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
  1135. struct btrfs_extent_ref_v0 *ref0;
  1136. ref0 = btrfs_item_ptr(leaf, path->slots[0],
  1137. struct btrfs_extent_ref_v0);
  1138. num_refs = btrfs_ref_count_v0(leaf, ref0);
  1139. #endif
  1140. } else {
  1141. WARN_ON(1);
  1142. }
  1143. return num_refs;
  1144. }
  1145. static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
  1146. struct btrfs_root *root,
  1147. struct btrfs_path *path,
  1148. u64 bytenr, u64 parent,
  1149. u64 root_objectid)
  1150. {
  1151. struct btrfs_key key;
  1152. int ret;
  1153. key.objectid = bytenr;
  1154. if (parent) {
  1155. key.type = BTRFS_SHARED_BLOCK_REF_KEY;
  1156. key.offset = parent;
  1157. } else {
  1158. key.type = BTRFS_TREE_BLOCK_REF_KEY;
  1159. key.offset = root_objectid;
  1160. }
  1161. ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
  1162. if (ret > 0)
  1163. ret = -ENOENT;
  1164. #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
  1165. if (ret == -ENOENT && parent) {
  1166. btrfs_release_path(path);
  1167. key.type = BTRFS_EXTENT_REF_V0_KEY;
  1168. ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
  1169. if (ret > 0)
  1170. ret = -ENOENT;
  1171. }
  1172. #endif
  1173. return ret;
  1174. }
  1175. static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
  1176. struct btrfs_root *root,
  1177. struct btrfs_path *path,
  1178. u64 bytenr, u64 parent,
  1179. u64 root_objectid)
  1180. {
  1181. struct btrfs_key key;
  1182. int ret;
  1183. key.objectid = bytenr;
  1184. if (parent) {
  1185. key.type = BTRFS_SHARED_BLOCK_REF_KEY;
  1186. key.offset = parent;
  1187. } else {
  1188. key.type = BTRFS_TREE_BLOCK_REF_KEY;
  1189. key.offset = root_objectid;
  1190. }
  1191. ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
  1192. btrfs_release_path(path);
  1193. return ret;
  1194. }
  1195. static inline int extent_ref_type(u64 parent, u64 owner)
  1196. {
  1197. int type;
  1198. if (owner < BTRFS_FIRST_FREE_OBJECTID) {
  1199. if (parent > 0)
  1200. type = BTRFS_SHARED_BLOCK_REF_KEY;
  1201. else
  1202. type = BTRFS_TREE_BLOCK_REF_KEY;
  1203. } else {
  1204. if (parent > 0)
  1205. type = BTRFS_SHARED_DATA_REF_KEY;
  1206. else
  1207. type = BTRFS_EXTENT_DATA_REF_KEY;
  1208. }
  1209. return type;
  1210. }
  1211. static int find_next_key(struct btrfs_path *path, int level,
  1212. struct btrfs_key *key)
  1213. {
  1214. for (; level < BTRFS_MAX_LEVEL; level++) {
  1215. if (!path->nodes[level])
  1216. break;
  1217. if (path->slots[level] + 1 >=
  1218. btrfs_header_nritems(path->nodes[level]))
  1219. continue;
  1220. if (level == 0)
  1221. btrfs_item_key_to_cpu(path->nodes[level], key,
  1222. path->slots[level] + 1);
  1223. else
  1224. btrfs_node_key_to_cpu(path->nodes[level], key,
  1225. path->slots[level] + 1);
  1226. return 0;
  1227. }
  1228. return 1;
  1229. }
  1230. /*
  1231. * look for inline back ref. if back ref is found, *ref_ret is set
  1232. * to the address of inline back ref, and 0 is returned.
  1233. *
  1234. * if back ref isn't found, *ref_ret is set to the address where it
  1235. * should be inserted, and -ENOENT is returned.
  1236. *
  1237. * if insert is true and there are too many inline back refs, the path
  1238. * points to the extent item, and -EAGAIN is returned.
  1239. *
  1240. * NOTE: inline back refs are ordered in the same way that back ref
  1241. * items in the tree are ordered.
  1242. */
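/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * callers dispatch on the three possible results, roughly:
 *
 *	ret = lookup_inline_extent_backref(trans, root, path, &iref, ...);
 *	if (ret == 0)             update the existing inline ref in place
 *	else if (ret == -ENOENT)  insert a new inline ref at *ref_ret
 *	else if (ret == -EAGAIN)  fall back to a separate back ref item
 *
 * insert_inline_extent_backref() below implements the first two cases and
 * leaves -EAGAIN for its caller to handle, e.g. via insert_extent_backref().
 */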
  1243. static noinline_for_stack
  1244. int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
  1245. struct btrfs_root *root,
  1246. struct btrfs_path *path,
  1247. struct btrfs_extent_inline_ref **ref_ret,
  1248. u64 bytenr, u64 num_bytes,
  1249. u64 parent, u64 root_objectid,
  1250. u64 owner, u64 offset, int insert)
  1251. {
  1252. struct btrfs_key key;
  1253. struct extent_buffer *leaf;
  1254. struct btrfs_extent_item *ei;
  1255. struct btrfs_extent_inline_ref *iref;
  1256. u64 flags;
  1257. u64 item_size;
  1258. unsigned long ptr;
  1259. unsigned long end;
  1260. int extra_size;
  1261. int type;
  1262. int want;
  1263. int ret;
  1264. int err = 0;
  1265. key.objectid = bytenr;
  1266. key.type = BTRFS_EXTENT_ITEM_KEY;
  1267. key.offset = num_bytes;
  1268. want = extent_ref_type(parent, owner);
  1269. if (insert) {
  1270. extra_size = btrfs_extent_inline_ref_size(want);
  1271. path->keep_locks = 1;
  1272. } else
  1273. extra_size = -1;
  1274. ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
  1275. if (ret < 0) {
  1276. err = ret;
  1277. goto out;
  1278. }
  1279. BUG_ON(ret);
  1280. leaf = path->nodes[0];
  1281. item_size = btrfs_item_size_nr(leaf, path->slots[0]);
  1282. #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
  1283. if (item_size < sizeof(*ei)) {
  1284. if (!insert) {
  1285. err = -ENOENT;
  1286. goto out;
  1287. }
  1288. ret = convert_extent_item_v0(trans, root, path, owner,
  1289. extra_size);
  1290. if (ret < 0) {
  1291. err = ret;
  1292. goto out;
  1293. }
  1294. leaf = path->nodes[0];
  1295. item_size = btrfs_item_size_nr(leaf, path->slots[0]);
  1296. }
  1297. #endif
  1298. BUG_ON(item_size < sizeof(*ei));
  1299. ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
  1300. flags = btrfs_extent_flags(leaf, ei);
  1301. ptr = (unsigned long)(ei + 1);
  1302. end = (unsigned long)ei + item_size;
  1303. if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
  1304. ptr += sizeof(struct btrfs_tree_block_info);
  1305. BUG_ON(ptr > end);
  1306. } else {
  1307. BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
  1308. }
  1309. err = -ENOENT;
  1310. while (1) {
  1311. if (ptr >= end) {
  1312. WARN_ON(ptr > end);
  1313. break;
  1314. }
  1315. iref = (struct btrfs_extent_inline_ref *)ptr;
  1316. type = btrfs_extent_inline_ref_type(leaf, iref);
  1317. if (want < type)
  1318. break;
  1319. if (want > type) {
  1320. ptr += btrfs_extent_inline_ref_size(type);
  1321. continue;
  1322. }
  1323. if (type == BTRFS_EXTENT_DATA_REF_KEY) {
  1324. struct btrfs_extent_data_ref *dref;
  1325. dref = (struct btrfs_extent_data_ref *)(&iref->offset);
  1326. if (match_extent_data_ref(leaf, dref, root_objectid,
  1327. owner, offset)) {
  1328. err = 0;
  1329. break;
  1330. }
  1331. if (hash_extent_data_ref_item(leaf, dref) <
  1332. hash_extent_data_ref(root_objectid, owner, offset))
  1333. break;
  1334. } else {
  1335. u64 ref_offset;
  1336. ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
  1337. if (parent > 0) {
  1338. if (parent == ref_offset) {
  1339. err = 0;
  1340. break;
  1341. }
  1342. if (ref_offset < parent)
  1343. break;
  1344. } else {
  1345. if (root_objectid == ref_offset) {
  1346. err = 0;
  1347. break;
  1348. }
  1349. if (ref_offset < root_objectid)
  1350. break;
  1351. }
  1352. }
  1353. ptr += btrfs_extent_inline_ref_size(type);
  1354. }
  1355. if (err == -ENOENT && insert) {
  1356. if (item_size + extra_size >=
  1357. BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
  1358. err = -EAGAIN;
  1359. goto out;
  1360. }
  1361. /*
  1362. * To add a new inline back ref, we have to make sure
  1363. * there is no corresponding back ref item.
  1364. * For simplicity, we just do not add a new inline back
  1365. * ref if there is any kind of item for this block.
  1366. */
  1367. if (find_next_key(path, 0, &key) == 0 &&
  1368. key.objectid == bytenr &&
  1369. key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
  1370. err = -EAGAIN;
  1371. goto out;
  1372. }
  1373. }
  1374. *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
  1375. out:
  1376. if (insert) {
  1377. path->keep_locks = 0;
  1378. btrfs_unlock_up_safe(path, 1);
  1379. }
  1380. return err;
  1381. }
  1382. /*
  1383. * helper to add new inline back ref
  1384. */
  1385. static noinline_for_stack
  1386. int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
  1387. struct btrfs_root *root,
  1388. struct btrfs_path *path,
  1389. struct btrfs_extent_inline_ref *iref,
  1390. u64 parent, u64 root_objectid,
  1391. u64 owner, u64 offset, int refs_to_add,
  1392. struct btrfs_delayed_extent_op *extent_op)
  1393. {
  1394. struct extent_buffer *leaf;
  1395. struct btrfs_extent_item *ei;
  1396. unsigned long ptr;
  1397. unsigned long end;
  1398. unsigned long item_offset;
  1399. u64 refs;
  1400. int size;
  1401. int type;
  1402. int ret;
  1403. leaf = path->nodes[0];
  1404. ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
  1405. item_offset = (unsigned long)iref - (unsigned long)ei;
  1406. type = extent_ref_type(parent, owner);
  1407. size = btrfs_extent_inline_ref_size(type);
  1408. ret = btrfs_extend_item(trans, root, path, size);
  1409. ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
  1410. refs = btrfs_extent_refs(leaf, ei);
  1411. refs += refs_to_add;
  1412. btrfs_set_extent_refs(leaf, ei, refs);
  1413. if (extent_op)
  1414. __run_delayed_extent_op(extent_op, leaf, ei);
  1415. ptr = (unsigned long)ei + item_offset;
  1416. end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
  1417. if (ptr < end - size)
  1418. memmove_extent_buffer(leaf, ptr + size, ptr,
  1419. end - size - ptr);
  1420. iref = (struct btrfs_extent_inline_ref *)ptr;
  1421. btrfs_set_extent_inline_ref_type(leaf, iref, type);
  1422. if (type == BTRFS_EXTENT_DATA_REF_KEY) {
  1423. struct btrfs_extent_data_ref *dref;
  1424. dref = (struct btrfs_extent_data_ref *)(&iref->offset);
  1425. btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
  1426. btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
  1427. btrfs_set_extent_data_ref_offset(leaf, dref, offset);
  1428. btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
  1429. } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
  1430. struct btrfs_shared_data_ref *sref;
  1431. sref = (struct btrfs_shared_data_ref *)(iref + 1);
  1432. btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
  1433. btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
  1434. } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
  1435. btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
  1436. } else {
  1437. btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
  1438. }
  1439. btrfs_mark_buffer_dirty(leaf);
  1440. return 0;
  1441. }
  1442. static int lookup_extent_backref(struct btrfs_trans_handle *trans,
  1443. struct btrfs_root *root,
  1444. struct btrfs_path *path,
  1445. struct btrfs_extent_inline_ref **ref_ret,
  1446. u64 bytenr, u64 num_bytes, u64 parent,
  1447. u64 root_objectid, u64 owner, u64 offset)
  1448. {
  1449. int ret;
  1450. ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
  1451. bytenr, num_bytes, parent,
  1452. root_objectid, owner, offset, 0);
  1453. if (ret != -ENOENT)
  1454. return ret;
  1455. btrfs_release_path(path);
  1456. *ref_ret = NULL;
  1457. if (owner < BTRFS_FIRST_FREE_OBJECTID) {
  1458. ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
  1459. root_objectid);
  1460. } else {
  1461. ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
  1462. root_objectid, owner, offset);
  1463. }
  1464. return ret;
  1465. }
  1466. /*
  1467. * helper to update/remove inline back ref
  1468. */
  1469. static noinline_for_stack
  1470. int update_inline_extent_backref(struct btrfs_trans_handle *trans,
  1471. struct btrfs_root *root,
  1472. struct btrfs_path *path,
  1473. struct btrfs_extent_inline_ref *iref,
  1474. int refs_to_mod,
  1475. struct btrfs_delayed_extent_op *extent_op)
  1476. {
  1477. struct extent_buffer *leaf;
  1478. struct btrfs_extent_item *ei;
  1479. struct btrfs_extent_data_ref *dref = NULL;
  1480. struct btrfs_shared_data_ref *sref = NULL;
  1481. unsigned long ptr;
  1482. unsigned long end;
  1483. u32 item_size;
  1484. int size;
  1485. int type;
  1486. int ret;
  1487. u64 refs;
  1488. leaf = path->nodes[0];
  1489. ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
  1490. refs = btrfs_extent_refs(leaf, ei);
  1491. WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
  1492. refs += refs_to_mod;
  1493. btrfs_set_extent_refs(leaf, ei, refs);
  1494. if (extent_op)
  1495. __run_delayed_extent_op(extent_op, leaf, ei);
  1496. type = btrfs_extent_inline_ref_type(leaf, iref);
  1497. if (type == BTRFS_EXTENT_DATA_REF_KEY) {
  1498. dref = (struct btrfs_extent_data_ref *)(&iref->offset);
  1499. refs = btrfs_extent_data_ref_count(leaf, dref);
  1500. } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
  1501. sref = (struct btrfs_shared_data_ref *)(iref + 1);
  1502. refs = btrfs_shared_data_ref_count(leaf, sref);
  1503. } else {
  1504. refs = 1;
  1505. BUG_ON(refs_to_mod != -1);
  1506. }
  1507. BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
  1508. refs += refs_to_mod;
  1509. if (refs > 0) {
  1510. if (type == BTRFS_EXTENT_DATA_REF_KEY)
  1511. btrfs_set_extent_data_ref_count(leaf, dref, refs);
  1512. else
  1513. btrfs_set_shared_data_ref_count(leaf, sref, refs);
  1514. } else {
  1515. size = btrfs_extent_inline_ref_size(type);
  1516. item_size = btrfs_item_size_nr(leaf, path->slots[0]);
  1517. ptr = (unsigned long)iref;
  1518. end = (unsigned long)ei + item_size;
  1519. if (ptr + size < end)
  1520. memmove_extent_buffer(leaf, ptr, ptr + size,
  1521. end - ptr - size);
  1522. item_size -= size;
  1523. ret = btrfs_truncate_item(trans, root, path, item_size, 1);
  1524. }
  1525. btrfs_mark_buffer_dirty(leaf);
  1526. return 0;
  1527. }
  1528. static noinline_for_stack
  1529. int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
  1530. struct btrfs_root *root,
  1531. struct btrfs_path *path,
  1532. u64 bytenr, u64 num_bytes, u64 parent,
  1533. u64 root_objectid, u64 owner,
  1534. u64 offset, int refs_to_add,
  1535. struct btrfs_delayed_extent_op *extent_op)
  1536. {
  1537. struct btrfs_extent_inline_ref *iref;
  1538. int ret;
  1539. ret = lookup_inline_extent_backref(trans, root, path, &iref,
  1540. bytenr, num_bytes, parent,
  1541. root_objectid, owner, offset, 1);
  1542. if (ret == 0) {
  1543. BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
  1544. ret = update_inline_extent_backref(trans, root, path, iref,
  1545. refs_to_add, extent_op);
  1546. } else if (ret == -ENOENT) {
  1547. ret = setup_inline_extent_backref(trans, root, path, iref,
  1548. parent, root_objectid,
  1549. owner, offset, refs_to_add,
  1550. extent_op);
  1551. }
  1552. return ret;
  1553. }
  1554. static int insert_extent_backref(struct btrfs_trans_handle *trans,
  1555. struct btrfs_root *root,
  1556. struct btrfs_path *path,
  1557. u64 bytenr, u64 parent, u64 root_objectid,
  1558. u64 owner, u64 offset, int refs_to_add)
  1559. {
  1560. int ret;
  1561. if (owner < BTRFS_FIRST_FREE_OBJECTID) {
  1562. BUG_ON(refs_to_add != 1);
  1563. ret = insert_tree_block_ref(trans, root, path, bytenr,
  1564. parent, root_objectid);
  1565. } else {
  1566. ret = insert_extent_data_ref(trans, root, path, bytenr,
  1567. parent, root_objectid,
  1568. owner, offset, refs_to_add);
  1569. }
  1570. return ret;
  1571. }
  1572. static int remove_extent_backref(struct btrfs_trans_handle *trans,
  1573. struct btrfs_root *root,
  1574. struct btrfs_path *path,
  1575. struct btrfs_extent_inline_ref *iref,
  1576. int refs_to_drop, int is_data)
  1577. {
  1578. int ret;
  1579. BUG_ON(!is_data && refs_to_drop != 1);
  1580. if (iref) {
  1581. ret = update_inline_extent_backref(trans, root, path, iref,
  1582. -refs_to_drop, NULL);
  1583. } else if (is_data) {
  1584. ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
  1585. } else {
  1586. ret = btrfs_del_item(trans, root, path);
  1587. }
  1588. return ret;
  1589. }
  1590. static int btrfs_issue_discard(struct block_device *bdev,
  1591. u64 start, u64 len)
  1592. {
  1593. return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
  1594. }
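/*
 * Editor's note (not part of the original file): the >> 9 shifts above
 * convert byte offsets and lengths into the 512-byte sector units that
 * blkdev_issue_discard() expects.
 */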
  1595. static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
  1596. u64 num_bytes, u64 *actual_bytes)
  1597. {
  1598. int ret;
  1599. u64 discarded_bytes = 0;
  1600. struct btrfs_multi_bio *multi = NULL;
  1601. /* Tell the block device(s) that the sectors can be discarded */
  1602. ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
  1603. bytenr, &num_bytes, &multi, 0);
  1604. if (!ret) {
  1605. struct btrfs_bio_stripe *stripe = multi->stripes;
  1606. int i;
  1607. for (i = 0; i < multi->num_stripes; i++, stripe++) {
  1608. if (!stripe->dev->can_discard)
  1609. continue;
  1610. ret = btrfs_issue_discard(stripe->dev->bdev,
  1611. stripe->physical,
  1612. stripe->length);
  1613. if (!ret)
  1614. discarded_bytes += stripe->length;
  1615. else if (ret != -EOPNOTSUPP)
  1616. break;
  1617. /*
  1618. * In case we get back EOPNOTSUPP for some reason, just
  1619. * ignore the return value so we don't screw up
  1620. * people calling discard_extent.
  1621. */
  1622. ret = 0;
  1623. }
  1624. kfree(multi);
  1625. }
  1626. if (actual_bytes)
  1627. *actual_bytes = discarded_bytes;
  1628. return ret;
  1629. }
  1630. int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
  1631. struct btrfs_root *root,
  1632. u64 bytenr, u64 num_bytes, u64 parent,
  1633. u64 root_objectid, u64 owner, u64 offset)
  1634. {
  1635. int ret;
  1636. BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
  1637. root_objectid == BTRFS_TREE_LOG_OBJECTID);
  1638. if (owner < BTRFS_FIRST_FREE_OBJECTID) {
  1639. ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
  1640. parent, root_objectid, (int)owner,
  1641. BTRFS_ADD_DELAYED_REF, NULL);
  1642. } else {
  1643. ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
  1644. parent, root_objectid, owner, offset,
  1645. BTRFS_ADD_DELAYED_REF, NULL);
  1646. }
  1647. return