
/fs/btrfs/ordered-data.c

https://github.com/mstsirkin/linux
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "ctree.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "extent_io.h"
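
/*
 * return the end byte offset of an ordered extent (file_offset + len),
 * clamped to (u64)-1 if the addition would overflow
 */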
static u64 entry_end(struct btrfs_ordered_extent *entry)
{
	if (entry->file_offset + entry->len < entry->file_offset)
		return (u64)-1;
	return entry->file_offset + entry->len;
}

/* returns NULL if the insertion worked, or it returns the node it did find
 * in the tree
 */
static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_ordered_extent *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);

		if (file_offset < entry->file_offset)
			p = &(*p)->rb_left;
		else if (file_offset >= entry_end(entry))
			p = &(*p)->rb_right;
		else
			return parent;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

/*
 * look for a given offset in the tree, and if it can't be found return the
 * first lesser offset
 */
static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
				     struct rb_node **prev_ret)
{
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *test;
	struct btrfs_ordered_extent *entry;
	struct btrfs_ordered_extent *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
		prev = n;
		prev_entry = entry;

		if (file_offset < entry->file_offset)
			n = n->rb_left;
		else if (file_offset >= entry_end(entry))
			n = n->rb_right;
		else
			return n;
	}
	if (!prev_ret)
		return NULL;
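
	/*
	 * no entry contains file_offset; adjust prev so it points at the
	 * closest entry before file_offset (walk forward past entries that
	 * end before it, then back past entries that end after it)
	 */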
	while (prev && file_offset >= entry_end(prev_entry)) {
		test = rb_next(prev);
		if (!test)
			break;
		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
				      rb_node);
		if (file_offset < entry_end(prev_entry))
			break;

		prev = test;
	}
	if (prev)
		prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
				      rb_node);
	while (prev && file_offset < entry_end(prev_entry)) {
		test = rb_prev(prev);
		if (!test)
			break;
		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
				      rb_node);
		prev = test;
	}
	*prev_ret = prev;
	return NULL;
}

/*
 * helper to check if a given offset is inside a given entry
 */
static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
{
	if (file_offset < entry->file_offset ||
	    entry->file_offset + entry->len <= file_offset)
		return 0;
	return 1;
}
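
/*
 * helper to check if the byte range [file_offset, file_offset + len)
 * overlaps the range covered by an entry
 */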
static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
			  u64 len)
{
	if (file_offset + len <= entry->file_offset ||
	    entry->file_offset + entry->len <= file_offset)
		return 0;
	return 1;
}

/*
 * look for the first ordered struct that has this offset, otherwise
 * the first one less than this offset
 */
static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
					  u64 file_offset)
{
	struct rb_root *root = &tree->tree;
	struct rb_node *prev = NULL;
	struct rb_node *ret;
	struct btrfs_ordered_extent *entry;
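
	/* check the cached result of the last search first */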
	if (tree->last) {
		entry = rb_entry(tree->last, struct btrfs_ordered_extent,
				 rb_node);
		if (offset_in_entry(entry, file_offset))
			return tree->last;
	}
	ret = __tree_search(root, file_offset, &prev);
	if (!ret)
		ret = prev;
	if (ret)
		tree->last = ret;
	return ret;
}

/* allocate and add a new ordered_extent into the per-inode tree.
 * file_offset is the logical offset in the file
 *
 * start is the disk block number of an extent already reserved in the
 * extent allocation tree
 *
 * len is the length of the extent
 *
 * The tree is given a single reference on the ordered extent that was
 * inserted.
 */
static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
				      u64 start, u64 len, u64 disk_len,
				      int type, int dio, int compress_type)
{
	struct btrfs_ordered_inode_tree *tree;
	struct rb_node *node;
	struct btrfs_ordered_extent *entry;

	tree = &BTRFS_I(inode)->ordered_tree;
	entry = kzalloc(sizeof(*entry), GFP_NOFS);
	if (!entry)
		return -ENOMEM;

	entry->file_offset = file_offset;
	entry->start = start;
	entry->len = len;
	entry->disk_len = disk_len;
	entry->bytes_left = len;
	entry->inode = inode;
	entry->compress_type = compress_type;
	if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
		set_bit(type, &entry->flags);

	if (dio)
		set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);

	/* one ref for the tree */
	atomic_set(&entry->refs, 1);
	init_waitqueue_head(&entry->wait);
	INIT_LIST_HEAD(&entry->list);
	INIT_LIST_HEAD(&entry->root_extent_list);

	trace_btrfs_ordered_extent_add(inode, entry);

	spin_lock(&tree->lock);
	node = tree_insert(&tree->tree, file_offset,
			   &entry->rb_node);
	BUG_ON(node);
	spin_unlock(&tree->lock);

	spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
	list_add_tail(&entry->root_extent_list,
		      &BTRFS_I(inode)->root->fs_info->ordered_extents);
	spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);

	BUG_ON(node);
	return 0;
}
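
/*
 * wrapper around __btrfs_add_ordered_extent for regular buffered IO:
 * no direct IO flag and no compression
 */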
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
			     u64 start, u64 len, u64 disk_len, int type)
{
	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
					  disk_len, type, 0,
					  BTRFS_COMPRESS_NONE);
}
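
/*
 * same as btrfs_add_ordered_extent, but marks the ordered extent as
 * coming from direct IO (BTRFS_ORDERED_DIRECT)
 */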
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
				 u64 start, u64 len, u64 disk_len, int type)
{
	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
					  disk_len, type, 1,
					  BTRFS_COMPRESS_NONE);
}
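
/*
 * same as btrfs_add_ordered_extent, but records the compression type
 * used for the extent
 */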
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
				      u64 start, u64 len, u64 disk_len,
				      int type, int compress_type)
{
	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
					  disk_len, type, 0,
					  compress_type);
}

/*
 * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
 * when an ordered extent is finished.  If the list covers more than one
 * ordered extent, it is split across multiples.
 */
int btrfs_add_ordered_sum(struct inode *inode,
			  struct btrfs_ordered_extent *entry,
			  struct btrfs_ordered_sum *sum)
{
	struct btrfs_ordered_inode_tree *tree;

	tree = &BTRFS_I(inode)->ordered_tree;
	spin_lock(&tree->lock);
	list_add_tail(&sum->list, &entry->list);
	spin_unlock(&tree->lock);
	return 0;
}

/*
 * this is used to account for finished IO across a given range
 * of the file.  The IO may span ordered extents.  If
 * a given ordered_extent is completely done, 1 is returned, otherwise
 * 0.
 *
 * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
 * to make sure this function only returns 1 once for a given ordered extent.
 *
 * file_offset is updated to one byte past the range that is recorded as
 * complete.  This allows you to walk forward in the file.
 */
int btrfs_dec_test_first_ordered_pending(struct inode *inode,
					 struct btrfs_ordered_extent **cached,
					 u64 *file_offset, u64 io_size)
{
	struct btrfs_ordered_inode_tree *tree;
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;
	int ret;
	u64 dec_end;
	u64 dec_start;
	u64 to_dec;

	tree = &BTRFS_I(inode)->ordered_tree;
	spin_lock(&tree->lock);
	node = tree_search(tree, *file_offset);
	if (!node) {
		ret = 1;
		goto out;
	}

	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
	if (!offset_in_entry(entry, *file_offset)) {
		ret = 1;
		goto out;
	}
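
	/* clamp the accounted range to the part that falls inside this entry */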
	dec_start = max(*file_offset, entry->file_offset);
	dec_end = min(*file_offset + io_size, entry->file_offset +
		      entry->len);
	*file_offset = dec_end;
	if (dec_start > dec_end) {
		printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
		       (unsigned long long)dec_start,
		       (unsigned long long)dec_end);
	}
	to_dec = dec_end - dec_start;
	if (to_dec > entry->bytes_left) {
		printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
		       (unsigned long long)entry->bytes_left,
		       (unsigned long long)to_dec);
	}
	entry->bytes_left -= to_dec;
	if (entry->bytes_left == 0)
		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
	else
		ret = 1;
out:
	if (!ret && cached && entry) {
		*cached = entry;
		atomic_inc(&entry->refs);
	}
	spin_unlock(&tree->lock);
	return ret == 0;
}

/*
 * this is used to account for finished IO across a given range
 * of the file.  The IO should not span ordered extents.  If
 * a given ordered_extent is completely done, 1 is returned, otherwise
 * 0.
 *
 * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
 * to make sure this function only returns 1 once for a given ordered extent.
 */
int btrfs_dec_test_ordered_pending(struct inode *inode,
				   struct btrfs_ordered_extent **cached,
				   u64 file_offset, u64 io_size)
{
	struct btrfs_ordered_inode_tree *tree;
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;
	int ret;

	tree = &BTRFS_I(inode)->ordered_tree;
	spin_lock(&tree->lock);
	node = tree_search(tree, file_offset);
	if (!node) {
		ret = 1;
		goto out;
	}

	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
	if (!offset_in_entry(entry, file_offset)) {
		ret = 1;
		goto out;
	}

	if (io_size > entry->bytes_left) {
		printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
		       (unsigned long long)entry->bytes_left,
		       (unsigned long long)io_size);
	}
	entry->bytes_left -= io_size;
	if (entry->bytes_left == 0)
		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
	else
		ret = 1;
out:
	if (!ret && cached && entry) {
		*cached = entry;
		atomic_inc(&entry->refs);
	}
	spin_unlock(&tree->lock);
	return ret == 0;
}

/*
 * used to drop a reference on an ordered extent.  This will free
 * the extent if the last reference is dropped
 */
int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
{
	struct list_head *cur;
	struct btrfs_ordered_sum *sum;

	trace_btrfs_ordered_extent_put(entry->inode, entry);

	if (atomic_dec_and_test(&entry->refs)) {
		while (!list_empty(&entry->list)) {
			cur = entry->list.next;
			sum = list_entry(cur, struct btrfs_ordered_sum, list);
			list_del(&sum->list);
			kfree(sum);
		}
		kfree(entry);
	}
	return 0;
}

/*
 * remove an ordered extent from the tree.  No references are dropped
 * and you must wake_up entry->wait.  You must hold the tree lock
 * while you call this function.
 */
static int __btrfs_remove_ordered_extent(struct inode *inode,
					 struct btrfs_ordered_extent *entry)
{
	struct btrfs_ordered_inode_tree *tree;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct rb_node *node;

	tree = &BTRFS_I(inode)->ordered_tree;
	node = &entry->rb_node;
	rb_erase(node, &tree->tree);
	tree->last = NULL;
	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);

	spin_lock(&root->fs_info->ordered_extent_lock);
	list_del_init(&entry->root_extent_list);

	trace_btrfs_ordered_extent_remove(inode, entry);

	/*
	 * we have no more ordered extents for this inode and
	 * no dirty pages.  We can safely remove it from the
	 * list of ordered extents
	 */
	if (RB_EMPTY_ROOT(&tree->tree) &&
	    !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
		list_del_init(&BTRFS_I(inode)->ordered_operations);
	}
	spin_unlock(&root->fs_info->ordered_extent_lock);

	return 0;
}

/*
 * remove an ordered extent from the tree.  No references are dropped
 * but any waiters are woken.
 */
int btrfs_remove_ordered_extent(struct inode *inode,
				struct btrfs_ordered_extent *entry)
{
	struct btrfs_ordered_inode_tree *tree;
	int ret;

	tree = &BTRFS_I(inode)->ordered_tree;
	spin_lock(&tree->lock);
	ret = __btrfs_remove_ordered_extent(inode, entry);
	spin_unlock(&tree->lock);
	wake_up(&entry->wait);

	return ret;
}

/*
 * wait for all the ordered extents in a root.  This is done when balancing
 * space between drives.
 */
int btrfs_wait_ordered_extents(struct btrfs_root *root,
			       int nocow_only, int delay_iput)
{
	struct list_head splice;
	struct list_head *cur;
	struct btrfs_ordered_extent *ordered;
	struct inode *inode;

	INIT_LIST_HEAD(&splice);
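
	/*
	 * splice the current list of ordered extents onto a private list so
	 * we can drop the lock while waiting on each one
	 */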
	spin_lock(&root->fs_info->ordered_extent_lock);
	list_splice_init(&root->fs_info->ordered_extents, &splice);
	while (!list_empty(&splice)) {
		cur = splice.next;
		ordered = list_entry(cur, struct btrfs_ordered_extent,
				     root_extent_list);
		if (nocow_only &&
		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) &&
		    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
			list_move(&ordered->root_extent_list,
				  &root->fs_info->ordered_extents);
			cond_resched_lock(&root->fs_info->ordered_extent_lock);
			continue;
		}

		list_del_init(&ordered->root_extent_list);
		atomic_inc(&ordered->refs);

		/*
		 * the inode may be getting freed (in sys_unlink path).
		 */
		inode = igrab(ordered->inode);

		spin_unlock(&root->fs_info->ordered_extent_lock);

		if (inode) {
			btrfs_start_ordered_extent(inode, ordered, 1);
			btrfs_put_ordered_extent(ordered);
			if (delay_iput)
				btrfs_add_delayed_iput(inode);
			else
				iput(inode);
		} else {
			btrfs_put_ordered_extent(ordered);
		}

		spin_lock(&root->fs_info->ordered_extent_lock);
	}
	spin_unlock(&root->fs_info->ordered_extent_lock);
	return 0;
}

/*
 * this is used during transaction commit to write all the inodes
 * added to the ordered operation list.  These files must be fully on
 * disk before the transaction commits.
 *
 * we have two modes here, one is to just start the IO via filemap_flush
 * and the other is to wait for all the io.  When we wait, we have an
 * extra check to make sure the ordered operation list really is empty
 * before we return
 */
int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
{
	struct btrfs_inode *btrfs_inode;
	struct inode *inode;
	struct list_head splice;

	INIT_LIST_HEAD(&splice);

	mutex_lock(&root->fs_info->ordered_operations_mutex);
	spin_lock(&root->fs_info->ordered_extent_lock);
again:
	list_splice_init(&root->fs_info->ordered_operations, &splice);

	while (!list_empty(&splice)) {
		btrfs_inode = list_entry(splice.next, struct btrfs_inode,
					 ordered_operations);

		inode = &btrfs_inode->vfs_inode;

		list_del_init(&btrfs_inode->ordered_operations);

		/*
		 * the inode may be getting freed (in sys_unlink path).
		 */
		inode = igrab(inode);

		if (!wait && inode) {
			list_add_tail(&BTRFS_I(inode)->ordered_operations,
				      &root->fs_info->ordered_operations);
		}
		spin_unlock(&root->fs_info->ordered_extent_lock);

		if (inode) {
			if (wait)
				btrfs_wait_ordered_range(inode, 0, (u64)-1);
			else
				filemap_flush(inode->i_mapping);
			btrfs_add_delayed_iput(inode);
		}

		cond_resched();
		spin_lock(&root->fs_info->ordered_extent_lock);
	}
	if (wait && !list_empty(&root->fs_info->ordered_operations))
		goto again;

	spin_unlock(&root->fs_info->ordered_extent_lock);
	mutex_unlock(&root->fs_info->ordered_operations_mutex);

	return 0;
}

/*
 * Used to start IO or wait for a given ordered extent to finish.
 *
 * If wait is one, this effectively waits on page writeback for all the pages
 * in the extent, and it waits on the io completion code to insert
 * metadata into the btree corresponding to the extent
 */
void btrfs_start_ordered_extent(struct inode *inode,
				struct btrfs_ordered_extent *entry,
				int wait)
{
	u64 start = entry->file_offset;
	u64 end = start + entry->len - 1;

	trace_btrfs_ordered_extent_start(inode, entry);

	/*
	 * pages in the range can be dirty, clean or writeback.  We
	 * start IO on any dirty ones so the wait doesn't stall waiting
	 * for pdflush to find them
	 */
	if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
		filemap_fdatawrite_range(inode->i_mapping, start, end);
	if (wait) {
		wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
						 &entry->flags));
	}
}

/*
 * Used to wait on ordered extents across a large range of bytes.
 */
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
	u64 end;
	u64 orig_end;
	struct btrfs_ordered_extent *ordered;
	int found;

	if (start + len < start) {
		orig_end = INT_LIMIT(loff_t);
	} else {
		orig_end = start + len - 1;
		if (orig_end > INT_LIMIT(loff_t))
			orig_end = INT_LIMIT(loff_t);
	}
again:
	/* start IO across the range first to instantiate any delalloc
	 * extents
	 */
	filemap_fdatawrite_range(inode->i_mapping, start, orig_end);

	/* The compression code will leave pages locked but return from
	 * writepage without setting the page writeback.  Starting again
	 * with WB_SYNC_ALL will end up waiting for the IO to actually start.
	 */
	filemap_fdatawrite_range(inode->i_mapping, start, orig_end);

	filemap_fdatawait_range(inode->i_mapping, start, orig_end);

	end = orig_end;
	found = 0;
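
	/*
	 * walk backwards from the end of the range, waiting on each ordered
	 * extent that overlaps it, until nothing before 'start' remains
	 */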
	while (1) {
		ordered = btrfs_lookup_first_ordered_extent(inode, end);
		if (!ordered)
			break;
		if (ordered->file_offset > orig_end) {
			btrfs_put_ordered_extent(ordered);
			break;
		}
		if (ordered->file_offset + ordered->len < start) {
			btrfs_put_ordered_extent(ordered);
			break;
		}
		found++;
		btrfs_start_ordered_extent(inode, ordered, 1);
		end = ordered->file_offset;
		btrfs_put_ordered_extent(ordered);
		if (end == 0 || end == start)
			break;
		end--;
	}
	if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
				    EXTENT_DELALLOC, 0, NULL)) {
		schedule_timeout(1);
		goto again;
	}
	return 0;
}

/*
 * find an ordered extent corresponding to file_offset.  return NULL if
 * nothing is found, otherwise take a reference on the extent and return it
 */
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
							 u64 file_offset)
{
	struct btrfs_ordered_inode_tree *tree;
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;

	tree = &BTRFS_I(inode)->ordered_tree;
	spin_lock(&tree->lock);
	node = tree_search(tree, file_offset);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
	if (!offset_in_entry(entry, file_offset))
		entry = NULL;
	if (entry)
		atomic_inc(&entry->refs);
out:
	spin_unlock(&tree->lock);
	return entry;
}

/* Since the DIO code tries to lock a wide area we need to look for any ordered
 * extents that exist in the range, rather than just the start of the range.
 */
struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
							 u64 file_offset,
							 u64 len)
{
	struct btrfs_ordered_inode_tree *tree;
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;

	tree = &BTRFS_I(inode)->ordered_tree;
	spin_lock(&tree->lock);
	node = tree_search(tree, file_offset);
	if (!node) {
		node = tree_search(tree, file_offset + len);
		if (!node)
			goto out;
	}

	while (1) {
		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
		if (range_overlaps(entry, file_offset, len))
			break;

		if (entry->file_offset >= file_offset + len) {
			entry = NULL;
			break;
		}
		entry = NULL;
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	if (entry)
		atomic_inc(&entry->refs);
	spin_unlock(&tree->lock);
	return entry;
}

/*
 * lookup and return any extent before 'file_offset'.  NULL is returned
 * if none is found
 */
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
{
	struct btrfs_ordered_inode_tree *tree;
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;

	tree = &BTRFS_I(inode)->ordered_tree;
	spin_lock(&tree->lock);
	node = tree_search(tree, file_offset);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
	atomic_inc(&entry->refs);
out:
	spin_unlock(&tree->lock);
	return entry;
}

/*
 * After an extent is done, call this to conditionally update the on disk
 * i_size.  i_size is updated to cover any fully written part of the file.
 */
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
				struct btrfs_ordered_extent *ordered)
{
	struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	u64 disk_i_size;
	u64 new_i_size;
	u64 i_size_test;
	u64 i_size = i_size_read(inode);
	struct rb_node *node;
	struct rb_node *prev = NULL;
	struct btrfs_ordered_extent *test;
	int ret = 1;

	if (ordered)
		offset = entry_end(ordered);
	else
		offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);

	spin_lock(&tree->lock);
	disk_i_size = BTRFS_I(inode)->disk_i_size;

	/* truncate file */
	if (disk_i_size > i_size) {
		BTRFS_I(inode)->disk_i_size = i_size;
		ret = 0;
		goto out;
	}

	/*
	 * if the disk i_size is already at the inode->i_size, or
	 * this ordered extent is inside the disk i_size, we're done
	 */
	if (disk_i_size == i_size || offset <= disk_i_size) {
		goto out;
	}

	/*
	 * we can't update the disk_i_size if there are delalloc bytes
	 * between disk_i_size and this ordered extent
	 */
	if (test_range_bit(io_tree, disk_i_size, offset - 1,
			   EXTENT_DELALLOC, 0, NULL)) {
		goto out;
	}

	/*
	 * walk backward from this ordered extent to disk_i_size.
	 * if we find an ordered extent then we can't update disk i_size
	 * yet
	 */
	if (ordered) {
		node = rb_prev(&ordered->rb_node);
	} else {
		prev = tree_search(tree, offset);
		/*
		 * we insert file extents without involving ordered struct,
		 * so there should be no ordered struct covering this offset
		 */
		if (prev) {
			test = rb_entry(prev, struct btrfs_ordered_extent,
					rb_node);
			BUG_ON(offset_in_entry(test, offset));
		}
		node = prev;
	}
	while (node) {
		test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
		if (test->file_offset + test->len <= disk_i_size)
			break;
		if (test->file_offset >= i_size)
			break;
		if (test->file_offset >= disk_i_size)
			goto out;
		node = rb_prev(node);
	}
	new_i_size = min_t(u64, offset, i_size);

	/*
	 * at this point, we know we can safely update i_size to at least
	 * the offset from this ordered extent.  But, we need to
	 * walk forward and see if ios from higher up in the file have
	 * finished.
	 */
	if (ordered) {
		node = rb_next(&ordered->rb_node);
	} else {
		if (prev)
			node = rb_next(prev);
		else
			node = rb_first(&tree->tree);
	}
	i_size_test = 0;
	if (node) {
		/*
		 * do we have an area where IO might have finished
		 * between our ordered extent and the next one?
		 */
		test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
		if (test->file_offset > offset)
			i_size_test = test->file_offset;
	} else {
		i_size_test = i_size;
	}

	/*
	 * i_size_test is the end of a region after this ordered
	 * extent where there are no ordered extents.  As long as there
	 * are no delalloc bytes in this area, it is safe to update
	 * disk_i_size to the end of the region.
	 */
	if (i_size_test > offset &&
	    !test_range_bit(io_tree, offset, i_size_test - 1,
			    EXTENT_DELALLOC, 0, NULL)) {
		new_i_size = min_t(u64, i_size_test, i_size);
	}
	BTRFS_I(inode)->disk_i_size = new_i_size;
	ret = 0;
out:
	/*
	 * we need to remove the ordered extent with the tree lock held
	 * so that other people calling this function don't find our fully
	 * processed ordered entry and skip updating the i_size
	 */
	if (ordered)
		__btrfs_remove_ordered_extent(inode, ordered);
	spin_unlock(&tree->lock);
	if (ordered)
		wake_up(&ordered->wait);
	return ret;
}

/*
 * search the ordered extents for one corresponding to 'offset' and
 * try to find a checksum.  This is used because we allow pages to
 * be reclaimed before their checksum is actually put into the btree
 */
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
			   u32 *sum)
{
	struct btrfs_ordered_sum *ordered_sum;
	struct btrfs_sector_sum *sector_sums;
	struct btrfs_ordered_extent *ordered;
	struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
	unsigned long num_sectors;
	unsigned long i;
	u32 sectorsize = BTRFS_I(inode)->root->sectorsize;
	int ret = 1;

	ordered = btrfs_lookup_ordered_extent(inode, offset);
	if (!ordered)
		return 1;

	spin_lock(&tree->lock);
	list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
		if (disk_bytenr >= ordered_sum->bytenr) {
			num_sectors = ordered_sum->len / sectorsize;
			sector_sums = ordered_sum->sums;
			for (i = 0; i < num_sectors; i++) {
				if (sector_sums[i].bytenr == disk_bytenr) {
					*sum = sector_sums[i].sum;
					ret = 0;
					goto out;
				}
			}
		}
	}
out:
	spin_unlock(&tree->lock);
	btrfs_put_ordered_extent(ordered);
	return ret;
}

/*
 * add a given inode to the list of inodes that must be fully on
 * disk before a transaction commit finishes.
 *
 * This basically gives us the ext3 style data=ordered mode, and it is mostly
 * used to make sure renamed files are fully on disk.
 *
 * It is a noop if the inode is already fully on disk.
 *
 * If trans is not null, we'll do a friendly check for a transaction that
 * is already flushing things and force the IO down ourselves.
 */
int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct inode *inode)
{
	u64 last_mod;

	last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);

	/*
	 * if this file hasn't been changed since the last transaction
	 * commit, we can safely return without doing anything
	 */
	if (last_mod < root->fs_info->last_trans_committed)
		return 0;

	/*
	 * the transaction is already committing.  Just start the IO and
	 * don't bother with all of this list nonsense
	 */
	if (trans && root->fs_info->running_transaction->blocked) {
		btrfs_wait_ordered_range(inode, 0, (u64)-1);
		return 0;
	}

	spin_lock(&root->fs_info->ordered_extent_lock);
	if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
		list_add_tail(&BTRFS_I(inode)->ordered_operations,
			      &root->fs_info->ordered_operations);
	}
	spin_unlock(&root->fs_info->ordered_extent_lock);

	return 0;
}