
/fs/btrfs/free-space-cache.c

https://bitbucket.org/slukk/jb-tsm-kernel-4.2
C | 2693 lines | 2027 code | 385 blank | 281 comment
License: GPL-2.0

Large files are truncated; only part of this file is shown below.

   1/*
   2 * Copyright (C) 2008 Red Hat.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16 * Boston, MA 021110-1307, USA.
  17 */
  18
  19#include <linux/pagemap.h>
  20#include <linux/sched.h>
  21#include <linux/slab.h>
  22#include <linux/math64.h>
  23#include "ctree.h"
  24#include "free-space-cache.h"
  25#include "transaction.h"
  26#include "disk-io.h"
  27#include "extent_io.h"
  28#include "inode-map.h"
  29
  30#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
  31#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
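/*
 * A bitmap occupies one page, with one bit per ctl->unit (the block group's
 * sectorsize) bytes, so a single bitmap entry covers BITS_PER_BITMAP *
 * sectorsize bytes (e.g. with 4KiB pages and 4KiB sectors that is 32768
 * bits, or 128MiB).  MAX_CACHE_BYTES_PER_GIG caps the in-memory tracking
 * overhead at 32KiB per 1GiB of block group; see recalculate_thresholds().
 */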
  32
  33static int link_free_space(struct btrfs_free_space_ctl *ctl,
  34			   struct btrfs_free_space *info);
  35
  36static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
  37					       struct btrfs_path *path,
  38					       u64 offset)
  39{
  40	struct btrfs_key key;
  41	struct btrfs_key location;
  42	struct btrfs_disk_key disk_key;
  43	struct btrfs_free_space_header *header;
  44	struct extent_buffer *leaf;
  45	struct inode *inode = NULL;
  46	int ret;
  47
  48	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
  49	key.offset = offset;
  50	key.type = 0;
  51
  52	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
  53	if (ret < 0)
  54		return ERR_PTR(ret);
  55	if (ret > 0) {
  56		btrfs_release_path(path);
  57		return ERR_PTR(-ENOENT);
  58	}
  59
  60	leaf = path->nodes[0];
  61	header = btrfs_item_ptr(leaf, path->slots[0],
  62				struct btrfs_free_space_header);
  63	btrfs_free_space_key(leaf, header, &disk_key);
  64	btrfs_disk_key_to_cpu(&location, &disk_key);
  65	btrfs_release_path(path);
  66
  67	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
  68	if (!inode)
  69		return ERR_PTR(-ENOENT);
  70	if (IS_ERR(inode))
  71		return inode;
  72	if (is_bad_inode(inode)) {
  73		iput(inode);
  74		return ERR_PTR(-ENOENT);
  75	}
  76
  77	inode->i_mapping->flags &= ~__GFP_FS;
  78
  79	return inode;
  80}
  81
  82struct inode *lookup_free_space_inode(struct btrfs_root *root,
  83				      struct btrfs_block_group_cache
  84				      *block_group, struct btrfs_path *path)
  85{
  86	struct inode *inode = NULL;
  87
  88	spin_lock(&block_group->lock);
  89	if (block_group->inode)
  90		inode = igrab(block_group->inode);
  91	spin_unlock(&block_group->lock);
  92	if (inode)
  93		return inode;
  94
  95	inode = __lookup_free_space_inode(root, path,
  96					  block_group->key.objectid);
  97	if (IS_ERR(inode))
  98		return inode;
  99
 100	spin_lock(&block_group->lock);
 101	if (!btrfs_fs_closing(root->fs_info)) {
 102		block_group->inode = igrab(inode);
 103		block_group->iref = 1;
 104	}
 105	spin_unlock(&block_group->lock);
 106
 107	return inode;
 108}
 109
 110int __create_free_space_inode(struct btrfs_root *root,
 111			      struct btrfs_trans_handle *trans,
 112			      struct btrfs_path *path, u64 ino, u64 offset)
 113{
 114	struct btrfs_key key;
 115	struct btrfs_disk_key disk_key;
 116	struct btrfs_free_space_header *header;
 117	struct btrfs_inode_item *inode_item;
 118	struct extent_buffer *leaf;
 119	int ret;
 120
 121	ret = btrfs_insert_empty_inode(trans, root, path, ino);
 122	if (ret)
 123		return ret;
 124
 125	leaf = path->nodes[0];
 126	inode_item = btrfs_item_ptr(leaf, path->slots[0],
 127				    struct btrfs_inode_item);
 128	btrfs_item_key(leaf, &disk_key, path->slots[0]);
 129	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
 130			     sizeof(*inode_item));
 131	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
 132	btrfs_set_inode_size(leaf, inode_item, 0);
 133	btrfs_set_inode_nbytes(leaf, inode_item, 0);
 134	btrfs_set_inode_uid(leaf, inode_item, 0);
 135	btrfs_set_inode_gid(leaf, inode_item, 0);
 136	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
 137	btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
 138			      BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
 139	btrfs_set_inode_nlink(leaf, inode_item, 1);
 140	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
 141	btrfs_set_inode_block_group(leaf, inode_item, offset);
 142	btrfs_mark_buffer_dirty(leaf);
 143	btrfs_release_path(path);
 144
 145	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
 146	key.offset = offset;
 147	key.type = 0;
 148
 149	ret = btrfs_insert_empty_item(trans, root, path, &key,
 150				      sizeof(struct btrfs_free_space_header));
 151	if (ret < 0) {
 152		btrfs_release_path(path);
 153		return ret;
 154	}
 155	leaf = path->nodes[0];
 156	header = btrfs_item_ptr(leaf, path->slots[0],
 157				struct btrfs_free_space_header);
 158	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
 159	btrfs_set_free_space_key(leaf, header, &disk_key);
 160	btrfs_mark_buffer_dirty(leaf);
 161	btrfs_release_path(path);
 162
 163	return 0;
 164}
 165
 166int create_free_space_inode(struct btrfs_root *root,
 167			    struct btrfs_trans_handle *trans,
 168			    struct btrfs_block_group_cache *block_group,
 169			    struct btrfs_path *path)
 170{
 171	int ret;
 172	u64 ino;
 173
 174	ret = btrfs_find_free_objectid(root, &ino);
 175	if (ret < 0)
 176		return ret;
 177
 178	return __create_free_space_inode(root, trans, path, ino,
 179					 block_group->key.objectid);
 180}
 181
 182int btrfs_truncate_free_space_cache(struct btrfs_root *root,
 183				    struct btrfs_trans_handle *trans,
 184				    struct btrfs_path *path,
 185				    struct inode *inode)
 186{
 187	loff_t oldsize;
 188	int ret = 0;
 189
 190	trans->block_rsv = root->orphan_block_rsv;
 191	ret = btrfs_block_rsv_check(trans, root,
 192				    root->orphan_block_rsv,
 193				    0, 5);
 194	if (ret)
 195		return ret;
 196
 197	oldsize = i_size_read(inode);
 198	btrfs_i_size_write(inode, 0);
 199	truncate_pagecache(inode, oldsize, 0);
 200
 201	/*
 202	 * We don't need an orphan item because truncating the free space cache
 203	 * will never be split across transactions.
 204	 */
 205	ret = btrfs_truncate_inode_items(trans, root, inode,
 206					 0, BTRFS_EXTENT_DATA_KEY);
 207	if (ret) {
 208		WARN_ON(1);
 209		return ret;
 210	}
 211
 212	ret = btrfs_update_inode(trans, root, inode);
 213	return ret;
 214}
 215
 216static int readahead_cache(struct inode *inode)
 217{
 218	struct file_ra_state *ra;
 219	unsigned long last_index;
 220
 221	ra = kzalloc(sizeof(*ra), GFP_NOFS);
 222	if (!ra)
 223		return -ENOMEM;
 224
 225	file_ra_state_init(ra, inode->i_mapping);
 226	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
 227
 228	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
 229
 230	kfree(ra);
 231
 232	return 0;
 233}
 234
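/*
 * Read the on-disk free space cache keyed by 'offset' back into 'ctl'.
 * Returns 1 if the cache was loaded successfully; any other value means the
 * on-disk copy could not be used (missing item, generation mismatch, crc
 * error or malformed entries), in which case anything partially loaded is
 * torn down again via the free_cache label and the caller has to build the
 * free space information some other way.
 */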
 235int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 236			    struct btrfs_free_space_ctl *ctl,
 237			    struct btrfs_path *path, u64 offset)
 238{
 239	struct btrfs_free_space_header *header;
 240	struct extent_buffer *leaf;
 241	struct page *page;
 242	u32 *checksums = NULL, *crc;
 243	char *disk_crcs = NULL;
 244	struct btrfs_key key;
 245	struct list_head bitmaps;
 246	u64 num_entries;
 247	u64 num_bitmaps;
 248	u64 generation;
 249	u32 cur_crc = ~(u32)0;
 250	pgoff_t index = 0;
 251	unsigned long first_page_offset;
 252	int num_checksums;
 253	int ret = 0;
 254
 255	INIT_LIST_HEAD(&bitmaps);
 256
 257	/* Nothing in the space cache, goodbye */
 258	if (!i_size_read(inode))
 259		goto out;
 260
 261	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
 262	key.offset = offset;
 263	key.type = 0;
 264
 265	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 266	if (ret < 0)
 267		goto out;
 268	else if (ret > 0) {
 269		btrfs_release_path(path);
 270		ret = 0;
 271		goto out;
 272	}
 273
 274	ret = -1;
 275
 276	leaf = path->nodes[0];
 277	header = btrfs_item_ptr(leaf, path->slots[0],
 278				struct btrfs_free_space_header);
 279	num_entries = btrfs_free_space_entries(leaf, header);
 280	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
 281	generation = btrfs_free_space_generation(leaf, header);
 282	btrfs_release_path(path);
 283
 284	if (BTRFS_I(inode)->generation != generation) {
 285		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
 286		       " not match free space cache generation (%llu)\n",
 287		       (unsigned long long)BTRFS_I(inode)->generation,
 288		       (unsigned long long)generation);
 289		goto out;
 290	}
 291
 292	if (!num_entries)
 293		goto out;
 294
  295	/* Set up everything for doing checksumming */
 296	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
 297	checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
 298	if (!checksums)
 299		goto out;
 300	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
 301	disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
 302	if (!disk_crcs)
 303		goto out;
 304
 305	ret = readahead_cache(inode);
 306	if (ret)
 307		goto out;
 308
 309	while (1) {
 310		struct btrfs_free_space_entry *entry;
 311		struct btrfs_free_space *e;
 312		void *addr;
 313		unsigned long offset = 0;
 314		unsigned long start_offset = 0;
 315		int need_loop = 0;
 316
 317		if (!num_entries && !num_bitmaps)
 318			break;
 319
 320		if (index == 0) {
 321			start_offset = first_page_offset;
 322			offset = start_offset;
 323		}
 324
 325		page = grab_cache_page(inode->i_mapping, index);
 326		if (!page)
 327			goto free_cache;
 328
 329		if (!PageUptodate(page)) {
 330			btrfs_readpage(NULL, page);
 331			lock_page(page);
 332			if (!PageUptodate(page)) {
 333				unlock_page(page);
 334				page_cache_release(page);
 335				printk(KERN_ERR "btrfs: error reading free "
 336				       "space cache\n");
 337				goto free_cache;
 338			}
 339		}
 340		addr = kmap(page);
 341
 342		if (index == 0) {
 343			u64 *gen;
 344
 345			memcpy(disk_crcs, addr, first_page_offset);
 346			gen = addr + (sizeof(u32) * num_checksums);
 347			if (*gen != BTRFS_I(inode)->generation) {
 348				printk(KERN_ERR "btrfs: space cache generation"
 349				       " (%llu) does not match inode (%llu)\n",
 350				       (unsigned long long)*gen,
 351				       (unsigned long long)
 352				       BTRFS_I(inode)->generation);
 353				kunmap(page);
 354				unlock_page(page);
 355				page_cache_release(page);
 356				goto free_cache;
 357			}
 358			crc = (u32 *)disk_crcs;
 359		}
 360		entry = addr + start_offset;
 361
  362		/* First let's check our crc before we do anything fun */
 363		cur_crc = ~(u32)0;
 364		cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
 365					  PAGE_CACHE_SIZE - start_offset);
 366		btrfs_csum_final(cur_crc, (char *)&cur_crc);
 367		if (cur_crc != *crc) {
 368			printk(KERN_ERR "btrfs: crc mismatch for page %lu\n",
 369			       index);
 370			kunmap(page);
 371			unlock_page(page);
 372			page_cache_release(page);
 373			goto free_cache;
 374		}
 375		crc++;
 376
 377		while (1) {
 378			if (!num_entries)
 379				break;
 380
 381			need_loop = 1;
 382			e = kmem_cache_zalloc(btrfs_free_space_cachep,
 383					      GFP_NOFS);
 384			if (!e) {
 385				kunmap(page);
 386				unlock_page(page);
 387				page_cache_release(page);
 388				goto free_cache;
 389			}
 390
 391			e->offset = le64_to_cpu(entry->offset);
 392			e->bytes = le64_to_cpu(entry->bytes);
 393			if (!e->bytes) {
 394				kunmap(page);
 395				kmem_cache_free(btrfs_free_space_cachep, e);
 396				unlock_page(page);
 397				page_cache_release(page);
 398				goto free_cache;
 399			}
 400
 401			if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
 402				spin_lock(&ctl->tree_lock);
 403				ret = link_free_space(ctl, e);
 404				spin_unlock(&ctl->tree_lock);
 405				if (ret) {
 406					printk(KERN_ERR "Duplicate entries in "
 407					       "free space cache, dumping\n");
 408					kunmap(page);
 409					unlock_page(page);
 410					page_cache_release(page);
 411					goto free_cache;
 412				}
 413			} else {
 414				e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
 415				if (!e->bitmap) {
 416					kunmap(page);
 417					kmem_cache_free(
 418						btrfs_free_space_cachep, e);
 419					unlock_page(page);
 420					page_cache_release(page);
 421					goto free_cache;
 422				}
 423				spin_lock(&ctl->tree_lock);
 424				ret = link_free_space(ctl, e);
 425				ctl->total_bitmaps++;
 426				ctl->op->recalc_thresholds(ctl);
 427				spin_unlock(&ctl->tree_lock);
 428				if (ret) {
 429					printk(KERN_ERR "Duplicate entries in "
 430					       "free space cache, dumping\n");
 431					kunmap(page);
 432					unlock_page(page);
 433					page_cache_release(page);
 434					goto free_cache;
 435				}
 436				list_add_tail(&e->list, &bitmaps);
 437			}
 438
 439			num_entries--;
 440			offset += sizeof(struct btrfs_free_space_entry);
 441			if (offset + sizeof(struct btrfs_free_space_entry) >=
 442			    PAGE_CACHE_SIZE)
 443				break;
 444			entry++;
 445		}
 446
 447		/*
  448		 * We read an entry out of this page, so we need to move on to the
 449		 * next page.
 450		 */
 451		if (need_loop) {
 452			kunmap(page);
 453			goto next;
 454		}
 455
 456		/*
  457		 * The bitmap pages come after all of the entry pages, in the same
  458		 * order in which the bitmap entries were added to the cache.
 459		 */
 460		e = list_entry(bitmaps.next, struct btrfs_free_space, list);
 461		list_del_init(&e->list);
 462		memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
 463		kunmap(page);
 464		num_bitmaps--;
 465next:
 466		unlock_page(page);
 467		page_cache_release(page);
 468		index++;
 469	}
 470
 471	ret = 1;
 472out:
 473	kfree(checksums);
 474	kfree(disk_crcs);
 475	return ret;
 476free_cache:
 477	__btrfs_remove_free_space_cache(ctl);
 478	goto out;
 479}
 480
 481int load_free_space_cache(struct btrfs_fs_info *fs_info,
 482			  struct btrfs_block_group_cache *block_group)
 483{
 484	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 485	struct btrfs_root *root = fs_info->tree_root;
 486	struct inode *inode;
 487	struct btrfs_path *path;
 488	int ret;
 489	bool matched;
 490	u64 used = btrfs_block_group_used(&block_group->item);
 491
 492	/*
 493	 * If we're unmounting then just return, since this does a search on the
 494	 * normal root and not the commit root and we could deadlock.
 495	 */
 496	if (btrfs_fs_closing(fs_info))
 497		return 0;
 498
 499	/*
 500	 * If this block group has been marked to be cleared for one reason or
 501	 * another then we can't trust the on disk cache, so just return.
 502	 */
 503	spin_lock(&block_group->lock);
 504	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
 505		spin_unlock(&block_group->lock);
 506		return 0;
 507	}
 508	spin_unlock(&block_group->lock);
 509
 510	path = btrfs_alloc_path();
 511	if (!path)
 512		return 0;
 513
 514	inode = lookup_free_space_inode(root, block_group, path);
 515	if (IS_ERR(inode)) {
 516		btrfs_free_path(path);
 517		return 0;
 518	}
 519
 520	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
 521				      path, block_group->key.objectid);
 522	btrfs_free_path(path);
 523	if (ret <= 0)
 524		goto out;
 525
 526	spin_lock(&ctl->tree_lock);
 527	matched = (ctl->free_space == (block_group->key.offset - used -
 528				       block_group->bytes_super));
 529	spin_unlock(&ctl->tree_lock);
 530
 531	if (!matched) {
 532		__btrfs_remove_free_space_cache(ctl);
  533		printk(KERN_ERR "block group %llu has a wrong amount of free "
 534		       "space\n", block_group->key.objectid);
 535		ret = -1;
 536	}
 537out:
 538	if (ret < 0) {
 539		/* This cache is bogus, make sure it gets cleared */
 540		spin_lock(&block_group->lock);
 541		block_group->disk_cache_state = BTRFS_DC_CLEAR;
 542		spin_unlock(&block_group->lock);
 543		ret = 0;
 544
 545		printk(KERN_ERR "btrfs: failed to load free space cache "
 546		       "for block group %llu\n", block_group->key.objectid);
 547	}
 548
 549	iput(inode);
 550	return ret;
 551}
 552
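/*
 * Serialize everything in 'ctl' into the free space cache inode.  Page 0
 * begins with one u32 crc per page followed by the transaction id; extent
 * and bitmap entries are then packed page by page, and every bitmap gets a
 * full page of its own after the entries.  Pinned extents inside the block
 * group's range are written out as free space too, so that space is not
 * lost when the cache is read back after the transaction commits.
 * 'block_group' may be NULL, in which case the cluster and pinned-extent
 * handling is skipped.  Returns 1 if the cache was written, 0 if nothing
 * was written, and -1 on error; for anything other than 1 the inode's
 * generation is cleared so the partial cache will not be loaded later.
 */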
 553int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 554			    struct btrfs_free_space_ctl *ctl,
 555			    struct btrfs_block_group_cache *block_group,
 556			    struct btrfs_trans_handle *trans,
 557			    struct btrfs_path *path, u64 offset)
 558{
 559	struct btrfs_free_space_header *header;
 560	struct extent_buffer *leaf;
 561	struct rb_node *node;
 562	struct list_head *pos, *n;
 563	struct page **pages;
 564	struct page *page;
 565	struct extent_state *cached_state = NULL;
 566	struct btrfs_free_cluster *cluster = NULL;
 567	struct extent_io_tree *unpin = NULL;
 568	struct list_head bitmap_list;
 569	struct btrfs_key key;
 570	u64 start, end, len;
 571	u64 bytes = 0;
 572	u32 *crc, *checksums;
 573	unsigned long first_page_offset;
 574	int index = 0, num_pages = 0;
 575	int entries = 0;
 576	int bitmaps = 0;
 577	int ret = -1;
 578	bool next_page = false;
 579	bool out_of_space = false;
 580
 581	INIT_LIST_HEAD(&bitmap_list);
 582
 583	node = rb_first(&ctl->free_space_offset);
 584	if (!node)
 585		return 0;
 586
 587	if (!i_size_read(inode))
 588		return -1;
 589
 590	num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
 591		PAGE_CACHE_SHIFT;
 592
  593	/* Since the first page has all of our checksums and our generation, we
  594	 * need to calculate the offset into the page at which we can start writing
 595	 * our entries.
 596	 */
 597	first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
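	/*
	 * The resulting layout of the first page is:
	 *
	 *   [u32 crc] * num_pages | u64 generation | packed free space entries
	 *
	 * and every later page holds either packed entries or one whole bitmap.
	 */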
 598
 599	filemap_write_and_wait(inode->i_mapping);
 600	btrfs_wait_ordered_range(inode, inode->i_size &
 601				 ~(root->sectorsize - 1), (u64)-1);
 602
 603	/* make sure we don't overflow that first page */
 604	if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) {
 605		/* this is really the same as running out of space, where we also return 0 */
 606		printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n");
 607		ret = 0;
 608		goto out_update;
 609	}
 610
 611	/* We need a checksum per page. */
 612	crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
 613	if (!crc)
 614		return -1;
 615
 616	pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
 617	if (!pages) {
 618		kfree(crc);
 619		return -1;
 620	}
 621
 622	/* Get the cluster for this block_group if it exists */
 623	if (block_group && !list_empty(&block_group->cluster_list))
 624		cluster = list_entry(block_group->cluster_list.next,
 625				     struct btrfs_free_cluster,
 626				     block_group_list);
 627
 628	/*
 629	 * We shouldn't have switched the pinned extents yet so this is the
 630	 * right one
 631	 */
 632	unpin = root->fs_info->pinned_extents;
 633
 634	/*
 635	 * Lock all pages first so we can lock the extent safely.
 636	 *
 637	 * NOTE: Because we hold the ref the entire time we're going to write to
  638	 * the page, find_get_page should never fail, so we don't do a check
 639	 * after find_get_page at this point.  Just putting this here so people
 640	 * know and don't freak out.
 641	 */
 642	while (index < num_pages) {
 643		page = grab_cache_page(inode->i_mapping, index);
 644		if (!page) {
 645			int i;
 646
 647			for (i = 0; i < num_pages; i++) {
 648				unlock_page(pages[i]);
 649				page_cache_release(pages[i]);
 650			}
 651			goto out_free;
 652		}
 653		pages[index] = page;
 654		index++;
 655	}
 656
 657	index = 0;
 658	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
 659			 0, &cached_state, GFP_NOFS);
 660
 661	/*
 662	 * When searching for pinned extents, we need to start at our start
 663	 * offset.
 664	 */
 665	if (block_group)
 666		start = block_group->key.objectid;
 667
 668	/* Write out the extent entries */
 669	do {
 670		struct btrfs_free_space_entry *entry;
 671		void *addr;
 672		unsigned long offset = 0;
 673		unsigned long start_offset = 0;
 674
 675		next_page = false;
 676
 677		if (index == 0) {
 678			start_offset = first_page_offset;
 679			offset = start_offset;
 680		}
 681
 682		if (index >= num_pages) {
 683			out_of_space = true;
 684			break;
 685		}
 686
 687		page = pages[index];
 688
 689		addr = kmap(page);
 690		entry = addr + start_offset;
 691
 692		memset(addr, 0, PAGE_CACHE_SIZE);
 693		while (node && !next_page) {
 694			struct btrfs_free_space *e;
 695
 696			e = rb_entry(node, struct btrfs_free_space, offset_index);
 697			entries++;
 698
 699			entry->offset = cpu_to_le64(e->offset);
 700			entry->bytes = cpu_to_le64(e->bytes);
 701			if (e->bitmap) {
 702				entry->type = BTRFS_FREE_SPACE_BITMAP;
 703				list_add_tail(&e->list, &bitmap_list);
 704				bitmaps++;
 705			} else {
 706				entry->type = BTRFS_FREE_SPACE_EXTENT;
 707			}
 708			node = rb_next(node);
 709			if (!node && cluster) {
 710				node = rb_first(&cluster->root);
 711				cluster = NULL;
 712			}
 713			offset += sizeof(struct btrfs_free_space_entry);
 714			if (offset + sizeof(struct btrfs_free_space_entry) >=
 715			    PAGE_CACHE_SIZE)
 716				next_page = true;
 717			entry++;
 718		}
 719
 720		/*
 721		 * We want to add any pinned extents to our free space cache
 722		 * so we don't leak the space
 723		 */
 724		while (block_group && !next_page &&
 725		       (start < block_group->key.objectid +
 726			block_group->key.offset)) {
 727			ret = find_first_extent_bit(unpin, start, &start, &end,
 728						    EXTENT_DIRTY);
 729			if (ret) {
 730				ret = 0;
 731				break;
 732			}
 733
 734			/* This pinned extent is out of our range */
 735			if (start >= block_group->key.objectid +
 736			    block_group->key.offset)
 737				break;
 738
 739			len = block_group->key.objectid +
 740				block_group->key.offset - start;
 741			len = min(len, end + 1 - start);
 742
 743			entries++;
 744			entry->offset = cpu_to_le64(start);
 745			entry->bytes = cpu_to_le64(len);
 746			entry->type = BTRFS_FREE_SPACE_EXTENT;
 747
 748			start = end + 1;
 749			offset += sizeof(struct btrfs_free_space_entry);
 750			if (offset + sizeof(struct btrfs_free_space_entry) >=
 751			    PAGE_CACHE_SIZE)
 752				next_page = true;
 753			entry++;
 754		}
 755		*crc = ~(u32)0;
 756		*crc = btrfs_csum_data(root, addr + start_offset, *crc,
 757				       PAGE_CACHE_SIZE - start_offset);
 758		kunmap(page);
 759
 760		btrfs_csum_final(*crc, (char *)crc);
 761		crc++;
 762
 763		bytes += PAGE_CACHE_SIZE;
 764
 765		index++;
 766	} while (node || next_page);
 767
 768	/* Write out the bitmaps */
 769	list_for_each_safe(pos, n, &bitmap_list) {
 770		void *addr;
 771		struct btrfs_free_space *entry =
 772			list_entry(pos, struct btrfs_free_space, list);
 773
 774		if (index >= num_pages) {
 775			out_of_space = true;
 776			break;
 777		}
 778		page = pages[index];
 779
 780		addr = kmap(page);
 781		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
 782		*crc = ~(u32)0;
 783		*crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
 784		kunmap(page);
 785		btrfs_csum_final(*crc, (char *)crc);
 786		crc++;
 787		bytes += PAGE_CACHE_SIZE;
 788
 789		list_del_init(&entry->list);
 790		index++;
 791	}
 792
 793	if (out_of_space) {
 794		btrfs_drop_pages(pages, num_pages);
 795		unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
 796				     i_size_read(inode) - 1, &cached_state,
 797				     GFP_NOFS);
 798		ret = 0;
 799		goto out_free;
 800	}
 801
 802	/* Zero out the rest of the pages just to make sure */
 803	while (index < num_pages) {
 804		void *addr;
 805
 806		page = pages[index];
 807		addr = kmap(page);
 808		memset(addr, 0, PAGE_CACHE_SIZE);
 809		kunmap(page);
 810		bytes += PAGE_CACHE_SIZE;
 811		index++;
 812	}
 813
 814	/* Write the checksums and trans id to the first page */
 815	{
 816		void *addr;
 817		u64 *gen;
 818
 819		page = pages[0];
 820
 821		addr = kmap(page);
 822		memcpy(addr, checksums, sizeof(u32) * num_pages);
 823		gen = addr + (sizeof(u32) * num_pages);
 824		*gen = trans->transid;
 825		kunmap(page);
 826	}
 827
 828	ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
 829					    bytes, &cached_state);
 830	btrfs_drop_pages(pages, num_pages);
 831	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
 832			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
 833
 834	if (ret) {
 835		ret = 0;
 836		goto out_free;
 837	}
 838
 839	BTRFS_I(inode)->generation = trans->transid;
 840
 841	filemap_write_and_wait(inode->i_mapping);
 842
 843	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
 844	key.offset = offset;
 845	key.type = 0;
 846
 847	ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
 848	if (ret < 0) {
 849		ret = -1;
 850		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
 851				 EXTENT_DIRTY | EXTENT_DELALLOC |
 852				 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
 853		goto out_free;
 854	}
 855	leaf = path->nodes[0];
 856	if (ret > 0) {
 857		struct btrfs_key found_key;
 858		BUG_ON(!path->slots[0]);
 859		path->slots[0]--;
 860		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 861		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
 862		    found_key.offset != offset) {
 863			ret = -1;
 864			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
 865					 EXTENT_DIRTY | EXTENT_DELALLOC |
 866					 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
 867					 GFP_NOFS);
 868			btrfs_release_path(path);
 869			goto out_free;
 870		}
 871	}
 872	header = btrfs_item_ptr(leaf, path->slots[0],
 873				struct btrfs_free_space_header);
 874	btrfs_set_free_space_entries(leaf, header, entries);
 875	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
 876	btrfs_set_free_space_generation(leaf, header, trans->transid);
 877	btrfs_mark_buffer_dirty(leaf);
 878	btrfs_release_path(path);
 879
 880	ret = 1;
 881
 882out_free:
 883	kfree(checksums);
 884	kfree(pages);
 885
 886out_update:
 887	if (ret != 1) {
 888		invalidate_inode_pages2_range(inode->i_mapping, 0, index);
 889		BTRFS_I(inode)->generation = 0;
 890	}
 891	btrfs_update_inode(trans, root, inode);
 892	return ret;
 893}
 894
 895int btrfs_write_out_cache(struct btrfs_root *root,
 896			  struct btrfs_trans_handle *trans,
 897			  struct btrfs_block_group_cache *block_group,
 898			  struct btrfs_path *path)
 899{
 900	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 901	struct inode *inode;
 902	int ret = 0;
 903
 904	root = root->fs_info->tree_root;
 905
 906	spin_lock(&block_group->lock);
 907	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
 908		spin_unlock(&block_group->lock);
 909		return 0;
 910	}
 911	spin_unlock(&block_group->lock);
 912
 913	inode = lookup_free_space_inode(root, block_group, path);
 914	if (IS_ERR(inode))
 915		return 0;
 916
 917	ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
 918				      path, block_group->key.objectid);
 919	if (ret < 0) {
 920		spin_lock(&block_group->lock);
 921		block_group->disk_cache_state = BTRFS_DC_ERROR;
 922		spin_unlock(&block_group->lock);
 923		ret = 0;
 924
  925		printk(KERN_ERR "btrfs: failed to write free space cache "
 926		       "for block group %llu\n", block_group->key.objectid);
 927	}
 928
 929	iput(inode);
 930	return ret;
 931}
 932
 933static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
 934					  u64 offset)
 935{
 936	BUG_ON(offset < bitmap_start);
 937	offset -= bitmap_start;
 938	return (unsigned long)(div_u64(offset, unit));
 939}
 940
 941static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
 942{
 943	return (unsigned long)(div_u64(bytes, unit));
 944}
 945
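/*
 * Round 'offset' down to the start of the bitmap that would cover it:
 * (offset - ctl->start) is truncated to a multiple of BITS_PER_BITMAP *
 * ctl->unit and ctl->start is added back.  E.g. with ctl->start == 0 and
 * 128MiB covered per bitmap, an offset of 200MiB maps to the bitmap
 * starting at 128MiB.
 */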
 946static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
 947				   u64 offset)
 948{
 949	u64 bitmap_start;
 950	u64 bytes_per_bitmap;
 951
 952	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
 953	bitmap_start = offset - ctl->start;
 954	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
 955	bitmap_start *= bytes_per_bitmap;
 956	bitmap_start += ctl->start;
 957
 958	return bitmap_start;
 959}
 960
 961static int tree_insert_offset(struct rb_root *root, u64 offset,
 962			      struct rb_node *node, int bitmap)
 963{
 964	struct rb_node **p = &root->rb_node;
 965	struct rb_node *parent = NULL;
 966	struct btrfs_free_space *info;
 967
 968	while (*p) {
 969		parent = *p;
 970		info = rb_entry(parent, struct btrfs_free_space, offset_index);
 971
 972		if (offset < info->offset) {
 973			p = &(*p)->rb_left;
 974		} else if (offset > info->offset) {
 975			p = &(*p)->rb_right;
 976		} else {
 977			/*
 978			 * we could have a bitmap entry and an extent entry
 979			 * share the same offset.  If this is the case, we want
 980			 * the extent entry to always be found first if we do a
 981			 * linear search through the tree, since we want to have
 982			 * the quickest allocation time, and allocating from an
 983			 * extent is faster than allocating from a bitmap.  So
 984			 * if we're inserting a bitmap and we find an entry at
 985			 * this offset, we want to go right, or after this entry
 986			 * logically.  If we are inserting an extent and we've
 987			 * found a bitmap, we want to go left, or before
 988			 * logically.
 989			 */
 990			if (bitmap) {
 991				if (info->bitmap) {
 992					WARN_ON_ONCE(1);
 993					return -EEXIST;
 994				}
 995				p = &(*p)->rb_right;
 996			} else {
 997				if (!info->bitmap) {
 998					WARN_ON_ONCE(1);
 999					return -EEXIST;
1000				}
1001				p = &(*p)->rb_left;
1002			}
1003		}
1004	}
1005
1006	rb_link_node(node, parent, p);
1007	rb_insert_color(node, root);
1008
1009	return 0;
1010}
1011
1012/*
1013 * searches the tree for the given offset.
1014 *
1015 * fuzzy - If this is set, then we are trying to make an allocation, and we just
1016 * want a section that has at least bytes size and comes at or after the given
1017 * offset.
1018 */
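/*
 * bitmap_only - only return a bitmap entry, and only one that starts exactly
 * at 'offset'; an extent entry sharing that offset is skipped over.
 */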
1019static struct btrfs_free_space *
1020tree_search_offset(struct btrfs_free_space_ctl *ctl,
1021		   u64 offset, int bitmap_only, int fuzzy)
1022{
1023	struct rb_node *n = ctl->free_space_offset.rb_node;
1024	struct btrfs_free_space *entry, *prev = NULL;
1025
1026	/* find entry that is closest to the 'offset' */
1027	while (1) {
1028		if (!n) {
1029			entry = NULL;
1030			break;
1031		}
1032
1033		entry = rb_entry(n, struct btrfs_free_space, offset_index);
1034		prev = entry;
1035
1036		if (offset < entry->offset)
1037			n = n->rb_left;
1038		else if (offset > entry->offset)
1039			n = n->rb_right;
1040		else
1041			break;
1042	}
1043
1044	if (bitmap_only) {
1045		if (!entry)
1046			return NULL;
1047		if (entry->bitmap)
1048			return entry;
1049
1050		/*
 1051		 * a bitmap entry and an extent entry may share the same offset;
 1052		 * in that case, the bitmap entry comes after the extent entry.
1053		 */
1054		n = rb_next(n);
1055		if (!n)
1056			return NULL;
1057		entry = rb_entry(n, struct btrfs_free_space, offset_index);
1058		if (entry->offset != offset)
1059			return NULL;
1060
1061		WARN_ON(!entry->bitmap);
1062		return entry;
1063	} else if (entry) {
1064		if (entry->bitmap) {
1065			/*
1066			 * if previous extent entry covers the offset,
1067			 * we should return it instead of the bitmap entry
1068			 */
1069			n = &entry->offset_index;
1070			while (1) {
1071				n = rb_prev(n);
1072				if (!n)
1073					break;
1074				prev = rb_entry(n, struct btrfs_free_space,
1075						offset_index);
1076				if (!prev->bitmap) {
1077					if (prev->offset + prev->bytes > offset)
1078						entry = prev;
1079					break;
1080				}
1081			}
1082		}
1083		return entry;
1084	}
1085
1086	if (!prev)
1087		return NULL;
1088
1089	/* find last entry before the 'offset' */
1090	entry = prev;
1091	if (entry->offset > offset) {
1092		n = rb_prev(&entry->offset_index);
1093		if (n) {
1094			entry = rb_entry(n, struct btrfs_free_space,
1095					offset_index);
1096			BUG_ON(entry->offset > offset);
1097		} else {
1098			if (fuzzy)
1099				return entry;
1100			else
1101				return NULL;
1102		}
1103	}
1104
1105	if (entry->bitmap) {
1106		n = &entry->offset_index;
1107		while (1) {
1108			n = rb_prev(n);
1109			if (!n)
1110				break;
1111			prev = rb_entry(n, struct btrfs_free_space,
1112					offset_index);
1113			if (!prev->bitmap) {
1114				if (prev->offset + prev->bytes > offset)
1115					return prev;
1116				break;
1117			}
1118		}
1119		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1120			return entry;
1121	} else if (entry->offset + entry->bytes > offset)
1122		return entry;
1123
1124	if (!fuzzy)
1125		return NULL;
1126
1127	while (1) {
1128		if (entry->bitmap) {
1129			if (entry->offset + BITS_PER_BITMAP *
1130			    ctl->unit > offset)
1131				break;
1132		} else {
1133			if (entry->offset + entry->bytes > offset)
1134				break;
1135		}
1136
1137		n = rb_next(&entry->offset_index);
1138		if (!n)
1139			return NULL;
1140		entry = rb_entry(n, struct btrfs_free_space, offset_index);
1141	}
1142	return entry;
1143}
1144
1145static inline void
1146__unlink_free_space(struct btrfs_free_space_ctl *ctl,
1147		    struct btrfs_free_space *info)
1148{
1149	rb_erase(&info->offset_index, &ctl->free_space_offset);
1150	ctl->free_extents--;
1151}
1152
1153static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1154			      struct btrfs_free_space *info)
1155{
1156	__unlink_free_space(ctl, info);
1157	ctl->free_space -= info->bytes;
1158}
1159
1160static int link_free_space(struct btrfs_free_space_ctl *ctl,
1161			   struct btrfs_free_space *info)
1162{
1163	int ret = 0;
1164
1165	BUG_ON(!info->bitmap && !info->bytes);
1166	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1167				 &info->offset_index, (info->bitmap != NULL));
1168	if (ret)
1169		return ret;
1170
1171	ctl->free_space += info->bytes;
1172	ctl->free_extents++;
1173	return ret;
1174}
1175
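/*
 * Worked example (assuming 4KiB pages and sectors, a 1GiB block group and no
 * bitmaps yet): max_bytes = 32KiB, bitmap_bytes = (0 + 1) * 4KiB = 4KiB,
 * extent_bytes = min(32KiB - 4KiB, 32KiB / 2) = 16KiB, so extents_thresh
 * ends up at 16KiB / sizeof(struct btrfs_free_space) extent entries before
 * new free space starts being pushed into bitmaps instead.
 */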
1176static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1177{
1178	struct btrfs_block_group_cache *block_group = ctl->private;
1179	u64 max_bytes;
1180	u64 bitmap_bytes;
1181	u64 extent_bytes;
1182	u64 size = block_group->key.offset;
1183	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1184	int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1185
1186	BUG_ON(ctl->total_bitmaps > max_bitmaps);
1187
1188	/*
 1189	 * The goal is to keep the total amount of memory used per 1GB of space
 1190	 * at or below 32KB, so we need to adjust how much memory we allow to be
 1191	 * used by extent-based free space tracking.
1192	 */
1193	if (size < 1024 * 1024 * 1024)
1194		max_bytes = MAX_CACHE_BYTES_PER_GIG;
1195	else
1196		max_bytes = MAX_CACHE_BYTES_PER_GIG *
1197			div64_u64(size, 1024 * 1024 * 1024);
1198
1199	/*
1200	 * we want to account for 1 more bitmap than what we have so we can make
1201	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1202	 * we add more bitmaps.
1203	 */
1204	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1205
1206	if (bitmap_bytes >= max_bytes) {
1207		ctl->extents_thresh = 0;
1208		return;
1209	}
1210
1211	/*
 1212	 * we want the extent entry threshold to always be at most 1/2 the max
 1213	 * bytes we can have, or whatever is left after the bitmaps if that is less.
1214	 */
1215	extent_bytes = max_bytes - bitmap_bytes;
1216	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1217
1218	ctl->extents_thresh =
1219		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1220}
1221
1222static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1223			      struct btrfs_free_space *info, u64 offset,
1224			      u64 bytes)
1225{
1226	unsigned long start, count;
1227
1228	start = offset_to_bit(info->offset, ctl->unit, offset);
1229	count = bytes_to_bits(bytes, ctl->unit);
1230	BUG_ON(start + count > BITS_PER_BITMAP);
1231
1232	bitmap_clear(info->bitmap, start, count);
1233
1234	info->bytes -= bytes;
1235	ctl->free_space -= bytes;
1236}
1237
1238static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1239			    struct btrfs_free_space *info, u64 offset,
1240			    u64 bytes)
1241{
1242	unsigned long start, count;
1243
1244	start = offset_to_bit(info->offset, ctl->unit, offset);
1245	count = bytes_to_bits(bytes, ctl->unit);
1246	BUG_ON(start + count > BITS_PER_BITMAP);
1247
1248	bitmap_set(info->bitmap, start, count);
1249
1250	info->bytes += bytes;
1251	ctl->free_space += bytes;
1252}
1253
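/*
 * Look for a run of at least '*bytes' contiguous free bytes in this bitmap,
 * starting at or after '*offset'.  On success, *offset and *bytes are
 * updated to the start and length of the run found (which may be longer
 * than requested) and 0 is returned, otherwise -1.
 */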
1254static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1255			 struct btrfs_free_space *bitmap_info, u64 *offset,
1256			 u64 *bytes)
1257{
1258	unsigned long found_bits = 0;
1259	unsigned long bits, i;
1260	unsigned long next_zero;
1261
1262	i = offset_to_bit(bitmap_info->offset, ctl->unit,
1263			  max_t(u64, *offset, bitmap_info->offset));
1264	bits = bytes_to_bits(*bytes, ctl->unit);
1265
1266	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1267	     i < BITS_PER_BITMAP;
1268	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1269		next_zero = find_next_zero_bit(bitmap_info->bitmap,
1270					       BITS_PER_BITMAP, i);
1271		if ((next_zero - i) >= bits) {
1272			found_bits = next_zero - i;
1273			break;
1274		}
1275		i = next_zero;
1276	}
1277
1278	if (found_bits) {
1279		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1280		*bytes = (u64)(found_bits) * ctl->unit;
1281		return 0;
1282	}
1283
1284	return -1;
1285}
1286
1287static struct btrfs_free_space *
1288find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1289{
1290	struct btrfs_free_space *entry;
1291	struct rb_node *node;
1292	int ret;
1293
1294	if (!ctl->free_space_offset.rb_node)
1295		return NULL;
1296
1297	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1298	if (!entry)
1299		return NULL;
1300
1301	for (node = &entry->offset_index; node; node = rb_next(node)) {
1302		entry = rb_entry(node, struct btrfs_free_space, offset_index);
1303		if (entry->bytes < *bytes)
1304			continue;
1305
1306		if (entry->bitmap) {
1307			ret = search_bitmap(ctl, entry, offset, bytes);
1308			if (!ret)
1309				return entry;
1310			continue;
1311		}
1312
1313		*offset = entry->offset;
1314		*bytes = entry->bytes;
1315		return entry;
1316	}
1317
1318	return NULL;
1319}
1320
1321static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1322			   struct btrfs_free_space *info, u64 offset)
1323{
1324	info->offset = offset_to_bitmap(ctl, offset);
1325	info->bytes = 0;
1326	link_free_space(ctl, info);
1327	ctl->total_bitmaps++;
1328
1329	ctl->op->recalc_thresholds(ctl);
1330}
1331
1332static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1333			struct btrfs_free_space *bitmap_info)
1334{
1335	unlink_free_space(ctl, bitmap_info);
1336	kfree(bitmap_info->bitmap);
1337	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1338	ctl->total_bitmaps--;
1339	ctl->op->recalc_thresholds(ctl);
1340}
1341
1342static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1343			      struct btrfs_free_space *bitmap_info,
1344			      u64 *offset, u64 *bytes)
1345{
1346	u64 end;
1347	u64 search_start, search_bytes;
1348	int ret;
1349
1350again:
1351	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1352
1353	/*
1354	 * XXX - this can go away after a few releases.
1355	 *
1356	 * since the only user of btrfs_remove_free_space is the tree logging
1357	 * stuff, and the only way to test that is under crash conditions, we
 1358	 * want to have this debug stuff here just in case something's not
 1359	 * working.  Search the bitmap for the space we are trying to use to
 1360	 * make sure it's actually there.  If it's not there then we need to stop
1361	 * because something has gone wrong.
1362	 */
1363	search_start = *offset;
1364	search_bytes = *bytes;
1365	search_bytes = min(search_bytes, end - search_start + 1);
1366	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1367	BUG_ON(ret < 0 || search_start != *offset);
1368
1369	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1370		bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1371		*bytes -= end - *offset + 1;
1372		*offset = end + 1;
1373	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1374		bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1375		*bytes = 0;
1376	}
1377
1378	if (*bytes) {
1379		struct rb_node *next = rb_next(&bitmap_info->offset_index);
1380		if (!bitmap_info->bytes)
1381			free_bitmap(ctl, bitmap_info);
1382
1383		/*
1384		 * no entry after this bitmap, but we still have bytes to
1385		 * remove, so something has gone wrong.
1386		 */
1387		if (!next)
1388			return -EINVAL;
1389
1390		bitmap_info = rb_entry(next, struct btrfs_free_space,
1391				       offset_index);
1392
1393		/*
1394		 * if the next entry isn't a bitmap we need to return to let the
1395		 * extent stuff do its work.
1396		 */
1397		if (!bitmap_info->bitmap)
1398			return -EAGAIN;
1399
1400		/*
1401		 * Ok the next item is a bitmap, but it may not actually hold
1402		 * the information for the rest of this free space stuff, so
1403		 * look for it, and if we don't find it return so we can try
1404		 * everything over again.
1405		 */
1406		search_start = *offset;
1407		search_bytes = *bytes;
1408		ret = search_bitmap(ctl, bitmap_info, &search_start,
1409				    &search_bytes);
1410		if (ret < 0 || search_start != *offset)
1411			return -EAGAIN;
1412
1413		goto again;
1414	} else if (!bitmap_info->bytes)
1415		free_bitmap(ctl, bitmap_info);
1416
1417	return 0;
1418}
1419
1420static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1421			       struct btrfs_free_space *info, u64 offset,
1422			       u64 bytes)
1423{
1424	u64 bytes_to_set = 0;
1425	u64 end;
1426
1427	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1428
1429	bytes_to_set = min(end - offset, bytes);
1430
1431	bitmap_set_bits(ctl, info, offset, bytes_to_set);
1432
1433	return bytes_to_set;
1434
1435}
1436
1437static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1438		      struct btrfs_free_space *info)
1439{
1440	struct btrfs_block_group_cache *block_group = ctl->private;
1441
1442	/*
1443	 * If we are below the extents threshold then we can add this as an
1444	 * extent, and don't have to deal with the bitmap
1445	 */
1446	if (ctl->free_extents < ctl->extents_thresh) {
1447		/*
1448		 * If this block group has some small extents we don't want to
 1449		 * use up all of our free slots in the cache with them; we want
 1450		 * to reserve them for larger extents.  However, if we have plenty
 1451		 * of cache left then go ahead and add them, no sense in adding
1452		 * the overhead of a bitmap if we don't have to.
1453		 */
1454		if (info->bytes <= block_group->sectorsize * 4) {
1455			if (ctl->free_extents * 2 <= ctl->extents_thresh)
1456				return false;
1457		} else {
1458			return false;
1459		}
1460	}
1461
1462	/*
1463	 * some block groups are so tiny they can't be enveloped by a bitmap, so
 1464	 * don't even bother to create a bitmap for them.
1465	 */
1466	if (BITS_PER_BITMAP * block_group->sectorsize >
1467	    block_group->key.offset)
1468		return false;
1469
1470	return true;
1471}
1472
1473static struct btrfs_free_space_op free_space_op = {
1474	.recalc_thresholds	= recalculate_thresholds,
1475	.use_bitmap		= use_bitmap,
1476};
1477
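/*
 * Try to account the free space described by 'info' in bitmaps rather than
 * as an extent entry.  The space is offered first to the bitmap owned by the
 * block group's cluster (if any), then to an existing bitmap covering this
 * offset, and finally a brand new bitmap is created, reusing 'info' itself
 * as the new entry when possible.  Returns 1 when all of the space went into
 * bitmaps, 0 when the caller should link an extent entry instead, and a
 * negative errno on allocation failure.
 */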
1478static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1479			      struct btrfs_free_space *info)
1480{
1481	struct btrfs_free_space *bitmap_info;
1482	struct btrfs_block_group_cache *block_group = NULL;
1483	int added = 0;
1484	u64 bytes, offset, bytes_added;
1485	int ret;
1486
1487	bytes = info->bytes;
1488	offset = info->offset;
1489
1490	if (!ctl->op->use_bitmap(ctl, info))
1491		return 0;
1492
1493	if (ctl->op == &free_space_op)
1494		block_group = ctl->private;
1495again:
1496	/*
1497	 * Since we link bitmaps right into the cluster we need to see if we
1498	 * have a cluster here, and if so and it has our bitmap we need to add
1499	 * the free space to that bitmap.
1500	 */
1501	if (block_group && !list_empty(&block_group->cluster_list)) {
1502		struct btrfs_free_cluster *cluster;
1503		struct rb_node *node;
1504		struct btrfs_free_space *entry;
1505
1506		cluster = list_entry(block_group->cluster_list.next,
1507				     struct btrfs_free_cluster,
1508				     block_group_list);
1509		spin_lock(&cluster->lock);
1510		node = rb_first(&cluster->root);
1511		if (!node) {
1512			spin_unlock(&cluster->lock);
1513			goto no_cluster_bitmap;
1514		}
1515
1516		entry = rb_entry(node, struct btrfs_free_space, offset_index);
1517		if (!entry->bitmap) {
1518			spin_unlock(&cluster->lock);
1519			goto no_cluster_bitmap;
1520		}
1521
1522		if (entry->offset == offset_to_bitmap(ctl, offset)) {
1523			bytes_added = add_bytes_to_bitmap(ctl, entry,
1524							  offset, bytes);
1525			bytes -= bytes_added;
1526			offset += bytes_added;
1527		}
1528		spin_unlock(&cluster->lock);
1529		if (!bytes) {
1530			ret = 1;
1531			goto out;
1532		}
1533	}
1534
1535no_cluster_bitmap:
1536	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1537					 1, 0);
1538	if (!bitmap_info) {
1539		BUG_ON(added);
1540		goto new_bitmap;
1541	}
1542
1543	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1544	bytes -= bytes_added;
1545	offset += bytes_added;
1546	added = 0;
1547
1548	if (!bytes) {
1549		ret = 1;
1550		goto out;
1551	} else
1552		goto again;
1553
1554new_bitmap:
1555	if (info && info->bitmap) {
1556		add_new_bitmap(ctl, info, offset);
1557		added = 1;
1558		info = NULL;
1559		goto again;
1560	} else {
1561		spin_unlock(&ctl->tree_lock);
1562
1563		/* no pre-allocated info, allocate a new one */
1564		if (!info) {
1565			info = kmem_cache_zalloc(btrfs_free_space_cachep,
1566						 GFP_NOFS);
1567			if (!info) {
1568				spin_lock(&ctl->tree_lock);
1569				ret = -ENOMEM;
1570				goto out;
1571			}
1572		}
1573
1574		/* allocate the bitmap */
1575		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1576		spin_lock(&ctl->tree_lock);
1577		if (!info->bitmap) {
1578			ret = -ENOMEM;
1579			goto out;
1580		}
1581		goto again;
1582	}
1583
1584out:
1585	if (info) {
1586		if (info->bitmap)
1587			kfree(info->bitmap);
1588		kmem_cache_free(btrfs_free_space_cachep, info);
1589	}
1590
1591	return ret;
1592}
1593
1594static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1595			  struct btrfs_free_space *info, bool update_stat)
1596{
1597	struct btrfs_free_space *left_info;
1598	struct btrfs_free_space *right_info;
1599	bool merged = false;
1600	u64 offset = info->offset;
1601	u64 bytes = info->bytes;
1602
1603	/*
1604	 * first we want to see if there is free space adjacent to the range we
 1605	 * are adding; if there is, remove that struct and add a new one to
1606	 * cover the entire range
1607	 */
1608	right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1609	if (right_info && rb_prev(&right_info->offset_index))
1610		left_info = rb_entry(rb_prev(&right_info->offset_index),
1611				     struct btrfs_free_space, offset_index);
1612	else
1613		left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1614
1615	if (right_info && !right_info->bitmap) {
1616		if (update_stat)
1617			unlink_free_space(ctl, right_info);
1618		else
1619			__unlink_free_space(ctl, right_info);
1620		info->bytes += right_info->bytes;
1621		kmem_cache_free(btrfs_free_space_cachep, right_info);
1622		merged = true;
1623	}
1624
1625	if (left_info && !left_info->bitmap &&
1626	    left_info->offset + left_info->bytes == offset) {
1627		if (update_stat)
1628			unlink_free_space(ctl, left_info);
1629		else
1630			__unlink_free_space(ctl, left_info);
1631		info->offset = left_info->offset;
1632		info->bytes += left_info->bytes;
1633		kmem_cache_free(btrfs_free_space_cachep, left_info);
1634		merged = true;
1635	}
1636
1637	return merged;
1638}
1639
1640int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1641			   u64 offset, u64 bytes)
1642{
1643	struct btrfs_free_space *info;
1644	int ret = 0;
1645
1646	info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1647	if (!info)
1648		return -ENOMEM;
1649
1650	info->offset = offset;
1651	info->bytes = bytes;
1652
1653	spin_lock(&ctl->tree_lock);
1654
1655	if (try_merge_free_space(ctl, info, true))
1656		goto link;
1657
1658	/*
 1659	 * If there was no extent directly to the left or right of this new
1660	 * extent then we know we're going to have to allocate a new extent, so
1661	 * before we do that see if we need to drop this into a bitmap
1662	 */
1663	ret = insert_into_bitmap(ctl, info);
1664	if (ret < 0) {
1665		goto out;
1666	} else if (ret) {
1667		ret = 0;
1668		goto out;
1669	}
1670link:
1671	ret = link_free_space(ctl, info);
1672	if (ret)
1673		kmem_cache_free(btrfs_free_space_cachep, info);
1674out:
1675	spin_unlock(&ctl->tree_lock);
1676
1677	if (ret) {
 1678		printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
1679		BUG_ON(ret == -EEXIST);
1680	}
1681
1682	return ret;
1683}
1684
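/*
 * Remove [offset, offset + bytes) from the block group's free space tree.
 * The range may be backed by a plain extent entry, which is shrunk or split
 * around the hole, or by one or more bitmaps, in which case
 * remove_from_bitmap() clears the bits and returns -EAGAIN so we restart
 * when the range continues in a following entry.
 */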
1685int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1686			    u64 offset, u64 bytes)
1687{
1688	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1689	struct btrfs_free_space *info;
1690	struct btrfs_free_space *next_info = NULL;
1691	int ret = 0;
1692
1693	spin_lock(&ctl->tree_lock);
1694
1695again:
1696	info = tree_search_offset(ctl, offset, 0, 0);
1697	if (!info) {
1698		/*
1699		 * oops didn't find an extent that matched the space we wanted
1700		 * to remove, look for a bitmap instead
1701		 */
1702		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1703					  1, 0);
1704		if (!info) {
1705			WARN_ON(1);
1706			goto out_lock;
1707		}
1708	}
1709
1710	if (info->bytes < bytes && rb_next(&info->offset_index)) {
1711		u64 end;
1712		next_info = rb_entry(rb_next(&info->offset_index),
1713					     struct btrfs_free_space,
1714					     offset_index);
1715
1716		if (next_info->bitmap)
1717			end = next_info->offset +
1718			      BITS_PER_BITMAP * ctl->unit - 1;
1719		else
1720			end = next_info->offset + next_info->bytes;
1721
1722		if (next_info->bytes < bytes ||
1723		    next_info->offset > offset || offset > end) {
1724			printk(KERN_CRIT "Found free space at %llu, size %llu,"
1725			      " trying to use %llu\n",
1726			      (unsigned long long)info->offset,
1727			      (unsigned long long)info->bytes,
1728			      (unsigned long long)bytes);
1729			WARN_ON(1);
1730			ret = -EINVAL;
1731			goto out_lock;
1732		}
1733
1734		info = next_info;
1735	}
1736
1737	if (info->bytes == bytes) {
1738		unlink_free_space(ctl, info);
1739		if (info->bitmap) {
1740			kfree(info->bitmap);
1741			ctl->total_bitmaps--;
1742		}
1743		kmem_cache_free(btrfs_free_space_cachep, info);
1744		goto out_lock;
1745	}
1746
1747	if (!info->bitmap && info->offset == offset) {
1748		unlink_free_space(ctl, info);
1749		info->offset += bytes;
1750		info->bytes -= bytes;
1751		link_free_space(ctl, info);
1752		goto out_lock;
1753	}
1754
1755	if (!info->bitmap && info->offset <= offset &&
1756	    info->offset + info->bytes >= offset + bytes) {
1757		u64 old_start = info->offset;
1758		/*
1759		 * we're freeing space in the middle of the info,
1760		 * this can happen during tree log replay
1761		 *
1762		 * first unlink the old info and then
1763		 * insert it again after the hole we're creating
1764		 */
1765		unlink_free_space(ctl, info);
1766		if (offset + bytes < info->offset + info->bytes) {
1767			u64 old_end = info->offset + info->bytes;
1768
1769			info->offset = offset + bytes;
1770			info->bytes = old_end - info->offset;
1771			ret = link_free_space(ctl, info);
1772			WARN_ON(ret);
1773			if (ret)
1774				goto out_lock;
1775		} else {
1776			/* the hole we're creating ends at the end
1777			 * of the info struct, just free the info
1778			 */
1779			kmem_cache_free(btrfs_free_space_cachep, info);
1780		}
1781		spin_unlock(&ctl->tree_lock);
1782
1783		/* step two, insert a new info struct to cover
1784		 * anything before the hole
1785		 */
1786		ret = btrfs_add_free_space(block_group, old_start,
1787					   offset - old_start);
1788		WARN_ON(ret);
1789		goto out;
1790	}
1791
1792	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1793	if (ret == -EAGAIN)
1794		goto again;
1795	BUG_ON(ret);
1796out_lock:
1797	spin_unlock(&ctl->tree_lock);
1798out:
1799	return ret;
1800}
1801
1802void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1803			   u64 bytes)
1804{
1805	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1806	struct btrfs_free_space *info;
1807	struct rb_node *n;
1808	int count = 0;
1809
1810	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1811		info = rb_entry(n, struct btrfs_free_space, offset_index);
1812		if (info->bytes >= bytes)
1813			count++;
1814		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1815		       (unsigned long long)info->offset,
1816		       (unsigned long long)info->bytes,
1817		       (info->bitmap) ? "yes" : "no");
1818	}
1819	printk(KERN_INFO "block group has cluster?: %s\n",
1820	       list_empty(&block_group->cluster_list) ? "no" : "yes");
 1821	printk(KERN_INFO "%d blocks of free space at or bigger than the given "
 1822	       "bytes\n", count);
1823}
1824
1825void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1826{
1827	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1828
1829	spin_lock_init(&ctl->tree_lock);
1830	ctl->unit = block_group->sectorsize;
1831	ctl->start = block_group->key.objectid;
1832	ctl->private = block_group;
1833	ctl->op = &free_space_op;
1834
1835	/*
 1836	 * we only want to have 32KB of RAM per block group for keeping
 1837	 * track of free space, and if we pass 1/2 of that we want to
 1838	 * start converting things over to using bitmaps.
1839	 */
1840	ctl->extents_thresh = ((1024 * 32) / 2) /
1841				sizeof(struct btrfs_free_space);
1842}
1843
1844/*
1845 * for a given cluster, put all of its extents back into the free
1846 * space cache.  If the block group passed doesn't match the block group
1847 * pointed to by the cluster, someone else raced in and freed the
1848 * cluster already.  In that case, we just return without changing anything
1849 */
1850static int
1851__btrfs_return_cluster_to_free_space(
1852			     struct btrfs_block_group_cache *block_group,
1853			     struct btrfs_free_cluster *cluster)
1854{
1855	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1856	struct btrfs_free_space *entry;
1857	struct rb_node *node;
1858
1859	spin_lock(&cluster->lock);
1860	if (cluster->block_group != block_group)
1861		goto out;
1862
1863	cluster->block_group = NULL;
1864	cluster->window_start = 0;
1865	list_del_init(&cluster->block_group_list);
1866
1867	node = rb_first(&cluster->root);
1868	while (node) {
1869		bool bitmap;
1870
1871		entry = rb_entry(node, struct btrfs_free_space, offset_index);
1872		node = rb_next(&entry->offset_index);
1873		rb_erase(&entry->offset_index, &cluster->root);
1874
1875		bitmap = (entry->bitmap != NULL);
1876		if (!bitmap)
1877			try_merge_free_space(ctl, entry, false);
1878		tree_insert_offset(&ctl->free_space_offset,
1879				   entry->offset, &entry->offset_index, bitmap);
1880	}
1881	cluster->root = RB_ROOT;
1882
1883out:
1884	spin_unlock(&cluster->lock);
1885	btrfs_put_block_group(block_group);
1886	return 0;
1887}
1888
1889void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
1890{
1891	struct btrfs_free_space *info;
1892	struct rb_node *node;
1893
1894	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
1895		info = rb_entry(node, struct btrfs_free_space, offset_index);
1896		if (!info->bitmap) {
1897			unlink_free_space(ctl, info);
1898			kmem_cache_free(btrfs_free_space_cachep, info);
1899		} else {
1900			free_bitmap(ctl, info);
1901		}
1902		if (need_resched()) {
1903			spin_unlock(&ctl->tree_lock);
1904			cond_resched();
1905			spin_lock(&ctl->tree_lock);
1906		}
1907	}
1908}
1909
1910void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
1911{
1912	spin_lock(&ctl->tree_lock);
1913	__btrfs_remove_free_space_cache_locked(ctl);
1914	spin_unlock(&ctl->tree_lock);
1915}
1916
1917void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1918{
1919	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1920	struct btrfs_free_cluster *cluster;
1921	struct list_head *head;
1922
1923	spin_lock(&ctl->tree_lock);
1924	while ((head = block_group->cluster_list.next) !=
1925	       &block_group->cluster_list) {
1926		cluster = list_entry(head, struct btrfs_free_cluster,
1927				     block_group_list);
1928
1929		WARN_ON(cluster->block_group != block_group);
1930		__btrfs_return_cluster_to_free_space(block_group, cluster);
1931		if (need_resched()) {
1932			spin_unlock(&ctl->tree_lock);
1933			cond_resched();
1934			spin_lock(&ctl->tree_lock);
1935		}
1936	}
1937	__btrfs_remove_free_space_cache_locked(ctl);
1938	spin_unlock(&ctl->tree_lock);
1939
1940}
1941
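/*
 * Find a free range of at least 'bytes' + 'empty_size' bytes, searching from
 * the bitmap-aligned region around 'offset' onwards.  Only 'bytes' of the
 * range found is actually removed from the free space tree.  Returns the
 * start offset handed out, or 0 if nothing suitable was found.
 */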
1942u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1943			       u64 offset, u64 bytes, u64 empty_size)
1944{
1945	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1946	struct btrfs_free_space *entry = NULL;
1947	u64 bytes_search = bytes + empty_size;
1948	u64 ret = 0;
1949
1950	spin_lock(&ctl->tree_lock);
1951	entry = find_free_space(ctl, &offset, &bytes_search);
1952	if (!entry)
1953		goto out;
1954
1955	ret = offset;
1956	if (entry->bitmap) {
1957		bitmap_clear_bits(ctl, entry, offset, bytes);
1958		if (!entry->bytes)
1959			free_bitmap(ctl, entry);
1960	} else {
1961		unlink_free_space(ctl, entry);
1962		entry->offset += bytes;
1963		entry->bytes -= bytes;
1964		if (!entry->bytes)
1965			kmem_cache_free(btrfs_free_space_cachep, entry);
1966		else
1967			link_free_space(ctl, entry);
1968	}
1969
1970out:
1971	spin_unlock(&ctl->tree_lock);
1972
1973	return ret;
1974}
1975
1976/*
1977 * given a clu…

Large files are truncated; the remainder of this file is not shown.