PageRenderTime 84ms CodeModel.GetById 8ms app.highlight 64ms RepoModel.GetById 0ms app.codeStats 1ms

/fs/btrfs/volumes.c

https://bitbucket.org/slukk/jb-tsm-kernel-4.2
C | 3718 lines | 2949 code | 513 blank | 256 comment | 540 complexity | 58b4fc98f53631a2fc9f626b94464381 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.0, AGPL-1.0

Large files are truncated, but you can click here to view the full file

   1/*
   2 * Copyright (C) 2007 Oracle.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16 * Boston, MA 021110-1307, USA.
  17 */
  18#include <linux/sched.h>
  19#include <linux/bio.h>
  20#include <linux/slab.h>
  21#include <linux/buffer_head.h>
  22#include <linux/blkdev.h>
  23#include <linux/random.h>
  24#include <linux/iocontext.h>
  25#include <linux/capability.h>
  26#include <asm/div64.h>
  27#include "compat.h"
  28#include "ctree.h"
  29#include "extent_map.h"
  30#include "disk-io.h"
  31#include "transaction.h"
  32#include "print-tree.h"
  33#include "volumes.h"
  34#include "async-thread.h"
  35
  36static int init_first_rw_device(struct btrfs_trans_handle *trans,
  37				struct btrfs_root *root,
  38				struct btrfs_device *device);
  39static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
  40
  41static DEFINE_MUTEX(uuid_mutex);
  42static LIST_HEAD(fs_uuids);
  43
/* Take the per-filesystem chunk mutex, serializing chunk allocation/removal. */
static void lock_chunks(struct btrfs_root *root)
{
	mutex_lock(&root->fs_info->chunk_mutex);
}
  48
/* Release the per-filesystem chunk mutex taken by lock_chunks(). */
static void unlock_chunks(struct btrfs_root *root)
{
	mutex_unlock(&root->fs_info->chunk_mutex);
}
  53
  54static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
  55{
  56	struct btrfs_device *device;
  57	WARN_ON(fs_devices->opened);
  58	while (!list_empty(&fs_devices->devices)) {
  59		device = list_entry(fs_devices->devices.next,
  60				    struct btrfs_device, dev_list);
  61		list_del(&device->dev_list);
  62		kfree(device->name);
  63		kfree(device);
  64	}
  65	kfree(fs_devices);
  66}
  67
  68int btrfs_cleanup_fs_uuids(void)
  69{
  70	struct btrfs_fs_devices *fs_devices;
  71
  72	while (!list_empty(&fs_uuids)) {
  73		fs_devices = list_entry(fs_uuids.next,
  74					struct btrfs_fs_devices, list);
  75		list_del(&fs_devices->list);
  76		free_fs_devices(fs_devices);
  77	}
  78	return 0;
  79}
  80
  81static noinline struct btrfs_device *__find_device(struct list_head *head,
  82						   u64 devid, u8 *uuid)
  83{
  84	struct btrfs_device *dev;
  85
  86	list_for_each_entry(dev, head, dev_list) {
  87		if (dev->devid == devid &&
  88		    (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
  89			return dev;
  90		}
  91	}
  92	return NULL;
  93}
  94
  95static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
  96{
  97	struct btrfs_fs_devices *fs_devices;
  98
  99	list_for_each_entry(fs_devices, &fs_uuids, list) {
 100		if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
 101			return fs_devices;
 102	}
 103	return NULL;
 104}
 105
 106static void requeue_list(struct btrfs_pending_bios *pending_bios,
 107			struct bio *head, struct bio *tail)
 108{
 109
 110	struct bio *old_head;
 111
 112	old_head = pending_bios->head;
 113	pending_bios->head = head;
 114	if (pending_bios->tail)
 115		tail->bi_next = old_head;
 116	else
 117		pending_bios->tail = tail;
 118}
 119
 120/*
 121 * we try to collect pending bios for a device so we don't get a large
 122 * number of procs sending bios down to the same device.  This greatly
 123 * improves the schedulers ability to collect and merge the bios.
 124 *
 125 * But, it also turns into a long list of bios to process and that is sure
 126 * to eventually make the worker thread block.  The solution here is to
 127 * make some progress and then put this work struct back at the end of
 128 * the list if the block device is congested.  This way, multiple devices
 129 * can make progress from a single worker thread.
 130 */
static noinline int run_scheduled_bios(struct btrfs_device *device)
{
	struct bio *pending;
	struct backing_dev_info *bdi;
	struct btrfs_fs_info *fs_info;
	struct btrfs_pending_bios *pending_bios;
	struct bio *tail;
	struct bio *cur;
	int again = 0;
	unsigned long num_run;
	unsigned long batch_run = 0;
	unsigned long limit;
	unsigned long last_waited = 0;
	int force_reg = 0;
	struct blk_plug plug;

	/*
	 * this function runs all the bios we've collected for
	 * a particular device.  We don't want to wander off to
	 * another device without first sending all of these down.
	 * So, setup a plug here and finish it off before we return
	 */
	blk_start_plug(&plug);

	bdi = blk_get_backing_dev_info(device->bdev);
	fs_info = device->dev_root->fs_info;
	limit = btrfs_async_submit_limit(fs_info);
	limit = limit * 2 / 3;

loop:
	spin_lock(&device->io_lock);

loop_lock:
	num_run = 0;

	/* take all the bios off the list at once and process them
	 * later on (without the lock held).  But, remember the
	 * tail and other pointers so the bios can be properly reinserted
	 * into the list if we hit congestion
	 */
	/*
	 * force_reg toggles each pass so the sync list and the regular
	 * list alternate when both have work queued.
	 */
	if (!force_reg && device->pending_sync_bios.head) {
		pending_bios = &device->pending_sync_bios;
		force_reg = 1;
	} else {
		pending_bios = &device->pending_bios;
		force_reg = 0;
	}

	pending = pending_bios->head;
	tail = pending_bios->tail;
	WARN_ON(pending && !tail);

	/*
	 * if pending was null this time around, no bios need processing
	 * at all and we can stop.  Otherwise it'll loop back up again
	 * and do an additional check so no bios are missed.
	 *
	 * device->running_pending is used to synchronize with the
	 * schedule_bio code.
	 */
	if (device->pending_sync_bios.head == NULL &&
	    device->pending_bios.head == NULL) {
		again = 0;
		device->running_pending = 0;
	} else {
		again = 1;
		device->running_pending = 1;
	}

	pending_bios->head = NULL;
	pending_bios->tail = NULL;

	spin_unlock(&device->io_lock);

	while (pending) {

		rmb();
		/* we want to work on both lists, but do more bios on the
		 * sync list than the regular list
		 */
		if ((num_run > 32 &&
		    pending_bios != &device->pending_sync_bios &&
		    device->pending_sync_bios.head) ||
		   (num_run > 64 && pending_bios == &device->pending_sync_bios &&
		    device->pending_bios.head)) {
			spin_lock(&device->io_lock);
			requeue_list(pending_bios, pending, tail);
			goto loop_lock;
		}

		cur = pending;
		pending = pending->bi_next;
		cur->bi_next = NULL;
		atomic_dec(&fs_info->nr_async_bios);

		/* dropped below the async limit: wake throttled submitters */
		if (atomic_read(&fs_info->nr_async_bios) < limit &&
		    waitqueue_active(&fs_info->async_submit_wait))
			wake_up(&fs_info->async_submit_wait);

		BUG_ON(atomic_read(&cur->bi_cnt) == 0);

		submit_bio(cur->bi_rw, cur);
		num_run++;
		batch_run++;
		if (need_resched())
			cond_resched();

		/*
		 * we made progress, there is more work to do and the bdi
		 * is now congested.  Back off and let other work structs
		 * run instead
		 */
		if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
		    fs_info->fs_devices->open_devices > 1) {
			struct io_context *ioc;

			ioc = current->io_context;

			/*
			 * the main goal here is that we don't want to
			 * block if we're going to be able to submit
			 * more requests without blocking.
			 *
			 * This code does two great things, it pokes into
			 * the elevator code from a filesystem _and_
			 * it makes assumptions about how batching works.
			 */
			if (ioc && ioc->nr_batch_requests > 0 &&
			    time_before(jiffies, ioc->last_waited + HZ/50UL) &&
			    (last_waited == 0 ||
			     ioc->last_waited == last_waited)) {
				/*
				 * we want to go through our batch of
				 * requests and stop.  So, we copy out
				 * the ioc->last_waited time and test
				 * against it before looping
				 */
				last_waited = ioc->last_waited;
				if (need_resched())
					cond_resched();
				continue;
			}
			/* requeue what's left and let another worker run */
			spin_lock(&device->io_lock);
			requeue_list(pending_bios, pending, tail);
			device->running_pending = 1;

			spin_unlock(&device->io_lock);
			btrfs_requeue_work(&device->work);
			goto done;
		}
	}

	cond_resched();
	if (again)
		goto loop;

	/* final recheck under the lock so a racing schedule_bio isn't lost */
	spin_lock(&device->io_lock);
	if (device->pending_bios.head || device->pending_sync_bios.head)
		goto loop_lock;
	spin_unlock(&device->io_lock);

done:
	blk_finish_plug(&plug);
	return 0;
}
 296
 297static void pending_bios_fn(struct btrfs_work *work)
 298{
 299	struct btrfs_device *device;
 300
 301	device = container_of(work, struct btrfs_device, work);
 302	run_scheduled_bios(device);
 303}
 304
/*
 * Register a scanned device in the global fs_uuids registry.
 *
 * Creates the btrfs_fs_devices entry for the filesystem's fsid on first
 * sight, then adds the device if it is new, or refreshes its path name
 * if it reappeared under a different node.  On success *fs_devices_ret
 * points at the (possibly new) fs_devices.
 *
 * Returns 0, -ENOMEM on allocation failure, or -EBUSY when a new device
 * shows up for a filesystem that is already opened.
 * NOTE(review): walks/updates fs_uuids, so this presumably relies on the
 * caller holding uuid_mutex (as btrfs_scan_one_device does) — confirm.
 */
static noinline int device_list_add(const char *path,
			   struct btrfs_super_block *disk_super,
			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices;
	u64 found_transid = btrfs_super_generation(disk_super);
	char *name;

	fs_devices = find_fsid(disk_super->fsid);
	if (!fs_devices) {
		/* first device seen for this fsid: create the registry entry */
		fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
		if (!fs_devices)
			return -ENOMEM;
		INIT_LIST_HEAD(&fs_devices->devices);
		INIT_LIST_HEAD(&fs_devices->alloc_list);
		list_add(&fs_devices->list, &fs_uuids);
		memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
		mutex_init(&fs_devices->device_list_mutex);
		device = NULL;
	} else {
		device = __find_device(&fs_devices->devices, devid,
				       disk_super->dev_item.uuid);
	}
	if (!device) {
		if (fs_devices->opened)
			return -EBUSY;

		device = kzalloc(sizeof(*device), GFP_NOFS);
		if (!device) {
			/* we can safely leave the fs_devices entry around */
			return -ENOMEM;
		}
		device->devid = devid;
		device->work.func = pending_bios_fn;
		memcpy(device->uuid, disk_super->dev_item.uuid,
		       BTRFS_UUID_SIZE);
		spin_lock_init(&device->io_lock);
		device->name = kstrdup(path, GFP_NOFS);
		if (!device->name) {
			kfree(device);
			return -ENOMEM;
		}
		INIT_LIST_HEAD(&device->dev_alloc_list);

		mutex_lock(&fs_devices->device_list_mutex);
		list_add_rcu(&device->dev_list, &fs_devices->devices);
		mutex_unlock(&fs_devices->device_list_mutex);

		device->fs_devices = fs_devices;
		fs_devices->num_devices++;
	} else if (!device->name || strcmp(device->name, path)) {
		/* known device, new path: swap the name in */
		name = kstrdup(path, GFP_NOFS);
		if (!name)
			return -ENOMEM;
		kfree(device->name);
		device->name = name;
		if (device->missing) {
			/* the device came back */
			fs_devices->missing_devices--;
			device->missing = 0;
		}
	}

	/* track the device with the newest generation as "latest" */
	if (found_transid > fs_devices->latest_trans) {
		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
	}
	*fs_devices_ret = fs_devices;
	return 0;
}
 377
/*
 * Deep-copy a btrfs_fs_devices and its device list (devid, uuid, name).
 * The clone is not linked into fs_uuids and none of its devices are open.
 * Returns the clone or ERR_PTR(-ENOMEM); partially built clones are freed
 * via free_fs_devices() on the error path.
 */
static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
{
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_device *device;
	struct btrfs_device *orig_dev;

	fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
	if (!fs_devices)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&fs_devices->devices);
	INIT_LIST_HEAD(&fs_devices->alloc_list);
	INIT_LIST_HEAD(&fs_devices->list);
	mutex_init(&fs_devices->device_list_mutex);
	fs_devices->latest_devid = orig->latest_devid;
	fs_devices->latest_trans = orig->latest_trans;
	memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));

	/* We have held the volume lock, it is safe to get the devices. */
	list_for_each_entry(orig_dev, &orig->devices, dev_list) {
		device = kzalloc(sizeof(*device), GFP_NOFS);
		if (!device)
			goto error;

		device->name = kstrdup(orig_dev->name, GFP_NOFS);
		if (!device->name) {
			kfree(device);
			goto error;
		}

		device->devid = orig_dev->devid;
		device->work.func = pending_bios_fn;
		memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
		spin_lock_init(&device->io_lock);
		INIT_LIST_HEAD(&device->dev_list);
		INIT_LIST_HEAD(&device->dev_alloc_list);

		list_add(&device->dev_list, &fs_devices->devices);
		device->fs_devices = fs_devices;
		fs_devices->num_devices++;
	}
	return fs_devices;
error:
	free_fs_devices(fs_devices);
	return ERR_PTR(-ENOMEM);
}
 424
/*
 * Drop every device that was scanned but is not referenced by the
 * filesystem metadata (in_fs_metadata == 0): close its bdev, unlink it
 * from the device and alloc lists, and free it.  Walks the whole seed
 * chain via fs_devices->seed.  Always returns 0.
 */
int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device, *next;

	mutex_lock(&uuid_mutex);
again:
	/* This is the initialized path, it is safe to release the devices. */
	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
		if (device->in_fs_metadata)
			continue;

		if (device->bdev) {
			blkdev_put(device->bdev, device->mode);
			device->bdev = NULL;
			fs_devices->open_devices--;
		}
		if (device->writeable) {
			list_del_init(&device->dev_alloc_list);
			device->writeable = 0;
			fs_devices->rw_devices--;
		}
		list_del_init(&device->dev_list);
		fs_devices->num_devices--;
		kfree(device->name);
		kfree(device);
	}

	/* repeat for each seed filesystem chained behind this one */
	if (fs_devices->seed) {
		fs_devices = fs_devices->seed;
		goto again;
	}

	mutex_unlock(&uuid_mutex);
	return 0;
}
 460
/*
 * Workqueue half of the deferred device free: runs in process context
 * (scheduled from free_device()) so it can call blkdev_put, then frees
 * the device name and the device itself.
 */
static void __free_device(struct work_struct *work)
{
	struct btrfs_device *device;

	device = container_of(work, struct btrfs_device, rcu_work);

	if (device->bdev)
		blkdev_put(device->bdev, device->mode);

	kfree(device->name);
	kfree(device);
}
 473
/*
 * RCU callback for devices removed with list_replace_rcu/call_rcu.
 * Runs after the grace period; defers the actual teardown to a
 * workqueue (__free_device) rather than doing it in RCU context.
 */
static void free_device(struct rcu_head *head)
{
	struct btrfs_device *device;

	device = container_of(head, struct btrfs_device, rcu);

	INIT_WORK(&device->rcu_work, __free_device);
	schedule_work(&device->rcu_work);
}
 483
/*
 * Drop one open reference; when the last one goes, close every device.
 * Each open device is swapped (via list_replace_rcu) for a fresh, closed
 * placeholder so RCU readers of the device list stay safe, and the old
 * device is freed after a grace period (free_device -> __free_device).
 */
static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device;

	if (--fs_devices->opened > 0)
		return 0;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		struct btrfs_device *new_device;

		if (device->bdev)
			fs_devices->open_devices--;

		if (device->writeable) {
			list_del_init(&device->dev_alloc_list);
			fs_devices->rw_devices--;
		}

		if (device->can_discard)
			fs_devices->num_can_discard--;

		/*
		 * NOTE(review): BUG_ON on allocation failure here panics
		 * under memory pressure; consider a fallible path.
		 */
		new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
		BUG_ON(!new_device);
		memcpy(new_device, device, sizeof(*new_device));
		new_device->name = kstrdup(device->name, GFP_NOFS);
		BUG_ON(device->name && !new_device->name);
		new_device->bdev = NULL;
		new_device->writeable = 0;
		new_device->in_fs_metadata = 0;
		new_device->can_discard = 0;
		list_replace_rcu(&device->dev_list, &new_device->dev_list);

		call_rcu(&device->rcu, free_device);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	WARN_ON(fs_devices->open_devices);
	WARN_ON(fs_devices->rw_devices);
	fs_devices->opened = 0;
	fs_devices->seeding = 0;

	return 0;
}
 528
/*
 * Public close entry point: close this fs_devices under uuid_mutex and,
 * if that was the last opener, also close and free the whole chain of
 * seed fs_devices hanging off it.  Returns the result of closing the
 * primary fs_devices.
 */
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_fs_devices *seed_devices = NULL;
	int ret;

	mutex_lock(&uuid_mutex);
	ret = __btrfs_close_devices(fs_devices);
	if (!fs_devices->opened) {
		/* detach the seed chain so we can tear it down below */
		seed_devices = fs_devices->seed;
		fs_devices->seed = NULL;
	}
	mutex_unlock(&uuid_mutex);

	while (seed_devices) {
		fs_devices = seed_devices;
		seed_devices = fs_devices->seed;
		__btrfs_close_devices(fs_devices);
		free_fs_devices(fs_devices);
	}
	return ret;
}
 550
/*
 * Open every scanned device of a filesystem: grab the bdev exclusively,
 * re-read and validate its super block (devid + uuid must match what we
 * scanned), and record writeable/discard/rotational state.  Devices that
 * fail any step are skipped, not fatal; only zero opened devices yields
 * -EIO.  Tracks the device with the highest generation as latest_bdev.
 */
static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
				fmode_t flags, void *holder)
{
	struct request_queue *q;
	struct block_device *bdev;
	struct list_head *head = &fs_devices->devices;
	struct btrfs_device *device;
	struct block_device *latest_bdev = NULL;
	struct buffer_head *bh;
	struct btrfs_super_block *disk_super;
	u64 latest_devid = 0;
	u64 latest_transid = 0;
	u64 devid;
	int seeding = 1;
	int ret = 0;

	flags |= FMODE_EXCL;

	list_for_each_entry(device, head, dev_list) {
		if (device->bdev)
			continue;
		if (!device->name)
			continue;

		bdev = blkdev_get_by_path(device->name, flags, holder);
		if (IS_ERR(bdev)) {
			printk(KERN_INFO "open %s failed\n", device->name);
			goto error;
		}
		set_blocksize(bdev, 4096);

		bh = btrfs_read_dev_super(bdev);
		if (!bh) {
			ret = -EINVAL;
			goto error_close;
		}

		/* the on-disk super must match the device we scanned */
		disk_super = (struct btrfs_super_block *)bh->b_data;
		devid = btrfs_stack_device_id(&disk_super->dev_item);
		if (devid != device->devid)
			goto error_brelse;

		if (memcmp(device->uuid, disk_super->dev_item.uuid,
			   BTRFS_UUID_SIZE))
			goto error_brelse;

		device->generation = btrfs_super_generation(disk_super);
		if (!latest_transid || device->generation > latest_transid) {
			latest_devid = devid;
			latest_transid = device->generation;
			latest_bdev = bdev;
		}

		/* any non-seeding, non-read-only device clears seeding mode */
		if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
			device->writeable = 0;
		} else {
			device->writeable = !bdev_read_only(bdev);
			seeding = 0;
		}

		q = bdev_get_queue(bdev);
		if (blk_queue_discard(q)) {
			device->can_discard = 1;
			fs_devices->num_can_discard++;
		}

		device->bdev = bdev;
		device->in_fs_metadata = 0;
		device->mode = flags;

		if (!blk_queue_nonrot(bdev_get_queue(bdev)))
			fs_devices->rotating = 1;

		fs_devices->open_devices++;
		if (device->writeable) {
			fs_devices->rw_devices++;
			list_add(&device->dev_alloc_list,
				 &fs_devices->alloc_list);
		}
		brelse(bh);
		continue;

error_brelse:
		brelse(bh);
error_close:
		blkdev_put(bdev, flags);
error:
		continue;
	}
	if (fs_devices->open_devices == 0) {
		ret = -EIO;
		goto out;
	}
	fs_devices->seeding = seeding;
	fs_devices->opened = 1;
	fs_devices->latest_bdev = latest_bdev;
	fs_devices->latest_devid = latest_devid;
	fs_devices->latest_trans = latest_transid;
	fs_devices->total_rw_bytes = 0;
out:
	return ret;
}
 653
 654int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 655		       fmode_t flags, void *holder)
 656{
 657	int ret;
 658
 659	mutex_lock(&uuid_mutex);
 660	if (fs_devices->opened) {
 661		fs_devices->opened++;
 662		ret = 0;
 663	} else {
 664		ret = __btrfs_open_devices(fs_devices, flags, holder);
 665	}
 666	mutex_unlock(&uuid_mutex);
 667	return ret;
 668}
 669
/*
 * Probe a block device path for a btrfs super block and, if found,
 * register the device in the global registry via device_list_add().
 * Prints the label/fsid, devid and transid of what it found.  The bdev
 * is only held for the duration of the scan.  Returns 0 or a negative
 * errno (blkdev_get/set_blocksize failure, -EINVAL if no valid super).
 */
int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
			  struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_super_block *disk_super;
	struct block_device *bdev;
	struct buffer_head *bh;
	int ret;
	u64 devid;
	u64 transid;

	mutex_lock(&uuid_mutex);

	flags |= FMODE_EXCL;
	bdev = blkdev_get_by_path(path, flags, holder);

	if (IS_ERR(bdev)) {
		ret = PTR_ERR(bdev);
		goto error;
	}

	ret = set_blocksize(bdev, 4096);
	if (ret)
		goto error_close;
	bh = btrfs_read_dev_super(bdev);
	if (!bh) {
		ret = -EINVAL;
		goto error_close;
	}
	disk_super = (struct btrfs_super_block *)bh->b_data;
	devid = btrfs_stack_device_id(&disk_super->dev_item);
	transid = btrfs_super_generation(disk_super);
	if (disk_super->label[0])
		printk(KERN_INFO "device label %s ", disk_super->label);
	else
		printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
	printk(KERN_CONT "devid %llu transid %llu %s\n",
	       (unsigned long long)devid, (unsigned long long)transid, path);
	ret = device_list_add(path, disk_super, devid, fs_devices_ret);

	brelse(bh);
error_close:
	blkdev_put(bdev, flags);
error:
	mutex_unlock(&uuid_mutex);
	return ret;
}
 716
 717/* helper to account the used device space in the range */
/*
 * Sum, into *length, the bytes of [start, end] that are covered by dev
 * extents of @device, by walking its BTRFS_DEV_EXTENT_KEY items in the
 * chunk tree.  The four overlap cases below clip each extent to the
 * query range.  Returns 0 on success or a btree-search error.
 */
int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
				   u64 end, u64 *length)
{
	struct btrfs_key key;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *dev_extent;
	struct btrfs_path *path;
	u64 extent_end;
	int ret;
	int slot;
	struct extent_buffer *l;

	*length = 0;

	if (start >= device->total_bytes)
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = 2;

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		/* step back: the previous extent may still overlap start */
		ret = btrfs_previous_item(root, path, key.objectid, key.type);
		if (ret < 0)
			goto out;
	}

	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto out;

			break;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		if (key.objectid < device->devid)
			goto next;

		if (key.objectid > device->devid)
			break;

		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
			goto next;

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		extent_end = key.offset + btrfs_dev_extent_length(l,
								  dev_extent);
		if (key.offset <= start && extent_end > end) {
			/* extent covers the whole [start, end] range */
			*length = end - start + 1;
			break;
		} else if (key.offset <= start && extent_end > start)
			/* extent overlaps the front of the range */
			*length += extent_end - start;
		else if (key.offset > start && extent_end <= end)
			/* extent lies entirely inside the range */
			*length += extent_end - key.offset;
		else if (key.offset > start && key.offset <= end) {
			/* extent overlaps the tail of the range */
			*length += end - key.offset + 1;
			break;
		} else if (key.offset > end)
			break;

next:
		path->slots[0]++;
	}
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
 800
 801/*
 802 * find_free_dev_extent - find free space in the specified device
 803 * @trans:	transaction handler
 804 * @device:	the device which we search the free space in
 805 * @num_bytes:	the size of the free space that we need
 806 * @start:	store the start of the free space.
 807 * @len:	the size of the free space. that we find, or the size of the max
 808 * 		free space if we don't find suitable free space
 809 *
 810 * this uses a pretty simple search, the expectation is that it is
 811 * called very infrequently and that a given device has a small number
 812 * of extents
 813 *
 814 * @start is used to store the start of the free space if we find. But if we
 815 * don't find suitable free space, it will be used to store the start position
 816 * of the max free space.
 817 *
 818 * @len is used to store the size of the free space that we find.
 819 * But if we don't find suitable free space, it is used to store the size of
 820 * the max free space.
 821 */
 822int find_free_dev_extent(struct btrfs_trans_handle *trans,
 823			 struct btrfs_device *device, u64 num_bytes,
 824			 u64 *start, u64 *len)
 825{
 826	struct btrfs_key key;
 827	struct btrfs_root *root = device->dev_root;
 828	struct btrfs_dev_extent *dev_extent;
 829	struct btrfs_path *path;
 830	u64 hole_size;
 831	u64 max_hole_start;
 832	u64 max_hole_size;
 833	u64 extent_end;
 834	u64 search_start;
 835	u64 search_end = device->total_bytes;
 836	int ret;
 837	int slot;
 838	struct extent_buffer *l;
 839
 840	/* FIXME use last free of some kind */
 841
 842	/* we don't want to overwrite the superblock on the drive,
 843	 * so we make sure to start at an offset of at least 1MB
 844	 */
 845	search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
 846
 847	max_hole_start = search_start;
 848	max_hole_size = 0;
 849
 850	if (search_start >= search_end) {
 851		ret = -ENOSPC;
 852		goto error;
 853	}
 854
 855	path = btrfs_alloc_path();
 856	if (!path) {
 857		ret = -ENOMEM;
 858		goto error;
 859	}
 860	path->reada = 2;
 861
 862	key.objectid = device->devid;
 863	key.offset = search_start;
 864	key.type = BTRFS_DEV_EXTENT_KEY;
 865
 866	ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
 867	if (ret < 0)
 868		goto out;
 869	if (ret > 0) {
 870		ret = btrfs_previous_item(root, path, key.objectid, key.type);
 871		if (ret < 0)
 872			goto out;
 873	}
 874
 875	while (1) {
 876		l = path->nodes[0];
 877		slot = path->slots[0];
 878		if (slot >= btrfs_header_nritems(l)) {
 879			ret = btrfs_next_leaf(root, path);
 880			if (ret == 0)
 881				continue;
 882			if (ret < 0)
 883				goto out;
 884
 885			break;
 886		}
 887		btrfs_item_key_to_cpu(l, &key, slot);
 888
 889		if (key.objectid < device->devid)
 890			goto next;
 891
 892		if (key.objectid > device->devid)
 893			break;
 894
 895		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
 896			goto next;
 897
 898		if (key.offset > search_start) {
 899			hole_size = key.offset - search_start;
 900
 901			if (hole_size > max_hole_size) {
 902				max_hole_start = search_start;
 903				max_hole_size = hole_size;
 904			}
 905
 906			/*
 907			 * If this free space is greater than which we need,
 908			 * it must be the max free space that we have found
 909			 * until now, so max_hole_start must point to the start
 910			 * of this free space and the length of this free space
 911			 * is stored in max_hole_size. Thus, we return
 912			 * max_hole_start and max_hole_size and go back to the
 913			 * caller.
 914			 */
 915			if (hole_size >= num_bytes) {
 916				ret = 0;
 917				goto out;
 918			}
 919		}
 920
 921		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
 922		extent_end = key.offset + btrfs_dev_extent_length(l,
 923								  dev_extent);
 924		if (extent_end > search_start)
 925			search_start = extent_end;
 926next:
 927		path->slots[0]++;
 928		cond_resched();
 929	}
 930
 931	hole_size = search_end- search_start;
 932	if (hole_size > max_hole_size) {
 933		max_hole_start = search_start;
 934		max_hole_size = hole_size;
 935	}
 936
 937	/* See above. */
 938	if (hole_size < num_bytes)
 939		ret = -ENOSPC;
 940	else
 941		ret = 0;
 942
 943out:
 944	btrfs_free_path(path);
 945error:
 946	*start = max_hole_start;
 947	if (len)
 948		*len = max_hole_size;
 949	return ret;
 950}
 951
/*
 * Remove the dev extent item of @device that covers byte offset @start,
 * and shrink device->bytes_used by the extent's length.  If the exact
 * key isn't found, steps back one item and asserts (BUG_ON) that it
 * still covers @start.  Returns 0 or a btree error.
 */
static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
			  struct btrfs_device *device,
			  u64 start)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf = NULL;
	struct btrfs_dev_extent *extent = NULL;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0) {
		/* no exact match: the previous extent must contain start */
		ret = btrfs_previous_item(root, path, key.objectid,
					  BTRFS_DEV_EXTENT_KEY);
		if (ret)
			goto out;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
		BUG_ON(found_key.offset > start || found_key.offset +
		       btrfs_dev_extent_length(leaf, extent) < start);
	} else if (ret == 0) {
		leaf = path->nodes[0];
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
	}
	BUG_ON(ret);

	if (device->bytes_used > 0)
		device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
	ret = btrfs_del_item(trans, root, path);

out:
	btrfs_free_path(path);
	return ret;
}
 999
1000int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1001			   struct btrfs_device *device,
1002			   u64 chunk_tree, u64 chunk_objectid,
1003			   u64 chunk_offset, u64 start, u64 num_bytes)
1004{
1005	int ret;
1006	struct btrfs_path *path;
1007	struct btrfs_root *root = device->dev_root;
1008	struct btrfs_dev_extent *extent;
1009	struct extent_buffer *leaf;
1010	struct btrfs_key key;
1011
1012	WARN_ON(!device->in_fs_metadata);
1013	path = btrfs_alloc_path();
1014	if (!path)
1015		return -ENOMEM;
1016
1017	key.objectid = device->devid;
1018	key.offset = start;
1019	key.type = BTRFS_DEV_EXTENT_KEY;
1020	ret = btrfs_insert_empty_item(trans, root, path, &key,
1021				      sizeof(*extent));
1022	BUG_ON(ret);
1023
1024	leaf = path->nodes[0];
1025	extent = btrfs_item_ptr(leaf, path->slots[0],
1026				struct btrfs_dev_extent);
1027	btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
1028	btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
1029	btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
1030
1031	write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
1032		    (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
1033		    BTRFS_UUID_SIZE);
1034
1035	btrfs_set_dev_extent_length(leaf, extent, num_bytes);
1036	btrfs_mark_buffer_dirty(leaf);
1037	btrfs_free_path(path);
1038	return ret;
1039}
1040
1041static noinline int find_next_chunk(struct btrfs_root *root,
1042				    u64 objectid, u64 *offset)
1043{
1044	struct btrfs_path *path;
1045	int ret;
1046	struct btrfs_key key;
1047	struct btrfs_chunk *chunk;
1048	struct btrfs_key found_key;
1049
1050	path = btrfs_alloc_path();
1051	BUG_ON(!path);
1052
1053	key.objectid = objectid;
1054	key.offset = (u64)-1;
1055	key.type = BTRFS_CHUNK_ITEM_KEY;
1056
1057	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1058	if (ret < 0)
1059		goto error;
1060
1061	BUG_ON(ret == 0);
1062
1063	ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
1064	if (ret) {
1065		*offset = 0;
1066	} else {
1067		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1068				      path->slots[0]);
1069		if (found_key.objectid != objectid)
1070			*offset = 0;
1071		else {
1072			chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
1073					       struct btrfs_chunk);
1074			*offset = found_key.offset +
1075				btrfs_chunk_length(path->nodes[0], chunk);
1076		}
1077	}
1078	ret = 0;
1079error:
1080	btrfs_free_path(path);
1081	return ret;
1082}
1083
1084static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
1085{
1086	int ret;
1087	struct btrfs_key key;
1088	struct btrfs_key found_key;
1089	struct btrfs_path *path;
1090
1091	root = root->fs_info->chunk_root;
1092
1093	path = btrfs_alloc_path();
1094	if (!path)
1095		return -ENOMEM;
1096
1097	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1098	key.type = BTRFS_DEV_ITEM_KEY;
1099	key.offset = (u64)-1;
1100
1101	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1102	if (ret < 0)
1103		goto error;
1104
1105	BUG_ON(ret == 0);
1106
1107	ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
1108				  BTRFS_DEV_ITEM_KEY);
1109	if (ret) {
1110		*objectid = 1;
1111	} else {
1112		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1113				      path->slots[0]);
1114		*objectid = found_key.offset + 1;
1115	}
1116	ret = 0;
1117error:
1118	btrfs_free_path(path);
1119	return ret;
1120}
1121
1122/*
1123 * the device information is stored in the chunk root
1124 * the btrfs_device struct should be fully filled in
1125 */
1126int btrfs_add_device(struct btrfs_trans_handle *trans,
1127		     struct btrfs_root *root,
1128		     struct btrfs_device *device)
1129{
1130	int ret;
1131	struct btrfs_path *path;
1132	struct btrfs_dev_item *dev_item;
1133	struct extent_buffer *leaf;
1134	struct btrfs_key key;
1135	unsigned long ptr;
1136
1137	root = root->fs_info->chunk_root;
1138
1139	path = btrfs_alloc_path();
1140	if (!path)
1141		return -ENOMEM;
1142
1143	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1144	key.type = BTRFS_DEV_ITEM_KEY;
1145	key.offset = device->devid;
1146
1147	ret = btrfs_insert_empty_item(trans, root, path, &key,
1148				      sizeof(*dev_item));
1149	if (ret)
1150		goto out;
1151
1152	leaf = path->nodes[0];
1153	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
1154
1155	btrfs_set_device_id(leaf, dev_item, device->devid);
1156	btrfs_set_device_generation(leaf, dev_item, 0);
1157	btrfs_set_device_type(leaf, dev_item, device->type);
1158	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1159	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1160	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
1161	btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
1162	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1163	btrfs_set_device_group(leaf, dev_item, 0);
1164	btrfs_set_device_seek_speed(leaf, dev_item, 0);
1165	btrfs_set_device_bandwidth(leaf, dev_item, 0);
1166	btrfs_set_device_start_offset(leaf, dev_item, 0);
1167
1168	ptr = (unsigned long)btrfs_device_uuid(dev_item);
1169	write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
1170	ptr = (unsigned long)btrfs_device_fsid(dev_item);
1171	write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
1172	btrfs_mark_buffer_dirty(leaf);
1173
1174	ret = 0;
1175out:
1176	btrfs_free_path(path);
1177	return ret;
1178}
1179
/*
 * btrfs_rm_dev_item - delete the dev item for @device from the chunk
 * tree.
 *
 * Starts and commits its own transaction, holding the chunk mutex
 * around the tree modification.  Returns 0 on success, -ENOENT if no
 * matching item exists, or another negative errno.
 */
static int btrfs_rm_dev_item(struct btrfs_root *root,
			     struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_trans_handle *trans;

	root = root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;
	lock_chunks(root);

	/* cow the path (-1 = delete lookup) so we can remove the item */
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		/* exact key not found */
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, root, path);
	if (ret)
		goto out;
out:
	/* the transaction is committed even when the deletion failed */
	btrfs_free_path(path);
	unlock_chunks(root);
	btrfs_commit_transaction(trans, root);
	return ret;
}
1222
1223int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1224{
1225	struct btrfs_device *device;
1226	struct btrfs_device *next_device;
1227	struct block_device *bdev;
1228	struct buffer_head *bh = NULL;
1229	struct btrfs_super_block *disk_super;
1230	struct btrfs_fs_devices *cur_devices;
1231	u64 all_avail;
1232	u64 devid;
1233	u64 num_devices;
1234	u8 *dev_uuid;
1235	int ret = 0;
1236	bool clear_super = false;
1237
1238	mutex_lock(&uuid_mutex);
1239	mutex_lock(&root->fs_info->volume_mutex);
1240
1241	all_avail = root->fs_info->avail_data_alloc_bits |
1242		root->fs_info->avail_system_alloc_bits |
1243		root->fs_info->avail_metadata_alloc_bits;
1244
1245	if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
1246	    root->fs_info->fs_devices->num_devices <= 4) {
1247		printk(KERN_ERR "btrfs: unable to go below four devices "
1248		       "on raid10\n");
1249		ret = -EINVAL;
1250		goto out;
1251	}
1252
1253	if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
1254	    root->fs_info->fs_devices->num_devices <= 2) {
1255		printk(KERN_ERR "btrfs: unable to go below two "
1256		       "devices on raid1\n");
1257		ret = -EINVAL;
1258		goto out;
1259	}
1260
1261	if (strcmp(device_path, "missing") == 0) {
1262		struct list_head *devices;
1263		struct btrfs_device *tmp;
1264
1265		device = NULL;
1266		devices = &root->fs_info->fs_devices->devices;
1267		/*
1268		 * It is safe to read the devices since the volume_mutex
1269		 * is held.
1270		 */
1271		list_for_each_entry(tmp, devices, dev_list) {
1272			if (tmp->in_fs_metadata && !tmp->bdev) {
1273				device = tmp;
1274				break;
1275			}
1276		}
1277		bdev = NULL;
1278		bh = NULL;
1279		disk_super = NULL;
1280		if (!device) {
1281			printk(KERN_ERR "btrfs: no missing devices found to "
1282			       "remove\n");
1283			goto out;
1284		}
1285	} else {
1286		bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
1287					  root->fs_info->bdev_holder);
1288		if (IS_ERR(bdev)) {
1289			ret = PTR_ERR(bdev);
1290			goto out;
1291		}
1292
1293		set_blocksize(bdev, 4096);
1294		bh = btrfs_read_dev_super(bdev);
1295		if (!bh) {
1296			ret = -EINVAL;
1297			goto error_close;
1298		}
1299		disk_super = (struct btrfs_super_block *)bh->b_data;
1300		devid = btrfs_stack_device_id(&disk_super->dev_item);
1301		dev_uuid = disk_super->dev_item.uuid;
1302		device = btrfs_find_device(root, devid, dev_uuid,
1303					   disk_super->fsid);
1304		if (!device) {
1305			ret = -ENOENT;
1306			goto error_brelse;
1307		}
1308	}
1309
1310	if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
1311		printk(KERN_ERR "btrfs: unable to remove the only writeable "
1312		       "device\n");
1313		ret = -EINVAL;
1314		goto error_brelse;
1315	}
1316
1317	if (device->writeable) {
1318		lock_chunks(root);
1319		list_del_init(&device->dev_alloc_list);
1320		unlock_chunks(root);
1321		root->fs_info->fs_devices->rw_devices--;
1322		clear_super = true;
1323	}
1324
1325	ret = btrfs_shrink_device(device, 0);
1326	if (ret)
1327		goto error_undo;
1328
1329	ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
1330	if (ret)
1331		goto error_undo;
1332
1333	device->in_fs_metadata = 0;
1334	btrfs_scrub_cancel_dev(root, device);
1335
1336	/*
1337	 * the device list mutex makes sure that we don't change
1338	 * the device list while someone else is writing out all
1339	 * the device supers.
1340	 */
1341
1342	cur_devices = device->fs_devices;
1343	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1344	list_del_rcu(&device->dev_list);
1345
1346	device->fs_devices->num_devices--;
1347
1348	if (device->missing)
1349		root->fs_info->fs_devices->missing_devices--;
1350
1351	next_device = list_entry(root->fs_info->fs_devices->devices.next,
1352				 struct btrfs_device, dev_list);
1353	if (device->bdev == root->fs_info->sb->s_bdev)
1354		root->fs_info->sb->s_bdev = next_device->bdev;
1355	if (device->bdev == root->fs_info->fs_devices->latest_bdev)
1356		root->fs_info->fs_devices->latest_bdev = next_device->bdev;
1357
1358	if (device->bdev)
1359		device->fs_devices->open_devices--;
1360
1361	call_rcu(&device->rcu, free_device);
1362	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1363
1364	num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
1365	btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices);
1366
1367	if (cur_devices->open_devices == 0) {
1368		struct btrfs_fs_devices *fs_devices;
1369		fs_devices = root->fs_info->fs_devices;
1370		while (fs_devices) {
1371			if (fs_devices->seed == cur_devices)
1372				break;
1373			fs_devices = fs_devices->seed;
1374		}
1375		fs_devices->seed = cur_devices->seed;
1376		cur_devices->seed = NULL;
1377		lock_chunks(root);
1378		__btrfs_close_devices(cur_devices);
1379		unlock_chunks(root);
1380		free_fs_devices(cur_devices);
1381	}
1382
1383	/*
1384	 * at this point, the device is zero sized.  We want to
1385	 * remove it from the devices list and zero out the old super
1386	 */
1387	if (clear_super) {
1388		/* make sure this device isn't detected as part of
1389		 * the FS anymore
1390		 */
1391		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
1392		set_buffer_dirty(bh);
1393		sync_dirty_buffer(bh);
1394	}
1395
1396	ret = 0;
1397
1398error_brelse:
1399	brelse(bh);
1400error_close:
1401	if (bdev)
1402		blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
1403out:
1404	mutex_unlock(&root->fs_info->volume_mutex);
1405	mutex_unlock(&uuid_mutex);
1406	return ret;
1407error_undo:
1408	if (device->writeable) {
1409		lock_chunks(root);
1410		list_add(&device->dev_alloc_list,
1411			 &root->fs_info->fs_devices->alloc_list);
1412		unlock_chunks(root);
1413		root->fs_info->fs_devices->rw_devices++;
1414	}
1415	goto error_brelse;
1416}
1417
1418/*
1419 * does all the dirty work required for changing file system's UUID.
1420 */
1421static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
1422				struct btrfs_root *root)
1423{
1424	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
1425	struct btrfs_fs_devices *old_devices;
1426	struct btrfs_fs_devices *seed_devices;
1427	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
1428	struct btrfs_device *device;
1429	u64 super_flags;
1430
1431	BUG_ON(!mutex_is_locked(&uuid_mutex));
1432	if (!fs_devices->seeding)
1433		return -EINVAL;
1434
1435	seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
1436	if (!seed_devices)
1437		return -ENOMEM;
1438
1439	old_devices = clone_fs_devices(fs_devices);
1440	if (IS_ERR(old_devices)) {
1441		kfree(seed_devices);
1442		return PTR_ERR(old_devices);
1443	}
1444
1445	list_add(&old_devices->list, &fs_uuids);
1446
1447	memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
1448	seed_devices->opened = 1;
1449	INIT_LIST_HEAD(&seed_devices->devices);
1450	INIT_LIST_HEAD(&seed_devices->alloc_list);
1451	mutex_init(&seed_devices->device_list_mutex);
1452
1453	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1454	list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
1455			      synchronize_rcu);
1456	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1457
1458	list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
1459	list_for_each_entry(device, &seed_devices->devices, dev_list) {
1460		device->fs_devices = seed_devices;
1461	}
1462
1463	fs_devices->seeding = 0;
1464	fs_devices->num_devices = 0;
1465	fs_devices->open_devices = 0;
1466	fs_devices->seed = seed_devices;
1467
1468	generate_random_uuid(fs_devices->fsid);
1469	memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1470	memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1471	super_flags = btrfs_super_flags(disk_super) &
1472		      ~BTRFS_SUPER_FLAG_SEEDING;
1473	btrfs_set_super_flags(disk_super, super_flags);
1474
1475	return 0;
1476}
1477
1478/*
1479 * strore the expected generation for seed devices in device items.
1480 */
1481static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
1482			       struct btrfs_root *root)
1483{
1484	struct btrfs_path *path;
1485	struct extent_buffer *leaf;
1486	struct btrfs_dev_item *dev_item;
1487	struct btrfs_device *device;
1488	struct btrfs_key key;
1489	u8 fs_uuid[BTRFS_UUID_SIZE];
1490	u8 dev_uuid[BTRFS_UUID_SIZE];
1491	u64 devid;
1492	int ret;
1493
1494	path = btrfs_alloc_path();
1495	if (!path)
1496		return -ENOMEM;
1497
1498	root = root->fs_info->chunk_root;
1499	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1500	key.offset = 0;
1501	key.type = BTRFS_DEV_ITEM_KEY;
1502
1503	while (1) {
1504		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1505		if (ret < 0)
1506			goto error;
1507
1508		leaf = path->nodes[0];
1509next_slot:
1510		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1511			ret = btrfs_next_leaf(root, path);
1512			if (ret > 0)
1513				break;
1514			if (ret < 0)
1515				goto error;
1516			leaf = path->nodes[0];
1517			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1518			btrfs_release_path(path);
1519			continue;
1520		}
1521
1522		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1523		if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
1524		    key.type != BTRFS_DEV_ITEM_KEY)
1525			break;
1526
1527		dev_item = btrfs_item_ptr(leaf, path->slots[0],
1528					  struct btrfs_dev_item);
1529		devid = btrfs_device_id(leaf, dev_item);
1530		read_extent_buffer(leaf, dev_uuid,
1531				   (unsigned long)btrfs_device_uuid(dev_item),
1532				   BTRFS_UUID_SIZE);
1533		read_extent_buffer(leaf, fs_uuid,
1534				   (unsigned long)btrfs_device_fsid(dev_item),
1535				   BTRFS_UUID_SIZE);
1536		device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
1537		BUG_ON(!device);
1538
1539		if (device->fs_devices->seeding) {
1540			btrfs_set_device_generation(leaf, dev_item,
1541						    device->generation);
1542			btrfs_mark_buffer_dirty(leaf);
1543		}
1544
1545		path->slots[0]++;
1546		goto next_slot;
1547	}
1548	ret = 0;
1549error:
1550	btrfs_free_path(path);
1551	return ret;
1552}
1553
/*
 * btrfs_init_new_device - add the block device at @device_path to a
 * mounted filesystem.
 *
 * Opens the device exclusively, allocates and fills a btrfs_device,
 * links it into the fs_devices lists and updates the super block
 * totals.  When adding to a seed filesystem, the fs is sprouted here
 * (btrfs_prepare_sprout/btrfs_finish_sprout) and the SYSTEM chunks
 * are relocated afterwards.  Returns 0 on success or a negative
 * errno.
 */
int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
{
	struct request_queue *q;
	struct btrfs_trans_handle *trans;
	struct btrfs_device *device;
	struct block_device *bdev;
	struct list_head *devices;
	struct super_block *sb = root->fs_info->sb;
	u64 total_bytes;
	int seeding_dev = 0;
	int ret = 0;

	/* a read-only mount can only grow by sprouting from a seed fs */
	if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
		return -EINVAL;

	bdev = blkdev_get_by_path(device_path, FMODE_EXCL,
				  root->fs_info->bdev_holder);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	if (root->fs_info->fs_devices->seeding) {
		seeding_dev = 1;
		/* sprouting changes the fsid; lock out umount and scans */
		down_write(&sb->s_umount);
		mutex_lock(&uuid_mutex);
	}

	filemap_write_and_wait(bdev->bd_inode->i_mapping);
	mutex_lock(&root->fs_info->volume_mutex);

	devices = &root->fs_info->fs_devices->devices;
	/*
	 * we have the volume lock, so we don't need the extra
	 * device list mutex while reading the list here.
	 */
	list_for_each_entry(device, devices, dev_list) {
		/* refuse to add a device that is already part of this fs */
		if (device->bdev == bdev) {
			ret = -EEXIST;
			goto error;
		}
	}

	device = kzalloc(sizeof(*device), GFP_NOFS);
	if (!device) {
		/* we can safely leave the fs_devices entry around */
		ret = -ENOMEM;
		goto error;
	}

	device->name = kstrdup(device_path, GFP_NOFS);
	if (!device->name) {
		kfree(device);
		ret = -ENOMEM;
		goto error;
	}

	ret = find_next_devid(root, &device->devid);
	if (ret) {
		kfree(device->name);
		kfree(device);
		goto error;
	}

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		kfree(device->name);
		kfree(device);
		ret = PTR_ERR(trans);
		goto error;
	}

	lock_chunks(root);

	/* fill in the in-memory device before publishing it anywhere */
	q = bdev_get_queue(bdev);
	if (blk_queue_discard(q))
		device->can_discard = 1;
	device->writeable = 1;
	device->work.func = pending_bios_fn;
	generate_random_uuid(device->uuid);
	spin_lock_init(&device->io_lock);
	device->generation = trans->transid;
	device->io_width = root->sectorsize;
	device->io_align = root->sectorsize;
	device->sector_size = root->sectorsize;
	device->total_bytes = i_size_read(bdev->bd_inode);
	device->disk_total_bytes = device->total_bytes;
	device->dev_root = root->fs_info->dev_root;
	device->bdev = bdev;
	device->in_fs_metadata = 1;
	device->mode = FMODE_EXCL;
	set_blocksize(device->bdev, 4096);

	if (seeding_dev) {
		/* turn the seed fs into a writable sprout */
		sb->s_flags &= ~MS_RDONLY;
		ret = btrfs_prepare_sprout(trans, root);
		BUG_ON(ret);
	}

	device->fs_devices = root->fs_info->fs_devices;

	/*
	 * we don't want write_supers to jump in here with our device
	 * half setup
	 */
	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices);
	list_add(&device->dev_alloc_list,
		 &root->fs_info->fs_devices->alloc_list);
	root->fs_info->fs_devices->num_devices++;
	root->fs_info->fs_devices->open_devices++;
	root->fs_info->fs_devices->rw_devices++;
	if (device->can_discard)
		root->fs_info->fs_devices->num_can_discard++;
	root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;

	/* one rotational device makes the whole fs "rotating" */
	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
		root->fs_info->fs_devices->rotating = 1;

	total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
	btrfs_set_super_total_bytes(&root->fs_info->super_copy,
				    total_bytes + device->total_bytes);

	total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
	btrfs_set_super_num_devices(&root->fs_info->super_copy,
				    total_bytes + 1);
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

	if (seeding_dev) {
		ret = init_first_rw_device(trans, root, device);
		BUG_ON(ret);
		ret = btrfs_finish_sprout(trans, root);
		BUG_ON(ret);
	} else {
		ret = btrfs_add_device(trans, root, device);
	}

	/*
	 * we've got more storage, clear any full flags on the space
	 * infos
	 */
	btrfs_clear_space_info_full(root->fs_info);

	unlock_chunks(root);
	btrfs_commit_transaction(trans, root);

	if (seeding_dev) {
		mutex_unlock(&uuid_mutex);
		up_write(&sb->s_umount);

		/* SYSTEM chunks from the seed must be rewritten */
		ret = btrfs_relocate_sys_chunks(root);
		BUG_ON(ret);
	}
out:
	mutex_unlock(&root->fs_info->volume_mutex);
	return ret;
error:
	blkdev_put(bdev, FMODE_EXCL);
	if (seeding_dev) {
		mutex_unlock(&uuid_mutex);
		up_write(&sb->s_umount);
	}
	goto out;
}
1716
/*
 * btrfs_update_device - write the in-memory fields of @device back
 * into its dev item in the chunk tree.
 *
 * Runs in the caller's transaction.  Returns 0 on success, -ENOENT if
 * the dev item is missing, or another negative errno.
 */
static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
					struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	root = device->dev_root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	/*
	 * NOTE: the item's total_bytes is filled from ->disk_total_bytes,
	 * not ->total_bytes -- presumably so the on-disk size lags the
	 * in-memory target during a resize; verify against shrink/grow
	 * callers.
	 */
	btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
	btrfs_mark_buffer_dirty(leaf);

out:
	btrfs_free_path(path);
	return ret;
}
1762
1763static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
1764		      struct btrfs_device *device, u64 new_size)
1765{
1766	struct btrfs_super_block *super_copy =
1767		&device->dev_root->fs_info->super_copy;
1768	u64 old_total = btrfs_super_total_bytes(super_copy);
1769	u64 diff = new_size - device->total_bytes;
1770
1771	if (!device->writeable)
1772		return -EACCES;
1773	if (new_size <= device->total_bytes)
1774		return -EINVAL;
1775
1776	btrfs_set_super_total_bytes(super_copy, old_total + diff);
1777	device->fs_devices->total_rw_bytes += diff;
1778
1779	device->total_bytes = new_size;
1780	device->disk_total_bytes = new_size;
1781	btrfs_clear_space_info_full(device->dev_root->fs_info);
1782
1783	return btrfs_update_device(trans, device);
1784}
1785
/*
 * btrfs_grow_device - grow @device to @new_size.
 *
 * Wrapper that takes the chunk mutex around __btrfs_grow_device(),
 * which updates the in-memory sizes, the super block total and the
 * on-disk dev item.  Returns what __btrfs_grow_device() returns.
 */
int btrfs_grow_device(struct btrfs_trans_handle *trans,
		      struct btrfs_device *device, u64 new_size)
{
	int ret;
	lock_chunks(device->dev_root);
	ret = __btrfs_grow_device(trans, device, new_size);
	unlock_chunks(device->dev_root);
	return ret;
}
1795
/*
 * btrfs_free_chunk - delete the chunk item at
 * (@chunk_objectid, @chunk_offset) from the chunk tree.
 *
 * @chunk_tree is currently unused; the chunk root is taken from
 * fs_info.  Runs in the caller's transaction.
 */
static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    u64 chunk_tree, u64 chunk_objectid,
			    u64 chunk_offset)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	root = root->fs_info->chunk_root;
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = chunk_objectid;
	key.offset = chunk_offset;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	/*
	 * NOTE(review): a not-found result (ret > 0) trips this BUG_ON as
	 * well as a real error -- the item is assumed to exist.
	 */
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	BUG_ON(ret);

	ret = btrfs_del_item(trans, root, path);

	btrfs_free_path(path);
	return ret;
}
1822
/*
 * btrfs_del_sys_chunk - remove one chunk entry from the in-memory
 * super block's sys_chunk_array.
 *
 * The array is a packed sequence of (btrfs_disk_key, btrfs_chunk)
 * records of varying length.  It is scanned linearly; the record
 * whose key matches (@chunk_objectid, @chunk_offset) is removed by
 * sliding the tail of the array down over it and shrinking the
 * recorded array size.  Returns 0 on success, or -EIO if a non-chunk
 * key is found in the array.
 */
static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
			chunk_offset)
{
	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
	struct btrfs_disk_key *disk_key;
	struct btrfs_chunk *chunk;
	u8 *ptr;
	int ret = 0;
	u32 num_stripes;
	u32 array_size;
	u32 len = 0;
	u32 cur;
	struct btrfs_key key;

	array_size = btrfs_super_sys_array_size(super_copy);

	ptr = super_copy->sys_chunk_array;
	cur = 0;

	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		/* record length = key + variable-size chunk item */
		len = sizeof(*disk_key);

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			chunk = (struct btrfs_chunk *)(ptr + len);
			num_stripes = btrfs_stack_chunk_num_stripes(chunk);
			len += btrfs_chunk_item_size(num_stripes);
		} else {
			/* only chunk items may live in sys_chunk_array */
			ret = -EIO;
			break;
		}
		if (key.objectid == chunk_objectid &&
		    key.offset == chunk_offset) {
			/*
			 * slide the tail down; ptr/cur are deliberately not
			 * advanced because the next record has just moved
			 * into the current position
			 */
			memmove(ptr, ptr + len, array_size - (cur + len));
			array_size -= len;
			btrfs_set_super_sys_array_size(super_copy, array_size);
		} else {
			ptr += len;
			cur += len;
		}
	}
	return ret;
}
1868
/*
 * btrfs_relocate_chunk - move all live data out of one chunk and
 * delete it.
 *
 * Step one relocates every extent in the block group at @chunk_offset
 * (outside any transaction).  Step two, inside a transaction with the
 * chunk mutex held, frees the per-device extents, deletes the chunk
 * item (and its sys_chunk_array copy for SYSTEM chunks), removes the
 * block group and drops the extent mapping.
 *
 * Returns 0 on success, -ENOSPC if the chunk cannot be relocated, or
 * a negative errno from the relocation step.
 */
static int btrfs_relocate_chunk(struct btrfs_root *root,
			 u64 chunk_tree, u64 chunk_objectid,
			 u64 chunk_offset)
{
	struct extent_map_tree *em_tree;
	struct btrfs_root *extent_root;
	struct btrfs_trans_handle *trans;
	struct extent_map *em;
	struct map_lookup *map;
	int ret;
	int i;

	root = root->fs_info->chunk_root;
	extent_root = root->fs_info->extent_root;
	em_tree = &root->fs_info->mapping_tree.map_tree;

	/* make sure there is somewhere to move the data first */
	ret = btrfs_can_relocate(extent_root, chunk_offset);
	if (ret)
		return -ENOSPC;

	/* step one, relocate all the extents inside this chunk */
	ret = btrfs_relocate_block_group(extent_root, chunk_offset);
	if (ret)
		return ret;

	trans = btrfs_start_transaction(root, 0);
	BUG_ON(IS_ERR(trans));

	lock_chunks(root);

	/*
	 * step two, delete the device extents and the
	 * chunk tree entries
	 */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
	read_unlock(&em_tree->lock);

	/*
	 * NOTE(review): em is dereferenced without a NULL check; this
	 * relies on the mapping existing for any chunk that was just
	 * relocated.
	 */
	BUG_ON(em->start > chunk_offset ||
	       em->start + em->len < chunk_offset);
	map = (struct map_lookup *)em->bdev;

	/* release the dev extent on every stripe of the chunk */
	for (i = 0; i < map->num_stripes; i++) {
		ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
					    map->stripes[i].physical);
		BUG_ON(ret);

		if (map->stripes[i].dev) {
			ret = btrfs_update_device(trans, map->stripes[i].dev);
			BUG_ON(ret);
		}
	}
	ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
			       chunk_offset);

	BUG_ON(ret);

	trace_btrfs_chunk_free(root, map, chunk_offset, em->len);

	/* SYSTEM chunks are duplicated in the super's sys_chunk_array */
	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
		ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
		BUG_ON(ret);
	}

	ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
	BUG_ON(ret);

	write_lock(&em_tree->lock);
	remove_extent_mapping(em_tree, em);
	write_unlock(&em_tree->lock);

	kfree(map);
	em->bdev = NULL;

	/* once for the tree */
	free_extent_map(em);
	/* once for us */
	free_extent_map(em);

	unlock_chunks(root);
	btrfs_end_transaction(trans, root);
	return 0;
}
1952
1953static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
1954{
1955	struct btrfs_root *chunk_root = root->fs_info->chunk_root;
1956	struct btrfs_path *path;
1957	struct extent_buffer *leaf;
1958	struct btrfs_chunk *chunk;
1959	struct btrfs_key key;
1960	struct btrfs_key found_key;
1961	u64 chunk_tree = chunk_root->root_key.objectid;
1962	u64 chunk_type;
1963	bool retried = false;
1964	int failed = 0;
1965	int ret;
1966
1967	path = btrfs_alloc_path();
1968	if (!path)
1969		return -ENOMEM;
1970
1971again:
1972	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1973	key.offset = (u64)-1;
1974	key.type = BTRFS_CHUNK_ITEM_KEY;
1975
1976	while (1) {
1977		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
1978		if (ret < 0)
1979			goto error;
1980		BUG_ON(ret == 0);
1981
1982		ret = btrfs_previous_item(chunk_root, path, key.objectid,
1983					  key.type);
1984		if (ret 

Large files files are truncated, but you can click here to view the full file