/fs/ext3/super.c
C | 3099 lines | 2409 code | 326 blank | 364 comment | 399 complexity | a10a7d459b0e9de15d1a1f0f0cb117d8 MD5 | raw file
Possible License(s): LGPL-2.0, AGPL-1.0, GPL-2.0
Large files are truncated, but you can click here to view the full file
1/* 2 * linux/fs/ext3/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19#include <linux/module.h> 20#include <linux/string.h> 21#include <linux/fs.h> 22#include <linux/time.h> 23#include <linux/jbd.h> 24#include <linux/ext3_fs.h> 25#include <linux/ext3_jbd.h> 26#include <linux/slab.h> 27#include <linux/init.h> 28#include <linux/blkdev.h> 29#include <linux/parser.h> 30#include <linux/buffer_head.h> 31#include <linux/exportfs.h> 32#include <linux/vfs.h> 33#include <linux/random.h> 34#include <linux/mount.h> 35#include <linux/namei.h> 36#include <linux/quotaops.h> 37#include <linux/seq_file.h> 38#include <linux/log2.h> 39#include <linux/cleancache.h> 40 41#include <asm/uaccess.h> 42 43#include "xattr.h" 44#include "acl.h" 45#include "namei.h" 46 47#define CREATE_TRACE_POINTS 48#include <trace/events/ext3.h> 49 50#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED 51 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA 52#else 53 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA 54#endif 55 56static int ext3_load_journal(struct super_block *, struct ext3_super_block *, 57 unsigned long journal_devnum); 58static int ext3_create_journal(struct super_block *, struct ext3_super_block *, 59 unsigned int); 60static int ext3_commit_super(struct super_block *sb, 61 struct ext3_super_block *es, 62 int sync); 63static void ext3_mark_recovery_complete(struct super_block * sb, 64 struct ext3_super_block * es); 65static void ext3_clear_journal_err(struct super_block * sb, 66 struct ext3_super_block * es); 67static int ext3_sync_fs(struct super_block *sb, int wait); 68static const char *ext3_decode_error(struct super_block * sb, 
int errno,
				     char nbuf[16]);
static int ext3_remount (struct super_block * sb, int * flags, char * data);
static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
static int ext3_unfreeze(struct super_block *sb);
static int ext3_freeze(struct super_block *sb);

/*
 * Wrappers for journal_start/end.
 *
 * The only special thing we need to do here is to make sure that all
 * journal_end calls result in the superblock being marked dirty, so
 * that sync() will call the filesystem's write_super callback if
 * appropriate.
 *
 * NOTE(review): nothing in ext3_journal_start_sb() itself marks the
 * superblock dirty; the paragraph above looks historical -- confirm.
 *
 * ext3_journal_start_sb - start a journal handle on @sb
 * @sb:      superblock whose journal (EXT3_SB(sb)->s_journal) is used
 * @nblocks: passed unchanged to journal_start()
 *
 * Returns ERR_PTR(-EROFS) if @sb is mounted read-only, or if the journal
 * is found aborted (in which case ext3_abort() is called first).
 * Otherwise returns whatever journal_start() returns.
 */
handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
{
	journal_t *journal;

	/* No handles on a read-only filesystem. */
	if (sb->s_flags & MS_RDONLY)
		return ERR_PTR(-EROFS);

	/* Special case here: if the journal has aborted behind our
	 * backs (eg. EIO in the commit thread), then we still need to
	 * take the FS itself readonly cleanly. */
	journal = EXT3_SB(sb)->s_journal;
	if (is_journal_aborted(journal)) {
		ext3_abort(sb, __func__,
			   "Detected aborted journal");
		return ERR_PTR(-EROFS);
	}

	return journal_start(journal, nblocks);
}

/*
 * The only special thing we need to do here is to make sure that all
 * journal_stop calls result in the superblock being marked dirty, so
 * that sync() will call the filesystem's write_super callback if
 * appropriate.
*/
/*
 * __ext3_journal_stop - stop @handle and report any error it carried
 * @where:  caller identification passed to __ext3_std_error()
 * @handle: journal handle to stop
 *
 * The superblock and handle->h_err are read before journal_stop() is
 * called (presumably because the handle must not be touched afterwards
 * -- confirm against jbd).  An error already recorded in the handle
 * takes precedence over journal_stop()'s return value.  Any resulting
 * error is passed to __ext3_std_error() and returned.
 */
int __ext3_journal_stop(const char *where, handle_t *handle)
{
	struct super_block *sb;
	int err;
	int rc;

	sb = handle->h_transaction->t_journal->j_private;
	err = handle->h_err;
	rc = journal_stop(handle);

	if (!err)
		err = rc;
	if (err)
		__ext3_std_error(sb, where, err);
	return err;
}

/*
 * ext3_journal_abort_handle - record @err in @handle and abort it
 * @caller: name of the function reporting the failure (logged)
 * @err_fn: name of the journal function that failed (logged)
 * @bh:     buffer involved, may be NULL; only used for BUFFER_TRACE
 * @handle: handle to abort
 * @err:    error code to record
 *
 * Only the first error is stored in handle->h_err.  If the handle is
 * already aborted nothing is logged and journal_abort_handle() is not
 * called again.
 */
void ext3_journal_abort_handle(const char *caller, const char *err_fn,
		struct buffer_head *bh, handle_t *handle, int err)
{
	char nbuf[16];
	/* No superblock is passed, so -EROFS decodes as "Journal has aborted". */
	const char *errstr = ext3_decode_error(NULL, err, nbuf);

	if (bh)
		BUFFER_TRACE(bh, "abort");

	if (!handle->h_err)
		handle->h_err = err;

	if (is_handle_aborted(handle))
		return;

	printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n",
		caller, errstr, err_fn);

	journal_abort_handle(handle);
}

/*
 * ext3_msg - print a message tagged with the filesystem's device id
 * @sb:     superblock; sb->s_id is included in the message
 * @prefix: string emitted first in the printk format (callers in this
 *          file pass a KERN_* level here)
 * @fmt:    printf-style format for the message body; a trailing newline
 *          is appended by this function, so @fmt should not end in "\n"
 */
void ext3_msg(struct super_block *sb, const char *prefix,
		const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);

	va_end(args);
}

/* Deal with the reporting of failure conditions on a filesystem such as
 * inconsistencies detected or read IO failures.
 *
 * On ext2, we can store the error state of the filesystem in the
 * superblock.  That is not possible on ext3, because we may have other
 * write ordering constraints on the superblock which prevent us from
 * writing it out straight away; and given that the journal is about to
 * be aborted, we can't rely on the current, or future, transactions to
 * write out the superblock safely.
 *
 * We'll just use the journal_abort() error code to record an error in
 * the journal instead.  On recovery, the journal will complain about
 * that error until we've noted it down and cleared it.
176 */ 177 178static void ext3_handle_error(struct super_block *sb) 179{ 180 struct ext3_super_block *es = EXT3_SB(sb)->s_es; 181 182 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 183 es->s_state |= cpu_to_le16(EXT3_ERROR_FS); 184 185 if (sb->s_flags & MS_RDONLY) 186 return; 187 188 if (!test_opt (sb, ERRORS_CONT)) { 189 journal_t *journal = EXT3_SB(sb)->s_journal; 190 191 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); 192 if (journal) 193 journal_abort(journal, -EIO); 194 } 195 if (test_opt (sb, ERRORS_RO)) { 196 ext3_msg(sb, KERN_CRIT, 197 "error: remounting filesystem read-only"); 198 sb->s_flags |= MS_RDONLY; 199 } 200 ext3_commit_super(sb, es, 1); 201 if (test_opt(sb, ERRORS_PANIC)) 202 panic("EXT3-fs (%s): panic forced after error\n", 203 sb->s_id); 204} 205 206void ext3_error(struct super_block *sb, const char *function, 207 const char *fmt, ...) 208{ 209 struct va_format vaf; 210 va_list args; 211 212 va_start(args, fmt); 213 214 vaf.fmt = fmt; 215 vaf.va = &args; 216 217 printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n", 218 sb->s_id, function, &vaf); 219 220 va_end(args); 221 222 ext3_handle_error(sb); 223} 224 225static const char *ext3_decode_error(struct super_block * sb, int errno, 226 char nbuf[16]) 227{ 228 char *errstr = NULL; 229 230 switch (errno) { 231 case -EIO: 232 errstr = "IO failure"; 233 break; 234 case -ENOMEM: 235 errstr = "Out of memory"; 236 break; 237 case -EROFS: 238 if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT) 239 errstr = "Journal has aborted"; 240 else 241 errstr = "Readonly filesystem"; 242 break; 243 default: 244 /* If the caller passed in an extra buffer for unknown 245 * errors, textualise them now. Else we just return 246 * NULL. */ 247 if (nbuf) { 248 /* Check for truncated error codes... 
*/ 249 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 250 errstr = nbuf; 251 } 252 break; 253 } 254 255 return errstr; 256} 257 258/* __ext3_std_error decodes expected errors from journaling functions 259 * automatically and invokes the appropriate error response. */ 260 261void __ext3_std_error (struct super_block * sb, const char * function, 262 int errno) 263{ 264 char nbuf[16]; 265 const char *errstr; 266 267 /* Special case: if the error is EROFS, and we're not already 268 * inside a transaction, then there's really no point in logging 269 * an error. */ 270 if (errno == -EROFS && journal_current_handle() == NULL && 271 (sb->s_flags & MS_RDONLY)) 272 return; 273 274 errstr = ext3_decode_error(sb, errno, nbuf); 275 ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr); 276 277 ext3_handle_error(sb); 278} 279 280/* 281 * ext3_abort is a much stronger failure handler than ext3_error. The 282 * abort function may be used to deal with unrecoverable failures such 283 * as journal IO errors or ENOMEM at a critical moment in log management. 284 * 285 * We unconditionally force the filesystem into an ABORT|READONLY state, 286 * unless the error response on the fs has been set to panic in which 287 * case we take the easy way out and panic immediately. 288 */ 289 290void ext3_abort(struct super_block *sb, const char *function, 291 const char *fmt, ...) 
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n",
	       sb->s_id, function, &vaf);

	va_end(args);

	/* errors=panic: stop right here, before touching any state. */
	if (test_opt(sb, ERRORS_PANIC))
		panic("EXT3-fs: panic from previous error\n");

	/* Already read-only: nothing further to force. */
	if (sb->s_flags & MS_RDONLY)
		return;

	ext3_msg(sb, KERN_CRIT,
		"error: remounting filesystem read-only");
	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
	sb->s_flags |= MS_RDONLY;
	set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
	/* The journal pointer may be NULL here, so guard the abort. */
	if (EXT3_SB(sb)->s_journal)
		journal_abort(EXT3_SB(sb)->s_journal, -EIO);
}

/*
 * ext3_warning - log a warning for @sb; does not change filesystem state
 * @sb:       affected superblock
 * @function: name of the reporting function (logged)
 * @fmt:      printf-style description
 */
void ext3_warning(struct super_block *sb, const char *function,
		  const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n",
	       sb->s_id, function, &vaf);

	va_end(args);
}

/*
 * ext3_update_dynamic_rev - upgrade an old-revision superblock in memory
 * @sb: superblock to update
 *
 * Does nothing if the on-disk revision is already newer than
 * EXT3_GOOD_OLD_REV.  Otherwise logs a warning and fills in the first
 * inode number, inode size and revision level with the good-old defaults
 * and EXT3_DYNAMIC_REV.  This function only modifies the in-memory
 * superblock image (sbi->s_es); it does not write it out itself.
 */
void ext3_update_dynamic_rev(struct super_block *sb)
{
	struct ext3_super_block *es = EXT3_SB(sb)->s_es;

	if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
		return;

	ext3_msg(sb, KERN_WARNING,
		"warning: updating to rev %d because of "
		"new feature flag, running e2fsck is recommended",
		EXT3_DYNAMIC_REV);

	es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
	es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
	es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
	/* leave es->s_feature_*compat flags alone */
	/* es->s_uuid will be set by e2fsck if empty */

	/*
	 * The rest of the superblock fields should be zero, and if not it
	 * means they are likely already in use, so leave them alone.  We
	 * can leave it up to e2fsck to clean up any inconsistencies there.
360 */ 361} 362 363/* 364 * Open the external journal device 365 */ 366static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) 367{ 368 struct block_device *bdev; 369 char b[BDEVNAME_SIZE]; 370 371 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); 372 if (IS_ERR(bdev)) 373 goto fail; 374 return bdev; 375 376fail: 377 ext3_msg(sb, "error: failed to open journal device %s: %ld", 378 __bdevname(dev, b), PTR_ERR(bdev)); 379 380 return NULL; 381} 382 383/* 384 * Release the journal device 385 */ 386static int ext3_blkdev_put(struct block_device *bdev) 387{ 388 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 389} 390 391static int ext3_blkdev_remove(struct ext3_sb_info *sbi) 392{ 393 struct block_device *bdev; 394 int ret = -ENODEV; 395 396 bdev = sbi->journal_bdev; 397 if (bdev) { 398 ret = ext3_blkdev_put(bdev); 399 sbi->journal_bdev = NULL; 400 } 401 return ret; 402} 403 404static inline struct inode *orphan_list_entry(struct list_head *l) 405{ 406 return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode; 407} 408 409static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) 410{ 411 struct list_head *l; 412 413 ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d", 414 le32_to_cpu(sbi->s_es->s_last_orphan)); 415 416 ext3_msg(sb, KERN_ERR, "sb_info orphan list:"); 417 list_for_each(l, &sbi->s_orphan) { 418 struct inode *inode = orphan_list_entry(l); 419 ext3_msg(sb, KERN_ERR, " " 420 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 421 inode->i_sb->s_id, inode->i_ino, inode, 422 inode->i_mode, inode->i_nlink, 423 NEXT_ORPHAN(inode)); 424 } 425} 426 427static void ext3_put_super (struct super_block * sb) 428{ 429 struct ext3_sb_info *sbi = EXT3_SB(sb); 430 struct ext3_super_block *es = sbi->s_es; 431 int i, err; 432 433 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); 434 ext3_xattr_put_super(sb); 435 err = journal_destroy(sbi->s_journal); 436 sbi->s_journal = NULL; 
if (err < 0)
		ext3_abort(sb, __func__, "Couldn't clean up the journal");

	/* Writable fs: clear the RECOVER flag and store the mount state. */
	if (!(sb->s_flags & MS_RDONLY)) {
		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
		es->s_state = cpu_to_le16(sbi->s_mount_state);
		BUFFER_TRACE(sbi->s_sbh, "marking dirty");
		mark_buffer_dirty(sbi->s_sbh);
		ext3_commit_super(sb, es, 1);
	}

	/* Release group descriptor buffers, counters and the sb buffer. */
	for (i = 0; i < sbi->s_gdb_count; i++)
		brelse(sbi->s_group_desc[i]);
	kfree(sbi->s_group_desc);
	percpu_counter_destroy(&sbi->s_freeblocks_counter);
	percpu_counter_destroy(&sbi->s_freeinodes_counter);
	percpu_counter_destroy(&sbi->s_dirs_counter);
	brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		kfree(sbi->s_qf_names[i]);
#endif

	/* Debugging code just in case the in-memory inode orphan list
	 * isn't empty.  The on-disk one can be non-empty if we've
	 * detected an error and taken the fs readonly, but the
	 * in-memory list had better be clean by this point. */
	if (!list_empty(&sbi->s_orphan))
		dump_orphan_list(sb, sbi);
	J_ASSERT(list_empty(&sbi->s_orphan));

	invalidate_bdev(sb->s_bdev);
	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
		/*
		 * Invalidate the journal device's buffers.  We don't want them
		 * floating about in memory - the physical journal device may
		 * be hotswapped, and it breaks the `ro-after' testing code.
		 */
		sync_blockdev(sbi->journal_bdev);
		invalidate_bdev(sbi->journal_bdev);
		ext3_blkdev_remove(sbi);
	}
	sb->s_fs_info = NULL;
	kfree(sbi->s_blockgroup_lock);
	kfree(sbi);
}

/* Slab cache for struct ext3_inode_info; created by init_inodecache(). */
static struct kmem_cache *ext3_inode_cachep;

/*
 * Called inside transaction, so use GFP_NOFS
 *
 * Allocates an ext3_inode_info from the slab cache and returns the
 * embedded VFS inode, or NULL on allocation failure.  Only the fields
 * set below are (re)initialised here; the rest is set up once per slab
 * object by init_once().
 */
static struct inode *ext3_alloc_inode(struct super_block *sb)
{
	struct ext3_inode_info *ei;

	ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
	if (!ei)
		return NULL;
	ei->i_block_alloc_info = NULL;
	ei->vfs_inode.i_version = 1;
	atomic_set(&ei->i_datasync_tid, 0);
	atomic_set(&ei->i_sync_tid, 0);
	return &ei->vfs_inode;
}

/* .drop_inode hook: defer to generic_drop_inode() and trace the result. */
static int ext3_drop_inode(struct inode *inode)
{
	int drop = generic_drop_inode(inode);

	trace_ext3_drop_inode(inode, drop);
	return drop;
}

/* RCU callback queued by ext3_destroy_inode(): frees the inode object. */
static void ext3_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	INIT_LIST_HEAD(&inode->i_dentry);
	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
}

/*
 * .destroy_inode hook.  Complains loudly (message, hex dump of the
 * ext3_inode_info, stack trace) if the inode is still on an orphan list,
 * then frees it via call_rcu().
 */
static void ext3_destroy_inode(struct inode *inode)
{
	if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
		printk("EXT3 Inode %p: orphan list check failed!\n",
			EXT3_I(inode));
		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
				EXT3_I(inode), sizeof(struct ext3_inode_info),
				false);
		dump_stack();
	}
	call_rcu(&inode->i_rcu, ext3_i_callback);
}

/* Slab constructor: one-time initialisation of an ext3_inode_info. */
static void init_once(void *foo)
{
	struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;

	INIT_LIST_HEAD(&ei->i_orphan);
#ifdef CONFIG_EXT3_FS_XATTR
	init_rwsem(&ei->xattr_sem);
#endif
	mutex_init(&ei->truncate_mutex);
	inode_init_once(&ei->vfs_inode);
}

/* Create the ext3 inode slab cache.  Returns 0 or -ENOMEM. */
static int init_inodecache(void)
{
	ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
					     sizeof(struct ext3_inode_info),
					     0, (SLAB_RECLAIM_ACCOUNT|
						SLAB_MEM_SPREAD),
					     init_once);
	if
(ext3_inode_cachep == NULL)
		return -ENOMEM;
	return 0;
}

/* Destroy the ext3 inode slab cache. */
static void destroy_inodecache(void)
{
	kmem_cache_destroy(ext3_inode_cachep);
}

/*
 * Append the quota-related mount options of @sb to @seq.
 * Emits nothing when CONFIG_QUOTA is not defined.
 */
static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext3_sb_info *sbi = EXT3_SB(sb);

	if (sbi->s_jquota_fmt) {
		/* Stays "" for a format id not listed in the switch. */
		char *fmtname = "";

		switch (sbi->s_jquota_fmt) {
		case QFMT_VFS_OLD:
			fmtname = "vfsold";
			break;
		case QFMT_VFS_V0:
			fmtname = "vfsv0";
			break;
		case QFMT_VFS_V1:
			fmtname = "vfsv1";
			break;
		}
		seq_printf(seq, ",jqfmt=%s", fmtname);
	}

	if (sbi->s_qf_names[USRQUOTA])
		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);

	if (sbi->s_qf_names[GRPQUOTA])
		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);

	if (test_opt(sb, USRQUOTA))
		seq_puts(seq, ",usrquota");

	if (test_opt(sb, GRPQUOTA))
		seq_puts(seq, ",grpquota");
#endif
}

/*
 * Name of a data journaling mode as used in the "data=" mount option.
 * Returns "unknown" for any value other than the three mode constants.
 */
static char *data_mode_string(unsigned long mode)
{
	switch (mode) {
	case EXT3_MOUNT_JOURNAL_DATA:
		return "journal";
	case EXT3_MOUNT_ORDERED_DATA:
		return "ordered";
	case EXT3_MOUNT_WRITEBACK_DATA:
		return "writeback";
	}
	return "unknown";
}

/*
 * Show an option if
 *  - it's set to a non-default value OR
 *  - if the per-sb default is different from the global default
 *
 * .show_options hook: writes the mount options of @vfs's superblock to
 * @seq as a comma-prefixed list.  Always returns 0.
 */
static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
	struct super_block *sb = vfs->mnt_sb;
	struct ext3_sb_info *sbi = EXT3_SB(sb);
	struct ext3_super_block *es = sbi->s_es;
	unsigned long def_mount_opts;

	/* Default options recorded in the on-disk superblock. */
	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);

	/* get_sb_block() uses 1 as the default superblock location. */
	if (sbi->s_sb_block != 1)
		seq_printf(seq, ",sb=%lu", sbi->s_sb_block);
	if (test_opt(sb, MINIX_DF))
		seq_puts(seq, ",minixdf");
	if (test_opt(sb, GRPID))
		seq_puts(seq, ",grpid");
	if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS))
		seq_puts(seq, ",nogrpid");
	if (sbi->s_resuid != EXT3_DEF_RESUID ||
	    le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) {
		seq_printf(seq, ",resuid=%u", sbi->s_resuid);
	}
	if (sbi->s_resgid != EXT3_DEF_RESGID ||
	    le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) {
		seq_printf(seq, ",resgid=%u", sbi->s_resgid);
	}
	/* remount-ro is only shown when the on-disk default differs. */
	if (test_opt(sb, ERRORS_RO)) {
		int def_errors = le16_to_cpu(es->s_errors);

		if (def_errors == EXT3_ERRORS_PANIC ||
		    def_errors == EXT3_ERRORS_CONTINUE) {
			seq_puts(seq, ",errors=remount-ro");
		}
	}
	if (test_opt(sb, ERRORS_CONT))
		seq_puts(seq, ",errors=continue");
	if (test_opt(sb, ERRORS_PANIC))
		seq_puts(seq, ",errors=panic");
	if (test_opt(sb, NO_UID32))
		seq_puts(seq, ",nouid32");
	if (test_opt(sb, DEBUG))
		seq_puts(seq, ",debug");
	if (test_opt(sb, OLDALLOC))
		seq_puts(seq, ",oldalloc");
#ifdef CONFIG_EXT3_FS_XATTR
	if (test_opt(sb, XATTR_USER))
		seq_puts(seq, ",user_xattr");
	if (!test_opt(sb, XATTR_USER) &&
	    (def_mount_opts & EXT3_DEFM_XATTR_USER)) {
		seq_puts(seq, ",nouser_xattr");
	}
#endif
#ifdef CONFIG_EXT3_FS_POSIX_ACL
	if (test_opt(sb, POSIX_ACL))
		seq_puts(seq, ",acl");
	if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT3_DEFM_ACL))
		seq_puts(seq, ",noacl");
#endif
	if (!test_opt(sb, RESERVATION))
		seq_puts(seq, ",noreservation");
	/* s_commit_interval is kept in jiffies; show it in seconds. */
	if (sbi->s_commit_interval) {
		seq_printf(seq, ",commit=%u",
			   (unsigned) (sbi->s_commit_interval / HZ));
	}

	/*
	 * Always display barrier state so it's clear what the status is.
	 */
	seq_puts(seq, ",barrier=");
	seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
	seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS)));
	if (test_opt(sb, DATA_ERR_ABORT))
		seq_puts(seq, ",data_err=abort");

	if (test_opt(sb, NOLOAD))
		seq_puts(seq, ",norecovery");

	ext3_show_quota_options(seq, sb);

	return 0;
}


/*
 * Look up the inode for an NFS file handle.
 *
 * Returns ERR_PTR(-ESTALE) for inode numbers below the first regular
 * inode (other than the root inode), for numbers beyond the superblock's
 * inode count, and when a non-zero @generation does not match the
 * inode's.  Errors from ext3_iget() are passed through.
 */
static struct inode *ext3_nfs_get_inode(struct super_block *sb,
		u64 ino, u32 generation)
{
	struct inode *inode;

	if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO)
		return ERR_PTR(-ESTALE);
	if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
		return ERR_PTR(-ESTALE);

	/* iget isn't really right if the inode is currently unallocated!!
	 *
	 * ext3_read_inode will return a bad_inode if the inode had been
	 * deleted, so we should be safe.
	 * NOTE(review): the code below calls ext3_iget(), not
	 * ext3_read_inode(); the sentence above may be stale -- confirm.
	 *
	 * Currently we don't know the generation for parent directory, so
	 * a generation of 0 means "accept any"
	 */
	inode = ext3_iget(sb, ino);
	if (IS_ERR(inode))
		return ERR_CAST(inode);
	if (generation && inode->i_generation != generation) {
		iput(inode);
		return ERR_PTR(-ESTALE);
	}

	return inode;
}

/* .fh_to_dentry export hook, built on ext3_nfs_get_inode(). */
static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid,
		int fh_len, int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				    ext3_nfs_get_inode);
}

/* .fh_to_parent export hook, built on ext3_nfs_get_inode(). */
static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid,
		int fh_len, int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				    ext3_nfs_get_inode);
}

/*
 * Try to release metadata pages (indirect blocks, directories) which are
 * mapped via the block device.  Since these pages could have journal heads
 * which would prevent try_to_free_buffers() from freeing them, we must use
 * jbd layer's try_to_free_buffers() function to release them.
*/
static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
				 gfp_t wait)
{
	journal_t *journal = EXT3_SB(sb)->s_journal;

	WARN_ON(PageChecked(page));
	if (!page_has_buffers(page))
		return 0;
	/* With a journal, go through jbd; __GFP_WAIT is masked off. */
	if (journal)
		return journal_try_to_free_buffers(journal, page,
						   wait & ~__GFP_WAIT);
	return try_to_free_buffers(page);
}

#ifdef CONFIG_QUOTA
/* Human-readable name of a quota type, used in messages. */
#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
/* Pastes the USRJQUOTA/GRPJQUOTA suffix onto the token prefix `on`. */
#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))

static int ext3_write_dquot(struct dquot *dquot);
static int ext3_acquire_dquot(struct dquot *dquot);
static int ext3_release_dquot(struct dquot *dquot);
static int ext3_mark_dquot_dirty(struct dquot *dquot);
static int ext3_write_info(struct super_block *sb, int type);
static int ext3_quota_on(struct super_block *sb, int type, int format_id,
			 struct path *path);
static int ext3_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
			       size_t len, loff_t off);
static ssize_t ext3_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off);

/* dquot callbacks: ext3 wrappers plus the generic alloc/destroy. */
static const struct dquot_operations ext3_quota_operations = {
	.write_dquot	= ext3_write_dquot,
	.acquire_dquot	= ext3_acquire_dquot,
	.release_dquot	= ext3_release_dquot,
	.mark_dirty	= ext3_mark_dquot_dirty,
	.write_info	= ext3_write_info,
	.alloc_dquot	= dquot_alloc,
	.destroy_dquot	= dquot_destroy,
};

/* quotactl callbacks: only quota_on is ext3-specific. */
static const struct quotactl_ops ext3_qctl_operations = {
	.quota_on	= ext3_quota_on,
	.quota_off	= dquot_quota_off,
	.quota_sync	= dquot_quota_sync,
	.get_info	= dquot_get_dqinfo,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.set_dqblk	= dquot_set_dqblk
};
#endif

/* Superblock operations for ext3. */
static const struct super_operations ext3_sops = {
	.alloc_inode	= ext3_alloc_inode,
	.destroy_inode	= ext3_destroy_inode,
	.write_inode	= ext3_write_inode,
	.dirty_inode	= ext3_dirty_inode,
	.drop_inode	= ext3_drop_inode,
	.evict_inode	= ext3_evict_inode,
	.put_super	= ext3_put_super,
	.sync_fs	= ext3_sync_fs,
	.freeze_fs	= ext3_freeze,
	.unfreeze_fs	= ext3_unfreeze,
	.statfs		= ext3_statfs,
	.remount_fs	= ext3_remount,
	.show_options	= ext3_show_options,
#ifdef CONFIG_QUOTA
	.quota_read	= ext3_quota_read,
	.quota_write	= ext3_quota_write,
#endif
	.bdev_try_to_free_page = bdev_try_to_free_page,
};

/* NFS export operations for ext3. */
static const struct export_operations ext3_export_ops = {
	.fh_to_dentry = ext3_fh_to_dentry,
	.fh_to_parent = ext3_fh_to_parent,
	.get_parent = ext3_get_parent,
};

/* Token ids for mount options; mapped from strings by `tokens` below. */
enum {
	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
	Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_data_err_abort, Opt_data_err_ignore,
	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
	Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
	Opt_resize, Opt_usrquota, Opt_grpquota
};

/* Mount option patterns handed to match_token() by parse_options(). */
static const match_table_t tokens = {
	{Opt_bsd_df, "bsddf"},
	{Opt_minix_df, "minixdf"},
	{Opt_grpid, "grpid"},
	{Opt_grpid, "bsdgroups"},
	{Opt_nogrpid, "nogrpid"},
	{Opt_nogrpid, "sysvgroups"},
	{Opt_resgid, "resgid=%u"},
	{Opt_resuid, "resuid=%u"},
	{Opt_sb, "sb=%u"},
	{Opt_err_cont, "errors=continue"},
	{Opt_err_panic, "errors=panic"},
	{Opt_err_ro, "errors=remount-ro"},
	{Opt_nouid32, "nouid32"},
	{Opt_nocheck, "nocheck"},
854 {Opt_nocheck, "check=none"}, 855 {Opt_debug, "debug"}, 856 {Opt_oldalloc, "oldalloc"}, 857 {Opt_orlov, "orlov"}, 858 {Opt_user_xattr, "user_xattr"}, 859 {Opt_nouser_xattr, "nouser_xattr"}, 860 {Opt_acl, "acl"}, 861 {Opt_noacl, "noacl"}, 862 {Opt_reservation, "reservation"}, 863 {Opt_noreservation, "noreservation"}, 864 {Opt_noload, "noload"}, 865 {Opt_noload, "norecovery"}, 866 {Opt_nobh, "nobh"}, 867 {Opt_bh, "bh"}, 868 {Opt_commit, "commit=%u"}, 869 {Opt_journal_update, "journal=update"}, 870 {Opt_journal_inum, "journal=%u"}, 871 {Opt_journal_dev, "journal_dev=%u"}, 872 {Opt_abort, "abort"}, 873 {Opt_data_journal, "data=journal"}, 874 {Opt_data_ordered, "data=ordered"}, 875 {Opt_data_writeback, "data=writeback"}, 876 {Opt_data_err_abort, "data_err=abort"}, 877 {Opt_data_err_ignore, "data_err=ignore"}, 878 {Opt_offusrjquota, "usrjquota="}, 879 {Opt_usrjquota, "usrjquota=%s"}, 880 {Opt_offgrpjquota, "grpjquota="}, 881 {Opt_grpjquota, "grpjquota=%s"}, 882 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 883 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 884 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, 885 {Opt_grpquota, "grpquota"}, 886 {Opt_noquota, "noquota"}, 887 {Opt_quota, "quota"}, 888 {Opt_usrquota, "usrquota"}, 889 {Opt_barrier, "barrier=%u"}, 890 {Opt_barrier, "barrier"}, 891 {Opt_nobarrier, "nobarrier"}, 892 {Opt_resize, "resize"}, 893 {Opt_err, NULL}, 894}; 895 896static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb) 897{ 898 ext3_fsblk_t sb_block; 899 char *options = (char *) *data; 900 901 if (!options || strncmp(options, "sb=", 3) != 0) 902 return 1; /* Default location */ 903 options += 3; 904 /*todo: use simple_strtoll with >32bit ext3 */ 905 sb_block = simple_strtoul(options, &options, 0); 906 if (*options && *options != ',') { 907 ext3_msg(sb, "error: invalid sb specification: %s", 908 (char *) *data); 909 return 1; 910 } 911 if (*options == ',') 912 options++; 913 *data = (void *) options; 914 return sb_block; 915} 916 917#ifdef CONFIG_QUOTA 918static int 
set_qf_name(struct super_block *sb, int qtype, substring_t *args) 919{ 920 struct ext3_sb_info *sbi = EXT3_SB(sb); 921 char *qname; 922 923 if (sb_any_quota_loaded(sb) && 924 !sbi->s_qf_names[qtype]) { 925 ext3_msg(sb, KERN_ERR, 926 "Cannot change journaled " 927 "quota options when quota turned on"); 928 return 0; 929 } 930 qname = match_strdup(args); 931 if (!qname) { 932 ext3_msg(sb, KERN_ERR, 933 "Not enough memory for storing quotafile name"); 934 return 0; 935 } 936 if (sbi->s_qf_names[qtype] && 937 strcmp(sbi->s_qf_names[qtype], qname)) { 938 ext3_msg(sb, KERN_ERR, 939 "%s quota file already specified", QTYPE2NAME(qtype)); 940 kfree(qname); 941 return 0; 942 } 943 sbi->s_qf_names[qtype] = qname; 944 if (strchr(sbi->s_qf_names[qtype], '/')) { 945 ext3_msg(sb, KERN_ERR, 946 "quotafile must be on filesystem root"); 947 kfree(sbi->s_qf_names[qtype]); 948 sbi->s_qf_names[qtype] = NULL; 949 return 0; 950 } 951 set_opt(sbi->s_mount_opt, QUOTA); 952 return 1; 953} 954 955static int clear_qf_name(struct super_block *sb, int qtype) { 956 957 struct ext3_sb_info *sbi = EXT3_SB(sb); 958 959 if (sb_any_quota_loaded(sb) && 960 sbi->s_qf_names[qtype]) { 961 ext3_msg(sb, KERN_ERR, "Cannot change journaled quota options" 962 " when quota turned on"); 963 return 0; 964 } 965 /* 966 * The space will be released later when all options are confirmed 967 * to be correct 968 */ 969 sbi->s_qf_names[qtype] = NULL; 970 return 1; 971} 972#endif 973 974static int parse_options (char *options, struct super_block *sb, 975 unsigned int *inum, unsigned long *journal_devnum, 976 ext3_fsblk_t *n_blocks_count, int is_remount) 977{ 978 struct ext3_sb_info *sbi = EXT3_SB(sb); 979 char * p; 980 substring_t args[MAX_OPT_ARGS]; 981 int data_opt = 0; 982 int option; 983#ifdef CONFIG_QUOTA 984 int qfmt; 985#endif 986 987 if (!options) 988 return 1; 989 990 while ((p = strsep (&options, ",")) != NULL) { 991 int token; 992 if (!*p) 993 continue; 994 /* 995 * Initialize args struct so we know whether 
arg was 996 * found; some options take optional arguments. 997 */ 998 args[0].to = args[0].from = 0; 999 token = match_token(p, tokens, args); 1000 switch (token) { 1001 case Opt_bsd_df: 1002 clear_opt (sbi->s_mount_opt, MINIX_DF); 1003 break; 1004 case Opt_minix_df: 1005 set_opt (sbi->s_mount_opt, MINIX_DF); 1006 break; 1007 case Opt_grpid: 1008 set_opt (sbi->s_mount_opt, GRPID); 1009 break; 1010 case Opt_nogrpid: 1011 clear_opt (sbi->s_mount_opt, GRPID); 1012 break; 1013 case Opt_resuid: 1014 if (match_int(&args[0], &option)) 1015 return 0; 1016 sbi->s_resuid = option; 1017 break; 1018 case Opt_resgid: 1019 if (match_int(&args[0], &option)) 1020 return 0; 1021 sbi->s_resgid = option; 1022 break; 1023 case Opt_sb: 1024 /* handled by get_sb_block() instead of here */ 1025 /* *sb_block = match_int(&args[0]); */ 1026 break; 1027 case Opt_err_panic: 1028 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1029 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1030 set_opt (sbi->s_mount_opt, ERRORS_PANIC); 1031 break; 1032 case Opt_err_ro: 1033 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1034 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1035 set_opt (sbi->s_mount_opt, ERRORS_RO); 1036 break; 1037 case Opt_err_cont: 1038 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1039 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1040 set_opt (sbi->s_mount_opt, ERRORS_CONT); 1041 break; 1042 case Opt_nouid32: 1043 set_opt (sbi->s_mount_opt, NO_UID32); 1044 break; 1045 case Opt_nocheck: 1046 clear_opt (sbi->s_mount_opt, CHECK); 1047 break; 1048 case Opt_debug: 1049 set_opt (sbi->s_mount_opt, DEBUG); 1050 break; 1051 case Opt_oldalloc: 1052 set_opt (sbi->s_mount_opt, OLDALLOC); 1053 break; 1054 case Opt_orlov: 1055 clear_opt (sbi->s_mount_opt, OLDALLOC); 1056 break; 1057#ifdef CONFIG_EXT3_FS_XATTR 1058 case Opt_user_xattr: 1059 set_opt (sbi->s_mount_opt, XATTR_USER); 1060 break; 1061 case Opt_nouser_xattr: 1062 clear_opt (sbi->s_mount_opt, XATTR_USER); 1063 break; 1064#else 1065 case Opt_user_xattr: 1066 case 
Opt_nouser_xattr: 1067 ext3_msg(sb, KERN_INFO, 1068 "(no)user_xattr options not supported"); 1069 break; 1070#endif 1071#ifdef CONFIG_EXT3_FS_POSIX_ACL 1072 case Opt_acl: 1073 set_opt(sbi->s_mount_opt, POSIX_ACL); 1074 break; 1075 case Opt_noacl: 1076 clear_opt(sbi->s_mount_opt, POSIX_ACL); 1077 break; 1078#else 1079 case Opt_acl: 1080 case Opt_noacl: 1081 ext3_msg(sb, KERN_INFO, 1082 "(no)acl options not supported"); 1083 break; 1084#endif 1085 case Opt_reservation: 1086 set_opt(sbi->s_mount_opt, RESERVATION); 1087 break; 1088 case Opt_noreservation: 1089 clear_opt(sbi->s_mount_opt, RESERVATION); 1090 break; 1091 case Opt_journal_update: 1092 /* @@@ FIXME */ 1093 /* Eventually we will want to be able to create 1094 a journal file here. For now, only allow the 1095 user to specify an existing inode to be the 1096 journal file. */ 1097 if (is_remount) { 1098 ext3_msg(sb, KERN_ERR, "error: cannot specify " 1099 "journal on remount"); 1100 return 0; 1101 } 1102 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); 1103 break; 1104 case Opt_journal_inum: 1105 if (is_remount) { 1106 ext3_msg(sb, KERN_ERR, "error: cannot specify " 1107 "journal on remount"); 1108 return 0; 1109 } 1110 if (match_int(&args[0], &option)) 1111 return 0; 1112 *inum = option; 1113 break; 1114 case Opt_journal_dev: 1115 if (is_remount) { 1116 ext3_msg(sb, KERN_ERR, "error: cannot specify " 1117 "journal on remount"); 1118 return 0; 1119 } 1120 if (match_int(&args[0], &option)) 1121 return 0; 1122 *journal_devnum = option; 1123 break; 1124 case Opt_noload: 1125 set_opt (sbi->s_mount_opt, NOLOAD); 1126 break; 1127 case Opt_commit: 1128 if (match_int(&args[0], &option)) 1129 return 0; 1130 if (option < 0) 1131 return 0; 1132 if (option == 0) 1133 option = JBD_DEFAULT_MAX_COMMIT_AGE; 1134 sbi->s_commit_interval = HZ * option; 1135 break; 1136 case Opt_data_journal: 1137 data_opt = EXT3_MOUNT_JOURNAL_DATA; 1138 goto datacheck; 1139 case Opt_data_ordered: 1140 data_opt = EXT3_MOUNT_ORDERED_DATA; 1141 goto 
datacheck; 1142 case Opt_data_writeback: 1143 data_opt = EXT3_MOUNT_WRITEBACK_DATA; 1144 datacheck: 1145 if (is_remount) { 1146 if (test_opt(sb, DATA_FLAGS) == data_opt) 1147 break; 1148 ext3_msg(sb, KERN_ERR, 1149 "error: cannot change " 1150 "data mode on remount. The filesystem " 1151 "is mounted in data=%s mode and you " 1152 "try to remount it in data=%s mode.", 1153 data_mode_string(test_opt(sb, 1154 DATA_FLAGS)), 1155 data_mode_string(data_opt)); 1156 return 0; 1157 } else { 1158 clear_opt(sbi->s_mount_opt, DATA_FLAGS); 1159 sbi->s_mount_opt |= data_opt; 1160 } 1161 break; 1162 case Opt_data_err_abort: 1163 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1164 break; 1165 case Opt_data_err_ignore: 1166 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1167 break; 1168#ifdef CONFIG_QUOTA 1169 case Opt_usrjquota: 1170 if (!set_qf_name(sb, USRQUOTA, &args[0])) 1171 return 0; 1172 break; 1173 case Opt_grpjquota: 1174 if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1175 return 0; 1176 break; 1177 case Opt_offusrjquota: 1178 if (!clear_qf_name(sb, USRQUOTA)) 1179 return 0; 1180 break; 1181 case Opt_offgrpjquota: 1182 if (!clear_qf_name(sb, GRPQUOTA)) 1183 return 0; 1184 break; 1185 case Opt_jqfmt_vfsold: 1186 qfmt = QFMT_VFS_OLD; 1187 goto set_qf_format; 1188 case Opt_jqfmt_vfsv0: 1189 qfmt = QFMT_VFS_V0; 1190 goto set_qf_format; 1191 case Opt_jqfmt_vfsv1: 1192 qfmt = QFMT_VFS_V1; 1193set_qf_format: 1194 if (sb_any_quota_loaded(sb) && 1195 sbi->s_jquota_fmt != qfmt) { 1196 ext3_msg(sb, KERN_ERR, "error: cannot change " 1197 "journaled quota options when " 1198 "quota turned on."); 1199 return 0; 1200 } 1201 sbi->s_jquota_fmt = qfmt; 1202 break; 1203 case Opt_quota: 1204 case Opt_usrquota: 1205 set_opt(sbi->s_mount_opt, QUOTA); 1206 set_opt(sbi->s_mount_opt, USRQUOTA); 1207 break; 1208 case Opt_grpquota: 1209 set_opt(sbi->s_mount_opt, QUOTA); 1210 set_opt(sbi->s_mount_opt, GRPQUOTA); 1211 break; 1212 case Opt_noquota: 1213 if (sb_any_quota_loaded(sb)) { 1214 ext3_msg(sb, KERN_ERR, 
"error: cannot change " 1215 "quota options when quota turned on."); 1216 return 0; 1217 } 1218 clear_opt(sbi->s_mount_opt, QUOTA); 1219 clear_opt(sbi->s_mount_opt, USRQUOTA); 1220 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1221 break; 1222#else 1223 case Opt_quota: 1224 case Opt_usrquota: 1225 case Opt_grpquota: 1226 ext3_msg(sb, KERN_ERR, 1227 "error: quota options not supported."); 1228 break; 1229 case Opt_usrjquota: 1230 case Opt_grpjquota: 1231 case Opt_offusrjquota: 1232 case Opt_offgrpjquota: 1233 case Opt_jqfmt_vfsold: 1234 case Opt_jqfmt_vfsv0: 1235 case Opt_jqfmt_vfsv1: 1236 ext3_msg(sb, KERN_ERR, 1237 "error: journaled quota options not " 1238 "supported."); 1239 break; 1240 case Opt_noquota: 1241 break; 1242#endif 1243 case Opt_abort: 1244 set_opt(sbi->s_mount_opt, ABORT); 1245 break; 1246 case Opt_nobarrier: 1247 clear_opt(sbi->s_mount_opt, BARRIER); 1248 break; 1249 case Opt_barrier: 1250 if (args[0].from) { 1251 if (match_int(&args[0], &option)) 1252 return 0; 1253 } else 1254 option = 1; /* No argument, default to 1 */ 1255 if (option) 1256 set_opt(sbi->s_mount_opt, BARRIER); 1257 else 1258 clear_opt(sbi->s_mount_opt, BARRIER); 1259 break; 1260 case Opt_ignore: 1261 break; 1262 case Opt_resize: 1263 if (!is_remount) { 1264 ext3_msg(sb, KERN_ERR, 1265 "error: resize option only available " 1266 "for remount"); 1267 return 0; 1268 } 1269 if (match_int(&args[0], &option) != 0) 1270 return 0; 1271 *n_blocks_count = option; 1272 break; 1273 case Opt_nobh: 1274 ext3_msg(sb, KERN_WARNING, 1275 "warning: ignoring deprecated nobh option"); 1276 break; 1277 case Opt_bh: 1278 ext3_msg(sb, KERN_WARNING, 1279 "warning: ignoring deprecated bh option"); 1280 break; 1281 default: 1282 ext3_msg(sb, KERN_ERR, 1283 "error: unrecognized mount option \"%s\" " 1284 "or missing value", p); 1285 return 0; 1286 } 1287 } 1288#ifdef CONFIG_QUOTA 1289 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { 1290 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) 1291 
clear_opt(sbi->s_mount_opt, USRQUOTA); 1292 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) 1293 clear_opt(sbi->s_mount_opt, GRPQUOTA); 1294 1295 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { 1296 ext3_msg(sb, KERN_ERR, "error: old and new quota " 1297 "format mixing."); 1298 return 0; 1299 } 1300 1301 if (!sbi->s_jquota_fmt) { 1302 ext3_msg(sb, KERN_ERR, "error: journaled quota format " 1303 "not specified."); 1304 return 0; 1305 } 1306 } else { 1307 if (sbi->s_jquota_fmt) { 1308 ext3_msg(sb, KERN_ERR, "error: journaled quota format " 1309 "specified with no journaling " 1310 "enabled."); 1311 return 0; 1312 } 1313 } 1314#endif 1315 return 1; 1316} 1317 1318static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, 1319 int read_only) 1320{ 1321 struct ext3_sb_info *sbi = EXT3_SB(sb); 1322 int res = 0; 1323 1324 if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) { 1325 ext3_msg(sb, KERN_ERR, 1326 "error: revision level too high, " 1327 "forcing read-only mode"); 1328 res = MS_RDONLY; 1329 } 1330 if (read_only) 1331 return res; 1332 if (!(sbi->s_mount_state & EXT3_VALID_FS)) 1333 ext3_msg(sb, KERN_WARNING, 1334 "warning: mounting unchecked fs, " 1335 "running e2fsck is recommended"); 1336 else if ((sbi->s_mount_state & EXT3_ERROR_FS)) 1337 ext3_msg(sb, KERN_WARNING, 1338 "warning: mounting fs with errors, " 1339 "running e2fsck is recommended"); 1340 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 && 1341 le16_to_cpu(es->s_mnt_count) >= 1342 le16_to_cpu(es->s_max_mnt_count)) 1343 ext3_msg(sb, KERN_WARNING, 1344 "warning: maximal mount count reached, " 1345 "running e2fsck is recommended"); 1346 else if (le32_to_cpu(es->s_checkinterval) && 1347 (le32_to_cpu(es->s_lastcheck) + 1348 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1349 ext3_msg(sb, KERN_WARNING, 1350 "warning: checktime reached, " 1351 "running e2fsck is recommended"); 1352#if 0 1353 /* @@@ We _will_ want to clear the valid bit if we find 1354 
inconsistencies, to force a fsck at reboot. But for 1355 a plain journaled filesystem we can keep it set as 1356 valid forever! :) */ 1357 es->s_state &= cpu_to_le16(~EXT3_VALID_FS); 1358#endif 1359 if (!le16_to_cpu(es->s_max_mnt_count)) 1360 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); 1361 le16_add_cpu(&es->s_mnt_count, 1); 1362 es->s_mtime = cpu_to_le32(get_seconds()); 1363 ext3_update_dynamic_rev(sb); 1364 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 1365 1366 ext3_commit_super(sb, es, 1); 1367 if (test_opt(sb, DEBUG)) 1368 ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, " 1369 "bpg=%lu, ipg=%lu, mo=%04lx]", 1370 sb->s_blocksize, 1371 sbi->s_groups_count, 1372 EXT3_BLOCKS_PER_GROUP(sb), 1373 EXT3_INODES_PER_GROUP(sb), 1374 sbi->s_mount_opt); 1375 1376 if (EXT3_SB(sb)->s_journal->j_inode == NULL) { 1377 char b[BDEVNAME_SIZE]; 1378 ext3_msg(sb, KERN_INFO, "using external journal on %s", 1379 bdevname(EXT3_SB(sb)->s_journal->j_dev, b)); 1380 } else { 1381 ext3_msg(sb, KERN_INFO, "using internal journal"); 1382 } 1383 cleancache_init_fs(sb); 1384 return res; 1385} 1386 1387/* Called at mount-time, super-block is locked */ 1388static int ext3_check_descriptors(struct super_block *sb) 1389{ 1390 struct ext3_sb_info *sbi = EXT3_SB(sb); 1391 int i; 1392 1393 ext3_debug ("Checking group descriptors"); 1394 1395 for (i = 0; i < sbi->s_groups_count; i++) { 1396 struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL); 1397 ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i); 1398 ext3_fsblk_t last_block; 1399 1400 if (i == sbi->s_groups_count - 1) 1401 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; 1402 else 1403 last_block = first_block + 1404 (EXT3_BLOCKS_PER_GROUP(sb) - 1); 1405 1406 if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || 1407 le32_to_cpu(gdp->bg_block_bitmap) > last_block) 1408 { 1409 ext3_error (sb, "ext3_check_descriptors", 1410 "Block bitmap for group %d" 1411 " not in group (block %lu)!", 1412 i, 
(unsigned long) 1413 le32_to_cpu(gdp->bg_block_bitmap)); 1414 return 0; 1415 } 1416 if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block || 1417 le32_to_cpu(gdp->bg_inode_bitmap) > last_block) 1418 { 1419 ext3_error (sb, "ext3_check_descriptors", 1420 "Inode bitmap for group %d" 1421 " not in group (block %lu)!", 1422 i, (unsigned long) 1423 le32_to_cpu(gdp->bg_inode_bitmap)); 1424 return 0; 1425 } 1426 if (le32_to_cpu(gdp->bg_inode_table) < first_block || 1427 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 > 1428 last_block) 1429 { 1430 ext3_error (sb, "ext3_check_descriptors", 1431 "Inode table for group %d" 1432 " not in group (block %lu)!", 1433 i, (unsigned long) 1434 le32_to_cpu(gdp->bg_inode_table)); 1435 return 0; 1436 } 1437 } 1438 1439 sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb)); 1440 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb)); 1441 return 1; 1442} 1443 1444 1445/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at 1446 * the superblock) which were deleted from all directories, but held open by 1447 * a process at the time of a crash. We walk the list and try to delete these 1448 * inodes at recovery time (only with a read-write filesystem). 1449 * 1450 * In order to keep the orphan inode chain consistent during traversal (in 1451 * case of crash during recovery), we link each inode into the superblock 1452 * orphan list_head and handle it the same way as an inode deletion during 1453 * normal operation (which journals the operations for us). 1454 * 1455 * We only do an iget() and an iput() on each inode, which is very safe if we 1456 * accidentally point at an in-use or already deleted inode. The worst that 1457 * can happen in this case is that we get a "bit already cleared" message from 1458 * ext3_free_inode(). 
The only reason we would point at a wrong inode is if 1459 * e2fsck was run on this filesystem, and it must have already done the orphan 1460 * inode cleanup for us, so we can safely abort without any further action. 1461 */ 1462static void ext3_orphan_cleanup (struct super_block * sb, 1463 struct ext3_super_block * es) 1464{ 1465 unsigned int s_flags = sb->s_flags; 1466 int nr_orphans = 0, nr_truncates = 0; 1467#ifdef CONFIG_QUOTA 1468 int i; 1469#endif 1470 if (!es->s_last_orphan) { 1471 jbd_debug(4, "no orphan inodes to clean up\n"); 1472 return; 1473 } 1474 1475 if (bdev_read_only(sb->s_bdev)) { 1476 ext3_msg(sb, KERN_ERR, "error: write access " 1477 "unavailable, skipping orphan cleanup."); 1478 return; 1479 } 1480 1481 /* Check if feature set allows readwrite operations */ 1482 if (EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) { 1483 ext3_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " 1484 "unknown ROCOMPAT features"); 1485 return; 1486 } 1487 1488 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { 1489 if (es->s_last_orphan) 1490 jbd_debug(1, "Errors on filesystem, " 1491 "clearing orphan list.\n"); 1492 es->s_last_orphan = 0; 1493 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1494 return; 1495 } 1496 1497 if (s_flags & MS_RDONLY) { 1498 ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); 1499 sb->s_flags &= ~MS_RDONLY; 1500 } 1501#ifdef CONFIG_QUOTA 1502 /* Needed for iput() to work correctly and not trash data */ 1503 sb->s_flags |= MS_ACTIVE; 1504 /* Turn on quotas so that they are updated correctly */ 1505 for (i = 0; i < MAXQUOTAS; i++) { 1506 if (EXT3_SB(sb)->s_qf_names[i]) { 1507 int ret = ext3_quota_on_mount(sb, i); 1508 if (ret < 0) 1509 ext3_msg(sb, KERN_ERR, 1510 "error: cannot turn on journaled " 1511 "quota: %d", ret); 1512 } 1513 } 1514#endif 1515 1516 while (es->s_last_orphan) { 1517 struct inode *inode; 1518 1519 inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); 1520 if (IS_ERR(inode)) { 
1521 es->s_last_orphan = 0; 1522 break; 1523 } 1524 1525 list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); 1526 dquot_initialize(inode); 1527 if (inode->i_nlink) { 1528 printk(KERN_DEBUG 1529 "%s: truncating inode %lu to %Ld bytes\n", 1530 __func__, inode->i_ino, inode->i_size); 1531 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1532 inode->i_ino, inode->i_size); 1533 ext3_truncate(inode); 1534 nr_truncates++; 1535 } else { 1536 printk(KERN_DEBUG 1537 "%s: deleting unreferenced inode %lu\n", 1538 __func__, inode->i_ino); 1539 jbd_debug(2, "deleting unreferenced inode %lu\n", 1540 inode->i_ino); 1541 nr_orphans++; 1542 } 1543 iput(inode); /* The delete magic happens here! */ 1544 } 1545 1546#define PLURAL(x) (x), ((x)==1) ? "" : "s" 1547 1548 if (nr_orphans) 1549 ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted", 1550 PLURAL(nr_orphans)); 1551 if (nr_truncates) 1552 ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up", 1553 PLURAL(nr_truncates)); 1554#ifdef CONFIG_QUOTA 1555 /* Turn quotas off */ 1556 for (i = 0; i < MAXQUOTAS; i++) { 1557 if (sb_dqopt(sb)->files[i]) 1558 dquot_quota_off(sb, i); 1559 } 1560#endif 1561 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1562} 1563 1564/* 1565 * Maximal file size. There is a direct, and {,double-,triple-}indirect 1566 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. 1567 * We need to be 1 filesystem block less than the 2^32 sector limit. 
1568 */ 1569static loff_t ext3_max_size(int bits) 1570{ 1571 loff_t res = EXT3_NDIR_BLOCKS; 1572 int meta_blocks; 1573 loff_t upper_limit; 1574 1575 /* This is calculated to be the largest file size for a 1576 * dense, file such that the total number of 1577 * sectors in the file, including data and all indirect blocks, 1578 * does not exceed 2^32 -1 1579 * __u32 i_blocks representing the total number of 1580 * 512 bytes blocks of the file 1581 */ 1582 upper_limit = (1LL << 32) - 1; 1583 1584 /* total blocks in file system block size */ 1585 upper_limit >>= (bits - 9); 1586 1587 1588 /* indirect blocks */ 1589 meta_blocks = 1; 1590 /* double indirect blocks */ 1591 meta_blocks += 1 + (1LL << (bits-2)); 1592 /* tripple indirect blocks */ 1593 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); 1594 1595 upper_limit -= meta_blocks; 1596 upper_limit <<= bits; 1597 1598 res += 1LL << (bits-2); 1599 res += 1LL << (2*(bits-2)); 1600 res += 1LL << (3*(bits-2)); 1601 res <<= bits; 1602 if (res > upper_limit) 1603 res = upper_limit; 1604 1605 if (res > MAX_LFS_FILESIZE) 1606 res = MAX_LFS_FILESIZE; 1607 1608 return res; 1609} 1610 1611static ext3_fsblk_t descriptor_loc(struct super_block *sb, 1612 ext3_fsblk_t logic_sb_block, 1613 int nr) 1614{ 1615 struct ext3_sb_info *sbi = EXT3_SB(sb); 1616 unsigned long bg, first_meta_bg; 1617 int has_super = 0; 1618 1619 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 1620 1621 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || 1622 nr < first_meta_bg) 1623 return (logic_sb_block + nr + 1); 1624 bg = sbi->s_desc_per_block * nr; 1625 if (ext3_bg_has_super(sb, bg)) 1626 has_super = 1; 1627 return (has_super + ext3_group_first_block_no(sb, bg)); 1628} 1629 1630 1631static int ext3_fill_super (struct super_block *sb, void *data, int silent) 1632{ 1633 struct buffer_head * bh; 1634 struct ext3_super_block *es = NULL; 1635 struct ext3_sb_info *sbi; 1636 ext3_fsblk_t block; 1637 ext3_fsblk_t sb_block = 
get_sb_block(&data, sb); 1638 ext3_fsblk_t logic_sb_block; 1639 unsigned long offset = 0; 1640 unsigned int journal_inum = 0; 1641 unsigned long journal_devnum = 0; 1642 unsigned long def_mount_opts; 1643 struct inode *root; 1644 int blocksize; 1645 int hblock; 1646 int db_count; 1647 int i; 1648 int needs_recovery; 1649 int ret = -EINVAL; 1650 __le32 features; 1651 int err; 1652 1653 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1654 if (!sbi) 1655 return -ENOMEM; 1656 1657 sbi->s_blockgroup_lock = 1658 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 1659 if (!sbi->s_blockgroup_lock) { 1660 kfree(sbi); 1661 return -ENOMEM; 1662 } 1663 sb->s_fs_info = sbi; 1664 sbi->s_mount_opt = 0; 1665 sbi->s_resuid = EXT3_DEF_RESUID; 1666 sbi->s_resgid = EXT3_DEF_RESGID; 1667 sbi->s_sb_block = sb_block; 1668 1669 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); 1670 if (!blocksize) { 1671 ext3_msg(sb, KERN_ERR, "error: unable to set blocksize"); 1672 goto out_fail; 1673 } 1674 1675 /* 1676 * The ext3 superblock will not be buffer aligned for other than 1kB 1677 * block sizes. We need to calculate the offset from buffer start. 
1678 */ 1679 if (blocksize != EXT3_MIN_BLOCK_SIZE) { 1680 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1681 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1682 } else { 1683 logic_sb_block = sb_block; 1684 } 1685 1686 if (!(bh = sb_bread(sb, logic_sb_block))) { 1687 ext3_msg(sb, KERN_ERR, "error: unable to read superblock"); 1688 goto out_fail; 1689 } 1690 /* 1691 * Note: s_es must be initialized as soon as possible because 1692 * some ext3 macro-instructions depend on its value 1693 */ 1694 es = (struct ext3_super_block *) (bh->b_data + offset); 1695 sbi->s_es = es; 1696 sb->s_magic = le16_to_cpu(es->s_magic); 1697 if (sb->s_magic != EXT3_SUPER_MAGIC) 1698 goto cantfind_ext3; 1699 1700 /* Set defaults before we parse the mount options */ 1701 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 1702 if (def_mount_opts & EXT3_DEFM_DEBUG) 1703 set_opt(sbi->s_mount_opt, DEBUG); 1704 if (def_mount_opts & EXT3_DEFM_BSDGROUPS) 1705 set_opt(sbi->s_mount_opt, GRPID); 1706 if (def_mount_opts & EXT3_DEFM_UID16) 1707 set_opt(sbi->s_mount_opt, NO_UID32); 1708#ifdef CONFIG_EXT3_FS_XATTR 1709 if (def_mount_opts & EXT3_DEFM_XATTR_USER) 1710 set_opt(sbi->s_mount_opt, XATTR_USER); 1711#endif 1712#ifdef CONFIG_EXT3_FS_POSIX_ACL 1713 if (def_mount_opts & EXT3_DEFM_ACL) 1714 set_opt(sbi->s_mount_opt, POSIX_ACL); 1715#endif 1716 if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA) 1717 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 1718 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED) 1719 set_opt(sbi->s_mount_opt, ORDERED_DATA); 1720 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK) 1721 set_opt(sbi->s_mount_opt, WRITEBACK_DATA); 1722 1723 if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC) 1724 set_opt(sbi->s_mount_opt, ERRORS_PANIC); 1725 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE) 1726 set_opt(sbi->s_mount_opt, ERRORS_CONT); 1727 else 1728 set_opt(sbi->s_mount_opt, 
ERRORS_RO); 1729 1730 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 1731 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1732 1733 /* enable barriers by default */ 1734 set_opt(sbi->s_mount_opt, BARRIER); 1735 set_opt(sbi->s_mount_opt, RESERVATION); 1736 1737 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 1738 NULL, 0)) 1739 goto failed_mount; 1740 1741 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1742 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 1743 1744 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV && 1745 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || 1746 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 1747 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U))) 1748 ext3_msg(sb, KERN_WARNING, 1749 "warning: feature flags set on rev 0 fs, " 1750 "running e2fsck is recommended"); 1751 /* 1752 * Check feature flags regardless of the revision level, since we 1753 * previously didn't change the revision level when setting the flags, 1754 * so there is a chance incompat flags are set on a rev 0 filesystem. 
1755 */ 1756 features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP); 1757 if (features) { 1758 ext3_msg(sb, KERN_ERR, 1759 "error: couldn't mount because of unsupported " 1760 "optional features (%x)", le32_to_cpu(features)); 1761 goto failed_mount; 1762 } 1763 features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP); 1764 if (!(sb->s_flags & MS_RDONLY) && features) { 1765 ext3_msg(sb, KERN_ERR, 1766 "error: couldn't mount RDWR because of unsupported " 1767 "optional features (%x)", le32_to_cpu(features)); 1768 goto failed_mount; 1769 } 1770 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 1771 1772 if (blocksize < EXT3_MIN_BLOCK_SIZE || 1773 blocksize > EXT3_MAX_BLOCK_SIZE) { 1774 ext3_msg(sb, KERN_ERR, 1775 "error: couldn't mount because of unsupported " 1776 "filesystem blocksize %d", blocksize); 1777 goto failed_mount; 1778 } 1779 1780 hblock = bdev_logical_block_size(sb->s_bdev); 1781 if (sb->s_blocksize != blocksize) { 1782 /* 1783 * Make sure the blocksize for the filesystem is larger 1784 * than the hardware sectorsize for the machine. 1785 */ 1786 if (blocksize < hblock) { 1787 ext3_msg(sb, KERN_ERR, 1788 "error: fsblocksize %d too small for " 1789 "hardware sectorsize %d", blocksize, hblock); 1790 goto failed_mount; 1791 } 1792 1793 brelse (bh); 1794 if (!sb_set_blocksize(sb, blocksize)) { 1795 ext3_msg(sb, KERN_ERR, 1796 "error: bad blocksize %d", blocksize); 1797 goto out_fail; 1798 } 1799 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1800 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1801 bh = sb_bread(sb, logic_sb_block); 1802 if (!bh) { 1803 ext3_msg(sb, KERN_ERR, 1804 "error: can't read superblock on 2nd try"); 1805 goto…
Large files files are truncated, but you can click here to view the full file