/fs/ext4/super.c
C | 5054 lines | 3929 code | 561 blank | 564 comment | 674 complexity | 1908ad9899d5e89a4757b72a72c2b11e MD5 | raw file
Possible License(s): LGPL-2.0, AGPL-1.0, GPL-2.0
Large files are truncated, but you can click here to view the full file
1/* 2 * linux/fs/ext4/super.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * from 10 * 11 * linux/fs/minix/inode.c 12 * 13 * Copyright (C) 1991, 1992 Linus Torvalds 14 * 15 * Big-endian to little-endian byte-swapping/bitmaps by 16 * David S. Miller (davem@caip.rutgers.edu), 1995 17 */ 18 19#include <linux/module.h> 20#include <linux/string.h> 21#include <linux/fs.h> 22#include <linux/time.h> 23#include <linux/vmalloc.h> 24#include <linux/jbd2.h> 25#include <linux/slab.h> 26#include <linux/init.h> 27#include <linux/blkdev.h> 28#include <linux/parser.h> 29#include <linux/buffer_head.h> 30#include <linux/exportfs.h> 31#include <linux/vfs.h> 32#include <linux/random.h> 33#include <linux/mount.h> 34#include <linux/namei.h> 35#include <linux/quotaops.h> 36#include <linux/seq_file.h> 37#include <linux/proc_fs.h> 38#include <linux/ctype.h> 39#include <linux/log2.h> 40#include <linux/crc16.h> 41#include <linux/cleancache.h> 42#include <asm/uaccess.h> 43 44#include <linux/kthread.h> 45#include <linux/freezer.h> 46 47#include "ext4.h" 48#include "ext4_jbd2.h" 49#include "xattr.h" 50#include "acl.h" 51#include "mballoc.h" 52 53#define CREATE_TRACE_POINTS 54#include <trace/events/ext4.h> 55 56static struct proc_dir_entry *ext4_proc_root; 57static struct kset *ext4_kset; 58static struct ext4_lazy_init *ext4_li_info; 59static struct mutex ext4_li_mtx; 60static struct ext4_features *ext4_feat; 61 62static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 63 unsigned long journal_devnum); 64static int ext4_commit_super(struct super_block *sb, int sync); 65static void ext4_mark_recovery_complete(struct super_block *sb, 66 struct ext4_super_block *es); 67static void ext4_clear_journal_err(struct super_block *sb, 68 struct ext4_super_block *es); 69static int ext4_sync_fs(struct super_block *sb, int wait); 70static const char 
*ext4_decode_error(struct super_block *sb, int errno, 71 char nbuf[16]); 72static int ext4_remount(struct super_block *sb, int *flags, char *data); 73static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 74static int ext4_unfreeze(struct super_block *sb); 75static void ext4_write_super(struct super_block *sb); 76static int ext4_freeze(struct super_block *sb); 77static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, 78 const char *dev_name, void *data); 79static inline int ext2_feature_set_ok(struct super_block *sb); 80static inline int ext3_feature_set_ok(struct super_block *sb); 81static int ext4_feature_set_ok(struct super_block *sb, int readonly); 82static void ext4_destroy_lazyinit_thread(void); 83static void ext4_unregister_li_request(struct super_block *sb); 84static void ext4_clear_request_list(void); 85 86#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 87static struct file_system_type ext2_fs_type = { 88 .owner = THIS_MODULE, 89 .name = "ext2", 90 .mount = ext4_mount, 91 .kill_sb = kill_block_super, 92 .fs_flags = FS_REQUIRES_DEV, 93}; 94#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) 95#else 96#define IS_EXT2_SB(sb) (0) 97#endif 98 99 100#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 101static struct file_system_type ext3_fs_type = { 102 .owner = THIS_MODULE, 103 .name = "ext3", 104 .mount = ext4_mount, 105 .kill_sb = kill_block_super, 106 .fs_flags = FS_REQUIRES_DEV, 107}; 108#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) 109#else 110#define IS_EXT3_SB(sb) (0) 111#endif 112 113void *ext4_kvmalloc(size_t size, gfp_t flags) 114{ 115 void *ret; 116 117 ret = kmalloc(size, flags); 118 if (!ret) 119 ret = __vmalloc(size, flags, PAGE_KERNEL); 120 return ret; 121} 122 123void *ext4_kvzalloc(size_t size, gfp_t flags) 124{ 125 void *ret; 126 127 ret = kzalloc(size, flags); 128 if 
(!ret) 129 ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); 130 return ret; 131} 132 133void ext4_kvfree(void *ptr) 134{ 135 if (is_vmalloc_addr(ptr)) 136 vfree(ptr); 137 else 138 kfree(ptr); 139 140} 141 142ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 143 struct ext4_group_desc *bg) 144{ 145 return le32_to_cpu(bg->bg_block_bitmap_lo) | 146 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 147 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); 148} 149 150ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, 151 struct ext4_group_desc *bg) 152{ 153 return le32_to_cpu(bg->bg_inode_bitmap_lo) | 154 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 155 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); 156} 157 158ext4_fsblk_t ext4_inode_table(struct super_block *sb, 159 struct ext4_group_desc *bg) 160{ 161 return le32_to_cpu(bg->bg_inode_table_lo) | 162 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 163 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 164} 165 166__u32 ext4_free_blks_count(struct super_block *sb, 167 struct ext4_group_desc *bg) 168{ 169 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 170 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 171 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); 172} 173 174__u32 ext4_free_inodes_count(struct super_block *sb, 175 struct ext4_group_desc *bg) 176{ 177 return le16_to_cpu(bg->bg_free_inodes_count_lo) | 178 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 179 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); 180} 181 182__u32 ext4_used_dirs_count(struct super_block *sb, 183 struct ext4_group_desc *bg) 184{ 185 return le16_to_cpu(bg->bg_used_dirs_count_lo) | 186 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 
187 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); 188} 189 190__u32 ext4_itable_unused_count(struct super_block *sb, 191 struct ext4_group_desc *bg) 192{ 193 return le16_to_cpu(bg->bg_itable_unused_lo) | 194 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 195 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); 196} 197 198void ext4_block_bitmap_set(struct super_block *sb, 199 struct ext4_group_desc *bg, ext4_fsblk_t blk) 200{ 201 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); 202 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 203 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); 204} 205 206void ext4_inode_bitmap_set(struct super_block *sb, 207 struct ext4_group_desc *bg, ext4_fsblk_t blk) 208{ 209 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); 210 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 211 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); 212} 213 214void ext4_inode_table_set(struct super_block *sb, 215 struct ext4_group_desc *bg, ext4_fsblk_t blk) 216{ 217 bg->bg_inode_table_lo = cpu_to_le32((u32)blk); 218 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 219 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 220} 221 222void ext4_free_blks_set(struct super_block *sb, 223 struct ext4_group_desc *bg, __u32 count) 224{ 225 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 226 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 227 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); 228} 229 230void ext4_free_inodes_set(struct super_block *sb, 231 struct ext4_group_desc *bg, __u32 count) 232{ 233 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); 234 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 235 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); 236} 237 238void ext4_used_dirs_set(struct super_block *sb, 239 struct ext4_group_desc *bg, __u32 count) 240{ 241 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); 242 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 243 bg->bg_used_dirs_count_hi = 
cpu_to_le16(count >> 16); 244} 245 246void ext4_itable_unused_set(struct super_block *sb, 247 struct ext4_group_desc *bg, __u32 count) 248{ 249 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); 250 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 251 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); 252} 253 254 255/* Just increment the non-pointer handle value */ 256static handle_t *ext4_get_nojournal(void) 257{ 258 handle_t *handle = current->journal_info; 259 unsigned long ref_cnt = (unsigned long)handle; 260 261 BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT); 262 263 ref_cnt++; 264 handle = (handle_t *)ref_cnt; 265 266 current->journal_info = handle; 267 return handle; 268} 269 270 271/* Decrement the non-pointer handle value */ 272static void ext4_put_nojournal(handle_t *handle) 273{ 274 unsigned long ref_cnt = (unsigned long)handle; 275 276 BUG_ON(ref_cnt == 0); 277 278 ref_cnt--; 279 handle = (handle_t *)ref_cnt; 280 281 current->journal_info = handle; 282} 283 284/* 285 * Wrappers for jbd2_journal_start/end. 286 * 287 * The only special thing we need to do here is to make sure that all 288 * journal_end calls result in the superblock being marked dirty, so 289 * that sync() will call the filesystem's write_super callback if 290 * appropriate. 291 * 292 * To avoid j_barrier hold in userspace when a user calls freeze(), 293 * ext4 prevents a new handle from being started by s_frozen, which 294 * is in an upper layer. 295 */ 296handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 297{ 298 journal_t *journal; 299 handle_t *handle; 300 301 trace_ext4_journal_start(sb, nblocks, _RET_IP_); 302 if (sb->s_flags & MS_RDONLY) 303 return ERR_PTR(-EROFS); 304 305 journal = EXT4_SB(sb)->s_journal; 306 handle = ext4_journal_current_handle(); 307 308 /* 309 * If a handle has been started, it should be allowed to 310 * finish, otherwise deadlock could happen between freeze 311 * and others(e.g. 
truncate) due to the restart of the 312 * journal handle if the filesystem is forzen and active 313 * handles are not stopped. 314 */ 315 if (!handle) 316 vfs_check_frozen(sb, SB_FREEZE_TRANS); 317 318 if (!journal) 319 return ext4_get_nojournal(); 320 /* 321 * Special case here: if the journal has aborted behind our 322 * backs (eg. EIO in the commit thread), then we still need to 323 * take the FS itself readonly cleanly. 324 */ 325 if (is_journal_aborted(journal)) { 326 ext4_abort(sb, "Detected aborted journal"); 327 return ERR_PTR(-EROFS); 328 } 329 return jbd2_journal_start(journal, nblocks); 330} 331 332/* 333 * The only special thing we need to do here is to make sure that all 334 * jbd2_journal_stop calls result in the superblock being marked dirty, so 335 * that sync() will call the filesystem's write_super callback if 336 * appropriate. 337 */ 338int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) 339{ 340 struct super_block *sb; 341 int err; 342 int rc; 343 344 if (!ext4_handle_valid(handle)) { 345 ext4_put_nojournal(handle); 346 return 0; 347 } 348 sb = handle->h_transaction->t_journal->j_private; 349 err = handle->h_err; 350 rc = jbd2_journal_stop(handle); 351 352 if (!err) 353 err = rc; 354 if (err) 355 __ext4_std_error(sb, where, line, err); 356 return err; 357} 358 359void ext4_journal_abort_handle(const char *caller, unsigned int line, 360 const char *err_fn, struct buffer_head *bh, 361 handle_t *handle, int err) 362{ 363 char nbuf[16]; 364 const char *errstr = ext4_decode_error(NULL, err, nbuf); 365 366 BUG_ON(!ext4_handle_valid(handle)); 367 368 if (bh) 369 BUFFER_TRACE(bh, "abort"); 370 371 if (!handle->h_err) 372 handle->h_err = err; 373 374 if (is_handle_aborted(handle)) 375 return; 376 377 printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n", 378 caller, line, errstr, err_fn); 379 380 jbd2_journal_abort_handle(handle); 381} 382 383static void __save_error_info(struct super_block *sb, const char *func, 384 
unsigned int line) 385{ 386 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 387 388 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 389 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 390 es->s_last_error_time = cpu_to_le32(get_seconds()); 391 strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); 392 es->s_last_error_line = cpu_to_le32(line); 393 if (!es->s_first_error_time) { 394 es->s_first_error_time = es->s_last_error_time; 395 strncpy(es->s_first_error_func, func, 396 sizeof(es->s_first_error_func)); 397 es->s_first_error_line = cpu_to_le32(line); 398 es->s_first_error_ino = es->s_last_error_ino; 399 es->s_first_error_block = es->s_last_error_block; 400 } 401 /* 402 * Start the daily error reporting function if it hasn't been 403 * started already 404 */ 405 if (!es->s_error_count) 406 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); 407 es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1); 408} 409 410static void save_error_info(struct super_block *sb, const char *func, 411 unsigned int line) 412{ 413 __save_error_info(sb, func, line); 414 ext4_commit_super(sb, 1); 415} 416 417 418/* Deal with the reporting of failure conditions on a filesystem such as 419 * inconsistencies detected or read IO failures. 420 * 421 * On ext2, we can store the error state of the filesystem in the 422 * superblock. That is not possible on ext4, because we may have other 423 * write ordering constraints on the superblock which prevent us from 424 * writing it out straight away; and given that the journal is about to 425 * be aborted, we can't rely on the current, or future, transactions to 426 * write out the superblock safely. 427 * 428 * We'll just use the jbd2_journal_abort() error code to record an error in 429 * the journal instead. On recovery, the journal will complain about 430 * that error until we've noted it down and cleared it. 
431 */ 432 433static void ext4_handle_error(struct super_block *sb) 434{ 435 if (sb->s_flags & MS_RDONLY) 436 return; 437 438 if (!test_opt(sb, ERRORS_CONT)) { 439 journal_t *journal = EXT4_SB(sb)->s_journal; 440 441 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 442 if (journal) 443 jbd2_journal_abort(journal, -EIO); 444 } 445 if (test_opt(sb, ERRORS_RO)) { 446 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 447 sb->s_flags |= MS_RDONLY; 448 } 449 if (test_opt(sb, ERRORS_PANIC)) 450 panic("EXT4-fs (device %s): panic forced after error\n", 451 sb->s_id); 452} 453 454void __ext4_error(struct super_block *sb, const char *function, 455 unsigned int line, const char *fmt, ...) 456{ 457 struct va_format vaf; 458 va_list args; 459 460 va_start(args, fmt); 461 vaf.fmt = fmt; 462 vaf.va = &args; 463 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", 464 sb->s_id, function, line, current->comm, &vaf); 465 va_end(args); 466 save_error_info(sb, function, line); 467 468 ext4_handle_error(sb); 469} 470 471void ext4_error_inode(struct inode *inode, const char *function, 472 unsigned int line, ext4_fsblk_t block, 473 const char *fmt, ...) 474{ 475 va_list args; 476 struct va_format vaf; 477 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 478 479 es->s_last_error_ino = cpu_to_le32(inode->i_ino); 480 es->s_last_error_block = cpu_to_le64(block); 481 save_error_info(inode->i_sb, function, line); 482 va_start(args, fmt); 483 vaf.fmt = fmt; 484 vaf.va = &args; 485 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", 486 inode->i_sb->s_id, function, line, inode->i_ino); 487 if (block) 488 printk(KERN_CONT "block %llu: ", block); 489 printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf); 490 va_end(args); 491 492 ext4_handle_error(inode->i_sb); 493} 494 495void ext4_error_file(struct file *file, const char *function, 496 unsigned int line, ext4_fsblk_t block, 497 const char *fmt, ...) 
498{ 499 va_list args; 500 struct va_format vaf; 501 struct ext4_super_block *es; 502 struct inode *inode = file->f_dentry->d_inode; 503 char pathname[80], *path; 504 505 es = EXT4_SB(inode->i_sb)->s_es; 506 es->s_last_error_ino = cpu_to_le32(inode->i_ino); 507 save_error_info(inode->i_sb, function, line); 508 path = d_path(&(file->f_path), pathname, sizeof(pathname)); 509 if (IS_ERR(path)) 510 path = "(unknown)"; 511 printk(KERN_CRIT 512 "EXT4-fs error (device %s): %s:%d: inode #%lu: ", 513 inode->i_sb->s_id, function, line, inode->i_ino); 514 if (block) 515 printk(KERN_CONT "block %llu: ", block); 516 va_start(args, fmt); 517 vaf.fmt = fmt; 518 vaf.va = &args; 519 printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf); 520 va_end(args); 521 522 ext4_handle_error(inode->i_sb); 523} 524 525static const char *ext4_decode_error(struct super_block *sb, int errno, 526 char nbuf[16]) 527{ 528 char *errstr = NULL; 529 530 switch (errno) { 531 case -EIO: 532 errstr = "IO failure"; 533 break; 534 case -ENOMEM: 535 errstr = "Out of memory"; 536 break; 537 case -EROFS: 538 if (!sb || (EXT4_SB(sb)->s_journal && 539 EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) 540 errstr = "Journal has aborted"; 541 else 542 errstr = "Readonly filesystem"; 543 break; 544 default: 545 /* If the caller passed in an extra buffer for unknown 546 * errors, textualise them now. Else we just return 547 * NULL. */ 548 if (nbuf) { 549 /* Check for truncated error codes... */ 550 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 551 errstr = nbuf; 552 } 553 break; 554 } 555 556 return errstr; 557} 558 559/* __ext4_std_error decodes expected errors from journaling functions 560 * automatically and invokes the appropriate error response. 
*/ 561 562void __ext4_std_error(struct super_block *sb, const char *function, 563 unsigned int line, int errno) 564{ 565 char nbuf[16]; 566 const char *errstr; 567 568 /* Special case: if the error is EROFS, and we're not already 569 * inside a transaction, then there's really no point in logging 570 * an error. */ 571 if (errno == -EROFS && journal_current_handle() == NULL && 572 (sb->s_flags & MS_RDONLY)) 573 return; 574 575 errstr = ext4_decode_error(sb, errno, nbuf); 576 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", 577 sb->s_id, function, line, errstr); 578 save_error_info(sb, function, line); 579 580 ext4_handle_error(sb); 581} 582 583/* 584 * ext4_abort is a much stronger failure handler than ext4_error. The 585 * abort function may be used to deal with unrecoverable failures such 586 * as journal IO errors or ENOMEM at a critical moment in log management. 587 * 588 * We unconditionally force the filesystem into an ABORT|READONLY state, 589 * unless the error response on the fs has been set to panic in which 590 * case we take the easy way out and panic immediately. 591 */ 592 593void __ext4_abort(struct super_block *sb, const char *function, 594 unsigned int line, const char *fmt, ...) 595{ 596 va_list args; 597 598 save_error_info(sb, function, line); 599 va_start(args, fmt); 600 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id, 601 function, line); 602 vprintk(fmt, args); 603 printk("\n"); 604 va_end(args); 605 606 if ((sb->s_flags & MS_RDONLY) == 0) { 607 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); 608 sb->s_flags |= MS_RDONLY; 609 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; 610 if (EXT4_SB(sb)->s_journal) 611 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 612 save_error_info(sb, function, line); 613 } 614 if (test_opt(sb, ERRORS_PANIC)) 615 panic("EXT4-fs panic from previous error\n"); 616} 617 618void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...) 
619{ 620 struct va_format vaf; 621 va_list args; 622 623 va_start(args, fmt); 624 vaf.fmt = fmt; 625 vaf.va = &args; 626 printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf); 627 va_end(args); 628} 629 630void __ext4_warning(struct super_block *sb, const char *function, 631 unsigned int line, const char *fmt, ...) 632{ 633 struct va_format vaf; 634 va_list args; 635 636 va_start(args, fmt); 637 vaf.fmt = fmt; 638 vaf.va = &args; 639 printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n", 640 sb->s_id, function, line, &vaf); 641 va_end(args); 642} 643 644void __ext4_grp_locked_error(const char *function, unsigned int line, 645 struct super_block *sb, ext4_group_t grp, 646 unsigned long ino, ext4_fsblk_t block, 647 const char *fmt, ...) 648__releases(bitlock) 649__acquires(bitlock) 650{ 651 struct va_format vaf; 652 va_list args; 653 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 654 655 es->s_last_error_ino = cpu_to_le32(ino); 656 es->s_last_error_block = cpu_to_le64(block); 657 __save_error_info(sb, function, line); 658 659 va_start(args, fmt); 660 661 vaf.fmt = fmt; 662 vaf.va = &args; 663 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", 664 sb->s_id, function, line, grp); 665 if (ino) 666 printk(KERN_CONT "inode %lu: ", ino); 667 if (block) 668 printk(KERN_CONT "block %llu:", (unsigned long long) block); 669 printk(KERN_CONT "%pV\n", &vaf); 670 va_end(args); 671 672 if (test_opt(sb, ERRORS_CONT)) { 673 ext4_commit_super(sb, 0); 674 return; 675 } 676 677 ext4_unlock_group(sb, grp); 678 ext4_handle_error(sb); 679 /* 680 * We only get here in the ERRORS_RO case; relocking the group 681 * may be dangerous, but nothing bad will happen since the 682 * filesystem will have already been marked read/only and the 683 * journal has been aborted. 
We return 1 as a hint to callers 684 * who might what to use the return value from 685 * ext4_grp_locked_error() to distinguish between the 686 * ERRORS_CONT and ERRORS_RO case, and perhaps return more 687 * aggressively from the ext4 function in question, with a 688 * more appropriate error code. 689 */ 690 ext4_lock_group(sb, grp); 691 return; 692} 693 694void ext4_update_dynamic_rev(struct super_block *sb) 695{ 696 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 697 698 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 699 return; 700 701 ext4_warning(sb, 702 "updating to rev %d because of new feature flag, " 703 "running e2fsck is recommended", 704 EXT4_DYNAMIC_REV); 705 706 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); 707 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); 708 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); 709 /* leave es->s_feature_*compat flags alone */ 710 /* es->s_uuid will be set by e2fsck if empty */ 711 712 /* 713 * The rest of the superblock fields should be zero, and if not it 714 * means they are likely already in use, so leave them alone. We 715 * can leave it up to e2fsck to clean up any inconsistencies there. 
716 */ 717} 718 719/* 720 * Open the external journal device 721 */ 722static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) 723{ 724 struct block_device *bdev; 725 char b[BDEVNAME_SIZE]; 726 727 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); 728 if (IS_ERR(bdev)) 729 goto fail; 730 return bdev; 731 732fail: 733 ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", 734 __bdevname(dev, b), PTR_ERR(bdev)); 735 return NULL; 736} 737 738/* 739 * Release the journal device 740 */ 741static int ext4_blkdev_put(struct block_device *bdev) 742{ 743 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 744} 745 746static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 747{ 748 struct block_device *bdev; 749 int ret = -ENODEV; 750 751 bdev = sbi->journal_bdev; 752 if (bdev) { 753 ret = ext4_blkdev_put(bdev); 754 sbi->journal_bdev = NULL; 755 } 756 return ret; 757} 758 759static inline struct inode *orphan_list_entry(struct list_head *l) 760{ 761 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; 762} 763 764static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) 765{ 766 struct list_head *l; 767 768 ext4_msg(sb, KERN_ERR, "sb orphan head is %d", 769 le32_to_cpu(sbi->s_es->s_last_orphan)); 770 771 printk(KERN_ERR "sb_info orphan list:\n"); 772 list_for_each(l, &sbi->s_orphan) { 773 struct inode *inode = orphan_list_entry(l); 774 printk(KERN_ERR " " 775 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 776 inode->i_sb->s_id, inode->i_ino, inode, 777 inode->i_mode, inode->i_nlink, 778 NEXT_ORPHAN(inode)); 779 } 780} 781 782static void ext4_put_super(struct super_block *sb) 783{ 784 struct ext4_sb_info *sbi = EXT4_SB(sb); 785 struct ext4_super_block *es = sbi->s_es; 786 int i, err; 787 788 ext4_unregister_li_request(sb); 789 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); 790 791 flush_workqueue(sbi->dio_unwritten_wq); 792 
destroy_workqueue(sbi->dio_unwritten_wq); 793 794 lock_super(sb); 795 if (sb->s_dirt) 796 ext4_commit_super(sb, 1); 797 798 if (sbi->s_journal) { 799 err = jbd2_journal_destroy(sbi->s_journal); 800 sbi->s_journal = NULL; 801 if (err < 0) 802 ext4_abort(sb, "Couldn't clean up the journal"); 803 } 804 805 del_timer(&sbi->s_err_report); 806 ext4_release_system_zone(sb); 807 ext4_mb_release(sb); 808 ext4_ext_release(sb); 809 ext4_xattr_put_super(sb); 810 811 if (!(sb->s_flags & MS_RDONLY)) { 812 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 813 es->s_state = cpu_to_le16(sbi->s_mount_state); 814 ext4_commit_super(sb, 1); 815 } 816 if (sbi->s_proc) { 817 remove_proc_entry(sb->s_id, ext4_proc_root); 818 } 819 kobject_del(&sbi->s_kobj); 820 821 for (i = 0; i < sbi->s_gdb_count; i++) 822 brelse(sbi->s_group_desc[i]); 823 ext4_kvfree(sbi->s_group_desc); 824 ext4_kvfree(sbi->s_flex_groups); 825 percpu_counter_destroy(&sbi->s_freeblocks_counter); 826 percpu_counter_destroy(&sbi->s_freeinodes_counter); 827 percpu_counter_destroy(&sbi->s_dirs_counter); 828 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 829 brelse(sbi->s_sbh); 830#ifdef CONFIG_QUOTA 831 for (i = 0; i < MAXQUOTAS; i++) 832 kfree(sbi->s_qf_names[i]); 833#endif 834 835 /* Debugging code just in case the in-memory inode orphan list 836 * isn't empty. The on-disk one can be non-empty if we've 837 * detected an error and taken the fs readonly, but the 838 * in-memory list had better be clean by this point. */ 839 if (!list_empty(&sbi->s_orphan)) 840 dump_orphan_list(sb, sbi); 841 J_ASSERT(list_empty(&sbi->s_orphan)); 842 843 invalidate_bdev(sb->s_bdev); 844 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { 845 /* 846 * Invalidate the journal device's buffers. We don't want them 847 * floating about in memory - the physical journal device may 848 * hotswapped, and it breaks the `ro-after' testing code. 
849 */ 850 sync_blockdev(sbi->journal_bdev); 851 invalidate_bdev(sbi->journal_bdev); 852 ext4_blkdev_remove(sbi); 853 } 854 if (sbi->s_mmp_tsk) 855 kthread_stop(sbi->s_mmp_tsk); 856 sb->s_fs_info = NULL; 857 /* 858 * Now that we are completely done shutting down the 859 * superblock, we need to actually destroy the kobject. 860 */ 861 unlock_super(sb); 862 kobject_put(&sbi->s_kobj); 863 wait_for_completion(&sbi->s_kobj_unregister); 864 kfree(sbi->s_blockgroup_lock); 865 kfree(sbi); 866} 867 868static struct kmem_cache *ext4_inode_cachep; 869 870/* 871 * Called inside transaction, so use GFP_NOFS 872 */ 873static struct inode *ext4_alloc_inode(struct super_block *sb) 874{ 875 struct ext4_inode_info *ei; 876 877 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); 878 if (!ei) 879 return NULL; 880 881 ei->vfs_inode.i_version = 1; 882 ei->vfs_inode.i_data.writeback_index = 0; 883 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 884 INIT_LIST_HEAD(&ei->i_prealloc_list); 885 spin_lock_init(&ei->i_prealloc_lock); 886 ei->i_reserved_data_blocks = 0; 887 ei->i_reserved_meta_blocks = 0; 888 ei->i_allocated_meta_blocks = 0; 889 ei->i_da_metadata_calc_len = 0; 890 spin_lock_init(&(ei->i_block_reservation_lock)); 891#ifdef CONFIG_QUOTA 892 ei->i_reserved_quota = 0; 893#endif 894 ei->jinode = NULL; 895 INIT_LIST_HEAD(&ei->i_completed_io_list); 896 spin_lock_init(&ei->i_completed_io_lock); 897 ei->cur_aio_dio = NULL; 898 ei->i_sync_tid = 0; 899 ei->i_datasync_tid = 0; 900 atomic_set(&ei->i_ioend_count, 0); 901 atomic_set(&ei->i_aiodio_unwritten, 0); 902 903 return &ei->vfs_inode; 904} 905 906static int ext4_drop_inode(struct inode *inode) 907{ 908 int drop = generic_drop_inode(inode); 909 910 trace_ext4_drop_inode(inode, drop); 911 return drop; 912} 913 914static void ext4_i_callback(struct rcu_head *head) 915{ 916 struct inode *inode = container_of(head, struct inode, i_rcu); 917 INIT_LIST_HEAD(&inode->i_dentry); 918 kmem_cache_free(ext4_inode_cachep, 
EXT4_I(inode)); 919} 920 921static void ext4_destroy_inode(struct inode *inode) 922{ 923 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 924 ext4_msg(inode->i_sb, KERN_ERR, 925 "Inode %lu (%p): orphan list check failed!", 926 inode->i_ino, EXT4_I(inode)); 927 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, 928 EXT4_I(inode), sizeof(struct ext4_inode_info), 929 true); 930 dump_stack(); 931 } 932 call_rcu(&inode->i_rcu, ext4_i_callback); 933} 934 935static void init_once(void *foo) 936{ 937 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 938 939 INIT_LIST_HEAD(&ei->i_orphan); 940#ifdef CONFIG_EXT4_FS_XATTR 941 init_rwsem(&ei->xattr_sem); 942#endif 943 init_rwsem(&ei->i_data_sem); 944 inode_init_once(&ei->vfs_inode); 945} 946 947static int init_inodecache(void) 948{ 949 ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", 950 sizeof(struct ext4_inode_info), 951 0, (SLAB_RECLAIM_ACCOUNT| 952 SLAB_MEM_SPREAD), 953 init_once); 954 if (ext4_inode_cachep == NULL) 955 return -ENOMEM; 956 return 0; 957} 958 959static void destroy_inodecache(void) 960{ 961 kmem_cache_destroy(ext4_inode_cachep); 962} 963 964void ext4_clear_inode(struct inode *inode) 965{ 966 invalidate_inode_buffers(inode); 967 end_writeback(inode); 968 dquot_drop(inode); 969 ext4_discard_preallocations(inode); 970 if (EXT4_I(inode)->jinode) { 971 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), 972 EXT4_I(inode)->jinode); 973 jbd2_free_inode(EXT4_I(inode)->jinode); 974 EXT4_I(inode)->jinode = NULL; 975 } 976} 977 978static inline void ext4_show_quota_options(struct seq_file *seq, 979 struct super_block *sb) 980{ 981#if defined(CONFIG_QUOTA) 982 struct ext4_sb_info *sbi = EXT4_SB(sb); 983 984 if (sbi->s_jquota_fmt) { 985 char *fmtname = ""; 986 987 switch (sbi->s_jquota_fmt) { 988 case QFMT_VFS_OLD: 989 fmtname = "vfsold"; 990 break; 991 case QFMT_VFS_V0: 992 fmtname = "vfsv0"; 993 break; 994 case QFMT_VFS_V1: 995 fmtname = "vfsv1"; 996 break; 997 } 998 seq_printf(seq, 
",jqfmt=%s", fmtname); 999 } 1000 1001 if (sbi->s_qf_names[USRQUOTA]) 1002 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 1003 1004 if (sbi->s_qf_names[GRPQUOTA]) 1005 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 1006 1007 if (test_opt(sb, USRQUOTA)) 1008 seq_puts(seq, ",usrquota"); 1009 1010 if (test_opt(sb, GRPQUOTA)) 1011 seq_puts(seq, ",grpquota"); 1012#endif 1013} 1014 1015/* 1016 * Show an option if 1017 * - it's set to a non-default value OR 1018 * - if the per-sb default is different from the global default 1019 */ 1020static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 1021{ 1022 int def_errors; 1023 unsigned long def_mount_opts; 1024 struct super_block *sb = vfs->mnt_sb; 1025 struct ext4_sb_info *sbi = EXT4_SB(sb); 1026 struct ext4_super_block *es = sbi->s_es; 1027 1028 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 1029 def_errors = le16_to_cpu(es->s_errors); 1030 1031 if (sbi->s_sb_block != 1) 1032 seq_printf(seq, ",sb=%llu", sbi->s_sb_block); 1033 if (test_opt(sb, MINIX_DF)) 1034 seq_puts(seq, ",minixdf"); 1035 if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) 1036 seq_puts(seq, ",grpid"); 1037 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) 1038 seq_puts(seq, ",nogrpid"); 1039 if (sbi->s_resuid != EXT4_DEF_RESUID || 1040 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { 1041 seq_printf(seq, ",resuid=%u", sbi->s_resuid); 1042 } 1043 if (sbi->s_resgid != EXT4_DEF_RESGID || 1044 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { 1045 seq_printf(seq, ",resgid=%u", sbi->s_resgid); 1046 } 1047 if (test_opt(sb, ERRORS_RO)) { 1048 if (def_errors == EXT4_ERRORS_PANIC || 1049 def_errors == EXT4_ERRORS_CONTINUE) { 1050 seq_puts(seq, ",errors=remount-ro"); 1051 } 1052 } 1053 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) 1054 seq_puts(seq, ",errors=continue"); 1055 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) 1056 
seq_puts(seq, ",errors=panic"); 1057 if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) 1058 seq_puts(seq, ",nouid32"); 1059 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 1060 seq_puts(seq, ",debug"); 1061 if (test_opt(sb, OLDALLOC)) 1062 seq_puts(seq, ",oldalloc"); 1063#ifdef CONFIG_EXT4_FS_XATTR 1064 if (test_opt(sb, XATTR_USER)) 1065 seq_puts(seq, ",user_xattr"); 1066 if (!test_opt(sb, XATTR_USER)) 1067 seq_puts(seq, ",nouser_xattr"); 1068#endif 1069#ifdef CONFIG_EXT4_FS_POSIX_ACL 1070 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) 1071 seq_puts(seq, ",acl"); 1072 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 1073 seq_puts(seq, ",noacl"); 1074#endif 1075 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 1076 seq_printf(seq, ",commit=%u", 1077 (unsigned) (sbi->s_commit_interval / HZ)); 1078 } 1079 if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { 1080 seq_printf(seq, ",min_batch_time=%u", 1081 (unsigned) sbi->s_min_batch_time); 1082 } 1083 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 1084 seq_printf(seq, ",max_batch_time=%u", 1085 (unsigned) sbi->s_min_batch_time); 1086 } 1087 1088 /* 1089 * We're changing the default of barrier mount option, so 1090 * let's always display its mount state so it's clear what its 1091 * status is. 1092 */ 1093 seq_puts(seq, ",barrier="); 1094 seq_puts(seq, test_opt(sb, BARRIER) ? 
"1" : "0"); 1095 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 1096 seq_puts(seq, ",journal_async_commit"); 1097 else if (test_opt(sb, JOURNAL_CHECKSUM)) 1098 seq_puts(seq, ",journal_checksum"); 1099 if (test_opt(sb, I_VERSION)) 1100 seq_puts(seq, ",i_version"); 1101 if (!test_opt(sb, DELALLOC) && 1102 !(def_mount_opts & EXT4_DEFM_NODELALLOC)) 1103 seq_puts(seq, ",nodelalloc"); 1104 1105 if (!test_opt(sb, MBLK_IO_SUBMIT)) 1106 seq_puts(seq, ",nomblk_io_submit"); 1107 if (sbi->s_stripe) 1108 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 1109 /* 1110 * journal mode get enabled in different ways 1111 * So just print the value even if we didn't specify it 1112 */ 1113 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 1114 seq_puts(seq, ",data=journal"); 1115 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) 1116 seq_puts(seq, ",data=ordered"); 1117 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 1118 seq_puts(seq, ",data=writeback"); 1119 1120 if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) 1121 seq_printf(seq, ",inode_readahead_blks=%u", 1122 sbi->s_inode_readahead_blks); 1123 1124 if (test_opt(sb, DATA_ERR_ABORT)) 1125 seq_puts(seq, ",data_err=abort"); 1126 1127 if (test_opt(sb, NO_AUTO_DA_ALLOC)) 1128 seq_puts(seq, ",noauto_da_alloc"); 1129 1130 if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD)) 1131 seq_puts(seq, ",discard"); 1132 1133 if (test_opt(sb, NOLOAD)) 1134 seq_puts(seq, ",norecovery"); 1135 1136 if (test_opt(sb, DIOREAD_NOLOCK)) 1137 seq_puts(seq, ",dioread_nolock"); 1138 1139 if (test_opt(sb, BLOCK_VALIDITY) && 1140 !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) 1141 seq_puts(seq, ",block_validity"); 1142 1143 if (!test_opt(sb, INIT_INODE_TABLE)) 1144 seq_puts(seq, ",noinit_itable"); 1145 else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT) 1146 seq_printf(seq, ",init_itable=%u", 1147 (unsigned) sbi->s_li_wait_mult); 1148 1149 ext4_show_quota_options(seq, sb); 1150 1151 return 0; 1152} 
1153 1154static struct inode *ext4_nfs_get_inode(struct super_block *sb, 1155 u64 ino, u32 generation) 1156{ 1157 struct inode *inode; 1158 1159 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) 1160 return ERR_PTR(-ESTALE); 1161 if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 1162 return ERR_PTR(-ESTALE); 1163 1164 /* iget isn't really right if the inode is currently unallocated!! 1165 * 1166 * ext4_read_inode will return a bad_inode if the inode had been 1167 * deleted, so we should be safe. 1168 * 1169 * Currently we don't know the generation for parent directory, so 1170 * a generation of 0 means "accept any" 1171 */ 1172 inode = ext4_iget(sb, ino); 1173 if (IS_ERR(inode)) 1174 return ERR_CAST(inode); 1175 if (generation && inode->i_generation != generation) { 1176 iput(inode); 1177 return ERR_PTR(-ESTALE); 1178 } 1179 1180 return inode; 1181} 1182 1183static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, 1184 int fh_len, int fh_type) 1185{ 1186 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 1187 ext4_nfs_get_inode); 1188} 1189 1190static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, 1191 int fh_len, int fh_type) 1192{ 1193 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 1194 ext4_nfs_get_inode); 1195} 1196 1197/* 1198 * Try to release metadata pages (indirect blocks, directories) which are 1199 * mapped via the block device. Since these pages could have journal heads 1200 * which would prevent try_to_free_buffers() from freeing them, we must use 1201 * jbd2 layer's try_to_free_buffers() function to release them. 
1202 */ 1203static int bdev_try_to_free_page(struct super_block *sb, struct page *page, 1204 gfp_t wait) 1205{ 1206 journal_t *journal = EXT4_SB(sb)->s_journal; 1207 1208 WARN_ON(PageChecked(page)); 1209 if (!page_has_buffers(page)) 1210 return 0; 1211 if (journal) 1212 return jbd2_journal_try_to_free_buffers(journal, page, 1213 wait & ~__GFP_WAIT); 1214 return try_to_free_buffers(page); 1215} 1216 1217#ifdef CONFIG_QUOTA 1218#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") 1219#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 1220 1221static int ext4_write_dquot(struct dquot *dquot); 1222static int ext4_acquire_dquot(struct dquot *dquot); 1223static int ext4_release_dquot(struct dquot *dquot); 1224static int ext4_mark_dquot_dirty(struct dquot *dquot); 1225static int ext4_write_info(struct super_block *sb, int type); 1226static int ext4_quota_on(struct super_block *sb, int type, int format_id, 1227 struct path *path); 1228static int ext4_quota_off(struct super_block *sb, int type); 1229static int ext4_quota_on_mount(struct super_block *sb, int type); 1230static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 1231 size_t len, loff_t off); 1232static ssize_t ext4_quota_write(struct super_block *sb, int type, 1233 const char *data, size_t len, loff_t off); 1234 1235static const struct dquot_operations ext4_quota_operations = { 1236 .get_reserved_space = ext4_get_reserved_space, 1237 .write_dquot = ext4_write_dquot, 1238 .acquire_dquot = ext4_acquire_dquot, 1239 .release_dquot = ext4_release_dquot, 1240 .mark_dirty = ext4_mark_dquot_dirty, 1241 .write_info = ext4_write_info, 1242 .alloc_dquot = dquot_alloc, 1243 .destroy_dquot = dquot_destroy, 1244}; 1245 1246static const struct quotactl_ops ext4_qctl_operations = { 1247 .quota_on = ext4_quota_on, 1248 .quota_off = ext4_quota_off, 1249 .quota_sync = dquot_quota_sync, 1250 .get_info = dquot_get_dqinfo, 1251 .set_info = dquot_set_dqinfo, 1252 .get_dqblk = 
dquot_get_dqblk, 1253 .set_dqblk = dquot_set_dqblk 1254}; 1255#endif 1256 1257static const struct super_operations ext4_sops = { 1258 .alloc_inode = ext4_alloc_inode, 1259 .destroy_inode = ext4_destroy_inode, 1260 .write_inode = ext4_write_inode, 1261 .dirty_inode = ext4_dirty_inode, 1262 .drop_inode = ext4_drop_inode, 1263 .evict_inode = ext4_evict_inode, 1264 .put_super = ext4_put_super, 1265 .sync_fs = ext4_sync_fs, 1266 .freeze_fs = ext4_freeze, 1267 .unfreeze_fs = ext4_unfreeze, 1268 .statfs = ext4_statfs, 1269 .remount_fs = ext4_remount, 1270 .show_options = ext4_show_options, 1271#ifdef CONFIG_QUOTA 1272 .quota_read = ext4_quota_read, 1273 .quota_write = ext4_quota_write, 1274#endif 1275 .bdev_try_to_free_page = bdev_try_to_free_page, 1276}; 1277 1278static const struct super_operations ext4_nojournal_sops = { 1279 .alloc_inode = ext4_alloc_inode, 1280 .destroy_inode = ext4_destroy_inode, 1281 .write_inode = ext4_write_inode, 1282 .dirty_inode = ext4_dirty_inode, 1283 .drop_inode = ext4_drop_inode, 1284 .evict_inode = ext4_evict_inode, 1285 .write_super = ext4_write_super, 1286 .put_super = ext4_put_super, 1287 .statfs = ext4_statfs, 1288 .remount_fs = ext4_remount, 1289 .show_options = ext4_show_options, 1290#ifdef CONFIG_QUOTA 1291 .quota_read = ext4_quota_read, 1292 .quota_write = ext4_quota_write, 1293#endif 1294 .bdev_try_to_free_page = bdev_try_to_free_page, 1295}; 1296 1297static const struct export_operations ext4_export_ops = { 1298 .fh_to_dentry = ext4_fh_to_dentry, 1299 .fh_to_parent = ext4_fh_to_parent, 1300 .get_parent = ext4_get_parent, 1301}; 1302 1303enum { 1304 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 1305 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1306 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1307 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1308 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, 1309 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1310 
Opt_journal_update, Opt_journal_dev, 1311 Opt_journal_checksum, Opt_journal_async_commit, 1312 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1313 Opt_data_err_abort, Opt_data_err_ignore, 1314 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1315 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1316 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 1317 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, 1318 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, 1319 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, 1320 Opt_inode_readahead_blks, Opt_journal_ioprio, 1321 Opt_dioread_nolock, Opt_dioread_lock, 1322 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, 1323}; 1324 1325static const match_table_t tokens = { 1326 {Opt_bsd_df, "bsddf"}, 1327 {Opt_minix_df, "minixdf"}, 1328 {Opt_grpid, "grpid"}, 1329 {Opt_grpid, "bsdgroups"}, 1330 {Opt_nogrpid, "nogrpid"}, 1331 {Opt_nogrpid, "sysvgroups"}, 1332 {Opt_resgid, "resgid=%u"}, 1333 {Opt_resuid, "resuid=%u"}, 1334 {Opt_sb, "sb=%u"}, 1335 {Opt_err_cont, "errors=continue"}, 1336 {Opt_err_panic, "errors=panic"}, 1337 {Opt_err_ro, "errors=remount-ro"}, 1338 {Opt_nouid32, "nouid32"}, 1339 {Opt_debug, "debug"}, 1340 {Opt_oldalloc, "oldalloc"}, 1341 {Opt_orlov, "orlov"}, 1342 {Opt_user_xattr, "user_xattr"}, 1343 {Opt_nouser_xattr, "nouser_xattr"}, 1344 {Opt_acl, "acl"}, 1345 {Opt_noacl, "noacl"}, 1346 {Opt_noload, "noload"}, 1347 {Opt_noload, "norecovery"}, 1348 {Opt_nobh, "nobh"}, 1349 {Opt_bh, "bh"}, 1350 {Opt_commit, "commit=%u"}, 1351 {Opt_min_batch_time, "min_batch_time=%u"}, 1352 {Opt_max_batch_time, "max_batch_time=%u"}, 1353 {Opt_journal_update, "journal=update"}, 1354 {Opt_journal_dev, "journal_dev=%u"}, 1355 {Opt_journal_checksum, "journal_checksum"}, 1356 {Opt_journal_async_commit, "journal_async_commit"}, 1357 {Opt_abort, "abort"}, 1358 {Opt_data_journal, "data=journal"}, 1359 {Opt_data_ordered, "data=ordered"}, 
1360 {Opt_data_writeback, "data=writeback"}, 1361 {Opt_data_err_abort, "data_err=abort"}, 1362 {Opt_data_err_ignore, "data_err=ignore"}, 1363 {Opt_offusrjquota, "usrjquota="}, 1364 {Opt_usrjquota, "usrjquota=%s"}, 1365 {Opt_offgrpjquota, "grpjquota="}, 1366 {Opt_grpjquota, "grpjquota=%s"}, 1367 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1368 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1369 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, 1370 {Opt_grpquota, "grpquota"}, 1371 {Opt_noquota, "noquota"}, 1372 {Opt_quota, "quota"}, 1373 {Opt_usrquota, "usrquota"}, 1374 {Opt_barrier, "barrier=%u"}, 1375 {Opt_barrier, "barrier"}, 1376 {Opt_nobarrier, "nobarrier"}, 1377 {Opt_i_version, "i_version"}, 1378 {Opt_stripe, "stripe=%u"}, 1379 {Opt_resize, "resize"}, 1380 {Opt_delalloc, "delalloc"}, 1381 {Opt_nodelalloc, "nodelalloc"}, 1382 {Opt_mblk_io_submit, "mblk_io_submit"}, 1383 {Opt_nomblk_io_submit, "nomblk_io_submit"}, 1384 {Opt_block_validity, "block_validity"}, 1385 {Opt_noblock_validity, "noblock_validity"}, 1386 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1387 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1388 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1389 {Opt_auto_da_alloc, "auto_da_alloc"}, 1390 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1391 {Opt_dioread_nolock, "dioread_nolock"}, 1392 {Opt_dioread_lock, "dioread_lock"}, 1393 {Opt_discard, "discard"}, 1394 {Opt_nodiscard, "nodiscard"}, 1395 {Opt_init_itable, "init_itable=%u"}, 1396 {Opt_init_itable, "init_itable"}, 1397 {Opt_noinit_itable, "noinit_itable"}, 1398 {Opt_err, NULL}, 1399}; 1400 1401static ext4_fsblk_t get_sb_block(void **data) 1402{ 1403 ext4_fsblk_t sb_block; 1404 char *options = (char *) *data; 1405 1406 if (!options || strncmp(options, "sb=", 3) != 0) 1407 return 1; /* Default location */ 1408 1409 options += 3; 1410 /* TODO: use simple_strtoll with >32bit ext4 */ 1411 sb_block = simple_strtoul(options, &options, 0); 1412 if (*options && *options != ',') { 1413 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", 1414 
(char *) *data); 1415 return 1; 1416 } 1417 if (*options == ',') 1418 options++; 1419 *data = (void *) options; 1420 1421 return sb_block; 1422} 1423 1424#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) 1425static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" 1426 "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; 1427 1428#ifdef CONFIG_QUOTA 1429static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) 1430{ 1431 struct ext4_sb_info *sbi = EXT4_SB(sb); 1432 char *qname; 1433 1434 if (sb_any_quota_loaded(sb) && 1435 !sbi->s_qf_names[qtype]) { 1436 ext4_msg(sb, KERN_ERR, 1437 "Cannot change journaled " 1438 "quota options when quota turned on"); 1439 return 0; 1440 } 1441 qname = match_strdup(args); 1442 if (!qname) { 1443 ext4_msg(sb, KERN_ERR, 1444 "Not enough memory for storing quotafile name"); 1445 return 0; 1446 } 1447 if (sbi->s_qf_names[qtype] && 1448 strcmp(sbi->s_qf_names[qtype], qname)) { 1449 ext4_msg(sb, KERN_ERR, 1450 "%s quota file already specified", QTYPE2NAME(qtype)); 1451 kfree(qname); 1452 return 0; 1453 } 1454 sbi->s_qf_names[qtype] = qname; 1455 if (strchr(sbi->s_qf_names[qtype], '/')) { 1456 ext4_msg(sb, KERN_ERR, 1457 "quotafile must be on filesystem root"); 1458 kfree(sbi->s_qf_names[qtype]); 1459 sbi->s_qf_names[qtype] = NULL; 1460 return 0; 1461 } 1462 set_opt(sb, QUOTA); 1463 return 1; 1464} 1465 1466static int clear_qf_name(struct super_block *sb, int qtype) 1467{ 1468 1469 struct ext4_sb_info *sbi = EXT4_SB(sb); 1470 1471 if (sb_any_quota_loaded(sb) && 1472 sbi->s_qf_names[qtype]) { 1473 ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" 1474 " when quota turned on"); 1475 return 0; 1476 } 1477 /* 1478 * The space will be released later when all options are confirmed 1479 * to be correct 1480 */ 1481 sbi->s_qf_names[qtype] = NULL; 1482 return 1; 1483} 1484#endif 1485 1486static int parse_options(char *options, struct super_block *sb, 1487 
unsigned long *journal_devnum, 1488 unsigned int *journal_ioprio, 1489 ext4_fsblk_t *n_blocks_count, int is_remount) 1490{ 1491 struct ext4_sb_info *sbi = EXT4_SB(sb); 1492 char *p; 1493 substring_t args[MAX_OPT_ARGS]; 1494 int data_opt = 0; 1495 int option; 1496#ifdef CONFIG_QUOTA 1497 int qfmt; 1498#endif 1499 1500 if (!options) 1501 return 1; 1502 1503 while ((p = strsep(&options, ",")) != NULL) { 1504 int token; 1505 if (!*p) 1506 continue; 1507 1508 /* 1509 * Initialize args struct so we know whether arg was 1510 * found; some options take optional arguments. 1511 */ 1512 args[0].to = args[0].from = NULL; 1513 token = match_token(p, tokens, args); 1514 switch (token) { 1515 case Opt_bsd_df: 1516 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1517 clear_opt(sb, MINIX_DF); 1518 break; 1519 case Opt_minix_df: 1520 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1521 set_opt(sb, MINIX_DF); 1522 1523 break; 1524 case Opt_grpid: 1525 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1526 set_opt(sb, GRPID); 1527 1528 break; 1529 case Opt_nogrpid: 1530 ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); 1531 clear_opt(sb, GRPID); 1532 1533 break; 1534 case Opt_resuid: 1535 if (match_int(&args[0], &option)) 1536 return 0; 1537 sbi->s_resuid = option; 1538 break; 1539 case Opt_resgid: 1540 if (match_int(&args[0], &option)) 1541 return 0; 1542 sbi->s_resgid = option; 1543 break; 1544 case Opt_sb: 1545 /* handled by get_sb_block() instead of here */ 1546 /* *sb_block = match_int(&args[0]); */ 1547 break; 1548 case Opt_err_panic: 1549 clear_opt(sb, ERRORS_CONT); 1550 clear_opt(sb, ERRORS_RO); 1551 set_opt(sb, ERRORS_PANIC); 1552 break; 1553 case Opt_err_ro: 1554 clear_opt(sb, ERRORS_CONT); 1555 clear_opt(sb, ERRORS_PANIC); 1556 set_opt(sb, ERRORS_RO); 1557 break; 1558 case Opt_err_cont: 1559 clear_opt(sb, ERRORS_RO); 1560 clear_opt(sb, ERRORS_PANIC); 1561 set_opt(sb, ERRORS_CONT); 1562 break; 1563 case Opt_nouid32: 1564 set_opt(sb, 
NO_UID32); 1565 break; 1566 case Opt_debug: 1567 set_opt(sb, DEBUG); 1568 break; 1569 case Opt_oldalloc: 1570 set_opt(sb, OLDALLOC); 1571 break; 1572 case Opt_orlov: 1573 clear_opt(sb, OLDALLOC); 1574 break; 1575#ifdef CONFIG_EXT4_FS_XATTR 1576 case Opt_user_xattr: 1577 set_opt(sb, XATTR_USER); 1578 break; 1579 case Opt_nouser_xattr: 1580 clear_opt(sb, XATTR_USER); 1581 break; 1582#else 1583 case Opt_user_xattr: 1584 case Opt_nouser_xattr: 1585 ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); 1586 break; 1587#endif 1588#ifdef CONFIG_EXT4_FS_POSIX_ACL 1589 case Opt_acl: 1590 set_opt(sb, POSIX_ACL); 1591 break; 1592 case Opt_noacl: 1593 clear_opt(sb, POSIX_ACL); 1594 break; 1595#else 1596 case Opt_acl: 1597 case Opt_noacl: 1598 ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); 1599 break; 1600#endif 1601 case Opt_journal_update: 1602 /* @@@ FIXME */ 1603 /* Eventually we will want to be able to create 1604 a journal file here. For now, only allow the 1605 user to specify an existing inode to be the 1606 journal file. 
*/ 1607 if (is_remount) { 1608 ext4_msg(sb, KERN_ERR, 1609 "Cannot specify journal on remount"); 1610 return 0; 1611 } 1612 set_opt(sb, UPDATE_JOURNAL); 1613 break; 1614 case Opt_journal_dev: 1615 if (is_remount) { 1616 ext4_msg(sb, KERN_ERR, 1617 "Cannot specify journal on remount"); 1618 return 0; 1619 } 1620 if (match_int(&args[0], &option)) 1621 return 0; 1622 *journal_devnum = option; 1623 break; 1624 case Opt_journal_checksum: 1625 set_opt(sb, JOURNAL_CHECKSUM); 1626 break; 1627 case Opt_journal_async_commit: 1628 set_opt(sb, JOURNAL_ASYNC_COMMIT); 1629 set_opt(sb, JOURNAL_CHECKSUM); 1630 break; 1631 case Opt_noload: 1632 set_opt(sb, NOLOAD); 1633 break; 1634 case Opt_commit: 1635 if (match_int(&args[0], &option)) 1636 return 0; 1637 if (option < 0) 1638 return 0; 1639 if (option == 0) 1640 option = JBD2_DEFAULT_MAX_COMMIT_AGE; 1641 sbi->s_commit_interval = HZ * option; 1642 break; 1643 case Opt_max_batch_time: 1644 if (match_int(&args[0], &option)) 1645 return 0; 1646 if (option < 0) 1647 return 0; 1648 if (option == 0) 1649 option = EXT4_DEF_MAX_BATCH_TIME; 1650 sbi->s_max_batch_time = option; 1651 break; 1652 case Opt_min_batch_time: 1653 if (match_int(&args[0], &option)) 1654 return 0; 1655 if (option < 0) 1656 return 0; 1657 sbi->s_min_batch_time = option; 1658 break; 1659 case Opt_data_journal: 1660 data_opt = EXT4_MOUNT_JOURNAL_DATA; 1661 goto datacheck; 1662 case Opt_data_ordered: 1663 data_opt = EXT4_MOUNT_ORDERED_DATA; 1664 goto datacheck; 1665 case Opt_data_writeback: 1666 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1667 datacheck: 1668 if (is_remount) { 1669 if (test_opt(sb, DATA_FLAGS) != data_opt) { 1670 ext4_msg(sb, KERN_ERR, 1671 "Cannot change data mode on remount"); 1672 return 0; 1673 } 1674 } else { 1675 clear_opt(sb, DATA_FLAGS); 1676 sbi->s_mount_opt |= data_opt; 1677 } 1678 break; 1679 case Opt_data_err_abort: 1680 set_opt(sb, DATA_ERR_ABORT); 1681 break; 1682 case Opt_data_err_ignore: 1683 clear_opt(sb, DATA_ERR_ABORT); 1684 break; 
1685#ifdef CONFIG_QUOTA 1686 case Opt_usrjquota: 1687 if (!set_qf_name(sb, USRQUOTA, &args[0])) 1688 return 0; 1689 break; 1690 case Opt_grpjquota: 1691 if (!set_qf_name(sb, GRPQUOTA, &args[0])) 1692 return 0; 1693 break; 1694 case Opt_offusrjquota: 1695 if (!clear_qf_name(sb, USRQUOTA)) 1696 return 0; 1697 break; 1698 case Opt_offgrpjquota: 1699 if (!clear_qf_name(sb, GRPQUOTA)) 1700 return 0; 1701 break; 1702 1703 case Opt_jqfmt_vfsold: 1704 qfmt = QFMT_VFS_OLD; 1705 goto set_qf_format; 1706 case Opt_jqfmt_vfsv0: 1707 qfmt = QFMT_VFS_V0; 1708 goto set_qf_format; 1709 case Opt_jqfmt_vfsv1: 1710 qfmt = QFMT_VFS_V1; 1711set_qf_format: 1712 if (sb_any_quota_loaded(sb) && 1713 sbi->s_jquota_fmt != qfmt) { 1714 ext4_msg(sb, KERN_ERR, "Cannot change " 1715 "journaled quota options when " 1716 "quota turned on"); 1717 return 0; 1718 } 1719 sbi->s_jquota_fmt = qfmt; 1720 break; 1721 case Opt_quota: 1722 case Opt_usrquota: 1723 set_opt(sb, QUOTA); 1724 set_opt(sb, USRQUOTA); 1725 break; 1726 case Opt_grpquota: 1727 set_opt(sb, QUOTA); 1728 set_opt(sb, GRPQUOTA); 1729 break; 1730 case Opt_noquota: 1731 if (sb_any_quota_loaded(sb)) { 1732 ext4_msg(sb, KERN_ERR, "Cannot change quota " 1733 "options when quota turned on"); 1734 return 0; 1735 } 1736 clear_opt(sb, QUOTA); 1737 clear_opt(sb, USRQUOTA); 1738 clear_opt(sb, GRPQUOTA); 1739 break; 1740#else 1741 case Opt_quota: 1742 case Opt_usrquota: 1743 case Opt_grpquota: 1744 ext4_msg(sb, KERN_ERR, 1745 "quota options not supported"); 1746 break; 1747 case Opt_usrjquota: 1748 case Opt_grpjquota: 1749 case Opt_offusrjquota: 1750 case Opt_offgrpjquota: 1751 case Opt_jqfmt_vfsold: 1752 case Opt_jqfmt_vfsv0: 1753 case Opt_jqfmt_vfsv1: 1754 ext4_msg(sb, KERN_ERR, 1755 "journaled quota options not supported"); 1756 break; 1757 case Opt_noquota: 1758 break; 1759#endif 1760 case Opt_abort: 1761 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1762 break; 1763 case Opt_nobarrier: 1764 clear_opt(sb, BARRIER); 1765 break; 1766 case Opt_barrier: 
1767 if (args[0].from) { 1768 if (match_int(&args[0], &option)) 1769 return 0; 1770 } else 1771 option = 1; /* No argument, default to 1 */ 1772 if (option) 1773 set_opt(sb, BARRIER); 1774 else 1775 clear_opt(sb, BARRIER); 1776 break; 1777 case Opt_ignore: 1778 break; 1779 case Opt_resize: 1780 if (!is_remount) { 1781 ext4_msg(sb, KERN_ERR, 1782 "resize option only available " 1783 "for remount"); 1784 return 0; 1785 } 1786 if (match_int(&args[0], &option) != 0) 1787…
Large files are truncated, but you can click here to view the full file