/linux/src/fs/namei.c
C | 1433 lines | 952 code | 177 blank | 304 comment | 247 complexity | 2d3e11e8be013de68c690a999f574d0e MD5 | raw file
1/* 2 * linux/fs/namei.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 * 6 * OSKit support added by the University of Utah, 1997 7 */ 8 9/* 10 * Some corrections by tytso. 11 */ 12 13/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname 14 * lookup logic. 15 */ 16 17#include <linux/mm.h> 18#include <linux/proc_fs.h> 19#include <linux/smp_lock.h> 20#include <linux/quotaops.h> 21 22#include <asm/uaccess.h> 23#include <asm/unaligned.h> 24#include <asm/semaphore.h> 25#include <asm/page.h> 26#include <asm/pgtable.h> 27 28#include <asm/namei.h> 29 30/* This can be removed after the beta phase. */ 31#define CACHE_SUPERVISE /* debug the correctness of dcache entries */ 32#undef DEBUG /* some other debugging */ 33 34 35#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 36 37#ifndef OSKIT 38/* [Feb-1997 T. Schoebel-Theuer] 39 * Fundamental changes in the pathname lookup mechanisms (namei) 40 * were necessary because of omirr. The reason is that omirr needs 41 * to know the _real_ pathname, not the user-supplied one, in case 42 * of symlinks (and also when transname replacements occur). 43 * 44 * The new code replaces the old recursive symlink resolution with 45 * an iterative one (in case of non-nested symlink chains). It does 46 * this with calls to <fs>_follow_link(). 47 * As a side effect, dir_namei(), _namei() and follow_link() are now 48 * replaced with a single function lookup_dentry() that can handle all 49 * the special cases of the former code. 50 * 51 * With the new dcache, the pathname is stored at each inode, at least as 52 * long as the refcount of the inode is positive. As a side effect, the 53 * size of the dcache depends on the inode cache and thus is dynamic. 54 * 55 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink 56 * resolution to correspond with current state of the code. 57 * 58 * Note that the symlink resolution is not *completely* iterative. 
59 * There is still a significant amount of tail- and mid- recursion in 60 * the algorithm. Also, note that <fs>_readlink() is not used in 61 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink() 62 * may return different results than <fs>_follow_link(). Many virtual 63 * filesystems (including /proc) exhibit this behavior. 64 */ 65 66/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation: 67 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL 68 * and the name already exists in form of a symlink, try to create the new 69 * name indicated by the symlink. The old code always complained that the 70 * name already exists, due to not following the symlink even if its target 71 * is nonexistent. The new semantics affects also mknod() and link() when 72 * the name is a symlink pointing to a nonexistent name. 73 * 74 * I don't know which semantics is the right one, since I have no access 75 * to standards. But I found by trial that HP-UX 9.0 has the full "new" 76 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the 77 * "old" one. Personally, I think the new semantics is much more logical. 78 * Note that "ln old new" where "new" is a symlink pointing to a non-existing 79 * file does succeed in both HP-UX and SunOS, but not in Solaris 80 * and in the old Linux semantics. 81 */ 82 83/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink 84 * semantics. See the comments in "open_namei" and "do_link" below. 85 * 86 * [10-Sep-98 Alan Modra] Another symlink change. 87 */ 88 89/* In order to reduce some races, while at the same time doing additional 90 * checking and hopefully speeding things up, we copy filenames to the 91 * kernel data space before using them.. 92 * 93 * POSIX.1 2.4: an empty pathname is invalid (ENOENT). 
94 */ 95static inline int do_getname(const char *filename, char *page) 96{ 97 int retval; 98 unsigned long len = PAGE_SIZE; 99 100 if ((unsigned long) filename >= TASK_SIZE) { 101 if (!segment_eq(get_fs(), KERNEL_DS)) 102 return -EFAULT; 103 } else if (TASK_SIZE - (unsigned long) filename < PAGE_SIZE) 104 len = TASK_SIZE - (unsigned long) filename; 105 106 retval = strncpy_from_user((char *)page, filename, len); 107 if (retval > 0) { 108 if (retval < len) 109 return 0; 110 return -ENAMETOOLONG; 111 } else if (!retval) 112 retval = -ENOENT; 113 return retval; 114} 115#endif /* OSKIT */ 116 117char * getname(const char * filename) 118{ 119#ifdef OSKIT 120 return (char *)filename; 121#else 122 char *tmp, *result; 123 124 result = ERR_PTR(-ENOMEM); 125 tmp = __getname(); 126 if (tmp) { 127 int retval = do_getname(filename, tmp); 128 129 result = tmp; 130 if (retval < 0) { 131 putname(tmp); 132 result = ERR_PTR(retval); 133 } 134 } 135 return result; 136#endif /* OSKIT */ 137} 138 139/* 140 * permission() 141 * 142 * is used to check for read/write/execute permissions on a file. 143 * We use "fsuid" for this, letting us set arbitrary permissions 144 * for filesystem access without changing the "normal" uids which 145 * are used for other things.. 
146 */ 147int permission(struct inode * inode,int mask) 148{ 149 int mode = inode->i_mode; 150 151 if (inode->i_op && inode->i_op->permission) 152 return inode->i_op->permission(inode, mask); 153 else if ((mask & S_IWOTH) && IS_RDONLY(inode) && 154 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) 155 return -EROFS; /* Nobody gets write access to a read-only fs */ 156 else if ((mask & S_IWOTH) && IS_IMMUTABLE(inode)) 157 return -EACCES; /* Nobody gets write access to an immutable file */ 158 else if (current->fsuid == inode->i_uid) 159 mode >>= 6; 160 else if (in_group_p(inode->i_gid)) 161 mode >>= 3; 162 if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE)) 163 return 0; 164 /* read and search access */ 165 if ((mask == S_IROTH) || 166 (S_ISDIR(mode) && !(mask & ~(S_IROTH | S_IXOTH)))) 167 if (capable(CAP_DAC_READ_SEARCH)) 168 return 0; 169 return -EACCES; 170} 171 172/* 173 * get_write_access() gets write permission for a file. 174 * put_write_access() releases this write permission. 175 * This is used for regular files. 176 * We cannot support write (and maybe mmap read-write shared) accesses and 177 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode 178 * can have the following values: 179 * 0: no writers, no VM_DENYWRITE mappings 180 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist 181 * > 0: (i_writecount) users are writing to the file. 182 */ 183int get_write_access(struct inode * inode) 184{ 185#ifdef OSKIT 186 /* This doesn't need to do anything since i_writecount is only 187 so exec can do ETXTBSY. */ 188#else 189 if (inode->i_writecount < 0) 190 return -ETXTBSY; 191 inode->i_writecount++; 192#endif /* OSKIT */ 193 return 0; 194} 195 196void put_write_access(struct inode * inode) 197{ 198#ifdef OSKIT 199 /* This doesn't need to do anything since i_writecount is only 200 so exec can do ETXTBSY. */ 201#else 202 inode->i_writecount--; 203#endif 204} 205 206/* 207 * "." and ".." are special - ".." 
especially so because it has to be able 208 * to know about the current root directory and parent relationships 209 */ 210static struct dentry * reserved_lookup(struct dentry * parent, struct qstr * name) 211{ 212 struct dentry *result = NULL; 213 if (name->name[0] == '.') { 214 switch (name->len) { 215 default: 216 break; 217 case 2: 218 if (name->name[1] != '.') 219 break; 220 221 if (parent != current->fs->root) 222 parent = parent->d_covers->d_parent; 223 /* fallthrough */ 224 case 1: 225 result = parent; 226 } 227 } 228 return dget(result); 229} 230 231/* 232 * Internal lookup() using the new generic dcache. 233 */ 234static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) 235{ 236 struct dentry * dentry = d_lookup(parent, name); 237 238 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { 239 if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { 240 dput(dentry); 241 dentry = NULL; 242 } 243 } 244 return dentry; 245} 246 247/* 248 * This is called when everything else fails, and we actually have 249 * to go to the low-level filesystem to find out what we should do.. 250 * 251 * We get the directory semaphore, and after getting that we also 252 * make sure that nobody added the entry to the dcache in the meantime.. 253 */ 254static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) 255{ 256 struct dentry * result; 257 struct inode *dir = parent->d_inode; 258 259 down(&dir->i_sem); 260 /* 261 * First re-do the cached lookup just in case it was created 262 * while we waited for the directory semaphore.. 263 * 264 * FIXME! This could use version numbering or similar to 265 * avoid unnecessary cache lookups. 
266 */ 267 result = cached_lookup(parent, name, flags); 268 if (!result) { 269 struct dentry * dentry = d_alloc(parent, name); 270 result = ERR_PTR(-ENOMEM); 271 if (dentry) { 272 result = dir->i_op->lookup(dir, dentry); 273 if (result) 274 dput(dentry); 275 else 276 result = dentry; 277 } 278 } 279 up(&dir->i_sem); 280 return result; 281} 282 283static struct dentry * do_follow_link(struct dentry *base, struct dentry *dentry, unsigned int follow) 284{ 285 struct inode * inode = dentry->d_inode; 286 287 if ((follow & LOOKUP_FOLLOW) 288 && inode && inode->i_op && inode->i_op->follow_link) { 289 if (current->link_count < 5) { 290 struct dentry * result; 291 292 current->link_count++; 293 /* This eats the base */ 294 result = inode->i_op->follow_link(dentry, base, follow); 295 current->link_count--; 296 dput(dentry); 297 return result; 298 } 299 dput(dentry); 300 dentry = ERR_PTR(-ELOOP); 301 } 302 dput(base); 303 return dentry; 304} 305 306static inline struct dentry * follow_mount(struct dentry * dentry) 307{ 308 struct dentry * mnt = dentry->d_mounts; 309 310 if (mnt != dentry) { 311 dget(mnt); 312 dput(dentry); 313 dentry = mnt; 314 } 315 return dentry; 316} 317 318/* 319 * Name resolution. 320 * 321 * This is the basic name resolution function, turning a pathname 322 * into the final dentry. 323 */ 324struct dentry * lookup_dentry(const char * name, struct dentry * base, unsigned int lookup_flags) 325{ 326 struct dentry * dentry; 327 struct inode *inode; 328 329 if (*name == '/') { 330 if (base) 331 dput(base); 332 do { 333 name++; 334 } while (*name == '/'); 335 __prefix_lookup_dentry(name, lookup_flags); 336 base = dget(current->fs->root); 337 } else if (!base) { 338 base = dget(current->fs->pwd); 339 } 340 341 if (!*name) 342 goto return_base; 343 344 inode = base->d_inode; 345 lookup_flags &= LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_SLASHOK; 346 347 /* At this point we know we have a real path component. 
*/ 348 for(;;) { 349 int err; 350 unsigned long hash; 351 struct qstr this; 352 unsigned int flags; 353 unsigned int c; 354 355 err = permission(inode, MAY_EXEC); 356 dentry = ERR_PTR(err); 357 if (err) 358 break; 359 360 this.name = name; 361 c = *(const unsigned char *)name; 362 363 hash = init_name_hash(); 364 do { 365 name++; 366 hash = partial_name_hash(c, hash); 367 c = *(const unsigned char *)name; 368 } while (c && (c != '/')); 369 this.len = name - (const char *) this.name; 370 this.hash = end_name_hash(hash); 371 372 /* remove trailing slashes? */ 373 flags = lookup_flags; 374 if (c) { 375 char tmp; 376 377 flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 378 do { 379 tmp = *++name; 380 } while (tmp == '/'); 381 if (tmp) 382 flags |= LOOKUP_CONTINUE; 383 } 384 385 /* 386 * See if the low-level filesystem might want 387 * to use its own hash.. 388 */ 389 if (base->d_op && base->d_op->d_hash) { 390 int error; 391 error = base->d_op->d_hash(base, &this); 392 if (error < 0) { 393 dentry = ERR_PTR(error); 394 break; 395 } 396 } 397 398 /* This does the actual lookups.. */ 399 dentry = reserved_lookup(base, &this); 400 if (!dentry) { 401 dentry = cached_lookup(base, &this, flags); 402 if (!dentry) { 403 dentry = real_lookup(base, &this, flags); 404 if (IS_ERR(dentry)) 405 break; 406 } 407 } 408 409 /* Check mountpoints.. */ 410 dentry = follow_mount(dentry); 411 412 base = do_follow_link(base, dentry, flags); 413 if (IS_ERR(base)) 414 goto return_base; 415 416 inode = base->d_inode; 417 if (flags & LOOKUP_DIRECTORY) { 418 if (!inode) 419 goto no_inode; 420 dentry = ERR_PTR(-ENOTDIR); 421 if (!inode->i_op || !inode->i_op->lookup) 422 break; 423 if (flags & LOOKUP_CONTINUE) 424 continue; 425 } 426return_base: 427 return base; 428/* 429 * The case of a nonexisting file is special. 430 * 431 * In the middle of a pathname lookup (ie when 432 * LOOKUP_CONTINUE is set), it's an obvious 433 * error and returns ENOENT. 
434 * 435 * At the end of a pathname lookup it's legal, 436 * and we return a negative dentry. However, we 437 * get here only if there were trailing slashes, 438 * which is legal only if we know it's supposed 439 * to be a directory (ie "mkdir"). Thus the 440 * LOOKUP_SLASHOK flag. 441 */ 442no_inode: 443 dentry = ERR_PTR(-ENOENT); 444 if (flags & LOOKUP_CONTINUE) 445 break; 446 if (flags & LOOKUP_SLASHOK) 447 goto return_base; 448 break; 449 } 450 dput(base); 451 return dentry; 452} 453 454/* 455 * namei() 456 * 457 * is used by most simple commands to get the inode of a specified name. 458 * Open, link etc use their own routines, but this is enough for things 459 * like 'chmod' etc. 460 * 461 * namei exists in two versions: namei/lnamei. The only difference is 462 * that namei follows links, while lnamei does not. 463 */ 464struct dentry * __namei(const char *pathname, unsigned int lookup_flags) 465{ 466 char *name; 467 struct dentry *dentry; 468 469 name = getname(pathname); 470 dentry = (struct dentry *) name; 471 if (!IS_ERR(name)) { 472 dentry = lookup_dentry(name, NULL, lookup_flags); 473 putname(name); 474 if (!IS_ERR(dentry)) { 475 if (!dentry->d_inode) { 476 dput(dentry); 477 dentry = ERR_PTR(-ENOENT); 478 } 479 } 480 } 481 return dentry; 482} 483 484/* 485 * It's inline, so penalty for filesystems that don't use sticky bit is 486 * minimal. 487 */ 488static inline int check_sticky(struct inode *dir, struct inode *inode) 489{ 490 if (!(dir->i_mode & S_ISVTX)) 491 return 0; 492 if (inode->i_uid == current->fsuid) 493 return 0; 494 if (dir->i_uid == current->fsuid) 495 return 0; 496 return !capable(CAP_FOWNER); 497} 498 499/* 500 * Check whether we can remove a link victim from directory dir, check 501 * whether the type of victim is right. 502 * 1. We can't do it if dir is read-only (done in permission()) 503 * 2. We should have write and exec permissions on dir 504 * 3. We can't remove anything from append-only dir 505 * 4. 
We can't do anything with immutable dir (done in permission()) 506 * 5. If the sticky bit on dir is set we should either 507 * a. be owner of dir, or 508 * b. be owner of victim, or 509 * c. have CAP_FOWNER capability 510 * 6. If the victim is append-only or immutable we can't do antyhing with 511 * links pointing to it. 512 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 513 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 514 * 9. We can't remove a root or mountpoint. 515 */ 516static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir) 517{ 518 int error; 519 if (!victim->d_inode || victim->d_parent->d_inode != dir) 520 return -ENOENT; 521 error = permission(dir,MAY_WRITE | MAY_EXEC); 522 if (error) 523 return error; 524 if (IS_APPEND(dir)) 525 return -EPERM; 526 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| 527 IS_IMMUTABLE(victim->d_inode)) 528 return -EPERM; 529 if (isdir) { 530 if (!S_ISDIR(victim->d_inode->i_mode)) 531 return -ENOTDIR; 532 if (IS_ROOT(victim)) 533 return -EBUSY; 534 if (victim->d_mounts != victim->d_covers) 535 return -EBUSY; 536 } else if (S_ISDIR(victim->d_inode->i_mode)) 537 return -EISDIR; 538 return 0; 539} 540 541/* Check whether we can create an object with dentry child in directory 542 * dir. 543 * 1. We can't do it if child already exists (open has special treatment for 544 * this case, but since we are inlined it's OK) 545 * 2. We can't do it if dir is read-only (done in permission()) 546 * 3. We should have write and exec permissions on dir 547 * 4. 
We can't do it if dir is immutable (done in permission()) 548 */ 549static inline int may_create(struct inode *dir, struct dentry *child) { 550 if (child->d_inode) 551 return -EEXIST; 552 return permission(dir,MAY_WRITE | MAY_EXEC); 553} 554 555static inline struct dentry *get_parent(struct dentry *dentry) 556{ 557 return dget(dentry->d_parent); 558} 559 560static inline void unlock_dir(struct dentry *dir) 561{ 562 up(&dir->d_inode->i_sem); 563 dput(dir); 564} 565 566/* 567 * We need to do a check-parent every time 568 * after we have locked the parent - to verify 569 * that the parent is still our parent and 570 * that we are still hashed onto it.. 571 * 572 * This is requied in case two processes race 573 * on removing (or moving) the same entry: the 574 * parent lock will serialize them, but the 575 * other process will be too late.. 576 */ 577#define check_parent(dir, dentry) \ 578 ((dir) == (dentry)->d_parent && !list_empty(&dentry->d_hash)) 579 580/* 581 * Locking the parent is needed to: 582 * - serialize directory operations 583 * - make sure the parent doesn't change from 584 * under us in the middle of an operation. 585 * 586 * NOTE! Right now we'd rather use a "struct inode" 587 * for this, but as I expect things to move toward 588 * using dentries instead for most things it is 589 * probably better to start with the conceptually 590 * better interface of relying on a path of dentries. 591 */ 592static inline struct dentry *lock_parent(struct dentry *dentry) 593{ 594 struct dentry *dir = dget(dentry->d_parent); 595 596 down(&dir->d_inode->i_sem); 597 return dir; 598} 599 600/* 601 * Whee.. Deadlock country. Happily there are only two VFS 602 * operations that do this.. 
603 */ 604static inline void double_lock(struct dentry *d1, struct dentry *d2) 605{ 606 struct semaphore *s1 = &d1->d_inode->i_sem; 607 struct semaphore *s2 = &d2->d_inode->i_sem; 608 609 if (s1 != s2) { 610 if ((unsigned long) s1 < (unsigned long) s2) { 611 struct semaphore *tmp = s2; 612 s2 = s1; s1 = tmp; 613 } 614 down(s1); 615 } 616 down(s2); 617} 618 619static inline void double_unlock(struct dentry *d1, struct dentry *d2) 620{ 621 struct semaphore *s1 = &d1->d_inode->i_sem; 622 struct semaphore *s2 = &d2->d_inode->i_sem; 623 624 up(s1); 625 if (s1 != s2) 626 up(s2); 627 dput(d1); 628 dput(d2); 629} 630 631 632/* 633 * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security 634 * reasons. 635 * 636 * O_DIRECTORY translates into forcing a directory lookup. 637 */ 638static inline int lookup_flags(unsigned int f) 639{ 640 unsigned long retval = LOOKUP_FOLLOW; 641 642 if (f & O_NOFOLLOW) 643 retval &= ~LOOKUP_FOLLOW; 644 645 if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) 646 retval &= ~LOOKUP_FOLLOW; 647 648 if (f & O_DIRECTORY) 649 retval |= LOOKUP_DIRECTORY; 650 651 return retval; 652} 653 654/* 655 * open_namei() 656 * 657 * namei for open - this is in fact almost the whole open-routine. 658 * 659 * Note that the low bits of "flag" aren't the same as in the open 660 * system call - they are 00 - no permissions needed 661 * 01 - read permission needed 662 * 10 - write permission needed 663 * 11 - read/write permissions needed 664 * which is a lot more logical, and also allows the "no perm" needed 665 * for symlinks (where the permissions are checked later). 
666 */ 667struct dentry * open_namei(const char * pathname, int flag, int mode) 668{ 669 int acc_mode, error; 670 struct inode *inode; 671 struct dentry *dentry; 672 673 mode &= S_IALLUGO & ~current->fs->umask; 674 mode |= S_IFREG; 675 676 dentry = lookup_dentry(pathname, NULL, lookup_flags(flag)); 677 if (IS_ERR(dentry)) 678 return dentry; 679 680 acc_mode = ACC_MODE(flag); 681 if (flag & O_CREAT) { 682 struct dentry *dir; 683 684 if (dentry->d_inode) { 685 if (!(flag & O_EXCL)) 686 goto nocreate; 687 error = -EEXIST; 688 goto exit; 689 } 690 691 dir = lock_parent(dentry); 692 if (!check_parent(dir, dentry)) { 693 /* 694 * Really nasty race happened. What's the 695 * right error code? We had a dentry, but 696 * before we could use it it was removed 697 * by somebody else. We could just re-try 698 * everything, I guess. 699 * 700 * ENOENT is definitely wrong. 701 */ 702 error = -ENOENT; 703 unlock_dir(dir); 704 goto exit; 705 } 706 707 /* 708 * Somebody might have created the file while we 709 * waited for the directory lock.. So we have to 710 * re-do the existence test. 
711 */ 712 if (dentry->d_inode) { 713 error = 0; 714 if (flag & O_EXCL) 715 error = -EEXIST; 716 } else if ((error = may_create(dir->d_inode, dentry)) == 0) { 717 if (!dir->d_inode->i_op || !dir->d_inode->i_op->create) 718 error = -EACCES; 719 else { 720 DQUOT_INIT(dir->d_inode); 721 error = dir->d_inode->i_op->create(dir->d_inode, dentry, mode); 722 /* Don't check for write permission, don't truncate */ 723 acc_mode = 0; 724 flag &= ~O_TRUNC; 725 } 726 } 727 unlock_dir(dir); 728 if (error) 729 goto exit; 730 } 731 732nocreate: 733 error = -ENOENT; 734 inode = dentry->d_inode; 735 if (!inode) 736 goto exit; 737 738 error = -ELOOP; 739 if (S_ISLNK(inode->i_mode)) 740 goto exit; 741 742 error = -EISDIR; 743 if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) 744 goto exit; 745 746 error = permission(inode,acc_mode); 747 if (error) 748 goto exit; 749 750 /* 751 * FIFO's, sockets and device files are special: they don't 752 * actually live on the filesystem itself, and as such you 753 * can write to them even if the filesystem is read-only. 754 */ 755 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 756 flag &= ~O_TRUNC; 757 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { 758 error = -EACCES; 759 if (IS_NODEV(inode)) 760 goto exit; 761 762 flag &= ~O_TRUNC; 763 } else { 764 error = -EROFS; 765 if (IS_RDONLY(inode) && (flag & 2)) 766 goto exit; 767 } 768 /* 769 * An append-only file must be opened in append mode for writing. 770 */ 771 error = -EPERM; 772 if (IS_APPEND(inode)) { 773 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 774 goto exit; 775 if (flag & O_TRUNC) 776 goto exit; 777 } 778 779 if (flag & O_TRUNC) { 780 error = get_write_access(inode); 781 if (error) 782 goto exit; 783 784 /* 785 * Refuse to truncate files with mandatory locks held on them. 
786 */ 787 error = locks_verify_locked(inode); 788 if (!error) { 789 DQUOT_INIT(inode); 790 791 error = do_truncate(dentry, 0); 792 } 793 put_write_access(inode); 794 if (error) 795 goto exit; 796 } else 797 if (flag & FMODE_WRITE) 798 DQUOT_INIT(inode); 799 800 return dentry; 801 802exit: 803 dput(dentry); 804 return ERR_PTR(error); 805} 806 807struct dentry * do_mknod(const char * filename, int mode, dev_t dev) 808{ 809 int error; 810 struct dentry *dir; 811 struct dentry *dentry, *retval; 812 813 mode &= ~current->fs->umask; 814 dentry = lookup_dentry(filename, NULL, LOOKUP_FOLLOW); 815 if (IS_ERR(dentry)) 816 return dentry; 817 818 dir = lock_parent(dentry); 819 error = -ENOENT; 820 if (!check_parent(dir, dentry)) 821 goto exit_lock; 822 823 error = may_create(dir->d_inode, dentry); 824 if (error) 825 goto exit_lock; 826 827 error = -EPERM; 828 if (!dir->d_inode->i_op || !dir->d_inode->i_op->mknod) 829 goto exit_lock; 830 831 DQUOT_INIT(dir->d_inode); 832 error = dir->d_inode->i_op->mknod(dir->d_inode, dentry, mode, dev); 833exit_lock: 834 retval = ERR_PTR(error); 835 if (!error) 836 retval = dget(dentry); 837 unlock_dir(dir); 838 dput(dentry); 839 return retval; 840} 841 842asmlinkage int sys_mknod(const char * filename, int mode, dev_t dev) 843{ 844 int error; 845 char * tmp; 846 847 lock_kernel(); 848 error = -EPERM; 849 if (S_ISDIR(mode) || (!S_ISFIFO(mode) && !capable(CAP_SYS_ADMIN))) 850 goto out; 851 error = -EINVAL; 852 switch (mode & S_IFMT) { 853 case 0: 854 mode |= S_IFREG; 855 break; 856 case S_IFREG: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: 857 break; 858 default: 859 goto out; 860 } 861 tmp = getname(filename); 862 error = PTR_ERR(tmp); 863 if (!IS_ERR(tmp)) { 864 struct dentry * dentry = do_mknod(tmp,mode,dev); 865 putname(tmp); 866 error = PTR_ERR(dentry); 867 if (!IS_ERR(dentry)) { 868 dput(dentry); 869 error = 0; 870 } 871 } 872out: 873 unlock_kernel(); 874 return error; 875} 876 877/* 878 * Look out: this function may change a 
normal dentry 879 * into a directory dentry (different size).. 880 */ 881#ifdef OSKIT 882 int do_mkdir(const char * pathname, int mode) 883#else 884static inline int do_mkdir(const char * pathname, int mode) 885#endif 886{ 887 int error; 888 struct dentry *dir; 889 struct dentry *dentry; 890 891 dentry = lookup_dentry(pathname, NULL, LOOKUP_SLASHOK); 892 error = PTR_ERR(dentry); 893 if (IS_ERR(dentry)) 894 goto exit; 895 896 /* 897 * EEXIST is kind of a strange error code to 898 * return, but basically if the dentry was moved 899 * or unlinked while we locked the parent, we 900 * do know that it _did_ exist before, and as 901 * such it makes perfect sense.. In contrast, 902 * ENOENT doesn't make sense for mkdir. 903 */ 904 dir = lock_parent(dentry); 905 error = -EEXIST; 906 if (!check_parent(dir, dentry)) 907 goto exit_lock; 908 909 error = may_create(dir->d_inode, dentry); 910 if (error) 911 goto exit_lock; 912 913 error = -EPERM; 914 if (!dir->d_inode->i_op || !dir->d_inode->i_op->mkdir) 915 goto exit_lock; 916 917 DQUOT_INIT(dir->d_inode); 918 mode &= 0777 & ~current->fs->umask; 919 error = dir->d_inode->i_op->mkdir(dir->d_inode, dentry, mode); 920 921exit_lock: 922 unlock_dir(dir); 923 dput(dentry); 924exit: 925 return error; 926} 927 928asmlinkage int sys_mkdir(const char * pathname, int mode) 929{ 930 int error; 931 char * tmp; 932 933 lock_kernel(); 934 tmp = getname(pathname); 935 error = PTR_ERR(tmp); 936 if (!IS_ERR(tmp)) { 937 error = do_mkdir(tmp,mode); 938 putname(tmp); 939 } 940 unlock_kernel(); 941 return error; 942} 943 944int vfs_rmdir(struct inode *dir, struct dentry *dentry) 945{ 946 int error; 947 948 error = may_delete(dir, dentry, 1); 949 if (error) 950 return error; 951 952 if (!dir->i_op || !dir->i_op->rmdir) 953 return -EPERM; 954 955 DQUOT_INIT(dir); 956 957 /* 958 * We try to drop the dentry early: we should have 959 * a usage count of 2 if we're the only user of this 960 * dentry, and if that is true (possibly after pruning 961 * the 
dcache), then we drop the dentry now. 962 * 963 * A low-level filesystem can, if it choses, legally 964 * do a 965 * 966 * if (!list_empty(&dentry->d_hash)) 967 * return -EBUSY; 968 * 969 * if it cannot handle the case of removing a directory 970 * that is still in use by something else.. 971 */ 972 switch (dentry->d_count) { 973 default: 974 shrink_dcache_parent(dentry); 975 if (dentry->d_count != 2) 976 break; 977 case 2: 978 d_drop(dentry); 979 } 980 981 error = dir->i_op->rmdir(dir, dentry); 982 983 return error; 984} 985 986#ifdef OSKIT 987 int do_rmdir(const char * name) 988#else 989static inline int do_rmdir(const char * name) 990#endif 991{ 992 int error; 993 struct dentry *dir; 994 struct dentry *dentry; 995 996 dentry = lookup_dentry(name, NULL, 0); 997 error = PTR_ERR(dentry); 998 if (IS_ERR(dentry)) 999 goto exit; 1000 1001 error = -ENOENT; 1002 if (!dentry->d_inode) 1003 goto exit_dput; 1004 1005 dir = dget(dentry->d_parent); 1006 1007 /* 1008 * The dentry->d_count stuff confuses d_delete() enough to 1009 * not kill the inode from under us while it is locked. This 1010 * wouldn't be needed, except the dentry semaphore is really 1011 * in the inode, not in the dentry.. 
1012 */ 1013 dentry->d_count++; 1014 double_lock(dir, dentry); 1015 1016 error = -ENOENT; 1017 if (check_parent(dir, dentry)) 1018 error = vfs_rmdir(dir->d_inode, dentry); 1019 1020 double_unlock(dentry, dir); 1021exit_dput: 1022 dput(dentry); 1023exit: 1024 return error; 1025} 1026 1027asmlinkage int sys_rmdir(const char * pathname) 1028{ 1029 int error; 1030 char * tmp; 1031 1032 lock_kernel(); 1033 tmp = getname(pathname); 1034 error = PTR_ERR(tmp); 1035 if (!IS_ERR(tmp)) { 1036 error = do_rmdir(tmp); 1037 putname(tmp); 1038 } 1039 unlock_kernel(); 1040 return error; 1041} 1042 1043int vfs_unlink(struct inode *dir, struct dentry *dentry) 1044{ 1045 int error; 1046 1047 error = may_delete(dir, dentry, 0); 1048 if (!error) { 1049 error = -EPERM; 1050 if (dir->i_op && dir->i_op->unlink) { 1051 DQUOT_INIT(dir); 1052 error = dir->i_op->unlink(dir, dentry); 1053 } 1054 } 1055 return error; 1056} 1057 1058#ifdef OSKIT 1059 int do_unlink(const char * name) 1060#else 1061static inline int do_unlink(const char * name) 1062#endif 1063{ 1064 int error; 1065 struct dentry *dir; 1066 struct dentry *dentry; 1067 1068 dentry = lookup_dentry(name, NULL, 0); 1069 error = PTR_ERR(dentry); 1070 if (IS_ERR(dentry)) 1071 goto exit; 1072 1073 dir = lock_parent(dentry); 1074 error = -ENOENT; 1075 if (check_parent(dir, dentry)) 1076 error = vfs_unlink(dir->d_inode, dentry); 1077 1078 unlock_dir(dir); 1079 dput(dentry); 1080exit: 1081 return error; 1082} 1083 1084asmlinkage int sys_unlink(const char * pathname) 1085{ 1086 int error; 1087 char * tmp; 1088 1089 lock_kernel(); 1090 tmp = getname(pathname); 1091 error = PTR_ERR(tmp); 1092 if (!IS_ERR(tmp)) { 1093 error = do_unlink(tmp); 1094 putname(tmp); 1095 } 1096 unlock_kernel(); 1097 return error; 1098} 1099 1100#ifdef OSKIT 1101 int do_symlink(const char * oldname, const char * newname) 1102#else 1103static inline int do_symlink(const char * oldname, const char * newname) 1104#endif 1105{ 1106 int error; 1107 struct dentry *dir; 1108 
struct dentry *dentry; 1109 1110 dentry = lookup_dentry(newname, NULL, 0); 1111 1112 error = PTR_ERR(dentry); 1113 if (IS_ERR(dentry)) 1114 goto exit; 1115 1116 dir = lock_parent(dentry); 1117 error = -ENOENT; 1118 if (!check_parent(dir, dentry)) 1119 goto exit_lock; 1120 1121 error = may_create(dir->d_inode, dentry); 1122 if (error) 1123 goto exit_lock; 1124 1125 error = -EPERM; 1126 if (!dir->d_inode->i_op || !dir->d_inode->i_op->symlink) 1127 goto exit_lock; 1128 1129 DQUOT_INIT(dir->d_inode); 1130 error = dir->d_inode->i_op->symlink(dir->d_inode, dentry, oldname); 1131 1132exit_lock: 1133 unlock_dir(dir); 1134 dput(dentry); 1135exit: 1136 return error; 1137} 1138 1139asmlinkage int sys_symlink(const char * oldname, const char * newname) 1140{ 1141 int error; 1142 char * from; 1143 1144 lock_kernel(); 1145 from = getname(oldname); 1146 error = PTR_ERR(from); 1147 if (!IS_ERR(from)) { 1148 char * to; 1149 to = getname(newname); 1150 error = PTR_ERR(to); 1151 if (!IS_ERR(to)) { 1152 error = do_symlink(from,to); 1153 putname(to); 1154 } 1155 putname(from); 1156 } 1157 unlock_kernel(); 1158 return error; 1159} 1160 1161#ifdef OSKIT 1162 int do_link(struct dentry *dentry, const char * newname) 1163#else 1164static inline int do_link(const char * oldname, const char * newname) 1165#endif 1166{ 1167 struct dentry *old_dentry, *new_dentry, *dir; 1168 struct inode *inode; 1169 int error; 1170 1171 /* 1172 * Hardlinks are often used in delicate situations. We avoid 1173 * security-related surprises by not following symlinks on the 1174 * newname. --KAB 1175 * 1176 * We don't follow them on the oldname either to be compatible 1177 * with linux 2.0, and to avoid hard-linking to directories 1178 * and other special files. 
--ADM 1179 */ 1180#ifdef OSKIT 1181 dget(dentry); 1182 old_dentry = dentry; 1183#else 1184 old_dentry = lookup_dentry(oldname, NULL, 0); 1185#endif 1186 error = PTR_ERR(old_dentry); 1187 if (IS_ERR(old_dentry)) 1188 goto exit; 1189 1190 new_dentry = lookup_dentry(newname, NULL, 0); 1191 error = PTR_ERR(new_dentry); 1192 if (IS_ERR(new_dentry)) 1193 goto exit_old; 1194 1195 dir = lock_parent(new_dentry); 1196 error = -ENOENT; 1197 if (!check_parent(dir, new_dentry)) 1198 goto exit_lock; 1199 1200 error = -ENOENT; 1201 inode = old_dentry->d_inode; 1202 if (!inode) 1203 goto exit_lock; 1204 1205 error = may_create(dir->d_inode, new_dentry); 1206 if (error) 1207 goto exit_lock; 1208 1209 error = -EXDEV; 1210 if (dir->d_inode->i_dev != inode->i_dev) 1211 goto exit_lock; 1212 1213 /* 1214 * A link to an append-only or immutable file cannot be created. 1215 */ 1216 error = -EPERM; 1217 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 1218 goto exit_lock; 1219 1220 error = -EPERM; 1221 if (!dir->d_inode->i_op || !dir->d_inode->i_op->link) 1222 goto exit_lock; 1223 1224 DQUOT_INIT(dir->d_inode); 1225 error = dir->d_inode->i_op->link(old_dentry, dir->d_inode, new_dentry); 1226 1227exit_lock: 1228 unlock_dir(dir); 1229 dput(new_dentry); 1230exit_old: 1231 dput(old_dentry); 1232exit: 1233 return error; 1234} 1235 1236#ifndef OSKIT 1237asmlinkage int sys_link(const char * oldname, const char * newname) 1238{ 1239 int error; 1240 char * from; 1241 1242 lock_kernel(); 1243 from = getname(oldname); 1244 error = PTR_ERR(from); 1245 if (!IS_ERR(from)) { 1246 char * to; 1247 to = getname(newname); 1248 error = PTR_ERR(to); 1249 if (!IS_ERR(to)) { 1250 error = do_link(from,to); 1251 putname(to); 1252 } 1253 putname(from); 1254 } 1255 unlock_kernel(); 1256 return error; 1257} 1258#endif 1259 1260int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, 1261 struct inode *new_dir, struct dentry *new_dentry) 1262{ 1263 int error; 1264 int need_rehash = 0; 1265 1266 if 
(old_dentry->d_inode == new_dentry->d_inode) 1267 return 0; 1268 1269 error = may_delete(old_dir, old_dentry, 1); 1270 if (error) 1271 return error; 1272 1273 if (new_dir->i_dev != old_dir->i_dev) 1274 return -EXDEV; 1275 1276 if (!new_dentry->d_inode) 1277 error = may_create(new_dir, new_dentry); 1278 else 1279 error = may_delete(new_dir, new_dentry, 1); 1280 if (error) 1281 return error; 1282 1283 if (!old_dir->i_op || !old_dir->i_op->rename) 1284 return -EPERM; 1285 1286 /* 1287 * If we are going to change the parent - check write permissions, 1288 * we'll need to flip '..'. 1289 */ 1290 if (new_dir != old_dir) { 1291 error = permission(old_dentry->d_inode, MAY_WRITE); 1292 } 1293 if (error) 1294 return error; 1295 1296 DQUOT_INIT(old_dir); 1297 DQUOT_INIT(new_dir); 1298 down(&old_dir->i_sb->s_vfs_rename_sem); 1299 error = -EINVAL; 1300 if (is_subdir(new_dentry, old_dentry)) 1301 goto out_unlock; 1302 if (new_dentry->d_inode) { 1303 error = -EBUSY; 1304 if (d_invalidate(new_dentry)<0) 1305 goto out_unlock; 1306 need_rehash = 1; 1307 } 1308 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 1309 if (need_rehash) 1310 d_rehash(new_dentry); 1311 if (!error) 1312 d_move(old_dentry,new_dentry); 1313out_unlock: 1314 up(&old_dir->i_sb->s_vfs_rename_sem); 1315 return error; 1316} 1317 1318int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, 1319 struct inode *new_dir, struct dentry *new_dentry) 1320{ 1321 int error; 1322 1323 if (old_dentry->d_inode == new_dentry->d_inode) 1324 return 0; 1325 1326 error = may_delete(old_dir, old_dentry, 0); 1327 if (error) 1328 return error; 1329 1330 if (new_dir->i_dev != old_dir->i_dev) 1331 return -EXDEV; 1332 1333 if (!new_dentry->d_inode) 1334 error = may_create(new_dir, new_dentry); 1335 else 1336 error = may_delete(new_dir, new_dentry, 0); 1337 if (error) 1338 return error; 1339 1340 if (!old_dir->i_op || !old_dir->i_op->rename) 1341 return -EPERM; 1342 1343 DQUOT_INIT(old_dir); 1344 
DQUOT_INIT(new_dir); 1345 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 1346 if (error) 1347 return error; 1348 /* The following d_move() should become unconditional */ 1349 if (!(old_dir->i_sb->s_flags & MS_ODD_RENAME)) { 1350 d_move(old_dentry, new_dentry); 1351 } 1352 return 0; 1353} 1354 1355int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 1356 struct inode *new_dir, struct dentry *new_dentry) 1357{ 1358 if (S_ISDIR(old_dentry->d_inode->i_mode)) 1359 return vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 1360 else 1361 return vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 1362} 1363 1364#ifdef OSKIT 1365 int do_rename(const char * oldname, const char * newname) 1366#else 1367static inline int do_rename(const char * oldname, const char * newname) 1368#endif 1369{ 1370 int error; 1371 struct dentry * old_dir, * new_dir; 1372 struct dentry * old_dentry, *new_dentry; 1373 1374 old_dentry = lookup_dentry(oldname, NULL, 0); 1375 1376 error = PTR_ERR(old_dentry); 1377 if (IS_ERR(old_dentry)) 1378 goto exit; 1379 1380 error = -ENOENT; 1381 if (!old_dentry->d_inode) 1382 goto exit_old; 1383 1384 { 1385 unsigned int flags = 0; 1386 if (S_ISDIR(old_dentry->d_inode->i_mode)) 1387 flags = LOOKUP_SLASHOK; 1388 new_dentry = lookup_dentry(newname, NULL, flags); 1389 } 1390 1391 error = PTR_ERR(new_dentry); 1392 if (IS_ERR(new_dentry)) 1393 goto exit_old; 1394 1395 new_dir = get_parent(new_dentry); 1396 old_dir = get_parent(old_dentry); 1397 1398 double_lock(new_dir, old_dir); 1399 1400 error = -ENOENT; 1401 if (check_parent(old_dir, old_dentry) && check_parent(new_dir, new_dentry)) 1402 error = vfs_rename(old_dir->d_inode, old_dentry, 1403 new_dir->d_inode, new_dentry); 1404 1405 double_unlock(new_dir, old_dir); 1406 dput(new_dentry); 1407exit_old: 1408 dput(old_dentry); 1409exit: 1410 return error; 1411} 1412 1413asmlinkage int sys_rename(const char * oldname, const char * newname) 1414{ 1415 int error; 1416 char * 
from; 1417 1418 lock_kernel(); 1419 from = getname(oldname); 1420 error = PTR_ERR(from); 1421 if (!IS_ERR(from)) { 1422 char * to; 1423 to = getname(newname); 1424 error = PTR_ERR(to); 1425 if (!IS_ERR(to)) { 1426 error = do_rename(from,to); 1427 putname(to); 1428 } 1429 putname(from); 1430 } 1431 unlock_kernel(); 1432 return error; 1433}