PageRenderTime 54ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/linux/src/fs/namei.c

https://bitbucket.org/mischief/oskit
C | 1433 lines | 952 code | 177 blank | 304 comment | 247 complexity | 2d3e11e8be013de68c690a999f574d0e MD5 | raw file
Possible License(s): GPL-2.0
  1. /*
  2. * linux/fs/namei.c
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. *
  6. * OSKit support added by the University of Utah, 1997
  7. */
  8. /*
  9. * Some corrections by tytso.
  10. */
  11. /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
  12. * lookup logic.
  13. */
  14. #include <linux/mm.h>
  15. #include <linux/proc_fs.h>
  16. #include <linux/smp_lock.h>
  17. #include <linux/quotaops.h>
  18. #include <asm/uaccess.h>
  19. #include <asm/unaligned.h>
  20. #include <asm/semaphore.h>
  21. #include <asm/page.h>
  22. #include <asm/pgtable.h>
  23. #include <asm/namei.h>
  24. /* This can be removed after the beta phase. */
  25. #define CACHE_SUPERVISE /* debug the correctness of dcache entries */
  26. #undef DEBUG /* some other debugging */
  27. #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
  28. #ifndef OSKIT
  29. /* [Feb-1997 T. Schoebel-Theuer]
  30. * Fundamental changes in the pathname lookup mechanisms (namei)
  31. * were necessary because of omirr. The reason is that omirr needs
  32. * to know the _real_ pathname, not the user-supplied one, in case
  33. * of symlinks (and also when transname replacements occur).
  34. *
  35. * The new code replaces the old recursive symlink resolution with
  36. * an iterative one (in case of non-nested symlink chains). It does
  37. * this with calls to <fs>_follow_link().
  38. * As a side effect, dir_namei(), _namei() and follow_link() are now
  39. * replaced with a single function lookup_dentry() that can handle all
  40. * the special cases of the former code.
  41. *
  42. * With the new dcache, the pathname is stored at each inode, at least as
  43. * long as the refcount of the inode is positive. As a side effect, the
  44. * size of the dcache depends on the inode cache and thus is dynamic.
  45. *
  46. * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
  47. * resolution to correspond with current state of the code.
  48. *
  49. * Note that the symlink resolution is not *completely* iterative.
  50. * There is still a significant amount of tail- and mid- recursion in
  51. * the algorithm. Also, note that <fs>_readlink() is not used in
  52. * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
  53. * may return different results than <fs>_follow_link(). Many virtual
  54. * filesystems (including /proc) exhibit this behavior.
  55. */
  56. /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
  57. * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
  58. * and the name already exists in form of a symlink, try to create the new
  59. * name indicated by the symlink. The old code always complained that the
  60. * name already exists, due to not following the symlink even if its target
  61. * is nonexistent. The new semantics affects also mknod() and link() when
  62. * the name is a symlink pointing to a non-existent name.
  63. *
  64. * I don't know which semantics is the right one, since I have no access
  65. * to standards. But I found by trial that HP-UX 9.0 has the full "new"
  66. * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
  67. * "old" one. Personally, I think the new semantics is much more logical.
  68. * Note that "ln old new" where "new" is a symlink pointing to a non-existing
  69. * file does succeed in both HP-UX and SunOs, but not in Solaris
  70. * and in the old Linux semantics.
  71. */
  72. /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
  73. * semantics. See the comments in "open_namei" and "do_link" below.
  74. *
  75. * [10-Sep-98 Alan Modra] Another symlink change.
  76. */
  77. /* In order to reduce some races, while at the same time doing additional
  78. * checking and hopefully speeding things up, we copy filenames to the
  79. * kernel data space before using them..
  80. *
  81. * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
  82. */
  83. static inline int do_getname(const char *filename, char *page)
  84. {
  85. int retval;
  86. unsigned long len = PAGE_SIZE;
  87. if ((unsigned long) filename >= TASK_SIZE) {
  88. if (!segment_eq(get_fs(), KERNEL_DS))
  89. return -EFAULT;
  90. } else if (TASK_SIZE - (unsigned long) filename < PAGE_SIZE)
  91. len = TASK_SIZE - (unsigned long) filename;
  92. retval = strncpy_from_user((char *)page, filename, len);
  93. if (retval > 0) {
  94. if (retval < len)
  95. return 0;
  96. return -ENAMETOOLONG;
  97. } else if (!retval)
  98. retval = -ENOENT;
  99. return retval;
  100. }
  101. #endif /* OSKIT */
  102. char * getname(const char * filename)
  103. {
  104. #ifdef OSKIT
  105. return (char *)filename;
  106. #else
  107. char *tmp, *result;
  108. result = ERR_PTR(-ENOMEM);
  109. tmp = __getname();
  110. if (tmp) {
  111. int retval = do_getname(filename, tmp);
  112. result = tmp;
  113. if (retval < 0) {
  114. putname(tmp);
  115. result = ERR_PTR(retval);
  116. }
  117. }
  118. return result;
  119. #endif /* OSKIT */
  120. }
  121. /*
  122. * permission()
  123. *
  124. * is used to check for read/write/execute permissions on a file.
  125. * We use "fsuid" for this, letting us set arbitrary permissions
  126. * for filesystem access without changing the "normal" uids which
  127. * are used for other things..
  128. */
  129. int permission(struct inode * inode,int mask)
  130. {
  131. int mode = inode->i_mode;
  132. if (inode->i_op && inode->i_op->permission)
  133. return inode->i_op->permission(inode, mask);
  134. else if ((mask & S_IWOTH) && IS_RDONLY(inode) &&
  135. (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
  136. return -EROFS; /* Nobody gets write access to a read-only fs */
  137. else if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
  138. return -EACCES; /* Nobody gets write access to an immutable file */
  139. else if (current->fsuid == inode->i_uid)
  140. mode >>= 6;
  141. else if (in_group_p(inode->i_gid))
  142. mode >>= 3;
  143. if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE))
  144. return 0;
  145. /* read and search access */
  146. if ((mask == S_IROTH) ||
  147. (S_ISDIR(mode) && !(mask & ~(S_IROTH | S_IXOTH))))
  148. if (capable(CAP_DAC_READ_SEARCH))
  149. return 0;
  150. return -EACCES;
  151. }
  152. /*
  153. * get_write_access() gets write permission for a file.
  154. * put_write_access() releases this write permission.
  155. * This is used for regular files.
  156. * We cannot support write (and maybe mmap read-write shared) accesses and
  157. * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
  158. * can have the following values:
  159. * 0: no writers, no VM_DENYWRITE mappings
  160. * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
  161. * > 0: (i_writecount) users are writing to the file.
  162. */
  163. int get_write_access(struct inode * inode)
  164. {
  165. #ifdef OSKIT
  166. /* This doesn't need to do anything since i_writecount is only
  167. so exec can do ETXTBSY. */
  168. #else
  169. if (inode->i_writecount < 0)
  170. return -ETXTBSY;
  171. inode->i_writecount++;
  172. #endif /* OSKIT */
  173. return 0;
  174. }
  175. void put_write_access(struct inode * inode)
  176. {
  177. #ifdef OSKIT
  178. /* This doesn't need to do anything since i_writecount is only
  179. so exec can do ETXTBSY. */
  180. #else
  181. inode->i_writecount--;
  182. #endif
  183. }
  184. /*
  185. * "." and ".." are special - ".." especially so because it has to be able
  186. * to know about the current root directory and parent relationships
  187. */
  188. static struct dentry * reserved_lookup(struct dentry * parent, struct qstr * name)
  189. {
  190. struct dentry *result = NULL;
  191. if (name->name[0] == '.') {
  192. switch (name->len) {
  193. default:
  194. break;
  195. case 2:
  196. if (name->name[1] != '.')
  197. break;
  198. if (parent != current->fs->root)
  199. parent = parent->d_covers->d_parent;
  200. /* fallthrough */
  201. case 1:
  202. result = parent;
  203. }
  204. }
  205. return dget(result);
  206. }
  207. /*
  208. * Internal lookup() using the new generic dcache.
  209. */
  210. static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
  211. {
  212. struct dentry * dentry = d_lookup(parent, name);
  213. if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
  214. if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
  215. dput(dentry);
  216. dentry = NULL;
  217. }
  218. }
  219. return dentry;
  220. }
  221. /*
  222. * This is called when everything else fails, and we actually have
  223. * to go to the low-level filesystem to find out what we should do..
  224. *
  225. * We get the directory semaphore, and after getting that we also
  226. * make sure that nobody added the entry to the dcache in the meantime..
  227. */
  228. static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
  229. {
  230. struct dentry * result;
  231. struct inode *dir = parent->d_inode;
  232. down(&dir->i_sem);
  233. /*
  234. * First re-do the cached lookup just in case it was created
  235. * while we waited for the directory semaphore..
  236. *
  237. * FIXME! This could use version numbering or similar to
  238. * avoid unnecessary cache lookups.
  239. */
  240. result = cached_lookup(parent, name, flags);
  241. if (!result) {
  242. struct dentry * dentry = d_alloc(parent, name);
  243. result = ERR_PTR(-ENOMEM);
  244. if (dentry) {
  245. result = dir->i_op->lookup(dir, dentry);
  246. if (result)
  247. dput(dentry);
  248. else
  249. result = dentry;
  250. }
  251. }
  252. up(&dir->i_sem);
  253. return result;
  254. }
  255. static struct dentry * do_follow_link(struct dentry *base, struct dentry *dentry, unsigned int follow)
  256. {
  257. struct inode * inode = dentry->d_inode;
  258. if ((follow & LOOKUP_FOLLOW)
  259. && inode && inode->i_op && inode->i_op->follow_link) {
  260. if (current->link_count < 5) {
  261. struct dentry * result;
  262. current->link_count++;
  263. /* This eats the base */
  264. result = inode->i_op->follow_link(dentry, base, follow);
  265. current->link_count--;
  266. dput(dentry);
  267. return result;
  268. }
  269. dput(dentry);
  270. dentry = ERR_PTR(-ELOOP);
  271. }
  272. dput(base);
  273. return dentry;
  274. }
  275. static inline struct dentry * follow_mount(struct dentry * dentry)
  276. {
  277. struct dentry * mnt = dentry->d_mounts;
  278. if (mnt != dentry) {
  279. dget(mnt);
  280. dput(dentry);
  281. dentry = mnt;
  282. }
  283. return dentry;
  284. }
  285. /*
  286. * Name resolution.
  287. *
  288. * This is the basic name resolution function, turning a pathname
  289. * into the final dentry.
  290. */
  291. struct dentry * lookup_dentry(const char * name, struct dentry * base, unsigned int lookup_flags)
  292. {
  293. struct dentry * dentry;
  294. struct inode *inode;
  295. if (*name == '/') {
  296. if (base)
  297. dput(base);
  298. do {
  299. name++;
  300. } while (*name == '/');
  301. __prefix_lookup_dentry(name, lookup_flags);
  302. base = dget(current->fs->root);
  303. } else if (!base) {
  304. base = dget(current->fs->pwd);
  305. }
  306. if (!*name)
  307. goto return_base;
  308. inode = base->d_inode;
  309. lookup_flags &= LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_SLASHOK;
  310. /* At this point we know we have a real path component. */
  311. for(;;) {
  312. int err;
  313. unsigned long hash;
  314. struct qstr this;
  315. unsigned int flags;
  316. unsigned int c;
  317. err = permission(inode, MAY_EXEC);
  318. dentry = ERR_PTR(err);
  319. if (err)
  320. break;
  321. this.name = name;
  322. c = *(const unsigned char *)name;
  323. hash = init_name_hash();
  324. do {
  325. name++;
  326. hash = partial_name_hash(c, hash);
  327. c = *(const unsigned char *)name;
  328. } while (c && (c != '/'));
  329. this.len = name - (const char *) this.name;
  330. this.hash = end_name_hash(hash);
  331. /* remove trailing slashes? */
  332. flags = lookup_flags;
  333. if (c) {
  334. char tmp;
  335. flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
  336. do {
  337. tmp = *++name;
  338. } while (tmp == '/');
  339. if (tmp)
  340. flags |= LOOKUP_CONTINUE;
  341. }
  342. /*
  343. * See if the low-level filesystem might want
  344. * to use its own hash..
  345. */
  346. if (base->d_op && base->d_op->d_hash) {
  347. int error;
  348. error = base->d_op->d_hash(base, &this);
  349. if (error < 0) {
  350. dentry = ERR_PTR(error);
  351. break;
  352. }
  353. }
  354. /* This does the actual lookups.. */
  355. dentry = reserved_lookup(base, &this);
  356. if (!dentry) {
  357. dentry = cached_lookup(base, &this, flags);
  358. if (!dentry) {
  359. dentry = real_lookup(base, &this, flags);
  360. if (IS_ERR(dentry))
  361. break;
  362. }
  363. }
  364. /* Check mountpoints.. */
  365. dentry = follow_mount(dentry);
  366. base = do_follow_link(base, dentry, flags);
  367. if (IS_ERR(base))
  368. goto return_base;
  369. inode = base->d_inode;
  370. if (flags & LOOKUP_DIRECTORY) {
  371. if (!inode)
  372. goto no_inode;
  373. dentry = ERR_PTR(-ENOTDIR);
  374. if (!inode->i_op || !inode->i_op->lookup)
  375. break;
  376. if (flags & LOOKUP_CONTINUE)
  377. continue;
  378. }
  379. return_base:
  380. return base;
  381. /*
  382. * The case of a nonexisting file is special.
  383. *
  384. * In the middle of a pathname lookup (ie when
  385. * LOOKUP_CONTINUE is set), it's an obvious
  386. * error and returns ENOENT.
  387. *
  388. * At the end of a pathname lookup it's legal,
  389. * and we return a negative dentry. However, we
  390. * get here only if there were trailing slashes,
  391. * which is legal only if we know it's supposed
  392. * to be a directory (ie "mkdir"). Thus the
  393. * LOOKUP_SLASHOK flag.
  394. */
  395. no_inode:
  396. dentry = ERR_PTR(-ENOENT);
  397. if (flags & LOOKUP_CONTINUE)
  398. break;
  399. if (flags & LOOKUP_SLASHOK)
  400. goto return_base;
  401. break;
  402. }
  403. dput(base);
  404. return dentry;
  405. }
  406. /*
  407. * namei()
  408. *
  409. * is used by most simple commands to get the inode of a specified name.
  410. * Open, link etc use their own routines, but this is enough for things
  411. * like 'chmod' etc.
  412. *
  413. * namei exists in two versions: namei/lnamei. The only difference is
  414. * that namei follows links, while lnamei does not.
  415. */
  416. struct dentry * __namei(const char *pathname, unsigned int lookup_flags)
  417. {
  418. char *name;
  419. struct dentry *dentry;
  420. name = getname(pathname);
  421. dentry = (struct dentry *) name;
  422. if (!IS_ERR(name)) {
  423. dentry = lookup_dentry(name, NULL, lookup_flags);
  424. putname(name);
  425. if (!IS_ERR(dentry)) {
  426. if (!dentry->d_inode) {
  427. dput(dentry);
  428. dentry = ERR_PTR(-ENOENT);
  429. }
  430. }
  431. }
  432. return dentry;
  433. }
  434. /*
  435. * It's inline, so penalty for filesystems that don't use sticky bit is
  436. * minimal.
  437. */
  438. static inline int check_sticky(struct inode *dir, struct inode *inode)
  439. {
  440. if (!(dir->i_mode & S_ISVTX))
  441. return 0;
  442. if (inode->i_uid == current->fsuid)
  443. return 0;
  444. if (dir->i_uid == current->fsuid)
  445. return 0;
  446. return !capable(CAP_FOWNER);
  447. }
  448. /*
  449. * Check whether we can remove a link victim from directory dir, check
  450. * whether the type of victim is right.
  451. * 1. We can't do it if dir is read-only (done in permission())
  452. * 2. We should have write and exec permissions on dir
  453. * 3. We can't remove anything from append-only dir
  454. * 4. We can't do anything with immutable dir (done in permission())
  455. * 5. If the sticky bit on dir is set we should either
  456. * a. be owner of dir, or
  457. * b. be owner of victim, or
  458. * c. have CAP_FOWNER capability
  459. * 6. If the victim is append-only or immutable we can't do antyhing with
  460. * links pointing to it.
  461. * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
  462. * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
  463. * 9. We can't remove a root or mountpoint.
  464. */
  465. static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
  466. {
  467. int error;
  468. if (!victim->d_inode || victim->d_parent->d_inode != dir)
  469. return -ENOENT;
  470. error = permission(dir,MAY_WRITE | MAY_EXEC);
  471. if (error)
  472. return error;
  473. if (IS_APPEND(dir))
  474. return -EPERM;
  475. if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
  476. IS_IMMUTABLE(victim->d_inode))
  477. return -EPERM;
  478. if (isdir) {
  479. if (!S_ISDIR(victim->d_inode->i_mode))
  480. return -ENOTDIR;
  481. if (IS_ROOT(victim))
  482. return -EBUSY;
  483. if (victim->d_mounts != victim->d_covers)
  484. return -EBUSY;
  485. } else if (S_ISDIR(victim->d_inode->i_mode))
  486. return -EISDIR;
  487. return 0;
  488. }
  489. /* Check whether we can create an object with dentry child in directory
  490. * dir.
  491. * 1. We can't do it if child already exists (open has special treatment for
  492. * this case, but since we are inlined it's OK)
  493. * 2. We can't do it if dir is read-only (done in permission())
  494. * 3. We should have write and exec permissions on dir
  495. * 4. We can't do it if dir is immutable (done in permission())
  496. */
  497. static inline int may_create(struct inode *dir, struct dentry *child) {
  498. if (child->d_inode)
  499. return -EEXIST;
  500. return permission(dir,MAY_WRITE | MAY_EXEC);
  501. }
  502. static inline struct dentry *get_parent(struct dentry *dentry)
  503. {
  504. return dget(dentry->d_parent);
  505. }
  506. static inline void unlock_dir(struct dentry *dir)
  507. {
  508. up(&dir->d_inode->i_sem);
  509. dput(dir);
  510. }
  511. /*
  512. * We need to do a check-parent every time
  513. * after we have locked the parent - to verify
  514. * that the parent is still our parent and
  515. * that we are still hashed onto it..
  516. *
  517. * This is requied in case two processes race
  518. * on removing (or moving) the same entry: the
  519. * parent lock will serialize them, but the
  520. * other process will be too late..
  521. */
  522. #define check_parent(dir, dentry) \
  523. ((dir) == (dentry)->d_parent && !list_empty(&dentry->d_hash))
  524. /*
  525. * Locking the parent is needed to:
  526. * - serialize directory operations
  527. * - make sure the parent doesn't change from
  528. * under us in the middle of an operation.
  529. *
  530. * NOTE! Right now we'd rather use a "struct inode"
  531. * for this, but as I expect things to move toward
  532. * using dentries instead for most things it is
  533. * probably better to start with the conceptually
  534. * better interface of relying on a path of dentries.
  535. */
  536. static inline struct dentry *lock_parent(struct dentry *dentry)
  537. {
  538. struct dentry *dir = dget(dentry->d_parent);
  539. down(&dir->d_inode->i_sem);
  540. return dir;
  541. }
  542. /*
  543. * Whee.. Deadlock country. Happily there are only two VFS
  544. * operations that do this..
  545. */
  546. static inline void double_lock(struct dentry *d1, struct dentry *d2)
  547. {
  548. struct semaphore *s1 = &d1->d_inode->i_sem;
  549. struct semaphore *s2 = &d2->d_inode->i_sem;
  550. if (s1 != s2) {
  551. if ((unsigned long) s1 < (unsigned long) s2) {
  552. struct semaphore *tmp = s2;
  553. s2 = s1; s1 = tmp;
  554. }
  555. down(s1);
  556. }
  557. down(s2);
  558. }
  559. static inline void double_unlock(struct dentry *d1, struct dentry *d2)
  560. {
  561. struct semaphore *s1 = &d1->d_inode->i_sem;
  562. struct semaphore *s2 = &d2->d_inode->i_sem;
  563. up(s1);
  564. if (s1 != s2)
  565. up(s2);
  566. dput(d1);
  567. dput(d2);
  568. }
  569. /*
  570. * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
  571. * reasons.
  572. *
  573. * O_DIRECTORY translates into forcing a directory lookup.
  574. */
  575. static inline int lookup_flags(unsigned int f)
  576. {
  577. unsigned long retval = LOOKUP_FOLLOW;
  578. if (f & O_NOFOLLOW)
  579. retval &= ~LOOKUP_FOLLOW;
  580. if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
  581. retval &= ~LOOKUP_FOLLOW;
  582. if (f & O_DIRECTORY)
  583. retval |= LOOKUP_DIRECTORY;
  584. return retval;
  585. }
  586. /*
  587. * open_namei()
  588. *
  589. * namei for open - this is in fact almost the whole open-routine.
  590. *
  591. * Note that the low bits of "flag" aren't the same as in the open
  592. * system call - they are 00 - no permissions needed
  593. * 01 - read permission needed
  594. * 10 - write permission needed
  595. * 11 - read/write permissions needed
  596. * which is a lot more logical, and also allows the "no perm" needed
  597. * for symlinks (where the permissions are checked later).
  598. */
  599. struct dentry * open_namei(const char * pathname, int flag, int mode)
  600. {
  601. int acc_mode, error;
  602. struct inode *inode;
  603. struct dentry *dentry;
  604. mode &= S_IALLUGO & ~current->fs->umask;
  605. mode |= S_IFREG;
  606. dentry = lookup_dentry(pathname, NULL, lookup_flags(flag));
  607. if (IS_ERR(dentry))
  608. return dentry;
  609. acc_mode = ACC_MODE(flag);
  610. if (flag & O_CREAT) {
  611. struct dentry *dir;
  612. if (dentry->d_inode) {
  613. if (!(flag & O_EXCL))
  614. goto nocreate;
  615. error = -EEXIST;
  616. goto exit;
  617. }
  618. dir = lock_parent(dentry);
  619. if (!check_parent(dir, dentry)) {
  620. /*
  621. * Really nasty race happened. What's the
  622. * right error code? We had a dentry, but
  623. * before we could use it it was removed
  624. * by somebody else. We could just re-try
  625. * everything, I guess.
  626. *
  627. * ENOENT is definitely wrong.
  628. */
  629. error = -ENOENT;
  630. unlock_dir(dir);
  631. goto exit;
  632. }
  633. /*
  634. * Somebody might have created the file while we
  635. * waited for the directory lock.. So we have to
  636. * re-do the existence test.
  637. */
  638. if (dentry->d_inode) {
  639. error = 0;
  640. if (flag & O_EXCL)
  641. error = -EEXIST;
  642. } else if ((error = may_create(dir->d_inode, dentry)) == 0) {
  643. if (!dir->d_inode->i_op || !dir->d_inode->i_op->create)
  644. error = -EACCES;
  645. else {
  646. DQUOT_INIT(dir->d_inode);
  647. error = dir->d_inode->i_op->create(dir->d_inode, dentry, mode);
  648. /* Don't check for write permission, don't truncate */
  649. acc_mode = 0;
  650. flag &= ~O_TRUNC;
  651. }
  652. }
  653. unlock_dir(dir);
  654. if (error)
  655. goto exit;
  656. }
  657. nocreate:
  658. error = -ENOENT;
  659. inode = dentry->d_inode;
  660. if (!inode)
  661. goto exit;
  662. error = -ELOOP;
  663. if (S_ISLNK(inode->i_mode))
  664. goto exit;
  665. error = -EISDIR;
  666. if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
  667. goto exit;
  668. error = permission(inode,acc_mode);
  669. if (error)
  670. goto exit;
  671. /*
  672. * FIFO's, sockets and device files are special: they don't
  673. * actually live on the filesystem itself, and as such you
  674. * can write to them even if the filesystem is read-only.
  675. */
  676. if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
  677. flag &= ~O_TRUNC;
  678. } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
  679. error = -EACCES;
  680. if (IS_NODEV(inode))
  681. goto exit;
  682. flag &= ~O_TRUNC;
  683. } else {
  684. error = -EROFS;
  685. if (IS_RDONLY(inode) && (flag & 2))
  686. goto exit;
  687. }
  688. /*
  689. * An append-only file must be opened in append mode for writing.
  690. */
  691. error = -EPERM;
  692. if (IS_APPEND(inode)) {
  693. if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
  694. goto exit;
  695. if (flag & O_TRUNC)
  696. goto exit;
  697. }
  698. if (flag & O_TRUNC) {
  699. error = get_write_access(inode);
  700. if (error)
  701. goto exit;
  702. /*
  703. * Refuse to truncate files with mandatory locks held on them.
  704. */
  705. error = locks_verify_locked(inode);
  706. if (!error) {
  707. DQUOT_INIT(inode);
  708. error = do_truncate(dentry, 0);
  709. }
  710. put_write_access(inode);
  711. if (error)
  712. goto exit;
  713. } else
  714. if (flag & FMODE_WRITE)
  715. DQUOT_INIT(inode);
  716. return dentry;
  717. exit:
  718. dput(dentry);
  719. return ERR_PTR(error);
  720. }
  721. struct dentry * do_mknod(const char * filename, int mode, dev_t dev)
  722. {
  723. int error;
  724. struct dentry *dir;
  725. struct dentry *dentry, *retval;
  726. mode &= ~current->fs->umask;
  727. dentry = lookup_dentry(filename, NULL, LOOKUP_FOLLOW);
  728. if (IS_ERR(dentry))
  729. return dentry;
  730. dir = lock_parent(dentry);
  731. error = -ENOENT;
  732. if (!check_parent(dir, dentry))
  733. goto exit_lock;
  734. error = may_create(dir->d_inode, dentry);
  735. if (error)
  736. goto exit_lock;
  737. error = -EPERM;
  738. if (!dir->d_inode->i_op || !dir->d_inode->i_op->mknod)
  739. goto exit_lock;
  740. DQUOT_INIT(dir->d_inode);
  741. error = dir->d_inode->i_op->mknod(dir->d_inode, dentry, mode, dev);
  742. exit_lock:
  743. retval = ERR_PTR(error);
  744. if (!error)
  745. retval = dget(dentry);
  746. unlock_dir(dir);
  747. dput(dentry);
  748. return retval;
  749. }
  750. asmlinkage int sys_mknod(const char * filename, int mode, dev_t dev)
  751. {
  752. int error;
  753. char * tmp;
  754. lock_kernel();
  755. error = -EPERM;
  756. if (S_ISDIR(mode) || (!S_ISFIFO(mode) && !capable(CAP_SYS_ADMIN)))
  757. goto out;
  758. error = -EINVAL;
  759. switch (mode & S_IFMT) {
  760. case 0:
  761. mode |= S_IFREG;
  762. break;
  763. case S_IFREG: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
  764. break;
  765. default:
  766. goto out;
  767. }
  768. tmp = getname(filename);
  769. error = PTR_ERR(tmp);
  770. if (!IS_ERR(tmp)) {
  771. struct dentry * dentry = do_mknod(tmp,mode,dev);
  772. putname(tmp);
  773. error = PTR_ERR(dentry);
  774. if (!IS_ERR(dentry)) {
  775. dput(dentry);
  776. error = 0;
  777. }
  778. }
  779. out:
  780. unlock_kernel();
  781. return error;
  782. }
  783. /*
  784. * Look out: this function may change a normal dentry
  785. * into a directory dentry (different size)..
  786. */
  787. #ifdef OSKIT
  788. int do_mkdir(const char * pathname, int mode)
  789. #else
  790. static inline int do_mkdir(const char * pathname, int mode)
  791. #endif
  792. {
  793. int error;
  794. struct dentry *dir;
  795. struct dentry *dentry;
  796. dentry = lookup_dentry(pathname, NULL, LOOKUP_SLASHOK);
  797. error = PTR_ERR(dentry);
  798. if (IS_ERR(dentry))
  799. goto exit;
  800. /*
  801. * EEXIST is kind of a strange error code to
  802. * return, but basically if the dentry was moved
  803. * or unlinked while we locked the parent, we
  804. * do know that it _did_ exist before, and as
  805. * such it makes perfect sense.. In contrast,
  806. * ENOENT doesn't make sense for mkdir.
  807. */
  808. dir = lock_parent(dentry);
  809. error = -EEXIST;
  810. if (!check_parent(dir, dentry))
  811. goto exit_lock;
  812. error = may_create(dir->d_inode, dentry);
  813. if (error)
  814. goto exit_lock;
  815. error = -EPERM;
  816. if (!dir->d_inode->i_op || !dir->d_inode->i_op->mkdir)
  817. goto exit_lock;
  818. DQUOT_INIT(dir->d_inode);
  819. mode &= 0777 & ~current->fs->umask;
  820. error = dir->d_inode->i_op->mkdir(dir->d_inode, dentry, mode);
  821. exit_lock:
  822. unlock_dir(dir);
  823. dput(dentry);
  824. exit:
  825. return error;
  826. }
  827. asmlinkage int sys_mkdir(const char * pathname, int mode)
  828. {
  829. int error;
  830. char * tmp;
  831. lock_kernel();
  832. tmp = getname(pathname);
  833. error = PTR_ERR(tmp);
  834. if (!IS_ERR(tmp)) {
  835. error = do_mkdir(tmp,mode);
  836. putname(tmp);
  837. }
  838. unlock_kernel();
  839. return error;
  840. }
  841. int vfs_rmdir(struct inode *dir, struct dentry *dentry)
  842. {
  843. int error;
  844. error = may_delete(dir, dentry, 1);
  845. if (error)
  846. return error;
  847. if (!dir->i_op || !dir->i_op->rmdir)
  848. return -EPERM;
  849. DQUOT_INIT(dir);
  850. /*
  851. * We try to drop the dentry early: we should have
  852. * a usage count of 2 if we're the only user of this
  853. * dentry, and if that is true (possibly after pruning
  854. * the dcache), then we drop the dentry now.
  855. *
  856. * A low-level filesystem can, if it choses, legally
  857. * do a
  858. *
  859. * if (!list_empty(&dentry->d_hash))
  860. * return -EBUSY;
  861. *
  862. * if it cannot handle the case of removing a directory
  863. * that is still in use by something else..
  864. */
  865. switch (dentry->d_count) {
  866. default:
  867. shrink_dcache_parent(dentry);
  868. if (dentry->d_count != 2)
  869. break;
  870. case 2:
  871. d_drop(dentry);
  872. }
  873. error = dir->i_op->rmdir(dir, dentry);
  874. return error;
  875. }
  876. #ifdef OSKIT
  877. int do_rmdir(const char * name)
  878. #else
  879. static inline int do_rmdir(const char * name)
  880. #endif
  881. {
  882. int error;
  883. struct dentry *dir;
  884. struct dentry *dentry;
  885. dentry = lookup_dentry(name, NULL, 0);
  886. error = PTR_ERR(dentry);
  887. if (IS_ERR(dentry))
  888. goto exit;
  889. error = -ENOENT;
  890. if (!dentry->d_inode)
  891. goto exit_dput;
  892. dir = dget(dentry->d_parent);
  893. /*
  894. * The dentry->d_count stuff confuses d_delete() enough to
  895. * not kill the inode from under us while it is locked. This
  896. * wouldn't be needed, except the dentry semaphore is really
  897. * in the inode, not in the dentry..
  898. */
  899. dentry->d_count++;
  900. double_lock(dir, dentry);
  901. error = -ENOENT;
  902. if (check_parent(dir, dentry))
  903. error = vfs_rmdir(dir->d_inode, dentry);
  904. double_unlock(dentry, dir);
  905. exit_dput:
  906. dput(dentry);
  907. exit:
  908. return error;
  909. }
  910. asmlinkage int sys_rmdir(const char * pathname)
  911. {
  912. int error;
  913. char * tmp;
  914. lock_kernel();
  915. tmp = getname(pathname);
  916. error = PTR_ERR(tmp);
  917. if (!IS_ERR(tmp)) {
  918. error = do_rmdir(tmp);
  919. putname(tmp);
  920. }
  921. unlock_kernel();
  922. return error;
  923. }
  924. int vfs_unlink(struct inode *dir, struct dentry *dentry)
  925. {
  926. int error;
  927. error = may_delete(dir, dentry, 0);
  928. if (!error) {
  929. error = -EPERM;
  930. if (dir->i_op && dir->i_op->unlink) {
  931. DQUOT_INIT(dir);
  932. error = dir->i_op->unlink(dir, dentry);
  933. }
  934. }
  935. return error;
  936. }
  937. #ifdef OSKIT
  938. int do_unlink(const char * name)
  939. #else
  940. static inline int do_unlink(const char * name)
  941. #endif
  942. {
  943. int error;
  944. struct dentry *dir;
  945. struct dentry *dentry;
  946. dentry = lookup_dentry(name, NULL, 0);
  947. error = PTR_ERR(dentry);
  948. if (IS_ERR(dentry))
  949. goto exit;
  950. dir = lock_parent(dentry);
  951. error = -ENOENT;
  952. if (check_parent(dir, dentry))
  953. error = vfs_unlink(dir->d_inode, dentry);
  954. unlock_dir(dir);
  955. dput(dentry);
  956. exit:
  957. return error;
  958. }
  959. asmlinkage int sys_unlink(const char * pathname)
  960. {
  961. int error;
  962. char * tmp;
  963. lock_kernel();
  964. tmp = getname(pathname);
  965. error = PTR_ERR(tmp);
  966. if (!IS_ERR(tmp)) {
  967. error = do_unlink(tmp);
  968. putname(tmp);
  969. }
  970. unlock_kernel();
  971. return error;
  972. }
  973. #ifdef OSKIT
  974. int do_symlink(const char * oldname, const char * newname)
  975. #else
  976. static inline int do_symlink(const char * oldname, const char * newname)
  977. #endif
  978. {
  979. int error;
  980. struct dentry *dir;
  981. struct dentry *dentry;
  982. dentry = lookup_dentry(newname, NULL, 0);
  983. error = PTR_ERR(dentry);
  984. if (IS_ERR(dentry))
  985. goto exit;
  986. dir = lock_parent(dentry);
  987. error = -ENOENT;
  988. if (!check_parent(dir, dentry))
  989. goto exit_lock;
  990. error = may_create(dir->d_inode, dentry);
  991. if (error)
  992. goto exit_lock;
  993. error = -EPERM;
  994. if (!dir->d_inode->i_op || !dir->d_inode->i_op->symlink)
  995. goto exit_lock;
  996. DQUOT_INIT(dir->d_inode);
  997. error = dir->d_inode->i_op->symlink(dir->d_inode, dentry, oldname);
  998. exit_lock:
  999. unlock_dir(dir);
  1000. dput(dentry);
  1001. exit:
  1002. return error;
  1003. }
  1004. asmlinkage int sys_symlink(const char * oldname, const char * newname)
  1005. {
  1006. int error;
  1007. char * from;
  1008. lock_kernel();
  1009. from = getname(oldname);
  1010. error = PTR_ERR(from);
  1011. if (!IS_ERR(from)) {
  1012. char * to;
  1013. to = getname(newname);
  1014. error = PTR_ERR(to);
  1015. if (!IS_ERR(to)) {
  1016. error = do_symlink(from,to);
  1017. putname(to);
  1018. }
  1019. putname(from);
  1020. }
  1021. unlock_kernel();
  1022. return error;
  1023. }
  1024. #ifdef OSKIT
  1025. int do_link(struct dentry *dentry, const char * newname)
  1026. #else
  1027. static inline int do_link(const char * oldname, const char * newname)
  1028. #endif
  1029. {
  1030. struct dentry *old_dentry, *new_dentry, *dir;
  1031. struct inode *inode;
  1032. int error;
  1033. /*
  1034. * Hardlinks are often used in delicate situations. We avoid
  1035. * security-related surprises by not following symlinks on the
  1036. * newname. --KAB
  1037. *
  1038. * We don't follow them on the oldname either to be compatible
  1039. * with linux 2.0, and to avoid hard-linking to directories
  1040. * and other special files. --ADM
  1041. */
  1042. #ifdef OSKIT
  1043. dget(dentry);
  1044. old_dentry = dentry;
  1045. #else
  1046. old_dentry = lookup_dentry(oldname, NULL, 0);
  1047. #endif
  1048. error = PTR_ERR(old_dentry);
  1049. if (IS_ERR(old_dentry))
  1050. goto exit;
  1051. new_dentry = lookup_dentry(newname, NULL, 0);
  1052. error = PTR_ERR(new_dentry);
  1053. if (IS_ERR(new_dentry))
  1054. goto exit_old;
  1055. dir = lock_parent(new_dentry);
  1056. error = -ENOENT;
  1057. if (!check_parent(dir, new_dentry))
  1058. goto exit_lock;
  1059. error = -ENOENT;
  1060. inode = old_dentry->d_inode;
  1061. if (!inode)
  1062. goto exit_lock;
  1063. error = may_create(dir->d_inode, new_dentry);
  1064. if (error)
  1065. goto exit_lock;
  1066. error = -EXDEV;
  1067. if (dir->d_inode->i_dev != inode->i_dev)
  1068. goto exit_lock;
  1069. /*
  1070. * A link to an append-only or immutable file cannot be created.
  1071. */
  1072. error = -EPERM;
  1073. if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
  1074. goto exit_lock;
  1075. error = -EPERM;
  1076. if (!dir->d_inode->i_op || !dir->d_inode->i_op->link)
  1077. goto exit_lock;
  1078. DQUOT_INIT(dir->d_inode);
  1079. error = dir->d_inode->i_op->link(old_dentry, dir->d_inode, new_dentry);
  1080. exit_lock:
  1081. unlock_dir(dir);
  1082. dput(new_dentry);
  1083. exit_old:
  1084. dput(old_dentry);
  1085. exit:
  1086. return error;
  1087. }
  1088. #ifndef OSKIT
  1089. asmlinkage int sys_link(const char * oldname, const char * newname)
  1090. {
  1091. int error;
  1092. char * from;
  1093. lock_kernel();
  1094. from = getname(oldname);
  1095. error = PTR_ERR(from);
  1096. if (!IS_ERR(from)) {
  1097. char * to;
  1098. to = getname(newname);
  1099. error = PTR_ERR(to);
  1100. if (!IS_ERR(to)) {
  1101. error = do_link(from,to);
  1102. putname(to);
  1103. }
  1104. putname(from);
  1105. }
  1106. unlock_kernel();
  1107. return error;
  1108. }
  1109. #endif
  1110. int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
  1111. struct inode *new_dir, struct dentry *new_dentry)
  1112. {
  1113. int error;
  1114. int need_rehash = 0;
  1115. if (old_dentry->d_inode == new_dentry->d_inode)
  1116. return 0;
  1117. error = may_delete(old_dir, old_dentry, 1);
  1118. if (error)
  1119. return error;
  1120. if (new_dir->i_dev != old_dir->i_dev)
  1121. return -EXDEV;
  1122. if (!new_dentry->d_inode)
  1123. error = may_create(new_dir, new_dentry);
  1124. else
  1125. error = may_delete(new_dir, new_dentry, 1);
  1126. if (error)
  1127. return error;
  1128. if (!old_dir->i_op || !old_dir->i_op->rename)
  1129. return -EPERM;
  1130. /*
  1131. * If we are going to change the parent - check write permissions,
  1132. * we'll need to flip '..'.
  1133. */
  1134. if (new_dir != old_dir) {
  1135. error = permission(old_dentry->d_inode, MAY_WRITE);
  1136. }
  1137. if (error)
  1138. return error;
  1139. DQUOT_INIT(old_dir);
  1140. DQUOT_INIT(new_dir);
  1141. down(&old_dir->i_sb->s_vfs_rename_sem);
  1142. error = -EINVAL;
  1143. if (is_subdir(new_dentry, old_dentry))
  1144. goto out_unlock;
  1145. if (new_dentry->d_inode) {
  1146. error = -EBUSY;
  1147. if (d_invalidate(new_dentry)<0)
  1148. goto out_unlock;
  1149. need_rehash = 1;
  1150. }
  1151. error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
  1152. if (need_rehash)
  1153. d_rehash(new_dentry);
  1154. if (!error)
  1155. d_move(old_dentry,new_dentry);
  1156. out_unlock:
  1157. up(&old_dir->i_sb->s_vfs_rename_sem);
  1158. return error;
  1159. }
  1160. int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
  1161. struct inode *new_dir, struct dentry *new_dentry)
  1162. {
  1163. int error;
  1164. if (old_dentry->d_inode == new_dentry->d_inode)
  1165. return 0;
  1166. error = may_delete(old_dir, old_dentry, 0);
  1167. if (error)
  1168. return error;
  1169. if (new_dir->i_dev != old_dir->i_dev)
  1170. return -EXDEV;
  1171. if (!new_dentry->d_inode)
  1172. error = may_create(new_dir, new_dentry);
  1173. else
  1174. error = may_delete(new_dir, new_dentry, 0);
  1175. if (error)
  1176. return error;
  1177. if (!old_dir->i_op || !old_dir->i_op->rename)
  1178. return -EPERM;
  1179. DQUOT_INIT(old_dir);
  1180. DQUOT_INIT(new_dir);
  1181. error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
  1182. if (error)
  1183. return error;
  1184. /* The following d_move() should become unconditional */
  1185. if (!(old_dir->i_sb->s_flags & MS_ODD_RENAME)) {
  1186. d_move(old_dentry, new_dentry);
  1187. }
  1188. return 0;
  1189. }
  1190. int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
  1191. struct inode *new_dir, struct dentry *new_dentry)
  1192. {
  1193. if (S_ISDIR(old_dentry->d_inode->i_mode))
  1194. return vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
  1195. else
  1196. return vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
  1197. }
  1198. #ifdef OSKIT
  1199. int do_rename(const char * oldname, const char * newname)
  1200. #else
  1201. static inline int do_rename(const char * oldname, const char * newname)
  1202. #endif
  1203. {
  1204. int error;
  1205. struct dentry * old_dir, * new_dir;
  1206. struct dentry * old_dentry, *new_dentry;
  1207. old_dentry = lookup_dentry(oldname, NULL, 0);
  1208. error = PTR_ERR(old_dentry);
  1209. if (IS_ERR(old_dentry))
  1210. goto exit;
  1211. error = -ENOENT;
  1212. if (!old_dentry->d_inode)
  1213. goto exit_old;
  1214. {
  1215. unsigned int flags = 0;
  1216. if (S_ISDIR(old_dentry->d_inode->i_mode))
  1217. flags = LOOKUP_SLASHOK;
  1218. new_dentry = lookup_dentry(newname, NULL, flags);
  1219. }
  1220. error = PTR_ERR(new_dentry);
  1221. if (IS_ERR(new_dentry))
  1222. goto exit_old;
  1223. new_dir = get_parent(new_dentry);
  1224. old_dir = get_parent(old_dentry);
  1225. double_lock(new_dir, old_dir);
  1226. error = -ENOENT;
  1227. if (check_parent(old_dir, old_dentry) && check_parent(new_dir, new_dentry))
  1228. error = vfs_rename(old_dir->d_inode, old_dentry,
  1229. new_dir->d_inode, new_dentry);
  1230. double_unlock(new_dir, old_dir);
  1231. dput(new_dentry);
  1232. exit_old:
  1233. dput(old_dentry);
  1234. exit:
  1235. return error;
  1236. }
  1237. asmlinkage int sys_rename(const char * oldname, const char * newname)
  1238. {
  1239. int error;
  1240. char * from;
  1241. lock_kernel();
  1242. from = getname(oldname);
  1243. error = PTR_ERR(from);
  1244. if (!IS_ERR(from)) {
  1245. char * to;
  1246. to = getname(newname);
  1247. error = PTR_ERR(to);
  1248. if (!IS_ERR(to)) {
  1249. error = do_rename(from,to);
  1250. putname(to);
  1251. }
  1252. putname(from);
  1253. }
  1254. unlock_kernel();
  1255. return error;
  1256. }