PageRenderTime 1126ms CodeModel.GetById 9ms RepoModel.GetById 0ms app.codeStats 1ms

/fs/xfs/xfs_iops.c

https://github.com/Mengqi/linux-2.6
C | 1210 lines | 841 code | 156 blank | 213 comment | 136 complexity | ffe6314a81ddded7231175d4534f60b3 MD5 | raw file
  1. /*
  2. * Copyright (c) 2000-2005 Silicon Graphics, Inc.
  3. * All Rights Reserved.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License as
  7. * published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it would be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write the Free Software Foundation,
  16. * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "xfs.h"
  19. #include "xfs_fs.h"
  20. #include "xfs_acl.h"
  21. #include "xfs_bit.h"
  22. #include "xfs_log.h"
  23. #include "xfs_inum.h"
  24. #include "xfs_trans.h"
  25. #include "xfs_sb.h"
  26. #include "xfs_ag.h"
  27. #include "xfs_alloc.h"
  28. #include "xfs_quota.h"
  29. #include "xfs_mount.h"
  30. #include "xfs_bmap_btree.h"
  31. #include "xfs_dinode.h"
  32. #include "xfs_inode.h"
  33. #include "xfs_bmap.h"
  34. #include "xfs_rtalloc.h"
  35. #include "xfs_error.h"
  36. #include "xfs_itable.h"
  37. #include "xfs_rw.h"
  38. #include "xfs_attr.h"
  39. #include "xfs_buf_item.h"
  40. #include "xfs_utils.h"
  41. #include "xfs_vnodeops.h"
  42. #include "xfs_inode_item.h"
  43. #include "xfs_trace.h"
  44. #include <linux/capability.h>
  45. #include <linux/xattr.h>
  46. #include <linux/namei.h>
  47. #include <linux/posix_acl.h>
  48. #include <linux/security.h>
  49. #include <linux/fiemap.h>
  50. #include <linux/slab.h>
  51. /*
  52. * Bring the timestamps in the XFS inode uptodate.
  53. *
  54. * Used before writing the inode to disk.
  55. */
  56. void
  57. xfs_synchronize_times(
  58. xfs_inode_t *ip)
  59. {
  60. struct inode *inode = VFS_I(ip);
  61. ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
  62. ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
  63. ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
  64. ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
  65. ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
  66. ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
  67. }
  68. /*
  69. * If the linux inode is valid, mark it dirty.
  70. * Used when committing a dirty inode into a transaction so that
  71. * the inode will get written back by the linux code
  72. */
  73. void
  74. xfs_mark_inode_dirty_sync(
  75. xfs_inode_t *ip)
  76. {
  77. struct inode *inode = VFS_I(ip);
  78. if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
  79. mark_inode_dirty_sync(inode);
  80. }
  81. void
  82. xfs_mark_inode_dirty(
  83. xfs_inode_t *ip)
  84. {
  85. struct inode *inode = VFS_I(ip);
  86. if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
  87. mark_inode_dirty(inode);
  88. }
  89. /*
  90. * Hook in SELinux. This is not quite correct yet, what we really need
  91. * here (as we do for default ACLs) is a mechanism by which creation of
  92. * these attrs can be journalled at inode creation time (along with the
  93. * inode, of course, such that log replay can't cause these to be lost).
  94. */
  95. STATIC int
  96. xfs_init_security(
  97. struct inode *inode,
  98. struct inode *dir,
  99. const struct qstr *qstr)
  100. {
  101. struct xfs_inode *ip = XFS_I(inode);
  102. size_t length;
  103. void *value;
  104. unsigned char *name;
  105. int error;
  106. error = security_inode_init_security(inode, dir, qstr, (char **)&name,
  107. &value, &length);
  108. if (error) {
  109. if (error == -EOPNOTSUPP)
  110. return 0;
  111. return -error;
  112. }
  113. error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
  114. kfree(name);
  115. kfree(value);
  116. return error;
  117. }
  118. static void
  119. xfs_dentry_to_name(
  120. struct xfs_name *namep,
  121. struct dentry *dentry)
  122. {
  123. namep->name = dentry->d_name.name;
  124. namep->len = dentry->d_name.len;
  125. }
  126. STATIC void
  127. xfs_cleanup_inode(
  128. struct inode *dir,
  129. struct inode *inode,
  130. struct dentry *dentry)
  131. {
  132. struct xfs_name teardown;
  133. /* Oh, the horror.
  134. * If we can't add the ACL or we fail in
  135. * xfs_init_security we must back out.
  136. * ENOSPC can hit here, among other things.
  137. */
  138. xfs_dentry_to_name(&teardown, dentry);
  139. xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
  140. iput(inode);
  141. }
  142. STATIC int
  143. xfs_vn_mknod(
  144. struct inode *dir,
  145. struct dentry *dentry,
  146. int mode,
  147. dev_t rdev)
  148. {
  149. struct inode *inode;
  150. struct xfs_inode *ip = NULL;
  151. struct posix_acl *default_acl = NULL;
  152. struct xfs_name name;
  153. int error;
  154. /*
  155. * Irix uses Missed'em'V split, but doesn't want to see
  156. * the upper 5 bits of (14bit) major.
  157. */
  158. if (S_ISCHR(mode) || S_ISBLK(mode)) {
  159. if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
  160. return -EINVAL;
  161. rdev = sysv_encode_dev(rdev);
  162. } else {
  163. rdev = 0;
  164. }
  165. if (IS_POSIXACL(dir)) {
  166. default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
  167. if (IS_ERR(default_acl))
  168. return PTR_ERR(default_acl);
  169. if (!default_acl)
  170. mode &= ~current_umask();
  171. }
  172. xfs_dentry_to_name(&name, dentry);
  173. error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
  174. if (unlikely(error))
  175. goto out_free_acl;
  176. inode = VFS_I(ip);
  177. error = xfs_init_security(inode, dir, &dentry->d_name);
  178. if (unlikely(error))
  179. goto out_cleanup_inode;
  180. if (default_acl) {
  181. error = -xfs_inherit_acl(inode, default_acl);
  182. default_acl = NULL;
  183. if (unlikely(error))
  184. goto out_cleanup_inode;
  185. }
  186. d_instantiate(dentry, inode);
  187. return -error;
  188. out_cleanup_inode:
  189. xfs_cleanup_inode(dir, inode, dentry);
  190. out_free_acl:
  191. posix_acl_release(default_acl);
  192. return -error;
  193. }
  194. STATIC int
  195. xfs_vn_create(
  196. struct inode *dir,
  197. struct dentry *dentry,
  198. int mode,
  199. struct nameidata *nd)
  200. {
  201. return xfs_vn_mknod(dir, dentry, mode, 0);
  202. }
  203. STATIC int
  204. xfs_vn_mkdir(
  205. struct inode *dir,
  206. struct dentry *dentry,
  207. int mode)
  208. {
  209. return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
  210. }
  211. STATIC struct dentry *
  212. xfs_vn_lookup(
  213. struct inode *dir,
  214. struct dentry *dentry,
  215. struct nameidata *nd)
  216. {
  217. struct xfs_inode *cip;
  218. struct xfs_name name;
  219. int error;
  220. if (dentry->d_name.len >= MAXNAMELEN)
  221. return ERR_PTR(-ENAMETOOLONG);
  222. xfs_dentry_to_name(&name, dentry);
  223. error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
  224. if (unlikely(error)) {
  225. if (unlikely(error != ENOENT))
  226. return ERR_PTR(-error);
  227. d_add(dentry, NULL);
  228. return NULL;
  229. }
  230. return d_splice_alias(VFS_I(cip), dentry);
  231. }
  232. STATIC struct dentry *
  233. xfs_vn_ci_lookup(
  234. struct inode *dir,
  235. struct dentry *dentry,
  236. struct nameidata *nd)
  237. {
  238. struct xfs_inode *ip;
  239. struct xfs_name xname;
  240. struct xfs_name ci_name;
  241. struct qstr dname;
  242. int error;
  243. if (dentry->d_name.len >= MAXNAMELEN)
  244. return ERR_PTR(-ENAMETOOLONG);
  245. xfs_dentry_to_name(&xname, dentry);
  246. error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
  247. if (unlikely(error)) {
  248. if (unlikely(error != ENOENT))
  249. return ERR_PTR(-error);
  250. /*
  251. * call d_add(dentry, NULL) here when d_drop_negative_children
  252. * is called in xfs_vn_mknod (ie. allow negative dentries
  253. * with CI filesystems).
  254. */
  255. return NULL;
  256. }
  257. /* if exact match, just splice and exit */
  258. if (!ci_name.name)
  259. return d_splice_alias(VFS_I(ip), dentry);
  260. /* else case-insensitive match... */
  261. dname.name = ci_name.name;
  262. dname.len = ci_name.len;
  263. dentry = d_add_ci(dentry, VFS_I(ip), &dname);
  264. kmem_free(ci_name.name);
  265. return dentry;
  266. }
  267. STATIC int
  268. xfs_vn_link(
  269. struct dentry *old_dentry,
  270. struct inode *dir,
  271. struct dentry *dentry)
  272. {
  273. struct inode *inode = old_dentry->d_inode;
  274. struct xfs_name name;
  275. int error;
  276. xfs_dentry_to_name(&name, dentry);
  277. error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
  278. if (unlikely(error))
  279. return -error;
  280. ihold(inode);
  281. d_instantiate(dentry, inode);
  282. return 0;
  283. }
  284. STATIC int
  285. xfs_vn_unlink(
  286. struct inode *dir,
  287. struct dentry *dentry)
  288. {
  289. struct xfs_name name;
  290. int error;
  291. xfs_dentry_to_name(&name, dentry);
  292. error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
  293. if (error)
  294. return error;
  295. /*
  296. * With unlink, the VFS makes the dentry "negative": no inode,
  297. * but still hashed. This is incompatible with case-insensitive
  298. * mode, so invalidate (unhash) the dentry in CI-mode.
  299. */
  300. if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
  301. d_invalidate(dentry);
  302. return 0;
  303. }
  304. STATIC int
  305. xfs_vn_symlink(
  306. struct inode *dir,
  307. struct dentry *dentry,
  308. const char *symname)
  309. {
  310. struct inode *inode;
  311. struct xfs_inode *cip = NULL;
  312. struct xfs_name name;
  313. int error;
  314. mode_t mode;
  315. mode = S_IFLNK |
  316. (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
  317. xfs_dentry_to_name(&name, dentry);
  318. error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
  319. if (unlikely(error))
  320. goto out;
  321. inode = VFS_I(cip);
  322. error = xfs_init_security(inode, dir, &dentry->d_name);
  323. if (unlikely(error))
  324. goto out_cleanup_inode;
  325. d_instantiate(dentry, inode);
  326. return 0;
  327. out_cleanup_inode:
  328. xfs_cleanup_inode(dir, inode, dentry);
  329. out:
  330. return -error;
  331. }
  332. STATIC int
  333. xfs_vn_rename(
  334. struct inode *odir,
  335. struct dentry *odentry,
  336. struct inode *ndir,
  337. struct dentry *ndentry)
  338. {
  339. struct inode *new_inode = ndentry->d_inode;
  340. struct xfs_name oname;
  341. struct xfs_name nname;
  342. xfs_dentry_to_name(&oname, odentry);
  343. xfs_dentry_to_name(&nname, ndentry);
  344. return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
  345. XFS_I(ndir), &nname, new_inode ?
  346. XFS_I(new_inode) : NULL);
  347. }
  348. /*
  349. * careful here - this function can get called recursively, so
  350. * we need to be very careful about how much stack we use.
  351. * uio is kmalloced for this reason...
  352. */
  353. STATIC void *
  354. xfs_vn_follow_link(
  355. struct dentry *dentry,
  356. struct nameidata *nd)
  357. {
  358. char *link;
  359. int error = -ENOMEM;
  360. link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
  361. if (!link)
  362. goto out_err;
  363. error = -xfs_readlink(XFS_I(dentry->d_inode), link);
  364. if (unlikely(error))
  365. goto out_kfree;
  366. nd_set_link(nd, link);
  367. return NULL;
  368. out_kfree:
  369. kfree(link);
  370. out_err:
  371. nd_set_link(nd, ERR_PTR(error));
  372. return NULL;
  373. }
  374. STATIC void
  375. xfs_vn_put_link(
  376. struct dentry *dentry,
  377. struct nameidata *nd,
  378. void *p)
  379. {
  380. char *s = nd_get_link(nd);
  381. if (!IS_ERR(s))
  382. kfree(s);
  383. }
  384. STATIC int
  385. xfs_vn_getattr(
  386. struct vfsmount *mnt,
  387. struct dentry *dentry,
  388. struct kstat *stat)
  389. {
  390. struct inode *inode = dentry->d_inode;
  391. struct xfs_inode *ip = XFS_I(inode);
  392. struct xfs_mount *mp = ip->i_mount;
  393. trace_xfs_getattr(ip);
  394. if (XFS_FORCED_SHUTDOWN(mp))
  395. return XFS_ERROR(EIO);
  396. stat->size = XFS_ISIZE(ip);
  397. stat->dev = inode->i_sb->s_dev;
  398. stat->mode = ip->i_d.di_mode;
  399. stat->nlink = ip->i_d.di_nlink;
  400. stat->uid = ip->i_d.di_uid;
  401. stat->gid = ip->i_d.di_gid;
  402. stat->ino = ip->i_ino;
  403. stat->atime = inode->i_atime;
  404. stat->mtime = inode->i_mtime;
  405. stat->ctime = inode->i_ctime;
  406. stat->blocks =
  407. XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
  408. switch (inode->i_mode & S_IFMT) {
  409. case S_IFBLK:
  410. case S_IFCHR:
  411. stat->blksize = BLKDEV_IOSIZE;
  412. stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
  413. sysv_minor(ip->i_df.if_u2.if_rdev));
  414. break;
  415. default:
  416. if (XFS_IS_REALTIME_INODE(ip)) {
  417. /*
  418. * If the file blocks are being allocated from a
  419. * realtime volume, then return the inode's realtime
  420. * extent size or the realtime volume's extent size.
  421. */
  422. stat->blksize =
  423. xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
  424. } else
  425. stat->blksize = xfs_preferred_iosize(mp);
  426. stat->rdev = 0;
  427. break;
  428. }
  429. return 0;
  430. }
  431. int
  432. xfs_setattr_nonsize(
  433. struct xfs_inode *ip,
  434. struct iattr *iattr,
  435. int flags)
  436. {
  437. xfs_mount_t *mp = ip->i_mount;
  438. struct inode *inode = VFS_I(ip);
  439. int mask = iattr->ia_valid;
  440. xfs_trans_t *tp;
  441. int error;
  442. uid_t uid = 0, iuid = 0;
  443. gid_t gid = 0, igid = 0;
  444. struct xfs_dquot *udqp = NULL, *gdqp = NULL;
  445. struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL;
  446. trace_xfs_setattr(ip);
  447. if (mp->m_flags & XFS_MOUNT_RDONLY)
  448. return XFS_ERROR(EROFS);
  449. if (XFS_FORCED_SHUTDOWN(mp))
  450. return XFS_ERROR(EIO);
  451. error = -inode_change_ok(inode, iattr);
  452. if (error)
  453. return XFS_ERROR(error);
  454. ASSERT((mask & ATTR_SIZE) == 0);
  455. /*
  456. * If disk quotas is on, we make sure that the dquots do exist on disk,
  457. * before we start any other transactions. Trying to do this later
  458. * is messy. We don't care to take a readlock to look at the ids
  459. * in inode here, because we can't hold it across the trans_reserve.
  460. * If the IDs do change before we take the ilock, we're covered
  461. * because the i_*dquot fields will get updated anyway.
  462. */
  463. if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
  464. uint qflags = 0;
  465. if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
  466. uid = iattr->ia_uid;
  467. qflags |= XFS_QMOPT_UQUOTA;
  468. } else {
  469. uid = ip->i_d.di_uid;
  470. }
  471. if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
  472. gid = iattr->ia_gid;
  473. qflags |= XFS_QMOPT_GQUOTA;
  474. } else {
  475. gid = ip->i_d.di_gid;
  476. }
  477. /*
  478. * We take a reference when we initialize udqp and gdqp,
  479. * so it is important that we never blindly double trip on
  480. * the same variable. See xfs_create() for an example.
  481. */
  482. ASSERT(udqp == NULL);
  483. ASSERT(gdqp == NULL);
  484. error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
  485. qflags, &udqp, &gdqp);
  486. if (error)
  487. return error;
  488. }
  489. tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
  490. error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
  491. if (error)
  492. goto out_dqrele;
  493. xfs_ilock(ip, XFS_ILOCK_EXCL);
  494. /*
  495. * Change file ownership. Must be the owner or privileged.
  496. */
  497. if (mask & (ATTR_UID|ATTR_GID)) {
  498. /*
  499. * These IDs could have changed since we last looked at them.
  500. * But, we're assured that if the ownership did change
  501. * while we didn't have the inode locked, inode's dquot(s)
  502. * would have changed also.
  503. */
  504. iuid = ip->i_d.di_uid;
  505. igid = ip->i_d.di_gid;
  506. gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
  507. uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
  508. /*
  509. * Do a quota reservation only if uid/gid is actually
  510. * going to change.
  511. */
  512. if (XFS_IS_QUOTA_RUNNING(mp) &&
  513. ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
  514. (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
  515. ASSERT(tp);
  516. error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
  517. capable(CAP_FOWNER) ?
  518. XFS_QMOPT_FORCE_RES : 0);
  519. if (error) /* out of quota */
  520. goto out_trans_cancel;
  521. }
  522. }
  523. xfs_trans_ijoin(tp, ip);
  524. /*
  525. * Change file ownership. Must be the owner or privileged.
  526. */
  527. if (mask & (ATTR_UID|ATTR_GID)) {
  528. /*
  529. * CAP_FSETID overrides the following restrictions:
  530. *
  531. * The set-user-ID and set-group-ID bits of a file will be
  532. * cleared upon successful return from chown()
  533. */
  534. if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
  535. !capable(CAP_FSETID))
  536. ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
  537. /*
  538. * Change the ownerships and register quota modifications
  539. * in the transaction.
  540. */
  541. if (iuid != uid) {
  542. if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
  543. ASSERT(mask & ATTR_UID);
  544. ASSERT(udqp);
  545. olddquot1 = xfs_qm_vop_chown(tp, ip,
  546. &ip->i_udquot, udqp);
  547. }
  548. ip->i_d.di_uid = uid;
  549. inode->i_uid = uid;
  550. }
  551. if (igid != gid) {
  552. if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
  553. ASSERT(!XFS_IS_PQUOTA_ON(mp));
  554. ASSERT(mask & ATTR_GID);
  555. ASSERT(gdqp);
  556. olddquot2 = xfs_qm_vop_chown(tp, ip,
  557. &ip->i_gdquot, gdqp);
  558. }
  559. ip->i_d.di_gid = gid;
  560. inode->i_gid = gid;
  561. }
  562. }
  563. /*
  564. * Change file access modes.
  565. */
  566. if (mask & ATTR_MODE) {
  567. umode_t mode = iattr->ia_mode;
  568. if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
  569. mode &= ~S_ISGID;
  570. ip->i_d.di_mode &= S_IFMT;
  571. ip->i_d.di_mode |= mode & ~S_IFMT;
  572. inode->i_mode &= S_IFMT;
  573. inode->i_mode |= mode & ~S_IFMT;
  574. }
  575. /*
  576. * Change file access or modified times.
  577. */
  578. if (mask & ATTR_ATIME) {
  579. inode->i_atime = iattr->ia_atime;
  580. ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
  581. ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
  582. ip->i_update_core = 1;
  583. }
  584. if (mask & ATTR_CTIME) {
  585. inode->i_ctime = iattr->ia_ctime;
  586. ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
  587. ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
  588. ip->i_update_core = 1;
  589. }
  590. if (mask & ATTR_MTIME) {
  591. inode->i_mtime = iattr->ia_mtime;
  592. ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
  593. ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
  594. ip->i_update_core = 1;
  595. }
  596. xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  597. XFS_STATS_INC(xs_ig_attrchg);
  598. if (mp->m_flags & XFS_MOUNT_WSYNC)
  599. xfs_trans_set_sync(tp);
  600. error = xfs_trans_commit(tp, 0);
  601. xfs_iunlock(ip, XFS_ILOCK_EXCL);
  602. /*
  603. * Release any dquot(s) the inode had kept before chown.
  604. */
  605. xfs_qm_dqrele(olddquot1);
  606. xfs_qm_dqrele(olddquot2);
  607. xfs_qm_dqrele(udqp);
  608. xfs_qm_dqrele(gdqp);
  609. if (error)
  610. return XFS_ERROR(error);
  611. /*
  612. * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
  613. * update. We could avoid this with linked transactions
  614. * and passing down the transaction pointer all the way
  615. * to attr_set. No previous user of the generic
  616. * Posix ACL code seems to care about this issue either.
  617. */
  618. if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
  619. error = -xfs_acl_chmod(inode);
  620. if (error)
  621. return XFS_ERROR(error);
  622. }
  623. return 0;
  624. out_trans_cancel:
  625. xfs_trans_cancel(tp, 0);
  626. xfs_iunlock(ip, XFS_ILOCK_EXCL);
  627. out_dqrele:
  628. xfs_qm_dqrele(udqp);
  629. xfs_qm_dqrele(gdqp);
  630. return error;
  631. }
/*
 * Truncate file.  Must have write permission and not be a directory.
 *
 * Changes the size of a regular file to iattr->ia_size, either growing
 * or shrinking it, and updates ctime/mtime as described further down.
 * Only the size and the c/mtime bits may be set in iattr->ia_valid (see
 * the ASSERT below).
 *
 * The ILOCK is always taken here; the IOLOCK is taken as well unless
 * XFS_ATTR_NOLOCK is set in @flags.  Every lock taken here is dropped
 * again before returning.
 *
 * Returns 0 or a positive XFS error code.
 */
int
xfs_setattr_size(
	struct xfs_inode	*ip,
	struct iattr		*iattr,
	int			flags)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	int			mask = iattr->ia_valid;
	struct xfs_trans	*tp;
	int			error;
	uint			lock_flags;	/* locks currently held by us */
	uint			commit_flags = 0;

	trace_xfs_setattr(ip);

	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return XFS_ERROR(EROFS);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	error = -inode_change_ok(inode, iattr);
	if (error)
		return XFS_ERROR(error);

	ASSERT(S_ISREG(ip->i_d.di_mode));
	ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
			ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
			ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);

	lock_flags = XFS_ILOCK_EXCL;
	if (!(flags & XFS_ATTR_NOLOCK))
		lock_flags |= XFS_IOLOCK_EXCL;
	xfs_ilock(ip, lock_flags);

	/*
	 * Short circuit the truncate case for zero length files.
	 */
	if (iattr->ia_size == 0 &&
	    ip->i_size == 0 && ip->i_d.di_nextents == 0) {
		if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
			goto out_unlock;

		/*
		 * Use the regular setattr path to update the timestamps.
		 */
		xfs_iunlock(ip, lock_flags);
		iattr->ia_valid &= ~ATTR_SIZE;
		return xfs_setattr_nonsize(ip, iattr, 0);
	}

	/*
	 * Make sure that the dquots are attached to the inode.
	 */
	error = xfs_qm_dqattach_locked(ip, 0);
	if (error)
		goto out_unlock;

	/*
	 * Now we can make the changes.  Before we join the inode to the
	 * transaction, take care of the part of the truncation that must be
	 * done without the inode lock.  This needs to be done before joining
	 * the inode to the transaction, because the inode cannot be unlocked
	 * once it is a part of the transaction.
	 */
	if (iattr->ia_size > ip->i_size) {
		/*
		 * Do the first part of growing a file: zero any data in the
		 * last block that is beyond the old EOF.  We need to do this
		 * before the inode is joined to the transaction to modify
		 * i_size.
		 */
		error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
		if (error)
			goto out_unlock;
	}
	/* Drop the ILOCK and keep lock_flags in step for out_unlock. */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	lock_flags &= ~XFS_ILOCK_EXCL;

	/*
	 * We are going to log the inode size change in this transaction so
	 * any previous writes that are beyond the on disk EOF and the new
	 * EOF that have not been written out need to be written here.  If we
	 * do not write the data out, we expose ourselves to the null files
	 * problem.
	 *
	 * Only flush from the on disk size to the smaller of the in memory
	 * file size or the new size as that's the range we really care about
	 * here and prevents waiting for other data not within the range we
	 * care about here.
	 */
	if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
		error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
					XBF_ASYNC, FI_NONE);
		if (error)
			goto out_unlock;
	}

	/*
	 * Wait for all I/O to complete.
	 */
	xfs_ioend_wait(ip);

	error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
				     xfs_get_blocks);
	if (error)
		goto out_unlock;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
				 XFS_TRANS_PERM_LOG_RES,
				 XFS_ITRUNCATE_LOG_COUNT);
	if (error)
		goto out_trans_cancel;

	truncate_setsize(inode, iattr->ia_size);

	/* From here on a cancel must also release the log reservation. */
	commit_flags = XFS_TRANS_RELEASE_LOG_RES;
	lock_flags |= XFS_ILOCK_EXCL;

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	xfs_trans_ijoin(tp, ip);

	/*
	 * Only change the c/mtime if we are changing the size or we are
	 * explicitly asked to change it.  This handles the semantic difference
	 * between truncate() and ftruncate() as implemented in the VFS.
	 *
	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
	 * special case where we need to update the times despite not having
	 * these flags set.  For all other operations the VFS set these flags
	 * explicitly if it wants a timestamp update.
	 */
	if (iattr->ia_size != ip->i_size &&
	    (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
		iattr->ia_ctime = iattr->ia_mtime =
			current_fs_time(inode->i_sb);
		mask |= ATTR_CTIME | ATTR_MTIME;
	}

	if (iattr->ia_size > ip->i_size) {
		/* Growing: just record the new size. */
		ip->i_d.di_size = iattr->ia_size;
		ip->i_size = iattr->ia_size;
	} else if (iattr->ia_size <= ip->i_size ||
		   (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
		/*
		 * NOTE(review): the first half of this condition is always
		 * true once the branch above was not taken.
		 */
		error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
		if (error)
			goto out_trans_abort;

		/*
		 * Truncated "down", so we're removing references to old data
		 * here - if we delay flushing for a long time, we expose
		 * ourselves unduly to the notorious NULL files problem.  So,
		 * we mark this inode and flush it when the file is closed,
		 * and do not wait the usual (long) time for writeout.
		 */
		xfs_iflags_set(ip, XFS_ITRUNCATED);
	}

	if (mask & ATTR_CTIME) {
		inode->i_ctime = iattr->ia_ctime;
		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
		ip->i_update_core = 1;
	}
	if (mask & ATTR_MTIME) {
		inode->i_mtime = iattr->ia_mtime;
		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
		ip->i_update_core = 1;
	}

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	XFS_STATS_INC(xs_ig_attrchg);

	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(tp);

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
out_unlock:
	/* lock_flags can be 0 here if XFS_ATTR_NOLOCK was passed. */
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
	return error;

out_trans_abort:
	commit_flags |= XFS_TRANS_ABORT;
out_trans_cancel:
	xfs_trans_cancel(tp, commit_flags);
	goto out_unlock;
}
  801. STATIC int
  802. xfs_vn_setattr(
  803. struct dentry *dentry,
  804. struct iattr *iattr)
  805. {
  806. if (iattr->ia_valid & ATTR_SIZE)
  807. return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
  808. return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
  809. }
/* fiemap request flags accepted by xfs_vn_fiemap(). */
#define XFS_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
  811. /*
  812. * Call fiemap helper to fill in user data.
  813. * Returns positive errors to xfs_getbmap.
  814. */
  815. STATIC int
  816. xfs_fiemap_format(
  817. void **arg,
  818. struct getbmapx *bmv,
  819. int *full)
  820. {
  821. int error;
  822. struct fiemap_extent_info *fieinfo = *arg;
  823. u32 fiemap_flags = 0;
  824. u64 logical, physical, length;
  825. /* Do nothing for a hole */
  826. if (bmv->bmv_block == -1LL)
  827. return 0;
  828. logical = BBTOB(bmv->bmv_offset);
  829. physical = BBTOB(bmv->bmv_block);
  830. length = BBTOB(bmv->bmv_length);
  831. if (bmv->bmv_oflags & BMV_OF_PREALLOC)
  832. fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
  833. else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
  834. fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
  835. physical = 0; /* no block yet */
  836. }
  837. if (bmv->bmv_oflags & BMV_OF_LAST)
  838. fiemap_flags |= FIEMAP_EXTENT_LAST;
  839. error = fiemap_fill_next_extent(fieinfo, logical, physical,
  840. length, fiemap_flags);
  841. if (error > 0) {
  842. error = 0;
  843. *full = 1; /* user array now full */
  844. }
  845. return -error;
  846. }
  847. STATIC int
  848. xfs_vn_fiemap(
  849. struct inode *inode,
  850. struct fiemap_extent_info *fieinfo,
  851. u64 start,
  852. u64 length)
  853. {
  854. xfs_inode_t *ip = XFS_I(inode);
  855. struct getbmapx bm;
  856. int error;
  857. error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
  858. if (error)
  859. return error;
  860. /* Set up bmap header for xfs internal routine */
  861. bm.bmv_offset = BTOBB(start);
  862. /* Special case for whole file */
  863. if (length == FIEMAP_MAX_OFFSET)
  864. bm.bmv_length = -1LL;
  865. else
  866. bm.bmv_length = BTOBB(length);
  867. /* We add one because in getbmap world count includes the header */
  868. bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
  869. fieinfo->fi_extents_max + 1;
  870. bm.bmv_count = min_t(__s32, bm.bmv_count,
  871. (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
  872. bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
  873. if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
  874. bm.bmv_iflags |= BMV_IF_ATTRFORK;
  875. if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
  876. bm.bmv_iflags |= BMV_IF_DELALLOC;
  877. error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
  878. if (error)
  879. return -error;
  880. return 0;
  881. }
  882. static const struct inode_operations xfs_inode_operations = {
  883. .get_acl = xfs_get_acl,
  884. .getattr = xfs_vn_getattr,
  885. .setattr = xfs_vn_setattr,
  886. .setxattr = generic_setxattr,
  887. .getxattr = generic_getxattr,
  888. .removexattr = generic_removexattr,
  889. .listxattr = xfs_vn_listxattr,
  890. .fiemap = xfs_vn_fiemap,
  891. };
  892. static const struct inode_operations xfs_dir_inode_operations = {
  893. .create = xfs_vn_create,
  894. .lookup = xfs_vn_lookup,
  895. .link = xfs_vn_link,
  896. .unlink = xfs_vn_unlink,
  897. .symlink = xfs_vn_symlink,
  898. .mkdir = xfs_vn_mkdir,
  899. /*
  900. * Yes, XFS uses the same method for rmdir and unlink.
  901. *
  902. * There are some subtile differences deeper in the code,
  903. * but we use S_ISDIR to check for those.
  904. */
  905. .rmdir = xfs_vn_unlink,
  906. .mknod = xfs_vn_mknod,
  907. .rename = xfs_vn_rename,
  908. .get_acl = xfs_get_acl,
  909. .getattr = xfs_vn_getattr,
  910. .setattr = xfs_vn_setattr,
  911. .setxattr = generic_setxattr,
  912. .getxattr = generic_getxattr,
  913. .removexattr = generic_removexattr,
  914. .listxattr = xfs_vn_listxattr,
  915. };
  916. static const struct inode_operations xfs_dir_ci_inode_operations = {
  917. .create = xfs_vn_create,
  918. .lookup = xfs_vn_ci_lookup,
  919. .link = xfs_vn_link,
  920. .unlink = xfs_vn_unlink,
  921. .symlink = xfs_vn_symlink,
  922. .mkdir = xfs_vn_mkdir,
  923. /*
  924. * Yes, XFS uses the same method for rmdir and unlink.
  925. *
  926. * There are some subtile differences deeper in the code,
  927. * but we use S_ISDIR to check for those.
  928. */
  929. .rmdir = xfs_vn_unlink,
  930. .mknod = xfs_vn_mknod,
  931. .rename = xfs_vn_rename,
  932. .get_acl = xfs_get_acl,
  933. .getattr = xfs_vn_getattr,
  934. .setattr = xfs_vn_setattr,
  935. .setxattr = generic_setxattr,
  936. .getxattr = generic_getxattr,
  937. .removexattr = generic_removexattr,
  938. .listxattr = xfs_vn_listxattr,
  939. };
  940. static const struct inode_operations xfs_symlink_inode_operations = {
  941. .readlink = generic_readlink,
  942. .follow_link = xfs_vn_follow_link,
  943. .put_link = xfs_vn_put_link,
  944. .get_acl = xfs_get_acl,
  945. .getattr = xfs_vn_getattr,
  946. .setattr = xfs_vn_setattr,
  947. .setxattr = generic_setxattr,
  948. .getxattr = generic_getxattr,
  949. .removexattr = generic_removexattr,
  950. .listxattr = xfs_vn_listxattr,
  951. };
  952. STATIC void
  953. xfs_diflags_to_iflags(
  954. struct inode *inode,
  955. struct xfs_inode *ip)
  956. {
  957. if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
  958. inode->i_flags |= S_IMMUTABLE;
  959. else
  960. inode->i_flags &= ~S_IMMUTABLE;
  961. if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
  962. inode->i_flags |= S_APPEND;
  963. else
  964. inode->i_flags &= ~S_APPEND;
  965. if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
  966. inode->i_flags |= S_SYNC;
  967. else
  968. inode->i_flags &= ~S_SYNC;
  969. if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
  970. inode->i_flags |= S_NOATIME;
  971. else
  972. inode->i_flags &= ~S_NOATIME;
  973. }
/*
 * Initialize the Linux inode, set up the operation vectors and
 * unlock the inode.
 *
 * When reading existing inodes from disk this is called directly
 * from xfs_iget, when creating a new inode it is called from
 * xfs_ialloc after setting up the inode.
 *
 * We are always called with an uninitialised linux inode here.
 * We need to initialise the necessary fields and take a reference
 * on it.
 *
 * The Linux inode fields are filled in from the in-core XFS inode
 * (ip->i_d), the operation tables are chosen by file type, and finally
 * XFS_INEW is cleared and the inode is released with unlock_new_inode().
 */
void
xfs_setup_inode(
	struct xfs_inode	*ip)
{
	struct inode		*inode = &ip->i_vnode;

	inode->i_ino = ip->i_ino;
	inode->i_state = I_NEW;

	inode_sb_list_add(inode);
	/* make the inode look hashed for the writeback code */
	hlist_add_fake(&inode->i_hash);

	inode->i_mode	= ip->i_d.di_mode;
	inode->i_nlink	= ip->i_d.di_nlink;
	inode->i_uid	= ip->i_d.di_uid;
	inode->i_gid	= ip->i_d.di_gid;

	/* Only block and character devices carry a device number. */
	switch (inode->i_mode & S_IFMT) {
	case S_IFBLK:
	case S_IFCHR:
		inode->i_rdev =
			MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
			      sysv_minor(ip->i_df.if_u2.if_rdev));
		break;
	default:
		inode->i_rdev = 0;
		break;
	}

	inode->i_generation = ip->i_d.di_gen;
	i_size_write(inode, ip->i_d.di_size);
	inode->i_atime.tv_sec	= ip->i_d.di_atime.t_sec;
	inode->i_atime.tv_nsec	= ip->i_d.di_atime.t_nsec;
	inode->i_mtime.tv_sec	= ip->i_d.di_mtime.t_sec;
	inode->i_mtime.tv_nsec	= ip->i_d.di_mtime.t_nsec;
	inode->i_ctime.tv_sec	= ip->i_d.di_ctime.t_sec;
	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;
	xfs_diflags_to_iflags(inode, ip);

	/* Pick the inode, file and address space operations by file type. */
	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_op = &xfs_inode_operations;
		inode->i_fop = &xfs_file_operations;
		inode->i_mapping->a_ops = &xfs_address_space_operations;
		break;
	case S_IFDIR:
		/* Case-insensitive filesystems need the CI lookup method. */
		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
			inode->i_op = &xfs_dir_ci_inode_operations;
		else
			inode->i_op = &xfs_dir_inode_operations;
		inode->i_fop = &xfs_dir_file_operations;
		break;
	case S_IFLNK:
		inode->i_op = &xfs_symlink_inode_operations;
		/* Address space ops are only set when XFS_IFINLINE is clear. */
		if (!(ip->i_df.if_flags & XFS_IFINLINE))
			inode->i_mapping->a_ops = &xfs_address_space_operations;
		break;
	default:
		inode->i_op = &xfs_inode_operations;
		/* Uses the i_rdev computed by the first switch above. */
		init_special_inode(inode, inode->i_mode, inode->i_rdev);
		break;
	}

	/*
	 * If there is no attribute fork no ACL can exist on this inode,
	 * and it can't have any file capabilities attached to it either.
	 */
	if (!XFS_IFORK_Q(ip)) {
		inode_has_no_xattr(inode);
		cache_no_acl(inode);
	}

	xfs_iflags_clear(ip, XFS_INEW);
	/* Compiler barrier between clearing XFS_INEW and unlocking the inode. */
	barrier();

	unlock_new_inode(inode);
}
  1049. cache_no_acl(inode);
  1050. }
  1051. xfs_iflags_clear(ip, XFS_INEW);
  1052. barrier();
  1053. unlock_new_inode(inode);
  1054. }