PageRenderTime 7ms CodeModel.GetById 12ms app.highlight 47ms RepoModel.GetById 1ms app.codeStats 0ms

/fs/open.c

https://bitbucket.org/evzijst/gittest
C | 1076 lines | 797 code | 163 blank | 116 comment | 158 complexity | 03bda35aa0382f68dda4e14ae14cd455 MD5 | raw file
   1/*
   2 *  linux/fs/open.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 */
   6
   7#include <linux/string.h>
   8#include <linux/mm.h>
   9#include <linux/utime.h>
  10#include <linux/file.h>
  11#include <linux/smp_lock.h>
  12#include <linux/quotaops.h>
  13#include <linux/dnotify.h>
  14#include <linux/module.h>
  15#include <linux/slab.h>
  16#include <linux/tty.h>
  17#include <linux/namei.h>
  18#include <linux/backing-dev.h>
  19#include <linux/security.h>
  20#include <linux/mount.h>
  21#include <linux/vfs.h>
  22#include <asm/uaccess.h>
  23#include <linux/fs.h>
  24#include <linux/pagemap.h>
  25#include <linux/syscalls.h>
  26
  27#include <asm/unistd.h>
  28
  29int vfs_statfs(struct super_block *sb, struct kstatfs *buf)
  30{
  31	int retval = -ENODEV;
  32
  33	if (sb) {
  34		retval = -ENOSYS;
  35		if (sb->s_op->statfs) {
  36			memset(buf, 0, sizeof(*buf));
  37			retval = security_sb_statfs(sb);
  38			if (retval)
  39				return retval;
  40			retval = sb->s_op->statfs(sb, buf);
  41			if (retval == 0 && buf->f_frsize == 0)
  42				buf->f_frsize = buf->f_bsize;
  43		}
  44	}
  45	return retval;
  46}
  47
  48EXPORT_SYMBOL(vfs_statfs);
  49
  50static int vfs_statfs_native(struct super_block *sb, struct statfs *buf)
  51{
  52	struct kstatfs st;
  53	int retval;
  54
  55	retval = vfs_statfs(sb, &st);
  56	if (retval)
  57		return retval;
  58
  59	if (sizeof(*buf) == sizeof(st))
  60		memcpy(buf, &st, sizeof(st));
  61	else {
  62		if (sizeof buf->f_blocks == 4) {
  63			if ((st.f_blocks | st.f_bfree | st.f_bavail) &
  64			    0xffffffff00000000ULL)
  65				return -EOVERFLOW;
  66			/*
  67			 * f_files and f_ffree may be -1; it's okay to stuff
  68			 * that into 32 bits
  69			 */
  70			if (st.f_files != -1 &&
  71			    (st.f_files & 0xffffffff00000000ULL))
  72				return -EOVERFLOW;
  73			if (st.f_ffree != -1 &&
  74			    (st.f_ffree & 0xffffffff00000000ULL))
  75				return -EOVERFLOW;
  76		}
  77
  78		buf->f_type = st.f_type;
  79		buf->f_bsize = st.f_bsize;
  80		buf->f_blocks = st.f_blocks;
  81		buf->f_bfree = st.f_bfree;
  82		buf->f_bavail = st.f_bavail;
  83		buf->f_files = st.f_files;
  84		buf->f_ffree = st.f_ffree;
  85		buf->f_fsid = st.f_fsid;
  86		buf->f_namelen = st.f_namelen;
  87		buf->f_frsize = st.f_frsize;
  88		memset(buf->f_spare, 0, sizeof(buf->f_spare));
  89	}
  90	return 0;
  91}
  92
  93static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf)
  94{
  95	struct kstatfs st;
  96	int retval;
  97
  98	retval = vfs_statfs(sb, &st);
  99	if (retval)
 100		return retval;
 101
 102	if (sizeof(*buf) == sizeof(st))
 103		memcpy(buf, &st, sizeof(st));
 104	else {
 105		buf->f_type = st.f_type;
 106		buf->f_bsize = st.f_bsize;
 107		buf->f_blocks = st.f_blocks;
 108		buf->f_bfree = st.f_bfree;
 109		buf->f_bavail = st.f_bavail;
 110		buf->f_files = st.f_files;
 111		buf->f_ffree = st.f_ffree;
 112		buf->f_fsid = st.f_fsid;
 113		buf->f_namelen = st.f_namelen;
 114		buf->f_frsize = st.f_frsize;
 115		memset(buf->f_spare, 0, sizeof(buf->f_spare));
 116	}
 117	return 0;
 118}
 119
 120asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
 121{
 122	struct nameidata nd;
 123	int error;
 124
 125	error = user_path_walk(path, &nd);
 126	if (!error) {
 127		struct statfs tmp;
 128		error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
 129		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 130			error = -EFAULT;
 131		path_release(&nd);
 132	}
 133	return error;
 134}
 135
 136
 137asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf)
 138{
 139	struct nameidata nd;
 140	long error;
 141
 142	if (sz != sizeof(*buf))
 143		return -EINVAL;
 144	error = user_path_walk(path, &nd);
 145	if (!error) {
 146		struct statfs64 tmp;
 147		error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
 148		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 149			error = -EFAULT;
 150		path_release(&nd);
 151	}
 152	return error;
 153}
 154
 155
 156asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf)
 157{
 158	struct file * file;
 159	struct statfs tmp;
 160	int error;
 161
 162	error = -EBADF;
 163	file = fget(fd);
 164	if (!file)
 165		goto out;
 166	error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
 167	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 168		error = -EFAULT;
 169	fput(file);
 170out:
 171	return error;
 172}
 173
 174asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf)
 175{
 176	struct file * file;
 177	struct statfs64 tmp;
 178	int error;
 179
 180	if (sz != sizeof(*buf))
 181		return -EINVAL;
 182
 183	error = -EBADF;
 184	file = fget(fd);
 185	if (!file)
 186		goto out;
 187	error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
 188	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 189		error = -EFAULT;
 190	fput(file);
 191out:
 192	return error;
 193}
 194
 195int do_truncate(struct dentry *dentry, loff_t length)
 196{
 197	int err;
 198	struct iattr newattrs;
 199
 200	/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
 201	if (length < 0)
 202		return -EINVAL;
 203
 204	newattrs.ia_size = length;
 205	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
 206
 207	down(&dentry->d_inode->i_sem);
 208	err = notify_change(dentry, &newattrs);
 209	up(&dentry->d_inode->i_sem);
 210	return err;
 211}
 212
 213static inline long do_sys_truncate(const char __user * path, loff_t length)
 214{
 215	struct nameidata nd;
 216	struct inode * inode;
 217	int error;
 218
 219	error = -EINVAL;
 220	if (length < 0)	/* sorry, but loff_t says... */
 221		goto out;
 222
 223	error = user_path_walk(path, &nd);
 224	if (error)
 225		goto out;
 226	inode = nd.dentry->d_inode;
 227
 228	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
 229	error = -EISDIR;
 230	if (S_ISDIR(inode->i_mode))
 231		goto dput_and_out;
 232
 233	error = -EINVAL;
 234	if (!S_ISREG(inode->i_mode))
 235		goto dput_and_out;
 236
 237	error = permission(inode,MAY_WRITE,&nd);
 238	if (error)
 239		goto dput_and_out;
 240
 241	error = -EROFS;
 242	if (IS_RDONLY(inode))
 243		goto dput_and_out;
 244
 245	error = -EPERM;
 246	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 247		goto dput_and_out;
 248
 249	/*
 250	 * Make sure that there are no leases.
 251	 */
 252	error = break_lease(inode, FMODE_WRITE);
 253	if (error)
 254		goto dput_and_out;
 255
 256	error = get_write_access(inode);
 257	if (error)
 258		goto dput_and_out;
 259
 260	error = locks_verify_truncate(inode, NULL, length);
 261	if (!error) {
 262		DQUOT_INIT(inode);
 263		error = do_truncate(nd.dentry, length);
 264	}
 265	put_write_access(inode);
 266
 267dput_and_out:
 268	path_release(&nd);
 269out:
 270	return error;
 271}
 272
 273asmlinkage long sys_truncate(const char __user * path, unsigned long length)
 274{
 275	/* on 32-bit boxen it will cut the range 2^31--2^32-1 off */
 276	return do_sys_truncate(path, (long)length);
 277}
 278
 279static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 280{
 281	struct inode * inode;
 282	struct dentry *dentry;
 283	struct file * file;
 284	int error;
 285
 286	error = -EINVAL;
 287	if (length < 0)
 288		goto out;
 289	error = -EBADF;
 290	file = fget(fd);
 291	if (!file)
 292		goto out;
 293
 294	/* explicitly opened as large or we are on 64-bit box */
 295	if (file->f_flags & O_LARGEFILE)
 296		small = 0;
 297
 298	dentry = file->f_dentry;
 299	inode = dentry->d_inode;
 300	error = -EINVAL;
 301	if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
 302		goto out_putf;
 303
 304	error = -EINVAL;
 305	/* Cannot ftruncate over 2^31 bytes without large file support */
 306	if (small && length > MAX_NON_LFS)
 307		goto out_putf;
 308
 309	error = -EPERM;
 310	if (IS_APPEND(inode))
 311		goto out_putf;
 312
 313	error = locks_verify_truncate(inode, file, length);
 314	if (!error)
 315		error = do_truncate(dentry, length);
 316out_putf:
 317	fput(file);
 318out:
 319	return error;
 320}
 321
 322asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length)
 323{
 324	return do_sys_ftruncate(fd, length, 1);
 325}
 326
 327/* LFS versions of truncate are only needed on 32 bit machines */
 328#if BITS_PER_LONG == 32
 329asmlinkage long sys_truncate64(const char __user * path, loff_t length)
 330{
 331	return do_sys_truncate(path, length);
 332}
 333
 334asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
 335{
 336	return do_sys_ftruncate(fd, length, 0);
 337}
 338#endif
 339
 340#ifdef __ARCH_WANT_SYS_UTIME
 341
 342/*
 343 * sys_utime() can be implemented in user-level using sys_utimes().
 344 * Is this for backwards compatibility?  If so, why not move it
 345 * into the appropriate arch directory (for those architectures that
 346 * need it).
 347 */
 348
 349/* If times==NULL, set access and modification to current time,
 350 * must be owner or have write permission.
 351 * Else, update from *times, must be owner or super user.
 352 */
 353asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
 354{
 355	int error;
 356	struct nameidata nd;
 357	struct inode * inode;
 358	struct iattr newattrs;
 359
 360	error = user_path_walk(filename, &nd);
 361	if (error)
 362		goto out;
 363	inode = nd.dentry->d_inode;
 364
 365	error = -EROFS;
 366	if (IS_RDONLY(inode))
 367		goto dput_and_out;
 368
 369	/* Don't worry, the checks are done in inode_change_ok() */
 370	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
 371	if (times) {
 372		error = -EPERM;
 373		if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
 374			goto dput_and_out;
 375
 376		error = get_user(newattrs.ia_atime.tv_sec, &times->actime);
 377		newattrs.ia_atime.tv_nsec = 0;
 378		if (!error) 
 379			error = get_user(newattrs.ia_mtime.tv_sec, &times->modtime);
 380		newattrs.ia_mtime.tv_nsec = 0;
 381		if (error)
 382			goto dput_and_out;
 383
 384		newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
 385	} else {
 386                error = -EACCES;
 387                if (IS_IMMUTABLE(inode))
 388                        goto dput_and_out;
 389
 390		if (current->fsuid != inode->i_uid &&
 391		    (error = permission(inode,MAY_WRITE,&nd)) != 0)
 392			goto dput_and_out;
 393	}
 394	down(&inode->i_sem);
 395	error = notify_change(nd.dentry, &newattrs);
 396	up(&inode->i_sem);
 397dput_and_out:
 398	path_release(&nd);
 399out:
 400	return error;
 401}
 402
 403#endif
 404
 405/* If times==NULL, set access and modification to current time,
 406 * must be owner or have write permission.
 407 * Else, update from *times, must be owner or super user.
 408 */
 409long do_utimes(char __user * filename, struct timeval * times)
 410{
 411	int error;
 412	struct nameidata nd;
 413	struct inode * inode;
 414	struct iattr newattrs;
 415
 416	error = user_path_walk(filename, &nd);
 417
 418	if (error)
 419		goto out;
 420	inode = nd.dentry->d_inode;
 421
 422	error = -EROFS;
 423	if (IS_RDONLY(inode))
 424		goto dput_and_out;
 425
 426	/* Don't worry, the checks are done in inode_change_ok() */
 427	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
 428	if (times) {
 429		error = -EPERM;
 430                if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
 431                        goto dput_and_out;
 432
 433		newattrs.ia_atime.tv_sec = times[0].tv_sec;
 434		newattrs.ia_atime.tv_nsec = times[0].tv_usec * 1000;
 435		newattrs.ia_mtime.tv_sec = times[1].tv_sec;
 436		newattrs.ia_mtime.tv_nsec = times[1].tv_usec * 1000;
 437		newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
 438	} else {
 439		error = -EACCES;
 440                if (IS_IMMUTABLE(inode))
 441                        goto dput_and_out;
 442
 443		if (current->fsuid != inode->i_uid &&
 444		    (error = permission(inode,MAY_WRITE,&nd)) != 0)
 445			goto dput_and_out;
 446	}
 447	down(&inode->i_sem);
 448	error = notify_change(nd.dentry, &newattrs);
 449	up(&inode->i_sem);
 450dput_and_out:
 451	path_release(&nd);
 452out:
 453	return error;
 454}
 455
 456asmlinkage long sys_utimes(char __user * filename, struct timeval __user * utimes)
 457{
 458	struct timeval times[2];
 459
 460	if (utimes && copy_from_user(&times, utimes, sizeof(times)))
 461		return -EFAULT;
 462	return do_utimes(filename, utimes ? times : NULL);
 463}
 464
 465
 466/*
 467 * access() needs to use the real uid/gid, not the effective uid/gid.
 468 * We do this by temporarily clearing all FS-related capabilities and
 469 * switching the fsuid/fsgid around to the real ones.
 470 */
 471asmlinkage long sys_access(const char __user * filename, int mode)
 472{
 473	struct nameidata nd;
 474	int old_fsuid, old_fsgid;
 475	kernel_cap_t old_cap;
 476	int res;
 477
 478	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
 479		return -EINVAL;
 480
 481	old_fsuid = current->fsuid;
 482	old_fsgid = current->fsgid;
 483	old_cap = current->cap_effective;
 484
 485	current->fsuid = current->uid;
 486	current->fsgid = current->gid;
 487
 488	/*
 489	 * Clear the capabilities if we switch to a non-root user
 490	 *
 491	 * FIXME: There is a race here against sys_capset.  The
 492	 * capabilities can change yet we will restore the old
 493	 * value below.  We should hold task_capabilities_lock,
 494	 * but we cannot because user_path_walk can sleep.
 495	 */
 496	if (current->uid)
 497		cap_clear(current->cap_effective);
 498	else
 499		current->cap_effective = current->cap_permitted;
 500
 501	res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
 502	if (!res) {
 503		res = permission(nd.dentry->d_inode, mode, &nd);
 504		/* SuS v2 requires we report a read only fs too */
 505		if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
 506		   && !special_file(nd.dentry->d_inode->i_mode))
 507			res = -EROFS;
 508		path_release(&nd);
 509	}
 510
 511	current->fsuid = old_fsuid;
 512	current->fsgid = old_fsgid;
 513	current->cap_effective = old_cap;
 514
 515	return res;
 516}
 517
 518asmlinkage long sys_chdir(const char __user * filename)
 519{
 520	struct nameidata nd;
 521	int error;
 522
 523	error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
 524	if (error)
 525		goto out;
 526
 527	error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
 528	if (error)
 529		goto dput_and_out;
 530
 531	set_fs_pwd(current->fs, nd.mnt, nd.dentry);
 532
 533dput_and_out:
 534	path_release(&nd);
 535out:
 536	return error;
 537}
 538
 539asmlinkage long sys_fchdir(unsigned int fd)
 540{
 541	struct file *file;
 542	struct dentry *dentry;
 543	struct inode *inode;
 544	struct vfsmount *mnt;
 545	int error;
 546
 547	error = -EBADF;
 548	file = fget(fd);
 549	if (!file)
 550		goto out;
 551
 552	dentry = file->f_dentry;
 553	mnt = file->f_vfsmnt;
 554	inode = dentry->d_inode;
 555
 556	error = -ENOTDIR;
 557	if (!S_ISDIR(inode->i_mode))
 558		goto out_putf;
 559
 560	error = permission(inode, MAY_EXEC, NULL);
 561	if (!error)
 562		set_fs_pwd(current->fs, mnt, dentry);
 563out_putf:
 564	fput(file);
 565out:
 566	return error;
 567}
 568
 569asmlinkage long sys_chroot(const char __user * filename)
 570{
 571	struct nameidata nd;
 572	int error;
 573
 574	error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
 575	if (error)
 576		goto out;
 577
 578	error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
 579	if (error)
 580		goto dput_and_out;
 581
 582	error = -EPERM;
 583	if (!capable(CAP_SYS_CHROOT))
 584		goto dput_and_out;
 585
 586	set_fs_root(current->fs, nd.mnt, nd.dentry);
 587	set_fs_altroot();
 588	error = 0;
 589dput_and_out:
 590	path_release(&nd);
 591out:
 592	return error;
 593}
 594
 595asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
 596{
 597	struct inode * inode;
 598	struct dentry * dentry;
 599	struct file * file;
 600	int err = -EBADF;
 601	struct iattr newattrs;
 602
 603	file = fget(fd);
 604	if (!file)
 605		goto out;
 606
 607	dentry = file->f_dentry;
 608	inode = dentry->d_inode;
 609
 610	err = -EROFS;
 611	if (IS_RDONLY(inode))
 612		goto out_putf;
 613	err = -EPERM;
 614	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 615		goto out_putf;
 616	down(&inode->i_sem);
 617	if (mode == (mode_t) -1)
 618		mode = inode->i_mode;
 619	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 620	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 621	err = notify_change(dentry, &newattrs);
 622	up(&inode->i_sem);
 623
 624out_putf:
 625	fput(file);
 626out:
 627	return err;
 628}
 629
 630asmlinkage long sys_chmod(const char __user * filename, mode_t mode)
 631{
 632	struct nameidata nd;
 633	struct inode * inode;
 634	int error;
 635	struct iattr newattrs;
 636
 637	error = user_path_walk(filename, &nd);
 638	if (error)
 639		goto out;
 640	inode = nd.dentry->d_inode;
 641
 642	error = -EROFS;
 643	if (IS_RDONLY(inode))
 644		goto dput_and_out;
 645
 646	error = -EPERM;
 647	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 648		goto dput_and_out;
 649
 650	down(&inode->i_sem);
 651	if (mode == (mode_t) -1)
 652		mode = inode->i_mode;
 653	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 654	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 655	error = notify_change(nd.dentry, &newattrs);
 656	up(&inode->i_sem);
 657
 658dput_and_out:
 659	path_release(&nd);
 660out:
 661	return error;
 662}
 663
 664static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
 665{
 666	struct inode * inode;
 667	int error;
 668	struct iattr newattrs;
 669
 670	error = -ENOENT;
 671	if (!(inode = dentry->d_inode)) {
 672		printk(KERN_ERR "chown_common: NULL inode\n");
 673		goto out;
 674	}
 675	error = -EROFS;
 676	if (IS_RDONLY(inode))
 677		goto out;
 678	error = -EPERM;
 679	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 680		goto out;
 681	newattrs.ia_valid =  ATTR_CTIME;
 682	if (user != (uid_t) -1) {
 683		newattrs.ia_valid |= ATTR_UID;
 684		newattrs.ia_uid = user;
 685	}
 686	if (group != (gid_t) -1) {
 687		newattrs.ia_valid |= ATTR_GID;
 688		newattrs.ia_gid = group;
 689	}
 690	if (!S_ISDIR(inode->i_mode))
 691		newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
 692	down(&inode->i_sem);
 693	error = notify_change(dentry, &newattrs);
 694	up(&inode->i_sem);
 695out:
 696	return error;
 697}
 698
 699asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
 700{
 701	struct nameidata nd;
 702	int error;
 703
 704	error = user_path_walk(filename, &nd);
 705	if (!error) {
 706		error = chown_common(nd.dentry, user, group);
 707		path_release(&nd);
 708	}
 709	return error;
 710}
 711
 712asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
 713{
 714	struct nameidata nd;
 715	int error;
 716
 717	error = user_path_walk_link(filename, &nd);
 718	if (!error) {
 719		error = chown_common(nd.dentry, user, group);
 720		path_release(&nd);
 721	}
 722	return error;
 723}
 724
 725
 726asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
 727{
 728	struct file * file;
 729	int error = -EBADF;
 730
 731	file = fget(fd);
 732	if (file) {
 733		error = chown_common(file->f_dentry, user, group);
 734		fput(file);
 735	}
 736	return error;
 737}
 738
 739/*
 740 * Note that while the flag value (low two bits) for sys_open means:
 741 *	00 - read-only
 742 *	01 - write-only
 743 *	10 - read-write
 744 *	11 - special
 745 * it is changed into
 746 *	00 - no permissions needed
 747 *	01 - read-permission
 748 *	10 - write-permission
 749 *	11 - read-write
 750 * for the internal routines (ie open_namei()/follow_link() etc). 00 is
 751 * used by symlinks.
 752 */
 753struct file *filp_open(const char * filename, int flags, int mode)
 754{
 755	int namei_flags, error;
 756	struct nameidata nd;
 757
 758	namei_flags = flags;
 759	if ((namei_flags+1) & O_ACCMODE)
 760		namei_flags++;
 761	if (namei_flags & O_TRUNC)
 762		namei_flags |= 2;
 763
 764	error = open_namei(filename, namei_flags, mode, &nd);
 765	if (!error)
 766		return dentry_open(nd.dentry, nd.mnt, flags);
 767
 768	return ERR_PTR(error);
 769}
 770
 771EXPORT_SYMBOL(filp_open);
 772
 773struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 774{
 775	struct file * f;
 776	struct inode *inode;
 777	int error;
 778
 779	error = -ENFILE;
 780	f = get_empty_filp();
 781	if (!f)
 782		goto cleanup_dentry;
 783	f->f_flags = flags;
 784	f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
 785	inode = dentry->d_inode;
 786	if (f->f_mode & FMODE_WRITE) {
 787		error = get_write_access(inode);
 788		if (error)
 789			goto cleanup_file;
 790	}
 791
 792	f->f_mapping = inode->i_mapping;
 793	f->f_dentry = dentry;
 794	f->f_vfsmnt = mnt;
 795	f->f_pos = 0;
 796	f->f_op = fops_get(inode->i_fop);
 797	file_move(f, &inode->i_sb->s_files);
 798
 799	if (f->f_op && f->f_op->open) {
 800		error = f->f_op->open(inode,f);
 801		if (error)
 802			goto cleanup_all;
 803	}
 804	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 805
 806	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
 807
 808	/* NB: we're sure to have correct a_ops only after f_op->open */
 809	if (f->f_flags & O_DIRECT) {
 810		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) {
 811			fput(f);
 812			f = ERR_PTR(-EINVAL);
 813		}
 814	}
 815
 816	return f;
 817
 818cleanup_all:
 819	fops_put(f->f_op);
 820	if (f->f_mode & FMODE_WRITE)
 821		put_write_access(inode);
 822	file_kill(f);
 823	f->f_dentry = NULL;
 824	f->f_vfsmnt = NULL;
 825cleanup_file:
 826	put_filp(f);
 827cleanup_dentry:
 828	dput(dentry);
 829	mntput(mnt);
 830	return ERR_PTR(error);
 831}
 832
 833EXPORT_SYMBOL(dentry_open);
 834
 835/*
 836 * Find an empty file descriptor entry, and mark it busy.
 837 */
 838int get_unused_fd(void)
 839{
 840	struct files_struct * files = current->files;
 841	int fd, error;
 842
 843  	error = -EMFILE;
 844	spin_lock(&files->file_lock);
 845
 846repeat:
 847 	fd = find_next_zero_bit(files->open_fds->fds_bits, 
 848				files->max_fdset, 
 849				files->next_fd);
 850
 851	/*
 852	 * N.B. For clone tasks sharing a files structure, this test
 853	 * will limit the total number of files that can be opened.
 854	 */
 855	if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
 856		goto out;
 857
 858	/* Do we need to expand the fd array or fd set?  */
 859	error = expand_files(files, fd);
 860	if (error < 0)
 861		goto out;
 862
 863	if (error) {
 864		/*
 865	 	 * If we needed to expand the fs array we
 866		 * might have blocked - try again.
 867		 */
 868		error = -EMFILE;
 869		goto repeat;
 870	}
 871
 872	FD_SET(fd, files->open_fds);
 873	FD_CLR(fd, files->close_on_exec);
 874	files->next_fd = fd + 1;
 875#if 1
 876	/* Sanity check */
 877	if (files->fd[fd] != NULL) {
 878		printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
 879		files->fd[fd] = NULL;
 880	}
 881#endif
 882	error = fd;
 883
 884out:
 885	spin_unlock(&files->file_lock);
 886	return error;
 887}
 888
 889EXPORT_SYMBOL(get_unused_fd);
 890
 891static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
 892{
 893	__FD_CLR(fd, files->open_fds);
 894	if (fd < files->next_fd)
 895		files->next_fd = fd;
 896}
 897
 898void fastcall put_unused_fd(unsigned int fd)
 899{
 900	struct files_struct *files = current->files;
 901	spin_lock(&files->file_lock);
 902	__put_unused_fd(files, fd);
 903	spin_unlock(&files->file_lock);
 904}
 905
 906EXPORT_SYMBOL(put_unused_fd);
 907
 908/*
 909 * Install a file pointer in the fd array.  
 910 *
 911 * The VFS is full of places where we drop the files lock between
 912 * setting the open_fds bitmap and installing the file in the file
 913 * array.  At any such point, we are vulnerable to a dup2() race
 914 * installing a file in the array before us.  We need to detect this and
 915 * fput() the struct file we are about to overwrite in this case.
 916 *
 917 * It should never happen - if we allow dup2() do it, _really_ bad things
 918 * will follow.
 919 */
 920
 921void fastcall fd_install(unsigned int fd, struct file * file)
 922{
 923	struct files_struct *files = current->files;
 924	spin_lock(&files->file_lock);
 925	if (unlikely(files->fd[fd] != NULL))
 926		BUG();
 927	files->fd[fd] = file;
 928	spin_unlock(&files->file_lock);
 929}
 930
 931EXPORT_SYMBOL(fd_install);
 932
 933asmlinkage long sys_open(const char __user * filename, int flags, int mode)
 934{
 935	char * tmp;
 936	int fd, error;
 937
 938#if BITS_PER_LONG != 32
 939	flags |= O_LARGEFILE;
 940#endif
 941	tmp = getname(filename);
 942	fd = PTR_ERR(tmp);
 943	if (!IS_ERR(tmp)) {
 944		fd = get_unused_fd();
 945		if (fd >= 0) {
 946			struct file *f = filp_open(tmp, flags, mode);
 947			error = PTR_ERR(f);
 948			if (IS_ERR(f))
 949				goto out_error;
 950			fd_install(fd, f);
 951		}
 952out:
 953		putname(tmp);
 954	}
 955	return fd;
 956
 957out_error:
 958	put_unused_fd(fd);
 959	fd = error;
 960	goto out;
 961}
 962EXPORT_SYMBOL_GPL(sys_open);
 963
 964#ifndef __alpha__
 965
 966/*
 967 * For backward compatibility?  Maybe this should be moved
 968 * into arch/i386 instead?
 969 */
 970asmlinkage long sys_creat(const char __user * pathname, int mode)
 971{
 972	return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
 973}
 974
 975#endif
 976
 977/*
 978 * "id" is the POSIX thread ID. We use the
 979 * files pointer for this..
 980 */
 981int filp_close(struct file *filp, fl_owner_t id)
 982{
 983	int retval;
 984
 985	/* Report and clear outstanding errors */
 986	retval = filp->f_error;
 987	if (retval)
 988		filp->f_error = 0;
 989
 990	if (!file_count(filp)) {
 991		printk(KERN_ERR "VFS: Close: file count is 0\n");
 992		return retval;
 993	}
 994
 995	if (filp->f_op && filp->f_op->flush) {
 996		int err = filp->f_op->flush(filp);
 997		if (!retval)
 998			retval = err;
 999	}
1000
1001	dnotify_flush(filp, id);
1002	locks_remove_posix(filp, id);
1003	fput(filp);
1004	return retval;
1005}
1006
1007EXPORT_SYMBOL(filp_close);
1008
1009/*
1010 * Careful here! We test whether the file pointer is NULL before
1011 * releasing the fd. This ensures that one clone task can't release
1012 * an fd while another clone is opening it.
1013 */
1014asmlinkage long sys_close(unsigned int fd)
1015{
1016	struct file * filp;
1017	struct files_struct *files = current->files;
1018
1019	spin_lock(&files->file_lock);
1020	if (fd >= files->max_fds)
1021		goto out_unlock;
1022	filp = files->fd[fd];
1023	if (!filp)
1024		goto out_unlock;
1025	files->fd[fd] = NULL;
1026	FD_CLR(fd, files->close_on_exec);
1027	__put_unused_fd(files, fd);
1028	spin_unlock(&files->file_lock);
1029	return filp_close(filp, files);
1030
1031out_unlock:
1032	spin_unlock(&files->file_lock);
1033	return -EBADF;
1034}
1035
1036EXPORT_SYMBOL(sys_close);
1037
1038/*
1039 * This routine simulates a hangup on the tty, to arrange that users
1040 * are given clean terminals at login time.
1041 */
1042asmlinkage long sys_vhangup(void)
1043{
1044	if (capable(CAP_SYS_TTY_CONFIG)) {
1045		tty_vhangup(current->signal->tty);
1046		return 0;
1047	}
1048	return -EPERM;
1049}
1050
1051/*
1052 * Called when an inode is about to be open.
1053 * We use this to disallow opening large files on 32bit systems if
1054 * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
1055 * on this flag in sys_open.
1056 */
1057int generic_file_open(struct inode * inode, struct file * filp)
1058{
1059	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1060		return -EFBIG;
1061	return 0;
1062}
1063
1064EXPORT_SYMBOL(generic_file_open);
1065
1066/*
1067 * This is used by subsystems that don't want seekable
1068 * file descriptors
1069 */
1070int nonseekable_open(struct inode *inode, struct file *filp)
1071{
1072	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1073	return 0;
1074}
1075
1076EXPORT_SYMBOL(nonseekable_open);