/net/socket.c
C | 3433 lines | 2473 code | 489 blank | 471 comment | 393 complexity | e6becb9f67e53c8d7ad1ecd681812bd0 MD5 | raw file
Large files are truncated, but you can click here to view the full file
/*
 * NET		An implementation of the SOCKET network access protocol.
 *
 * Version:	@(#)socket.c	1.1.93	18/02/95
 *
 * Authors:	Orest Zborowski, <obz@Kodak.COM>
 *		Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 * Fixes:
 *		Anonymous	:	NOTSOCK/BADF cleanup. Error fix in
 *					shutdown()
 *		Alan Cox	:	verify_area() fixes
 *		Alan Cox	:	Removed DDI
 *		Jonathan Kamens	:	SOCK_DGRAM reconnect bug
 *		Alan Cox	:	Moved a load of checks to the very
 *					top level.
 *		Alan Cox	:	Move address structures to/from user
 *					mode above the protocol layers.
 *		Rob Janssen	:	Allow 0 length sends.
 *		Alan Cox	:	Asynchronous I/O support (cribbed from the
 *					tty drivers).
 *		Niibe Yutaka	:	Asynchronous I/O for writes (4.4BSD style)
 *		Jeff Uphoff	:	Made max number of sockets command-line
 *					configurable.
 *		Matti Aarnio	:	Made the number of sockets dynamic,
 *					to be allocated when needed, and mr.
 *					Uphoff's max is used as max to be
 *					allowed to allocate.
 *		Linus		:	Argh. removed all the socket allocation
 *					altogether: it's in the inode now.
 *		Alan Cox	:	Made sock_alloc()/sock_release() public
 *					for NetROM and future kernel nfsd type
 *					stuff.
 *		Alan Cox	:	sendmsg/recvmsg basics.
 *		Tom Dyas	:	Export net symbols.
 *		Marcin Dalecki	:	Fixed problems with CONFIG_NET="n".
 *		Alan Cox	:	Added thread locking to sys_* calls
 *					for sockets. May have errors at the
 *					moment.
 *		Kevin Buhr	:	Fixed the dumb errors in the above.
 *		Andi Kleen	:	Some small cleanups, optimizations,
 *					and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) 45 * Tigran Aivazian : Made listen(2) backlog sanity checks 46 * protocol-independent 47 * 48 * 49 * This program is free software; you can redistribute it and/or 50 * modify it under the terms of the GNU General Public License 51 * as published by the Free Software Foundation; either version 52 * 2 of the License, or (at your option) any later version. 53 * 54 * 55 * This module is effectively the top level interface to the BSD socket 56 * paradigm. 57 * 58 * Based upon Swansea University Computer Society NET3.039 59 */ 60 61#include <linux/mm.h> 62#include <linux/socket.h> 63#include <linux/file.h> 64#include <linux/net.h> 65#include <linux/interrupt.h> 66#include <linux/thread_info.h> 67#include <linux/rcupdate.h> 68#include <linux/netdevice.h> 69#include <linux/proc_fs.h> 70#include <linux/seq_file.h> 71#include <linux/mutex.h> 72#include <linux/wanrouter.h> 73#include <linux/if_bridge.h> 74#include <linux/if_frad.h> 75#include <linux/if_vlan.h> 76#include <linux/init.h> 77#include <linux/poll.h> 78#include <linux/cache.h> 79#include <linux/module.h> 80#include <linux/highmem.h> 81#include <linux/mount.h> 82#include <linux/security.h> 83#include <linux/syscalls.h> 84#include <linux/compat.h> 85#include <linux/kmod.h> 86#include <linux/audit.h> 87#include <linux/wireless.h> 88#include <linux/nsproxy.h> 89#include <linux/magic.h> 90#include <linux/slab.h> 91 92#include <asm/uaccess.h> 93#include <asm/unistd.h> 94 95#include <net/compat.h> 96#include <net/wext.h> 97#include <net/cls_cgroup.h> 98 99#include <net/sock.h> 100#include <linux/netfilter.h> 101 102#include <linux/if_tun.h> 103#include <linux/ipv6_route.h> 104#include <linux/route.h> 105#include <linux/sockios.h> 106#include <linux/atalk.h> 107 108#ifdef CONFIG_UID_STAT 109#include <linux/uid_stat.h> 110#endif 111 112static int sock_no_open(struct inode *irrelevant, struct file *dontcare); 113static ssize_t sock_aio_read(struct kiocb 
*iocb, const struct iovec *iov, 114 unsigned long nr_segs, loff_t pos); 115static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 116 unsigned long nr_segs, loff_t pos); 117static int sock_mmap(struct file *file, struct vm_area_struct *vma); 118 119static int sock_close(struct inode *inode, struct file *file); 120static unsigned int sock_poll(struct file *file, 121 struct poll_table_struct *wait); 122static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 123#ifdef CONFIG_COMPAT 124static long compat_sock_ioctl(struct file *file, 125 unsigned int cmd, unsigned long arg); 126#endif 127static int sock_fasync(int fd, struct file *filp, int on); 128static ssize_t sock_sendpage(struct file *file, struct page *page, 129 int offset, size_t size, loff_t *ppos, int more); 130static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 131 struct pipe_inode_info *pipe, size_t len, 132 unsigned int flags); 133 134/* 135 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear 136 * in the operation structures but are done directly via the socketcall() multiplexor. 137 */ 138 139static const struct file_operations socket_file_ops = { 140 .owner = THIS_MODULE, 141 .llseek = no_llseek, 142 .aio_read = sock_aio_read, 143 .aio_write = sock_aio_write, 144 .poll = sock_poll, 145 .unlocked_ioctl = sock_ioctl, 146#ifdef CONFIG_COMPAT 147 .compat_ioctl = compat_sock_ioctl, 148#endif 149 .mmap = sock_mmap, 150 .open = sock_no_open, /* special open code to disallow open via /proc */ 151 .release = sock_close, 152 .fasync = sock_fasync, 153 .sendpage = sock_sendpage, 154 .splice_write = generic_splice_sendpage, 155 .splice_read = sock_splice_read, 156}; 157 158/* 159 * The protocol list. Each protocol is registered in here. 
160 */ 161 162static DEFINE_SPINLOCK(net_family_lock); 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; 164 165/* 166 * Statistics counters of the socket lists 167 */ 168 169static DEFINE_PER_CPU(int, sockets_in_use); 170 171/* 172 * Support routines. 173 * Move socket addresses back and forth across the kernel/user 174 * divide and look after the messy bits. 175 */ 176 177/** 178 * move_addr_to_kernel - copy a socket address into kernel space 179 * @uaddr: Address in user space 180 * @kaddr: Address in kernel space 181 * @ulen: Length in user space 182 * 183 * The address is copied into kernel space. If the provided address is 184 * too long an error code of -EINVAL is returned. If the copy gives 185 * invalid addresses -EFAULT is returned. On a success 0 is returned. 186 */ 187 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) 189{ 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) 191 return -EINVAL; 192 if (ulen == 0) 193 return 0; 194 if (copy_from_user(kaddr, uaddr, ulen)) 195 return -EFAULT; 196 return audit_sockaddr(ulen, kaddr); 197} 198 199/** 200 * move_addr_to_user - copy an address to user space 201 * @kaddr: kernel space address 202 * @klen: length of address in kernel 203 * @uaddr: user space address 204 * @ulen: pointer to user length field 205 * 206 * The value pointed to by ulen on entry is the buffer length available. 207 * This is overwritten with the buffer space used. -EINVAL is returned 208 * if an overlong buffer is specified or a negative buffer size. -EFAULT 209 * is returned if either the buffer or the length field are not 210 * accessible. 211 * After copying the data up to the limit the user specifies, the true 212 * length of the data is written over the length limit the user 213 * specified. Zero is returned for a success. 
214 */ 215 216static int move_addr_to_user(struct sockaddr *kaddr, int klen, 217 void __user *uaddr, int __user *ulen) 218{ 219 int err; 220 int len; 221 222 err = get_user(len, ulen); 223 if (err) 224 return err; 225 if (len > klen) 226 len = klen; 227 if (len < 0 || len > sizeof(struct sockaddr_storage)) 228 return -EINVAL; 229 if (len) { 230 if (audit_sockaddr(klen, kaddr)) 231 return -ENOMEM; 232 if (copy_to_user(uaddr, kaddr, len)) 233 return -EFAULT; 234 } 235 /* 236 * "fromlen shall refer to the value before truncation.." 237 * 1003.1g 238 */ 239 return __put_user(klen, ulen); 240} 241 242static struct kmem_cache *sock_inode_cachep __read_mostly; 243 244static struct inode *sock_alloc_inode(struct super_block *sb) 245{ 246 struct socket_alloc *ei; 247 struct socket_wq *wq; 248 249 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 250 if (!ei) 251 return NULL; 252 wq = kmalloc(sizeof(*wq), GFP_KERNEL); 253 if (!wq) { 254 kmem_cache_free(sock_inode_cachep, ei); 255 return NULL; 256 } 257 init_waitqueue_head(&wq->wait); 258 wq->fasync_list = NULL; 259 RCU_INIT_POINTER(ei->socket.wq, wq); 260 261 ei->socket.state = SS_UNCONNECTED; 262 ei->socket.flags = 0; 263 ei->socket.ops = NULL; 264 ei->socket.sk = NULL; 265 ei->socket.file = NULL; 266 267 return &ei->vfs_inode; 268} 269 270static void sock_destroy_inode(struct inode *inode) 271{ 272 struct socket_alloc *ei; 273 struct socket_wq *wq; 274 275 ei = container_of(inode, struct socket_alloc, vfs_inode); 276 wq = rcu_dereference_protected(ei->socket.wq, 1); 277 kfree_rcu(wq, rcu); 278 kmem_cache_free(sock_inode_cachep, ei); 279} 280 281static void init_once(void *foo) 282{ 283 struct socket_alloc *ei = (struct socket_alloc *)foo; 284 285 inode_init_once(&ei->vfs_inode); 286} 287 288static int init_inodecache(void) 289{ 290 sock_inode_cachep = kmem_cache_create("sock_inode_cache", 291 sizeof(struct socket_alloc), 292 0, 293 (SLAB_HWCACHE_ALIGN | 294 SLAB_RECLAIM_ACCOUNT | 295 SLAB_MEM_SPREAD), 296 init_once); 
297 if (sock_inode_cachep == NULL) 298 return -ENOMEM; 299 return 0; 300} 301 302static const struct super_operations sockfs_ops = { 303 .alloc_inode = sock_alloc_inode, 304 .destroy_inode = sock_destroy_inode, 305 .statfs = simple_statfs, 306}; 307 308/* 309 * sockfs_dname() is called from d_path(). 310 */ 311static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) 312{ 313 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", 314 dentry->d_inode->i_ino); 315} 316 317static const struct dentry_operations sockfs_dentry_operations = { 318 .d_dname = sockfs_dname, 319}; 320 321static struct dentry *sockfs_mount(struct file_system_type *fs_type, 322 int flags, const char *dev_name, void *data) 323{ 324 return mount_pseudo(fs_type, "socket:", &sockfs_ops, 325 &sockfs_dentry_operations, SOCKFS_MAGIC); 326} 327 328static struct vfsmount *sock_mnt __read_mostly; 329 330static struct file_system_type sock_fs_type = { 331 .name = "sockfs", 332 .mount = sockfs_mount, 333 .kill_sb = kill_anon_super, 334}; 335 336/* 337 * Obtains the first available file descriptor and sets it up for use. 338 * 339 * These functions create file structures and maps them to fd space 340 * of the current process. On success it returns file descriptor 341 * and file struct implicitly stored in sock->file. 342 * Note that another thread may close file descriptor before we return 343 * from this function. We use the fact that now we do not refer 344 * to socket after mapping. If one day we will need it, this 345 * function will increment ref. count on file by 1. 346 * 347 * In any case returned fd MAY BE not valid! 348 * This race condition is unavoidable 349 * with shared fd spaces, we cannot solve it inside kernel, 350 * but we take care of internal coherence yet. 
351 */ 352 353static int sock_alloc_file(struct socket *sock, struct file **f, int flags) 354{ 355 struct qstr name = { .name = "" }; 356 struct path path; 357 struct file *file; 358 int fd; 359 360 fd = get_unused_fd_flags(flags); 361 if (unlikely(fd < 0)) 362 return fd; 363 364 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); 365 if (unlikely(!path.dentry)) { 366 put_unused_fd(fd); 367 return -ENOMEM; 368 } 369 path.mnt = mntget(sock_mnt); 370 371 d_instantiate(path.dentry, SOCK_INODE(sock)); 372 SOCK_INODE(sock)->i_fop = &socket_file_ops; 373 374 file = alloc_file(&path, FMODE_READ | FMODE_WRITE, 375 &socket_file_ops); 376 if (unlikely(!file)) { 377 /* drop dentry, keep inode */ 378 ihold(path.dentry->d_inode); 379 path_put(&path); 380 put_unused_fd(fd); 381 return -ENFILE; 382 } 383 384 sock->file = file; 385 file->f_flags = O_RDWR | (flags & O_NONBLOCK); 386 file->f_pos = 0; 387 file->private_data = sock; 388 389 *f = file; 390 return fd; 391} 392 393int sock_map_fd(struct socket *sock, int flags) 394{ 395 struct file *newfile; 396 int fd = sock_alloc_file(sock, &newfile, flags); 397 398 if (likely(fd >= 0)) 399 fd_install(fd, newfile); 400 401 return fd; 402} 403EXPORT_SYMBOL(sock_map_fd); 404 405static struct socket *sock_from_file(struct file *file, int *err) 406{ 407 if (file->f_op == &socket_file_ops) 408 return file->private_data; /* set in sock_map_fd */ 409 410 *err = -ENOTSOCK; 411 return NULL; 412} 413 414/** 415 * sockfd_lookup - Go from a file number to its socket slot 416 * @fd: file handle 417 * @err: pointer to an error code return 418 * 419 * The file handle passed in is locked and the socket it is bound 420 * too is returned. If an error occurs the err pointer is overwritten 421 * with a negative errno code and NULL is returned. The function checks 422 * for both invalid handles and passing a handle which is not a socket. 423 * 424 * On a success the socket object pointer is returned. 
425 */ 426 427struct socket *sockfd_lookup(int fd, int *err) 428{ 429 struct file *file; 430 struct socket *sock; 431 432 file = fget(fd); 433 if (!file) { 434 *err = -EBADF; 435 return NULL; 436 } 437 438 sock = sock_from_file(file, err); 439 if (!sock) 440 fput(file); 441 return sock; 442} 443EXPORT_SYMBOL(sockfd_lookup); 444 445static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) 446{ 447 struct file *file; 448 struct socket *sock; 449 450 *err = -EBADF; 451 file = fget_light(fd, fput_needed); 452 if (file) { 453 sock = sock_from_file(file, err); 454 if (sock) 455 return sock; 456 fput_light(file, *fput_needed); 457 } 458 return NULL; 459} 460 461/** 462 * sock_alloc - allocate a socket 463 * 464 * Allocate a new inode and socket object. The two are bound together 465 * and initialised. The socket is then returned. If we are out of inodes 466 * NULL is returned. 467 */ 468 469static struct socket *sock_alloc(void) 470{ 471 struct inode *inode; 472 struct socket *sock; 473 474 inode = new_inode_pseudo(sock_mnt->mnt_sb); 475 if (!inode) 476 return NULL; 477 478 sock = SOCKET_I(inode); 479 480 kmemcheck_annotate_bitfield(sock, type); 481 inode->i_ino = get_next_ino(); 482 inode->i_mode = S_IFSOCK | S_IRWXUGO; 483 inode->i_uid = current_fsuid(); 484 inode->i_gid = current_fsgid(); 485 486 percpu_add(sockets_in_use, 1); 487 return sock; 488} 489 490/* 491 * In theory you can't get an open on this inode, but /proc provides 492 * a back door. Remember to keep it shut otherwise you'll let the 493 * creepy crawlies in. 
494 */ 495 496static int sock_no_open(struct inode *irrelevant, struct file *dontcare) 497{ 498 return -ENXIO; 499} 500 501const struct file_operations bad_sock_fops = { 502 .owner = THIS_MODULE, 503 .open = sock_no_open, 504 .llseek = noop_llseek, 505}; 506 507/** 508 * sock_release - close a socket 509 * @sock: socket to close 510 * 511 * The socket is released from the protocol stack if it has a release 512 * callback, and the inode is then released if the socket is bound to 513 * an inode not a file. 514 */ 515 516int add_or_remove_port(struct sock *sk, int add_or_remove); /* SSD_RIL: Garbage_Filter_TCP */ 517 518void sock_release(struct socket *sock) 519{ 520 /* ++SSD_RIL: Garbage_Filter_TCP */ 521 if (sock->sk != NULL) 522 add_or_remove_port(sock->sk, 0); 523 /* --SSD_RIL: Garbage_Filter_TCP */ 524 525 if (sock->ops) { 526 struct module *owner = sock->ops->owner; 527 528 sock->ops->release(sock); 529 sock->ops = NULL; 530 module_put(owner); 531 } 532 533 if (rcu_dereference_protected(sock->wq, 1)->fasync_list) 534 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 535 536 percpu_sub(sockets_in_use, 1); 537 if (!sock->file) { 538 iput(SOCK_INODE(sock)); 539 return; 540 } 541 sock->file = NULL; 542} 543EXPORT_SYMBOL(sock_release); 544 545int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) 546{ 547 *tx_flags = 0; 548 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 549 *tx_flags |= SKBTX_HW_TSTAMP; 550 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 551 *tx_flags |= SKBTX_SW_TSTAMP; 552 return 0; 553} 554EXPORT_SYMBOL(sock_tx_timestamp); 555 556static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, 557 struct msghdr *msg, size_t size) 558{ 559 struct sock_iocb *si = kiocb_to_siocb(iocb); 560#ifdef CONFIG_UID_STAT 561 int err; 562#endif 563 sock_update_classid(sock->sk); 564 565 si->sock = sock; 566 si->scm = NULL; 567 si->msg = msg; 568 si->size = size; 569 570#ifdef CONFIG_UID_STAT 571 err = sock->ops->sendmsg(iocb, sock, 
msg, size); 572 if(err > 0) 573 uid_stat_tcp_snd(current_uid(), err); 574 return err; 575#else 576 return sock->ops->sendmsg(iocb, sock, msg, size); 577#endif 578} 579 580static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 581 struct msghdr *msg, size_t size) 582{ 583 int err = security_socket_sendmsg(sock, msg, size); 584 585 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size); 586} 587 588int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) 589{ 590 struct kiocb iocb; 591 struct sock_iocb siocb; 592 int ret; 593 594 init_sync_kiocb(&iocb, NULL); 595 iocb.private = &siocb; 596 ret = __sock_sendmsg(&iocb, sock, msg, size); 597 if (-EIOCBQUEUED == ret) 598 ret = wait_on_sync_kiocb(&iocb); 599 return ret; 600} 601EXPORT_SYMBOL(sock_sendmsg); 602 603int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) 604{ 605 struct kiocb iocb; 606 struct sock_iocb siocb; 607 int ret; 608 609 init_sync_kiocb(&iocb, NULL); 610 iocb.private = &siocb; 611 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size); 612 if (-EIOCBQUEUED == ret) 613 ret = wait_on_sync_kiocb(&iocb); 614 return ret; 615} 616 617int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 618 struct kvec *vec, size_t num, size_t size) 619{ 620 mm_segment_t oldfs = get_fs(); 621 int result; 622 623 set_fs(KERNEL_DS); 624 /* 625 * the following is safe, since for compiler definitions of kvec and 626 * iovec are identical, yielding the same in-core layout and alignment 627 */ 628 msg->msg_iov = (struct iovec *)vec; 629 msg->msg_iovlen = num; 630 result = sock_sendmsg(sock, msg, size); 631 set_fs(oldfs); 632 return result; 633} 634EXPORT_SYMBOL(kernel_sendmsg); 635 636static int ktime2ts(ktime_t kt, struct timespec *ts) 637{ 638 if (kt.tv64) { 639 *ts = ktime_to_timespec(kt); 640 return 1; 641 } else { 642 return 0; 643 } 644} 645 646/* 647 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) 648 */ 649void __sock_recv_timestamp(struct 
msghdr *msg, struct sock *sk, 650 struct sk_buff *skb) 651{ 652 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); 653 struct timespec ts[3]; 654 int empty = 1; 655 struct skb_shared_hwtstamps *shhwtstamps = 656 skb_hwtstamps(skb); 657 658 /* Race occurred between timestamp enabling and packet 659 receiving. Fill in the current time for now. */ 660 if (need_software_tstamp && skb->tstamp.tv64 == 0) 661 __net_timestamp(skb); 662 663 if (need_software_tstamp) { 664 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { 665 struct timeval tv; 666 skb_get_timestamp(skb, &tv); 667 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, 668 sizeof(tv), &tv); 669 } else { 670 skb_get_timestampns(skb, &ts[0]); 671 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, 672 sizeof(ts[0]), &ts[0]); 673 } 674 } 675 676 677 memset(ts, 0, sizeof(ts)); 678 if (skb->tstamp.tv64 && 679 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { 680 skb_get_timestampns(skb, ts + 0); 681 empty = 0; 682 } 683 if (shhwtstamps) { 684 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && 685 ktime2ts(shhwtstamps->syststamp, ts + 1)) 686 empty = 0; 687 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && 688 ktime2ts(shhwtstamps->hwtstamp, ts + 2)) 689 empty = 0; 690 } 691 if (!empty) 692 put_cmsg(msg, SOL_SOCKET, 693 SCM_TIMESTAMPING, sizeof(ts), &ts); 694} 695EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 696 697static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, 698 struct sk_buff *skb) 699{ 700 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) 701 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, 702 sizeof(__u32), &skb->dropcount); 703} 704 705void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, 706 struct sk_buff *skb) 707{ 708 sock_recv_timestamp(msg, sk, skb); 709 sock_recv_drops(msg, sk, skb); 710} 711EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); 712 713static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, 714 struct msghdr *msg, size_t size, int flags) 715{ 716 struct sock_iocb 
*si = kiocb_to_siocb(iocb); 717#ifdef CONFIG_UID_STAT 718 int err; 719#endif 720 sock_update_classid(sock->sk); 721 722 si->sock = sock; 723 si->scm = NULL; 724 si->msg = msg; 725 si->size = size; 726 si->flags = flags; 727#ifdef CONFIG_UID_STAT 728 if(sock->ops != NULL) { 729 err = sock->ops->recvmsg(iocb, sock, msg, size, flags); 730 } else { 731 err = -EFAULT; 732 printk(KERN_ERR "[NET]__sock_recvmsg_nosec:sock->ops is NULL\n"); 733 } 734 if(err > 0) 735 uid_stat_tcp_rcv(current_uid(), err); 736 return err; 737#else 738 return sock->ops->recvmsg(iocb, sock, msg, size, flags); 739#endif 740} 741 742static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 743 struct msghdr *msg, size_t size, int flags) 744{ 745 int err = security_socket_recvmsg(sock, msg, size, flags); 746 747 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); 748} 749 750int sock_recvmsg(struct socket *sock, struct msghdr *msg, 751 size_t size, int flags) 752{ 753 struct kiocb iocb; 754 struct sock_iocb siocb; 755 int ret; 756 757 init_sync_kiocb(&iocb, NULL); 758 iocb.private = &siocb; 759 ret = __sock_recvmsg(&iocb, sock, msg, size, flags); 760 if (-EIOCBQUEUED == ret) 761 ret = wait_on_sync_kiocb(&iocb); 762 return ret; 763} 764EXPORT_SYMBOL(sock_recvmsg); 765 766static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, 767 size_t size, int flags) 768{ 769 struct kiocb iocb; 770 struct sock_iocb siocb; 771 int ret; 772 773 init_sync_kiocb(&iocb, NULL); 774 iocb.private = &siocb; 775 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); 776 if (-EIOCBQUEUED == ret) 777 ret = wait_on_sync_kiocb(&iocb); 778 return ret; 779} 780 781/** 782 * kernel_recvmsg - Receive a message from a socket (kernel space) 783 * @sock: The socket to receive the message from 784 * @msg: Received message 785 * @vec: Input s/g array for message data 786 * @num: Size of input s/g array 787 * @size: Number of bytes to read 788 * @flags: Message flags (MSG_DONTWAIT, 
etc...) 789 * 790 * On return the msg structure contains the scatter/gather array passed in the 791 * vec argument. The array is modified so that it consists of the unfilled 792 * portion of the original array. 793 * 794 * The returned value is the total number of bytes received, or an error. 795 */ 796int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 797 struct kvec *vec, size_t num, size_t size, int flags) 798{ 799 mm_segment_t oldfs = get_fs(); 800 int result; 801 802 set_fs(KERNEL_DS); 803 /* 804 * the following is safe, since for compiler definitions of kvec and 805 * iovec are identical, yielding the same in-core layout and alignment 806 */ 807 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; 808 result = sock_recvmsg(sock, msg, size, flags); 809 set_fs(oldfs); 810 return result; 811} 812EXPORT_SYMBOL(kernel_recvmsg); 813 814static void sock_aio_dtor(struct kiocb *iocb) 815{ 816 kfree(iocb->private); 817} 818 819static ssize_t sock_sendpage(struct file *file, struct page *page, 820 int offset, size_t size, loff_t *ppos, int more) 821{ 822 struct socket *sock; 823 int flags; 824 825 sock = file->private_data; 826 827 flags = !(file->f_flags & O_NONBLOCK) ? 
0 : MSG_DONTWAIT; 828 if (more) 829 flags |= MSG_MORE; 830 831 return kernel_sendpage(sock, page, offset, size, flags); 832} 833 834static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 835 struct pipe_inode_info *pipe, size_t len, 836 unsigned int flags) 837{ 838 struct socket *sock = file->private_data; 839 840 if (unlikely(!sock->ops->splice_read)) 841 return -EINVAL; 842 843 sock_update_classid(sock->sk); 844 845 return sock->ops->splice_read(sock, ppos, pipe, len, flags); 846} 847 848static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, 849 struct sock_iocb *siocb) 850{ 851 if (!is_sync_kiocb(iocb)) { 852 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); 853 if (!siocb) 854 return NULL; 855 iocb->ki_dtor = sock_aio_dtor; 856 } 857 858 siocb->kiocb = iocb; 859 iocb->private = siocb; 860 return siocb; 861} 862 863static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, 864 struct file *file, const struct iovec *iov, 865 unsigned long nr_segs) 866{ 867 struct socket *sock = file->private_data; 868 size_t size = 0; 869 int i; 870 871 for (i = 0; i < nr_segs; i++) 872 size += iov[i].iov_len; 873 874 msg->msg_name = NULL; 875 msg->msg_namelen = 0; 876 msg->msg_control = NULL; 877 msg->msg_controllen = 0; 878 msg->msg_iov = (struct iovec *)iov; 879 msg->msg_iovlen = nr_segs; 880 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0; 881 882 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); 883} 884 885static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 886 unsigned long nr_segs, loff_t pos) 887{ 888 struct sock_iocb siocb, *x; 889 890 if (pos != 0) 891 return -ESPIPE; 892 893 if (iocb->ki_left == 0) /* Match SYS5 behaviour */ 894 return 0; 895 896 897 x = alloc_sock_iocb(iocb, &siocb); 898 if (!x) 899 return -ENOMEM; 900 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 901} 902 903static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, 904 struct file *file, const struct iovec *iov, 905 unsigned long nr_segs) 906{ 907 struct socket *sock = file->private_data; 908 size_t size = 0; 909 int i; 910 911 for (i = 0; i < nr_segs; i++) 912 size += iov[i].iov_len; 913 914 msg->msg_name = NULL; 915 msg->msg_namelen = 0; 916 msg->msg_control = NULL; 917 msg->msg_controllen = 0; 918 msg->msg_iov = (struct iovec *)iov; 919 msg->msg_iovlen = nr_segs; 920 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 921 if (sock->type == SOCK_SEQPACKET) 922 msg->msg_flags |= MSG_EOR; 923 924 return __sock_sendmsg(iocb, sock, msg, size); 925} 926 927static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 928 unsigned long nr_segs, loff_t pos) 929{ 930 struct sock_iocb siocb, *x; 931 932 if (pos != 0) 933 return -ESPIPE; 934 935 x = alloc_sock_iocb(iocb, &siocb); 936 if (!x) 937 return -ENOMEM; 938 939 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 940} 941 942/* 943 * Atomic setting of ioctl hooks to avoid race 944 * with module unload. 
945 */ 946 947static DEFINE_MUTEX(br_ioctl_mutex); 948static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg); 949 950void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) 951{ 952 mutex_lock(&br_ioctl_mutex); 953 br_ioctl_hook = hook; 954 mutex_unlock(&br_ioctl_mutex); 955} 956EXPORT_SYMBOL(brioctl_set); 957 958static DEFINE_MUTEX(vlan_ioctl_mutex); 959static int (*vlan_ioctl_hook) (struct net *, void __user *arg); 960 961void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) 962{ 963 mutex_lock(&vlan_ioctl_mutex); 964 vlan_ioctl_hook = hook; 965 mutex_unlock(&vlan_ioctl_mutex); 966} 967EXPORT_SYMBOL(vlan_ioctl_set); 968 969static DEFINE_MUTEX(dlci_ioctl_mutex); 970static int (*dlci_ioctl_hook) (unsigned int, void __user *); 971 972void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) 973{ 974 mutex_lock(&dlci_ioctl_mutex); 975 dlci_ioctl_hook = hook; 976 mutex_unlock(&dlci_ioctl_mutex); 977} 978EXPORT_SYMBOL(dlci_ioctl_set); 979 980static long sock_do_ioctl(struct net *net, struct socket *sock, 981 unsigned int cmd, unsigned long arg) 982{ 983 int err; 984 void __user *argp = (void __user *)arg; 985 986 err = sock->ops->ioctl(sock, cmd, arg); 987 988 /* 989 * If this ioctl is unknown try to hand it down 990 * to the NIC driver. 991 */ 992 if (err == -ENOIOCTLCMD) 993 err = dev_ioctl(net, cmd, argp); 994 995 return err; 996} 997 998/* 999 * With an ioctl, arg may well be a user mode pointer, but we don't know 1000 * what to do with it - that's up to the protocol still. 
1001 */ 1002 1003static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) 1004{ 1005 struct socket *sock; 1006 struct sock *sk; 1007 void __user *argp = (void __user *)arg; 1008 int pid, err; 1009 struct net *net; 1010 1011 sock = file->private_data; 1012 sk = sock->sk; 1013 net = sock_net(sk); 1014 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { 1015 err = dev_ioctl(net, cmd, argp); 1016 } else 1017#ifdef CONFIG_WEXT_CORE 1018 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { 1019 err = dev_ioctl(net, cmd, argp); 1020 } else 1021#endif 1022 switch (cmd) { 1023 case FIOSETOWN: 1024 case SIOCSPGRP: 1025 err = -EFAULT; 1026 if (get_user(pid, (int __user *)argp)) 1027 break; 1028 err = f_setown(sock->file, pid, 1); 1029 break; 1030 case FIOGETOWN: 1031 case SIOCGPGRP: 1032 err = put_user(f_getown(sock->file), 1033 (int __user *)argp); 1034 break; 1035 case SIOCGIFBR: 1036 case SIOCSIFBR: 1037 case SIOCBRADDBR: 1038 case SIOCBRDELBR: 1039 err = -ENOPKG; 1040 if (!br_ioctl_hook) 1041 request_module("bridge"); 1042 1043 mutex_lock(&br_ioctl_mutex); 1044 if (br_ioctl_hook) 1045 err = br_ioctl_hook(net, cmd, argp); 1046 mutex_unlock(&br_ioctl_mutex); 1047 break; 1048 case SIOCGIFVLAN: 1049 case SIOCSIFVLAN: 1050 err = -ENOPKG; 1051 if (!vlan_ioctl_hook) 1052 request_module("8021q"); 1053 1054 mutex_lock(&vlan_ioctl_mutex); 1055 if (vlan_ioctl_hook) 1056 err = vlan_ioctl_hook(net, argp); 1057 mutex_unlock(&vlan_ioctl_mutex); 1058 break; 1059 case SIOCADDDLCI: 1060 case SIOCDELDLCI: 1061 err = -ENOPKG; 1062 if (!dlci_ioctl_hook) 1063 request_module("dlci"); 1064 1065 mutex_lock(&dlci_ioctl_mutex); 1066 if (dlci_ioctl_hook) 1067 err = dlci_ioctl_hook(cmd, argp); 1068 mutex_unlock(&dlci_ioctl_mutex); 1069 break; 1070 default: 1071 err = sock_do_ioctl(net, sock, cmd, arg); 1072 break; 1073 } 1074 return err; 1075} 1076 1077int sock_create_lite(int family, int type, int protocol, struct socket **res) 1078{ 1079 int err; 1080 struct socket *sock = 
NULL; 1081 1082 err = security_socket_create(family, type, protocol, 1); 1083 if (err) 1084 goto out; 1085 1086 sock = sock_alloc(); 1087 if (!sock) { 1088 err = -ENOMEM; 1089 goto out; 1090 } 1091 1092 sock->type = type; 1093 err = security_socket_post_create(sock, family, type, protocol, 1); 1094 if (err) 1095 goto out_release; 1096 1097out: 1098 *res = sock; 1099 return err; 1100out_release: 1101 sock_release(sock); 1102 sock = NULL; 1103 goto out; 1104} 1105EXPORT_SYMBOL(sock_create_lite); 1106 1107/* No kernel lock held - perfect */ 1108static unsigned int sock_poll(struct file *file, poll_table *wait) 1109{ 1110 struct socket *sock; 1111 1112 /* 1113 * We can't return errors to poll, so it's either yes or no. 1114 */ 1115 sock = file->private_data; 1116 if(sock->ops == NULL) { 1117 printk(KERN_ERR "[NET]sock_poll: sock->ops is NULL\n"); 1118 return -EFAULT; 1119 } else { 1120 return sock->ops->poll(file, sock, wait); 1121 } 1122} 1123 1124static int sock_mmap(struct file *file, struct vm_area_struct *vma) 1125{ 1126 struct socket *sock = file->private_data; 1127 1128 return sock->ops->mmap(file, sock, vma); 1129} 1130 1131static int sock_close(struct inode *inode, struct file *filp) 1132{ 1133 /* 1134 * It was possible the inode is NULL we were 1135 * closing an unfinished socket. 1136 */ 1137 1138 if (!inode) { 1139 printk(KERN_DEBUG "sock_close: NULL inode\n"); 1140 return 0; 1141 } 1142 sock_release(SOCKET_I(inode)); 1143 return 0; 1144} 1145 1146/* 1147 * Update the socket async list 1148 * 1149 * Fasync_list locking strategy. 1150 * 1151 * 1. fasync_list is modified only under process context socket lock 1152 * i.e. under semaphore. 1153 * 2. 
fasync_list is used under read_lock(&sk->sk_callback_lock) 1154 * or under socket lock 1155 */ 1156 1157static int sock_fasync(int fd, struct file *filp, int on) 1158{ 1159 struct socket *sock = filp->private_data; 1160 struct sock *sk = sock->sk; 1161 struct socket_wq *wq; 1162 1163 if (sk == NULL) 1164 return -EINVAL; 1165 1166 lock_sock(sk); 1167 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk)); 1168 fasync_helper(fd, filp, on, &wq->fasync_list); 1169 1170 if (!wq->fasync_list) 1171 sock_reset_flag(sk, SOCK_FASYNC); 1172 else 1173 sock_set_flag(sk, SOCK_FASYNC); 1174 1175 release_sock(sk); 1176 return 0; 1177} 1178 1179/* This function may be called only under socket lock or callback_lock or rcu_lock */ 1180 1181int sock_wake_async(struct socket *sock, int how, int band) 1182{ 1183 struct socket_wq *wq; 1184 1185 if (!sock) 1186 return -1; 1187 rcu_read_lock(); 1188 wq = rcu_dereference(sock->wq); 1189 if (!wq || !wq->fasync_list) { 1190 rcu_read_unlock(); 1191 return -1; 1192 } 1193 switch (how) { 1194 case SOCK_WAKE_WAITD: 1195 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) 1196 break; 1197 goto call_kill; 1198 case SOCK_WAKE_SPACE: 1199 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) 1200 break; 1201 /* fall through */ 1202 case SOCK_WAKE_IO: 1203call_kill: 1204 kill_fasync(&wq->fasync_list, SIGIO, band); 1205 break; 1206 case SOCK_WAKE_URG: 1207 kill_fasync(&wq->fasync_list, SIGURG, band); 1208 } 1209 rcu_read_unlock(); 1210 return 0; 1211} 1212EXPORT_SYMBOL(sock_wake_async); 1213 1214int __sock_create(struct net *net, int family, int type, int protocol, 1215 struct socket **res, int kern) 1216{ 1217 int err; 1218 struct socket *sock; 1219 const struct net_proto_family *pf; 1220 1221 /* 1222 * Check protocol is in range 1223 */ 1224 if (family < 0 || family >= NPROTO) 1225 return -EAFNOSUPPORT; 1226 if (type < 0 || type >= SOCK_MAX) 1227 return -EINVAL; 1228 1229 /* Compatibility. 
1230 1231 This uglymoron is moved from INET layer to here to avoid 1232 deadlock in module load. 1233 */ 1234 if (family == PF_INET && type == SOCK_PACKET) { 1235 static int warned; 1236 if (!warned) { 1237 warned = 1; 1238 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", 1239 current->comm); 1240 } 1241 family = PF_PACKET; 1242 } 1243 1244 err = security_socket_create(family, type, protocol, kern); 1245 if (err) 1246 return err; 1247 1248 /* 1249 * Allocate the socket and allow the family to set things up. if 1250 * the protocol is 0, the family is instructed to select an appropriate 1251 * default. 1252 */ 1253 sock = sock_alloc(); 1254 if (!sock) { 1255 if (net_ratelimit()) 1256 printk(KERN_WARNING "socket: no more sockets\n"); 1257 return -ENFILE; /* Not exactly a match, but its the 1258 closest posix thing */ 1259 } 1260 1261 sock->type = type; 1262 1263#ifdef CONFIG_MODULES 1264 /* Attempt to load a protocol module if the find failed. 1265 * 1266 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 1267 * requested real, full-featured networking support upon configuration. 1268 * Otherwise module support will break! 1269 */ 1270 if (rcu_access_pointer(net_families[family]) == NULL) 1271 request_module("net-pf-%d", family); 1272#endif 1273 1274 rcu_read_lock(); 1275 pf = rcu_dereference(net_families[family]); 1276 err = -EAFNOSUPPORT; 1277 if (!pf) 1278 goto out_release; 1279 1280 /* 1281 * We will call the ->create function, that possibly is in a loadable 1282 * module, so we have to bump that loadable module refcnt first. 1283 */ 1284 if (!try_module_get(pf->owner)) 1285 goto out_release; 1286 1287 /* Now protected by module ref count */ 1288 rcu_read_unlock(); 1289 1290 err = pf->create(net, sock, protocol, kern); 1291 if (err < 0) 1292 goto out_module_put; 1293 1294 /* 1295 * Now to bump the refcnt of the [loadable] module that owns this 1296 * socket at sock_release time we decrement its refcnt. 
1297 */ 1298 if (!try_module_get(sock->ops->owner)) 1299 goto out_module_busy; 1300 1301 /* 1302 * Now that we're done with the ->create function, the [loadable] 1303 * module can have its refcnt decremented 1304 */ 1305 module_put(pf->owner); 1306 err = security_socket_post_create(sock, family, type, protocol, kern); 1307 if (err) 1308 goto out_sock_release; 1309 *res = sock; 1310 1311 /* ++SSD_RIL: Garbage_Filter_UDP */ 1312 #ifdef CONFIG_ARCH_MSM8960 1313 if (sock->sk->sk_protocol == IPPROTO_UDP) 1314 add_or_remove_port(sock->sk, 1); 1315 #endif 1316 /* --SSD_RIL: Garbage_Filter_UDP */ 1317 1318 return 0; 1319 1320out_module_busy: 1321 err = -EAFNOSUPPORT; 1322out_module_put: 1323 sock->ops = NULL; 1324 module_put(pf->owner); 1325out_sock_release: 1326 sock_release(sock); 1327 return err; 1328 1329out_release: 1330 rcu_read_unlock(); 1331 goto out_sock_release; 1332} 1333EXPORT_SYMBOL(__sock_create); 1334 1335int sock_create(int family, int type, int protocol, struct socket **res) 1336{ 1337 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); 1338} 1339EXPORT_SYMBOL(sock_create); 1340 1341int sock_create_kern(int family, int type, int protocol, struct socket **res) 1342{ 1343 return __sock_create(&init_net, family, type, protocol, res, 1); 1344} 1345EXPORT_SYMBOL(sock_create_kern); 1346 1347SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) 1348{ 1349 int retval; 1350 struct socket *sock; 1351 int flags; 1352 1353 /* Check the SOCK_* constants for consistency. 
*/ 1354 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); 1355 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); 1356 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); 1357 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); 1358 1359 flags = type & ~SOCK_TYPE_MASK; 1360 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1361 return -EINVAL; 1362 type &= SOCK_TYPE_MASK; 1363 1364 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1365 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1366 1367 retval = sock_create(family, type, protocol, &sock); 1368 if (retval < 0) 1369 goto out; 1370 1371 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); 1372 if (retval < 0) 1373 goto out_release; 1374 1375out: 1376 /* It may be already another descriptor 8) Not kernel problem. */ 1377 return retval; 1378 1379out_release: 1380 sock_release(sock); 1381 return retval; 1382} 1383 1384/* 1385 * Create a pair of connected sockets. 1386 */ 1387 1388SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, 1389 int __user *, usockvec) 1390{ 1391 struct socket *sock1, *sock2; 1392 int fd1, fd2, err; 1393 struct file *newfile1, *newfile2; 1394 int flags; 1395 1396 flags = type & ~SOCK_TYPE_MASK; 1397 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1398 return -EINVAL; 1399 type &= SOCK_TYPE_MASK; 1400 1401 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1402 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1403 1404 /* 1405 * Obtain the first socket and check if the underlying protocol 1406 * supports the socketpair call. 
1407 */ 1408 1409 err = sock_create(family, type, protocol, &sock1); 1410 if (err < 0) 1411 goto out; 1412 1413 err = sock_create(family, type, protocol, &sock2); 1414 if (err < 0) 1415 goto out_release_1; 1416 1417 err = sock1->ops->socketpair(sock1, sock2); 1418 if (err < 0) 1419 goto out_release_both; 1420 1421 fd1 = sock_alloc_file(sock1, &newfile1, flags); 1422 if (unlikely(fd1 < 0)) { 1423 err = fd1; 1424 goto out_release_both; 1425 } 1426 1427 fd2 = sock_alloc_file(sock2, &newfile2, flags); 1428 if (unlikely(fd2 < 0)) { 1429 err = fd2; 1430 fput(newfile1); 1431 put_unused_fd(fd1); 1432 sock_release(sock2); 1433 goto out; 1434 } 1435 1436 audit_fd_pair(fd1, fd2); 1437 fd_install(fd1, newfile1); 1438 fd_install(fd2, newfile2); 1439 /* fd1 and fd2 may be already another descriptors. 1440 * Not kernel problem. 1441 */ 1442 1443 err = put_user(fd1, &usockvec[0]); 1444 if (!err) 1445 err = put_user(fd2, &usockvec[1]); 1446 if (!err) 1447 return 0; 1448 1449 sys_close(fd2); 1450 sys_close(fd1); 1451 return err; 1452 1453out_release_both: 1454 sock_release(sock2); 1455out_release_1: 1456 sock_release(sock1); 1457out: 1458 return err; 1459} 1460 1461/* 1462 * Bind a name to a socket. Nothing much to do here since it's 1463 * the protocol's responsibility to handle the local address. 1464 * 1465 * We move the socket address to kernel space before we call 1466 * the protocol layer (having also checked the address is ok). 
1467 */ 1468 1469SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) 1470{ 1471 struct socket *sock; 1472 struct sockaddr_storage address; 1473 int err, fput_needed; 1474 1475 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1476 if (sock) { 1477 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); 1478 if (err >= 0) { 1479 err = security_socket_bind(sock, 1480 (struct sockaddr *)&address, 1481 addrlen); 1482 if (!err) 1483 err = sock->ops->bind(sock, 1484 (struct sockaddr *) 1485 &address, addrlen); 1486 } 1487 fput_light(sock->file, fput_needed); 1488 } 1489 return err; 1490} 1491 1492/* 1493 * Perform a listen. Basically, we allow the protocol to do anything 1494 * necessary for a listen, and if that works, we mark the socket as 1495 * ready for listening. 1496 */ 1497 1498SYSCALL_DEFINE2(listen, int, fd, int, backlog) 1499{ 1500 struct socket *sock; 1501 int err, fput_needed; 1502 int somaxconn; 1503 1504 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1505 if (sock) { 1506 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; 1507 if ((unsigned)backlog > somaxconn) 1508 backlog = somaxconn; 1509 1510 err = security_socket_listen(sock, backlog); 1511 if (!err) 1512 err = sock->ops->listen(sock, backlog); 1513 1514 fput_light(sock->file, fput_needed); 1515 /* ++SSD_RIL: Garbage_Filter_TCP */ 1516 if (sock->sk != NULL) 1517 add_or_remove_port(sock->sk, 1); 1518 /* --SSD_RIL: Garbage_Filter_TCP */ 1519 } 1520 return err; 1521} 1522 1523/* 1524 * For accept, we attempt to create a new socket, set up the link 1525 * with the client, wake up the client, then return the new 1526 * connected fd. We collect the address of the connector in kernel 1527 * space and move it to user at the very end. This is unclean because 1528 * we open the socket then return an error. 1529 * 1530 * 1003.1g adds the ability to recvmsg() to query connection pending 1531 * status to recvmsg. 
We need to add that support in a way thats 1532 * clean when we restucture accept also. 1533 */ 1534 1535SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, 1536 int __user *, upeer_addrlen, int, flags) 1537{ 1538 struct socket *sock, *newsock; 1539 struct file *newfile; 1540 int err, len, newfd, fput_needed; 1541 struct sockaddr_storage address; 1542 1543 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1544 return -EINVAL; 1545 1546 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1547 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1548 1549 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1550 if (!sock) 1551 goto out; 1552 1553 err = -ENFILE; 1554 newsock = sock_alloc(); 1555 if (!newsock) 1556 goto out_put; 1557 1558 newsock->type = sock->type; 1559 newsock->ops = sock->ops; 1560 1561 /* 1562 * We don't need try_module_get here, as the listening socket (sock) 1563 * has the protocol module (sock->ops->owner) held. 1564 */ 1565 __module_get(newsock->ops->owner); 1566 1567 newfd = sock_alloc_file(newsock, &newfile, flags); 1568 if (unlikely(newfd < 0)) { 1569 err = newfd; 1570 sock_release(newsock); 1571 goto out_put; 1572 } 1573 1574 err = security_socket_accept(sock, newsock); 1575 if (err) 1576 goto out_fd; 1577 1578 err = sock->ops->accept(sock, newsock, sock->file->f_flags); 1579 if (err < 0) 1580 goto out_fd; 1581 1582 if (upeer_sockaddr) { 1583 if (newsock->ops->getname(newsock, (struct sockaddr *)&address, 1584 &len, 2) < 0) { 1585 err = -ECONNABORTED; 1586 goto out_fd; 1587 } 1588 err = move_addr_to_user((struct sockaddr *)&address, 1589 len, upeer_sockaddr, upeer_addrlen); 1590 if (err < 0) 1591 goto out_fd; 1592 } 1593 1594 /* File flags are not inherited via accept() unlike another OSes. 
*/ 1595 1596 fd_install(newfd, newfile); 1597 err = newfd; 1598 1599out_put: 1600 fput_light(sock->file, fput_needed); 1601out: 1602 return err; 1603out_fd: 1604 fput(newfile); 1605 put_unused_fd(newfd); 1606 goto out_put; 1607} 1608 1609SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, 1610 int __user *, upeer_addrlen) 1611{ 1612 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); 1613} 1614 1615/* 1616 * Attempt to connect to a socket with the server address. The address 1617 * is in user space so we verify it is OK and move it to kernel space. 1618 * 1619 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to 1620 * break bindings 1621 * 1622 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and 1623 * other SEQPACKET protocols that take time to connect() as it doesn't 1624 * include the -EINPROGRESS status for such sockets. 1625 */ 1626 1627SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, 1628 int, addrlen) 1629{ 1630 struct socket *sock; 1631 struct sockaddr_storage address; 1632 int err, fput_needed; 1633 1634 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1635 if (!sock) 1636 goto out; 1637 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); 1638 if (err < 0) 1639 goto out_put; 1640 1641 err = 1642 security_socket_connect(sock, (struct sockaddr *)&address, addrlen); 1643 if (err) 1644 goto out_put; 1645 1646 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, 1647 sock->file->f_flags); 1648out_put: 1649 fput_light(sock->file, fput_needed); 1650out: 1651 return err; 1652} 1653 1654/* 1655 * Get the local address ('name') of a socket object. Move the obtained 1656 * name to user space. 
1657 */ 1658 1659SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, 1660 int __user *, usockaddr_len) 1661{ 1662 struct socket *sock; 1663 struct sockaddr_storage address; 1664 int len, err, fput_needed; 1665 1666 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1667 if (!sock) 1668 goto out; 1669 1670 err = security_socket_getsockname(sock); 1671 if (err) 1672 goto out_put; 1673 1674 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); 1675 if (err) 1676 goto out_put; 1677 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); 1678 1679out_put: 1680 fput_light(sock->file, fput_needed); 1681out: 1682 return err; 1683} 1684 1685/* 1686 * Get the remote address ('name') of a socket object. Move the obtained 1687 * name to user space. 1688 */ 1689 1690SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, 1691 int __user *, usockaddr_len) 1692{ 1693 struct socket *sock; 1694 struct sockaddr_storage address; 1695 int len, err, fput_needed; 1696 1697 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1698 if (sock != NULL) { 1699 err = security_socket_getpeername(sock); 1700 if (err) { 1701 fput_light(sock->file, fput_needed); 1702 return err; 1703 } 1704 1705 err = 1706 sock->ops->getname(sock, (struct sockaddr *)&address, &len, 1707 1); 1708 if (!err) 1709 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, 1710 usockaddr_len); 1711 fput_light(sock->file, fput_needed); 1712 } 1713 return err; 1714} 1715 1716/* 1717 * Send a datagram to a given address. We move the address into kernel 1718 * space and check the user space data area is readable before invoking 1719 * the protocol. 
1720 */ 1721 1722SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, 1723 unsigned, flags, struct sockaddr __user *, addr, 1724 int, addr_len) 1725{ 1726 struct socket *sock; 1727 struct sockaddr_storage address; 1728 int err; 1729 struct msghdr msg; 1730 struct iovec iov; 1731 int fput_needed; 1732 1733 if (len > INT_MAX) 1734 len = INT_MAX; 1735 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1736 if (!sock) 1737 goto out; 1738 1739 iov.iov_base = buff; 1740 iov.iov_len = len; 1741 msg.msg_name = NULL; 1742 msg.msg_iov = &iov; 1743 msg.msg_iovlen = 1; 1744 msg.msg_control = NULL; 1745 msg.msg_controllen = 0; 1746 msg.msg_namelen = 0; 1747 if (addr) { 1748 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); 1749 if (err < 0) 1750 goto out_put; 1751 msg.msg_name = (struct sockaddr *)&address; 1752 msg.msg_namelen = addr_len; 1753 } 1754 if (sock->file->f_flags & O_NONBLOCK) 1755 flags |= MSG_DONTWAIT; 1756 msg.msg_flags = flags; 1757 err = sock_sendmsg(sock, &msg, len); 1758 1759out_put: 1760 fput_light(sock->file, fput_needed); 1761out: 1762 return err; 1763} 1764 1765/* 1766 * Send a datagram down a socket. 1767 */ 1768 1769SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, 1770 unsigned, flags) 1771{ 1772 return sys_sendto(fd, buff, len, flags, NULL, 0); 1773} 1774 1775/* 1776 * Receive a frame from the socket and optionally record the address of the 1777 * sender. We verify the buffers are writable and if needed move the 1778 * sender address from kernel to user space. 
1779 */ 1780 1781SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, 1782 unsigned, flags, struct sockaddr __user *, addr, 1783 int __user *, addr_len) 1784{ 1785 struct socket *sock; 1786 struct iovec iov; 1787 struct msghdr msg; 1788 struct sockaddr_storage address; 1789 int err, err2; 1790 int fput_needed; 1791 1792 if (size > INT_MAX) 1793 size = INT_MAX; 1794 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1795 if (!sock) 1796 goto out; 1797 1798 msg.msg_control = NULL; 1799 msg.msg_controllen = 0; 1800 msg.msg_iovlen = 1; 1801 msg.msg_iov = &iov; 1802 iov.iov_len = size; 1803 iov.iov_base = ubuf; 1804 msg.msg_name = (struct sockaddr *)&address; 1805 msg.msg_namelen = sizeof(address); 1806 if (sock->file->f_flags & O_NONBLOCK) 1807 flags |= MSG_DONTWAIT; 1808 err = sock_recvmsg(sock, &msg, size, flags); 1809 1810 if (err >= 0 && addr != NULL) { 1811 err2 = move_addr_to_user((struct sockaddr *)&address, 1812 msg.msg_namelen, addr, addr_len); 1813 if (err2 < 0) 1814 err = err2; 1815 } 1816 1817 fput_light(sock->file, fput_needed); 1818out: 1819 return err; 1820} 1821 1822/* 1823 * Receive a datagram from a socket. 1824 */ 1825 1826asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, 1827 unsigned flags) 1828{ 1829 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); 1830} 1831 1832/* 1833 * Set a socket option. Because we don't know the option lengths we have 1834 * to pass the user mode parameter for the protocols to sort out. 
1835 */ 1836 1837SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, 1838 char __user *, optval, int, optlen) 1839{ 1840 int err, fput_needed; 1841 struct socket *sock; 1842 1843 if (optlen < 0) 1844 return -EINVAL; 1845 1846 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1847 if (sock != NULL) { 1848 err = security_socket_setsockopt(sock, level, optname); 1849 if (err) 1850 goto out_put; 1851 1852 if (level == SOL_SOCKET) 1853 err = 1854 sock_setsockopt(sock, level, optname, optval, 1855 optlen); 1856 else 1857 err = 1858 sock->ops->setsockopt(sock, level, optname, optval, 1859 optlen); 1860out_put: 1861 fput_light(sock->file, fput_needed); 1862 } 1863 return err; 1864} 1865 1866/* 1867 * Get a socket option. Because we don't know the option lengths we have 1868 * to pass a user mode parameter for the protocols to sort out. 1869 */ 1870 1871SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, 1872 char __user *, optval, int __user *, optlen) 1873{ 1874 int err, fput_needed; 1875 struct socket *sock; 1876 1877 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1878 if (sock != NULL) { 1879 err = security_socket_getsockopt(sock, level, optname); 1880 if (err) 1881 goto out_put; 1882 1883 if (level == SOL_SOCKET) 1884 err = 1885 sock_getsockopt(sock, level, optname, optval, 1886 optlen); 1887 else 1888 err = 1889 sock->ops->getsockopt(sock, level, optname, optval, 1890 optlen); 1891out_put: 1892 fput_light(sock->file, fput_needed); 1893 } 1894 return err; 1895} 1896 1897/* 1898 * Shutdown a socket. 
1899 */ 1900 1901SYSCALL_DEFINE2(shutdown, int, fd, int, how) 1902{ 1903 int err, fput_needed; 1904 struct socket *sock; 1905 1906 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1907 if (sock != NULL) { 1908 err = security_socket_shutdown(sock, how); 1909 if (!err) 1910 err = sock->ops->shutdown(sock, how); 1911 fput_light(sock->file, fput_needed); 1912 } 1913 return err; 1914} 1915 1916/* A couple of helpful macros for getting the address of the 32/64 bit 1917 * fields which are the same type (int / unsigned) on our platforms. 1918 */ 1919#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) 1920#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 1921#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 1922 1923struct used_address { 1924 struct sockaddr_storage name; 1925 unsigned int name_len; 1926}; 1927 1928static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, 1929 struct msghdr *msg_sys, unsigned flags, 1930 struct used_address *used_address) 1931{ 1932 struct compat_msghdr __user *msg_compat = 1933 (struct compat_msghdr __user *)msg; 1934 struct sockaddr_storage address; 1935 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 1936 unsigned char ctl[sizeof(struct cmsghdr) + 20] 1937 __attribute__ ((aligned(sizeof(__kernel_size_t)))); 1938 /* 20 is size of ipv6_pktinfo */ 1939 unsigned char *ctl_buf = ctl; 1940 int err, ctl_len, iov_size, total_len; 1941 1942 err = -EFAULT; 1943 if (MSG_CMSG_COMPAT & flags) { 1944 if (get_compat_msghdr(msg_sys, msg_compat)) 1945 return -EFAULT; 1946 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) 1947 return -EFAULT; 1948 1949 /* do not move before msg_sys is valid */ 1950 err = -EMSGSIZE; 1951 if (msg_sys->msg_iovlen > UIO_MAXIOV) 1952 goto out; 1953 1954 /* Check whether to allocate the iovec area */ 1955 err = -ENOMEM; 1956 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); 1957 if (msg_sys->msg_iovlen > UIO_FASTIOV) { 1958 iov = 
sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 1959 if (!iov) 1960 goto out; 1961 } 1962 1963 /* This will also move the address data into kernel space */ 1964 if (MSG_CMSG_COMPAT & flags) { 1965 err = verify_compat_iovec(msg_sys, iov, 1966 (struct sockaddr *)&address, 1967 VERIFY_READ); 1968 } else 1969 err = verify_iovec(msg_sys, iov, 1970 (struct sockaddr *)&address, 1971 VERIFY_READ); 1972 if (err < 0) 1973 goto out_freeiov; 1974 total_len = err; 1975 1976 err = -ENOBUFS; 1977 1978 if (msg_sys->msg_controllen > INT_MAX) 1979 goto out_freeiov; 1980 ctl_len = msg_sys->msg_controllen; 1981 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 1982 err = 1983 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl, 1984 sizeof(ctl)); 1985 if (err) 1986 goto out_freeiov; 1987 ctl_buf = msg_sys->msg_control; 1988 ctl_len = msg_sys->msg_controllen; 1989 } else if (ctl_len) { 1990 if (ctl_len > sizeof(ctl)) { 1991 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 1992 if (ctl_buf == NULL) 1993 goto out_freeiov; 1994 } 1995 err = -EFAULT; 1996 /* 1997 * Careful! Before this, msg_sys->msg_control contains a user pointer. 1998 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1999 * checking falls down on this. 2000 */ 2001 if (copy_from_user(ctl_buf, 2002 (void __user __force *)msg_sys->msg_control, 2003 ctl_len)) 2004 goto out_freectl; 2005 msg_sys->msg_control = ctl_buf; 2006 } 2007 msg_sys->msg_flags = flags; 2008 2009 if (sock->file->f_flags & O_NONBLOCK) 2010 msg_sys->msg_flags |= MSG_DONTWAIT; 2011 /* 2012 * If this is sendmmsg() and current destination address…
Large files files are truncated, but you can click here to view the full file