/net/socket.c
C | 3404 lines | 2442 code | 490 blank | 472 comment | 386 complexity | 0d0a4e9afa5e2a1f522d20c2d495f48e MD5 | raw file
1/* 2 * NET An implementation of the SOCKET network access protocol. 3 * 4 * Version: @(#)socket.c 1.1.93 18/02/95 5 * 6 * Authors: Orest Zborowski, <obz@Kodak.COM> 7 * Ross Biro 8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * 10 * Fixes: 11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in 12 * shutdown() 13 * Alan Cox : verify_area() fixes 14 * Alan Cox : Removed DDI 15 * Jonathan Kamens : SOCK_DGRAM reconnect bug 16 * Alan Cox : Moved a load of checks to the very 17 * top level. 18 * Alan Cox : Move address structures to/from user 19 * mode above the protocol layers. 20 * Rob Janssen : Allow 0 length sends. 21 * Alan Cox : Asynchronous I/O support (cribbed from the 22 * tty drivers). 23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) 24 * Jeff Uphoff : Made max number of sockets command-line 25 * configurable. 26 * Matti Aarnio : Made the number of sockets dynamic, 27 * to be allocated when needed, and mr. 28 * Uphoff's max is used as max to be 29 * allowed to allocate. 30 * Linus : Argh. removed all the socket allocation 31 * altogether: it's in the inode now. 32 * Alan Cox : Made sock_alloc()/sock_release() public 33 * for NetROM and future kernel nfsd type 34 * stuff. 35 * Alan Cox : sendmsg/recvmsg basics. 36 * Tom Dyas : Export net symbols. 37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n". 38 * Alan Cox : Added thread locking to sys_* calls 39 * for sockets. May have errors at the 40 * moment. 41 * Kevin Buhr : Fixed the dumb errors in the above. 42 * Andi Kleen : Some small cleanups, optimizations, 43 * and fixed a copy_from_user() bug. 44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) 45 * Tigran Aivazian : Made listen(2) backlog sanity checks 46 * protocol-independent 47 * 48 * 49 * This program is free software; you can redistribute it and/or 50 * modify it under the terms of the GNU General Public License 51 * as published by the Free Software Foundation; either version 52 * 2 of the License, or (at your option) any later version. 53 * 54 * 55 * This module is effectively the top level interface to the BSD socket 56 * paradigm. 57 * 58 * Based upon Swansea University Computer Society NET3.039 59 */ 60 61#include <linux/mm.h> 62#include <linux/socket.h> 63#include <linux/file.h> 64#include <linux/net.h> 65#include <linux/interrupt.h> 66#include <linux/thread_info.h> 67#include <linux/rcupdate.h> 68#include <linux/netdevice.h> 69#include <linux/proc_fs.h> 70#include <linux/seq_file.h> 71#include <linux/mutex.h> 72#include <linux/wanrouter.h> 73#include <linux/if_bridge.h> 74#include <linux/if_frad.h> 75#include <linux/if_vlan.h> 76#include <linux/init.h> 77#include <linux/poll.h> 78#include <linux/cache.h> 79#include <linux/module.h> 80#include <linux/highmem.h> 81#include <linux/mount.h> 82#include <linux/security.h> 83#include <linux/syscalls.h> 84#include <linux/compat.h> 85#include <linux/kmod.h> 86#include <linux/audit.h> 87#include <linux/wireless.h> 88#include <linux/nsproxy.h> 89#include <linux/magic.h> 90#include <linux/slab.h> 91 92#include <asm/uaccess.h> 93#include <asm/unistd.h> 94 95#include <net/compat.h> 96#include <net/wext.h> 97#include <net/cls_cgroup.h> 98 99#include <net/sock.h> 100#include <linux/netfilter.h> 101 102#include <linux/if_tun.h> 103#include <linux/ipv6_route.h> 104#include <linux/route.h> 105#include <linux/sockios.h> 106#include <linux/atalk.h> 107 108static int sock_no_open(struct inode *irrelevant, struct file *dontcare); 109static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 110 unsigned long nr_segs, loff_t pos); 111static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 112 unsigned long nr_segs, loff_t pos); 113static int sock_mmap(struct file *file, struct vm_area_struct *vma); 114 115static int sock_close(struct inode *inode, struct file *file); 116static unsigned int sock_poll(struct file *file, 117 struct poll_table_struct *wait); 118static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 119#ifdef CONFIG_COMPAT 120static long compat_sock_ioctl(struct file *file, 121 unsigned int cmd, unsigned long arg); 122#endif 123static int sock_fasync(int fd, struct file *filp, int on); 124static ssize_t sock_sendpage(struct file *file, struct page *page, 125 int offset, size_t size, loff_t *ppos, int more); 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 127 struct pipe_inode_info *pipe, size_t len, 128 unsigned int flags); 129 130/* 131 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear 132 * in the operation structures but are done directly via the socketcall() multiplexor. 133 */ 134 135static const struct file_operations socket_file_ops = { 136 .owner = THIS_MODULE, 137 .llseek = no_llseek, 138 .aio_read = sock_aio_read, 139 .aio_write = sock_aio_write, 140 .poll = sock_poll, 141 .unlocked_ioctl = sock_ioctl, 142#ifdef CONFIG_COMPAT 143 .compat_ioctl = compat_sock_ioctl, 144#endif 145 .mmap = sock_mmap, 146 .open = sock_no_open, /* special open code to disallow open via /proc */ 147 .release = sock_close, 148 .fasync = sock_fasync, 149 .sendpage = sock_sendpage, 150 .splice_write = generic_splice_sendpage, 151 .splice_read = sock_splice_read, 152}; 153 154/* 155 * The protocol list. Each protocol is registered in here. 156 */ 157 158static DEFINE_SPINLOCK(net_family_lock); 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; 160 161/* 162 * Statistics counters of the socket lists 163 */ 164 165static DEFINE_PER_CPU(int, sockets_in_use); 166 167/* 168 * Support routines. 169 * Move socket addresses back and forth across the kernel/user 170 * divide and look after the messy bits. 171 */ 172 173/** 174 * move_addr_to_kernel - copy a socket address into kernel space 175 * @uaddr: Address in user space 176 * @kaddr: Address in kernel space 177 * @ulen: Length in user space 178 * 179 * The address is copied into kernel space. If the provided address is 180 * too long an error code of -EINVAL is returned. If the copy gives 181 * invalid addresses -EFAULT is returned. On a success 0 is returned. 182 */ 183 184int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) 185{ 186 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) 187 return -EINVAL; 188 if (ulen == 0) 189 return 0; 190 if (copy_from_user(kaddr, uaddr, ulen)) 191 return -EFAULT; 192 return audit_sockaddr(ulen, kaddr); 193} 194 195/** 196 * move_addr_to_user - copy an address to user space 197 * @kaddr: kernel space address 198 * @klen: length of address in kernel 199 * @uaddr: user space address 200 * @ulen: pointer to user length field 201 * 202 * The value pointed to by ulen on entry is the buffer length available. 203 * This is overwritten with the buffer space used. -EINVAL is returned 204 * if an overlong buffer is specified or a negative buffer size. -EFAULT 205 * is returned if either the buffer or the length field are not 206 * accessible. 207 * After copying the data up to the limit the user specifies, the true 208 * length of the data is written over the length limit the user 209 * specified. Zero is returned for a success. 210 */ 211 212static int move_addr_to_user(struct sockaddr *kaddr, int klen, 213 void __user *uaddr, int __user *ulen) 214{ 215 int err; 216 int len; 217 218 err = get_user(len, ulen); 219 if (err) 220 return err; 221 if (len > klen) 222 len = klen; 223 if (len < 0 || len > sizeof(struct sockaddr_storage)) 224 return -EINVAL; 225 if (len) { 226 if (audit_sockaddr(klen, kaddr)) 227 return -ENOMEM; 228 if (copy_to_user(uaddr, kaddr, len)) 229 return -EFAULT; 230 } 231 /* 232 * "fromlen shall refer to the value before truncation.." 233 * 1003.1g 234 */ 235 return __put_user(klen, ulen); 236} 237 238static struct kmem_cache *sock_inode_cachep __read_mostly; 239 240static struct inode *sock_alloc_inode(struct super_block *sb) 241{ 242 struct socket_alloc *ei; 243 struct socket_wq *wq; 244 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 246 if (!ei) 247 return NULL; 248 wq = kmalloc(sizeof(*wq), GFP_KERNEL); 249 if (!wq) { 250 kmem_cache_free(sock_inode_cachep, ei); 251 return NULL; 252 } 253 init_waitqueue_head(&wq->wait); 254 wq->fasync_list = NULL; 255 RCU_INIT_POINTER(ei->socket.wq, wq); 256 257 ei->socket.state = SS_UNCONNECTED; 258 ei->socket.flags = 0; 259 ei->socket.ops = NULL; 260 ei->socket.sk = NULL; 261 ei->socket.file = NULL; 262 263 return &ei->vfs_inode; 264} 265 266static void sock_destroy_inode(struct inode *inode) 267{ 268 struct socket_alloc *ei; 269 struct socket_wq *wq; 270 271 ei = container_of(inode, struct socket_alloc, vfs_inode); 272 wq = rcu_dereference_protected(ei->socket.wq, 1); 273 kfree_rcu(wq, rcu); 274 kmem_cache_free(sock_inode_cachep, ei); 275} 276 277static void init_once(void *foo) 278{ 279 struct socket_alloc *ei = (struct socket_alloc *)foo; 280 281 inode_init_once(&ei->vfs_inode); 282} 283 284static int init_inodecache(void) 285{ 286 sock_inode_cachep = kmem_cache_create("sock_inode_cache", 287 sizeof(struct socket_alloc), 288 0, 289 (SLAB_HWCACHE_ALIGN | 290 SLAB_RECLAIM_ACCOUNT | 291 SLAB_MEM_SPREAD), 292 init_once); 293 if (sock_inode_cachep == NULL) 294 return -ENOMEM; 295 return 0; 296} 297 298static const struct super_operations sockfs_ops = { 299 .alloc_inode = sock_alloc_inode, 300 .destroy_inode = sock_destroy_inode, 301 .statfs = simple_statfs, 302}; 303 304/* 305 * sockfs_dname() is called from d_path(). 306 */ 307static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) 308{ 309 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", 310 dentry->d_inode->i_ino); 311} 312 313static const struct dentry_operations sockfs_dentry_operations = { 314 .d_dname = sockfs_dname, 315}; 316 317static struct dentry *sockfs_mount(struct file_system_type *fs_type, 318 int flags, const char *dev_name, void *data) 319{ 320 return mount_pseudo(fs_type, "socket:", &sockfs_ops, 321 &sockfs_dentry_operations, SOCKFS_MAGIC); 322} 323 324static struct vfsmount *sock_mnt __read_mostly; 325 326static struct file_system_type sock_fs_type = { 327 .name = "sockfs", 328 .mount = sockfs_mount, 329 .kill_sb = kill_anon_super, 330}; 331 332/* 333 * Obtains the first available file descriptor and sets it up for use. 334 * 335 * These functions create file structures and maps them to fd space 336 * of the current process. On success it returns file descriptor 337 * and file struct implicitly stored in sock->file. 338 * Note that another thread may close file descriptor before we return 339 * from this function. We use the fact that now we do not refer 340 * to socket after mapping. If one day we will need it, this 341 * function will increment ref. count on file by 1. 342 * 343 * In any case returned fd MAY BE not valid! 344 * This race condition is unavoidable 345 * with shared fd spaces, we cannot solve it inside kernel, 346 * but we take care of internal coherence yet. 347 */ 348 349static int sock_alloc_file(struct socket *sock, struct file **f, int flags) 350{ 351 struct qstr name = { .name = "" }; 352 struct path path; 353 struct file *file; 354 int fd; 355 356 fd = get_unused_fd_flags(flags); 357 if (unlikely(fd < 0)) 358 return fd; 359 360 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); 361 if (unlikely(!path.dentry)) { 362 put_unused_fd(fd); 363 return -ENOMEM; 364 } 365 path.mnt = mntget(sock_mnt); 366 367 d_instantiate(path.dentry, SOCK_INODE(sock)); 368 SOCK_INODE(sock)->i_fop = &socket_file_ops; 369 370 file = alloc_file(&path, FMODE_READ | FMODE_WRITE, 371 &socket_file_ops); 372 if (unlikely(!file)) { 373 /* drop dentry, keep inode */ 374 ihold(path.dentry->d_inode); 375 path_put(&path); 376 put_unused_fd(fd); 377 return -ENFILE; 378 } 379 380 sock->file = file; 381 file->f_flags = O_RDWR | (flags & O_NONBLOCK); 382 file->f_pos = 0; 383 file->private_data = sock; 384 385 *f = file; 386 return fd; 387} 388 389int sock_map_fd(struct socket *sock, int flags) 390{ 391 struct file *newfile; 392 int fd = sock_alloc_file(sock, &newfile, flags); 393 394 if (likely(fd >= 0)) 395 fd_install(fd, newfile); 396 397 return fd; 398} 399EXPORT_SYMBOL(sock_map_fd); 400 401static struct socket *sock_from_file(struct file *file, int *err) 402{ 403 if (file->f_op == &socket_file_ops) 404 return file->private_data; /* set in sock_map_fd */ 405 406 *err = -ENOTSOCK; 407 return NULL; 408} 409 410/** 411 * sockfd_lookup - Go from a file number to its socket slot 412 * @fd: file handle 413 * @err: pointer to an error code return 414 * 415 * The file handle passed in is locked and the socket it is bound 416 * too is returned. If an error occurs the err pointer is overwritten 417 * with a negative errno code and NULL is returned. The function checks 418 * for both invalid handles and passing a handle which is not a socket. 419 * 420 * On a success the socket object pointer is returned. 421 */ 422 423struct socket *sockfd_lookup(int fd, int *err) 424{ 425 struct file *file; 426 struct socket *sock; 427 428 file = fget(fd); 429 if (!file) { 430 *err = -EBADF; 431 return NULL; 432 } 433 434 sock = sock_from_file(file, err); 435 if (!sock) 436 fput(file); 437 return sock; 438} 439EXPORT_SYMBOL(sockfd_lookup); 440 441static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) 442{ 443 struct file *file; 444 struct socket *sock; 445 446 *err = -EBADF; 447 file = fget_light(fd, fput_needed); 448 if (file) { 449 sock = sock_from_file(file, err); 450 if (sock) 451 return sock; 452 fput_light(file, *fput_needed); 453 } 454 return NULL; 455} 456 457/** 458 * sock_alloc - allocate a socket 459 * 460 * Allocate a new inode and socket object. The two are bound together 461 * and initialised. The socket is then returned. If we are out of inodes 462 * NULL is returned. 463 */ 464 465static struct socket *sock_alloc(void) 466{ 467 struct inode *inode; 468 struct socket *sock; 469 470 inode = new_inode(sock_mnt->mnt_sb); 471 if (!inode) 472 return NULL; 473 474 sock = SOCKET_I(inode); 475 476 kmemcheck_annotate_bitfield(sock, type); 477 inode->i_ino = get_next_ino(); 478 inode->i_mode = S_IFSOCK | S_IRWXUGO; 479 inode->i_uid = current_fsuid(); 480 inode->i_gid = current_fsgid(); 481 482 percpu_add(sockets_in_use, 1); 483 return sock; 484} 485 486/* 487 * In theory you can't get an open on this inode, but /proc provides 488 * a back door. Remember to keep it shut otherwise you'll let the 489 * creepy crawlies in. 490 */ 491 492static int sock_no_open(struct inode *irrelevant, struct file *dontcare) 493{ 494 return -ENXIO; 495} 496 497const struct file_operations bad_sock_fops = { 498 .owner = THIS_MODULE, 499 .open = sock_no_open, 500 .llseek = noop_llseek, 501}; 502 503/** 504 * sock_release - close a socket 505 * @sock: socket to close 506 * 507 * The socket is released from the protocol stack if it has a release 508 * callback, and the inode is then released if the socket is bound to 509 * an inode not a file. 510 */ 511 512int add_or_remove_port(struct sock *sk, int add_or_remove); /* SSD_RIL: Garbage_Filter_TCP */ 513 514void sock_release(struct socket *sock) 515{ 516 /* ++SSD_RIL: Garbage_Filter_TCP */ 517 if (sock->sk != NULL) 518 add_or_remove_port(sock->sk, 0); 519 /* --SSD_RIL: Garbage_Filter_TCP */ 520 521 if (sock->ops) { 522 struct module *owner = sock->ops->owner; 523 524 sock->ops->release(sock); 525 sock->ops = NULL; 526 module_put(owner); 527 } 528 529 if (rcu_dereference_protected(sock->wq, 1)->fasync_list) 530 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 531 532 percpu_sub(sockets_in_use, 1); 533 if (!sock->file) { 534 iput(SOCK_INODE(sock)); 535 return; 536 } 537 sock->file = NULL; 538} 539EXPORT_SYMBOL(sock_release); 540 541int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) 542{ 543 *tx_flags = 0; 544 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 545 *tx_flags |= SKBTX_HW_TSTAMP; 546 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 547 *tx_flags |= SKBTX_SW_TSTAMP; 548 return 0; 549} 550EXPORT_SYMBOL(sock_tx_timestamp); 551 552static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, 553 struct msghdr *msg, size_t size) 554{ 555 struct sock_iocb *si = kiocb_to_siocb(iocb); 556 557 sock_update_classid(sock->sk); 558 559 si->sock = sock; 560 si->scm = NULL; 561 si->msg = msg; 562 si->size = size; 563 564 return sock->ops->sendmsg(iocb, sock, msg, size); 565} 566 567static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 568 struct msghdr *msg, size_t size) 569{ 570 int err = security_socket_sendmsg(sock, msg, size); 571 572 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size); 573} 574 575int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) 576{ 577 struct kiocb iocb; 578 struct sock_iocb siocb; 579 int ret; 580 581 init_sync_kiocb(&iocb, NULL); 582 iocb.private = &siocb; 583 ret = __sock_sendmsg(&iocb, sock, msg, size); 584 if (-EIOCBQUEUED == ret) 585 ret = wait_on_sync_kiocb(&iocb); 586 return ret; 587} 588EXPORT_SYMBOL(sock_sendmsg); 589 590int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) 591{ 592 struct kiocb iocb; 593 struct sock_iocb siocb; 594 int ret; 595 596 init_sync_kiocb(&iocb, NULL); 597 iocb.private = &siocb; 598 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size); 599 if (-EIOCBQUEUED == ret) 600 ret = wait_on_sync_kiocb(&iocb); 601 return ret; 602} 603 604int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 605 struct kvec *vec, size_t num, size_t size) 606{ 607 mm_segment_t oldfs = get_fs(); 608 int result; 609 610 set_fs(KERNEL_DS); 611 /* 612 * the following is safe, since for compiler definitions of kvec and 613 * iovec are identical, yielding the same in-core layout and alignment 614 */ 615 msg->msg_iov = (struct iovec *)vec; 616 msg->msg_iovlen = num; 617 result = sock_sendmsg(sock, msg, size); 618 set_fs(oldfs); 619 return result; 620} 621EXPORT_SYMBOL(kernel_sendmsg); 622 623static int ktime2ts(ktime_t kt, struct timespec *ts) 624{ 625 if (kt.tv64) { 626 *ts = ktime_to_timespec(kt); 627 return 1; 628 } else { 629 return 0; 630 } 631} 632 633/* 634 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) 635 */ 636void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, 637 struct sk_buff *skb) 638{ 639 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); 640 struct timespec ts[3]; 641 int empty = 1; 642 struct skb_shared_hwtstamps *shhwtstamps = 643 skb_hwtstamps(skb); 644 645 /* Race occurred between timestamp enabling and packet 646 receiving. Fill in the current time for now. */ 647 if (need_software_tstamp && skb->tstamp.tv64 == 0) 648 __net_timestamp(skb); 649 650 if (need_software_tstamp) { 651 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { 652 struct timeval tv; 653 skb_get_timestamp(skb, &tv); 654 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, 655 sizeof(tv), &tv); 656 } else { 657 skb_get_timestampns(skb, &ts[0]); 658 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, 659 sizeof(ts[0]), &ts[0]); 660 } 661 } 662 663 664 memset(ts, 0, sizeof(ts)); 665 if (skb->tstamp.tv64 && 666 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { 667 skb_get_timestampns(skb, ts + 0); 668 empty = 0; 669 } 670 if (shhwtstamps) { 671 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && 672 ktime2ts(shhwtstamps->syststamp, ts + 1)) 673 empty = 0; 674 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && 675 ktime2ts(shhwtstamps->hwtstamp, ts + 2)) 676 empty = 0; 677 } 678 if (!empty) 679 put_cmsg(msg, SOL_SOCKET, 680 SCM_TIMESTAMPING, sizeof(ts), &ts); 681} 682EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 683 684static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, 685 struct sk_buff *skb) 686{ 687 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) 688 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, 689 sizeof(__u32), &skb->dropcount); 690} 691 692void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, 693 struct sk_buff *skb) 694{ 695 sock_recv_timestamp(msg, sk, skb); 696 sock_recv_drops(msg, sk, skb); 697} 698EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); 699 700static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, 701 struct msghdr *msg, size_t size, int flags) 702{ 703 struct sock_iocb *si = kiocb_to_siocb(iocb); 704 705 sock_update_classid(sock->sk); 706 707 si->sock = sock; 708 si->scm = NULL; 709 si->msg = msg; 710 si->size = size; 711 si->flags = flags; 712 713 return sock->ops->recvmsg(iocb, sock, msg, size, flags); 714} 715 716static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 717 struct msghdr *msg, size_t size, int flags) 718{ 719 int err = security_socket_recvmsg(sock, msg, size, flags); 720 721 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); 722} 723 724int sock_recvmsg(struct socket *sock, struct msghdr *msg, 725 size_t size, int flags) 726{ 727 struct kiocb iocb; 728 struct sock_iocb siocb; 729 int ret; 730 731 init_sync_kiocb(&iocb, NULL); 732 iocb.private = &siocb; 733 ret = __sock_recvmsg(&iocb, sock, msg, size, flags); 734 if (-EIOCBQUEUED == ret) 735 ret = wait_on_sync_kiocb(&iocb); 736 return ret; 737} 738EXPORT_SYMBOL(sock_recvmsg); 739 740static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, 741 size_t size, int flags) 742{ 743 struct kiocb iocb; 744 struct sock_iocb siocb; 745 int ret; 746 747 init_sync_kiocb(&iocb, NULL); 748 iocb.private = &siocb; 749 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); 750 if (-EIOCBQUEUED == ret) 751 ret = wait_on_sync_kiocb(&iocb); 752 return ret; 753} 754 755/** 756 * kernel_recvmsg - Receive a message from a socket (kernel space) 757 * @sock: The socket to receive the message from 758 * @msg: Received message 759 * @vec: Input s/g array for message data 760 * @num: Size of input s/g array 761 * @size: Number of bytes to read 762 * @flags: Message flags (MSG_DONTWAIT, etc...) 763 * 764 * On return the msg structure contains the scatter/gather array passed in the 765 * vec argument. The array is modified so that it consists of the unfilled 766 * portion of the original array. 767 * 768 * The returned value is the total number of bytes received, or an error. 769 */ 770int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 771 struct kvec *vec, size_t num, size_t size, int flags) 772{ 773 mm_segment_t oldfs = get_fs(); 774 int result; 775 776 set_fs(KERNEL_DS); 777 /* 778 * the following is safe, since for compiler definitions of kvec and 779 * iovec are identical, yielding the same in-core layout and alignment 780 */ 781 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; 782 result = sock_recvmsg(sock, msg, size, flags); 783 set_fs(oldfs); 784 return result; 785} 786EXPORT_SYMBOL(kernel_recvmsg); 787 788static void sock_aio_dtor(struct kiocb *iocb) 789{ 790 kfree(iocb->private); 791} 792 793static ssize_t sock_sendpage(struct file *file, struct page *page, 794 int offset, size_t size, loff_t *ppos, int more) 795{ 796 struct socket *sock; 797 int flags; 798 799 sock = file->private_data; 800 801 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 802 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */ 803 flags |= more; 804 805 return kernel_sendpage(sock, page, offset, size, flags); 806} 807 808static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 809 struct pipe_inode_info *pipe, size_t len, 810 unsigned int flags) 811{ 812 struct socket *sock = file->private_data; 813 814 if (unlikely(!sock->ops->splice_read)) 815 return -EINVAL; 816 817 sock_update_classid(sock->sk); 818 819 return sock->ops->splice_read(sock, ppos, pipe, len, flags); 820} 821 822static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, 823 struct sock_iocb *siocb) 824{ 825 if (!is_sync_kiocb(iocb)) { 826 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); 827 if (!siocb) 828 return NULL; 829 iocb->ki_dtor = sock_aio_dtor; 830 } 831 832 siocb->kiocb = iocb; 833 iocb->private = siocb; 834 return siocb; 835} 836 837static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, 838 struct file *file, const struct iovec *iov, 839 unsigned long nr_segs) 840{ 841 struct socket *sock = file->private_data; 842 size_t size = 0; 843 int i; 844 845 for (i = 0; i < nr_segs; i++) 846 size += iov[i].iov_len; 847 848 msg->msg_name = NULL; 849 msg->msg_namelen = 0; 850 msg->msg_control = NULL; 851 msg->msg_controllen = 0; 852 msg->msg_iov = (struct iovec *)iov; 853 msg->msg_iovlen = nr_segs; 854 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 855 856 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); 857} 858 859static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 860 unsigned long nr_segs, loff_t pos) 861{ 862 struct sock_iocb siocb, *x; 863 864 if (pos != 0) 865 return -ESPIPE; 866 867 if (iocb->ki_left == 0) /* Match SYS5 behaviour */ 868 return 0; 869 870 871 x = alloc_sock_iocb(iocb, &siocb); 872 if (!x) 873 return -ENOMEM; 874 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 875} 876 877static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, 878 struct file *file, const struct iovec *iov, 879 unsigned long nr_segs) 880{ 881 struct socket *sock = file->private_data; 882 size_t size = 0; 883 int i; 884 885 for (i = 0; i < nr_segs; i++) 886 size += iov[i].iov_len; 887 888 msg->msg_name = NULL; 889 msg->msg_namelen = 0; 890 msg->msg_control = NULL; 891 msg->msg_controllen = 0; 892 msg->msg_iov = (struct iovec *)iov; 893 msg->msg_iovlen = nr_segs; 894 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 895 if (sock->type == SOCK_SEQPACKET) 896 msg->msg_flags |= MSG_EOR; 897 898 return __sock_sendmsg(iocb, sock, msg, size); 899} 900 901static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 902 unsigned long nr_segs, loff_t pos) 903{ 904 struct sock_iocb siocb, *x; 905 906 if (pos != 0) 907 return -ESPIPE; 908 909 x = alloc_sock_iocb(iocb, &siocb); 910 if (!x) 911 return -ENOMEM; 912 913 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 914} 915 916/* 917 * Atomic setting of ioctl hooks to avoid race 918 * with module unload. 919 */ 920 921static DEFINE_MUTEX(br_ioctl_mutex); 922static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg); 923 924void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) 925{ 926 mutex_lock(&br_ioctl_mutex); 927 br_ioctl_hook = hook; 928 mutex_unlock(&br_ioctl_mutex); 929} 930EXPORT_SYMBOL(brioctl_set); 931 932static DEFINE_MUTEX(vlan_ioctl_mutex); 933static int (*vlan_ioctl_hook) (struct net *, void __user *arg); 934 935void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) 936{ 937 mutex_lock(&vlan_ioctl_mutex); 938 vlan_ioctl_hook = hook; 939 mutex_unlock(&vlan_ioctl_mutex); 940} 941EXPORT_SYMBOL(vlan_ioctl_set); 942 943static DEFINE_MUTEX(dlci_ioctl_mutex); 944static int (*dlci_ioctl_hook) (unsigned int, void __user *); 945 946void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) 947{ 948 mutex_lock(&dlci_ioctl_mutex); 949 dlci_ioctl_hook = hook; 950 mutex_unlock(&dlci_ioctl_mutex); 951} 952EXPORT_SYMBOL(dlci_ioctl_set); 953 954static long sock_do_ioctl(struct net *net, struct socket *sock, 955 unsigned int cmd, unsigned long arg) 956{ 957 int err; 958 void __user *argp = (void __user *)arg; 959 960 err = sock->ops->ioctl(sock, cmd, arg); 961 962 /* 963 * If this ioctl is unknown try to hand it down 964 * to the NIC driver. 965 */ 966 if (err == -ENOIOCTLCMD) 967 err = dev_ioctl(net, cmd, argp); 968 969 return err; 970} 971 972/* 973 * With an ioctl, arg may well be a user mode pointer, but we don't know 974 * what to do with it - that's up to the protocol still. 975 */ 976 977static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) 978{ 979 struct socket *sock; 980 struct sock *sk; 981 void __user *argp = (void __user *)arg; 982 int pid, err; 983 struct net *net; 984 985 sock = file->private_data; 986 sk = sock->sk; 987 net = sock_net(sk); 988 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { 989 err = dev_ioctl(net, cmd, argp); 990 } else 991#ifdef CONFIG_WEXT_CORE 992 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { 993 err = dev_ioctl(net, cmd, argp); 994 } else 995#endif 996 switch (cmd) { 997 case FIOSETOWN: 998 case SIOCSPGRP: 999 err = -EFAULT; 1000 if (get_user(pid, (int __user *)argp)) 1001 break; 1002 err = f_setown(sock->file, pid, 1); 1003 break; 1004 case FIOGETOWN: 1005 case SIOCGPGRP: 1006 err = put_user(f_getown(sock->file), 1007 (int __user *)argp); 1008 break; 1009 case SIOCGIFBR: 1010 case SIOCSIFBR: 1011 case SIOCBRADDBR: 1012 case SIOCBRDELBR: 1013 err = -ENOPKG; 1014 if (!br_ioctl_hook) 1015 request_module("bridge"); 1016 1017 mutex_lock(&br_ioctl_mutex); 1018 if (br_ioctl_hook) 1019 err = br_ioctl_hook(net, cmd, argp); 1020 mutex_unlock(&br_ioctl_mutex); 1021 break; 1022 case SIOCGIFVLAN: 1023 case SIOCSIFVLAN: 1024 err = -ENOPKG; 1025 if (!vlan_ioctl_hook) 1026 request_module("8021q"); 1027 1028 mutex_lock(&vlan_ioctl_mutex); 1029 if (vlan_ioctl_hook) 1030 err = vlan_ioctl_hook(net, argp); 1031 mutex_unlock(&vlan_ioctl_mutex); 1032 break; 1033 case SIOCADDDLCI: 1034 case SIOCDELDLCI: 1035 err = -ENOPKG; 1036 if (!dlci_ioctl_hook) 1037 request_module("dlci"); 1038 1039 mutex_lock(&dlci_ioctl_mutex); 1040 if (dlci_ioctl_hook) 1041 err = dlci_ioctl_hook(cmd, argp); 1042 mutex_unlock(&dlci_ioctl_mutex); 1043 break; 1044 default: 1045 err = sock_do_ioctl(net, sock, cmd, arg); 1046 break; 1047 } 1048 return err; 1049} 1050 1051int sock_create_lite(int family, int type, int protocol, struct socket **res) 1052{ 1053 int err; 1054 struct socket *sock = NULL; 1055 1056 err = security_socket_create(family, type, protocol, 1); 1057 if (err) 1058 goto out; 1059 1060 sock = sock_alloc(); 1061 if (!sock) { 1062 err = -ENOMEM; 1063 goto out; 1064 } 1065 1066 sock->type = type; 1067 err = security_socket_post_create(sock, family, type, protocol, 1); 1068 if (err) 1069 goto out_release; 1070 1071out: 1072 *res = sock; 1073 return err; 1074out_release: 1075 sock_release(sock); 1076 sock = NULL; 1077 goto out; 1078} 1079EXPORT_SYMBOL(sock_create_lite); 1080 1081/* No kernel lock held - perfect */ 1082static unsigned int sock_poll(struct file *file, poll_table *wait) 1083{ 1084 struct socket *sock; 1085 1086 /* 1087 * We can't return errors to poll, so it's either yes or no. 1088 */ 1089 sock = file->private_data; 1090 return sock->ops->poll(file, sock, wait); 1091} 1092 1093static int sock_mmap(struct file *file, struct vm_area_struct *vma) 1094{ 1095 struct socket *sock = file->private_data; 1096 1097 return sock->ops->mmap(file, sock, vma); 1098} 1099 1100static int sock_close(struct inode *inode, struct file *filp) 1101{ 1102 /* 1103 * It was possible the inode is NULL we were 1104 * closing an unfinished socket. 1105 */ 1106 1107 if (!inode) { 1108 printk(KERN_DEBUG "sock_close: NULL inode\n"); 1109 return 0; 1110 } 1111 sock_release(SOCKET_I(inode)); 1112 return 0; 1113} 1114 1115/* 1116 * Update the socket async list 1117 * 1118 * Fasync_list locking strategy. 1119 * 1120 * 1. fasync_list is modified only under process context socket lock 1121 * i.e. under semaphore. 1122 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) 1123 * or under socket lock 1124 */ 1125 1126static int sock_fasync(int fd, struct file *filp, int on) 1127{ 1128 struct socket *sock = filp->private_data; 1129 struct sock *sk = sock->sk; 1130 struct socket_wq *wq; 1131 1132 if (sk == NULL) 1133 return -EINVAL; 1134 1135 lock_sock(sk); 1136 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk)); 1137 fasync_helper(fd, filp, on, &wq->fasync_list); 1138 1139 if (!wq->fasync_list) 1140 sock_reset_flag(sk, SOCK_FASYNC); 1141 else 1142 sock_set_flag(sk, SOCK_FASYNC); 1143 1144 release_sock(sk); 1145 return 0; 1146} 1147 1148/* This function may be called only under socket lock or callback_lock or rcu_lock */ 1149 1150int sock_wake_async(struct socket *sock, int how, int band) 1151{ 1152 struct socket_wq *wq; 1153 1154 if (!sock) 1155 return -1; 1156 rcu_read_lock(); 1157 wq = rcu_dereference(sock->wq); 1158 if (!wq || !wq->fasync_list) { 1159 rcu_read_unlock(); 1160 return -1; 1161 } 1162 switch (how) { 1163 case SOCK_WAKE_WAITD: 1164 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) 1165 break; 1166 goto call_kill; 1167 case SOCK_WAKE_SPACE: 1168 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) 1169 break; 1170 /* fall through */ 1171 case SOCK_WAKE_IO: 1172call_kill: 1173 kill_fasync(&wq->fasync_list, SIGIO, band); 1174 break; 1175 case SOCK_WAKE_URG: 1176 kill_fasync(&wq->fasync_list, SIGURG, band); 1177 } 1178 rcu_read_unlock(); 1179 return 0; 1180} 1181EXPORT_SYMBOL(sock_wake_async); 1182 1183int __sock_create(struct net *net, int family, int type, int protocol, 1184 struct socket **res, int kern) 1185{ 1186 int err; 1187 struct socket *sock; 1188 const struct net_proto_family *pf; 1189 1190 /* 1191 * Check protocol is in range 1192 */ 1193 if (family < 0 || family >= NPROTO) 1194 return -EAFNOSUPPORT; 1195 if (type < 0 || type >= SOCK_MAX) 1196 return -EINVAL; 1197 1198 /* Compatibility. 1199 1200 This uglymoron is moved from INET layer to here to avoid 1201 deadlock in module load. 1202 */ 1203 if (family == PF_INET && type == SOCK_PACKET) { 1204 static int warned; 1205 if (!warned) { 1206 warned = 1; 1207 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", 1208 current->comm); 1209 } 1210 family = PF_PACKET; 1211 } 1212 1213 err = security_socket_create(family, type, protocol, kern); 1214 if (err) 1215 return err; 1216 1217 /* 1218 * Allocate the socket and allow the family to set things up. if 1219 * the protocol is 0, the family is instructed to select an appropriate 1220 * default. 1221 */ 1222 sock = sock_alloc(); 1223 if (!sock) { 1224 if (net_ratelimit()) 1225 printk(KERN_WARNING "socket: no more sockets\n"); 1226 return -ENFILE; /* Not exactly a match, but its the 1227 closest posix thing */ 1228 } 1229 1230 sock->type = type; 1231 1232#ifdef CONFIG_MODULES 1233 /* Attempt to load a protocol module if the find failed. 1234 * 1235 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 1236 * requested real, full-featured networking support upon configuration. 1237 * Otherwise module support will break! 1238 */ 1239 if (rcu_access_pointer(net_families[family]) == NULL) 1240 request_module("net-pf-%d", family); 1241#endif 1242 1243 rcu_read_lock(); 1244 pf = rcu_dereference(net_families[family]); 1245 err = -EAFNOSUPPORT; 1246 if (!pf) 1247 goto out_release; 1248 1249 /* 1250 * We will call the ->create function, that possibly is in a loadable 1251 * module, so we have to bump that loadable module refcnt first. 1252 */ 1253 if (!try_module_get(pf->owner)) 1254 goto out_release; 1255 1256 /* Now protected by module ref count */ 1257 rcu_read_unlock(); 1258 1259 err = pf->create(net, sock, protocol, kern); 1260 if (err < 0) 1261 goto out_module_put; 1262 1263 /* 1264 * Now to bump the refcnt of the [loadable] module that owns this 1265 * socket at sock_release time we decrement its refcnt. 1266 */ 1267 if (!try_module_get(sock->ops->owner)) 1268 goto out_module_busy; 1269 1270 /* 1271 * Now that we're done with the ->create function, the [loadable] 1272 * module can have its refcnt decremented 1273 */ 1274 module_put(pf->owner); 1275 err = security_socket_post_create(sock, family, type, protocol, kern); 1276 if (err) 1277 goto out_sock_release; 1278 *res = sock; 1279 1280 return 0; 1281 1282out_module_busy: 1283 err = -EAFNOSUPPORT; 1284out_module_put: 1285 sock->ops = NULL; 1286 module_put(pf->owner); 1287out_sock_release: 1288 sock_release(sock); 1289 return err; 1290 1291out_release: 1292 rcu_read_unlock(); 1293 goto out_sock_release; 1294} 1295EXPORT_SYMBOL(__sock_create); 1296 1297int sock_create(int family, int type, int protocol, struct socket **res) 1298{ 1299 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); 1300} 1301EXPORT_SYMBOL(sock_create); 1302 1303int sock_create_kern(int family, int type, int protocol, struct socket **res) 1304{ 1305 return __sock_create(&init_net, family, type, protocol, res, 1); 1306} 1307EXPORT_SYMBOL(sock_create_kern); 1308 1309SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) 1310{ 1311 int retval; 1312 struct socket *sock; 1313 int flags; 1314 1315 /* Check the SOCK_* constants for consistency. */ 1316 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); 1317 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); 1318 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); 1319 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); 1320 1321 flags = type & ~SOCK_TYPE_MASK; 1322 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1323 return -EINVAL; 1324 type &= SOCK_TYPE_MASK; 1325 1326 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1327 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1328 1329 retval = sock_create(family, type, protocol, &sock); 1330 if (retval < 0) 1331 goto out; 1332 1333 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); 1334 if (retval < 0) 1335 goto out_release; 1336 1337out: 1338 /* It may be already another descriptor 8) Not kernel problem. */ 1339 return retval; 1340 1341out_release: 1342 sock_release(sock); 1343 return retval; 1344} 1345 1346/* 1347 * Create a pair of connected sockets. 1348 */ 1349 1350SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, 1351 int __user *, usockvec) 1352{ 1353 struct socket *sock1, *sock2; 1354 int fd1, fd2, err; 1355 struct file *newfile1, *newfile2; 1356 int flags; 1357 1358 flags = type & ~SOCK_TYPE_MASK; 1359 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1360 return -EINVAL; 1361 type &= SOCK_TYPE_MASK; 1362 1363 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1364 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1365 1366 /* 1367 * Obtain the first socket and check if the underlying protocol 1368 * supports the socketpair call. 1369 */ 1370 1371 err = sock_create(family, type, protocol, &sock1); 1372 if (err < 0) 1373 goto out; 1374 1375 err = sock_create(family, type, protocol, &sock2); 1376 if (err < 0) 1377 goto out_release_1; 1378 1379 err = sock1->ops->socketpair(sock1, sock2); 1380 if (err < 0) 1381 goto out_release_both; 1382 1383 fd1 = sock_alloc_file(sock1, &newfile1, flags); 1384 if (unlikely(fd1 < 0)) { 1385 err = fd1; 1386 goto out_release_both; 1387 } 1388 1389 fd2 = sock_alloc_file(sock2, &newfile2, flags); 1390 if (unlikely(fd2 < 0)) { 1391 err = fd2; 1392 fput(newfile1); 1393 put_unused_fd(fd1); 1394 sock_release(sock2); 1395 goto out; 1396 } 1397 1398 audit_fd_pair(fd1, fd2); 1399 fd_install(fd1, newfile1); 1400 fd_install(fd2, newfile2); 1401 /* fd1 and fd2 may be already another descriptors. 1402 * Not kernel problem. 1403 */ 1404 1405 err = put_user(fd1, &usockvec[0]); 1406 if (!err) 1407 err = put_user(fd2, &usockvec[1]); 1408 if (!err) 1409 return 0; 1410 1411 sys_close(fd2); 1412 sys_close(fd1); 1413 return err; 1414 1415out_release_both: 1416 sock_release(sock2); 1417out_release_1: 1418 sock_release(sock1); 1419out: 1420 return err; 1421} 1422 1423/* 1424 * Bind a name to a socket. Nothing much to do here since it's 1425 * the protocol's responsibility to handle the local address. 1426 * 1427 * We move the socket address to kernel space before we call 1428 * the protocol layer (having also checked the address is ok). 1429 */ 1430 1431SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) 1432{ 1433 struct socket *sock; 1434 struct sockaddr_storage address; 1435 int err, fput_needed; 1436 1437 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1438 if (sock) { 1439 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); 1440 if (err >= 0) { 1441 err = security_socket_bind(sock, 1442 (struct sockaddr *)&address, 1443 addrlen); 1444 if (!err) 1445 err = sock->ops->bind(sock, 1446 (struct sockaddr *) 1447 &address, addrlen); 1448 } 1449 fput_light(sock->file, fput_needed); 1450 /* ++SSD_RIL: Garbage_Filter_UDP */ 1451 #ifdef CONFIG_ARCH_MSM8960 1452 if (sock->sk != NULL) { 1453 if (sock->sk->sk_protocol == IPPROTO_UDP) 1454 add_or_remove_port(sock->sk, 1); 1455 } 1456 #endif 1457 /* --SSD_RIL: Garbage_Filter_UDP */ 1458 } 1459 return err; 1460} 1461 1462/* 1463 * Perform a listen. Basically, we allow the protocol to do anything 1464 * necessary for a listen, and if that works, we mark the socket as 1465 * ready for listening. 1466 */ 1467 1468SYSCALL_DEFINE2(listen, int, fd, int, backlog) 1469{ 1470 struct socket *sock; 1471 int err, fput_needed; 1472 int somaxconn; 1473 1474 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1475 if (sock) { 1476 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; 1477 if ((unsigned)backlog > somaxconn) 1478 backlog = somaxconn; 1479 1480 err = security_socket_listen(sock, backlog); 1481 if (!err) 1482 err = sock->ops->listen(sock, backlog); 1483 1484 fput_light(sock->file, fput_needed); 1485 /* ++SSD_RIL: Garbage_Filter_TCP */ 1486 if (sock->sk != NULL) 1487 add_or_remove_port(sock->sk, 1); 1488 /* --SSD_RIL: Garbage_Filter_TCP */ 1489 } 1490 return err; 1491} 1492 1493/* 1494 * For accept, we attempt to create a new socket, set up the link 1495 * with the client, wake up the client, then return the new 1496 * connected fd. We collect the address of the connector in kernel 1497 * space and move it to user at the very end. This is unclean because 1498 * we open the socket then return an error. 1499 * 1500 * 1003.1g adds the ability to recvmsg() to query connection pending 1501 * status to recvmsg. We need to add that support in a way thats 1502 * clean when we restucture accept also. 1503 */ 1504 1505SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, 1506 int __user *, upeer_addrlen, int, flags) 1507{ 1508 struct socket *sock, *newsock; 1509 struct file *newfile; 1510 int err, len, newfd, fput_needed; 1511 struct sockaddr_storage address; 1512 1513 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1514 return -EINVAL; 1515 1516 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1517 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1518 1519 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1520 if (!sock) 1521 goto out; 1522 1523 err = -ENFILE; 1524 newsock = sock_alloc(); 1525 if (!newsock) 1526 goto out_put; 1527 1528 newsock->type = sock->type; 1529 newsock->ops = sock->ops; 1530 1531 /* 1532 * We don't need try_module_get here, as the listening socket (sock) 1533 * has the protocol module (sock->ops->owner) held. 1534 */ 1535 __module_get(newsock->ops->owner); 1536 1537 newfd = sock_alloc_file(newsock, &newfile, flags); 1538 if (unlikely(newfd < 0)) { 1539 err = newfd; 1540 sock_release(newsock); 1541 goto out_put; 1542 } 1543 1544 err = security_socket_accept(sock, newsock); 1545 if (err) 1546 goto out_fd; 1547 1548 err = sock->ops->accept(sock, newsock, sock->file->f_flags); 1549 if (err < 0) 1550 goto out_fd; 1551 1552 if (upeer_sockaddr) { 1553 if (newsock->ops->getname(newsock, (struct sockaddr *)&address, 1554 &len, 2) < 0) { 1555 err = -ECONNABORTED; 1556 goto out_fd; 1557 } 1558 err = move_addr_to_user((struct sockaddr *)&address, 1559 len, upeer_sockaddr, upeer_addrlen); 1560 if (err < 0) 1561 goto out_fd; 1562 } 1563 1564 /* File flags are not inherited via accept() unlike another OSes. */ 1565 1566 fd_install(newfd, newfile); 1567 err = newfd; 1568 1569out_put: 1570 fput_light(sock->file, fput_needed); 1571out: 1572 return err; 1573out_fd: 1574 fput(newfile); 1575 put_unused_fd(newfd); 1576 goto out_put; 1577} 1578 1579SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, 1580 int __user *, upeer_addrlen) 1581{ 1582 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); 1583} 1584 1585/* 1586 * Attempt to connect to a socket with the server address. The address 1587 * is in user space so we verify it is OK and move it to kernel space. 1588 * 1589 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to 1590 * break bindings 1591 * 1592 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and 1593 * other SEQPACKET protocols that take time to connect() as it doesn't 1594 * include the -EINPROGRESS status for such sockets. 1595 */ 1596 1597SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, 1598 int, addrlen) 1599{ 1600 struct socket *sock; 1601 struct sockaddr_storage address; 1602 int err, fput_needed; 1603 1604 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1605 if (!sock) 1606 goto out; 1607 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); 1608 if (err < 0) 1609 goto out_put; 1610 1611 err = 1612 security_socket_connect(sock, (struct sockaddr *)&address, addrlen); 1613 if (err) 1614 goto out_put; 1615 1616 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, 1617 sock->file->f_flags); 1618out_put: 1619 fput_light(sock->file, fput_needed); 1620out: 1621 return err; 1622} 1623 1624/* 1625 * Get the local address ('name') of a socket object. Move the obtained 1626 * name to user space. 1627 */ 1628 1629SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, 1630 int __user *, usockaddr_len) 1631{ 1632 struct socket *sock; 1633 struct sockaddr_storage address; 1634 int len, err, fput_needed; 1635 1636 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1637 if (!sock) 1638 goto out; 1639 1640 err = security_socket_getsockname(sock); 1641 if (err) 1642 goto out_put; 1643 1644 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); 1645 if (err) 1646 goto out_put; 1647 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); 1648 1649out_put: 1650 fput_light(sock->file, fput_needed); 1651out: 1652 return err; 1653} 1654 1655/* 1656 * Get the remote address ('name') of a socket object. Move the obtained 1657 * name to user space. 1658 */ 1659 1660SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, 1661 int __user *, usockaddr_len) 1662{ 1663 struct socket *sock; 1664 struct sockaddr_storage address; 1665 int len, err, fput_needed; 1666 1667 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1668 if (sock != NULL) { 1669 err = security_socket_getpeername(sock); 1670 if (err) { 1671 fput_light(sock->file, fput_needed); 1672 return err; 1673 } 1674 1675 err = 1676 sock->ops->getname(sock, (struct sockaddr *)&address, &len, 1677 1); 1678 if (!err) 1679 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, 1680 usockaddr_len); 1681 fput_light(sock->file, fput_needed); 1682 } 1683 return err; 1684} 1685 1686/* 1687 * Send a datagram to a given address. We move the address into kernel 1688 * space and check the user space data area is readable before invoking 1689 * the protocol. 1690 */ 1691 1692SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, 1693 unsigned, flags, struct sockaddr __user *, addr, 1694 int, addr_len) 1695{ 1696 struct socket *sock; 1697 struct sockaddr_storage address; 1698 int err; 1699 struct msghdr msg; 1700 struct iovec iov; 1701 int fput_needed; 1702 1703 if (len > INT_MAX) 1704 len = INT_MAX; 1705 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1706 if (!sock) 1707 goto out; 1708 1709 iov.iov_base = buff; 1710 iov.iov_len = len; 1711 msg.msg_name = NULL; 1712 msg.msg_iov = &iov; 1713 msg.msg_iovlen = 1; 1714 msg.msg_control = NULL; 1715 msg.msg_controllen = 0; 1716 msg.msg_namelen = 0; 1717 if (addr) { 1718 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); 1719 if (err < 0) 1720 goto out_put; 1721 msg.msg_name = (struct sockaddr *)&address; 1722 msg.msg_namelen = addr_len; 1723 } 1724 if (sock->file->f_flags & O_NONBLOCK) 1725 flags |= MSG_DONTWAIT; 1726 msg.msg_flags = flags; 1727 err = sock_sendmsg(sock, &msg, len); 1728 1729out_put: 1730 fput_light(sock->file, fput_needed); 1731out: 1732 return err; 1733} 1734 1735/* 1736 * Send a datagram down a socket. 1737 */ 1738 1739SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, 1740 unsigned, flags) 1741{ 1742 return sys_sendto(fd, buff, len, flags, NULL, 0); 1743} 1744 1745/* 1746 * Receive a frame from the socket and optionally record the address of the 1747 * sender. We verify the buffers are writable and if needed move the 1748 * sender address from kernel to user space. 1749 */ 1750 1751SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, 1752 unsigned, flags, struct sockaddr __user *, addr, 1753 int __user *, addr_len) 1754{ 1755 struct socket *sock; 1756 struct iovec iov; 1757 struct msghdr msg; 1758 struct sockaddr_storage address; 1759 int err, err2; 1760 int fput_needed; 1761 1762 if (size > INT_MAX) 1763 size = INT_MAX; 1764 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1765 if (!sock) 1766 goto out; 1767 1768 msg.msg_control = NULL; 1769 msg.msg_controllen = 0; 1770 msg.msg_iovlen = 1; 1771 msg.msg_iov = &iov; 1772 iov.iov_len = size; 1773 iov.iov_base = ubuf; 1774 msg.msg_name = (struct sockaddr *)&address; 1775 msg.msg_namelen = sizeof(address); 1776 if (sock->file->f_flags & O_NONBLOCK) 1777 flags |= MSG_DONTWAIT; 1778 err = sock_recvmsg(sock, &msg, size, flags); 1779 1780 if (err >= 0 && addr != NULL) { 1781 err2 = move_addr_to_user((struct sockaddr *)&address, 1782 msg.msg_namelen, addr, addr_len); 1783 if (err2 < 0) 1784 err = err2; 1785 } 1786 1787 fput_light(sock->file, fput_needed); 1788out: 1789 return err; 1790} 1791 1792/* 1793 * Receive a datagram from a socket. 1794 */ 1795 1796asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, 1797 unsigned flags) 1798{ 1799 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); 1800} 1801 1802/* 1803 * Set a socket option. Because we don't know the option lengths we have 1804 * to pass the user mode parameter for the protocols to sort out. 1805 */ 1806 1807SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, 1808 char __user *, optval, int, optlen) 1809{ 1810 int err, fput_needed; 1811 struct socket *sock; 1812 1813 if (optlen < 0) 1814 return -EINVAL; 1815 1816 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1817 if (sock != NULL) { 1818 err = security_socket_setsockopt(sock, level, optname); 1819 if (err) 1820 goto out_put; 1821 1822 if (level == SOL_SOCKET) 1823 err = 1824 sock_setsockopt(sock, level, optname, optval, 1825 optlen); 1826 else 1827 err = 1828 sock->ops->setsockopt(sock, level, optname, optval, 1829 optlen); 1830out_put: 1831 fput_light(sock->file, fput_needed); 1832 } 1833 return err; 1834} 1835 1836/* 1837 * Get a socket option. Because we don't know the option lengths we have 1838 * to pass a user mode parameter for the protocols to sort out. 1839 */ 1840 1841SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, 1842 char __user *, optval, int __user *, optlen) 1843{ 1844 int err, fput_needed; 1845 struct socket *sock; 1846 1847 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1848 if (sock != NULL) { 1849 err = security_socket_getsockopt(sock, level, optname); 1850 if (err) 1851 goto out_put; 1852 1853 if (level == SOL_SOCKET) 1854 err = 1855 sock_getsockopt(sock, level, optname, optval, 1856 optlen); 1857 else 1858 err = 1859 sock->ops->getsockopt(sock, level, optname, optval, 1860 optlen); 1861out_put: 1862 fput_light(sock->file, fput_needed); 1863 } 1864 return err; 1865} 1866 1867/* 1868 * Shutdown a socket. 1869 */ 1870 1871SYSCALL_DEFINE2(shutdown, int, fd, int, how) 1872{ 1873 int err, fput_needed; 1874 struct socket *sock; 1875 1876 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1877 if (sock != NULL) { 1878 err = security_socket_shutdown(sock, how); 1879 if (!err) 1880 err = sock->ops->shutdown(sock, how); 1881 fput_light(sock->file, fput_needed); 1882 } 1883 return err; 1884} 1885 1886/* A couple of helpful macros for getting the address of the 32/64 bit 1887 * fields which are the same type (int / unsigned) on our platforms. 1888 */ 1889#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) 1890#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 1891#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 1892 1893struct used_address { 1894 struct sockaddr_storage name; 1895 unsigned int name_len; 1896}; 1897 1898static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, 1899 struct msghdr *msg_sys, unsigned flags, 1900 struct used_address *used_address) 1901{ 1902 struct compat_msghdr __user *msg_compat = 1903 (struct compat_msghdr __user *)msg; 1904 struct sockaddr_storage address; 1905 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 1906 unsigned char ctl[sizeof(struct cmsghdr) + 20] 1907 __attribute__ ((aligned(sizeof(__kernel_size_t)))); 1908 /* 20 is size of ipv6_pktinfo */ 1909 unsigned char *ctl_buf = ctl; 1910 int err, ctl_len, iov_size, total_len; 1911 1912 err = -EFAULT; 1913 if (MSG_CMSG_COMPAT & flags) { 1914 if (get_compat_msghdr(msg_sys, msg_compat)) 1915 return -EFAULT; 1916 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) 1917 return -EFAULT; 1918 1919 /* do not move before msg_sys is valid */ 1920 err = -EMSGSIZE; 1921 if (msg_sys->msg_iovlen > UIO_MAXIOV) 1922 goto out; 1923 1924 /* Check whether to allocate the iovec area */ 1925 err = -ENOMEM; 1926 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); 1927 if (msg_sys->msg_iovlen > UIO_FASTIOV) { 1928 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 1929 if (!iov) 1930 goto out; 1931 } 1932 1933 /* This will also move the address data into kernel space */ 1934 if (MSG_CMSG_COMPAT & flags) { 1935 err = verify_compat_iovec(msg_sys, iov, 1936 (struct sockaddr *)&address, 1937 VERIFY_READ); 1938 } else 1939 err = verify_iovec(msg_sys, iov, 1940 (struct sockaddr *)&address, 1941 VERIFY_READ); 1942 if (err < 0) 1943 goto out_freeiov; 1944 total_len = err; 1945 1946 err = -ENOBUFS; 1947 1948 if (msg_sys->msg_controllen > INT_MAX) 1949 goto out_freeiov; 1950 ctl_len = msg_sys->msg_controllen; 1951 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 1952 err = 1953 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl, 1954 sizeof(ctl)); 1955 if (err) 1956 goto out_freeiov; 1957 ctl_buf = msg_sys->msg_control; 1958 ctl_len = msg_sys->msg_controllen; 1959 } else if (ctl_len) { 1960 if (ctl_len > sizeof(ctl)) { 1961 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 1962 if (ctl_buf == NULL) 1963 goto out_freeiov; 1964 } 1965 err = -EFAULT; 1966 /* 1967 * Careful! Before this, msg_sys->msg_control contains a user pointer. 1968 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1969 * checking falls down on this. 1970 */ 1971 if (copy_from_user(ctl_buf, 1972 (void __user __force *)msg_sys->msg_control, 1973 ctl_len)) 1974 goto out_freectl; 1975 msg_sys->msg_control = ctl_buf; 1976 } 1977 msg_sys->msg_flags = flags; 1978 1979 if (sock->file->f_flags & O_NONBLOCK) 1980 msg_sys->msg_flags |= MSG_DONTWAIT; 1981 /* 1982 * If this is sendmmsg() and current destination address is same as 1983 * previously succeeded address, omit asking LSM's decision. 1984 * used_address->name_len is initialized to UINT_MAX so that the first 1985 * destination address never matches. 1986 */ 1987 if (used_address && msg_sys->msg_name && 1988 used_address->name_len == msg_sys->msg_namelen && 1989 !memcmp(&used_address->name, msg_sys->msg_name, 1990 used_address->name_len)) { 1991 err = sock_sendmsg_nosec(sock, msg_sys, total_len); 1992 goto out_freectl; 1993 } 1994 err = sock_sendmsg(sock, msg_sys, total_len); 1995 /* 1996 * If this is sendmmsg() and sending to current destination address was 1997 * successful, remember it. 1998 */ 1999 if (used_address && err >= 0) { 2000 used_address->name_len = msg_sys->msg_namelen; 2001 if (msg_sys->msg_name) 2002 memcpy(&used_address->name, msg_sys->msg_name, 2003 used_address->name_len); 2004 } 2005 2006out_freectl: 2007 if (ctl_buf != ctl) 2008 sock_kfree_s(sock->sk, ctl_buf, ctl_len); 2009out_freeiov: 2010 if (iov != iovstack) 2011 sock_kfree_s(sock->sk, iov, iov_size); 2012out: 2013 return err; 2014} 2015 2016/* 2017 * BSD sendmsg interface 2018 */ 2019 2020SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) 2021{ 2022 int fput_needed, err; 2023 struct msghdr msg_sys; 2024 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); 2025 2026 if (!sock) 2027 goto out; 2028 2029 err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL); 2030 2031 fput_light(sock->file, fput_needed); 2032out: 2033 return err; 2034} 2035 2036/* 2037 * Linux sendmmsg interface 2038 */ 2039 2040int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, 2041 unsigned int flags) 2042{ 2043 int fput_needed, err, datagrams; 2044 struct socket *sock; 2045 struct mmsghdr __user *entry; 2046 struct compat_mmsghdr __user *compat_entry; 2047 struct msghdr msg_sys; 2048 struct used_address used_address; 2049 2050 if (vlen > UIO_MAXIOV) 2051 vlen = UIO_MAXIOV; 2052 2053 datagrams = 0; 2054 2055 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2056 if (!sock) 2057 return err; 2058 2059 used_address.name_len = UINT_MAX; 2060 entry = mmsg; 2061 compat_entry = (struct compat_mmsghdr __user *)mmsg; 2062 err = 0; 2063 2064 while (datagrams < vlen) { 2065 if (MSG_CMSG_COMPAT & flags) { 2066 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry, 2067 &msg_sys, flags, &used_address); 2068 if (err < 0) 2069 break; 2070 err = __put_user(err, &compat_entry->msg_len); 2071 ++compat_entry; 2072 } else { 2073 err = __sys_sendmsg(sock, (struct msghdr __user *)entry, 2074 &msg_sys, flags, &used_address); 2075 if (err < 0) 2076 break; 2077 err = put_user(err, &entry->msg_len); 2078 ++entry; 2079 } 2080 2081 if (err) 2082 break; 2083 ++datagrams; 2084 } 2085 2086 fput_light(sock->file, fput_needed); 2087 2088 /* We only return an error if no datagrams were able to be sent */ 2089 if (datagrams != 0) 2090 return datagrams; 2091 2092 return err; 2093} 2094 2095SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, 2096 unsigned int, vlen, unsigned int, flags) 2097{ 2098 return __sys_sendmmsg(fd, mmsg, vlen, flags); 2099} 2100 2101static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, 2102 struct msghdr *msg_sys, unsigned flags, int nosec) 2103{ 2104 struct compat_msghdr __user *msg_compat = 2105 (struct compat_msghdr __user *)msg; 2106 struct iovec iovstack[UIO_FASTIOV]; 2107 struct iovec *iov = iovstack; 2108 unsigned long cmsg_ptr; 2109 int err, iov_size, total_len, len; 2110 2111 /* kernel mode address */ 2112 struct sockaddr_storage addr; 2113 2114 /* user mode address pointers */ 2115 struct sockaddr __user *uaddr; 2116 int __user *uaddr_len; 2117 2118 if (MSG_CMSG_COMPAT & flags) { 2119 if (get_compat_msghdr(msg_sys, msg_compat)) 2120 return -EFAULT; 2121 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) 2122 return -EFAULT; 2123 2124 err = -EMSGSIZE; 2125 if (msg_sys->msg_iovlen > UIO_MAXIOV) 2126 goto out; 2127 2128 /* Check whether to allocate the iovec area */ 2129 err = -ENOMEM; 2130 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); 2131 if (msg_sys->msg_iovlen > UIO_FASTIOV) { 2132 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 2133 if (!iov) 2134 goto out; 2135 } 2136 2137 /* 2138 * Save the user-mode address (verify_iovec will change the 2139 * kernel msghdr to use the kernel address space) 2140 */ 2141 2142 uaddr = (__force void __user *)msg_sys->msg_name; 2143 uaddr_len = COMPAT_NAMELEN(msg); 2144 if (MSG_CMSG_COMPAT & flags) { 2145 err = verify_compat_iovec(msg_sys, iov, 2146 (struct sockaddr *)&addr, 2147 VERIFY_WRITE); 2148 } else 2149 err = verify_iovec(msg_sys, iov, 2150 (struct sockaddr *)&addr, 2151 VERIFY_WRITE); 2152 if (err < 0) 2153 goto out_freeiov; 2154 total_len = err; 2155 2156 cmsg_ptr = (unsigned long)msg_sys->msg_control; 2157 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 2158 2159 if (sock->file->f_flags & O_NONBLOCK) 2160 flags |= MSG_DONTWAIT; 2161 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, 2162 total_len, flags); 2163 if (err < 0) 2164 goto out_freeiov; 2165 len = err; 2166 2167 if (uaddr != NULL) { 2168 err = move_addr_to_user((struct sockaddr *)&addr, 2169 msg_sys->msg_namelen, uaddr, 2170 uaddr_len); 2171 if (err < 0) 2172 goto out_freeiov; 2173 } 2174 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), 2175 COMPAT_FLAGS(msg)); 2176 if (err) 2177 goto out_freeiov; 2178 if (MSG_CMSG_COMPAT & flags) 2179 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, 2180 &msg_compat->msg_controllen); 2181 else 2182 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, 2183 &msg->msg_controllen); 2184 if (err) 2185 goto out_freeiov; 2186 err = len; 2187 2188out_freeiov: 2189 if (iov != iovstack) 2190 sock_kfree_s(sock->sk, iov, iov_size); 2191out: 2192 return err; 2193} 2194 2195/* 2196 * BSD recvmsg interface 2197 */ 2198 2199SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, 2200 unsigned int, flags) 2201{ 2202 int fput_needed, err; 2203 struct msghdr msg_sys; 2204 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); 2205 2206 if (!sock) 2207 goto out; 2208 2209 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0); 2210 2211 fput_light(sock->file, fput_needed); 2212out: 2213 return err; 2214} 2215 2216/* 2217 * Linux recvmmsg interface 2218 */ 2219 2220int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, 2221 unsigned int flags, struct timespec *timeout) 2222{ 2223 int fput_needed, err, datagrams; 2224 struct socket *sock; 2225 struct mmsghdr __user *entry; 2226 struct compat_mmsghdr __user *compat_entry; 2227 struct msghdr msg_sys; 2228 struct timespec end_time; 2229 2230 if (timeout && 2231 poll_select_set_timeout(&end_time, timeout->tv_sec, 2232 timeout->tv_nsec)) 2233 return -EINVAL; 2234 2235 datagrams = 0; 2236 2237 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2238 if (!sock) 2239 return err; 2240 2241 err = sock_error(sock->sk); 2242 if (err) 2243 goto out_put; 2244 2245 entry = mmsg; 2246 compat_entry = (struct compat_mmsghdr __user *)mmsg; 2247 2248 while (datagrams < vlen) { 2249 /* 2250 * No need to ask LSM for more than the first datagram. 2251 */ 2252 if (MSG_CMSG_COMPAT & flags) { 2253 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry, 2254 &msg_sys, flags & ~MSG_WAITFORONE, 2255 datagrams); 2256 if (err < 0) 2257 break; 2258 err = __put_user(err, &compat_entry->msg_len); 2259 ++compat_entry; 2260 } else { 2261 err = __sys_recvmsg(sock, (struct msghdr __user *)entry, 2262 &msg_sys, flags & ~MSG_WAITFORONE, 2263 datagrams); 2264 if (err < 0) 2265 break; 2266 err = put_user(err, &entry->msg_len); 2267 ++entry; 2268 } 2269 2270 if (err) 2271 break; 2272 ++datagrams; 2273 2274 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */ 2275 if (flags & MSG_WAITFORONE) 2276 flags |= MSG_DONTWAIT; 2277 2278 if (timeout) { 2279 ktime_get_ts(timeout); 2280 *timeout = timespec_sub(end_time, *timeout); 2281 if (timeout->tv_sec < 0) { 2282 timeout->tv_sec = timeout->tv_nsec = 0; 2283 break; 2284 } 2285 2286 /* Timeout, return less than vlen datagrams */ 2287 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0) 2288 break; 2289 } 2290 2291 /* Out of band data, return right away */ 2292 if (msg_sys.msg_flags & MSG_OOB) 2293 break; 2294 } 2295 2296out_put: 2297 fput_light(sock->file, fput_needed); 2298 2299 if (err == 0) 2300 return datagrams; 2301 2302 if (datagrams != 0) { 2303 /* 2304 * We may return less entries than requested (vlen) if the 2305 * sock is non block and there aren't enough datagrams... 2306 */ 2307 if (err != -EAGAIN) { 2308 /* 2309 * ... or if recvmsg returns an error after we 2310 * received some datagrams, where we record the 2311 * error to return on the next call or if the 2312 * app asks about it using getsockopt(SO_ERROR). 2313 */ 2314 sock->sk->sk_err = -err; 2315 } 2316 2317 return datagrams; 2318 } 2319 2320 return err; 2321} 2322 2323SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, 2324 unsigned int, vlen, unsigned int, flags, 2325 struct timespec __user *, timeout) 2326{ 2327 int datagrams; 2328 struct timespec timeout_sys; 2329 2330 if (!timeout) 2331 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL); 2332 2333 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys))) 2334 return -EFAULT; 2335 2336 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys); 2337 2338 if (datagrams > 0 && 2339 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys))) 2340 datagrams = -EFAULT; 2341 2342 return datagrams; 2343} 2344 2345#ifdef __ARCH_WANT_SYS_SOCKETCALL 2346/* Argument list sizes for sys_socketcall */ 2347#define AL(x) ((x) * sizeof(unsigned long)) 2348static const unsigned char nargs[21] = { 2349 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), 2350 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), 2351 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), 2352 AL(4), AL(5), AL(4) 2353}; 2354 2355#undef AL 2356 2357/* 2358 * System call vectors. 2359 * 2360 * Argument checking cleaned up. Saved 20% in size. 2361 * This function doesn't need to set the kernel lock because 2362 * it is set by the callees. 2363 */ 2364 2365SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) 2366{ 2367 unsigned long a[6]; 2368 unsigned long a0, a1; 2369 int err; 2370 unsigned int len; 2371 2372 if (call < 1 || call > SYS_SENDMMSG) 2373 return -EINVAL; 2374 2375 len = nargs[call]; 2376 if (len > sizeof(a)) 2377 return -EINVAL; 2378 2379 /* copy_from_user should be SMP safe. */ 2380 if (copy_from_user(a, args, len)) 2381 return -EFAULT; 2382 2383 audit_socketcall(nargs[call] / sizeof(unsigned long), a); 2384 2385 a0 = a[0]; 2386 a1 = a[1]; 2387 2388 switch (call) { 2389 case SYS_SOCKET: 2390 err = sys_socket(a0, a1, a[2]); 2391 break; 2392 case SYS_BIND: 2393 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); 2394 break; 2395 case SYS_CONNECT: 2396 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); 2397 break; 2398 case SYS_LISTEN: 2399 err = sys_listen(a0, a1); 2400 break; 2401 case SYS_ACCEPT: 2402 err = sys_accept4(a0, (struct sockaddr __user *)a1, 2403 (int __user *)a[2], 0); 2404 break; 2405 case SYS_GETSOCKNAME: 2406 err = 2407 sys_getsockname(a0, (struct sockaddr __user *)a1, 2408 (int __user *)a[2]); 2409 break; 2410 case SYS_GETPEERNAME: 2411 err = 2412 sys_getpeername(a0, (struct sockaddr __user *)a1, 2413 (int __user *)a[2]); 2414 break; 2415 case SYS_SOCKETPAIR: 2416 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); 2417 break; 2418 case SYS_SEND: 2419 err = sys_send(a0, (void __user *)a1, a[2], a[3]); 2420 break; 2421 case SYS_SENDTO: 2422 err = sys_sendto(a0, (void __user *)a1, a[2], a[3], 2423 (struct sockaddr __user *)a[4], a[5]); 2424 break; 2425 case SYS_RECV: 2426 err = sys_recv(a0, (void __user *)a1, a[2], a[3]); 2427 break; 2428 case SYS_RECVFROM: 2429 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], 2430 (struct sockaddr __user *)a[4], 2431 (int __user *)a[5]); 2432 break; 2433 case SYS_SHUTDOWN: 2434 err = sys_shutdown(a0, a1); 2435 break; 2436 case SYS_SETSOCKOPT: 2437 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); 2438 break; 2439 case SYS_GETSOCKOPT: 2440 err = 2441 sys_getsockopt(a0, a1, a[2], (char __user *)a[3], 2442 (int __user *)a[4]); 2443 break; 2444 case SYS_SENDMSG: 2445 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); 2446 break; 2447 case SYS_SENDMMSG: 2448 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]); 2449 break; 2450 case SYS_RECVMSG: 2451 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); 2452 break; 2453 case SYS_RECVMMSG: 2454 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3], 2455 (struct timespec __user *)a[4]); 2456 break; 2457 case SYS_ACCEPT4: 2458 err = sys_accept4(a0, (struct sockaddr __user *)a1, 2459 (int __user *)a[2], a[3]); 2460 break; 2461 default: 2462 err = -EINVAL; 2463 break; 2464 } 2465 return err; 2466} 2467 2468#endif /* __ARCH_WANT_SYS_SOCKETCALL */ 2469 2470/** 2471 * sock_register - add a socket protocol handler 2472 * @ops: description of protocol 2473 * 2474 * This function is called by a protocol handler that wants to 2475 * advertise its address family, and have it linked into the 2476 * socket interface. The value ops->family coresponds to the 2477 * socket system call protocol family. 2478 */ 2479int sock_register(const struct net_proto_family *ops) 2480{ 2481 int err; 2482 2483 if (ops->family >= NPROTO) { 2484 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, 2485 NPROTO); 2486 return -ENOBUFS; 2487 } 2488 2489 spin_lock(&net_family_lock); 2490 if (rcu_dereference_protected(net_families[ops->family], 2491 lockdep_is_held(&net_family_lock))) 2492 err = -EEXIST; 2493 else { 2494 rcu_assign_pointer(net_families[ops->family], ops); 2495 err = 0; 2496 } 2497 spin_unlock(&net_family_lock); 2498 2499 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); 2500 return err; 2501} 2502EXPORT_SYMBOL(sock_register); 2503 2504/** 2505 * sock_unregister - remove a protocol handler 2506 * @family: protocol family to remove 2507 * 2508 * This function is called by a protocol handler that wants to 2509 * remove its address family, and have it unlinked from the 2510 * new socket creation. 2511 * 2512 * If protocol handler is a module, then it can use module reference 2513 * counts to protect against new references. If protocol handler is not 2514 * a module then it needs to provide its own protection in 2515 * the ops->create routine. 2516 */ 2517void sock_unregister(int family) 2518{ 2519 BUG_ON(family < 0 || family >= NPROTO); 2520 2521 spin_lock(&net_family_lock); 2522 rcu_assign_pointer(net_families[family], NULL); 2523 spin_unlock(&net_family_lock); 2524 2525 synchronize_rcu(); 2526 2527 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); 2528} 2529EXPORT_SYMBOL(sock_unregister); 2530 2531static int __init sock_init(void) 2532{ 2533 int err; 2534 2535 /* 2536 * Initialize sock SLAB cache. 2537 */ 2538 2539 sk_init(); 2540 2541 /* 2542 * Initialize skbuff SLAB cache 2543 */ 2544 skb_init(); 2545 2546 /* 2547 * Initialize the protocols module. 2548 */ 2549 2550 init_inodecache(); 2551 2552 err = register_filesystem(&sock_fs_type); 2553 if (err) 2554 goto out_fs; 2555 sock_mnt = kern_mount(&sock_fs_type); 2556 if (IS_ERR(sock_mnt)) { 2557 err = PTR_ERR(sock_mnt); 2558 goto out_mount; 2559 } 2560 2561 /* The real protocol initialization is performed in later initcalls. 2562 */ 2563 2564#ifdef CONFIG_NETFILTER 2565 netfilter_init(); 2566#endif 2567 2568#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING 2569 skb_timestamping_init(); 2570#endif 2571 2572out: 2573 return err; 2574 2575out_mount: 2576 unregister_filesystem(&sock_fs_type); 2577out_fs: 2578 goto out; 2579} 2580 2581core_initcall(sock_init); /* early initcall */ 2582 2583#ifdef CONFIG_PROC_FS 2584void socket_seq_show(struct seq_file *seq) 2585{ 2586 int cpu; 2587 int counter = 0; 2588 2589 for_each_possible_cpu(cpu) 2590 counter += per_cpu(sockets_in_use, cpu); 2591 2592 /* It can be negative, by the way. 8) */ 2593 if (counter < 0) 2594 counter = 0; 2595 2596 seq_printf(seq, "sockets: used %d\n", counter); 2597} 2598#endif /* CONFIG_PROC_FS */ 2599 2600#ifdef CONFIG_COMPAT 2601static int do_siocgstamp(struct net *net, struct socket *sock, 2602 unsigned int cmd, struct compat_timeval __user *up) 2603{ 2604 mm_segment_t old_fs = get_fs(); 2605 struct timeval ktv; 2606 int err; 2607 2608 set_fs(KERNEL_DS); 2609 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); 2610 set_fs(old_fs); 2611 if (!err) { 2612 err = put_user(ktv.tv_sec, &up->tv_sec); 2613 err |= __put_user(ktv.tv_usec, &up->tv_usec); 2614 } 2615 return err; 2616} 2617 2618static int do_siocgstampns(struct net *net, struct socket *sock, 2619 unsigned int cmd, struct compat_timespec __user *up) 2620{ 2621 mm_segment_t old_fs = get_fs(); 2622 struct timespec kts; 2623 int err; 2624 2625 set_fs(KERNEL_DS); 2626 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); 2627 set_fs(old_fs); 2628 if (!err) { 2629 err = put_user(kts.tv_sec, &up->tv_sec); 2630 err |= __put_user(kts.tv_nsec, &up->tv_nsec); 2631 } 2632 return err; 2633} 2634 2635static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32) 2636{ 2637 struct ifreq __user *uifr; 2638 int err; 2639 2640 uifr = compat_alloc_user_space(sizeof(struct ifreq)); 2641 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) 2642 return -EFAULT; 2643 2644 err = dev_ioctl(net, SIOCGIFNAME, uifr); 2645 if (err) 2646 return err; 2647 2648 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq))) 2649 return -EFAULT; 2650 2651 return 0; 2652} 2653 2654static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) 2655{ 2656 struct compat_ifconf ifc32; 2657 struct ifconf ifc; 2658 struct ifconf __user *uifc; 2659 struct compat_ifreq __user *ifr32; 2660 struct ifreq __user *ifr; 2661 unsigned int i, j; 2662 int err; 2663 2664 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) 2665 return -EFAULT; 2666 2667 memset(&ifc, 0, sizeof(ifc)); 2668 if (ifc32.ifcbuf == 0) { 2669 ifc32.ifc_len = 0; 2670 ifc.ifc_len = 0; 2671 ifc.ifc_req = NULL; 2672 uifc = compat_alloc_user_space(sizeof(struct ifconf)); 2673 } else { 2674 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) * 2675 sizeof(struct ifreq); 2676 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); 2677 ifc.ifc_len = len; 2678 ifr = ifc.ifc_req = (void __user *)(uifc + 1); 2679 ifr32 = compat_ptr(ifc32.ifcbuf); 2680 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) { 2681 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) 2682 return -EFAULT; 2683 ifr++; 2684 ifr32++; 2685 } 2686 } 2687 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf))) 2688 return -EFAULT; 2689 2690 err = dev_ioctl(net, SIOCGIFCONF, uifc); 2691 if (err) 2692 return err; 2693 2694 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf))) 2695 return -EFAULT; 2696 2697 ifr = ifc.ifc_req; 2698 ifr32 = compat_ptr(ifc32.ifcbuf); 2699 for (i = 0, j = 0; 2700 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; 2701 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) { 2702 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq))) 2703 return -EFAULT; 2704 ifr32++; 2705 ifr++; 2706 } 2707 2708 if (ifc32.ifcbuf == 0) { 2709 /* Translate from 64-bit structure multiple to 2710 * a 32-bit one. 2711 */ 2712 i = ifc.ifc_len; 2713 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq)); 2714 ifc32.ifc_len = i; 2715 } else { 2716 ifc32.ifc_len = i; 2717 } 2718 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) 2719 return -EFAULT; 2720 2721 return 0; 2722} 2723 2724static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) 2725{ 2726 struct compat_ethtool_rxnfc __user *compat_rxnfc; 2727 bool convert_in = false, convert_out = false; 2728 size_t buf_size = ALIGN(sizeof(struct ifreq), 8); 2729 struct ethtool_rxnfc __user *rxnfc; 2730 struct ifreq __user *ifr; 2731 u32 rule_cnt = 0, actual_rule_cnt; 2732 u32 ethcmd; 2733 u32 data; 2734 int ret; 2735 2736 if (get_user(data, &ifr32->ifr_ifru.ifru_data)) 2737 return -EFAULT; 2738 2739 compat_rxnfc = compat_ptr(data); 2740 2741 if (get_user(ethcmd, &compat_rxnfc->cmd)) 2742 return -EFAULT; 2743 2744 /* Most ethtool structures are defined without padding. 2745 * Unfortunately struct ethtool_rxnfc is an exception. 2746 */ 2747 switch (ethcmd) { 2748 default: 2749 break; 2750 case ETHTOOL_GRXCLSRLALL: 2751 /* Buffer size is variable */ 2752 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt)) 2753 return -EFAULT; 2754 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) 2755 return -ENOMEM; 2756 buf_size += rule_cnt * sizeof(u32); 2757 /* fall through */ 2758 case ETHTOOL_GRXRINGS: 2759 case ETHTOOL_GRXCLSRLCNT: 2760 case ETHTOOL_GRXCLSRULE: 2761 convert_out = true; 2762 /* fall through */ 2763 case ETHTOOL_SRXCLSRLDEL: 2764 case ETHTOOL_SRXCLSRLINS: 2765 buf_size += sizeof(struct ethtool_rxnfc); 2766 convert_in = true; 2767 break; 2768 } 2769 2770 ifr = compat_alloc_user_space(buf_size); 2771 rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8); 2772 2773 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) 2774 return -EFAULT; 2775 2776 if (put_user(convert_in ? rxnfc : compat_ptr(data), 2777 &ifr->ifr_ifru.ifru_data)) 2778 return -EFAULT; 2779 2780 if (convert_in) { 2781 /* We expect there to be holes between fs.m_ext and 2782 * fs.ring_cookie and at the end of fs, but nowhere else. 2783 */ 2784 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + 2785 sizeof(compat_rxnfc->fs.m_ext) != 2786 offsetof(struct ethtool_rxnfc, fs.m_ext) + 2787 sizeof(rxnfc->fs.m_ext)); 2788 BUILD_BUG_ON( 2789 offsetof(struct compat_ethtool_rxnfc, fs.location) - 2790 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != 2791 offsetof(struct ethtool_rxnfc, fs.location) - 2792 offsetof(struct ethtool_rxnfc, fs.ring_cookie)); 2793 2794 if (copy_in_user(rxnfc, compat_rxnfc, 2795 (void *)(&rxnfc->fs.m_ext + 1) - 2796 (void *)rxnfc) || 2797 copy_in_user(&rxnfc->fs.ring_cookie, 2798 &compat_rxnfc->fs.ring_cookie, 2799 (void *)(&rxnfc->fs.location + 1) - 2800 (void *)&rxnfc->fs.ring_cookie) || 2801 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, 2802 sizeof(rxnfc->rule_cnt))) 2803 return -EFAULT; 2804 } 2805 2806 ret = dev_ioctl(net, SIOCETHTOOL, ifr); 2807 if (ret) 2808 return ret; 2809 2810 if (convert_out) { 2811 if (copy_in_user(compat_rxnfc, rxnfc, 2812 (const void *)(&rxnfc->fs.m_ext + 1) - 2813 (const void *)rxnfc) || 2814 copy_in_user(&compat_rxnfc->fs.ring_cookie, 2815 &rxnfc->fs.ring_cookie, 2816 (const void *)(&rxnfc->fs.location + 1) - 2817 (const void *)&rxnfc->fs.ring_cookie) || 2818 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, 2819 sizeof(rxnfc->rule_cnt))) 2820 return -EFAULT; 2821 2822 if (ethcmd == ETHTOOL_GRXCLSRLALL) { 2823 /* As an optimisation, we only copy the actual 2824 * number of rules that the underlying 2825 * function returned. Since Mallory might 2826 * change the rule count in user memory, we 2827 * check that it is less than the rule count 2828 * originally given (as the user buffer size), 2829 * which has been range-checked. 2830 */ 2831 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt)) 2832 return -EFAULT; 2833 if (actual_rule_cnt < rule_cnt) 2834 rule_cnt = actual_rule_cnt; 2835 if (copy_in_user(&compat_rxnfc->rule_locs[0], 2836 &rxnfc->rule_locs[0], 2837 rule_cnt * sizeof(u32))) 2838 return -EFAULT; 2839 } 2840 } 2841 2842 return 0; 2843} 2844 2845static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) 2846{ 2847 void __user *uptr; 2848 compat_uptr_t uptr32; 2849 struct ifreq __user *uifr; 2850 2851 uifr = compat_alloc_user_space(sizeof(*uifr)); 2852 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) 2853 return -EFAULT; 2854 2855 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) 2856 return -EFAULT; 2857 2858 uptr = compat_ptr(uptr32); 2859 2860 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc)) 2861 return -EFAULT; 2862 2863 return dev_ioctl(net, SIOCWANDEV, uifr); 2864} 2865 2866static int bond_ioctl(struct net *net, unsigned int cmd, 2867 struct compat_ifreq __user *ifr32) 2868{ 2869 struct ifreq kifr; 2870 struct ifreq __user *uifr; 2871 mm_segment_t old_fs; 2872 int err; 2873 u32 data; 2874 void __user *datap; 2875 2876 switch (cmd) { 2877 case SIOCBONDENSLAVE: 2878 case SIOCBONDRELEASE: 2879 case SIOCBONDSETHWADDR: 2880 case SIOCBONDCHANGEACTIVE: 2881 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq))) 2882 return -EFAULT; 2883 2884 old_fs = get_fs(); 2885 set_fs(KERNEL_DS); 2886 err = dev_ioctl(net, cmd, 2887 (struct ifreq __user __force *) &kifr); 2888 set_fs(old_fs); 2889 2890 return err; 2891 case SIOCBONDSLAVEINFOQUERY: 2892 case SIOCBONDINFOQUERY: 2893 uifr = compat_alloc_user_space(sizeof(*uifr)); 2894 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) 2895 return -EFAULT; 2896 2897 if (get_user(data, &ifr32->ifr_ifru.ifru_data)) 2898 return -EFAULT; 2899 2900 datap = compat_ptr(data); 2901 if (put_user(datap, &uifr->ifr_ifru.ifru_data)) 2902 return -EFAULT; 2903 2904 return dev_ioctl(net, cmd, uifr); 2905 default: 2906 return -EINVAL; 2907 } 2908} 2909 2910static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, 2911 struct compat_ifreq __user *u_ifreq32) 2912{ 2913 struct ifreq __user *u_ifreq64; 2914 char tmp_buf[IFNAMSIZ]; 2915 void __user *data64; 2916 u32 data32; 2917 2918 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), 2919 IFNAMSIZ)) 2920 return -EFAULT; 2921 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) 2922 return -EFAULT; 2923 data64 = compat_ptr(data32); 2924 2925 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); 2926 2927 /* Don't check these user accesses, just let that get trapped 2928 * in the ioctl handler instead. 2929 */ 2930 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], 2931 IFNAMSIZ)) 2932 return -EFAULT; 2933 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) 2934 return -EFAULT; 2935 2936 return dev_ioctl(net, cmd, u_ifreq64); 2937} 2938 2939static int dev_ifsioc(struct net *net, struct socket *sock, 2940 unsigned int cmd, struct compat_ifreq __user *uifr32) 2941{ 2942 struct ifreq __user *uifr; 2943 int err; 2944 2945 uifr = compat_alloc_user_space(sizeof(*uifr)); 2946 if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) 2947 return -EFAULT; 2948 2949 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); 2950 2951 if (!err) { 2952 switch (cmd) { 2953 case SIOCGIFFLAGS: 2954 case SIOCGIFMETRIC: 2955 case SIOCGIFMTU: 2956 case SIOCGIFMEM: 2957 case SIOCGIFHWADDR: 2958 case SIOCGIFINDEX: 2959 case SIOCGIFADDR: 2960 case SIOCGIFBRDADDR: 2961 case SIOCGIFDSTADDR: 2962 case SIOCGIFNETMASK: 2963 case SIOCGIFPFLAGS: 2964 case SIOCGIFTXQLEN: 2965 case SIOCGMIIPHY: 2966 case SIOCGMIIREG: 2967 if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) 2968 err = -EFAULT; 2969 break; 2970 } 2971 } 2972 return err; 2973} 2974 2975static int compat_sioc_ifmap(struct net *net, unsigned int cmd, 2976 struct compat_ifreq __user *uifr32) 2977{ 2978 struct ifreq ifr; 2979 struct compat_ifmap __user *uifmap32; 2980 mm_segment_t old_fs; 2981 int err; 2982 2983 uifmap32 = &uifr32->ifr_ifru.ifru_map; 2984 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); 2985 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); 2986 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); 2987 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); 2988 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq); 2989 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma); 2990 err |= __get_user(ifr.ifr_map.port, &uifmap32->port); 2991 if (err) 2992 return -EFAULT; 2993 2994 old_fs = get_fs(); 2995 set_fs(KERNEL_DS); 2996 err = dev_ioctl(net, cmd, (void __user __force *)&ifr); 2997 set_fs(old_fs); 2998 2999 if (cmd == SIOCGIFMAP && !err) { 3000 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); 3001 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); 3002 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); 3003 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); 3004 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq); 3005 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma); 3006 err |= __put_user(ifr.ifr_map.port, &uifmap32->port); 3007 if (err) 3008 err = -EFAULT; 3009 } 3010 return err; 3011} 3012 3013static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32) 3014{ 3015 void __user *uptr; 3016 compat_uptr_t uptr32; 3017 struct ifreq __user *uifr; 3018 3019 uifr = compat_alloc_user_space(sizeof(*uifr)); 3020 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) 3021 return -EFAULT; 3022 3023 if (get_user(uptr32, &uifr32->ifr_data)) 3024 return -EFAULT; 3025 3026 uptr = compat_ptr(uptr32); 3027 3028 if (put_user(uptr, &uifr->ifr_data)) 3029 return -EFAULT; 3030 3031 return dev_ioctl(net, SIOCSHWTSTAMP, uifr); 3032} 3033 3034struct rtentry32 { 3035 u32 rt_pad1; 3036 struct sockaddr rt_dst; /* target address */ 3037 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ 3038 struct sockaddr rt_genmask; /* target network mask (IP) */ 3039 unsigned short rt_flags; 3040 short rt_pad2; 3041 u32 rt_pad3; 3042 unsigned char rt_tos; 3043 unsigned char rt_class; 3044 short rt_pad4; 3045 short rt_metric; /* +1 for binary compatibility! */ 3046 /* char * */ u32 rt_dev; /* forcing the device at add */ 3047 u32 rt_mtu; /* per route MTU/Window */ 3048 u32 rt_window; /* Window clamping */ 3049 unsigned short rt_irtt; /* Initial RTT */ 3050}; 3051 3052struct in6_rtmsg32 { 3053 struct in6_addr rtmsg_dst; 3054 struct in6_addr rtmsg_src; 3055 struct in6_addr rtmsg_gateway; 3056 u32 rtmsg_type; 3057 u16 rtmsg_dst_len; 3058 u16 rtmsg_src_len; 3059 u32 rtmsg_metric; 3060 u32 rtmsg_info; 3061 u32 rtmsg_flags; 3062 s32 rtmsg_ifindex; 3063}; 3064 3065static int routing_ioctl(struct net *net, struct socket *sock, 3066 unsigned int cmd, void __user *argp) 3067{ 3068 int ret; 3069 void *r = NULL; 3070 struct in6_rtmsg r6; 3071 struct rtentry r4; 3072 char devname[16]; 3073 u32 rtdev; 3074 mm_segment_t old_fs = get_fs(); 3075 3076 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ 3077 struct in6_rtmsg32 __user *ur6 = argp; 3078 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), 3079 3 * sizeof(struct in6_addr)); 3080 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); 3081 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); 3082 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); 3083 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); 3084 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); 3085 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); 3086 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); 3087 3088 r = (void *) &r6; 3089 } else { /* ipv4 */ 3090 struct rtentry32 __user *ur4 = argp; 3091 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), 3092 3 * sizeof(struct sockaddr)); 3093 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags)); 3094 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric)); 3095 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu)); 3096 ret |= __get_user(r4.rt_window, &(ur4->rt_window)); 3097 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt)); 3098 ret |= __get_user(rtdev, &(ur4->rt_dev)); 3099 if (rtdev) { 3100 ret |= copy_from_user(devname, compat_ptr(rtdev), 15); 3101 r4.rt_dev = (char __user __force *)devname; 3102 devname[15] = 0; 3103 } else 3104 r4.rt_dev = NULL; 3105 3106 r = (void *) &r4; 3107 } 3108 3109 if (ret) { 3110 ret = -EFAULT; 3111 goto out; 3112 } 3113 3114 set_fs(KERNEL_DS); 3115 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); 3116 set_fs(old_fs); 3117 3118out: 3119 return ret; 3120} 3121 3122/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE 3123 * for some operations; this forces use of the newer bridge-utils that 3124 * use compatible ioctls 3125 */ 3126static int old_bridge_ioctl(compat_ulong_t __user *argp) 3127{ 3128 compat_ulong_t tmp; 3129 3130 if (get_user(tmp, argp)) 3131 return -EFAULT; 3132 if (tmp == BRCTL_GET_VERSION) 3133 return BRCTL_VERSION + 1; 3134 return -EINVAL; 3135} 3136 3137static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, 3138 unsigned int cmd, unsigned long arg) 3139{ 3140 void __user *argp = compat_ptr(arg); 3141 struct sock *sk = sock->sk; 3142 struct net *net = sock_net(sk); 3143 3144 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) 3145 return siocdevprivate_ioctl(net, cmd, argp); 3146 3147 switch (cmd) { 3148 case SIOCSIFBR: 3149 case SIOCGIFBR: 3150 return old_bridge_ioctl(argp); 3151 case SIOCGIFNAME: 3152 return dev_ifname32(net, argp); 3153 case SIOCGIFCONF: 3154 return dev_ifconf(net, argp); 3155 case SIOCETHTOOL: 3156 return ethtool_ioctl(net, argp); 3157 case SIOCWANDEV: 3158 return compat_siocwandev(net, argp); 3159 case SIOCGIFMAP: 3160 case SIOCSIFMAP: 3161 return compat_sioc_ifmap(net, cmd, argp); 3162 case SIOCBONDENSLAVE: 3163 case SIOCBONDRELEASE: 3164 case SIOCBONDSETHWADDR: 3165 case SIOCBONDSLAVEINFOQUERY: 3166 case SIOCBONDINFOQUERY: 3167 case SIOCBONDCHANGEACTIVE: 3168 return bond_ioctl(net, cmd, argp); 3169 case SIOCADDRT: 3170 case SIOCDELRT: 3171 return routing_ioctl(net, sock, cmd, argp); 3172 case SIOCGSTAMP: 3173 return do_siocgstamp(net, sock, cmd, argp); 3174 case SIOCGSTAMPNS: 3175 return do_siocgstampns(net, sock, cmd, argp); 3176 case SIOCSHWTSTAMP: 3177 return compat_siocshwtstamp(net, argp); 3178 3179 case FIOSETOWN: 3180 case SIOCSPGRP: 3181 case FIOGETOWN: 3182 case SIOCGPGRP: 3183 case SIOCBRADDBR: 3184 case SIOCBRDELBR: 3185 case SIOCGIFVLAN: 3186 case SIOCSIFVLAN: 3187 case SIOCADDDLCI: 3188 case SIOCDELDLCI: 3189 return sock_ioctl(file, cmd, arg); 3190 3191 case SIOCGIFFLAGS: 3192 case SIOCSIFFLAGS: 3193 case SIOCGIFMETRIC: 3194 case SIOCSIFMETRIC: 3195 case SIOCGIFMTU: 3196 case SIOCSIFMTU: 3197 case SIOCGIFMEM: 3198 case SIOCSIFMEM: 3199 case SIOCGIFHWADDR: 3200 case SIOCSIFHWADDR: 3201 case SIOCADDMULTI: 3202 case SIOCDELMULTI: 3203 case SIOCGIFINDEX: 3204 case SIOCGIFADDR: 3205 case SIOCSIFADDR: 3206 case SIOCSIFHWBROADCAST: 3207 case SIOCDIFADDR: 3208 case SIOCGIFBRDADDR: 3209 case SIOCSIFBRDADDR: 3210 case SIOCGIFDSTADDR: 3211 case SIOCSIFDSTADDR: 3212 case SIOCGIFNETMASK: 3213 case SIOCSIFNETMASK: 3214 case SIOCSIFPFLAGS: 3215 case SIOCGIFPFLAGS: 3216 case SIOCGIFTXQLEN: 3217 case SIOCSIFTXQLEN: 3218 case SIOCBRADDIF: 3219 case SIOCBRDELIF: 3220 case SIOCSIFNAME: 3221 case SIOCGMIIPHY: 3222 case SIOCGMIIREG: 3223 case SIOCSMIIREG: 3224 return dev_ifsioc(net, sock, cmd, argp); 3225 3226 case SIOCSARP: 3227 case SIOCGARP: 3228 case SIOCDARP: 3229 case SIOCATMARK: 3230 return sock_do_ioctl(net, sock, cmd, arg); 3231 } 3232 3233 /* Prevent warning from compat_sys_ioctl, these always 3234 * result in -EINVAL in the native case anyway. */ 3235 switch (cmd) { 3236 case SIOCRTMSG: 3237 case SIOCGIFCOUNT: 3238 case SIOCSRARP: 3239 case SIOCGRARP: 3240 case SIOCDRARP: 3241 case SIOCSIFLINK: 3242 case SIOCGIFSLAVE: 3243 case SIOCSIFSLAVE: 3244 return -EINVAL; 3245 } 3246 3247 return -ENOIOCTLCMD; 3248} 3249 3250static long compat_sock_ioctl(struct file *file, unsigned cmd, 3251 unsigned long arg) 3252{ 3253 struct socket *sock = file->private_data; 3254 int ret = -ENOIOCTLCMD; 3255 struct sock *sk; 3256 struct net *net; 3257 3258 sk = sock->sk; 3259 net = sock_net(sk); 3260 3261 if (sock->ops->compat_ioctl) 3262 ret = sock->ops->compat_ioctl(sock, cmd, arg); 3263 3264 if (ret == -ENOIOCTLCMD && 3265 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)) 3266 ret = compat_wext_handle_ioctl(net, cmd, arg); 3267 3268 if (ret == -ENOIOCTLCMD) 3269 ret = compat_sock_ioctl_trans(file, sock, cmd, arg); 3270 3271 return ret; 3272} 3273#endif 3274 3275int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) 3276{ 3277 return sock->ops->bind(sock, addr, addrlen); 3278} 3279EXPORT_SYMBOL(kernel_bind); 3280 3281int kernel_listen(struct socket *sock, int backlog) 3282{ 3283 return sock->ops->listen(sock, backlog); 3284} 3285EXPORT_SYMBOL(kernel_listen); 3286 3287int kernel_accept(struct socket *sock, struct socket **newsock, int flags) 3288{ 3289 struct sock *sk = sock->sk; 3290 int err; 3291 3292 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, 3293 newsock); 3294 if (err < 0) 3295 goto done; 3296 3297 err = sock->ops->accept(sock, *newsock, flags); 3298 if (err < 0) { 3299 sock_release(*newsock); 3300 *newsock = NULL; 3301 goto done; 3302 } 3303 3304 (*newsock)->ops = sock->ops; 3305 __module_get((*newsock)->ops->owner); 3306 3307done: 3308 return err; 3309} 3310EXPORT_SYMBOL(kernel_accept); 3311 3312int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, 3313 int flags) 3314{ 3315 return sock->ops->connect(sock, addr, addrlen, flags); 3316} 3317EXPORT_SYMBOL(kernel_connect); 3318 3319int kernel_getsockname(struct socket *sock, struct sockaddr *addr, 3320 int *addrlen) 3321{ 3322 return sock->ops->getname(sock, addr, addrlen, 0); 3323} 3324EXPORT_SYMBOL(kernel_getsockname); 3325 3326int kernel_getpeername(struct socket *sock, struct sockaddr *addr, 3327 int *addrlen) 3328{ 3329 return sock->ops->getname(sock, addr, addrlen, 1); 3330} 3331EXPORT_SYMBOL(kernel_getpeername); 3332 3333int kernel_getsockopt(struct socket *sock, int level, int optname, 3334 char *optval, int *optlen) 3335{ 3336 mm_segment_t oldfs = get_fs(); 3337 char __user *uoptval; 3338 int __user *uoptlen; 3339 int err; 3340 3341 uoptval = (char __user __force *) optval; 3342 uoptlen = (int __user __force *) optlen; 3343 3344 set_fs(KERNEL_DS); 3345 if (level == SOL_SOCKET) 3346 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen); 3347 else 3348 err = sock->ops->getsockopt(sock, level, optname, uoptval, 3349 uoptlen); 3350 set_fs(oldfs); 3351 return err; 3352} 3353EXPORT_SYMBOL(kernel_getsockopt); 3354 3355int kernel_setsockopt(struct socket *sock, int level, int optname, 3356 char *optval, unsigned int optlen) 3357{ 3358 mm_segment_t oldfs = get_fs(); 3359 char __user *uoptval; 3360 int err; 3361 3362 uoptval = (char __user __force *) optval; 3363 3364 set_fs(KERNEL_DS); 3365 if (level == SOL_SOCKET) 3366 err = sock_setsockopt(sock, level, optname, uoptval, optlen); 3367 else 3368 err = sock->ops->setsockopt(sock, level, optname, uoptval, 3369 optlen); 3370 set_fs(oldfs); 3371 return err; 3372} 3373EXPORT_SYMBOL(kernel_setsockopt); 3374 3375int kernel_sendpage(struct socket *sock, struct page *page, int offset, 3376 size_t size, int flags) 3377{ 3378 sock_update_classid(sock->sk); 3379 3380 if (sock->ops->sendpage) 3381 return sock->ops->sendpage(sock, page, offset, size, flags); 3382 3383 return sock_no_sendpage(sock, page, offset, size, flags); 3384} 3385EXPORT_SYMBOL(kernel_sendpage); 3386 3387int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) 3388{ 3389 mm_segment_t oldfs = get_fs(); 3390 int err; 3391 3392 set_fs(KERNEL_DS); 3393 err = sock->ops->ioctl(sock, cmd, arg); 3394 set_fs(oldfs); 3395 3396 return err; 3397} 3398EXPORT_SYMBOL(kernel_sock_ioctl); 3399 3400int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) 3401{ 3402 return sock->ops->shutdown(sock, how); 3403} 3404EXPORT_SYMBOL(kernel_sock_shutdown);