PageRenderTime 96ms CodeModel.GetById 3ms app.highlight 77ms RepoModel.GetById 1ms app.codeStats 1ms

/net/tipc/socket.c

https://bitbucket.org/emiliolopez/linux
C | 2902 lines | 2025 code | 359 blank | 518 comment | 419 complexity | 93c97618c7d34bfdf95b0cb1702ae122 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/*
   2 * net/tipc/socket.c: TIPC socket API
   3 *
   4 * Copyright (c) 2001-2007, 2012-2016, Ericsson AB
   5 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
   6 * All rights reserved.
   7 *
   8 * Redistribution and use in source and binary forms, with or without
   9 * modification, are permitted provided that the following conditions are met:
  10 *
  11 * 1. Redistributions of source code must retain the above copyright
  12 *    notice, this list of conditions and the following disclaimer.
  13 * 2. Redistributions in binary form must reproduce the above copyright
  14 *    notice, this list of conditions and the following disclaimer in the
  15 *    documentation and/or other materials provided with the distribution.
  16 * 3. Neither the names of the copyright holders nor the names of its
  17 *    contributors may be used to endorse or promote products derived from
  18 *    this software without specific prior written permission.
  19 *
  20 * Alternatively, this software may be distributed under the terms of the
  21 * GNU General Public License ("GPL") version 2 as published by the Free
  22 * Software Foundation.
  23 *
  24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  34 * POSSIBILITY OF SUCH DAMAGE.
  35 */
  36
  37#include <linux/rhashtable.h>
  38#include <linux/sched/signal.h>
  39
  40#include "core.h"
  41#include "name_table.h"
  42#include "node.h"
  43#include "link.h"
  44#include "name_distr.h"
  45#include "socket.h"
  46#include "bcast.h"
  47#include "netlink.h"
  48
  49#define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
  50#define CONN_PROBING_INTERVAL	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
  51#define TIPC_FWD_MSG		1
  52#define TIPC_MAX_PORT		0xffffffff
  53#define TIPC_MIN_PORT		1
   54#define TIPC_ACK_RATE		4       /* ACK at 1/4 of rcv window size */
  55
/* TIPC socket states, aliased onto the generic TCP_* sk_state values so
 * that core socket code interprets sk->sk_state consistently.
 */
   56enum {
   57	TIPC_LISTEN = TCP_LISTEN,
   58	TIPC_ESTABLISHED = TCP_ESTABLISHED,
   59	TIPC_OPEN = TCP_CLOSE,
   60	TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
   61	TIPC_CONNECTING = TCP_SYN_SENT,
   62};
  63
/**
 * struct tipc_sock - TIPC socket structure
 * @sk: socket - interacts with 'port' and with user via the socket API
 * @conn_type: TIPC type used when connection was established
 * @conn_instance: TIPC instance used when connection was established
 * @published: non-zero if port has one or more associated names
 * @max_pkt: maximum packet size "hint" used when building messages sent by port
 * @portid: unique port identity in TIPC socket hash table
 * @phdr: preformatted message header used when sending messages
 * @cong_links: list of congested links
 * @publications: list of publications for port
 * @pub_count: total # of publications port has made during its lifetime
 * @conn_timeout: the time we can wait for an unresponded setup request
 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
 * @probe_unacked: true while a sent connection probe awaits the peer's reply
 * @cong_link_cnt: number of congested links
 * @snt_unacked: # messages sent by socket, and not yet acked by peer
 * @snd_win: send window; peer-advertised when TIPC_BLOCK_FLOWCTL is supported
 * @peer_caps: capability bits advertised by the peer (e.g. TIPC_BLOCK_FLOWCTL)
 * @rcv_unacked: # messages read by user, but not yet acked back to peer
 * @rcv_win: receive window, initialized alongside @snd_win in tipc_sk_create()
 * @peer: 'connected' peer for dgram/rdm
 * @node: hash table node
 * @mc_method: cookie for use between socket and broadcast layer
 * @rcu: rcu struct for tipc_sock
 */
   88struct tipc_sock {
   89	struct sock sk;
   90	u32 conn_type;
   91	u32 conn_instance;
   92	int published;
   93	u32 max_pkt;
   94	u32 portid;
   95	struct tipc_msg phdr;
   96	struct list_head cong_links;
   97	struct list_head publications;
   98	u32 pub_count;
   99	uint conn_timeout;
  100	atomic_t dupl_rcvcnt;
  101	bool probe_unacked;
  102	u16 cong_link_cnt;
  103	u16 snt_unacked;
  104	u16 snd_win;
  105	u16 peer_caps;
  106	u16 rcv_unacked;
  107	u16 rcv_win;
  108	struct sockaddr_tipc peer;
  109	struct rhash_head node;
  110	struct tipc_mc_method mc_method;
  111	struct rcu_head rcu;
  112};
 113
 114static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
 115static void tipc_data_ready(struct sock *sk);
 116static void tipc_write_space(struct sock *sk);
 117static void tipc_sock_destruct(struct sock *sk);
 118static int tipc_release(struct socket *sock);
 119static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
 120		       bool kern);
 121static void tipc_sk_timeout(unsigned long data);
 122static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
 123			   struct tipc_name_seq const *seq);
 124static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
 125			    struct tipc_name_seq const *seq);
 126static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
 127static int tipc_sk_insert(struct tipc_sock *tsk);
 128static void tipc_sk_remove(struct tipc_sock *tsk);
 129static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
 130static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
 131
 132static const struct proto_ops packet_ops;
 133static const struct proto_ops stream_ops;
 134static const struct proto_ops msg_ops;
 135static struct proto tipc_proto;
 136static const struct rhashtable_params tsk_rht_params;
 137
 138static u32 tsk_own_node(struct tipc_sock *tsk)
 139{
 140	return msg_prevnode(&tsk->phdr);
 141}
 142
 143static u32 tsk_peer_node(struct tipc_sock *tsk)
 144{
 145	return msg_destnode(&tsk->phdr);
 146}
 147
 148static u32 tsk_peer_port(struct tipc_sock *tsk)
 149{
 150	return msg_destport(&tsk->phdr);
 151}
 152
 153static  bool tsk_unreliable(struct tipc_sock *tsk)
 154{
 155	return msg_src_droppable(&tsk->phdr) != 0;
 156}
 157
 158static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
 159{
 160	msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0);
 161}
 162
 163static bool tsk_unreturnable(struct tipc_sock *tsk)
 164{
 165	return msg_dest_droppable(&tsk->phdr) != 0;
 166}
 167
 168static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
 169{
 170	msg_set_dest_droppable(&tsk->phdr, unreturnable ? 1 : 0);
 171}
 172
 173static int tsk_importance(struct tipc_sock *tsk)
 174{
 175	return msg_importance(&tsk->phdr);
 176}
 177
 178static int tsk_set_importance(struct tipc_sock *tsk, int imp)
 179{
 180	if (imp > TIPC_CRITICAL_IMPORTANCE)
 181		return -EINVAL;
 182	msg_set_importance(&tsk->phdr, (u32)imp);
 183	return 0;
 184}
 185
/* Upcast from the embedded struct sock to its enclosing tipc_sock */
  186static struct tipc_sock *tipc_sk(const struct sock *sk)
  187{
  188	return container_of(sk, struct tipc_sock, sk);
  189}
 190
 191static bool tsk_conn_cong(struct tipc_sock *tsk)
 192{
 193	return tsk->snt_unacked > tsk->snd_win;
 194}
 195
 196/* tsk_blocks(): translate a buffer size in bytes to number of
 197 * advertisable blocks, taking into account the ratio truesize(len)/len
 198 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
 199 */
 200static u16 tsk_adv_blocks(int len)
 201{
 202	return len / FLOWCTL_BLK_SZ / 4;
 203}
 204
 205/* tsk_inc(): increment counter for sent or received data
 206 * - If block based flow control is not supported by peer we
 207 *   fall back to message based ditto, incrementing the counter
 208 */
 209static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
 210{
 211	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
 212		return ((msglen / FLOWCTL_BLK_SZ) + 1);
 213	return 1;
 214}
 215
 216/**
 217 * tsk_advance_rx_queue - discard first buffer in socket receive queue
 218 *
 219 * Caller must hold socket lock
 220 */
 221static void tsk_advance_rx_queue(struct sock *sk)
 222{
 223	kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
 224}
 225
 226/* tipc_sk_respond() : send response message back to sender
 227 */
 228static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
 229{
 230	u32 selector;
 231	u32 dnode;
 232	u32 onode = tipc_own_addr(sock_net(sk));
 233
 234	if (!tipc_msg_reverse(onode, &skb, err))
 235		return;
 236
 237	dnode = msg_destnode(buf_msg(skb));
 238	selector = msg_origport(buf_msg(skb));
 239	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
 240}
 241
 242/**
 243 * tsk_rej_rx_queue - reject all buffers in socket receive queue
 244 *
 245 * Caller must hold socket lock
 246 */
 247static void tsk_rej_rx_queue(struct sock *sk)
 248{
 249	struct sk_buff *skb;
 250
 251	while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
 252		tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
 253}
 254
 255static bool tipc_sk_connected(struct sock *sk)
 256{
 257	return sk->sk_state == TIPC_ESTABLISHED;
 258}
 259
 260/* tipc_sk_type_connectionless - check if the socket is datagram socket
 261 * @sk: socket
 262 *
 263 * Returns true if connection less, false otherwise
 264 */
 265static bool tipc_sk_type_connectionless(struct sock *sk)
 266{
 267	return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM;
 268}
 269
  270/* tsk_peer_msg - verify if message was sent by connected port's peer
  271 *
  272 * Handles cases where the node's network address has changed from
  273 * the default of <0.0.0> to its configured setting.
  274 */
  275static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
  276{
  277	struct sock *sk = &tsk->sk;
  278	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
  279	u32 peer_port = tsk_peer_port(tsk);
  280	u32 orig_node;
  281	u32 peer_node;
  282
	/* No peer to compare against unless the socket is fully connected */
  283	if (unlikely(!tipc_sk_connected(sk)))
  284		return false;
  285
  286	if (unlikely(msg_origport(msg) != peer_port))
  287		return false;
  288
  289	orig_node = msg_orignode(msg);
  290	peer_node = tsk_peer_node(tsk);
  291
  292	if (likely(orig_node == peer_node))
  293		return true;
  294
	/* A zero node address stands for the own, not-yet-configured node */
  295	if (!orig_node && (peer_node == tn->own_addr))
  296		return true;
  297
  298	if (!peer_node && (orig_node == tn->own_addr))
  299		return true;
  300
  301	return false;
  302}
 303
  304/* tipc_set_sk_state - set the sk_state of the socket
  305 * @sk: socket
  306 *
  307 * Caller must hold socket lock
  308 *
  309 * Returns 0 on success, errno otherwise
  310 */
  311static int tipc_set_sk_state(struct sock *sk, int state)
  312{
  313	int oldsk_state = sk->sk_state;
  314	int res = -EINVAL;
  315
	/* Only a restricted set of state transitions is legal; anything
	 * not matched below leaves res at -EINVAL and the state unchanged.
	 */
  316	switch (state) {
  317	case TIPC_OPEN:
  318		res = 0;
  319		break;
  320	case TIPC_LISTEN:
  321	case TIPC_CONNECTING:
  322		if (oldsk_state == TIPC_OPEN)
  323			res = 0;
  324		break;
  325	case TIPC_ESTABLISHED:
  326		if (oldsk_state == TIPC_CONNECTING ||
  327		    oldsk_state == TIPC_OPEN)
  328			res = 0;
  329		break;
  330	case TIPC_DISCONNECTING:
  331		if (oldsk_state == TIPC_CONNECTING ||
  332		    oldsk_state == TIPC_ESTABLISHED)
  333			res = 0;
  334		break;
  335	}
  336
  337	if (!res)
  338		sk->sk_state = state;
  339
  340	return res;
  341}
 342
/* tipc_sk_sock_err - check whether waiting should be aborted
 * @sock: socket to check
 * @timeout: remaining wait time, consumed by this check
 *
 * Returns 0 if it is still OK to wait, otherwise a negative errno.
 */
  343static int tipc_sk_sock_err(struct socket *sock, long *timeout)
  344{
  345	struct sock *sk = sock->sk;
  346	int err = sock_error(sk);
  347	int typ = sock->type;
  348
  349	if (err)
  350		return err;
	/* Connection-oriented sockets must still have a live connection */
  351	if (typ == SOCK_STREAM || typ == SOCK_SEQPACKET) {
  352		if (sk->sk_state == TIPC_DISCONNECTING)
  353			return -EPIPE;
  354		else if (!tipc_sk_connected(sk))
  355			return -ENOTCONN;
  356	}
  357	if (!*timeout)
  358		return -EAGAIN;
  359	if (signal_pending(current))
  360		return sock_intr_errno(*timeout);
  361
  362	return 0;
  363}
 364
/* tipc_wait_for_cond - sleep until condition_ is true, an error occurs, or
 * the timeout expires. condition_ is re-evaluated with the socket lock held
 * after every wakeup; the lock is released/re-acquired around each sleep.
 * Evaluates to 0 on success or a negative errno from tipc_sk_sock_err().
 */
  365#define tipc_wait_for_cond(sock_, timeo_, condition_)			       \
  366({                                                                             \
  367	struct sock *sk_;						       \
  368	int rc_;							       \
  369									       \
  370	while ((rc_ = !(condition_))) {					       \
  371		DEFINE_WAIT_FUNC(wait_, woken_wake_function);	               \
  372		sk_ = (sock_)->sk;					       \
  373		rc_ = tipc_sk_sock_err((sock_), timeo_);		       \
  374		if (rc_)						       \
  375			break;						       \
  376		prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE);    \
  377		release_sock(sk_);					       \
  378		*(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
  379		sched_annotate_sleep();				               \
  380		lock_sock(sk_);						       \
  381		remove_wait_queue(sk_sleep(sk_), &wait_);		       \
  382	}								       \
  383	rc_;								       \
  384})
 385
  386/**
  387 * tipc_sk_create - create a TIPC socket
  388 * @net: network namespace (must be default network)
  389 * @sock: pre-allocated socket structure
  390 * @protocol: protocol indicator (must be 0)
  391 * @kern: caused by kernel or by userspace?
  392 *
  393 * This routine creates additional data structures used by the TIPC socket,
  394 * initializes them, and links them together.
  395 *
  396 * Returns 0 on success, errno otherwise
  397 */
  398static int tipc_sk_create(struct net *net, struct socket *sock,
  399			  int protocol, int kern)
  400{
  401	struct tipc_net *tn;
  402	const struct proto_ops *ops;
  403	struct sock *sk;
  404	struct tipc_sock *tsk;
  405	struct tipc_msg *msg;
  406
  407	/* Validate arguments */
  408	if (unlikely(protocol != 0))
  409		return -EPROTONOSUPPORT;
  410
	/* Pick the proto_ops table matching the requested socket type */
  411	switch (sock->type) {
  412	case SOCK_STREAM:
  413		ops = &stream_ops;
  414		break;
  415	case SOCK_SEQPACKET:
  416		ops = &packet_ops;
  417		break;
  418	case SOCK_DGRAM:
  419	case SOCK_RDM:
  420		ops = &msg_ops;
  421		break;
  422	default:
  423		return -EPROTOTYPE;
  424	}
  425
  426	/* Allocate socket's protocol area */
  427	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern);
  428	if (sk == NULL)
  429		return -ENOMEM;
  430
  431	tsk = tipc_sk(sk);
  432	tsk->max_pkt = MAX_PKT_DEFAULT;
  433	INIT_LIST_HEAD(&tsk->publications);
  434	INIT_LIST_HEAD(&tsk->cong_links);
  435	msg = &tsk->phdr;
  436	tn = net_generic(sock_net(sk), tipc_net_id);
  437
  438	/* Finish initializing socket data structures */
  439	sock->ops = ops;
  440	sock_init_data(sock, sk);
  441	tipc_set_sk_state(sk, TIPC_OPEN);
  442	if (tipc_sk_insert(tsk)) {
  443		pr_warn("Socket create failed; port number exhausted\n");
		/* NOTE(review): the sk allocated above is not released on
		 * this error path - looks like a leak; confirm upstream.
		 */
  444		return -EINVAL;
  445	}
  446
  447	/* Ensure tsk is visible before we read own_addr. */
  448	smp_mb();
  449
  450	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
  451		      NAMED_H_SIZE, 0);
  452
  453	msg_set_origport(msg, tsk->portid);
  454	setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
  455	sk->sk_shutdown = 0;
  456	sk->sk_backlog_rcv = tipc_backlog_rcv;
  457	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
  458	sk->sk_data_ready = tipc_data_ready;
  459	sk->sk_write_space = tipc_write_space;
  460	sk->sk_destruct = tipc_sock_destruct;
  461	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
  462	atomic_set(&tsk->dupl_rcvcnt, 0);
  463
  464	/* Start out with safe limits until we receive an advertised window */
  465	tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
  466	tsk->rcv_win = tsk->snd_win;
  467
  468	if (tipc_sk_type_connectionless(sk)) {
  469		tsk_set_unreturnable(tsk, true);
  470		if (sock->type == SOCK_DGRAM)
  471			tsk_set_unreliable(tsk, true);
  472	}
  473
  474	return 0;
  475}
 476
 477static void tipc_sk_callback(struct rcu_head *head)
 478{
 479	struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu);
 480
 481	sock_put(&tsk->sk);
 482}
 483
  484/* Caller should hold socket lock for the socket. */
/* __tipc_shutdown - drain the receive queue and, for connection-oriented
 * sockets, take the connection down, notifying the peer with 'error'.
 */
  485static void __tipc_shutdown(struct socket *sock, int error)
  486{
  487	struct sock *sk = sock->sk;
  488	struct tipc_sock *tsk = tipc_sk(sk);
  489	struct net *net = sock_net(sk);
  490	long timeout = CONN_TIMEOUT_DEFAULT;
  491	u32 dnode = tsk_peer_node(tsk);
  492	struct sk_buff *skb;
  493
  494	/* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */
  495	tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
  496					    !tsk_conn_cong(tsk)));
  497
  498	/* Reject all unreceived messages, except on an active connection
  499	 * (which disconnects locally & sends a 'FIN+' to peer).
  500	 */
  501	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* Partially read buffers are just discarded, not rejected */
  502		if (TIPC_SKB_CB(skb)->bytes_read) {
  503			kfree_skb(skb);
  504			continue;
  505		}
  506		if (!tipc_sk_type_connectionless(sk) &&
  507		    sk->sk_state != TIPC_DISCONNECTING) {
  508			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
  509			tipc_node_remove_conn(net, dnode, tsk->portid);
  510		}
  511		tipc_sk_respond(sk, skb, error);
  512	}
  513
  514	if (tipc_sk_type_connectionless(sk))
  515		return;
  516
	/* Still connected: send a FIN/abort to the peer and tear down */
  517	if (sk->sk_state != TIPC_DISCONNECTING) {
  518		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
  519				      TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
  520				      tsk_own_node(tsk), tsk_peer_port(tsk),
  521				      tsk->portid, error);
  522		if (skb)
  523			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
  524		tipc_node_remove_conn(net, dnode, tsk->portid);
  525		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
  526	}
  527}
 528
  529/**
  530 * tipc_release - destroy a TIPC socket
  531 * @sock: socket to destroy
  532 *
  533 * This routine cleans up any messages that are still queued on the socket.
  534 * For DGRAM and RDM socket types, all queued messages are rejected.
  535 * For SEQPACKET and STREAM socket types, the first message is rejected
  536 * and any others are discarded.  (If the first message on a STREAM socket
  537 * is partially-read, it is discarded and the next one is rejected instead.)
  538 *
  539 * NOTE: Rejected messages are not necessarily returned to the sender!  They
  540 * are returned or discarded according to the "destination droppable" setting
  541 * specified for the message by the sender.
  542 *
  543 * Returns 0 on success, errno otherwise
  544 */
  545static int tipc_release(struct socket *sock)
  546{
  547	struct sock *sk = sock->sk;
  548	struct tipc_sock *tsk;
  549
  550	/*
  551	 * Exit if socket isn't fully initialized (occurs when a failed accept()
  552	 * releases a pre-allocated child socket that was never used)
  553	 */
  554	if (sk == NULL)
  555		return 0;
  556
  557	tsk = tipc_sk(sk);
  558	lock_sock(sk);
  559
  560	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
  561	sk->sk_shutdown = SHUTDOWN_MASK;
  562	tipc_sk_withdraw(tsk, 0, NULL);
  563	sk_stop_timer(sk, &sk->sk_timer);
  564	tipc_sk_remove(tsk);
  565
  566	/* Reject any messages that accumulated in backlog queue */
  567	release_sock(sk);
  568	u32_list_purge(&tsk->cong_links);
  569	tsk->cong_link_cnt = 0;
	/* Final sock_put() is deferred until an RCU grace period elapses */
  570	call_rcu(&tsk->rcu, tipc_sk_callback);
  571	sock->sk = NULL;
  572
  573	return 0;
  574}
 575
  576/**
 * tipc_bind - associate or disassociate TIPC name(s) with a socket
  578 * @sock: socket structure
  579 * @uaddr: socket address describing name(s) and desired operation
  580 * @uaddr_len: size of socket address data structure
  581 *
  582 * Name and name sequence binding is indicated using a positive scope value;
  583 * a negative scope value unbinds the specified name.  Specifying no name
  584 * (i.e. a socket address length of 0) unbinds all names from the socket.
  585 *
  586 * Returns 0 on success, errno otherwise
  587 *
 * NOTE: contrary to an older comment here, this routine DOES take the
 *       socket lock around the publish/withdraw operations below.
  590 */
  591static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
  592		     int uaddr_len)
  593{
  594	struct sock *sk = sock->sk;
  595	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
  596	struct tipc_sock *tsk = tipc_sk(sk);
  597	int res = -EINVAL;
  598
  599	lock_sock(sk);
  600	if (unlikely(!uaddr_len)) {
  601		res = tipc_sk_withdraw(tsk, 0, NULL);
  602		goto exit;
  603	}
  604
  605	if (uaddr_len < sizeof(struct sockaddr_tipc)) {
  606		res = -EINVAL;
  607		goto exit;
  608	}
  609	if (addr->family != AF_TIPC) {
  610		res = -EAFNOSUPPORT;
  611		goto exit;
  612	}
  613
	/* A single name is handled as a one-element name sequence */
  614	if (addr->addrtype == TIPC_ADDR_NAME)
  615		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
  616	else if (addr->addrtype != TIPC_ADDR_NAMESEQ) {
  617		res = -EAFNOSUPPORT;
  618		goto exit;
  619	}
  620
	/* Reserved types are only bindable by the topology/config services */
  621	if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) &&
  622	    (addr->addr.nameseq.type != TIPC_TOP_SRV) &&
  623	    (addr->addr.nameseq.type != TIPC_CFG_SRV)) {
  624		res = -EACCES;
  625		goto exit;
  626	}
  627
  628	res = (addr->scope > 0) ?
  629		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
  630		tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
  631exit:
  632	release_sock(sk);
  633	return res;
  634}
 635
  636/**
  637 * tipc_getname - get port ID of socket or peer socket
  638 * @sock: socket structure
  639 * @uaddr: area for returned socket address
  640 * @uaddr_len: area for returned length of socket address
  641 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
  642 *
  643 * Returns 0 on success, errno otherwise
  644 *
  645 * NOTE: This routine doesn't need to take the socket lock since it only
  646 *       accesses socket information that is unchanging (or which changes in
  647 *       a completely predictable manner).
  648 */
  649static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
  650			int *uaddr_len, int peer)
  651{
  652	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
  653	struct sock *sk = sock->sk;
  654	struct tipc_sock *tsk = tipc_sk(sk);
  655	struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);
  656
  657	memset(addr, 0, sizeof(*addr));
  658	if (peer) {
		/* peer == 2 also allows reporting a just-disconnected peer */
  659		if ((!tipc_sk_connected(sk)) &&
  660		    ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
  661			return -ENOTCONN;
  662		addr->addr.id.ref = tsk_peer_port(tsk);
  663		addr->addr.id.node = tsk_peer_node(tsk);
  664	} else {
  665		addr->addr.id.ref = tsk->portid;
  666		addr->addr.id.node = tn->own_addr;
  667	}
  668
  669	*uaddr_len = sizeof(*addr);
  670	addr->addrtype = TIPC_ADDR_ID;
  671	addr->family = AF_TIPC;
  672	addr->scope = 0;
  673	addr->addr.name.domain = 0;
  674
  675	return 0;
  676}
 677
  678/**
  679 * tipc_poll - read and possibly block on pollmask
  680 * @file: file structure associated with the socket
  681 * @sock: socket for which to calculate the poll bits
  682 * @wait: ???
  683 *
  684 * Returns pollmask value
  685 *
  686 * COMMENTARY:
  687 * It appears that the usual socket locking mechanisms are not useful here
  688 * since the pollmask info is potentially out-of-date the moment this routine
  689 * exits.  TCP and other protocols seem to rely on higher level poll routines
  690 * to handle any preventable race conditions, so TIPC will do the same ...
  691 *
  692 * IMPORTANT: The fact that a read or write operation is indicated does NOT
  693 * imply that the operation will succeed, merely that it should be performed
  694 * and will not block.
  695 */
  696static unsigned int tipc_poll(struct file *file, struct socket *sock,
  697			      poll_table *wait)
  698{
  699	struct sock *sk = sock->sk;
  700	struct tipc_sock *tsk = tipc_sk(sk);
  701	u32 mask = 0;
  702
  703	sock_poll_wait(file, sk_sleep(sk), wait);
  704
  705	if (sk->sk_shutdown & RCV_SHUTDOWN)
  706		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
  707	if (sk->sk_shutdown == SHUTDOWN_MASK)
  708		mask |= POLLHUP;
  709
	/* Readability/writability criteria depend on the socket state */
  710	switch (sk->sk_state) {
  711	case TIPC_ESTABLISHED:
  712		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
  713			mask |= POLLOUT;
  714		/* fall thru' */
  715	case TIPC_LISTEN:
  716	case TIPC_CONNECTING:
  717		if (!skb_queue_empty(&sk->sk_receive_queue))
  718			mask |= (POLLIN | POLLRDNORM);
  719		break;
  720	case TIPC_OPEN:
  721		if (!tsk->cong_link_cnt)
  722			mask |= POLLOUT;
  723		if (tipc_sk_type_connectionless(sk) &&
  724		    (!skb_queue_empty(&sk->sk_receive_queue)))
  725			mask |= (POLLIN | POLLRDNORM);
  726		break;
  727	case TIPC_DISCONNECTING:
  728		mask = (POLLIN | POLLRDNORM | POLLHUP);
  729		break;
  730	}
  731
  732	return mask;
  733}
 734
 735/**
 736 * tipc_sendmcast - send multicast message
 737 * @sock: socket structure
 738 * @seq: destination address
 739 * @msg: message to send
 740 * @dlen: length of data to send
 741 * @timeout: timeout to wait for wakeup
 742 *
 743 * Called from function tipc_sendmsg(), which has done all sanity checks
 744 * Returns the number of bytes sent on success, or errno
 745 */
 746static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
 747			  struct msghdr *msg, size_t dlen, long timeout)
 748{
 749	struct sock *sk = sock->sk;
 750	struct tipc_sock *tsk = tipc_sk(sk);
 751	struct tipc_msg *hdr = &tsk->phdr;
 752	struct net *net = sock_net(sk);
 753	int mtu = tipc_bcast_get_mtu(net);
 754	struct tipc_mc_method *method = &tsk->mc_method;
 755	u32 domain = addr_domain(net, TIPC_CLUSTER_SCOPE);
 756	struct sk_buff_head pkts;
 757	struct tipc_nlist dsts;
 758	int rc;
 759
 760	/* Block or return if any destination link is congested */
 761	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
 762	if (unlikely(rc))
 763		return rc;
 764
 765	/* Lookup destination nodes */
 766	tipc_nlist_init(&dsts, tipc_own_addr(net));
 767	tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
 768				      seq->upper, domain, &dsts);
 769	if (!dsts.local && !dsts.remote)
 770		return -EHOSTUNREACH;
 771
 772	/* Build message header */
 773	msg_set_type(hdr, TIPC_MCAST_MSG);
 774	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
 775	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
 776	msg_set_destport(hdr, 0);
 777	msg_set_destnode(hdr, 0);
 778	msg_set_nametype(hdr, seq->type);
 779	msg_set_namelower(hdr, seq->lower);
 780	msg_set_nameupper(hdr, seq->upper);
 781
 782	/* Build message as chain of buffers */
 783	skb_queue_head_init(&pkts);
 784	rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);
 785
 786	/* Send message if build was successful */
 787	if (unlikely(rc == dlen))
 788		rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
 789				     &tsk->cong_link_cnt);
 790
 791	tipc_nlist_purge(&dsts);
 792
 793	return rc ? rc : dlen;
 794}
 795
  796/**
  797 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
  798 * @arrvq: queue with arriving messages, to be cloned after destination lookup
  799 * @inputq: queue with cloned messages, delivered to socket after dest lookup
  800 *
  801 * Multi-threaded: parallel calls with reference to same queues may occur
  802 */
  803void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
  804		       struct sk_buff_head *inputq)
  805{
  806	struct tipc_msg *msg;
  807	struct list_head dports;
  808	u32 portid;
  809	u32 scope = TIPC_CLUSTER_SCOPE;
  810	struct sk_buff_head tmpq;
  811	uint hsz;
  812	struct sk_buff *skb, *_skb;
  813
  814	__skb_queue_head_init(&tmpq);
  815	INIT_LIST_HEAD(&dports);
  816
  817	skb = tipc_skb_peek(arrvq, &inputq->lock);
  818	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
  819		msg = buf_msg(skb);
  820		hsz = skb_headroom(skb) + msg_hdr_sz(msg);
  821
  822		if (in_own_node(net, msg_orignode(msg)))
  823			scope = TIPC_NODE_SCOPE;
		/* NOTE(review): scope is never reset to TIPC_CLUSTER_SCOPE,
		 * so one own-node message makes all subsequent lookups in
		 * this call node-scoped - verify this is intended.
		 */
  824
  825		/* Create destination port list and message clones: */
  826		tipc_nametbl_mc_translate(net,
  827					  msg_nametype(msg), msg_namelower(msg),
  828					  msg_nameupper(msg), scope, &dports);
  829		portid = u32_pop(&dports);
  830		for (; portid; portid = u32_pop(&dports)) {
  831			_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
  832			if (_skb) {
  833				msg_set_destport(buf_msg(_skb), portid);
  834				__skb_queue_tail(&tmpq, _skb);
  835				continue;
  836			}
  837			pr_warn("Failed to clone mcast rcv buffer\n");
  838		}
  839		/* Append to inputq if not already done by other thread */
  840		spin_lock_bh(&inputq->lock);
  841		if (skb_peek(arrvq) == skb) {
  842			skb_queue_splice_tail_init(&tmpq, inputq);
  843			kfree_skb(__skb_dequeue(arrvq));
  844		}
  845		spin_unlock_bh(&inputq->lock);
  846		__skb_queue_purge(&tmpq);
  847		kfree_skb(skb);
  848	}
  849	tipc_sk_rcv(net, inputq);
  850}
 851
  852/**
  853 * tipc_sk_proto_rcv - receive a connection mng protocol message
  854 * @tsk: receiving socket
  855 * @skb: pointer to message buffer.
 * @xmitq: queue collecting outgoing replies (CONN_PROBE_REPLY)
  856 */
  857static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
  858			      struct sk_buff_head *xmitq)
  859{
  860	struct sock *sk = &tsk->sk;
  861	u32 onode = tsk_own_node(tsk);
  862	struct tipc_msg *hdr = buf_msg(skb);
  863	int mtyp = msg_type(hdr);
  864	bool conn_cong;
  865
  866	/* Ignore if connection cannot be validated: */
  867	if (!tsk_peer_msg(tsk, hdr))
  868		goto exit;
  869
	/* An error code in the header means the peer aborted */
  870	if (unlikely(msg_errcode(hdr))) {
  871		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
  872		tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
  873				      tsk_peer_port(tsk));
  874		sk->sk_state_change(sk);
  875		goto exit;
  876	}
  877
  878	tsk->probe_unacked = false;
  879
  880	if (mtyp == CONN_PROBE) {
  881		msg_set_type(hdr, CONN_PROBE_REPLY);
  882		if (tipc_msg_reverse(onode, &skb, TIPC_OK))
  883			__skb_queue_tail(xmitq, skb);
  884		return;
	/* ACK advances the send window; wake the writer if it was blocked */
  885	} else if (mtyp == CONN_ACK) {
  886		conn_cong = tsk_conn_cong(tsk);
  887		tsk->snt_unacked -= msg_conn_ack(hdr);
  888		if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
  889			tsk->snd_win = msg_adv_win(hdr);
  890		if (conn_cong)
  891			sk->sk_write_space(sk);
  892	} else if (mtyp != CONN_PROBE_REPLY) {
  893		pr_warn("Received unknown CONN_PROTO msg\n");
  894	}
  895exit:
  896	kfree_skb(skb);
  897}
 898
 899/**
 900 * tipc_sendmsg - send message in connectionless manner
 901 * @sock: socket structure
 902 * @m: message to send
 903 * @dsz: amount of user data to be sent
 904 *
 905 * Message must have an destination specified explicitly.
 906 * Used for SOCK_RDM and SOCK_DGRAM messages,
 907 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 908 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
 909 *
 910 * Returns the number of bytes sent on success, or errno otherwise
 911 */
 912static int tipc_sendmsg(struct socket *sock,
 913			struct msghdr *m, size_t dsz)
 914{
 915	struct sock *sk = sock->sk;
 916	int ret;
 917
 918	lock_sock(sk);
 919	ret = __tipc_sendmsg(sock, m, dsz);
 920	release_sock(sk);
 921
 922	return ret;
 923}
 924
/* __tipc_sendmsg - connectionless/SYN send worker
 * @sock: socket structure
 * @m: message to send
 * @dlen: length of user data
 *
 * Caller must hold the socket lock. Resolves the destination address
 * (explicit name, port id or multicast range, or the peer address stored
 * by a previous connect()), fills in the socket's header template, and
 * transmits. For connection-oriented sockets this path also carries the
 * implicit-connect 'SYN'.
 *
 * Returns the number of bytes sent on success, or errno otherwise
 */
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct tipc_sock *tsk = tipc_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct list_head *clinks = &tsk->cong_links;
	bool syn = !tipc_sk_type_connectionless(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct tipc_name_seq *seq;
	struct sk_buff_head pkts;
	u32 type, inst, domain;
	u32 dnode, dport;
	int mtu, rc;

	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
		return -EMSGSIZE;

	/* No address supplied: fall back to the peer set by connect() */
	if (unlikely(!dest)) {
		dest = &tsk->peer;
		if (!syn || dest->family != AF_TIPC)
			return -EDESTADDRREQ;
	}

	/* NOTE(review): when dest was NULL above, m->msg_namelen is 0 and
	 * the check below returns -EINVAL, so the fallback path can never
	 * succeed — confirm whether these two checks were intended to apply
	 * only to a caller-supplied address.
	 */
	if (unlikely(m->msg_namelen < sizeof(*dest)))
		return -EINVAL;

	if (unlikely(dest->family != AF_TIPC))
		return -EINVAL;

	/* Connection-oriented socket: this datagram is the implicit SYN */
	if (unlikely(syn)) {
		if (sk->sk_state == TIPC_LISTEN)
			return -EPIPE;
		if (sk->sk_state != TIPC_OPEN)
			return -EISCONN;
		if (tsk->published)
			return -EOPNOTSUPP;
		if (dest->addrtype == TIPC_ADDR_NAME) {
			tsk->conn_type = dest->addr.name.name.type;
			tsk->conn_instance = dest->addr.name.name.instance;
		}
	}

	seq = &dest->addr.nameseq;
	if (dest->addrtype == TIPC_ADDR_MCAST)
		return tipc_sendmcast(sock, seq, m, dlen, timeout);

	if (dest->addrtype == TIPC_ADDR_NAME) {
		/* Look up destination port and node in the name table */
		type = dest->addr.name.name.type;
		inst = dest->addr.name.name.instance;
		domain = dest->addr.name.domain;
		dnode = domain;
		msg_set_type(hdr, TIPC_NAMED_MSG);
		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
		msg_set_nametype(hdr, type);
		msg_set_nameinst(hdr, inst);
		msg_set_lookup_scope(hdr, tipc_addr_scope(domain));
		dport = tipc_nametbl_translate(net, type, inst, &dnode);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dport);
		if (unlikely(!dport && !dnode))
			return -EHOSTUNREACH;

	} else if (dest->addrtype == TIPC_ADDR_ID) {
		/* Fully qualified port id: no name table lookup needed */
		dnode = dest->addr.id.node;
		msg_set_type(hdr, TIPC_DIRECT_MSG);
		msg_set_lookup_scope(hdr, 0);
		msg_set_destnode(hdr, dnode);
		msg_set_destport(hdr, dest->addr.id.ref);
		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
	}

	/* Block or return if destination link is congested */
	rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode));
	if (unlikely(rc))
		return rc;

	skb_queue_head_init(&pkts);
	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
	if (unlikely(rc != dlen))
		return rc;

	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
	if (unlikely(rc == -ELINKCONG)) {
		/* Link congested: remember it, but report success to caller */
		u32_push(clinks, dnode);
		tsk->cong_link_cnt++;
		rc = 0;
	}

	/* SYN accepted by the stack: wait for the peer's ACK/NACK */
	if (unlikely(syn && !rc))
		tipc_set_sk_state(sk, TIPC_CONNECTING);

	return rc ? rc : dlen;
}
1021
1022/**
1023 * tipc_sendstream - send stream-oriented data
1024 * @sock: socket structure
1025 * @m: data to send
1026 * @dsz: total length of data to be transmitted
1027 *
1028 * Used for SOCK_STREAM data.
1029 *
1030 * Returns the number of bytes sent on success (or partial success),
1031 * or errno if no data sent
1032 */
1033static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz)
1034{
1035	struct sock *sk = sock->sk;
1036	int ret;
1037
1038	lock_sock(sk);
1039	ret = __tipc_sendstream(sock, m, dsz);
1040	release_sock(sk);
1041
1042	return ret;
1043}
1044
/* __tipc_sendstream - stream send worker
 * @sock: socket structure
 * @m: data to send
 * @dlen: total length of data
 *
 * Caller must hold the socket lock. Sends the data in chunks of at most
 * TIPC_MAX_USER_MSG_SIZE bytes, waiting between chunks until both the
 * link and the connection send windows are open.
 *
 * Returns number of bytes sent, or errno if nothing was sent
 */
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = &tsk->phdr;
	struct net *net = sock_net(sk);
	struct sk_buff_head pkts;
	u32 dnode = tsk_peer_node(tsk);
	int send, sent = 0;
	int rc = 0;

	skb_queue_head_init(&pkts);

	if (unlikely(dlen > INT_MAX))
		return -EMSGSIZE;

	/* Handle implicit connection setup */
	if (unlikely(dest)) {
		rc = __tipc_sendmsg(sock, m, dlen);
		/* SYN+ carried user data: account for it in the send window */
		if (dlen && (dlen == rc))
			tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr));
		return rc;
	}

	do {
		/* Wait until link and connection windows are both open */
		rc = tipc_wait_for_cond(sock, &timeout,
					(!tsk->cong_link_cnt &&
					 !tsk_conn_cong(tsk) &&
					 tipc_sk_connected(sk)));
		if (unlikely(rc))
			break;

		send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
		rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
		if (unlikely(rc != send))
			break;

		rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
		if (unlikely(rc == -ELINKCONG)) {
			/* Congestion is not an error; the next iteration
			 * will block in tipc_wait_for_cond() above
			 */
			tsk->cong_link_cnt = 1;
			rc = 0;
		}
		if (likely(!rc)) {
			tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
			sent += send;
		}
	} while (sent < dlen && !rc);

	return sent ? sent : rc;
}
1097
1098/**
1099 * tipc_send_packet - send a connection-oriented message
1100 * @sock: socket structure
1101 * @m: message to send
1102 * @dsz: length of data to be transmitted
1103 *
1104 * Used for SOCK_SEQPACKET messages.
1105 *
1106 * Returns the number of bytes sent on success, or errno otherwise
1107 */
1108static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
1109{
1110	if (dsz > TIPC_MAX_USER_MSG_SIZE)
1111		return -EMSGSIZE;
1112
1113	return tipc_sendstream(sock, m, dsz);
1114}
1115
/* tipc_sk_finish_conn - complete the setup of a connection
 * @tsk: TIPC socket to move into ESTABLISHED state
 * @peer_port: port number of the connected peer
 * @peer_node: network address of the peer node
 *
 * Rewrites the socket's header template for connection-based messaging,
 * starts the connection probe timer, and registers the connection with
 * the peer node.
 */
static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
				u32 peer_node)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_msg *msg = &tsk->phdr;

	/* From now on all messages use the compact connection header */
	msg_set_destnode(msg, peer_node);
	msg_set_destport(msg, peer_port);
	msg_set_type(msg, TIPC_CONN_MSG);
	msg_set_lookup_scope(msg, 0);
	msg_set_hdr_sz(msg, SHORT_H_SIZE);

	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
	tipc_set_sk_state(sk, TIPC_ESTABLISHED);
	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
	tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
		return;

	/* Fall back to message based flow control */
	tsk->rcv_win = FLOWCTL_MSG_WIN;
	tsk->snd_win = FLOWCTL_MSG_WIN;
}
1143
1144/**
1145 * set_orig_addr - capture sender's address for received message
1146 * @m: descriptor for message info
1147 * @msg: received message header
1148 *
1149 * Note: Address is not captured if not requested by receiver.
1150 */
1151static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
1152{
1153	DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);
1154
1155	if (addr) {
1156		addr->family = AF_TIPC;
1157		addr->addrtype = TIPC_ADDR_ID;
1158		memset(&addr->addr, 0, sizeof(addr->addr));
1159		addr->addr.id.ref = msg_origport(msg);
1160		addr->addr.id.node = msg_orignode(msg);
1161		addr->addr.name.domain = 0;	/* could leave uninitialized */
1162		addr->scope = 0;		/* could leave uninitialized */
1163		m->msg_namelen = sizeof(struct sockaddr_tipc);
1164	}
1165}
1166
/**
 * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
 * @m: descriptor for message info
 * @msg: received message header (may be NULL)
 * @tsk: TIPC port associated with message
 *
 * Note: Ancillary data is not captured if not requested by receiver.
 *
 * Returns 0 if successful, otherwise errno
 */
static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
				 struct tipc_sock *tsk)
{
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
	int has_name;
	int res;

	/* Receiver did not ask for ancillary data */
	if (likely(m->msg_controllen == 0))
		return 0;

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		/* TIPC_ERRINFO carries two u32s: error code and data length */
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
			return res;
		if (anc_data[1]) {
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
		/* Named message targets one instance, so lower == upper */
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
		/* Use the name captured when the connection was set up */
		has_name = (tsk->conn_type != 0);
		anc_data[0] = tsk->conn_type;
		anc_data[1] = tsk->conn_instance;
		anc_data[2] = tsk->conn_instance;
		break;
	default:
		has_name = 0;
	}
	if (has_name) {
		/* TIPC_DESTNAME carries three u32s: type, lower, upper */
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}

	return 0;
}
1237
/* tipc_sk_send_ack - send a connection-level flow control acknowledge
 * @tsk: connected TIPC socket
 *
 * Builds a CONN_ACK message advertising how many message units have been
 * consumed since the last ack, and transmits it to the peer. Silently
 * does nothing if the socket is not connected or the buffer allocation
 * fails.
 */
static void tipc_sk_send_ack(struct tipc_sock *tsk)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct sk_buff *skb = NULL;
	struct tipc_msg *msg;
	u32 peer_port = tsk_peer_port(tsk);
	u32 dnode = tsk_peer_node(tsk);

	if (!tipc_sk_connected(sk))
		return;
	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
			      dnode, tsk_own_node(tsk), peer_port,
			      tsk->portid, TIPC_OK);
	if (!skb)
		return;
	msg = buf_msg(skb);
	msg_set_conn_ack(msg, tsk->rcv_unacked);
	tsk->rcv_unacked = 0;

	/* Adjust to and advertise the correct window limit */
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
		tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
		msg_set_adv_win(msg, tsk->rcv_win);
	}
	tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
}
1265
/* tipc_wait_for_rcvmsg - wait until the socket receive queue is non-empty
 * @sock: socket to wait on
 * @timeop: in/out receive timeout in jiffies; updated with time remaining
 *
 * Caller must hold the socket lock; it is dropped while sleeping and
 * re-acquired before returning. The queue is re-checked after each
 * wakeup because another thread may have consumed the message.
 *
 * Returns 0 when a message is available, otherwise -EAGAIN (timed out),
 * -ENOTCONN (receive side shut down), a signal-derived errno, or a
 * pending socket error.
 */
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	long timeo = *timeop;
	int err = sock_error(sk);

	if (err)
		return err;

	for (;;) {
		/* Register on the wait queue before testing the condition
		 * so that a concurrent wakeup cannot be lost
		 */
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			if (sk->sk_shutdown & RCV_SHUTDOWN) {
				err = -ENOTCONN;
				break;
			}
			/* Drop the socket lock while sleeping */
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;

		err = sock_error(sk);
		if (err)
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	*timeop = timeo;
	return err;
}
1305
/**
 * tipc_recvmsg - receive packet-oriented message
 * @sock: network socket
 * @m: descriptor for message info
 * @buflen: length of user buffer area
 * @flags: receive flags
 *
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
 *
 * Returns size of returned message data, errno otherwise
 */
static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
			size_t buflen,	int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sk_buff *skb;
	struct tipc_msg *hdr;
	bool connected = !tipc_sk_type_connectionless(sk);
	int rc, err, hlen, dlen, copy;
	long timeout;

	/* Catch invalid receive requests */
	if (unlikely(!buflen))
		return -EINVAL;

	lock_sock(sk);
	if (unlikely(connected && sk->sk_state == TIPC_OPEN)) {
		rc = -ENOTCONN;
		goto exit;
	}
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Look at first msg in receive queue; wait if necessary */
		rc = tipc_wait_for_rcvmsg(sock, &timeout);
		if (unlikely(rc))
			goto exit;
		skb = skb_peek(&sk->sk_receive_queue);
		hdr = buf_msg(skb);
		dlen = msg_data_sz(hdr);
		hlen = msg_hdr_sz(hdr);
		err = msg_errcode(hdr);
		if (likely(dlen || err))
			break;
		/* Discard empty non-errored messages and keep looking */
		tsk_advance_rx_queue(sk);
	} while (1);

	/* Collect msg meta data, including error code and rejected data */
	set_orig_addr(m, hdr);
	rc = tipc_sk_anc_data_recv(m, hdr, tsk);
	if (unlikely(rc))
		goto exit;

	/* Capture data if non-error msg, otherwise just set return value */
	if (likely(!err)) {
		copy = min_t(int, dlen, buflen);
		if (unlikely(copy != dlen))
			m->msg_flags |= MSG_TRUNC;
		rc = skb_copy_datagram_msg(skb, hlen, m, copy);
	} else {
		copy = 0;
		rc = 0;
		if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
			rc = -ECONNRESET;
	}
	if (unlikely(rc))
		goto exit;

	/* Capture of data or error code/rejected data was successful */
	if (unlikely(flags & MSG_PEEK))
		goto exit;

	tsk_advance_rx_queue(sk);
	if (likely(!connected))
		goto exit;

	/* Send connection flow control ack when applicable */
	tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
	if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
		tipc_sk_send_ack(tsk);
exit:
	release_sock(sk);
	return rc ? rc : copy;
}
1391
/**
 * tipc_recvstream - receive stream-oriented data
 * @sock: network socket
 * @m: descriptor for message info
 * @buflen: total size of user buffer area
 * @flags: receive flags
 *
 * Used for SOCK_STREAM messages only.  If not enough data is available
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
static int tipc_recvstream(struct socket *sock, struct msghdr *m,
			   size_t buflen, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sk_buff *skb;
	struct tipc_msg *hdr;
	struct tipc_skb_cb *skb_cb;
	bool peek = flags & MSG_PEEK;
	int offset, required, copy, copied = 0;
	int hlen, dlen, err, rc;
	long timeout;

	/* Catch invalid receive attempts */
	if (unlikely(!buflen))
		return -EINVAL;

	lock_sock(sk);

	if (unlikely(sk->sk_state == TIPC_OPEN)) {
		rc = -ENOTCONN;
		goto exit;
	}
	/* Minimum number of bytes to gather before returning normally */
	required = sock_rcvlowat(sk, flags & MSG_WAITALL, buflen);
	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Look at first msg in receive queue; wait if necessary */
		rc = tipc_wait_for_rcvmsg(sock, &timeout);
		if (unlikely(rc))
			break;
		skb = skb_peek(&sk->sk_receive_queue);
		skb_cb = TIPC_SKB_CB(skb);
		hdr = buf_msg(skb);
		dlen = msg_data_sz(hdr);
		hlen = msg_hdr_sz(hdr);
		err = msg_errcode(hdr);

		/* Discard any empty non-errored (SYN-) message */
		if (unlikely(!dlen && !err)) {
			tsk_advance_rx_queue(sk);
			continue;
		}

		/* Collect msg meta data, incl. error code and rejected data */
		if (!copied) {
			set_orig_addr(m, hdr);
			rc = tipc_sk_anc_data_recv(m, hdr, tsk);
			if (rc)
				break;
		}

		/* Copy data if msg ok, otherwise return error/partial data */
		if (likely(!err)) {
			/* bytes_read records progress within a partly read msg */
			offset = skb_cb->bytes_read;
			copy = min_t(int, dlen - offset, buflen - copied);
			rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
			if (unlikely(rc))
				break;
			copied += copy;
			offset += copy;
			if (unlikely(offset < dlen)) {
				if (!peek)
					skb_cb->bytes_read = offset;
				break;
			}
		} else {
			rc = 0;
			if ((err != TIPC_CONN_SHUTDOWN) && !m->msg_control)
				rc = -ECONNRESET;
			if (copied || rc)
				break;
		}

		if (unlikely(peek))
			break;

		tsk_advance_rx_queue(sk);

		/* Send connection flow control advertisement when applicable */
		tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
		if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE))
			tipc_sk_send_ack(tsk);

		/* Exit if all requested data or FIN/error received */
		if (copied == buflen || err)
			break;

	} while (!skb_queue_empty(&sk->sk_receive_queue) || copied < required);
exit:
	release_sock(sk);
	return copied ? copied : rc;
}
1496
/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 */
static void tipc_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	/* sk_wq is RCU-protected */
	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	/* Only issue a wakeup when somebody is actually waiting */
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);
	rcu_read_unlock();
}
1512
/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 */
static void tipc_data_ready(struct sock *sk)
{
	struct socket_wq *wq;

	/* sk_wq is RCU-protected */
	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	/* Only issue a wakeup when somebody is actually waiting */
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
						POLLRDNORM | POLLRDBAND);
	rcu_read_unlock();
}
1529
/* tipc_sock_destruct - final socket teardown; frees any queued buffers */
static void tipc_sock_destruct(struct sock *sk)
{
	__skb_queue_purge(&sk->sk_receive_queue);
}
1534
/**
 * filter_connect - Handle all incoming messages for a connection-based socket
 * @tsk: TIPC socket
 * @skb: pointer to message buffer. Set to NULL if buffer is consumed
 *
 * Returns true if everything ok, false otherwise
 */
static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct tipc_msg *hdr = buf_msg(skb);
	u32 pport = msg_origport(hdr);
	u32 pnode = msg_orignode(hdr);

	/* Multicast messages never target a connection-based socket */
	if (unlikely(msg_mcast(hdr)))
		return false;

	switch (sk->sk_state) {
	case TIPC_CONNECTING:
		/* Accept only ACK or NACK message */
		if (unlikely(!msg_connected(hdr))) {
			/* Unconnected message from some other socket: drop */
			if (pport != tsk_peer_port(tsk) ||
			    pnode != tsk_peer_node(tsk))
				return false;

			/* Unconnected message from our peer: connect refused */
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = ECONNREFUSED;
			sk->sk_state_change(sk);
			return true;
		}

		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = ECONNREFUSED;
			sk->sk_state_change(sk);
			return true;
		}

		if (unlikely(!msg_isdata(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			sk->sk_err = EINVAL;
			sk->sk_state_change(sk);
			return true;
		}

		/* Valid ACK from the peer: complete the connection setup */
		tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
		msg_set_importance(&tsk->phdr, msg_importance(hdr));

		/* If 'ACK+' message, add to socket receive queue */
		if (msg_data_sz(hdr))
			return true;

		/* If empty 'ACK-' message, wake up sleeping connect() */
		sk->sk_data_ready(sk);

		/* 'ACK-' message is neither accepted nor rejected: */
		msg_set_dest_droppable(hdr, 1);
		return false;

	case TIPC_OPEN:
	case TIPC_DISCONNECTING:
		break;
	case TIPC_LISTEN:
		/* Accept only SYN message */
		if (!msg_connected(hdr) && !(msg_errcode(hdr)))
			return true;
		break;
	case TIPC_ESTABLISHED:
		/* Accept only connection-based messages sent by peer */
		if (unlikely(!tsk_peer_msg(tsk, hdr)))
			return false;

		if (unlikely(msg_errcode(hdr))) {
			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
			/* Let timer expire on its own */
			tipc_node_remove_conn(net, tsk_peer_node(tsk),
					      tsk->portid);
			sk->sk_state_change(sk);
		}
		return true;
	default:
		pr_err("Unknown sk_state %u\n", sk->sk_state);
	}

	return false;
}
1622
1623/**
1624 * rcvbuf_limit - get proper overload limit of socket receive queue
1625 * @sk: socket
1626 * @skb: message
1627 *
1628 * For connection oriented messages, irrespective of importance,
1629 * default queue limit is 2 MB.
1630 *
1631 * For connectionless messages, queue limits are based on message
1632 * importance as follows:
1633 *
1634 * TIPC_LOW_IMPORTANCE       (2 MB)
1635 * TIPC_MEDIUM_IMPORTANCE    (4 MB)
1636 * TIPC_HIGH_IMPORTANCE      (8 MB)
1637 * TIPC_CRITICAL_IMPORTANCE  (16 MB)
1638 *
1639 * Returns overload limit according to corresponding message importance
1640 */
1641static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
1642{
1643	struct tipc_sock *tsk = tipc_sk(sk);
1644	struct tipc_msg *hdr = buf_msg(skb);
1645
1646	if (unlikely(!msg_connected(hdr)))
1647		return sk->sk_rcvbuf << msg_importance(hdr);
1648
1649	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
1650		return sk->sk_rcvbuf;
1651
1652	return FLOWCTL_MSG_LIM;
1653}
1654
/**
 * filter_rcv - validate incoming message
 * @sk: socket
 * @skb: pointer to message.
 * @xmitq: queue collecting any rejected/response messages to transmit
 *
 * Enqueues message on receive queue if acceptable; optionally handles
 * disconnect indication for a connected socket.
 *
 * Called with socket lock already taken
 *
 * Returns true if message was added to socket receive queue, otherwise false
 */
static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
		       struct sk_buff_head *xmitq)
{
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);
	unsigned int limit = rcvbuf_limit(sk, skb);
	int err = TIPC_OK;
	int usr = msg_user(hdr);
	u32 onode;

	/* Connection management messages are handled by the protocol layer */
	if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
		tipc_sk_proto_rcv(tsk, skb, xmitq);
		return false;
	}

	/* Wakeup: a previously congested link has freed send space */
	if (unlikely(usr == SOCK_WAKEUP)) {
		onode = msg_orignode(hdr);
		kfree_skb(skb);
		u32_del(&tsk->cong_links, onode);
		tsk->cong_link_cnt--;
		sk->sk_write_space(sk);
		return false;
	}

	/* Drop if illegal message type */
	if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) {
		kfree_skb(skb);
		return false;
	}

	/* Reject if wrong message type for current socket state */
	if (tipc_sk_type_connectionless(sk)) {
		if (msg_connected(hdr)) {
			err = TIPC_ERR_NO_PORT;
			goto reject;
		}
	} else if (unlikely(!filter_connect(tsk, skb))) {
		err = TIPC_ERR_NO_PORT;
		goto reject;
	}

	/* Reject message if there isn't room to queue it */
	if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) {
		err = TIPC_ERR_OVERLOAD;
		goto reject;
	}

	/* Enqueue message */
	TIPC_SKB_CB(skb)->bytes_read = 0;
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	skb_set_owner_r(skb, sk);

	sk->sk_data_ready(sk);
	return true;

reject:
	/* Bounce the message back to its sender with an error code */
	if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
		__skb_queue_tail(xmitq, skb);
	return false;
}
1727
/**
 * tipc_backlog_rcv - handle incoming message from backlog queue
 * @sk: socket
 * @skb: message
 *
 * Caller must hold socket lock
 *
 * Returns 0
 */
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	/* Remember truesize now; skb may be consumed by filter_rcv() */
	unsigned int truesize = skb->truesize;
	struct sk_buff_head xmitq;
	u32 dnode, selector;

	__skb_queue_head_init(&xmitq);

	if (likely(filter_rcv(sk, skb, &xmitq))) {
		/* Track bytes counted in both backlog and receive queue */
		atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
		return 0;
	}

	if (skb_queue_empty(&xmitq))
		return 0;

	/* Send response/rejected message */
	skb = __skb_dequeue(&xmitq);
	dnode = msg_destnode(buf_msg(skb));
	selector = msg_origport(buf_msg(skb));
	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
	return 0;
}
1760
/**
 * tipc_sk_enqueue - extract all buffers with destination 'dport' from
 *                   inputq and try adding them to socket or backlog queue
 * @inputq: list of incoming buffers with potentially different destinations
 * @sk: socket where the buffers should be enqueued
 * @dport: port number for the socket
 * @xmitq: queue collecting any rejected/response messages to transmit
 *
 * Caller must hold socket lock
 */
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
			    u32 dport, struct sk_buff_head *xmitq)
{
	/* Bound the time spent here to roughly two jiffies */
	unsigned long time_limit = jiffies + 2;
	struct sk_buff *skb;
	unsigned int lim;
	atomic_t *dcnt;
	u32 onode;

	while (skb_queue_len(inputq)) {
		if (unlikely(time_after_eq(jiffies, time_limit)))
			return;

		skb = tipc_skb_dequeue(inputq, dport);
		if (unlikely(!skb))
			return;

		/* Add message directly to receive queue if possible */
		if (!sock_owned_by_user(sk)) {
			filter_rcv(sk, skb, xmitq);
			continue;
		}

		/* Try backlog, compensating for double-counted bytes */
		dcnt = &tipc_sk(sk)->dupl_rcvcnt;
		if (!sk->sk_backlog.len)
			atomic_set(dcnt, 0);
		lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
		if (likely(!sk_add_backlog(sk, skb, lim)))
			continue;

		/* Overload => reject message back to sender */
		onode = tipc_own_addr(sock_net(sk));
		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
			__skb_queue_tail(xmitq, skb);
		break;
	}
}
1808
1809/**
1810 * tipc_sk_rcv - handle a chain of incoming buffers
1811 * @inputq: buffer list containing the buffers
1812 * Consumes all buffers in list until inputq is empty
1813 * Note: may be called in multiple threads referring to the same queue
1814 */
1815void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
1816{
1817	struct sk_buff_head xmitq;
1818	u32 dnode, dport = 0;
1819	int err;
1820	struct tipc_sock *tsk;
1821	struct sock *sk;
1822	struct sk_buff *skb;
1823
1824	__skb_queue_head_init(&xmitq);
1825	while (skb_queue_len(inputq)) {
1826		dport = tipc_skb_peek_port(inputq, dport);
1827		tsk = tipc_sk_lookup(net, dport);
1828
1829		if (likely(tsk)) {
1830			sk = &tsk->sk;
1831			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
1832				tipc_sk_enqueue(inputq, sk, dport, &xmitq);
1833				spin_unlock_bh(&sk->sk_lock.slock);
1834			}
1835			/* Send pending response/rejected …

Large files are truncated, but you can click here to view the full file