/bsd/sys/netinet/in_pcb.cc
C++ | 2099 lines | 1466 code | 227 blank | 406 comment | 349 complexity | 6af5d93f521c8ba41f238b1a515b78d8 MD5 | raw file
Possible License(s): BSD-3-Clause, 0BSD, MPL-2.0-no-copyleft-exception
Large files are truncated, but you can click here to view the full file
- /*-
- * Copyright (c) 1982, 1986, 1991, 1993, 1995
- * The Regents of the University of California.
- * Copyright (c) 2007-2009 Robert N. M. Watson
- * Copyright (c) 2010-2011 Juniper Networks, Inc.
- * All rights reserved.
- *
- * Portions of this software were developed by Robert N. M. Watson under
- * contract to Juniper Networks, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
- */
- #include <sys/cdefs.h>
- #include <bsd/porting/netport.h>
- #include <bsd/porting/uma_stub.h>
- #include <bsd/porting/callout.h>
- #include <bsd/sys/sys/eventhandler.h>
- #include <bsd/sys/sys/libkern.h>
- #include <bsd/sys/sys/param.h>
- #include <bsd/sys/sys/mbuf.h>
- #include <bsd/sys/sys/domain.h>
- #include <bsd/sys/sys/protosw.h>
- #include <bsd/sys/sys/socket.h>
- #include <bsd/sys/sys/socketvar.h>
- #include <bsd/sys/sys/priv.h>
- #include <bsd/sys/sys/refcount.h>
- #include <bsd/sys/net/if.h>
- #include <bsd/sys/net/if_types.h>
- #include <bsd/sys/net/route.h>
- #include <bsd/sys/net/vnet.h>
- #if defined(INET) || defined(INET6)
- #include <bsd/sys/netinet/in.h>
- #include <bsd/sys/netinet/in_pcb.h>
- #include <bsd/sys/netinet/ip_var.h>
- #include <bsd/sys/netinet/tcp_var.h>
- #include <bsd/sys/netinet/udp.h>
- #include <bsd/sys/netinet/udp_var.h>
- #endif
- #ifdef INET
- #include <bsd/sys/netinet/in_var.h>
- #endif
- #ifdef INET6
- #include <bsd/sys/netinet/ip6.h>
- #include <bsd/sys/netinet6/in6_pcb.h>
- #include <bsd/sys/netinet6/in6_var.h>
- #include <bsd/sys/netinet6/ip6_var.h>
- #endif /* INET6 */
- #include <bsd/sys/net/routecache.hh>
- #ifdef IPSEC
- #include <bsd/sys/netipsec/ipsec.h>
- #include <bsd/sys/netipsec/key.h>
- #endif /* IPSEC */
- #include <osv/trace.hh>
- TRACEPOINT(trace_inpcb_ref, "inp=%x", struct inpcb *);
- TRACEPOINT(trace_inpcb_rele, "inp=%x", struct inpcb *);
- TRACEPOINT(trace_inpcb_free, "inp=%x", struct inpcb *);
- static struct callout ipport_tick_callout;
- /*
- * These configure the range of local port addresses assigned to
- * "unspecified" outgoing connections/packets/whatever.
- */
- VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1; /* 1023 */
- VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART; /* 600 */
- VNET_DEFINE(int, ipport_firstauto) = IPPORT_EPHEMERALFIRST; /* 10000 */
- VNET_DEFINE(int, ipport_lastauto) = IPPORT_EPHEMERALLAST; /* 65535 */
- VNET_DEFINE(int, ipport_hifirstauto) = IPPORT_HIFIRSTAUTO; /* 49152 */
- VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO; /* 65535 */
- /*
- * Reserved ports accessible only to root. There are significant
- * security considerations that must be accounted for when changing these,
- * but the security benefits can be great. Please be careful.
- */
- VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1; /* 1023 */
- VNET_DEFINE(int, ipport_reservedlow);
- /* Variables dealing with random ephemeral port allocation. */
- VNET_DEFINE(int, ipport_randomized) = 1; /* user controlled via sysctl */
- VNET_DEFINE(int, ipport_randomcps) = 10; /* user controlled via sysctl */
- VNET_DEFINE(int, ipport_randomtime) = 45; /* user controlled via sysctl */
- VNET_DEFINE(int, ipport_stoprandom); /* toggled by ipport_tick */
- VNET_DEFINE(int, ipport_tcpallocs);
- static VNET_DEFINE(int, ipport_tcplastcount);
- #define V_ipport_tcplastcount VNET(ipport_tcplastcount)
static void	in_pcbremlists(struct inpcb *inp);
#ifdef INET
static struct inpcb	*in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
			    struct in_addr faddr, u_int fport_arg,
			    struct in_addr laddr, u_int lport_arg,
			    int lookupflags, struct ifnet *ifp);

/*
 * Range-clamping sysctl handler for the automatic port ranges.  It is
 * compiled out with "#if 0" here, yet the SYSCTL_VNET_PROC() declarations
 * below still name it.
 * NOTE(review): presumably the SYSCTL_* macros expand to nothing in this
 * port, otherwise the reference could not resolve -- confirm.
 */
#if 0
/*
 * Clamp var into [min, max].  Wrapped in do/while (0) so the macro acts
 * as a single statement and cannot capture a caller's "else".
 */
#define	RANGECHK(var, min, max)						\
	do {								\
		if ((var) < (min))					\
			(var) = (min);					\
		else if ((var) > (max))					\
			(var) = (max);					\
	} while (0)

static int
sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
{
	int error;

#ifdef VIMAGE
	error = vnet_sysctl_handle_int(oidp, arg1, arg2, req);
#else
	error = sysctl_handle_int(oidp, arg1, arg2, req);
#endif
	if (error == 0) {
		/* Re-clamp every range after a successful update. */
		RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
		RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX);
		RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX);
		RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX);
		RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX);
	}
	return (error);
}

#undef RANGECHK
#endif

SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");

SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
	CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowfirstauto), 0,
	&sysctl_net_ipport_check, "I", "");
SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
	CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowlastauto), 0,
	&sysctl_net_ipport_check, "I", "");
SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, first,
	CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_firstauto), 0,
	&sysctl_net_ipport_check, "I", "");
SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, last,
	CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lastauto), 0,
	&sysctl_net_ipport_check, "I", "");
SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
	CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hifirstauto), 0,
	&sysctl_net_ipport_check, "I", "");
SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
	CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hilastauto), 0,
	&sysctl_net_ipport_check, "I", "");
SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
	CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedhigh), 0, "");
SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
	CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedlow), 0, "");
SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW,
	&VNET_NAME(ipport_randomized), 0, "Enable random port allocation");
SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, CTLFLAG_RW,
	&VNET_NAME(ipport_randomcps), 0, "Maximum number of random port "
	"allocations before switching to a sequential one");
SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW,
	&VNET_NAME(ipport_randomtime), 0,
	"Minimum time to keep sequential port "
	"allocation before switching to a random one");
#endif
- /*
- * in_pcb.c: manage the Protocol Control Blocks.
- *
- * NOTE: It is assumed that most of these functions will be called with
- * the pcbinfo lock held, and often, the inpcb lock held, as these utility
- * functions often modify hash chains or addresses in pcbs.
- */
- /*
- * Initialize an inpcbinfo -- we should be able to reduce the number of
- * arguments in time.
- */
- void
- in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
- struct inpcbhead *listhead, int hash_nelements, int porthash_nelements,
- u_int hashfields)
- {
- INP_INFO_LOCK_INIT(pcbinfo, name);
- INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash"); /* XXXRW: argument? */
- #ifdef VIMAGE
- pcbinfo->ipi_vnet = curvnet;
- #endif
- pcbinfo->ipi_listhead = listhead;
- LIST_INIT(pcbinfo->ipi_listhead);
- pcbinfo->ipi_count = 0;
- pcbinfo->ipi_hashbase = (inpcbhead *)hashinit(hash_nelements, 0,
- &pcbinfo->ipi_hashmask);
- pcbinfo->ipi_porthashbase = (inpcbporthead *)hashinit(porthash_nelements, 0,
- &pcbinfo->ipi_porthashmask);
- // FIXME: uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
- }
- /*
- * Destroy an inpcbinfo.
- */
- void
- in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
- {
- KASSERT(pcbinfo->ipi_count == 0,
- ("%s: ipi_count = %u", __func__, pcbinfo->ipi_count));
- hashdestroy(pcbinfo->ipi_hashbase, 0, pcbinfo->ipi_hashmask);
- hashdestroy(pcbinfo->ipi_porthashbase, 0,
- pcbinfo->ipi_porthashmask);
- INP_HASH_LOCK_DESTROY(pcbinfo);
- INP_INFO_LOCK_DESTROY(pcbinfo);
- }
- /*
- * Allocate a PCB and associate it with the socket.
- * On success return with the PCB locked.
- */
- inpcb::inpcb(struct socket *so, struct inpcbinfo *pcbinfo)
- {
- struct inpcb *inp = this;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
- inp->inp_pcbinfo = pcbinfo;
- inp->inp_socket = so;
- inp->inp_inc.inc_fibnum = so->so_fibnum;
- #ifdef IPSEC
- error = ipsec_init_policy(so, &inp->inp_sp);
- if (error != 0) {
- #ifdef MAC
- mac_inpcb_destroy(inp);
- #endif
- goto out;
- }
- #endif /*IPSEC*/
- #ifdef INET6
- if (INP_SOCKAF(so) == AF_INET6) {
- inp->inp_vflag |= INP_IPV6PROTO;
- if (V_ip6_v6only)
- inp->inp_flags |= IN6P_IPV6_V6ONLY;
- }
- #endif
- LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
- pcbinfo->ipi_count++;
- so->so_pcb = (caddr_t)inp;
- so->set_mutex(&inp->inp_lock);
- #ifdef INET6
- if (V_ip6_auto_flowlabel)
- inp->inp_flags |= IN6P_AUTOFLOWLABEL;
- #endif
- INP_LOCK(inp);
- inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
- refcount_init(&inp->inp_refcount, 1); /* Reference from inpcbinfo */
- }
#ifdef INET
/*
 * Bind `inp' to the local address and port described by `nam', choosing a
 * port automatically when `nam' is NULL or names port 0, and enter the PCB
 * into the hash.
 *
 * Returns EINVAL if the PCB already has a local port or address, any error
 * from in_pcbbind_setup(), or EAGAIN (with the binding rolled back) if the
 * hash insertion fails.  The PCB lock and the hash write lock are asserted.
 */
int
in_pcbbind(struct inpcb *inp, struct bsd_sockaddr *nam, struct ucred *cred)
{
	int auto_port, error;

	INP_LOCK_ASSERT(inp);
	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);

	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
		return (EINVAL);

	/*
	 * Decide this before calling in_pcbbind_setup(), which may clear
	 * sin_port in the caller's sockaddr.
	 */
	if (inp->inp_lport != 0)
		auto_port = 0;
	else if (nam == NULL)
		auto_port = 1;
	else
		auto_port = (((struct bsd_sockaddr_in *)nam)->sin_port == 0);

	error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
	    &inp->inp_lport, cred);
	if (error != 0)
		return (error);

	if (in_pcbinshash(inp) != 0) {
		/* Undo the binding so the PCB looks unbound again. */
		inp->inp_laddr.s_addr = INADDR_ANY;
		inp->inp_lport = 0;
		return (EAGAIN);
	}

	if (auto_port)
		inp->inp_flags |= INP_ANONPORT;
	return (0);
}
#endif
#if defined(INET) || defined(INET6)
/*
 * Pick an unused local port for `inp'.
 *
 * The candidate range and the per-pcbinfo "last port" cursor are selected
 * by INP_HIGHPORT / INP_LOWPORT (the latter requires
 * PRIV_NETINET_RESERVEDPORT) or default to the ordinary automatic range.
 * Candidates are probed with in_pcblookup_local() / in6_pcblookup_local()
 * until one is free.
 *
 * On success 0 is returned and *lportp holds the port in network byte
 * order; for an IPv4-only PCB *laddrp is written back unchanged.
 * EADDRNOTAVAIL is returned once the whole range has been tried.
 */
int
in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
    struct ucred *cred, int lookupflags)
{
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct inpcb *holder;
	unsigned short *cursor;
	int remaining, randomize, error;
	u_short swap, first, last, candidate;
#ifdef INET
	struct in_addr laddr;
#endif

	/*
	 * Because no actual state changes occur here, a global write lock on
	 * the pcbinfo isn't required.
	 */
	INP_LOCK_ASSERT(inp);
	INP_HASH_LOCK_ASSERT(pcbinfo);

	if (inp->inp_flags & INP_HIGHPORT) {
		first = V_ipport_hifirstauto;	/* sysctl */
		last = V_ipport_hilastauto;
		cursor = &pcbinfo->ipi_lasthi;
	} else if (inp->inp_flags & INP_LOWPORT) {
		error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
		if (error)
			return (error);
		first = V_ipport_lowfirstauto;	/* 1023 */
		last = V_ipport_lowlastauto;	/* 600 */
		cursor = &pcbinfo->ipi_lastlow;
	} else {
		first = V_ipport_firstauto;	/* sysctl */
		last = V_ipport_lastauto;
		cursor = &pcbinfo->ipi_lastport;
	}

	/*
	 * UDP randomizes whenever the user allows it; everything else also
	 * needs ipport_tick() not to have stopped randomization.  A range
	 * holding a single port leaves nothing to randomize.
	 */
	randomize = (V_ipport_randomized &&
	    (!V_ipport_stoprandom || pcbinfo == &V_udbinfo)) ? 1 : 0;
	if (first == last)
		randomize = 0;

	/* Make sure to not include UDP packets in the count. */
	if (pcbinfo != &V_udbinfo)
		V_ipport_tcpallocs++;

	/* Normalise so that first <= last; one search loop then suffices. */
	if (first > last) {
		swap = first;
		first = last;
		last = swap;
	}

#ifdef INET
	/* Make the compiler happy. */
	laddr.s_addr = 0;
	if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) {
		KASSERT(laddrp != NULL, ("%s: laddrp NULL for v4 inp %p",
		    __func__, inp));
		laddr = *laddrp;
	}
#endif
	holder = NULL;	/* Make compiler happy. */
	candidate = *lportp;

	if (randomize)
		*cursor = first + (arc4random() % (last - first));

	remaining = last - first;
	for (;;) {
		if (remaining-- < 0)	/* completely used? */
			return (EADDRNOTAVAIL);

		/* Advance the cursor, wrapping back into the range. */
		++*cursor;
		if (*cursor < first || *cursor > last)
			*cursor = first;
		candidate = htons(*cursor);

#ifdef INET6
		if ((inp->inp_vflag & INP_IPV6) != 0)
			holder = in6_pcblookup_local(pcbinfo,
			    &inp->in6p_laddr, candidate, lookupflags, cred);
#endif
#if defined(INET) && defined(INET6)
		else
#endif
#ifdef INET
			holder = in_pcblookup_local(pcbinfo, laddr,
			    candidate, lookupflags, cred);
#endif
		if (holder == NULL)
			break;
	}

#ifdef INET
	if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4)
		laddrp->s_addr = laddr.s_addr;
#endif
	*lportp = candidate;

	return (0);
}
#endif /* INET || INET6 */
- #ifdef INET
- /*
- * Set up a bind operation on a PCB, performing port allocation
- * as required, but do not actually modify the PCB. Callers can
- * either complete the bind by setting inp_laddr/inp_lport and
- * calling in_pcbinshash(), or they can just use the resulting
- * port and address to authorise the sending of a once-off packet.
- *
- * On error, the values of *laddrp and *lportp are not changed.
- */
- int
- in_pcbbind_setup(struct inpcb *inp, struct bsd_sockaddr *nam, in_addr_t *laddrp,
- u_short *lportp, struct ucred *cred)
- {
- struct socket *so = inp->inp_socket;
- struct bsd_sockaddr_in *sin;
- struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
- struct in_addr laddr;
- u_short lport = 0;
- int lookupflags = 0, reuseport = (so->so_options & SO_REUSEPORT);
- int error;
- /*
- * No state changes, so read locks are sufficient here.
- */
- INP_LOCK_ASSERT(inp);
- INP_HASH_LOCK_ASSERT(pcbinfo);
- if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */
- return (EADDRNOTAVAIL);
- laddr.s_addr = *laddrp;
- if (nam != NULL && laddr.s_addr != INADDR_ANY)
- return (EINVAL);
- if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
- lookupflags = INPLOOKUP_WILDCARD;
- if (nam != NULL) {
- sin = (struct bsd_sockaddr_in *)nam;
- if (nam->sa_len != sizeof (*sin))
- return (EINVAL);
- #ifdef notdef
- /*
- * We should check the family, but old programs
- * incorrectly fail to initialize it.
- */
- if (sin->sin_family != AF_INET)
- return (EAFNOSUPPORT);
- #endif
- if (sin->sin_port != *lportp) {
- /* Don't allow the port to change. */
- if (*lportp != 0)
- return (EINVAL);
- lport = sin->sin_port;
- }
- /* NB: lport is left as 0 if the port isn't being changed. */
- if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
- /*
- * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
- * allow complete duplication of binding if
- * SO_REUSEPORT is set, or if SO_REUSEADDR is set
- * and a multicast address is bound on both
- * new and duplicated sockets.
- */
- if (so->so_options & SO_REUSEADDR)
- reuseport = SO_REUSEADDR|SO_REUSEPORT;
- } else if (sin->sin_addr.s_addr != INADDR_ANY) {
- sin->sin_port = 0; /* yech... */
- bzero(&sin->sin_zero, sizeof(sin->sin_zero));
- /*
- * Is the address a local IP address?
- * If INP_BINDANY is set, then the socket may be bound
- * to any endpoint address, local or not.
- */
- if ((inp->inp_flags & INP_BINDANY) == 0 &&
- ifa_ifwithaddr_check((struct bsd_sockaddr *)sin) == 0)
- return (EADDRNOTAVAIL);
- }
- laddr = sin->sin_addr;
- if (lport) {
- struct inpcb *t;
- struct tcptw *tw;
- /* GROSS */
- if (ntohs(lport) <= V_ipport_reservedhigh &&
- ntohs(lport) >= V_ipport_reservedlow &&
- priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
- 0))
- return (EACCES);
- if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
- priv_check_cred(inp->inp_cred,
- PRIV_NETINET_REUSEPORT, 0) != 0) {
- t = in_pcblookup_local(pcbinfo, sin->sin_addr,
- lport, INPLOOKUP_WILDCARD, cred);
- /*
- * XXX
- * This entire block sorely needs a rewrite.
- */
- #if 0
- if (t &&
- ((t->inp_flags & INP_TIMEWAIT) == 0) &&
- (so->so_type != SOCK_STREAM ||
- ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
- (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
- ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
- (t->inp_flags2 & INP_REUSEPORT) == 0) &&
- (inp->inp_cred->cr_uid !=
- t->inp_cred->cr_uid))
- return (EADDRINUSE);
- #endif
- }
- t = in_pcblookup_local(pcbinfo, sin->sin_addr,
- lport, lookupflags, cred);
- if (t && (t->inp_flags & INP_TIMEWAIT)) {
- /*
- * XXXRW: If an incpb has had its timewait
- * state recycled, we treat the address as
- * being in use (for now). This is better
- * than a panic, but not desirable.
- */
- /*
- * Linux allows a SO_REUSEADDR socket to be
- * bound to an existing TIME_WAIT socket
- * if SO_REUSEADDR is set on the new socket.
- *
- * Allow for that in addition to the BSD
- * SO_REUSEPORT semantics.
- */
- tw = intotw(t);
- if (tw == NULL ||
- ((reuseport & tw->tw_so_options) == 0)
- && (so->so_options & SO_REUSEADDR) == 0)
- return (EADDRINUSE);
- } else if (t && (reuseport == 0 ||
- (t->inp_flags2 & INP_REUSEPORT) == 0)) {
- #ifdef INET6
- if (ntohl(sin->sin_addr.s_addr) !=
- INADDR_ANY ||
- ntohl(t->inp_laddr.s_addr) !=
- INADDR_ANY ||
- (inp->inp_vflag & INP_IPV6PROTO) == 0 ||
- (t->inp_vflag & INP_IPV6PROTO) == 0)
- #endif
- return (EADDRINUSE);
- }
- }
- }
- if (*lportp != 0)
- lport = *lportp;
- if (lport == 0) {
- error = in_pcb_lport(inp, &laddr, &lport, cred, lookupflags);
- if (error != 0)
- return (error);
- }
- *laddrp = laddr.s_addr;
- *lportp = lport;
- return (0);
- }
- /*
- * Connect from a socket to a specified address.
- * Both address and port must be specified in argument sin.
- * If don't have a local address for this socket yet,
- * then pick one.
- */
- int
- in_pcbconnect_mbuf(struct inpcb *inp, struct bsd_sockaddr *nam,
- struct ucred *cred, struct mbuf *m)
- {
- u_short lport, fport;
- in_addr_t laddr, faddr;
- int anonport, error;
- INP_LOCK_ASSERT(inp);
- INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
- lport = inp->inp_lport;
- laddr = inp->inp_laddr.s_addr;
- anonport = (lport == 0);
- error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport,
- NULL, cred);
- if (error)
- return (error);
- /* Do the initial binding of the local address if required. */
- if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
- inp->inp_lport = lport;
- inp->inp_laddr.s_addr = laddr;
- if (in_pcbinshash(inp) != 0) {
- inp->inp_laddr.s_addr = INADDR_ANY;
- inp->inp_lport = 0;
- return (EAGAIN);
- }
- }
- /* Commit the remaining changes. */
- inp->inp_lport = lport;
- inp->inp_laddr.s_addr = laddr;
- inp->inp_faddr.s_addr = faddr;
- inp->inp_fport = fport;
- in_pcbrehash_mbuf(inp, m);
- if (anonport)
- inp->inp_flags |= INP_ANONPORT;
- return (0);
- }
- int
- in_pcbconnect(struct inpcb *inp, struct bsd_sockaddr *nam, struct ucred *cred)
- {
- return (in_pcbconnect_mbuf(inp, nam, cred, NULL));
- }
- /*
- * Do proper source address selection on an unbound socket in case
- * of connect. Take jails into account as well.
- */
- static int
- in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
- struct ucred *cred)
- {
- struct bsd_ifaddr *ifa;
- struct bsd_sockaddr *sa;
- struct bsd_sockaddr_in *sin;
- struct route sro;
- struct rtentry rte_one;
- int error;
- KASSERT(laddr != NULL, ("%s: laddr NULL", __func__));
- error = 0;
- bzero(&sro, sizeof(sro));
- sin = (struct bsd_sockaddr_in *)&sro.ro_dst;
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(struct bsd_sockaddr_in);
- sin->sin_addr.s_addr = faddr->s_addr;
- /*
- * If route is known our src addr is taken from the i/f,
- * else punt.
- *
- * Find out route to destination.
- */
- if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
- {
- if (route_cache::lookup(sin, inp->inp_inc.inc_fibnum, &rte_one)) {
- sro.ro_rt = &rte_one;
- } else {
- sro.ro_rt = NULL;
- }
- }
- /*
- * If we found a route, use the address corresponding to
- * the outgoing interface.
- *
- * Otherwise assume faddr is reachable on a directly connected
- * network and try to find a corresponding interface to take
- * the source address from.
- */
- if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) {
- struct in_ifaddr *ia;
- struct ifnet *ifp;
- ia = ifatoia(ifa_ifwithdstaddr((struct bsd_sockaddr *)sin));
- if (ia == NULL)
- ia = ifatoia(ifa_ifwithnet((struct bsd_sockaddr *)sin, 0));
- if (ia == NULL) {
- error = ENETUNREACH;
- goto done;
- }
- ifp = ia->ia_ifp;
- ifa_free(&ia->ia_ifa);
- ia = NULL;
- IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- sa = ifa->ifa_addr;
- if (sa->sa_family != AF_INET)
- continue;
- sin = (struct bsd_sockaddr_in *)sa;
- ia = (struct in_ifaddr *)ifa;
- break;
- }
- if (ia != NULL) {
- laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
- IF_ADDR_RUNLOCK(ifp);
- goto done;
- }
- IF_ADDR_RUNLOCK(ifp);
- error = 0;
- goto done;
- }
- /*
- * If the outgoing interface on the route found is not
- * a loopback interface, use the address from that interface.
- * In case of jails do those three steps:
- * 1. check if the interface address belongs to the jail. If so use it.
- * 2. check if we have any address on the outgoing interface
- * belonging to this jail. If so use it.
- * 3. as a last resort return the 'default' jail address.
- */
- if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) {
- struct in_ifaddr *ia;
- ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
- laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
- goto done;
- }
- /*
- * The outgoing interface is marked with 'loopback net', so a route
- * to ourselves is here.
- * Try to find the interface of the destination address and then
- * take the address from there. That interface is not necessarily
- * a loopback interface.
- * In case of jails, check that it is an address of the jail
- * and if we cannot find, fall back to the 'default' jail address.
- */
- if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
- struct bsd_sockaddr_in sain;
- struct in_ifaddr *ia;
- bzero(&sain, sizeof(struct bsd_sockaddr_in));
- sain.sin_family = AF_INET;
- sain.sin_len = sizeof(struct bsd_sockaddr_in);
- sain.sin_addr.s_addr = faddr->s_addr;
- ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain)));
- if (ia == NULL)
- ia = ifatoia(ifa_ifwithnet(sintosa(&sain), 0));
- if (ia == NULL)
- ia = ifatoia(ifa_ifwithaddr(sintosa(&sain)));
- if (ia == NULL) {
- error = ENETUNREACH;
- goto done;
- }
- laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
- ifa_free(&ia->ia_ifa);
- goto done;
- }
- done:
- return (error);
- }
- /*
- * Set up for a connect from a socket to the specified address.
- * On entry, *laddrp and *lportp should contain the current local
- * address and port for the PCB; these are updated to the values
- * that should be placed in inp_laddr and inp_lport to complete
- * the connect.
- *
- * On success, *faddrp and *fportp will be set to the remote address
- * and port. These are not updated in the error case.
- *
- * If the operation fails because the connection already exists,
- * *oinpp will be set to the PCB of that connection so that the
- * caller can decide to override it. In all other cases, *oinpp
- * is set to NULL.
- */
- int
- in_pcbconnect_setup(struct inpcb *inp, struct bsd_sockaddr *nam,
- in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp,
- struct inpcb **oinpp, struct ucred *cred)
- {
- struct bsd_sockaddr_in *sin = (struct bsd_sockaddr_in *)nam;
- struct in_ifaddr *ia;
- struct inpcb *oinp;
- struct in_addr laddr, faddr;
- u_short lport, fport;
- int error;
- /*
- * Because a global state change doesn't actually occur here, a read
- * lock is sufficient.
- */
- INP_LOCK_ASSERT(inp);
- INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo);
- if (oinpp != NULL)
- *oinpp = NULL;
- if (nam->sa_len != sizeof (*sin))
- return (EINVAL);
- if (sin->sin_family != AF_INET)
- return (EAFNOSUPPORT);
- if (sin->sin_port == 0)
- return (EADDRNOTAVAIL);
- laddr.s_addr = *laddrp;
- lport = *lportp;
- faddr = sin->sin_addr;
- fport = sin->sin_port;
- if (!TAILQ_EMPTY(&V_in_ifaddrhead)) {
- /*
- * If the destination address is INADDR_ANY,
- * use the primary local address.
- * If the supplied address is INADDR_BROADCAST,
- * and the primary interface supports broadcast,
- * choose the broadcast address for that interface.
- */
- if (faddr.s_addr == INADDR_ANY) {
- IN_IFADDR_RLOCK();
- faddr =
- IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr;
- IN_IFADDR_RUNLOCK();
- } else if (faddr.s_addr == (u_long)INADDR_BROADCAST) {
- IN_IFADDR_RLOCK();
- if (TAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags &
- IFF_BROADCAST)
- faddr = satosin(&TAILQ_FIRST(
- &V_in_ifaddrhead)->ia_broadaddr)->sin_addr;
- IN_IFADDR_RUNLOCK();
- }
- }
- if (laddr.s_addr == INADDR_ANY) {
- error = in_pcbladdr(inp, &faddr, &laddr, cred);
- /*
- * If the destination address is multicast and an outgoing
- * interface has been set as a multicast option, prefer the
- * address of that interface as our source address.
- */
- if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
- inp->inp_moptions != NULL) {
- struct ip_moptions *imo;
- struct ifnet *ifp;
- imo = inp->inp_moptions;
- if (imo->imo_multicast_ifp != NULL) {
- ifp = imo->imo_multicast_ifp;
- IN_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
- if (ia->ia_ifp == ifp)
- break;
- }
- if (ia == NULL)
- error = EADDRNOTAVAIL;
- else {
- laddr = ia->ia_addr.sin_addr;
- error = 0;
- }
- IN_IFADDR_RUNLOCK();
- }
- }
- if (error)
- return (error);
- }
- oinp = in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, fport,
- laddr, lport, 0, NULL);
- if (oinp != NULL) {
- if (oinpp != NULL)
- *oinpp = oinp;
- return (EADDRINUSE);
- }
- if (lport == 0) {
- error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport,
- cred);
- if (error)
- return (error);
- }
- *laddrp = laddr.s_addr;
- *lportp = lport;
- *faddrp = faddr.s_addr;
- *fportp = fport;
- return (0);
- }
- void
- in_pcbdisconnect(struct inpcb *inp)
- {
- INP_LOCK_ASSERT(inp);
- INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
- inp->inp_faddr.s_addr = INADDR_ANY;
- inp->inp_fport = 0;
- in_pcbrehash(inp);
- }
- #endif
- /*
- * in_pcbdetach() is responsibe for disassociating a socket from an inpcb.
- * For most protocols, this will be invoked immediately prior to calling
- * in_pcbfree(). However, with TCP the inpcb may significantly outlive the
- * socket, in which case in_pcbfree() is deferred.
- */
- void
- in_pcbdetach(struct inpcb *inp)
- {
- KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
- inp->inp_socket->so_pcb = NULL;
- inp->inp_socket = NULL;
- }
- /*
- * in_pcbref() bumps the reference count on an inpcb in order to maintain
- * stability of an inpcb pointer despite the inpcb lock being released. This
- * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
- * but where the inpcb lock may already held.
- *
- * in_pcbref() should be used only to provide brief memory stability, and
- * must always be followed by a call to INP_WLOCK() and in_pcbrele() to
- * garbage collect the inpcb if it has been in_pcbfree()'d from another
- * context. Until in_pcbrele() has returned that the inpcb is still valid,
- * lock and rele are the *only* safe operations that may be performed on the
- * inpcb.
- *
- * While the inpcb will not be freed, releasing the inpcb lock means that the
- * connection's state may change, so the caller should be careful to
- * revalidate any cached state on reacquiring the lock. Drop the reference
- * using in_pcbrele().
- */
- void
- in_pcbref(struct inpcb *inp)
- {
- KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
- trace_inpcb_ref(inp);
- refcount_acquire(&inp->inp_refcount);
- }
- /*
- * Drop a refcount on an inpcb elevated using in_pcbref(); because a call to
- * in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we
- * return a flag indicating whether or not the inpcb remains valid. If it is
- * valid, we return with the inpcb lock held.
- *
- * Notice that, unlike in_pcbref(), the inpcb lock must be held to drop a
- * reference on an inpcb. Historically more work was done here (actually, in
- * in_pcbfree_internal()) but has been moved to in_pcbfree() to avoid the
- * need for the pcbinfo lock in in_pcbrele(). Deferring the free is entirely
- * about memory stability (and continued use of the write lock).
- */
- int
- in_pcbrele_locked(struct inpcb *inp)
- {
- struct inpcbinfo *pcbinfo;
- KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
- trace_inpcb_rele(inp);
- INP_LOCK_ASSERT(inp);
- if (refcount_release(&inp->inp_refcount) == 0) {
- /*
- * If the inpcb has been freed, let the caller know, even if
- * this isn't the last reference.
- */
- if (inp->inp_flags2 & INP_FREED) {
- INP_UNLOCK(inp);
- return (1);
- }
- return (0);
- }
- KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
- trace_inpcb_free(inp);
- INP_UNLOCK(inp);
- pcbinfo = inp->inp_pcbinfo;
- delete inp;
- return (1);
- }
- /*
- * Temporary wrapper.
- */
- int
- in_pcbrele(struct inpcb *inp)
- {
- return (in_pcbrele_locked(inp));
- }
- /*
- * Unconditionally schedule an inpcb to be freed by decrementing its
- * reference count, which should occur only after the inpcb has been detached
- * from its socket. If another thread holds a temporary reference (acquired
- * using in_pcbref()) then the free is deferred until that reference is
- * released using in_pcbrele(), but the inpcb is still unlocked. Almost all
- * work, including removal from global lists, is done in this context, where
- * the pcbinfo lock is held.
- */
- void
- in_pcbfree(struct inpcb *inp)
- {
- struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
- KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
- INP_INFO_WLOCK_ASSERT(pcbinfo);
- INP_LOCK_ASSERT(inp);
- /* XXXRW: Do as much as possible here. */
- #ifdef IPSEC
- if (inp->inp_sp != NULL)
- ipsec_delete_pcbpolicy(inp);
- #endif /* IPSEC */
- inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
- in_pcbremlists(inp);
- #ifdef INET6
- if (inp->inp_vflag & INP_IPV6PROTO) {
- ip6_freepcbopts(inp->in6p_outputopts);
- if (inp->in6p_moptions != NULL)
- ip6_freemoptions(inp->in6p_moptions);
- }
- #endif
- if (inp->inp_options)
- (void)m_free(inp->inp_options);
- #ifdef INET
- if (inp->inp_moptions != NULL)
- inp_freemoptions(inp->inp_moptions);
- #endif
- inp->inp_vflag = 0;
- inp->inp_flags2 |= INP_FREED;
- #ifdef MAC
- mac_inpcb_destroy(inp);
- #endif
- if (!in_pcbrele_locked(inp))
- INP_UNLOCK(inp);
- }
- /*
- * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and
- * port reservation, and preventing it from being returned by inpcb lookups.
- *
- * It is used by TCP to mark an inpcb as unused and avoid future packet
- * delivery or event notification when a socket remains open but TCP has
- * closed. This might occur as a result of a shutdown()-initiated TCP close
- * or a RST on the wire, and allows the port binding to be reused while still
- * maintaining the invariant that so_pcb always points to a valid inpcb until
- * in_pcbdetach().
- *
- * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by
- * in_pcbnotifyall() and in_pcbpurgeif0()?
- */
- void
- in_pcbdrop(struct inpcb *inp)
- {
- INP_LOCK_ASSERT(inp);
- /*
- * XXXRW: Possibly we should protect the setting of INP_DROPPED with
- * the hash lock...?
- */
- inp->inp_flags |= INP_DROPPED;
- if (inp->inp_flags & INP_INHASHLIST) {
- struct inpcbport *phd = inp->inp_phd;
- INP_HASH_WLOCK(inp->inp_pcbinfo);
- LIST_REMOVE(inp, inp_hash);
- LIST_REMOVE(inp, inp_portlist);
- if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
- LIST_REMOVE(phd, phd_hash);
- free(phd);
- }
- INP_HASH_WUNLOCK(inp->inp_pcbinfo);
- inp->inp_flags &= ~INP_INHASHLIST;
- }
- }
- #ifdef INET
- /*
- * Common routines to return the socket addresses associated with inpcbs.
- */
- struct bsd_sockaddr *
- in_sockaddr(in_port_t port, struct in_addr *addr_p)
- {
- struct bsd_sockaddr_in *sin;
- sin = (bsd_sockaddr_in *)malloc(sizeof *sin);
- bzero(sin, sizeof *sin);
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(*sin);
- sin->sin_addr = *addr_p;
- sin->sin_port = port;
- return (struct bsd_sockaddr *)sin;
- }
- int
- in_getsockaddr(struct socket *so, struct bsd_sockaddr **nam)
- {
- struct inpcb *inp;
- struct in_addr addr;
- in_port_t port;
- inp = sotoinpcb(so);
- KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));
- INP_LOCK(inp);
- port = inp->inp_lport;
- addr = inp->inp_laddr;
- INP_UNLOCK(inp);
- *nam = in_sockaddr(port, &addr);
- return 0;
- }
- int
- in_getpeeraddr(struct socket *so, struct bsd_sockaddr **nam)
- {
- struct inpcb *inp;
- struct in_addr addr;
- in_port_t port;
- inp = sotoinpcb(so);
- KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));
- INP_LOCK(inp);
- port = inp->inp_fport;
- addr = inp->inp_faddr;
- INP_UNLOCK(inp);
- *nam = in_sockaddr(port, &addr);
- return 0;
- }
- void
- in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errval,
- struct inpcb *(*notify)(struct inpcb *, int))
- {
- struct inpcb *inp, *inp_temp;
- INP_INFO_WLOCK(pcbinfo);
- LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
- INP_LOCK(inp);
- #ifdef INET6
- if ((inp->inp_vflag & INP_IPV4) == 0) {
- INP_UNLOCK(inp);
- continue;
- }
- #endif
- if (inp->inp_faddr.s_addr != faddr.s_addr ||
- inp->inp_socket == NULL) {
- INP_UNLOCK(inp);
- continue;
- }
- if ((*notify)(inp, errval))
- INP_UNLOCK(inp);
- }
- INP_INFO_WUNLOCK(pcbinfo);
- }
- void
- in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
- {
- struct inpcb *inp;
- struct ip_moptions *imo;
- int i, gap;
- INP_INFO_WLOCK(pcbinfo);
- LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
- INP_LOCK(inp);
- imo = inp->inp_moptions;
- if ((inp->inp_vflag & INP_IPV4) &&
- imo != NULL) {
- /*
- * Unselect the outgoing interface if it is being
- * detached.
- */
- if (imo->imo_multicast_ifp == ifp)
- imo->imo_multicast_ifp = NULL;
- /*
- * Drop multicast group membership if we joined
- * through the interface being detached.
- */
- for (i = 0, gap = 0; i < imo->imo_num_memberships;
- i++) {
- if (imo->imo_membership[i]->inm_ifp == ifp) {
- in_delmulti(imo->imo_membership[i]);
- gap++;
- } else if (gap != 0)
- imo->imo_membership[i - gap] =
- imo->imo_membership[i];
- }
- imo->imo_num_memberships -= gap;
- }
- INP_UNLOCK(inp);
- }
- INP_INFO_WUNLOCK(pcbinfo);
- }
- /*
- * Lookup a PCB based on the local address and port. Caller must hold the
- * hash lock. No inpcb locks or references are acquired.
- */
- #define INP_LOOKUP_MAPPED_PCB_COST 3
- struct inpcb *
- in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
- u_short lport, int lookupflags, struct ucred *cred)
- {
- struct inpcb *inp;
- #ifdef INET6
- int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
- #else
- int matchwild = 3;
- #endif
- int wildcard;
- KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
- ("%s: invalid lookup flags %d", __func__, lookupflags));
- INP_HASH_LOCK_ASSERT(pcbinfo);
- if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
- struct inpcbhead *head;
- /*
- * Look for an unconnected (wildcard foreign addr) PCB that
- * matches the local address and port we're looking for.
- */
- head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
- 0, pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
- #ifdef INET6
- /* XXX inp locking */
- if ((inp->inp_vflag & INP_IPV4) == 0)
- continue;
- #endif
- if (inp->inp_faddr.s_addr == INADDR_ANY &&
- inp->inp_laddr.s_addr == laddr.s_addr &&
- inp->inp_lport == lport) {
- /*
- * Found?
- */
- return (inp);
- }
- }
- /*
- * Not found.
- */
- return (NULL);
- } else {
- struct inpcbporthead *porthash;
- struct inpcbport *phd;
- struct inpcb *match = NULL;
- /*
- * Best fit PCB lookup.
- *
- * First see if this local port is in use by looking on the
- * port hash list.
- */
- porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
- pcbinfo->ipi_porthashmask)];
- LIST_FOREACH(phd, porthash, phd_hash) {
- if (phd->phd_port == lport)
- break;
- }
- if (phd != NULL) {
- /*
- * Port is in use by one or more PCBs. Look for best
- * fit.
- */
- LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
- wildcard = 0;
- #ifdef INET6
- /* XXX inp locking */
- if ((inp->inp_vflag & INP_IPV4) == 0)
- continue;
- /*
- * We never select the PCB that has
- * INP_IPV6 flag and is bound to :: if
- * we have another PCB which is bound
- * to 0.0.0.0. If a PCB has the
- * INP_IPV6 flag, then we set its cost
- * higher than IPv4 only PCBs.
- *
- * Note that the case only happens
- * when a socket is bound to ::, under
- * the condition that the use of the
- * mapped address is allowed.
- */
- if ((inp->inp_vflag & INP_IPV6) != 0)
- wildcard += INP_LOOKUP_MAPPED_PCB_COST;
- #endif
- if (inp->inp_faddr.s_addr != INADDR_ANY)
- wildcard++;
- if (inp->inp_laddr.s_addr != INADDR_ANY) {
- if (laddr.s_addr == INADDR_ANY)
- wildcard++;
- else if (inp->inp_laddr.s_addr != laddr.s_addr)
- continue;
- } else {
- if (laddr.s_addr != INADDR_ANY)
- wildcard++;
- }
- if (wildcard < matchwild) {
- match = inp;
- matchwild = wildcard;
- if (matchwild == 0)
- break;
- }
- }
- }
- return (match);
- }
- }
- #undef INP_LOOKUP_MAPPED_PCB_COST
- /*
- * Lookup PCB in hash list, using pcbinfo tables. This variation assumes
- * that the caller has locked the hash list, and will not perform any further
- * locking or reference operations on either the hash list or the connection.
- */
- static struct inpcb *
- in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
- u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags,
- struct ifnet *ifp)
- {
- struct inpcbhead *head;
- struct inpcb *inp, *tmpinp;
- u_short fport = fport_arg, lport = lport_arg;
- KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
- ("%s: invalid lookup flags %d", __func__, lookupflags));
- INP_HASH_LOCK_ASSERT(pcbinfo);
- /*
- * First look for an exact match.
- */
- tmpinp = NULL;
- head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
- pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
- #ifdef INET6
- /* XXX inp locking */
- if ((inp->inp_vflag & INP_IPV4) == 0)
- continue;
- #endif
- if (inp->inp_faddr.s_addr == faddr.s_addr &&
- inp->inp_laddr.s_addr == laddr.s_addr &&
- inp->inp_fport == fport &&
- inp->inp_lport == lport) {
- /*
- * XXX We should be able to directly return
- * the inp here, without any checks.
- * Well unless both bound with SO_REUSEPORT?
- */
- if (tmpinp == NULL)
- tmpinp = inp;
- }
- }
- if (tmpinp != NULL)
- return (tmpinp);
- /*
- * Then look for a wildcard match, if requested.
- */
- if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
- struct inpcb *local_wild = NULL, *local_exact = NULL;
- #ifdef INET6
- struct inpcb *local_wild_mapped = NULL;
- #endif
- struct inpcb *jail_wild = NULL;
- int injail;
- /*
- * Order of socket selection - we always prefer jails.
- * 1. jailed, non-wild.
- * 2. jailed, wild.
- * 3. non-jailed, non-wild.
- * 4. non-jailed, wild.
- */
- head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
- 0, pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
- #ifdef INET6
- /* XXX inp locking */
- if ((inp->inp_vflag & INP_IPV4) == 0)
- continue;
- #endif
- if (inp->inp_faddr.s_addr != INADDR_ANY ||
- inp->inp_lport != lport)
- continue;
- /* XXX inp locking */
- if (ifp && ifp->if_type == IFT_FAITH &&
- (inp->inp_flags & INP_FAITH) == 0)
- continue;
- injail = 0;
- if (local_exact != NULL)
- continue;
- if (inp->inp_laddr.s_addr == laddr.s_addr) {
- if (injail)
- return (inp);
- else
- local_exact = inp;
- } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
- #ifdef INET6
- /* XXX inp locking, NULL check */
- if (inp->inp_vflag & INP_IPV6PROTO)
- local_wild_mapped = inp;
- else
- #endif /* INET6 */
- if (injail)
- jail_wild = inp;
- else
- local_wild = inp;
- }
- } /* LIST_FOREACH */
- if (jail_wild != NULL)
- return (jail_wild);
- if (local_exact != NULL)
- return (local_exact);
- if (local_wild != NULL)
- return (local_wild);
- #ifdef INET6
- if (local_wild_mapped != NULL)
- return (local_wild_mapped);
- #endif /* defined(INET6) */
- } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
- return (NULL);
- }
- /*
- * Lookup PCB in hash list, using pcbinfo tables. This variation locks the
- * hash list lock, and will return the inpcb locked (i.e., requires
- * INPLOOKUP_LOCKPCB).
- */
- static struct inpcb *
- in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
- u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
- struct ifnet *ifp)
- {
- struct inpcb *inp;
- INP_HASH_RLOCK(pcbinfo);
- inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
- (lookupflags & ~(INPLOOKUP_LOCKPCB)), ifp);
- if (inp != NULL) {
- in_pcbref(inp);
- INP_HASH_RUNLOCK(pcbinfo);
- if (lookupflags & INPLOOKUP_LOCKPCB) {
- INP_LOCK(inp);
- if (in_pcbrele_locked(inp))
- return (NULL);
- } else
- panic("%s: locking bug", __func__);
- } else
- INP_HASH_RUNLOCK(pcbinfo);
- return (inp);
- }
- /*
- * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
- * from which a pre-calculated hash value may be extracted.
- */
- struct inpcb *
- in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
- struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp)
- {
- KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
- ("%s: invalid lookup flags %d", __func__, lookupflags));
- KASSERT((lookupflags & INPLOOKUP_LOCKPCB) != 0,
- ("%s: LOCKPCB not set", __func__));
- return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
- lookupflags, ifp));
- }
- struct inpcb *
- in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
- u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
- struct ifnet *ifp, struct mbuf *m)
- {
- KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
- ("%s: invalid lookup flags %d", __func__, lookupflags));
- KASSERT((lookupflags & (INPLOOKUP_LOCKPCB)) != 0,
- ("%s: LOCKPCB not set", __func__));
- return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
- lookupflags, ifp));
- }
- #endif /* INET */
- /*
- * Insert PCB onto various hash lists.
- */
- static int
- in_pcbinshash_internal(struct inpcb *inp)
- {
- struct inpcbhead *pcbhash;
- struct inpcbporthead *pcbporthash;
- struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
- struct inpcbport *phd;
- u_int32_t hashkey_faddr;
- INP_LOCK_ASSERT(inp);
- INP_HASH_LOCK_ASSERT(pcbinfo);
- KASSERT((inp->inp_flags & INP_INHASHLIST) == 0,
- ("in_pcbinshash: INP_INHASHLIST"));
- #ifdef INET6
- if (inp->inp_vflag & INP_IPV6)
- hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
- else
- #endif /* INET6 */
- hashkey_faddr = inp->inp_faddr.s_addr;
- pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
- inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
- pcbporthash = &pcbinfo->ipi_porthashbase[
- INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
- /*
- * Go through port list and look for a head for this lport.
- */
- LIST_FOREACH(phd, pcbporthash, phd_hash) {
- if (phd->phd_port == inp->inp_lport)
- break;
- }
- /*
- * If none exists, malloc one and tack it on.
- */
- if (phd == NULL) {
- phd = (inpcbport *)malloc(sizeof(struct inpcbport));
- if (phd == NULL) {
- return (ENOBUFS); /* XXX */
- }
- phd->phd_port = inp->inp_lport;
- LIST_INIT(&phd->phd_pcblist);
- LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
- }
- inp->inp_phd = phd;
- LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
- LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
- inp->inp_flags |= INP_INHASHLIST;
- return (0);
- }
- int
- in_pcbinshash(struct inpcb *inp)
- {
- return (in_pcbinshash_internal(inp));
- }
- /*
- * Move PCB to the proper hash bucket when { faddr, fport } have been
- * changed. NOTE: This does not handle the case of the lport changing (the
- * hashed port list would have to be updated as well), so the lport must
- * not change after in_pcbinshash() has been called.
- */
- void
- in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m)
- {
- struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
- struct inpcbhead *head;
- u_int32_t hashkey_faddr;
- INP_LOCK_ASSERT(inp);
- INP_HASH_WLOCK_ASSERT(pcbinfo);
- KASSERT(inp->inp_flags & INP_INHASHLIST,
- ("in_pcbrehash: !INP_INHASHLIST"));
- #ifdef INET6
- if (inp->inp_vflag & INP_IPV6)
- hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
- else
- #endif /* INET6 */
- hashkey_faddr = inp->inp_faddr.s_addr;
- head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
- inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
- LIST_REMOVE(inp, inp_hash);
- LIST_INSERT_HEAD(head, inp, inp_hash);
- }
- void
- in_pcbrehash(struct inpcb *inp)
- {
- in_pcbrehash_mbuf(inp, NULL);
- }
- /*
- * Remove PCB from various lists.
- */
- static void
- in_pcbremlists(struct inpcb *inp)
- {
- struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
- INP_LOCK_ASSERT(inp);
- inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
- if (inp->inp_flags & INP_INHASHLIST) {
- struct inpcbport *phd = inp->inp_phd;
- INP_HASH_WLOCK(pcbinfo);
- LIST_REMOVE(inp, inp_hash);
- LIST_REMOVE(inp, inp_portlist);
- if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
- LIST_REMOVE(phd, phd_hash);
- free(phd);
- }
- INP_HASH_WUNLOCK(pcbinfo);
- inp->inp_flags &= ~INP_INHASHLIST;
- }
- LIST_REMOVE(inp, inp_list);
- pcbinfo->ipi_count--;
- }
- /*
- * A set label operation has occurred at the socket layer, propagate the
- * label change into the in_pcb for the socket.
- */
void
in_pcbsosetlabel(struct socket *so)
{
	/* Without MAC support this function has nothing to do. */
#ifdef MAC
	struct inpcb *inp = sotoinpcb(so);

	KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));

	/* Lock order: inpcb first, then the socket. */
	INP_WLOCK(inp);
	SOCK_LOCK(so);
	mac_inpcb_sosetlabel(so, inp);
	SOCK_UNLOCK(so);
	INP_WUNLOCK(inp);
#endif
}
- /*
- * ipport_tick runs once per second, determining if random port allocation
- * should be continued. If more than ipport_randomcps ports have been
- * allocated in the last second, then we return to sequential port
- * allocation. We return to random allocation only once we drop below
- * ipport_randomcps for at least ipport_randomtime seconds.
- */
- static void
- ipport_tick(void *xtp)
- {
- VNET_ITERATOR_DECL(vnet_iter);
- VNET_LIST_RLOCK_NOSLEEP();
- VNET_FOREACH(vnet_iter) {
- CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS here */
- if (V_ipport_tcpallocs <=
- V_ipport_tcplastcount + V_ipport_randomcps) {
- if (V_ipport_stoprandom > 0)
- V_ipport_stoprandom--;
- } else
- V_ipport_stoprandom = V_ipport_randomtime;
- V_ipport_tcplastcount = V_ipport_tcpallocs;
- CURVNET_RESTORE();
- }
- VNET_LIST_RUNLOCK_NOSLEEP();
- callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
- }
#if 0
/*
 * Disabled: would stop the ipport_tick callout.  It is referenced only by the
 * commented-out shutdown handler registration in ipport_tick_init().
 */
static void
ip_fini(void *xtp)
{

	(void)callout_stop(&ipport_tick_callout);
}
#endif
- /*
- * The ipport_callout should start running at about the time we attach the
- * inet or inet6 domains.
- */
- void ipport_tick_init(const void *unused)
- {
- /* Start ipport_tick. */
- callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
- callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL);
- /* FIXME: OSv: shutdown handler... */
- // EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
- // SHUTDOWN_PRI_DEFAULT);
- }
- SYSINIT(ipport_tick_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE,
- ipport_tick_init, NULL);
#if 0
/*
 * Disabled block: function wrappers around the inpcb locking macros and
 * simple accessors for inpcb fields.  None of this is compiled.
 */

/* Acquire the inpcb write lock. */
void
inp_wlock(struct inpcb *inp)
{

	INP_WLOCK(inp);
}

/* Release the inpcb write lock. */
void
inp_wunlock(struct inpcb *inp)
{

	INP_WUNLOCK(inp);
}

/* Acquire the inpcb read lock. */
void
inp_rlock(struct inpcb *inp)
{

	INP_RLOCK(inp);
}

/* Release the inpcb read lock. */
void
inp_runlock(struct inpcb *inp)
{

	INP_RUNLOCK(inp);
}

#ifdef INVARIANTS
/* Assert that the inpcb write lock is held. */
void
inp_lock_assert(struct inpcb *inp)
{

	INP_WLOCK_ASSERT(inp);
}

/* Assert that the inpcb lock is not held. */
void
inp_unlock_assert(struct inpcb *inp)
{

	INP_UNLOCK_ASSERT(inp);
}
#endif

/*
 * Call func(inp, arg) on every inpcb on the V_tcbinfo list, write-locking
 * each inpcb around the call while holding the pcbinfo read lock.
 */
void
inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
{
	struct inpcb *cur;

	INP_INFO_RLOCK(&V_tcbinfo);
	LIST_FOREACH(cur, V_tcbinfo.ipi_listhead, inp_list) {
		INP_WLOCK(cur);
		(*func)(cur, arg);
		INP_WUNLOCK(cur);
	}
	INP_INFO_RUNLOCK(&V_tcbinfo);
}

/* Return the socket attached to the (write-locked) inpcb. */
struct socket *
inp_inpcbtosocket(struct inpcb *inp)
{
	struct socket *so;

	INP_WLOCK_ASSERT(inp);
	so = inp->inp_socket;
	return (so);
}

/* Return the inpcb's protocol control block viewed as a tcpcb. */
struct tcpcb *
inp_inpcbtotcpcb(struct inpcb *inp)
{
	struct tcpcb *tp;

	INP_WLOCK_ASSERT(inp);
	tp = (struct tcpcb *)inp->inp_ppcb;
	return (tp);
}

/* Read the inpcb's inp_ip_tos field. */
int
inp_ip_tos_get(const struct inpcb *inp)
{
	int tos = inp->inp_ip_tos;

	return (tos);
}

/* Store 'val' into the inpcb's inp_ip_tos field. */
void
inp_ip_tos_set(struct inpcb *inp, int val)
{

	inp->inp_ip_tos = val;
}

/*
 * Copy out the local and foreign address/port of the (locked) inpcb.
 */
void
inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
    uint32_t *faddr, uint16_t *fp)
{

	INP_LOCK_ASSERT(inp);

	/* Local endpoint. */
	*laddr = inp->inp_laddr.s_addr;
	*lp = inp->inp_lport;

	/* Foreign endpoint. */
	*faddr = inp->inp_faddr.s_addr;
	*fp = inp->inp_fport;
}

/* Function form of the sotoinpcb() conversion. */
struct inpcb *
so_sotoinpcb(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);

	return (inp);
}

/* Function form of the sototcpcb() conversion. */
struct tcpcb *
so_sototcpcb(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);

	return (tp);
}
#endif
- #ifdef DDB
- static void
- db_print_indent(int indent)
- {
- int i;
- for (i = 0; i < indent; i++)
- db_printf(" ");
- }
- static void
- db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
- {
- char faddr_str[48], laddr_str[48];
- db_print_indent(indent);
- db_printf("%s at %p\n", name, inc);
- indent …
Large files files are truncated, but you can click here to view the full file