PageRenderTime 117ms CodeModel.GetById 26ms app.highlight 75ms RepoModel.GetById 1ms app.codeStats 0ms

/contrib/bind9/lib/dns/dispatch.c

https://bitbucket.org/freebsd/freebsd-head/
C | 3533 lines | 2507 code | 520 blank | 506 comment | 657 complexity | 5edf881a04de59334c968878bc4474a1 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/*
   2 * Copyright (C) 2004-2009, 2011, 2012  Internet Systems Consortium, Inc. ("ISC")
   3 * Copyright (C) 1999-2003  Internet Software Consortium.
   4 *
   5 * Permission to use, copy, modify, and/or distribute this software for any
   6 * purpose with or without fee is hereby granted, provided that the above
   7 * copyright notice and this permission notice appear in all copies.
   8 *
   9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
  10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
  11 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
  12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  15 * PERFORMANCE OF THIS SOFTWARE.
  16 */
  17
  18/* $Id$ */
  19
  20/*! \file */
  21
  22#include <config.h>
  23
  24#include <stdlib.h>
  25#include <sys/types.h>
  26#include <unistd.h>
  27#include <stdlib.h>
  28
  29#include <isc/entropy.h>
  30#include <isc/mem.h>
  31#include <isc/mutex.h>
  32#include <isc/portset.h>
  33#include <isc/print.h>
  34#include <isc/random.h>
  35#include <isc/stats.h>
  36#include <isc/string.h>
  37#include <isc/task.h>
  38#include <isc/time.h>
  39#include <isc/util.h>
  40
  41#include <dns/acl.h>
  42#include <dns/dispatch.h>
  43#include <dns/events.h>
  44#include <dns/log.h>
  45#include <dns/message.h>
  46#include <dns/portlist.h>
  47#include <dns/stats.h>
  48#include <dns/tcpmsg.h>
  49#include <dns/types.h>
  50
  51typedef ISC_LIST(dns_dispentry_t)	dns_displist_t;
  52
  53typedef struct dispsocket		dispsocket_t;
  54typedef ISC_LIST(dispsocket_t)		dispsocketlist_t;
  55
  56typedef struct dispportentry		dispportentry_t;
  57typedef ISC_LIST(dispportentry_t)	dispportlist_t;
  58
  59/* ARC4 Random generator state */
  60typedef struct arc4ctx {
  61	isc_uint8_t	i;
  62	isc_uint8_t	j;
  63	isc_uint8_t	s[256];
  64	int		count;
  65	isc_entropy_t	*entropy;	/*%< entropy source for ARC4 */
  66	isc_mutex_t	*lock;
  67} arc4ctx_t;
  68
  69typedef struct dns_qid {
  70	unsigned int	magic;
  71	unsigned int	qid_nbuckets;	/*%< hash table size */
  72	unsigned int	qid_increment;	/*%< id increment on collision */
  73	isc_mutex_t	lock;
  74	dns_displist_t	*qid_table;	/*%< the table itself */
  75	dispsocketlist_t *sock_table;	/*%< socket table */
  76} dns_qid_t;
  77
  78struct dns_dispatchmgr {
  79	/* Unlocked. */
  80	unsigned int			magic;
  81	isc_mem_t		       *mctx;
  82	dns_acl_t		       *blackhole;
  83	dns_portlist_t		       *portlist;
  84	isc_stats_t		       *stats;
  85	isc_entropy_t		       *entropy; /*%< entropy source */
  86
  87	/* Locked by "lock". */
  88	isc_mutex_t			lock;
  89	unsigned int			state;
  90	ISC_LIST(dns_dispatch_t)	list;
  91
  92	/* Locked by arc4_lock. */
  93	isc_mutex_t			arc4_lock;
  94	arc4ctx_t			arc4ctx;    /*%< ARC4 context for QID */
  95
  96	/* locked by buffer lock */
  97	dns_qid_t			*qid;
  98	isc_mutex_t			buffer_lock;
  99	unsigned int			buffers;    /*%< allocated buffers */
 100	unsigned int			buffersize; /*%< size of each buffer */
 101	unsigned int			maxbuffers; /*%< max buffers */
 102
 103	/* Locked internally. */
 104	isc_mutex_t			pool_lock;
 105	isc_mempool_t		       *epool;	/*%< memory pool for events */
 106	isc_mempool_t		       *rpool;	/*%< memory pool for replies */
 107	isc_mempool_t		       *dpool;  /*%< dispatch allocations */
 108	isc_mempool_t		       *bpool;	/*%< memory pool for buffers */
 109	isc_mempool_t		       *spool;	/*%< memory pool for dispsocs */
 110
 111	/*%
 112	 * Locked by qid->lock if qid exists; otherwise, can be used without
 113	 * being locked.
 114	 * Memory footprint considerations: this is a simple implementation of
 115	 * available ports, i.e., an ordered array of the actual port numbers.
 116	 * This will require about 256KB of memory in the worst case (128KB for
 117	 * each of IPv4 and IPv6).  We could reduce it by representing it as a
 118	 * more sophisticated way such as a list (or array) of ranges that are
 119	 * searched to identify a specific port.  Our decision here is the saved
 120	 * memory isn't worth the implementation complexity, considering the
 121	 * fact that the whole BIND9 process (which is mainly named) already
 122	 * requires a pretty large memory footprint.  We may, however, have to
 123	 * revisit the decision when we want to use it as a separate module for
 124	 * an environment where memory requirement is severer.
 125	 */
 126	in_port_t	*v4ports;	/*%< available ports for IPv4 */
 127	unsigned int	nv4ports;	/*%< # of available ports for IPv4 */
 128	in_port_t	*v6ports;	/*%< available ports for IPv4 */
 129	unsigned int	nv6ports;	/*%< # of available ports for IPv4 */
 130};
 131
 132#define MGR_SHUTTINGDOWN		0x00000001U
 133#define MGR_IS_SHUTTINGDOWN(l)	(((l)->state & MGR_SHUTTINGDOWN) != 0)
 134
 135#define IS_PRIVATE(d)	(((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
 136
 137struct dns_dispentry {
 138	unsigned int			magic;
 139	dns_dispatch_t		       *disp;
 140	dns_messageid_t			id;
 141	in_port_t			port;
 142	unsigned int			bucket;
 143	isc_sockaddr_t			host;
 144	isc_task_t		       *task;
 145	isc_taskaction_t		action;
 146	void			       *arg;
 147	isc_boolean_t			item_out;
 148	dispsocket_t			*dispsocket;
 149	ISC_LIST(dns_dispatchevent_t)	items;
 150	ISC_LINK(dns_dispentry_t)	link;
 151};
 152
 153/*%
 154 * Maximum number of dispatch sockets that can be pooled for reuse.  The
 155 * appropriate value may vary, but experiments have shown a busy caching server
 156 * may need more than 1000 sockets concurrently opened.  The maximum allowable
 157 * number of dispatch sockets (per manager) will be set to the double of this
 158 * value.
 159 */
 160#ifndef DNS_DISPATCH_POOLSOCKS
 161#define DNS_DISPATCH_POOLSOCKS			2048
 162#endif
 163
 164/*%
 165 * Quota to control the number of dispatch sockets.  If a dispatch has more
 166 * than the quota of sockets, new queries will purge oldest ones, so that
 167 * a massive number of outstanding queries won't prevent subsequent queries
 168 * (especially if the older ones take longer time and result in timeout).
 169 */
 170#ifndef DNS_DISPATCH_SOCKSQUOTA
 171#define DNS_DISPATCH_SOCKSQUOTA			3072
 172#endif
 173
 174struct dispsocket {
 175	unsigned int			magic;
 176	isc_socket_t			*socket;
 177	dns_dispatch_t			*disp;
 178	isc_sockaddr_t			host;
 179	in_port_t			localport; /* XXX: should be removed later */
 180	dispportentry_t			*portentry;
 181	dns_dispentry_t			*resp;
 182	isc_task_t			*task;
 183	ISC_LINK(dispsocket_t)		link;
 184	unsigned int			bucket;
 185	ISC_LINK(dispsocket_t)		blink;
 186};
 187
 188/*%
 189 * A port table entry.  We remember every port we first open in a table with a
 190 * reference counter so that we can 'reuse' the same port (with different
 191 * destination addresses) using the SO_REUSEADDR socket option.
 192 */
 193struct dispportentry {
 194	in_port_t			port;
 195	unsigned int			refs;
 196	ISC_LINK(struct dispportentry)	link;
 197};
 198
 199#ifndef DNS_DISPATCH_PORTTABLESIZE
 200#define DNS_DISPATCH_PORTTABLESIZE	1024
 201#endif
 202
 203#define INVALID_BUCKET		(0xffffdead)
 204
 205/*%
 206 * Number of tasks for each dispatch that use separate sockets for different
 207 * transactions.  This must be a power of 2 as it will divide 32 bit numbers
 208 * to get an uniformly random tasks selection.  See get_dispsocket().
 209 */
 210#define MAX_INTERNAL_TASKS	64
 211
 212struct dns_dispatch {
 213	/* Unlocked. */
 214	unsigned int		magic;		/*%< magic */
 215	dns_dispatchmgr_t      *mgr;		/*%< dispatch manager */
 216	int			ntasks;
 217	/*%
 218	 * internal task buckets.  We use multiple tasks to distribute various
 219	 * socket events well when using separate dispatch sockets.  We use the
 220	 * 1st task (task[0]) for internal control events.
 221	 */
 222	isc_task_t	       *task[MAX_INTERNAL_TASKS];
 223	isc_socket_t	       *socket;		/*%< isc socket attached to */
 224	isc_sockaddr_t		local;		/*%< local address */
 225	in_port_t		localport;	/*%< local UDP port */
 226	unsigned int		maxrequests;	/*%< max requests */
 227	isc_event_t	       *ctlevent;
 228
 229	/*% Locked by mgr->lock. */
 230	ISC_LINK(dns_dispatch_t) link;
 231
 232	/* Locked by "lock". */
 233	isc_mutex_t		lock;		/*%< locks all below */
 234	isc_sockettype_t	socktype;
 235	unsigned int		attributes;
 236	unsigned int		refcount;	/*%< number of users */
 237	dns_dispatchevent_t    *failsafe_ev;	/*%< failsafe cancel event */
 238	unsigned int		shutting_down : 1,
 239				shutdown_out : 1,
 240				connected : 1,
 241				tcpmsg_valid : 1,
 242				recv_pending : 1; /*%< is a recv() pending? */
 243	isc_result_t		shutdown_why;
 244	ISC_LIST(dispsocket_t)	activesockets;
 245	ISC_LIST(dispsocket_t)	inactivesockets;
 246	unsigned int		nsockets;
 247	unsigned int		requests;	/*%< how many requests we have */
 248	unsigned int		tcpbuffers;	/*%< allocated buffers */
 249	dns_tcpmsg_t		tcpmsg;		/*%< for tcp streams */
 250	dns_qid_t		*qid;
 251	arc4ctx_t		arc4ctx;	/*%< for QID/UDP port num */
 252	dispportlist_t		*port_table;	/*%< hold ports 'owned' by us */
 253	isc_mempool_t		*portpool;	/*%< port table entries  */
 254};
 255
 256#define QID_MAGIC		ISC_MAGIC('Q', 'i', 'd', ' ')
 257#define VALID_QID(e)		ISC_MAGIC_VALID((e), QID_MAGIC)
 258
 259#define RESPONSE_MAGIC		ISC_MAGIC('D', 'r', 's', 'p')
 260#define VALID_RESPONSE(e)	ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
 261
 262#define DISPSOCK_MAGIC		ISC_MAGIC('D', 's', 'o', 'c')
 263#define VALID_DISPSOCK(e)	ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
 264
 265#define DISPATCH_MAGIC		ISC_MAGIC('D', 'i', 's', 'p')
 266#define VALID_DISPATCH(e)	ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
 267
 268#define DNS_DISPATCHMGR_MAGIC	ISC_MAGIC('D', 'M', 'g', 'r')
 269#define VALID_DISPATCHMGR(e)	ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
 270
 271#define DNS_QID(disp) ((disp)->socktype == isc_sockettype_tcp) ? \
 272		       (disp)->qid : (disp)->mgr->qid
 273#define DISP_ARC4CTX(disp) ((disp)->socktype == isc_sockettype_udp) ? \
 274			(&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx)
 275
 276/*%
 277 * Locking a query port buffer is a bit tricky.  We access the buffer without
 278 * locking until qid is created.  Technically, there is a possibility of race
 279 * between the creation of qid and access to the port buffer; in practice,
 280 * however, this should be safe because qid isn't created until the first
 281 * dispatch is created and there should be no contending situation until then.
 282 */
 283#define PORTBUFLOCK(mgr) if ((mgr)->qid != NULL) LOCK(&((mgr)->qid->lock))
 284#define PORTBUFUNLOCK(mgr) if ((mgr)->qid != NULL) UNLOCK((&(mgr)->qid->lock))
 285
 286/*
 287 * Statics.
 288 */
 289static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
 290				     dns_messageid_t, in_port_t, unsigned int);
 291static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
 292static void destroy_disp(isc_task_t *task, isc_event_t *event);
 293static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
 294static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
 295static void udp_exrecv(isc_task_t *, isc_event_t *);
 296static void udp_shrecv(isc_task_t *, isc_event_t *);
 297static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
 298static void tcp_recv(isc_task_t *, isc_event_t *);
 299static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
 300static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
 301			     in_port_t);
 302static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
 303static void *allocate_udp_buffer(dns_dispatch_t *disp);
 304static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
 305static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
 306static void do_cancel(dns_dispatch_t *disp);
 307static dns_dispentry_t *linear_first(dns_qid_t *disp);
 308static dns_dispentry_t *linear_next(dns_qid_t *disp,
 309				    dns_dispentry_t *resp);
 310static void dispatch_free(dns_dispatch_t **dispp);
 311static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
 312				  dns_dispatch_t *disp,
 313				  isc_socketmgr_t *sockmgr,
 314				  isc_sockaddr_t *localaddr,
 315				  isc_socket_t **sockp);
 316static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
 317				       isc_socketmgr_t *sockmgr,
 318				       isc_taskmgr_t *taskmgr,
 319				       isc_sockaddr_t *localaddr,
 320				       unsigned int maxrequests,
 321				       unsigned int attributes,
 322				       dns_dispatch_t **dispp);
 323static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
 324static void destroy_mgr(dns_dispatchmgr_t **mgrp);
 325static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
 326				 unsigned int increment, dns_qid_t **qidp,
 327				 isc_boolean_t needaddrtable);
 328static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
 329static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
 330				unsigned int options, isc_socket_t **sockp);
 331static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
 332				   isc_sockaddr_t *sockaddrp);
 333
 334#define LVL(x) ISC_LOG_DEBUG(x)
 335
 336static void
 337mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
 338     ISC_FORMAT_PRINTF(3, 4);
 339
 340static void
 341mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
 342	char msgbuf[2048];
 343	va_list ap;
 344
 345	if (! isc_log_wouldlog(dns_lctx, level))
 346		return;
 347
 348	va_start(ap, fmt);
 349	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
 350	va_end(ap);
 351
 352	isc_log_write(dns_lctx,
 353		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
 354		      level, "dispatchmgr %p: %s", mgr, msgbuf);
 355}
 356
 357static inline void
 358inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
 359	if (mgr->stats != NULL)
 360		isc_stats_increment(mgr->stats, counter);
 361}
 362
 363static void
 364dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
 365     ISC_FORMAT_PRINTF(3, 4);
 366
 367static void
 368dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
 369	char msgbuf[2048];
 370	va_list ap;
 371
 372	if (! isc_log_wouldlog(dns_lctx, level))
 373		return;
 374
 375	va_start(ap, fmt);
 376	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
 377	va_end(ap);
 378
 379	isc_log_write(dns_lctx,
 380		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
 381		      level, "dispatch %p: %s", disp, msgbuf);
 382}
 383
 384static void
 385request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
 386	    int level, const char *fmt, ...)
 387     ISC_FORMAT_PRINTF(4, 5);
 388
 389static void
 390request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
 391	    int level, const char *fmt, ...)
 392{
 393	char msgbuf[2048];
 394	char peerbuf[256];
 395	va_list ap;
 396
 397	if (! isc_log_wouldlog(dns_lctx, level))
 398		return;
 399
 400	va_start(ap, fmt);
 401	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
 402	va_end(ap);
 403
 404	if (VALID_RESPONSE(resp)) {
 405		isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
 406		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
 407			      DNS_LOGMODULE_DISPATCH, level,
 408			      "dispatch %p response %p %s: %s", disp, resp,
 409			      peerbuf, msgbuf);
 410	} else {
 411		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
 412			      DNS_LOGMODULE_DISPATCH, level,
 413			      "dispatch %p req/resp %p: %s", disp, resp,
 414			      msgbuf);
 415	}
 416}
 417
 418/*%
 419 * ARC4 random number generator derived from OpenBSD.
 420 * Only dispatch_random() and dispatch_uniformrandom() are expected
 421 * to be called from general dispatch routines; the rest of them are subroutines
 422 * for these two.
 423 *
 424 * The original copyright follows:
 425 * Copyright (c) 1996, David Mazieres <dm@uun.org>
 426 * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
 427 *
 428 * Permission to use, copy, modify, and distribute this software for any
 429 * purpose with or without fee is hereby granted, provided that the above
 430 * copyright notice and this permission notice appear in all copies.
 431 *
 432 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 433 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 434 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 435 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 436 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 437 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 438 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 439 */
 440#ifdef BIND9
 441static void
 442dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
 443		    isc_mutex_t *lock)
 444{
 445	int n;
 446	for (n = 0; n < 256; n++)
 447		actx->s[n] = n;
 448	actx->i = 0;
 449	actx->j = 0;
 450	actx->count = 0;
 451	actx->entropy = entropy; /* don't have to attach */
 452	actx->lock = lock;
 453}
 454
 455static void
 456dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
 457	int n;
 458	isc_uint8_t si;
 459
 460	actx->i--;
 461	for (n = 0; n < 256; n++) {
 462		actx->i = (actx->i + 1);
 463		si = actx->s[actx->i];
 464		actx->j = (actx->j + si + dat[n % datlen]);
 465		actx->s[actx->i] = actx->s[actx->j];
 466		actx->s[actx->j] = si;
 467	}
 468	actx->j = actx->i;
 469}
 470
 471static inline isc_uint8_t
 472dispatch_arc4get8(arc4ctx_t *actx) {
 473	isc_uint8_t si, sj;
 474
 475	actx->i = (actx->i + 1);
 476	si = actx->s[actx->i];
 477	actx->j = (actx->j + si);
 478	sj = actx->s[actx->j];
 479	actx->s[actx->i] = sj;
 480	actx->s[actx->j] = si;
 481
 482	return (actx->s[(si + sj) & 0xff]);
 483}
 484
 485static inline isc_uint16_t
 486dispatch_arc4get16(arc4ctx_t *actx) {
 487	isc_uint16_t val;
 488
 489	val = dispatch_arc4get8(actx) << 8;
 490	val |= dispatch_arc4get8(actx);
 491
 492	return (val);
 493}
 494
 495static void
 496dispatch_arc4stir(arc4ctx_t *actx) {
 497	int i;
 498	union {
 499		unsigned char rnd[128];
 500		isc_uint32_t rnd32[32];
 501	} rnd;
 502	isc_result_t result;
 503
 504	if (actx->entropy != NULL) {
 505		/*
 506		 * We accept any quality of random data to avoid blocking.
 507		 */
 508		result = isc_entropy_getdata(actx->entropy, rnd.rnd,
 509					     sizeof(rnd), NULL, 0);
 510		RUNTIME_CHECK(result == ISC_R_SUCCESS);
 511	} else {
 512		for (i = 0; i < 32; i++)
 513			isc_random_get(&rnd.rnd32[i]);
 514	}
 515	dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
 516
 517	/*
 518	 * Discard early keystream, as per recommendations in:
 519	 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
 520	 */
 521	for (i = 0; i < 256; i++)
 522		(void)dispatch_arc4get8(actx);
 523
 524	/*
 525	 * Derived from OpenBSD's implementation.  The rationale is not clear,
 526	 * but should be conservative enough in safety, and reasonably large
 527	 * for efficiency.
 528	 */
 529	actx->count = 1600000;
 530}
 531
 532static isc_uint16_t
 533dispatch_random(arc4ctx_t *actx) {
 534	isc_uint16_t result;
 535
 536	if (actx->lock != NULL)
 537		LOCK(actx->lock);
 538
 539	actx->count -= sizeof(isc_uint16_t);
 540	if (actx->count <= 0)
 541		dispatch_arc4stir(actx);
 542	result = dispatch_arc4get16(actx);
 543
 544	if (actx->lock != NULL)
 545		UNLOCK(actx->lock);
 546
 547	return (result);
 548}
 549#else
 550/*
 551 * For general purpose library, we don't have to be too strict about the
 552 * quality of random values.  Performance doesn't matter much, either.
 553 * So we simply use the isc_random module to keep the library as small as
 554 * possible.
 555 */
 556
 557static void
 558dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
 559		    isc_mutex_t *lock)
 560{
 561	UNUSED(actx);
 562	UNUSED(entropy);
 563	UNUSED(lock);
 564
 565	return;
 566}
 567
 568static isc_uint16_t
 569dispatch_random(arc4ctx_t *actx) {
 570	isc_uint32_t r;
 571
 572	UNUSED(actx);
 573
 574	isc_random_get(&r);
 575	return (r & 0xffff);
 576}
 577#endif	/* BIND9 */
 578
 579static isc_uint16_t
 580dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
 581	isc_uint16_t min, r;
 582
 583	if (upper_bound < 2)
 584		return (0);
 585
 586	/*
 587	 * Ensure the range of random numbers [min, 0xffff] be a multiple of
 588	 * upper_bound and contain at least a half of the 16 bit range.
 589	 */
 590
 591	if (upper_bound > 0x8000)
 592		min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
 593	else
 594		min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
 595
 596	/*
 597	 * This could theoretically loop forever but each retry has
 598	 * p > 0.5 (worst case, usually far better) of selecting a
 599	 * number inside the range we need, so it should rarely need
 600	 * to re-roll.
 601	 */
 602	for (;;) {
 603		r = dispatch_random(actx);
 604		if (r >= min)
 605			break;
 606	}
 607
 608	return (r % upper_bound);
 609}
 610
 611/*
 612 * Return a hash of the destination and message id.
 613 */
 614static isc_uint32_t
 615dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
 616	 in_port_t port)
 617{
 618	unsigned int ret;
 619
 620	ret = isc_sockaddr_hash(dest, ISC_TRUE);
 621	ret ^= (id << 16) | port;
 622	ret %= qid->qid_nbuckets;
 623
 624	INSIST(ret < qid->qid_nbuckets);
 625
 626	return (ret);
 627}
 628
 629/*
 630 * Find the first entry in 'qid'.  Returns NULL if there are no entries.
 631 */
 632static dns_dispentry_t *
 633linear_first(dns_qid_t *qid) {
 634	dns_dispentry_t *ret;
 635	unsigned int bucket;
 636
 637	bucket = 0;
 638
 639	while (bucket < qid->qid_nbuckets) {
 640		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
 641		if (ret != NULL)
 642			return (ret);
 643		bucket++;
 644	}
 645
 646	return (NULL);
 647}
 648
 649/*
 650 * Find the next entry after 'resp' in 'qid'.  Return NULL if there are
 651 * no more entries.
 652 */
 653static dns_dispentry_t *
 654linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
 655	dns_dispentry_t *ret;
 656	unsigned int bucket;
 657
 658	ret = ISC_LIST_NEXT(resp, link);
 659	if (ret != NULL)
 660		return (ret);
 661
 662	bucket = resp->bucket;
 663	bucket++;
 664	while (bucket < qid->qid_nbuckets) {
 665		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
 666		if (ret != NULL)
 667			return (ret);
 668		bucket++;
 669	}
 670
 671	return (NULL);
 672}
 673
 674/*
 675 * The dispatch must be locked.
 676 */
 677static isc_boolean_t
 678destroy_disp_ok(dns_dispatch_t *disp)
 679{
 680	if (disp->refcount != 0)
 681		return (ISC_FALSE);
 682
 683	if (disp->recv_pending != 0)
 684		return (ISC_FALSE);
 685
 686	if (!ISC_LIST_EMPTY(disp->activesockets))
 687		return (ISC_FALSE);
 688
 689	if (disp->shutting_down == 0)
 690		return (ISC_FALSE);
 691
 692	return (ISC_TRUE);
 693}
 694
 695/*
 696 * Called when refcount reaches 0 (and safe to destroy).
 697 *
 698 * The dispatcher must not be locked.
 699 * The manager must be locked.
 700 */
 701static void
 702destroy_disp(isc_task_t *task, isc_event_t *event) {
 703	dns_dispatch_t *disp;
 704	dns_dispatchmgr_t *mgr;
 705	isc_boolean_t killmgr;
 706	dispsocket_t *dispsocket;
 707	int i;
 708
 709	INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
 710
 711	UNUSED(task);
 712
 713	disp = event->ev_arg;
 714	mgr = disp->mgr;
 715
 716	LOCK(&mgr->lock);
 717	ISC_LIST_UNLINK(mgr->list, disp, link);
 718
 719	dispatch_log(disp, LVL(90),
 720		     "shutting down; detaching from sock %p, task %p",
 721		     disp->socket, disp->task[0]); /* XXXX */
 722
 723	if (disp->socket != NULL)
 724		isc_socket_detach(&disp->socket);
 725	while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
 726		ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
 727		destroy_dispsocket(disp, &dispsocket);
 728	}
 729	for (i = 0; i < disp->ntasks; i++)
 730		isc_task_detach(&disp->task[i]);
 731	isc_event_free(&event);
 732
 733	dispatch_free(&disp);
 734
 735	killmgr = destroy_mgr_ok(mgr);
 736	UNLOCK(&mgr->lock);
 737	if (killmgr)
 738		destroy_mgr(&mgr);
 739}
 740
 741/*%
 742 * Manipulate port table per dispatch: find an entry for a given port number,
 743 * create a new entry, and decrement a given entry with possible clean-up.
 744 */
 745static dispportentry_t *
 746port_search(dns_dispatch_t *disp, in_port_t port) {
 747	dispportentry_t *portentry;
 748
 749	REQUIRE(disp->port_table != NULL);
 750
 751	portentry = ISC_LIST_HEAD(disp->port_table[port %
 752						   DNS_DISPATCH_PORTTABLESIZE]);
 753	while (portentry != NULL) {
 754		if (portentry->port == port)
 755			return (portentry);
 756		portentry = ISC_LIST_NEXT(portentry, link);
 757	}
 758
 759	return (NULL);
 760}
 761
 762static dispportentry_t *
 763new_portentry(dns_dispatch_t *disp, in_port_t port) {
 764	dispportentry_t *portentry;
 765
 766	REQUIRE(disp->port_table != NULL);
 767
 768	portentry = isc_mempool_get(disp->portpool);
 769	if (portentry == NULL)
 770		return (portentry);
 771
 772	portentry->port = port;
 773	portentry->refs = 0;
 774	ISC_LINK_INIT(portentry, link);
 775	ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
 776			portentry, link);
 777
 778	return (portentry);
 779}
 780
 781/*%
 782 * The caller must not hold the qid->lock.
 783 */
 784static void
 785deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
 786	dispportentry_t *portentry = *portentryp;
 787	dns_qid_t *qid;
 788
 789	REQUIRE(disp->port_table != NULL);
 790	REQUIRE(portentry != NULL && portentry->refs > 0);
 791
 792	qid = DNS_QID(disp);
 793	LOCK(&qid->lock);
 794	portentry->refs--;
 795	if (portentry->refs == 0) {
 796		ISC_LIST_UNLINK(disp->port_table[portentry->port %
 797						 DNS_DISPATCH_PORTTABLESIZE],
 798				portentry, link);
 799		isc_mempool_put(disp->portpool, portentry);
 800	}
 801
 802	*portentryp = NULL;
 803	UNLOCK(&qid->lock);
 804}
 805
 806/*%
 807 * Find a dispsocket for socket address 'dest', and port number 'port'.
 808 * Return NULL if no such entry exists.
 809 */
 810static dispsocket_t *
 811socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
 812	      unsigned int bucket)
 813{
 814	dispsocket_t *dispsock;
 815
 816	REQUIRE(bucket < qid->qid_nbuckets);
 817
 818	dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
 819
 820	while (dispsock != NULL) {
 821		if (dispsock->portentry != NULL &&
 822		    dispsock->portentry->port == port &&
 823		    isc_sockaddr_equal(dest, &dispsock->host))
 824			return (dispsock);
 825		dispsock = ISC_LIST_NEXT(dispsock, blink);
 826	}
 827
 828	return (NULL);
 829}
 830
 831/*%
 832 * Make a new socket for a single dispatch with a random port number.
 833 * The caller must hold the disp->lock and qid->lock.
 834 */
 835static isc_result_t
 836get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
 837	       isc_socketmgr_t *sockmgr, dns_qid_t *qid,
 838	       dispsocket_t **dispsockp, in_port_t *portp)
 839{
 840	int i;
 841	isc_uint32_t r;
 842	dns_dispatchmgr_t *mgr = disp->mgr;
 843	isc_socket_t *sock = NULL;
 844	isc_result_t result = ISC_R_FAILURE;
 845	in_port_t port;
 846	isc_sockaddr_t localaddr;
 847	unsigned int bucket = 0;
 848	dispsocket_t *dispsock;
 849	unsigned int nports;
 850	in_port_t *ports;
 851	unsigned int bindoptions;
 852	dispportentry_t *portentry = NULL;
 853
 854	if (isc_sockaddr_pf(&disp->local) == AF_INET) {
 855		nports = disp->mgr->nv4ports;
 856		ports = disp->mgr->v4ports;
 857	} else {
 858		nports = disp->mgr->nv6ports;
 859		ports = disp->mgr->v6ports;
 860	}
 861	if (nports == 0)
 862		return (ISC_R_ADDRNOTAVAIL);
 863
 864	dispsock = ISC_LIST_HEAD(disp->inactivesockets);
 865	if (dispsock != NULL) {
 866		ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
 867		sock = dispsock->socket;
 868		dispsock->socket = NULL;
 869	} else {
 870		dispsock = isc_mempool_get(mgr->spool);
 871		if (dispsock == NULL)
 872			return (ISC_R_NOMEMORY);
 873
 874		disp->nsockets++;
 875		dispsock->socket = NULL;
 876		dispsock->disp = disp;
 877		dispsock->resp = NULL;
 878		dispsock->portentry = NULL;
 879		isc_random_get(&r);
 880		dispsock->task = NULL;
 881		isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
 882		ISC_LINK_INIT(dispsock, link);
 883		ISC_LINK_INIT(dispsock, blink);
 884		dispsock->magic = DISPSOCK_MAGIC;
 885	}
 886
 887	/*
 888	 * Pick up a random UDP port and open a new socket with it.  Avoid
 889	 * choosing ports that share the same destination because it will be
 890	 * very likely to fail in bind(2) or connect(2).
 891	 */
 892	localaddr = disp->local;
 893	for (i = 0; i < 64; i++) {
 894		port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
 895							nports)];
 896		isc_sockaddr_setport(&localaddr, port);
 897
 898		bucket = dns_hash(qid, dest, 0, port);
 899		if (socket_search(qid, dest, port, bucket) != NULL)
 900			continue;
 901		bindoptions = 0;
 902		portentry = port_search(disp, port);
 903		if (portentry != NULL)
 904			bindoptions |= ISC_SOCKET_REUSEADDRESS;
 905		result = open_socket(sockmgr, &localaddr, bindoptions, &sock);
 906		if (result == ISC_R_SUCCESS) {
 907			if (portentry == NULL) {
 908				portentry = new_portentry(disp, port);
 909				if (portentry == NULL) {
 910					result = ISC_R_NOMEMORY;
 911					break;
 912				}
 913			}
 914			portentry->refs++;
 915			break;
 916		} else if (result == ISC_R_NOPERM) {
 917			char buf[ISC_SOCKADDR_FORMATSIZE];
 918			isc_sockaddr_format(&localaddr, buf, sizeof(buf));
 919			dispatch_log(disp, ISC_LOG_WARNING,
 920				     "open_socket(%s) -> %s: continuing",
 921				     buf, isc_result_totext(result));
 922		} else if (result != ISC_R_ADDRINUSE)
 923			break;
 924	}
 925
 926	if (result == ISC_R_SUCCESS) {
 927		dispsock->socket = sock;
 928		dispsock->host = *dest;
 929		dispsock->portentry = portentry;
 930		dispsock->bucket = bucket;
 931		ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
 932		*dispsockp = dispsock;
 933		*portp = port;
 934	} else {
 935		/*
 936		 * We could keep it in the inactive list, but since this should
 937		 * be an exceptional case and might be resource shortage, we'd
 938		 * rather destroy it.
 939		 */
 940		if (sock != NULL)
 941			isc_socket_detach(&sock);
 942		destroy_dispsocket(disp, &dispsock);
 943	}
 944
 945	return (result);
 946}
 947
 948/*%
 949 * Destroy a dedicated dispatch socket.
 950 */
 951static void
 952destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
 953	dispsocket_t *dispsock;
 954	dns_qid_t *qid;
 955
 956	/*
 957	 * The dispatch must be locked.
 958	 */
 959
 960	REQUIRE(dispsockp != NULL && *dispsockp != NULL);
 961	dispsock = *dispsockp;
 962	REQUIRE(!ISC_LINK_LINKED(dispsock, link));
 963
 964	disp->nsockets--;
 965	dispsock->magic = 0;
 966	if (dispsock->portentry != NULL)
 967		deref_portentry(disp, &dispsock->portentry);
 968	if (dispsock->socket != NULL)
 969		isc_socket_detach(&dispsock->socket);
 970	if (ISC_LINK_LINKED(dispsock, blink)) {
 971		qid = DNS_QID(disp);
 972		LOCK(&qid->lock);
 973		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
 974				blink);
 975		UNLOCK(&qid->lock);
 976	}
 977	if (dispsock->task != NULL)
 978		isc_task_detach(&dispsock->task);
 979	isc_mempool_put(disp->mgr->spool, dispsock);
 980
 981	*dispsockp = NULL;
 982}
 983
 984/*%
 985 * Deactivate a dedicated dispatch socket.  Move it to the inactive list for
 986 * future reuse unless the total number of sockets are exceeding the maximum.
 987 */
 988static void
 989deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
 990	isc_result_t result;
 991	dns_qid_t *qid;
 992
 993	/*
 994	 * The dispatch must be locked.
 995	 */
 996	ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
 997	if (dispsock->resp != NULL) {
 998		INSIST(dispsock->resp->dispsocket == dispsock);
 999		dispsock->resp->dispsocket = NULL;
1000	}
1001
1002	INSIST(dispsock->portentry != NULL);
1003	deref_portentry(disp, &dispsock->portentry);
1004
1005#ifdef BIND9
1006	if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
1007		destroy_dispsocket(disp, &dispsock);
1008	else {
1009		result = isc_socket_close(dispsock->socket);
1010
1011		qid = DNS_QID(disp);
1012		LOCK(&qid->lock);
1013		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
1014				blink);
1015		UNLOCK(&qid->lock);
1016
1017		if (result == ISC_R_SUCCESS)
1018			ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
1019		else {
1020			/*
1021			 * If the underlying system does not allow this
1022			 * optimization, destroy this temporary structure (and
1023			 * create a new one for a new transaction).
1024			 */
1025			INSIST(result == ISC_R_NOTIMPLEMENTED);
1026			destroy_dispsocket(disp, &dispsock);
1027		}
1028	}
1029#else
1030	/* This kind of optimization isn't necessary for normal use */
1031	UNUSED(qid);
1032	UNUSED(result);
1033
1034	destroy_dispsocket(disp, &dispsock);
1035#endif
1036}
1037
1038/*
1039 * Find an entry for query ID 'id', socket address 'dest', and port number
1040 * 'port'.
1041 * Return NULL if no such entry exists.
1042 */
1043static dns_dispentry_t *
1044entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
1045	     in_port_t port, unsigned int bucket)
1046{
1047	dns_dispentry_t *res;
1048
1049	REQUIRE(bucket < qid->qid_nbuckets);
1050
1051	res = ISC_LIST_HEAD(qid->qid_table[bucket]);
1052
1053	while (res != NULL) {
1054		if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
1055		    res->port == port) {
1056			return (res);
1057		}
1058		res = ISC_LIST_NEXT(res, link);
1059	}
1060
1061	return (NULL);
1062}
1063
1064static void
1065free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
1066	INSIST(buf != NULL && len != 0);
1067
1068
1069	switch (disp->socktype) {
1070	case isc_sockettype_tcp:
1071		INSIST(disp->tcpbuffers > 0);
1072		disp->tcpbuffers--;
1073		isc_mem_put(disp->mgr->mctx, buf, len);
1074		break;
1075	case isc_sockettype_udp:
1076		LOCK(&disp->mgr->buffer_lock);
1077		INSIST(disp->mgr->buffers > 0);
1078		INSIST(len == disp->mgr->buffersize);
1079		disp->mgr->buffers--;
1080		isc_mempool_put(disp->mgr->bpool, buf);
1081		UNLOCK(&disp->mgr->buffer_lock);
1082		break;
1083	default:
1084		INSIST(0);
1085		break;
1086	}
1087}
1088
1089static void *
1090allocate_udp_buffer(dns_dispatch_t *disp) {
1091	void *temp;
1092
1093	LOCK(&disp->mgr->buffer_lock);
1094	temp = isc_mempool_get(disp->mgr->bpool);
1095
1096	if (temp != NULL)
1097		disp->mgr->buffers++;
1098	UNLOCK(&disp->mgr->buffer_lock);
1099
1100	return (temp);
1101}
1102
1103static inline void
1104free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
1105	if (disp->failsafe_ev == ev) {
1106		INSIST(disp->shutdown_out == 1);
1107		disp->shutdown_out = 0;
1108
1109		return;
1110	}
1111
1112	isc_mempool_put(disp->mgr->epool, ev);
1113}
1114
1115static inline dns_dispatchevent_t *
1116allocate_event(dns_dispatch_t *disp) {
1117	dns_dispatchevent_t *ev;
1118
1119	ev = isc_mempool_get(disp->mgr->epool);
1120	if (ev == NULL)
1121		return (NULL);
1122	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
1123		       NULL, NULL, NULL, NULL, NULL);
1124
1125	return (ev);
1126}
1127
1128static void
1129udp_exrecv(isc_task_t *task, isc_event_t *ev) {
1130	dispsocket_t *dispsock = ev->ev_arg;
1131
1132	UNUSED(task);
1133
1134	REQUIRE(VALID_DISPSOCK(dispsock));
1135	udp_recv(ev, dispsock->disp, dispsock);
1136}
1137
1138static void
1139udp_shrecv(isc_task_t *task, isc_event_t *ev) {
1140	dns_dispatch_t *disp = ev->ev_arg;
1141
1142	UNUSED(task);
1143
1144	REQUIRE(VALID_DISPATCH(disp));
1145	udp_recv(ev, disp, NULL);
1146}
1147
1148/*
1149 * General flow:
1150 *
1151 * If I/O result == CANCELED or error, free the buffer.
1152 *
1153 * If query, free the buffer, restart.
1154 *
1155 * If response:
1156 *	Allocate event, fill in details.
1157 *		If cannot allocate, free buffer, restart.
1158 *	find target.  If not found, free buffer, restart.
1159 *	if event queue is not empty, queue.  else, send.
1160 *	restart.
1161 */
1162static void
1163udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1164	isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
1165	dns_messageid_t id;
1166	isc_result_t dres;
1167	isc_buffer_t source;
1168	unsigned int flags;
1169	dns_dispentry_t *resp = NULL;
1170	dns_dispatchevent_t *rev;
1171	unsigned int bucket;
1172	isc_boolean_t killit;
1173	isc_boolean_t queue_response;
1174	dns_dispatchmgr_t *mgr;
1175	dns_qid_t *qid;
1176	isc_netaddr_t netaddr;
1177	int match;
1178	int result;
1179	isc_boolean_t qidlocked = ISC_FALSE;
1180
1181	LOCK(&disp->lock);
1182
1183	mgr = disp->mgr;
1184	qid = mgr->qid;
1185
1186	dispatch_log(disp, LVL(90),
1187		     "got packet: requests %d, buffers %d, recvs %d",
1188		     disp->requests, disp->mgr->buffers, disp->recv_pending);
1189
1190	if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1191		/*
1192		 * Unless the receive event was imported from a listening
1193		 * interface, in which case the event type is
1194		 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
1195		 */
1196		INSIST(disp->recv_pending != 0);
1197		disp->recv_pending = 0;
1198	}
1199
1200	if (dispsock != NULL &&
1201	    (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
1202		/*
1203		 * dispsock->resp can be NULL if this transaction was canceled
1204		 * just after receiving a response.  Since this socket is
1205		 * exclusively used and there should be at most one receive
1206		 * event the canceled event should have been no effect.  So
1207		 * we can (and should) deactivate the socket right now.
1208		 */
1209		deactivate_dispsocket(disp, dispsock);
1210		dispsock = NULL;
1211	}
1212
1213	if (disp->shutting_down) {
1214		/*
1215		 * This dispatcher is shutting down.
1216		 */
1217		free_buffer(disp, ev->region.base, ev->region.length);
1218
1219		isc_event_free(&ev_in);
1220		ev = NULL;
1221
1222		killit = destroy_disp_ok(disp);
1223		UNLOCK(&disp->lock);
1224		if (killit)
1225			isc_task_send(disp->task[0], &disp->ctlevent);
1226
1227		return;
1228	}
1229
1230	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
1231		if (dispsock != NULL) {
1232			resp = dispsock->resp;
1233			id = resp->id;
1234			if (ev->result != ISC_R_SUCCESS) {
1235				/*
1236				 * This is most likely a network error on a
1237				 * connected socket.  It makes no sense to
1238				 * check the address or parse the packet, but it
1239				 * will help to return the error to the caller.
1240				 */
1241				goto sendresponse;
1242			}
1243		} else {
1244			free_buffer(disp, ev->region.base, ev->region.length);
1245
1246			UNLOCK(&disp->lock);
1247			isc_event_free(&ev_in);
1248			return;
1249		}
1250	} else if (ev->result != ISC_R_SUCCESS) {
1251		free_buffer(disp, ev->region.base, ev->region.length);
1252
1253		if (ev->result != ISC_R_CANCELED)
1254			dispatch_log(disp, ISC_LOG_ERROR,
1255				     "odd socket result in udp_recv(): %s",
1256				     isc_result_totext(ev->result));
1257
1258		UNLOCK(&disp->lock);
1259		isc_event_free(&ev_in);
1260		return;
1261	}
1262
1263	/*
1264	 * If this is from a blackholed address, drop it.
1265	 */
1266	isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1267	if (disp->mgr->blackhole != NULL &&
1268	    dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1269			  NULL, &match, NULL) == ISC_R_SUCCESS &&
1270	    match > 0)
1271	{
1272		if (isc_log_wouldlog(dns_lctx, LVL(10))) {
1273			char netaddrstr[ISC_NETADDR_FORMATSIZE];
1274			isc_netaddr_format(&netaddr, netaddrstr,
1275					   sizeof(netaddrstr));
1276			dispatch_log(disp, LVL(10),
1277				     "blackholed packet from %s",
1278				     netaddrstr);
1279		}
1280		free_buffer(disp, ev->region.base, ev->region.length);
1281		goto restart;
1282	}
1283
1284	/*
1285	 * Peek into the buffer to see what we can see.
1286	 */
1287	isc_buffer_init(&source, ev->region.base, ev->region.length);
1288	isc_buffer_add(&source, ev->n);
1289	dres = dns_message_peekheader(&source, &id, &flags);
1290	if (dres != ISC_R_SUCCESS) {
1291		free_buffer(disp, ev->region.base, ev->region.length);
1292		dispatch_log(disp, LVL(10), "got garbage packet");
1293		goto restart;
1294	}
1295
1296	dispatch_log(disp, LVL(92),
1297		     "got valid DNS message header, /QR %c, id %u",
1298		     ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1299
1300	/*
1301	 * Look at flags.  If query, drop it. If response,
1302	 * look to see where it goes.
1303	 */
1304	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1305		/* query */
1306		free_buffer(disp, ev->region.base, ev->region.length);
1307		goto restart;
1308	}
1309
1310	/*
1311	 * Search for the corresponding response.  If we are using an exclusive
1312	 * socket, we've already identified it and we can skip the search; but
1313	 * the ID and the address must match the expected ones.
1314	 */
1315	if (resp == NULL) {
1316		bucket = dns_hash(qid, &ev->address, id, disp->localport);
1317		LOCK(&qid->lock);
1318		qidlocked = ISC_TRUE;
1319		resp = entry_search(qid, &ev->address, id, disp->localport,
1320				    bucket);
1321		dispatch_log(disp, LVL(90),
1322			     "search for response in bucket %d: %s",
1323			     bucket, (resp == NULL ? "not found" : "found"));
1324
1325		if (resp == NULL) {
1326			inc_stats(mgr, dns_resstatscounter_mismatch);
1327			free_buffer(disp, ev->region.base, ev->region.length);
1328			goto unlock;
1329		}
1330	} else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
1331							 &resp->host)) {
1332		dispatch_log(disp, LVL(90),
1333			     "response to an exclusive socket doesn't match");
1334		inc_stats(mgr, dns_resstatscounter_mismatch);
1335		free_buffer(disp, ev->region.base, ev->region.length);
1336		goto unlock;
1337	}
1338
1339	/*
1340	 * Now that we have the original dispatch the query was sent
1341	 * from check that the address and port the response was
1342	 * sent to make sense.
1343	 */
1344	if (disp != resp->disp) {
1345		isc_sockaddr_t a1;
1346		isc_sockaddr_t a2;
1347
1348		/*
1349		 * Check that the socket types and ports match.
1350		 */
1351		if (disp->socktype != resp->disp->socktype ||
1352		    isc_sockaddr_getport(&disp->local) !=
1353		    isc_sockaddr_getport(&resp->disp->local)) {
1354			free_buffer(disp, ev->region.base, ev->region.length);
1355			goto unlock;
1356		}
1357
1358		/*
1359		 * If both dispatches are bound to an address then fail as
1360		 * the addresses can't be equal (enforced by the IP stack).
1361		 *
1362		 * Note under Linux a packet can be sent out via IPv4 socket
1363		 * and the response be received via a IPv6 socket.
1364		 *
1365		 * Requests sent out via IPv6 should always come back in
1366		 * via IPv6.
1367		 */
1368		if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
1369		    isc_sockaddr_pf(&disp->local) != PF_INET6) {
1370			free_buffer(disp, ev->region.base, ev->region.length);
1371			goto unlock;
1372		}
1373		isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
1374		isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
1375		if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
1376		    !isc_sockaddr_eqaddr(&a2, &disp->local)) {
1377			free_buffer(disp, ev->region.base, ev->region.length);
1378			goto unlock;
1379		}
1380	}
1381
1382  sendresponse:
1383	queue_response = resp->item_out;
1384	rev = allocate_event(resp->disp);
1385	if (rev == NULL) {
1386		free_buffer(disp, ev->region.base, ev->region.length);
1387		goto unlock;
1388	}
1389
1390	/*
1391	 * At this point, rev contains the event we want to fill in, and
1392	 * resp contains the information on the place to send it to.
1393	 * Send the event off.
1394	 */
1395	isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
1396	isc_buffer_add(&rev->buffer, ev->n);
1397	rev->result = ev->result;
1398	rev->id = id;
1399	rev->addr = ev->address;
1400	rev->pktinfo = ev->pktinfo;
1401	rev->attributes = ev->attributes;
1402	if (queue_response) {
1403		ISC_LIST_APPEND(resp->items, rev, ev_link);
1404	} else {
1405		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
1406			       DNS_EVENT_DISPATCH,
1407			       resp->action, resp->arg, resp, NULL, NULL);
1408		request_log(disp, resp, LVL(90),
1409			    "[a] Sent event %p buffer %p len %d to task %p",
1410			    rev, rev->buffer.base, rev->buffer.length,
1411			    resp->task);
1412		resp->item_out = ISC_TRUE;
1413		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1414	}
1415 unlock:
1416	if (qidlocked)
1417		UNLOCK(&qid->lock);
1418
1419	/*
1420	 * Restart recv() to get the next packet.
1421	 */
1422 restart:
1423	result = startrecv(disp, dispsock);
1424	if (result != ISC_R_SUCCESS && dispsock != NULL) {
1425		/*
1426		 * XXX: wired. There seems to be no recovery process other than
1427		 * deactivate this socket anyway (since we cannot start
1428		 * receiving, we won't be able to receive a cancel event
1429		 * from the user).
1430		 */
1431		deactivate_dispsocket(disp, dispsock);
1432	}
1433	UNLOCK(&disp->lock);
1434
1435	isc_event_free(&ev_in);
1436}
1437
1438/*
1439 * General flow:
1440 *
1441 * If I/O result == CANCELED, EOF, or error, notify everyone as the
1442 * various queues drain.
1443 *
1444 * If query, restart.
1445 *
1446 * If response:
1447 *	Allocate event, fill in details.
1448 *		If cannot allocate, restart.
1449 *	find target.  If not found, restart.
1450 *	if event queue is not empty, queue.  else, send.
1451 *	restart.
1452 */
1453static void
1454tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
1455	dns_dispatch_t *disp = ev_in->ev_arg;
1456	dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
1457	dns_messageid_t id;
1458	isc_result_t dres;
1459	unsigned int flags;
1460	dns_dispentry_t *resp;
1461	dns_dispatchevent_t *rev;
1462	unsigned int bucket;
1463	isc_boolean_t killit;
1464	isc_boolean_t queue_response;
1465	dns_qid_t *qid;
1466	int level;
1467	char buf[ISC_SOCKADDR_FORMATSIZE];
1468
1469	UNUSED(task);
1470
1471	REQUIRE(VALID_DISPATCH(disp));
1472
1473	qid = disp->qid;
1474
1475	dispatch_log(disp, LVL(90),
1476		     "got TCP packet: requests %d, buffers %d, recvs %d",
1477		     disp->requests, disp->tcpbuffers, disp->recv_pending);
1478
1479	LOCK(&disp->lock);
1480
1481	INSIST(disp->recv_pending != 0);
1482	disp->recv_pending = 0;
1483
1484	if (disp->refcount == 0) {
1485		/*
1486		 * This dispatcher is shutting down.  Force cancelation.
1487		 */
1488		tcpmsg->result = ISC_R_CANCELED;
1489	}
1490
1491	if (tcpmsg->result != ISC_R_SUCCESS) {
1492		switch (tcpmsg->result) {
1493		case ISC_R_CANCELED:
1494			break;
1495
1496		case ISC_R_EOF:
1497			dispatch_log(disp, LVL(90), "shutting down on EOF");
1498			do_cancel(disp);
1499			break;
1500
1501		case ISC_R_CONNECTIONRESET:
1502			level = ISC_LOG_INFO;
1503			goto logit;
1504
1505		default:
1506			level = ISC_LOG_ERROR;
1507		logit:
1508			isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
1509			dispatch_log(disp, level, "shutting down due to TCP "
1510				     "receive error: %s: %s", buf,
1511				     isc_result_totext(tcpmsg->result));
1512			do_cancel(disp);
1513			break;
1514		}
1515
1516		/*
1517		 * The event is statically allocated in the tcpmsg
1518		 * structure, and destroy_disp() frees the tcpmsg, so we must
1519		 * free the event *before* calling destroy_disp().
1520		 */
1521		isc_event_free(&ev_in);
1522
1523		disp->shutting_down = 1;
1524		disp->shutdown_why = tcpmsg->result;
1525
1526		/*
1527		 * If the recv() was canceled pass the word on.
1528		 */
1529		killit = destroy_disp_ok(disp);
1530		UNLOCK(&disp->lock);
1531		if (killit)
1532			isc_task_send(disp->task[0], &disp->ctlevent);
1533		return;
1534	}
1535
1536	dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
1537		     tcpmsg->result,
1538		     tcpmsg->buffer.length, tcpmsg->buffer.base);
1539
1540	/*
1541	 * Peek into the buffer to see what we can see.
1542	 */
1543	dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
1544	if (dres != ISC_R_SUCCESS) {
1545		dispatch_log(disp, LVL(10), "got garbage packet");
1546		goto restart;
1547	}
1548
1549	dispatch_log(disp, LVL(92),
1550		     "got valid DNS message header, /QR %c, id %u",
1551		     ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
1552
1553	/*
1554	 * Allocate an event to send to the query or response client, and
1555	 * allocate a new buffer for our use.
1556	 */
1557
1558	/*
1559	 * Look at flags.  If query, drop it. If response,
1560	 * look to see where it goes.
1561	 */
1562	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1563		/*
1564		 * Query.
1565		 */
1566		goto restart;
1567	}
1568
1569	/*
1570	 * Response.
1571	 */
1572	bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1573	LOCK(&qid->lock);
1574	resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
1575	dispatch_log(disp, LVL(90),
1576		     "search for response in bucket %d: %s",
1577		     bucket, (resp == NULL ? "not found" : "found"));
1578
1579	if (resp == NULL)
1580		goto unlock;
1581	queue_response = resp->item_out;
1582	rev = allocate_event(disp);
1583	if (rev == NULL)
1584		goto unlock;
1585
1586	/*
1587	 * At this point, rev contains the event we want to fill in, and
1588	 * resp contains the information on the place to send it to.
1589	 * Send the event off.
1590	 */
1591	dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1592	disp->tcpbuffers++;
1593	rev->result = ISC_R_SUCCESS;
1594	rev->id = id;
1595	rev->addr = tcpmsg->address;
1596	if (queue_response) {
1597		ISC_LIST_APPEND(resp->items, rev, ev_link);
1598	} else {
1599		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1600			       resp->action, resp->arg, resp, NULL, NULL);
1601		request_log(disp, resp, LVL(90),
1602			    "[b] Sent event %p buffer %p len %d to task %p",
1603			    rev, rev->buffer.base, rev->buffer.length,
1604			    resp->task);
1605		resp->item_out = ISC_TRUE;
1606		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1607	}
1608 unlock:
1609	UNLOCK(&qid->lock);
1610
1611	/*
1612	 * Restart recv() to get the next packet.
1613	 */
1614 restart:
1615	(void)startrecv(disp, NULL);
1616
1617	UNLOCK(&disp->lock);
1618
1619	isc_event_free(&ev_in);
1620}
1621
1622/*
1623 * disp must be locked.
1624 */
1625static isc_result_t
1626startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
1627	isc_result_t res;
1628	isc_region_t region;
1629	isc_socket_t *socket;
1630
1631	if (disp->shutting_down == 1)
1632		return (ISC_R_SUCCESS);
1633
1634	if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1635		return (ISC_R_SUCCESS);
1636
1637	if (disp->recv_pending != 0 && dispsock == NULL)
1638		return (ISC_R_SUCCESS);
1639
1640	if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1641		return (ISC_R_NOMEMORY);
1642
1643	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
1644	    dispsock == NULL)
1645		return (ISC_R_SUCCESS);
1646
1647	if (dispsock != NULL)
1648		socket = dispsock->socket;
1649	else
1650		socket = disp->socket;
1651	INSIST(socket != NULL);
1652
1653	switch (disp->socktype) {
1654		/*
1655		 * UDP reads are always maximal.
1656		 */
1657	case isc_sockettype_udp:
1658		region.length = disp->mgr->buffersize;
1659		region.base = allocate_udp_buffer(disp);
1660		if (region.base == NULL)
1661			return (ISC_R_NOMEMORY);
1662		if (dispsock != NULL) {
1663			res = isc_socket_recv(socket, &region, 1,
1664					      dispsock->task, udp_exrecv,
1665					      dispsock);
1666			if (res != ISC_R_SUCCESS) {
1667				free_buffer(disp, region.base, region.length);
1668				return (res);
1669			}
1670		} else {
1671			res = isc_socket_recv(socket, &region, 1,
1672					      disp->task[0], udp_shrecv, disp);
1673			if (res != ISC_R_SUCCESS) {
1674				free_buffer(disp, region.base, region.length);
1675				disp->shutdown_why = res;
1676				disp->shutting_down = 1;
1677				do_cancel(disp);
1678				return (ISC_R_SUCCESS); /* recover by cancel */
1679			}
1680			INSIST(disp->recv_pending == 0);
1681			disp->recv_pending = 1;
1682		}
1683		break;
1684
1685	case isc_sockettype_tcp:
1686		res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
1687					     tcp_recv, disp);
1688		if (res != ISC_R_SUCCESS) {
1689			disp->shutdown_why = res;
1690			disp->shutting_down = 1;
1691			do_cancel(disp);
1692			return (ISC_R_SUCCESS); /* recover by cancel */
1693		}
1694		INSIST(disp->recv_pending == 0);
1695		disp->recv_pending = 1;
1696		break;
1697	default:
1698		INSIST(0);
1699		break;
1700	}
1701
1702	return (ISC_R_SUCCESS);
1703}
1704
1705/*
1706 * Mgr must be locked when calling this function.
1707 */
1708static isc_boolean_t
1709destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1710	mgr_log(mgr, LVL(90),
1711		"destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1712		"epool=%d, rpool=%d, dpool=%d",
1713		MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1714		isc_mempool_getallocated(mgr->epool),
1715		isc_mempool_getallocated(mgr->rpool),
1716		isc_mempool_getallocated(mgr->dpool));
1717	if (!MGR_IS_SHUTTINGDOWN(mgr))
1718		return (ISC_FALSE);
1719	if (!ISC_LIST_EMPTY(mgr->list))
1720		return (ISC_FALSE);
1721	if (isc_mempool_getallocated(mgr->epool) != 0)
1722		return (ISC_FALSE);
1723	if (isc_mempool_getallocated(mgr->rpool) != 0)
1724		return (ISC_FALSE);
1725	if (isc_mempool_getallocated(mgr->dpool) != 0)
1726		return (ISC_FALSE);
1727
1728	return (ISC_TRUE);
1729}
1730
1731/*
1732 * Mgr must be unlocked when calling this function.
1733 */
1734static void
1735destroy_mgr(dns_dispatchmgr_t **mgrp) {
1736	isc_mem_t *mctx;
1737	dns_dispatchmgr_t *mgr;
1738
1739	mgr = *mgrp;
1740	*mgrp = NULL;
1741
1742	mctx = mgr->mctx;
1743
1744	mgr->magic = 0;
1745	mgr->mctx = NULL;
1746	DESTROYLOCK(&mgr->lock);
1747	mgr->state = 0;
1748
1749	DESTROYLOCK(&mgr->arc4_lock);
1750
1751	isc_mempool_destroy(&mgr->epool);
1752	isc_mempool_destroy(&mgr->rpool);
1753	isc_mempool_destroy(&mgr->dpool);
1754	if (mgr->bpool != NULL)
1755		isc_mempool_destroy(&mgr->bpool);
1756	if (mgr->spool != NULL)
1757		isc_mempool_destroy(&mgr->spool);
1758
1759	DESTROYLOCK(&mgr->pool_lock);
1760
1761#ifdef BIND9
1762	if (mgr->entropy != NULL)
1763		isc_entropy_detach(&mgr->entropy);
1764#endif /* BIND9 */
1765	if (mgr->qid != NULL)
1766		qid_destroy(mctx, &mgr->qid);
1767
1768	DESTROYLOCK(&mgr->buffer_lock);
1769
1770	if (mgr->blackhole != NULL)
1771		dns_acl_detach(&mgr->blackhole);
1772
1773	if (mgr->stats != NULL)
1774		isc_stats_detach(&mgr->stats);
1775
1776	if (mgr->v4ports != NULL) {
1777		isc_mem_put(mctx, mgr->v4ports,
1778			    mgr->nv4ports * sizeof(in_port_t));
1779	}
1780	if (mgr->v6ports != NULL) {
1781		isc_mem_put(mctx, mgr->v6ports,
1782			    mgr->nv6ports * sizeof(in_port_t));
1783	}
1784	isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1785	isc_mem_detach(&mctx);
1786}
1787
1788static isc_result_t
1789open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1790	    unsigned int options, isc_socket_t **sockp)
1791{
1792	isc_socket_t *sock;
1793	isc_result_t result;
1794
1795	sock = *sockp;
1796	if (sock == NULL) {
1797		result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1798					   isc_sockettype_udp, &sock);
1799		if (result != ISC_R_SUCCESS)
1800			return (result);
1801		isc_socket_setname(sock, "dispatcher", NULL);
1802	} else {
1803#ifdef BIND9
1804		result = isc_socket_open(sock);
1805		if (result != ISC_R_SUCCESS)
1806			return (result);
1807#else
1808		INSIST(0);
1809#endif
1810	}
1811
1812#ifndef ISC_ALLOW_MAPPED
1813	isc_socket_ipv6only(sock, ISC_TRUE);
1814#endif
1815	result = isc_socket_bind(sock, local, options);
1816	if (result != ISC_R_SUCCESS) {
1817		if (*sockp == NULL)
1818			isc_socket_detach(&sock);
1819		else {
1820#ifdef BIND9
1821			isc_socket_close(sock);
1822#else
1823			INSIST(0);
1824#endif
1825		}
1826		return (result);
1827	}
1828
1829	*sockp = sock;
1830	return (ISC_R_SUCCESS);
1831}
1832
1833/*%
1834 * Create a temporary port list to set the initial default set of dispatch
1835 * ports: [1024, 65535].  This is almost meaningless as the application will
1836 * normally set the ports explicitly, but is provided to fill some minor corner
1837 * cases.
1838 */
1839static isc_result_t
1840create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
1841	isc_result_t result;
1842
1843	result = isc_portset_create(mctx, portsetp);
1844	if (result != ISC_R_SUCCESS)
1845		return (result);
1846	isc_portset_addrange(*portsetp, 1024, 65535);
1847
1848	return (ISC_R_SUCCESS);
1849}
1850
1851/*
1852 * Publics.
1853 */
1854
1855isc_result_t
1856dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1857		       dns_dispatchmgr_t **mgrp)
1858{
1859	dns_dispatchmgr_t *mgr;
1860	isc_result_t result;
1861	isc_portset_t *v4portset = NULL;
1862	isc_portset_t *v6portset = NULL;
1863
1864	REQUIRE(mctx != NULL);
1865	REQUIRE(mgrp != NULL && *mgrp == NULL);
1866
1867	mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1868	if (mgr == NULL)
1869		return (ISC_R_NOMEMORY);
1870
1871	mgr->mctx = NULL;
1872	isc_mem_attach(mctx, &mgr->mctx);
1873
1874	mgr->blackhole = NULL;
1875	mgr->stats = NULL;
1876
1877	result = isc_mutex_init(&mgr->lock);
1878	if (result != ISC_R_SUCCESS)
1879		goto deallocate;
1880
1881	result = isc_mutex_init(&mgr->arc4_lock);
1882	if (result != ISC_R_SUCCESS)
1883		goto kill_lock;
1884
1885	result = isc_mutex_init(&mgr->buffer_lock);
1886	if (re

Large files are truncated, but you can click here to view the full file