/contrib/bind9/lib/dns/dispatch.c
https://bitbucket.org/freebsd/freebsd-head/ · C · 3533 lines · 2507 code · 520 blank · 506 comment · 657 complexity · 5edf881a04de59334c968878bc4474a1 MD5 · raw file
Large files are truncated click here to view the full file
- /*
- * Copyright (C) 2004-2009, 2011, 2012 Internet Systems Consortium, Inc. ("ISC")
- * Copyright (C) 1999-2003 Internet Software Consortium.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
- * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
- * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
- * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
- /* $Id$ */
- /*! \file */
- #include <config.h>
- #include <stdlib.h>
- #include <sys/types.h>
- #include <unistd.h>
- #include <stdlib.h>
- #include <isc/entropy.h>
- #include <isc/mem.h>
- #include <isc/mutex.h>
- #include <isc/portset.h>
- #include <isc/print.h>
- #include <isc/random.h>
- #include <isc/stats.h>
- #include <isc/string.h>
- #include <isc/task.h>
- #include <isc/time.h>
- #include <isc/util.h>
- #include <dns/acl.h>
- #include <dns/dispatch.h>
- #include <dns/events.h>
- #include <dns/log.h>
- #include <dns/message.h>
- #include <dns/portlist.h>
- #include <dns/stats.h>
- #include <dns/tcpmsg.h>
- #include <dns/types.h>
- typedef ISC_LIST(dns_dispentry_t) dns_displist_t; /*%< list head of dispatch entries */
- typedef struct dispsocket dispsocket_t;
- typedef ISC_LIST(dispsocket_t) dispsocketlist_t; /*%< list head of dispatch sockets */
- typedef struct dispportentry dispportentry_t;
- typedef ISC_LIST(dispportentry_t) dispportlist_t; /*%< list head of port table entries */
- /* ARC4 Random generator state */
- typedef struct arc4ctx {
- isc_uint8_t i; /*%< index into s[], advanced by one per output byte */
- isc_uint8_t j; /*%< second index into s[], accumulates s[i] */
- isc_uint8_t s[256]; /*%< permutation table, initialized to the identity */
- int count; /*%< budget left before dispatch_random() stirs again */
- isc_entropy_t *entropy; /*%< entropy source for ARC4 */
- isc_mutex_t *lock; /*%< taken by dispatch_random() when not NULL */
- } arc4ctx_t;
- typedef struct dns_qid {
- unsigned int magic; /*%< checked by VALID_QID() */
- unsigned int qid_nbuckets; /*%< hash table size */
- unsigned int qid_increment; /*%< id increment on collision */
- isc_mutex_t lock; /*%< held while sock_table or a dispatch port table is modified */
- dns_displist_t *qid_table; /*%< the table itself */
- dispsocketlist_t *sock_table; /*%< socket table */
- } dns_qid_t;
- struct dns_dispatchmgr {
- /* Unlocked. */
- unsigned int magic;
- isc_mem_t *mctx;
- dns_acl_t *blackhole;
- dns_portlist_t *portlist;
- isc_stats_t *stats;
- isc_entropy_t *entropy; /*%< entropy source */
- /* Locked by "lock". */
- isc_mutex_t lock;
- unsigned int state;
- ISC_LIST(dns_dispatch_t) list;
- /* Locked by arc4_lock. */
- isc_mutex_t arc4_lock;
- arc4ctx_t arc4ctx; /*%< ARC4 context for QID */
- /* locked by buffer lock */
- dns_qid_t *qid;
- isc_mutex_t buffer_lock;
- unsigned int buffers; /*%< allocated buffers */
- unsigned int buffersize; /*%< size of each buffer */
- unsigned int maxbuffers; /*%< max buffers */
- /* Locked internally. */
- isc_mutex_t pool_lock;
- isc_mempool_t *epool; /*%< memory pool for events */
- isc_mempool_t *rpool; /*%< memory pool for replies */
- isc_mempool_t *dpool; /*%< dispatch allocations */
- isc_mempool_t *bpool; /*%< memory pool for buffers */
- isc_mempool_t *spool; /*%< memory pool for dispsocs */
- /*%
- * Locked by qid->lock if qid exists; otherwise, can be used without
- * being locked.
- * Memory footprint considerations: this is a simple implementation of
- * available ports, i.e., an ordered array of the actual port numbers.
- * This will require about 256KB of memory in the worst case (128KB for
- * each of IPv4 and IPv6). We could reduce it by representing it in a
- * more sophisticated way such as a list (or array) of ranges that are
- * searched to identify a specific port. Our decision here is that the
- * saved memory isn't worth the implementation complexity, considering the
- * fact that the whole BIND9 process (which is mainly named) already
- * requires a pretty large memory footprint. We may, however, have to
- * revisit the decision when we want to use it as a separate module for
- * an environment where the memory requirement is more severe.
- */
- in_port_t *v4ports; /*%< available ports for IPv4 */
- unsigned int nv4ports; /*%< # of available ports for IPv4 */
- in_port_t *v6ports; /*%< available ports for IPv6 */
- unsigned int nv6ports; /*%< # of available ports for IPv6 */
- };
- #define MGR_SHUTTINGDOWN 0x00000001U /*%< bit tested in the 'state' member */
- #define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0)
- #define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
- struct dns_dispentry {
- unsigned int magic; /*%< checked by VALID_RESPONSE() */
- dns_dispatch_t *disp;
- dns_messageid_t id; /*%< query ID compared in entry_search() */
- in_port_t port; /*%< port compared in entry_search() */
- unsigned int bucket; /*%< index of this entry's list in qid_table */
- isc_sockaddr_t host; /*%< address compared in entry_search(); logged as the peer */
- isc_task_t *task;
- isc_taskaction_t action;
- void *arg;
- isc_boolean_t item_out;
- dispsocket_t *dispsocket; /*%< cleared by deactivate_dispsocket() */
- ISC_LIST(dns_dispatchevent_t) items;
- ISC_LINK(dns_dispentry_t) link; /*%< chain within a qid_table bucket */
- };
- /*%
- * Maximum number of dispatch sockets that can be pooled for reuse. The
- * appropriate value may vary, but experiments have shown a busy caching server
- * may need more than 1000 sockets concurrently opened. The maximum allowable
- * number of dispatch sockets (per manager) will be set to twice this
- * value.
- */
- #ifndef DNS_DISPATCH_POOLSOCKS
- #define DNS_DISPATCH_POOLSOCKS 2048
- #endif
- /*%
- * Quota to control the number of dispatch sockets. If a dispatch has more
- * than the quota of sockets, new queries will purge oldest ones, so that
- * a massive number of outstanding queries won't prevent subsequent queries
- * (especially if the older ones take longer time and result in timeout).
- */
- #ifndef DNS_DISPATCH_SOCKSQUOTA
- #define DNS_DISPATCH_SOCKSQUOTA 3072
- #endif
- struct dispsocket {
- unsigned int magic; /*%< DISPSOCK_MAGIC while valid; zeroed on destroy */
- isc_socket_t *socket;
- dns_dispatch_t *disp; /*%< dispatch this socket was created for */
- isc_sockaddr_t host; /*%< destination address this socket is used for */
- in_port_t localport; /* XXX: should be removed later */
- dispportentry_t *portentry; /*%< referenced port table entry; NULL when inactive */
- dns_dispentry_t *resp;
- isc_task_t *task; /*%< attached from one of disp->task[] */
- ISC_LINK(dispsocket_t) link; /*%< on disp->activesockets or disp->inactivesockets */
- unsigned int bucket; /*%< index into qid->sock_table */
- ISC_LINK(dispsocket_t) blink; /*%< chain within qid->sock_table[bucket] */
- };
- /*%
- * A port table entry. We remember every port we first open in a table with a
- * reference counter so that we can 'reuse' the same port (with different
- * destination addresses) using the SO_REUSEADDR socket option.
- */
- struct dispportentry {
- in_port_t port; /*%< local port number; hashed by port % DNS_DISPATCH_PORTTABLESIZE */
- unsigned int refs; /*%< entry is freed when this drops to 0 */
- ISC_LINK(struct dispportentry) link; /*%< chain within a port_table bucket */
- };
- #ifndef DNS_DISPATCH_PORTTABLESIZE
- #define DNS_DISPATCH_PORTTABLESIZE 1024
- #endif
- #define INVALID_BUCKET (0xffffdead)
- /*%
- * Number of tasks for each dispatch that use separate sockets for different
- * transactions. This must be a power of 2 as it will divide 32 bit numbers
- * to get a uniformly random task selection. See get_dispsocket().
- */
- #define MAX_INTERNAL_TASKS 64
- struct dns_dispatch {
- /* Unlocked. */
- unsigned int magic; /*%< magic */
- dns_dispatchmgr_t *mgr; /*%< dispatch manager */
- int ntasks; /*%< number of attached entries in task[] */
- /*%
- * internal task buckets. We use multiple tasks to distribute various
- * socket events well when using separate dispatch sockets. We use the
- * 1st task (task[0]) for internal control events.
- */
- isc_task_t *task[MAX_INTERNAL_TASKS];
- isc_socket_t *socket; /*%< isc socket attached to */
- isc_sockaddr_t local; /*%< local address */
- in_port_t localport; /*%< local UDP port */
- unsigned int maxrequests; /*%< max requests */
- isc_event_t *ctlevent; /*%< sent to task[0] once destroy_disp_ok() holds */
- /*% Locked by mgr->lock. */
- ISC_LINK(dns_dispatch_t) link;
- /* Locked by "lock". */
- isc_mutex_t lock; /*%< locks all below */
- isc_sockettype_t socktype;
- unsigned int attributes;
- unsigned int refcount; /*%< number of users */
- dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */
- unsigned int shutting_down : 1,
- shutdown_out : 1,
- connected : 1,
- tcpmsg_valid : 1,
- recv_pending : 1; /*%< is a recv() pending? */
- isc_result_t shutdown_why;
- ISC_LIST(dispsocket_t) activesockets; /*%< must be empty before destruction */
- ISC_LIST(dispsocket_t) inactivesockets; /*%< pooled for reuse by get_dispsocket() */
- unsigned int nsockets; /*%< dispsockets allocated and not yet destroyed */
- unsigned int requests; /*%< how many requests we have */
- unsigned int tcpbuffers; /*%< allocated buffers */
- dns_tcpmsg_t tcpmsg; /*%< for tcp streams */
- dns_qid_t *qid;
- arc4ctx_t arc4ctx; /*%< for QID/UDP port num */
- dispportlist_t *port_table; /*%< hold ports 'owned' by us */
- isc_mempool_t *portpool; /*%< port table entries */
- };
- #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ')
- #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC)
- #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p')
- #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
- #define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c')
- #define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
- #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p')
- #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
- #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
- #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
/*%
 * DNS_QID(disp) yields the QID table to use for 'disp': the dispatch's own
 * table for TCP dispatches, the manager-wide table otherwise.
 * DISP_ARC4CTX(disp) yields a pointer to the ARC4 context to use: the
 * dispatch's own context for UDP dispatches, the manager's otherwise.
 *
 * Both expansions are fully parenthesized so that they bind as a single
 * operand wherever they are used.
 */
#define DNS_QID(disp) (((disp)->socktype == isc_sockettype_tcp) ? \
		       (disp)->qid : (disp)->mgr->qid)
#define DISP_ARC4CTX(disp) (((disp)->socktype == isc_sockettype_udp) ? \
			    (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))
/*%
 * Locking a query port buffer is a bit tricky.  We access the buffer without
 * locking until qid is created.  Technically, there is a possibility of race
 * between the creation of qid and access to the port buffer; in practice,
 * however, this should be safe because qid isn't created until the first
 * dispatch is created and there should be no contending situation until then.
 *
 * Each macro expands to a single statement (do { ... } while (0)) so that it
 * cannot capture a following 'else' and behaves like a function call when
 * followed by a semicolon.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)
- /*
- * Statics.
- */
- static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
- dns_messageid_t, in_port_t, unsigned int);
- static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
- static void destroy_disp(isc_task_t *task, isc_event_t *event);
- static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
- static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
- static void udp_exrecv(isc_task_t *, isc_event_t *);
- static void udp_shrecv(isc_task_t *, isc_event_t *);
- static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
- static void tcp_recv(isc_task_t *, isc_event_t *);
- static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
- static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
- in_port_t);
- static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
- static void *allocate_udp_buffer(dns_dispatch_t *disp);
- static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
- static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
- static void do_cancel(dns_dispatch_t *disp);
- static dns_dispentry_t *linear_first(dns_qid_t *disp);
- static dns_dispentry_t *linear_next(dns_qid_t *disp,
- dns_dispentry_t *resp);
- static void dispatch_free(dns_dispatch_t **dispp);
- static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
- dns_dispatch_t *disp,
- isc_socketmgr_t *sockmgr,
- isc_sockaddr_t *localaddr,
- isc_socket_t **sockp);
- static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
- isc_socketmgr_t *sockmgr,
- isc_taskmgr_t *taskmgr,
- isc_sockaddr_t *localaddr,
- unsigned int maxrequests,
- unsigned int attributes,
- dns_dispatch_t **dispp);
- static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
- static void destroy_mgr(dns_dispatchmgr_t **mgrp);
- static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
- unsigned int increment, dns_qid_t **qidp,
- isc_boolean_t needaddrtable);
- static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
- static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
- unsigned int options, isc_socket_t **sockp);
- static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
- isc_sockaddr_t *sockaddrp);
- #define LVL(x) ISC_LOG_DEBUG(x)
- static void
- mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
- ISC_FORMAT_PRINTF(3, 4);
- static void
- mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
- char msgbuf[2048];
- va_list ap;
- if (! isc_log_wouldlog(dns_lctx, level))
- return;
- va_start(ap, fmt);
- vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
- va_end(ap);
- isc_log_write(dns_lctx,
- DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
- level, "dispatchmgr %p: %s", mgr, msgbuf);
- }
- static inline void
- inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
- if (mgr->stats != NULL)
- isc_stats_increment(mgr->stats, counter);
- }
- static void
- dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
- ISC_FORMAT_PRINTF(3, 4);
- static void
- dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
- char msgbuf[2048];
- va_list ap;
- if (! isc_log_wouldlog(dns_lctx, level))
- return;
- va_start(ap, fmt);
- vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
- va_end(ap);
- isc_log_write(dns_lctx,
- DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
- level, "dispatch %p: %s", disp, msgbuf);
- }
- static void
- request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
- int level, const char *fmt, ...)
- ISC_FORMAT_PRINTF(4, 5);
- static void
- request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
- int level, const char *fmt, ...)
- {
- char msgbuf[2048];
- char peerbuf[256];
- va_list ap;
- if (! isc_log_wouldlog(dns_lctx, level))
- return;
- va_start(ap, fmt);
- vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
- va_end(ap);
- if (VALID_RESPONSE(resp)) {
- isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
- isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
- DNS_LOGMODULE_DISPATCH, level,
- "dispatch %p response %p %s: %s", disp, resp,
- peerbuf, msgbuf);
- } else {
- isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
- DNS_LOGMODULE_DISPATCH, level,
- "dispatch %p req/resp %p: %s", disp, resp,
- msgbuf);
- }
- }
- /*%
- * ARC4 random number generator derived from OpenBSD.
- * Only dispatch_random() and dispatch_uniformrandom() are expected
- * to be called from general dispatch routines; the rest of them are subroutines
- * for these two.
- *
- * The original copyright follows:
- * Copyright (c) 1996, David Mazieres <dm@uun.org>
- * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
- #ifdef BIND9
- static void
- dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
- isc_mutex_t *lock)
- {
- int n;
- for (n = 0; n < 256; n++)
- actx->s[n] = n;
- actx->i = 0;
- actx->j = 0;
- actx->count = 0;
- actx->entropy = entropy; /* don't have to attach */
- actx->lock = lock;
- }
- static void
- dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
- int n;
- isc_uint8_t si;
- actx->i--;
- for (n = 0; n < 256; n++) {
- actx->i = (actx->i + 1);
- si = actx->s[actx->i];
- actx->j = (actx->j + si + dat[n % datlen]);
- actx->s[actx->i] = actx->s[actx->j];
- actx->s[actx->j] = si;
- }
- actx->j = actx->i;
- }
- static inline isc_uint8_t
- dispatch_arc4get8(arc4ctx_t *actx) {
- isc_uint8_t si, sj;
- actx->i = (actx->i + 1);
- si = actx->s[actx->i];
- actx->j = (actx->j + si);
- sj = actx->s[actx->j];
- actx->s[actx->i] = sj;
- actx->s[actx->j] = si;
- return (actx->s[(si + sj) & 0xff]);
- }
- static inline isc_uint16_t
- dispatch_arc4get16(arc4ctx_t *actx) {
- isc_uint16_t val;
- val = dispatch_arc4get8(actx) << 8;
- val |= dispatch_arc4get8(actx);
- return (val);
- }
- static void
- dispatch_arc4stir(arc4ctx_t *actx) {
- int i;
- union {
- unsigned char rnd[128];
- isc_uint32_t rnd32[32];
- } rnd;
- isc_result_t result;
- if (actx->entropy != NULL) {
- /*
- * We accept any quality of random data to avoid blocking.
- */
- result = isc_entropy_getdata(actx->entropy, rnd.rnd,
- sizeof(rnd), NULL, 0);
- RUNTIME_CHECK(result == ISC_R_SUCCESS);
- } else {
- for (i = 0; i < 32; i++)
- isc_random_get(&rnd.rnd32[i]);
- }
- dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
- /*
- * Discard early keystream, as per recommendations in:
- * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
- */
- for (i = 0; i < 256; i++)
- (void)dispatch_arc4get8(actx);
- /*
- * Derived from OpenBSD's implementation. The rationale is not clear,
- * but should be conservative enough in safety, and reasonably large
- * for efficiency.
- */
- actx->count = 1600000;
- }
- static isc_uint16_t
- dispatch_random(arc4ctx_t *actx) {
- isc_uint16_t result;
- if (actx->lock != NULL)
- LOCK(actx->lock);
- actx->count -= sizeof(isc_uint16_t);
- if (actx->count <= 0)
- dispatch_arc4stir(actx);
- result = dispatch_arc4get16(actx);
- if (actx->lock != NULL)
- UNLOCK(actx->lock);
- return (result);
- }
- #else
- /*
- * For general purpose library, we don't have to be too strict about the
- * quality of random values. Performance doesn't matter much, either.
- * So we simply use the isc_random module to keep the library as small as
- * possible.
- */
- static void
- dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
- isc_mutex_t *lock)
- {
- UNUSED(actx);
- UNUSED(entropy);
- UNUSED(lock);
- return;
- }
- static isc_uint16_t
- dispatch_random(arc4ctx_t *actx) {
- isc_uint32_t r;
- UNUSED(actx);
- isc_random_get(&r);
- return (r & 0xffff);
- }
- #endif /* BIND9 */
- static isc_uint16_t
- dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
- isc_uint16_t min, r;
- if (upper_bound < 2)
- return (0);
- /*
- * Ensure the range of random numbers [min, 0xffff] be a multiple of
- * upper_bound and contain at least a half of the 16 bit range.
- */
- if (upper_bound > 0x8000)
- min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
- else
- min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
- /*
- * This could theoretically loop forever but each retry has
- * p > 0.5 (worst case, usually far better) of selecting a
- * number inside the range we need, so it should rarely need
- * to re-roll.
- */
- for (;;) {
- r = dispatch_random(actx);
- if (r >= min)
- break;
- }
- return (r % upper_bound);
- }
- /*
- * Return a hash of the destination and message id.
- */
- static isc_uint32_t
- dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
- in_port_t port)
- {
- unsigned int ret;
- ret = isc_sockaddr_hash(dest, ISC_TRUE);
- ret ^= (id << 16) | port;
- ret %= qid->qid_nbuckets;
- INSIST(ret < qid->qid_nbuckets);
- return (ret);
- }
- /*
- * Find the first entry in 'qid'. Returns NULL if there are no entries.
- */
- static dns_dispentry_t *
- linear_first(dns_qid_t *qid) {
- dns_dispentry_t *ret;
- unsigned int bucket;
- bucket = 0;
- while (bucket < qid->qid_nbuckets) {
- ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
- if (ret != NULL)
- return (ret);
- bucket++;
- }
- return (NULL);
- }
- /*
- * Find the next entry after 'resp' in 'qid'. Return NULL if there are
- * no more entries.
- */
- static dns_dispentry_t *
- linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
- dns_dispentry_t *ret;
- unsigned int bucket;
- ret = ISC_LIST_NEXT(resp, link);
- if (ret != NULL)
- return (ret);
- bucket = resp->bucket;
- bucket++;
- while (bucket < qid->qid_nbuckets) {
- ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
- if (ret != NULL)
- return (ret);
- bucket++;
- }
- return (NULL);
- }
- /*
- * The dispatch must be locked.
- */
- static isc_boolean_t
- destroy_disp_ok(dns_dispatch_t *disp)
- {
- if (disp->refcount != 0)
- return (ISC_FALSE);
- if (disp->recv_pending != 0)
- return (ISC_FALSE);
- if (!ISC_LIST_EMPTY(disp->activesockets))
- return (ISC_FALSE);
- if (disp->shutting_down == 0)
- return (ISC_FALSE);
- return (ISC_TRUE);
- }
- /*
- * Called when refcount reaches 0 (and safe to destroy).
- *
- * The dispatcher must not be locked.
- * The manager must be locked.
- */
- static void
- destroy_disp(isc_task_t *task, isc_event_t *event) {
- dns_dispatch_t *disp;
- dns_dispatchmgr_t *mgr;
- isc_boolean_t killmgr;
- dispsocket_t *dispsocket;
- int i;
- INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
- UNUSED(task);
- disp = event->ev_arg;
- mgr = disp->mgr;
- LOCK(&mgr->lock);
- ISC_LIST_UNLINK(mgr->list, disp, link);
- dispatch_log(disp, LVL(90),
- "shutting down; detaching from sock %p, task %p",
- disp->socket, disp->task[0]); /* XXXX */
- if (disp->socket != NULL)
- isc_socket_detach(&disp->socket);
- while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
- ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
- destroy_dispsocket(disp, &dispsocket);
- }
- for (i = 0; i < disp->ntasks; i++)
- isc_task_detach(&disp->task[i]);
- isc_event_free(&event);
- dispatch_free(&disp);
- killmgr = destroy_mgr_ok(mgr);
- UNLOCK(&mgr->lock);
- if (killmgr)
- destroy_mgr(&mgr);
- }
- /*%
- * Manipulate port table per dispatch: find an entry for a given port number,
- * create a new entry, and decrement a given entry with possible clean-up.
- */
- static dispportentry_t *
- port_search(dns_dispatch_t *disp, in_port_t port) {
- dispportentry_t *portentry;
- REQUIRE(disp->port_table != NULL);
- portentry = ISC_LIST_HEAD(disp->port_table[port %
- DNS_DISPATCH_PORTTABLESIZE]);
- while (portentry != NULL) {
- if (portentry->port == port)
- return (portentry);
- portentry = ISC_LIST_NEXT(portentry, link);
- }
- return (NULL);
- }
- static dispportentry_t *
- new_portentry(dns_dispatch_t *disp, in_port_t port) {
- dispportentry_t *portentry;
- REQUIRE(disp->port_table != NULL);
- portentry = isc_mempool_get(disp->portpool);
- if (portentry == NULL)
- return (portentry);
- portentry->port = port;
- portentry->refs = 0;
- ISC_LINK_INIT(portentry, link);
- ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
- portentry, link);
- return (portentry);
- }
- /*%
- * The caller must not hold the qid->lock.
- */
- static void
- deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
- dispportentry_t *portentry = *portentryp;
- dns_qid_t *qid;
- REQUIRE(disp->port_table != NULL);
- REQUIRE(portentry != NULL && portentry->refs > 0);
- qid = DNS_QID(disp);
- LOCK(&qid->lock);
- portentry->refs--;
- if (portentry->refs == 0) {
- ISC_LIST_UNLINK(disp->port_table[portentry->port %
- DNS_DISPATCH_PORTTABLESIZE],
- portentry, link);
- isc_mempool_put(disp->portpool, portentry);
- }
- *portentryp = NULL;
- UNLOCK(&qid->lock);
- }
- /*%
- * Find a dispsocket for socket address 'dest', and port number 'port'.
- * Return NULL if no such entry exists.
- */
- static dispsocket_t *
- socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
- unsigned int bucket)
- {
- dispsocket_t *dispsock;
- REQUIRE(bucket < qid->qid_nbuckets);
- dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
- while (dispsock != NULL) {
- if (dispsock->portentry != NULL &&
- dispsock->portentry->port == port &&
- isc_sockaddr_equal(dest, &dispsock->host))
- return (dispsock);
- dispsock = ISC_LIST_NEXT(dispsock, blink);
- }
- return (NULL);
- }
- /*%
- * Make a new socket for a single dispatch with a random port number.
- * The caller must hold the disp->lock and qid->lock.
- *
- * A dispsocket is taken from disp->inactivesockets when one is pooled
- * there, and from mgr->spool otherwise. Up to 64 randomly chosen local
- * ports are then tried. On success the dispsocket is linked into
- * qid->sock_table and returned through '*dispsockp', with the chosen port
- * in '*portp'. On failure the dispsocket is destroyed and the last error
- * is returned.
- *
- * Returns ISC_R_ADDRNOTAVAIL when no port is configured for the address
- * family of disp->local, and ISC_R_NOMEMORY when an allocation fails.
- *
- * NOTE(review): on the failure path the dispsocket has no portentry and is
- * not linked via 'blink' in the code visible here, so destroy_dispsocket()
- * should not try to re-acquire qid->lock -- confirm.
- */
- static isc_result_t
- get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
- isc_socketmgr_t *sockmgr, dns_qid_t *qid,
- dispsocket_t **dispsockp, in_port_t *portp)
- {
- int i;
- isc_uint32_t r;
- dns_dispatchmgr_t *mgr = disp->mgr;
- isc_socket_t *sock = NULL;
- isc_result_t result = ISC_R_FAILURE; /* returned if every attempt hits socket_search() */
- in_port_t port;
- isc_sockaddr_t localaddr;
- unsigned int bucket = 0;
- dispsocket_t *dispsock;
- unsigned int nports;
- in_port_t *ports;
- unsigned int bindoptions;
- dispportentry_t *portentry = NULL;
- if (isc_sockaddr_pf(&disp->local) == AF_INET) {
- nports = disp->mgr->nv4ports;
- ports = disp->mgr->v4ports;
- } else {
- nports = disp->mgr->nv6ports;
- ports = disp->mgr->v6ports;
- }
- if (nports == 0)
- return (ISC_R_ADDRNOTAVAIL);
- dispsock = ISC_LIST_HEAD(disp->inactivesockets);
- if (dispsock != NULL) {
- ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
- sock = dispsock->socket; /* NOTE(review): presumably lets open_socket() reuse the pooled socket -- confirm */
- dispsock->socket = NULL;
- } else {
- dispsock = isc_mempool_get(mgr->spool);
- if (dispsock == NULL)
- return (ISC_R_NOMEMORY);
- disp->nsockets++;
- dispsock->socket = NULL;
- dispsock->disp = disp;
- dispsock->resp = NULL;
- dispsock->portentry = NULL;
- isc_random_get(&r);
- dispsock->task = NULL;
- isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task); /* random internal task */
- ISC_LINK_INIT(dispsock, link);
- ISC_LINK_INIT(dispsock, blink);
- dispsock->magic = DISPSOCK_MAGIC;
- }
- /*
- * Pick up a random UDP port and open a new socket with it. Avoid
- * choosing ports that share the same destination because it will be
- * very likely to fail in bind(2) or connect(2).
- */
- localaddr = disp->local;
- for (i = 0; i < 64; i++) {
- port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
- nports)]; /* NOTE(review): nports (unsigned int) is narrowed to isc_uint16_t here */
- isc_sockaddr_setport(&localaddr, port);
- bucket = dns_hash(qid, dest, 0, port);
- if (socket_search(qid, dest, port, bucket) != NULL)
- continue; /* this destination already uses that local port */
- bindoptions = 0;
- portentry = port_search(disp, port);
- if (portentry != NULL)
- bindoptions |= ISC_SOCKET_REUSEADDRESS; /* port already in this dispatch's table */
- result = open_socket(sockmgr, &localaddr, bindoptions, &sock);
- if (result == ISC_R_SUCCESS) {
- if (portentry == NULL) {
- portentry = new_portentry(disp, port);
- if (portentry == NULL) {
- result = ISC_R_NOMEMORY;
- break;
- }
- }
- portentry->refs++;
- break;
- } else if (result == ISC_R_NOPERM) {
- char buf[ISC_SOCKADDR_FORMATSIZE];
- isc_sockaddr_format(&localaddr, buf, sizeof(buf));
- dispatch_log(disp, ISC_LOG_WARNING,
- "open_socket(%s) -> %s: continuing",
- buf, isc_result_totext(result));
- } else if (result != ISC_R_ADDRINUSE) /* ADDRINUSE and NOPERM retry; anything else gives up */
- break;
- }
- if (result == ISC_R_SUCCESS) {
- dispsock->socket = sock;
- dispsock->host = *dest;
- dispsock->portentry = portentry;
- dispsock->bucket = bucket;
- ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
- *dispsockp = dispsock;
- *portp = port;
- } else {
- /*
- * We could keep it in the inactive list, but since this should
- * be an exceptional case and might be resource shortage, we'd
- * rather destroy it.
- */
- if (sock != NULL)
- isc_socket_detach(&sock);
- destroy_dispsocket(disp, &dispsock);
- }
- return (result);
- }
- /*%
- * Destroy a dedicated dispatch socket.
- */
- static void
- destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
- dispsocket_t *dispsock;
- dns_qid_t *qid;
- /*
- * The dispatch must be locked.
- */
- REQUIRE(dispsockp != NULL && *dispsockp != NULL);
- dispsock = *dispsockp;
- REQUIRE(!ISC_LINK_LINKED(dispsock, link));
- disp->nsockets--;
- dispsock->magic = 0;
- if (dispsock->portentry != NULL)
- deref_portentry(disp, &dispsock->portentry);
- if (dispsock->socket != NULL)
- isc_socket_detach(&dispsock->socket);
- if (ISC_LINK_LINKED(dispsock, blink)) {
- qid = DNS_QID(disp);
- LOCK(&qid->lock);
- ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
- blink);
- UNLOCK(&qid->lock);
- }
- if (dispsock->task != NULL)
- isc_task_detach(&dispsock->task);
- isc_mempool_put(disp->mgr->spool, dispsock);
- *dispsockp = NULL;
- }
- /*%
- * Deactivate a dedicated dispatch socket. Move it to the inactive list for
- * future reuse unless the total number of sockets are exceeding the maximum.
- */
- static void
- deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
- isc_result_t result;
- dns_qid_t *qid;
- /*
- * The dispatch must be locked.
- */
- ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
- if (dispsock->resp != NULL) {
- INSIST(dispsock->resp->dispsocket == dispsock);
- dispsock->resp->dispsocket = NULL;
- }
- INSIST(dispsock->portentry != NULL);
- deref_portentry(disp, &dispsock->portentry);
- #ifdef BIND9
- if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
- destroy_dispsocket(disp, &dispsock);
- else {
- result = isc_socket_close(dispsock->socket);
- qid = DNS_QID(disp);
- LOCK(&qid->lock);
- ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
- blink);
- UNLOCK(&qid->lock);
- if (result == ISC_R_SUCCESS)
- ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
- else {
- /*
- * If the underlying system does not allow this
- * optimization, destroy this temporary structure (and
- * create a new one for a new transaction).
- */
- INSIST(result == ISC_R_NOTIMPLEMENTED);
- destroy_dispsocket(disp, &dispsock);
- }
- }
- #else
- /* This kind of optimization isn't necessary for normal use */
- UNUSED(qid);
- UNUSED(result);
- destroy_dispsocket(disp, &dispsock);
- #endif
- }
- /*
- * Find an entry for query ID 'id', socket address 'dest', and port number
- * 'port'.
- * Return NULL if no such entry exists.
- */
- static dns_dispentry_t *
- entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
- in_port_t port, unsigned int bucket)
- {
- dns_dispentry_t *res;
- REQUIRE(bucket < qid->qid_nbuckets);
- res = ISC_LIST_HEAD(qid->qid_table[bucket]);
- while (res != NULL) {
- if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
- res->port == port) {
- return (res);
- }
- res = ISC_LIST_NEXT(res, link);
- }
- return (NULL);
- }
- static void
- free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
- INSIST(buf != NULL && len != 0);
- switch (disp->socktype) {
- case isc_sockettype_tcp:
- INSIST(disp->tcpbuffers > 0);
- disp->tcpbuffers--;
- isc_mem_put(disp->mgr->mctx, buf, len);
- break;
- case isc_sockettype_udp:
- LOCK(&disp->mgr->buffer_lock);
- INSIST(disp->mgr->buffers > 0);
- INSIST(len == disp->mgr->buffersize);
- disp->mgr->buffers--;
- isc_mempool_put(disp->mgr->bpool, buf);
- UNLOCK(&disp->mgr->buffer_lock);
- break;
- default:
- INSIST(0);
- break;
- }
- }
- static void *
- allocate_udp_buffer(dns_dispatch_t *disp) {
- void *temp;
- LOCK(&disp->mgr->buffer_lock);
- temp = isc_mempool_get(disp->mgr->bpool);
- if (temp != NULL)
- disp->mgr->buffers++;
- UNLOCK(&disp->mgr->buffer_lock);
- return (temp);
- }
- static inline void
- free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
- if (disp->failsafe_ev == ev) {
- INSIST(disp->shutdown_out == 1);
- disp->shutdown_out = 0;
- return;
- }
- isc_mempool_put(disp->mgr->epool, ev);
- }
- static inline dns_dispatchevent_t *
- allocate_event(dns_dispatch_t *disp) {
- dns_dispatchevent_t *ev;
- ev = isc_mempool_get(disp->mgr->epool);
- if (ev == NULL)
- return (NULL);
- ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
- NULL, NULL, NULL, NULL, NULL);
- return (ev);
- }
- static void
- udp_exrecv(isc_task_t *task, isc_event_t *ev) {
- dispsocket_t *dispsock = ev->ev_arg;
- UNUSED(task);
- REQUIRE(VALID_DISPSOCK(dispsock));
- udp_recv(ev, dispsock->disp, dispsock);
- }
- static void
- udp_shrecv(isc_task_t *task, isc_event_t *ev) {
- dns_dispatch_t *disp = ev->ev_arg;
- UNUSED(task);
- REQUIRE(VALID_DISPATCH(disp));
- udp_recv(ev, disp, NULL);
- }
- /*
- * General flow:
- *
- * If I/O result == CANCELED or error, free the buffer.
- *
- * If query, free the buffer, restart.
- *
- * If response:
- * Allocate event, fill in details.
- * If cannot allocate, free buffer, restart.
- * find target. If not found, free buffer, restart.
- * if event queue is not empty, queue. else, send.
- * restart.
- */
- static void
- udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
- isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
- dns_messageid_t id;
- isc_result_t dres;
- isc_buffer_t source;
- unsigned int flags;
- dns_dispentry_t *resp = NULL;
- dns_dispatchevent_t *rev;
- unsigned int bucket;
- isc_boolean_t killit;
- isc_boolean_t queue_response;
- dns_dispatchmgr_t *mgr;
- dns_qid_t *qid;
- isc_netaddr_t netaddr;
- int match;
- int result;
- isc_boolean_t qidlocked = ISC_FALSE;
- LOCK(&disp->lock);
- mgr = disp->mgr;
- qid = mgr->qid;
- dispatch_log(disp, LVL(90),
- "got packet: requests %d, buffers %d, recvs %d",
- disp->requests, disp->mgr->buffers, disp->recv_pending);
- if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
- /*
- * Unless the receive event was imported from a listening
- * interface, in which case the event type is
- * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
- */
- INSIST(disp->recv_pending != 0);
- disp->recv_pending = 0;
- }
- if (dispsock != NULL &&
- (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
- /*
- * dispsock->resp can be NULL if this transaction was canceled
- * just after receiving a response. Since this socket is
- * exclusively used and there should be at most one receive
- * event the canceled event should have been no effect. So
- * we can (and should) deactivate the socket right now.
- */
- deactivate_dispsocket(disp, dispsock);
- dispsock = NULL;
- }
- if (disp->shutting_down) {
- /*
- * This dispatcher is shutting down.
- */
- free_buffer(disp, ev->region.base, ev->region.length);
- isc_event_free(&ev_in);
- ev = NULL;
- killit = destroy_disp_ok(disp);
- UNLOCK(&disp->lock);
- if (killit)
- isc_task_send(disp->task[0], &disp->ctlevent);
- return;
- }
- if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
- if (dispsock != NULL) {
- resp = dispsock->resp;
- id = resp->id;
- if (ev->result != ISC_R_SUCCESS) {
- /*
- * This is most likely a network error on a
- * connected socket. It makes no sense to
- * check the address or parse the packet, but it
- * will help to return the error to the caller.
- */
- goto sendresponse;
- }
- } else {
- free_buffer(disp, ev->region.base, ev->region.length);
- UNLOCK(&disp->lock);
- isc_event_free(&ev_in);
- return;
- }
- } else if (ev->result != ISC_R_SUCCESS) {
- free_buffer(disp, ev->region.base, ev->region.length);
- if (ev->result != ISC_R_CANCELED)
- dispatch_log(disp, ISC_LOG_ERROR,
- "odd socket result in udp_recv(): %s",
- isc_result_totext(ev->result));
- UNLOCK(&disp->lock);
- isc_event_free(&ev_in);
- return;
- }
- /*
- * If this is from a blackholed address, drop it.
- */
- isc_netaddr_fromsockaddr(&netaddr, &ev->address);
- if (disp->mgr->blackhole != NULL &&
- dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
- NULL, &match, NULL) == ISC_R_SUCCESS &&
- match > 0)
- {
- if (isc_log_wouldlog(dns_lctx, LVL(10))) {
- char netaddrstr[ISC_NETADDR_FORMATSIZE];
- isc_netaddr_format(&netaddr, netaddrstr,
- sizeof(netaddrstr));
- dispatch_log(disp, LVL(10),
- "blackholed packet from %s",
- netaddrstr);
- }
- free_buffer(disp, ev->region.base, ev->region.length);
- goto restart;
- }
- /*
- * Peek into the buffer to see what we can see.
- */
- isc_buffer_init(&source, ev->region.base, ev->region.length);
- isc_buffer_add(&source, ev->n);
- dres = dns_message_peekheader(&source, &id, &flags);
- if (dres != ISC_R_SUCCESS) {
- free_buffer(disp, ev->region.base, ev->region.length);
- dispatch_log(disp, LVL(10), "got garbage packet");
- goto restart;
- }
- dispatch_log(disp, LVL(92),
- "got valid DNS message header, /QR %c, id %u",
- ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
- /*
- * Look at flags. If query, drop it. If response,
- * look to see where it goes.
- */
- if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
- /* query */
- free_buffer(disp, ev->region.base, ev->region.length);
- goto restart;
- }
- /*
- * Search for the corresponding response. If we are using an exclusive
- * socket, we've already identified it and we can skip the search; but
- * the ID and the address must match the expected ones.
- */
- if (resp == NULL) {
- bucket = dns_hash(qid, &ev->address, id, disp->localport);
- LOCK(&qid->lock);
- qidlocked = ISC_TRUE;
- resp = entry_search(qid, &ev->address, id, disp->localport,
- bucket);
- dispatch_log(disp, LVL(90),
- "search for response in bucket %d: %s",
- bucket, (resp == NULL ? "not found" : "found"));
- if (resp == NULL) {
- inc_stats(mgr, dns_resstatscounter_mismatch);
- free_buffer(disp, ev->region.base, ev->region.length);
- goto unlock;
- }
- } else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
- &resp->host)) {
- dispatch_log(disp, LVL(90),
- "response to an exclusive socket doesn't match");
- inc_stats(mgr, dns_resstatscounter_mismatch);
- free_buffer(disp, ev->region.base, ev->region.length);
- goto unlock;
- }
- /*
- * Now that we have the original dispatch the query was sent
- * from check that the address and port the response was
- * sent to make sense.
- */
- if (disp != resp->disp) {
- isc_sockaddr_t a1;
- isc_sockaddr_t a2;
- /*
- * Check that the socket types and ports match.
- */
- if (disp->socktype != resp->disp->socktype ||
- isc_sockaddr_getport(&disp->local) !=
- isc_sockaddr_getport(&resp->disp->local)) {
- free_buffer(disp, ev->region.base, ev->region.length);
- goto unlock;
- }
- /*
- * If both dispatches are bound to an address then fail as
- * the addresses can't be equal (enforced by the IP stack).
- *
- * Note under Linux a packet can be sent out via IPv4 socket
- * and the response be received via a IPv6 socket.
- *
- * Requests sent out via IPv6 should always come back in
- * via IPv6.
- */
- if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
- isc_sockaddr_pf(&disp->local) != PF_INET6) {
- free_buffer(disp, ev->region.base, ev->region.length);
- goto unlock;
- }
- isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
- isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
- if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
- !isc_sockaddr_eqaddr(&a2, &disp->local)) {
- free_buffer(disp, ev->region.base, ev->region.length);
- goto unlock;
- }
- }
- sendresponse:
- queue_response = resp->item_out;
- rev = allocate_event(resp->disp);
- if (rev == NULL) {
- free_buffer(disp, ev->region.base, ev->region.length);
- goto unlock;
- }
- /*
- * At this point, rev contains the event we want to fill in, and
- * resp contains the information on the place to send it to.
- * Send the event off.
- */
- isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
- isc_buffer_add(&rev->buffer, ev->n);
- rev->result = ev->result;
- rev->id = id;
- rev->addr = ev->address;
- rev->pktinfo = ev->pktinfo;
- rev->attributes = ev->attributes;
- if (queue_response) {
- ISC_LIST_APPEND(resp->items, rev, ev_link);
- } else {
- ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
- DNS_EVENT_DISPATCH,
- resp->action, resp->arg, resp, NULL, NULL);
- request_log(disp, resp, LVL(90),
- "[a] Sent event %p buffer %p len %d to task %p",
- rev, rev->buffer.base, rev->buffer.length,
- resp->task);
- resp->item_out = ISC_TRUE;
- isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
- }
- unlock:
- if (qidlocked)
- UNLOCK(&qid->lock);
- /*
- * Restart recv() to get the next packet.
- */
- restart:
- result = startrecv(disp, dispsock);
- if (result != ISC_R_SUCCESS && dispsock != NULL) {
- /*
- * XXX: wired. There seems to be no recovery process other than
- * deactivate this socket anyway (since we cannot start
- * receiving, we won't be able to receive a cancel event
- * from the user).
- */
- deactivate_dispsocket(disp, dispsock);
- }
- UNLOCK(&disp->lock);
- isc_event_free(&ev_in);
- }
- /*
- * General flow:
- *
- * If I/O result == CANCELED, EOF, or error, notify everyone as the
- * various queues drain.
- *
- * If query, restart.
- *
- * If response:
- * Allocate event, fill in details.
- * If cannot allocate, restart.
- * find target. If not found, restart.
- * if event queue is not empty, queue. else, send.
- * restart.
- */
- static void
- tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
- dns_dispatch_t *disp = ev_in->ev_arg;
- dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
- dns_messageid_t id;
- isc_result_t dres;
- unsigned int flags;
- dns_dispentry_t *resp;
- dns_dispatchevent_t *rev;
- unsigned int bucket;
- isc_boolean_t killit;
- isc_boolean_t queue_response;
- dns_qid_t *qid;
- int level;
- char buf[ISC_SOCKADDR_FORMATSIZE];
- UNUSED(task);
- REQUIRE(VALID_DISPATCH(disp));
- qid = disp->qid;
- dispatch_log(disp, LVL(90),
- "got TCP packet: requests %d, buffers %d, recvs %d",
- disp->requests, disp->tcpbuffers, disp->recv_pending);
- LOCK(&disp->lock);
- INSIST(disp->recv_pending != 0);
- disp->recv_pending = 0;
- if (disp->refcount == 0) {
- /*
- * This dispatcher is shutting down. Force cancelation.
- */
- tcpmsg->result = ISC_R_CANCELED;
- }
- if (tcpmsg->result != ISC_R_SUCCESS) {
- switch (tcpmsg->result) {
- case ISC_R_CANCELED:
- break;
- case ISC_R_EOF:
- dispatch_log(disp, LVL(90), "shutting down on EOF");
- do_cancel(disp);
- break;
- case ISC_R_CONNECTIONRESET:
- level = ISC_LOG_INFO;
- goto logit;
- default:
- level = ISC_LOG_ERROR;
- logit:
- isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
- dispatch_log(disp, level, "shutting down due to TCP "
- "receive error: %s: %s", buf,
- isc_result_totext(tcpmsg->result));
- do_cancel(disp);
- break;
- }
- /*
- * The event is statically allocated in the tcpmsg
- * structure, and destroy_disp() frees the tcpmsg, so we must
- * free the event *before* calling destroy_disp().
- */
- isc_event_free(&ev_in);
- disp->shutting_down = 1;
- disp->shutdown_why = tcpmsg->result;
- /*
- * If the recv() was canceled pass the word on.
- */
- killit = destroy_disp_ok(disp);
- UNLOCK(&disp->lock);
- if (killit)
- isc_task_send(disp->task[0], &disp->ctlevent);
- return;
- }
- dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
- tcpmsg->result,
- tcpmsg->buffer.length, tcpmsg->buffer.base);
- /*
- * Peek into the buffer to see what we can see.
- */
- dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
- if (dres != ISC_R_SUCCESS) {
- dispatch_log(disp, LVL(10), "got garbage packet");
- goto restart;
- }
- dispatch_log(disp, LVL(92),
- "got valid DNS message header, /QR %c, id %u",
- ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
- /*
- * Allocate an event to send to the query or response client, and
- * allocate a new buffer for our use.
- */
- /*
- * Look at flags. If query, drop it. If response,
- * look to see where it goes.
- */
- if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
- /*
- * Query.
- */
- goto restart;
- }
- /*
- * Response.
- */
- bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
- LOCK(&qid->lock);
- resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
- dispatch_log(disp, LVL(90),
- "search for response in bucket %d: %s",
- bucket, (resp == NULL ? "not found" : "found"));
- if (resp == NULL)
- goto unlock;
- queue_response = resp->item_out;
- rev = allocate_event(disp);
- if (rev == NULL)
- goto unlock;
- /*
- * At this point, rev contains the event we want to fill in, and
- * resp contains the information on the place to send it to.
- * Send the event off.
- */
- dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
- disp->tcpbuffers++;
- rev->result = ISC_R_SUCCESS;
- rev->id = id;
- rev->addr = tcpmsg->address;
- if (queue_response) {
- ISC_LIST_APPEND(resp->items, rev, ev_link);
- } else {
- ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
- resp->action, resp->arg, resp, NULL, NULL);
- request_log(disp, resp, LVL(90),
- "[b] Sent event %p buffer %p len %d to task %p",
- rev, rev->buffer.base, rev->buffer.length,
- resp->task);
- resp->item_out = ISC_TRUE;
- isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
- }
- unlock:
- UNLOCK(&qid->lock);
- /*
- * Restart recv() to get the next packet.
- */
- restart:
- (void)startrecv(disp, NULL);
- UNLOCK(&disp->lock);
- isc_event_free(&ev_in);
- }
- /*
- * disp must be locked.
- */
- static isc_result_t
- startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
- isc_result_t res;
- isc_region_t region;
- isc_socket_t *socket;
- if (disp->shutting_down == 1)
- return (ISC_R_SUCCESS);
- if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
- return (ISC_R_SUCCESS);
- if (disp->recv_pending != 0 && dispsock == NULL)
- return (ISC_R_SUCCESS);
- if (disp->mgr->buffers >= disp->mgr->maxbuffers)
- return (ISC_R_NOMEMORY);
- if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
- dispsock == NULL)
- return (ISC_R_SUCCESS);
- if (dispsock != NULL)
- socket = dispsock->socket;
- else
- socket = disp->socket;
- INSIST(socket != NULL);
- switch (disp->socktype) {
- /*
- * UDP reads are always maximal.
- */
- case isc_sockettype_udp:
- region.length = disp->mgr->buffersize;
- region.base = allocate_udp_buffer(disp);
- if (region.base == NULL)
- return (ISC_R_NOMEMORY);
- if (dispsock != NULL) {
- res = isc_socket_recv(socket, ®ion, 1,
- dispsock->task, udp_exrecv,
- dispsock);
- if (res != ISC_R_SUCCESS) {
- free_buffer(disp, region.base, region.length);
- return (res);
- }
- } else {
- res = isc_socket_recv(socket, ®ion, 1,
- disp->task[0], udp_shrecv, disp);
- if (res != ISC_R_SUCCESS) {
- free_buffer(disp, region.base, region.length);
- disp->shutdown_why = res;
- disp->shutting_down = 1;
- do_cancel(disp);
- return (ISC_R_SUCCESS); /* recover by cancel */
- }
- INSIST(disp->recv_pending == 0);
- disp->recv_pending = 1;
- }
- break;
- case isc_sockettype_tcp:
- res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
- tcp_recv, disp);
- if (res != ISC_R_SUCCESS) {
- disp->shutdown_why = res;
- disp->shutting_down = 1;
- do_cancel(disp);
- return (ISC_R_SUCCESS); /* recover by cancel */
- }
- INSIST(disp->recv_pending == 0);
- disp->recv_pending = 1;
- break;
- default:
- INSIST(0);
- break;
- }
- return (ISC_R_SUCCESS);
- }
- /*
- * Mgr must be locked when calling this function.
- */
- static isc_boolean_t
- destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
- mgr_log(mgr, LVL(90),
- "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
- "epool=%d, rpool=%d, dpool=%d",
- MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
- isc_mempool_getallocated(mgr->epool),
- isc_mempool_getallocated(mgr->rpool),
- isc_mempool_getallocated(mgr->dpool));
- if (!MGR_IS_SHUTTINGDOWN(mgr))
- return (ISC_FALSE);
- if (!ISC_LIST_EMPTY(mgr->list))
- return (ISC_FALSE);
- if (isc_mempool_getallocated(mgr->epool) != 0)
- return (ISC_FALSE);
- if (isc_mempool_getallocated(mgr->rpool) != 0)
- return (ISC_FALSE);
- if (isc_mempool_getallocated(mgr->dpool) != 0)
- return (ISC_FALSE);
- return (ISC_TRUE);
- }
- /*
- * Mgr must be unlocked when calling this function.
- */
- static void
- destroy_mgr(dns_dispatchmgr_t **mgrp) {
- isc_mem_t *mctx;
- dns_dispatchmgr_t *mgr;
- mgr = *mgrp;
- *mgrp = NULL;
- mctx = mgr->mctx;
- mgr->magic = 0;
- mgr->mctx = NULL;
- DESTROYLOCK(&mgr->lock);
- mgr->state = 0;
- DESTROYLOCK(&mgr->arc4_lock);
- isc_mempool_destroy(&mgr->epool);
- isc_mempool_destroy(&mgr->rpool);
- isc_mempool_destroy(&mgr->dpool);
- if (mgr->bpool != NULL)
- isc_mempool_destroy(&mgr->bpool);
- if (mgr->spool != NULL)
- isc_mempool_destroy(&mgr->spool);
- DESTROYLOCK(&mgr->pool_lock);
- #ifdef BIND9
- if (mgr->entropy != NULL)
- isc_entropy_detach(&mgr->entropy);
- #endif /* BIND9 */
- if (mgr->qid != NULL)
- qid_destroy(mctx, &mgr->qid);
- DESTROYLOCK(&mgr->buffer_lock);
- if (mgr->blackhole != NULL)
- dns_acl_detach(&mgr->blackhole);
- if (mgr->stats != NULL)
- isc_stats_detach(&mgr->stats);
- if (mgr->v4ports != NULL) {
- isc_mem_put(mctx, mgr->v4ports,
- mgr->nv4ports * sizeof(in_port_t));
- }
- if (mgr->v6ports != NULL) {
- isc_mem_put(mctx, mgr->v6ports,
- mgr->nv6ports * sizeof(in_port_t));
- }
- isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
- isc_mem_detach(&mctx);
- }
- static isc_result_t
- open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
- unsigned int options, isc_socket_t **sockp)
- {
- isc_socket_t *sock;
- isc_result_t result;
- sock = *sockp;
- if (sock == NULL) {
- result = isc_socket_create(mgr, isc_sockaddr_pf(local),
- isc_sockettype_udp, &sock);
- if (result != ISC_R_SUCCESS)
- return (result);
- isc_socket_setname(sock, "dispatcher", NULL);
- } else {
- #ifdef BIND9
- result = isc_socket_open(sock);
- if (result != ISC_R_SUCCESS)
- return (result);
- #else
- INSIST(0);
- #endif
- }
- #ifndef ISC_ALLOW_MAPPED
- isc_socket_ipv6only(sock, ISC_TRUE);
- #endif
- result = isc_socket_bind(sock, local, options);
- if (result != ISC_R_SUCCESS) {
- if (*sockp == NULL)
- isc_socket_detach(&sock);
- else {
- #ifdef BIND9
- isc_socket_close(sock);
- #else
- INSIST(0);
- #endif
- }
- return (result);
- }
- *sockp = sock;
- return (ISC_R_SUCCESS);
- }
- /*%
- * Create a temporary port list to set the initial default set of dispatch
- * ports: [1024, 65535]. This is almost meaningless as the application will
- * normally set the ports explicitly, but is provided to fill some minor corner
- * cases.
- */
- static isc_result_t
- create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
- isc_result_t result;
- result = isc_portset_create(mctx, portsetp);
- if (result != ISC_R_SUCCESS)
- return (result);
- isc_portset_addrange(*portsetp, 1024, 65535);
- return (ISC_R_SUCCESS);
- }
- /*
- * Publics.
- */
- isc_result_t
- dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
- dns_dispatchmgr_t **mgrp)
- {
- dns_dispatchmgr_t *mgr;
- isc_result_t result;
- isc_portset_t *v4portset = NULL;
- isc_portset_t *v6portset = NULL;
- REQUIRE(mctx != NULL);
- REQUIRE(mgrp != NULL && *mgrp == NULL);
- mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
- if (mgr == NULL)
- return (ISC_R_NOMEMORY);
- mgr->mctx = NULL;
- isc_mem_attach(mctx, &mgr->mctx);
- mgr->blackhole = NULL;
- mgr->stats = NULL;
- result = isc_mutex_init(&mgr->lock);
- if (result != ISC_R_SUCCESS)
- goto deallocate;
- result = isc_mutex_init(&mgr->arc4_lock);
- if (result != ISC_R_SUCCESS)
- goto kill_lock;
- result = isc_mutex_init(&mgr->buffer_lock);
- if (re…