PageRenderTime 67ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/contrib/bind9/lib/dns/dispatch.c

https://bitbucket.org/freebsd/freebsd-head/
C | 3533 lines | 2507 code | 520 blank | 506 comment | 657 complexity | 5edf881a04de59334c968878bc4474a1 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, BSD-3-Clause, LGPL-2.0, LGPL-2.1, BSD-2-Clause, 0BSD, JSON, AGPL-1.0, GPL-2.0
  1. /*
  2. * Copyright (C) 2004-2009, 2011, 2012 Internet Systems Consortium, Inc. ("ISC")
  3. * Copyright (C) 1999-2003 Internet Software Consortium.
  4. *
  5. * Permission to use, copy, modify, and/or distribute this software for any
  6. * purpose with or without fee is hereby granted, provided that the above
  7. * copyright notice and this permission notice appear in all copies.
  8. *
  9. * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
  10. * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
  11. * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
  12. * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  13. * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  14. * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  15. * PERFORMANCE OF THIS SOFTWARE.
  16. */
  17. /* $Id$ */
  18. /*! \file */
  19. #include <config.h>
  20. #include <stdlib.h>
  21. #include <sys/types.h>
  22. #include <unistd.h>
  23. #include <stdlib.h>
  24. #include <isc/entropy.h>
  25. #include <isc/mem.h>
  26. #include <isc/mutex.h>
  27. #include <isc/portset.h>
  28. #include <isc/print.h>
  29. #include <isc/random.h>
  30. #include <isc/stats.h>
  31. #include <isc/string.h>
  32. #include <isc/task.h>
  33. #include <isc/time.h>
  34. #include <isc/util.h>
  35. #include <dns/acl.h>
  36. #include <dns/dispatch.h>
  37. #include <dns/events.h>
  38. #include <dns/log.h>
  39. #include <dns/message.h>
  40. #include <dns/portlist.h>
  41. #include <dns/stats.h>
  42. #include <dns/tcpmsg.h>
  43. #include <dns/types.h>
  44. typedef ISC_LIST(dns_dispentry_t) dns_displist_t;
  45. typedef struct dispsocket dispsocket_t;
  46. typedef ISC_LIST(dispsocket_t) dispsocketlist_t;
  47. typedef struct dispportentry dispportentry_t;
  48. typedef ISC_LIST(dispportentry_t) dispportlist_t;
  49. /* ARC4 Random generator state */
  50. typedef struct arc4ctx {
  51. isc_uint8_t i;
  52. isc_uint8_t j;
  53. isc_uint8_t s[256];
  54. int count;
  55. isc_entropy_t *entropy; /*%< entropy source for ARC4 */
  56. isc_mutex_t *lock;
  57. } arc4ctx_t;
  58. typedef struct dns_qid {
  59. unsigned int magic;
  60. unsigned int qid_nbuckets; /*%< hash table size */
  61. unsigned int qid_increment; /*%< id increment on collision */
  62. isc_mutex_t lock;
  63. dns_displist_t *qid_table; /*%< the table itself */
  64. dispsocketlist_t *sock_table; /*%< socket table */
  65. } dns_qid_t;
  66. struct dns_dispatchmgr {
  67. /* Unlocked. */
  68. unsigned int magic;
  69. isc_mem_t *mctx;
  70. dns_acl_t *blackhole;
  71. dns_portlist_t *portlist;
  72. isc_stats_t *stats;
  73. isc_entropy_t *entropy; /*%< entropy source */
  74. /* Locked by "lock". */
  75. isc_mutex_t lock;
  76. unsigned int state;
  77. ISC_LIST(dns_dispatch_t) list;
  78. /* Locked by arc4_lock. */
  79. isc_mutex_t arc4_lock;
  80. arc4ctx_t arc4ctx; /*%< ARC4 context for QID */
  81. /* locked by buffer lock */
  82. dns_qid_t *qid;
  83. isc_mutex_t buffer_lock;
  84. unsigned int buffers; /*%< allocated buffers */
  85. unsigned int buffersize; /*%< size of each buffer */
  86. unsigned int maxbuffers; /*%< max buffers */
  87. /* Locked internally. */
  88. isc_mutex_t pool_lock;
  89. isc_mempool_t *epool; /*%< memory pool for events */
  90. isc_mempool_t *rpool; /*%< memory pool for replies */
  91. isc_mempool_t *dpool; /*%< dispatch allocations */
  92. isc_mempool_t *bpool; /*%< memory pool for buffers */
  93. isc_mempool_t *spool; /*%< memory pool for dispsocs */
  94. /*%
  95. * Locked by qid->lock if qid exists; otherwise, can be used without
  96. * being locked.
  97. * Memory footprint considerations: this is a simple implementation of
  98. * available ports, i.e., an ordered array of the actual port numbers.
  99. * This will require about 256KB of memory in the worst case (128KB for
  100. * each of IPv4 and IPv6). We could reduce it by representing it as a
  101. * more sophisticated way such as a list (or array) of ranges that are
  102. * searched to identify a specific port. Our decision here is the saved
  103. * memory isn't worth the implementation complexity, considering the
  104. * fact that the whole BIND9 process (which is mainly named) already
  105. * requires a pretty large memory footprint. We may, however, have to
  106. * revisit the decision when we want to use it as a separate module for
  107. * an environment where memory requirement is severer.
  108. */
  109. in_port_t *v4ports; /*%< available ports for IPv4 */
  110. unsigned int nv4ports; /*%< # of available ports for IPv4 */
  111. in_port_t *v6ports; /*%< available ports for IPv4 */
  112. unsigned int nv6ports; /*%< # of available ports for IPv4 */
  113. };
  114. #define MGR_SHUTTINGDOWN 0x00000001U
  115. #define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0)
  116. #define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
  117. struct dns_dispentry {
  118. unsigned int magic;
  119. dns_dispatch_t *disp;
  120. dns_messageid_t id;
  121. in_port_t port;
  122. unsigned int bucket;
  123. isc_sockaddr_t host;
  124. isc_task_t *task;
  125. isc_taskaction_t action;
  126. void *arg;
  127. isc_boolean_t item_out;
  128. dispsocket_t *dispsocket;
  129. ISC_LIST(dns_dispatchevent_t) items;
  130. ISC_LINK(dns_dispentry_t) link;
  131. };
  132. /*%
  133. * Maximum number of dispatch sockets that can be pooled for reuse. The
  134. * appropriate value may vary, but experiments have shown a busy caching server
  135. * may need more than 1000 sockets concurrently opened. The maximum allowable
  136. * number of dispatch sockets (per manager) will be set to the double of this
  137. * value.
  138. */
  139. #ifndef DNS_DISPATCH_POOLSOCKS
  140. #define DNS_DISPATCH_POOLSOCKS 2048
  141. #endif
  142. /*%
  143. * Quota to control the number of dispatch sockets. If a dispatch has more
  144. * than the quota of sockets, new queries will purge oldest ones, so that
  145. * a massive number of outstanding queries won't prevent subsequent queries
  146. * (especially if the older ones take longer time and result in timeout).
  147. */
  148. #ifndef DNS_DISPATCH_SOCKSQUOTA
  149. #define DNS_DISPATCH_SOCKSQUOTA 3072
  150. #endif
  151. struct dispsocket {
  152. unsigned int magic;
  153. isc_socket_t *socket;
  154. dns_dispatch_t *disp;
  155. isc_sockaddr_t host;
  156. in_port_t localport; /* XXX: should be removed later */
  157. dispportentry_t *portentry;
  158. dns_dispentry_t *resp;
  159. isc_task_t *task;
  160. ISC_LINK(dispsocket_t) link;
  161. unsigned int bucket;
  162. ISC_LINK(dispsocket_t) blink;
  163. };
  164. /*%
  165. * A port table entry. We remember every port we first open in a table with a
  166. * reference counter so that we can 'reuse' the same port (with different
  167. * destination addresses) using the SO_REUSEADDR socket option.
  168. */
  169. struct dispportentry {
  170. in_port_t port;
  171. unsigned int refs;
  172. ISC_LINK(struct dispportentry) link;
  173. };
  174. #ifndef DNS_DISPATCH_PORTTABLESIZE
  175. #define DNS_DISPATCH_PORTTABLESIZE 1024
  176. #endif
  177. #define INVALID_BUCKET (0xffffdead)
  178. /*%
  179. * Number of tasks for each dispatch that use separate sockets for different
  180. * transactions. This must be a power of 2 as it will divide 32 bit numbers
  181. * to get an uniformly random tasks selection. See get_dispsocket().
  182. */
  183. #define MAX_INTERNAL_TASKS 64
  184. struct dns_dispatch {
  185. /* Unlocked. */
  186. unsigned int magic; /*%< magic */
  187. dns_dispatchmgr_t *mgr; /*%< dispatch manager */
  188. int ntasks;
  189. /*%
  190. * internal task buckets. We use multiple tasks to distribute various
  191. * socket events well when using separate dispatch sockets. We use the
  192. * 1st task (task[0]) for internal control events.
  193. */
  194. isc_task_t *task[MAX_INTERNAL_TASKS];
  195. isc_socket_t *socket; /*%< isc socket attached to */
  196. isc_sockaddr_t local; /*%< local address */
  197. in_port_t localport; /*%< local UDP port */
  198. unsigned int maxrequests; /*%< max requests */
  199. isc_event_t *ctlevent;
  200. /*% Locked by mgr->lock. */
  201. ISC_LINK(dns_dispatch_t) link;
  202. /* Locked by "lock". */
  203. isc_mutex_t lock; /*%< locks all below */
  204. isc_sockettype_t socktype;
  205. unsigned int attributes;
  206. unsigned int refcount; /*%< number of users */
  207. dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */
  208. unsigned int shutting_down : 1,
  209. shutdown_out : 1,
  210. connected : 1,
  211. tcpmsg_valid : 1,
  212. recv_pending : 1; /*%< is a recv() pending? */
  213. isc_result_t shutdown_why;
  214. ISC_LIST(dispsocket_t) activesockets;
  215. ISC_LIST(dispsocket_t) inactivesockets;
  216. unsigned int nsockets;
  217. unsigned int requests; /*%< how many requests we have */
  218. unsigned int tcpbuffers; /*%< allocated buffers */
  219. dns_tcpmsg_t tcpmsg; /*%< for tcp streams */
  220. dns_qid_t *qid;
  221. arc4ctx_t arc4ctx; /*%< for QID/UDP port num */
  222. dispportlist_t *port_table; /*%< hold ports 'owned' by us */
  223. isc_mempool_t *portpool; /*%< port table entries */
  224. };
  225. #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ')
  226. #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC)
  227. #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p')
  228. #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
  229. #define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c')
  230. #define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
  231. #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p')
  232. #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
  233. #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
  234. #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
  235. #define DNS_QID(disp) ((disp)->socktype == isc_sockettype_tcp) ? \
  236. (disp)->qid : (disp)->mgr->qid
  237. #define DISP_ARC4CTX(disp) ((disp)->socktype == isc_sockettype_udp) ? \
  238. (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx)
  239. /*%
  240. * Locking a query port buffer is a bit tricky. We access the buffer without
  241. * locking until qid is created. Technically, there is a possibility of race
  242. * between the creation of qid and access to the port buffer; in practice,
  243. * however, this should be safe because qid isn't created until the first
  244. * dispatch is created and there should be no contending situation until then.
  245. */
  246. #define PORTBUFLOCK(mgr) if ((mgr)->qid != NULL) LOCK(&((mgr)->qid->lock))
  247. #define PORTBUFUNLOCK(mgr) if ((mgr)->qid != NULL) UNLOCK((&(mgr)->qid->lock))
  248. /*
  249. * Statics.
  250. */
  251. static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
  252. dns_messageid_t, in_port_t, unsigned int);
  253. static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
  254. static void destroy_disp(isc_task_t *task, isc_event_t *event);
  255. static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
  256. static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
  257. static void udp_exrecv(isc_task_t *, isc_event_t *);
  258. static void udp_shrecv(isc_task_t *, isc_event_t *);
  259. static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
  260. static void tcp_recv(isc_task_t *, isc_event_t *);
  261. static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
  262. static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
  263. in_port_t);
  264. static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
  265. static void *allocate_udp_buffer(dns_dispatch_t *disp);
  266. static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
  267. static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
  268. static void do_cancel(dns_dispatch_t *disp);
  269. static dns_dispentry_t *linear_first(dns_qid_t *disp);
  270. static dns_dispentry_t *linear_next(dns_qid_t *disp,
  271. dns_dispentry_t *resp);
  272. static void dispatch_free(dns_dispatch_t **dispp);
  273. static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
  274. dns_dispatch_t *disp,
  275. isc_socketmgr_t *sockmgr,
  276. isc_sockaddr_t *localaddr,
  277. isc_socket_t **sockp);
  278. static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
  279. isc_socketmgr_t *sockmgr,
  280. isc_taskmgr_t *taskmgr,
  281. isc_sockaddr_t *localaddr,
  282. unsigned int maxrequests,
  283. unsigned int attributes,
  284. dns_dispatch_t **dispp);
  285. static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
  286. static void destroy_mgr(dns_dispatchmgr_t **mgrp);
  287. static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
  288. unsigned int increment, dns_qid_t **qidp,
  289. isc_boolean_t needaddrtable);
  290. static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
  291. static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
  292. unsigned int options, isc_socket_t **sockp);
  293. static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
  294. isc_sockaddr_t *sockaddrp);
  295. #define LVL(x) ISC_LOG_DEBUG(x)
  296. static void
  297. mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
  298. ISC_FORMAT_PRINTF(3, 4);
  299. static void
  300. mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
  301. char msgbuf[2048];
  302. va_list ap;
  303. if (! isc_log_wouldlog(dns_lctx, level))
  304. return;
  305. va_start(ap, fmt);
  306. vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
  307. va_end(ap);
  308. isc_log_write(dns_lctx,
  309. DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
  310. level, "dispatchmgr %p: %s", mgr, msgbuf);
  311. }
  312. static inline void
  313. inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
  314. if (mgr->stats != NULL)
  315. isc_stats_increment(mgr->stats, counter);
  316. }
  317. static void
  318. dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
  319. ISC_FORMAT_PRINTF(3, 4);
  320. static void
  321. dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
  322. char msgbuf[2048];
  323. va_list ap;
  324. if (! isc_log_wouldlog(dns_lctx, level))
  325. return;
  326. va_start(ap, fmt);
  327. vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
  328. va_end(ap);
  329. isc_log_write(dns_lctx,
  330. DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
  331. level, "dispatch %p: %s", disp, msgbuf);
  332. }
  333. static void
  334. request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
  335. int level, const char *fmt, ...)
  336. ISC_FORMAT_PRINTF(4, 5);
  337. static void
  338. request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
  339. int level, const char *fmt, ...)
  340. {
  341. char msgbuf[2048];
  342. char peerbuf[256];
  343. va_list ap;
  344. if (! isc_log_wouldlog(dns_lctx, level))
  345. return;
  346. va_start(ap, fmt);
  347. vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
  348. va_end(ap);
  349. if (VALID_RESPONSE(resp)) {
  350. isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
  351. isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
  352. DNS_LOGMODULE_DISPATCH, level,
  353. "dispatch %p response %p %s: %s", disp, resp,
  354. peerbuf, msgbuf);
  355. } else {
  356. isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
  357. DNS_LOGMODULE_DISPATCH, level,
  358. "dispatch %p req/resp %p: %s", disp, resp,
  359. msgbuf);
  360. }
  361. }
  362. /*%
  363. * ARC4 random number generator derived from OpenBSD.
  364. * Only dispatch_random() and dispatch_uniformrandom() are expected
  365. * to be called from general dispatch routines; the rest of them are subroutines
  366. * for these two.
  367. *
  368. * The original copyright follows:
  369. * Copyright (c) 1996, David Mazieres <dm@uun.org>
  370. * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
  371. *
  372. * Permission to use, copy, modify, and distribute this software for any
  373. * purpose with or without fee is hereby granted, provided that the above
  374. * copyright notice and this permission notice appear in all copies.
  375. *
  376. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  377. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  378. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  379. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  380. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  381. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  382. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  383. */
  384. #ifdef BIND9
  385. static void
  386. dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
  387. isc_mutex_t *lock)
  388. {
  389. int n;
  390. for (n = 0; n < 256; n++)
  391. actx->s[n] = n;
  392. actx->i = 0;
  393. actx->j = 0;
  394. actx->count = 0;
  395. actx->entropy = entropy; /* don't have to attach */
  396. actx->lock = lock;
  397. }
  398. static void
  399. dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
  400. int n;
  401. isc_uint8_t si;
  402. actx->i--;
  403. for (n = 0; n < 256; n++) {
  404. actx->i = (actx->i + 1);
  405. si = actx->s[actx->i];
  406. actx->j = (actx->j + si + dat[n % datlen]);
  407. actx->s[actx->i] = actx->s[actx->j];
  408. actx->s[actx->j] = si;
  409. }
  410. actx->j = actx->i;
  411. }
  412. static inline isc_uint8_t
  413. dispatch_arc4get8(arc4ctx_t *actx) {
  414. isc_uint8_t si, sj;
  415. actx->i = (actx->i + 1);
  416. si = actx->s[actx->i];
  417. actx->j = (actx->j + si);
  418. sj = actx->s[actx->j];
  419. actx->s[actx->i] = sj;
  420. actx->s[actx->j] = si;
  421. return (actx->s[(si + sj) & 0xff]);
  422. }
  423. static inline isc_uint16_t
  424. dispatch_arc4get16(arc4ctx_t *actx) {
  425. isc_uint16_t val;
  426. val = dispatch_arc4get8(actx) << 8;
  427. val |= dispatch_arc4get8(actx);
  428. return (val);
  429. }
  430. static void
  431. dispatch_arc4stir(arc4ctx_t *actx) {
  432. int i;
  433. union {
  434. unsigned char rnd[128];
  435. isc_uint32_t rnd32[32];
  436. } rnd;
  437. isc_result_t result;
  438. if (actx->entropy != NULL) {
  439. /*
  440. * We accept any quality of random data to avoid blocking.
  441. */
  442. result = isc_entropy_getdata(actx->entropy, rnd.rnd,
  443. sizeof(rnd), NULL, 0);
  444. RUNTIME_CHECK(result == ISC_R_SUCCESS);
  445. } else {
  446. for (i = 0; i < 32; i++)
  447. isc_random_get(&rnd.rnd32[i]);
  448. }
  449. dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
  450. /*
  451. * Discard early keystream, as per recommendations in:
  452. * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
  453. */
  454. for (i = 0; i < 256; i++)
  455. (void)dispatch_arc4get8(actx);
  456. /*
  457. * Derived from OpenBSD's implementation. The rationale is not clear,
  458. * but should be conservative enough in safety, and reasonably large
  459. * for efficiency.
  460. */
  461. actx->count = 1600000;
  462. }
  463. static isc_uint16_t
  464. dispatch_random(arc4ctx_t *actx) {
  465. isc_uint16_t result;
  466. if (actx->lock != NULL)
  467. LOCK(actx->lock);
  468. actx->count -= sizeof(isc_uint16_t);
  469. if (actx->count <= 0)
  470. dispatch_arc4stir(actx);
  471. result = dispatch_arc4get16(actx);
  472. if (actx->lock != NULL)
  473. UNLOCK(actx->lock);
  474. return (result);
  475. }
  476. #else
  477. /*
  478. * For general purpose library, we don't have to be too strict about the
  479. * quality of random values. Performance doesn't matter much, either.
  480. * So we simply use the isc_random module to keep the library as small as
  481. * possible.
  482. */
  483. static void
  484. dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
  485. isc_mutex_t *lock)
  486. {
  487. UNUSED(actx);
  488. UNUSED(entropy);
  489. UNUSED(lock);
  490. return;
  491. }
  492. static isc_uint16_t
  493. dispatch_random(arc4ctx_t *actx) {
  494. isc_uint32_t r;
  495. UNUSED(actx);
  496. isc_random_get(&r);
  497. return (r & 0xffff);
  498. }
  499. #endif /* BIND9 */
  500. static isc_uint16_t
  501. dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
  502. isc_uint16_t min, r;
  503. if (upper_bound < 2)
  504. return (0);
  505. /*
  506. * Ensure the range of random numbers [min, 0xffff] be a multiple of
  507. * upper_bound and contain at least a half of the 16 bit range.
  508. */
  509. if (upper_bound > 0x8000)
  510. min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
  511. else
  512. min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
  513. /*
  514. * This could theoretically loop forever but each retry has
  515. * p > 0.5 (worst case, usually far better) of selecting a
  516. * number inside the range we need, so it should rarely need
  517. * to re-roll.
  518. */
  519. for (;;) {
  520. r = dispatch_random(actx);
  521. if (r >= min)
  522. break;
  523. }
  524. return (r % upper_bound);
  525. }
  526. /*
  527. * Return a hash of the destination and message id.
  528. */
  529. static isc_uint32_t
  530. dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
  531. in_port_t port)
  532. {
  533. unsigned int ret;
  534. ret = isc_sockaddr_hash(dest, ISC_TRUE);
  535. ret ^= (id << 16) | port;
  536. ret %= qid->qid_nbuckets;
  537. INSIST(ret < qid->qid_nbuckets);
  538. return (ret);
  539. }
  540. /*
  541. * Find the first entry in 'qid'. Returns NULL if there are no entries.
  542. */
  543. static dns_dispentry_t *
  544. linear_first(dns_qid_t *qid) {
  545. dns_dispentry_t *ret;
  546. unsigned int bucket;
  547. bucket = 0;
  548. while (bucket < qid->qid_nbuckets) {
  549. ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
  550. if (ret != NULL)
  551. return (ret);
  552. bucket++;
  553. }
  554. return (NULL);
  555. }
  556. /*
  557. * Find the next entry after 'resp' in 'qid'. Return NULL if there are
  558. * no more entries.
  559. */
  560. static dns_dispentry_t *
  561. linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
  562. dns_dispentry_t *ret;
  563. unsigned int bucket;
  564. ret = ISC_LIST_NEXT(resp, link);
  565. if (ret != NULL)
  566. return (ret);
  567. bucket = resp->bucket;
  568. bucket++;
  569. while (bucket < qid->qid_nbuckets) {
  570. ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
  571. if (ret != NULL)
  572. return (ret);
  573. bucket++;
  574. }
  575. return (NULL);
  576. }
  577. /*
  578. * The dispatch must be locked.
  579. */
  580. static isc_boolean_t
  581. destroy_disp_ok(dns_dispatch_t *disp)
  582. {
  583. if (disp->refcount != 0)
  584. return (ISC_FALSE);
  585. if (disp->recv_pending != 0)
  586. return (ISC_FALSE);
  587. if (!ISC_LIST_EMPTY(disp->activesockets))
  588. return (ISC_FALSE);
  589. if (disp->shutting_down == 0)
  590. return (ISC_FALSE);
  591. return (ISC_TRUE);
  592. }
  593. /*
  594. * Called when refcount reaches 0 (and safe to destroy).
  595. *
  596. * The dispatcher must not be locked.
  597. * The manager must be locked.
  598. */
  599. static void
  600. destroy_disp(isc_task_t *task, isc_event_t *event) {
  601. dns_dispatch_t *disp;
  602. dns_dispatchmgr_t *mgr;
  603. isc_boolean_t killmgr;
  604. dispsocket_t *dispsocket;
  605. int i;
  606. INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
  607. UNUSED(task);
  608. disp = event->ev_arg;
  609. mgr = disp->mgr;
  610. LOCK(&mgr->lock);
  611. ISC_LIST_UNLINK(mgr->list, disp, link);
  612. dispatch_log(disp, LVL(90),
  613. "shutting down; detaching from sock %p, task %p",
  614. disp->socket, disp->task[0]); /* XXXX */
  615. if (disp->socket != NULL)
  616. isc_socket_detach(&disp->socket);
  617. while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
  618. ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
  619. destroy_dispsocket(disp, &dispsocket);
  620. }
  621. for (i = 0; i < disp->ntasks; i++)
  622. isc_task_detach(&disp->task[i]);
  623. isc_event_free(&event);
  624. dispatch_free(&disp);
  625. killmgr = destroy_mgr_ok(mgr);
  626. UNLOCK(&mgr->lock);
  627. if (killmgr)
  628. destroy_mgr(&mgr);
  629. }
  630. /*%
  631. * Manipulate port table per dispatch: find an entry for a given port number,
  632. * create a new entry, and decrement a given entry with possible clean-up.
  633. */
  634. static dispportentry_t *
  635. port_search(dns_dispatch_t *disp, in_port_t port) {
  636. dispportentry_t *portentry;
  637. REQUIRE(disp->port_table != NULL);
  638. portentry = ISC_LIST_HEAD(disp->port_table[port %
  639. DNS_DISPATCH_PORTTABLESIZE]);
  640. while (portentry != NULL) {
  641. if (portentry->port == port)
  642. return (portentry);
  643. portentry = ISC_LIST_NEXT(portentry, link);
  644. }
  645. return (NULL);
  646. }
  647. static dispportentry_t *
  648. new_portentry(dns_dispatch_t *disp, in_port_t port) {
  649. dispportentry_t *portentry;
  650. REQUIRE(disp->port_table != NULL);
  651. portentry = isc_mempool_get(disp->portpool);
  652. if (portentry == NULL)
  653. return (portentry);
  654. portentry->port = port;
  655. portentry->refs = 0;
  656. ISC_LINK_INIT(portentry, link);
  657. ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
  658. portentry, link);
  659. return (portentry);
  660. }
  661. /*%
  662. * The caller must not hold the qid->lock.
  663. */
  664. static void
  665. deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
  666. dispportentry_t *portentry = *portentryp;
  667. dns_qid_t *qid;
  668. REQUIRE(disp->port_table != NULL);
  669. REQUIRE(portentry != NULL && portentry->refs > 0);
  670. qid = DNS_QID(disp);
  671. LOCK(&qid->lock);
  672. portentry->refs--;
  673. if (portentry->refs == 0) {
  674. ISC_LIST_UNLINK(disp->port_table[portentry->port %
  675. DNS_DISPATCH_PORTTABLESIZE],
  676. portentry, link);
  677. isc_mempool_put(disp->portpool, portentry);
  678. }
  679. *portentryp = NULL;
  680. UNLOCK(&qid->lock);
  681. }
  682. /*%
  683. * Find a dispsocket for socket address 'dest', and port number 'port'.
  684. * Return NULL if no such entry exists.
  685. */
  686. static dispsocket_t *
  687. socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
  688. unsigned int bucket)
  689. {
  690. dispsocket_t *dispsock;
  691. REQUIRE(bucket < qid->qid_nbuckets);
  692. dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
  693. while (dispsock != NULL) {
  694. if (dispsock->portentry != NULL &&
  695. dispsock->portentry->port == port &&
  696. isc_sockaddr_equal(dest, &dispsock->host))
  697. return (dispsock);
  698. dispsock = ISC_LIST_NEXT(dispsock, blink);
  699. }
  700. return (NULL);
  701. }
  702. /*%
  703. * Make a new socket for a single dispatch with a random port number.
  704. * The caller must hold the disp->lock and qid->lock.
  705. */
  706. static isc_result_t
  707. get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
  708. isc_socketmgr_t *sockmgr, dns_qid_t *qid,
  709. dispsocket_t **dispsockp, in_port_t *portp)
  710. {
  711. int i;
  712. isc_uint32_t r;
  713. dns_dispatchmgr_t *mgr = disp->mgr;
  714. isc_socket_t *sock = NULL;
  715. isc_result_t result = ISC_R_FAILURE;
  716. in_port_t port;
  717. isc_sockaddr_t localaddr;
  718. unsigned int bucket = 0;
  719. dispsocket_t *dispsock;
  720. unsigned int nports;
  721. in_port_t *ports;
  722. unsigned int bindoptions;
  723. dispportentry_t *portentry = NULL;
  724. if (isc_sockaddr_pf(&disp->local) == AF_INET) {
  725. nports = disp->mgr->nv4ports;
  726. ports = disp->mgr->v4ports;
  727. } else {
  728. nports = disp->mgr->nv6ports;
  729. ports = disp->mgr->v6ports;
  730. }
  731. if (nports == 0)
  732. return (ISC_R_ADDRNOTAVAIL);
  733. dispsock = ISC_LIST_HEAD(disp->inactivesockets);
  734. if (dispsock != NULL) {
  735. ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
  736. sock = dispsock->socket;
  737. dispsock->socket = NULL;
  738. } else {
  739. dispsock = isc_mempool_get(mgr->spool);
  740. if (dispsock == NULL)
  741. return (ISC_R_NOMEMORY);
  742. disp->nsockets++;
  743. dispsock->socket = NULL;
  744. dispsock->disp = disp;
  745. dispsock->resp = NULL;
  746. dispsock->portentry = NULL;
  747. isc_random_get(&r);
  748. dispsock->task = NULL;
  749. isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
  750. ISC_LINK_INIT(dispsock, link);
  751. ISC_LINK_INIT(dispsock, blink);
  752. dispsock->magic = DISPSOCK_MAGIC;
  753. }
  754. /*
  755. * Pick up a random UDP port and open a new socket with it. Avoid
  756. * choosing ports that share the same destination because it will be
  757. * very likely to fail in bind(2) or connect(2).
  758. */
  759. localaddr = disp->local;
  760. for (i = 0; i < 64; i++) {
  761. port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
  762. nports)];
  763. isc_sockaddr_setport(&localaddr, port);
  764. bucket = dns_hash(qid, dest, 0, port);
  765. if (socket_search(qid, dest, port, bucket) != NULL)
  766. continue;
  767. bindoptions = 0;
  768. portentry = port_search(disp, port);
  769. if (portentry != NULL)
  770. bindoptions |= ISC_SOCKET_REUSEADDRESS;
  771. result = open_socket(sockmgr, &localaddr, bindoptions, &sock);
  772. if (result == ISC_R_SUCCESS) {
  773. if (portentry == NULL) {
  774. portentry = new_portentry(disp, port);
  775. if (portentry == NULL) {
  776. result = ISC_R_NOMEMORY;
  777. break;
  778. }
  779. }
  780. portentry->refs++;
  781. break;
  782. } else if (result == ISC_R_NOPERM) {
  783. char buf[ISC_SOCKADDR_FORMATSIZE];
  784. isc_sockaddr_format(&localaddr, buf, sizeof(buf));
  785. dispatch_log(disp, ISC_LOG_WARNING,
  786. "open_socket(%s) -> %s: continuing",
  787. buf, isc_result_totext(result));
  788. } else if (result != ISC_R_ADDRINUSE)
  789. break;
  790. }
  791. if (result == ISC_R_SUCCESS) {
  792. dispsock->socket = sock;
  793. dispsock->host = *dest;
  794. dispsock->portentry = portentry;
  795. dispsock->bucket = bucket;
  796. ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
  797. *dispsockp = dispsock;
  798. *portp = port;
  799. } else {
  800. /*
  801. * We could keep it in the inactive list, but since this should
  802. * be an exceptional case and might be resource shortage, we'd
  803. * rather destroy it.
  804. */
  805. if (sock != NULL)
  806. isc_socket_detach(&sock);
  807. destroy_dispsocket(disp, &dispsock);
  808. }
  809. return (result);
  810. }
  811. /*%
  812. * Destroy a dedicated dispatch socket.
  813. */
  814. static void
  815. destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
  816. dispsocket_t *dispsock;
  817. dns_qid_t *qid;
  818. /*
  819. * The dispatch must be locked.
  820. */
  821. REQUIRE(dispsockp != NULL && *dispsockp != NULL);
  822. dispsock = *dispsockp;
  823. REQUIRE(!ISC_LINK_LINKED(dispsock, link));
  824. disp->nsockets--;
  825. dispsock->magic = 0;
  826. if (dispsock->portentry != NULL)
  827. deref_portentry(disp, &dispsock->portentry);
  828. if (dispsock->socket != NULL)
  829. isc_socket_detach(&dispsock->socket);
  830. if (ISC_LINK_LINKED(dispsock, blink)) {
  831. qid = DNS_QID(disp);
  832. LOCK(&qid->lock);
  833. ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
  834. blink);
  835. UNLOCK(&qid->lock);
  836. }
  837. if (dispsock->task != NULL)
  838. isc_task_detach(&dispsock->task);
  839. isc_mempool_put(disp->mgr->spool, dispsock);
  840. *dispsockp = NULL;
  841. }
  842. /*%
  843. * Deactivate a dedicated dispatch socket. Move it to the inactive list for
  844. * future reuse unless the total number of sockets are exceeding the maximum.
  845. */
  846. static void
  847. deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
  848. isc_result_t result;
  849. dns_qid_t *qid;
  850. /*
  851. * The dispatch must be locked.
  852. */
  853. ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
  854. if (dispsock->resp != NULL) {
  855. INSIST(dispsock->resp->dispsocket == dispsock);
  856. dispsock->resp->dispsocket = NULL;
  857. }
  858. INSIST(dispsock->portentry != NULL);
  859. deref_portentry(disp, &dispsock->portentry);
  860. #ifdef BIND9
  861. if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
  862. destroy_dispsocket(disp, &dispsock);
  863. else {
  864. result = isc_socket_close(dispsock->socket);
  865. qid = DNS_QID(disp);
  866. LOCK(&qid->lock);
  867. ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
  868. blink);
  869. UNLOCK(&qid->lock);
  870. if (result == ISC_R_SUCCESS)
  871. ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
  872. else {
  873. /*
  874. * If the underlying system does not allow this
  875. * optimization, destroy this temporary structure (and
  876. * create a new one for a new transaction).
  877. */
  878. INSIST(result == ISC_R_NOTIMPLEMENTED);
  879. destroy_dispsocket(disp, &dispsock);
  880. }
  881. }
  882. #else
  883. /* This kind of optimization isn't necessary for normal use */
  884. UNUSED(qid);
  885. UNUSED(result);
  886. destroy_dispsocket(disp, &dispsock);
  887. #endif
  888. }
  889. /*
  890. * Find an entry for query ID 'id', socket address 'dest', and port number
  891. * 'port'.
  892. * Return NULL if no such entry exists.
  893. */
  894. static dns_dispentry_t *
  895. entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
  896. in_port_t port, unsigned int bucket)
  897. {
  898. dns_dispentry_t *res;
  899. REQUIRE(bucket < qid->qid_nbuckets);
  900. res = ISC_LIST_HEAD(qid->qid_table[bucket]);
  901. while (res != NULL) {
  902. if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
  903. res->port == port) {
  904. return (res);
  905. }
  906. res = ISC_LIST_NEXT(res, link);
  907. }
  908. return (NULL);
  909. }
  910. static void
  911. free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
  912. INSIST(buf != NULL && len != 0);
  913. switch (disp->socktype) {
  914. case isc_sockettype_tcp:
  915. INSIST(disp->tcpbuffers > 0);
  916. disp->tcpbuffers--;
  917. isc_mem_put(disp->mgr->mctx, buf, len);
  918. break;
  919. case isc_sockettype_udp:
  920. LOCK(&disp->mgr->buffer_lock);
  921. INSIST(disp->mgr->buffers > 0);
  922. INSIST(len == disp->mgr->buffersize);
  923. disp->mgr->buffers--;
  924. isc_mempool_put(disp->mgr->bpool, buf);
  925. UNLOCK(&disp->mgr->buffer_lock);
  926. break;
  927. default:
  928. INSIST(0);
  929. break;
  930. }
  931. }
  932. static void *
  933. allocate_udp_buffer(dns_dispatch_t *disp) {
  934. void *temp;
  935. LOCK(&disp->mgr->buffer_lock);
  936. temp = isc_mempool_get(disp->mgr->bpool);
  937. if (temp != NULL)
  938. disp->mgr->buffers++;
  939. UNLOCK(&disp->mgr->buffer_lock);
  940. return (temp);
  941. }
  942. static inline void
  943. free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
  944. if (disp->failsafe_ev == ev) {
  945. INSIST(disp->shutdown_out == 1);
  946. disp->shutdown_out = 0;
  947. return;
  948. }
  949. isc_mempool_put(disp->mgr->epool, ev);
  950. }
  951. static inline dns_dispatchevent_t *
  952. allocate_event(dns_dispatch_t *disp) {
  953. dns_dispatchevent_t *ev;
  954. ev = isc_mempool_get(disp->mgr->epool);
  955. if (ev == NULL)
  956. return (NULL);
  957. ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
  958. NULL, NULL, NULL, NULL, NULL);
  959. return (ev);
  960. }
  961. static void
  962. udp_exrecv(isc_task_t *task, isc_event_t *ev) {
  963. dispsocket_t *dispsock = ev->ev_arg;
  964. UNUSED(task);
  965. REQUIRE(VALID_DISPSOCK(dispsock));
  966. udp_recv(ev, dispsock->disp, dispsock);
  967. }
  968. static void
  969. udp_shrecv(isc_task_t *task, isc_event_t *ev) {
  970. dns_dispatch_t *disp = ev->ev_arg;
  971. UNUSED(task);
  972. REQUIRE(VALID_DISPATCH(disp));
  973. udp_recv(ev, disp, NULL);
  974. }
  975. /*
  976. * General flow:
  977. *
  978. * If I/O result == CANCELED or error, free the buffer.
  979. *
  980. * If query, free the buffer, restart.
  981. *
  982. * If response:
  983. * Allocate event, fill in details.
  984. * If cannot allocate, free buffer, restart.
  985. * find target. If not found, free buffer, restart.
  986. * if event queue is not empty, queue. else, send.
  987. * restart.
  988. */
  989. static void
  990. udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
  991. isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
  992. dns_messageid_t id;
  993. isc_result_t dres;
  994. isc_buffer_t source;
  995. unsigned int flags;
  996. dns_dispentry_t *resp = NULL;
  997. dns_dispatchevent_t *rev;
  998. unsigned int bucket;
  999. isc_boolean_t killit;
  1000. isc_boolean_t queue_response;
  1001. dns_dispatchmgr_t *mgr;
  1002. dns_qid_t *qid;
  1003. isc_netaddr_t netaddr;
  1004. int match;
  1005. int result;
  1006. isc_boolean_t qidlocked = ISC_FALSE;
  1007. LOCK(&disp->lock);
  1008. mgr = disp->mgr;
  1009. qid = mgr->qid;
  1010. dispatch_log(disp, LVL(90),
  1011. "got packet: requests %d, buffers %d, recvs %d",
  1012. disp->requests, disp->mgr->buffers, disp->recv_pending);
  1013. if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
  1014. /*
  1015. * Unless the receive event was imported from a listening
  1016. * interface, in which case the event type is
  1017. * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
  1018. */
  1019. INSIST(disp->recv_pending != 0);
  1020. disp->recv_pending = 0;
  1021. }
  1022. if (dispsock != NULL &&
  1023. (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
  1024. /*
  1025. * dispsock->resp can be NULL if this transaction was canceled
  1026. * just after receiving a response. Since this socket is
  1027. * exclusively used and there should be at most one receive
  1028. * event the canceled event should have been no effect. So
  1029. * we can (and should) deactivate the socket right now.
  1030. */
  1031. deactivate_dispsocket(disp, dispsock);
  1032. dispsock = NULL;
  1033. }
  1034. if (disp->shutting_down) {
  1035. /*
  1036. * This dispatcher is shutting down.
  1037. */
  1038. free_buffer(disp, ev->region.base, ev->region.length);
  1039. isc_event_free(&ev_in);
  1040. ev = NULL;
  1041. killit = destroy_disp_ok(disp);
  1042. UNLOCK(&disp->lock);
  1043. if (killit)
  1044. isc_task_send(disp->task[0], &disp->ctlevent);
  1045. return;
  1046. }
  1047. if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
  1048. if (dispsock != NULL) {
  1049. resp = dispsock->resp;
  1050. id = resp->id;
  1051. if (ev->result != ISC_R_SUCCESS) {
  1052. /*
  1053. * This is most likely a network error on a
  1054. * connected socket. It makes no sense to
  1055. * check the address or parse the packet, but it
  1056. * will help to return the error to the caller.
  1057. */
  1058. goto sendresponse;
  1059. }
  1060. } else {
  1061. free_buffer(disp, ev->region.base, ev->region.length);
  1062. UNLOCK(&disp->lock);
  1063. isc_event_free(&ev_in);
  1064. return;
  1065. }
  1066. } else if (ev->result != ISC_R_SUCCESS) {
  1067. free_buffer(disp, ev->region.base, ev->region.length);
  1068. if (ev->result != ISC_R_CANCELED)
  1069. dispatch_log(disp, ISC_LOG_ERROR,
  1070. "odd socket result in udp_recv(): %s",
  1071. isc_result_totext(ev->result));
  1072. UNLOCK(&disp->lock);
  1073. isc_event_free(&ev_in);
  1074. return;
  1075. }
  1076. /*
  1077. * If this is from a blackholed address, drop it.
  1078. */
  1079. isc_netaddr_fromsockaddr(&netaddr, &ev->address);
  1080. if (disp->mgr->blackhole != NULL &&
  1081. dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
  1082. NULL, &match, NULL) == ISC_R_SUCCESS &&
  1083. match > 0)
  1084. {
  1085. if (isc_log_wouldlog(dns_lctx, LVL(10))) {
  1086. char netaddrstr[ISC_NETADDR_FORMATSIZE];
  1087. isc_netaddr_format(&netaddr, netaddrstr,
  1088. sizeof(netaddrstr));
  1089. dispatch_log(disp, LVL(10),
  1090. "blackholed packet from %s",
  1091. netaddrstr);
  1092. }
  1093. free_buffer(disp, ev->region.base, ev->region.length);
  1094. goto restart;
  1095. }
  1096. /*
  1097. * Peek into the buffer to see what we can see.
  1098. */
  1099. isc_buffer_init(&source, ev->region.base, ev->region.length);
  1100. isc_buffer_add(&source, ev->n);
  1101. dres = dns_message_peekheader(&source, &id, &flags);
  1102. if (dres != ISC_R_SUCCESS) {
  1103. free_buffer(disp, ev->region.base, ev->region.length);
  1104. dispatch_log(disp, LVL(10), "got garbage packet");
  1105. goto restart;
  1106. }
  1107. dispatch_log(disp, LVL(92),
  1108. "got valid DNS message header, /QR %c, id %u",
  1109. ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
  1110. /*
  1111. * Look at flags. If query, drop it. If response,
  1112. * look to see where it goes.
  1113. */
  1114. if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
  1115. /* query */
  1116. free_buffer(disp, ev->region.base, ev->region.length);
  1117. goto restart;
  1118. }
  1119. /*
  1120. * Search for the corresponding response. If we are using an exclusive
  1121. * socket, we've already identified it and we can skip the search; but
  1122. * the ID and the address must match the expected ones.
  1123. */
  1124. if (resp == NULL) {
  1125. bucket = dns_hash(qid, &ev->address, id, disp->localport);
  1126. LOCK(&qid->lock);
  1127. qidlocked = ISC_TRUE;
  1128. resp = entry_search(qid, &ev->address, id, disp->localport,
  1129. bucket);
  1130. dispatch_log(disp, LVL(90),
  1131. "search for response in bucket %d: %s",
  1132. bucket, (resp == NULL ? "not found" : "found"));
  1133. if (resp == NULL) {
  1134. inc_stats(mgr, dns_resstatscounter_mismatch);
  1135. free_buffer(disp, ev->region.base, ev->region.length);
  1136. goto unlock;
  1137. }
  1138. } else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
  1139. &resp->host)) {
  1140. dispatch_log(disp, LVL(90),
  1141. "response to an exclusive socket doesn't match");
  1142. inc_stats(mgr, dns_resstatscounter_mismatch);
  1143. free_buffer(disp, ev->region.base, ev->region.length);
  1144. goto unlock;
  1145. }
  1146. /*
  1147. * Now that we have the original dispatch the query was sent
  1148. * from check that the address and port the response was
  1149. * sent to make sense.
  1150. */
  1151. if (disp != resp->disp) {
  1152. isc_sockaddr_t a1;
  1153. isc_sockaddr_t a2;
  1154. /*
  1155. * Check that the socket types and ports match.
  1156. */
  1157. if (disp->socktype != resp->disp->socktype ||
  1158. isc_sockaddr_getport(&disp->local) !=
  1159. isc_sockaddr_getport(&resp->disp->local)) {
  1160. free_buffer(disp, ev->region.base, ev->region.length);
  1161. goto unlock;
  1162. }
  1163. /*
  1164. * If both dispatches are bound to an address then fail as
  1165. * the addresses can't be equal (enforced by the IP stack).
  1166. *
  1167. * Note under Linux a packet can be sent out via IPv4 socket
  1168. * and the response be received via a IPv6 socket.
  1169. *
  1170. * Requests sent out via IPv6 should always come back in
  1171. * via IPv6.
  1172. */
  1173. if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
  1174. isc_sockaddr_pf(&disp->local) != PF_INET6) {
  1175. free_buffer(disp, ev->region.base, ev->region.length);
  1176. goto unlock;
  1177. }
  1178. isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
  1179. isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
  1180. if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
  1181. !isc_sockaddr_eqaddr(&a2, &disp->local)) {
  1182. free_buffer(disp, ev->region.base, ev->region.length);
  1183. goto unlock;
  1184. }
  1185. }
  1186. sendresponse:
  1187. queue_response = resp->item_out;
  1188. rev = allocate_event(resp->disp);
  1189. if (rev == NULL) {
  1190. free_buffer(disp, ev->region.base, ev->region.length);
  1191. goto unlock;
  1192. }
  1193. /*
  1194. * At this point, rev contains the event we want to fill in, and
  1195. * resp contains the information on the place to send it to.
  1196. * Send the event off.
  1197. */
  1198. isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
  1199. isc_buffer_add(&rev->buffer, ev->n);
  1200. rev->result = ev->result;
  1201. rev->id = id;
  1202. rev->addr = ev->address;
  1203. rev->pktinfo = ev->pktinfo;
  1204. rev->attributes = ev->attributes;
  1205. if (queue_response) {
  1206. ISC_LIST_APPEND(resp->items, rev, ev_link);
  1207. } else {
  1208. ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
  1209. DNS_EVENT_DISPATCH,
  1210. resp->action, resp->arg, resp, NULL, NULL);
  1211. request_log(disp, resp, LVL(90),
  1212. "[a] Sent event %p buffer %p len %d to task %p",
  1213. rev, rev->buffer.base, rev->buffer.length,
  1214. resp->task);
  1215. resp->item_out = ISC_TRUE;
  1216. isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
  1217. }
  1218. unlock:
  1219. if (qidlocked)
  1220. UNLOCK(&qid->lock);
  1221. /*
  1222. * Restart recv() to get the next packet.
  1223. */
  1224. restart:
  1225. result = startrecv(disp, dispsock);
  1226. if (result != ISC_R_SUCCESS && dispsock != NULL) {
  1227. /*
  1228. * XXX: wired. There seems to be no recovery process other than
  1229. * deactivate this socket anyway (since we cannot start
  1230. * receiving, we won't be able to receive a cancel event
  1231. * from the user).
  1232. */
  1233. deactivate_dispsocket(disp, dispsock);
  1234. }
  1235. UNLOCK(&disp->lock);
  1236. isc_event_free(&ev_in);
  1237. }
  1238. /*
  1239. * General flow:
  1240. *
  1241. * If I/O result == CANCELED, EOF, or error, notify everyone as the
  1242. * various queues drain.
  1243. *
  1244. * If query, restart.
  1245. *
  1246. * If response:
  1247. * Allocate event, fill in details.
  1248. * If cannot allocate, restart.
  1249. * find target. If not found, restart.
  1250. * if event queue is not empty, queue. else, send.
  1251. * restart.
  1252. */
  1253. static void
  1254. tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
  1255. dns_dispatch_t *disp = ev_in->ev_arg;
  1256. dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
  1257. dns_messageid_t id;
  1258. isc_result_t dres;
  1259. unsigned int flags;
  1260. dns_dispentry_t *resp;
  1261. dns_dispatchevent_t *rev;
  1262. unsigned int bucket;
  1263. isc_boolean_t killit;
  1264. isc_boolean_t queue_response;
  1265. dns_qid_t *qid;
  1266. int level;
  1267. char buf[ISC_SOCKADDR_FORMATSIZE];
  1268. UNUSED(task);
  1269. REQUIRE(VALID_DISPATCH(disp));
  1270. qid = disp->qid;
  1271. dispatch_log(disp, LVL(90),
  1272. "got TCP packet: requests %d, buffers %d, recvs %d",
  1273. disp->requests, disp->tcpbuffers, disp->recv_pending);
  1274. LOCK(&disp->lock);
  1275. INSIST(disp->recv_pending != 0);
  1276. disp->recv_pending = 0;
  1277. if (disp->refcount == 0) {
  1278. /*
  1279. * This dispatcher is shutting down. Force cancelation.
  1280. */
  1281. tcpmsg->result = ISC_R_CANCELED;
  1282. }
  1283. if (tcpmsg->result != ISC_R_SUCCESS) {
  1284. switch (tcpmsg->result) {
  1285. case ISC_R_CANCELED:
  1286. break;
  1287. case ISC_R_EOF:
  1288. dispatch_log(disp, LVL(90), "shutting down on EOF");
  1289. do_cancel(disp);
  1290. break;
  1291. case ISC_R_CONNECTIONRESET:
  1292. level = ISC_LOG_INFO;
  1293. goto logit;
  1294. default:
  1295. level = ISC_LOG_ERROR;
  1296. logit:
  1297. isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
  1298. dispatch_log(disp, level, "shutting down due to TCP "
  1299. "receive error: %s: %s", buf,
  1300. isc_result_totext(tcpmsg->result));
  1301. do_cancel(disp);
  1302. break;
  1303. }
  1304. /*
  1305. * The event is statically allocated in the tcpmsg
  1306. * structure, and destroy_disp() frees the tcpmsg, so we must
  1307. * free the event *before* calling destroy_disp().
  1308. */
  1309. isc_event_free(&ev_in);
  1310. disp->shutting_down = 1;
  1311. disp->shutdown_why = tcpmsg->result;
  1312. /*
  1313. * If the recv() was canceled pass the word on.
  1314. */
  1315. killit = destroy_disp_ok(disp);
  1316. UNLOCK(&disp->lock);
  1317. if (killit)
  1318. isc_task_send(disp->task[0], &disp->ctlevent);
  1319. return;
  1320. }
  1321. dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
  1322. tcpmsg->result,
  1323. tcpmsg->buffer.length, tcpmsg->buffer.base);
  1324. /*
  1325. * Peek into the buffer to see what we can see.
  1326. */
  1327. dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
  1328. if (dres != ISC_R_SUCCESS) {
  1329. dispatch_log(disp, LVL(10), "got garbage packet");
  1330. goto restart;
  1331. }
  1332. dispatch_log(disp, LVL(92),
  1333. "got valid DNS message header, /QR %c, id %u",
  1334. ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
  1335. /*
  1336. * Allocate an event to send to the query or response client, and
  1337. * allocate a new buffer for our use.
  1338. */
  1339. /*
  1340. * Look at flags. If query, drop it. If response,
  1341. * look to see where it goes.
  1342. */
  1343. if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
  1344. /*
  1345. * Query.
  1346. */
  1347. goto restart;
  1348. }
  1349. /*
  1350. * Response.
  1351. */
  1352. bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
  1353. LOCK(&qid->lock);
  1354. resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
  1355. dispatch_log(disp, LVL(90),
  1356. "search for response in bucket %d: %s",
  1357. bucket, (resp == NULL ? "not found" : "found"));
  1358. if (resp == NULL)
  1359. goto unlock;
  1360. queue_response = resp->item_out;
  1361. rev = allocate_event(disp);
  1362. if (rev == NULL)
  1363. goto unlock;
  1364. /*
  1365. * At this point, rev contains the event we want to fill in, and
  1366. * resp contains the information on the place to send it to.
  1367. * Send the event off.
  1368. */
  1369. dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
  1370. disp->tcpbuffers++;
  1371. rev->result = ISC_R_SUCCESS;
  1372. rev->id = id;
  1373. rev->addr = tcpmsg->address;
  1374. if (queue_response) {
  1375. ISC_LIST_APPEND(resp->items, rev, ev_link);
  1376. } else {
  1377. ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
  1378. resp->action, resp->arg, resp, NULL, NULL);
  1379. request_log(disp, resp, LVL(90),
  1380. "[b] Sent event %p buffer %p len %d to task %p",
  1381. rev, rev->buffer.base, rev->buffer.length,
  1382. resp->task);
  1383. resp->item_out = ISC_TRUE;
  1384. isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
  1385. }
  1386. unlock:
  1387. UNLOCK(&qid->lock);
  1388. /*
  1389. * Restart recv() to get the next packet.
  1390. */
  1391. restart:
  1392. (void)startrecv(disp, NULL);
  1393. UNLOCK(&disp->lock);
  1394. isc_event_free(&ev_in);
  1395. }
  1396. /*
  1397. * disp must be locked.
  1398. */
  1399. static isc_result_t
  1400. startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
  1401. isc_result_t res;
  1402. isc_region_t region;
  1403. isc_socket_t *socket;
  1404. if (disp->shutting_down == 1)
  1405. return (ISC_R_SUCCESS);
  1406. if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
  1407. return (ISC_R_SUCCESS);
  1408. if (disp->recv_pending != 0 && dispsock == NULL)
  1409. return (ISC_R_SUCCESS);
  1410. if (disp->mgr->buffers >= disp->mgr->maxbuffers)
  1411. return (ISC_R_NOMEMORY);
  1412. if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
  1413. dispsock == NULL)
  1414. return (ISC_R_SUCCESS);
  1415. if (dispsock != NULL)
  1416. socket = dispsock->socket;
  1417. else
  1418. socket = disp->socket;
  1419. INSIST(socket != NULL);
  1420. switch (disp->socktype) {
  1421. /*
  1422. * UDP reads are always maximal.
  1423. */
  1424. case isc_sockettype_udp:
  1425. region.length = disp->mgr->buffersize;
  1426. region.base = allocate_udp_buffer(disp);
  1427. if (region.base == NULL)
  1428. return (ISC_R_NOMEMORY);
  1429. if (dispsock != NULL) {
  1430. res = isc_socket_recv(socket, &region, 1,
  1431. dispsock->task, udp_exrecv,
  1432. dispsock);
  1433. if (res != ISC_R_SUCCESS) {
  1434. free_buffer(disp, region.base, region.length);
  1435. return (res);
  1436. }
  1437. } else {
  1438. res = isc_socket_recv(socket, &region, 1,
  1439. disp->task[0], udp_shrecv, disp);
  1440. if (res != ISC_R_SUCCESS) {
  1441. free_buffer(disp, region.base, region.length);
  1442. disp->shutdown_why = res;
  1443. disp->shutting_down = 1;
  1444. do_cancel(disp);
  1445. return (ISC_R_SUCCESS); /* recover by cancel */
  1446. }
  1447. INSIST(disp->recv_pending == 0);
  1448. disp->recv_pending = 1;
  1449. }
  1450. break;
  1451. case isc_sockettype_tcp:
  1452. res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
  1453. tcp_recv, disp);
  1454. if (res != ISC_R_SUCCESS) {
  1455. disp->shutdown_why = res;
  1456. disp->shutting_down = 1;
  1457. do_cancel(disp);
  1458. return (ISC_R_SUCCESS); /* recover by cancel */
  1459. }
  1460. INSIST(disp->recv_pending == 0);
  1461. disp->recv_pending = 1;
  1462. break;
  1463. default:
  1464. INSIST(0);
  1465. break;
  1466. }
  1467. return (ISC_R_SUCCESS);
  1468. }
  1469. /*
  1470. * Mgr must be locked when calling this function.
  1471. */
  1472. static isc_boolean_t
  1473. destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
  1474. mgr_log(mgr, LVL(90),
  1475. "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
  1476. "epool=%d, rpool=%d, dpool=%d",
  1477. MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
  1478. isc_mempool_getallocated(mgr->epool),
  1479. isc_mempool_getallocated(mgr->rpool),
  1480. isc_mempool_getallocated(mgr->dpool));
  1481. if (!MGR_IS_SHUTTINGDOWN(mgr))
  1482. return (ISC_FALSE);
  1483. if (!ISC_LIST_EMPTY(mgr->list))
  1484. return (ISC_FALSE);
  1485. if (isc_mempool_getallocated(mgr->epool) != 0)
  1486. return (ISC_FALSE);
  1487. if (isc_mempool_getallocated(mgr->rpool) != 0)
  1488. return (ISC_FALSE);
  1489. if (isc_mempool_getallocated(mgr->dpool) != 0)
  1490. return (ISC_FALSE);
  1491. return (ISC_TRUE);
  1492. }
  1493. /*
  1494. * Mgr must be unlocked when calling this function.
  1495. */
  1496. static void
  1497. destroy_mgr(dns_dispatchmgr_t **mgrp) {
  1498. isc_mem_t *mctx;
  1499. dns_dispatchmgr_t *mgr;
  1500. mgr = *mgrp;
  1501. *mgrp = NULL;
  1502. mctx = mgr->mctx;
  1503. mgr->magic = 0;
  1504. mgr->mctx = NULL;
  1505. DESTROYLOCK(&mgr->lock);
  1506. mgr->state = 0;
  1507. DESTROYLOCK(&mgr->arc4_lock);
  1508. isc_mempool_destroy(&mgr->epool);
  1509. isc_mempool_destroy(&mgr->rpool);
  1510. isc_mempool_destroy(&mgr->dpool);
  1511. if (mgr->bpool != NULL)
  1512. isc_mempool_destroy(&mgr->bpool);
  1513. if (mgr->spool != NULL)
  1514. isc_mempool_destroy(&mgr->spool);
  1515. DESTROYLOCK(&mgr->pool_lock);
  1516. #ifdef BIND9
  1517. if (mgr->entropy != NULL)
  1518. isc_entropy_detach(&mgr->entropy);
  1519. #endif /* BIND9 */
  1520. if (mgr->qid != NULL)
  1521. qid_destroy(mctx, &mgr->qid);
  1522. DESTROYLOCK(&mgr->buffer_lock);
  1523. if (mgr->blackhole != NULL)
  1524. dns_acl_detach(&mgr->blackhole);
  1525. if (mgr->stats != NULL)
  1526. isc_stats_detach(&mgr->stats);
  1527. if (mgr->v4ports != NULL) {
  1528. isc_mem_put(mctx, mgr->v4ports,
  1529. mgr->nv4ports * sizeof(in_port_t));
  1530. }
  1531. if (mgr->v6ports != NULL) {
  1532. isc_mem_put(mctx, mgr->v6ports,
  1533. mgr->nv6ports * sizeof(in_port_t));
  1534. }
  1535. isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
  1536. isc_mem_detach(&mctx);
  1537. }
  1538. static isc_result_t
  1539. open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
  1540. unsigned int options, isc_socket_t **sockp)
  1541. {
  1542. isc_socket_t *sock;
  1543. isc_result_t result;
  1544. sock = *sockp;
  1545. if (sock == NULL) {
  1546. result = isc_socket_create(mgr, isc_sockaddr_pf(local),
  1547. isc_sockettype_udp, &sock);
  1548. if (result != ISC_R_SUCCESS)
  1549. return (result);
  1550. isc_socket_setname(sock, "dispatcher", NULL);
  1551. } else {
  1552. #ifdef BIND9
  1553. result = isc_socket_open(sock);
  1554. if (result != ISC_R_SUCCESS)
  1555. return (result);
  1556. #else
  1557. INSIST(0);
  1558. #endif
  1559. }
  1560. #ifndef ISC_ALLOW_MAPPED
  1561. isc_socket_ipv6only(sock, ISC_TRUE);
  1562. #endif
  1563. result = isc_socket_bind(sock, local, options);
  1564. if (result != ISC_R_SUCCESS) {
  1565. if (*sockp == NULL)
  1566. isc_socket_detach(&sock);
  1567. else {
  1568. #ifdef BIND9
  1569. isc_socket_close(sock);
  1570. #else
  1571. INSIST(0);
  1572. #endif
  1573. }
  1574. return (result);
  1575. }
  1576. *sockp = sock;
  1577. return (ISC_R_SUCCESS);
  1578. }
  1579. /*%
  1580. * Create a temporary port list to set the initial default set of dispatch
  1581. * ports: [1024, 65535]. This is almost meaningless as the application will
  1582. * normally set the ports explicitly, but is provided to fill some minor corner
  1583. * cases.
  1584. */
  1585. static isc_result_t
  1586. create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
  1587. isc_result_t result;
  1588. result = isc_portset_create(mctx, portsetp);
  1589. if (result != ISC_R_SUCCESS)
  1590. return (result);
  1591. isc_portset_addrange(*portsetp, 1024, 65535);
  1592. return (ISC_R_SUCCESS);
  1593. }
  1594. /*
  1595. * Publics.
  1596. */
  1597. isc_result_t
  1598. dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
  1599. dns_dispatchmgr_t **mgrp)
  1600. {
  1601. dns_dispatchmgr_t *mgr;
  1602. isc_result_t result;
  1603. isc_portset_t *v4portset = NULL;
  1604. isc_portset_t *v6portset = NULL;
  1605. REQUIRE(mctx != NULL);
  1606. REQUIRE(mgrp != NULL && *mgrp == NULL);
  1607. mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
  1608. if (mgr == NULL)
  1609. return (ISC_R_NOMEMORY);
  1610. mgr->mctx = NULL;
  1611. isc_mem_attach(mctx, &mgr->mctx);
  1612. mgr->blackhole = NULL;
  1613. mgr->stats = NULL;
  1614. result = isc_mutex_init(&mgr->lock);
  1615. if (result != ISC_R_SUCCESS)
  1616. goto deallocate;
  1617. result = isc_mutex_init(&mgr->arc4_lock);
  1618. if (result != ISC_R_SUCCESS)
  1619. goto kill_lock;
  1620. result = isc_mutex_init(&mgr->buffer_lock);
  1621. if (result != ISC_R_SUCCESS)
  1622. goto kill_arc4_lock;
  1623. result = isc_mutex_init(&mgr->pool_lock);
  1624. if (result != ISC_R_SUCCESS)
  1625. goto kill_buffer_lock;
  1626. mgr->epool = NULL;
  1627. if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
  1628. &mgr->epool) != ISC_R_SUCCESS) {
  1629. result = ISC_R_NOMEMORY;
  1630. goto kill_pool_lock;
  1631. }
  1632. mgr->rpool = NULL;
  1633. if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
  1634. &mgr->rpool) != ISC_R_SUCCESS) {
  1635. result = ISC_R_NOMEMORY;
  1636. goto kill_epool;
  1637. }
  1638. mgr->dpool = NULL;
  1639. if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
  1640. &mgr->dpool) != ISC_R_SUCCESS) {
  1641. result = ISC_R_NOMEMORY;
  1642. goto kill_rpool;
  1643. }
  1644. isc_mempool_setname(mgr->epool, "dispmgr_epool");
  1645. isc_mempool_setfreemax(mgr->epool, 1024);
  1646. isc_mempool_associatelock(mgr->epool, &mgr->pool_lock);
  1647. isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
  1648. isc_mempool_setfreemax(mgr->rpool, 1024);
  1649. isc_mempool_associatelock(mgr->rpool, &mgr->pool_lock);
  1650. isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
  1651. isc_mempool_setfreemax(mgr->dpool, 1024);
  1652. isc_mempool_associatelock(mgr->dpool, &mgr->pool_lock);
  1653. mgr->buffers = 0;
  1654. mgr->buffersize = 0;
  1655. mgr->maxbuffers = 0;
  1656. mgr->bpool = NULL;
  1657. mgr->spool = NULL;
  1658. mgr->entropy = NULL;
  1659. mgr->qid = NULL;
  1660. mgr->state = 0;
  1661. ISC_LIST_INIT(mgr->list);
  1662. mgr->v4ports = NULL;
  1663. mgr->v6ports = NULL;
  1664. mgr->nv4ports = 0;
  1665. mgr->nv6ports = 0;
  1666. mgr->magic = DNS_DISPATCHMGR_MAGIC;
  1667. result = create_default_portset(mctx, &v4portset);
  1668. if (result == ISC_R_SUCCESS) {
  1669. result = create_default_portset(mctx, &v6portset);
  1670. if (result == ISC_R_SUCCESS) {
  1671. result = dns_dispatchmgr_setavailports(mgr,
  1672. v4portset,
  1673. v6portset);
  1674. }
  1675. }
  1676. if (v4portset != NULL)
  1677. isc_portset_destroy(mctx, &v4portset);
  1678. if (v6portset != NULL)
  1679. isc_portset_destroy(mctx, &v6portset);
  1680. if (result != ISC_R_SUCCESS)
  1681. goto kill_dpool;
  1682. #ifdef BIND9
  1683. if (entropy != NULL)
  1684. isc_entropy_attach(entropy, &mgr->entropy);
  1685. #else
  1686. UNUSED(entropy);
  1687. #endif
  1688. dispatch_initrandom(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock);
  1689. *mgrp = mgr;
  1690. return (ISC_R_SUCCESS);
  1691. kill_dpool:
  1692. isc_mempool_destroy(&mgr->dpool);
  1693. kill_rpool:
  1694. isc_mempool_destroy(&mgr->rpool);
  1695. kill_epool:
  1696. isc_mempool_destroy(&mgr->epool);
  1697. kill_pool_lock:
  1698. DESTROYLOCK(&mgr->pool_lock);
  1699. kill_buffer_lock:
  1700. DESTROYLOCK(&mgr->buffer_lock);
  1701. kill_arc4_lock:
  1702. DESTROYLOCK(&mgr->arc4_lock);
  1703. kill_lock:
  1704. DESTROYLOCK(&mgr->lock);
  1705. deallocate:
  1706. isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
  1707. isc_mem_detach(&mctx);
  1708. return (result);
  1709. }
  1710. void
  1711. dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
  1712. REQUIRE(VALID_DISPATCHMGR(mgr));
  1713. if (mgr->blackhole != NULL)
  1714. dns_acl_detach(&mgr->blackhole);
  1715. dns_acl_attach(blackhole, &mgr->blackhole);
  1716. }
  1717. dns_acl_t *
  1718. dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
  1719. REQUIRE(VALID_DISPATCHMGR(mgr));
  1720. return (mgr->blackhole);
  1721. }
  1722. void
  1723. dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
  1724. dns_portlist_t *portlist)
  1725. {
  1726. REQUIRE(VALID_DISPATCHMGR(mgr));
  1727. UNUSED(portlist);
  1728. /* This function is deprecated: use dns_dispatchmgr_setavailports(). */
  1729. return;
  1730. }
  1731. dns_portlist_t *
  1732. dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
  1733. REQUIRE(VALID_DISPATCHMGR(mgr));
  1734. return (NULL); /* this function is deprecated */
  1735. }
  1736. isc_result_t
  1737. dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset,
  1738. isc_portset_t *v6portset)
  1739. {
  1740. in_port_t *v4ports, *v6ports, p;
  1741. unsigned int nv4ports, nv6ports, i4, i6;
  1742. REQUIRE(VALID_DISPATCHMGR(mgr));
  1743. nv4ports = isc_portset_nports(v4portset);
  1744. nv6ports = isc_portset_nports(v6portset);
  1745. v4ports = NULL;
  1746. if (nv4ports != 0) {
  1747. v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports);
  1748. if (v4ports == NULL)
  1749. return (ISC_R_NOMEMORY);
  1750. }
  1751. v6ports = NULL;
  1752. if (nv6ports != 0) {
  1753. v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports);
  1754. if (v6ports == NULL) {
  1755. if (v4ports != NULL) {
  1756. isc_mem_put(mgr->mctx, v4ports,
  1757. sizeof(in_port_t) *
  1758. isc_portset_nports(v4portset));
  1759. }
  1760. return (ISC_R_NOMEMORY);
  1761. }
  1762. }
  1763. p = 0;
  1764. i4 = 0;
  1765. i6 = 0;
  1766. do {
  1767. if (isc_portset_isset(v4portset, p)) {
  1768. INSIST(i4 < nv4ports);
  1769. v4ports[i4++] = p;
  1770. }
  1771. if (isc_portset_isset(v6portset, p)) {
  1772. INSIST(i6 < nv6ports);
  1773. v6ports[i6++] = p;
  1774. }
  1775. } while (p++ < 65535);
  1776. INSIST(i4 == nv4ports && i6 == nv6ports);
  1777. PORTBUFLOCK(mgr);
  1778. if (mgr->v4ports != NULL) {
  1779. isc_mem_put(mgr->mctx, mgr->v4ports,
  1780. mgr->nv4ports * sizeof(in_port_t));
  1781. }
  1782. mgr->v4ports = v4ports;
  1783. mgr->nv4ports = nv4ports;
  1784. if (mgr->v6ports != NULL) {
  1785. isc_mem_put(mgr->mctx, mgr->v6ports,
  1786. mgr->nv6ports * sizeof(in_port_t));
  1787. }
  1788. mgr->v6ports = v6ports;
  1789. mgr->nv6ports = nv6ports;
  1790. PORTBUFUNLOCK(mgr);
  1791. return (ISC_R_SUCCESS);
  1792. }
  1793. static isc_result_t
  1794. dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
  1795. unsigned int buffersize, unsigned int maxbuffers,
  1796. unsigned int maxrequests, unsigned int buckets,
  1797. unsigned int increment)
  1798. {
  1799. isc_result_t result;
  1800. REQUIRE(VALID_DISPATCHMGR(mgr));
  1801. REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
  1802. REQUIRE(maxbuffers > 0);
  1803. REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
  1804. REQUIRE(increment > buckets);
  1805. /*
  1806. * Keep some number of items around. This should be a config
  1807. * option. For now, keep 8, but later keep at least two even
  1808. * if the caller wants less. This allows us to ensure certain
  1809. * things, like an event can be "freed" and the next allocation
  1810. * will always succeed.
  1811. *
  1812. * Note that if limits are placed on anything here, we use one
  1813. * event internally, so the actual limit should be "wanted + 1."
  1814. *
  1815. * XXXMLG
  1816. */
  1817. if (maxbuffers < 8)
  1818. maxbuffers = 8;
  1819. LOCK(&mgr->buffer_lock);
  1820. /* Create or adjust buffer pool */
  1821. if (mgr->bpool != NULL) {
  1822. /*
  1823. * We only increase the maxbuffers to avoid accidental buffer
  1824. * shortage. Ideally we'd separate the manager-wide maximum
  1825. * from per-dispatch limits and respect the latter within the
  1826. * global limit. But at this moment that's deemed to be
  1827. * overkilling and isn't worth additional implementation
  1828. * complexity.
  1829. */
  1830. if (maxbuffers > mgr->maxbuffers) {
  1831. isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
  1832. mgr->maxbuffers = maxbuffers;
  1833. }
  1834. } else {
  1835. result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool);
  1836. if (result != ISC_R_SUCCESS) {
  1837. UNLOCK(&mgr->buffer_lock);
  1838. return (result);
  1839. }
  1840. isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
  1841. isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
  1842. isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock);
  1843. }
  1844. /* Create or adjust socket pool */
  1845. if (mgr->spool != NULL) {
  1846. isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2);
  1847. UNLOCK(&mgr->buffer_lock);
  1848. return (ISC_R_SUCCESS);
  1849. }
  1850. result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t),
  1851. &mgr->spool);
  1852. if (result != ISC_R_SUCCESS) {
  1853. UNLOCK(&mgr->buffer_lock);
  1854. goto cleanup;
  1855. }
  1856. isc_mempool_setname(mgr->spool, "dispmgr_spool");
  1857. isc_mempool_setmaxalloc(mgr->spool, maxrequests);
  1858. isc_mempool_associatelock(mgr->spool, &mgr->pool_lock);
  1859. result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE);
  1860. if (result != ISC_R_SUCCESS)
  1861. goto cleanup;
  1862. mgr->buffersize = buffersize;
  1863. mgr->maxbuffers = maxbuffers;
  1864. UNLOCK(&mgr->buffer_lock);
  1865. return (ISC_R_SUCCESS);
  1866. cleanup:
  1867. isc_mempool_destroy(&mgr->bpool);
  1868. if (mgr->spool != NULL)
  1869. isc_mempool_destroy(&mgr->spool);
  1870. UNLOCK(&mgr->buffer_lock);
  1871. return (result);
  1872. }
  1873. void
  1874. dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
  1875. dns_dispatchmgr_t *mgr;
  1876. isc_boolean_t killit;
  1877. REQUIRE(mgrp != NULL);
  1878. REQUIRE(VALID_DISPATCHMGR(*mgrp));
  1879. mgr = *mgrp;
  1880. *mgrp = NULL;
  1881. LOCK(&mgr->lock);
  1882. mgr->state |= MGR_SHUTTINGDOWN;
  1883. killit = destroy_mgr_ok(mgr);
  1884. UNLOCK(&mgr->lock);
  1885. mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
  1886. if (killit)
  1887. destroy_mgr(&mgr);
  1888. }
  1889. void
  1890. dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) {
  1891. REQUIRE(VALID_DISPATCHMGR(mgr));
  1892. REQUIRE(ISC_LIST_EMPTY(mgr->list));
  1893. REQUIRE(mgr->stats == NULL);
  1894. isc_stats_attach(stats, &mgr->stats);
  1895. }
  1896. static int
  1897. port_cmp(const void *key, const void *ent) {
  1898. in_port_t p1 = *(const in_port_t *)key;
  1899. in_port_t p2 = *(const in_port_t *)ent;
  1900. if (p1 < p2)
  1901. return (-1);
  1902. else if (p1 == p2)
  1903. return (0);
  1904. else
  1905. return (1);
  1906. }
  1907. static isc_boolean_t
  1908. portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
  1909. isc_sockaddr_t *sockaddrp)
  1910. {
  1911. isc_sockaddr_t sockaddr;
  1912. isc_result_t result;
  1913. in_port_t *ports, port;
  1914. unsigned int nports;
  1915. isc_boolean_t available = ISC_FALSE;
  1916. REQUIRE(sock != NULL || sockaddrp != NULL);
  1917. PORTBUFLOCK(mgr);
  1918. if (sock != NULL) {
  1919. sockaddrp = &sockaddr;
  1920. result = isc_socket_getsockname(sock, sockaddrp);
  1921. if (result != ISC_R_SUCCESS)
  1922. goto unlock;
  1923. }
  1924. if (isc_sockaddr_pf(sockaddrp) == AF_INET) {
  1925. ports = mgr->v4ports;
  1926. nports = mgr->nv4ports;
  1927. } else {
  1928. ports = mgr->v6ports;
  1929. nports = mgr->nv6ports;
  1930. }
  1931. if (ports == NULL)
  1932. goto unlock;
  1933. port = isc_sockaddr_getport(sockaddrp);
  1934. if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL)
  1935. available = ISC_TRUE;
  1936. unlock:
  1937. PORTBUFUNLOCK(mgr);
  1938. return (available);
  1939. }
/*
 * True when the attribute words _a1 and _a2 agree on every bit that is
 * set in _mask; bits outside _mask are ignored.
 */
#define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
  1941. static isc_boolean_t
  1942. local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
  1943. isc_sockaddr_t sockaddr;
  1944. isc_result_t result;
  1945. REQUIRE(disp->socket != NULL);
  1946. if (addr == NULL)
  1947. return (ISC_TRUE);
  1948. /*
  1949. * Don't match wildcard ports unless the port is available in the
  1950. * current configuration.
  1951. */
  1952. if (isc_sockaddr_getport(addr) == 0 &&
  1953. isc_sockaddr_getport(&disp->local) == 0 &&
  1954. !portavailable(disp->mgr, disp->socket, NULL)) {
  1955. return (ISC_FALSE);
  1956. }
  1957. /*
  1958. * Check if we match the binding <address,port>.
  1959. * Wildcard ports match/fail here.
  1960. */
  1961. if (isc_sockaddr_equal(&disp->local, addr))
  1962. return (ISC_TRUE);
  1963. if (isc_sockaddr_getport(addr) == 0)
  1964. return (ISC_FALSE);
  1965. /*
  1966. * Check if we match a bound wildcard port <address,port>.
  1967. */
  1968. if (!isc_sockaddr_eqaddr(&disp->local, addr))
  1969. return (ISC_FALSE);
  1970. result = isc_socket_getsockname(disp->socket, &sockaddr);
  1971. if (result != ISC_R_SUCCESS)
  1972. return (ISC_FALSE);
  1973. return (isc_sockaddr_equal(&sockaddr, addr));
  1974. }
  1975. /*
  1976. * Requires mgr be locked.
  1977. *
  1978. * No dispatcher can be locked by this thread when calling this function.
  1979. *
  1980. *
  1981. * NOTE:
  1982. * If a matching dispatcher is found, it is locked after this function
  1983. * returns, and must be unlocked by the caller.
  1984. */
  1985. static isc_result_t
  1986. dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
  1987. unsigned int attributes, unsigned int mask,
  1988. dns_dispatch_t **dispp)
  1989. {
  1990. dns_dispatch_t *disp;
  1991. isc_result_t result;
  1992. /*
  1993. * Make certain that we will not match a private or exclusive dispatch.
  1994. */
  1995. attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
  1996. mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE);
  1997. disp = ISC_LIST_HEAD(mgr->list);
  1998. while (disp != NULL) {
  1999. LOCK(&disp->lock);
  2000. if ((disp->shutting_down == 0)
  2001. && ATTRMATCH(disp->attributes, attributes, mask)
  2002. && local_addr_match(disp, local))
  2003. break;
  2004. UNLOCK(&disp->lock);
  2005. disp = ISC_LIST_NEXT(disp, link);
  2006. }
  2007. if (disp == NULL) {
  2008. result = ISC_R_NOTFOUND;
  2009. goto out;
  2010. }
  2011. *dispp = disp;
  2012. result = ISC_R_SUCCESS;
  2013. out:
  2014. return (result);
  2015. }
  2016. static isc_result_t
  2017. qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
  2018. unsigned int increment, dns_qid_t **qidp,
  2019. isc_boolean_t needsocktable)
  2020. {
  2021. dns_qid_t *qid;
  2022. unsigned int i;
  2023. isc_result_t result;
  2024. REQUIRE(VALID_DISPATCHMGR(mgr));
  2025. REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
  2026. REQUIRE(increment > buckets);
  2027. REQUIRE(qidp != NULL && *qidp == NULL);
  2028. qid = isc_mem_get(mgr->mctx, sizeof(*qid));
  2029. if (qid == NULL)
  2030. return (ISC_R_NOMEMORY);
  2031. qid->qid_table = isc_mem_get(mgr->mctx,
  2032. buckets * sizeof(dns_displist_t));
  2033. if (qid->qid_table == NULL) {
  2034. isc_mem_put(mgr->mctx, qid, sizeof(*qid));
  2035. return (ISC_R_NOMEMORY);
  2036. }
  2037. qid->sock_table = NULL;
  2038. if (needsocktable) {
  2039. qid->sock_table = isc_mem_get(mgr->mctx, buckets *
  2040. sizeof(dispsocketlist_t));
  2041. if (qid->sock_table == NULL) {
  2042. isc_mem_put(mgr->mctx, qid->qid_table,
  2043. buckets * sizeof(dns_displist_t));
  2044. isc_mem_put(mgr->mctx, qid, sizeof(*qid));
  2045. return (ISC_R_NOMEMORY);
  2046. }
  2047. }
  2048. result = isc_mutex_init(&qid->lock);
  2049. if (result != ISC_R_SUCCESS) {
  2050. if (qid->sock_table != NULL) {
  2051. isc_mem_put(mgr->mctx, qid->sock_table,
  2052. buckets * sizeof(dispsocketlist_t));
  2053. }
  2054. isc_mem_put(mgr->mctx, qid->qid_table,
  2055. buckets * sizeof(dns_displist_t));
  2056. isc_mem_put(mgr->mctx, qid, sizeof(*qid));
  2057. return (result);
  2058. }
  2059. for (i = 0; i < buckets; i++) {
  2060. ISC_LIST_INIT(qid->qid_table[i]);
  2061. if (qid->sock_table != NULL)
  2062. ISC_LIST_INIT(qid->sock_table[i]);
  2063. }
  2064. qid->qid_nbuckets = buckets;
  2065. qid->qid_increment = increment;
  2066. qid->magic = QID_MAGIC;
  2067. *qidp = qid;
  2068. return (ISC_R_SUCCESS);
  2069. }
  2070. static void
  2071. qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
  2072. dns_qid_t *qid;
  2073. REQUIRE(qidp != NULL);
  2074. qid = *qidp;
  2075. REQUIRE(VALID_QID(qid));
  2076. *qidp = NULL;
  2077. qid->magic = 0;
  2078. isc_mem_put(mctx, qid->qid_table,
  2079. qid->qid_nbuckets * sizeof(dns_displist_t));
  2080. if (qid->sock_table != NULL) {
  2081. isc_mem_put(mctx, qid->sock_table,
  2082. qid->qid_nbuckets * sizeof(dispsocketlist_t));
  2083. }
  2084. DESTROYLOCK(&qid->lock);
  2085. isc_mem_put(mctx, qid, sizeof(*qid));
  2086. }
  2087. /*
  2088. * Allocate and set important limits.
  2089. */
  2090. static isc_result_t
  2091. dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
  2092. dns_dispatch_t **dispp)
  2093. {
  2094. dns_dispatch_t *disp;
  2095. isc_result_t result;
  2096. REQUIRE(VALID_DISPATCHMGR(mgr));
  2097. REQUIRE(dispp != NULL && *dispp == NULL);
  2098. /*
  2099. * Set up the dispatcher, mostly. Don't bother setting some of
  2100. * the options that are controlled by tcp vs. udp, etc.
  2101. */
  2102. disp = isc_mempool_get(mgr->dpool);
  2103. if (disp == NULL)
  2104. return (ISC_R_NOMEMORY);
  2105. disp->magic = 0;
  2106. disp->mgr = mgr;
  2107. disp->maxrequests = maxrequests;
  2108. disp->attributes = 0;
  2109. ISC_LINK_INIT(disp, link);
  2110. disp->refcount = 1;
  2111. disp->recv_pending = 0;
  2112. memset(&disp->local, 0, sizeof(disp->local));
  2113. disp->localport = 0;
  2114. disp->shutting_down = 0;
  2115. disp->shutdown_out = 0;
  2116. disp->connected = 0;
  2117. disp->tcpmsg_valid = 0;
  2118. disp->shutdown_why = ISC_R_UNEXPECTED;
  2119. disp->requests = 0;
  2120. disp->tcpbuffers = 0;
  2121. disp->qid = NULL;
  2122. ISC_LIST_INIT(disp->activesockets);
  2123. ISC_LIST_INIT(disp->inactivesockets);
  2124. disp->nsockets = 0;
  2125. dispatch_initrandom(&disp->arc4ctx, mgr->entropy, NULL);
  2126. disp->port_table = NULL;
  2127. disp->portpool = NULL;
  2128. result = isc_mutex_init(&disp->lock);
  2129. if (result != ISC_R_SUCCESS)
  2130. goto deallocate;
  2131. disp->failsafe_ev = allocate_event(disp);
  2132. if (disp->failsafe_ev == NULL) {
  2133. result = ISC_R_NOMEMORY;
  2134. goto kill_lock;
  2135. }
  2136. disp->magic = DISPATCH_MAGIC;
  2137. *dispp = disp;
  2138. return (ISC_R_SUCCESS);
  2139. /*
  2140. * error returns
  2141. */
  2142. kill_lock:
  2143. DESTROYLOCK(&disp->lock);
  2144. deallocate:
  2145. isc_mempool_put(mgr->dpool, disp);
  2146. return (result);
  2147. }
  2148. /*
  2149. * MUST be unlocked, and not used by anything.
  2150. */
  2151. static void
  2152. dispatch_free(dns_dispatch_t **dispp)
  2153. {
  2154. dns_dispatch_t *disp;
  2155. dns_dispatchmgr_t *mgr;
  2156. int i;
  2157. REQUIRE(VALID_DISPATCH(*dispp));
  2158. disp = *dispp;
  2159. *dispp = NULL;
  2160. mgr = disp->mgr;
  2161. REQUIRE(VALID_DISPATCHMGR(mgr));
  2162. if (disp->tcpmsg_valid) {
  2163. dns_tcpmsg_invalidate(&disp->tcpmsg);
  2164. disp->tcpmsg_valid = 0;
  2165. }
  2166. INSIST(disp->tcpbuffers == 0);
  2167. INSIST(disp->requests == 0);
  2168. INSIST(disp->recv_pending == 0);
  2169. INSIST(ISC_LIST_EMPTY(disp->activesockets));
  2170. INSIST(ISC_LIST_EMPTY(disp->inactivesockets));
  2171. isc_mempool_put(mgr->epool, disp->failsafe_ev);
  2172. disp->failsafe_ev = NULL;
  2173. if (disp->qid != NULL)
  2174. qid_destroy(mgr->mctx, &disp->qid);
  2175. if (disp->port_table != NULL) {
  2176. for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
  2177. INSIST(ISC_LIST_EMPTY(disp->port_table[i]));
  2178. isc_mem_put(mgr->mctx, disp->port_table,
  2179. sizeof(disp->port_table[0]) *
  2180. DNS_DISPATCH_PORTTABLESIZE);
  2181. }
  2182. if (disp->portpool != NULL)
  2183. isc_mempool_destroy(&disp->portpool);
  2184. disp->mgr = NULL;
  2185. DESTROYLOCK(&disp->lock);
  2186. disp->magic = 0;
  2187. isc_mempool_put(mgr->dpool, disp);
  2188. }
  2189. isc_result_t
  2190. dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
  2191. isc_taskmgr_t *taskmgr, unsigned int buffersize,
  2192. unsigned int maxbuffers, unsigned int maxrequests,
  2193. unsigned int buckets, unsigned int increment,
  2194. unsigned int attributes, dns_dispatch_t **dispp)
  2195. {
  2196. isc_result_t result;
  2197. dns_dispatch_t *disp;
  2198. UNUSED(maxbuffers);
  2199. UNUSED(buffersize);
  2200. REQUIRE(VALID_DISPATCHMGR(mgr));
  2201. REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
  2202. REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
  2203. REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
  2204. attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */
  2205. LOCK(&mgr->lock);
  2206. /*
  2207. * dispatch_allocate() checks mgr for us.
  2208. * qid_allocate() checks buckets and increment for us.
  2209. */
  2210. disp = NULL;
  2211. result = dispatch_allocate(mgr, maxrequests, &disp);
  2212. if (result != ISC_R_SUCCESS) {
  2213. UNLOCK(&mgr->lock);
  2214. return (result);
  2215. }
  2216. result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE);
  2217. if (result != ISC_R_SUCCESS)
  2218. goto deallocate_dispatch;
  2219. disp->socktype = isc_sockettype_tcp;
  2220. disp->socket = NULL;
  2221. isc_socket_attach(sock, &disp->socket);
  2222. disp->ntasks = 1;
  2223. disp->task[0] = NULL;
  2224. result = isc_task_create(taskmgr, 0, &disp->task[0]);
  2225. if (result != ISC_R_SUCCESS)
  2226. goto kill_socket;
  2227. disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
  2228. DNS_EVENT_DISPATCHCONTROL,
  2229. destroy_disp, disp,
  2230. sizeof(isc_event_t));
  2231. if (disp->ctlevent == NULL) {
  2232. result = ISC_R_NOMEMORY;
  2233. goto kill_task;
  2234. }
  2235. isc_task_setname(disp->task[0], "tcpdispatch", disp);
  2236. dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
  2237. disp->tcpmsg_valid = 1;
  2238. disp->attributes = attributes;
  2239. /*
  2240. * Append it to the dispatcher list.
  2241. */
  2242. ISC_LIST_APPEND(mgr->list, disp, link);
  2243. UNLOCK(&mgr->lock);
  2244. mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
  2245. dispatch_log(disp, LVL(90), "created task %p", disp->task[0]);
  2246. *dispp = disp;
  2247. return (ISC_R_SUCCESS);
  2248. /*
  2249. * Error returns.
  2250. */
  2251. kill_task:
  2252. isc_task_detach(&disp->task[0]);
  2253. kill_socket:
  2254. isc_socket_detach(&disp->socket);
  2255. deallocate_dispatch:
  2256. dispatch_free(&disp);
  2257. UNLOCK(&mgr->lock);
  2258. return (result);
  2259. }
  2260. isc_result_t
  2261. dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
  2262. isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
  2263. unsigned int buffersize,
  2264. unsigned int maxbuffers, unsigned int maxrequests,
  2265. unsigned int buckets, unsigned int increment,
  2266. unsigned int attributes, unsigned int mask,
  2267. dns_dispatch_t **dispp)
  2268. {
  2269. isc_result_t result;
  2270. dns_dispatch_t *disp = NULL;
  2271. REQUIRE(VALID_DISPATCHMGR(mgr));
  2272. REQUIRE(sockmgr != NULL);
  2273. REQUIRE(localaddr != NULL);
  2274. REQUIRE(taskmgr != NULL);
  2275. REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
  2276. REQUIRE(maxbuffers > 0);
  2277. REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
  2278. REQUIRE(increment > buckets);
  2279. REQUIRE(dispp != NULL && *dispp == NULL);
  2280. REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
  2281. result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
  2282. maxrequests, buckets, increment);
  2283. if (result != ISC_R_SUCCESS)
  2284. return (result);
  2285. LOCK(&mgr->lock);
  2286. if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
  2287. REQUIRE(isc_sockaddr_getport(localaddr) == 0);
  2288. goto createudp;
  2289. }
  2290. /*
  2291. * See if we have a dispatcher that matches.
  2292. */
  2293. result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
  2294. if (result == ISC_R_SUCCESS) {
  2295. disp->refcount++;
  2296. if (disp->maxrequests < maxrequests)
  2297. disp->maxrequests = maxrequests;
  2298. if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 &&
  2299. (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
  2300. {
  2301. disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
  2302. if (disp->recv_pending != 0)
  2303. isc_socket_cancel(disp->socket, disp->task[0],
  2304. ISC_SOCKCANCEL_RECV);
  2305. }
  2306. UNLOCK(&disp->lock);
  2307. UNLOCK(&mgr->lock);
  2308. *dispp = disp;
  2309. return (ISC_R_SUCCESS);
  2310. }
  2311. createudp:
  2312. /*
  2313. * Nope, create one.
  2314. */
  2315. result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
  2316. maxrequests, attributes, &disp);
  2317. if (result != ISC_R_SUCCESS) {
  2318. UNLOCK(&mgr->lock);
  2319. return (result);
  2320. }
  2321. UNLOCK(&mgr->lock);
  2322. *dispp = disp;
  2323. return (ISC_R_SUCCESS);
  2324. }
  2325. /*
  2326. * mgr should be locked.
  2327. */
  2328. #ifndef DNS_DISPATCH_HELD
  2329. #define DNS_DISPATCH_HELD 20U
  2330. #endif
  2331. static isc_result_t
  2332. get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp,
  2333. isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr,
  2334. isc_socket_t **sockp)
  2335. {
  2336. unsigned int i, j;
  2337. isc_socket_t *held[DNS_DISPATCH_HELD];
  2338. isc_sockaddr_t localaddr_bound;
  2339. isc_socket_t *sock = NULL;
  2340. isc_result_t result = ISC_R_SUCCESS;
  2341. isc_boolean_t anyport;
  2342. INSIST(sockp != NULL && *sockp == NULL);
  2343. localaddr_bound = *localaddr;
  2344. anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0);
  2345. if (anyport) {
  2346. unsigned int nports;
  2347. in_port_t *ports;
  2348. /*
  2349. * If no port is specified, we first try to pick up a random
  2350. * port by ourselves.
  2351. */
  2352. if (isc_sockaddr_pf(&disp->local) == AF_INET) {
  2353. nports = disp->mgr->nv4ports;
  2354. ports = disp->mgr->v4ports;
  2355. } else {
  2356. nports = disp->mgr->nv6ports;
  2357. ports = disp->mgr->v6ports;
  2358. }
  2359. if (nports == 0)
  2360. return (ISC_R_ADDRNOTAVAIL);
  2361. for (i = 0; i < 1024; i++) {
  2362. in_port_t prt;
  2363. prt = ports[dispatch_uniformrandom(
  2364. DISP_ARC4CTX(disp),
  2365. nports)];
  2366. isc_sockaddr_setport(&localaddr_bound, prt);
  2367. result = open_socket(sockmgr, &localaddr_bound,
  2368. 0, &sock);
  2369. if (result == ISC_R_SUCCESS ||
  2370. result != ISC_R_ADDRINUSE) {
  2371. disp->localport = prt;
  2372. *sockp = sock;
  2373. return (result);
  2374. }
  2375. }
  2376. /*
  2377. * If this fails 1024 times, we then ask the kernel for
  2378. * choosing one.
  2379. */
  2380. } else {
  2381. /* Allow to reuse address for non-random ports. */
  2382. result = open_socket(sockmgr, localaddr,
  2383. ISC_SOCKET_REUSEADDRESS, &sock);
  2384. if (result == ISC_R_SUCCESS)
  2385. *sockp = sock;
  2386. return (result);
  2387. }
  2388. memset(held, 0, sizeof(held));
  2389. i = 0;
  2390. for (j = 0; j < 0xffffU; j++) {
  2391. result = open_socket(sockmgr, localaddr, 0, &sock);
  2392. if (result != ISC_R_SUCCESS)
  2393. goto end;
  2394. else if (!anyport)
  2395. break;
  2396. else if (portavailable(mgr, sock, NULL))
  2397. break;
  2398. if (held[i] != NULL)
  2399. isc_socket_detach(&held[i]);
  2400. held[i++] = sock;
  2401. sock = NULL;
  2402. if (i == DNS_DISPATCH_HELD)
  2403. i = 0;
  2404. }
  2405. if (j == 0xffffU) {
  2406. mgr_log(mgr, ISC_LOG_ERROR,
  2407. "avoid-v%s-udp-ports: unable to allocate "
  2408. "an available port",
  2409. isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6");
  2410. result = ISC_R_FAILURE;
  2411. goto end;
  2412. }
  2413. *sockp = sock;
  2414. end:
  2415. for (i = 0; i < DNS_DISPATCH_HELD; i++) {
  2416. if (held[i] != NULL)
  2417. isc_socket_detach(&held[i]);
  2418. }
  2419. return (result);
  2420. }
  2421. static isc_result_t
  2422. dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
  2423. isc_taskmgr_t *taskmgr,
  2424. isc_sockaddr_t *localaddr,
  2425. unsigned int maxrequests,
  2426. unsigned int attributes,
  2427. dns_dispatch_t **dispp)
  2428. {
  2429. isc_result_t result;
  2430. dns_dispatch_t *disp;
  2431. isc_socket_t *sock = NULL;
  2432. int i = 0;
  2433. /*
  2434. * dispatch_allocate() checks mgr for us.
  2435. */
  2436. disp = NULL;
  2437. result = dispatch_allocate(mgr, maxrequests, &disp);
  2438. if (result != ISC_R_SUCCESS)
  2439. return (result);
  2440. if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) {
  2441. result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock);
  2442. if (result != ISC_R_SUCCESS)
  2443. goto deallocate_dispatch;
  2444. } else {
  2445. isc_sockaddr_t sa_any;
  2446. /*
  2447. * For dispatches using exclusive sockets with a specific
  2448. * source address, we only check if the specified address is
  2449. * available on the system. Query sockets will be created later
  2450. * on demand.
  2451. */
  2452. isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr));
  2453. if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) {
  2454. result = open_socket(sockmgr, localaddr, 0, &sock);
  2455. if (sock != NULL)
  2456. isc_socket_detach(&sock);
  2457. if (result != ISC_R_SUCCESS)
  2458. goto deallocate_dispatch;
  2459. }
  2460. disp->port_table = isc_mem_get(mgr->mctx,
  2461. sizeof(disp->port_table[0]) *
  2462. DNS_DISPATCH_PORTTABLESIZE);
  2463. if (disp->port_table == NULL)
  2464. goto deallocate_dispatch;
  2465. for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++)
  2466. ISC_LIST_INIT(disp->port_table[i]);
  2467. result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t),
  2468. &disp->portpool);
  2469. if (result != ISC_R_SUCCESS)
  2470. goto deallocate_dispatch;
  2471. isc_mempool_setname(disp->portpool, "disp_portpool");
  2472. isc_mempool_setfreemax(disp->portpool, 128);
  2473. }
  2474. disp->socktype = isc_sockettype_udp;
  2475. disp->socket = sock;
  2476. disp->local = *localaddr;
  2477. if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
  2478. disp->ntasks = MAX_INTERNAL_TASKS;
  2479. else
  2480. disp->ntasks = 1;
  2481. for (i = 0; i < disp->ntasks; i++) {
  2482. disp->task[i] = NULL;
  2483. result = isc_task_create(taskmgr, 0, &disp->task[i]);
  2484. if (result != ISC_R_SUCCESS) {
  2485. while (--i >= 0) {
  2486. isc_task_shutdown(disp->task[i]);
  2487. isc_task_detach(&disp->task[i]);
  2488. }
  2489. goto kill_socket;
  2490. }
  2491. isc_task_setname(disp->task[i], "udpdispatch", disp);
  2492. }
  2493. disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
  2494. DNS_EVENT_DISPATCHCONTROL,
  2495. destroy_disp, disp,
  2496. sizeof(isc_event_t));
  2497. if (disp->ctlevent == NULL) {
  2498. result = ISC_R_NOMEMORY;
  2499. goto kill_task;
  2500. }
  2501. attributes &= ~DNS_DISPATCHATTR_TCP;
  2502. attributes |= DNS_DISPATCHATTR_UDP;
  2503. disp->attributes = attributes;
  2504. /*
  2505. * Append it to the dispatcher list.
  2506. */
  2507. ISC_LIST_APPEND(mgr->list, disp, link);
  2508. mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
  2509. dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */
  2510. if (disp->socket != NULL)
  2511. dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
  2512. *dispp = disp;
  2513. return (result);
  2514. /*
  2515. * Error returns.
  2516. */
  2517. kill_task:
  2518. for (i = 0; i < disp->ntasks; i++)
  2519. isc_task_detach(&disp->task[i]);
  2520. kill_socket:
  2521. if (disp->socket != NULL)
  2522. isc_socket_detach(&disp->socket);
  2523. deallocate_dispatch:
  2524. dispatch_free(&disp);
  2525. return (result);
  2526. }
  2527. void
  2528. dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
  2529. REQUIRE(VALID_DISPATCH(disp));
  2530. REQUIRE(dispp != NULL && *dispp == NULL);
  2531. LOCK(&disp->lock);
  2532. disp->refcount++;
  2533. UNLOCK(&disp->lock);
  2534. *dispp = disp;
  2535. }
  2536. /*
  2537. * It is important to lock the manager while we are deleting the dispatch,
  2538. * since dns_dispatch_getudp will call dispatch_find, which returns to
  2539. * the caller a dispatch but does not attach to it until later. _getudp
  2540. * locks the manager, however, so locking it here will keep us from attaching
  2541. * to a dispatcher that is in the process of going away.
  2542. */
  2543. void
  2544. dns_dispatch_detach(dns_dispatch_t **dispp) {
  2545. dns_dispatch_t *disp;
  2546. dispsocket_t *dispsock;
  2547. isc_boolean_t killit;
  2548. REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
  2549. disp = *dispp;
  2550. *dispp = NULL;
  2551. LOCK(&disp->lock);
  2552. INSIST(disp->refcount > 0);
  2553. disp->refcount--;
  2554. if (disp->refcount == 0) {
  2555. if (disp->recv_pending > 0)
  2556. isc_socket_cancel(disp->socket, disp->task[0],
  2557. ISC_SOCKCANCEL_RECV);
  2558. for (dispsock = ISC_LIST_HEAD(disp->activesockets);
  2559. dispsock != NULL;
  2560. dispsock = ISC_LIST_NEXT(dispsock, link)) {
  2561. isc_socket_cancel(dispsock->socket, dispsock->task,
  2562. ISC_SOCKCANCEL_RECV);
  2563. }
  2564. disp->shutting_down = 1;
  2565. }
  2566. dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
  2567. killit = destroy_disp_ok(disp);
  2568. UNLOCK(&disp->lock);
  2569. if (killit)
  2570. isc_task_send(disp->task[0], &disp->ctlevent);
  2571. }
  2572. isc_result_t
  2573. dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest,
  2574. isc_task_t *task, isc_taskaction_t action, void *arg,
  2575. dns_messageid_t *idp, dns_dispentry_t **resp,
  2576. isc_socketmgr_t *sockmgr)
  2577. {
  2578. dns_dispentry_t *res;
  2579. unsigned int bucket;
  2580. in_port_t localport = 0;
  2581. dns_messageid_t id;
  2582. int i;
  2583. isc_boolean_t ok;
  2584. dns_qid_t *qid;
  2585. dispsocket_t *dispsocket = NULL;
  2586. isc_result_t result;
  2587. REQUIRE(VALID_DISPATCH(disp));
  2588. REQUIRE(task != NULL);
  2589. REQUIRE(dest != NULL);
  2590. REQUIRE(resp != NULL && *resp == NULL);
  2591. REQUIRE(idp != NULL);
  2592. if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
  2593. REQUIRE(sockmgr != NULL);
  2594. LOCK(&disp->lock);
  2595. if (disp->shutting_down == 1) {
  2596. UNLOCK(&disp->lock);
  2597. return (ISC_R_SHUTTINGDOWN);
  2598. }
  2599. if (disp->requests >= disp->maxrequests) {
  2600. UNLOCK(&disp->lock);
  2601. return (ISC_R_QUOTA);
  2602. }
  2603. if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
  2604. disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) {
  2605. dispsocket_t *oldestsocket;
  2606. dns_dispentry_t *oldestresp;
  2607. dns_dispatchevent_t *rev;
  2608. /*
  2609. * Kill oldest outstanding query if the number of sockets
  2610. * exceeds the quota to keep the room for new queries.
  2611. */
  2612. oldestsocket = ISC_LIST_HEAD(disp->activesockets);
  2613. oldestresp = oldestsocket->resp;
  2614. if (oldestresp != NULL && !oldestresp->item_out) {
  2615. rev = allocate_event(oldestresp->disp);
  2616. if (rev != NULL) {
  2617. rev->buffer.base = NULL;
  2618. rev->result = ISC_R_CANCELED;
  2619. rev->id = oldestresp->id;
  2620. ISC_EVENT_INIT(rev, sizeof(*rev), 0,
  2621. NULL, DNS_EVENT_DISPATCH,
  2622. oldestresp->action,
  2623. oldestresp->arg, oldestresp,
  2624. NULL, NULL);
  2625. oldestresp->item_out = ISC_TRUE;
  2626. isc_task_send(oldestresp->task,
  2627. ISC_EVENT_PTR(&rev));
  2628. inc_stats(disp->mgr,
  2629. dns_resstatscounter_dispabort);
  2630. }
  2631. }
  2632. /*
  2633. * Move this entry to the tail so that it won't (easily) be
  2634. * examined before actually being canceled.
  2635. */
  2636. ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link);
  2637. ISC_LIST_APPEND(disp->activesockets, oldestsocket, link);
  2638. }
  2639. qid = DNS_QID(disp);
  2640. LOCK(&qid->lock);
  2641. if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
  2642. /*
  2643. * Get a separate UDP socket with a random port number.
  2644. */
  2645. result = get_dispsocket(disp, dest, sockmgr, qid, &dispsocket,
  2646. &localport);
  2647. if (result != ISC_R_SUCCESS) {
  2648. UNLOCK(&qid->lock);
  2649. UNLOCK(&disp->lock);
  2650. inc_stats(disp->mgr, dns_resstatscounter_dispsockfail);
  2651. return (result);
  2652. }
  2653. } else {
  2654. localport = disp->localport;
  2655. }
  2656. /*
  2657. * Try somewhat hard to find an unique ID.
  2658. */
  2659. id = (dns_messageid_t)dispatch_random(DISP_ARC4CTX(disp));
  2660. bucket = dns_hash(qid, dest, id, localport);
  2661. ok = ISC_FALSE;
  2662. for (i = 0; i < 64; i++) {
  2663. if (entry_search(qid, dest, id, localport, bucket) == NULL) {
  2664. ok = ISC_TRUE;
  2665. break;
  2666. }
  2667. id += qid->qid_increment;
  2668. id &= 0x0000ffff;
  2669. bucket = dns_hash(qid, dest, id, localport);
  2670. }
  2671. if (!ok) {
  2672. UNLOCK(&qid->lock);
  2673. UNLOCK(&disp->lock);
  2674. return (ISC_R_NOMORE);
  2675. }
  2676. res = isc_mempool_get(disp->mgr->rpool);
  2677. if (res == NULL) {
  2678. UNLOCK(&qid->lock);
  2679. UNLOCK(&disp->lock);
  2680. if (dispsocket != NULL)
  2681. destroy_dispsocket(disp, &dispsocket);
  2682. return (ISC_R_NOMEMORY);
  2683. }
  2684. disp->refcount++;
  2685. disp->requests++;
  2686. res->task = NULL;
  2687. isc_task_attach(task, &res->task);
  2688. res->disp = disp;
  2689. res->id = id;
  2690. res->port = localport;
  2691. res->bucket = bucket;
  2692. res->host = *dest;
  2693. res->action = action;
  2694. res->arg = arg;
  2695. res->dispsocket = dispsocket;
  2696. if (dispsocket != NULL)
  2697. dispsocket->resp = res;
  2698. res->item_out = ISC_FALSE;
  2699. ISC_LIST_INIT(res->items);
  2700. ISC_LINK_INIT(res, link);
  2701. res->magic = RESPONSE_MAGIC;
  2702. ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
  2703. UNLOCK(&qid->lock);
  2704. request_log(disp, res, LVL(90),
  2705. "attached to task %p", res->task);
  2706. if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
  2707. ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) {
  2708. result = startrecv(disp, dispsocket);
  2709. if (result != ISC_R_SUCCESS) {
  2710. LOCK(&qid->lock);
  2711. ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
  2712. UNLOCK(&qid->lock);
  2713. if (dispsocket != NULL)
  2714. destroy_dispsocket(disp, &dispsocket);
  2715. disp->refcount--;
  2716. disp->requests--;
  2717. UNLOCK(&disp->lock);
  2718. isc_task_detach(&res->task);
  2719. isc_mempool_put(disp->mgr->rpool, res);
  2720. return (result);
  2721. }
  2722. }
  2723. if (dispsocket != NULL)
  2724. ISC_LIST_APPEND(disp->activesockets, dispsocket, link);
  2725. UNLOCK(&disp->lock);
  2726. *idp = id;
  2727. *resp = res;
  2728. if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0)
  2729. INSIST(res->dispsocket != NULL);
  2730. return (ISC_R_SUCCESS);
  2731. }
  2732. isc_result_t
  2733. dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
  2734. isc_task_t *task, isc_taskaction_t action, void *arg,
  2735. dns_messageid_t *idp, dns_dispentry_t **resp)
  2736. {
  2737. REQUIRE(VALID_DISPATCH(disp));
  2738. REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
  2739. return (dns_dispatch_addresponse2(disp, dest, task, action, arg,
  2740. idp, resp, NULL));
  2741. }
  2742. void
  2743. dns_dispatch_starttcp(dns_dispatch_t *disp) {
  2744. REQUIRE(VALID_DISPATCH(disp));
  2745. dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]);
  2746. LOCK(&disp->lock);
  2747. disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
  2748. (void)startrecv(disp, NULL);
  2749. UNLOCK(&disp->lock);
  2750. }
  2751. void
  2752. dns_dispatch_removeresponse(dns_dispentry_t **resp,
  2753. dns_dispatchevent_t **sockevent)
  2754. {
  2755. dns_dispatchmgr_t *mgr;
  2756. dns_dispatch_t *disp;
  2757. dns_dispentry_t *res;
  2758. dispsocket_t *dispsock;
  2759. dns_dispatchevent_t *ev;
  2760. unsigned int bucket;
  2761. isc_boolean_t killit;
  2762. unsigned int n;
  2763. isc_eventlist_t events;
  2764. dns_qid_t *qid;
  2765. REQUIRE(resp != NULL);
  2766. REQUIRE(VALID_RESPONSE(*resp));
  2767. res = *resp;
  2768. *resp = NULL;
  2769. disp = res->disp;
  2770. REQUIRE(VALID_DISPATCH(disp));
  2771. mgr = disp->mgr;
  2772. REQUIRE(VALID_DISPATCHMGR(mgr));
  2773. qid = DNS_QID(disp);
  2774. if (sockevent != NULL) {
  2775. REQUIRE(*sockevent != NULL);
  2776. ev = *sockevent;
  2777. *sockevent = NULL;
  2778. } else {
  2779. ev = NULL;
  2780. }
  2781. LOCK(&disp->lock);
  2782. INSIST(disp->requests > 0);
  2783. disp->requests--;
  2784. INSIST(disp->refcount > 0);
  2785. disp->refcount--;
  2786. if (disp->refcount == 0) {
  2787. if (disp->recv_pending > 0)
  2788. isc_socket_cancel(disp->socket, disp->task[0],
  2789. ISC_SOCKCANCEL_RECV);
  2790. for (dispsock = ISC_LIST_HEAD(disp->activesockets);
  2791. dispsock != NULL;
  2792. dispsock = ISC_LIST_NEXT(dispsock, link)) {
  2793. isc_socket_cancel(dispsock->socket, dispsock->task,
  2794. ISC_SOCKCANCEL_RECV);
  2795. }
  2796. disp->shutting_down = 1;
  2797. }
  2798. bucket = res->bucket;
  2799. LOCK(&qid->lock);
  2800. ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
  2801. UNLOCK(&qid->lock);
  2802. if (ev == NULL && res->item_out) {
  2803. /*
  2804. * We've posted our event, but the caller hasn't gotten it
  2805. * yet. Take it back.
  2806. */
  2807. ISC_LIST_INIT(events);
  2808. n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
  2809. NULL, &events);
  2810. /*
  2811. * We had better have gotten it back.
  2812. */
  2813. INSIST(n == 1);
  2814. ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
  2815. }
  2816. if (ev != NULL) {
  2817. REQUIRE(res->item_out == ISC_TRUE);
  2818. res->item_out = ISC_FALSE;
  2819. if (ev->buffer.base != NULL)
  2820. free_buffer(disp, ev->buffer.base, ev->buffer.length);
  2821. free_event(disp, ev);
  2822. }
  2823. request_log(disp, res, LVL(90), "detaching from task %p", res->task);
  2824. isc_task_detach(&res->task);
  2825. if (res->dispsocket != NULL) {
  2826. isc_socket_cancel(res->dispsocket->socket,
  2827. res->dispsocket->task, ISC_SOCKCANCEL_RECV);
  2828. res->dispsocket->resp = NULL;
  2829. }
  2830. /*
  2831. * Free any buffered requests as well
  2832. */
  2833. ev = ISC_LIST_HEAD(res->items);
  2834. while (ev != NULL) {
  2835. ISC_LIST_UNLINK(res->items, ev, ev_link);
  2836. if (ev->buffer.base != NULL)
  2837. free_buffer(disp, ev->buffer.base, ev->buffer.length);
  2838. free_event(disp, ev);
  2839. ev = ISC_LIST_HEAD(res->items);
  2840. }
  2841. res->magic = 0;
  2842. isc_mempool_put(disp->mgr->rpool, res);
  2843. if (disp->shutting_down == 1)
  2844. do_cancel(disp);
  2845. else
  2846. (void)startrecv(disp, NULL);
  2847. killit = destroy_disp_ok(disp);
  2848. UNLOCK(&disp->lock);
  2849. if (killit)
  2850. isc_task_send(disp->task[0], &disp->ctlevent);
  2851. }
  2852. static void
  2853. do_cancel(dns_dispatch_t *disp) {
  2854. dns_dispatchevent_t *ev;
  2855. dns_dispentry_t *resp;
  2856. dns_qid_t *qid;
  2857. if (disp->shutdown_out == 1)
  2858. return;
  2859. qid = DNS_QID(disp);
  2860. /*
  2861. * Search for the first response handler without packets outstanding
  2862. * unless a specific hander is given.
  2863. */
  2864. LOCK(&qid->lock);
  2865. for (resp = linear_first(qid);
  2866. resp != NULL && resp->item_out;
  2867. /* Empty. */)
  2868. resp = linear_next(qid, resp);
  2869. /*
  2870. * No one to send the cancel event to, so nothing to do.
  2871. */
  2872. if (resp == NULL)
  2873. goto unlock;
  2874. /*
  2875. * Send the shutdown failsafe event to this resp.
  2876. */
  2877. ev = disp->failsafe_ev;
  2878. ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
  2879. resp->action, resp->arg, resp, NULL, NULL);
  2880. ev->result = disp->shutdown_why;
  2881. ev->buffer.base = NULL;
  2882. ev->buffer.length = 0;
  2883. disp->shutdown_out = 1;
  2884. request_log(disp, resp, LVL(10),
  2885. "cancel: failsafe event %p -> task %p",
  2886. ev, resp->task);
  2887. resp->item_out = ISC_TRUE;
  2888. isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
  2889. unlock:
  2890. UNLOCK(&qid->lock);
  2891. }
  2892. isc_socket_t *
  2893. dns_dispatch_getsocket(dns_dispatch_t *disp) {
  2894. REQUIRE(VALID_DISPATCH(disp));
  2895. return (disp->socket);
  2896. }
  2897. isc_socket_t *
  2898. dns_dispatch_getentrysocket(dns_dispentry_t *resp) {
  2899. REQUIRE(VALID_RESPONSE(resp));
  2900. if (resp->dispsocket != NULL)
  2901. return (resp->dispsocket->socket);
  2902. else
  2903. return (NULL);
  2904. }
  2905. isc_result_t
  2906. dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
  2907. REQUIRE(VALID_DISPATCH(disp));
  2908. REQUIRE(addrp != NULL);
  2909. if (disp->socktype == isc_sockettype_udp) {
  2910. *addrp = disp->local;
  2911. return (ISC_R_SUCCESS);
  2912. }
  2913. return (ISC_R_NOTIMPLEMENTED);
  2914. }
  2915. void
  2916. dns_dispatch_cancel(dns_dispatch_t *disp) {
  2917. REQUIRE(VALID_DISPATCH(disp));
  2918. LOCK(&disp->lock);
  2919. if (disp->shutting_down == 1) {
  2920. UNLOCK(&disp->lock);
  2921. return;
  2922. }
  2923. disp->shutdown_why = ISC_R_CANCELED;
  2924. disp->shutting_down = 1;
  2925. do_cancel(disp);
  2926. UNLOCK(&disp->lock);
  2927. return;
  2928. }
  2929. unsigned int
  2930. dns_dispatch_getattributes(dns_dispatch_t *disp) {
  2931. REQUIRE(VALID_DISPATCH(disp));
  2932. /*
  2933. * We don't bother locking disp here; it's the caller's responsibility
  2934. * to use only non volatile flags.
  2935. */
  2936. return (disp->attributes);
  2937. }
  2938. void
  2939. dns_dispatch_changeattributes(dns_dispatch_t *disp,
  2940. unsigned int attributes, unsigned int mask)
  2941. {
  2942. REQUIRE(VALID_DISPATCH(disp));
  2943. /* Exclusive attribute can only be set on creation */
  2944. REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0);
  2945. /* Also, a dispatch with randomport specified cannot start listening */
  2946. REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 ||
  2947. (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0);
  2948. /* XXXMLG
  2949. * Should check for valid attributes here!
  2950. */
  2951. LOCK(&disp->lock);
  2952. if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
  2953. if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
  2954. (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
  2955. disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
  2956. (void)startrecv(disp, NULL);
  2957. } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
  2958. == 0 &&
  2959. (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
  2960. disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
  2961. if (disp->recv_pending != 0)
  2962. isc_socket_cancel(disp->socket, disp->task[0],
  2963. ISC_SOCKCANCEL_RECV);
  2964. }
  2965. }
  2966. disp->attributes &= ~mask;
  2967. disp->attributes |= (attributes & mask);
  2968. UNLOCK(&disp->lock);
  2969. }
  2970. void
  2971. dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
  2972. void *buf;
  2973. isc_socketevent_t *sevent, *newsevent;
  2974. REQUIRE(VALID_DISPATCH(disp));
  2975. REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
  2976. REQUIRE(event != NULL);
  2977. sevent = (isc_socketevent_t *)event;
  2978. INSIST(sevent->n <= disp->mgr->buffersize);
  2979. newsevent = (isc_socketevent_t *)
  2980. isc_event_allocate(disp->mgr->mctx, NULL,
  2981. DNS_EVENT_IMPORTRECVDONE, udp_shrecv,
  2982. disp, sizeof(isc_socketevent_t));
  2983. if (newsevent == NULL)
  2984. return;
  2985. buf = allocate_udp_buffer(disp);
  2986. if (buf == NULL) {
  2987. isc_event_free(ISC_EVENT_PTR(&newsevent));
  2988. return;
  2989. }
  2990. memcpy(buf, sevent->region.base, sevent->n);
  2991. newsevent->region.base = buf;
  2992. newsevent->region.length = disp->mgr->buffersize;
  2993. newsevent->n = sevent->n;
  2994. newsevent->result = sevent->result;
  2995. newsevent->address = sevent->address;
  2996. newsevent->timestamp = sevent->timestamp;
  2997. newsevent->pktinfo = sevent->pktinfo;
  2998. newsevent->attributes = sevent->attributes;
  2999. isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent));
  3000. }
  #if 0
  /*
   * Debugging aid (compiled out): print each dispatch on the manager's
   * list together with its formatted local address.
   */
  void
  dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
      dns_dispatch_t *disp;
      char addrtext[1024];

      for (disp = ISC_LIST_HEAD(mgr->list);
           disp != NULL;
           disp = ISC_LIST_NEXT(disp, link))
      {
          isc_sockaddr_format(&disp->local, addrtext, sizeof(addrtext));
          printf("\tdispatch %p, addr %s\n", disp, addrtext);
      }
  }
  #endif