/contrib/bind9/lib/dns/dispatch.c

https://bitbucket.org/freebsd/freebsd-head/ · C · 3533 lines · 2507 code · 520 blank · 506 comment · 657 complexity · 5edf881a04de59334c968878bc4474a1 MD5 · raw file

Large files are truncated click here to view the full file

  1. /*
  2. * Copyright (C) 2004-2009, 2011, 2012 Internet Systems Consortium, Inc. ("ISC")
  3. * Copyright (C) 1999-2003 Internet Software Consortium.
  4. *
  5. * Permission to use, copy, modify, and/or distribute this software for any
  6. * purpose with or without fee is hereby granted, provided that the above
  7. * copyright notice and this permission notice appear in all copies.
  8. *
  9. * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
  10. * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
  11. * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
  12. * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  13. * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  14. * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  15. * PERFORMANCE OF THIS SOFTWARE.
  16. */
  17. /* $Id$ */
  18. /*! \file */
  19. #include <config.h>
  20. #include <stdlib.h>
  21. #include <sys/types.h>
  22. #include <unistd.h>
  23. #include <stdlib.h>
  24. #include <isc/entropy.h>
  25. #include <isc/mem.h>
  26. #include <isc/mutex.h>
  27. #include <isc/portset.h>
  28. #include <isc/print.h>
  29. #include <isc/random.h>
  30. #include <isc/stats.h>
  31. #include <isc/string.h>
  32. #include <isc/task.h>
  33. #include <isc/time.h>
  34. #include <isc/util.h>
  35. #include <dns/acl.h>
  36. #include <dns/dispatch.h>
  37. #include <dns/events.h>
  38. #include <dns/log.h>
  39. #include <dns/message.h>
  40. #include <dns/portlist.h>
  41. #include <dns/stats.h>
  42. #include <dns/tcpmsg.h>
  43. #include <dns/types.h>
  /* File-local list-head and forward typedefs used by the dispatch tables below. */
  44. typedef ISC_LIST(dns_dispentry_t) dns_displist_t; /* bucket type of dns_qid.qid_table */
  45. typedef struct dispsocket dispsocket_t;
  46. typedef ISC_LIST(dispsocket_t) dispsocketlist_t; /* bucket type of dns_qid.sock_table */
  47. typedef struct dispportentry dispportentry_t;
  48. typedef ISC_LIST(dispportentry_t) dispportlist_t; /* bucket type of dns_dispatch.port_table */
  49. /* ARC4 random generator state (one per dispatch manager and one per dispatch). */
  50. typedef struct arc4ctx {
  51. isc_uint8_t i; /*%< first index into s[]; advanced by one per output byte */
  52. isc_uint8_t j; /*%< second index into s[]; advanced by s[i] per output byte */
  53. isc_uint8_t s[256]; /*%< state table; set to the identity by dispatch_initrandom() */
  54. int count; /*%< output budget; dispatch_random() restirs when it drops to <= 0 */
  55. isc_entropy_t *entropy; /*%< entropy source for ARC4; may be NULL (isc_random is used then) */
  56. isc_mutex_t *lock; /*%< if non-NULL, held by dispatch_random() while generating */
  57. } arc4ctx_t;
  /* Hash tables of outstanding entries and dispatch sockets, guarded by 'lock'. */
  58. typedef struct dns_qid {
  59. unsigned int magic; /*%< checked by VALID_QID() */
  60. unsigned int qid_nbuckets; /*%< hash table size */
  61. unsigned int qid_increment; /*%< id increment on collision */
  62. isc_mutex_t lock; /*%< taken around port-table and sock_table updates */
  63. dns_displist_t *qid_table; /*%< the table itself; indexed by bucket < qid_nbuckets */
  64. dispsocketlist_t *sock_table; /*%< socket table; indexed by the same bucket numbers */
  65. } dns_qid_t;
  /* Dispatch manager: shared state for all dispatches created from it. */
  66. struct dns_dispatchmgr {
  67. /* Unlocked. */
  68. unsigned int magic;
  69. isc_mem_t *mctx;
  70. dns_acl_t *blackhole;
  71. dns_portlist_t *portlist;
  72. isc_stats_t *stats; /*%< may be NULL; see inc_stats() */
  73. isc_entropy_t *entropy; /*%< entropy source */
  74. /* Locked by "lock". */
  75. isc_mutex_t lock;
  76. unsigned int state; /*%< MGR_SHUTTINGDOWN flag bits */
  77. ISC_LIST(dns_dispatch_t) list; /*%< dispatches, linked through dns_dispatch.link */
  78. /* Locked by arc4_lock. */
  79. isc_mutex_t arc4_lock;
  80. arc4ctx_t arc4ctx; /*%< ARC4 context for QID */
  81. /* Locked by buffer_lock. */
  82. dns_qid_t *qid;
  83. isc_mutex_t buffer_lock;
  84. unsigned int buffers; /*%< allocated buffers */
  85. unsigned int buffersize; /*%< size of each buffer */
  86. unsigned int maxbuffers; /*%< max buffers */
  87. /* Locked internally. */
  88. isc_mutex_t pool_lock;
  89. isc_mempool_t *epool; /*%< memory pool for events */
  90. isc_mempool_t *rpool; /*%< memory pool for replies */
  91. isc_mempool_t *dpool; /*%< dispatch allocations */
  92. isc_mempool_t *bpool; /*%< memory pool for buffers */
  93. isc_mempool_t *spool; /*%< memory pool for dispsockets */
  94. /*%
  95. * Locked by qid->lock if qid exists; otherwise, can be used without
  96. * being locked.
  97. * Memory footprint considerations: this is a simple implementation of
  98. * available ports, i.e., an ordered array of the actual port numbers.
  99. * This will require about 256KB of memory in the worst case (128KB for
  100. * each of IPv4 and IPv6). We could reduce it by representing it in a
  101. * more sophisticated way such as a list (or array) of ranges that are
  102. * searched to identify a specific port. Our decision here is that the
  103. * saved memory isn't worth the implementation complexity, considering
  104. * the fact that the whole BIND9 process (which is mainly named) already
  105. * requires a pretty large memory footprint. We may, however, have to
  106. * revisit the decision when we want to use it as a separate module for
  107. * an environment where memory requirements are more severe.
  108. */
  109. in_port_t *v4ports; /*%< available ports for IPv4 */
  110. unsigned int nv4ports; /*%< # of available ports for IPv4 */
  111. in_port_t *v6ports; /*%< available ports for IPv6 */
  112. unsigned int nv6ports; /*%< # of available ports for IPv6 */
  113. };
  /* Bit in dns_dispatchmgr.state. */
  114. #define MGR_SHUTTINGDOWN 0x00000001U
  /* True if manager 'l' has the MGR_SHUTTINGDOWN bit set in its state. */
  115. #define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0)
  /* True if dispatch 'd' has DNS_DISPATCHATTR_PRIVATE set in its attributes. */
  116. #define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
  /* One entry in a dns_qid_t's qid_table. */
  117. struct dns_dispentry {
  118. unsigned int magic; /* checked by VALID_RESPONSE() */
  119. dns_dispatch_t *disp;
  120. dns_messageid_t id;
  121. in_port_t port;
  122. unsigned int bucket; /* qid_table bucket this entry lives in; see linear_next() */
  123. isc_sockaddr_t host; /* peer address; formatted by request_log() */
  124. isc_task_t *task;
  125. isc_taskaction_t action;
  126. void *arg;
  127. isc_boolean_t item_out;
  128. dispsocket_t *dispsocket; /* cleared by deactivate_dispsocket() */
  129. ISC_LIST(dns_dispatchevent_t) items;
  130. ISC_LINK(dns_dispentry_t) link; /* linkage within qid_table[bucket] */
  131. };
  132. /*%
  133. * Maximum number of dispatch sockets that can be pooled for reuse. The
  134. * appropriate value may vary, but experiments have shown that a busy caching
  135. * server may need more than 1000 sockets open concurrently. The maximum
  136. * allowable number of dispatch sockets (per manager) will be set to twice
  137. * this value. deactivate_dispsocket() destroys a socket instead of pooling
  138. * it once a dispatch holds more than this many. */
  139. #ifndef DNS_DISPATCH_POOLSOCKS
  140. #define DNS_DISPATCH_POOLSOCKS 2048
  141. #endif
  142. /*%
  143. * Quota to control the number of dispatch sockets. If a dispatch has more
  144. * than the quota of sockets, new queries will purge the oldest ones, so that
  145. * a massive number of outstanding queries won't prevent subsequent queries
  146. * (especially if the older ones take a longer time and result in timeout).
  147. */
  148. #ifndef DNS_DISPATCH_SOCKSQUOTA
  149. #define DNS_DISPATCH_SOCKSQUOTA 3072
  150. #endif
  /* A dedicated socket used by a dispatch for one destination/local-port pair. */
  151. struct dispsocket {
  152. unsigned int magic; /* DISPSOCK_MAGIC while valid; zeroed by destroy_dispsocket() */
  153. isc_socket_t *socket;
  154. dns_dispatch_t *disp; /* owning dispatch */
  155. isc_sockaddr_t host; /* destination address; compared by socket_search() */
  156. in_port_t localport; /* XXX: should be removed later */
  157. dispportentry_t *portentry; /* port table entry this socket holds a reference on */
  158. dns_dispentry_t *resp;
  159. isc_task_t *task; /* attached from one of disp->task[] in get_dispsocket() */
  160. ISC_LINK(dispsocket_t) link; /* on disp->activesockets or disp->inactivesockets */
  161. unsigned int bucket; /* index into qid->sock_table for 'blink' */
  162. ISC_LINK(dispsocket_t) blink; /* on qid->sock_table[bucket] */
  163. };
  164. /*%
  165. * A port table entry. We remember every port we first open in a table with a
  166. * reference counter so that we can 'reuse' the same port (with different
  167. * destination addresses) using the SO_REUSEADDR socket option.
  168. */
  169. struct dispportentry {
  170. in_port_t port; /* local port number; also determines the hash bucket */
  171. unsigned int refs; /* bumped in get_dispsocket(); entry freed by deref_portentry() at 0 */
  172. ISC_LINK(struct dispportentry) link; /* linkage within a port_table bucket */
  173. };
  /* Number of buckets in a dispatch's port_table; a port hashes to port % size. */
  174. #ifndef DNS_DISPATCH_PORTTABLESIZE
  175. #define DNS_DISPATCH_PORTTABLESIZE 1024
  176. #endif
  /* NOTE(review): presumably a sentinel for "not in any bucket" - confirm against its uses. */
  177. #define INVALID_BUCKET (0xffffdead)
  178. /*%
  179. * Number of tasks for each dispatch that use separate sockets for different
  180. * transactions. This must be a power of 2 as it will divide 32 bit numbers
  181. * to get a uniformly random task selection. See get_dispsocket().
  182. */
  183. #define MAX_INTERNAL_TASKS 64
  /* A single dispatch: a socket (or pool of per-query sockets) plus its state. */
  184. struct dns_dispatch {
  185. /* Unlocked. */
  186. unsigned int magic; /*%< magic */
  187. dns_dispatchmgr_t *mgr; /*%< dispatch manager */
  188. int ntasks; /*%< number of valid entries in task[] */
  189. /*%
  190. * internal task buckets. We use multiple tasks to distribute various
  191. * socket events well when using separate dispatch sockets. We use the
  192. * 1st task (task[0]) for internal control events.
  193. */
  194. isc_task_t *task[MAX_INTERNAL_TASKS];
  195. isc_socket_t *socket; /*%< isc socket attached to */
  196. isc_sockaddr_t local; /*%< local address */
  197. in_port_t localport; /*%< local UDP port */
  198. unsigned int maxrequests; /*%< max requests */
  199. isc_event_t *ctlevent;
  200. /*% Locked by mgr->lock. */
  201. ISC_LINK(dns_dispatch_t) link;
  202. /* Locked by "lock". */
  203. isc_mutex_t lock; /*%< locks all below */
  204. isc_sockettype_t socktype; /*%< selects the qid/arc4 context; see DNS_QID() */
  205. unsigned int attributes;
  206. unsigned int refcount; /*%< number of users */
  207. dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */
  208. unsigned int shutting_down : 1,
  209. shutdown_out : 1,
  210. connected : 1,
  211. tcpmsg_valid : 1,
  212. recv_pending : 1; /*%< is a recv() pending? */
  213. isc_result_t shutdown_why;
  214. ISC_LIST(dispsocket_t) activesockets; /*%< dispsockets in use */
  215. ISC_LIST(dispsocket_t) inactivesockets; /*%< pooled dispsockets awaiting reuse */
  216. unsigned int nsockets; /*%< dispsockets allocated (active + pooled) */
  217. unsigned int requests; /*%< how many requests we have */
  218. unsigned int tcpbuffers; /*%< allocated buffers */
  219. dns_tcpmsg_t tcpmsg; /*%< for tcp streams */
  220. dns_qid_t *qid; /*%< private qid table; used when socktype is TCP */
  221. arc4ctx_t arc4ctx; /*%< for QID/UDP port num */
  222. dispportlist_t *port_table; /*%< hold ports 'owned' by us */
  223. isc_mempool_t *portpool; /*%< port table entries */
  224. };
  /* Magic numbers and validity checks for the file-local object types. */
  #define QID_MAGIC               ISC_MAGIC('Q', 'i', 'd', ' ')
  #define VALID_QID(e)            ISC_MAGIC_VALID((e), QID_MAGIC)
  #define RESPONSE_MAGIC          ISC_MAGIC('D', 'r', 's', 'p')
  #define VALID_RESPONSE(e)       ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
  #define DISPSOCK_MAGIC          ISC_MAGIC('D', 's', 'o', 'c')
  #define VALID_DISPSOCK(e)       ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)
  #define DISPATCH_MAGIC          ISC_MAGIC('D', 'i', 's', 'p')
  #define VALID_DISPATCH(e)       ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
  #define DNS_DISPATCHMGR_MAGIC   ISC_MAGIC('D', 'M', 'g', 'r')
  #define VALID_DISPATCHMGR(e)    ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)

  /*%
   * Select the qid table for 'disp': a TCP dispatch uses its own table, any
   * other dispatch uses the one shared through its manager.
   *
   * The whole expansion is parenthesized so that the macro behaves as a
   * single operand wherever it is used (e.g. next to '==', '!' or '->').
   */
  #define DNS_QID(disp) \
      (((disp)->socktype == isc_sockettype_tcp) ? \
       (disp)->qid : (disp)->mgr->qid)

  /*%
   * Select the ARC4 context for 'disp': a UDP dispatch uses its own context,
   * any other dispatch uses the manager's. Fully parenthesized for the same
   * reason as DNS_QID().
   */
  #define DISP_ARC4CTX(disp) \
      (((disp)->socktype == isc_sockettype_udp) ? \
       (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx))

  /*%
   * Locking a query port buffer is a bit tricky. We access the buffer without
   * locking until qid is created. Technically, there is a possibility of race
   * between the creation of qid and access to the port buffer; in practice,
   * however, this should be safe because qid isn't created until the first
   * dispatch is created and there should be no contending situation until then.
   *
   * Both macros are wrapped in do { } while (0) so that they act as one
   * statement and cannot capture a following 'else'. As before, the caller
   * supplies the terminating semicolon.
   */
  #define PORTBUFLOCK(mgr) \
      do { \
          if ((mgr)->qid != NULL) \
              LOCK(&((mgr)->qid->lock)); \
      } while (0)
  #define PORTBUFUNLOCK(mgr) \
      do { \
          if ((mgr)->qid != NULL) \
              UNLOCK((&(mgr)->qid->lock)); \
      } while (0)
  248. /*
  249. * Statics: forward declarations of the file-local helpers defined below.
  250. */
  /* Lookup and teardown helpers. */
  251. static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *,
  252. dns_messageid_t, in_port_t, unsigned int);
  253. static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
  254. static void destroy_disp(isc_task_t *task, isc_event_t *event);
  255. static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
  256. static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
  /* Receive paths. */
  257. static void udp_exrecv(isc_task_t *, isc_event_t *);
  258. static void udp_shrecv(isc_task_t *, isc_event_t *);
  259. static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
  260. static void tcp_recv(isc_task_t *, isc_event_t *);
  261. static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
  262. static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
  263. in_port_t);
  /* Buffer and event allocation. */
  264. static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
  265. static void *allocate_udp_buffer(dns_dispatch_t *disp);
  266. static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
  267. static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
  268. static void do_cancel(dns_dispatch_t *disp);
  /* Iteration over every entry of a qid table. */
  269. static dns_dispentry_t *linear_first(dns_qid_t *disp);
  270. static dns_dispentry_t *linear_next(dns_qid_t *disp,
  271. dns_dispentry_t *resp);
  272. static void dispatch_free(dns_dispatch_t **dispp);
  273. static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
  274. dns_dispatch_t *disp,
  275. isc_socketmgr_t *sockmgr,
  276. isc_sockaddr_t *localaddr,
  277. isc_socket_t **sockp);
  278. static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
  279. isc_socketmgr_t *sockmgr,
  280. isc_taskmgr_t *taskmgr,
  281. isc_sockaddr_t *localaddr,
  282. unsigned int maxrequests,
  283. unsigned int attributes,
  284. dns_dispatch_t **dispp);
  /* Manager and qid lifetime. */
  285. static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
  286. static void destroy_mgr(dns_dispatchmgr_t **mgrp);
  287. static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
  288. unsigned int increment, dns_qid_t **qidp,
  289. isc_boolean_t needaddrtable);
  290. static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
  291. static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
  292. unsigned int options, isc_socket_t **sockp);
  293. static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
  294. isc_sockaddr_t *sockaddrp);
  /* Shorthand for a debug log level, e.g. LVL(90). */
  295. #define LVL(x) ISC_LOG_DEBUG(x)
  296. static void
  297. mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
  298. ISC_FORMAT_PRINTF(3, 4);
  299. static void
  300. mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
  301. char msgbuf[2048];
  302. va_list ap;
  303. if (! isc_log_wouldlog(dns_lctx, level))
  304. return;
  305. va_start(ap, fmt);
  306. vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
  307. va_end(ap);
  308. isc_log_write(dns_lctx,
  309. DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
  310. level, "dispatchmgr %p: %s", mgr, msgbuf);
  311. }
  312. static inline void
  313. inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
  314. if (mgr->stats != NULL)
  315. isc_stats_increment(mgr->stats, counter);
  316. }
  317. static void
  318. dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
  319. ISC_FORMAT_PRINTF(3, 4);
  320. static void
  321. dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
  322. char msgbuf[2048];
  323. va_list ap;
  324. if (! isc_log_wouldlog(dns_lctx, level))
  325. return;
  326. va_start(ap, fmt);
  327. vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
  328. va_end(ap);
  329. isc_log_write(dns_lctx,
  330. DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
  331. level, "dispatch %p: %s", disp, msgbuf);
  332. }
  333. static void
  334. request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
  335. int level, const char *fmt, ...)
  336. ISC_FORMAT_PRINTF(4, 5);
  337. static void
  338. request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
  339. int level, const char *fmt, ...)
  340. {
  341. char msgbuf[2048];
  342. char peerbuf[256];
  343. va_list ap;
  344. if (! isc_log_wouldlog(dns_lctx, level))
  345. return;
  346. va_start(ap, fmt);
  347. vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
  348. va_end(ap);
  349. if (VALID_RESPONSE(resp)) {
  350. isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
  351. isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
  352. DNS_LOGMODULE_DISPATCH, level,
  353. "dispatch %p response %p %s: %s", disp, resp,
  354. peerbuf, msgbuf);
  355. } else {
  356. isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
  357. DNS_LOGMODULE_DISPATCH, level,
  358. "dispatch %p req/resp %p: %s", disp, resp,
  359. msgbuf);
  360. }
  361. }
  362. /*%
  363. * ARC4 random number generator derived from OpenBSD.
  364. * Only dispatch_random() and dispatch_uniformrandom() are expected
  365. * to be called from general dispatch routines; the rest of them are subroutines
  366. * for these two.
  367. *
  368. * The original copyright follows:
  369. * Copyright (c) 1996, David Mazieres <dm@uun.org>
  370. * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
  371. *
  372. * Permission to use, copy, modify, and distribute this software for any
  373. * purpose with or without fee is hereby granted, provided that the above
  374. * copyright notice and this permission notice appear in all copies.
  375. *
  376. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  377. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  378. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  379. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  380. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  381. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  382. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  383. */
  384. #ifdef BIND9
  385. static void
  386. dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
  387. isc_mutex_t *lock)
  388. {
  389. int n;
  390. for (n = 0; n < 256; n++)
  391. actx->s[n] = n;
  392. actx->i = 0;
  393. actx->j = 0;
  394. actx->count = 0;
  395. actx->entropy = entropy; /* don't have to attach */
  396. actx->lock = lock;
  397. }
  398. static void
  399. dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
  400. int n;
  401. isc_uint8_t si;
  402. actx->i--;
  403. for (n = 0; n < 256; n++) {
  404. actx->i = (actx->i + 1);
  405. si = actx->s[actx->i];
  406. actx->j = (actx->j + si + dat[n % datlen]);
  407. actx->s[actx->i] = actx->s[actx->j];
  408. actx->s[actx->j] = si;
  409. }
  410. actx->j = actx->i;
  411. }
  412. static inline isc_uint8_t
  413. dispatch_arc4get8(arc4ctx_t *actx) {
  414. isc_uint8_t si, sj;
  415. actx->i = (actx->i + 1);
  416. si = actx->s[actx->i];
  417. actx->j = (actx->j + si);
  418. sj = actx->s[actx->j];
  419. actx->s[actx->i] = sj;
  420. actx->s[actx->j] = si;
  421. return (actx->s[(si + sj) & 0xff]);
  422. }
  423. static inline isc_uint16_t
  424. dispatch_arc4get16(arc4ctx_t *actx) {
  425. isc_uint16_t val;
  426. val = dispatch_arc4get8(actx) << 8;
  427. val |= dispatch_arc4get8(actx);
  428. return (val);
  429. }
  430. static void
  431. dispatch_arc4stir(arc4ctx_t *actx) {
  432. int i;
  433. union {
  434. unsigned char rnd[128];
  435. isc_uint32_t rnd32[32];
  436. } rnd;
  437. isc_result_t result;
  438. if (actx->entropy != NULL) {
  439. /*
  440. * We accept any quality of random data to avoid blocking.
  441. */
  442. result = isc_entropy_getdata(actx->entropy, rnd.rnd,
  443. sizeof(rnd), NULL, 0);
  444. RUNTIME_CHECK(result == ISC_R_SUCCESS);
  445. } else {
  446. for (i = 0; i < 32; i++)
  447. isc_random_get(&rnd.rnd32[i]);
  448. }
  449. dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd));
  450. /*
  451. * Discard early keystream, as per recommendations in:
  452. * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
  453. */
  454. for (i = 0; i < 256; i++)
  455. (void)dispatch_arc4get8(actx);
  456. /*
  457. * Derived from OpenBSD's implementation. The rationale is not clear,
  458. * but should be conservative enough in safety, and reasonably large
  459. * for efficiency.
  460. */
  461. actx->count = 1600000;
  462. }
  463. static isc_uint16_t
  464. dispatch_random(arc4ctx_t *actx) {
  465. isc_uint16_t result;
  466. if (actx->lock != NULL)
  467. LOCK(actx->lock);
  468. actx->count -= sizeof(isc_uint16_t);
  469. if (actx->count <= 0)
  470. dispatch_arc4stir(actx);
  471. result = dispatch_arc4get16(actx);
  472. if (actx->lock != NULL)
  473. UNLOCK(actx->lock);
  474. return (result);
  475. }
  476. #else
  477. /*
  478. * For general purpose library, we don't have to be too strict about the
  479. * quality of random values. Performance doesn't matter much, either.
  480. * So we simply use the isc_random module to keep the library as small as
  481. * possible.
  482. */
  483. static void
  484. dispatch_initrandom(arc4ctx_t *actx, isc_entropy_t *entropy,
  485. isc_mutex_t *lock)
  486. {
  487. UNUSED(actx);
  488. UNUSED(entropy);
  489. UNUSED(lock);
  490. return;
  491. }
  492. static isc_uint16_t
  493. dispatch_random(arc4ctx_t *actx) {
  494. isc_uint32_t r;
  495. UNUSED(actx);
  496. isc_random_get(&r);
  497. return (r & 0xffff);
  498. }
  499. #endif /* BIND9 */
  500. static isc_uint16_t
  501. dispatch_uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) {
  502. isc_uint16_t min, r;
  503. if (upper_bound < 2)
  504. return (0);
  505. /*
  506. * Ensure the range of random numbers [min, 0xffff] be a multiple of
  507. * upper_bound and contain at least a half of the 16 bit range.
  508. */
  509. if (upper_bound > 0x8000)
  510. min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
  511. else
  512. min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
  513. /*
  514. * This could theoretically loop forever but each retry has
  515. * p > 0.5 (worst case, usually far better) of selecting a
  516. * number inside the range we need, so it should rarely need
  517. * to re-roll.
  518. */
  519. for (;;) {
  520. r = dispatch_random(actx);
  521. if (r >= min)
  522. break;
  523. }
  524. return (r % upper_bound);
  525. }
  526. /*
  527. * Return a hash of the destination and message id.
  528. */
  529. static isc_uint32_t
  530. dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
  531. in_port_t port)
  532. {
  533. unsigned int ret;
  534. ret = isc_sockaddr_hash(dest, ISC_TRUE);
  535. ret ^= (id << 16) | port;
  536. ret %= qid->qid_nbuckets;
  537. INSIST(ret < qid->qid_nbuckets);
  538. return (ret);
  539. }
  540. /*
  541. * Find the first entry in 'qid'. Returns NULL if there are no entries.
  542. */
  543. static dns_dispentry_t *
  544. linear_first(dns_qid_t *qid) {
  545. dns_dispentry_t *ret;
  546. unsigned int bucket;
  547. bucket = 0;
  548. while (bucket < qid->qid_nbuckets) {
  549. ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
  550. if (ret != NULL)
  551. return (ret);
  552. bucket++;
  553. }
  554. return (NULL);
  555. }
  556. /*
  557. * Find the next entry after 'resp' in 'qid'. Return NULL if there are
  558. * no more entries.
  559. */
  560. static dns_dispentry_t *
  561. linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
  562. dns_dispentry_t *ret;
  563. unsigned int bucket;
  564. ret = ISC_LIST_NEXT(resp, link);
  565. if (ret != NULL)
  566. return (ret);
  567. bucket = resp->bucket;
  568. bucket++;
  569. while (bucket < qid->qid_nbuckets) {
  570. ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
  571. if (ret != NULL)
  572. return (ret);
  573. bucket++;
  574. }
  575. return (NULL);
  576. }
  577. /*
  578. * The dispatch must be locked.
  579. */
  580. static isc_boolean_t
  581. destroy_disp_ok(dns_dispatch_t *disp)
  582. {
  583. if (disp->refcount != 0)
  584. return (ISC_FALSE);
  585. if (disp->recv_pending != 0)
  586. return (ISC_FALSE);
  587. if (!ISC_LIST_EMPTY(disp->activesockets))
  588. return (ISC_FALSE);
  589. if (disp->shutting_down == 0)
  590. return (ISC_FALSE);
  591. return (ISC_TRUE);
  592. }
  593. /*
  594. * Task action run on a DNS_EVENT_DISPATCHCONTROL event whose ev_arg is the
  595. * dispatch to tear down: unlink it from its manager, release its sockets
  596. * and tasks, free it, and destroy the manager too if destroy_mgr_ok() says so.
  597. * The dispatcher must not be locked.
  598. * NOTE(review): this used to say "the manager must be locked", but the body acquires mgr->lock itself - confirm. */
  599. static void
  600. destroy_disp(isc_task_t *task, isc_event_t *event) {
  601. dns_dispatch_t *disp;
  602. dns_dispatchmgr_t *mgr;
  603. isc_boolean_t killmgr;
  604. dispsocket_t *dispsocket;
  605. int i;
  606. INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
  607. UNUSED(task);
  608. disp = event->ev_arg;
  609. mgr = disp->mgr;
  610. LOCK(&mgr->lock);
  611. ISC_LIST_UNLINK(mgr->list, disp, link);
  612. dispatch_log(disp, LVL(90),
  613. "shutting down; detaching from sock %p, task %p",
  614. disp->socket, disp->task[0]); /* XXXX */
  615. if (disp->socket != NULL)
  616. isc_socket_detach(&disp->socket);
  /* Drain the pool of inactive dispsockets; each must be unlinked before destroy_dispsocket(). */
  617. while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
  618. ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
  619. destroy_dispsocket(disp, &dispsocket);
  620. }
  621. for (i = 0; i < disp->ntasks; i++)
  622. isc_task_detach(&disp->task[i]);
  623. isc_event_free(&event);
  624. dispatch_free(&disp);
  /* Decide under the lock, but destroy the manager only after releasing it. */
  625. killmgr = destroy_mgr_ok(mgr);
  626. UNLOCK(&mgr->lock);
  627. if (killmgr)
  628. destroy_mgr(&mgr);
  629. }
  630. /*%
  631. * Manipulate port table per dispatch: find an entry for a given port number,
  632. * create a new entry, and decrement a given entry with possible clean-up.
  633. */
  634. static dispportentry_t *
  635. port_search(dns_dispatch_t *disp, in_port_t port) {
  636. dispportentry_t *portentry;
  637. REQUIRE(disp->port_table != NULL);
  638. portentry = ISC_LIST_HEAD(disp->port_table[port %
  639. DNS_DISPATCH_PORTTABLESIZE]);
  640. while (portentry != NULL) {
  641. if (portentry->port == port)
  642. return (portentry);
  643. portentry = ISC_LIST_NEXT(portentry, link);
  644. }
  645. return (NULL);
  646. }
  647. static dispportentry_t *
  648. new_portentry(dns_dispatch_t *disp, in_port_t port) {
  649. dispportentry_t *portentry;
  650. REQUIRE(disp->port_table != NULL);
  651. portentry = isc_mempool_get(disp->portpool);
  652. if (portentry == NULL)
  653. return (portentry);
  654. portentry->port = port;
  655. portentry->refs = 0;
  656. ISC_LINK_INIT(portentry, link);
  657. ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
  658. portentry, link);
  659. return (portentry);
  660. }
  661. /*%
  662. * The caller must not hold the qid->lock.
  663. */
  664. static void
  665. deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
  666. dispportentry_t *portentry = *portentryp;
  667. dns_qid_t *qid;
  668. REQUIRE(disp->port_table != NULL);
  669. REQUIRE(portentry != NULL && portentry->refs > 0);
  670. qid = DNS_QID(disp);
  671. LOCK(&qid->lock);
  672. portentry->refs--;
  673. if (portentry->refs == 0) {
  674. ISC_LIST_UNLINK(disp->port_table[portentry->port %
  675. DNS_DISPATCH_PORTTABLESIZE],
  676. portentry, link);
  677. isc_mempool_put(disp->portpool, portentry);
  678. }
  679. *portentryp = NULL;
  680. UNLOCK(&qid->lock);
  681. }
  682. /*%
  683. * Find a dispsocket for socket address 'dest', and port number 'port'.
  684. * Return NULL if no such entry exists.
  685. */
  686. static dispsocket_t *
  687. socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port,
  688. unsigned int bucket)
  689. {
  690. dispsocket_t *dispsock;
  691. REQUIRE(bucket < qid->qid_nbuckets);
  692. dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);
  693. while (dispsock != NULL) {
  694. if (dispsock->portentry != NULL &&
  695. dispsock->portentry->port == port &&
  696. isc_sockaddr_equal(dest, &dispsock->host))
  697. return (dispsock);
  698. dispsock = ISC_LIST_NEXT(dispsock, blink);
  699. }
  700. return (NULL);
  701. }
  702. /*%
  703. * Make a new socket for a single dispatch with a random port number.
  704. * The caller must hold the disp->lock and qid->lock.
  705. *
  * On ISC_R_SUCCESS the dispsocket is stored in '*dispsockp' and the chosen
  * local port in '*portp'; the dispsocket has been appended to
  * qid->sock_table but not to disp->activesockets. On failure neither output
  * is written and the dispsocket structure is destroyed. Returns
  * ISC_R_ADDRNOTAVAIL when the manager has no ports for the address family,
  * ISC_R_NOMEMORY on allocation failure, ISC_R_FAILURE when every attempt
  * was skipped, or otherwise the last open_socket() result. */
  706. static isc_result_t
  707. get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest,
  708. isc_socketmgr_t *sockmgr, dns_qid_t *qid,
  709. dispsocket_t **dispsockp, in_port_t *portp)
  710. {
  711. int i;
  712. isc_uint32_t r;
  713. dns_dispatchmgr_t *mgr = disp->mgr;
  714. isc_socket_t *sock = NULL;
  715. isc_result_t result = ISC_R_FAILURE;
  716. in_port_t port;
  717. isc_sockaddr_t localaddr;
  718. unsigned int bucket = 0;
  719. dispsocket_t *dispsock;
  720. unsigned int nports;
  721. in_port_t *ports;
  722. unsigned int bindoptions;
  723. dispportentry_t *portentry = NULL;
  /* Pick the manager's port array that matches the dispatch's address family. */
  724. if (isc_sockaddr_pf(&disp->local) == AF_INET) {
  725. nports = disp->mgr->nv4ports;
  726. ports = disp->mgr->v4ports;
  727. } else {
  728. nports = disp->mgr->nv6ports;
  729. ports = disp->mgr->v6ports;
  730. }
  731. if (nports == 0)
  732. return (ISC_R_ADDRNOTAVAIL);
  /*
   * Take a pooled dispsocket if there is one; its isc_socket_t moves to
   * 'sock' and is handed to open_socket() below.
   * NOTE(review): presumably open_socket() reuses a non-NULL *sockp - confirm.
   */
  733. dispsock = ISC_LIST_HEAD(disp->inactivesockets);
  734. if (dispsock != NULL) {
  735. ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
  736. sock = dispsock->socket;
  737. dispsock->socket = NULL;
  738. } else {
  /* Nothing pooled: allocate and initialize a fresh dispsocket. */
  739. dispsock = isc_mempool_get(mgr->spool);
  740. if (dispsock == NULL)
  741. return (ISC_R_NOMEMORY);
  742. disp->nsockets++;
  743. dispsock->socket = NULL;
  744. dispsock->disp = disp;
  745. dispsock->resp = NULL;
  746. dispsock->portentry = NULL;
  /* Spread dispsockets over the dispatch's tasks at random. */
  747. isc_random_get(&r);
  748. dispsock->task = NULL;
  749. isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
  750. ISC_LINK_INIT(dispsock, link);
  751. ISC_LINK_INIT(dispsock, blink);
  752. dispsock->magic = DISPSOCK_MAGIC;
  753. }
  754. /*
  755. * Pick up a random UDP port and open a new socket with it. Avoid
  756. * choosing ports that share the same destination because it will be
  757. * very likely to fail in bind(2) or connect(2).
  758. * At most 64 ports are tried. */
  759. localaddr = disp->local;
  760. for (i = 0; i < 64; i++) {
  761. port = ports[dispatch_uniformrandom(DISP_ARC4CTX(disp),
  762. nports)];
  763. isc_sockaddr_setport(&localaddr, port);
  764. bucket = dns_hash(qid, dest, 0, port);
  /* Skip a port already in use towards this destination. */
  765. if (socket_search(qid, dest, port, bucket) != NULL)
  766. continue;
  /* A port this dispatch already owns is bound again with address reuse. */
  767. bindoptions = 0;
  768. portentry = port_search(disp, port);
  769. if (portentry != NULL)
  770. bindoptions |= ISC_SOCKET_REUSEADDRESS;
  771. result = open_socket(sockmgr, &localaddr, bindoptions, &sock);
  772. if (result == ISC_R_SUCCESS) {
  773. if (portentry == NULL) {
  774. portentry = new_portentry(disp, port);
  775. if (portentry == NULL) {
  776. result = ISC_R_NOMEMORY;
  777. break;
  778. }
  779. }
  780. portentry->refs++;
  781. break;
  782. } else if (result == ISC_R_NOPERM) {
  /* Not permitted on this port: log it and try another one. */
  783. char buf[ISC_SOCKADDR_FORMATSIZE];
  784. isc_sockaddr_format(&localaddr, buf, sizeof(buf));
  785. dispatch_log(disp, ISC_LOG_WARNING,
  786. "open_socket(%s) -> %s: continuing",
  787. buf, isc_result_totext(result));
  788. } else if (result != ISC_R_ADDRINUSE)
  /* ISC_R_ADDRINUSE retries silently; any other error ends the search. */
  789. break;
  790. }
  791. if (result == ISC_R_SUCCESS) {
  792. dispsock->socket = sock;
  793. dispsock->host = *dest;
  794. dispsock->portentry = portentry;
  795. dispsock->bucket = bucket;
  796. ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
  797. *dispsockp = dispsock;
  798. *portp = port;
  799. } else {
  800. /*
  801. * We could keep it in the inactive list, but since this should
  802. * be an exceptional case and might be resource shortage, we'd
  803. * rather destroy it.
  804. */
  805. if (sock != NULL)
  806. isc_socket_detach(&sock);
  807. destroy_dispsocket(disp, &dispsock);
  808. }
  809. return (result);
  810. }
  811. /*%
  812. * Destroy a dedicated dispatch socket.
  813. */
  814. static void
  815. destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
  816. dispsocket_t *dispsock;
  817. dns_qid_t *qid;
  818. /*
  819. * The dispatch must be locked.
  820. */
  821. REQUIRE(dispsockp != NULL && *dispsockp != NULL);
  822. dispsock = *dispsockp;
  823. REQUIRE(!ISC_LINK_LINKED(dispsock, link));
  824. disp->nsockets--;
  825. dispsock->magic = 0;
  826. if (dispsock->portentry != NULL)
  827. deref_portentry(disp, &dispsock->portentry);
  828. if (dispsock->socket != NULL)
  829. isc_socket_detach(&dispsock->socket);
  830. if (ISC_LINK_LINKED(dispsock, blink)) {
  831. qid = DNS_QID(disp);
  832. LOCK(&qid->lock);
  833. ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
  834. blink);
  835. UNLOCK(&qid->lock);
  836. }
  837. if (dispsock->task != NULL)
  838. isc_task_detach(&dispsock->task);
  839. isc_mempool_put(disp->mgr->spool, dispsock);
  840. *dispsockp = NULL;
  841. }
  842. /*%
  843. * Deactivate a dedicated dispatch socket. Move it to the inactive list for
  844. * future reuse unless the total number of sockets are exceeding the maximum.
  845. */
  846. static void
  847. deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
  848. isc_result_t result;
  849. dns_qid_t *qid;
  850. /*
  851. * The dispatch must be locked.
  852. */
  853. ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
  854. if (dispsock->resp != NULL) {
  855. INSIST(dispsock->resp->dispsocket == dispsock);
  856. dispsock->resp->dispsocket = NULL;
  857. }
  858. INSIST(dispsock->portentry != NULL);
  859. deref_portentry(disp, &dispsock->portentry);
  860. #ifdef BIND9
  861. if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
  862. destroy_dispsocket(disp, &dispsock);
  863. else {
  864. result = isc_socket_close(dispsock->socket);
  865. qid = DNS_QID(disp);
  866. LOCK(&qid->lock);
  867. ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
  868. blink);
  869. UNLOCK(&qid->lock);
  870. if (result == ISC_R_SUCCESS)
  871. ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
  872. else {
  873. /*
  874. * If the underlying system does not allow this
  875. * optimization, destroy this temporary structure (and
  876. * create a new one for a new transaction).
  877. */
  878. INSIST(result == ISC_R_NOTIMPLEMENTED);
  879. destroy_dispsocket(disp, &dispsock);
  880. }
  881. }
  882. #else
  883. /* This kind of optimization isn't necessary for normal use */
  884. UNUSED(qid);
  885. UNUSED(result);
  886. destroy_dispsocket(disp, &dispsock);
  887. #endif
  888. }
  889. /*
  890. * Find an entry for query ID 'id', socket address 'dest', and port number
  891. * 'port'.
  892. * Return NULL if no such entry exists.
  893. */
  894. static dns_dispentry_t *
  895. entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
  896. in_port_t port, unsigned int bucket)
  897. {
  898. dns_dispentry_t *res;
  899. REQUIRE(bucket < qid->qid_nbuckets);
  900. res = ISC_LIST_HEAD(qid->qid_table[bucket]);
  901. while (res != NULL) {
  902. if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
  903. res->port == port) {
  904. return (res);
  905. }
  906. res = ISC_LIST_NEXT(res, link);
  907. }
  908. return (NULL);
  909. }
  910. static void
  911. free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
  912. INSIST(buf != NULL && len != 0);
  913. switch (disp->socktype) {
  914. case isc_sockettype_tcp:
  915. INSIST(disp->tcpbuffers > 0);
  916. disp->tcpbuffers--;
  917. isc_mem_put(disp->mgr->mctx, buf, len);
  918. break;
  919. case isc_sockettype_udp:
  920. LOCK(&disp->mgr->buffer_lock);
  921. INSIST(disp->mgr->buffers > 0);
  922. INSIST(len == disp->mgr->buffersize);
  923. disp->mgr->buffers--;
  924. isc_mempool_put(disp->mgr->bpool, buf);
  925. UNLOCK(&disp->mgr->buffer_lock);
  926. break;
  927. default:
  928. INSIST(0);
  929. break;
  930. }
  931. }
  932. static void *
  933. allocate_udp_buffer(dns_dispatch_t *disp) {
  934. void *temp;
  935. LOCK(&disp->mgr->buffer_lock);
  936. temp = isc_mempool_get(disp->mgr->bpool);
  937. if (temp != NULL)
  938. disp->mgr->buffers++;
  939. UNLOCK(&disp->mgr->buffer_lock);
  940. return (temp);
  941. }
  942. static inline void
  943. free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
  944. if (disp->failsafe_ev == ev) {
  945. INSIST(disp->shutdown_out == 1);
  946. disp->shutdown_out = 0;
  947. return;
  948. }
  949. isc_mempool_put(disp->mgr->epool, ev);
  950. }
  951. static inline dns_dispatchevent_t *
  952. allocate_event(dns_dispatch_t *disp) {
  953. dns_dispatchevent_t *ev;
  954. ev = isc_mempool_get(disp->mgr->epool);
  955. if (ev == NULL)
  956. return (NULL);
  957. ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
  958. NULL, NULL, NULL, NULL, NULL);
  959. return (ev);
  960. }
  961. static void
  962. udp_exrecv(isc_task_t *task, isc_event_t *ev) {
  963. dispsocket_t *dispsock = ev->ev_arg;
  964. UNUSED(task);
  965. REQUIRE(VALID_DISPSOCK(dispsock));
  966. udp_recv(ev, dispsock->disp, dispsock);
  967. }
  968. static void
  969. udp_shrecv(isc_task_t *task, isc_event_t *ev) {
  970. dns_dispatch_t *disp = ev->ev_arg;
  971. UNUSED(task);
  972. REQUIRE(VALID_DISPATCH(disp));
  973. udp_recv(ev, disp, NULL);
  974. }
  975. /*
  976. * General flow:
  977. *
  978. * If I/O result == CANCELED or error, free the buffer.
  979. *
  980. * If query, free the buffer, restart.
  981. *
  982. * If response:
  983. * Allocate event, fill in details.
  984. * If cannot allocate, free buffer, restart.
  985. * find target. If not found, free buffer, restart.
  986. * if event queue is not empty, queue. else, send.
  987. * restart.
  988. */
  /*
   * Receive-completion handler shared by udp_exrecv() (dedicated socket,
   * 'dispsock' non-NULL) and udp_shrecv() (shared dispatch socket,
   * 'dispsock' NULL).  disp->lock is taken on entry and released on every
   * return path, and 'ev_in' is freed on every return path.
   */
  989. static void
  990. udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
  991. isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
  992. dns_messageid_t id;
  993. isc_result_t dres;
  994. isc_buffer_t source;
  995. unsigned int flags;
  996. dns_dispentry_t *resp = NULL;
  997. dns_dispatchevent_t *rev;
  998. unsigned int bucket;
  999. isc_boolean_t killit;
  1000. isc_boolean_t queue_response;
  1001. dns_dispatchmgr_t *mgr;
  1002. dns_qid_t *qid;
  1003. isc_netaddr_t netaddr;
  1004. int match;
  1005. int result;
  1006. isc_boolean_t qidlocked = ISC_FALSE;
  1007. LOCK(&disp->lock);
  1008. mgr = disp->mgr;
  1009. qid = mgr->qid;
  1010. dispatch_log(disp, LVL(90),
  1011. "got packet: requests %d, buffers %d, recvs %d",
  1012. disp->requests, disp->mgr->buffers, disp->recv_pending);
  1013. if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
  1014. /*
  1015. * Unless the receive event was imported from a listening
  1016. * interface, in which case the event type is
  1017. * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
  1018. */
  1019. INSIST(disp->recv_pending != 0);
  1020. disp->recv_pending = 0;
  1021. }
  1022. if (dispsock != NULL &&
  1023. (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
  1024. /*
  1025. * dispsock->resp can be NULL if this transaction was canceled
  1026. * just after receiving a response. Since this socket is
  1027. * exclusively used and there should be at most one receive
  1028. * event the canceled event should have been no effect. So
  1029. * we can (and should) deactivate the socket right now.
  1030. */
  1031. deactivate_dispsocket(disp, dispsock);
  1032. dispsock = NULL;
  1033. }
  1034. if (disp->shutting_down) {
  1035. /*
  1036. * This dispatcher is shutting down.
  1037. */
  1038. free_buffer(disp, ev->region.base, ev->region.length);
  1039. isc_event_free(&ev_in);
  1040. ev = NULL;
  1041. killit = destroy_disp_ok(disp);
  1042. UNLOCK(&disp->lock);
  1043. if (killit)
  1044. isc_task_send(disp->task[0], &disp->ctlevent);
  1045. return;
  1046. }
  /*
   * On an exclusive dispatch the target entry is taken directly from the
   * dedicated socket.  If no dedicated socket remains (none was passed in,
   * or it was deactivated above), the packet is dropped and the receive is
   * not restarted.
   */
  1047. if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
  1048. if (dispsock != NULL) {
  1049. resp = dispsock->resp;
  1050. id = resp->id;
  1051. if (ev->result != ISC_R_SUCCESS) {
  1052. /*
  1053. * This is most likely a network error on a
  1054. * connected socket. It makes no sense to
  1055. * check the address or parse the packet, but it
  1056. * will help to return the error to the caller.
  1057. */
  1058. goto sendresponse;
  1059. }
  1060. } else {
  1061. free_buffer(disp, ev->region.base, ev->region.length);
  1062. UNLOCK(&disp->lock);
  1063. isc_event_free(&ev_in);
  1064. return;
  1065. }
  1066. } else if (ev->result != ISC_R_SUCCESS) {
  1067. free_buffer(disp, ev->region.base, ev->region.length);
  1068. if (ev->result != ISC_R_CANCELED)
  1069. dispatch_log(disp, ISC_LOG_ERROR,
  1070. "odd socket result in udp_recv(): %s",
  1071. isc_result_totext(ev->result));
  1072. UNLOCK(&disp->lock);
  1073. isc_event_free(&ev_in);
  1074. return;
  1075. }
  1076. /*
  1077. * If this is from a blackholed address, drop it.
  1078. */
  1079. isc_netaddr_fromsockaddr(&netaddr, &ev->address);
  1080. if (disp->mgr->blackhole != NULL &&
  1081. dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
  1082. NULL, &match, NULL) == ISC_R_SUCCESS &&
  1083. match > 0)
  1084. {
  1085. if (isc_log_wouldlog(dns_lctx, LVL(10))) {
  1086. char netaddrstr[ISC_NETADDR_FORMATSIZE];
  1087. isc_netaddr_format(&netaddr, netaddrstr,
  1088. sizeof(netaddrstr));
  1089. dispatch_log(disp, LVL(10),
  1090. "blackholed packet from %s",
  1091. netaddrstr);
  1092. }
  1093. free_buffer(disp, ev->region.base, ev->region.length);
  1094. goto restart;
  1095. }
  1096. /*
  1097. * Peek into the buffer to see what we can see.
  1098. */
  1099. isc_buffer_init(&source, ev->region.base, ev->region.length);
  1100. isc_buffer_add(&source, ev->n);
  1101. dres = dns_message_peekheader(&source, &id, &flags);
  1102. if (dres != ISC_R_SUCCESS) {
  1103. free_buffer(disp, ev->region.base, ev->region.length);
  1104. dispatch_log(disp, LVL(10), "got garbage packet");
  1105. goto restart;
  1106. }
  1107. dispatch_log(disp, LVL(92),
  1108. "got valid DNS message header, /QR %c, id %u",
  1109. ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
  1110. /*
  1111. * Look at flags. If query, drop it. If response,
  1112. * look to see where it goes.
  1113. */
  1114. if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
  1115. /* query */
  1116. free_buffer(disp, ev->region.base, ev->region.length);
  1117. goto restart;
  1118. }
  1119. /*
  1120. * Search for the corresponding response. If we are using an exclusive
  1121. * socket, we've already identified it and we can skip the search; but
  1122. * the ID and the address must match the expected ones.
  1123. */
  1124. if (resp == NULL) {
  1125. bucket = dns_hash(qid, &ev->address, id, disp->localport);
  1126. LOCK(&qid->lock);
  1127. qidlocked = ISC_TRUE;
  1128. resp = entry_search(qid, &ev->address, id, disp->localport,
  1129. bucket);
  1130. dispatch_log(disp, LVL(90),
  1131. "search for response in bucket %d: %s",
  1132. bucket, (resp == NULL ? "not found" : "found"));
  1133. if (resp == NULL) {
  1134. inc_stats(mgr, dns_resstatscounter_mismatch);
  1135. free_buffer(disp, ev->region.base, ev->region.length);
  1136. goto unlock;
  1137. }
  1138. } else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
  1139. &resp->host)) {
  1140. dispatch_log(disp, LVL(90),
  1141. "response to an exclusive socket doesn't match");
  1142. inc_stats(mgr, dns_resstatscounter_mismatch);
  1143. free_buffer(disp, ev->region.base, ev->region.length);
  1144. goto unlock;
  1145. }
  1146. /*
  1147. * Now that we have the original dispatch the query was sent
  1148. * from check that the address and port the response was
  1149. * sent to make sense.
  1150. */
  1151. if (disp != resp->disp) {
  1152. isc_sockaddr_t a1;
  1153. isc_sockaddr_t a2;
  1154. /*
  1155. * Check that the socket types and ports match.
  1156. */
  1157. if (disp->socktype != resp->disp->socktype ||
  1158. isc_sockaddr_getport(&disp->local) !=
  1159. isc_sockaddr_getport(&resp->disp->local)) {
  1160. free_buffer(disp, ev->region.base, ev->region.length);
  1161. goto unlock;
  1162. }
  1163. /*
  1164. * If both dispatches are bound to an address then fail as
  1165. * the addresses can't be equal (enforced by the IP stack).
  1166. *
  1167. * Note under Linux a packet can be sent out via IPv4 socket
  1168. * and the response be received via a IPv6 socket.
  1169. *
  1170. * Requests sent out via IPv6 should always come back in
  1171. * via IPv6.
  1172. */
  1173. if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
  1174. isc_sockaddr_pf(&disp->local) != PF_INET6) {
  1175. free_buffer(disp, ev->region.base, ev->region.length);
  1176. goto unlock;
  1177. }
  1178. isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
  1179. isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
  1180. if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
  1181. !isc_sockaddr_eqaddr(&a2, &disp->local)) {
  1182. free_buffer(disp, ev->region.base, ev->region.length);
  1183. goto unlock;
  1184. }
  1185. }
  /*
   * Deliver to 'resp'.  If an event is already outstanding for this entry
   * (item_out is set) the new event is appended to resp->items; otherwise
   * it is sent to resp->task.  The receive buffer is not freed here on
   * success: rev->buffer is initialized to refer to it.
   */
  1186. sendresponse:
  1187. queue_response = resp->item_out;
  1188. rev = allocate_event(resp->disp);
  1189. if (rev == NULL) {
  1190. free_buffer(disp, ev->region.base, ev->region.length);
  1191. goto unlock;
  1192. }
  1193. /*
  1194. * At this point, rev contains the event we want to fill in, and
  1195. * resp contains the information on the place to send it to.
  1196. * Send the event off.
  1197. */
  1198. isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
  1199. isc_buffer_add(&rev->buffer, ev->n);
  1200. rev->result = ev->result;
  1201. rev->id = id;
  1202. rev->addr = ev->address;
  1203. rev->pktinfo = ev->pktinfo;
  1204. rev->attributes = ev->attributes;
  1205. if (queue_response) {
  1206. ISC_LIST_APPEND(resp->items, rev, ev_link);
  1207. } else {
  1208. ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
  1209. DNS_EVENT_DISPATCH,
  1210. resp->action, resp->arg, resp, NULL, NULL);
  1211. request_log(disp, resp, LVL(90),
  1212. "[a] Sent event %p buffer %p len %d to task %p",
  1213. rev, rev->buffer.base, rev->buffer.length,
  1214. resp->task);
  1215. resp->item_out = ISC_TRUE;
  1216. isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
  1217. }
  /*
   * qid->lock is held only when the hash-table search above was performed
   * (shared-socket path); 'qidlocked' records that.
   */
  1218. unlock:
  1219. if (qidlocked)
  1220. UNLOCK(&qid->lock);
  1221. /*
  1222. * Restart recv() to get the next packet.
  1223. */
  1224. restart:
  1225. result = startrecv(disp, dispsock);
  1226. if (result != ISC_R_SUCCESS && dispsock != NULL) {
  1227. /*
  1228. * XXX: weird. There seems to be no recovery process other than
  1229. * deactivate this socket anyway (since we cannot start
  1230. * receiving, we won't be able to receive a cancel event
  1231. * from the user).
  1232. */
  1233. deactivate_dispsocket(disp, dispsock);
  1234. }
  1235. UNLOCK(&disp->lock);
  1236. isc_event_free(&ev_in);
  1237. }
  1238. /*
  1239. * General flow:
  1240. *
  1241. * If I/O result == CANCELED, EOF, or error, notify everyone as the
  1242. * various queues drain.
  1243. *
  1244. * If query, restart.
  1245. *
  1246. * If response:
  1247. * Allocate event, fill in details.
  1248. * If cannot allocate, restart.
  1249. * find target. If not found, restart.
  1250. * if event queue is not empty, queue. else, send.
  1251. * restart.
  1252. */
  1253. static void
  1254. tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
  1255. dns_dispatch_t *disp = ev_in->ev_arg;
  1256. dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
  1257. dns_messageid_t id;
  1258. isc_result_t dres;
  1259. unsigned int flags;
  1260. dns_dispentry_t *resp;
  1261. dns_dispatchevent_t *rev;
  1262. unsigned int bucket;
  1263. isc_boolean_t killit;
  1264. isc_boolean_t queue_response;
  1265. dns_qid_t *qid;
  1266. int level;
  1267. char buf[ISC_SOCKADDR_FORMATSIZE];
  1268. UNUSED(task);
  1269. REQUIRE(VALID_DISPATCH(disp));
  1270. qid = disp->qid;
  1271. dispatch_log(disp, LVL(90),
  1272. "got TCP packet: requests %d, buffers %d, recvs %d",
  1273. disp->requests, disp->tcpbuffers, disp->recv_pending);
  1274. LOCK(&disp->lock);
  1275. INSIST(disp->recv_pending != 0);
  1276. disp->recv_pending = 0;
  1277. if (disp->refcount == 0) {
  1278. /*
  1279. * This dispatcher is shutting down. Force cancelation.
  1280. */
  1281. tcpmsg->result = ISC_R_CANCELED;
  1282. }
  1283. if (tcpmsg->result != ISC_R_SUCCESS) {
  1284. switch (tcpmsg->result) {
  1285. case ISC_R_CANCELED:
  1286. break;
  1287. case ISC_R_EOF:
  1288. dispatch_log(disp, LVL(90), "shutting down on EOF");
  1289. do_cancel(disp);
  1290. break;
  1291. case ISC_R_CONNECTIONRESET:
  1292. level = ISC_LOG_INFO;
  1293. goto logit;
  1294. default:
  1295. level = ISC_LOG_ERROR;
  1296. logit:
  1297. isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
  1298. dispatch_log(disp, level, "shutting down due to TCP "
  1299. "receive error: %s: %s", buf,
  1300. isc_result_totext(tcpmsg->result));
  1301. do_cancel(disp);
  1302. break;
  1303. }
  1304. /*
  1305. * The event is statically allocated in the tcpmsg
  1306. * structure, and destroy_disp() frees the tcpmsg, so we must
  1307. * free the event *before* calling destroy_disp().
  1308. */
  1309. isc_event_free(&ev_in);
  1310. disp->shutting_down = 1;
  1311. disp->shutdown_why = tcpmsg->result;
  1312. /*
  1313. * If the recv() was canceled pass the word on.
  1314. */
  1315. killit = destroy_disp_ok(disp);
  1316. UNLOCK(&disp->lock);
  1317. if (killit)
  1318. isc_task_send(disp->task[0], &disp->ctlevent);
  1319. return;
  1320. }
  1321. dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
  1322. tcpmsg->result,
  1323. tcpmsg->buffer.length, tcpmsg->buffer.base);
  1324. /*
  1325. * Peek into the buffer to see what we can see.
  1326. */
  1327. dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
  1328. if (dres != ISC_R_SUCCESS) {
  1329. dispatch_log(disp, LVL(10), "got garbage packet");
  1330. goto restart;
  1331. }
  1332. dispatch_log(disp, LVL(92),
  1333. "got valid DNS message header, /QR %c, id %u",
  1334. ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
  1335. /*
  1336. * Allocate an event to send to the query or response client, and
  1337. * allocate a new buffer for our use.
  1338. */
  1339. /*
  1340. * Look at flags. If query, drop it. If response,
  1341. * look to see where it goes.
  1342. */
  1343. if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
  1344. /*
  1345. * Query.
  1346. */
  1347. goto restart;
  1348. }
  1349. /*
  1350. * Response.
  1351. */
  1352. bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
  1353. LOCK(&qid->lock);
  1354. resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket);
  1355. dispatch_log(disp, LVL(90),
  1356. "search for response in bucket %d: %s",
  1357. bucket, (resp == NULL ? "not found" : "found"));
  1358. if (resp == NULL)
  1359. goto unlock;
  1360. queue_response = resp->item_out;
  1361. rev = allocate_event(disp);
  1362. if (rev == NULL)
  1363. goto unlock;
  1364. /*
  1365. * At this point, rev contains the event we want to fill in, and
  1366. * resp contains the information on the place to send it to.
  1367. * Send the event off.
  1368. */
  1369. dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
  1370. disp->tcpbuffers++;
  1371. rev->result = ISC_R_SUCCESS;
  1372. rev->id = id;
  1373. rev->addr = tcpmsg->address;
  1374. if (queue_response) {
  1375. ISC_LIST_APPEND(resp->items, rev, ev_link);
  1376. } else {
  1377. ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
  1378. resp->action, resp->arg, resp, NULL, NULL);
  1379. request_log(disp, resp, LVL(90),
  1380. "[b] Sent event %p buffer %p len %d to task %p",
  1381. rev, rev->buffer.base, rev->buffer.length,
  1382. resp->task);
  1383. resp->item_out = ISC_TRUE;
  1384. isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
  1385. }
  1386. unlock:
  1387. UNLOCK(&qid->lock);
  1388. /*
  1389. * Restart recv() to get the next packet.
  1390. */
  1391. restart:
  1392. (void)startrecv(disp, NULL);
  1393. UNLOCK(&disp->lock);
  1394. isc_event_free(&ev_in);
  1395. }
  1396. /*
  1397. * disp must be locked.
  1398. */
  1399. static isc_result_t
  1400. startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) {
  1401. isc_result_t res;
  1402. isc_region_t region;
  1403. isc_socket_t *socket;
  1404. if (disp->shutting_down == 1)
  1405. return (ISC_R_SUCCESS);
  1406. if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
  1407. return (ISC_R_SUCCESS);
  1408. if (disp->recv_pending != 0 && dispsock == NULL)
  1409. return (ISC_R_SUCCESS);
  1410. if (disp->mgr->buffers >= disp->mgr->maxbuffers)
  1411. return (ISC_R_NOMEMORY);
  1412. if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 &&
  1413. dispsock == NULL)
  1414. return (ISC_R_SUCCESS);
  1415. if (dispsock != NULL)
  1416. socket = dispsock->socket;
  1417. else
  1418. socket = disp->socket;
  1419. INSIST(socket != NULL);
  1420. switch (disp->socktype) {
  1421. /*
  1422. * UDP reads are always maximal.
  1423. */
  1424. case isc_sockettype_udp:
  1425. region.length = disp->mgr->buffersize;
  1426. region.base = allocate_udp_buffer(disp);
  1427. if (region.base == NULL)
  1428. return (ISC_R_NOMEMORY);
  1429. if (dispsock != NULL) {
  1430. res = isc_socket_recv(socket, &region, 1,
  1431. dispsock->task, udp_exrecv,
  1432. dispsock);
  1433. if (res != ISC_R_SUCCESS) {
  1434. free_buffer(disp, region.base, region.length);
  1435. return (res);
  1436. }
  1437. } else {
  1438. res = isc_socket_recv(socket, &region, 1,
  1439. disp->task[0], udp_shrecv, disp);
  1440. if (res != ISC_R_SUCCESS) {
  1441. free_buffer(disp, region.base, region.length);
  1442. disp->shutdown_why = res;
  1443. disp->shutting_down = 1;
  1444. do_cancel(disp);
  1445. return (ISC_R_SUCCESS); /* recover by cancel */
  1446. }
  1447. INSIST(disp->recv_pending == 0);
  1448. disp->recv_pending = 1;
  1449. }
  1450. break;
  1451. case isc_sockettype_tcp:
  1452. res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0],
  1453. tcp_recv, disp);
  1454. if (res != ISC_R_SUCCESS) {
  1455. disp->shutdown_why = res;
  1456. disp->shutting_down = 1;
  1457. do_cancel(disp);
  1458. return (ISC_R_SUCCESS); /* recover by cancel */
  1459. }
  1460. INSIST(disp->recv_pending == 0);
  1461. disp->recv_pending = 1;
  1462. break;
  1463. default:
  1464. INSIST(0);
  1465. break;
  1466. }
  1467. return (ISC_R_SUCCESS);
  1468. }
  1469. /*
  1470. * Mgr must be locked when calling this function.
  1471. */
  1472. static isc_boolean_t
  1473. destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
  1474. mgr_log(mgr, LVL(90),
  1475. "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
  1476. "epool=%d, rpool=%d, dpool=%d",
  1477. MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
  1478. isc_mempool_getallocated(mgr->epool),
  1479. isc_mempool_getallocated(mgr->rpool),
  1480. isc_mempool_getallocated(mgr->dpool));
  1481. if (!MGR_IS_SHUTTINGDOWN(mgr))
  1482. return (ISC_FALSE);
  1483. if (!ISC_LIST_EMPTY(mgr->list))
  1484. return (ISC_FALSE);
  1485. if (isc_mempool_getallocated(mgr->epool) != 0)
  1486. return (ISC_FALSE);
  1487. if (isc_mempool_getallocated(mgr->rpool) != 0)
  1488. return (ISC_FALSE);
  1489. if (isc_mempool_getallocated(mgr->dpool) != 0)
  1490. return (ISC_FALSE);
  1491. return (ISC_TRUE);
  1492. }
  1493. /*
  1494. * Mgr must be unlocked when calling this function.
  1495. */
  1496. static void
  1497. destroy_mgr(dns_dispatchmgr_t **mgrp) {
  1498. isc_mem_t *mctx;
  1499. dns_dispatchmgr_t *mgr;
  1500. mgr = *mgrp;
  1501. *mgrp = NULL;
  1502. mctx = mgr->mctx;
  1503. mgr->magic = 0;
  1504. mgr->mctx = NULL;
  1505. DESTROYLOCK(&mgr->lock);
  1506. mgr->state = 0;
  1507. DESTROYLOCK(&mgr->arc4_lock);
  1508. isc_mempool_destroy(&mgr->epool);
  1509. isc_mempool_destroy(&mgr->rpool);
  1510. isc_mempool_destroy(&mgr->dpool);
  1511. if (mgr->bpool != NULL)
  1512. isc_mempool_destroy(&mgr->bpool);
  1513. if (mgr->spool != NULL)
  1514. isc_mempool_destroy(&mgr->spool);
  1515. DESTROYLOCK(&mgr->pool_lock);
  1516. #ifdef BIND9
  1517. if (mgr->entropy != NULL)
  1518. isc_entropy_detach(&mgr->entropy);
  1519. #endif /* BIND9 */
  1520. if (mgr->qid != NULL)
  1521. qid_destroy(mctx, &mgr->qid);
  1522. DESTROYLOCK(&mgr->buffer_lock);
  1523. if (mgr->blackhole != NULL)
  1524. dns_acl_detach(&mgr->blackhole);
  1525. if (mgr->stats != NULL)
  1526. isc_stats_detach(&mgr->stats);
  1527. if (mgr->v4ports != NULL) {
  1528. isc_mem_put(mctx, mgr->v4ports,
  1529. mgr->nv4ports * sizeof(in_port_t));
  1530. }
  1531. if (mgr->v6ports != NULL) {
  1532. isc_mem_put(mctx, mgr->v6ports,
  1533. mgr->nv6ports * sizeof(in_port_t));
  1534. }
  1535. isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
  1536. isc_mem_detach(&mctx);
  1537. }
  1538. static isc_result_t
  1539. open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
  1540. unsigned int options, isc_socket_t **sockp)
  1541. {
  1542. isc_socket_t *sock;
  1543. isc_result_t result;
  1544. sock = *sockp;
  1545. if (sock == NULL) {
  1546. result = isc_socket_create(mgr, isc_sockaddr_pf(local),
  1547. isc_sockettype_udp, &sock);
  1548. if (result != ISC_R_SUCCESS)
  1549. return (result);
  1550. isc_socket_setname(sock, "dispatcher", NULL);
  1551. } else {
  1552. #ifdef BIND9
  1553. result = isc_socket_open(sock);
  1554. if (result != ISC_R_SUCCESS)
  1555. return (result);
  1556. #else
  1557. INSIST(0);
  1558. #endif
  1559. }
  1560. #ifndef ISC_ALLOW_MAPPED
  1561. isc_socket_ipv6only(sock, ISC_TRUE);
  1562. #endif
  1563. result = isc_socket_bind(sock, local, options);
  1564. if (result != ISC_R_SUCCESS) {
  1565. if (*sockp == NULL)
  1566. isc_socket_detach(&sock);
  1567. else {
  1568. #ifdef BIND9
  1569. isc_socket_close(sock);
  1570. #else
  1571. INSIST(0);
  1572. #endif
  1573. }
  1574. return (result);
  1575. }
  1576. *sockp = sock;
  1577. return (ISC_R_SUCCESS);
  1578. }
  1579. /*%
  1580. * Create a temporary port list to set the initial default set of dispatch
  1581. * ports: [1024, 65535]. This is almost meaningless as the application will
  1582. * normally set the ports explicitly, but is provided to fill some minor corner
  1583. * cases.
  1584. */
  1585. static isc_result_t
  1586. create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) {
  1587. isc_result_t result;
  1588. result = isc_portset_create(mctx, portsetp);
  1589. if (result != ISC_R_SUCCESS)
  1590. return (result);
  1591. isc_portset_addrange(*portsetp, 1024, 65535);
  1592. return (ISC_R_SUCCESS);
  1593. }
  1594. /*
  1595. * Publics.
  1596. */
  1597. isc_result_t
  1598. dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
  1599. dns_dispatchmgr_t **mgrp)
  1600. {
  1601. dns_dispatchmgr_t *mgr;
  1602. isc_result_t result;
  1603. isc_portset_t *v4portset = NULL;
  1604. isc_portset_t *v6portset = NULL;
  1605. REQUIRE(mctx != NULL);
  1606. REQUIRE(mgrp != NULL && *mgrp == NULL);
  1607. mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
  1608. if (mgr == NULL)
  1609. return (ISC_R_NOMEMORY);
  1610. mgr->mctx = NULL;
  1611. isc_mem_attach(mctx, &mgr->mctx);
  1612. mgr->blackhole = NULL;
  1613. mgr->stats = NULL;
  1614. result = isc_mutex_init(&mgr->lock);
  1615. if (result != ISC_R_SUCCESS)
  1616. goto deallocate;
  1617. result = isc_mutex_init(&mgr->arc4_lock);
  1618. if (result != ISC_R_SUCCESS)
  1619. goto kill_lock;
  1620. result = isc_mutex_init(&mgr->buffer_lock);
  1621. if (re