/contrib/bind9/lib/dns/resolver.c

https://bitbucket.org/freebsd/freebsd-head/ · C · 8823 lines · 6299 code · 960 blank · 1564 comment · 1964 complexity · 651087635c1e860a3303d6aab40e83f7 MD5 · raw file

Large files are truncated click here to view the full file

  1. /*
  2. * Copyright (C) 2004-2012 Internet Systems Consortium, Inc. ("ISC")
  3. * Copyright (C) 1999-2003 Internet Software Consortium.
  4. *
  5. * Permission to use, copy, modify, and/or distribute this software for any
  6. * purpose with or without fee is hereby granted, provided that the above
  7. * copyright notice and this permission notice appear in all copies.
  8. *
  9. * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
  10. * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
  11. * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
  12. * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  13. * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  14. * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  15. * PERFORMANCE OF THIS SOFTWARE.
  16. */
  17. /* $Id$ */
  18. /*! \file */
  19. #include <config.h>
  20. #include <isc/platform.h>
  21. #include <isc/print.h>
  22. #include <isc/string.h>
  23. #include <isc/random.h>
  24. #include <isc/task.h>
  25. #include <isc/stats.h>
  26. #include <isc/timer.h>
  27. #include <isc/util.h>
  28. #include <dns/acl.h>
  29. #include <dns/adb.h>
  30. #include <dns/cache.h>
  31. #include <dns/db.h>
  32. #include <dns/dispatch.h>
  33. #include <dns/ds.h>
  34. #include <dns/events.h>
  35. #include <dns/forward.h>
  36. #include <dns/keytable.h>
  37. #include <dns/log.h>
  38. #include <dns/message.h>
  39. #include <dns/ncache.h>
  40. #include <dns/opcode.h>
  41. #include <dns/peer.h>
  42. #include <dns/rbt.h>
  43. #include <dns/rcode.h>
  44. #include <dns/rdata.h>
  45. #include <dns/rdataclass.h>
  46. #include <dns/rdatalist.h>
  47. #include <dns/rdataset.h>
  48. #include <dns/rdatastruct.h>
  49. #include <dns/rdatatype.h>
  50. #include <dns/resolver.h>
  51. #include <dns/result.h>
  52. #include <dns/rootns.h>
  53. #include <dns/stats.h>
  54. #include <dns/tsig.h>
  55. #include <dns/validator.h>
  56. #define DNS_RESOLVER_TRACE
  57. #ifdef DNS_RESOLVER_TRACE
  58. #define RTRACE(m) isc_log_write(dns_lctx, \
  59. DNS_LOGCATEGORY_RESOLVER, \
  60. DNS_LOGMODULE_RESOLVER, \
  61. ISC_LOG_DEBUG(3), \
  62. "res %p: %s", res, (m))
  63. #define RRTRACE(r, m) isc_log_write(dns_lctx, \
  64. DNS_LOGCATEGORY_RESOLVER, \
  65. DNS_LOGMODULE_RESOLVER, \
  66. ISC_LOG_DEBUG(3), \
  67. "res %p: %s", (r), (m))
  68. #define FCTXTRACE(m) isc_log_write(dns_lctx, \
  69. DNS_LOGCATEGORY_RESOLVER, \
  70. DNS_LOGMODULE_RESOLVER, \
  71. ISC_LOG_DEBUG(3), \
  72. "fctx %p(%s'): %s", fctx, fctx->info, (m))
  73. #define FCTXTRACE2(m1, m2) \
  74. isc_log_write(dns_lctx, \
  75. DNS_LOGCATEGORY_RESOLVER, \
  76. DNS_LOGMODULE_RESOLVER, \
  77. ISC_LOG_DEBUG(3), \
  78. "fctx %p(%s): %s %s", \
  79. fctx, fctx->info, (m1), (m2))
  80. #define FTRACE(m) isc_log_write(dns_lctx, \
  81. DNS_LOGCATEGORY_RESOLVER, \
  82. DNS_LOGMODULE_RESOLVER, \
  83. ISC_LOG_DEBUG(3), \
  84. "fetch %p (fctx %p(%s)): %s", \
  85. fetch, fetch->private, \
  86. fetch->private->info, (m))
  87. #define QTRACE(m) isc_log_write(dns_lctx, \
  88. DNS_LOGCATEGORY_RESOLVER, \
  89. DNS_LOGMODULE_RESOLVER, \
  90. ISC_LOG_DEBUG(3), \
  91. "resquery %p (fctx %p(%s)): %s", \
  92. query, query->fctx, \
  93. query->fctx->info, (m))
  94. #else
  95. #define RTRACE(m)
  96. #define RRTRACE(r, m)
  97. #define FCTXTRACE(m)
  98. #define FTRACE(m)
  99. #define QTRACE(m)
  100. #endif
  101. #ifndef DEFAULT_QUERY_TIMEOUT
  102. #define DEFAULT_QUERY_TIMEOUT 30 /* The default time in seconds for the whole query to live. */
  103. #endif
  104. #ifndef MAXIMUM_QUERY_TIMEOUT
  105. #define MAXIMUM_QUERY_TIMEOUT 30 /* The maximum time in seconds for the whole query to live. */
  106. #endif
  107. /*%
  108. * Maximum EDNS0 input packet size.
  109. */
  110. #define RECV_BUFFER_SIZE 4096 /* XXXRTH Constant. */
  111. /*%
  112. * This defines the maximum number of timeouts we will permit before we
  113. * disable EDNS0 on the query.
  114. */
  115. #define MAX_EDNS0_TIMEOUTS 3
typedef struct fetchctx fetchctx_t;

/*%
 * One outstanding query belonging to a fetch context.
 *
 * Queries are linked on the owning fetch context's `queries' list via
 * `link', and are torn down by fctx_cancelquery() / resquery_destroy().
 */
typedef struct query {
	/* Locked by task event serialization. */
	unsigned int			magic;
	fetchctx_t *			fctx;		/*%< owning fetch context */
	isc_mem_t *			mctx;		/*%< context this struct is returned to */
	dns_dispatchmgr_t *		dispatchmgr;
	dns_dispatch_t *		dispatch;	/*%< detached when the query is canceled */
	isc_boolean_t			exclusivesocket;
	dns_adbaddrinfo_t *		addrinfo;	/*%< server being queried; its SRTT is
							 *   adjusted when the query is canceled */
	isc_socket_t *			tcpsocket;	/*%< must be NULL by the time the
							 *   query is destroyed */
	isc_time_t			start;		/*%< start time used for RTT computation */
	dns_messageid_t			id;
	dns_dispentry_t *		dispentry;
	ISC_LINK(struct query)		link;
	isc_buffer_t			buffer;
	isc_buffer_t			*tsig;		/*%< freed when the query is canceled */
	dns_tsigkey_t			*tsigkey;	/*%< detached when the query is canceled */
	unsigned int			options;
	unsigned int			attributes;	/*%< RESQUERY_ATTR_* bits */
	unsigned int			sends;		/*%< tested by RESQUERY_SENDING() */
	unsigned int			connects;	/*%< tested by RESQUERY_CONNECTING() */
	unsigned char			data[512];
} resquery_t;

#define QUERY_MAGIC			ISC_MAGIC('Q', '!', '!', '!')
#define VALID_QUERY(query)		ISC_MAGIC_VALID(query, QUERY_MAGIC)

/*% Set in `attributes' once fctx_cancelquery() has been run on the query. */
#define RESQUERY_ATTR_CANCELED		0x02

/*% True while the `connects' counter is non-zero. */
#define RESQUERY_CONNECTING(q)		((q)->connects > 0)
/*% True once RESQUERY_ATTR_CANCELED has been set. */
#define RESQUERY_CANCELED(q)		(((q)->attributes & \
					  RESQUERY_ATTR_CANCELED) != 0)
/*% True while the `sends' counter is non-zero. */
#define RESQUERY_SENDING(q)		((q)->sends > 0)
  147. typedef enum {
  148. fetchstate_init = 0, /*%< Start event has not run yet. */
  149. fetchstate_active,
  150. fetchstate_done /*%< FETCHDONE events posted. */
  151. } fetchstate;
  152. typedef enum {
  153. badns_unreachable = 0,
  154. badns_response,
  155. badns_validation
  156. } badnstype_t;
/*%
 * Fetch context: the per-lookup state of the resolver.
 *
 * The members are grouped by the locking discipline that protects them
 * (see the section comments below).
 */
struct fetchctx {
	/*% Not locked. */
	unsigned int			magic;
	dns_resolver_t *		res;
	dns_name_t			name;
	dns_rdatatype_t			type;
	unsigned int			options;
	unsigned int			bucketnum;
	char *				info;		/*%< printed by the trace macros */
	isc_mem_t *			mctx;
	/*% Locked by appropriate bucket lock. */
	fetchstate			state;
	isc_boolean_t			want_shutdown;
	isc_boolean_t			cloned;
	isc_boolean_t			spilled;
	unsigned int			references;
	isc_event_t			control_event;
	ISC_LINK(struct fetchctx)	link;
	ISC_LIST(dns_fetchevent_t)	events;
	/*% Locked by task event serialization. */
	dns_name_t			domain;
	dns_rdataset_t			nameservers;
	unsigned int			attributes;	/*%< FCTX_ATTR_* bits */
	isc_timer_t *			timer;
	isc_time_t			expires;
	isc_interval_t			interval;
	dns_message_t *			qmessage;
	dns_message_t *			rmessage;
	ISC_LIST(resquery_t)		queries;
	dns_adbfindlist_t		finds;
	dns_adbfind_t *			find;
	dns_adbfindlist_t		altfinds;
	dns_adbfind_t *			altfind;
	dns_adbaddrinfolist_t		forwaddrs;
	dns_adbaddrinfolist_t		altaddrs;
	isc_sockaddrlist_t		forwarders;
	dns_fwdpolicy_t			fwdpolicy;
	isc_sockaddrlist_t		bad;
	isc_sockaddrlist_t		edns;
	isc_sockaddrlist_t		edns512;
	isc_sockaddrlist_t		bad_edns;
	dns_validator_t			*validator;	/*%< the non-deferred validator, if any */
	ISC_LIST(dns_validator_t)	validators;	/*%< every validator created by valcreate() */
	dns_db_t *			cache;
	dns_adb_t *			adb;
	isc_boolean_t			ns_ttl_ok;
	isc_uint32_t			ns_ttl;

	/*%
	 * The number of events we're waiting for.
	 */
	unsigned int			pending;

	/*%
	 * The number of times we've "restarted" the current
	 * nameserver set.  This acts as a failsafe to prevent
	 * us from pounding constantly on a particular set of
	 * servers that, for whatever reason, are not giving
	 * us useful responses, but are responding in such a
	 * way that they are not marked "bad".
	 */
	unsigned int			restarts;

	/*%
	 * The number of timeouts that have occurred since we
	 * last successfully received a response packet.  This
	 * is used for EDNS0 black hole detection.
	 */
	unsigned int			timeouts;

	/*%
	 * Look aside state for DS lookups.
	 */
	dns_name_t			nsname;
	dns_fetch_t *			nsfetch;
	dns_rdataset_t			nsrrset;

	/*%
	 * Number of queries that reference this context.
	 */
	unsigned int			nqueries;

	/*%
	 * The reason to print when logging a successful
	 * response to a query.
	 */
	const char *			reason;

	/*%
	 * Random numbers to use for mixing up server addresses.
	 */
	isc_uint32_t			rand_buf;
	isc_uint32_t			rand_bits;

	/*%
	 * Fetch-local statistics for detailed logging.
	 */
	isc_result_t			result;		/*%< fetch result */
	isc_result_t			vresult;	/*%< validation result */
	int				exitline;
	isc_time_t			start;
	isc_uint64_t			duration;
	isc_boolean_t			logged;
	unsigned int			querysent;
	unsigned int			referrals;
	unsigned int			lamecount;
	unsigned int			neterr;
	unsigned int			badresp;
	unsigned int			adberr;
	unsigned int			findfail;
	unsigned int			valfail;
	isc_boolean_t			timeout;
	dns_adbaddrinfo_t		*addrinfo;
	isc_sockaddr_t			*client;
};
  264. #define FCTX_MAGIC ISC_MAGIC('F', '!', '!', '!')
  265. #define VALID_FCTX(fctx) ISC_MAGIC_VALID(fctx, FCTX_MAGIC)
  266. #define FCTX_ATTR_HAVEANSWER 0x0001
  267. #define FCTX_ATTR_GLUING 0x0002
  268. #define FCTX_ATTR_ADDRWAIT 0x0004
  269. #define FCTX_ATTR_SHUTTINGDOWN 0x0008
  270. #define FCTX_ATTR_WANTCACHE 0x0010
  271. #define FCTX_ATTR_WANTNCACHE 0x0020
  272. #define FCTX_ATTR_NEEDEDNS0 0x0040
  273. #define FCTX_ATTR_TRIEDFIND 0x0080
  274. #define FCTX_ATTR_TRIEDALT 0x0100
  275. #define HAVE_ANSWER(f) (((f)->attributes & FCTX_ATTR_HAVEANSWER) != \
  276. 0)
  277. #define GLUING(f) (((f)->attributes & FCTX_ATTR_GLUING) != \
  278. 0)
  279. #define ADDRWAIT(f) (((f)->attributes & FCTX_ATTR_ADDRWAIT) != \
  280. 0)
  281. #define SHUTTINGDOWN(f) (((f)->attributes & FCTX_ATTR_SHUTTINGDOWN) \
  282. != 0)
  283. #define WANTCACHE(f) (((f)->attributes & FCTX_ATTR_WANTCACHE) != 0)
  284. #define WANTNCACHE(f) (((f)->attributes & FCTX_ATTR_WANTNCACHE) != 0)
  285. #define NEEDEDNS0(f) (((f)->attributes & FCTX_ATTR_NEEDEDNS0) != 0)
  286. #define TRIEDFIND(f) (((f)->attributes & FCTX_ATTR_TRIEDFIND) != 0)
  287. #define TRIEDALT(f) (((f)->attributes & FCTX_ATTR_TRIEDALT) != 0)
/*%
 * Argument block handed to dns_validator_create() by valcreate(): the
 * fetch context that requested the validation and the address info
 * passed in by valcreate()'s caller.
 */
typedef struct {
	dns_adbaddrinfo_t *		addrinfo;
	fetchctx_t *			fctx;
} dns_valarg_t;
/*%
 * Fetch handle.  `private' points at the fetch context (see FTRACE,
 * which prints fetch->private->info).
 */
struct dns_fetch {
	unsigned int			magic;
	fetchctx_t *			private;
};

#define DNS_FETCH_MAGIC			ISC_MAGIC('F', 't', 'c', 'h')
#define DNS_FETCH_VALID(fetch)		ISC_MAGIC_VALID(fetch, DNS_FETCH_MAGIC)
/*%
 * One bucket of fetch contexts.  The resolver owns an array of these
 * (dns_resolver.buckets, nbuckets entries); a fetch context records its
 * bucket in fetchctx.bucketnum.
 */
typedef struct fctxbucket {
	isc_task_t *			task;
	isc_mutex_t			lock;
	ISC_LIST(fetchctx_t)		fctxs;
	isc_boolean_t			exiting;
	isc_mem_t *			mctx;
} fctxbucket_t;
/*%
 * An entry on the resolver's `alternates' list.  The union holds either
 * a socket address or a name/port pair.
 * NOTE(review): `isaddress' presumably selects which union member is
 * valid -- confirm against the code that fills these in.
 */
typedef struct alternate {
	isc_boolean_t			isaddress;
	union	{
		isc_sockaddr_t		addr;
		struct {
			dns_name_t	name;
			in_port_t	port;
		} _n;
	} _u;
	ISC_LINK(struct alternate)	link;
} alternate_t;
/*%
 * Entry in the resolver's bad cache (dns_resolver.badcache).  Entries
 * are singly linked through `next'.
 */
typedef struct dns_badcache dns_badcache_t;
struct dns_badcache {
	dns_badcache_t *	next;
	dns_rdatatype_t		type;
	isc_time_t		expire;
	unsigned int		hashval;
	dns_name_t		name;
};
#define DNS_BADCACHE_SIZE 1021
/*%
 * Evaluates to the resolver's lame_ttl, but never less than 30.
 */
#define DNS_BADCACHE_TTL(fctx) \
	(((fctx)->res->lame_ttl > 30 ) ? (fctx)->res->lame_ttl : 30)
/*%
 * The resolver object.  Members are grouped by the lock that protects
 * them (see the section comments below).
 */
struct dns_resolver {
	/* Unlocked. */
	unsigned int			magic;
	isc_mem_t *			mctx;
	isc_mutex_t			lock;
	isc_mutex_t			nlock;
	isc_mutex_t			primelock;
	dns_rdataclass_t		rdclass;
	isc_socketmgr_t *		socketmgr;
	isc_timermgr_t *		timermgr;
	isc_taskmgr_t *			taskmgr;
	dns_view_t *			view;
	isc_boolean_t			frozen;
	unsigned int			options;
	dns_dispatchmgr_t *		dispatchmgr;
	dns_dispatch_t *		dispatchv4;
	isc_boolean_t			exclusivev4;
	dns_dispatch_t *		dispatchv6;
	isc_boolean_t			exclusivev6;
	unsigned int			ndisps;
	unsigned int			nbuckets;
	fctxbucket_t *			buckets;
	isc_uint32_t			lame_ttl;	/*%< read by DNS_BADCACHE_TTL() */
	ISC_LIST(alternate_t)		alternates;
	isc_uint16_t			udpsize;
#if USE_ALGLOCK
	isc_rwlock_t			alglock;
#endif
	dns_rbt_t *			algorithms;
#if USE_MBSLOCK
	isc_rwlock_t			mbslock;
#endif
	dns_rbt_t *			mustbesecure;
	unsigned int			spillatmax;
	unsigned int			spillatmin;
	isc_timer_t *			spillattimer;
	isc_boolean_t			zero_no_soa_ttl;
	unsigned int			query_timeout;
	/* Locked by lock. */
	unsigned int			references;
	isc_boolean_t			exiting;
	isc_eventlist_t			whenshutdown;
	unsigned int			activebuckets;
	isc_boolean_t			priming;
	unsigned int			spillat;	/* clients-per-query */
	unsigned int			nextdisp;
	/* Bad cache. */
	dns_badcache_t  **		badcache;
	unsigned int			badcount;
	unsigned int			badhash;
	unsigned int			badsweep;
	/* Locked by primelock. */
	dns_fetch_t *			primefetch;
	/* Locked by nlock. */
	unsigned int			nfctx;
};

#define RES_MAGIC			ISC_MAGIC('R', 'e', 's', '!')
#define VALID_RESOLVER(res)		ISC_MAGIC_VALID(res, RES_MAGIC)
  385. /*%
  386. * Private addrinfo flags. These must not conflict with DNS_FETCHOPT_NOEDNS0,
  387. * which we also use as an addrinfo flag.
  388. */
  389. #define FCTX_ADDRINFO_MARK 0x0001
  390. #define FCTX_ADDRINFO_FORWARDER 0x1000
  391. #define FCTX_ADDRINFO_TRIED 0x2000
  392. #define UNMARKED(a) (((a)->flags & FCTX_ADDRINFO_MARK) \
  393. == 0)
  394. #define ISFORWARDER(a) (((a)->flags & \
  395. FCTX_ADDRINFO_FORWARDER) != 0)
  396. #define TRIED(a) (((a)->flags & \
  397. FCTX_ADDRINFO_TRIED) != 0)
  398. #define NXDOMAIN(r) (((r)->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
  399. #define NEGATIVE(r) (((r)->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
  400. static void destroy(dns_resolver_t *res);
  401. static void empty_bucket(dns_resolver_t *res);
  402. static isc_result_t resquery_send(resquery_t *query);
  403. static void resquery_response(isc_task_t *task, isc_event_t *event);
  404. static void resquery_connected(isc_task_t *task, isc_event_t *event);
  405. static void fctx_try(fetchctx_t *fctx, isc_boolean_t retrying,
  406. isc_boolean_t badcache);
  407. static void fctx_destroy(fetchctx_t *fctx);
  408. static isc_boolean_t fctx_unlink(fetchctx_t *fctx);
  409. static isc_result_t ncache_adderesult(dns_message_t *message,
  410. dns_db_t *cache, dns_dbnode_t *node,
  411. dns_rdatatype_t covers,
  412. isc_stdtime_t now, dns_ttl_t maxttl,
  413. isc_boolean_t optout,
  414. dns_rdataset_t *ardataset,
  415. isc_result_t *eresultp);
  416. static void validated(isc_task_t *task, isc_event_t *event);
  417. static isc_boolean_t maybe_destroy(fetchctx_t *fctx, isc_boolean_t locked);
  418. static void add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
  419. isc_result_t reason, badnstype_t badtype);
  420. /*%
  421. * Increment resolver-related statistics counters.
  422. */
  423. static inline void
  424. inc_stats(dns_resolver_t *res, isc_statscounter_t counter) {
  425. if (res->view->resstats != NULL)
  426. isc_stats_increment(res->view->resstats, counter);
  427. }
  428. static isc_result_t
  429. valcreate(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, dns_name_t *name,
  430. dns_rdatatype_t type, dns_rdataset_t *rdataset,
  431. dns_rdataset_t *sigrdataset, unsigned int valoptions,
  432. isc_task_t *task)
  433. {
  434. dns_validator_t *validator = NULL;
  435. dns_valarg_t *valarg;
  436. isc_result_t result;
  437. valarg = isc_mem_get(fctx->mctx, sizeof(*valarg));
  438. if (valarg == NULL)
  439. return (ISC_R_NOMEMORY);
  440. valarg->fctx = fctx;
  441. valarg->addrinfo = addrinfo;
  442. if (!ISC_LIST_EMPTY(fctx->validators))
  443. INSIST((valoptions & DNS_VALIDATOR_DEFER) != 0);
  444. result = dns_validator_create(fctx->res->view, name, type, rdataset,
  445. sigrdataset, fctx->rmessage,
  446. valoptions, task, validated, valarg,
  447. &validator);
  448. if (result == ISC_R_SUCCESS) {
  449. inc_stats(fctx->res, dns_resstatscounter_val);
  450. if ((valoptions & DNS_VALIDATOR_DEFER) == 0) {
  451. INSIST(fctx->validator == NULL);
  452. fctx->validator = validator;
  453. }
  454. ISC_LIST_APPEND(fctx->validators, validator, link);
  455. } else
  456. isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));
  457. return (result);
  458. }
  459. static isc_boolean_t
  460. rrsig_fromchildzone(fetchctx_t *fctx, dns_rdataset_t *rdataset) {
  461. dns_namereln_t namereln;
  462. dns_rdata_rrsig_t rrsig;
  463. dns_rdata_t rdata = DNS_RDATA_INIT;
  464. int order;
  465. isc_result_t result;
  466. unsigned int labels;
  467. for (result = dns_rdataset_first(rdataset);
  468. result == ISC_R_SUCCESS;
  469. result = dns_rdataset_next(rdataset)) {
  470. dns_rdataset_current(rdataset, &rdata);
  471. result = dns_rdata_tostruct(&rdata, &rrsig, NULL);
  472. RUNTIME_CHECK(result == ISC_R_SUCCESS);
  473. namereln = dns_name_fullcompare(&rrsig.signer, &fctx->domain,
  474. &order, &labels);
  475. if (namereln == dns_namereln_subdomain)
  476. return (ISC_TRUE);
  477. dns_rdata_reset(&rdata);
  478. }
  479. return (ISC_FALSE);
  480. }
  481. static isc_boolean_t
  482. fix_mustbedelegationornxdomain(dns_message_t *message, fetchctx_t *fctx) {
  483. dns_name_t *name;
  484. dns_name_t *domain = &fctx->domain;
  485. dns_rdataset_t *rdataset;
  486. dns_rdatatype_t type;
  487. isc_result_t result;
  488. isc_boolean_t keep_auth = ISC_FALSE;
  489. if (message->rcode == dns_rcode_nxdomain)
  490. return (ISC_FALSE);
  491. /*
  492. * A DS RRset can appear anywhere in a zone, even for a delegation-only
  493. * zone. So a response to an explicit query for this type should be
  494. * excluded from delegation-only fixup.
  495. *
  496. * SOA, NS, and DNSKEY can only exist at a zone apex, so a postive
  497. * response to a query for these types can never violate the
  498. * delegation-only assumption: if the query name is below a
  499. * zone cut, the response should normally be a referral, which should
  500. * be accepted; if the query name is below a zone cut but the server
  501. * happens to have authority for the zone of the query name, the
  502. * response is a (non-referral) answer. But this does not violate
  503. * delegation-only because the query name must be in a different zone
  504. * due to the "apex-only" nature of these types. Note that if the
  505. * remote server happens to have authority for a child zone of a
  506. * delegation-only zone, we may still incorrectly "fix" the response
  507. * with NXDOMAIN for queries for other types. Unfortunately it's
  508. * generally impossible to differentiate this case from violation of
  509. * the delegation-only assumption. Once the resolver learns the
  510. * correct zone cut, possibly via a separate query for an "apex-only"
  511. * type, queries for other types will be resolved correctly.
  512. *
  513. * A query for type ANY will be accepted if it hits an exceptional
  514. * type above in the answer section as it should be from a child
  515. * zone.
  516. *
  517. * Also accept answers with RRSIG records from the child zone.
  518. * Direct queries for RRSIG records should not be answered from
  519. * the parent zone.
  520. */
  521. if (message->counts[DNS_SECTION_ANSWER] != 0 &&
  522. (fctx->type == dns_rdatatype_ns ||
  523. fctx->type == dns_rdatatype_ds ||
  524. fctx->type == dns_rdatatype_soa ||
  525. fctx->type == dns_rdatatype_any ||
  526. fctx->type == dns_rdatatype_rrsig ||
  527. fctx->type == dns_rdatatype_dnskey)) {
  528. result = dns_message_firstname(message, DNS_SECTION_ANSWER);
  529. while (result == ISC_R_SUCCESS) {
  530. name = NULL;
  531. dns_message_currentname(message, DNS_SECTION_ANSWER,
  532. &name);
  533. for (rdataset = ISC_LIST_HEAD(name->list);
  534. rdataset != NULL;
  535. rdataset = ISC_LIST_NEXT(rdataset, link)) {
  536. if (!dns_name_equal(name, &fctx->name))
  537. continue;
  538. type = rdataset->type;
  539. /*
  540. * RRsig from child?
  541. */
  542. if (type == dns_rdatatype_rrsig &&
  543. rrsig_fromchildzone(fctx, rdataset))
  544. return (ISC_FALSE);
  545. /*
  546. * Direct query for apex records or DS.
  547. */
  548. if (fctx->type == type &&
  549. (type == dns_rdatatype_ds ||
  550. type == dns_rdatatype_ns ||
  551. type == dns_rdatatype_soa ||
  552. type == dns_rdatatype_dnskey))
  553. return (ISC_FALSE);
  554. /*
  555. * Indirect query for apex records or DS.
  556. */
  557. if (fctx->type == dns_rdatatype_any &&
  558. (type == dns_rdatatype_ns ||
  559. type == dns_rdatatype_ds ||
  560. type == dns_rdatatype_soa ||
  561. type == dns_rdatatype_dnskey))
  562. return (ISC_FALSE);
  563. }
  564. result = dns_message_nextname(message,
  565. DNS_SECTION_ANSWER);
  566. }
  567. }
  568. /*
  569. * A NODATA response to a DS query?
  570. */
  571. if (fctx->type == dns_rdatatype_ds &&
  572. message->counts[DNS_SECTION_ANSWER] == 0)
  573. return (ISC_FALSE);
  574. /* Look for referral or indication of answer from child zone? */
  575. if (message->counts[DNS_SECTION_AUTHORITY] == 0)
  576. goto munge;
  577. result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
  578. while (result == ISC_R_SUCCESS) {
  579. name = NULL;
  580. dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
  581. for (rdataset = ISC_LIST_HEAD(name->list);
  582. rdataset != NULL;
  583. rdataset = ISC_LIST_NEXT(rdataset, link)) {
  584. type = rdataset->type;
  585. if (type == dns_rdatatype_soa &&
  586. dns_name_equal(name, domain))
  587. keep_auth = ISC_TRUE;
  588. if (type != dns_rdatatype_ns &&
  589. type != dns_rdatatype_soa &&
  590. type != dns_rdatatype_rrsig)
  591. continue;
  592. if (type == dns_rdatatype_rrsig) {
  593. if (rrsig_fromchildzone(fctx, rdataset))
  594. return (ISC_FALSE);
  595. else
  596. continue;
  597. }
  598. /* NS or SOA records. */
  599. if (dns_name_equal(name, domain)) {
  600. /*
  601. * If a query for ANY causes a negative
  602. * response, we can be sure that this is
  603. * an empty node. For other type of queries
  604. * we cannot differentiate an empty node
  605. * from a node that just doesn't have that
  606. * type of record. We only accept the former
  607. * case.
  608. */
  609. if (message->counts[DNS_SECTION_ANSWER] == 0 &&
  610. fctx->type == dns_rdatatype_any)
  611. return (ISC_FALSE);
  612. } else if (dns_name_issubdomain(name, domain)) {
  613. /* Referral or answer from child zone. */
  614. return (ISC_FALSE);
  615. }
  616. }
  617. result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
  618. }
  619. munge:
  620. message->rcode = dns_rcode_nxdomain;
  621. message->counts[DNS_SECTION_ANSWER] = 0;
  622. if (!keep_auth)
  623. message->counts[DNS_SECTION_AUTHORITY] = 0;
  624. message->counts[DNS_SECTION_ADDITIONAL] = 0;
  625. return (ISC_TRUE);
  626. }
  627. static inline isc_result_t
  628. fctx_starttimer(fetchctx_t *fctx) {
  629. /*
  630. * Start the lifetime timer for fctx.
  631. *
  632. * This is also used for stopping the idle timer; in that
  633. * case we must purge events already posted to ensure that
  634. * no further idle events are delivered.
  635. */
  636. return (isc_timer_reset(fctx->timer, isc_timertype_once,
  637. &fctx->expires, NULL, ISC_TRUE));
  638. }
  639. static inline void
  640. fctx_stoptimer(fetchctx_t *fctx) {
  641. isc_result_t result;
  642. /*
  643. * We don't return a result if resetting the timer to inactive fails
  644. * since there's nothing to be done about it. Resetting to inactive
  645. * should never fail anyway, since the code as currently written
  646. * cannot fail in that case.
  647. */
  648. result = isc_timer_reset(fctx->timer, isc_timertype_inactive,
  649. NULL, NULL, ISC_TRUE);
  650. if (result != ISC_R_SUCCESS) {
  651. UNEXPECTED_ERROR(__FILE__, __LINE__,
  652. "isc_timer_reset(): %s",
  653. isc_result_totext(result));
  654. }
  655. }
  656. static inline isc_result_t
  657. fctx_startidletimer(fetchctx_t *fctx, isc_interval_t *interval) {
  658. /*
  659. * Start the idle timer for fctx. The lifetime timer continues
  660. * to be in effect.
  661. */
  662. return (isc_timer_reset(fctx->timer, isc_timertype_once,
  663. &fctx->expires, interval, ISC_FALSE));
  664. }
  665. /*
  666. * Stopping the idle timer is equivalent to calling fctx_starttimer(), but
  667. * we use fctx_stopidletimer for readability in the code below.
  668. */
  669. #define fctx_stopidletimer fctx_starttimer
  670. static inline void
  671. resquery_destroy(resquery_t **queryp) {
  672. resquery_t *query;
  673. REQUIRE(queryp != NULL);
  674. query = *queryp;
  675. REQUIRE(!ISC_LINK_LINKED(query, link));
  676. INSIST(query->tcpsocket == NULL);
  677. query->fctx->nqueries--;
  678. if (SHUTTINGDOWN(query->fctx)) {
  679. dns_resolver_t *res = query->fctx->res;
  680. if (maybe_destroy(query->fctx, ISC_FALSE))
  681. empty_bucket(res);
  682. }
  683. query->magic = 0;
  684. isc_mem_put(query->mctx, query, sizeof(*query));
  685. *queryp = NULL;
  686. }
/*%
 * Cancel the query `*queryp'.
 *
 * \li	`deventp' is passed through to dns_dispatch_removeresponse().
 * \li	`finish', when non-NULL, is the time the response arrived; it is
 *	used with query->start to compute a measured RTT.
 * \li	`no_response', when true and `finish' is NULL, causes the
 *	server's smoothed RTT to be replaced by a penalised value.
 *
 * The query is marked canceled, the ADB's RTT data is updated, any
 * pending connect or send is canceled on its socket, the query is
 * removed from the dispatcher and from fctx->queries, and its TSIG
 * buffer, TSIG key and dispatch references are dropped.  The query
 * structure itself is destroyed here only if no connect or send is
 * still outstanding; otherwise that is left to the event handlers.
 *
 * Note that `*queryp' is never written: resquery_destroy() is applied
 * to the local copy of the pointer.
 */
static void
fctx_cancelquery(resquery_t **queryp, dns_dispatchevent_t **deventp,
		 isc_time_t *finish, isc_boolean_t no_response)
{
	fetchctx_t *fctx;
	resquery_t *query;
	unsigned int rtt, rttms;
	unsigned int factor;
	dns_adbfind_t *find;
	dns_adbaddrinfo_t *addrinfo;
	isc_socket_t *socket;

	query = *queryp;
	fctx = query->fctx;

	FCTXTRACE("cancelquery");

	/* A query may be canceled only once. */
	REQUIRE(!RESQUERY_CANCELED(query));

	query->attributes |= RESQUERY_ATTR_CANCELED;

	/*
	 * Should we update the RTT?
	 */
	if (finish != NULL || no_response) {
		if (finish != NULL) {
			/*
			 * We have both the start and finish times for this
			 * packet, so we can compute a real RTT.
			 */
			rtt = (unsigned int)isc_time_microdiff(finish,
							       &query->start);
			factor = DNS_ADB_RTTADJDEFAULT;

			/*
			 * Count the response in the statistics bucket
			 * matching its RTT in milliseconds.
			 */
			rttms = rtt / 1000;
			if (rttms < DNS_RESOLVER_QRYRTTCLASS0) {
				inc_stats(fctx->res,
					  dns_resstatscounter_queryrtt0);
			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS1) {
				inc_stats(fctx->res,
					  dns_resstatscounter_queryrtt1);
			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS2) {
				inc_stats(fctx->res,
					  dns_resstatscounter_queryrtt2);
			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS3) {
				inc_stats(fctx->res,
					  dns_resstatscounter_queryrtt3);
			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS4) {
				inc_stats(fctx->res,
					  dns_resstatscounter_queryrtt4);
			} else {
				inc_stats(fctx->res,
					  dns_resstatscounter_queryrtt5);
			}
		} else {
			/*
			 * We don't have an RTT for this query.  Maybe the
			 * packet was lost, or maybe this server is very
			 * slow.  We don't know.  Increase the RTT.
			 *
			 * The penalty is 200000 on top of the current
			 * smoothed RTT, capped at 10000000 (the same
			 * units as the measured RTT above, i.e.
			 * microseconds).
			 */
			INSIST(no_response);
			rtt = query->addrinfo->srtt + 200000;
			if (rtt > 10000000)
				rtt = 10000000;
			/*
			 * Replace the current RTT with our value.
			 */
			factor = DNS_ADB_RTTADJREPLACE;
		}
		dns_adb_adjustsrtt(fctx->adb, query->addrinfo, rtt, factor);
	}

	/* Remember that the server has been tried. */
	if (!TRIED(query->addrinfo)) {
		dns_adb_changeflags(fctx->adb, query->addrinfo,
				    FCTX_ADDRINFO_TRIED, FCTX_ADDRINFO_TRIED);
	}

	/*
	 * Age RTTs of servers not tried.
	 *
	 * This is done only when a response was received (finish !=
	 * NULL), and covers every unmarked address on the forwarder
	 * list, the find lists (if TRIEDFIND) and the alternate address
	 * and find lists (if TRIEDALT).
	 */
	factor = DNS_ADB_RTTADJAGE;
	if (finish != NULL)
		for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
		     addrinfo != NULL;
		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
			if (UNMARKED(addrinfo))
				dns_adb_adjustsrtt(fctx->adb, addrinfo,
						   0, factor);

	if (finish != NULL && TRIEDFIND(fctx))
		for (find = ISC_LIST_HEAD(fctx->finds);
		     find != NULL;
		     find = ISC_LIST_NEXT(find, publink))
			for (addrinfo = ISC_LIST_HEAD(find->list);
			     addrinfo != NULL;
			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
				if (UNMARKED(addrinfo))
					dns_adb_adjustsrtt(fctx->adb, addrinfo,
							   0, factor);

	if (finish != NULL && TRIEDALT(fctx)) {
		for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
		     addrinfo != NULL;
		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
			if (UNMARKED(addrinfo))
				dns_adb_adjustsrtt(fctx->adb, addrinfo,
						   0, factor);
		for (find = ISC_LIST_HEAD(fctx->altfinds);
		     find != NULL;
		     find = ISC_LIST_NEXT(find, publink))
			for (addrinfo = ISC_LIST_HEAD(find->list);
			     addrinfo != NULL;
			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
				if (UNMARKED(addrinfo))
					dns_adb_adjustsrtt(fctx->adb, addrinfo,
							   0, factor);
	}

	/*
	 * Check for any outstanding socket events.  If they exist, cancel
	 * them and let the event handlers finish the cleanup.  The resolver
	 * only needs to worry about managing the connect and send events;
	 * the dispatcher manages the recv events.
	 */
	if (RESQUERY_CONNECTING(query)) {
		/*
		 * Cancel the connect.
		 */
		if (query->tcpsocket != NULL) {
			isc_socket_cancel(query->tcpsocket, NULL,
					  ISC_SOCKCANCEL_CONNECT);
		} else if (query->dispentry != NULL) {
			INSIST(query->exclusivesocket);
			socket = dns_dispatch_getentrysocket(query->dispentry);
			if (socket != NULL)
				isc_socket_cancel(socket, NULL,
						  ISC_SOCKCANCEL_CONNECT);
		}
	} else if (RESQUERY_SENDING(query)) {
		/*
		 * Cancel the pending send.
		 */
		if (query->exclusivesocket && query->dispentry != NULL)
			socket = dns_dispatch_getentrysocket(query->dispentry);
		else
			socket = dns_dispatch_getsocket(query->dispatch);
		if (socket != NULL)
			isc_socket_cancel(socket, NULL, ISC_SOCKCANCEL_SEND);
	}

	/*
	 * The socket lookups above need dispentry/dispatch, so these are
	 * released only after the cancellation has been issued.
	 */
	if (query->dispentry != NULL)
		dns_dispatch_removeresponse(&query->dispentry, deventp);

	ISC_LIST_UNLINK(fctx->queries, query, link);

	if (query->tsig != NULL)
		isc_buffer_free(&query->tsig);

	if (query->tsigkey != NULL)
		dns_tsigkey_detach(&query->tsigkey);

	if (query->dispatch != NULL)
		dns_dispatch_detach(&query->dispatch);

	if (! (RESQUERY_CONNECTING(query) || RESQUERY_SENDING(query)))
		/*
		 * It's safe to destroy the query now.
		 */
		resquery_destroy(&query);
}
  841. static void
  842. fctx_cancelqueries(fetchctx_t *fctx, isc_boolean_t no_response) {
  843. resquery_t *query, *next_query;
  844. FCTXTRACE("cancelqueries");
  845. for (query = ISC_LIST_HEAD(fctx->queries);
  846. query != NULL;
  847. query = next_query) {
  848. next_query = ISC_LIST_NEXT(query, link);
  849. fctx_cancelquery(&query, NULL, NULL, no_response);
  850. }
  851. }
  852. static void
  853. fctx_cleanupfinds(fetchctx_t *fctx) {
  854. dns_adbfind_t *find, *next_find;
  855. REQUIRE(ISC_LIST_EMPTY(fctx->queries));
  856. for (find = ISC_LIST_HEAD(fctx->finds);
  857. find != NULL;
  858. find = next_find) {
  859. next_find = ISC_LIST_NEXT(find, publink);
  860. ISC_LIST_UNLINK(fctx->finds, find, publink);
  861. dns_adb_destroyfind(&find);
  862. }
  863. fctx->find = NULL;
  864. }
  865. static void
  866. fctx_cleanupaltfinds(fetchctx_t *fctx) {
  867. dns_adbfind_t *find, *next_find;
  868. REQUIRE(ISC_LIST_EMPTY(fctx->queries));
  869. for (find = ISC_LIST_HEAD(fctx->altfinds);
  870. find != NULL;
  871. find = next_find) {
  872. next_find = ISC_LIST_NEXT(find, publink);
  873. ISC_LIST_UNLINK(fctx->altfinds, find, publink);
  874. dns_adb_destroyfind(&find);
  875. }
  876. fctx->altfind = NULL;
  877. }
  878. static void
  879. fctx_cleanupforwaddrs(fetchctx_t *fctx) {
  880. dns_adbaddrinfo_t *addr, *next_addr;
  881. REQUIRE(ISC_LIST_EMPTY(fctx->queries));
  882. for (addr = ISC_LIST_HEAD(fctx->forwaddrs);
  883. addr != NULL;
  884. addr = next_addr) {
  885. next_addr = ISC_LIST_NEXT(addr, publink);
  886. ISC_LIST_UNLINK(fctx->forwaddrs, addr, publink);
  887. dns_adb_freeaddrinfo(fctx->adb, &addr);
  888. }
  889. }
  890. static void
  891. fctx_cleanupaltaddrs(fetchctx_t *fctx) {
  892. dns_adbaddrinfo_t *addr, *next_addr;
  893. REQUIRE(ISC_LIST_EMPTY(fctx->queries));
  894. for (addr = ISC_LIST_HEAD(fctx->altaddrs);
  895. addr != NULL;
  896. addr = next_addr) {
  897. next_addr = ISC_LIST_NEXT(addr, publink);
  898. ISC_LIST_UNLINK(fctx->altaddrs, addr, publink);
  899. dns_adb_freeaddrinfo(fctx->adb, &addr);
  900. }
  901. }
  902. static inline void
  903. fctx_stopeverything(fetchctx_t *fctx, isc_boolean_t no_response) {
  904. FCTXTRACE("stopeverything");
  905. fctx_cancelqueries(fctx, no_response);
  906. fctx_cleanupfinds(fctx);
  907. fctx_cleanupaltfinds(fctx);
  908. fctx_cleanupforwaddrs(fctx);
  909. fctx_cleanupaltaddrs(fctx);
  910. fctx_stoptimer(fctx);
  911. }
  912. static inline void
  913. fctx_sendevents(fetchctx_t *fctx, isc_result_t result, int line) {
  914. dns_fetchevent_t *event, *next_event;
  915. isc_task_t *task;
  916. unsigned int count = 0;
  917. isc_interval_t i;
  918. isc_boolean_t logit = ISC_FALSE;
  919. isc_time_t now;
  920. unsigned int old_spillat;
  921. unsigned int new_spillat = 0; /* initialized to silence
  922. compiler warnings */
  923. /*
  924. * Caller must be holding the appropriate bucket lock.
  925. */
  926. REQUIRE(fctx->state == fetchstate_done);
  927. FCTXTRACE("sendevents");
  928. /*
  929. * Keep some record of fetch result for logging later (if required).
  930. */
  931. fctx->result = result;
  932. fctx->exitline = line;
  933. TIME_NOW(&now);
  934. fctx->duration = isc_time_microdiff(&now, &fctx->start);
  935. for (event = ISC_LIST_HEAD(fctx->events);
  936. event != NULL;
  937. event = next_event) {
  938. next_event = ISC_LIST_NEXT(event, ev_link);
  939. ISC_LIST_UNLINK(fctx->events, event, ev_link);
  940. task = event->ev_sender;
  941. event->ev_sender = fctx;
  942. event->vresult = fctx->vresult;
  943. if (!HAVE_ANSWER(fctx))
  944. event->result = result;
  945. INSIST(result != ISC_R_SUCCESS ||
  946. dns_rdataset_isassociated(event->rdataset) ||
  947. fctx->type == dns_rdatatype_any ||
  948. fctx->type == dns_rdatatype_rrsig ||
  949. fctx->type == dns_rdatatype_sig);
  950. /*
  951. * Negative results must be indicated in event->result.
  952. */
  953. if (dns_rdataset_isassociated(event->rdataset) &&
  954. NEGATIVE(event->rdataset)) {
  955. INSIST(event->result == DNS_R_NCACHENXDOMAIN ||
  956. event->result == DNS_R_NCACHENXRRSET);
  957. }
  958. isc_task_sendanddetach(&task, ISC_EVENT_PTR(&event));
  959. count++;
  960. }
  961. if ((fctx->attributes & FCTX_ATTR_HAVEANSWER) != 0 &&
  962. fctx->spilled &&
  963. (count < fctx->res->spillatmax || fctx->res->spillatmax == 0)) {
  964. LOCK(&fctx->res->lock);
  965. if (count == fctx->res->spillat && !fctx->res->exiting) {
  966. old_spillat = fctx->res->spillat;
  967. fctx->res->spillat += 5;
  968. if (fctx->res->spillat > fctx->res->spillatmax &&
  969. fctx->res->spillatmax != 0)
  970. fctx->res->spillat = fctx->res->spillatmax;
  971. new_spillat = fctx->res->spillat;
  972. if (new_spillat != old_spillat) {
  973. logit = ISC_TRUE;
  974. }
  975. isc_interval_set(&i, 20 * 60, 0);
  976. result = isc_timer_reset(fctx->res->spillattimer,
  977. isc_timertype_ticker, NULL,
  978. &i, ISC_TRUE);
  979. RUNTIME_CHECK(result == ISC_R_SUCCESS);
  980. }
  981. UNLOCK(&fctx->res->lock);
  982. if (logit)
  983. isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
  984. DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
  985. "clients-per-query increased to %u",
  986. new_spillat);
  987. }
  988. }
  989. static inline void
  990. log_edns(fetchctx_t *fctx) {
  991. char domainbuf[DNS_NAME_FORMATSIZE];
  992. if (fctx->reason == NULL)
  993. return;
  994. dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
  995. isc_log_write(dns_lctx, DNS_LOGCATEGORY_EDNS_DISABLED,
  996. DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
  997. "success resolving '%s' (in '%s'?) after %s",
  998. fctx->info, domainbuf, fctx->reason);
  999. fctx->reason = NULL;
  1000. }
  1001. static void
  1002. fctx_done(fetchctx_t *fctx, isc_result_t result, int line) {
  1003. dns_resolver_t *res;
  1004. isc_boolean_t no_response;
  1005. REQUIRE(line >= 0);
  1006. FCTXTRACE("done");
  1007. res = fctx->res;
  1008. if (result == ISC_R_SUCCESS) {
  1009. /*%
  1010. * Log any deferred EDNS timeout messages.
  1011. */
  1012. log_edns(fctx);
  1013. no_response = ISC_TRUE;
  1014. } else
  1015. no_response = ISC_FALSE;
  1016. fctx->reason = NULL;
  1017. fctx_stopeverything(fctx, no_response);
  1018. LOCK(&res->buckets[fctx->bucketnum].lock);
  1019. fctx->state = fetchstate_done;
  1020. fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
  1021. fctx_sendevents(fctx, result, line);
  1022. UNLOCK(&res->buckets[fctx->bucketnum].lock);
  1023. }
  1024. static void
  1025. process_sendevent(resquery_t *query, isc_event_t *event) {
  1026. isc_socketevent_t *sevent = (isc_socketevent_t *)event;
  1027. isc_boolean_t retry = ISC_FALSE;
  1028. isc_result_t result;
  1029. fetchctx_t *fctx;
  1030. fctx = query->fctx;
  1031. if (RESQUERY_CANCELED(query)) {
  1032. if (query->sends == 0 && query->connects == 0) {
  1033. /*
  1034. * This query was canceled while the
  1035. * isc_socket_sendto/connect() was in progress.
  1036. */
  1037. if (query->tcpsocket != NULL)
  1038. isc_socket_detach(&query->tcpsocket);
  1039. resquery_destroy(&query);
  1040. }
  1041. } else {
  1042. switch (sevent->result) {
  1043. case ISC_R_SUCCESS:
  1044. break;
  1045. case ISC_R_HOSTUNREACH:
  1046. case ISC_R_NETUNREACH:
  1047. case ISC_R_NOPERM:
  1048. case ISC_R_ADDRNOTAVAIL:
  1049. case ISC_R_CONNREFUSED:
  1050. /*
  1051. * No route to remote.
  1052. */
  1053. add_bad(fctx, query->addrinfo, sevent->result,
  1054. badns_unreachable);
  1055. fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
  1056. retry = ISC_TRUE;
  1057. break;
  1058. default:
  1059. fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
  1060. break;
  1061. }
  1062. }
  1063. isc_event_free(&event);
  1064. if (retry) {
  1065. /*
  1066. * Behave as if the idle timer has expired. For TCP
  1067. * this may not actually reflect the latest timer.
  1068. */
  1069. fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
  1070. result = fctx_stopidletimer(fctx);
  1071. if (result != ISC_R_SUCCESS)
  1072. fctx_done(fctx, result, __LINE__);
  1073. else
  1074. fctx_try(fctx, ISC_TRUE, ISC_FALSE);
  1075. }
  1076. }
  1077. static void
  1078. resquery_udpconnected(isc_task_t *task, isc_event_t *event) {
  1079. resquery_t *query = event->ev_arg;
  1080. REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
  1081. QTRACE("udpconnected");
  1082. UNUSED(task);
  1083. INSIST(RESQUERY_CONNECTING(query));
  1084. query->connects--;
  1085. process_sendevent(query, event);
  1086. }
  1087. static void
  1088. resquery_senddone(isc_task_t *task, isc_event_t *event) {
  1089. resquery_t *query = event->ev_arg;
  1090. REQUIRE(event->ev_type == ISC_SOCKEVENT_SENDDONE);
  1091. QTRACE("senddone");
  1092. /*
  1093. * XXXRTH
  1094. *
  1095. * Currently we don't wait for the senddone event before retrying
  1096. * a query. This means that if we get really behind, we may end
  1097. * up doing extra work!
  1098. */
  1099. UNUSED(task);
  1100. INSIST(RESQUERY_SENDING(query));
  1101. query->sends--;
  1102. process_sendevent(query, event);
  1103. }
  1104. static inline isc_result_t
  1105. fctx_addopt(dns_message_t *message, unsigned int version,
  1106. isc_uint16_t udpsize, isc_boolean_t request_nsid)
  1107. {
  1108. dns_rdataset_t *rdataset;
  1109. dns_rdatalist_t *rdatalist;
  1110. dns_rdata_t *rdata;
  1111. isc_result_t result;
  1112. rdatalist = NULL;
  1113. result = dns_message_gettemprdatalist(message, &rdatalist);
  1114. if (result != ISC_R_SUCCESS)
  1115. return (result);
  1116. rdata = NULL;
  1117. result = dns_message_gettemprdata(message, &rdata);
  1118. if (result != ISC_R_SUCCESS)
  1119. return (result);
  1120. rdataset = NULL;
  1121. result = dns_message_gettemprdataset(message, &rdataset);
  1122. if (result != ISC_R_SUCCESS)
  1123. return (result);
  1124. dns_rdataset_init(rdataset);
  1125. rdatalist->type = dns_rdatatype_opt;
  1126. rdatalist->covers = 0;
  1127. /*
  1128. * Set Maximum UDP buffer size.
  1129. */
  1130. rdatalist->rdclass = udpsize;
  1131. /*
  1132. * Set EXTENDED-RCODE and Z to 0, DO to 1.
  1133. */
  1134. rdatalist->ttl = (version << 16);
  1135. rdatalist->ttl |= DNS_MESSAGEEXTFLAG_DO;
  1136. /*
  1137. * Set EDNS options if applicable
  1138. */
  1139. if (request_nsid) {
  1140. /* Send empty NSID option (RFC5001) */
  1141. unsigned char data[4];
  1142. isc_buffer_t buf;
  1143. isc_buffer_init(&buf, data, sizeof(data));
  1144. isc_buffer_putuint16(&buf, DNS_OPT_NSID);
  1145. isc_buffer_putuint16(&buf, 0);
  1146. rdata->data = data;
  1147. rdata->length = sizeof(data);
  1148. } else {
  1149. rdata->data = NULL;
  1150. rdata->length = 0;
  1151. }
  1152. rdata->rdclass = rdatalist->rdclass;
  1153. rdata->type = rdatalist->type;
  1154. rdata->flags = 0;
  1155. ISC_LIST_INIT(rdatalist->rdata);
  1156. ISC_LIST_APPEND(rdatalist->rdata, rdata, link);
  1157. RUNTIME_CHECK(dns_rdatalist_tordataset(rdatalist, rdataset) == ISC_R_SUCCESS);
  1158. return (dns_message_setopt(message, rdataset));
  1159. }
  1160. static inline void
  1161. fctx_setretryinterval(fetchctx_t *fctx, unsigned int rtt) {
  1162. unsigned int seconds;
  1163. unsigned int us;
  1164. /*
  1165. * We retry every .8 seconds the first two times through the address
  1166. * list, and then we do exponential back-off.
  1167. */
  1168. if (fctx->restarts < 3)
  1169. us = 800000;
  1170. else
  1171. us = (800000 << (fctx->restarts - 2));
  1172. /*
  1173. * Double the round-trip time.
  1174. */
  1175. rtt *= 2;
  1176. /*
  1177. * Always wait for at least the doubled round-trip time.
  1178. */
  1179. if (us < rtt)
  1180. us = rtt;
  1181. /*
  1182. * But don't ever wait for more than 10 seconds.
  1183. */
  1184. if (us > 10000000)
  1185. us = 10000000;
  1186. seconds = us / 1000000;
  1187. us -= seconds * 1000000;
  1188. isc_interval_set(&fctx->interval, seconds, us * 1000);
  1189. }
  1190. static isc_result_t
  1191. fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
  1192. unsigned int options)
  1193. {
  1194. dns_resolver_t *res;
  1195. isc_task_t *task;
  1196. isc_result_t result;
  1197. resquery_t *query;
  1198. isc_sockaddr_t addr;
  1199. isc_boolean_t have_addr = ISC_FALSE;
  1200. unsigned int srtt;
  1201. FCTXTRACE("query");
  1202. res = fctx->res;
  1203. task = res->buckets[fctx->bucketnum].task;
  1204. srtt = addrinfo->srtt;
  1205. if (ISFORWARDER(addrinfo) && srtt < 1000000)
  1206. srtt = 1000000;
  1207. fctx_setretryinterval(fctx, srtt);
  1208. result = fctx_startidletimer(fctx, &fctx->interval);
  1209. if (result != ISC_R_SUCCESS)
  1210. return (result);
  1211. INSIST(ISC_LIST_EMPTY(fctx->validators));
  1212. dns_message_reset(fctx->rmessage, DNS_MESSAGE_INTENTPARSE);
  1213. query = isc_mem_get(fctx->mctx, sizeof(*query));
  1214. if (query == NULL) {
  1215. result = ISC_R_NOMEMORY;
  1216. goto stop_idle_timer;
  1217. }
  1218. query->mctx = fctx->mctx;
  1219. query->options = options;
  1220. query->attributes = 0;
  1221. query->sends = 0;
  1222. query->connects = 0;
  1223. /*
  1224. * Note that the caller MUST guarantee that 'addrinfo' will remain
  1225. * valid until this query is canceled.
  1226. */
  1227. query->addrinfo = addrinfo;
  1228. TIME_NOW(&query->start);
  1229. /*
  1230. * If this is a TCP query, then we need to make a socket and
  1231. * a dispatch for it here. Otherwise we use the resolver's
  1232. * shared dispatch.
  1233. */
  1234. query->dispatchmgr = res->dispatchmgr;
  1235. query->dispatch = NULL;
  1236. query->exclusivesocket = ISC_FALSE;
  1237. query->tcpsocket = NULL;
  1238. if (res->view->peers != NULL) {
  1239. dns_peer_t *peer = NULL;
  1240. isc_netaddr_t dstip;
  1241. isc_netaddr_fromsockaddr(&dstip, &addrinfo->sockaddr);
  1242. result = dns_peerlist_peerbyaddr(res->view->peers,
  1243. &dstip, &peer);
  1244. if (result == ISC_R_SUCCESS) {
  1245. result = dns_peer_getquerysource(peer, &addr);
  1246. if (result == ISC_R_SUCCESS)
  1247. have_addr = ISC_TRUE;
  1248. }
  1249. }
  1250. if ((query->options & DNS_FETCHOPT_TCP) != 0) {
  1251. int pf;
  1252. pf = isc_sockaddr_pf(&addrinfo->sockaddr);
  1253. if (!have_addr) {
  1254. switch (pf) {
  1255. case PF_INET:
  1256. result =
  1257. dns_dispatch_getlocaladdress(res->dispatchv4,
  1258. &addr);
  1259. break;
  1260. case PF_INET6:
  1261. result =
  1262. dns_dispatch_getlocaladdress(res->dispatchv6,
  1263. &addr);
  1264. break;
  1265. default:
  1266. result = ISC_R_NOTIMPLEMENTED;
  1267. break;
  1268. }
  1269. if (result != ISC_R_SUCCESS)
  1270. goto cleanup_query;
  1271. }
  1272. isc_sockaddr_setport(&addr, 0);
  1273. result = isc_socket_create(res->socketmgr, pf,
  1274. isc_sockettype_tcp,
  1275. &query->tcpsocket);
  1276. if (result != ISC_R_SUCCESS)
  1277. goto cleanup_query;
  1278. #ifndef BROKEN_TCP_BIND_BEFORE_CONNECT
  1279. result = isc_socket_bind(query->tcpsocket, &addr, 0);
  1280. if (result != ISC_R_SUCCESS)
  1281. goto cleanup_socket;
  1282. #endif
  1283. /*
  1284. * A dispatch will be created once the connect succeeds.
  1285. */
  1286. } else {
  1287. if (have_addr) {
  1288. unsigned int attrs, attrmask;
  1289. attrs = DNS_DISPATCHATTR_UDP;
  1290. switch (isc_sockaddr_pf(&addr)) {
  1291. case AF_INET:
  1292. attrs |= DNS_DISPATCHATTR_IPV4;
  1293. break;
  1294. case AF_INET6:
  1295. attrs |= DNS_DISPATCHATTR_IPV6;
  1296. break;
  1297. default:
  1298. result = ISC_R_NOTIMPLEMENTED;
  1299. goto cleanup_query;
  1300. }
  1301. attrmask = DNS_DISPATCHATTR_UDP;
  1302. attrmask |= DNS_DISPATCHATTR_TCP;
  1303. attrmask |= DNS_DISPATCHATTR_IPV4;
  1304. attrmask |= DNS_DISPATCHATTR_IPV6;
  1305. result = dns_dispatch_getudp(res->dispatchmgr,
  1306. res->socketmgr,
  1307. res->taskmgr, &addr,
  1308. 4096, 1000, 32768, 16411,
  1309. 16433, attrs, attrmask,
  1310. &query->dispatch);
  1311. if (result != ISC_R_SUCCESS)
  1312. goto cleanup_query;
  1313. } else {
  1314. switch (isc_sockaddr_pf(&addrinfo->sockaddr)) {
  1315. case PF_INET:
  1316. dns_dispatch_attach(res->dispatchv4,
  1317. &query->dispatch);
  1318. query->exclusivesocket = res->exclusivev4;
  1319. break;
  1320. case PF_INET6:
  1321. dns_dispatch_attach(res->dispatchv6,
  1322. &query->dispatch);
  1323. query->exclusivesocket = res->exclusivev6;
  1324. break;
  1325. default:
  1326. result = ISC_R_NOTIMPLEMENTED;
  1327. goto cleanup_query;
  1328. }
  1329. }
  1330. /*
  1331. * We should always have a valid dispatcher here. If we
  1332. * don't support a protocol family, then its dispatcher
  1333. * will be NULL, but we shouldn't be finding addresses for
  1334. * protocol types we don't support, so the dispatcher
  1335. * we found should never be NULL.
  1336. */
  1337. INSIST(query->dispatch != NULL);
  1338. }
  1339. query->dispentry = NULL;
  1340. query->fctx = fctx;
  1341. query->tsig = NULL;
  1342. query->tsigkey = NULL;
  1343. ISC_LINK_INIT(query, link);
  1344. query->magic = QUERY_MAGIC;
  1345. if ((query->options & DNS_FETCHOPT_TCP) != 0) {
  1346. /*
  1347. * Connect to the remote server.
  1348. *
  1349. * XXXRTH Should we attach to the socket?
  1350. */
  1351. result = isc_socket_connect(query->tcpsocket,
  1352. &addrinfo->sockaddr, task,
  1353. resquery_connected, query);
  1354. if (result != ISC_R_SUCCESS)
  1355. goto cleanup_socket;
  1356. query->connects++;
  1357. QTRACE("connecting via TCP");
  1358. } else {
  1359. result = resquery_send(query);
  1360. if (result != ISC_R_SUCCESS)
  1361. goto cleanup_dispatch;
  1362. }
  1363. fctx->querysent++;
  1364. ISC_LIST_APPEND(fctx->queries, query, link);
  1365. query->fctx->nqueries++;
  1366. if (isc_sockaddr_pf(&addrinfo->sockaddr) == PF_INET)
  1367. inc_stats(res, dns_resstatscounter_queryv4);
  1368. else
  1369. inc_stats(res, dns_resstatscounter_queryv6);
  1370. if (res->view->resquerystats != NULL)
  1371. dns_rdatatypestats_increment(res->view->resquerystats,
  1372. fctx->type);
  1373. return (ISC_R_SUCCESS);
  1374. cleanup_socket:
  1375. isc_socket_detach(&query->tcpsocket);
  1376. cleanup_dispatch:
  1377. if (query->dispatch != NULL)
  1378. dns_dispatch_detach(&query->dispatch);
  1379. cleanup_query:
  1380. if (query->connects == 0) {
  1381. query->magic = 0;
  1382. isc_mem_put(fctx->mctx, query, sizeof(*query));
  1383. }
  1384. stop_idle_timer:
  1385. RUNTIME_CHECK(fctx_stopidletimer(fctx) == ISC_R_SUCCESS);
  1386. return (result);
  1387. }
  1388. static isc_boolean_t
  1389. bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
  1390. isc_sockaddr_t *sa;
  1391. for (sa = ISC_LIST_HEAD(fctx->bad_edns);
  1392. sa != NULL;
  1393. sa = ISC_LIST_NEXT(sa, link)) {
  1394. if (isc_sockaddr_equal(sa, address))
  1395. return (ISC_TRUE);
  1396. }
  1397. return (ISC_FALSE);
  1398. }
  1399. static void
  1400. add_bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
  1401. isc_sockaddr_t *sa;
  1402. if (bad_edns(fctx, address))
  1403. return;
  1404. sa = isc_mem_get(fctx->mctx, sizeof(*sa));
  1405. if (sa == NULL)
  1406. return;
  1407. *sa = *address;
  1408. ISC_LIST_INITANDAPPEND(fctx->bad_edns, sa, link);
  1409. }
  1410. static isc_boolean_t
  1411. triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
  1412. isc_sockaddr_t *sa;
  1413. for (sa = ISC_LIST_HEAD(fctx->edns);
  1414. sa != NULL;
  1415. sa = ISC_LIST_NEXT(sa, link)) {
  1416. if (isc_sockaddr_equal(sa, address))
  1417. return (ISC_TRUE);
  1418. }
  1419. return (ISC_FALSE);
  1420. }
  1421. static void
  1422. add_triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
  1423. isc_sockaddr_t *sa;
  1424. if (triededns(fctx, address))
  1425. return;
  1426. sa = isc_mem_get(fctx->mctx, sizeof(*sa));
  1427. if (sa == NULL)
  1428. return;
  1429. *sa = *address;
  1430. ISC_LIST_INITANDAPPEND(fctx->edns, sa, link);
  1431. }
  1432. static isc_boolean_t
  1433. triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
  1434. isc_sockaddr_t *sa;
  1435. for (sa = ISC_LIST_HEAD(fctx->edns512);
  1436. sa != NULL;
  1437. sa = ISC_LIST_NEXT(sa, link)) {
  1438. if (isc_sockaddr_equal(sa, address))
  1439. return (ISC_TRUE);
  1440. }
  1441. return (ISC_FALSE);
  1442. }
  1443. static void
  1444. add_triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
  1445. isc_sockaddr_t *sa;
  1446. if (triededns512(fctx, address))
  1447. return;
  1448. sa = isc_mem_get(fctx->mctx, sizeof(*sa));
  1449. if (sa == NULL)
  1450. return;
  1451. *sa = *address;
  1452. ISC_LIST_INITANDAPPEND(fctx->edns512, sa, link);
  1453. }
  1454. static isc_result_t
  1455. resquery_send(resquery_t *query) {
  1456. fetchctx_t *fctx;
  1457. isc_result_t result;
  1458. dns_name_t *qname = NULL;
  1459. dns_rdataset_t *qrdataset = NULL;
  1460. isc_region_t r;
  1461. dns_resolver_t *res;
  1462. isc_task_t *task;
  1463. isc_socket_t *socket;
  1464. isc_buffer_t tcpbuffer;
  1465. isc_sockaddr_t *address;
  1466. isc_buffer_t *buffer;
  1467. isc_netaddr_t ipaddr;
  1468. dns_tsigkey_t *tsigkey = NULL;
  1469. dns_peer_t *peer = NULL;
  1470. isc_boolean_t useedns;
  1471. dns_compress_t cctx;
  1472. isc_boolean_t cleanup_cctx = ISC_FALSE;
  1473. isc_boolean_t secure_domain;
  1474. isc_boolean_t connecting = ISC_FALSE;
  1475. fctx = query->fctx;
  1476. QTRACE("send");
  1477. res = fctx->res;
  1478. task = res->buckets[fctx->bucketnum].task;
  1479. address = NULL;
  1480. if ((query->options & DNS_FETCHOPT_TCP) != 0) {
  1481. /*
  1482. * Reserve space for the TCP message length.
  1483. */
  1484. isc_buffer_init(&tcpbuffer, query->data, sizeof(query->data));
  1485. isc_buffer_init(&query->buffer, query->data + 2,
  1486. sizeof(query->data) - 2);
  1487. buffer = &tcpbuffer;
  1488. } else {
  1489. isc_buffer_init(&query->buffer, query->data,
  1490. sizeof(query->data));
  1491. buffer = &query->buffer;
  1492. }
  1493. result = dns_message_gettempname(fctx->qmessage, &qname);
  1494. if (result != ISC_R_SUCCESS)
  1495. goto cleanup_temps;
  1496. result = dns_message_gettemprdataset(fctx->qmessage, &qrdataset);
  1497. if (result != ISC_R_SUCCESS)
  1498. goto cleanup_temps;
  1499. /*
  1500. * Get a query id from the dispatch.
  1501. */
  1502. result = dns_dispatch_addresponse2(query->dispatch,
  1503. &query->addrinfo->sockaddr,
  1504. task,
  1505. resquery_response,
  1506. query,
  1507. &query->id,
  1508. &query->dispentry,
  1509. res->socketmgr);
  1510. if (result != ISC_R_SUCCESS)
  1511. goto cleanup_temps;
  1512. fctx->qmessage->opcode = dns_opcode_query;
  1513. /*
  1514. * Set up question.
  1515. */
  1516. dns_name_init(qname, NULL);
  1517. dns_name_clone(&fctx->name, qname);
  1518. dns_rdataset_init(qrdataset);
  1519. dns_rdataset_makequestion(qrdataset, res->rdclass, fctx->type);
  1520. ISC_LIST_APPEND(qname->list, qrdataset, link);
  1521. dns_message_addname(fctx->qmessage, qname, DNS_SECTION_QUESTION);
  1522. qname = NULL;
  1523. qrdataset = NULL;
  1524. /*
  1525. * Set RD if the client has requested that we do a recursive query,
  1526. * or if we're sending to a forwarder.
  1527. */
  1528. if ((query->options & DNS_FETCHOPT_RECURSIVE) != 0 ||
  1529. ISFORWARDER(query->addrinfo))
  1530. fctx->qmessage->flags |= DNS_MESSAGEFLAG_RD;
  1531. /*
  1532. * Set CD if the client says don't validate or the question is
  1533. * under a secure entry point.
  1534. */
  1535. if ((query->options & DNS_FETCHOPT_NOVALIDATE) != 0) {
  1536. fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
  1537. } else if (res->view->enablevalidation) {
  1538. result = dns_view_issecuredomain(res->view, &fctx->name,
  1539. &secure_domain);
  1540. if (result != ISC_R_SUCCESS)
  1541. secure_domain = ISC_FALSE;
  1542. if (res->view->dlv != NULL)
  1543. secure_domain = ISC_TRUE;
  1544. if (secure_domain)
  1545. fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
  1546. }
  1547. /*
  1548. * We don't have to set opcode because it defaults to query.
  1549. */
  1550. fctx->qmessage->id = query->id;
  1551. /*
  1552. * Convert the question to wire format.
  1553. */
  1554. result = dns_compress_init(&cctx, -1, fctx->res->mctx);
  1555. if (result != ISC_R_SUCCESS)
  1556. goto cleanup_message;
  1557. cleanup_cctx = ISC_TRUE;
  1558. result = dns_message_renderbegin(fctx->qmessage, &cctx,
  1559. &query->buffer);
  1560. if (result != ISC_R_SUCCESS)
  1561. goto cleanup_message;
  1562. result = dns_message_rendersection(fctx->qmessage,
  1563. DNS_SECTION_QUESTION, 0);
  1564. if (result != ISC_R_SUCCESS)
  1565. goto cleanup_message;
  1566. peer = NULL;
  1567. isc_netaddr_fromsockaddr(&ipaddr, &query->addrinfo->sockaddr);
  1568. (void) dns_peerlist_peerbyaddr(fctx->res->view->peers, &ipaddr, &peer);
  1569. /*
  1570. * The ADB does not know about servers with "edns no". Check this,
  1571. * and then inform the ADB for future use.
  1572. */
  1573. if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0 &&
  1574. peer != NULL &&
  1575. dns_peer_getsupportedns(peer, &useedns) == ISC_R_SUCCESS &&
  1576. !useedns)
  1577. {
  1578. query->options |= DNS_FETCHOPT_NOEDNS0;
  1579. dns_adb_changeflags(fctx->adb, query->addrinfo,
  1580. DNS_FETCHOPT_NOEDNS0,
  1581. DNS_FETCHOPT_NOEDNS0);
  1582. }
  1583. /* Sync NOEDNS0 flag in addrinfo->flags and options now. */
  1584. if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) != 0)
  1585. query->options |= DNS_FETCHOPT_NOEDNS0;
  1586. /*
  1587. * Handle timeouts by reducing the UDP response size to 512 bytes
  1588. * then if that doesn't work disabling EDNS (includes DO) and CD.
  1589. *
  1590. * These timeout can be due to:
  1591. * * broken nameservers that don't respond to EDNS queries.
  1592. * * broken/misconfigured firewalls and NAT implementations
  1593. * that don't handle IP fragmentation.
  1594. * * broken/misconfigured firewalls that don't handle responses
  1595. * greater than 512 bytes.
  1596. * * broken/misconfigured firewalls that don't handle EDNS, DO
  1597. * or CD.
  1598. * * packet loss / link outage.
  1599. */
  1600. if (fctx->timeout) {
  1601. if ((triededns512(fctx, &query->addrinfo->sockaddr) ||
  1602. fctx->timeouts >= (MAX_EDNS0_TIMEOUTS * 2)) &&
  1603. (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
  1604. query->options |= DNS_FETCHOPT_NOEDNS0;
  1605. fctx->reason = "disabling EDNS";
  1606. } else if ((triededns(fctx, &query->addrinfo->sockaddr) ||
  1607. fctx->timeouts >= MAX_EDNS0_TIMEOUTS)