PageRenderTime 189ms CodeModel.GetById 59ms app.highlight 104ms RepoModel.GetById 1ms app.codeStats 2ms

/contrib/bind9/lib/dns/resolver.c

https://bitbucket.org/freebsd/freebsd-head/
C | 8823 lines | 6299 code | 960 blank | 1564 comment | 1964 complexity | 651087635c1e860a3303d6aab40e83f7 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/*
   2 * Copyright (C) 2004-2012  Internet Systems Consortium, Inc. ("ISC")
   3 * Copyright (C) 1999-2003  Internet Software Consortium.
   4 *
   5 * Permission to use, copy, modify, and/or distribute this software for any
   6 * purpose with or without fee is hereby granted, provided that the above
   7 * copyright notice and this permission notice appear in all copies.
   8 *
   9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
  10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
  11 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
  12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  15 * PERFORMANCE OF THIS SOFTWARE.
  16 */
  17
  18/* $Id$ */
  19
  20/*! \file */
  21
  22#include <config.h>
  23
  24#include <isc/platform.h>
  25#include <isc/print.h>
  26#include <isc/string.h>
  27#include <isc/random.h>
  28#include <isc/task.h>
  29#include <isc/stats.h>
  30#include <isc/timer.h>
  31#include <isc/util.h>
  32
  33#include <dns/acl.h>
  34#include <dns/adb.h>
  35#include <dns/cache.h>
  36#include <dns/db.h>
  37#include <dns/dispatch.h>
  38#include <dns/ds.h>
  39#include <dns/events.h>
  40#include <dns/forward.h>
  41#include <dns/keytable.h>
  42#include <dns/log.h>
  43#include <dns/message.h>
  44#include <dns/ncache.h>
  45#include <dns/opcode.h>
  46#include <dns/peer.h>
  47#include <dns/rbt.h>
  48#include <dns/rcode.h>
  49#include <dns/rdata.h>
  50#include <dns/rdataclass.h>
  51#include <dns/rdatalist.h>
  52#include <dns/rdataset.h>
  53#include <dns/rdatastruct.h>
  54#include <dns/rdatatype.h>
  55#include <dns/resolver.h>
  56#include <dns/result.h>
  57#include <dns/rootns.h>
  58#include <dns/stats.h>
  59#include <dns/tsig.h>
  60#include <dns/validator.h>
  61
  62#define DNS_RESOLVER_TRACE
  63#ifdef DNS_RESOLVER_TRACE
  64#define RTRACE(m)       isc_log_write(dns_lctx, \
  65				      DNS_LOGCATEGORY_RESOLVER, \
  66				      DNS_LOGMODULE_RESOLVER, \
  67				      ISC_LOG_DEBUG(3), \
  68				      "res %p: %s", res, (m))
  69#define RRTRACE(r, m)   isc_log_write(dns_lctx, \
  70				      DNS_LOGCATEGORY_RESOLVER, \
  71				      DNS_LOGMODULE_RESOLVER, \
  72				      ISC_LOG_DEBUG(3), \
  73				      "res %p: %s", (r), (m))
  74#define FCTXTRACE(m)    isc_log_write(dns_lctx, \
  75				      DNS_LOGCATEGORY_RESOLVER, \
  76				      DNS_LOGMODULE_RESOLVER, \
  77				      ISC_LOG_DEBUG(3), \
  78				      "fctx %p(%s'): %s", fctx, fctx->info, (m))
  79#define FCTXTRACE2(m1, m2) \
  80			isc_log_write(dns_lctx, \
  81				      DNS_LOGCATEGORY_RESOLVER, \
  82				      DNS_LOGMODULE_RESOLVER, \
  83				      ISC_LOG_DEBUG(3), \
  84				      "fctx %p(%s): %s %s", \
  85				      fctx, fctx->info, (m1), (m2))
  86#define FTRACE(m)       isc_log_write(dns_lctx, \
  87				      DNS_LOGCATEGORY_RESOLVER, \
  88				      DNS_LOGMODULE_RESOLVER, \
  89				      ISC_LOG_DEBUG(3), \
  90				      "fetch %p (fctx %p(%s)): %s", \
  91				      fetch, fetch->private, \
  92				      fetch->private->info, (m))
  93#define QTRACE(m)       isc_log_write(dns_lctx, \
  94				      DNS_LOGCATEGORY_RESOLVER, \
  95				      DNS_LOGMODULE_RESOLVER, \
  96				      ISC_LOG_DEBUG(3), \
  97				      "resquery %p (fctx %p(%s)): %s", \
  98				      query, query->fctx, \
  99				      query->fctx->info, (m))
 100#else
 101#define RTRACE(m)
 102#define RRTRACE(r, m)
 103#define FCTXTRACE(m)
 104#define FTRACE(m)
 105#define QTRACE(m)
 106#endif
 107
 108#ifndef DEFAULT_QUERY_TIMEOUT
 109#define DEFAULT_QUERY_TIMEOUT 30  /* The default time in seconds for the whole query to live. */
 110#endif
 111
 112#ifndef MAXIMUM_QUERY_TIMEOUT
 113#define MAXIMUM_QUERY_TIMEOUT 30 /* The maximum time in seconds for the whole query to live. */
 114#endif
 115
 116/*%
 117 * Maximum EDNS0 input packet size.
 118 */
 119#define RECV_BUFFER_SIZE                4096            /* XXXRTH  Constant. */
 120
 121/*%
 122 * This defines the maximum number of timeouts we will permit before we
 123 * disable EDNS0 on the query.
 124 */
 125#define MAX_EDNS0_TIMEOUTS      3
 126
 127typedef struct fetchctx fetchctx_t;
 128
 129typedef struct query {
 130	/* Locked by task event serialization. */
 131	unsigned int			magic;
 132	fetchctx_t *			fctx;
 133	isc_mem_t *			mctx;
 134	dns_dispatchmgr_t *		dispatchmgr;
 135	dns_dispatch_t *		dispatch;
 136	isc_boolean_t			exclusivesocket;
 137	dns_adbaddrinfo_t *		addrinfo;
 138	isc_socket_t *			tcpsocket;
 139	isc_time_t			start;
 140	dns_messageid_t			id;
 141	dns_dispentry_t *		dispentry;
 142	ISC_LINK(struct query)		link;
 143	isc_buffer_t			buffer;
 144	isc_buffer_t			*tsig;
 145	dns_tsigkey_t			*tsigkey;
 146	unsigned int			options;
 147	unsigned int			attributes;
 148	unsigned int			sends;
 149	unsigned int			connects;
 150	unsigned char			data[512];
 151} resquery_t;
 152
 153#define QUERY_MAGIC			ISC_MAGIC('Q', '!', '!', '!')
 154#define VALID_QUERY(query)		ISC_MAGIC_VALID(query, QUERY_MAGIC)
 155
 156#define RESQUERY_ATTR_CANCELED          0x02
 157
 158#define RESQUERY_CONNECTING(q)          ((q)->connects > 0)
 159#define RESQUERY_CANCELED(q)            (((q)->attributes & \
 160					  RESQUERY_ATTR_CANCELED) != 0)
 161#define RESQUERY_SENDING(q)             ((q)->sends > 0)
 162
 163typedef enum {
 164	fetchstate_init = 0,            /*%< Start event has not run yet. */
 165	fetchstate_active,
 166	fetchstate_done                 /*%< FETCHDONE events posted. */
 167} fetchstate;
 168
 169typedef enum {
 170	badns_unreachable = 0,
 171	badns_response,
 172	badns_validation
 173} badnstype_t;
 174
 175struct fetchctx {
 176	/*% Not locked. */
 177	unsigned int			magic;
 178	dns_resolver_t *		res;
 179	dns_name_t			name;
 180	dns_rdatatype_t			type;
 181	unsigned int			options;
 182	unsigned int			bucketnum;
 183	char *				info;
 184	isc_mem_t *			mctx;
 185
 186	/*% Locked by appropriate bucket lock. */
 187	fetchstate			state;
 188	isc_boolean_t			want_shutdown;
 189	isc_boolean_t			cloned;
 190	isc_boolean_t			spilled;
 191	unsigned int			references;
 192	isc_event_t			control_event;
 193	ISC_LINK(struct fetchctx)       link;
 194	ISC_LIST(dns_fetchevent_t)      events;
 195	/*% Locked by task event serialization. */
 196	dns_name_t			domain;
 197	dns_rdataset_t			nameservers;
 198	unsigned int			attributes;
 199	isc_timer_t *			timer;
 200	isc_time_t			expires;
 201	isc_interval_t			interval;
 202	dns_message_t *			qmessage;
 203	dns_message_t *			rmessage;
 204	ISC_LIST(resquery_t)		queries;
 205	dns_adbfindlist_t		finds;
 206	dns_adbfind_t *			find;
 207	dns_adbfindlist_t		altfinds;
 208	dns_adbfind_t *			altfind;
 209	dns_adbaddrinfolist_t		forwaddrs;
 210	dns_adbaddrinfolist_t		altaddrs;
 211	isc_sockaddrlist_t		forwarders;
 212	dns_fwdpolicy_t			fwdpolicy;
 213	isc_sockaddrlist_t		bad;
 214	isc_sockaddrlist_t		edns;
 215	isc_sockaddrlist_t		edns512;
 216	isc_sockaddrlist_t		bad_edns;
 217	dns_validator_t			*validator;
 218	ISC_LIST(dns_validator_t)       validators;
 219	dns_db_t *			cache;
 220	dns_adb_t *			adb;
 221	isc_boolean_t			ns_ttl_ok;
 222	isc_uint32_t			ns_ttl;
 223
 224	/*%
 225	 * The number of events we're waiting for.
 226	 */
 227	unsigned int			pending;
 228
 229	/*%
 230	 * The number of times we've "restarted" the current
 231	 * nameserver set.  This acts as a failsafe to prevent
 232	 * us from pounding constantly on a particular set of
 233	 * servers that, for whatever reason, are not giving
 234	 * us useful responses, but are responding in such a
 235	 * way that they are not marked "bad".
 236	 */
 237	unsigned int			restarts;
 238
 239	/*%
 240	 * The number of timeouts that have occurred since we
 241	 * last successfully received a response packet.  This
 242	 * is used for EDNS0 black hole detection.
 243	 */
 244	unsigned int			timeouts;
 245
 246	/*%
 247	 * Look aside state for DS lookups.
 248	 */
 249	dns_name_t 			nsname;
 250	dns_fetch_t *			nsfetch;
 251	dns_rdataset_t			nsrrset;
 252
 253	/*%
 254	 * Number of queries that reference this context.
 255	 */
 256	unsigned int			nqueries;
 257
 258	/*%
 259	 * The reason to print when logging a successful
 260	 * response to a query.
 261	 */
 262	const char *			reason;
 263
 264	/*%
 265	 * Random numbers to use for mixing up server addresses.
 266	 */
 267	isc_uint32_t                    rand_buf;
 268	isc_uint32_t                    rand_bits;
 269
 270	/*%
 271	 * Fetch-local statistics for detailed logging.
 272	 */
 273	isc_result_t			result; /*%< fetch result  */
 274	isc_result_t			vresult; /*%< validation result  */
 275	int				exitline;
 276	isc_time_t			start;
 277	isc_uint64_t			duration;
 278	isc_boolean_t			logged;
 279	unsigned int			querysent;
 280	unsigned int			referrals;
 281	unsigned int			lamecount;
 282	unsigned int			neterr;
 283	unsigned int			badresp;
 284	unsigned int			adberr;
 285	unsigned int			findfail;
 286	unsigned int			valfail;
 287	isc_boolean_t			timeout;
 288	dns_adbaddrinfo_t 		*addrinfo;
 289	isc_sockaddr_t			*client;
 290};
 291
 292#define FCTX_MAGIC			ISC_MAGIC('F', '!', '!', '!')
 293#define VALID_FCTX(fctx)		ISC_MAGIC_VALID(fctx, FCTX_MAGIC)
 294
 295#define FCTX_ATTR_HAVEANSWER            0x0001
 296#define FCTX_ATTR_GLUING                0x0002
 297#define FCTX_ATTR_ADDRWAIT              0x0004
 298#define FCTX_ATTR_SHUTTINGDOWN          0x0008
 299#define FCTX_ATTR_WANTCACHE             0x0010
 300#define FCTX_ATTR_WANTNCACHE            0x0020
 301#define FCTX_ATTR_NEEDEDNS0             0x0040
 302#define FCTX_ATTR_TRIEDFIND             0x0080
 303#define FCTX_ATTR_TRIEDALT              0x0100
 304
 305#define HAVE_ANSWER(f)          (((f)->attributes & FCTX_ATTR_HAVEANSWER) != \
 306				 0)
 307#define GLUING(f)               (((f)->attributes & FCTX_ATTR_GLUING) != \
 308				 0)
 309#define ADDRWAIT(f)             (((f)->attributes & FCTX_ATTR_ADDRWAIT) != \
 310				 0)
 311#define SHUTTINGDOWN(f)         (((f)->attributes & FCTX_ATTR_SHUTTINGDOWN) \
 312				 != 0)
 313#define WANTCACHE(f)            (((f)->attributes & FCTX_ATTR_WANTCACHE) != 0)
 314#define WANTNCACHE(f)           (((f)->attributes & FCTX_ATTR_WANTNCACHE) != 0)
 315#define NEEDEDNS0(f)            (((f)->attributes & FCTX_ATTR_NEEDEDNS0) != 0)
 316#define TRIEDFIND(f)            (((f)->attributes & FCTX_ATTR_TRIEDFIND) != 0)
 317#define TRIEDALT(f)             (((f)->attributes & FCTX_ATTR_TRIEDALT) != 0)
 318
 319typedef struct {
 320	dns_adbaddrinfo_t *		addrinfo;
 321	fetchctx_t *			fctx;
 322} dns_valarg_t;
 323
 324struct dns_fetch {
 325	unsigned int			magic;
 326	fetchctx_t *			private;
 327};
 328
 329#define DNS_FETCH_MAGIC			ISC_MAGIC('F', 't', 'c', 'h')
 330#define DNS_FETCH_VALID(fetch)		ISC_MAGIC_VALID(fetch, DNS_FETCH_MAGIC)
 331
 332typedef struct fctxbucket {
 333	isc_task_t *			task;
 334	isc_mutex_t			lock;
 335	ISC_LIST(fetchctx_t)		fctxs;
 336	isc_boolean_t			exiting;
 337	isc_mem_t *			mctx;
 338} fctxbucket_t;
 339
 340typedef struct alternate {
 341	isc_boolean_t			isaddress;
 342	union   {
 343		isc_sockaddr_t		addr;
 344		struct {
 345			dns_name_t      name;
 346			in_port_t       port;
 347		} _n;
 348	} _u;
 349	ISC_LINK(struct alternate)      link;
 350} alternate_t;
 351
 352typedef struct dns_badcache dns_badcache_t;
 353struct dns_badcache {
 354	dns_badcache_t *	next;
 355	dns_rdatatype_t 	type;
 356	isc_time_t		expire;
 357	unsigned int		hashval;
 358	dns_name_t		name;
 359};
 360#define DNS_BADCACHE_SIZE 1021
 361#define DNS_BADCACHE_TTL(fctx) \
 362	(((fctx)->res->lame_ttl > 30 ) ? (fctx)->res->lame_ttl : 30)
 363
 364struct dns_resolver {
 365	/* Unlocked. */
 366	unsigned int			magic;
 367	isc_mem_t *			mctx;
 368	isc_mutex_t			lock;
 369	isc_mutex_t			nlock;
 370	isc_mutex_t			primelock;
 371	dns_rdataclass_t		rdclass;
 372	isc_socketmgr_t *		socketmgr;
 373	isc_timermgr_t *		timermgr;
 374	isc_taskmgr_t *			taskmgr;
 375	dns_view_t *			view;
 376	isc_boolean_t			frozen;
 377	unsigned int			options;
 378	dns_dispatchmgr_t *		dispatchmgr;
 379	dns_dispatch_t *		dispatchv4;
 380	isc_boolean_t			exclusivev4;
 381	dns_dispatch_t *		dispatchv6;
 382	isc_boolean_t			exclusivev6;
 383	unsigned int			ndisps;
 384	unsigned int			nbuckets;
 385	fctxbucket_t *			buckets;
 386	isc_uint32_t			lame_ttl;
 387	ISC_LIST(alternate_t)		alternates;
 388	isc_uint16_t			udpsize;
 389#if USE_ALGLOCK
 390	isc_rwlock_t			alglock;
 391#endif
 392	dns_rbt_t *			algorithms;
 393#if USE_MBSLOCK
 394	isc_rwlock_t			mbslock;
 395#endif
 396	dns_rbt_t *			mustbesecure;
 397	unsigned int			spillatmax;
 398	unsigned int			spillatmin;
 399	isc_timer_t *			spillattimer;
 400	isc_boolean_t			zero_no_soa_ttl;
 401	unsigned int			query_timeout;
 402
 403	/* Locked by lock. */
 404	unsigned int			references;
 405	isc_boolean_t			exiting;
 406	isc_eventlist_t			whenshutdown;
 407	unsigned int			activebuckets;
 408	isc_boolean_t			priming;
 409	unsigned int			spillat;	/* clients-per-query */
 410	unsigned int			nextdisp;
 411
 412	/* Bad cache. */
 413	dns_badcache_t  ** 		badcache;
 414	unsigned int 			badcount;
 415	unsigned int 			badhash;
 416	unsigned int 			badsweep;
 417
 418	/* Locked by primelock. */
 419	dns_fetch_t *			primefetch;
 420	/* Locked by nlock. */
 421	unsigned int			nfctx;
 422};
 423
 424#define RES_MAGIC			ISC_MAGIC('R', 'e', 's', '!')
 425#define VALID_RESOLVER(res)		ISC_MAGIC_VALID(res, RES_MAGIC)
 426
 427/*%
 428 * Private addrinfo flags.  These must not conflict with DNS_FETCHOPT_NOEDNS0,
 429 * which we also use as an addrinfo flag.
 430 */
 431#define FCTX_ADDRINFO_MARK              0x0001
 432#define FCTX_ADDRINFO_FORWARDER         0x1000
 433#define FCTX_ADDRINFO_TRIED             0x2000
 434#define UNMARKED(a)                     (((a)->flags & FCTX_ADDRINFO_MARK) \
 435					 == 0)
 436#define ISFORWARDER(a)                  (((a)->flags & \
 437					 FCTX_ADDRINFO_FORWARDER) != 0)
 438#define TRIED(a)                        (((a)->flags & \
 439					 FCTX_ADDRINFO_TRIED) != 0)
 440
 441#define NXDOMAIN(r) (((r)->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
 442#define NEGATIVE(r) (((r)->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
 443
 444static void destroy(dns_resolver_t *res);
 445static void empty_bucket(dns_resolver_t *res);
 446static isc_result_t resquery_send(resquery_t *query);
 447static void resquery_response(isc_task_t *task, isc_event_t *event);
 448static void resquery_connected(isc_task_t *task, isc_event_t *event);
 449static void fctx_try(fetchctx_t *fctx, isc_boolean_t retrying,
 450		     isc_boolean_t badcache);
 451static void fctx_destroy(fetchctx_t *fctx);
 452static isc_boolean_t fctx_unlink(fetchctx_t *fctx);
 453static isc_result_t ncache_adderesult(dns_message_t *message,
 454				      dns_db_t *cache, dns_dbnode_t *node,
 455				      dns_rdatatype_t covers,
 456				      isc_stdtime_t now, dns_ttl_t maxttl,
 457				      isc_boolean_t optout,
 458				      dns_rdataset_t *ardataset,
 459				      isc_result_t *eresultp);
 460static void validated(isc_task_t *task, isc_event_t *event);
 461static isc_boolean_t maybe_destroy(fetchctx_t *fctx, isc_boolean_t locked);
 462static void add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
 463		    isc_result_t reason, badnstype_t badtype);
 464
 465/*%
 466 * Increment resolver-related statistics counters.
 467 */
 468static inline void
 469inc_stats(dns_resolver_t *res, isc_statscounter_t counter) {
 470	if (res->view->resstats != NULL)
 471		isc_stats_increment(res->view->resstats, counter);
 472}
 473
 474static isc_result_t
 475valcreate(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, dns_name_t *name,
 476	  dns_rdatatype_t type, dns_rdataset_t *rdataset,
 477	  dns_rdataset_t *sigrdataset, unsigned int valoptions,
 478	  isc_task_t *task)
 479{
 480	dns_validator_t *validator = NULL;
 481	dns_valarg_t *valarg;
 482	isc_result_t result;
 483
 484	valarg = isc_mem_get(fctx->mctx, sizeof(*valarg));
 485	if (valarg == NULL)
 486		return (ISC_R_NOMEMORY);
 487
 488	valarg->fctx = fctx;
 489	valarg->addrinfo = addrinfo;
 490
 491	if (!ISC_LIST_EMPTY(fctx->validators))
 492		INSIST((valoptions & DNS_VALIDATOR_DEFER) != 0);
 493
 494	result = dns_validator_create(fctx->res->view, name, type, rdataset,
 495				      sigrdataset, fctx->rmessage,
 496				      valoptions, task, validated, valarg,
 497				      &validator);
 498	if (result == ISC_R_SUCCESS) {
 499		inc_stats(fctx->res, dns_resstatscounter_val);
 500		if ((valoptions & DNS_VALIDATOR_DEFER) == 0) {
 501			INSIST(fctx->validator == NULL);
 502			fctx->validator = validator;
 503		}
 504		ISC_LIST_APPEND(fctx->validators, validator, link);
 505	} else
 506		isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));
 507	return (result);
 508}
 509
 510static isc_boolean_t
 511rrsig_fromchildzone(fetchctx_t *fctx, dns_rdataset_t *rdataset) {
 512	dns_namereln_t namereln;
 513	dns_rdata_rrsig_t rrsig;
 514	dns_rdata_t rdata = DNS_RDATA_INIT;
 515	int order;
 516	isc_result_t result;
 517	unsigned int labels;
 518
 519	for (result = dns_rdataset_first(rdataset);
 520	     result == ISC_R_SUCCESS;
 521	     result = dns_rdataset_next(rdataset)) {
 522		dns_rdataset_current(rdataset, &rdata);
 523		result = dns_rdata_tostruct(&rdata, &rrsig, NULL);
 524		RUNTIME_CHECK(result == ISC_R_SUCCESS);
 525		namereln = dns_name_fullcompare(&rrsig.signer, &fctx->domain,
 526						&order, &labels);
 527		if (namereln == dns_namereln_subdomain)
 528			return (ISC_TRUE);
 529		dns_rdata_reset(&rdata);
 530	}
 531	return (ISC_FALSE);
 532}
 533
 534static isc_boolean_t
 535fix_mustbedelegationornxdomain(dns_message_t *message, fetchctx_t *fctx) {
 536	dns_name_t *name;
 537	dns_name_t *domain = &fctx->domain;
 538	dns_rdataset_t *rdataset;
 539	dns_rdatatype_t type;
 540	isc_result_t result;
 541	isc_boolean_t keep_auth = ISC_FALSE;
 542
 543	if (message->rcode == dns_rcode_nxdomain)
 544		return (ISC_FALSE);
 545
 546	/*
 547	 * A DS RRset can appear anywhere in a zone, even for a delegation-only
 548	 * zone.  So a response to an explicit query for this type should be
 549	 * excluded from delegation-only fixup.
 550	 *
 551	 * SOA, NS, and DNSKEY can only exist at a zone apex, so a postive
 552	 * response to a query for these types can never violate the
 553	 * delegation-only assumption: if the query name is below a
 554	 * zone cut, the response should normally be a referral, which should
 555	 * be accepted; if the query name is below a zone cut but the server
 556	 * happens to have authority for the zone of the query name, the
 557	 * response is a (non-referral) answer.  But this does not violate
 558	 * delegation-only because the query name must be in a different zone
 559	 * due to the "apex-only" nature of these types.  Note that if the
 560	 * remote server happens to have authority for a child zone of a
 561	 * delegation-only zone, we may still incorrectly "fix" the response
 562	 * with NXDOMAIN for queries for other types.  Unfortunately it's
 563	 * generally impossible to differentiate this case from violation of
 564	 * the delegation-only assumption.  Once the resolver learns the
 565	 * correct zone cut, possibly via a separate query for an "apex-only"
 566	 * type, queries for other types will be resolved correctly.
 567	 *
 568	 * A query for type ANY will be accepted if it hits an exceptional
 569	 * type above in the answer section as it should be from a child
 570	 * zone.
 571	 *
 572	 * Also accept answers with RRSIG records from the child zone.
 573	 * Direct queries for RRSIG records should not be answered from
 574	 * the parent zone.
 575	 */
 576
 577	if (message->counts[DNS_SECTION_ANSWER] != 0 &&
 578	    (fctx->type == dns_rdatatype_ns ||
 579	     fctx->type == dns_rdatatype_ds ||
 580	     fctx->type == dns_rdatatype_soa ||
 581	     fctx->type == dns_rdatatype_any ||
 582	     fctx->type == dns_rdatatype_rrsig ||
 583	     fctx->type == dns_rdatatype_dnskey)) {
 584		result = dns_message_firstname(message, DNS_SECTION_ANSWER);
 585		while (result == ISC_R_SUCCESS) {
 586			name = NULL;
 587			dns_message_currentname(message, DNS_SECTION_ANSWER,
 588						&name);
 589			for (rdataset = ISC_LIST_HEAD(name->list);
 590			     rdataset != NULL;
 591			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
 592				if (!dns_name_equal(name, &fctx->name))
 593					continue;
 594				type = rdataset->type;
 595				/*
 596				 * RRsig from child?
 597				 */
 598				if (type == dns_rdatatype_rrsig &&
 599				    rrsig_fromchildzone(fctx, rdataset))
 600					return (ISC_FALSE);
 601				/*
 602				 * Direct query for apex records or DS.
 603				 */
 604				if (fctx->type == type &&
 605				    (type == dns_rdatatype_ds ||
 606				     type == dns_rdatatype_ns ||
 607				     type == dns_rdatatype_soa ||
 608				     type == dns_rdatatype_dnskey))
 609					return (ISC_FALSE);
 610				/*
 611				 * Indirect query for apex records or DS.
 612				 */
 613				if (fctx->type == dns_rdatatype_any &&
 614				    (type == dns_rdatatype_ns ||
 615				     type == dns_rdatatype_ds ||
 616				     type == dns_rdatatype_soa ||
 617				     type == dns_rdatatype_dnskey))
 618					return (ISC_FALSE);
 619			}
 620			result = dns_message_nextname(message,
 621						      DNS_SECTION_ANSWER);
 622		}
 623	}
 624
 625	/*
 626	 * A NODATA response to a DS query?
 627	 */
 628	if (fctx->type == dns_rdatatype_ds &&
 629	    message->counts[DNS_SECTION_ANSWER] == 0)
 630		return (ISC_FALSE);
 631
 632	/* Look for referral or indication of answer from child zone? */
 633	if (message->counts[DNS_SECTION_AUTHORITY] == 0)
 634		goto munge;
 635
 636	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
 637	while (result == ISC_R_SUCCESS) {
 638		name = NULL;
 639		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
 640		for (rdataset = ISC_LIST_HEAD(name->list);
 641		     rdataset != NULL;
 642		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
 643			type = rdataset->type;
 644			if (type == dns_rdatatype_soa &&
 645			    dns_name_equal(name, domain))
 646				keep_auth = ISC_TRUE;
 647
 648			if (type != dns_rdatatype_ns &&
 649			    type != dns_rdatatype_soa &&
 650			    type != dns_rdatatype_rrsig)
 651				continue;
 652
 653			if (type == dns_rdatatype_rrsig) {
 654				if (rrsig_fromchildzone(fctx, rdataset))
 655					return (ISC_FALSE);
 656				else
 657					continue;
 658			}
 659
 660			/* NS or SOA records. */
 661			if (dns_name_equal(name, domain)) {
 662				/*
 663				 * If a query for ANY causes a negative
 664				 * response, we can be sure that this is
 665				 * an empty node.  For other type of queries
 666				 * we cannot differentiate an empty node
 667				 * from a node that just doesn't have that
 668				 * type of record.  We only accept the former
 669				 * case.
 670				 */
 671				if (message->counts[DNS_SECTION_ANSWER] == 0 &&
 672				    fctx->type == dns_rdatatype_any)
 673					return (ISC_FALSE);
 674			} else if (dns_name_issubdomain(name, domain)) {
 675				/* Referral or answer from child zone. */
 676				return (ISC_FALSE);
 677			}
 678		}
 679		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
 680	}
 681
 682 munge:
 683	message->rcode = dns_rcode_nxdomain;
 684	message->counts[DNS_SECTION_ANSWER] = 0;
 685	if (!keep_auth)
 686		message->counts[DNS_SECTION_AUTHORITY] = 0;
 687	message->counts[DNS_SECTION_ADDITIONAL] = 0;
 688	return (ISC_TRUE);
 689}
 690
 691static inline isc_result_t
 692fctx_starttimer(fetchctx_t *fctx) {
 693	/*
 694	 * Start the lifetime timer for fctx.
 695	 *
 696	 * This is also used for stopping the idle timer; in that
 697	 * case we must purge events already posted to ensure that
 698	 * no further idle events are delivered.
 699	 */
 700	return (isc_timer_reset(fctx->timer, isc_timertype_once,
 701				&fctx->expires, NULL, ISC_TRUE));
 702}
 703
 704static inline void
 705fctx_stoptimer(fetchctx_t *fctx) {
 706	isc_result_t result;
 707
 708	/*
 709	 * We don't return a result if resetting the timer to inactive fails
 710	 * since there's nothing to be done about it.  Resetting to inactive
 711	 * should never fail anyway, since the code as currently written
 712	 * cannot fail in that case.
 713	 */
 714	result = isc_timer_reset(fctx->timer, isc_timertype_inactive,
 715				  NULL, NULL, ISC_TRUE);
 716	if (result != ISC_R_SUCCESS) {
 717		UNEXPECTED_ERROR(__FILE__, __LINE__,
 718				 "isc_timer_reset(): %s",
 719				 isc_result_totext(result));
 720	}
 721}
 722
 723
 724static inline isc_result_t
 725fctx_startidletimer(fetchctx_t *fctx, isc_interval_t *interval) {
 726	/*
 727	 * Start the idle timer for fctx.  The lifetime timer continues
 728	 * to be in effect.
 729	 */
 730	return (isc_timer_reset(fctx->timer, isc_timertype_once,
 731				&fctx->expires, interval, ISC_FALSE));
 732}
 733
 734/*
 735 * Stopping the idle timer is equivalent to calling fctx_starttimer(), but
 736 * we use fctx_stopidletimer for readability in the code below.
 737 */
 738#define fctx_stopidletimer      fctx_starttimer
 739
 740
 741static inline void
 742resquery_destroy(resquery_t **queryp) {
 743	resquery_t *query;
 744
 745	REQUIRE(queryp != NULL);
 746	query = *queryp;
 747	REQUIRE(!ISC_LINK_LINKED(query, link));
 748
 749	INSIST(query->tcpsocket == NULL);
 750
 751	query->fctx->nqueries--;
 752	if (SHUTTINGDOWN(query->fctx)) {
 753		dns_resolver_t *res = query->fctx->res;
 754		if (maybe_destroy(query->fctx, ISC_FALSE))
 755			empty_bucket(res);
 756	}
 757	query->magic = 0;
 758	isc_mem_put(query->mctx, query, sizeof(*query));
 759	*queryp = NULL;
 760}
 761
 762static void
 763fctx_cancelquery(resquery_t **queryp, dns_dispatchevent_t **deventp,
 764		 isc_time_t *finish, isc_boolean_t no_response)
 765{
 766	fetchctx_t *fctx;
 767	resquery_t *query;
 768	unsigned int rtt, rttms;
 769	unsigned int factor;
 770	dns_adbfind_t *find;
 771	dns_adbaddrinfo_t *addrinfo;
 772	isc_socket_t *socket;
 773
 774	query = *queryp;
 775	fctx = query->fctx;
 776
 777	FCTXTRACE("cancelquery");
 778
 779	REQUIRE(!RESQUERY_CANCELED(query));
 780
 781	query->attributes |= RESQUERY_ATTR_CANCELED;
 782
 783	/*
 784	 * Should we update the RTT?
 785	 */
 786	if (finish != NULL || no_response) {
 787		if (finish != NULL) {
 788			/*
 789			 * We have both the start and finish times for this
 790			 * packet, so we can compute a real RTT.
 791			 */
 792			rtt = (unsigned int)isc_time_microdiff(finish,
 793							       &query->start);
 794			factor = DNS_ADB_RTTADJDEFAULT;
 795
 796			rttms = rtt / 1000;
 797			if (rttms < DNS_RESOLVER_QRYRTTCLASS0) {
 798				inc_stats(fctx->res,
 799					  dns_resstatscounter_queryrtt0);
 800			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS1) {
 801				inc_stats(fctx->res,
 802					  dns_resstatscounter_queryrtt1);
 803			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS2) {
 804				inc_stats(fctx->res,
 805					  dns_resstatscounter_queryrtt2);
 806			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS3) {
 807				inc_stats(fctx->res,
 808					  dns_resstatscounter_queryrtt3);
 809			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS4) {
 810				inc_stats(fctx->res,
 811					  dns_resstatscounter_queryrtt4);
 812			} else {
 813				inc_stats(fctx->res,
 814					  dns_resstatscounter_queryrtt5);
 815			}
 816		} else {
 817			/*
 818			 * We don't have an RTT for this query.  Maybe the
 819			 * packet was lost, or maybe this server is very
 820			 * slow.  We don't know.  Increase the RTT.
 821			 */
 822			INSIST(no_response);
 823			rtt = query->addrinfo->srtt + 200000;
 824			if (rtt > 10000000)
 825				rtt = 10000000;
 826			/*
 827			 * Replace the current RTT with our value.
 828			 */
 829			factor = DNS_ADB_RTTADJREPLACE;
 830		}
 831		dns_adb_adjustsrtt(fctx->adb, query->addrinfo, rtt, factor);
 832	}
 833
 834	/* Remember that the server has been tried. */
 835	if (!TRIED(query->addrinfo)) {
 836		dns_adb_changeflags(fctx->adb, query->addrinfo,
 837				    FCTX_ADDRINFO_TRIED, FCTX_ADDRINFO_TRIED);
 838	}
 839
 840	/*
 841	 * Age RTTs of servers not tried.
 842	 */
 843	factor = DNS_ADB_RTTADJAGE;
 844	if (finish != NULL)
 845		for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
 846		     addrinfo != NULL;
 847		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
 848			if (UNMARKED(addrinfo))
 849				dns_adb_adjustsrtt(fctx->adb, addrinfo,
 850						   0, factor);
 851
 852	if (finish != NULL && TRIEDFIND(fctx))
 853		for (find = ISC_LIST_HEAD(fctx->finds);
 854		     find != NULL;
 855		     find = ISC_LIST_NEXT(find, publink))
 856			for (addrinfo = ISC_LIST_HEAD(find->list);
 857			     addrinfo != NULL;
 858			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
 859				if (UNMARKED(addrinfo))
 860					dns_adb_adjustsrtt(fctx->adb, addrinfo,
 861							   0, factor);
 862
 863	if (finish != NULL && TRIEDALT(fctx)) {
 864		for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
 865		     addrinfo != NULL;
 866		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
 867			if (UNMARKED(addrinfo))
 868				dns_adb_adjustsrtt(fctx->adb, addrinfo,
 869						   0, factor);
 870		for (find = ISC_LIST_HEAD(fctx->altfinds);
 871		     find != NULL;
 872		     find = ISC_LIST_NEXT(find, publink))
 873			for (addrinfo = ISC_LIST_HEAD(find->list);
 874			     addrinfo != NULL;
 875			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
 876				if (UNMARKED(addrinfo))
 877					dns_adb_adjustsrtt(fctx->adb, addrinfo,
 878							   0, factor);
 879	}
 880
 881	/*
 882	 * Check for any outstanding socket events.  If they exist, cancel
 883	 * them and let the event handlers finish the cleanup.  The resolver
 884	 * only needs to worry about managing the connect and send events;
 885	 * the dispatcher manages the recv events.
 886	 */
 887	if (RESQUERY_CONNECTING(query)) {
 888		/*
 889		 * Cancel the connect.
 890		 */
 891		if (query->tcpsocket != NULL) {
 892			isc_socket_cancel(query->tcpsocket, NULL,
 893					  ISC_SOCKCANCEL_CONNECT);
 894		} else if (query->dispentry != NULL) {
 895			INSIST(query->exclusivesocket);
 896			socket = dns_dispatch_getentrysocket(query->dispentry);
 897			if (socket != NULL)
 898				isc_socket_cancel(socket, NULL,
 899						  ISC_SOCKCANCEL_CONNECT);
 900		}
 901	} else if (RESQUERY_SENDING(query)) {
 902		/*
 903		 * Cancel the pending send.
 904		 */
 905		if (query->exclusivesocket && query->dispentry != NULL)
 906			socket = dns_dispatch_getentrysocket(query->dispentry);
 907		else
 908			socket = dns_dispatch_getsocket(query->dispatch);
 909		if (socket != NULL)
 910			isc_socket_cancel(socket, NULL, ISC_SOCKCANCEL_SEND);
 911	}
 912
 913	if (query->dispentry != NULL)
 914		dns_dispatch_removeresponse(&query->dispentry, deventp);
 915
 916	ISC_LIST_UNLINK(fctx->queries, query, link);
 917
 918	if (query->tsig != NULL)
 919		isc_buffer_free(&query->tsig);
 920
 921	if (query->tsigkey != NULL)
 922		dns_tsigkey_detach(&query->tsigkey);
 923
 924	if (query->dispatch != NULL)
 925		dns_dispatch_detach(&query->dispatch);
 926
 927	if (! (RESQUERY_CONNECTING(query) || RESQUERY_SENDING(query)))
 928		/*
 929		 * It's safe to destroy the query now.
 930		 */
 931		resquery_destroy(&query);
 932}
 933
 934static void
 935fctx_cancelqueries(fetchctx_t *fctx, isc_boolean_t no_response) {
 936	resquery_t *query, *next_query;
 937
 938	FCTXTRACE("cancelqueries");
 939
 940	for (query = ISC_LIST_HEAD(fctx->queries);
 941	     query != NULL;
 942	     query = next_query) {
 943		next_query = ISC_LIST_NEXT(query, link);
 944		fctx_cancelquery(&query, NULL, NULL, no_response);
 945	}
 946}
 947
 948static void
 949fctx_cleanupfinds(fetchctx_t *fctx) {
 950	dns_adbfind_t *find, *next_find;
 951
 952	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
 953
 954	for (find = ISC_LIST_HEAD(fctx->finds);
 955	     find != NULL;
 956	     find = next_find) {
 957		next_find = ISC_LIST_NEXT(find, publink);
 958		ISC_LIST_UNLINK(fctx->finds, find, publink);
 959		dns_adb_destroyfind(&find);
 960	}
 961	fctx->find = NULL;
 962}
 963
 964static void
 965fctx_cleanupaltfinds(fetchctx_t *fctx) {
 966	dns_adbfind_t *find, *next_find;
 967
 968	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
 969
 970	for (find = ISC_LIST_HEAD(fctx->altfinds);
 971	     find != NULL;
 972	     find = next_find) {
 973		next_find = ISC_LIST_NEXT(find, publink);
 974		ISC_LIST_UNLINK(fctx->altfinds, find, publink);
 975		dns_adb_destroyfind(&find);
 976	}
 977	fctx->altfind = NULL;
 978}
 979
 980static void
 981fctx_cleanupforwaddrs(fetchctx_t *fctx) {
 982	dns_adbaddrinfo_t *addr, *next_addr;
 983
 984	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
 985
 986	for (addr = ISC_LIST_HEAD(fctx->forwaddrs);
 987	     addr != NULL;
 988	     addr = next_addr) {
 989		next_addr = ISC_LIST_NEXT(addr, publink);
 990		ISC_LIST_UNLINK(fctx->forwaddrs, addr, publink);
 991		dns_adb_freeaddrinfo(fctx->adb, &addr);
 992	}
 993}
 994
 995static void
 996fctx_cleanupaltaddrs(fetchctx_t *fctx) {
 997	dns_adbaddrinfo_t *addr, *next_addr;
 998
 999	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1000
1001	for (addr = ISC_LIST_HEAD(fctx->altaddrs);
1002	     addr != NULL;
1003	     addr = next_addr) {
1004		next_addr = ISC_LIST_NEXT(addr, publink);
1005		ISC_LIST_UNLINK(fctx->altaddrs, addr, publink);
1006		dns_adb_freeaddrinfo(fctx->adb, &addr);
1007	}
1008}
1009
/*
 * Tear down all in-progress work for 'fctx': cancel its queries, release
 * its finds and address lists, and stop its timer.
 *
 * 'no_response' is passed through to fctx_cancelqueries().
 */
static inline void
fctx_stopeverything(fetchctx_t *fctx, isc_boolean_t no_response) {
	FCTXTRACE("stopeverything");
	/*
	 * Queries must be cancelled first: each of the cleanup routines
	 * below REQUIREs that fctx->queries is empty.
	 */
	fctx_cancelqueries(fctx, no_response);
	fctx_cleanupfinds(fctx);
	fctx_cleanupaltfinds(fctx);
	fctx_cleanupforwaddrs(fctx);
	fctx_cleanupaltaddrs(fctx);
	fctx_stoptimer(fctx);
}
1020
/*
 * Deliver every pending fetch event on fctx->events to its waiting task
 * and, if appropriate, raise the resolver's clients-per-query spill limit.
 *
 * 'result' is recorded in the fctx and copied into each event unless the
 * fctx already has an answer; 'line' is recorded as fctx->exitline.
 *
 * Requires fctx->state == fetchstate_done.
 */
static inline void
fctx_sendevents(fetchctx_t *fctx, isc_result_t result, int line) {
	dns_fetchevent_t *event, *next_event;
	isc_task_t *task;
	unsigned int count = 0;
	isc_interval_t i;
	isc_boolean_t logit = ISC_FALSE;
	isc_time_t now;
	unsigned int old_spillat;
	unsigned int new_spillat = 0;	/* initialized to silence
					   compiler warnings */

	/*
	 * Caller must be holding the appropriate bucket lock.
	 */
	REQUIRE(fctx->state == fetchstate_done);

	FCTXTRACE("sendevents");

	/*
	 * Keep some record of fetch result for logging later (if required).
	 */
	fctx->result = result;
	fctx->exitline = line;
	TIME_NOW(&now);
	fctx->duration = isc_time_microdiff(&now, &fctx->start);

	for (event = ISC_LIST_HEAD(fctx->events);
	     event != NULL;
	     event = next_event) {
		next_event = ISC_LIST_NEXT(event, ev_link);
		ISC_LIST_UNLINK(fctx->events, event, ev_link);
		/*
		 * Until now ev_sender held the task to deliver to; take it
		 * and make the fctx the sender of the event.
		 */
		task = event->ev_sender;
		event->ev_sender = fctx;
		event->vresult = fctx->vresult;
		/* Only overwrite the event result when no answer was stored. */
		if (!HAVE_ANSWER(fctx))
			event->result = result;

		INSIST(result != ISC_R_SUCCESS ||
		       dns_rdataset_isassociated(event->rdataset) ||
		       fctx->type == dns_rdatatype_any ||
		       fctx->type == dns_rdatatype_rrsig ||
		       fctx->type == dns_rdatatype_sig);

		/*
		 * Negative results must be indicated in event->result.
		 */
		if (dns_rdataset_isassociated(event->rdataset) &&
		    NEGATIVE(event->rdataset)) {
			INSIST(event->result == DNS_R_NCACHENXDOMAIN ||
			       event->result == DNS_R_NCACHENXRRSET);
		}

		isc_task_sendanddetach(&task, ISC_EVENT_PTR(&event));
		count++;
	}

	/*
	 * If this fetch had an answer and had spilled, and the number of
	 * events just delivered is below spillatmax (or spillatmax is 0),
	 * consider raising the resolver-wide spillat value.
	 */
	if ((fctx->attributes & FCTX_ATTR_HAVEANSWER) != 0 &&
	    fctx->spilled &&
	    (count < fctx->res->spillatmax || fctx->res->spillatmax == 0)) {
		LOCK(&fctx->res->lock);
		if (count == fctx->res->spillat && !fctx->res->exiting) {
			old_spillat = fctx->res->spillat;
			/* Raise by 5, clamped to a nonzero spillatmax. */
			fctx->res->spillat += 5;
			if (fctx->res->spillat > fctx->res->spillatmax &&
			    fctx->res->spillatmax != 0)
				fctx->res->spillat = fctx->res->spillatmax;
			new_spillat = fctx->res->spillat;
			if (new_spillat != old_spillat) {
				logit = ISC_TRUE;
			}
			/* Re-arm the spillat timer as a 20 minute ticker. */
			isc_interval_set(&i, 20 * 60, 0);
			result = isc_timer_reset(fctx->res->spillattimer,
						 isc_timertype_ticker, NULL,
						 &i, ISC_TRUE);
			RUNTIME_CHECK(result == ISC_R_SUCCESS);
		}
		UNLOCK(&fctx->res->lock);
		/* Log only after the resolver lock has been released. */
		if (logit)
			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
				      "clients-per-query increased to %u",
				      new_spillat);
	}
}
1106
1107static inline void
1108log_edns(fetchctx_t *fctx) {
1109	char domainbuf[DNS_NAME_FORMATSIZE];
1110
1111	if (fctx->reason == NULL)
1112		return;
1113
1114	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
1115	isc_log_write(dns_lctx, DNS_LOGCATEGORY_EDNS_DISABLED,
1116		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
1117		      "success resolving '%s' (in '%s'?) after %s",
1118		      fctx->info, domainbuf, fctx->reason);
1119
1120	fctx->reason = NULL;
1121}
1122
1123static void
1124fctx_done(fetchctx_t *fctx, isc_result_t result, int line) {
1125	dns_resolver_t *res;
1126	isc_boolean_t no_response;
1127
1128	REQUIRE(line >= 0);
1129
1130	FCTXTRACE("done");
1131
1132	res = fctx->res;
1133
1134	if (result == ISC_R_SUCCESS) {
1135		/*%
1136		 * Log any deferred EDNS timeout messages.
1137		 */
1138		log_edns(fctx);
1139		no_response = ISC_TRUE;
1140	 } else
1141		no_response = ISC_FALSE;
1142
1143	fctx->reason = NULL;
1144	fctx_stopeverything(fctx, no_response);
1145
1146	LOCK(&res->buckets[fctx->bucketnum].lock);
1147
1148	fctx->state = fetchstate_done;
1149	fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
1150	fctx_sendevents(fctx, result, line);
1151
1152	UNLOCK(&res->buckets[fctx->bucketnum].lock);
1153}
1154
1155static void
1156process_sendevent(resquery_t *query, isc_event_t *event) {
1157	isc_socketevent_t *sevent = (isc_socketevent_t *)event;
1158	isc_boolean_t retry = ISC_FALSE;
1159	isc_result_t result;
1160	fetchctx_t *fctx;
1161
1162	fctx = query->fctx;
1163
1164	if (RESQUERY_CANCELED(query)) {
1165		if (query->sends == 0 && query->connects == 0) {
1166			/*
1167			 * This query was canceled while the
1168			 * isc_socket_sendto/connect() was in progress.
1169			 */
1170			if (query->tcpsocket != NULL)
1171				isc_socket_detach(&query->tcpsocket);
1172			resquery_destroy(&query);
1173		}
1174	} else {
1175		switch (sevent->result) {
1176		case ISC_R_SUCCESS:
1177			break;
1178
1179		case ISC_R_HOSTUNREACH:
1180		case ISC_R_NETUNREACH:
1181		case ISC_R_NOPERM:
1182		case ISC_R_ADDRNOTAVAIL:
1183		case ISC_R_CONNREFUSED:
1184
1185			/*
1186			 * No route to remote.
1187			 */
1188			add_bad(fctx, query->addrinfo, sevent->result,
1189				badns_unreachable);
1190			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
1191			retry = ISC_TRUE;
1192			break;
1193
1194		default:
1195			fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
1196			break;
1197		}
1198	}
1199
1200	isc_event_free(&event);
1201
1202	if (retry) {
1203		/*
1204		 * Behave as if the idle timer has expired.  For TCP
1205		 * this may not actually reflect the latest timer.
1206		 */
1207		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
1208		result = fctx_stopidletimer(fctx);
1209		if (result != ISC_R_SUCCESS)
1210			fctx_done(fctx, result, __LINE__);
1211		else
1212			fctx_try(fctx, ISC_TRUE, ISC_FALSE);
1213	}
1214}
1215
1216static void
1217resquery_udpconnected(isc_task_t *task, isc_event_t *event) {
1218	resquery_t *query = event->ev_arg;
1219
1220	REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
1221
1222	QTRACE("udpconnected");
1223
1224	UNUSED(task);
1225
1226	INSIST(RESQUERY_CONNECTING(query));
1227
1228	query->connects--;
1229
1230	process_sendevent(query, event);
1231}
1232
1233static void
1234resquery_senddone(isc_task_t *task, isc_event_t *event) {
1235	resquery_t *query = event->ev_arg;
1236
1237	REQUIRE(event->ev_type == ISC_SOCKEVENT_SENDDONE);
1238
1239	QTRACE("senddone");
1240
1241	/*
1242	 * XXXRTH
1243	 *
1244	 * Currently we don't wait for the senddone event before retrying
1245	 * a query.  This means that if we get really behind, we may end
1246	 * up doing extra work!
1247	 */
1248
1249	UNUSED(task);
1250
1251	INSIST(RESQUERY_SENDING(query));
1252
1253	query->sends--;
1254
1255	process_sendevent(query, event);
1256}
1257
1258static inline isc_result_t
1259fctx_addopt(dns_message_t *message, unsigned int version,
1260	    isc_uint16_t udpsize, isc_boolean_t request_nsid)
1261{
1262	dns_rdataset_t *rdataset;
1263	dns_rdatalist_t *rdatalist;
1264	dns_rdata_t *rdata;
1265	isc_result_t result;
1266
1267	rdatalist = NULL;
1268	result = dns_message_gettemprdatalist(message, &rdatalist);
1269	if (result != ISC_R_SUCCESS)
1270		return (result);
1271	rdata = NULL;
1272	result = dns_message_gettemprdata(message, &rdata);
1273	if (result != ISC_R_SUCCESS)
1274		return (result);
1275	rdataset = NULL;
1276	result = dns_message_gettemprdataset(message, &rdataset);
1277	if (result != ISC_R_SUCCESS)
1278		return (result);
1279	dns_rdataset_init(rdataset);
1280
1281	rdatalist->type = dns_rdatatype_opt;
1282	rdatalist->covers = 0;
1283
1284	/*
1285	 * Set Maximum UDP buffer size.
1286	 */
1287	rdatalist->rdclass = udpsize;
1288
1289	/*
1290	 * Set EXTENDED-RCODE and Z to 0, DO to 1.
1291	 */
1292	rdatalist->ttl = (version << 16);
1293	rdatalist->ttl |= DNS_MESSAGEEXTFLAG_DO;
1294
1295	/*
1296	 * Set EDNS options if applicable
1297	 */
1298	if (request_nsid) {
1299		/* Send empty NSID option (RFC5001) */
1300		unsigned char data[4];
1301		isc_buffer_t buf;
1302
1303		isc_buffer_init(&buf, data, sizeof(data));
1304		isc_buffer_putuint16(&buf, DNS_OPT_NSID);
1305		isc_buffer_putuint16(&buf, 0);
1306		rdata->data = data;
1307		rdata->length = sizeof(data);
1308	} else {
1309		rdata->data = NULL;
1310		rdata->length = 0;
1311	}
1312
1313	rdata->rdclass = rdatalist->rdclass;
1314	rdata->type = rdatalist->type;
1315	rdata->flags = 0;
1316
1317	ISC_LIST_INIT(rdatalist->rdata);
1318	ISC_LIST_APPEND(rdatalist->rdata, rdata, link);
1319	RUNTIME_CHECK(dns_rdatalist_tordataset(rdatalist, rdataset) == ISC_R_SUCCESS);
1320
1321	return (dns_message_setopt(message, rdataset));
1322}
1323
1324static inline void
1325fctx_setretryinterval(fetchctx_t *fctx, unsigned int rtt) {
1326	unsigned int seconds;
1327	unsigned int us;
1328
1329	/*
1330	 * We retry every .8 seconds the first two times through the address
1331	 * list, and then we do exponential back-off.
1332	 */
1333	if (fctx->restarts < 3)
1334		us = 800000;
1335	else
1336		us = (800000 << (fctx->restarts - 2));
1337
1338	/*
1339	 * Double the round-trip time.
1340	 */
1341	rtt *= 2;
1342
1343	/*
1344	 * Always wait for at least the doubled round-trip time.
1345	 */
1346	if (us < rtt)
1347		us = rtt;
1348
1349	/*
1350	 * But don't ever wait for more than 10 seconds.
1351	 */
1352	if (us > 10000000)
1353		us = 10000000;
1354
1355	seconds = us / 1000000;
1356	us -= seconds * 1000000;
1357	isc_interval_set(&fctx->interval, seconds, us * 1000);
1358}
1359
/*
 * Start one query for 'fctx' to the server described by 'addrinfo'.
 *
 * Sets the retry interval and starts the idle timer, allocates and
 * initializes a resquery_t, then selects the transport: for
 * DNS_FETCHOPT_TCP a new TCP socket is created (and bound) and a connect
 * is started; otherwise a UDP dispatch is obtained and the query is sent
 * via resquery_send().
 *
 * On success the query is appended to fctx->queries, the query counters
 * and statistics are updated, and ISC_R_SUCCESS is returned.  On failure
 * whatever was acquired is released through the cleanup labels, the idle
 * timer is stopped, and the failing result is returned.
 */
static isc_result_t
fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
	   unsigned int options)
{
	dns_resolver_t *res;
	isc_task_t *task;
	isc_result_t result;
	resquery_t *query;
	isc_sockaddr_t addr;
	isc_boolean_t have_addr = ISC_FALSE;
	unsigned int srtt;

	FCTXTRACE("query");

	res = fctx->res;
	task = res->buckets[fctx->bucketnum].task;

	/*
	 * For forwarders, use an srtt of at least 1000000 when computing
	 * the retry interval.
	 */
	srtt = addrinfo->srtt;
	if (ISFORWARDER(addrinfo) && srtt < 1000000)
		srtt = 1000000;

	fctx_setretryinterval(fctx, srtt);
	result = fctx_startidletimer(fctx, &fctx->interval);
	if (result != ISC_R_SUCCESS)
		return (result);

	INSIST(ISC_LIST_EMPTY(fctx->validators));

	dns_message_reset(fctx->rmessage, DNS_MESSAGE_INTENTPARSE);

	query = isc_mem_get(fctx->mctx, sizeof(*query));
	if (query == NULL) {
		result = ISC_R_NOMEMORY;
		goto stop_idle_timer;
	}
	query->mctx = fctx->mctx;
	query->options = options;
	query->attributes = 0;
	query->sends = 0;
	query->connects = 0;
	/*
	 * Note that the caller MUST guarantee that 'addrinfo' will remain
	 * valid until this query is canceled.
	 */
	query->addrinfo = addrinfo;
	TIME_NOW(&query->start);

	/*
	 * If this is a TCP query, then we need to make a socket and
	 * a dispatch for it here.  Otherwise we use the resolver's
	 * shared dispatch.
	 */
	query->dispatchmgr = res->dispatchmgr;
	query->dispatch = NULL;
	query->exclusivesocket = ISC_FALSE;
	query->tcpsocket = NULL;
	/*
	 * If the view has a peer entry for the destination and that peer
	 * supplies a query source address, remember it in 'addr'.
	 */
	if (res->view->peers != NULL) {
		dns_peer_t *peer = NULL;
		isc_netaddr_t dstip;
		isc_netaddr_fromsockaddr(&dstip, &addrinfo->sockaddr);
		result = dns_peerlist_peerbyaddr(res->view->peers,
						 &dstip, &peer);
		if (result == ISC_R_SUCCESS) {
			result = dns_peer_getquerysource(peer, &addr);
			if (result == ISC_R_SUCCESS)
				have_addr = ISC_TRUE;
		}
	}

	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
		int pf;

		pf = isc_sockaddr_pf(&addrinfo->sockaddr);
		/*
		 * Without a peer-specific source, take the local address
		 * of the resolver's dispatch for this protocol family.
		 */
		if (!have_addr) {
			switch (pf) {
			case PF_INET:
				result =
				  dns_dispatch_getlocaladdress(res->dispatchv4,
							       &addr);
				break;
			case PF_INET6:
				result =
				  dns_dispatch_getlocaladdress(res->dispatchv6,
							       &addr);
				break;
			default:
				result = ISC_R_NOTIMPLEMENTED;
				break;
			}
			if (result != ISC_R_SUCCESS)
				goto cleanup_query;
		}
		/* Clear the port in the source address before binding. */
		isc_sockaddr_setport(&addr, 0);

		result = isc_socket_create(res->socketmgr, pf,
					   isc_sockettype_tcp,
					   &query->tcpsocket);
		if (result != ISC_R_SUCCESS)
			goto cleanup_query;

#ifndef BROKEN_TCP_BIND_BEFORE_CONNECT
		result = isc_socket_bind(query->tcpsocket, &addr, 0);
		if (result != ISC_R_SUCCESS)
			goto cleanup_socket;
#endif

		/*
		 * A dispatch will be created once the connect succeeds.
		 */
	} else {
		if (have_addr) {
			/*
			 * A peer-specific source address was configured:
			 * get a UDP dispatch for that address.
			 */
			unsigned int attrs, attrmask;
			attrs = DNS_DISPATCHATTR_UDP;
			switch (isc_sockaddr_pf(&addr)) {
			case AF_INET:
				attrs |= DNS_DISPATCHATTR_IPV4;
				break;
			case AF_INET6:
				attrs |= DNS_DISPATCHATTR_IPV6;
				break;
			default:
				result = ISC_R_NOTIMPLEMENTED;
				goto cleanup_query;
			}
			attrmask = DNS_DISPATCHATTR_UDP;
			attrmask |= DNS_DISPATCHATTR_TCP;
			attrmask |= DNS_DISPATCHATTR_IPV4;
			attrmask |= DNS_DISPATCHATTR_IPV6;
			result = dns_dispatch_getudp(res->dispatchmgr,
						     res->socketmgr,
						     res->taskmgr, &addr,
						     4096, 1000, 32768, 16411,
						     16433, attrs, attrmask,
						     &query->dispatch);
			if (result != ISC_R_SUCCESS)
				goto cleanup_query;
		} else {
			/*
			 * Otherwise attach to the resolver's dispatch for
			 * the destination's protocol family.
			 */
			switch (isc_sockaddr_pf(&addrinfo->sockaddr)) {
			case PF_INET:
				dns_dispatch_attach(res->dispatchv4,
						    &query->dispatch);
				query->exclusivesocket = res->exclusivev4;
				break;
			case PF_INET6:
				dns_dispatch_attach(res->dispatchv6,
						    &query->dispatch);
				query->exclusivesocket = res->exclusivev6;
				break;
			default:
				result = ISC_R_NOTIMPLEMENTED;
				goto cleanup_query;
			}
		}
		/*
		 * We should always have a valid dispatcher here.  If we
		 * don't support a protocol family, then its dispatcher
		 * will be NULL, but we shouldn't be finding addresses for
		 * protocol types we don't support, so the dispatcher
		 * we found should never be NULL.
		 */
		INSIST(query->dispatch != NULL);
	}

	/* Finish initializing the query before it is put to use. */
	query->dispentry = NULL;
	query->fctx = fctx;
	query->tsig = NULL;
	query->tsigkey = NULL;
	ISC_LINK_INIT(query, link);
	query->magic = QUERY_MAGIC;

	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
		/*
		 * Connect to the remote server.
		 *
		 * XXXRTH  Should we attach to the socket?
		 */
		result = isc_socket_connect(query->tcpsocket,
					    &addrinfo->sockaddr, task,
					    resquery_connected, query);
		if (result != ISC_R_SUCCESS)
			goto cleanup_socket;
		query->connects++;
		QTRACE("connecting via TCP");
	} else {
		result = resquery_send(query);
		if (result != ISC_R_SUCCESS)
			goto cleanup_dispatch;
	}
	fctx->querysent++;

	/* The query is live: link it onto the fctx and update counters. */
	ISC_LIST_APPEND(fctx->queries, query, link);
	query->fctx->nqueries++;
	if (isc_sockaddr_pf(&addrinfo->sockaddr) == PF_INET)
		inc_stats(res, dns_resstatscounter_queryv4);
	else
		inc_stats(res, dns_resstatscounter_queryv6);
	if (res->view->resquerystats != NULL)
		dns_rdatatypestats_increment(res->view->resquerystats,
					     fctx->type);

	return (ISC_R_SUCCESS);

	/*
	 * Error exits.  The labels fall through one into the next, so
	 * jumping to an earlier label also runs all of the later cleanup.
	 */
 cleanup_socket:
	isc_socket_detach(&query->tcpsocket);

 cleanup_dispatch:
	if (query->dispatch != NULL)
		dns_dispatch_detach(&query->dispatch);

 cleanup_query:
	/* The query is only freed here when no connect is outstanding. */
	if (query->connects == 0) {
		query->magic = 0;
		isc_mem_put(fctx->mctx, query, sizeof(*query));
	}

 stop_idle_timer:
	RUNTIME_CHECK(fctx_stopidletimer(fctx) == ISC_R_SUCCESS);

	return (result);
}
1580
1581static isc_boolean_t
1582bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1583	isc_sockaddr_t *sa;
1584
1585	for (sa = ISC_LIST_HEAD(fctx->bad_edns);
1586	     sa != NULL;
1587	     sa = ISC_LIST_NEXT(sa, link)) {
1588		if (isc_sockaddr_equal(sa, address))
1589			return (ISC_TRUE);
1590	}
1591
1592	return (ISC_FALSE);
1593}
1594
1595static void
1596add_bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1597	isc_sockaddr_t *sa;
1598
1599	if (bad_edns(fctx, address))
1600		return;
1601
1602	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1603	if (sa == NULL)
1604		return;
1605
1606	*sa = *address;
1607	ISC_LIST_INITANDAPPEND(fctx->bad_edns, sa, link);
1608}
1609
1610static isc_boolean_t
1611triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1612	isc_sockaddr_t *sa;
1613
1614	for (sa = ISC_LIST_HEAD(fctx->edns);
1615	     sa != NULL;
1616	     sa = ISC_LIST_NEXT(sa, link)) {
1617		if (isc_sockaddr_equal(sa, address))
1618			return (ISC_TRUE);
1619	}
1620
1621	return (ISC_FALSE);
1622}
1623
1624static void
1625add_triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1626	isc_sockaddr_t *sa;
1627
1628	if (triededns(fctx, address))
1629		return;
1630
1631	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1632	if (sa == NULL)
1633		return;
1634
1635	*sa = *address;
1636	ISC_LIST_INITANDAPPEND(fctx->edns, sa, link);
1637}
1638
1639static isc_boolean_t
1640triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
1641	isc_sockaddr_t *sa;
1642
1643	for (sa = ISC_LIST_HEAD(fctx->edns512);
1644	     sa != NULL;
1645	     sa = ISC_LIST_NEXT(sa, link)) {
1646		if (isc_sockaddr_equal(sa, address))
1647			return (ISC_TRUE);
1648	}
1649
1650	return (ISC_FALSE);
1651}
1652
1653static void
1654add_triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
1655	isc_sockaddr_t *sa;
1656
1657	if (triededns512(fctx, address))
1658		return;
1659
1660	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1661	if (sa == NULL)
1662		return;
1663
1664	*sa = *address;
1665	ISC_LIST_INITANDAPPEND(fctx->edns512, sa, link);
1666}
1667
1668static isc_result_t
1669resquery_send(resquery_t *query) {
1670	fetchctx_t *fctx;
1671	isc_result_t result;
1672	dns_name_t *qname = NULL;
1673	dns_rdataset_t *qrdataset = NULL;
1674	isc_region_t r;
1675	dns_resolver_t *res;
1676	isc_task_t *task;
1677	isc_socket_t *socket;
1678	isc_buffer_t tcpbuffer;
1679	isc_sockaddr_t *address;
1680	isc_buffer_t *buffer;
1681	isc_netaddr_t ipaddr;
1682	dns_tsigkey_t *tsigkey = NULL;
1683	dns_peer_t *peer = NULL;
1684	isc_boolean_t useedns;
1685	dns_compress_t cctx;
1686	isc_boolean_t cleanup_cctx = ISC_FALSE;
1687	isc_boolean_t secure_domain;
1688	isc_boolean_t connecting = ISC_FALSE;
1689
1690	fctx = query->fctx;
1691	QTRACE("send");
1692
1693	res = fctx->res;
1694	task = res->buckets[fctx->bucketnum].task;
1695	address = NULL;
1696
1697	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1698		/*
1699		 * Reserve space for the TCP message length.
1700		 */
1701		isc_buffer_init(&tcpbuffer, query->data, sizeof(query->data));
1702		isc_buffer_init(&query->buffer, query->data + 2,
1703				sizeof(query->data) - 2);
1704		buffer = &tcpbuffer;
1705	} else {
1706		isc_buffer_init(&query->buffer, query->data,
1707				sizeof(query->data));
1708		buffer = &query->buffer;
1709	}
1710
1711	result = dns_message_gettempname(fctx->qmessage, &qname);
1712	if (result != ISC_R_SUCCESS)
1713		goto cleanup_temps;
1714	result = dns_message_gettemprdataset(fctx->qmessage, &qrdataset);
1715	if (result != ISC_R_SUCCESS)
1716		goto cleanup_temps;
1717
1718	/*
1719	 * Get a query id from the dispatch.
1720	 */
1721	result = dns_dispatch_addresponse2(query->dispatch,
1722					   &query->addrinfo->sockaddr,
1723					   task,
1724					   resquery_response,
1725					   query,
1726					   &query->id,
1727					   &query->dispentry,
1728					   res->socketmgr);
1729	if (result != ISC_R_SUCCESS)
1730		goto cleanup_temps;
1731
1732	fctx->qmessage->opcode = dns_opcode_query;
1733
1734	/*
1735	 * Set up question.
1736	 */
1737	dns_name_init(qname, NULL);
1738	dns_name_clone(&fctx->name, qname);
1739	dns_rdataset_init(qrdataset);
1740	dns_rdataset_makequestion(qrdataset, res->rdclass, fctx->type);
1741	ISC_LIST_APPEND(qname->list, qrdataset, link);
1742	dns_message_addname(fctx->qmessage, qname, DNS_SECTION_QUESTION);
1743	qname = NULL;
1744	qrdataset = NULL;
1745
1746	/*
1747	 * Set RD if the client has requested that we do a recursive query,
1748	 * or if we're sending to a forwarder.
1749	 */
1750	if ((query->options & DNS_FETCHOPT_RECURSIVE) != 0 ||
1751	    ISFORWARDER(query->addrinfo))
1752		fctx->qmessage->flags |= DNS_MESSAGEFLAG_RD;
1753
1754	/*
1755	 * Set CD if the client says don't validate or the question is
1756	 * under a secure entry point.
1757	 */
1758	if ((query->options & DNS_FETCHOPT_NOVALIDATE) != 0) {
1759		fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
1760	} else if (res->view->enablevalidation) {
1761		result = dns_view_issecuredomain(res->view, &fctx->name,
1762						 &secure_domain);
1763		if (result != ISC_R_SUCCESS)
1764			secure_domain = ISC_FALSE;
1765		if (res->view->dlv != NULL)
1766			secure_domain = ISC_TRUE;
1767		if (secure_domain)
1768			fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
1769	}
1770
1771	/*
1772	 * We don't have to set opcode because it defaults to query.
1773	 */
1774	fctx->qmessage->id = query->id;
1775
1776	/*
1777	 * Convert the question to wire format.
1778	 */
1779	result = dns_compress_init(&cctx, -1, fctx->res->mctx);
1780	if (result != ISC_R_SUCCESS)
1781		goto cleanup_message;
1782	cleanup_cctx = ISC_TRUE;
1783
1784	result = dns_message_renderbegin(fctx->qmessage, &cctx,
1785					 &query->buffer);
1786	if (result != ISC_R_SUCCESS)
1787		goto cleanup_message;
1788
1789	result = dns_message_rendersection(fctx->qmessage,
1790					   DNS_SECTION_QUESTION, 0);
1791	if (result != ISC_R_SUCCESS)
1792		goto cleanup_message;
1793
1794	peer = NULL;
1795	isc_netaddr_fromsockaddr(&ipaddr, &query->addrinfo->sockaddr);
1796	(void) dns_peerlist_peerbyaddr(fctx->res->view->peers, &ipaddr, &peer);
1797
1798	/*
1799	 * The ADB does not know about servers with "edns no".  Check this,
1800	 * and then inform the ADB for future use.
1801	 */
1802	if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0 &&
1803	    peer != NULL &&
1804	    dns_peer_getsupportedns(peer, &useedns) == ISC_R_SUCCESS &&
1805	    !useedns)
1806	{
1807		query->options |= DNS_FETCHOPT_NOEDNS0;
1808		dns_adb_changeflags(fctx->adb, query->addrinfo,
1809				    DNS_FETCHOPT_NOEDNS0,
1810				    DNS_FETCHOPT_NOEDNS0);
1811	}
1812
1813	/* Sync NOEDNS0 flag in addrinfo->flags and options now. */
1814	if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) != 0)
1815		query->options |= DNS_FETCHOPT_NOEDNS0;
1816
1817	/*
1818	 * Handle timeouts by reducing the UDP response size to 512 bytes
1819	 * then if that doesn't work disabling EDNS (includes DO) and CD.
1820	 *
1821	 * These timeout can be due to:
1822	 *	* broken nameservers that don't respond to EDNS queries.
1823	 *	* broken/misconfigured firewalls and NAT implementations
1824	 *	  that don't handle IP fragmentation.
1825	 *	* broken/misconfigured firewalls that don't handle responses
1826	 *	  greater than 512 bytes.
1827	 *	* broken/misconfigured firewalls that don't handle EDNS, DO
1828	 *	  or CD.
1829	 *	* packet loss / link outage.
1830	 */
1831	if (fctx->timeout) {
1832		if ((triededns512(fctx, &query->addrinfo->sockaddr) ||
1833		     fctx->timeouts >= (MAX_EDNS0_TIMEOUTS * 2)) &&
1834		    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1835			query->options |= DNS_FETCHOPT_NOEDNS0;
1836			fctx->reason = "disabling EDNS";
1837		} else if ((triededns(fctx, &query->addrinfo->sockaddr) ||
1838			    fctx->timeouts >= MAX_EDNS0_TIMEOUTS)

Large files are truncated, but you can click here to view the full file