/contrib/bind9/lib/dns/rbtdb.c
https://bitbucket.org/freebsd/freebsd-head/ · C · 9332 lines · 6603 code · 1036 blank · 1693 comment · 2043 complexity · 246c4f82217f43e68410e305432312b4 MD5 · raw file
Large files are truncated click here to view the full file
- /*
- * Copyright (C) 2004-2012 Internet Systems Consortium, Inc. ("ISC")
- * Copyright (C) 1999-2003 Internet Software Consortium.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
- * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
- * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
- * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
- /* $Id$ */
- /*! \file */
- /*
- * Principal Author: Bob Halley
- */
- #include <config.h>
- /* #define inline */
- #include <isc/event.h>
- #include <isc/heap.h>
- #include <isc/mem.h>
- #include <isc/mutex.h>
- #include <isc/platform.h>
- #include <isc/print.h>
- #include <isc/random.h>
- #include <isc/refcount.h>
- #include <isc/rwlock.h>
- #include <isc/serial.h>
- #include <isc/string.h>
- #include <isc/task.h>
- #include <isc/time.h>
- #include <isc/util.h>
- #include <dns/acache.h>
- #include <dns/db.h>
- #include <dns/dbiterator.h>
- #include <dns/events.h>
- #include <dns/fixedname.h>
- #include <dns/lib.h>
- #include <dns/log.h>
- #include <dns/masterdump.h>
- #include <dns/nsec.h>
- #include <dns/nsec3.h>
- #include <dns/rbt.h>
- #include <dns/rpz.h>
- #include <dns/rdata.h>
- #include <dns/rdataset.h>
- #include <dns/rdatasetiter.h>
- #include <dns/rdataslab.h>
- #include <dns/rdatastruct.h>
- #include <dns/result.h>
- #include <dns/stats.h>
- #include <dns/view.h>
- #include <dns/zone.h>
- #include <dns/zonekey.h>
- #ifdef DNS_RBTDB_VERSION64
- #include "rbtdb64.h"
- #else
- #include "rbtdb.h"
- #endif
/*
 * Implementation magic number.  The build with 64-bit serials uses a
 * different value ('RBD8') from the default build ('RBD4').
 */
#if defined(DNS_RBTDB_VERSION64)
#define RBTDB_MAGIC		ISC_MAGIC('R', 'B', 'D', '8')
#else
#define RBTDB_MAGIC		ISC_MAGIC('R', 'B', 'D', '4')
#endif

/*%
 * Note that "impmagic" is not the first four bytes of the struct, so
 * ISC_MAGIC_VALID cannot be used.
 */
#define VALID_RBTDB(rbtdb) \
	((rbtdb) != NULL && (rbtdb)->common.impmagic == RBTDB_MAGIC)
- #ifdef DNS_RBTDB_VERSION64
- typedef isc_uint64_t rbtdb_serial_t;
- /*%
- * Make casting easier in symbolic debuggers by using different names
- * for the 64 bit version.
- */
- #define dns_rbtdb_t dns_rbtdb64_t
- #define rdatasetheader_t rdatasetheader64_t
- #define rbtdb_version_t rbtdb_version64_t
- #else
- typedef isc_uint32_t rbtdb_serial_t;
- #endif
- typedef isc_uint32_t rbtdb_rdatatype_t;
/*
 * An rbtdb_rdatatype_t packs two rdata types into one value: the "base"
 * type lives in the low 16 bits and the "ext" type in the high 16 bits.
 *
 * RBTDB_RDATATYPE_BASE(type)	extracts the low 16 bits.
 * RBTDB_RDATATYPE_EXT(type)	extracts the high 16 bits.
 * RBTDB_RDATATYPE_VALUE(b, e)	builds the packed value from base 'b' and
 *				ext 'e'.
 *
 * 'e' is converted to rbtdb_rdatatype_t *before* it is shifted.  If the
 * argument has a type narrower than int it would otherwise be promoted to
 * (signed) int, and shifting a value >= 0x8000 left by 16 bits overflows
 * that int, which is undefined behaviour.
 */
#define RBTDB_RDATATYPE_BASE(type)	((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)	((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
	((((rbtdb_rdatatype_t)(e)) << 16) | ((rbtdb_rdatatype_t)(b)))

/* Packed types for RRSIG rdatasets covering particular types. */
#define RBTDB_RDATATYPE_SIGNSEC \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
#define RBTDB_RDATATYPE_SIGNSEC3 \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
#define RBTDB_RDATATYPE_SIGNS \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
#define RBTDB_RDATATYPE_SIGCNAME \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
#define RBTDB_RDATATYPE_SIGDNAME \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)

/* Base type 0 combined with ext type ANY. */
#define RBTDB_RDATATYPE_NCACHEANY \
		RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
/*
 * We use rwlock for the DB lock only when ISC_RWLOCK_USEATOMIC is defined.
 * Using rwlock is effective with regard to lookup performance only when
 * it is implemented in an efficient way.
 * Otherwise, it is generally wise to stick to the simple locking since rwlock
 * would require more memory or can even make lookups slower due to its own
 * overhead (when it internally calls mutex locks).
 */
#if defined(ISC_RWLOCK_USEATOMIC)
#define DNS_RBTDB_USERWLOCK	1
#else
#define DNS_RBTDB_USERWLOCK	0
#endif

/*
 * Database lock wrappers.  In the mutex variant the lock type argument 't'
 * is accepted but ignored.
 */
#if DNS_RBTDB_USERWLOCK
#define RBTDB_INITLOCK(l)	isc_rwlock_init((l), 0, 0)
#define RBTDB_DESTROYLOCK(l)	isc_rwlock_destroy(l)
#define RBTDB_LOCK(l, t)	RWLOCK((l), (t))
#define RBTDB_UNLOCK(l, t)	RWUNLOCK((l), (t))
#else
#define RBTDB_INITLOCK(l)	isc_mutex_init(l)
#define RBTDB_DESTROYLOCK(l)	DESTROYLOCK(l)
#define RBTDB_LOCK(l, t)	LOCK(l)
#define RBTDB_UNLOCK(l, t)	UNLOCK(l)
#endif
- /*
- * Since node locking is sensitive to both performance and memory footprint,
- * we need some trick here. If we have both high-performance rwlock and
- * high performance and small-memory reference counters, we use rwlock for
- * node lock and isc_refcount for node references. In this case, we don't have
- * to protect the access to the counters by locks.
- * Otherwise, we simply use ordinary mutex lock for node locking, and use
- * simple integers as reference counters which is protected by the lock.
- * In most cases, we can simply use wrapper macros such as NODE_LOCK and
- * NODE_UNLOCK. In some other cases, however, we need to protect reference
- * counters first and then protect other parts of a node as read-only data.
- * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
- * provided for these special cases. When we can use the efficient backend
- * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
- * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
- * section including the access to the reference counter.
- * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
- * section is also protected by NODE_STRONGLOCK().
- */
- #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
- typedef isc_rwlock_t nodelock_t;
- #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
- #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
- #define NODE_LOCK(l, t) RWLOCK((l), (t))
- #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
- #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
- #define NODE_STRONGLOCK(l) ((void)0)
- #define NODE_STRONGUNLOCK(l) ((void)0)
- #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
- #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
- #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
- #else
- typedef isc_mutex_t nodelock_t;
- #define NODE_INITLOCK(l) isc_mutex_init(l)
- #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
- #define NODE_LOCK(l, t) LOCK(l)
- #define NODE_UNLOCK(l, t) UNLOCK(l)
- #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
- #define NODE_STRONGLOCK(l) LOCK(l)
- #define NODE_STRONGUNLOCK(l) UNLOCK(l)
- #define NODE_WEAKLOCK(l, t) ((void)0)
- #define NODE_WEAKUNLOCK(l, t) ((void)0)
- #define NODE_WEAKDOWNGRADE(l) ((void)0)
- #endif
/*%
 * Whether to rate-limit updating the LRU to avoid possible thread contention.
 * Our performance measurement has shown the cost is marginal, so it's defined
 * to be 0 by default either with or without threads.
 */
#if !defined(DNS_RBTDB_LIMITLRUUPDATE)
#define DNS_RBTDB_LIMITLRUUPDATE	0
#endif

/*
 * Allow clients with a virtual time of up to 5 minutes (300 seconds) in the
 * past to see records that would otherwise have expired.
 */
#define RBTDB_VIRTUAL			300
- struct noqname {
- dns_name_t name;
- void * neg;
- void * negsig;
- dns_rdatatype_t type;
- };
- typedef struct acachectl acachectl_t;
- typedef struct rdatasetheader {
- /*%
- * Locked by the owning node's lock.
- */
- rbtdb_serial_t serial;
- dns_ttl_t rdh_ttl;
- rbtdb_rdatatype_t type;
- isc_uint16_t attributes;
- dns_trust_t trust;
- struct noqname *noqname;
- struct noqname *closest;
- /*%<
- * We don't use the LIST macros, because the LIST structure has
- * both head and tail pointers, and is doubly linked.
- */
- struct rdatasetheader *next;
- /*%<
- * If this is the top header for an rdataset, 'next' points
- * to the top header for the next rdataset (i.e., the next type).
- * Otherwise, it points up to the header whose down pointer points
- * at this header.
- */
- struct rdatasetheader *down;
- /*%<
- * Points to the header for the next older version of
- * this rdataset.
- */
- isc_uint32_t count;
- /*%<
- * Monotonously increased every time this rdataset is bound so that
- * it is used as the base of the starting point in DNS responses
- * when the "cyclic" rrset-order is required. Since the ordering
- * should not be so crucial, no lock is set for the counter for
- * performance reasons.
- */
- acachectl_t *additional_auth;
- acachectl_t *additional_glue;
- dns_rbtnode_t *node;
- isc_stdtime_t last_used;
- ISC_LINK(struct rdatasetheader) link;
- unsigned int heap_index;
- /*%<
- * Used for TTL-based cache cleaning.
- */
- isc_stdtime_t resign;
- } rdatasetheader_t;
- typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
- typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
/*
 * Bit flags stored in rdatasetheader_t.attributes.
 */
#define RDATASET_ATTR_NONEXISTENT	0x0001
#define RDATASET_ATTR_STALE		0x0002
#define RDATASET_ATTR_IGNORE		0x0004
#define RDATASET_ATTR_RETAIN		0x0008
#define RDATASET_ATTR_NXDOMAIN		0x0010
#define RDATASET_ATTR_RESIGN		0x0020
#define RDATASET_ATTR_STATCOUNT		0x0040
#define RDATASET_ATTR_OPTOUT		0x0080
#define RDATASET_ATTR_NEGATIVE		0x0100
- typedef struct acache_cbarg {
- dns_rdatasetadditional_t type;
- unsigned int count;
- dns_db_t *db;
- dns_dbnode_t *node;
- rdatasetheader_t *header;
- } acache_cbarg_t;
- struct acachectl {
- dns_acacheentry_t *entry;
- acache_cbarg_t *cbarg;
- };
/*
 * XXX
 * When the cache will pre-expire data (due to memory low or other
 * situations) before the rdataset's TTL has expired, it MUST
 * respect the RETAIN bit and not expire the data until its TTL is
 * expired.
 */

#undef IGNORE			/* WIN32 winbase.h defines this. */

/*
 * Predicates on the 'attributes' bit field of a rdataset header.
 * EXISTS() is the negation of NONEXISTENT(); every other predicate is
 * true when the corresponding RDATASET_ATTR_* bit is set.
 */
#define EXISTS(header) \
	(((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
#define NONEXISTENT(header) \
	(((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
#define IGNORE(header) \
	(((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
#define RETAIN(header) \
	(((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
#define NXDOMAIN(header) \
	(((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
#define RESIGN(header) \
	(((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
#define OPTOUT(header) \
	(((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
#define NEGATIVE(header) \
	(((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)

#define DEFAULT_NODE_LOCK_COUNT		7	/*%< Should be prime. */
/*%
 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
 * There is a tradeoff issue about configuring this value: if this is too
 * small, it may cause heavier contention between threads; if this is too
 * large, LRU purge algorithm won't work well (entries tend to be purged
 * prematurely).
 * The default value should work well for most environments, but this can
 * also be configurable at compilation time via the
 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
 * 1 due to the assumption of overmem_purge().
 */
#if !defined(DNS_RBTDB_CACHE_NODE_LOCK_COUNT)
#define DEFAULT_CACHE_NODE_LOCK_COUNT	16
#elif DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
#error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
#else
#define DEFAULT_CACHE_NODE_LOCK_COUNT	DNS_RBTDB_CACHE_NODE_LOCK_COUNT
#endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
- typedef struct {
- nodelock_t lock;
- /* Protected in the refcount routines. */
- isc_refcount_t references;
- /* Locked by lock. */
- isc_boolean_t exiting;
- } rbtdb_nodelock_t;
- typedef struct rbtdb_changed {
- dns_rbtnode_t * node;
- isc_boolean_t dirty;
- ISC_LINK(struct rbtdb_changed) link;
- } rbtdb_changed_t;
- typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
/*
 * Security state recorded per database version (see rbtdb_version_t.secure).
 */
typedef enum {
	dns_db_insecure,
	dns_db_partial,
	dns_db_secure
} dns_db_secure_t;
- typedef struct dns_rbtdb dns_rbtdb_t;
- typedef struct rbtdb_version {
- /* Not locked */
- rbtdb_serial_t serial;
- dns_rbtdb_t * rbtdb;
- /*
- * Protected in the refcount routines.
- * XXXJT: should we change the lock policy based on the refcount
- * performance?
- */
- isc_refcount_t references;
- /* Locked by database lock. */
- isc_boolean_t writer;
- isc_boolean_t commit_ok;
- rbtdb_changedlist_t changed_list;
- rdatasetheaderlist_t resigned_list;
- ISC_LINK(struct rbtdb_version) link;
- dns_db_secure_t secure;
- isc_boolean_t havensec3;
- /* NSEC3 parameters */
- dns_hash_t hash;
- isc_uint8_t flags;
- isc_uint16_t iterations;
- isc_uint8_t salt_length;
- unsigned char salt[DNS_NSEC3_SALTSIZE];
- } rbtdb_version_t;
- typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
- struct dns_rbtdb {
- /* Unlocked. */
- dns_db_t common;
- /* Locks the data in this struct */
- #if DNS_RBTDB_USERWLOCK
- isc_rwlock_t lock;
- #else
- isc_mutex_t lock;
- #endif
- /* Locks the tree structure (prevents nodes appearing/disappearing) */
- isc_rwlock_t tree_lock;
- /* Locks for individual tree nodes */
- unsigned int node_lock_count;
- rbtdb_nodelock_t * node_locks;
- dns_rbtnode_t * origin_node;
- dns_stats_t * rrsetstats; /* cache DB only */
- /* Locked by lock. */
- unsigned int active;
- isc_refcount_t references;
- unsigned int attributes;
- rbtdb_serial_t current_serial;
- rbtdb_serial_t least_serial;
- rbtdb_serial_t next_serial;
- rbtdb_version_t * current_version;
- rbtdb_version_t * future_version;
- rbtdb_versionlist_t open_versions;
- isc_task_t * task;
- dns_dbnode_t *soanode;
- dns_dbnode_t *nsnode;
- /*
- * This is a linked list used to implement the LRU cache. There will
- * be node_lock_count linked lists here. Nodes in bucket 1 will be
- * placed on the linked list rdatasets[1].
- */
- rdatasetheaderlist_t *rdatasets;
- /*%
- * Temporary storage for stale cache nodes and dynamically deleted
- * nodes that await being cleaned up.
- */
- rbtnodelist_t *deadnodes;
- /*
- * Heaps. These are used for TTL based expiry in a cache,
- * or for zone resigning in a zone DB. hmctx is the memory
- * context to use for the heap (which differs from the main
- * database memory context in the case of a cache).
- */
- isc_mem_t * hmctx;
- isc_heap_t **heaps;
- /* Locked by tree_lock. */
- dns_rbt_t * tree;
- dns_rbt_t * nsec;
- dns_rbt_t * nsec3;
- dns_rpz_cidr_t * rpz_cidr;
- /* Unlocked */
- unsigned int quantum;
- };
- #define RBTDB_ATTR_LOADED 0x01
- #define RBTDB_ATTR_LOADING 0x02
- /*%
- * Search Context
- */
- typedef struct {
- dns_rbtdb_t * rbtdb;
- rbtdb_version_t * rbtversion;
- rbtdb_serial_t serial;
- unsigned int options;
- dns_rbtnodechain_t chain;
- isc_boolean_t copy_name;
- isc_boolean_t need_cleanup;
- isc_boolean_t wild;
- dns_rbtnode_t * zonecut;
- rdatasetheader_t * zonecut_rdataset;
- rdatasetheader_t * zonecut_sigrdataset;
- dns_fixedname_t zonecut_name;
- isc_stdtime_t now;
- } rbtdb_search_t;
- /*%
- * Load Context
- */
- typedef struct {
- dns_rbtdb_t * rbtdb;
- isc_stdtime_t now;
- } rbtdb_load_t;
- static void rdataset_disassociate(dns_rdataset_t *rdataset);
- static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
- static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
- static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
- static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
- static unsigned int rdataset_count(dns_rdataset_t *rdataset);
- static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
- dns_name_t *name,
- dns_rdataset_t *neg,
- dns_rdataset_t *negsig);
- static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
- dns_name_t *name,
- dns_rdataset_t *neg,
- dns_rdataset_t *negsig);
- static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
- dns_rdatasetadditional_t type,
- dns_rdatatype_t qtype,
- dns_acache_t *acache,
- dns_zone_t **zonep,
- dns_db_t **dbp,
- dns_dbversion_t **versionp,
- dns_dbnode_t **nodep,
- dns_name_t *fname,
- dns_message_t *msg,
- isc_stdtime_t now);
- static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
- dns_rdatasetadditional_t type,
- dns_rdatatype_t qtype,
- dns_acache_t *acache,
- dns_zone_t *zone,
- dns_db_t *db,
- dns_dbversion_t *version,
- dns_dbnode_t *node,
- dns_name_t *fname);
- static isc_result_t rdataset_putadditional(dns_acache_t *acache,
- dns_rdataset_t *rdataset,
- dns_rdatasetadditional_t type,
- dns_rdatatype_t qtype);
- static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
- isc_stdtime_t now);
- static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
- isc_stdtime_t now);
- static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
- isc_boolean_t tree_locked);
- static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
- isc_stdtime_t now, isc_boolean_t tree_locked);
- static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
- rdatasetheader_t *newheader);
- static void prune_tree(isc_task_t *task, isc_event_t *event);
- static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
- static void rdataset_expire(dns_rdataset_t *rdataset);
- static dns_rdatasetmethods_t rdataset_methods = {
- rdataset_disassociate,
- rdataset_first,
- rdataset_next,
- rdataset_current,
- rdataset_clone,
- rdataset_count,
- NULL,
- rdataset_getnoqname,
- NULL,
- rdataset_getclosest,
- rdataset_getadditional,
- rdataset_setadditional,
- rdataset_putadditional,
- rdataset_settrust,
- rdataset_expire
- };
- static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
- static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
- static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
- static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
- dns_rdataset_t *rdataset);
- static dns_rdatasetitermethods_t rdatasetiter_methods = {
- rdatasetiter_destroy,
- rdatasetiter_first,
- rdatasetiter_next,
- rdatasetiter_current
- };
- typedef struct rbtdb_rdatasetiter {
- dns_rdatasetiter_t common;
- rdatasetheader_t * current;
- } rbtdb_rdatasetiter_t;
- static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
- static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
- static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
- static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
- dns_name_t *name);
- static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
- static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
- static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
- dns_dbnode_t **nodep,
- dns_name_t *name);
- static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
- static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
- dns_name_t *name);
- static dns_dbiteratormethods_t dbiterator_methods = {
- dbiterator_destroy,
- dbiterator_first,
- dbiterator_last,
- dbiterator_seek,
- dbiterator_prev,
- dbiterator_next,
- dbiterator_current,
- dbiterator_pause,
- dbiterator_origin
- };
- #define DELETION_BATCH_MAX 64
- /*
- * If 'paused' is ISC_TRUE, then the tree lock is not being held.
- */
- typedef struct rbtdb_dbiterator {
- dns_dbiterator_t common;
- isc_boolean_t paused;
- isc_boolean_t new_origin;
- isc_rwlocktype_t tree_locked;
- isc_result_t result;
- dns_fixedname_t name;
- dns_fixedname_t origin;
- dns_rbtnodechain_t chain;
- dns_rbtnodechain_t nsec3chain;
- dns_rbtnodechain_t *current;
- dns_rbtnode_t *node;
- dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
- int delete;
- isc_boolean_t nsec3only;
- isc_boolean_t nonsec3;
- } rbtdb_dbiterator_t;
- #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
- #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
- static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
- isc_event_t *event);
- static void overmem(dns_db_t *db, isc_boolean_t overmem);
- #ifdef BIND9
- static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
- #endif
/*%
 * 'init_count' is used to initialize 'newheader->count' which in turn
 * is used to determine where in the cycle rrset-order cyclic starts.
 * We don't lock this as we don't care about simultaneous updates.
 *
 * Note:
 *	Both init_count and header->count can be ISC_UINT32_MAX.
 *	The count on the returned rdataset however can't be as
 *	that indicates that the database does not implement cyclic
 *	processing.
 */
static unsigned int init_count;
- /*
- * Locking
- *
- * If a routine is going to lock more than one lock in this module, then
- * the locking must be done in the following order:
- *
- * Tree Lock
- *
- * Node Lock (Only one from the set may be locked at one time by
- * any caller)
- *
- * Database Lock
- *
- * Failure to follow this hierarchy can result in deadlock.
- */
- /*
- * Deleting Nodes
- *
- * For zone databases the node for the origin of the zone MUST NOT be deleted.
- */
- /*
- * DB Routines
- */
- static void
- attach(dns_db_t *source, dns_db_t **targetp) {
- dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
- REQUIRE(VALID_RBTDB(rbtdb));
- isc_refcount_increment(&rbtdb->references, NULL);
- *targetp = source;
- }
- static void
- free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
- dns_rbtdb_t *rbtdb = event->ev_arg;
- UNUSED(task);
- free_rbtdb(rbtdb, ISC_TRUE, event);
- }
- static void
- update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
- isc_boolean_t increment)
- {
- dns_rdatastatstype_t statattributes = 0;
- dns_rdatastatstype_t base = 0;
- dns_rdatastatstype_t type;
- /* At the moment we count statistics only for cache DB */
- INSIST(IS_CACHE(rbtdb));
- if (NEGATIVE(header)) {
- if (NXDOMAIN(header))
- statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
- else {
- statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
- base = RBTDB_RDATATYPE_EXT(header->type);
- }
- } else
- base = RBTDB_RDATATYPE_BASE(header->type);
- type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
- if (increment)
- dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
- else
- dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
- }
- static void
- set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
- int idx;
- isc_heap_t *heap;
- dns_ttl_t oldttl;
- oldttl = header->rdh_ttl;
- header->rdh_ttl = newttl;
- if (!IS_CACHE(rbtdb))
- return;
- /*
- * It's possible the rbtdb is not a cache. If this is the case,
- * we will not have a heap, and we move on. If we do, though,
- * we might need to adjust things.
- */
- if (header->heap_index == 0 || newttl == oldttl)
- return;
- idx = header->node->locknum;
- if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
- return;
- heap = rbtdb->heaps[idx];
- if (newttl < oldttl)
- isc_heap_increased(heap, header->heap_index);
- else
- isc_heap_decreased(heap, header->heap_index);
- }
- /*%
- * These functions allow the heap code to rank the priority of each
- * element. It returns ISC_TRUE if v1 happens "sooner" than v2.
- */
- static isc_boolean_t
- ttl_sooner(void *v1, void *v2) {
- rdatasetheader_t *h1 = v1;
- rdatasetheader_t *h2 = v2;
- if (h1->rdh_ttl < h2->rdh_ttl)
- return (ISC_TRUE);
- return (ISC_FALSE);
- }
- static isc_boolean_t
- resign_sooner(void *v1, void *v2) {
- rdatasetheader_t *h1 = v1;
- rdatasetheader_t *h2 = v2;
- if (h1->resign < h2->resign)
- return (ISC_TRUE);
- return (ISC_FALSE);
- }
- /*%
- * This function sets the heap index into the header.
- */
- static void
- set_index(void *what, unsigned int index) {
- rdatasetheader_t *h = what;
- h->heap_index = index;
- }
- /*%
- * Work out how many nodes can be deleted in the time between two
- * requests to the nameserver. Smooth the resulting number and use it
- * as a estimate for the number of nodes to be deleted in the next
- * iteration.
- */
- static unsigned int
- adjust_quantum(unsigned int old, isc_time_t *start) {
- unsigned int pps = dns_pps; /* packets per second */
- unsigned int interval;
- isc_uint64_t usecs;
- isc_time_t end;
- unsigned int new;
- if (pps < 100)
- pps = 100;
- isc_time_now(&end);
- interval = 1000000 / pps; /* interval in usec */
- if (interval == 0)
- interval = 1;
- usecs = isc_time_microdiff(&end, start);
- if (usecs == 0) {
- /*
- * We were unable to measure the amount of time taken.
- * Double the nodes deleted next time.
- */
- old *= 2;
- if (old > 1000)
- old = 1000;
- return (old);
- }
- new = old * interval;
- new /= (unsigned int)usecs;
- if (new == 0)
- new = 1;
- else if (new > 1000)
- new = 1000;
- /* Smooth */
- new = (new + old * 3) / 4;
- isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
- ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
- return (new);
- }
- static void
- free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
- unsigned int i;
- isc_ondestroy_t ondest;
- isc_result_t result;
- char buf[DNS_NAME_FORMATSIZE];
- dns_rbt_t **treep;
- isc_time_t start;
- if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
- overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
- REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
- REQUIRE(rbtdb->future_version == NULL);
- if (rbtdb->current_version != NULL) {
- unsigned int refs;
- isc_refcount_decrement(&rbtdb->current_version->references,
- &refs);
- INSIST(refs == 0);
- UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
- isc_refcount_destroy(&rbtdb->current_version->references);
- isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
- sizeof(rbtdb_version_t));
- }
- /*
- * We assume the number of remaining dead nodes is reasonably small;
- * the overhead of unlinking all nodes here should be negligible.
- */
- for (i = 0; i < rbtdb->node_lock_count; i++) {
- dns_rbtnode_t *node;
- node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
- while (node != NULL) {
- ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
- node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
- }
- }
- if (event == NULL)
- rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
- for (;;) {
- /*
- * pick the next tree to (start to) destroy
- */
- treep = &rbtdb->tree;
- if (*treep == NULL) {
- treep = &rbtdb->nsec;
- if (*treep == NULL) {
- treep = &rbtdb->nsec3;
- /*
- * we're finished after clear cutting
- */
- if (*treep == NULL)
- break;
- }
- }
- isc_time_now(&start);
- result = dns_rbt_destroy2(treep, rbtdb->quantum);
- if (result == ISC_R_QUOTA) {
- INSIST(rbtdb->task != NULL);
- if (rbtdb->quantum != 0)
- rbtdb->quantum = adjust_quantum(rbtdb->quantum,
- &start);
- if (event == NULL)
- event = isc_event_allocate(rbtdb->common.mctx,
- NULL,
- DNS_EVENT_FREESTORAGE,
- free_rbtdb_callback,
- rbtdb,
- sizeof(isc_event_t));
- if (event == NULL)
- continue;
- isc_task_send(rbtdb->task, &event);
- return;
- }
- INSIST(result == ISC_R_SUCCESS && *treep == NULL);
- }
- if (event != NULL)
- isc_event_free(&event);
- if (log) {
- if (dns_name_dynamic(&rbtdb->common.origin))
- dns_name_format(&rbtdb->common.origin, buf,
- sizeof(buf));
- else
- strcpy(buf, "<UNKNOWN>");
- isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
- DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
- "done free_rbtdb(%s)", buf);
- }
- if (dns_name_dynamic(&rbtdb->common.origin))
- dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
- for (i = 0; i < rbtdb->node_lock_count; i++) {
- isc_refcount_destroy(&rbtdb->node_locks[i].references);
- NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
- }
- /*
- * Clean up LRU / re-signing order lists.
- */
- if (rbtdb->rdatasets != NULL) {
- for (i = 0; i < rbtdb->node_lock_count; i++)
- INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
- isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
- rbtdb->node_lock_count *
- sizeof(rdatasetheaderlist_t));
- }
- /*
- * Clean up dead node buckets.
- */
- if (rbtdb->deadnodes != NULL) {
- for (i = 0; i < rbtdb->node_lock_count; i++)
- INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
- isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
- rbtdb->node_lock_count * sizeof(rbtnodelist_t));
- }
- /*
- * Clean up heap objects.
- */
- if (rbtdb->heaps != NULL) {
- for (i = 0; i < rbtdb->node_lock_count; i++)
- isc_heap_destroy(&rbtdb->heaps[i]);
- isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
- rbtdb->node_lock_count * sizeof(isc_heap_t *));
- }
- if (rbtdb->rrsetstats != NULL)
- dns_stats_detach(&rbtdb->rrsetstats);
- #ifdef BIND9
- if (rbtdb->rpz_cidr != NULL)
- dns_rpz_cidr_free(&rbtdb->rpz_cidr);
- #endif
- isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
- rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
- isc_rwlock_destroy(&rbtdb->tree_lock);
- isc_refcount_destroy(&rbtdb->references);
- if (rbtdb->task != NULL)
- isc_task_detach(&rbtdb->task);
- RBTDB_DESTROYLOCK(&rbtdb->lock);
- rbtdb->common.magic = 0;
- rbtdb->common.impmagic = 0;
- ondest = rbtdb->common.ondest;
- isc_mem_detach(&rbtdb->hmctx);
- isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
- isc_ondestroy_notify(&ondest, rbtdb);
- }
- static inline void
- maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
- isc_boolean_t want_free = ISC_FALSE;
- unsigned int i;
- unsigned int inactive = 0;
- /* XXX check for open versions here */
- if (rbtdb->soanode != NULL)
- dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
- if (rbtdb->nsnode != NULL)
- dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
- /*
- * Even though there are no external direct references, there still
- * may be nodes in use.
- */
- for (i = 0; i < rbtdb->node_lock_count; i++) {
- NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
- rbtdb->node_locks[i].exiting = ISC_TRUE;
- NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
- if (isc_refcount_current(&rbtdb->node_locks[i].references)
- == 0) {
- inactive++;
- }
- }
- if (inactive != 0) {
- RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
- rbtdb->active -= inactive;
- if (rbtdb->active == 0)
- want_free = ISC_TRUE;
- RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
- if (want_free) {
- char buf[DNS_NAME_FORMATSIZE];
- if (dns_name_dynamic(&rbtdb->common.origin))
- dns_name_format(&rbtdb->common.origin, buf,
- sizeof(buf));
- else
- strcpy(buf, "<UNKNOWN>");
- isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
- DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
- "calling free_rbtdb(%s)", buf);
- free_rbtdb(rbtdb, ISC_TRUE, NULL);
- }
- }
- }
- static void
- detach(dns_db_t **dbp) {
- dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
- unsigned int refs;
- REQUIRE(VALID_RBTDB(rbtdb));
- isc_refcount_decrement(&rbtdb->references, &refs);
- if (refs == 0)
- maybe_free_rbtdb(rbtdb);
- *dbp = NULL;
- }
- static void
- currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
- dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
- rbtdb_version_t *version;
- unsigned int refs;
- REQUIRE(VALID_RBTDB(rbtdb));
- RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
- version = rbtdb->current_version;
- isc_refcount_increment(&version->references, &refs);
- RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
- *versionp = (dns_dbversion_t *)version;
- }
- static inline rbtdb_version_t *
- allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
- unsigned int references, isc_boolean_t writer)
- {
- isc_result_t result;
- rbtdb_version_t *version;
- version = isc_mem_get(mctx, sizeof(*version));
- if (version == NULL)
- return (NULL);
- version->serial = serial;
- result = isc_refcount_init(&version->references, references);
- if (result != ISC_R_SUCCESS) {
- isc_mem_put(mctx, version, sizeof(*version));
- return (NULL);
- }
- version->writer = writer;
- version->commit_ok = ISC_FALSE;
- ISC_LIST_INIT(version->changed_list);
- ISC_LIST_INIT(version->resigned_list);
- ISC_LINK_INIT(version, link);
- return (version);
- }
- static isc_result_t
- newversion(dns_db_t *db, dns_dbversion_t **versionp) {
- dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
- rbtdb_version_t *version;
- REQUIRE(VALID_RBTDB(rbtdb));
- REQUIRE(versionp != NULL && *versionp == NULL);
- REQUIRE(rbtdb->future_version == NULL);
- RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
- RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
- version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
- ISC_TRUE);
- if (version != NULL) {
- version->rbtdb = rbtdb;
- version->commit_ok = ISC_TRUE;
- version->secure = rbtdb->current_version->secure;
- version->havensec3 = rbtdb->current_version->havensec3;
- if (version->havensec3) {
- version->flags = rbtdb->current_version->flags;
- version->iterations =
- rbtdb->current_version->iterations;
- version->hash = rbtdb->current_version->hash;
- version->salt_length =
- rbtdb->current_version->salt_length;
- memcpy(version->salt, rbtdb->current_version->salt,
- version->salt_length);
- } else {
- version->flags = 0;
- version->iterations = 0;
- version->hash = 0;
- version->salt_length = 0;
- memset(version->salt, 0, sizeof(version->salt));
- }
- rbtdb->next_serial++;
- rbtdb->future_version = version;
- }
- RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
- if (version == NULL)
- return (ISC_R_NOMEMORY);
- *versionp = version;
- return (ISC_R_SUCCESS);
- }
- static void
- attachversion(dns_db_t *db, dns_dbversion_t *source,
- dns_dbversion_t **targetp)
- {
- dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
- rbtdb_version_t *rbtversion = source;
- unsigned int refs;
- REQUIRE(VALID_RBTDB(rbtdb));
- INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
- isc_refcount_increment(&rbtversion->references, &refs);
- INSIST(refs > 1);
- *targetp = rbtversion;
- }
- static rbtdb_changed_t *
- add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
- dns_rbtnode_t *node)
- {
- rbtdb_changed_t *changed;
- unsigned int refs;
- /*
- * Caller must be holding the node lock if its reference must be
- * protected by the lock.
- */
- changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
- RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
- REQUIRE(version->writer);
- if (changed != NULL) {
- dns_rbtnode_refincrement(node, &refs);
- INSIST(refs != 0);
- changed->node = node;
- changed->dirty = ISC_FALSE;
- ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
- } else
- version->commit_ok = ISC_FALSE;
- RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
- return (changed);
- }
- static void
- free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
- acachectl_t *array)
- {
- unsigned int count;
- unsigned int i;
- unsigned char *raw; /* RDATASLAB */
- /*
- * The caller must be holding the corresponding node lock.
- */
- if (array == NULL)
- return;
- raw = (unsigned char *)header + sizeof(*header);
- count = raw[0] * 256 + raw[1];
- /*
- * Sanity check: since an additional cache entry has a reference to
- * the original DB node (in the callback arg), there should be no
- * acache entries when the node can be freed.
- */
- for (i = 0; i < count; i++)
- INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
- isc_mem_put(mctx, array, count * sizeof(acachectl_t));
- }
- static inline void
- free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
- if (dns_name_dynamic(&(*noqname)->name))
- dns_name_free(&(*noqname)->name, mctx);
- if ((*noqname)->neg != NULL)
- isc_mem_put(mctx, (*noqname)->neg,
- dns_rdataslab_size((*noqname)->neg, 0));
- if ((*noqname)->negsig != NULL)
- isc_mem_put(mctx, (*noqname)->negsig,
- dns_rdataslab_size((*noqname)->negsig, 0));
- isc_mem_put(mctx, *noqname, sizeof(**noqname));
- *noqname = NULL;
- }
- static inline void
- init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
- {
- ISC_LINK_INIT(h, link);
- h->heap_index = 0;
- #if TRACE_HEADER
- if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
- fprintf(stderr, "initialized header: %p\n", h);
- #else
- UNUSED(rbtdb);
- #endif
- }
- static inline rdatasetheader_t *
- new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
- {
- rdatasetheader_t *h;
- h = isc_mem_get(mctx, sizeof(*h));
- if (h == NULL)
- return (NULL);
- #if TRACE_HEADER
- if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
- fprintf(stderr, "allocated header: %p\n", h);
- #endif
- init_rdataset(rbtdb, h);
- return (h);
- }
- static inline void
- free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
- {
- unsigned int size;
- int idx;
- if (EXISTS(rdataset) &&
- (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
- update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
- }
- idx = rdataset->node->locknum;
- if (ISC_LINK_LINKED(rdataset, link)) {
- INSIST(IS_CACHE(rbtdb));
- ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
- }
- if (rdataset->heap_index != 0)
- isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
- rdataset->heap_index = 0;
- if (rdataset->noqname != NULL)
- free_noqname(mctx, &rdataset->noqname);
- if (rdataset->closest != NULL)
- free_noqname(mctx, &rdataset->closest);
- free_acachearray(mctx, rdataset, rdataset->additional_auth);
- free_acachearray(mctx, rdataset, rdataset->additional_glue);
- if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
- size = sizeof(*rdataset);
- else
- size = dns_rdataslab_size((unsigned char *)rdataset,
- sizeof(*rdataset));
- isc_mem_put(mctx, rdataset, size);
- }
- static inline void
- rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
- rdatasetheader_t *header, *dcurrent;
- isc_boolean_t make_dirty = ISC_FALSE;
- /*
- * Caller must hold the node lock.
- */
- /*
- * We set the IGNORE attribute on rdatasets with serial number
- * 'serial'. When the reference count goes to zero, these rdatasets
- * will be cleaned up; until that time, they will be ignored.
- */
- for (header = node->data; header != NULL; header = header->next) {
- if (header->serial == serial) {
- header->attributes |= RDATASET_ATTR_IGNORE;
- make_dirty = ISC_TRUE;
- }
- for (dcurrent = header->down;
- dcurrent != NULL;
- dcurrent = dcurrent->down) {
- if (dcurrent->serial == serial) {
- dcurrent->attributes |= RDATASET_ATTR_IGNORE;
- make_dirty = ISC_TRUE;
- }
- }
- }
- if (make_dirty)
- node->dirty = 1;
- }
- static inline void
- clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
- {
- rdatasetheader_t *d, *down_next;
- for (d = top->down; d != NULL; d = down_next) {
- down_next = d->down;
- free_rdataset(rbtdb, mctx, d);
- }
- top->down = NULL;
- }
- static inline void
- clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
- rdatasetheader_t *current, *top_prev, *top_next;
- isc_mem_t *mctx = rbtdb->common.mctx;
- /*
- * Caller must be holding the node lock.
- */
- top_prev = NULL;
- for (current = node->data; current != NULL; current = top_next) {
- top_next = current->next;
- clean_stale_headers(rbtdb, mctx, current);
- /*
- * If current is nonexistent or stale, we can clean it up.
- */
- if ((current->attributes &
- (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
- if (top_prev != NULL)
- top_prev->next = current->next;
- else
- node->data = current->next;
- free_rdataset(rbtdb, mctx, current);
- } else
- top_prev = current;
- }
- node->dirty = 0;
- }
- static inline void
- clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
- rbtdb_serial_t least_serial)
- {
- rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
- rdatasetheader_t *top_prev, *top_next;
- isc_mem_t *mctx = rbtdb->common.mctx;
- isc_boolean_t still_dirty = ISC_FALSE;
- /*
- * Caller must be holding the node lock.
- */
- REQUIRE(least_serial != 0);
- top_prev = NULL;
- for (current = node->data; current != NULL; current = top_next) {
- top_next = current->next;
- /*
- * First, we clean up any instances of multiple rdatasets
- * with the same serial number, or that have the IGNORE
- * attribute.
- */
- dparent = current;
- for (dcurrent = current->down;
- dcurrent != NULL;
- dcurrent = down_next) {
- down_next = dcurrent->down;
- INSIST(dcurrent->serial <= dparent->serial);
- if (dcurrent->serial == dparent->serial ||
- IGNORE(dcurrent)) {
- if (down_next != NULL)
- down_next->next = dparent;
- dparent->down = down_next;
- free_rdataset(rbtdb, mctx, dcurrent);
- } else
- dparent = dcurrent;
- }
- /*
- * We've now eliminated all IGNORE datasets with the possible
- * exception of current, which we now check.
- */
- if (IGNORE(current)) {
- down_next = current->down;
- if (down_next == NULL) {
- if (top_prev != NULL)
- top_prev->next = current->next;
- else
- node->data = current->next;
- free_rdataset(rbtdb, mctx, current);
- /*
- * current no longer exists, so we can
- * just continue with the loop.
- */
- continue;
- } else {
- /*
- * Pull up current->down, making it the new
- * current.
- */
- if (top_prev != NULL)
- top_prev->next = down_next;
- else
- node->data = down_next;
- down_next->next = top_next;
- free_rdataset(rbtdb, mctx, current);
- current = down_next;
- }
- }
- /*
- * We now try to find the first down node less than the
- * least serial.
- */
- dparent = current;
- for (dcurrent = current->down;
- dcurrent != NULL;
- dcurrent = down_next) {
- down_next = dcurrent->down;
- if (dcurrent->serial < least_serial)
- break;
- dparent = dcurrent;
- }
- /*
- * If there is a such an rdataset, delete it and any older
- * versions.
- */
- if (dcurrent != NULL) {
- do {
- down_next = dcurrent->down;
- INSIST(dcurrent->serial <= least_serial);
- free_rdataset(rbtdb, mctx, dcurrent);
- dcurrent = down_next;
- } while (dcurrent != NULL);
- dparent->down = NULL;
- }
- /*
- * Note. The serial number of 'current' might be less than
- * least_serial too, but we cannot delete it because it is
- * the most recent version, unless it is a NONEXISTENT
- * rdataset.
- */
- if (current->down != NULL) {
- still_dirty = ISC_TRUE;
- top_prev = current;
- } else {
- /*
- * If this is a NONEXISTENT rdataset, we can delete it.
- */
- if (NONEXISTENT(current)) {
- if (top_prev != NULL)
- top_prev->next = current->next;
- else
- node->data = current->next;
- free_rdataset(rbtdb, mctx, current);
- } else
- top_prev = current;
- }
- }
- if (!still_dirty)
- node->dirty = 0;
- }
- static void
- delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
- {
- dns_rbtnode_t *nsecnode;
- dns_fixedname_t fname;
- dns_name_t *name;
- isc_result_t result = ISC_R_UNEXPECTED;
- INSIST(!ISC_LINK_LINKED(node, deadlink));
- switch (node->nsec) {
- case DNS_RBT_NSEC_NORMAL:
- #ifdef BIND9
- if (rbtdb->rpz_cidr != NULL) {
- dns_fixedname_init(&fname);
- name = dns_fixedname_name(&fname);
- dns_rbt_fullnamefromnode(node, name);
- dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
- }
- #endif
- result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
- break;
- case DNS_RBT_NSEC_HAS_NSEC:
- dns_fixedname_init(&fname);
- name = dns_fixedname_name(&fname);
- dns_rbt_fullnamefromnode(node, name);
- /*
- * Delete the corresponding node from the auxiliary NSEC
- * tree before deleting from the main tree.
- */
- nsecnode = NULL;
- result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
- NULL, DNS_RBTFIND_EMPTYDATA,
- NULL, NULL);
- if (result != ISC_R_SUCCESS) {
- isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
- DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
- "delete_node: "
- "dns_rbt_findnode(nsec): %s",
- isc_result_totext(result));
- } else {
- result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
- ISC_FALSE);
- if (result != ISC_R_SUCCESS) {
- isc_log_write(dns_lctx,
- DNS_LOGCATEGORY_DATABASE,
- DNS_LOGMODULE_CACHE,
- ISC_LOG_WARNING,
- "delete_nsecnode(): "
- "dns_rbt_deletenode(nsecnode): %s",
- isc_result_totext(result));
- }
- }
- result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
- #ifdef BIND9
- dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
- #endif
- break;
- case DNS_RBT_NSEC_NSEC:
- result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
- break;
- case DNS_RBT_NSEC_NSEC3:
- result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
- break;
- }
- if (result != ISC_R_SUCCESS) {
- isc_log_write(dns_lctx,
- DNS_LOGCATEGORY_DATABASE,
- DNS_LOGMODULE_CACHE,
- ISC_LOG_WARNING,
- "delete_nsecnode(): "
- "dns_rbt_deletenode: %s",
- isc_result_totext(result));
- }
- }
- /*%
- * Clean up dead nodes. These are nodes which have no references, and
- * have no data. They are dead but we could not or chose not to delete
- * them when we deleted all the data at that node because we did not want
- * to wait for the tree write lock.
- *
- * The caller must hold a tree write lock and bucketnum'th node (write) lock.
- */
- static void
- cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
- dns_rbtnode_t *node;
- int count = 10; /* XXXJT: should be adjustable */
- node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
- while (node != NULL && count > 0) {
- ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
- /*
- * Since we're holding a tree write lock, it should be
- * impossible for this node to be referenced by others.
- */
- INSIST(dns_rbtnode_refcurrent(node) == 0 &&
- node->data == NULL);
- delete_node(rbtdb, node);
- node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
- count--;
- }
- }
- /*
- * Caller must be holding the node lock.
- */
- static inline void
- new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
- unsigned int lockrefs, noderefs;
- isc_refcount_t *lockref;
- INSIST(!ISC_LINK_LINKED(node, deadlink));
- dns_rbtnode_refincrement0(node, &noderefs);
- if (noderefs == 1) { /* this is the first reference to the node */
- lockref = &rbtdb->node_locks[node->locknum].references;
- isc_refcount_increment0(lockref, &lockrefs);
- INSIST(lockrefs != 0);
- }
- INSIST(noderefs != 0);
- }
- /*
- * This function is assumed to be called when a node is newly referenced
- * and can be in the deadnode list. In that case the node must be retrieved
- * from the list because it is going to be used. In addition, if the caller
- * happens to hold a write lock on the tree, it's a good chance to purge dead
- * nodes.
- * Note: while a new reference is gained in multiple places, there are only very
- * few cases where the node can be in the deadnode list (only empty nodes can
- * have been added to the list).
- */
- static inline void
- reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
- isc_rwlocktype_t treelocktype)
- {
- isc_rwlocktype_t locktype = isc_rwlocktype_read;
- nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
- isc_boolean_t maybe_cleanup = ISC_FALSE;
- POST(locktype);
- NODE_STRONGLOCK(nodelock);
- NODE_WEAKLOCK(nodelock, locktype);
- /*
- * Check if we can possibly cleanup the dead node. If so, upgrade
- * the node lock below to perform the cleanup.
- */
- if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
- treelocktype == isc_rwlocktype_write) {
- maybe_cleanup = ISC_TRUE;
- }
- if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
- /*
- * Upgrade the lock and test if we still need to unlink.
- */
- NODE_WEAKUNLOCK(nodelock, locktype);
- locktype = isc_rwlocktype_write;
- POST(locktype);
- NODE_WEAKLOCK(nodelock, locktype);
- if (ISC_LINK_LINKED(node, deadlink))
- ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
- node, deadlink);
- if (maybe_cleanup)
- cleanup_dead_nodes(rbtdb, node->locknum);
- }
- new_reference(rbtdb, node);
- NODE_WEAKUNLOCK(nodelock, locktype);
- NODE_STRONGUNLOCK(nodelock);
- }
- /*
- * Caller must be holding the node lock; either the "strong", read or write
- * lock. Note that the lock must be held even when node references are
- * atomically modified; in that case the decrement operation itself does not
- * have to be protected, but we must avoid a race condition where multiple
- * threads are decreasing the reference to zero simultaneously and at least
- * one of them is going to free the node.
- * This function returns ISC_TRUE if and only if the node reference decreases
- * to zero.
- */
- static isc_boolean_t
- decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
- rbtdb_serial_t least_serial,
- isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
- isc_boolean_t pruning)
- {
- isc_result_t result;
- isc_boolean_t write_locked;
- rbtdb_nodelock_t *nodelock;
- unsigned int refs, nrefs;
- int bucket = node->locknum;
- isc_boolean_t no_reference = ISC_TRUE;
- nodelock = &rbtdb->node_locks[bucket];
- /* Handle easy and typical case first. */
- if (!node->dirty && (node->data != NULL || node->down != NULL)) {
- dns_rbtnode_refdecrement(node, &nrefs);
- INSIST((int)nrefs >= 0);
- if (nrefs == 0) {
- isc_refcount_decrement(&nodelock->references, &refs);
- INSIST((int)refs >= 0);
- }
- return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
- }
- /* Upgrade the lock? */
- if (nlock == isc_rwlocktype_read) {
- NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
- NODE_WEAKL…