[BACK]Return to rrl.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / external / mpl / bind / dist / lib / dns

File: [cvs.NetBSD.org] / src / external / mpl / bind / dist / lib / dns / rrl.c (download)

Revision 1.3.2.3, Wed Apr 8 14:07:07 2020 UTC (3 years, 11 months ago) by martin
Branch: phil-wifi
Changes since 1.3.2.2: +3 -4 lines

Merge changes from current as of 20200406

/*	$NetBSD: rrl.c,v 1.3.2.3 2020/04/08 14:07:07 martin Exp $	*/

/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

/*! \file */

/*
 * Rate limit DNS responses.
 */

/* #define ISC_LIST_CHECKINIT */

#include <config.h>

#include <inttypes.h>
#include <stdbool.h>

#include <isc/mem.h>
#include <isc/net.h>
#include <isc/netaddr.h>
#include <isc/print.h>
#include <isc/util.h>

#include <dns/result.h>
#include <dns/rcode.h>
#include <dns/rdatatype.h>
#include <dns/rdataclass.h>
#include <dns/log.h>
#include <dns/rrl.h>
#include <dns/view.h>

static void
log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, bool early,
	char *log_buf, unsigned int log_buf_len);

/*
 * Get a modulus for a hash function that is tolerably likely to be
 * relatively prime to most inputs.  Of course, we get a prime for for initial
 * values not larger than the square of the last prime.  We often get a prime
 * after that.
 * This works well in practice for hash tables up to at least 100
 * times the square of the last prime and better than a multiplicative hash.
 */
static int
hash_divisor(unsigned int initial) {
	static uint16_t primes[] = {
		  3,   5,   7,  11,  13,  17,  19,  23,  29,  31,  37,  41,
		 43,  47,  53,  59,  61,  67,  71,  73,  79,  83,  89,  97,
#if 0
		101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157,
		163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
		229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283,
		293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367,
		373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439,
		443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
		521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
		601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661,
		673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751,
		757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829,
		839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919,
		929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997,1009,
#endif
	};
	int divisions, tries;
	unsigned int result;
	uint16_t *pp, p;

	result = initial;

	if (primes[sizeof(primes)/sizeof(primes[0])-1] >= result) {
		pp = primes;
		while (*pp < result)
			++pp;
		return (*pp);
	}

	if ((result & 1) == 0)
		++result;

	divisions = 0;
	tries = 1;
	pp = primes;
	do {
		p = *pp++;
		++divisions;
		if ((result % p) == 0) {
			++tries;
			result += 2;
			pp = primes;
		}
	} while (pp < &primes[sizeof(primes) / sizeof(primes[0])]);

	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
			      "%d hash_divisor() divisions in %d tries"
			      " to get %d from %d",
			      divisions, tries, result, initial);

	return (result);
}

/*
 * Convert a timestamp to a number of seconds in the past.
 */
static inline int
delta_rrl_time(isc_stdtime_t ts, isc_stdtime_t now) {
	int delta;

	delta = now - ts;
	if (delta >= 0)
		return (delta);

	/*
	 * The timestamp is in the future.  That future might result from
	 * re-ordered requests, because we use timestamps on requests
	 * instead of consulting a clock.  Timestamps in the distant future are
	 * assumed to result from clock changes.  When the clock changes to
	 * the past, make existing timestamps appear to be in the past.
	 */
	if (delta < -DNS_RRL_MAX_TIME_TRAVEL)
		return (DNS_RRL_FOREVER);
	return (0);
}

static inline int
get_age(const dns_rrl_t *rrl, const dns_rrl_entry_t *e, isc_stdtime_t now) {
	if (!e->ts_valid)
		return (DNS_RRL_FOREVER);
	return (delta_rrl_time(e->ts + rrl->ts_bases[e->ts_gen], now));
}

static inline void
set_age(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_stdtime_t now) {
	dns_rrl_entry_t *e_old;
	unsigned int ts_gen;
	int i, ts;

	ts_gen = rrl->ts_gen;
	ts = now - rrl->ts_bases[ts_gen];
	if (ts < 0) {
		if (ts < -DNS_RRL_MAX_TIME_TRAVEL)
			ts = DNS_RRL_FOREVER;
		else
			ts = 0;
	}

	/*
	 * Make a new timestamp base if the current base is too old.
	 * All entries older than DNS_RRL_MAX_WINDOW seconds are ancient,
	 * useless history.  Their timestamps can be treated as if they are
	 * all the same.
	 * We only do arithmetic on more recent timestamps, so bases for
	 * older timestamps can be recycled provided the old timestamps are
	 * marked as ancient history.
	 * This loop is almost always very short because most entries are
	 * recycled after one second and any entries that need to be marked
	 * are older than (DNS_RRL_TS_BASES)*DNS_RRL_MAX_TS seconds.
	 */
	if (ts >= DNS_RRL_MAX_TS) {
		ts_gen = (ts_gen + 1) % DNS_RRL_TS_BASES;
		for (e_old = ISC_LIST_TAIL(rrl->lru), i = 0;
		     e_old != NULL && (e_old->ts_gen == ts_gen ||
				       !ISC_LINK_LINKED(e_old, hlink));
		     e_old = ISC_LIST_PREV(e_old, lru), ++i)
		{
			e_old->ts_valid = false;
		}
		if (i != 0)
			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
				      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
				      "rrl new time base scanned %d entries"
				      " at %d for %d %d %d %d",
				      i, now, rrl->ts_bases[ts_gen],
				      rrl->ts_bases[(ts_gen + 1) %
					DNS_RRL_TS_BASES],
				      rrl->ts_bases[(ts_gen + 2) %
					DNS_RRL_TS_BASES],
				      rrl->ts_bases[(ts_gen + 3) %
					DNS_RRL_TS_BASES]);
		rrl->ts_gen = ts_gen;
		rrl->ts_bases[ts_gen] = now;
		ts = 0;
	}

	e->ts_gen = ts_gen;
	e->ts = ts;
	e->ts_valid = true;
}

static isc_result_t
expand_entries(dns_rrl_t *rrl, int newsize) {
	unsigned int bsize;
	dns_rrl_block_t *b;
	dns_rrl_entry_t *e;
	double rate;
	int i;

	if (rrl->num_entries + newsize >= rrl->max_entries &&
	    rrl->max_entries != 0)
	{
		newsize = rrl->max_entries - rrl->num_entries;
		if (newsize <= 0)
			return (ISC_R_SUCCESS);
	}

	/*
	 * Log expansions so that the user can tune max-table-size
	 * and min-table-size.
	 */
	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) &&
	    rrl->hash != NULL) {
		rate = rrl->probes;
		if (rrl->searches != 0)
			rate /= rrl->searches;
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
			      "increase from %d to %d RRL entries with"
			      " %d bins; average search length %.1f",
			      rrl->num_entries, rrl->num_entries+newsize,
			      rrl->hash->length, rate);
	}

	bsize = sizeof(dns_rrl_block_t) + (newsize-1)*sizeof(dns_rrl_entry_t);
	b = isc_mem_get(rrl->mctx, bsize);
	if (b == NULL) {
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
			      "isc_mem_get(%d) failed for RRL entries",
			      bsize);
		return (ISC_R_NOMEMORY);
	}
	memset(b, 0, bsize);
	b->size = bsize;

	e = b->entries;
	for (i = 0; i < newsize; ++i, ++e) {
		ISC_LINK_INIT(e, hlink);
		ISC_LIST_INITANDAPPEND(rrl->lru, e, lru);
	}
	rrl->num_entries += newsize;
	ISC_LIST_INITANDAPPEND(rrl->blocks, b, link);

	return (ISC_R_SUCCESS);
}

static inline dns_rrl_bin_t *
get_bin(dns_rrl_hash_t *hash, unsigned int hval) {
	INSIST(hash != NULL);
	return (&hash->bins[hval % hash->length]);
}

static void
free_old_hash(dns_rrl_t *rrl) {
	dns_rrl_hash_t *old_hash;
	dns_rrl_bin_t *old_bin;
	dns_rrl_entry_t *e, *e_next;

	old_hash = rrl->old_hash;
	for (old_bin = &old_hash->bins[0];
	     old_bin < &old_hash->bins[old_hash->length];
	     ++old_bin)
	{
		for (e = ISC_LIST_HEAD(*old_bin); e != NULL; e = e_next) {
			e_next = ISC_LIST_NEXT(e, hlink);
			ISC_LINK_INIT(e, hlink);
		}
	}

	isc_mem_put(rrl->mctx, old_hash,
		    sizeof(*old_hash)
		      + (old_hash->length - 1) * sizeof(old_hash->bins[0]));
	rrl->old_hash = NULL;
}

static isc_result_t
expand_rrl_hash(dns_rrl_t *rrl, isc_stdtime_t now) {
	dns_rrl_hash_t *hash;
	int old_bins, new_bins, hsize;
	double rate;

	if (rrl->old_hash != NULL)
		free_old_hash(rrl);

	/*
	 * Most searches fail and so go to the end of the chain.
	 * Use a small hash table load factor.
	 */
	old_bins = (rrl->hash == NULL) ? 0 : rrl->hash->length;
	new_bins = old_bins/8 + old_bins;
	if (new_bins < rrl->num_entries)
		new_bins = rrl->num_entries;
	new_bins = hash_divisor(new_bins);

	hsize = sizeof(dns_rrl_hash_t) + (new_bins-1)*sizeof(hash->bins[0]);
	hash = isc_mem_get(rrl->mctx, hsize);
	if (hash == NULL) {
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
			      "isc_mem_get(%d) failed for"
			      " RRL hash table",
			      hsize);
		return (ISC_R_NOMEMORY);
	}
	memset(hash, 0, hsize);
	hash->length = new_bins;
	rrl->hash_gen ^= 1;
	hash->gen = rrl->hash_gen;

	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) && old_bins != 0) {
		rate = rrl->probes;
		if (rrl->searches != 0)
			rate /= rrl->searches;
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
			      "increase from %d to %d RRL bins for"
			      " %d entries; average search length %.1f",
			      old_bins, new_bins, rrl->num_entries, rate);
	}

	rrl->old_hash = rrl->hash;
	if (rrl->old_hash != NULL)
		rrl->old_hash->check_time = now;
	rrl->hash = hash;

	return (ISC_R_SUCCESS);
}

static void
ref_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, int probes, isc_stdtime_t now) {
	/*
	 * Make the entry most recently used.
	 */
	if (ISC_LIST_HEAD(rrl->lru) != e) {
		if (e == rrl->last_logged)
			rrl->last_logged = ISC_LIST_PREV(e, lru);
		ISC_LIST_UNLINK(rrl->lru, e, lru);
		ISC_LIST_PREPEND(rrl->lru, e, lru);
	}

	/*
	 * Expand the hash table if it is time and necessary.
	 * This will leave the newly referenced entry in a chain in the
	 * old hash table.  It will migrate to the new hash table the next
	 * time it is used or be cut loose when the old hash table is destroyed.
	 */
	rrl->probes += probes;
	++rrl->searches;
	if (rrl->searches > 100 &&
	    delta_rrl_time(rrl->hash->check_time, now) > 1) {
		if (rrl->probes/rrl->searches > 2)
			expand_rrl_hash(rrl, now);
		rrl->hash->check_time = now;
		rrl->probes = 0;
		rrl->searches = 0;
	}
}

static inline bool
key_cmp(const dns_rrl_key_t *a, const dns_rrl_key_t *b) {
	if (memcmp(a, b, sizeof(dns_rrl_key_t)) == 0)
		return (true);
	return (false);
}

static inline uint32_t
hash_key(const dns_rrl_key_t *key) {
	uint32_t hval;
	int i;

	hval = key->w[0];
	for (i = sizeof(key->w) / sizeof(key->w[0]) - 1; i >= 0; --i) {
		hval = key->w[i] + (hval<<1);
	}
	return (hval);
}

/*
 * Construct the hash table key.
 * Use a hash of the DNS query name to save space in the database.
 * Collisions result in legitimate rate limiting responses for one
 * query name also limiting responses for other names to the
 * same client.  This is rare and benign enough given the large
 * space costs compared to keeping the entire name in the database
 * entry or the time costs of dynamic allocation.
 */
static void
make_key(const dns_rrl_t *rrl, dns_rrl_key_t *key,
	 const isc_sockaddr_t *client_addr, dns_rdatatype_t qtype,
	 const dns_name_t *qname, dns_rdataclass_t qclass,
	 dns_rrl_rtype_t rtype)
{
	dns_name_t base;
	dns_offsets_t base_offsets;
	int labels, i;

	memset(key, 0, sizeof(*key));

	key->s.rtype = rtype;
	if (rtype == DNS_RRL_RTYPE_QUERY) {
		key->s.qtype = qtype;
		key->s.qclass = qclass & 0xff;
	} else if (rtype == DNS_RRL_RTYPE_REFERRAL ||
		   rtype == DNS_RRL_RTYPE_NODATA) {
		/*
		 * Because there is no qtype in the empty answer sections of
		 * referral and NODATA responses, count them as the same.
		 */
		key->s.qclass = qclass & 0xff;
	}

	if (qname != NULL && qname->labels != 0) {
		/*
		 * Ignore the first label of wildcards.
		 */
		if ((qname->attributes & DNS_NAMEATTR_WILDCARD) != 0 &&
		    (labels = dns_name_countlabels(qname)) > 1)
		{
			dns_name_init(&base, base_offsets);
			dns_name_getlabelsequence(qname, 1, labels-1, &base);
			key->s.qname_hash =
				dns_name_fullhash(&base, false);
		} else {
			key->s.qname_hash =
				dns_name_fullhash(qname, false);
		}
	}

	switch (client_addr->type.sa.sa_family) {
	case AF_INET:
		key->s.ip[0] = (client_addr->type.sin.sin_addr.s_addr &
			      rrl->ipv4_mask);
		break;
	case AF_INET6:
		key->s.ipv6 = true;
		memmove(key->s.ip, &client_addr->type.sin6.sin6_addr,
			sizeof(key->s.ip));
		for (i = 0; i < DNS_RRL_MAX_PREFIX/32; ++i)
			key->s.ip[i] &= rrl->ipv6_mask[i];
		break;
	}
}

static inline dns_rrl_rate_t *
get_rate(dns_rrl_t *rrl, dns_rrl_rtype_t rtype) {
	switch (rtype) {
	case DNS_RRL_RTYPE_QUERY:
		return (&rrl->responses_per_second);
	case DNS_RRL_RTYPE_REFERRAL:
		return (&rrl->referrals_per_second);
	case DNS_RRL_RTYPE_NODATA:
		return (&rrl->nodata_per_second);
	case DNS_RRL_RTYPE_NXDOMAIN:
		return (&rrl->nxdomains_per_second);
	case DNS_RRL_RTYPE_ERROR:
		return (&rrl->errors_per_second);
	case DNS_RRL_RTYPE_ALL:
		return (&rrl->all_per_second);
	default:
		INSIST(0);
		ISC_UNREACHABLE();
	}
}

static int
response_balance(dns_rrl_t *rrl, const dns_rrl_entry_t *e, int age) {
	dns_rrl_rate_t *ratep;
	int balance, rate;

	if (e->key.s.rtype == DNS_RRL_RTYPE_TCP) {
		rate = 1;
	} else {
		ratep = get_rate(rrl, e->key.s.rtype);
		rate = ratep->scaled;
	}

	balance = e->responses + age * rate;
	if (balance > rate)
		balance = rate;
	return (balance);
}

/*
 * Search for an entry for a response and optionally create it.
 */
static dns_rrl_entry_t *
get_entry(dns_rrl_t *rrl, const isc_sockaddr_t *client_addr,
	  dns_rdataclass_t qclass, dns_rdatatype_t qtype,
	  const dns_name_t *qname, dns_rrl_rtype_t rtype, isc_stdtime_t now,
	  bool create, char *log_buf, unsigned int log_buf_len)
{
	dns_rrl_key_t key;
	uint32_t hval;
	dns_rrl_entry_t *e;
	dns_rrl_hash_t *hash;
	dns_rrl_bin_t *new_bin, *old_bin;
	int probes, age;

	make_key(rrl, &key, client_addr, qtype, qname, qclass, rtype);
	hval = hash_key(&key);

	/*
	 * Look for the entry in the current hash table.
	 */
	new_bin = get_bin(rrl->hash, hval);
	probes = 1;
	e = ISC_LIST_HEAD(*new_bin);
	while (e != NULL) {
		if (key_cmp(&e->key, &key)) {
			ref_entry(rrl, e, probes, now);
			return (e);
		}
		++probes;
		e = ISC_LIST_NEXT(e, hlink);
	}

	/*
	 * Look in the old hash table.
	 */
	if (rrl->old_hash != NULL) {
		old_bin = get_bin(rrl->old_hash, hval);
		e = ISC_LIST_HEAD(*old_bin);
		while (e != NULL) {
			if (key_cmp(&e->key, &key)) {
				ISC_LIST_UNLINK(*old_bin, e, hlink);
				ISC_LIST_PREPEND(*new_bin, e, hlink);
				e->hash_gen = rrl->hash_gen;
				ref_entry(rrl, e, probes, now);
				return (e);
			}
			e = ISC_LIST_NEXT(e, hlink);
		}

		/*
		 * Discard prevous hash table when all of its entries are old.
		 */
		age = delta_rrl_time(rrl->old_hash->check_time, now);
		if (age > rrl->window)
			free_old_hash(rrl);
	}

	if (!create)
		return (NULL);

	/*
	 * The entry does not exist, so create it by finding a free entry.
	 * Keep currently penalized and logged entries.
	 * Try to make more entries if none are idle.
	 * Steal the oldest entry if we cannot create more.
	 */
	for (e = ISC_LIST_TAIL(rrl->lru);
	     e != NULL;
	     e = ISC_LIST_PREV(e, lru))
	{
		if (!ISC_LINK_LINKED(e, hlink))
			break;
		age = get_age(rrl, e, now);
		if (age <= 1) {
			e = NULL;
			break;
		}
		if (!e->logged && response_balance(rrl, e, age) > 0)
			break;
	}
	if (e == NULL) {
		expand_entries(rrl, ISC_MIN((rrl->num_entries+1)/2, 1000));
		e = ISC_LIST_TAIL(rrl->lru);
	}
	if (e->logged)
		log_end(rrl, e, true, log_buf, log_buf_len);
	if (ISC_LINK_LINKED(e, hlink)) {
		if (e->hash_gen == rrl->hash_gen)
			hash = rrl->hash;
		else
			hash = rrl->old_hash;
		old_bin = get_bin(hash, hash_key(&e->key));
		ISC_LIST_UNLINK(*old_bin, e, hlink);
	}
	ISC_LIST_PREPEND(*new_bin, e, hlink);
	e->hash_gen = rrl->hash_gen;
	e->key = key;
	e->ts_valid = false;
	ref_entry(rrl, e, probes, now);
	return (e);
}

static void
debit_log(const dns_rrl_entry_t *e, int age, const char *action) {
	char buf[sizeof("age=2147483647")];
	const char *age_str;

	if (age == DNS_RRL_FOREVER) {
		age_str = "";
	} else {
		snprintf(buf, sizeof(buf), "age=%d", age);
		age_str = buf;
	}
	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
		      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
		      "rrl %08x %6s  responses=%-3d %s",
		      hash_key(&e->key), age_str, e->responses, action);
}

static inline dns_rrl_result_t
debit_rrl_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, double qps, double scale,
		const isc_sockaddr_t *client_addr, isc_stdtime_t now,
		char *log_buf, unsigned int log_buf_len)
{
	int rate, new_rate, slip, new_slip, age, log_secs, min;
	dns_rrl_rate_t *ratep;
	dns_rrl_entry_t const *credit_e;

	/*
	 * Pick the rate counter.
	 * Optionally adjust the rate by the estimated query/second rate.
	 */
	ratep = get_rate(rrl, e->key.s.rtype);
	rate = ratep->r;
	if (rate == 0)
		return (DNS_RRL_RESULT_OK);

	if (scale < 1.0) {
		/*
		 * The limit for clients that have used TCP is not scaled.
		 */
		credit_e = get_entry(rrl, client_addr,
				     0, dns_rdatatype_none, NULL,
				     DNS_RRL_RTYPE_TCP, now, false,
				     log_buf, log_buf_len);
		if (credit_e != NULL) {
			age = get_age(rrl, e, now);
			if (age < rrl->window)
				scale = 1.0;
		}
	}
	if (scale < 1.0) {
		new_rate = (int) (rate * scale);
		if (new_rate < 1)
			new_rate = 1;
		if (ratep->scaled != new_rate) {
			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
				      DNS_LOGMODULE_REQUEST,
				      DNS_RRL_LOG_DEBUG1,
				      "%d qps scaled %s by %.2f"
				      " from %d to %d",
				      (int)qps, ratep->str, scale,
				      rate, new_rate);
			rate = new_rate;
			ratep->scaled = rate;
		}
	}

	min = -rrl->window * rate;

	/*
	 * Treat time jumps into the recent past as no time.
	 * Treat entries older than the window as if they were just created
	 * Credit other entries.
	 */
	age = get_age(rrl, e, now);
	if (age > 0) {
		/*
		 * Credit tokens earned during elapsed time.
		 */
		if (age > rrl->window) {
			e->responses = rate;
			e->slip_cnt = 0;
		} else {
			e->responses += rate*age;
			if (e->responses > rate) {
				e->responses = rate;
				e->slip_cnt = 0;
			}
		}
		/*
		 * Find the seconds since last log message without overflowing
		 * small counter.  This counter is reset when an entry is
		 * created.  It is not necessarily reset when some requests
		 * are answered provided other requests continue to be dropped
		 * or slipped.  This can happen when the request rate is just
		 * at the limit.
		 */
		if (e->logged) {
			log_secs = e->log_secs;
			log_secs += age;
			if (log_secs > DNS_RRL_MAX_LOG_SECS || log_secs < 0)
				log_secs = DNS_RRL_MAX_LOG_SECS;
			e->log_secs = log_secs;
		}
	}
	set_age(rrl, e, now);

	/*
	 * Debit the entry for this response.
	 */
	if (--e->responses >= 0) {
		if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
			debit_log(e, age, "");
		return (DNS_RRL_RESULT_OK);
	}

	if (e->responses < min)
		e->responses = min;

	/*
	 * Drop this response unless it should slip or leak.
	 */
	slip = rrl->slip.r;
	if (slip > 2 && scale < 1.0) {
		new_slip = (int) (slip * scale);
		if (new_slip < 2)
			new_slip = 2;
		if (rrl->slip.scaled != new_slip) {
			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
				      DNS_LOGMODULE_REQUEST,
				      DNS_RRL_LOG_DEBUG1,
				      "%d qps scaled slip"
				      " by %.2f from %d to %d",
				      (int)qps, scale,
				      slip, new_slip);
			slip = new_slip;
			rrl->slip.scaled = slip;
		}
	}
	if (slip != 0 && e->key.s.rtype != DNS_RRL_RTYPE_ALL) {
		if (e->slip_cnt++ == 0) {
			if ((int) e->slip_cnt >= slip)
				e->slip_cnt = 0;
			if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
				debit_log(e, age, "slip");
			return (DNS_RRL_RESULT_SLIP);
		} else if ((int) e->slip_cnt >= slip) {
			e->slip_cnt = 0;
		}
	}

	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
		debit_log(e, age, "drop");
	return (DNS_RRL_RESULT_DROP);
}

static inline dns_rrl_qname_buf_t *
get_qname(dns_rrl_t *rrl, const dns_rrl_entry_t *e) {
	dns_rrl_qname_buf_t *qbuf;

	qbuf = rrl->qnames[e->log_qname];
	if (qbuf == NULL || qbuf->e != e)
		return (NULL);
	return (qbuf);
}

static inline void
free_qname(dns_rrl_t *rrl, dns_rrl_entry_t *e) {
	dns_rrl_qname_buf_t *qbuf;

	qbuf = get_qname(rrl, e);
	if (qbuf != NULL) {
		qbuf->e = NULL;
		ISC_LIST_APPEND(rrl->qname_free, qbuf, link);
	}
}

static void
add_log_str(isc_buffer_t *lb, const char *str, unsigned int str_len) {
	isc_region_t region;

	isc_buffer_availableregion(lb, &region);
	if (str_len >= region.length) {
		if (region.length == 0U)
			return;
		str_len = region.length;
	}
	memmove(region.base, str, str_len);
	isc_buffer_add(lb, str_len);
}

#define ADD_LOG_CSTR(eb, s) add_log_str(eb, s, sizeof(s)-1)

/*
 * Build strings for the logs
 */
static void
make_log_buf(dns_rrl_t *rrl, dns_rrl_entry_t *e,
	     const char *str1, const char *str2, bool plural,
	     const dns_name_t *qname, bool save_qname,
	     dns_rrl_result_t rrl_result, isc_result_t resp_result,
	     char *log_buf, unsigned int log_buf_len)
{
	isc_buffer_t lb;
	dns_rrl_qname_buf_t *qbuf;
	isc_netaddr_t cidr;
	char strbuf[ISC_MAX(sizeof("/123"), sizeof("  (12345678)"))];
	const char *rstr;
	isc_result_t msg_result;

	if (log_buf_len <= 1) {
		if (log_buf_len == 1)
			log_buf[0] = '\0';
		return;
	}
	isc_buffer_init(&lb, log_buf, log_buf_len-1);

	if (str1 != NULL)
		add_log_str(&lb, str1, strlen(str1));
	if (str2 != NULL)
		add_log_str(&lb, str2, strlen(str2));

	switch (rrl_result) {
	case DNS_RRL_RESULT_OK:
		break;
	case DNS_RRL_RESULT_DROP:
		ADD_LOG_CSTR(&lb, "drop ");
		break;
	case DNS_RRL_RESULT_SLIP:
		ADD_LOG_CSTR(&lb, "slip ");
		break;
	default:
		INSIST(0);
		ISC_UNREACHABLE();
	}

	switch (e->key.s.rtype) {
	case DNS_RRL_RTYPE_QUERY:
		break;
	case DNS_RRL_RTYPE_REFERRAL:
		ADD_LOG_CSTR(&lb, "referral ");
		break;
	case DNS_RRL_RTYPE_NODATA:
		ADD_LOG_CSTR(&lb, "NODATA ");
		break;
	case DNS_RRL_RTYPE_NXDOMAIN:
		ADD_LOG_CSTR(&lb, "NXDOMAIN ");
		break;
	case DNS_RRL_RTYPE_ERROR:
		if (resp_result == ISC_R_SUCCESS) {
			ADD_LOG_CSTR(&lb, "error ");
		} else {
			rstr = isc_result_totext(resp_result);
			add_log_str(&lb, rstr, strlen(rstr));
			ADD_LOG_CSTR(&lb, " error ");
		}
		break;
	case DNS_RRL_RTYPE_ALL:
		ADD_LOG_CSTR(&lb, "all ");
		break;
	default:
		INSIST(0);
		ISC_UNREACHABLE();
	}

	if (plural)
		ADD_LOG_CSTR(&lb, "responses to ");
	else
		ADD_LOG_CSTR(&lb, "response to ");

	memset(&cidr, 0, sizeof(cidr));
	if (e->key.s.ipv6) {
		snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv6_prefixlen);
		cidr.family = AF_INET6;
		memset(&cidr.type.in6, 0,  sizeof(cidr.type.in6));
		memmove(&cidr.type.in6, e->key.s.ip, sizeof(e->key.s.ip));
	} else {
		snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv4_prefixlen);
		cidr.family = AF_INET;
		cidr.type.in.s_addr = e->key.s.ip[0];
	}
	msg_result = isc_netaddr_totext(&cidr, &lb);
	if (msg_result != ISC_R_SUCCESS)
		ADD_LOG_CSTR(&lb, "?");
	add_log_str(&lb, strbuf, strlen(strbuf));

	if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY ||
	    e->key.s.rtype == DNS_RRL_RTYPE_REFERRAL ||
	    e->key.s.rtype == DNS_RRL_RTYPE_NODATA ||
	    e->key.s.rtype == DNS_RRL_RTYPE_NXDOMAIN) {
		qbuf = get_qname(rrl, e);
		if (save_qname && qbuf == NULL &&
		    qname != NULL && dns_name_isabsolute(qname)) {
			/*
			 * Capture the qname for the "stop limiting" message.
			 */
			qbuf = ISC_LIST_TAIL(rrl->qname_free);
			if (qbuf != NULL) {
				ISC_LIST_UNLINK(rrl->qname_free, qbuf, link);
			} else if (rrl->num_qnames < DNS_RRL_QNAMES) {
				qbuf = isc_mem_get(rrl->mctx, sizeof(*qbuf));
				if (qbuf != NULL) {
					memset(qbuf, 0, sizeof(*qbuf));
					ISC_LINK_INIT(qbuf, link);
					qbuf->index = rrl->num_qnames;
					rrl->qnames[rrl->num_qnames++] = qbuf;
				} else {
					isc_log_write(dns_lctx,
						      DNS_LOGCATEGORY_RRL,
						      DNS_LOGMODULE_REQUEST,
						      DNS_RRL_LOG_FAIL,
						      "isc_mem_get(%d)"
						      " failed for RRL qname",
						      (int)sizeof(*qbuf));
				}
			}
			if (qbuf != NULL) {
				e->log_qname = qbuf->index;
				qbuf->e = e;
				dns_fixedname_init(&qbuf->qname);
				dns_name_copynf(qname,
						   dns_fixedname_name(&qbuf->qname));
			}
		}
		if (qbuf != NULL)
			qname = dns_fixedname_name(&qbuf->qname);
		if (qname != NULL) {
			ADD_LOG_CSTR(&lb, " for ");
			(void)dns_name_totext(qname, true, &lb);
		} else {
			ADD_LOG_CSTR(&lb, " for (?)");
		}
		if (e->key.s.rtype != DNS_RRL_RTYPE_NXDOMAIN) {
			ADD_LOG_CSTR(&lb, " ");
			(void)dns_rdataclass_totext(e->key.s.qclass, &lb);
			if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY) {
				ADD_LOG_CSTR(&lb, " ");
				(void)dns_rdatatype_totext(e->key.s.qtype, &lb);
			}
		}
		snprintf(strbuf, sizeof(strbuf), "  (%08x)",
			 e->key.s.qname_hash);
		add_log_str(&lb, strbuf, strlen(strbuf));
	}

	/*
	 * We saved room for '\0'.
	 */
	log_buf[isc_buffer_usedlength(&lb)] = '\0';
}

static void
log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, bool early,
	char *log_buf, unsigned int log_buf_len)
{
	if (e->logged) {
		make_log_buf(rrl, e,
			     early ? "*" : NULL,
			     rrl->log_only ? "would stop limiting "
					   : "stop limiting ",
			     true, NULL, false,
			     DNS_RRL_RESULT_OK, ISC_R_SUCCESS,
			     log_buf, log_buf_len);
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
			      "%s", log_buf);
		free_qname(rrl, e);
		e->logged = false;
		--rrl->num_logged;
	}
}

/*
 * Log messages for streams that have stopped being rate limited.
 */
static void
log_stops(dns_rrl_t *rrl, isc_stdtime_t now, int limit,
	  char *log_buf, unsigned int log_buf_len)
{
	dns_rrl_entry_t *e;
	int age;

	for (e = rrl->last_logged; e != NULL; e = ISC_LIST_PREV(e, lru)) {
		if (!e->logged)
			continue;
		if (now != 0) {
			age = get_age(rrl, e, now);
			if (age < DNS_RRL_STOP_LOG_SECS ||
			    response_balance(rrl, e, age) < 0)
				break;
		}

		log_end(rrl, e, now == 0, log_buf, log_buf_len);
		if (rrl->num_logged <= 0)
			break;

		/*
		 * Too many messages could stall real work.
		 */
		if (--limit < 0) {
			rrl->last_logged = ISC_LIST_PREV(e, lru);
			return;
		}
	}
	if (e == NULL) {
		INSIST(rrl->num_logged == 0);
		rrl->log_stops_time = now;
	}
	rrl->last_logged = e;
}

/*
 * Main rate limit interface.
 */
dns_rrl_result_t
dns_rrl(dns_view_t *view,
	const isc_sockaddr_t *client_addr, bool is_tcp,
	dns_rdataclass_t qclass, dns_rdatatype_t qtype,
	const dns_name_t *qname, isc_result_t resp_result, isc_stdtime_t now,
	bool wouldlog, char *log_buf, unsigned int log_buf_len)
{
	dns_rrl_t *rrl;
	dns_rrl_rtype_t rtype;
	dns_rrl_entry_t *e;
	isc_netaddr_t netclient;
	int secs;
	double qps, scale;
	int exempt_match;
	isc_result_t result;
	dns_rrl_result_t rrl_result;

	INSIST(log_buf != NULL && log_buf_len > 0);

	rrl = view->rrl;
	if (rrl->exempt != NULL) {
		isc_netaddr_fromsockaddr(&netclient, client_addr);
		result = dns_acl_match(&netclient, NULL, rrl->exempt,
				       &view->aclenv, &exempt_match, NULL);
		if (result == ISC_R_SUCCESS && exempt_match > 0)
			return (DNS_RRL_RESULT_OK);
	}

	LOCK(&rrl->lock);

	/*
	 * Estimate total query per second rate when scaling by qps.
	 */
	if (rrl->qps_scale == 0) {
		qps = 0.0;
		scale = 1.0;
	} else {
		++rrl->qps_responses;
		secs = delta_rrl_time(rrl->qps_time, now);
		if (secs <= 0) {
			qps = rrl->qps;
		} else {
			qps = (1.0*rrl->qps_responses) / secs;
			if (secs >= rrl->window) {
				if (isc_log_wouldlog(dns_lctx,
						     DNS_RRL_LOG_DEBUG3))
					isc_log_write(dns_lctx,
						      DNS_LOGCATEGORY_RRL,
						      DNS_LOGMODULE_REQUEST,
						      DNS_RRL_LOG_DEBUG3,
						      "%d responses/%d seconds"
						      " = %d qps",
						      rrl->qps_responses, secs,
						      (int)qps);
				rrl->qps = qps;
				rrl->qps_responses = 0;
				rrl->qps_time = now;
			} else if (qps < rrl->qps) {
				qps = rrl->qps;
			}
		}
		scale = rrl->qps_scale / qps;
	}

	/*
	 * Do maintenance once per second.
	 */
	if (rrl->num_logged > 0 && rrl->log_stops_time != now)
		log_stops(rrl, now, 8, log_buf, log_buf_len);

	/*
	 * Notice TCP responses when scaling limits by qps.
	 * Do not try to rate limit TCP responses.
	 */
	if (is_tcp) {
		if (scale < 1.0) {
			e = get_entry(rrl, client_addr,
				      0, dns_rdatatype_none, NULL,
				      DNS_RRL_RTYPE_TCP, now, true,
				      log_buf, log_buf_len);
			if (e != NULL) {
				e->responses = -(rrl->window+1);
				set_age(rrl, e, now);
			}
		}
		UNLOCK(&rrl->lock);
		return (ISC_R_SUCCESS);
	}

	/*
	 * Find the right kind of entry, creating it if necessary.
	 * If that is impossible, then nothing more can be done
	 */
	switch (resp_result) {
	case ISC_R_SUCCESS:
		rtype = DNS_RRL_RTYPE_QUERY;
		break;
	case DNS_R_DELEGATION:
		rtype = DNS_RRL_RTYPE_REFERRAL;
		break;
	case DNS_R_NXRRSET:
		rtype = DNS_RRL_RTYPE_NODATA;
		break;
	case DNS_R_NXDOMAIN:
		rtype = DNS_RRL_RTYPE_NXDOMAIN;
		break;
	default:
		rtype = DNS_RRL_RTYPE_ERROR;
		break;
	}
	e = get_entry(rrl, client_addr, qclass, qtype, qname, rtype,
		      now, true, log_buf, log_buf_len);
	if (e == NULL) {
		UNLOCK(&rrl->lock);
		return (DNS_RRL_RESULT_OK);
	}

	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
		/*
		 * Do not worry about speed or releasing the lock.
		 * This message appears before messages from debit_rrl_entry().
		 */
		make_log_buf(rrl, e, "consider limiting ", NULL, false,
			     qname, false, DNS_RRL_RESULT_OK, resp_result,
			     log_buf, log_buf_len);
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
			      "%s", log_buf);
	}

	rrl_result = debit_rrl_entry(rrl, e, qps, scale, client_addr, now,
				     log_buf, log_buf_len);

	if (rrl->all_per_second.r != 0) {
		/*
		 * We must debit the all-per-second token bucket if we have
		 * an all-per-second limit for the IP address.
		 * The all-per-second limit determines the log message
		 * when both limits are hit.
		 * The response limiting must continue if the
		 * all-per-second limiting lapses.
		 */
		dns_rrl_entry_t *e_all;
		dns_rrl_result_t rrl_all_result;

		e_all = get_entry(rrl, client_addr,
				  0, dns_rdatatype_none, NULL,
				  DNS_RRL_RTYPE_ALL, now, true,
				  log_buf, log_buf_len);
		if (e_all == NULL) {
			UNLOCK(&rrl->lock);
			return (DNS_RRL_RESULT_OK);
		}
		rrl_all_result = debit_rrl_entry(rrl, e_all, qps, scale,
						 client_addr, now,
						 log_buf, log_buf_len);
		if (rrl_all_result != DNS_RRL_RESULT_OK) {
			e = e_all;
			rrl_result = rrl_all_result;
			if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
				make_log_buf(rrl, e,
					     "prefer all-per-second limiting ",
					     NULL, true, qname, false,
					     DNS_RRL_RESULT_OK, resp_result,
					     log_buf, log_buf_len);
				isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
					      DNS_LOGMODULE_REQUEST,
					      DNS_RRL_LOG_DEBUG1,
					      "%s", log_buf);
			}
		}
	}

	if (rrl_result == DNS_RRL_RESULT_OK) {
		UNLOCK(&rrl->lock);
		return (DNS_RRL_RESULT_OK);
	}

	/*
	 * Log occassionally in the rate-limit category.
	 */
	if ((!e->logged || e->log_secs >= DNS_RRL_MAX_LOG_SECS) &&
	    isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP)) {
		make_log_buf(rrl, e, rrl->log_only ? "would " : NULL,
			     e->logged ? "continue limiting " : "limit ",
			     true, qname, true,
			     DNS_RRL_RESULT_OK, resp_result,
			     log_buf, log_buf_len);
		if (!e->logged) {
			e->logged = true;
			if (++rrl->num_logged <= 1)
				rrl->last_logged = e;
		}
		e->log_secs = 0;

		/*
		 * Avoid holding the lock.
		 */
		if (!wouldlog) {
			UNLOCK(&rrl->lock);
			e = NULL;
		}
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
			      "%s", log_buf);
	}

	/*
	 * Make a log message for the caller.
	 */
	if (wouldlog)
		make_log_buf(rrl, e,
			     rrl->log_only ? "would rate limit " : "rate limit ",
			     NULL, false, qname, false,
			     rrl_result, resp_result, log_buf, log_buf_len);

	if (e != NULL) {
		/*
		 * Do not save the qname unless we might need it for
		 * the ending log message.
		 */
		if (!e->logged)
			free_qname(rrl, e);
		UNLOCK(&rrl->lock);
	}

	return (rrl_result);
}

void
dns_rrl_view_destroy(dns_view_t *view) {
	dns_rrl_t *rrl;
	dns_rrl_block_t *b;
	dns_rrl_hash_t *h;
	char log_buf[DNS_RRL_LOG_BUF_LEN];
	int i;

	rrl = view->rrl;
	if (rrl == NULL)
		return;
	view->rrl = NULL;

	/*
	 * Assume the caller takes care of locking the view and anything else.
	 */

	if (rrl->num_logged > 0)
		log_stops(rrl, 0, INT32_MAX, log_buf, sizeof(log_buf));

	for (i = 0; i < DNS_RRL_QNAMES; ++i) {
		if (rrl->qnames[i] == NULL)
			break;
		isc_mem_put(rrl->mctx, rrl->qnames[i], sizeof(*rrl->qnames[i]));
	}

	if (rrl->exempt != NULL)
		dns_acl_detach(&rrl->exempt);

	isc_mutex_destroy(&rrl->lock);

	while (!ISC_LIST_EMPTY(rrl->blocks)) {
		b = ISC_LIST_HEAD(rrl->blocks);
		ISC_LIST_UNLINK(rrl->blocks, b, link);
		isc_mem_put(rrl->mctx, b, b->size);
	}

	h = rrl->hash;
	if (h != NULL)
		isc_mem_put(rrl->mctx, h,
			    sizeof(*h) + (h->length - 1) * sizeof(h->bins[0]));

	h = rrl->old_hash;
	if (h != NULL)
		isc_mem_put(rrl->mctx, h,
			    sizeof(*h) + (h->length - 1) * sizeof(h->bins[0]));

	isc_mem_putanddetach(&rrl->mctx, rrl, sizeof(*rrl));
}

isc_result_t
dns_rrl_init(dns_rrl_t **rrlp, dns_view_t *view, int min_entries) {
	dns_rrl_t *rrl;
	isc_result_t result;

	*rrlp = NULL;

	rrl = isc_mem_get(view->mctx, sizeof(*rrl));
	if (rrl == NULL)
		return (ISC_R_NOMEMORY);
	memset(rrl, 0, sizeof(*rrl));
	isc_mem_attach(view->mctx, &rrl->mctx);
	isc_mutex_init(&rrl->lock);
	isc_stdtime_get(&rrl->ts_bases[0]);

	view->rrl = rrl;

	result = expand_entries(rrl, min_entries);
	if (result != ISC_R_SUCCESS) {
		dns_rrl_view_destroy(view);
		return (result);
	}
	result = expand_rrl_hash(rrl, 0);
	if (result != ISC_R_SUCCESS) {
		dns_rrl_view_destroy(view);
		return (result);
	}

	*rrlp = rrl;
	return (ISC_R_SUCCESS);
}