You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
libev/evdns.c

3116 lines
83 KiB
C

#define DNS_USE_GETTIMEOFDAY_FOR_ID 1
#define HAVE_STRUCT_IN6_ADDR 1
/* The original version of this module was written by Adam Langley; for
* a history of modifications, check out the subversion logs.
*
* When editing this module, try to keep it re-mergeable by Adam. Don't
* reformat the whitespace, add Tor dependencies, or so on.
*
* TODO:
* - Support IPv6 and PTR records.
* - Replace all externally visible magic numbers with #defined constants.
* - Write documentation for APIs of all external functions.
*/
/* Async DNS Library
* Adam Langley <agl@imperialviolet.org>
* http://www.imperialviolet.org/eventdns.html
* Public Domain code
*
* This software is Public Domain. To view a copy of the public domain dedication,
* visit http://creativecommons.org/licenses/publicdomain/ or send a letter to
* Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305, USA.
*
* I ask and expect, but do not require, that all derivative works contain an
* attribution similar to:
* Parts developed by Adam Langley <agl@imperialviolet.org>
*
* You may wish to replace the word "Parts" with something else depending on
* the amount of original code.
*
* (Derivative works does not include programs which link against, run or include
* the source verbatim in their source distributions)
*
* Version: 0.1b
*/
#include <sys/types.h>
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef WIN32
#endif
/* #define NDEBUG */
#ifndef DNS_USE_CPU_CLOCK_FOR_ID
#ifndef DNS_USE_GETTIMEOFDAY_FOR_ID
#ifndef DNS_USE_OPENSSL_FOR_ID
#error Must configure at least one id generation method.
#error Please see the documentation.
#endif
#endif
#endif
/* #define _POSIX_C_SOURCE 200507 */
#define _GNU_SOURCE
#ifdef DNS_USE_CPU_CLOCK_FOR_ID
#ifdef DNS_USE_OPENSSL_FOR_ID
#error Multiple id options selected
#endif
#ifdef DNS_USE_GETTIMEOFDAY_FOR_ID
#error Multiple id options selected
#endif
#include <time.h>
#endif
#ifdef DNS_USE_OPENSSL_FOR_ID
#ifdef DNS_USE_GETTIMEOFDAY_FOR_ID
#error Multiple id options selected
#endif
#include <openssl/rand.h>
#endif
#define _FORTIFY_SOURCE 3
#include <string.h>
#include <fcntl.h>
#include <sys/time.h>
#ifdef HAVE_STDINT_H
#include <stdint.h>
#endif
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <unistd.h>
#include <limits.h>
#include <sys/stat.h>
#include <ctype.h>
#include <stdio.h>
#include <stdarg.h>
#include "evdns.h"
#ifdef WIN32
#include <windows.h>
#include <winsock2.h>
#include <iphlpapi.h>
#else
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#endif
#ifdef HAVE_NETINET_IN6_H
#include <netinet/in6.h>
#endif
#ifdef WIN32
typedef int socklen_t;
#endif
#define EVDNS_LOG_DEBUG 0
#define EVDNS_LOG_WARN 1
#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX 255
#endif
#ifndef NDEBUG
#include <stdio.h>
#endif
#undef MIN
#define MIN(a,b) ((a)<(b)?(a):(b))
#ifdef __USE_ISOC99B
/* libevent doesn't work without this */
typedef uint8_t u_char;
typedef unsigned int uint;
#endif
#include <event.h>
/* Short aliases for the fixed-width integer types used throughout this file. */
#define u64 uint64_t
#define u32 uint32_t
#define u16 uint16_t
#define u8 uint8_t
#define MAX_ADDRS 4 /* maximum number of addresses from a single packet */
/* which we bother recording */
/* Resource record type and class codes. Most map onto the public EVDNS_* */
/* constants; TYPE_CNAME is given here as a literal value. */
#define TYPE_A EVDNS_TYPE_A
#define TYPE_CNAME 5
#define TYPE_PTR EVDNS_TYPE_PTR
#define TYPE_AAAA EVDNS_TYPE_AAAA
#define CLASS_INET EVDNS_CLASS_INET
/* One outstanding DNS query issued by this resolver. Requests live on one of */
/* two circular doubly-linked lists (req_head for inflight requests, */
/* req_waiting_head for requests that are still queued). */
struct request {
u8 *request; /* the dns packet data */
unsigned int request_len;
int reissue_count; /* incremented each time request_reissue() retargets this request */
int tx_count; /* the number of times that this packet has been sent */
unsigned int request_type; /* TYPE_PTR or TYPE_A (TYPE_AAAA is also handled by reply_callback) */
void *user_pointer; /* the pointer given to us for this request */
evdns_callback_type user_callback; /* invoked by reply_callback() with the result or an error */
struct nameserver *ns; /* the server which we last sent it to */
/* elements used by the searching code */
int search_index;
struct search_state *search_state;
char *search_origname; /* needs to be free()ed */
int search_flags;
/* these objects are kept in a circular list */
struct request *next, *prev;
struct event timeout_event; /* deleted with evtimer_del() when the request finishes */
u16 trans_id; /* the transaction id */
char request_appended; /* true if the request pointer is data which follows this struct */
char transmit_me; /* needs to be transmitted */
};
/* Fallback definition of struct in6_addr (16 raw address bytes), compiled */
/* only when HAVE_STRUCT_IN6_ADDR is not defined. */
#ifndef HAVE_STRUCT_IN6_ADDR
struct in6_addr {
u8 s6_addr[16];
};
#endif
/* The decoded answer of a DNS response, filled in by reply_parse(). */
/* Which member of `data` is meaningful depends on `type`. */
struct reply {
unsigned int type; /* copied from the request's request_type */
unsigned int have_answer; /* set to 1 once at least one matching record was decoded */
union {
struct {
u32 addrcount; /* number of valid entries in addresses[] */
u32 addresses[MAX_ADDRS]; /* copied straight from the packet, so still in wire byte order */
} a;
struct {
u32 addrcount; /* number of valid entries in addresses[] */
struct in6_addr addresses[MAX_ADDRS];
} aaaa;
struct {
char name[HOST_NAME_MAX]; /* NUL-terminated name written by name_parse() */
} ptr;
} data;
};
/* One upstream nameserver that queries can be sent to. */
struct nameserver {
int socket; /* a connected UDP socket */
u32 address; /* passed to debug_ntoa(), which applies ntohl(), so kept in network byte order */
int failed_times; /* number of times which we have given this server a chance */
int timedout; /* number of times in a row a request has timed out */
struct event event;
/* these objects are kept in a circular list */
struct nameserver *next, *prev;
struct event timeout_event; /* used to keep the timeout for */
/* when we next probe this server. */
/* Valid if state == 0 */
char state; /* zero if we think that this server is down */
char choked; /* true if we have an EAGAIN from this server's socket */
char write_waiting; /* true if we are waiting for EV_WRITE events */
};
/* Heads of the circular lists: inflight requests, waiting requests, and the */
/* configured nameservers. NULL means the corresponding list is empty. */
static struct request *req_head = NULL, *req_waiting_head = NULL;
static struct nameserver *server_head = NULL;
/* Represents a local port where we're listening for DNS requests. Right now, */
/* only UDP is supported. */
struct evdns_server_port {
int socket; /* socket we use to read queries and write replies. */
int refcnt; /* reference count; request_parse() takes one reference per parsed request. */
char choked; /* Are we currently blocked from writing? */
char closing; /* Are we trying to close this port, pending writes? */
evdns_request_callback_fn_type user_callback; /* Fn to handle requests */
void *user_data; /* Opaque pointer passed to user_callback */
struct event event; /* Read/write event */
/* circular list of replies that we want to write. */
struct server_request *pending_replies;
};
/* Represents part of a reply being built. (That is, a single RR.) */
struct server_reply_item {
struct server_reply_item *next; /* next item in sequence. */
char *name; /* name part of the RR */
u16 type : 16; /* The RR type */
u16 class : 16; /* The RR class (usually CLASS_INET) */
u32 ttl; /* The RR TTL */
char is_name; /* True iff data is a label */
u16 datalen; /* Length of data; documented as -1 if data is a label. */
/* NOTE(review): the field is unsigned, so a stored -1 reads back as 0xffff. */
void *data; /* The contents of the RR */
};
/* Represents a request that we've received as a DNS server, and holds */
/* the components of the reply as we're constructing it. */
struct server_request {
/* Pointers to the next and previous entries on the list of replies */
/* that we're waiting to write. Only set if we have tried to respond */
/* and gotten EAGAIN. */
struct server_request *next_pending;
struct server_request *prev_pending;
u16 trans_id; /* Transaction id. */
struct evdns_server_port *port; /* Which port received this request on? */
struct sockaddr_storage addr; /* Where to send the response */
socklen_t addrlen; /* length of addr */
int n_answer; /* how many answer RRs have been set? */
int n_authority; /* how many authority RRs have been set? */
int n_additional; /* how many additional RRs have been set? */
struct server_reply_item *answer; /* linked list of answer RRs */
struct server_reply_item *authority; /* linked list of authority RRs */
struct server_reply_item *additional; /* linked list of additional RRs */
/* Constructed response. Only set once we're ready to send a reply. */
/* Once this is set, the RR fields are cleared, and no more should be set. */
char *response;
size_t response_len;
/* Caller-visible fields: flags, questions. This is the part handed to the */
/* user callback; TO_SERVER_REQUEST() maps it back to the enclosing struct. */
struct evdns_server_request base;
};
/* helper macro: byte offset of `member` inside struct type `st`, computed */
/* from a null pointer of that type and cast to off_t. */
#define OFFSET_OF(st, member) ((off_t) (((char*)&((st*)0)->member)-(char*)0))
/* Given a pointer to an evdns_server_request, get the corresponding */
/* server_request by stepping back over the offset of its `base` member. */
#define TO_SERVER_REQUEST(base_ptr) \
((struct server_request*) \
(((char*)(base_ptr) - OFFSET_OF(struct server_request, base))))
/* The number of good nameservers that we have */
static int global_good_nameservers = 0;
/* inflight requests are contained in the req_head list */
/* and are actually going out across the network */
static int global_requests_inflight = 0;
/* requests which aren't inflight are in the waiting list */
/* and are counted here */
static int global_requests_waiting = 0;
/* upper bound on global_requests_inflight; the waiting queue is only */
/* drained while the inflight count is below this value */
static int global_max_requests_inflight = 64;
static struct timeval global_timeout = {5, 0}; /* 5 seconds */
static int global_max_reissues = 1; /* a reissue occurs when we get some errors from the server */
static int global_max_retransmits = 3; /* number of times we'll retransmit a request which timed out */
/* number of timeouts in a row before we consider this server to be down */
static int global_max_nameserver_timeout = 3;
/* These are the timeout values for nameservers. If we find a nameserver is down */
/* we try to probe it at intervals as given below. Values are in seconds. */
static const struct timeval global_nameserver_timeouts[] = {{10, 0}, {60, 0}, {300, 0}, {900, 0}, {3600, 0}};
/* number of entries in global_nameserver_timeouts */
static const int global_nameserver_timeouts_length = sizeof(global_nameserver_timeouts)/sizeof(struct timeval);
/* Forward declarations of file-local helpers, so that they can be called */
/* before their definitions appear. */
static struct nameserver *nameserver_pick(void);
static void evdns_request_insert(struct request *req, struct request **head);
static void nameserver_ready_callback(int fd, short events, void *arg);
static int evdns_transmit(void);
static int evdns_request_transmit(struct request *req);
static void nameserver_send_probe(struct nameserver *const ns);
static void search_request_finished(struct request *const);
static int search_try_next(struct request *const req);
static int search_request_new(int type, const char *const name, int flags, evdns_callback_type user_callback, void *user_arg);
static void evdns_requests_pump_waiting_queue(void);
static u16 transaction_id_pick(void);
static struct request *request_new(int type, const char *name, int flags, evdns_callback_type callback, void *ptr);
static void request_submit(struct request *req);
static int server_request_free(struct server_request *req);
static void server_request_free_answers(struct server_request *req);
static void server_port_free(struct evdns_server_port *port);
static void server_port_ready_callback(int fd, short events, void *arg);
static int strtoint(const char *const str);
#ifdef WIN32
/* Return the most recent Winsock error. When that error is WSAEWOULDBLOCK */
/* and `sock` is non-negative, the socket's SO_ERROR value is returned */
/* instead, provided it can be read and is non-zero. */
static int
last_error(int sock)
{
	int pending = 0;
	int pending_len = sizeof(pending);
	const int wsa_err = WSAGetLastError();

	if (wsa_err != WSAEWOULDBLOCK || sock < 0)
		return wsa_err;
	if (getsockopt(sock, SOL_SOCKET, SO_ERROR, (void*)&pending,
		&pending_len) != 0)
		return wsa_err;
	return pending ? pending : wsa_err;
}
/* True when `err` means "the operation would block": either EAGAIN or */
/* the Winsock code WSAEWOULDBLOCK. */
static int
error_is_eagain(int err)
{
	if (err == EAGAIN)
		return 1;
	return err == WSAEWOULDBLOCK;
}
/* Replacement for inet_aton() built on inet_addr(): parse the dotted-quad */
/* string `c` into `addr`. Returns 1 on success and 0 on failure. */
static int
inet_aton(const char *c, struct in_addr *addr)
{
	uint32_t parsed;

	/* inet_addr() signals failure with INADDR_NONE, so the all-ones */
	/* address is recognised by hand before calling it. */
	if (strcmp(c, "255.255.255.255") == 0) {
		addr->s_addr = 0xffffffffu;
		return 1;
	}
	parsed = inet_addr(c);
	if (parsed == INADDR_NONE)
		return 0;
	addr->s_addr = parsed;
	return 1;
}
#define CLOSE_SOCKET(x) closesocket(x)
#else
#define last_error(sock) (errno)
#define error_is_eagain(err) ((err) == EAGAIN)
#define CLOSE_SOCKET(x) close(x)
#endif
#define ISSPACE(c) isspace((int)(unsigned char)(c))
#define ISDIGIT(c) isdigit((int)(unsigned char)(c))
#ifndef NDEBUG
/* Render an IPv4 address given in network byte order as dotted-quad text. */
/* The result lives in a static buffer that the next call overwrites. */
static const char *
debug_ntoa(u32 address)
{
	static char text[32];
	const u32 host_order = ntohl(address);
	const int octet1 = (int)((host_order >> 24) & 0xff);
	const int octet2 = (int)((host_order >> 16) & 0xff);
	const int octet3 = (int)((host_order >> 8) & 0xff);
	const int octet4 = (int)(host_order & 0xff);

	snprintf(text, sizeof(text), "%d.%d.%d.%d",
		octet1, octet2, octet3, octet4);
	return text;
}
#endif
static evdns_debug_log_fn_type evdns_log_fn = NULL;
/* Install `fn` as the callback that receives this module's log messages. */
/* While the stored callback is NULL, _evdns_log() returns without doing */
/* anything. */
void
evdns_set_log_fn(evdns_debug_log_fn_type fn)
{
	evdns_log_fn = fn;
}
#ifdef __GNUC__
#define EVDNS_LOG_CHECK __attribute__ ((format(printf, 2, 3)))
#else
#define EVDNS_LOG_CHECK
#endif
static void _evdns_log(int warn, const char *fmt, ...) EVDNS_LOG_CHECK;
/* Format a printf-style message into a static buffer and pass it, together */
/* with `warn`, to the registered log callback. Does nothing when no */
/* callback has been installed. */
static void
_evdns_log(int warn, const char *fmt, ...)
{
	static char message[512];
	va_list ap;

	if (evdns_log_fn == NULL)
		return;

	va_start(ap, fmt);
#ifdef WIN32
	_vsnprintf(message, sizeof(message), fmt, ap);
#else
	vsnprintf(message, sizeof(message), fmt, ap);
#endif
	/* always terminate, whatever the formatter did on truncation */
	message[sizeof(message) - 1] = '\0';
	evdns_log_fn(warn, message);
	va_end(ap);
}
#define log _evdns_log
/* Search the circular list of inflight requests for the one whose */
/* transaction id equals trans_id. Returns that request, or NULL when */
/* the list is empty or no request matches. */
static struct request *
request_find_from_trans_id(u16 trans_id) {
	struct request *const first = req_head;
	struct request *cur;

	if (first == NULL)
		return NULL;
	cur = first;
	do {
		if (cur->trans_id == trans_id)
			return cur;
		cur = cur->next;
	} while (cur != first);
	return NULL;
}
/* a libevent callback function which is called when a nameserver */
/* has gone down and we want to test whether it has come back to life yet. */
/* `arg` is the nameserver to probe; fd and events are ignored. */
static void
nameserver_prod_callback(int fd, short events, void *arg) {
	struct nameserver *const server = (struct nameserver *) arg;

	(void) events;
	(void) fd;
	nameserver_send_probe(server);
}
/* Called when a probe of a nameserver (to see if it has come back to life) */
/* has failed. Unless the server has been marked good in the meantime, we */
/* schedule the next probe using the back-off table, indexed by the current */
/* failed_times (clamped to the last entry), and then bump failed_times. */
static void
nameserver_probe_failed(struct nameserver *const ns) {
	const struct timeval *delay;
	int slot;

	(void) evtimer_del(&ns->timeout_event);

	/* This can happen if the nameserver acts in a way which makes us mark */
	/* it as bad and then starts sending good replies. */
	if (ns->state == 1)
		return;

	slot = ns->failed_times;
	if (!(slot < global_nameserver_timeouts_length - 1))
		slot = global_nameserver_timeouts_length - 1;
	delay = &global_nameserver_timeouts[slot];
	ns->failed_times++;

	evtimer_set(&ns->timeout_event, nameserver_prod_callback, ns);
	if (evtimer_add(&ns->timeout_event, (struct timeval *) delay) >= 0)
		return;

	log(EVDNS_LOG_WARN,
		"Error from libevent when adding timer event for %s",
		debug_ntoa(ns->address));
	/* ???? Do more? */
}
/* Mark a nameserver as failed (for example because too many packets have */
/* timed out). `msg` is the reason and is only used for logging. The first */
/* probe timer is armed, and inflight requests that were assigned to this */
/* server but have not been sent yet are handed to another server. */
static void
nameserver_failed(struct nameserver *const ns, const char *msg) {
	struct request *cur, *first;

	/* already marked as failed: nothing to do */
	if (ns->state == 0)
		return;

	log(EVDNS_LOG_WARN, "Nameserver %s has failed: %s",
		debug_ntoa(ns->address), msg);

	--global_good_nameservers;
	assert(global_good_nameservers >= 0);
	if (global_good_nameservers == 0)
		log(EVDNS_LOG_WARN, "All nameservers have failed");

	ns->state = 0;
	ns->failed_times = 1;

	evtimer_set(&ns->timeout_event, nameserver_prod_callback, ns);
	if (evtimer_add(&ns->timeout_event,
		(struct timeval *) &global_nameserver_timeouts[0]) < 0) {
		log(EVDNS_LOG_WARN,
			"Error from libevent when adding timer event for %s",
			debug_ntoa(ns->address));
		/* ???? Do more? */
	}

	/* Requests in the waiting queue have no nameserver yet, so only the */
	/* inflight list needs a look, and only if some good server is left */
	/* to take the requests over. */
	if (global_good_nameservers == 0)
		return;

	first = req_head;
	if (first == NULL)
		return;
	cur = first;
	do {
		/* not transmitted yet, so it can still move to another server */
		if (cur->tx_count == 0 && cur->ns == ns)
			cur->ns = nameserver_pick();
		cur = cur->next;
	} while (cur != first);
}
/* Mark a nameserver that was considered down as good again: cancel its */
/* probe timer, reset its failure counters and count it among the good */
/* servers. Servers that are already up are left alone. */
static void
nameserver_up(struct nameserver *const ns) {
	if (ns->state != 0)
		return;

	log(EVDNS_LOG_WARN, "Nameserver %s is back up",
		debug_ntoa(ns->address));
	evtimer_del(&ns->timeout_event);

	ns->timedout = 0;
	ns->failed_times = 0;
	ns->state = 1;
	++global_good_nameservers;
}
/* Record trans_id on the request and write it, in network byte order, */
/* into the first two bytes of the request packet. */
static void
request_trans_id_set(struct request *const req, const u16 trans_id) {
	const u16 wire_id = htons(trans_id);

	req->trans_id = trans_id;
	/* The packet is a byte buffer; copy the id in rather than storing */
	/* through a u16 pointer, which assumes the buffer is u16-aligned. */
	memcpy(req->request, &wire_id, sizeof(wire_id));
}
/* Unlink a request from its list and release it. */
/* head points at the head pointer of the list that holds the request, */
/* or is NULL if the request isn't in a list. Afterwards the waiting */
/* queue is pumped so that a queued request can use the freed slot. */
static void
request_finished(struct request *const req, struct request **head) {
	if (head != NULL) {
		if (req->next != req) {
			req->prev->next = req->next;
			req->next->prev = req->prev;
			if (*head == req)
				*head = req->next;
		} else {
			/* it was the only item in the list */
			*head = NULL;
		}
	}

	log(EVDNS_LOG_DEBUG, "Removing timeout for request %lx",
		(unsigned long) req);
	evtimer_del(&req->timeout_event);

	search_request_finished(req);
	global_requests_inflight--;

	/* Packet data that was appended to the struct is released together */
	/* with the struct; otherwise it has to be freed on its own. */
	if (!req->request_appended)
		free(req->request);
	free(req);

	evdns_requests_pump_waiting_queue();
}
/* This is called when a server returns a funny error code. */
/* We try the request again with another server. */
/* */
/* return: */
/* 0 ok, the request has been marked for transmission to a new server */
/* 1 failed/reissue is pointless */
static int
request_reissue(struct request *req) {
	const struct nameserver *const previous = req->ns;
	struct nameserver *const candidate = nameserver_pick();

	/* The caller should already have marked the previous server as */
	/* failing, so the pick tries to avoid it. */
	req->ns = candidate;
	if (candidate == previous) {
		/* ... but we got the same server anyway; retrying against */
		/* it would not help. */
		return 1;
	}

	req->transmit_me = 1;
	req->tx_count = 0;
	req->reissue_count++;
	return 0;
}
/* Promote requests from the waiting queue to the inflight queue for as */
/* long as there is room for more inflight requests and something is */
/* waiting. Each promoted request gets a nameserver and a fresh */
/* transaction id and is transmitted. */
static void
evdns_requests_pump_waiting_queue(void) {
	for (;;) {
		struct request *promoted;

		if (global_requests_inflight >= global_max_requests_inflight)
			break;
		if (!global_requests_waiting)
			break;

		/* take the head of the waiting queue off its circular list */
		assert(req_waiting_head);
		promoted = req_waiting_head;
		if (promoted->next == promoted) {
			/* it was the only item in the queue */
			req_waiting_head = NULL;
		} else {
			promoted->next->prev = promoted->prev;
			promoted->prev->next = promoted->next;
			req_waiting_head = promoted->next;
		}
		global_requests_waiting--;
		global_requests_inflight++;

		promoted->ns = nameserver_pick();
		request_trans_id_set(promoted, transaction_id_pick());
		evdns_request_insert(promoted, &req_head);
		evdns_request_transmit(promoted);
		evdns_transmit();
	}
}
/* Deliver the outcome of a request to the user's callback. With a reply, */
/* the decoded records are passed along with DNS_ERR_NONE and ttl; without */
/* one (reply == NULL), only the error code err is reported. Request types */
/* other than A, PTR and AAAA trip an assertion and call nothing. */
static void
reply_callback(struct request *const req, u32 ttl, u32 err, struct reply *reply) {
	const unsigned int kind = req->request_type;

	if (kind != TYPE_A && kind != TYPE_PTR && kind != TYPE_AAAA) {
		assert(0);
		return;
	}

	if (!reply) {
		req->user_callback(err, 0, 0, 0, NULL, req->user_pointer);
		return;
	}

	if (kind == TYPE_A) {
		req->user_callback(DNS_ERR_NONE, DNS_IPv4_A,
			reply->data.a.addrcount, ttl,
			reply->data.a.addresses,
			req->user_pointer);
	} else if (kind == TYPE_PTR) {
		/* the callback receives a pointer to the name pointer */
		char *name = reply->data.ptr.name;
		req->user_callback(DNS_ERR_NONE, DNS_PTR, 1, ttl,
			&name, req->user_pointer);
	} else {
		req->user_callback(DNS_ERR_NONE, DNS_IPv6_AAAA,
			reply->data.aaaa.addrcount, ttl,
			reply->data.aaaa.addresses,
			req->user_pointer);
	}
}
/* this processes a parsed reply packet */
/* req is the request the reply belongs to, flags is the flags word of the */
/* DNS header, ttl the TTL to report, and reply the decoded answer (NULL if */
/* the packet could not be decoded). Unless the request is reissued to */
/* another server, it is finished (and freed) before this returns. */
static void
reply_handle(struct request *const req, u16 flags, u32 ttl, struct reply *reply) {
int error;
/* indexed by (low four flag bits - 1) */
static const int error_codes[] = {DNS_ERR_FORMAT, DNS_ERR_SERVERFAILED, DNS_ERR_NOTEXIST, DNS_ERR_NOTIMPL, DNS_ERR_REFUSED};
/* 0x0200 is tested as "truncated" below; the low four bits carry the */
/* server's error code */
if (flags & 0x020f || !reply || !reply->have_answer) {
/* there was an error */
if (flags & 0x0200) {
error = DNS_ERR_TRUNCATED;
} else {
/* a code of 0 wraps around to 0xffff here and so becomes DNS_ERR_UNKNOWN */
u16 error_code = (flags & 0x000f) - 1;
if (error_code > 4) {
error = DNS_ERR_UNKNOWN;
} else {
error = error_codes[error_code];
}
}
switch(error) {
case DNS_ERR_NOTIMPL:
case DNS_ERR_REFUSED:
/* we regard these errors as marking a bad nameserver */
if (req->reissue_count < global_max_reissues) {
char msg[64];
snprintf(msg, sizeof(msg), "Bad response %d (%s)",
error, evdns_err_to_string(error));
nameserver_failed(req->ns, msg);
/* request_reissue() returns 0 when another server took the request over */
if (!request_reissue(req)) return;
}
break;
case DNS_ERR_SERVERFAILED:
/* rcode 2 (servfailed) sometimes means "we are broken" and
* sometimes (with some binds) means "that request was very
* confusing." Treat this as a timeout, not a failure.
*/
log(EVDNS_LOG_DEBUG, "Got a SERVERFAILED from nameserver %s; "
"will allow the request to time out.",
debug_ntoa(req->ns->address));
break;
default:
/* we got a good reply from the nameserver */
nameserver_up(req->ns);
}
if (req->search_state && req->request_type != TYPE_PTR) {
/* if we have a list of domains to search in, try the next one */
if (!search_try_next(req)) {
/* a new request was issued so this request is finished and */
/* the user callback will be made when that request (or a */
/* child of it) finishes. */
request_finished(req, &req_head);
return;
}
}
/* all else failed. Pass the failure up */
reply_callback(req, 0, error, NULL);
request_finished(req, &req_head);
} else {
/* all ok, tell the user */
reply_callback(req, ttl, 0, reply);
nameserver_up(req->ns);
request_finished(req, &req_head);
}
}
/* Bounds-checked big-endian readers over `packet`/`length` at cursor `j`. */
/* On overrun they jump to a local `err` label. GET16 and GET32 need scratch */
/* variables `_t` (u16) and `_t32` (u32) in the function that uses them. */
/* They stay defined for the parsers that follow. */
#define GET32(x) do { if (j + 4 > length) goto err; memcpy(&_t32, packet + j, 4); j += 4; x = ntohl(_t32); } while(0)
#define GET16(x) do { if (j + 2 > length) goto err; memcpy(&_t, packet + j, 2); j += 2; x = ntohs(_t); } while(0)
#define GET8(x) do { if (j >= length) goto err; x = packet[j++]; } while(0)
/* Decode the (possibly compressed) domain name that starts at offset *idx */
/* of a DNS packet. */
/* */
/* packet, length: the whole packet and its size in bytes */
/* idx: in: offset of the name; out: offset just past the name as */
/* it appears at that position (past the first compression */
/* pointer, if one was followed). Untouched on failure. */
/* name_out, name_out_len: receives the dotted, NUL-terminated name */
/* */
/* Returns 0 on success, -1 if the name is malformed, runs outside the */
/* packet, loops, or does not fit into name_out. */
static int
name_parse(u8 *packet, int length, int *idx, char *name_out, int name_out_len) {
	int name_end = -1;
	int j = *idx;
	int ptr_count = 0;
	char *cp = name_out;
	const char *const end = name_out + name_out_len;

	if (j < 0) return -1;

	/* Normally, names are a series of length prefixed strings terminated */
	/* with a length of 0 (the lengths are u8's <= 63). */
	/* However, the length can start with a pair of 1 bits and that */
	/* means that the next 14 bits are a pointer within the current */
	/* packet. */
	for (;;) {
		u8 label_len;
		if (j >= length) return -1;
		GET8(label_len);
		if (!label_len) break;
		if (label_len & 0xc0) {
			u8 ptr_low;
			GET8(ptr_low);
			/* only the first pointer determines where the name ends */
			if (name_end < 0) name_end = j;
			j = (((int)label_len & 0x3f) << 8) + ptr_low;
			/* Make sure that the target offset is in-bounds. */
			if (j < 0 || j >= length) return -1;
			/* If we've jumped more times than there are characters in the
			 * message, we must have a loop. */
			if (++ptr_count > length) return -1;
			continue;
		}
		if (label_len > 63) return -1;
		/* the label's bytes must all lie inside the packet */
		if (j + label_len > length) return -1;
		if (cp != name_out) {
			if (end - cp <= 1) return -1;
			*cp++ = '.';
		}
		/* keep room for the terminating NUL */
		if (label_len >= end - cp) return -1;
		memcpy(cp, packet + j, label_len);
		cp += label_len;
		j += label_len;
	}
	if (cp >= end) return -1;
	*cp = '\0';
	if (name_end < 0)
		*idx = j;
	else
		*idx = name_end;
	return 0;
err:
	return -1;
}
/* Parses a raw reply packet received from a nameserver. */
/* The transaction id in the header is matched against the inflight */
/* requests; the records in the answer section that fit the request's type */
/* (A, PTR or AAAA, class INET) are collected and passed to reply_handle(). */
/* Returns 0 if a reply was decoded and handled, -1 otherwise. On the `err` */
/* path a matched request is still given to reply_handle() with a NULL reply. */
static int
reply_parse(u8 *packet, int length) {
int j = 0; /* index into packet */
u16 _t; /* used by the macros */
u32 _t32; /* used by the macros */
char tmp_name[256]; /* used by the macros */
u16 trans_id, questions, answers, authority, additional, datalength;
u16 flags = 0;
u32 ttl, ttl_r = 0xffffffff; /* ttl_r: smallest TTL among the records used */
struct reply reply;
struct request *req = NULL;
unsigned int i;
/* the fixed header: six 16-bit fields */
GET16(trans_id);
GET16(flags);
GET16(questions);
GET16(answers);
GET16(authority);
GET16(additional);
(void) authority; /* suppress "unused variable" warnings. */
(void) additional; /* suppress "unused variable" warnings. */
req = request_find_from_trans_id(trans_id);
if (!req) return -1;
memset(&reply, 0, sizeof(reply));
/* If it's not an answer, it doesn't correspond to any request. */
if (!(flags & 0x8000)) return -1; /* must be an answer */
if (flags & 0x020f) {
/* there was an error */
goto err;
}
/* if (!answers) return; */ /* must have an answer of some form */
/* This macro skips a name in the DNS reply. */
#define SKIP_NAME \
do { tmp_name[0] = '\0'; \
if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0) \
goto err; \
} while(0);
reply.type = req->request_type;
/* skip over each question in the reply */
for (i = 0; i < questions; ++i) {
/* the question looks like
* <label:name><u16:type><u16:class>
*/
SKIP_NAME;
j += 4;
if (j >= length) goto err;
}
/* now we have the answer section which looks like
* <label:name><u16:type><u16:class><u32:ttl><u16:len><data...>
*/
for (i = 0; i < answers; ++i) {
u16 type, class;
SKIP_NAME;
GET16(type);
GET16(class);
GET32(ttl);
GET16(datalength);
if (type == TYPE_A && class == CLASS_INET) {
int addrcount, addrtocopy;
/* records of a type we did not ask for are skipped */
if (req->request_type != TYPE_A) {
j += datalength; continue;
}
if ((datalength & 3) != 0) /* not an even number of As. */
goto err;
addrcount = datalength >> 2;
/* never copy more than still fits into the reply */
addrtocopy = MIN(MAX_ADDRS - reply.data.a.addrcount, (unsigned)addrcount);
ttl_r = MIN(ttl_r, ttl);
/* we only bother with the first four addresses. */
if (j + 4*addrtocopy > length) goto err;
memcpy(&reply.data.a.addresses[reply.data.a.addrcount],
packet + j, 4*addrtocopy);
j += 4*addrtocopy;
reply.data.a.addrcount += addrtocopy;
reply.have_answer = 1;
if (reply.data.a.addrcount == MAX_ADDRS) break;
} else if (type == TYPE_PTR && class == CLASS_INET) {
if (req->request_type != TYPE_PTR) {
j += datalength; continue;
}
if (name_parse(packet, length, &j, reply.data.ptr.name,
sizeof(reply.data.ptr.name))<0)
goto err;
ttl_r = MIN(ttl_r, ttl);
reply.have_answer = 1;
/* only the first PTR record is used */
break;
} else if (type == TYPE_AAAA && class == CLASS_INET) {
int addrcount, addrtocopy;
if (req->request_type != TYPE_AAAA) {
j += datalength; continue;
}
if ((datalength & 15) != 0) /* not an even number of AAAAs. */
goto err;
addrcount = datalength >> 4; /* each address is 16 bytes long */
addrtocopy = MIN(MAX_ADDRS - reply.data.aaaa.addrcount, (unsigned)addrcount);
ttl_r = MIN(ttl_r, ttl);
/* we only bother with the first four addresses. */
if (j + 16*addrtocopy > length) goto err;
memcpy(&reply.data.aaaa.addresses[reply.data.aaaa.addrcount],
packet + j, 16*addrtocopy);
reply.data.aaaa.addrcount += addrtocopy;
j += 16*addrtocopy;
reply.have_answer = 1;
if (reply.data.aaaa.addrcount == MAX_ADDRS) break;
} else {
/* skip over any other type of resource */
/* (an offset past the end is caught by the next bounds-checked read) */
j += datalength;
}
}
reply_handle(req, flags, ttl_r, &reply);
return 0;
err:
/* req is still NULL if the header itself was too short */
if (req)
reply_handle(req, flags, 0, NULL);
return -1;
}
/* Parse a raw request (packet,length) sent to a nameserver port (port) from */
/* a DNS client (addr,addrlen), and if it's well-formed, call the corresponding */
/* callback. */
/* Returns 0 once the user callback has been invoked, and -1 if the packet is */
/* rejected or memory runs out; everything allocated so far is freed then. */
static int
request_parse(u8 *packet, int length, struct evdns_server_port *port, struct sockaddr *addr, socklen_t addrlen)
{
int j = 0; /* index into packet */
u16 _t; /* used by the macros */
char tmp_name[256]; /* used by the macros */
int i;
u16 trans_id, flags, questions, answers, authority, additional;
struct server_request *server_req = NULL;
/* Get the header fields */
GET16(trans_id);
GET16(flags);
GET16(questions);
GET16(answers);
GET16(authority);
GET16(additional);
if (flags & 0x8000) return -1; /* Must not be an answer. */
if (flags & 0x7800) return -1; /* only standard queries are supported */
flags &= 0x0300; /* Only TC and RD get preserved. */
server_req = malloc(sizeof(struct server_request));
if (server_req == NULL) return -1;
memset(server_req, 0, sizeof(struct server_request));
server_req->trans_id = trans_id;
memcpy(&server_req->addr, addr, addrlen);
server_req->addrlen = addrlen;
server_req->base.flags = flags;
/* nquestions counts the questions stored so far, which is what the */
/* cleanup code at `err` relies on */
server_req->base.nquestions = 0;
server_req->base.questions = malloc(sizeof(struct evdns_server_question *) * questions);
if (server_req->base.questions == NULL)
goto err;
for (i = 0; i < questions; ++i) {
u16 type, class;
struct evdns_server_question *q;
int namelen;
if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)
goto err;
GET16(type);
GET16(class);
namelen = strlen(tmp_name);
/* the name is stored directly behind the question struct */
/* NOTE(review): this assumes the struct's own name member leaves room for */
/* the terminating NUL - confirm against evdns.h */
q = malloc(sizeof(struct evdns_server_question) + namelen);
if (!q)
goto err;
q->type = type;
q->class = class;
memcpy(q->name, tmp_name, namelen+1);
server_req->base.questions[server_req->base.nquestions++] = q;
}
/* Ignore answers, authority, and additional. */
server_req->port = port;
/* the request holds a reference on the port from here on */
port->refcnt++;
port->user_callback(&(server_req->base), port->user_data);
return 0;
err:
if (server_req) {
if (server_req->base.questions) {
for (i = 0; i < server_req->base.nquestions; ++i)
free(server_req->base.questions[i]);
free(server_req->base.questions);
}
free(server_req);
}
return -1;
/* the packet-reading macros are not needed past this point */
#undef SKIP_NAME
#undef GET32
#undef GET16
#undef GET8
}
/* Try to choose a strong transaction id which isn't already in flight. */
/* Candidates come from the id source selected at compile time (CPU clock, */
/* gettimeofday or OpenSSL). The value 0xffff is never returned, and neither */
/* is an id carried by any request on the inflight list. */
static u16
transaction_id_pick(void) {
	for (;;) {
#ifdef DNS_USE_CPU_CLOCK_FOR_ID
		struct timespec ts;
		u16 trans_id;
#ifdef CLOCK_MONOTONIC
		if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
#else
		if (clock_gettime(CLOCK_REALTIME, &ts) == -1)
#endif
			event_err(1, "clock_gettime");
		trans_id = ts.tv_nsec & 0xffff;
#endif
#ifdef DNS_USE_GETTIMEOFDAY_FOR_ID
		struct timeval tv;
		u16 trans_id;
		gettimeofday(&tv, NULL);
		trans_id = tv.tv_usec & 0xffff;
#endif
#ifdef DNS_USE_OPENSSL_FOR_ID
		u16 trans_id;
		if (RAND_pseudo_bytes((u8 *) &trans_id, 2) == -1) {
			/* The fallback to gettimeofday that once lived here */
			/* is disabled; a failing RAND call is fatal. */
			abort();
		}
#endif
		if (trans_id == 0xffff) continue;
		/* Accept the id only if no inflight request carries it. The */
		/* lookup checks every list entry, the head included. */
		if (request_find_from_trans_id(trans_id) == NULL)
			return trans_id;
	}
}
/* choose a namesever to use. This function will try to ignore */
/* nameservers which we think are down and load balance across the rest */
/* by updating the server_head global each time. */
static struct nameserver *
nameserver_pick(void) {
struct nameserver *started_at = server_head, *picked;
if (!server_head) return NULL;
/* if we don't have any good nameservers then there's no */
/* point in trying to find one. */
if (!global_good_nameservers) {
server_head = server_head->next;
return server_head;
}
/* remember that nameservers are in a circular list */
for (;;) {
if (server_head->state) {
/* we think this server is currently good */
picked = server_head;
server_head = server_head->next;
return picked;
}
server_head = server_head->next;
if (server_head == started_at) {
/* all the nameservers seem to be down */
/* so we just return this one and hope for the */
/* best */
assert(global_good_nameservers == 0);
picked = server_head;
server_head = server_head->next;
return picked;
}
}
}
/* This is called when a nameserver socket is ready for reading. Packets */
/* are received and parsed until recv() fails. A "would block" error just */
/* ends the loop; any other error marks the nameserver as failed. */
static void
nameserver_read(struct nameserver *ns) {
	u8 packet[1500];

	for (;;) {
		const int got = recv(ns->socket, packet, sizeof(packet), 0);

		if (got >= 0) {
			/* the server answered, so its timeout streak ends */
			ns->timedout = 0;
			reply_parse(packet, got);
			continue;
		}

		{
			int err = last_error(ns->socket);
			if (!error_is_eagain(err))
				nameserver_failed(ns, strerror(err));
		}
		return;
	}
}
/* Read packets from DNS clients on server port s and pass each one, along */
/* with the sender's address, to request_parse(). Stops at the first */
/* recvfrom() failure; anything but "would block" is logged as a warning. */
static void
server_port_read(struct evdns_server_port *s) {
	u8 packet[1500];
	struct sockaddr_storage from;
	socklen_t fromlen;
	int got;

	for (;;) {
		fromlen = sizeof(struct sockaddr_storage);
		got = recvfrom(s->socket, packet, sizeof(packet), 0,
			(struct sockaddr*) &from, &fromlen);

		if (got >= 0) {
			request_parse(packet, got, s, (struct sockaddr*) &from, fromlen);
			continue;
		}

		{
			int err = last_error(s->socket);
			if (!error_is_eagain(err))
				log(EVDNS_LOG_WARN, "Error %s (%d) while reading request.",
					strerror(err), err);
		}
		return;
	}
}
/* Try to write all pending replies on a given DNS server port. */
static void
server_port_flush(struct evdns_server_port *port)
{
while (port->pending_replies) {
struct server_request *req = port->pending_replies;
int r = sendto(port->socket, req->response, req->response_len, 0,
(struct sockaddr*) &req->addr, req->addrlen);
if (r < 0) {
int err = last_error(port->socket);
if (error_is_eagain(err))
return;
log(EVDNS_LOG_WARN, "Error %s (%d) while writing response to port; dropping", strerror(err), err);
}
if (server_request_free(req)) {
/* we released the last reference to req->port. */
return;
}
}
/* We have no more pending requests; stop listening for 'writeable' events. */
(void) event_del(&port->event);
event_set(&port->event, port->socket, EV_READ | EV_PERSIST,
server_port_ready_callback, port);
if (event_add(&port->event, NULL) < 0) {