NSD currently processes only one UDP packet per socket per select() call. Since select() is relatively expensive, this means that under load NSD burns a lot of CPU unnecessarily. There's a simple trick to avoid this: make the UDP socket non-blocking and loop on recvfrom() until it returns -1, ignoring any EAGAIN errors (the attached patch additionally caps the loop at 100 packets per select()). Under light load, this costs one extra recvfrom() call per packet, but under heavy load it avoids going back to select() until the socket's input buffer is drained. Attached is an example patch against NSD 3.0.2 that implements this. According to the queryperf tool that comes with BIND, on a simple query against localhost on an old Linux box, this takes NSD's peak throughput from 39kpps to 48kpps, a 23% improvement. These are obviously ideal conditions, but please feel free to test for yourself. Also included is a patch that fixes a race condition that can cause NSD to freeze up: NSD 3.0.2 currently assumes that once select() reports a TCP listen() socket as readable, accept() on it will never block. However, it doesn't mark its TCP listen() sockets as non-blocking, so there is probably a race condition, at least on some OSes, either when the -N flag (number of servers to fork) is set to 2 or more, or when a client opens a connection and closes it again before NSD gets around to calling accept(). This may result in NSD not answering any requests until it receives its next TCP connection. The fix is simple: just mark the listen() socket as non-blocking. diff -ur nsd-3.0.2/server.c nsd-3.0.2.fewerselects/server.c --- nsd-3.0.2/server.c 2006-10-12 01:11:05.000000000 -0700 +++ nsd-3.0.2.fewerselects/server.c 2006-11-18 21:57:14.000000000 -0800 @@ -384,6 +384,11 @@ } #endif + if (fcntl(nsd->udp[i].s, F_SETFL, O_NONBLOCK) == -1) { + log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno)); + return -1; + } + /* Bind it... 
*/ if (bind(nsd->udp[i].s, (struct sockaddr *) nsd->udp[i].addr->ai_addr, nsd->udp[i].addr->ai_addrlen) != 0) { log_msg(LOG_ERR, "can't bind the socket: %s", strerror(errno)); @@ -423,6 +428,11 @@ } #endif + if (fcntl(nsd->tcp[i].s, F_SETFL, O_NONBLOCK) == -1) { + log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno)); + return -1; + } + /* Bind it... */ if (bind(nsd->tcp[i].s, (struct sockaddr *) nsd->tcp[i].addr->ai_addr, nsd->tcp[i].addr->ai_addrlen) != 0) { log_msg(LOG_ERR, "can't bind the socket: %s", strerror(errno)); @@ -1188,35 +1198,39 @@ { struct udp_handler_data *data = (struct udp_handler_data *) handler->user_data; - int received, sent; + int received, sent, tries; struct query *q = data->query; if (!(event_types & NETIO_EVENT_READ)) { return; } - /* Account... */ - if (data->socket->addr->ai_family == AF_INET) { - STATUP(data->nsd, qudp); - } else if (data->socket->addr->ai_family == AF_INET6) { - STATUP(data->nsd, qudp6); - } + /* As an optimization, answer up to 100 queries per fd per select(). */ + for (tries = 0; tries < 100; tries++) { + /* Initialize the query... */ + query_reset(q, UDP_MAX_MESSAGE_LEN, 0); + + received = recvfrom(handler->fd, + buffer_begin(q->packet), + buffer_remaining(q->packet), + 0, + (struct sockaddr *)&q->addr, + &q->addrlen); + if (received == -1) { + if (errno != EAGAIN && errno != EINTR) { + log_msg(LOG_ERR, "recvfrom failed: %s", strerror(errno)); + STATUP(data->nsd, rxerr); + } + return; + } - /* Initialize the query... */ - query_reset(q, UDP_MAX_MESSAGE_LEN, 0); - - received = recvfrom(handler->fd, - buffer_begin(q->packet), - buffer_remaining(q->packet), - 0, - (struct sockaddr *)&q->addr, - &q->addrlen); - if (received == -1) { - if (errno != EAGAIN && errno != EINTR) { - log_msg(LOG_ERR, "recvfrom failed: %s", strerror(errno)); - STATUP(data->nsd, rxerr); + /* Account... 
*/ + if (data->socket->addr->ai_family == AF_INET) { + STATUP(data->nsd, qudp); + } else if (data->socket->addr->ai_family == AF_INET6) { + STATUP(data->nsd, qudp6); } - } else { + buffer_skip(q->packet, received); buffer_flip(q->packet);