--- embedaddon/dnsmasq/src/dnsmasq.c 2014/06/15 16:31:38 1.1.1.2 +++ embedaddon/dnsmasq/src/dnsmasq.c 2023/09/27 11:02:07 1.1.1.5 @@ -1,4 +1,4 @@ -/* dnsmasq is Copyright (c) 2000-2014 Simon Kelley +/* dnsmasq is Copyright (c) 2000-2022 Simon Kelley This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,23 +17,28 @@ /* Declare static char *compiler_opts in config.h */ #define DNSMASQ_COMPILE_OPTS +/* dnsmasq.h has to be included first as it sources config.h */ #include "dnsmasq.h" +#if defined(HAVE_IDN) || defined(HAVE_LIBIDN2) || defined(LOCALEDIR) +#include <locale.h> +#endif + struct daemon *daemon; static volatile pid_t pid = 0; static volatile int pipewrite; -static int set_dns_listeners(time_t now, fd_set *set, int *maxfdp); -static void check_dns_listeners(fd_set *set, time_t now); +static void set_dns_listeners(void); +static void check_dns_listeners(time_t now); static void sig_handler(int sig); static void async_event(int pipe, time_t now); static void fatal_event(struct event_desc *ev, char *msg); static int read_event(int fd, struct event_desc *evp, char **msg); +static void poll_resolv(int force, int do_reload, time_t now); int main (int argc, char **argv) { - int bind_fallback = 0; time_t now; struct sigaction sigact; struct iname *if_tmp; @@ -47,19 +52,32 @@ int main (int argc, char **argv) long i, max_fd = sysconf(_SC_OPEN_MAX); char *baduser = NULL; int log_err; + int chown_warn = 0; #if defined(HAVE_LINUX_NETWORK) cap_user_header_t hdr = NULL; cap_user_data_t data = NULL; + int need_cap_net_admin = 0; + int need_cap_net_raw = 0; + int need_cap_net_bind_service = 0; char *bound_device = NULL; int did_bind = 0; + struct server *serv; + char *netlink_warn; +#else + int bind_fallback = 0; #endif #if defined(HAVE_DHCP) || defined(HAVE_DHCP6) struct dhcp_context *context; struct dhcp_relay *relay; #endif +#ifdef HAVE_TFTP + int tftp_prefix_missing = 0; +#endif -#ifdef 
LOCALEDIR +#if defined(HAVE_IDN) || defined(HAVE_LIBIDN2) || defined(LOCALEDIR) setlocale(LC_ALL, ""); +#endif +#ifdef LOCALEDIR bindtextdomain("dnsmasq", LOCALEDIR); textdomain("dnsmasq"); #endif @@ -73,37 +91,64 @@ int main (int argc, char **argv) sigaction(SIGTERM, &sigact, NULL); sigaction(SIGALRM, &sigact, NULL); sigaction(SIGCHLD, &sigact, NULL); - + sigaction(SIGINT, &sigact, NULL); + /* ignore SIGPIPE */ sigact.sa_handler = SIG_IGN; sigaction(SIGPIPE, &sigact, NULL); umask(022); /* known umask, create leases and pid files as 0644 */ + rand_init(); /* Must precede read_opts() */ + read_opts(argc, argv, compile_opts); +#ifdef HAVE_LINUX_NETWORK + daemon->kernel_version = kernel_version(); +#endif + if (daemon->edns_pktsz < PACKETSZ) daemon->edns_pktsz = PACKETSZ; -#ifdef HAVE_DNSSEC - /* Enforce min packet big enough for DNSSEC */ - if (option_bool(OPT_DNSSEC_VALID) && daemon->edns_pktsz < EDNS_PKTSZ) - daemon->edns_pktsz = EDNS_PKTSZ; -#endif - daemon->packet_buff_sz = daemon->edns_pktsz > DNSMASQ_PACKETSZ ? - daemon->edns_pktsz : DNSMASQ_PACKETSZ; + /* Min buffer size: we check after adding each record, so there must be + memory for the largest packet, and the largest record so the + min for DNS is PACKETSZ+MAXDNAME+RRFIXEDSZ which is < 1000. + This might be increased is EDNS packet size if greater than the minimum. */ + daemon->packet_buff_sz = daemon->edns_pktsz + MAXDNAME + RRFIXEDSZ; daemon->packet = safe_malloc(daemon->packet_buff_sz); - daemon->addrbuff = safe_malloc(ADDRSTRLEN); + if (option_bool(OPT_EXTRALOG)) + daemon->addrbuff2 = safe_malloc(ADDRSTRLEN); #ifdef HAVE_DNSSEC if (option_bool(OPT_DNSSEC_VALID)) { - daemon->keyname = safe_malloc(MAXDNAME); - daemon->workspacename = safe_malloc(MAXDNAME); + /* Note that both /000 and '.' are allowed within labels. These get + represented in presentation format using NAME_ESCAPE as an escape + character when in DNSSEC mode. + In theory, if all the characters in a name were /000 or + '.' 
or NAME_ESCAPE then all would have to be escaped, so the + presentation format would be twice as long as the spec. + + daemon->namebuff was previously allocated by the option-reading + code before we knew if we're in DNSSEC mode, so reallocate here. */ + free(daemon->namebuff); + daemon->namebuff = safe_malloc(MAXDNAME * 2); + daemon->keyname = safe_malloc(MAXDNAME * 2); + daemon->workspacename = safe_malloc(MAXDNAME * 2); + /* one char flag per possible RR in answer section (may get extended). */ + daemon->rr_status_sz = 64; + daemon->rr_status = safe_malloc(sizeof(*daemon->rr_status) * daemon->rr_status_sz); } #endif +#if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS) + /* CONNTRACK UBUS code uses this buffer, so if not allocated above, + we need to allocate it here. */ + if (option_bool(OPT_CMARK_ALST_EN) && !daemon->workspacename) + daemon->workspacename = safe_malloc(MAXDNAME); +#endif + #ifdef HAVE_DHCP if (!daemon->lease_file) { @@ -112,20 +157,18 @@ int main (int argc, char **argv) } #endif - /* Close any file descriptors we inherited apart from std{in|out|err} - - Ensure that at least stdin, stdout and stderr (fd 0, 1, 2) exist, + /* Ensure that at least stdin, stdout and stderr (fd 0, 1, 2) exist, otherwise file descriptors we create can end up being 0, 1, or 2 and then get accidentally closed later when we make 0, 1, and 2 open to /dev/null. Normally we'll be started with 0, 1 and 2 open, but it's not guaranteed. By opening /dev/null three times, we ensure that we're not using those fds for real stuff. 
*/ - for (i = 0; i < max_fd; i++) - if (i != STDOUT_FILENO && i != STDERR_FILENO && i != STDIN_FILENO) - close(i); - else - open("/dev/null", O_RDWR); - + for (i = 0; i < 3; i++) + open("/dev/null", O_RDWR); + + /* Close any file descriptors we inherited apart from std{in|out|err} */ + close_fds(max_fd, -1, -1, -1); + #ifndef HAVE_LINUX_NETWORK # if !(defined(IP_RECVDSTADDR) && defined(IP_RECVIF) && defined(IP_SENDSRCADDR)) if (!option_bool(OPT_NOWILD)) @@ -143,15 +186,28 @@ int main (int argc, char **argv) reset_option_bool(OPT_CLEVERBIND); } #endif + +#ifndef HAVE_INOTIFY + if (daemon->dynamic_dirs) + die(_("dhcp-hostsdir, dhcp-optsdir and hostsdir are not supported on this platform"), NULL, EC_BADCONF); +#endif if (option_bool(OPT_DNSSEC_VALID)) { #ifdef HAVE_DNSSEC - if (!daemon->ds) - die(_("No trust anchors provided for DNSSEC"), NULL, EC_BADCONF); + struct ds_config *ds; + + /* Must have at least a root trust anchor, or the DNSSEC code + can loop forever. */ + for (ds = daemon->ds; ds; ds = ds->next) + if (ds->name[0] == 0) + break; + + if (!ds) + die(_("no root trust anchor provided for DNSSEC"), NULL, EC_BADCONF); if (daemon->cachesize < CACHESIZ) - die(_("Cannot reduce cache size from default when DNSSEC enabled"), NULL, EC_BADCONF); + die(_("cannot reduce cache size from default when DNSSEC enabled"), NULL, EC_BADCONF); #else die(_("DNSSEC not available: set HAVE_DNSSEC in src/config.h"), NULL, EC_BADCONF); #endif @@ -163,39 +219,73 @@ int main (int argc, char **argv) #endif #ifdef HAVE_CONNTRACK - if (option_bool(OPT_CONNTRACK) && (daemon->query_port != 0 || daemon->osport)) - die (_("Cannot use --conntrack AND --query-port"), NULL, EC_BADCONF); + if (option_bool(OPT_CONNTRACK)) + { + if (daemon->query_port != 0 || daemon->osport) + die (_("cannot use --conntrack AND --query-port"), NULL, EC_BADCONF); + + need_cap_net_admin = 1; + } #else if (option_bool(OPT_CONNTRACK)) - die(_("Conntrack support not available: set HAVE_CONNTRACK in src/config.h"), 
NULL, EC_BADCONF); + die(_("conntrack support not available: set HAVE_CONNTRACK in src/config.h"), NULL, EC_BADCONF); #endif #ifdef HAVE_SOLARIS_NETWORK if (daemon->max_logs != 0) - die(_("asychronous logging is not available under Solaris"), NULL, EC_BADCONF); + die(_("asynchronous logging is not available under Solaris"), NULL, EC_BADCONF); #endif #ifdef __ANDROID__ if (daemon->max_logs != 0) - die(_("asychronous logging is not available under Android"), NULL, EC_BADCONF); + die(_("asynchronous logging is not available under Android"), NULL, EC_BADCONF); #endif #ifndef HAVE_AUTH - if (daemon->authserver) + if (daemon->auth_zones) die(_("authoritative DNS not available: set HAVE_AUTH in src/config.h"), NULL, EC_BADCONF); #endif - rand_init(); +#ifndef HAVE_LOOP + if (option_bool(OPT_LOOP_DETECT)) + die(_("loop detection not available: set HAVE_LOOP in src/config.h"), NULL, EC_BADCONF); +#endif + +#ifndef HAVE_UBUS + if (option_bool(OPT_UBUS)) + die(_("Ubus not available: set HAVE_UBUS in src/config.h"), NULL, EC_BADCONF); +#endif + /* Handle only one of min_port/max_port being set. */ + if (daemon->min_port != 0 && daemon->max_port == 0) + daemon->max_port = MAX_PORT; + + if (daemon->max_port != 0 && daemon->min_port == 0) + daemon->min_port = MIN_PORT; + + if (daemon->max_port < daemon->min_port) + die(_("max_port cannot be smaller than min_port"), NULL, EC_BADCONF); + + if (daemon->max_port != 0 && + daemon->max_port - daemon->min_port + 1 < daemon->randport_limit) + die(_("port_limit must not be larger than available port range"), NULL, EC_BADCONF); + now = dnsmasq_time(); - /* Create a serial at startup if not configured. */ - if (daemon->authinterface && daemon->soa_sn == 0) + if (daemon->auth_zones) + { + if (!daemon->authserver) + die(_("--auth-server required when an auth zone is defined."), NULL, EC_BADCONF); + + /* Create a serial at startup if not configured. 
*/ #ifdef HAVE_BROKEN_RTC - die(_("zone serial must be configured in --auth-soa"), NULL, EC_BADCONF); + if (daemon->soa_sn == 0) + die(_("zone serial must be configured in --auth-soa"), NULL, EC_BADCONF); #else - daemon->soa_sn = now; + if (daemon->soa_sn == 0) + daemon->soa_sn = now; #endif + } #ifdef HAVE_DHCP6 if (daemon->dhcp6) @@ -220,8 +310,11 @@ int main (int argc, char **argv) /* Note that order matters here, we must call lease_init before creating any file descriptors which shouldn't be leaked to the lease-script init process. We need to call common_init - before lease_init to allocate buffers it uses.*/ - if (daemon->dhcp || daemon->doing_dhcp6 || daemon->relay4 || daemon->relay6) + before lease_init to allocate buffers it uses. + The script subsystem relies on DHCP buffers, hence the last two + conditions below. */ + if (daemon->dhcp || daemon->doing_dhcp6 || daemon->relay4 || + daemon->relay6 || option_bool(OPT_TFTP) || option_bool(OPT_SCRIPT_ARP)) { dhcp_common_init(); if (daemon->dhcp || daemon->doing_dhcp6) @@ -229,11 +322,24 @@ int main (int argc, char **argv) } if (daemon->dhcp || daemon->relay4) - dhcp_init(); + { + dhcp_init(); +# ifdef HAVE_LINUX_NETWORK + if (!option_bool(OPT_NO_PING)) + need_cap_net_raw = 1; + need_cap_net_admin = 1; +# endif + } # ifdef HAVE_DHCP6 if (daemon->doing_ra || daemon->doing_dhcp6 || daemon->relay6) - ra_init(now); + { + ra_init(now); +# ifdef HAVE_LINUX_NETWORK + need_cap_net_raw = 1; + need_cap_net_admin = 1; +# endif + } if (daemon->doing_dhcp6 || daemon->relay6) dhcp6_init(); @@ -243,11 +349,26 @@ int main (int argc, char **argv) #ifdef HAVE_IPSET if (daemon->ipsets) - ipset_init(); + { + ipset_init(); +# ifdef HAVE_LINUX_NETWORK + need_cap_net_admin = 1; +# endif + } #endif +#ifdef HAVE_NFTSET + if (daemon->nftsets) + { + nftset_init(); +# ifdef HAVE_LINUX_NETWORK + need_cap_net_admin = 1; +# endif + } +#endif + #if defined(HAVE_LINUX_NETWORK) - netlink_init(); + netlink_warn = netlink_init(); #elif 
defined(HAVE_BSD_NETWORK) route_init(); #endif @@ -270,28 +391,9 @@ int main (int argc, char **argv) #if defined(HAVE_LINUX_NETWORK) && defined(HAVE_DHCP) /* after enumerate_interfaces() */ bound_device = whichdevice(); - - if (daemon->dhcp) - { - if (!daemon->relay4 && bound_device) - { - bindtodevice(bound_device, daemon->dhcpfd); - did_bind = 1; - } - if (daemon->enable_pxe && bound_device) - { - bindtodevice(bound_device, daemon->pxefd); - did_bind = 1; - } - } -#endif -#if defined(HAVE_LINUX_NETWORK) && defined(HAVE_DHCP6) - if (daemon->doing_dhcp6 && !daemon->relay6 && bound_device) - { - bindtodevice(bound_device, daemon->dhcp6fd); - did_bind = 1; - } + if ((did_bind = bind_dhcp_devices(bound_device)) & 2) + die(_("failed to set SO_BINDTODEVICE on DHCP socket: %s"), NULL, EC_BADNET); #endif } else @@ -309,24 +411,57 @@ int main (int argc, char **argv) if (daemon->port != 0) { cache_init(); -#ifdef HAVE_DNSSEC blockdata_init(); -#endif + hash_questions_init(); + + /* Scale random socket pool by ftabsize, but + limit it based on available fds. 
*/ + daemon->numrrand = daemon->ftabsize/2; + if (daemon->numrrand > max_fd/3) + daemon->numrrand = max_fd/3; + /* safe_malloc returns zero'd memory */ + daemon->randomsocks = safe_malloc(daemon->numrrand * sizeof(struct randfd)); } - + +#ifdef HAVE_INOTIFY + if ((daemon->port != 0 || daemon->dhcp || daemon->doing_dhcp6) + && (!option_bool(OPT_NO_RESOLV) || daemon->dynamic_dirs)) + inotify_dnsmasq_init(); + else + daemon->inotifyfd = -1; +#endif + + if (daemon->dump_file) +#ifdef HAVE_DUMPFILE + dump_init(); + else + daemon->dumpfd = -1; +#else + die(_("Packet dumps not available: set HAVE_DUMP in src/config.h"), NULL, EC_BADCONF); +#endif + if (option_bool(OPT_DBUS)) #ifdef HAVE_DBUS { char *err; - daemon->dbus = NULL; - daemon->watches = NULL; if ((err = dbus_init())) die(_("DBus error: %s"), err, EC_MISC); } #else die(_("DBus not available: set HAVE_DBUS in src/config.h"), NULL, EC_BADCONF); #endif - + + if (option_bool(OPT_UBUS)) +#ifdef HAVE_UBUS + { + char *err; + if ((err = ubus_init())) + die(_("UBus error: %s"), err, EC_MISC); + } +#else + die(_("UBus not available: set HAVE_UBUS in src/config.h"), NULL, EC_BADCONF); +#endif + if (daemon->port != 0) pre_allocate_sfds(); @@ -336,10 +471,12 @@ int main (int argc, char **argv) daemon->scriptuser && (daemon->lease_change_command || daemon->luascript)) { - if ((ent_pw = getpwnam(daemon->scriptuser))) + struct passwd *scr_pw; + + if ((scr_pw = getpwnam(daemon->scriptuser))) { - script_uid = ent_pw->pw_uid; - script_gid = ent_pw->pw_gid; + script_uid = scr_pw->pw_uid; + script_gid = scr_pw->pw_gid; } else baduser = daemon->scriptuser; @@ -353,7 +490,7 @@ int main (int argc, char **argv) if (baduser) die(_("unknown user or group: %s"), baduser, EC_BADCONF); - + /* implement group defaults, "dip" if available, or group associated with uid */ if (!daemon->group_set && !gp) { @@ -366,28 +503,81 @@ int main (int argc, char **argv) } #if defined(HAVE_LINUX_NETWORK) + /* We keep CAP_NETADMIN (for ARP-injection) and + 
CAP_NET_RAW (for icmp) if we're doing dhcp, + if we have yet to bind ports because of DAD, + or we're doing it dynamically, we need CAP_NET_BIND_SERVICE. */ + if ((is_dad_listeners() || option_bool(OPT_CLEVERBIND)) && + (option_bool(OPT_TFTP) || (daemon->port != 0 && daemon->port <= 1024))) + need_cap_net_bind_service = 1; + + /* usptream servers which bind to an interface call SO_BINDTODEVICE + for each TCP connection, so need CAP_NET_RAW */ + for (serv = daemon->servers; serv; serv = serv->next) + if (serv->interface[0] != 0) + need_cap_net_raw = 1; + + /* If we're doing Dbus or UBus, the above can be set dynamically, + (as can ports) so always (potentially) needed. */ +#ifdef HAVE_DBUS + if (option_bool(OPT_DBUS)) + { + need_cap_net_bind_service = 1; + need_cap_net_raw = 1; + } +#endif + +#ifdef HAVE_UBUS + if (option_bool(OPT_UBUS)) + { + need_cap_net_bind_service = 1; + need_cap_net_raw = 1; + } +#endif + /* determine capability API version here, while we can still call safe_malloc */ - if (ent_pw && ent_pw->pw_uid != 0) + int capsize = 1; /* for header version 1 */ + char *fail = NULL; + + hdr = safe_malloc(sizeof(*hdr)); + + /* find version supported by kernel */ + memset(hdr, 0, sizeof(*hdr)); + capget(hdr, NULL); + + if (hdr->version != LINUX_CAPABILITY_VERSION_1) { - int capsize = 1; /* for header version 1 */ - hdr = safe_malloc(sizeof(*hdr)); - - /* find version supported by kernel */ - memset(hdr, 0, sizeof(*hdr)); - capget(hdr, NULL); - - if (hdr->version != LINUX_CAPABILITY_VERSION_1) - { - /* if unknown version, use largest supported version (3) */ - if (hdr->version != LINUX_CAPABILITY_VERSION_2) - hdr->version = LINUX_CAPABILITY_VERSION_3; - capsize = 2; - } - - data = safe_malloc(sizeof(*data) * capsize); - memset(data, 0, sizeof(*data) * capsize); + /* if unknown version, use largest supported version (3) */ + if (hdr->version != LINUX_CAPABILITY_VERSION_2) + hdr->version = LINUX_CAPABILITY_VERSION_3; + capsize = 2; } + + data = 
safe_malloc(sizeof(*data) * capsize); + capget(hdr, data); /* Get current values, for verification */ + + if (need_cap_net_admin && !(data->permitted & (1 << CAP_NET_ADMIN))) + fail = "NET_ADMIN"; + else if (need_cap_net_raw && !(data->permitted & (1 << CAP_NET_RAW))) + fail = "NET_RAW"; + else if (need_cap_net_bind_service && !(data->permitted & (1 << CAP_NET_BIND_SERVICE))) + fail = "NET_BIND_SERVICE"; + + if (fail) + die(_("process is missing required capability %s"), fail, EC_MISC); + + /* Now set bitmaps to set caps after daemonising */ + memset(data, 0, sizeof(*data) * capsize); + + if (need_cap_net_admin) + data->effective |= (1 << CAP_NET_ADMIN); + if (need_cap_net_raw) + data->effective |= (1 << CAP_NET_RAW); + if (need_cap_net_bind_service) + data->effective |= (1 << CAP_NET_BIND_SERVICE); + + data->permitted = data->effective; #endif /* Use a pipe to carry signals and other events back to the event loop @@ -409,7 +599,6 @@ int main (int argc, char **argv) if (chdir("/") != 0) die(_("cannot chdir to filesystem root: %s"), NULL, EC_MISC); -#ifndef NO_FORK if (!option_bool(OPT_NO_FORK)) { pid_t pid; @@ -449,7 +638,6 @@ int main (int argc, char **argv) if (pid != 0) _exit(0); } -#endif /* write pidfile _after_ forking ! */ if (daemon->runfile) @@ -464,7 +652,7 @@ int main (int argc, char **argv) extent that an attacker running as the unprivileged user could replace the pidfile with a symlink, and have the target of that symlink overwritten as root next time dnsmasq starts. - The folowing code first deletes any existing file, and then opens it with the O_EXCL flag, + The following code first deletes any existing file, and then opens it with the O_EXCL flag, ensuring that the open() fails should there be any existing file (because the unlink() failed, or an attacker exploited the race between unlink() and open()). This ensures that no symlink attack can succeed. 
@@ -487,12 +675,22 @@ int main (int argc, char **argv) } else { + /* We're still running as root here. Change the ownership of the PID file + to the user we will be running as. Note that this is not to allow + us to delete the file, since that depends on the permissions + of the directory containing the file. That directory will + need to by owned by the dnsmasq user, and the ownership of the + file has to match, to keep systemd >273 happy. */ + if (getuid() == 0 && ent_pw && ent_pw->pw_uid != 0 && fchown(fd, ent_pw->pw_uid, ent_pw->pw_gid) == -1) + chown_warn = errno; + if (!read_write(fd, (unsigned char *)daemon->namebuff, strlen(daemon->namebuff), 0)) err = 1; - - while (!err && close(fd) == -1) - if (!retry_send()) - err = 1; + else + { + if (close(fd) == -1) + err = 1; + } } if (err) @@ -509,17 +707,25 @@ int main (int argc, char **argv) { /* open stdout etc to /dev/null */ int nullfd = open("/dev/null", O_RDWR); - dup2(nullfd, STDOUT_FILENO); - dup2(nullfd, STDERR_FILENO); - dup2(nullfd, STDIN_FILENO); - close(nullfd); + if (nullfd != -1) + { + dup2(nullfd, STDOUT_FILENO); + dup2(nullfd, STDERR_FILENO); + dup2(nullfd, STDIN_FILENO); + close(nullfd); + } } /* if we are to run scripts, we need to fork a helper before dropping root. 
*/ daemon->helperfd = -1; #ifdef HAVE_SCRIPT - if ((daemon->dhcp || daemon->dhcp6) && (daemon->lease_change_command || daemon->luascript)) - daemon->helperfd = create_helper(pipewrite, err_pipe[1], script_uid, script_gid, max_fd); + if ((daemon->dhcp || + daemon->dhcp6 || + daemon->relay6 || + option_bool(OPT_TFTP) || + option_bool(OPT_SCRIPT_ARP)) && + (daemon->lease_change_command || daemon->luascript)) + daemon->helperfd = create_helper(pipewrite, err_pipe[1], script_uid, script_gid, max_fd); #endif if (!option_bool(OPT_DEBUG) && getuid() == 0) @@ -527,7 +733,7 @@ int main (int argc, char **argv) int bad_capabilities = 0; gid_t dummy; - /* remove all supplimentary groups */ + /* remove all supplementary groups */ if (gp && (setgroups(0, &dummy) == -1 || setgid(gp->gr_gid) == -1)) @@ -539,18 +745,9 @@ int main (int argc, char **argv) if (ent_pw && ent_pw->pw_uid != 0) { #if defined(HAVE_LINUX_NETWORK) - /* On linux, we keep CAP_NETADMIN (for ARP-injection) and - CAP_NET_RAW (for icmp) if we're doing dhcp. If we have yet to bind - ports because of DAD, or we're doing it dynamically, - we need CAP_NET_BIND_SERVICE too. */ - if (is_dad_listeners() || option_bool(OPT_CLEVERBIND)) - data->effective = data->permitted = data->inheritable = - (1 << CAP_NET_ADMIN) | (1 << CAP_NET_RAW) | - (1 << CAP_SETUID) | (1 << CAP_NET_BIND_SERVICE); - else - data->effective = data->permitted = data->inheritable = - (1 << CAP_NET_ADMIN) | (1 << CAP_NET_RAW) | (1 << CAP_SETUID); - + /* Need to be able to drop root. 
*/ + data->effective |= (1 << CAP_SETUID); + data->permitted |= (1 << CAP_SETUID); /* Tell kernel to not clear capabilities when dropping root */ if (capset(hdr, data) == -1 || prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) == -1) bad_capabilities = errno; @@ -591,15 +788,10 @@ int main (int argc, char **argv) } #ifdef HAVE_LINUX_NETWORK - if (is_dad_listeners() || option_bool(OPT_CLEVERBIND)) - data->effective = data->permitted = - (1 << CAP_NET_ADMIN) | (1 << CAP_NET_RAW) | (1 << CAP_NET_BIND_SERVICE); - else - data->effective = data->permitted = - (1 << CAP_NET_ADMIN) | (1 << CAP_NET_RAW); - data->inheritable = 0; + data->effective &= ~(1 << CAP_SETUID); + data->permitted &= ~(1 << CAP_SETUID); - /* lose the setuid and setgid capbilities */ + /* lose the setuid capability */ if (capset(hdr, data) == -1) { send_event(err_pipe[1], EVENT_CAP_ERR, errno, NULL); @@ -611,12 +803,14 @@ int main (int argc, char **argv) } #ifdef HAVE_LINUX_NETWORK + free(hdr); + free(data); if (option_bool(OPT_DEBUG)) prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); #endif #ifdef HAVE_TFTP - if (option_bool(OPT_TFTP)) + if (option_bool(OPT_TFTP)) { DIR *dir; struct tftp_prefix *p; @@ -625,32 +819,56 @@ int main (int argc, char **argv) { if (!((dir = opendir(daemon->tftp_prefix)))) { - send_event(err_pipe[1], EVENT_TFTP_ERR, errno, daemon->tftp_prefix); - _exit(0); + tftp_prefix_missing = 1; + if (!option_bool(OPT_TFTP_NO_FAIL)) + { + send_event(err_pipe[1], EVENT_TFTP_ERR, errno, daemon->tftp_prefix); + _exit(0); + } } - closedir(dir); + else + closedir(dir); } for (p = daemon->if_prefix; p; p = p->next) { + p->missing = 0; if (!((dir = opendir(p->prefix)))) - { - send_event(err_pipe[1], EVENT_TFTP_ERR, errno, p->prefix); - _exit(0); - } - closedir(dir); + { + p->missing = 1; + if (!option_bool(OPT_TFTP_NO_FAIL)) + { + send_event(err_pipe[1], EVENT_TFTP_ERR, errno, p->prefix); + _exit(0); + } + } + else + closedir(dir); } } #endif if (daemon->port == 0) my_syslog(LOG_INFO, _("started, version %s DNS disabled"), 
VERSION); - else if (daemon->cachesize != 0) - my_syslog(LOG_INFO, _("started, version %s cachesize %d"), VERSION, daemon->cachesize); - else - my_syslog(LOG_INFO, _("started, version %s cache disabled"), VERSION); + else + { + if (daemon->cachesize != 0) + { + my_syslog(LOG_INFO, _("started, version %s cachesize %d"), VERSION, daemon->cachesize); + if (daemon->cachesize > 10000) + my_syslog(LOG_WARNING, _("cache size greater than 10000 may cause performance issues, and is unlikely to be useful.")); + } + else + my_syslog(LOG_INFO, _("started, version %s cache disabled"), VERSION); + + if (option_bool(OPT_LOCAL_SERVICE)) + my_syslog(LOG_INFO, _("DNS service limited to local subnets")); + } my_syslog(LOG_INFO, _("compile time options: %s"), compile_opts); + + if (chown_warn != 0) + my_syslog(LOG_WARNING, "chown of PID file %s failed: %s", daemon->runfile, strerror(chown_warn)); #ifdef HAVE_DBUS if (option_bool(OPT_DBUS)) @@ -662,15 +880,46 @@ int main (int argc, char **argv) } #endif - if (option_bool(OPT_LOCAL_SERVICE)) - my_syslog(LOG_INFO, _("DNS service limited to local subnets")); - +#ifdef HAVE_UBUS + if (option_bool(OPT_UBUS)) + { + if (daemon->ubus) + my_syslog(LOG_INFO, _("UBus support enabled: connected to system bus")); + else + my_syslog(LOG_INFO, _("UBus support enabled: bus connection pending")); + } +#endif + #ifdef HAVE_DNSSEC if (option_bool(OPT_DNSSEC_VALID)) { - my_syslog(LOG_INFO, _("DNSSEC validation enabled")); - if (option_bool(OPT_DNSSEC_TIME)) - my_syslog(LOG_INFO, _("DNSSEC signature timestamps not checked until first cache reload")); + int rc; + struct ds_config *ds; + + /* Delay creating the timestamp file until here, after we've changed user, so that + it has the correct owner to allow updating the mtime later. + This means we have to report fatal errors via the pipe. 
*/ + if ((rc = setup_timestamp()) == -1) + { + send_event(err_pipe[1], EVENT_TIME_ERR, errno, daemon->timestamp_file); + _exit(0); + } + + if (option_bool(OPT_DNSSEC_IGN_NS)) + my_syslog(LOG_INFO, _("DNSSEC validation enabled but all unsigned answers are trusted")); + else + my_syslog(LOG_INFO, _("DNSSEC validation enabled")); + + daemon->dnssec_no_time_check = option_bool(OPT_DNSSEC_TIME); + if (option_bool(OPT_DNSSEC_TIME) && !daemon->back_to_the_future) + my_syslog(LOG_INFO, _("DNSSEC signature timestamps not checked until receipt of SIGINT")); + + if (rc == 1) + my_syslog(LOG_INFO, _("DNSSEC signature timestamps not checked until system time valid")); + + for (ds = daemon->ds; ds; ds = ds->next) + my_syslog(LOG_INFO, _("configured with trust anchor for %s keytag %u"), + ds->name[0] == 0 ? "" : ds->name, ds->keytag); } #endif @@ -678,11 +927,15 @@ int main (int argc, char **argv) my_syslog(LOG_WARNING, _("warning: failed to change owner of %s: %s"), daemon->log_file, strerror(log_err)); +#ifndef HAVE_LINUX_NETWORK if (bind_fallback) my_syslog(LOG_WARNING, _("setting --bind-interfaces option because of OS limitations")); +#endif if (option_bool(OPT_NOWILD)) warn_bound_listeners(); + else if (!option_bool(OPT_CLEVERBIND)) + warn_wild_labels(); warn_int_names(); @@ -728,36 +981,44 @@ int main (int argc, char **argv) # ifdef HAVE_LINUX_NETWORK if (did_bind) my_syslog(MS_DHCP | LOG_INFO, _("DHCP, sockets bound exclusively to interface %s"), bound_device); + + if (netlink_warn) + my_syslog(LOG_WARNING, netlink_warn); # endif - /* after dhcp_contruct_contexts */ + /* after dhcp_construct_contexts */ if (daemon->dhcp || daemon->doing_dhcp6) lease_find_interfaces(now); #endif #ifdef HAVE_TFTP - if (option_bool(OPT_TFTP)) + if (option_bool(OPT_TFTP)) { -#ifdef FD_SETSIZE - if (FD_SETSIZE < (unsigned)max_fd) - max_fd = FD_SETSIZE; -#endif + struct tftp_prefix *p; - my_syslog(MS_TFTP | LOG_INFO, "TFTP %s%s %s", + my_syslog(MS_TFTP | LOG_INFO, "TFTP %s%s %s %s", 
daemon->tftp_prefix ? _("root is ") : _("enabled"), - daemon->tftp_prefix ? daemon->tftp_prefix: "", - option_bool(OPT_TFTP_SECURE) ? _("secure mode") : ""); - + daemon->tftp_prefix ? daemon->tftp_prefix : "", + option_bool(OPT_TFTP_SECURE) ? _("secure mode") : "", + option_bool(OPT_SINGLE_PORT) ? _("single port mode") : ""); + + if (tftp_prefix_missing) + my_syslog(MS_TFTP | LOG_WARNING, _("warning: %s inaccessible"), daemon->tftp_prefix); + + for (p = daemon->if_prefix; p; p = p->next) + if (p->missing) + my_syslog(MS_TFTP | LOG_WARNING, _("warning: TFTP directory %s inaccessible"), p->prefix); + /* This is a guess, it assumes that for small limits, disjoint files might be served, but for large limits, a single file will be sent to may clients (the file only needs one fd). */ - max_fd -= 30; /* use other than TFTP */ + max_fd -= 30 + daemon->numrrand; /* use other than TFTP */ if (max_fd < 0) max_fd = 5; - else if (max_fd < 100) + else if (max_fd < 100 && !option_bool(OPT_SINGLE_PORT)) max_fd = max_fd/2; else max_fd = max_fd - 20; @@ -783,124 +1044,133 @@ int main (int argc, char **argv) close(err_pipe[1]); if (daemon->port != 0) - check_servers(); + check_servers(0); pid = getpid(); + + daemon->pipe_to_parent = -1; + for (i = 0; i < MAX_PROCS; i++) + daemon->tcp_pipes[i] = -1; +#ifdef HAVE_INOTIFY + /* Using inotify, have to select a resolv file at startup */ + poll_resolv(1, 0, now); +#endif + while (1) { - int maxfd = -1; - struct timeval t, *tp = NULL; - fd_set rset, wset, eset; + int timeout = fast_retry(now); - FD_ZERO(&rset); - FD_ZERO(&wset); - FD_ZERO(&eset); + poll_reset(); - /* if we are out of resources, find how long we have to wait - for some to come free, we'll loop around then and restart - listening for queries */ - if ((t.tv_sec = set_dns_listeners(now, &rset, &maxfd)) != 0) - { - t.tv_usec = 0; - tp = &t; - } - /* Whilst polling for the dbus, or doing a tftp transfer, wake every quarter second */ - if (daemon->tftp_trans || - 
(option_bool(OPT_DBUS) && !daemon->dbus)) - { - t.tv_sec = 0; - t.tv_usec = 250000; - tp = &t; - } + if ((daemon->tftp_trans || (option_bool(OPT_DBUS) && !daemon->dbus)) && + (timeout == -1 || timeout > 250)) + timeout = 250; + /* Wake every second whilst waiting for DAD to complete */ - else if (is_dad_listeners()) - { - t.tv_sec = 1; - t.tv_usec = 0; - tp = &t; - } + else if (is_dad_listeners() && + (timeout == -1 || timeout > 1000)) + timeout = 1000; + + set_dns_listeners(); #ifdef HAVE_DBUS - set_dbus_listeners(&maxfd, &rset, &wset, &eset); -#endif - + if (option_bool(OPT_DBUS)) + set_dbus_listeners(); +#endif + +#ifdef HAVE_UBUS + if (option_bool(OPT_UBUS)) + set_ubus_listeners(); +#endif + #ifdef HAVE_DHCP - if (daemon->dhcp || daemon->relay4) +# if defined(HAVE_LINUX_NETWORK) + if (bind_dhcp_devices(bound_device) & 2) { - FD_SET(daemon->dhcpfd, &rset); - bump_maxfd(daemon->dhcpfd, &maxfd); - if (daemon->pxefd != -1) + static int warned = 0; + if (!warned) { - FD_SET(daemon->pxefd, &rset); - bump_maxfd(daemon->pxefd, &maxfd); + my_syslog(LOG_ERR, _("error binding DHCP socket to device %s"), bound_device); + warned = 1; } } +# endif + if (daemon->dhcp || daemon->relay4) + { + poll_listen(daemon->dhcpfd, POLLIN); + if (daemon->pxefd != -1) + poll_listen(daemon->pxefd, POLLIN); + } #endif #ifdef HAVE_DHCP6 if (daemon->doing_dhcp6 || daemon->relay6) - { - FD_SET(daemon->dhcp6fd, &rset); - bump_maxfd(daemon->dhcp6fd, &maxfd); - } - + poll_listen(daemon->dhcp6fd, POLLIN); + if (daemon->doing_ra) - { - FD_SET(daemon->icmp6fd, &rset); - bump_maxfd(daemon->icmp6fd, &maxfd); - } + poll_listen(daemon->icmp6fd, POLLIN); #endif + +#ifdef HAVE_INOTIFY + if (daemon->inotifyfd != -1) + poll_listen(daemon->inotifyfd, POLLIN); +#endif #if defined(HAVE_LINUX_NETWORK) - FD_SET(daemon->netlinkfd, &rset); - bump_maxfd(daemon->netlinkfd, &maxfd); + poll_listen(daemon->netlinkfd, POLLIN); #elif defined(HAVE_BSD_NETWORK) - FD_SET(daemon->routefd, &rset); - bump_maxfd(daemon->routefd, 
&maxfd); + poll_listen(daemon->routefd, POLLIN); #endif + + poll_listen(piperead, POLLIN); - FD_SET(piperead, &rset); - bump_maxfd(piperead, &maxfd); +#ifdef HAVE_SCRIPT +# ifdef HAVE_DHCP + while (helper_buf_empty() && do_script_run(now)); +# endif -#ifdef HAVE_DHCP -# ifdef HAVE_SCRIPT - while (helper_buf_empty() && do_script_run(now)); + /* Refresh cache */ + if (option_bool(OPT_SCRIPT_ARP)) + find_mac(NULL, NULL, 0, now); + while (helper_buf_empty() && do_arp_script_run()); # ifdef HAVE_TFTP while (helper_buf_empty() && do_tftp_script_run()); # endif +# ifdef HAVE_DHCP6 + while (helper_buf_empty() && do_snoop_script_run()); +# endif + if (!helper_buf_empty()) - { - FD_SET(daemon->helperfd, &wset); - bump_maxfd(daemon->helperfd, &maxfd); - } -# else + poll_listen(daemon->helperfd, POLLOUT); +#else /* need this for other side-effects */ +# ifdef HAVE_DHCP while (do_script_run(now)); +# endif + while (do_arp_script_run()); + # ifdef HAVE_TFTP while (do_tftp_script_run()); # endif -# endif #endif + - /* must do this just before select(), when we know no + /* must do this just before do_poll(), when we know no more calls to my_syslog() can occur */ - set_log_writer(&wset, &maxfd); + set_log_writer(); - if (select(maxfd+1, &rset, &wset, &eset, tp) < 0) - { - /* otherwise undefined after error */ - FD_ZERO(&rset); FD_ZERO(&wset); FD_ZERO(&eset); - } - + if (do_poll(timeout) < 0) + continue; + now = dnsmasq_time(); - check_log_writer(&wset); + check_log_writer(0); /* prime. 
*/ enumerate_interfaces(1); @@ -916,13 +1186,20 @@ int main (int argc, char **argv) } #if defined(HAVE_LINUX_NETWORK) - if (FD_ISSET(daemon->netlinkfd, &rset)) - netlink_multicast(now); + if (poll_check(daemon->netlinkfd, POLLIN)) + netlink_multicast(); #elif defined(HAVE_BSD_NETWORK) - if (FD_ISSET(daemon->routefd, &rset)) - route_sock(now); + if (poll_check(daemon->routefd, POLLIN)) + route_sock(); #endif +#ifdef HAVE_INOTIFY + if (daemon->inotifyfd != -1 && poll_check(daemon->inotifyfd, POLLIN) && inotify_check(now)) + { + if (daemon->port != 0 && !option_bool(OPT_NO_POLL)) + poll_resolv(1, 1, now); + } +#else /* Check for changes to resolv files once per second max. */ /* Don't go silent for long periods if the clock goes backwards. */ if (daemon->last_resolv == 0 || @@ -935,48 +1212,78 @@ int main (int argc, char **argv) poll_resolv(0, daemon->last_resolv != 0, now); daemon->last_resolv = now; } - - if (FD_ISSET(piperead, &rset)) +#endif + + if (poll_check(piperead, POLLIN)) async_event(piperead, now); #ifdef HAVE_DBUS /* if we didn't create a DBus connection, retry now. */ - if (option_bool(OPT_DBUS) && !daemon->dbus) + if (option_bool(OPT_DBUS)) { - char *err; - if ((err = dbus_init())) - my_syslog(LOG_WARNING, _("DBus error: %s"), err); - if (daemon->dbus) - my_syslog(LOG_INFO, _("connected to system DBus")); + if (!daemon->dbus) + { + char *err = dbus_init(); + + if (daemon->dbus) + my_syslog(LOG_INFO, _("connected to system DBus")); + else if (err) + { + my_syslog(LOG_ERR, _("DBus error: %s"), err); + reset_option_bool(OPT_DBUS); /* fatal error, stop trying. */ + } + } + + check_dbus_listeners(); } - check_dbus_listeners(&rset, &wset, &eset); #endif - - check_dns_listeners(&rset, now); +#ifdef HAVE_UBUS + /* if we didn't create a UBus connection, retry now. 
*/ + if (option_bool(OPT_UBUS)) + { + if (!daemon->ubus) + { + char *err = ubus_init(); + + if (daemon->ubus) + my_syslog(LOG_INFO, _("connected to system UBus")); + else if (err) + { + my_syslog(LOG_ERR, _("UBus error: %s"), err); + reset_option_bool(OPT_UBUS); /* fatal error, stop trying. */ + } + } + + check_ubus_listeners(); + } +#endif + + check_dns_listeners(now); + #ifdef HAVE_TFTP - check_tftp_listeners(&rset, now); + check_tftp_listeners(now); #endif #ifdef HAVE_DHCP if (daemon->dhcp || daemon->relay4) { - if (FD_ISSET(daemon->dhcpfd, &rset)) + if (poll_check(daemon->dhcpfd, POLLIN)) dhcp_packet(now, 0); - if (daemon->pxefd != -1 && FD_ISSET(daemon->pxefd, &rset)) + if (daemon->pxefd != -1 && poll_check(daemon->pxefd, POLLIN)) dhcp_packet(now, 1); } #ifdef HAVE_DHCP6 - if ((daemon->doing_dhcp6 || daemon->relay6) && FD_ISSET(daemon->dhcp6fd, &rset)) + if ((daemon->doing_dhcp6 || daemon->relay6) && poll_check(daemon->dhcp6fd, POLLIN)) dhcp6_packet(now); - if (daemon->doing_ra && FD_ISSET(daemon->icmp6fd, &rset)) + if (daemon->doing_ra && poll_check(daemon->icmp6fd, POLLIN)) icmp6_packet(now); #endif # ifdef HAVE_SCRIPT - if (daemon->helperfd != -1 && FD_ISSET(daemon->helperfd, &wset)) + if (daemon->helperfd != -1 && poll_check(daemon->helperfd, POLLOUT)) helper_write(); # endif #endif @@ -990,7 +1297,7 @@ static void sig_handler(int sig) { /* ignore anything other than TERM during startup and in helper proc. (helper ignore TERM too) */ - if (sig == SIGTERM) + if (sig == SIGTERM || sig == SIGINT) exit(EC_MISC); } else if (pid != getpid()) @@ -1016,6 +1323,15 @@ static void sig_handler(int sig) event = EVENT_DUMP; else if (sig == SIGUSR2) event = EVENT_REOPEN; + else if (sig == SIGINT) + { + /* Handle SIGINT normally in debug mode, so + ctrl-c continues to operate. 
*/ + if (option_bool(OPT_DEBUG)) + exit(EC_MISC); + else + event = EVENT_TIME; + } else return; @@ -1037,6 +1353,11 @@ void send_alarm(time_t event, time_t now) } } +void queue_event(int event) +{ + send_event(pipewrite, event, 0, NULL); +} + void send_event(int fd, int event, int data, char *msg) { struct event_desc ev; @@ -1093,30 +1414,42 @@ static void fatal_event(struct event_desc *ev, char *m case EVENT_FORK_ERR: die(_("cannot fork into background: %s"), NULL, EC_MISC); - + + /* fall through */ case EVENT_PIPE_ERR: die(_("failed to create helper: %s"), NULL, EC_MISC); - + + /* fall through */ case EVENT_CAP_ERR: die(_("setting capabilities failed: %s"), NULL, EC_MISC); + /* fall through */ case EVENT_USER_ERR: die(_("failed to change user-id to %s: %s"), msg, EC_MISC); + /* fall through */ case EVENT_GROUP_ERR: die(_("failed to change group-id to %s: %s"), msg, EC_MISC); - + + /* fall through */ case EVENT_PIDFILE: die(_("failed to open pidfile %s: %s"), msg, EC_FILE); + /* fall through */ case EVENT_LOG_ERR: die(_("cannot open log %s: %s"), msg, EC_FILE); - + + /* fall through */ case EVENT_LUA_ERR: die(_("failed to load Lua script: %s"), msg, EC_MISC); + /* fall through */ case EVENT_TFTP_ERR: die(_("TFTP directory %s inaccessible: %s"), msg, EC_FILE); + + /* fall through */ + case EVENT_TIME_ERR: + die(_("cannot create timestamp file %s: %s" ), msg, EC_BADCONF); } } @@ -1134,13 +1467,8 @@ static void async_event(int pipe, time_t now) switch (ev.event) { case EVENT_RELOAD: -#ifdef HAVE_DNSSEC - if (option_bool(OPT_DNSSEC_VALID) && option_bool(OPT_DNSSEC_TIME)) - { - my_syslog(LOG_INFO, _("now checking DNSSEC signature timestamps")); - reset_option_bool(OPT_DNSSEC_TIME); - } -#endif + daemon->soa_sn++; /* Bump zone serial, as it may have changed. 
*/ + /* fall through */ case EVENT_INIT: @@ -1161,7 +1489,7 @@ static void async_event(int pipe, time_t now) } if (check) - check_servers(); + check_servers(0); } #ifdef HAVE_DHCP @@ -1203,6 +1531,7 @@ static void async_event(int pipe, time_t now) daemon->tcp_pids[i] = 0; break; +#if defined(HAVE_SCRIPT) case EVENT_KILLED: my_syslog(LOG_WARNING, _("script process killed by signal %d"), ev.data); break; @@ -1216,12 +1545,19 @@ static void async_event(int pipe, time_t now) daemon->lease_change_command, strerror(ev.data)); break; + case EVENT_SCRIPT_LOG: + my_syslog(MS_SCRIPT | LOG_DEBUG, "%s", msg ? msg : ""); + free(msg); + msg = NULL; + break; + /* necessary for fatal errors in helper */ case EVENT_USER_ERR: case EVENT_DIE: case EVENT_LUA_ERR: fatal_event(&ev, msg); break; +#endif case EVENT_REOPEN: /* Note: this may leave TCP-handling processes with the old file still open. @@ -1230,6 +1566,27 @@ static void async_event(int pipe, time_t now) if (daemon->log_file != NULL) log_reopen(daemon->log_file); break; + + case EVENT_NEWADDR: + newaddress(now); + break; + + case EVENT_NEWROUTE: + resend_query(); + /* Force re-reading resolv file right now, for luck. */ + poll_resolv(0, 1, now); + break; + + case EVENT_TIME: +#ifdef HAVE_DNSSEC + if (daemon->dnssec_no_time_check && option_bool(OPT_DNSSEC_VALID) && option_bool(OPT_DNSSEC_TIME)) + { + my_syslog(LOG_INFO, _("now checking DNSSEC signature timestamps")); + daemon->dnssec_no_time_check = 0; + clear_cache_and_reload(now); + } +#endif + break; case EVENT_TERM: /* Knock all our children on the head. 
*/ @@ -1237,13 +1594,13 @@ static void async_event(int pipe, time_t now) if (daemon->tcp_pids[i] != 0) kill(daemon->tcp_pids[i], SIGALRM); -#if defined(HAVE_SCRIPT) +#if defined(HAVE_SCRIPT) && defined(HAVE_DHCP) /* handle pending lease transitions */ if (daemon->helperfd != -1) { /* block in writes until all done */ if ((i = fcntl(daemon->helperfd, F_GETFL)) != -1) - fcntl(daemon->helperfd, F_SETFL, i & ~O_NONBLOCK); + while(retry_send(fcntl(daemon->helperfd, F_SETFL, i & ~O_NONBLOCK))); do { helper_write(); } while (!helper_buf_empty() || do_script_run(now)); @@ -1254,8 +1611,22 @@ static void async_event(int pipe, time_t now) if (daemon->lease_stream) fclose(daemon->lease_stream); +#ifdef HAVE_DNSSEC + /* update timestamp file on TERM if time is considered valid */ + if (daemon->back_to_the_future) + { + if (utimes(daemon->timestamp_file, NULL) == -1) + my_syslog(LOG_ERR, _("failed to update mtime on %s: %s"), daemon->timestamp_file, strerror(errno)); + } +#endif + if (daemon->runfile) unlink(daemon->runfile); + +#ifdef HAVE_DUMPFILE + if (daemon->dumpfd != -1) + close(daemon->dumpfd); +#endif my_syslog(LOG_INFO, _("exiting on receipt of SIGTERM")); flush_log(); @@ -1263,7 +1634,7 @@ static void async_event(int pipe, time_t now) } } -void poll_resolv(int force, int do_reload, time_t now) +static void poll_resolv(int force, int do_reload, time_t now) { struct resolvc *res, *latest; struct stat statbuf; @@ -1299,9 +1670,10 @@ void poll_resolv(int force, int do_reload, time_t now) else { res->logged = 0; - if (force || (statbuf.st_mtime != res->mtime)) + if (force || (statbuf.st_mtime != res->mtime || statbuf.st_ino != res->ino)) { res->mtime = statbuf.st_mtime; + res->ino = statbuf.st_ino; if (difftime(statbuf.st_mtime, last_change) > 0.0) { last_change = statbuf.st_mtime; @@ -1317,12 +1689,17 @@ void poll_resolv(int force, int do_reload, time_t now) { my_syslog(LOG_INFO, _("reading %s"), latest->name); warned = 0; - check_servers(); + check_servers(0); if 
(option_bool(OPT_RELOAD) && do_reload) clear_cache_and_reload(now); } else { + /* If we're delaying things, we don't call check_servers(), but + reload_servers() may have deleted some servers, rendering the server_array + invalid, so just rebuild that here. Once reload_servers() succeeds, + we call check_servers() above, which calls build_server_array itself. */ + build_server_array(); latest->mtime = 0; if (!warned) { @@ -1360,101 +1737,124 @@ void clear_cache_and_reload(time_t now) #endif } -static int set_dns_listeners(time_t now, fd_set *set, int *maxfdp) +static void set_dns_listeners(void) { struct serverfd *serverfdp; struct listener *listener; - int wait = 0, i; + struct randfd_list *rfl; + int i; #ifdef HAVE_TFTP int tftp = 0; struct tftp_transfer *transfer; - for (transfer = daemon->tftp_trans; transfer; transfer = transfer->next) - { - tftp++; - FD_SET(transfer->sockfd, set); - bump_maxfd(transfer->sockfd, maxfdp); - } + if (!option_bool(OPT_SINGLE_PORT)) + for (transfer = daemon->tftp_trans; transfer; transfer = transfer->next) + { + tftp++; + poll_listen(transfer->sockfd, POLLIN); + } #endif - /* will we be able to get memory? */ - if (daemon->port != 0) - get_new_frec(now, &wait, 0); - for (serverfdp = daemon->sfds; serverfdp; serverfdp = serverfdp->next) - { - FD_SET(serverfdp->fd, set); - bump_maxfd(serverfdp->fd, maxfdp); - } + poll_listen(serverfdp->fd, POLLIN); + + for (i = 0; i < daemon->numrrand; i++) + if (daemon->randomsocks[i].refcount != 0) + poll_listen(daemon->randomsocks[i].fd, POLLIN); - if (daemon->port != 0 && !daemon->osport) - for (i = 0; i < RANDOM_SOCKS; i++) - if (daemon->randomsocks[i].refcount != 0) - { - FD_SET(daemon->randomsocks[i].fd, set); - bump_maxfd(daemon->randomsocks[i].fd, maxfdp); - } + /* Check overflow random sockets too. */ + for (rfl = daemon->rfl_poll; rfl; rfl = rfl->next) + poll_listen(rfl->rfd->fd, POLLIN); + /* check to see if we have free tcp process slots. 
*/ + for (i = MAX_PROCS - 1; i >= 0; i--) + if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1) + break; + for (listener = daemon->listeners; listener; listener = listener->next) { - /* only listen for queries if we have resources */ - if (listener->fd != -1 && wait == 0) - { - FD_SET(listener->fd, set); - bump_maxfd(listener->fd, maxfdp); - } - - /* death of a child goes through the select loop, so - we don't need to explicitly arrange to wake up here */ - if (listener->tcpfd != -1) - for (i = 0; i < MAX_PROCS; i++) - if (daemon->tcp_pids[i] == 0) - { - FD_SET(listener->tcpfd, set); - bump_maxfd(listener->tcpfd, maxfdp); - break; - } - + if (listener->fd != -1) + poll_listen(listener->fd, POLLIN); + + /* Only listen for TCP connections when a process slot + is available. Death of a child goes through the select loop, so + we don't need to explicitly arrange to wake up here, + we'll be called again when a slot becomes available. */ + if (listener->tcpfd != -1 && i >= 0) + poll_listen(listener->tcpfd, POLLIN); + #ifdef HAVE_TFTP + /* tftp == 0 in single-port mode. 
*/ if (tftp <= daemon->tftp_max && listener->tftpfd != -1) - { - FD_SET(listener->tftpfd, set); - bump_maxfd(listener->tftpfd, maxfdp); - } + poll_listen(listener->tftpfd, POLLIN); #endif - } - return wait; + if (!option_bool(OPT_DEBUG)) + for (i = 0; i < MAX_PROCS; i++) + if (daemon->tcp_pipes[i] != -1) + poll_listen(daemon->tcp_pipes[i], POLLIN); } -static void check_dns_listeners(fd_set *set, time_t now) +static void check_dns_listeners(time_t now) { struct serverfd *serverfdp; struct listener *listener; + struct randfd_list *rfl; int i; - + int pipefd[2]; + for (serverfdp = daemon->sfds; serverfdp; serverfdp = serverfdp->next) - if (FD_ISSET(serverfdp->fd, set)) - reply_query(serverfdp->fd, serverfdp->source_addr.sa.sa_family, now); + if (poll_check(serverfdp->fd, POLLIN)) + reply_query(serverfdp->fd, now); - if (daemon->port != 0 && !daemon->osport) - for (i = 0; i < RANDOM_SOCKS; i++) - if (daemon->randomsocks[i].refcount != 0 && - FD_ISSET(daemon->randomsocks[i].fd, set)) - reply_query(daemon->randomsocks[i].fd, daemon->randomsocks[i].family, now); - + for (i = 0; i < daemon->numrrand; i++) + if (daemon->randomsocks[i].refcount != 0 && + poll_check(daemon->randomsocks[i].fd, POLLIN)) + reply_query(daemon->randomsocks[i].fd, now); + + /* Check overflow random sockets too. */ + for (rfl = daemon->rfl_poll; rfl; rfl = rfl->next) + if (poll_check(rfl->rfd->fd, POLLIN)) + reply_query(rfl->rfd->fd, now); + + /* Races. The child process can die before we read all of the data from the + pipe, or vice versa. Therefore send tcp_pids to zero when we wait() the + process, and tcp_pipes to -1 and close the FD when we read the last + of the data - indicated by cache_recv_insert returning zero. + The order of these events is indeterminate, and both are needed + to free the process slot. Once the child process has gone, poll() + returns POLLHUP, not POLLIN, so have to check for both here. 
*/ + if (!option_bool(OPT_DEBUG)) + for (i = 0; i < MAX_PROCS; i++) + if (daemon->tcp_pipes[i] != -1 && + poll_check(daemon->tcp_pipes[i], POLLIN | POLLHUP) && + !cache_recv_insert(now, daemon->tcp_pipes[i])) + { + close(daemon->tcp_pipes[i]); + daemon->tcp_pipes[i] = -1; + } + for (listener = daemon->listeners; listener; listener = listener->next) { - if (listener->fd != -1 && FD_ISSET(listener->fd, set)) + if (listener->fd != -1 && poll_check(listener->fd, POLLIN)) receive_query(listener, now); #ifdef HAVE_TFTP - if (listener->tftpfd != -1 && FD_ISSET(listener->tftpfd, set)) + if (listener->tftpfd != -1 && poll_check(listener->tftpfd, POLLIN)) tftp_request(listener, now); #endif - if (listener->tcpfd != -1 && FD_ISSET(listener->tcpfd, set)) + /* check to see if we have a free tcp process slot. + Note that we can't assume that because we had + at least one a poll() time, that we still do. + There may be more waiting connections after + poll() returns then free process slots. */ + for (i = MAX_PROCS - 1; i >= 0; i--) + if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1) + break; + + if (listener->tcpfd != -1 && i >= 0 && poll_check(listener->tcpfd, POLLIN)) { int confd, client_ok = 1; struct irec *iface = NULL; @@ -1497,15 +1897,16 @@ static void check_dns_listeners(fd_set *set, time_t no if ((if_index = tcp_interface(confd, tcp_addr.sa.sa_family)) != 0 && indextoname(listener->tcpfd, if_index, intr_name)) { - struct all_addr addr; - addr.addr.addr4 = tcp_addr.in.sin_addr; -#ifdef HAVE_IPV6 + union all_addr addr; + if (tcp_addr.sa.sa_family == AF_INET6) - addr.addr.addr6 = tcp_addr.in6.sin6_addr; -#endif + addr.addr6 = tcp_addr.in6.sin6_addr; + else + addr.addr4 = tcp_addr.in.sin_addr; for (iface = daemon->interfaces; iface; iface = iface->next) - if (iface->index == if_index) + if (iface->index == if_index && + iface->addr.sa.sa_family == tcp_addr.sa.sa_family) break; if (!iface && !loopback_exception(listener->tcpfd, tcp_addr.sa.sa_family, &addr, 
intr_name)) @@ -1536,22 +1937,41 @@ static void check_dns_listeners(fd_set *set, time_t no shutdown(confd, SHUT_RDWR); close(confd); } -#ifndef NO_FORK - else if (!option_bool(OPT_DEBUG) && (p = fork()) != 0) + else if (!option_bool(OPT_DEBUG) && pipe(pipefd) == 0 && (p = fork()) != 0) { - if (p != -1) + close(pipefd[1]); /* parent needs read pipe end. */ + if (p == -1) + close(pipefd[0]); + else { - int i; - for (i = 0; i < MAX_PROCS; i++) - if (daemon->tcp_pids[i] == 0) - { - daemon->tcp_pids[i] = p; - break; - } +#ifdef HAVE_LINUX_NETWORK + /* The child process inherits the netlink socket, + which it never uses, but when the parent (us) + uses it in the future, the answer may go to the + child, resulting in the parent blocking + forever awaiting the result. To avoid this + the child closes the netlink socket, but there's + a nasty race, since the parent may use netlink + before the child has done the close. + + To avoid this, the parent blocks here until a + single byte comes back up the pipe, which + is sent by the child after it has closed the + netlink socket. */ + + unsigned char a; + read_write(pipefd[0], &a, 1, 1); +#endif + + /* i holds index of free slot */ + daemon->tcp_pids[i] = p; + daemon->tcp_pipes[i] = pipefd[0]; } close(confd); + + /* The child can use up to TCP_MAX_QUERIES ids, so skip that many. */ + daemon->log_id += TCP_MAX_QUERIES; } -#endif else { unsigned char *buff; @@ -1559,7 +1979,7 @@ static void check_dns_listeners(fd_set *set, time_t no int flags; struct in_addr netmask; int auth_dns; - + if (iface) { netmask = iface->netmask; @@ -1571,13 +1991,22 @@ static void check_dns_listeners(fd_set *set, time_t no auth_dns = 0; } -#ifndef NO_FORK - /* Arrange for SIGALARM after CHILD_LIFETIME seconds to + /* Arrange for SIGALRM after CHILD_LIFETIME seconds to terminate the process. */ if (!option_bool(OPT_DEBUG)) - alarm(CHILD_LIFETIME); -#endif + { +#ifdef HAVE_LINUX_NETWORK + /* See comment above re: netlink socket. 
*/ + unsigned char a = 0; + close(daemon->netlinkfd); + read_write(pipefd[1], &a, 1, 0); +#endif + alarm(CHILD_LIFETIME); + close(pipefd[0]); /* close read end in child. */ + daemon->pipe_to_parent = pipefd[1]; + } + /* start with no upstream connections. */ for (s = daemon->servers; s; s = s->next) s->tcpfd = -1; @@ -1586,13 +2015,10 @@ static void check_dns_listeners(fd_set *set, time_t no attribute from the listening socket. Reset that here. */ if ((flags = fcntl(confd, F_GETFL, 0)) != -1) - fcntl(confd, F_SETFL, flags & ~O_NONBLOCK); + while(retry_send(fcntl(confd, F_SETFL, flags & ~O_NONBLOCK))); buff = tcp_request(confd, now, &tcp_addr, netmask, auth_dns); - shutdown(confd, SHUT_RDWR); - close(confd); - if (buff) free(buff); @@ -1602,13 +2028,13 @@ static void check_dns_listeners(fd_set *set, time_t no shutdown(s->tcpfd, SHUT_RDWR); close(s->tcpfd); } -#ifndef NO_FORK + if (!option_bool(OPT_DEBUG)) { + close(daemon->pipe_to_parent); flush_log(); _exit(0); } -#endif } } } @@ -1637,11 +2063,6 @@ int icmp_ping(struct in_addr addr) { /* Try and get an ICMP echo from a machine. */ - /* Note that whilst in the three second wait, we check for - (and service) events on the DNS and TFTP sockets, (so doing that - better not use any resources our caller has in use...) - but we remain deaf to signals or further DHCP packets. */ - int fd; struct sockaddr_in saddr; struct { @@ -1651,7 +2072,6 @@ int icmp_ping(struct in_addr addr) unsigned short id = rand16(); unsigned int i, j; int gotreply = 0; - time_t start, now; #if defined(HAVE_LINUX_NETWORK) || defined (HAVE_SOLARIS_NETWORK) if ((fd = make_icmp_sock()) == -1) @@ -1678,78 +2098,97 @@ int icmp_ping(struct in_addr addr) j = (j & 0xffff) + (j >> 16); packet.icmp.icmp_cksum = (j == 0xffff) ? 
j : ~j; - while (sendto(fd, (char *)&packet.icmp, sizeof(struct icmp), 0, - (struct sockaddr *)&saddr, sizeof(saddr)) == -1 && - retry_send()); + while (retry_send(sendto(fd, (char *)&packet.icmp, sizeof(struct icmp), 0, + (struct sockaddr *)&saddr, sizeof(saddr)))); - for (now = start = dnsmasq_time(); - difftime(now, start) < (float)PING_WAIT;) + gotreply = delay_dhcp(dnsmasq_time(), PING_WAIT, fd, addr.s_addr, id); + +#if defined(HAVE_LINUX_NETWORK) || defined(HAVE_SOLARIS_NETWORK) + close(fd); +#else + opt = 1; + setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &opt, sizeof(opt)); +#endif + + return gotreply; +} + +int delay_dhcp(time_t start, int sec, int fd, uint32_t addr, unsigned short id) +{ + /* Delay processing DHCP packets for "sec" seconds counting from "start". + If "fd" is not -1 it will stop waiting if an ICMP echo reply is received + from "addr" with ICMP ID "id" and return 1 */ + + /* Note that whilst waiting, we check for + (and service) events on the DNS and TFTP sockets, (so doing that + better not use any resources our caller has in use...) + but we remain deaf to signals or further DHCP packets. */ + + /* There can be a problem using dnsmasq_time() to end the loop, since + it's not monotonic, and can go backwards if the system clock is + tweaked, leading to the code getting stuck in this loop and + ignoring DHCP requests. To fix this, we check to see if select returned + as a result of a timeout rather than a socket becoming available. We + only allow this to happen as many times as it takes to get to the wait time + in quarter-second chunks. This provides a fallback way to end loop. 
*/ + + int rc, timeout_count; + time_t now; + + for (now = dnsmasq_time(), timeout_count = 0; + (difftime(now, start) <= (float)sec) && (timeout_count < sec * 4);) { - struct timeval tv; - fd_set rset, wset; - struct sockaddr_in faddr; - int maxfd = fd; - socklen_t len = sizeof(faddr); + poll_reset(); + if (fd != -1) + poll_listen(fd, POLLIN); + set_dns_listeners(); + set_log_writer(); - tv.tv_usec = 250000; - tv.tv_sec = 0; - - FD_ZERO(&rset); - FD_ZERO(&wset); - FD_SET(fd, &rset); - set_dns_listeners(now, &rset, &maxfd); - set_log_writer(&wset, &maxfd); - #ifdef HAVE_DHCP6 if (daemon->doing_ra) - { - FD_SET(daemon->icmp6fd, &rset); - bump_maxfd(daemon->icmp6fd, &maxfd); - } + poll_listen(daemon->icmp6fd, POLLIN); #endif - if (select(maxfd+1, &rset, &wset, NULL, &tv) < 0) - { - FD_ZERO(&rset); - FD_ZERO(&wset); - } + rc = do_poll(250); + + if (rc < 0) + continue; + else if (rc == 0) + timeout_count++; now = dnsmasq_time(); - - check_log_writer(&wset); - check_dns_listeners(&rset, now); - + + check_log_writer(0); + check_dns_listeners(now); + #ifdef HAVE_DHCP6 - if (daemon->doing_ra && FD_ISSET(daemon->icmp6fd, &rset)) + if (daemon->doing_ra && poll_check(daemon->icmp6fd, POLLIN)) icmp6_packet(now); #endif #ifdef HAVE_TFTP - check_tftp_listeners(&rset, now); + check_tftp_listeners(now); #endif - if (FD_ISSET(fd, &rset) && - recvfrom(fd, &packet, sizeof(packet), 0, - (struct sockaddr *)&faddr, &len) == sizeof(packet) && - saddr.sin_addr.s_addr == faddr.sin_addr.s_addr && - packet.icmp.icmp_type == ICMP_ECHOREPLY && - packet.icmp.icmp_seq == 0 && - packet.icmp.icmp_id == id) - { - gotreply = 1; - break; + if (fd != -1) + { + struct { + struct ip ip; + struct icmp icmp; + } packet; + struct sockaddr_in faddr; + socklen_t len = sizeof(faddr); + + if (poll_check(fd, POLLIN) && + recvfrom(fd, &packet, sizeof(packet), 0, (struct sockaddr *)&faddr, &len) == sizeof(packet) && + addr == faddr.sin_addr.s_addr && + packet.icmp.icmp_type == ICMP_ECHOREPLY && + 
packet.icmp.icmp_seq == 0 && + packet.icmp.icmp_id == id) + return 1; } } - -#if defined(HAVE_LINUX_NETWORK) || defined(HAVE_SOLARIS_NETWORK) - close(fd); -#else - opt = 1; - setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &opt, sizeof(opt)); -#endif - return gotreply; + return 0; } -#endif - - +#endif /* HAVE_DHCP */