1: /* dnsmasq is Copyright (c) 2000-2022 Simon Kelley
2:
3: This program is free software; you can redistribute it and/or modify
4: it under the terms of the GNU General Public License as published by
5: the Free Software Foundation; version 2 dated June, 1991, or
6: (at your option) version 3 dated 29 June, 2007.
7:
8: This program is distributed in the hope that it will be useful,
9: but WITHOUT ANY WARRANTY; without even the implied warranty of
10: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11: GNU General Public License for more details.
12:
13: You should have received a copy of the GNU General Public License
14: along with this program. If not, see <http://www.gnu.org/licenses/>.
15: */
16:
17: #include "dnsmasq.h"
18:
/* Forward declarations for the static functions of this file, so that
   they can be called before their definitions appear. */
static struct frec *get_new_frec(time_t now, struct server *serv, int force);
static struct frec *lookup_frec(unsigned short id, int fd, void *hash,
                                int *firstp, int *lastp);
static struct frec *lookup_frec_by_query(void *hash, unsigned int flags,
                                         unsigned int flagmask);
#ifdef HAVE_DNSSEC
/* Only needed when DNSSEC support is compiled in. */
static struct frec *lookup_frec_dnssec(char *target, int class, int flags,
                                       struct dns_header *header);
#endif

static unsigned short get_id(void);
static void free_frec(struct frec *f);
static void query_full(time_t now, char *domain);

static void return_reply(time_t now, struct frec *forward,
                         struct dns_header *header, ssize_t n, int status);
31:
32: /* Send a UDP packet with its source address set as "source"
33: unless nowild is true, when we just send it with the kernel default */
34: int send_from(int fd, int nowild, char *packet, size_t len,
35: union mysockaddr *to, union all_addr *source,
36: unsigned int iface)
37: {
38: struct msghdr msg;
39: struct iovec iov[1];
40: union {
41: struct cmsghdr align; /* this ensures alignment */
42: #if defined(HAVE_LINUX_NETWORK)
43: char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
44: #elif defined(IP_SENDSRCADDR)
45: char control[CMSG_SPACE(sizeof(struct in_addr))];
46: #endif
47: char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
48: } control_u;
49:
50: iov[0].iov_base = packet;
51: iov[0].iov_len = len;
52:
53: msg.msg_control = NULL;
54: msg.msg_controllen = 0;
55: msg.msg_flags = 0;
56: msg.msg_name = to;
57: msg.msg_namelen = sa_len(to);
58: msg.msg_iov = iov;
59: msg.msg_iovlen = 1;
60:
61: if (!nowild)
62: {
63: struct cmsghdr *cmptr;
64: msg.msg_control = &control_u;
65: msg.msg_controllen = sizeof(control_u);
66: cmptr = CMSG_FIRSTHDR(&msg);
67:
68: if (to->sa.sa_family == AF_INET)
69: {
70: #if defined(HAVE_LINUX_NETWORK)
71: struct in_pktinfo p;
72: p.ipi_ifindex = 0;
73: p.ipi_spec_dst = source->addr4;
74: msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
75: memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
76: cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
77: cmptr->cmsg_level = IPPROTO_IP;
78: cmptr->cmsg_type = IP_PKTINFO;
79: #elif defined(IP_SENDSRCADDR)
80: msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
81: memcpy(CMSG_DATA(cmptr), &(source->addr4), sizeof(source->addr4));
82: cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
83: cmptr->cmsg_level = IPPROTO_IP;
84: cmptr->cmsg_type = IP_SENDSRCADDR;
85: #endif
86: }
87: else
88: {
89: struct in6_pktinfo p;
90: p.ipi6_ifindex = iface; /* Need iface for IPv6 to handle link-local addrs */
91: p.ipi6_addr = source->addr6;
92: msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
93: memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
94: cmptr->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
95: cmptr->cmsg_type = daemon->v6pktinfo;
96: cmptr->cmsg_level = IPPROTO_IPV6;
97: }
98: }
99:
100: while (retry_send(sendmsg(fd, &msg, 0)));
101:
102: if (errno != 0)
103: {
104: #ifdef HAVE_LINUX_NETWORK
105: /* If interface is still in DAD, EINVAL results - ignore that. */
106: if (errno != EINVAL)
107: my_syslog(LOG_ERR, _("failed to send packet: %s"), strerror(errno));
108: #endif
109: return 0;
110: }
111:
112: return 1;
113: }
114:
115: #ifdef HAVE_CONNTRACK
116: static void set_outgoing_mark(struct frec *forward, int fd)
117: {
118: /* Copy connection mark of incoming query to outgoing connection. */
119: unsigned int mark;
120: if (get_incoming_mark(&forward->frec_src.source, &forward->frec_src.dest, 0, &mark))
121: setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
122: }
123: #endif
124:
125: static void log_query_mysockaddr(unsigned int flags, char *name, union mysockaddr *addr, char *arg, unsigned short type)
126: {
127: if (addr->sa.sa_family == AF_INET)
128: {
129: if (flags & F_SERVER)
130: type = ntohs(addr->in.sin_port);
131: log_query(flags | F_IPV4, name, (union all_addr *)&addr->in.sin_addr, arg, type);
132: }
133: else
134: {
135: if (flags & F_SERVER)
136: type = ntohs(addr->in6.sin6_port);
137: log_query(flags | F_IPV6, name, (union all_addr *)&addr->in6.sin6_addr, arg, type);
138: }
139: }
140:
141: static void server_send(struct server *server, int fd,
142: const void *header, size_t plen, int flags)
143: {
144: while (retry_send(sendto(fd, header, plen, flags,
145: &server->addr.sa,
146: sa_len(&server->addr))));
147: }
148:
149: static int domain_no_rebind(char *domain)
150: {
151: struct rebind_domain *rbd;
152: size_t tlen, dlen = strlen(domain);
153: char *dots = strchr(domain, '.');
154:
155: /* Match whole labels only. Empty domain matches no dots (any single label) */
156: for (rbd = daemon->no_rebind; rbd; rbd = rbd->next)
157: {
158: if (dlen >= (tlen = strlen(rbd->domain)) &&
159: hostname_isequal(rbd->domain, &domain[dlen - tlen]) &&
160: (dlen == tlen || domain[dlen - tlen - 1] == '.'))
161: return 1;
162:
163: if (tlen == 0 && !dots)
164: return 1;
165: }
166:
167: return 0;
168: }
169:
170: static int forward_query(int udpfd, union mysockaddr *udpaddr,
171: union all_addr *dst_addr, unsigned int dst_iface,
172: struct dns_header *header, size_t plen, char *limit, time_t now,
173: struct frec *forward, int ad_reqd, int do_bit, int fast_retry)
174: {
175: unsigned int flags = 0;
176: unsigned int fwd_flags = 0;
177: int is_dnssec = forward && (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY));
178: struct server *master;
179: void *hash = hash_questions(header, plen, daemon->namebuff);
180: unsigned int gotname = extract_request(header, plen, daemon->namebuff, NULL);
181: unsigned char *oph = find_pseudoheader(header, plen, NULL, NULL, NULL, NULL);
182: int old_src = 0, old_reply = 0;
183: int first, last, start = 0;
184: int cacheable, forwarded = 0;
185: size_t edns0_len;
186: unsigned char *pheader;
187: int ede = EDE_UNSET;
188: (void)do_bit;
189:
190: if (header->hb4 & HB4_CD)
191: fwd_flags |= FREC_CHECKING_DISABLED;
192: if (ad_reqd)
193: fwd_flags |= FREC_AD_QUESTION;
194: if (oph)
195: fwd_flags |= FREC_HAS_PHEADER;
196: #ifdef HAVE_DNSSEC
197: if (do_bit)
198: fwd_flags |= FREC_DO_QUESTION;
199: #endif
200:
201: /* Check for retry on existing query.
202: FREC_DNSKEY and FREC_DS_QUERY are never set in flags, so the test below
203: ensures that no frec created for internal DNSSEC query can be returned here.
204:
205: Similarly FREC_NO_CACHE is never set in flags, so a query which is
206: contigent on a particular source address EDNS0 option will never be matched. */
207: if (forward)
208: {
209: old_src = 1;
210: old_reply = 1;
211: }
212: else if ((forward = lookup_frec_by_query(hash, fwd_flags,
213: FREC_CHECKING_DISABLED | FREC_AD_QUESTION | FREC_DO_QUESTION |
214: FREC_HAS_PHEADER | FREC_DNSKEY_QUERY | FREC_DS_QUERY | FREC_NO_CACHE)))
215: {
216: struct frec_src *src;
217:
218: for (src = &forward->frec_src; src; src = src->next)
219: if (src->orig_id == ntohs(header->id) &&
220: sockaddr_isequal(&src->source, udpaddr))
221: break;
222:
223: if (src)
224: {
225: old_src = 1;
226: /* If a query is retried, use the log_id for the retry when logging the answer. */
227: src->log_id = daemon->log_id;
228: }
229: else
230: {
231: /* Existing query, but from new source, just add this
232: client to the list that will get the reply.*/
233:
234: /* Note whine_malloc() zeros memory. */
235: if (!daemon->free_frec_src &&
236: daemon->frec_src_count < daemon->ftabsize &&
237: (daemon->free_frec_src = whine_malloc(sizeof(struct frec_src))))
238: {
239: daemon->frec_src_count++;
240: daemon->free_frec_src->next = NULL;
241: }
242:
243: /* If we've been spammed with many duplicates, return REFUSED. */
244: if (!daemon->free_frec_src)
245: {
246: query_full(now, NULL);
247: /* This is tricky; if we're blasted with the same query
248: over and over, we'll end up taking this path each time
249: and never resetting until the frec gets deleted by
250: aging followed by the receipt of a different query. This
251: is a bit of a DoS vuln. Avoid by explicitly deleting the
252: frec once it expires. */
253: if (difftime(now, forward->time) >= TIMEOUT)
254: free_frec(forward);
255: goto reply;
256: }
257:
258: src = daemon->free_frec_src;
259: daemon->free_frec_src = src->next;
260: src->next = forward->frec_src.next;
261: forward->frec_src.next = src;
262: src->orig_id = ntohs(header->id);
263: src->source = *udpaddr;
264: src->dest = *dst_addr;
265: src->log_id = daemon->log_id;
266: src->iface = dst_iface;
267: src->fd = udpfd;
268:
269: /* closely spaced identical queries cannot be a try and a retry, so
270: it's safe to wait for the reply from the first without
271: forwarding the second. */
272: if (difftime(now, forward->time) < 2)
273: return 0;
274: }
275: }
276:
277: /* new query */
278: if (!forward)
279: {
280: /* If the query is malformed, we can't forward it because
281: we can't get a reliable hash to recognise the answer. */
282: if (!hash)
283: {
284: flags = 0;
285: ede = EDE_INVALID_DATA;
286: goto reply;
287: }
288:
289: if (lookup_domain(daemon->namebuff, gotname, &first, &last))
290: flags = is_local_answer(now, first, daemon->namebuff);
291: else
292: {
293: /* no available server. */
294: ede = EDE_NOT_READY;
295: flags = 0;
296: }
297:
298: /* don't forward A or AAAA queries for simple names, except the empty name */
299: if (!flags &&
300: option_bool(OPT_NODOTS_LOCAL) &&
301: (gotname & (F_IPV4 | F_IPV6)) &&
302: !strchr(daemon->namebuff, '.') &&
303: strlen(daemon->namebuff) != 0)
304: flags = check_for_local_domain(daemon->namebuff, now) ? F_NOERR : F_NXDOMAIN;
305:
306: /* Configured answer. */
307: if (flags || ede == EDE_NOT_READY)
308: goto reply;
309:
310: master = daemon->serverarray[first];
311:
312: if (!(forward = get_new_frec(now, master, 0)))
313: goto reply;
314: /* table full - flags == 0, return REFUSED */
315:
316: /* Keep copy of query if we're doing fast retry. */
317: if (daemon->fast_retry_time != 0)
318: {
319: forward->stash = blockdata_alloc((char *)header, plen);
320: forward->stash_len = plen;
321: }
322:
323: forward->frec_src.log_id = daemon->log_id;
324: forward->frec_src.source = *udpaddr;
325: forward->frec_src.orig_id = ntohs(header->id);
326: forward->frec_src.dest = *dst_addr;
327: forward->frec_src.iface = dst_iface;
328: forward->frec_src.next = NULL;
329: forward->frec_src.fd = udpfd;
330: forward->new_id = get_id();
331: memcpy(forward->hash, hash, HASH_SIZE);
332: forward->forwardall = 0;
333: forward->flags = fwd_flags;
334: if (domain_no_rebind(daemon->namebuff))
335: forward->flags |= FREC_NOREBIND;
336: if (header->hb4 & HB4_CD)
337: forward->flags |= FREC_CHECKING_DISABLED;
338: if (ad_reqd)
339: forward->flags |= FREC_AD_QUESTION;
340: #ifdef HAVE_DNSSEC
341: forward->work_counter = DNSSEC_WORK;
342: if (do_bit)
343: forward->flags |= FREC_DO_QUESTION;
344: #endif
345:
346: start = first;
347:
348: if (option_bool(OPT_ALL_SERVERS))
349: forward->forwardall = 1;
350:
351: if (!option_bool(OPT_ORDER))
352: {
353: if (master->forwardcount++ > FORWARD_TEST ||
354: difftime(now, master->forwardtime) > FORWARD_TIME ||
355: master->last_server == -1)
356: {
357: master->forwardtime = now;
358: master->forwardcount = 0;
359: forward->forwardall = 1;
360: }
361: else
362: start = master->last_server;
363: }
364: }
365: else
366: {
367: #ifdef HAVE_DNSSEC
368: /* If we've already got an answer to this query, but we're awaiting keys for validation,
369: there's no point retrying the query, retry the key query instead...... */
370: while (forward->blocking_query)
371: forward = forward->blocking_query;
372:
373: if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
374: {
375: int is_sign;
376: unsigned char *pheader;
377:
378: /* log_id should match previous DNSSEC query. */
379: daemon->log_display_id = forward->frec_src.log_id;
380:
381: blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
382: plen = forward->stash_len;
383: /* get query for logging. */
384: extract_request(header, plen, daemon->namebuff, NULL);
385:
386: if (find_pseudoheader(header, plen, NULL, &pheader, &is_sign, NULL) && !is_sign)
387: PUTSHORT(SAFE_PKTSZ, pheader);
388:
389: /* Find suitable servers: should never fail. */
390: if (!filter_servers(forward->sentto->arrayposn, F_DNSSECOK, &first, &last))
391: return 0;
392:
393: is_dnssec = 1;
394: forward->forwardall = 1;
395: }
396: else
397: #endif
398: {
399: /* retry on existing query, from original source. Send to all available servers */
400: if (udpfd == -1 && !fast_retry)
401: forward->sentto->failed_queries++;
402: else
403: forward->sentto->retrys++;
404:
405: if (!filter_servers(forward->sentto->arrayposn, F_SERVER, &first, &last))
406: goto reply;
407:
408: master = daemon->serverarray[first];
409:
410: /* Forward to all available servers on retry of query from same host. */
411: if (!option_bool(OPT_ORDER) && old_src && !fast_retry)
412: forward->forwardall = 1;
413: else
414: {
415: start = forward->sentto->arrayposn;
416:
417: if (option_bool(OPT_ORDER) && !fast_retry)
418: {
419: /* In strict order mode, there must be a server later in the list
420: left to send to, otherwise without the forwardall mechanism,
421: code further on will cycle around the list forwever if they
422: all return REFUSED. If at the last, give up.
423: Note that we can get here EITHER because a client retried,
424: or an upstream server returned REFUSED. The above only
425: applied in the later case. For client retries,
426: keep trying the last server.. */
427: if (++start == last)
428: {
429: if (old_reply)
430: goto reply;
431: else
432: start--;
433: }
434: }
435: }
436: }
437:
438: /* If we didn't get an answer advertising a maximal packet in EDNS,
439: fall back to 1280, which should work everywhere on IPv6.
440: If that generates an answer, it will become the new default
441: for this server */
442: forward->flags |= FREC_TEST_PKTSZ;
443: }
444:
445: /* We may be resending a DNSSEC query here, for which the below processing is not necessary. */
446: if (!is_dnssec)
447: {
448: header->id = htons(forward->new_id);
449:
450: plen = add_edns0_config(header, plen, ((unsigned char *)header) + PACKETSZ, &forward->frec_src.source, now, &cacheable);
451:
452: if (!cacheable)
453: forward->flags |= FREC_NO_CACHE;
454:
455: #ifdef HAVE_DNSSEC
456: if (option_bool(OPT_DNSSEC_VALID) && (master->flags & SERV_DO_DNSSEC))
457: {
458: plen = add_do_bit(header, plen, ((unsigned char *) header) + PACKETSZ);
459:
460: /* For debugging, set Checking Disabled, otherwise, have the upstream check too,
461: this allows it to select auth servers when one is returning bad data. */
462: if (option_bool(OPT_DNSSEC_DEBUG))
463: header->hb4 |= HB4_CD;
464:
465: }
466: #endif
467:
468: if (find_pseudoheader(header, plen, &edns0_len, &pheader, NULL, NULL))
469: {
470: /* If there wasn't a PH before, and there is now, we added it. */
471: if (!oph)
472: forward->flags |= FREC_ADDED_PHEADER;
473:
474: /* If we're sending an EDNS0 with any options, we can't recreate the query from a reply. */
475: if (edns0_len > 11)
476: forward->flags |= FREC_HAS_EXTRADATA;
477:
478: /* Reduce udp size on retransmits. */
479: if (forward->flags & FREC_TEST_PKTSZ)
480: PUTSHORT(SAFE_PKTSZ, pheader);
481: }
482: }
483:
484: if (forward->forwardall)
485: start = first;
486:
487: forwarded = 0;
488:
489: /* check for send errors here (no route to host)
490: if we fail to send to all nameservers, send back an error
491: packet straight away (helps modem users when offline) */
492:
493: while (1)
494: {
495: int fd;
496: struct server *srv = daemon->serverarray[start];
497:
498: if ((fd = allocate_rfd(&forward->rfds, srv)) != -1)
499: {
500:
501: #ifdef HAVE_CONNTRACK
502: /* Copy connection mark of incoming query to outgoing connection. */
503: if (option_bool(OPT_CONNTRACK))
504: set_outgoing_mark(forward, fd);
505: #endif
506:
507: #ifdef HAVE_DNSSEC
508: if (option_bool(OPT_DNSSEC_VALID) && (forward->flags & FREC_ADDED_PHEADER))
509: {
510: /* Difficult one here. If our client didn't send EDNS0, we will have set the UDP
511: packet size to 512. But that won't provide space for the RRSIGS in many cases.
512: The RRSIGS will be stripped out before the answer goes back, so the packet should
513: shrink again. So, if we added a do-bit, bump the udp packet size to the value
514: known to be OK for this server. We check returned size after stripping and set
515: the truncated bit if it's still too big. */
516: unsigned char *pheader;
517: int is_sign;
518: if (find_pseudoheader(header, plen, NULL, &pheader, &is_sign, NULL) && !is_sign)
519: PUTSHORT(srv->edns_pktsz, pheader);
520: }
521: #endif
522:
523: if (retry_send(sendto(fd, (char *)header, plen, 0,
524: &srv->addr.sa,
525: sa_len(&srv->addr))))
526: continue;
527:
528: if (errno == 0)
529: {
530: #ifdef HAVE_DUMPFILE
531: dump_packet_udp(DUMP_UP_QUERY, (void *)header, plen, NULL, &srv->addr, fd);
532: #endif
533:
534: /* Keep info in case we want to re-send this packet */
535: daemon->srv_save = srv;
536: daemon->packet_len = plen;
537: daemon->fd_save = fd;
538:
539: if (!(forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)))
540: {
541: if (!gotname)
542: strcpy(daemon->namebuff, "query");
543: log_query_mysockaddr(F_SERVER | F_FORWARD, daemon->namebuff,
544: &srv->addr, NULL, 0);
545: }
546: #ifdef HAVE_DNSSEC
547: else
548: log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, daemon->namebuff, &srv->addr,
549: (forward->flags & FREC_DNSKEY_QUERY) ? "dnssec-retry[DNSKEY]" : "dnssec-retry[DS]", 0);
550: #endif
551:
552: srv->queries++;
553: forwarded = 1;
554: forward->sentto = srv;
555: if (!forward->forwardall)
556: break;
557: forward->forwardall++;
558: }
559: }
560:
561: if (++start == last)
562: break;
563: }
564:
565: if (forwarded || is_dnssec)
566: {
567: forward->forward_timestamp = dnsmasq_milliseconds();
568: return 1;
569: }
570:
571: /* could not send on, prepare to return */
572: header->id = htons(forward->frec_src.orig_id);
573: free_frec(forward); /* cancel */
574: ede = EDE_NETERR;
575:
576: reply:
577: if (udpfd != -1)
578: {
579: if (!(plen = make_local_answer(flags, gotname, plen, header, daemon->namebuff, limit, first, last, ede)))
580: return 0;
581:
582: if (oph)
583: {
584: u16 swap = htons((u16)ede);
585:
586: if (ede != EDE_UNSET)
587: plen = add_pseudoheader(header, plen, (unsigned char *)limit, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
588: else
589: plen = add_pseudoheader(header, plen, (unsigned char *)limit, daemon->edns_pktsz, 0, NULL, 0, do_bit, 0);
590: }
591:
592: #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
593: if (option_bool(OPT_CMARK_ALST_EN))
594: {
595: unsigned int mark;
596: int have_mark = get_incoming_mark(udpaddr, dst_addr, /* istcp: */ 0, &mark);
597: if (have_mark && ((u32)mark & daemon->allowlist_mask))
598: report_addresses(header, plen, mark);
599: }
600: #endif
601:
602: send_from(udpfd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND), (char *)header, plen, udpaddr, dst_addr, dst_iface);
603: }
604:
605: return 0;
606: }
607:
608: /* Check if any frecs need to do a retry, and action that if so.
609: Return time in milliseconds until he next retry will be required,
610: or -1 if none. */
611: int fast_retry(time_t now)
612: {
613: struct frec *f;
614: int ret = -1;
615:
616: if (daemon->fast_retry_time != 0)
617: {
618: u32 millis = dnsmasq_milliseconds();
619:
620: for (f = daemon->frec_list; f; f = f->next)
621: if (f->sentto && f->stash && difftime(now, f->time) < daemon->fast_retry_timeout)
622: {
623: #ifdef HAVE_DNSSEC
624: if (f->blocking_query)
625: continue;
626: #endif
627: /* t is milliseconds since last query sent. */
628: int to_run, t = (int)(millis - f->forward_timestamp);
629:
630: if (t < f->forward_delay)
631: to_run = f->forward_delay - t;
632: else
633: {
634: unsigned char *udpsz;
635: unsigned short udp_size = PACKETSZ; /* default if no EDNS0 */
636: struct dns_header *header = (struct dns_header *)daemon->packet;
637:
638: /* packet buffer overwritten */
639: daemon->srv_save = NULL;
640:
641: blockdata_retrieve(f->stash, f->stash_len, (void *)header);
642:
643: /* UDP size already set in saved query. */
644: if (find_pseudoheader(header, f->stash_len, NULL, &udpsz, NULL, NULL))
645: GETSHORT(udp_size, udpsz);
646:
647: daemon->log_display_id = f->frec_src.log_id;
648:
649: forward_query(-1, NULL, NULL, 0, header, f->stash_len, ((char *) header) + udp_size, now, f,
650: f->flags & FREC_AD_QUESTION, f->flags & FREC_DO_QUESTION, 1);
651:
652: to_run = f->forward_delay = 2 * f->forward_delay;
653: }
654:
655: if (ret == -1 || ret > to_run)
656: ret = to_run;
657: }
658:
659: }
660: return ret;
661: }
662:
663: static struct ipsets *domain_find_sets(struct ipsets *setlist, const char *domain) {
664: /* Similar algorithm to search_servers. */
665: struct ipsets *ipset_pos, *ret = NULL;
666: unsigned int namelen = strlen(domain);
667: unsigned int matchlen = 0;
668: for (ipset_pos = setlist; ipset_pos; ipset_pos = ipset_pos->next)
669: {
670: unsigned int domainlen = strlen(ipset_pos->domain);
671: const char *matchstart = domain + namelen - domainlen;
672: if (namelen >= domainlen && hostname_isequal(matchstart, ipset_pos->domain) &&
673: (domainlen == 0 || namelen == domainlen || *(matchstart - 1) == '.' ) &&
674: domainlen >= matchlen)
675: {
676: matchlen = domainlen;
677: ret = ipset_pos;
678: }
679: }
680:
681: return ret;
682: }
683:
684: static size_t process_reply(struct dns_header *header, time_t now, struct server *server, size_t n, int check_rebind,
685: int no_cache, int cache_secure, int bogusanswer, int ad_reqd, int do_bit, int added_pheader,
686: union mysockaddr *query_source, unsigned char *limit, int ede)
687: {
688: unsigned char *pheader, *sizep;
689: struct ipsets *ipsets = NULL, *nftsets = NULL;
690: int munged = 0, is_sign;
691: unsigned int rcode = RCODE(header);
692: size_t plen;
693:
694: (void)ad_reqd;
695: (void)do_bit;
696: (void)bogusanswer;
697:
698: #ifdef HAVE_IPSET
699: if (daemon->ipsets && extract_request(header, n, daemon->namebuff, NULL))
700: ipsets = domain_find_sets(daemon->ipsets, daemon->namebuff);
701: #endif
702:
703: #ifdef HAVE_NFTSET
704: if (daemon->nftsets && extract_request(header, n, daemon->namebuff, NULL))
705: nftsets = domain_find_sets(daemon->nftsets, daemon->namebuff);
706: #endif
707:
708: if ((pheader = find_pseudoheader(header, n, &plen, &sizep, &is_sign, NULL)))
709: {
710: /* Get extended RCODE. */
711: rcode |= sizep[2] << 4;
712:
713: if (option_bool(OPT_CLIENT_SUBNET) && !check_source(header, plen, pheader, query_source))
714: {
715: my_syslog(LOG_WARNING, _("discarding DNS reply: subnet option mismatch"));
716: return 0;
717: }
718:
719: if (!is_sign)
720: {
721: if (added_pheader)
722: {
723: /* client didn't send EDNS0, we added one, strip it off before returning answer. */
724: n = rrfilter(header, n, RRFILTER_EDNS0);
725: pheader = NULL;
726: }
727: else
728: {
729: /* If upstream is advertising a larger UDP packet size
730: than we allow, trim it so that we don't get overlarge
731: requests for the client. We can't do this for signed packets. */
732: unsigned short udpsz;
733: GETSHORT(udpsz, sizep);
734: if (udpsz > daemon->edns_pktsz)
735: {
736: sizep -= 2;
737: PUTSHORT(daemon->edns_pktsz, sizep);
738: }
739:
740: #ifdef HAVE_DNSSEC
741: /* If the client didn't set the do bit, but we did, reset it. */
742: if (option_bool(OPT_DNSSEC_VALID) && !do_bit)
743: {
744: unsigned short flags;
745: sizep += 2; /* skip RCODE */
746: GETSHORT(flags, sizep);
747: flags &= ~0x8000;
748: sizep -= 2;
749: PUTSHORT(flags, sizep);
750: }
751: #endif
752: }
753: }
754: }
755:
756: /* RFC 4035 sect 4.6 para 3 */
757: if (!is_sign && !option_bool(OPT_DNSSEC_PROXY))
758: header->hb4 &= ~HB4_AD;
759:
760: header->hb4 |= HB4_RA; /* recursion if available */
761:
762: if (OPCODE(header) != QUERY)
763: return resize_packet(header, n, pheader, plen);
764:
765: if (rcode != NOERROR && rcode != NXDOMAIN)
766: {
767: union all_addr a;
768: a.log.rcode = rcode;
769: a.log.ede = ede;
770: log_query(F_UPSTREAM | F_RCODE, "error", &a, NULL, 0);
771:
772: return resize_packet(header, n, pheader, plen);
773: }
774:
775: /* Complain loudly if the upstream server is non-recursive. */
776: if (!(header->hb4 & HB4_RA) && rcode == NOERROR &&
777: server && !(server->flags & SERV_WARNED_RECURSIVE))
778: {
779: (void)prettyprint_addr(&server->addr, daemon->namebuff);
780: my_syslog(LOG_WARNING, _("nameserver %s refused to do a recursive query"), daemon->namebuff);
781: if (!option_bool(OPT_LOG))
782: server->flags |= SERV_WARNED_RECURSIVE;
783: }
784:
785: if (daemon->bogus_addr && rcode != NXDOMAIN &&
786: check_for_bogus_wildcard(header, n, daemon->namebuff, now))
787: {
788: munged = 1;
789: SET_RCODE(header, NXDOMAIN);
790: header->hb3 &= ~HB3_AA;
791: cache_secure = 0;
792: ede = EDE_BLOCKED;
793: }
794: else
795: {
796: int doctored = 0;
797:
798: if (rcode == NXDOMAIN &&
799: extract_request(header, n, daemon->namebuff, NULL))
800: {
801: if (check_for_local_domain(daemon->namebuff, now) ||
802: lookup_domain(daemon->namebuff, F_CONFIG, NULL, NULL))
803: {
804: /* if we forwarded a query for a locally known name (because it was for
805: an unknown type) and the answer is NXDOMAIN, convert that to NODATA,
806: since we know that the domain exists, even if upstream doesn't */
807: munged = 1;
808: header->hb3 |= HB3_AA;
809: SET_RCODE(header, NOERROR);
810: cache_secure = 0;
811: }
812: }
813:
814: /* Before extract_addresses() */
815: if (rcode == NOERROR)
816: {
817: if (option_bool(OPT_FILTER_A))
818: n = rrfilter(header, n, RRFILTER_A);
819:
820: if (option_bool(OPT_FILTER_AAAA))
821: n = rrfilter(header, n, RRFILTER_AAAA);
822: }
823:
824: switch (extract_addresses(header, n, daemon->namebuff, now, ipsets, nftsets, is_sign, check_rebind, no_cache, cache_secure, &doctored))
825: {
826: case 1:
827: my_syslog(LOG_WARNING, _("possible DNS-rebind attack detected: %s"), daemon->namebuff);
828: munged = 1;
829: cache_secure = 0;
830: ede = EDE_BLOCKED;
831: break;
832:
833: /* extract_addresses() found a malformed answer. */
834: case 2:
835: munged = 1;
836: SET_RCODE(header, SERVFAIL);
837: cache_secure = 0;
838: ede = EDE_OTHER;
839: break;
840: }
841:
842: if (doctored)
843: cache_secure = 0;
844: }
845:
846: #ifdef HAVE_DNSSEC
847: if (bogusanswer && !(header->hb4 & HB4_CD) && !option_bool(OPT_DNSSEC_DEBUG))
848: {
849: /* Bogus reply, turn into SERVFAIL */
850: SET_RCODE(header, SERVFAIL);
851: munged = 1;
852: }
853:
854: if (option_bool(OPT_DNSSEC_VALID))
855: {
856: header->hb4 &= ~HB4_AD;
857:
858: if (!(header->hb4 & HB4_CD) && ad_reqd && cache_secure)
859: header->hb4 |= HB4_AD;
860:
861: /* If the requestor didn't set the DO bit, don't return DNSSEC info. */
862: if (!do_bit)
863: n = rrfilter(header, n, RRFILTER_DNSSEC);
864: }
865: #endif
866:
867: /* do this after extract_addresses. Ensure NODATA reply and remove
868: nameserver info. */
869: if (munged)
870: {
871: header->ancount = htons(0);
872: header->nscount = htons(0);
873: header->arcount = htons(0);
874: header->hb3 &= ~HB3_TC;
875: }
876:
877: /* the bogus-nxdomain stuff, doctor and NXDOMAIN->NODATA munging can all elide
878: sections of the packet. Find the new length here and put back pseudoheader
879: if it was removed. */
880: n = resize_packet(header, n, pheader, plen);
881:
882: if (pheader && ede != EDE_UNSET)
883: {
884: u16 swap = htons((u16)ede);
885: n = add_pseudoheader(header, n, limit, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 1);
886: }
887:
888: if (RCODE(header) == NXDOMAIN)
889: server->nxdomain_replies++;
890:
891: return n;
892: }
893:
894: #ifdef HAVE_DNSSEC
895: static void dnssec_validate(struct frec *forward, struct dns_header *header,
896: ssize_t plen, int status, time_t now)
897: {
898: daemon->log_display_id = forward->frec_src.log_id;
899:
900: /* We've had a reply already, which we're validating. Ignore this duplicate */
901: if (forward->blocking_query)
902: return;
903:
904: /* Truncated answer can't be validated.
905: If this is an answer to a DNSSEC-generated query, we still
906: need to get the client to retry over TCP, so return
907: an answer with the TC bit set, even if the actual answer fits.
908: */
909: if (header->hb3 & HB3_TC)
910: status = STAT_TRUNCATED;
911:
912: /* If all replies to a query are REFUSED, give up. */
913: if (RCODE(header) == REFUSED)
914: status = STAT_ABANDONED;
915:
916: /* As soon as anything returns BOGUS, we stop and unwind, to do otherwise
917: would invite infinite loops, since the answers to DNSKEY and DS queries
918: will not be cached, so they'll be repeated. */
919: if (!STAT_ISEQUAL(status, STAT_BOGUS) && !STAT_ISEQUAL(status, STAT_TRUNCATED) && !STAT_ISEQUAL(status, STAT_ABANDONED))
920: {
921: if (forward->flags & FREC_DNSKEY_QUERY)
922: status = dnssec_validate_by_ds(now, header, plen, daemon->namebuff, daemon->keyname, forward->class);
923: else if (forward->flags & FREC_DS_QUERY)
924: status = dnssec_validate_ds(now, header, plen, daemon->namebuff, daemon->keyname, forward->class);
925: else
926: status = dnssec_validate_reply(now, header, plen, daemon->namebuff, daemon->keyname, &forward->class,
927: !option_bool(OPT_DNSSEC_IGN_NS) && (forward->sentto->flags & SERV_DO_DNSSEC),
928: NULL, NULL, NULL);
929: #ifdef HAVE_DUMPFILE
930: if (STAT_ISEQUAL(status, STAT_BOGUS))
931: dump_packet_udp((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) ? DUMP_SEC_BOGUS : DUMP_BOGUS,
932: header, (size_t)plen, &forward->sentto->addr, NULL, -daemon->port);
933: #endif
934: }
935:
936: /* Can't validate, as we're missing key data. Put this
937: answer aside, whilst we get that. */
938: if (STAT_ISEQUAL(status, STAT_NEED_DS) || STAT_ISEQUAL(status, STAT_NEED_KEY))
939: {
940: struct frec *new = NULL;
941: struct blockdata *stash;
942:
943: /* Now save reply pending receipt of key data */
944: if ((stash = blockdata_alloc((char *)header, plen)))
945: {
946: /* validate routines leave name of required record in daemon->keyname */
947: unsigned int flags = STAT_ISEQUAL(status, STAT_NEED_KEY) ? FREC_DNSKEY_QUERY : FREC_DS_QUERY;
948:
949: if ((new = lookup_frec_dnssec(daemon->keyname, forward->class, flags, header)))
950: {
951: /* This is tricky; it detects loops in the dependency
952: graph for DNSSEC validation, say validating A requires DS B
953: and validating DS B requires DNSKEY C and validating DNSKEY C requires DS B.
954: This should never happen in correctly signed records, but it's
955: likely the case that sufficiently broken ones can cause our validation
956: code requests to exhibit cycles. The result is that the ->blocking_query list
957: can form a cycle, and under certain circumstances that can lock us in
958: an infinite loop. Here we transform the situation into ABANDONED. */
959: struct frec *f;
960: for (f = new; f; f = f->blocking_query)
961: if (f == forward)
962: break;
963:
964: if (!f)
965: {
966: forward->next_dependent = new->dependent;
967: new->dependent = forward;
968: /* Make consistent, only replace query copy with unvalidated answer
969: when we set ->blocking_query. */
970: if (forward->stash)
971: blockdata_free(forward->stash);
972: forward->blocking_query = new;
973: forward->stash_len = plen;
974: forward->stash = stash;
975: return;
976: }
977: }
978: else
979: {
980: struct server *server;
981: struct frec *orig;
982: void *hash;
983: size_t nn;
984: int serverind, fd;
985: struct randfd_list *rfds = NULL;
986:
987: /* Find the original query that started it all.... */
988: for (orig = forward; orig->dependent; orig = orig->dependent);
989:
990: /* Make sure we don't expire and free the orig frec during the
991: allocation of a new one: third arg of get_new_frec() does that. */
992: if ((serverind = dnssec_server(forward->sentto, daemon->keyname, NULL, NULL)) != -1 &&
993: (server = daemon->serverarray[serverind]) &&
994: (nn = dnssec_generate_query(header, ((unsigned char *) header) + server->edns_pktsz,
995: daemon->keyname, forward->class,
996: STAT_ISEQUAL(status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz)) &&
997: (hash = hash_questions(header, nn, daemon->namebuff)) &&
998: --orig->work_counter != 0 &&
999: (fd = allocate_rfd(&rfds, server)) != -1 &&
1000: (new = get_new_frec(now, server, 1)))
1001: {
1002: struct frec *next = new->next;
1003:
1004: *new = *forward; /* copy everything, then overwrite */
1005: new->next = next;
1006: new->blocking_query = NULL;
1007:
1008: new->frec_src.log_id = daemon->log_display_id = ++daemon->log_id;
1009: new->sentto = server;
1010: new->rfds = rfds;
1011: new->frec_src.next = NULL;
1012: new->flags &= ~(FREC_DNSKEY_QUERY | FREC_DS_QUERY | FREC_HAS_EXTRADATA);
1013: new->flags |= flags;
1014: new->forwardall = 0;
1015:
1016: forward->next_dependent = NULL;
1017: new->dependent = forward; /* to find query awaiting new one. */
1018:
1019: /* Make consistent, only replace query copy with unvalidated answer
1020: when we set ->blocking_query. */
1021: forward->blocking_query = new;
1022: if (forward->stash)
1023: blockdata_free(forward->stash);
1024: forward->stash_len = plen;
1025: forward->stash = stash;
1026:
1027: memcpy(new->hash, hash, HASH_SIZE);
1028: new->new_id = get_id();
1029: header->id = htons(new->new_id);
1030: /* Save query for retransmission and de-dup */
1031: new->stash = blockdata_alloc((char *)header, nn);
1032: new->stash_len = nn;
1033: if (daemon->fast_retry_time != 0)
1034: new->forward_timestamp = dnsmasq_milliseconds();
1035:
1036: /* Don't resend this. */
1037: daemon->srv_save = NULL;
1038:
1039: #ifdef HAVE_CONNTRACK
1040: if (option_bool(OPT_CONNTRACK))
1041: set_outgoing_mark(orig, fd);
1042: #endif
1043:
1044: server_send(server, fd, header, nn, 0);
1045: server->queries++;
1046: #ifdef HAVE_DUMPFILE
1047: dump_packet_udp(DUMP_SEC_QUERY, (void *)header, (size_t)nn, NULL, &server->addr, fd);
1048: #endif
1049: log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, daemon->keyname, &server->addr,
1050: STAT_ISEQUAL(status, STAT_NEED_KEY) ? "dnssec-query[DNSKEY]" : "dnssec-query[DS]", 0);
1051: return;
1052: }
1053:
1054: free_rfds(&rfds); /* error unwind */
1055: }
1056:
1057: blockdata_free(stash); /* don't leak this on failure. */
1058: }
1059:
1060: /* sending DNSSEC query failed or loop detected. */
1061: status = STAT_ABANDONED;
1062: }
1063:
1064: /* Validated original answer, all done. */
1065: if (!forward->dependent)
1066: return_reply(now, forward, header, plen, status);
1067: else
1068: {
1069: /* validated subsidiary query/queries, (and cached result)
1070: pop that and return to the previous query/queries we were working on. */
1071: struct frec *prev, *nxt = forward->dependent;
1072:
1073: free_frec(forward);
1074:
1075: while ((prev = nxt))
1076: {
1077: /* ->next_dependent will have changed after return from recursive call below. */
1078: nxt = prev->next_dependent;
1079: prev->blocking_query = NULL; /* already gone */
1080: blockdata_retrieve(prev->stash, prev->stash_len, (void *)header);
1081: dnssec_validate(prev, header, prev->stash_len, status, now);
1082: }
1083: }
1084: }
1085: #endif
1086:
1087: /* sets new last_server */
1088: void reply_query(int fd, time_t now)
1089: {
1090: /* packet from peer server, extract data for cache, and send to
1091: original requester */
1092: struct dns_header *header;
1093: union mysockaddr serveraddr;
1094: struct frec *forward;
1095: socklen_t addrlen = sizeof(serveraddr);
1096: ssize_t n = recvfrom(fd, daemon->packet, daemon->packet_buff_sz, 0, &serveraddr.sa, &addrlen);
1097: struct server *server;
1098: void *hash;
1099: int first, last, c;
1100:
1101: /* packet buffer overwritten */
1102: daemon->srv_save = NULL;
1103:
1104: /* Determine the address of the server replying so that we can mark that as good */
1105: if (serveraddr.sa.sa_family == AF_INET6)
1106: serveraddr.in6.sin6_flowinfo = 0;
1107:
1108: header = (struct dns_header *)daemon->packet;
1109:
1110: if (n < (int)sizeof(struct dns_header) || !(header->hb3 & HB3_QR))
1111: return;
1112:
1113: hash = hash_questions(header, n, daemon->namebuff);
1114:
1115: if (!(forward = lookup_frec(ntohs(header->id), fd, hash, &first, &last)))
1116: return;
1117:
1118: /* spoof check: answer must come from known server, also
1119: we may have sent the same query to multiple servers from
1120: the same local socket, and would like to know which one has answered. */
1121: for (c = first; c != last; c++)
1122: if (sockaddr_isequal(&daemon->serverarray[c]->addr, &serveraddr))
1123: break;
1124:
1125: if (c == last)
1126: return;
1127:
1128: server = daemon->serverarray[c];
1129:
1130: if (RCODE(header) != REFUSED)
1131: daemon->serverarray[first]->last_server = c;
1132: else if (daemon->serverarray[first]->last_server == c)
1133: daemon->serverarray[first]->last_server = -1;
1134:
1135: /* If sufficient time has elapsed, try and expand UDP buffer size again. */
1136: if (difftime(now, server->pktsz_reduced) > UDP_TEST_TIME)
1137: server->edns_pktsz = daemon->edns_pktsz;
1138:
1139: /* log_query gets called indirectly all over the place, so
1140: pass these in global variables - sorry. */
1141: daemon->log_display_id = forward->frec_src.log_id;
1142: daemon->log_source_addr = &forward->frec_src.source;
1143:
1144: #ifdef HAVE_DUMPFILE
1145: dump_packet_udp((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) ? DUMP_SEC_REPLY : DUMP_UP_REPLY,
1146: (void *)header, n, &serveraddr, NULL, fd);
1147: #endif
1148:
1149: if (daemon->ignore_addr && RCODE(header) == NOERROR &&
1150: check_for_ignored_address(header, n))
1151: return;
1152:
1153: /* Note: if we send extra options in the EDNS0 header, we can't recreate
1154: the query from the reply. */
1155: if ((RCODE(header) == REFUSED || RCODE(header) == SERVFAIL) &&
1156: forward->forwardall == 0 &&
1157: !(forward->flags & FREC_HAS_EXTRADATA))
1158: /* for broken servers, attempt to send to another one. */
1159: {
1160: unsigned char *pheader, *udpsz;
1161: unsigned short udp_size = PACKETSZ; /* default if no EDNS0 */
1162: size_t plen;
1163: int is_sign;
1164: size_t nn = 0;
1165:
1166: #ifdef HAVE_DNSSEC
1167: /* The query MAY have got a good answer, and be awaiting
1168: the results of further queries, in which case
1169: The Stash contains something else and we don't need to retry anyway. */
1170: if (forward->blocking_query)
1171: return;
1172:
1173: if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
1174: {
1175: /* DNSSEC queries have a copy of the original query stashed. */
1176: blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
1177: nn = forward->stash_len;
1178: udp_size = daemon->edns_pktsz;
1179: }
1180: else
1181: #endif
1182: {
1183: /* in fast retry mode, we have a copy of the query. */
1184: if (daemon->fast_retry_time != 0 && forward->stash)
1185: {
1186: blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
1187: nn = forward->stash_len;
1188: /* UDP size already set in saved query. */
1189: if (find_pseudoheader(header, (size_t)n, NULL, &udpsz, NULL, NULL))
1190: GETSHORT(udp_size, udpsz);
1191: }
1192: else
1193: {
1194: /* recreate query from reply */
1195: if ((pheader = find_pseudoheader(header, (size_t)n, &plen, &udpsz, &is_sign, NULL)))
1196: GETSHORT(udp_size, udpsz);
1197:
1198: /* If the client provides an EDNS0 UDP size, use that to limit our reply.
1199: (bounded by the maximum configured). If no EDNS0, then it
1200: defaults to 512 */
1201: if (udp_size > daemon->edns_pktsz)
1202: udp_size = daemon->edns_pktsz;
1203: else if (udp_size < PACKETSZ)
1204: udp_size = PACKETSZ; /* Sanity check - can't reduce below default. RFC 6891 6.2.3 */
1205:
1206: header->ancount = htons(0);
1207: header->nscount = htons(0);
1208: header->arcount = htons(0);
1209: header->hb3 &= ~(HB3_QR | HB3_AA | HB3_TC);
1210: header->hb4 &= ~(HB4_RA | HB4_RCODE | HB4_CD | HB4_AD);
1211: if (forward->flags & FREC_CHECKING_DISABLED)
1212: header->hb4 |= HB4_CD;
1213: if (forward->flags & FREC_AD_QUESTION)
1214: header->hb4 |= HB4_AD;
1215:
1216: if (!is_sign &&
1217: (nn = resize_packet(header, (size_t)n, pheader, plen)) &&
1218: (forward->flags & FREC_DO_QUESTION))
1219: add_do_bit(header, nn, (unsigned char *)pheader + plen);
1220: }
1221: }
1222:
1223: if (nn)
1224: {
1225: forward_query(-1, NULL, NULL, 0, header, nn, ((char *) header) + udp_size, now, forward,
1226: forward->flags & FREC_AD_QUESTION, forward->flags & FREC_DO_QUESTION, 0);
1227: return;
1228: }
1229: }
1230:
1231: /* If the answer is an error, keep the forward record in place in case
1232: we get a good reply from another server. Kill it when we've
1233: had replies from all to avoid filling the forwarding table when
1234: everything is broken */
1235:
1236: /* decrement count of replies recieved if we sent to more than one server. */
1237: if (forward->forwardall && (--forward->forwardall > 1) && RCODE(header) == REFUSED)
1238: return;
1239:
1240: /* We tried resending to this server with a smaller maximum size and got an answer.
1241: Make that permanent. To avoid reduxing the packet size for a single dropped packet,
1242: only do this when we get a truncated answer, or one larger than the safe size. */
1243: if (server->edns_pktsz > SAFE_PKTSZ && (forward->flags & FREC_TEST_PKTSZ) &&
1244: ((header->hb3 & HB3_TC) || n >= SAFE_PKTSZ))
1245: {
1246: server->edns_pktsz = SAFE_PKTSZ;
1247: server->pktsz_reduced = now;
1248: (void)prettyprint_addr(&server->addr, daemon->addrbuff);
1249: my_syslog(LOG_WARNING, _("reducing DNS packet size for nameserver %s to %d"), daemon->addrbuff, SAFE_PKTSZ);
1250: }
1251:
1252: forward->sentto = server;
1253:
1254: /* We have a good answer, and will now validate it or return it.
1255: It may be some time before this the validation completes, but we don't need
1256: any more answers, so close the socket(s) on which we were expecting
1257: answers, to conserve file descriptors, and to save work reading and
1258: discarding answers for other upstreams. */
1259: free_rfds(&forward->rfds);
1260:
1261: /* calculate modified moving average of server latency */
1262: if (server->query_latency == 0)
1263: server->mma_latency = (dnsmasq_milliseconds() - forward->forward_timestamp) * 128; /* init */
1264: else
1265: server->mma_latency += dnsmasq_milliseconds() - forward->forward_timestamp - server->query_latency;
1266: /* denominator controls how many queries we average over. */
1267: server->query_latency = server->mma_latency/128;
1268:
1269:
1270: #ifdef HAVE_DNSSEC
1271: if ((forward->sentto->flags & SERV_DO_DNSSEC) &&
1272: option_bool(OPT_DNSSEC_VALID) &&
1273: !(forward->flags & FREC_CHECKING_DISABLED))
1274: dnssec_validate(forward, header, n, STAT_OK, now);
1275: else
1276: #endif
1277: return_reply(now, forward, header, n, STAT_OK);
1278: }
1279:
1280: static void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status)
1281: {
1282: int check_rebind = 0, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0;
1283: size_t nn;
1284: int ede = EDE_UNSET;
1285:
1286: (void)status;
1287:
1288: daemon->log_display_id = forward->frec_src.log_id;
1289: daemon->log_source_addr = &forward->frec_src.source;
1290:
1291: /* Don't cache replies where DNSSEC validation was turned off, either
1292: the upstream server told us so, or the original query specified it. */
1293: if ((header->hb4 & HB4_CD) || (forward->flags & FREC_CHECKING_DISABLED))
1294: no_cache_dnssec = 1;
1295:
1296: #ifdef HAVE_DNSSEC
1297: if (!STAT_ISEQUAL(status, STAT_OK))
1298: {
1299: /* status is STAT_OK when validation not turned on. */
1300: no_cache_dnssec = 0;
1301:
1302: if (STAT_ISEQUAL(status, STAT_TRUNCATED))
1303: header->hb3 |= HB3_TC;
1304: else
1305: {
1306: char *result, *domain = "result";
1307: union all_addr a;
1308:
1309: a.log.ede = ede = errflags_to_ede(status);
1310:
1311: if (STAT_ISEQUAL(status, STAT_ABANDONED))
1312: {
1313: result = "ABANDONED";
1314: status = STAT_BOGUS;
1315: }
1316: else
1317: result = (STAT_ISEQUAL(status, STAT_SECURE) ? "SECURE" : (STAT_ISEQUAL(status, STAT_INSECURE) ? "INSECURE" : "BOGUS"));
1318:
1319: if (STAT_ISEQUAL(status, STAT_SECURE))
1320: cache_secure = 1;
1321: else if (STAT_ISEQUAL(status, STAT_BOGUS))
1322: {
1323: no_cache_dnssec = 1;
1324: bogusanswer = 1;
1325:
1326: if (extract_request(header, n, daemon->namebuff, NULL))
1327: domain = daemon->namebuff;
1328: }
1329:
1330: log_query(F_SECSTAT, domain, &a, result, 0);
1331: }
1332: }
1333: #endif
1334:
1335: if (option_bool(OPT_NO_REBIND))
1336: check_rebind = !(forward->flags & FREC_NOREBIND);
1337:
1338: /* restore CD bit to the value in the query */
1339: if (forward->flags & FREC_CHECKING_DISABLED)
1340: header->hb4 |= HB4_CD;
1341: else
1342: header->hb4 &= ~HB4_CD;
1343:
1344: /* Never cache answers which are contingent on the source or MAC address EDSN0 option,
1345: since the cache is ignorant of such things. */
1346: if (forward->flags & FREC_NO_CACHE)
1347: no_cache_dnssec = 1;
1348:
1349: if ((nn = process_reply(header, now, forward->sentto, (size_t)n, check_rebind, no_cache_dnssec, cache_secure, bogusanswer,
1350: forward->flags & FREC_AD_QUESTION, forward->flags & FREC_DO_QUESTION,
1351: forward->flags & FREC_ADDED_PHEADER, &forward->frec_src.source,
1352: ((unsigned char *)header) + daemon->edns_pktsz, ede)))
1353: {
1354: struct frec_src *src;
1355:
1356: header->id = htons(forward->frec_src.orig_id);
1357: #ifdef HAVE_DNSSEC
1358: /* We added an EDNSO header for the purpose of getting DNSSEC RRs, and set the value of the UDP payload size
1359: greater than the no-EDNS0-implied 512 to have space for the RRSIGS. If, having stripped them and the EDNS0
1360: header, the answer is still bigger than 512, truncate it and mark it so. The client then retries with TCP. */
1361: if (option_bool(OPT_DNSSEC_VALID) && (forward->flags & FREC_ADDED_PHEADER) && (nn > PACKETSZ))
1362: {
1363: header->ancount = htons(0);
1364: header->nscount = htons(0);
1365: header->arcount = htons(0);
1366: header->hb3 |= HB3_TC;
1367: nn = resize_packet(header, nn, NULL, 0);
1368: }
1369: #endif
1370:
1371: for (src = &forward->frec_src; src; src = src->next)
1372: {
1373: header->id = htons(src->orig_id);
1374:
1375: #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
1376: if (option_bool(OPT_CMARK_ALST_EN))
1377: {
1378: unsigned int mark;
1379: int have_mark = get_incoming_mark(&src->source, &src->dest, /* istcp: */ 0, &mark);
1380: if (have_mark && ((u32)mark & daemon->allowlist_mask))
1381: report_addresses(header, nn, mark);
1382: }
1383: #endif
1384:
1385: if (src->fd != -1)
1386: {
1387: #ifdef HAVE_DUMPFILE
1388: dump_packet_udp(DUMP_REPLY, daemon->packet, (size_t)nn, NULL, &src->source, src->fd);
1389: #endif
1390: send_from(src->fd, option_bool(OPT_NOWILD) || option_bool (OPT_CLEVERBIND), daemon->packet, nn,
1391: &src->source, &src->dest, src->iface);
1392:
1393: if (option_bool(OPT_EXTRALOG) && src != &forward->frec_src)
1394: {
1395: daemon->log_display_id = src->log_id;
1396: daemon->log_source_addr = &src->source;
1397: log_query(F_UPSTREAM, "query", NULL, "duplicate", 0);
1398: }
1399: }
1400: }
1401: }
1402:
1403: free_frec(forward); /* cancel */
1404: }
1405:
1406:
1407: #ifdef HAVE_CONNTRACK
1408: static int is_query_allowed_for_mark(u32 mark, const char *name)
1409: {
1410: int is_allowable_name, did_validate_name = 0;
1411: struct allowlist *allowlists;
1412: char **patterns_pos;
1413:
1414: for (allowlists = daemon->allowlists; allowlists; allowlists = allowlists->next)
1415: if (allowlists->mark == (mark & daemon->allowlist_mask & allowlists->mask))
1416: for (patterns_pos = allowlists->patterns; *patterns_pos; patterns_pos++)
1417: {
1418: if (!strcmp(*patterns_pos, "*"))
1419: return 1;
1420: if (!did_validate_name)
1421: {
1422: is_allowable_name = name ? is_valid_dns_name(name) : 0;
1423: did_validate_name = 1;
1424: }
1425: if (is_allowable_name && is_dns_name_matching_pattern(name, *patterns_pos))
1426: return 1;
1427: }
1428: return 0;
1429: }
1430:
1431: static size_t answer_disallowed(struct dns_header *header, size_t qlen, u32 mark, const char *name)
1432: {
1433: unsigned char *p;
1434: (void)name;
1435: (void)mark;
1436:
1437: #ifdef HAVE_UBUS
1438: if (name)
1439: ubus_event_bcast_connmark_allowlist_refused(mark, name);
1440: #endif
1441:
1442: setup_reply(header, /* flags: */ 0, EDE_BLOCKED);
1443:
1444: if (!(p = skip_questions(header, qlen)))
1445: return 0;
1446: return p - (unsigned char *)header;
1447: }
1448: #endif
1449:
1450: void receive_query(struct listener *listen, time_t now)
1451: {
1452: struct dns_header *header = (struct dns_header *)daemon->packet;
1453: union mysockaddr source_addr;
1454: unsigned char *pheader;
1455: unsigned short type, udp_size = PACKETSZ; /* default if no EDNS0 */
1456: union all_addr dst_addr;
1457: struct in_addr netmask, dst_addr_4;
1458: size_t m;
1459: ssize_t n;
1460: int if_index = 0, auth_dns = 0, do_bit = 0, have_pseudoheader = 0;
1461: #ifdef HAVE_CONNTRACK
1462: unsigned int mark = 0;
1463: int have_mark = 0;
1464: int is_single_query = 0, allowed = 1;
1465: #endif
1466: #ifdef HAVE_AUTH
1467: int local_auth = 0;
1468: #endif
1469: struct iovec iov[1];
1470: struct msghdr msg;
1471: struct cmsghdr *cmptr;
1472: union {
1473: struct cmsghdr align; /* this ensures alignment */
1474: char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
1475: #if defined(HAVE_LINUX_NETWORK)
1476: char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
1477: #elif defined(IP_RECVDSTADDR) && defined(HAVE_SOLARIS_NETWORK)
1478: char control[CMSG_SPACE(sizeof(struct in_addr)) +
1479: CMSG_SPACE(sizeof(unsigned int))];
1480: #elif defined(IP_RECVDSTADDR)
1481: char control[CMSG_SPACE(sizeof(struct in_addr)) +
1482: CMSG_SPACE(sizeof(struct sockaddr_dl))];
1483: #endif
1484: } control_u;
1485: int family = listen->addr.sa.sa_family;
1486: /* Can always get recvd interface for IPv6 */
1487: int check_dst = !option_bool(OPT_NOWILD) || family == AF_INET6;
1488:
1489: /* packet buffer overwritten */
1490: daemon->srv_save = NULL;
1491:
1492: dst_addr_4.s_addr = dst_addr.addr4.s_addr = 0;
1493: netmask.s_addr = 0;
1494:
1495: if (option_bool(OPT_NOWILD) && listen->iface)
1496: {
1497: auth_dns = listen->iface->dns_auth;
1498:
1499: if (family == AF_INET)
1500: {
1501: dst_addr_4 = dst_addr.addr4 = listen->iface->addr.in.sin_addr;
1502: netmask = listen->iface->netmask;
1503: }
1504: }
1505:
1506: iov[0].iov_base = daemon->packet;
1507: iov[0].iov_len = daemon->edns_pktsz;
1508:
1509: msg.msg_control = control_u.control;
1510: msg.msg_controllen = sizeof(control_u);
1511: msg.msg_flags = 0;
1512: msg.msg_name = &source_addr;
1513: msg.msg_namelen = sizeof(source_addr);
1514: msg.msg_iov = iov;
1515: msg.msg_iovlen = 1;
1516:
1517: if ((n = recvmsg(listen->fd, &msg, 0)) == -1)
1518: return;
1519:
1520: if (n < (int)sizeof(struct dns_header) ||
1521: (msg.msg_flags & MSG_TRUNC) ||
1522: (header->hb3 & HB3_QR))
1523: return;
1524:
1525: /* Clear buffer beyond request to avoid risk of
1526: information disclosure. */
1527: memset(daemon->packet + n, 0, daemon->edns_pktsz - n);
1528:
1529: source_addr.sa.sa_family = family;
1530:
1531: if (family == AF_INET)
1532: {
1533: /* Source-port == 0 is an error, we can't send back to that.
1534: http://www.ietf.org/mail-archive/web/dnsop/current/msg11441.html */
1535: if (source_addr.in.sin_port == 0)
1536: return;
1537: }
1538: else
1539: {
1540: /* Source-port == 0 is an error, we can't send back to that. */
1541: if (source_addr.in6.sin6_port == 0)
1542: return;
1543: source_addr.in6.sin6_flowinfo = 0;
1544: }
1545:
1546: /* We can be configured to only accept queries from at-most-one-hop-away addresses. */
1547: if (option_bool(OPT_LOCAL_SERVICE))
1548: {
1549: struct addrlist *addr;
1550:
1551: if (family == AF_INET6)
1552: {
1553: for (addr = daemon->interface_addrs; addr; addr = addr->next)
1554: if ((addr->flags & ADDRLIST_IPV6) &&
1555: is_same_net6(&addr->addr.addr6, &source_addr.in6.sin6_addr, addr->prefixlen))
1556: break;
1557: }
1558: else
1559: {
1560: struct in_addr netmask;
1561: for (addr = daemon->interface_addrs; addr; addr = addr->next)
1562: {
1563: netmask.s_addr = htonl(~(in_addr_t)0 << (32 - addr->prefixlen));
1564: if (!(addr->flags & ADDRLIST_IPV6) &&
1565: is_same_net(addr->addr.addr4, source_addr.in.sin_addr, netmask))
1566: break;
1567: }
1568: }
1569: if (!addr)
1570: {
1571: static int warned = 0;
1572: if (!warned)
1573: {
1574: prettyprint_addr(&source_addr, daemon->addrbuff);
1575: my_syslog(LOG_WARNING, _("ignoring query from non-local network %s (logged only once)"), daemon->addrbuff);
1576: warned = 1;
1577: }
1578: return;
1579: }
1580: }
1581:
1582: if (check_dst)
1583: {
1584: struct ifreq ifr;
1585:
1586: if (msg.msg_controllen < sizeof(struct cmsghdr))
1587: return;
1588:
1589: #if defined(HAVE_LINUX_NETWORK)
1590: if (family == AF_INET)
1591: for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
1592: if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_PKTINFO)
1593: {
1594: union {
1595: unsigned char *c;
1596: struct in_pktinfo *p;
1597: } p;
1598: p.c = CMSG_DATA(cmptr);
1599: dst_addr_4 = dst_addr.addr4 = p.p->ipi_spec_dst;
1600: if_index = p.p->ipi_ifindex;
1601: }
1602: #elif defined(IP_RECVDSTADDR) && defined(IP_RECVIF)
1603: if (family == AF_INET)
1604: {
1605: for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
1606: {
1607: union {
1608: unsigned char *c;
1609: unsigned int *i;
1610: struct in_addr *a;
1611: #ifndef HAVE_SOLARIS_NETWORK
1612: struct sockaddr_dl *s;
1613: #endif
1614: } p;
1615: p.c = CMSG_DATA(cmptr);
1616: if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVDSTADDR)
1617: dst_addr_4 = dst_addr.addr4 = *(p.a);
1618: else if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVIF)
1619: #ifdef HAVE_SOLARIS_NETWORK
1620: if_index = *(p.i);
1621: #else
1622: if_index = p.s->sdl_index;
1623: #endif
1624: }
1625: }
1626: #endif
1627:
1628: if (family == AF_INET6)
1629: {
1630: for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
1631: if (cmptr->cmsg_level == IPPROTO_IPV6 && cmptr->cmsg_type == daemon->v6pktinfo)
1632: {
1633: union {
1634: unsigned char *c;
1635: struct in6_pktinfo *p;
1636: } p;
1637: p.c = CMSG_DATA(cmptr);
1638:
1639: dst_addr.addr6 = p.p->ipi6_addr;
1640: if_index = p.p->ipi6_ifindex;
1641: }
1642: }
1643:
1644: /* enforce available interface configuration */
1645:
1646: if (!indextoname(listen->fd, if_index, ifr.ifr_name))
1647: return;
1648:
1649: if (!iface_check(family, &dst_addr, ifr.ifr_name, &auth_dns))
1650: {
1651: if (!option_bool(OPT_CLEVERBIND))
1652: enumerate_interfaces(0);
1653: if (!loopback_exception(listen->fd, family, &dst_addr, ifr.ifr_name) &&
1654: !label_exception(if_index, family, &dst_addr))
1655: return;
1656: }
1657:
1658: if (family == AF_INET && option_bool(OPT_LOCALISE))
1659: {
1660: struct irec *iface;
1661:
1662: /* get the netmask of the interface which has the address we were sent to.
1663: This is no necessarily the interface we arrived on. */
1664:
1665: for (iface = daemon->interfaces; iface; iface = iface->next)
1666: if (iface->addr.sa.sa_family == AF_INET &&
1667: iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
1668: break;
1669:
1670: /* interface may be new */
1671: if (!iface && !option_bool(OPT_CLEVERBIND))
1672: enumerate_interfaces(0);
1673:
1674: for (iface = daemon->interfaces; iface; iface = iface->next)
1675: if (iface->addr.sa.sa_family == AF_INET &&
1676: iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
1677: break;
1678:
1679: /* If we failed, abandon localisation */
1680: if (iface)
1681: netmask = iface->netmask;
1682: else
1683: dst_addr_4.s_addr = 0;
1684: }
1685: }
1686:
1687: /* log_query gets called indirectly all over the place, so
1688: pass these in global variables - sorry. */
1689: daemon->log_display_id = ++daemon->log_id;
1690: daemon->log_source_addr = &source_addr;
1691:
1692: #ifdef HAVE_DUMPFILE
1693: dump_packet_udp(DUMP_QUERY, daemon->packet, (size_t)n, &source_addr, NULL, listen->fd);
1694: #endif
1695:
1696: #ifdef HAVE_CONNTRACK
1697: if (option_bool(OPT_CMARK_ALST_EN))
1698: have_mark = get_incoming_mark(&source_addr, &dst_addr, /* istcp: */ 0, &mark);
1699: #endif
1700:
1701: if (extract_request(header, (size_t)n, daemon->namebuff, &type))
1702: {
1703: #ifdef HAVE_AUTH
1704: struct auth_zone *zone;
1705: #endif
1706: log_query_mysockaddr(F_QUERY | F_FORWARD, daemon->namebuff,
1707: &source_addr, auth_dns ? "auth" : "query", type);
1708:
1709: #ifdef HAVE_CONNTRACK
1710: is_single_query = 1;
1711: #endif
1712:
1713: #ifdef HAVE_AUTH
1714: /* find queries for zones we're authoritative for, and answer them directly */
1715: if (!auth_dns && !option_bool(OPT_LOCALISE))
1716: for (zone = daemon->auth_zones; zone; zone = zone->next)
1717: if (in_zone(zone, daemon->namebuff, NULL))
1718: {
1719: auth_dns = 1;
1720: local_auth = 1;
1721: break;
1722: }
1723: #endif
1724:
1725: #ifdef HAVE_LOOP
1726: /* Check for forwarding loop */
1727: if (detect_loop(daemon->namebuff, type))
1728: return;
1729: #endif
1730: }
1731:
1732: if (find_pseudoheader(header, (size_t)n, NULL, &pheader, NULL, NULL))
1733: {
1734: unsigned short flags;
1735:
1736: have_pseudoheader = 1;
1737: GETSHORT(udp_size, pheader);
1738: pheader += 2; /* ext_rcode */
1739: GETSHORT(flags, pheader);
1740:
1741: if (flags & 0x8000)
1742: do_bit = 1;/* do bit */
1743:
1744: /* If the client provides an EDNS0 UDP size, use that to limit our reply.
1745: (bounded by the maximum configured). If no EDNS0, then it
1746: defaults to 512. We write this value into the query packet too, so that
1747: if it's forwarded, we don't specify a maximum size greater than we can handle. */
1748: if (udp_size > daemon->edns_pktsz)
1749: udp_size = daemon->edns_pktsz;
1750: else if (udp_size < PACKETSZ)
1751: udp_size = PACKETSZ; /* Sanity check - can't reduce below default. RFC 6891 6.2.3 */
1752:
1753: pheader -= 6; /* ext_class */
1754: PUTSHORT(udp_size, pheader); /* Bounding forwarded queries to maximum configured */
1755: }
1756:
1757: #ifdef HAVE_CONNTRACK
1758: #ifdef HAVE_AUTH
1759: if (!auth_dns || local_auth)
1760: #endif
1761: if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
1762: allowed = is_query_allowed_for_mark((u32)mark, is_single_query ? daemon->namebuff : NULL);
1763: #endif
1764:
1765: if (0);
1766: #ifdef HAVE_CONNTRACK
1767: else if (!allowed)
1768: {
1769: u16 swap = htons(EDE_BLOCKED);
1770:
1771: m = answer_disallowed(header, (size_t)n, (u32)mark, is_single_query ? daemon->namebuff : NULL);
1772:
1773: if (have_pseudoheader && m != 0)
1774: m = add_pseudoheader(header, m, ((unsigned char *) header) + udp_size, daemon->edns_pktsz,
1775: EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
1776:
1777: if (m >= 1)
1778: {
1779: #ifdef HAVE_DUMPFILE
1780: dump_packet_udp(DUMP_REPLY, daemon->packet, m, NULL, &source_addr, listen->fd);
1781: #endif
1782: send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
1783: (char *)header, m, &source_addr, &dst_addr, if_index);
1784: daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
1785: }
1786: }
1787: #endif
1788: #ifdef HAVE_AUTH
1789: else if (auth_dns)
1790: {
1791: m = answer_auth(header, ((char *) header) + udp_size, (size_t)n, now, &source_addr,
1792: local_auth, do_bit, have_pseudoheader);
1793: if (m >= 1)
1794: {
1795: #ifdef HAVE_DUMPFILE
1796: dump_packet_udp(DUMP_REPLY, daemon->packet, m, NULL, &source_addr, listen->fd);
1797: #endif
1798: #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
1799: if (local_auth)
1800: if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
1801: report_addresses(header, m, mark);
1802: #endif
1803: send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
1804: (char *)header, m, &source_addr, &dst_addr, if_index);
1805: daemon->metrics[METRIC_DNS_AUTH_ANSWERED]++;
1806: }
1807: }
1808: #endif
1809: else
1810: {
1811: int stale;
1812: int ad_reqd = do_bit;
1813: u16 hb3 = header->hb3, hb4 = header->hb4;
1814: int fd = listen->fd;
1815:
1816: /* RFC 6840 5.7 */
1817: if (header->hb4 & HB4_AD)
1818: ad_reqd = 1;
1819:
1820: m = answer_request(header, ((char *) header) + udp_size, (size_t)n,
1821: dst_addr_4, netmask, now, ad_reqd, do_bit, have_pseudoheader, &stale);
1822:
1823: if (m >= 1)
1824: {
1825: if (stale && have_pseudoheader)
1826: {
1827: u16 swap = htons(EDE_STALE);
1828:
1829: m = add_pseudoheader(header, m, ((unsigned char *) header) + udp_size, daemon->edns_pktsz,
1830: EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
1831: }
1832: #ifdef HAVE_DUMPFILE
1833: dump_packet_udp(DUMP_REPLY, daemon->packet, m, NULL, &source_addr, listen->fd);
1834: #endif
1835: #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
1836: if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
1837: report_addresses(header, m, mark);
1838: #endif
1839: send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
1840: (char *)header, m, &source_addr, &dst_addr, if_index);
1841: daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
1842: if (stale)
1843: daemon->metrics[METRIC_DNS_STALE_ANSWERED]++;
1844: }
1845:
1846: if (m == 0 || stale)
1847: {
1848: if (m != 0)
1849: {
1850: size_t plen;
1851:
1852: /* We answered with stale cache data, so forward the query anyway to
1853: refresh that. Restore the query from the answer packet. */
1854: pheader = find_pseudoheader(header, (size_t)m, &plen, NULL, NULL, NULL);
1855:
1856: header->hb3 = hb3;
1857: header->hb4 = hb4;
1858: header->ancount = htons(0);
1859: header->nscount = htons(0);
1860: header->arcount = htons(0);
1861:
1862: m = resize_packet(header, m, pheader, plen);
1863:
1864: /* We've already answered the client, so don't send it the answer
1865: when it comes back. */
1866: fd = -1;
1867: }
1868:
1869: if (forward_query(fd, &source_addr, &dst_addr, if_index,
1870: header, (size_t)n, ((char *) header) + udp_size, now, NULL, ad_reqd, do_bit, 0))
1871: daemon->metrics[METRIC_DNS_QUERIES_FORWARDED]++;
1872: else
1873: daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
1874: }
1875: }
1876: }
1877:
/* Send query in packet, qsize to a server determined by first,last,start and
   get the reply. return reply size.

   packet holds a two-byte length field followed by the DNS message; qsize
   is the length of the message alone. The length field is (re)written here
   in network byte order before each send. The reply is read back into the
   message area, so the query is overwritten once a reply arrives.

   Servers are taken from daemon->serverarray, beginning at index start and
   advancing with wrap-around from last back to first, until one returns an
   acceptable reply or the walk arrives back at the index it began with.

   have_mark/mark are only used when HAVE_CONNTRACK is defined, to set
   SO_MARK on newly created upstream sockets.

   On success *servp is set to the server which answered and the reply
   length is returned. Returns 0 if the question section could not be
   hashed or no server produced an acceptable reply; *servp is not written
   in that case. */
static ssize_t tcp_talk(int first, int last, int start, unsigned char *packet, size_t qsize,
			int have_mark, unsigned int mark, struct server **servp)
{
  int firstsendto = -1;
  u16 *length = (u16 *)packet;
  unsigned char *payload = &packet[2];
  struct dns_header *header = (struct dns_header *)payload;
  unsigned char c1, c2;
  unsigned char hash[HASH_SIZE], *hashp;
  unsigned int rsize;

  /* Silence unused-parameter warnings when HAVE_CONNTRACK is not defined. */
  (void)mark;
  (void)have_mark;

  /* Remember the hash of the question we are about to send, so the reply
     can be checked against it below. */
  if (!(hashp = hash_questions(header, (unsigned int)qsize, daemon->namebuff)))
    return 0;

  /* Take a private copy: hash_questions() is called again on the reply.
     NOTE(review): presumably it returns a pointer to shared storage - confirm. */
  memcpy(hash, hashp, HASH_SIZE);

  while (1)
    {
      /* Set when the query went out as part of a TCP fast-open send. */
      int data_sent = 0;
      struct server *serv;

      if (firstsendto == -1)
	firstsendto = start;
      else
	{
	  /* Advance to the next server, wrapping at the end of the range. */
	  start++;

	  if (start == last)
	    start = first;

	  /* Back where we began: every server has been tried. */
	  if (start == firstsendto)
	    break;
	}

      serv = daemon->serverarray[start];

    retry:
      /* The buffer may have been overwritten by a partial reply on a
	 previous attempt, so restore the length prefix every time. */
      *length = htons(qsize);

      if (serv->tcpfd == -1)
	{
	  /* No open connection to this server: make one. */
	  if ((serv->tcpfd = socket(serv->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
	    continue;

#ifdef HAVE_CONNTRACK
	  /* Copy connection mark of incoming query to outgoing connection. */
	  if (have_mark)
	    setsockopt(serv->tcpfd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
#endif

	  if ((!local_bind(serv->tcpfd,  &serv->source_addr, serv->interface, 0, 1)))
	    {
	      close(serv->tcpfd);
	      serv->tcpfd = -1;
	      continue;
	    }

#ifdef MSG_FASTOPEN
	  /* Try to connect and send the query in one step. */
	  server_send(serv, serv->tcpfd, packet, qsize + sizeof(u16), MSG_FASTOPEN);

	  /* NOTE(review): relies on server_send() leaving errno == 0 on
	     success - confirm against its definition. */
	  if (errno == 0)
	    data_sent = 1;
#endif

	  /* Fast-open unavailable or failed: fall back to a plain connect. */
	  if (!data_sent && connect(serv->tcpfd, &serv->addr.sa, sa_len(&serv->addr)) == -1)
	    {
	      close(serv->tcpfd);
	      serv->tcpfd = -1;
	      continue;
	    }

	  /* Record the index we connected to on the first server of the range. */
	  daemon->serverarray[first]->last_server = start;
	  /* Fresh connection: nothing has been received over it yet. */
	  serv->flags &= ~SERV_GOT_TCP;
	}

      /* Write the query (unless fast-open already sent it), then read the
	 two-byte reply length followed by that many bytes of reply. */
      if ((!data_sent && !read_write(serv->tcpfd, packet, qsize + sizeof(u16), 0)) ||
	  !read_write(serv->tcpfd, &c1, 1, 1) ||
	  !read_write(serv->tcpfd, &c2, 1, 1) ||
	  !read_write(serv->tcpfd, payload, (rsize = (c1 << 8) | c2), 1))
	{
	  close(serv->tcpfd);
	  serv->tcpfd = -1;
	  /* We get data then EOF, reopen connection to same server,
	     else try next. This avoids DoS from a server which accepts
	     connections and then closes them. */
	  if (serv->flags & SERV_GOT_TCP)
	    goto retry;
	  else
	    continue;
	}

      /* If the hash of the question section doesn't match the crc we sent, then
	 someone might be attempting to insert bogus values into the cache by
	 sending replies containing questions and bogus answers.
	 Try another server, or give up */
      if (!(hashp = hash_questions(header, rsize, daemon->namebuff)) || memcmp(hash, hashp, HASH_SIZE) != 0)
	continue;

      /* A complete reply arrived over this connection. */
      serv->flags |= SERV_GOT_TCP;

      *servp = serv;
      return rsize;
    }

  return 0;
}
1989:
#ifdef HAVE_DNSSEC
/* Recurse down the key hierarchy.

   Validate the reply in header (n bytes). status says what kind of reply it
   is: STAT_NEED_KEY means it is validated with dnssec_validate_by_ds(),
   STAT_NEED_DS with dnssec_validate_ds(), anything else with
   dnssec_validate_reply().

   While validation reports that a DNSKEY or DS record is missing (its name
   is left in keyname), a query for that record is built, sent upstream over
   TCP with tcp_talk(), and the answer validated by a recursive call. If
   that succeeds with STAT_OK the original reply is validated again.

   *keycount is decremented once per validation attempt, across all levels
   of the recursion; when it reaches zero the result is STAT_ABANDONED.
   STAT_ABANDONED is also returned when the query buffer cannot be
   allocated, no server can be selected, or tcp_talk() gets no reply.

   Returns the final validation status. */
static int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n,
			   int class, char *name, char *keyname, struct server *server,
			   int have_mark, unsigned int mark, int *keycount)
{
  int first, last, start, new_status;
  unsigned char *packet = NULL;
  struct dns_header *new_header = NULL;

  while (1)
    {
      size_t m;
      int log_save;

      /* limit the amount of work we do, to avoid cycling forever on loops in the DNS */
      if (--(*keycount) == 0)
	new_status = STAT_ABANDONED;
      else if (STAT_ISEQUAL(status, STAT_NEED_KEY))
	new_status = dnssec_validate_by_ds(now, header, n, name, keyname, class);
      else if (STAT_ISEQUAL(status, STAT_NEED_DS))
	new_status = dnssec_validate_ds(now, header, n, name, keyname, class);
      else
	new_status = dnssec_validate_reply(now, header, n, name, keyname, &class,
					   !option_bool(OPT_DNSSEC_IGN_NS) && (server->flags & SERV_DO_DNSSEC),
					   NULL, NULL, NULL);

      /* Anything other than "need more data" is a final answer. */
      if (!STAT_ISEQUAL(new_status, STAT_NEED_DS) && !STAT_ISEQUAL(new_status, STAT_NEED_KEY))
	break;

      /* Can't validate because we need a key/DS whose name now in keyname.
	 Make query for same, and recurse to validate */
      if (!packet)
	{
	  /* Allocated lazily and reused for every sub-query at this level.
	     Two-byte TCP length prefix first, DNS message after it. */
	  packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
	  new_header = (struct dns_header *)&packet[2];
	}

      if (!packet)
	{
	  new_status = STAT_ABANDONED;
	  break;
	}

      m = dnssec_generate_query(new_header, ((unsigned char *) new_header) + 65536, keyname, class,
				STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz);

      if ((start = dnssec_server(server, daemon->keyname, &first, &last)) == -1 ||
	  (m = tcp_talk(first, last, start, packet, m, have_mark, mark, &server)) == 0)
	{
	  new_status = STAT_ABANDONED;
	  break;
	}

      /* Log the sub-query under its own id, restoring the caller's after
	 the recursion. */
      log_save = daemon->log_display_id;
      daemon->log_display_id = ++daemon->log_id;

      /* The query type was chosen from new_status above, so the log text
	 must be chosen from new_status too; status only describes the
	 reply that is being validated at this level. */
      log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, keyname, &server->addr,
			   STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? "dnssec-query[DNSKEY]" : "dnssec-query[DS]", 0);

      new_status = tcp_key_recurse(now, new_status, new_header, m, class, name, keyname, server, have_mark, mark, keycount);

      daemon->log_display_id = log_save;

      /* Sub-query failed to validate: give up with its status. Otherwise
	 loop to validate the original reply again. */
      if (!STAT_ISEQUAL(new_status, STAT_OK))
	break;
    }

  /* free(NULL) is a no-op, so no guard is needed. */
  free(packet);

  return new_status;
}
#endif
2064:
2065:
2066: /* The daemon forks before calling this: it should deal with one connection,
2067: blocking as necessary, and then return. Note, need to be a bit careful
2068: about resources for debug mode, when the fork is suppressed: that's
2069: done by the caller. */
2070: unsigned char *tcp_request(int confd, time_t now,
2071: union mysockaddr *local_addr, struct in_addr netmask, int auth_dns)
2072: {
2073: size_t size = 0;
2074: int norebind;
2075: #ifdef HAVE_CONNTRACK
2076: int is_single_query = 0, allowed = 1;
2077: #endif
2078: #ifdef HAVE_AUTH
2079: int local_auth = 0;
2080: #endif
2081: int checking_disabled, do_bit, added_pheader = 0, have_pseudoheader = 0;
2082: int cacheable, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0;
2083: size_t m;
2084: unsigned short qtype;
2085: unsigned int gotname;
2086: /* Max TCP packet + slop + size */
2087: unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
2088: unsigned char *payload = &packet[2];
2089: unsigned char c1, c2;
2090: /* largest field in header is 16-bits, so this is still sufficiently aligned */
2091: struct dns_header *header = (struct dns_header *)payload;
2092: u16 *length = (u16 *)packet;
2093: struct server *serv;
2094: struct in_addr dst_addr_4;
2095: union mysockaddr peer_addr;
2096: socklen_t peer_len = sizeof(union mysockaddr);
2097: int query_count = 0;
2098: unsigned char *pheader;
2099: unsigned int mark = 0;
2100: int have_mark = 0;
2101: int first, last, stale, do_stale = 0;
2102: unsigned int flags = 0;
2103: u16 hb3, hb4;
2104:
2105: if (!packet || getpeername(confd, (struct sockaddr *)&peer_addr, &peer_len) == -1)
2106: return packet;
2107:
2108: #ifdef HAVE_CONNTRACK
2109: /* Get connection mark of incoming query to set on outgoing connections. */
2110: if (option_bool(OPT_CONNTRACK) || option_bool(OPT_CMARK_ALST_EN))
2111: {
2112: union all_addr local;
2113:
2114: if (local_addr->sa.sa_family == AF_INET6)
2115: local.addr6 = local_addr->in6.sin6_addr;
2116: else
2117: local.addr4 = local_addr->in.sin_addr;
2118:
2119: have_mark = get_incoming_mark(&peer_addr, &local, 1, &mark);
2120: }
2121: #endif
2122:
2123: /* We can be configured to only accept queries from at-most-one-hop-away addresses. */
2124: if (option_bool(OPT_LOCAL_SERVICE))
2125: {
2126: struct addrlist *addr;
2127:
2128: if (peer_addr.sa.sa_family == AF_INET6)
2129: {
2130: for (addr = daemon->interface_addrs; addr; addr = addr->next)
2131: if ((addr->flags & ADDRLIST_IPV6) &&
2132: is_same_net6(&addr->addr.addr6, &peer_addr.in6.sin6_addr, addr->prefixlen))
2133: break;
2134: }
2135: else
2136: {
2137: struct in_addr netmask;
2138: for (addr = daemon->interface_addrs; addr; addr = addr->next)
2139: {
2140: netmask.s_addr = htonl(~(in_addr_t)0 << (32 - addr->prefixlen));
2141: if (!(addr->flags & ADDRLIST_IPV6) &&
2142: is_same_net(addr->addr.addr4, peer_addr.in.sin_addr, netmask))
2143: break;
2144: }
2145: }
2146: if (!addr)
2147: {
2148: prettyprint_addr(&peer_addr, daemon->addrbuff);
2149: my_syslog(LOG_WARNING, _("ignoring query from non-local network %s"), daemon->addrbuff);
2150: return packet;
2151: }
2152: }
2153:
2154: while (1)
2155: {
2156: int ede = EDE_UNSET;
2157:
2158: if (query_count == TCP_MAX_QUERIES)
2159: return packet;
2160:
2161: if (do_stale)
2162: {
2163: size_t plen;
2164:
2165: /* We answered the last query with stale data. Now try and get fresh data.
2166: Restore query from answer. */
2167: pheader = find_pseudoheader(header, m, &plen, NULL, NULL, NULL);
2168:
2169: header->hb3 = hb3;
2170: header->hb4 = hb4;
2171: header->ancount = htons(0);
2172: header->nscount = htons(0);
2173: header->arcount = htons(0);
2174:
2175: size = resize_packet(header, m, pheader, plen);
2176: }
2177: else
2178: {
2179: if (!read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) ||
2180: !(size = c1 << 8 | c2) ||
2181: !read_write(confd, payload, size, 1))
2182: return packet;
2183:
2184: /* for stale-answer processing. */
2185: hb3 = header->hb3;
2186: hb4 = header->hb4;
2187: }
2188:
2189: if (size < (int)sizeof(struct dns_header))
2190: continue;
2191:
2192: /* Clear buffer beyond request to avoid risk of
2193: information disclosure. */
2194: memset(payload + size, 0, 65536 - size);
2195:
2196: query_count++;
2197:
2198: /* log_query gets called indirectly all over the place, so
2199: pass these in global variables - sorry. */
2200: daemon->log_display_id = ++daemon->log_id;
2201: daemon->log_source_addr = &peer_addr;
2202:
2203: /* save state of "cd" flag in query */
2204: if ((checking_disabled = header->hb4 & HB4_CD))
2205: no_cache_dnssec = 1;
2206:
2207: if ((gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
2208: {
2209: #ifdef HAVE_AUTH
2210: struct auth_zone *zone;
2211: #endif
2212:
2213: #ifdef HAVE_CONNTRACK
2214: is_single_query = 1;
2215: #endif
2216:
2217: if (!do_stale)
2218: {
2219: log_query_mysockaddr(F_QUERY | F_FORWARD, daemon->namebuff,
2220: &peer_addr, auth_dns ? "auth" : "query", qtype);
2221:
2222: #ifdef HAVE_AUTH
2223: /* find queries for zones we're authoritative for, and answer them directly */
2224: if (!auth_dns && !option_bool(OPT_LOCALISE))
2225: for (zone = daemon->auth_zones; zone; zone = zone->next)
2226: if (in_zone(zone, daemon->namebuff, NULL))
2227: {
2228: auth_dns = 1;
2229: local_auth = 1;
2230: break;
2231: }
2232: #endif
2233: }
2234: }
2235:
2236: norebind = domain_no_rebind(daemon->namebuff);
2237:
2238: if (local_addr->sa.sa_family == AF_INET)
2239: dst_addr_4 = local_addr->in.sin_addr;
2240: else
2241: dst_addr_4.s_addr = 0;
2242:
2243: do_bit = 0;
2244:
2245: if (find_pseudoheader(header, (size_t)size, NULL, &pheader, NULL, NULL))
2246: {
2247: unsigned short flags;
2248:
2249: have_pseudoheader = 1;
2250: pheader += 4; /* udp_size, ext_rcode */
2251: GETSHORT(flags, pheader);
2252:
2253: if (flags & 0x8000)
2254: do_bit = 1; /* do bit */
2255: }
2256:
2257: #ifdef HAVE_CONNTRACK
2258: #ifdef HAVE_AUTH
2259: if (!auth_dns || local_auth)
2260: #endif
2261: if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
2262: allowed = is_query_allowed_for_mark((u32)mark, is_single_query ? daemon->namebuff : NULL);
2263: #endif
2264:
2265: if (0);
2266: #ifdef HAVE_CONNTRACK
2267: else if (!allowed)
2268: {
2269: u16 swap = htons(EDE_BLOCKED);
2270:
2271: m = answer_disallowed(header, size, (u32)mark, is_single_query ? daemon->namebuff : NULL);
2272:
2273: if (have_pseudoheader && m != 0)
2274: m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz,
2275: EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
2276: }
2277: #endif
2278: #ifdef HAVE_AUTH
2279: else if (auth_dns)
2280: m = answer_auth(header, ((char *) header) + 65536, (size_t)size, now, &peer_addr,
2281: local_auth, do_bit, have_pseudoheader);
2282: #endif
2283: else
2284: {
2285: int ad_reqd = do_bit;
2286: /* RFC 6840 5.7 */
2287: if (header->hb4 & HB4_AD)
2288: ad_reqd = 1;
2289:
2290: if (do_stale)
2291: m = 0;
2292: else
2293: /* m > 0 if answered from cache */
2294: m = answer_request(header, ((char *) header) + 65536, (size_t)size,
2295: dst_addr_4, netmask, now, ad_reqd, do_bit, have_pseudoheader, &stale);
2296:
2297: /* Do this by steam now we're not in the select() loop */
2298: check_log_writer(1);
2299:
2300: if (m == 0)
2301: {
2302: struct server *master;
2303: int start;
2304:
2305: if (lookup_domain(daemon->namebuff, gotname, &first, &last))
2306: flags = is_local_answer(now, first, daemon->namebuff);
2307: else
2308: {
2309: /* No configured servers */
2310: ede = EDE_NOT_READY;
2311: flags = 0;
2312: }
2313:
2314: /* don't forward A or AAAA queries for simple names, except the empty name */
2315: if (!flags &&
2316: option_bool(OPT_NODOTS_LOCAL) &&
2317: (gotname & (F_IPV4 | F_IPV6)) &&
2318: !strchr(daemon->namebuff, '.') &&
2319: strlen(daemon->namebuff) != 0)
2320: flags = check_for_local_domain(daemon->namebuff, now) ? F_NOERR : F_NXDOMAIN;
2321:
2322: if (!flags && ede != EDE_NOT_READY)
2323: {
2324: master = daemon->serverarray[first];
2325:
2326: if (option_bool(OPT_ORDER) || master->last_server == -1)
2327: start = first;
2328: else
2329: start = master->last_server;
2330:
2331: size = add_edns0_config(header, size, ((unsigned char *) header) + 65536, &peer_addr, now, &cacheable);
2332:
2333: #ifdef HAVE_DNSSEC
2334: if (option_bool(OPT_DNSSEC_VALID) && (master->flags & SERV_DO_DNSSEC))
2335: {
2336: size = add_do_bit(header, size, ((unsigned char *) header) + 65536);
2337:
2338: /* For debugging, set Checking Disabled, otherwise, have the upstream check too,
2339: this allows it to select auth servers when one is returning bad data. */
2340: if (option_bool(OPT_DNSSEC_DEBUG))
2341: header->hb4 |= HB4_CD;
2342: }
2343: #endif
2344:
2345: /* Check if we added a pheader on forwarding - may need to
2346: strip it from the reply. */
2347: if (!have_pseudoheader && find_pseudoheader(header, size, NULL, NULL, NULL, NULL))
2348: added_pheader = 1;
2349:
2350: /* Loop round available servers until we succeed in connecting to one. */
2351: if ((m = tcp_talk(first, last, start, packet, size, have_mark, mark, &serv)) == 0)
2352: {
2353: ede = EDE_NETERR;
2354: break;
2355: }
2356:
2357: /* get query name again for logging - may have been overwritten */
2358: if (!(gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
2359: strcpy(daemon->namebuff, "query");
2360: log_query_mysockaddr(F_SERVER | F_FORWARD, daemon->namebuff, &serv->addr, NULL, 0);
2361:
2362: #ifdef HAVE_DNSSEC
2363: if (option_bool(OPT_DNSSEC_VALID) && !checking_disabled && (master->flags & SERV_DO_DNSSEC))
2364: {
2365: int keycount = DNSSEC_WORK; /* Limit to number of DNSSEC questions, to catch loops and avoid filling cache. */
2366: int status = tcp_key_recurse(now, STAT_OK, header, m, 0, daemon->namebuff, daemon->keyname,
2367: serv, have_mark, mark, &keycount);
2368: char *result, *domain = "result";
2369:
2370: union all_addr a;
2371: a.log.ede = ede = errflags_to_ede(status);
2372:
2373: if (STAT_ISEQUAL(status, STAT_ABANDONED))
2374: {
2375: result = "ABANDONED";
2376: status = STAT_BOGUS;
2377: }
2378: else
2379: result = (STAT_ISEQUAL(status, STAT_SECURE) ? "SECURE" : (STAT_ISEQUAL(status, STAT_INSECURE) ? "INSECURE" : "BOGUS"));
2380:
2381: if (STAT_ISEQUAL(status, STAT_SECURE))
2382: cache_secure = 1;
2383: else if (STAT_ISEQUAL(status, STAT_BOGUS))
2384: {
2385: no_cache_dnssec = 1;
2386: bogusanswer = 1;
2387:
2388: if (extract_request(header, m, daemon->namebuff, NULL))
2389: domain = daemon->namebuff;
2390: }
2391:
2392: log_query(F_SECSTAT, domain, &a, result, 0);
2393: }
2394: #endif
2395:
2396: /* restore CD bit to the value in the query */
2397: if (checking_disabled)
2398: header->hb4 |= HB4_CD;
2399: else
2400: header->hb4 &= ~HB4_CD;
2401:
2402: /* Never cache answers which are contingent on the source or MAC address EDSN0 option,
2403: since the cache is ignorant of such things. */
2404: if (!cacheable)
2405: no_cache_dnssec = 1;
2406:
2407: m = process_reply(header, now, serv, (unsigned int)m,
2408: option_bool(OPT_NO_REBIND) && !norebind, no_cache_dnssec, cache_secure, bogusanswer,
2409: ad_reqd, do_bit, added_pheader, &peer_addr, ((unsigned char *)header) + 65536, ede);
2410: }
2411: }
2412: }
2413:
2414: if (do_stale)
2415: break;
2416:
2417: /* In case of local answer or no connections made. */
2418: if (m == 0)
2419: {
2420: if (!(m = make_local_answer(flags, gotname, size, header, daemon->namebuff,
2421: ((char *) header) + 65536, first, last, ede)))
2422: break;
2423:
2424: if (have_pseudoheader)
2425: {
2426: u16 swap = htons((u16)ede);
2427:
2428: if (ede != EDE_UNSET)
2429: m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
2430: else
2431: m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, 0, NULL, 0, do_bit, 0);
2432: }
2433: }
2434: else if (stale)
2435: {
2436: u16 swap = htons((u16)EDE_STALE);
2437:
2438: m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
2439: }
2440:
2441: check_log_writer(1);
2442:
2443: *length = htons(m);
2444:
2445: #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
2446: #ifdef HAVE_AUTH
2447: if (!auth_dns || local_auth)
2448: #endif
2449: if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
2450: report_addresses(header, m, mark);
2451: #endif
2452: if (!read_write(confd, packet, m + sizeof(u16), 0))
2453: break;
2454:
2455: /* If we answered with stale data, this process will now try and get fresh data into
2456: the cache then and cannot therefore accept new queries. Close the incoming
2457: connection to signal that to the client. Then set do_stale and loop round
2458: once more to try and get fresh data, after which we exit. */
2459: if (stale)
2460: {
2461: shutdown(confd, SHUT_RDWR);
2462: close(confd);
2463: do_stale = 1;
2464: }
2465: }
2466:
2467: /* If we ran once to get fresh data, confd is already closed. */
2468: if (!do_stale)
2469: {
2470: shutdown(confd, SHUT_RDWR);
2471: close(confd);
2472: }
2473:
2474: return packet;
2475: }
2476:
2477: /* return a UDP socket bound to a random port, have to cope with straying into
2478: occupied port nos and reserved ones. */
2479: static int random_sock(struct server *s)
2480: {
2481: int fd;
2482:
2483: if ((fd = socket(s->source_addr.sa.sa_family, SOCK_DGRAM, 0)) != -1)
2484: {
2485: /* We need to set IPV6ONLY so we can use the same ports
2486: for IPv4 and IPV6, otherwise, in restriced port situations,
2487: we can end up with all our available ports in use for
2488: one address family, and the other address family cannot be used. */
2489: if (s->source_addr.sa.sa_family == AF_INET6)
2490: {
2491: int opt = 1;
2492:
2493: if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &opt, sizeof(opt)) == -1)
2494: {
2495: close(fd);
2496: return -1;
2497: }
2498: }
2499:
2500: if (local_bind(fd, &s->source_addr, s->interface, s->ifindex, 0))
2501: return fd;
2502:
2503: /* don't log errors due to running out of available ports, we handle those. */
2504: if (!sockaddr_isnull(&s->source_addr) || errno != EADDRINUSE)
2505: {
2506: if (s->interface[0] == 0)
2507: (void)prettyprint_addr(&s->source_addr, daemon->addrbuff);
2508: else
2509: safe_strncpy(daemon->addrbuff, s->interface, ADDRSTRLEN);
2510:
2511: my_syslog(LOG_ERR, _("failed to bind server socket to %s: %s"),
2512: daemon->addrbuff, strerror(errno));
2513: }
2514:
2515: close(fd);
2516: }
2517:
2518: return -1;
2519: }
2520:
2521: /* compare source addresses and interface, serv2 can be null. */
2522: static int server_isequal(const struct server *serv1,
2523: const struct server *serv2)
2524: {
2525: return (serv2 &&
2526: serv2->ifindex == serv1->ifindex &&
2527: sockaddr_isequal(&serv2->source_addr, &serv1->source_addr) &&
2528: strncmp(serv2->interface, serv1->interface, IF_NAMESIZE) == 0);
2529: }
2530:
2531: /* fdlp points to chain of randomfds already in use by transaction.
2532: If there's already a suitable one, return it, else allocate a
2533: new one and add it to the list.
2534:
2535: Not leaking any resources in the face of allocation failures
2536: is rather convoluted here.
2537:
2538: Note that rfd->serv may be NULL, when a server goes away.
2539: */
2540: int allocate_rfd(struct randfd_list **fdlp, struct server *serv)
2541: {
2542: static int finger = 0;
2543: int i, j = 0;
2544: int ports_full = 0;
2545: struct randfd_list **up, *rfl, *found, **found_link;
2546: struct randfd *rfd = NULL;
2547: int fd = 0;
2548: int ports_avail = 0;
2549:
2550: /* We can't have more randomsocks for this AF available than ports in our port range,
2551: so check that here, to avoid trying and failing to bind every port
2552: in local_bind(), called from random_sock(). The actual check is below when
2553: ports_avail != 0 */
2554: if (daemon->max_port != 0)
2555: {
2556: ports_avail = daemon->max_port - daemon->min_port + 1;
2557: if (ports_avail >= SMALL_PORT_RANGE)
2558: ports_avail = 0;
2559: }
2560:
2561: /* If server has a pre-allocated fd, use that. */
2562: if (serv->sfd)
2563: return serv->sfd->fd;
2564:
2565: /* existing suitable random port socket linked to this transaction?
2566: Find the last one in the list and count how many there are. */
2567: for (found = NULL, found_link = NULL, i = 0, up = fdlp, rfl = *fdlp; rfl; up = &rfl->next, rfl = rfl->next)
2568: if (server_isequal(serv, rfl->rfd->serv))
2569: {
2570: i++;
2571: found = rfl;
2572: found_link = up;
2573: }
2574:
2575: /* We have the maximum number for this query already. Promote
2576: the last one on the list to the head, to circulate them,
2577: and return it. */
2578: if (found && i >= daemon->randport_limit)
2579: {
2580: *found_link = found->next;
2581: found->next = *fdlp;
2582: *fdlp = found;
2583: return found->rfd->fd;
2584: }
2585:
2586: /* check for all available ports in use. */
2587: if (ports_avail != 0)
2588: {
2589: int ports_inuse;
2590:
2591: for (ports_inuse = 0, i = 0; i < daemon->numrrand; i++)
2592: if (daemon->randomsocks[i].refcount != 0 &&
2593: daemon->randomsocks[i].serv->source_addr.sa.sa_family == serv->source_addr.sa.sa_family &&
2594: ++ports_inuse >= ports_avail)
2595: {
2596: ports_full = 1;
2597: break;
2598: }
2599: }
2600:
2601: /* limit the number of sockets we have open to avoid starvation of
2602: (eg) TFTP. Once we have a reasonable number, randomness should be OK */
2603: if (!ports_full)
2604: for (i = 0; i < daemon->numrrand; i++)
2605: if (daemon->randomsocks[i].refcount == 0)
2606: {
2607: if ((fd = random_sock(serv)) != -1)
2608: {
2609: rfd = &daemon->randomsocks[i];
2610: rfd->serv = serv;
2611: rfd->fd = fd;
2612: rfd->refcount = 1;
2613: }
2614: break;
2615: }
2616:
2617: /* No good existing. Need new link. */
2618: if ((rfl = daemon->rfl_spare))
2619: daemon->rfl_spare = rfl->next;
2620: else if (!(rfl = whine_malloc(sizeof(struct randfd_list))))
2621: {
2622: /* malloc failed, don't leak allocated sock */
2623: if (rfd)
2624: {
2625: close(rfd->fd);
2626: rfd->refcount = 0;
2627: }
2628:
2629: return -1;
2630: }
2631:
2632: /* No free ones or cannot get new socket, grab an existing one */
2633: if (!rfd)
2634: for (j = 0; j < daemon->numrrand; j++)
2635: {
2636: i = (j + finger) % daemon->numrrand;
2637: if (daemon->randomsocks[i].refcount != 0 &&
2638: server_isequal(serv, daemon->randomsocks[i].serv) &&
2639: daemon->randomsocks[i].refcount != 0xfffe)
2640: {
2641: struct randfd_list *rl;
2642: /* Don't pick one we already have. */
2643: for (rl = *fdlp; rl; rl = rl->next)
2644: if (rl->rfd == &daemon->randomsocks[i])
2645: break;
2646:
2647: if (!rl)
2648: {
2649: finger = i + 1;
2650: rfd = &daemon->randomsocks[i];
2651: rfd->refcount++;
2652: break;
2653: }
2654: }
2655: }
2656:
2657: if (!rfd) /* should be when j == daemon->numrrand */
2658: {
2659: struct randfd_list *rfl_poll;
2660:
2661: /* there are no free slots, and non with the same parameters we can piggy-back on.
2662: We're going to have to allocate a new temporary record, distinguished by
2663: refcount == 0xffff. This will exist in the frec randfd list, never be shared,
2664: and be freed when no longer in use. It will also be held on
2665: the daemon->rfl_poll list so the poll system can find it. */
2666:
2667: if ((rfl_poll = daemon->rfl_spare))
2668: daemon->rfl_spare = rfl_poll->next;
2669: else
2670: rfl_poll = whine_malloc(sizeof(struct randfd_list));
2671:
2672: if (!rfl_poll ||
2673: !(rfd = whine_malloc(sizeof(struct randfd))) ||
2674: (fd = random_sock(serv)) == -1)
2675: {
2676:
2677: /* Don't leak anything we may already have */
2678: rfl->next = daemon->rfl_spare;
2679: daemon->rfl_spare = rfl;
2680:
2681: if (rfl_poll)
2682: {
2683: rfl_poll->next = daemon->rfl_spare;
2684: daemon->rfl_spare = rfl_poll;
2685: }
2686:
2687: if (rfd)
2688: free(rfd);
2689:
2690: return -1; /* doom */
2691: }
2692:
2693: /* Note rfd->serv not set here, since it's not reused */
2694: rfd->fd = fd;
2695: rfd->refcount = 0xffff; /* marker for temp record */
2696:
2697: rfl_poll->rfd = rfd;
2698: rfl_poll->next = daemon->rfl_poll;
2699: daemon->rfl_poll = rfl_poll;
2700: }
2701:
2702: rfl->rfd = rfd;
2703: rfl->next = *fdlp;
2704: *fdlp = rfl;
2705:
2706: return rfl->rfd->fd;
2707: }
2708:
2709: void free_rfds(struct randfd_list **fdlp)
2710: {
2711: struct randfd_list *tmp, *rfl, *poll, *next, **up;
2712:
2713: for (rfl = *fdlp; rfl; rfl = tmp)
2714: {
2715: if (rfl->rfd->refcount == 0xffff || --(rfl->rfd->refcount) == 0)
2716: close(rfl->rfd->fd);
2717:
2718: /* temporary overflow record */
2719: if (rfl->rfd->refcount == 0xffff)
2720: {
2721: free(rfl->rfd);
2722:
2723: /* go through the link of all these by steam to delete.
2724: This list is expected to be almost always empty. */
2725: for (poll = daemon->rfl_poll, up = &daemon->rfl_poll; poll; poll = next)
2726: {
2727: next = poll->next;
2728:
2729: if (poll->rfd == rfl->rfd)
2730: {
2731: *up = poll->next;
2732: poll->next = daemon->rfl_spare;
2733: daemon->rfl_spare = poll;
2734: }
2735: else
2736: up = &poll->next;
2737: }
2738: }
2739:
2740: tmp = rfl->next;
2741: rfl->next = daemon->rfl_spare;
2742: daemon->rfl_spare = rfl;
2743: }
2744:
2745: *fdlp = NULL;
2746: }
2747:
2748: static void free_frec(struct frec *f)
2749: {
2750: struct frec_src *last;
2751:
2752: /* add back to freelist if not the record builtin to every frec. */
2753: for (last = f->frec_src.next; last && last->next; last = last->next) ;
2754: if (last)
2755: {
2756: last->next = daemon->free_frec_src;
2757: daemon->free_frec_src = f->frec_src.next;
2758: }
2759:
2760: f->frec_src.next = NULL;
2761: free_rfds(&f->rfds);
2762: f->sentto = NULL;
2763: f->flags = 0;
2764:
2765: if (f->stash)
2766: {
2767: blockdata_free(f->stash);
2768: f->stash = NULL;
2769: }
2770:
2771: #ifdef HAVE_DNSSEC
2772: /* Anything we're waiting on is pointless now, too */
2773: if (f->blocking_query)
2774: {
2775: struct frec *n, **up;
2776:
2777: /* unlink outselves from the blocking query's dependents list. */
2778: for (n = f->blocking_query->dependent, up = &f->blocking_query->dependent; n; n = n->next_dependent)
2779: if (n == f)
2780: {
2781: *up = n->next_dependent;
2782: break;
2783: }
2784: else
2785: up = &n->next_dependent;
2786:
2787: /* If we were the only/last dependent, free the blocking query too. */
2788: if (!f->blocking_query->dependent)
2789: free_frec(f->blocking_query);
2790: }
2791:
2792: f->blocking_query = NULL;
2793: f->dependent = NULL;
2794: f->next_dependent = NULL;
2795: #endif
2796: }
2797:
2798:
2799:
2800: /* Impose an absolute
2801: limit of 4*TIMEOUT before we wipe things (for random sockets).
2802: If force is set, always return a result, even if we have
2803: to allocate above the limit, and don'y free any records.
2804: This is set when allocating for DNSSEC to avoid cutting off
2805: the branch we are sitting on. */
2806: static struct frec *get_new_frec(time_t now, struct server *master, int force)
2807: {
2808: struct frec *f, *oldest, *target;
2809: int count;
2810:
2811: /* look for free records, garbage collect old records and count number in use by our server-group. */
2812: for (f = daemon->frec_list, oldest = NULL, target = NULL, count = 0; f; f = f->next)
2813: {
2814: if (!f->sentto)
2815: target = f;
2816: else
2817: {
2818: #ifdef HAVE_DNSSEC
2819: /* Don't free DNSSEC sub-queries here, as we may end up with
2820: dangling references to them. They'll go when their "real" query
2821: is freed. */
2822: if (!f->dependent && !force)
2823: #endif
2824: {
2825: if (difftime(now, f->time) >= 4*TIMEOUT)
2826: {
2827: daemon->metrics[METRIC_DNS_UNANSWERED_QUERY]++;
2828: free_frec(f);
2829: target = f;
2830: }
2831: else if (!oldest || difftime(f->time, oldest->time) <= 0)
2832: oldest = f;
2833: }
2834: }
2835:
2836: if (f->sentto && ((int)difftime(now, f->time)) < TIMEOUT && server_samegroup(f->sentto, master))
2837: count++;
2838: }
2839:
2840: if (!force && count >= daemon->ftabsize)
2841: {
2842: query_full(now, master->domain);
2843: return NULL;
2844: }
2845:
2846: if (!target && oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
2847: {
2848: /* can't find empty one, use oldest if there is one and it's older than timeout */
2849: daemon->metrics[METRIC_DNS_UNANSWERED_QUERY]++;
2850: free_frec(oldest);
2851: target = oldest;
2852: }
2853:
2854: if (!target && (target = (struct frec *)whine_malloc(sizeof(struct frec))))
2855: {
2856: target->next = daemon->frec_list;
2857: daemon->frec_list = target;
2858: }
2859:
2860: if (target)
2861: {
2862: target->time = now;
2863: target->forward_delay = daemon->fast_retry_time;
2864: }
2865:
2866: return target;
2867: }
2868:
2869: static void query_full(time_t now, char *domain)
2870: {
2871: static time_t last_log = 0;
2872:
2873: if ((int)difftime(now, last_log) > 5)
2874: {
2875: last_log = now;
2876: if (!domain || strlen(domain) == 0)
2877: my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries reached (max: %d)"), daemon->ftabsize);
2878: else
2879: my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries to %s reached (max: %d)"), domain, daemon->ftabsize);
2880: }
2881: }
2882:
2883:
2884: static struct frec *lookup_frec(unsigned short id, int fd, void *hash, int *firstp, int *lastp)
2885: {
2886: struct frec *f;
2887: struct server *s;
2888: int first, last;
2889: struct randfd_list *fdl;
2890:
2891: if (hash)
2892: for (f = daemon->frec_list; f; f = f->next)
2893: if (f->sentto && f->new_id == id &&
2894: (memcmp(hash, f->hash, HASH_SIZE) == 0))
2895: {
2896: filter_servers(f->sentto->arrayposn, F_SERVER, firstp, lastp);
2897:
2898: /* sent from random port */
2899: for (fdl = f->rfds; fdl; fdl = fdl->next)
2900: if (fdl->rfd->fd == fd)
2901: return f;
2902:
2903: /* Sent to upstream from socket associated with a server.
2904: Note we have to iterate over all the possible servers, since they may
2905: have different bound sockets. */
2906: for (first = *firstp, last = *lastp; first != last; first++)
2907: {
2908: s = daemon->serverarray[first];
2909: if (s->sfd && s->sfd->fd == fd)
2910: return f;
2911: }
2912: }
2913:
2914: return NULL;
2915: }
2916:
2917: static struct frec *lookup_frec_by_query(void *hash, unsigned int flags, unsigned int flagmask)
2918: {
2919: struct frec *f;
2920:
2921: if (hash)
2922: for (f = daemon->frec_list; f; f = f->next)
2923: if (f->sentto &&
2924: (f->flags & flagmask) == flags &&
2925: memcmp(hash, f->hash, HASH_SIZE) == 0)
2926: return f;
2927:
2928: return NULL;
2929: }
2930:
#ifdef HAVE_DNSSEC
/* DNSSEC frecs have the complete query in the block stash.
   Search for an existing query using that.

   target - name being asked for.
   class  - class being asked for.
   flags  - a record is considered only if it has at least one of these
            flag bits set; the query type is implied by them.
   header - scratch buffer which each candidate's stashed query is
            unpacked into; its contents are overwritten.

   Returns the first in-use record whose stashed query matches target
   (as judged by extract_name()) and class, or NULL. */
static struct frec *lookup_frec_dnssec(char *target, int class, int flags, struct dns_header *header)
{
  struct frec *f;

  for (f = daemon->frec_list; f; f = f->next)
    {
      unsigned char *p;
      int hclass;

      if (!f->sentto || !(f->flags & flags))
	continue;

      /* Unpack the stashed query so its question can be examined. */
      if (!blockdata_retrieve(f->stash, f->stash_len, (void *)header))
	continue;

      p = (unsigned char *)(header+1);

      if (extract_name(header, f->stash_len, &p, target, 0, 4) != 1)
	continue;

      p += 2; /* type, known from flags */
      GETSHORT(hclass, p);

      if (class == hclass)
	return f;
    }

  return NULL;
}
#endif
2961:
2962: /* Send query packet again, if we can. */
2963: void resend_query()
2964: {
2965: if (daemon->srv_save)
2966: server_send(daemon->srv_save, daemon->fd_save,
2967: daemon->packet, daemon->packet_len, 0);
2968: }
2969:
2970: /* A server record is going away, remove references to it */
2971: void server_gone(struct server *server)
2972: {
2973: struct frec *f;
2974: int i;
2975:
2976: for (f = daemon->frec_list; f; f = f->next)
2977: if (f->sentto && f->sentto == server)
2978: free_frec(f);
2979:
2980: /* If any random socket refers to this server, NULL the reference.
2981: No more references to the socket will be created in the future. */
2982: for (i = 0; i < daemon->numrrand; i++)
2983: if (daemon->randomsocks[i].refcount != 0 && daemon->randomsocks[i].serv == server)
2984: daemon->randomsocks[i].serv = NULL;
2985:
2986: if (daemon->srv_save == server)
2987: daemon->srv_save = NULL;
2988: }
2989:
2990: /* return unique random ids. */
2991: static unsigned short get_id(void)
2992: {
2993: unsigned short ret = 0;
2994: struct frec *f;
2995:
2996: while (1)
2997: {
2998: ret = rand16();
2999:
3000: /* ensure id is unique. */
3001: for (f = daemon->frec_list; f; f = f->next)
3002: if (f->sentto && f->new_id == ret)
3003: break;
3004:
3005: if (!f)
3006: return ret;
3007: }
3008: }
/* FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org> */