File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / dnsmasq / src / forward.c
Revision 1.1.1.5 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Sep 27 11:02:07 2023 UTC (9 months, 1 week ago) by misho
Branches: elwix, dnsmasq, MAIN
CVS tags: v8_2p1, HEAD
Version 8.2p1

    1: /* dnsmasq is Copyright (c) 2000-2022 Simon Kelley
    2: 
    3:    This program is free software; you can redistribute it and/or modify
    4:    it under the terms of the GNU General Public License as published by
    5:    the Free Software Foundation; version 2 dated June, 1991, or
    6:    (at your option) version 3 dated 29 June, 2007.
    7:  
    8:    This program is distributed in the hope that it will be useful,
    9:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   10:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   11:    GNU General Public License for more details.
   12:      
   13:    You should have received a copy of the GNU General Public License
   14:    along with this program.  If not, see <http://www.gnu.org/licenses/>.
   15: */
   16: 
   17: #include "dnsmasq.h"
   18: 
   /* Forward declarations of file-local (static) helpers. */
   19: static struct frec *get_new_frec(time_t now, struct server *serv, int force);
   20: static struct frec *lookup_frec(unsigned short id, int fd, void *hash, int *firstp, int *lastp);
   21: static struct frec *lookup_frec_by_query(void *hash, unsigned int flags, unsigned int flagmask);
   /* Only declared when DNSSEC support is compiled in. */
   22: #ifdef HAVE_DNSSEC
   23: static struct frec *lookup_frec_dnssec(char *target, int class, int flags, struct dns_header *header);
   24: #endif
   25: 
   26: static unsigned short get_id(void);
   27: static void free_frec(struct frec *f);
   28: static void query_full(time_t now, char *domain);
   29: 
   30: static void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status);
   31: 
   32: /* Send a UDP packet with its source address set as "source" 
   33:    unless nowild is true, when we just send it with the kernel default */
   34: int send_from(int fd, int nowild, char *packet, size_t len, 
   35: 	      union mysockaddr *to, union all_addr *source,
   36: 	      unsigned int iface)
   37: {
   38:   struct msghdr msg;
   39:   struct iovec iov[1]; 
   40:   union {
   41:     struct cmsghdr align; /* this ensures alignment */
   42: #if defined(HAVE_LINUX_NETWORK)
   43:     char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
   44: #elif defined(IP_SENDSRCADDR)
   45:     char control[CMSG_SPACE(sizeof(struct in_addr))];
   46: #endif
   47:     char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
   48:   } control_u;
   49:   
   50:   iov[0].iov_base = packet;
   51:   iov[0].iov_len = len;
   52: 
   53:   msg.msg_control = NULL;
   54:   msg.msg_controllen = 0;
   55:   msg.msg_flags = 0;
   56:   msg.msg_name = to;
   57:   msg.msg_namelen = sa_len(to);
   58:   msg.msg_iov = iov;
   59:   msg.msg_iovlen = 1;
   60:   
   61:   if (!nowild)
   62:     {
   63:       struct cmsghdr *cmptr;
   64:       msg.msg_control = &control_u;
   65:       msg.msg_controllen = sizeof(control_u);
   66:       cmptr = CMSG_FIRSTHDR(&msg);
   67: 
   68:       if (to->sa.sa_family == AF_INET)
   69: 	{
   70: #if defined(HAVE_LINUX_NETWORK)
   71: 	  struct in_pktinfo p;
   72: 	  p.ipi_ifindex = 0;
   73: 	  p.ipi_spec_dst = source->addr4;
   74: 	  msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
   75: 	  memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
   76: 	  cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
   77: 	  cmptr->cmsg_level = IPPROTO_IP;
   78: 	  cmptr->cmsg_type = IP_PKTINFO;
   79: #elif defined(IP_SENDSRCADDR)
   80: 	  msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
   81: 	  memcpy(CMSG_DATA(cmptr), &(source->addr4), sizeof(source->addr4));
   82: 	  cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
   83: 	  cmptr->cmsg_level = IPPROTO_IP;
   84: 	  cmptr->cmsg_type = IP_SENDSRCADDR;
   85: #endif
   86: 	}
   87:       else
   88: 	{
   89: 	  struct in6_pktinfo p;
   90: 	  p.ipi6_ifindex = iface; /* Need iface for IPv6 to handle link-local addrs */
   91: 	  p.ipi6_addr = source->addr6;
   92: 	  msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
   93: 	  memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
   94: 	  cmptr->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
   95: 	  cmptr->cmsg_type = daemon->v6pktinfo;
   96: 	  cmptr->cmsg_level = IPPROTO_IPV6;
   97: 	}
   98:     }
   99:   
  100:   while (retry_send(sendmsg(fd, &msg, 0)));
  101: 
  102:   if (errno != 0)
  103:     {
  104: #ifdef HAVE_LINUX_NETWORK
  105:       /* If interface is still in DAD, EINVAL results - ignore that. */
  106:       if (errno != EINVAL)
  107: 	my_syslog(LOG_ERR, _("failed to send packet: %s"), strerror(errno));
  108: #endif
  109:       return 0;
  110:     }
  111:   
  112:   return 1;
  113: }
  114:           
  #ifdef HAVE_CONNTRACK
  /* Copy the connection mark of the incoming query recorded in "forward"
     onto the outgoing socket fd. Nothing is done when no mark can be
     obtained for the query; the result of setsockopt() is not checked. */
  static void set_outgoing_mark(struct frec *forward, int fd)
  {
    unsigned int conn_mark;

    if (!get_incoming_mark(&forward->frec_src.source, &forward->frec_src.dest, 0, &conn_mark))
      return;

    setsockopt(fd, SOL_SOCKET, SO_MARK, &conn_mark, sizeof(conn_mark));
  }
  #endif
  124: 
  125: static void log_query_mysockaddr(unsigned int flags, char *name, union mysockaddr *addr, char *arg, unsigned short type)
  126: {
  127:   if (addr->sa.sa_family == AF_INET)
  128:     {
  129:       if (flags & F_SERVER)
  130: 	type = ntohs(addr->in.sin_port);
  131:       log_query(flags | F_IPV4, name, (union all_addr *)&addr->in.sin_addr, arg, type);
  132:     }
  133:   else
  134:     {
  135:       if (flags & F_SERVER)
  136: 	type = ntohs(addr->in6.sin6_port);
  137:       log_query(flags | F_IPV6, name, (union all_addr *)&addr->in6.sin6_addr, arg, type);
  138:     }
  139: }
  140: 
  141: static void server_send(struct server *server, int fd,
  142: 			const void *header, size_t plen, int flags)
  143: {
  144:   while (retry_send(sendto(fd, header, plen, flags,
  145: 			   &server->addr.sa,
  146: 			   sa_len(&server->addr))));
  147: }
  148: 
  149: static int domain_no_rebind(char *domain)
  150: {
  151:   struct rebind_domain *rbd;
  152:   size_t tlen, dlen = strlen(domain);
  153:   char *dots = strchr(domain, '.');
  154: 
  155:   /* Match whole labels only. Empty domain matches no dots (any single label) */
  156:   for (rbd = daemon->no_rebind; rbd; rbd = rbd->next)
  157:     {
  158:       if (dlen >= (tlen = strlen(rbd->domain)) &&
  159: 	hostname_isequal(rbd->domain, &domain[dlen - tlen]) &&
  160: 	(dlen == tlen || domain[dlen - tlen - 1] == '.'))
  161:       return 1;
  162: 
  163:       if (tlen == 0 && !dots)
  164: 	return 1;
  165:     }
  166:   
  167:   return 0;
  168: }
  169: 
  /* Forward a query to upstream server(s), or answer it locally when it
     cannot be forwarded.

     udpfd      socket used to send a locally generated reply; -1 means
                there is no client to reply to (fast_retry() passes -1)
     udpaddr    address of the client
     dst_addr   stored in the frec source record and handed to send_from()
     dst_iface  as the source address and interface for replies
     header     the query packet, plen bytes long; it is modified in place
     limit      end of the buffer usable when building a local reply
     forward    existing frec when this is a retry of a query already in
                flight, or NULL to look one up / create a new one
     ad_reqd,
     do_bit     recorded in the frec flags (do_bit only with HAVE_DNSSEC);
                do_bit is also passed on when a local reply is built
     fast_retry non-zero when called from fast_retry()

     Returns 1 when the query was sent to at least one server (or when a
     DNSSEC query was being resent), 0 in every other case. */
  170: static int forward_query(int udpfd, union mysockaddr *udpaddr,
  171: 			 union all_addr *dst_addr, unsigned int dst_iface,
  172: 			 struct dns_header *header, size_t plen,  char *limit, time_t now, 
  173: 			 struct frec *forward, int ad_reqd, int do_bit, int fast_retry)
  174: {
  175:   unsigned int flags = 0;
  176:   unsigned int fwd_flags = 0;
  177:   int is_dnssec = forward && (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY));
  178:   struct server *master;
         /* hash, gotname and oph are all derived from the packet as received,
            before anything below modifies it. */
  179:   void *hash = hash_questions(header, plen, daemon->namebuff);
  180:   unsigned int gotname = extract_request(header, plen, daemon->namebuff, NULL);
  181:   unsigned char *oph = find_pseudoheader(header, plen, NULL, NULL, NULL, NULL);
  182:   int old_src = 0, old_reply = 0;
  183:   int first, last, start = 0;
  184:   int cacheable, forwarded = 0;
  185:   size_t edns0_len;
  186:   unsigned char *pheader;
  187:   int ede = EDE_UNSET;
  188:   (void)do_bit;
  189:   
  190:   if (header->hb4 & HB4_CD)
  191:     fwd_flags |= FREC_CHECKING_DISABLED;
  192:   if (ad_reqd)
  193:     fwd_flags |= FREC_AD_QUESTION;
  194:   if (oph)
  195:     fwd_flags |= FREC_HAS_PHEADER;
  196: #ifdef HAVE_DNSSEC
  197:   if (do_bit)
  198:     fwd_flags |= FREC_DO_QUESTION;
  199: #endif
  200:   
  201:   /* Check for retry on existing query.
  202:      FREC_DNSKEY and FREC_DS_QUERY are never set in flags, so the test below 
  203:      ensures that no frec created for internal DNSSEC query can be returned here.
  204:      
  205:      Similarly FREC_NO_CACHE is never set in flags, so a query which is
  206:      contingent on a particular source address EDNS0 option will never be matched. */
  207:   if (forward)
  208:     {
           /* Caller supplied the frec: treat as a retry from the original source. */
  209:       old_src = 1;
  210:       old_reply = 1;
  211:     }
  212:   else if ((forward = lookup_frec_by_query(hash, fwd_flags,
  213: 					   FREC_CHECKING_DISABLED | FREC_AD_QUESTION | FREC_DO_QUESTION |
  214: 					   FREC_HAS_PHEADER | FREC_DNSKEY_QUERY | FREC_DS_QUERY | FREC_NO_CACHE)))
  215:     {
  216:       struct frec_src *src;
  217:       
           /* Has this client (same address and query id) already asked this? */
  218:       for (src = &forward->frec_src; src; src = src->next)
  219: 	if (src->orig_id == ntohs(header->id) && 
  220: 	    sockaddr_isequal(&src->source, udpaddr))
  221: 	  break;
  222:       
  223:       if (src)
  224: 	{
  225: 	  old_src = 1;
  226: 	  /* If a query is retried, use the log_id for the retry when logging the answer. */
  227: 	  src->log_id = daemon->log_id;
  228: 	}
  229:       else
  230: 	{
  231: 	  /* Existing query, but from new source, just add this 
  232: 	     client to the list that will get the reply.*/
  233: 	  
  234: 	  /* Note whine_malloc() zeros memory. */
  235: 	  if (!daemon->free_frec_src &&
  236: 	      daemon->frec_src_count < daemon->ftabsize &&
  237: 	      (daemon->free_frec_src = whine_malloc(sizeof(struct frec_src))))
  238: 	    {
  239: 	      daemon->frec_src_count++;
  240: 	      daemon->free_frec_src->next = NULL;
  241: 	    }
  242: 	  
  243: 	  /* If we've been spammed with many duplicates, return REFUSED. */
  244: 	  if (!daemon->free_frec_src)
  245: 	    {
  246: 	      query_full(now, NULL);
  247: 	      /* This is tricky; if we're blasted with the same query
  248: 		 over and over, we'll end up taking this path each time
  249: 		 and never resetting until the frec gets deleted by
  250: 		 aging followed by the receipt of a different query. This
  251: 		 is a bit of a DoS vuln. Avoid by explicitly deleting the
  252: 		 frec once it expires. */
  253: 	      if (difftime(now, forward->time) >= TIMEOUT)
  254: 		free_frec(forward);
  255: 	      goto reply;
  256: 	    }
  257: 	  
	  /* Take a record off the free list and link it in directly after
	     the frec's embedded first source record. */
  258: 	  src = daemon->free_frec_src;
  259: 	  daemon->free_frec_src = src->next;
  260: 	  src->next = forward->frec_src.next;
  261: 	  forward->frec_src.next = src;
  262: 	  src->orig_id = ntohs(header->id);
  263: 	  src->source = *udpaddr;
  264: 	  src->dest = *dst_addr;
  265: 	  src->log_id = daemon->log_id;
  266: 	  src->iface = dst_iface;
  267: 	  src->fd = udpfd;
  268: 
  269: 	  /* closely spaced identical queries cannot be a try and a retry, so
  270: 	     it's safe to wait for the reply from the first without
  271: 	     forwarding the second. */
  272: 	  if (difftime(now, forward->time) < 2)
  273: 	    return 0;
  274: 	}
  275:     }
  276: 
  277:   /* new query */
  278:   if (!forward)
  279:     {
  280:       /* If the query is malformed, we can't forward it because
  281: 	 we can't get a reliable hash to recognise the answer. */
  282:       if (!hash)
  283: 	{
  284: 	  flags = 0;
  285: 	  ede = EDE_INVALID_DATA;
  286: 	  goto reply;
  287: 	}
  288:       
  289:       if (lookup_domain(daemon->namebuff, gotname, &first, &last))
  290: 	flags = is_local_answer(now, first, daemon->namebuff);
  291:       else
  292: 	{
  293: 	  /* no available server. */
  294: 	  ede = EDE_NOT_READY;
  295: 	  flags = 0;
  296: 	}
  297:        
  298:       /* don't forward A or AAAA queries for simple names, except the empty name */
  299:       if (!flags &&
  300: 	  option_bool(OPT_NODOTS_LOCAL) &&
  301: 	  (gotname & (F_IPV4 | F_IPV6)) &&
  302: 	  !strchr(daemon->namebuff, '.') &&
  303: 	  strlen(daemon->namebuff) != 0)
  304: 	flags = check_for_local_domain(daemon->namebuff, now) ? F_NOERR : F_NXDOMAIN;
  305:       
  306:       /* Configured answer. */
  307:       if (flags || ede == EDE_NOT_READY)
  308: 	goto reply;
  309:       
  310:       master = daemon->serverarray[first];
  311:       
  312:       if (!(forward = get_new_frec(now, master, 0)))
  313: 	goto reply;
  314:       /* table full - flags == 0, return REFUSED */
  315:       
  316:       /* Keep copy of query if we're doing fast retry. */
  317:       if (daemon->fast_retry_time != 0)
  318: 	{
  319: 	  forward->stash = blockdata_alloc((char *)header, plen);
  320: 	  forward->stash_len = plen;
  321: 	}
  322:       
  323:       forward->frec_src.log_id = daemon->log_id;
  324:       forward->frec_src.source = *udpaddr;
  325:       forward->frec_src.orig_id = ntohs(header->id);
  326:       forward->frec_src.dest = *dst_addr;
  327:       forward->frec_src.iface = dst_iface;
  328:       forward->frec_src.next = NULL;
  329:       forward->frec_src.fd = udpfd;
  330:       forward->new_id = get_id();
  331:       memcpy(forward->hash, hash, HASH_SIZE);
  332:       forward->forwardall = 0;
  333:       forward->flags = fwd_flags;
  334:       if (domain_no_rebind(daemon->namebuff))
  335: 	forward->flags |= FREC_NOREBIND;
           /* The three flags set below are already present in fwd_flags. */
  336:       if (header->hb4 & HB4_CD)
  337: 	forward->flags |= FREC_CHECKING_DISABLED;
  338:       if (ad_reqd)
  339: 	forward->flags |= FREC_AD_QUESTION;
  340: #ifdef HAVE_DNSSEC
  341:       forward->work_counter = DNSSEC_WORK;
  342:       if (do_bit)
  343: 	forward->flags |= FREC_DO_QUESTION;
  344: #endif
  345:       
  346:       start = first;
  347: 
  348:       if (option_bool(OPT_ALL_SERVERS))
  349: 	forward->forwardall = 1;
  350: 
  351:       if (!option_bool(OPT_ORDER))
  352: 	{
	  /* Periodically (by count or elapsed time), or when no last server
	     is recorded, send to every server; otherwise start with the
	     server recorded in master->last_server. */
  353: 	  if (master->forwardcount++ > FORWARD_TEST ||
  354: 	      difftime(now, master->forwardtime) > FORWARD_TIME ||
  355: 	      master->last_server == -1)
  356: 	    {
  357: 	      master->forwardtime = now;
  358: 	      master->forwardcount = 0;
  359: 	      forward->forwardall = 1;
  360: 	    }
  361: 	  else
  362: 	    start = master->last_server;
  363: 	}
  364:     }
  365:   else
  366:     {
  367: #ifdef HAVE_DNSSEC
  368:       /* If we've already got an answer to this query, but we're awaiting keys for validation,
  369: 	 there's no point retrying the query, retry the key query instead...... */
  370:       while (forward->blocking_query)
  371: 	forward = forward->blocking_query;
  372: 
  373:       if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
  374: 	{
  375: 	  int is_sign;
  376: 	  unsigned char *pheader;
  377: 	  
  378: 	  /* log_id should match previous DNSSEC query. */
  379: 	  daemon->log_display_id = forward->frec_src.log_id;
  380: 	  
	  /* Overwrite the caller's packet with the stashed DNSSEC query. */
  381: 	  blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
  382: 	  plen = forward->stash_len;
  383: 	  /* get query for logging. */
  384: 	  extract_request(header, plen, daemon->namebuff, NULL);
  385: 	  
  386: 	  if (find_pseudoheader(header, plen, NULL, &pheader, &is_sign, NULL) && !is_sign)
  387: 	    PUTSHORT(SAFE_PKTSZ, pheader);
  388: 	  
  389: 	  /* Find suitable servers: should never fail. */
  390: 	  if (!filter_servers(forward->sentto->arrayposn, F_DNSSECOK, &first, &last))
  391: 	    return 0;
  392: 	  
  393: 	  is_dnssec = 1;
  394: 	  forward->forwardall = 1;
  395: 	}
  396:       else
  397: #endif
  398: 	{
  399: 	  /* retry on existing query, from original source. Send to all available servers  */
  400: 	  if (udpfd == -1 && !fast_retry)
  401: 	    forward->sentto->failed_queries++;
  402: 	  else
  403: 	    forward->sentto->retrys++;
  404: 	  
  405: 	  if (!filter_servers(forward->sentto->arrayposn, F_SERVER, &first, &last))
  406: 	    goto reply;
  407: 	  
  408: 	  master = daemon->serverarray[first];
  409: 	  
  410: 	  /* Forward to all available servers on retry of query from same host. */
  411: 	  if (!option_bool(OPT_ORDER) && old_src && !fast_retry)
  412: 	    forward->forwardall = 1;
  413: 	  else
  414: 	    {
  415: 	      start = forward->sentto->arrayposn;
  416: 	      
  417: 	      if (option_bool(OPT_ORDER) && !fast_retry)
  418: 		{
  419: 		  /* In strict order mode, there must be a server later in the list
  420: 		     left to send to, otherwise without the forwardall mechanism,
  421: 		     code further on will cycle around the list forever if they
  422: 		     all return REFUSED. If at the last, give up.
  423: 		     Note that we can get here EITHER because a client retried,
  424: 		     or an upstream server returned REFUSED. The above only
  425: 		     applies in the latter case. For client retries,
  426: 		     keep trying the last server.. */
  427: 		  if (++start == last)
  428: 		    {
  429: 		      if (old_reply)
  430: 			goto reply;
  431: 		      else
  432: 			start--;
  433: 		    }
  434: 		}
  435: 	    }	  
  436: 	}
  437:       
  438:       /* If we didn't get an answer advertising a maximal packet in EDNS,
  439: 	 fall back to 1280, which should work everywhere on IPv6.
  440: 	 If that generates an answer, it will become the new default
  441: 	 for this server */
  442:       forward->flags |= FREC_TEST_PKTSZ;
  443:     }
  444: 
  445:   /* We may be resending a DNSSEC query here, for which the below processing is not necessary. */
  446:   if (!is_dnssec)
  447:     {
           /* Replace the client's query id with our own before sending upstream. */
  448:       header->id = htons(forward->new_id);
  449:       
  450:       plen = add_edns0_config(header, plen, ((unsigned char *)header) + PACKETSZ, &forward->frec_src.source, now, &cacheable);
  451:       
  452:       if (!cacheable)
  453: 	forward->flags |= FREC_NO_CACHE;
  454:       
  455: #ifdef HAVE_DNSSEC
  456:       if (option_bool(OPT_DNSSEC_VALID) && (master->flags & SERV_DO_DNSSEC))
  457: 	{
  458: 	  plen = add_do_bit(header, plen, ((unsigned char *) header) + PACKETSZ);
  459: 	  
  460: 	  /* For debugging, set Checking Disabled, otherwise, have the upstream check too,
  461: 	     this allows it to select auth servers when one is returning bad data. */
  462: 	  if (option_bool(OPT_DNSSEC_DEBUG))
  463: 	    header->hb4 |= HB4_CD;
  464: 	  
  465: 	}
  466: #endif
  467:       
  468:       if (find_pseudoheader(header, plen, &edns0_len, &pheader, NULL, NULL))
  469: 	{
  470: 	  /* If there wasn't a PH before, and there is now, we added it. */
  471: 	  if (!oph)
  472: 	    forward->flags |= FREC_ADDED_PHEADER;
  473: 	  
  474: 	  /* If we're sending an EDNS0 with any options, we can't recreate the query from a reply. */
  475: 	  if (edns0_len > 11)
  476: 	    forward->flags |= FREC_HAS_EXTRADATA;
  477: 	  
  478: 	  /* Reduce udp size on retransmits. */
  479: 	  if (forward->flags & FREC_TEST_PKTSZ)
  480: 	    PUTSHORT(SAFE_PKTSZ, pheader);
  481: 	}
  482:     }
  483:   
  484:   if (forward->forwardall)
  485:     start = first;
  486: 
  487:   forwarded = 0;
  488:   
  489:   /* check for send errors here (no route to host) 
  490:      if we fail to send to all nameservers, send back an error
  491:      packet straight away (helps modem users when offline)  */
  492: 
         /* Walk daemon->serverarray from start up to, but not including, last. */
  493:   while (1)
  494:     { 
  495:       int fd;
  496:       struct server *srv = daemon->serverarray[start];
  497:       
  498:       if ((fd = allocate_rfd(&forward->rfds, srv)) != -1)
  499: 	{
  500: 	  
  501: #ifdef HAVE_CONNTRACK
  502: 	  /* Copy connection mark of incoming query to outgoing connection. */
  503: 	  if (option_bool(OPT_CONNTRACK))
  504: 	    set_outgoing_mark(forward, fd);
  505: #endif
  506: 	  
  507: #ifdef HAVE_DNSSEC
  508: 	  if (option_bool(OPT_DNSSEC_VALID) && (forward->flags & FREC_ADDED_PHEADER))
  509: 	    {
  510: 	      /* Difficult one here. If our client didn't send EDNS0, we will have set the UDP
  511: 		 packet size to 512. But that won't provide space for the RRSIGS in many cases.
  512: 		 The RRSIGS will be stripped out before the answer goes back, so the packet should
  513: 		 shrink again. So, if we added a do-bit, bump the udp packet size to the value
  514: 		 known to be OK for this server. We check returned size after stripping and set
  515: 		 the truncated bit if it's still too big. */		  
  516: 	      unsigned char *pheader;
  517: 	      int is_sign;
  518: 	      if (find_pseudoheader(header, plen, NULL, &pheader, &is_sign, NULL) && !is_sign)
  519: 		PUTSHORT(srv->edns_pktsz, pheader);
  520: 	    }
  521: #endif
  522: 	  
	  /* retry_send() asked for a retry: go round again for the same
	     server, since start has not been advanced. */
  523: 	  if (retry_send(sendto(fd, (char *)header, plen, 0,
  524: 				&srv->addr.sa,
  525: 				sa_len(&srv->addr))))
  526: 	    continue;
  527: 	  
  528: 	  if (errno == 0)
  529: 	    {
  530: #ifdef HAVE_DUMPFILE
  531: 	      dump_packet_udp(DUMP_UP_QUERY, (void *)header, plen, NULL, &srv->addr, fd);
  532: #endif
  533: 	      
  534: 	      /* Keep info in case we want to re-send this packet */
  535: 	      daemon->srv_save = srv;
  536: 	      daemon->packet_len = plen;
  537: 	      daemon->fd_save = fd;
  538: 	      
  539: 	      if (!(forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)))
  540: 		{
  541: 		  if (!gotname)
  542: 		    strcpy(daemon->namebuff, "query");
  543: 		  log_query_mysockaddr(F_SERVER | F_FORWARD, daemon->namebuff,
  544: 				       &srv->addr, NULL, 0);
  545: 		}
  546: #ifdef HAVE_DNSSEC
  547: 	      else
  548: 		log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, daemon->namebuff, &srv->addr,
  549: 				     (forward->flags & FREC_DNSKEY_QUERY) ? "dnssec-retry[DNSKEY]" : "dnssec-retry[DS]", 0);
  550: #endif
  551: 
  552: 	      srv->queries++;
  553: 	      forwarded = 1;
  554: 	      forward->sentto = srv;
	      /* One successful send is enough unless sending to all servers;
		 in that case forwardall is also bumped once per send. */
  555: 	      if (!forward->forwardall) 
  556: 		break;
  557: 	      forward->forwardall++;
  558: 	    }
  559: 	}
  560:       
  561:       if (++start == last)
  562: 	break;
  563:     }
  564:   
         /* A DNSSEC resend reports success even if nothing could be sent. */
  565:   if (forwarded || is_dnssec)
  566:     {
  567:       forward->forward_timestamp = dnsmasq_milliseconds();
  568:       return 1;
  569:     }
  570:   
  571:   /* could not send on, prepare to return */ 
  572:   header->id = htons(forward->frec_src.orig_id);
  573:   free_frec(forward); /* cancel */
  574:   ede = EDE_NETERR;
  575:   
         /* Build and send a local answer. With udpfd == -1 there is no client
            socket to reply on, so nothing is sent. */
  576:  reply:
  577:   if (udpfd != -1)
  578:     {
  579:       if (!(plen = make_local_answer(flags, gotname, plen, header, daemon->namebuff, limit, first, last, ede)))
  580: 	return 0;
  581:       
           /* Only add a pseudoheader to the reply if the query carried one. */
  582:       if (oph)
  583: 	{
  584: 	  u16 swap = htons((u16)ede);
  585: 
  586: 	  if (ede != EDE_UNSET)
  587: 	    plen = add_pseudoheader(header, plen, (unsigned char *)limit, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
  588: 	  else
  589: 	    plen = add_pseudoheader(header, plen, (unsigned char *)limit, daemon->edns_pktsz, 0, NULL, 0, do_bit, 0);
  590: 	}
  591:       
  592: #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
  593:       if (option_bool(OPT_CMARK_ALST_EN))
  594: 	{
  595: 	  unsigned int mark;
  596: 	  int have_mark = get_incoming_mark(udpaddr, dst_addr, /* istcp: */ 0, &mark);
  597: 	  if (have_mark && ((u32)mark & daemon->allowlist_mask))
  598: 	    report_addresses(header, plen, mark);
  599: 	}
  600: #endif
  601:       
  602:       send_from(udpfd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND), (char *)header, plen, udpaddr, dst_addr, dst_iface);
  603:     }
  604: 	  
  605:   return 0;
  606: }
  607: 
  608: /* Check if any frecs need to do a retry, and action that if so. 
  609:    Return time in milliseconds until he next retry will be required,
  610:    or -1 if none. */
  611: int fast_retry(time_t now)
  612: {
  613:   struct frec *f;
  614:   int ret = -1;
  615:   
  616:   if (daemon->fast_retry_time != 0)
  617:     {
  618:       u32 millis = dnsmasq_milliseconds();
  619:       
  620:       for (f = daemon->frec_list; f; f = f->next)
  621: 	if (f->sentto && f->stash && difftime(now, f->time) < daemon->fast_retry_timeout)
  622: 	  {
  623: #ifdef HAVE_DNSSEC
  624: 	    if (f->blocking_query)
  625: 	      continue;
  626: #endif
  627: 	    /* t is milliseconds since last query sent. */ 
  628: 	    int to_run, t = (int)(millis - f->forward_timestamp);
  629: 	    
  630: 	    if (t < f->forward_delay)
  631: 	      to_run = f->forward_delay - t;
  632: 	    else
  633: 	      {
  634: 		unsigned char *udpsz;
  635: 		unsigned short udp_size =  PACKETSZ; /* default if no EDNS0 */
  636: 		struct dns_header *header = (struct dns_header *)daemon->packet;
  637: 		
  638: 		/* packet buffer overwritten */
  639: 		daemon->srv_save = NULL;
  640: 		
  641: 		blockdata_retrieve(f->stash, f->stash_len, (void *)header);
  642: 		
  643: 		/* UDP size already set in saved query. */
  644: 		if (find_pseudoheader(header, f->stash_len, NULL, &udpsz, NULL, NULL))
  645: 		  GETSHORT(udp_size, udpsz);
  646: 		
  647: 		daemon->log_display_id = f->frec_src.log_id;
  648: 		
  649: 		forward_query(-1, NULL, NULL, 0, header, f->stash_len, ((char *) header) + udp_size, now, f,
  650: 			      f->flags & FREC_AD_QUESTION, f->flags & FREC_DO_QUESTION, 1);
  651: 
  652: 		to_run = f->forward_delay = 2 * f->forward_delay;
  653: 	      }
  654: 
  655: 	    if (ret == -1 || ret > to_run)
  656: 	      ret = to_run;
  657: 	  }
  658:       
  659:     }
  660:   return ret;
  661: }
  662: 
  663: static struct ipsets *domain_find_sets(struct ipsets *setlist, const char *domain) {
  664:   /* Similar algorithm to search_servers. */
  665:   struct ipsets *ipset_pos, *ret = NULL;
  666:   unsigned int namelen = strlen(domain);
  667:   unsigned int matchlen = 0;
  668:   for (ipset_pos = setlist; ipset_pos; ipset_pos = ipset_pos->next) 
  669:     {
  670:       unsigned int domainlen = strlen(ipset_pos->domain);
  671:       const char *matchstart = domain + namelen - domainlen;
  672:       if (namelen >= domainlen && hostname_isequal(matchstart, ipset_pos->domain) &&
  673:           (domainlen == 0 || namelen == domainlen || *(matchstart - 1) == '.' ) &&
  674:           domainlen >= matchlen) 
  675:         {
  676:           matchlen = domainlen;
  677:           ret = ipset_pos;
  678:         }
  679:     }
  680: 
  681:   return ret;
  682: }
  683: 
  684: static size_t process_reply(struct dns_header *header, time_t now, struct server *server, size_t n, int check_rebind, 
  685: 			    int no_cache, int cache_secure, int bogusanswer, int ad_reqd, int do_bit, int added_pheader, 
  686: 			    union mysockaddr *query_source, unsigned char *limit, int ede)
  687: {
  688:   unsigned char *pheader, *sizep;
  689:   struct ipsets *ipsets = NULL, *nftsets = NULL;
  690:   int munged = 0, is_sign;
  691:   unsigned int rcode = RCODE(header);
  692:   size_t plen; 
  693:     
  694:   (void)ad_reqd;
  695:   (void)do_bit;
  696:   (void)bogusanswer;
  697: 
  698: #ifdef HAVE_IPSET
  699:   if (daemon->ipsets && extract_request(header, n, daemon->namebuff, NULL))
  700:     ipsets = domain_find_sets(daemon->ipsets, daemon->namebuff);
  701: #endif
  702: 
  703: #ifdef HAVE_NFTSET
  704:   if (daemon->nftsets && extract_request(header, n, daemon->namebuff, NULL))
  705:     nftsets = domain_find_sets(daemon->nftsets, daemon->namebuff);
  706: #endif
  707: 
  708:   if ((pheader = find_pseudoheader(header, n, &plen, &sizep, &is_sign, NULL)))
  709:     {
  710:       /* Get extended RCODE. */
  711:       rcode |= sizep[2] << 4;
  712: 
  713:       if (option_bool(OPT_CLIENT_SUBNET) && !check_source(header, plen, pheader, query_source))
  714: 	{
  715: 	  my_syslog(LOG_WARNING, _("discarding DNS reply: subnet option mismatch"));
  716: 	  return 0;
  717: 	}
  718:       
  719:       if (!is_sign)
  720: 	{
  721: 	  if (added_pheader)
  722: 	    {
  723: 	      /* client didn't send EDNS0, we added one, strip it off before returning answer. */
  724: 	      n = rrfilter(header, n, RRFILTER_EDNS0);
  725: 	      pheader = NULL;
  726: 	    }
  727: 	  else
  728: 	    {
  729: 	      /* If upstream is advertising a larger UDP packet size
  730: 		 than we allow, trim it so that we don't get overlarge
  731: 		 requests for the client. We can't do this for signed packets. */
  732: 	      unsigned short udpsz;
  733: 	      GETSHORT(udpsz, sizep);
  734: 	      if (udpsz > daemon->edns_pktsz)
  735: 		{
  736: 		  sizep -= 2;
  737: 		  PUTSHORT(daemon->edns_pktsz, sizep);
  738: 		}
  739: 
  740: #ifdef HAVE_DNSSEC
  741: 	      /* If the client didn't set the do bit, but we did, reset it. */
  742: 	      if (option_bool(OPT_DNSSEC_VALID) && !do_bit)
  743: 		{
  744: 		  unsigned short flags;
  745: 		  sizep += 2; /* skip RCODE */
  746: 		  GETSHORT(flags, sizep);
  747: 		  flags &= ~0x8000;
  748: 		  sizep -= 2;
  749: 		  PUTSHORT(flags, sizep);
  750: 		}
  751: #endif
  752: 	    }
  753: 	}
  754:     }
  755:   
  756:   /* RFC 4035 sect 4.6 para 3 */
  757:   if (!is_sign && !option_bool(OPT_DNSSEC_PROXY))
  758:      header->hb4 &= ~HB4_AD;
  759: 
  760:   header->hb4 |= HB4_RA; /* recursion if available */
  761: 
  762:   if (OPCODE(header) != QUERY)
  763:     return resize_packet(header, n, pheader, plen);
  764: 
  765:   if (rcode != NOERROR && rcode != NXDOMAIN)
  766:     {
  767:       union all_addr a;
  768:       a.log.rcode = rcode;
  769:       a.log.ede = ede;
  770:       log_query(F_UPSTREAM | F_RCODE, "error", &a, NULL, 0);
  771:       
  772:       return resize_packet(header, n, pheader, plen);
  773:     }
  774:   
  775:   /* Complain loudly if the upstream server is non-recursive. */
  776:   if (!(header->hb4 & HB4_RA) && rcode == NOERROR &&
  777:       server && !(server->flags & SERV_WARNED_RECURSIVE))
  778:     {
  779:       (void)prettyprint_addr(&server->addr, daemon->namebuff);
  780:       my_syslog(LOG_WARNING, _("nameserver %s refused to do a recursive query"), daemon->namebuff);
  781:       if (!option_bool(OPT_LOG))
  782: 	server->flags |= SERV_WARNED_RECURSIVE;
  783:     }  
  784: 
  785:   if (daemon->bogus_addr && rcode != NXDOMAIN &&
  786:       check_for_bogus_wildcard(header, n, daemon->namebuff, now))
  787:     {
  788:       munged = 1;
  789:       SET_RCODE(header, NXDOMAIN);
  790:       header->hb3 &= ~HB3_AA;
  791:       cache_secure = 0;
  792:       ede = EDE_BLOCKED;
  793:     }
  794:   else 
  795:     {
  796:       int doctored = 0;
  797:       
  798:       if (rcode == NXDOMAIN && 
  799: 	  extract_request(header, n, daemon->namebuff, NULL))
  800: 	{
  801: 	  if (check_for_local_domain(daemon->namebuff, now) ||
  802: 	      lookup_domain(daemon->namebuff, F_CONFIG, NULL, NULL))
  803: 	    {
  804: 	      /* if we forwarded a query for a locally known name (because it was for 
  805: 		 an unknown type) and the answer is NXDOMAIN, convert that to NODATA,
  806: 		 since we know that the domain exists, even if upstream doesn't */
  807: 	      munged = 1;
  808: 	      header->hb3 |= HB3_AA;
  809: 	      SET_RCODE(header, NOERROR);
  810: 	      cache_secure = 0;
  811: 	    }
  812: 	}
  813: 
  814:       /* Before extract_addresses() */
  815:       if (rcode == NOERROR)
  816: 	{
  817: 	  if (option_bool(OPT_FILTER_A))
  818: 	    n = rrfilter(header, n, RRFILTER_A);
  819: 
  820: 	  if (option_bool(OPT_FILTER_AAAA))
  821: 	    n = rrfilter(header, n, RRFILTER_AAAA);
  822: 	}
  823: 
  824:       switch (extract_addresses(header, n, daemon->namebuff, now, ipsets, nftsets, is_sign, check_rebind, no_cache, cache_secure, &doctored))
  825: 	{
  826: 	case 1:
  827: 	  my_syslog(LOG_WARNING, _("possible DNS-rebind attack detected: %s"), daemon->namebuff);
  828: 	  munged = 1;
  829: 	  cache_secure = 0;
  830: 	  ede = EDE_BLOCKED;
  831: 	  break;
  832: 	  
  833: 	  /* extract_addresses() found a malformed answer. */
  834: 	case 2:
  835: 	  munged = 1;
  836: 	  SET_RCODE(header, SERVFAIL);
  837: 	  cache_secure = 0;
  838: 	  ede = EDE_OTHER;
  839: 	  break;
  840: 	}
  841: 
  842:       if (doctored)
  843: 	cache_secure = 0;
  844:     }
  845:   
  846: #ifdef HAVE_DNSSEC
  847:   if (bogusanswer && !(header->hb4 & HB4_CD) && !option_bool(OPT_DNSSEC_DEBUG))
  848:     {
  849:       /* Bogus reply, turn into SERVFAIL */
  850:       SET_RCODE(header, SERVFAIL);
  851:       munged = 1;
  852:     }
  853: 
  854:   if (option_bool(OPT_DNSSEC_VALID))
  855:     {
  856:       header->hb4 &= ~HB4_AD;
  857:       
  858:       if (!(header->hb4 & HB4_CD) && ad_reqd && cache_secure)
  859: 	header->hb4 |= HB4_AD;
  860:       
  861:       /* If the requestor didn't set the DO bit, don't return DNSSEC info. */
  862:       if (!do_bit)
  863: 	n = rrfilter(header, n, RRFILTER_DNSSEC);
  864:     }
  865: #endif
  866: 
  867:   /* do this after extract_addresses. Ensure NODATA reply and remove
  868:      nameserver info. */
  869:   if (munged)
  870:     {
  871:       header->ancount = htons(0);
  872:       header->nscount = htons(0);
  873:       header->arcount = htons(0);
  874:       header->hb3 &= ~HB3_TC;
  875:     }
  876:   
  877:   /* the bogus-nxdomain stuff, doctor and NXDOMAIN->NODATA munging can all elide
  878:      sections of the packet. Find the new length here and put back pseudoheader
  879:      if it was removed. */
  880:   n = resize_packet(header, n, pheader, plen);
  881: 
  882:   if (pheader && ede != EDE_UNSET)
  883:     {
  884:       u16 swap = htons((u16)ede);
  885:       n = add_pseudoheader(header, n, limit, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 1);
  886:     }
  887: 
  888:   if (RCODE(header) == NXDOMAIN)
  889:     server->nxdomain_replies++;
  890: 
  891:   return n;
  892: }
  893: 
  894: #ifdef HAVE_DNSSEC
       /* Validate the upstream reply held in header (plen bytes) for the
          forwarding record "forward", or resume a validation which was
          waiting for key data.

          forward  frec for the query being answered; it may itself be a
                   DS/DNSKEY query generated below (FREC_DS_QUERY or
                   FREC_DNSKEY_QUERY set in ->flags).
          header   the reply packet. The same buffer is reused to build any
                   DS/DNSKEY query sent from here and to retrieve stashed
                   replies, so its contents are overwritten.
          plen     length of the reply in header.
          status   STAT_OK when called from reply_query(), or the result of
                   the subsidiary validation when called recursively below.
          now      timestamp handed on to the validation routines.

          Outcomes:
          - forward->blocking_query is already set: the packet is ignored.
          - Validation reports STAT_NEED_DS or STAT_NEED_KEY: the reply is
            stashed in forward->stash and forward is queued as a dependent of
            an existing, or newly created and sent, frec for that record.
            Nothing is returned to the client yet. If that cannot be done the
            status becomes STAT_ABANDONED.
          - Otherwise a frec with no ->dependent goes to return_reply(); a
            subsidiary frec is freed and every frec which was waiting on it is
            validated again from its stashed reply. */
  895: static void dnssec_validate(struct frec *forward, struct dns_header *header,
  896: 			    ssize_t plen, int status, time_t now)
  897: {
  898:   daemon->log_display_id = forward->frec_src.log_id;
  899:   
  900:   /* We've had a reply already, which we're validating. Ignore this duplicate */
  901:   if (forward->blocking_query)
  902:     return;
  903:   
  904:   /* Truncated answer can't be validated.
  905:      If this is an answer to a DNSSEC-generated query, we still
  906:      need to get the client to retry over TCP, so return
  907:      an answer with the TC bit set, even if the actual answer fits.
  908:   */
  909:   if (header->hb3 & HB3_TC)
  910:     status = STAT_TRUNCATED;
  911: 
  912:   /* If all replies to a query are REFUSED, give up. */
  913:   if (RCODE(header) == REFUSED)
  914:     status = STAT_ABANDONED;
  915:   
  916:   /* As soon as anything returns BOGUS, we stop and unwind, to do otherwise
  917:      would invite infinite loops, since the answers to DNSKEY and DS queries
  918:      will not be cached, so they'll be repeated. */
  919:   if (!STAT_ISEQUAL(status, STAT_BOGUS) && !STAT_ISEQUAL(status, STAT_TRUNCATED) && !STAT_ISEQUAL(status, STAT_ABANDONED))
  920:     {
           /* Pick the validation routine from the kind of query this frec carries. */
  921:       if (forward->flags & FREC_DNSKEY_QUERY)
  922: 	status = dnssec_validate_by_ds(now, header, plen, daemon->namebuff, daemon->keyname, forward->class);
  923:       else if (forward->flags & FREC_DS_QUERY)
  924: 	status = dnssec_validate_ds(now, header, plen, daemon->namebuff, daemon->keyname, forward->class);
  925:       else
  926: 	status = dnssec_validate_reply(now, header, plen, daemon->namebuff, daemon->keyname, &forward->class, 
  927: 				       !option_bool(OPT_DNSSEC_IGN_NS) && (forward->sentto->flags & SERV_DO_DNSSEC),
  928: 				       NULL, NULL, NULL);
  929: #ifdef HAVE_DUMPFILE
  930:       if (STAT_ISEQUAL(status, STAT_BOGUS))
  931: 	dump_packet_udp((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) ? DUMP_SEC_BOGUS : DUMP_BOGUS,
  932: 			header, (size_t)plen, &forward->sentto->addr, NULL, -daemon->port);
  933: #endif
  934:     }
  935:   
  936:   /* Can't validate, as we're missing key data. Put this
  937:      answer aside, whilst we get that. */     
  938:   if (STAT_ISEQUAL(status, STAT_NEED_DS) || STAT_ISEQUAL(status, STAT_NEED_KEY))
  939:     {
  940:       struct frec *new = NULL;
  941:       struct blockdata *stash;
  942:       
  943:       /* Now save reply pending receipt of key data */
  944:       if ((stash = blockdata_alloc((char *)header, plen)))
  945: 	{
  946: 	  /* validate routines leave name of required record in daemon->keyname */
  947: 	  unsigned int flags = STAT_ISEQUAL(status, STAT_NEED_KEY) ? FREC_DNSKEY_QUERY : FREC_DS_QUERY;
  948: 
       	  /* First look for a frec which is already fetching the record we need. */
  949: 	  if ((new = lookup_frec_dnssec(daemon->keyname, forward->class, flags, header)))
  950: 	    {
  951: 	      /* This is tricky; it detects loops in the dependency
  952: 		 graph for DNSSEC validation, say validating A requires DS B
  953: 		 and validating DS B requires DNSKEY C and validating DNSKEY C requires DS B.
  954: 		 This should never happen in correctly signed records, but it's
  955: 		 likely the case that sufficiently broken ones can cause our validation
  956: 		 code requests to exhibit cycles. The result is that the ->blocking_query list
  957: 		 can form a cycle, and under certain circumstances that can lock us in 
  958: 		 an infinite loop. Here we transform the situation into ABANDONED. */
  959: 	      struct frec *f;
  960: 	      for (f = new; f; f = f->blocking_query)
  961: 		if (f == forward)
  962: 		  break;
  963: 
       	      /* f is NULL when forward is not on new's ->blocking_query chain,
       		 i.e. no cycle: push forward onto new's list of dependents.
       		 A non-NULL f falls through to the failure path below. */
  964: 	      if (!f)
  965: 		{
  966: 		  forward->next_dependent = new->dependent;
  967: 		  new->dependent = forward;
  968: 		  /* Make consistent, only replace query copy with unvalidated answer
  969: 		     when we set ->blocking_query. */
  970: 		  if (forward->stash)
  971: 		    blockdata_free(forward->stash);
  972: 		  forward->blocking_query = new;
  973: 		  forward->stash_len = plen;
  974: 		  forward->stash = stash;
  975: 		  return;
  976: 		}
  977: 	    }
  978: 	  else
  979: 	    {
  980: 	      struct server *server;
  981: 	      struct frec *orig;
  982: 	      void *hash;
  983: 	      size_t nn;
  984: 	      int serverind, fd;
  985: 	      struct randfd_list *rfds = NULL;
  986: 	      
  987: 	      /* Find the original query that started it all.... */
  988: 	      for (orig = forward; orig->dependent; orig = orig->dependent);
  989: 	      
  990: 	      /* Make sure we don't expire and free the orig frec during the
  991: 		 allocation of a new one: third arg of get_new_frec() does that. */
       	      /* Every step of this chain must succeed for the query to be sent;
       		 the first failure drops to the error unwind after the block.
       		 dnssec_generate_query() writes the new query into header. */
  992: 	      if ((serverind = dnssec_server(forward->sentto, daemon->keyname, NULL, NULL)) != -1 &&
  993: 		  (server = daemon->serverarray[serverind]) &&
  994: 		  (nn = dnssec_generate_query(header, ((unsigned char *) header) + server->edns_pktsz,
  995: 					      daemon->keyname, forward->class,
  996: 					      STAT_ISEQUAL(status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz)) && 
  997: 		  (hash = hash_questions(header, nn, daemon->namebuff)) &&
  998: 		  --orig->work_counter != 0 && /* reaching zero stops here; status ends up STAT_ABANDONED */
  999: 		  (fd = allocate_rfd(&rfds, server)) != -1 &&
 1000: 		  (new = get_new_frec(now, server, 1)))
 1001: 		{
 1002: 		  struct frec *next = new->next; /* saved: the struct copy below overwrites it */
 1003: 		  
 1004: 		  *new = *forward; /* copy everything, then overwrite */
 1005: 		  new->next = next;
 1006: 		  new->blocking_query = NULL;
 1007: 		  
 1008: 		  new->frec_src.log_id = daemon->log_display_id = ++daemon->log_id;
 1009: 		  new->sentto = server;
 1010: 		  new->rfds = rfds;
 1011: 		  new->frec_src.next = NULL;
 1012: 		  new->flags &= ~(FREC_DNSKEY_QUERY | FREC_DS_QUERY | FREC_HAS_EXTRADATA);
 1013: 		  new->flags |= flags;
 1014: 		  new->forwardall = 0;
 1015: 		  
 1016: 		  forward->next_dependent = NULL;
 1017: 		  new->dependent = forward; /* to find query awaiting new one. */
 1018: 		  
 1019: 		  /* Make consistent, only replace query copy with unvalidated answer
 1020: 		     when we set ->blocking_query. */
 1021: 		  forward->blocking_query = new; 
 1022: 		  if (forward->stash)
 1023: 		    blockdata_free(forward->stash);
 1024: 		  forward->stash_len = plen;
 1025: 		  forward->stash = stash;
 1026: 		  
 1027: 		  memcpy(new->hash, hash, HASH_SIZE);
 1028: 		  new->new_id = get_id();
 1029: 		  header->id = htons(new->new_id);
 1030: 		  /* Save query for retransmission and de-dup */
       		  /* NOTE(review): the result of this blockdata_alloc() is not
       		     checked here - confirm that users of new->stash cope with NULL. */
 1031: 		  new->stash = blockdata_alloc((char *)header, nn);
 1032: 		  new->stash_len = nn;
 1033: 		  if (daemon->fast_retry_time != 0)
 1034: 		    new->forward_timestamp = dnsmasq_milliseconds();
 1035: 		  
 1036: 		  /* Don't resend this. */
 1037: 		  daemon->srv_save = NULL;
 1038: 		  
 1039: #ifdef HAVE_CONNTRACK
 1040: 		  if (option_bool(OPT_CONNTRACK))
 1041: 		    set_outgoing_mark(orig, fd);
 1042: #endif
 1043: 		  
 1044: 		  server_send(server, fd, header, nn, 0);
 1045: 		  server->queries++;
 1046: #ifdef HAVE_DUMPFILE
 1047: 		  dump_packet_udp(DUMP_SEC_QUERY, (void *)header, (size_t)nn, NULL, &server->addr, fd);
 1048: #endif
 1049: 		  log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, daemon->keyname, &server->addr,
 1050: 				       STAT_ISEQUAL(status, STAT_NEED_KEY) ? "dnssec-query[DNSKEY]" : "dnssec-query[DS]", 0);
 1051: 		  return;
 1052: 		}
 1053: 	      
 1054: 	      free_rfds(&rfds); /* error unwind */
 1055: 	    }
 1056: 	  
 1057: 	  blockdata_free(stash); /* don't leak this on failure. */
 1058: 	}
 1059: 
 1060:       /* sending DNSSEC query failed or loop detected. */
 1061:       status = STAT_ABANDONED;
 1062:     }
 1063: 
 1064:   /* Validated original answer, all done. */
 1065:   if (!forward->dependent)
 1066:     return_reply(now, forward, header, plen, status);
 1067:   else
 1068:     {
 1069:       /* validated subsidiary query/queries, (and cached result)
 1070: 	 pop that and return to the previous query/queries we were working on. */
 1071:       struct frec *prev, *nxt = forward->dependent;
 1072:       
 1073:       free_frec(forward); /* its ->dependent was saved in nxt above */
 1074:       
 1075:       while ((prev = nxt))
 1076: 	{
 1077: 	  /* ->next_dependent will have changed after return from recursive call below. */
 1078: 	  nxt = prev->next_dependent;
 1079: 	  prev->blocking_query = NULL; /* already gone */
       	  /* Put the dependent's stashed reply back into the packet buffer and
       	     validate it again, handing on the status we ended up with. */
 1080: 	  blockdata_retrieve(prev->stash, prev->stash_len, (void *)header);
 1081: 	  dnssec_validate(prev, header, prev->stash_len, status, now);
 1082: 	}
 1083:     }
 1084: }
 1085: #endif
 1086: 
 1087: /* Handle one UDP datagram arriving on the upstream socket fd.
          The packet is read into daemon->packet and matched to a forwarding
          record by id, socket and question hash. Packets which are too short,
          lack the QR bit, match no forwarding record, or come from an address
          which is not one of the servers in the record's [first, last) range
          are dropped without further action.
          A REFUSED or SERVFAIL answer may be turned back into a query and
          handed to forward_query() to try another server; any other answer
          goes on to dnssec_validate() or return_reply().
          Sets new last_server. */
 1088: void reply_query(int fd, time_t now)
 1089: {
 1090:   /* packet from peer server, extract data for cache, and send to
 1091:      original requester */
 1092:   struct dns_header *header;
 1093:   union mysockaddr serveraddr;
 1094:   struct frec *forward;
 1095:   socklen_t addrlen = sizeof(serveraddr);
 1096:   ssize_t n = recvfrom(fd, daemon->packet, daemon->packet_buff_sz, 0, &serveraddr.sa, &addrlen);
 1097:   struct server *server;
 1098:   void *hash;
 1099:   int first, last, c;
 1100:     
 1101:   /* packet buffer overwritten */
 1102:   daemon->srv_save = NULL;
 1103: 
 1104:   /* Determine the address of the server replying  so that we can mark that as good */
 1105:   if (serveraddr.sa.sa_family == AF_INET6)
 1106:     serveraddr.in6.sin6_flowinfo = 0;
 1107:   
 1108:   header = (struct dns_header *)daemon->packet;
 1109: 
         /* NOTE(review): a failed recvfrom() (n == -1) is only rejected by this
            length test, i.e. after serveraddr has already been examined above. */
 1110:   if (n < (int)sizeof(struct dns_header) || !(header->hb3 & HB3_QR))
 1111:     return;
 1112: 
 1113:   hash = hash_questions(header, n, daemon->namebuff);
 1114:   
 1115:   if (!(forward = lookup_frec(ntohs(header->id), fd, hash, &first, &last)))
 1116:     return;
 1117:   
 1118:   /* spoof check: answer must come from known server, also
 1119:      we may have sent the same query to multiple servers from
 1120:      the same local socket, and would like to know which one has answered. */
 1121:   for (c = first; c != last; c++)
 1122:     if (sockaddr_isequal(&daemon->serverarray[c]->addr, &serveraddr))
 1123:       break;
 1124:   
 1125:   if (c == last)
 1126:     return;
 1127: 
 1128:   server = daemon->serverarray[c];
 1129: 
         /* Record c as last_server on the first server of the range unless it
            REFUSED; a REFUSED from the recorded server resets the record to -1. */
 1130:   if (RCODE(header) != REFUSED)
 1131:     daemon->serverarray[first]->last_server = c;
 1132:   else if (daemon->serverarray[first]->last_server == c)
 1133:     daemon->serverarray[first]->last_server = -1;
 1134: 
 1135:   /* If sufficient time has elapsed, try and expand UDP buffer size again. */
 1136:   if (difftime(now, server->pktsz_reduced) > UDP_TEST_TIME)
 1137:     server->edns_pktsz = daemon->edns_pktsz;
 1138: 
 1139:   /* log_query gets called indirectly all over the place, so 
 1140:      pass these in global variables - sorry. */
 1141:   daemon->log_display_id = forward->frec_src.log_id;
 1142:   daemon->log_source_addr = &forward->frec_src.source;
 1143:   
 1144: #ifdef HAVE_DUMPFILE
 1145:   dump_packet_udp((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) ? DUMP_SEC_REPLY : DUMP_UP_REPLY,
 1146: 		  (void *)header, n, &serveraddr, NULL, fd);
 1147: #endif
 1148: 
 1149:   if (daemon->ignore_addr && RCODE(header) == NOERROR &&
 1150:       check_for_ignored_address(header, n))
 1151:     return;
 1152:   
 1153:   /* Note: if we send extra options in the EDNS0 header, we can't recreate
 1154:      the query from the reply. */
 1155:   if ((RCODE(header) == REFUSED || RCODE(header) == SERVFAIL) &&
 1156:       forward->forwardall == 0 &&
 1157:       !(forward->flags & FREC_HAS_EXTRADATA))
 1158:     /* for broken servers, attempt to send to another one. */
 1159:     {
 1160:       unsigned char *pheader, *udpsz;
 1161:       unsigned short udp_size =  PACKETSZ; /* default if no EDNS0 */
 1162:       size_t plen;
 1163:       int is_sign;
 1164:       size_t nn = 0; /* length of the query to resend; zero means no retry */
 1165:       
 1166: #ifdef HAVE_DNSSEC
 1167:       /* The query MAY have got a good answer, and be awaiting
 1168: 	 the results of further queries, in which case
 1169: 	 The Stash contains something else and we don't need to retry anyway. */
 1170:       if (forward->blocking_query)
 1171: 	return;
 1172:       
 1173:       if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
 1174: 	{
 1175: 	  /* DNSSEC queries have a copy of the original query stashed. */
 1176: 	  blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
 1177: 	  nn = forward->stash_len;
 1178: 	  udp_size = daemon->edns_pktsz;
 1179: 	}
 1180:       else
 1181: #endif
 1182: 	{
 1183: 	  /* in fast retry mode, we have a copy of the query. */
 1184: 	  if (daemon->fast_retry_time != 0 && forward->stash)
 1185: 	    {
 1186: 	      blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
 1187: 	      nn = forward->stash_len;
 1188: 	      /* UDP size already set in saved query. */
       	      /* NOTE(review): the buffer now holds the stashed query (nn bytes),
       		 but the length given to find_pseudoheader() is n, the size of
       		 the upstream reply - confirm that this is intended. */
 1189: 	      if (find_pseudoheader(header, (size_t)n, NULL, &udpsz, NULL, NULL))
 1190: 		GETSHORT(udp_size, udpsz);
 1191: 	    }
 1192: 	  else
 1193: 	    {
 1194: 	      /* recreate query from reply */
 1195: 	      if ((pheader = find_pseudoheader(header, (size_t)n, &plen, &udpsz, &is_sign, NULL)))
 1196: 		GETSHORT(udp_size, udpsz);
 1197: 	      
 1198: 	      /* If the client provides an EDNS0 UDP size, use that to limit our reply.
 1199: 		 (bounded by the maximum configured). If no EDNS0, then it
 1200: 		 defaults to 512 */
 1201: 	      if (udp_size > daemon->edns_pktsz)
 1202: 		udp_size = daemon->edns_pktsz;
 1203: 	      else if (udp_size < PACKETSZ)
 1204: 		udp_size = PACKETSZ; /* Sanity check - can't reduce below default. RFC 6891 6.2.3 */
 1205: 	      
       	      /* Strip the answer down to a bare question and restore the
       		 query's own CD and AD bits from the flags saved in the frec. */
 1206: 	      header->ancount = htons(0);
 1207: 	      header->nscount = htons(0);
 1208: 	      header->arcount = htons(0);
 1209: 	      header->hb3 &= ~(HB3_QR | HB3_AA | HB3_TC);
 1210: 	      header->hb4 &= ~(HB4_RA | HB4_RCODE | HB4_CD | HB4_AD);
 1211: 	      if (forward->flags & FREC_CHECKING_DISABLED)
 1212: 		header->hb4 |= HB4_CD;
 1213: 	      if (forward->flags & FREC_AD_QUESTION)
 1214: 		header->hb4 |= HB4_AD;
 1215: 
       	      /* NOTE(review): is_sign, pheader and plen are only assigned through
       		 the find_pseudoheader() call above. When that call returned NULL
       		 this test relies on is_sign having been set anyway, and the
       		 add_do_bit() argument is computed from a NULL pheader and
       		 whatever plen holds - confirm against find_pseudoheader(). */
 1216: 	      if (!is_sign &&
 1217: 		  (nn = resize_packet(header, (size_t)n, pheader, plen)) &&
 1218: 		  (forward->flags & FREC_DO_QUESTION))
 1219: 		add_do_bit(header, nn,  (unsigned char *)pheader + plen);
 1220: 	    }
 1221: 	}
 1222:       
 1223:       if (nn)
 1224: 	{
 1225: 	  forward_query(-1, NULL, NULL, 0, header, nn, ((char *) header) + udp_size, now, forward,
 1226: 			forward->flags & FREC_AD_QUESTION, forward->flags & FREC_DO_QUESTION, 0);
 1227: 	  return;
 1228: 	}
 1229:     }
 1230: 
 1231:   /* If the answer is an error, keep the forward record in place in case
 1232:      we get a good reply from another server. Kill it when we've
 1233:      had replies from all to avoid filling the forwarding table when
 1234:      everything is broken */
 1235: 
 1236:   /* decrement count of replies received if we sent to more than one server. */
         /* A REFUSED answer is dropped here for as long as the decremented
            count is still above one. */
 1237:   if (forward->forwardall && (--forward->forwardall > 1) && RCODE(header) == REFUSED)
 1238:     return;
 1239: 
 1240:   /* We tried resending to this server with a smaller maximum size and got an answer.
 1241:      Make that permanent. To avoid reducing the packet size for a single dropped packet,
 1242:      only do this when we get a truncated answer, or one larger than the safe size. */
 1243:   if (server->edns_pktsz > SAFE_PKTSZ && (forward->flags & FREC_TEST_PKTSZ) && 
 1244:       ((header->hb3 & HB3_TC) || n >= SAFE_PKTSZ))
 1245:     {
 1246:       server->edns_pktsz = SAFE_PKTSZ;
 1247:       server->pktsz_reduced = now;
 1248:       (void)prettyprint_addr(&server->addr, daemon->addrbuff);
 1249:       my_syslog(LOG_WARNING, _("reducing DNS packet size for nameserver %s to %d"), daemon->addrbuff, SAFE_PKTSZ);
 1250:     }
 1251: 
 1252:   forward->sentto = server;
 1253: 
 1254:   /* We have a good answer, and will now validate it or return it. 
 1255:      It may be some time before the validation completes, but we don't need
 1256:      any more answers, so close the socket(s) on which we were expecting
 1257:      answers, to conserve file descriptors, and to save work reading and
 1258:      discarding answers for other upstreams. */
 1259:   free_rfds(&forward->rfds);
 1260: 
 1261:   /* calculate modified moving average of server latency */
         /* mma_latency holds the average scaled by 128; query_latency is the
            unscaled value derived from it below. */
 1262:   if (server->query_latency == 0)
 1263:     server->mma_latency = (dnsmasq_milliseconds() - forward->forward_timestamp) * 128; /* init */
 1264:   else
 1265:     server->mma_latency += dnsmasq_milliseconds() - forward->forward_timestamp - server->query_latency;
 1266:   /* denominator controls how many queries we average over. */
 1267:   server->query_latency = server->mma_latency/128;
 1268:   
 1269:   
 1270: #ifdef HAVE_DNSSEC
         /* Validate only when the answering server is flagged for DNSSEC,
            validation is enabled and the query did not disable checking. */
 1271:   if ((forward->sentto->flags & SERV_DO_DNSSEC) && 
 1272:       option_bool(OPT_DNSSEC_VALID) &&
 1273:       !(forward->flags & FREC_CHECKING_DISABLED))
 1274:     dnssec_validate(forward, header, n, STAT_OK, now);
 1275:   else
 1276: #endif
 1277:     return_reply(now, forward, header, n, STAT_OK); 
 1278: }
 1279: 
 1280: static void return_reply(time_t now, struct frec *forward, struct dns_header *header, ssize_t n, int status)
 1281: {
         /* Last stage for an upstream answer: derive the caching and
            bogus-answer flags from status, log the DNSSEC outcome, run the
            packet through process_reply() and send the result to every
            requester recorded in forward->frec_src. The forwarding record is
            freed before returning, whether or not anything was sent.

            now      timestamp handed on to process_reply().
            forward  forwarding record of the answered query; freed here.
            header   the answer packet (n bytes), modified in place.
            status   STAT_OK, or the result of DNSSEC validation. */
 1282:   int check_rebind = 0, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0;
 1283:   size_t nn;
 1284:   int ede = EDE_UNSET;
 1285: 
 1286:   (void)status; /* only referenced inside the HAVE_DNSSEC section below */
 1287: 
 1288:   daemon->log_display_id = forward->frec_src.log_id;
 1289:   daemon->log_source_addr = &forward->frec_src.source;
 1290:   
 1291:   /* Don't cache replies where DNSSEC validation was turned off, either
 1292:      the upstream server told us so, or the original query specified it.  */
 1293:   if ((header->hb4 & HB4_CD) || (forward->flags & FREC_CHECKING_DISABLED))
 1294:     no_cache_dnssec = 1;
 1295: 
 1296: #ifdef HAVE_DNSSEC
 1297:   if (!STAT_ISEQUAL(status, STAT_OK))
 1298:     {
 1299:       /* status is STAT_OK when validation not turned on. */
 1300:       no_cache_dnssec = 0;
 1301:       
 1302:       if (STAT_ISEQUAL(status, STAT_TRUNCATED))
 1303: 	header->hb3 |= HB3_TC;
 1304:       else
 1305: 	{
 1306: 	  char *result, *domain = "result";
 1307: 	  union all_addr a;
 1308: 
 1309: 	  a.log.ede = ede = errflags_to_ede(status);
 1310: 
       	  /* ABANDONED is logged under its own name but handled as BOGUS
       	     from here on. */
 1311: 	  if (STAT_ISEQUAL(status, STAT_ABANDONED))
 1312: 	    {
 1313: 	      result = "ABANDONED";
 1314: 	      status = STAT_BOGUS;
 1315: 	    }
 1316: 	  else
 1317: 	    result = (STAT_ISEQUAL(status, STAT_SECURE) ? "SECURE" : (STAT_ISEQUAL(status, STAT_INSECURE) ? "INSECURE" : "BOGUS"));
 1318: 	  
 1319: 	  if (STAT_ISEQUAL(status, STAT_SECURE))
 1320: 	    cache_secure = 1;
 1321: 	  else if (STAT_ISEQUAL(status, STAT_BOGUS))
 1322: 	    {
 1323: 	      no_cache_dnssec = 1;
 1324: 	      bogusanswer = 1;
 1325: 	      
       	      /* Log the queried name instead of the placeholder when it
       		 can be extracted from the packet. */
 1326: 	      if (extract_request(header, n, daemon->namebuff, NULL))
 1327: 		domain = daemon->namebuff;
 1328: 	    }
 1329: 	  
 1330: 	  log_query(F_SECSTAT, domain, &a, result, 0);
 1331: 	}
 1332:     }
 1333: #endif
 1334:   
 1335:   if (option_bool(OPT_NO_REBIND))
 1336:     check_rebind = !(forward->flags & FREC_NOREBIND);
 1337:   
 1338:   /* restore CD bit to the value in the query */
 1339:   if (forward->flags & FREC_CHECKING_DISABLED)
 1340:     header->hb4 |= HB4_CD;
 1341:   else
 1342:     header->hb4 &= ~HB4_CD;
 1343:   
 1344:   /* Never cache answers which are contingent on the source or MAC address EDNS0 option,
 1345:      since the cache is ignorant of such things. */
 1346:   if (forward->flags & FREC_NO_CACHE)
 1347:     no_cache_dnssec = 1;
 1348:   
         /* A zero length from process_reply() means nothing is sent. */
 1349:   if ((nn = process_reply(header, now, forward->sentto, (size_t)n, check_rebind, no_cache_dnssec, cache_secure, bogusanswer, 
 1350: 			  forward->flags & FREC_AD_QUESTION, forward->flags & FREC_DO_QUESTION, 
 1351: 			  forward->flags & FREC_ADDED_PHEADER, &forward->frec_src.source,
 1352: 			  ((unsigned char *)header) + daemon->edns_pktsz, ede)))
 1353:     {
 1354:       struct frec_src *src;
 1355:       
 1356:       header->id = htons(forward->frec_src.orig_id);
 1357: #ifdef HAVE_DNSSEC
 1358:       /* We added an EDNS0 header for the purpose of getting DNSSEC RRs, and set the value of the UDP payload size
 1359: 	 greater than the no-EDNS0-implied 512 to have space for the RRSIGS. If, having stripped them and the EDNS0
 1360: 	 header, the answer is still bigger than 512, truncate it and mark it so. The client then retries with TCP. */
 1361:       if (option_bool(OPT_DNSSEC_VALID) && (forward->flags & FREC_ADDED_PHEADER) && (nn > PACKETSZ))
 1362: 	{
 1363: 	  header->ancount = htons(0);
 1364: 	  header->nscount = htons(0);
 1365: 	  header->arcount = htons(0);
 1366: 	  header->hb3 |= HB3_TC;
 1367: 	  nn = resize_packet(header, nn, NULL, 0);
 1368: 	}
 1369: #endif
 1370:       
             /* Every requester on the frec_src list gets the same packet with
       	 its own original query id patched in first.
       	 NOTE(review): the id is written through header, while the data
       	 sent and dumped is daemon->packet - this assumes header points
       	 at daemon->packet; confirm for all callers. */
 1371:       for (src = &forward->frec_src; src; src = src->next)
 1372: 	{
 1373: 	  header->id = htons(src->orig_id);
 1374: 	  
 1375: #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
 1376: 	  if (option_bool(OPT_CMARK_ALST_EN))
 1377: 	    {
 1378: 	      unsigned int mark;
 1379: 	      int have_mark = get_incoming_mark(&src->source, &src->dest, /* istcp: */ 0, &mark);
 1380: 	      if (have_mark && ((u32)mark & daemon->allowlist_mask))
 1381: 		report_addresses(header, nn, mark);
 1382: 	    }
 1383: #endif
 1384: 	  
 1385: 	  if (src->fd != -1)
 1386: 	    {
 1387: #ifdef HAVE_DUMPFILE
 1388: 	      dump_packet_udp(DUMP_REPLY, daemon->packet, (size_t)nn, NULL, &src->source, src->fd);
 1389: #endif 
 1390: 	      send_from(src->fd, option_bool(OPT_NOWILD) || option_bool (OPT_CLEVERBIND), daemon->packet, nn, 
 1391: 			&src->source, &src->dest, src->iface);
 1392: 	      
       	      /* With extra logging on, requesters other than the first
       		 are logged as duplicates under their own log id. */
 1393: 	      if (option_bool(OPT_EXTRALOG) && src != &forward->frec_src)
 1394: 		{
 1395: 		  daemon->log_display_id = src->log_id;
 1396: 		  daemon->log_source_addr = &src->source;
 1397: 		  log_query(F_UPSTREAM, "query", NULL, "duplicate", 0);
 1398: 		}
 1399: 	    }
 1400: 	}
 1401:     }
 1402: 
 1403:   free_frec(forward); /* cancel */
 1404: }
 1405: 
 1406: 
 1407: #ifdef HAVE_CONNTRACK
 1408: static int is_query_allowed_for_mark(u32 mark, const char *name)
 1409: {
         /* Return 1 if a query for name is permitted for a connection carrying
            mark, 0 otherwise. Only allowlists whose ->mark equals
            (mark & daemon->allowlist_mask & ->mask) are consulted. A "*"
            pattern permits anything, including a NULL name; any other pattern
            requires name to pass is_valid_dns_name() and to match through
            is_dns_name_matching_pattern(). */
 1410:   int is_allowable_name, did_validate_name = 0;
 1411:   struct allowlist *allowlists;
 1412:   char **patterns_pos;
 1413:   
 1414:   for (allowlists = daemon->allowlists; allowlists; allowlists = allowlists->next)
 1415:     if (allowlists->mark == (mark & daemon->allowlist_mask & allowlists->mask))
 1416:       for (patterns_pos = allowlists->patterns; *patterns_pos; patterns_pos++)
 1417: 	{
 1418: 	  if (!strcmp(*patterns_pos, "*"))
 1419: 	    return 1;
       	  /* The name is validated at most once, on the first pattern which
       	     is not "*". is_allowable_name is only read after this has run,
       	     which is why it carries no initialiser. */
 1420: 	  if (!did_validate_name)
 1421: 	    {
 1422: 	      is_allowable_name = name ? is_valid_dns_name(name) : 0;
 1423: 	      did_validate_name = 1;
 1424: 	    }
 1425: 	  if (is_allowable_name && is_dns_name_matching_pattern(name, *patterns_pos))
 1426: 	    return 1;
 1427: 	}
 1428:   return 0;
 1429: }
 1430: 
 1431: static size_t answer_disallowed(struct dns_header *header, size_t qlen, u32 mark, const char *name)
 1432: {
         /* Turn the query in header (qlen bytes) into a reply in place, using
            setup_reply() with no flags and EDE_BLOCKED. Returns the number of
            bytes up to the end of the question section as found by
            skip_questions(), or 0 when skip_questions() fails.
            With HAVE_UBUS and a non-NULL name the refusal is also broadcast as
            a ubus event. */
 1433:   unsigned char *p;
         /* name and mark are only used in the HAVE_UBUS section below. */
 1434:   (void)name;
 1435:   (void)mark;
 1436:   
 1437: #ifdef HAVE_UBUS
 1438:   if (name)
 1439:     ubus_event_bcast_connmark_allowlist_refused(mark, name);
 1440: #endif
 1441:   
 1442:   setup_reply(header, /* flags: */ 0, EDE_BLOCKED);
 1443:   
 1444:   if (!(p = skip_questions(header, qlen)))
 1445:     return 0;
 1446:   return p - (unsigned char *)header;
 1447: }
 1448: #endif
 1449: 
 1450: void receive_query(struct listener *listen, time_t now)
 1451: {
 1452:   struct dns_header *header = (struct dns_header *)daemon->packet;
 1453:   union mysockaddr source_addr;
 1454:   unsigned char *pheader;
 1455:   unsigned short type, udp_size = PACKETSZ; /* default if no EDNS0 */
 1456:   union all_addr dst_addr;
 1457:   struct in_addr netmask, dst_addr_4;
 1458:   size_t m;
 1459:   ssize_t n;
 1460:   int if_index = 0, auth_dns = 0, do_bit = 0, have_pseudoheader = 0;
 1461: #ifdef HAVE_CONNTRACK
 1462:   unsigned int mark = 0;
 1463:   int have_mark = 0;
 1464:   int is_single_query = 0, allowed = 1;
 1465: #endif
 1466: #ifdef HAVE_AUTH
 1467:   int local_auth = 0;
 1468: #endif
 1469:   struct iovec iov[1];
 1470:   struct msghdr msg;
 1471:   struct cmsghdr *cmptr;
 1472:   union {
 1473:     struct cmsghdr align; /* this ensures alignment */
 1474:     char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
 1475: #if defined(HAVE_LINUX_NETWORK)
 1476:     char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
 1477: #elif defined(IP_RECVDSTADDR) && defined(HAVE_SOLARIS_NETWORK)
 1478:     char control[CMSG_SPACE(sizeof(struct in_addr)) +
 1479: 		 CMSG_SPACE(sizeof(unsigned int))];
 1480: #elif defined(IP_RECVDSTADDR)
 1481:     char control[CMSG_SPACE(sizeof(struct in_addr)) +
 1482: 		 CMSG_SPACE(sizeof(struct sockaddr_dl))];
 1483: #endif
 1484:   } control_u;
 1485:   int family = listen->addr.sa.sa_family;
 1486:    /* Can always get recvd interface for IPv6 */
 1487:   int check_dst = !option_bool(OPT_NOWILD) || family == AF_INET6;
 1488:   
 1489:   /* packet buffer overwritten */
 1490:   daemon->srv_save = NULL;
 1491: 
 1492:   dst_addr_4.s_addr = dst_addr.addr4.s_addr = 0;
 1493:   netmask.s_addr = 0;
 1494:   
 1495:   if (option_bool(OPT_NOWILD) && listen->iface)
 1496:     {
 1497:       auth_dns = listen->iface->dns_auth;
 1498:      
 1499:       if (family == AF_INET)
 1500: 	{
 1501: 	  dst_addr_4 = dst_addr.addr4 = listen->iface->addr.in.sin_addr;
 1502: 	  netmask = listen->iface->netmask;
 1503: 	}
 1504:     }
 1505:   
 1506:   iov[0].iov_base = daemon->packet;
 1507:   iov[0].iov_len = daemon->edns_pktsz;
 1508:     
 1509:   msg.msg_control = control_u.control;
 1510:   msg.msg_controllen = sizeof(control_u);
 1511:   msg.msg_flags = 0;
 1512:   msg.msg_name = &source_addr;
 1513:   msg.msg_namelen = sizeof(source_addr);
 1514:   msg.msg_iov = iov;
 1515:   msg.msg_iovlen = 1;
 1516:   
 1517:   if ((n = recvmsg(listen->fd, &msg, 0)) == -1)
 1518:     return;
 1519:   
 1520:   if (n < (int)sizeof(struct dns_header) || 
 1521:       (msg.msg_flags & MSG_TRUNC) ||
 1522:       (header->hb3 & HB3_QR))
 1523:     return;
 1524: 
 1525:   /* Clear buffer beyond request to avoid risk of
 1526:      information disclosure. */
 1527:   memset(daemon->packet + n, 0, daemon->edns_pktsz - n);
 1528:   
 1529:   source_addr.sa.sa_family = family;
 1530:   
 1531:   if (family == AF_INET)
 1532:     {
 1533:        /* Source-port == 0 is an error, we can't send back to that. 
 1534: 	  http://www.ietf.org/mail-archive/web/dnsop/current/msg11441.html */
 1535:       if (source_addr.in.sin_port == 0)
 1536: 	return;
 1537:     }
 1538:   else
 1539:     {
 1540:       /* Source-port == 0 is an error, we can't send back to that. */
 1541:       if (source_addr.in6.sin6_port == 0)
 1542: 	return;
 1543:       source_addr.in6.sin6_flowinfo = 0;
 1544:     }
 1545:   
 1546:   /* We can be configured to only accept queries from at-most-one-hop-away addresses. */
 1547:   if (option_bool(OPT_LOCAL_SERVICE))
 1548:     {
 1549:       struct addrlist *addr;
 1550: 
 1551:       if (family == AF_INET6) 
 1552: 	{
 1553: 	  for (addr = daemon->interface_addrs; addr; addr = addr->next)
 1554: 	    if ((addr->flags & ADDRLIST_IPV6) &&
 1555: 		is_same_net6(&addr->addr.addr6, &source_addr.in6.sin6_addr, addr->prefixlen))
 1556: 	      break;
 1557: 	}
 1558:       else
 1559: 	{
 1560: 	  struct in_addr netmask;
 1561: 	  for (addr = daemon->interface_addrs; addr; addr = addr->next)
 1562: 	    {
 1563: 	      netmask.s_addr = htonl(~(in_addr_t)0 << (32 - addr->prefixlen));
 1564: 	      if (!(addr->flags & ADDRLIST_IPV6) &&
 1565: 		  is_same_net(addr->addr.addr4, source_addr.in.sin_addr, netmask))
 1566: 		break;
 1567: 	    }
 1568: 	}
 1569:       if (!addr)
 1570: 	{
 1571: 	  static int warned = 0;
 1572: 	  if (!warned)
 1573: 	    {
 1574: 	      prettyprint_addr(&source_addr, daemon->addrbuff);
 1575: 	      my_syslog(LOG_WARNING, _("ignoring query from non-local network %s (logged only once)"), daemon->addrbuff);
 1576: 	      warned = 1;
 1577: 	    }
 1578: 	  return;
 1579: 	}
 1580:     }
 1581: 		
 1582:   if (check_dst)
 1583:     {
 1584:       struct ifreq ifr;
 1585: 
 1586:       if (msg.msg_controllen < sizeof(struct cmsghdr))
 1587: 	return;
 1588: 
 1589: #if defined(HAVE_LINUX_NETWORK)
 1590:       if (family == AF_INET)
 1591: 	for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
 1592: 	  if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_PKTINFO)
 1593: 	    {
 1594: 	      union {
 1595: 		unsigned char *c;
 1596: 		struct in_pktinfo *p;
 1597: 	      } p;
 1598: 	      p.c = CMSG_DATA(cmptr);
 1599: 	      dst_addr_4 = dst_addr.addr4 = p.p->ipi_spec_dst;
 1600: 	      if_index = p.p->ipi_ifindex;
 1601: 	    }
 1602: #elif defined(IP_RECVDSTADDR) && defined(IP_RECVIF)
 1603:       if (family == AF_INET)
 1604: 	{
 1605: 	  for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
 1606: 	    {
 1607: 	      union {
 1608: 		unsigned char *c;
 1609: 		unsigned int *i;
 1610: 		struct in_addr *a;
 1611: #ifndef HAVE_SOLARIS_NETWORK
 1612: 		struct sockaddr_dl *s;
 1613: #endif
 1614: 	      } p;
 1615: 	       p.c = CMSG_DATA(cmptr);
 1616: 	       if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVDSTADDR)
 1617: 		 dst_addr_4 = dst_addr.addr4 = *(p.a);
 1618: 	       else if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVIF)
 1619: #ifdef HAVE_SOLARIS_NETWORK
 1620: 		 if_index = *(p.i);
 1621: #else
 1622:   	         if_index = p.s->sdl_index;
 1623: #endif
 1624: 	    }
 1625: 	}
 1626: #endif
 1627:       
 1628:       if (family == AF_INET6)
 1629: 	{
 1630: 	  for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
 1631: 	    if (cmptr->cmsg_level == IPPROTO_IPV6 && cmptr->cmsg_type == daemon->v6pktinfo)
 1632: 	      {
 1633: 		union {
 1634: 		  unsigned char *c;
 1635: 		  struct in6_pktinfo *p;
 1636: 		} p;
 1637: 		p.c = CMSG_DATA(cmptr);
 1638: 		  
 1639: 		dst_addr.addr6 = p.p->ipi6_addr;
 1640: 		if_index = p.p->ipi6_ifindex;
 1641: 	      }
 1642: 	}
 1643:       
 1644:       /* enforce available interface configuration */
 1645:       
 1646:       if (!indextoname(listen->fd, if_index, ifr.ifr_name))
 1647: 	return;
 1648:       
 1649:       if (!iface_check(family, &dst_addr, ifr.ifr_name, &auth_dns))
 1650: 	{
 1651: 	   if (!option_bool(OPT_CLEVERBIND))
 1652: 	     enumerate_interfaces(0); 
 1653: 	   if (!loopback_exception(listen->fd, family, &dst_addr, ifr.ifr_name) &&
 1654: 	       !label_exception(if_index, family, &dst_addr))
 1655: 	     return;
 1656: 	}
 1657: 
 1658:       if (family == AF_INET && option_bool(OPT_LOCALISE))
 1659: 	{
 1660: 	  struct irec *iface;
 1661: 	  
 1662: 	  /* get the netmask of the interface which has the address we were sent to.
 1663: 	     This is no necessarily the interface we arrived on. */
 1664: 	  
 1665: 	  for (iface = daemon->interfaces; iface; iface = iface->next)
 1666: 	    if (iface->addr.sa.sa_family == AF_INET &&
 1667: 		iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
 1668: 	      break;
 1669: 	  
 1670: 	  /* interface may be new */
 1671: 	  if (!iface && !option_bool(OPT_CLEVERBIND))
 1672: 	    enumerate_interfaces(0); 
 1673: 	  
 1674: 	  for (iface = daemon->interfaces; iface; iface = iface->next)
 1675: 	    if (iface->addr.sa.sa_family == AF_INET &&
 1676: 		iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
 1677: 	      break;
 1678: 	  
 1679: 	  /* If we failed, abandon localisation */
 1680: 	  if (iface)
 1681: 	    netmask = iface->netmask;
 1682: 	  else
 1683: 	    dst_addr_4.s_addr = 0;
 1684: 	}
 1685:     }
 1686:    
 1687:   /* log_query gets called indirectly all over the place, so 
 1688:      pass these in global variables - sorry. */
 1689:   daemon->log_display_id = ++daemon->log_id;
 1690:   daemon->log_source_addr = &source_addr;
 1691: 
 1692: #ifdef HAVE_DUMPFILE
 1693:   dump_packet_udp(DUMP_QUERY, daemon->packet, (size_t)n, &source_addr, NULL, listen->fd);
 1694: #endif
 1695:   
 1696: #ifdef HAVE_CONNTRACK
 1697:   if (option_bool(OPT_CMARK_ALST_EN))
 1698:     have_mark = get_incoming_mark(&source_addr, &dst_addr, /* istcp: */ 0, &mark);
 1699: #endif
 1700: 	  
 1701:   if (extract_request(header, (size_t)n, daemon->namebuff, &type))
 1702:     {
 1703: #ifdef HAVE_AUTH
 1704:       struct auth_zone *zone;
 1705: #endif
 1706:       log_query_mysockaddr(F_QUERY | F_FORWARD, daemon->namebuff,
 1707: 			   &source_addr, auth_dns ? "auth" : "query", type);
 1708:       
 1709: #ifdef HAVE_CONNTRACK
 1710:       is_single_query = 1;
 1711: #endif
 1712: 
 1713: #ifdef HAVE_AUTH
 1714:       /* find queries for zones we're authoritative for, and answer them directly */
 1715:       if (!auth_dns && !option_bool(OPT_LOCALISE))
 1716: 	for (zone = daemon->auth_zones; zone; zone = zone->next)
 1717: 	  if (in_zone(zone, daemon->namebuff, NULL))
 1718: 	    {
 1719: 	      auth_dns = 1;
 1720: 	      local_auth = 1;
 1721: 	      break;
 1722: 	    }
 1723: #endif
 1724:       
 1725: #ifdef HAVE_LOOP
 1726:       /* Check for forwarding loop */
 1727:       if (detect_loop(daemon->namebuff, type))
 1728: 	return;
 1729: #endif
 1730:     }
 1731:   
 1732:   if (find_pseudoheader(header, (size_t)n, NULL, &pheader, NULL, NULL))
 1733:     { 
 1734:       unsigned short flags;
 1735:       
 1736:       have_pseudoheader = 1;
 1737:       GETSHORT(udp_size, pheader);
 1738:       pheader += 2; /* ext_rcode */
 1739:       GETSHORT(flags, pheader);
 1740:       
 1741:       if (flags & 0x8000)
 1742: 	do_bit = 1;/* do bit */ 
 1743: 	
 1744:       /* If the client provides an EDNS0 UDP size, use that to limit our reply.
 1745: 	 (bounded by the maximum configured). If no EDNS0, then it
 1746: 	 defaults to 512. We write this value into the query packet too, so that
 1747: 	 if it's forwarded, we don't specify a maximum size greater than we can handle. */
 1748:       if (udp_size > daemon->edns_pktsz)
 1749: 	udp_size = daemon->edns_pktsz;
 1750:       else if (udp_size < PACKETSZ)
 1751: 	udp_size = PACKETSZ; /* Sanity check - can't reduce below default. RFC 6891 6.2.3 */
 1752: 
 1753:       pheader -= 6; /* ext_class */
 1754:       PUTSHORT(udp_size, pheader); /* Bounding forwarded queries to maximum configured */
 1755:     }
 1756:   
 1757: #ifdef HAVE_CONNTRACK
 1758: #ifdef HAVE_AUTH
 1759:   if (!auth_dns || local_auth)
 1760: #endif
 1761:     if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
 1762:       allowed = is_query_allowed_for_mark((u32)mark, is_single_query ? daemon->namebuff : NULL);
 1763: #endif
 1764:   
 1765:   if (0);
 1766: #ifdef HAVE_CONNTRACK
 1767:   else if (!allowed)
 1768:     {
 1769:       u16 swap = htons(EDE_BLOCKED);
 1770: 
 1771:       m = answer_disallowed(header, (size_t)n, (u32)mark, is_single_query ? daemon->namebuff : NULL);
 1772:       
 1773:       if (have_pseudoheader && m != 0)
 1774: 	m = add_pseudoheader(header,  m,  ((unsigned char *) header) + udp_size, daemon->edns_pktsz,
 1775: 			     EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
 1776:       
 1777:       if (m >= 1)
 1778: 	{
 1779: #ifdef HAVE_DUMPFILE
 1780: 	  dump_packet_udp(DUMP_REPLY, daemon->packet, m, NULL, &source_addr, listen->fd);
 1781: #endif
 1782: 	  send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
 1783: 		    (char *)header, m, &source_addr, &dst_addr, if_index);
 1784: 	  daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
 1785: 	}
 1786:     }
 1787: #endif
 1788: #ifdef HAVE_AUTH
 1789:   else if (auth_dns)
 1790:     {
 1791:       m = answer_auth(header, ((char *) header) + udp_size, (size_t)n, now, &source_addr, 
 1792: 		      local_auth, do_bit, have_pseudoheader);
 1793:       if (m >= 1)
 1794: 	{
 1795: #ifdef HAVE_DUMPFILE
 1796: 	  dump_packet_udp(DUMP_REPLY, daemon->packet, m, NULL, &source_addr, listen->fd);
 1797: #endif
 1798: #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
 1799: 	  if (local_auth)
 1800: 	    if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
 1801: 	      report_addresses(header, m, mark);
 1802: #endif
 1803: 	  send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
 1804: 		    (char *)header, m, &source_addr, &dst_addr, if_index);
 1805: 	  daemon->metrics[METRIC_DNS_AUTH_ANSWERED]++;
 1806: 	}
 1807:     }
 1808: #endif
 1809:   else
 1810:     {
 1811:       int stale;
 1812:       int ad_reqd = do_bit;
 1813:       u16 hb3 = header->hb3, hb4 = header->hb4;
 1814:       int fd = listen->fd;
 1815:       
 1816:       /* RFC 6840 5.7 */
 1817:       if (header->hb4 & HB4_AD)
 1818: 	ad_reqd = 1;
 1819:       
 1820:       m = answer_request(header, ((char *) header) + udp_size, (size_t)n, 
 1821: 			 dst_addr_4, netmask, now, ad_reqd, do_bit, have_pseudoheader, &stale);
 1822:       
 1823:       if (m >= 1)
 1824: 	{
 1825: 	  if (stale && have_pseudoheader)
 1826: 	    {
 1827: 	      u16 swap = htons(EDE_STALE);
 1828: 	      
 1829: 	      m = add_pseudoheader(header,  m,  ((unsigned char *) header) + udp_size, daemon->edns_pktsz,
 1830: 				   EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
 1831: 	    }
 1832: #ifdef HAVE_DUMPFILE
 1833: 	  dump_packet_udp(DUMP_REPLY, daemon->packet, m, NULL, &source_addr, listen->fd);
 1834: #endif
 1835: #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
 1836: 	  if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
 1837: 	    report_addresses(header, m, mark);
 1838: #endif
 1839: 	  send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
 1840: 		    (char *)header, m, &source_addr, &dst_addr, if_index);
 1841: 	  daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
 1842: 	  if (stale)
 1843: 	    daemon->metrics[METRIC_DNS_STALE_ANSWERED]++;
 1844: 	}
 1845:       
 1846:       if (m == 0 || stale)
 1847: 	{
 1848: 	  if (m != 0)
 1849: 	    {
 1850: 	      size_t plen;
 1851: 	      
 1852: 	      /* We answered with stale cache data, so forward the query anyway to
 1853: 		 refresh that. Restore the query from the answer packet. */
 1854: 	      pheader = find_pseudoheader(header, (size_t)m, &plen, NULL, NULL, NULL);
 1855: 	      
 1856: 	      header->hb3 = hb3;
 1857: 	      header->hb4 = hb4;
 1858: 	      header->ancount = htons(0);
 1859: 	      header->nscount = htons(0);
 1860: 	      header->arcount = htons(0);
 1861: 
 1862: 	      m = resize_packet(header, m, pheader, plen);
 1863: 
 1864: 	      /* We've already answered the client, so don't send it the answer 
 1865: 		 when it comes back. */
 1866: 	      fd = -1;
 1867: 	    }
 1868: 	  
 1869: 	  if (forward_query(fd, &source_addr, &dst_addr, if_index,
 1870: 			    header, (size_t)n,  ((char *) header) + udp_size, now, NULL, ad_reqd, do_bit, 0))
 1871: 	    daemon->metrics[METRIC_DNS_QUERIES_FORWARDED]++;
 1872: 	  else
 1873: 	    daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
 1874: 	}
 1875:     }
 1876: }
 1877: 
 1878: /* Send query in packet, qsize to a server determined by first,last,start and
 1879:    get the reply. return reply size.

          packet: a two-byte length field followed by the DNS message (qsize
            bytes). The length field is (re)written here before each attempt and
            the reply is read into the message area at packet + 2.
          first, last, start: indices into daemon->serverarray. The server at
            start is tried first; later attempts step the index up by one,
            wrapping to first when it reaches last, and stop when the index gets
            back to the one tried first.
          have_mark, mark: only used when built with HAVE_CONNTRACK, to set
            SO_MARK on a newly created socket.
          servp: set to the server whose reply was accepted.

          Returns the reply length in bytes, or 0 when the question cannot be
          hashed or no server gives a reply whose question hash matches. */
 1880: static ssize_t tcp_talk(int first, int last, int start, unsigned char *packet,  size_t qsize,
 1881: 			int have_mark, unsigned int mark, struct server **servp)
 1882: {
       /* firstsendto stays -1 until the first server index has been chosen. */
 1883:   int firstsendto = -1;
 1884:   u16 *length = (u16 *)packet;
 1885:   unsigned char *payload = &packet[2];
 1886:   struct dns_header *header = (struct dns_header *)payload;
 1887:   unsigned char c1, c2;
 1888:   unsigned char hash[HASH_SIZE], *hashp;
 1889:   unsigned int rsize;
 1890:   
       /* Both are referenced only inside the HAVE_CONNTRACK block below. */
 1891:   (void)mark;
 1892:   (void)have_mark;
 1893: 
 1894:   if (!(hashp = hash_questions(header, (unsigned int)qsize, daemon->namebuff)))
 1895:     return 0;
 1896: 
       /* Keep our own copy of the question hash: hash_questions() is called
          again on the reply and its result is compared with this one. */
 1897:   memcpy(hash, hashp, HASH_SIZE);
 1898:   
 1899:   while (1) 
 1900:     {
 1901:       int data_sent = 0;
 1902:       struct server *serv;
 1903:       
 1904:       if (firstsendto == -1)
 1905: 	firstsendto = start;
 1906:       else
 1907: 	{
 1908: 	  start++;
 1909: 	  
 1910: 	  if (start == last)
 1911: 	    start = first;
 1912: 	  
       	  /* Back at the server we began with: every candidate has been tried. */
 1913: 	  if (start == firstsendto)
 1914: 	    break;
 1915: 	}
 1916:       
 1917:       serv = daemon->serverarray[start];
 1918:       
 1919:     retry:
 1920:       *length = htons(qsize);
 1921:       
       /* No open connection to this server: create, bind and connect a new
          socket. When serv->tcpfd is already open it is reused as is. */
 1922:       if (serv->tcpfd == -1)
 1923: 	{
 1924: 	  if ((serv->tcpfd = socket(serv->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
 1925: 	    continue;
 1926: 	  
 1927: #ifdef HAVE_CONNTRACK
 1928: 	  /* Copy connection mark of incoming query to outgoing connection. */
 1929: 	  if (have_mark)
 1930: 	    setsockopt(serv->tcpfd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
 1931: #endif			  
 1932: 	  
 1933: 	  if ((!local_bind(serv->tcpfd,  &serv->source_addr, serv->interface, 0, 1)))
 1934: 	    {
 1935: 	      close(serv->tcpfd);
 1936: 	      serv->tcpfd = -1;
 1937: 	      continue;
 1938: 	    }
 1939: 	  
 1940: #ifdef MSG_FASTOPEN
       	  /* Try to send the query together with the connection setup.
       	     NOTE(review): the errno test below relies on server_send() leaving
       	     errno == 0 after a successful send - confirm against its definition. */
 1941: 	  server_send(serv, serv->tcpfd, packet, qsize + sizeof(u16), MSG_FASTOPEN);
 1942: 	  
 1943: 	  if (errno == 0)
 1944: 	    data_sent = 1;
 1945: #endif
 1946: 	  
 1947: 	  if (!data_sent && connect(serv->tcpfd, &serv->addr.sa, sa_len(&serv->addr)) == -1)
 1948: 	    {
 1949: 	      close(serv->tcpfd);
 1950: 	      serv->tcpfd = -1;
 1951: 	      continue;
 1952: 	    }
 1953: 	  
       	  /* Record the index of the server we connected to in the first entry
       	     of the range, and mark the fresh connection as not yet having
       	     delivered a reply. */
 1954: 	  daemon->serverarray[first]->last_server = start;
 1955: 	  serv->flags &= ~SERV_GOT_TCP;
 1956: 	}
 1957:       
       /* Write the query (unless it already went out above), then read the
          two length bytes and that many bytes of reply. */
 1958:       if ((!data_sent && !read_write(serv->tcpfd, packet, qsize + sizeof(u16), 0)) ||
 1959: 	  !read_write(serv->tcpfd, &c1, 1, 1) ||
 1960: 	  !read_write(serv->tcpfd, &c2, 1, 1) ||
 1961: 	  !read_write(serv->tcpfd, payload, (rsize = (c1 << 8) | c2), 1))
 1962: 	{
 1963: 	  close(serv->tcpfd);
 1964: 	  serv->tcpfd = -1;
 1965: 	  /* We get data then EOF, reopen connection to same server,
 1966: 	     else try next. This avoids DoS from a server which accepts
 1967: 	     connections and then closes them. */
 1968: 	  if (serv->flags & SERV_GOT_TCP)
 1969: 	    goto retry;
 1970: 	  else
 1971: 	    continue;
 1972: 	}
 1973: 
 1974:       /* If the hash of the question section doesn't match the crc we sent, then
 1975: 	 someone might be attempting to insert bogus values into the cache by 
 1976: 	 sending replies containing questions and bogus answers. 
 1977: 	 Try another server, or give up */
 1978:       if (!(hashp = hash_questions(header, rsize, daemon->namebuff)) || memcmp(hash, hashp, HASH_SIZE) != 0)
 1979: 	continue;
 1980:       
       /* This connection has delivered a valid reply; a later failure on it
          is retried against the same server (see above). */
 1981:       serv->flags |= SERV_GOT_TCP;
 1982:       
 1983:       *servp = serv;
 1984:       return rsize;
 1985:     }
 1986: 
 1987:   return 0;
 1988: }
 1989: 		  
 1990: #ifdef HAVE_DNSSEC
 1991: /* Recurse down the key hierarchy

          Validate the message in header (n bytes) over TCP, fetching whatever
          DS/DNSKEY records the validator asks for.

          status selects the validation step applied to header:
            STAT_NEED_KEY -> dnssec_validate_by_ds()
            STAT_NEED_DS  -> dnssec_validate_ds()
            anything else -> dnssec_validate_reply()
          When that step answers STAT_NEED_DS or STAT_NEED_KEY, a DS or DNSKEY
          query for keyname is built, sent with tcp_talk(), and its reply is
          validated by a recursive call with the same keycount. If the
          recursion returns STAT_OK the loop runs the validation step on header
          again; any other result ends the loop.

          name, keyname: name buffers handed to the validators; keyname holds
            the name of the key/DS that is needed.
          server: server used to pick the DNSSEC server range; replaced by the
            server which answered each key query.
          have_mark, mark: passed through to tcp_talk().
          keycount: shared work budget, decremented on every loop pass; the
            result is STAT_ABANDONED once it reaches zero.

          Returns the last status computed. STAT_ABANDONED is also returned when
          the scratch packet cannot be allocated, no server is available, or
          tcp_talk() gets no reply. */
 1992: static int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n, 
 1993: 			   int class, char *name, char *keyname, struct server *server, 
 1994: 			   int have_mark, unsigned int mark, int *keycount)
 1995: {
 1996:   int first, last, start, new_status;
       /* Scratch buffer for key queries; allocated on first need and freed
          before returning. */
 1997:   unsigned char *packet = NULL;
 1998:   struct dns_header *new_header = NULL;
 1999:   
 2000:   while (1)
 2001:     {
 2002:       size_t m;
 2003:       int log_save;
 2004:             
 2005:       /* limit the amount of work we do, to avoid cycling forever on loops in the DNS */
 2006:       if (--(*keycount) == 0)
 2007: 	new_status = STAT_ABANDONED;
 2008:       else if (STAT_ISEQUAL(status, STAT_NEED_KEY))
 2009: 	new_status = dnssec_validate_by_ds(now, header, n, name, keyname, class);
 2010:       else if (STAT_ISEQUAL(status, STAT_NEED_DS))
 2011: 	new_status = dnssec_validate_ds(now, header, n, name, keyname, class);
 2012:       else 
 2013: 	new_status = dnssec_validate_reply(now, header, n, name, keyname, &class,
 2014: 					   !option_bool(OPT_DNSSEC_IGN_NS) && (server->flags & SERV_DO_DNSSEC),
 2015: 					   NULL, NULL, NULL);
 2016:       
       /* Anything other than a request for more keys is the final answer. */
 2017:       if (!STAT_ISEQUAL(new_status, STAT_NEED_DS) && !STAT_ISEQUAL(new_status, STAT_NEED_KEY))
 2018: 	break;
 2019: 
 2020:       /* Can't validate because we need a key/DS whose name now in keyname.
 2021: 	 Make query for same, and recurse to validate */
 2022:       if (!packet)
 2023: 	{
 2024: 	  packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
 2025: 	  new_header = (struct dns_header *)&packet[2];
 2026: 	}
 2027:       
 2028:       if (!packet)
 2029: 	{
 2030: 	  new_status = STAT_ABANDONED;
 2031: 	  break;
 2032: 	}
 2033: 
       /* The record type asked for follows new_status, i.e. what the
          validator has just said it needs. */
 2034:       m = dnssec_generate_query(new_header, ((unsigned char *) new_header) + 65536, keyname, class, 
 2035: 				STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? T_DNSKEY : T_DS, server->edns_pktsz);
 2036:       
 2037:       if ((start = dnssec_server(server, daemon->keyname, &first, &last)) == -1 ||
 2038: 	  (m = tcp_talk(first, last, start, packet, m, have_mark, mark, &server)) == 0)
 2039: 	{
 2040: 	  new_status = STAT_ABANDONED;
 2041: 	  break;
 2042: 	}
 2043: 
       /* Log the key query under its own display id, restored after the
          recursive validation below. */
 2044:       log_save = daemon->log_display_id;
 2045:       daemon->log_display_id = ++daemon->log_id;
 2046:       
       /* Label the log line from new_status, the same value that chose the
          query type above. Testing the incoming status here mislabelled
          DNSKEY queries as DS whenever status was not STAT_NEED_KEY. */
 2047:       log_query_mysockaddr(F_NOEXTRA | F_DNSSEC | F_SERVER, keyname, &server->addr,
 2048: 			    STAT_ISEQUAL(new_status, STAT_NEED_KEY) ? "dnssec-query[DNSKEY]" : "dnssec-query[DS]", 0);
 2049:             
 2050:       new_status = tcp_key_recurse(now, new_status, new_header, m, class, name, keyname, server, have_mark, mark, keycount);
 2051: 
 2052:       daemon->log_display_id = log_save;
 2053:       
 2054:       if (!STAT_ISEQUAL(new_status, STAT_OK))
 2055: 	break;
 2056:     }
 2057:     
 2058:   if (packet)
 2059:     free(packet);
 2060:     
 2061:   return new_status;
 2062: }
 2063: #endif
 2064: 
 2065: 
 2066: /* The daemon forks before calling this: it should deal with one connection,
 2067:    blocking as necessary, and then return. Note, need to be a bit careful
 2068:    about resources for debug mode, when the fork is suppressed: that's
 2069:    done by the caller.

          confd: connected TCP socket to the client.
          local_addr: local address of the connection; used for the conntrack
            mark lookup and, when IPv4, as the destination address handed to
            answer_request().
          netmask: passed to answer_request().
          auth_dns: non-zero when queries are to be answered by answer_auth();
            it is also switched on here when a query name falls in one of
            daemon->auth_zones (and OPT_LOCALISE is not set).

          Queries are read as a two-byte length followed by the message, at most
          TCP_MAX_QUERIES of them. Each is answered by answer_disallowed(),
          answer_auth() or answer_request(); when the latter returns 0 the query
          is forwarded with tcp_talk() unless a local answer applies. The reply
          is written back with a two-byte length prefix.

          Returns the working buffer allocated here (NULL if the allocation
          failed). NOTE(review): presumably the caller frees it - confirm.
          Paths which "return packet" directly leave confd open; only paths which
          break out of the main loop reach the shutdown/close at the end. */
 2070: unsigned char *tcp_request(int confd, time_t now,
 2071: 			   union mysockaddr *local_addr, struct in_addr netmask, int auth_dns)
 2072: {
 2073:   size_t size = 0;
 2074:   int norebind;
 2075: #ifdef HAVE_CONNTRACK
 2076:   int is_single_query = 0, allowed = 1;
 2077: #endif
 2078: #ifdef HAVE_AUTH
 2079:   int local_auth = 0;
 2080: #endif
 2081:   int checking_disabled, do_bit, added_pheader = 0, have_pseudoheader = 0;
 2082:   int cacheable, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0;
 2083:   size_t m;
 2084:   unsigned short qtype;
 2085:   unsigned int gotname;
 2086:   /* Max TCP packet + slop + size */
 2087:   unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
 2088:   unsigned char *payload = &packet[2];
 2089:   unsigned char c1, c2;
 2090:   /* largest field in header is 16-bits, so this is still sufficiently aligned */
 2091:   struct dns_header *header = (struct dns_header *)payload;
 2092:   u16 *length = (u16 *)packet;
 2093:   struct server *serv;
 2094:   struct in_addr dst_addr_4;
 2095:   union mysockaddr peer_addr;
 2096:   socklen_t peer_len = sizeof(union mysockaddr);
 2097:   int query_count = 0;
 2098:   unsigned char *pheader;
 2099:   unsigned int mark = 0;
 2100:   int have_mark = 0;
       /* stale is only ever written by answer_request(). Start it at 0 so that
          the answer_auth()/answer_disallowed() paths, which never call it, do
          not test an indeterminate value after building their reply. */
 2101:   int first, last, stale = 0, do_stale = 0;
 2102:   unsigned int flags = 0;
 2103:   u16 hb3, hb4;
 2104:     
 2105:   if (!packet || getpeername(confd, (struct sockaddr *)&peer_addr, &peer_len) == -1)
 2106:     return packet;
 2107: 
 2108: #ifdef HAVE_CONNTRACK
 2109:   /* Get connection mark of incoming query to set on outgoing connections. */
 2110:   if (option_bool(OPT_CONNTRACK) || option_bool(OPT_CMARK_ALST_EN))
 2111:     {
 2112:       union all_addr local;
 2113: 		      
 2114:       if (local_addr->sa.sa_family == AF_INET6)
 2115: 	local.addr6 = local_addr->in6.sin6_addr;
 2116:       else
 2117: 	local.addr4 = local_addr->in.sin_addr;
 2118:       
 2119:       have_mark = get_incoming_mark(&peer_addr, &local, 1, &mark);
 2120:     }
 2121: #endif	
 2122: 
 2123:   /* We can be configured to only accept queries from at-most-one-hop-away addresses. */
 2124:   if (option_bool(OPT_LOCAL_SERVICE))
 2125:     {
 2126:       struct addrlist *addr;
 2127: 
 2128:       if (peer_addr.sa.sa_family == AF_INET6) 
 2129: 	{
 2130: 	  for (addr = daemon->interface_addrs; addr; addr = addr->next)
 2131: 	    if ((addr->flags & ADDRLIST_IPV6) &&
 2132: 		is_same_net6(&addr->addr.addr6, &peer_addr.in6.sin6_addr, addr->prefixlen))
 2133: 	      break;
 2134: 	}
 2135:       else
 2136: 	{
       	  /* Local mask built from each address's prefix length; it shadows
       	     the netmask parameter inside this block only. */
 2137: 	  struct in_addr netmask;
 2138: 	  for (addr = daemon->interface_addrs; addr; addr = addr->next)
 2139: 	    {
       	      /* NOTE(review): if addr->prefixlen can be 0 this shifts by 32,
       	         which is undefined for a 32-bit type - confirm its range. */
 2140: 	      netmask.s_addr = htonl(~(in_addr_t)0 << (32 - addr->prefixlen));
 2141: 	      if (!(addr->flags & ADDRLIST_IPV6) && 
 2142: 		  is_same_net(addr->addr.addr4, peer_addr.in.sin_addr, netmask))
 2143: 		break;
 2144: 	    }
 2145: 	}
       /* addr is NULL when no interface address shares a network with the peer. */
 2146:       if (!addr)
 2147: 	{
 2148: 	  prettyprint_addr(&peer_addr, daemon->addrbuff);
 2149: 	  my_syslog(LOG_WARNING, _("ignoring query from non-local network %s"), daemon->addrbuff);
 2150: 	  return packet;
 2151: 	}
 2152:     }
 2153: 
 2154:   while (1)
 2155:     {
 2156:       int ede = EDE_UNSET;
 2157: 
 2158:       if (query_count == TCP_MAX_QUERIES)
 2159: 	return packet;
 2160: 
 2161:       if (do_stale)
 2162: 	{
 2163: 	  size_t plen;
 2164: 
 2165: 	  /* We answered the last query with stale data. Now try and get fresh data.
 2166: 	     Restore query from answer. */
 2167: 	  pheader = find_pseudoheader(header, m, &plen, NULL, NULL, NULL);
 2168: 	  
 2169: 	  header->hb3 = hb3;
 2170: 	  header->hb4 = hb4;
 2171: 	  header->ancount = htons(0);
 2172: 	  header->nscount = htons(0);
 2173: 	  header->arcount = htons(0);
 2174: 	  
 2175: 	  size = resize_packet(header, m, pheader, plen);
 2176: 	}
 2177:       else
 2178: 	{
       	  /* Read the two length bytes, then the message itself. A read
       	     failure or a zero length ends the connection handling. */
 2179: 	  if (!read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) ||
 2180: 	      !(size = c1 << 8 | c2) ||
 2181: 	      !read_write(confd, payload, size, 1))
 2182: 	    return packet;
 2183: 	  
 2184: 	  /* for stale-answer processing. */
 2185: 	  hb3 = header->hb3;
 2186: 	  hb4 = header->hb4;
 2187: 	}
 2188:       
       /* Too short to hold a DNS header: ignore it and read the next query. */
 2189:       if (size < (int)sizeof(struct dns_header))
 2190: 	continue;
 2191: 
 2192:       /* Clear buffer beyond request to avoid risk of
 2193: 	 information disclosure. */
 2194:       memset(payload + size, 0, 65536 - size);
 2195:       
 2196:       query_count++;
 2197: 
 2198:       /* log_query gets called indirectly all over the place, so 
 2199: 	 pass these in global variables - sorry. */
 2200:       daemon->log_display_id = ++daemon->log_id;
 2201:       daemon->log_source_addr = &peer_addr;
 2202:       
 2203:       /* save state of "cd" flag in query */
 2204:       if ((checking_disabled = header->hb4 & HB4_CD))
 2205: 	no_cache_dnssec = 1;
 2206:        
 2207:       if ((gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
 2208: 	{
 2209: #ifdef HAVE_AUTH
 2210: 	  struct auth_zone *zone;
 2211: #endif
 2212: 
 2213: #ifdef HAVE_CONNTRACK
 2214: 	  is_single_query = 1;
 2215: #endif
 2216: 
       	  /* Skip logging and zone matching on the extra stale-refresh pass. */
 2217: 	  if (!do_stale)
 2218: 	    {
 2219: 	      log_query_mysockaddr(F_QUERY | F_FORWARD, daemon->namebuff,
 2220: 				   &peer_addr, auth_dns ? "auth" : "query", qtype);
 2221: 	      
 2222: #ifdef HAVE_AUTH
 2223: 	      /* find queries for zones we're authoritative for, and answer them directly */
 2224: 	      if (!auth_dns && !option_bool(OPT_LOCALISE))
 2225: 		for (zone = daemon->auth_zones; zone; zone = zone->next)
 2226: 		  if (in_zone(zone, daemon->namebuff, NULL))
 2227: 		    {
 2228: 		      auth_dns = 1;
 2229: 		      local_auth = 1;
 2230: 		      break;
 2231: 		    }
 2232: #endif
 2233: 	    }
 2234: 	}
 2235:       
 2236:       norebind = domain_no_rebind(daemon->namebuff);
 2237:       
 2238:       if (local_addr->sa.sa_family == AF_INET)
 2239: 	dst_addr_4 = local_addr->in.sin_addr;
 2240:       else
 2241: 	dst_addr_4.s_addr = 0;
 2242:       
 2243:       do_bit = 0;
 2244: 
 2245:       if (find_pseudoheader(header, (size_t)size, NULL, &pheader, NULL, NULL))
 2246: 	{ 
 2247: 	  unsigned short flags;
 2248: 	  
 2249: 	  have_pseudoheader = 1;
 2250: 	  pheader += 4; /* udp_size, ext_rcode */
 2251: 	  GETSHORT(flags, pheader);
 2252:       
 2253: 	  if (flags & 0x8000)
 2254: 	    do_bit = 1; /* do bit */ 
 2255: 	}
 2256:       
 2257: #ifdef HAVE_CONNTRACK
 2258: #ifdef HAVE_AUTH
 2259:       if (!auth_dns || local_auth)
 2260: #endif
 2261: 	if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
 2262: 	  allowed = is_query_allowed_for_mark((u32)mark, is_single_query ? daemon->namebuff : NULL);
 2263: #endif
 2264: 
       /* Empty first branch so that each conditionally compiled handler
          below can start with "else if". */
 2265:       if (0);
 2266: #ifdef HAVE_CONNTRACK
 2267:       else if (!allowed)
 2268: 	{
 2269: 	  u16 swap = htons(EDE_BLOCKED);
 2270: 
 2271: 	  m = answer_disallowed(header, size, (u32)mark, is_single_query ? daemon->namebuff : NULL);
 2272: 	  
 2273: 	  if (have_pseudoheader && m != 0)
 2274: 	    m = add_pseudoheader(header,  m, ((unsigned char *) header) + 65536, daemon->edns_pktsz,
 2275: 				 EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
 2276: 	}
 2277: #endif
 2278: #ifdef HAVE_AUTH
 2279:       else if (auth_dns)
 2280: 	m = answer_auth(header, ((char *) header) + 65536, (size_t)size, now, &peer_addr, 
 2281: 			local_auth, do_bit, have_pseudoheader);
 2282: #endif
 2283:       else
 2284: 	{
 2285: 	   int ad_reqd = do_bit;
 2286: 	   /* RFC 6840 5.7 */
 2287: 	   if (header->hb4 & HB4_AD)
 2288: 	     ad_reqd = 1;
 2289: 
       	   /* On the stale-refresh pass go straight to forwarding. */
 2290: 	   if (do_stale)
 2291: 	     m = 0;
 2292: 	   else
 2293: 	     /* m > 0 if answered from cache */
 2294: 	     m = answer_request(header, ((char *) header) + 65536, (size_t)size, 
 2295: 				dst_addr_4, netmask, now, ad_reqd, do_bit, have_pseudoheader, &stale);
 2296: 	   
 2297: 	  /* Do this by steam now we're not in the select() loop */
 2298: 	  check_log_writer(1); 
 2299: 	  
 2300: 	  if (m == 0)
 2301: 	    {
 2302: 	      struct server *master;
 2303: 	      int start;
 2304: 
 2305: 	      if (lookup_domain(daemon->namebuff, gotname, &first, &last))
 2306: 		flags = is_local_answer(now, first, daemon->namebuff);
 2307: 	      else
 2308: 		{
 2309: 		  /* No configured servers */
 2310: 		  ede = EDE_NOT_READY;
 2311: 		  flags = 0;
 2312: 		}
 2313: 	      
 2314: 	      /* don't forward A or AAAA queries for simple names, except the empty name */
 2315: 	      if (!flags &&
 2316: 		  option_bool(OPT_NODOTS_LOCAL) &&
 2317: 		  (gotname & (F_IPV4 | F_IPV6)) &&
 2318: 		  !strchr(daemon->namebuff, '.') &&
 2319: 		  strlen(daemon->namebuff) != 0)
 2320: 		flags = check_for_local_domain(daemon->namebuff, now) ? F_NOERR : F_NXDOMAIN;
 2321: 		
 2322: 	      if (!flags && ede != EDE_NOT_READY)
 2323: 		{
 2324: 		  master = daemon->serverarray[first];
 2325: 		  
       		  /* Start with the first server of the range, or with the one
       		     recorded in last_server unless strict ordering is set. */
 2326: 		  if (option_bool(OPT_ORDER) || master->last_server == -1)
 2327: 		    start = first;
 2328: 		  else
 2329: 		    start = master->last_server;
 2330: 		  
 2331: 		  size = add_edns0_config(header, size, ((unsigned char *) header) + 65536, &peer_addr, now, &cacheable);
 2332: 		  
 2333: #ifdef HAVE_DNSSEC
 2334: 		  if (option_bool(OPT_DNSSEC_VALID) && (master->flags & SERV_DO_DNSSEC))
 2335: 		    {
 2336: 		      size = add_do_bit(header, size, ((unsigned char *) header) + 65536);
 2337: 		      
 2338: 		      /* For debugging, set Checking Disabled, otherwise, have the upstream check too,
 2339: 			 this allows it to select auth servers when one is returning bad data. */
 2340: 		      if (option_bool(OPT_DNSSEC_DEBUG))
 2341: 			header->hb4 |= HB4_CD;
 2342: 		    }
 2343: #endif
 2344: 		  
 2345: 		  /* Check if we added a pheader on forwarding - may need to
 2346: 		     strip it from the reply. */
 2347: 		  if (!have_pseudoheader && find_pseudoheader(header, size, NULL, NULL, NULL, NULL))
 2348: 		    added_pheader = 1;
 2349: 		  
 2350: 		  /* Loop round available servers until we succeed in connecting to one. */
 2351: 		  if ((m = tcp_talk(first, last, start, packet, size, have_mark, mark, &serv)) == 0)
 2352: 		    {
 2353: 		      ede = EDE_NETERR;
 2354: 		      break;
 2355: 		    }
 2356: 		  
 2357: 		  /* get query name again for logging - may have been overwritten */
 2358: 		  if (!(gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
 2359: 		    strcpy(daemon->namebuff, "query");
 2360: 		  log_query_mysockaddr(F_SERVER | F_FORWARD, daemon->namebuff, &serv->addr, NULL, 0);
 2361: 		  
 2362: #ifdef HAVE_DNSSEC
 2363: 		  if (option_bool(OPT_DNSSEC_VALID) && !checking_disabled && (master->flags & SERV_DO_DNSSEC))
 2364: 		    {
 2365: 		      int keycount = DNSSEC_WORK; /* Limit to number of DNSSEC questions, to catch loops and avoid filling cache. */
 2366: 		      int status = tcp_key_recurse(now, STAT_OK, header, m, 0, daemon->namebuff, daemon->keyname, 
 2367: 						   serv, have_mark, mark, &keycount);
 2368: 		      char *result, *domain = "result";
 2369: 		      
 2370: 		      union all_addr a;
 2371: 		      a.log.ede = ede = errflags_to_ede(status);
 2372: 		      
       		      /* An abandoned validation is reported as such but
       		         treated as bogus from here on. */
 2373: 		      if (STAT_ISEQUAL(status, STAT_ABANDONED))
 2374: 			{
 2375: 			  result = "ABANDONED";
 2376: 			  status = STAT_BOGUS;
 2377: 			}
 2378: 		      else
 2379: 			result = (STAT_ISEQUAL(status, STAT_SECURE) ? "SECURE" : (STAT_ISEQUAL(status, STAT_INSECURE) ? "INSECURE" : "BOGUS"));
 2380: 		      
 2381: 		      if (STAT_ISEQUAL(status, STAT_SECURE))
 2382: 			cache_secure = 1;
 2383: 		      else if (STAT_ISEQUAL(status, STAT_BOGUS))
 2384: 			{
 2385: 			  no_cache_dnssec = 1;
 2386: 			  bogusanswer = 1;
 2387: 			  
 2388: 			  if (extract_request(header, m, daemon->namebuff, NULL))
 2389: 			    domain = daemon->namebuff;
 2390: 			}
 2391: 		      
 2392: 		      log_query(F_SECSTAT, domain, &a, result, 0);
 2393: 		    }
 2394: #endif
 2395: 		  
 2396: 		  /* restore CD bit to the value in the query */
 2397: 		  if (checking_disabled)
 2398: 		    header->hb4 |= HB4_CD;
 2399: 		  else
 2400: 		    header->hb4 &= ~HB4_CD;
 2401: 		  
 2402: 		  /* Never cache answers which are contingent on the source or MAC address EDNS0 option,
 2403: 		     since the cache is ignorant of such things. */
 2404: 		  if (!cacheable)
 2405: 		    no_cache_dnssec = 1;
 2406: 		  
 2407: 		  m = process_reply(header, now, serv, (unsigned int)m, 
 2408: 				    option_bool(OPT_NO_REBIND) && !norebind, no_cache_dnssec, cache_secure, bogusanswer,
 2409: 				    ad_reqd, do_bit, added_pheader, &peer_addr, ((unsigned char *)header) + 65536, ede); 
 2410: 		}
 2411: 	    }
 2412: 	}
 2413: 	
       /* The stale-refresh pass has done its work; nothing is sent back
          because confd was closed when do_stale was set. */
 2414:       if (do_stale)
 2415: 	break;
 2416:     
 2417:       /* In case of local answer or no connections made. */
 2418:       if (m == 0)
 2419: 	{
 2420: 	  if (!(m = make_local_answer(flags, gotname, size, header, daemon->namebuff,
 2421: 				      ((char *) header) + 65536, first, last, ede)))
 2422: 	    break;
 2423: 	  
 2424: 	  if (have_pseudoheader)
 2425: 	    {
 2426: 	      u16 swap = htons((u16)ede);
 2427: 	      
 2428: 	      if (ede != EDE_UNSET)
 2429: 		m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
 2430: 	      else
 2431: 		m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, 0, NULL, 0, do_bit, 0);
 2432: 	    }
 2433: 	}
 2434:       else if (stale)
 2435: 	 {
 2436: 	   u16 swap = htons((u16)EDE_STALE);
 2437: 	   
 2438: 	   m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, EDNS0_OPTION_EDE, (unsigned char *)&swap, 2, do_bit, 0);
 2439: 	 }
 2440:       
 2441:       check_log_writer(1);
 2442:       
       /* Fill in the two-byte length prefix which precedes the reply. */
 2443:       *length = htons(m);
 2444:       
 2445: #if defined(HAVE_CONNTRACK) && defined(HAVE_UBUS)
 2446: #ifdef HAVE_AUTH
 2447:       if (!auth_dns || local_auth)
 2448: #endif
 2449: 	if (option_bool(OPT_CMARK_ALST_EN) && have_mark && ((u32)mark & daemon->allowlist_mask))
 2450: 	  report_addresses(header, m, mark);
 2451: #endif
 2452:       if (!read_write(confd, packet, m + sizeof(u16), 0))
 2453: 	break;
 2454:       
 2455:       /* If we answered with stale data, this process will now try and get fresh data into
 2456: 	 the cache then and cannot therefore accept new queries. Close the incoming
 2457: 	 connection to signal that to the client. Then set do_stale and loop round
 2458: 	 once more to try and get fresh data, after which we exit. */
 2459:       if (stale)
 2460: 	{
 2461: 	  shutdown(confd, SHUT_RDWR);
 2462: 	  close(confd);
 2463: 	  do_stale = 1;
 2464: 	}
 2465:     }
 2466: 
 2467:   /* If we ran once to get fresh data, confd is already closed. */
 2468:   if (!do_stale)
 2469:     {
 2470:       shutdown(confd, SHUT_RDWR);
 2471:       close(confd);
 2472:     }
 2473: 
 2474:   return packet;
 2475: }
 2476: 
 2477: /* return a UDP socket bound to a random port, have to cope with straying into
 2478:    occupied port nos and reserved ones.

          s: server whose source_addr (address family and address), interface
            and ifindex are used for the socket and for local_bind().

          Returns the bound descriptor, or -1 if the socket could not be
          created, IPV6_V6ONLY could not be set, or local_bind() failed. The
          socket is closed on every failure path. */
 2479: static int random_sock(struct server *s)
 2480: {
 2481:   int fd;
 2482: 
 2483:   if ((fd = socket(s->source_addr.sa.sa_family, SOCK_DGRAM, 0)) != -1)
 2484:     {
 2485:       /* We need to set IPV6ONLY so we can use the same ports
 2486: 	 for IPv4 and IPV6, otherwise, in restricted port situations,
 2487: 	 we can end up with all our available ports in use for 
 2488: 	 one address family, and the other address family cannot be used. */
 2489:       if (s->source_addr.sa.sa_family == AF_INET6)
 2490: 	{
 2491: 	  int opt = 1;
 2492: 
 2493: 	  if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &opt, sizeof(opt)) == -1)
 2494: 	    {
 2495: 	      close(fd);
 2496: 	      return -1;
 2497: 	    }
 2498: 	}
 2499:       
 2500:       if (local_bind(fd, &s->source_addr, s->interface, s->ifindex, 0))
 2501: 	return fd;
 2502: 
 2503:       /* don't log errors due to running out of available ports, we handle those. */
       /* i.e. stay silent only when errno is EADDRINUSE and sockaddr_isnull()
          accepts the source address; every other bind failure is logged.
          NOTE(review): errno is taken to be the one left by local_bind(). */
 2504:       if (!sockaddr_isnull(&s->source_addr) || errno != EADDRINUSE)
 2505: 	{
       	  /* Name the interface in the message when one is set, otherwise
       	     the source address. */
 2506: 	  if (s->interface[0] == 0)
 2507: 	    (void)prettyprint_addr(&s->source_addr, daemon->addrbuff);
 2508: 	  else
 2509: 	    safe_strncpy(daemon->addrbuff, s->interface, ADDRSTRLEN);
 2510: 	  
 2511: 	  my_syslog(LOG_ERR, _("failed to bind server socket to %s: %s"),
 2512: 		    daemon->addrbuff, strerror(errno));
 2513: 	}
 2514: 	  
 2515:       close(fd);
 2516:     }
 2517:   
 2518:   return -1;
 2519: }
 2520: 
 2521: /* compare source addresses and interface, serv2 can be null. */
 2522: static int server_isequal(const struct server *serv1,
 2523: 			 const struct server *serv2)
 2524: {
 2525:   return (serv2 &&
 2526:     serv2->ifindex == serv1->ifindex &&
 2527:     sockaddr_isequal(&serv2->source_addr, &serv1->source_addr) &&
 2528:     strncmp(serv2->interface, serv1->interface, IF_NAMESIZE) == 0);
 2529: }
 2530: 
 2531: /* fdlp points to chain of randomfds already in use by transaction.
 2532:    If there's already a suitable one, return it, else allocate a 
 2533:    new one and add it to the list. 
 2534: 
 2535:    Not leaking any resources in the face of allocation failures
 2536:    is rather convoluted here.
 2537:    
 2538:    Note that rfd->serv may be NULL, when a server goes away.
 2539: */
 2540: int allocate_rfd(struct randfd_list **fdlp, struct server *serv)
 2541: {
 2542:   static int finger = 0;
 2543:   int i, j = 0;
 2544:   int ports_full = 0;
 2545:   struct randfd_list **up, *rfl, *found, **found_link;
 2546:   struct randfd *rfd = NULL;
 2547:   int fd = 0;
 2548:   int ports_avail = 0;
 2549:   
 2550:   /* We can't have more randomsocks for this AF available than ports in  our port range,
 2551:      so check that here, to avoid trying and failing to bind every port
 2552:      in local_bind(), called from random_sock(). The actual check is below when 
 2553:      ports_avail != 0 */
 2554:   if (daemon->max_port != 0)
 2555:     {
 2556:       ports_avail = daemon->max_port - daemon->min_port + 1;
 2557:       if (ports_avail >= SMALL_PORT_RANGE)
 2558: 	ports_avail = 0;
 2559:     }
 2560:   
 2561:   /* If server has a pre-allocated fd, use that. */
 2562:   if (serv->sfd)
 2563:     return serv->sfd->fd;
 2564:   
 2565:   /* existing suitable random port socket linked to this transaction?
 2566:      Find the last one in the list and count how many there are. */
 2567:   for (found = NULL, found_link = NULL, i = 0, up = fdlp, rfl = *fdlp; rfl; up = &rfl->next, rfl = rfl->next)
 2568:     if (server_isequal(serv, rfl->rfd->serv))
 2569:       {
 2570: 	i++;
 2571: 	found = rfl;
 2572: 	found_link = up;
 2573:       }
 2574: 
 2575:   /* We have the maximum number for this query already. Promote
 2576:      the last one on the list to the head, to circulate them,
 2577:      and return it. */
 2578:   if (found && i >= daemon->randport_limit)
 2579:     {
 2580:       *found_link = found->next;
 2581:       found->next = *fdlp;
 2582:       *fdlp = found;
 2583:       return found->rfd->fd;
 2584:     }
 2585: 
 2586:   /* check for all available ports in use. */
 2587:   if (ports_avail != 0)
 2588:     {
 2589:       int ports_inuse;
 2590: 
 2591:       for (ports_inuse = 0, i = 0; i < daemon->numrrand; i++)
 2592: 	if (daemon->randomsocks[i].refcount != 0 &&
 2593: 	    daemon->randomsocks[i].serv->source_addr.sa.sa_family == serv->source_addr.sa.sa_family &&
 2594: 	    ++ports_inuse >= ports_avail)
 2595: 	  {
 2596: 	    ports_full = 1;
 2597: 	    break;
 2598: 	  }
 2599:     }
 2600:   
 2601:   /* limit the number of sockets we have open to avoid starvation of 
 2602:      (eg) TFTP. Once we have a reasonable number, randomness should be OK */
 2603:   if (!ports_full)
 2604:     for (i = 0; i < daemon->numrrand; i++)
 2605:       if (daemon->randomsocks[i].refcount == 0)
 2606: 	{
 2607: 	  if ((fd = random_sock(serv)) != -1)
 2608: 	    {
 2609: 	      rfd = &daemon->randomsocks[i];
 2610: 	      rfd->serv = serv;
 2611: 	      rfd->fd = fd;
 2612: 	      rfd->refcount = 1;
 2613: 	    }
 2614: 	  break;
 2615: 	}
 2616:     
 2617:   /* No good existing. Need new link. */
 2618:   if ((rfl = daemon->rfl_spare))
 2619:     daemon->rfl_spare = rfl->next;
 2620:   else if (!(rfl = whine_malloc(sizeof(struct randfd_list))))
 2621:     {
 2622:       /* malloc failed, don't leak allocated sock */
 2623:       if (rfd)
 2624: 	{
 2625: 	  close(rfd->fd);
 2626: 	  rfd->refcount = 0;
 2627: 	}
 2628: 
 2629:       return -1;
 2630:     }
 2631:   
 2632:   /* No free ones or cannot get new socket, grab an existing one */
 2633:   if (!rfd)
 2634:     for (j = 0; j < daemon->numrrand; j++)
 2635:       {
 2636: 	i = (j + finger) % daemon->numrrand;
 2637: 	if (daemon->randomsocks[i].refcount != 0 &&
 2638: 	    server_isequal(serv, daemon->randomsocks[i].serv) &&
 2639: 	    daemon->randomsocks[i].refcount != 0xfffe)
 2640: 	  {
 2641: 	    struct randfd_list *rl;
 2642: 	    /* Don't pick one we already have. */
 2643: 	    for (rl = *fdlp; rl; rl = rl->next)
 2644: 	      if (rl->rfd == &daemon->randomsocks[i])
 2645: 		break;
 2646: 
 2647: 	    if (!rl)
 2648: 	      {
 2649: 		finger = i + 1;
 2650: 		rfd = &daemon->randomsocks[i];
 2651: 		rfd->refcount++;
 2652: 		break;
 2653: 	      }
 2654: 	  }
 2655:       }
 2656: 
 2657:   if (!rfd) /* should be when j == daemon->numrrand */
 2658:     {
 2659:       struct randfd_list *rfl_poll;
 2660: 
 2661:       /* there are no free slots, and non with the same parameters we can piggy-back on. 
 2662: 	 We're going to have to allocate a new temporary record, distinguished by
 2663: 	 refcount == 0xffff. This will exist in the frec randfd list, never be shared,
 2664: 	 and be freed when no longer in use. It will also be held on 
 2665: 	 the daemon->rfl_poll list so the poll system can find it. */
 2666: 
 2667:       if ((rfl_poll = daemon->rfl_spare))
 2668: 	daemon->rfl_spare = rfl_poll->next;
 2669:       else
 2670: 	rfl_poll = whine_malloc(sizeof(struct randfd_list));
 2671:       
 2672:       if (!rfl_poll ||
 2673: 	  !(rfd = whine_malloc(sizeof(struct randfd))) ||
 2674: 	  (fd = random_sock(serv)) == -1)
 2675: 	{
 2676: 	  
 2677: 	  /* Don't leak anything we may already have */
 2678: 	  rfl->next = daemon->rfl_spare;
 2679: 	  daemon->rfl_spare = rfl;
 2680: 
 2681: 	  if (rfl_poll)
 2682: 	    {
 2683: 	      rfl_poll->next = daemon->rfl_spare;
 2684: 	      daemon->rfl_spare = rfl_poll;
 2685: 	    }
 2686: 	  
 2687: 	  if (rfd)
 2688: 	    free(rfd);
 2689: 	  
 2690: 	  return -1; /* doom */
 2691: 	}
 2692: 
 2693:       /* Note rfd->serv not set here, since it's not reused */
 2694:       rfd->fd = fd;
 2695:       rfd->refcount = 0xffff; /* marker for temp record */
 2696: 
 2697:       rfl_poll->rfd = rfd;
 2698:       rfl_poll->next = daemon->rfl_poll;
 2699:       daemon->rfl_poll = rfl_poll;
 2700:     }
 2701:   
 2702:   rfl->rfd = rfd;
 2703:   rfl->next = *fdlp;
 2704:   *fdlp = rfl;
 2705:   
 2706:   return rfl->rfd->fd;
 2707: }
 2708: 
 2709: void free_rfds(struct randfd_list **fdlp)
 2710: {
 2711:   struct randfd_list *tmp, *rfl, *poll, *next, **up;
 2712:   
 2713:   for (rfl = *fdlp; rfl; rfl = tmp)
 2714:     {
 2715:       if (rfl->rfd->refcount == 0xffff || --(rfl->rfd->refcount) == 0)
 2716: 	close(rfl->rfd->fd);
 2717: 
 2718:       /* temporary overflow record */
 2719:       if (rfl->rfd->refcount == 0xffff)
 2720: 	{
 2721: 	  free(rfl->rfd);
 2722: 	  
 2723: 	  /* go through the link of all these by steam to delete.
 2724: 	     This list is expected to be almost always empty. */
 2725: 	  for (poll = daemon->rfl_poll, up = &daemon->rfl_poll; poll; poll = next)
 2726: 	    {
 2727: 	      next = poll->next;
 2728: 	      
 2729: 	      if (poll->rfd == rfl->rfd)
 2730: 		{
 2731: 		  *up = poll->next;
 2732: 		  poll->next = daemon->rfl_spare;
 2733: 		  daemon->rfl_spare = poll;
 2734: 		}
 2735: 	      else
 2736: 		up = &poll->next;
 2737: 	    }
 2738: 	}
 2739: 
 2740:       tmp = rfl->next;
 2741:       rfl->next = daemon->rfl_spare;
 2742:       daemon->rfl_spare = rfl;
 2743:     }
 2744: 
 2745:   *fdlp = NULL;
 2746: }
 2747: 
 2748: static void free_frec(struct frec *f)
 2749: {
 2750:   struct frec_src *last;
 2751:   
 2752:   /* add back to freelist if not the record builtin to every frec. */
 2753:   for (last = f->frec_src.next; last && last->next; last = last->next) ;
 2754:   if (last)
 2755:     {
 2756:       last->next = daemon->free_frec_src;
 2757:       daemon->free_frec_src = f->frec_src.next;
 2758:     }
 2759:     
 2760:   f->frec_src.next = NULL;    
 2761:   free_rfds(&f->rfds);
 2762:   f->sentto = NULL;
 2763:   f->flags = 0;
 2764: 
 2765:   if (f->stash)
 2766:     {
 2767:       blockdata_free(f->stash);
 2768:       f->stash = NULL;
 2769:     }
 2770:   
 2771: #ifdef HAVE_DNSSEC
 2772:   /* Anything we're waiting on is pointless now, too */
 2773:   if (f->blocking_query)
 2774:     {
 2775:       struct frec *n, **up;
 2776: 
 2777:       /* unlink outselves from the blocking query's dependents list. */
 2778:       for (n = f->blocking_query->dependent, up = &f->blocking_query->dependent; n; n = n->next_dependent)
 2779: 	if (n == f)
 2780: 	  {
 2781: 	    *up = n->next_dependent;
 2782: 	    break;
 2783: 	  }
 2784: 	else
 2785: 	  up = &n->next_dependent;
 2786: 
 2787:       /* If we were the only/last dependent, free the blocking query too. */
 2788:       if (!f->blocking_query->dependent)
 2789: 	free_frec(f->blocking_query);
 2790:     }
 2791:   
 2792:   f->blocking_query = NULL;
 2793:   f->dependent = NULL;
 2794:   f->next_dependent = NULL;
 2795: #endif
 2796: }
 2797: 
 2798: 
 2799: 
 2800: /* Impose an absolute
 2801:    limit of 4*TIMEOUT before we wipe things (for random sockets).
 2802:    If force is set, always return a result, even if we have
 2803:    to allocate above the limit, and don'y free any records.
 2804:    This is set when allocating for DNSSEC to avoid cutting off
 2805:    the branch we are sitting on. */
 2806: static struct frec *get_new_frec(time_t now, struct server *master, int force)
 2807: {
 2808:   struct frec *f, *oldest, *target;
 2809:   int count;
 2810:   
 2811:   /* look for free records, garbage collect old records and count number in use by our server-group. */
 2812:   for (f = daemon->frec_list, oldest = NULL, target =  NULL, count = 0; f; f = f->next)
 2813:     {
 2814:       if (!f->sentto)
 2815: 	target = f;
 2816:       else
 2817: 	{
 2818: #ifdef HAVE_DNSSEC
 2819: 	  /* Don't free DNSSEC sub-queries here, as we may end up with
 2820: 	     dangling references to them. They'll go when their "real" query 
 2821: 	     is freed. */
 2822: 	  if (!f->dependent && !force)
 2823: #endif
 2824: 	    {
 2825: 	      if (difftime(now, f->time) >= 4*TIMEOUT)
 2826: 		{
 2827: 		  daemon->metrics[METRIC_DNS_UNANSWERED_QUERY]++;
 2828: 		  free_frec(f);
 2829: 		  target = f;
 2830: 		}
 2831: 	      else if (!oldest || difftime(f->time, oldest->time) <= 0)
 2832: 		oldest = f;
 2833: 	    }
 2834: 	}
 2835:       
 2836:       if (f->sentto && ((int)difftime(now, f->time)) < TIMEOUT && server_samegroup(f->sentto, master))
 2837: 	count++;
 2838:     }
 2839: 
 2840:   if (!force && count >= daemon->ftabsize)
 2841:     {
 2842:       query_full(now, master->domain);
 2843:       return NULL;
 2844:     }
 2845:   
 2846:   if (!target && oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
 2847:     { 
 2848:       /* can't find empty one, use oldest if there is one and it's older than timeout */
 2849:       daemon->metrics[METRIC_DNS_UNANSWERED_QUERY]++;
 2850:       free_frec(oldest);
 2851:       target = oldest;
 2852:     }
 2853:   
 2854:   if (!target && (target = (struct frec *)whine_malloc(sizeof(struct frec))))
 2855:     {
 2856:       target->next = daemon->frec_list;
 2857:       daemon->frec_list = target;
 2858:     }
 2859: 
 2860:   if (target)
 2861:     {
 2862:       target->time = now;
 2863:       target->forward_delay = daemon->fast_retry_time;
 2864:     }
 2865:   
 2866:   return target;
 2867: }
 2868: 
 2869: static void query_full(time_t now, char *domain)
 2870: {
 2871:   static time_t last_log = 0;
 2872:   
 2873:   if ((int)difftime(now, last_log) > 5)
 2874:     {
 2875:       last_log = now;
 2876:       if (!domain || strlen(domain) == 0)
 2877: 	my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries reached (max: %d)"), daemon->ftabsize);
 2878:       else
 2879: 	my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries to %s reached (max: %d)"), domain, daemon->ftabsize);
 2880:     }
 2881: }
 2882: 
 2883: 
 2884: static struct frec *lookup_frec(unsigned short id, int fd, void *hash, int *firstp, int *lastp)
 2885: {
 2886:   struct frec *f;
 2887:   struct server *s;
 2888:   int first, last;
 2889:   struct randfd_list *fdl;
 2890: 
 2891:   if (hash)
 2892:     for (f = daemon->frec_list; f; f = f->next)
 2893:       if (f->sentto && f->new_id == id && 
 2894: 	  (memcmp(hash, f->hash, HASH_SIZE) == 0))
 2895: 	{
 2896: 	  filter_servers(f->sentto->arrayposn, F_SERVER, firstp, lastp);
 2897: 	  
 2898: 	  /* sent from random port */
 2899: 	  for (fdl = f->rfds; fdl; fdl = fdl->next)
 2900: 	    if (fdl->rfd->fd == fd)
 2901: 	      return f;
 2902: 	  
 2903: 	  /* Sent to upstream from socket associated with a server. 
 2904: 	     Note we have to iterate over all the possible servers, since they may
 2905: 	     have different bound sockets. */
 2906: 	  for (first = *firstp, last = *lastp; first != last; first++)
 2907: 	    {
 2908: 	      s = daemon->serverarray[first];
 2909: 	      if (s->sfd && s->sfd->fd == fd)
 2910: 		return f;
 2911: 	    }
 2912: 	}
 2913:   
 2914:   return NULL;
 2915: }
 2916: 
 2917: static struct frec *lookup_frec_by_query(void *hash, unsigned int flags, unsigned int flagmask)
 2918: {
 2919:   struct frec *f;
 2920: 
 2921:   if (hash)
 2922:     for (f = daemon->frec_list; f; f = f->next)
 2923:       if (f->sentto &&
 2924: 	  (f->flags & flagmask) == flags &&
 2925: 	  memcmp(hash, f->hash, HASH_SIZE) == 0)
 2926: 	return f;
 2927:   
 2928:   return NULL;
 2929: }
 2930: 
 2931: #ifdef HAVE_DNSSEC
 2932: /* DNSSEC frecs have the complete query in the block stash.
 2933:    Search for an existing query using that. */
 2934: static struct frec *lookup_frec_dnssec(char *target, int class, int flags, struct dns_header *header)
 2935: {
 2936:    struct frec *f;
 2937: 
 2938:    for (f = daemon->frec_list; f; f = f->next)
 2939:      if (f->sentto &&
 2940: 	 (f->flags & flags) &&
 2941: 	 blockdata_retrieve(f->stash, f->stash_len, (void *)header))
 2942:        {
 2943: 	 unsigned char *p = (unsigned char *)(header+1);
 2944: 	 int hclass;
 2945: 
 2946: 	 if (extract_name(header, f->stash_len, &p, target, 0, 4) != 1)
 2947: 	   continue;
 2948: 
 2949: 	 p += 2;  /* type, known from flags */ 
 2950: 	 GETSHORT(hclass, p);
 2951: 
 2952: 	 if (class != hclass)
 2953: 	   continue;
 2954: 
 2955: 	 return f;
 2956:        }
 2957: 
 2958:    return NULL;
 2959: }
 2960: #endif
 2961: 
 2962: /* Send query packet again, if we can. */
 2963: void resend_query()
 2964: {
 2965:   if (daemon->srv_save)
 2966:     server_send(daemon->srv_save, daemon->fd_save,
 2967: 		daemon->packet, daemon->packet_len, 0);
 2968: }
 2969: 
 2970: /* A server record is going away, remove references to it */
 2971: void server_gone(struct server *server)
 2972: {
 2973:   struct frec *f;
 2974:   int i;
 2975:   
 2976:   for (f = daemon->frec_list; f; f = f->next)
 2977:     if (f->sentto && f->sentto == server)
 2978:       free_frec(f);
 2979: 
 2980:   /* If any random socket refers to this server, NULL the reference.
 2981:      No more references to the socket will be created in the future. */
 2982:   for (i = 0; i < daemon->numrrand; i++)
 2983:     if (daemon->randomsocks[i].refcount != 0 && daemon->randomsocks[i].serv == server)
 2984:       daemon->randomsocks[i].serv = NULL;
 2985:   
 2986:   if (daemon->srv_save == server)
 2987:     daemon->srv_save = NULL;
 2988: }
 2989: 
 2990: /* return unique random ids. */
 2991: static unsigned short get_id(void)
 2992: {
 2993:   unsigned short ret = 0;
 2994:   struct frec *f;
 2995:   
 2996:   while (1)
 2997:     {
 2998:       ret = rand16();
 2999: 
 3000:       /* ensure id is unique. */
 3001:       for (f = daemon->frec_list; f; f = f->next)
 3002: 	if (f->sentto && f->new_id == ret)
 3003: 	  break;
 3004: 
 3005:       if (!f)
 3006: 	return ret;
 3007:     }
 3008: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>