Return to io.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / bird / sysdep / unix |
1.1 ! misho 1: /* ! 2: * BIRD Internet Routing Daemon -- Unix I/O ! 3: * ! 4: * (c) 1998--2004 Martin Mares <mj@ucw.cz> ! 5: * (c) 2004 Ondrej Filip <feela@network.cz> ! 6: * ! 7: * Can be freely distributed and used under the terms of the GNU GPL. ! 8: */ ! 9: ! 10: /* Unfortunately, some glibc versions hide parts of RFC 3542 API ! 11: if _GNU_SOURCE is not defined. */ ! 12: #ifndef _GNU_SOURCE ! 13: #define _GNU_SOURCE ! 14: #endif ! 15: ! 16: #include <stdio.h> ! 17: #include <stdlib.h> ! 18: #include <time.h> ! 19: #include <sys/time.h> ! 20: #include <sys/types.h> ! 21: #include <sys/socket.h> ! 22: #include <sys/uio.h> ! 23: #include <sys/un.h> ! 24: #include <poll.h> ! 25: #include <unistd.h> ! 26: #include <fcntl.h> ! 27: #include <errno.h> ! 28: #include <net/if.h> ! 29: #include <netinet/in.h> ! 30: #include <netinet/tcp.h> ! 31: #include <netinet/udp.h> ! 32: #include <netinet/icmp6.h> ! 33: ! 34: #include "nest/bird.h" ! 35: #include "lib/lists.h" ! 36: #include "lib/resource.h" ! 37: #include "lib/timer.h" ! 38: #include "lib/socket.h" ! 39: #include "lib/event.h" ! 40: #include "lib/string.h" ! 41: #include "nest/iface.h" ! 42: ! 43: #include "lib/unix.h" ! 44: #include "lib/sysio.h" ! 45: ! 46: /* Maximum number of calls of tx handler for one socket in one ! 47: * poll iteration. Should be small enough to not monopolize CPU by ! 48: * one protocol instance. ! 49: */ ! 50: #define MAX_STEPS 4 ! 51: ! 52: /* Maximum number of calls of rx handler for all sockets in one poll ! 53: iteration. RX callbacks are often much more costly so we limit ! 54: this to gen small latencies */ ! 55: #define MAX_RX_STEPS 4 ! 56: ! 57: /* ! 58: * Tracked Files ! 59: */ ! 60: ! 61: struct rfile { ! 62: resource r; ! 63: FILE *f; ! 64: }; ! 65: ! 66: static void ! 67: rf_free(resource *r) ! 68: { ! 69: struct rfile *a = (struct rfile *) r; ! 70: ! 71: fclose(a->f); ! 72: } ! 73: ! 74: static void ! 75: rf_dump(resource *r) ! 76: { ! 77: struct rfile *a = (struct rfile *) r; ! 78: ! 79: debug("(FILE *%p)\n", a->f); ! 80: } ! 81: ! 82: static struct resclass rf_class = { ! 83: "FILE", ! 84: sizeof(struct rfile), ! 85: rf_free, ! 86: rf_dump, ! 87: NULL, ! 88: NULL ! 89: }; ! 90: ! 91: void * ! 92: tracked_fopen(pool *p, char *name, char *mode) ! 93: { ! 94: FILE *f = fopen(name, mode); ! 95: ! 96: if (f) ! 97: { ! 98: struct rfile *r = ralloc(p, &rf_class); ! 99: r->f = f; ! 100: } ! 101: return f; ! 102: } ! 103: ! 104: /** ! 105: * DOC: Timers ! 106: * ! 107: * Timers are resources which represent a wish of a module to call ! 108: * a function at the specified time. The platform dependent code ! 109: * doesn't guarantee exact timing, only that a timer function ! 110: * won't be called before the requested time. ! 111: * ! 112: * In BIRD, time is represented by values of the &bird_clock_t type ! 113: * which are integral numbers interpreted as a relative number of seconds since ! 114: * some fixed time point in past. The current time can be read ! 115: * from variable @now with reasonable accuracy and is monotonic. There is also ! 116: * a current 'absolute' time in variable @now_real reported by OS. ! 117: * ! 118: * Each timer is described by a &timer structure containing a pointer ! 119: * to the handler function (@hook), data private to this function (@data), ! 120: * time the function should be called at (@expires, 0 for inactive timers), ! 121: * for the other fields see |timer.h|. ! 122: */ ! 123: ! 124: #define NEAR_TIMER_LIMIT 4 ! 125: ! 126: static list near_timers, far_timers; ! 127: static bird_clock_t first_far_timer = TIME_INFINITY; ! 128: ! 129: /* now must be different from 0, because 0 is a special value in timer->expires */ ! 130: bird_clock_t now = 1, now_real, boot_time; ! 131: ! 132: static void ! 133: update_times_plain(void) ! 134: { ! 135: bird_clock_t new_time = time(NULL); ! 136: int delta = new_time - now_real; ! 137: ! 138: if ((delta >= 0) && (delta < 60)) ! 139: now += delta; ! 140: else if (now_real != 0) ! 141: log(L_WARN "Time jump, delta %d s", delta); ! 142: ! 143: now_real = new_time; ! 144: } ! 145: ! 146: static void ! 147: update_times_gettime(void) ! 148: { ! 149: struct timespec ts; ! 150: int rv; ! 151: ! 152: rv = clock_gettime(CLOCK_MONOTONIC, &ts); ! 153: if (rv != 0) ! 154: die("clock_gettime: %m"); ! 155: ! 156: if (ts.tv_sec != now) { ! 157: if (ts.tv_sec < now) ! 158: log(L_ERR "Monotonic timer is broken"); ! 159: ! 160: now = ts.tv_sec; ! 161: now_real = time(NULL); ! 162: } ! 163: } ! 164: ! 165: static int clock_monotonic_available; ! 166: ! 167: static inline void ! 168: update_times(void) ! 169: { ! 170: if (clock_monotonic_available) ! 171: update_times_gettime(); ! 172: else ! 173: update_times_plain(); ! 174: } ! 175: ! 176: static inline void ! 177: init_times(void) ! 178: { ! 179: struct timespec ts; ! 180: clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0); ! 181: if (!clock_monotonic_available) ! 182: log(L_WARN "Monotonic timer is missing"); ! 183: } ! 184: ! 185: ! 186: static void ! 187: tm_free(resource *r) ! 188: { ! 189: timer *t = (timer *) r; ! 190: ! 191: tm_stop(t); ! 192: } ! 193: ! 194: static void ! 195: tm_dump(resource *r) ! 196: { ! 197: timer *t = (timer *) r; ! 198: ! 199: debug("(code %p, data %p, ", t->hook, t->data); ! 200: if (t->randomize) ! 201: debug("rand %d, ", t->randomize); ! 202: if (t->recurrent) ! 203: debug("recur %d, ", t->recurrent); ! 204: if (t->expires) ! 205: debug("expires in %d sec)\n", t->expires - now); ! 206: else ! 207: debug("inactive)\n"); ! 208: } ! 209: ! 210: static struct resclass tm_class = { ! 211: "Timer", ! 212: sizeof(timer), ! 213: tm_free, ! 214: tm_dump, ! 215: NULL, ! 216: NULL ! 217: }; ! 218: ! 219: /** ! 220: * tm_new - create a timer ! 221: * @p: pool ! 222: * ! 223: * This function creates a new timer resource and returns ! 224: * a pointer to it. To use the timer, you need to fill in ! 225: * the structure fields and call tm_start() to start timing. ! 226: */ ! 227: timer * ! 228: tm_new(pool *p) ! 229: { ! 230: timer *t = ralloc(p, &tm_class); ! 231: return t; ! 232: } ! 233: ! 234: static inline void ! 235: tm_insert_near(timer *t) ! 236: { ! 237: node *n = HEAD(near_timers); ! 238: ! 239: while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires)) ! 240: n = n->next; ! 241: insert_node(&t->n, n->prev); ! 242: } ! 243: ! 244: /** ! 245: * tm_start - start a timer ! 246: * @t: timer ! 247: * @after: number of seconds the timer should be run after ! 248: * ! 249: * This function schedules the hook function of the timer to ! 250: * be called after @after seconds. If the timer has been already ! 251: * started, it's @expire time is replaced by the new value. ! 252: * ! 253: * You can have set the @randomize field of @t, the timeout ! 254: * will be increased by a random number of seconds chosen ! 255: * uniformly from range 0 .. @randomize. ! 256: * ! 257: * You can call tm_start() from the handler function of the timer ! 258: * to request another run of the timer. Also, you can set the @recurrent ! 259: * field to have the timer re-added automatically with the same timeout. ! 260: */ ! 261: void ! 262: tm_start(timer *t, unsigned after) ! 263: { ! 264: bird_clock_t when; ! 265: ! 266: if (t->randomize) ! 267: after += random() % (t->randomize + 1); ! 268: when = now + after; ! 269: if (t->expires == when) ! 270: return; ! 271: if (t->expires) ! 272: rem_node(&t->n); ! 273: t->expires = when; ! 274: if (after <= NEAR_TIMER_LIMIT) ! 275: tm_insert_near(t); ! 276: else ! 277: { ! 278: if (!first_far_timer || first_far_timer > when) ! 279: first_far_timer = when; ! 280: add_tail(&far_timers, &t->n); ! 281: } ! 282: } ! 283: ! 284: /** ! 285: * tm_stop - stop a timer ! 286: * @t: timer ! 287: * ! 288: * This function stops a timer. If the timer is already stopped, ! 289: * nothing happens. ! 290: */ ! 291: void ! 292: tm_stop(timer *t) ! 293: { ! 294: if (t->expires) ! 295: { ! 296: rem_node(&t->n); ! 297: t->expires = 0; ! 298: } ! 299: } ! 300: ! 301: static void ! 302: tm_dump_them(char *name, list *l) ! 303: { ! 304: node *n; ! 305: timer *t; ! 306: ! 307: debug("%s timers:\n", name); ! 308: WALK_LIST(n, *l) ! 309: { ! 310: t = SKIP_BACK(timer, n, n); ! 311: debug("%p ", t); ! 312: tm_dump(&t->r); ! 313: } ! 314: debug("\n"); ! 315: } ! 316: ! 317: void ! 318: tm_dump_all(void) ! 319: { ! 320: tm_dump_them("Near", &near_timers); ! 321: tm_dump_them("Far", &far_timers); ! 322: } ! 323: ! 324: static inline time_t ! 325: tm_first_shot(void) ! 326: { ! 327: time_t x = first_far_timer; ! 328: ! 329: if (!EMPTY_LIST(near_timers)) ! 330: { ! 331: timer *t = SKIP_BACK(timer, n, HEAD(near_timers)); ! 332: if (t->expires < x) ! 333: x = t->expires; ! 334: } ! 335: return x; ! 336: } ! 337: ! 338: void io_log_event(void *hook, void *data); ! 339: ! 340: static void ! 341: tm_shot(void) ! 342: { ! 343: timer *t; ! 344: node *n, *m; ! 345: ! 346: if (first_far_timer <= now) ! 347: { ! 348: bird_clock_t limit = now + NEAR_TIMER_LIMIT; ! 349: first_far_timer = TIME_INFINITY; ! 350: n = HEAD(far_timers); ! 351: while (m = n->next) ! 352: { ! 353: t = SKIP_BACK(timer, n, n); ! 354: if (t->expires <= limit) ! 355: { ! 356: rem_node(n); ! 357: tm_insert_near(t); ! 358: } ! 359: else if (t->expires < first_far_timer) ! 360: first_far_timer = t->expires; ! 361: n = m; ! 362: } ! 363: } ! 364: while ((n = HEAD(near_timers)) -> next) ! 365: { ! 366: int delay; ! 367: t = SKIP_BACK(timer, n, n); ! 368: if (t->expires > now) ! 369: break; ! 370: rem_node(n); ! 371: delay = t->expires - now; ! 372: t->expires = 0; ! 373: if (t->recurrent) ! 374: { ! 375: int i = t->recurrent - delay; ! 376: if (i < 0) ! 377: i = 0; ! 378: tm_start(t, i); ! 379: } ! 380: io_log_event(t->hook, t->data); ! 381: t->hook(t); ! 382: } ! 383: } ! 384: ! 385: /** ! 386: * tm_parse_datetime - parse a date and time ! 387: * @x: datetime string ! 388: * ! 389: * tm_parse_datetime() takes a textual representation of ! 390: * a date and time (dd-mm-yyyy hh:mm:ss) ! 391: * and converts it to the corresponding value of type &bird_clock_t. ! 392: */ ! 393: bird_clock_t ! 394: tm_parse_datetime(char *x) ! 395: { ! 396: struct tm tm; ! 397: int n; ! 398: time_t t; ! 399: ! 400: if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n]) ! 401: return tm_parse_date(x); ! 402: tm.tm_mon--; ! 403: tm.tm_year -= 1900; ! 404: t = mktime(&tm); ! 405: if (t == (time_t) -1) ! 406: return 0; ! 407: return t; ! 408: } ! 409: /** ! 410: * tm_parse_date - parse a date ! 411: * @x: date string ! 412: * ! 413: * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy) ! 414: * and converts it to the corresponding value of type &bird_clock_t. ! 415: */ ! 416: bird_clock_t ! 417: tm_parse_date(char *x) ! 418: { ! 419: struct tm tm; ! 420: int n; ! 421: time_t t; ! 422: ! 423: if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n]) ! 424: return 0; ! 425: tm.tm_mon--; ! 426: tm.tm_year -= 1900; ! 427: tm.tm_hour = tm.tm_min = tm.tm_sec = 0; ! 428: t = mktime(&tm); ! 429: if (t == (time_t) -1) ! 430: return 0; ! 431: return t; ! 432: } ! 433: ! 434: static void ! 435: tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta) ! 436: { ! 437: static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", ! 438: "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; ! 439: ! 440: if (delta < 20*3600) ! 441: bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min); ! 442: else if (delta < 360*86400) ! 443: bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday); ! 444: else ! 445: bsprintf(x, "%d", tm->tm_year+1900); ! 446: } ! 447: ! 448: #include "conf/conf.h" ! 449: ! 450: /** ! 451: * tm_format_datetime - convert date and time to textual representation ! 452: * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE ! 453: * @fmt_spec: specification of resulting textual representation of the time ! 454: * @t: time ! 455: * ! 456: * This function formats the given relative time value @t to a textual ! 457: * date/time representation (dd-mm-yyyy hh:mm:ss) in real time. ! 458: */ ! 459: void ! 460: tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t) ! 461: { ! 462: const char *fmt_used; ! 463: struct tm *tm; ! 464: bird_clock_t delta = now - t; ! 465: t = now_real - delta; ! 466: tm = localtime(&t); ! 467: ! 468: if (fmt_spec->fmt1 == NULL) ! 469: return tm_format_reltime(x, tm, delta); ! 470: ! 471: if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit)) ! 472: fmt_used = fmt_spec->fmt1; ! 473: else ! 474: fmt_used = fmt_spec->fmt2; ! 475: ! 476: int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm); ! 477: if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE)) ! 478: strcpy(x, "<too-long>"); ! 479: } ! 480: ! 481: ! 482: /** ! 483: * DOC: Sockets ! 484: * ! 485: * Socket resources represent network connections. Their data structure (&socket) ! 486: * contains a lot of fields defining the exact type of the socket, the local and ! 487: * remote addresses and ports, pointers to socket buffers and finally pointers to ! 488: * hook functions to be called when new data have arrived to the receive buffer ! 489: * (@rx_hook), when the contents of the transmit buffer have been transmitted ! 490: * (@tx_hook) and when an error or connection close occurs (@err_hook). ! 491: * ! 492: * Freeing of sockets from inside socket hooks is perfectly safe. ! 493: */ ! 494: ! 495: #ifndef SOL_IP ! 496: #define SOL_IP IPPROTO_IP ! 497: #endif ! 498: ! 499: #ifndef SOL_IPV6 ! 500: #define SOL_IPV6 IPPROTO_IPV6 ! 501: #endif ! 502: ! 503: #ifndef SOL_ICMPV6 ! 504: #define SOL_ICMPV6 IPPROTO_ICMPV6 ! 505: #endif ! 506: ! 507: ! 508: /* ! 509: * Sockaddr helper functions ! 510: */ ! 511: ! 512: static inline int UNUSED sockaddr_length(int af) ! 513: { return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); } ! 514: ! 515: static inline void ! 516: sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port) ! 517: { ! 518: memset(sa, 0, sizeof(struct sockaddr_in)); ! 519: #ifdef HAVE_SIN_LEN ! 520: sa->sin_len = sizeof(struct sockaddr_in); ! 521: #endif ! 522: sa->sin_family = AF_INET; ! 523: sa->sin_port = htons(port); ! 524: sa->sin_addr = ipa_to_in4(a); ! 525: } ! 526: ! 527: static inline void ! 528: sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port) ! 529: { ! 530: memset(sa, 0, sizeof(struct sockaddr_in6)); ! 531: #ifdef SIN6_LEN ! 532: sa->sin6_len = sizeof(struct sockaddr_in6); ! 533: #endif ! 534: sa->sin6_family = AF_INET6; ! 535: sa->sin6_port = htons(port); ! 536: sa->sin6_flowinfo = 0; ! 537: sa->sin6_addr = ipa_to_in6(a); ! 538: ! 539: if (ifa && ipa_is_link_local(a)) ! 540: sa->sin6_scope_id = ifa->index; ! 541: } ! 542: ! 543: void ! 544: sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port) ! 545: { ! 546: if (af == AF_INET) ! 547: sockaddr_fill4((struct sockaddr_in *) sa, a, port); ! 548: else if (af == AF_INET6) ! 549: sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port); ! 550: else ! 551: bug("Unknown AF"); ! 552: } ! 553: ! 554: static inline void ! 555: sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port) ! 556: { ! 557: *port = ntohs(sa->sin_port); ! 558: *a = ipa_from_in4(sa->sin_addr); ! 559: } ! 560: ! 561: static inline void ! 562: sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port) ! 563: { ! 564: *port = ntohs(sa->sin6_port); ! 565: *a = ipa_from_in6(sa->sin6_addr); ! 566: ! 567: if (ifa && ipa_is_link_local(*a)) ! 568: *ifa = if_find_by_index(sa->sin6_scope_id); ! 569: } ! 570: ! 571: int ! 572: sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port) ! 573: { ! 574: if (sa->sa.sa_family != af) ! 575: goto fail; ! 576: ! 577: if (af == AF_INET) ! 578: sockaddr_read4((struct sockaddr_in *) sa, a, port); ! 579: else if (af == AF_INET6) ! 580: sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port); ! 581: else ! 582: goto fail; ! 583: ! 584: return 0; ! 585: ! 586: fail: ! 587: *a = IPA_NONE; ! 588: *port = 0; ! 589: return -1; ! 590: } ! 591: ! 592: ! 593: /* ! 594: * IPv6 multicast syscalls ! 595: */ ! 596: ! 597: /* Fortunately standardized in RFC 3493 */ ! 598: ! 599: #define INIT_MREQ6(maddr,ifa) \ ! 600: { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index } ! 601: ! 602: static inline int ! 603: sk_setup_multicast6(sock *s) ! 604: { ! 605: int index = s->iface->index; ! 606: int ttl = s->ttl; ! 607: int n = 0; ! 608: ! 609: if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0) ! 610: ERR("IPV6_MULTICAST_IF"); ! 611: ! 612: if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0) ! 613: ERR("IPV6_MULTICAST_HOPS"); ! 614: ! 615: if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0) ! 616: ERR("IPV6_MULTICAST_LOOP"); ! 617: ! 618: return 0; ! 619: } ! 620: ! 621: static inline int ! 622: sk_join_group6(sock *s, ip_addr maddr) ! 623: { ! 624: struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface); ! 625: ! 626: if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0) ! 627: ERR("IPV6_JOIN_GROUP"); ! 628: ! 629: return 0; ! 630: } ! 631: ! 632: static inline int ! 633: sk_leave_group6(sock *s, ip_addr maddr) ! 634: { ! 635: struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface); ! 636: ! 637: if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0) ! 638: ERR("IPV6_LEAVE_GROUP"); ! 639: ! 640: return 0; ! 641: } ! 642: ! 643: ! 644: /* ! 645: * IPv6 packet control messages ! 646: */ ! 647: ! 648: /* Also standardized, in RFC 3542 */ ! 649: ! 650: /* ! 651: * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg ! 652: * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we ! 653: * don't have IPV6_RECVPKTINFO we suppose the OS implements the older ! 654: * RFC and we use IPV6_PKTINFO. ! 655: */ ! 656: #ifndef IPV6_RECVPKTINFO ! 657: #define IPV6_RECVPKTINFO IPV6_PKTINFO ! 658: #endif ! 659: /* ! 660: * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT. ! 661: */ ! 662: #ifndef IPV6_RECVHOPLIMIT ! 663: #define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT ! 664: #endif ! 665: ! 666: ! 667: #define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo)) ! 668: #define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int)) ! 669: ! 670: static inline int ! 671: sk_request_cmsg6_pktinfo(sock *s) ! 672: { ! 673: int y = 1; ! 674: ! 675: if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0) ! 676: ERR("IPV6_RECVPKTINFO"); ! 677: ! 678: return 0; ! 679: } ! 680: ! 681: static inline int ! 682: sk_request_cmsg6_ttl(sock *s) ! 683: { ! 684: int y = 1; ! 685: ! 686: if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0) ! 687: ERR("IPV6_RECVHOPLIMIT"); ! 688: ! 689: return 0; ! 690: } ! 691: ! 692: static inline void ! 693: sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm) ! 694: { ! 695: if (cm->cmsg_type == IPV6_PKTINFO) ! 696: { ! 697: struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm); ! 698: s->laddr = ipa_from_in6(pi->ipi6_addr); ! 699: s->lifindex = pi->ipi6_ifindex; ! 700: } ! 701: } ! 702: ! 703: static inline void ! 704: sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm) ! 705: { ! 706: if (cm->cmsg_type == IPV6_HOPLIMIT) ! 707: s->rcv_ttl = * (int *) CMSG_DATA(cm); ! 708: } ! 709: ! 710: static inline void ! 711: sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) ! 712: { ! 713: struct cmsghdr *cm; ! 714: struct in6_pktinfo *pi; ! 715: int controllen = 0; ! 716: ! 717: msg->msg_control = cbuf; ! 718: msg->msg_controllen = cbuflen; ! 719: ! 720: cm = CMSG_FIRSTHDR(msg); ! 721: cm->cmsg_level = SOL_IPV6; ! 722: cm->cmsg_type = IPV6_PKTINFO; ! 723: cm->cmsg_len = CMSG_LEN(sizeof(*pi)); ! 724: controllen += CMSG_SPACE(sizeof(*pi)); ! 725: ! 726: pi = (struct in6_pktinfo *) CMSG_DATA(cm); ! 727: pi->ipi6_ifindex = s->iface ? s->iface->index : 0; ! 728: pi->ipi6_addr = ipa_to_in6(s->saddr); ! 729: ! 730: msg->msg_controllen = controllen; ! 731: } ! 732: ! 733: ! 734: /* ! 735: * Miscellaneous socket syscalls ! 736: */ ! 737: ! 738: static inline int ! 739: sk_set_ttl4(sock *s, int ttl) ! 740: { ! 741: if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0) ! 742: ERR("IP_TTL"); ! 743: ! 744: return 0; ! 745: } ! 746: ! 747: static inline int ! 748: sk_set_ttl6(sock *s, int ttl) ! 749: { ! 750: if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0) ! 751: ERR("IPV6_UNICAST_HOPS"); ! 752: ! 753: return 0; ! 754: } ! 755: ! 756: static inline int ! 757: sk_set_tos4(sock *s, int tos) ! 758: { ! 759: if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0) ! 760: ERR("IP_TOS"); ! 761: ! 762: return 0; ! 763: } ! 764: ! 765: static inline int ! 766: sk_set_tos6(sock *s, int tos) ! 767: { ! 768: if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0) ! 769: ERR("IPV6_TCLASS"); ! 770: ! 771: return 0; ! 772: } ! 773: ! 774: static inline int ! 775: sk_set_high_port(sock *s UNUSED) ! 776: { ! 777: /* Port range setting is optional, ignore it if not supported */ ! 778: ! 779: #ifdef IP_PORTRANGE ! 780: if (sk_is_ipv4(s)) ! 781: { ! 782: int range = IP_PORTRANGE_HIGH; ! 783: if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0) ! 784: ERR("IP_PORTRANGE"); ! 785: } ! 786: #endif ! 787: ! 788: #ifdef IPV6_PORTRANGE ! 789: if (sk_is_ipv6(s)) ! 790: { ! 791: int range = IPV6_PORTRANGE_HIGH; ! 792: if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0) ! 793: ERR("IPV6_PORTRANGE"); ! 794: } ! 795: #endif ! 796: ! 797: return 0; ! 798: } ! 799: ! 800: static inline byte * ! 801: sk_skip_ip_header(byte *pkt, int *len) ! 802: { ! 803: if ((*len < 20) || ((*pkt & 0xf0) != 0x40)) ! 804: return NULL; ! 805: ! 806: int hlen = (*pkt & 0x0f) * 4; ! 807: if ((hlen < 20) || (hlen > *len)) ! 808: return NULL; ! 809: ! 810: *len -= hlen; ! 811: return pkt + hlen; ! 812: } ! 813: ! 814: byte * ! 815: sk_rx_buffer(sock *s, int *len) ! 816: { ! 817: if (sk_is_ipv4(s) && (s->type == SK_IP)) ! 818: return sk_skip_ip_header(s->rbuf, len); ! 819: else ! 820: return s->rbuf; ! 821: } ! 822: ! 823: ! 824: /* ! 825: * Public socket functions ! 826: */ ! 827: ! 828: /** ! 829: * sk_setup_multicast - enable multicast for given socket ! 830: * @s: socket ! 831: * ! 832: * Prepare transmission of multicast packets for given datagram socket. ! 833: * The socket must have defined @iface. ! 834: * ! 835: * Result: 0 for success, -1 for an error. ! 836: */ ! 837: ! 838: int ! 839: sk_setup_multicast(sock *s) ! 840: { ! 841: ASSERT(s->iface); ! 842: ! 843: if (sk_is_ipv4(s)) ! 844: return sk_setup_multicast4(s); ! 845: else ! 846: return sk_setup_multicast6(s); ! 847: } ! 848: ! 849: /** ! 850: * sk_join_group - join multicast group for given socket ! 851: * @s: socket ! 852: * @maddr: multicast address ! 853: * ! 854: * Join multicast group for given datagram socket and associated interface. ! 855: * The socket must have defined @iface. ! 856: * ! 857: * Result: 0 for success, -1 for an error. ! 858: */ ! 859: ! 860: int ! 861: sk_join_group(sock *s, ip_addr maddr) ! 862: { ! 863: if (sk_is_ipv4(s)) ! 864: return sk_join_group4(s, maddr); ! 865: else ! 866: return sk_join_group6(s, maddr); ! 867: } ! 868: ! 869: /** ! 870: * sk_leave_group - leave multicast group for given socket ! 871: * @s: socket ! 872: * @maddr: multicast address ! 873: * ! 874: * Leave multicast group for given datagram socket and associated interface. ! 875: * The socket must have defined @iface. ! 876: * ! 877: * Result: 0 for success, -1 for an error. ! 878: */ ! 879: ! 880: int ! 881: sk_leave_group(sock *s, ip_addr maddr) ! 882: { ! 883: if (sk_is_ipv4(s)) ! 884: return sk_leave_group4(s, maddr); ! 885: else ! 886: return sk_leave_group6(s, maddr); ! 887: } ! 888: ! 889: /** ! 890: * sk_setup_broadcast - enable broadcast for given socket ! 891: * @s: socket ! 892: * ! 893: * Allow reception and transmission of broadcast packets for given datagram ! 894: * socket. The socket must have defined @iface. For transmission, packets should ! 895: * be send to @brd address of @iface. ! 896: * ! 897: * Result: 0 for success, -1 for an error. ! 898: */ ! 899: ! 900: int ! 901: sk_setup_broadcast(sock *s) ! 902: { ! 903: int y = 1; ! 904: ! 905: if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0) ! 906: ERR("SO_BROADCAST"); ! 907: ! 908: return 0; ! 909: } ! 910: ! 911: /** ! 912: * sk_set_ttl - set transmit TTL for given socket ! 913: * @s: socket ! 914: * @ttl: TTL value ! 915: * ! 916: * Set TTL for already opened connections when TTL was not set before. Useful ! 917: * for accepted connections when different ones should have different TTL. ! 918: * ! 919: * Result: 0 for success, -1 for an error. ! 920: */ ! 921: ! 922: int ! 923: sk_set_ttl(sock *s, int ttl) ! 924: { ! 925: s->ttl = ttl; ! 926: ! 927: if (sk_is_ipv4(s)) ! 928: return sk_set_ttl4(s, ttl); ! 929: else ! 930: return sk_set_ttl6(s, ttl); ! 931: } ! 932: ! 933: /** ! 934: * sk_set_min_ttl - set minimal accepted TTL for given socket ! 935: * @s: socket ! 936: * @ttl: TTL value ! 937: * ! 938: * Set minimal accepted TTL for given socket. Can be used for TTL security. ! 939: * implementations. ! 940: * ! 941: * Result: 0 for success, -1 for an error. ! 942: */ ! 943: ! 944: int ! 945: sk_set_min_ttl(sock *s, int ttl) ! 946: { ! 947: if (sk_is_ipv4(s)) ! 948: return sk_set_min_ttl4(s, ttl); ! 949: else ! 950: return sk_set_min_ttl6(s, ttl); ! 951: } ! 952: ! 953: #if 0 ! 954: /** ! 955: * sk_set_md5_auth - add / remove MD5 security association for given socket ! 956: * @s: socket ! 957: * @local: IP address of local side ! 958: * @remote: IP address of remote side ! 959: * @ifa: Interface for link-local IP address ! 960: * @passwd: Password used for MD5 authentication ! 961: * @setkey: Update also system SA/SP database ! 962: * ! 963: * In TCP MD5 handling code in kernel, there is a set of security associations ! 964: * used for choosing password and other authentication parameters according to ! 965: * the local and remote address. This function is useful for listening socket, ! 966: * for active sockets it may be enough to set s->password field. ! 967: * ! 968: * When called with passwd != NULL, the new pair is added, ! 969: * When called with passwd == NULL, the existing pair is removed. ! 970: * ! 971: * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are ! 972: * stored in global SA/SP database (but the behavior also must be enabled on ! 973: * per-socket basis). In case of multiple sockets to the same neighbor, the ! 974: * socket-specific state must be configured for each socket while global state ! 975: * just once per src-dst pair. The @setkey argument controls whether the global ! 976: * state (SA/SP database) is also updated. ! 977: * ! 978: * Result: 0 for success, -1 for an error. ! 979: */ ! 980: ! 981: int ! 982: sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey) ! 983: { DUMMY; } ! 984: #endif ! 985: ! 986: /** ! 987: * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket ! 988: * @s: socket ! 989: * @offset: offset ! 990: * ! 991: * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the ! 992: * kernel will automatically fill it for outgoing packets and check it for ! 993: * incoming packets. Should not be used on ICMPv6 sockets, where the position is ! 994: * known to the kernel. ! 995: * ! 996: * Result: 0 for success, -1 for an error. ! 997: */ ! 998: ! 999: int ! 1000: sk_set_ipv6_checksum(sock *s, int offset) ! 1001: { ! 1002: if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0) ! 1003: ERR("IPV6_CHECKSUM"); ! 1004: ! 1005: return 0; ! 1006: } ! 1007: ! 1008: int ! 1009: sk_set_icmp6_filter(sock *s, int p1, int p2) ! 1010: { ! 1011: /* a bit of lame interface, but it is here only for Radv */ ! 1012: struct icmp6_filter f; ! 1013: ! 1014: ICMP6_FILTER_SETBLOCKALL(&f); ! 1015: ICMP6_FILTER_SETPASS(p1, &f); ! 1016: ICMP6_FILTER_SETPASS(p2, &f); ! 1017: ! 1018: if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0) ! 1019: ERR("ICMP6_FILTER"); ! 1020: ! 1021: return 0; ! 1022: } ! 1023: ! 1024: void ! 1025: sk_log_error(sock *s, const char *p) ! 1026: { ! 1027: log(L_ERR "%s: Socket error: %s%#m", p, s->err); ! 1028: } ! 1029: ! 1030: ! 1031: /* ! 1032: * Actual struct birdsock code ! 1033: */ ! 1034: ! 1035: static list sock_list; ! 1036: static struct birdsock *current_sock; ! 1037: static struct birdsock *stored_sock; ! 1038: ! 1039: static inline sock * ! 1040: sk_next(sock *s) ! 1041: { ! 1042: if (!s->n.next->next) ! 1043: return NULL; ! 1044: else ! 1045: return SKIP_BACK(sock, n, s->n.next); ! 1046: } ! 1047: ! 1048: static void ! 1049: sk_alloc_bufs(sock *s) ! 1050: { ! 1051: if (!s->rbuf && s->rbsize) ! 1052: s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize); ! 1053: s->rpos = s->rbuf; ! 1054: if (!s->tbuf && s->tbsize) ! 1055: s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize); ! 1056: s->tpos = s->ttx = s->tbuf; ! 1057: } ! 1058: ! 1059: static void ! 1060: sk_free_bufs(sock *s) ! 1061: { ! 1062: if (s->rbuf_alloc) ! 1063: { ! 1064: xfree(s->rbuf_alloc); ! 1065: s->rbuf = s->rbuf_alloc = NULL; ! 1066: } ! 1067: if (s->tbuf_alloc) ! 1068: { ! 1069: xfree(s->tbuf_alloc); ! 1070: s->tbuf = s->tbuf_alloc = NULL; ! 1071: } ! 1072: } ! 1073: ! 1074: static void ! 1075: sk_free(resource *r) ! 1076: { ! 1077: sock *s = (sock *) r; ! 1078: ! 1079: sk_free_bufs(s); ! 1080: if (s->fd >= 0) ! 1081: { ! 1082: close(s->fd); ! 1083: ! 1084: /* FIXME: we should call sk_stop() for SKF_THREAD sockets */ ! 1085: if (s->flags & SKF_THREAD) ! 1086: return; ! 1087: ! 1088: if (s == current_sock) ! 1089: current_sock = sk_next(s); ! 1090: if (s == stored_sock) ! 1091: stored_sock = sk_next(s); ! 1092: rem_node(&s->n); ! 1093: } ! 1094: } ! 1095: ! 1096: void ! 1097: sk_set_rbsize(sock *s, uint val) ! 1098: { ! 1099: ASSERT(s->rbuf_alloc == s->rbuf); ! 1100: ! 1101: if (s->rbsize == val) ! 1102: return; ! 1103: ! 1104: s->rbsize = val; ! 1105: xfree(s->rbuf_alloc); ! 1106: s->rbuf_alloc = xmalloc(val); ! 1107: s->rpos = s->rbuf = s->rbuf_alloc; ! 1108: } ! 1109: ! 1110: void ! 1111: sk_set_tbsize(sock *s, uint val) ! 1112: { ! 1113: ASSERT(s->tbuf_alloc == s->tbuf); ! 1114: ! 1115: if (s->tbsize == val) ! 1116: return; ! 1117: ! 1118: byte *old_tbuf = s->tbuf; ! 1119: ! 1120: s->tbsize = val; ! 1121: s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val); ! 1122: s->tpos = s->tbuf + (s->tpos - old_tbuf); ! 1123: s->ttx = s->tbuf + (s->ttx - old_tbuf); ! 1124: } ! 1125: ! 1126: void ! 1127: sk_set_tbuf(sock *s, void *tbuf) ! 1128: { ! 1129: s->tbuf = tbuf ?: s->tbuf_alloc; ! 1130: s->ttx = s->tpos = s->tbuf; ! 1131: } ! 1132: ! 1133: void ! 1134: sk_reallocate(sock *s) ! 1135: { ! 1136: sk_free_bufs(s); ! 1137: sk_alloc_bufs(s); ! 1138: } ! 1139: ! 1140: static void ! 1141: sk_dump(resource *r) ! 1142: { ! 1143: sock *s = (sock *) r; ! 1144: static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" }; ! 1145: ! 1146: debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n", ! 1147: sk_type_names[s->type], ! 1148: s->data, ! 1149: s->saddr, ! 1150: s->sport, ! 1151: s->daddr, ! 1152: s->dport, ! 1153: s->tos, ! 1154: s->ttl, ! 1155: s->iface ? s->iface->name : "none"); ! 1156: } ! 1157: ! 1158: static struct resclass sk_class = { ! 1159: "Socket", ! 1160: sizeof(sock), ! 1161: sk_free, ! 1162: sk_dump, ! 1163: NULL, ! 1164: NULL ! 1165: }; ! 1166: ! 1167: /** ! 1168: * sk_new - create a socket ! 1169: * @p: pool ! 1170: * ! 1171: * This function creates a new socket resource. If you want to use it, ! 1172: * you need to fill in all the required fields of the structure and ! 1173: * call sk_open() to do the actual opening of the socket. ! 1174: * ! 1175: * The real function name is sock_new(), sk_new() is a macro wrapper ! 1176: * to avoid collision with OpenSSL. ! 1177: */ ! 1178: sock * ! 1179: sock_new(pool *p) ! 1180: { ! 1181: sock *s = ralloc(p, &sk_class); ! 1182: s->pool = p; ! 1183: // s->saddr = s->daddr = IPA_NONE; ! 1184: s->tos = s->priority = s->ttl = -1; ! 1185: s->fd = -1; ! 1186: return s; ! 1187: } ! 1188: ! 1189: static int ! 1190: sk_setup(sock *s) ! 1191: { ! 1192: int y = 1; ! 1193: int fd = s->fd; ! 1194: ! 1195: if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) ! 1196: ERR("O_NONBLOCK"); ! 1197: ! 1198: if (!s->af) ! 1199: return 0; ! 1200: ! 1201: if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND)) ! 1202: s->flags |= SKF_PKTINFO; ! 1203: ! 1204: #ifdef CONFIG_USE_HDRINCL ! 1205: if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO)) ! 1206: { ! 1207: s->flags &= ~SKF_PKTINFO; ! 1208: s->flags |= SKF_HDRINCL; ! 1209: if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0) ! 1210: ERR("IP_HDRINCL"); ! 1211: } ! 1212: #endif ! 1213: ! 1214: if (s->iface) ! 1215: { ! 1216: #ifdef SO_BINDTODEVICE ! 1217: struct ifreq ifr = {}; ! 1218: strcpy(ifr.ifr_name, s->iface->name); ! 1219: if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) ! 1220: ERR("SO_BINDTODEVICE"); ! 1221: #endif ! 1222: ! 1223: #ifdef CONFIG_UNIX_DONTROUTE ! 1224: if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0) ! 1225: ERR("SO_DONTROUTE"); ! 1226: #endif ! 1227: } ! 1228: ! 1229: if (s->priority >= 0) ! 1230: if (sk_set_priority(s, s->priority) < 0) ! 1231: return -1; ! 1232: ! 1233: if (sk_is_ipv4(s)) ! 1234: { ! 1235: if (s->flags & SKF_LADDR_RX) ! 1236: if (sk_request_cmsg4_pktinfo(s) < 0) ! 1237: return -1; ! 1238: ! 1239: if (s->flags & SKF_TTL_RX) ! 1240: if (sk_request_cmsg4_ttl(s) < 0) ! 1241: return -1; ! 1242: ! 1243: if ((s->type == SK_UDP) || (s->type == SK_IP)) ! 1244: if (sk_disable_mtu_disc4(s) < 0) ! 1245: return -1; ! 1246: ! 1247: if (s->ttl >= 0) ! 1248: if (sk_set_ttl4(s, s->ttl) < 0) ! 1249: return -1; ! 1250: ! 1251: if (s->tos >= 0) ! 1252: if (sk_set_tos4(s, s->tos) < 0) ! 1253: return -1; ! 1254: } ! 1255: ! 1256: if (sk_is_ipv6(s)) ! 1257: { ! 1258: if (s->flags & SKF_V6ONLY) ! 1259: if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0) ! 1260: ERR("IPV6_V6ONLY"); ! 1261: ! 1262: if (s->flags & SKF_LADDR_RX) ! 1263: if (sk_request_cmsg6_pktinfo(s) < 0) ! 1264: return -1; ! 1265: ! 1266: if (s->flags & SKF_TTL_RX) ! 1267: if (sk_request_cmsg6_ttl(s) < 0) ! 1268: return -1; ! 1269: ! 1270: if ((s->type == SK_UDP) || (s->type == SK_IP)) ! 1271: if (sk_disable_mtu_disc6(s) < 0) ! 1272: return -1; ! 1273: ! 1274: if (s->ttl >= 0) ! 1275: if (sk_set_ttl6(s, s->ttl) < 0) ! 1276: return -1; ! 1277: ! 1278: if (s->tos >= 0) ! 1279: if (sk_set_tos6(s, s->tos) < 0) ! 1280: return -1; ! 1281: } ! 1282: ! 1283: return 0; ! 1284: } ! 1285: ! 1286: static void ! 1287: sk_insert(sock *s) ! 1288: { ! 1289: add_tail(&sock_list, &s->n); ! 1290: } ! 1291: ! 1292: static void ! 1293: sk_tcp_connected(sock *s) ! 1294: { ! 1295: sockaddr sa; ! 1296: int sa_len = sizeof(sa); ! 1297: ! 1298: if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) || ! 1299: (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0)) ! 1300: log(L_WARN "SOCK: Cannot get local IP address for TCP>"); ! 1301: ! 1302: s->type = SK_TCP; ! 1303: sk_alloc_bufs(s); ! 1304: s->tx_hook(s); ! 1305: } ! 1306: ! 1307: static int ! 1308: sk_passive_connected(sock *s, int type) ! 1309: { ! 1310: sockaddr loc_sa, rem_sa; ! 1311: int loc_sa_len = sizeof(loc_sa); ! 1312: int rem_sa_len = sizeof(rem_sa); ! 1313: ! 1314: int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len); ! 1315: if (fd < 0) ! 1316: { ! 1317: if ((errno != EINTR) && (errno != EAGAIN)) ! 1318: s->err_hook(s, errno); ! 1319: return 0; ! 1320: } ! 1321: ! 1322: sock *t = sk_new(s->pool); ! 1323: t->type = type; ! 1324: t->fd = fd; ! 1325: t->af = s->af; ! 1326: t->ttl = s->ttl; ! 1327: t->tos = s->tos; ! 1328: t->rbsize = s->rbsize; ! 1329: t->tbsize = s->tbsize; ! 1330: ! 1331: if (type == SK_TCP) ! 1332: { ! 1333: if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) || ! 1334: (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0)) ! 1335: log(L_WARN "SOCK: Cannot get local IP address for TCP<"); ! 1336: ! 1337: if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0) ! 1338: log(L_WARN "SOCK: Cannot get remote IP address for TCP<"); ! 1339: } ! 1340: ! 1341: if (sk_setup(t) < 0) ! 1342: { ! 1343: /* FIXME: Call err_hook instead ? */ ! 1344: log(L_ERR "SOCK: Incoming connection: %s%#m", t->err); ! 1345: ! 1346: /* FIXME: handle it better in rfree() */ ! 1347: close(t->fd); ! 1348: t->fd = -1; ! 1349: rfree(t); ! 1350: return 1; ! 1351: } ! 1352: ! 1353: sk_insert(t); ! 1354: sk_alloc_bufs(t); ! 1355: s->rx_hook(t, 0); ! 1356: return 1; ! 1357: } ! 1358: ! 1359: /** ! 1360: * sk_open - open a socket ! 1361: * @s: socket ! 1362: * ! 1363: * This function takes a socket resource created by sk_new() and ! 1364: * initialized by the user and binds a corresponding network connection ! 1365: * to it. ! 1366: * ! 1367: * Result: 0 for success, -1 for an error. ! 1368: */ ! 1369: int ! 1370: sk_open(sock *s) ! 1371: { ! 1372: int af = BIRD_AF; ! 1373: int fd = -1; ! 1374: int do_bind = 0; ! 1375: int bind_port = 0; ! 1376: ip_addr bind_addr = IPA_NONE; ! 1377: sockaddr sa; ! 1378: ! 1379: switch (s->type) ! 1380: { ! 1381: case SK_TCP_ACTIVE: ! 1382: s->ttx = ""; /* Force s->ttx != s->tpos */ ! 1383: /* Fall thru */ ! 1384: case SK_TCP_PASSIVE: ! 1385: fd = socket(af, SOCK_STREAM, IPPROTO_TCP); ! 1386: bind_port = s->sport; ! 1387: bind_addr = s->saddr; ! 1388: do_bind = bind_port || ipa_nonzero(bind_addr); ! 1389: break; ! 1390: ! 1391: case SK_UDP: ! 1392: fd = socket(af, SOCK_DGRAM, IPPROTO_UDP); ! 1393: bind_port = s->sport; ! 1394: bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; ! 1395: do_bind = 1; ! 1396: break; ! 1397: ! 1398: case SK_IP: ! 1399: fd = socket(af, SOCK_RAW, s->dport); ! 1400: bind_port = 0; ! 1401: bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; ! 1402: do_bind = ipa_nonzero(bind_addr); ! 1403: break; ! 1404: ! 1405: case SK_MAGIC: ! 1406: af = 0; ! 1407: fd = s->fd; ! 1408: break; ! 1409: ! 1410: default: ! 1411: bug("sk_open() called for invalid sock type %d", s->type); ! 1412: } ! 1413: ! 1414: if (fd < 0) ! 1415: ERR("socket"); ! 1416: ! 1417: s->af = af; ! 1418: s->fd = fd; ! 1419: ! 1420: if (sk_setup(s) < 0) ! 1421: goto err; ! 1422: ! 1423: if (do_bind) ! 1424: { ! 1425: if (bind_port) ! 1426: { ! 1427: int y = 1; ! 1428: ! 1429: if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0) ! 1430: ERR2("SO_REUSEADDR"); ! 1431: ! 1432: #ifdef CONFIG_NO_IFACE_BIND ! 1433: /* Workaround missing ability to bind to an iface */ ! 1434: if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr)) ! 1435: { ! 1436: if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0) ! 1437: ERR2("SO_REUSEPORT"); ! 1438: } ! 1439: #endif ! 1440: } ! 1441: else ! 1442: if (s->flags & SKF_HIGH_PORT) ! 1443: if (sk_set_high_port(s) < 0) ! 1444: log(L_WARN "Socket error: %s%#m", s->err); ! 1445: ! 1446: sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port); ! 1447: if (bind(fd, &sa.sa, SA_LEN(sa)) < 0) ! 1448: ERR2("bind"); ! 1449: } ! 1450: ! 1451: if (s->password) ! 1452: if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0) ! 1453: goto err; ! 1454: ! 1455: switch (s->type) ! 1456: { ! 1457: case SK_TCP_ACTIVE: ! 1458: sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport); ! 1459: if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0) ! 1460: sk_tcp_connected(s); ! 1461: else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS && ! 1462: errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH) ! 1463: ERR2("connect"); ! 1464: break; ! 1465: ! 1466: case SK_TCP_PASSIVE: ! 1467: if (listen(fd, 8) < 0) ! 1468: ERR2("listen"); ! 1469: break; ! 1470: ! 1471: case SK_MAGIC: ! 1472: break; ! 1473: ! 1474: default: ! 1475: sk_alloc_bufs(s); ! 1476: } ! 1477: ! 1478: if (!(s->flags & SKF_THREAD)) ! 1479: sk_insert(s); ! 1480: return 0; ! 1481: ! 1482: err: ! 1483: close(fd); ! 1484: s->fd = -1; ! 1485: return -1; ! 1486: } ! 1487: ! 1488: int ! 1489: sk_open_unix(sock *s, char *name) ! 1490: { ! 1491: struct sockaddr_un sa; ! 1492: int fd; ! 1493: ! 1494: /* We are sloppy during error (leak fd and not set s->err), but we die anyway */ ! 1495: ! 1496: fd = socket(AF_UNIX, SOCK_STREAM, 0); ! 1497: if (fd < 0) ! 1498: return -1; ! 1499: ! 1500: if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) ! 1501: return -1; ! 1502: ! 1503: /* Path length checked in test_old_bird() */ ! 1504: sa.sun_family = AF_UNIX; ! 1505: strcpy(sa.sun_path, name); ! 1506: ! 1507: if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0) ! 1508: return -1; ! 1509: ! 1510: if (listen(fd, 8) < 0) ! 1511: return -1; ! 1512: ! 1513: s->fd = fd; ! 1514: sk_insert(s); ! 1515: return 0; ! 1516: } ! 1517: ! 1518: ! 1519: #define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \ ! 1520: CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL) ! 1521: #define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO) ! 1522: ! 1523: static void ! 1524: sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) ! 1525: { ! 1526: if (sk_is_ipv4(s)) ! 1527: sk_prepare_cmsgs4(s, msg, cbuf, cbuflen); ! 1528: else ! 1529: sk_prepare_cmsgs6(s, msg, cbuf, cbuflen); ! 1530: } ! 1531: ! 1532: static void ! 1533: sk_process_cmsgs(sock *s, struct msghdr *msg) ! 1534: { ! 1535: struct cmsghdr *cm; ! 1536: ! 1537: s->laddr = IPA_NONE; ! 1538: s->lifindex = 0; ! 1539: s->rcv_ttl = -1; ! 1540: ! 1541: for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) ! 1542: { ! 1543: if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s)) ! 1544: { ! 1545: sk_process_cmsg4_pktinfo(s, cm); ! 1546: sk_process_cmsg4_ttl(s, cm); ! 1547: } ! 1548: ! 1549: if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s)) ! 1550: { ! 1551: sk_process_cmsg6_pktinfo(s, cm); ! 1552: sk_process_cmsg6_ttl(s, cm); ! 1553: } ! 1554: } ! 1555: } ! 1556: ! 1557: ! 1558: static inline int ! 1559: sk_sendmsg(sock *s) ! 1560: { ! 1561: struct iovec iov = {s->tbuf, s->tpos - s->tbuf}; ! 1562: byte cmsg_buf[CMSG_TX_SPACE]; ! 1563: sockaddr dst; ! 1564: ! 1565: sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport); ! 1566: ! 1567: struct msghdr msg = { ! 1568: .msg_name = &dst.sa, ! 1569: .msg_namelen = SA_LEN(dst), ! 1570: .msg_iov = &iov, ! 1571: .msg_iovlen = 1 ! 1572: }; ! 1573: ! 1574: #ifdef CONFIG_USE_HDRINCL ! 1575: byte hdr[20]; ! 1576: struct iovec iov2[2] = { {hdr, 20}, iov }; ! 1577: ! 1578: if (s->flags & SKF_HDRINCL) ! 1579: { ! 1580: sk_prepare_ip_header(s, hdr, iov.iov_len); ! 1581: msg.msg_iov = iov2; ! 1582: msg.msg_iovlen = 2; ! 1583: } ! 1584: #endif ! 1585: ! 1586: if (s->flags & SKF_PKTINFO) ! 1587: sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf)); ! 1588: ! 1589: return sendmsg(s->fd, &msg, 0); ! 1590: } ! 1591: ! 1592: static inline int ! 1593: sk_recvmsg(sock *s) ! 1594: { ! 1595: struct iovec iov = {s->rbuf, s->rbsize}; ! 1596: byte cmsg_buf[CMSG_RX_SPACE]; ! 1597: sockaddr src; ! 1598: ! 1599: struct msghdr msg = { ! 1600: .msg_name = &src.sa, ! 1601: .msg_namelen = sizeof(src), // XXXX ?? ! 1602: .msg_iov = &iov, ! 1603: .msg_iovlen = 1, ! 1604: .msg_control = cmsg_buf, ! 1605: .msg_controllen = sizeof(cmsg_buf), ! 1606: .msg_flags = 0 ! 1607: }; ! 1608: ! 1609: int rv = recvmsg(s->fd, &msg, 0); ! 1610: if (rv < 0) ! 1611: return rv; ! 1612: ! 1613: //ifdef IPV4 ! 1614: // if (cf_type == SK_IP) ! 1615: // rv = ipv4_skip_header(pbuf, rv); ! 1616: //endif ! 1617: ! 1618: sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport); ! 1619: sk_process_cmsgs(s, &msg); ! 1620: ! 1621: if (msg.msg_flags & MSG_TRUNC) ! 1622: s->flags |= SKF_TRUNCATED; ! 1623: else ! 1624: s->flags &= ~SKF_TRUNCATED; ! 1625: ! 1626: return rv; ! 1627: } ! 1628: ! 1629: ! 1630: static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; } ! 1631: ! 1632: static int ! 1633: sk_maybe_write(sock *s) ! 1634: { ! 1635: int e; ! 1636: ! 1637: switch (s->type) ! 1638: { ! 1639: case SK_TCP: ! 1640: case SK_MAGIC: ! 1641: case SK_UNIX: ! 1642: while (s->ttx != s->tpos) ! 1643: { ! 1644: e = write(s->fd, s->ttx, s->tpos - s->ttx); ! 1645: ! 1646: if (e < 0) ! 1647: { ! 1648: if (errno != EINTR && errno != EAGAIN) ! 1649: { ! 1650: reset_tx_buffer(s); ! 1651: /* EPIPE is just a connection close notification during TX */ ! 1652: s->err_hook(s, (errno != EPIPE) ? errno : 0); ! 1653: return -1; ! 1654: } ! 1655: return 0; ! 1656: } ! 1657: s->ttx += e; ! 1658: } ! 1659: reset_tx_buffer(s); ! 1660: return 1; ! 1661: ! 1662: case SK_UDP: ! 1663: case SK_IP: ! 1664: { ! 1665: if (s->tbuf == s->tpos) ! 1666: return 1; ! 1667: ! 1668: e = sk_sendmsg(s); ! 1669: ! 1670: if (e < 0) ! 1671: { ! 1672: if (errno != EINTR && errno != EAGAIN) ! 1673: { ! 1674: reset_tx_buffer(s); ! 1675: s->err_hook(s, errno); ! 1676: return -1; ! 1677: } ! 1678: ! 1679: if (!s->tx_hook) ! 1680: reset_tx_buffer(s); ! 1681: return 0; ! 1682: } ! 1683: reset_tx_buffer(s); ! 1684: return 1; ! 1685: } ! 1686: default: ! 1687: bug("sk_maybe_write: unknown socket type %d", s->type); ! 1688: } ! 1689: } ! 1690: ! 1691: int ! 1692: sk_rx_ready(sock *s) ! 1693: { ! 1694: int rv; ! 1695: struct pollfd pfd = { .fd = s->fd }; ! 1696: pfd.events |= POLLIN; ! 1697: ! 1698: redo: ! 1699: rv = poll(&pfd, 1, 0); ! 1700: ! 1701: if ((rv < 0) && (errno == EINTR || errno == EAGAIN)) ! 1702: goto redo; ! 1703: ! 1704: return rv; ! 1705: } ! 1706: ! 1707: /** ! 1708: * sk_send - send data to a socket ! 1709: * @s: socket ! 1710: * @len: number of bytes to send ! 1711: * ! 1712: * This function sends @len bytes of data prepared in the ! 1713: * transmit buffer of the socket @s to the network connection. ! 1714: * If the packet can be sent immediately, it does so and returns ! 1715: * 1, else it queues the packet for later processing, returns 0 ! 1716: * and calls the @tx_hook of the socket when the tranmission ! 1717: * takes place. ! 1718: */ ! 1719: int ! 1720: sk_send(sock *s, unsigned len) ! 1721: { ! 1722: s->ttx = s->tbuf; ! 1723: s->tpos = s->tbuf + len; ! 1724: return sk_maybe_write(s); ! 1725: } ! 1726: ! 1727: /** ! 1728: * sk_send_to - send data to a specific destination ! 1729: * @s: socket ! 1730: * @len: number of bytes to send ! 1731: * @addr: IP address to send the packet to ! 1732: * @port: port to send the packet to ! 1733: * ! 1734: * This is a sk_send() replacement for connection-less packet sockets ! 1735: * which allows destination of the packet to be chosen dynamically. ! 1736: * Raw IP sockets should use 0 for @port. ! 1737: */ ! 1738: int ! 1739: sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port) ! 1740: { ! 1741: s->daddr = addr; ! 1742: if (port) ! 1743: s->dport = port; ! 1744: ! 1745: s->ttx = s->tbuf; ! 1746: s->tpos = s->tbuf + len; ! 1747: return sk_maybe_write(s); ! 1748: } ! 1749: ! 1750: /* ! 1751: int ! 1752: sk_send_full(sock *s, unsigned len, struct iface *ifa, ! 1753: ip_addr saddr, ip_addr daddr, unsigned dport) ! 1754: { ! 1755: s->iface = ifa; ! 1756: s->saddr = saddr; ! 1757: s->daddr = daddr; ! 1758: s->dport = dport; ! 1759: s->ttx = s->tbuf; ! 1760: s->tpos = s->tbuf + len; ! 1761: return sk_maybe_write(s); ! 1762: } ! 1763: */ ! 1764: ! 1765: /* sk_read() and sk_write() are called from BFD's event loop */ ! 1766: ! 1767: int ! 1768: sk_read(sock *s, int revents) ! 1769: { ! 1770: switch (s->type) ! 1771: { ! 1772: case SK_TCP_PASSIVE: ! 1773: return sk_passive_connected(s, SK_TCP); ! 1774: ! 1775: case SK_UNIX_PASSIVE: ! 1776: return sk_passive_connected(s, SK_UNIX); ! 1777: ! 1778: case SK_TCP: ! 1779: case SK_UNIX: ! 1780: { ! 1781: int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos); ! 1782: ! 1783: if (c < 0) ! 1784: { ! 1785: if (errno != EINTR && errno != EAGAIN) ! 1786: s->err_hook(s, errno); ! 1787: else if (errno == EAGAIN && !(revents & POLLIN)) ! 1788: { ! 1789: log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents); ! 1790: s->err_hook(s, 0); ! 1791: } ! 1792: } ! 1793: else if (!c) ! 1794: s->err_hook(s, 0); ! 1795: else ! 1796: { ! 1797: s->rpos += c; ! 1798: if (s->rx_hook(s, s->rpos - s->rbuf)) ! 1799: { ! 1800: /* We need to be careful since the socket could have been deleted by the hook */ ! 1801: if (current_sock == s) ! 1802: s->rpos = s->rbuf; ! 1803: } ! 1804: return 1; ! 1805: } ! 1806: return 0; ! 1807: } ! 1808: ! 1809: case SK_MAGIC: ! 1810: return s->rx_hook(s, 0); ! 1811: ! 1812: default: ! 1813: { ! 1814: int e = sk_recvmsg(s); ! 1815: ! 1816: if (e < 0) ! 1817: { ! 1818: if (errno != EINTR && errno != EAGAIN) ! 1819: s->err_hook(s, errno); ! 1820: return 0; ! 1821: } ! 1822: ! 1823: s->rpos = s->rbuf + e; ! 1824: s->rx_hook(s, e); ! 1825: return 1; ! 1826: } ! 1827: } ! 1828: } ! 1829: ! 1830: int ! 1831: sk_write(sock *s) ! 1832: { ! 1833: switch (s->type) ! 1834: { ! 1835: case SK_TCP_ACTIVE: ! 1836: { ! 1837: sockaddr sa; ! 1838: sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport); ! 1839: ! 1840: if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN) ! 1841: sk_tcp_connected(s); ! 1842: else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS) ! 1843: s->err_hook(s, errno); ! 1844: return 0; ! 1845: } ! 1846: ! 1847: default: ! 1848: if (s->ttx != s->tpos && sk_maybe_write(s) > 0) ! 1849: { ! 1850: if (s->tx_hook) ! 1851: s->tx_hook(s); ! 1852: return 1; ! 1853: } ! 1854: return 0; ! 1855: } ! 1856: } ! 1857: ! 1858: void ! 1859: sk_err(sock *s, int revents) ! 1860: { ! 1861: int se = 0, sse = sizeof(se); ! 1862: if ((s->type != SK_MAGIC) && (revents & POLLERR)) ! 1863: if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0) ! 1864: { ! 1865: log(L_ERR "IO: Socket error: SO_ERROR: %m"); ! 1866: se = 0; ! 1867: } ! 1868: ! 1869: s->err_hook(s, se); ! 1870: } ! 1871: ! 1872: void ! 1873: sk_dump_all(void) ! 1874: { ! 1875: node *n; ! 1876: sock *s; ! 1877: ! 1878: debug("Open sockets:\n"); ! 1879: WALK_LIST(n, sock_list) ! 1880: { ! 1881: s = SKIP_BACK(sock, n, n); ! 1882: debug("%p ", s); ! 1883: sk_dump(&s->r); ! 1884: } ! 1885: debug("\n"); ! 1886: } ! 1887: ! 1888: ! 1889: /* ! 1890: * Internal event log and watchdog ! 1891: */ ! 1892: ! 1893: #define EVENT_LOG_LENGTH 32 ! 1894: ! 1895: struct event_log_entry ! 1896: { ! 1897: void *hook; ! 1898: void *data; ! 1899: btime timestamp; ! 1900: btime duration; ! 1901: }; ! 1902: ! 1903: static struct event_log_entry event_log[EVENT_LOG_LENGTH]; ! 1904: static struct event_log_entry *event_open; ! 1905: static int event_log_pos, event_log_num, watchdog_active; ! 1906: static btime last_time; ! 1907: static btime loop_time; ! 1908: ! 1909: static void ! 1910: io_update_time(void) ! 1911: { ! 1912: struct timespec ts; ! 1913: int rv; ! 1914: ! 1915: if (!clock_monotonic_available) ! 1916: return; ! 1917: ! 1918: /* ! 1919: * This is third time-tracking procedure (after update_times() above and ! 1920: * times_update() in BFD), dedicated to internal event log and latency ! 1921: * tracking. Hopefully, we consolidate these sometimes. ! 1922: */ ! 1923: ! 1924: rv = clock_gettime(CLOCK_MONOTONIC, &ts); ! 1925: if (rv < 0) ! 1926: die("clock_gettime: %m"); ! 1927: ! 1928: last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000); ! 1929: ! 1930: if (event_open) ! 1931: { ! 1932: event_open->duration = last_time - event_open->timestamp; ! 1933: ! 1934: if (event_open->duration > config->latency_limit) ! 1935: log(L_WARN "Event 0x%p 0x%p took %d ms", ! 1936: event_open->hook, event_open->data, (int) (event_open->duration TO_MS)); ! 1937: ! 1938: event_open = NULL; ! 1939: } ! 1940: } ! 1941: ! 1942: /** ! 1943: * io_log_event - mark approaching event into event log ! 1944: * @hook: event hook address ! 1945: * @data: event data address ! 1946: * ! 1947: * Store info (hook, data, timestamp) about the following internal event into ! 1948: * a circular event log (@event_log). When latency tracking is enabled, the log ! 1949: * entry is kept open (in @event_open) so the duration can be filled later. ! 1950: */ ! 1951: void ! 1952: io_log_event(void *hook, void *data) ! 1953: { ! 1954: if (config->latency_debug) ! 1955: io_update_time(); ! 1956: ! 1957: struct event_log_entry *en = event_log + event_log_pos; ! 1958: ! 1959: en->hook = hook; ! 1960: en->data = data; ! 1961: en->timestamp = last_time; ! 1962: en->duration = 0; ! 1963: ! 1964: event_log_num++; ! 1965: event_log_pos++; ! 1966: event_log_pos %= EVENT_LOG_LENGTH; ! 1967: ! 1968: event_open = config->latency_debug ? en : NULL; ! 1969: } ! 1970: ! 1971: static inline void ! 1972: io_close_event(void) ! 1973: { ! 1974: if (event_open) ! 1975: io_update_time(); ! 1976: } ! 1977: ! 1978: void ! 1979: io_log_dump(void) ! 1980: { ! 1981: int i; ! 1982: ! 1983: log(L_DEBUG "Event log:"); ! 1984: for (i = 0; i < EVENT_LOG_LENGTH; i++) ! 1985: { ! 1986: struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH; ! 1987: if (en->hook) ! 1988: log(L_DEBUG " Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data, ! 1989: (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS)); ! 1990: } ! 1991: } ! 1992: ! 1993: void ! 1994: watchdog_sigalrm(int sig UNUSED) ! 1995: { ! 1996: /* Update last_time and duration, but skip latency check */ ! 1997: config->latency_limit = 0xffffffff; ! 1998: io_update_time(); ! 1999: ! 2000: /* We want core dump */ ! 2001: abort(); ! 2002: } ! 2003: ! 2004: static inline void ! 2005: watchdog_start1(void) ! 2006: { ! 2007: io_update_time(); ! 2008: ! 2009: loop_time = last_time; ! 2010: } ! 2011: ! 2012: static inline void ! 2013: watchdog_start(void) ! 2014: { ! 2015: io_update_time(); ! 2016: ! 2017: loop_time = last_time; ! 2018: event_log_num = 0; ! 2019: ! 2020: if (config->watchdog_timeout) ! 2021: { ! 2022: alarm(config->watchdog_timeout); ! 2023: watchdog_active = 1; ! 2024: } ! 2025: } ! 2026: ! 2027: static inline void ! 2028: watchdog_stop(void) ! 2029: { ! 2030: io_update_time(); ! 2031: ! 2032: if (watchdog_active) ! 2033: { ! 2034: alarm(0); ! 2035: watchdog_active = 0; ! 2036: } ! 2037: ! 2038: btime duration = last_time - loop_time; ! 2039: if (duration > config->watchdog_warning) ! 2040: log(L_WARN "I/O loop cycle took %d ms for %d events", ! 2041: (int) (duration TO_MS), event_log_num); ! 2042: } ! 2043: ! 2044: ! 2045: /* ! 2046: * Main I/O Loop ! 2047: */ ! 2048: ! 2049: volatile int async_config_flag; /* Asynchronous reconfiguration/dump scheduled */ ! 2050: volatile int async_dump_flag; ! 2051: volatile int async_shutdown_flag; ! 2052: ! 2053: void ! 2054: io_init(void) ! 2055: { ! 2056: init_list(&near_timers); ! 2057: init_list(&far_timers); ! 2058: init_list(&sock_list); ! 2059: init_list(&global_event_list); ! 2060: krt_io_init(); ! 2061: init_times(); ! 2062: update_times(); ! 2063: boot_time = now; ! 2064: srandom((int) now_real); ! 2065: } ! 2066: ! 2067: static int short_loops = 0; ! 2068: #define SHORT_LOOP_MAX 10 ! 2069: ! 2070: void ! 2071: io_loop(void) ! 2072: { ! 2073: int poll_tout; ! 2074: time_t tout; ! 2075: int nfds, events, pout; ! 2076: sock *s; ! 2077: node *n; ! 2078: int fdmax = 256; ! 2079: struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd)); ! 2080: ! 2081: watchdog_start1(); ! 2082: for(;;) ! 2083: { ! 2084: events = ev_run_list(&global_event_list); ! 2085: timers: ! 2086: update_times(); ! 2087: tout = tm_first_shot(); ! 2088: if (tout <= now) ! 2089: { ! 2090: tm_shot(); ! 2091: goto timers; ! 2092: } ! 2093: poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */ ! 2094: ! 2095: io_close_event(); ! 2096: ! 2097: nfds = 0; ! 2098: WALK_LIST(n, sock_list) ! 2099: { ! 2100: pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */ ! 2101: s = SKIP_BACK(sock, n, n); ! 2102: if (s->rx_hook) ! 2103: { ! 2104: pfd[nfds].fd = s->fd; ! 2105: pfd[nfds].events |= POLLIN; ! 2106: } ! 2107: if (s->tx_hook && s->ttx != s->tpos) ! 2108: { ! 2109: pfd[nfds].fd = s->fd; ! 2110: pfd[nfds].events |= POLLOUT; ! 2111: } ! 2112: if (pfd[nfds].fd != -1) ! 2113: { ! 2114: s->index = nfds; ! 2115: nfds++; ! 2116: } ! 2117: else ! 2118: s->index = -1; ! 2119: ! 2120: if (nfds >= fdmax) ! 2121: { ! 2122: fdmax *= 2; ! 2123: pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd)); ! 2124: } ! 2125: } ! 2126: ! 2127: /* ! 2128: * Yes, this is racy. But even if the signal comes before this test ! 2129: * and entering poll(), it gets caught on the next timer tick. ! 2130: */ ! 2131: ! 2132: if (async_config_flag) ! 2133: { ! 2134: io_log_event(async_config, NULL); ! 2135: async_config(); ! 2136: async_config_flag = 0; ! 2137: continue; ! 2138: } ! 2139: if (async_dump_flag) ! 2140: { ! 2141: io_log_event(async_dump, NULL); ! 2142: async_dump(); ! 2143: async_dump_flag = 0; ! 2144: continue; ! 2145: } ! 2146: if (async_shutdown_flag) ! 2147: { ! 2148: io_log_event(async_shutdown, NULL); ! 2149: async_shutdown(); ! 2150: async_shutdown_flag = 0; ! 2151: continue; ! 2152: } ! 2153: ! 2154: /* And finally enter poll() to find active sockets */ ! 2155: watchdog_stop(); ! 2156: pout = poll(pfd, nfds, poll_tout); ! 2157: watchdog_start(); ! 2158: ! 2159: if (pout < 0) ! 2160: { ! 2161: if (errno == EINTR || errno == EAGAIN) ! 2162: continue; ! 2163: die("poll: %m"); ! 2164: } ! 2165: if (pout) ! 2166: { ! 2167: /* guaranteed to be non-empty */ ! 2168: current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); ! 2169: ! 2170: while (current_sock) ! 2171: { ! 2172: sock *s = current_sock; ! 2173: if (s->index == -1) ! 2174: { ! 2175: current_sock = sk_next(s); ! 2176: goto next; ! 2177: } ! 2178: ! 2179: int e; ! 2180: int steps; ! 2181: ! 2182: steps = MAX_STEPS; ! 2183: if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook) ! 2184: do ! 2185: { ! 2186: steps--; ! 2187: io_log_event(s->rx_hook, s->data); ! 2188: e = sk_read(s, pfd[s->index].revents); ! 2189: if (s != current_sock) ! 2190: goto next; ! 2191: } ! 2192: while (e && s->rx_hook && steps); ! 2193: ! 2194: steps = MAX_STEPS; ! 2195: if (pfd[s->index].revents & POLLOUT) ! 2196: do ! 2197: { ! 2198: steps--; ! 2199: io_log_event(s->tx_hook, s->data); ! 2200: e = sk_write(s); ! 2201: if (s != current_sock) ! 2202: goto next; ! 2203: } ! 2204: while (e && steps); ! 2205: ! 2206: current_sock = sk_next(s); ! 2207: next: ; ! 2208: } ! 2209: ! 2210: short_loops++; ! 2211: if (events && (short_loops < SHORT_LOOP_MAX)) ! 2212: continue; ! 2213: short_loops = 0; ! 2214: ! 2215: int count = 0; ! 2216: current_sock = stored_sock; ! 2217: if (current_sock == NULL) ! 2218: current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); ! 2219: ! 2220: while (current_sock && count < MAX_RX_STEPS) ! 2221: { ! 2222: sock *s = current_sock; ! 2223: if (s->index == -1) ! 2224: { ! 2225: current_sock = sk_next(s); ! 2226: goto next2; ! 2227: } ! 2228: ! 2229: if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook) ! 2230: { ! 2231: count++; ! 2232: io_log_event(s->rx_hook, s->data); ! 2233: sk_read(s, pfd[s->index].revents); ! 2234: if (s != current_sock) ! 2235: goto next2; ! 2236: } ! 2237: ! 2238: if (pfd[s->index].revents & (POLLHUP | POLLERR)) ! 2239: { ! 2240: sk_err(s, pfd[s->index].revents); ! 2241: if (s != current_sock) ! 2242: goto next2; ! 2243: } ! 2244: ! 2245: current_sock = sk_next(s); ! 2246: next2: ; ! 2247: } ! 2248: ! 2249: ! 2250: stored_sock = current_sock; ! 2251: } ! 2252: } ! 2253: } ! 2254: ! 2255: void ! 2256: test_old_bird(char *path) ! 2257: { ! 2258: int fd; ! 2259: struct sockaddr_un sa; ! 2260: ! 2261: fd = socket(AF_UNIX, SOCK_STREAM, 0); ! 2262: if (fd < 0) ! 2263: die("Cannot create socket: %m"); ! 2264: if (strlen(path) >= sizeof(sa.sun_path)) ! 2265: die("Socket path too long"); ! 2266: bzero(&sa, sizeof(sa)); ! 2267: sa.sun_family = AF_UNIX; ! 2268: strcpy(sa.sun_path, path); ! 2269: if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0) ! 2270: die("I found another BIRD running."); ! 2271: close(fd); ! 2272: }