Annotation of embedaddon/quagga/watchquagga/watchquagga.c, revision 1.1.1.3
1.1 misho 1: /*
2: Monitor status of quagga daemons and restart if necessary.
3:
4: Copyright (C) 2004 Andrew J. Schorr
5:
6: This program is free software; you can redistribute it and/or modify
7: it under the terms of the GNU General Public License as published by
8: the Free Software Foundation; either version 2 of the License, or
9: (at your option) any later version.
10:
11: This program is distributed in the hope that it will be useful,
12: but WITHOUT ANY WARRANTY; without even the implied warranty of
13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14: GNU General Public License for more details.
15:
16: You should have received a copy of the GNU General Public License
17: along with this program; if not, write to the Free Software
18: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19: */
20:
21: #include <zebra.h>
22: #include <thread.h>
23: #include <log.h>
24: #include <network.h>
25: #include <sigevent.h>
26: #include <lib/version.h>
27: #include <getopt.h>
28: #include <sys/un.h>
29: #include <sys/wait.h>
1.1.1.2 misho 30: #include <memory.h>
1.1 misho 31:
/* Fallback definition of MIN, used only when no earlier header supplied one.
   Each argument may be evaluated more than once. */
#ifndef MIN
#define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
#endif

/* Macros to help randomize timers.
   For non-negative X, JITTER(X) is a pseudo-random offset between -(X)/2 and
   (X)-(X)/2; FUZZY(X) is X plus a jitter drawn from one twentieth of X. */
#define JITTER(X) ((random() % ((X)+1))-((X)/2))
#define FUZZY(X) ((X)+JITTER((X)/20))

/* Compiled-in defaults; usage() reports each of them in its help text.
   The period, timeout and restart values are in seconds. */
#define DEFAULT_PERIOD 5
#define DEFAULT_TIMEOUT 10
#define DEFAULT_RESTART_TIMEOUT 20
#define DEFAULT_LOGLEVEL LOG_INFO
#define DEFAULT_MIN_RESTART 60
#define DEFAULT_MAX_RESTART 600
/* Pid file location: a build-supplied path wins over STATEDIR. */
#ifdef PATH_WATCHQUAGGA_PID
#define DEFAULT_PIDFILE PATH_WATCHQUAGGA_PID
#else
#define DEFAULT_PIDFILE STATEDIR "/watchquagga.pid"
#endif
/* Directory holding the daemons' vty sockets. */
#ifdef DAEMON_VTY_DIR
#define VTYDIR DAEMON_VTY_DIR
#else
#define VTYDIR STATEDIR
#endif

/* Token sent with the echo command and expected back in the response. */
#define PING_TOKEN "PING"
58:
/* Thread master passed to every thread_add_* call in this file.
   Needs to be global, referenced somewhere inside libzebra. */
struct thread_master *master;
61:
/* Operating mode of the watchdog.  The numeric values match the mode
   numbers printed by usage() and index mode_str[]. */
typedef enum
{
  MODE_MONITOR = 0,
  MODE_GLOBAL_RESTART = 1,
  MODE_SEPARATE_RESTART = 2,
  MODE_PHASED_ZEBRA_RESTART = 3,
  MODE_PHASED_ALL_RESTART = 4
} watch_mode_t;

/* Human-readable name of each operating mode, indexed by watch_mode_t. */
static const char *mode_str[] =
{
  [MODE_MONITOR]              = "monitor",
  [MODE_GLOBAL_RESTART]       = "global restart",
  [MODE_SEPARATE_RESTART]     = "individual daemon restart",
  [MODE_PHASED_ZEBRA_RESTART] = "phased zebra restart",
  [MODE_PHASED_ALL_RESTART]   = "phased global restart for any failure",
};
79:
/* Stage of a phased restart; PHASE_NONE means no phased restart is running. */
typedef enum
{
  PHASE_NONE = 0,
  PHASE_STOPS_PENDING = 1,
  PHASE_WAITING_DOWN = 2,
  PHASE_ZEBRA_RESTART_PENDING = 3,
  PHASE_WAITING_ZEBRA_UP = 4
} restart_phase_t;

/* Description of each phase, indexed by restart_phase_t.  The final entry
   has no matching enumerator in restart_phase_t; it is retained so the
   table keeps its original size and contents. */
static const char *phase_str[] =
{
  [PHASE_NONE]                  = "None",
  [PHASE_STOPS_PENDING]         = "Stop jobs running",
  [PHASE_WAITING_DOWN]          = "Waiting for other daemons to come down",
  [PHASE_ZEBRA_RESTART_PENDING] = "Zebra restart job running",
  [PHASE_WAITING_ZEBRA_UP]      = "Waiting for zebra to come up",
  [PHASE_WAITING_ZEBRA_UP + 1]  = "Start jobs running",
};

/* Time allowed for any single phase before the phased restart is abandoned. */
#define PHASE_TIMEOUT (3*gs.restart_timeout)
100:
/* Bookkeeping for one background job (restart/start/stop command). */
struct restart_info
{
  const char *name;       /* substituted into the command template by run_job */
  const char *what;       /* job type label passed to run_job, used in log messages */
  pid_t pid;              /* pid of the running job; 0 when no job is outstanding */
  struct timeval time;    /* when the job was started; reset by sigchild on completion */
  long interval;          /* seconds that must elapse before an unforced job may run */
  struct thread *t_kill;  /* timer that fires restart_kill if the job overruns */
  int kills;              /* signals sent so far: 0 selects SIGTERM, otherwise SIGKILL */
};
111:
/* Program-wide configuration and runtime state. */
static struct global_state
{
  watch_mode_t mode;                /* selected operating mode */
  restart_phase_t phase;            /* current stage of a phased restart */
  struct thread *t_phase_hanging;   /* timer that fires phase_hanging */
  const char *vtydir;               /* directory searched for "<name>.vty" sockets */
  long period;                      /* polling period; initialised to 1000*DEFAULT_PERIOD */
  long timeout;                     /* seconds before a ping or connect is considered hung */
  long restart_timeout;             /* seconds before an overrunning job is signalled */
  long min_restart_interval;        /* lower bound for restart_info.interval */
  long max_restart_interval;        /* upper bound for restart_info.interval */
  int do_ping;                      /* nonzero: daemon_up schedules echo pings */
  struct daemon *daemons;           /* head of the linked list of monitored daemons */
  const char *restart_command;      /* command template handed to run_job for "restart" */
  const char *start_command;        /* command template handed to run_job for "start" */
  const char *stop_command;         /* command template handed to run_job for "stop" */
  struct restart_info restart;      /* job state used in MODE_GLOBAL_RESTART */
  int unresponsive_restart;         /* nonzero: try a restart when a daemon stops answering */
  int loglevel;                     /* compared against LOG_DEBUG to gate extra messages */
  struct daemon *special; /* points to zebra when doing phased restart */
  int numdaemons;                   /* presumably the number of monitored daemons -- set outside this view */
  int numpids;                      /* background jobs started and not yet reaped */
  int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
} gs = {
  .mode = MODE_MONITOR,
  .phase = PHASE_NONE,
  .vtydir = VTYDIR,
  .period = 1000*DEFAULT_PERIOD,
  .timeout = DEFAULT_TIMEOUT,
  .restart_timeout = DEFAULT_RESTART_TIMEOUT,
  .loglevel = DEFAULT_LOGLEVEL,
  .min_restart_interval = DEFAULT_MIN_RESTART,
  .max_restart_interval = DEFAULT_MAX_RESTART,
  .do_ping = 1,
};
147:
/* Connection state of a monitored daemon; the values index state_str[]. */
typedef enum
{
  DAEMON_INIT = 0,
  DAEMON_DOWN = 1,
  DAEMON_CONNECTING = 2,
  DAEMON_UP = 3,
  DAEMON_UNRESPONSIVE = 4
} daemon_state_t;

/* True (1) when the daemon holds an established connection, whether or not
   it is currently answering pings.  DMN is evaluated twice. */
#define IS_UP(DMN) \
  (((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))

/* Printable name of each daemon state, indexed by daemon_state_t. */
static const char *state_str[] =
{
  [DAEMON_INIT]         = "Init",
  [DAEMON_DOWN]         = "Down",
  [DAEMON_CONNECTING]   = "Connecting",
  [DAEMON_UP]           = "Up",
  [DAEMON_UNRESPONSIVE] = "Unresponsive",
};
168:
/* Per-daemon monitoring state; instances are chained through `next`
   starting at gs.daemons. */
struct daemon {
  const char *name;             /* daemon name; try_connect opens "<vtydir>/<name>.vty" */
  daemon_state_t state;         /* current connection state */
  int fd;                       /* vty socket; daemon_down closes it and stores -1 */
  struct timeval echo_sent;     /* when the last ping was written; tv_sec is zeroed once answered */
  u_int connect_tries;          /* incremented by try_connect, cleared by daemon_up */
  struct thread *t_wakeup;      /* pending timer (retry, ping, or timeout) */
  struct thread *t_read;        /* pending read handler on fd */
  struct thread *t_write;       /* pending write handler used to detect connect completion */
  struct daemon *next;          /* next daemon in the list */
  struct restart_info restart;  /* background job state for this daemon */
};
181:
/* Long command-line options handed to getopt_long(); each maps onto the
   single-letter option given in the last column.  The list is terminated
   by an all-zero entry. */
static const struct option longopts[] =
{
  { "daemon", no_argument, NULL, 'd'},
  { "statedir", required_argument, NULL, 'S'},
  { "no-echo", no_argument, NULL, 'e'},
  { "loglevel", required_argument, NULL, 'l'},
  { "interval", required_argument, NULL, 'i'},
  { "timeout", required_argument, NULL, 't'},
  { "restart-timeout", required_argument, NULL, 'T'},
  { "restart", required_argument, NULL, 'r'},
  { "start-command", required_argument, NULL, 's'},
  { "kill-command", required_argument, NULL, 'k'},
  { "restart-all", required_argument, NULL, 'R'},
  { "all-restart", no_argument, NULL, 'a'},
  { "always-all-restart", no_argument, NULL, 'A'},
  { "unresponsive-restart", no_argument, NULL, 'z'},
  { "min-restart-interval", required_argument, NULL, 'm'},
  { "max-restart-interval", required_argument, NULL, 'M'},
  { "pid-file", required_argument, NULL, 'p'},
  { "blank-string", required_argument, NULL, 'b'},
  { "help", no_argument, NULL, 'h'},
  { "version", no_argument, NULL, 'v'},
  { NULL, 0, NULL, 0 }
};
206:
/* Forward declarations for functions that are referenced before they are
   defined further down in this file. */
static int try_connect(struct daemon *dmn);
static int wakeup_send_echo(struct thread *t_wakeup);
static void try_restart(struct daemon *dmn);
static void phase_check(void);
211:
212: static int
213: usage(const char *progname, int status)
214: {
215: if (status != 0)
216: fprintf(stderr, "Try `%s --help' for more information.\n", progname);
217: else
1.1.1.3 ! misho 218: {
! 219: printf("Usage : %s [OPTION...] <daemon name> ...\n\n\
1.1 misho 220: Watchdog program to monitor status of quagga daemons and try to restart\n\
221: them if they are down or unresponsive. It determines whether a daemon is\n\
222: up based on whether it can connect to the daemon's vty unix stream socket.\n\
223: It then repeatedly sends echo commands over that socket to determine whether\n\
224: the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
225: on the socket connection and know immediately that the daemon is down.\n\n\
226: The daemons to be monitored should be listed on the command line.\n\n\
227: This program can run in one of 5 modes:\n\n\
228: 0. Mode: %s.\n\
229: Just monitor and report on status changes. Example:\n\
230: %s -d zebra ospfd bgpd\n\n\
231: 1. Mode: %s.\n\
232: Whenever any daemon hangs or crashes, use the given command to restart\n\
233: them all. Example:\n\
234: %s -dz \\\n\
235: -R '/sbin/service zebra restart; /sbin/service ospfd restart' \\\n\
236: zebra ospfd\n\n\
237: 2. Mode: %s.\n\
238: When any single daemon hangs or crashes, restart only the daemon that's\n\
239: in trouble using the supplied restart command. Example:\n\
240: %s -dz -r '/sbin/service %%s restart' zebra ospfd bgpd\n\n\
241: 3. Mode: %s.\n\
242: The same as the previous mode, except that there is special treatment when\n\
243: the zebra daemon is in trouble. In that case, a phased restart approach\n\
244: is used: 1. stop all other daemons; 2. restart zebra; 3. start the other\n\
245: daemons. Example:\n\
246: %s -adz -r '/sbin/service %%s restart' \\\n\
247: -s '/sbin/service %%s start' \\\n\
248: -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
249: 4. Mode: %s.\n\
250: This is the same as the previous mode, except that the phased restart\n\
251: procedure is used whenever any of the daemons hangs or crashes. Example:\n\
252: %s -Adz -r '/sbin/service %%s restart' \\\n\
253: -s '/sbin/service %%s start' \\\n\
254: -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
255: As of this writing, it is believed that mode 2 [%s]\n\
256: is not safe, and mode 3 [%s] may not be safe with some of the\n\
257: routing daemons.\n\n\
258: In order to avoid attempting to restart the daemons in a fast loop,\n\
259: the -m and -M options allow you to control the minimum delay between\n\
260: restart commands. The minimum restart delay is recalculated each time\n\
261: a restart is attempted: if the time since the last restart attempt exceeds\n\
262: twice the -M value, then the restart delay is set to the -m value.\n\
1.1.1.3 ! misho 263: Otherwise, the interval is doubled (but capped at the -M value).\n\n",
! 264: progname,mode_str[0],progname,mode_str[1],progname,mode_str[2],
! 265: progname,mode_str[3],progname,mode_str[4],progname,mode_str[2],
! 266: mode_str[3]);
! 267:
! 268: printf("Options:\n\
1.1 misho 269: -d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
270: to syslog instead of stdout.\n\
271: -S, --statedir Set the vty socket directory (default is %s)\n\
272: -e, --no-echo Do not ping the daemons to test responsiveness (this\n\
273: option is necessary if the daemons do not support the\n\
274: echo command)\n\
275: -l, --loglevel Set the logging level (default is %d).\n\
276: The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
277: but it can be set higher than %d if extra-verbose debugging\n\
278: messages are desired.\n\
279: -m, --min-restart-interval\n\
280: Set the minimum seconds to wait between invocations of daemon\n\
281: restart commands (default is %d).\n\
282: -M, --max-restart-interval\n\
283: Set the maximum seconds to wait between invocations of daemon\n\
284: restart commands (default is %d).\n\
285: -i, --interval Set the status polling interval in seconds (default is %d)\n\
286: -t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
287: -T, --restart-timeout\n\
288: Set the restart (kill) timeout in seconds (default is %d).\n\
289: If any background jobs are still running after this much\n\
290: time has elapsed, they will be killed.\n\
291: -r, --restart Supply a Bourne shell command to use to restart a single\n\
292: daemon. The command string should include '%%s' where the\n\
293: name of the daemon should be substituted.\n\
294: Note that -r and -R are incompatible.\n\
295: -s, --start-command\n\
296: Supply a Bourne shell to command to use to start a single\n\
297: daemon. The command string should include '%%s' where the\n\
298: name of the daemon should be substituted.\n\
299: -k, --kill-command\n\
300: Supply a Bourne shell to command to use to stop a single\n\
301: daemon. The command string should include '%%s' where the\n\
302: name of the daemon should be substituted.\n\
303: -R, --restart-all\n\
304: When one or more daemons is down, try to restart everything\n\
305: using the Bourne shell command supplied as the argument.\n\
306: Note that -r and -R are incompatible.\n\
307: -z, --unresponsive-restart\n\
308: When a daemon is unresponsive, treat it as being down for\n\
309: restart purposes.\n\
310: -a, --all-restart\n\
311: When zebra hangs or crashes, restart all daemons using\n\
312: this phased approach: 1. stop all other daemons; 2. restart\n\
313: zebra; 3. start other daemons. Requires -r, -s, and -k.\n\
314: -A, --always-all-restart\n\
315: When any daemon (not just zebra) hangs or crashes, use the\n\
316: same phased restart mechanism described above for -a.\n\
317: Requires -r, -s, and -k.\n\
318: -p, --pid-file Set process identifier file name\n\
319: (default is %s).\n\
320: -b, --blank-string\n\
321: When the supplied argument string is found in any of the\n\
322: various shell command arguments (-r, -s, -k, or -R), replace\n\
323: it with a space. This is an ugly hack to circumvent problems\n\
324: passing command-line arguments with embedded spaces.\n\
325: -v, --version Print program version\n\
1.1.1.3 ! misho 326: -h, --help Display this help and exit\n",
! 327: VTYDIR,DEFAULT_LOGLEVEL,LOG_EMERG,LOG_DEBUG,LOG_DEBUG,
! 328: DEFAULT_MIN_RESTART,DEFAULT_MAX_RESTART,
! 329: DEFAULT_PERIOD,DEFAULT_TIMEOUT,DEFAULT_RESTART_TIMEOUT,
! 330: DEFAULT_PIDFILE);
! 331: }
1.1 misho 332:
333: return status;
334: }
335:
/* Fork and run SHELL_CMD through "/bin/sh -c" in a process group of its own.
   Returns the child's pid in the parent, or -1 if fork() failed.  The child
   is not waited for here; it is reaped later. */
static pid_t
run_background(char *shell_cmd)
{
  pid_t pid = fork();

  if (pid == -1)
    {
      zlog_err("fork failed, cannot run command [%s]: %s",
	       shell_cmd,safe_strerror(errno));
      return -1;
    }

  if (pid == 0)
    {
      /* Child process. */
      char sh_name[] = "sh";
      char sh_flag[] = "-c";
      char *const sh_argv[4] = { sh_name, sh_flag, shell_cmd, NULL};

      /* Use separate process group so child processes can be killed easily. */
      if (setpgid(0,0) < 0)
	zlog_warn("warning: setpgid(0,0) failed: %s",safe_strerror(errno));

      execv("/bin/sh", sh_argv);
      /* Only reached when execv itself failed. */
      zlog_err("execv(/bin/sh -c '%s') failed: %s",
	       shell_cmd,safe_strerror(errno));
      _exit(127);
    }

  /* Parent process: we will reap the child later. */
  zlog_err("Forked background command [pid %d]: %s",(int)pid,shell_cmd);
  return pid;
}
367:
/* Store in *RESULT the wall-clock time elapsed since *START_TIME and return
   RESULT, so the call can be used inside an expression. */
static struct timeval *
time_elapsed(struct timeval *result, const struct timeval *start_time)
{
  /* Read "now" straight into the caller's buffer, then turn it into a delta. */
  gettimeofday(result,NULL);
  result->tv_sec -= start_time->tv_sec;
  result->tv_usec -= start_time->tv_usec;

  /* Borrow whole seconds until the microsecond part is non-negative. */
  for (; result->tv_usec < 0; result->tv_sec--)
    result->tv_usec += 1000000L;

  return result;
}
381:
382: static int
383: restart_kill(struct thread *t_kill)
384: {
385: struct restart_info *restart = THREAD_ARG(t_kill);
386: struct timeval delay;
387:
388: time_elapsed(&delay,&restart->time);
389: zlog_warn("Warning: %s %s child process %d still running after "
390: "%ld seconds, sending signal %d",
1.1.1.3 ! misho 391: restart->what,restart->name,(int)restart->pid, (long)delay.tv_sec,
1.1 misho 392: (restart->kills ? SIGKILL : SIGTERM));
393: kill(-restart->pid,(restart->kills ? SIGKILL : SIGTERM));
394: restart->kills++;
395: restart->t_kill = thread_add_timer(master,restart_kill,restart,
396: gs.restart_timeout);
397: return 0;
398: }
399:
400: static struct restart_info *
401: find_child(pid_t child)
402: {
403: if (gs.mode == MODE_GLOBAL_RESTART)
404: {
405: if (gs.restart.pid == child)
406: return &gs.restart;
407: }
408: else
409: {
410: struct daemon *dmn;
411: for (dmn = gs.daemons; dmn; dmn = dmn->next)
412: {
413: if (dmn->restart.pid == child)
414: return &dmn->restart;
415: }
416: }
417: return NULL;
418: }
419:
420: static void
421: sigchild(void)
422: {
423: pid_t child;
424: int status;
425: const char *name;
426: const char *what;
427: struct restart_info *restart;
428:
429: switch (child = waitpid(-1,&status,WNOHANG))
430: {
431: case -1:
432: zlog_err("waitpid failed: %s",safe_strerror(errno));
433: return;
434: case 0:
435: zlog_warn("SIGCHLD received, but waitpid did not reap a child");
436: return;
437: }
438:
439: if ((restart = find_child(child)) != NULL)
440: {
441: name = restart->name;
442: what = restart->what;
443: restart->pid = 0;
444: gs.numpids--;
445: thread_cancel(restart->t_kill);
446: restart->t_kill = NULL;
447: /* Update restart time to reflect the time the command completed. */
448: gettimeofday(&restart->time,NULL);
449: }
450: else
451: {
452: zlog_err("waitpid returned status for an unknown child process %d",
453: (int)child);
454: name = "(unknown)";
455: what = "background";
456: }
457: if (WIFSTOPPED(status))
458: zlog_warn("warning: %s %s process %d is stopped",
459: what,name,(int)child);
460: else if (WIFSIGNALED(status))
461: zlog_warn("%s %s process %d terminated due to signal %d",
462: what,name,(int)child,WTERMSIG(status));
463: else if (WIFEXITED(status))
464: {
465: if (WEXITSTATUS(status) != 0)
466: zlog_warn("%s %s process %d exited with non-zero status %d",
467: what,name,(int)child,WEXITSTATUS(status));
468: else
469: zlog_debug("%s %s process %d exited normally",what,name,(int)child);
470: }
471: else
472: zlog_err("cannot interpret %s %s process %d wait status 0x%x",
473: what,name,(int)child,status);
474: phase_check();
475: }
476:
477: static int
478: run_job(struct restart_info *restart, const char *cmdtype, const char *command,
479: int force, int update_interval)
480: {
481: struct timeval delay;
482:
483: if (gs.loglevel > LOG_DEBUG+1)
484: zlog_debug("attempting to %s %s",cmdtype,restart->name);
485:
486: if (restart->pid)
487: {
488: if (gs.loglevel > LOG_DEBUG+1)
489: zlog_debug("cannot %s %s, previous pid %d still running",
490: cmdtype,restart->name,(int)restart->pid);
491: return -1;
492: }
493:
494: /* Note: time_elapsed test must come before the force test, since we need
495: to make sure that delay is initialized for use below in updating the
496: restart interval. */
497: if ((time_elapsed(&delay,&restart->time)->tv_sec < restart->interval) &&
498: !force)
499: {
500: if (gs.loglevel > LOG_DEBUG+1)
501: zlog_debug("postponing %s %s: "
502: "elapsed time %ld < retry interval %ld",
503: cmdtype,restart->name,(long)delay.tv_sec,restart->interval);
504: return -1;
505: }
506:
507: gettimeofday(&restart->time,NULL);
508: restart->kills = 0;
509: {
510: char cmd[strlen(command)+strlen(restart->name)+1];
511: snprintf(cmd,sizeof(cmd),command,restart->name);
512: if ((restart->pid = run_background(cmd)) > 0)
513: {
514: restart->t_kill = thread_add_timer(master,restart_kill,restart,
515: gs.restart_timeout);
516: restart->what = cmdtype;
517: gs.numpids++;
518: }
519: else
520: restart->pid = 0;
521: }
522:
523: /* Calculate the new restart interval. */
524: if (update_interval)
525: {
526: if (delay.tv_sec > 2*gs.max_restart_interval)
527: restart->interval = gs.min_restart_interval;
528: else if ((restart->interval *= 2) > gs.max_restart_interval)
529: restart->interval = gs.max_restart_interval;
530: if (gs.loglevel > LOG_DEBUG+1)
531: zlog_debug("restart %s interval is now %ld",
532: restart->name,restart->interval);
533: }
534: return restart->pid;
535: }
536:
/* Scheduling helpers.  Each one stores the newly created thread in the
   daemon structure so that it can be cancelled later. */

/* Arrange for handle_read to run when the daemon's socket becomes readable. */
#define SET_READ_HANDLER(DMN) \
  (DMN)->t_read = thread_add_read(master,handle_read,(DMN),(DMN)->fd)

/* Retry the connection (wakeup_down) after one fuzzed polling period. */
#define SET_WAKEUP_DOWN(DMN) \
  (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_down,(DMN), \
					   FUZZY(gs.period))

/* Re-check an unresponsive daemon (wakeup_unresponsive) after one fuzzed
   polling period. */
#define SET_WAKEUP_UNRESPONSIVE(DMN) \
  (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_unresponsive,(DMN), \
					   FUZZY(gs.period))

/* Send the next ping (wakeup_send_echo) after one fuzzed polling period. */
#define SET_WAKEUP_ECHO(DMN) \
  (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_send_echo,(DMN), \
					   FUZZY(gs.period))
551:
552: static int
553: wakeup_down(struct thread *t_wakeup)
554: {
555: struct daemon *dmn = THREAD_ARG(t_wakeup);
556:
557: dmn->t_wakeup = NULL;
558: if (try_connect(dmn) < 0)
559: SET_WAKEUP_DOWN(dmn);
560: if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
561: try_restart(dmn);
562: return 0;
563: }
564:
565: static int
566: wakeup_init(struct thread *t_wakeup)
567: {
568: struct daemon *dmn = THREAD_ARG(t_wakeup);
569:
570: dmn->t_wakeup = NULL;
571: if (try_connect(dmn) < 0)
572: {
573: SET_WAKEUP_DOWN(dmn);
574: zlog_err("%s state -> down : initial connection attempt failed",
575: dmn->name);
576: dmn->state = DAEMON_DOWN;
577: }
578: return 0;
579: }
580:
581: static void
582: daemon_down(struct daemon *dmn, const char *why)
583: {
584: if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
585: zlog_err("%s state -> down : %s",dmn->name,why);
586: else if (gs.loglevel > LOG_DEBUG)
587: zlog_debug("%s still down : %s",dmn->name,why);
588: if (IS_UP(dmn))
589: gs.numdown++;
590: dmn->state = DAEMON_DOWN;
591: if (dmn->fd >= 0)
592: {
593: close(dmn->fd);
594: dmn->fd = -1;
595: }
596: THREAD_OFF(dmn->t_read);
597: THREAD_OFF(dmn->t_write);
598: THREAD_OFF(dmn->t_wakeup);
599: if (try_connect(dmn) < 0)
600: SET_WAKEUP_DOWN(dmn);
601: phase_check();
602: }
603:
604: static int
605: handle_read(struct thread *t_read)
606: {
607: struct daemon *dmn = THREAD_ARG(t_read);
608: static const char resp[sizeof(PING_TOKEN)+4] = PING_TOKEN "\n";
609: char buf[sizeof(resp)+100];
610: ssize_t rc;
611: struct timeval delay;
612:
613: dmn->t_read = NULL;
614: if ((rc = read(dmn->fd,buf,sizeof(buf))) < 0)
615: {
616: char why[100];
617:
618: if (ERRNO_IO_RETRY(errno))
619: {
620: /* Pretend it never happened. */
621: SET_READ_HANDLER(dmn);
622: return 0;
623: }
624: snprintf(why,sizeof(why),"unexpected read error: %s",
625: safe_strerror(errno));
626: daemon_down(dmn,why);
627: return 0;
628: }
629: if (rc == 0)
630: {
631: daemon_down(dmn,"read returned EOF");
632: return 0;
633: }
634: if (!dmn->echo_sent.tv_sec)
635: {
636: char why[sizeof(buf)+100];
637: snprintf(why,sizeof(why),"unexpected read returns %d bytes: %.*s",
638: (int)rc,(int)rc,buf);
639: daemon_down(dmn,why);
640: return 0;
641: }
642:
643: /* We are expecting an echo response: is there any chance that the
644: response would not be returned entirely in the first read? That
645: seems inconceivable... */
646: if ((rc != sizeof(resp)) || memcmp(buf,resp,sizeof(resp)))
647: {
648: char why[100+sizeof(buf)];
649: snprintf(why,sizeof(why),"read returned bad echo response of %d bytes "
650: "(expecting %u): %.*s",
651: (int)rc,(u_int)sizeof(resp),(int)rc,buf);
652: daemon_down(dmn,why);
653: return 0;
654: }
655:
656: time_elapsed(&delay,&dmn->echo_sent);
657: dmn->echo_sent.tv_sec = 0;
658: if (dmn->state == DAEMON_UNRESPONSIVE)
659: {
660: if (delay.tv_sec < gs.timeout)
661: {
662: dmn->state = DAEMON_UP;
663: zlog_warn("%s state -> up : echo response received after %ld.%06ld "
1.1.1.3 ! misho 664: "seconds", dmn->name,
! 665: (long)delay.tv_sec, (long)delay.tv_usec);
1.1 misho 666: }
667: else
668: zlog_warn("%s: slow echo response finally received after %ld.%06ld "
1.1.1.3 ! misho 669: "seconds", dmn->name,
! 670: (long)delay.tv_sec, (long)delay.tv_usec);
1.1 misho 671: }
672: else if (gs.loglevel > LOG_DEBUG+1)
673: zlog_debug("%s: echo response received after %ld.%06ld seconds",
1.1.1.3 ! misho 674: dmn->name, (long)delay.tv_sec, (long)delay.tv_usec);
1.1 misho 675:
676: SET_READ_HANDLER(dmn);
677: if (dmn->t_wakeup)
678: thread_cancel(dmn->t_wakeup);
679: SET_WAKEUP_ECHO(dmn);
680:
681: return 0;
682: }
683:
684: static void
685: daemon_up(struct daemon *dmn, const char *why)
686: {
687: dmn->state = DAEMON_UP;
688: gs.numdown--;
689: dmn->connect_tries = 0;
690: zlog_notice("%s state -> up : %s",dmn->name,why);
691: if (gs.do_ping)
692: SET_WAKEUP_ECHO(dmn);
693: phase_check();
694: }
695:
696: static int
697: check_connect(struct thread *t_write)
698: {
699: struct daemon *dmn = THREAD_ARG(t_write);
700: int sockerr;
701: socklen_t reslen = sizeof(sockerr);
702:
703: dmn->t_write = NULL;
704: if (getsockopt(dmn->fd,SOL_SOCKET,SO_ERROR,(char *)&sockerr,&reslen) < 0)
705: {
706: zlog_warn("%s: check_connect: getsockopt failed: %s",
707: dmn->name,safe_strerror(errno));
708: daemon_down(dmn,"getsockopt failed checking connection success");
709: return 0;
710: }
711: if ((reslen == sizeof(sockerr)) && sockerr)
712: {
713: char why[100];
714: snprintf(why,sizeof(why),
715: "getsockopt reports that connection attempt failed: %s",
716: safe_strerror(sockerr));
717: daemon_down(dmn,why);
718: return 0;
719: }
720:
721: daemon_up(dmn,"delayed connect succeeded");
722: return 0;
723: }
724:
725: static int
726: wakeup_connect_hanging(struct thread *t_wakeup)
727: {
728: struct daemon *dmn = THREAD_ARG(t_wakeup);
729: char why[100];
730:
731: dmn->t_wakeup = NULL;
732: snprintf(why,sizeof(why),"connection attempt timed out after %ld seconds",
733: gs.timeout);
734: daemon_down(dmn,why);
735: return 0;
736: }
737:
738: /* Making connection to protocol daemon. */
739: static int
740: try_connect(struct daemon *dmn)
741: {
742: int sock;
743: struct sockaddr_un addr;
744: socklen_t len;
745:
746: if (gs.loglevel > LOG_DEBUG+1)
747: zlog_debug("%s: attempting to connect",dmn->name);
748: dmn->connect_tries++;
749:
750: memset (&addr, 0, sizeof (struct sockaddr_un));
751: addr.sun_family = AF_UNIX;
752: snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty",
753: gs.vtydir,dmn->name);
754: #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
755: len = addr.sun_len = SUN_LEN(&addr);
756: #else
757: len = sizeof (addr.sun_family) + strlen (addr.sun_path);
758: #endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
759:
760: /* Quick check to see if we might succeed before we go to the trouble
761: of creating a socket. */
762: if (access(addr.sun_path, W_OK) < 0)
763: {
764: if (errno != ENOENT)
765: zlog_err("%s: access to socket %s denied: %s",
766: dmn->name,addr.sun_path,safe_strerror(errno));
767: return -1;
768: }
769:
770: if ((sock = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
771: {
772: zlog_err("%s(%s): cannot make socket: %s",
773: __func__,addr.sun_path, safe_strerror(errno));
774: return -1;
775: }
776:
777: if (set_nonblocking(sock) < 0)
778: {
779: zlog_err("%s(%s): set_nonblocking(%d) failed",
780: __func__, addr.sun_path, sock);
781: close(sock);
782: return -1;
783: }
784:
785: if (connect (sock, (struct sockaddr *) &addr, len) < 0)
786: {
787: if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK))
788: {
789: if (gs.loglevel > LOG_DEBUG)
790: zlog_debug("%s(%s): connect failed: %s",
791: __func__,addr.sun_path, safe_strerror(errno));
792: close (sock);
793: return -1;
794: }
795: if (gs.loglevel > LOG_DEBUG)
796: zlog_debug("%s: connection in progress",dmn->name);
797: dmn->state = DAEMON_CONNECTING;
798: dmn->fd = sock;
799: dmn->t_write = thread_add_write(master,check_connect,dmn,dmn->fd);
800: dmn->t_wakeup = thread_add_timer(master,wakeup_connect_hanging,dmn,
801: gs.timeout);
802: SET_READ_HANDLER(dmn);
803: return 0;
804: }
805:
806: dmn->fd = sock;
807: SET_READ_HANDLER(dmn);
808: daemon_up(dmn,"connect succeeded");
809: return 1;
810: }
811:
812: static int
813: phase_hanging(struct thread *t_hanging)
814: {
815: gs.t_phase_hanging = NULL;
816: zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart",
817: phase_str[gs.phase],PHASE_TIMEOUT);
818: gs.phase = PHASE_NONE;
819: return 0;
820: }
821:
822: static void
823: set_phase(restart_phase_t new_phase)
824: {
825: gs.phase = new_phase;
826: if (gs.t_phase_hanging)
827: thread_cancel(gs.t_phase_hanging);
828: gs.t_phase_hanging = thread_add_timer(master,phase_hanging,NULL,
829: PHASE_TIMEOUT);
830: }
831:
832: static void
833: phase_check(void)
834: {
835: switch (gs.phase)
836: {
837: case PHASE_NONE:
838: break;
839: case PHASE_STOPS_PENDING:
840: if (gs.numpids)
841: break;
842: zlog_info("Phased restart: all routing daemon stop jobs have completed.");
843: set_phase(PHASE_WAITING_DOWN);
844: /*FALLTHRU*/
845: case PHASE_WAITING_DOWN:
846: if (gs.numdown+IS_UP(gs.special) < gs.numdaemons)
847: break;
848: zlog_info("Phased restart: all routing daemons now down.");
849: run_job(&gs.special->restart,"restart",gs.restart_command,1,1);
850: set_phase(PHASE_ZEBRA_RESTART_PENDING);
851: /*FALLTHRU*/
852: case PHASE_ZEBRA_RESTART_PENDING:
853: if (gs.special->restart.pid)
854: break;
855: zlog_info("Phased restart: %s restart job completed.",gs.special->name);
856: set_phase(PHASE_WAITING_ZEBRA_UP);
857: /*FALLTHRU*/
858: case PHASE_WAITING_ZEBRA_UP:
859: if (!IS_UP(gs.special))
860: break;
861: zlog_info("Phased restart: %s is now up.",gs.special->name);
862: {
863: struct daemon *dmn;
864: for (dmn = gs.daemons; dmn; dmn = dmn->next)
865: {
866: if (dmn != gs.special)
867: run_job(&dmn->restart,"start",gs.start_command,1,0);
868: }
869: }
870: gs.phase = PHASE_NONE;
871: THREAD_OFF(gs.t_phase_hanging);
872: zlog_notice("Phased global restart has completed.");
873: break;
874: }
875: }
876:
877: static void
878: try_restart(struct daemon *dmn)
879: {
880: switch (gs.mode)
881: {
882: case MODE_MONITOR:
883: return;
884: case MODE_GLOBAL_RESTART:
885: run_job(&gs.restart,"restart",gs.restart_command,0,1);
886: break;
887: case MODE_SEPARATE_RESTART:
888: run_job(&dmn->restart,"restart",gs.restart_command,0,1);
889: break;
890: case MODE_PHASED_ZEBRA_RESTART:
891: if (dmn != gs.special)
892: {
893: if ((gs.special->state == DAEMON_UP) && (gs.phase == PHASE_NONE))
894: run_job(&dmn->restart,"restart",gs.restart_command,0,1);
895: else
896: zlog_debug("%s: postponing restart attempt because master %s daemon "
897: "not up [%s], or phased restart in progress",
898: dmn->name,gs.special->name,state_str[gs.special->state]);
899: break;
900: }
901: /*FALLTHRU*/
902: case MODE_PHASED_ALL_RESTART:
903: if ((gs.phase != PHASE_NONE) || gs.numpids)
904: {
905: if (gs.loglevel > LOG_DEBUG+1)
906: zlog_debug("postponing phased global restart: restart already in "
907: "progress [%s], or outstanding child processes [%d]",
908: phase_str[gs.phase],gs.numpids);
909: break;
910: }
911: /* Is it too soon for a restart? */
912: {
913: struct timeval delay;
914: if (time_elapsed(&delay,&gs.special->restart.time)->tv_sec <
915: gs.special->restart.interval)
916: {
917: if (gs.loglevel > LOG_DEBUG+1)
918: zlog_debug("postponing phased global restart: "
919: "elapsed time %ld < retry interval %ld",
920: (long)delay.tv_sec,gs.special->restart.interval);
921: break;
922: }
923: }
924: zlog_info("Phased restart: stopping all routing daemons.");
925: /* First step: stop all other daemons. */
926: for (dmn = gs.daemons; dmn; dmn = dmn->next)
927: {
928: if (dmn != gs.special)
929: run_job(&dmn->restart,"stop",gs.stop_command,1,1);
930: }
931: set_phase(PHASE_STOPS_PENDING);
932: break;
933: default:
934: zlog_err("error: unknown restart mode %d",gs.mode);
935: break;
936: }
937: }
938:
939: static int
940: wakeup_unresponsive(struct thread *t_wakeup)
941: {
942: struct daemon *dmn = THREAD_ARG(t_wakeup);
943:
944: dmn->t_wakeup = NULL;
945: if (dmn->state != DAEMON_UNRESPONSIVE)
946: zlog_err("%s: no longer unresponsive (now %s), "
947: "wakeup should have been cancelled!",
948: dmn->name,state_str[dmn->state]);
949: else
950: {
951: SET_WAKEUP_UNRESPONSIVE(dmn);
952: try_restart(dmn);
953: }
954: return 0;
955: }
956:
957: static int
958: wakeup_no_answer(struct thread *t_wakeup)
959: {
960: struct daemon *dmn = THREAD_ARG(t_wakeup);
961:
962: dmn->t_wakeup = NULL;
963: dmn->state = DAEMON_UNRESPONSIVE;
964: zlog_err("%s state -> unresponsive : no response yet to ping "
965: "sent %ld seconds ago",dmn->name,gs.timeout);
966: if (gs.unresponsive_restart)
967: {
968: SET_WAKEUP_UNRESPONSIVE(dmn);
969: try_restart(dmn);
970: }
971: return 0;
972: }
973:
974: static int
975: wakeup_send_echo(struct thread *t_wakeup)
976: {
977: static const char echocmd[] = "echo " PING_TOKEN;
978: ssize_t rc;
979: struct daemon *dmn = THREAD_ARG(t_wakeup);
980:
981: dmn->t_wakeup = NULL;
982: if (((rc = write(dmn->fd,echocmd,sizeof(echocmd))) < 0) ||
983: ((size_t)rc != sizeof(echocmd)))
984: {
985: char why[100+sizeof(echocmd)];
986: snprintf(why,sizeof(why),"write '%s' returned %d instead of %u",
987: echocmd,(int)rc,(u_int)sizeof(echocmd));
988: daemon_down(dmn,why);
989: }
990: else
991: {
992: gettimeofday(&dmn->echo_sent,NULL);
993: dmn->t_wakeup = thread_add_timer(master,wakeup_no_answer,dmn,gs.timeout);
994: }
995: return 0;
996: }
997:
/* Handler registered for SIGINT and SIGTERM: log the event and exit with
   a success status. */
static void
sigint(void)
{
  zlog_notice("Terminating on signal");
  exit(EXIT_SUCCESS);
}
1004:
/* Return 1 if CMD is usable as a command template: its first '%' must be
   followed directly by 's' and no further '%' may appear after it.
   Returns 0 otherwise, including when CMD contains no '%' at all. */
static int
valid_command(const char *cmd)
{
  const char *percent = strchr(cmd,'%');

  if (percent == NULL)
    return 0;
  if (percent[1] != 's')
    return 0;
  return strchr(percent+1,'%') == NULL;
}
1012:
/* This is an ugly hack to circumvent problems with passing command-line
   arguments that contain spaces.  The fix is to use a configuration file.

   Returns a newly allocated copy of CMD in which every non-overlapping
   occurrence of BLANKSTR, scanning left to right, has been replaced by a
   single space.  The caller owns the returned string.  The process exits
   if memory cannot be allocated.

   An empty BLANKSTR leaves the copy unchanged.  Scanning resumes just after
   each inserted space, so the loop always terminates, even when BLANKSTR
   itself is or contains a space. */
static char *
translate_blanks(const char *cmd, const char *blankstr)
{
  char *res;
  char *p;
  size_t bslen = strlen(blankstr);

  if (!(res = strdup(cmd)))
    {
      perror("strdup");
      exit(1);
    }

  /* strstr() matches an empty needle at every position; there is nothing
     sensible to replace in that case. */
  if (bslen == 0)
    return res;

  p = res;
  while ((p = strstr(p,blankstr)) != NULL)
    {
      *p = ' ';
      /* Close the gap left by the remainder of the matched token. */
      if (bslen != 1)
	memmove(p+1,p+bslen,strlen(p+bslen)+1);
      /* Step past the space just written. */
      p++;
    }
  return res;
}
1035:
1036: int
1037: main(int argc, char **argv)
1038: {
1039: const char *progname;
1040: int opt;
1041: int daemon_mode = 0;
1042: const char *pidfile = DEFAULT_PIDFILE;
1043: const char *special = "zebra";
1044: const char *blankstr = NULL;
1045: static struct quagga_signal_t my_signals[] =
1046: {
1047: {
1048: .signal = SIGINT,
1049: .handler = sigint,
1050: },
1051: {
1052: .signal = SIGTERM,
1053: .handler = sigint,
1054: },
1055: {
1056: .signal = SIGCHLD,
1057: .handler = sigchild,
1058: },
1059: };
1060:
1061: if ((progname = strrchr (argv[0], '/')) != NULL)
1062: progname++;
1063: else
1064: progname = argv[0];
1065:
1066: gs.restart.name = "all";
1067: while ((opt = getopt_long(argc, argv, "aAb:dek:l:m:M:i:p:r:R:S:s:t:T:zvh",
1068: longopts, 0)) != EOF)
1069: {
1070: switch (opt)
1071: {
1072: case 0:
1073: break;
1074: case 'a':
1075: if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART))
1076: {
1077: fputs("Ambiguous operating mode selected.\n",stderr);
1078: return usage(progname,1);
1079: }
1080: gs.mode = MODE_PHASED_ZEBRA_RESTART;
1081: break;
1082: case 'A':
1083: if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART))
1084: {
1085: fputs("Ambiguous operating mode selected.\n",stderr);
1086: return usage(progname,1);
1087: }
1088: gs.mode = MODE_PHASED_ALL_RESTART;
1089: break;
1090: case 'b':
1091: blankstr = optarg;
1092: break;
1093: case 'd':
1094: daemon_mode = 1;
1095: break;
1096: case 'e':
1097: gs.do_ping = 0;
1098: break;
1099: case 'k':
1100: if (!valid_command(optarg))
1101: {
1102: fprintf(stderr,"Invalid kill command, must contain '%%s': %s\n",
1103: optarg);
1104: return usage(progname,1);
1105: }
1106: gs.stop_command = optarg;
1107: break;
1108: case 'l':
1109: {
1110: char garbage[3];
1111: if ((sscanf(optarg,"%d%1s",&gs.loglevel,garbage) != 1) ||
1112: (gs.loglevel < LOG_EMERG))
1113: {
1114: fprintf(stderr,"Invalid loglevel argument: %s\n",optarg);
1115: return usage(progname,1);
1116: }
1117: }
1118: break;
1119: case 'm':
1120: {
1121: char garbage[3];
1122: if ((sscanf(optarg,"%ld%1s",
1123: &gs.min_restart_interval,garbage) != 1) ||
1124: (gs.min_restart_interval < 0))
1125: {
1126: fprintf(stderr,"Invalid min_restart_interval argument: %s\n",
1127: optarg);
1128: return usage(progname,1);
1129: }
1130: }
1131: break;
1132: case 'M':
1133: {
1134: char garbage[3];
1135: if ((sscanf(optarg,"%ld%1s",
1136: &gs.max_restart_interval,garbage) != 1) ||
1137: (gs.max_restart_interval < 0))
1138: {
1139: fprintf(stderr,"Invalid max_restart_interval argument: %s\n",
1140: optarg);
1141: return usage(progname,1);
1142: }
1143: }
1144: break;
1145: case 'i':
1146: {
1147: char garbage[3];
1148: int period;
1149: if ((sscanf(optarg,"%d%1s",&period,garbage) != 1) ||
1150: (gs.period < 1))
1151: {
1152: fprintf(stderr,"Invalid interval argument: %s\n",optarg);
1153: return usage(progname,1);
1154: }
1155: gs.period = 1000*period;
1156: }
1157: break;
1158: case 'p':
1159: pidfile = optarg;
1160: break;
1161: case 'r':
1162: if ((gs.mode == MODE_GLOBAL_RESTART) ||
1163: (gs.mode == MODE_SEPARATE_RESTART))
1164: {
1165: fputs("Ambiguous operating mode selected.\n",stderr);
1166: return usage(progname,1);
1167: }
1168: if (!valid_command(optarg))
1169: {
1170: fprintf(stderr,
1171: "Invalid restart command, must contain '%%s': %s\n",
1172: optarg);
1173: return usage(progname,1);
1174: }
1175: gs.restart_command = optarg;
1176: if (gs.mode == MODE_MONITOR)
1177: gs.mode = MODE_SEPARATE_RESTART;
1178: break;
1179: case 'R':
1180: if (gs.mode != MODE_MONITOR)
1181: {
1182: fputs("Ambiguous operating mode selected.\n",stderr);
1183: return usage(progname,1);
1184: }
1185: if (strchr(optarg,'%'))
1186: {
1187: fprintf(stderr,
1188: "Invalid restart-all arg, must not contain '%%s': %s\n",
1189: optarg);
1190: return usage(progname,1);
1191: }
1192: gs.restart_command = optarg;
1193: gs.mode = MODE_GLOBAL_RESTART;
1194: break;
1195: case 's':
1196: if (!valid_command(optarg))
1197: {
1198: fprintf(stderr,"Invalid start command, must contain '%%s': %s\n",
1199: optarg);
1200: return usage(progname,1);
1201: }
1202: gs.start_command = optarg;
1203: break;
1204: case 'S':
1205: gs.vtydir = optarg;
1206: break;
1207: case 't':
1208: {
1209: char garbage[3];
1210: if ((sscanf(optarg,"%ld%1s",&gs.timeout,garbage) != 1) ||
1211: (gs.timeout < 1))
1212: {
1213: fprintf(stderr,"Invalid timeout argument: %s\n",optarg);
1214: return usage(progname,1);
1215: }
1216: }
1217: break;
1218: case 'T':
1219: {
1220: char garbage[3];
1221: if ((sscanf(optarg,"%ld%1s",&gs.restart_timeout,garbage) != 1) ||
1222: (gs.restart_timeout < 1))
1223: {
1224: fprintf(stderr,"Invalid restart timeout argument: %s\n",optarg);
1225: return usage(progname,1);
1226: }
1227: }
1228: break;
1229: case 'z':
1230: gs.unresponsive_restart = 1;
1231: break;
1232: case 'v':
1233: printf ("%s version %s\n", progname, QUAGGA_VERSION);
1234: puts("Copyright 2004 Andrew J. Schorr");
1235: return 0;
1236: case 'h':
1237: return usage(progname,0);
1238: default:
1239: fputs("Invalid option.\n",stderr);
1240: return usage(progname,1);
1241: }
1242: }
1243:
1244: if (gs.unresponsive_restart && (gs.mode == MODE_MONITOR))
1245: {
1246: fputs("Option -z requires a -r or -R restart option.\n",stderr);
1247: return usage(progname,1);
1248: }
1249: switch (gs.mode)
1250: {
1251: case MODE_MONITOR:
1252: if (gs.restart_command || gs.start_command || gs.stop_command)
1253: {
1254: fprintf(stderr,"No kill/(re)start commands needed for %s mode.\n",
1255: mode_str[gs.mode]);
1256: return usage(progname,1);
1257: }
1258: break;
1259: case MODE_GLOBAL_RESTART:
1260: case MODE_SEPARATE_RESTART:
1261: if (!gs.restart_command || gs.start_command || gs.stop_command)
1262: {
1263: fprintf(stderr,"No start/kill commands needed in [%s] mode.\n",
1264: mode_str[gs.mode]);
1265: return usage(progname,1);
1266: }
1267: break;
1268: case MODE_PHASED_ZEBRA_RESTART:
1269: case MODE_PHASED_ALL_RESTART:
1270: if (!gs.restart_command || !gs.start_command || !gs.stop_command)
1271: {
1272: fprintf(stderr,
1273: "Need start, kill, and restart commands in [%s] mode.\n",
1274: mode_str[gs.mode]);
1275: return usage(progname,1);
1276: }
1277: break;
1278: }
1279:
1280: if (blankstr)
1281: {
1282: if (gs.restart_command)
1283: gs.restart_command = translate_blanks(gs.restart_command,blankstr);
1284: if (gs.start_command)
1285: gs.start_command = translate_blanks(gs.start_command,blankstr);
1286: if (gs.stop_command)
1287: gs.stop_command = translate_blanks(gs.stop_command,blankstr);
1288: }
1289:
1290: gs.restart.interval = gs.min_restart_interval;
1291: master = thread_master_create();
1.1.1.2 misho 1292: signal_init (master, array_size(my_signals), my_signals);
1.1 misho 1293: srandom(time(NULL));
1294:
1295: {
1296: int i;
1297: struct daemon *tail = NULL;
1298:
1299: for (i = optind; i < argc; i++)
1300: {
1301: struct daemon *dmn;
1302:
1303: if (!(dmn = (struct daemon *)calloc(1,sizeof(*dmn))))
1304: {
1305: fprintf(stderr,"calloc(1,%u) failed: %s\n",
1306: (u_int)sizeof(*dmn), safe_strerror(errno));
1307: return 1;
1308: }
1309: dmn->name = dmn->restart.name = argv[i];
1310: dmn->state = DAEMON_INIT;
1311: gs.numdaemons++;
1312: gs.numdown++;
1313: dmn->fd = -1;
1314: dmn->t_wakeup = thread_add_timer_msec(master,wakeup_init,dmn,
1315: 100+(random() % 900));
1316: dmn->restart.interval = gs.min_restart_interval;
1317: if (tail)
1318: tail->next = dmn;
1319: else
1320: gs.daemons = dmn;
1321: tail = dmn;
1322:
1323: if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
1324: (gs.mode == MODE_PHASED_ALL_RESTART)) &&
1325: !strcmp(dmn->name,special))
1326: gs.special = dmn;
1327: }
1328: }
1329: if (!gs.daemons)
1330: {
1331: fputs("Must specify one or more daemons to monitor.\n",stderr);
1332: return usage(progname,1);
1333: }
1334: if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
1335: (gs.mode == MODE_PHASED_ALL_RESTART)) && !gs.special)
1336: {
1337: fprintf(stderr,"In mode [%s], but cannot find master daemon %s\n",
1338: mode_str[gs.mode],special);
1339: return usage(progname,1);
1340: }
1341: if (gs.special && (gs.numdaemons < 2))
1342: {
1343: fprintf(stderr,"Mode [%s] does not make sense with only 1 daemon "
1344: "to watch.\n",mode_str[gs.mode]);
1345: return usage(progname,1);
1346: }
1347:
1348: zlog_default = openzlog(progname, ZLOG_NONE,
1349: LOG_CONS|LOG_NDELAY|LOG_PID, LOG_DAEMON);
1350: zlog_set_level(NULL, ZLOG_DEST_MONITOR, ZLOG_DISABLED);
1351: if (daemon_mode)
1352: {
1353: zlog_set_level(NULL, ZLOG_DEST_SYSLOG, MIN(gs.loglevel,LOG_DEBUG));
1354: if (daemon (0, 0) < 0)
1355: {
1356: fprintf(stderr, "Watchquagga daemon failed: %s", strerror(errno));
1357: exit (1);
1358: }
1359: }
1360: else
1361: zlog_set_level(NULL, ZLOG_DEST_STDOUT, MIN(gs.loglevel,LOG_DEBUG));
1362:
1363: /* Make sure we're not already running. */
1364: pid_output (pidfile);
1365:
1366: /* Announce which daemons are being monitored. */
1367: {
1368: struct daemon *dmn;
1369: size_t len = 0;
1370:
1371: for (dmn = gs.daemons; dmn; dmn = dmn->next)
1372: len += strlen(dmn->name)+1;
1373:
1374: {
1375: char buf[len+1];
1376: char *p = buf;
1377:
1378: for (dmn = gs.daemons; dmn; dmn = dmn->next)
1379: {
1380: if (p != buf)
1381: *p++ = ' ';
1382: strcpy(p,dmn->name);
1383: p += strlen(p);
1384: }
1385: zlog_notice("%s %s watching [%s], mode [%s]",
1386: progname, QUAGGA_VERSION, buf, mode_str[gs.mode]);
1387: }
1388: }
1389:
1390: {
1391: struct thread thread;
1392:
1393: while (thread_fetch (master, &thread))
1394: thread_call (&thread);
1395: }
1396:
1397: /* Not reached. */
1398: return 0;
1399: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>