Annotation of embedaddon/smartmontools/smartd.cpp, revision 1.1.1.1
1.1 misho 1: /*
2: * Home page of code is: http://smartmontools.sourceforge.net
3: *
4: * Copyright (C) 2002-11 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5: * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
6: * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
7: * Copyright (C) 2008-11 Christian Franke <smartmontools-support@lists.sourceforge.net>
8: *
9: * This program is free software; you can redistribute it and/or modify
10: * it under the terms of the GNU General Public License as published by
11: * the Free Software Foundation; either version 2, or (at your option)
12: * any later version.
13: *
14: * You should have received a copy of the GNU General Public License
15: * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
16: *
17: * This code was originally developed as a Senior Thesis by Michael Cornwell
18: * at the Concurrent Systems Laboratory (now part of the Storage Systems
19: * Research Center), Jack Baskin School of Engineering, University of
20: * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
21: *
22: */
23:
24: #ifndef _GNU_SOURCE
25: // TODO: Why is this define necessary?
26: #define _GNU_SOURCE
27: #endif
28:
29: // unconditionally included files
30: #include <stdio.h>
31: #include <sys/types.h>
32: #include <sys/stat.h> // umask
33: #include <signal.h>
34: #include <fcntl.h>
35: #include <string.h>
36: #include <syslog.h>
37: #include <stdarg.h>
38: #include <stdlib.h>
39: #include <errno.h>
40: #include <time.h>
41: #include <limits.h>
42: #include <getopt.h>
43:
44: #include <stdexcept>
45: #include <string>
46: #include <vector>
47: #include <algorithm> // std::replace()
48:
49: // see which system files to conditionally include
50: #include "config.h"
51:
52: // conditionally included files
53: #ifndef _WIN32
54: #include <sys/wait.h>
55: #endif
56: #ifdef HAVE_UNISTD_H
57: #include <unistd.h>
58: #endif
59: #ifdef HAVE_NETDB_H
60: #include <netdb.h>
61: #endif
62:
63: #ifdef _WIN32
64: #ifdef _MSC_VER
65: #pragma warning(disable:4761) // "conversion supplied"
66: typedef unsigned short mode_t;
67: typedef int pid_t;
68: #endif
69: #include <io.h> // umask()
70: #include <process.h> // getpid()
71: #endif // _WIN32
72:
73: #ifdef __CYGWIN__
74: #include <io.h> // setmode()
75: #endif // __CYGWIN__
76:
77: #ifdef HAVE_LIBCAP_NG
78: #include <cap-ng.h>
79: #endif // LIBCAP_NG
80:
81: // locally included files
82: #include "int64.h"
83: #include "atacmds.h"
84: #include "dev_interface.h"
85: #include "knowndrives.h"
86: #include "scsicmds.h"
87: #include "utility.h"
88:
89: // This is for solaris, where signal() resets the handler to SIG_DFL
90: // after the first signal is caught.
91: #ifdef HAVE_SIGSET
92: #define SIGNALFN sigset
93: #else
94: #define SIGNALFN signal
95: #endif
96:
97: #ifdef _WIN32
98: #include "hostname_win32.h" // gethost/domainname()
99: #define HAVE_GETHOSTNAME 1
100: #define HAVE_GETDOMAINNAME 1
101: // fork()/signal()/initd simulation for native Windows
102: #include "daemon_win32.h" // daemon_main/detach/signal()
103: #undef SIGNALFN
104: #define SIGNALFN daemon_signal
105: #define strsignal daemon_strsignal
106: #define sleep daemon_sleep
107: // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
108: #define SIGQUIT SIGBREAK
109: #define SIGQUIT_KEYNAME "CONTROL-Break"
110: #else // _WIN32
111: #ifdef __CYGWIN__
112: // 2x CONTROL-C simulates missing SIGQUIT via keyboard
113: #define SIGQUIT_KEYNAME "2x CONTROL-C"
114: #else // __CYGWIN__
115: #define SIGQUIT_KEYNAME "CONTROL-\\"
116: #endif // __CYGWIN__
117: #endif // _WIN32
118:
119: #if defined (__SVR4) && defined (__sun)
120: extern "C" int getdomainname(char *, int); // no declaration in header files!
121: #endif
122:
123: #define ARGUSED(x) ((void)(x))
124:
125: const char * smartd_cpp_cvsid = "$Id: smartd.cpp 3451 2011-10-15 14:27:08Z chrfranke $"
126: CONFIG_H_CVSID;
127:
128: // smartd exit codes
129: #define EXIT_BADCMD 1 // command line did not parse
130: #define EXIT_BADCONF 2 // syntax error in config file
131: #define EXIT_STARTUP 3 // problem forking daemon
132: #define EXIT_PID 4 // problem creating pid file
133: #define EXIT_NOCONF 5 // config file does not exist
134: #define EXIT_READCONF 6 // config file exists but cannot be read
135:
136: #define EXIT_NOMEM 8 // out of memory
137: #define EXIT_BADCODE 10 // internal error - should NEVER happen
138:
139: #define EXIT_BADDEV 16 // we can't monitor this device
140: #define EXIT_NODEV 17 // no devices to monitor
141:
142: #define EXIT_SIGNAL 254 // abort on signal
143:
144:
145: // command-line: 1=debug mode, 2=print presets
146: static unsigned char debugmode = 0;
147:
148: // command-line: how long to sleep between checks
149: #define CHECKTIME 1800
150: static int checktime=CHECKTIME;
151:
152: // command-line: name of PID file (empty for no pid file)
153: static std::string pid_file;
154:
155: // command-line: path prefix of persistent state file, empty if no persistence.
156: static std::string state_path_prefix
157: #ifdef SMARTMONTOOLS_SAVESTATES
158: = SMARTMONTOOLS_SAVESTATES
159: #endif
160: ;
161:
162: // command-line: path prefix of attribute log file, empty if no logs.
163: static std::string attrlog_path_prefix
164: #ifdef SMARTMONTOOLS_ATTRIBUTELOG
165: = SMARTMONTOOLS_ATTRIBUTELOG
166: #endif
167: ;
168:
169: // configuration file name
170: static const char * configfile;
171: // configuration file "name" if read from stdin
172: static const char * const configfile_stdin = "<stdin>";
173: // path of alternate configuration file
174: static std::string configfile_alt;
175:
176: // command-line: when should we exit?
177: static int quit=0;
178:
179: // command-line; this is the default syslog(3) log facility to use.
180: static int facility=LOG_DAEMON;
181:
182: #ifndef _WIN32
183: // command-line: fork into background?
184: static bool do_fork=true;
185: #endif
186:
187: #ifdef HAVE_LIBCAP_NG
188: // command-line: enable capabilities?
189: static bool enable_capabilities = false;
190: #endif
191:
192: #if defined(_WIN32) || defined(__CYGWIN__)
193: // TODO: This smartctl only variable is also used in os_win32.cpp
194: unsigned char failuretest_permissive = 0;
195: #endif
196:
197: // set to one if we catch a USR1 (check devices now)
198: static volatile int caughtsigUSR1=0;
199:
200: #ifdef _WIN32
201: // set to one if we catch a USR2 (toggle debug mode)
202: static volatile int caughtsigUSR2=0;
203: #endif
204:
205: // set to one if we catch a HUP (reload config file). In debug mode,
206: // set to two, if we catch INT (also reload config file).
207: static volatile int caughtsigHUP=0;
208:
209: // set to signal value if we catch INT, QUIT, or TERM
210: static volatile int caughtsigEXIT=0;
211:
212: // This function prints either to stdout or to the syslog as needed.
213: static void PrintOut(int priority, const char *fmt, ...)
214: __attribute__ ((format(printf, 2, 3)));
215:
// Attribute monitoring flags.
// Each flag occupies its own bit, so several may be combined for one
// attribute.  See monitor_attr_flags below.
enum {
  MONITOR_IGN_FAILUSE = 1 << 0,
  MONITOR_IGNORE      = 1 << 1,
  MONITOR_RAW_PRINT   = 1 << 2,
  MONITOR_RAW         = 1 << 3,
  MONITOR_AS_CRIT     = 1 << 4,
  MONITOR_RAW_AS_CRIT = 1 << 5
};
226:
// Array of flag bytes, one entry per attribute ID.
// ID 0 and IDs outside the table are never stored and always read back
// as "not set".
class attribute_flags
{
public:
  // Start with all flags cleared.
  attribute_flags()
    { memset(m_flags, 0, sizeof(m_flags)); }

  // Return true if any bit of 'flag' is set for attribute 'id'.
  bool is_set(int id, unsigned char flag) const
    {
      if (!valid_id(id))
        return false;
      return ((m_flags[id] & flag) != 0);
    }

  // Add the bits of 'flags' to attribute 'id'; invalid IDs are ignored.
  void set(int id, unsigned char flags)
    {
      if (!valid_id(id))
        return;
      m_flags[id] |= flags;
    }

private:
  // True if 'id' is a usable index into m_flags (0 is excluded).
  bool valid_id(int id) const
    { return (0 < id && id < (int)sizeof(m_flags)); }

  unsigned char m_flags[256];
};
246:
247:
248: /// Configuration data for a device. Read from smartd.conf.
249: /// Supports copy & assignment and is compatible with STL containers.
250: struct dev_config
251: {
252: int lineno; // Line number of entry in file
253: std::string name; // Device name (with optional extra info)
254: std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
255: std::string dev_type; // Device type argument from -d directive, empty if none
256: std::string state_file; // Path of the persistent state file, empty if none
257: std::string attrlog_file; // Path of the persistent attrlog file, empty if none
258: bool smartcheck; // Check SMART status
259: bool usagefailed; // Check for failed Usage Attributes
260: bool prefail; // Track changes in Prefail Attributes
261: bool usage; // Track changes in Usage Attributes
262: bool selftest; // Monitor number of selftest errors
263: bool errorlog; // Monitor number of ATA errors
264: bool xerrorlog; // Monitor number of ATA errors (Extended Comprehensive error log)
265: bool offlinests; // Monitor changes in offline data collection status
266: bool selfteststs; // Monitor changes in self-test execution status
267: bool permissive; // Ignore failed SMART commands
268: char autosave; // 1=disable, 2=enable Autosave Attributes
269: char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
270: unsigned char fix_firmwarebug; // FIX_*, see atacmds.h
271: bool ignorepresets; // Ignore database of -v options
272: bool showpresets; // Show database entry for this device
273: bool removable; // Device may disappear (not be present)
274: char powermode; // skip check, if disk in idle or standby mode
275: bool powerquiet; // skip powermode 'skipping checks' message
276: int powerskipmax; // how many times can be check skipped
277: unsigned char tempdiff; // Track Temperature changes >= this limit
278: unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
279: regular_expression test_regex; // Regex for scheduled testing
280:
281: // Configuration of email warning messages
282: std::string emailcmdline; // script to execute, empty if no messages
283: std::string emailaddress; // email address, or empty
284: unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
285: bool emailtest; // Send test email?
286:
287: // ATA ONLY
288: bool sct_erc_set; // set SCT ERC to:
289: unsigned short sct_erc_readtime; // ERC read time (deciseconds)
290: unsigned short sct_erc_writetime; // ERC write time (deciseconds)
291:
292: unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
293: unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
294: bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
295: bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
296:
297: attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
298:
299: ata_vendor_attr_defs attribute_defs; // -v options
300:
301: dev_config();
302: };
303:
304: dev_config::dev_config()
305: : lineno(0),
306: smartcheck(false),
307: usagefailed(false),
308: prefail(false),
309: usage(false),
310: selftest(false),
311: errorlog(false),
312: xerrorlog(false),
313: offlinests(false),
314: selfteststs(false),
315: permissive(false),
316: autosave(0),
317: autoofflinetest(0),
318: fix_firmwarebug(FIX_NOTSPECIFIED),
319: ignorepresets(false),
320: showpresets(false),
321: removable(false),
322: powermode(0),
323: powerquiet(false),
324: powerskipmax(0),
325: tempdiff(0),
326: tempinfo(0), tempcrit(0),
327: emailfreq(0),
328: emailtest(false),
329: sct_erc_set(false),
330: sct_erc_readtime(0), sct_erc_writetime(0),
331: curr_pending_id(0), offl_pending_id(0),
332: curr_pending_incr(false), offl_pending_incr(false),
333: curr_pending_set(false), offl_pending_set(false)
334: {
335: }
336:
337:
338: // Number of allowed mail message types
339: static const int SMARTD_NMAIL = 13;
340: // Type for '-M test' mails (state not persistent)
341: static const int MAILTYPE_TEST = 0;
342: // TODO: Add const or enum for all mail types.
343:
// Bookkeeping for one type of warning mail.
struct mailinfo {
  int logged;        // number of times an email has been sent
  time_t firstsent;  // time first email was sent, as defined by time(2)
  time_t lastsent;   // time last email was sent, as defined by time(2)

  // Nothing sent yet.
  mailinfo()
    : logged(0),
      firstsent(0),
      lastsent(0)
    { }
};
352:
353: /// Persistent state data for a device.
354: struct persistent_dev_state
355: {
356: unsigned char tempmin, tempmax; // Min/Max Temperatures
357:
358: unsigned char selflogcount; // total number of self-test errors
359: unsigned short selfloghour; // lifetime hours of last self-test error
360:
361: time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
362:
363: uint64_t selective_test_last_start; // Start LBA of last scheduled selective self-test
364: uint64_t selective_test_last_end; // End LBA of last scheduled selective self-test
365:
366: mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
367:
368: // ATA ONLY
369: int ataerrorcount; // Total number of ATA errors
370:
371: // Persistent part of ata_smart_values:
372: struct ata_attribute {
373: unsigned char id;
374: unsigned char val;
375: unsigned char worst; // Byte needed for 'raw64' attribute only.
376: uint64_t raw;
377: unsigned char resvd;
378:
379: ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
380: };
381: ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
382:
383: persistent_dev_state();
384: };
385:
386: persistent_dev_state::persistent_dev_state()
387: : tempmin(0), tempmax(0),
388: selflogcount(0),
389: selfloghour(0),
390: scheduled_test_next_check(0),
391: selective_test_last_start(0),
392: selective_test_last_end(0),
393: ataerrorcount(0)
394: {
395: }
396:
397: /// Non-persistent state data for a device.
398: struct temp_dev_state
399: {
400: bool must_write; // true if persistent part should be written
401:
402: bool not_cap_offline; // true == not capable of offline testing
403: bool not_cap_conveyance;
404: bool not_cap_short;
405: bool not_cap_long;
406: bool not_cap_selective;
407:
408: unsigned char temperature; // last recorded Temperature (in Celsius)
409: time_t tempmin_delay; // time where Min Temperature tracking will start
410:
411: bool powermodefail; // true if power mode check failed
412: int powerskipcnt; // Number of checks skipped due to idle or standby mode
413:
414: // SCSI ONLY
415: unsigned char SmartPageSupported; // has log sense IE page (0x2f)
416: unsigned char TempPageSupported; // has log sense temperature page (0xd)
417: unsigned char SuppressReport; // minimize nuisance reports
418: unsigned char modese_len; // mode sense/select cmd len: 0 (don't
419: // know yet) 6 or 10
420:
421: // ATA ONLY
422: uint64_t num_sectors; // Number of sectors
423: ata_smart_values smartval; // SMART data
424: ata_smart_thresholds_pvt smartthres; // SMART thresholds
425:
426: temp_dev_state();
427: };
428:
429: temp_dev_state::temp_dev_state()
430: : must_write(false),
431: not_cap_offline(false),
432: not_cap_conveyance(false),
433: not_cap_short(false),
434: not_cap_long(false),
435: not_cap_selective(false),
436: temperature(0),
437: tempmin_delay(0),
438: powermodefail(false),
439: powerskipcnt(0),
440: SmartPageSupported(false),
441: TempPageSupported(false),
442: SuppressReport(false),
443: modese_len(0),
444: num_sectors(0)
445: {
446: memset(&smartval, 0, sizeof(smartval));
447: memset(&smartthres, 0, sizeof(smartthres));
448: }
449:
450: /// Runtime state data for a device.
451: struct dev_state
452: : public persistent_dev_state,
453: public temp_dev_state
454: {
455: void update_persistent_state();
456: void update_temp_state();
457: };
458:
459: /// Container for configuration info for each device.
460: typedef std::vector<dev_config> dev_config_vector;
461:
462: /// Container for state info for each device.
463: typedef std::vector<dev_state> dev_state_vector;
464:
465: // Copy ATA attributes to persistent state.
466: void dev_state::update_persistent_state()
467: {
468: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
469: const ata_smart_attribute & ta = smartval.vendor_attributes[i];
470: ata_attribute & pa = ata_attributes[i];
471: pa.id = ta.id;
472: if (ta.id == 0) {
473: pa.val = pa.worst = 0; pa.raw = 0;
474: continue;
475: }
476: pa.val = ta.current;
477: pa.worst = ta.worst;
478: pa.raw = ta.raw[0]
479: | ( ta.raw[1] << 8)
480: | ( ta.raw[2] << 16)
481: | ((uint64_t)ta.raw[3] << 24)
482: | ((uint64_t)ta.raw[4] << 32)
483: | ((uint64_t)ta.raw[5] << 40);
484: pa.resvd = ta.reserv;
485: }
486: }
487:
488: // Copy ATA from persistent to temp state.
489: void dev_state::update_temp_state()
490: {
491: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
492: const ata_attribute & pa = ata_attributes[i];
493: ata_smart_attribute & ta = smartval.vendor_attributes[i];
494: ta.id = pa.id;
495: if (pa.id == 0) {
496: ta.current = ta.worst = 0;
497: memset(ta.raw, 0, sizeof(ta.raw));
498: continue;
499: }
500: ta.current = pa.val;
501: ta.worst = pa.worst;
502: ta.raw[0] = (unsigned char) pa.raw;
503: ta.raw[1] = (unsigned char)(pa.raw >> 8);
504: ta.raw[2] = (unsigned char)(pa.raw >> 16);
505: ta.raw[3] = (unsigned char)(pa.raw >> 24);
506: ta.raw[4] = (unsigned char)(pa.raw >> 32);
507: ta.raw[5] = (unsigned char)(pa.raw >> 40);
508: ta.reserv = pa.resvd;
509: }
510: }
511:
512: // Parse a line from a state file.
513: static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
514: {
515: static const regular_expression regex(
516: "^ *"
517: "((temperature-min)" // (1 (2)
518: "|(temperature-max)" // (3)
519: "|(self-test-errors)" // (4)
520: "|(self-test-last-err-hour)" // (5)
521: "|(scheduled-test-next-check)" // (6)
522: "|(selective-test-last-start)" // (7)
523: "|(selective-test-last-end)" // (8)
524: "|(ata-error-count)" // (9)
525: "|(mail\\.([0-9]+)\\." // (10 (11)
526: "((count)" // (12 (13)
527: "|(first-sent-time)" // (14)
528: "|(last-sent-time)" // (15)
529: ")" // 12)
530: ")" // 10)
531: "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
532: "((id)" // (18 (19)
533: "|(val)" // (20)
534: "|(worst)" // (21)
535: "|(raw)" // (22)
536: "|(resvd)" // (23)
537: ")" // 18)
538: ")" // 16)
539: ")" // 1)
540: " *= *([0-9]+)[ \n]*$", // (24)
541: REG_EXTENDED
542: );
543:
544: const int nmatch = 1+24;
545: regmatch_t match[nmatch];
546: if (!regex.execute(line, nmatch, match))
547: return false;
548: if (match[nmatch-1].rm_so < 0)
549: return false;
550:
551: uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
552:
553: int m = 1;
554: if (match[++m].rm_so >= 0)
555: state.tempmin = (unsigned char)val;
556: else if (match[++m].rm_so >= 0)
557: state.tempmax = (unsigned char)val;
558: else if (match[++m].rm_so >= 0)
559: state.selflogcount = (unsigned char)val;
560: else if (match[++m].rm_so >= 0)
561: state.selfloghour = (unsigned short)val;
562: else if (match[++m].rm_so >= 0)
563: state.scheduled_test_next_check = (time_t)val;
564: else if (match[++m].rm_so >= 0)
565: state.selective_test_last_start = val;
566: else if (match[++m].rm_so >= 0)
567: state.selective_test_last_end = val;
568: else if (match[++m].rm_so >= 0)
569: state.ataerrorcount = (int)val;
570: else if (match[m+=2].rm_so >= 0) {
571: int i = atoi(line+match[m].rm_so);
572: if (!(0 <= i && i < SMARTD_NMAIL))
573: return false;
574: if (i == MAILTYPE_TEST) // Don't suppress test mails
575: return true;
576: if (match[m+=2].rm_so >= 0)
577: state.maillog[i].logged = (int)val;
578: else if (match[++m].rm_so >= 0)
579: state.maillog[i].firstsent = (time_t)val;
580: else if (match[++m].rm_so >= 0)
581: state.maillog[i].lastsent = (time_t)val;
582: else
583: return false;
584: }
585: else if (match[m+=5+1].rm_so >= 0) {
586: int i = atoi(line+match[m].rm_so);
587: if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
588: return false;
589: if (match[m+=2].rm_so >= 0)
590: state.ata_attributes[i].id = (unsigned char)val;
591: else if (match[++m].rm_so >= 0)
592: state.ata_attributes[i].val = (unsigned char)val;
593: else if (match[++m].rm_so >= 0)
594: state.ata_attributes[i].worst = (unsigned char)val;
595: else if (match[++m].rm_so >= 0)
596: state.ata_attributes[i].raw = val;
597: else if (match[++m].rm_so >= 0)
598: state.ata_attributes[i].resvd = (unsigned char)val;
599: else
600: return false;
601: }
602: else
603: return false;
604: return true;
605: }
606:
607: // Read a state file.
608: static bool read_dev_state(const char * path, persistent_dev_state & state)
609: {
610: stdio_file f(path, "r");
611: if (!f) {
612: if (errno != ENOENT)
613: pout("Cannot read state file \"%s\"\n", path);
614: return false;
615: }
616: #ifdef __CYGWIN__
617: setmode(fileno(f), O_TEXT); // Allow files with \r\n
618: #endif
619:
620: persistent_dev_state new_state;
621: int good = 0, bad = 0;
622: char line[256];
623: while (fgets(line, sizeof(line), f)) {
624: const char * s = line + strspn(line, " \t");
625: if (!*s || *s == '#')
626: continue;
627: if (!parse_dev_state_line(line, new_state))
628: bad++;
629: else
630: good++;
631: }
632:
633: if (bad) {
634: if (!good) {
635: pout("%s: format error\n", path);
636: return false;
637: }
638: pout("%s: %d invalid line(s) ignored\n", path, bad);
639: }
640:
641: // This sets the values missing in the file to 0.
642: state = new_state;
643: return true;
644: }
645:
// Write one "name = value" line to a state file.
// Zero values are not written; the reader treats missing entries as 0.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  // Note: the spaces around PRIu64 are required since C++11, otherwise
  // the macro name is parsed as a user-defined literal suffix.
  if (val)
    fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
651:
// Write one "name1.id.name2 = value" line to a state file.
// Zero values are not written; the reader treats missing entries as 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  // Note: the spaces around PRIu64 are required since C++11, otherwise
  // the macro name is parsed as a user-defined literal suffix.
  if (val)
    fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
657:
658: // Write a state file
659: static bool write_dev_state(const char * path, const persistent_dev_state & state)
660: {
661: // Rename old "file" to "file~"
662: std::string pathbak = path; pathbak += '~';
663: unlink(pathbak.c_str());
664: rename(path, pathbak.c_str());
665:
666: stdio_file f(path, "w");
667: if (!f) {
668: pout("Cannot create state file \"%s\"\n", path);
669: return false;
670: }
671:
672: fprintf(f, "# smartd state file\n");
673: write_dev_state_line(f, "temperature-min", state.tempmin);
674: write_dev_state_line(f, "temperature-max", state.tempmax);
675: write_dev_state_line(f, "self-test-errors", state.selflogcount);
676: write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
677: write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
678: write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
679: write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
680:
681: int i;
682: for (i = 0; i < SMARTD_NMAIL; i++) {
683: if (i == MAILTYPE_TEST) // Don't suppress test mails
684: continue;
685: const mailinfo & mi = state.maillog[i];
686: if (!mi.logged)
687: continue;
688: write_dev_state_line(f, "mail", i, "count", mi.logged);
689: write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
690: write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
691: }
692:
693: // ATA ONLY
694: write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
695:
696: for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
697: const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
698: if (!pa.id)
699: continue;
700: write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
701: write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
702: write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
703: write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
704: write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
705: }
706:
707: return true;
708: }
709:
710: // Write to the attrlog file
711: static bool write_dev_attrlog(const char * path, const persistent_dev_state & state)
712: {
713: stdio_file f(path, "a");
714: if (!f) {
715: pout("Cannot create attribute log file \"%s\"\n", path);
716: return false;
717: }
718:
719: // ATA ONLY
720: time_t now = time(0);
721: struct tm * tms = gmtime(&now);
722: fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
723: 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
724: tms->tm_hour, tms->tm_min, tms->tm_sec);
725: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
726: const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
727: if (!pa.id)
728: continue;
729: fprintf(f, "\t%d;%d;%"PRIu64";", pa.id, pa.val, pa.raw);
730: }
731: fprintf(f, "\n");
732:
733: return true;
734: }
735:
736: // Write all state files. If write_always is false, don't write
737: // unless must_write is set.
738: static void write_all_dev_states(const dev_config_vector & configs,
739: dev_state_vector & states,
740: bool write_always = true)
741: {
742: for (unsigned i = 0; i < states.size(); i++) {
743: const dev_config & cfg = configs.at(i);
744: if (cfg.state_file.empty())
745: continue;
746: dev_state & state = states[i];
747: if (!write_always && !state.must_write)
748: continue;
749: if (!write_dev_state(cfg.state_file.c_str(), state))
750: continue;
751: state.must_write = false;
752: if (write_always || debugmode)
753: PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
754: cfg.name.c_str(), cfg.state_file.c_str());
755: }
756: }
757:
758: // Write to all attrlog files
759: static void write_all_dev_attrlogs(const dev_config_vector & configs,
760: dev_state_vector & states)
761: {
762: for (unsigned i = 0; i < states.size(); i++) {
763: const dev_config & cfg = configs.at(i);
764: if (cfg.attrlog_file.empty())
765: continue;
766: dev_state & state = states[i];
767: write_dev_attrlog(cfg.attrlog_file.c_str(), state);
768: }
769: }
770:
771: // remove the PID file
772: static void RemovePidFile()
773: {
774: if (!pid_file.empty()) {
775: if (unlink(pid_file.c_str()))
776: PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
777: pid_file.c_str(), strerror(errno));
778: pid_file.clear();
779: }
780: return;
781: }
782:
783: extern "C" { // signal handlers require C-linkage
784:
785: // Note if we catch a SIGUSR1
786: static void USR1handler(int sig)
787: {
788: if (SIGUSR1==sig)
789: caughtsigUSR1=1;
790: return;
791: }
792:
793: #ifdef _WIN32
794: // Note if we catch a SIGUSR2
795: static void USR2handler(int sig)
796: {
797: if (SIGUSR2==sig)
798: caughtsigUSR2=1;
799: return;
800: }
801: #endif
802:
803: // Note if we catch a HUP (or INT in debug mode)
804: static void HUPhandler(int sig)
805: {
806: if (sig==SIGHUP)
807: caughtsigHUP=1;
808: else
809: caughtsigHUP=2;
810: return;
811: }
812:
813: // signal handler for TERM, QUIT, and INT (if not in debug mode)
814: static void sighandler(int sig)
815: {
816: if (!caughtsigEXIT)
817: caughtsigEXIT=sig;
818: return;
819: }
820:
821: } // extern "C"
822:
823: // Cleanup, print Goodbye message and remove pidfile
824: static int Goodbye(int status)
825: {
826: // delete PID file, if one was created
827: RemovePidFile();
828:
829: // if we are exiting because of a code bug, tell user
830: if (status==EXIT_BADCODE)
831: PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
832:
833: // and this should be the final output from smartd before it exits
834: PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
835:
836: return status;
837: }
838:
// Size of each buffer passed to exportenv()
#define ENVLENGTH 1024

// A replacement for setenv(), which is not available on all platforms.
// Formats "name=value" into 'stackspace' (at most ENVLENGTH bytes) and
// hands that buffer to putenv().  putenv() keeps a pointer to the
// string, so the buffer must not be freed or made invalid while the
// variable is in use; it may be released once the variable is
// redefined or deleted via another putenv() call.  So we keep these on
// the stack as long as the popen() call is underway.
// Returns the result of putenv().
static int exportenv(char *stackspace, const char *name, const char *value)
{
  char * const entry = stackspace;
  snprintf(entry, ENVLENGTH, "%s=%s", name, value);
  const int rc = putenv(entry);
  return rc;
}
853:
854: static char *dnsdomain(const char *hostname)
855: {
856: char *p = NULL;
857: #ifdef HAVE_GETADDRINFO
858: static char canon_name[NI_MAXHOST];
859: struct addrinfo *info = NULL;
860: struct addrinfo hints;
861: int err;
862:
863: memset(&hints, 0, sizeof(hints));
864: hints.ai_flags = AI_CANONNAME;
865: if ((err = getaddrinfo(hostname, NULL, &hints, &info)) || (!info)) {
866: PrintOut(LOG_CRIT, "Error retrieving getaddrinfo(%s): %s\n", hostname, gai_strerror(err));
867: return NULL;
868: }
869: if (info->ai_canonname) {
870: strncpy(canon_name, info->ai_canonname, sizeof(canon_name));
871: canon_name[NI_MAXHOST - 1] = '\0';
872: p = canon_name;
873: if ((p = strchr(canon_name, '.')))
874: p++;
875: }
876: freeaddrinfo(info);
877: #elif HAVE_GETHOSTBYNAME
878: struct hostent *hp;
879: if ((hp = gethostbyname(hostname))) {
880: // Does this work if gethostbyname() returns an IPv6 name in
881: // colon/dot notation? [BA]
882: if ((p = strchr(hp->h_name, '.')))
883: p++; // skip "."
884: }
885: #else
886: ARGUSED(hostname);
887: #endif
888: return p;
889: }
890:
891: #define EBUFLEN 1024
892:
893: static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
894: __attribute__ ((format (printf, 4, 5)));
895:
896: // If either address or executable path is non-null then send and log
897: // a warning email, or execute executable
898: static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...){
899: char command[2048], message[256], hostname[256], domainname[256], additional[256],fullmessage[1024];
900: char original[256], further[256], nisdomain[256], subject[256],dates[DATEANDEPOCHLEN];
901: char environ_strings[11][ENVLENGTH];
902: time_t epoch;
903: va_list ap;
904: const int day=24*3600;
905: int days=0;
906: const char * const whichfail[]={
907: "EmailTest", // 0
908: "Health", // 1
909: "Usage", // 2
910: "SelfTest", // 3
911: "ErrorCount", // 4
912: "FailedHealthCheck", // 5
913: "FailedReadSmartData", // 6
914: "FailedReadSmartErrorLog", // 7
915: "FailedReadSmartSelfTestLog", // 8
916: "FailedOpenDevice", // 9
917: "CurrentPendingSector", // 10
918: "OfflineUncorrectableSector", // 11
919: "Temperature" // 12
920: };
921:
922: const char *unknown="[Unknown]";
923:
924: // See if user wants us to send mail
925: if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
926: return;
927:
928: std::string address = cfg.emailaddress;
929: const char * executable = cfg.emailcmdline.c_str();
930:
931: // which type of mail are we sending?
932: mailinfo * mail=(state.maillog)+which;
933:
934: // checks for sanity
935: if (cfg.emailfreq<1 || cfg.emailfreq>3) {
936: PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
937: return;
938: }
939: if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
940: PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
941: which, (int)sizeof(whichfail));
942: return;
943: }
944:
945: // Return if a single warning mail has been sent.
946: if ((cfg.emailfreq==1) && mail->logged)
947: return;
948:
949: // Return if this is an email test and one has already been sent.
950: if (which == 0 && mail->logged)
951: return;
952:
953: // To decide if to send mail, we need to know what time it is.
954: epoch=time(NULL);
955:
956: // Return if less than one day has gone by
957: if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
958: return;
959:
960: // Return if less than 2^(logged-1) days have gone by
961: if (cfg.emailfreq==3 && mail->logged) {
962: days=0x01<<(mail->logged-1);
963: days*=day;
964: if (epoch<(mail->lastsent+days))
965: return;
966: }
967:
968: #ifdef HAVE_LIBCAP_NG
969: if (enable_capabilities) {
970: PrintOut(LOG_ERR, "Sending a mail was supressed. "
971: "Mails can't be send when capabilites are enabled\n");
972: return;
973: }
974: #endif
975:
976: // record the time of this mail message, and the first mail message
977: if (!mail->logged)
978: mail->firstsent=epoch;
979: mail->lastsent=epoch;
980:
981: // get system host & domain names (not null terminated if length=MAX)
982: #ifdef HAVE_GETHOSTNAME
983: if (gethostname(hostname, 256))
984: strcpy(hostname, unknown);
985: else {
986: char *p=NULL;
987: hostname[255]='\0';
988: p = dnsdomain(hostname);
989: if (p && *p) {
990: strncpy(domainname, p, 255);
991: domainname[255]='\0';
992: } else
993: strcpy(domainname, unknown);
994: }
995: #else
996: strcpy(hostname, unknown);
997: strcpy(domainname, unknown);
998: #endif
999:
1000: #ifdef HAVE_GETDOMAINNAME
1001: if (getdomainname(nisdomain, 256))
1002: strcpy(nisdomain, unknown);
1003: else
1004: nisdomain[255]='\0';
1005: #else
1006: strcpy(nisdomain, unknown);
1007: #endif
1008:
1009: // print warning string into message
1010: va_start(ap, fmt);
1011: vsnprintf(message, 256, fmt, ap);
1012: va_end(ap);
1013:
1014: // appropriate message about further information
1015: additional[0]=original[0]=further[0]='\0';
1016: if (which) {
1017: sprintf(further,"You can also use the smartctl utility for further investigation.\n");
1018:
1019: switch (cfg.emailfreq) {
1020: case 1:
1021: sprintf(additional,"No additional email messages about this problem will be sent.\n");
1022: break;
1023: case 2:
1024: sprintf(additional,"Another email message will be sent in 24 hours if the problem persists.\n");
1025: break;
1026: case 3:
1027: sprintf(additional,"Another email message will be sent in %d days if the problem persists\n",
1028: (0x01)<<mail->logged);
1029: break;
1030: }
1031: if (cfg.emailfreq>1 && mail->logged) {
1032: dateandtimezoneepoch(dates, mail->firstsent);
1033: sprintf(original,"The original email about this issue was sent at %s\n", dates);
1034: }
1035: }
1036:
1037: snprintf(subject, 256,"SMART error (%s) detected on host: %s", whichfail[which], hostname);
1038:
1039: // If the user has set cfg.emailcmdline, use that as mailer, else "mail" or "mailx".
1040: if (!*executable)
1041: #ifdef DEFAULT_MAILER
1042: executable = DEFAULT_MAILER ;
1043: #else
1044: #ifndef _WIN32
1045: executable = "mail";
1046: #else
1047: executable = "blat"; // http://blat.sourceforge.net/
1048: #endif
1049: #endif
1050:
1051: #ifndef _WIN32 // blat mailer needs comma
1052: // replace commas by spaces to separate recipients
1053: std::replace(address.begin(), address.end(), ',', ' ');
1054: #endif
1055: // Export information in environment variables that will be useful
1056: // for user scripts
1057: exportenv(environ_strings[0], "SMARTD_MAILER", executable);
1058: exportenv(environ_strings[1], "SMARTD_MESSAGE", message);
1059: exportenv(environ_strings[2], "SMARTD_SUBJECT", subject);
1060: dateandtimezoneepoch(dates, mail->firstsent);
1061: exportenv(environ_strings[3], "SMARTD_TFIRST", dates);
1062: snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1063: exportenv(environ_strings[4], "SMARTD_TFIRSTEPOCH", dates);
1064: exportenv(environ_strings[5], "SMARTD_FAILTYPE", whichfail[which]);
1065: if (!address.empty())
1066: exportenv(environ_strings[6], "SMARTD_ADDRESS", address.c_str());
1067: exportenv(environ_strings[7], "SMARTD_DEVICESTRING", cfg.name.c_str());
1068:
1069: // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1070: exportenv(environ_strings[8], "SMARTD_DEVICETYPE",
1071: (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1072: exportenv(environ_strings[9], "SMARTD_DEVICE", cfg.dev_name.c_str());
1073:
1074: snprintf(fullmessage, 1024,
1075: "This email was generated by the smartd daemon running on:\n\n"
1076: " host name: %s\n"
1077: " DNS domain: %s\n"
1078: " NIS domain: %s\n\n"
1079: "The following warning/error was logged by the smartd daemon:\n\n"
1080: "%s\n\n"
1081: "For details see host's SYSLOG.\n\n"
1082: "%s%s%s",
1083: hostname, domainname, nisdomain, message, further, original, additional);
1084: exportenv(environ_strings[10], "SMARTD_FULLMESSAGE", fullmessage);
1085:
1086: // now construct a command to send this as EMAIL
1087: #ifndef _WIN32
1088: if (!address.empty())
1089: snprintf(command, 2048,
1090: "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
1091: "%sENDMAIL\n", subject, address.c_str(), fullmessage);
1092: else
1093: snprintf(command, 2048, "%s 2>&1", executable);
1094:
1095: // tell SYSLOG what we are about to do...
1096: const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1097: const char * newwarn = (which? "Warning via" : "Test of");
1098:
1099: PrintOut(LOG_INFO,"%s %s to %s ...\n",
1100: which?"Sending warning via":"Executing test of", executable, newadd);
1101:
1102: // issue the command to send mail or to run the user's executable
1103: errno=0;
1104: FILE * pfp;
1105: if (!(pfp=popen(command, "r")))
1106: // failed to popen() mail process
1107: PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1108: newwarn, executable, newadd, errno?strerror(errno):"");
1109: else {
1110: // pipe suceeded!
1111: int len, status;
1112: char buffer[EBUFLEN];
1113:
1114: // if unexpected output on stdout/stderr, null terminate, print, and flush
1115: if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1116: int count=0;
1117: int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1118: buffer[newlen]='\0';
1119: PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1120: newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1121:
1122: // flush pipe if needed
1123: while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1124: count++;
1125:
1126: // tell user that pipe was flushed, or that something is really wrong
1127: if (count && count<EBUFLEN)
1128: PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1129: newwarn, executable, newadd);
1130: else if (count)
1131: PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1132: newwarn, executable, newadd);
1133: }
1134:
1135: // if something went wrong with mail process, print warning
1136: errno=0;
1137: if (-1==(status=pclose(pfp)))
1138: PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1139: errno?strerror(errno):"");
1140: else {
1141: // mail process apparently succeeded. Check and report exit status
1142: int status8;
1143:
1144: if (WIFEXITED(status)) {
1145: // exited 'normally' (but perhaps with nonzero status)
1146: status8=WEXITSTATUS(status);
1147:
1148: if (status8>128)
1149: PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1150: newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1151: else if (status8)
1152: PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1153: newwarn, executable, newadd, status, status8);
1154: else
1155: PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1156: }
1157:
1158: if (WIFSIGNALED(status))
1159: PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1160: newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1161:
1162: // this branch is probably not possible. If subprocess is
1163: // stopped then pclose() should not return.
1164: if (WIFSTOPPED(status))
1165: PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1166: newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1167:
1168: }
1169: }
1170:
1171: #else // _WIN32
1172:
1173: // No "here-documents" on Windows, so must use separate commandline and stdin
1174: char stdinbuf[1024];
1175: command[0] = stdinbuf[0] = 0;
1176: int boxtype = -1, boxmsgoffs = 0;
1177: const char * newadd = "<nomailer>";
1178: if (!address.empty()) {
1179: // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
1180: char addr1[9+1+13] = ""; int n1 = -1, n2 = -1;
1181: if (sscanf(address.c_str(), "%9[a-z]%n,%n", addr1, &n1, &n2) == 1 && (n1 == (int)address.size() || n2 > 0)) {
1182: if (!strcmp(addr1, "msgbox"))
1183: boxtype = 0;
1184: else if (!strcmp(addr1, "sysmsgbox"))
1185: boxtype = 1;
1186: if (boxtype >= 0)
1187: address.erase(0, (n2 > n1 ? n2 : n1));
1188: }
1189:
1190: if (!address.empty()) {
1191: // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
1192: snprintf(command, sizeof(command),
1193: "%s - -q -subject \"%s\" -to \"%s\"",
1194: executable, subject, address.c_str());
1195: newadd = address.c_str();
1196: }
1197:
1198: // Message for mail [0...] and messagebox [boxmsgoffs...]
1199: snprintf(stdinbuf, sizeof(stdinbuf),
1200: "This email was generated by the smartd daemon running on:\n\n"
1201: " host name: %s\n"
1202: " DNS domain: %s\n"
1203: // " NIS domain: %s\n"
1204: "\n",
1205: hostname, /*domainname, */ nisdomain);
1206: boxmsgoffs = strlen(stdinbuf);
1207: snprintf(stdinbuf+boxmsgoffs, sizeof(stdinbuf)-boxmsgoffs,
1208: "The following warning/error was logged by the smartd daemon:\n\n"
1209: "%s\n\n"
1210: "For details see the event log or log file of smartd.\n\n"
1211: "%s%s%s"
1212: "\n",
1213: message, further, original, additional);
1214: }
1215: else
1216: snprintf(command, sizeof(command), "%s", executable);
1217:
1218: const char * newwarn = (which ? "Warning via" : "Test of");
1219: if (boxtype >= 0) {
1220: // show message box
1221: daemon_messagebox(boxtype, subject, stdinbuf+boxmsgoffs);
1222: PrintOut(LOG_INFO,"%s message box\n", newwarn);
1223: }
1224: if (command[0]) {
1225: char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1226: int rc;
1227: // run command
1228: PrintOut(LOG_INFO,"%s %s to %s ...\n",
1229: (which?"Sending warning via":"Executing test of"), executable, newadd);
1230: rc = daemon_spawn(command, stdinbuf, strlen(stdinbuf), stdoutbuf, sizeof(stdoutbuf));
1231: if (rc >= 0 && stdoutbuf[0])
1232: PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1233: newwarn, executable, newadd, (int)strlen(stdoutbuf), stdoutbuf);
1234: if (rc != 0)
1235: PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1236: newwarn, executable, newadd, rc);
1237: else
1238: PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1239: }
1240:
1241: #endif // _WIN32
1242:
1243: // increment mail sent counter
1244: mail->logged++;
1245: }
1246:
1247: static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1248: __attribute__ ((format (printf, 4, 5)));
1249:
1250: static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1251: {
1252: if (!(0 <= which && which < SMARTD_NMAIL))
1253: return;
1254:
1255: // Return if no mail sent yet
1256: mailinfo & mi = state.maillog[which];
1257: if (!mi.logged)
1258: return;
1259:
1260: // Format & print message
1261: char msg[256];
1262: va_list ap;
1263: va_start(ap, fmt);
1264: vsnprintf(msg, sizeof(msg), fmt, ap);
1265: va_end(ap);
1266:
1267: PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1268: msg, mi.logged, (mi.logged==1 ? "" : "s"));
1269:
1270: // Clear mail counter and timestamps
1271: mi = mailinfo();
1272: state.must_write = true;
1273: }
1274:
1275: #ifndef _WIN32
1276:
1277: // Output multiple lines via separate syslog(3) calls.
1278: static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1279: {
1280: char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1281: vsnprintf(buf, sizeof(buf), fmt, ap);
1282:
1283: for (char * p = buf, * q; p && *p; p = q) {
1284: if ((q = strchr(p, '\n')))
1285: *q++ = 0;
1286: if (*p)
1287: syslog(priority, "%s\n", p);
1288: }
1289: }
1290:
1291: #else // _WIN32
1292: // os_win32/syslog_win32.cpp supports multiple lines.
1293: #define vsyslog_lines vsyslog
1294: #endif // _WIN32
1295:
1296: // Printing function for watching ataprint commands, or losing them
1297: // [From GLIBC Manual: Since the prototype doesn't specify types for
1298: // optional arguments, in a call to a variadic function the default
1299: // argument promotions are performed on the optional argument
1300: // values. This means the objects of type char or short int (whether
1301: // signed or not) are promoted to either int or unsigned int, as
1302: // appropriate.]
1303: void pout(const char *fmt, ...){
1304: va_list ap;
1305:
1306: // get the correct time in syslog()
1307: FixGlibcTimeZoneBug();
1308: // initialize variable argument list
1309: va_start(ap,fmt);
1310: // in debugmode==1 mode we will print the output from the ataprint.o functions!
1311: if (debugmode && debugmode!=2)
1312: #ifdef _WIN32
1313: if (facility == LOG_LOCAL1) // logging to stdout
1314: vfprintf(stderr,fmt,ap);
1315: else
1316: #endif
1317: vprintf(fmt,ap);
1318: // in debugmode==2 mode we print output from knowndrives.o functions
1319: else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1320: openlog("smartd", LOG_PID, facility);
1321: vsyslog_lines(LOG_INFO, fmt, ap);
1322: closelog();
1323: }
1324: va_end(ap);
1325: fflush(NULL);
1326: return;
1327: }
1328:
1329: // This function prints either to stdout or to the syslog as needed.
1330: static void PrintOut(int priority, const char *fmt, ...){
1331: va_list ap;
1332:
1333: // get the correct time in syslog()
1334: FixGlibcTimeZoneBug();
1335: // initialize variable argument list
1336: va_start(ap,fmt);
1337: if (debugmode)
1338: #ifdef _WIN32
1339: if (facility == LOG_LOCAL1) // logging to stdout
1340: vfprintf(stderr,fmt,ap);
1341: else
1342: #endif
1343: vprintf(fmt,ap);
1344: else {
1345: openlog("smartd", LOG_PID, facility);
1346: vsyslog_lines(priority, fmt, ap);
1347: closelog();
1348: }
1349: va_end(ap);
1350: return;
1351: }
1352:
1353: // Used to warn users about invalid checksums. Called from atacmds.cpp.
1354: void checksumwarning(const char * string)
1355: {
1356: pout("Warning! %s error: invalid SMART checksum.\n", string);
1357: }
1358:
1359: #ifndef _WIN32
1360:
1361: // Wait for the pid file to show up, this makes sure a calling program knows
1362: // that the daemon is really up and running and has a pid to kill it
1363: static bool WaitForPidFile()
1364: {
1365: int waited, max_wait = 10;
1366: struct stat stat_buf;
1367:
1368: if (pid_file.empty() || debugmode)
1369: return true;
1370:
1371: for(waited = 0; waited < max_wait; ++waited) {
1372: if (!stat(pid_file.c_str(), &stat_buf)) {
1373: return true;
1374: } else
1375: sleep(1);
1376: }
1377: return false;
1378: }
1379:
1380: #endif // _WIN32
1381:
1382: // Forks new process, closes ALL file descriptors, redirects stdin,
1383: // stdout, and stderr. Not quite daemon(). See
1384: // http://www.linuxjournal.com/article/2335
1385: // for a good description of why we do things this way.
1386: static void DaemonInit()
1387: {
1388: #ifndef _WIN32
1389: pid_t pid;
1390: int i;
1391:
1392: // flush all buffered streams. Else we might get two copies of open
1393: // streams since both parent and child get copies of the buffers.
1394: fflush(NULL);
1395:
1396: if (do_fork) {
1397: if ((pid=fork()) < 0) {
1398: // unable to fork!
1399: PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1400: EXIT(EXIT_STARTUP);
1401: }
1402: else if (pid) {
1403: // we are the parent process, wait for pid file, then exit cleanly
1404: if(!WaitForPidFile()) {
1405: PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1406: EXIT(EXIT_STARTUP);
1407: } else
1408: EXIT(0);
1409: }
1410:
1411: // from here on, we are the child process.
1412: setsid();
1413:
1414: // Fork one more time to avoid any possibility of having terminals
1415: if ((pid=fork()) < 0) {
1416: // unable to fork!
1417: PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1418: EXIT(EXIT_STARTUP);
1419: }
1420: else if (pid)
1421: // we are the parent process -- exit cleanly
1422: EXIT(0);
1423:
1424: // Now we are the child's child...
1425: }
1426:
1427: // close any open file descriptors
1428: for (i=getdtablesize();i>=0;--i)
1429: close(i);
1430:
1431: #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1432:
1433: // redirect any IO attempts to /dev/null for stdin
1434: i=open("/dev/null",O_RDWR);
1435: if (i>=0) {
1436: // stdout
1437: NO_warn_unused_result(dup(i));
1438: // stderr
1439: NO_warn_unused_result(dup(i));
1440: };
1441: umask(0022);
1442: NO_warn_unused_result(chdir("/"));
1443:
1444: if (do_fork)
1445: PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1446:
1447: #else // _WIN32
1448:
1449: // No fork() on native Win32
1450: // Detach this process from console
1451: fflush(NULL);
1452: if (daemon_detach("smartd")) {
1453: PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1454: EXIT(EXIT_STARTUP);
1455: }
1456: // stdin/out/err now closed if not redirected
1457:
1458: #endif // _WIN32
1459: return;
1460: }
1461:
1462: // create a PID file containing the current process id
1463: static void WritePidFile()
1464: {
1465: if (!pid_file.empty()) {
1466: pid_t pid = getpid();
1467: mode_t old_umask;
1468: #ifndef __CYGWIN__
1469: old_umask = umask(0077); // rwx------
1470: #else
1471: // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1472: old_umask = umask(0033); // rwxr--r--
1473: #endif
1474:
1475: stdio_file f(pid_file.c_str(), "w");
1476: umask(old_umask);
1477: if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1478: PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1479: EXIT(EXIT_PID);
1480: }
1481: PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1482: }
1483: }
1484:
1485: // Prints header identifying version of code and home
1486: static void PrintHead()
1487: {
1488: PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1489: }
1490:
1491: // prints help info for configuration file Directives
1492: static void Directives()
1493: {
1494: PrintOut(LOG_INFO,
1495: "Configuration file (%s) Directives (after device name):\n"
1496: " -d TYPE Set the device type: %s, auto, removable\n"
1497: " -T TYPE Set the tolerance to one of: normal, permissive\n"
1498: " -o VAL Enable/disable automatic offline tests (on/off)\n"
1499: " -S VAL Enable/disable attribute autosave (on/off)\n"
1500: " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1501: " -H Monitor SMART Health Status, report if failed\n"
1502: " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1503: " -l TYPE Monitor SMART log or self-test status\n"
1504: " Type is one of: error, selftest, xerror, offlinests, selfteststs\n"
1505: " -l scterc,R,W Set SCT Error Recovery Control\n"
1506: " -f Monitor 'Usage' Attributes, report failures\n"
1507: " -m ADD Send email warning to address ADD\n"
1508: " -M TYPE Modify email warning behavior (see man page)\n"
1509: " -p Report changes in 'Prefailure' Attributes\n"
1510: " -u Report changes in 'Usage' Attributes\n"
1511: " -t Equivalent to -p and -u Directives\n"
1512: " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1513: " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1514: " -i ID Ignore Attribute ID for -f Directive\n"
1515: " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1516: " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1517: " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1518: " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1519: " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1520: " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1521: " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1522: " -F TYPE Firmware bug workaround: none, samsung, samsung2, samsung3\n"
1523: " # Comment: text after a hash sign is ignored\n"
1524: " \\ Line continuation character\n"
1525: "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1526: "Use ID = 0 to turn off -C and/or -U Directives\n"
1527: "Example: /dev/hda -a\n",
1528: configfile, smi()->get_valid_dev_types_str().c_str());
1529: return;
1530: }
1531:
/* Looks up the help text listing the valid arguments of command line
   option 'opt'.  Returns a pointer to a static string, or NULL if the
   option is not in the table. */
static const char *GetValidArgList(char opt)
{
  static const struct {
    char option;        // option letter
    const char * args;  // formatted list of its valid arguments
  } valid_args[] = {
    { 'A', "<PATH_PREFIX>" },
    { 's', "<PATH_PREFIX>" },
    { 'c', "<FILE_NAME>, -" },
    { 'l', "daemon, local0, local1, local2, local3, local4, local5, local6, local7" },
    { 'q', "nodev, errors, nodevstartup, never, onecheck, showtests" },
    { 'r', "ioctl[,N], ataioctl[,N], scsiioctl[,N]" },
    { 'B', "<FILE_NAME>" },
    { 'p', "<FILE_NAME>" },
    { 'i', "<INTEGER_SECONDS>" }
  };

  const unsigned count = sizeof(valid_args) / sizeof(valid_args[0]);
  for (unsigned idx = 0; idx < count; idx++) {
    if (valid_args[idx].option == opt)
      return valid_args[idx].args;
  }
  return NULL;
}
1557:
1558: /* prints help information for command syntax */
1559: static void Usage()
1560: {
1561: PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1562: PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1563: PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1564: #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1565: PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_ATTRIBUTELOG"MODEL-SERIAL.ata.csv]\n");
1566: #endif
1567: PrintOut(LOG_INFO,"\n");
1568: PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1569: PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1570: PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1571: #ifdef SMARTMONTOOLS_DRIVEDBDIR
1572: PrintOut(LOG_INFO,"\n");
1573: PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1574: #endif
1575: PrintOut(LOG_INFO,"]\n\n");
1576: PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1577: PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1578: PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1579: #ifdef HAVE_LIBCAP_NG
1580: PrintOut(LOG_INFO," -C, --capabilities\n");
1581: PrintOut(LOG_INFO," Use capabilities.\n"
1582: " Warning: Mail notification does not work when used.\n\n");
1583: #endif
1584: PrintOut(LOG_INFO," -d, --debug\n");
1585: PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1586: PrintOut(LOG_INFO," -D, --showdirectives\n");
1587: PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1588: PrintOut(LOG_INFO," -h, --help, --usage\n");
1589: PrintOut(LOG_INFO," Display this help and exit\n\n");
1590: PrintOut(LOG_INFO," -i N, --interval=N\n");
1591: PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1592: PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1593: #ifndef _WIN32
1594: PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1595: #else
1596: PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1597: #endif
1598: #ifndef _WIN32
1599: PrintOut(LOG_INFO," -n, --no-fork\n");
1600: PrintOut(LOG_INFO," Do not fork into background\n\n");
1601: #endif // _WIN32
1602: PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1603: PrintOut(LOG_INFO," Write PID file NAME\n\n");
1604: PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1605: PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1606: PrintOut(LOG_INFO," -r, --report=TYPE\n");
1607: PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1608: PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1609: PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1610: #ifdef SMARTMONTOOLS_SAVESTATES
1611: PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_SAVESTATES"MODEL-SERIAL.TYPE.state]\n");
1612: #endif
1613: PrintOut(LOG_INFO,"\n");
1614: #ifdef _WIN32
1615: PrintOut(LOG_INFO," --service\n");
1616: PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1617: PrintOut(LOG_INFO," smartd install [options]\n");
1618: PrintOut(LOG_INFO," Remove service with:\n");
1619: PrintOut(LOG_INFO," smartd remove\n\n");
1620: #endif // _WIN32
1621: PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1622: PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1623: }
1624:
1625: static int CloseDevice(smart_device * device, const char * name)
1626: {
1627: if (!device->close()){
1628: PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1629: return 1;
1630: }
1631: // device sucessfully closed
1632: return 0;
1633: }
1634:
// Return true if a char is not allowed in a state file name.
// Only the ASCII digits and the ASCII letters A-Z / a-z are allowed.
static bool not_allowed_in_filename(char c)
{
  const bool is_digit = ('0' <= c && c <= '9');
  const bool is_upper = ('A' <= c && c <= 'Z');
  const bool is_lower = ('a' <= c && c <= 'z');

  if (is_digit || is_upper || is_lower)
    return false;
  return true;
}
1642:
1643: // Read error count from Summary or Extended Comprehensive SMART error log
1644: // Return -1 on error
1645: static int read_ata_error_count(ata_device * device, const char * name,
1646: unsigned char fix_firmwarebug, bool extended)
1647: {
1648: if (!extended) {
1649: ata_smart_errorlog log;
1650: if (ataReadErrorLog(device, &log, fix_firmwarebug)){
1651: PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1652: return -1;
1653: }
1654: return (log.error_log_pointer ? log.ata_error_count : 0);
1655: }
1656: else {
1657: ata_smart_exterrlog logx;
1658: if (!ataReadExtErrorLog(device, &logx, 1 /*first sector only*/)) {
1659: PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1660: return -1;
1661: }
1662: // Some disks use the reserved byte as index, see ataprint.cpp.
1663: return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1664: }
1665: }
1666:
1667: // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1668: // error count, and top bits are the power-on hours of the last error.
1669: static int SelfTestErrorCount(ata_device * device, const char * name,
1670: unsigned char fix_firmwarebug)
1671: {
1672: struct ata_smart_selftestlog log;
1673:
1674: if (ataReadSelfTestLog(device, &log, fix_firmwarebug)){
1675: PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1676: return -1;
1677: }
1678:
1679: // return current number of self-test errors
1680: return ataPrintSmartSelfTestlog(&log, false, fix_firmwarebug);
1681: }
1682:
// Decode the packed value returned by SelfTestErrorCount():
// bits 0-7 hold the self-test error count, bits 8-23 the power-on hours.
// The argument is parenthesized so that expressions such as 'a | b' are
// evaluated as a whole before masking/shifting.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1685:
1686: // Log offline data collection status
1687: static void log_offline_data_coll_status(const char * name, unsigned char status)
1688: {
1689: const char * msg;
1690: switch (status & 0x7f) {
1691: case 0x00: msg = "was never started"; break;
1692: case 0x02: msg = "was completed without error"; break;
1693: case 0x03: msg = "is in progress"; break;
1694: case 0x04: msg = "was suspended by an interrupting command from host"; break;
1695: case 0x05: msg = "was aborted by an interrupting command from host"; break;
1696: case 0x06: msg = "was aborted by the device with a fatal error"; break;
1697: default: msg = 0;
1698: }
1699:
1700: if (msg)
1701: PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1702: "Device: %s, offline data collection %s%s\n", name, msg,
1703: ((status & 0x80) ? " (auto:on)" : ""));
1704: else
1705: PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1706: name, status);
1707: }
1708:
1709: // Log self-test execution status
1710: static void log_self_test_exec_status(const char * name, unsigned char status)
1711: {
1712: const char * msg;
1713: switch (status >> 4) {
1714: case 0x0: msg = "completed without error"; break;
1715: case 0x1: msg = "was aborted by the host"; break;
1716: case 0x2: msg = "was interrupted by the host with a reset"; break;
1717: case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1718: case 0x4: msg = "completed with error (unknown test element)"; break;
1719: case 0x5: msg = "completed with error (electrical test element)"; break;
1720: case 0x6: msg = "completed with error (servo/seek test element)"; break;
1721: case 0x7: msg = "completed with error (read test element)"; break;
1722: case 0x8: msg = "completed with error (handling damage?)"; break;
1723: default: msg = 0;
1724: }
1725:
1726: if (msg)
1727: PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1728: "Device: %s, previous self-test %s\n", name, msg);
1729: else if ((status >> 4) == 0xf)
1730: PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1731: name, status & 0x0f);
1732: else
1733: PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1734: name, status);
1735: }
1736:
1737: // Check pending sector count id (-C, -U directives).
1738: static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1739: unsigned char id, const char * msg)
1740: {
1741: // Check attribute index
1742: int i = ata_find_attr_index(id, state.smartval);
1743: if (i < 0) {
1744: PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1745: cfg.name.c_str(), msg, id);
1746: return false;
1747: }
1748:
1749: // Check value
1750: uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1751: cfg.attribute_defs);
1752: if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1753: PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %"PRIu64" (0x%"PRIx64")\n",
1754: cfg.name.c_str(), msg, id, rawval, rawval);
1755: return false;
1756: }
1757:
1758: return true;
1759: }
1760:
1761: // Called by ATA/SCSIDeviceScan() after successful device check
1762: static void finish_device_scan(dev_config & cfg, dev_state & state)
1763: {
1764: // Set cfg.emailfreq if user hasn't set it
1765: if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1766: // Avoid that emails are suppressed forever due to state persistence
1767: if (cfg.state_file.empty())
1768: cfg.emailfreq = 1; // '-M once'
1769: else
1770: cfg.emailfreq = 2; // '-M daily'
1771: }
1772:
1773: // Start self-test regex check now if time was not read from state file
1774: if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1775: state.scheduled_test_next_check = time(0);
1776: }
1777:
1778:
1779: // TODO: Add '-F swapid' directive
1780: const bool fix_swapped_id = false;
1781:
1782: // scan to see what ata devices there are, and if they support SMART
1783: static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev)
1784: {
1785: int supported=0;
1786: struct ata_identify_device drive;
1787: const char *name = cfg.name.c_str();
1788: int retid;
1789:
1790: // Device must be open
1791:
1792: // Get drive identity structure
1793: if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1794: if (retid<0)
1795: // Unable to read Identity structure
1796: PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1797: else
1798: PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1799: name, packetdevicetype(retid-1));
1800: CloseDevice(atadev, name);
1801: return 2;
1802: }
1803:
1804: // Log drive identity and size
1805: char model[40+1], serial[20+1], firmware[8+1];
1806: ata_format_id_string(model, drive.model, sizeof(model)-1);
1807: ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1808: ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1809:
1810: ata_size_info sizes;
1811: ata_get_size_info(&drive, sizes);
1812: state.num_sectors = sizes.sectors;
1813:
1814: char wwn[30]; wwn[0] = 0;
1815: unsigned oui = 0; uint64_t unique_id = 0;
1816: int naa = ata_get_wwn(&drive, oui, unique_id);
1817: if (naa >= 0)
1818: snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09"PRIx64", ", naa, oui, unique_id);
1819:
1820: char cap[32];
1821: PrintOut(LOG_INFO, "Device: %s, %s, S/N:%s, %sFW:%s, %s\n", name,
1822: model, serial, wwn, firmware,
1823: format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1824:
1825: // Show if device in database, and use preset vendor attribute
1826: // options unless user has requested otherwise.
1827: if (cfg.ignorepresets)
1828: PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1829: else {
1830: // Apply vendor specific presets, print warning if present
1831: const drive_settings * dbentry = lookup_drive_apply_presets(
1832: &drive, cfg.attribute_defs, cfg.fix_firmwarebug);
1833: if (!dbentry)
1834: PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1835: else {
1836: PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s\n",
1837: name, (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
1838: if (*dbentry->warningmsg)
1839: PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1840: }
1841: }
1842:
1843: // Set default '-C 197[+]' if no '-C ID' is specified.
1844: if (!cfg.curr_pending_set)
1845: cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
1846: // Set default '-U 198[+]' if no '-U ID' is specified.
1847: if (!cfg.offl_pending_set)
1848: cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
1849:
1850: // If requested, show which presets would be used for this drive
1851: if (cfg.showpresets) {
1852: int savedebugmode=debugmode;
1853: PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1854: if (!debugmode)
1855: debugmode=2;
1856: show_presets(&drive);
1857: debugmode=savedebugmode;
1858: }
1859:
1860: // see if drive supports SMART
1861: supported=ataSmartSupport(&drive);
1862: if (supported!=1) {
1863: if (supported==0)
1864: // drive does NOT support SMART
1865: PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1866: else
1867: // can't tell if drive supports SMART
1868: PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1869:
1870: // should we proceed anyway?
1871: if (cfg.permissive) {
1872: PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1873: }
1874: else {
1875: PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1876: CloseDevice(atadev, name);
1877: return 2;
1878: }
1879: }
1880:
1881: if (ataEnableSmart(atadev)) {
1882: // Enable SMART command has failed
1883: PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1884: CloseDevice(atadev, name);
1885: return 2;
1886: }
1887:
1888: // disable device attribute autosave...
1889: if (cfg.autosave==1) {
1890: if (ataDisableAutoSave(atadev))
1891: PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1892: else
1893: PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1894: }
1895:
1896: // or enable device attribute autosave
1897: if (cfg.autosave==2) {
1898: if (ataEnableAutoSave(atadev))
1899: PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1900: else
1901: PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1902: }
1903:
1904: // capability check: SMART status
1905: if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1906: PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1907: cfg.smartcheck = false;
1908: }
1909:
1910: // capability check: Read smart values and thresholds. Note that
1911: // smart values are ALSO needed even if we ONLY want to know if the
1912: // device is self-test log or error-log capable! After ATA-5, this
1913: // information was ALSO reproduced in the IDENTIFY DEVICE response,
1914: // but sadly not for ATA-5. Sigh.
1915:
1916: // do we need to get SMART data?
1917: bool smart_val_ok = false;
1918: if ( cfg.autoofflinetest || cfg.selftest
1919: || cfg.errorlog || cfg.xerrorlog
1920: || cfg.offlinests || cfg.selfteststs
1921: || cfg.usagefailed || cfg.prefail || cfg.usage
1922: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
1923: || cfg.curr_pending_id || cfg.offl_pending_id ) {
1924:
1925: if (ataReadSmartValues(atadev, &state.smartval)) {
1926: PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
1927: cfg.usagefailed = cfg.prefail = cfg.usage = false;
1928: cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1929: cfg.curr_pending_id = cfg.offl_pending_id = 0;
1930: }
1931: else {
1932: smart_val_ok = true;
1933: if (ataReadSmartThresholds(atadev, &state.smartthres)) {
1934: PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
1935: name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
1936: cfg.usagefailed = false;
1937: // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1938: memset(&state.smartthres, 0, sizeof(state.smartthres));
1939: }
1940: }
1941:
1942: // see if the necessary Attribute is there to monitor offline or
1943: // current pending sectors or temperature
1944: if ( cfg.curr_pending_id
1945: && !check_pending_id(cfg, state, cfg.curr_pending_id,
1946: "Current_Pending_Sector"))
1947: cfg.curr_pending_id = 0;
1948:
1949: if ( cfg.offl_pending_id
1950: && !check_pending_id(cfg, state, cfg.offl_pending_id,
1951: "Offline_Uncorrectable"))
1952: cfg.offl_pending_id = 0;
1953:
1954: if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1955: && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
1956: PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name);
1957: cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1958: }
1959: }
1960:
1961: // enable/disable automatic on-line testing
1962: if (cfg.autoofflinetest) {
1963: // is this an enable or disable request?
1964: const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
1965: if (!smart_val_ok)
1966: PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
1967: else {
1968: // if command appears unsupported, issue a warning...
1969: if (!isSupportAutomaticTimer(&state.smartval))
1970: PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
1971: // ... but then try anyway
1972: if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
1973: PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
1974: else
1975: PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
1976: }
1977: }
1978:
1979: // Read log directories if required for capability check
1980: ata_smart_log_directory smart_logdir, gp_logdir;
1981: bool smart_logdir_ok = false, gp_logdir_ok = false;
1982:
1983: if ( isGeneralPurposeLoggingCapable(&drive)
1984: && (cfg.errorlog || cfg.selftest) ) {
1985: if (!ataReadLogDirectory(atadev, &smart_logdir, false))
1986: smart_logdir_ok = true;
1987: }
1988:
1989: if (cfg.xerrorlog) {
1990: if (!ataReadLogDirectory(atadev, &gp_logdir, true))
1991: gp_logdir_ok = true;
1992: }
1993:
1994: // capability check: self-test-log
1995: state.selflogcount = 0; state.selfloghour = 0;
1996: if (cfg.selftest) {
1997: int retval;
1998: if (!( cfg.permissive
1999: || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
2000: || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2001: PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2002: cfg.selftest = false;
2003: }
2004: else if ((retval = SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug)) < 0) {
2005: PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2006: cfg.selftest = false;
2007: }
2008: else {
2009: state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2010: state.selfloghour =SELFTEST_ERRORHOURS(retval);
2011: }
2012: }
2013:
2014: // capability check: ATA error log
2015: state.ataerrorcount = 0;
2016: if (cfg.errorlog) {
2017: int errcnt1;
2018: if (!( cfg.permissive
2019: || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2020: || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2021: PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2022: cfg.errorlog = false;
2023: }
2024: else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, false)) < 0) {
2025: PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2026: cfg.errorlog = false;
2027: }
2028: else
2029: state.ataerrorcount = errcnt1;
2030: }
2031:
2032: if (cfg.xerrorlog) {
2033: int errcnt2;
2034: if (!(cfg.permissive || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors))) {
2035: PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2036: name);
2037: cfg.xerrorlog = false;
2038: }
2039: else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, true)) < 0) {
2040: PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2041: cfg.xerrorlog = false;
2042: }
2043: else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2044: PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2045: name, state.ataerrorcount, errcnt2);
2046: // Record max error count
2047: if (errcnt2 > state.ataerrorcount)
2048: state.ataerrorcount = errcnt2;
2049: }
2050: else
2051: state.ataerrorcount = errcnt2;
2052: }
2053:
2054: // capability check: self-test and offline data collection status
2055: if (cfg.offlinests || cfg.selfteststs) {
2056: if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2057: if (cfg.offlinests)
2058: PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2059: if (cfg.selfteststs)
2060: PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2061: cfg.offlinests = cfg.selfteststs = false;
2062: }
2063: }
2064:
2065: // capabilities check -- does it support powermode?
2066: if (cfg.powermode) {
2067: int powermode = ataCheckPowerMode(atadev);
2068:
2069: if (-1 == powermode) {
2070: PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2071: cfg.powermode=0;
2072: }
2073: else if (powermode!=0 && powermode!=0x80 && powermode!=0xff) {
2074: PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2075: name, powermode);
2076: cfg.powermode=0;
2077: }
2078: }
2079:
2080: // set SCT Error Recovery Control if requested
2081: if (cfg.sct_erc_set) {
2082: if (!isSCTErrorRecoveryControlCapable(&drive))
2083: PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2084: name);
2085: else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
2086: || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime))
2087: PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2088: else
2089: PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2090: name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2091: }
2092:
2093: // If no tests available or selected, return
2094: if (!( cfg.smartcheck || cfg.selftest
2095: || cfg.errorlog || cfg.xerrorlog
2096: || cfg.offlinests || cfg.selfteststs
2097: || cfg.usagefailed || cfg.prefail || cfg.usage
2098: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2099: CloseDevice(atadev, name);
2100: return 3;
2101: }
2102:
2103: // tell user we are registering device
2104: PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2105:
2106: // close file descriptor
2107: CloseDevice(atadev, name);
2108:
2109: if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2110: // Build file name for state file
2111: std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2112: std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2113: if (!state_path_prefix.empty()) {
2114: cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2115: // Read previous state
2116: if (read_dev_state(cfg.state_file.c_str(), state)) {
2117: PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2118: // Copy ATA attribute values to temp state
2119: state.update_temp_state();
2120: }
2121: }
2122: if (!attrlog_path_prefix.empty())
2123: cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2124: }
2125:
2126: finish_device_scan(cfg, state);
2127:
2128: return 0;
2129: }
2130:
2131: // on success, return 0. On failure, return >0. Never return <0,
2132: // please.
2133: static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev)
2134: {
2135: int k, err, req_len, avail_len, version, len;
2136: const char *device = cfg.name.c_str();
2137: struct scsi_iec_mode_page iec;
2138: UINT8 tBuf[64];
2139: UINT8 inqBuf[96];
2140: UINT8 vpdBuf[252];
2141: char lu_id[64];
2142:
2143: // Device must be open
2144: memset(inqBuf, 0, 96);
2145: req_len = 36;
2146: if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2147: /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2148: req_len = 64;
2149: if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2150: PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2151: "skip device\n", device);
2152: return 2;
2153: }
2154: }
2155: version = inqBuf[2];
2156: avail_len = inqBuf[4] + 5;
2157: len = (avail_len < req_len) ? avail_len : req_len;
2158: // peri_dt = inqBuf[0] & 0x1f;
2159: if (len < 36) {
2160: PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2161: "skip device\n", device);
2162: return 2;
2163: }
2164: lu_id[0] = '\0';
2165: if ((version >= 0x4) && (version < 0x8)) {
2166: /* SPC-2 to SPC-5 */
2167: if (0 == (err = scsiInquiryVpd(scsidev, 0x83, vpdBuf, sizeof(vpdBuf)))) {
2168: len = vpdBuf[3];
2169: scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), NULL);
2170: }
2171: }
2172:
2173: unsigned int lb_size;
2174: char si_str[64];
2175: uint64_t capacity = scsiGetSize(scsidev, &lb_size);
2176:
2177: if (capacity)
2178: format_capacity(si_str, sizeof(si_str), capacity);
2179: else
2180: si_str[0] = '\0';
2181: PrintOut(LOG_INFO, "Device: %s, [%.8s %.16s %.4s]%s%s%s%s\n",
2182: device, (char *)&inqBuf[8], (char *)&inqBuf[16],
2183: (char *)&inqBuf[32],
2184: (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2185: (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2186:
2187: // check that device is ready for commands. IE stores its stuff on
2188: // the media.
2189: if ((err = scsiTestUnitReady(scsidev))) {
2190: if (SIMPLE_ERR_NOT_READY == err)
2191: PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2192: else if (SIMPLE_ERR_NO_MEDIUM == err)
2193: PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2194: else if (SIMPLE_ERR_BECOMING_READY == err)
2195: PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2196: else
2197: PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2198: CloseDevice(scsidev, device);
2199: return 2;
2200: }
2201:
2202: // Badly-conforming USB storage devices may fail this check.
2203: // The response to the following IE mode page fetch (current and
2204: // changeable values) is carefully examined. It has been found
2205: // that various USB devices that malform the response will lock up
2206: // if asked for a log page (e.g. temperature) so it is best to
2207: // bail out now.
2208: if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2209: state.modese_len = iec.modese_len;
2210: else if (SIMPLE_ERR_BAD_FIELD == err)
2211: ; /* continue since it is reasonable not to support IE mpage */
2212: else { /* any other error (including malformed response) unreasonable */
2213: PrintOut(LOG_INFO,
2214: "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2215: device, err);
2216: CloseDevice(scsidev, device);
2217: return 3;
2218: }
2219:
2220: // N.B. The following is passive (i.e. it doesn't attempt to turn on
2221: // smart if it is off). This may change to be the same as the ATA side.
2222: if (!scsi_IsExceptionControlEnabled(&iec)) {
2223: PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2224: "Try 'smartctl -s on %s' to turn on SMART features\n",
2225: device, device);
2226: CloseDevice(scsidev, device);
2227: return 3;
2228: }
2229:
2230: // Flag that certain log pages are supported (information may be
2231: // available from other sources).
2232: if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0)) {
2233: for (k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2234: switch (tBuf[k]) {
2235: case TEMPERATURE_LPAGE:
2236: state.TempPageSupported = 1;
2237: break;
2238: case IE_LPAGE:
2239: state.SmartPageSupported = 1;
2240: break;
2241: default:
2242: break;
2243: }
2244: }
2245: }
2246:
2247: // Check if scsiCheckIE() is going to work
2248: {
2249: UINT8 asc = 0;
2250: UINT8 ascq = 0;
2251: UINT8 currenttemp = 0;
2252: UINT8 triptemp = 0;
2253:
2254: if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2255: &asc, &ascq, ¤ttemp, &triptemp)) {
2256: PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2257: state.SuppressReport = 1;
2258: if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2259: PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device);
2260: cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2261: }
2262: }
2263: }
2264:
2265: // capability check: self-test-log
2266: if (cfg.selftest){
2267: int retval = scsiCountFailedSelfTests(scsidev, 0);
2268: if (retval<0) {
2269: // no self-test log, turn off monitoring
2270: PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2271: cfg.selftest = false;
2272: state.selflogcount = 0;
2273: state.selfloghour = 0;
2274: }
2275: else {
2276: // register starting values to watch for changes
2277: state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2278: state.selfloghour =SELFTEST_ERRORHOURS(retval);
2279: }
2280: }
2281:
2282: // disable autosave (set GLTSD bit)
2283: if (cfg.autosave==1){
2284: if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2285: PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2286: else
2287: PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2288: }
2289:
2290: // or enable autosave (clear GLTSD bit)
2291: if (cfg.autosave==2){
2292: if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2293: PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2294: else
2295: PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2296: }
2297:
2298: // tell user we are registering device
2299: PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2300:
2301: // TODO: Build file name for state file
2302: if (!state_path_prefix.empty()) {
2303: PrintOut(LOG_INFO, "Device: %s, persistence not yet supported for SCSI; ignoring -s option.\n", device);
2304: }
2305: // TODO: Build file name for attribute log file
2306: if (!attrlog_path_prefix.empty()) {
2307: PrintOut(LOG_INFO, "Device: %s, attribute log not yet supported for SCSI; ignoring -A option.\n", device);
2308: }
2309:
2310: // close file descriptor
2311: CloseDevice(scsidev, device);
2312:
2313: finish_device_scan(cfg, state);
2314:
2315: return 0;
2316: }
2317:
2318: // If the self-test log has got more self-test errors (or more recent
2319: // self-test errors) recorded, then notify user.
2320: static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2321: {
2322: const char * name = cfg.name.c_str();
2323:
2324: if (newi<0)
2325: // command failed
2326: MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2327: else {
2328: reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2329:
2330: // old and new error counts
2331: int oldc=state.selflogcount;
2332: int newc=SELFTEST_ERRORCOUNT(newi);
2333:
2334: // old and new error timestamps in hours
2335: int oldh=state.selfloghour;
2336: int newh=SELFTEST_ERRORHOURS(newi);
2337:
2338: if (oldc<newc) {
2339: // increase in error count
2340: PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2341: name, oldc, newc);
2342: MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2343: name, oldc, newc);
2344: state.must_write = true;
2345: }
2346: else if (newc > 0 && oldh != newh) {
2347: // more recent error
2348: // a 'more recent' error might actually be a smaller hour number,
2349: // if the hour number has wrapped.
2350: // There's still a bug here. You might just happen to run a new test
2351: // exactly 32768 hours after the previous failure, and have run exactly
2352: // 20 tests between the two, in which case smartd will miss the
2353: // new failure.
2354: PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2355: name, newh);
2356: MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2357: name, newh);
2358: state.must_write = true;
2359: }
2360:
2361: // Print info if error entries have disappeared
2362: // or newer successful successful extended self-test exits
2363: if (oldc > newc) {
2364: PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2365: name, oldc, newc);
2366: if (newc == 0)
2367: reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2368: }
2369:
2370: // Needed since self-test error count may DECREASE. Hour might
2371: // also have changed.
2372: state.selflogcount= newc;
2373: state.selfloghour = newh;
2374: }
2375: return;
2376: }
2377:
// Self-test type letters, ordered by priority (lower index = higher priority).
static const char test_type_chars[] = { 'L', 'n', 'c', 'r', 'S', 'C', 'O', '\0' };
// Number of test type letters; the terminating NUL is not counted.
static const unsigned num_test_types = sizeof(test_type_chars)/sizeof(test_type_chars[0]) - 1;
2381:
2382: // returns test type if time to do test of type testtype,
2383: // 0 if not time to do test.
2384: static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2385: {
2386: // check that self-testing has been requested
2387: if (cfg.test_regex.empty())
2388: return 0;
2389:
2390: // Exit if drive not capable of any test
2391: if ( state.not_cap_long && state.not_cap_short &&
2392: (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2393: return 0;
2394:
2395: // since we are about to call localtime(), be sure glibc is informed
2396: // of any timezone changes we make.
2397: if (!usetime)
2398: FixGlibcTimeZoneBug();
2399:
2400: // Is it time for next check?
2401: time_t now = (!usetime ? time(0) : usetime);
2402: if (now < state.scheduled_test_next_check)
2403: return 0;
2404:
2405: // Limit time check interval to 90 days
2406: if (state.scheduled_test_next_check + (3600L*24*90) < now)
2407: state.scheduled_test_next_check = now - (3600L*24*90);
2408:
2409: // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2410: char testtype = 0;
2411: time_t testtime = 0; int testhour = 0;
2412: int maxtest = num_test_types-1;
2413:
2414: for (time_t t = state.scheduled_test_next_check; ; ) {
2415: struct tm * tms = localtime(&t);
2416: // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2417: int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2418: for (int i = 0; i <= maxtest; i++) {
2419: // Skip if drive not capable of this test
2420: switch (test_type_chars[i]) {
2421: case 'L': if (state.not_cap_long) continue; break;
2422: case 'S': if (state.not_cap_short) continue; break;
2423: case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2424: case 'O': if (scsi || state.not_cap_offline) continue; break;
2425: case 'c': case 'n':
2426: case 'r': if (scsi || state.not_cap_selective) continue; break;
2427: default: continue;
2428: }
2429: // Try match of "T/MM/DD/d/HH"
2430: char pattern[16];
2431: snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2432: test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2433: if (cfg.test_regex.full_match(pattern)) {
2434: // Test found
2435: testtype = pattern[0];
2436: testtime = t; testhour = tms->tm_hour;
2437: // Limit further matches to higher priority self-tests
2438: maxtest = i-1;
2439: break;
2440: }
2441: }
2442: // Exit if no tests left or current time reached
2443: if (maxtest < 0)
2444: break;
2445: if (t >= now)
2446: break;
2447: // Check next hour
2448: if ((t += 3600) > now)
2449: t = now;
2450: }
2451:
2452: // Do next check not before next hour.
2453: struct tm * tmnow = localtime(&now);
2454: state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2455:
2456: if (testtype) {
2457: state.must_write = true;
2458: // Tell user if an old test was found.
2459: if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2460: char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2461: PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2462: cfg.name.c_str(), testtype, datebuf);
2463: }
2464: }
2465:
2466: return testtype;
2467: }
2468:
2469: // Print a list of future tests.
2470: static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2471: {
2472: unsigned numdev = configs.size();
2473: if (!numdev)
2474: return;
2475: std::vector<int> testcnts(numdev * num_test_types, 0);
2476:
2477: PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2478:
2479: // FixGlibcTimeZoneBug(); // done in PrintOut()
2480: time_t now = time(0);
2481: char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2482: dateandtimezoneepoch(datenow, now);
2483:
2484: long seconds;
2485: for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2486: // Check for each device whether a test will be run
2487: time_t testtime = now + seconds;
2488: for (unsigned i = 0; i < numdev; i++) {
2489: const dev_config & cfg = configs.at(i);
2490: dev_state & state = states.at(i);
2491: const char * p;
2492: char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2493: if (testtype && (p = strchr(test_type_chars, testtype))) {
2494: unsigned t = (p - test_type_chars);
2495: // Report at most 5 tests of each type
2496: if (++testcnts[i*num_test_types + t] <= 5) {
2497: dateandtimezoneepoch(date, testtime);
2498: PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2499: testcnts[i*num_test_types + t], testtype, date);
2500: }
2501: }
2502: }
2503: }
2504:
2505: // Report totals
2506: dateandtimezoneepoch(date, now+seconds);
2507: PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2508: for (unsigned i = 0; i < numdev; i++) {
2509: const dev_config & cfg = configs.at(i);
2510: bool scsi = devices.at(i)->is_scsi();
2511: for (unsigned t = 0; t < num_test_types; t++) {
2512: int cnt = testcnts[i*num_test_types + t];
2513: if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2514: continue;
2515: PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2516: cnt, (cnt==1?"":"s"), test_type_chars[t]);
2517: }
2518: }
2519:
2520: }
2521:
2522: // Return zero on success, nonzero on failure. Perform offline (background)
2523: // short or long (extended) self test on given scsi device.
2524: static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2525: {
2526: int retval = 0;
2527: const char *testname = 0;
2528: const char *name = cfg.name.c_str();
2529: int inProgress;
2530:
2531: if (scsiSelfTestInProgress(device, &inProgress)) {
2532: PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2533: state.not_cap_short = state.not_cap_long = true;
2534: return 1;
2535: }
2536:
2537: if (1 == inProgress) {
2538: PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2539: "progress.\n", name);
2540: return 1;
2541: }
2542:
2543: switch (testtype) {
2544: case 'S':
2545: testname = "Short Self";
2546: retval = scsiSmartShortSelfTest(device);
2547: break;
2548: case 'L':
2549: testname = "Long Self";
2550: retval = scsiSmartExtendSelfTest(device);
2551: break;
2552: }
2553: // If we can't do the test, exit
2554: if (NULL == testname) {
2555: PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2556: testtype);
2557: return 1;
2558: }
2559: if (retval) {
2560: if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2561: (SIMPLE_ERR_BAD_FIELD == retval)) {
2562: PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2563: testname);
2564: if ('L'==testtype)
2565: state.not_cap_long = true;
2566: else
2567: state.not_cap_short = true;
2568:
2569: return 1;
2570: }
2571: PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2572: testname, retval);
2573: return 1;
2574: }
2575:
2576: PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2577:
2578: return 0;
2579: }
2580:
2581: // Do an offline immediate or self-test. Return zero on success,
2582: // nonzero on failure.
2583: static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2584: {
2585: const char *name = cfg.name.c_str();
2586:
2587: // Read current smart data and check status/capability
2588: struct ata_smart_values data;
2589: if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2590: PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2591: return 1;
2592: }
2593:
2594: // Check for capability to do the test
2595: int dotest = -1, mode = 0;
2596: const char *testname = 0;
2597: switch (testtype) {
2598: case 'O':
2599: testname="Offline Immediate ";
2600: if (isSupportExecuteOfflineImmediate(&data))
2601: dotest=OFFLINE_FULL_SCAN;
2602: else
2603: state.not_cap_offline = true;
2604: break;
2605: case 'C':
2606: testname="Conveyance Self-";
2607: if (isSupportConveyanceSelfTest(&data))
2608: dotest=CONVEYANCE_SELF_TEST;
2609: else
2610: state.not_cap_conveyance = true;
2611: break;
2612: case 'S':
2613: testname="Short Self-";
2614: if (isSupportSelfTest(&data))
2615: dotest=SHORT_SELF_TEST;
2616: else
2617: state.not_cap_short = true;
2618: break;
2619: case 'L':
2620: testname="Long Self-";
2621: if (isSupportSelfTest(&data))
2622: dotest=EXTEND_SELF_TEST;
2623: else
2624: state.not_cap_long = true;
2625: break;
2626:
2627: case 'c': case 'n': case 'r':
2628: testname = "Selective Self-";
2629: if (isSupportSelectiveSelfTest(&data)) {
2630: dotest = SELECTIVE_SELF_TEST;
2631: switch (testtype) {
2632: case 'c': mode = SEL_CONT; break;
2633: case 'n': mode = SEL_NEXT; break;
2634: case 'r': mode = SEL_REDO; break;
2635: }
2636: }
2637: else
2638: state.not_cap_selective = true;
2639: break;
2640: }
2641:
2642: // If we can't do the test, exit
2643: if (dotest<0) {
2644: PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2645: return 1;
2646: }
2647:
2648: // If currently running a self-test, do not interrupt it to start another.
2649: if (15==(data.self_test_exec_status >> 4)) {
2650: if (cfg.fix_firmwarebug == FIX_SAMSUNG3 && data.self_test_exec_status == 0xf0) {
2651: PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2652: "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2653: } else {
2654: PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2655: name, testname, (int)(data.self_test_exec_status & 0x0f));
2656: return 1;
2657: }
2658: }
2659:
2660: if (dotest == SELECTIVE_SELF_TEST) {
2661: // Set test span
2662: ata_selective_selftest_args selargs, prev_args;
2663: selargs.num_spans = 1;
2664: selargs.span[0].mode = mode;
2665: prev_args.num_spans = 1;
2666: prev_args.span[0].start = state.selective_test_last_start;
2667: prev_args.span[0].end = state.selective_test_last_end;
2668: if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
2669: PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
2670: return 1;
2671: }
2672: uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
2673: PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %"PRIu64" - %"PRIu64" (%"PRIu64" sectors, %u%% - %u%% of disk).\n",
2674: name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
2675: start, end, end - start + 1,
2676: (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
2677: (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
2678: state.selective_test_last_start = start;
2679: state.selective_test_last_end = end;
2680: }
2681:
2682: // execute the test, and return status
2683: int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
2684: if (retval) {
2685: PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2686: return retval;
2687: }
2688:
2689: if (testtype != 'O')
2690: // Log next self-test execution status
2691: state.smartval.self_test_exec_status = 0xff;
2692:
2693: PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2694: return 0;
2695: }
2696:
2697: // Check pending sector count attribute values (-C, -U directives).
2698: static void check_pending(const dev_config & cfg, dev_state & state,
2699: unsigned char id, bool increase_only,
2700: const ata_smart_values & smartval,
2701: int mailtype, const char * msg)
2702: {
2703: // Find attribute index
2704: int i = ata_find_attr_index(id, smartval);
2705: if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
2706: return;
2707:
2708: // No report if no sectors pending.
2709: uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
2710: if (rawval == 0) {
2711: reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
2712: return;
2713: }
2714:
2715: // If attribute is not reset, report only sector count increases.
2716: uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
2717: if (!(!increase_only || prev_rawval < rawval))
2718: return;
2719:
2720: // Format message.
2721: std::string s = strprintf("Device: %s, %"PRId64" %s", cfg.name.c_str(), rawval, msg);
2722: if (prev_rawval > 0 && rawval != prev_rawval)
2723: s += strprintf(" (changed %+"PRId64")", rawval - prev_rawval);
2724:
2725: PrintOut(LOG_CRIT, "%s\n", s.c_str());
2726: MailWarning(cfg, state, mailtype, "%s\n", s.c_str());
2727: state.must_write = true;
2728: }
2729:
// Render a temperature value as text into the caller-supplied buffer.
// A value of 0 means "unset" and is rendered as "??"; any other value is
// rendered as its decimal number. Returns 'buf' for use inside a format
// argument list.
static const char * fmt_temp(unsigned char x, char * buf)
{
  if (x) {
    sprintf(buf, "%u", x);
    return buf;
  }

  // unset
  strcpy(buf, "??");
  return buf;
}
2739:
2740: // Check Temperature limits
2741: static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
2742: {
2743: if (!(0 < currtemp && currtemp < 255)) {
2744: PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
2745: return;
2746: }
2747:
2748: // Update Max Temperature
2749: const char * minchg = "", * maxchg = "";
2750: if (currtemp > state.tempmax) {
2751: if (state.tempmax)
2752: maxchg = "!";
2753: state.tempmax = currtemp;
2754: state.must_write = true;
2755: }
2756:
2757: char buf[20];
2758: if (!state.temperature) {
2759: // First check
2760: if (!state.tempmin || currtemp < state.tempmin)
2761: // Delay Min Temperature update by ~ 30 minutes.
2762: state.tempmin_delay = time(0) + CHECKTIME - 60;
2763: PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2764: cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
2765: if (triptemp)
2766: PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
2767: state.temperature = currtemp;
2768: }
2769: else {
2770: if (state.tempmin_delay) {
2771: // End Min Temperature update delay if ...
2772: if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
2773: || (state.tempmin_delay <= time(0))) { // or delay time is over.
2774: state.tempmin_delay = 0;
2775: if (!state.tempmin)
2776: state.tempmin = 255;
2777: }
2778: }
2779:
2780: // Update Min Temperature
2781: if (!state.tempmin_delay && currtemp < state.tempmin) {
2782: state.tempmin = currtemp;
2783: state.must_write = true;
2784: if (currtemp != state.temperature)
2785: minchg = "!";
2786: }
2787:
2788: // Track changes
2789: if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
2790: PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
2791: cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2792: state.temperature = currtemp;
2793: }
2794: }
2795:
2796: // Check limits
2797: if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
2798: PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2799: cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2800: MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2801: cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2802: }
2803: else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
2804: PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2805: cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2806: }
2807: else if (cfg.tempcrit) {
2808: unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
2809: if (currtemp < limit)
2810: reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
2811: }
2812: }
2813:
2814: // Check normalized and raw attribute values.
2815: static void check_attribute(const dev_config & cfg, dev_state & state,
2816: const ata_smart_attribute & attr,
2817: const ata_smart_attribute & prev,
2818: int attridx,
2819: const ata_smart_threshold_entry * thresholds)
2820: {
2821: // Check attribute and threshold
2822: ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
2823: if (attrstate == ATTRSTATE_NON_EXISTING)
2824: return;
2825:
2826: // If requested, check for usage attributes that have failed.
2827: if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
2828: && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
2829: std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs);
2830: PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
2831: MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
2832: state.must_write = true;
2833: }
2834:
2835: // Return if we're not tracking this type of attribute
2836: bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
2837: if (!( ( prefail && cfg.prefail)
2838: || (!prefail && cfg.usage )))
2839: return;
2840:
2841: // Return if '-I ID' was specified
2842: if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
2843: return;
2844:
2845: // Issue warning if they don't have the same ID in all structures.
2846: if (attr.id != prev.id) {
2847: PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
2848: cfg.name.c_str(), attr.id, prev.id);
2849: return;
2850: }
2851:
2852: // Compare normalized values if valid.
2853: bool valchanged = false;
2854: if (attrstate > ATTRSTATE_NO_NORMVAL) {
2855: if (attr.current != prev.current)
2856: valchanged = true;
2857: }
2858:
2859: // Compare raw values if requested.
2860: bool rawchanged = false;
2861: if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
2862: if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
2863: != ata_get_attr_raw_value(prev, cfg.attribute_defs))
2864: rawchanged = true;
2865: }
2866:
2867: // Return if no change
2868: if (!(valchanged || rawchanged))
2869: return;
2870:
2871: // Format value strings
2872: std::string currstr, prevstr;
2873: if (attrstate == ATTRSTATE_NO_NORMVAL) {
2874: // Print raw values only
2875: currstr = strprintf("%s (Raw)",
2876: ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2877: prevstr = strprintf("%s (Raw)",
2878: ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2879: }
2880: else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
2881: // Print normalized and raw values
2882: currstr = strprintf("%d [Raw %s]", attr.current,
2883: ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2884: prevstr = strprintf("%d [Raw %s]", prev.current,
2885: ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2886: }
2887: else {
2888: // Print normalized values only
2889: currstr = strprintf("%d", attr.current);
2890: prevstr = strprintf("%d", prev.current);
2891: }
2892:
2893: // Format message
2894: std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
2895: cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
2896: ata_get_smart_attr_name(attr.id, cfg.attribute_defs).c_str(),
2897: prevstr.c_str(), currstr.c_str());
2898:
2899: // Report this change as critical ?
2900: if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
2901: || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
2902: PrintOut(LOG_CRIT, "%s\n", msg.c_str());
2903: MailWarning(cfg, state, 2, "%s", msg.c_str());
2904: }
2905: else {
2906: PrintOut(LOG_INFO, "%s\n", msg.c_str());
2907: }
2908: state.must_write = true;
2909: }
2910:
2911:
2912: static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
2913: bool firstpass, bool allow_selftests)
2914: {
2915: const char * name = cfg.name.c_str();
2916:
2917: // If user has asked, test the email warning system
2918: if (cfg.emailtest)
2919: MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2920:
2921: // if we can't open device, fail gracefully rather than hard --
2922: // perhaps the next time around we'll be able to open it. ATAPI
2923: // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
2924: // given (see linux cdrom driver).
2925: if (!atadev->open()) {
2926: PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, atadev->get_errmsg());
2927: MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
2928: return 1;
2929: }
2930: if (debugmode)
2931: PrintOut(LOG_INFO,"Device: %s, opened ATA device\n", name);
2932: reset_warning_mail(cfg, state, 9, "open device worked again");
2933:
2934: // user may have requested (with the -n Directive) to leave the disk
2935: // alone if it is in idle or sleeping mode. In this case check the
2936: // power mode and exit without check if needed
2937: if (cfg.powermode && !state.powermodefail) {
2938: int dontcheck=0, powermode=ataCheckPowerMode(atadev);
2939: const char * mode = 0;
2940: if (0 <= powermode && powermode < 0xff) {
2941: // wait for possible spin up and check again
2942: int powermode2;
2943: sleep(5);
2944: powermode2 = ataCheckPowerMode(atadev);
2945: if (powermode2 > powermode)
2946: PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
2947: powermode = powermode2;
2948: }
2949:
2950: switch (powermode){
2951: case -1:
2952: // SLEEP
2953: mode="SLEEP";
2954: if (cfg.powermode>=1)
2955: dontcheck=1;
2956: break;
2957: case 0:
2958: // STANDBY
2959: mode="STANDBY";
2960: if (cfg.powermode>=2)
2961: dontcheck=1;
2962: break;
2963: case 0x80:
2964: // IDLE
2965: mode="IDLE";
2966: if (cfg.powermode>=3)
2967: dontcheck=1;
2968: break;
2969: case 0xff:
2970: // ACTIVE/IDLE
2971: mode="ACTIVE or IDLE";
2972: break;
2973: default:
2974: // UNKNOWN
2975: PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2976: name, powermode);
2977: state.powermodefail = true;
2978: break;
2979: }
2980:
2981: // if we are going to skip a check, return now
2982: if (dontcheck){
2983: // skip at most powerskipmax checks
2984: if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2985: CloseDevice(atadev, name);
2986: if (!state.powerskipcnt && !cfg.powerquiet) // report first only and avoid waking up system disk
2987: PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
2988: state.powerskipcnt++;
2989: return 0;
2990: }
2991: else {
2992: PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
2993: name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
2994: }
2995: state.powerskipcnt = 0;
2996: state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
2997: }
2998: else if (state.powerskipcnt) {
2999: PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3000: name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3001: state.powerskipcnt = 0;
3002: state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3003: }
3004: }
3005:
3006: // check smart status
3007: if (cfg.smartcheck) {
3008: int status=ataSmartStatus2(atadev);
3009: if (status==-1){
3010: PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3011: MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3012: state.must_write = true;
3013: }
3014: else if (status==1){
3015: PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3016: MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3017: state.must_write = true;
3018: }
3019: }
3020:
3021: // Check everything that depends upon SMART Data (eg, Attribute values)
3022: if ( cfg.usagefailed || cfg.prefail || cfg.usage
3023: || cfg.curr_pending_id || cfg.offl_pending_id
3024: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3025: || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3026:
3027: // Read current attribute values.
3028: ata_smart_values curval;
3029: if (ataReadSmartValues(atadev, &curval)){
3030: PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3031: MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3032: state.must_write = true;
3033: }
3034: else {
3035: reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3036:
3037: // look for current or offline pending sectors
3038: if (cfg.curr_pending_id)
3039: check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3040: (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3041: : "Total unreadable (pending) sectors" ));
3042:
3043: if (cfg.offl_pending_id)
3044: check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3045: (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3046: : "Total offline uncorrectable sectors"));
3047:
3048: // check temperature limits
3049: if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3050: CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3051:
3052: // look for failed usage attributes, or track usage or prefail attributes
3053: if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3054: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3055: check_attribute(cfg, state,
3056: curval.vendor_attributes[i],
3057: state.smartval.vendor_attributes[i],
3058: i, state.smartthres.thres_entries);
3059: }
3060: }
3061:
3062: // Log changes of offline data collection status
3063: if (cfg.offlinests) {
3064: if ( curval.offline_data_collection_status
3065: != state.smartval.offline_data_collection_status
3066: || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3067: log_offline_data_coll_status(name, curval.offline_data_collection_status);
3068: }
3069:
3070: // Log changes of self-test execution status
3071: if (cfg.selfteststs) {
3072: if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
3073: || (firstpass && (debugmode || curval.self_test_exec_status != 0x00)))
3074: log_self_test_exec_status(name, curval.self_test_exec_status);
3075: }
3076:
3077: // Save the new values for the next time around
3078: state.smartval = curval;
3079: }
3080: }
3081:
3082: // check if number of selftest errors has increased (note: may also DECREASE)
3083: if (cfg.selftest)
3084: CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug));
3085:
3086: // check if number of ATA errors has increased
3087: if (cfg.errorlog || cfg.xerrorlog) {
3088:
3089: int errcnt1 = -1, errcnt2 = -1;
3090: if (cfg.errorlog)
3091: errcnt1 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, false);
3092: if (cfg.xerrorlog)
3093: errcnt2 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, true);
3094:
3095: // new number of errors is max of both logs
3096: int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3097:
3098: // did command fail?
3099: if (newc<0)
3100: // lack of PrintOut here is INTENTIONAL
3101: MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3102:
3103: // has error count increased?
3104: int oldc = state.ataerrorcount;
3105: if (newc>oldc){
3106: PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3107: name, oldc, newc);
3108: MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3109: name, oldc, newc);
3110: state.must_write = true;
3111: }
3112:
3113: if (newc>=0)
3114: state.ataerrorcount=newc;
3115: }
3116:
3117: // if the user has asked, and device is capable (or we're not yet
3118: // sure) check whether a self test should be done now.
3119: if (allow_selftests && !cfg.test_regex.empty()) {
3120: char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3121: if (testtype)
3122: DoATASelfTest(cfg, state, atadev, testtype);
3123: }
3124:
3125: // Don't leave device open -- the OS/user may want to access it
3126: // before the next smartd cycle!
3127: CloseDevice(atadev, name);
3128:
3129: // Copy ATA attribute values to persistent state
3130: state.update_persistent_state();
3131:
3132: return 0;
3133: }
3134:
3135: static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3136: {
3137: UINT8 asc, ascq;
3138: UINT8 currenttemp;
3139: UINT8 triptemp;
3140: const char * name = cfg.name.c_str();
3141: const char *cp;
3142:
3143: // If the user has asked for it, test the email warning system
3144: if (cfg.emailtest)
3145: MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3146:
3147: // if we can't open device, fail gracefully rather than hard --
3148: // perhaps the next time around we'll be able to open it
3149: if (!scsidev->open()) {
3150: PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, scsidev->get_errmsg());
3151: MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3152: return 1;
3153: } else if (debugmode)
3154: PrintOut(LOG_INFO,"Device: %s, opened SCSI device\n", name);
3155: currenttemp = 0;
3156: asc = 0;
3157: ascq = 0;
3158: if (!state.SuppressReport) {
3159: if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3160: &asc, &ascq, ¤ttemp, &triptemp)) {
3161: PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3162: name);
3163: MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3164: state.SuppressReport = 1;
3165: }
3166: }
3167: if (asc > 0) {
3168: cp = scsiGetIEString(asc, ascq);
3169: if (cp) {
3170: PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3171: MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3172: } else if (debugmode)
3173: PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3174: name, (int)asc, (int)ascq);
3175: } else if (debugmode)
3176: PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3177:
3178: // check temperature limits
3179: if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3180: CheckTemperature(cfg, state, currenttemp, triptemp);
3181:
3182: // check if number of selftest errors has increased (note: may also DECREASE)
3183: if (cfg.selftest)
3184: CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3185:
3186: if (allow_selftests && !cfg.test_regex.empty()) {
3187: char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3188: if (testtype)
3189: DoSCSISelfTest(cfg, state, scsidev, testtype);
3190: }
3191: CloseDevice(scsidev, name);
3192: return 0;
3193: }
3194:
3195: // Checks the SMART status of all ATA and SCSI devices
3196: static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3197: smart_device_list & devices, bool firstpass, bool allow_selftests)
3198: {
3199: for (unsigned i = 0; i < configs.size(); i++) {
3200: const dev_config & cfg = configs.at(i);
3201: dev_state & state = states.at(i);
3202: smart_device * dev = devices.at(i);
3203: if (dev->is_ata())
3204: ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3205: else if (dev->is_scsi())
3206: SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3207: }
3208: }
3209:
3210: // Set if Initialize() was called
3211: static bool is_initialized = false;
3212:
3213: // Does initialization right after fork to daemon mode
3214: static void Initialize(time_t *wakeuptime)
3215: {
3216: // Call Goodbye() on exit
3217: is_initialized = true;
3218:
3219: // write PID file
3220: if (!debugmode)
3221: WritePidFile();
3222:
3223: // install signal handlers. On Solaris, can't use signal() because
3224: // it resets the handler to SIG_DFL after each call. So use sigset()
3225: // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
3226:
3227: // normal and abnormal exit
3228: if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
3229: SIGNALFN(SIGTERM, SIG_IGN);
3230: if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
3231: SIGNALFN(SIGQUIT, SIG_IGN);
3232:
3233: // in debug mode, <CONTROL-C> ==> HUP
3234: if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
3235: SIGNALFN(SIGINT, SIG_IGN);
3236:
3237: // Catch HUP and USR1
3238: if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
3239: SIGNALFN(SIGHUP, SIG_IGN);
3240: if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
3241: SIGNALFN(SIGUSR1, SIG_IGN);
3242: #ifdef _WIN32
3243: if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
3244: SIGNALFN(SIGUSR2, SIG_IGN);
3245: #endif
3246:
3247: // initialize wakeup time to CURRENT time
3248: *wakeuptime=time(NULL);
3249:
3250: return;
3251: }
3252:
#ifdef _WIN32
// Switch debug mode on or off in response to signal USR2.
// Implemented for native windows only
// (there is no easy way to reopen tty on *nix).
// Only debug modes 0 and 1 are toggled; any other mode is left unchanged.
static void ToggleDebugMode()
{
  switch (debugmode) {
    case 0:
      // Off -> on: requires a console; SIGINT then acts like HUP
      PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
      if (daemon_enable_console("smartd [Debug]")) {
        PrintOut(LOG_INFO,"enable console failed\n");
        break;
      }
      debugmode = 1;
      daemon_signal(SIGINT, HUPhandler);
      PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
      break;

    case 1:
      // On -> off: release the console and restore the regular SIGINT handler
      daemon_disable_console();
      debugmode = 0;
      daemon_signal(SIGINT, sighandler);
      PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
      break;

    default:
      PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
      break;
  }
}
#endif
3278:
3279: static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
3280: {
3281: // If past wake-up-time, compute next wake-up-time
3282: time_t timenow=time(NULL);
3283: while (wakeuptime<=timenow){
3284: int intervals=1+(timenow-wakeuptime)/checktime;
3285: wakeuptime+=intervals*checktime;
3286: }
3287:
3288: // sleep until we catch SIGUSR1 or have completed sleeping
3289: int addtime = 0;
3290: while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
3291:
3292: // protect user again system clock being adjusted backwards
3293: if (wakeuptime>timenow+checktime){
3294: PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3295: wakeuptime=timenow+checktime;
3296: }
3297:
3298: // Exit sleep when time interval has expired or a signal is received
3299: sleep(wakeuptime+addtime-timenow);
3300:
3301: #ifdef _WIN32
3302: // toggle debug mode?
3303: if (caughtsigUSR2) {
3304: ToggleDebugMode();
3305: caughtsigUSR2 = 0;
3306: }
3307: #endif
3308:
3309: timenow=time(NULL);
3310:
3311: // Actual sleep time too long?
3312: if (!addtime && timenow > wakeuptime+60) {
3313: if (debugmode)
3314: PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
3315: (int)(timenow-wakeuptime));
3316: // Wait another 20 seconds to avoid I/O errors during disk spin-up
3317: addtime = timenow-wakeuptime+20;
3318: // Use next wake-up-time if close
3319: int nextcheck = checktime - addtime % checktime;
3320: if (nextcheck <= 20)
3321: addtime += nextcheck;
3322: }
3323: }
3324:
3325: // if we caught a SIGUSR1 then print message and clear signal
3326: if (caughtsigUSR1){
3327: PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3328: wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
3329: caughtsigUSR1=0;
3330: sigwakeup = true;
3331: }
3332:
3333: // return adjusted wakeuptime
3334: return wakeuptime;
3335: }
3336:
3337: // Print out a list of valid arguments for the Directive d
3338: static void printoutvaliddirectiveargs(int priority, char d)
3339: {
3340: switch (d) {
3341: case 'n':
3342: PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3343: break;
3344: case 's':
3345: PrintOut(priority, "valid_regular_expression");
3346: break;
3347: case 'd':
3348: PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3349: break;
3350: case 'T':
3351: PrintOut(priority, "normal, permissive");
3352: break;
3353: case 'o':
3354: case 'S':
3355: PrintOut(priority, "on, off");
3356: break;
3357: case 'l':
3358: PrintOut(priority, "error, selftest");
3359: break;
3360: case 'M':
3361: PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3362: break;
3363: case 'v':
3364: PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3365: break;
3366: case 'P':
3367: PrintOut(priority, "use, ignore, show, showall");
3368: break;
3369: case 'F':
3370: PrintOut(priority, "none, samsung, samsung2, samsung3");
3371: break;
3372: }
3373: }
3374:
3375: // exits with an error message, or returns integer value of token
3376: static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3377: int min, int max, char * suffix = 0)
3378: {
3379: // make sure argument is there
3380: if (!arg) {
3381: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3382: cfgfile, lineno, name, token, min, max);
3383: return -1;
3384: }
3385:
3386: // get argument value (base 10), check that it's integer, and in-range
3387: char *endptr;
3388: int val = strtol(arg,&endptr,10);
3389:
3390: // optional suffix present?
3391: if (suffix) {
3392: if (!strcmp(endptr, suffix))
3393: endptr += strlen(suffix);
3394: else
3395: *suffix = 0;
3396: }
3397:
3398: if (!(!*endptr && min <= val && val <= max)) {
3399: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3400: cfgfile, lineno, name, token, arg, min, max);
3401: return -1;
3402: }
3403:
3404: // all is well; return value
3405: return val;
3406: }
3407:
3408:
3409: // Get 1-3 small integer(s) for '-W' directive
3410: static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3411: unsigned char *val1, unsigned char *val2, unsigned char *val3)
3412: {
3413: unsigned v1 = 0, v2 = 0, v3 = 0;
3414: int n1 = -1, n2 = -1, n3 = -1, len;
3415: if (!arg) {
3416: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3417: cfgfile, lineno, name, token);
3418: return -1;
3419: }
3420:
3421: len = strlen(arg);
3422: if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3423: && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3424: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3425: cfgfile, lineno, name, token, arg);
3426: return -1;
3427: }
3428: *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3429: return 0;
3430: }
3431:
3432:
3433: // This function returns 1 if it has correctly parsed one token (and
3434: // any arguments), else zero if no tokens remain. It returns -1 if an
3435: // error was encountered.
3436: static int ParseToken(char * token, dev_config & cfg)
3437: {
3438: char sym;
3439: const char * name = cfg.name.c_str();
3440: int lineno=cfg.lineno;
3441: const char *delim = " \n\t";
3442: int badarg = 0;
3443: int missingarg = 0;
3444: const char *arg = 0;
3445:
3446: // is the rest of the line a comment
3447: if (*token=='#')
3448: return 1;
3449:
3450: // is the token not recognized?
3451: if (*token!='-' || strlen(token)!=2) {
3452: PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3453: configfile, lineno, name, token);
3454: PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
3455: return -1;
3456: }
3457:
3458: // token we will be parsing:
3459: sym=token[1];
3460:
3461: // parse the token and swallow its argument
3462: int val;
3463: char plus[] = "+", excl[] = "!";
3464:
3465: switch (sym) {
3466: case 'C':
3467: // monitor current pending sector count (default 197)
3468: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3469: return -1;
3470: cfg.curr_pending_id = (unsigned char)val;
3471: cfg.curr_pending_incr = (*plus == '+');
3472: cfg.curr_pending_set = true;
3473: break;
3474: case 'U':
3475: // monitor offline uncorrectable sectors (default 198)
3476: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3477: return -1;
3478: cfg.offl_pending_id = (unsigned char)val;
3479: cfg.offl_pending_incr = (*plus == '+');
3480: cfg.offl_pending_set = true;
3481: break;
3482: case 'T':
3483: // Set tolerance level for SMART command failures
3484: if ((arg = strtok(NULL, delim)) == NULL) {
3485: missingarg = 1;
3486: } else if (!strcmp(arg, "normal")) {
3487: // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3488: // not on failure of an optional S.M.A.R.T. command.
3489: // This is the default so we don't need to actually do anything here.
3490: cfg.permissive = false;
3491: } else if (!strcmp(arg, "permissive")) {
3492: // Permissive mode; ignore errors from Mandatory SMART commands
3493: cfg.permissive = true;
3494: } else {
3495: badarg = 1;
3496: }
3497: break;
3498: case 'd':
3499: // specify the device type
3500: if ((arg = strtok(NULL, delim)) == NULL) {
3501: missingarg = 1;
3502: } else if (!strcmp(arg, "removable")) {
3503: cfg.removable = true;
3504: } else if (!strcmp(arg, "auto")) {
3505: cfg.dev_type = "";
3506: } else {
3507: cfg.dev_type = arg;
3508: }
3509: break;
3510: case 'F':
3511: // fix firmware bug
3512: if ((arg = strtok(NULL, delim)) == NULL) {
3513: missingarg = 1;
3514: } else if (!strcmp(arg, "none")) {
3515: cfg.fix_firmwarebug = FIX_NONE;
3516: } else if (!strcmp(arg, "samsung")) {
3517: cfg.fix_firmwarebug = FIX_SAMSUNG;
3518: } else if (!strcmp(arg, "samsung2")) {
3519: cfg.fix_firmwarebug = FIX_SAMSUNG2;
3520: } else if (!strcmp(arg, "samsung3")) {
3521: cfg.fix_firmwarebug = FIX_SAMSUNG3;
3522: } else {
3523: badarg = 1;
3524: }
3525: break;
3526: case 'H':
3527: // check SMART status
3528: cfg.smartcheck = true;
3529: break;
3530: case 'f':
3531: // check for failure of usage attributes
3532: cfg.usagefailed = true;
3533: break;
3534: case 't':
3535: // track changes in all vendor attributes
3536: cfg.prefail = true;
3537: cfg.usage = true;
3538: break;
3539: case 'p':
3540: // track changes in prefail vendor attributes
3541: cfg.prefail = true;
3542: break;
3543: case 'u':
3544: // track changes in usage vendor attributes
3545: cfg.usage = true;
3546: break;
3547: case 'l':
3548: // track changes in SMART logs
3549: if ((arg = strtok(NULL, delim)) == NULL) {
3550: missingarg = 1;
3551: } else if (!strcmp(arg, "selftest")) {
3552: // track changes in self-test log
3553: cfg.selftest = true;
3554: } else if (!strcmp(arg, "error")) {
3555: // track changes in ATA error log
3556: cfg.errorlog = true;
3557: } else if (!strcmp(arg, "xerror")) {
3558: // track changes in Extended Comprehensive SMART error log
3559: cfg.xerrorlog = true;
3560: } else if (!strcmp(arg, "offlinests")) {
3561: // track changes in offline data collection status
3562: cfg.offlinests = true;
3563: } else if (!strcmp(arg, "selfteststs")) {
3564: // track changes in self-test execution status
3565: cfg.selfteststs = true;
3566: } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
3567: // set SCT Error Recovery Control
3568: unsigned rt = ~0, wt = ~0; int nc = -1;
3569: sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
3570: if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
3571: cfg.sct_erc_set = true;
3572: cfg.sct_erc_readtime = rt;
3573: cfg.sct_erc_writetime = wt;
3574: }
3575: else
3576: badarg = 1;
3577: } else {
3578: badarg = 1;
3579: }
3580: break;
3581: case 'a':
3582: // monitor everything
3583: cfg.smartcheck = true;
3584: cfg.prefail = true;
3585: cfg.usagefailed = true;
3586: cfg.usage = true;
3587: cfg.selftest = true;
3588: cfg.errorlog = true;
3589: cfg.selfteststs = true;
3590: break;
3591: case 'o':
3592: // automatic offline testing enable/disable
3593: if ((arg = strtok(NULL, delim)) == NULL) {
3594: missingarg = 1;
3595: } else if (!strcmp(arg, "on")) {
3596: cfg.autoofflinetest = 2;
3597: } else if (!strcmp(arg, "off")) {
3598: cfg.autoofflinetest = 1;
3599: } else {
3600: badarg = 1;
3601: }
3602: break;
3603: case 'n':
3604: // skip disk check if in idle or standby mode
3605: if (!(arg = strtok(NULL, delim)))
3606: missingarg = 1;
3607: else {
3608: char *endptr = NULL;
3609: char *next = strchr(const_cast<char*>(arg), ',');
3610:
3611: cfg.powerquiet = false;
3612: cfg.powerskipmax = 0;
3613:
3614: if (next!=NULL) *next='\0';
3615: if (!strcmp(arg, "never"))
3616: cfg.powermode = 0;
3617: else if (!strcmp(arg, "sleep"))
3618: cfg.powermode = 1;
3619: else if (!strcmp(arg, "standby"))
3620: cfg.powermode = 2;
3621: else if (!strcmp(arg, "idle"))
3622: cfg.powermode = 3;
3623: else
3624: badarg = 1;
3625:
3626: // if optional arguments are present
3627: if (!badarg && next!=NULL) {
3628: next++;
3629: cfg.powerskipmax = strtol(next, &endptr, 10);
3630: if (endptr == next)
3631: cfg.powerskipmax = 0;
3632: else {
3633: next = endptr + (*endptr != '\0');
3634: if (cfg.powerskipmax <= 0)
3635: badarg = 1;
3636: }
3637: if (*next != '\0') {
3638: if (!strcmp("q", next))
3639: cfg.powerquiet = true;
3640: else {
3641: badarg = 1;
3642: }
3643: }
3644: }
3645: }
3646: break;
3647: case 'S':
3648: // automatic attribute autosave enable/disable
3649: if ((arg = strtok(NULL, delim)) == NULL) {
3650: missingarg = 1;
3651: } else if (!strcmp(arg, "on")) {
3652: cfg.autosave = 2;
3653: } else if (!strcmp(arg, "off")) {
3654: cfg.autosave = 1;
3655: } else {
3656: badarg = 1;
3657: }
3658: break;
3659: case 's':
3660: // warn user, and delete any previously given -s REGEXP Directives
3661: if (!cfg.test_regex.empty()){
3662: PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3663: configfile, lineno, name, cfg.test_regex.get_pattern());
3664: cfg.test_regex = regular_expression();
3665: }
3666: // check for missing argument
3667: if (!(arg = strtok(NULL, delim))) {
3668: missingarg = 1;
3669: }
3670: // Compile regex
3671: else {
3672: if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
3673: // not a valid regular expression!
3674: PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3675: configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
3676: return -1;
3677: }
3678: }
3679: // Do a bit of sanity checking and warn user if we think that
3680: // their regexp is "strange". User probably confused about shell
3681: // glob(3) syntax versus regular expression syntax regexp(7).
3682: if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
3683: PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3684: configfile, lineno, name, val+1, arg[val], arg);
3685: break;
3686: case 'm':
3687: // send email to address that follows
3688: if (!(arg = strtok(NULL,delim)))
3689: missingarg = 1;
3690: else {
3691: if (!cfg.emailaddress.empty())
3692: PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3693: configfile, lineno, name, cfg.emailaddress.c_str());
3694: cfg.emailaddress = arg;
3695: }
3696: break;
3697: case 'M':
3698: // email warning options
3699: if (!(arg = strtok(NULL, delim)))
3700: missingarg = 1;
3701: else if (!strcmp(arg, "once"))
3702: cfg.emailfreq = 1;
3703: else if (!strcmp(arg, "daily"))
3704: cfg.emailfreq = 2;
3705: else if (!strcmp(arg, "diminishing"))
3706: cfg.emailfreq = 3;
3707: else if (!strcmp(arg, "test"))
3708: cfg.emailtest = 1;
3709: else if (!strcmp(arg, "exec")) {
3710: // Get the next argument (the command line)
3711: if (!(arg = strtok(NULL, delim))) {
3712: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3713: configfile, lineno, name, token);
3714: return -1;
3715: }
3716: // Free the last cmd line given if any, and copy new one
3717: if (!cfg.emailcmdline.empty())
3718: PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3719: configfile, lineno, name, cfg.emailcmdline.c_str());
3720: cfg.emailcmdline = arg;
3721: }
3722: else
3723: badarg = 1;
3724: break;
3725: case 'i':
3726: // ignore failure of usage attribute
3727: if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3728: return -1;
3729: cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
3730: break;
3731: case 'I':
3732: // ignore attribute for tracking purposes
3733: if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3734: return -1;
3735: cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
3736: break;
3737: case 'r':
3738: // print raw value when tracking
3739: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3740: return -1;
3741: cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
3742: if (*excl == '!') // attribute change is critical
3743: cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
3744: break;
3745: case 'R':
3746: // track changes in raw value (forces printing of raw value)
3747: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3748: return -1;
3749: cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
3750: if (*excl == '!') // raw value change is critical
3751: cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
3752: break;
3753: case 'W':
3754: // track Temperature
3755: if ((val=Get3Integers(arg=strtok(NULL,delim), name, token, lineno, configfile,
3756: &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit))<0)
3757: return -1;
3758: break;
3759: case 'v':
3760: // non-default vendor-specific attribute meaning
3761: if (!(arg=strtok(NULL,delim))) {
3762: missingarg = 1;
3763: } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
3764: badarg = 1;
3765: }
3766: break;
3767: case 'P':
3768: // Define use of drive-specific presets.
3769: if (!(arg = strtok(NULL, delim))) {
3770: missingarg = 1;
3771: } else if (!strcmp(arg, "use")) {
3772: cfg.ignorepresets = false;
3773: } else if (!strcmp(arg, "ignore")) {
3774: cfg.ignorepresets = true;
3775: } else if (!strcmp(arg, "show")) {
3776: cfg.showpresets = true;
3777: } else if (!strcmp(arg, "showall")) {
3778: showallpresets();
3779: } else {
3780: badarg = 1;
3781: }
3782: break;
3783: default:
3784: // Directive not recognized
3785: PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3786: configfile, lineno, name, token);
3787: Directives();
3788: return -1;
3789: }
3790: if (missingarg) {
3791: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
3792: configfile, lineno, name, token);
3793: }
3794: if (badarg) {
3795: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
3796: configfile, lineno, name, token, arg);
3797: }
3798: if (missingarg || badarg) {
3799: PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
3800: printoutvaliddirectiveargs(LOG_CRIT, sym);
3801: PrintOut(LOG_CRIT, "\n");
3802: return -1;
3803: }
3804:
3805: return 1;
3806: }
3807:
3808: // Scan directive for configuration file
3809: #define SCANDIRECTIVE "DEVICESCAN"
3810:
3811: // This is the routine that adds things to the conf_entries list.
3812: //
3813: // Return values are:
3814: // 1: parsed a normal line
3815: // 0: found comment or blank line
3816: // -1: found SCANDIRECTIVE line
3817: // -2: found an error
3818: //
3819: // Note: this routine modifies *line from the caller!
3820: static int ParseConfigLine(dev_config_vector & conf_entries, int /*entry*/, int lineno, /*const*/ char * line)
3821: {
3822: char *token=NULL;
3823: char *name=NULL;
3824: const char *delim = " \n\t";
3825: int devscan=0;
3826:
3827: // get first token: device name. If a comment, skip line
3828: if (!(name=strtok(line,delim)) || *name=='#') {
3829: return 0;
3830: }
3831:
3832: // Have we detected the SCANDIRECTIVE directive?
3833: if (!strcmp(SCANDIRECTIVE,name)){
3834: devscan=1;
3835: }
3836:
3837: // We've got a legit entry, make space to store it
3838: conf_entries.push_back( dev_config() );
3839: dev_config & cfg = conf_entries.back();
3840:
3841: cfg.name = name; // Later replaced by dev->get_info().info_name
3842: cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
3843:
3844: // Store line number, and by default check for both device types.
3845: cfg.lineno=lineno;
3846:
3847: // parse tokens one at a time from the file.
3848: while ((token=strtok(NULL,delim))){
3849: int retval=ParseToken(token,cfg);
3850:
3851: if (retval==0)
3852: // No tokens left:
3853: break;
3854:
3855: if (retval>0) {
3856: // Parsed token
3857: #if (0)
3858: PrintOut(LOG_INFO,"Parsed token %s\n",token);
3859: #endif
3860: continue;
3861: }
3862:
3863: if (retval<0) {
3864: // error found on the line
3865: return -2;
3866: }
3867: }
3868:
3869: // If NO monitoring directives are set, then set all of them.
3870: if (!( cfg.smartcheck || cfg.selftest
3871: || cfg.errorlog || cfg.xerrorlog
3872: || cfg.offlinests || cfg.selfteststs
3873: || cfg.usagefailed || cfg.prefail || cfg.usage
3874: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
3875:
3876: PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
3877: cfg.name.c_str(), cfg.lineno, configfile);
3878:
3879: cfg.smartcheck = true;
3880: cfg.usagefailed = true;
3881: cfg.prefail = true;
3882: cfg.usage = true;
3883: cfg.selftest = true;
3884: cfg.errorlog = true;
3885: cfg.selfteststs = true;
3886: }
3887:
3888: // additional sanity check. Has user set -M options without -m?
3889: if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
3890: PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
3891: cfg.name.c_str(), cfg.lineno, configfile);
3892: return -2;
3893: }
3894:
3895: // has the user has set <nomailer>?
3896: if (cfg.emailaddress == "<nomailer>") {
3897: // check that -M exec is also set
3898: if (cfg.emailcmdline.empty()){
3899: PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
3900: cfg.name.c_str(), cfg.lineno, configfile);
3901: return -2;
3902: }
3903: // From here on the sign of <nomailer> is address.empty() and !cfg.emailcmdline.empty()
3904: cfg.emailaddress.clear();
3905: }
3906:
3907: if (devscan)
3908: return -1;
3909: else
3910: return 1;
3911: }
3912:
3913: // Parses a configuration file. Return values are:
3914: // N=>0: found N entries
3915: // -1: syntax error in config file
3916: // -2: config file does not exist
3917: // -3: config file exists but cannot be read
3918: //
3919: // In the case where the return value is 0, there are three
3920: // possiblities:
3921: // Empty configuration file ==> conf_entries.empty()
3922: // No configuration file ==> conf_entries[0].lineno == 0
3923: // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
3924: static int ParseConfigFile(dev_config_vector & conf_entries)
3925: {
3926: // maximum line length in configuration file
3927: const int MAXLINELEN = 256;
3928: // maximum length of a continued line in configuration file
3929: const int MAXCONTLINE = 1023;
3930:
3931: stdio_file f;
3932: // Open config file, if it exists and is not <stdin>
3933: if (!(configfile == configfile_stdin)) { // pointer comparison ok here
3934: if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
3935: // file exists but we can't read it or it should exist due to '-c' option
3936: int ret = (errno!=ENOENT ? -3 : -2);
3937: PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
3938: strerror(errno),configfile);
3939: return ret;
3940: }
3941: }
3942: else // read from stdin ('-c -' option)
3943: f.open(stdin);
3944:
3945: // No configuration file found -- use fake one
3946: int entry = 0;
3947: if (!f) {
3948: char fakeconfig[] = SCANDIRECTIVE" -a"; // TODO: Remove this hack, build cfg_entry.
3949:
3950: if (ParseConfigLine(conf_entries, entry, 0, fakeconfig) != -1)
3951: throw std::logic_error("Internal error parsing "SCANDIRECTIVE);
3952: return 0;
3953: }
3954:
3955: #ifdef __CYGWIN__
3956: setmode(fileno(f), O_TEXT); // Allow files with \r\n
3957: #endif
3958:
3959: // configuration file exists
3960: PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
3961:
3962: // parse config file line by line
3963: int lineno = 1, cont = 0, contlineno = 0;
3964: char line[MAXLINELEN+2];
3965: char fullline[MAXCONTLINE+1];
3966:
3967: for (;;) {
3968: int len=0,scandevice;
3969: char *lastslash;
3970: char *comment;
3971: char *code;
3972:
3973: // make debugging simpler
3974: memset(line,0,sizeof(line));
3975:
3976: // get a line
3977: code=fgets(line, MAXLINELEN+2, f);
3978:
3979: // are we at the end of the file?
3980: if (!code){
3981: if (cont) {
3982: scandevice = ParseConfigLine(conf_entries, entry, contlineno, fullline);
3983: // See if we found a SCANDIRECTIVE directive
3984: if (scandevice==-1)
3985: return 0;
3986: // did we find a syntax error
3987: if (scandevice==-2)
3988: return -1;
3989: // the final line is part of a continuation line
3990: cont=0;
3991: entry+=scandevice;
3992: }
3993: break;
3994: }
3995:
3996: // input file line number
3997: contlineno++;
3998:
3999: // See if line is too long
4000: len=strlen(line);
4001: if (len>MAXLINELEN){
4002: const char *warn;
4003: if (line[len-1]=='\n')
4004: warn="(including newline!) ";
4005: else
4006: warn="";
4007: PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4008: (int)contlineno,configfile,warn,(int)MAXLINELEN);
4009: return -1;
4010: }
4011:
4012: // Ignore anything after comment symbol
4013: if ((comment=strchr(line,'#'))){
4014: *comment='\0';
4015: len=strlen(line);
4016: }
4017:
4018: // is the total line (made of all continuation lines) too long?
4019: if (cont+len>MAXCONTLINE){
4020: PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4021: lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4022: return -1;
4023: }
4024:
4025: // copy string so far into fullline, and increment length
4026: strcpy(fullline+cont,line);
4027: cont+=len;
4028:
4029: // is this a continuation line. If so, replace \ by space and look at next line
4030: if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4031: *(fullline+(cont-len)+(lastslash-line))=' ';
4032: continue;
4033: }
4034:
4035: // Not a continuation line. Parse it
4036: scandevice = ParseConfigLine(conf_entries, entry, contlineno, fullline);
4037:
4038: // did we find a scandevice directive?
4039: if (scandevice==-1)
4040: return 0;
4041: // did we find a syntax error
4042: if (scandevice==-2)
4043: return -1;
4044:
4045: entry+=scandevice;
4046: lineno++;
4047: cont=0;
4048: }
4049:
4050: // note -- may be zero if syntax of file OK, but no valid entries!
4051: return entry;
4052: }
4053:
4054: /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4055: <LIST> is the list of valid arguments for option opt. */
4056: static void PrintValidArgs(char opt)
4057: {
4058: const char *s;
4059:
4060: PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4061: if (!(s = GetValidArgList(opt)))
4062: PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4063: else
4064: PrintOut(LOG_CRIT, "%s", (char *)s);
4065: PrintOut(LOG_CRIT, " <=======\n");
4066: }
4067:
4068: #ifndef _WIN32
4069: // Report error and exit if specified path is not absolute.
4070: static void check_abs_path(char option, const std::string & path)
4071: {
4072: if (path.empty() || path[0] == '/')
4073: return;
4074:
4075: debugmode = 1;
4076: PrintHead();
4077: PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
4078: PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
4079: EXIT(EXIT_BADCMD);
4080: }
4081: #endif // !_WIN32
4082:
4083: // Parses input line, prints usage message and
4084: // version/license/copyright messages
4085: static void ParseOpts(int argc, char **argv)
4086: {
4087: // Init default configfile path
4088: #ifndef _WIN32
4089: configfile = SMARTMONTOOLS_SYSCONFDIR"/smartd.conf";
4090: #else
4091: static std::string configfile_str = get_exe_dir() + "/smartd.conf";
4092: configfile = configfile_str.c_str();
4093: #endif
4094:
4095: // Please update GetValidArgList() if you edit shortopts
4096: static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:Vh?"
4097: #ifdef HAVE_LIBCAP_NG
4098: "C"
4099: #endif
4100: ;
4101: // Please update GetValidArgList() if you edit longopts
4102: struct option longopts[] = {
4103: { "configfile", required_argument, 0, 'c' },
4104: { "logfacility", required_argument, 0, 'l' },
4105: { "quit", required_argument, 0, 'q' },
4106: { "debug", no_argument, 0, 'd' },
4107: { "showdirectives", no_argument, 0, 'D' },
4108: { "interval", required_argument, 0, 'i' },
4109: #ifndef _WIN32
4110: { "no-fork", no_argument, 0, 'n' },
4111: #else
4112: { "service", no_argument, 0, 'n' },
4113: #endif
4114: { "pidfile", required_argument, 0, 'p' },
4115: { "report", required_argument, 0, 'r' },
4116: { "savestates", required_argument, 0, 's' },
4117: { "attributelog", required_argument, 0, 'A' },
4118: { "drivedb", required_argument, 0, 'B' },
4119: { "version", no_argument, 0, 'V' },
4120: { "license", no_argument, 0, 'V' },
4121: { "copyright", no_argument, 0, 'V' },
4122: { "help", no_argument, 0, 'h' },
4123: { "usage", no_argument, 0, 'h' },
4124: #ifdef HAVE_LIBCAP_NG
4125: { "capabilities", no_argument, 0, 'C' },
4126: #endif
4127: { 0, 0, 0, 0 }
4128: };
4129:
4130: opterr=optopt=0;
4131: bool badarg = false;
4132: bool no_defaultdb = false; // set true on '-B FILE'
4133:
4134: // Parse input options.
4135: int optchar;
4136: while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
4137: char *arg;
4138: char *tailptr;
4139: long lchecktime;
4140:
4141: switch(optchar) {
4142: case 'q':
4143: // when to quit
4144: if (!(strcmp(optarg,"nodev"))) {
4145: quit=0;
4146: } else if (!(strcmp(optarg,"nodevstartup"))) {
4147: quit=1;
4148: } else if (!(strcmp(optarg,"never"))) {
4149: quit=2;
4150: } else if (!(strcmp(optarg,"onecheck"))) {
4151: quit=3;
4152: debugmode=1;
4153: } else if (!(strcmp(optarg,"showtests"))) {
4154: quit=4;
4155: debugmode=1;
4156: } else if (!(strcmp(optarg,"errors"))) {
4157: quit=5;
4158: } else {
4159: badarg = true;
4160: }
4161: break;
4162: case 'l':
4163: // set the log facility level
4164: if (!strcmp(optarg, "daemon"))
4165: facility=LOG_DAEMON;
4166: else if (!strcmp(optarg, "local0"))
4167: facility=LOG_LOCAL0;
4168: else if (!strcmp(optarg, "local1"))
4169: facility=LOG_LOCAL1;
4170: else if (!strcmp(optarg, "local2"))
4171: facility=LOG_LOCAL2;
4172: else if (!strcmp(optarg, "local3"))
4173: facility=LOG_LOCAL3;
4174: else if (!strcmp(optarg, "local4"))
4175: facility=LOG_LOCAL4;
4176: else if (!strcmp(optarg, "local5"))
4177: facility=LOG_LOCAL5;
4178: else if (!strcmp(optarg, "local6"))
4179: facility=LOG_LOCAL6;
4180: else if (!strcmp(optarg, "local7"))
4181: facility=LOG_LOCAL7;
4182: else
4183: badarg = true;
4184: break;
4185: case 'd':
4186: // enable debug mode
4187: debugmode = 1;
4188: break;
4189: case 'n':
4190: // don't fork()
4191: #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4192: do_fork = false;
4193: #endif
4194: break;
4195: case 'D':
4196: // print summary of all valid directives
4197: debugmode = 1;
4198: Directives();
4199: EXIT(0);
4200: break;
4201: case 'i':
4202: // Period (time interval) for checking
4203: // strtol will set errno in the event of overflow, so we'll check it.
4204: errno = 0;
4205: lchecktime = strtol(optarg, &tailptr, 10);
4206: if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
4207: debugmode=1;
4208: PrintHead();
4209: PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
4210: PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
4211: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4212: EXIT(EXIT_BADCMD);
4213: }
4214: checktime = (int)lchecktime;
4215: break;
4216: case 'r':
4217: // report IOCTL transactions
4218: {
4219: int i;
4220: char *s;
4221:
4222: // split_report_arg() may modify its first argument string, so use a
4223: // copy of optarg in case we want optarg for an error message.
4224: if (!(s = strdup(optarg))) {
4225: PrintOut(LOG_CRIT, "No memory to process -r option - exiting\n");
4226: EXIT(EXIT_NOMEM);
4227: }
4228: if (split_report_arg(s, &i)) {
4229: badarg = true;
4230: } else if (i<1 || i>3) {
4231: debugmode=1;
4232: PrintHead();
4233: PrintOut(LOG_CRIT, "======> INVALID REPORT LEVEL: %s <=======\n", optarg);
4234: PrintOut(LOG_CRIT, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
4235: EXIT(EXIT_BADCMD);
4236: } else if (!strcmp(s,"ioctl")) {
4237: ata_debugmode = scsi_debugmode = i;
4238: } else if (!strcmp(s,"ataioctl")) {
4239: ata_debugmode = i;
4240: } else if (!strcmp(s,"scsiioctl")) {
4241: scsi_debugmode = i;
4242: } else {
4243: badarg = true;
4244: }
4245: free(s); // TODO: use std::string
4246: }
4247: break;
4248: case 'c':
4249: // alternate configuration file
4250: if (strcmp(optarg,"-"))
4251: configfile = (configfile_alt = optarg).c_str();
4252: else // read from stdin
4253: configfile=configfile_stdin;
4254: break;
4255: case 'p':
4256: // output file with PID number
4257: pid_file = optarg;
4258: break;
4259: case 's':
4260: // path prefix of persistent state file
4261: state_path_prefix = optarg;
4262: break;
4263: case 'A':
4264: // path prefix of attribute log file
4265: attrlog_path_prefix = optarg;
4266: break;
4267: case 'B':
4268: {
4269: const char * path = optarg;
4270: if (*path == '+' && path[1])
4271: path++;
4272: else
4273: no_defaultdb = true;
4274: unsigned char savedebug = debugmode; debugmode = 1;
4275: if (!read_drive_database(path))
4276: EXIT(EXIT_BADCMD);
4277: debugmode = savedebug;
4278: }
4279: break;
4280: case 'V':
4281: // print version and CVS info
4282: debugmode = 1;
4283: PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
4284: EXIT(0);
4285: break;
4286: #ifdef HAVE_LIBCAP_NG
4287: case 'C':
4288: // enable capabilities
4289: enable_capabilities = true;
4290: break;
4291: #endif
4292: case 'h':
4293: // help: print summary of command-line options
4294: debugmode=1;
4295: PrintHead();
4296: Usage();
4297: EXIT(0);
4298: break;
4299: case '?':
4300: default:
4301: // unrecognized option
4302: debugmode=1;
4303: PrintHead();
4304: // Point arg to the argument in which this option was found.
4305: arg = argv[optind-1];
4306: // Check whether the option is a long option that doesn't map to -h.
4307: if (arg[1] == '-' && optchar != 'h') {
4308: // Iff optopt holds a valid option then argument must be missing.
4309: if (optopt && (strchr(shortopts, optopt) != NULL)) {
4310: PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
4311: PrintValidArgs(optopt);
4312: } else {
4313: PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
4314: }
4315: PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
4316: EXIT(EXIT_BADCMD);
4317: }
4318: if (optopt) {
4319: // Iff optopt holds a valid option then argument must be missing.
4320: if (strchr(shortopts, optopt) != NULL){
4321: PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
4322: PrintValidArgs(optopt);
4323: } else {
4324: PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
4325: }
4326: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4327: EXIT(EXIT_BADCMD);
4328: }
4329: Usage();
4330: EXIT(0);
4331: }
4332:
4333: // Check to see if option had an unrecognized or incorrect argument.
4334: if (badarg) {
4335: debugmode=1;
4336: PrintHead();
4337: // It would be nice to print the actual option name given by the user
4338: // here, but we just print the short form. Please fix this if you know
4339: // a clean way to do it.
4340: PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
4341: PrintValidArgs(optchar);
4342: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4343: EXIT(EXIT_BADCMD);
4344: }
4345: }
4346:
4347: // non-option arguments are not allowed
4348: if (argc > optind) {
4349: debugmode=1;
4350: PrintHead();
4351: PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
4352: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4353: EXIT(EXIT_BADCMD);
4354: }
4355:
4356: // no pidfile in debug mode
4357: if (debugmode && !pid_file.empty()) {
4358: debugmode=1;
4359: PrintHead();
4360: PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4361: PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
4362: EXIT(EXIT_BADCMD);
4363: }
4364:
4365: #ifndef _WIN32
4366: if (!debugmode) {
4367: // absolute path names are required due to chdir('/') after fork().
4368: check_abs_path('p', pid_file);
4369: check_abs_path('s', state_path_prefix);
4370: check_abs_path('A', attrlog_path_prefix);
4371: }
4372: #endif
4373:
4374: // Read or init drive database
4375: if (!no_defaultdb) {
4376: unsigned char savedebug = debugmode; debugmode = 1;
4377: if (!read_default_drive_databases())
4378: EXIT(EXIT_BADCMD);
4379: debugmode = savedebug;
4380: }
4381:
4382: // print header
4383: PrintHead();
4384: }
4385:
4386: // Function we call if no configuration file was found or if the
4387: // SCANDIRECTIVE Directive was found. It makes entries for device
4388: // names returned by scan_smart_devices() in os_OSNAME.cpp
4389: static int MakeConfigEntries(const dev_config & base_cfg,
4390: dev_config_vector & conf_entries, smart_device_list & scanned_devs, const char * type)
4391: {
4392: // make list of devices
4393: smart_device_list devlist;
4394: if (!smi()->scan_smart_devices(devlist, (*type ? type : 0)))
4395: PrintOut(LOG_CRIT,"Problem creating device name scan list\n");
4396:
4397: // if no devices, or error constructing list, return
4398: if (devlist.size() <= 0)
4399: return 0;
4400:
4401: // add empty device slots for existing config entries
4402: while (scanned_devs.size() < conf_entries.size())
4403: scanned_devs.push_back((smart_device *)0);
4404:
4405: // loop over entries to create
4406: for (unsigned i = 0; i < devlist.size(); i++) {
4407: // Move device pointer
4408: smart_device * dev = devlist.release(i);
4409: scanned_devs.push_back(dev);
4410:
4411: // Copy configuration, update device and type name
4412: conf_entries.push_back(base_cfg);
4413: dev_config & cfg = conf_entries.back();
4414: cfg.name = dev->get_info().info_name;
4415: cfg.dev_name = dev->get_info().dev_name;
4416: cfg.dev_type = type;
4417: }
4418:
4419: return devlist.size();
4420: }
4421:
4422: static void CanNotRegister(const char *name, const char *type, int line, bool scandirective)
4423: {
4424: if (!debugmode && scandirective)
4425: return;
4426: if (line)
4427: PrintOut(scandirective?LOG_INFO:LOG_CRIT,
4428: "Unable to register %s device %s at line %d of file %s\n",
4429: type, name, line, configfile);
4430: else
4431: PrintOut(LOG_INFO,"Unable to register %s device %s\n",
4432: type, name);
4433: return;
4434: }
4435:
4436: // Returns negative value (see ParseConfigFile()) if config file
4437: // had errors, else number of entries which may be zero or positive.
4438: static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
4439: {
4440: // parse configuration file configfile (normally /etc/smartd.conf)
4441: int entries = ParseConfigFile(conf_entries);
4442:
4443: if (entries < 0) {
4444: // There was an error reading the configuration file.
4445: conf_entries.clear();
4446: if (entries == -1)
4447: PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
4448: return entries;
4449: }
4450:
4451: // no error parsing config file.
4452: if (entries) {
4453: // we did not find a SCANDIRECTIVE and did find valid entries
4454: PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
4455: }
4456: else if (!conf_entries.empty()) {
4457: // we found a SCANDIRECTIVE or there was no configuration file so
4458: // scan. Configuration file's last entry contains all options
4459: // that were set
4460: dev_config first = conf_entries.back();
4461: conf_entries.pop_back();
4462:
4463: if (first.lineno)
4464: PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
4465: else
4466: PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
4467:
4468: // make config list of devices to search for
4469: MakeConfigEntries(first, conf_entries, scanned_devs, first.dev_type.c_str());
4470:
4471: // warn user if scan table found no devices
4472: if (conf_entries.empty())
4473: PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
4474: }
4475: else
4476: PrintOut(LOG_CRIT,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile);
4477:
4478: return conf_entries.size();
4479: }
4480:
4481:
4482: // This function tries devices from conf_entries. Each one that can be
4483: // registered is moved onto the [ata|scsi]devices lists and removed
4484: // from the conf_entries list.
4485: static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
4486: dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
4487: {
4488: // start by clearing lists/memory of ALL existing devices
4489: configs.clear();
4490: devices.clear();
4491: states.clear();
4492:
4493: // Register entries
4494: for (unsigned i = 0; i < conf_entries.size(); i++){
4495:
4496: dev_config cfg = conf_entries[i];
4497:
4498: // get device of appropriate type
4499: smart_device_auto_ptr dev;
4500: bool scanning = false;
4501:
4502: // Device may already be detected during devicescan
4503: if (i < scanned_devs.size()) {
4504: dev = scanned_devs.release(i);
4505: if (dev)
4506: scanning = true;
4507: }
4508:
4509: if (!dev) {
4510: dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
4511: if (!dev) {
4512: if (cfg.dev_type.empty())
4513: PrintOut(LOG_INFO,"Device: %s, unable to autodetect device type\n", cfg.name.c_str());
4514: else
4515: PrintOut(LOG_INFO,"Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
4516: continue;
4517: }
4518: }
4519:
4520: // Save old info
4521: smart_device::device_info oldinfo = dev->get_info();
4522:
4523: // Open with autodetect support, may return 'better' device
4524: dev.replace( dev->autodetect_open() );
4525:
4526: // Report if type has changed
4527: if (oldinfo.dev_type != dev->get_dev_type())
4528: PrintOut(LOG_INFO,"Device: %s, type changed from '%s' to '%s'\n",
4529: cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
4530:
4531: if (!dev->is_open()) {
4532: // For linux+devfs, a nonexistent device gives a strange error
4533: // message. This makes the error message a bit more sensible.
4534: // If no debug and scanning - don't print errors
4535: if (debugmode || !scanning)
4536: PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
4537: continue;
4538: }
4539:
4540: // Update informal name
4541: cfg.name = dev->get_info().info_name;
4542: PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
4543:
4544: // Prepare initial state
4545: dev_state state;
4546:
4547: // register ATA devices
4548: if (dev->is_ata()){
4549: if (ATADeviceScan(cfg, state, dev->to_ata())) {
4550: CanNotRegister(cfg.name.c_str(), "ATA", cfg.lineno, scanning);
4551: dev.reset();
4552: }
4553: }
4554: // or register SCSI devices
4555: else if (dev->is_scsi()){
4556: if (SCSIDeviceScan(cfg, state, dev->to_scsi())) {
4557: CanNotRegister(cfg.name.c_str(), "SCSI", cfg.lineno, scanning);
4558: dev.reset();
4559: }
4560: }
4561: else {
4562: PrintOut(LOG_INFO, "Device: %s, neither ATA nor SCSI device\n", cfg.name.c_str());
4563: dev.reset();
4564: }
4565:
4566: if (dev) {
4567: // move onto the list of devices
4568: configs.push_back(cfg);
4569: states.push_back(state);
4570: devices.push_back(dev);
4571: }
4572: // if device is explictly listed and we can't register it, then
4573: // exit unless the user has specified that the device is removable
4574: else if (!scanning) {
4575: if (cfg.removable || quit==2)
4576: PrintOut(LOG_INFO, "Device %s not available\n", cfg.name.c_str());
4577: else {
4578: PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
4579: EXIT(EXIT_BADDEV);
4580: }
4581: }
4582: }
4583: }
4584:
4585:
4586: // Main program without exception handling
4587: static int main_worker(int argc, char **argv)
4588: {
4589: // Initialize interface
4590: smart_interface::init();
4591: if (!smi())
4592: return 1;
4593:
4594: // is it our first pass through?
4595: bool firstpass = true;
4596:
4597: // next time to wake up
4598: time_t wakeuptime = 0;
4599:
4600: // parse input and print header and usage info if needed
4601: ParseOpts(argc,argv);
4602:
4603: // Configuration for each device
4604: dev_config_vector configs;
4605: // Device states
4606: dev_state_vector states;
4607: // Devices to monitor
4608: smart_device_list devices;
4609:
4610: bool write_states_always = true;
4611:
4612: #ifdef HAVE_LIBCAP_NG
4613: // Drop capabilities
4614: if (enable_capabilities) {
4615: capng_clear(CAPNG_SELECT_BOTH);
4616: capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
4617: CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
4618: capng_apply(CAPNG_SELECT_BOTH);
4619: }
4620: #endif
4621:
4622: // the main loop of the code
4623: for (;;) {
4624:
4625: // are we exiting from a signal?
4626: if (caughtsigEXIT) {
4627: // are we exiting with SIGTERM?
4628: int isterm=(caughtsigEXIT==SIGTERM);
4629: int isquit=(caughtsigEXIT==SIGQUIT);
4630: int isok=debugmode?isterm || isquit:isterm;
4631:
4632: PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
4633: caughtsigEXIT, strsignal(caughtsigEXIT));
4634:
4635: if (!isok)
4636: return EXIT_SIGNAL;
4637:
4638: // Write state files
4639: if (!state_path_prefix.empty())
4640: write_all_dev_states(configs, states);
4641:
4642: return 0;
4643: }
4644:
4645: // Should we (re)read the config file?
4646: if (firstpass || caughtsigHUP){
4647: if (!firstpass) {
4648: #ifdef __CYGWIN__
4649: // Workaround for missing SIGQUIT via keyboard on Cygwin
4650: if (caughtsigHUP==2) {
4651: // Simulate SIGQUIT if another SIGINT arrives soon
4652: caughtsigHUP=0;
4653: sleep(1);
4654: if (caughtsigHUP==2) {
4655: caughtsigEXIT=SIGQUIT;
4656: continue;
4657: }
4658: caughtsigHUP=2;
4659: }
4660: #endif
4661: // Write state files
4662: if (!state_path_prefix.empty())
4663: write_all_dev_states(configs, states);
4664:
4665: PrintOut(LOG_INFO,
4666: caughtsigHUP==1?
4667: "Signal HUP - rereading configuration file %s\n":
4668: "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME" quits)\n\n",
4669: configfile);
4670: }
4671:
4672: {
4673: dev_config_vector conf_entries; // Entries read from smartd.conf
4674: smart_device_list scanned_devs; // Devices found during scan
4675: // (re)reads config file, makes >=0 entries
4676: int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
4677:
4678: if (entries>=0) {
4679: // checks devices, then moves onto ata/scsi list or deallocates.
4680: RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
4681: if (!(configs.size() == devices.size() && configs.size() == states.size()))
4682: throw std::logic_error("Invalid result from RegisterDevices");
4683: }
4684: else if (quit==2 || ((quit==0 || quit==1) && !firstpass)) {
4685: // user has asked to continue on error in configuration file
4686: if (!firstpass)
4687: PrintOut(LOG_INFO,"Reusing previous configuration\n");
4688: }
4689: else {
4690: // exit with configuration file error status
4691: return (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
4692: }
4693: }
4694:
4695: // Log number of devices we are monitoring...
4696: if (devices.size() > 0 || quit==2 || (quit==1 && !firstpass)) {
4697: int numata = 0;
4698: for (unsigned i = 0; i < devices.size(); i++) {
4699: if (devices.at(i)->is_ata())
4700: numata++;
4701: }
4702: PrintOut(LOG_INFO,"Monitoring %d ATA and %d SCSI devices\n",
4703: numata, devices.size() - numata);
4704: }
4705: else {
4706: PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4707: return EXIT_NODEV;
4708: }
4709:
4710: if (quit==4) {
4711: // user has asked to print test schedule
4712: PrintTestSchedule(configs, states, devices);
4713: return 0;
4714: }
4715:
4716: #ifdef HAVE_LIBCAP_NG
4717: if (enable_capabilities) {
4718: for (unsigned i = 0; i < configs.size(); i++) {
4719: if (!configs[i].emailaddress.empty() || !configs[i].emailcmdline.empty()) {
4720: PrintOut(LOG_WARNING, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
4721: break;
4722: }
4723: }
4724: }
4725: #endif
4726:
4727: // reset signal
4728: caughtsigHUP=0;
4729:
4730: // Always write state files after (re)configuration
4731: write_states_always = true;
4732: }
4733:
4734: // check all devices once,
4735: // self tests are not started in first pass unless '-q onecheck' is specified
4736: CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit==3));
4737:
4738: // Write state files
4739: if (!state_path_prefix.empty())
4740: write_all_dev_states(configs, states, write_states_always);
4741: write_states_always = false;
4742:
4743: // Write attribute logs
4744: if (!attrlog_path_prefix.empty())
4745: write_all_dev_attrlogs(configs, states);
4746:
4747: // user has asked us to exit after first check
4748: if (quit==3) {
4749: PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
4750: "smartd is exiting (exit status 0)\n");
4751: return 0;
4752: }
4753:
4754: // fork into background if needed
4755: if (firstpass && !debugmode) {
4756: DaemonInit();
4757: }
4758:
4759: // set exit and signal handlers, write PID file, set wake-up time
4760: if (firstpass){
4761: Initialize(&wakeuptime);
4762: firstpass = false;
4763: }
4764:
4765: // sleep until next check time, or a signal arrives
4766: wakeuptime = dosleep(wakeuptime, write_states_always);
4767: }
4768: }
4769:
4770:
4771: #ifndef _WIN32
4772: // Main program
4773: int main(int argc, char **argv)
4774: #else
4775: // Windows: internal main function started direct or by service control manager
4776: static int smartd_main(int argc, char **argv)
4777: #endif
4778: {
4779: int status;
4780: try {
4781: // Do the real work ...
4782: status = main_worker(argc, argv);
4783: }
4784: catch (int ex) {
4785: // EXIT(status) arrives here
4786: status = ex;
4787: }
4788: catch (const std::bad_alloc & /*ex*/) {
4789: // Memory allocation failed (also thrown by std::operator new)
4790: PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
4791: status = EXIT_NOMEM;
4792: }
4793: catch (const std::exception & ex) {
4794: // Other fatal errors
4795: PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
4796: status = EXIT_BADCODE;
4797: }
4798:
4799: if (is_initialized)
4800: status = Goodbye(status);
4801:
4802: #ifdef _WIN32
4803: daemon_winsvc_exitcode = status;
4804: #endif
4805: return status;
4806: }
4807:
4808:
#ifdef _WIN32
// Entry point on Windows.
// Hands control to daemon_main(), which handles daemon and service
// specific commands and starts smartd_main() direct, from a new
// process, or via service control manager.
int main(int argc, char **argv)
{
  // Settings for the smartd windows service
  static const daemon_winsvc_options service_opts = {
    "--service",       // cmd_opt
    "smartd",          // servicename
    "SmartD Service",  // displayname
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (S.M.A.R.T.) "
    "built into ATA and SCSI Hard Drives. "
    PACKAGE_HOMEPAGE
  };

  const int rc = daemon_main("smartd", &service_opts, smartd_main, argc, argv);
  return rc;
}
#endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>