Annotation of embedaddon/smartmontools/smartd.cpp, revision 1.1.1.2
1.1 misho 1: /*
2: * Home page of code is: http://smartmontools.sourceforge.net
3: *
4: * Copyright (C) 2002-11 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5: * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
6: * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
1.1.1.2 ! misho 7: * Copyright (C) 2008-12 Christian Franke <smartmontools-support@lists.sourceforge.net>
1.1 misho 8: *
9: * This program is free software; you can redistribute it and/or modify
10: * it under the terms of the GNU General Public License as published by
11: * the Free Software Foundation; either version 2, or (at your option)
12: * any later version.
13: *
14: * You should have received a copy of the GNU General Public License
15: * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
16: *
17: * This code was originally developed as a Senior Thesis by Michael Cornwell
18: * at the Concurrent Systems Laboratory (now part of the Storage Systems
19: * Research Center), Jack Baskin School of Engineering, University of
20: * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
21: *
22: */
23:
24: #ifndef _GNU_SOURCE
25: // TODO: Why is this define necessary?
26: #define _GNU_SOURCE
27: #endif
28:
29: // unconditionally included files
30: #include <stdio.h>
31: #include <sys/types.h>
32: #include <sys/stat.h> // umask
33: #include <signal.h>
34: #include <fcntl.h>
35: #include <string.h>
36: #include <syslog.h>
37: #include <stdarg.h>
38: #include <stdlib.h>
39: #include <errno.h>
40: #include <time.h>
41: #include <limits.h>
42: #include <getopt.h>
43:
44: #include <stdexcept>
45: #include <string>
46: #include <vector>
47: #include <algorithm> // std::replace()
48:
49: // see which system files to conditionally include
50: #include "config.h"
51:
52: // conditionally included files
53: #ifndef _WIN32
54: #include <sys/wait.h>
55: #endif
56: #ifdef HAVE_UNISTD_H
57: #include <unistd.h>
58: #endif
59: #ifdef HAVE_NETDB_H
60: #include <netdb.h>
61: #endif
62:
63: #ifdef _WIN32
64: #ifdef _MSC_VER
65: #pragma warning(disable:4761) // "conversion supplied"
66: typedef unsigned short mode_t;
67: typedef int pid_t;
68: #endif
69: #include <io.h> // umask()
70: #include <process.h> // getpid()
71: #endif // _WIN32
72:
73: #ifdef __CYGWIN__
74: #include <io.h> // setmode()
75: #endif // __CYGWIN__
76:
77: #ifdef HAVE_LIBCAP_NG
78: #include <cap-ng.h>
79: #endif // LIBCAP_NG
80:
81: // locally included files
82: #include "int64.h"
83: #include "atacmds.h"
84: #include "dev_interface.h"
85: #include "knowndrives.h"
86: #include "scsicmds.h"
87: #include "utility.h"
88:
89: // This is for solaris, where signal() resets the handler to SIG_DFL
90: // after the first signal is caught.
91: #ifdef HAVE_SIGSET
92: #define SIGNALFN sigset
93: #else
94: #define SIGNALFN signal
95: #endif
96:
97: #ifdef _WIN32
98: #include "hostname_win32.h" // gethost/domainname()
99: #define HAVE_GETHOSTNAME 1
100: #define HAVE_GETDOMAINNAME 1
101: // fork()/signal()/initd simulation for native Windows
102: #include "daemon_win32.h" // daemon_main/detach/signal()
103: #undef SIGNALFN
104: #define SIGNALFN daemon_signal
105: #define strsignal daemon_strsignal
106: #define sleep daemon_sleep
107: // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
108: #define SIGQUIT SIGBREAK
109: #define SIGQUIT_KEYNAME "CONTROL-Break"
110: #else // _WIN32
111: #define SIGQUIT_KEYNAME "CONTROL-\\"
112: #endif // _WIN32
113:
114: #if defined (__SVR4) && defined (__sun)
115: extern "C" int getdomainname(char *, int); // no declaration in header files!
116: #endif
117:
118: #define ARGUSED(x) ((void)(x))
119:
1.1.1.2 ! misho 120: const char * smartd_cpp_cvsid = "$Id: smartd.cpp 3513 2012-02-15 21:57:21Z chrfranke $"
1.1 misho 121: CONFIG_H_CVSID;
122:
123: // smartd exit codes
124: #define EXIT_BADCMD 1 // command line did not parse
125: #define EXIT_BADCONF 2 // syntax error in config file
126: #define EXIT_STARTUP 3 // problem forking daemon
127: #define EXIT_PID 4 // problem creating pid file
128: #define EXIT_NOCONF 5 // config file does not exist
129: #define EXIT_READCONF 6 // config file exists but cannot be read
130:
131: #define EXIT_NOMEM 8 // out of memory
132: #define EXIT_BADCODE 10 // internal error - should NEVER happen
133:
134: #define EXIT_BADDEV 16 // we can't monitor this device
135: #define EXIT_NODEV 17 // no devices to monitor
136:
137: #define EXIT_SIGNAL 254 // abort on signal
138:
139:
// Debug level selected on the command line: 1=debug mode, 2=print presets
static unsigned char debugmode = 0;

// Default and current value of the command-line setting that controls
// how long to sleep between checks
#define CHECKTIME 1800
static int checktime = CHECKTIME;

// PID file name from the command line; empty string means "no PID file"
static std::string pid_file;

// Path prefix of the persistent state files (command line);
// empty string means "no persistence"
#ifdef SMARTMONTOOLS_SAVESTATES
static std::string state_path_prefix = SMARTMONTOOLS_SAVESTATES;
#else
static std::string state_path_prefix;
#endif

// Path prefix of the attribute log files (command line);
// empty string means "no logs"
#ifdef SMARTMONTOOLS_ATTRIBUTELOG
static std::string attrlog_path_prefix = SMARTMONTOOLS_ATTRIBUTELOG;
#else
static std::string attrlog_path_prefix;
#endif

// Name of the configuration file
static const char * configfile;
// Pseudo "name" used when the configuration is read from stdin
static const char * const configfile_stdin = "<stdin>";
// Path of an alternate configuration file
static std::string configfile_alt;

// Command-line setting: when should we exit?
static int quit = 0;

// Command-line setting: default syslog(3) log facility
static int facility = LOG_DAEMON;

#ifndef _WIN32
// Command-line setting: fork into background?
static bool do_fork = true;
#endif

#ifdef HAVE_LIBCAP_NG
// Command-line setting: enable capabilities?
static bool enable_capabilities = false;
#endif

#if defined(_WIN32) || defined(__CYGWIN__)
// TODO: This smartctl only variable is also used in os_win32.cpp
unsigned char failuretest_permissive = 0;
#endif
191:
// Flags written by the signal handlers in this file.
// They are declared 'volatile sig_atomic_t' because this is the only
// object type that may portably be assigned from a signal handler.

// set to one if we catch a USR1 (check devices now)
static volatile sig_atomic_t caughtsigUSR1 = 0;

#ifdef _WIN32
// set to one if we catch a USR2 (toggle debug mode)
static volatile sig_atomic_t caughtsigUSR2 = 0;
#endif

// set to one if we catch a HUP (reload config file). In debug mode,
// set to two, if we catch INT (also reload config file).
static volatile sig_atomic_t caughtsigHUP = 0;

// set to signal value if we catch INT, QUIT, or TERM
static volatile sig_atomic_t caughtsigEXIT = 0;
206:
207: // This function prints either to stdout or to the syslog as needed.
208: static void PrintOut(int priority, const char *fmt, ...)
1.1.1.2 ! misho 209: __attribute_format_printf(2, 3);
1.1 misho 210:
// Bit mask values of the per-attribute monitoring flags.
// They are stored in dev_config::monitor_attr_flags.
enum {
  MONITOR_IGN_FAILUSE = 1 << 0,
  MONITOR_IGNORE      = 1 << 1,
  MONITOR_RAW_PRINT   = 1 << 2,
  MONITOR_RAW         = 1 << 3,
  MONITOR_AS_CRIT     = 1 << 4,
  MONITOR_RAW_AS_CRIT = 1 << 5
};
221:
// One byte of flag bits for each attribute ID.
// Only IDs 1..255 are stored: set() silently ignores any other ID
// and is_set() returns false for it.
class attribute_flags
{
public:
  // Start with all flags cleared.
  attribute_flags()
    {
      for (unsigned idx = 0; idx < sizeof(m_flags); idx++)
        m_flags[idx] = 0;
    }

  // Return true if at least one bit of 'flag' is set for attribute 'id'.
  bool is_set(int id, unsigned char flag) const
    {
      if (!in_range(id))
        return false;
      return (m_flags[id] & flag) != 0;
    }

  // Add the bits of 'flags' to the flags of attribute 'id'.
  void set(int id, unsigned char flags)
    {
      if (in_range(id))
        m_flags[id] |= flags;
    }

private:
  // True if 'id' is a usable index (ID 0 is excluded on purpose).
  bool in_range(int id) const
    { return (0 < id && id < (int)sizeof(m_flags)); }

  unsigned char m_flags[256];
};
241:
242:
243: /// Configuration data for a device. Read from smartd.conf.
244: /// Supports copy & assignment and is compatible with STL containers.
245: struct dev_config
246: {
247: int lineno; // Line number of entry in file
248: std::string name; // Device name (with optional extra info)
249: std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
250: std::string dev_type; // Device type argument from -d directive, empty if none
251: std::string state_file; // Path of the persistent state file, empty if none
252: std::string attrlog_file; // Path of the persistent attrlog file, empty if none
253: bool smartcheck; // Check SMART status
254: bool usagefailed; // Check for failed Usage Attributes
255: bool prefail; // Track changes in Prefail Attributes
256: bool usage; // Track changes in Usage Attributes
257: bool selftest; // Monitor number of selftest errors
258: bool errorlog; // Monitor number of ATA errors
259: bool xerrorlog; // Monitor number of ATA errors (Extended Comprehensive error log)
260: bool offlinests; // Monitor changes in offline data collection status
1.1.1.2 ! misho 261: bool offlinests_ns; // Disable auto standby if in progress
1.1 misho 262: bool selfteststs; // Monitor changes in self-test execution status
1.1.1.2 ! misho 263: bool selfteststs_ns; // Disable auto standby if in progress
1.1 misho 264: bool permissive; // Ignore failed SMART commands
265: char autosave; // 1=disable, 2=enable Autosave Attributes
266: char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
267: unsigned char fix_firmwarebug; // FIX_*, see atacmds.h
268: bool ignorepresets; // Ignore database of -v options
269: bool showpresets; // Show database entry for this device
270: bool removable; // Device may disappear (not be present)
271: char powermode; // skip check, if disk in idle or standby mode
272: bool powerquiet; // skip powermode 'skipping checks' message
273: int powerskipmax; // how many times can be check skipped
274: unsigned char tempdiff; // Track Temperature changes >= this limit
275: unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
276: regular_expression test_regex; // Regex for scheduled testing
277:
278: // Configuration of email warning messages
279: std::string emailcmdline; // script to execute, empty if no messages
280: std::string emailaddress; // email address, or empty
281: unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
282: bool emailtest; // Send test email?
283:
284: // ATA ONLY
1.1.1.2 ! misho 285: int set_aam; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
! 286: int set_apm; // disable(-1), enable(2..255->1..254) Advanced Power Management
! 287: int set_lookahead; // disable(-1), enable(1) read look-ahead
! 288: int set_standby; // set(1..255->0..254) standby timer
! 289: bool set_security_freeze; // Freeze ATA security
! 290: int set_wcache; // disable(-1), enable(1) write cache
! 291:
1.1 misho 292: bool sct_erc_set; // set SCT ERC to:
293: unsigned short sct_erc_readtime; // ERC read time (deciseconds)
294: unsigned short sct_erc_writetime; // ERC write time (deciseconds)
295:
296: unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
297: unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
298: bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
299: bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
300:
301: attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
302:
303: ata_vendor_attr_defs attribute_defs; // -v options
304:
305: dev_config();
306: };
307:
308: dev_config::dev_config()
309: : lineno(0),
310: smartcheck(false),
311: usagefailed(false),
312: prefail(false),
313: usage(false),
314: selftest(false),
315: errorlog(false),
316: xerrorlog(false),
1.1.1.2 ! misho 317: offlinests(false), offlinests_ns(false),
! 318: selfteststs(false), selfteststs_ns(false),
1.1 misho 319: permissive(false),
320: autosave(0),
321: autoofflinetest(0),
322: fix_firmwarebug(FIX_NOTSPECIFIED),
323: ignorepresets(false),
324: showpresets(false),
325: removable(false),
326: powermode(0),
327: powerquiet(false),
328: powerskipmax(0),
329: tempdiff(0),
330: tempinfo(0), tempcrit(0),
331: emailfreq(0),
332: emailtest(false),
1.1.1.2 ! misho 333: set_aam(0), set_apm(0),
! 334: set_lookahead(0),
! 335: set_standby(0),
! 336: set_security_freeze(false),
! 337: set_wcache(0),
1.1 misho 338: sct_erc_set(false),
339: sct_erc_readtime(0), sct_erc_writetime(0),
340: curr_pending_id(0), offl_pending_id(0),
341: curr_pending_incr(false), offl_pending_incr(false),
342: curr_pending_set(false), offl_pending_set(false)
343: {
344: }
345:
346:
// How many different mail message types exist
static const int SMARTD_NMAIL = 13;
// Mail type used for '-M test' mails (its state is not persistent)
static const int MAILTYPE_TEST = 0;
// TODO: Add const or enum for all mail types.
352:
// Bookkeeping for one type of warning mail.
struct mailinfo {
  int logged;       // number of times an email has been sent
  time_t firstsent; // time first email was sent, as defined by time(2)
  time_t lastsent;  // time last email was sent, as defined by time(2)

  // Nothing sent yet.
  mailinfo()
    : logged(0),
      firstsent(0),
      lastsent(0)
    { }
};
361:
362: /// Persistent state data for a device.
363: struct persistent_dev_state
364: {
365: unsigned char tempmin, tempmax; // Min/Max Temperatures
366:
367: unsigned char selflogcount; // total number of self-test errors
368: unsigned short selfloghour; // lifetime hours of last self-test error
369:
370: time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
371:
372: uint64_t selective_test_last_start; // Start LBA of last scheduled selective self-test
373: uint64_t selective_test_last_end; // End LBA of last scheduled selective self-test
374:
375: mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
376:
377: // ATA ONLY
378: int ataerrorcount; // Total number of ATA errors
379:
380: // Persistent part of ata_smart_values:
381: struct ata_attribute {
382: unsigned char id;
383: unsigned char val;
384: unsigned char worst; // Byte needed for 'raw64' attribute only.
385: uint64_t raw;
386: unsigned char resvd;
387:
388: ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
389: };
390: ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
391:
392: persistent_dev_state();
393: };
394:
395: persistent_dev_state::persistent_dev_state()
396: : tempmin(0), tempmax(0),
397: selflogcount(0),
398: selfloghour(0),
399: scheduled_test_next_check(0),
400: selective_test_last_start(0),
401: selective_test_last_end(0),
402: ataerrorcount(0)
403: {
404: }
405:
406: /// Non-persistent state data for a device.
407: struct temp_dev_state
408: {
409: bool must_write; // true if persistent part should be written
410:
411: bool not_cap_offline; // true == not capable of offline testing
412: bool not_cap_conveyance;
413: bool not_cap_short;
414: bool not_cap_long;
415: bool not_cap_selective;
416:
417: unsigned char temperature; // last recorded Temperature (in Celsius)
418: time_t tempmin_delay; // time where Min Temperature tracking will start
419:
420: bool powermodefail; // true if power mode check failed
421: int powerskipcnt; // Number of checks skipped due to idle or standby mode
422:
423: // SCSI ONLY
424: unsigned char SmartPageSupported; // has log sense IE page (0x2f)
425: unsigned char TempPageSupported; // has log sense temperature page (0xd)
426: unsigned char SuppressReport; // minimize nuisance reports
427: unsigned char modese_len; // mode sense/select cmd len: 0 (don't
428: // know yet) 6 or 10
429:
430: // ATA ONLY
431: uint64_t num_sectors; // Number of sectors
432: ata_smart_values smartval; // SMART data
433: ata_smart_thresholds_pvt smartthres; // SMART thresholds
1.1.1.2 ! misho 434: bool offline_started; // true if offline data collection was started
! 435: bool selftest_started; // true if self-test was started
1.1 misho 436:
437: temp_dev_state();
438: };
439:
440: temp_dev_state::temp_dev_state()
441: : must_write(false),
442: not_cap_offline(false),
443: not_cap_conveyance(false),
444: not_cap_short(false),
445: not_cap_long(false),
446: not_cap_selective(false),
447: temperature(0),
448: tempmin_delay(0),
449: powermodefail(false),
450: powerskipcnt(0),
451: SmartPageSupported(false),
452: TempPageSupported(false),
453: SuppressReport(false),
454: modese_len(0),
1.1.1.2 ! misho 455: num_sectors(0),
! 456: offline_started(false),
! 457: selftest_started(false)
1.1 misho 458: {
459: memset(&smartval, 0, sizeof(smartval));
460: memset(&smartthres, 0, sizeof(smartthres));
461: }
462:
463: /// Runtime state data for a device.
464: struct dev_state
465: : public persistent_dev_state,
466: public temp_dev_state
467: {
468: void update_persistent_state();
469: void update_temp_state();
470: };
471:
472: /// Container for configuration info for each device.
473: typedef std::vector<dev_config> dev_config_vector;
474:
475: /// Container for state info for each device.
476: typedef std::vector<dev_state> dev_state_vector;
477:
478: // Copy ATA attributes to persistent state.
479: void dev_state::update_persistent_state()
480: {
481: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
482: const ata_smart_attribute & ta = smartval.vendor_attributes[i];
483: ata_attribute & pa = ata_attributes[i];
484: pa.id = ta.id;
485: if (ta.id == 0) {
486: pa.val = pa.worst = 0; pa.raw = 0;
487: continue;
488: }
489: pa.val = ta.current;
490: pa.worst = ta.worst;
491: pa.raw = ta.raw[0]
492: | ( ta.raw[1] << 8)
493: | ( ta.raw[2] << 16)
494: | ((uint64_t)ta.raw[3] << 24)
495: | ((uint64_t)ta.raw[4] << 32)
496: | ((uint64_t)ta.raw[5] << 40);
497: pa.resvd = ta.reserv;
498: }
499: }
500:
501: // Copy ATA from persistent to temp state.
502: void dev_state::update_temp_state()
503: {
504: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
505: const ata_attribute & pa = ata_attributes[i];
506: ata_smart_attribute & ta = smartval.vendor_attributes[i];
507: ta.id = pa.id;
508: if (pa.id == 0) {
509: ta.current = ta.worst = 0;
510: memset(ta.raw, 0, sizeof(ta.raw));
511: continue;
512: }
513: ta.current = pa.val;
514: ta.worst = pa.worst;
515: ta.raw[0] = (unsigned char) pa.raw;
516: ta.raw[1] = (unsigned char)(pa.raw >> 8);
517: ta.raw[2] = (unsigned char)(pa.raw >> 16);
518: ta.raw[3] = (unsigned char)(pa.raw >> 24);
519: ta.raw[4] = (unsigned char)(pa.raw >> 32);
520: ta.raw[5] = (unsigned char)(pa.raw >> 40);
521: ta.reserv = pa.resvd;
522: }
523: }
524:
525: // Parse a line from a state file.
526: static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
527: {
528: static const regular_expression regex(
529: "^ *"
530: "((temperature-min)" // (1 (2)
531: "|(temperature-max)" // (3)
532: "|(self-test-errors)" // (4)
533: "|(self-test-last-err-hour)" // (5)
534: "|(scheduled-test-next-check)" // (6)
535: "|(selective-test-last-start)" // (7)
536: "|(selective-test-last-end)" // (8)
537: "|(ata-error-count)" // (9)
538: "|(mail\\.([0-9]+)\\." // (10 (11)
539: "((count)" // (12 (13)
540: "|(first-sent-time)" // (14)
541: "|(last-sent-time)" // (15)
542: ")" // 12)
543: ")" // 10)
544: "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
545: "((id)" // (18 (19)
546: "|(val)" // (20)
547: "|(worst)" // (21)
548: "|(raw)" // (22)
549: "|(resvd)" // (23)
550: ")" // 18)
551: ")" // 16)
552: ")" // 1)
553: " *= *([0-9]+)[ \n]*$", // (24)
554: REG_EXTENDED
555: );
556:
557: const int nmatch = 1+24;
558: regmatch_t match[nmatch];
559: if (!regex.execute(line, nmatch, match))
560: return false;
561: if (match[nmatch-1].rm_so < 0)
562: return false;
563:
564: uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
565:
566: int m = 1;
567: if (match[++m].rm_so >= 0)
568: state.tempmin = (unsigned char)val;
569: else if (match[++m].rm_so >= 0)
570: state.tempmax = (unsigned char)val;
571: else if (match[++m].rm_so >= 0)
572: state.selflogcount = (unsigned char)val;
573: else if (match[++m].rm_so >= 0)
574: state.selfloghour = (unsigned short)val;
575: else if (match[++m].rm_so >= 0)
576: state.scheduled_test_next_check = (time_t)val;
577: else if (match[++m].rm_so >= 0)
578: state.selective_test_last_start = val;
579: else if (match[++m].rm_so >= 0)
580: state.selective_test_last_end = val;
581: else if (match[++m].rm_so >= 0)
582: state.ataerrorcount = (int)val;
583: else if (match[m+=2].rm_so >= 0) {
584: int i = atoi(line+match[m].rm_so);
585: if (!(0 <= i && i < SMARTD_NMAIL))
586: return false;
587: if (i == MAILTYPE_TEST) // Don't suppress test mails
588: return true;
589: if (match[m+=2].rm_so >= 0)
590: state.maillog[i].logged = (int)val;
591: else if (match[++m].rm_so >= 0)
592: state.maillog[i].firstsent = (time_t)val;
593: else if (match[++m].rm_so >= 0)
594: state.maillog[i].lastsent = (time_t)val;
595: else
596: return false;
597: }
598: else if (match[m+=5+1].rm_so >= 0) {
599: int i = atoi(line+match[m].rm_so);
600: if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
601: return false;
602: if (match[m+=2].rm_so >= 0)
603: state.ata_attributes[i].id = (unsigned char)val;
604: else if (match[++m].rm_so >= 0)
605: state.ata_attributes[i].val = (unsigned char)val;
606: else if (match[++m].rm_so >= 0)
607: state.ata_attributes[i].worst = (unsigned char)val;
608: else if (match[++m].rm_so >= 0)
609: state.ata_attributes[i].raw = val;
610: else if (match[++m].rm_so >= 0)
611: state.ata_attributes[i].resvd = (unsigned char)val;
612: else
613: return false;
614: }
615: else
616: return false;
617: return true;
618: }
619:
620: // Read a state file.
621: static bool read_dev_state(const char * path, persistent_dev_state & state)
622: {
623: stdio_file f(path, "r");
624: if (!f) {
625: if (errno != ENOENT)
626: pout("Cannot read state file \"%s\"\n", path);
627: return false;
628: }
629: #ifdef __CYGWIN__
630: setmode(fileno(f), O_TEXT); // Allow files with \r\n
631: #endif
632:
633: persistent_dev_state new_state;
634: int good = 0, bad = 0;
635: char line[256];
636: while (fgets(line, sizeof(line), f)) {
637: const char * s = line + strspn(line, " \t");
638: if (!*s || *s == '#')
639: continue;
640: if (!parse_dev_state_line(line, new_state))
641: bad++;
642: else
643: good++;
644: }
645:
646: if (bad) {
647: if (!good) {
648: pout("%s: format error\n", path);
649: return false;
650: }
651: pout("%s: %d invalid line(s) ignored\n", path, bad);
652: }
653:
654: // This sets the values missing in the file to 0.
655: state = new_state;
656: return true;
657: }
658:
// Write the line "name = val" to 'f'.
// Nothing is written if 'val' is 0, because a reader treats a missing
// entry as 0.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val)
    // Note: The spaces around PRIu64 are required since C++11, otherwise
    // the macro name is parsed as a user-defined literal suffix.
    fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
664:
// Write the line "name1.id.name2 = val" to 'f'.
// Nothing is written if 'val' is 0, because a reader treats a missing
// entry as 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val)
    // Note: The spaces around PRIu64 are required since C++11, otherwise
    // the macro name is parsed as a user-defined literal suffix.
    fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
670:
671: // Write a state file
672: static bool write_dev_state(const char * path, const persistent_dev_state & state)
673: {
674: // Rename old "file" to "file~"
675: std::string pathbak = path; pathbak += '~';
676: unlink(pathbak.c_str());
677: rename(path, pathbak.c_str());
678:
679: stdio_file f(path, "w");
680: if (!f) {
681: pout("Cannot create state file \"%s\"\n", path);
682: return false;
683: }
684:
685: fprintf(f, "# smartd state file\n");
686: write_dev_state_line(f, "temperature-min", state.tempmin);
687: write_dev_state_line(f, "temperature-max", state.tempmax);
688: write_dev_state_line(f, "self-test-errors", state.selflogcount);
689: write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
690: write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
691: write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
692: write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
693:
694: int i;
695: for (i = 0; i < SMARTD_NMAIL; i++) {
696: if (i == MAILTYPE_TEST) // Don't suppress test mails
697: continue;
698: const mailinfo & mi = state.maillog[i];
699: if (!mi.logged)
700: continue;
701: write_dev_state_line(f, "mail", i, "count", mi.logged);
702: write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
703: write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
704: }
705:
706: // ATA ONLY
707: write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
708:
709: for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
710: const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
711: if (!pa.id)
712: continue;
713: write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
714: write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
715: write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
716: write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
717: write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
718: }
719:
720: return true;
721: }
722:
723: // Write to the attrlog file
724: static bool write_dev_attrlog(const char * path, const persistent_dev_state & state)
725: {
726: stdio_file f(path, "a");
727: if (!f) {
728: pout("Cannot create attribute log file \"%s\"\n", path);
729: return false;
730: }
731:
732: // ATA ONLY
733: time_t now = time(0);
734: struct tm * tms = gmtime(&now);
735: fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
736: 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
737: tms->tm_hour, tms->tm_min, tms->tm_sec);
738: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
739: const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
740: if (!pa.id)
741: continue;
742: fprintf(f, "\t%d;%d;%"PRIu64";", pa.id, pa.val, pa.raw);
743: }
744: fprintf(f, "\n");
745:
746: return true;
747: }
748:
749: // Write all state files. If write_always is false, don't write
750: // unless must_write is set.
751: static void write_all_dev_states(const dev_config_vector & configs,
752: dev_state_vector & states,
753: bool write_always = true)
754: {
755: for (unsigned i = 0; i < states.size(); i++) {
756: const dev_config & cfg = configs.at(i);
757: if (cfg.state_file.empty())
758: continue;
759: dev_state & state = states[i];
760: if (!write_always && !state.must_write)
761: continue;
762: if (!write_dev_state(cfg.state_file.c_str(), state))
763: continue;
764: state.must_write = false;
765: if (write_always || debugmode)
766: PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
767: cfg.name.c_str(), cfg.state_file.c_str());
768: }
769: }
770:
771: // Write to all attrlog files
772: static void write_all_dev_attrlogs(const dev_config_vector & configs,
773: dev_state_vector & states)
774: {
775: for (unsigned i = 0; i < states.size(); i++) {
776: const dev_config & cfg = configs.at(i);
777: if (cfg.attrlog_file.empty())
778: continue;
779: dev_state & state = states[i];
780: write_dev_attrlog(cfg.attrlog_file.c_str(), state);
781: }
782: }
783:
784: // remove the PID file
785: static void RemovePidFile()
786: {
787: if (!pid_file.empty()) {
788: if (unlink(pid_file.c_str()))
789: PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
790: pid_file.c_str(), strerror(errno));
791: pid_file.clear();
792: }
793: return;
794: }
795:
796: extern "C" { // signal handlers require C-linkage
797:
798: // Note if we catch a SIGUSR1
799: static void USR1handler(int sig)
800: {
801: if (SIGUSR1==sig)
802: caughtsigUSR1=1;
803: return;
804: }
805:
806: #ifdef _WIN32
807: // Note if we catch a SIGUSR2
808: static void USR2handler(int sig)
809: {
810: if (SIGUSR2==sig)
811: caughtsigUSR2=1;
812: return;
813: }
814: #endif
815:
816: // Note if we catch a HUP (or INT in debug mode)
817: static void HUPhandler(int sig)
818: {
819: if (sig==SIGHUP)
820: caughtsigHUP=1;
821: else
822: caughtsigHUP=2;
823: return;
824: }
825:
826: // signal handler for TERM, QUIT, and INT (if not in debug mode)
827: static void sighandler(int sig)
828: {
829: if (!caughtsigEXIT)
830: caughtsigEXIT=sig;
831: return;
832: }
833:
834: } // extern "C"
835:
836: // Cleanup, print Goodbye message and remove pidfile
837: static int Goodbye(int status)
838: {
839: // delete PID file, if one was created
840: RemovePidFile();
841:
842: // if we are exiting because of a code bug, tell user
843: if (status==EXIT_BADCODE)
844: PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
845:
846: // and this should be the final output from smartd before it exits
847: PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
848:
849: return status;
850: }
851:
#define ENVLENGTH 1024

// Substitute for setenv(), which does not exist on all platforms.
// The string "name=value" is formatted into 'stackspace' (at most
// ENVLENGTH bytes, longer strings are truncated) and passed to putenv().
// putenv() keeps a pointer to the string, so 'stackspace' must stay
// valid and unchanged until the variable is redefined or deleted by
// another putenv() call.  The callers keep these buffers on the stack
// as long as the popen() call is underway.
// Returns the result of putenv().
static int exportenv(char *stackspace, const char *name, const char *value)
{
  snprintf(stackspace, ENVLENGTH, "%s=%s", name, value);
  const int rc = putenv(stackspace);
  return rc;
}
866:
867: static char *dnsdomain(const char *hostname)
868: {
869: char *p = NULL;
870: #ifdef HAVE_GETADDRINFO
871: static char canon_name[NI_MAXHOST];
872: struct addrinfo *info = NULL;
873: struct addrinfo hints;
874: int err;
875:
876: memset(&hints, 0, sizeof(hints));
877: hints.ai_flags = AI_CANONNAME;
878: if ((err = getaddrinfo(hostname, NULL, &hints, &info)) || (!info)) {
879: PrintOut(LOG_CRIT, "Error retrieving getaddrinfo(%s): %s\n", hostname, gai_strerror(err));
880: return NULL;
881: }
882: if (info->ai_canonname) {
883: strncpy(canon_name, info->ai_canonname, sizeof(canon_name));
884: canon_name[NI_MAXHOST - 1] = '\0';
885: p = canon_name;
886: if ((p = strchr(canon_name, '.')))
887: p++;
888: }
889: freeaddrinfo(info);
890: #elif HAVE_GETHOSTBYNAME
891: struct hostent *hp;
892: if ((hp = gethostbyname(hostname))) {
893: // Does this work if gethostbyname() returns an IPv6 name in
894: // colon/dot notation? [BA]
895: if ((p = strchr(hp->h_name, '.')))
896: p++; // skip "."
897: }
898: #else
899: ARGUSED(hostname);
900: #endif
901: return p;
902: }
903:
904: #define EBUFLEN 1024
905:
906: static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1.1.1.2 ! misho 907: __attribute_format_printf(4, 5);
1.1 misho 908:
909: // If either address or executable path is non-null then send and log
910: // a warning email, or execute executable
911: static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...){
912: char command[2048], message[256], hostname[256], domainname[256], additional[256],fullmessage[1024];
913: char original[256], further[256], nisdomain[256], subject[256],dates[DATEANDEPOCHLEN];
914: char environ_strings[11][ENVLENGTH];
915: time_t epoch;
916: va_list ap;
917: const int day=24*3600;
918: int days=0;
919: const char * const whichfail[]={
920: "EmailTest", // 0
921: "Health", // 1
922: "Usage", // 2
923: "SelfTest", // 3
924: "ErrorCount", // 4
925: "FailedHealthCheck", // 5
926: "FailedReadSmartData", // 6
927: "FailedReadSmartErrorLog", // 7
928: "FailedReadSmartSelfTestLog", // 8
929: "FailedOpenDevice", // 9
930: "CurrentPendingSector", // 10
931: "OfflineUncorrectableSector", // 11
932: "Temperature" // 12
933: };
934:
935: const char *unknown="[Unknown]";
936:
937: // See if user wants us to send mail
938: if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
939: return;
940:
941: std::string address = cfg.emailaddress;
942: const char * executable = cfg.emailcmdline.c_str();
943:
944: // which type of mail are we sending?
945: mailinfo * mail=(state.maillog)+which;
946:
947: // checks for sanity
948: if (cfg.emailfreq<1 || cfg.emailfreq>3) {
949: PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
950: return;
951: }
952: if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
953: PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
954: which, (int)sizeof(whichfail));
955: return;
956: }
957:
958: // Return if a single warning mail has been sent.
959: if ((cfg.emailfreq==1) && mail->logged)
960: return;
961:
962: // Return if this is an email test and one has already been sent.
963: if (which == 0 && mail->logged)
964: return;
965:
966: // To decide if to send mail, we need to know what time it is.
967: epoch=time(NULL);
968:
969: // Return if less than one day has gone by
970: if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
971: return;
972:
973: // Return if less than 2^(logged-1) days have gone by
974: if (cfg.emailfreq==3 && mail->logged) {
975: days=0x01<<(mail->logged-1);
976: days*=day;
977: if (epoch<(mail->lastsent+days))
978: return;
979: }
980:
981: #ifdef HAVE_LIBCAP_NG
982: if (enable_capabilities) {
983: PrintOut(LOG_ERR, "Sending a mail was supressed. "
984: "Mails can't be send when capabilites are enabled\n");
985: return;
986: }
987: #endif
988:
989: // record the time of this mail message, and the first mail message
990: if (!mail->logged)
991: mail->firstsent=epoch;
992: mail->lastsent=epoch;
993:
994: // get system host & domain names (not null terminated if length=MAX)
995: #ifdef HAVE_GETHOSTNAME
996: if (gethostname(hostname, 256))
997: strcpy(hostname, unknown);
998: else {
999: char *p=NULL;
1000: hostname[255]='\0';
1001: p = dnsdomain(hostname);
1002: if (p && *p) {
1003: strncpy(domainname, p, 255);
1004: domainname[255]='\0';
1005: } else
1006: strcpy(domainname, unknown);
1007: }
1008: #else
1009: strcpy(hostname, unknown);
1010: strcpy(domainname, unknown);
1011: #endif
1012:
1013: #ifdef HAVE_GETDOMAINNAME
1014: if (getdomainname(nisdomain, 256))
1015: strcpy(nisdomain, unknown);
1016: else
1017: nisdomain[255]='\0';
1018: #else
1019: strcpy(nisdomain, unknown);
1020: #endif
1021:
1022: // print warning string into message
1023: va_start(ap, fmt);
1024: vsnprintf(message, 256, fmt, ap);
1025: va_end(ap);
1026:
1027: // appropriate message about further information
1028: additional[0]=original[0]=further[0]='\0';
1029: if (which) {
1030: sprintf(further,"You can also use the smartctl utility for further investigation.\n");
1031:
1032: switch (cfg.emailfreq) {
1033: case 1:
1034: sprintf(additional,"No additional email messages about this problem will be sent.\n");
1035: break;
1036: case 2:
1037: sprintf(additional,"Another email message will be sent in 24 hours if the problem persists.\n");
1038: break;
1039: case 3:
1040: sprintf(additional,"Another email message will be sent in %d days if the problem persists\n",
1041: (0x01)<<mail->logged);
1042: break;
1043: }
1044: if (cfg.emailfreq>1 && mail->logged) {
1045: dateandtimezoneepoch(dates, mail->firstsent);
1046: sprintf(original,"The original email about this issue was sent at %s\n", dates);
1047: }
1048: }
1049:
1050: snprintf(subject, 256,"SMART error (%s) detected on host: %s", whichfail[which], hostname);
1051:
1052: // If the user has set cfg.emailcmdline, use that as mailer, else "mail" or "mailx".
1053: if (!*executable)
1054: #ifdef DEFAULT_MAILER
1055: executable = DEFAULT_MAILER ;
1056: #else
1057: #ifndef _WIN32
1058: executable = "mail";
1059: #else
1060: executable = "blat"; // http://blat.sourceforge.net/
1061: #endif
1062: #endif
1063:
1064: #ifndef _WIN32 // blat mailer needs comma
1065: // replace commas by spaces to separate recipients
1066: std::replace(address.begin(), address.end(), ',', ' ');
1067: #endif
1068: // Export information in environment variables that will be useful
1069: // for user scripts
1070: exportenv(environ_strings[0], "SMARTD_MAILER", executable);
1071: exportenv(environ_strings[1], "SMARTD_MESSAGE", message);
1072: exportenv(environ_strings[2], "SMARTD_SUBJECT", subject);
1073: dateandtimezoneepoch(dates, mail->firstsent);
1074: exportenv(environ_strings[3], "SMARTD_TFIRST", dates);
1075: snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1076: exportenv(environ_strings[4], "SMARTD_TFIRSTEPOCH", dates);
1077: exportenv(environ_strings[5], "SMARTD_FAILTYPE", whichfail[which]);
1078: if (!address.empty())
1079: exportenv(environ_strings[6], "SMARTD_ADDRESS", address.c_str());
1080: exportenv(environ_strings[7], "SMARTD_DEVICESTRING", cfg.name.c_str());
1081:
1082: // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1083: exportenv(environ_strings[8], "SMARTD_DEVICETYPE",
1084: (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1085: exportenv(environ_strings[9], "SMARTD_DEVICE", cfg.dev_name.c_str());
1086:
1087: snprintf(fullmessage, 1024,
1088: "This email was generated by the smartd daemon running on:\n\n"
1089: " host name: %s\n"
1090: " DNS domain: %s\n"
1091: " NIS domain: %s\n\n"
1092: "The following warning/error was logged by the smartd daemon:\n\n"
1093: "%s\n\n"
1094: "For details see host's SYSLOG.\n\n"
1095: "%s%s%s",
1096: hostname, domainname, nisdomain, message, further, original, additional);
1097: exportenv(environ_strings[10], "SMARTD_FULLMESSAGE", fullmessage);
1098:
1099: // now construct a command to send this as EMAIL
1100: #ifndef _WIN32
1101: if (!address.empty())
1102: snprintf(command, 2048,
1103: "$SMARTD_MAILER -s '%s' %s 2>&1 << \"ENDMAIL\"\n"
1104: "%sENDMAIL\n", subject, address.c_str(), fullmessage);
1105: else
1106: snprintf(command, 2048, "%s 2>&1", executable);
1107:
1108: // tell SYSLOG what we are about to do...
1109: const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1110: const char * newwarn = (which? "Warning via" : "Test of");
1111:
1112: PrintOut(LOG_INFO,"%s %s to %s ...\n",
1113: which?"Sending warning via":"Executing test of", executable, newadd);
1114:
1115: // issue the command to send mail or to run the user's executable
1116: errno=0;
1117: FILE * pfp;
1118: if (!(pfp=popen(command, "r")))
1119: // failed to popen() mail process
1120: PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1121: newwarn, executable, newadd, errno?strerror(errno):"");
1122: else {
1123: // pipe suceeded!
1124: int len, status;
1125: char buffer[EBUFLEN];
1126:
1127: // if unexpected output on stdout/stderr, null terminate, print, and flush
1128: if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1129: int count=0;
1130: int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1131: buffer[newlen]='\0';
1132: PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1133: newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1134:
1135: // flush pipe if needed
1136: while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1137: count++;
1138:
1139: // tell user that pipe was flushed, or that something is really wrong
1140: if (count && count<EBUFLEN)
1141: PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1142: newwarn, executable, newadd);
1143: else if (count)
1144: PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1145: newwarn, executable, newadd);
1146: }
1147:
1148: // if something went wrong with mail process, print warning
1149: errno=0;
1150: if (-1==(status=pclose(pfp)))
1151: PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1152: errno?strerror(errno):"");
1153: else {
1154: // mail process apparently succeeded. Check and report exit status
1155: int status8;
1156:
1157: if (WIFEXITED(status)) {
1158: // exited 'normally' (but perhaps with nonzero status)
1159: status8=WEXITSTATUS(status);
1160:
1161: if (status8>128)
1162: PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1163: newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1164: else if (status8)
1165: PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1166: newwarn, executable, newadd, status, status8);
1167: else
1168: PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1169: }
1170:
1171: if (WIFSIGNALED(status))
1172: PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1173: newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1174:
1175: // this branch is probably not possible. If subprocess is
1176: // stopped then pclose() should not return.
1177: if (WIFSTOPPED(status))
1178: PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1179: newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1180:
1181: }
1182: }
1183:
1184: #else // _WIN32
1185:
1186: // No "here-documents" on Windows, so must use separate commandline and stdin
1187: char stdinbuf[1024];
1188: command[0] = stdinbuf[0] = 0;
1189: int boxtype = -1, boxmsgoffs = 0;
1190: const char * newadd = "<nomailer>";
1191: if (!address.empty()) {
1192: // address "[sys]msgbox ..." => show warning (also) as [system modal ]messagebox
1193: char addr1[9+1+13] = ""; int n1 = -1, n2 = -1;
1194: if (sscanf(address.c_str(), "%9[a-z]%n,%n", addr1, &n1, &n2) == 1 && (n1 == (int)address.size() || n2 > 0)) {
1195: if (!strcmp(addr1, "msgbox"))
1196: boxtype = 0;
1197: else if (!strcmp(addr1, "sysmsgbox"))
1198: boxtype = 1;
1199: if (boxtype >= 0)
1200: address.erase(0, (n2 > n1 ? n2 : n1));
1201: }
1202:
1203: if (!address.empty()) {
1204: // Use "blat" parameter syntax (TODO: configure via -M for other mailers)
1205: snprintf(command, sizeof(command),
1206: "%s - -q -subject \"%s\" -to \"%s\"",
1207: executable, subject, address.c_str());
1208: newadd = address.c_str();
1209: }
1210:
1211: // Message for mail [0...] and messagebox [boxmsgoffs...]
1212: snprintf(stdinbuf, sizeof(stdinbuf),
1213: "This email was generated by the smartd daemon running on:\n\n"
1214: " host name: %s\n"
1215: " DNS domain: %s\n"
1216: // " NIS domain: %s\n"
1217: "\n",
1218: hostname, /*domainname, */ nisdomain);
1219: boxmsgoffs = strlen(stdinbuf);
1220: snprintf(stdinbuf+boxmsgoffs, sizeof(stdinbuf)-boxmsgoffs,
1221: "The following warning/error was logged by the smartd daemon:\n\n"
1222: "%s\n\n"
1223: "For details see the event log or log file of smartd.\n\n"
1224: "%s%s%s"
1225: "\n",
1226: message, further, original, additional);
1227: }
1228: else
1229: snprintf(command, sizeof(command), "%s", executable);
1230:
1231: const char * newwarn = (which ? "Warning via" : "Test of");
1232: if (boxtype >= 0) {
1233: // show message box
1234: daemon_messagebox(boxtype, subject, stdinbuf+boxmsgoffs);
1235: PrintOut(LOG_INFO,"%s message box\n", newwarn);
1236: }
1237: if (command[0]) {
1238: char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1239: int rc;
1240: // run command
1241: PrintOut(LOG_INFO,"%s %s to %s ...\n",
1242: (which?"Sending warning via":"Executing test of"), executable, newadd);
1243: rc = daemon_spawn(command, stdinbuf, strlen(stdinbuf), stdoutbuf, sizeof(stdoutbuf));
1244: if (rc >= 0 && stdoutbuf[0])
1245: PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1246: newwarn, executable, newadd, (int)strlen(stdoutbuf), stdoutbuf);
1247: if (rc != 0)
1248: PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1249: newwarn, executable, newadd, rc);
1250: else
1251: PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1252: }
1253:
1254: #endif // _WIN32
1255:
1256: // increment mail sent counter
1257: mail->logged++;
1258: }
1259:
1260: static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1.1.1.2 ! misho 1261: __attribute_format_printf(4, 5);
1.1 misho 1262:
1263: static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1264: {
1265: if (!(0 <= which && which < SMARTD_NMAIL))
1266: return;
1267:
1268: // Return if no mail sent yet
1269: mailinfo & mi = state.maillog[which];
1270: if (!mi.logged)
1271: return;
1272:
1273: // Format & print message
1274: char msg[256];
1275: va_list ap;
1276: va_start(ap, fmt);
1277: vsnprintf(msg, sizeof(msg), fmt, ap);
1278: va_end(ap);
1279:
1280: PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1281: msg, mi.logged, (mi.logged==1 ? "" : "s"));
1282:
1283: // Clear mail counter and timestamps
1284: mi = mailinfo();
1285: state.must_write = true;
1286: }
1287:
1288: #ifndef _WIN32
1289:
1290: // Output multiple lines via separate syslog(3) calls.
1291: static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1292: {
1293: char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1294: vsnprintf(buf, sizeof(buf), fmt, ap);
1295:
1296: for (char * p = buf, * q; p && *p; p = q) {
1297: if ((q = strchr(p, '\n')))
1298: *q++ = 0;
1299: if (*p)
1300: syslog(priority, "%s\n", p);
1301: }
1302: }
1303:
1304: #else // _WIN32
1305: // os_win32/syslog_win32.cpp supports multiple lines.
1306: #define vsyslog_lines vsyslog
1307: #endif // _WIN32
1308:
1309: // Printing function for watching ataprint commands, or losing them
1310: // [From GLIBC Manual: Since the prototype doesn't specify types for
1311: // optional arguments, in a call to a variadic function the default
1312: // argument promotions are performed on the optional argument
1313: // values. This means the objects of type char or short int (whether
1314: // signed or not) are promoted to either int or unsigned int, as
1315: // appropriate.]
1316: void pout(const char *fmt, ...){
1317: va_list ap;
1318:
1319: // get the correct time in syslog()
1320: FixGlibcTimeZoneBug();
1321: // initialize variable argument list
1322: va_start(ap,fmt);
1323: // in debugmode==1 mode we will print the output from the ataprint.o functions!
1324: if (debugmode && debugmode!=2)
1325: #ifdef _WIN32
1326: if (facility == LOG_LOCAL1) // logging to stdout
1327: vfprintf(stderr,fmt,ap);
1328: else
1329: #endif
1330: vprintf(fmt,ap);
1331: // in debugmode==2 mode we print output from knowndrives.o functions
1332: else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1333: openlog("smartd", LOG_PID, facility);
1334: vsyslog_lines(LOG_INFO, fmt, ap);
1335: closelog();
1336: }
1337: va_end(ap);
1338: fflush(NULL);
1339: return;
1340: }
1341:
1342: // This function prints either to stdout or to the syslog as needed.
1343: static void PrintOut(int priority, const char *fmt, ...){
1344: va_list ap;
1345:
1346: // get the correct time in syslog()
1347: FixGlibcTimeZoneBug();
1348: // initialize variable argument list
1349: va_start(ap,fmt);
1350: if (debugmode)
1351: #ifdef _WIN32
1352: if (facility == LOG_LOCAL1) // logging to stdout
1353: vfprintf(stderr,fmt,ap);
1354: else
1355: #endif
1356: vprintf(fmt,ap);
1357: else {
1358: openlog("smartd", LOG_PID, facility);
1359: vsyslog_lines(priority, fmt, ap);
1360: closelog();
1361: }
1362: va_end(ap);
1363: return;
1364: }
1365:
1366: // Used to warn users about invalid checksums. Called from atacmds.cpp.
1367: void checksumwarning(const char * string)
1368: {
1369: pout("Warning! %s error: invalid SMART checksum.\n", string);
1370: }
1371:
1372: #ifndef _WIN32
1373:
1374: // Wait for the pid file to show up, this makes sure a calling program knows
1375: // that the daemon is really up and running and has a pid to kill it
1376: static bool WaitForPidFile()
1377: {
1378: int waited, max_wait = 10;
1379: struct stat stat_buf;
1380:
1381: if (pid_file.empty() || debugmode)
1382: return true;
1383:
1384: for(waited = 0; waited < max_wait; ++waited) {
1385: if (!stat(pid_file.c_str(), &stat_buf)) {
1386: return true;
1387: } else
1388: sleep(1);
1389: }
1390: return false;
1391: }
1392:
1393: #endif // _WIN32
1394:
1395: // Forks new process, closes ALL file descriptors, redirects stdin,
1396: // stdout, and stderr. Not quite daemon(). See
1397: // http://www.linuxjournal.com/article/2335
1398: // for a good description of why we do things this way.
1399: static void DaemonInit()
1400: {
1401: #ifndef _WIN32
1402: pid_t pid;
1403: int i;
1404:
1405: // flush all buffered streams. Else we might get two copies of open
1406: // streams since both parent and child get copies of the buffers.
1407: fflush(NULL);
1408:
1409: if (do_fork) {
1410: if ((pid=fork()) < 0) {
1411: // unable to fork!
1412: PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1413: EXIT(EXIT_STARTUP);
1414: }
1415: else if (pid) {
1416: // we are the parent process, wait for pid file, then exit cleanly
1417: if(!WaitForPidFile()) {
1418: PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1419: EXIT(EXIT_STARTUP);
1420: } else
1421: EXIT(0);
1422: }
1423:
1424: // from here on, we are the child process.
1425: setsid();
1426:
1427: // Fork one more time to avoid any possibility of having terminals
1428: if ((pid=fork()) < 0) {
1429: // unable to fork!
1430: PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1431: EXIT(EXIT_STARTUP);
1432: }
1433: else if (pid)
1434: // we are the parent process -- exit cleanly
1435: EXIT(0);
1436:
1437: // Now we are the child's child...
1438: }
1439:
1440: // close any open file descriptors
1441: for (i=getdtablesize();i>=0;--i)
1442: close(i);
1443:
1444: #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1445:
1446: // redirect any IO attempts to /dev/null for stdin
1447: i=open("/dev/null",O_RDWR);
1448: if (i>=0) {
1449: // stdout
1450: NO_warn_unused_result(dup(i));
1451: // stderr
1452: NO_warn_unused_result(dup(i));
1453: };
1454: umask(0022);
1455: NO_warn_unused_result(chdir("/"));
1456:
1457: if (do_fork)
1458: PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1459:
1460: #else // _WIN32
1461:
1462: // No fork() on native Win32
1463: // Detach this process from console
1464: fflush(NULL);
1465: if (daemon_detach("smartd")) {
1466: PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1467: EXIT(EXIT_STARTUP);
1468: }
1469: // stdin/out/err now closed if not redirected
1470:
1471: #endif // _WIN32
1472: return;
1473: }
1474:
1475: // create a PID file containing the current process id
1476: static void WritePidFile()
1477: {
1478: if (!pid_file.empty()) {
1479: pid_t pid = getpid();
1480: mode_t old_umask;
1481: #ifndef __CYGWIN__
1482: old_umask = umask(0077); // rwx------
1483: #else
1484: // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1485: old_umask = umask(0033); // rwxr--r--
1486: #endif
1487:
1488: stdio_file f(pid_file.c_str(), "w");
1489: umask(old_umask);
1490: if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1491: PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1492: EXIT(EXIT_PID);
1493: }
1494: PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1495: }
1496: }
1497:
1498: // Prints header identifying version of code and home
1499: static void PrintHead()
1500: {
1501: PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1502: }
1503:
1504: // prints help info for configuration file Directives
1505: static void Directives()
1506: {
1507: PrintOut(LOG_INFO,
1508: "Configuration file (%s) Directives (after device name):\n"
1509: " -d TYPE Set the device type: %s, auto, removable\n"
1510: " -T TYPE Set the tolerance to one of: normal, permissive\n"
1511: " -o VAL Enable/disable automatic offline tests (on/off)\n"
1512: " -S VAL Enable/disable attribute autosave (on/off)\n"
1513: " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1514: " -H Monitor SMART Health Status, report if failed\n"
1515: " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1.1.1.2 ! misho 1516: " -l TYPE Monitor SMART log or self-test status:\n"
! 1517: " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1.1 misho 1518: " -l scterc,R,W Set SCT Error Recovery Control\n"
1.1.1.2 ! misho 1519: " -e Change device setting: aam,[N|off], apm,[N|off], lookahead,[on|off],\n"
! 1520: " security-freeze, standby,[N|off], wcache,[on|off]\n"
1.1 misho 1521: " -f Monitor 'Usage' Attributes, report failures\n"
1522: " -m ADD Send email warning to address ADD\n"
1523: " -M TYPE Modify email warning behavior (see man page)\n"
1524: " -p Report changes in 'Prefailure' Attributes\n"
1525: " -u Report changes in 'Usage' Attributes\n"
1526: " -t Equivalent to -p and -u Directives\n"
1527: " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1528: " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1529: " -i ID Ignore Attribute ID for -f Directive\n"
1530: " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1531: " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1532: " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1533: " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1534: " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1535: " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1536: " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1537: " -F TYPE Firmware bug workaround: none, samsung, samsung2, samsung3\n"
1538: " # Comment: text after a hash sign is ignored\n"
1539: " \\ Line continuation character\n"
1540: "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1541: "Use ID = 0 to turn off -C and/or -U Directives\n"
1542: "Example: /dev/hda -a\n",
1543: configfile, smi()->get_valid_dev_types_str().c_str());
1544: return;
1545: }
1546:
/* Returns a pointer to a static string describing the valid arguments
   of command line option opt, or NULL if opt is not in the table. */
static const char *GetValidArgList(char opt)
{
  struct arg_list_entry {
    char option;
    const char * args;
  };

  static const arg_list_entry table[] = {
    { 'A', "<PATH_PREFIX>" },
    { 's', "<PATH_PREFIX>" },
    { 'c', "<FILE_NAME>, -" },
    { 'l', "daemon, local0, local1, local2, local3, local4, local5, local6, local7" },
    { 'q', "nodev, errors, nodevstartup, never, onecheck, showtests" },
    { 'r', "ioctl[,N], ataioctl[,N], scsiioctl[,N]" },
    { 'B', "<FILE_NAME>" },
    { 'p', "<FILE_NAME>" },
    { 'i', "<INTEGER_SECONDS>" }
  };

  const unsigned count = sizeof(table) / sizeof(table[0]);
  for (unsigned idx = 0; idx < count; idx++) {
    if (table[idx].option == opt)
      return table[idx].args;
  }
  return NULL;
}
1572:
1573: /* prints help information for command syntax */
1574: static void Usage()
1575: {
1576: PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1577: PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1578: PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1579: #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1580: PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_ATTRIBUTELOG"MODEL-SERIAL.ata.csv]\n");
1581: #endif
1582: PrintOut(LOG_INFO,"\n");
1583: PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1584: PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1585: PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1586: #ifdef SMARTMONTOOLS_DRIVEDBDIR
1587: PrintOut(LOG_INFO,"\n");
1588: PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1589: #endif
1590: PrintOut(LOG_INFO,"]\n\n");
1591: PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1592: PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1593: PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1594: #ifdef HAVE_LIBCAP_NG
1595: PrintOut(LOG_INFO," -C, --capabilities\n");
1596: PrintOut(LOG_INFO," Use capabilities.\n"
1597: " Warning: Mail notification does not work when used.\n\n");
1598: #endif
1599: PrintOut(LOG_INFO," -d, --debug\n");
1600: PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1601: PrintOut(LOG_INFO," -D, --showdirectives\n");
1602: PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1603: PrintOut(LOG_INFO," -h, --help, --usage\n");
1604: PrintOut(LOG_INFO," Display this help and exit\n\n");
1605: PrintOut(LOG_INFO," -i N, --interval=N\n");
1606: PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1607: PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1608: #ifndef _WIN32
1609: PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1610: #else
1611: PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1612: #endif
1613: #ifndef _WIN32
1614: PrintOut(LOG_INFO," -n, --no-fork\n");
1615: PrintOut(LOG_INFO," Do not fork into background\n\n");
1616: #endif // _WIN32
1617: PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1618: PrintOut(LOG_INFO," Write PID file NAME\n\n");
1619: PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1620: PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1621: PrintOut(LOG_INFO," -r, --report=TYPE\n");
1622: PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1623: PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1624: PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1625: #ifdef SMARTMONTOOLS_SAVESTATES
1626: PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_SAVESTATES"MODEL-SERIAL.TYPE.state]\n");
1627: #endif
1628: PrintOut(LOG_INFO,"\n");
1629: #ifdef _WIN32
1630: PrintOut(LOG_INFO," --service\n");
1631: PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1632: PrintOut(LOG_INFO," smartd install [options]\n");
1633: PrintOut(LOG_INFO," Remove service with:\n");
1634: PrintOut(LOG_INFO," smartd remove\n\n");
1635: #endif // _WIN32
1636: PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1637: PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1638: }
1639:
1640: static int CloseDevice(smart_device * device, const char * name)
1641: {
1642: if (!device->close()){
1643: PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1644: return 1;
1645: }
1646: // device sucessfully closed
1647: return 0;
1648: }
1649:
// Return true if a char is not allowed in a state file name.
// Only the ASCII digits and the ASCII letters A-Z and a-z are allowed.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1657:
1658: // Read error count from Summary or Extended Comprehensive SMART error log
1659: // Return -1 on error
1660: static int read_ata_error_count(ata_device * device, const char * name,
1661: unsigned char fix_firmwarebug, bool extended)
1662: {
1663: if (!extended) {
1664: ata_smart_errorlog log;
1665: if (ataReadErrorLog(device, &log, fix_firmwarebug)){
1666: PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1667: return -1;
1668: }
1669: return (log.error_log_pointer ? log.ata_error_count : 0);
1670: }
1671: else {
1672: ata_smart_exterrlog logx;
1673: if (!ataReadExtErrorLog(device, &logx, 1 /*first sector only*/)) {
1674: PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1675: return -1;
1676: }
1677: // Some disks use the reserved byte as index, see ataprint.cpp.
1678: return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1679: }
1680: }
1681:
1682: // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1683: // error count, and top bits are the power-on hours of the last error.
1684: static int SelfTestErrorCount(ata_device * device, const char * name,
1685: unsigned char fix_firmwarebug)
1686: {
1687: struct ata_smart_selftestlog log;
1688:
1689: if (ataReadSelfTestLog(device, &log, fix_firmwarebug)){
1690: PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1691: return -1;
1692: }
1693:
1694: // return current number of self-test errors
1695: return ataPrintSmartSelfTestlog(&log, false, fix_firmwarebug);
1696: }
1697:
// Unpack the value returned by SelfTestErrorCount(): the low 8 bits are
// the error count, the next 16 bits the power-on hours of the last error.
// The argument is parenthesized so that expressions such as 'a | b'
// are evaluated as a whole before masking/shifting.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1700:
// Check offline data collection status: true if the low 7 bits of the
// status byte equal 0x03 (bit 7 is ignored).
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const int code = status & 0x7f;
  return code == 0x03;
}
! 1706:
// Check self-test execution status: true if the high nibble of the
// status byte is 0xf (the low nibble is ignored).
static inline bool is_self_test_in_progress(unsigned char status)
{
  const int high_nibble = status & 0xf0;
  return high_nibble == 0xf0;
}
! 1712:
1.1 misho 1713: // Log offline data collection status
1714: static void log_offline_data_coll_status(const char * name, unsigned char status)
1715: {
1716: const char * msg;
1717: switch (status & 0x7f) {
1718: case 0x00: msg = "was never started"; break;
1719: case 0x02: msg = "was completed without error"; break;
1720: case 0x03: msg = "is in progress"; break;
1721: case 0x04: msg = "was suspended by an interrupting command from host"; break;
1722: case 0x05: msg = "was aborted by an interrupting command from host"; break;
1723: case 0x06: msg = "was aborted by the device with a fatal error"; break;
1724: default: msg = 0;
1725: }
1726:
1727: if (msg)
1728: PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1729: "Device: %s, offline data collection %s%s\n", name, msg,
1730: ((status & 0x80) ? " (auto:on)" : ""));
1731: else
1732: PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1733: name, status);
1734: }
1735:
1736: // Log self-test execution status
1737: static void log_self_test_exec_status(const char * name, unsigned char status)
1738: {
1739: const char * msg;
1740: switch (status >> 4) {
1741: case 0x0: msg = "completed without error"; break;
1742: case 0x1: msg = "was aborted by the host"; break;
1743: case 0x2: msg = "was interrupted by the host with a reset"; break;
1744: case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1745: case 0x4: msg = "completed with error (unknown test element)"; break;
1746: case 0x5: msg = "completed with error (electrical test element)"; break;
1747: case 0x6: msg = "completed with error (servo/seek test element)"; break;
1748: case 0x7: msg = "completed with error (read test element)"; break;
1749: case 0x8: msg = "completed with error (handling damage?)"; break;
1750: default: msg = 0;
1751: }
1752:
1753: if (msg)
1754: PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1755: "Device: %s, previous self-test %s\n", name, msg);
1756: else if ((status >> 4) == 0xf)
1757: PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1758: name, status & 0x0f);
1759: else
1760: PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1761: name, status);
1762: }
1763:
1764: // Check pending sector count id (-C, -U directives).
1765: static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1766: unsigned char id, const char * msg)
1767: {
1768: // Check attribute index
1769: int i = ata_find_attr_index(id, state.smartval);
1770: if (i < 0) {
1771: PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1772: cfg.name.c_str(), msg, id);
1773: return false;
1774: }
1775:
1776: // Check value
1777: uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1778: cfg.attribute_defs);
1779: if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1780: PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %"PRIu64" (0x%"PRIx64")\n",
1781: cfg.name.c_str(), msg, id, rawval, rawval);
1782: return false;
1783: }
1784:
1785: return true;
1786: }
1787:
1788: // Called by ATA/SCSIDeviceScan() after successful device check
1789: static void finish_device_scan(dev_config & cfg, dev_state & state)
1790: {
1791: // Set cfg.emailfreq if user hasn't set it
1792: if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1793: // Avoid that emails are suppressed forever due to state persistence
1794: if (cfg.state_file.empty())
1795: cfg.emailfreq = 1; // '-M once'
1796: else
1797: cfg.emailfreq = 2; // '-M daily'
1798: }
1799:
1800: // Start self-test regex check now if time was not read from state file
1801: if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1802: state.scheduled_test_next_check = time(0);
1803: }
1804:
1.1.1.2 ! misho 1805: // Common function to format result message for ATA setting
! 1806: static void format_set_result_msg(std::string & msg, const char * name, bool ok,
! 1807: int set_option = 0, bool has_value = false)
! 1808: {
! 1809: if (!msg.empty())
! 1810: msg += ", ";
! 1811: msg += name;
! 1812: if (!ok)
! 1813: msg += ":--";
! 1814: else if (set_option < 0)
! 1815: msg += ":off";
! 1816: else if (has_value)
! 1817: msg += strprintf(":%d", set_option-1);
! 1818: else if (set_option > 0)
! 1819: msg += ":on";
! 1820: }
! 1821:
1.1 misho 1822:
1823: // TODO: Add '-F swapid' directive
1824: const bool fix_swapped_id = false;
1825:
1826: // scan to see what ata devices there are, and if they support SMART
1827: static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev)
1828: {
1829: int supported=0;
1830: struct ata_identify_device drive;
1831: const char *name = cfg.name.c_str();
1832: int retid;
1833:
1834: // Device must be open
1835:
1836: // Get drive identity structure
1837: if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1838: if (retid<0)
1839: // Unable to read Identity structure
1840: PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1841: else
1842: PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1843: name, packetdevicetype(retid-1));
1844: CloseDevice(atadev, name);
1845: return 2;
1846: }
1847:
1848: // Log drive identity and size
1849: char model[40+1], serial[20+1], firmware[8+1];
1850: ata_format_id_string(model, drive.model, sizeof(model)-1);
1851: ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1852: ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1853:
1854: ata_size_info sizes;
1855: ata_get_size_info(&drive, sizes);
1856: state.num_sectors = sizes.sectors;
1857:
1858: char wwn[30]; wwn[0] = 0;
1859: unsigned oui = 0; uint64_t unique_id = 0;
1860: int naa = ata_get_wwn(&drive, oui, unique_id);
1861: if (naa >= 0)
1862: snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09"PRIx64", ", naa, oui, unique_id);
1863:
1864: char cap[32];
1865: PrintOut(LOG_INFO, "Device: %s, %s, S/N:%s, %sFW:%s, %s\n", name,
1866: model, serial, wwn, firmware,
1867: format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1868:
1869: // Show if device in database, and use preset vendor attribute
1870: // options unless user has requested otherwise.
1871: if (cfg.ignorepresets)
1872: PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1873: else {
1874: // Apply vendor specific presets, print warning if present
1875: const drive_settings * dbentry = lookup_drive_apply_presets(
1876: &drive, cfg.attribute_defs, cfg.fix_firmwarebug);
1877: if (!dbentry)
1878: PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1879: else {
1880: PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s\n",
1881: name, (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
1882: if (*dbentry->warningmsg)
1883: PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1884: }
1885: }
1886:
1887: // Set default '-C 197[+]' if no '-C ID' is specified.
1888: if (!cfg.curr_pending_set)
1889: cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
1890: // Set default '-U 198[+]' if no '-U ID' is specified.
1891: if (!cfg.offl_pending_set)
1892: cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
1893:
1894: // If requested, show which presets would be used for this drive
1895: if (cfg.showpresets) {
1896: int savedebugmode=debugmode;
1897: PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1898: if (!debugmode)
1899: debugmode=2;
1900: show_presets(&drive);
1901: debugmode=savedebugmode;
1902: }
1903:
1904: // see if drive supports SMART
1905: supported=ataSmartSupport(&drive);
1906: if (supported!=1) {
1907: if (supported==0)
1908: // drive does NOT support SMART
1909: PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1910: else
1911: // can't tell if drive supports SMART
1912: PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1913:
1914: // should we proceed anyway?
1915: if (cfg.permissive) {
1916: PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1917: }
1918: else {
1919: PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1920: CloseDevice(atadev, name);
1921: return 2;
1922: }
1923: }
1924:
1925: if (ataEnableSmart(atadev)) {
1926: // Enable SMART command has failed
1927: PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1928: CloseDevice(atadev, name);
1929: return 2;
1930: }
1931:
1932: // disable device attribute autosave...
1933: if (cfg.autosave==1) {
1934: if (ataDisableAutoSave(atadev))
1935: PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1936: else
1937: PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1938: }
1939:
1940: // or enable device attribute autosave
1941: if (cfg.autosave==2) {
1942: if (ataEnableAutoSave(atadev))
1943: PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1944: else
1945: PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1946: }
1947:
1948: // capability check: SMART status
1949: if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1950: PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1951: cfg.smartcheck = false;
1952: }
1953:
1954: // capability check: Read smart values and thresholds. Note that
1955: // smart values are ALSO needed even if we ONLY want to know if the
1956: // device is self-test log or error-log capable! After ATA-5, this
1957: // information was ALSO reproduced in the IDENTIFY DEVICE response,
1958: // but sadly not for ATA-5. Sigh.
1959:
1960: // do we need to get SMART data?
1961: bool smart_val_ok = false;
1962: if ( cfg.autoofflinetest || cfg.selftest
1963: || cfg.errorlog || cfg.xerrorlog
1964: || cfg.offlinests || cfg.selfteststs
1965: || cfg.usagefailed || cfg.prefail || cfg.usage
1966: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
1967: || cfg.curr_pending_id || cfg.offl_pending_id ) {
1968:
1969: if (ataReadSmartValues(atadev, &state.smartval)) {
1970: PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
1971: cfg.usagefailed = cfg.prefail = cfg.usage = false;
1972: cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1973: cfg.curr_pending_id = cfg.offl_pending_id = 0;
1974: }
1975: else {
1976: smart_val_ok = true;
1977: if (ataReadSmartThresholds(atadev, &state.smartthres)) {
1978: PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
1979: name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
1980: cfg.usagefailed = false;
1981: // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1982: memset(&state.smartthres, 0, sizeof(state.smartthres));
1983: }
1984: }
1985:
1986: // see if the necessary Attribute is there to monitor offline or
1987: // current pending sectors or temperature
1988: if ( cfg.curr_pending_id
1989: && !check_pending_id(cfg, state, cfg.curr_pending_id,
1990: "Current_Pending_Sector"))
1991: cfg.curr_pending_id = 0;
1992:
1993: if ( cfg.offl_pending_id
1994: && !check_pending_id(cfg, state, cfg.offl_pending_id,
1995: "Offline_Uncorrectable"))
1996: cfg.offl_pending_id = 0;
1997:
1998: if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1999: && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
2000: PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", name);
2001: cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2002: }
1.1.1.2 ! misho 2003:
! 2004: // Report ignored '-r' or '-R' directives
! 2005: for (int id = 1; id <= 255; id++) {
! 2006: if (cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_PRINT)) {
! 2007: char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
! 2008: const char * excl = (cfg.monitor_attr_flags.is_set(id,
! 2009: (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
! 2010:
! 2011: int idx = ata_find_attr_index(id, state.smartval);
! 2012: if (idx < 0)
! 2013: PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
! 2014: else {
! 2015: bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
! 2016: if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
! 2017: PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
! 2018: (prefail ? "Prefailure" : "Usage"), opt, id, excl);
! 2019: }
! 2020: }
! 2021: }
1.1 misho 2022: }
2023:
2024: // enable/disable automatic on-line testing
2025: if (cfg.autoofflinetest) {
2026: // is this an enable or disable request?
2027: const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
2028: if (!smart_val_ok)
2029: PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
2030: else {
2031: // if command appears unsupported, issue a warning...
2032: if (!isSupportAutomaticTimer(&state.smartval))
2033: PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
2034: // ... but then try anyway
2035: if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
2036: PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
2037: else
2038: PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
2039: }
2040: }
2041:
2042: // Read log directories if required for capability check
2043: ata_smart_log_directory smart_logdir, gp_logdir;
2044: bool smart_logdir_ok = false, gp_logdir_ok = false;
2045:
2046: if ( isGeneralPurposeLoggingCapable(&drive)
2047: && (cfg.errorlog || cfg.selftest) ) {
2048: if (!ataReadLogDirectory(atadev, &smart_logdir, false))
2049: smart_logdir_ok = true;
2050: }
2051:
2052: if (cfg.xerrorlog) {
2053: if (!ataReadLogDirectory(atadev, &gp_logdir, true))
2054: gp_logdir_ok = true;
2055: }
2056:
2057: // capability check: self-test-log
2058: state.selflogcount = 0; state.selfloghour = 0;
2059: if (cfg.selftest) {
2060: int retval;
2061: if (!( cfg.permissive
2062: || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
2063: || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2064: PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2065: cfg.selftest = false;
2066: }
2067: else if ((retval = SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug)) < 0) {
2068: PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2069: cfg.selftest = false;
2070: }
2071: else {
2072: state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2073: state.selfloghour =SELFTEST_ERRORHOURS(retval);
2074: }
2075: }
2076:
2077: // capability check: ATA error log
2078: state.ataerrorcount = 0;
2079: if (cfg.errorlog) {
2080: int errcnt1;
2081: if (!( cfg.permissive
2082: || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2083: || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2084: PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2085: cfg.errorlog = false;
2086: }
2087: else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, false)) < 0) {
2088: PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2089: cfg.errorlog = false;
2090: }
2091: else
2092: state.ataerrorcount = errcnt1;
2093: }
2094:
2095: if (cfg.xerrorlog) {
2096: int errcnt2;
2097: if (!(cfg.permissive || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors))) {
2098: PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2099: name);
2100: cfg.xerrorlog = false;
2101: }
2102: else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, true)) < 0) {
2103: PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2104: cfg.xerrorlog = false;
2105: }
2106: else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2107: PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2108: name, state.ataerrorcount, errcnt2);
2109: // Record max error count
2110: if (errcnt2 > state.ataerrorcount)
2111: state.ataerrorcount = errcnt2;
2112: }
2113: else
2114: state.ataerrorcount = errcnt2;
2115: }
2116:
2117: // capability check: self-test and offline data collection status
2118: if (cfg.offlinests || cfg.selfteststs) {
2119: if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2120: if (cfg.offlinests)
2121: PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2122: if (cfg.selfteststs)
2123: PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2124: cfg.offlinests = cfg.selfteststs = false;
2125: }
2126: }
2127:
2128: // capabilities check -- does it support powermode?
2129: if (cfg.powermode) {
2130: int powermode = ataCheckPowerMode(atadev);
2131:
2132: if (-1 == powermode) {
2133: PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2134: cfg.powermode=0;
2135: }
2136: else if (powermode!=0 && powermode!=0x80 && powermode!=0xff) {
2137: PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2138: name, powermode);
2139: cfg.powermode=0;
2140: }
2141: }
2142:
1.1.1.2 ! misho 2143: // Apply ATA settings
! 2144: std::string msg;
! 2145:
! 2146: if (cfg.set_aam)
! 2147: format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
! 2148: ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
! 2149: ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
! 2150:
! 2151: if (cfg.set_apm)
! 2152: format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
! 2153: ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
! 2154: ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
! 2155:
! 2156: if (cfg.set_lookahead)
! 2157: format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
! 2158: (cfg.set_lookahead > 0 ? ATA_ENABLE_READ_LOOK_AHEAD : ATA_DISABLE_READ_LOOK_AHEAD)),
! 2159: cfg.set_lookahead);
! 2160:
! 2161: if (cfg.set_wcache)
! 2162: format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
! 2163: (cfg.set_wcache > 0? ATA_ENABLE_WRITE_CACHE : ATA_DISABLE_WRITE_CACHE)), cfg.set_wcache);
! 2164:
! 2165: if (cfg.set_security_freeze)
! 2166: format_set_result_msg(msg, "Security freeze",
! 2167: ata_nodata_command(atadev, ATA_SECURITY_FREEZE_LOCK));
! 2168:
! 2169: if (cfg.set_standby)
! 2170: format_set_result_msg(msg, "Standby",
! 2171: ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
! 2172:
! 2173: // Report as one log entry
! 2174: if (!msg.empty())
! 2175: PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
! 2176:
1.1 misho 2177: // set SCT Error Recovery Control if requested
2178: if (cfg.sct_erc_set) {
2179: if (!isSCTErrorRecoveryControlCapable(&drive))
2180: PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2181: name);
2182: else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
2183: || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime))
2184: PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2185: else
2186: PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2187: name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2188: }
2189:
2190: // If no tests available or selected, return
2191: if (!( cfg.smartcheck || cfg.selftest
2192: || cfg.errorlog || cfg.xerrorlog
2193: || cfg.offlinests || cfg.selfteststs
2194: || cfg.usagefailed || cfg.prefail || cfg.usage
2195: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2196: CloseDevice(atadev, name);
2197: return 3;
2198: }
2199:
2200: // tell user we are registering device
2201: PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2202:
2203: // close file descriptor
2204: CloseDevice(atadev, name);
2205:
2206: if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2207: // Build file name for state file
2208: std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2209: std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2210: if (!state_path_prefix.empty()) {
2211: cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2212: // Read previous state
2213: if (read_dev_state(cfg.state_file.c_str(), state)) {
2214: PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2215: // Copy ATA attribute values to temp state
2216: state.update_temp_state();
2217: }
2218: }
2219: if (!attrlog_path_prefix.empty())
2220: cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2221: }
2222:
2223: finish_device_scan(cfg, state);
2224:
2225: return 0;
2226: }
2227:
2228: // on success, return 0. On failure, return >0. Never return <0,
2229: // please.
2230: static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev)
2231: {
2232: int k, err, req_len, avail_len, version, len;
2233: const char *device = cfg.name.c_str();
2234: struct scsi_iec_mode_page iec;
2235: UINT8 tBuf[64];
2236: UINT8 inqBuf[96];
2237: UINT8 vpdBuf[252];
2238: char lu_id[64];
2239:
2240: // Device must be open
2241: memset(inqBuf, 0, 96);
2242: req_len = 36;
2243: if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2244: /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2245: req_len = 64;
2246: if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2247: PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2248: "skip device\n", device);
2249: return 2;
2250: }
2251: }
2252: version = inqBuf[2];
2253: avail_len = inqBuf[4] + 5;
2254: len = (avail_len < req_len) ? avail_len : req_len;
2255: if (len < 36) {
2256: PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2257: "skip device\n", device);
2258: return 2;
2259: }
1.1.1.2 ! misho 2260:
! 2261: int pdt = inqBuf[0] & 0x1f;
! 2262:
! 2263: if (! ((0 == pdt) || (4 == pdt) || (5 == pdt) || (7 == pdt) ||
! 2264: (0xe == pdt))) {
! 2265: PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
! 2266: "skip\n", device, pdt);
! 2267: return 2;
! 2268: }
1.1 misho 2269: lu_id[0] = '\0';
2270: if ((version >= 0x4) && (version < 0x8)) {
2271: /* SPC-2 to SPC-5 */
2272: if (0 == (err = scsiInquiryVpd(scsidev, 0x83, vpdBuf, sizeof(vpdBuf)))) {
2273: len = vpdBuf[3];
2274: scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), NULL);
2275: }
2276: }
2277:
2278: unsigned int lb_size;
2279: char si_str[64];
2280: uint64_t capacity = scsiGetSize(scsidev, &lb_size);
2281:
2282: if (capacity)
2283: format_capacity(si_str, sizeof(si_str), capacity);
2284: else
2285: si_str[0] = '\0';
2286: PrintOut(LOG_INFO, "Device: %s, [%.8s %.16s %.4s]%s%s%s%s\n",
2287: device, (char *)&inqBuf[8], (char *)&inqBuf[16],
2288: (char *)&inqBuf[32],
2289: (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2290: (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2291:
2292: // check that device is ready for commands. IE stores its stuff on
2293: // the media.
2294: if ((err = scsiTestUnitReady(scsidev))) {
2295: if (SIMPLE_ERR_NOT_READY == err)
2296: PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2297: else if (SIMPLE_ERR_NO_MEDIUM == err)
2298: PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2299: else if (SIMPLE_ERR_BECOMING_READY == err)
2300: PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2301: else
2302: PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2303: CloseDevice(scsidev, device);
2304: return 2;
2305: }
2306:
2307: // Badly-conforming USB storage devices may fail this check.
2308: // The response to the following IE mode page fetch (current and
2309: // changeable values) is carefully examined. It has been found
2310: // that various USB devices that malform the response will lock up
2311: // if asked for a log page (e.g. temperature) so it is best to
2312: // bail out now.
2313: if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2314: state.modese_len = iec.modese_len;
2315: else if (SIMPLE_ERR_BAD_FIELD == err)
2316: ; /* continue since it is reasonable not to support IE mpage */
2317: else { /* any other error (including malformed response) unreasonable */
2318: PrintOut(LOG_INFO,
2319: "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2320: device, err);
2321: CloseDevice(scsidev, device);
2322: return 3;
2323: }
2324:
2325: // N.B. The following is passive (i.e. it doesn't attempt to turn on
2326: // smart if it is off). This may change to be the same as the ATA side.
2327: if (!scsi_IsExceptionControlEnabled(&iec)) {
2328: PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2329: "Try 'smartctl -s on %s' to turn on SMART features\n",
2330: device, device);
2331: CloseDevice(scsidev, device);
2332: return 3;
2333: }
2334:
2335: // Flag that certain log pages are supported (information may be
2336: // available from other sources).
2337: if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0)) {
2338: for (k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2339: switch (tBuf[k]) {
2340: case TEMPERATURE_LPAGE:
2341: state.TempPageSupported = 1;
2342: break;
2343: case IE_LPAGE:
2344: state.SmartPageSupported = 1;
2345: break;
2346: default:
2347: break;
2348: }
2349: }
2350: }
2351:
2352: // Check if scsiCheckIE() is going to work
2353: {
2354: UINT8 asc = 0;
2355: UINT8 ascq = 0;
2356: UINT8 currenttemp = 0;
2357: UINT8 triptemp = 0;
2358:
2359: if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2360: &asc, &ascq, ¤ttemp, &triptemp)) {
2361: PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2362: state.SuppressReport = 1;
2363: if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2364: PrintOut(LOG_CRIT, "Device: %s, can't monitor Temperature, ignoring -W Directive\n", device);
2365: cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2366: }
2367: }
2368: }
2369:
2370: // capability check: self-test-log
2371: if (cfg.selftest){
2372: int retval = scsiCountFailedSelfTests(scsidev, 0);
2373: if (retval<0) {
2374: // no self-test log, turn off monitoring
2375: PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2376: cfg.selftest = false;
2377: state.selflogcount = 0;
2378: state.selfloghour = 0;
2379: }
2380: else {
2381: // register starting values to watch for changes
2382: state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2383: state.selfloghour =SELFTEST_ERRORHOURS(retval);
2384: }
2385: }
2386:
2387: // disable autosave (set GLTSD bit)
2388: if (cfg.autosave==1){
2389: if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2390: PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2391: else
2392: PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2393: }
2394:
2395: // or enable autosave (clear GLTSD bit)
2396: if (cfg.autosave==2){
2397: if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2398: PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2399: else
2400: PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2401: }
2402:
2403: // tell user we are registering device
2404: PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2405:
2406: // TODO: Build file name for state file
2407: if (!state_path_prefix.empty()) {
2408: PrintOut(LOG_INFO, "Device: %s, persistence not yet supported for SCSI; ignoring -s option.\n", device);
2409: }
2410: // TODO: Build file name for attribute log file
2411: if (!attrlog_path_prefix.empty()) {
2412: PrintOut(LOG_INFO, "Device: %s, attribute log not yet supported for SCSI; ignoring -A option.\n", device);
2413: }
2414:
1.1.1.2 ! misho 2415: // Make sure that init_standby_check() ignores SCSI devices
! 2416: cfg.offlinests_ns = cfg.selfteststs_ns = false;
! 2417:
1.1 misho 2418: // close file descriptor
2419: CloseDevice(scsidev, device);
2420:
2421: finish_device_scan(cfg, state);
2422:
2423: return 0;
2424: }
2425:
2426: // If the self-test log has got more self-test errors (or more recent
2427: // self-test errors) recorded, then notify user.
2428: static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2429: {
2430: const char * name = cfg.name.c_str();
2431:
2432: if (newi<0)
2433: // command failed
2434: MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2435: else {
2436: reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2437:
2438: // old and new error counts
2439: int oldc=state.selflogcount;
2440: int newc=SELFTEST_ERRORCOUNT(newi);
2441:
2442: // old and new error timestamps in hours
2443: int oldh=state.selfloghour;
2444: int newh=SELFTEST_ERRORHOURS(newi);
2445:
2446: if (oldc<newc) {
2447: // increase in error count
2448: PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2449: name, oldc, newc);
2450: MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2451: name, oldc, newc);
2452: state.must_write = true;
2453: }
2454: else if (newc > 0 && oldh != newh) {
2455: // more recent error
2456: // a 'more recent' error might actually be a smaller hour number,
2457: // if the hour number has wrapped.
2458: // There's still a bug here. You might just happen to run a new test
2459: // exactly 32768 hours after the previous failure, and have run exactly
2460: // 20 tests between the two, in which case smartd will miss the
2461: // new failure.
2462: PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2463: name, newh);
2464: MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2465: name, newh);
2466: state.must_write = true;
2467: }
2468:
2469: // Print info if error entries have disappeared
2470: // or newer successful successful extended self-test exits
2471: if (oldc > newc) {
2472: PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2473: name, oldc, newc);
2474: if (newc == 0)
2475: reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2476: }
2477:
2478: // Needed since self-test error count may DECREASE. Hour might
2479: // also have changed.
2480: state.selflogcount= newc;
2481: state.selfloghour = newh;
2482: }
2483: return;
2484: }
2485:
2486: // Test types, ordered by priority.
2487: static const char test_type_chars[] = "LncrSCO";
2488: static const unsigned num_test_types = sizeof(test_type_chars)-1;
2489:
2490: // returns test type if time to do test of type testtype,
2491: // 0 if not time to do test.
2492: static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2493: {
2494: // check that self-testing has been requested
2495: if (cfg.test_regex.empty())
2496: return 0;
2497:
2498: // Exit if drive not capable of any test
2499: if ( state.not_cap_long && state.not_cap_short &&
2500: (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2501: return 0;
2502:
2503: // since we are about to call localtime(), be sure glibc is informed
2504: // of any timezone changes we make.
2505: if (!usetime)
2506: FixGlibcTimeZoneBug();
2507:
2508: // Is it time for next check?
2509: time_t now = (!usetime ? time(0) : usetime);
2510: if (now < state.scheduled_test_next_check)
2511: return 0;
2512:
2513: // Limit time check interval to 90 days
2514: if (state.scheduled_test_next_check + (3600L*24*90) < now)
2515: state.scheduled_test_next_check = now - (3600L*24*90);
2516:
2517: // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2518: char testtype = 0;
2519: time_t testtime = 0; int testhour = 0;
2520: int maxtest = num_test_types-1;
2521:
2522: for (time_t t = state.scheduled_test_next_check; ; ) {
2523: struct tm * tms = localtime(&t);
2524: // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2525: int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2526: for (int i = 0; i <= maxtest; i++) {
2527: // Skip if drive not capable of this test
2528: switch (test_type_chars[i]) {
2529: case 'L': if (state.not_cap_long) continue; break;
2530: case 'S': if (state.not_cap_short) continue; break;
2531: case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2532: case 'O': if (scsi || state.not_cap_offline) continue; break;
2533: case 'c': case 'n':
2534: case 'r': if (scsi || state.not_cap_selective) continue; break;
2535: default: continue;
2536: }
2537: // Try match of "T/MM/DD/d/HH"
2538: char pattern[16];
2539: snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2540: test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2541: if (cfg.test_regex.full_match(pattern)) {
2542: // Test found
2543: testtype = pattern[0];
2544: testtime = t; testhour = tms->tm_hour;
2545: // Limit further matches to higher priority self-tests
2546: maxtest = i-1;
2547: break;
2548: }
2549: }
2550: // Exit if no tests left or current time reached
2551: if (maxtest < 0)
2552: break;
2553: if (t >= now)
2554: break;
2555: // Check next hour
2556: if ((t += 3600) > now)
2557: t = now;
2558: }
2559:
2560: // Do next check not before next hour.
2561: struct tm * tmnow = localtime(&now);
2562: state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2563:
2564: if (testtype) {
2565: state.must_write = true;
2566: // Tell user if an old test was found.
2567: if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2568: char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2569: PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2570: cfg.name.c_str(), testtype, datebuf);
2571: }
2572: }
2573:
2574: return testtype;
2575: }
2576:
2577: // Print a list of future tests.
2578: static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2579: {
2580: unsigned numdev = configs.size();
2581: if (!numdev)
2582: return;
2583: std::vector<int> testcnts(numdev * num_test_types, 0);
2584:
2585: PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2586:
2587: // FixGlibcTimeZoneBug(); // done in PrintOut()
2588: time_t now = time(0);
2589: char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2590: dateandtimezoneepoch(datenow, now);
2591:
2592: long seconds;
2593: for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2594: // Check for each device whether a test will be run
2595: time_t testtime = now + seconds;
2596: for (unsigned i = 0; i < numdev; i++) {
2597: const dev_config & cfg = configs.at(i);
2598: dev_state & state = states.at(i);
2599: const char * p;
2600: char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2601: if (testtype && (p = strchr(test_type_chars, testtype))) {
2602: unsigned t = (p - test_type_chars);
2603: // Report at most 5 tests of each type
2604: if (++testcnts[i*num_test_types + t] <= 5) {
2605: dateandtimezoneepoch(date, testtime);
2606: PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2607: testcnts[i*num_test_types + t], testtype, date);
2608: }
2609: }
2610: }
2611: }
2612:
2613: // Report totals
2614: dateandtimezoneepoch(date, now+seconds);
2615: PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2616: for (unsigned i = 0; i < numdev; i++) {
2617: const dev_config & cfg = configs.at(i);
2618: bool scsi = devices.at(i)->is_scsi();
2619: for (unsigned t = 0; t < num_test_types; t++) {
2620: int cnt = testcnts[i*num_test_types + t];
2621: if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2622: continue;
2623: PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2624: cnt, (cnt==1?"":"s"), test_type_chars[t]);
2625: }
2626: }
2627:
2628: }
2629:
2630: // Return zero on success, nonzero on failure. Perform offline (background)
2631: // short or long (extended) self test on given scsi device.
2632: static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2633: {
2634: int retval = 0;
2635: const char *testname = 0;
2636: const char *name = cfg.name.c_str();
2637: int inProgress;
2638:
2639: if (scsiSelfTestInProgress(device, &inProgress)) {
2640: PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2641: state.not_cap_short = state.not_cap_long = true;
2642: return 1;
2643: }
2644:
2645: if (1 == inProgress) {
2646: PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2647: "progress.\n", name);
2648: return 1;
2649: }
2650:
2651: switch (testtype) {
2652: case 'S':
2653: testname = "Short Self";
2654: retval = scsiSmartShortSelfTest(device);
2655: break;
2656: case 'L':
2657: testname = "Long Self";
2658: retval = scsiSmartExtendSelfTest(device);
2659: break;
2660: }
2661: // If we can't do the test, exit
2662: if (NULL == testname) {
2663: PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2664: testtype);
2665: return 1;
2666: }
2667: if (retval) {
2668: if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2669: (SIMPLE_ERR_BAD_FIELD == retval)) {
2670: PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2671: testname);
2672: if ('L'==testtype)
2673: state.not_cap_long = true;
2674: else
2675: state.not_cap_short = true;
2676:
2677: return 1;
2678: }
2679: PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2680: testname, retval);
2681: return 1;
2682: }
2683:
2684: PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2685:
2686: return 0;
2687: }
2688:
2689: // Do an offline immediate or self-test. Return zero on success,
2690: // nonzero on failure.
2691: static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2692: {
2693: const char *name = cfg.name.c_str();
2694:
2695: // Read current smart data and check status/capability
2696: struct ata_smart_values data;
2697: if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2698: PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2699: return 1;
2700: }
2701:
2702: // Check for capability to do the test
2703: int dotest = -1, mode = 0;
2704: const char *testname = 0;
2705: switch (testtype) {
2706: case 'O':
2707: testname="Offline Immediate ";
2708: if (isSupportExecuteOfflineImmediate(&data))
2709: dotest=OFFLINE_FULL_SCAN;
2710: else
2711: state.not_cap_offline = true;
2712: break;
2713: case 'C':
2714: testname="Conveyance Self-";
2715: if (isSupportConveyanceSelfTest(&data))
2716: dotest=CONVEYANCE_SELF_TEST;
2717: else
2718: state.not_cap_conveyance = true;
2719: break;
2720: case 'S':
2721: testname="Short Self-";
2722: if (isSupportSelfTest(&data))
2723: dotest=SHORT_SELF_TEST;
2724: else
2725: state.not_cap_short = true;
2726: break;
2727: case 'L':
2728: testname="Long Self-";
2729: if (isSupportSelfTest(&data))
2730: dotest=EXTEND_SELF_TEST;
2731: else
2732: state.not_cap_long = true;
2733: break;
2734:
2735: case 'c': case 'n': case 'r':
2736: testname = "Selective Self-";
2737: if (isSupportSelectiveSelfTest(&data)) {
2738: dotest = SELECTIVE_SELF_TEST;
2739: switch (testtype) {
2740: case 'c': mode = SEL_CONT; break;
2741: case 'n': mode = SEL_NEXT; break;
2742: case 'r': mode = SEL_REDO; break;
2743: }
2744: }
2745: else
2746: state.not_cap_selective = true;
2747: break;
2748: }
2749:
2750: // If we can't do the test, exit
2751: if (dotest<0) {
2752: PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2753: return 1;
2754: }
2755:
2756: // If currently running a self-test, do not interrupt it to start another.
2757: if (15==(data.self_test_exec_status >> 4)) {
2758: if (cfg.fix_firmwarebug == FIX_SAMSUNG3 && data.self_test_exec_status == 0xf0) {
2759: PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2760: "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2761: } else {
2762: PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2763: name, testname, (int)(data.self_test_exec_status & 0x0f));
2764: return 1;
2765: }
2766: }
2767:
2768: if (dotest == SELECTIVE_SELF_TEST) {
2769: // Set test span
2770: ata_selective_selftest_args selargs, prev_args;
2771: selargs.num_spans = 1;
2772: selargs.span[0].mode = mode;
2773: prev_args.num_spans = 1;
2774: prev_args.span[0].start = state.selective_test_last_start;
2775: prev_args.span[0].end = state.selective_test_last_end;
2776: if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
2777: PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
2778: return 1;
2779: }
2780: uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
2781: PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %"PRIu64" - %"PRIu64" (%"PRIu64" sectors, %u%% - %u%% of disk).\n",
2782: name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
2783: start, end, end - start + 1,
2784: (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
2785: (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
2786: state.selective_test_last_start = start;
2787: state.selective_test_last_end = end;
2788: }
2789:
2790: // execute the test, and return status
2791: int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
2792: if (retval) {
2793: PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2794: return retval;
2795: }
2796:
1.1.1.2 ! misho 2797: // Report recent test start to do_disable_standby_check()
! 2798: // and force log of next test status
! 2799: if (testtype == 'O')
! 2800: state.offline_started = true;
! 2801: else
! 2802: state.selftest_started = true;
1.1 misho 2803:
2804: PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2805: return 0;
2806: }
2807:
2808: // Check pending sector count attribute values (-C, -U directives).
2809: static void check_pending(const dev_config & cfg, dev_state & state,
2810: unsigned char id, bool increase_only,
2811: const ata_smart_values & smartval,
2812: int mailtype, const char * msg)
2813: {
2814: // Find attribute index
2815: int i = ata_find_attr_index(id, smartval);
2816: if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
2817: return;
2818:
2819: // No report if no sectors pending.
2820: uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
2821: if (rawval == 0) {
2822: reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
2823: return;
2824: }
2825:
2826: // If attribute is not reset, report only sector count increases.
2827: uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
2828: if (!(!increase_only || prev_rawval < rawval))
2829: return;
2830:
2831: // Format message.
2832: std::string s = strprintf("Device: %s, %"PRId64" %s", cfg.name.c_str(), rawval, msg);
2833: if (prev_rawval > 0 && rawval != prev_rawval)
2834: s += strprintf(" (changed %+"PRId64")", rawval - prev_rawval);
2835:
2836: PrintOut(LOG_CRIT, "%s\n", s.c_str());
2837: MailWarning(cfg, state, mailtype, "%s\n", s.c_str());
2838: state.must_write = true;
2839: }
2840:
// Render a temperature value as text into 'buf' and return 'buf'.
// A value of 0 means "unset" and is rendered as "??"; any other value is
// printed as an unsigned decimal number.
static const char * fmt_temp(unsigned char x, char * buf)
{
  if (x)
    sprintf(buf, "%u", x);
  else
    strcpy(buf, "??"); // unset
  return buf;
}
2850:
2851: // Check Temperature limits
2852: static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
2853: {
2854: if (!(0 < currtemp && currtemp < 255)) {
2855: PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
2856: return;
2857: }
2858:
2859: // Update Max Temperature
2860: const char * minchg = "", * maxchg = "";
2861: if (currtemp > state.tempmax) {
2862: if (state.tempmax)
2863: maxchg = "!";
2864: state.tempmax = currtemp;
2865: state.must_write = true;
2866: }
2867:
2868: char buf[20];
2869: if (!state.temperature) {
2870: // First check
2871: if (!state.tempmin || currtemp < state.tempmin)
2872: // Delay Min Temperature update by ~ 30 minutes.
2873: state.tempmin_delay = time(0) + CHECKTIME - 60;
2874: PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2875: cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
2876: if (triptemp)
2877: PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
2878: state.temperature = currtemp;
2879: }
2880: else {
2881: if (state.tempmin_delay) {
2882: // End Min Temperature update delay if ...
2883: if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
2884: || (state.tempmin_delay <= time(0))) { // or delay time is over.
2885: state.tempmin_delay = 0;
2886: if (!state.tempmin)
2887: state.tempmin = 255;
2888: }
2889: }
2890:
2891: // Update Min Temperature
2892: if (!state.tempmin_delay && currtemp < state.tempmin) {
2893: state.tempmin = currtemp;
2894: state.must_write = true;
2895: if (currtemp != state.temperature)
2896: minchg = "!";
2897: }
2898:
2899: // Track changes
2900: if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
2901: PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
2902: cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2903: state.temperature = currtemp;
2904: }
2905: }
2906:
2907: // Check limits
2908: if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
2909: PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2910: cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2911: MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2912: cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2913: }
2914: else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
2915: PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2916: cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2917: }
2918: else if (cfg.tempcrit) {
2919: unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
2920: if (currtemp < limit)
2921: reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
2922: }
2923: }
2924:
2925: // Check normalized and raw attribute values.
2926: static void check_attribute(const dev_config & cfg, dev_state & state,
2927: const ata_smart_attribute & attr,
2928: const ata_smart_attribute & prev,
2929: int attridx,
2930: const ata_smart_threshold_entry * thresholds)
2931: {
2932: // Check attribute and threshold
2933: ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
2934: if (attrstate == ATTRSTATE_NON_EXISTING)
2935: return;
2936:
2937: // If requested, check for usage attributes that have failed.
2938: if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
2939: && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
2940: std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs);
2941: PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
2942: MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
2943: state.must_write = true;
2944: }
2945:
2946: // Return if we're not tracking this type of attribute
2947: bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
2948: if (!( ( prefail && cfg.prefail)
2949: || (!prefail && cfg.usage )))
2950: return;
2951:
2952: // Return if '-I ID' was specified
2953: if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
2954: return;
2955:
2956: // Issue warning if they don't have the same ID in all structures.
2957: if (attr.id != prev.id) {
2958: PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
2959: cfg.name.c_str(), attr.id, prev.id);
2960: return;
2961: }
2962:
2963: // Compare normalized values if valid.
2964: bool valchanged = false;
2965: if (attrstate > ATTRSTATE_NO_NORMVAL) {
2966: if (attr.current != prev.current)
2967: valchanged = true;
2968: }
2969:
2970: // Compare raw values if requested.
2971: bool rawchanged = false;
2972: if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
2973: if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
2974: != ata_get_attr_raw_value(prev, cfg.attribute_defs))
2975: rawchanged = true;
2976: }
2977:
2978: // Return if no change
2979: if (!(valchanged || rawchanged))
2980: return;
2981:
2982: // Format value strings
2983: std::string currstr, prevstr;
2984: if (attrstate == ATTRSTATE_NO_NORMVAL) {
2985: // Print raw values only
2986: currstr = strprintf("%s (Raw)",
2987: ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2988: prevstr = strprintf("%s (Raw)",
2989: ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2990: }
2991: else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
2992: // Print normalized and raw values
2993: currstr = strprintf("%d [Raw %s]", attr.current,
2994: ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2995: prevstr = strprintf("%d [Raw %s]", prev.current,
2996: ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2997: }
2998: else {
2999: // Print normalized values only
3000: currstr = strprintf("%d", attr.current);
3001: prevstr = strprintf("%d", prev.current);
3002: }
3003:
3004: // Format message
3005: std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3006: cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
3007: ata_get_smart_attr_name(attr.id, cfg.attribute_defs).c_str(),
3008: prevstr.c_str(), currstr.c_str());
3009:
3010: // Report this change as critical ?
3011: if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
3012: || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
3013: PrintOut(LOG_CRIT, "%s\n", msg.c_str());
3014: MailWarning(cfg, state, 2, "%s", msg.c_str());
3015: }
3016: else {
3017: PrintOut(LOG_INFO, "%s\n", msg.c_str());
3018: }
3019: state.must_write = true;
3020: }
3021:
3022:
3023: static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3024: bool firstpass, bool allow_selftests)
3025: {
3026: const char * name = cfg.name.c_str();
3027:
3028: // If user has asked, test the email warning system
3029: if (cfg.emailtest)
3030: MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3031:
3032: // if we can't open device, fail gracefully rather than hard --
3033: // perhaps the next time around we'll be able to open it. ATAPI
3034: // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
3035: // given (see linux cdrom driver).
3036: if (!atadev->open()) {
3037: PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, atadev->get_errmsg());
3038: MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3039: return 1;
3040: }
3041: if (debugmode)
3042: PrintOut(LOG_INFO,"Device: %s, opened ATA device\n", name);
3043: reset_warning_mail(cfg, state, 9, "open device worked again");
3044:
3045: // user may have requested (with the -n Directive) to leave the disk
3046: // alone if it is in idle or sleeping mode. In this case check the
3047: // power mode and exit without check if needed
3048: if (cfg.powermode && !state.powermodefail) {
3049: int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3050: const char * mode = 0;
3051: if (0 <= powermode && powermode < 0xff) {
3052: // wait for possible spin up and check again
3053: int powermode2;
3054: sleep(5);
3055: powermode2 = ataCheckPowerMode(atadev);
3056: if (powermode2 > powermode)
3057: PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3058: powermode = powermode2;
3059: }
3060:
3061: switch (powermode){
3062: case -1:
3063: // SLEEP
3064: mode="SLEEP";
3065: if (cfg.powermode>=1)
3066: dontcheck=1;
3067: break;
3068: case 0:
3069: // STANDBY
3070: mode="STANDBY";
3071: if (cfg.powermode>=2)
3072: dontcheck=1;
3073: break;
3074: case 0x80:
3075: // IDLE
3076: mode="IDLE";
3077: if (cfg.powermode>=3)
3078: dontcheck=1;
3079: break;
3080: case 0xff:
3081: // ACTIVE/IDLE
3082: mode="ACTIVE or IDLE";
3083: break;
3084: default:
3085: // UNKNOWN
3086: PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3087: name, powermode);
3088: state.powermodefail = true;
3089: break;
3090: }
3091:
3092: // if we are going to skip a check, return now
3093: if (dontcheck){
3094: // skip at most powerskipmax checks
3095: if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3096: CloseDevice(atadev, name);
3097: if (!state.powerskipcnt && !cfg.powerquiet) // report first only and avoid waking up system disk
3098: PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3099: state.powerskipcnt++;
3100: return 0;
3101: }
3102: else {
3103: PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3104: name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3105: }
3106: state.powerskipcnt = 0;
3107: state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3108: }
3109: else if (state.powerskipcnt) {
3110: PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3111: name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3112: state.powerskipcnt = 0;
3113: state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3114: }
3115: }
3116:
3117: // check smart status
3118: if (cfg.smartcheck) {
3119: int status=ataSmartStatus2(atadev);
3120: if (status==-1){
3121: PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3122: MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3123: state.must_write = true;
3124: }
3125: else if (status==1){
3126: PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3127: MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3128: state.must_write = true;
3129: }
3130: }
3131:
3132: // Check everything that depends upon SMART Data (eg, Attribute values)
3133: if ( cfg.usagefailed || cfg.prefail || cfg.usage
3134: || cfg.curr_pending_id || cfg.offl_pending_id
3135: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3136: || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3137:
3138: // Read current attribute values.
3139: ata_smart_values curval;
3140: if (ataReadSmartValues(atadev, &curval)){
3141: PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3142: MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3143: state.must_write = true;
3144: }
3145: else {
3146: reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3147:
3148: // look for current or offline pending sectors
3149: if (cfg.curr_pending_id)
3150: check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3151: (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3152: : "Total unreadable (pending) sectors" ));
3153:
3154: if (cfg.offl_pending_id)
3155: check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3156: (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3157: : "Total offline uncorrectable sectors"));
3158:
3159: // check temperature limits
3160: if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3161: CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3162:
3163: // look for failed usage attributes, or track usage or prefail attributes
3164: if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3165: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3166: check_attribute(cfg, state,
3167: curval.vendor_attributes[i],
3168: state.smartval.vendor_attributes[i],
3169: i, state.smartthres.thres_entries);
3170: }
3171: }
3172:
3173: // Log changes of offline data collection status
3174: if (cfg.offlinests) {
3175: if ( curval.offline_data_collection_status
3176: != state.smartval.offline_data_collection_status
1.1.1.2 ! misho 3177: || state.offline_started // test was started in previous call
1.1 misho 3178: || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3179: log_offline_data_coll_status(name, curval.offline_data_collection_status);
3180: }
3181:
3182: // Log changes of self-test execution status
3183: if (cfg.selfteststs) {
3184: if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
1.1.1.2 ! misho 3185: || state.selftest_started // test was started in previous call
1.1 misho 3186: || (firstpass && (debugmode || curval.self_test_exec_status != 0x00)))
3187: log_self_test_exec_status(name, curval.self_test_exec_status);
3188: }
3189:
3190: // Save the new values for the next time around
3191: state.smartval = curval;
3192: }
3193: }
1.1.1.2 ! misho 3194: state.offline_started = state.selftest_started = false;
1.1 misho 3195:
3196: // check if number of selftest errors has increased (note: may also DECREASE)
3197: if (cfg.selftest)
3198: CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.fix_firmwarebug));
3199:
3200: // check if number of ATA errors has increased
3201: if (cfg.errorlog || cfg.xerrorlog) {
3202:
3203: int errcnt1 = -1, errcnt2 = -1;
3204: if (cfg.errorlog)
3205: errcnt1 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, false);
3206: if (cfg.xerrorlog)
3207: errcnt2 = read_ata_error_count(atadev, name, cfg.fix_firmwarebug, true);
3208:
3209: // new number of errors is max of both logs
3210: int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3211:
3212: // did command fail?
3213: if (newc<0)
3214: // lack of PrintOut here is INTENTIONAL
3215: MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3216:
3217: // has error count increased?
3218: int oldc = state.ataerrorcount;
3219: if (newc>oldc){
3220: PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3221: name, oldc, newc);
3222: MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3223: name, oldc, newc);
3224: state.must_write = true;
3225: }
3226:
3227: if (newc>=0)
3228: state.ataerrorcount=newc;
3229: }
3230:
3231: // if the user has asked, and device is capable (or we're not yet
3232: // sure) check whether a self test should be done now.
3233: if (allow_selftests && !cfg.test_regex.empty()) {
3234: char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3235: if (testtype)
3236: DoATASelfTest(cfg, state, atadev, testtype);
3237: }
3238:
3239: // Don't leave device open -- the OS/user may want to access it
3240: // before the next smartd cycle!
3241: CloseDevice(atadev, name);
3242:
3243: // Copy ATA attribute values to persistent state
3244: state.update_persistent_state();
3245:
3246: return 0;
3247: }
3248:
3249: static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3250: {
3251: UINT8 asc, ascq;
3252: UINT8 currenttemp;
3253: UINT8 triptemp;
3254: const char * name = cfg.name.c_str();
3255: const char *cp;
3256:
3257: // If the user has asked for it, test the email warning system
3258: if (cfg.emailtest)
3259: MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3260:
3261: // if we can't open device, fail gracefully rather than hard --
3262: // perhaps the next time around we'll be able to open it
3263: if (!scsidev->open()) {
3264: PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, scsidev->get_errmsg());
3265: MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3266: return 1;
3267: } else if (debugmode)
3268: PrintOut(LOG_INFO,"Device: %s, opened SCSI device\n", name);
3269: currenttemp = 0;
3270: asc = 0;
3271: ascq = 0;
3272: if (!state.SuppressReport) {
3273: if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3274: &asc, &ascq, ¤ttemp, &triptemp)) {
3275: PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3276: name);
3277: MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3278: state.SuppressReport = 1;
3279: }
3280: }
3281: if (asc > 0) {
3282: cp = scsiGetIEString(asc, ascq);
3283: if (cp) {
3284: PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3285: MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3286: } else if (debugmode)
3287: PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3288: name, (int)asc, (int)ascq);
3289: } else if (debugmode)
3290: PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3291:
3292: // check temperature limits
3293: if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3294: CheckTemperature(cfg, state, currenttemp, triptemp);
3295:
3296: // check if number of selftest errors has increased (note: may also DECREASE)
3297: if (cfg.selftest)
3298: CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3299:
3300: if (allow_selftests && !cfg.test_regex.empty()) {
3301: char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3302: if (testtype)
3303: DoSCSISelfTest(cfg, state, scsidev, testtype);
3304: }
3305: CloseDevice(scsidev, name);
3306: return 0;
3307: }
3308:
1.1.1.2 ! misho 3309: // 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
! 3310: static int standby_disable_state = 0;
! 3311:
! 3312: static void init_disable_standby_check(dev_config_vector & configs)
! 3313: {
! 3314: // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
! 3315: bool sts1 = false, sts2 = false;
! 3316: for (unsigned i = 0; i < configs.size() && !(sts1 || sts2); i++) {
! 3317: const dev_config & cfg = configs.at(i);
! 3318: if (cfg.offlinests_ns)
! 3319: sts1 = true;
! 3320: if (cfg.selfteststs_ns)
! 3321: sts2 = true;
! 3322: }
! 3323:
! 3324: // Check for support of disable auto standby
! 3325: // Reenable standby if smartd.conf was reread
! 3326: if (sts1 || sts2 || standby_disable_state == 3) {
! 3327: if (!smi()->disable_system_auto_standby(false)) {
! 3328: if (standby_disable_state == 3)
! 3329: PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
! 3330: if (sts1 || sts2) {
! 3331: PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
! 3332: (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
! 3333: sts1 = sts2 = false;
! 3334: }
! 3335: }
! 3336: }
! 3337:
! 3338: standby_disable_state = (sts1 || sts2 ? 1 : 0);
! 3339: }
! 3340:
! 3341: static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
! 3342: {
! 3343: if (!standby_disable_state)
! 3344: return;
! 3345:
! 3346: // Check for just started or still running self-tests
! 3347: bool running = false;
! 3348: for (unsigned i = 0; i < configs.size() && !running; i++) {
! 3349: const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
! 3350:
! 3351: if ( ( cfg.offlinests_ns
! 3352: && (state.offline_started ||
! 3353: is_offl_coll_in_progress(state.smartval.offline_data_collection_status)))
! 3354: || ( cfg.selfteststs_ns
! 3355: && (state.selftest_started ||
! 3356: is_self_test_in_progress(state.smartval.self_test_exec_status))) )
! 3357: running = true;
! 3358: // state.offline/selftest_started will be reset after next logging of test status
! 3359: }
! 3360:
! 3361: // Disable/enable auto standby and log state changes
! 3362: if (!running) {
! 3363: if (standby_disable_state != 1) {
! 3364: if (!smi()->disable_system_auto_standby(false))
! 3365: PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
! 3366: smi()->get_errmsg());
! 3367: else
! 3368: PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
! 3369: standby_disable_state = 1;
! 3370: }
! 3371: }
! 3372: else if (!smi()->disable_system_auto_standby(true)) {
! 3373: if (standby_disable_state != 2) {
! 3374: PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
! 3375: smi()->get_errmsg());
! 3376: standby_disable_state = 2;
! 3377: }
! 3378: }
! 3379: else {
! 3380: if (standby_disable_state != 3) {
! 3381: PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
! 3382: standby_disable_state = 3;
! 3383: }
! 3384: }
! 3385: }
! 3386:
1.1 misho 3387: // Checks the SMART status of all ATA and SCSI devices
3388: static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3389: smart_device_list & devices, bool firstpass, bool allow_selftests)
3390: {
3391: for (unsigned i = 0; i < configs.size(); i++) {
3392: const dev_config & cfg = configs.at(i);
3393: dev_state & state = states.at(i);
3394: smart_device * dev = devices.at(i);
3395: if (dev->is_ata())
3396: ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3397: else if (dev->is_scsi())
3398: SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3399: }
1.1.1.2 ! misho 3400:
! 3401: do_disable_standby_check(configs, states);
1.1 misho 3402: }
3403:
3404: // Set if Initialize() was called
3405: static bool is_initialized = false;
3406:
3407: // Does initialization right after fork to daemon mode
3408: static void Initialize(time_t *wakeuptime)
3409: {
3410: // Call Goodbye() on exit
3411: is_initialized = true;
3412:
3413: // write PID file
3414: if (!debugmode)
3415: WritePidFile();
3416:
3417: // install signal handlers. On Solaris, can't use signal() because
3418: // it resets the handler to SIG_DFL after each call. So use sigset()
3419: // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
3420:
3421: // normal and abnormal exit
3422: if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
3423: SIGNALFN(SIGTERM, SIG_IGN);
3424: if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
3425: SIGNALFN(SIGQUIT, SIG_IGN);
3426:
3427: // in debug mode, <CONTROL-C> ==> HUP
3428: if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
3429: SIGNALFN(SIGINT, SIG_IGN);
3430:
3431: // Catch HUP and USR1
3432: if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
3433: SIGNALFN(SIGHUP, SIG_IGN);
3434: if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
3435: SIGNALFN(SIGUSR1, SIG_IGN);
3436: #ifdef _WIN32
3437: if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
3438: SIGNALFN(SIGUSR2, SIG_IGN);
3439: #endif
3440:
3441: // initialize wakeup time to CURRENT time
3442: *wakeuptime=time(NULL);
3443:
3444: return;
3445: }
3446:
#ifdef _WIN32
// Toggle debug mode implemented for native windows only
// (there is no easy way to reopen tty on *nix).
// debugmode 0 -> 1 enables the console and maps SIGINT to HUPhandler,
// debugmode 1 -> 0 undoes this; any other debugmode value is left unchanged.
static void ToggleDebugMode()
{
  switch (debugmode) {
    case 0:
      PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
      // A nonzero result from daemon_enable_console() is treated as failure
      if (daemon_enable_console("smartd [Debug]")) {
        PrintOut(LOG_INFO,"enable console failed\n");
        break;
      }
      debugmode = 1;
      daemon_signal(SIGINT, HUPhandler);
      PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
      break;

    case 1:
      daemon_disable_console();
      debugmode = 0;
      daemon_signal(SIGINT, sighandler);
      PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
      break;

    default:
      PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
      break;
  }
}
#endif
3472:
3473: static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
3474: {
3475: // If past wake-up-time, compute next wake-up-time
3476: time_t timenow=time(NULL);
3477: while (wakeuptime<=timenow){
3478: int intervals=1+(timenow-wakeuptime)/checktime;
3479: wakeuptime+=intervals*checktime;
3480: }
3481:
3482: // sleep until we catch SIGUSR1 or have completed sleeping
3483: int addtime = 0;
3484: while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
3485:
3486: // protect user again system clock being adjusted backwards
3487: if (wakeuptime>timenow+checktime){
3488: PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3489: wakeuptime=timenow+checktime;
3490: }
3491:
3492: // Exit sleep when time interval has expired or a signal is received
3493: sleep(wakeuptime+addtime-timenow);
3494:
3495: #ifdef _WIN32
3496: // toggle debug mode?
3497: if (caughtsigUSR2) {
3498: ToggleDebugMode();
3499: caughtsigUSR2 = 0;
3500: }
3501: #endif
3502:
3503: timenow=time(NULL);
3504:
3505: // Actual sleep time too long?
3506: if (!addtime && timenow > wakeuptime+60) {
3507: if (debugmode)
3508: PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
3509: (int)(timenow-wakeuptime));
3510: // Wait another 20 seconds to avoid I/O errors during disk spin-up
3511: addtime = timenow-wakeuptime+20;
3512: // Use next wake-up-time if close
3513: int nextcheck = checktime - addtime % checktime;
3514: if (nextcheck <= 20)
3515: addtime += nextcheck;
3516: }
3517: }
3518:
3519: // if we caught a SIGUSR1 then print message and clear signal
3520: if (caughtsigUSR1){
3521: PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3522: wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
3523: caughtsigUSR1=0;
3524: sigwakeup = true;
3525: }
3526:
3527: // return adjusted wakeuptime
3528: return wakeuptime;
3529: }
3530:
3531: // Print out a list of valid arguments for the Directive d
3532: static void printoutvaliddirectiveargs(int priority, char d)
3533: {
3534: switch (d) {
3535: case 'n':
3536: PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3537: break;
3538: case 's':
3539: PrintOut(priority, "valid_regular_expression");
3540: break;
3541: case 'd':
3542: PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3543: break;
3544: case 'T':
3545: PrintOut(priority, "normal, permissive");
3546: break;
3547: case 'o':
3548: case 'S':
3549: PrintOut(priority, "on, off");
3550: break;
3551: case 'l':
3552: PrintOut(priority, "error, selftest");
3553: break;
3554: case 'M':
3555: PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3556: break;
3557: case 'v':
3558: PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3559: break;
3560: case 'P':
3561: PrintOut(priority, "use, ignore, show, showall");
3562: break;
3563: case 'F':
3564: PrintOut(priority, "none, samsung, samsung2, samsung3");
1.1.1.2 ! misho 3565: case 'e':
! 3566: PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], "
! 3567: "security-freeze, standby,[N|off], wcache,[on|off]");
1.1 misho 3568: break;
3569: }
3570: }
3571:
3572: // exits with an error message, or returns integer value of token
3573: static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3574: int min, int max, char * suffix = 0)
3575: {
3576: // make sure argument is there
3577: if (!arg) {
3578: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3579: cfgfile, lineno, name, token, min, max);
3580: return -1;
3581: }
3582:
3583: // get argument value (base 10), check that it's integer, and in-range
3584: char *endptr;
3585: int val = strtol(arg,&endptr,10);
3586:
3587: // optional suffix present?
3588: if (suffix) {
3589: if (!strcmp(endptr, suffix))
3590: endptr += strlen(suffix);
3591: else
3592: *suffix = 0;
3593: }
3594:
3595: if (!(!*endptr && min <= val && val <= max)) {
3596: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3597: cfgfile, lineno, name, token, arg, min, max);
3598: return -1;
3599: }
3600:
3601: // all is well; return value
3602: return val;
3603: }
3604:
3605:
3606: // Get 1-3 small integer(s) for '-W' directive
3607: static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3608: unsigned char *val1, unsigned char *val2, unsigned char *val3)
3609: {
3610: unsigned v1 = 0, v2 = 0, v3 = 0;
3611: int n1 = -1, n2 = -1, n3 = -1, len;
3612: if (!arg) {
3613: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3614: cfgfile, lineno, name, token);
3615: return -1;
3616: }
3617:
3618: len = strlen(arg);
3619: if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3620: && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3621: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3622: cfgfile, lineno, name, token, arg);
3623: return -1;
3624: }
3625: *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3626: return 0;
3627: }
3628:
3629:
3630: // This function returns 1 if it has correctly parsed one token (and
3631: // any arguments), else zero if no tokens remain. It returns -1 if an
3632: // error was encountered.
3633: static int ParseToken(char * token, dev_config & cfg)
3634: {
3635: char sym;
3636: const char * name = cfg.name.c_str();
3637: int lineno=cfg.lineno;
3638: const char *delim = " \n\t";
3639: int badarg = 0;
3640: int missingarg = 0;
3641: const char *arg = 0;
3642:
3643: // is the rest of the line a comment
3644: if (*token=='#')
3645: return 1;
3646:
3647: // is the token not recognized?
3648: if (*token!='-' || strlen(token)!=2) {
3649: PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3650: configfile, lineno, name, token);
3651: PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
3652: return -1;
3653: }
3654:
3655: // token we will be parsing:
3656: sym=token[1];
3657:
3658: // parse the token and swallow its argument
3659: int val;
3660: char plus[] = "+", excl[] = "!";
3661:
3662: switch (sym) {
3663: case 'C':
3664: // monitor current pending sector count (default 197)
3665: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3666: return -1;
3667: cfg.curr_pending_id = (unsigned char)val;
3668: cfg.curr_pending_incr = (*plus == '+');
3669: cfg.curr_pending_set = true;
3670: break;
3671: case 'U':
3672: // monitor offline uncorrectable sectors (default 198)
3673: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3674: return -1;
3675: cfg.offl_pending_id = (unsigned char)val;
3676: cfg.offl_pending_incr = (*plus == '+');
3677: cfg.offl_pending_set = true;
3678: break;
3679: case 'T':
3680: // Set tolerance level for SMART command failures
3681: if ((arg = strtok(NULL, delim)) == NULL) {
3682: missingarg = 1;
3683: } else if (!strcmp(arg, "normal")) {
3684: // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3685: // not on failure of an optional S.M.A.R.T. command.
3686: // This is the default so we don't need to actually do anything here.
3687: cfg.permissive = false;
3688: } else if (!strcmp(arg, "permissive")) {
3689: // Permissive mode; ignore errors from Mandatory SMART commands
3690: cfg.permissive = true;
3691: } else {
3692: badarg = 1;
3693: }
3694: break;
3695: case 'd':
3696: // specify the device type
3697: if ((arg = strtok(NULL, delim)) == NULL) {
3698: missingarg = 1;
3699: } else if (!strcmp(arg, "removable")) {
3700: cfg.removable = true;
3701: } else if (!strcmp(arg, "auto")) {
3702: cfg.dev_type = "";
3703: } else {
3704: cfg.dev_type = arg;
3705: }
3706: break;
3707: case 'F':
3708: // fix firmware bug
3709: if ((arg = strtok(NULL, delim)) == NULL) {
3710: missingarg = 1;
3711: } else if (!strcmp(arg, "none")) {
3712: cfg.fix_firmwarebug = FIX_NONE;
3713: } else if (!strcmp(arg, "samsung")) {
3714: cfg.fix_firmwarebug = FIX_SAMSUNG;
3715: } else if (!strcmp(arg, "samsung2")) {
3716: cfg.fix_firmwarebug = FIX_SAMSUNG2;
3717: } else if (!strcmp(arg, "samsung3")) {
3718: cfg.fix_firmwarebug = FIX_SAMSUNG3;
3719: } else {
3720: badarg = 1;
3721: }
3722: break;
3723: case 'H':
3724: // check SMART status
3725: cfg.smartcheck = true;
3726: break;
3727: case 'f':
3728: // check for failure of usage attributes
3729: cfg.usagefailed = true;
3730: break;
3731: case 't':
3732: // track changes in all vendor attributes
3733: cfg.prefail = true;
3734: cfg.usage = true;
3735: break;
3736: case 'p':
3737: // track changes in prefail vendor attributes
3738: cfg.prefail = true;
3739: break;
3740: case 'u':
3741: // track changes in usage vendor attributes
3742: cfg.usage = true;
3743: break;
3744: case 'l':
3745: // track changes in SMART logs
3746: if ((arg = strtok(NULL, delim)) == NULL) {
3747: missingarg = 1;
3748: } else if (!strcmp(arg, "selftest")) {
3749: // track changes in self-test log
3750: cfg.selftest = true;
3751: } else if (!strcmp(arg, "error")) {
3752: // track changes in ATA error log
3753: cfg.errorlog = true;
3754: } else if (!strcmp(arg, "xerror")) {
3755: // track changes in Extended Comprehensive SMART error log
3756: cfg.xerrorlog = true;
3757: } else if (!strcmp(arg, "offlinests")) {
3758: // track changes in offline data collection status
3759: cfg.offlinests = true;
1.1.1.2 ! misho 3760: } else if (!strcmp(arg, "offlinests,ns")) {
! 3761: // track changes in offline data collection status, disable auto standby
! 3762: cfg.offlinests = cfg.offlinests_ns = true;
1.1 misho 3763: } else if (!strcmp(arg, "selfteststs")) {
3764: // track changes in self-test execution status
3765: cfg.selfteststs = true;
1.1.1.2 ! misho 3766: } else if (!strcmp(arg, "selfteststs,ns")) {
! 3767: // track changes in self-test execution status, disable auto standby
! 3768: cfg.selfteststs = cfg.selfteststs_ns = true;
1.1 misho 3769: } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
3770: // set SCT Error Recovery Control
3771: unsigned rt = ~0, wt = ~0; int nc = -1;
3772: sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
3773: if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
3774: cfg.sct_erc_set = true;
3775: cfg.sct_erc_readtime = rt;
3776: cfg.sct_erc_writetime = wt;
3777: }
3778: else
3779: badarg = 1;
3780: } else {
3781: badarg = 1;
3782: }
3783: break;
3784: case 'a':
3785: // monitor everything
3786: cfg.smartcheck = true;
3787: cfg.prefail = true;
3788: cfg.usagefailed = true;
3789: cfg.usage = true;
3790: cfg.selftest = true;
3791: cfg.errorlog = true;
3792: cfg.selfteststs = true;
3793: break;
3794: case 'o':
3795: // automatic offline testing enable/disable
3796: if ((arg = strtok(NULL, delim)) == NULL) {
3797: missingarg = 1;
3798: } else if (!strcmp(arg, "on")) {
3799: cfg.autoofflinetest = 2;
3800: } else if (!strcmp(arg, "off")) {
3801: cfg.autoofflinetest = 1;
3802: } else {
3803: badarg = 1;
3804: }
3805: break;
3806: case 'n':
3807: // skip disk check if in idle or standby mode
3808: if (!(arg = strtok(NULL, delim)))
3809: missingarg = 1;
3810: else {
3811: char *endptr = NULL;
3812: char *next = strchr(const_cast<char*>(arg), ',');
3813:
3814: cfg.powerquiet = false;
3815: cfg.powerskipmax = 0;
3816:
3817: if (next!=NULL) *next='\0';
3818: if (!strcmp(arg, "never"))
3819: cfg.powermode = 0;
3820: else if (!strcmp(arg, "sleep"))
3821: cfg.powermode = 1;
3822: else if (!strcmp(arg, "standby"))
3823: cfg.powermode = 2;
3824: else if (!strcmp(arg, "idle"))
3825: cfg.powermode = 3;
3826: else
3827: badarg = 1;
3828:
3829: // if optional arguments are present
3830: if (!badarg && next!=NULL) {
3831: next++;
3832: cfg.powerskipmax = strtol(next, &endptr, 10);
3833: if (endptr == next)
3834: cfg.powerskipmax = 0;
3835: else {
3836: next = endptr + (*endptr != '\0');
3837: if (cfg.powerskipmax <= 0)
3838: badarg = 1;
3839: }
3840: if (*next != '\0') {
3841: if (!strcmp("q", next))
3842: cfg.powerquiet = true;
3843: else {
3844: badarg = 1;
3845: }
3846: }
3847: }
3848: }
3849: break;
3850: case 'S':
3851: // automatic attribute autosave enable/disable
3852: if ((arg = strtok(NULL, delim)) == NULL) {
3853: missingarg = 1;
3854: } else if (!strcmp(arg, "on")) {
3855: cfg.autosave = 2;
3856: } else if (!strcmp(arg, "off")) {
3857: cfg.autosave = 1;
3858: } else {
3859: badarg = 1;
3860: }
3861: break;
3862: case 's':
3863: // warn user, and delete any previously given -s REGEXP Directives
3864: if (!cfg.test_regex.empty()){
3865: PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3866: configfile, lineno, name, cfg.test_regex.get_pattern());
3867: cfg.test_regex = regular_expression();
3868: }
3869: // check for missing argument
3870: if (!(arg = strtok(NULL, delim))) {
3871: missingarg = 1;
3872: }
3873: // Compile regex
3874: else {
3875: if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
3876: // not a valid regular expression!
3877: PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3878: configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
3879: return -1;
3880: }
3881: }
3882: // Do a bit of sanity checking and warn user if we think that
3883: // their regexp is "strange". User probably confused about shell
3884: // glob(3) syntax versus regular expression syntax regexp(7).
3885: if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
3886: PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3887: configfile, lineno, name, val+1, arg[val], arg);
3888: break;
3889: case 'm':
3890: // send email to address that follows
3891: if (!(arg = strtok(NULL,delim)))
3892: missingarg = 1;
3893: else {
3894: if (!cfg.emailaddress.empty())
3895: PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3896: configfile, lineno, name, cfg.emailaddress.c_str());
3897: cfg.emailaddress = arg;
3898: }
3899: break;
3900: case 'M':
3901: // email warning options
3902: if (!(arg = strtok(NULL, delim)))
3903: missingarg = 1;
3904: else if (!strcmp(arg, "once"))
3905: cfg.emailfreq = 1;
3906: else if (!strcmp(arg, "daily"))
3907: cfg.emailfreq = 2;
3908: else if (!strcmp(arg, "diminishing"))
3909: cfg.emailfreq = 3;
3910: else if (!strcmp(arg, "test"))
3911: cfg.emailtest = 1;
3912: else if (!strcmp(arg, "exec")) {
3913: // Get the next argument (the command line)
3914: if (!(arg = strtok(NULL, delim))) {
3915: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3916: configfile, lineno, name, token);
3917: return -1;
3918: }
3919: // Free the last cmd line given if any, and copy new one
3920: if (!cfg.emailcmdline.empty())
3921: PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3922: configfile, lineno, name, cfg.emailcmdline.c_str());
3923: cfg.emailcmdline = arg;
3924: }
3925: else
3926: badarg = 1;
3927: break;
3928: case 'i':
3929: // ignore failure of usage attribute
3930: if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3931: return -1;
3932: cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
3933: break;
3934: case 'I':
3935: // ignore attribute for tracking purposes
3936: if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3937: return -1;
3938: cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
3939: break;
3940: case 'r':
3941: // print raw value when tracking
3942: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3943: return -1;
3944: cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
3945: if (*excl == '!') // attribute change is critical
3946: cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
3947: break;
3948: case 'R':
3949: // track changes in raw value (forces printing of raw value)
3950: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3951: return -1;
3952: cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
3953: if (*excl == '!') // raw value change is critical
3954: cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
3955: break;
3956: case 'W':
3957: // track Temperature
3958: if ((val=Get3Integers(arg=strtok(NULL,delim), name, token, lineno, configfile,
3959: &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit))<0)
3960: return -1;
3961: break;
3962: case 'v':
3963: // non-default vendor-specific attribute meaning
3964: if (!(arg=strtok(NULL,delim))) {
3965: missingarg = 1;
3966: } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
3967: badarg = 1;
3968: }
3969: break;
3970: case 'P':
3971: // Define use of drive-specific presets.
3972: if (!(arg = strtok(NULL, delim))) {
3973: missingarg = 1;
3974: } else if (!strcmp(arg, "use")) {
3975: cfg.ignorepresets = false;
3976: } else if (!strcmp(arg, "ignore")) {
3977: cfg.ignorepresets = true;
3978: } else if (!strcmp(arg, "show")) {
3979: cfg.showpresets = true;
3980: } else if (!strcmp(arg, "showall")) {
3981: showallpresets();
3982: } else {
3983: badarg = 1;
3984: }
3985: break;
1.1.1.2 ! misho 3986:
! 3987: case 'e':
! 3988: // Various ATA settings
! 3989: if (!(arg = strtok(NULL, delim))) {
! 3990: missingarg = true;
! 3991: }
! 3992: else {
! 3993: char arg2[16+1]; unsigned val;
! 3994: int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
! 3995: if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &val, &n3) >= 1
! 3996: && (n1 == len || n2 > 0)) {
! 3997: bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
! 3998: bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
! 3999: if (n3 != len)
! 4000: val = ~0U;
! 4001:
! 4002: if (!strcmp(arg2, "aam")) {
! 4003: if (off)
! 4004: cfg.set_aam = -1;
! 4005: else if (val <= 254)
! 4006: cfg.set_aam = val + 1;
! 4007: else
! 4008: badarg = true;
! 4009: }
! 4010: else if (!strcmp(arg2, "apm")) {
! 4011: if (off)
! 4012: cfg.set_apm = -1;
! 4013: else if (1 <= val && val <= 254)
! 4014: cfg.set_apm = val + 1;
! 4015: else
! 4016: badarg = true;
! 4017: }
! 4018: else if (!strcmp(arg2, "lookahead")) {
! 4019: if (off)
! 4020: cfg.set_lookahead = -1;
! 4021: else if (on)
! 4022: cfg.set_lookahead = 1;
! 4023: else
! 4024: badarg = true;
! 4025: }
! 4026: else if (!strcmp(arg, "security-freeze")) {
! 4027: cfg.set_security_freeze = true;
! 4028: }
! 4029: else if (!strcmp(arg2, "standby")) {
! 4030: if (off)
! 4031: cfg.set_standby = 0 + 1;
! 4032: else if (val <= 255)
! 4033: cfg.set_standby = val + 1;
! 4034: else
! 4035: badarg = true;
! 4036: }
! 4037: else if (!strcmp(arg2, "wcache")) {
! 4038: if (off)
! 4039: cfg.set_wcache = -1;
! 4040: else if (on)
! 4041: cfg.set_wcache = 1;
! 4042: else
! 4043: badarg = true;
! 4044: }
! 4045: else
! 4046: badarg = true;
! 4047: }
! 4048: else
! 4049: badarg = true;
! 4050: }
! 4051: break;
! 4052:
1.1 misho 4053: default:
4054: // Directive not recognized
4055: PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4056: configfile, lineno, name, token);
4057: Directives();
4058: return -1;
4059: }
4060: if (missingarg) {
4061: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4062: configfile, lineno, name, token);
4063: }
4064: if (badarg) {
4065: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4066: configfile, lineno, name, token, arg);
4067: }
4068: if (missingarg || badarg) {
4069: PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4070: printoutvaliddirectiveargs(LOG_CRIT, sym);
4071: PrintOut(LOG_CRIT, "\n");
4072: return -1;
4073: }
4074:
4075: return 1;
4076: }
4077:
4078: // Scan directive for configuration file
4079: #define SCANDIRECTIVE "DEVICESCAN"
4080:
4081: // This is the routine that adds things to the conf_entries list.
4082: //
4083: // Return values are:
4084: // 1: parsed a normal line
1.1.1.2 ! misho 4085: // 0: found DEFAULT setting or comment or blank line
1.1 misho 4086: // -1: found SCANDIRECTIVE line
4087: // -2: found an error
4088: //
4089: // Note: this routine modifies *line from the caller!
1.1.1.2 ! misho 4090: static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf, int lineno, /*const*/ char * line)
1.1 misho 4091: {
4092: const char *delim = " \n\t";
4093:
4094: // get first token: device name. If a comment, skip line
1.1.1.2 ! misho 4095: const char * name = strtok(line, delim);
! 4096: if (!name || *name == '#')
1.1 misho 4097: return 0;
4098:
1.1.1.2 ! misho 4099: // Check device name for DEFAULT or DEVICESCAN
! 4100: int retval;
! 4101: if (!strcmp("DEFAULT", name)) {
! 4102: retval = 0;
! 4103: // Restart with empty defaults
! 4104: default_conf = dev_config();
1.1 misho 4105: }
1.1.1.2 ! misho 4106: else {
! 4107: retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
! 4108: // Init new entry with current defaults
! 4109: conf_entries.push_back(default_conf);
! 4110: }
! 4111: dev_config & cfg = (retval ? conf_entries.back() : default_conf);
1.1 misho 4112:
4113: cfg.name = name; // Later replaced by dev->get_info().info_name
4114: cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
1.1.1.2 ! misho 4115: cfg.lineno = lineno;
1.1 misho 4116:
4117: // parse tokens one at a time from the file.
1.1.1.2 ! misho 4118: while (char * token = strtok(0, delim)) {
! 4119: int rc = ParseToken(token, cfg);
! 4120: if (rc < 0)
1.1 misho 4121: // error found on the line
4122: return -2;
1.1.1.2 ! misho 4123:
! 4124: if (rc == 0)
! 4125: // No tokens left
! 4126: break;
! 4127:
! 4128: // PrintOut(LOG_INFO,"Parsed token %s\n",token);
1.1 misho 4129: }
1.1.1.2 ! misho 4130:
! 4131: // Don't perform checks below for DEFAULT entries
! 4132: if (retval == 0)
! 4133: return retval;
! 4134:
1.1 misho 4135: // If NO monitoring directives are set, then set all of them.
4136: if (!( cfg.smartcheck || cfg.selftest
4137: || cfg.errorlog || cfg.xerrorlog
4138: || cfg.offlinests || cfg.selfteststs
4139: || cfg.usagefailed || cfg.prefail || cfg.usage
4140: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4141:
4142: PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4143: cfg.name.c_str(), cfg.lineno, configfile);
4144:
4145: cfg.smartcheck = true;
4146: cfg.usagefailed = true;
4147: cfg.prefail = true;
4148: cfg.usage = true;
4149: cfg.selftest = true;
4150: cfg.errorlog = true;
4151: cfg.selfteststs = true;
4152: }
4153:
4154: // additional sanity check. Has user set -M options without -m?
4155: if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
4156: PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4157: cfg.name.c_str(), cfg.lineno, configfile);
4158: return -2;
4159: }
4160:
4161: // has the user has set <nomailer>?
4162: if (cfg.emailaddress == "<nomailer>") {
4163: // check that -M exec is also set
4164: if (cfg.emailcmdline.empty()){
4165: PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4166: cfg.name.c_str(), cfg.lineno, configfile);
4167: return -2;
4168: }
4169: // From here on the sign of <nomailer> is address.empty() and !cfg.emailcmdline.empty()
4170: cfg.emailaddress.clear();
4171: }
4172:
1.1.1.2 ! misho 4173: return retval;
1.1 misho 4174: }
4175:
4176: // Parses a configuration file. Return values are:
4177: // N=>0: found N entries
4178: // -1: syntax error in config file
4179: // -2: config file does not exist
4180: // -3: config file exists but cannot be read
4181: //
4182: // In the case where the return value is 0, there are three
4183: // possiblities:
4184: // Empty configuration file ==> conf_entries.empty()
4185: // No configuration file ==> conf_entries[0].lineno == 0
4186: // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4187: static int ParseConfigFile(dev_config_vector & conf_entries)
4188: {
4189: // maximum line length in configuration file
4190: const int MAXLINELEN = 256;
4191: // maximum length of a continued line in configuration file
4192: const int MAXCONTLINE = 1023;
4193:
4194: stdio_file f;
4195: // Open config file, if it exists and is not <stdin>
4196: if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4197: if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4198: // file exists but we can't read it or it should exist due to '-c' option
4199: int ret = (errno!=ENOENT ? -3 : -2);
4200: PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4201: strerror(errno),configfile);
4202: return ret;
4203: }
4204: }
4205: else // read from stdin ('-c -' option)
4206: f.open(stdin);
4207:
1.1.1.2 ! misho 4208: // Start with empty defaults
! 4209: dev_config default_conf;
! 4210:
1.1 misho 4211: // No configuration file found -- use fake one
4212: int entry = 0;
4213: if (!f) {
4214: char fakeconfig[] = SCANDIRECTIVE" -a"; // TODO: Remove this hack, build cfg_entry.
4215:
1.1.1.2 ! misho 4216: if (ParseConfigLine(conf_entries, default_conf, 0, fakeconfig) != -1)
1.1 misho 4217: throw std::logic_error("Internal error parsing "SCANDIRECTIVE);
4218: return 0;
4219: }
4220:
4221: #ifdef __CYGWIN__
4222: setmode(fileno(f), O_TEXT); // Allow files with \r\n
4223: #endif
4224:
4225: // configuration file exists
4226: PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4227:
4228: // parse config file line by line
4229: int lineno = 1, cont = 0, contlineno = 0;
4230: char line[MAXLINELEN+2];
4231: char fullline[MAXCONTLINE+1];
4232:
4233: for (;;) {
4234: int len=0,scandevice;
4235: char *lastslash;
4236: char *comment;
4237: char *code;
4238:
4239: // make debugging simpler
4240: memset(line,0,sizeof(line));
4241:
4242: // get a line
4243: code=fgets(line, MAXLINELEN+2, f);
4244:
4245: // are we at the end of the file?
4246: if (!code){
4247: if (cont) {
1.1.1.2 ! misho 4248: scandevice = ParseConfigLine(conf_entries, default_conf, contlineno, fullline);
1.1 misho 4249: // See if we found a SCANDIRECTIVE directive
4250: if (scandevice==-1)
4251: return 0;
4252: // did we find a syntax error
4253: if (scandevice==-2)
4254: return -1;
4255: // the final line is part of a continuation line
4256: cont=0;
4257: entry+=scandevice;
4258: }
4259: break;
4260: }
4261:
4262: // input file line number
4263: contlineno++;
4264:
4265: // See if line is too long
4266: len=strlen(line);
4267: if (len>MAXLINELEN){
4268: const char *warn;
4269: if (line[len-1]=='\n')
4270: warn="(including newline!) ";
4271: else
4272: warn="";
4273: PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4274: (int)contlineno,configfile,warn,(int)MAXLINELEN);
4275: return -1;
4276: }
4277:
4278: // Ignore anything after comment symbol
4279: if ((comment=strchr(line,'#'))){
4280: *comment='\0';
4281: len=strlen(line);
4282: }
4283:
4284: // is the total line (made of all continuation lines) too long?
4285: if (cont+len>MAXCONTLINE){
4286: PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4287: lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4288: return -1;
4289: }
4290:
4291: // copy string so far into fullline, and increment length
4292: strcpy(fullline+cont,line);
4293: cont+=len;
4294:
4295: // is this a continuation line. If so, replace \ by space and look at next line
4296: if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4297: *(fullline+(cont-len)+(lastslash-line))=' ';
4298: continue;
4299: }
4300:
4301: // Not a continuation line. Parse it
1.1.1.2 ! misho 4302: scandevice = ParseConfigLine(conf_entries, default_conf, contlineno, fullline);
1.1 misho 4303:
4304: // did we find a scandevice directive?
4305: if (scandevice==-1)
4306: return 0;
4307: // did we find a syntax error
4308: if (scandevice==-2)
4309: return -1;
4310:
4311: entry+=scandevice;
4312: lineno++;
4313: cont=0;
4314: }
4315:
4316: // note -- may be zero if syntax of file OK, but no valid entries!
4317: return entry;
4318: }
4319:
4320: /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4321: <LIST> is the list of valid arguments for option opt. */
4322: static void PrintValidArgs(char opt)
4323: {
4324: const char *s;
4325:
4326: PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4327: if (!(s = GetValidArgList(opt)))
4328: PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4329: else
4330: PrintOut(LOG_CRIT, "%s", (char *)s);
4331: PrintOut(LOG_CRIT, " <=======\n");
4332: }
4333:
4334: #ifndef _WIN32
4335: // Report error and exit if specified path is not absolute.
4336: static void check_abs_path(char option, const std::string & path)
4337: {
4338: if (path.empty() || path[0] == '/')
4339: return;
4340:
4341: debugmode = 1;
4342: PrintHead();
4343: PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
4344: PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
4345: EXIT(EXIT_BADCMD);
4346: }
4347: #endif // !_WIN32
4348:
4349: // Parses input line, prints usage message and
4350: // version/license/copyright messages
4351: static void ParseOpts(int argc, char **argv)
4352: {
4353: // Init default configfile path
4354: #ifndef _WIN32
4355: configfile = SMARTMONTOOLS_SYSCONFDIR"/smartd.conf";
4356: #else
4357: static std::string configfile_str = get_exe_dir() + "/smartd.conf";
4358: configfile = configfile_str.c_str();
4359: #endif
4360:
4361: // Please update GetValidArgList() if you edit shortopts
4362: static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:Vh?"
4363: #ifdef HAVE_LIBCAP_NG
4364: "C"
4365: #endif
4366: ;
4367: // Please update GetValidArgList() if you edit longopts
4368: struct option longopts[] = {
4369: { "configfile", required_argument, 0, 'c' },
4370: { "logfacility", required_argument, 0, 'l' },
4371: { "quit", required_argument, 0, 'q' },
4372: { "debug", no_argument, 0, 'd' },
4373: { "showdirectives", no_argument, 0, 'D' },
4374: { "interval", required_argument, 0, 'i' },
4375: #ifndef _WIN32
4376: { "no-fork", no_argument, 0, 'n' },
4377: #else
4378: { "service", no_argument, 0, 'n' },
4379: #endif
4380: { "pidfile", required_argument, 0, 'p' },
4381: { "report", required_argument, 0, 'r' },
4382: { "savestates", required_argument, 0, 's' },
4383: { "attributelog", required_argument, 0, 'A' },
4384: { "drivedb", required_argument, 0, 'B' },
4385: { "version", no_argument, 0, 'V' },
4386: { "license", no_argument, 0, 'V' },
4387: { "copyright", no_argument, 0, 'V' },
4388: { "help", no_argument, 0, 'h' },
4389: { "usage", no_argument, 0, 'h' },
4390: #ifdef HAVE_LIBCAP_NG
4391: { "capabilities", no_argument, 0, 'C' },
4392: #endif
4393: { 0, 0, 0, 0 }
4394: };
4395:
4396: opterr=optopt=0;
4397: bool badarg = false;
4398: bool no_defaultdb = false; // set true on '-B FILE'
4399:
4400: // Parse input options.
4401: int optchar;
4402: while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
4403: char *arg;
4404: char *tailptr;
4405: long lchecktime;
4406:
4407: switch(optchar) {
4408: case 'q':
4409: // when to quit
4410: if (!(strcmp(optarg,"nodev"))) {
4411: quit=0;
4412: } else if (!(strcmp(optarg,"nodevstartup"))) {
4413: quit=1;
4414: } else if (!(strcmp(optarg,"never"))) {
4415: quit=2;
4416: } else if (!(strcmp(optarg,"onecheck"))) {
4417: quit=3;
4418: debugmode=1;
4419: } else if (!(strcmp(optarg,"showtests"))) {
4420: quit=4;
4421: debugmode=1;
4422: } else if (!(strcmp(optarg,"errors"))) {
4423: quit=5;
4424: } else {
4425: badarg = true;
4426: }
4427: break;
4428: case 'l':
4429: // set the log facility level
4430: if (!strcmp(optarg, "daemon"))
4431: facility=LOG_DAEMON;
4432: else if (!strcmp(optarg, "local0"))
4433: facility=LOG_LOCAL0;
4434: else if (!strcmp(optarg, "local1"))
4435: facility=LOG_LOCAL1;
4436: else if (!strcmp(optarg, "local2"))
4437: facility=LOG_LOCAL2;
4438: else if (!strcmp(optarg, "local3"))
4439: facility=LOG_LOCAL3;
4440: else if (!strcmp(optarg, "local4"))
4441: facility=LOG_LOCAL4;
4442: else if (!strcmp(optarg, "local5"))
4443: facility=LOG_LOCAL5;
4444: else if (!strcmp(optarg, "local6"))
4445: facility=LOG_LOCAL6;
4446: else if (!strcmp(optarg, "local7"))
4447: facility=LOG_LOCAL7;
4448: else
4449: badarg = true;
4450: break;
4451: case 'd':
4452: // enable debug mode
4453: debugmode = 1;
4454: break;
4455: case 'n':
4456: // don't fork()
4457: #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4458: do_fork = false;
4459: #endif
4460: break;
4461: case 'D':
4462: // print summary of all valid directives
4463: debugmode = 1;
4464: Directives();
4465: EXIT(0);
4466: break;
4467: case 'i':
4468: // Period (time interval) for checking
4469: // strtol will set errno in the event of overflow, so we'll check it.
4470: errno = 0;
4471: lchecktime = strtol(optarg, &tailptr, 10);
4472: if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
4473: debugmode=1;
4474: PrintHead();
4475: PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
4476: PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
4477: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4478: EXIT(EXIT_BADCMD);
4479: }
4480: checktime = (int)lchecktime;
4481: break;
4482: case 'r':
4483: // report IOCTL transactions
4484: {
4485: int i;
4486: char *s;
4487:
4488: // split_report_arg() may modify its first argument string, so use a
4489: // copy of optarg in case we want optarg for an error message.
4490: if (!(s = strdup(optarg))) {
4491: PrintOut(LOG_CRIT, "No memory to process -r option - exiting\n");
4492: EXIT(EXIT_NOMEM);
4493: }
4494: if (split_report_arg(s, &i)) {
4495: badarg = true;
4496: } else if (i<1 || i>3) {
4497: debugmode=1;
4498: PrintHead();
4499: PrintOut(LOG_CRIT, "======> INVALID REPORT LEVEL: %s <=======\n", optarg);
4500: PrintOut(LOG_CRIT, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
4501: EXIT(EXIT_BADCMD);
4502: } else if (!strcmp(s,"ioctl")) {
4503: ata_debugmode = scsi_debugmode = i;
4504: } else if (!strcmp(s,"ataioctl")) {
4505: ata_debugmode = i;
4506: } else if (!strcmp(s,"scsiioctl")) {
4507: scsi_debugmode = i;
4508: } else {
4509: badarg = true;
4510: }
4511: free(s); // TODO: use std::string
4512: }
4513: break;
4514: case 'c':
4515: // alternate configuration file
4516: if (strcmp(optarg,"-"))
4517: configfile = (configfile_alt = optarg).c_str();
4518: else // read from stdin
4519: configfile=configfile_stdin;
4520: break;
4521: case 'p':
4522: // output file with PID number
4523: pid_file = optarg;
4524: break;
4525: case 's':
4526: // path prefix of persistent state file
4527: state_path_prefix = optarg;
4528: break;
4529: case 'A':
4530: // path prefix of attribute log file
4531: attrlog_path_prefix = optarg;
4532: break;
4533: case 'B':
4534: {
4535: const char * path = optarg;
4536: if (*path == '+' && path[1])
4537: path++;
4538: else
4539: no_defaultdb = true;
4540: unsigned char savedebug = debugmode; debugmode = 1;
4541: if (!read_drive_database(path))
4542: EXIT(EXIT_BADCMD);
4543: debugmode = savedebug;
4544: }
4545: break;
4546: case 'V':
4547: // print version and CVS info
4548: debugmode = 1;
4549: PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
4550: EXIT(0);
4551: break;
4552: #ifdef HAVE_LIBCAP_NG
4553: case 'C':
4554: // enable capabilities
4555: enable_capabilities = true;
4556: break;
4557: #endif
4558: case 'h':
4559: // help: print summary of command-line options
4560: debugmode=1;
4561: PrintHead();
4562: Usage();
4563: EXIT(0);
4564: break;
4565: case '?':
4566: default:
4567: // unrecognized option
4568: debugmode=1;
4569: PrintHead();
4570: // Point arg to the argument in which this option was found.
4571: arg = argv[optind-1];
4572: // Check whether the option is a long option that doesn't map to -h.
4573: if (arg[1] == '-' && optchar != 'h') {
4574: // Iff optopt holds a valid option then argument must be missing.
4575: if (optopt && (strchr(shortopts, optopt) != NULL)) {
4576: PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
4577: PrintValidArgs(optopt);
4578: } else {
4579: PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
4580: }
4581: PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
4582: EXIT(EXIT_BADCMD);
4583: }
4584: if (optopt) {
4585: // Iff optopt holds a valid option then argument must be missing.
4586: if (strchr(shortopts, optopt) != NULL){
4587: PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
4588: PrintValidArgs(optopt);
4589: } else {
4590: PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
4591: }
4592: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4593: EXIT(EXIT_BADCMD);
4594: }
4595: Usage();
4596: EXIT(0);
4597: }
4598:
4599: // Check to see if option had an unrecognized or incorrect argument.
4600: if (badarg) {
4601: debugmode=1;
4602: PrintHead();
4603: // It would be nice to print the actual option name given by the user
4604: // here, but we just print the short form. Please fix this if you know
4605: // a clean way to do it.
4606: PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
4607: PrintValidArgs(optchar);
4608: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4609: EXIT(EXIT_BADCMD);
4610: }
4611: }
4612:
4613: // non-option arguments are not allowed
4614: if (argc > optind) {
4615: debugmode=1;
4616: PrintHead();
4617: PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
4618: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4619: EXIT(EXIT_BADCMD);
4620: }
4621:
4622: // no pidfile in debug mode
4623: if (debugmode && !pid_file.empty()) {
4624: debugmode=1;
4625: PrintHead();
4626: PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4627: PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
4628: EXIT(EXIT_BADCMD);
4629: }
4630:
4631: #ifndef _WIN32
4632: if (!debugmode) {
4633: // absolute path names are required due to chdir('/') after fork().
4634: check_abs_path('p', pid_file);
4635: check_abs_path('s', state_path_prefix);
4636: check_abs_path('A', attrlog_path_prefix);
4637: }
4638: #endif
4639:
4640: // Read or init drive database
4641: if (!no_defaultdb) {
4642: unsigned char savedebug = debugmode; debugmode = 1;
4643: if (!read_default_drive_databases())
4644: EXIT(EXIT_BADCMD);
4645: debugmode = savedebug;
4646: }
4647:
4648: // print header
4649: PrintHead();
4650: }
4651:
4652: // Function we call if no configuration file was found or if the
4653: // SCANDIRECTIVE Directive was found. It makes entries for device
4654: // names returned by scan_smart_devices() in os_OSNAME.cpp
4655: static int MakeConfigEntries(const dev_config & base_cfg,
4656: dev_config_vector & conf_entries, smart_device_list & scanned_devs, const char * type)
4657: {
4658: // make list of devices
4659: smart_device_list devlist;
4660: if (!smi()->scan_smart_devices(devlist, (*type ? type : 0)))
4661: PrintOut(LOG_CRIT,"Problem creating device name scan list\n");
4662:
4663: // if no devices, or error constructing list, return
4664: if (devlist.size() <= 0)
4665: return 0;
4666:
4667: // add empty device slots for existing config entries
4668: while (scanned_devs.size() < conf_entries.size())
4669: scanned_devs.push_back((smart_device *)0);
4670:
4671: // loop over entries to create
4672: for (unsigned i = 0; i < devlist.size(); i++) {
4673: // Move device pointer
4674: smart_device * dev = devlist.release(i);
4675: scanned_devs.push_back(dev);
4676:
4677: // Copy configuration, update device and type name
4678: conf_entries.push_back(base_cfg);
4679: dev_config & cfg = conf_entries.back();
4680: cfg.name = dev->get_info().info_name;
4681: cfg.dev_name = dev->get_info().dev_name;
4682: cfg.dev_type = type;
4683: }
4684:
4685: return devlist.size();
4686: }
4687:
4688: static void CanNotRegister(const char *name, const char *type, int line, bool scandirective)
4689: {
4690: if (!debugmode && scandirective)
4691: return;
4692: if (line)
4693: PrintOut(scandirective?LOG_INFO:LOG_CRIT,
4694: "Unable to register %s device %s at line %d of file %s\n",
4695: type, name, line, configfile);
4696: else
4697: PrintOut(LOG_INFO,"Unable to register %s device %s\n",
4698: type, name);
4699: return;
4700: }
4701:
4702: // Returns negative value (see ParseConfigFile()) if config file
4703: // had errors, else number of entries which may be zero or positive.
4704: static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
4705: {
4706: // parse configuration file configfile (normally /etc/smartd.conf)
4707: int entries = ParseConfigFile(conf_entries);
4708:
4709: if (entries < 0) {
4710: // There was an error reading the configuration file.
4711: conf_entries.clear();
4712: if (entries == -1)
4713: PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
4714: return entries;
4715: }
4716:
4717: // no error parsing config file.
4718: if (entries) {
4719: // we did not find a SCANDIRECTIVE and did find valid entries
4720: PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
4721: }
4722: else if (!conf_entries.empty()) {
4723: // we found a SCANDIRECTIVE or there was no configuration file so
4724: // scan. Configuration file's last entry contains all options
4725: // that were set
4726: dev_config first = conf_entries.back();
4727: conf_entries.pop_back();
4728:
4729: if (first.lineno)
4730: PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
4731: else
4732: PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
4733:
4734: // make config list of devices to search for
4735: MakeConfigEntries(first, conf_entries, scanned_devs, first.dev_type.c_str());
4736:
4737: // warn user if scan table found no devices
4738: if (conf_entries.empty())
4739: PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
4740: }
4741: else
4742: PrintOut(LOG_CRIT,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile);
4743:
4744: return conf_entries.size();
4745: }
4746:
4747:
4748: // This function tries devices from conf_entries. Each one that can be
4749: // registered is moved onto the [ata|scsi]devices lists and removed
4750: // from the conf_entries list.
4751: static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
4752: dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
4753: {
4754: // start by clearing lists/memory of ALL existing devices
4755: configs.clear();
4756: devices.clear();
4757: states.clear();
4758:
4759: // Register entries
4760: for (unsigned i = 0; i < conf_entries.size(); i++){
4761:
4762: dev_config cfg = conf_entries[i];
4763:
4764: // get device of appropriate type
4765: smart_device_auto_ptr dev;
4766: bool scanning = false;
4767:
4768: // Device may already be detected during devicescan
4769: if (i < scanned_devs.size()) {
4770: dev = scanned_devs.release(i);
4771: if (dev)
4772: scanning = true;
4773: }
4774:
4775: if (!dev) {
4776: dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
4777: if (!dev) {
4778: if (cfg.dev_type.empty())
4779: PrintOut(LOG_INFO,"Device: %s, unable to autodetect device type\n", cfg.name.c_str());
4780: else
4781: PrintOut(LOG_INFO,"Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
4782: continue;
4783: }
4784: }
4785:
4786: // Save old info
4787: smart_device::device_info oldinfo = dev->get_info();
4788:
4789: // Open with autodetect support, may return 'better' device
4790: dev.replace( dev->autodetect_open() );
4791:
4792: // Report if type has changed
4793: if (oldinfo.dev_type != dev->get_dev_type())
4794: PrintOut(LOG_INFO,"Device: %s, type changed from '%s' to '%s'\n",
4795: cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
4796:
4797: if (!dev->is_open()) {
4798: // For linux+devfs, a nonexistent device gives a strange error
4799: // message. This makes the error message a bit more sensible.
4800: // If no debug and scanning - don't print errors
4801: if (debugmode || !scanning)
4802: PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
4803: continue;
4804: }
4805:
4806: // Update informal name
4807: cfg.name = dev->get_info().info_name;
4808: PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
4809:
4810: // Prepare initial state
4811: dev_state state;
4812:
4813: // register ATA devices
4814: if (dev->is_ata()){
4815: if (ATADeviceScan(cfg, state, dev->to_ata())) {
4816: CanNotRegister(cfg.name.c_str(), "ATA", cfg.lineno, scanning);
4817: dev.reset();
4818: }
4819: }
4820: // or register SCSI devices
4821: else if (dev->is_scsi()){
4822: if (SCSIDeviceScan(cfg, state, dev->to_scsi())) {
4823: CanNotRegister(cfg.name.c_str(), "SCSI", cfg.lineno, scanning);
4824: dev.reset();
4825: }
4826: }
4827: else {
4828: PrintOut(LOG_INFO, "Device: %s, neither ATA nor SCSI device\n", cfg.name.c_str());
4829: dev.reset();
4830: }
4831:
4832: if (dev) {
4833: // move onto the list of devices
4834: configs.push_back(cfg);
4835: states.push_back(state);
4836: devices.push_back(dev);
4837: }
4838: // if device is explictly listed and we can't register it, then
4839: // exit unless the user has specified that the device is removable
4840: else if (!scanning) {
4841: if (cfg.removable || quit==2)
4842: PrintOut(LOG_INFO, "Device %s not available\n", cfg.name.c_str());
4843: else {
4844: PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
4845: EXIT(EXIT_BADDEV);
4846: }
4847: }
4848: }
1.1.1.2 ! misho 4849:
! 4850: init_disable_standby_check(configs);
1.1 misho 4851: }
4852:
4853:
4854: // Main program without exception handling
4855: static int main_worker(int argc, char **argv)
4856: {
4857: // Initialize interface
4858: smart_interface::init();
4859: if (!smi())
4860: return 1;
4861:
4862: // is it our first pass through?
4863: bool firstpass = true;
4864:
4865: // next time to wake up
4866: time_t wakeuptime = 0;
4867:
4868: // parse input and print header and usage info if needed
4869: ParseOpts(argc,argv);
4870:
4871: // Configuration for each device
4872: dev_config_vector configs;
4873: // Device states
4874: dev_state_vector states;
4875: // Devices to monitor
4876: smart_device_list devices;
4877:
4878: bool write_states_always = true;
4879:
4880: #ifdef HAVE_LIBCAP_NG
4881: // Drop capabilities
4882: if (enable_capabilities) {
4883: capng_clear(CAPNG_SELECT_BOTH);
4884: capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
4885: CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
4886: capng_apply(CAPNG_SELECT_BOTH);
4887: }
4888: #endif
4889:
4890: // the main loop of the code
4891: for (;;) {
4892:
4893: // are we exiting from a signal?
4894: if (caughtsigEXIT) {
4895: // are we exiting with SIGTERM?
4896: int isterm=(caughtsigEXIT==SIGTERM);
4897: int isquit=(caughtsigEXIT==SIGQUIT);
4898: int isok=debugmode?isterm || isquit:isterm;
4899:
4900: PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
4901: caughtsigEXIT, strsignal(caughtsigEXIT));
4902:
4903: if (!isok)
4904: return EXIT_SIGNAL;
4905:
4906: // Write state files
4907: if (!state_path_prefix.empty())
4908: write_all_dev_states(configs, states);
4909:
4910: return 0;
4911: }
4912:
4913: // Should we (re)read the config file?
4914: if (firstpass || caughtsigHUP){
4915: if (!firstpass) {
4916: // Write state files
4917: if (!state_path_prefix.empty())
4918: write_all_dev_states(configs, states);
4919:
4920: PrintOut(LOG_INFO,
4921: caughtsigHUP==1?
4922: "Signal HUP - rereading configuration file %s\n":
4923: "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME" quits)\n\n",
4924: configfile);
4925: }
4926:
4927: {
4928: dev_config_vector conf_entries; // Entries read from smartd.conf
4929: smart_device_list scanned_devs; // Devices found during scan
4930: // (re)reads config file, makes >=0 entries
4931: int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
4932:
4933: if (entries>=0) {
4934: // checks devices, then moves onto ata/scsi list or deallocates.
4935: RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
4936: if (!(configs.size() == devices.size() && configs.size() == states.size()))
4937: throw std::logic_error("Invalid result from RegisterDevices");
4938: }
4939: else if (quit==2 || ((quit==0 || quit==1) && !firstpass)) {
4940: // user has asked to continue on error in configuration file
4941: if (!firstpass)
4942: PrintOut(LOG_INFO,"Reusing previous configuration\n");
4943: }
4944: else {
4945: // exit with configuration file error status
4946: return (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
4947: }
4948: }
4949:
4950: // Log number of devices we are monitoring...
4951: if (devices.size() > 0 || quit==2 || (quit==1 && !firstpass)) {
4952: int numata = 0;
4953: for (unsigned i = 0; i < devices.size(); i++) {
4954: if (devices.at(i)->is_ata())
4955: numata++;
4956: }
4957: PrintOut(LOG_INFO,"Monitoring %d ATA and %d SCSI devices\n",
4958: numata, devices.size() - numata);
4959: }
4960: else {
4961: PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
4962: return EXIT_NODEV;
4963: }
4964:
4965: if (quit==4) {
4966: // user has asked to print test schedule
4967: PrintTestSchedule(configs, states, devices);
4968: return 0;
4969: }
4970:
4971: #ifdef HAVE_LIBCAP_NG
4972: if (enable_capabilities) {
4973: for (unsigned i = 0; i < configs.size(); i++) {
4974: if (!configs[i].emailaddress.empty() || !configs[i].emailcmdline.empty()) {
4975: PrintOut(LOG_WARNING, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
4976: break;
4977: }
4978: }
4979: }
4980: #endif
4981:
4982: // reset signal
4983: caughtsigHUP=0;
4984:
4985: // Always write state files after (re)configuration
4986: write_states_always = true;
4987: }
4988:
4989: // check all devices once,
4990: // self tests are not started in first pass unless '-q onecheck' is specified
4991: CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit==3));
4992:
4993: // Write state files
4994: if (!state_path_prefix.empty())
4995: write_all_dev_states(configs, states, write_states_always);
4996: write_states_always = false;
4997:
4998: // Write attribute logs
4999: if (!attrlog_path_prefix.empty())
5000: write_all_dev_attrlogs(configs, states);
5001:
5002: // user has asked us to exit after first check
5003: if (quit==3) {
5004: PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
5005: "smartd is exiting (exit status 0)\n");
5006: return 0;
5007: }
5008:
5009: // fork into background if needed
5010: if (firstpass && !debugmode) {
5011: DaemonInit();
5012: }
5013:
5014: // set exit and signal handlers, write PID file, set wake-up time
5015: if (firstpass){
5016: Initialize(&wakeuptime);
5017: firstpass = false;
5018: }
5019:
5020: // sleep until next check time, or a signal arrives
5021: wakeuptime = dosleep(wakeuptime, write_states_always);
5022: }
5023: }
5024:
5025:
5026: #ifndef _WIN32
5027: // Main program
5028: int main(int argc, char **argv)
5029: #else
5030: // Windows: internal main function started direct or by service control manager
5031: static int smartd_main(int argc, char **argv)
5032: #endif
5033: {
5034: int status;
5035: try {
5036: // Do the real work ...
5037: status = main_worker(argc, argv);
5038: }
5039: catch (int ex) {
5040: // EXIT(status) arrives here
5041: status = ex;
5042: }
5043: catch (const std::bad_alloc & /*ex*/) {
5044: // Memory allocation failed (also thrown by std::operator new)
5045: PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
5046: status = EXIT_NOMEM;
5047: }
5048: catch (const std::exception & ex) {
5049: // Other fatal errors
5050: PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
5051: status = EXIT_BADCODE;
5052: }
5053:
5054: if (is_initialized)
5055: status = Goodbye(status);
5056:
5057: #ifdef _WIN32
5058: daemon_winsvc_exitcode = status;
5059: #endif
5060: return status;
5061: }
5062:
5063:
#ifdef _WIN32
// Main function for Windows.
// Passes the service options and smartd_main() to daemon_main() and
// returns its result.
int main(int argc, char **argv)
{
  // Options for smartd windows service
  static const daemon_winsvc_options service_options = {
    "--service",                  // cmd_opt
    "smartd", "SmartD Service",   // servicename, displayname
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (S.M.A.R.T.) "
    "built into ATA and SCSI Hard Drives. "
    PACKAGE_HOMEPAGE
  };

  // daemon_main() handles daemon and service specific commands
  // and starts smartd_main() direct, from a new process,
  // or via service control manager
  return daemon_main("smartd", &service_options, smartd_main, argc, argv);
}
#endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>