Annotation of embedaddon/smartmontools/smartd.cpp, revision 1.1.1.4
1.1 misho 1: /*
2: * Home page of code is: http://smartmontools.sourceforge.net
3: *
4: * Copyright (C) 2002-11 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5: * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
6: * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
1.1.1.3 misho 7: * Copyright (C) 2008-13 Christian Franke <smartmontools-support@lists.sourceforge.net>
1.1 misho 8: *
9: * This program is free software; you can redistribute it and/or modify
10: * it under the terms of the GNU General Public License as published by
11: * the Free Software Foundation; either version 2, or (at your option)
12: * any later version.
13: *
14: * You should have received a copy of the GNU General Public License
15: * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
16: *
17: * This code was originally developed as a Senior Thesis by Michael Cornwell
18: * at the Concurrent Systems Laboratory (now part of the Storage Systems
19: * Research Center), Jack Baskin School of Engineering, University of
20: * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
21: *
22: */
23:
1.1.1.4 ! misho 24: #include "config.h"
! 25: #include "int64.h"
! 26:
1.1 misho 27: // unconditionally included files
28: #include <stdio.h>
29: #include <sys/types.h>
30: #include <sys/stat.h> // umask
31: #include <signal.h>
32: #include <fcntl.h>
33: #include <string.h>
34: #include <syslog.h>
35: #include <stdarg.h>
36: #include <stdlib.h>
37: #include <errno.h>
38: #include <time.h>
39: #include <limits.h>
40: #include <getopt.h>
41:
42: #include <stdexcept>
43: #include <string>
44: #include <vector>
45: #include <algorithm> // std::replace()
46:
47: // conditionally included files
48: #ifndef _WIN32
49: #include <sys/wait.h>
50: #endif
51: #ifdef HAVE_UNISTD_H
52: #include <unistd.h>
53: #endif
54: #ifdef HAVE_NETDB_H
55: #include <netdb.h>
56: #endif
57:
58: #ifdef _WIN32
59: #ifdef _MSC_VER
60: #pragma warning(disable:4761) // "conversion supplied"
61: typedef unsigned short mode_t;
62: typedef int pid_t;
63: #endif
64: #include <io.h> // umask()
65: #include <process.h> // getpid()
66: #endif // _WIN32
67:
68: #ifdef __CYGWIN__
69: #include <io.h> // setmode()
70: #endif // __CYGWIN__
71:
72: #ifdef HAVE_LIBCAP_NG
73: #include <cap-ng.h>
74: #endif // LIBCAP_NG
75:
76: // locally included files
77: #include "atacmds.h"
78: #include "dev_interface.h"
79: #include "knowndrives.h"
80: #include "scsicmds.h"
81: #include "utility.h"
82:
83: // This is for solaris, where signal() resets the handler to SIG_DFL
84: // after the first signal is caught.
85: #ifdef HAVE_SIGSET
86: #define SIGNALFN sigset
87: #else
88: #define SIGNALFN signal
89: #endif
90:
91: #ifdef _WIN32
92: // fork()/signal()/initd simulation for native Windows
93: #include "daemon_win32.h" // daemon_main/detach/signal()
94: #undef SIGNALFN
95: #define SIGNALFN daemon_signal
96: #define strsignal daemon_strsignal
97: #define sleep daemon_sleep
98: // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
99: #define SIGQUIT SIGBREAK
100: #define SIGQUIT_KEYNAME "CONTROL-Break"
101: #else // _WIN32
102: #define SIGQUIT_KEYNAME "CONTROL-\\"
103: #endif // _WIN32
104:
105: #if defined (__SVR4) && defined (__sun)
106: extern "C" int getdomainname(char *, int); // no declaration in header files!
107: #endif
108:
1.1.1.4 ! misho 109: const char * smartd_cpp_cvsid = "$Id: smartd.cpp 3802 2013-03-24 18:36:21Z chrfranke $"
1.1 misho 110: CONFIG_H_CVSID;
111:
// Exit status values used by smartd
#define EXIT_BADCMD    1   // command line could not be parsed
#define EXIT_BADCONF   2   // config file contains a syntax error
#define EXIT_STARTUP   3   // forking the daemon failed
#define EXIT_PID       4   // PID file could not be created
#define EXIT_NOCONF    5   // config file is missing
#define EXIT_READCONF  6   // config file is present but unreadable

#define EXIT_NOMEM     8   // memory exhausted
#define EXIT_BADCODE   10  // internal error, must never occur

#define EXIT_BADDEV    16  // device cannot be monitored
#define EXIT_NODEV     17  // nothing to monitor

#define EXIT_SIGNAL    254 // terminated by a signal
127:
128:
// Settings below are filled in from the command line unless noted.

// Debug level: 1=debug mode, 2=print presets
static unsigned char debugmode = 0;

// How long to sleep between two rounds of checks
#define CHECKTIME 1800
static int checktime = CHECKTIME;

// Name of the PID file; left empty when no PID file is wanted
static std::string pid_file;

// Path prefix of the persistent state files; empty disables persistence.
// The build may supply a default through SMARTMONTOOLS_SAVESTATES.
static std::string state_path_prefix
#ifdef SMARTMONTOOLS_SAVESTATES
  = SMARTMONTOOLS_SAVESTATES
#endif
  ;

// Path prefix of the attribute log files; empty disables logging.
// The build may supply a default through SMARTMONTOOLS_ATTRIBUTELOG.
static std::string attrlog_path_prefix
#ifdef SMARTMONTOOLS_ATTRIBUTELOG
  = SMARTMONTOOLS_ATTRIBUTELOG
#endif
  ;

// Name of the configuration file
static const char * configfile;
// Placeholder used as configuration file "name" when reading from stdin
static const char * const configfile_stdin = "<stdin>";
// Path of an alternate configuration file
static std::string configfile_alt;

// Warning script file
static std::string warning_script;

// When should smartd exit?
static int quit = 0;

// Default syslog(3) facility for log output
static int facility = LOG_DAEMON;

#ifndef _WIN32
// Fork into the background?
static bool do_fork = true;
#endif

#ifdef HAVE_LIBCAP_NG
// Enable capabilities?
static bool enable_capabilities = false;
#endif

// TODO: This smartctl only variable is also used in os_win32.cpp
unsigned char failuretest_permissive = 0;
181:
// Flags stored by the signal handlers and polled by the main code.
// They are declared 'volatile sig_atomic_t' because that is the object
// type a signal handler may portably assign to.

// set to one if we catch a USR1 (check devices now)
static volatile sig_atomic_t caughtsigUSR1 = 0;

#ifdef _WIN32
// set to one if we catch a USR2 (toggle debug mode)
static volatile sig_atomic_t caughtsigUSR2 = 0;
#endif

// set to one if we catch a HUP (reload config file). In debug mode,
// set to two, if we catch INT (also reload config file).
static volatile sig_atomic_t caughtsigHUP = 0;

// set to signal value if we catch INT, QUIT, or TERM
static volatile sig_atomic_t caughtsigEXIT = 0;
196:
197: // This function prints either to stdout or to the syslog as needed.
198: static void PrintOut(int priority, const char *fmt, ...)
1.1.1.2 misho 199: __attribute_format_printf(2, 3);
1.1 misho 200:
// Bit flags controlling how a single attribute is monitored.
// They are stored per attribute in monitor_attr_flags, see below.
enum {
  MONITOR_IGN_FAILUSE = 1 << 0,
  MONITOR_IGNORE      = 1 << 1,
  MONITOR_RAW_PRINT   = 1 << 2,
  MONITOR_RAW         = 1 << 3,
  MONITOR_AS_CRIT     = 1 << 4,
  MONITOR_RAW_AS_CRIT = 1 << 5
};
211:
// One byte of flag bits for each attribute id.
// Only ids 1..255 are accepted; any other id is ignored by set()
// and reported as "not set" by is_set().
class attribute_flags
{
public:
  // Start with all flags cleared.
  attribute_flags()
  {
    for (unsigned i = 0; i < sizeof(m_flags); i++)
      m_flags[i] = 0;
  }

  // True if 'id' is in range and at least one bit of 'flag' is set for it.
  bool is_set(int id, unsigned char flag) const
  {
    if (!valid_id(id))
      return false;
    return (m_flags[id] & flag) != 0;
  }

  // Add the bits in 'flags' to the entry for 'id' (no-op if out of range).
  void set(int id, unsigned char flags)
  {
    if (!valid_id(id))
      return;
    m_flags[id] |= flags;
  }

private:
  // Id 0 and ids beyond the table are not valid.
  bool valid_id(int id) const
    { return (0 < id && id < (int)sizeof(m_flags)); }

  unsigned char m_flags[256];
};
231:
232:
233: /// Configuration data for a device. Read from smartd.conf.
234: /// Supports copy & assignment and is compatible with STL containers.
235: struct dev_config
236: {
237: int lineno; // Line number of entry in file
238: std::string name; // Device name (with optional extra info)
239: std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
240: std::string dev_type; // Device type argument from -d directive, empty if none
1.1.1.3 misho 241: std::string dev_idinfo; // Device identify info for warning emails
1.1 misho 242: std::string state_file; // Path of the persistent state file, empty if none
243: std::string attrlog_file; // Path of the persistent attrlog file, empty if none
1.1.1.3 misho 244: bool ignore; // Ignore this entry
1.1 misho 245: bool smartcheck; // Check SMART status
246: bool usagefailed; // Check for failed Usage Attributes
247: bool prefail; // Track changes in Prefail Attributes
248: bool usage; // Track changes in Usage Attributes
249: bool selftest; // Monitor number of selftest errors
250: bool errorlog; // Monitor number of ATA errors
251: bool xerrorlog; // Monitor number of ATA errors (Extended Comprehensive error log)
252: bool offlinests; // Monitor changes in offline data collection status
1.1.1.2 misho 253: bool offlinests_ns; // Disable auto standby if in progress
1.1 misho 254: bool selfteststs; // Monitor changes in self-test execution status
1.1.1.2 misho 255: bool selfteststs_ns; // Disable auto standby if in progress
1.1 misho 256: bool permissive; // Ignore failed SMART commands
257: char autosave; // 1=disable, 2=enable Autosave Attributes
258: char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
1.1.1.3 misho 259: firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
1.1 misho 260: bool ignorepresets; // Ignore database of -v options
261: bool showpresets; // Show database entry for this device
262: bool removable; // Device may disappear (not be present)
263: char powermode; // skip check, if disk in idle or standby mode
264: bool powerquiet; // skip powermode 'skipping checks' message
265: int powerskipmax; // how many times can be check skipped
266: unsigned char tempdiff; // Track Temperature changes >= this limit
267: unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
268: regular_expression test_regex; // Regex for scheduled testing
269:
270: // Configuration of email warning messages
271: std::string emailcmdline; // script to execute, empty if no messages
272: std::string emailaddress; // email address, or empty
273: unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
274: bool emailtest; // Send test email?
275:
276: // ATA ONLY
1.1.1.3 misho 277: int dev_rpm; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
1.1.1.2 misho 278: int set_aam; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
279: int set_apm; // disable(-1), enable(2..255->1..254) Advanced Power Management
280: int set_lookahead; // disable(-1), enable(1) read look-ahead
281: int set_standby; // set(1..255->0..254) standby timer
282: bool set_security_freeze; // Freeze ATA security
283: int set_wcache; // disable(-1), enable(1) write cache
284:
1.1 misho 285: bool sct_erc_set; // set SCT ERC to:
286: unsigned short sct_erc_readtime; // ERC read time (deciseconds)
287: unsigned short sct_erc_writetime; // ERC write time (deciseconds)
288:
289: unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
290: unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
291: bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
292: bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
293:
294: attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
295:
296: ata_vendor_attr_defs attribute_defs; // -v options
297:
298: dev_config();
299: };
300:
301: dev_config::dev_config()
302: : lineno(0),
1.1.1.3 misho 303: ignore(false),
1.1 misho 304: smartcheck(false),
305: usagefailed(false),
306: prefail(false),
307: usage(false),
308: selftest(false),
309: errorlog(false),
310: xerrorlog(false),
1.1.1.2 misho 311: offlinests(false), offlinests_ns(false),
312: selfteststs(false), selfteststs_ns(false),
1.1 misho 313: permissive(false),
314: autosave(0),
315: autoofflinetest(0),
316: ignorepresets(false),
317: showpresets(false),
318: removable(false),
319: powermode(0),
320: powerquiet(false),
321: powerskipmax(0),
322: tempdiff(0),
323: tempinfo(0), tempcrit(0),
324: emailfreq(0),
325: emailtest(false),
1.1.1.3 misho 326: dev_rpm(0),
1.1.1.2 misho 327: set_aam(0), set_apm(0),
328: set_lookahead(0),
329: set_standby(0),
330: set_security_freeze(false),
331: set_wcache(0),
1.1 misho 332: sct_erc_set(false),
333: sct_erc_readtime(0), sct_erc_writetime(0),
334: curr_pending_id(0), offl_pending_id(0),
335: curr_pending_incr(false), offl_pending_incr(false),
336: curr_pending_set(false), offl_pending_set(false)
337: {
338: }
339:
340:
// How many different mail message types exist
static const int SMARTD_NMAIL = 13;
// Mail type of '-M test' mails; its state is not made persistent
static const int MAILTYPE_TEST = 0;
// TODO: Add const or enum for all mail types.
346:
// Bookkeeping for one type of warning mail.
struct mailinfo {
  int logged;       // number of times an email has been sent
  time_t firstsent; // time first email was sent, as defined by time(2)
  time_t lastsent;  // time last email was sent, as defined by time(2)

  // Nothing sent yet.
  mailinfo()
  {
    logged = 0;
    firstsent = 0;
    lastsent = 0;
  }
};
355:
356: /// Persistent state data for a device.
357: struct persistent_dev_state
358: {
359: unsigned char tempmin, tempmax; // Min/Max Temperatures
360:
361: unsigned char selflogcount; // total number of self-test errors
362: unsigned short selfloghour; // lifetime hours of last self-test error
363:
364: time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
365:
366: uint64_t selective_test_last_start; // Start LBA of last scheduled selective self-test
367: uint64_t selective_test_last_end; // End LBA of last scheduled selective self-test
368:
369: mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
370:
371: // ATA ONLY
372: int ataerrorcount; // Total number of ATA errors
373:
374: // Persistent part of ata_smart_values:
375: struct ata_attribute {
376: unsigned char id;
377: unsigned char val;
378: unsigned char worst; // Byte needed for 'raw64' attribute only.
379: uint64_t raw;
380: unsigned char resvd;
381:
382: ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
383: };
384: ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
1.1.1.3 misho 385:
386: // SCSI ONLY
387:
388: struct scsi_error_counter {
389: struct scsiErrorCounter errCounter;
390: unsigned char found;
391: scsi_error_counter() : found(0) { }
392: };
393: scsi_error_counter scsi_error_counters[3];
394:
395: struct scsi_nonmedium_error {
396: struct scsiNonMediumError nme;
397: unsigned char found;
398: scsi_nonmedium_error() : found(0) { }
399: };
400: scsi_nonmedium_error scsi_nonmedium_error;
1.1 misho 401:
402: persistent_dev_state();
403: };
404:
405: persistent_dev_state::persistent_dev_state()
406: : tempmin(0), tempmax(0),
407: selflogcount(0),
408: selfloghour(0),
409: scheduled_test_next_check(0),
410: selective_test_last_start(0),
411: selective_test_last_end(0),
412: ataerrorcount(0)
413: {
414: }
415:
416: /// Non-persistent state data for a device.
417: struct temp_dev_state
418: {
419: bool must_write; // true if persistent part should be written
420:
421: bool not_cap_offline; // true == not capable of offline testing
422: bool not_cap_conveyance;
423: bool not_cap_short;
424: bool not_cap_long;
425: bool not_cap_selective;
426:
427: unsigned char temperature; // last recorded Temperature (in Celsius)
428: time_t tempmin_delay; // time where Min Temperature tracking will start
429:
430: bool powermodefail; // true if power mode check failed
431: int powerskipcnt; // Number of checks skipped due to idle or standby mode
432:
433: // SCSI ONLY
434: unsigned char SmartPageSupported; // has log sense IE page (0x2f)
435: unsigned char TempPageSupported; // has log sense temperature page (0xd)
1.1.1.3 misho 436: unsigned char ReadECounterPageSupported;
437: unsigned char WriteECounterPageSupported;
438: unsigned char VerifyECounterPageSupported;
439: unsigned char NonMediumErrorPageSupported;
1.1 misho 440: unsigned char SuppressReport; // minimize nuisance reports
441: unsigned char modese_len; // mode sense/select cmd len: 0 (don't
442: // know yet) 6 or 10
443: // ATA ONLY
444: uint64_t num_sectors; // Number of sectors
445: ata_smart_values smartval; // SMART data
446: ata_smart_thresholds_pvt smartthres; // SMART thresholds
1.1.1.2 misho 447: bool offline_started; // true if offline data collection was started
448: bool selftest_started; // true if self-test was started
1.1 misho 449:
450: temp_dev_state();
451: };
452:
453: temp_dev_state::temp_dev_state()
454: : must_write(false),
455: not_cap_offline(false),
456: not_cap_conveyance(false),
457: not_cap_short(false),
458: not_cap_long(false),
459: not_cap_selective(false),
460: temperature(0),
461: tempmin_delay(0),
462: powermodefail(false),
463: powerskipcnt(0),
464: SmartPageSupported(false),
465: TempPageSupported(false),
1.1.1.3 misho 466: ReadECounterPageSupported(false),
467: WriteECounterPageSupported(false),
468: VerifyECounterPageSupported(false),
469: NonMediumErrorPageSupported(false),
1.1 misho 470: SuppressReport(false),
471: modese_len(0),
1.1.1.2 misho 472: num_sectors(0),
473: offline_started(false),
474: selftest_started(false)
1.1 misho 475: {
476: memset(&smartval, 0, sizeof(smartval));
477: memset(&smartthres, 0, sizeof(smartthres));
478: }
479:
480: /// Runtime state data for a device.
481: struct dev_state
482: : public persistent_dev_state,
483: public temp_dev_state
484: {
485: void update_persistent_state();
486: void update_temp_state();
487: };
488:
489: /// Container for configuration info for each device.
490: typedef std::vector<dev_config> dev_config_vector;
491:
492: /// Container for state info for each device.
493: typedef std::vector<dev_state> dev_state_vector;
494:
495: // Copy ATA attributes to persistent state.
496: void dev_state::update_persistent_state()
497: {
498: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
499: const ata_smart_attribute & ta = smartval.vendor_attributes[i];
500: ata_attribute & pa = ata_attributes[i];
501: pa.id = ta.id;
502: if (ta.id == 0) {
503: pa.val = pa.worst = 0; pa.raw = 0;
504: continue;
505: }
506: pa.val = ta.current;
507: pa.worst = ta.worst;
508: pa.raw = ta.raw[0]
509: | ( ta.raw[1] << 8)
510: | ( ta.raw[2] << 16)
511: | ((uint64_t)ta.raw[3] << 24)
512: | ((uint64_t)ta.raw[4] << 32)
513: | ((uint64_t)ta.raw[5] << 40);
514: pa.resvd = ta.reserv;
515: }
516: }
517:
518: // Copy ATA from persistent to temp state.
519: void dev_state::update_temp_state()
520: {
521: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
522: const ata_attribute & pa = ata_attributes[i];
523: ata_smart_attribute & ta = smartval.vendor_attributes[i];
524: ta.id = pa.id;
525: if (pa.id == 0) {
526: ta.current = ta.worst = 0;
527: memset(ta.raw, 0, sizeof(ta.raw));
528: continue;
529: }
530: ta.current = pa.val;
531: ta.worst = pa.worst;
532: ta.raw[0] = (unsigned char) pa.raw;
533: ta.raw[1] = (unsigned char)(pa.raw >> 8);
534: ta.raw[2] = (unsigned char)(pa.raw >> 16);
535: ta.raw[3] = (unsigned char)(pa.raw >> 24);
536: ta.raw[4] = (unsigned char)(pa.raw >> 32);
537: ta.raw[5] = (unsigned char)(pa.raw >> 40);
538: ta.reserv = pa.resvd;
539: }
540: }
541:
542: // Parse a line from a state file.
543: static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
544: {
545: static const regular_expression regex(
546: "^ *"
547: "((temperature-min)" // (1 (2)
548: "|(temperature-max)" // (3)
549: "|(self-test-errors)" // (4)
550: "|(self-test-last-err-hour)" // (5)
551: "|(scheduled-test-next-check)" // (6)
552: "|(selective-test-last-start)" // (7)
553: "|(selective-test-last-end)" // (8)
554: "|(ata-error-count)" // (9)
555: "|(mail\\.([0-9]+)\\." // (10 (11)
556: "((count)" // (12 (13)
557: "|(first-sent-time)" // (14)
558: "|(last-sent-time)" // (15)
559: ")" // 12)
560: ")" // 10)
561: "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
562: "((id)" // (18 (19)
563: "|(val)" // (20)
564: "|(worst)" // (21)
565: "|(raw)" // (22)
566: "|(resvd)" // (23)
567: ")" // 18)
568: ")" // 16)
569: ")" // 1)
570: " *= *([0-9]+)[ \n]*$", // (24)
571: REG_EXTENDED
572: );
573:
574: const int nmatch = 1+24;
575: regmatch_t match[nmatch];
576: if (!regex.execute(line, nmatch, match))
577: return false;
578: if (match[nmatch-1].rm_so < 0)
579: return false;
580:
581: uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
582:
583: int m = 1;
584: if (match[++m].rm_so >= 0)
585: state.tempmin = (unsigned char)val;
586: else if (match[++m].rm_so >= 0)
587: state.tempmax = (unsigned char)val;
588: else if (match[++m].rm_so >= 0)
589: state.selflogcount = (unsigned char)val;
590: else if (match[++m].rm_so >= 0)
591: state.selfloghour = (unsigned short)val;
592: else if (match[++m].rm_so >= 0)
593: state.scheduled_test_next_check = (time_t)val;
594: else if (match[++m].rm_so >= 0)
595: state.selective_test_last_start = val;
596: else if (match[++m].rm_so >= 0)
597: state.selective_test_last_end = val;
598: else if (match[++m].rm_so >= 0)
599: state.ataerrorcount = (int)val;
600: else if (match[m+=2].rm_so >= 0) {
601: int i = atoi(line+match[m].rm_so);
602: if (!(0 <= i && i < SMARTD_NMAIL))
603: return false;
604: if (i == MAILTYPE_TEST) // Don't suppress test mails
605: return true;
606: if (match[m+=2].rm_so >= 0)
607: state.maillog[i].logged = (int)val;
608: else if (match[++m].rm_so >= 0)
609: state.maillog[i].firstsent = (time_t)val;
610: else if (match[++m].rm_so >= 0)
611: state.maillog[i].lastsent = (time_t)val;
612: else
613: return false;
614: }
615: else if (match[m+=5+1].rm_so >= 0) {
616: int i = atoi(line+match[m].rm_so);
617: if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
618: return false;
619: if (match[m+=2].rm_so >= 0)
620: state.ata_attributes[i].id = (unsigned char)val;
621: else if (match[++m].rm_so >= 0)
622: state.ata_attributes[i].val = (unsigned char)val;
623: else if (match[++m].rm_so >= 0)
624: state.ata_attributes[i].worst = (unsigned char)val;
625: else if (match[++m].rm_so >= 0)
626: state.ata_attributes[i].raw = val;
627: else if (match[++m].rm_so >= 0)
628: state.ata_attributes[i].resvd = (unsigned char)val;
629: else
630: return false;
631: }
632: else
633: return false;
634: return true;
635: }
636:
637: // Read a state file.
638: static bool read_dev_state(const char * path, persistent_dev_state & state)
639: {
640: stdio_file f(path, "r");
641: if (!f) {
642: if (errno != ENOENT)
643: pout("Cannot read state file \"%s\"\n", path);
644: return false;
645: }
646: #ifdef __CYGWIN__
647: setmode(fileno(f), O_TEXT); // Allow files with \r\n
648: #endif
649:
650: persistent_dev_state new_state;
651: int good = 0, bad = 0;
652: char line[256];
653: while (fgets(line, sizeof(line), f)) {
654: const char * s = line + strspn(line, " \t");
655: if (!*s || *s == '#')
656: continue;
657: if (!parse_dev_state_line(line, new_state))
658: bad++;
659: else
660: good++;
661: }
662:
663: if (bad) {
664: if (!good) {
665: pout("%s: format error\n", path);
666: return false;
667: }
668: pout("%s: %d invalid line(s) ignored\n", path, bad);
669: }
670:
671: // This sets the values missing in the file to 0.
672: state = new_state;
673: return true;
674: }
675:
// Write "name = val" to a state file. Nothing is written if val is 0,
// because the reader treats missing entries as 0.
// Note: the format macro PRIu64 must be separated from the adjacent
// string literals by white space; since C++11 '"%"PRIu64' would be
// read as a literal with a user-defined suffix.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val)
    fprintf(f, "%s = %" PRIu64 "\n", name, val);
}

// Write "name1.id.name2 = val" to a state file. Nothing is written if val is 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val)
    fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
687:
688: // Write a state file
689: static bool write_dev_state(const char * path, const persistent_dev_state & state)
690: {
691: // Rename old "file" to "file~"
692: std::string pathbak = path; pathbak += '~';
693: unlink(pathbak.c_str());
694: rename(path, pathbak.c_str());
695:
696: stdio_file f(path, "w");
697: if (!f) {
698: pout("Cannot create state file \"%s\"\n", path);
699: return false;
700: }
701:
702: fprintf(f, "# smartd state file\n");
703: write_dev_state_line(f, "temperature-min", state.tempmin);
704: write_dev_state_line(f, "temperature-max", state.tempmax);
705: write_dev_state_line(f, "self-test-errors", state.selflogcount);
706: write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
707: write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
708: write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
709: write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
710:
711: int i;
712: for (i = 0; i < SMARTD_NMAIL; i++) {
713: if (i == MAILTYPE_TEST) // Don't suppress test mails
714: continue;
715: const mailinfo & mi = state.maillog[i];
716: if (!mi.logged)
717: continue;
718: write_dev_state_line(f, "mail", i, "count", mi.logged);
719: write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
720: write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
721: }
722:
723: // ATA ONLY
724: write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
725:
726: for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
727: const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
728: if (!pa.id)
729: continue;
730: write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
731: write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
732: write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
733: write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
734: write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
735: }
736:
737: return true;
738: }
739:
740: // Write to the attrlog file
1.1.1.3 misho 741: static bool write_dev_attrlog(const char * path, const dev_state & state)
1.1 misho 742: {
743: stdio_file f(path, "a");
744: if (!f) {
745: pout("Cannot create attribute log file \"%s\"\n", path);
746: return false;
747: }
748:
1.1.1.3 misho 749:
1.1 misho 750: time_t now = time(0);
751: struct tm * tms = gmtime(&now);
752: fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
753: 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
754: tms->tm_hour, tms->tm_min, tms->tm_sec);
1.1.1.3 misho 755: // ATA ONLY
1.1 misho 756: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
757: const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
758: if (!pa.id)
759: continue;
760: fprintf(f, "\t%d;%d;%"PRIu64";", pa.id, pa.val, pa.raw);
761: }
1.1.1.3 misho 762: // SCSI ONLY
763: const struct scsiErrorCounter * ecp;
764: const char * pageNames[3] = {"read", "write", "verify"};
765: for (int k = 0; k < 3; ++k) {
766: if ( !state.scsi_error_counters[k].found ) continue;
767: ecp = &state.scsi_error_counters[k].errCounter;
768: fprintf(f, "\t%s-corr-by-ecc-fast;%"PRIu64";"
769: "\t%s-corr-by-ecc-delayed;%"PRIu64";"
770: "\t%s-corr-by-retry;%"PRIu64";"
771: "\t%s-total-err-corrected;%"PRIu64";"
772: "\t%s-corr-algorithm-invocations;%"PRIu64";"
773: "\t%s-gb-processed;%.3f;"
774: "\t%s-total-unc-errors;%"PRIu64";",
775: pageNames[k], ecp->counter[0],
776: pageNames[k], ecp->counter[1],
777: pageNames[k], ecp->counter[2],
778: pageNames[k], ecp->counter[3],
779: pageNames[k], ecp->counter[4],
780: pageNames[k], (ecp->counter[5] / 1000000000.0),
781: pageNames[k], ecp->counter[6]);
782: }
783: if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
784: fprintf(f, "\tnon-medium-errors;%"PRIu64";", state.scsi_nonmedium_error.nme.counterPC0);
785: }
786: // write SCSI current temperature if it is monitored
787: if(state.TempPageSupported && state.temperature)
788: fprintf(f, "\ttemperature;%d;", state.temperature);
789: // end of line
1.1 misho 790: fprintf(f, "\n");
791: return true;
792: }
793:
794: // Write all state files. If write_always is false, don't write
795: // unless must_write is set.
796: static void write_all_dev_states(const dev_config_vector & configs,
797: dev_state_vector & states,
798: bool write_always = true)
799: {
800: for (unsigned i = 0; i < states.size(); i++) {
801: const dev_config & cfg = configs.at(i);
802: if (cfg.state_file.empty())
803: continue;
804: dev_state & state = states[i];
805: if (!write_always && !state.must_write)
806: continue;
807: if (!write_dev_state(cfg.state_file.c_str(), state))
808: continue;
809: state.must_write = false;
810: if (write_always || debugmode)
811: PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
812: cfg.name.c_str(), cfg.state_file.c_str());
813: }
814: }
815:
816: // Write to all attrlog files
817: static void write_all_dev_attrlogs(const dev_config_vector & configs,
818: dev_state_vector & states)
819: {
820: for (unsigned i = 0; i < states.size(); i++) {
821: const dev_config & cfg = configs.at(i);
822: if (cfg.attrlog_file.empty())
823: continue;
824: dev_state & state = states[i];
825: write_dev_attrlog(cfg.attrlog_file.c_str(), state);
826: }
827: }
828:
829: // remove the PID file
830: static void RemovePidFile()
831: {
832: if (!pid_file.empty()) {
833: if (unlink(pid_file.c_str()))
834: PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
835: pid_file.c_str(), strerror(errno));
836: pid_file.clear();
837: }
838: return;
839: }
840:
841: extern "C" { // signal handlers require C-linkage
842:
843: // Note if we catch a SIGUSR1
844: static void USR1handler(int sig)
845: {
846: if (SIGUSR1==sig)
847: caughtsigUSR1=1;
848: return;
849: }
850:
851: #ifdef _WIN32
852: // Note if we catch a SIGUSR2
853: static void USR2handler(int sig)
854: {
855: if (SIGUSR2==sig)
856: caughtsigUSR2=1;
857: return;
858: }
859: #endif
860:
861: // Note if we catch a HUP (or INT in debug mode)
862: static void HUPhandler(int sig)
863: {
864: if (sig==SIGHUP)
865: caughtsigHUP=1;
866: else
867: caughtsigHUP=2;
868: return;
869: }
870:
871: // signal handler for TERM, QUIT, and INT (if not in debug mode)
872: static void sighandler(int sig)
873: {
874: if (!caughtsigEXIT)
875: caughtsigEXIT=sig;
876: return;
877: }
878:
879: } // extern "C"
880:
881: // Cleanup, print Goodbye message and remove pidfile
882: static int Goodbye(int status)
883: {
884: // delete PID file, if one was created
885: RemovePidFile();
886:
887: // if we are exiting because of a code bug, tell user
888: if (status==EXIT_BADCODE)
889: PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
890:
891: // and this should be the final output from smartd before it exits
892: PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
893:
894: return status;
895: }
896:
// A replacement for setenv(), which is not available on all platforms.
// The string passed to putenv() must not be freed or made invalid,
// since putenv() keeps a pointer to it. It must therefore live in a
// static buffer or on the heap. The string can be freed once the
// variable is redefined via another call to putenv(). There is no
// portable way to unset a variable with putenv(), so the buffer is
// managed by an object that is meant to be static.
// Using setenv() if available is not considered because some
// implementations may produce memory leaks.

class env_buffer
{
public:
  env_buffer()
    : m_buf((char *)0) { }

  // Set environment variable NAME to VALUE.
  // Throws std::runtime_error if putenv() fails.
  // The same NAME must be passed on each call for a given object.
  void set(const char * name, const char * value);

private:
  char * m_buf; // "NAME=VALUE" string currently referenced by the environment

  // Not copyable: two objects must never free the same buffer
  env_buffer(const env_buffer &);
  void operator=(const env_buffer &);
};

void env_buffer::set(const char * name, const char * value)
{
  // "NAME" + '=' + "VALUE" + terminating 0
  const size_t size = strlen(name) + 1 + strlen(value) + 1;
  char * newbuf = new char[size];
  snprintf(newbuf, size, "%s=%s", name, value);

  if (putenv(newbuf)) {
    // The environment does not reference newbuf, so release it
    // before reporting the error.
    delete [] newbuf;
    throw std::runtime_error("putenv() failed");
  }

  // The environment now points to newbuf, so the previous string can go.
  // This assumes that the same NAME is passed on each call
  delete [] m_buf;
  m_buf = newbuf;
}
935:
936: #define EBUFLEN 1024
937:
938: static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1.1.1.2 misho 939: __attribute_format_printf(4, 5);
1.1 misho 940:
941: // If either address or executable path is non-null then send and log
942: // a warning email, or execute executable
1.1.1.3 misho 943: static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
944: {
945: static const char * const whichfail[] = {
1.1 misho 946: "EmailTest", // 0
947: "Health", // 1
948: "Usage", // 2
949: "SelfTest", // 3
950: "ErrorCount", // 4
951: "FailedHealthCheck", // 5
952: "FailedReadSmartData", // 6
953: "FailedReadSmartErrorLog", // 7
954: "FailedReadSmartSelfTestLog", // 8
955: "FailedOpenDevice", // 9
956: "CurrentPendingSector", // 10
957: "OfflineUncorrectableSector", // 11
958: "Temperature" // 12
959: };
960:
961: // See if user wants us to send mail
962: if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
963: return;
964:
965: std::string address = cfg.emailaddress;
966: const char * executable = cfg.emailcmdline.c_str();
967:
968: // which type of mail are we sending?
969: mailinfo * mail=(state.maillog)+which;
970:
971: // checks for sanity
972: if (cfg.emailfreq<1 || cfg.emailfreq>3) {
973: PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
974: return;
975: }
976: if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
977: PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
978: which, (int)sizeof(whichfail));
979: return;
980: }
981:
982: // Return if a single warning mail has been sent.
983: if ((cfg.emailfreq==1) && mail->logged)
984: return;
985:
986: // Return if this is an email test and one has already been sent.
987: if (which == 0 && mail->logged)
988: return;
989:
990: // To decide if to send mail, we need to know what time it is.
1.1.1.3 misho 991: time_t epoch = time(0);
1.1 misho 992:
993: // Return if less than one day has gone by
1.1.1.3 misho 994: const int day = 24*3600;
1.1 misho 995: if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
996: return;
997:
998: // Return if less than 2^(logged-1) days have gone by
999: if (cfg.emailfreq==3 && mail->logged) {
1.1.1.3 misho 1000: int days = 0x01 << (mail->logged - 1);
1.1 misho 1001: days*=day;
1002: if (epoch<(mail->lastsent+days))
1003: return;
1004: }
1005:
1006: #ifdef HAVE_LIBCAP_NG
1007: if (enable_capabilities) {
1008: PrintOut(LOG_ERR, "Sending a mail was supressed. "
1009: "Mails can't be send when capabilites are enabled\n");
1010: return;
1011: }
1012: #endif
1013:
1014: // record the time of this mail message, and the first mail message
1015: if (!mail->logged)
1016: mail->firstsent=epoch;
1017: mail->lastsent=epoch;
1.1.1.3 misho 1018:
1.1 misho 1019: // print warning string into message
1.1.1.3 misho 1020: char message[256];
1021: va_list ap;
1.1 misho 1022: va_start(ap, fmt);
1.1.1.3 misho 1023: vsnprintf(message, sizeof(message), fmt, ap);
1.1 misho 1024: va_end(ap);
1025:
1026: // replace commas by spaces to separate recipients
1027: std::replace(address.begin(), address.end(), ',', ' ');
1.1.1.3 misho 1028:
1.1 misho 1029: // Export information in environment variables that will be useful
1030: // for user scripts
1.1.1.3 misho 1031: static env_buffer env[12];
1032: env[0].set("SMARTD_MAILER", executable);
1033: env[1].set("SMARTD_MESSAGE", message);
1034: char dates[DATEANDEPOCHLEN];
1035: snprintf(dates, sizeof(dates), "%d", mail->logged);
1036: env[2].set("SMARTD_PREVCNT", dates);
1.1 misho 1037: dateandtimezoneepoch(dates, mail->firstsent);
1.1.1.3 misho 1038: env[3].set("SMARTD_TFIRST", dates);
1.1 misho 1039: snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1.1.1.3 misho 1040: env[4].set("SMARTD_TFIRSTEPOCH", dates);
1041: env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1042: env[6].set("SMARTD_ADDRESS", address.c_str());
1043: env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1.1 misho 1044:
1045: // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1.1.1.3 misho 1046: env[8].set("SMARTD_DEVICETYPE",
1047: (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1048: env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1049:
1050: env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1051: dates[0] = 0;
1052: if (which) switch (cfg.emailfreq) {
1053: case 2: dates[0] = '1'; dates[1] = 0; break;
1054: case 3: snprintf(dates, sizeof(dates), "%d", (0x01)<<mail->logged);
1055: }
1056: env[11].set("SMARTD_NEXTDAYS", dates);
1.1 misho 1057:
1058: // now construct a command to send this as EMAIL
1.1.1.3 misho 1059: char command[2048];
1060: if (!*executable)
1061: executable = "<mail>";
1.1 misho 1062: const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1063: const char * newwarn = (which? "Warning via" : "Test of");
1064:
1.1.1.3 misho 1065: #ifndef _WIN32
1066: snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1067:
1068: // tell SYSLOG what we are about to do...
1.1 misho 1069: PrintOut(LOG_INFO,"%s %s to %s ...\n",
1070: which?"Sending warning via":"Executing test of", executable, newadd);
1071:
1072: // issue the command to send mail or to run the user's executable
1073: errno=0;
1074: FILE * pfp;
1075: if (!(pfp=popen(command, "r")))
1076: // failed to popen() mail process
1077: PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1078: newwarn, executable, newadd, errno?strerror(errno):"");
1079: else {
1080: // pipe suceeded!
1081: int len, status;
1082: char buffer[EBUFLEN];
1083:
1084: // if unexpected output on stdout/stderr, null terminate, print, and flush
1085: if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1086: int count=0;
1087: int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1088: buffer[newlen]='\0';
1089: PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1090: newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1091:
1092: // flush pipe if needed
1093: while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1094: count++;
1095:
1096: // tell user that pipe was flushed, or that something is really wrong
1097: if (count && count<EBUFLEN)
1098: PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1099: newwarn, executable, newadd);
1100: else if (count)
1101: PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1102: newwarn, executable, newadd);
1103: }
1104:
1105: // if something went wrong with mail process, print warning
1106: errno=0;
1107: if (-1==(status=pclose(pfp)))
1108: PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1109: errno?strerror(errno):"");
1110: else {
1111: // mail process apparently succeeded. Check and report exit status
1112: int status8;
1113:
1114: if (WIFEXITED(status)) {
1115: // exited 'normally' (but perhaps with nonzero status)
1116: status8=WEXITSTATUS(status);
1117:
1118: if (status8>128)
1119: PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1120: newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1121: else if (status8)
1122: PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1123: newwarn, executable, newadd, status, status8);
1124: else
1125: PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1126: }
1127:
1128: if (WIFSIGNALED(status))
1129: PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1130: newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1131:
1132: // this branch is probably not possible. If subprocess is
1133: // stopped then pclose() should not return.
1134: if (WIFSTOPPED(status))
1135: PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1136: newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1137:
1138: }
1139: }
1140:
1141: #else // _WIN32
1.1.1.3 misho 1142: {
1143: snprintf(command, sizeof(command), "cmd /c \"%s\"", warning_script.c_str());
1.1 misho 1144:
1145: char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1146: int rc;
1147: // run command
1148: PrintOut(LOG_INFO,"%s %s to %s ...\n",
1149: (which?"Sending warning via":"Executing test of"), executable, newadd);
1.1.1.3 misho 1150: rc = daemon_spawn(command, "", 0, stdoutbuf, sizeof(stdoutbuf));
1.1 misho 1151: if (rc >= 0 && stdoutbuf[0])
1152: PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1153: newwarn, executable, newadd, (int)strlen(stdoutbuf), stdoutbuf);
1154: if (rc != 0)
1155: PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1156: newwarn, executable, newadd, rc);
1157: else
1158: PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1159: }
1160:
1161: #endif // _WIN32
1162:
1163: // increment mail sent counter
1164: mail->logged++;
1165: }
1166:
1167: static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1.1.1.2 misho 1168: __attribute_format_printf(4, 5);
1.1 misho 1169:
1170: static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1171: {
1172: if (!(0 <= which && which < SMARTD_NMAIL))
1173: return;
1174:
1175: // Return if no mail sent yet
1176: mailinfo & mi = state.maillog[which];
1177: if (!mi.logged)
1178: return;
1179:
1180: // Format & print message
1181: char msg[256];
1182: va_list ap;
1183: va_start(ap, fmt);
1184: vsnprintf(msg, sizeof(msg), fmt, ap);
1185: va_end(ap);
1186:
1187: PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1188: msg, mi.logged, (mi.logged==1 ? "" : "s"));
1189:
1190: // Clear mail counter and timestamps
1191: mi = mailinfo();
1192: state.must_write = true;
1193: }
1194:
1195: #ifndef _WIN32
1196:
1197: // Output multiple lines via separate syslog(3) calls.
1198: static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1199: {
1200: char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1201: vsnprintf(buf, sizeof(buf), fmt, ap);
1202:
1203: for (char * p = buf, * q; p && *p; p = q) {
1204: if ((q = strchr(p, '\n')))
1205: *q++ = 0;
1206: if (*p)
1207: syslog(priority, "%s\n", p);
1208: }
1209: }
1210:
1211: #else // _WIN32
1212: // os_win32/syslog_win32.cpp supports multiple lines.
1213: #define vsyslog_lines vsyslog
1214: #endif // _WIN32
1215:
1216: // Printing function for watching ataprint commands, or losing them
1217: // [From GLIBC Manual: Since the prototype doesn't specify types for
1218: // optional arguments, in a call to a variadic function the default
1219: // argument promotions are performed on the optional argument
1220: // values. This means the objects of type char or short int (whether
1221: // signed or not) are promoted to either int or unsigned int, as
1222: // appropriate.]
1223: void pout(const char *fmt, ...){
1224: va_list ap;
1225:
1226: // get the correct time in syslog()
1227: FixGlibcTimeZoneBug();
1228: // initialize variable argument list
1229: va_start(ap,fmt);
1230: // in debugmode==1 mode we will print the output from the ataprint.o functions!
1.1.1.3 misho 1231: if (debugmode && debugmode != 2) {
1232: FILE * f = stdout;
1.1 misho 1233: #ifdef _WIN32
1.1.1.3 misho 1234: if (facility == LOG_LOCAL1) // logging to stdout
1235: f = stderr;
1.1 misho 1236: #endif
1.1.1.3 misho 1237: vfprintf(f, fmt, ap);
1238: fflush(f);
1239: }
1.1 misho 1240: // in debugmode==2 mode we print output from knowndrives.o functions
1241: else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1242: openlog("smartd", LOG_PID, facility);
1243: vsyslog_lines(LOG_INFO, fmt, ap);
1244: closelog();
1245: }
1246: va_end(ap);
1247: return;
1248: }
1249:
1250: // This function prints either to stdout or to the syslog as needed.
1251: static void PrintOut(int priority, const char *fmt, ...){
1252: va_list ap;
1253:
1254: // get the correct time in syslog()
1255: FixGlibcTimeZoneBug();
1256: // initialize variable argument list
1257: va_start(ap,fmt);
1.1.1.3 misho 1258: if (debugmode) {
1259: FILE * f = stdout;
1.1 misho 1260: #ifdef _WIN32
1.1.1.3 misho 1261: if (facility == LOG_LOCAL1) // logging to stdout
1262: f = stderr;
1.1 misho 1263: #endif
1.1.1.3 misho 1264: vfprintf(f, fmt, ap);
1265: fflush(f);
1266: }
1.1 misho 1267: else {
1268: openlog("smartd", LOG_PID, facility);
1269: vsyslog_lines(priority, fmt, ap);
1270: closelog();
1271: }
1272: va_end(ap);
1273: return;
1274: }
1275:
1276: // Used to warn users about invalid checksums. Called from atacmds.cpp.
// 'string' names the data structure whose checksum test failed; it is
// inserted into the warning text which is emitted through pout().
1277: void checksumwarning(const char * string)
1278: {
1279: pout("Warning! %s error: invalid SMART checksum.\n", string);
1280: }
1281:
1282: #ifndef _WIN32
1283:
1284: // Wait for the pid file to show up, this makes sure a calling program knows
1285: // that the daemon is really up and running and has a pid to kill it
1286: static bool WaitForPidFile()
1287: {
1288: int waited, max_wait = 10;
1289: struct stat stat_buf;
1290:
1291: if (pid_file.empty() || debugmode)
1292: return true;
1293:
1294: for(waited = 0; waited < max_wait; ++waited) {
1295: if (!stat(pid_file.c_str(), &stat_buf)) {
1296: return true;
1297: } else
1298: sleep(1);
1299: }
1300: return false;
1301: }
1302:
1303: #endif // _WIN32
1304:
1305: // Forks new process, closes ALL file descriptors, redirects stdin,
1306: // stdout, and stderr. Not quite daemon(). See
1307: // http://www.linuxjournal.com/article/2335
1308: // for a good description of why we do things this way.
// On non-Windows builds the fork steps are only performed if do_fork is
// set; closing the descriptors, the redirection to /dev/null, umask() and
// chdir("/") are done in either case.  On Windows the process is only
// detached from the console.  Failures terminate via EXIT(EXIT_STARTUP).
1309: static void DaemonInit()
1310: {
1311: #ifndef _WIN32
1312: pid_t pid;
1313: int i;
1314:
1315: // flush all buffered streams. Else we might get two copies of open
1316: // streams since both parent and child get copies of the buffers.
1317: fflush(NULL);
1318:
1319: if (do_fork) {
1320: if ((pid=fork()) < 0) {
1321: // unable to fork!
1322: PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1323: EXIT(EXIT_STARTUP);
1324: }
1325: else if (pid) {
1326: // we are the parent process, wait for pid file, then exit cleanly
// (a missing PID file is reported as a startup failure)
1327: if(!WaitForPidFile()) {
1328: PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1329: EXIT(EXIT_STARTUP);
1330: } else
1331: EXIT(0);
1332: }
1333:
1334: // from here on, we are the child process.
// setsid() starts a new session, detaching from the controlling terminal
1335: setsid();
1336:
1337: // Fork one more time to avoid any possibility of having terminals
1338: if ((pid=fork()) < 0) {
1339: // unable to fork!
1340: PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1341: EXIT(EXIT_STARTUP);
1342: }
1343: else if (pid)
1344: // we are the parent process -- exit cleanly
1345: EXIT(0);
1346:
1347: // Now we are the child's child...
1348: }
1349:
1350: // close any open file descriptors
// (counts down from getdtablesize() to 0; close() results are ignored,
// so descriptors that are not open are simply skipped)
1351: for (i=getdtablesize();i>=0;--i)
1352: close(i);
1353:
// Evaluates cmd but discards its result without triggering
// "unused result" compiler warnings
1354: #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1355:
1356: // redirect any IO attempts to /dev/null for stdin
// NOTE(review): relies on open()/dup() returning the lowest free
// descriptor, i.e. 0, 1 and 2 after the close loop above
1357: i=open("/dev/null",O_RDWR);
1358: if (i>=0) {
1359: // stdout
1360: NO_warn_unused_result(dup(i));
1361: // stderr
1362: NO_warn_unused_result(dup(i));
1363: };
// files created later are not writable by group and others;
// the working directory is moved to the root directory
1364: umask(0022);
1365: NO_warn_unused_result(chdir("/"));
1366:
1367: if (do_fork)
1368: PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1369:
1370: #else // _WIN32
1371:
1372: // No fork() on native Win32
1373: // Detach this process from console
1374: fflush(NULL);
1375: if (daemon_detach("smartd")) {
1376: PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1377: EXIT(EXIT_STARTUP);
1378: }
1379: // stdin/out/err now closed if not redirected
1380:
1381: #endif // _WIN32
1382: return;
1383: }
1384:
1385: // create a PID file containing the current process id
1386: static void WritePidFile()
1387: {
1388: if (!pid_file.empty()) {
1389: pid_t pid = getpid();
1390: mode_t old_umask;
1391: #ifndef __CYGWIN__
1392: old_umask = umask(0077); // rwx------
1393: #else
1394: // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1395: old_umask = umask(0033); // rwxr--r--
1396: #endif
1397:
1398: stdio_file f(pid_file.c_str(), "w");
1399: umask(old_umask);
1400: if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1401: PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1402: EXIT(EXIT_PID);
1403: }
1404: PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1405: }
1406: }
1407:
1408: // Prints header identifying version of code and home
1409: static void PrintHead()
1410: {
1411: PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1412: }
1413:
1414: // prints help info for configuration file Directives
// The whole text is emitted with a single PrintOut(LOG_INFO, ...) call.
// The three format arguments are, in order: the configuration file name,
// the list of valid device types for '-d' and the list of valid
// firmware bug workarounds for '-F'.
1415: static void Directives()
1416: {
1417: PrintOut(LOG_INFO,
1418: "Configuration file (%s) Directives (after device name):\n"
1.1.1.3 misho 1419: " -d TYPE Set the device type: auto, ignore, removable,\n"
1420: " %s\n"
1.1 misho 1421: " -T TYPE Set the tolerance to one of: normal, permissive\n"
1422: " -o VAL Enable/disable automatic offline tests (on/off)\n"
1423: " -S VAL Enable/disable attribute autosave (on/off)\n"
1424: " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1425: " -H Monitor SMART Health Status, report if failed\n"
1426: " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1.1.1.2 misho 1427: " -l TYPE Monitor SMART log or self-test status:\n"
1428: " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1.1 misho 1429: " -l scterc,R,W Set SCT Error Recovery Control\n"
1.1.1.2 misho 1430: " -e Change device setting: aam,[N|off], apm,[N|off], lookahead,[on|off],\n"
1431: " security-freeze, standby,[N|off], wcache,[on|off]\n"
1.1 misho 1432: " -f Monitor 'Usage' Attributes, report failures\n"
1433: " -m ADD Send email warning to address ADD\n"
1434: " -M TYPE Modify email warning behavior (see man page)\n"
1435: " -p Report changes in 'Prefailure' Attributes\n"
1436: " -u Report changes in 'Usage' Attributes\n"
1437: " -t Equivalent to -p and -u Directives\n"
1438: " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1439: " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1440: " -i ID Ignore Attribute ID for -f Directive\n"
1441: " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1442: " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1443: " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1444: " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1445: " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1446: " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1447: " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1.1.1.3 misho 1448: " -F TYPE Use firmware bug workaround:\n"
1449: " %s\n"
1.1 misho 1450: " # Comment: text after a hash sign is ignored\n"
1451: " \\ Line continuation character\n"
1452: "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1.1.1.3 misho 1453: "Use ID = 0 to turn off -C and/or -U Directives\n"
1454: "Example: /dev/sda -a\n",
1455: configfile,
1456: smi()->get_valid_dev_types_str().c_str(),
1457: get_valid_firmwarebug_args());
1.1 misho 1458: }
1459:
/* Maps a command line option character to a static string that lists
   the arguments accepted by this option.  Returns NULL for options
   that are not known here. */
static const char *GetValidArgList(char opt)
{
  // Options taking a path prefix
  if (opt == 'A' || opt == 's')
    return "<PATH_PREFIX>";

  // Options taking a file name
  if (opt == 'B' || opt == 'p' || opt == 'w')
    return "<FILE_NAME>";

  if (opt == 'c')
    return "<FILE_NAME>, -";
  if (opt == 'l')
    return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
  if (opt == 'q')
    return "nodev, errors, nodevstartup, never, onecheck, showtests";
  if (opt == 'r')
    return "ioctl[,N], ataioctl[,N], scsiioctl[,N]";
  if (opt == 'i')
    return "<INTEGER_SECONDS>";

  return NULL;
}
1486:
1487: /* prints help information for command syntax */
1488: static void Usage()
1489: {
1490: PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1491: PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1492: PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1493: #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1494: PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_ATTRIBUTELOG"MODEL-SERIAL.ata.csv]\n");
1495: #endif
1496: PrintOut(LOG_INFO,"\n");
1497: PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1498: PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1499: PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1500: #ifdef SMARTMONTOOLS_DRIVEDBDIR
1501: PrintOut(LOG_INFO,"\n");
1502: PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1503: #endif
1504: PrintOut(LOG_INFO,"]\n\n");
1505: PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1506: PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1507: PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1508: #ifdef HAVE_LIBCAP_NG
1509: PrintOut(LOG_INFO," -C, --capabilities\n");
1510: PrintOut(LOG_INFO," Use capabilities.\n"
1511: " Warning: Mail notification does not work when used.\n\n");
1512: #endif
1513: PrintOut(LOG_INFO," -d, --debug\n");
1514: PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1515: PrintOut(LOG_INFO," -D, --showdirectives\n");
1516: PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1517: PrintOut(LOG_INFO," -h, --help, --usage\n");
1518: PrintOut(LOG_INFO," Display this help and exit\n\n");
1519: PrintOut(LOG_INFO," -i N, --interval=N\n");
1520: PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1521: PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1522: #ifndef _WIN32
1523: PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1524: #else
1525: PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1526: #endif
1527: #ifndef _WIN32
1528: PrintOut(LOG_INFO," -n, --no-fork\n");
1529: PrintOut(LOG_INFO," Do not fork into background\n\n");
1530: #endif // _WIN32
1531: PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1532: PrintOut(LOG_INFO," Write PID file NAME\n\n");
1533: PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1534: PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1535: PrintOut(LOG_INFO," -r, --report=TYPE\n");
1536: PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1537: PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1538: PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1539: #ifdef SMARTMONTOOLS_SAVESTATES
1540: PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_SAVESTATES"MODEL-SERIAL.TYPE.state]\n");
1541: #endif
1542: PrintOut(LOG_INFO,"\n");
1.1.1.3 misho 1543: PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
1544: PrintOut(LOG_INFO," Run executable NAME on warnings\n");
1545: #ifndef _WIN32
1546: PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_SYSCONFDIR"/smartd_warning.sh]\n\n");
1547: #else
1548: PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1549: #endif
1.1 misho 1550: #ifdef _WIN32
1551: PrintOut(LOG_INFO," --service\n");
1552: PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1553: PrintOut(LOG_INFO," smartd install [options]\n");
1554: PrintOut(LOG_INFO," Remove service with:\n");
1555: PrintOut(LOG_INFO," smartd remove\n\n");
1556: #endif // _WIN32
1557: PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1558: PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1559: }
1560:
1561: static int CloseDevice(smart_device * device, const char * name)
1562: {
1563: if (!device->close()){
1564: PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1565: return 1;
1566: }
1567: // device sucessfully closed
1568: return 0;
1569: }
1570:
// Return true if a char is not allowed in a state file name.
// Only the ASCII digits and the ASCII letters A-Z and a-z are allowed.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1578:
1579: // Read error count from Summary or Extended Comprehensive SMART error log
1580: // Return -1 on error
1581: static int read_ata_error_count(ata_device * device, const char * name,
1.1.1.3 misho 1582: firmwarebug_defs firmwarebugs, bool extended)
1.1 misho 1583: {
1584: if (!extended) {
1585: ata_smart_errorlog log;
1.1.1.3 misho 1586: if (ataReadErrorLog(device, &log, firmwarebugs)){
1.1 misho 1587: PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1588: return -1;
1589: }
1590: return (log.error_log_pointer ? log.ata_error_count : 0);
1591: }
1592: else {
1593: ata_smart_exterrlog logx;
1.1.1.3 misho 1594: if (!ataReadExtErrorLog(device, &logx, 1 /*first sector only*/, firmwarebugs)) {
1.1 misho 1595: PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1596: return -1;
1597: }
1598: // Some disks use the reserved byte as index, see ataprint.cpp.
1599: return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1600: }
1601: }
1602:
1603: // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1604: // error count, and top bits are the power-on hours of the last error.
1605: static int SelfTestErrorCount(ata_device * device, const char * name,
1.1.1.3 misho 1606: firmwarebug_defs firmwarebugs)
1.1 misho 1607: {
1608: struct ata_smart_selftestlog log;
1609:
1.1.1.3 misho 1610: if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1.1 misho 1611: PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1612: return -1;
1613: }
1614:
1615: // return current number of self-test errors
1.1.1.3 misho 1616: return ataPrintSmartSelfTestlog(&log, false, firmwarebugs);
1.1 misho 1617: }
1618:
// Decode a packed self-test result: the low 8 bits hold the self-test
// error count, the next 16 bits the power-on hours of the last error.
// The argument is parenthesized so that expressions like (a | b) are
// evaluated before the mask/shift is applied.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1621:
// Check offline data collection status.
// Returns true if the status code in the low 7 bits is 0x03; the top
// bit is ignored.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const unsigned code = status & 0x7fu;
  return code == 0x03u;
}
1627:
// Check self-test execution status.
// Returns true if the upper four bits of the status byte are all set.
static inline bool is_self_test_in_progress(unsigned char status)
{
  return (status & 0xf0) == 0xf0;
}
1633:
1.1 misho 1634: // Log offline data collection status
1635: static void log_offline_data_coll_status(const char * name, unsigned char status)
1636: {
1637: const char * msg;
1638: switch (status & 0x7f) {
1639: case 0x00: msg = "was never started"; break;
1640: case 0x02: msg = "was completed without error"; break;
1641: case 0x03: msg = "is in progress"; break;
1642: case 0x04: msg = "was suspended by an interrupting command from host"; break;
1643: case 0x05: msg = "was aborted by an interrupting command from host"; break;
1644: case 0x06: msg = "was aborted by the device with a fatal error"; break;
1645: default: msg = 0;
1646: }
1647:
1648: if (msg)
1649: PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1650: "Device: %s, offline data collection %s%s\n", name, msg,
1651: ((status & 0x80) ? " (auto:on)" : ""));
1652: else
1653: PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1654: name, status);
1655: }
1656:
1657: // Log self-test execution status
1658: static void log_self_test_exec_status(const char * name, unsigned char status)
1659: {
1660: const char * msg;
1661: switch (status >> 4) {
1662: case 0x0: msg = "completed without error"; break;
1663: case 0x1: msg = "was aborted by the host"; break;
1664: case 0x2: msg = "was interrupted by the host with a reset"; break;
1665: case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1666: case 0x4: msg = "completed with error (unknown test element)"; break;
1667: case 0x5: msg = "completed with error (electrical test element)"; break;
1668: case 0x6: msg = "completed with error (servo/seek test element)"; break;
1669: case 0x7: msg = "completed with error (read test element)"; break;
1670: case 0x8: msg = "completed with error (handling damage?)"; break;
1671: default: msg = 0;
1672: }
1673:
1674: if (msg)
1675: PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1676: "Device: %s, previous self-test %s\n", name, msg);
1677: else if ((status >> 4) == 0xf)
1678: PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1679: name, status & 0x0f);
1680: else
1681: PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1682: name, status);
1683: }
1684:
1685: // Check pending sector count id (-C, -U directives).
1686: static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1687: unsigned char id, const char * msg)
1688: {
1689: // Check attribute index
1690: int i = ata_find_attr_index(id, state.smartval);
1691: if (i < 0) {
1692: PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1693: cfg.name.c_str(), msg, id);
1694: return false;
1695: }
1696:
1697: // Check value
1698: uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1699: cfg.attribute_defs);
1700: if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1701: PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %"PRIu64" (0x%"PRIx64")\n",
1702: cfg.name.c_str(), msg, id, rawval, rawval);
1703: return false;
1704: }
1705:
1706: return true;
1707: }
1708:
1709: // Called by ATA/SCSIDeviceScan() after successful device check
1710: static void finish_device_scan(dev_config & cfg, dev_state & state)
1711: {
1712: // Set cfg.emailfreq if user hasn't set it
1713: if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1714: // Avoid that emails are suppressed forever due to state persistence
1715: if (cfg.state_file.empty())
1716: cfg.emailfreq = 1; // '-M once'
1717: else
1718: cfg.emailfreq = 2; // '-M daily'
1719: }
1720:
1721: // Start self-test regex check now if time was not read from state file
1722: if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1723: state.scheduled_test_next_check = time(0);
1724: }
1725:
1.1.1.2 misho 1726: // Common function to format result message for ATA setting
1727: static void format_set_result_msg(std::string & msg, const char * name, bool ok,
1728: int set_option = 0, bool has_value = false)
1729: {
1730: if (!msg.empty())
1731: msg += ", ";
1732: msg += name;
1733: if (!ok)
1734: msg += ":--";
1735: else if (set_option < 0)
1736: msg += ":off";
1737: else if (has_value)
1738: msg += strprintf(":%d", set_option-1);
1739: else if (set_option > 0)
1740: msg += ":on";
1741: }
1742:
1.1 misho 1743:
1744: // TODO: Add '-F swapid' directive
1745: const bool fix_swapped_id = false;
1746:
1747: // scan to see what ata devices there are, and if they support SMART
1748: static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev)
1749: {
1750: int supported=0;
1751: struct ata_identify_device drive;
1752: const char *name = cfg.name.c_str();
1753: int retid;
1754:
1755: // Device must be open
1756:
1757: // Get drive identity structure
1758: if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1759: if (retid<0)
1760: // Unable to read Identity structure
1761: PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1762: else
1763: PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1764: name, packetdevicetype(retid-1));
1765: CloseDevice(atadev, name);
1766: return 2;
1767: }
1768:
1.1.1.3 misho 1769: // Get drive identity, size and rotation rate (HDD/SSD)
1.1 misho 1770: char model[40+1], serial[20+1], firmware[8+1];
1771: ata_format_id_string(model, drive.model, sizeof(model)-1);
1772: ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1773: ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1774:
1775: ata_size_info sizes;
1776: ata_get_size_info(&drive, sizes);
1777: state.num_sectors = sizes.sectors;
1.1.1.3 misho 1778: cfg.dev_rpm = ata_get_rotation_rate(&drive);
1.1 misho 1779:
1780: char wwn[30]; wwn[0] = 0;
1781: unsigned oui = 0; uint64_t unique_id = 0;
1782: int naa = ata_get_wwn(&drive, oui, unique_id);
1783: if (naa >= 0)
1784: snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09"PRIx64", ", naa, oui, unique_id);
1785:
1.1.1.3 misho 1786: // Format device id string for warning emails
1.1 misho 1787: char cap[32];
1.1.1.3 misho 1788: cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
1789: format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1790:
1791: PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
1.1 misho 1792:
1793: // Show if device in database, and use preset vendor attribute
1794: // options unless user has requested otherwise.
1795: if (cfg.ignorepresets)
1796: PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1797: else {
1798: // Apply vendor specific presets, print warning if present
1799: const drive_settings * dbentry = lookup_drive_apply_presets(
1.1.1.3 misho 1800: &drive, cfg.attribute_defs, cfg.firmwarebugs);
1.1 misho 1801: if (!dbentry)
1802: PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1803: else {
1804: PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s\n",
1805: name, (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
1806: if (*dbentry->warningmsg)
1807: PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1808: }
1809: }
1810:
1811: // Set default '-C 197[+]' if no '-C ID' is specified.
1812: if (!cfg.curr_pending_set)
1813: cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
1814: // Set default '-U 198[+]' if no '-U ID' is specified.
1815: if (!cfg.offl_pending_set)
1816: cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
1817:
1818: // If requested, show which presets would be used for this drive
1819: if (cfg.showpresets) {
1820: int savedebugmode=debugmode;
1821: PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1822: if (!debugmode)
1823: debugmode=2;
1824: show_presets(&drive);
1825: debugmode=savedebugmode;
1826: }
1827:
1828: // see if drive supports SMART
1829: supported=ataSmartSupport(&drive);
1830: if (supported!=1) {
1831: if (supported==0)
1832: // drive does NOT support SMART
1833: PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1834: else
1835: // can't tell if drive supports SMART
1836: PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1837:
1838: // should we proceed anyway?
1839: if (cfg.permissive) {
1840: PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1841: }
1842: else {
1843: PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1844: CloseDevice(atadev, name);
1845: return 2;
1846: }
1847: }
1848:
1849: if (ataEnableSmart(atadev)) {
1850: // Enable SMART command has failed
1851: PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1852: CloseDevice(atadev, name);
1853: return 2;
1854: }
1855:
1856: // disable device attribute autosave...
1857: if (cfg.autosave==1) {
1858: if (ataDisableAutoSave(atadev))
1859: PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1860: else
1861: PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1862: }
1863:
1864: // or enable device attribute autosave
1865: if (cfg.autosave==2) {
1866: if (ataEnableAutoSave(atadev))
1867: PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1868: else
1869: PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1870: }
1871:
1872: // capability check: SMART status
1873: if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1874: PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1875: cfg.smartcheck = false;
1876: }
1877:
1878: // capability check: Read smart values and thresholds. Note that
1879: // smart values are ALSO needed even if we ONLY want to know if the
1880: // device is self-test log or error-log capable! After ATA-5, this
1881: // information was ALSO reproduced in the IDENTIFY DEVICE response,
1882: // but sadly not for ATA-5. Sigh.
1883:
1884: // do we need to get SMART data?
1885: bool smart_val_ok = false;
1886: if ( cfg.autoofflinetest || cfg.selftest
1887: || cfg.errorlog || cfg.xerrorlog
1888: || cfg.offlinests || cfg.selfteststs
1889: || cfg.usagefailed || cfg.prefail || cfg.usage
1890: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
1891: || cfg.curr_pending_id || cfg.offl_pending_id ) {
1892:
1893: if (ataReadSmartValues(atadev, &state.smartval)) {
1894: PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
1895: cfg.usagefailed = cfg.prefail = cfg.usage = false;
1896: cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1897: cfg.curr_pending_id = cfg.offl_pending_id = 0;
1898: }
1899: else {
1900: smart_val_ok = true;
1901: if (ataReadSmartThresholds(atadev, &state.smartthres)) {
1902: PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
1903: name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
1904: cfg.usagefailed = false;
1905: // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1906: memset(&state.smartthres, 0, sizeof(state.smartthres));
1907: }
1908: }
1909:
1910: // see if the necessary Attribute is there to monitor offline or
1911: // current pending sectors or temperature
1912: if ( cfg.curr_pending_id
1913: && !check_pending_id(cfg, state, cfg.curr_pending_id,
1914: "Current_Pending_Sector"))
1915: cfg.curr_pending_id = 0;
1916:
1917: if ( cfg.offl_pending_id
1918: && !check_pending_id(cfg, state, cfg.offl_pending_id,
1919: "Offline_Uncorrectable"))
1920: cfg.offl_pending_id = 0;
1921:
1922: if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1923: && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
1.1.1.3 misho 1924: PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
1925: name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
1.1 misho 1926: cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1927: }
1.1.1.2 misho 1928:
1929: // Report ignored '-r' or '-R' directives
1930: for (int id = 1; id <= 255; id++) {
1931: if (cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_PRINT)) {
1932: char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
1933: const char * excl = (cfg.monitor_attr_flags.is_set(id,
1934: (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
1935:
1936: int idx = ata_find_attr_index(id, state.smartval);
1937: if (idx < 0)
1938: PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
1939: else {
1940: bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
1941: if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
1942: PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
1943: (prefail ? "Prefailure" : "Usage"), opt, id, excl);
1944: }
1945: }
1946: }
1.1 misho 1947: }
1948:
1949: // enable/disable automatic on-line testing
1950: if (cfg.autoofflinetest) {
1951: // is this an enable or disable request?
1952: const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
1953: if (!smart_val_ok)
1954: PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
1955: else {
1956: // if command appears unsupported, issue a warning...
1957: if (!isSupportAutomaticTimer(&state.smartval))
1958: PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
1959: // ... but then try anyway
1960: if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
1961: PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
1962: else
1963: PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
1964: }
1965: }
1966:
1967: // Read log directories if required for capability check
1968: ata_smart_log_directory smart_logdir, gp_logdir;
1969: bool smart_logdir_ok = false, gp_logdir_ok = false;
1970:
1971: if ( isGeneralPurposeLoggingCapable(&drive)
1.1.1.3 misho 1972: && (cfg.errorlog || cfg.selftest)
1973: && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
1.1 misho 1974: if (!ataReadLogDirectory(atadev, &smart_logdir, false))
1975: smart_logdir_ok = true;
1976: }
1977:
1.1.1.3 misho 1978: if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
1.1 misho 1979: if (!ataReadLogDirectory(atadev, &gp_logdir, true))
1980: gp_logdir_ok = true;
1981: }
1982:
1983: // capability check: self-test-log
1984: state.selflogcount = 0; state.selfloghour = 0;
1985: if (cfg.selftest) {
1986: int retval;
1987: if (!( cfg.permissive
1988: || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
1989: || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
1990: PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
1991: cfg.selftest = false;
1992: }
1.1.1.3 misho 1993: else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
1.1 misho 1994: PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
1995: cfg.selftest = false;
1996: }
1997: else {
1998: state.selflogcount=SELFTEST_ERRORCOUNT(retval);
1999: state.selfloghour =SELFTEST_ERRORHOURS(retval);
2000: }
2001: }
2002:
2003: // capability check: ATA error log
2004: state.ataerrorcount = 0;
2005: if (cfg.errorlog) {
2006: int errcnt1;
2007: if (!( cfg.permissive
2008: || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2009: || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2010: PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2011: cfg.errorlog = false;
2012: }
1.1.1.3 misho 2013: else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
1.1 misho 2014: PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2015: cfg.errorlog = false;
2016: }
2017: else
2018: state.ataerrorcount = errcnt1;
2019: }
2020:
2021: if (cfg.xerrorlog) {
2022: int errcnt2;
1.1.1.3 misho 2023: if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
2024: || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
1.1 misho 2025: PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2026: name);
2027: cfg.xerrorlog = false;
2028: }
1.1.1.3 misho 2029: else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
1.1 misho 2030: PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2031: cfg.xerrorlog = false;
2032: }
2033: else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2034: PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2035: name, state.ataerrorcount, errcnt2);
2036: // Record max error count
2037: if (errcnt2 > state.ataerrorcount)
2038: state.ataerrorcount = errcnt2;
2039: }
2040: else
2041: state.ataerrorcount = errcnt2;
2042: }
2043:
2044: // capability check: self-test and offline data collection status
2045: if (cfg.offlinests || cfg.selfteststs) {
2046: if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2047: if (cfg.offlinests)
2048: PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2049: if (cfg.selfteststs)
2050: PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2051: cfg.offlinests = cfg.selfteststs = false;
2052: }
2053: }
2054:
2055: // capabilities check -- does it support powermode?
2056: if (cfg.powermode) {
2057: int powermode = ataCheckPowerMode(atadev);
2058:
2059: if (-1 == powermode) {
2060: PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2061: cfg.powermode=0;
2062: }
2063: else if (powermode!=0 && powermode!=0x80 && powermode!=0xff) {
2064: PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2065: name, powermode);
2066: cfg.powermode=0;
2067: }
2068: }
2069:
1.1.1.2 misho 2070: // Apply ATA settings
2071: std::string msg;
2072:
2073: if (cfg.set_aam)
2074: format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2075: ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2076: ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2077:
2078: if (cfg.set_apm)
2079: format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2080: ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2081: ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2082:
2083: if (cfg.set_lookahead)
2084: format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
2085: (cfg.set_lookahead > 0 ? ATA_ENABLE_READ_LOOK_AHEAD : ATA_DISABLE_READ_LOOK_AHEAD)),
2086: cfg.set_lookahead);
2087:
2088: if (cfg.set_wcache)
2089: format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
2090: (cfg.set_wcache > 0? ATA_ENABLE_WRITE_CACHE : ATA_DISABLE_WRITE_CACHE)), cfg.set_wcache);
2091:
2092: if (cfg.set_security_freeze)
2093: format_set_result_msg(msg, "Security freeze",
2094: ata_nodata_command(atadev, ATA_SECURITY_FREEZE_LOCK));
2095:
2096: if (cfg.set_standby)
2097: format_set_result_msg(msg, "Standby",
2098: ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2099:
2100: // Report as one log entry
2101: if (!msg.empty())
2102: PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2103:
1.1 misho 2104: // set SCT Error Recovery Control if requested
2105: if (cfg.sct_erc_set) {
2106: if (!isSCTErrorRecoveryControlCapable(&drive))
2107: PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2108: name);
2109: else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
2110: || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime))
2111: PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2112: else
2113: PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2114: name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2115: }
2116:
2117: // If no tests available or selected, return
2118: if (!( cfg.smartcheck || cfg.selftest
2119: || cfg.errorlog || cfg.xerrorlog
2120: || cfg.offlinests || cfg.selfteststs
2121: || cfg.usagefailed || cfg.prefail || cfg.usage
2122: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2123: CloseDevice(atadev, name);
2124: return 3;
2125: }
2126:
2127: // tell user we are registering device
2128: PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2129:
2130: // close file descriptor
2131: CloseDevice(atadev, name);
2132:
2133: if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2134: // Build file name for state file
2135: std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2136: std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2137: if (!state_path_prefix.empty()) {
2138: cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2139: // Read previous state
2140: if (read_dev_state(cfg.state_file.c_str(), state)) {
2141: PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2142: // Copy ATA attribute values to temp state
2143: state.update_temp_state();
2144: }
2145: }
2146: if (!attrlog_path_prefix.empty())
2147: cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2148: }
2149:
2150: finish_device_scan(cfg, state);
2151:
2152: return 0;
2153: }
2154:
2155: // on success, return 0. On failure, return >0. Never return <0,
2156: // please.
2157: static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev)
2158: {
2159: int k, err, req_len, avail_len, version, len;
2160: const char *device = cfg.name.c_str();
2161: struct scsi_iec_mode_page iec;
2162: UINT8 tBuf[64];
2163: UINT8 inqBuf[96];
2164: UINT8 vpdBuf[252];
1.1.1.3 misho 2165: char lu_id[64], serial[256], vendor[40], model[40];
1.1 misho 2166:
2167: // Device must be open
2168: memset(inqBuf, 0, 96);
2169: req_len = 36;
2170: if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2171: /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2172: req_len = 64;
2173: if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2174: PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2175: "skip device\n", device);
2176: return 2;
2177: }
2178: }
1.1.1.3 misho 2179: version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2180:
1.1 misho 2181: avail_len = inqBuf[4] + 5;
2182: len = (avail_len < req_len) ? avail_len : req_len;
2183: if (len < 36) {
2184: PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2185: "skip device\n", device);
2186: return 2;
2187: }
1.1.1.2 misho 2188:
2189: int pdt = inqBuf[0] & 0x1f;
2190:
2191: if (! ((0 == pdt) || (4 == pdt) || (5 == pdt) || (7 == pdt) ||
2192: (0xe == pdt))) {
2193: PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2194: "skip\n", device, pdt);
2195: return 2;
2196: }
1.1.1.3 misho 2197:
2198: if (supported_vpd_pages_p) {
2199: delete supported_vpd_pages_p;
2200: supported_vpd_pages_p = NULL;
2201: }
2202: supported_vpd_pages_p = new supported_vpd_pages(scsidev);
2203:
1.1 misho 2204: lu_id[0] = '\0';
1.1.1.3 misho 2205: if ((version >= 0x3) && (version < 0x8)) {
2206: /* SPC to SPC-5 */
2207: if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_DEVICE_IDENTIFICATION,
2208: vpdBuf, sizeof(vpdBuf))) {
1.1 misho 2209: len = vpdBuf[3];
2210: scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), NULL);
2211: }
1.1.1.3 misho 2212: }
2213: serial[0] = '\0';
2214: if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_UNIT_SERIAL_NUMBER,
2215: vpdBuf, sizeof(vpdBuf))) {
2216: len = vpdBuf[3];
2217: vpdBuf[4 + len] = '\0';
2218: scsi_format_id_string(serial, (const unsigned char *)&vpdBuf[4], len);
2219: }
1.1 misho 2220:
2221: unsigned int lb_size;
2222: char si_str[64];
1.1.1.3 misho 2223: uint64_t capacity = scsiGetSize(scsidev, &lb_size, NULL);
1.1 misho 2224:
2225: if (capacity)
2226: format_capacity(si_str, sizeof(si_str), capacity);
2227: else
2228: si_str[0] = '\0';
1.1.1.3 misho 2229:
2230: // Format device id string for warning emails
2231: cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2232: (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2233: (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2234: (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2235: (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2236:
2237: // format "model" string
2238: scsi_format_id_string(vendor, (const unsigned char *)&inqBuf[8], 8);
2239: scsi_format_id_string(model, (const unsigned char *)&inqBuf[16], 16);
2240: PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
1.1 misho 2241:
2242: // check that device is ready for commands. IE stores its stuff on
2243: // the media.
2244: if ((err = scsiTestUnitReady(scsidev))) {
2245: if (SIMPLE_ERR_NOT_READY == err)
2246: PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2247: else if (SIMPLE_ERR_NO_MEDIUM == err)
2248: PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2249: else if (SIMPLE_ERR_BECOMING_READY == err)
2250: PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2251: else
2252: PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2253: CloseDevice(scsidev, device);
2254: return 2;
2255: }
2256:
2257: // Badly-conforming USB storage devices may fail this check.
2258: // The response to the following IE mode page fetch (current and
2259: // changeable values) is carefully examined. It has been found
2260: // that various USB devices that malform the response will lock up
2261: // if asked for a log page (e.g. temperature) so it is best to
2262: // bail out now.
2263: if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2264: state.modese_len = iec.modese_len;
2265: else if (SIMPLE_ERR_BAD_FIELD == err)
2266: ; /* continue since it is reasonable not to support IE mpage */
2267: else { /* any other error (including malformed response) unreasonable */
2268: PrintOut(LOG_INFO,
2269: "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2270: device, err);
2271: CloseDevice(scsidev, device);
2272: return 3;
2273: }
2274:
2275: // N.B. The following is passive (i.e. it doesn't attempt to turn on
2276: // smart if it is off). This may change to be the same as the ATA side.
2277: if (!scsi_IsExceptionControlEnabled(&iec)) {
2278: PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2279: "Try 'smartctl -s on %s' to turn on SMART features\n",
2280: device, device);
2281: CloseDevice(scsidev, device);
2282: return 3;
2283: }
2284:
2285: // Flag that certain log pages are supported (information may be
2286: // available from other sources).
2287: if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0)) {
2288: for (k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2289: switch (tBuf[k]) {
2290: case TEMPERATURE_LPAGE:
2291: state.TempPageSupported = 1;
2292: break;
2293: case IE_LPAGE:
2294: state.SmartPageSupported = 1;
2295: break;
1.1.1.3 misho 2296: case READ_ERROR_COUNTER_LPAGE:
2297: state.ReadECounterPageSupported = 1;
2298: break;
2299: case WRITE_ERROR_COUNTER_LPAGE:
2300: state.WriteECounterPageSupported = 1;
2301: break;
2302: case VERIFY_ERROR_COUNTER_LPAGE:
2303: state.VerifyECounterPageSupported = 1;
2304: break;
2305: case NON_MEDIUM_ERROR_LPAGE:
2306: state.NonMediumErrorPageSupported = 1;
2307: break;
1.1 misho 2308: default:
2309: break;
2310: }
2311: }
2312: }
2313:
2314: // Check if scsiCheckIE() is going to work
2315: {
2316: UINT8 asc = 0;
2317: UINT8 ascq = 0;
2318: UINT8 currenttemp = 0;
2319: UINT8 triptemp = 0;
2320:
2321: if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2322: &asc, &ascq, ¤ttemp, &triptemp)) {
2323: PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2324: state.SuppressReport = 1;
2325: if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
1.1.1.3 misho 2326: PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2327: device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
1.1 misho 2328: cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2329: }
2330: }
2331: }
2332:
2333: // capability check: self-test-log
2334: if (cfg.selftest){
2335: int retval = scsiCountFailedSelfTests(scsidev, 0);
2336: if (retval<0) {
2337: // no self-test log, turn off monitoring
2338: PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2339: cfg.selftest = false;
2340: state.selflogcount = 0;
2341: state.selfloghour = 0;
2342: }
2343: else {
2344: // register starting values to watch for changes
2345: state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2346: state.selfloghour =SELFTEST_ERRORHOURS(retval);
2347: }
2348: }
2349:
2350: // disable autosave (set GLTSD bit)
2351: if (cfg.autosave==1){
2352: if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2353: PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2354: else
2355: PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2356: }
2357:
2358: // or enable autosave (clear GLTSD bit)
2359: if (cfg.autosave==2){
2360: if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2361: PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2362: else
2363: PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2364: }
2365:
2366: // tell user we are registering device
2367: PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2368:
1.1.1.2 misho 2369: // Make sure that init_standby_check() ignores SCSI devices
2370: cfg.offlinests_ns = cfg.selfteststs_ns = false;
2371:
1.1 misho 2372: // close file descriptor
2373: CloseDevice(scsidev, device);
2374:
1.1.1.3 misho 2375: if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2376: // Build file name for state file
2377: std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2378: std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2379: if (!state_path_prefix.empty()) {
2380: cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2381: // Read previous state
2382: if (read_dev_state(cfg.state_file.c_str(), state)) {
2383: PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2384: // Copy ATA attribute values to temp state
2385: state.update_temp_state();
2386: }
2387: }
2388: if (!attrlog_path_prefix.empty())
2389: cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2390: }
2391:
1.1 misho 2392: finish_device_scan(cfg, state);
2393:
2394: return 0;
2395: }
2396:
2397: // If the self-test log has got more self-test errors (or more recent
2398: // self-test errors) recorded, then notify user.
2399: static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2400: {
2401: const char * name = cfg.name.c_str();
2402:
2403: if (newi<0)
2404: // command failed
2405: MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2406: else {
2407: reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2408:
2409: // old and new error counts
2410: int oldc=state.selflogcount;
2411: int newc=SELFTEST_ERRORCOUNT(newi);
2412:
2413: // old and new error timestamps in hours
2414: int oldh=state.selfloghour;
2415: int newh=SELFTEST_ERRORHOURS(newi);
2416:
2417: if (oldc<newc) {
2418: // increase in error count
2419: PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2420: name, oldc, newc);
2421: MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2422: name, oldc, newc);
2423: state.must_write = true;
2424: }
2425: else if (newc > 0 && oldh != newh) {
2426: // more recent error
2427: // a 'more recent' error might actually be a smaller hour number,
2428: // if the hour number has wrapped.
2429: // There's still a bug here. You might just happen to run a new test
2430: // exactly 32768 hours after the previous failure, and have run exactly
2431: // 20 tests between the two, in which case smartd will miss the
2432: // new failure.
2433: PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2434: name, newh);
1.1.1.3 misho 2435: MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
1.1 misho 2436: name, newh);
2437: state.must_write = true;
2438: }
2439:
2440: // Print info if error entries have disappeared
2441: // or newer successful successful extended self-test exits
2442: if (oldc > newc) {
2443: PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2444: name, oldc, newc);
2445: if (newc == 0)
2446: reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2447: }
2448:
2449: // Needed since self-test error count may DECREASE. Hour might
2450: // also have changed.
2451: state.selflogcount= newc;
2452: state.selfloghour = newh;
2453: }
2454: return;
2455: }
2456:
2457: // Test types, ordered by priority.
2458: static const char test_type_chars[] = "LncrSCO";
2459: static const unsigned num_test_types = sizeof(test_type_chars)-1;
2460:
2461: // returns test type if time to do test of type testtype,
2462: // 0 if not time to do test.
2463: static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2464: {
2465: // check that self-testing has been requested
2466: if (cfg.test_regex.empty())
2467: return 0;
2468:
2469: // Exit if drive not capable of any test
2470: if ( state.not_cap_long && state.not_cap_short &&
2471: (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2472: return 0;
2473:
2474: // since we are about to call localtime(), be sure glibc is informed
2475: // of any timezone changes we make.
2476: if (!usetime)
2477: FixGlibcTimeZoneBug();
2478:
2479: // Is it time for next check?
2480: time_t now = (!usetime ? time(0) : usetime);
2481: if (now < state.scheduled_test_next_check)
2482: return 0;
2483:
2484: // Limit time check interval to 90 days
2485: if (state.scheduled_test_next_check + (3600L*24*90) < now)
2486: state.scheduled_test_next_check = now - (3600L*24*90);
2487:
2488: // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2489: char testtype = 0;
2490: time_t testtime = 0; int testhour = 0;
2491: int maxtest = num_test_types-1;
2492:
2493: for (time_t t = state.scheduled_test_next_check; ; ) {
2494: struct tm * tms = localtime(&t);
2495: // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2496: int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2497: for (int i = 0; i <= maxtest; i++) {
2498: // Skip if drive not capable of this test
2499: switch (test_type_chars[i]) {
2500: case 'L': if (state.not_cap_long) continue; break;
2501: case 'S': if (state.not_cap_short) continue; break;
2502: case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2503: case 'O': if (scsi || state.not_cap_offline) continue; break;
2504: case 'c': case 'n':
2505: case 'r': if (scsi || state.not_cap_selective) continue; break;
2506: default: continue;
2507: }
2508: // Try match of "T/MM/DD/d/HH"
2509: char pattern[16];
2510: snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2511: test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2512: if (cfg.test_regex.full_match(pattern)) {
2513: // Test found
2514: testtype = pattern[0];
2515: testtime = t; testhour = tms->tm_hour;
2516: // Limit further matches to higher priority self-tests
2517: maxtest = i-1;
2518: break;
2519: }
2520: }
2521: // Exit if no tests left or current time reached
2522: if (maxtest < 0)
2523: break;
2524: if (t >= now)
2525: break;
2526: // Check next hour
2527: if ((t += 3600) > now)
2528: t = now;
2529: }
2530:
2531: // Do next check not before next hour.
2532: struct tm * tmnow = localtime(&now);
2533: state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2534:
2535: if (testtype) {
2536: state.must_write = true;
2537: // Tell user if an old test was found.
2538: if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2539: char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2540: PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2541: cfg.name.c_str(), testtype, datebuf);
2542: }
2543: }
2544:
2545: return testtype;
2546: }
2547:
2548: // Print a list of future tests.
2549: static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2550: {
2551: unsigned numdev = configs.size();
2552: if (!numdev)
2553: return;
2554: std::vector<int> testcnts(numdev * num_test_types, 0);
2555:
2556: PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2557:
2558: // FixGlibcTimeZoneBug(); // done in PrintOut()
2559: time_t now = time(0);
2560: char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2561: dateandtimezoneepoch(datenow, now);
2562:
2563: long seconds;
2564: for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2565: // Check for each device whether a test will be run
2566: time_t testtime = now + seconds;
2567: for (unsigned i = 0; i < numdev; i++) {
2568: const dev_config & cfg = configs.at(i);
2569: dev_state & state = states.at(i);
2570: const char * p;
2571: char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2572: if (testtype && (p = strchr(test_type_chars, testtype))) {
2573: unsigned t = (p - test_type_chars);
2574: // Report at most 5 tests of each type
2575: if (++testcnts[i*num_test_types + t] <= 5) {
2576: dateandtimezoneepoch(date, testtime);
2577: PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2578: testcnts[i*num_test_types + t], testtype, date);
2579: }
2580: }
2581: }
2582: }
2583:
2584: // Report totals
2585: dateandtimezoneepoch(date, now+seconds);
2586: PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2587: for (unsigned i = 0; i < numdev; i++) {
2588: const dev_config & cfg = configs.at(i);
2589: bool scsi = devices.at(i)->is_scsi();
2590: for (unsigned t = 0; t < num_test_types; t++) {
2591: int cnt = testcnts[i*num_test_types + t];
2592: if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2593: continue;
2594: PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2595: cnt, (cnt==1?"":"s"), test_type_chars[t]);
2596: }
2597: }
2598:
2599: }
2600:
2601: // Return zero on success, nonzero on failure. Perform offline (background)
2602: // short or long (extended) self test on given scsi device.
2603: static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2604: {
2605: int retval = 0;
2606: const char *testname = 0;
2607: const char *name = cfg.name.c_str();
2608: int inProgress;
2609:
2610: if (scsiSelfTestInProgress(device, &inProgress)) {
2611: PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2612: state.not_cap_short = state.not_cap_long = true;
2613: return 1;
2614: }
2615:
2616: if (1 == inProgress) {
2617: PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2618: "progress.\n", name);
2619: return 1;
2620: }
2621:
2622: switch (testtype) {
2623: case 'S':
2624: testname = "Short Self";
2625: retval = scsiSmartShortSelfTest(device);
2626: break;
2627: case 'L':
2628: testname = "Long Self";
2629: retval = scsiSmartExtendSelfTest(device);
2630: break;
2631: }
2632: // If we can't do the test, exit
2633: if (NULL == testname) {
2634: PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2635: testtype);
2636: return 1;
2637: }
2638: if (retval) {
2639: if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2640: (SIMPLE_ERR_BAD_FIELD == retval)) {
2641: PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2642: testname);
2643: if ('L'==testtype)
2644: state.not_cap_long = true;
2645: else
2646: state.not_cap_short = true;
2647:
2648: return 1;
2649: }
2650: PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2651: testname, retval);
2652: return 1;
2653: }
2654:
2655: PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2656:
2657: return 0;
2658: }
2659:
2660: // Do an offline immediate or self-test. Return zero on success,
2661: // nonzero on failure.
2662: static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2663: {
2664: const char *name = cfg.name.c_str();
2665:
2666: // Read current smart data and check status/capability
2667: struct ata_smart_values data;
2668: if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2669: PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2670: return 1;
2671: }
2672:
2673: // Check for capability to do the test
2674: int dotest = -1, mode = 0;
2675: const char *testname = 0;
2676: switch (testtype) {
2677: case 'O':
2678: testname="Offline Immediate ";
2679: if (isSupportExecuteOfflineImmediate(&data))
2680: dotest=OFFLINE_FULL_SCAN;
2681: else
2682: state.not_cap_offline = true;
2683: break;
2684: case 'C':
2685: testname="Conveyance Self-";
2686: if (isSupportConveyanceSelfTest(&data))
2687: dotest=CONVEYANCE_SELF_TEST;
2688: else
2689: state.not_cap_conveyance = true;
2690: break;
2691: case 'S':
2692: testname="Short Self-";
2693: if (isSupportSelfTest(&data))
2694: dotest=SHORT_SELF_TEST;
2695: else
2696: state.not_cap_short = true;
2697: break;
2698: case 'L':
2699: testname="Long Self-";
2700: if (isSupportSelfTest(&data))
2701: dotest=EXTEND_SELF_TEST;
2702: else
2703: state.not_cap_long = true;
2704: break;
2705:
2706: case 'c': case 'n': case 'r':
2707: testname = "Selective Self-";
2708: if (isSupportSelectiveSelfTest(&data)) {
2709: dotest = SELECTIVE_SELF_TEST;
2710: switch (testtype) {
2711: case 'c': mode = SEL_CONT; break;
2712: case 'n': mode = SEL_NEXT; break;
2713: case 'r': mode = SEL_REDO; break;
2714: }
2715: }
2716: else
2717: state.not_cap_selective = true;
2718: break;
2719: }
2720:
2721: // If we can't do the test, exit
2722: if (dotest<0) {
2723: PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2724: return 1;
2725: }
2726:
2727: // If currently running a self-test, do not interrupt it to start another.
2728: if (15==(data.self_test_exec_status >> 4)) {
1.1.1.3 misho 2729: if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
1.1 misho 2730: PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2731: "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2732: } else {
2733: PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2734: name, testname, (int)(data.self_test_exec_status & 0x0f));
2735: return 1;
2736: }
2737: }
2738:
2739: if (dotest == SELECTIVE_SELF_TEST) {
2740: // Set test span
2741: ata_selective_selftest_args selargs, prev_args;
2742: selargs.num_spans = 1;
2743: selargs.span[0].mode = mode;
2744: prev_args.num_spans = 1;
2745: prev_args.span[0].start = state.selective_test_last_start;
2746: prev_args.span[0].end = state.selective_test_last_end;
2747: if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
2748: PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
2749: return 1;
2750: }
2751: uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
2752: PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %"PRIu64" - %"PRIu64" (%"PRIu64" sectors, %u%% - %u%% of disk).\n",
2753: name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
2754: start, end, end - start + 1,
2755: (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
2756: (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
2757: state.selective_test_last_start = start;
2758: state.selective_test_last_end = end;
2759: }
2760:
2761: // execute the test, and return status
2762: int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
2763: if (retval) {
2764: PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2765: return retval;
2766: }
2767:
1.1.1.2 misho 2768: // Report recent test start to do_disable_standby_check()
2769: // and force log of next test status
2770: if (testtype == 'O')
2771: state.offline_started = true;
2772: else
2773: state.selftest_started = true;
1.1 misho 2774:
2775: PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2776: return 0;
2777: }
2778:
2779: // Check pending sector count attribute values (-C, -U directives).
2780: static void check_pending(const dev_config & cfg, dev_state & state,
2781: unsigned char id, bool increase_only,
2782: const ata_smart_values & smartval,
2783: int mailtype, const char * msg)
2784: {
2785: // Find attribute index
2786: int i = ata_find_attr_index(id, smartval);
2787: if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
2788: return;
2789:
2790: // No report if no sectors pending.
2791: uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
2792: if (rawval == 0) {
2793: reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
2794: return;
2795: }
2796:
2797: // If attribute is not reset, report only sector count increases.
2798: uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
2799: if (!(!increase_only || prev_rawval < rawval))
2800: return;
2801:
2802: // Format message.
2803: std::string s = strprintf("Device: %s, %"PRId64" %s", cfg.name.c_str(), rawval, msg);
2804: if (prev_rawval > 0 && rawval != prev_rawval)
2805: s += strprintf(" (changed %+"PRId64")", rawval - prev_rawval);
2806:
2807: PrintOut(LOG_CRIT, "%s\n", s.c_str());
1.1.1.3 misho 2808: MailWarning(cfg, state, mailtype, "%s", s.c_str());
1.1 misho 2809: state.must_write = true;
2810: }
2811:
// Return the temperature 'x' as decimal text for log messages.
// A value of 0 means "unset" and yields the constant string "??",
// any other value is printed into 'buf' and 'buf' is returned.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (x != 0) {
    snprintf(buf, sizeof(buf), "%u", x);
    return buf;
  }
  return "??";
}
2820:
2821: // Check Temperature limits
2822: static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
2823: {
2824: if (!(0 < currtemp && currtemp < 255)) {
2825: PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
2826: return;
2827: }
2828:
2829: // Update Max Temperature
2830: const char * minchg = "", * maxchg = "";
2831: if (currtemp > state.tempmax) {
2832: if (state.tempmax)
2833: maxchg = "!";
2834: state.tempmax = currtemp;
2835: state.must_write = true;
2836: }
2837:
2838: char buf[20];
2839: if (!state.temperature) {
2840: // First check
2841: if (!state.tempmin || currtemp < state.tempmin)
2842: // Delay Min Temperature update by ~ 30 minutes.
2843: state.tempmin_delay = time(0) + CHECKTIME - 60;
2844: PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2845: cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
2846: if (triptemp)
2847: PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
2848: state.temperature = currtemp;
2849: }
2850: else {
2851: if (state.tempmin_delay) {
2852: // End Min Temperature update delay if ...
2853: if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
2854: || (state.tempmin_delay <= time(0))) { // or delay time is over.
2855: state.tempmin_delay = 0;
2856: if (!state.tempmin)
2857: state.tempmin = 255;
2858: }
2859: }
2860:
2861: // Update Min Temperature
2862: if (!state.tempmin_delay && currtemp < state.tempmin) {
2863: state.tempmin = currtemp;
2864: state.must_write = true;
2865: if (currtemp != state.temperature)
2866: minchg = "!";
2867: }
2868:
2869: // Track changes
2870: if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
2871: PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
2872: cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2873: state.temperature = currtemp;
2874: }
2875: }
2876:
2877: // Check limits
2878: if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
2879: PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2880: cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
1.1.1.3 misho 2881: MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
1.1 misho 2882: cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2883: }
2884: else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
2885: PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2886: cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2887: }
2888: else if (cfg.tempcrit) {
2889: unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
2890: if (currtemp < limit)
2891: reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
2892: }
2893: }
2894:
2895: // Check normalized and raw attribute values.
2896: static void check_attribute(const dev_config & cfg, dev_state & state,
2897: const ata_smart_attribute & attr,
2898: const ata_smart_attribute & prev,
2899: int attridx,
2900: const ata_smart_threshold_entry * thresholds)
2901: {
2902: // Check attribute and threshold
2903: ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
2904: if (attrstate == ATTRSTATE_NON_EXISTING)
2905: return;
2906:
2907: // If requested, check for usage attributes that have failed.
2908: if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
2909: && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
1.1.1.3 misho 2910: std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
1.1 misho 2911: PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
2912: MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
2913: state.must_write = true;
2914: }
2915:
2916: // Return if we're not tracking this type of attribute
2917: bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
2918: if (!( ( prefail && cfg.prefail)
2919: || (!prefail && cfg.usage )))
2920: return;
2921:
2922: // Return if '-I ID' was specified
2923: if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
2924: return;
2925:
2926: // Issue warning if they don't have the same ID in all structures.
2927: if (attr.id != prev.id) {
2928: PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
2929: cfg.name.c_str(), attr.id, prev.id);
2930: return;
2931: }
2932:
2933: // Compare normalized values if valid.
2934: bool valchanged = false;
2935: if (attrstate > ATTRSTATE_NO_NORMVAL) {
2936: if (attr.current != prev.current)
2937: valchanged = true;
2938: }
2939:
2940: // Compare raw values if requested.
2941: bool rawchanged = false;
2942: if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
2943: if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
2944: != ata_get_attr_raw_value(prev, cfg.attribute_defs))
2945: rawchanged = true;
2946: }
2947:
2948: // Return if no change
2949: if (!(valchanged || rawchanged))
2950: return;
2951:
2952: // Format value strings
2953: std::string currstr, prevstr;
2954: if (attrstate == ATTRSTATE_NO_NORMVAL) {
2955: // Print raw values only
2956: currstr = strprintf("%s (Raw)",
2957: ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2958: prevstr = strprintf("%s (Raw)",
2959: ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2960: }
2961: else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
2962: // Print normalized and raw values
2963: currstr = strprintf("%d [Raw %s]", attr.current,
2964: ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2965: prevstr = strprintf("%d [Raw %s]", prev.current,
2966: ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2967: }
2968: else {
2969: // Print normalized values only
2970: currstr = strprintf("%d", attr.current);
2971: prevstr = strprintf("%d", prev.current);
2972: }
2973:
2974: // Format message
2975: std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
2976: cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
1.1.1.3 misho 2977: ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
1.1 misho 2978: prevstr.c_str(), currstr.c_str());
2979:
2980: // Report this change as critical ?
2981: if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
2982: || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
2983: PrintOut(LOG_CRIT, "%s\n", msg.c_str());
2984: MailWarning(cfg, state, 2, "%s", msg.c_str());
2985: }
2986: else {
2987: PrintOut(LOG_INFO, "%s\n", msg.c_str());
2988: }
2989: state.must_write = true;
2990: }
2991:
2992:
2993: static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
2994: bool firstpass, bool allow_selftests)
2995: {
2996: const char * name = cfg.name.c_str();
2997:
2998: // If user has asked, test the email warning system
2999: if (cfg.emailtest)
3000: MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3001:
3002: // if we can't open device, fail gracefully rather than hard --
3003: // perhaps the next time around we'll be able to open it. ATAPI
3004: // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
3005: // given (see linux cdrom driver).
3006: if (!atadev->open()) {
3007: PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, atadev->get_errmsg());
3008: MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3009: return 1;
3010: }
3011: if (debugmode)
3012: PrintOut(LOG_INFO,"Device: %s, opened ATA device\n", name);
3013: reset_warning_mail(cfg, state, 9, "open device worked again");
3014:
3015: // user may have requested (with the -n Directive) to leave the disk
3016: // alone if it is in idle or sleeping mode. In this case check the
3017: // power mode and exit without check if needed
3018: if (cfg.powermode && !state.powermodefail) {
3019: int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3020: const char * mode = 0;
3021: if (0 <= powermode && powermode < 0xff) {
3022: // wait for possible spin up and check again
3023: int powermode2;
3024: sleep(5);
3025: powermode2 = ataCheckPowerMode(atadev);
3026: if (powermode2 > powermode)
3027: PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3028: powermode = powermode2;
3029: }
3030:
3031: switch (powermode){
3032: case -1:
3033: // SLEEP
3034: mode="SLEEP";
3035: if (cfg.powermode>=1)
3036: dontcheck=1;
3037: break;
3038: case 0:
3039: // STANDBY
3040: mode="STANDBY";
3041: if (cfg.powermode>=2)
3042: dontcheck=1;
3043: break;
3044: case 0x80:
3045: // IDLE
3046: mode="IDLE";
3047: if (cfg.powermode>=3)
3048: dontcheck=1;
3049: break;
3050: case 0xff:
3051: // ACTIVE/IDLE
3052: mode="ACTIVE or IDLE";
3053: break;
3054: default:
3055: // UNKNOWN
3056: PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3057: name, powermode);
3058: state.powermodefail = true;
3059: break;
3060: }
3061:
3062: // if we are going to skip a check, return now
3063: if (dontcheck){
3064: // skip at most powerskipmax checks
3065: if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3066: CloseDevice(atadev, name);
3067: if (!state.powerskipcnt && !cfg.powerquiet) // report first only and avoid waking up system disk
3068: PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3069: state.powerskipcnt++;
3070: return 0;
3071: }
3072: else {
3073: PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3074: name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3075: }
3076: state.powerskipcnt = 0;
3077: state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3078: }
3079: else if (state.powerskipcnt) {
3080: PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3081: name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3082: state.powerskipcnt = 0;
3083: state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3084: }
3085: }
3086:
3087: // check smart status
3088: if (cfg.smartcheck) {
3089: int status=ataSmartStatus2(atadev);
3090: if (status==-1){
3091: PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3092: MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3093: state.must_write = true;
3094: }
3095: else if (status==1){
3096: PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3097: MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3098: state.must_write = true;
3099: }
3100: }
3101:
3102: // Check everything that depends upon SMART Data (eg, Attribute values)
3103: if ( cfg.usagefailed || cfg.prefail || cfg.usage
3104: || cfg.curr_pending_id || cfg.offl_pending_id
3105: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3106: || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3107:
3108: // Read current attribute values.
3109: ata_smart_values curval;
3110: if (ataReadSmartValues(atadev, &curval)){
3111: PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3112: MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3113: state.must_write = true;
3114: }
3115: else {
3116: reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3117:
3118: // look for current or offline pending sectors
3119: if (cfg.curr_pending_id)
3120: check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3121: (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3122: : "Total unreadable (pending) sectors" ));
3123:
3124: if (cfg.offl_pending_id)
3125: check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3126: (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3127: : "Total offline uncorrectable sectors"));
3128:
3129: // check temperature limits
3130: if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3131: CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3132:
3133: // look for failed usage attributes, or track usage or prefail attributes
3134: if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3135: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3136: check_attribute(cfg, state,
3137: curval.vendor_attributes[i],
3138: state.smartval.vendor_attributes[i],
3139: i, state.smartthres.thres_entries);
3140: }
3141: }
3142:
3143: // Log changes of offline data collection status
3144: if (cfg.offlinests) {
3145: if ( curval.offline_data_collection_status
3146: != state.smartval.offline_data_collection_status
1.1.1.2 misho 3147: || state.offline_started // test was started in previous call
1.1 misho 3148: || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3149: log_offline_data_coll_status(name, curval.offline_data_collection_status);
3150: }
3151:
3152: // Log changes of self-test execution status
3153: if (cfg.selfteststs) {
3154: if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
1.1.1.2 misho 3155: || state.selftest_started // test was started in previous call
1.1 misho 3156: || (firstpass && (debugmode || curval.self_test_exec_status != 0x00)))
3157: log_self_test_exec_status(name, curval.self_test_exec_status);
3158: }
3159:
3160: // Save the new values for the next time around
3161: state.smartval = curval;
3162: }
3163: }
1.1.1.2 misho 3164: state.offline_started = state.selftest_started = false;
1.1 misho 3165:
3166: // check if number of selftest errors has increased (note: may also DECREASE)
3167: if (cfg.selftest)
1.1.1.3 misho 3168: CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
1.1 misho 3169:
3170: // check if number of ATA errors has increased
3171: if (cfg.errorlog || cfg.xerrorlog) {
3172:
3173: int errcnt1 = -1, errcnt2 = -1;
3174: if (cfg.errorlog)
1.1.1.3 misho 3175: errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
1.1 misho 3176: if (cfg.xerrorlog)
1.1.1.3 misho 3177: errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
1.1 misho 3178:
3179: // new number of errors is max of both logs
3180: int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3181:
3182: // did command fail?
3183: if (newc<0)
3184: // lack of PrintOut here is INTENTIONAL
3185: MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3186:
3187: // has error count increased?
3188: int oldc = state.ataerrorcount;
3189: if (newc>oldc){
3190: PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3191: name, oldc, newc);
3192: MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3193: name, oldc, newc);
3194: state.must_write = true;
3195: }
3196:
3197: if (newc>=0)
3198: state.ataerrorcount=newc;
3199: }
3200:
3201: // if the user has asked, and device is capable (or we're not yet
3202: // sure) check whether a self test should be done now.
3203: if (allow_selftests && !cfg.test_regex.empty()) {
3204: char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3205: if (testtype)
3206: DoATASelfTest(cfg, state, atadev, testtype);
3207: }
3208:
3209: // Don't leave device open -- the OS/user may want to access it
3210: // before the next smartd cycle!
3211: CloseDevice(atadev, name);
3212:
3213: // Copy ATA attribute values to persistent state
3214: state.update_persistent_state();
3215:
3216: return 0;
3217: }
3218:
3219: static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3220: {
3221: UINT8 asc, ascq;
3222: UINT8 currenttemp;
3223: UINT8 triptemp;
1.1.1.3 misho 3224: UINT8 tBuf[252];
1.1 misho 3225: const char * name = cfg.name.c_str();
3226: const char *cp;
3227:
3228: // If the user has asked for it, test the email warning system
3229: if (cfg.emailtest)
3230: MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3231:
3232: // if we can't open device, fail gracefully rather than hard --
3233: // perhaps the next time around we'll be able to open it
3234: if (!scsidev->open()) {
3235: PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, scsidev->get_errmsg());
3236: MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3237: return 1;
3238: } else if (debugmode)
3239: PrintOut(LOG_INFO,"Device: %s, opened SCSI device\n", name);
1.1.1.3 misho 3240: reset_warning_mail(cfg, state, 9, "open device worked again");
1.1 misho 3241: currenttemp = 0;
3242: asc = 0;
3243: ascq = 0;
3244: if (!state.SuppressReport) {
3245: if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3246: &asc, &ascq, ¤ttemp, &triptemp)) {
3247: PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3248: name);
3249: MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3250: state.SuppressReport = 1;
3251: }
3252: }
3253: if (asc > 0) {
3254: cp = scsiGetIEString(asc, ascq);
3255: if (cp) {
3256: PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3257: MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
1.1.1.3 misho 3258: } else if (asc == 4 && ascq == 9) {
3259: PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
1.1 misho 3260: } else if (debugmode)
3261: PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3262: name, (int)asc, (int)ascq);
3263: } else if (debugmode)
3264: PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3265:
3266: // check temperature limits
1.1.1.3 misho 3267: if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit || !cfg.attrlog_file.empty())
1.1 misho 3268: CheckTemperature(cfg, state, currenttemp, triptemp);
3269:
3270: // check if number of selftest errors has increased (note: may also DECREASE)
3271: if (cfg.selftest)
3272: CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3273:
3274: if (allow_selftests && !cfg.test_regex.empty()) {
3275: char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3276: if (testtype)
3277: DoSCSISelfTest(cfg, state, scsidev, testtype);
3278: }
1.1.1.3 misho 3279: if (!cfg.attrlog_file.empty()){
3280: // saving error counters to state
3281: if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3282: READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3283: scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[0].errCounter);
3284: state.scsi_error_counters[0].found=1;
3285: }
3286: if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3287: WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3288: scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[1].errCounter);
3289: state.scsi_error_counters[1].found=1;
3290: }
3291: if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3292: VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3293: scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[2].errCounter);
3294: state.scsi_error_counters[2].found=1;
3295: }
3296: if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3297: NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3298: scsiDecodeNonMediumErrPage(tBuf, &state.scsi_nonmedium_error.nme);
3299: state.scsi_nonmedium_error.found=1;
3300: }
3301: }
1.1 misho 3302: CloseDevice(scsidev, name);
3303: return 0;
3304: }
3305:
1.1.1.2 misho 3306: // 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
3307: static int standby_disable_state = 0;
3308:
3309: static void init_disable_standby_check(dev_config_vector & configs)
3310: {
3311: // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3312: bool sts1 = false, sts2 = false;
3313: for (unsigned i = 0; i < configs.size() && !(sts1 || sts2); i++) {
3314: const dev_config & cfg = configs.at(i);
3315: if (cfg.offlinests_ns)
3316: sts1 = true;
3317: if (cfg.selfteststs_ns)
3318: sts2 = true;
3319: }
3320:
3321: // Check for support of disable auto standby
3322: // Reenable standby if smartd.conf was reread
3323: if (sts1 || sts2 || standby_disable_state == 3) {
3324: if (!smi()->disable_system_auto_standby(false)) {
3325: if (standby_disable_state == 3)
3326: PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3327: if (sts1 || sts2) {
3328: PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3329: (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
3330: sts1 = sts2 = false;
3331: }
3332: }
3333: }
3334:
3335: standby_disable_state = (sts1 || sts2 ? 1 : 0);
3336: }
3337:
3338: static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3339: {
3340: if (!standby_disable_state)
3341: return;
3342:
3343: // Check for just started or still running self-tests
3344: bool running = false;
3345: for (unsigned i = 0; i < configs.size() && !running; i++) {
3346: const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3347:
3348: if ( ( cfg.offlinests_ns
3349: && (state.offline_started ||
3350: is_offl_coll_in_progress(state.smartval.offline_data_collection_status)))
3351: || ( cfg.selfteststs_ns
3352: && (state.selftest_started ||
3353: is_self_test_in_progress(state.smartval.self_test_exec_status))) )
3354: running = true;
3355: // state.offline/selftest_started will be reset after next logging of test status
3356: }
3357:
3358: // Disable/enable auto standby and log state changes
3359: if (!running) {
3360: if (standby_disable_state != 1) {
3361: if (!smi()->disable_system_auto_standby(false))
3362: PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
3363: smi()->get_errmsg());
3364: else
3365: PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
3366: standby_disable_state = 1;
3367: }
3368: }
3369: else if (!smi()->disable_system_auto_standby(true)) {
3370: if (standby_disable_state != 2) {
3371: PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3372: smi()->get_errmsg());
3373: standby_disable_state = 2;
3374: }
3375: }
3376: else {
3377: if (standby_disable_state != 3) {
3378: PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
3379: standby_disable_state = 3;
3380: }
3381: }
3382: }
3383:
1.1 misho 3384: // Checks the SMART status of all ATA and SCSI devices
3385: static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3386: smart_device_list & devices, bool firstpass, bool allow_selftests)
3387: {
3388: for (unsigned i = 0; i < configs.size(); i++) {
3389: const dev_config & cfg = configs.at(i);
3390: dev_state & state = states.at(i);
3391: smart_device * dev = devices.at(i);
3392: if (dev->is_ata())
3393: ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3394: else if (dev->is_scsi())
3395: SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3396: }
1.1.1.2 misho 3397:
3398: do_disable_standby_check(configs, states);
1.1 misho 3399: }
3400:
// Set if Initialize() was called (false until then)
static bool is_initialized = false;
3403:
3404: // Does initialization right after fork to daemon mode
3405: static void Initialize(time_t *wakeuptime)
3406: {
3407: // Call Goodbye() on exit
3408: is_initialized = true;
3409:
3410: // write PID file
3411: if (!debugmode)
3412: WritePidFile();
3413:
3414: // install signal handlers. On Solaris, can't use signal() because
3415: // it resets the handler to SIG_DFL after each call. So use sigset()
3416: // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
3417:
3418: // normal and abnormal exit
3419: if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
3420: SIGNALFN(SIGTERM, SIG_IGN);
3421: if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
3422: SIGNALFN(SIGQUIT, SIG_IGN);
3423:
3424: // in debug mode, <CONTROL-C> ==> HUP
3425: if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
3426: SIGNALFN(SIGINT, SIG_IGN);
3427:
3428: // Catch HUP and USR1
3429: if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
3430: SIGNALFN(SIGHUP, SIG_IGN);
3431: if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
3432: SIGNALFN(SIGUSR1, SIG_IGN);
3433: #ifdef _WIN32
3434: if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
3435: SIGNALFN(SIGUSR2, SIG_IGN);
3436: #endif
3437:
3438: // initialize wakeup time to CURRENT time
3439: *wakeuptime=time(NULL);
3440:
3441: return;
3442: }
3443:
#ifdef _WIN32
// Toggle debug mode implemented for native windows only
// (there is no easy way to reopen tty on *nix).
// Mode 0 is switched to 1 and mode 1 back to 0; any other mode is kept.
static void ToggleDebugMode()
{
  switch (debugmode) {
    case 0:
      PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
      // A nonzero result from daemon_enable_console() is treated as failure
      if (daemon_enable_console("smartd [Debug]")) {
        PrintOut(LOG_INFO,"enable console failed\n");
        break;
      }
      debugmode = 1;
      daemon_signal(SIGINT, HUPhandler);
      PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
      break;

    case 1:
      daemon_disable_console();
      debugmode = 0;
      daemon_signal(SIGINT, sighandler);
      PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
      break;

    default:
      PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
      break;
  }
}
#endif
3469:
3470: static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
3471: {
3472: // If past wake-up-time, compute next wake-up-time
3473: time_t timenow=time(NULL);
3474: while (wakeuptime<=timenow){
3475: int intervals=1+(timenow-wakeuptime)/checktime;
3476: wakeuptime+=intervals*checktime;
3477: }
3478:
3479: // sleep until we catch SIGUSR1 or have completed sleeping
3480: int addtime = 0;
3481: while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
3482:
3483: // protect user again system clock being adjusted backwards
3484: if (wakeuptime>timenow+checktime){
3485: PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3486: wakeuptime=timenow+checktime;
3487: }
3488:
3489: // Exit sleep when time interval has expired or a signal is received
3490: sleep(wakeuptime+addtime-timenow);
3491:
3492: #ifdef _WIN32
3493: // toggle debug mode?
3494: if (caughtsigUSR2) {
3495: ToggleDebugMode();
3496: caughtsigUSR2 = 0;
3497: }
3498: #endif
3499:
3500: timenow=time(NULL);
3501:
3502: // Actual sleep time too long?
3503: if (!addtime && timenow > wakeuptime+60) {
3504: if (debugmode)
3505: PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
3506: (int)(timenow-wakeuptime));
3507: // Wait another 20 seconds to avoid I/O errors during disk spin-up
3508: addtime = timenow-wakeuptime+20;
3509: // Use next wake-up-time if close
3510: int nextcheck = checktime - addtime % checktime;
3511: if (nextcheck <= 20)
3512: addtime += nextcheck;
3513: }
3514: }
3515:
3516: // if we caught a SIGUSR1 then print message and clear signal
3517: if (caughtsigUSR1){
3518: PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3519: wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
3520: caughtsigUSR1=0;
3521: sigwakeup = true;
3522: }
3523:
3524: // return adjusted wakeuptime
3525: return wakeuptime;
3526: }
3527:
3528: // Print out a list of valid arguments for the Directive d
3529: static void printoutvaliddirectiveargs(int priority, char d)
3530: {
3531: switch (d) {
3532: case 'n':
3533: PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3534: break;
3535: case 's':
3536: PrintOut(priority, "valid_regular_expression");
3537: break;
3538: case 'd':
3539: PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3540: break;
3541: case 'T':
3542: PrintOut(priority, "normal, permissive");
3543: break;
3544: case 'o':
3545: case 'S':
3546: PrintOut(priority, "on, off");
3547: break;
3548: case 'l':
3549: PrintOut(priority, "error, selftest");
3550: break;
3551: case 'M':
3552: PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3553: break;
3554: case 'v':
3555: PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3556: break;
3557: case 'P':
3558: PrintOut(priority, "use, ignore, show, showall");
3559: break;
3560: case 'F':
1.1.1.3 misho 3561: PrintOut(priority, "%s", get_valid_firmwarebug_args());
3562: break;
1.1.1.2 misho 3563: case 'e':
3564: PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], "
3565: "security-freeze, standby,[N|off], wcache,[on|off]");
1.1 misho 3566: break;
3567: }
3568: }
3569:
3570: // exits with an error message, or returns integer value of token
3571: static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3572: int min, int max, char * suffix = 0)
3573: {
3574: // make sure argument is there
3575: if (!arg) {
3576: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3577: cfgfile, lineno, name, token, min, max);
3578: return -1;
3579: }
3580:
3581: // get argument value (base 10), check that it's integer, and in-range
3582: char *endptr;
3583: int val = strtol(arg,&endptr,10);
3584:
3585: // optional suffix present?
3586: if (suffix) {
3587: if (!strcmp(endptr, suffix))
3588: endptr += strlen(suffix);
3589: else
3590: *suffix = 0;
3591: }
3592:
3593: if (!(!*endptr && min <= val && val <= max)) {
3594: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3595: cfgfile, lineno, name, token, arg, min, max);
3596: return -1;
3597: }
3598:
3599: // all is well; return value
3600: return val;
3601: }
3602:
3603:
3604: // Get 1-3 small integer(s) for '-W' directive
3605: static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3606: unsigned char *val1, unsigned char *val2, unsigned char *val3)
3607: {
3608: unsigned v1 = 0, v2 = 0, v3 = 0;
3609: int n1 = -1, n2 = -1, n3 = -1, len;
3610: if (!arg) {
3611: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3612: cfgfile, lineno, name, token);
3613: return -1;
3614: }
3615:
3616: len = strlen(arg);
3617: if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3618: && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3619: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3620: cfgfile, lineno, name, token, arg);
3621: return -1;
3622: }
3623: *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3624: return 0;
3625: }
3626:
3627:
1.1.1.3 misho 3628: #ifdef _WIN32
3629:
// Get the next strtok() token, concatenating strtok() results if quoted
// with "...".
//
// Returns 0 if no token remains and the token itself if it does not start
// with a double quote.  For a quoted token, returns the text between the
// quotes with the parts joined by single spaces, or the string "\"" if the
// closing quote is missing.  A dequoted result points to a function-static
// buffer that is overwritten by the next call.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(0, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;
  token = t+1;

  // Opening and closing quote within the same token (no delimiter inside
  // the quotes): strip the closing quote and stop here instead of
  // swallowing the following tokens.
  if (!token.empty() && token[token.size()-1] == '"') {
    token.erase(token.size()-1);
    return token.c_str();
  }

  for (;;) {
    t = strtok(0, delimiters);
    if (!t || !*t)
      // Line ended before the closing quote was found
      return "\"";
    token += ' ';
    int len = strlen(t);
    if (t[len-1] == '"') {
      token += std::string(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
3653:
3654: #endif // _WIN32
3655:
3656:
1.1 misho 3657: // This function returns 1 if it has correctly parsed one token (and
3658: // any arguments), else zero if no tokens remain. It returns -1 if an
3659: // error was encountered.
3660: static int ParseToken(char * token, dev_config & cfg)
3661: {
3662: char sym;
3663: const char * name = cfg.name.c_str();
3664: int lineno=cfg.lineno;
3665: const char *delim = " \n\t";
3666: int badarg = 0;
3667: int missingarg = 0;
3668: const char *arg = 0;
3669:
3670: // is the rest of the line a comment
3671: if (*token=='#')
3672: return 1;
3673:
3674: // is the token not recognized?
3675: if (*token!='-' || strlen(token)!=2) {
3676: PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3677: configfile, lineno, name, token);
3678: PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
3679: return -1;
3680: }
3681:
3682: // token we will be parsing:
3683: sym=token[1];
3684:
3685: // parse the token and swallow its argument
3686: int val;
3687: char plus[] = "+", excl[] = "!";
3688:
3689: switch (sym) {
3690: case 'C':
3691: // monitor current pending sector count (default 197)
3692: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3693: return -1;
3694: cfg.curr_pending_id = (unsigned char)val;
3695: cfg.curr_pending_incr = (*plus == '+');
3696: cfg.curr_pending_set = true;
3697: break;
3698: case 'U':
3699: // monitor offline uncorrectable sectors (default 198)
3700: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3701: return -1;
3702: cfg.offl_pending_id = (unsigned char)val;
3703: cfg.offl_pending_incr = (*plus == '+');
3704: cfg.offl_pending_set = true;
3705: break;
3706: case 'T':
3707: // Set tolerance level for SMART command failures
3708: if ((arg = strtok(NULL, delim)) == NULL) {
3709: missingarg = 1;
3710: } else if (!strcmp(arg, "normal")) {
3711: // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3712: // not on failure of an optional S.M.A.R.T. command.
3713: // This is the default so we don't need to actually do anything here.
3714: cfg.permissive = false;
3715: } else if (!strcmp(arg, "permissive")) {
3716: // Permissive mode; ignore errors from Mandatory SMART commands
3717: cfg.permissive = true;
3718: } else {
3719: badarg = 1;
3720: }
3721: break;
3722: case 'd':
3723: // specify the device type
3724: if ((arg = strtok(NULL, delim)) == NULL) {
3725: missingarg = 1;
1.1.1.3 misho 3726: } else if (!strcmp(arg, "ignore")) {
3727: cfg.ignore = true;
1.1 misho 3728: } else if (!strcmp(arg, "removable")) {
3729: cfg.removable = true;
3730: } else if (!strcmp(arg, "auto")) {
3731: cfg.dev_type = "";
3732: } else {
3733: cfg.dev_type = arg;
3734: }
3735: break;
3736: case 'F':
3737: // fix firmware bug
1.1.1.3 misho 3738: if (!(arg = strtok(0, delim)))
1.1 misho 3739: missingarg = 1;
1.1.1.3 misho 3740: else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
1.1 misho 3741: badarg = 1;
3742: break;
3743: case 'H':
3744: // check SMART status
3745: cfg.smartcheck = true;
3746: break;
3747: case 'f':
3748: // check for failure of usage attributes
3749: cfg.usagefailed = true;
3750: break;
3751: case 't':
3752: // track changes in all vendor attributes
3753: cfg.prefail = true;
3754: cfg.usage = true;
3755: break;
3756: case 'p':
3757: // track changes in prefail vendor attributes
3758: cfg.prefail = true;
3759: break;
3760: case 'u':
3761: // track changes in usage vendor attributes
3762: cfg.usage = true;
3763: break;
3764: case 'l':
3765: // track changes in SMART logs
3766: if ((arg = strtok(NULL, delim)) == NULL) {
3767: missingarg = 1;
3768: } else if (!strcmp(arg, "selftest")) {
3769: // track changes in self-test log
3770: cfg.selftest = true;
3771: } else if (!strcmp(arg, "error")) {
3772: // track changes in ATA error log
3773: cfg.errorlog = true;
3774: } else if (!strcmp(arg, "xerror")) {
3775: // track changes in Extended Comprehensive SMART error log
3776: cfg.xerrorlog = true;
3777: } else if (!strcmp(arg, "offlinests")) {
3778: // track changes in offline data collection status
3779: cfg.offlinests = true;
1.1.1.2 misho 3780: } else if (!strcmp(arg, "offlinests,ns")) {
3781: // track changes in offline data collection status, disable auto standby
3782: cfg.offlinests = cfg.offlinests_ns = true;
1.1 misho 3783: } else if (!strcmp(arg, "selfteststs")) {
3784: // track changes in self-test execution status
3785: cfg.selfteststs = true;
1.1.1.2 misho 3786: } else if (!strcmp(arg, "selfteststs,ns")) {
3787: // track changes in self-test execution status, disable auto standby
3788: cfg.selfteststs = cfg.selfteststs_ns = true;
1.1 misho 3789: } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
3790: // set SCT Error Recovery Control
3791: unsigned rt = ~0, wt = ~0; int nc = -1;
3792: sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
3793: if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
3794: cfg.sct_erc_set = true;
3795: cfg.sct_erc_readtime = rt;
3796: cfg.sct_erc_writetime = wt;
3797: }
3798: else
3799: badarg = 1;
3800: } else {
3801: badarg = 1;
3802: }
3803: break;
3804: case 'a':
3805: // monitor everything
3806: cfg.smartcheck = true;
3807: cfg.prefail = true;
3808: cfg.usagefailed = true;
3809: cfg.usage = true;
3810: cfg.selftest = true;
3811: cfg.errorlog = true;
3812: cfg.selfteststs = true;
3813: break;
3814: case 'o':
3815: // automatic offline testing enable/disable
3816: if ((arg = strtok(NULL, delim)) == NULL) {
3817: missingarg = 1;
3818: } else if (!strcmp(arg, "on")) {
3819: cfg.autoofflinetest = 2;
3820: } else if (!strcmp(arg, "off")) {
3821: cfg.autoofflinetest = 1;
3822: } else {
3823: badarg = 1;
3824: }
3825: break;
3826: case 'n':
3827: // skip disk check if in idle or standby mode
3828: if (!(arg = strtok(NULL, delim)))
3829: missingarg = 1;
3830: else {
3831: char *endptr = NULL;
3832: char *next = strchr(const_cast<char*>(arg), ',');
3833:
3834: cfg.powerquiet = false;
3835: cfg.powerskipmax = 0;
3836:
3837: if (next!=NULL) *next='\0';
3838: if (!strcmp(arg, "never"))
3839: cfg.powermode = 0;
3840: else if (!strcmp(arg, "sleep"))
3841: cfg.powermode = 1;
3842: else if (!strcmp(arg, "standby"))
3843: cfg.powermode = 2;
3844: else if (!strcmp(arg, "idle"))
3845: cfg.powermode = 3;
3846: else
3847: badarg = 1;
3848:
3849: // if optional arguments are present
3850: if (!badarg && next!=NULL) {
3851: next++;
3852: cfg.powerskipmax = strtol(next, &endptr, 10);
3853: if (endptr == next)
3854: cfg.powerskipmax = 0;
3855: else {
3856: next = endptr + (*endptr != '\0');
3857: if (cfg.powerskipmax <= 0)
3858: badarg = 1;
3859: }
3860: if (*next != '\0') {
3861: if (!strcmp("q", next))
3862: cfg.powerquiet = true;
3863: else {
3864: badarg = 1;
3865: }
3866: }
3867: }
3868: }
3869: break;
3870: case 'S':
3871: // automatic attribute autosave enable/disable
3872: if ((arg = strtok(NULL, delim)) == NULL) {
3873: missingarg = 1;
3874: } else if (!strcmp(arg, "on")) {
3875: cfg.autosave = 2;
3876: } else if (!strcmp(arg, "off")) {
3877: cfg.autosave = 1;
3878: } else {
3879: badarg = 1;
3880: }
3881: break;
3882: case 's':
3883: // warn user, and delete any previously given -s REGEXP Directives
3884: if (!cfg.test_regex.empty()){
3885: PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3886: configfile, lineno, name, cfg.test_regex.get_pattern());
3887: cfg.test_regex = regular_expression();
3888: }
3889: // check for missing argument
3890: if (!(arg = strtok(NULL, delim))) {
3891: missingarg = 1;
3892: }
3893: // Compile regex
3894: else {
3895: if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
3896: // not a valid regular expression!
3897: PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3898: configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
3899: return -1;
3900: }
3901: }
3902: // Do a bit of sanity checking and warn user if we think that
3903: // their regexp is "strange". User probably confused about shell
3904: // glob(3) syntax versus regular expression syntax regexp(7).
3905: if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
3906: PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3907: configfile, lineno, name, val+1, arg[val], arg);
3908: break;
3909: case 'm':
3910: // send email to address that follows
3911: if (!(arg = strtok(NULL,delim)))
3912: missingarg = 1;
3913: else {
3914: if (!cfg.emailaddress.empty())
3915: PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3916: configfile, lineno, name, cfg.emailaddress.c_str());
1.1.1.3 misho 3917: #ifdef _WIN32
3918: if ( !strcmp(arg, "msgbox") || !strcmp(arg, "sysmsgbox")
3919: || str_starts_with(arg, "msgbox,") || str_starts_with(arg, "sysmsgbox,")) {
3920: cfg.emailaddress = "console";
3921: const char * arg2 = strchr(arg, ',');
3922: if (arg2)
3923: cfg.emailaddress += arg2;
3924: PrintOut(LOG_INFO, "File %s line %d (drive %s): Deprecated -m %s changed to -m %s\n",
3925: configfile, lineno, name, arg, cfg.emailaddress.c_str());
3926: }
3927: else
3928: #endif
1.1 misho 3929: cfg.emailaddress = arg;
3930: }
3931: break;
3932: case 'M':
3933: // email warning options
3934: if (!(arg = strtok(NULL, delim)))
3935: missingarg = 1;
3936: else if (!strcmp(arg, "once"))
3937: cfg.emailfreq = 1;
3938: else if (!strcmp(arg, "daily"))
3939: cfg.emailfreq = 2;
3940: else if (!strcmp(arg, "diminishing"))
3941: cfg.emailfreq = 3;
3942: else if (!strcmp(arg, "test"))
3943: cfg.emailtest = 1;
3944: else if (!strcmp(arg, "exec")) {
3945: // Get the next argument (the command line)
1.1.1.3 misho 3946: #ifdef _WIN32
3947: // Allow "/path name/with spaces/..." on Windows
3948: arg = strtok_dequote(delim);
3949: if (arg && arg[0] == '"') {
3950: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
3951: configfile, lineno, name, token);
3952: return -1;
3953: }
3954: #else
3955: arg = strtok(0, delim);
3956: #endif
3957: if (!arg) {
1.1 misho 3958: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3959: configfile, lineno, name, token);
3960: return -1;
3961: }
3962: // Free the last cmd line given if any, and copy new one
3963: if (!cfg.emailcmdline.empty())
3964: PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3965: configfile, lineno, name, cfg.emailcmdline.c_str());
3966: cfg.emailcmdline = arg;
3967: }
3968: else
3969: badarg = 1;
3970: break;
3971: case 'i':
3972: // ignore failure of usage attribute
3973: if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3974: return -1;
3975: cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
3976: break;
3977: case 'I':
3978: // ignore attribute for tracking purposes
3979: if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3980: return -1;
3981: cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
3982: break;
3983: case 'r':
3984: // print raw value when tracking
3985: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3986: return -1;
3987: cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
3988: if (*excl == '!') // attribute change is critical
3989: cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
3990: break;
3991: case 'R':
3992: // track changes in raw value (forces printing of raw value)
3993: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3994: return -1;
3995: cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
3996: if (*excl == '!') // raw value change is critical
3997: cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
3998: break;
3999: case 'W':
4000: // track Temperature
4001: if ((val=Get3Integers(arg=strtok(NULL,delim), name, token, lineno, configfile,
4002: &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit))<0)
4003: return -1;
4004: break;
4005: case 'v':
4006: // non-default vendor-specific attribute meaning
4007: if (!(arg=strtok(NULL,delim))) {
4008: missingarg = 1;
4009: } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4010: badarg = 1;
4011: }
4012: break;
4013: case 'P':
4014: // Define use of drive-specific presets.
4015: if (!(arg = strtok(NULL, delim))) {
4016: missingarg = 1;
4017: } else if (!strcmp(arg, "use")) {
4018: cfg.ignorepresets = false;
4019: } else if (!strcmp(arg, "ignore")) {
4020: cfg.ignorepresets = true;
4021: } else if (!strcmp(arg, "show")) {
4022: cfg.showpresets = true;
4023: } else if (!strcmp(arg, "showall")) {
4024: showallpresets();
4025: } else {
4026: badarg = 1;
4027: }
4028: break;
1.1.1.2 misho 4029:
4030: case 'e':
4031: // Various ATA settings
4032: if (!(arg = strtok(NULL, delim))) {
4033: missingarg = true;
4034: }
4035: else {
4036: char arg2[16+1]; unsigned val;
4037: int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4038: if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &val, &n3) >= 1
4039: && (n1 == len || n2 > 0)) {
4040: bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4041: bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
4042: if (n3 != len)
4043: val = ~0U;
4044:
4045: if (!strcmp(arg2, "aam")) {
4046: if (off)
4047: cfg.set_aam = -1;
4048: else if (val <= 254)
4049: cfg.set_aam = val + 1;
4050: else
4051: badarg = true;
4052: }
4053: else if (!strcmp(arg2, "apm")) {
4054: if (off)
4055: cfg.set_apm = -1;
4056: else if (1 <= val && val <= 254)
4057: cfg.set_apm = val + 1;
4058: else
4059: badarg = true;
4060: }
4061: else if (!strcmp(arg2, "lookahead")) {
4062: if (off)
4063: cfg.set_lookahead = -1;
4064: else if (on)
4065: cfg.set_lookahead = 1;
4066: else
4067: badarg = true;
4068: }
4069: else if (!strcmp(arg, "security-freeze")) {
4070: cfg.set_security_freeze = true;
4071: }
4072: else if (!strcmp(arg2, "standby")) {
4073: if (off)
4074: cfg.set_standby = 0 + 1;
4075: else if (val <= 255)
4076: cfg.set_standby = val + 1;
4077: else
4078: badarg = true;
4079: }
4080: else if (!strcmp(arg2, "wcache")) {
4081: if (off)
4082: cfg.set_wcache = -1;
4083: else if (on)
4084: cfg.set_wcache = 1;
4085: else
4086: badarg = true;
4087: }
4088: else
4089: badarg = true;
4090: }
4091: else
4092: badarg = true;
4093: }
4094: break;
4095:
1.1 misho 4096: default:
4097: // Directive not recognized
4098: PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4099: configfile, lineno, name, token);
4100: Directives();
4101: return -1;
4102: }
4103: if (missingarg) {
4104: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4105: configfile, lineno, name, token);
4106: }
4107: if (badarg) {
4108: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4109: configfile, lineno, name, token, arg);
4110: }
4111: if (missingarg || badarg) {
4112: PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4113: printoutvaliddirectiveargs(LOG_CRIT, sym);
4114: PrintOut(LOG_CRIT, "\n");
4115: return -1;
4116: }
4117:
4118: return 1;
4119: }
4120:
// Scan directive for configuration file.
// ParseConfigLine() compares the first token of a line against this name.
#define SCANDIRECTIVE "DEVICESCAN"
4123:
4124: // This is the routine that adds things to the conf_entries list.
4125: //
4126: // Return values are:
4127: // 1: parsed a normal line
1.1.1.2 misho 4128: // 0: found DEFAULT setting or comment or blank line
1.1 misho 4129: // -1: found SCANDIRECTIVE line
4130: // -2: found an error
4131: //
4132: // Note: this routine modifies *line from the caller!
1.1.1.2 misho 4133: static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf, int lineno, /*const*/ char * line)
1.1 misho 4134: {
4135: const char *delim = " \n\t";
4136:
4137: // get first token: device name. If a comment, skip line
1.1.1.2 misho 4138: const char * name = strtok(line, delim);
4139: if (!name || *name == '#')
1.1 misho 4140: return 0;
4141:
1.1.1.2 misho 4142: // Check device name for DEFAULT or DEVICESCAN
4143: int retval;
4144: if (!strcmp("DEFAULT", name)) {
4145: retval = 0;
4146: // Restart with empty defaults
4147: default_conf = dev_config();
1.1 misho 4148: }
1.1.1.2 misho 4149: else {
4150: retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4151: // Init new entry with current defaults
4152: conf_entries.push_back(default_conf);
4153: }
4154: dev_config & cfg = (retval ? conf_entries.back() : default_conf);
1.1 misho 4155:
4156: cfg.name = name; // Later replaced by dev->get_info().info_name
4157: cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
1.1.1.2 misho 4158: cfg.lineno = lineno;
1.1 misho 4159:
4160: // parse tokens one at a time from the file.
1.1.1.2 misho 4161: while (char * token = strtok(0, delim)) {
4162: int rc = ParseToken(token, cfg);
4163: if (rc < 0)
1.1 misho 4164: // error found on the line
4165: return -2;
1.1.1.2 misho 4166:
4167: if (rc == 0)
4168: // No tokens left
4169: break;
4170:
4171: // PrintOut(LOG_INFO,"Parsed token %s\n",token);
1.1 misho 4172: }
1.1.1.2 misho 4173:
4174: // Don't perform checks below for DEFAULT entries
4175: if (retval == 0)
4176: return retval;
4177:
1.1 misho 4178: // If NO monitoring directives are set, then set all of them.
4179: if (!( cfg.smartcheck || cfg.selftest
4180: || cfg.errorlog || cfg.xerrorlog
4181: || cfg.offlinests || cfg.selfteststs
4182: || cfg.usagefailed || cfg.prefail || cfg.usage
4183: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4184:
4185: PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4186: cfg.name.c_str(), cfg.lineno, configfile);
4187:
4188: cfg.smartcheck = true;
4189: cfg.usagefailed = true;
4190: cfg.prefail = true;
4191: cfg.usage = true;
4192: cfg.selftest = true;
4193: cfg.errorlog = true;
4194: cfg.selfteststs = true;
4195: }
4196:
4197: // additional sanity check. Has user set -M options without -m?
4198: if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
4199: PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4200: cfg.name.c_str(), cfg.lineno, configfile);
4201: return -2;
4202: }
4203:
4204: // has the user has set <nomailer>?
4205: if (cfg.emailaddress == "<nomailer>") {
4206: // check that -M exec is also set
4207: if (cfg.emailcmdline.empty()){
4208: PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4209: cfg.name.c_str(), cfg.lineno, configfile);
4210: return -2;
4211: }
1.1.1.3 misho 4212: // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
1.1 misho 4213: cfg.emailaddress.clear();
4214: }
4215:
1.1.1.2 misho 4216: return retval;
1.1 misho 4217: }
4218:
4219: // Parses a configuration file. Return values are:
4220: // N=>0: found N entries
4221: // -1: syntax error in config file
4222: // -2: config file does not exist
4223: // -3: config file exists but cannot be read
4224: //
4225: // In the case where the return value is 0, there are three
4226: // possiblities:
4227: // Empty configuration file ==> conf_entries.empty()
4228: // No configuration file ==> conf_entries[0].lineno == 0
4229: // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4230: static int ParseConfigFile(dev_config_vector & conf_entries)
4231: {
4232: // maximum line length in configuration file
4233: const int MAXLINELEN = 256;
4234: // maximum length of a continued line in configuration file
4235: const int MAXCONTLINE = 1023;
4236:
4237: stdio_file f;
4238: // Open config file, if it exists and is not <stdin>
4239: if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4240: if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4241: // file exists but we can't read it or it should exist due to '-c' option
4242: int ret = (errno!=ENOENT ? -3 : -2);
4243: PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4244: strerror(errno),configfile);
4245: return ret;
4246: }
4247: }
4248: else // read from stdin ('-c -' option)
4249: f.open(stdin);
4250:
1.1.1.2 misho 4251: // Start with empty defaults
4252: dev_config default_conf;
4253:
1.1 misho 4254: // No configuration file found -- use fake one
4255: int entry = 0;
4256: if (!f) {
4257: char fakeconfig[] = SCANDIRECTIVE" -a"; // TODO: Remove this hack, build cfg_entry.
4258:
1.1.1.2 misho 4259: if (ParseConfigLine(conf_entries, default_conf, 0, fakeconfig) != -1)
1.1 misho 4260: throw std::logic_error("Internal error parsing "SCANDIRECTIVE);
4261: return 0;
4262: }
4263:
4264: #ifdef __CYGWIN__
4265: setmode(fileno(f), O_TEXT); // Allow files with \r\n
4266: #endif
4267:
4268: // configuration file exists
4269: PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4270:
4271: // parse config file line by line
4272: int lineno = 1, cont = 0, contlineno = 0;
4273: char line[MAXLINELEN+2];
4274: char fullline[MAXCONTLINE+1];
4275:
4276: for (;;) {
4277: int len=0,scandevice;
4278: char *lastslash;
4279: char *comment;
4280: char *code;
4281:
4282: // make debugging simpler
4283: memset(line,0,sizeof(line));
4284:
4285: // get a line
4286: code=fgets(line, MAXLINELEN+2, f);
4287:
4288: // are we at the end of the file?
4289: if (!code){
4290: if (cont) {
1.1.1.2 misho 4291: scandevice = ParseConfigLine(conf_entries, default_conf, contlineno, fullline);
1.1 misho 4292: // See if we found a SCANDIRECTIVE directive
4293: if (scandevice==-1)
4294: return 0;
4295: // did we find a syntax error
4296: if (scandevice==-2)
4297: return -1;
4298: // the final line is part of a continuation line
4299: cont=0;
4300: entry+=scandevice;
4301: }
4302: break;
4303: }
4304:
4305: // input file line number
4306: contlineno++;
4307:
4308: // See if line is too long
4309: len=strlen(line);
4310: if (len>MAXLINELEN){
4311: const char *warn;
4312: if (line[len-1]=='\n')
4313: warn="(including newline!) ";
4314: else
4315: warn="";
4316: PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4317: (int)contlineno,configfile,warn,(int)MAXLINELEN);
4318: return -1;
4319: }
4320:
4321: // Ignore anything after comment symbol
4322: if ((comment=strchr(line,'#'))){
4323: *comment='\0';
4324: len=strlen(line);
4325: }
4326:
4327: // is the total line (made of all continuation lines) too long?
4328: if (cont+len>MAXCONTLINE){
4329: PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4330: lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4331: return -1;
4332: }
4333:
4334: // copy string so far into fullline, and increment length
1.1.1.3 misho 4335: snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
1.1 misho 4336: cont+=len;
4337:
4338: // is this a continuation line. If so, replace \ by space and look at next line
4339: if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4340: *(fullline+(cont-len)+(lastslash-line))=' ';
4341: continue;
4342: }
4343:
4344: // Not a continuation line. Parse it
1.1.1.2 misho 4345: scandevice = ParseConfigLine(conf_entries, default_conf, contlineno, fullline);
1.1 misho 4346:
4347: // did we find a scandevice directive?
4348: if (scandevice==-1)
4349: return 0;
4350: // did we find a syntax error
4351: if (scandevice==-2)
4352: return -1;
4353:
4354: entry+=scandevice;
4355: lineno++;
4356: cont=0;
4357: }
4358:
4359: // note -- may be zero if syntax of file OK, but no valid entries!
4360: return entry;
4361: }
4362:
4363: /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4364: <LIST> is the list of valid arguments for option opt. */
4365: static void PrintValidArgs(char opt)
4366: {
4367: const char *s;
4368:
4369: PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4370: if (!(s = GetValidArgList(opt)))
4371: PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4372: else
4373: PrintOut(LOG_CRIT, "%s", (char *)s);
4374: PrintOut(LOG_CRIT, " <=======\n");
4375: }
4376:
4377: #ifndef _WIN32
4378: // Report error and exit if specified path is not absolute.
4379: static void check_abs_path(char option, const std::string & path)
4380: {
4381: if (path.empty() || path[0] == '/')
4382: return;
4383:
4384: debugmode = 1;
4385: PrintHead();
4386: PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
4387: PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
4388: EXIT(EXIT_BADCMD);
4389: }
4390: #endif // !_WIN32
4391:
4392: // Parses input line, prints usage message and
4393: // version/license/copyright messages
4394: static void ParseOpts(int argc, char **argv)
4395: {
1.1.1.3 misho 4396: // Init default path names
1.1 misho 4397: #ifndef _WIN32
4398: configfile = SMARTMONTOOLS_SYSCONFDIR"/smartd.conf";
1.1.1.3 misho 4399: warning_script = SMARTMONTOOLS_SYSCONFDIR"/smartd_warning.sh";
1.1 misho 4400: #else
1.1.1.3 misho 4401: std::string exedir = get_exe_dir();
4402: static std::string configfile_str = exedir + "/smartd.conf";
1.1 misho 4403: configfile = configfile_str.c_str();
1.1.1.3 misho 4404: warning_script = exedir + "/smartd_warning.cmd";
1.1 misho 4405: #endif
4406:
4407: // Please update GetValidArgList() if you edit shortopts
1.1.1.3 misho 4408: static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
1.1 misho 4409: #ifdef HAVE_LIBCAP_NG
4410: "C"
4411: #endif
4412: ;
4413: // Please update GetValidArgList() if you edit longopts
4414: struct option longopts[] = {
4415: { "configfile", required_argument, 0, 'c' },
4416: { "logfacility", required_argument, 0, 'l' },
4417: { "quit", required_argument, 0, 'q' },
4418: { "debug", no_argument, 0, 'd' },
4419: { "showdirectives", no_argument, 0, 'D' },
4420: { "interval", required_argument, 0, 'i' },
4421: #ifndef _WIN32
4422: { "no-fork", no_argument, 0, 'n' },
4423: #else
4424: { "service", no_argument, 0, 'n' },
4425: #endif
4426: { "pidfile", required_argument, 0, 'p' },
4427: { "report", required_argument, 0, 'r' },
4428: { "savestates", required_argument, 0, 's' },
4429: { "attributelog", required_argument, 0, 'A' },
4430: { "drivedb", required_argument, 0, 'B' },
1.1.1.3 misho 4431: { "warnexec", required_argument, 0, 'w' },
1.1 misho 4432: { "version", no_argument, 0, 'V' },
4433: { "license", no_argument, 0, 'V' },
4434: { "copyright", no_argument, 0, 'V' },
4435: { "help", no_argument, 0, 'h' },
4436: { "usage", no_argument, 0, 'h' },
4437: #ifdef HAVE_LIBCAP_NG
4438: { "capabilities", no_argument, 0, 'C' },
4439: #endif
4440: { 0, 0, 0, 0 }
4441: };
4442:
4443: opterr=optopt=0;
4444: bool badarg = false;
4445: bool no_defaultdb = false; // set true on '-B FILE'
4446:
4447: // Parse input options.
4448: int optchar;
4449: while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
4450: char *arg;
4451: char *tailptr;
4452: long lchecktime;
4453:
4454: switch(optchar) {
4455: case 'q':
4456: // when to quit
4457: if (!(strcmp(optarg,"nodev"))) {
4458: quit=0;
4459: } else if (!(strcmp(optarg,"nodevstartup"))) {
4460: quit=1;
4461: } else if (!(strcmp(optarg,"never"))) {
4462: quit=2;
4463: } else if (!(strcmp(optarg,"onecheck"))) {
4464: quit=3;
4465: debugmode=1;
4466: } else if (!(strcmp(optarg,"showtests"))) {
4467: quit=4;
4468: debugmode=1;
4469: } else if (!(strcmp(optarg,"errors"))) {
4470: quit=5;
4471: } else {
4472: badarg = true;
4473: }
4474: break;
4475: case 'l':
4476: // set the log facility level
4477: if (!strcmp(optarg, "daemon"))
4478: facility=LOG_DAEMON;
4479: else if (!strcmp(optarg, "local0"))
4480: facility=LOG_LOCAL0;
4481: else if (!strcmp(optarg, "local1"))
4482: facility=LOG_LOCAL1;
4483: else if (!strcmp(optarg, "local2"))
4484: facility=LOG_LOCAL2;
4485: else if (!strcmp(optarg, "local3"))
4486: facility=LOG_LOCAL3;
4487: else if (!strcmp(optarg, "local4"))
4488: facility=LOG_LOCAL4;
4489: else if (!strcmp(optarg, "local5"))
4490: facility=LOG_LOCAL5;
4491: else if (!strcmp(optarg, "local6"))
4492: facility=LOG_LOCAL6;
4493: else if (!strcmp(optarg, "local7"))
4494: facility=LOG_LOCAL7;
4495: else
4496: badarg = true;
4497: break;
4498: case 'd':
4499: // enable debug mode
4500: debugmode = 1;
4501: break;
4502: case 'n':
4503: // don't fork()
4504: #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4505: do_fork = false;
4506: #endif
4507: break;
4508: case 'D':
4509: // print summary of all valid directives
4510: debugmode = 1;
4511: Directives();
4512: EXIT(0);
4513: break;
4514: case 'i':
4515: // Period (time interval) for checking
4516: // strtol will set errno in the event of overflow, so we'll check it.
4517: errno = 0;
4518: lchecktime = strtol(optarg, &tailptr, 10);
4519: if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
4520: debugmode=1;
4521: PrintHead();
4522: PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
4523: PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
4524: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4525: EXIT(EXIT_BADCMD);
4526: }
4527: checktime = (int)lchecktime;
4528: break;
4529: case 'r':
4530: // report IOCTL transactions
4531: {
4532: int i;
4533: char *s;
4534:
4535: // split_report_arg() may modify its first argument string, so use a
4536: // copy of optarg in case we want optarg for an error message.
4537: if (!(s = strdup(optarg))) {
4538: PrintOut(LOG_CRIT, "No memory to process -r option - exiting\n");
4539: EXIT(EXIT_NOMEM);
4540: }
4541: if (split_report_arg(s, &i)) {
4542: badarg = true;
4543: } else if (i<1 || i>3) {
4544: debugmode=1;
4545: PrintHead();
4546: PrintOut(LOG_CRIT, "======> INVALID REPORT LEVEL: %s <=======\n", optarg);
4547: PrintOut(LOG_CRIT, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
4548: EXIT(EXIT_BADCMD);
4549: } else if (!strcmp(s,"ioctl")) {
4550: ata_debugmode = scsi_debugmode = i;
4551: } else if (!strcmp(s,"ataioctl")) {
4552: ata_debugmode = i;
4553: } else if (!strcmp(s,"scsiioctl")) {
4554: scsi_debugmode = i;
4555: } else {
4556: badarg = true;
4557: }
4558: free(s); // TODO: use std::string
4559: }
4560: break;
4561: case 'c':
4562: // alternate configuration file
4563: if (strcmp(optarg,"-"))
4564: configfile = (configfile_alt = optarg).c_str();
4565: else // read from stdin
4566: configfile=configfile_stdin;
4567: break;
4568: case 'p':
4569: // output file with PID number
4570: pid_file = optarg;
4571: break;
4572: case 's':
4573: // path prefix of persistent state file
4574: state_path_prefix = optarg;
4575: break;
4576: case 'A':
4577: // path prefix of attribute log file
4578: attrlog_path_prefix = optarg;
4579: break;
4580: case 'B':
4581: {
4582: const char * path = optarg;
4583: if (*path == '+' && path[1])
4584: path++;
4585: else
4586: no_defaultdb = true;
4587: unsigned char savedebug = debugmode; debugmode = 1;
4588: if (!read_drive_database(path))
4589: EXIT(EXIT_BADCMD);
4590: debugmode = savedebug;
4591: }
4592: break;
1.1.1.3 misho 4593: case 'w':
4594: warning_script = optarg;
4595: break;
1.1 misho 4596: case 'V':
4597: // print version and CVS info
4598: debugmode = 1;
4599: PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
4600: EXIT(0);
4601: break;
4602: #ifdef HAVE_LIBCAP_NG
4603: case 'C':
4604: // enable capabilities
4605: enable_capabilities = true;
4606: break;
4607: #endif
4608: case 'h':
4609: // help: print summary of command-line options
4610: debugmode=1;
4611: PrintHead();
4612: Usage();
4613: EXIT(0);
4614: break;
4615: case '?':
4616: default:
4617: // unrecognized option
4618: debugmode=1;
4619: PrintHead();
4620: // Point arg to the argument in which this option was found.
4621: arg = argv[optind-1];
4622: // Check whether the option is a long option that doesn't map to -h.
4623: if (arg[1] == '-' && optchar != 'h') {
4624: // Iff optopt holds a valid option then argument must be missing.
4625: if (optopt && (strchr(shortopts, optopt) != NULL)) {
4626: PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
4627: PrintValidArgs(optopt);
4628: } else {
4629: PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
4630: }
4631: PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
4632: EXIT(EXIT_BADCMD);
4633: }
4634: if (optopt) {
4635: // Iff optopt holds a valid option then argument must be missing.
4636: if (strchr(shortopts, optopt) != NULL){
4637: PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
4638: PrintValidArgs(optopt);
4639: } else {
4640: PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
4641: }
4642: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4643: EXIT(EXIT_BADCMD);
4644: }
4645: Usage();
4646: EXIT(0);
4647: }
4648:
4649: // Check to see if option had an unrecognized or incorrect argument.
4650: if (badarg) {
4651: debugmode=1;
4652: PrintHead();
4653: // It would be nice to print the actual option name given by the user
4654: // here, but we just print the short form. Please fix this if you know
4655: // a clean way to do it.
4656: PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
4657: PrintValidArgs(optchar);
4658: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4659: EXIT(EXIT_BADCMD);
4660: }
4661: }
4662:
4663: // non-option arguments are not allowed
4664: if (argc > optind) {
4665: debugmode=1;
4666: PrintHead();
4667: PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
4668: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4669: EXIT(EXIT_BADCMD);
4670: }
4671:
4672: // no pidfile in debug mode
4673: if (debugmode && !pid_file.empty()) {
4674: debugmode=1;
4675: PrintHead();
4676: PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4677: PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
4678: EXIT(EXIT_BADCMD);
4679: }
4680:
4681: #ifndef _WIN32
4682: if (!debugmode) {
4683: // absolute path names are required due to chdir('/') after fork().
4684: check_abs_path('p', pid_file);
4685: check_abs_path('s', state_path_prefix);
4686: check_abs_path('A', attrlog_path_prefix);
4687: }
4688: #endif
4689:
4690: // Read or init drive database
4691: if (!no_defaultdb) {
4692: unsigned char savedebug = debugmode; debugmode = 1;
4693: if (!read_default_drive_databases())
4694: EXIT(EXIT_BADCMD);
4695: debugmode = savedebug;
4696: }
4697:
4698: // print header
4699: PrintHead();
4700: }
4701:
4702: // Function we call if no configuration file was found or if the
4703: // SCANDIRECTIVE Directive was found. It makes entries for device
4704: // names returned by scan_smart_devices() in os_OSNAME.cpp
4705: static int MakeConfigEntries(const dev_config & base_cfg,
4706: dev_config_vector & conf_entries, smart_device_list & scanned_devs, const char * type)
4707: {
4708: // make list of devices
4709: smart_device_list devlist;
4710: if (!smi()->scan_smart_devices(devlist, (*type ? type : 0)))
4711: PrintOut(LOG_CRIT,"Problem creating device name scan list\n");
4712:
4713: // if no devices, or error constructing list, return
4714: if (devlist.size() <= 0)
4715: return 0;
4716:
4717: // add empty device slots for existing config entries
4718: while (scanned_devs.size() < conf_entries.size())
4719: scanned_devs.push_back((smart_device *)0);
4720:
4721: // loop over entries to create
4722: for (unsigned i = 0; i < devlist.size(); i++) {
4723: // Move device pointer
4724: smart_device * dev = devlist.release(i);
4725: scanned_devs.push_back(dev);
4726:
4727: // Copy configuration, update device and type name
4728: conf_entries.push_back(base_cfg);
4729: dev_config & cfg = conf_entries.back();
4730: cfg.name = dev->get_info().info_name;
4731: cfg.dev_name = dev->get_info().dev_name;
4732: cfg.dev_type = type;
4733: }
4734:
4735: return devlist.size();
4736: }
4737:
4738: static void CanNotRegister(const char *name, const char *type, int line, bool scandirective)
4739: {
4740: if (!debugmode && scandirective)
4741: return;
4742: if (line)
4743: PrintOut(scandirective?LOG_INFO:LOG_CRIT,
4744: "Unable to register %s device %s at line %d of file %s\n",
4745: type, name, line, configfile);
4746: else
4747: PrintOut(LOG_INFO,"Unable to register %s device %s\n",
4748: type, name);
4749: return;
4750: }
4751:
4752: // Returns negative value (see ParseConfigFile()) if config file
4753: // had errors, else number of entries which may be zero or positive.
4754: static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
4755: {
4756: // parse configuration file configfile (normally /etc/smartd.conf)
4757: int entries = ParseConfigFile(conf_entries);
4758:
4759: if (entries < 0) {
4760: // There was an error reading the configuration file.
4761: conf_entries.clear();
4762: if (entries == -1)
4763: PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
4764: return entries;
4765: }
4766:
4767: // no error parsing config file.
4768: if (entries) {
4769: // we did not find a SCANDIRECTIVE and did find valid entries
4770: PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
4771: }
4772: else if (!conf_entries.empty()) {
4773: // we found a SCANDIRECTIVE or there was no configuration file so
4774: // scan. Configuration file's last entry contains all options
4775: // that were set
4776: dev_config first = conf_entries.back();
4777: conf_entries.pop_back();
4778:
4779: if (first.lineno)
4780: PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
4781: else
4782: PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
4783:
4784: // make config list of devices to search for
4785: MakeConfigEntries(first, conf_entries, scanned_devs, first.dev_type.c_str());
4786:
4787: // warn user if scan table found no devices
4788: if (conf_entries.empty())
4789: PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
4790: }
4791: else
4792: PrintOut(LOG_CRIT,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile);
4793:
4794: return conf_entries.size();
4795: }
4796:
1.1.1.3 misho 4797: // Return true if TYPE contains a RAID drive number
4798: static bool is_raid_type(const char * type)
4799: {
4800: if (str_starts_with(type, "sat,"))
4801: return false;
4802: int i;
4803: if (sscanf(type, "%*[^,],%d", &i) != 1)
4804: return false;
4805: return true;
4806: }
4807:
4808: // Return true if DEV is already in DEVICES[0..NUMDEVS) or IGNORED[*]
4809: static bool is_duplicate_device(const smart_device * dev,
4810: const smart_device_list & devices, unsigned numdevs,
4811: const dev_config_vector & ignored)
4812: {
4813: const smart_device::device_info & info1 = dev->get_info();
4814: bool is_raid1 = is_raid_type(info1.dev_type.c_str());
4815:
4816: for (unsigned i = 0; i < numdevs; i++) {
4817: const smart_device::device_info & info2 = devices.at(i)->get_info();
4818: // -d TYPE options must match if RAID drive number is specified
4819: if ( info1.dev_name == info2.dev_name
4820: && ( info1.dev_type == info2.dev_type
4821: || !is_raid1 || !is_raid_type(info2.dev_type.c_str())))
4822: return true;
4823: }
4824:
4825: for (unsigned i = 0; i < ignored.size(); i++) {
4826: const dev_config & cfg2 = ignored.at(i);
4827: if ( info1.dev_name == cfg2.dev_name
4828: && ( info1.dev_type == cfg2.dev_type
4829: || !is_raid1 || !is_raid_type(cfg2.dev_type.c_str())))
4830: return true;
4831: }
4832: return false;
4833: }
1.1 misho 4834:
4835: // This function tries devices from conf_entries. Each one that can be
4836: // registered is moved onto the [ata|scsi]devices lists and removed
4837: // from the conf_entries list.
4838: static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
4839: dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
4840: {
4841: // start by clearing lists/memory of ALL existing devices
4842: configs.clear();
4843: devices.clear();
4844: states.clear();
4845:
4846: // Register entries
1.1.1.3 misho 4847: dev_config_vector ignored_entries;
4848: unsigned numnoscan = 0;
1.1 misho 4849: for (unsigned i = 0; i < conf_entries.size(); i++){
4850:
4851: dev_config cfg = conf_entries[i];
4852:
1.1.1.3 misho 4853: if (cfg.ignore) {
4854: // Store for is_duplicate_device() check and ignore
4855: PrintOut(LOG_INFO, "Device: %s%s%s%s, ignored\n", cfg.name.c_str(),
4856: (!cfg.dev_type.empty() ? " [" : ""),
4857: cfg.dev_type.c_str(),
4858: (!cfg.dev_type.empty() ? "]" : ""));
4859: ignored_entries.push_back(cfg);
4860: continue;
4861: }
4862:
1.1 misho 4863: // get device of appropriate type
4864: smart_device_auto_ptr dev;
4865: bool scanning = false;
4866:
4867: // Device may already be detected during devicescan
4868: if (i < scanned_devs.size()) {
4869: dev = scanned_devs.release(i);
1.1.1.3 misho 4870: if (dev) {
4871: // Check for a preceding non-DEVICESCAN entry for the same device
4872: if ( (numnoscan || !ignored_entries.empty())
4873: && is_duplicate_device(dev.get(), devices, numnoscan, ignored_entries)) {
4874: PrintOut(LOG_INFO, "Device: %s, duplicate, ignored\n", dev->get_info_name());
4875: continue;
4876: }
1.1 misho 4877: scanning = true;
1.1.1.3 misho 4878: }
1.1 misho 4879: }
4880:
4881: if (!dev) {
4882: dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
4883: if (!dev) {
4884: if (cfg.dev_type.empty())
4885: PrintOut(LOG_INFO,"Device: %s, unable to autodetect device type\n", cfg.name.c_str());
4886: else
4887: PrintOut(LOG_INFO,"Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
4888: continue;
4889: }
4890: }
4891:
4892: // Save old info
4893: smart_device::device_info oldinfo = dev->get_info();
4894:
4895: // Open with autodetect support, may return 'better' device
4896: dev.replace( dev->autodetect_open() );
4897:
4898: // Report if type has changed
4899: if (oldinfo.dev_type != dev->get_dev_type())
4900: PrintOut(LOG_INFO,"Device: %s, type changed from '%s' to '%s'\n",
4901: cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
4902:
4903: if (!dev->is_open()) {
4904: // For linux+devfs, a nonexistent device gives a strange error
4905: // message. This makes the error message a bit more sensible.
4906: // If no debug and scanning - don't print errors
4907: if (debugmode || !scanning)
4908: PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
4909: continue;
4910: }
4911:
4912: // Update informal name
4913: cfg.name = dev->get_info().info_name;
4914: PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
4915:
4916: // Prepare initial state
4917: dev_state state;
4918:
4919: // register ATA devices
4920: if (dev->is_ata()){
4921: if (ATADeviceScan(cfg, state, dev->to_ata())) {
4922: CanNotRegister(cfg.name.c_str(), "ATA", cfg.lineno, scanning);
4923: dev.reset();
4924: }
4925: }
4926: // or register SCSI devices
4927: else if (dev->is_scsi()){
4928: if (SCSIDeviceScan(cfg, state, dev->to_scsi())) {
4929: CanNotRegister(cfg.name.c_str(), "SCSI", cfg.lineno, scanning);
4930: dev.reset();
4931: }
4932: }
4933: else {
4934: PrintOut(LOG_INFO, "Device: %s, neither ATA nor SCSI device\n", cfg.name.c_str());
4935: dev.reset();
4936: }
4937:
4938: if (dev) {
4939: // move onto the list of devices
4940: configs.push_back(cfg);
4941: states.push_back(state);
4942: devices.push_back(dev);
1.1.1.3 misho 4943: if (!scanning)
4944: numnoscan = devices.size();
1.1 misho 4945: }
4946: // if device is explictly listed and we can't register it, then
4947: // exit unless the user has specified that the device is removable
4948: else if (!scanning) {
4949: if (cfg.removable || quit==2)
4950: PrintOut(LOG_INFO, "Device %s not available\n", cfg.name.c_str());
4951: else {
4952: PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
4953: EXIT(EXIT_BADDEV);
4954: }
4955: }
4956: }
1.1.1.2 misho 4957:
4958: init_disable_standby_check(configs);
1.1 misho 4959: }
4960:
4961:
4962: // Main program without exception handling
4963: static int main_worker(int argc, char **argv)
4964: {
4965: // Initialize interface
4966: smart_interface::init();
4967: if (!smi())
4968: return 1;
4969:
4970: // is it our first pass through?
4971: bool firstpass = true;
4972:
4973: // next time to wake up
4974: time_t wakeuptime = 0;
4975:
4976: // parse input and print header and usage info if needed
4977: ParseOpts(argc,argv);
4978:
4979: // Configuration for each device
4980: dev_config_vector configs;
4981: // Device states
4982: dev_state_vector states;
4983: // Devices to monitor
4984: smart_device_list devices;
4985:
4986: bool write_states_always = true;
4987:
4988: #ifdef HAVE_LIBCAP_NG
4989: // Drop capabilities
4990: if (enable_capabilities) {
4991: capng_clear(CAPNG_SELECT_BOTH);
4992: capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
4993: CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
4994: capng_apply(CAPNG_SELECT_BOTH);
4995: }
4996: #endif
4997:
4998: // the main loop of the code
4999: for (;;) {
5000:
5001: // are we exiting from a signal?
5002: if (caughtsigEXIT) {
5003: // are we exiting with SIGTERM?
5004: int isterm=(caughtsigEXIT==SIGTERM);
5005: int isquit=(caughtsigEXIT==SIGQUIT);
5006: int isok=debugmode?isterm || isquit:isterm;
5007:
5008: PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
5009: caughtsigEXIT, strsignal(caughtsigEXIT));
5010:
5011: if (!isok)
5012: return EXIT_SIGNAL;
5013:
5014: // Write state files
5015: if (!state_path_prefix.empty())
5016: write_all_dev_states(configs, states);
5017:
5018: return 0;
5019: }
5020:
5021: // Should we (re)read the config file?
5022: if (firstpass || caughtsigHUP){
5023: if (!firstpass) {
5024: // Write state files
5025: if (!state_path_prefix.empty())
5026: write_all_dev_states(configs, states);
5027:
5028: PrintOut(LOG_INFO,
5029: caughtsigHUP==1?
5030: "Signal HUP - rereading configuration file %s\n":
5031: "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME" quits)\n\n",
5032: configfile);
5033: }
5034:
5035: {
5036: dev_config_vector conf_entries; // Entries read from smartd.conf
5037: smart_device_list scanned_devs; // Devices found during scan
5038: // (re)reads config file, makes >=0 entries
5039: int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
5040:
5041: if (entries>=0) {
5042: // checks devices, then moves onto ata/scsi list or deallocates.
5043: RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
5044: if (!(configs.size() == devices.size() && configs.size() == states.size()))
5045: throw std::logic_error("Invalid result from RegisterDevices");
5046: }
5047: else if (quit==2 || ((quit==0 || quit==1) && !firstpass)) {
5048: // user has asked to continue on error in configuration file
5049: if (!firstpass)
5050: PrintOut(LOG_INFO,"Reusing previous configuration\n");
5051: }
5052: else {
5053: // exit with configuration file error status
5054: return (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
5055: }
5056: }
5057:
5058: // Log number of devices we are monitoring...
5059: if (devices.size() > 0 || quit==2 || (quit==1 && !firstpass)) {
5060: int numata = 0;
5061: for (unsigned i = 0; i < devices.size(); i++) {
5062: if (devices.at(i)->is_ata())
5063: numata++;
5064: }
5065: PrintOut(LOG_INFO,"Monitoring %d ATA and %d SCSI devices\n",
5066: numata, devices.size() - numata);
5067: }
5068: else {
5069: PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
5070: return EXIT_NODEV;
5071: }
5072:
5073: if (quit==4) {
5074: // user has asked to print test schedule
5075: PrintTestSchedule(configs, states, devices);
5076: return 0;
5077: }
5078:
5079: #ifdef HAVE_LIBCAP_NG
5080: if (enable_capabilities) {
5081: for (unsigned i = 0; i < configs.size(); i++) {
5082: if (!configs[i].emailaddress.empty() || !configs[i].emailcmdline.empty()) {
5083: PrintOut(LOG_WARNING, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
5084: break;
5085: }
5086: }
5087: }
5088: #endif
5089:
5090: // reset signal
5091: caughtsigHUP=0;
5092:
5093: // Always write state files after (re)configuration
5094: write_states_always = true;
5095: }
5096:
5097: // check all devices once,
5098: // self tests are not started in first pass unless '-q onecheck' is specified
5099: CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit==3));
5100:
5101: // Write state files
5102: if (!state_path_prefix.empty())
5103: write_all_dev_states(configs, states, write_states_always);
5104: write_states_always = false;
5105:
5106: // Write attribute logs
5107: if (!attrlog_path_prefix.empty())
5108: write_all_dev_attrlogs(configs, states);
5109:
5110: // user has asked us to exit after first check
5111: if (quit==3) {
5112: PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
5113: "smartd is exiting (exit status 0)\n");
5114: return 0;
5115: }
5116:
5117: // fork into background if needed
5118: if (firstpass && !debugmode) {
5119: DaemonInit();
5120: }
5121:
5122: // set exit and signal handlers, write PID file, set wake-up time
5123: if (firstpass){
5124: Initialize(&wakeuptime);
5125: firstpass = false;
5126: }
5127:
5128: // sleep until next check time, or a signal arrives
5129: wakeuptime = dosleep(wakeuptime, write_states_always);
5130: }
5131: }
5132:
5133:
5134: #ifndef _WIN32
5135: // Main program
5136: int main(int argc, char **argv)
5137: #else
5138: // Windows: internal main function started direct or by service control manager
5139: static int smartd_main(int argc, char **argv)
5140: #endif
5141: {
5142: int status;
5143: try {
5144: // Do the real work ...
5145: status = main_worker(argc, argv);
5146: }
5147: catch (int ex) {
5148: // EXIT(status) arrives here
5149: status = ex;
5150: }
5151: catch (const std::bad_alloc & /*ex*/) {
5152: // Memory allocation failed (also thrown by std::operator new)
5153: PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
5154: status = EXIT_NOMEM;
5155: }
5156: catch (const std::exception & ex) {
5157: // Other fatal errors
5158: PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
5159: status = EXIT_BADCODE;
5160: }
5161:
5162: if (is_initialized)
5163: status = Goodbye(status);
5164:
5165: #ifdef _WIN32
5166: daemon_winsvc_exitcode = status;
5167: #endif
5168: return status;
5169: }
5170:
5171:
#ifdef _WIN32
// Main function for Windows.
// Passes control to daemon_main(), which handles daemon and service
// specific commands and starts smartd_main() direct, from a new process,
// or via service control manager.
int main(int argc, char **argv)
{
  // Options for smartd windows service
  static const daemon_winsvc_options service_options = {
    "--service", // cmd_opt
    "smartd", "SmartD Service", // servicename, displayname
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (S.M.A.R.T.) "
    "built into ATA and SCSI Hard Drives. "
    PACKAGE_HOMEPAGE
  };

  const int exit_status = daemon_main("smartd", &service_options, smartd_main, argc, argv);
  return exit_status;
}
#endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>