Annotation of embedaddon/smartmontools/smartd.cpp, revision 1.1.1.3
1.1 misho 1: /*
2: * Home page of code is: http://smartmontools.sourceforge.net
3: *
4: * Copyright (C) 2002-11 Bruce Allen <smartmontools-support@lists.sourceforge.net>
5: * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
6: * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
1.1.1.3 ! misho 7: * Copyright (C) 2008-13 Christian Franke <smartmontools-support@lists.sourceforge.net>
1.1 misho 8: *
9: * This program is free software; you can redistribute it and/or modify
10: * it under the terms of the GNU General Public License as published by
11: * the Free Software Foundation; either version 2, or (at your option)
12: * any later version.
13: *
14: * You should have received a copy of the GNU General Public License
15: * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
16: *
17: * This code was originally developed as a Senior Thesis by Michael Cornwell
18: * at the Concurrent Systems Laboratory (now part of the Storage Systems
19: * Research Center), Jack Baskin School of Engineering, University of
20: * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
21: *
22: */
23:
24: // unconditionally included files
25: #include <stdio.h>
26: #include <sys/types.h>
27: #include <sys/stat.h> // umask
28: #include <signal.h>
29: #include <fcntl.h>
30: #include <string.h>
31: #include <syslog.h>
32: #include <stdarg.h>
33: #include <stdlib.h>
34: #include <errno.h>
35: #include <time.h>
36: #include <limits.h>
37: #include <getopt.h>
38:
39: #include <stdexcept>
40: #include <string>
41: #include <vector>
42: #include <algorithm> // std::replace()
43:
44: // see which system files to conditionally include
45: #include "config.h"
46:
47: // conditionally included files
48: #ifndef _WIN32
49: #include <sys/wait.h>
50: #endif
51: #ifdef HAVE_UNISTD_H
52: #include <unistd.h>
53: #endif
54: #ifdef HAVE_NETDB_H
55: #include <netdb.h>
56: #endif
57:
58: #ifdef _WIN32
59: #ifdef _MSC_VER
60: #pragma warning(disable:4761) // "conversion supplied"
61: typedef unsigned short mode_t;
62: typedef int pid_t;
63: #endif
64: #include <io.h> // umask()
65: #include <process.h> // getpid()
66: #endif // _WIN32
67:
68: #ifdef __CYGWIN__
69: #include <io.h> // setmode()
70: #endif // __CYGWIN__
71:
72: #ifdef HAVE_LIBCAP_NG
73: #include <cap-ng.h>
74: #endif // LIBCAP_NG
75:
76: // locally included files
77: #include "int64.h"
78: #include "atacmds.h"
79: #include "dev_interface.h"
80: #include "knowndrives.h"
81: #include "scsicmds.h"
82: #include "utility.h"
83:
84: // This is for solaris, where signal() resets the handler to SIG_DFL
85: // after the first signal is caught.
86: #ifdef HAVE_SIGSET
87: #define SIGNALFN sigset
88: #else
89: #define SIGNALFN signal
90: #endif
91:
92: #ifdef _WIN32
93: // fork()/signal()/initd simulation for native Windows
94: #include "daemon_win32.h" // daemon_main/detach/signal()
95: #undef SIGNALFN
96: #define SIGNALFN daemon_signal
97: #define strsignal daemon_strsignal
98: #define sleep daemon_sleep
99: // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
100: #define SIGQUIT SIGBREAK
101: #define SIGQUIT_KEYNAME "CONTROL-Break"
102: #else // _WIN32
103: #define SIGQUIT_KEYNAME "CONTROL-\\"
104: #endif // _WIN32
105:
106: #if defined (__SVR4) && defined (__sun)
107: extern "C" int getdomainname(char *, int); // no declaration in header files!
108: #endif
109:
1.1.1.3 ! misho 110: const char * smartd_cpp_cvsid = "$Id: smartd.cpp 3778 2013-02-21 23:55:35Z dpgilbert $"
1.1 misho 111: CONFIG_H_CVSID;
112:
113: // smartd exit codes
114: #define EXIT_BADCMD 1 // command line did not parse
115: #define EXIT_BADCONF 2 // syntax error in config file
116: #define EXIT_STARTUP 3 // problem forking daemon
117: #define EXIT_PID 4 // problem creating pid file
118: #define EXIT_NOCONF 5 // config file does not exist
119: #define EXIT_READCONF 6 // config file exists but cannot be read
120:
121: #define EXIT_NOMEM 8 // out of memory
122: #define EXIT_BADCODE 10 // internal error - should NEVER happen
123:
124: #define EXIT_BADDEV 16 // we can't monitor this device
125: #define EXIT_NODEV 17 // no devices to monitor
126:
127: #define EXIT_SIGNAL 254 // abort on signal
128:
129:
130: // command-line: 1=debug mode, 2=print presets
131: static unsigned char debugmode = 0;
132:
133: // command-line: how long to sleep between checks
134: #define CHECKTIME 1800
135: static int checktime=CHECKTIME;
136:
137: // command-line: name of PID file (empty for no pid file)
138: static std::string pid_file;
139:
140: // command-line: path prefix of persistent state file, empty if no persistence.
141: static std::string state_path_prefix
142: #ifdef SMARTMONTOOLS_SAVESTATES
143: = SMARTMONTOOLS_SAVESTATES
144: #endif
145: ;
146:
147: // command-line: path prefix of attribute log file, empty if no logs.
148: static std::string attrlog_path_prefix
149: #ifdef SMARTMONTOOLS_ATTRIBUTELOG
150: = SMARTMONTOOLS_ATTRIBUTELOG
151: #endif
152: ;
153:
154: // configuration file name
155: static const char * configfile;
156: // configuration file "name" if read from stdin
157: static const char * const configfile_stdin = "<stdin>";
158: // path of alternate configuration file
159: static std::string configfile_alt;
160:
1.1.1.3 ! misho 161: // warning script file
! 162: static std::string warning_script;
! 163:
1.1 misho 164: // command-line: when should we exit?
165: static int quit=0;
166:
167: // command-line; this is the default syslog(3) log facility to use.
168: static int facility=LOG_DAEMON;
169:
170: #ifndef _WIN32
171: // command-line: fork into background?
172: static bool do_fork=true;
173: #endif
174:
175: #ifdef HAVE_LIBCAP_NG
176: // command-line: enable capabilities?
177: static bool enable_capabilities = false;
178: #endif
179:
180: // TODO: This smartctl only variable is also used in os_win32.cpp
181: unsigned char failuretest_permissive = 0;
182:
183: // set to one if we catch a USR1 (check devices now)
184: static volatile int caughtsigUSR1=0;
185:
186: #ifdef _WIN32
187: // set to one if we catch a USR2 (toggle debug mode)
188: static volatile int caughtsigUSR2=0;
189: #endif
190:
191: // set to one if we catch a HUP (reload config file). In debug mode,
192: // set to two, if we catch INT (also reload config file).
193: static volatile int caughtsigHUP=0;
194:
195: // set to signal value if we catch INT, QUIT, or TERM
196: static volatile int caughtsigEXIT=0;
197:
198: // This function prints either to stdout or to the syslog as needed.
199: static void PrintOut(int priority, const char *fmt, ...)
1.1.1.2 misho 200: __attribute_format_printf(2, 3);
1.1 misho 201:
// Attribute monitoring flags.
// Each enumerator is a distinct bit; the bits are OR-ed together per
// attribute ID (see monitor_attr_flags below).
enum {
  MONITOR_IGN_FAILUSE = 1 << 0, // 0x01
  MONITOR_IGNORE      = 1 << 1, // 0x02
  MONITOR_RAW_PRINT   = 1 << 2, // 0x04
  MONITOR_RAW         = 1 << 3, // 0x08
  MONITOR_AS_CRIT     = 1 << 4, // 0x10
  MONITOR_RAW_AS_CRIT = 1 << 5  // 0x20
};
212:
// Array of flags for each attribute.
// One byte of flag bits is kept per attribute ID. ID 0 and IDs outside
// the table are never stored and always read back as "not set".
class attribute_flags
{
public:
  attribute_flags()
    { memset(m_flags, 0, sizeof(m_flags)); }

  // Return true if at least one bit of 'flag' is set for attribute 'id'.
  bool is_set(int id, unsigned char flag) const
    {
      if (!has_slot(id))
        return false;
      return (m_flags[id] & flag) != 0;
    }

  // OR the bits of 'flags' into the entry for attribute 'id'.
  // Silently does nothing if 'id' has no slot.
  void set(int id, unsigned char flags)
    {
      if (!has_slot(id))
        return;
      m_flags[id] |= flags;
    }

private:
  // True if 'id' addresses a usable entry of m_flags (entry 0 is unused).
  bool has_slot(int id) const
    { return (id > 0 && id < (int)sizeof(m_flags)); }

  unsigned char m_flags[256];
};
232:
233:
234: /// Configuration data for a device. Read from smartd.conf.
235: /// Supports copy & assignment and is compatible with STL containers.
236: struct dev_config
237: {
238: int lineno; // Line number of entry in file
239: std::string name; // Device name (with optional extra info)
240: std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
241: std::string dev_type; // Device type argument from -d directive, empty if none
1.1.1.3 ! misho 242: std::string dev_idinfo; // Device identify info for warning emails
1.1 misho 243: std::string state_file; // Path of the persistent state file, empty if none
244: std::string attrlog_file; // Path of the persistent attrlog file, empty if none
1.1.1.3 ! misho 245: bool ignore; // Ignore this entry
1.1 misho 246: bool smartcheck; // Check SMART status
247: bool usagefailed; // Check for failed Usage Attributes
248: bool prefail; // Track changes in Prefail Attributes
249: bool usage; // Track changes in Usage Attributes
250: bool selftest; // Monitor number of selftest errors
251: bool errorlog; // Monitor number of ATA errors
252: bool xerrorlog; // Monitor number of ATA errors (Extended Comprehensive error log)
253: bool offlinests; // Monitor changes in offline data collection status
1.1.1.2 misho 254: bool offlinests_ns; // Disable auto standby if in progress
1.1 misho 255: bool selfteststs; // Monitor changes in self-test execution status
1.1.1.2 misho 256: bool selfteststs_ns; // Disable auto standby if in progress
1.1 misho 257: bool permissive; // Ignore failed SMART commands
258: char autosave; // 1=disable, 2=enable Autosave Attributes
259: char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
1.1.1.3 ! misho 260: firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
1.1 misho 261: bool ignorepresets; // Ignore database of -v options
262: bool showpresets; // Show database entry for this device
263: bool removable; // Device may disappear (not be present)
264: char powermode; // skip check, if disk in idle or standby mode
265: bool powerquiet; // skip powermode 'skipping checks' message
266: int powerskipmax; // how many times can be check skipped
267: unsigned char tempdiff; // Track Temperature changes >= this limit
268: unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
269: regular_expression test_regex; // Regex for scheduled testing
270:
271: // Configuration of email warning messages
272: std::string emailcmdline; // script to execute, empty if no messages
273: std::string emailaddress; // email address, or empty
274: unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
275: bool emailtest; // Send test email?
276:
277: // ATA ONLY
1.1.1.3 ! misho 278: int dev_rpm; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
1.1.1.2 misho 279: int set_aam; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
280: int set_apm; // disable(-1), enable(2..255->1..254) Advanced Power Management
281: int set_lookahead; // disable(-1), enable(1) read look-ahead
282: int set_standby; // set(1..255->0..254) standby timer
283: bool set_security_freeze; // Freeze ATA security
284: int set_wcache; // disable(-1), enable(1) write cache
285:
1.1 misho 286: bool sct_erc_set; // set SCT ERC to:
287: unsigned short sct_erc_readtime; // ERC read time (deciseconds)
288: unsigned short sct_erc_writetime; // ERC write time (deciseconds)
289:
290: unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
291: unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
292: bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
293: bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
294:
295: attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
296:
297: ata_vendor_attr_defs attribute_defs; // -v options
298:
299: dev_config();
300: };
301:
302: dev_config::dev_config()
303: : lineno(0),
1.1.1.3 ! misho 304: ignore(false),
1.1 misho 305: smartcheck(false),
306: usagefailed(false),
307: prefail(false),
308: usage(false),
309: selftest(false),
310: errorlog(false),
311: xerrorlog(false),
1.1.1.2 misho 312: offlinests(false), offlinests_ns(false),
313: selfteststs(false), selfteststs_ns(false),
1.1 misho 314: permissive(false),
315: autosave(0),
316: autoofflinetest(0),
317: ignorepresets(false),
318: showpresets(false),
319: removable(false),
320: powermode(0),
321: powerquiet(false),
322: powerskipmax(0),
323: tempdiff(0),
324: tempinfo(0), tempcrit(0),
325: emailfreq(0),
326: emailtest(false),
1.1.1.3 ! misho 327: dev_rpm(0),
1.1.1.2 misho 328: set_aam(0), set_apm(0),
329: set_lookahead(0),
330: set_standby(0),
331: set_security_freeze(false),
332: set_wcache(0),
1.1 misho 333: sct_erc_set(false),
334: sct_erc_readtime(0), sct_erc_writetime(0),
335: curr_pending_id(0), offl_pending_id(0),
336: curr_pending_incr(false), offl_pending_incr(false),
337: curr_pending_set(false), offl_pending_set(false)
338: {
339: }
340:
341:
342: // Number of allowed mail message types
343: static const int SMARTD_NMAIL = 13;
344: // Type for '-M test' mails (state not persistent)
345: static const int MAILTYPE_TEST = 0;
346: // TODO: Add const or enum for all mail types.
347:
// Bookkeeping for one type of warning mail; all fields start at 0.
struct mailinfo {
  int logged;       // number of times an email has been sent
  time_t firstsent; // time first email was sent, as defined by time(2)
  time_t lastsent;  // time last email was sent, as defined by time(2)

  mailinfo()
    : logged(0),
      firstsent(0),
      lastsent(0)
    { }
};
356:
357: /// Persistent state data for a device.
358: struct persistent_dev_state
359: {
360: unsigned char tempmin, tempmax; // Min/Max Temperatures
361:
362: unsigned char selflogcount; // total number of self-test errors
363: unsigned short selfloghour; // lifetime hours of last self-test error
364:
365: time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
366:
367: uint64_t selective_test_last_start; // Start LBA of last scheduled selective self-test
368: uint64_t selective_test_last_end; // End LBA of last scheduled selective self-test
369:
370: mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
371:
372: // ATA ONLY
373: int ataerrorcount; // Total number of ATA errors
374:
375: // Persistent part of ata_smart_values:
376: struct ata_attribute {
377: unsigned char id;
378: unsigned char val;
379: unsigned char worst; // Byte needed for 'raw64' attribute only.
380: uint64_t raw;
381: unsigned char resvd;
382:
383: ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
384: };
385: ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
1.1.1.3 ! misho 386:
! 387: // SCSI ONLY
! 388:
! 389: struct scsi_error_counter {
! 390: struct scsiErrorCounter errCounter;
! 391: unsigned char found;
! 392: scsi_error_counter() : found(0) { }
! 393: };
! 394: scsi_error_counter scsi_error_counters[3];
! 395:
! 396: struct scsi_nonmedium_error {
! 397: struct scsiNonMediumError nme;
! 398: unsigned char found;
! 399: scsi_nonmedium_error() : found(0) { }
! 400: };
! 401: scsi_nonmedium_error scsi_nonmedium_error;
1.1 misho 402:
403: persistent_dev_state();
404: };
405:
406: persistent_dev_state::persistent_dev_state()
407: : tempmin(0), tempmax(0),
408: selflogcount(0),
409: selfloghour(0),
410: scheduled_test_next_check(0),
411: selective_test_last_start(0),
412: selective_test_last_end(0),
413: ataerrorcount(0)
414: {
415: }
416:
417: /// Non-persistent state data for a device.
418: struct temp_dev_state
419: {
420: bool must_write; // true if persistent part should be written
421:
422: bool not_cap_offline; // true == not capable of offline testing
423: bool not_cap_conveyance;
424: bool not_cap_short;
425: bool not_cap_long;
426: bool not_cap_selective;
427:
428: unsigned char temperature; // last recorded Temperature (in Celsius)
429: time_t tempmin_delay; // time where Min Temperature tracking will start
430:
431: bool powermodefail; // true if power mode check failed
432: int powerskipcnt; // Number of checks skipped due to idle or standby mode
433:
434: // SCSI ONLY
435: unsigned char SmartPageSupported; // has log sense IE page (0x2f)
436: unsigned char TempPageSupported; // has log sense temperature page (0xd)
1.1.1.3 ! misho 437: unsigned char ReadECounterPageSupported;
! 438: unsigned char WriteECounterPageSupported;
! 439: unsigned char VerifyECounterPageSupported;
! 440: unsigned char NonMediumErrorPageSupported;
1.1 misho 441: unsigned char SuppressReport; // minimize nuisance reports
442: unsigned char modese_len; // mode sense/select cmd len: 0 (don't
443: // know yet) 6 or 10
444: // ATA ONLY
445: uint64_t num_sectors; // Number of sectors
446: ata_smart_values smartval; // SMART data
447: ata_smart_thresholds_pvt smartthres; // SMART thresholds
1.1.1.2 misho 448: bool offline_started; // true if offline data collection was started
449: bool selftest_started; // true if self-test was started
1.1 misho 450:
451: temp_dev_state();
452: };
453:
454: temp_dev_state::temp_dev_state()
455: : must_write(false),
456: not_cap_offline(false),
457: not_cap_conveyance(false),
458: not_cap_short(false),
459: not_cap_long(false),
460: not_cap_selective(false),
461: temperature(0),
462: tempmin_delay(0),
463: powermodefail(false),
464: powerskipcnt(0),
465: SmartPageSupported(false),
466: TempPageSupported(false),
1.1.1.3 ! misho 467: ReadECounterPageSupported(false),
! 468: WriteECounterPageSupported(false),
! 469: VerifyECounterPageSupported(false),
! 470: NonMediumErrorPageSupported(false),
1.1 misho 471: SuppressReport(false),
472: modese_len(0),
1.1.1.2 misho 473: num_sectors(0),
474: offline_started(false),
475: selftest_started(false)
1.1 misho 476: {
477: memset(&smartval, 0, sizeof(smartval));
478: memset(&smartthres, 0, sizeof(smartthres));
479: }
480:
481: /// Runtime state data for a device.
482: struct dev_state
483: : public persistent_dev_state,
484: public temp_dev_state
485: {
486: void update_persistent_state();
487: void update_temp_state();
488: };
489:
490: /// Container for configuration info for each device.
491: typedef std::vector<dev_config> dev_config_vector;
492:
493: /// Container for state info for each device.
494: typedef std::vector<dev_state> dev_state_vector;
495:
496: // Copy ATA attributes to persistent state.
497: void dev_state::update_persistent_state()
498: {
499: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
500: const ata_smart_attribute & ta = smartval.vendor_attributes[i];
501: ata_attribute & pa = ata_attributes[i];
502: pa.id = ta.id;
503: if (ta.id == 0) {
504: pa.val = pa.worst = 0; pa.raw = 0;
505: continue;
506: }
507: pa.val = ta.current;
508: pa.worst = ta.worst;
509: pa.raw = ta.raw[0]
510: | ( ta.raw[1] << 8)
511: | ( ta.raw[2] << 16)
512: | ((uint64_t)ta.raw[3] << 24)
513: | ((uint64_t)ta.raw[4] << 32)
514: | ((uint64_t)ta.raw[5] << 40);
515: pa.resvd = ta.reserv;
516: }
517: }
518:
519: // Copy ATA from persistent to temp state.
520: void dev_state::update_temp_state()
521: {
522: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
523: const ata_attribute & pa = ata_attributes[i];
524: ata_smart_attribute & ta = smartval.vendor_attributes[i];
525: ta.id = pa.id;
526: if (pa.id == 0) {
527: ta.current = ta.worst = 0;
528: memset(ta.raw, 0, sizeof(ta.raw));
529: continue;
530: }
531: ta.current = pa.val;
532: ta.worst = pa.worst;
533: ta.raw[0] = (unsigned char) pa.raw;
534: ta.raw[1] = (unsigned char)(pa.raw >> 8);
535: ta.raw[2] = (unsigned char)(pa.raw >> 16);
536: ta.raw[3] = (unsigned char)(pa.raw >> 24);
537: ta.raw[4] = (unsigned char)(pa.raw >> 32);
538: ta.raw[5] = (unsigned char)(pa.raw >> 40);
539: ta.reserv = pa.resvd;
540: }
541: }
542:
543: // Parse a line from a state file.
544: static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
545: {
546: static const regular_expression regex(
547: "^ *"
548: "((temperature-min)" // (1 (2)
549: "|(temperature-max)" // (3)
550: "|(self-test-errors)" // (4)
551: "|(self-test-last-err-hour)" // (5)
552: "|(scheduled-test-next-check)" // (6)
553: "|(selective-test-last-start)" // (7)
554: "|(selective-test-last-end)" // (8)
555: "|(ata-error-count)" // (9)
556: "|(mail\\.([0-9]+)\\." // (10 (11)
557: "((count)" // (12 (13)
558: "|(first-sent-time)" // (14)
559: "|(last-sent-time)" // (15)
560: ")" // 12)
561: ")" // 10)
562: "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
563: "((id)" // (18 (19)
564: "|(val)" // (20)
565: "|(worst)" // (21)
566: "|(raw)" // (22)
567: "|(resvd)" // (23)
568: ")" // 18)
569: ")" // 16)
570: ")" // 1)
571: " *= *([0-9]+)[ \n]*$", // (24)
572: REG_EXTENDED
573: );
574:
575: const int nmatch = 1+24;
576: regmatch_t match[nmatch];
577: if (!regex.execute(line, nmatch, match))
578: return false;
579: if (match[nmatch-1].rm_so < 0)
580: return false;
581:
582: uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
583:
584: int m = 1;
585: if (match[++m].rm_so >= 0)
586: state.tempmin = (unsigned char)val;
587: else if (match[++m].rm_so >= 0)
588: state.tempmax = (unsigned char)val;
589: else if (match[++m].rm_so >= 0)
590: state.selflogcount = (unsigned char)val;
591: else if (match[++m].rm_so >= 0)
592: state.selfloghour = (unsigned short)val;
593: else if (match[++m].rm_so >= 0)
594: state.scheduled_test_next_check = (time_t)val;
595: else if (match[++m].rm_so >= 0)
596: state.selective_test_last_start = val;
597: else if (match[++m].rm_so >= 0)
598: state.selective_test_last_end = val;
599: else if (match[++m].rm_so >= 0)
600: state.ataerrorcount = (int)val;
601: else if (match[m+=2].rm_so >= 0) {
602: int i = atoi(line+match[m].rm_so);
603: if (!(0 <= i && i < SMARTD_NMAIL))
604: return false;
605: if (i == MAILTYPE_TEST) // Don't suppress test mails
606: return true;
607: if (match[m+=2].rm_so >= 0)
608: state.maillog[i].logged = (int)val;
609: else if (match[++m].rm_so >= 0)
610: state.maillog[i].firstsent = (time_t)val;
611: else if (match[++m].rm_so >= 0)
612: state.maillog[i].lastsent = (time_t)val;
613: else
614: return false;
615: }
616: else if (match[m+=5+1].rm_so >= 0) {
617: int i = atoi(line+match[m].rm_so);
618: if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
619: return false;
620: if (match[m+=2].rm_so >= 0)
621: state.ata_attributes[i].id = (unsigned char)val;
622: else if (match[++m].rm_so >= 0)
623: state.ata_attributes[i].val = (unsigned char)val;
624: else if (match[++m].rm_so >= 0)
625: state.ata_attributes[i].worst = (unsigned char)val;
626: else if (match[++m].rm_so >= 0)
627: state.ata_attributes[i].raw = val;
628: else if (match[++m].rm_so >= 0)
629: state.ata_attributes[i].resvd = (unsigned char)val;
630: else
631: return false;
632: }
633: else
634: return false;
635: return true;
636: }
637:
638: // Read a state file.
639: static bool read_dev_state(const char * path, persistent_dev_state & state)
640: {
641: stdio_file f(path, "r");
642: if (!f) {
643: if (errno != ENOENT)
644: pout("Cannot read state file \"%s\"\n", path);
645: return false;
646: }
647: #ifdef __CYGWIN__
648: setmode(fileno(f), O_TEXT); // Allow files with \r\n
649: #endif
650:
651: persistent_dev_state new_state;
652: int good = 0, bad = 0;
653: char line[256];
654: while (fgets(line, sizeof(line), f)) {
655: const char * s = line + strspn(line, " \t");
656: if (!*s || *s == '#')
657: continue;
658: if (!parse_dev_state_line(line, new_state))
659: bad++;
660: else
661: good++;
662: }
663:
664: if (bad) {
665: if (!good) {
666: pout("%s: format error\n", path);
667: return false;
668: }
669: pout("%s: %d invalid line(s) ignored\n", path, bad);
670: }
671:
672: // This sets the values missing in the file to 0.
673: state = new_state;
674: return true;
675: }
676:
// Write one "NAME = VALUE" line to a state file.
// Nothing is written if 'val' is 0.
// Note: The blanks around PRIu64 are required. Since C++11 a string
// literal directly followed by an identifier is parsed as a
// user-defined literal, so "..."PRIu64 no longer expands the macro.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val)
    fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
682:
// Write one "NAME1.ID.NAME2 = VALUE" line to a state file.
// Nothing is written if 'val' is 0.
// Note: The blanks around PRIu64 are required. Since C++11 a string
// literal directly followed by an identifier is parsed as a
// user-defined literal, so "..."PRIu64 no longer expands the macro.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val)
    fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
688:
689: // Write a state file
690: static bool write_dev_state(const char * path, const persistent_dev_state & state)
691: {
692: // Rename old "file" to "file~"
693: std::string pathbak = path; pathbak += '~';
694: unlink(pathbak.c_str());
695: rename(path, pathbak.c_str());
696:
697: stdio_file f(path, "w");
698: if (!f) {
699: pout("Cannot create state file \"%s\"\n", path);
700: return false;
701: }
702:
703: fprintf(f, "# smartd state file\n");
704: write_dev_state_line(f, "temperature-min", state.tempmin);
705: write_dev_state_line(f, "temperature-max", state.tempmax);
706: write_dev_state_line(f, "self-test-errors", state.selflogcount);
707: write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
708: write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
709: write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
710: write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
711:
712: int i;
713: for (i = 0; i < SMARTD_NMAIL; i++) {
714: if (i == MAILTYPE_TEST) // Don't suppress test mails
715: continue;
716: const mailinfo & mi = state.maillog[i];
717: if (!mi.logged)
718: continue;
719: write_dev_state_line(f, "mail", i, "count", mi.logged);
720: write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
721: write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
722: }
723:
724: // ATA ONLY
725: write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
726:
727: for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
728: const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
729: if (!pa.id)
730: continue;
731: write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
732: write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
733: write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
734: write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
735: write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
736: }
737:
738: return true;
739: }
740:
741: // Write to the attrlog file
1.1.1.3 ! misho 742: static bool write_dev_attrlog(const char * path, const dev_state & state)
1.1 misho 743: {
744: stdio_file f(path, "a");
745: if (!f) {
746: pout("Cannot create attribute log file \"%s\"\n", path);
747: return false;
748: }
749:
1.1.1.3 ! misho 750:
1.1 misho 751: time_t now = time(0);
752: struct tm * tms = gmtime(&now);
753: fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
754: 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
755: tms->tm_hour, tms->tm_min, tms->tm_sec);
1.1.1.3 ! misho 756: // ATA ONLY
1.1 misho 757: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
758: const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
759: if (!pa.id)
760: continue;
761: fprintf(f, "\t%d;%d;%"PRIu64";", pa.id, pa.val, pa.raw);
762: }
1.1.1.3 ! misho 763: // SCSI ONLY
! 764: const struct scsiErrorCounter * ecp;
! 765: const char * pageNames[3] = {"read", "write", "verify"};
! 766: for (int k = 0; k < 3; ++k) {
! 767: if ( !state.scsi_error_counters[k].found ) continue;
! 768: ecp = &state.scsi_error_counters[k].errCounter;
! 769: fprintf(f, "\t%s-corr-by-ecc-fast;%"PRIu64";"
! 770: "\t%s-corr-by-ecc-delayed;%"PRIu64";"
! 771: "\t%s-corr-by-retry;%"PRIu64";"
! 772: "\t%s-total-err-corrected;%"PRIu64";"
! 773: "\t%s-corr-algorithm-invocations;%"PRIu64";"
! 774: "\t%s-gb-processed;%.3f;"
! 775: "\t%s-total-unc-errors;%"PRIu64";",
! 776: pageNames[k], ecp->counter[0],
! 777: pageNames[k], ecp->counter[1],
! 778: pageNames[k], ecp->counter[2],
! 779: pageNames[k], ecp->counter[3],
! 780: pageNames[k], ecp->counter[4],
! 781: pageNames[k], (ecp->counter[5] / 1000000000.0),
! 782: pageNames[k], ecp->counter[6]);
! 783: }
! 784: if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
! 785: fprintf(f, "\tnon-medium-errors;%"PRIu64";", state.scsi_nonmedium_error.nme.counterPC0);
! 786: }
! 787: // write SCSI current temperature if it is monitored
! 788: if(state.TempPageSupported && state.temperature)
! 789: fprintf(f, "\ttemperature;%d;", state.temperature);
! 790: // end of line
1.1 misho 791: fprintf(f, "\n");
792: return true;
793: }
794:
795: // Write all state files. If write_always is false, don't write
796: // unless must_write is set.
797: static void write_all_dev_states(const dev_config_vector & configs,
798: dev_state_vector & states,
799: bool write_always = true)
800: {
801: for (unsigned i = 0; i < states.size(); i++) {
802: const dev_config & cfg = configs.at(i);
803: if (cfg.state_file.empty())
804: continue;
805: dev_state & state = states[i];
806: if (!write_always && !state.must_write)
807: continue;
808: if (!write_dev_state(cfg.state_file.c_str(), state))
809: continue;
810: state.must_write = false;
811: if (write_always || debugmode)
812: PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
813: cfg.name.c_str(), cfg.state_file.c_str());
814: }
815: }
816:
817: // Write to all attrlog files
818: static void write_all_dev_attrlogs(const dev_config_vector & configs,
819: dev_state_vector & states)
820: {
821: for (unsigned i = 0; i < states.size(); i++) {
822: const dev_config & cfg = configs.at(i);
823: if (cfg.attrlog_file.empty())
824: continue;
825: dev_state & state = states[i];
826: write_dev_attrlog(cfg.attrlog_file.c_str(), state);
827: }
828: }
829:
830: // remove the PID file
831: static void RemovePidFile()
832: {
833: if (!pid_file.empty()) {
834: if (unlink(pid_file.c_str()))
835: PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
836: pid_file.c_str(), strerror(errno));
837: pid_file.clear();
838: }
839: return;
840: }
841:
842: extern "C" { // signal handlers require C-linkage
843:
844: // Note if we catch a SIGUSR1
845: static void USR1handler(int sig)
846: {
847: if (SIGUSR1==sig)
848: caughtsigUSR1=1;
849: return;
850: }
851:
852: #ifdef _WIN32
853: // Note if we catch a SIGUSR2
854: static void USR2handler(int sig)
855: {
856: if (SIGUSR2==sig)
857: caughtsigUSR2=1;
858: return;
859: }
860: #endif
861:
862: // Note if we catch a HUP (or INT in debug mode)
863: static void HUPhandler(int sig)
864: {
865: if (sig==SIGHUP)
866: caughtsigHUP=1;
867: else
868: caughtsigHUP=2;
869: return;
870: }
871:
872: // signal handler for TERM, QUIT, and INT (if not in debug mode)
873: static void sighandler(int sig)
874: {
875: if (!caughtsigEXIT)
876: caughtsigEXIT=sig;
877: return;
878: }
879:
880: } // extern "C"
881:
882: // Cleanup, print Goodbye message and remove pidfile
883: static int Goodbye(int status)
884: {
885: // delete PID file, if one was created
886: RemovePidFile();
887:
888: // if we are exiting because of a code bug, tell user
889: if (status==EXIT_BADCODE)
890: PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
891:
892: // and this should be the final output from smartd before it exits
893: PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
894:
895: return status;
896: }
897:
898: // a replacement for setenv() which is not available on all platforms.
899: // Note that the string passed to putenv must not be freed or made
900: // invalid, since a pointer to it is kept by putenv(). This means that
901: // it must either be a static buffer or allocated off the heap. The
1.1.1.3 ! misho 902: // string can be freed if the environment variable is redefined via
! 903: // another call to putenv(). There is no portable way to unset a variable
! 904: // with putenv(). So we manage the buffer in a static object.
! 905: // Using setenv() if available is not considered because some
! 906: // implementations may produce memory leaks.
! 907:
// Owner of the "NAME=VALUE" string that is currently handed to putenv().
// There is intentionally no destructor: the environment keeps pointing
// to the buffer, so it must not be freed while it is in use.
class env_buffer
{
public:
  env_buffer()
    : m_buf((char *)0) { }

  // Set environment variable NAME to VALUE.
  // Throws std::runtime_error if putenv() fails; the environment and
  // the previously set buffer are left untouched in that case.
  void set(const char * name, const char * value);

private:
  char * m_buf; // string passed to the last successful putenv(), or 0

  // Not copyable (declared but not defined)
  env_buffer(const env_buffer &);
  void operator=(const env_buffer &);
};

void env_buffer::set(const char * name, const char * value)
{
  // "NAME" + '=' + "VALUE" + terminating 0
  size_t size = strlen(name) + 1 + strlen(value) + 1;
  char * newbuf = new char[size];
  snprintf(newbuf, size, "%s=%s", name, value);

  if (putenv(newbuf)) {
    // The environment does not reference newbuf, so release it here
    // instead of leaking it.
    delete [] newbuf;
    throw std::runtime_error("putenv() failed");
  }

  // This assumes that the same NAME is passed on each call:
  // only then the old string is no longer referenced by the environment.
  delete [] m_buf;
  m_buf = newbuf;
}
936:
937: #define EBUFLEN 1024
938:
939: static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1.1.1.2 misho 940: __attribute_format_printf(4, 5);
1.1 misho 941:
942: // If either address or executable path is non-null then send and log
943: // a warning email, or execute executable
1.1.1.3 ! misho 944: static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
! 945: {
! 946: static const char * const whichfail[] = {
1.1 misho 947: "EmailTest", // 0
948: "Health", // 1
949: "Usage", // 2
950: "SelfTest", // 3
951: "ErrorCount", // 4
952: "FailedHealthCheck", // 5
953: "FailedReadSmartData", // 6
954: "FailedReadSmartErrorLog", // 7
955: "FailedReadSmartSelfTestLog", // 8
956: "FailedOpenDevice", // 9
957: "CurrentPendingSector", // 10
958: "OfflineUncorrectableSector", // 11
959: "Temperature" // 12
960: };
961:
962: // See if user wants us to send mail
963: if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
964: return;
965:
966: std::string address = cfg.emailaddress;
967: const char * executable = cfg.emailcmdline.c_str();
968:
969: // which type of mail are we sending?
970: mailinfo * mail=(state.maillog)+which;
971:
972: // checks for sanity
973: if (cfg.emailfreq<1 || cfg.emailfreq>3) {
974: PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
975: return;
976: }
977: if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
978: PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
979: which, (int)sizeof(whichfail));
980: return;
981: }
982:
983: // Return if a single warning mail has been sent.
984: if ((cfg.emailfreq==1) && mail->logged)
985: return;
986:
987: // Return if this is an email test and one has already been sent.
988: if (which == 0 && mail->logged)
989: return;
990:
991: // To decide if to send mail, we need to know what time it is.
1.1.1.3 ! misho 992: time_t epoch = time(0);
1.1 misho 993:
994: // Return if less than one day has gone by
1.1.1.3 ! misho 995: const int day = 24*3600;
1.1 misho 996: if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
997: return;
998:
999: // Return if less than 2^(logged-1) days have gone by
1000: if (cfg.emailfreq==3 && mail->logged) {
1.1.1.3 ! misho 1001: int days = 0x01 << (mail->logged - 1);
1.1 misho 1002: days*=day;
1003: if (epoch<(mail->lastsent+days))
1004: return;
1005: }
1006:
1007: #ifdef HAVE_LIBCAP_NG
1008: if (enable_capabilities) {
1009: PrintOut(LOG_ERR, "Sending a mail was supressed. "
1010: "Mails can't be send when capabilites are enabled\n");
1011: return;
1012: }
1013: #endif
1014:
1015: // record the time of this mail message, and the first mail message
1016: if (!mail->logged)
1017: mail->firstsent=epoch;
1018: mail->lastsent=epoch;
1.1.1.3 ! misho 1019:
1.1 misho 1020: // print warning string into message
1.1.1.3 ! misho 1021: char message[256];
! 1022: va_list ap;
1.1 misho 1023: va_start(ap, fmt);
1.1.1.3 ! misho 1024: vsnprintf(message, sizeof(message), fmt, ap);
1.1 misho 1025: va_end(ap);
1026:
1027: // replace commas by spaces to separate recipients
1028: std::replace(address.begin(), address.end(), ',', ' ');
1.1.1.3 ! misho 1029:
1.1 misho 1030: // Export information in environment variables that will be useful
1031: // for user scripts
1.1.1.3 ! misho 1032: static env_buffer env[12];
! 1033: env[0].set("SMARTD_MAILER", executable);
! 1034: env[1].set("SMARTD_MESSAGE", message);
! 1035: char dates[DATEANDEPOCHLEN];
! 1036: snprintf(dates, sizeof(dates), "%d", mail->logged);
! 1037: env[2].set("SMARTD_PREVCNT", dates);
1.1 misho 1038: dateandtimezoneepoch(dates, mail->firstsent);
1.1.1.3 ! misho 1039: env[3].set("SMARTD_TFIRST", dates);
1.1 misho 1040: snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1.1.1.3 ! misho 1041: env[4].set("SMARTD_TFIRSTEPOCH", dates);
! 1042: env[5].set("SMARTD_FAILTYPE", whichfail[which]);
! 1043: env[6].set("SMARTD_ADDRESS", address.c_str());
! 1044: env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1.1 misho 1045:
1046: // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1.1.1.3 ! misho 1047: env[8].set("SMARTD_DEVICETYPE",
! 1048: (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
! 1049: env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
! 1050:
! 1051: env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
! 1052: dates[0] = 0;
! 1053: if (which) switch (cfg.emailfreq) {
! 1054: case 2: dates[0] = '1'; dates[1] = 0; break;
! 1055: case 3: snprintf(dates, sizeof(dates), "%d", (0x01)<<mail->logged);
! 1056: }
! 1057: env[11].set("SMARTD_NEXTDAYS", dates);
1.1 misho 1058:
1059: // now construct a command to send this as EMAIL
1.1.1.3 ! misho 1060: char command[2048];
! 1061: if (!*executable)
! 1062: executable = "<mail>";
1.1 misho 1063: const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1064: const char * newwarn = (which? "Warning via" : "Test of");
1065:
1.1.1.3 ! misho 1066: #ifndef _WIN32
! 1067: snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
! 1068:
! 1069: // tell SYSLOG what we are about to do...
1.1 misho 1070: PrintOut(LOG_INFO,"%s %s to %s ...\n",
1071: which?"Sending warning via":"Executing test of", executable, newadd);
1072:
1073: // issue the command to send mail or to run the user's executable
1074: errno=0;
1075: FILE * pfp;
1076: if (!(pfp=popen(command, "r")))
1077: // failed to popen() mail process
1078: PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1079: newwarn, executable, newadd, errno?strerror(errno):"");
1080: else {
1081: // pipe suceeded!
1082: int len, status;
1083: char buffer[EBUFLEN];
1084:
1085: // if unexpected output on stdout/stderr, null terminate, print, and flush
1086: if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1087: int count=0;
1088: int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1089: buffer[newlen]='\0';
1090: PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1091: newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1092:
1093: // flush pipe if needed
1094: while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1095: count++;
1096:
1097: // tell user that pipe was flushed, or that something is really wrong
1098: if (count && count<EBUFLEN)
1099: PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1100: newwarn, executable, newadd);
1101: else if (count)
1102: PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1103: newwarn, executable, newadd);
1104: }
1105:
1106: // if something went wrong with mail process, print warning
1107: errno=0;
1108: if (-1==(status=pclose(pfp)))
1109: PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1110: errno?strerror(errno):"");
1111: else {
1112: // mail process apparently succeeded. Check and report exit status
1113: int status8;
1114:
1115: if (WIFEXITED(status)) {
1116: // exited 'normally' (but perhaps with nonzero status)
1117: status8=WEXITSTATUS(status);
1118:
1119: if (status8>128)
1120: PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1121: newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1122: else if (status8)
1123: PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1124: newwarn, executable, newadd, status, status8);
1125: else
1126: PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1127: }
1128:
1129: if (WIFSIGNALED(status))
1130: PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1131: newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1132:
1133: // this branch is probably not possible. If subprocess is
1134: // stopped then pclose() should not return.
1135: if (WIFSTOPPED(status))
1136: PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1137: newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1138:
1139: }
1140: }
1141:
1142: #else // _WIN32
1.1.1.3 ! misho 1143: {
! 1144: snprintf(command, sizeof(command), "cmd /c \"%s\"", warning_script.c_str());
1.1 misho 1145:
1146: char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1147: int rc;
1148: // run command
1149: PrintOut(LOG_INFO,"%s %s to %s ...\n",
1150: (which?"Sending warning via":"Executing test of"), executable, newadd);
1.1.1.3 ! misho 1151: rc = daemon_spawn(command, "", 0, stdoutbuf, sizeof(stdoutbuf));
1.1 misho 1152: if (rc >= 0 && stdoutbuf[0])
1153: PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1154: newwarn, executable, newadd, (int)strlen(stdoutbuf), stdoutbuf);
1155: if (rc != 0)
1156: PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1157: newwarn, executable, newadd, rc);
1158: else
1159: PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1160: }
1161:
1162: #endif // _WIN32
1163:
1164: // increment mail sent counter
1165: mail->logged++;
1166: }
1167:
1168: static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1.1.1.2 misho 1169: __attribute_format_printf(4, 5);
1.1 misho 1170:
1171: static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1172: {
1173: if (!(0 <= which && which < SMARTD_NMAIL))
1174: return;
1175:
1176: // Return if no mail sent yet
1177: mailinfo & mi = state.maillog[which];
1178: if (!mi.logged)
1179: return;
1180:
1181: // Format & print message
1182: char msg[256];
1183: va_list ap;
1184: va_start(ap, fmt);
1185: vsnprintf(msg, sizeof(msg), fmt, ap);
1186: va_end(ap);
1187:
1188: PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1189: msg, mi.logged, (mi.logged==1 ? "" : "s"));
1190:
1191: // Clear mail counter and timestamps
1192: mi = mailinfo();
1193: state.must_write = true;
1194: }
1195:
1196: #ifndef _WIN32
1197:
1198: // Output multiple lines via separate syslog(3) calls.
1199: static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1200: {
1201: char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1202: vsnprintf(buf, sizeof(buf), fmt, ap);
1203:
1204: for (char * p = buf, * q; p && *p; p = q) {
1205: if ((q = strchr(p, '\n')))
1206: *q++ = 0;
1207: if (*p)
1208: syslog(priority, "%s\n", p);
1209: }
1210: }
1211:
1212: #else // _WIN32
1213: // os_win32/syslog_win32.cpp supports multiple lines.
1214: #define vsyslog_lines vsyslog
1215: #endif // _WIN32
1216:
1217: // Printing function for watching ataprint commands, or losing them
1218: // [From GLIBC Manual: Since the prototype doesn't specify types for
1219: // optional arguments, in a call to a variadic function the default
1220: // argument promotions are performed on the optional argument
1221: // values. This means the objects of type char or short int (whether
1222: // signed or not) are promoted to either int or unsigned int, as
1223: // appropriate.]
1224: void pout(const char *fmt, ...){
1225: va_list ap;
1226:
1227: // get the correct time in syslog()
1228: FixGlibcTimeZoneBug();
1229: // initialize variable argument list
1230: va_start(ap,fmt);
1231: // in debugmode==1 mode we will print the output from the ataprint.o functions!
1.1.1.3 ! misho 1232: if (debugmode && debugmode != 2) {
! 1233: FILE * f = stdout;
1.1 misho 1234: #ifdef _WIN32
1.1.1.3 ! misho 1235: if (facility == LOG_LOCAL1) // logging to stdout
! 1236: f = stderr;
1.1 misho 1237: #endif
1.1.1.3 ! misho 1238: vfprintf(f, fmt, ap);
! 1239: fflush(f);
! 1240: }
1.1 misho 1241: // in debugmode==2 mode we print output from knowndrives.o functions
1242: else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1243: openlog("smartd", LOG_PID, facility);
1244: vsyslog_lines(LOG_INFO, fmt, ap);
1245: closelog();
1246: }
1247: va_end(ap);
1248: return;
1249: }
1250:
1251: // This function prints either to stdout or to the syslog as needed.
1252: static void PrintOut(int priority, const char *fmt, ...){
1253: va_list ap;
1254:
1255: // get the correct time in syslog()
1256: FixGlibcTimeZoneBug();
1257: // initialize variable argument list
1258: va_start(ap,fmt);
1.1.1.3 ! misho 1259: if (debugmode) {
! 1260: FILE * f = stdout;
1.1 misho 1261: #ifdef _WIN32
1.1.1.3 ! misho 1262: if (facility == LOG_LOCAL1) // logging to stdout
! 1263: f = stderr;
1.1 misho 1264: #endif
1.1.1.3 ! misho 1265: vfprintf(f, fmt, ap);
! 1266: fflush(f);
! 1267: }
1.1 misho 1268: else {
1269: openlog("smartd", LOG_PID, facility);
1270: vsyslog_lines(priority, fmt, ap);
1271: closelog();
1272: }
1273: va_end(ap);
1274: return;
1275: }
1276:
// Used to warn users about invalid checksums.  Called from atacmds.cpp.
// 'string' is inserted into the warning text, which is emitted via pout()
// and is therefore only visible in the modes in which pout() produces output.
void checksumwarning(const char * string)
{
  pout("Warning! %s error: invalid SMART checksum.\n", string);
}
1282:
1283: #ifndef _WIN32
1284:
1285: // Wait for the pid file to show up, this makes sure a calling program knows
1286: // that the daemon is really up and running and has a pid to kill it
1287: static bool WaitForPidFile()
1288: {
1289: int waited, max_wait = 10;
1290: struct stat stat_buf;
1291:
1292: if (pid_file.empty() || debugmode)
1293: return true;
1294:
1295: for(waited = 0; waited < max_wait; ++waited) {
1296: if (!stat(pid_file.c_str(), &stat_buf)) {
1297: return true;
1298: } else
1299: sleep(1);
1300: }
1301: return false;
1302: }
1303:
1304: #endif // _WIN32
1305:
// Forks new process, closes ALL file descriptors, redirects stdin,
// stdout, and stderr.  Not quite daemon().  See
// http://www.linuxjournal.com/article/2335
// for a good description of why we do things this way.
//
// Non-Windows: the double fork with setsid() in between only happens if
// do_fork is set; closing the descriptors, the redirection to /dev/null,
// umask(0022) and chdir("/") are done in either case.
// Windows: the process is detached from its console via daemon_detach().
// Failures are reported with PrintOut() and end in EXIT(EXIT_STARTUP).
static void DaemonInit()
{
#ifndef _WIN32
  pid_t pid;
  int i;

  // flush all buffered streams.  Else we might get two copies of open
  // streams since both parent and child get copies of the buffers.
  fflush(NULL);

  if (do_fork) {
    if ((pid=fork()) < 0) {
      // unable to fork!
      PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
      EXIT(EXIT_STARTUP);
    }
    else if (pid) {
      // we are the parent process, wait for pid file, then exit cleanly
      if(!WaitForPidFile()) {
        PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
        EXIT(EXIT_STARTUP);
      } else
        EXIT(0);
    }

    // from here on, we are the child process.
    setsid();

    // Fork one more time to avoid any possibility of having terminals
    if ((pid=fork()) < 0) {
      // unable to fork!
      PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
      EXIT(EXIT_STARTUP);
    }
    else if (pid)
      // we are the parent process -- exit cleanly
      EXIT(0);

    // Now we are the child's child...
  }

  // close any open file descriptors
  for (i=getdtablesize();i>=0;--i)
    close(i);

  // Evaluates cmd and deliberately ignores its result; the empty 'if'
  // only serves to silence warn_unused_result compiler warnings.
#define NO_warn_unused_result(cmd) { if (cmd) {} ; }

  // redirect any IO attempts to /dev/null for stdin
  // (all descriptors were closed above, so open() presumably returns 0
  // and the two dup() calls 1 and 2 -- the code does not verify this)
  i=open("/dev/null",O_RDWR);
  if (i>=0) {
    // stdout
    NO_warn_unused_result(dup(i));
    // stderr
    NO_warn_unused_result(dup(i));
  };
  umask(0022);
  NO_warn_unused_result(chdir("/"));

  if (do_fork)
    PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());

#else // _WIN32

  // No fork() on native Win32
  // Detach this process from console
  fflush(NULL);
  if (daemon_detach("smartd")) {
    PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
    EXIT(EXIT_STARTUP);
  }
  // stdin/out/err now closed if not redirected

#endif // _WIN32
  return;
}
1385:
1386: // create a PID file containing the current process id
1387: static void WritePidFile()
1388: {
1389: if (!pid_file.empty()) {
1390: pid_t pid = getpid();
1391: mode_t old_umask;
1392: #ifndef __CYGWIN__
1393: old_umask = umask(0077); // rwx------
1394: #else
1395: // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1396: old_umask = umask(0033); // rwxr--r--
1397: #endif
1398:
1399: stdio_file f(pid_file.c_str(), "w");
1400: umask(old_umask);
1401: if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1402: PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1403: EXIT(EXIT_PID);
1404: }
1405: PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1406: }
1407: }
1408:
1409: // Prints header identifying version of code and home
1410: static void PrintHead()
1411: {
1412: PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1413: }
1414:
// Prints help info for configuration file Directives.
// The whole text is emitted with a single PrintOut() call.  The three %s
// placeholders are filled, in this order, with the configuration file name
// (configfile), the list of device types reported by
// smi()->get_valid_dev_types_str() and the list of '-F' arguments
// returned by get_valid_firmwarebug_args().
static void Directives()
{
  PrintOut(LOG_INFO,
           "Configuration file (%s) Directives (after device name):\n"
           " -d TYPE Set the device type: auto, ignore, removable,\n"
           " %s\n"
           " -T TYPE Set the tolerance to one of: normal, permissive\n"
           " -o VAL Enable/disable automatic offline tests (on/off)\n"
           " -S VAL Enable/disable attribute autosave (on/off)\n"
           " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
           " -H Monitor SMART Health Status, report if failed\n"
           " -s REG Do Self-Test at time(s) given by regular expression REG\n"
           " -l TYPE Monitor SMART log or self-test status:\n"
           " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
           " -l scterc,R,W Set SCT Error Recovery Control\n"
           " -e Change device setting: aam,[N|off], apm,[N|off], lookahead,[on|off],\n"
           " security-freeze, standby,[N|off], wcache,[on|off]\n"
           " -f Monitor 'Usage' Attributes, report failures\n"
           " -m ADD Send email warning to address ADD\n"
           " -M TYPE Modify email warning behavior (see man page)\n"
           " -p Report changes in 'Prefailure' Attributes\n"
           " -u Report changes in 'Usage' Attributes\n"
           " -t Equivalent to -p and -u Directives\n"
           " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
           " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
           " -i ID Ignore Attribute ID for -f Directive\n"
           " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
           " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
           " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
           " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
           " -v N,ST Modifies labeling of Attribute N (see man page) \n"
           " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
           " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
           " -F TYPE Use firmware bug workaround:\n"
           " %s\n"
           " # Comment: text after a hash sign is ignored\n"
           " \\ Line continuation character\n"
           "Attribute ID is a decimal integer 1 <= ID <= 255\n"
           "Use ID = 0 to turn off -C and/or -U Directives\n"
           "Example: /dev/sda -a\n",
           configfile,
           smi()->get_valid_dev_types_str().c_str(),
           get_valid_firmwarebug_args());
}
1460:
/* Returns a pointer to a static string containing a formatted list of the valid
   arguments to the option opt or NULL if opt is not a known option. */
static const char *GetValidArgList(char opt)
{
  // Each entry maps a group of option letters to their common argument list
  static const struct {
    const char * options;
    const char * arglist;
  } table[] = {
    { "As",  "<PATH_PREFIX>" },
    { "c",   "<FILE_NAME>, -" },
    { "l",   "daemon, local0, local1, local2, local3, local4, local5, local6, local7" },
    { "q",   "nodev, errors, nodevstartup, never, onecheck, showtests" },
    { "r",   "ioctl[,N], ataioctl[,N], scsiioctl[,N]" },
    { "Bpw", "<FILE_NAME>" },
    { "i",   "<INTEGER_SECONDS>" }
  };

  // strchr() would match the string terminator, so reject 0 explicitly
  if (!opt)
    return NULL;

  for (unsigned i = 0; i < sizeof(table)/sizeof(table[0]); i++) {
    if (strchr(table[i].options, opt))
      return table[i].arglist;
  }
  return NULL;
}
1487:
1488: /* prints help information for command syntax */
1489: static void Usage()
1490: {
1491: PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1492: PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1493: PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1494: #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1495: PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_ATTRIBUTELOG"MODEL-SERIAL.ata.csv]\n");
1496: #endif
1497: PrintOut(LOG_INFO,"\n");
1498: PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1499: PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1500: PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1501: #ifdef SMARTMONTOOLS_DRIVEDBDIR
1502: PrintOut(LOG_INFO,"\n");
1503: PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1504: #endif
1505: PrintOut(LOG_INFO,"]\n\n");
1506: PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1507: PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1508: PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1509: #ifdef HAVE_LIBCAP_NG
1510: PrintOut(LOG_INFO," -C, --capabilities\n");
1511: PrintOut(LOG_INFO," Use capabilities.\n"
1512: " Warning: Mail notification does not work when used.\n\n");
1513: #endif
1514: PrintOut(LOG_INFO," -d, --debug\n");
1515: PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1516: PrintOut(LOG_INFO," -D, --showdirectives\n");
1517: PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1518: PrintOut(LOG_INFO," -h, --help, --usage\n");
1519: PrintOut(LOG_INFO," Display this help and exit\n\n");
1520: PrintOut(LOG_INFO," -i N, --interval=N\n");
1521: PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1522: PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1523: #ifndef _WIN32
1524: PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1525: #else
1526: PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1527: #endif
1528: #ifndef _WIN32
1529: PrintOut(LOG_INFO," -n, --no-fork\n");
1530: PrintOut(LOG_INFO," Do not fork into background\n\n");
1531: #endif // _WIN32
1532: PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1533: PrintOut(LOG_INFO," Write PID file NAME\n\n");
1534: PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1535: PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1536: PrintOut(LOG_INFO," -r, --report=TYPE\n");
1537: PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1538: PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1539: PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1540: #ifdef SMARTMONTOOLS_SAVESTATES
1541: PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_SAVESTATES"MODEL-SERIAL.TYPE.state]\n");
1542: #endif
1543: PrintOut(LOG_INFO,"\n");
1.1.1.3 ! misho 1544: PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
! 1545: PrintOut(LOG_INFO," Run executable NAME on warnings\n");
! 1546: #ifndef _WIN32
! 1547: PrintOut(LOG_INFO," [default is "SMARTMONTOOLS_SYSCONFDIR"/smartd_warning.sh]\n\n");
! 1548: #else
! 1549: PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
! 1550: #endif
1.1 misho 1551: #ifdef _WIN32
1552: PrintOut(LOG_INFO," --service\n");
1553: PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1554: PrintOut(LOG_INFO," smartd install [options]\n");
1555: PrintOut(LOG_INFO," Remove service with:\n");
1556: PrintOut(LOG_INFO," smartd remove\n\n");
1557: #endif // _WIN32
1558: PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1559: PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1560: }
1561:
1562: static int CloseDevice(smart_device * device, const char * name)
1563: {
1564: if (!device->close()){
1565: PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1566: return 1;
1567: }
1568: // device sucessfully closed
1569: return 0;
1570: }
1571:
// Return true if a char is not allowed in a state file name.
// Only the ASCII digits and the ASCII letters A-Z and a-z are allowed.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1579:
1580: // Read error count from Summary or Extended Comprehensive SMART error log
1581: // Return -1 on error
1582: static int read_ata_error_count(ata_device * device, const char * name,
1.1.1.3 ! misho 1583: firmwarebug_defs firmwarebugs, bool extended)
1.1 misho 1584: {
1585: if (!extended) {
1586: ata_smart_errorlog log;
1.1.1.3 ! misho 1587: if (ataReadErrorLog(device, &log, firmwarebugs)){
1.1 misho 1588: PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1589: return -1;
1590: }
1591: return (log.error_log_pointer ? log.ata_error_count : 0);
1592: }
1593: else {
1594: ata_smart_exterrlog logx;
1.1.1.3 ! misho 1595: if (!ataReadExtErrorLog(device, &logx, 1 /*first sector only*/, firmwarebugs)) {
1.1 misho 1596: PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1597: return -1;
1598: }
1599: // Some disks use the reserved byte as index, see ataprint.cpp.
1600: return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1601: }
1602: }
1603:
1604: // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1605: // error count, and top bits are the power-on hours of the last error.
1606: static int SelfTestErrorCount(ata_device * device, const char * name,
1.1.1.3 ! misho 1607: firmwarebug_defs firmwarebugs)
1.1 misho 1608: {
1609: struct ata_smart_selftestlog log;
1610:
1.1.1.3 ! misho 1611: if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1.1 misho 1612: PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1613: return -1;
1614: }
1615:
1616: // return current number of self-test errors
1.1.1.3 ! misho 1617: return ataPrintSmartSelfTestlog(&log, false, firmwarebugs);
1.1 misho 1618: }
1619:
// Accessors for a packed self-test result value: bits 0-7 hold the error
// count, bits 8-23 the power-on hours.  The argument is parenthesized so
// that expressions such as 'a | b' are evaluated before masking/shifting.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1622:
// Check offline data collection status.
// Returns true if the status code, with bit 7 masked out, is 0x03.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const int code = status & 0x7f;
  return (code == 0x03);
}
1628:
// Check self-test execution status.
// Returns true if the upper four bits of the status byte are all set.
static inline bool is_self_test_in_progress(unsigned char status)
{
  return ((status & 0xf0) == 0xf0);
}
1634:
1.1 misho 1635: // Log offline data collection status
1636: static void log_offline_data_coll_status(const char * name, unsigned char status)
1637: {
1638: const char * msg;
1639: switch (status & 0x7f) {
1640: case 0x00: msg = "was never started"; break;
1641: case 0x02: msg = "was completed without error"; break;
1642: case 0x03: msg = "is in progress"; break;
1643: case 0x04: msg = "was suspended by an interrupting command from host"; break;
1644: case 0x05: msg = "was aborted by an interrupting command from host"; break;
1645: case 0x06: msg = "was aborted by the device with a fatal error"; break;
1646: default: msg = 0;
1647: }
1648:
1649: if (msg)
1650: PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1651: "Device: %s, offline data collection %s%s\n", name, msg,
1652: ((status & 0x80) ? " (auto:on)" : ""));
1653: else
1654: PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1655: name, status);
1656: }
1657:
1658: // Log self-test execution status
1659: static void log_self_test_exec_status(const char * name, unsigned char status)
1660: {
1661: const char * msg;
1662: switch (status >> 4) {
1663: case 0x0: msg = "completed without error"; break;
1664: case 0x1: msg = "was aborted by the host"; break;
1665: case 0x2: msg = "was interrupted by the host with a reset"; break;
1666: case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1667: case 0x4: msg = "completed with error (unknown test element)"; break;
1668: case 0x5: msg = "completed with error (electrical test element)"; break;
1669: case 0x6: msg = "completed with error (servo/seek test element)"; break;
1670: case 0x7: msg = "completed with error (read test element)"; break;
1671: case 0x8: msg = "completed with error (handling damage?)"; break;
1672: default: msg = 0;
1673: }
1674:
1675: if (msg)
1676: PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1677: "Device: %s, previous self-test %s\n", name, msg);
1678: else if ((status >> 4) == 0xf)
1679: PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1680: name, status & 0x0f);
1681: else
1682: PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1683: name, status);
1684: }
1685:
1686: // Check pending sector count id (-C, -U directives).
1687: static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1688: unsigned char id, const char * msg)
1689: {
1690: // Check attribute index
1691: int i = ata_find_attr_index(id, state.smartval);
1692: if (i < 0) {
1693: PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1694: cfg.name.c_str(), msg, id);
1695: return false;
1696: }
1697:
1698: // Check value
1699: uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1700: cfg.attribute_defs);
1701: if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1702: PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %"PRIu64" (0x%"PRIx64")\n",
1703: cfg.name.c_str(), msg, id, rawval, rawval);
1704: return false;
1705: }
1706:
1707: return true;
1708: }
1709:
1710: // Called by ATA/SCSIDeviceScan() after successful device check
1711: static void finish_device_scan(dev_config & cfg, dev_state & state)
1712: {
1713: // Set cfg.emailfreq if user hasn't set it
1714: if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1715: // Avoid that emails are suppressed forever due to state persistence
1716: if (cfg.state_file.empty())
1717: cfg.emailfreq = 1; // '-M once'
1718: else
1719: cfg.emailfreq = 2; // '-M daily'
1720: }
1721:
1722: // Start self-test regex check now if time was not read from state file
1723: if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1724: state.scheduled_test_next_check = time(0);
1725: }
1726:
1.1.1.2 misho 1727: // Common function to format result message for ATA setting
1728: static void format_set_result_msg(std::string & msg, const char * name, bool ok,
1729: int set_option = 0, bool has_value = false)
1730: {
1731: if (!msg.empty())
1732: msg += ", ";
1733: msg += name;
1734: if (!ok)
1735: msg += ":--";
1736: else if (set_option < 0)
1737: msg += ":off";
1738: else if (has_value)
1739: msg += strprintf(":%d", set_option-1);
1740: else if (set_option > 0)
1741: msg += ":on";
1742: }
1743:
1.1 misho 1744:
// TODO: Add '-F swapid' directive
// Until then this is fixed to false; it is passed to ata_read_identity()
// by ATADeviceScan().
const bool fix_swapped_id = false;
1747:
1748: // scan to see what ata devices there are, and if they support SMART
1749: static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev)
1750: {
1751: int supported=0;
1752: struct ata_identify_device drive;
1753: const char *name = cfg.name.c_str();
1754: int retid;
1755:
1756: // Device must be open
1757:
1758: // Get drive identity structure
1759: if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1760: if (retid<0)
1761: // Unable to read Identity structure
1762: PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1763: else
1764: PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1765: name, packetdevicetype(retid-1));
1766: CloseDevice(atadev, name);
1767: return 2;
1768: }
1769:
1.1.1.3 ! misho 1770: // Get drive identity, size and rotation rate (HDD/SSD)
1.1 misho 1771: char model[40+1], serial[20+1], firmware[8+1];
1772: ata_format_id_string(model, drive.model, sizeof(model)-1);
1773: ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1774: ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1775:
1776: ata_size_info sizes;
1777: ata_get_size_info(&drive, sizes);
1778: state.num_sectors = sizes.sectors;
1.1.1.3 ! misho 1779: cfg.dev_rpm = ata_get_rotation_rate(&drive);
1.1 misho 1780:
1781: char wwn[30]; wwn[0] = 0;
1782: unsigned oui = 0; uint64_t unique_id = 0;
1783: int naa = ata_get_wwn(&drive, oui, unique_id);
1784: if (naa >= 0)
1785: snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09"PRIx64", ", naa, oui, unique_id);
1786:
1.1.1.3 ! misho 1787: // Format device id string for warning emails
1.1 misho 1788: char cap[32];
1.1.1.3 ! misho 1789: cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
! 1790: format_capacity(cap, sizeof(cap), sizes.capacity, "."));
! 1791:
! 1792: PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
1.1 misho 1793:
1794: // Show if device in database, and use preset vendor attribute
1795: // options unless user has requested otherwise.
1796: if (cfg.ignorepresets)
1797: PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1798: else {
1799: // Apply vendor specific presets, print warning if present
1800: const drive_settings * dbentry = lookup_drive_apply_presets(
1.1.1.3 ! misho 1801: &drive, cfg.attribute_defs, cfg.firmwarebugs);
1.1 misho 1802: if (!dbentry)
1803: PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1804: else {
1805: PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s\n",
1806: name, (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
1807: if (*dbentry->warningmsg)
1808: PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1809: }
1810: }
1811:
1812: // Set default '-C 197[+]' if no '-C ID' is specified.
1813: if (!cfg.curr_pending_set)
1814: cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
1815: // Set default '-U 198[+]' if no '-U ID' is specified.
1816: if (!cfg.offl_pending_set)
1817: cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
1818:
1819: // If requested, show which presets would be used for this drive
1820: if (cfg.showpresets) {
1821: int savedebugmode=debugmode;
1822: PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1823: if (!debugmode)
1824: debugmode=2;
1825: show_presets(&drive);
1826: debugmode=savedebugmode;
1827: }
1828:
1829: // see if drive supports SMART
1830: supported=ataSmartSupport(&drive);
1831: if (supported!=1) {
1832: if (supported==0)
1833: // drive does NOT support SMART
1834: PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1835: else
1836: // can't tell if drive supports SMART
1837: PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1838:
1839: // should we proceed anyway?
1840: if (cfg.permissive) {
1841: PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1842: }
1843: else {
1844: PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1845: CloseDevice(atadev, name);
1846: return 2;
1847: }
1848: }
1849:
1850: if (ataEnableSmart(atadev)) {
1851: // Enable SMART command has failed
1852: PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1853: CloseDevice(atadev, name);
1854: return 2;
1855: }
1856:
1857: // disable device attribute autosave...
1858: if (cfg.autosave==1) {
1859: if (ataDisableAutoSave(atadev))
1860: PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1861: else
1862: PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1863: }
1864:
1865: // or enable device attribute autosave
1866: if (cfg.autosave==2) {
1867: if (ataEnableAutoSave(atadev))
1868: PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1869: else
1870: PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1871: }
1872:
1873: // capability check: SMART status
1874: if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1875: PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1876: cfg.smartcheck = false;
1877: }
1878:
1879: // capability check: Read smart values and thresholds. Note that
1880: // smart values are ALSO needed even if we ONLY want to know if the
1881: // device is self-test log or error-log capable! After ATA-5, this
1882: // information was ALSO reproduced in the IDENTIFY DEVICE response,
1883: // but sadly not for ATA-5. Sigh.
1884:
1885: // do we need to get SMART data?
1886: bool smart_val_ok = false;
1887: if ( cfg.autoofflinetest || cfg.selftest
1888: || cfg.errorlog || cfg.xerrorlog
1889: || cfg.offlinests || cfg.selfteststs
1890: || cfg.usagefailed || cfg.prefail || cfg.usage
1891: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
1892: || cfg.curr_pending_id || cfg.offl_pending_id ) {
1893:
1894: if (ataReadSmartValues(atadev, &state.smartval)) {
1895: PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
1896: cfg.usagefailed = cfg.prefail = cfg.usage = false;
1897: cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1898: cfg.curr_pending_id = cfg.offl_pending_id = 0;
1899: }
1900: else {
1901: smart_val_ok = true;
1902: if (ataReadSmartThresholds(atadev, &state.smartthres)) {
1903: PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
1904: name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
1905: cfg.usagefailed = false;
1906: // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1907: memset(&state.smartthres, 0, sizeof(state.smartthres));
1908: }
1909: }
1910:
1911: // see if the necessary Attribute is there to monitor offline or
1912: // current pending sectors or temperature
1913: if ( cfg.curr_pending_id
1914: && !check_pending_id(cfg, state, cfg.curr_pending_id,
1915: "Current_Pending_Sector"))
1916: cfg.curr_pending_id = 0;
1917:
1918: if ( cfg.offl_pending_id
1919: && !check_pending_id(cfg, state, cfg.offl_pending_id,
1920: "Offline_Uncorrectable"))
1921: cfg.offl_pending_id = 0;
1922:
1923: if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1924: && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
1.1.1.3 ! misho 1925: PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
! 1926: name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
1.1 misho 1927: cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1928: }
1.1.1.2 misho 1929:
1930: // Report ignored '-r' or '-R' directives
1931: for (int id = 1; id <= 255; id++) {
1932: if (cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_PRINT)) {
1933: char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
1934: const char * excl = (cfg.monitor_attr_flags.is_set(id,
1935: (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
1936:
1937: int idx = ata_find_attr_index(id, state.smartval);
1938: if (idx < 0)
1939: PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
1940: else {
1941: bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
1942: if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
1943: PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
1944: (prefail ? "Prefailure" : "Usage"), opt, id, excl);
1945: }
1946: }
1947: }
1.1 misho 1948: }
1949:
1950: // enable/disable automatic on-line testing
1951: if (cfg.autoofflinetest) {
1952: // is this an enable or disable request?
1953: const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
1954: if (!smart_val_ok)
1955: PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
1956: else {
1957: // if command appears unsupported, issue a warning...
1958: if (!isSupportAutomaticTimer(&state.smartval))
1959: PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
1960: // ... but then try anyway
1961: if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
1962: PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
1963: else
1964: PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
1965: }
1966: }
1967:
1968: // Read log directories if required for capability check
1969: ata_smart_log_directory smart_logdir, gp_logdir;
1970: bool smart_logdir_ok = false, gp_logdir_ok = false;
1971:
1972: if ( isGeneralPurposeLoggingCapable(&drive)
1.1.1.3 ! misho 1973: && (cfg.errorlog || cfg.selftest)
! 1974: && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
1.1 misho 1975: if (!ataReadLogDirectory(atadev, &smart_logdir, false))
1976: smart_logdir_ok = true;
1977: }
1978:
1.1.1.3 ! misho 1979: if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
1.1 misho 1980: if (!ataReadLogDirectory(atadev, &gp_logdir, true))
1981: gp_logdir_ok = true;
1982: }
1983:
1984: // capability check: self-test-log
1985: state.selflogcount = 0; state.selfloghour = 0;
1986: if (cfg.selftest) {
1987: int retval;
1988: if (!( cfg.permissive
1989: || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
1990: || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
1991: PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
1992: cfg.selftest = false;
1993: }
1.1.1.3 ! misho 1994: else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
1.1 misho 1995: PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
1996: cfg.selftest = false;
1997: }
1998: else {
1999: state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2000: state.selfloghour =SELFTEST_ERRORHOURS(retval);
2001: }
2002: }
2003:
2004: // capability check: ATA error log
2005: state.ataerrorcount = 0;
2006: if (cfg.errorlog) {
2007: int errcnt1;
2008: if (!( cfg.permissive
2009: || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2010: || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2011: PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2012: cfg.errorlog = false;
2013: }
1.1.1.3 ! misho 2014: else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
1.1 misho 2015: PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2016: cfg.errorlog = false;
2017: }
2018: else
2019: state.ataerrorcount = errcnt1;
2020: }
2021:
2022: if (cfg.xerrorlog) {
2023: int errcnt2;
1.1.1.3 ! misho 2024: if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
! 2025: || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
1.1 misho 2026: PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2027: name);
2028: cfg.xerrorlog = false;
2029: }
1.1.1.3 ! misho 2030: else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
1.1 misho 2031: PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2032: cfg.xerrorlog = false;
2033: }
2034: else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2035: PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2036: name, state.ataerrorcount, errcnt2);
2037: // Record max error count
2038: if (errcnt2 > state.ataerrorcount)
2039: state.ataerrorcount = errcnt2;
2040: }
2041: else
2042: state.ataerrorcount = errcnt2;
2043: }
2044:
2045: // capability check: self-test and offline data collection status
2046: if (cfg.offlinests || cfg.selfteststs) {
2047: if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2048: if (cfg.offlinests)
2049: PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2050: if (cfg.selfteststs)
2051: PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2052: cfg.offlinests = cfg.selfteststs = false;
2053: }
2054: }
2055:
2056: // capabilities check -- does it support powermode?
2057: if (cfg.powermode) {
2058: int powermode = ataCheckPowerMode(atadev);
2059:
2060: if (-1 == powermode) {
2061: PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2062: cfg.powermode=0;
2063: }
2064: else if (powermode!=0 && powermode!=0x80 && powermode!=0xff) {
2065: PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2066: name, powermode);
2067: cfg.powermode=0;
2068: }
2069: }
2070:
1.1.1.2 misho 2071: // Apply ATA settings
2072: std::string msg;
2073:
2074: if (cfg.set_aam)
2075: format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2076: ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2077: ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2078:
2079: if (cfg.set_apm)
2080: format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2081: ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2082: ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2083:
2084: if (cfg.set_lookahead)
2085: format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
2086: (cfg.set_lookahead > 0 ? ATA_ENABLE_READ_LOOK_AHEAD : ATA_DISABLE_READ_LOOK_AHEAD)),
2087: cfg.set_lookahead);
2088:
2089: if (cfg.set_wcache)
2090: format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
2091: (cfg.set_wcache > 0? ATA_ENABLE_WRITE_CACHE : ATA_DISABLE_WRITE_CACHE)), cfg.set_wcache);
2092:
2093: if (cfg.set_security_freeze)
2094: format_set_result_msg(msg, "Security freeze",
2095: ata_nodata_command(atadev, ATA_SECURITY_FREEZE_LOCK));
2096:
2097: if (cfg.set_standby)
2098: format_set_result_msg(msg, "Standby",
2099: ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2100:
2101: // Report as one log entry
2102: if (!msg.empty())
2103: PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2104:
1.1 misho 2105: // set SCT Error Recovery Control if requested
2106: if (cfg.sct_erc_set) {
2107: if (!isSCTErrorRecoveryControlCapable(&drive))
2108: PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2109: name);
2110: else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
2111: || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime))
2112: PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2113: else
2114: PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2115: name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2116: }
2117:
2118: // If no tests available or selected, return
2119: if (!( cfg.smartcheck || cfg.selftest
2120: || cfg.errorlog || cfg.xerrorlog
2121: || cfg.offlinests || cfg.selfteststs
2122: || cfg.usagefailed || cfg.prefail || cfg.usage
2123: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2124: CloseDevice(atadev, name);
2125: return 3;
2126: }
2127:
2128: // tell user we are registering device
2129: PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2130:
2131: // close file descriptor
2132: CloseDevice(atadev, name);
2133:
2134: if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2135: // Build file name for state file
2136: std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2137: std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2138: if (!state_path_prefix.empty()) {
2139: cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2140: // Read previous state
2141: if (read_dev_state(cfg.state_file.c_str(), state)) {
2142: PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2143: // Copy ATA attribute values to temp state
2144: state.update_temp_state();
2145: }
2146: }
2147: if (!attrlog_path_prefix.empty())
2148: cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2149: }
2150:
2151: finish_device_scan(cfg, state);
2152:
2153: return 0;
2154: }
2155:
2156: // on success, return 0. On failure, return >0. Never return <0,
2157: // please.
2158: static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev)
2159: {
2160: int k, err, req_len, avail_len, version, len;
2161: const char *device = cfg.name.c_str();
2162: struct scsi_iec_mode_page iec;
2163: UINT8 tBuf[64];
2164: UINT8 inqBuf[96];
2165: UINT8 vpdBuf[252];
1.1.1.3 ! misho 2166: char lu_id[64], serial[256], vendor[40], model[40];
1.1 misho 2167:
2168: // Device must be open
2169: memset(inqBuf, 0, 96);
2170: req_len = 36;
2171: if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2172: /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2173: req_len = 64;
2174: if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2175: PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2176: "skip device\n", device);
2177: return 2;
2178: }
2179: }
1.1.1.3 ! misho 2180: version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
! 2181:
1.1 misho 2182: avail_len = inqBuf[4] + 5;
2183: len = (avail_len < req_len) ? avail_len : req_len;
2184: if (len < 36) {
2185: PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2186: "skip device\n", device);
2187: return 2;
2188: }
1.1.1.2 misho 2189:
2190: int pdt = inqBuf[0] & 0x1f;
2191:
2192: if (! ((0 == pdt) || (4 == pdt) || (5 == pdt) || (7 == pdt) ||
2193: (0xe == pdt))) {
2194: PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2195: "skip\n", device, pdt);
2196: return 2;
2197: }
1.1.1.3 ! misho 2198:
! 2199: if (supported_vpd_pages_p) {
! 2200: delete supported_vpd_pages_p;
! 2201: supported_vpd_pages_p = NULL;
! 2202: }
! 2203: supported_vpd_pages_p = new supported_vpd_pages(scsidev);
! 2204:
1.1 misho 2205: lu_id[0] = '\0';
1.1.1.3 ! misho 2206: if ((version >= 0x3) && (version < 0x8)) {
! 2207: /* SPC to SPC-5 */
! 2208: if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_DEVICE_IDENTIFICATION,
! 2209: vpdBuf, sizeof(vpdBuf))) {
1.1 misho 2210: len = vpdBuf[3];
2211: scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), NULL);
2212: }
1.1.1.3 ! misho 2213: }
! 2214: serial[0] = '\0';
! 2215: if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_UNIT_SERIAL_NUMBER,
! 2216: vpdBuf, sizeof(vpdBuf))) {
! 2217: len = vpdBuf[3];
! 2218: vpdBuf[4 + len] = '\0';
! 2219: scsi_format_id_string(serial, (const unsigned char *)&vpdBuf[4], len);
! 2220: }
1.1 misho 2221:
2222: unsigned int lb_size;
2223: char si_str[64];
1.1.1.3 ! misho 2224: uint64_t capacity = scsiGetSize(scsidev, &lb_size, NULL);
1.1 misho 2225:
2226: if (capacity)
2227: format_capacity(si_str, sizeof(si_str), capacity);
2228: else
2229: si_str[0] = '\0';
1.1.1.3 ! misho 2230:
! 2231: // Format device id string for warning emails
! 2232: cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
! 2233: (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
! 2234: (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
! 2235: (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
! 2236: (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
! 2237:
! 2238: // format "model" string
! 2239: scsi_format_id_string(vendor, (const unsigned char *)&inqBuf[8], 8);
! 2240: scsi_format_id_string(model, (const unsigned char *)&inqBuf[16], 16);
! 2241: PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
1.1 misho 2242:
2243: // check that device is ready for commands. IE stores its stuff on
2244: // the media.
2245: if ((err = scsiTestUnitReady(scsidev))) {
2246: if (SIMPLE_ERR_NOT_READY == err)
2247: PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2248: else if (SIMPLE_ERR_NO_MEDIUM == err)
2249: PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2250: else if (SIMPLE_ERR_BECOMING_READY == err)
2251: PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2252: else
2253: PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2254: CloseDevice(scsidev, device);
2255: return 2;
2256: }
2257:
2258: // Badly-conforming USB storage devices may fail this check.
2259: // The response to the following IE mode page fetch (current and
2260: // changeable values) is carefully examined. It has been found
2261: // that various USB devices that malform the response will lock up
2262: // if asked for a log page (e.g. temperature) so it is best to
2263: // bail out now.
2264: if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2265: state.modese_len = iec.modese_len;
2266: else if (SIMPLE_ERR_BAD_FIELD == err)
2267: ; /* continue since it is reasonable not to support IE mpage */
2268: else { /* any other error (including malformed response) unreasonable */
2269: PrintOut(LOG_INFO,
2270: "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2271: device, err);
2272: CloseDevice(scsidev, device);
2273: return 3;
2274: }
2275:
2276: // N.B. The following is passive (i.e. it doesn't attempt to turn on
2277: // smart if it is off). This may change to be the same as the ATA side.
2278: if (!scsi_IsExceptionControlEnabled(&iec)) {
2279: PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2280: "Try 'smartctl -s on %s' to turn on SMART features\n",
2281: device, device);
2282: CloseDevice(scsidev, device);
2283: return 3;
2284: }
2285:
2286: // Flag that certain log pages are supported (information may be
2287: // available from other sources).
2288: if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0)) {
2289: for (k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2290: switch (tBuf[k]) {
2291: case TEMPERATURE_LPAGE:
2292: state.TempPageSupported = 1;
2293: break;
2294: case IE_LPAGE:
2295: state.SmartPageSupported = 1;
2296: break;
1.1.1.3 ! misho 2297: case READ_ERROR_COUNTER_LPAGE:
! 2298: state.ReadECounterPageSupported = 1;
! 2299: break;
! 2300: case WRITE_ERROR_COUNTER_LPAGE:
! 2301: state.WriteECounterPageSupported = 1;
! 2302: break;
! 2303: case VERIFY_ERROR_COUNTER_LPAGE:
! 2304: state.VerifyECounterPageSupported = 1;
! 2305: break;
! 2306: case NON_MEDIUM_ERROR_LPAGE:
! 2307: state.NonMediumErrorPageSupported = 1;
! 2308: break;
1.1 misho 2309: default:
2310: break;
2311: }
2312: }
2313: }
2314:
2315: // Check if scsiCheckIE() is going to work
2316: {
2317: UINT8 asc = 0;
2318: UINT8 ascq = 0;
2319: UINT8 currenttemp = 0;
2320: UINT8 triptemp = 0;
2321:
2322: if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2323: &asc, &ascq, ¤ttemp, &triptemp)) {
2324: PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2325: state.SuppressReport = 1;
2326: if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
1.1.1.3 ! misho 2327: PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
! 2328: device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
1.1 misho 2329: cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2330: }
2331: }
2332: }
2333:
2334: // capability check: self-test-log
2335: if (cfg.selftest){
2336: int retval = scsiCountFailedSelfTests(scsidev, 0);
2337: if (retval<0) {
2338: // no self-test log, turn off monitoring
2339: PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2340: cfg.selftest = false;
2341: state.selflogcount = 0;
2342: state.selfloghour = 0;
2343: }
2344: else {
2345: // register starting values to watch for changes
2346: state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2347: state.selfloghour =SELFTEST_ERRORHOURS(retval);
2348: }
2349: }
2350:
2351: // disable autosave (set GLTSD bit)
2352: if (cfg.autosave==1){
2353: if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2354: PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2355: else
2356: PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2357: }
2358:
2359: // or enable autosave (clear GLTSD bit)
2360: if (cfg.autosave==2){
2361: if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2362: PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2363: else
2364: PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2365: }
2366:
2367: // tell user we are registering device
2368: PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2369:
1.1.1.2 misho 2370: // Make sure that init_standby_check() ignores SCSI devices
2371: cfg.offlinests_ns = cfg.selfteststs_ns = false;
2372:
1.1 misho 2373: // close file descriptor
2374: CloseDevice(scsidev, device);
2375:
1.1.1.3 ! misho 2376: if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
! 2377: // Build file name for state file
! 2378: std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
! 2379: std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
! 2380: if (!state_path_prefix.empty()) {
! 2381: cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
! 2382: // Read previous state
! 2383: if (read_dev_state(cfg.state_file.c_str(), state)) {
! 2384: PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
! 2385: // Copy ATA attribute values to temp state
! 2386: state.update_temp_state();
! 2387: }
! 2388: }
! 2389: if (!attrlog_path_prefix.empty())
! 2390: cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
! 2391: }
! 2392:
1.1 misho 2393: finish_device_scan(cfg, state);
2394:
2395: return 0;
2396: }
2397:
2398: // If the self-test log has got more self-test errors (or more recent
2399: // self-test errors) recorded, then notify user.
2400: static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2401: {
2402: const char * name = cfg.name.c_str();
2403:
2404: if (newi<0)
2405: // command failed
2406: MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2407: else {
2408: reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2409:
2410: // old and new error counts
2411: int oldc=state.selflogcount;
2412: int newc=SELFTEST_ERRORCOUNT(newi);
2413:
2414: // old and new error timestamps in hours
2415: int oldh=state.selfloghour;
2416: int newh=SELFTEST_ERRORHOURS(newi);
2417:
2418: if (oldc<newc) {
2419: // increase in error count
2420: PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2421: name, oldc, newc);
2422: MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2423: name, oldc, newc);
2424: state.must_write = true;
2425: }
2426: else if (newc > 0 && oldh != newh) {
2427: // more recent error
2428: // a 'more recent' error might actually be a smaller hour number,
2429: // if the hour number has wrapped.
2430: // There's still a bug here. You might just happen to run a new test
2431: // exactly 32768 hours after the previous failure, and have run exactly
2432: // 20 tests between the two, in which case smartd will miss the
2433: // new failure.
2434: PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2435: name, newh);
1.1.1.3 ! misho 2436: MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
1.1 misho 2437: name, newh);
2438: state.must_write = true;
2439: }
2440:
2441: // Print info if error entries have disappeared
2442: // or newer successful successful extended self-test exits
2443: if (oldc > newc) {
2444: PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2445: name, oldc, newc);
2446: if (newc == 0)
2447: reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2448: }
2449:
2450: // Needed since self-test error count may DECREASE. Hour might
2451: // also have changed.
2452: state.selflogcount= newc;
2453: state.selfloghour = newh;
2454: }
2455: return;
2456: }
2457:
// Characters identifying the self-test types, highest priority first.
// next_scheduled_test() uses the index into this string as priority.
static const char test_type_chars[] = "LncrSCO";
// Number of test types (string length without the terminating 0)
static const unsigned num_test_types = (sizeof(test_type_chars) / sizeof(test_type_chars[0])) - 1;
2461:
2462: // returns test type if time to do test of type testtype,
2463: // 0 if not time to do test.
2464: static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2465: {
2466: // check that self-testing has been requested
2467: if (cfg.test_regex.empty())
2468: return 0;
2469:
2470: // Exit if drive not capable of any test
2471: if ( state.not_cap_long && state.not_cap_short &&
2472: (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2473: return 0;
2474:
2475: // since we are about to call localtime(), be sure glibc is informed
2476: // of any timezone changes we make.
2477: if (!usetime)
2478: FixGlibcTimeZoneBug();
2479:
2480: // Is it time for next check?
2481: time_t now = (!usetime ? time(0) : usetime);
2482: if (now < state.scheduled_test_next_check)
2483: return 0;
2484:
2485: // Limit time check interval to 90 days
2486: if (state.scheduled_test_next_check + (3600L*24*90) < now)
2487: state.scheduled_test_next_check = now - (3600L*24*90);
2488:
2489: // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2490: char testtype = 0;
2491: time_t testtime = 0; int testhour = 0;
2492: int maxtest = num_test_types-1;
2493:
2494: for (time_t t = state.scheduled_test_next_check; ; ) {
2495: struct tm * tms = localtime(&t);
2496: // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2497: int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2498: for (int i = 0; i <= maxtest; i++) {
2499: // Skip if drive not capable of this test
2500: switch (test_type_chars[i]) {
2501: case 'L': if (state.not_cap_long) continue; break;
2502: case 'S': if (state.not_cap_short) continue; break;
2503: case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2504: case 'O': if (scsi || state.not_cap_offline) continue; break;
2505: case 'c': case 'n':
2506: case 'r': if (scsi || state.not_cap_selective) continue; break;
2507: default: continue;
2508: }
2509: // Try match of "T/MM/DD/d/HH"
2510: char pattern[16];
2511: snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2512: test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2513: if (cfg.test_regex.full_match(pattern)) {
2514: // Test found
2515: testtype = pattern[0];
2516: testtime = t; testhour = tms->tm_hour;
2517: // Limit further matches to higher priority self-tests
2518: maxtest = i-1;
2519: break;
2520: }
2521: }
2522: // Exit if no tests left or current time reached
2523: if (maxtest < 0)
2524: break;
2525: if (t >= now)
2526: break;
2527: // Check next hour
2528: if ((t += 3600) > now)
2529: t = now;
2530: }
2531:
2532: // Do next check not before next hour.
2533: struct tm * tmnow = localtime(&now);
2534: state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2535:
2536: if (testtype) {
2537: state.must_write = true;
2538: // Tell user if an old test was found.
2539: if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2540: char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2541: PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2542: cfg.name.c_str(), testtype, datebuf);
2543: }
2544: }
2545:
2546: return testtype;
2547: }
2548:
2549: // Print a list of future tests.
2550: static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2551: {
2552: unsigned numdev = configs.size();
2553: if (!numdev)
2554: return;
2555: std::vector<int> testcnts(numdev * num_test_types, 0);
2556:
2557: PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2558:
2559: // FixGlibcTimeZoneBug(); // done in PrintOut()
2560: time_t now = time(0);
2561: char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2562: dateandtimezoneepoch(datenow, now);
2563:
2564: long seconds;
2565: for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2566: // Check for each device whether a test will be run
2567: time_t testtime = now + seconds;
2568: for (unsigned i = 0; i < numdev; i++) {
2569: const dev_config & cfg = configs.at(i);
2570: dev_state & state = states.at(i);
2571: const char * p;
2572: char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2573: if (testtype && (p = strchr(test_type_chars, testtype))) {
2574: unsigned t = (p - test_type_chars);
2575: // Report at most 5 tests of each type
2576: if (++testcnts[i*num_test_types + t] <= 5) {
2577: dateandtimezoneepoch(date, testtime);
2578: PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2579: testcnts[i*num_test_types + t], testtype, date);
2580: }
2581: }
2582: }
2583: }
2584:
2585: // Report totals
2586: dateandtimezoneepoch(date, now+seconds);
2587: PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2588: for (unsigned i = 0; i < numdev; i++) {
2589: const dev_config & cfg = configs.at(i);
2590: bool scsi = devices.at(i)->is_scsi();
2591: for (unsigned t = 0; t < num_test_types; t++) {
2592: int cnt = testcnts[i*num_test_types + t];
2593: if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2594: continue;
2595: PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2596: cnt, (cnt==1?"":"s"), test_type_chars[t]);
2597: }
2598: }
2599:
2600: }
2601:
2602: // Return zero on success, nonzero on failure. Perform offline (background)
2603: // short or long (extended) self test on given scsi device.
2604: static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2605: {
2606: int retval = 0;
2607: const char *testname = 0;
2608: const char *name = cfg.name.c_str();
2609: int inProgress;
2610:
2611: if (scsiSelfTestInProgress(device, &inProgress)) {
2612: PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2613: state.not_cap_short = state.not_cap_long = true;
2614: return 1;
2615: }
2616:
2617: if (1 == inProgress) {
2618: PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2619: "progress.\n", name);
2620: return 1;
2621: }
2622:
2623: switch (testtype) {
2624: case 'S':
2625: testname = "Short Self";
2626: retval = scsiSmartShortSelfTest(device);
2627: break;
2628: case 'L':
2629: testname = "Long Self";
2630: retval = scsiSmartExtendSelfTest(device);
2631: break;
2632: }
2633: // If we can't do the test, exit
2634: if (NULL == testname) {
2635: PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2636: testtype);
2637: return 1;
2638: }
2639: if (retval) {
2640: if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2641: (SIMPLE_ERR_BAD_FIELD == retval)) {
2642: PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2643: testname);
2644: if ('L'==testtype)
2645: state.not_cap_long = true;
2646: else
2647: state.not_cap_short = true;
2648:
2649: return 1;
2650: }
2651: PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2652: testname, retval);
2653: return 1;
2654: }
2655:
2656: PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2657:
2658: return 0;
2659: }
2660:
2661: // Do an offline immediate or self-test. Return zero on success,
2662: // nonzero on failure.
2663: static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2664: {
2665: const char *name = cfg.name.c_str();
2666:
2667: // Read current smart data and check status/capability
2668: struct ata_smart_values data;
2669: if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2670: PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2671: return 1;
2672: }
2673:
2674: // Check for capability to do the test
2675: int dotest = -1, mode = 0;
2676: const char *testname = 0;
2677: switch (testtype) {
2678: case 'O':
2679: testname="Offline Immediate ";
2680: if (isSupportExecuteOfflineImmediate(&data))
2681: dotest=OFFLINE_FULL_SCAN;
2682: else
2683: state.not_cap_offline = true;
2684: break;
2685: case 'C':
2686: testname="Conveyance Self-";
2687: if (isSupportConveyanceSelfTest(&data))
2688: dotest=CONVEYANCE_SELF_TEST;
2689: else
2690: state.not_cap_conveyance = true;
2691: break;
2692: case 'S':
2693: testname="Short Self-";
2694: if (isSupportSelfTest(&data))
2695: dotest=SHORT_SELF_TEST;
2696: else
2697: state.not_cap_short = true;
2698: break;
2699: case 'L':
2700: testname="Long Self-";
2701: if (isSupportSelfTest(&data))
2702: dotest=EXTEND_SELF_TEST;
2703: else
2704: state.not_cap_long = true;
2705: break;
2706:
2707: case 'c': case 'n': case 'r':
2708: testname = "Selective Self-";
2709: if (isSupportSelectiveSelfTest(&data)) {
2710: dotest = SELECTIVE_SELF_TEST;
2711: switch (testtype) {
2712: case 'c': mode = SEL_CONT; break;
2713: case 'n': mode = SEL_NEXT; break;
2714: case 'r': mode = SEL_REDO; break;
2715: }
2716: }
2717: else
2718: state.not_cap_selective = true;
2719: break;
2720: }
2721:
2722: // If we can't do the test, exit
2723: if (dotest<0) {
2724: PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2725: return 1;
2726: }
2727:
2728: // If currently running a self-test, do not interrupt it to start another.
2729: if (15==(data.self_test_exec_status >> 4)) {
1.1.1.3 ! misho 2730: if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
1.1 misho 2731: PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2732: "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2733: } else {
2734: PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2735: name, testname, (int)(data.self_test_exec_status & 0x0f));
2736: return 1;
2737: }
2738: }
2739:
2740: if (dotest == SELECTIVE_SELF_TEST) {
2741: // Set test span
2742: ata_selective_selftest_args selargs, prev_args;
2743: selargs.num_spans = 1;
2744: selargs.span[0].mode = mode;
2745: prev_args.num_spans = 1;
2746: prev_args.span[0].start = state.selective_test_last_start;
2747: prev_args.span[0].end = state.selective_test_last_end;
2748: if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
2749: PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
2750: return 1;
2751: }
2752: uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
2753: PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %"PRIu64" - %"PRIu64" (%"PRIu64" sectors, %u%% - %u%% of disk).\n",
2754: name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
2755: start, end, end - start + 1,
2756: (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
2757: (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
2758: state.selective_test_last_start = start;
2759: state.selective_test_last_end = end;
2760: }
2761:
2762: // execute the test, and return status
2763: int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
2764: if (retval) {
2765: PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2766: return retval;
2767: }
2768:
1.1.1.2 misho 2769: // Report recent test start to do_disable_standby_check()
2770: // and force log of next test status
2771: if (testtype == 'O')
2772: state.offline_started = true;
2773: else
2774: state.selftest_started = true;
1.1 misho 2775:
2776: PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2777: return 0;
2778: }
2779:
2780: // Check pending sector count attribute values (-C, -U directives).
2781: static void check_pending(const dev_config & cfg, dev_state & state,
2782: unsigned char id, bool increase_only,
2783: const ata_smart_values & smartval,
2784: int mailtype, const char * msg)
2785: {
2786: // Find attribute index
2787: int i = ata_find_attr_index(id, smartval);
2788: if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
2789: return;
2790:
2791: // No report if no sectors pending.
2792: uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
2793: if (rawval == 0) {
2794: reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
2795: return;
2796: }
2797:
2798: // If attribute is not reset, report only sector count increases.
2799: uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
2800: if (!(!increase_only || prev_rawval < rawval))
2801: return;
2802:
2803: // Format message.
2804: std::string s = strprintf("Device: %s, %"PRId64" %s", cfg.name.c_str(), rawval, msg);
2805: if (prev_rawval > 0 && rawval != prev_rawval)
2806: s += strprintf(" (changed %+"PRId64")", rawval - prev_rawval);
2807:
2808: PrintOut(LOG_CRIT, "%s\n", s.c_str());
1.1.1.3 ! misho 2809: MailWarning(cfg, state, mailtype, "%s", s.c_str());
1.1 misho 2810: state.must_write = true;
2811: }
2812:
// Render a temperature value for log output.
// A value of zero means "unset" and yields the constant string "??".
// Any other value is written in decimal to 'buf', which is then returned.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (x) {
    snprintf(buf, sizeof(buf), "%u", x);
    return buf;
  }
  return "??"; // unset
}
2821:
2822: // Check Temperature limits
2823: static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
2824: {
2825: if (!(0 < currtemp && currtemp < 255)) {
2826: PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
2827: return;
2828: }
2829:
2830: // Update Max Temperature
2831: const char * minchg = "", * maxchg = "";
2832: if (currtemp > state.tempmax) {
2833: if (state.tempmax)
2834: maxchg = "!";
2835: state.tempmax = currtemp;
2836: state.must_write = true;
2837: }
2838:
2839: char buf[20];
2840: if (!state.temperature) {
2841: // First check
2842: if (!state.tempmin || currtemp < state.tempmin)
2843: // Delay Min Temperature update by ~ 30 minutes.
2844: state.tempmin_delay = time(0) + CHECKTIME - 60;
2845: PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
2846: cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
2847: if (triptemp)
2848: PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
2849: state.temperature = currtemp;
2850: }
2851: else {
2852: if (state.tempmin_delay) {
2853: // End Min Temperature update delay if ...
2854: if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
2855: || (state.tempmin_delay <= time(0))) { // or delay time is over.
2856: state.tempmin_delay = 0;
2857: if (!state.tempmin)
2858: state.tempmin = 255;
2859: }
2860: }
2861:
2862: // Update Min Temperature
2863: if (!state.tempmin_delay && currtemp < state.tempmin) {
2864: state.tempmin = currtemp;
2865: state.must_write = true;
2866: if (currtemp != state.temperature)
2867: minchg = "!";
2868: }
2869:
2870: // Track changes
2871: if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
2872: PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
2873: cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2874: state.temperature = currtemp;
2875: }
2876: }
2877:
2878: // Check limits
2879: if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
2880: PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2881: cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
1.1.1.3 ! misho 2882: MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
1.1 misho 2883: cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2884: }
2885: else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
2886: PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
2887: cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
2888: }
2889: else if (cfg.tempcrit) {
2890: unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
2891: if (currtemp < limit)
2892: reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
2893: }
2894: }
2895:
2896: // Check normalized and raw attribute values.
2897: static void check_attribute(const dev_config & cfg, dev_state & state,
2898: const ata_smart_attribute & attr,
2899: const ata_smart_attribute & prev,
2900: int attridx,
2901: const ata_smart_threshold_entry * thresholds)
2902: {
2903: // Check attribute and threshold
2904: ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
2905: if (attrstate == ATTRSTATE_NON_EXISTING)
2906: return;
2907:
2908: // If requested, check for usage attributes that have failed.
2909: if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
2910: && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
1.1.1.3 ! misho 2911: std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
1.1 misho 2912: PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
2913: MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
2914: state.must_write = true;
2915: }
2916:
2917: // Return if we're not tracking this type of attribute
2918: bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
2919: if (!( ( prefail && cfg.prefail)
2920: || (!prefail && cfg.usage )))
2921: return;
2922:
2923: // Return if '-I ID' was specified
2924: if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
2925: return;
2926:
2927: // Issue warning if they don't have the same ID in all structures.
2928: if (attr.id != prev.id) {
2929: PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
2930: cfg.name.c_str(), attr.id, prev.id);
2931: return;
2932: }
2933:
2934: // Compare normalized values if valid.
2935: bool valchanged = false;
2936: if (attrstate > ATTRSTATE_NO_NORMVAL) {
2937: if (attr.current != prev.current)
2938: valchanged = true;
2939: }
2940:
2941: // Compare raw values if requested.
2942: bool rawchanged = false;
2943: if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
2944: if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
2945: != ata_get_attr_raw_value(prev, cfg.attribute_defs))
2946: rawchanged = true;
2947: }
2948:
2949: // Return if no change
2950: if (!(valchanged || rawchanged))
2951: return;
2952:
2953: // Format value strings
2954: std::string currstr, prevstr;
2955: if (attrstate == ATTRSTATE_NO_NORMVAL) {
2956: // Print raw values only
2957: currstr = strprintf("%s (Raw)",
2958: ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2959: prevstr = strprintf("%s (Raw)",
2960: ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2961: }
2962: else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
2963: // Print normalized and raw values
2964: currstr = strprintf("%d [Raw %s]", attr.current,
2965: ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
2966: prevstr = strprintf("%d [Raw %s]", prev.current,
2967: ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
2968: }
2969: else {
2970: // Print normalized values only
2971: currstr = strprintf("%d", attr.current);
2972: prevstr = strprintf("%d", prev.current);
2973: }
2974:
2975: // Format message
2976: std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
2977: cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
1.1.1.3 ! misho 2978: ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
1.1 misho 2979: prevstr.c_str(), currstr.c_str());
2980:
2981: // Report this change as critical ?
2982: if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
2983: || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
2984: PrintOut(LOG_CRIT, "%s\n", msg.c_str());
2985: MailWarning(cfg, state, 2, "%s", msg.c_str());
2986: }
2987: else {
2988: PrintOut(LOG_INFO, "%s\n", msg.c_str());
2989: }
2990: state.must_write = true;
2991: }
2992:
2993:
2994: static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
2995: bool firstpass, bool allow_selftests)
2996: {
2997: const char * name = cfg.name.c_str();
2998:
2999: // If user has asked, test the email warning system
3000: if (cfg.emailtest)
3001: MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3002:
3003: // if we can't open device, fail gracefully rather than hard --
3004: // perhaps the next time around we'll be able to open it. ATAPI
3005: // cd/dvd devices will hang awaiting media if O_NONBLOCK is not
3006: // given (see linux cdrom driver).
3007: if (!atadev->open()) {
3008: PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, atadev->get_errmsg());
3009: MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3010: return 1;
3011: }
3012: if (debugmode)
3013: PrintOut(LOG_INFO,"Device: %s, opened ATA device\n", name);
3014: reset_warning_mail(cfg, state, 9, "open device worked again");
3015:
3016: // user may have requested (with the -n Directive) to leave the disk
3017: // alone if it is in idle or sleeping mode. In this case check the
3018: // power mode and exit without check if needed
3019: if (cfg.powermode && !state.powermodefail) {
3020: int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3021: const char * mode = 0;
3022: if (0 <= powermode && powermode < 0xff) {
3023: // wait for possible spin up and check again
3024: int powermode2;
3025: sleep(5);
3026: powermode2 = ataCheckPowerMode(atadev);
3027: if (powermode2 > powermode)
3028: PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3029: powermode = powermode2;
3030: }
3031:
3032: switch (powermode){
3033: case -1:
3034: // SLEEP
3035: mode="SLEEP";
3036: if (cfg.powermode>=1)
3037: dontcheck=1;
3038: break;
3039: case 0:
3040: // STANDBY
3041: mode="STANDBY";
3042: if (cfg.powermode>=2)
3043: dontcheck=1;
3044: break;
3045: case 0x80:
3046: // IDLE
3047: mode="IDLE";
3048: if (cfg.powermode>=3)
3049: dontcheck=1;
3050: break;
3051: case 0xff:
3052: // ACTIVE/IDLE
3053: mode="ACTIVE or IDLE";
3054: break;
3055: default:
3056: // UNKNOWN
3057: PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3058: name, powermode);
3059: state.powermodefail = true;
3060: break;
3061: }
3062:
3063: // if we are going to skip a check, return now
3064: if (dontcheck){
3065: // skip at most powerskipmax checks
3066: if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3067: CloseDevice(atadev, name);
3068: if (!state.powerskipcnt && !cfg.powerquiet) // report first only and avoid waking up system disk
3069: PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3070: state.powerskipcnt++;
3071: return 0;
3072: }
3073: else {
3074: PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3075: name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3076: }
3077: state.powerskipcnt = 0;
3078: state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3079: }
3080: else if (state.powerskipcnt) {
3081: PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3082: name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3083: state.powerskipcnt = 0;
3084: state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3085: }
3086: }
3087:
3088: // check smart status
3089: if (cfg.smartcheck) {
3090: int status=ataSmartStatus2(atadev);
3091: if (status==-1){
3092: PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3093: MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3094: state.must_write = true;
3095: }
3096: else if (status==1){
3097: PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3098: MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3099: state.must_write = true;
3100: }
3101: }
3102:
3103: // Check everything that depends upon SMART Data (eg, Attribute values)
3104: if ( cfg.usagefailed || cfg.prefail || cfg.usage
3105: || cfg.curr_pending_id || cfg.offl_pending_id
3106: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3107: || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3108:
3109: // Read current attribute values.
3110: ata_smart_values curval;
3111: if (ataReadSmartValues(atadev, &curval)){
3112: PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3113: MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3114: state.must_write = true;
3115: }
3116: else {
3117: reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3118:
3119: // look for current or offline pending sectors
3120: if (cfg.curr_pending_id)
3121: check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3122: (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3123: : "Total unreadable (pending) sectors" ));
3124:
3125: if (cfg.offl_pending_id)
3126: check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3127: (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3128: : "Total offline uncorrectable sectors"));
3129:
3130: // check temperature limits
3131: if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3132: CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3133:
3134: // look for failed usage attributes, or track usage or prefail attributes
3135: if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3136: for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3137: check_attribute(cfg, state,
3138: curval.vendor_attributes[i],
3139: state.smartval.vendor_attributes[i],
3140: i, state.smartthres.thres_entries);
3141: }
3142: }
3143:
3144: // Log changes of offline data collection status
3145: if (cfg.offlinests) {
3146: if ( curval.offline_data_collection_status
3147: != state.smartval.offline_data_collection_status
1.1.1.2 misho 3148: || state.offline_started // test was started in previous call
1.1 misho 3149: || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3150: log_offline_data_coll_status(name, curval.offline_data_collection_status);
3151: }
3152:
3153: // Log changes of self-test execution status
3154: if (cfg.selfteststs) {
3155: if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
1.1.1.2 misho 3156: || state.selftest_started // test was started in previous call
1.1 misho 3157: || (firstpass && (debugmode || curval.self_test_exec_status != 0x00)))
3158: log_self_test_exec_status(name, curval.self_test_exec_status);
3159: }
3160:
3161: // Save the new values for the next time around
3162: state.smartval = curval;
3163: }
3164: }
1.1.1.2 misho 3165: state.offline_started = state.selftest_started = false;
1.1 misho 3166:
3167: // check if number of selftest errors has increased (note: may also DECREASE)
3168: if (cfg.selftest)
1.1.1.3 ! misho 3169: CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
1.1 misho 3170:
3171: // check if number of ATA errors has increased
3172: if (cfg.errorlog || cfg.xerrorlog) {
3173:
3174: int errcnt1 = -1, errcnt2 = -1;
3175: if (cfg.errorlog)
1.1.1.3 ! misho 3176: errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
1.1 misho 3177: if (cfg.xerrorlog)
1.1.1.3 ! misho 3178: errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
1.1 misho 3179:
3180: // new number of errors is max of both logs
3181: int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3182:
3183: // did command fail?
3184: if (newc<0)
3185: // lack of PrintOut here is INTENTIONAL
3186: MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3187:
3188: // has error count increased?
3189: int oldc = state.ataerrorcount;
3190: if (newc>oldc){
3191: PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3192: name, oldc, newc);
3193: MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3194: name, oldc, newc);
3195: state.must_write = true;
3196: }
3197:
3198: if (newc>=0)
3199: state.ataerrorcount=newc;
3200: }
3201:
3202: // if the user has asked, and device is capable (or we're not yet
3203: // sure) check whether a self test should be done now.
3204: if (allow_selftests && !cfg.test_regex.empty()) {
3205: char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3206: if (testtype)
3207: DoATASelfTest(cfg, state, atadev, testtype);
3208: }
3209:
3210: // Don't leave device open -- the OS/user may want to access it
3211: // before the next smartd cycle!
3212: CloseDevice(atadev, name);
3213:
3214: // Copy ATA attribute values to persistent state
3215: state.update_persistent_state();
3216:
3217: return 0;
3218: }
3219:
3220: static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3221: {
3222: UINT8 asc, ascq;
3223: UINT8 currenttemp;
3224: UINT8 triptemp;
1.1.1.3 ! misho 3225: UINT8 tBuf[252];
1.1 misho 3226: const char * name = cfg.name.c_str();
3227: const char *cp;
3228:
3229: // If the user has asked for it, test the email warning system
3230: if (cfg.emailtest)
3231: MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
3232:
3233: // if we can't open device, fail gracefully rather than hard --
3234: // perhaps the next time around we'll be able to open it
3235: if (!scsidev->open()) {
3236: PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", name, scsidev->get_errmsg());
3237: MailWarning(cfg, state, 9, "Device: %s, unable to open device", name);
3238: return 1;
3239: } else if (debugmode)
3240: PrintOut(LOG_INFO,"Device: %s, opened SCSI device\n", name);
1.1.1.3 ! misho 3241: reset_warning_mail(cfg, state, 9, "open device worked again");
1.1 misho 3242: currenttemp = 0;
3243: asc = 0;
3244: ascq = 0;
3245: if (!state.SuppressReport) {
3246: if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3247: &asc, &ascq, ¤ttemp, &triptemp)) {
3248: PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3249: name);
3250: MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3251: state.SuppressReport = 1;
3252: }
3253: }
3254: if (asc > 0) {
3255: cp = scsiGetIEString(asc, ascq);
3256: if (cp) {
3257: PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3258: MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
1.1.1.3 ! misho 3259: } else if (asc == 4 && ascq == 9) {
! 3260: PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
1.1 misho 3261: } else if (debugmode)
3262: PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3263: name, (int)asc, (int)ascq);
3264: } else if (debugmode)
3265: PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3266:
3267: // check temperature limits
1.1.1.3 ! misho 3268: if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit || !cfg.attrlog_file.empty())
1.1 misho 3269: CheckTemperature(cfg, state, currenttemp, triptemp);
3270:
3271: // check if number of selftest errors has increased (note: may also DECREASE)
3272: if (cfg.selftest)
3273: CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3274:
3275: if (allow_selftests && !cfg.test_regex.empty()) {
3276: char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3277: if (testtype)
3278: DoSCSISelfTest(cfg, state, scsidev, testtype);
3279: }
1.1.1.3 ! misho 3280: if (!cfg.attrlog_file.empty()){
! 3281: // saving error counters to state
! 3282: if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
! 3283: READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
! 3284: scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[0].errCounter);
! 3285: state.scsi_error_counters[0].found=1;
! 3286: }
! 3287: if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
! 3288: WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
! 3289: scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[1].errCounter);
! 3290: state.scsi_error_counters[1].found=1;
! 3291: }
! 3292: if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
! 3293: VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
! 3294: scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[2].errCounter);
! 3295: state.scsi_error_counters[2].found=1;
! 3296: }
! 3297: if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
! 3298: NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
! 3299: scsiDecodeNonMediumErrPage(tBuf, &state.scsi_nonmedium_error.nme);
! 3300: state.scsi_nonmedium_error.found=1;
! 3301: }
! 3302: }
1.1 misho 3303: CloseDevice(scsidev, name);
3304: return 0;
3305: }
3306:
// State of the "disable system auto standby" handling:
//   0 = not used
//   1 = not disabled
//   2 = disable rejected by OS
//   3 = disabled
static int standby_disable_state = 0;
3309:
3310: static void init_disable_standby_check(dev_config_vector & configs)
3311: {
3312: // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3313: bool sts1 = false, sts2 = false;
3314: for (unsigned i = 0; i < configs.size() && !(sts1 || sts2); i++) {
3315: const dev_config & cfg = configs.at(i);
3316: if (cfg.offlinests_ns)
3317: sts1 = true;
3318: if (cfg.selfteststs_ns)
3319: sts2 = true;
3320: }
3321:
3322: // Check for support of disable auto standby
3323: // Reenable standby if smartd.conf was reread
3324: if (sts1 || sts2 || standby_disable_state == 3) {
3325: if (!smi()->disable_system_auto_standby(false)) {
3326: if (standby_disable_state == 3)
3327: PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3328: if (sts1 || sts2) {
3329: PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3330: (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
3331: sts1 = sts2 = false;
3332: }
3333: }
3334: }
3335:
3336: standby_disable_state = (sts1 || sts2 ? 1 : 0);
3337: }
3338:
3339: static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3340: {
3341: if (!standby_disable_state)
3342: return;
3343:
3344: // Check for just started or still running self-tests
3345: bool running = false;
3346: for (unsigned i = 0; i < configs.size() && !running; i++) {
3347: const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3348:
3349: if ( ( cfg.offlinests_ns
3350: && (state.offline_started ||
3351: is_offl_coll_in_progress(state.smartval.offline_data_collection_status)))
3352: || ( cfg.selfteststs_ns
3353: && (state.selftest_started ||
3354: is_self_test_in_progress(state.smartval.self_test_exec_status))) )
3355: running = true;
3356: // state.offline/selftest_started will be reset after next logging of test status
3357: }
3358:
3359: // Disable/enable auto standby and log state changes
3360: if (!running) {
3361: if (standby_disable_state != 1) {
3362: if (!smi()->disable_system_auto_standby(false))
3363: PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
3364: smi()->get_errmsg());
3365: else
3366: PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
3367: standby_disable_state = 1;
3368: }
3369: }
3370: else if (!smi()->disable_system_auto_standby(true)) {
3371: if (standby_disable_state != 2) {
3372: PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3373: smi()->get_errmsg());
3374: standby_disable_state = 2;
3375: }
3376: }
3377: else {
3378: if (standby_disable_state != 3) {
3379: PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
3380: standby_disable_state = 3;
3381: }
3382: }
3383: }
3384:
1.1 misho 3385: // Checks the SMART status of all ATA and SCSI devices
3386: static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3387: smart_device_list & devices, bool firstpass, bool allow_selftests)
3388: {
3389: for (unsigned i = 0; i < configs.size(); i++) {
3390: const dev_config & cfg = configs.at(i);
3391: dev_state & state = states.at(i);
3392: smart_device * dev = devices.at(i);
3393: if (dev->is_ata())
3394: ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3395: else if (dev->is_scsi())
3396: SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3397: }
1.1.1.2 misho 3398:
3399: do_disable_standby_check(configs, states);
1.1 misho 3400: }
3401:
3402: // Set if Initialize() was called
3403: static bool is_initialized = false;
3404:
3405: // Does initialization right after fork to daemon mode
3406: static void Initialize(time_t *wakeuptime)
3407: {
3408: // Call Goodbye() on exit
3409: is_initialized = true;
3410:
3411: // write PID file
3412: if (!debugmode)
3413: WritePidFile();
3414:
3415: // install signal handlers. On Solaris, can't use signal() because
3416: // it resets the handler to SIG_DFL after each call. So use sigset()
3417: // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
3418:
3419: // normal and abnormal exit
3420: if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
3421: SIGNALFN(SIGTERM, SIG_IGN);
3422: if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
3423: SIGNALFN(SIGQUIT, SIG_IGN);
3424:
3425: // in debug mode, <CONTROL-C> ==> HUP
3426: if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
3427: SIGNALFN(SIGINT, SIG_IGN);
3428:
3429: // Catch HUP and USR1
3430: if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
3431: SIGNALFN(SIGHUP, SIG_IGN);
3432: if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
3433: SIGNALFN(SIGUSR1, SIG_IGN);
3434: #ifdef _WIN32
3435: if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
3436: SIGNALFN(SIGUSR2, SIG_IGN);
3437: #endif
3438:
3439: // initialize wakeup time to CURRENT time
3440: *wakeuptime=time(NULL);
3441:
3442: return;
3443: }
3444:
3445: #ifdef _WIN32
3446: // Toggle debug mode implemented for native windows only
3447: // (there is no easy way to reopen tty on *nix)
3448: static void ToggleDebugMode()
3449: {
3450: if (!debugmode) {
3451: PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
3452: if (!daemon_enable_console("smartd [Debug]")) {
3453: debugmode = 1;
3454: daemon_signal(SIGINT, HUPhandler);
3455: PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
3456: }
3457: else
3458: PrintOut(LOG_INFO,"enable console failed\n");
3459: }
3460: else if (debugmode == 1) {
3461: daemon_disable_console();
3462: debugmode = 0;
3463: daemon_signal(SIGINT, sighandler);
3464: PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
3465: }
3466: else
3467: PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
3468: }
3469: #endif
3470:
3471: static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
3472: {
3473: // If past wake-up-time, compute next wake-up-time
3474: time_t timenow=time(NULL);
3475: while (wakeuptime<=timenow){
3476: int intervals=1+(timenow-wakeuptime)/checktime;
3477: wakeuptime+=intervals*checktime;
3478: }
3479:
3480: // sleep until we catch SIGUSR1 or have completed sleeping
3481: int addtime = 0;
3482: while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
3483:
3484: // protect user again system clock being adjusted backwards
3485: if (wakeuptime>timenow+checktime){
3486: PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3487: wakeuptime=timenow+checktime;
3488: }
3489:
3490: // Exit sleep when time interval has expired or a signal is received
3491: sleep(wakeuptime+addtime-timenow);
3492:
3493: #ifdef _WIN32
3494: // toggle debug mode?
3495: if (caughtsigUSR2) {
3496: ToggleDebugMode();
3497: caughtsigUSR2 = 0;
3498: }
3499: #endif
3500:
3501: timenow=time(NULL);
3502:
3503: // Actual sleep time too long?
3504: if (!addtime && timenow > wakeuptime+60) {
3505: if (debugmode)
3506: PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
3507: (int)(timenow-wakeuptime));
3508: // Wait another 20 seconds to avoid I/O errors during disk spin-up
3509: addtime = timenow-wakeuptime+20;
3510: // Use next wake-up-time if close
3511: int nextcheck = checktime - addtime % checktime;
3512: if (nextcheck <= 20)
3513: addtime += nextcheck;
3514: }
3515: }
3516:
3517: // if we caught a SIGUSR1 then print message and clear signal
3518: if (caughtsigUSR1){
3519: PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3520: wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
3521: caughtsigUSR1=0;
3522: sigwakeup = true;
3523: }
3524:
3525: // return adjusted wakeuptime
3526: return wakeuptime;
3527: }
3528:
3529: // Print out a list of valid arguments for the Directive d
3530: static void printoutvaliddirectiveargs(int priority, char d)
3531: {
3532: switch (d) {
3533: case 'n':
3534: PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3535: break;
3536: case 's':
3537: PrintOut(priority, "valid_regular_expression");
3538: break;
3539: case 'd':
3540: PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3541: break;
3542: case 'T':
3543: PrintOut(priority, "normal, permissive");
3544: break;
3545: case 'o':
3546: case 'S':
3547: PrintOut(priority, "on, off");
3548: break;
3549: case 'l':
3550: PrintOut(priority, "error, selftest");
3551: break;
3552: case 'M':
3553: PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3554: break;
3555: case 'v':
3556: PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3557: break;
3558: case 'P':
3559: PrintOut(priority, "use, ignore, show, showall");
3560: break;
3561: case 'F':
1.1.1.3 ! misho 3562: PrintOut(priority, "%s", get_valid_firmwarebug_args());
! 3563: break;
1.1.1.2 misho 3564: case 'e':
3565: PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], "
3566: "security-freeze, standby,[N|off], wcache,[on|off]");
1.1 misho 3567: break;
3568: }
3569: }
3570:
3571: // exits with an error message, or returns integer value of token
3572: static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3573: int min, int max, char * suffix = 0)
3574: {
3575: // make sure argument is there
3576: if (!arg) {
3577: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3578: cfgfile, lineno, name, token, min, max);
3579: return -1;
3580: }
3581:
3582: // get argument value (base 10), check that it's integer, and in-range
3583: char *endptr;
3584: int val = strtol(arg,&endptr,10);
3585:
3586: // optional suffix present?
3587: if (suffix) {
3588: if (!strcmp(endptr, suffix))
3589: endptr += strlen(suffix);
3590: else
3591: *suffix = 0;
3592: }
3593:
3594: if (!(!*endptr && min <= val && val <= max)) {
3595: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3596: cfgfile, lineno, name, token, arg, min, max);
3597: return -1;
3598: }
3599:
3600: // all is well; return value
3601: return val;
3602: }
3603:
3604:
3605: // Get 1-3 small integer(s) for '-W' directive
3606: static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3607: unsigned char *val1, unsigned char *val2, unsigned char *val3)
3608: {
3609: unsigned v1 = 0, v2 = 0, v3 = 0;
3610: int n1 = -1, n2 = -1, n3 = -1, len;
3611: if (!arg) {
3612: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3613: cfgfile, lineno, name, token);
3614: return -1;
3615: }
3616:
3617: len = strlen(arg);
3618: if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3619: && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3620: PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3621: cfgfile, lineno, name, token, arg);
3622: return -1;
3623: }
3624: *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3625: return 0;
3626: }
3627:
3628:
1.1.1.3 ! misho 3629: #ifdef _WIN32
! 3630:
// Returns the next strtok() token; a token quoted with "..." may span
// several strtok() tokens which are concatenated.
//
// delimiters: delimiter set passed to strtok().  The scan continues on the
//             string of the preceding strtok() call.
//
// Returns
// - 0 if no token is left,
// - the token itself if it does not start with '"',
// - the dequoted text if the token starts with '"'.  Tokens up to the one
//   which ends with '"' are joined with a single blank each.  The result
//   points to a static buffer which is overwritten by the next call.
// - the string "\"" if the closing quote is missing.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(0, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;
  token = t+1;

  // Opening and closing quote within the same token ("word"): done.
  // The length check prevents that a lone '"' is taken as both quotes.
  size_t len = strlen(t);
  if (len >= 2 && t[len-1] == '"') {
    token.erase(token.size()-1);
    return token.c_str();
  }

  // Append tokens until one ends with the closing quote
  for (;;) {
    t = strtok(0, delimiters);
    if (!t || !*t)
      return "\""; // closing quote missing
    token += ' ';
    len = strlen(t);
    if (t[len-1] == '"') {
      token.append(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
! 3654:
! 3655: #endif // _WIN32
! 3656:
! 3657:
1.1 misho 3658: // This function returns 1 if it has correctly parsed one token (and
3659: // any arguments), else zero if no tokens remain. It returns -1 if an
3660: // error was encountered.
3661: static int ParseToken(char * token, dev_config & cfg)
3662: {
3663: char sym;
3664: const char * name = cfg.name.c_str();
3665: int lineno=cfg.lineno;
3666: const char *delim = " \n\t";
3667: int badarg = 0;
3668: int missingarg = 0;
3669: const char *arg = 0;
3670:
3671: // is the rest of the line a comment
3672: if (*token=='#')
3673: return 1;
3674:
3675: // is the token not recognized?
3676: if (*token!='-' || strlen(token)!=2) {
3677: PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3678: configfile, lineno, name, token);
3679: PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
3680: return -1;
3681: }
3682:
3683: // token we will be parsing:
3684: sym=token[1];
3685:
3686: // parse the token and swallow its argument
3687: int val;
3688: char plus[] = "+", excl[] = "!";
3689:
3690: switch (sym) {
3691: case 'C':
3692: // monitor current pending sector count (default 197)
3693: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3694: return -1;
3695: cfg.curr_pending_id = (unsigned char)val;
3696: cfg.curr_pending_incr = (*plus == '+');
3697: cfg.curr_pending_set = true;
3698: break;
3699: case 'U':
3700: // monitor offline uncorrectable sectors (default 198)
3701: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3702: return -1;
3703: cfg.offl_pending_id = (unsigned char)val;
3704: cfg.offl_pending_incr = (*plus == '+');
3705: cfg.offl_pending_set = true;
3706: break;
3707: case 'T':
3708: // Set tolerance level for SMART command failures
3709: if ((arg = strtok(NULL, delim)) == NULL) {
3710: missingarg = 1;
3711: } else if (!strcmp(arg, "normal")) {
3712: // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
3713: // not on failure of an optional S.M.A.R.T. command.
3714: // This is the default so we don't need to actually do anything here.
3715: cfg.permissive = false;
3716: } else if (!strcmp(arg, "permissive")) {
3717: // Permissive mode; ignore errors from Mandatory SMART commands
3718: cfg.permissive = true;
3719: } else {
3720: badarg = 1;
3721: }
3722: break;
3723: case 'd':
3724: // specify the device type
3725: if ((arg = strtok(NULL, delim)) == NULL) {
3726: missingarg = 1;
1.1.1.3 ! misho 3727: } else if (!strcmp(arg, "ignore")) {
! 3728: cfg.ignore = true;
1.1 misho 3729: } else if (!strcmp(arg, "removable")) {
3730: cfg.removable = true;
3731: } else if (!strcmp(arg, "auto")) {
3732: cfg.dev_type = "";
3733: } else {
3734: cfg.dev_type = arg;
3735: }
3736: break;
3737: case 'F':
3738: // fix firmware bug
1.1.1.3 ! misho 3739: if (!(arg = strtok(0, delim)))
1.1 misho 3740: missingarg = 1;
1.1.1.3 ! misho 3741: else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
1.1 misho 3742: badarg = 1;
3743: break;
3744: case 'H':
3745: // check SMART status
3746: cfg.smartcheck = true;
3747: break;
3748: case 'f':
3749: // check for failure of usage attributes
3750: cfg.usagefailed = true;
3751: break;
3752: case 't':
3753: // track changes in all vendor attributes
3754: cfg.prefail = true;
3755: cfg.usage = true;
3756: break;
3757: case 'p':
3758: // track changes in prefail vendor attributes
3759: cfg.prefail = true;
3760: break;
3761: case 'u':
3762: // track changes in usage vendor attributes
3763: cfg.usage = true;
3764: break;
3765: case 'l':
3766: // track changes in SMART logs
3767: if ((arg = strtok(NULL, delim)) == NULL) {
3768: missingarg = 1;
3769: } else if (!strcmp(arg, "selftest")) {
3770: // track changes in self-test log
3771: cfg.selftest = true;
3772: } else if (!strcmp(arg, "error")) {
3773: // track changes in ATA error log
3774: cfg.errorlog = true;
3775: } else if (!strcmp(arg, "xerror")) {
3776: // track changes in Extended Comprehensive SMART error log
3777: cfg.xerrorlog = true;
3778: } else if (!strcmp(arg, "offlinests")) {
3779: // track changes in offline data collection status
3780: cfg.offlinests = true;
1.1.1.2 misho 3781: } else if (!strcmp(arg, "offlinests,ns")) {
3782: // track changes in offline data collection status, disable auto standby
3783: cfg.offlinests = cfg.offlinests_ns = true;
1.1 misho 3784: } else if (!strcmp(arg, "selfteststs")) {
3785: // track changes in self-test execution status
3786: cfg.selfteststs = true;
1.1.1.2 misho 3787: } else if (!strcmp(arg, "selfteststs,ns")) {
3788: // track changes in self-test execution status, disable auto standby
3789: cfg.selfteststs = cfg.selfteststs_ns = true;
1.1 misho 3790: } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
3791: // set SCT Error Recovery Control
3792: unsigned rt = ~0, wt = ~0; int nc = -1;
3793: sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
3794: if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
3795: cfg.sct_erc_set = true;
3796: cfg.sct_erc_readtime = rt;
3797: cfg.sct_erc_writetime = wt;
3798: }
3799: else
3800: badarg = 1;
3801: } else {
3802: badarg = 1;
3803: }
3804: break;
3805: case 'a':
3806: // monitor everything
3807: cfg.smartcheck = true;
3808: cfg.prefail = true;
3809: cfg.usagefailed = true;
3810: cfg.usage = true;
3811: cfg.selftest = true;
3812: cfg.errorlog = true;
3813: cfg.selfteststs = true;
3814: break;
3815: case 'o':
3816: // automatic offline testing enable/disable
3817: if ((arg = strtok(NULL, delim)) == NULL) {
3818: missingarg = 1;
3819: } else if (!strcmp(arg, "on")) {
3820: cfg.autoofflinetest = 2;
3821: } else if (!strcmp(arg, "off")) {
3822: cfg.autoofflinetest = 1;
3823: } else {
3824: badarg = 1;
3825: }
3826: break;
3827: case 'n':
3828: // skip disk check if in idle or standby mode
3829: if (!(arg = strtok(NULL, delim)))
3830: missingarg = 1;
3831: else {
3832: char *endptr = NULL;
3833: char *next = strchr(const_cast<char*>(arg), ',');
3834:
3835: cfg.powerquiet = false;
3836: cfg.powerskipmax = 0;
3837:
3838: if (next!=NULL) *next='\0';
3839: if (!strcmp(arg, "never"))
3840: cfg.powermode = 0;
3841: else if (!strcmp(arg, "sleep"))
3842: cfg.powermode = 1;
3843: else if (!strcmp(arg, "standby"))
3844: cfg.powermode = 2;
3845: else if (!strcmp(arg, "idle"))
3846: cfg.powermode = 3;
3847: else
3848: badarg = 1;
3849:
3850: // if optional arguments are present
3851: if (!badarg && next!=NULL) {
3852: next++;
3853: cfg.powerskipmax = strtol(next, &endptr, 10);
3854: if (endptr == next)
3855: cfg.powerskipmax = 0;
3856: else {
3857: next = endptr + (*endptr != '\0');
3858: if (cfg.powerskipmax <= 0)
3859: badarg = 1;
3860: }
3861: if (*next != '\0') {
3862: if (!strcmp("q", next))
3863: cfg.powerquiet = true;
3864: else {
3865: badarg = 1;
3866: }
3867: }
3868: }
3869: }
3870: break;
3871: case 'S':
3872: // automatic attribute autosave enable/disable
3873: if ((arg = strtok(NULL, delim)) == NULL) {
3874: missingarg = 1;
3875: } else if (!strcmp(arg, "on")) {
3876: cfg.autosave = 2;
3877: } else if (!strcmp(arg, "off")) {
3878: cfg.autosave = 1;
3879: } else {
3880: badarg = 1;
3881: }
3882: break;
3883: case 's':
3884: // warn user, and delete any previously given -s REGEXP Directives
3885: if (!cfg.test_regex.empty()){
3886: PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
3887: configfile, lineno, name, cfg.test_regex.get_pattern());
3888: cfg.test_regex = regular_expression();
3889: }
3890: // check for missing argument
3891: if (!(arg = strtok(NULL, delim))) {
3892: missingarg = 1;
3893: }
3894: // Compile regex
3895: else {
3896: if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
3897: // not a valid regular expression!
3898: PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
3899: configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
3900: return -1;
3901: }
3902: }
3903: // Do a bit of sanity checking and warn user if we think that
3904: // their regexp is "strange". User probably confused about shell
3905: // glob(3) syntax versus regular expression syntax regexp(7).
3906: if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
3907: PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
3908: configfile, lineno, name, val+1, arg[val], arg);
3909: break;
3910: case 'm':
3911: // send email to address that follows
3912: if (!(arg = strtok(NULL,delim)))
3913: missingarg = 1;
3914: else {
3915: if (!cfg.emailaddress.empty())
3916: PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
3917: configfile, lineno, name, cfg.emailaddress.c_str());
1.1.1.3 ! misho 3918: #ifdef _WIN32
! 3919: if ( !strcmp(arg, "msgbox") || !strcmp(arg, "sysmsgbox")
! 3920: || str_starts_with(arg, "msgbox,") || str_starts_with(arg, "sysmsgbox,")) {
! 3921: cfg.emailaddress = "console";
! 3922: const char * arg2 = strchr(arg, ',');
! 3923: if (arg2)
! 3924: cfg.emailaddress += arg2;
! 3925: PrintOut(LOG_INFO, "File %s line %d (drive %s): Deprecated -m %s changed to -m %s\n",
! 3926: configfile, lineno, name, arg, cfg.emailaddress.c_str());
! 3927: }
! 3928: else
! 3929: #endif
1.1 misho 3930: cfg.emailaddress = arg;
3931: }
3932: break;
3933: case 'M':
3934: // email warning options
3935: if (!(arg = strtok(NULL, delim)))
3936: missingarg = 1;
3937: else if (!strcmp(arg, "once"))
3938: cfg.emailfreq = 1;
3939: else if (!strcmp(arg, "daily"))
3940: cfg.emailfreq = 2;
3941: else if (!strcmp(arg, "diminishing"))
3942: cfg.emailfreq = 3;
3943: else if (!strcmp(arg, "test"))
3944: cfg.emailtest = 1;
3945: else if (!strcmp(arg, "exec")) {
3946: // Get the next argument (the command line)
1.1.1.3 ! misho 3947: #ifdef _WIN32
! 3948: // Allow "/path name/with spaces/..." on Windows
! 3949: arg = strtok_dequote(delim);
! 3950: if (arg && arg[0] == '"') {
! 3951: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
! 3952: configfile, lineno, name, token);
! 3953: return -1;
! 3954: }
! 3955: #else
! 3956: arg = strtok(0, delim);
! 3957: #endif
! 3958: if (!arg) {
1.1 misho 3959: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
3960: configfile, lineno, name, token);
3961: return -1;
3962: }
3963: // Free the last cmd line given if any, and copy new one
3964: if (!cfg.emailcmdline.empty())
3965: PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
3966: configfile, lineno, name, cfg.emailcmdline.c_str());
3967: cfg.emailcmdline = arg;
3968: }
3969: else
3970: badarg = 1;
3971: break;
3972: case 'i':
3973: // ignore failure of usage attribute
3974: if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3975: return -1;
3976: cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
3977: break;
3978: case 'I':
3979: // ignore attribute for tracking purposes
3980: if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
3981: return -1;
3982: cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
3983: break;
3984: case 'r':
3985: // print raw value when tracking
3986: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3987: return -1;
3988: cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
3989: if (*excl == '!') // attribute change is critical
3990: cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
3991: break;
3992: case 'R':
3993: // track changes in raw value (forces printing of raw value)
3994: if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
3995: return -1;
3996: cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
3997: if (*excl == '!') // raw value change is critical
3998: cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
3999: break;
4000: case 'W':
4001: // track Temperature
4002: if ((val=Get3Integers(arg=strtok(NULL,delim), name, token, lineno, configfile,
4003: &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit))<0)
4004: return -1;
4005: break;
4006: case 'v':
4007: // non-default vendor-specific attribute meaning
4008: if (!(arg=strtok(NULL,delim))) {
4009: missingarg = 1;
4010: } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4011: badarg = 1;
4012: }
4013: break;
4014: case 'P':
4015: // Define use of drive-specific presets.
4016: if (!(arg = strtok(NULL, delim))) {
4017: missingarg = 1;
4018: } else if (!strcmp(arg, "use")) {
4019: cfg.ignorepresets = false;
4020: } else if (!strcmp(arg, "ignore")) {
4021: cfg.ignorepresets = true;
4022: } else if (!strcmp(arg, "show")) {
4023: cfg.showpresets = true;
4024: } else if (!strcmp(arg, "showall")) {
4025: showallpresets();
4026: } else {
4027: badarg = 1;
4028: }
4029: break;
1.1.1.2 misho 4030:
4031: case 'e':
4032: // Various ATA settings
4033: if (!(arg = strtok(NULL, delim))) {
4034: missingarg = true;
4035: }
4036: else {
4037: char arg2[16+1]; unsigned val;
4038: int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4039: if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &val, &n3) >= 1
4040: && (n1 == len || n2 > 0)) {
4041: bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4042: bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
4043: if (n3 != len)
4044: val = ~0U;
4045:
4046: if (!strcmp(arg2, "aam")) {
4047: if (off)
4048: cfg.set_aam = -1;
4049: else if (val <= 254)
4050: cfg.set_aam = val + 1;
4051: else
4052: badarg = true;
4053: }
4054: else if (!strcmp(arg2, "apm")) {
4055: if (off)
4056: cfg.set_apm = -1;
4057: else if (1 <= val && val <= 254)
4058: cfg.set_apm = val + 1;
4059: else
4060: badarg = true;
4061: }
4062: else if (!strcmp(arg2, "lookahead")) {
4063: if (off)
4064: cfg.set_lookahead = -1;
4065: else if (on)
4066: cfg.set_lookahead = 1;
4067: else
4068: badarg = true;
4069: }
4070: else if (!strcmp(arg, "security-freeze")) {
4071: cfg.set_security_freeze = true;
4072: }
4073: else if (!strcmp(arg2, "standby")) {
4074: if (off)
4075: cfg.set_standby = 0 + 1;
4076: else if (val <= 255)
4077: cfg.set_standby = val + 1;
4078: else
4079: badarg = true;
4080: }
4081: else if (!strcmp(arg2, "wcache")) {
4082: if (off)
4083: cfg.set_wcache = -1;
4084: else if (on)
4085: cfg.set_wcache = 1;
4086: else
4087: badarg = true;
4088: }
4089: else
4090: badarg = true;
4091: }
4092: else
4093: badarg = true;
4094: }
4095: break;
4096:
1.1 misho 4097: default:
4098: // Directive not recognized
4099: PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4100: configfile, lineno, name, token);
4101: Directives();
4102: return -1;
4103: }
4104: if (missingarg) {
4105: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4106: configfile, lineno, name, token);
4107: }
4108: if (badarg) {
4109: PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4110: configfile, lineno, name, token, arg);
4111: }
4112: if (missingarg || badarg) {
4113: PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4114: printoutvaliddirectiveargs(LOG_CRIT, sym);
4115: PrintOut(LOG_CRIT, "\n");
4116: return -1;
4117: }
4118:
4119: return 1;
4120: }
4121:
4122: // Scan directive for configuration file
4123: #define SCANDIRECTIVE "DEVICESCAN"
4124:
4125: // This is the routine that adds things to the conf_entries list.
4126: //
4127: // Return values are:
4128: // 1: parsed a normal line
1.1.1.2 misho 4129: // 0: found DEFAULT setting or comment or blank line
1.1 misho 4130: // -1: found SCANDIRECTIVE line
4131: // -2: found an error
4132: //
4133: // Note: this routine modifies *line from the caller!
1.1.1.2 misho 4134: static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf, int lineno, /*const*/ char * line)
1.1 misho 4135: {
4136: const char *delim = " \n\t";
4137:
4138: // get first token: device name. If a comment, skip line
1.1.1.2 misho 4139: const char * name = strtok(line, delim);
4140: if (!name || *name == '#')
1.1 misho 4141: return 0;
4142:
1.1.1.2 misho 4143: // Check device name for DEFAULT or DEVICESCAN
4144: int retval;
4145: if (!strcmp("DEFAULT", name)) {
4146: retval = 0;
4147: // Restart with empty defaults
4148: default_conf = dev_config();
1.1 misho 4149: }
1.1.1.2 misho 4150: else {
4151: retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4152: // Init new entry with current defaults
4153: conf_entries.push_back(default_conf);
4154: }
4155: dev_config & cfg = (retval ? conf_entries.back() : default_conf);
1.1 misho 4156:
4157: cfg.name = name; // Later replaced by dev->get_info().info_name
4158: cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
1.1.1.2 misho 4159: cfg.lineno = lineno;
1.1 misho 4160:
4161: // parse tokens one at a time from the file.
1.1.1.2 misho 4162: while (char * token = strtok(0, delim)) {
4163: int rc = ParseToken(token, cfg);
4164: if (rc < 0)
1.1 misho 4165: // error found on the line
4166: return -2;
1.1.1.2 misho 4167:
4168: if (rc == 0)
4169: // No tokens left
4170: break;
4171:
4172: // PrintOut(LOG_INFO,"Parsed token %s\n",token);
1.1 misho 4173: }
1.1.1.2 misho 4174:
4175: // Don't perform checks below for DEFAULT entries
4176: if (retval == 0)
4177: return retval;
4178:
1.1 misho 4179: // If NO monitoring directives are set, then set all of them.
4180: if (!( cfg.smartcheck || cfg.selftest
4181: || cfg.errorlog || cfg.xerrorlog
4182: || cfg.offlinests || cfg.selfteststs
4183: || cfg.usagefailed || cfg.prefail || cfg.usage
4184: || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4185:
4186: PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4187: cfg.name.c_str(), cfg.lineno, configfile);
4188:
4189: cfg.smartcheck = true;
4190: cfg.usagefailed = true;
4191: cfg.prefail = true;
4192: cfg.usage = true;
4193: cfg.selftest = true;
4194: cfg.errorlog = true;
4195: cfg.selfteststs = true;
4196: }
4197:
4198: // additional sanity check. Has user set -M options without -m?
4199: if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
4200: PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4201: cfg.name.c_str(), cfg.lineno, configfile);
4202: return -2;
4203: }
4204:
4205: // has the user has set <nomailer>?
4206: if (cfg.emailaddress == "<nomailer>") {
4207: // check that -M exec is also set
4208: if (cfg.emailcmdline.empty()){
4209: PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4210: cfg.name.c_str(), cfg.lineno, configfile);
4211: return -2;
4212: }
1.1.1.3 ! misho 4213: // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
1.1 misho 4214: cfg.emailaddress.clear();
4215: }
4216:
1.1.1.2 misho 4217: return retval;
1.1 misho 4218: }
4219:
4220: // Parses a configuration file. Return values are:
4221: // N=>0: found N entries
4222: // -1: syntax error in config file
4223: // -2: config file does not exist
4224: // -3: config file exists but cannot be read
4225: //
4226: // In the case where the return value is 0, there are three
4227: // possibilities:
4228: // Empty configuration file ==> conf_entries.empty()
4229: // No configuration file ==> conf_entries[0].lineno == 0
4230: // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4231: static int ParseConfigFile(dev_config_vector & conf_entries)
4232: {
4233: // maximum line length in configuration file
4234: const int MAXLINELEN = 256;
4235: // maximum length of a continued line in configuration file
4236: const int MAXCONTLINE = 1023;
4237:
4238: stdio_file f;
4239: // Open config file, if it exists and is not <stdin>
4240: if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4241: if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4242: // file exists but we can't read it or it should exist due to '-c' option
4243: int ret = (errno!=ENOENT ? -3 : -2);
4244: PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4245: strerror(errno),configfile);
4246: return ret;
4247: }
4248: }
4249: else // read from stdin ('-c -' option)
4250: f.open(stdin);
4251:
1.1.1.2 misho 4252: // Start with empty defaults
4253: dev_config default_conf;
4254:
1.1 misho 4255: // No configuration file found -- use fake one
4256: int entry = 0;
4257: if (!f) {
4258: char fakeconfig[] = SCANDIRECTIVE" -a"; // TODO: Remove this hack, build cfg_entry.
4259:
1.1.1.2 misho 4260: if (ParseConfigLine(conf_entries, default_conf, 0, fakeconfig) != -1)
1.1 misho 4261: throw std::logic_error("Internal error parsing "SCANDIRECTIVE);
4262: return 0;
4263: }
4264:
4265: #ifdef __CYGWIN__
4266: setmode(fileno(f), O_TEXT); // Allow files with \r\n
4267: #endif
4268:
4269: // configuration file exists
4270: PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4271:
4272: // parse config file line by line
4273: int lineno = 1, cont = 0, contlineno = 0;
4274: char line[MAXLINELEN+2];
4275: char fullline[MAXCONTLINE+1];
4276:
4277: for (;;) {
4278: int len=0,scandevice;
4279: char *lastslash;
4280: char *comment;
4281: char *code;
4282:
4283: // make debugging simpler
4284: memset(line,0,sizeof(line));
4285:
4286: // get a line
4287: code=fgets(line, MAXLINELEN+2, f);
4288:
4289: // are we at the end of the file?
4290: if (!code){
4291: if (cont) {
1.1.1.2 misho 4292: scandevice = ParseConfigLine(conf_entries, default_conf, contlineno, fullline);
1.1 misho 4293: // See if we found a SCANDIRECTIVE directive
4294: if (scandevice==-1)
4295: return 0;
4296: // did we find a syntax error
4297: if (scandevice==-2)
4298: return -1;
4299: // the final line is part of a continuation line
4300: cont=0;
4301: entry+=scandevice;
4302: }
4303: break;
4304: }
4305:
4306: // input file line number
4307: contlineno++;
4308:
4309: // See if line is too long
4310: len=strlen(line);
4311: if (len>MAXLINELEN){
4312: const char *warn;
4313: if (line[len-1]=='\n')
4314: warn="(including newline!) ";
4315: else
4316: warn="";
4317: PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4318: (int)contlineno,configfile,warn,(int)MAXLINELEN);
4319: return -1;
4320: }
4321:
4322: // Ignore anything after comment symbol
4323: if ((comment=strchr(line,'#'))){
4324: *comment='\0';
4325: len=strlen(line);
4326: }
4327:
4328: // is the total line (made of all continuation lines) too long?
4329: if (cont+len>MAXCONTLINE){
4330: PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4331: lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4332: return -1;
4333: }
4334:
4335: // copy string so far into fullline, and increment length
1.1.1.3 ! misho 4336: snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
1.1 misho 4337: cont+=len;
4338:
4339: // is this a continuation line. If so, replace \ by space and look at next line
4340: if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4341: *(fullline+(cont-len)+(lastslash-line))=' ';
4342: continue;
4343: }
4344:
4345: // Not a continuation line. Parse it
1.1.1.2 misho 4346: scandevice = ParseConfigLine(conf_entries, default_conf, contlineno, fullline);
1.1 misho 4347:
4348: // did we find a scandevice directive?
4349: if (scandevice==-1)
4350: return 0;
4351: // did we find a syntax error
4352: if (scandevice==-2)
4353: return -1;
4354:
4355: entry+=scandevice;
4356: lineno++;
4357: cont=0;
4358: }
4359:
4360: // note -- may be zero if syntax of file OK, but no valid entries!
4361: return entry;
4362: }
4363:
4364: /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4365: <LIST> is the list of valid arguments for option opt. */
4366: static void PrintValidArgs(char opt)
4367: {
4368: const char *s;
4369:
4370: PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4371: if (!(s = GetValidArgList(opt)))
4372: PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4373: else
4374: PrintOut(LOG_CRIT, "%s", (char *)s);
4375: PrintOut(LOG_CRIT, " <=======\n");
4376: }
4377:
4378: #ifndef _WIN32
4379: // Report error and exit if specified path is not absolute.
4380: static void check_abs_path(char option, const std::string & path)
4381: {
4382: if (path.empty() || path[0] == '/')
4383: return;
4384:
4385: debugmode = 1;
4386: PrintHead();
4387: PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
4388: PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
4389: EXIT(EXIT_BADCMD);
4390: }
4391: #endif // !_WIN32
4392:
4393: // Parses input line, prints usage message and
4394: // version/license/copyright messages
4395: static void ParseOpts(int argc, char **argv)
4396: {
1.1.1.3 ! misho 4397: // Init default path names
1.1 misho 4398: #ifndef _WIN32
4399: configfile = SMARTMONTOOLS_SYSCONFDIR"/smartd.conf";
1.1.1.3 ! misho 4400: warning_script = SMARTMONTOOLS_SYSCONFDIR"/smartd_warning.sh";
1.1 misho 4401: #else
1.1.1.3 ! misho 4402: std::string exedir = get_exe_dir();
! 4403: static std::string configfile_str = exedir + "/smartd.conf";
1.1 misho 4404: configfile = configfile_str.c_str();
1.1.1.3 ! misho 4405: warning_script = exedir + "/smartd_warning.cmd";
1.1 misho 4406: #endif
4407:
4408: // Please update GetValidArgList() if you edit shortopts
1.1.1.3 ! misho 4409: static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
1.1 misho 4410: #ifdef HAVE_LIBCAP_NG
4411: "C"
4412: #endif
4413: ;
4414: // Please update GetValidArgList() if you edit longopts
4415: struct option longopts[] = {
4416: { "configfile", required_argument, 0, 'c' },
4417: { "logfacility", required_argument, 0, 'l' },
4418: { "quit", required_argument, 0, 'q' },
4419: { "debug", no_argument, 0, 'd' },
4420: { "showdirectives", no_argument, 0, 'D' },
4421: { "interval", required_argument, 0, 'i' },
4422: #ifndef _WIN32
4423: { "no-fork", no_argument, 0, 'n' },
4424: #else
4425: { "service", no_argument, 0, 'n' },
4426: #endif
4427: { "pidfile", required_argument, 0, 'p' },
4428: { "report", required_argument, 0, 'r' },
4429: { "savestates", required_argument, 0, 's' },
4430: { "attributelog", required_argument, 0, 'A' },
4431: { "drivedb", required_argument, 0, 'B' },
1.1.1.3 ! misho 4432: { "warnexec", required_argument, 0, 'w' },
1.1 misho 4433: { "version", no_argument, 0, 'V' },
4434: { "license", no_argument, 0, 'V' },
4435: { "copyright", no_argument, 0, 'V' },
4436: { "help", no_argument, 0, 'h' },
4437: { "usage", no_argument, 0, 'h' },
4438: #ifdef HAVE_LIBCAP_NG
4439: { "capabilities", no_argument, 0, 'C' },
4440: #endif
4441: { 0, 0, 0, 0 }
4442: };
4443:
4444: opterr=optopt=0;
4445: bool badarg = false;
4446: bool no_defaultdb = false; // set true on '-B FILE'
4447:
4448: // Parse input options.
4449: int optchar;
4450: while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
4451: char *arg;
4452: char *tailptr;
4453: long lchecktime;
4454:
4455: switch(optchar) {
4456: case 'q':
4457: // when to quit
4458: if (!(strcmp(optarg,"nodev"))) {
4459: quit=0;
4460: } else if (!(strcmp(optarg,"nodevstartup"))) {
4461: quit=1;
4462: } else if (!(strcmp(optarg,"never"))) {
4463: quit=2;
4464: } else if (!(strcmp(optarg,"onecheck"))) {
4465: quit=3;
4466: debugmode=1;
4467: } else if (!(strcmp(optarg,"showtests"))) {
4468: quit=4;
4469: debugmode=1;
4470: } else if (!(strcmp(optarg,"errors"))) {
4471: quit=5;
4472: } else {
4473: badarg = true;
4474: }
4475: break;
4476: case 'l':
4477: // set the log facility level
4478: if (!strcmp(optarg, "daemon"))
4479: facility=LOG_DAEMON;
4480: else if (!strcmp(optarg, "local0"))
4481: facility=LOG_LOCAL0;
4482: else if (!strcmp(optarg, "local1"))
4483: facility=LOG_LOCAL1;
4484: else if (!strcmp(optarg, "local2"))
4485: facility=LOG_LOCAL2;
4486: else if (!strcmp(optarg, "local3"))
4487: facility=LOG_LOCAL3;
4488: else if (!strcmp(optarg, "local4"))
4489: facility=LOG_LOCAL4;
4490: else if (!strcmp(optarg, "local5"))
4491: facility=LOG_LOCAL5;
4492: else if (!strcmp(optarg, "local6"))
4493: facility=LOG_LOCAL6;
4494: else if (!strcmp(optarg, "local7"))
4495: facility=LOG_LOCAL7;
4496: else
4497: badarg = true;
4498: break;
4499: case 'd':
4500: // enable debug mode
4501: debugmode = 1;
4502: break;
4503: case 'n':
4504: // don't fork()
4505: #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4506: do_fork = false;
4507: #endif
4508: break;
4509: case 'D':
4510: // print summary of all valid directives
4511: debugmode = 1;
4512: Directives();
4513: EXIT(0);
4514: break;
4515: case 'i':
4516: // Period (time interval) for checking
4517: // strtol will set errno in the event of overflow, so we'll check it.
4518: errno = 0;
4519: lchecktime = strtol(optarg, &tailptr, 10);
4520: if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
4521: debugmode=1;
4522: PrintHead();
4523: PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
4524: PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
4525: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4526: EXIT(EXIT_BADCMD);
4527: }
4528: checktime = (int)lchecktime;
4529: break;
4530: case 'r':
4531: // report IOCTL transactions
4532: {
4533: int i;
4534: char *s;
4535:
4536: // split_report_arg() may modify its first argument string, so use a
4537: // copy of optarg in case we want optarg for an error message.
4538: if (!(s = strdup(optarg))) {
4539: PrintOut(LOG_CRIT, "No memory to process -r option - exiting\n");
4540: EXIT(EXIT_NOMEM);
4541: }
4542: if (split_report_arg(s, &i)) {
4543: badarg = true;
4544: } else if (i<1 || i>3) {
4545: debugmode=1;
4546: PrintHead();
4547: PrintOut(LOG_CRIT, "======> INVALID REPORT LEVEL: %s <=======\n", optarg);
4548: PrintOut(LOG_CRIT, "======> LEVEL MUST BE INTEGER BETWEEN 1 AND 3<=======\n");
4549: EXIT(EXIT_BADCMD);
4550: } else if (!strcmp(s,"ioctl")) {
4551: ata_debugmode = scsi_debugmode = i;
4552: } else if (!strcmp(s,"ataioctl")) {
4553: ata_debugmode = i;
4554: } else if (!strcmp(s,"scsiioctl")) {
4555: scsi_debugmode = i;
4556: } else {
4557: badarg = true;
4558: }
4559: free(s); // TODO: use std::string
4560: }
4561: break;
4562: case 'c':
4563: // alternate configuration file
4564: if (strcmp(optarg,"-"))
4565: configfile = (configfile_alt = optarg).c_str();
4566: else // read from stdin
4567: configfile=configfile_stdin;
4568: break;
4569: case 'p':
4570: // output file with PID number
4571: pid_file = optarg;
4572: break;
4573: case 's':
4574: // path prefix of persistent state file
4575: state_path_prefix = optarg;
4576: break;
4577: case 'A':
4578: // path prefix of attribute log file
4579: attrlog_path_prefix = optarg;
4580: break;
4581: case 'B':
4582: {
4583: const char * path = optarg;
4584: if (*path == '+' && path[1])
4585: path++;
4586: else
4587: no_defaultdb = true;
4588: unsigned char savedebug = debugmode; debugmode = 1;
4589: if (!read_drive_database(path))
4590: EXIT(EXIT_BADCMD);
4591: debugmode = savedebug;
4592: }
4593: break;
1.1.1.3 ! misho 4594: case 'w':
! 4595: warning_script = optarg;
! 4596: break;
1.1 misho 4597: case 'V':
4598: // print version and CVS info
4599: debugmode = 1;
4600: PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
4601: EXIT(0);
4602: break;
4603: #ifdef HAVE_LIBCAP_NG
4604: case 'C':
4605: // enable capabilities
4606: enable_capabilities = true;
4607: break;
4608: #endif
4609: case 'h':
4610: // help: print summary of command-line options
4611: debugmode=1;
4612: PrintHead();
4613: Usage();
4614: EXIT(0);
4615: break;
4616: case '?':
4617: default:
4618: // unrecognized option
4619: debugmode=1;
4620: PrintHead();
4621: // Point arg to the argument in which this option was found.
4622: arg = argv[optind-1];
4623: // Check whether the option is a long option that doesn't map to -h.
4624: if (arg[1] == '-' && optchar != 'h') {
4625: // Iff optopt holds a valid option then argument must be missing.
4626: if (optopt && (strchr(shortopts, optopt) != NULL)) {
4627: PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
4628: PrintValidArgs(optopt);
4629: } else {
4630: PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
4631: }
4632: PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
4633: EXIT(EXIT_BADCMD);
4634: }
4635: if (optopt) {
4636: // Iff optopt holds a valid option then argument must be missing.
4637: if (strchr(shortopts, optopt) != NULL){
4638: PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
4639: PrintValidArgs(optopt);
4640: } else {
4641: PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
4642: }
4643: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4644: EXIT(EXIT_BADCMD);
4645: }
4646: Usage();
4647: EXIT(0);
4648: }
4649:
4650: // Check to see if option had an unrecognized or incorrect argument.
4651: if (badarg) {
4652: debugmode=1;
4653: PrintHead();
4654: // It would be nice to print the actual option name given by the user
4655: // here, but we just print the short form. Please fix this if you know
4656: // a clean way to do it.
4657: PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
4658: PrintValidArgs(optchar);
4659: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4660: EXIT(EXIT_BADCMD);
4661: }
4662: }
4663:
4664: // non-option arguments are not allowed
4665: if (argc > optind) {
4666: debugmode=1;
4667: PrintHead();
4668: PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
4669: PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4670: EXIT(EXIT_BADCMD);
4671: }
4672:
4673: // no pidfile in debug mode
4674: if (debugmode && !pid_file.empty()) {
4675: debugmode=1;
4676: PrintHead();
4677: PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4678: PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
4679: EXIT(EXIT_BADCMD);
4680: }
4681:
4682: #ifndef _WIN32
4683: if (!debugmode) {
4684: // absolute path names are required due to chdir('/') after fork().
4685: check_abs_path('p', pid_file);
4686: check_abs_path('s', state_path_prefix);
4687: check_abs_path('A', attrlog_path_prefix);
4688: }
4689: #endif
4690:
4691: // Read or init drive database
4692: if (!no_defaultdb) {
4693: unsigned char savedebug = debugmode; debugmode = 1;
4694: if (!read_default_drive_databases())
4695: EXIT(EXIT_BADCMD);
4696: debugmode = savedebug;
4697: }
4698:
4699: // print header
4700: PrintHead();
4701: }
4702:
4703: // Function we call if no configuration file was found or if the
4704: // SCANDIRECTIVE Directive was found. It makes entries for device
4705: // names returned by scan_smart_devices() in os_OSNAME.cpp
4706: static int MakeConfigEntries(const dev_config & base_cfg,
4707: dev_config_vector & conf_entries, smart_device_list & scanned_devs, const char * type)
4708: {
4709: // make list of devices
4710: smart_device_list devlist;
4711: if (!smi()->scan_smart_devices(devlist, (*type ? type : 0)))
4712: PrintOut(LOG_CRIT,"Problem creating device name scan list\n");
4713:
4714: // if no devices, or error constructing list, return
4715: if (devlist.size() <= 0)
4716: return 0;
4717:
4718: // add empty device slots for existing config entries
4719: while (scanned_devs.size() < conf_entries.size())
4720: scanned_devs.push_back((smart_device *)0);
4721:
4722: // loop over entries to create
4723: for (unsigned i = 0; i < devlist.size(); i++) {
4724: // Move device pointer
4725: smart_device * dev = devlist.release(i);
4726: scanned_devs.push_back(dev);
4727:
4728: // Copy configuration, update device and type name
4729: conf_entries.push_back(base_cfg);
4730: dev_config & cfg = conf_entries.back();
4731: cfg.name = dev->get_info().info_name;
4732: cfg.dev_name = dev->get_info().dev_name;
4733: cfg.dev_type = type;
4734: }
4735:
4736: return devlist.size();
4737: }
4738:
4739: static void CanNotRegister(const char *name, const char *type, int line, bool scandirective)
4740: {
4741: if (!debugmode && scandirective)
4742: return;
4743: if (line)
4744: PrintOut(scandirective?LOG_INFO:LOG_CRIT,
4745: "Unable to register %s device %s at line %d of file %s\n",
4746: type, name, line, configfile);
4747: else
4748: PrintOut(LOG_INFO,"Unable to register %s device %s\n",
4749: type, name);
4750: return;
4751: }
4752:
4753: // Returns negative value (see ParseConfigFile()) if config file
4754: // had errors, else number of entries which may be zero or positive.
4755: static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
4756: {
4757: // parse configuration file configfile (normally /etc/smartd.conf)
4758: int entries = ParseConfigFile(conf_entries);
4759:
4760: if (entries < 0) {
4761: // There was an error reading the configuration file.
4762: conf_entries.clear();
4763: if (entries == -1)
4764: PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
4765: return entries;
4766: }
4767:
4768: // no error parsing config file.
4769: if (entries) {
4770: // we did not find a SCANDIRECTIVE and did find valid entries
4771: PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
4772: }
4773: else if (!conf_entries.empty()) {
4774: // we found a SCANDIRECTIVE or there was no configuration file so
4775: // scan. Configuration file's last entry contains all options
4776: // that were set
4777: dev_config first = conf_entries.back();
4778: conf_entries.pop_back();
4779:
4780: if (first.lineno)
4781: PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
4782: else
4783: PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
4784:
4785: // make config list of devices to search for
4786: MakeConfigEntries(first, conf_entries, scanned_devs, first.dev_type.c_str());
4787:
4788: // warn user if scan table found no devices
4789: if (conf_entries.empty())
4790: PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
4791: }
4792: else
4793: PrintOut(LOG_CRIT,"Configuration file %s parsed but has no entries (like /dev/hda)\n",configfile);
4794:
4795: return conf_entries.size();
4796: }
4797:
1.1.1.3 ! misho 4798: // Return true if TYPE contains a RAID drive number
! 4799: static bool is_raid_type(const char * type)
! 4800: {
! 4801: if (str_starts_with(type, "sat,"))
! 4802: return false;
! 4803: int i;
! 4804: if (sscanf(type, "%*[^,],%d", &i) != 1)
! 4805: return false;
! 4806: return true;
! 4807: }
! 4808:
! 4809: // Return true if DEV is already in DEVICES[0..NUMDEVS) or IGNORED[*]
! 4810: static bool is_duplicate_device(const smart_device * dev,
! 4811: const smart_device_list & devices, unsigned numdevs,
! 4812: const dev_config_vector & ignored)
! 4813: {
! 4814: const smart_device::device_info & info1 = dev->get_info();
! 4815: bool is_raid1 = is_raid_type(info1.dev_type.c_str());
! 4816:
! 4817: for (unsigned i = 0; i < numdevs; i++) {
! 4818: const smart_device::device_info & info2 = devices.at(i)->get_info();
! 4819: // -d TYPE options must match if RAID drive number is specified
! 4820: if ( info1.dev_name == info2.dev_name
! 4821: && ( info1.dev_type == info2.dev_type
! 4822: || !is_raid1 || !is_raid_type(info2.dev_type.c_str())))
! 4823: return true;
! 4824: }
! 4825:
! 4826: for (unsigned i = 0; i < ignored.size(); i++) {
! 4827: const dev_config & cfg2 = ignored.at(i);
! 4828: if ( info1.dev_name == cfg2.dev_name
! 4829: && ( info1.dev_type == cfg2.dev_type
! 4830: || !is_raid1 || !is_raid_type(cfg2.dev_type.c_str())))
! 4831: return true;
! 4832: }
! 4833: return false;
! 4834: }
1.1 misho 4835:
4836: // This function tries devices from conf_entries. Each one that can be
4837: // registered is moved onto the [ata|scsi]devices lists and removed
4838: // from the conf_entries list.
4839: static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
4840: dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
4841: {
4842: // start by clearing lists/memory of ALL existing devices
4843: configs.clear();
4844: devices.clear();
4845: states.clear();
4846:
4847: // Register entries
1.1.1.3 ! misho 4848: dev_config_vector ignored_entries;
! 4849: unsigned numnoscan = 0;
1.1 misho 4850: for (unsigned i = 0; i < conf_entries.size(); i++){
4851:
4852: dev_config cfg = conf_entries[i];
4853:
1.1.1.3 ! misho 4854: if (cfg.ignore) {
! 4855: // Store for is_duplicate_device() check and ignore
! 4856: PrintOut(LOG_INFO, "Device: %s%s%s%s, ignored\n", cfg.name.c_str(),
! 4857: (!cfg.dev_type.empty() ? " [" : ""),
! 4858: cfg.dev_type.c_str(),
! 4859: (!cfg.dev_type.empty() ? "]" : ""));
! 4860: ignored_entries.push_back(cfg);
! 4861: continue;
! 4862: }
! 4863:
1.1 misho 4864: // get device of appropriate type
4865: smart_device_auto_ptr dev;
4866: bool scanning = false;
4867:
4868: // Device may already be detected during devicescan
4869: if (i < scanned_devs.size()) {
4870: dev = scanned_devs.release(i);
1.1.1.3 ! misho 4871: if (dev) {
! 4872: // Check for a preceding non-DEVICESCAN entry for the same device
! 4873: if ( (numnoscan || !ignored_entries.empty())
! 4874: && is_duplicate_device(dev.get(), devices, numnoscan, ignored_entries)) {
! 4875: PrintOut(LOG_INFO, "Device: %s, duplicate, ignored\n", dev->get_info_name());
! 4876: continue;
! 4877: }
1.1 misho 4878: scanning = true;
1.1.1.3 ! misho 4879: }
1.1 misho 4880: }
4881:
4882: if (!dev) {
4883: dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
4884: if (!dev) {
4885: if (cfg.dev_type.empty())
4886: PrintOut(LOG_INFO,"Device: %s, unable to autodetect device type\n", cfg.name.c_str());
4887: else
4888: PrintOut(LOG_INFO,"Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
4889: continue;
4890: }
4891: }
4892:
4893: // Save old info
4894: smart_device::device_info oldinfo = dev->get_info();
4895:
4896: // Open with autodetect support, may return 'better' device
4897: dev.replace( dev->autodetect_open() );
4898:
4899: // Report if type has changed
4900: if (oldinfo.dev_type != dev->get_dev_type())
4901: PrintOut(LOG_INFO,"Device: %s, type changed from '%s' to '%s'\n",
4902: cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
4903:
4904: if (!dev->is_open()) {
4905: // For linux+devfs, a nonexistent device gives a strange error
4906: // message. This makes the error message a bit more sensible.
4907: // If no debug and scanning - don't print errors
4908: if (debugmode || !scanning)
4909: PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
4910: continue;
4911: }
4912:
4913: // Update informal name
4914: cfg.name = dev->get_info().info_name;
4915: PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
4916:
4917: // Prepare initial state
4918: dev_state state;
4919:
4920: // register ATA devices
4921: if (dev->is_ata()){
4922: if (ATADeviceScan(cfg, state, dev->to_ata())) {
4923: CanNotRegister(cfg.name.c_str(), "ATA", cfg.lineno, scanning);
4924: dev.reset();
4925: }
4926: }
4927: // or register SCSI devices
4928: else if (dev->is_scsi()){
4929: if (SCSIDeviceScan(cfg, state, dev->to_scsi())) {
4930: CanNotRegister(cfg.name.c_str(), "SCSI", cfg.lineno, scanning);
4931: dev.reset();
4932: }
4933: }
4934: else {
4935: PrintOut(LOG_INFO, "Device: %s, neither ATA nor SCSI device\n", cfg.name.c_str());
4936: dev.reset();
4937: }
4938:
4939: if (dev) {
4940: // move onto the list of devices
4941: configs.push_back(cfg);
4942: states.push_back(state);
4943: devices.push_back(dev);
1.1.1.3 ! misho 4944: if (!scanning)
! 4945: numnoscan = devices.size();
1.1 misho 4946: }
4947: // if device is explictly listed and we can't register it, then
4948: // exit unless the user has specified that the device is removable
4949: else if (!scanning) {
4950: if (cfg.removable || quit==2)
4951: PrintOut(LOG_INFO, "Device %s not available\n", cfg.name.c_str());
4952: else {
4953: PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
4954: EXIT(EXIT_BADDEV);
4955: }
4956: }
4957: }
1.1.1.2 misho 4958:
4959: init_disable_standby_check(configs);
1.1 misho 4960: }
4961:
4962:
4963: // Main program without exception handling
4964: static int main_worker(int argc, char **argv)
4965: {
4966: // Initialize interface
4967: smart_interface::init();
4968: if (!smi())
4969: return 1;
4970:
4971: // is it our first pass through?
4972: bool firstpass = true;
4973:
4974: // next time to wake up
4975: time_t wakeuptime = 0;
4976:
4977: // parse input and print header and usage info if needed
4978: ParseOpts(argc,argv);
4979:
4980: // Configuration for each device
4981: dev_config_vector configs;
4982: // Device states
4983: dev_state_vector states;
4984: // Devices to monitor
4985: smart_device_list devices;
4986:
4987: bool write_states_always = true;
4988:
4989: #ifdef HAVE_LIBCAP_NG
4990: // Drop capabilities
4991: if (enable_capabilities) {
4992: capng_clear(CAPNG_SELECT_BOTH);
4993: capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
4994: CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
4995: capng_apply(CAPNG_SELECT_BOTH);
4996: }
4997: #endif
4998:
4999: // the main loop of the code
5000: for (;;) {
5001:
5002: // are we exiting from a signal?
5003: if (caughtsigEXIT) {
5004: // are we exiting with SIGTERM?
5005: int isterm=(caughtsigEXIT==SIGTERM);
5006: int isquit=(caughtsigEXIT==SIGQUIT);
5007: int isok=debugmode?isterm || isquit:isterm;
5008:
5009: PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
5010: caughtsigEXIT, strsignal(caughtsigEXIT));
5011:
5012: if (!isok)
5013: return EXIT_SIGNAL;
5014:
5015: // Write state files
5016: if (!state_path_prefix.empty())
5017: write_all_dev_states(configs, states);
5018:
5019: return 0;
5020: }
5021:
5022: // Should we (re)read the config file?
5023: if (firstpass || caughtsigHUP){
5024: if (!firstpass) {
5025: // Write state files
5026: if (!state_path_prefix.empty())
5027: write_all_dev_states(configs, states);
5028:
5029: PrintOut(LOG_INFO,
5030: caughtsigHUP==1?
5031: "Signal HUP - rereading configuration file %s\n":
5032: "\a\nSignal INT - rereading configuration file %s ("SIGQUIT_KEYNAME" quits)\n\n",
5033: configfile);
5034: }
5035:
5036: {
5037: dev_config_vector conf_entries; // Entries read from smartd.conf
5038: smart_device_list scanned_devs; // Devices found during scan
5039: // (re)reads config file, makes >=0 entries
5040: int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
5041:
5042: if (entries>=0) {
5043: // checks devices, then moves onto ata/scsi list or deallocates.
5044: RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
5045: if (!(configs.size() == devices.size() && configs.size() == states.size()))
5046: throw std::logic_error("Invalid result from RegisterDevices");
5047: }
5048: else if (quit==2 || ((quit==0 || quit==1) && !firstpass)) {
5049: // user has asked to continue on error in configuration file
5050: if (!firstpass)
5051: PrintOut(LOG_INFO,"Reusing previous configuration\n");
5052: }
5053: else {
5054: // exit with configuration file error status
5055: return (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
5056: }
5057: }
5058:
5059: // Log number of devices we are monitoring...
5060: if (devices.size() > 0 || quit==2 || (quit==1 && !firstpass)) {
5061: int numata = 0;
5062: for (unsigned i = 0; i < devices.size(); i++) {
5063: if (devices.at(i)->is_ata())
5064: numata++;
5065: }
5066: PrintOut(LOG_INFO,"Monitoring %d ATA and %d SCSI devices\n",
5067: numata, devices.size() - numata);
5068: }
5069: else {
5070: PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
5071: return EXIT_NODEV;
5072: }
5073:
5074: if (quit==4) {
5075: // user has asked to print test schedule
5076: PrintTestSchedule(configs, states, devices);
5077: return 0;
5078: }
5079:
5080: #ifdef HAVE_LIBCAP_NG
5081: if (enable_capabilities) {
5082: for (unsigned i = 0; i < configs.size(); i++) {
5083: if (!configs[i].emailaddress.empty() || !configs[i].emailcmdline.empty()) {
5084: PrintOut(LOG_WARNING, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
5085: break;
5086: }
5087: }
5088: }
5089: #endif
5090:
5091: // reset signal
5092: caughtsigHUP=0;
5093:
5094: // Always write state files after (re)configuration
5095: write_states_always = true;
5096: }
5097:
5098: // check all devices once,
5099: // self tests are not started in first pass unless '-q onecheck' is specified
5100: CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit==3));
5101:
5102: // Write state files
5103: if (!state_path_prefix.empty())
5104: write_all_dev_states(configs, states, write_states_always);
5105: write_states_always = false;
5106:
5107: // Write attribute logs
5108: if (!attrlog_path_prefix.empty())
5109: write_all_dev_attrlogs(configs, states);
5110:
5111: // user has asked us to exit after first check
5112: if (quit==3) {
5113: PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices sucessfully checked once.\n"
5114: "smartd is exiting (exit status 0)\n");
5115: return 0;
5116: }
5117:
5118: // fork into background if needed
5119: if (firstpass && !debugmode) {
5120: DaemonInit();
5121: }
5122:
5123: // set exit and signal handlers, write PID file, set wake-up time
5124: if (firstpass){
5125: Initialize(&wakeuptime);
5126: firstpass = false;
5127: }
5128:
5129: // sleep until next check time, or a signal arrives
5130: wakeuptime = dosleep(wakeuptime, write_states_always);
5131: }
5132: }
5133:
5134:
5135: #ifndef _WIN32
5136: // Main program
5137: int main(int argc, char **argv)
5138: #else
5139: // Windows: internal main function started direct or by service control manager
5140: static int smartd_main(int argc, char **argv)
5141: #endif
5142: {
5143: int status;
5144: try {
5145: // Do the real work ...
5146: status = main_worker(argc, argv);
5147: }
5148: catch (int ex) {
5149: // EXIT(status) arrives here
5150: status = ex;
5151: }
5152: catch (const std::bad_alloc & /*ex*/) {
5153: // Memory allocation failed (also thrown by std::operator new)
5154: PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
5155: status = EXIT_NOMEM;
5156: }
5157: catch (const std::exception & ex) {
5158: // Other fatal errors
5159: PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
5160: status = EXIT_BADCODE;
5161: }
5162:
5163: if (is_initialized)
5164: status = Goodbye(status);
5165:
5166: #ifdef _WIN32
5167: daemon_winsvc_exitcode = status;
5168: #endif
5169: return status;
5170: }
5171:
5172:
#ifdef _WIN32
// Program entry point on Windows
int main(int argc, char **argv)
{
  // Settings for running smartd as a Windows service
  static const daemon_winsvc_options winsvc_opts = {
    // cmd_opt
    "--service",
    // servicename, displayname
    "smartd", "SmartD Service",
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (S.M.A.R.T.) "
    "built into ATA and SCSI Hard Drives. "
    PACKAGE_HOMEPAGE
  };

  // daemon_main() handles daemon and service specific commands
  // and starts smartd_main() direct, from a new process,
  // or via service control manager
  return daemon_main("smartd", &winsvc_opts, smartd_main, argc, argv);
}
#endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>