Browse Source

Properly detect missing SMART support

Reported by Patrick Kuijvenhoven on https://github.com/ercpe/smart-check/issues/5.

Previously, we did not check the value of `SMART support is` in the information section of the smartctl output and relied entirely on the other sections.
Now we check that value too and report a failed check if the device (or possibly a RAID controller in front of it) announces that the device has no SMART capability.
master
Johann Schmitz 8 months ago
parent
commit
724a9fc341
  1. 52
      smartcheck/__main__.py
  2. 31
      smartcheck/check.py
  3. 11
      tests/check.py
  4. 23
      tests/samples/AVAGO-MR9340-8i-on-megaraid-no-smart-support.txt

52
smartcheck/__main__.py

@ -86,31 +86,35 @@ def main():
check = SMARTCheck(stream, args.disks_file)
if check.data_parsed:
attribute_errors = check.check_attributes(ignore_attributes=args.ignore_attributes)
if args.exclude_notices:
for k in [x for x, y in attribute_errors.items() if y.level == AttributeWarning.Notice]:
del attribute_errors[k]
if attribute_errors:
msg = ', '.join([ae.long_message if args.verbose else ae.short_message for ae in attribute_errors.values()])
# set the exit code of NOTICEs to WARNINGs if (-n / --notices-are-warnings) is set
if args.notices_are_warnings and \
any((ae.level == AttributeWarning.Notice for ae in attribute_errors.values())):
exit_code = 1
if any((ae.level == AttributeWarning.Warning for ae in attribute_errors.values())):
exit_code = 1
if any((ae.level == AttributeWarning.Critical for ae in attribute_errors.values())):
if check.has_smart_support():
attribute_errors = check.check_attributes(ignore_attributes=args.ignore_attributes)
if args.exclude_notices:
for k in [x for x, y in attribute_errors.items() if y.level == AttributeWarning.Notice]:
del attribute_errors[k]
if attribute_errors:
msg = ', '.join([ae.long_message if args.verbose else ae.short_message for ae in attribute_errors.values()])
# set the exit code of NOTICEs to WARNINGs if (-n / --notices-are-warnings) is set
if args.notices_are_warnings and \
any((ae.level == AttributeWarning.Notice for ae in attribute_errors.values())):
exit_code = 1
if any((ae.level == AttributeWarning.Warning for ae in attribute_errors.values())):
exit_code = 1
if any((ae.level == AttributeWarning.Critical for ae in attribute_errors.values())):
exit_code = 2
if not check.check_tests(latest_only=args.accept_recovered_self_test):
msg = (msg.strip() + '; S.M.A.R.T. self test reported an error').lstrip(';').strip()
exit_code = 2
if not check.check_tests(latest_only=args.accept_recovered_self_test):
msg = (msg.strip() + '; S.M.A.R.T. self test reported an error').lstrip(';').strip()
exit_code = 2
if check.ata_error_count:
msg = (msg.strip() + '; %s ATA errors found' % check.ata_error_count).lstrip(';').strip()
if check.ata_error_count:
msg = (msg.strip() + '; %s ATA errors found' % check.ata_error_count).lstrip(';').strip()
exit_code = 2
else:
msg = 'Device is missing S.M.A.R.T. support'
exit_code = 2
if not exit_code and not msg:

31
smartcheck/check.py

@ -17,12 +17,13 @@ TESTS_SECTION_START = 'SMART Self-test log structure revision number'
ATA_ERROR_COUNT = re.compile('^ATA Error Count: (\d+).*', re.MULTILINE | re.IGNORECASE)
INFORMATION_RE = [
("model_family", re.compile('Model Family: (.*)', re.UNICODE)),
("device_model", re.compile("(?:Device Model|Product): (.*)", re.UNICODE)),
("serial", re.compile("Serial Number: (.*)", re.UNICODE | re.IGNORECASE)),
("firmware_version", re.compile("Firmware version: (.*)", re.UNICODE)),
("ata_version", re.compile("ATA Version is: (.*)", re.UNICODE)),
("sata_version", re.compile("SATA Version is: (.*)", re.UNICODE)),
('model_family', re.compile(r'Model Family: (.*)', re.UNICODE)),
('device_model', re.compile(r'(?:Device Model|Product): (.*)', re.UNICODE)),
('serial', re.compile(r'Serial Number: (.*)', re.UNICODE | re.IGNORECASE)),
('firmware_version', re.compile(r'Firmware version: (.*)', re.UNICODE)),
('ata_version', re.compile(r'ATA Version is: (.*)', re.UNICODE)),
('sata_version', re.compile(r'SATA Version is: (.*)', re.UNICODE)),
('smart_support', re.compile(r'SMART support is:\s+(.*)', re.UNICODE))
]
DATA_RE = [
@ -138,7 +139,7 @@ class SMARTCheck(object):
if self._database is None:
if self.db_path:
with open(self.db_path) as f:
self._database = yaml.load(f) or {}
self._database = yaml.load(f, Loader=yaml.SafeLoader) or {}
else:
self._database = []
return self._database
@ -148,7 +149,7 @@ class SMARTCheck(object):
if self._generic is None:
try:
with open(GENERIC_ATTRS_FILE) as f:
self._generic = yaml.load(f) or []
self._generic = yaml.load(f, Loader=yaml.SafeLoader) or []
except:
logger.exception("Could not read %s", GENERIC_ATTRS_FILE)
return self._generic
@ -246,7 +247,14 @@ class SMARTCheck(object):
return 0
def check(self, ignore_attributes=None):
return len(self.check_attributes(ignore_attributes or [])) == 0 and self.check_tests() and self.ata_error_count == 0
# 1. the device should announce smart support in the info section
# 2. no attributes should indicate an unhealthy device
# 3. no short/long smart tests should have failed
# 4. no ATA errors recorded
return self.has_smart_support() and \
len(self.check_attributes(ignore_attributes or [])) == 0 and \
self.check_tests() and \
self.ata_error_count == 0
def check_tests(self, latest_only=False):
ok_test_results = [
@ -384,3 +392,8 @@ class SMARTCheck(object):
raise ValueError("Unknown attribute specification: %s" % db_attrs)
return failed_attributes
def has_smart_support(self):
    """Tell whether the parsed device information announces SMART support.

    Reads the ``smart_support`` entry of ``self.information`` (presumably
    filled from the ``SMART support is:`` line of the smartctl output --
    verify against the parser). Support is considered present when the
    value is exactly ``Enabled`` or begins with ``Available``.

    :return: ``True`` if support is announced; ``False`` otherwise,
        including when no information is present or the entry is missing.
    :rtype: bool
    """
    # Always hand back a real bool: returning the empty/None information
    # object itself would leak a non-boolean value through check().
    if not self.information:
        return False
    # Fall back to '' so a missing (or None) entry cannot break startswith().
    state = self.information.get('smart_support') or ''
    return state == 'Enabled' or state.startswith('Available')

11
tests/check.py

@ -229,3 +229,14 @@ ID# ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_
assert f(1) is False
assert f(2) is False
def test_smart_not_supported_behind_megaraid(self):
    # Regression test for https://github.com/ercpe/smart-check/issues/5
    #
    # The sample smartctl output reports SMART support as unavailable. Whether
    # the disk truly lacks S.M.A.R.T. or the megaraid controller keeps smartctl
    # from working, the overall check must not come back OK in that situation.
    sample_file = os.path.join(
        samples_path, 'AVAGO-MR9340-8i-on-megaraid-no-smart-support.txt'
    )
    with open(sample_file) as sample:
        checker = SMARTCheck(sample)
        # The self-test check on its own still passes for this sample ...
        self.assertTrue(checker.check_tests())
        # ... but the combined check has to fail.
        self.assertFalse(checker.check())

23
tests/samples/AVAGO-MR9340-8i-on-megaraid-no-smart-support.txt

@ -0,0 +1,23 @@
smartctl 7.0 2018-12-30 r4883 [x86_64-linux-3.10.0-1127.10.1.el7.x86_64] (local build)
Copyright (C) 2002-18, Bruce Allen, Christian Franke, www.smartmontools.org
=== START OF INFORMATION SECTION ===
Vendor: AVAGO
Product: MR9340-8i
Revision: 4.62
Compliance: SPC-3
User Capacity: 4,000,225,165,312 bytes [4.00 TB]
Logical block size: 512 bytes
Logical Unit id: 0x600605b00bf860b026c6a4e8041f90d5
Serial number: 00d5901f04e8a4c626b060f80bb00506
Device type: disk
Local Time is: Thu Aug 13 10:10:30 2020 UTC
SMART support is: Unavailable - device lacks SMART capability.
=== START OF READ SMART DATA SECTION ===
Current Drive Temperature: 0 C
Drive Trip Temperature: 0 C
Error Counter logging not supported
Device does not support Self Test logging
Loading…
Cancel
Save