Browse Source

Changed tuples to AttributeWarning instances

Replace test data with a sane smartctl report
master
Johann Schmitz 5 years ago
parent
commit
bd3a5b58a7
  1. 135
      src/smartcheck/check.py
  2. 18
      src/smartcheck/main.py
  3. 31
      tests/check.py
  4. 100
      tests/samples/ST2000NM0033-9ZM175.txt
  5. 2
      tests/samples/disks-min-max.yaml
  6. 3
      tests/samples/disks-min-or-max.yaml

135
src/smartcheck/check.py

@ -23,6 +23,53 @@ DATA_ATTRIBUTES_RE = re.compile(r"\s*(\d+)\s+([\w\d_\-]+)\s+([0-9a-fx]+)\s+(\d+)
TEST_RESULT_RE = re.compile(r"#\s*(\d+)\s+(.*?)\s{2,}(.*?)\s{2,}\s+([\d%]+)\s+(\d+)\s+(\d+|-)", re.UNICODE)
def toint(s, default=0):
    """Convert *s* to an int, falling back to *default* when that fails.

    :param s: value to convert, typically a text column taken from a report
    :param default: value returned when *s* cannot be converted
    :return: ``int(s)`` or *default*
    """
    try:
        return int(s)
    except (TypeError, ValueError):
        # ValueError: text that is not a number.
        # TypeError: None or another object int() does not accept at all.
        return default
class AttributeWarning(object):
    """A finding about one S.M.A.R.T. attribute.

    Holds a severity level, the attribute column the finding refers to, the
    offending value and an optional free-text description.
    """

    # Severity levels
    Notice = 'NOTICE'
    Warning = 'WARNING'
    Critical = 'CRITICAL'

    # Names of the attribute columns a finding can refer to
    FieldRawValue = 'RAW_VALUE'
    FieldValue = 'VALUE'

    def __init__(self, level=None, field=None, value=None, description=None):
        """
        :param level: severity, one of Notice / Warning / Critical
        :param field: inspected column, FieldRawValue or FieldValue
        :param value: the value that triggered the finding
        :param description: optional explanation shown in the long message
        """
        self.level = level
        self.field = field
        self.value = value
        self.description = description

    @property
    def short_message(self):
        """``LEVEL: FIELD=VALUE``; an unset level is rendered as ``?``."""
        return "%s: %s=%s" % (self.level or '?', self.field, self.value)

    @property
    def long_message(self):
        """The short message, followed by the description when one is set."""
        s = self.short_message
        if self.description:
            s += ": %s" % self.description
        return s

    def __str__(self):
        return self.short_message

    def __repr__(self):
        return self.short_message

    def __eq__(self, other):
        """Compare level, field and value; the description is ignored.

        A warning whose level, field or value is ``None`` never compares
        equal to anything, not even to itself.
        """
        return isinstance(other, AttributeWarning) and \
            self.level is not None and self.level == other.level and \
            self.field is not None and self.field == other.field and \
            self.value is not None and self.value == other.value

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; keep both operators in sync.
        return not self.__eq__(other)

    def __hash__(self):
        # Hash exactly the fields __eq__ compares so equal warnings collapse
        # in sets and dict keys.
        return hash((self.level, self.field, self.value))
class SMARTCheck(object):
def __init__(self, file_or_stream, db_path=None):
@ -118,7 +165,7 @@ class SMARTCheck(object):
if m:
d[k] = m.group(1).strip() if m.group(1) else ''
d['attributes'] = DATA_ATTRIBUTES_RE.findall(s)
d['attributes'] = sorted(DATA_ATTRIBUTES_RE.findall(s), key=lambda t: int(t[0]))
return d
@ -150,6 +197,81 @@ class SMARTCheck(object):
return not any([x[2] not in ok_test_results for x in self.self_tests['test_results']])
def check_attributes(self):
    """Collect every attribute finding for this device.

    The generic checks always run. The device specific checks are added on
    top (overriding generic findings for the same key) only when the device
    exists in the database.

    :return: dict as produced by check_generic_attributes(), updated with
             the result of check_device_attributes() when applicable
    """
    findings = self.check_generic_attributes()
    if not self.exists_in_database():
        return findings
    findings.update(self.check_device_attributes())
    return findings
def check_generic_attributes(self):
    """Check the parsed attributes against drive independent rules.

    These tests are taken from gsmartcontrol (storage_property_descr.cpp)
    and look for known pre-fail attributes. Attribute names are compared
    case-insensitively.

    NOTE(review): the names below are gsmartcontrol's generic names. The
    smartctl sample used by the tests reports e.g. ``Reallocated_Sector_Ct``
    and ``Spin_Retry_Count`` - confirm whether those spellings should be
    matched here as well.

    :return: dict mapping ``(attrid, name)`` to an AttributeWarning of
             level Notice for every attribute that violates a rule
    """
    bad_sectors = "The drive has a non-zero Raw value, but there is no SMART warning yet. " + \
        "This could be an indication of future failures and/or potential data loss in bad sectors."
    spin_up = "The drive has a non-zero Raw value, but there is no SMART warning yet. " + \
        "Your drive may have problems spinning up, which could lead to a complete mechanical failure."
    too_hot = "The temperature of the drive is higher than 50 degrees Celsius. " + \
        "This may shorten its lifespan and cause damage under severe load."

    # Attributes for which any non-zero raw value is worth a notice.
    non_zero_raw_rules = {
        'reallocated_sector_count': bad_sectors,
        'spin_up_retry_count': spin_up,
        'soft_read_error_rate': bad_sectors,
        'reallocation_event_count': bad_sectors,
        'current_pending_sector_count': bad_sectors,
        'total_pending_sectors': bad_sectors,
        'offline_uncorrectable': bad_sectors,
        'total_offline_uncorrectable': bad_sectors,
    }

    failed_attributes = {}

    for attrid, name, _flag, value, _worst, _thresh, _type, _updated, _when_failed, raw_value in \
            self.smart_data['attributes']:
        attr_name = (name or '').lower()
        int_value = toint(value)
        int_raw_value = toint(raw_value)

        warning = None
        if attr_name in non_zero_raw_rules:
            if int_raw_value:
                warning = AttributeWarning(AttributeWarning.Notice, AttributeWarning.FieldRawValue,
                                           raw_value, non_zero_raw_rules[attr_name])
        elif attr_name == "temperature_celsius":
            # Temperature (for some it may be 10xTemp, so limit the upper bound.)
            if 50 <= int_raw_value <= 120:
                warning = AttributeWarning(AttributeWarning.Notice, AttributeWarning.FieldRawValue,
                                           raw_value, too_hot)
        elif attr_name == "temperature_celsius_x10":
            # Raw value is in tenths of a degree, so 500 means 50 degrees Celsius.
            if int_raw_value > 500:
                warning = AttributeWarning(AttributeWarning.Notice, AttributeWarning.FieldRawValue,
                                           raw_value, too_hot)
        elif attr_name == "ssd_life_left":
            # This rule looks at the normalized VALUE column, so report that
            # column and its value rather than the raw value.
            if int_value < 50:
                warning = AttributeWarning(AttributeWarning.Notice, AttributeWarning.FieldValue,
                                           value, "The drive has less than half of its life left.")

        if warning is not None:
            failed_attributes[(attrid, name)] = warning

    return failed_attributes
def check_device_attributes(self):
device_model = self.device_model
device_db_attributes = self.get_attributes_from_database(device_model)
@ -157,9 +279,6 @@ class SMARTCheck(object):
threshold_to = re.compile('^:(\d+)$')
threshold_from_to = re.compile('^(\d+):(\d+)$')
if not device_db_attributes:
return {}
failed_attributes = {}
for attrid, name, flag, value, worst, tresh, type, updated, when_failed, raw_value in self.smart_data['attributes']:
@ -173,7 +292,7 @@ class SMARTCheck(object):
check_value = int(value if value_field == "VALUE" else raw_value)
if not (int(min_value) <= check_value <= int(max_value)):
failed_attributes[(attrid, name)] = ('CRITICAL', value_field, check_value)
failed_attributes[(attrid, name)] = AttributeWarning(AttributeWarning.Critical, value_field, check_value)
elif isinstance(db_attrs, dict):
value_field = db_attrs.get('field', 'RAW_VALUE')
check_value = int(value if value_field == "VALUE" else raw_value)
@ -195,13 +314,13 @@ class SMARTCheck(object):
(to_m and check_value <= int(to_m.group(1))) or \
(from_to_m and (int(from_to_m.group(1)) <= check_value <= int(from_to_m.group(2)))):
failed_attributes[(attrid, name)] = (failure_type, value_field, check_value)
failed_attributes[(attrid, name)] = AttributeWarning(failure_type, value_field, check_value)
else:
if (min_value is not None and check_value >= int(min_value)) or \
(max_value is not None and check_value <= int(max_value)):
failed_attributes[(attrid, name)] = ('CRITICAL', value_field, check_value)
failed_attributes[(attrid, name)] = AttributeWarning(AttributeWarning.Critical, value_field, check_value)
else:
raise ValueError("Unknown attribute specification: %s" % db_attrs)
return failed_attributes
return failed_attributes

18
src/smartcheck/main.py

@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
from argparse import ArgumentParser
import os
import sys
from smartcheck.check import SMARTCheck
DEFAULT_DATA_FILE=os.path.join(os.path.dirname(__file__), 'disks.yaml')
@ -8,15 +9,24 @@ DEFAULT_DATA_FILE=os.path.join(os.path.dirname(__file__), 'disks.yaml')
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument('--data-file', default=DEFAULT_DATA_FILE)
parser.add_argument('-f', '--file', help="Use S.M.A.R.T. report from file instead of calling smartctl")
parser.add_argument('-f', '--file', help="Use S.M.A.R.T. report from file instead of calling smartctl (Use - to read from stdin)")
args = parser.parse_args()
check = SMARTCheck(open(args.file, 'r'), args.data_file)
stream = None
if args.file:
if args.file == '-':
stream = sys.stdin
else:
stream = open(args.file, 'r')
check = SMARTCheck(stream, args.data_file)
import pprint
#print(check.check_attributes())
print(check.device_model, check.exists_in_database())
print(check.information)
#print(check.device_model, check.exists_in_database())
#print(check.check())
print(check.check_tests())
#pprint.pprint(check.smart_data)
#pprint.pprint(check.self_tests)

31
tests/check.py

@ -3,7 +3,7 @@ from StringIO import StringIO
import unittest
import os
from smartcheck.check import SMARTCheck
from smartcheck.check import SMARTCheck, AttributeWarning
samples_path = os.path.join(os.path.dirname(__file__), 'samples')
db_path = os.path.join(samples_path, '../../src/smartcheck/disks.yaml')
@ -21,10 +21,10 @@ class CheckTest(unittest.TestCase):
self.assertFalse(check.check())
def test_smart_attributes_not_found(self):
check = SMARTCheck(open(os.path.join(samples_path, 'seagate-barracuda-broken1.txt')), db_path)
self.assertFalse(check.check_tests())
check = SMARTCheck(open(os.path.join(samples_path, 'ST2000NM0033-9ZM175.txt')), db_path)
self.assertTrue(check.check_tests())
self.assertDictEqual(check.check_attributes(), {}) # Attributes not found in disks.json
self.assertFalse(check.check())
self.assertTrue(check.check())
def test_smart_attributes_nothing_wrong(self):
check = SMARTCheck(open(os.path.join(samples_path, 'WDC-WD2000FYYZ-01UL1B1.txt')), db_path)
@ -34,33 +34,38 @@ class CheckTest(unittest.TestCase):
def test_smart_attributes_min_max(self):
# from list
check = SMARTCheck(open(os.path.join(samples_path, 'WDC-WD2000FYYZ-01UL1B1.txt')),
check = SMARTCheck(open(os.path.join(samples_path, 'ST2000NM0033-9ZM175.txt')),
os.path.join(samples_path, 'disks-min-max.yaml'))
self.assertTrue(check.check_tests())
self.assertDictEqual(check.check_attributes(), {
(9, 'Power_On_Hours'): ('CRITICAL', 'RAW_VALUE', 15360)
(9, 'Power_On_Hours'): AttributeWarning(AttributeWarning.Critical, 'RAW_VALUE', 16998)
})
self.assertFalse(check.check())
# from dict
check = SMARTCheck(open(os.path.join(samples_path, 'WDC-WD2000FYYZ-01UL1B1.txt')),
check = SMARTCheck(open(os.path.join(samples_path, 'ST2000NM0033-9ZM175.txt')),
os.path.join(samples_path, 'disks-min-or-max.yaml'))
self.assertTrue(check.check_tests())
self.assertDictEqual(check.check_attributes(), {
(9, 'Power_On_Hours'): ('CRITICAL', 'RAW_VALUE', 15360),
(194, 'Temperature_Celsius'): ('CRITICAL', 'RAW_VALUE', 43)
(9, 'Power_On_Hours'): AttributeWarning(AttributeWarning.Critical, 'RAW_VALUE', 16998),
(194, 'Temperature_Celsius'): AttributeWarning(AttributeWarning.Critical, 'VALUE', 30)
})
self.assertFalse(check.check())
def test_smart_attributes_thresholds_min(self):
for sample_file, expected_attributes in [
# only warning
('disks-thresholds.yaml', { (9, 'Power_On_Hours'): ('WARNING', 'RAW_VALUE', 15360)}),
('disks-thresholds.yaml', {
(9, 'Power_On_Hours'): AttributeWarning(AttributeWarning.Warning, 'RAW_VALUE', 15360)
}),
# warning and critical - critical wins
('disks-thresholds-warn-and-crit.yaml', { (9, 'Power_On_Hours'): ('CRITICAL', 'RAW_VALUE', 15360)}),
('disks-thresholds-warn-and-crit.yaml', {
(9, 'Power_On_Hours'): AttributeWarning(AttributeWarning.Critical, 'RAW_VALUE', 15360)
}),
# warning threshold with range
('disks-thresholds-range.yaml', { (4, 'Start_Stop_Count'): ('WARNING', 'RAW_VALUE', 2)})
('disks-thresholds-range.yaml', {
(4, 'Start_Stop_Count'): AttributeWarning(AttributeWarning.Warning, 'RAW_VALUE', 2)
})
]:
check = SMARTCheck(open(os.path.join(samples_path, 'WDC-WD2000FYYZ-01UL1B1.txt')), os.path.join(samples_path, sample_file))
self.assertTrue(check.check_tests())

100
tests/samples/ST2000NM0033-9ZM175.txt

@ -0,0 +1,100 @@
smartctl 6.1 2013-03-16 r3800 [x86_64-linux-3.10.17-gentoo] (local build)
Copyright (C) 2002-13, Bruce Allen, Christian Franke, www.smartmontools.org
=== START OF INFORMATION SECTION ===
Device Model: ST2000NM0033-9ZM175
Serial Number: Z1X12VQJ
LU WWN Device Id: 5 000c50 064ebae97
Firmware Version: SN03
User Capacity: 2,000,398,934,016 bytes [2.00 TB]
Sector Size: 512 bytes logical/physical
Rotation Rate: 7200 rpm
Device is: Not in smartctl database [for details use: -P showall]
ATA Version is: ACS-2 (minor revision not indicated)
SATA Version is: SATA 3.0, 6.0 Gb/s (current: 6.0 Gb/s)
Local Time is: Wed Oct 21 13:17:10 2015 CEST
SMART support is: Available - device has SMART capability.
SMART support is: Enabled
=== START OF READ SMART DATA SECTION ===
SMART overall-health self-assessment test result: PASSED
General SMART Values:
Offline data collection status: (0x82) Offline data collection activity
was completed without error.
Auto Offline Data Collection: Enabled.
Self-test execution status: ( 0) The previous self-test routine completed
without error or no self-test has ever
been run.
Total time to complete Offline
data collection: ( 584) seconds.
Offline data collection
capabilities: (0x7b) SMART execute Offline immediate.
Auto Offline data collection on/off support.
Suspend Offline collection upon new
command.
Offline surface scan supported.
Self-test supported.
Conveyance Self-test supported.
Selective Self-test supported.
SMART capabilities: (0x0003) Saves SMART data before entering
power-saving mode.
Supports SMART auto save timer.
Error logging capability: (0x01) Error logging supported.
General Purpose Logging supported.
Short self-test routine
recommended polling time: ( 1) minutes.
Extended self-test routine
recommended polling time: ( 244) minutes.
Conveyance self-test routine
recommended polling time: ( 2) minutes.
SCT capabilities: (0x50bd) SCT Status supported.
SCT Error Recovery Control supported.
SCT Feature Control supported.
SCT Data Table supported.
SMART Attributes Data Structure revision number: 10
Vendor Specific SMART Attributes with Thresholds:
ID# ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
1 Raw_Read_Error_Rate 0x000f 081 063 044 Pre-fail Always - 122282989
3 Spin_Up_Time 0x0003 096 096 000 Pre-fail Always - 0
4 Start_Stop_Count 0x0032 100 100 020 Old_age Always - 8
5 Reallocated_Sector_Ct 0x0033 100 100 010 Pre-fail Always - 0
7 Seek_Error_Rate 0x000f 093 060 030 Pre-fail Always - 2245985704
9 Power_On_Hours 0x0032 081 081 000 Old_age Always - 16998
10 Spin_Retry_Count 0x0013 100 100 097 Pre-fail Always - 0
12 Power_Cycle_Count 0x0032 100 100 020 Old_age Always - 8
184 End-to-End_Error 0x0032 100 100 099 Old_age Always - 0
187 Reported_Uncorrect 0x0032 100 100 000 Old_age Always - 0
188 Command_Timeout 0x0032 100 100 000 Old_age Always - 0
189 High_Fly_Writes 0x003a 100 100 000 Old_age Always - 0
190 Airflow_Temperature_Cel 0x0022 070 062 045 Old_age Always - 30 (Min/Max 23/38)
191 G-Sense_Error_Rate 0x0032 100 100 000 Old_age Always - 0
192 Power-Off_Retract_Count 0x0032 100 100 000 Old_age Always - 5
193 Load_Cycle_Count 0x0032 100 100 000 Old_age Always - 746
194 Temperature_Celsius 0x0022 030 040 000 Old_age Always - 30 (0 22 0 0 0)
195 Hardware_ECC_Recovered 0x001a 054 015 000 Old_age Always - 122282989
197 Current_Pending_Sector 0x0012 100 100 000 Old_age Always - 0
198 Offline_Uncorrectable 0x0010 100 100 000 Old_age Offline - 0
199 UDMA_CRC_Error_Count 0x003e 200 200 000 Old_age Always - 0
SMART Error Log Version: 1
No Errors Logged
SMART Self-test log structure revision number 1
Num Test_Description Status Remaining LifeTime(hours) LBA_of_first_error
# 1 Extended offline Completed without error 00% 16905 -
# 2 Extended offline Completed without error 00% 16864 -
# 3 Extended offline Completed without error 00% 3 -
SMART Selective self-test log data structure revision number 1
SPAN MIN_LBA MAX_LBA CURRENT_TEST_STATUS
1 0 0 Not_testing
2 0 0 Not_testing
3 0 0 Not_testing
4 0 0 Not_testing
5 0 0 Not_testing
Selective self-test flags (0x0):
After scanning selected spans, do NOT read-scan remainder of disk.
If Selective self-test is pending on power-up, resume after 0 minute delay.

2
tests/samples/disks-min-max.yaml

@ -1,3 +1,3 @@
- model: "^WDC WD[234]000FYYZ-01UL1B[012]$"
- model: "^ST2000NM0033-9ZM175$"
attributes:
9: ["RAW_VALUE", 0, 10]

3
tests/samples/disks-min-or-max.yaml

@ -1,6 +1,7 @@
- model: "^WDC WD[234]000FYYZ-01UL1B[012]$"
- model: "^ST2000NM0033-9ZM175$"
attributes:
9:
min: 0
194:
field: 'VALUE'
max: 50
Loading…
Cancel
Save