Browse Source

Added ATA error count parsing

tags/0.3
Johann Schmitz 3 years ago
parent
commit
92e54a0838
4 changed files with 222 additions and 1 deletions
  1. +13
    -1
      src/smartcheck/check.py
  2. +4
    -0
      src/smartcheck/main.py
  3. +13
    -0
      tests/check.py
  4. +192
    -0
      tests/samples/WDC-WD1000FYPS-01ZKB0.txt

+ 13
- 1
src/smartcheck/check.py View File

@@ -11,6 +11,7 @@ DEFAULT_DISKS_FILE=os.path.join(os.path.dirname(__file__), 'disks.yaml')
# Marker lines that introduce the individual sections of a smartctl report.
INFORMATION_SECTION_START = '=== START OF INFORMATION SECTION ==='
DATA_SECTION_START = '=== START OF READ SMART DATA SECTION ==='
TESTS_SECTION_START = 'SMART Self-test log structure revision number'
# Captures the number from the "ATA Error Count: N ..." line of the error log.
# Raw string so that "\d" is a regex escape instead of an invalid string escape.
ATA_ERROR_COUNT = re.compile(r'^ATA Error Count: (\d+).*', re.MULTILINE | re.IGNORECASE)

INFORMATION_RE = [
("model_family", re.compile('Model Family: (.*)', re.UNICODE)),
@@ -121,6 +122,10 @@ class SMARTCheck(object):
def device_model(self):
return self.information['device_model']

@property
def ata_error_count(self):
    """Return the value stored under ``'ata_error_count'`` in ``self.parsed``."""
    parsed_report = self.parsed
    return parsed_report['ata_error_count']

def exists_in_database(self):
return self.get_attributes_from_database(self.device_model) is not None

@@ -138,6 +143,7 @@ class SMARTCheck(object):
'information': self.parse_information_section(self.raw),
'data': self.parse_data_section(self.raw),
'self_tests': self.parse_tests_section(self.raw),
'ata_error_count': self.parse_ata_error_count(self.raw),
}

@property
@@ -198,8 +204,14 @@ class SMARTCheck(object):
'test_results': TEST_RESULT_RE.findall(tests_text)
}

def parse_ata_error_count(self, s):
    """Extract the ATA error count from the report text ``s``.

    Searches ``s`` with the module-level ``ATA_ERROR_COUNT`` pattern and
    returns its first capture group as an ``int``. Returns ``0`` when the
    pattern does not match anywhere in ``s``.
    """
    found = ATA_ERROR_COUNT.search(s)
    return int(found.group(1)) if found else 0

def check(self, ignore_attributes=None):
    """Run the overall health check.

    ``ignore_attributes`` is forwarded to ``check_attributes`` (an empty
    list is used when it is ``None`` or otherwise falsy).

    The check passes only if ``check_attributes`` reports nothing, the
    self-test check passes, and no ATA errors were counted. The conditions
    are evaluated in that order and short-circuit on the first failure.
    """
    attribute_warnings = self.check_attributes(ignore_attributes or [])
    # A stale return that ignored the ATA error count used to precede this
    # one, so a non-zero error count could never fail the check.
    return (
        len(attribute_warnings) == 0
        and self.check_tests()
        and self.ata_error_count == 0
    )

def check_tests(self):
ok_test_results = [


+ 4
- 0
src/smartcheck/main.py View File

@@ -89,6 +89,10 @@ if __name__ == "__main__":
msg = (msg.strip() + '; S.M.A.R.T. self test reported an error').lstrip(';').strip()
exit_code = 2

if check.ata_error_count:
msg = (msg.strip() + '; %s ATA errors found' % check.ata_error_count).lstrip(';').strip()
exit_code = 2

if not exit_code:
msg = "S.M.A.R.T. data OK"



+ 13
- 0
tests/check.py View File

@@ -12,12 +12,21 @@ class CheckTest(unittest.TestCase):
with open(os.path.join(samples_path, 'seagate-barracuda-broken1.txt')) as f:
check = SMARTCheck(f)
self.assertFalse(check.check_tests())
self.assertEqual(check.ata_error_count, 8)
self.assertFalse(check.check())

def test_check_broken2(self):
# Second broken Seagate Barracuda sample: the self-test check must fail,
# 52 ATA errors must be parsed, and the overall check must fail.
with open(os.path.join(samples_path, 'seagate-barracuda-broken2.txt')) as f:
check = SMARTCheck(f)
self.assertFalse(check.check_tests())
self.assertEqual(check.ata_error_count, 52)
self.assertFalse(check.check())

def test_check_broken3(self):
# WDC WD1000FYPS sample: the self-test check passes, but the report
# contains "ATA Error Count: 32", so the overall check must fail.
with open(os.path.join(samples_path, 'WDC-WD1000FYPS-01ZKB0.txt')) as f:
check = SMARTCheck(f)
self.assertTrue(check.check_tests()) # the sample's self-test log lists one extended offline test, completed without error
self.assertEqual(check.ata_error_count, 32)
self.assertFalse(check.check())

def test_smart_attributes_not_found(self):
@@ -25,6 +34,7 @@ class CheckTest(unittest.TestCase):
check = SMARTCheck(f, db_path)
self.assertTrue(check.check_tests())
self.assertDictEqual(check.check_attributes(), {}) # Attributes not found in disks.json
self.assertEqual(check.ata_error_count, 0)
self.assertTrue(check.check())

def test_smart_attributes_nothing_wrong(self):
@@ -32,6 +42,7 @@ class CheckTest(unittest.TestCase):
check = SMARTCheck(f, db_path)
self.assertTrue(check.check_tests())
self.assertDictEqual(check.check_attributes(), {})
self.assertEqual(check.ata_error_count, 0)
self.assertTrue(check.check())

def test_smart_attributes_min_max(self):
@@ -42,6 +53,7 @@ class CheckTest(unittest.TestCase):
self.assertDictEqual(check.check_attributes(), {
(9, 'Power_On_Hours'): AttributeWarning(AttributeWarning.Critical, 'Power_On_Hours', 16998)
})
self.assertEqual(check.ata_error_count, 0)
self.assertFalse(check.check())

# from dict
@@ -52,6 +64,7 @@ class CheckTest(unittest.TestCase):
(9, 'Power_On_Hours'): AttributeWarning(AttributeWarning.Critical, 'Power_On_Hours', 16998),
(194, 'Temperature_Celsius'): AttributeWarning(AttributeWarning.Critical, 'Temperature_Celsius', 30)
})
self.assertEqual(check.ata_error_count, 0)
self.assertFalse(check.check())

def test_smart_attributes_thresholds_min(self):


+ 192
- 0
tests/samples/WDC-WD1000FYPS-01ZKB0.txt View File

@@ -0,0 +1,192 @@
smartctl 6.3 2014-07-26 r3976 [x86_64-linux-4.4.2-hardened-cit-2] (local build)
Copyright (C) 2002-14, Bruce Allen, Christian Franke, www.smartmontools.org

=== START OF INFORMATION SECTION ===
Model Family: Western Digital RE2-GP
Device Model: WDC WD1000FYPS-01ZKB0
Serial Number: WD-WCASJ0657732
LU WWN Device Id: 5 0014ee 2ab796fcb
Firmware Version: 02.01B01
User Capacity: 1,000,204,886,016 bytes [1.00 TB]
Sector Size: 512 bytes logical/physical
Rotation Rate: 5400 rpm
Device is: In smartctl database [for details use: -P show]
ATA Version is: ATA8-ACS (minor revision not indicated)
SATA Version is: SATA 2.5, 3.0 Gb/s
Local Time is: Mon May 2 08:40:23 2016 CEST
SMART support is: Available - device has SMART capability.
SMART support is: Enabled

=== START OF READ SMART DATA SECTION ===
SMART overall-health self-assessment test result: PASSED

General SMART Values:
Offline data collection status: (0x82) Offline data collection activity
was completed without error.
Auto Offline Data Collection: Enabled.
Self-test execution status: ( 0) The previous self-test routine completed
without error or no self-test has ever
been run.
Total time to complete Offline
data collection: (25560) seconds.
Offline data collection
capabilities: (0x7b) SMART execute Offline immediate.
Auto Offline data collection on/off support.
Suspend Offline collection upon new
command.
Offline surface scan supported.
Self-test supported.
Conveyance Self-test supported.
Selective Self-test supported.
SMART capabilities: (0x0003) Saves SMART data before entering
power-saving mode.
Supports SMART auto save timer.
Error logging capability: (0x01) Error logging supported.
General Purpose Logging supported.
Short self-test routine
recommended polling time: ( 2) minutes.
Extended self-test routine
recommended polling time: ( 293) minutes.
Conveyance self-test routine
recommended polling time: ( 5) minutes.
SCT capabilities: (0x303f) SCT Status supported.
SCT Error Recovery Control supported.
SCT Feature Control supported.
SCT Data Table supported.

SMART Attributes Data Structure revision number: 16
Vendor Specific SMART Attributes with Thresholds:
ID# ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
1 Raw_Read_Error_Rate 0x000f 200 199 051 Pre-fail Always - 17
3 Spin_Up_Time 0x0003 180 177 021 Pre-fail Always - 7966
4 Start_Stop_Count 0x0032 100 100 000 Old_age Always - 97
5 Reallocated_Sector_Ct 0x0033 200 200 140 Pre-fail Always - 0
7 Seek_Error_Rate 0x000e 100 253 000 Old_age Always - 0
9 Power_On_Hours 0x0032 026 026 000 Old_age Always - 54234
10 Spin_Retry_Count 0x0012 100 253 000 Old_age Always - 0
11 Calibration_Retry_Count 0x0012 100 253 000 Old_age Always - 0
12 Power_Cycle_Count 0x0032 100 100 000 Old_age Always - 97
192 Power-Off_Retract_Count 0x0032 200 200 000 Old_age Always - 332
193 Load_Cycle_Count 0x0032 001 001 000 Old_age Always - 1643266
194 Temperature_Celsius 0x0022 122 094 000 Old_age Always - 30
196 Reallocated_Event_Count 0x0032 200 200 000 Old_age Always - 0
197 Current_Pending_Sector 0x0012 200 200 000 Old_age Always - 4
198 Offline_Uncorrectable 0x0010 200 200 000 Old_age Offline - 4
199 UDMA_CRC_Error_Count 0x003e 200 200 000 Old_age Always - 0
200 Multi_Zone_Error_Rate 0x0008 200 200 000 Old_age Offline - 0

SMART Error Log Version: 1
ATA Error Count: 32 (device log contains only the most recent five errors)
CR = Command Register [HEX]
FR = Features Register [HEX]
SC = Sector Count Register [HEX]
SN = Sector Number Register [HEX]
CL = Cylinder Low Register [HEX]
CH = Cylinder High Register [HEX]
DH = Device/Head Register [HEX]
DC = Device Command Register [HEX]
ER = Error register [HEX]
ST = Status register [HEX]
Powered_Up_Time is measured from power on, and printed as
DDd+hh:mm:SS.sss where DD=days, hh=hours, mm=minutes,
SS=sec, and sss=millisec. It "wraps" after 49.710 days.

Error 32 occurred at disk power-on lifetime: 54186 hours (2257 days + 18 hours)
When the command that caused the error occurred, the device was active or idle.

After command completion occurred, registers were:
ER ST SC SN CL CH DH
-- -- -- -- -- -- --
40 51 00 ca 1a 40 40 Error: UNC at LBA = 0x00401aca = 4201162

Commands leading to the command that caused the error were:
CR FR SC SN CL CH DH DC Powered_Up_Time Command/Feature_Name
-- -- -- -- -- -- -- -- ---------------- --------------------
60 00 00 80 1a 40 73 08 15d+02:47:11.908 READ FPDMA QUEUED
ef aa 00 00 00 00 00 08 15d+02:47:11.908 SET FEATURES [Enable read look-ahead]
ef 03 46 00 00 00 00 08 15d+02:47:11.908 SET FEATURES [Set transfer mode]
ef 02 00 00 00 00 00 08 15d+02:47:11.908 SET FEATURES [Enable write cache]
ef 03 0c 00 00 00 00 08 15d+02:47:11.908 SET FEATURES [Set transfer mode]

Error 31 occurred at disk power-on lifetime: 54186 hours (2257 days + 18 hours)
When the command that caused the error occurred, the device was active or idle.

After command completion occurred, registers were:
ER ST SC SN CL CH DH
-- -- -- -- -- -- --
40 51 00 ca 1a 40 40 Error: UNC at LBA = 0x00401aca = 4201162

Commands leading to the command that caused the error were:
CR FR SC SN CL CH DH DC Powered_Up_Time Command/Feature_Name
-- -- -- -- -- -- -- -- ---------------- --------------------
60 00 68 00 51 40 73 08 15d+02:47:06.731 READ FPDMA QUEUED
60 00 60 00 50 40 73 08 15d+02:47:06.731 READ FPDMA QUEUED
60 00 58 00 4f 40 73 08 15d+02:47:06.731 READ FPDMA QUEUED
60 38 50 90 e3 3b 3a 08 15d+02:47:06.731 READ FPDMA QUEUED
60 80 48 80 1e 40 73 08 15d+02:47:06.730 READ FPDMA QUEUED

Error 30 occurred at disk power-on lifetime: 54186 hours (2257 days + 18 hours)
When the command that caused the error occurred, the device was active or idle.

After command completion occurred, registers were:
ER ST SC SN CL CH DH
-- -- -- -- -- -- --
40 51 00 95 13 40 40 Error: UNC at LBA = 0x00401395 = 4199317

Commands leading to the command that caused the error were:
CR FR SC SN CL CH DH DC Powered_Up_Time Command/Feature_Name
-- -- -- -- -- -- -- -- ---------------- --------------------
60 00 00 80 13 40 73 08 15d+02:47:01.722 READ FPDMA QUEUED
ef aa 00 00 00 00 00 08 15d+02:47:01.722 SET FEATURES [Enable read look-ahead]
ef 03 46 00 00 00 00 08 15d+02:47:01.722 SET FEATURES [Set transfer mode]
ef 02 00 00 00 00 00 08 15d+02:47:01.722 SET FEATURES [Enable write cache]
ef 03 0c 00 00 00 00 08 15d+02:47:01.722 SET FEATURES [Set transfer mode]

Error 29 occurred at disk power-on lifetime: 54186 hours (2257 days + 18 hours)
When the command that caused the error occurred, the device was active or idle.

After command completion occurred, registers were:
ER ST SC SN CL CH DH
-- -- -- -- -- -- --
40 51 00 95 13 40 40 Error: UNC at LBA = 0x00401395 = 4199317

Commands leading to the command that caused the error were:
CR FR SC SN CL CH DH DC Powered_Up_Time Command/Feature_Name
-- -- -- -- -- -- -- -- ---------------- --------------------
60 00 e8 00 51 40 73 08 15d+02:46:55.570 READ FPDMA QUEUED
60 00 e0 00 50 40 73 08 15d+02:46:55.570 READ FPDMA QUEUED
60 00 d8 00 4f 40 73 08 15d+02:46:55.570 READ FPDMA QUEUED
60 38 d0 90 e3 3b 3a 08 15d+02:46:55.570 READ FPDMA QUEUED
60 80 c8 80 1e 40 73 08 15d+02:46:55.570 READ FPDMA QUEUED

Error 28 occurred at disk power-on lifetime: 54186 hours (2257 days + 18 hours)
When the command that caused the error occurred, the device was active or idle.

After command completion occurred, registers were:
ER ST SC SN CL CH DH
-- -- -- -- -- -- --
40 51 00 e9 04 40 40 Error: UNC at LBA = 0x004004e9 = 4195561

Commands leading to the command that caused the error were:
CR FR SC SN CL CH DH DC Powered_Up_Time Command/Feature_Name
-- -- -- -- -- -- -- -- ---------------- --------------------
60 00 00 80 04 40 73 08 15d+02:46:50.338 READ FPDMA QUEUED
ef aa 00 00 00 00 00 08 15d+02:46:50.338 SET FEATURES [Enable read look-ahead]
ef 03 46 00 00 00 00 08 15d+02:46:50.338 SET FEATURES [Set transfer mode]
ef 02 00 00 00 00 00 08 15d+02:46:50.338 SET FEATURES [Enable write cache]
ef 03 0c 00 00 00 00 08 15d+02:46:50.338 SET FEATURES [Set transfer mode]

SMART Self-test log structure revision number 1
Num Test_Description Status Remaining LifeTime(hours) LBA_of_first_error
# 1 Extended offline Completed without error 00% 52852 -

SMART Selective self-test log data structure revision number 1
SPAN MIN_LBA MAX_LBA CURRENT_TEST_STATUS
1 0 0 Not_testing
2 0 0 Not_testing
3 0 0 Not_testing
4 0 0 Not_testing
5 0 0 Not_testing
Selective self-test flags (0x0):
After scanning selected spans, do NOT read-scan remainder of disk.
If Selective self-test is pending on power-up, resume after 0 minute delay.

Loading…
Cancel
Save