Browse Source

Added ATA error count parsing

tags/0.3
Johann Schmitz 3 years ago
parent
commit
92e54a0838
4 changed files with 222 additions and 1 deletions
  1. 13
    1
      src/smartcheck/check.py
  2. 4
    0
      src/smartcheck/main.py
  3. 13
    0
      tests/check.py
  4. 192
    0
      tests/samples/WDC-WD1000FYPS-01ZKB0.txt

+ 13
- 1
src/smartcheck/check.py View File

@@ -11,6 +11,7 @@ DEFAULT_DISKS_FILE=os.path.join(os.path.dirname(__file__), 'disks.yaml')
11 11
 INFORMATION_SECTION_START = '=== START OF INFORMATION SECTION ==='
12 12
 DATA_SECTION_START = '=== START OF READ SMART DATA SECTION ==='
13 13
 TESTS_SECTION_START = 'SMART Self-test log structure revision number'
14
+ATA_ERROR_COUNT = re.compile('^ATA Error Count: (\d+).*', re.MULTILINE | re.IGNORECASE)
14 15
 
15 16
 INFORMATION_RE = [
16 17
 	("model_family", re.compile('Model Family: (.*)', re.UNICODE)),
@@ -121,6 +122,10 @@ class SMARTCheck(object):
121 122
 	def device_model(self):
122 123
 		return self.information['device_model']
123 124
 
125
+	@property
126
+	def ata_error_count(self):
127
+		return self.parsed['ata_error_count']
128
+
124 129
 	def exists_in_database(self):
125 130
 		return self.get_attributes_from_database(self.device_model) is not None
126 131
 
@@ -138,6 +143,7 @@ class SMARTCheck(object):
138 143
 			'information': self.parse_information_section(self.raw),
139 144
 			'data': self.parse_data_section(self.raw),
140 145
 			'self_tests': self.parse_tests_section(self.raw),
146
+			'ata_error_count': self.parse_ata_error_count(self.raw),
141 147
 		}
142 148
 
143 149
 	@property
@@ -198,8 +204,14 @@ class SMARTCheck(object):
198 204
 			'test_results': TEST_RESULT_RE.findall(tests_text)
199 205
 		}
200 206
 
207
+	def parse_ata_error_count(self, s):
208
+		m = ATA_ERROR_COUNT.search(s)
209
+		if m:
210
+			return int(m.group(1))
211
+		return 0
212
+
201 213
 	def check(self, ignore_attributes=None):
202
-		return len(self.check_attributes(ignore_attributes or [])) == 0 and self.check_tests()
214
+		return len(self.check_attributes(ignore_attributes or [])) == 0 and self.check_tests() and self.ata_error_count == 0
203 215
 
204 216
 	def check_tests(self):
205 217
 		ok_test_results = [

+ 4
- 0
src/smartcheck/main.py View File

@@ -89,6 +89,10 @@ if __name__ == "__main__":
89 89
 				msg = (msg.strip() + '; S.M.A.R.T. self test reported an error').lstrip(';').strip()
90 90
 				exit_code = 2
91 91
 
92
+			if check.ata_error_count:
93
+				msg = (msg.strip() + '; %s ATA errors found' % check.ata_error_count).lstrip(';').strip()
94
+				exit_code = 2
95
+
92 96
 			if not exit_code:
93 97
 				msg = "S.M.A.R.T. data OK"
94 98
 

+ 13
- 0
tests/check.py View File

@@ -12,12 +12,21 @@ class CheckTest(unittest.TestCase):
12 12
 		with open(os.path.join(samples_path, 'seagate-barracuda-broken1.txt')) as f:
13 13
 			check = SMARTCheck(f)
14 14
 			self.assertFalse(check.check_tests())
15
+			self.assertEqual(check.ata_error_count, 8)
15 16
 			self.assertFalse(check.check())
16 17
 
17 18
 	def test_check_broken2(self):
18 19
 		with open(os.path.join(samples_path, 'seagate-barracuda-broken2.txt')) as f:
19 20
 			check = SMARTCheck(f)
20 21
 			self.assertFalse(check.check_tests())
22
+			self.assertEqual(check.ata_error_count, 52)
23
+			self.assertFalse(check.check())
24
+
25
+	def test_check_broken3(self):
26
+		with open(os.path.join(samples_path, 'WDC-WD1000FYPS-01ZKB0.txt')) as f:
27
+			check = SMARTCheck(f)
28
+			self.assertTrue(check.check_tests())  # the only logged self-test completed without error
29
+			self.assertEqual(check.ata_error_count, 32)
21 30
 			self.assertFalse(check.check())
22 31
 
23 32
 	def test_smart_attributes_not_found(self):
@@ -25,6 +34,7 @@ class CheckTest(unittest.TestCase):
25 34
 			check = SMARTCheck(f, db_path)
26 35
 			self.assertTrue(check.check_tests())
27 36
 			self.assertDictEqual(check.check_attributes(), {})  # Attributes not found in disks.json
37
+			self.assertEqual(check.ata_error_count, 0)
28 38
 			self.assertTrue(check.check())
29 39
 
30 40
 	def test_smart_attributes_nothing_wrong(self):
@@ -32,6 +42,7 @@ class CheckTest(unittest.TestCase):
32 42
 			check = SMARTCheck(f, db_path)
33 43
 			self.assertTrue(check.check_tests())
34 44
 			self.assertDictEqual(check.check_attributes(), {})
45
+			self.assertEqual(check.ata_error_count, 0)
35 46
 			self.assertTrue(check.check())
36 47
 
37 48
 	def test_smart_attributes_min_max(self):
@@ -42,6 +53,7 @@ class CheckTest(unittest.TestCase):
42 53
 			self.assertDictEqual(check.check_attributes(), {
43 54
 				(9, 'Power_On_Hours'): AttributeWarning(AttributeWarning.Critical, 'Power_On_Hours', 16998)
44 55
 			})
56
+			self.assertEqual(check.ata_error_count, 0)
45 57
 			self.assertFalse(check.check())
46 58
 
47 59
 		# from dict
@@ -52,6 +64,7 @@ class CheckTest(unittest.TestCase):
52 64
 				(9, 'Power_On_Hours'): AttributeWarning(AttributeWarning.Critical, 'Power_On_Hours', 16998),
53 65
 				(194, 'Temperature_Celsius'): AttributeWarning(AttributeWarning.Critical, 'Temperature_Celsius', 30)
54 66
 			})
67
+			self.assertEqual(check.ata_error_count, 0)
55 68
 			self.assertFalse(check.check())
56 69
 
57 70
 	def test_smart_attributes_thresholds_min(self):

+ 192
- 0
tests/samples/WDC-WD1000FYPS-01ZKB0.txt View File

@@ -0,0 +1,192 @@
1
+ smartctl 6.3 2014-07-26 r3976 [x86_64-linux-4.4.2-hardened-cit-2] (local build)
2
+Copyright (C) 2002-14, Bruce Allen, Christian Franke, www.smartmontools.org
3
+
4
+=== START OF INFORMATION SECTION ===
5
+Model Family:     Western Digital RE2-GP
6
+Device Model:     WDC WD1000FYPS-01ZKB0
7
+Serial Number:    WD-WCASJ0657732
8
+LU WWN Device Id: 5 0014ee 2ab796fcb
9
+Firmware Version: 02.01B01
10
+User Capacity:    1,000,204,886,016 bytes [1.00 TB]
11
+Sector Size:      512 bytes logical/physical
12
+Rotation Rate:    5400 rpm
13
+Device is:        In smartctl database [for details use: -P show]
14
+ATA Version is:   ATA8-ACS (minor revision not indicated)
15
+SATA Version is:  SATA 2.5, 3.0 Gb/s
16
+Local Time is:    Mon May  2 08:40:23 2016 CEST
17
+SMART support is: Available - device has SMART capability.
18
+SMART support is: Enabled
19
+
20
+=== START OF READ SMART DATA SECTION ===
21
+SMART overall-health self-assessment test result: PASSED
22
+
23
+General SMART Values:
24
+Offline data collection status:  (0x82)    Offline data collection activity
25
+                    was completed without error.
26
+                    Auto Offline Data Collection: Enabled.
27
+Self-test execution status:      (   0)    The previous self-test routine completed
28
+                    without error or no self-test has ever
29
+                    been run.
30
+Total time to complete Offline
31
+data collection:         (25560) seconds.
32
+Offline data collection
33
+capabilities:              (0x7b) SMART execute Offline immediate.
34
+                    Auto Offline data collection on/off support.
35
+                    Suspend Offline collection upon new
36
+                    command.
37
+                    Offline surface scan supported.
38
+                    Self-test supported.
39
+                    Conveyance Self-test supported.
40
+                    Selective Self-test supported.
41
+SMART capabilities:            (0x0003)    Saves SMART data before entering
42
+                    power-saving mode.
43
+                    Supports SMART auto save timer.
44
+Error logging capability:        (0x01)    Error logging supported.
45
+                    General Purpose Logging supported.
46
+Short self-test routine
47
+recommended polling time:      (   2) minutes.
48
+Extended self-test routine
49
+recommended polling time:      ( 293) minutes.
50
+Conveyance self-test routine
51
+recommended polling time:      (   5) minutes.
52
+SCT capabilities:            (0x303f)    SCT Status supported.
53
+                    SCT Error Recovery Control supported.
54
+                    SCT Feature Control supported.
55
+                    SCT Data Table supported.
56
+
57
+SMART Attributes Data Structure revision number: 16
58
+Vendor Specific SMART Attributes with Thresholds:
59
+ID# ATTRIBUTE_NAME          FLAG     VALUE WORST THRESH TYPE      UPDATED  WHEN_FAILED RAW_VALUE
60
+  1 Raw_Read_Error_Rate     0x000f   200   199   051    Pre-fail  Always       -       17
61
+  3 Spin_Up_Time            0x0003   180   177   021    Pre-fail  Always       -       7966
62
+  4 Start_Stop_Count        0x0032   100   100   000    Old_age   Always       -       97
63
+  5 Reallocated_Sector_Ct   0x0033   200   200   140    Pre-fail  Always       -       0
64
+  7 Seek_Error_Rate         0x000e   100   253   000    Old_age   Always       -       0
65
+  9 Power_On_Hours          0x0032   026   026   000    Old_age   Always       -       54234
66
+ 10 Spin_Retry_Count        0x0012   100   253   000    Old_age   Always       -       0
67
+ 11 Calibration_Retry_Count 0x0012   100   253   000    Old_age   Always       -       0
68
+ 12 Power_Cycle_Count       0x0032   100   100   000    Old_age   Always       -       97
69
+192 Power-Off_Retract_Count 0x0032   200   200   000    Old_age   Always       -       332
70
+193 Load_Cycle_Count        0x0032   001   001   000    Old_age   Always       -       1643266
71
+194 Temperature_Celsius     0x0022   122   094   000    Old_age   Always       -       30
72
+196 Reallocated_Event_Count 0x0032   200   200   000    Old_age   Always       -       0
73
+197 Current_Pending_Sector  0x0012   200   200   000    Old_age   Always       -       4
74
+198 Offline_Uncorrectable   0x0010   200   200   000    Old_age   Offline      -       4
75
+199 UDMA_CRC_Error_Count    0x003e   200   200   000    Old_age   Always       -       0
76
+200 Multi_Zone_Error_Rate   0x0008   200   200   000    Old_age   Offline      -       0
77
+
78
+SMART Error Log Version: 1
79
+ATA Error Count: 32 (device log contains only the most recent five errors)
80
+    CR = Command Register [HEX]
81
+    FR = Features Register [HEX]
82
+    SC = Sector Count Register [HEX]
83
+    SN = Sector Number Register [HEX]
84
+    CL = Cylinder Low Register [HEX]
85
+    CH = Cylinder High Register [HEX]
86
+    DH = Device/Head Register [HEX]
87
+    DC = Device Command Register [HEX]
88
+    ER = Error register [HEX]
89
+    ST = Status register [HEX]
90
+Powered_Up_Time is measured from power on, and printed as
91
+DDd+hh:mm:SS.sss where DD=days, hh=hours, mm=minutes,
92
+SS=sec, and sss=millisec. It "wraps" after 49.710 days.
93
+
94
+Error 32 occurred at disk power-on lifetime: 54186 hours (2257 days + 18 hours)
95
+  When the command that caused the error occurred, the device was active or idle.
96
+
97
+  After command completion occurred, registers were:
98
+  ER ST SC SN CL CH DH
99
+  -- -- -- -- -- -- --
100
+  40 51 00 ca 1a 40 40  Error: UNC at LBA = 0x00401aca = 4201162
101
+
102
+  Commands leading to the command that caused the error were:
103
+  CR FR SC SN CL CH DH DC   Powered_Up_Time  Command/Feature_Name
104
+  -- -- -- -- -- -- -- --  ----------------  --------------------
105
+  60 00 00 80 1a 40 73 08  15d+02:47:11.908  READ FPDMA QUEUED
106
+  ef aa 00 00 00 00 00 08  15d+02:47:11.908  SET FEATURES [Enable read look-ahead]
107
+  ef 03 46 00 00 00 00 08  15d+02:47:11.908  SET FEATURES [Set transfer mode]
108
+  ef 02 00 00 00 00 00 08  15d+02:47:11.908  SET FEATURES [Enable write cache]
109
+  ef 03 0c 00 00 00 00 08  15d+02:47:11.908  SET FEATURES [Set transfer mode]
110
+
111
+Error 31 occurred at disk power-on lifetime: 54186 hours (2257 days + 18 hours)
112
+  When the command that caused the error occurred, the device was active or idle.
113
+
114
+  After command completion occurred, registers were:
115
+  ER ST SC SN CL CH DH
116
+  -- -- -- -- -- -- --
117
+  40 51 00 ca 1a 40 40  Error: UNC at LBA = 0x00401aca = 4201162
118
+
119
+  Commands leading to the command that caused the error were:
120
+  CR FR SC SN CL CH DH DC   Powered_Up_Time  Command/Feature_Name
121
+  -- -- -- -- -- -- -- --  ----------------  --------------------
122
+  60 00 68 00 51 40 73 08  15d+02:47:06.731  READ FPDMA QUEUED
123
+  60 00 60 00 50 40 73 08  15d+02:47:06.731  READ FPDMA QUEUED
124
+  60 00 58 00 4f 40 73 08  15d+02:47:06.731  READ FPDMA QUEUED
125
+  60 38 50 90 e3 3b 3a 08  15d+02:47:06.731  READ FPDMA QUEUED
126
+  60 80 48 80 1e 40 73 08  15d+02:47:06.730  READ FPDMA QUEUED
127
+
128
+Error 30 occurred at disk power-on lifetime: 54186 hours (2257 days + 18 hours)
129
+  When the command that caused the error occurred, the device was active or idle.
130
+
131
+  After command completion occurred, registers were:
132
+  ER ST SC SN CL CH DH
133
+  -- -- -- -- -- -- --
134
+  40 51 00 95 13 40 40  Error: UNC at LBA = 0x00401395 = 4199317
135
+
136
+  Commands leading to the command that caused the error were:
137
+  CR FR SC SN CL CH DH DC   Powered_Up_Time  Command/Feature_Name
138
+  -- -- -- -- -- -- -- --  ----------------  --------------------
139
+  60 00 00 80 13 40 73 08  15d+02:47:01.722  READ FPDMA QUEUED
140
+  ef aa 00 00 00 00 00 08  15d+02:47:01.722  SET FEATURES [Enable read look-ahead]
141
+  ef 03 46 00 00 00 00 08  15d+02:47:01.722  SET FEATURES [Set transfer mode]
142
+  ef 02 00 00 00 00 00 08  15d+02:47:01.722  SET FEATURES [Enable write cache]
143
+  ef 03 0c 00 00 00 00 08  15d+02:47:01.722  SET FEATURES [Set transfer mode]
144
+
145
+Error 29 occurred at disk power-on lifetime: 54186 hours (2257 days + 18 hours)
146
+  When the command that caused the error occurred, the device was active or idle.
147
+
148
+  After command completion occurred, registers were:
149
+  ER ST SC SN CL CH DH
150
+  -- -- -- -- -- -- --
151
+  40 51 00 95 13 40 40  Error: UNC at LBA = 0x00401395 = 4199317
152
+
153
+  Commands leading to the command that caused the error were:
154
+  CR FR SC SN CL CH DH DC   Powered_Up_Time  Command/Feature_Name
155
+  -- -- -- -- -- -- -- --  ----------------  --------------------
156
+  60 00 e8 00 51 40 73 08  15d+02:46:55.570  READ FPDMA QUEUED
157
+  60 00 e0 00 50 40 73 08  15d+02:46:55.570  READ FPDMA QUEUED
158
+  60 00 d8 00 4f 40 73 08  15d+02:46:55.570  READ FPDMA QUEUED
159
+  60 38 d0 90 e3 3b 3a 08  15d+02:46:55.570  READ FPDMA QUEUED
160
+  60 80 c8 80 1e 40 73 08  15d+02:46:55.570  READ FPDMA QUEUED
161
+
162
+Error 28 occurred at disk power-on lifetime: 54186 hours (2257 days + 18 hours)
163
+  When the command that caused the error occurred, the device was active or idle.
164
+
165
+  After command completion occurred, registers were:
166
+  ER ST SC SN CL CH DH
167
+  -- -- -- -- -- -- --
168
+  40 51 00 e9 04 40 40  Error: UNC at LBA = 0x004004e9 = 4195561
169
+
170
+  Commands leading to the command that caused the error were:
171
+  CR FR SC SN CL CH DH DC   Powered_Up_Time  Command/Feature_Name
172
+  -- -- -- -- -- -- -- --  ----------------  --------------------
173
+  60 00 00 80 04 40 73 08  15d+02:46:50.338  READ FPDMA QUEUED
174
+  ef aa 00 00 00 00 00 08  15d+02:46:50.338  SET FEATURES [Enable read look-ahead]
175
+  ef 03 46 00 00 00 00 08  15d+02:46:50.338  SET FEATURES [Set transfer mode]
176
+  ef 02 00 00 00 00 00 08  15d+02:46:50.338  SET FEATURES [Enable write cache]
177
+  ef 03 0c 00 00 00 00 08  15d+02:46:50.338  SET FEATURES [Set transfer mode]
178
+
179
+SMART Self-test log structure revision number 1
180
+Num  Test_Description    Status                  Remaining  LifeTime(hours)  LBA_of_first_error
181
+# 1  Extended offline    Completed without error       00%     52852         -
182
+
183
+SMART Selective self-test log data structure revision number 1
184
+ SPAN  MIN_LBA  MAX_LBA  CURRENT_TEST_STATUS
185
+    1        0        0  Not_testing
186
+    2        0        0  Not_testing
187
+    3        0        0  Not_testing
188
+    4        0        0  Not_testing
189
+    5        0        0  Not_testing
190
+Selective self-test flags (0x0):
191
+  After scanning selected spans, do NOT read-scan remainder of disk.
192
+If Selective self-test is pending on power-up, resume after 0 minute delay.

Loading…
Cancel
Save