Browse Source

Replace hard-coded tests for attributes with an instruction file, as mentioned by Patrick Kuijvenhoven in https://github.com/ercpe/smart-check/issues/3.

If an attribute isn't listed in the generic.yaml file (and the disk does not exist in disks.yaml either), the code still checks the value against the threshold.
tags/0.4
Johann Schmitz 2 years ago
parent
commit
7c93febd7b
3 changed files with 131 additions and 68 deletions
  1. 61
    66
      smartcheck/check.py
  2. 68
    0
      smartcheck/generic.yaml
  3. 2
    2
      tests/check.py

+ 61
- 66
smartcheck/check.py View File

@@ -9,6 +9,7 @@ import os
9 9
 logger = logging.getLogger(__name__)
10 10
 
11 11
 DEFAULT_DISKS_FILE=os.path.join(os.path.dirname(__file__), 'disks.yaml')
12
+GENERIC_ATTRS_FILE=os.path.join(os.path.dirname(__file__), 'generic.yaml')
12 13
 
13 14
 INFORMATION_SECTION_START = '=== START OF INFORMATION SECTION ==='
14 15
 DATA_SECTION_START = '=== START OF READ SMART DATA SECTION ==='
@@ -39,6 +40,27 @@ def toint(s, default=0):
39 40
         return default
40 41
 
41 42
 
43
+def parse_range_specifier(s):
44
+    # should be functionally equivalent to the next one
45
+    if isinstance(s, int):
46
+        return lambda x: x > s
47
+
48
+    # either '10' or '10:'
49
+    if re.match("^[0-9]+$", s) or re.match("^[0-9]+:$", s):
50
+        return lambda x: x > int(s.rstrip(':'))
51
+    
52
+    # ':10'
53
+    if re.match("^:[0-9]+$", s):
54
+        return lambda x: x < int(s.lstrip(':'))
55
+
56
+    from_to = re.match("^([0-9]+):([0-9]+)$", s, re.IGNORECASE)
57
+    if from_to:
58
+        return lambda x: int(from_to.group(1)) <= x <= int(from_to.group(2))
59
+    
60
+    logger.error("Couldn't parse '%s' - it will be ignored")
61
+    return lambda x: False
62
+
63
+
42 64
 class AttributeWarning(object):
43 65
     Notice = 'NOTICE'
44 66
     Warning = 'WARNING'
@@ -91,6 +113,7 @@ class SMARTCheck(object):
91 113
         self.parsed_sections = None
92 114
         self.db_path = db_path
93 115
         self._database = None
116
+        self._generic = None
94 117
 
95 118
     @property
96 119
     def information(self):
@@ -120,6 +143,16 @@ class SMARTCheck(object):
120 143
                 self._database = []
121 144
         return self._database
122 145
 
146
+    @property
147
+    def generic_attributes_checks(self):
148
+        if self._generic is None:
149
+            try:
150
+                with open(GENERIC_ATTRS_FILE) as f:
151
+                    self._generic = yaml.load(f) or []
152
+            except:
153
+                logger.exception("Could not read %s", GENERIC_ATTRS_FILE)
154
+        return self._generic
155
+
123 156
     @property
124 157
     def device_model(self):
125 158
         return self.information['device_model']
@@ -247,78 +280,40 @@ class SMARTCheck(object):
247 280
 
248 281
         for attrid, name, flag, value, worst, thresh, attr_type, updated, when_failed, raw_value in self.smart_data['attributes']:
249 282
             logger.debug("Attribute %s (%s): value=%s, raw value=%s, worst=%s, thresh=%s", attrid, name, value, raw_value, worst, thresh)
283
+
250 284
             attrid = int(attrid)
251 285
             attr_name = (name or '').lower()
252 286
             int_value = toint(value)
253 287
             int_raw_value = toint(raw_value)
254 288
             int_thresh = toint(thresh)
255 289
 
256
-            # these tests are take from gsmartcontrol (storage_property_descr.cpp) and check for known pre-fail attributes
257
-            if attr_name in ('reallocated_sector_count', 'reallocated_sector_ct') and int_raw_value > 0:
258
-                failed_attributes[(attrid, name)] = AttributeWarning(AttributeWarning.Notice,
259
-                                                                     name,
260
-                                                                     raw_value,
261
-                                                                     "The drive has a non-zero Raw value, but there is no SMART warning yet. " +
262
-                                                                     "This could be an indication of future failures and/or potential data loss in bad sectors.")
263
-            elif attr_name == 'spin_up_retry_count' and int_raw_value > 0:
264
-                failed_attributes[(attrid, name)] = AttributeWarning(AttributeWarning.Notice,
265
-                                                                     name,
266
-                                                                     raw_value,
267
-                                                                     "The drive has a non-zero Raw value, but there is no SMART warning yet. " +
268
-                                                                     "Your drive may have problems spinning up, which could lead to a complete mechanical failure.")
269
-            elif attr_name == "soft_read_error_rate" and int_raw_value > 0:
270
-                failed_attributes[(attrid, name)] = AttributeWarning(AttributeWarning.Notice,
271
-                                                                     name,
272
-                                                                     raw_value,
273
-                                                                     "The drive has a non-zero Raw value, but there is no SMART warning yet. " +
274
-                                                                     "This could be an indication of future failures and/or potential data loss in bad sectors.")
275
-            elif attr_name in ("temperature_celsius", "temperature_celsius_x10"):
276
-                if 50 <= int_raw_value <= 120:
277
-                    # Temperature (for some it may be 10xTemp, so limit the upper bound.)
278
-                    failed_attributes[(attrid, name)] = AttributeWarning(AttributeWarning.Notice,
279
-                                                                         name,
280
-                                                                         int_raw_value,
281
-                                                                         "The temperature of the drive is higher than 50 degrees Celsius. " +
282
-                                                                         "This may shorten its lifespan and cause damage under severe load.")
283
-                elif int_raw_value > 500:
284
-                    # Temperature (for some it may be 10xTemp, so limit the upper bound.)
285
-                    failed_attributes[(attrid, name)] = AttributeWarning(AttributeWarning.Notice,
286
-                                                                         name,
287
-                                                                         int_raw_value,
288
-                                                                         "The temperature of the drive is higher than 50 degrees Celsius. " +
289
-                                                                         "This may shorten its lifespan and cause damage under severe load.")
290
-            elif attr_name == "reallocation_event_count" and int_raw_value > 0:
291
-                failed_attributes[(attrid, name)] = AttributeWarning(AttributeWarning.Notice,
292
-                                                                     name,
293
-                                                                     raw_value,
294
-                                                                     "The drive has a non-zero Raw value, but there is no SMART warning yet. " +
295
-                                                                     "This could be an indication of future failures and/or potential data loss in bad sectors.")
296
-            elif attr_name in ("current_pending_sector", "current_pending_sector_count", "total_pending_sectors") and int_raw_value > 0:
297
-                failed_attributes[(attrid, name)] = AttributeWarning(AttributeWarning.Notice,
298
-                                                                     name,
299
-                                                                     raw_value,
300
-                                                                     "The drive has a non-zero Raw value, but there is no SMART warning yet. " +
301
-                                                                     "This could be an indication of future failures and/or potential data loss in bad sectors.")
302
-            elif attr_name in ("offline_uncorrectable", "total_offline_uncorrectable") and int_raw_value > 0:
303
-                failed_attributes[(attrid, name)] = AttributeWarning(AttributeWarning.Notice,
304
-                                                                     name,
305
-                                                                     raw_value,
306
-                                                                     "The drive has a non-zero Raw value, but there is no SMART warning yet. " +
307
-                                                                     "This could be an indication of future failures and/or potential data loss in bad sectors.")
308
-            elif attr_name == "ssd_life_left" and int_value < 50:
309
-                failed_attributes[(attrid, name)] = AttributeWarning(AttributeWarning.Notice,
310
-                                                                     name,
311
-                                                                     raw_value,
312
-                                                                     "The drive has less than half of its life left.")
313
-            else:
314
-                # execute a generic check for value < threshold
315
-                if int_value and int_thresh:
316
-                    if int_value < int_thresh:
317
-                        failed_attributes[(attrid, name)] = AttributeWarning(
318
-                                                                    AttributeWarning.Warning if attr_type == 'Pre-fail' else AttributeWarning.Notice,
319
-                                                                    name,
320
-                                                                    raw_value,
321
-                                                                    "Attribute value dropped below threshold of %s" % int_thresh)
290
+            for rule in self.generic_attributes_checks:
291
+                if attr_name not in rule.get('attributes', []):
292
+                    continue
293
+
294
+                if 'value' in rule:
295
+                    check_value = int_value
296
+                    func = parse_range_specifier(rule['value'])
297
+                elif 'raw_value' in rule:
298
+                    check_value = int_raw_value
299
+                    func = parse_range_specifier(rule['raw_value'])
300
+
301
+                if func(check_value):
302
+                    failed_attributes[(attrid, name)] = AttributeWarning(AttributeWarning.Notice, name, check_value, rule['message'])
303
+                    break
304
+            
305
+            if (attrid, name) in failed_attributes:
306
+                # don't check against threshold if one of the generic rules already matched
307
+                continue
308
+
309
+            # execute a generic check for value < threshold
310
+            if int_value and int_thresh:
311
+                if int_value < int_thresh:
312
+                    failed_attributes[(attrid, name)] = AttributeWarning(
313
+                                                                AttributeWarning.Warning if attr_type == 'Pre-fail' else AttributeWarning.Notice,
314
+                                                                name,
315
+                                                                raw_value,
316
+                                                                "Attribute value dropped below threshold of %s" % int_thresh)
322 317
 
323 318
         logger.debug("Failed generic attributes: %s" % failed_attributes)
324 319
         return failed_attributes

+ 68
- 0
smartcheck/generic.yaml View File

@@ -0,0 +1,68 @@
1
+# these tests are taken from gsmartcontrol (storage_property_descr.cpp) and check for known pre-fail attributes
2
+
3
+# FORMAT
4
+
5
+# - attributes
6
+#   - name1
7
+#   - name2
8
+#   raw_value: X
9
+#    - OR -
10
+#   value: X
11
+#   message: Descriptive message
12
+#
13
+# where X is one of:
14
+# - a number or a string with a trailing colon (e.g. '10:') - greater than
15
+# - a string with a leading colon (e.g. ':10') - less than
16
+# - a string in the form "from:to" (e.g. '10:20') - greater than or equal to the left part *AND* less than or equal to the right part
17
+
18
+
19
+- attributes:
20
+  - reallocated_sector_count
21
+  - reallocated_sector_ct
22
+  raw_value: 0
23
+  message: The drive has a non-zero Raw value, but there is no SMART warning yet. This could be an indication of future failures and/or potential data loss in bad sectors.
24
+
25
+- attributes:
26
+  - spin_up_retry_count
27
+  raw_value: 0
28
+  message: The drive has a non-zero Raw value, but there is no SMART warning yet. Your drive may have problems spinning up, which could lead to a complete mechanical failure.
29
+
30
+- attributes:
31
+  - soft_read_error_rate
32
+  raw_value: 0
33
+  message: The drive has a non-zero Raw value, but there is no SMART warning yet. This could be an indication of future failures and/or potential data loss in bad sectors.
34
+
35
+- attributes:
36
+  - temperature_celsius
37
+  - temperature_celsius_x10
38
+  raw_value: "50:120" # Temperature (for some it may be 10xTemp, so limit the upper bound.)
39
+  message: The temperature of the drive is higher than 50 degrees Celsius. This may shorten its lifespan and cause damage under severe load.
40
+
41
+- attributes:
42
+  - temperature_celsius
43
+  - temperature_celsius_x10
44
+  raw_value: "500:" # Temperature (for some it may be 10xTemp, so limit the upper bound.)
45
+  message: The temperature of the drive is higher than 50 degrees Celsius. This may shorten its lifespan and cause damage under severe load.
46
+
47
+- attributes:
48
+  - reallocation_event_count
49
+  raw_value: 0
50
+  message: The drive has a non-zero Raw value, but there is no SMART warning yet. This could be an indication of future failures and/or potential data loss in bad sectors.
51
+
52
+- attributes:
53
+  - current_pending_sector
54
+  - current_pending_sector_count
55
+  - total_pending_sectors
56
+  raw_value: 0
57
+  message: The drive has a non-zero Raw value, but there is no SMART warning yet. This could be an indication of future failures and/or potential data loss in bad sectors.
58
+
59
+- attributes:
60
+  - offline_uncorrectable
61
+  - total_offline_uncorrectable
62
+  raw_value: 0
63
+  message: The drive has a non-zero Raw value, but there is no SMART warning yet. This could be an indication of future failures and/or potential data loss in bad sectors.
64
+
65
+- attributes:
66
+  - ssd_life_left
67
+  value: ':50'
68
+  message: The drive has less than half of its life left.

+ 2
- 2
tests/check.py View File

@@ -118,7 +118,7 @@ ID# ATTRIBUTE_NAME          FLAG     VALUE WORST THRESH TYPE      UPDATED  WHEN_
118 118
             (failed_id, failed_name), warning = list(failed_attributes.items())[0]
119 119
             assert int(failed_id) == int(attr_id)
120 120
             assert warning.level == AttributeWarning.Notice
121
-            assert warning.value == '1'
121
+            assert warning.value == 1
122 122
             self.assertTrue(check.check_tests())
123 123
             self.assertFalse(check.check())
124 124
 
@@ -178,5 +178,5 @@ ID# ATTRIBUTE_NAME          FLAG     VALUE WORST THRESH TYPE      UPDATED  WHEN_
178 178
             
179 179
             self.assertEqual(len(failed), 1)
180 180
             self.assertEqual(failed, {
181
-                (5, 'Reallocated_Sector_Ct'): AttributeWarning(AttributeWarning.Notice, 'Reallocated_Sector_Ct', '84')
181
+                (5, 'Reallocated_Sector_Ct'): AttributeWarning(AttributeWarning.Notice, 'Reallocated_Sector_Ct', 84)
182 182
             })

Loading…
Cancel
Save