Browse Source

Print changes in match list; find COPYING files in subdirs too

Johann Schmitz 4 years ago
parent
commit
c0b69b99a4
1 changed file with 26 additions and 16 deletions
  1. 26
    16
      src/main.py

+ 26
- 16
src/main.py View File

@@ -6,15 +6,13 @@ from difflib import SequenceMatcher
6 6
 import os
7 7
 import re
8 8
 
9
-DEFAULT_SIZE_DIFF = 0.1
10 9
 DEFAULT_MIN_RATIO = 0.8
11 10
 DEFAULT_N = 5
12 11
 
13 12
 
14 13
 class Tester(object):
15
-	def __init__(self, licenses_dir, max_size_diff=DEFAULT_SIZE_DIFF, min_ratio=DEFAULT_MIN_RATIO):
14
+	def __init__(self, licenses_dir, min_ratio=DEFAULT_MIN_RATIO):
16 15
 		self.licenses_dir = licenses_dir
17
-		self.max_size_diff = max_size_diff
18 16
 		self.min_ratio = min_ratio
19 17
 
20 18
 		self._licenses = None
@@ -39,18 +37,29 @@ class Tester(object):
39 37
 		matches = []
40 38
 
41 39
 		with open(src, 'r') as f:
42
-			src_content = '\n'.join([x.strip() for x in f])
40
+			src_content = [x.strip() for x in f]
43 41
 
44 42
 		for license_file, license_size in self.licenses:
45
-			if abs(src_size - license_size) > src_size * self.max_size_diff:
46
-				continue
47
-
48 43
 			with open(license_file, 'r') as l:
49
-				lic_content = '\n'.join([x.strip() for x in l])
50
-				s = SequenceMatcher(lambda x: x in " \t", src_content, lic_content)
44
+				lic_content = [x.strip() for x in l]
45
+
46
+				s = SequenceMatcher(lambda x: x in " \t", lic_content, src_content)
51 47
 
52 48
 				if s.quick_ratio() > self.min_ratio:
53
-					matches.append((s.ratio(), license_file))
49
+					ops = s.get_grouped_opcodes()
50
+
51
+					adds, deletes, edits = 0, 0, 0
52
+					for x in ops:
53
+						for y in x:
54
+							action, ll, lh, rl, rh = y
55
+							if action == "insert":
56
+								adds += 1
57
+							if action == "delete":
58
+								deletes += 1
59
+							if action == "replace":
60
+								edits += 1
61
+
62
+					matches.append((s.ratio(), license_file, (adds, deletes, edits)))
54 63
 
55 64
 		return src, matches
56 65
 
@@ -79,7 +88,8 @@ class Finder(object):
79 88
 			l.append(os.path.join(src, 'COPYING'))
80 89
 
81 90
 		for root, dirs, files in os.walk(src):
82
-			l.extend([os.path.join(root, x) for x in files if re.match("^.*license(\.(txt|md|rst))?", x, re.IGNORECASE)])
91
+			l.extend([os.path.join(root, x) for x in files if re.match("^.*license(\.(txt|md|rst))?$", x, re.IGNORECASE)])
92
+			l.extend([os.path.join(root, x) for x in files if x == "COPYING"])
83 93
 
84 94
 		return l
85 95
 
@@ -89,8 +99,6 @@ if __name__ == "__main__":
89 99
 	parser.add_argument('--licenses-dir', default='/usr/portage/licenses',
90 100
 						help="Directory to look for licenses (default: %(default)s)")
91 101
 
92
-	parser.add_argument('--size-diff', default=DEFAULT_SIZE_DIFF, type=float,
93
-						help="Ignore license files which diff by size*x bytes (default: %(default)s)")
94 102
 	parser.add_argument('--min-ratio', default=DEFAULT_MIN_RATIO, type=float,
95 103
 						help="Minimum ratio to consider the file as a match (default: %(default)s)")
96 104
 	parser.add_argument('-n', default=DEFAULT_N, type=int, help="Show at most x matching licenses")
@@ -98,7 +106,7 @@ if __name__ == "__main__":
98 106
 
99 107
 	args = parser.parse_args()
100 108
 
101
-	tester = Tester(args.licenses_dir, args.size_diff, args.min_ratio)
109
+	tester = Tester(args.licenses_dir, args.min_ratio)
102 110
 
103 111
 	for f_or_d in args.files_or_dirs:
104 112
 		for filename, matches in tester.test(Finder.find(f_or_d)):
@@ -106,7 +114,9 @@ if __name__ == "__main__":
106 114
 			print("-" * (len(filename)+1))
107 115
 
108 116
 			if matches:
109
-				for ratio, license_name in sorted(matches, reverse=True)[:args.n]:
110
-					print("%8.3f %%: %s" % (ratio*100.0, license_name))
117
+				for ratio, license_name, changes in sorted(matches, reverse=True)[:args.n]:
118
+					print("%8.3f %%: %s (+%s -%s ~%s)" % ((ratio*100.0, license_name) + changes))
111 119
 			else:
112 120
 				print("No license matched :(")
121
+
122
+			print("")