Browse Source

Print changes in match list; find COPYING files in subdirs too

master 0.1
Johann Schmitz 7 years ago
parent
commit
c0b69b99a4
  1. 42
      src/main.py

42
src/main.py

@@ -6,15 +6,13 @@ from difflib import SequenceMatcher
import os
import re
DEFAULT_SIZE_DIFF = 0.1
DEFAULT_MIN_RATIO = 0.8
DEFAULT_N = 5
class Tester(object):
def __init__(self, licenses_dir, max_size_diff=DEFAULT_SIZE_DIFF, min_ratio=DEFAULT_MIN_RATIO):
def __init__(self, licenses_dir, min_ratio=DEFAULT_MIN_RATIO):
self.licenses_dir = licenses_dir
self.max_size_diff = max_size_diff
self.min_ratio = min_ratio
self._licenses = None
@@ -39,18 +37,29 @@ class Tester(object):
matches = []
with open(src, 'r') as f:
src_content = '\n'.join([x.strip() for x in f])
src_content = [x.strip() for x in f]
for license_file, license_size in self.licenses:
if abs(src_size - license_size) > src_size * self.max_size_diff:
continue
with open(license_file, 'r') as l:
lic_content = '\n'.join([x.strip() for x in l])
s = SequenceMatcher(lambda x: x in " \t", src_content, lic_content)
lic_content = [x.strip() for x in l]
s = SequenceMatcher(lambda x: x in " \t", lic_content, src_content)
if s.quick_ratio() > self.min_ratio:
matches.append((s.ratio(), license_file))
ops = s.get_grouped_opcodes()
adds, deletes, edits = 0, 0, 0
for x in ops:
for y in x:
action, ll, lh, rl, rh = y
if action == "insert":
adds += 1
if action == "delete":
deletes += 1
if action == "replace":
edits += 1
matches.append((s.ratio(), license_file, (adds, deletes, edits)))
return src, matches
@@ -79,7 +88,8 @@ class Finder(object):
l.append(os.path.join(src, 'COPYING'))
for root, dirs, files in os.walk(src):
l.extend([os.path.join(root, x) for x in files if re.match("^.*license(\.(txt|md|rst))?", x, re.IGNORECASE)])
l.extend([os.path.join(root, x) for x in files if re.match("^.*license(\.(txt|md|rst))?$", x, re.IGNORECASE)])
l.extend([os.path.join(root, x) for x in files if x == "COPYING"])
return l
@@ -89,8 +99,6 @@ if __name__ == "__main__":
parser.add_argument('--licenses-dir', default='/usr/portage/licenses',
help="Directory to look for licenses (default: %(default)s)")
parser.add_argument('--size-diff', default=DEFAULT_SIZE_DIFF, type=float,
help="Ignore license files which diff by size*x bytes (default: %(default)s)")
parser.add_argument('--min-ratio', default=DEFAULT_MIN_RATIO, type=float,
help="Minimum ratio to consider the file as a match (default: %(default)s)")
parser.add_argument('-n', default=DEFAULT_N, type=int, help="Show at most x matching licenses")
@@ -98,7 +106,7 @@ if __name__ == "__main__":
args = parser.parse_args()
tester = Tester(args.licenses_dir, args.size_diff, args.min_ratio)
tester = Tester(args.licenses_dir, args.min_ratio)
for f_or_d in args.files_or_dirs:
for filename, matches in tester.test(Finder.find(f_or_d)):
@@ -106,7 +114,9 @@ if __name__ == "__main__":
print("-" * (len(filename)+1))
if matches:
for ratio, license_name in sorted(matches, reverse=True)[:args.n]:
print("%8.3f %%: %s" % (ratio*100.0, license_name))
for ratio, license_name, changes in sorted(matches, reverse=True)[:args.n]:
print("%8.3f %%: %s (+%s -%s ~%s)" % ((ratio*100.0, license_name) + changes))
else:
print("No license matched :(")
print("")