|
|
@ -6,15 +6,13 @@ from difflib import SequenceMatcher |
|
|
|
import os |
|
|
|
import re |
|
|
|
|
|
|
|
DEFAULT_SIZE_DIFF = 0.1 |
|
|
|
DEFAULT_MIN_RATIO = 0.8 |
|
|
|
DEFAULT_N = 5 |
|
|
|
|
|
|
|
|
|
|
|
class Tester(object): |
|
|
|
def __init__(self, licenses_dir, max_size_diff=DEFAULT_SIZE_DIFF, min_ratio=DEFAULT_MIN_RATIO): |
|
|
|
def __init__(self, licenses_dir, min_ratio=DEFAULT_MIN_RATIO): |
|
|
|
self.licenses_dir = licenses_dir |
|
|
|
self.max_size_diff = max_size_diff |
|
|
|
self.min_ratio = min_ratio |
|
|
|
|
|
|
|
self._licenses = None |
|
|
@ -39,18 +37,29 @@ class Tester(object): |
|
|
|
matches = [] |
|
|
|
|
|
|
|
with open(src, 'r') as f: |
|
|
|
src_content = '\n'.join([x.strip() for x in f]) |
|
|
|
src_content = [x.strip() for x in f] |
|
|
|
|
|
|
|
for license_file, license_size in self.licenses: |
|
|
|
if abs(src_size - license_size) > src_size * self.max_size_diff: |
|
|
|
continue |
|
|
|
|
|
|
|
with open(license_file, 'r') as l: |
|
|
|
lic_content = '\n'.join([x.strip() for x in l]) |
|
|
|
s = SequenceMatcher(lambda x: x in " \t", src_content, lic_content) |
|
|
|
lic_content = [x.strip() for x in l] |
|
|
|
|
|
|
|
s = SequenceMatcher(lambda x: x in " \t", lic_content, src_content) |
|
|
|
|
|
|
|
if s.quick_ratio() > self.min_ratio: |
|
|
|
matches.append((s.ratio(), license_file)) |
|
|
|
ops = s.get_grouped_opcodes() |
|
|
|
|
|
|
|
adds, deletes, edits = 0, 0, 0 |
|
|
|
for x in ops: |
|
|
|
for y in x: |
|
|
|
action, ll, lh, rl, rh = y |
|
|
|
if action == "insert": |
|
|
|
adds += 1 |
|
|
|
if action == "delete": |
|
|
|
deletes += 1 |
|
|
|
if action == "replace": |
|
|
|
edits += 1 |
|
|
|
|
|
|
|
matches.append((s.ratio(), license_file, (adds, deletes, edits))) |
|
|
|
|
|
|
|
return src, matches |
|
|
|
|
|
|
@ -79,7 +88,8 @@ class Finder(object): |
|
|
|
l.append(os.path.join(src, 'COPYING')) |
|
|
|
|
|
|
|
for root, dirs, files in os.walk(src): |
|
|
|
l.extend([os.path.join(root, x) for x in files if re.match("^.*license(\.(txt|md|rst))?", x, re.IGNORECASE)]) |
|
|
|
l.extend([os.path.join(root, x) for x in files if re.match("^.*license(\.(txt|md|rst))?$", x, re.IGNORECASE)]) |
|
|
|
l.extend([os.path.join(root, x) for x in files if x == "COPYING"]) |
|
|
|
|
|
|
|
return l |
|
|
|
|
|
|
@ -89,8 +99,6 @@ if __name__ == "__main__": |
|
|
|
parser.add_argument('--licenses-dir', default='/usr/portage/licenses', |
|
|
|
help="Directory to look for licenses (default: %(default)s)") |
|
|
|
|
|
|
|
parser.add_argument('--size-diff', default=DEFAULT_SIZE_DIFF, type=float, |
|
|
|
help="Ignore license files which diff by size*x bytes (default: %(default)s)") |
|
|
|
parser.add_argument('--min-ratio', default=DEFAULT_MIN_RATIO, type=float, |
|
|
|
help="Minimum ratio to consider the file as a match (default: %(default)s)") |
|
|
|
parser.add_argument('-n', default=DEFAULT_N, type=int, help="Show at most x matching licenses") |
|
|
@ -98,7 +106,7 @@ if __name__ == "__main__": |
|
|
|
|
|
|
|
args = parser.parse_args() |
|
|
|
|
|
|
|
tester = Tester(args.licenses_dir, args.size_diff, args.min_ratio) |
|
|
|
tester = Tester(args.licenses_dir, args.min_ratio) |
|
|
|
|
|
|
|
for f_or_d in args.files_or_dirs: |
|
|
|
for filename, matches in tester.test(Finder.find(f_or_d)): |
|
|
@ -106,7 +114,9 @@ if __name__ == "__main__": |
|
|
|
print("-" * (len(filename)+1)) |
|
|
|
|
|
|
|
if matches: |
|
|
|
for ratio, license_name in sorted(matches, reverse=True)[:args.n]: |
|
|
|
print("%8.3f %%: %s" % (ratio*100.0, license_name)) |
|
|
|
for ratio, license_name, changes in sorted(matches, reverse=True)[:args.n]: |
|
|
|
print("%8.3f %%: %s (+%s -%s ~%s)" % ((ratio*100.0, license_name) + changes)) |
|
|
|
else: |
|
|
|
print("No license matched :(") |
|
|
|
|
|
|
|
print("") |