123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190 |
- #
- # Copyright (c) Contributors to the Open 3D Engine Project. For complete copyright and license terms please see the LICENSE at the root of this distribution.
- #
- # SPDX-License-Identifier: Apache-2.0 OR MIT
- #
- #
- import argparse
- from collections import OrderedDict
- import fnmatch
- import json
- import os
- import pathlib
- import re
- import sys
class LicenseScanner:
    """Class to contain license scanner.

    Scans source trees for license files using provided filename patterns and generates a file
    with the contents of all the licenses, plus an optional SPDX package summary file.

    :param config_file: Config file with license patterns and scanner settings. When omitted,
        ``scanner_config.json`` in the same directory as this script is used.
    """

    DEFAULT_CONFIG_FILE = 'scanner_config.json'
    DEFAULT_EXCLUDE_FILE = '.gitignore'
    DEFAULT_PACKAGE_INFO_FILE = 'PackageInfo.json'

    def __init__(self, config_file=None):
        self.config_file = config_file
        self.config_data = self._load_config()
        self.file_regex = self._load_file_regex(self.config_data['license_patterns'])
        self.package_info = self._load_file_regex(self.config_data['package_patterns'])
        self.excluded_directories = self._load_file_regex(self.config_data['excluded_directories'])

    def _load_config(self):
        """Load config from the provided file. Sets default file if one is not provided.

        :return: Parsed JSON config data
        :raises FileNotFoundError: If the config file does not exist
        """
        if not self.config_file:
            script_directory = os.path.dirname(os.path.abspath(__file__))  # Default file expected in same dir as script
            self.config_file = os.path.join(script_directory, self.DEFAULT_CONFIG_FILE)
        try:
            with open(self.config_file, encoding='utf8') as f:
                return json.load(f)
        except FileNotFoundError:
            print('Config file cannot be found')
            raise

    def _load_file_regex(self, patterns):
        """Returns regex object with case-insensitive matching from the list of filename patterns.

        :param patterns: Iterable of fnmatch-style filename patterns
        :return: Compiled regex matching any of the patterns, or None if there are no patterns
        """
        regex_patterns = [fnmatch.translate(pattern) for pattern in patterns]
        if not regex_patterns:
            print(f'Warning: No patterns from {patterns} found')
            return None
        return re.compile('|'.join(regex_patterns), re.IGNORECASE)

    def _package_info_regex(self):
        """Returns the package info regex, falling back to the default filename if none is configured."""
        if not self.package_info:
            # The fallback must be compiled so it supports .match() like the configured patterns do
            self.package_info = self._load_file_regex([self.DEFAULT_PACKAGE_INFO_FILE])
        return self.package_info

    def _load_ignore_file(self, ignore_path):
        """Builds a directory exclusion regex from a .gitignore-style file.

        :param ignore_path: Path of the ignore file to read
        :return: Compiled regex for the directory names to exclude
        """
        contents = self._get_file_contents(ignore_path) or ''  # Unreadable file yields no patterns
        ignore_list = []
        for line in contents.splitlines():
            entry = line.strip()
            if not entry or entry.startswith('#'):
                continue  # Blank lines and comments are not patterns
            # Directory entries such as "build/" need the slash removed to match os.walk dirnames
            entry = entry.rstrip('/')
            if entry:
                ignore_list.append(entry)
        ignore_list.append('.git')  # .gitignore doesn't usually have .git in its exclusions
        return self._load_file_regex(ignore_list)

    def scan(self, paths=os.curdir):
        """Scan directory tree for filenames matching file_regex, package info, and exclusion files.

        Subdirectories of a directory containing a package info file are not scanned. When the
        config has no excluded directories, exclusions are read from .gitignore files as they
        are encountered and apply to the directories walked afterwards.

        :param paths: Path, or iterable of paths, of the directories to run scanner
        :return: Matching file paths and their corresponding file contents
        :rtype: Ordered dict
        """
        if isinstance(paths, (str, os.PathLike)):
            paths = [paths]  # A single path must not be iterated character by character

        files = 0
        matching_files = OrderedDict()
        package_regex = self._package_info_regex()
        excluded_directories = self.excluded_directories
        use_ignore_file = not excluded_directories
        if use_ignore_file:
            print(f'No excluded directory in config, looking for {self.DEFAULT_EXCLUDE_FILE} instead')

        for path in paths:
            for dirpath, dirnames, filenames in os.walk(path, topdown=True):
                # Ensure that results are sorted
                dirnames.sort(key=str.casefold)
                filenames.sort(key=str.casefold)
                for file in filenames:
                    is_package_info = bool(package_regex.match(file))
                    is_license = bool(self.file_regex and self.file_regex.match(file))
                    if is_license or is_package_info:
                        file_path = os.path.join(dirpath, file)
                        matching_files[file_path] = self._get_file_contents(file_path)
                        files += 1
                        print(f'Matching file: {file_path}')
                        if is_package_info:
                            dirnames[:] = []  # Stop scanning subdirectories if package info file found
                    if use_ignore_file and self.DEFAULT_EXCLUDE_FILE in file:
                        excluded_directories = self._load_ignore_file(os.path.join(dirpath, file))
                # Remove directories that should not be scanned. Rebuild the list in place rather
                # than removing while iterating, which would skip entries.
                if excluded_directories:
                    dirnames[:] = [name for name in dirnames if not excluded_directories.match(name)]
        print(f'{files} files found.')
        return matching_files

    def _get_file_contents(self, filepath):
        """Returns the text of a UTF-8 file, or None if the file cannot be decoded.

        :param filepath: Path of the file to read
        """
        try:
            with open(filepath, encoding='utf8') as f:
                return f.read()
        except UnicodeDecodeError:
            print(f'Unable to read file: {filepath}')
            return None

    def create_license_file(self, licenses, filepath='NOTICES.txt'):
        """Creates file with all the provided license file contents.

        Entries whose filename matches the package info patterns are skipped, as are entries
        whose contents could not be read.

        :param licenses: Dict with package paths and their corresponding license file contents
        :param filepath: Path to write the file
        """
        license_separator = '------------------------------------'
        package_regex = self._package_info_regex()
        with open(filepath, 'w', encoding='utf8') as lf:
            for license_path, license_text in licenses.items():
                if package_regex.match(os.path.basename(license_path)):
                    continue
                if license_text is None:
                    # Unreadable files have no contents; do not write the text "None" as a license
                    print(f'Skipping license with no readable contents: {license_path}')
                    continue
                license_output = '\n\n'.join([
                    f'{license_separator}',
                    f'Package path: {os.path.relpath(license_path)}',
                    'License:',
                    f'{license_text}\n'
                ])
                lf.write(license_output)

    def create_package_file(self, packages, filepath='SPDX-Licenses.json', get_contents=False):
        """Creates file with all the provided SPDX package info summaries in json.

        When get_contents is set, the license file named by each package info's 'LicenseFile'
        entry is read (relative to the package info file) and included in the returned dictionary.

        :param packages: Dict with scanned file paths and their corresponding file contents
        :param filepath: Path to write the file
        :param get_contents: Also read the license file referenced by each package info file
        :return: License file paths and their contents; entries that are not package info files
            are passed through unchanged
        :rtype: Ordered dict
        """
        licenses = OrderedDict()
        package_json = []
        package_regex = self._package_info_regex()
        for package_path, package in packages.items():
            if not package_regex.match(os.path.basename(package_path)):
                licenses[package_path] = package
                continue
            if package is None:
                print(f'Skipping package info with no readable contents: {package_path}')
                continue
            package_obj = json.loads(package)
            package_json.append(package_obj)
            if get_contents:
                license_path = os.path.join(os.path.dirname(package_path), pathlib.Path(package_obj['LicenseFile']))
                licenses[license_path] = self._get_file_contents(license_path)
        # Write only after every package parsed, so a failure does not leave a truncated output file
        with open(filepath, 'w', encoding='utf8') as pf:
            pf.write(json.dumps(package_json, indent=4))
        return licenses
def parse_args(argv=None):
    """Parse the command-line options for the license scanner script.

    :param argv: Argument list to parse; when None, argparse reads the process arguments
    :return: Namespace with config_file, license_file_path, package_file_path and scan_path,
        where scan_path is always a list of paths
    """
    parser = argparse.ArgumentParser(
        description='Script to run LicenseScanner and generate license file')
    parser.add_argument('--config-file', '-c', type=pathlib.Path, help='Config file for LicenseScanner')
    parser.add_argument('--license-file-path', '-l', type=pathlib.Path, help='Create license file in the provided path')
    parser.add_argument('--package-file-path', '-p', type=pathlib.Path, help='Create package summary file in the provided path')
    # nargs='+' produces a list, so the default has to be a list as well. A bare string default
    # is converted by argparse into a single Path, which the scanner cannot iterate over.
    parser.add_argument('--scan-path', '-s', default=[pathlib.Path(os.curdir)], type=pathlib.Path, nargs='+', help='Path to scan, multiple space separated paths can be used')
    return parser.parse_args(argv)
def main():
    """Run the scanner from the command line and write the requested output files.

    :return: 1 if a required file cannot be found, otherwise None
    """
    try:
        options = parse_args()
        scanner = LicenseScanner(options.config_file)
        scan_results = scanner.scan(options.scan_path)

        license_out = options.license_file_path
        package_out = options.package_file_path

        if license_out:
            scanner.create_license_file(scan_results, license_out)
        if package_out:
            scanner.create_package_file(scan_results, package_out)
        if license_out and package_out:
            # With both outputs requested, rewrite the license file from the dictionary returned
            # by create_package_file, which also holds the licenses named in package info files.
            combined_licenses = scanner.create_package_file(scan_results, package_out, True)
            scanner.create_license_file(combined_licenses, license_out)
    except FileNotFoundError as error:
        print(f'Type: {type(error).__name__}, Error: {error}')
        return 1
    return None
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|