You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
606 lines
22 KiB
606 lines
22 KiB
#!/usr/bin/env python3 |
|
# Copyright (c) 2016-2020 The Bitcoin Core developers |
|
# Distributed under the MIT software license, see the accompanying |
|
# file COPYING or http://www.opensource.org/licenses/mit-license.php. |
|
|
|
import re |
|
import fnmatch |
|
import sys |
|
import subprocess |
|
import datetime |
|
import os |
|
|
|
################################################################################ |
|
# file filtering |
|
################################################################################ |
|
|
|
EXCLUDE = [ |
|
# auto generated: |
|
'src/qt/bitcoinstrings.cpp', |
|
'src/chainparamsseeds.h', |
|
# other external copyrights: |
|
'src/reverse_iterator.h', |
|
'src/test/fuzz/FuzzedDataProvider.h', |
|
'src/tinyformat.h', |
|
'src/bench/nanobench.h', |
|
'test/functional/test_framework/bignum.py', |
|
# python init: |
|
'*__init__.py', |
|
] |
|
EXCLUDE_COMPILED = re.compile('|'.join([fnmatch.translate(m) for m in EXCLUDE])) |
|
|
|
EXCLUDE_DIRS = [ |
|
# git subtrees |
|
"src/crypto/ctaes/", |
|
"src/leveldb/", |
|
"src/secp256k1/", |
|
"src/univalue/", |
|
"src/crc32c/", |
|
] |
|
|
|
INCLUDE = ['*.h', '*.cpp', '*.cc', '*.c', '*.mm', '*.py', '*.sh', '*.bash-completion'] |
|
INCLUDE_COMPILED = re.compile('|'.join([fnmatch.translate(m) for m in INCLUDE])) |
|
|
|
def applies_to_file(filename): |
|
for excluded_dir in EXCLUDE_DIRS: |
|
if filename.startswith(excluded_dir): |
|
return False |
|
return ((EXCLUDE_COMPILED.match(filename) is None) and |
|
(INCLUDE_COMPILED.match(filename) is not None)) |
|
|
|
################################################################################ |
|
# obtain list of files in repo according to INCLUDE and EXCLUDE |
|
################################################################################ |
|
|
|
GIT_LS_CMD = 'git ls-files --full-name'.split(' ') |
|
GIT_TOPLEVEL_CMD = 'git rev-parse --show-toplevel'.split(' ') |
|
|
|
def call_git_ls(base_directory): |
|
out = subprocess.check_output([*GIT_LS_CMD, base_directory]) |
|
return [f for f in out.decode("utf-8").split('\n') if f != ''] |
|
|
|
def call_git_toplevel(): |
|
"Returns the absolute path to the project root" |
|
return subprocess.check_output(GIT_TOPLEVEL_CMD).strip().decode("utf-8") |
|
|
|
def get_filenames_to_examine(base_directory): |
|
"Returns an array of absolute paths to any project files in the base_directory that pass the include/exclude filters" |
|
root = call_git_toplevel() |
|
filenames = call_git_ls(base_directory) |
|
return sorted([os.path.join(root, filename) for filename in filenames if |
|
applies_to_file(filename)]) |
|
|
|
################################################################################ |
|
# define and compile regexes for the patterns we are looking for |
|
################################################################################ |
|
|
|
|
|
COPYRIGHT_WITH_C = r'Copyright \(c\)' |
|
COPYRIGHT_WITHOUT_C = 'Copyright' |
|
ANY_COPYRIGHT_STYLE = '(%s|%s)' % (COPYRIGHT_WITH_C, COPYRIGHT_WITHOUT_C) |
|
|
|
YEAR = "20[0-9][0-9]" |
|
YEAR_RANGE = '(%s)(-%s)?' % (YEAR, YEAR) |
|
YEAR_LIST = '(%s)(, %s)+' % (YEAR, YEAR) |
|
ANY_YEAR_STYLE = '(%s|%s)' % (YEAR_RANGE, YEAR_LIST) |
|
ANY_COPYRIGHT_STYLE_OR_YEAR_STYLE = ("%s %s" % (ANY_COPYRIGHT_STYLE, |
|
ANY_YEAR_STYLE)) |
|
|
|
ANY_COPYRIGHT_COMPILED = re.compile(ANY_COPYRIGHT_STYLE_OR_YEAR_STYLE) |
|
|
|
def compile_copyright_regex(copyright_style, year_style, name): |
|
return re.compile(r'%s %s,? %s( +\*)?\n' % (copyright_style, year_style, name)) |
|
|
|
EXPECTED_HOLDER_NAMES = [ |
|
r"Satoshi Nakamoto", |
|
r"The Bitcoin Core developers", |
|
r"BitPay Inc\.", |
|
r"University of Illinois at Urbana-Champaign\.", |
|
r"Pieter Wuille", |
|
r"Wladimir J\. van der Laan", |
|
r"Jeff Garzik", |
|
r"Jan-Klaas Kollhof", |
|
r"ArtForz -- public domain half-a-node", |
|
r"Intel Corporation ?", |
|
r"The Zcash developers", |
|
r"Jeremy Rubin", |
|
] |
|
|
|
DOMINANT_STYLE_COMPILED = {} |
|
YEAR_LIST_STYLE_COMPILED = {} |
|
WITHOUT_C_STYLE_COMPILED = {} |
|
|
|
for holder_name in EXPECTED_HOLDER_NAMES: |
|
DOMINANT_STYLE_COMPILED[holder_name] = ( |
|
compile_copyright_regex(COPYRIGHT_WITH_C, YEAR_RANGE, holder_name)) |
|
YEAR_LIST_STYLE_COMPILED[holder_name] = ( |
|
compile_copyright_regex(COPYRIGHT_WITH_C, YEAR_LIST, holder_name)) |
|
WITHOUT_C_STYLE_COMPILED[holder_name] = ( |
|
compile_copyright_regex(COPYRIGHT_WITHOUT_C, ANY_YEAR_STYLE, |
|
holder_name)) |
|
|
|
################################################################################ |
|
# search file contents for copyright message of particular category |
|
################################################################################ |
|
|
|
def get_count_of_copyrights_of_any_style_any_holder(contents): |
|
return len(ANY_COPYRIGHT_COMPILED.findall(contents)) |
|
|
|
def file_has_dominant_style_copyright_for_holder(contents, holder_name): |
|
match = DOMINANT_STYLE_COMPILED[holder_name].search(contents) |
|
return match is not None |
|
|
|
def file_has_year_list_style_copyright_for_holder(contents, holder_name): |
|
match = YEAR_LIST_STYLE_COMPILED[holder_name].search(contents) |
|
return match is not None |
|
|
|
def file_has_without_c_style_copyright_for_holder(contents, holder_name): |
|
match = WITHOUT_C_STYLE_COMPILED[holder_name].search(contents) |
|
return match is not None |
|
|
|
################################################################################ |
|
# get file info |
|
################################################################################ |
|
|
|
def read_file(filename): |
|
return open(filename, 'r', encoding="utf8").read() |
|
|
|
def gather_file_info(filename): |
|
info = {} |
|
info['filename'] = filename |
|
c = read_file(filename) |
|
info['contents'] = c |
|
|
|
info['all_copyrights'] = get_count_of_copyrights_of_any_style_any_holder(c) |
|
|
|
info['classified_copyrights'] = 0 |
|
info['dominant_style'] = {} |
|
info['year_list_style'] = {} |
|
info['without_c_style'] = {} |
|
for holder_name in EXPECTED_HOLDER_NAMES: |
|
has_dominant_style = ( |
|
file_has_dominant_style_copyright_for_holder(c, holder_name)) |
|
has_year_list_style = ( |
|
file_has_year_list_style_copyright_for_holder(c, holder_name)) |
|
has_without_c_style = ( |
|
file_has_without_c_style_copyright_for_holder(c, holder_name)) |
|
info['dominant_style'][holder_name] = has_dominant_style |
|
info['year_list_style'][holder_name] = has_year_list_style |
|
info['without_c_style'][holder_name] = has_without_c_style |
|
if has_dominant_style or has_year_list_style or has_without_c_style: |
|
info['classified_copyrights'] = info['classified_copyrights'] + 1 |
|
return info |
|
|
|
################################################################################ |
|
# report execution |
|
################################################################################ |
|
|
|
SEPARATOR = '-'.join(['' for _ in range(80)]) |
|
|
|
def print_filenames(filenames, verbose): |
|
if not verbose: |
|
return |
|
for filename in filenames: |
|
print("\t%s" % filename) |
|
|
|
def print_report(file_infos, verbose): |
|
print(SEPARATOR) |
|
examined = [i['filename'] for i in file_infos] |
|
print("%d files examined according to INCLUDE and EXCLUDE fnmatch rules" % |
|
len(examined)) |
|
print_filenames(examined, verbose) |
|
|
|
print(SEPARATOR) |
|
print('') |
|
zero_copyrights = [i['filename'] for i in file_infos if |
|
i['all_copyrights'] == 0] |
|
print("%4d with zero copyrights" % len(zero_copyrights)) |
|
print_filenames(zero_copyrights, verbose) |
|
one_copyright = [i['filename'] for i in file_infos if |
|
i['all_copyrights'] == 1] |
|
print("%4d with one copyright" % len(one_copyright)) |
|
print_filenames(one_copyright, verbose) |
|
two_copyrights = [i['filename'] for i in file_infos if |
|
i['all_copyrights'] == 2] |
|
print("%4d with two copyrights" % len(two_copyrights)) |
|
print_filenames(two_copyrights, verbose) |
|
three_copyrights = [i['filename'] for i in file_infos if |
|
i['all_copyrights'] == 3] |
|
print("%4d with three copyrights" % len(three_copyrights)) |
|
print_filenames(three_copyrights, verbose) |
|
four_or_more_copyrights = [i['filename'] for i in file_infos if |
|
i['all_copyrights'] >= 4] |
|
print("%4d with four or more copyrights" % len(four_or_more_copyrights)) |
|
print_filenames(four_or_more_copyrights, verbose) |
|
print('') |
|
print(SEPARATOR) |
|
print('Copyrights with dominant style:\ne.g. "Copyright (c)" and ' |
|
'"<year>" or "<startYear>-<endYear>":\n') |
|
for holder_name in EXPECTED_HOLDER_NAMES: |
|
dominant_style = [i['filename'] for i in file_infos if |
|
i['dominant_style'][holder_name]] |
|
if len(dominant_style) > 0: |
|
print("%4d with '%s'" % (len(dominant_style), |
|
holder_name.replace('\n', '\\n'))) |
|
print_filenames(dominant_style, verbose) |
|
print('') |
|
print(SEPARATOR) |
|
print('Copyrights with year list style:\ne.g. "Copyright (c)" and ' |
|
'"<year1>, <year2>, ...":\n') |
|
for holder_name in EXPECTED_HOLDER_NAMES: |
|
year_list_style = [i['filename'] for i in file_infos if |
|
i['year_list_style'][holder_name]] |
|
if len(year_list_style) > 0: |
|
print("%4d with '%s'" % (len(year_list_style), |
|
holder_name.replace('\n', '\\n'))) |
|
print_filenames(year_list_style, verbose) |
|
print('') |
|
print(SEPARATOR) |
|
print('Copyrights with no "(c)" style:\ne.g. "Copyright" and "<year>" or ' |
|
'"<startYear>-<endYear>":\n') |
|
for holder_name in EXPECTED_HOLDER_NAMES: |
|
without_c_style = [i['filename'] for i in file_infos if |
|
i['without_c_style'][holder_name]] |
|
if len(without_c_style) > 0: |
|
print("%4d with '%s'" % (len(without_c_style), |
|
holder_name.replace('\n', '\\n'))) |
|
print_filenames(without_c_style, verbose) |
|
|
|
print('') |
|
print(SEPARATOR) |
|
|
|
unclassified_copyrights = [i['filename'] for i in file_infos if |
|
i['classified_copyrights'] < i['all_copyrights']] |
|
print("%d with unexpected copyright holder names" % |
|
len(unclassified_copyrights)) |
|
print_filenames(unclassified_copyrights, verbose) |
|
print(SEPARATOR) |
|
|
|
def exec_report(base_directory, verbose): |
|
filenames = get_filenames_to_examine(base_directory) |
|
file_infos = [gather_file_info(f) for f in filenames] |
|
print_report(file_infos, verbose) |
|
|
|
################################################################################ |
|
# report cmd |
|
################################################################################ |
|
|
|
REPORT_USAGE = """ |
|
Produces a report of all copyright header notices found inside the source files |
|
of a repository. |
|
|
|
Usage: |
|
$ ./copyright_header.py report <base_directory> [verbose] |
|
|
|
Arguments: |
|
<base_directory> - The base directory of a bitcoin source code repository. |
|
[verbose] - Includes a list of every file of each subcategory in the report. |
|
""" |
|
|
|
def report_cmd(argv): |
|
if len(argv) == 2: |
|
sys.exit(REPORT_USAGE) |
|
|
|
base_directory = argv[2] |
|
if not os.path.exists(base_directory): |
|
sys.exit("*** bad <base_directory>: %s" % base_directory) |
|
|
|
if len(argv) == 3: |
|
verbose = False |
|
elif argv[3] == 'verbose': |
|
verbose = True |
|
else: |
|
sys.exit("*** unknown argument: %s" % argv[2]) |
|
|
|
exec_report(base_directory, verbose) |
|
|
|
################################################################################ |
|
# query git for year of last change |
|
################################################################################ |
|
|
|
GIT_LOG_CMD = "git log --pretty=format:%%ai %s" |
|
|
|
def call_git_log(filename): |
|
out = subprocess.check_output((GIT_LOG_CMD % filename).split(' ')) |
|
return out.decode("utf-8").split('\n') |
|
|
|
def get_git_change_years(filename): |
|
git_log_lines = call_git_log(filename) |
|
if len(git_log_lines) == 0: |
|
return [datetime.date.today().year] |
|
# timestamp is in ISO 8601 format. e.g. "2016-09-05 14:25:32 -0600" |
|
return [line.split(' ')[0].split('-')[0] for line in git_log_lines] |
|
|
|
def get_most_recent_git_change_year(filename): |
|
return max(get_git_change_years(filename)) |
|
|
|
################################################################################ |
|
# read and write to file |
|
################################################################################ |
|
|
|
def read_file_lines(filename): |
|
f = open(filename, 'r', encoding="utf8") |
|
file_lines = f.readlines() |
|
f.close() |
|
return file_lines |
|
|
|
def write_file_lines(filename, file_lines): |
|
f = open(filename, 'w', encoding="utf8") |
|
f.write(''.join(file_lines)) |
|
f.close() |
|
|
|
################################################################################ |
|
# update header years execution |
|
################################################################################ |
|
|
|
COPYRIGHT = r'Copyright \(c\)' |
|
YEAR = "20[0-9][0-9]" |
|
YEAR_RANGE = '(%s)(-%s)?' % (YEAR, YEAR) |
|
HOLDER = 'The Bitcoin Core developers' |
|
UPDATEABLE_LINE_COMPILED = re.compile(' '.join([COPYRIGHT, YEAR_RANGE, HOLDER])) |
|
|
|
def get_updatable_copyright_line(file_lines): |
|
index = 0 |
|
for line in file_lines: |
|
if UPDATEABLE_LINE_COMPILED.search(line) is not None: |
|
return index, line |
|
index = index + 1 |
|
return None, None |
|
|
|
def parse_year_range(year_range): |
|
year_split = year_range.split('-') |
|
start_year = year_split[0] |
|
if len(year_split) == 1: |
|
return start_year, start_year |
|
return start_year, year_split[1] |
|
|
|
def year_range_to_str(start_year, end_year): |
|
if start_year == end_year: |
|
return start_year |
|
return "%s-%s" % (start_year, end_year) |
|
|
|
def create_updated_copyright_line(line, last_git_change_year): |
|
copyright_splitter = 'Copyright (c) ' |
|
copyright_split = line.split(copyright_splitter) |
|
# Preserve characters on line that are ahead of the start of the copyright |
|
# notice - they are part of the comment block and vary from file-to-file. |
|
before_copyright = copyright_split[0] |
|
after_copyright = copyright_split[1] |
|
|
|
space_split = after_copyright.split(' ') |
|
year_range = space_split[0] |
|
start_year, end_year = parse_year_range(year_range) |
|
if end_year == last_git_change_year: |
|
return line |
|
return (before_copyright + copyright_splitter + |
|
year_range_to_str(start_year, last_git_change_year) + ' ' + |
|
' '.join(space_split[1:])) |
|
|
|
def update_updatable_copyright(filename): |
|
file_lines = read_file_lines(filename) |
|
index, line = get_updatable_copyright_line(file_lines) |
|
if not line: |
|
print_file_action_message(filename, "No updatable copyright.") |
|
return |
|
last_git_change_year = get_most_recent_git_change_year(filename) |
|
new_line = create_updated_copyright_line(line, last_git_change_year) |
|
if line == new_line: |
|
print_file_action_message(filename, "Copyright up-to-date.") |
|
return |
|
file_lines[index] = new_line |
|
write_file_lines(filename, file_lines) |
|
print_file_action_message(filename, |
|
"Copyright updated! -> %s" % last_git_change_year) |
|
|
|
def exec_update_header_year(base_directory): |
|
for filename in get_filenames_to_examine(base_directory): |
|
update_updatable_copyright(filename) |
|
|
|
################################################################################ |
|
# update cmd |
|
################################################################################ |
|
|
|
UPDATE_USAGE = """ |
|
Updates all the copyright headers of "The Bitcoin Core developers" which were |
|
changed in a year more recent than is listed. For example: |
|
|
|
// Copyright (c) <firstYear>-<lastYear> The Bitcoin Core developers |
|
|
|
will be updated to: |
|
|
|
// Copyright (c) <firstYear>-<lastModifiedYear> The Bitcoin Core developers |
|
|
|
where <lastModifiedYear> is obtained from the 'git log' history. |
|
|
|
This subcommand also handles copyright headers that have only a single year. In those cases: |
|
|
|
// Copyright (c) <year> The Bitcoin Core developers |
|
|
|
will be updated to: |
|
|
|
// Copyright (c) <year>-<lastModifiedYear> The Bitcoin Core developers |
|
|
|
where the update is appropriate. |
|
|
|
Usage: |
|
$ ./copyright_header.py update <base_directory> |
|
|
|
Arguments: |
|
<base_directory> - The base directory of a bitcoin source code repository. |
|
""" |
|
|
|
def print_file_action_message(filename, action): |
|
print("%-52s %s" % (filename, action)) |
|
|
|
def update_cmd(argv): |
|
if len(argv) != 3: |
|
sys.exit(UPDATE_USAGE) |
|
|
|
base_directory = argv[2] |
|
if not os.path.exists(base_directory): |
|
sys.exit("*** bad base_directory: %s" % base_directory) |
|
exec_update_header_year(base_directory) |
|
|
|
################################################################################ |
|
# inserted copyright header format |
|
################################################################################ |
|
|
|
def get_header_lines(header, start_year, end_year): |
|
lines = header.split('\n')[1:-1] |
|
lines[0] = lines[0] % year_range_to_str(start_year, end_year) |
|
return [line + '\n' for line in lines] |
|
|
|
CPP_HEADER = ''' |
|
// Copyright (c) %s The Bitcoin Core developers |
|
// Distributed under the MIT software license, see the accompanying |
|
// file COPYING or http://www.opensource.org/licenses/mit-license.php. |
|
''' |
|
|
|
def get_cpp_header_lines_to_insert(start_year, end_year): |
|
return reversed(get_header_lines(CPP_HEADER, start_year, end_year)) |
|
|
|
SCRIPT_HEADER = ''' |
|
# Copyright (c) %s The Bitcoin Core developers |
|
# Distributed under the MIT software license, see the accompanying |
|
# file COPYING or http://www.opensource.org/licenses/mit-license.php. |
|
''' |
|
|
|
def get_script_header_lines_to_insert(start_year, end_year): |
|
return reversed(get_header_lines(SCRIPT_HEADER, start_year, end_year)) |
|
|
|
################################################################################ |
|
# query git for year of last change |
|
################################################################################ |
|
|
|
def get_git_change_year_range(filename): |
|
years = get_git_change_years(filename) |
|
return min(years), max(years) |
|
|
|
################################################################################ |
|
# check for existing core copyright |
|
################################################################################ |
|
|
|
def file_already_has_core_copyright(file_lines): |
|
index, _ = get_updatable_copyright_line(file_lines) |
|
return index is not None |
|
|
|
################################################################################ |
|
# insert header execution |
|
################################################################################ |
|
|
|
def file_has_hashbang(file_lines): |
|
if len(file_lines) < 1: |
|
return False |
|
if len(file_lines[0]) <= 2: |
|
return False |
|
return file_lines[0][:2] == '#!' |
|
|
|
def insert_script_header(filename, file_lines, start_year, end_year): |
|
if file_has_hashbang(file_lines): |
|
insert_idx = 1 |
|
else: |
|
insert_idx = 0 |
|
header_lines = get_script_header_lines_to_insert(start_year, end_year) |
|
for line in header_lines: |
|
file_lines.insert(insert_idx, line) |
|
write_file_lines(filename, file_lines) |
|
|
|
def insert_cpp_header(filename, file_lines, start_year, end_year): |
|
file_lines.insert(0, '\n') |
|
header_lines = get_cpp_header_lines_to_insert(start_year, end_year) |
|
for line in header_lines: |
|
file_lines.insert(0, line) |
|
write_file_lines(filename, file_lines) |
|
|
|
def exec_insert_header(filename, style): |
|
file_lines = read_file_lines(filename) |
|
if file_already_has_core_copyright(file_lines): |
|
sys.exit('*** %s already has a copyright by The Bitcoin Core developers' |
|
% (filename)) |
|
start_year, end_year = get_git_change_year_range(filename) |
|
if style in ['python', 'shell']: |
|
insert_script_header(filename, file_lines, start_year, end_year) |
|
else: |
|
insert_cpp_header(filename, file_lines, start_year, end_year) |
|
|
|
################################################################################ |
|
# insert cmd |
|
################################################################################ |
|
|
|
INSERT_USAGE = """ |
|
Inserts a copyright header for "The Bitcoin Core developers" at the top of the |
|
file in either Python or C++ style as determined by the file extension. If the |
|
file is a Python file and it has a '#!' starting the first line, the header is |
|
inserted in the line below it. |
|
|
|
The copyright dates will be set to be: |
|
|
|
"<year_introduced>-<current_year>" |
|
|
|
where <year_introduced> is according to the 'git log' history. If |
|
<year_introduced> is equal to <current_year>, the date will be set to be: |
|
|
|
"<current_year>" |
|
|
|
If the file already has a copyright for "The Bitcoin Core developers", the |
|
script will exit. |
|
|
|
Usage: |
|
$ ./copyright_header.py insert <file> |
|
|
|
Arguments: |
|
<file> - A source file in the bitcoin repository. |
|
""" |
|
|
|
def insert_cmd(argv): |
|
if len(argv) != 3: |
|
sys.exit(INSERT_USAGE) |
|
|
|
filename = argv[2] |
|
if not os.path.isfile(filename): |
|
sys.exit("*** bad filename: %s" % filename) |
|
_, extension = os.path.splitext(filename) |
|
if extension not in ['.h', '.cpp', '.cc', '.c', '.py', '.sh']: |
|
sys.exit("*** cannot insert for file extension %s" % extension) |
|
|
|
if extension == '.py': |
|
style = 'python' |
|
elif extension == '.sh': |
|
style = 'shell' |
|
else: |
|
style = 'cpp' |
|
exec_insert_header(filename, style) |
|
|
|
################################################################################ |
|
# UI |
|
################################################################################ |
|
|
|
USAGE = """ |
|
copyright_header.py - utilities for managing copyright headers of 'The Bitcoin |
|
Core developers' in repository source files. |
|
|
|
Usage: |
|
$ ./copyright_header <subcommand> |
|
|
|
Subcommands: |
|
report |
|
update |
|
insert |
|
|
|
To see subcommand usage, run them without arguments. |
|
""" |
|
|
|
SUBCOMMANDS = ['report', 'update', 'insert'] |
|
|
|
if __name__ == "__main__": |
|
if len(sys.argv) == 1: |
|
sys.exit(USAGE) |
|
subcommand = sys.argv[1] |
|
if subcommand not in SUBCOMMANDS: |
|
sys.exit(USAGE) |
|
if subcommand == 'report': |
|
report_cmd(sys.argv) |
|
elif subcommand == 'update': |
|
update_cmd(sys.argv) |
|
elif subcommand == 'insert': |
|
insert_cmd(sys.argv)
|
|
|