blob: 1ac1e69ce781b799c5a8141304b8cec20ee1937c [file] [log] [blame]
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -07001#!/usr/bin/env python3
2#
3# Copyright (C) 2020 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""Add files to a Rust package for third party review."""
17
Thiébaud Weksteen8da49112021-02-19 11:59:49 +010018import collections
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -070019import datetime
Thiébaud Weksteen8da49112021-02-19 11:59:49 +010020import enum
Chih-Hung Hsieh03f14e42020-10-19 18:38:30 -070021import glob
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -070022import json
23import os
24import pathlib
25import re
26
# Patterns to match keys in Cargo.toml.
# Every pattern is anchored at the start of the line, so a key only matches
# when it is not indented.  NAME, VERSION and LICENSE capture the text
# between the double quotes; DESCRIPTION captures the quotes as well.
NAME_PATTERN = r"^name *= *\"(.+)\""
NAME_MATCHER = re.compile(NAME_PATTERN)
VERSION_PATTERN = r"^version *= *\"(.+)\""
VERSION_MATCHER = re.compile(VERSION_PATTERN)
DESCRIPTION_PATTERN = r"^description *= *(\".+\")"
DESCRIPTION_MATCHER = re.compile(DESCRIPTION_PATTERN)
# NOTE: This description one-liner pattern fails to match
# multi-line descriptions in some Rust crates, e.g. shlex.
LICENSE_PATTERN = r"^license *= *\"(.+)\""
LICENSE_MATCHER = re.compile(LICENSE_PATTERN)
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -070038
# Patterns to match year/month/day in METADATA.
# YMD_PATTERN matches an indented line that holds a single date field and
# captures the field name and its value.  YMD_LINE_PATTERN matches a line
# that holds all three fields and captures year, month and day in that order.
YMD_PATTERN = r"^ +(year|month|day): (.+)$"
YMD_MATCHER = re.compile(YMD_PATTERN)
YMD_LINE_PATTERN = r"^.* year: *([^ ]+) +month: *([^ ]+) +day: *([^ ]+).*$"
YMD_LINE_MATCHER = re.compile(YMD_LINE_PATTERN)
44
# Patterns to match Apache/MIT/BSD license names in the text of LICENSE*
# files.  Each pattern matches a whole line that mentions the license name.
APACHE_PATTERN = r"^.*Apache License.*$"
APACHE_MATCHER = re.compile(APACHE_PATTERN)
MIT_PATTERN = r"^.*MIT License.*$"
MIT_MATCHER = re.compile(MIT_PATTERN)
BSD_PATTERN = r"^.*BSD .*License.*$"
BSD_MATCHER = re.compile(BSD_PATTERN)
# Comment placed directly in front of "license_type" in METADATA when more
# than one license is found.  The text after the newline is the indentation
# for the license_type line that follows it.
# NOTE(review): the amount of trailing whitespace is reproduced as seen in
# this view -- confirm it matches the METADATA_CONTENT indentation.
MULTI_LICENSE_COMMENT = ("# Dual-licensed, using the least restrictive "
                         "per go/thirdpartylicenses#same.\n ")
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -070054
# Default owners line added to OWNERS.  It ends with a newline so that it
# can be written to the file as a complete line.
DEFAULT_OWNERS = "include platform/prebuilts/rust:master:/OWNERS\n"
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -070057
# See b/159487435 Official policy for rust imports METADATA URLs.
# "license_type: NOTICE" might be optional,
# but it is already used in most rust crate METADATA.
# This line format should match the output of external_updater.
#
# The text is filled in with str.format(), so literal braces are doubled.
# Placeholders: name, description, version, license_comment, year, month, day.
# "description" is inserted as-is, so it must already be a quoted string.
# NOTE(review): the indentation of the nested fields is reproduced as seen
# in this view -- confirm it against the output of external_updater.
METADATA_CONTENT = """name: "{name}"
description: {description}
third_party {{
 url {{
 type: HOMEPAGE
 value: "https://crates.io/crates/{name}"
 }}
 url {{
 type: ARCHIVE
 value: "https://static.crates.io/crates/{name}/{name}-{version}.crate"
 }}
 version: "{version}"
 {license_comment}license_type: NOTICE
 last_upgrade_date {{
 year: {year}
 month: {month}
 day: {day}
 }}
}}
"""
82
83
def get_metadata_date():
    """Return the (year, month, day) to record as last_upgrade_date.

    An existing METADATA file in the current directory is scanned for a
    date, written either as separate year/month/day lines or as one line
    holding all three fields.  When all three parts are found they are
    reused, so that normalizing an existing METADATA keeps its date.
    Otherwise today's date is returned.

    Returns: A tuple of three integers: year, month, day.
    """
    date_parts = {"year": "", "month": "", "day": ""}
    if os.path.exists("METADATA"):
        with open("METADATA", "r") as metadata:
            for text in metadata:
                single = YMD_MATCHER.match(text)
                if single:
                    # One field per line: store it under its own name.
                    field = single.group(1)
                    if field in date_parts:
                        date_parts[field] = single.group(2)
                    continue
                packed = YMD_LINE_MATCHER.match(text)
                if packed:
                    # All three fields on one line.
                    date_parts.update(year=packed.group(1),
                                      month=packed.group(2),
                                      day=packed.group(3))
    year = date_parts["year"]
    month = date_parts["month"]
    day = date_parts["day"]
    if not (year and month and day):
        today = datetime.date.today()
        return today.year, today.month, today.day
    print("### Reuse date in METADATA:", year, month, day)
    return int(year), int(month), int(day)
109
110
def add_metadata(name, version, description, multi_license):
    """Write the METADATA file, replacing an existing one.

    Args:
      name: the crate name.
      version: the crate version.
      description: the description, already quoted; it is inserted as-is.
      multi_license: when true, the multi-license comment is written in
        front of the license_type line.
    """
    if os.path.exists("METADATA"):
        banner = "### Updating METADATA"
    else:
        banner = "### Adding METADATA"
    print(banner)
    # Read the date before METADATA is opened for writing and truncated.
    year, month, day = get_metadata_date()
    license_comment = MULTI_LICENSE_COMMENT if multi_license else ""
    with open("METADATA", "w") as outf:
        content = METADATA_CONTENT.format(
            name=name,
            description=description,
            version=version,
            license_comment=license_comment,
            year=year,
            month=month,
            day=day)
        outf.write(content)
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700125
126
def grep_license_keyword(license_file):
    """Guess the license type from the text of a license file.

    The lines are read in order and each is tried against the Apache, MIT
    and BSD matchers, in that order; the first match decides the type.
    When no line matches, an error is printed and BSD_LIKE is assumed.

    Returns: A License pairing the detected type with license_file.
    """
    checks = (
        (APACHE_MATCHER, LicenseType.APACHE2),
        (MIT_MATCHER, LicenseType.MIT),
        (BSD_MATCHER, LicenseType.BSD_LIKE),
    )
    with open(license_file, "r") as text:
        for row in text:
            for matcher, kind in checks:
                if matcher.match(row):
                    return License(kind, license_file)
    print("ERROR: cannot decide license type in", license_file,
          "assume BSD_LIKE")
    return License(LicenseType.BSD_LIKE, license_file)
140
141
@enum.unique
class LicenseType(enum.IntEnum):
    """The kinds of license this tool knows about.

    The members are integers so that a list of licenses can be sorted by
    preference: a lower value is preferred.  This matters mainly for
    dual-licensed Apache/MIT code, where the Apache license is preferred.
    The member name is used to build the MODULE_LICENSE_* file name.
    """
    # Most preferred first.
    APACHE2 = 1
    MIT = 2
    BSD_LIKE = 3
    ISC = 4
153
154
# A license found for the crate: its type and the file that holds its text.
License = collections.namedtuple("License", "type filename")
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700156
157
def decide_license_type(cargo_license):
    """Check LICENSE* files to determine the license type.

    Files named LICENSE-APACHE or LICENSE-MIT (in any letter case) decide
    the result when present.  Otherwise the license string from Cargo.toml
    is consulted, and as a last resort the license file itself is searched
    for a familiar license name.

    Args:
      cargo_license: the license string found in Cargo.toml; may be empty.

    Returns: A list of Licenses. The first element is the license we prefer.

    Raises:
      FileNotFoundError: no LICENSE* file exists in the current directory.
    """
    # Most crates.io packages have both APACHE and MIT.
    # Some crate like time-macros-impl uses lower case names like
    # LICENSE-Apache.
    #
    # glob.glob() returns names in file-system order, which may differ
    # from one machine to another.  Sort them so that the file used by the
    # fallback below does not depend on where the tool runs.
    license_files = sorted(glob.glob("LICENSE*"))
    licenses = []
    for license_file in license_files:
        lowered_name = license_file.lower()
        if lowered_name == "license-apache":
            licenses.append(License(LicenseType.APACHE2, license_file))
        elif lowered_name == "license-mit":
            licenses.append(License(LicenseType.MIT, license_file))
    if licenses:
        # Lower LicenseType values are preferred.
        licenses.sort(key=lambda found: found.type)
        return licenses
    if not license_files:
        raise FileNotFoundError("No license file has been found.")
    # There is a LICENSE or LICENSE.txt file, use cargo_license found in
    # Cargo.toml.  When several files matched, the last one in sorted order
    # is used, which keeps the previous "last file seen" rule.
    license_file = license_files[-1]
    if "Apache" in cargo_license:
        return [License(LicenseType.APACHE2, license_file)]
    if "MIT" in cargo_license:
        return [License(LicenseType.MIT, license_file)]
    if "BSD" in cargo_license:
        return [License(LicenseType.BSD_LIKE, license_file)]
    if "ISC" in cargo_license:
        return [License(LicenseType.ISC, license_file)]
    return [grep_license_keyword(license_file)]
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700189
190
def add_notice():
    """Link NOTICE to LICENSE when there is no NOTICE yet.

    Nothing is done when NOTICE exists.  When LICENSE is missing as well,
    an error is printed and no link is created.
    """
    if os.path.exists("NOTICE"):
        return
    if not os.path.exists("LICENSE"):
        print("ERROR: missing NOTICE and LICENSE")
        return
    os.symlink("LICENSE", "NOTICE")
    print("Created link from NOTICE to LICENSE")
198
199
def check_license_link(target):
    """Check that LICENSE is a link to the given target.

    An error is printed when LICENSE is not a symbolic link, or when it
    links to a file other than target.  A link to LICENSE.txt is accepted
    whatever the target is.
    """
    if os.path.islink("LICENSE"):
        actual = os.readlink("LICENSE")
        if actual not in (target, "LICENSE.txt"):
            print("ERROR: found LICENSE link to", actual,
                  "but expected", target)
    else:
        print("ERROR: LICENSE file is not a link")
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700209
210
def add_license(target):
    """Add a LICENSE link to the given target.

    When LICENSE already exists it is left untouched: a symbolic link is
    checked against target, and a regular file is only reported.

    Args:
      target: the name of the license file that LICENSE should link to.
    """
    # lexists() is also true for a dangling symbolic link.  exists() follows
    # the link and reports such a link as missing, and os.symlink() below
    # would then fail with FileExistsError.
    if os.path.lexists("LICENSE"):
        if os.path.islink("LICENSE"):
            check_license_link(target)
        else:
            print("NOTE: found LICENSE and it is not a link.")
        return
    print("### Creating LICENSE link to", target)
    os.symlink(target, "LICENSE")
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700221
222
def add_module_license(license_type):
    """Touch MODULE_LICENSE_type file.

    Existing MODULE_LICENSE_* files are never changed.  When one is found
    for the given license type nothing is done; when one is found for a
    different type an exception is raised.

    Args:
      license_type: an enum member whose name selects the file, e.g. a
        LicenseType.

    Raises:
      Exception: a MODULE_LICENSE_* file for another license type exists.
    """
    # Do not change existing MODULE_* files.
    # ISC is listed with the other types so that an existing
    # MODULE_LICENSE_ISC is detected like the others.  "APACHE" matches no
    # LicenseType name, so such a file is always reported as unexpected.
    for suffix in ["MIT", "APACHE", "APACHE2", "BSD_LIKE", "ISC"]:
        module_file = "MODULE_LICENSE_" + suffix
        if os.path.exists(module_file):
            if license_type.name != suffix:
                raise Exception("Found unexpected license " + module_file)
            return
    module_file = "MODULE_LICENSE_" + license_type.name.upper()
    pathlib.Path(module_file).touch()
    print("### Touched", module_file)
235
236
def found_line(file_name, line):
    """Returns true if the given line is found in a file.

    Lines are compared without their trailing newline, so the last line of
    the file is found even when it is not terminated by a newline.

    Args:
      file_name: the path of the text file to search.
      line: the line to look for, with or without a trailing newline.
    """
    wanted = line.rstrip("\n")
    with open(file_name, "r") as input_file:
        return any(text.rstrip("\n") == wanted for text in input_file)
241
242
def add_owners():
    """Create or append OWNERS with the default owner line.

    An existing OWNERS file might contain more than the default owners, so
    it is never rewritten: the default owners line is only appended when
    it is missing.
    """
    if not os.path.isfile("OWNERS"):
        print("### Creating OWNERS with default owners")
        with open("OWNERS", "w") as outf:
            outf.write(DEFAULT_OWNERS)
        return
    if found_line("OWNERS", DEFAULT_OWNERS):
        print("### No change to OWNERS, which has already default owners.")
        return
    print("### Append default owners to OWNERS")
    with open("OWNERS", "r") as inf:
        existing = inf.read()
    with open("OWNERS", "a") as outf:
        # Without this, a file that does not end with a newline would get
        # the default owners glued to the end of its last line.
        if existing and not existing.endswith("\n"):
            outf.write("\n")
        outf.write(DEFAULT_OWNERS)
259
260
def toml2json(line):
    """Convert a quoted toml string to a json quoted string for METADATA.

    The surrounding quotes are dropped, the TOML escapes are resolved, the
    text is stripped and then quoted again with json.dumps().  Single
    quotes in the result are escaped with a backslash.  A string that
    starts a multi-line description (three quotes) is replaced by "()".
    """
    if line.startswith("\"\"\""):
        return "\"()\""  # cannot handle broken multi-line description
    # TOML string escapes: \b \t \n \f \r \" \\ (no unicode escape)
    # The order matters: escaped backslashes are first parked on a newline
    # character so that they cannot be taken for the start of another
    # escape, and are turned into single backslashes at the end.
    substitutions = (
        ("\\\\", "\n"),
        ("\\b", ""),
        ("\\t", " "),
        ("\\n", " "),
        ("\\f", " "),
        ("\\r", ""),
        ("\\\"", "\""),
        ("\n", "\\"),
        # replace a unicode quotation mark, used in the libloading crate
        ("’", "'"),
    )
    text = line[1:-1]
    for old, new in substitutions:
        text = text.replace(old, new)
    # strip and escape single quotes
    quoted = json.dumps(text.strip())
    return quoted.replace("'", "\\'")
273
274
def parse_cargo_toml(cargo):
    """Get name, version, description, license string from Cargo.toml.

    The file is read line by line and the first match of each key is kept.
    Reading stops as soon as all four values are found.  The description
    is converted with toml2json(); the other values are returned as found.

    Returns: A tuple (name, version, description, cargo_license); a value
      that is not found is an empty string.
    """
    found = {"name": "", "version": "", "description": "", "license": ""}
    lookups = (
        ("name", NAME_MATCHER),
        ("version", VERSION_MATCHER),
        ("description", DESCRIPTION_MATCHER),
        ("license", LICENSE_MATCHER),
    )
    with open(cargo, "r") as toml:
        for line in toml:
            for key, matcher in lookups:
                if found[key]:
                    continue  # the first match of a key wins
                hit = matcher.match(line)
                if hit:
                    value = hit.group(1)
                    if key == "description":
                        value = toml2json(value)
                    found[key] = value
                    break  # a line supplies at most one value
            if all(found.values()):
                break
    return (found["name"], found["version"], found["description"],
            found["license"])
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700294
295
def main():
    """Add 3rd party review files to the crate in the current directory.

    Cargo.toml is read for the crate name, version, description and
    license, then METADATA, OWNERS, the LICENSE link and the
    MODULE_LICENSE_* file are added.  An error is printed and nothing is
    added when Cargo.toml is missing, unreadable or incomplete.
    """
    manifest = "Cargo.toml"
    if not os.path.isfile(manifest):
        print("ERROR: ", manifest, "is not found")
        return
    if not os.access(manifest, os.R_OK):
        print("ERROR: ", manifest, "is not readable")
        return
    name, version, description, cargo_license = parse_cargo_toml(manifest)
    if not (name and version and description):
        print("ERROR: Cannot find name, version, or description in", manifest)
        return
    print("### Cargo.toml license:", cargo_license)
    found_licenses = decide_license_type(cargo_license)
    # The first license in the list is the preferred one.
    preferred = found_licenses[0]
    has_several = len(found_licenses) > 1
    add_metadata(name, version, description, has_several)
    add_owners()
    add_license(preferred.filename)
    add_module_license(preferred.type)
    # It is unclear yet if a NOTICE file is required.
    # add_notice()
318
319
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()