#!/usr/bin/env python3
#
# Copyright (C) 2020 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Add files to a Rust package for third party review."""
17
Thiébaud Weksteen8da49112021-02-19 11:59:49 +010018import collections
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -070019import datetime
Thiébaud Weksteen8da49112021-02-19 11:59:49 +010020import enum
Chih-Hung Hsieh03f14e42020-10-19 18:38:30 -070021import glob
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -070022import json
23import os
24import pathlib
25import re
26
# Patterns to match keys in Cargo.toml. Each pattern is anchored at the start
# of a line and captures the double-quoted value found on that same line.
NAME_PATTERN = r"^name *= *\"(.+)\""
NAME_MATCHER = re.compile(NAME_PATTERN)
VERSION_PATTERN = r"^version *= *\"(.+)\""
VERSION_MATCHER = re.compile(VERSION_PATTERN)
# Unlike the other keys, the description capture keeps its surrounding quotes.
DESCRIPTION_PATTERN = r"^description *= *(\".+\")"
DESCRIPTION_MATCHER = re.compile(DESCRIPTION_PATTERN)
# NOTE: This description one-liner pattern fails to match
# multi-line descriptions in some Rust crates, e.g. shlex.
LICENSE_PATTERN = r"^license *= *\"(.+)\""
LICENSE_MATCHER = re.compile(LICENSE_PATTERN)

# Patterns to match year/month/day in METADATA.
# YMD_PATTERN handles one "key: value" pair per (indented) line.
YMD_PATTERN = r"^ +(year|month|day): (.+)$"
YMD_MATCHER = re.compile(YMD_PATTERN)
# YMD_LINE_PATTERN handles all three keys written on a single line.
YMD_LINE_PATTERN = r"^.* year: *([^ ]+) +month: *([^ ]+) +day: *([^ ]+).*$"
YMD_LINE_MATCHER = re.compile(YMD_LINE_PATTERN)
44
# Patterns to match different license types in LICENSE* files.
# Every pattern is applied with match() to one line of the file at a time, so
# each keyword is wrapped in ".*" to accept any leading and trailing text.
APACHE_PATTERN = r"^.*Apache License.*$"
APACHE_MATCHER = re.compile(APACHE_PATTERN)
MIT_PATTERN = r"^.*MIT License.*$"
MIT_MATCHER = re.compile(MIT_PATTERN)
BSD_PATTERN = r"^.*BSD .*License.*$"
BSD_MATCHER = re.compile(BSD_PATTERN)
# ".*" (not a single ".") so that the title is found both at column 0 and
# after any amount of indentation.
MPL_PATTERN = r"^.*Mozilla Public License.*$"
MPL_MATCHER = re.compile(MPL_PATTERN)
UNLICENSE_PATTERN = r"^.*unlicense\.org.*$"
UNLICENSE_MATCHER = re.compile(UNLICENSE_PATTERN)
ZERO_BSD_PATTERN = r"^.*Zero-Clause BSD.*$"
ZERO_BSD_MATCHER = re.compile(ZERO_BSD_PATTERN)
# ".*" (not a single ".") so that a line ending right after "zlib License"
# matches as well as one followed by more text.
ZLIB_PATTERN = r"^.*zlib License.*$"
ZLIB_MATCHER = re.compile(ZLIB_PATTERN)
# Comment written to METADATA in front of license_type when add_metadata is
# called with multi_license set. It ends with a newline and the indentation
# of the license_type line that follows it.
MULTI_LICENSE_COMMENT = ("# Dual-licensed, using the least restrictive "
                         "per go/thirdpartylicenses#same.\n ")

# Default owners line added to OWNERS (newline-terminated).
DEFAULT_OWNERS = "include platform/prebuilts/rust:main:/OWNERS\n"
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -070065
# See b/159487435 Official policy for rust imports METADATA URLs.
# "license_type: NOTICE" might be optional,
# but it is already used in most rust crate METADATA.
# This line format should match the output of external_updater.
#
# Template of the METADATA file. It is filled in with str.format(), which is
# why literal braces are doubled.
# NOTE(review): the nesting indentation inside this template appears to be
# flattened to a single space - confirm against external_updater output.
METADATA_CONTENT = """name: "{name}"
description: {description}
third_party {{
 identifier {{
 type: "crates.io"
 value: "https://crates.io/crates/{name}"
 }}
 identifier {{
 type: "Archive"
 value: "https://static.crates.io/crates/{name}/{name}-{version}.crate"
 primary_source: true
 }}
 version: "{version}"
 {license_comment}license_type: {license_type}
 last_upgrade_date {{
 year: {year}
 month: {month}
 day: {day}
 }}
}}
"""
91
92
def get_metadata_date():
    """Return the (year, month, day) to record as last_upgrade_date.

    An existing METADATA file in the current directory is scanned first, so
    that normalizing an already imported crate keeps its upgrade date. When
    no complete date is found there, today's date is returned.

    Returns:
      A tuple of three ints: year, month, day.
    """
    found = {"year": "", "month": "", "day": ""}
    if os.path.exists("METADATA"):
        with open("METADATA", "r") as metadata:
            for text in metadata:
                field = YMD_MATCHER.match(text)
                if field:
                    # One "key: value" pair on its own line.
                    found[field.group(1)] = field.group(2)
                    continue
                whole = YMD_LINE_MATCHER.match(text)
                if whole:
                    # All three values written on a single line.
                    found["year"], found["month"], found["day"] = whole.groups()
    year, month, day = found["year"], found["month"], found["day"]
    if not (year and month and day):
        today = datetime.date.today()
        return today.year, today.month, today.day
    print("### Reuse date in METADATA:", year, month, day)
    return int(year), int(month), int(day)
118
119
def add_metadata(name, version, description, license_group, multi_license):
    """Write the METADATA file of the crate in the current directory.

    Args:
      name: crate name.
      version: crate version.
      description: crate description, already quoted for METADATA.
      license_group: text written as the license_type value.
      multi_license: when true, MULTI_LICENSE_COMMENT is emitted in front of
        the license_type line.
    """
    if not os.path.exists("METADATA"):
        print("### Adding METADATA")
    else:
        print("### Updating METADATA")
    # Read the date before METADATA is truncated below.
    year, month, day = get_metadata_date()
    fields = {
        "name": name,
        "description": description,
        "version": version,
        "license_comment": MULTI_LICENSE_COMMENT if multi_license else "",
        "license_type": license_group,
        "year": year,
        "month": month,
        "day": day,
    }
    with open("METADATA", "w") as outf:
        outf.write(METADATA_CONTENT.format(**fields))
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700134
135
def grep_license_keyword(license_file):
    """Find familiar patterns in a file and return the type.

    The file is read line by line and the first line that matches one of the
    known license patterns decides the result.

    Args:
      license_file: path of the license text to scan.

    Returns:
      A License for the first recognized line. When no line matches, an error
      is printed and a BSD_LIKE/NOTICE License is returned.
    """
    # Tried in this order on every line. ZERO_BSD has to come before BSD: a
    # line such as "Zero-Clause BSD License" also matches the generic BSD
    # pattern. decide_license_type uses the same "0BSD" before "BSD" order
    # for the Cargo.toml license string.
    checks = (
        (APACHE_MATCHER, LicenseType.APACHE2, LicenseGroup.NOTICE),
        (MIT_MATCHER, LicenseType.MIT, LicenseGroup.NOTICE),
        (ZERO_BSD_MATCHER, LicenseType.ZERO_BSD, LicenseGroup.PERMISSIVE),
        (BSD_MATCHER, LicenseType.BSD_LIKE, LicenseGroup.NOTICE),
        (MPL_MATCHER, LicenseType.MPL, LicenseGroup.RECIPROCAL),
        (UNLICENSE_MATCHER, LicenseType.UNLICENSE, LicenseGroup.PERMISSIVE),
        (ZLIB_MATCHER, LicenseType.ZLIB, LicenseGroup.NOTICE),
    )
    with open(license_file, "r") as input_file:
        for line in input_file:
            for matcher, license_type, license_group in checks:
                if matcher.match(line):
                    return License(license_type, license_group, license_file)
    print("ERROR: cannot decide license type in", license_file,
          "assume BSD_LIKE")
    return License(LicenseType.BSD_LIKE, LicenseGroup.NOTICE, license_file)
Thiébaud Weksteen8da49112021-02-19 11:59:49 +0100157
158
class LicenseType(enum.IntEnum):
    """A type of license.

    An IntEnum is used to be able to sort by preference: decide_license_type
    sorts the licenses it finds by this value and the first (smallest) one is
    the preferred license. This is mainly the case for dual-licensed
    Apache/MIT code, for which we prefer the Apache license.
    The enum name is used to generate the corresponding MODULE_LICENSE_* file.
    """
    APACHE2 = 1
    MIT = 2
    BSD_LIKE = 3
    ISC = 4
    MPL = 5
    ZERO_BSD = 6
    UNLICENSE = 7
    ZLIB = 8
Matt Schulte055ccb32023-10-30 14:07:27 -0700174
class LicenseGroup(enum.Enum):
    """A group of license as defined by go/thirdpartylicenses#types

    Note, go/thirdpartylicenses#types calls them "types". But LicenseType was
    already taken so this script calls them groups.
    The member name is the text written as license_type in METADATA.
    """
    RESTRICTED = 1
    RESTRICTED_IF_STATICALLY_LINKED = 2
    RECIPROCAL = 3
    NOTICE = 4
    PERMISSIVE = 5
    BY_EXCEPTION_ONLY = 6
Thiébaud Weksteen8da49112021-02-19 11:59:49 +0100187
188
# A detected license: its LicenseType, its LicenseGroup and the name of the
# file it was derived from.
License = collections.namedtuple(
    "License",
    ("type", "group", "filename"),
)
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700190
191
def decide_license_type(cargo_license):
    """Check LICENSE* files to determine the license type.

    Regular files named LICENSE*, COPYING* or UNLICENSE* in the current
    directory are considered. Well-known file names decide the license
    directly. Otherwise the license string of Cargo.toml is used and, as a
    last resort, the content of a license file is scanned for keywords.

    Args:
      cargo_license: the license string found in Cargo.toml, may be empty.

    Returns:
      A list of Licenses. The first element is the license we prefer.

    Raises:
      FileNotFoundError: if no license file is found.
    """
    # glob() returns names in arbitrary order and also matches directories
    # (e.g. a LICENSES/ folder). Keep regular files only, sorted within each
    # pattern, so that the fallback file picked below is always the same one
    # and can actually be read or linked to.
    candidates = []
    for pattern in ("LICENSE*", "COPYING*", "UNLICENSE*"):
        candidates.extend(
            path for path in sorted(glob.glob(pattern)) if os.path.isfile(path))
    # Most crates.io packages have both APACHE and MIT.
    # Some crate like time-macros-impl uses lower case names like LICENSE-Apache.
    licenses = []
    for license_file in candidates:
        lowered_name = os.path.splitext(license_file.lower())[0]
        if lowered_name == "license-apache":
            licenses.append(License(LicenseType.APACHE2, LicenseGroup.NOTICE, license_file))
        elif lowered_name == "license-mit":
            licenses.append(License(LicenseType.MIT, LicenseGroup.NOTICE, license_file))
        elif lowered_name == "license-0bsd":
            licenses.append(License(LicenseType.ZERO_BSD, LicenseGroup.PERMISSIVE, license_file))
        elif lowered_name == "license-zlib":
            licenses.append(License(LicenseType.ZLIB, LicenseGroup.NOTICE, license_file))
        elif lowered_name == "unlicense":
            licenses.append(License(LicenseType.UNLICENSE, LicenseGroup.PERMISSIVE, license_file))
    if licenses:
        # Smallest LicenseType value first, i.e. the preferred license.
        licenses.sort(key=lambda item: item.type)
        return licenses
    if not candidates:
        raise FileNotFoundError("No license file has been found.")
    # There is a LICENSE*, COPYING* or UNLICENSE* file with an unknown name;
    # use the last one together with cargo_license found in Cargo.toml.
    license_file = candidates[-1]
    if "Apache" in cargo_license:
        return [License(LicenseType.APACHE2, LicenseGroup.NOTICE, license_file)]
    if "MIT" in cargo_license:
        return [License(LicenseType.MIT, LicenseGroup.NOTICE, license_file)]
    # "0BSD" must be tested before "BSD", which it contains.
    if "0BSD" in cargo_license:
        return [License(LicenseType.ZERO_BSD, LicenseGroup.PERMISSIVE, license_file)]
    if "BSD" in cargo_license:
        return [License(LicenseType.BSD_LIKE, LicenseGroup.NOTICE, license_file)]
    if "ISC" in cargo_license:
        return [License(LicenseType.ISC, LicenseGroup.NOTICE, license_file)]
    if "MPL" in cargo_license:
        return [License(LicenseType.MPL, LicenseGroup.RECIPROCAL, license_file)]
    if "Unlicense" in cargo_license:
        return [License(LicenseType.UNLICENSE, LicenseGroup.PERMISSIVE, license_file)]
    if "Zlib" in cargo_license:
        return [License(LicenseType.ZLIB, LicenseGroup.NOTICE, license_file)]
    return [grep_license_keyword(license_file)]
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700237
238
def add_notice():
    """Link NOTICE to LICENSE unless a NOTICE already exists.

    When neither NOTICE nor LICENSE exists, only an error is printed.
    """
    if os.path.exists("NOTICE"):
        return
    if not os.path.exists("LICENSE"):
        print("ERROR: missing NOTICE and LICENSE")
        return
    os.symlink("LICENSE", "NOTICE")
    print("Created link from NOTICE to LICENSE")
246
247
def check_license_link(target):
    """Check the LICENSE link, which must point to the given target.

    A link to "LICENSE.txt" is accepted as well. Problems are only reported
    with a printed message; nothing is changed on disk.
    """
    if not os.path.islink("LICENSE"):
        print("ERROR: LICENSE file is not a link")
        return
    found_target = os.readlink("LICENSE")
    if found_target == target or found_target == "LICENSE.txt":
        return
    print("ERROR: found LICENSE link to", found_target,
          "but expected", target)
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700257
258
def add_license(target):
    """Add a LICENSE link to the given target unless LICENSE already exists.

    An existing link is only checked with check_license_link, and an existing
    regular file is left untouched.

    Args:
      target: name of the license file the LICENSE link should point to.
    """
    # islink() is tested first: exists() follows links and is False for a
    # dangling one, in which case os.symlink() below would fail with
    # FileExistsError.
    if os.path.islink("LICENSE"):
        check_license_link(target)
        return
    if os.path.exists("LICENSE"):
        print("NOTE: found LICENSE and it is not a link.")
        return
    print("### Creating LICENSE link to", target)
    os.symlink(target, "LICENSE")
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700269
270
def add_module_license(license_type):
    """Touch the MODULE_LICENSE_<type> file of the given license.

    Existing MODULE_LICENSE_* files are never changed. If one is found, it
    must belong to the given license.

    Args:
      license_type: the license to record; its `name` attribute is used as
        the file name suffix.

    Raises:
      Exception: if a MODULE_LICENSE_* file of another license exists.
    """
    # Suffixes of MODULE_LICENSE_* files looked for, in this order. ISC and
    # ZERO_BSD are listed (last, so the older checks keep their priority)
    # because this function creates files with those names itself.
    known_suffixes = ["MIT", "APACHE", "APACHE2", "BSD_LIKE", "MPL", "0BSD",
                      "UNLICENSE", "ZLIB", "ISC", "ZERO_BSD"]
    # "0BSD" is the spelling used in Cargo.toml for LicenseType.ZERO_BSD, so
    # an existing MODULE_LICENSE_0BSD is not a conflict for that license.
    aliases = {"0BSD": "ZERO_BSD"}
    # Do not change existing MODULE_* files.
    for suffix in known_suffixes:
        module_file = "MODULE_LICENSE_" + suffix
        if os.path.exists(module_file):
            if license_type.name != aliases.get(suffix, suffix):
                raise Exception("Found unexpected license " + module_file)
            return
    module_file = "MODULE_LICENSE_" + license_type.name.upper()
    pathlib.Path(module_file).touch()
    print("### Touched", module_file)
283
284
def found_line(file_name, line):
    """Return True if the given line is found in a file.

    Line terminators are ignored on both sides, so a last line that lacks a
    trailing newline is still found when `line` ends with one.

    Args:
      file_name: path of the text file to search.
      line: the complete line to look for, with or without its newline.
    """
    wanted = line.rstrip("\r\n")
    with open(file_name, "r") as input_file:
        return any(text.rstrip("\r\n") == wanted for text in input_file)
289
290
def add_owners():
    """Create or append OWNERS with the default owner line.

    An existing OWNERS file might contain more than the default owners, so
    only the missing default owner line is appended to it.
    """
    if not os.path.isfile("OWNERS"):
        print("### Creating OWNERS with default owners")
        with open("OWNERS", "w") as outf:
            outf.write(DEFAULT_OWNERS)
        return
    if found_line("OWNERS", DEFAULT_OWNERS):
        print("### No change to OWNERS, which has already default owners.")
        return
    print("### Append default owners to OWNERS")
    with open("OWNERS", "r") as inf:
        existing = inf.read()
    with open("OWNERS", "a") as outf:
        # A last line without a terminator would otherwise be glued to the
        # appended default owner line.
        if existing and not existing.endswith("\n"):
            outf.write("\n")
        outf.write(DEFAULT_OWNERS)
307
308
def toml2json(line):
    """Convert a quoted toml string to a json quoted string for METADATA.

    Args:
      line: the description value including its surrounding double quotes.

    Returns:
      The JSON-quoted text with single quotes escaped, or "()" (quoted) when
      the value starts a multi-line string, which cannot be handled here.
    """
    if line.startswith('"""'):
        return '"()"'  # cannot handle broken multi-line description
    # TOML string escapes: \b \t \n \f \r \" \\  (no unicode escape)
    # Applied strictly in this order. A raw newline stands in for an escaped
    # backslash while the other escapes are processed, and is turned into a
    # single backslash afterwards.
    substitutions = (
        ("\\\\", "\n"),
        ("\\b", ""),
        ("\\t", " "),
        ("\\n", " "),
        ("\\f", " "),
        ("\\r", ""),
        ("\\\"", "\""),
        ("\n", "\\"),
        # replace a unicode quotation mark, used in the libloading crate
        ("’", "'"),
    )
    text = line[1:-1]
    for old, new in substitutions:
        text = text.replace(old, new)
    # strip and escape single quotes
    quoted = json.dumps(text.strip())
    return quoted.replace("'", "\\'")
321
322
def parse_cargo_toml(cargo):
    """Get name, version, description, license string from Cargo.toml.

    Only the first match of each key is used and at most one key is taken
    from a line. Reading stops as soon as all four values are known.

    Args:
      cargo: path of the Cargo.toml file.

    Returns:
      A tuple (name, version, description, cargo_license); values that were
      not found are empty strings.
    """
    # (key, matcher, conversion of the captured text), in matching priority.
    rules = (
        ("name", NAME_MATCHER, None),
        ("version", VERSION_MATCHER, None),
        ("description", DESCRIPTION_MATCHER, toml2json),
        ("license", LICENSE_MATCHER, None),
    )
    values = {"name": "", "version": "", "description": "", "license": ""}
    with open(cargo, "r") as toml:
        for text in toml:
            for key, matcher, convert in rules:
                if values[key]:
                    continue
                found = matcher.match(text)
                if found:
                    captured = found.group(1)
                    values[key] = convert(captured) if convert else captured
                    break
            if all(values.values()):
                break
    return (values["name"], values["version"], values["description"],
            values["license"])
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700342
343
def main():
    """Add 3rd party review files.

    Works on the crate in the current directory: reads Cargo.toml, decides
    the license, then writes METADATA, OWNERS, the LICENSE link and the
    MODULE_LICENSE_* file. Problems with Cargo.toml are printed and end the
    run early.
    """
    cargo = "Cargo.toml"
    if not os.path.isfile(cargo):
        print("ERROR: ", cargo, "is not found")
        return
    if not os.access(cargo, os.R_OK):
        print("ERROR: ", cargo, "is not readable")
        return
    name, version, description, cargo_license = parse_cargo_toml(cargo)
    if not (name and version and description):
        print("ERROR: Cannot find name, version, or description in", cargo)
        return
    print("### Cargo.toml license:", cargo_license)
    licenses = decide_license_type(cargo_license)
    # The list is ordered by preference; more than one entry means the crate
    # is multi-licensed.
    preferred_license = licenses[0]
    is_multi_licensed = len(licenses) > 1
    add_metadata(name, version, description,
                 preferred_license.group.name, is_multi_licensed)
    add_owners()
    add_license(preferred_license.filename)
    add_module_license(preferred_license.type)
    # It is unclear yet if a NOTICE file is required.
    # add_notice()


if __name__ == "__main__":
    main()