blob: bc23306b2559a58577cd1d6d4781bce97b61a66a [file] [log] [blame]
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -07001#!/usr/bin/env python3
2#
3# Copyright (C) 2020 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""Add files to a Rust package for third party review."""
17
18import datetime
Chih-Hung Hsieh03f14e42020-10-19 18:38:30 -070019import glob
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -070020import json
21import os
22import pathlib
23import re
24
# Patterns to match keys in Cargo.toml.
# Every pattern is anchored with "^", so a key is only recognized at the very
# start of a line, and the value must be a double-quoted string on that line.
NAME_PATTERN = r"^name *= *\"(.+)\""
NAME_MATCHER = re.compile(NAME_PATTERN)
VERSION_PATTERN = r"^version *= *\"(.+)\""
VERSION_MATCHER = re.compile(VERSION_PATTERN)
# Unlike the other keys, the description group keeps the surrounding quotes.
DESCRIPTION_PATTERN = r"^description *= *(\".+\")"
DESCRIPTION_MATCHER = re.compile(DESCRIPTION_PATTERN)
# NOTE: This description one-liner pattern fails to match
# multi-line descriptions in some Rust crates, e.g. shlex.
LICENSE_PATTERN = r"^license *= *\"(.+)\""
LICENSE_MATCHER = re.compile(LICENSE_PATTERN)

# Patterns to match year/month/day in METADATA.
# YMD_PATTERN matches one indented "year: N", "month: N" or "day: N" line;
# YMD_LINE_PATTERN matches all three fields written on a single line.
YMD_PATTERN = r"^ +(year|month|day): (.+)$"
YMD_MATCHER = re.compile(YMD_PATTERN)
YMD_LINE_PATTERN = r"^.* year: *([^ ]+) +month: *([^ ]+) +day: *([^ ]+).*$"
YMD_LINE_MATCHER = re.compile(YMD_LINE_PATTERN)

# Patterns to match Apache/MIT/BSD license names in LICENSE* files.
APACHE_PATTERN = r"^.*Apache License.*$"
APACHE_MATCHER = re.compile(APACHE_PATTERN)
MIT_PATTERN = r"^.*MIT License.*$"
MIT_MATCHER = re.compile(MIT_PATTERN)
BSD_PATTERN = r"^.*BSD .*License.*$"
BSD_MATCHER = re.compile(BSD_PATTERN)

# Default owners line added to OWNERS (includes its trailing newline).
DEFAULT_OWNERS = "include platform/prebuilts/rust:/OWNERS\n"
53
54# See b/159487435 Official policy for rust imports METADATA URLs.
55# "license_type: NOTICE" might be optional,
56# but it is already used in most rust crate METADATA.
57# This line format should match the output of external_updater.
58METADATA_CONTENT = """name: "{}"
59description: {}
60third_party {{
61 url {{
62 type: HOMEPAGE
63 value: "https://crates.io/crates/{}"
64 }}
65 url {{
66 type: ARCHIVE
67 value: "https://static.crates.io/crates/{}/{}-{}.crate"
68 }}
69 version: "{}"
70 license_type: NOTICE
71 last_upgrade_date {{
72 year: {}
73 month: {}
74 day: {}
75 }}
76}}
77"""
78
79
def get_metadata_date():
  """Return the (year, month, day) integers for last_upgrade_date.

  The date already recorded in ./METADATA is reused when all three fields
  can be found, so that normalizing an existing directory does not change
  its last_upgrade_date. Otherwise today's date is returned.
  """
  found = {"year": "", "month": "", "day": ""}
  if os.path.exists("METADATA"):
    with open("METADATA", "r") as metadata:
      for text in metadata:
        field = YMD_MATCHER.match(text)
        if field:
          # One "year: N" / "month: N" / "day: N" entry on its own line.
          found[field.group(1)] = field.group(2)
          continue
        whole_date = YMD_LINE_MATCHER.match(text)
        if whole_date:
          # All three fields written on a single line.
          found["year"], found["month"], found["day"] = whole_date.groups()
  if all(found.values()):
    print("### Reuse date in METADATA:",
          found["year"], found["month"], found["day"])
    return int(found["year"]), int(found["month"]), int(found["day"])
  now = datetime.date.today()
  return now.year, now.month, now.day
105
106
def add_metadata(name, version, description):
  """Write ./METADATA for the crate, replacing any existing file.

  Args:
    name: crate name, used in the name field and in the crates.io URLs.
    version: crate version, used in the archive URL and the version field.
    description: description text, written verbatim after "description: ",
      so it must already be quoted.
  """
  print("### Updating METADATA" if os.path.exists("METADATA")
        else "### Adding METADATA")
  # Read the old date before the file is opened for writing (and truncated).
  year, month, day = get_metadata_date()
  fields = (name, description, name, name, name,
            version, version, year, month, day)
  with open("METADATA", "w") as outf:
    outf.write(METADATA_CONTENT.format(*fields))
118
119
def grep_license_keyword(license_file):
  """Guess the license type from familiar phrases in a license file.

  Args:
    license_file: path of the file to scan.

  Returns:
    A (license_type, license_file) tuple. The type is decided by the first
    line that matches any known pattern; on each line Apache is tried before
    MIT, and MIT before BSD. "BSD_LIKE" is assumed when no line matches.
  """
  checks = (
      (APACHE_MATCHER, "APACHE2"),
      (MIT_MATCHER, "MIT"),
      (BSD_MATCHER, "BSD_LIKE"),
  )
  with open(license_file, "r") as input_file:
    for text in input_file:
      for matcher, license_type in checks:
        if matcher.match(text):
          return license_type, license_file
  print("ERROR: cannot decide license type in", license_file,
        " assume BSD_LIKE")
  return "BSD_LIKE", license_file
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700133
134
def decide_license_type(cargo_license):
  """Decide the license type and the license file of the current directory.

  The decision is made in this order:
    1. A file named LICENSE-APACHE or LICENSE-MIT (compared case-insensitively,
       since some crates use names like LICENSE-Apache); APACHE2 is preferred
       when both exist.
    2. Keywords in the license string from Cargo.toml.
    3. Keywords inside a LICENSE or LICENSE.txt file.

  Args:
    cargo_license: the license string found in Cargo.toml, may be empty.

  Returns:
    A (license_type, license_file) tuple. license_file is "unknown-file" when
    no LICENSE* file exists in the current directory.
  """
  # Most crates.io packages have both APACHE and MIT.
  targets = {}
  fallback_file = "unknown-file"
  # glob.glob returns names in arbitrary order. Sort them so that both the
  # entries in targets and the fallback file (the last LICENSE* name) are
  # the same on every run and every file system.
  for path in sorted(glob.glob("./LICENSE*")):
    fallback_file = os.path.basename(path)
    lowered_name = fallback_file.lower()
    if lowered_name == "license-apache":
      targets["APACHE2"] = fallback_file
    elif lowered_name == "license-mit":
      targets["MIT"] = fallback_file
  # Prefer APACHE2 over MIT license type.
  for license_type in ("APACHE2", "MIT"):
    if license_type in targets:
      return license_type, targets[license_type]
  # Use cargo_license found in Cargo.toml; the first keyword found wins.
  cargo_keywords = (
      ("Apache", "APACHE2"),
      ("MIT", "MIT"),
      ("BSD", "BSD_LIKE"),
      ("ISC", "ISC"),
  )
  for keyword, license_type in cargo_keywords:
    if keyword in cargo_license:
      return license_type, fallback_file
  # Try to find key words in LICENSE* files.
  for license_file in ("LICENSE", "LICENSE.txt"):
    if os.path.exists(license_file):
      return grep_license_keyword(license_file)
  print("ERROR: missing LICENSE-{APACHE,MIT}; assume BSD_LIKE")
  return "BSD_LIKE", "unknown-file"
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700167
168
def add_notice():
  """Create a NOTICE symlink to LICENSE if NOTICE does not exist yet.

  Nothing is changed when NOTICE exists. An error is printed when neither
  NOTICE nor LICENSE exists.
  """
  if os.path.exists("NOTICE"):
    return
  if not os.path.exists("LICENSE"):
    print("ERROR: missing NOTICE and LICENSE")
    return
  os.symlink("LICENSE", "NOTICE")
  print("Created link from NOTICE to LICENSE")
176
177
def check_license_link(target):
  """Check that LICENSE is a symlink pointing to an accepted target.

  Only prints error messages; nothing on disk is changed. A link to
  "LICENSE.txt" is always accepted, in addition to a link to target.

  Args:
    target: the expected link target of LICENSE.
  """
  if os.path.islink("LICENSE"):
    found_target = os.readlink("LICENSE")
    if found_target not in (target, "LICENSE.txt"):
      print("ERROR: found LICENSE link to", found_target,
            "but expected", target)
  else:
    print("ERROR: LICENSE file is not a link")
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700187
188
def add_license(target):
  """Add a LICENSE symlink to the given target, if LICENSE is missing.

  An existing LICENSE is never modified: a symlink is only checked against
  target, and a regular file is only reported.

  Args:
    target: name of the license file that LICENSE should link to.
  """
  # Test islink before exists: os.path.exists is False for a broken symlink,
  # and os.symlink would then fail because the LICENSE name is already taken.
  if os.path.islink("LICENSE"):
    check_license_link(target)
    return
  if os.path.exists("LICENSE"):
    print("NOTE: found LICENSE and it is not a link!")
    return
  # Do not create a dangling link, e.g. to a placeholder name that was
  # passed in because no license file was found.
  if not os.path.exists(target):
    print("ERROR: cannot create LICENSE link to missing file", target)
    return
  print("### Creating LICENSE link to", target)
  os.symlink(target, "LICENSE")
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700199
200
def add_module_license(license_type):
  """Touch the MODULE_LICENSE_<license_type> file.

  Nothing is created when a MODULE_LICENSE_* file of a known type already
  exists; an error is printed if that existing file is for another type.

  Args:
    license_type: suffix of the MODULE_LICENSE_ file, e.g. "MIT".
  """
  # Do not change existing MODULE_* files.
  # "ISC" is in the list because decide_license_type can return it; without
  # it a second, conflicting MODULE_LICENSE_* file could be added next to an
  # existing MODULE_LICENSE_ISC.
  for suffix in ["MIT", "APACHE", "APACHE2", "BSD_LIKE", "ISC"]:
    module_file = "MODULE_LICENSE_" + suffix
    if os.path.exists(module_file):
      if license_type != suffix:
        print("ERROR: found unexpected", module_file)
      return
  module_file = "MODULE_LICENSE_" + license_type
  pathlib.Path(module_file).touch()
  print("### Touched", module_file)
213
214
def found_line(file_name, line):
  """Return True if the given line is found in a file.

  Lines are compared without their trailing newline, so the last line of a
  file is found even when the file does not end with a newline.

  Args:
    file_name: path of the text file to search.
    line: the complete line to look for, with or without a trailing newline.
  """
  wanted = line.rstrip("\n")
  with open(file_name, "r") as input_file:
    return any(text.rstrip("\n") == wanted for text in input_file)
219
220
def add_owners():
  """Create OWNERS with the default owner line, or append it if missing.

  An existing OWNERS file might contain more than the default owners, so it
  is never rewritten; the default line is only appended when it is absent.
  """
  if not os.path.isfile("OWNERS"):
    print("### Creating OWNERS with default owners")
    with open("OWNERS", "w") as outf:
      outf.write(DEFAULT_OWNERS)
    return
  if found_line("OWNERS", DEFAULT_OWNERS):
    print("### No change to OWNERS, which has already default owners.")
    return
  print("### Append default owners to OWNERS")
  with open("OWNERS", "r") as inf:
    content = inf.read()
  with open("OWNERS", "a") as outf:
    # Terminate an unfinished last line first; otherwise the default owners
    # would be glued onto it and both entries would be corrupted.
    if content and not content.endswith("\n"):
      outf.write("\n")
    outf.write(DEFAULT_OWNERS)
237
238
def toml2json(line):
  """Convert a quoted toml string to a json quoted string for METADATA.

  Args:
    line: a TOML basic string including its surrounding double quotes;
      expected to be a single line without newline characters.

  Returns:
    The text as a JSON string literal, with single quotes backslash-escaped.
    A string starting with three double quotes (a multi-line string) cannot
    be handled and yields the placeholder "()" in double quotes.
  """
  if line.startswith("\"\"\""):
    return "\"()\""  # cannot handle broken multi-line description
  # TOML string escapes: \b \t \n \f \r \" \\ (no unicode escape)
  # The order matters: escaped backslashes are parked on a newline
  # placeholder first, so they are not mistaken for the start of another
  # escape, and are restored as one backslash after the other escapes.
  substitutions = (
      ("\\\\", "\n"),
      ("\\b", ""),
      ("\\t", " "),
      ("\\n", " "),
      ("\\f", " "),
      ("\\r", ""),
      ("\\\"", "\""),
      ("\n", "\\"),
      # replace a unicode quotation mark, used in the libloading crate
      ("’", "'"),
  )
  text = line[1:-1]
  for old, new in substitutions:
    text = text.replace(old, new)
  # strip and escape single quotes
  quoted = json.dumps(text.strip())
  return quoted.replace("'", "\\'")
251
252
def parse_cargo_toml(cargo):
  """Get name, version, description, license string from Cargo.toml.

  Only the first match of each key is used, and reading stops as soon as
  all four values are found.

  Args:
    cargo: path of the Cargo.toml file.

  Returns:
    A (name, version, description, cargo_license) tuple of strings; a value
    is "" when its key was not found. The description is converted with
    toml2json, the other values are the raw text between the quotes.
  """
  found = {"name": "", "version": "", "description": "", "license": ""}
  matchers = (
      ("name", NAME_MATCHER),
      ("version", VERSION_MATCHER),
      ("description", DESCRIPTION_MATCHER),
      ("license", LICENSE_MATCHER),
  )
  with open(cargo, "r") as toml:
    for text in toml:
      for key, matcher in matchers:
        if found[key]:
          continue  # keep the first value found for this key
        match = matcher.match(text)
        if match:
          value = match.group(1)
          found[key] = toml2json(value) if key == "description" else value
          break
      if all(found.values()):
        break
  return (found["name"], found["version"], found["description"],
          found["license"])
Chih-Hung Hsieh3d24aed2020-10-05 15:29:11 -0700272
273
def main():
  """Add 3rd party review files to the crate in the current directory.

  Reads ./Cargo.toml, then writes METADATA and OWNERS, and adds the LICENSE
  link and the MODULE_LICENSE_* file. Returns early, after printing an
  error, when Cargo.toml is missing, unreadable, or lacks a required key.
  """
  manifest = "Cargo.toml"
  preconditions = (
      (os.path.isfile, "is not found"),
      (lambda path: os.access(path, os.R_OK), "is not readable"),
  )
  for is_ok, complaint in preconditions:
    if not is_ok(manifest):
      print("ERROR: ", manifest, complaint)
      return
  name, version, description, cargo_license = parse_cargo_toml(manifest)
  # The license string is optional; the other three keys are required.
  if not (name and version and description):
    print("ERROR: Cannot find name, version, or description in", manifest)
    return
  print("### Cargo.toml license:", cargo_license)
  add_metadata(name, version, description)
  add_owners()
  license_type, file_name = decide_license_type(cargo_license)
  add_license(file_name)
  add_module_license(license_type)
  # It is unclear yet if a NOTICE file is required.
  # add_notice()


if __name__ == "__main__":
  main()