Better heuristic to find license types
* Handle lower case file names like LICENSE-Apache.
* Use "license" key value in Cargo.toml if LICENSE-* files are missing.
* Recognize license type: ISC
* Improve readability of a sequence of if-statements.
Bug: 170254513
Test: get_rust_pkg.py --add3prf time-macros-impl
Test: run add3prf.py in current rust/crates/* projects
Change-Id: I9642f52090b20922302513a94fa97d41277f9762
diff --git a/scripts/add3prf.py b/scripts/add3prf.py
index 6b1b13c..bc23306 100755
--- a/scripts/add3prf.py
+++ b/scripts/add3prf.py
@@ -16,6 +16,7 @@
"""Add files to a Rust package for third party review."""
import datetime
+import glob
import json
import os
import pathlib
@@ -30,6 +31,8 @@
DESCRIPTION_MATCHER = re.compile(DESCRIPTION_PATTERN)
# NOTE: This description one-liner pattern fails to match
# multi-line descriptions in some Rust crates, e.g. shlex.
+LICENSE_PATTERN = r"^license *= *\"(.+)\""
+LICENSE_MATCHER = re.compile(LICENSE_PATTERN)
# patterns to match year/month/day in METADATA
YMD_PATTERN = r"^ +(year|month|day): (.+)$"
@@ -119,28 +122,48 @@
with open(license_file, "r") as input_file:
for line in input_file:
if APACHE_MATCHER.match(line):
- return "APACHE2"
+ return "APACHE2", license_file
if MIT_MATCHER.match(line):
- return "MIT"
+ return "MIT", license_file
if BSD_MATCHER.match(line):
- return "BSD_LIKE"
+ return "BSD_LIKE", license_file
print("ERROR: cannot decide license type in", license_file,
" assume BSD_LIKE")
- return "BSD_LIKE"
+ return "BSD_LIKE", license_file
-def decide_license_type():
+def decide_license_type(cargo_license):
"""Check LICENSE* files to determine the license type."""
# Most crates.io packages have both APACHE and MIT.
- if os.path.exists("LICENSE-APACHE"):
- return "APACHE2"
- if os.path.exists("LICENSE-MIT"):
- return "MIT"
+ # Some crate like time-macros-impl uses lower case names like LICENSE-Apache.
+ targets = {}
+ license_file = "unknown-file"
+ for license_file in glob.glob("./LICENSE*"):
+ license_file = license_file[2:]
+ lowered_name = license_file.lower()
+ if lowered_name == "license-apache":
+ targets["APACHE2"] = license_file
+ elif lowered_name == "license-mit":
+ targets["MIT"] = license_file
+ # Prefer APACHE2 over MIT license type.
+ for license_type in ["APACHE2", "MIT"]:
+ if license_type in targets:
+ return license_type, targets[license_type]
+ # Use cargo_license found in Cargo.toml.
+ if "Apache" in cargo_license:
+ return "APACHE2", license_file
+ if "MIT" in cargo_license:
+ return "MIT", license_file
+ if "BSD" in cargo_license:
+ return "BSD_LIKE", license_file
+ if "ISC" in cargo_license:
+ return "ISC", license_file
+ # Try to find key words in LICENSE* files.
for license_file in ["LICENSE", "LICENSE.txt"]:
if os.path.exists(license_file):
return grep_license_keyword(license_file)
print("ERROR: missing LICENSE-{APACHE,MIT}; assume BSD_LIKE")
- return "BSD_LIKE"
+ return "BSD_LIKE", "unknown-file"
def add_notice():
@@ -152,48 +175,27 @@
print("ERROR: missing NOTICE and LICENSE")
-def license_link_target(license_type):
- """Return the LICENSE-* target file for LICENSE link."""
- if license_type == "APACHE2":
- return "LICENSE-APACHE"
- elif license_type == "MIT":
- return "LICENSE-MIT"
- elif license_type == "BSD_LIKE":
- for name in ["LICENSE.txt"]:
- if os.path.exists(name):
- return name
- print("### ERROR: cannot find LICENSE target")
- return ""
- else:
- print("### ERROR; unknown license type:", license_type)
- return ""
-
-
-def check_license_link(license_type):
- """Check the LICENSE link, must match given type."""
+def check_license_link(target):
+ """Check the LICENSE link, must bet the given target."""
if not os.path.islink("LICENSE"):
print("ERROR: LICENSE file is not a link")
return
- target = os.readlink("LICENSE")
- expected = license_link_target(license_type)
- if target != expected:
- print("ERROR: found LICENSE link to", target,
- "but expected", expected)
+ found_target = os.readlink("LICENSE")
+ if target != found_target and found_target != "LICENSE.txt":
+ print("ERROR: found LICENSE link to", found_target,
+ "but expected", target)
-def add_license(license_type):
- """Add LICENSE related file."""
+def add_license(target):
+ """Add LICENSE link to give target."""
if os.path.exists("LICENSE"):
if os.path.islink("LICENSE"):
- check_license_link(license_type)
+ check_license_link(target)
else:
print("NOTE: found LICENSE and it is not a link!")
return
- target = license_link_target(license_type)
print("### Creating LICENSE link to", target)
- if target:
- os.symlink(target, "LICENSE")
- # error reported in license_link_target
+ os.symlink(target, "LICENSE")
def add_module_license(license_type):
@@ -203,7 +205,7 @@
module_file = "MODULE_LICENSE_" + suffix
if os.path.exists(module_file):
if license_type != suffix:
- print("### ERROR: found unexpected", module_file)
+ print("ERROR: found unexpected", module_file)
return
module_file = "MODULE_LICENSE_" + license_type
pathlib.Path(module_file).touch()
@@ -249,27 +251,24 @@
def parse_cargo_toml(cargo):
- """get description string from Cargo.toml."""
+ """get name, version, description, license string from Cargo.toml."""
name = ""
version = ""
description = ""
+ cargo_license = ""
with open(cargo, "r") as toml:
for line in toml:
- if not name:
- match = NAME_MATCHER.match(line)
- if match:
- name = match.group(1)
- if not version:
- match = VERSION_MATCHER.match(line)
- if match:
- version = match.group(1)
- if not description:
- match = DESCRIPTION_MATCHER.match(line)
- if match:
- description = toml2json(match.group(1))
- if name and version and description:
+ if not name and NAME_MATCHER.match(line):
+ name = NAME_MATCHER.match(line).group(1)
+ elif not version and VERSION_MATCHER.match(line):
+ version = VERSION_MATCHER.match(line).group(1)
+ elif not description and DESCRIPTION_MATCHER.match(line):
+ description = toml2json(DESCRIPTION_MATCHER.match(line).group(1))
+ elif not cargo_license and LICENSE_MATCHER.match(line):
+ cargo_license = LICENSE_MATCHER.match(line).group(1)
+ if name and version and description and cargo_license:
break
- return name, version, description
+ return name, version, description, cargo_license
def main():
@@ -281,14 +280,15 @@
if not os.access(cargo, os.R_OK):
print("ERROR: ", cargo, "is not readable")
return
- name, version, description = parse_cargo_toml(cargo)
+ name, version, description, cargo_license = parse_cargo_toml(cargo)
if not name or not version or not description:
print("ERROR: Cannot find name, version, or description in", cargo)
return
+ print("### Cargo.toml license:", cargo_license)
add_metadata(name, version, description)
add_owners()
- license_type = decide_license_type()
- add_license(license_type)
+ license_type, file_name = decide_license_type(cargo_license)
+ add_license(file_name)
add_module_license(license_type)
# It is unclear yet if a NOTICE file is required.
# add_notice()