pw_tokenizer: GN integration for token databases
- Provide the pw_tokenizer_database template that updates a token
database in the source tree from build artifacts or other token
databases.
- Support paths or booleans for pw_python_script's stamp argument.
Change-Id: I11a35bb77d6cfa1f328986915e3f17e9603327a5
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/14586
Commit-Queue: Wyatt Hepler <hepler@google.com>
Reviewed-by: Alexei Frolov <frolv@google.com>
diff --git a/pw_build/docs.rst b/pw_build/docs.rst
index 05b31dc..c16eab1 100644
--- a/pw_build/docs.rst
+++ b/pw_build/docs.rst
@@ -97,9 +97,13 @@
``pw_python_script`` accepts all of the arguments of a regular ``action``
target. Additionally, it has some of its own arguments:
-* ``stamp``: Optional boolean indicating whether to automatically create a dummy
- output file for the script. This allows running scripts without specifying any
- ``outputs``.
+* ``capture_output``: Optional boolean. If true, script output is hidden unless
+ the script fails with an error. Defaults to true.
+* ``stamp``: Optional variable indicating whether to automatically create a
+ dummy output file for the script. This allows running scripts without
+ specifying ``outputs``. If ``stamp`` is true, a generic output file is
+ used. If ``stamp`` is a file path, that file is used as a stamp file. Like any
+ output file, ``stamp`` must be in the build directory. Defaults to false.
**Expressions**
diff --git a/pw_build/py/pw_build/python_runner.py b/pw_build/py/pw_build/python_runner.py
index 164b996..3679041 100755
--- a/pw_build/py/pw_build/python_runner.py
+++ b/pw_build/py/pw_build/python_runner.py
@@ -83,7 +83,7 @@
def resolve(self, gn_path: str) -> Path:
"""Resolves a GN path to a filesystem path."""
if gn_path.startswith('//'):
- return self.root.joinpath(gn_path[2:]).resolve()
+ return self.root.joinpath(gn_path.lstrip('/')).resolve()
return self.cwd.joinpath(gn_path).resolve()
diff --git a/pw_build/python_script.gni b/pw_build/python_script.gni
index ef53d19..d9ec728 100644
--- a/pw_build/python_script.gni
+++ b/pw_build/python_script.gni
@@ -65,8 +65,9 @@
# capture_output (=true) If true, script output is hidden unless the script
# fails with an error. Defaults to true.
#
-# stamp File to touch if the script is successful. If not
-# set, no file is touched.
+# stamp File to touch if the script is successful. If set to
+# true, a generic file is used. If false or not set,
+# no file is touched.
#
template("pw_python_script") {
assert(defined(invoker.script), "pw_python_script requires a script to run")
@@ -102,8 +103,13 @@
}
# If a stamp file is requested, add it as an output of the runner script.
- if (defined(invoker.stamp) && invoker.stamp) {
- _stamp_file = "$target_gen_dir/$target_name.pw_pystamp"
+ if (defined(invoker.stamp) && invoker.stamp != false) {
+ if (invoker.stamp == true) {
+ _stamp_file = "$target_gen_dir/$target_name.pw_pystamp"
+ } else {
+ _stamp_file = invoker.stamp
+ }
+
_outputs += [ _stamp_file ]
_script_args += [
"--touch",
diff --git a/pw_tokenizer/database.gni b/pw_tokenizer/database.gni
new file mode 100644
index 0000000..477a763
--- /dev/null
+++ b/pw_tokenizer/database.gni
@@ -0,0 +1,74 @@
+# Copyright 2020 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+# gn-format disable
+import("//build_overrides/pigweed.gni")
+
+import("$dir_pw_build/python_script.gni")
+
+# Updates a tokenized string database in the source tree with artifacts from one
+# or more targets. Other database files may also be used.
+#
+# The database file must exist. A CSV or binary database can be created with the
+# pw/pw_tokenizer/database.py tool. An empty CSV database file can also be
+# created as a starting point.
+#
+# Args:
+# database: source tree path to database file to update; must exist beforehand
+# targets: GN targets (executables or libraries) from which to add tokens
+# input_databases: paths to other database files from which to add tokens
+#
+template("pw_tokenizer_database") {
+  assert(defined(invoker.database),
+         "pw_tokenizer_database requires a 'database' variable")
+
+  if (defined(invoker.targets)) {
+    _targets = invoker.targets
+  } else {
+    _targets = []
+  }
+
+  if (defined(invoker.input_databases)) {
+    _input_databases = invoker.input_databases
+  } else {
+    _input_databases = []
+  }
+
+  assert(
+      _targets != [] || _input_databases != [],
+      "No 'targets' or 'input_databases' were set for pw_tokenizer_database! " +
+          "At least one target or database must be provided as an input.")
+
+  pw_python_script(target_name) {
+    script = "$dir_pw_tokenizer/py/pw_tokenizer/database.py"
+    args = [
+      "add",
+      "--database",
+      rebase_path(invoker.database),
+    ]
+    args += rebase_path(_input_databases)
+
+    foreach(target, _targets) {
+      args += [ "<TARGET_FILE($target)>" ]
+    }
+
+    deps = _targets
+    inputs = [ invoker.database ] + _input_databases
+
+    # Since the output file is in the source tree, create a corresponding stamp
+    # file in the output directory that is independent of the toolchain. That
+    # way, trying to update the database from multiple toolchains is an error.
+    stamp = "$root_build_dir/" + rebase_path(invoker.database, "//") + ".update"
+  }
+}
diff --git a/pw_tokenizer/docs.rst b/pw_tokenizer/docs.rst
index 2541652..9a5b369 100644
--- a/pw_tokenizer/docs.rst
+++ b/pw_tokenizer/docs.rst
@@ -483,6 +483,33 @@
changes are made. The build system can invoke ``database.py`` to update the
database after each build.
+GN integration
+^^^^^^^^^^^^^^
+Token databases may be updated as part of a GN build. The
+``pw_tokenizer_database`` template provided by ``dir_pw_tokenizer/database.gni``
+automatically updates a tokenized strings database in the source tree with
+artifacts from one or more GN targets or other database files.
+
+Each database in the source tree can only be updated from a single
+``pw_tokenizer_database`` rule. Updating the same database in multiple rules
+results in ``Duplicate output file`` GN errors or ``multiple rules generate
+<file>`` Ninja errors. To avoid these errors, ``pw_tokenizer_database`` rules
+should be defined in the default toolchain, and the input targets should be
+referenced with specific toolchains.
+
+.. code-block::
+
+ # gn-format disable
+ import("//build_overrides/pigweed.gni")
+
+ import("$dir_pw_tokenizer/database.gni")
+
+ pw_tokenizer_database("my_database") {
+ database = "database_in_the_source_tree.csv"
+ targets = [ "//firmware/image:foo(//targets/my_board:some_toolchain)" ]
+ input_databases = [ "other_database.csv" ]
+ }
+
Detokenization
==============
Detokenization is the process of expanding a token to the string it represents
diff --git a/pw_tokenizer/py/pw_tokenizer/database.py b/pw_tokenizer/py/pw_tokenizer/database.py
index 5078baf..2a18d9c 100755
--- a/pw_tokenizer/py/pw_tokenizer/database.py
+++ b/pw_tokenizer/py/pw_tokenizer/database.py
@@ -231,12 +231,16 @@
# This is a valid path; yield it without evaluating it as a glob.
yield Path(path_or_glob)
else:
- paths = glob.glob(path_or_glob)
- if not paths:
+ paths = glob.glob(path_or_glob, recursive=True)
+
+            # If no paths were found and the path is not a glob, raise an error.
+ if not paths and not any(c in path_or_glob for c in '*?[]!'):
raise FileNotFoundError(f'{path_or_glob} is not a valid path')
for path in paths:
- yield Path(path)
+ # Resolve globs to CSV or compatible binary files.
+ if elf_reader.compatible_file(path) or path.endswith('.csv'):
+ yield Path(path)
class ExpandGlobs(argparse.Action):
@@ -291,7 +295,8 @@
'tokens. For ELF files, the tokenization domain to read from '
'may specified after the path as #domain_name (e.g. '
'foo.elf#TEST_DOMAIN). Unless specified, only the default '
- 'domain is read from ELF files; .* reads all domains.'))
+ 'domain is read from ELF files; .* reads all domains. Globs are '
+ 'expanded to compatible database files.'))
return parser
diff --git a/pw_tokenizer/py/pw_tokenizer/elf_reader.py b/pw_tokenizer/py/pw_tokenizer/elf_reader.py
index 6eec96c..2a3ac3b 100755
--- a/pw_tokenizer/py/pw_tokenizer/elf_reader.py
+++ b/pw_tokenizer/py/pw_tokenizer/elf_reader.py
@@ -24,6 +24,7 @@
"""
import argparse
+from pathlib import Path
import re
import struct
import sys
@@ -152,12 +153,19 @@
return False
-def compatible_file(fd: BinaryIO) -> bool:
+def compatible_file(file: Union[BinaryIO, str, Path]) -> bool:
     """True if the file type is supported (ELF or archive)."""
-    offset = fd.tell()
-    fd.seek(0)
-    result = _bytes_match(fd, ELF_MAGIC) or _bytes_match(fd, ARCHIVE_MAGIC)
-    fd.seek(offset)
+    # Open before the try block so a failed open() cannot leave fd unbound.
+    fd = open(file, 'rb') if isinstance(file, (str, Path)) else file
+    try:
+        offset = fd.tell()
+        fd.seek(0)
+        result = _bytes_match(fd, ELF_MAGIC) or _bytes_match(fd, ARCHIVE_MAGIC)
+        fd.seek(offset)
+    finally:
+        if isinstance(file, (str, Path)):
+            fd.close()
+
     return result