pw_tokenizer: Replace string literals with tokens

pw_tokenizer provides macros that replace printf-style string literals
with 32-bit hashes (tokens) at compile time. The string literals are
removed from the resulting binary, which dramatically reduces the
binary size. Like any printf-style string, the tokenized strings can be
formatted with arguments and then transmitted or stored.

The pw_tokenizer module is general purpose, but its most common use case
is binary logging. In binary logging, human-readable text logs are
replaced with binary tokens, which are decoded off-device.

This commit includes the C and C++ code for tokenizing strings. It also
includes a C++ library for decoding tokenized strings.

Change-Id: I6d5737ab2d6dfdd76dcf70c852b547fdcd68d683
diff --git a/pw_tokenizer/BUILD b/pw_tokenizer/BUILD
new file mode 100644
index 0000000..59883fc
--- /dev/null
+++ b/pw_tokenizer/BUILD
@@ -0,0 +1,162 @@
+# Copyright 2020 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+load(
+    "//pw_build:pigweed.bzl",
+    "pw_cc_binary",
+    "pw_cc_library",
+    "pw_cc_test",
+)
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache License 2.0
+
+pw_cc_library(
+    name = "pw_tokenizer",  # C and C++ code for tokenizing strings.
+    srcs = [  # tokenize.cc plus the headers that are not listed in hdrs.
+        "public/pw_tokenizer/config.h",
+        "public/pw_tokenizer/internal/argument_types.h",
+        "public/pw_tokenizer/internal/argument_types_macro_4_byte.h",
+        "public/pw_tokenizer/internal/argument_types_macro_8_byte.h",
+        "public/pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_128_hash_macro.h",
+        "public/pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_80_hash_macro.h",
+        "public/pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_96_hash_macro.h",
+        "public/pw_tokenizer/internal/tokenize_string.h",
+        "tokenize.cc",
+    ],
+    hdrs = [
+        "public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h",
+        "public/pw_tokenizer/tokenize.h",
+    ],
+    includes = ["public"],  # All headers above live under public/pw_tokenizer/.
+    deps = [
+        "//pw_preprocessor",
+        "//pw_span",
+        "//pw_varint",
+    ],
+)
+
+pw_cc_library(
+    name = "decoder",  # C++ library for decoding tokenized strings.
+    srcs = [
+        "decode.cc",
+        "detokenize.cc",
+        "token_database.cc",
+    ],
+    hdrs = [
+        "public/pw_tokenizer/detokenize.h",
+        "public/pw_tokenizer/internal/decode.h",  # NOTE(review): internal/ header in hdrs, unlike :pw_tokenizer; presumably for decode_test -- confirm.
+        "public/pw_tokenizer/token_database.h",
+    ],
+    includes = ["public"],
+    deps = [
+        "//pw_span",
+        "//pw_varint",
+    ],
+)
+
+# Executable for generating test data for the C++ and Python detokenizers. This
+# target should only be built for the host.
+pw_cc_binary(  # NOTE(review): no attribute below enforces the host-only rule.
+    name = "generate_decoding_test_data",
+    srcs = [
+        "generate_decoding_test_data.cc",
+    ],
+    deps = [  # Depends on both the tokenizing and the decoding library.
+        ":decoder",
+        ":pw_tokenizer",
+        "//pw_preprocessor",
+        "//pw_varint",
+    ],
+)
+
+pw_cc_test(
+    name = "argument_types_test",  # Lists both a C and a C++ test source.
+    srcs = [
+        "argument_types_test.c",
+        "argument_types_test.cc",
+        "pw_tokenizer_private/argument_types_test.h",  # Test-only header.
+    ],
+    deps = [
+        ":pw_tokenizer",
+    ],
+)
+
+pw_cc_test(
+    name = "decode_test",
+    srcs = [  # NOTE(review): *_test_data.h presumably come from :generate_decoding_test_data -- confirm.
+        "decode_test.cc",
+        "pw_tokenizer_private/tokenized_string_decoding_test_data.h",
+        "pw_tokenizer_private/varint_decoding_test_data.h",
+    ],
+    deps = [
+        ":decoder",
+        "//pw_varint",
+    ],
+)
+
+pw_cc_test(
+    name = "detokenize_test",  # detokenize.cc is part of :decoder.
+    srcs = [
+        "detokenize_test.cc",
+    ],
+    deps = [
+        ":decoder",
+    ],
+)
+
+pw_cc_test(
+    name = "hash_test",
+    srcs = [
+        "hash_test.cc",
+        "pw_tokenizer_private/generated_hash_test_cases.h",  # NOTE(review): "generated" per its name; no generator target is visible in this BUILD file.
+    ],
+    deps = [
+        ":pw_tokenizer",
+    ],
+)
+
+pw_cc_test(
+    name = "token_database_test",  # token_database.cc is part of :decoder.
+    srcs = [
+        "token_database_test.cc",
+    ],
+    deps = [
+        ":decoder",
+    ],
+)
+
+pw_cc_test(
+    name = "tokenize_test",  # Lists both a C and a C++ test source.
+    srcs = [
+        "pw_tokenizer_private/tokenize_test.h",  # Test-only header.
+        "tokenize_test.c",
+        "tokenize_test.cc",
+    ],
+    deps = [
+        ":pw_tokenizer",
+        "//pw_varint",
+    ],
+)
+
+# Source for the tokenizer JNI wrapper shared library; this filegroup does not
+# compile it. To build it, the JNI header include paths must be available in the
+# system or provided with the pw_java_native_interface_include_dirs variable.
+filegroup(
+    name = "detokenizer_jni",
+    srcs = [
+        "java/dev/pigweed/tokenizer/detokenizer.cc",
+    ],
+)