Allow-list unsigned integer overflows.

This CL adds the 'no_sanitize("unsigned-integer-overflow")' Clang/LLVM
attribute to several locations that intentionally rely on that behavior.

From https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html: "this
is not undefined behavior, but it is often unintentional." By using an
explicit allow-list, we should still be able to detect the latter case.

Change-Id: I7d7da40c73fd22b6b29196dc4788eb00fca61439
Bug: 190
diff --git a/pw_preprocessor/public/pw_preprocessor/compiler.h b/pw_preprocessor/public/pw_preprocessor/compiler.h
index 7fa48fc..21b1eb6 100644
--- a/pw_preprocessor/public/pw_preprocessor/compiler.h
+++ b/pw_preprocessor/public/pw_preprocessor/compiler.h
@@ -75,3 +75,22 @@
 //   }
 //
 #define PW_UNREACHABLE __builtin_unreachable()
+
+// Indicate to a sanitizer compiler runtime to skip the named check in the
+// associated function.
+// Example:
+//
+//   uint32_t djb2(const void* buf, size_t len)
+//       PW_NO_SANITIZE("unsigned-integer-overflow"){
+//     uint32_t hash = 5381;
+//     const uint8_t* u8 = static_cast<const uint8_t*>(buf);
+//     for (size_t i = 0; i < len; ++i) {
+//       hash = (hash * 33) + u8[i]; /* hash * 33 + c */
+//     }
+//     return hash;
+//   }
+#if __clang__
+#define PW_NO_SANITIZE(check) __attribute__((no_sanitize(check)))
+#else
+#define PW_NO_SANITIZE(check)
+#endif  // __clang__
diff --git a/pw_string/public/pw_string/string_builder.h b/pw_string/public/pw_string/string_builder.h
index d2397e4..1204e71 100644
--- a/pw_string/public/pw_string/string_builder.h
+++ b/pw_string/public/pw_string/string_builder.h
@@ -158,8 +158,10 @@
   void push_back(char ch) { append(1, ch); }
 
   // Removes the last character. Sets the status to OUT_OF_RANGE if the buffer
-  // is empty.
-  void pop_back() { resize(size() - 1); }
+  // is empty (in which case the unsigned overflow is intentional).
+  void pop_back() PW_NO_SANITIZE("unsigned-integer-overflow") {
+    resize(size() - 1);
+  }
 
   // Appends the provided character count times.
   StringBuilder& append(size_t count, char ch);
diff --git a/pw_tokenizer/hash_test.cc b/pw_tokenizer/hash_test.cc
index 8392437..014be0d 100644
--- a/pw_tokenizer/hash_test.cc
+++ b/pw_tokenizer/hash_test.cc
@@ -62,7 +62,7 @@
   return kSizeIncludingNull - 1;  // subtract the null terminator
 }
 
-TEST(Hashing, Runtime) {
+TEST(Hashing, Runtime) PW_NO_SANITIZE("unsigned-integer-overflow") {
   // Coefficients for the hash terms; k1 is 1.
   static constexpr uint32_t k2 = k65599HashConstant;
   static constexpr uint32_t k3 = k65599HashConstant * k2;
@@ -70,6 +70,8 @@
   static constexpr uint32_t k5 = k65599HashConstant * k4;
 
   // Hash a few things at hash length 4
+  // The coefficient calculation is done modulo 0x100000000, so the unsigned
+  // integer overflows of the hash terms are intentional.
   EXPECT_EQ(PwTokenizer65599FixedLengthHash("", 4), StringLength(""));
   EXPECT_EQ(PwTokenizer65599FixedLengthHash("1", 4),
             StringLength("1") + k2 * '1');
diff --git a/pw_tokenizer/public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h b/pw_tokenizer/public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h
index bd4a140..609bee6 100644
--- a/pw_tokenizer/public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h
+++ b/pw_tokenizer/public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h
@@ -42,12 +42,15 @@
 //   - Characters are hashed in reverse order.
 //   - The string length is hashed as the first character in the string.
 constexpr uint32_t PwTokenizer65599FixedLengthHash(std::string_view string,
-                                                   size_t hash_length) {
+                                                   size_t hash_length)
+    PW_NO_SANITIZE("unsigned-integer-overflow") {
   // The length is hashed as if it were the first character.
   uint32_t hash = string.size();
   uint32_t coefficient = k65599HashConstant;
 
   // Hash all of the characters in the string as unsigned ints.
+  // The coefficient calculation is done modulo 0x100000000, so the unsigned
+  // integer overflows are intentional.
   for (uint8_t ch : string.substr(0, hash_length)) {
     hash += coefficient * ch;
     coefficient *= k65599HashConstant;
diff --git a/pw_tokenizer/simple_tokenize_test.cc b/pw_tokenizer/simple_tokenize_test.cc
index ce84db8..7af8702 100644
--- a/pw_tokenizer/simple_tokenize_test.cc
+++ b/pw_tokenizer/simple_tokenize_test.cc
@@ -23,7 +23,8 @@
 namespace {
 
 template <size_t kSize>
-uint32_t TestHash(const char (&str)[kSize]) {
+uint32_t TestHash(const char (&str)[kSize])
+    PW_NO_SANITIZE("unsigned-integer-overflow") {
   static_assert(kSize > 0u, "Must have at least a null terminator");
 
   static constexpr uint32_t k65599HashConstant = 65599u;
@@ -36,6 +37,8 @@
       std::min(static_cast<size_t>(PW_TOKENIZER_CFG_HASH_LENGTH), kSize - 1);
 
   // Hash all of the characters in the string as unsigned ints.
+  // The coefficient calculation is done modulo 0x100000000, so the unsigned
+  // integer overflows are intentional.
   for (size_t i = 0; i < length; ++i) {
     hash += coefficient * str[i];
     coefficient *= k65599HashConstant;
diff --git a/pw_varint/BUILD b/pw_varint/BUILD
index 6efb177..e37d4cf 100644
--- a/pw_varint/BUILD
+++ b/pw_varint/BUILD
@@ -33,6 +33,7 @@
     includes = ["public"],
     deps = [
         "//pw_polyfill",
+        "//pw_preprocessor",
         "//pw_span",
     ],
 )
diff --git a/pw_varint/BUILD.gn b/pw_varint/BUILD.gn
index 9b79a5c..ffd6b90 100644
--- a/pw_varint/BUILD.gn
+++ b/pw_varint/BUILD.gn
@@ -21,7 +21,10 @@
 
 source_set("pw_varint") {
   public_configs = [ ":default_config" ]
-  public_deps = [ "$dir_pw_span" ]
+  public_deps = [
+    "$dir_pw_preprocessor",
+    "$dir_pw_span",
+  ]
   sources = [
     "public/pw_varint/varint.h",
     "varint.cc",
diff --git a/pw_varint/CMakeLists.txt b/pw_varint/CMakeLists.txt
index 2768af9..6e66eb1 100644
--- a/pw_varint/CMakeLists.txt
+++ b/pw_varint/CMakeLists.txt
@@ -12,4 +12,8 @@
 # License for the specific language governing permissions and limitations under
 # the License.
 
-pw_auto_add_simple_module(pw_varint PUBLIC_DEPS pw_span)
+pw_auto_add_simple_module(pw_varint
+  PUBLIC_DEPS
+    pw_preprocessor
+    pw_span
+)
diff --git a/pw_varint/public/pw_varint/varint.h b/pw_varint/public/pw_varint/varint.h
index dbfcedd..3bef687 100644
--- a/pw_varint/public/pw_varint/varint.h
+++ b/pw_varint/public/pw_varint/varint.h
@@ -16,6 +16,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#include "pw_preprocessor/compiler.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -64,8 +66,11 @@
 }
 
 // ZigZag decodes a signed integer.
+// The calculation is done modulo std::numeric_limits<T>::max()+1, so the
+// unsigned integer overflows are intentional.
 template <typename T>
-constexpr std::make_signed_t<T> ZigZagDecode(T n) {
+constexpr std::make_signed_t<T> ZigZagDecode(T n)
+    PW_NO_SANITIZE("unsigned-integer-overflow") {
   static_assert(std::is_unsigned<T>(),
                 "Zig-zag decoding is for unsigned integers");
   return static_cast<std::make_signed_t<T>>((n >> 1) ^ (~(n & 1) + 1));