Allow atoms to log fields in bytes format.

There is an increasing number of requests to log data in complex formats to statsd, where the data
is not expected to be parsed or aggregated by statsd but only uploaded as events.

Instead of hard-coding an exception for each of these cases, this CL adds a feature to annotate
these fields in atoms.proto, and the stats-log-api-gen tool will produce byte array interfaces for
them.

Note that log_msg does not have a byte array type, only a string type, so when statsd receives the
log, these fields are in string type. Only when the atom is written to proto do we check whether
the field should be a bytes field and, if so, write it to protobuf in message format.

Change-Id: If53dd95c5826710c76d7fe982bf951a435dfc738
Fix: 118386797
Test: unit test & manual test
diff --git a/tools/stats_log_api_gen/Collation.cpp b/tools/stats_log_api_gen/Collation.cpp
index d1f42f8..257043b 100644
--- a/tools/stats_log_api_gen/Collation.cpp
+++ b/tools/stats_log_api_gen/Collation.cpp
@@ -47,7 +47,8 @@
       fields(that.fields),
       primaryFields(that.primaryFields),
       exclusiveField(that.exclusiveField),
-      uidField(that.uidField) {}
+      uidField(that.uidField),
+      binaryFields(that.binaryFields) {}
 
 AtomDecl::AtomDecl(int c, const string& n, const string& m)
     :code(c),
@@ -119,6 +120,9 @@
             } else if (field->message_type()->full_name() ==
                        "android.os.statsd.KeyValuePair") {
               return JAVA_TYPE_KEY_VALUE_PAIR;
+            } else if (field->options().GetExtension(os::statsd::log_mode) ==
+                       os::statsd::LogMode::MODE_BYTES) {
+                return JAVA_TYPE_BYTE_ARRAY;
             } else {
                 return JAVA_TYPE_OBJECT;
             }
@@ -188,6 +192,8 @@
   for (map<int, const FieldDescriptor *>::const_iterator it = fields.begin();
        it != fields.end(); it++) {
     const FieldDescriptor *field = it->second;
+    bool isBinaryField = field->options().GetExtension(os::statsd::log_mode) ==
+                         os::statsd::LogMode::MODE_BYTES;
 
     java_type_t javaType = java_type(field);
 
@@ -197,17 +203,24 @@
       continue;
     } else if (javaType == JAVA_TYPE_OBJECT &&
                atomDecl->code < PULL_ATOM_START_ID) {
-      // Allow attribution chain, but only at position 1.
-      print_error(field,
-                  "Message type not allowed for field in pushed atoms: %s\n",
-                  field->name().c_str());
-      errorCount++;
-      continue;
-    } else if (javaType == JAVA_TYPE_BYTE_ARRAY) {
-      print_error(field, "Raw bytes type not allowed for field: %s\n",
-                  field->name().c_str());
-      errorCount++;
-      continue;
+        // Allow attribution chain, but only at position 1.
+        print_error(field,
+                    "Message type not allowed for field in pushed atoms: %s\n",
+                    field->name().c_str());
+        errorCount++;
+        continue;
+    } else if (javaType == JAVA_TYPE_BYTE_ARRAY && !isBinaryField) {
+        print_error(field, "Raw bytes type not allowed for field: %s\n",
+                    field->name().c_str());
+        errorCount++;
+        continue;
+    }
+
+    if (isBinaryField && javaType != JAVA_TYPE_BYTE_ARRAY) {
+        print_error(field, "Cannot mark field %s as bytes.\n",
+                    field->name().c_str());
+        errorCount++;
+        continue;
     }
   }
 
@@ -233,6 +246,8 @@
        it != fields.end(); it++) {
     const FieldDescriptor *field = it->second;
     java_type_t javaType = java_type(field);
+    bool isBinaryField = field->options().GetExtension(os::statsd::log_mode) ==
+                         os::statsd::LogMode::MODE_BYTES;
 
     AtomField atField(field->name(), javaType);
     // Generate signature for pushed atoms
@@ -241,8 +256,10 @@
         // All enums are treated as ints when it comes to function signatures.
         signature->push_back(JAVA_TYPE_INT);
         collate_enums(*field->enum_type(), &atField);
+      } else if (javaType == JAVA_TYPE_OBJECT && isBinaryField) {
+          signature->push_back(JAVA_TYPE_BYTE_ARRAY);
       } else {
-        signature->push_back(javaType);
+          signature->push_back(javaType);
       }
     }
     if (javaType == JAVA_TYPE_ENUM) {
@@ -287,6 +304,10 @@
             errorCount++;
         }
     }
+    // Binary field validity is already checked above.
+    if (isBinaryField) {
+        atomDecl->binaryFields.push_back(it->first);
+    }
   }
 
   return errorCount;
diff --git a/tools/stats_log_api_gen/Collation.h b/tools/stats_log_api_gen/Collation.h
index 31b8b07..450b305 100644
--- a/tools/stats_log_api_gen/Collation.h
+++ b/tools/stats_log_api_gen/Collation.h
@@ -89,6 +89,8 @@
 
     int uidField = 0;
 
+    vector<int> binaryFields;
+
     AtomDecl();
     AtomDecl(const AtomDecl& that);
     AtomDecl(int code, const string& name, const string& message);
diff --git a/tools/stats_log_api_gen/main.cpp b/tools/stats_log_api_gen/main.cpp
index 56c8428..1ef34b9 100644
--- a/tools/stats_log_api_gen/main.cpp
+++ b/tools/stats_log_api_gen/main.cpp
@@ -66,6 +66,8 @@
             return "double";
         case JAVA_TYPE_STRING:
             return "char const*";
+        case JAVA_TYPE_BYTE_ARRAY:
+            return "char const*";
         default:
             return "UNKNOWN";
     }
@@ -88,6 +90,8 @@
             return "double";
         case JAVA_TYPE_STRING:
             return "java.lang.String";
+        case JAVA_TYPE_BYTE_ARRAY:
+            return "byte[]";
         default:
             return "UNKNOWN";
     }
@@ -198,13 +202,40 @@
     }
 
     fprintf(out, "    return options;\n");
-    fprintf(out, "  }\n");
+    fprintf(out, "}\n");
 
     fprintf(out,
             "const std::map<int, StateAtomFieldOptions> "
             "AtomsInfo::kStateAtomsFieldOptions = "
             "getStateAtomFieldOptions();\n");
 
+    fprintf(out,
+            "static std::map<int, std::vector<int>> "
+            "getBinaryFieldAtoms() {\n");
+    fprintf(out, "    std::map<int, std::vector<int>> options;\n");
+    for (set<AtomDecl>::const_iterator atom = atoms.decls.begin();
+         atom != atoms.decls.end(); atom++) {
+        if (atom->binaryFields.size() == 0) {
+            continue;
+        }
+        fprintf(out,
+                "\n    // Adding binary fields for atom "
+                "(%d)%s\n",
+                atom->code, atom->name.c_str());
+
+        for (const auto& field : atom->binaryFields) {
+            fprintf(out, "    options[static_cast<int>(%s)].push_back(%d);\n",
+                    make_constant_name(atom->name).c_str(), field);
+        }
+    }
+
+    fprintf(out, "    return options;\n");
+    fprintf(out, "}\n");
+
+    fprintf(out,
+            "const std::map<int, std::vector<int>> "
+            "AtomsInfo::kBytesFieldAtoms = "
+            "getBinaryFieldAtoms();\n");
 
     fprintf(out, "int64_t lastRetryTimestampNs = -1;\n");
     fprintf(out, "const int64_t kMinRetryIntervalNs = NS_PER_SEC * 60 * 20; // 20 minutes\n");
@@ -664,6 +695,9 @@
     fprintf(out,
             "  const static std::map<int, StateAtomFieldOptions> "
             "kStateAtomsFieldOptions;\n");
+    fprintf(out,
+            "  const static std::map<int, std::vector<int>> "
+            "kBytesFieldAtoms;");
     fprintf(out, "};\n");
 
     fprintf(out, "const static int kMaxPushedAtomId = %d;\n\n",
@@ -698,6 +732,8 @@
             fprintf(out, ", android.os.WorkSource workSource");
         } else if (field->javaType == JAVA_TYPE_KEY_VALUE_PAIR) {
             fprintf(out, ", SparseArray<Object> value_map");
+        } else if (field->javaType == JAVA_TYPE_BYTE_ARRAY) {
+            fprintf(out, ", byte[] %s", field->name.c_str());
         } else {
             fprintf(out, ", %s %s", java_type_name(field->javaType), field->name.c_str());
         }
@@ -890,6 +926,8 @@
             return "jdouble";
         case JAVA_TYPE_STRING:
             return "jstring";
+        case JAVA_TYPE_BYTE_ARRAY:
+            return "jbyteArray";
         default:
             return "UNKNOWN";
     }
@@ -942,6 +980,9 @@
             case JAVA_TYPE_KEY_VALUE_PAIR:
               result += "_KeyValuePairs";
               break;
+            case JAVA_TYPE_BYTE_ARRAY:
+                result += "_bytes";
+                break;
             default:
                 result += "_UNKNOWN";
                 break;
@@ -967,6 +1008,8 @@
             return "D";
         case JAVA_TYPE_STRING:
             return "Ljava/lang/String;";
+        case JAVA_TYPE_BYTE_ARRAY:
+            return "[B";
         default:
             return "UNKNOWN";
     }
@@ -1081,6 +1124,25 @@
                 fprintf(out, "    } else {\n");
                 fprintf(out, "        str%d = NULL;\n", argIndex);
                 fprintf(out, "    }\n");
+            } else if (*arg == JAVA_TYPE_BYTE_ARRAY) {
+                hadStringOrChain = true;
+                fprintf(out, "    jbyte* jbyte_array%d;\n", argIndex);
+                fprintf(out, "    const char* str%d;\n", argIndex);
+                fprintf(out, "    if (arg%d != NULL) {\n", argIndex);
+                fprintf(out,
+                        "        jbyte_array%d = "
+                        "env->GetByteArrayElements(arg%d, NULL);\n",
+                        argIndex, argIndex);
+                fprintf(out,
+                        "        str%d = "
+                        "reinterpret_cast<char*>(env->GetByteArrayElements(arg%"
+                        "d, NULL));\n",
+                        argIndex, argIndex);
+                fprintf(out, "    } else {\n");
+                fprintf(out, "        jbyte_array%d = NULL;\n", argIndex);
+                fprintf(out, "        str%d = NULL;\n", argIndex);
+                fprintf(out, "    }\n");
+
             } else if (*arg == JAVA_TYPE_ATTRIBUTION_CHAIN) {
                 hadStringOrChain = true;
                 for (auto chainField : attributionDecl.fields) {
@@ -1154,7 +1216,10 @@
             } else if (*arg == JAVA_TYPE_KEY_VALUE_PAIR) {
                 fprintf(out, ", int32_t_map, int64_t_map, string_map, float_map");
             } else {
-                const char *argName = (*arg == JAVA_TYPE_STRING) ? "str" : "arg";
+                const char* argName = (*arg == JAVA_TYPE_STRING ||
+                                       *arg == JAVA_TYPE_BYTE_ARRAY)
+                                              ? "str"
+                                              : "arg";
                 fprintf(out, ", (%s)%s%d", cpp_type_name(*arg), argName, argIndex);
             }
             argIndex++;
@@ -1171,6 +1236,13 @@
                 fprintf(out, "        env->ReleaseStringUTFChars(arg%d, str%d);\n",
                         argIndex, argIndex);
                 fprintf(out, "    }\n");
+            } else if (*arg == JAVA_TYPE_BYTE_ARRAY) {
+                fprintf(out, "    if (str%d != NULL) { \n", argIndex);
+                fprintf(out,
+                        "        env->ReleaseByteArrayElements(arg%d, "
+                        "jbyte_array%d, 0);\n",
+                        argIndex, argIndex);
+                fprintf(out, "    }\n");
             } else if (*arg == JAVA_TYPE_ATTRIBUTION_CHAIN) {
                 for (auto chainField : attributionDecl.fields) {
                     if (chainField.javaType == JAVA_TYPE_INT) {
diff --git a/tools/stats_log_api_gen/test.proto b/tools/stats_log_api_gen/test.proto
index f635974..3be87d9 100644
--- a/tools/stats_log_api_gen/test.proto
+++ b/tools/stats_log_api_gen/test.proto
@@ -109,6 +109,28 @@
   oneof event { BadAttributionNodePositionAtom bad = 1; }
 }
 
+message GoodEventWithBinaryFieldAtom {
+    oneof event { GoodBinaryFieldAtom field1 = 1; }
+}
+
+message ComplexField {
+    optional string str = 1;
+}
+
+message GoodBinaryFieldAtom {
+    optional int32 field1 = 1;
+    optional ComplexField bf = 2 [(android.os.statsd.log_mode) = MODE_BYTES];
+}
+
+message BadEventWithBinaryFieldAtom {
+    oneof event { BadBinaryFieldAtom field1 = 1; }
+}
+
+message BadBinaryFieldAtom {
+    optional int32 field1 = 1;
+    optional ComplexField bf = 2;
+}
+
 message BadStateAtoms {
     oneof event {
         BadStateAtom1 bad1 = 1;
diff --git a/tools/stats_log_api_gen/test_collation.cpp b/tools/stats_log_api_gen/test_collation.cpp
index 1936d96..ad3bffac 100644
--- a/tools/stats_log_api_gen/test_collation.cpp
+++ b/tools/stats_log_api_gen/test_collation.cpp
@@ -212,5 +212,19 @@
     EXPECT_EQ(0, errorCount);
 }
 
+TEST(CollationTest, PassOnGoodBinaryFieldAtom) {
+    Atoms atoms;
+    int errorCount =
+            collate_atoms(GoodEventWithBinaryFieldAtom::descriptor(), &atoms);
+    EXPECT_EQ(0, errorCount);
+}
+
+TEST(CollationTest, FailOnBadBinaryFieldAtom) {
+    Atoms atoms;
+    int errorCount =
+            collate_atoms(BadEventWithBinaryFieldAtom::descriptor(), &atoms);
+    EXPECT_TRUE(errorCount > 0);
+}
+
 }  // namespace stats_log_api_gen
 }  // namespace android
\ No newline at end of file