Implement a simple demangler.

The purpose of this demangler is to avoid crashes for any string.

- It does one pass and should avoid going past the end of the string.
- The code avoids recursion to minimize the amount of stack required.
- It cannot demangle all mangled names, but it should be able to work
  on nearly all names in normal stack traces.
- If the mangled name is too large, it will stop demangling and return
  as if the name is not a demangled name.

Test: Passes new unit tests.

Change-Id: I596f74a533c0e093d1517c6bd11cced07009d321
diff --git a/demangle/Demangler.cpp b/demangle/Demangler.cpp
new file mode 100644
index 0000000..77cfd3b
--- /dev/null
+++ b/demangle/Demangler.cpp
@@ -0,0 +1,746 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <assert.h>
+
+#include <cctype>
+#include <stack>
+#include <string>
+#include <vector>
+
+#include "Demangler.h"
+
+constexpr const char* Demangler::kTypes[];
+constexpr const char* Demangler::kDTypes[];
+constexpr const char* Demangler::kSTypes[];
+
+void Demangler::Save(const std::string& str, bool is_name) {
+  saves_.push_back(str);
+  last_save_name_ = is_name;
+}
+
+std::string Demangler::GetArgumentsString() {
+  size_t num_args = cur_state_.args.size();
+  std::string arg_str;
+  if (num_args > 0) {
+    arg_str = cur_state_.args[0];
+    for (size_t i = 1; i < num_args; i++) {
+      arg_str += ", " + cur_state_.args[i];
+    }
+  }
+  return arg_str;
+}
+
+const char* Demangler::AppendOperatorString(const char* name) {
+  const char* oper = nullptr;
+  switch (*name) {
+  case 'a':
+    name++;
+    switch (*name) {
+    case 'a':
+      oper = "operator&&";
+      break;
+    case 'd':
+    case 'n':
+      oper = "operator&";
+      break;
+    case 'N':
+      oper = "operator&=";
+      break;
+    case 'S':
+      oper = "operator=";
+      break;
+    }
+    break;
+  case 'c':
+    name++;
+    switch (*name) {
+    case 'l':
+      oper = "operator()";
+      break;
+    case 'm':
+      oper = "operator,";
+      break;
+    case 'o':
+      oper = "operator~";
+      break;
+    }
+    break;
+  case 'd':
+    name++;
+    switch (*name) {
+    case 'a':
+      oper = "operator delete[]";
+      break;
+    case 'e':
+      oper = "operator*";
+      break;
+    case 'l':
+      oper = "operator delete";
+      break;
+    case 'v':
+      oper = "operator/";
+      break;
+    case 'V':
+      oper = "operator/=";
+      break;
+    }
+    break;
+  case 'e':
+    name++;
+    switch (*name) {
+    case 'o':
+      oper = "operator^";
+      break;
+    case 'O':
+      oper = "operator^=";
+      break;
+    case 'q':
+      oper = "operator==";
+      break;
+    }
+    break;
+  case 'g':
+    name++;
+    switch (*name) {
+    case 'e':
+      oper = "operator>=";
+      break;
+    case 't':
+      oper = "operator>";
+      break;
+    }
+    break;
+  case 'i':
+    name++;
+    switch (*name) {
+    case 'x':
+      oper = "operator[]";
+      break;
+    }
+    break;
+  case 'l':
+    name++;
+    switch (*name) {
+    case 'e':
+      oper = "operator<=";
+      break;
+    case 's':
+      oper = "operator<<";
+      break;
+    case 'S':
+      oper = "operator<<=";
+      break;
+    case 't':
+      oper = "operator<";
+      break;
+    }
+    break;
+  case 'm':
+    name++;
+    switch (*name) {
+    case 'i':
+      oper = "operator-";
+      break;
+    case 'I':
+      oper = "operator-=";
+      break;
+    case 'l':
+      oper = "operator*";
+      break;
+    case 'L':
+      oper = "operator*=";
+      break;
+    case 'm':
+      oper = "operator--";
+      break;
+    }
+    break;
+  case 'n':
+    name++;
+    switch (*name) {
+    case 'a':
+      oper = "operator new[]";
+      break;
+    case 'e':
+      oper = "operator!=";
+      break;
+    case 'g':
+      oper = "operator-";
+      break;
+    case 't':
+      oper = "operator!";
+      break;
+    case 'w':
+      oper = "operator new";
+      break;
+    }
+    break;
+  case 'o':
+    name++;
+    switch (*name) {
+    case 'o':
+      oper = "operator||";
+      break;
+    case 'r':
+      oper = "operator|";
+      break;
+    case 'R':
+      oper = "operator|=";
+      break;
+    }
+    break;
+  case 'p':
+    name++;
+    switch (*name) {
+    case 'm':
+      oper = "operator->*";
+      break;
+    case 'l':
+      oper = "operator+";
+      break;
+    case 'L':
+      oper = "operator+=";
+      break;
+    case 'p':
+      oper = "operator++";
+      break;
+    case 's':
+      oper = "operator+";
+      break;
+    case 't':
+      oper = "operator->";
+      break;
+    }
+    break;
+  case 'q':
+    name++;
+    switch (*name) {
+    case 'u':
+      oper = "operator?";
+      break;
+    }
+    break;
+  case 'r':
+    name++;
+    switch (*name) {
+    case 'm':
+      oper = "operator%";
+      break;
+    case 'M':
+      oper = "operator%=";
+      break;
+    case 's':
+      oper = "operator>>";
+      break;
+    case 'S':
+      oper = "operator>>=";
+      break;
+    }
+    break;
+  }
+  if (oper == nullptr) {
+    return nullptr;
+  }
+  AppendCurrent(oper);
+  cur_state_.last_save = oper;
+  return name + 1;
+}
+
+const char* Demangler::GetStringFromLength(const char* name, std::string* str) {
+  assert(std::isdigit(*name));
+
+  size_t length = *name - '0';
+  name++;
+  while (*name != '\0' && std::isdigit(*name)) {
+    length = length * 10 + *name - '0';
+    name++;
+  }
+
+  std::string read_str;
+  while (*name != '\0' && length != 0) {
+    read_str += *name;
+    name++;
+    length--;
+  }
+  if (length != 0) {
+    return nullptr;
+  }
+  // Special replacement of _GLOBAL__N_1 to (anonymous namespace).
+  if (read_str == "_GLOBAL__N_1") {
+    *str += "(anonymous namespace)";
+  } else {
+    *str += read_str;
+  }
+  return name;
+}
+
+void Demangler::AppendCurrent(const std::string& str) {
+  if (!cur_state_.str.empty()) {
+    cur_state_.str += "::";
+  }
+  cur_state_.str += str;
+}
+
+void Demangler::AppendCurrent(const char* str) {
+  if (!cur_state_.str.empty()) {
+    cur_state_.str += "::";
+  }
+  cur_state_.str += str;
+}
+
+const char* Demangler::ParseS(const char* name) {
+  if (std::islower(*name)) {
+    const char* type = kSTypes[*name - 'a'];
+    if (type == nullptr) {
+      return nullptr;
+    }
+    AppendCurrent(type);
+    return name + 1;
+  }
+
+  if (saves_.empty()) {
+    return nullptr;
+  }
+
+  if (*name == '_') {
+    last_save_name_ = false;
+    AppendCurrent(saves_[0]);
+    return name + 1;
+  }
+
+  bool isdigit = std::isdigit(*name);
+  if (!isdigit && !std::isupper(*name)) {
+    return nullptr;
+  }
+
+  size_t index;
+  if (isdigit) {
+    index = *name - '0' + 1;
+  } else {
+    index = *name - 'A' + 11;
+  }
+  name++;
+  if (*name != '_') {
+    return nullptr;
+  }
+
+  if (index >= saves_.size()) {
+    return nullptr;
+  }
+
+  last_save_name_ = false;
+  AppendCurrent(saves_[index]);
+  return name + 1;
+}
+
+const char* Demangler::ParseFunctionName(const char* name) {
+  if (*name == 'E') {
+    if (parse_funcs_.empty()) {
+      return nullptr;
+    }
+    parse_func_ = parse_funcs_.back();
+    parse_funcs_.pop_back();
+
+    // Remove the last saved part so that the full function name is not saved.
+    // But only if the last save was not something like a substitution.
+    if (!saves_.empty() && last_save_name_) {
+      saves_.pop_back();
+    }
+
+    function_name_ = cur_state_.str;
+    while (!cur_state_.suffixes.empty()) {
+      function_suffix_ += cur_state_.suffixes.back();
+      cur_state_.suffixes.pop_back();
+    }
+    cur_state_.Clear();
+
+    return name + 1;
+  }
+
+  return ParseComplexString(name);
+}
+
+const char* Demangler::ParseComplexArgument(const char* name) {
+  if (*name == 'E') {
+    if (parse_funcs_.empty()) {
+      return nullptr;
+    }
+    parse_func_ = parse_funcs_.back();
+    parse_funcs_.pop_back();
+
+    AppendArgument(cur_state_.str);
+    cur_state_.str.clear();
+
+    return name + 1;
+  }
+
+  return ParseComplexString(name);
+}
+
+void Demangler::FinalizeTemplate() {
+  std::string arg_str(GetArgumentsString());
+  cur_state_ = state_stack_.top();
+  state_stack_.pop();
+  cur_state_.str += '<' + arg_str + '>';
+}
+
+const char* Demangler::ParseComplexString(const char* name) {
+  if (*name == 'S') {
+    name++;
+    if (*name == 't') {
+      AppendCurrent("std");
+      return name + 1;
+    }
+    return ParseS(name);
+  }
+  if (*name == 'L') {
+    name++;
+    if (!std::isdigit(*name)) {
+      return nullptr;
+    }
+  }
+  if (std::isdigit(*name)) {
+    std::string str;
+    name = GetStringFromLength(name, &str);
+    if (name == nullptr) {
+      return name;
+    }
+    AppendCurrent(str);
+    Save(cur_state_.str, true);
+    cur_state_.last_save = std::move(str);
+    return name;
+  }
+  if (*name == 'D') {
+    name++;
+    if (saves_.empty() || (*name != '0' && *name != '1' && *name != '2'
+        && *name != '5')) {
+      return nullptr;
+    }
+    last_save_name_ = false;
+    AppendCurrent("~" + cur_state_.last_save);
+    return name + 1;
+  }
+  if (*name == 'C') {
+    name++;
+    if (saves_.empty() || (*name != '1' && *name != '2' && *name != '3'
+        && *name != '5')) {
+      return nullptr;
+    }
+    last_save_name_ = false;
+    AppendCurrent(cur_state_.last_save);
+    return name + 1;
+  }
+  if (*name == 'K') {
+    cur_state_.suffixes.push_back(" const");
+    return name + 1;
+  }
+  if (*name == 'V') {
+    cur_state_.suffixes.push_back(" volatile");
+    return name + 1;
+  }
+  if (*name == 'I') {
+    // Save the current argument state.
+    state_stack_.push(cur_state_);
+    cur_state_.Clear();
+
+    parse_funcs_.push_back(parse_func_);
+    parse_func_ = &Demangler::ParseTemplateArgumentsComplex;
+    return name + 1;
+  }
+  name = AppendOperatorString(name);
+  if (name != nullptr) {
+    Save(cur_state_.str, true);
+  }
+  return name;
+}
+
+void Demangler::AppendArgument(const std::string& str) {
+  std::string arg(str);
+  while (!cur_state_.suffixes.empty()) {
+    arg += cur_state_.suffixes.back();
+    cur_state_.suffixes.pop_back();
+    Save(arg, false);
+  }
+  cur_state_.args.push_back(arg);
+}
+
+const char* Demangler::ParseFunctionArgument(const char* name) {
+  if (*name == 'E') {
+    // The first argument is the function modifier.
+    // The second argument is the function type.
+    // The third argument is the return type of the function.
+    // The rest of the arguments are the function arguments.
+    size_t num_args = cur_state_.args.size();
+    if (num_args < 4) {
+      return nullptr;
+    }
+    std::string function_modifier = cur_state_.args[0];
+    std::string function_type = cur_state_.args[1];
+
+    std::string str = cur_state_.args[2] + ' ';
+    if (!cur_state_.args[1].empty()) {
+      str += '(' + cur_state_.args[1] + ')';
+    }
+
+    if (num_args == 4 && cur_state_.args[3] == "void") {
+      str += "()";
+    } else {
+      str += '(' + cur_state_.args[3];
+      for (size_t i = 4; i < num_args; i++) {
+        str += ", " + cur_state_.args[i];
+      }
+      str += ')';
+    }
+    str += cur_state_.args[0];
+
+    cur_state_ = state_stack_.top();
+    state_stack_.pop();
+    cur_state_.args.emplace_back(std::move(str));
+
+    parse_func_ = parse_funcs_.back();
+    parse_funcs_.pop_back();
+    return name + 1;
+  }
+  return ParseArguments(name);
+}
+
+const char* Demangler::ParseArguments(const char* name) {
+  switch (*name) {
+  case 'P':
+    cur_state_.suffixes.push_back("*");
+    return name + 1;
+
+  case 'R':
+    // This should always be okay because the string is guaranteed to have
+    // at least two characters before this. A mangled string always starts
+    // with _Z.
+    if (name[-1] != 'R') {
+      // Multiple 'R's in a row only add a single &.
+      cur_state_.suffixes.push_back("&");
+    }
+    return name + 1;
+
+  case 'K':
+  case 'V': {
+    const char* suffix;
+    if (*name == 'K') {
+      suffix = " const";
+    } else {
+      suffix = " volatile";
+    }
+    if (name[-1] == 'K' || name[-1] == 'V') {
+      // Special case, const/volatile apply as a single entity.
+      assert(!cur_state_.suffixes.empty());
+      size_t index = cur_state_.suffixes.size();
+      cur_state_.suffixes[index-1].insert(0, suffix);
+    } else {
+      cur_state_.suffixes.push_back(suffix);
+    }
+    return name + 1;
+  }
+
+  case 'F': {
+    std::string function_modifier;
+    std::string function_type;
+    if (!cur_state_.suffixes.empty()) {
+      // If the first element starts with a ' ', then this modifies the
+      // function itself.
+      if (cur_state_.suffixes.back()[0] == ' ') {
+        function_modifier = cur_state_.suffixes.back();
+        cur_state_.suffixes.pop_back();
+      }
+      while (!cur_state_.suffixes.empty()) {
+        function_type += cur_state_.suffixes.back();
+        cur_state_.suffixes.pop_back();
+      }
+    }
+
+    state_stack_.push(cur_state_);
+
+    cur_state_.Clear();
+
+    // The function parameter has this format:
+    //   First argument is the function modifier.
+    //   Second argument is the function type.
+    //   Third argument will be the return function type but has not
+    //     been parsed yet.
+    //   Any other parameters are the arguments to the function. There
+    //     must be at least one or this isn't valid.
+    cur_state_.args.push_back(function_modifier);
+    cur_state_.args.push_back(function_type);
+
+    parse_funcs_.push_back(parse_func_);
+    parse_func_ = &Demangler::ParseFunctionArgument;
+    return name + 1;
+  }
+
+  case 'N':
+    parse_funcs_.push_back(parse_func_);
+    parse_func_ = &Demangler::ParseComplexArgument;
+    return name + 1;
+
+  case 'S':
+    name++;
+    if (*name == 't') {
+      cur_state_.str = "std::";
+      return name + 1;
+    }
+    name = ParseS(name);
+    if (name == nullptr) {
+      return nullptr;
+    }
+    AppendArgument(cur_state_.str);
+    cur_state_.str.clear();
+    return name;
+
+  case 'D':
+    name++;
+    if (*name >= 'a' && *name <= 'z') {
+      const char* arg = Demangler::kDTypes[*name - 'a'];
+      if (arg == nullptr) {
+        return nullptr;
+      }
+      AppendArgument(arg);
+      return name + 1;
+    }
+    return nullptr;
+
+  case 'I':
+    // Save the current argument state.
+    state_stack_.push(cur_state_);
+    cur_state_.Clear();
+
+    parse_funcs_.push_back(parse_func_);
+    parse_func_ = &Demangler::ParseTemplateArguments;
+    return name + 1;
+
+  case 'v':
+    AppendArgument("void");
+    return name + 1;
+
+  default:
+    if (*name >= 'a' && *name <= 'z') {
+      const char* arg = Demangler::kTypes[*name - 'a'];
+      if (arg == nullptr) {
+        return nullptr;
+      }
+      AppendArgument(arg);
+      return name + 1;
+    } else if (std::isdigit(*name)) {
+      std::string arg = cur_state_.str;
+      name = GetStringFromLength(name, &arg);
+      if (name == nullptr) {
+        return nullptr;
+      }
+      Save(arg, true);
+      if (*name == 'I') {
+        // There is one case where this argument is not complete, and that's
+        // where this is a template argument.
+        cur_state_.str = arg;
+      } else {
+        AppendArgument(arg);
+        cur_state_.str.clear();
+      }
+      return name;
+    }
+  }
+  return nullptr;
+}
+
+const char* Demangler::ParseTemplateArgumentsComplex(const char* name) {
+  if (*name == 'E') {
+    if (parse_funcs_.empty()) {
+      return nullptr;
+    }
+    parse_func_ = parse_funcs_.back();
+    parse_funcs_.pop_back();
+    FinalizeTemplate();
+    Save(cur_state_.str, false);
+    return name + 1;
+  }
+  return ParseArguments(name);
+}
+
+const char* Demangler::ParseTemplateArguments(const char* name) {
+  if (*name == 'E') {
+    if (parse_funcs_.empty()) {
+      return nullptr;
+    }
+    parse_func_ = parse_funcs_.back();
+    parse_funcs_.pop_back();
+    FinalizeTemplate();
+    AppendArgument(cur_state_.str);
+    cur_state_.str.clear();
+    return name + 1;
+  }
+  return ParseArguments(name);
+}
+
+const char* Demangler::FindFunctionName(const char* name) {
+  if (*name == 'N') {
+    parse_funcs_.push_back(&Demangler::ParseArguments);
+    parse_func_ = &Demangler::ParseFunctionName;
+    return name + 1;
+  }
+
+  if (std::isdigit(*name)) {
+    name = GetStringFromLength(name, &function_name_);
+  } else {
+    name = AppendOperatorString(name);
+    function_name_ = cur_state_.str;
+  }
+  parse_func_ = &Demangler::ParseArguments;
+  cur_state_.Clear();
+  return name;
+}
+
+std::string Demangler::Parse(const char* name, size_t max_length) {
+  if (name[0] == '\0' || name[0] != '_' || name[1] == '\0' || name[1] != 'Z') {
+    // Name is not mangled.
+    return name;
+  }
+
+  Clear();
+
+  parse_func_ = &Demangler::FindFunctionName;
+  parse_funcs_.push_back(&Demangler::Fail);
+  const char* cur_name = name + 2;
+  while (cur_name != nullptr && *cur_name != '\0'
+      && static_cast<size_t>(cur_name - name) < max_length) {
+    cur_name = (this->*parse_func_)(cur_name);
+  }
+  if (cur_name == nullptr || *cur_name != '\0' || function_name_.empty()) {
+    return name;
+  }
+
+  std::string arg_str;
+  if (cur_state_.args.size() == 1 && cur_state_.args[0] == "void") {
+    // If the only argument is void, then don't print any args.
+    arg_str = "()";
+  } else {
+    arg_str = GetArgumentsString();
+    if (!arg_str.empty()) {
+      arg_str = '(' + arg_str + ')';
+    }
+  }
+  return function_name_ + arg_str + function_suffix_;
+}
+
+std::string demangle(const char* name) {
+  Demangler demangler;
+  return demangler.Parse(name);
+}