internal/strs: unify string manipulation functionality

Create a new internal/strs package that unifies common functionality:
* Since protobuf itself pseudo-specifies at least 4 different camel-case
and snake-case conversion functions, we define all variants in one place.
* We move the internal/filedesc.nameBuilder function to this package.
We simplify its implementation to not depend on a strings.Builder fork
under the hood since the semantics we desire is simpler than what
strings.Builder provides.
* We use strs.Builder in reflect/protodesc in its construction of all
the full names. This is a perfect use case for strs.Builder since all
full names within a file descriptor share the same lifetime.
* Add UnsafeString and UnsafeBytes cast functions that will be useful
in the near future for optimizing encoding/prototext and encoding/protojson.

Change-Id: I2cf07cbaf6f72e5f9fd6ae3d37b0d46f6af2ad59
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/185198
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/internal/strs/strings.go b/internal/strs/strings.go
new file mode 100644
index 0000000..295bd29
--- /dev/null
+++ b/internal/strs/strings.go
@@ -0,0 +1,111 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package strs provides string manipulation functionality specific to protobuf.
+package strs
+
+import (
+	"strings"
+	"unicode"
+)
+
+// JSONCamelCase drops every underscore in s and upper-cases an ASCII lowercase
+// letter that directly follows one, per the protobuf JSON specification.
+func JSONCamelCase(s string) string {
+	var out []byte
+	afterUnderscore := false
+	for _, ch := range []byte(s) { // proto identifiers are always ASCII
+		if ch == '_' {
+			afterUnderscore = true
+			continue
+		}
+		if afterUnderscore && 'a' <= ch && ch <= 'z' {
+			ch -= 'a' - 'A' // convert to uppercase
+		}
+		out = append(out, ch)
+		afterUnderscore = false
+	}
+	return string(out)
+}
+
+// JSONSnakeCase rewrites every ASCII uppercase letter in s as an underscore
+// followed by its lowercase form, per the protobuf JSON specification.
+func JSONSnakeCase(s string) string {
+	var out []byte
+	for _, ch := range []byte(s) { // proto identifiers are always ASCII
+		if ch < 'A' || ch > 'Z' {
+			out = append(out, ch)
+			continue
+		}
+		// Uppercase letter: emit an underscore, then its lowercase form.
+		lower := ch + ('a' - 'A')
+		out = append(out, '_', lower)
+	}
+	return string(out)
+}
+
+// MapEntryName derives the map entry message name for a field name: underscores
+// are dropped, the character after each one (and the first) is upper-cased,
+// and "Entry" is appended.
+// See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
+func MapEntryName(s string) string {
+	var out []byte
+	capitalize := true
+	for _, r := range s {
+		if r == '_' {
+			capitalize = true
+			continue
+		}
+		if capitalize {
+			r, capitalize = unicode.ToUpper(r), false
+		}
+		out = append(out, byte(r))
+	}
+	return string(append(out, "Entry"...))
+}
+
+// EnumValueName derives the camel-cased enum value name: underscores are
+// dropped, the first character and each one after an underscore are
+// upper-cased, and every other character is lower-cased.
+// See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
+func EnumValueName(s string) string {
+	var out []byte
+	capitalize := true
+	for _, r := range s {
+		if r == '_' {
+			capitalize = true
+			continue
+		}
+		if capitalize {
+			r, capitalize = unicode.ToUpper(r), false
+		} else {
+			r = unicode.ToLower(r)
+		}
+		out = append(out, byte(r))
+	}
+	return string(out)
+}
+
+// TrimEnumPrefix strips the enum name prefix (all lowercase, no underscores)
+// from an enum value name, ignoring case and underscores in s while matching.
+// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
+func TrimEnumPrefix(s, prefix string) string {
+	i, j := 0, 0 // read positions in s and prefix
+	for i < len(s) && j < len(prefix) {
+		switch c := s[i]; {
+		case c == '_':
+			i++ // underscores in s are skipped while matching
+		case unicode.ToLower(rune(c)) == rune(prefix[j]):
+			i, j = i+1, j+1
+		default:
+			return s // no prefix match
+		}
+	}
+	if j < len(prefix) {
+		return s // no prefix match
+	}
+	rest := strings.TrimLeft(s[i:], "_")
+	if rest == "" {
+		return s // avoid returning empty string
+	}
+	return rest
+}
diff --git a/internal/strs/strings_pure.go b/internal/strs/strings_pure.go
new file mode 100644
index 0000000..85e074c
--- /dev/null
+++ b/internal/strs/strings_pure.go
@@ -0,0 +1,27 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build purego appengine
+
+package strs
+
+import pref "google.golang.org/protobuf/reflect/protoreflect"
+
+func UnsafeString(b []byte) string {
+	return string(b) // purego/appengine fallback: an ordinary copying conversion
+}
+
+func UnsafeBytes(s string) []byte {
+	return []byte(s) // purego/appengine fallback: an ordinary copying conversion
+}
+
+type Builder struct{} // stateless in the purego/appengine build: no shared buffer is kept
+
+func (*Builder) AppendFullName(prefix pref.FullName, name pref.Name) pref.FullName {
+	return prefix.Append(name) // delegates to FullName.Append; no shared buffer
+}
+
+func (*Builder) MakeString(b []byte) string {
+	return string(b) // ordinary copying conversion; no shared buffer
+}
diff --git a/internal/strs/strings_test.go b/internal/strs/strings_test.go
new file mode 100644
index 0000000..2c4c2ad
--- /dev/null
+++ b/internal/strs/strings_test.go
@@ -0,0 +1,108 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strs
+
+import (
+	"strconv"
+	"testing"
+)
+
+func TestName(t *testing.T) {
+	tests := []struct {
+		in                string // identifier passed to every function under test
+		inEnumPrefix      string // prefix argument, used by TrimEnumPrefix only
+		wantMapEntry      string
+		wantEnumValue     string
+		wantTrimValue     string
+		wantJSONCamelCase string
+		wantJSONSnakeCase string
+	}{{
+		in:                "abc",
+		inEnumPrefix:      "",
+		wantMapEntry:      "AbcEntry",
+		wantEnumValue:     "Abc",
+		wantTrimValue:     "abc",
+		wantJSONCamelCase: "abc",
+		wantJSONSnakeCase: "abc",
+	}, {
+		in:                "foo_baR_",
+		inEnumPrefix:      "foo_bar",
+		wantMapEntry:      "FooBaREntry",
+		wantEnumValue:     "FooBar",
+		wantTrimValue:     "foo_baR_",
+		wantJSONCamelCase: "fooBaR",
+		wantJSONSnakeCase: "foo_ba_r_",
+	}, {
+		in:                "snake_caseCamelCase",
+		inEnumPrefix:      "snakecasecamel",
+		wantMapEntry:      "SnakeCaseCamelCaseEntry",
+		wantEnumValue:     "SnakeCasecamelcase",
+		wantTrimValue:     "Case",
+		wantJSONCamelCase: "snakeCaseCamelCase",
+		wantJSONSnakeCase: "snake_case_camel_case",
+	}, {
+		in:                "FiZz_BuZz",
+		inEnumPrefix:      "fizz",
+		wantMapEntry:      "FiZzBuZzEntry",
+		wantEnumValue:     "FizzBuzz",
+		wantTrimValue:     "BuZz",
+		wantJSONCamelCase: "FiZzBuZz",
+		wantJSONSnakeCase: "_fi_zz__bu_zz",
+	}}
+
+	for _, tt := range tests {
+		if got := MapEntryName(tt.in); got != tt.wantMapEntry {
+			t.Errorf("MapEntryName(%q) = %q, want %q", tt.in, got, tt.wantMapEntry)
+		}
+		if got := EnumValueName(tt.in); got != tt.wantEnumValue {
+			t.Errorf("EnumValueName(%q) = %q, want %q", tt.in, got, tt.wantEnumValue)
+		}
+		if got := TrimEnumPrefix(tt.in, tt.inEnumPrefix); got != tt.wantTrimValue {
+			t.Errorf("TrimEnumPrefix(%q, %q) = %q, want %q", tt.in, tt.inEnumPrefix, got, tt.wantTrimValue)
+		}
+		if got := JSONCamelCase(tt.in); got != tt.wantJSONCamelCase {
+			t.Errorf("JSONCamelCase(%q) = %q, want %q", tt.in, got, tt.wantJSONCamelCase)
+		}
+		if got := JSONSnakeCase(tt.in); got != tt.wantJSONSnakeCase {
+			t.Errorf("JSONSnakeCase(%q) = %q, want %q", tt.in, got, tt.wantJSONSnakeCase)
+		}
+	}
+}
+
+var (
+	srcString = "1234"
+	srcBytes  = []byte(srcString)
+	dst       uint64 // package-level sink that BenchmarkCast writes and then checks
+)
+
+func BenchmarkCast(b *testing.B) {
+	b.Run("Ideal", func(b *testing.B) { // parses srcString directly: no conversion
+		b.ReportAllocs()
+		for i := 0; i < b.N; i++ {
+			dst, _ = strconv.ParseUint(srcString, 0, 64)
+		}
+		if dst != 1234 {
+			b.Errorf("got %d, want %s", dst, srcString)
+		}
+	})
+	b.Run("Copy", func(b *testing.B) { // converts srcBytes with string(...)
+		b.ReportAllocs()
+		for i := 0; i < b.N; i++ {
+			dst, _ = strconv.ParseUint(string(srcBytes), 0, 64)
+		}
+		if dst != 1234 {
+			b.Errorf("got %d, want %s", dst, srcString)
+		}
+	})
+	b.Run("Cast", func(b *testing.B) { // converts srcBytes with UnsafeString
+		b.ReportAllocs()
+		for i := 0; i < b.N; i++ {
+			dst, _ = strconv.ParseUint(UnsafeString(srcBytes), 0, 64)
+		}
+		if dst != 1234 {
+			b.Errorf("got %d, want %s", dst, srcString)
+		}
+	})
+}
diff --git a/internal/strs/strings_unsafe.go b/internal/strs/strings_unsafe.go
new file mode 100644
index 0000000..2160c70
--- /dev/null
+++ b/internal/strs/strings_unsafe.go
@@ -0,0 +1,94 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !purego,!appengine
+
+package strs
+
+import (
+	"unsafe"
+
+	pref "google.golang.org/protobuf/reflect/protoreflect"
+)
+
+type (
+	stringHeader struct { // layout through which UnsafeString/UnsafeBytes access a string value
+		Data unsafe.Pointer
+		Len  int
+	}
+	sliceHeader struct { // layout through which UnsafeString/UnsafeBytes access a []byte value
+		Data unsafe.Pointer
+		Len  int
+		Cap  int
+	}
+)
+
+// UnsafeString returns a string that refers to the same memory as b.
+// The caller must treat the input slice as immutable.
+//
+// WARNING: Use carefully. The returned result must not leak to the end user
+// unless the input slice is provably immutable.
+func UnsafeString(b []byte) (s string) {
+	src := (*sliceHeader)(unsafe.Pointer(&b))
+	dst := (*stringHeader)(unsafe.Pointer(&s))
+	dst.Data = src.Data // point s at b's data; no bytes are copied
+	dst.Len = src.Len
+	return s
+}
+
+// UnsafeBytes returns a byte slice that refers to the same memory as s.
+// The caller must treat the returned slice as immutable.
+//
+// WARNING: Use carefully. The returned result must not leak to the end user.
+func UnsafeBytes(s string) (b []byte) {
+	src := (*stringHeader)(unsafe.Pointer(&s))
+	dst := (*sliceHeader)(unsafe.Pointer(&b))
+	dst.Data = src.Data
+	dst.Len = src.Len
+	dst.Cap = src.Len // capacity equals length, so the slice has no spare room
+	return b
+}
+
+// Builder builds a set of strings with shared lifetime.
+// This differs from strings.Builder, which is for building a single string.
+type Builder struct {
+	buf []byte // shared backing storage; each new string is appended at the end
+}
+
+// AppendFullName is equivalent to protoreflect.FullName.Append,
+// but optimized for large batches where each name has a shared lifetime.
+func (sb *Builder) AppendFullName(prefix pref.FullName, name pref.Name) pref.FullName {
+	sep := "."
+	if len(prefix) == 0 {
+		sep = "" // no parent scope, so no separator is written or reserved
+	}
+	n := len(prefix) + len(sep) + len(name)
+	sb.grow(n)
+	sb.buf = append(append(sb.buf, prefix...), sep...)
+	sb.buf = append(sb.buf, name...)
+	return pref.FullName(sb.last(n))
+}
+
+// MakeString is equivalent to string(b), but optimized for large batches
+// with a shared lifetime.
+func (sb *Builder) MakeString(b []byte) string {
+	sb.grow(len(b))
+	sb.buf = append(sb.buf, b...) // copy b's bytes into the shared buffer
+	return sb.last(len(b))
+}
+
+func (sb *Builder) grow(n int) {
+	if cap(sb.buf)-len(sb.buf) >= n {
+		return
+	}
+	// Start a fresh buffer of length 0 so that the appends which follow fill
+	// the reserved capacity. Unlike strings.Builder, we do not need to copy
+	// over the contents of the old buffer since our builder provides no API
+	// for retrieving previously created strings.
+	sb.buf = make([]byte, 0, 2*(cap(sb.buf)+n))
+}
+
+func (sb *Builder) last(n int) string {
+	return UnsafeString(sb.buf[len(sb.buf)-n:]) // final n bytes of buf, via UnsafeString
+}