compiler/protogen, internal/strs, internal/impl: expose enum Go name derivation

In order to migrate v1 to wrap v2, we need a way to reproduce
the awful enum "names" that v1 used, which were the concatenation of
the proto package with the Go identifier used for the enum.

To support this:
* Move the camel case logic from compiler/protogen to internal/strs
* Add a small stub in internal/impl to expose this functionality

Change-Id: I8ff31daa9ae541e5788dc04d2e89eae1574877e4
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/191637
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/compiler/protogen/names.go b/compiler/protogen/names.go
deleted file mode 100644
index ae41a5a..0000000
--- a/compiler/protogen/names.go
+++ /dev/null
@@ -1,141 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package protogen
-
-import (
-	"fmt"
-	"go/token"
-	"strconv"
-	"strings"
-	"unicode"
-	"unicode/utf8"
-
-	"google.golang.org/protobuf/reflect/protoreflect"
-)
-
-// A GoIdent is a Go identifier, consisting of a name and import path.
-// The name is a single identifier and may not be a dot-qualified selector.
-type GoIdent struct {
-	GoName       string
-	GoImportPath GoImportPath
-}
-
-func (id GoIdent) String() string { return fmt.Sprintf("%q.%v", id.GoImportPath, id.GoName) }
-
-// newGoIdent returns the Go identifier for a descriptor.
-func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent {
-	name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".")
-	return GoIdent{
-		GoName:       camelCase(name),
-		GoImportPath: f.GoImportPath,
-	}
-}
-
-// A GoImportPath is the import path of a Go package. e.g., "google.golang.org/genproto/protobuf".
-type GoImportPath string
-
-func (p GoImportPath) String() string { return strconv.Quote(string(p)) }
-
-// Ident returns a GoIdent with s as the GoName and p as the GoImportPath.
-func (p GoImportPath) Ident(s string) GoIdent {
-	return GoIdent{GoName: s, GoImportPath: p}
-}
-
-// A GoPackageName is the name of a Go package. e.g., "protobuf".
-type GoPackageName string
-
-// cleanPackageName converts a string to a valid Go package name.
-func cleanPackageName(name string) GoPackageName {
-	return GoPackageName(cleanGoName(name))
-}
-
-// cleanGoName converts a string to a valid Go identifier.
-func cleanGoName(s string) string {
-	// Sanitize the input to the set of valid characters,
-	// which must be '_' or be in the Unicode L or N categories.
-	s = strings.Map(func(r rune) rune {
-		if unicode.IsLetter(r) || unicode.IsDigit(r) {
-			return r
-		}
-		return '_'
-	}, s)
-
-	// Prepend '_' in the event of a Go keyword conflict or if
-	// the identifier is invalid (does not start in the Unicode L category).
-	r, _ := utf8.DecodeRuneInString(s)
-	if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) {
-		return "_" + s
-	}
-	return s
-}
-
-// baseName returns the last path element of the name, with the last dotted suffix removed.
-func baseName(name string) string {
-	// First, find the last element
-	if i := strings.LastIndex(name, "/"); i >= 0 {
-		name = name[i+1:]
-	}
-	// Now drop the suffix
-	if i := strings.LastIndex(name, "."); i >= 0 {
-		name = name[:i]
-	}
-	return name
-}
-
-// camelCase converts a name to CamelCase.
-//
-// If there is an interior underscore followed by a lower case letter,
-// drop the underscore and convert the letter to upper case.
-// There is a remote possibility of this rewrite causing a name collision,
-// but it's so remote we're prepared to pretend it's nonexistent - since the
-// C++ generator lowercases names, it's extremely unlikely to have two fields
-// with different capitalizations.
-func camelCase(s string) string {
-	// Invariant: if the next letter is lower case, it must be converted
-	// to upper case.
-	// That is, we process a word at a time, where words are marked by _ or
-	// upper case letter. Digits are treated as words.
-	var b []byte
-	for i := 0; i < len(s); i++ {
-		c := s[i]
-		switch {
-		case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
-			// Skip over '.' in ".{{lowercase}}".
-		case c == '.':
-			b = append(b, '_') // convert '.' to '_'
-		case c == '_' && (i == 0 || s[i-1] == '.'):
-			// Convert initial '_' to ensure we start with a capital letter.
-			// Do the same for '_' after '.' to match historic behavior.
-			b = append(b, 'X') // convert '_' to 'X'
-		case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
-			// Skip over '_' in "_{{lowercase}}".
-		case isASCIIDigit(c):
-			b = append(b, c)
-		default:
-			// Assume we have a letter now - if not, it's a bogus identifier.
-			// The next word is a sequence of characters that must start upper case.
-			if isASCIILower(c) {
-				c -= 'a' - 'A' // convert lowercase to uppercase
-			}
-			b = append(b, c)
-
-			// Accept lower case sequence that follows.
-			for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
-				b = append(b, s[i+1])
-			}
-		}
-	}
-	return string(b)
-}
-
-// Is c an ASCII lower-case letter?
-func isASCIILower(c byte) bool {
-	return 'a' <= c && c <= 'z'
-}
-
-// Is c an ASCII digit?
-func isASCIIDigit(c byte) bool {
-	return '0' <= c && c <= '9'
-}
diff --git a/compiler/protogen/names_test.go b/compiler/protogen/names_test.go
deleted file mode 100644
index 6f03cc9..0000000
--- a/compiler/protogen/names_test.go
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package protogen
-
-import "testing"
-
-func TestCamelCase(t *testing.T) {
-	tests := []struct {
-		in, want string
-	}{
-		{"", ""},
-		{"one", "One"},
-		{"one_two", "OneTwo"},
-		{"_my_field_name_2", "XMyFieldName_2"},
-		{"Something_Capped", "Something_Capped"},
-		{"my_Name", "My_Name"},
-		{"OneTwo", "OneTwo"},
-		{"_", "X"},
-		{"_a_", "XA_"},
-		{"one.two", "OneTwo"},
-		{"one.Two", "One_Two"},
-		{"one_two.three_four", "OneTwoThreeFour"},
-		{"one_two.Three_four", "OneTwo_ThreeFour"},
-		{"_one._two", "XOne_XTwo"},
-		{"SCREAMING_SNAKE_CASE", "SCREAMING_SNAKE_CASE"},
-		{"double__underscore", "Double_Underscore"},
-		{"camelCase", "CamelCase"},
-		{"go2proto", "Go2Proto"},
-		{"世界", "世界"},
-		{"x世界", "X世界"},
-		{"foo_bar世界", "FooBar世界"},
-	}
-	for _, tc := range tests {
-		if got := camelCase(tc.in); got != tc.want {
-			t.Errorf("CamelCase(%q) = %q, want %q", tc.in, got, tc.want)
-		}
-	}
-}
-
-func TestCleanGoName(t *testing.T) {
-	tests := []struct {
-		in, want string
-	}{
-		{"", "_"},
-		{"boo", "boo"},
-		{"Boo", "Boo"},
-		{"ßoo", "ßoo"},
-		{"default", "_default"},
-		{"hello", "hello"},
-		{"hello-world!!", "hello_world__"},
-		{"hello-\xde\xad\xbe\xef\x00", "hello_____"},
-		{"hello 世界", "hello_世界"},
-		{"世界", "世界"},
-	}
-	for _, tc := range tests {
-		if got := cleanGoName(tc.in); got != tc.want {
-			t.Errorf("cleanGoName(%q) = %q, want %q", tc.in, got, tc.want)
-		}
-	}
-}
diff --git a/compiler/protogen/protogen.go b/compiler/protogen/protogen.go
index ec03704..3be898d 100644
--- a/compiler/protogen/protogen.go
+++ b/compiler/protogen/protogen.go
@@ -30,6 +30,7 @@
 
 	"google.golang.org/protobuf/encoding/prototext"
 	"google.golang.org/protobuf/internal/fieldnum"
+	"google.golang.org/protobuf/internal/strs"
 	"google.golang.org/protobuf/proto"
 	"google.golang.org/protobuf/reflect/protodesc"
 	"google.golang.org/protobuf/reflect/protoreflect"
@@ -431,7 +432,7 @@
 		}
 	}
 	f.GoDescriptorIdent = GoIdent{
-		GoName:       "File_" + cleanGoName(p.GetName()),
+		GoName:       "File_" + strs.GoSanitized(p.GetName()),
 		GoImportPath: f.GoImportPath,
 	}
 	f.GeneratedFilenamePrefix = prefix
@@ -499,6 +500,8 @@
 	}
 	// A semicolon-delimited suffix delimits the import path and package name.
 	if i := strings.Index(opt, ";"); i >= 0 {
+		// TODO: The package name is explicitly provided by the .proto file.
+		// Rather than sanitizing it, we should pass it verbatim.
 		return cleanPackageName(opt[i+1:]), GoImportPath(opt[:i])
 	}
 	// The presence of a slash implies there's an import path.
@@ -756,7 +759,7 @@
 	default:
 		loc = message.Location.appendPath(fieldnum.DescriptorProto_Field, int32(desc.Index()))
 	}
-	camelCased := camelCase(string(desc.Name()))
+	camelCased := strs.GoCamelCase(string(desc.Name()))
 	var parentPrefix string
 	if message != nil {
 		parentPrefix = message.GoIdent.GoName + "_"
@@ -826,7 +829,7 @@
 
 func newOneof(gen *Plugin, f *File, message *Message, desc protoreflect.OneofDescriptor) *Oneof {
 	loc := message.Location.appendPath(fieldnum.DescriptorProto_OneofDecl, int32(desc.Index()))
-	camelCased := camelCase(string(desc.Name()))
+	camelCased := strs.GoCamelCase(string(desc.Name()))
 	parentPrefix := message.GoIdent.GoName + "_"
 	return &Oneof{
 		Desc:   desc,
@@ -860,7 +863,7 @@
 	loc := f.location(fieldnum.FileDescriptorProto_Service, int32(desc.Index()))
 	service := &Service{
 		Desc:     desc,
-		GoName:   camelCase(string(desc.Name())),
+		GoName:   strs.GoCamelCase(string(desc.Name())),
 		Location: loc,
 		Comments: f.comments[newPathKey(loc.Path)],
 	}
@@ -889,7 +892,7 @@
 	loc := service.Location.appendPath(fieldnum.ServiceDescriptorProto_Method, int32(desc.Index()))
 	method := &Method{
 		Desc:     desc,
-		GoName:   camelCase(string(desc.Name())),
+		GoName:   strs.GoCamelCase(string(desc.Name())),
 		Parent:   service,
 		Location: loc,
 		Comments: f.comments[newPathKey(loc.Path)],
@@ -1183,6 +1186,56 @@
 	return string(b), nil
 }
 
+// A GoIdent is a Go identifier, consisting of a name and import path.
+// The name is a single identifier and may not be a dot-qualified selector.
+type GoIdent struct {
+	GoName       string
+	GoImportPath GoImportPath
+}
+
+func (id GoIdent) String() string { return fmt.Sprintf("%q.%v", id.GoImportPath, id.GoName) }
+
+// newGoIdent returns the Go identifier for a descriptor.
+func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent {
+	name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".")
+	return GoIdent{
+		GoName:       strs.GoCamelCase(name),
+		GoImportPath: f.GoImportPath,
+	}
+}
+
+// A GoImportPath is the import path of a Go package.
+// For example: "google.golang.org/protobuf/compiler/protogen"
+type GoImportPath string
+
+func (p GoImportPath) String() string { return strconv.Quote(string(p)) }
+
+// Ident returns a GoIdent with s as the GoName and p as the GoImportPath.
+func (p GoImportPath) Ident(s string) GoIdent {
+	return GoIdent{GoName: s, GoImportPath: p}
+}
+
+// A GoPackageName is the name of a Go package, e.g., "protobuf".
+type GoPackageName string
+
+// cleanPackageName converts a string to a valid Go package name.
+func cleanPackageName(name string) GoPackageName {
+	return GoPackageName(strs.GoSanitized(name))
+}
+
+// baseName returns the last path element of the name, with the last dotted suffix removed.
+func baseName(name string) string {
+	// First, find the last element
+	if i := strings.LastIndex(name, "/"); i >= 0 {
+		name = name[i+1:]
+	}
+	// Now drop the suffix
+	if i := strings.LastIndex(name, "."); i >= 0 {
+		name = name[:i]
+	}
+	return name
+}
+
 type pathType int
 
 const (