compiler/protogen, internal/strs, internal/impl: expose enum Go name derivation
In order to migrate v1 to wrap v2, we need a way to reproduce the
awful enum "names" that v1 used, each of which was the concatenation
of the proto package with the Go identifier used for the enum.
To support this:
* Move the camel case logic from compiler/protogen to internal/strs
* Add a small stub in internal/impl to expose this functionality
Change-Id: I8ff31daa9ae541e5788dc04d2e89eae1574877e4
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/191637
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/compiler/protogen/names.go b/compiler/protogen/names.go
deleted file mode 100644
index ae41a5a..0000000
--- a/compiler/protogen/names.go
+++ /dev/null
@@ -1,141 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package protogen
-
-import (
- "fmt"
- "go/token"
- "strconv"
- "strings"
- "unicode"
- "unicode/utf8"
-
- "google.golang.org/protobuf/reflect/protoreflect"
-)
-
-// A GoIdent is a Go identifier, consisting of a name and import path.
-// The name is a single identifier and may not be a dot-qualified selector.
-type GoIdent struct {
- GoName string
- GoImportPath GoImportPath
-}
-
-func (id GoIdent) String() string { return fmt.Sprintf("%q.%v", id.GoImportPath, id.GoName) }
-
-// newGoIdent returns the Go identifier for a descriptor.
-func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent {
- name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".")
- return GoIdent{
- GoName: camelCase(name),
- GoImportPath: f.GoImportPath,
- }
-}
-
-// A GoImportPath is the import path of a Go package. e.g., "google.golang.org/genproto/protobuf".
-type GoImportPath string
-
-func (p GoImportPath) String() string { return strconv.Quote(string(p)) }
-
-// Ident returns a GoIdent with s as the GoName and p as the GoImportPath.
-func (p GoImportPath) Ident(s string) GoIdent {
- return GoIdent{GoName: s, GoImportPath: p}
-}
-
-// A GoPackageName is the name of a Go package. e.g., "protobuf".
-type GoPackageName string
-
-// cleanPackageName converts a string to a valid Go package name.
-func cleanPackageName(name string) GoPackageName {
- return GoPackageName(cleanGoName(name))
-}
-
-// cleanGoName converts a string to a valid Go identifier.
-func cleanGoName(s string) string {
- // Sanitize the input to the set of valid characters,
- // which must be '_' or be in the Unicode L or N categories.
- s = strings.Map(func(r rune) rune {
- if unicode.IsLetter(r) || unicode.IsDigit(r) {
- return r
- }
- return '_'
- }, s)
-
- // Prepend '_' in the event of a Go keyword conflict or if
- // the identifier is invalid (does not start in the Unicode L category).
- r, _ := utf8.DecodeRuneInString(s)
- if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) {
- return "_" + s
- }
- return s
-}
-
-// baseName returns the last path element of the name, with the last dotted suffix removed.
-func baseName(name string) string {
- // First, find the last element
- if i := strings.LastIndex(name, "/"); i >= 0 {
- name = name[i+1:]
- }
- // Now drop the suffix
- if i := strings.LastIndex(name, "."); i >= 0 {
- name = name[:i]
- }
- return name
-}
-
-// camelCase converts a name to CamelCase.
-//
-// If there is an interior underscore followed by a lower case letter,
-// drop the underscore and convert the letter to upper case.
-// There is a remote possibility of this rewrite causing a name collision,
-// but it's so remote we're prepared to pretend it's nonexistent - since the
-// C++ generator lowercases names, it's extremely unlikely to have two fields
-// with different capitalizations.
-func camelCase(s string) string {
- // Invariant: if the next letter is lower case, it must be converted
- // to upper case.
- // That is, we process a word at a time, where words are marked by _ or
- // upper case letter. Digits are treated as words.
- var b []byte
- for i := 0; i < len(s); i++ {
- c := s[i]
- switch {
- case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
- // Skip over '.' in ".{{lowercase}}".
- case c == '.':
- b = append(b, '_') // convert '.' to '_'
- case c == '_' && (i == 0 || s[i-1] == '.'):
- // Convert initial '_' to ensure we start with a capital letter.
- // Do the same for '_' after '.' to match historic behavior.
- b = append(b, 'X') // convert '_' to 'X'
- case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
- // Skip over '_' in "_{{lowercase}}".
- case isASCIIDigit(c):
- b = append(b, c)
- default:
- // Assume we have a letter now - if not, it's a bogus identifier.
- // The next word is a sequence of characters that must start upper case.
- if isASCIILower(c) {
- c -= 'a' - 'A' // convert lowercase to uppercase
- }
- b = append(b, c)
-
- // Accept lower case sequence that follows.
- for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
- b = append(b, s[i+1])
- }
- }
- }
- return string(b)
-}
-
-// Is c an ASCII lower-case letter?
-func isASCIILower(c byte) bool {
- return 'a' <= c && c <= 'z'
-}
-
-// Is c an ASCII digit?
-func isASCIIDigit(c byte) bool {
- return '0' <= c && c <= '9'
-}
diff --git a/compiler/protogen/names_test.go b/compiler/protogen/names_test.go
deleted file mode 100644
index 6f03cc9..0000000
--- a/compiler/protogen/names_test.go
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package protogen
-
-import "testing"
-
-func TestCamelCase(t *testing.T) {
- tests := []struct {
- in, want string
- }{
- {"", ""},
- {"one", "One"},
- {"one_two", "OneTwo"},
- {"_my_field_name_2", "XMyFieldName_2"},
- {"Something_Capped", "Something_Capped"},
- {"my_Name", "My_Name"},
- {"OneTwo", "OneTwo"},
- {"_", "X"},
- {"_a_", "XA_"},
- {"one.two", "OneTwo"},
- {"one.Two", "One_Two"},
- {"one_two.three_four", "OneTwoThreeFour"},
- {"one_two.Three_four", "OneTwo_ThreeFour"},
- {"_one._two", "XOne_XTwo"},
- {"SCREAMING_SNAKE_CASE", "SCREAMING_SNAKE_CASE"},
- {"double__underscore", "Double_Underscore"},
- {"camelCase", "CamelCase"},
- {"go2proto", "Go2Proto"},
- {"世界", "世界"},
- {"x世界", "X世界"},
- {"foo_bar世界", "FooBar世界"},
- }
- for _, tc := range tests {
- if got := camelCase(tc.in); got != tc.want {
- t.Errorf("CamelCase(%q) = %q, want %q", tc.in, got, tc.want)
- }
- }
-}
-
-func TestCleanGoName(t *testing.T) {
- tests := []struct {
- in, want string
- }{
- {"", "_"},
- {"boo", "boo"},
- {"Boo", "Boo"},
- {"ßoo", "ßoo"},
- {"default", "_default"},
- {"hello", "hello"},
- {"hello-world!!", "hello_world__"},
- {"hello-\xde\xad\xbe\xef\x00", "hello_____"},
- {"hello 世界", "hello_世界"},
- {"世界", "世界"},
- }
- for _, tc := range tests {
- if got := cleanGoName(tc.in); got != tc.want {
- t.Errorf("cleanGoName(%q) = %q, want %q", tc.in, got, tc.want)
- }
- }
-}
diff --git a/compiler/protogen/protogen.go b/compiler/protogen/protogen.go
index ec03704..3be898d 100644
--- a/compiler/protogen/protogen.go
+++ b/compiler/protogen/protogen.go
@@ -30,6 +30,7 @@
"google.golang.org/protobuf/encoding/prototext"
"google.golang.org/protobuf/internal/fieldnum"
+ "google.golang.org/protobuf/internal/strs"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/reflect/protodesc"
"google.golang.org/protobuf/reflect/protoreflect"
@@ -431,7 +432,7 @@
}
}
f.GoDescriptorIdent = GoIdent{
- GoName: "File_" + cleanGoName(p.GetName()),
+ GoName: "File_" + strs.GoSanitized(p.GetName()),
GoImportPath: f.GoImportPath,
}
f.GeneratedFilenamePrefix = prefix
@@ -499,6 +500,8 @@
}
// A semicolon-delimited suffix delimits the import path and package name.
if i := strings.Index(opt, ";"); i >= 0 {
+ // TODO: The package name is explicitly provided by the .proto file.
+ // Rather than sanitizing it, we should pass it verbatim.
return cleanPackageName(opt[i+1:]), GoImportPath(opt[:i])
}
// The presence of a slash implies there's an import path.
@@ -756,7 +759,7 @@
default:
loc = message.Location.appendPath(fieldnum.DescriptorProto_Field, int32(desc.Index()))
}
- camelCased := camelCase(string(desc.Name()))
+ camelCased := strs.GoCamelCase(string(desc.Name()))
var parentPrefix string
if message != nil {
parentPrefix = message.GoIdent.GoName + "_"
@@ -826,7 +829,7 @@
func newOneof(gen *Plugin, f *File, message *Message, desc protoreflect.OneofDescriptor) *Oneof {
loc := message.Location.appendPath(fieldnum.DescriptorProto_OneofDecl, int32(desc.Index()))
- camelCased := camelCase(string(desc.Name()))
+ camelCased := strs.GoCamelCase(string(desc.Name()))
parentPrefix := message.GoIdent.GoName + "_"
return &Oneof{
Desc: desc,
@@ -860,7 +863,7 @@
loc := f.location(fieldnum.FileDescriptorProto_Service, int32(desc.Index()))
service := &Service{
Desc: desc,
- GoName: camelCase(string(desc.Name())),
+ GoName: strs.GoCamelCase(string(desc.Name())),
Location: loc,
Comments: f.comments[newPathKey(loc.Path)],
}
@@ -889,7 +892,7 @@
loc := service.Location.appendPath(fieldnum.ServiceDescriptorProto_Method, int32(desc.Index()))
method := &Method{
Desc: desc,
- GoName: camelCase(string(desc.Name())),
+ GoName: strs.GoCamelCase(string(desc.Name())),
Parent: service,
Location: loc,
Comments: f.comments[newPathKey(loc.Path)],
@@ -1183,6 +1186,56 @@
return string(b), nil
}
+// A GoIdent is a Go identifier, consisting of a name and import path.
+// The name is a single identifier and may not be a dot-qualified selector.
+type GoIdent struct {
+ GoName string
+ GoImportPath GoImportPath
+}
+
+func (id GoIdent) String() string { return fmt.Sprintf("%q.%v", id.GoImportPath, id.GoName) }
+
+// newGoIdent returns the Go identifier for a descriptor.
+func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent {
+ name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".")
+ return GoIdent{
+ GoName: strs.GoCamelCase(name),
+ GoImportPath: f.GoImportPath,
+ }
+}
+
+// A GoImportPath is the import path of a Go package.
+// For example: "google.golang.org/protobuf/compiler/protogen"
+type GoImportPath string
+
+func (p GoImportPath) String() string { return strconv.Quote(string(p)) }
+
+// Ident returns a GoIdent with s as the GoName and p as the GoImportPath.
+func (p GoImportPath) Ident(s string) GoIdent {
+ return GoIdent{GoName: s, GoImportPath: p}
+}
+
+// A GoPackageName is the name of a Go package. For example: "protobuf"
+type GoPackageName string
+
+// cleanPackageName converts a string to a valid Go package name.
+func cleanPackageName(name string) GoPackageName {
+ return GoPackageName(strs.GoSanitized(name))
+}
+
+// baseName returns the last path element of the name, with the last dotted suffix removed.
+func baseName(name string) string {
+ // First, find the last element
+ if i := strings.LastIndex(name, "/"); i >= 0 {
+ name = name[i+1:]
+ }
+ // Now drop the suffix
+ if i := strings.LastIndex(name, "."); i >= 0 {
+ name = name[:i]
+ }
+ return name
+}
+
type pathType int
const (