blob: 0c220bc50f54ba07de37bbc2df86a32e263caa1f [file] [log] [blame]
Damien Neil220c2022018-08-15 11:24:18 -07001package protogen
2
3import (
Damien Neild9016772018-08-23 14:39:30 -07004 "fmt"
Damien Neil220c2022018-08-15 11:24:18 -07005 "go/token"
6 "strconv"
7 "strings"
8 "unicode"
9 "unicode/utf8"
Damien Neilabc6fc12018-08-23 14:39:30 -070010
11 "google.golang.org/proto/reflect/protoreflect"
Damien Neil220c2022018-08-15 11:24:18 -070012)
13
Damien Neild9016772018-08-23 14:39:30 -070014// A GoIdent is a Go identifier, consisting of a name and import path.
15type GoIdent struct {
16 GoName string
17 GoImportPath GoImportPath
18}
19
20func (id GoIdent) String() string { return fmt.Sprintf("%q.%v", id.GoImportPath, id.GoName) }
Damien Neilc7d07d92018-08-22 13:46:02 -070021
Damien Neilabc6fc12018-08-23 14:39:30 -070022// newGoIdent returns the Go identifier for a descriptor.
23func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent {
24 name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".")
25 return GoIdent{
26 GoName: camelCase(name),
27 GoImportPath: f.GoImportPath,
28 }
29}
30
// A GoImportPath is the import path of a Go package. e.g., "google.golang.org/genproto/protobuf".
type GoImportPath string

// String returns the import path as a double-quoted Go string literal,
// as produced by strconv.Quote.
func (p GoImportPath) String() string { return strconv.Quote(string(p)) }
35
// A GoPackageName is the name of a Go package. e.g., "protobuf".
type GoPackageName string
38
39// cleanPacakgeName converts a string to a valid Go package name.
40func cleanPackageName(name string) GoPackageName {
41 name = strings.Map(badToUnderscore, name)
42 // Identifier must not be keyword: insert _.
43 if token.Lookup(name).IsKeyword() {
44 name = "_" + name
45 }
46 // Identifier must not begin with digit: insert _.
47 if r, _ := utf8.DecodeRuneInString(name); unicode.IsDigit(r) {
48 name = "_" + name
49 }
50 return GoPackageName(name)
51}
52
// badToUnderscore is the mapping function used to generate Go names from package names,
// which can be dotted in the input .proto file. It replaces non-identifier characters such as
// dot or dash with underscore.
//
// Letters and digits (as classified by the unicode package) are returned
// unchanged; every other rune, including underscore itself, maps to '_'.
func badToUnderscore(r rune) rune {
	if unicode.IsLetter(r) || unicode.IsDigit(r) {
		return r
	}
	return '_'
}
62
// baseName returns the last path element of the name, with the last dotted suffix removed.
//
// The last path element is everything after the final "/"; the suffix removed
// is everything from the final "." of that element onward. A name with neither
// separator is returned unchanged.
func baseName(name string) string {
	// First, find the last element.
	if i := strings.LastIndex(name, "/"); i >= 0 {
		name = name[i+1:]
	}
	// Now drop the suffix.
	if i := strings.LastIndex(name, "."); i >= 0 {
		name = name[:i]
	}
	return name
}
Damien Neilc7d07d92018-08-22 13:46:02 -070075
// camelCase converts a name to CamelCase.
//
// If there is an interior underscore followed by a lower case letter,
// drop the underscore and convert the letter to upper case.
// There is a remote possibility of this rewrite causing a name collision,
// but it's so remote we're prepared to pretend it's nonexistent - since the
// C++ generator lowercases names, it's extremely unlikely to have two fields
// with different capitalizations.
//
// The input is processed byte by byte; only ASCII letters and digits receive
// special treatment. Dots become underscores, and an underscore at the start
// of the name or directly after a dot becomes 'X'.
func camelCase(s string) string {
	if s == "" {
		return ""
	}
	// The output is never longer than the input: each input byte yields at
	// most one output byte.
	t := make([]byte, 0, len(s))
	// Invariant: if the next letter is lower case, it must be converted
	// to upper case.
	// That is, we process a word at a time, where words are marked by _ or
	// upper case letter. Digits are treated as words.
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '.':
			t = append(t, '_') // Convert . to _.
		case c == '_' && (i == 0 || s[i-1] == '.'):
			// Convert initial _ to X so we start with a capital letter.
			// Do the same for _ after .; not strictly necessary, but matches
			// historic behavior.
			t = append(t, 'X')
		case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
			// Skip the underscore in s; the following lower case letter is
			// capitalized by the default case on the next iteration.
		case isASCIIDigit(c):
			t = append(t, c)
		default:
			// Assume we have a letter now - if not, it's a bogus identifier.
			// The next word is a sequence of characters that must start upper case.
			if isASCIILower(c) {
				c ^= ' ' // Make it a capital letter (flips the ASCII case bit).
			}
			t = append(t, c) // Guaranteed not lower case.
			// Accept lower case sequence that follows.
			for i+1 < len(s) && isASCIILower(s[i+1]) {
				i++
				t = append(t, s[i])
			}
		}
	}
	return string(t)
}

// isASCIILower reports whether c is an ASCII lower-case letter.
func isASCIILower(c byte) bool {
	return 'a' <= c && c <= 'z'
}

// isASCIIDigit reports whether c is an ASCII digit.
func isASCIIDigit(c byte) bool {
	return '0' <= c && c <= '9'
}