blob: dc4c86c37d7c4523b8504a3fc95a265a5faf395e [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
Damien Neil220c2022018-08-15 11:24:18 -07005package protogen
6
7import (
Damien Neild9016772018-08-23 14:39:30 -07008 "fmt"
Damien Neil220c2022018-08-15 11:24:18 -07009 "go/token"
10 "strconv"
11 "strings"
12 "unicode"
13 "unicode/utf8"
Damien Neilabc6fc12018-08-23 14:39:30 -070014
Joe Tsai01ab2962018-09-21 17:44:00 -070015 "github.com/golang/protobuf/v2/reflect/protoreflect"
Damien Neil220c2022018-08-15 11:24:18 -070016)
17
Damien Neild9016772018-08-23 14:39:30 -070018// A GoIdent is a Go identifier, consisting of a name and import path.
Joe Tsai411f3392018-11-16 15:31:26 -080019// The name is a single identifier and may not be a dot-qualified selector.
Damien Neild9016772018-08-23 14:39:30 -070020type GoIdent struct {
21 GoName string
22 GoImportPath GoImportPath
23}
24
25func (id GoIdent) String() string { return fmt.Sprintf("%q.%v", id.GoImportPath, id.GoName) }
Damien Neilc7d07d92018-08-22 13:46:02 -070026
Damien Neilabc6fc12018-08-23 14:39:30 -070027// newGoIdent returns the Go identifier for a descriptor.
28func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent {
29 name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".")
30 return GoIdent{
31 GoName: camelCase(name),
32 GoImportPath: f.GoImportPath,
33 }
34}
35
// A GoImportPath is the import path of a Go package,
// for example "google.golang.org/genproto/protobuf".
type GoImportPath string

// String returns the import path as a double-quoted Go string literal.
func (p GoImportPath) String() string {
	raw := string(p)
	return strconv.Quote(raw)
}
40
Joe Tsaic1c17aa2018-11-16 11:14:14 -080041// Ident returns a GoIdent with s as the GoName and p as the GoImportPath.
42func (p GoImportPath) Ident(s string) GoIdent {
43 return GoIdent{GoName: s, GoImportPath: p}
44}
45
Damien Neil220c2022018-08-15 11:24:18 -070046// A GoPackageName is the name of a Go package. e.g., "protobuf".
47type GoPackageName string
48
Joe Tsaib6405bd2018-11-15 14:44:37 -080049// cleanPackageName converts a string to a valid Go package name.
Damien Neil220c2022018-08-15 11:24:18 -070050func cleanPackageName(name string) GoPackageName {
Joe Tsaib6405bd2018-11-15 14:44:37 -080051 return GoPackageName(cleanGoName(name, false))
52}
53
// cleanGoName converts a string to a valid Go identifier.
//
// Every rune that is neither a Unicode letter nor a Unicode digit is
// replaced with '_'. A prefix ("_" normally, "X" when mustExport is set) is
// then added if the result would otherwise be empty, be a Go keyword, or
// start with a digit.
//
// If mustExport, then the returned identifier is exported if not already:
// a leading lower-case letter is replaced by its upper-case form when it has
// one, and any other leading rune that is not upper case gets the "X" prefix.
func cleanGoName(name string, mustExport bool) string {
	name = strings.Map(func(r rune) rune {
		if unicode.IsLetter(r) || unicode.IsDigit(r) {
			return r
		}
		return '_'
	}, name)

	prefix := "_"
	if mustExport {
		prefix = "X"
	}

	r, n := utf8.DecodeRuneInString(name)
	switch {
	case name == "":
		// An empty string is not an identifier; the prefix alone is.
		return prefix
	case token.Lookup(name).IsKeyword(), unicode.IsDigit(r):
		return prefix + name
	case mustExport && !unicode.IsUpper(r):
		if unicode.IsLower(r) {
			// Some lower-case letters (e.g. U+00DF) have no upper-case
			// mapping; ToUpper returns them unchanged, which would leave
			// the identifier unexported. Only use the mapped rune when it
			// really is upper case.
			if u := unicode.ToUpper(r); unicode.IsUpper(u) {
				return string(u) + name[n:]
			}
		}
		return prefix + name
	}
	return name
}
80
// isGoPredeclaredIdentifier is the set of identifiers that the Go language
// predeclares in the universe block: types, constants, the zero value nil,
// and built-in functions. Looking up any other name yields false.
//
// The table follows the "Predeclared identifiers" section of the Go
// specification, including the later additions any, comparable, clear, max
// and min. Keep it sorted when adding entries.
var isGoPredeclaredIdentifier = map[string]bool{
	"any":        true,
	"append":     true,
	"bool":       true,
	"byte":       true,
	"cap":        true,
	"clear":      true,
	"close":      true,
	"comparable": true,
	"complex":    true,
	"complex128": true,
	"complex64":  true,
	"copy":       true,
	"delete":     true,
	"error":      true,
	"false":      true,
	"float32":    true,
	"float64":    true,
	"imag":       true,
	"int":        true,
	"int16":      true,
	"int32":      true,
	"int64":      true,
	"int8":       true,
	"iota":       true,
	"len":        true,
	"make":       true,
	"max":        true,
	"min":        true,
	"new":        true,
	"nil":        true,
	"panic":      true,
	"print":      true,
	"println":    true,
	"real":       true,
	"recover":    true,
	"rune":       true,
	"string":     true,
	"true":       true,
	"uint":       true,
	"uint16":     true,
	"uint32":     true,
	"uint64":     true,
	"uint8":      true,
	"uintptr":    true,
}
122
// baseName returns the last path element of the name, with the last dotted
// suffix removed. For example, "path/to/file.proto" yields "file".
func baseName(name string) string {
	// LastIndex reports -1 when there is no slash, so adding one keeps the
	// whole string in that case and otherwise skips past the final slash.
	elem := name[strings.LastIndex(name, "/")+1:]

	// Cut at the final dot of that element, if it has one.
	if dot := strings.LastIndex(elem, "."); dot >= 0 {
		return elem[:dot]
	}
	return elem
}
Damien Neilc7d07d92018-08-22 13:46:02 -0700135
136// camelCase converts a name to CamelCase.
137//
138// If there is an interior underscore followed by a lower case letter,
139// drop the underscore and convert the letter to upper case.
140// There is a remote possibility of this rewrite causing a name collision,
141// but it's so remote we're prepared to pretend it's nonexistent - since the
142// C++ generator lowercases names, it's extremely unlikely to have two fields
143// with different capitalizations.
Damien Neild9016772018-08-23 14:39:30 -0700144func camelCase(s string) string {
Damien Neilc7d07d92018-08-22 13:46:02 -0700145 if s == "" {
146 return ""
147 }
148 var t []byte
149 i := 0
150 // Invariant: if the next letter is lower case, it must be converted
151 // to upper case.
152 // That is, we process a word at a time, where words are marked by _ or
153 // upper case letter. Digits are treated as words.
154 for ; i < len(s); i++ {
155 c := s[i]
156 switch {
Damien Neil3863ee52018-10-09 13:24:04 -0700157 case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
158 // Skip over .<lowercase>, to match historic behavior.
Damien Neilc7d07d92018-08-22 13:46:02 -0700159 case c == '.':
160 t = append(t, '_') // Convert . to _.
161 case c == '_' && (i == 0 || s[i-1] == '.'):
162 // Convert initial _ to X so we start with a capital letter.
163 // Do the same for _ after .; not strictly necessary, but matches
164 // historic behavior.
165 t = append(t, 'X')
166 case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
167 // Skip the underscore in s.
168 case isASCIIDigit(c):
169 t = append(t, c)
170 default:
171 // Assume we have a letter now - if not, it's a bogus identifier.
172 // The next word is a sequence of characters that must start upper case.
173 if isASCIILower(c) {
174 c ^= ' ' // Make it a capital letter.
175 }
176 t = append(t, c) // Guaranteed not lower case.
177 // Accept lower case sequence that follows.
178 for i+1 < len(s) && isASCIILower(s[i+1]) {
179 i++
180 t = append(t, s[i])
181 }
182 }
183 }
Damien Neild9016772018-08-23 14:39:30 -0700184 return string(t)
Damien Neilc7d07d92018-08-22 13:46:02 -0700185}
186
// isASCIILower reports whether c is an ASCII lower-case letter ('a'..'z').
func isASCIILower(c byte) bool {
	// Byte subtraction wraps around, so anything below 'a' becomes a large
	// value and fails the single comparison.
	return c-'a' <= 'z'-'a'
}
191
// isASCIIDigit reports whether c is an ASCII decimal digit ('0'..'9').
func isASCIIDigit(c byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large
	// value and fails the single comparison.
	return c-'0' <= '9'-'0'
}