blob: 5cf953cd4d67a91beff49e7b3148b048607c7eb5 [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
Damien Neil220c2022018-08-15 11:24:18 -07005package protogen
6
7import (
Damien Neild9016772018-08-23 14:39:30 -07008 "fmt"
Damien Neil220c2022018-08-15 11:24:18 -07009 "go/token"
10 "strconv"
11 "strings"
12 "unicode"
13 "unicode/utf8"
Damien Neilabc6fc12018-08-23 14:39:30 -070014
Joe Tsai01ab2962018-09-21 17:44:00 -070015 "github.com/golang/protobuf/v2/reflect/protoreflect"
Damien Neil220c2022018-08-15 11:24:18 -070016)
17
Damien Neild9016772018-08-23 14:39:30 -070018// A GoIdent is a Go identifier, consisting of a name and import path.
Joe Tsai411f3392018-11-16 15:31:26 -080019// The name is a single identifier and may not be a dot-qualified selector.
Damien Neild9016772018-08-23 14:39:30 -070020type GoIdent struct {
21 GoName string
22 GoImportPath GoImportPath
23}
24
25func (id GoIdent) String() string { return fmt.Sprintf("%q.%v", id.GoImportPath, id.GoName) }
Damien Neilc7d07d92018-08-22 13:46:02 -070026
Damien Neilabc6fc12018-08-23 14:39:30 -070027// newGoIdent returns the Go identifier for a descriptor.
28func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent {
29 name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".")
30 return GoIdent{
31 GoName: camelCase(name),
32 GoImportPath: f.GoImportPath,
33 }
34}
35
// A GoImportPath is the import path of a Go package.
// For example: "google.golang.org/genproto/protobuf".
type GoImportPath string
38
39func (p GoImportPath) String() string { return strconv.Quote(string(p)) }
40
Joe Tsaic1c17aa2018-11-16 11:14:14 -080041// Ident returns a GoIdent with s as the GoName and p as the GoImportPath.
42func (p GoImportPath) Ident(s string) GoIdent {
43 return GoIdent{GoName: s, GoImportPath: p}
44}
45
// A GoPackageName is the name of a Go package. For example: "protobuf".
type GoPackageName string
48
Joe Tsaib6405bd2018-11-15 14:44:37 -080049// cleanPackageName converts a string to a valid Go package name.
Damien Neil220c2022018-08-15 11:24:18 -070050func cleanPackageName(name string) GoPackageName {
Joe Tsai40692112019-02-27 20:25:51 -080051 return GoPackageName(cleanGoName(name))
Joe Tsaib6405bd2018-11-15 14:44:37 -080052}
53
// cleanGoName converts a string to a valid Go identifier.
//
// Every rune that is neither a Unicode letter nor a Unicode decimal digit is
// replaced with '_'. The result is then prefixed with '_' if it is a Go
// keyword or if its first rune is not a letter. Consequently the empty string
// becomes "_", and a name that already starts with '_' gains a second one.
func cleanGoName(s string) string {
	// Sanitize the input to the set of valid characters,
	// which must be '_' or be in the Unicode L or Nd categories.
	s = strings.Map(func(r rune) rune {
		if unicode.IsLetter(r) || unicode.IsDigit(r) {
			return r
		}
		return '_'
	}, s)

	// Prepend '_' in the event of a Go keyword conflict or if
	// the identifier is invalid (does not start in the Unicode L category).
	// For an empty string the decoded rune is utf8.RuneError, which is not
	// a letter, so the prefix is added in that case as well.
	r, _ := utf8.DecodeRuneInString(s)
	if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) {
		return "_" + s
	}
	return s
}
73
// baseName returns the last path element of the name, with the last dotted suffix removed.
//
// For example, "dir/file.proto" yields "file" and "dir/file.pb.go" yields
// "file.pb". Only '/' is treated as a path separator.
func baseName(name string) string {
	// First, find the last element.
	if i := strings.LastIndex(name, "/"); i >= 0 {
		name = name[i+1:]
	}
	// Now drop the suffix, i.e. everything from the last '.' onward.
	if i := strings.LastIndex(name, "."); i >= 0 {
		name = name[:i]
	}
	return name
}
Damien Neilc7d07d92018-08-22 13:46:02 -070086
87// camelCase converts a name to CamelCase.
88//
89// If there is an interior underscore followed by a lower case letter,
90// drop the underscore and convert the letter to upper case.
91// There is a remote possibility of this rewrite causing a name collision,
92// but it's so remote we're prepared to pretend it's nonexistent - since the
93// C++ generator lowercases names, it's extremely unlikely to have two fields
94// with different capitalizations.
Damien Neild9016772018-08-23 14:39:30 -070095func camelCase(s string) string {
Damien Neilc7d07d92018-08-22 13:46:02 -070096 // Invariant: if the next letter is lower case, it must be converted
97 // to upper case.
98 // That is, we process a word at a time, where words are marked by _ or
99 // upper case letter. Digits are treated as words.
Joe Tsai2c6f0982018-12-13 18:37:25 -0800100 var b []byte
101 for i := 0; i < len(s); i++ {
Damien Neilc7d07d92018-08-22 13:46:02 -0700102 c := s[i]
103 switch {
Damien Neil3863ee52018-10-09 13:24:04 -0700104 case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
Joe Tsai2c6f0982018-12-13 18:37:25 -0800105 // Skip over '.' in ".{{lowercase}}".
Damien Neilc7d07d92018-08-22 13:46:02 -0700106 case c == '.':
Joe Tsai2c6f0982018-12-13 18:37:25 -0800107 b = append(b, '_') // convert '.' to '_'
Damien Neilc7d07d92018-08-22 13:46:02 -0700108 case c == '_' && (i == 0 || s[i-1] == '.'):
Joe Tsai2c6f0982018-12-13 18:37:25 -0800109 // Convert initial '_' to ensure we start with a capital letter.
110 // Do the same for '_' after '.' to match historic behavior.
111 b = append(b, 'X') // convert '_' to 'X'
Damien Neilc7d07d92018-08-22 13:46:02 -0700112 case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
Joe Tsai2c6f0982018-12-13 18:37:25 -0800113 // Skip over '_' in "_{{lowercase}}".
Damien Neilc7d07d92018-08-22 13:46:02 -0700114 case isASCIIDigit(c):
Joe Tsai2c6f0982018-12-13 18:37:25 -0800115 b = append(b, c)
Damien Neilc7d07d92018-08-22 13:46:02 -0700116 default:
117 // Assume we have a letter now - if not, it's a bogus identifier.
118 // The next word is a sequence of characters that must start upper case.
119 if isASCIILower(c) {
Joe Tsai2c6f0982018-12-13 18:37:25 -0800120 c -= 'a' - 'A' // convert lowercase to uppercase
Damien Neilc7d07d92018-08-22 13:46:02 -0700121 }
Joe Tsai2c6f0982018-12-13 18:37:25 -0800122 b = append(b, c)
123
Damien Neilc7d07d92018-08-22 13:46:02 -0700124 // Accept lower case sequence that follows.
Joe Tsai2c6f0982018-12-13 18:37:25 -0800125 for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
126 b = append(b, s[i+1])
Damien Neilc7d07d92018-08-22 13:46:02 -0700127 }
128 }
129 }
Joe Tsai2c6f0982018-12-13 18:37:25 -0800130 return string(b)
Damien Neilc7d07d92018-08-22 13:46:02 -0700131}
132
// isASCIILower reports whether c is an ASCII lower-case letter ('a' through 'z').
func isASCIILower(c byte) bool {
	return 'a' <= c && c <= 'z'
}
137
// isASCIIDigit reports whether c is an ASCII digit ('0' through '9').
func isASCIIDigit(c byte) bool {
	return '0' <= c && c <= '9'
}