blob: ea3d05771a4c85c1ad6f9f7ad59145ce942fb00d [file] [log] [blame]
Damien Neil220c2022018-08-15 11:24:18 -07001package protogen
2
3import (
Damien Neild9016772018-08-23 14:39:30 -07004 "fmt"
Damien Neil220c2022018-08-15 11:24:18 -07005 "go/token"
6 "strconv"
7 "strings"
8 "unicode"
9 "unicode/utf8"
10)
11
// A GoIdent is a Go identifier, consisting of a name and import path.
type GoIdent struct {
	GoName       string       // identifier name
	GoImportPath GoImportPath // import path of the identifier's package
}

// String returns the identifier as its quoted import path, a dot, and its
// name, e.g. `"example.com/pb".Message`.
//
// The import path is converted to a plain string before formatting. Passing
// a GoImportPath directly to %q makes fmt call its String method, which
// already quotes the path, and then quote that result a second time.
func (id GoIdent) String() string {
	return fmt.Sprintf("%q.%v", string(id.GoImportPath), id.GoName)
}

// A GoImportPath is the import path of a Go package,
// e.g. "google.golang.org/genproto/protobuf".
type GoImportPath string

// String returns p as a double-quoted Go string literal.
func (p GoImportPath) String() string { return strconv.Quote(string(p)) }
24
// GoPackageName holds the name of a Go package (for example "protobuf"),
// as opposed to the import path used to reach it.
type GoPackageName string
27
28// cleanPacakgeName converts a string to a valid Go package name.
29func cleanPackageName(name string) GoPackageName {
30 name = strings.Map(badToUnderscore, name)
31 // Identifier must not be keyword: insert _.
32 if token.Lookup(name).IsKeyword() {
33 name = "_" + name
34 }
35 // Identifier must not begin with digit: insert _.
36 if r, _ := utf8.DecodeRuneInString(name); unicode.IsDigit(r) {
37 name = "_" + name
38 }
39 return GoPackageName(name)
40}
41
// badToUnderscore is a rune-mapping function for turning package names,
// which may be dotted in the input .proto file, into Go names. Letters,
// digits, and underscores pass through unchanged; every other rune (such as
// a dot or dash) is replaced with an underscore.
func badToUnderscore(r rune) rune {
	switch {
	case r == '_', unicode.IsLetter(r), unicode.IsDigit(r):
		return r
	default:
		return '_'
	}
}
51
// baseName strips everything up to and including the final '/' from name,
// then removes the final '.' and whatever follows it.
// For example, "dir/file.proto" yields "file".
func baseName(name string) string {
	slash := strings.LastIndexByte(name, '/')
	base := name[slash+1:] // slash is -1 when there is no '/', keeping all of name
	if dot := strings.LastIndexByte(base, '.'); dot != -1 {
		return base[:dot]
	}
	return base
}
Damien Neilc7d07d92018-08-22 13:46:02 -070064
65// camelCase converts a name to CamelCase.
66//
67// If there is an interior underscore followed by a lower case letter,
68// drop the underscore and convert the letter to upper case.
69// There is a remote possibility of this rewrite causing a name collision,
70// but it's so remote we're prepared to pretend it's nonexistent - since the
71// C++ generator lowercases names, it's extremely unlikely to have two fields
72// with different capitalizations.
Damien Neild9016772018-08-23 14:39:30 -070073func camelCase(s string) string {
Damien Neilc7d07d92018-08-22 13:46:02 -070074 if s == "" {
75 return ""
76 }
77 var t []byte
78 i := 0
79 // Invariant: if the next letter is lower case, it must be converted
80 // to upper case.
81 // That is, we process a word at a time, where words are marked by _ or
82 // upper case letter. Digits are treated as words.
83 for ; i < len(s); i++ {
84 c := s[i]
85 switch {
86 case c == '.':
87 t = append(t, '_') // Convert . to _.
88 case c == '_' && (i == 0 || s[i-1] == '.'):
89 // Convert initial _ to X so we start with a capital letter.
90 // Do the same for _ after .; not strictly necessary, but matches
91 // historic behavior.
92 t = append(t, 'X')
93 case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
94 // Skip the underscore in s.
95 case isASCIIDigit(c):
96 t = append(t, c)
97 default:
98 // Assume we have a letter now - if not, it's a bogus identifier.
99 // The next word is a sequence of characters that must start upper case.
100 if isASCIILower(c) {
101 c ^= ' ' // Make it a capital letter.
102 }
103 t = append(t, c) // Guaranteed not lower case.
104 // Accept lower case sequence that follows.
105 for i+1 < len(s) && isASCIILower(s[i+1]) {
106 i++
107 t = append(t, s[i])
108 }
109 }
110 }
Damien Neild9016772018-08-23 14:39:30 -0700111 return string(t)
Damien Neilc7d07d92018-08-22 13:46:02 -0700112}
113
// isASCIILower reports whether c is an ASCII lower case letter ('a'..'z').
func isASCIILower(c byte) bool {
	if c < 'a' {
		return false
	}
	return c <= 'z'
}
118
// isASCIIDigit reports whether c is an ASCII decimal digit ('0'..'9').
func isASCIIDigit(c byte) bool {
	if c < '0' {
		return false
	}
	return c <= '9'
}