blob: b97c47d5a3bd511a37976ec24d0d67a97e8ce650 [file] [log] [blame]
Damien Neil220c2022018-08-15 11:24:18 -07001package protogen
2
3import (
4 "go/token"
5 "strconv"
6 "strings"
7 "unicode"
8 "unicode/utf8"
9)
10
// A GoIdent is a Go identifier.
//
// It is the type returned by camelCase.
type GoIdent string
13
// A GoImportPath is the import path of a Go package,
// for example "google.golang.org/genproto/protobuf".
type GoImportPath string

// String returns the import path as a double-quoted Go string literal,
// escaped by strconv.Quote.
func (p GoImportPath) String() string {
	quoted := strconv.Quote(string(p))
	return quoted
}
18
// A GoPackageName is the name of a Go package, e.g., "protobuf".
//
// cleanPackageName produces values of this type from arbitrary strings.
type GoPackageName string
21
22// cleanPacakgeName converts a string to a valid Go package name.
23func cleanPackageName(name string) GoPackageName {
24 name = strings.Map(badToUnderscore, name)
25 // Identifier must not be keyword: insert _.
26 if token.Lookup(name).IsKeyword() {
27 name = "_" + name
28 }
29 // Identifier must not begin with digit: insert _.
30 if r, _ := utf8.DecodeRuneInString(name); unicode.IsDigit(r) {
31 name = "_" + name
32 }
33 return GoPackageName(name)
34}
35
// badToUnderscore is the strings.Map callback used to turn package names,
// which may be dotted in the input .proto file, into Go names.
// Letters and digits pass through unchanged; every other rune (dot, dash,
// and so on) becomes an underscore. An underscore therefore maps to itself.
func badToUnderscore(r rune) rune {
	switch {
	case unicode.IsLetter(r), unicode.IsDigit(r):
		return r
	default:
		return '_'
	}
}
45
// baseName returns the final slash-separated element of name with
// everything from its last dot onward removed.
func baseName(name string) string {
	// LastIndexByte yields -1 when there is no slash, so the +1 makes the
	// slice start at 0 and keeps the whole string.
	elem := name[strings.LastIndexByte(name, '/')+1:]

	// Strip the trailing ".suffix", if the element has one.
	if dot := strings.LastIndexByte(elem, '.'); dot >= 0 {
		return elem[:dot]
	}
	return elem
}
Damien Neilc7d07d92018-08-22 13:46:02 -070058
59// camelCase converts a name to CamelCase.
60//
61// If there is an interior underscore followed by a lower case letter,
62// drop the underscore and convert the letter to upper case.
63// There is a remote possibility of this rewrite causing a name collision,
64// but it's so remote we're prepared to pretend it's nonexistent - since the
65// C++ generator lowercases names, it's extremely unlikely to have two fields
66// with different capitalizations.
67func camelCase(s string) GoIdent {
68 if s == "" {
69 return ""
70 }
71 var t []byte
72 i := 0
73 // Invariant: if the next letter is lower case, it must be converted
74 // to upper case.
75 // That is, we process a word at a time, where words are marked by _ or
76 // upper case letter. Digits are treated as words.
77 for ; i < len(s); i++ {
78 c := s[i]
79 switch {
80 case c == '.':
81 t = append(t, '_') // Convert . to _.
82 case c == '_' && (i == 0 || s[i-1] == '.'):
83 // Convert initial _ to X so we start with a capital letter.
84 // Do the same for _ after .; not strictly necessary, but matches
85 // historic behavior.
86 t = append(t, 'X')
87 case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
88 // Skip the underscore in s.
89 case isASCIIDigit(c):
90 t = append(t, c)
91 default:
92 // Assume we have a letter now - if not, it's a bogus identifier.
93 // The next word is a sequence of characters that must start upper case.
94 if isASCIILower(c) {
95 c ^= ' ' // Make it a capital letter.
96 }
97 t = append(t, c) // Guaranteed not lower case.
98 // Accept lower case sequence that follows.
99 for i+1 < len(s) && isASCIILower(s[i+1]) {
100 i++
101 t = append(t, s[i])
102 }
103 }
104 }
105 return GoIdent(t)
106}
107
// isASCIILower reports whether c is an ASCII lower-case letter ('a'..'z').
func isASCIILower(c byte) bool {
	// byte arithmetic wraps, so anything below 'a' becomes a large value
	// and fails the single comparison.
	return c-'a' <= 'z'-'a'
}
112
// isASCIIDigit reports whether c is an ASCII decimal digit ('0'..'9').
func isASCIIDigit(c byte) bool {
	// byte arithmetic wraps, so anything below '0' becomes a large value
	// and fails the single comparison.
	return c-'0' <= '9'-'0'
}