Joe Tsai | 411f339 | 2018-11-16 15:31:26 -0800 | [diff] [blame] | 1 | // Copyright 2018 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Damien Neil | 220c202 | 2018-08-15 11:24:18 -0700 | [diff] [blame] | 5 | package protogen |
| 6 | |
| 7 | import ( |
Damien Neil | d901677 | 2018-08-23 14:39:30 -0700 | [diff] [blame] | 8 | "fmt" |
Damien Neil | 220c202 | 2018-08-15 11:24:18 -0700 | [diff] [blame] | 9 | "go/token" |
| 10 | "strconv" |
| 11 | "strings" |
| 12 | "unicode" |
| 13 | "unicode/utf8" |
Damien Neil | abc6fc1 | 2018-08-23 14:39:30 -0700 | [diff] [blame] | 14 | |
Joe Tsai | 01ab296 | 2018-09-21 17:44:00 -0700 | [diff] [blame] | 15 | "github.com/golang/protobuf/v2/reflect/protoreflect" |
Damien Neil | 220c202 | 2018-08-15 11:24:18 -0700 | [diff] [blame] | 16 | ) |
| 17 | |
Damien Neil | d901677 | 2018-08-23 14:39:30 -0700 | [diff] [blame] | 18 | // A GoIdent is a Go identifier, consisting of a name and import path. |
Joe Tsai | 411f339 | 2018-11-16 15:31:26 -0800 | [diff] [blame] | 19 | // The name is a single identifier and may not be a dot-qualified selector. |
Damien Neil | d901677 | 2018-08-23 14:39:30 -0700 | [diff] [blame] | 20 | type GoIdent struct { |
| 21 | GoName string |
| 22 | GoImportPath GoImportPath |
| 23 | } |
| 24 | |
| 25 | func (id GoIdent) String() string { return fmt.Sprintf("%q.%v", id.GoImportPath, id.GoName) } |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 26 | |
Damien Neil | abc6fc1 | 2018-08-23 14:39:30 -0700 | [diff] [blame] | 27 | // newGoIdent returns the Go identifier for a descriptor. |
| 28 | func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent { |
| 29 | name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".") |
| 30 | return GoIdent{ |
| 31 | GoName: camelCase(name), |
| 32 | GoImportPath: f.GoImportPath, |
| 33 | } |
| 34 | } |
| 35 | |
Damien Neil | 220c202 | 2018-08-15 11:24:18 -0700 | [diff] [blame] | 36 | // A GoImportPath is the import path of a Go package. e.g., "google.golang.org/genproto/protobuf". |
| 37 | type GoImportPath string |
| 38 | |
| 39 | func (p GoImportPath) String() string { return strconv.Quote(string(p)) } |
| 40 | |
Joe Tsai | c1c17aa | 2018-11-16 11:14:14 -0800 | [diff] [blame] | 41 | // Ident returns a GoIdent with s as the GoName and p as the GoImportPath. |
| 42 | func (p GoImportPath) Ident(s string) GoIdent { |
| 43 | return GoIdent{GoName: s, GoImportPath: p} |
| 44 | } |
| 45 | |
Damien Neil | 220c202 | 2018-08-15 11:24:18 -0700 | [diff] [blame] | 46 | // A GoPackageName is the name of a Go package. e.g., "protobuf". |
| 47 | type GoPackageName string |
| 48 | |
Joe Tsai | b6405bd | 2018-11-15 14:44:37 -0800 | [diff] [blame] | 49 | // cleanPackageName converts a string to a valid Go package name. |
Damien Neil | 220c202 | 2018-08-15 11:24:18 -0700 | [diff] [blame] | 50 | func cleanPackageName(name string) GoPackageName { |
Joe Tsai | b6405bd | 2018-11-15 14:44:37 -0800 | [diff] [blame] | 51 | return GoPackageName(cleanGoName(name, false)) |
| 52 | } |
| 53 | |
// cleanGoName converts s into a valid Go identifier.
//
// Every rune that is neither a Unicode letter nor a Unicode decimal digit is
// replaced with '_'. If mustExport is set, the result is made to start with
// an upper-case letter; otherwise '_' is prepended whenever the sanitized
// name is a Go keyword or does not begin with a letter.
func cleanGoName(s string, mustExport bool) string {
	// Restrict the name to letters, decimal digits, and '_'.
	sanitize := func(r rune) rune {
		if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
			return '_'
		}
		return r
	}
	s = strings.Map(sanitize, s)
	first, size := utf8.DecodeRuneInString(s)

	if !mustExport {
		// A keyword, or a name that does not start with a letter
		// (including the empty string), gets a leading '_'.
		if !unicode.IsLetter(first) || token.Lookup(s).IsKeyword() {
			return "_" + s
		}
		return s
	}

	// Prefer upper-casing the first rune. Not every letter has an
	// upper-case form, and the first rune may not be a letter at all;
	// in those cases fall back to an 'X' prefix.
	if upper := unicode.ToUpper(first); unicode.IsUpper(upper) {
		return string(upper) + s[size:]
	}
	return "X" + s
}
| 85 | |
// baseName returns the final '/'-separated element of name with its last
// dotted suffix (the final '.' and everything after it) removed.
func baseName(name string) string {
	// LastIndex yields -1 when there is no '/', so +1 keeps the whole string.
	last := name[strings.LastIndex(name, "/")+1:]
	if dot := strings.LastIndex(last, "."); dot >= 0 {
		return last[:dot]
	}
	return last
}
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 98 | |
| 99 | // camelCase converts a name to CamelCase. |
| 100 | // |
| 101 | // If there is an interior underscore followed by a lower case letter, |
| 102 | // drop the underscore and convert the letter to upper case. |
| 103 | // There is a remote possibility of this rewrite causing a name collision, |
| 104 | // but it's so remote we're prepared to pretend it's nonexistent - since the |
| 105 | // C++ generator lowercases names, it's extremely unlikely to have two fields |
| 106 | // with different capitalizations. |
Damien Neil | d901677 | 2018-08-23 14:39:30 -0700 | [diff] [blame] | 107 | func camelCase(s string) string { |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 108 | // Invariant: if the next letter is lower case, it must be converted |
| 109 | // to upper case. |
| 110 | // That is, we process a word at a time, where words are marked by _ or |
| 111 | // upper case letter. Digits are treated as words. |
Joe Tsai | 2c6f098 | 2018-12-13 18:37:25 -0800 | [diff] [blame] | 112 | var b []byte |
| 113 | for i := 0; i < len(s); i++ { |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 114 | c := s[i] |
| 115 | switch { |
Damien Neil | 3863ee5 | 2018-10-09 13:24:04 -0700 | [diff] [blame] | 116 | case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]): |
Joe Tsai | 2c6f098 | 2018-12-13 18:37:25 -0800 | [diff] [blame] | 117 | // Skip over '.' in ".{{lowercase}}". |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 118 | case c == '.': |
Joe Tsai | 2c6f098 | 2018-12-13 18:37:25 -0800 | [diff] [blame] | 119 | b = append(b, '_') // convert '.' to '_' |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 120 | case c == '_' && (i == 0 || s[i-1] == '.'): |
Joe Tsai | 2c6f098 | 2018-12-13 18:37:25 -0800 | [diff] [blame] | 121 | // Convert initial '_' to ensure we start with a capital letter. |
| 122 | // Do the same for '_' after '.' to match historic behavior. |
| 123 | b = append(b, 'X') // convert '_' to 'X' |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 124 | case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]): |
Joe Tsai | 2c6f098 | 2018-12-13 18:37:25 -0800 | [diff] [blame] | 125 | // Skip over '_' in "_{{lowercase}}". |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 126 | case isASCIIDigit(c): |
Joe Tsai | 2c6f098 | 2018-12-13 18:37:25 -0800 | [diff] [blame] | 127 | b = append(b, c) |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 128 | default: |
| 129 | // Assume we have a letter now - if not, it's a bogus identifier. |
| 130 | // The next word is a sequence of characters that must start upper case. |
| 131 | if isASCIILower(c) { |
Joe Tsai | 2c6f098 | 2018-12-13 18:37:25 -0800 | [diff] [blame] | 132 | c -= 'a' - 'A' // convert lowercase to uppercase |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 133 | } |
Joe Tsai | 2c6f098 | 2018-12-13 18:37:25 -0800 | [diff] [blame] | 134 | b = append(b, c) |
| 135 | |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 136 | // Accept lower case sequence that follows. |
Joe Tsai | 2c6f098 | 2018-12-13 18:37:25 -0800 | [diff] [blame] | 137 | for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ { |
| 138 | b = append(b, s[i+1]) |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 139 | } |
| 140 | } |
| 141 | } |
Joe Tsai | 2c6f098 | 2018-12-13 18:37:25 -0800 | [diff] [blame] | 142 | return string(b) |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 143 | } |
| 144 | |
// isASCIILower reports whether c is an ASCII lower-case letter ('a'..'z').
func isASCIILower(c byte) bool {
	// Byte subtraction wraps around, so anything below 'a' becomes a large
	// value and fails the single comparison.
	return c-'a' <= 'z'-'a'
}
| 149 | |
// isASCIIDigit reports whether c is an ASCII decimal digit ('0'..'9').
func isASCIIDigit(c byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large
	// value and fails the single comparison.
	return c-'0' <= '9'-'0'
}