Damien Neil | 220c202 | 2018-08-15 11:24:18 -0700 | [diff] [blame] | 1 | package protogen |
| 2 | |
| 3 | import ( |
Damien Neil | d901677 | 2018-08-23 14:39:30 -0700 | [diff] [blame] | 4 | "fmt" |
Damien Neil | 220c202 | 2018-08-15 11:24:18 -0700 | [diff] [blame] | 5 | "go/token" |
| 6 | "strconv" |
| 7 | "strings" |
| 8 | "unicode" |
| 9 | "unicode/utf8" |
Damien Neil | abc6fc1 | 2018-08-23 14:39:30 -0700 | [diff] [blame^] | 10 | |
| 11 | "google.golang.org/proto/reflect/protoreflect" |
Damien Neil | 220c202 | 2018-08-15 11:24:18 -0700 | [diff] [blame] | 12 | ) |
| 13 | |
Damien Neil | d901677 | 2018-08-23 14:39:30 -0700 | [diff] [blame] | 14 | // A GoIdent is a Go identifier, consisting of a name and import path. |
| 15 | type GoIdent struct { |
| 16 | GoName string |
| 17 | GoImportPath GoImportPath |
| 18 | } |
| 19 | |
| 20 | func (id GoIdent) String() string { return fmt.Sprintf("%q.%v", id.GoImportPath, id.GoName) } |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 21 | |
Damien Neil | abc6fc1 | 2018-08-23 14:39:30 -0700 | [diff] [blame^] | 22 | // newGoIdent returns the Go identifier for a descriptor. |
| 23 | func newGoIdent(f *File, d protoreflect.Descriptor) GoIdent { |
| 24 | name := strings.TrimPrefix(string(d.FullName()), string(f.Desc.Package())+".") |
| 25 | return GoIdent{ |
| 26 | GoName: camelCase(name), |
| 27 | GoImportPath: f.GoImportPath, |
| 28 | } |
| 29 | } |
| 30 | |
// GoImportPath holds the import path of a Go package,
// for example "google.golang.org/genproto/protobuf".
type GoImportPath string

// String renders the import path as a double-quoted Go string literal.
func (p GoImportPath) String() string {
	quoted := strconv.AppendQuote(nil, string(p))
	return string(quoted)
}
| 35 | |
| 36 | // A GoPackageName is the name of a Go package. e.g., "protobuf". |
| 37 | type GoPackageName string |
| 38 | |
| 39 | // cleanPacakgeName converts a string to a valid Go package name. |
| 40 | func cleanPackageName(name string) GoPackageName { |
| 41 | name = strings.Map(badToUnderscore, name) |
| 42 | // Identifier must not be keyword: insert _. |
| 43 | if token.Lookup(name).IsKeyword() { |
| 44 | name = "_" + name |
| 45 | } |
| 46 | // Identifier must not begin with digit: insert _. |
| 47 | if r, _ := utf8.DecodeRuneInString(name); unicode.IsDigit(r) { |
| 48 | name = "_" + name |
| 49 | } |
| 50 | return GoPackageName(name) |
| 51 | } |
| 52 | |
// badToUnderscore is the rune mapping used when turning package names, which
// may be dotted in the input .proto file, into Go names. Letters, digits and
// '_' pass through unchanged; every other rune (dot, dash, ...) becomes '_'.
func badToUnderscore(r rune) rune {
	switch {
	case r == '_', unicode.IsLetter(r), unicode.IsDigit(r):
		return r
	default:
		return '_'
	}
}
| 62 | |
// baseName returns the final slash-separated element of name with its last
// dotted suffix stripped.
func baseName(name string) string {
	// LastIndex yields -1 when there is no slash, so +1 keeps the whole string.
	elem := name[strings.LastIndex(name, "/")+1:]
	dot := strings.LastIndex(elem, ".")
	if dot < 0 {
		// No suffix to drop.
		return elem
	}
	return elem[:dot]
}
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 75 | |
| 76 | // camelCase converts a name to CamelCase. |
| 77 | // |
| 78 | // If there is an interior underscore followed by a lower case letter, |
| 79 | // drop the underscore and convert the letter to upper case. |
| 80 | // There is a remote possibility of this rewrite causing a name collision, |
| 81 | // but it's so remote we're prepared to pretend it's nonexistent - since the |
| 82 | // C++ generator lowercases names, it's extremely unlikely to have two fields |
| 83 | // with different capitalizations. |
Damien Neil | d901677 | 2018-08-23 14:39:30 -0700 | [diff] [blame] | 84 | func camelCase(s string) string { |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 85 | if s == "" { |
| 86 | return "" |
| 87 | } |
| 88 | var t []byte |
| 89 | i := 0 |
| 90 | // Invariant: if the next letter is lower case, it must be converted |
| 91 | // to upper case. |
| 92 | // That is, we process a word at a time, where words are marked by _ or |
| 93 | // upper case letter. Digits are treated as words. |
| 94 | for ; i < len(s); i++ { |
| 95 | c := s[i] |
| 96 | switch { |
| 97 | case c == '.': |
| 98 | t = append(t, '_') // Convert . to _. |
| 99 | case c == '_' && (i == 0 || s[i-1] == '.'): |
| 100 | // Convert initial _ to X so we start with a capital letter. |
| 101 | // Do the same for _ after .; not strictly necessary, but matches |
| 102 | // historic behavior. |
| 103 | t = append(t, 'X') |
| 104 | case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]): |
| 105 | // Skip the underscore in s. |
| 106 | case isASCIIDigit(c): |
| 107 | t = append(t, c) |
| 108 | default: |
| 109 | // Assume we have a letter now - if not, it's a bogus identifier. |
| 110 | // The next word is a sequence of characters that must start upper case. |
| 111 | if isASCIILower(c) { |
| 112 | c ^= ' ' // Make it a capital letter. |
| 113 | } |
| 114 | t = append(t, c) // Guaranteed not lower case. |
| 115 | // Accept lower case sequence that follows. |
| 116 | for i+1 < len(s) && isASCIILower(s[i+1]) { |
| 117 | i++ |
| 118 | t = append(t, s[i]) |
| 119 | } |
| 120 | } |
| 121 | } |
Damien Neil | d901677 | 2018-08-23 14:39:30 -0700 | [diff] [blame] | 122 | return string(t) |
Damien Neil | c7d07d9 | 2018-08-22 13:46:02 -0700 | [diff] [blame] | 123 | } |
| 124 | |
// isASCIILower reports whether c is an ASCII lower-case letter ('a'..'z').
func isASCIILower(c byte) bool {
	if c < 'a' {
		return false
	}
	return c <= 'z'
}
| 129 | |
// isASCIIDigit reports whether c is an ASCII decimal digit ('0'..'9').
func isASCIIDigit(c byte) bool {
	if c < '0' {
		return false
	}
	return c <= '9'
}