protogen: generate message skeletons

Copy generator.CamelCase for camel-casing names, with one change: Convert
'.' in names to '_'. This removes the need for the CamelCaseSlice function
which operates on a []string representing a name split along '.'s.

Add protogen.Message.

Reformat generated code.

Add regenerate.bash, largely copied from regenerate.sh.

Change-Id: Iecf0bfc43b552f53e458499a328b933b0c9c5f82
Reviewed-on: https://go-review.googlesource.com/130915
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
diff --git a/protogen/names.go b/protogen/names.go
index 1976cfd..b97c47d 100644
--- a/protogen/names.go
+++ b/protogen/names.go
@@ -8,6 +8,9 @@
 	"unicode/utf8"
 )
 
+// A GoIdent is a Go identifier.
+type GoIdent string
+
 // A GoImportPath is the import path of a Go package. e.g., "google.golang.org/genproto/protobuf".
 type GoImportPath string
 
@@ -52,3 +55,62 @@
 	}
 	return name
 }
+
+// camelCase converts a name to CamelCase.
+//
+// If there is an interior underscore followed by a lower case letter,
+// drop the underscore and convert the letter to upper case.
+// There is a remote possibility of this rewrite causing a name collision,
+// but it's so remote we're prepared to pretend it's nonexistent - since the
+// C++ generator lowercases names, it's extremely unlikely to have two fields
+// with different capitalizations.
+func camelCase(s string) GoIdent {
+	if s == "" {
+		return ""
+	}
+	var t []byte
+	i := 0
+	// Invariant: if the next letter is lower case, it must be converted
+	// to upper case.
+	// That is, we process a word at a time, where words are marked by _ or
+	// an upper case letter. Digits are treated as words.
+	for ; i < len(s); i++ {
+		c := s[i]
+		switch {
+		case c == '.':
+			t = append(t, '_') // Convert . to _.
+		case c == '_' && (i == 0 || s[i-1] == '.'):
+			// Convert initial _ to X so we start with a capital letter.
+			// Do the same for _ after .; not strictly necessary, but matches
+			// historic behavior.
+			t = append(t, 'X')
+		case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
+			// Skip the underscore in s.
+		case isASCIIDigit(c):
+			t = append(t, c)
+		default:
+			// Assume we have a letter now - if not, it's a bogus identifier.
+			// The next word is a sequence of characters that must start upper case.
+			if isASCIILower(c) {
+				c ^= ' ' // Make it a capital letter.
+			}
+			t = append(t, c) // Guaranteed not lower case.
+			// Accept lower case sequence that follows.
+			for i+1 < len(s) && isASCIILower(s[i+1]) {
+				i++
+				t = append(t, s[i])
+			}
+		}
+	}
+	return GoIdent(t)
+}
+
+// Is c an ASCII lower-case letter?
+func isASCIILower(c byte) bool {
+	return 'a' <= c && c <= 'z'
+}
+
+// Is c an ASCII digit?
+func isASCIIDigit(c byte) bool {
+	return '0' <= c && c <= '9'
+}
diff --git a/protogen/names_test.go b/protogen/names_test.go
new file mode 100644
index 0000000..021e71a
--- /dev/null
+++ b/protogen/names_test.go
@@ -0,0 +1,33 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package protogen
+
+import "testing"
+
+func TestCamelCase(t *testing.T) {
+	tests := []struct {
+		in   string
+		want GoIdent
+	}{
+		{"one", "One"},
+		{"one_two", "OneTwo"},
+		{"_my_field_name_2", "XMyFieldName_2"},
+		{"Something_Capped", "Something_Capped"},
+		{"my_Name", "My_Name"},
+		{"OneTwo", "OneTwo"},
+		{"_", "X"},
+		{"_a_", "XA_"},
+		{"one.two", "One_Two"},
+		{"one_two.three_four", "OneTwo_ThreeFour"},
+		{"_one._two", "XOne_XTwo"},
+		{"SCREAMING_SNAKE_CASE", "SCREAMING_SNAKE_CASE"},
+		{"double__underscore", "Double_Underscore"},
+	}
+	for _, tc := range tests {
+		if got := camelCase(tc.in); got != tc.want {
+			t.Errorf("CamelCase(%q) = %q, want %q", tc.in, got, tc.want)
+		}
+	}
+}
diff --git a/protogen/protogen.go b/protogen/protogen.go
index b10edad..f499a8d 100644
--- a/protogen/protogen.go
+++ b/protogen/protogen.go
@@ -11,8 +11,12 @@
 package protogen
 
 import (
+	"bufio"
 	"bytes"
 	"fmt"
+	"go/parser"
+	"go/printer"
+	"go/token"
 	"io/ioutil"
 	"os"
 	"path/filepath"
@@ -157,9 +161,15 @@
 		return resp
 	}
 	for _, gf := range gen.genFiles {
+		content, err := gf.Content()
+		if err != nil {
+			return &pluginpb.CodeGeneratorResponse{
+				Error: proto.String(err.Error()),
+			}
+		}
 		resp.File = append(resp.File, &pluginpb.CodeGeneratorResponse_File{
 			Name:    proto.String(gf.path),
-			Content: proto.String(string(gf.Content())),
+			Content: proto.String(string(content)),
 		})
 	}
 	return resp
@@ -171,19 +181,44 @@
 	return f, ok
 }
 
-// A File is a .proto source file.
+// A File describes a .proto source file.
 type File struct {
-	// TODO: Replace with protoreflect.FileDescriptor.
-	Desc *descpb.FileDescriptorProto
+	Desc *descpb.FileDescriptorProto // TODO: protoreflect.FileDescriptor
 
-	// Generate is true if the generator should generate code for this file.
-	Generate bool
+	Messages []*Message // top-level message declarations
+	Generate bool       // true if we should generate code for this file
 }
 
 func newFile(gen *Plugin, p *descpb.FileDescriptorProto) *File {
-	return &File{
+	f := &File{
 		Desc: p,
 	}
+	for _, d := range p.MessageType {
+		f.Messages = append(f.Messages, newMessage(gen, nil, d))
+	}
+	return f
+}
+
+// A Message describes a message.
+type Message struct {
+	Desc *descpb.DescriptorProto // TODO: protoreflect.MessageDescriptor
+
+	GoIdent  GoIdent    // name of the generated Go type
+	Messages []*Message // nested message declarations
+}
+
+func newMessage(gen *Plugin, parent *Message, p *descpb.DescriptorProto) *Message {
+	m := &Message{
+		Desc:    p,
+		GoIdent: camelCase(p.GetName()),
+	}
+	if parent != nil {
+		m.GoIdent = parent.GoIdent + "_" + m.GoIdent
+	}
+	for _, nested := range p.GetNestedType() {
+		m.Messages = append(m.Messages, newMessage(gen, m, nested))
+	}
+	return m
 }
 
 // A GeneratedFile is a generated file.
@@ -219,6 +254,31 @@
 }
 
 // Content returns the contents of the generated file.
-func (g *GeneratedFile) Content() []byte {
-	return g.buf.Bytes()
+func (g *GeneratedFile) Content() ([]byte, error) {
+	if !strings.HasSuffix(g.path, ".go") {
+		return g.buf.Bytes(), nil
+	}
+
+	// Reformat generated code.
+	original := g.buf.Bytes()
+	fset := token.NewFileSet()
+	ast, err := parser.ParseFile(fset, "", original, parser.ParseComments)
+	if err != nil {
+		// Print out the bad code with line numbers.
+		// This should never happen in practice, but it can while changing generated code
+		// so consider this a debugging aid.
+		var src bytes.Buffer
+		s := bufio.NewScanner(bytes.NewReader(original))
+		for line := 1; s.Scan(); line++ {
+			fmt.Fprintf(&src, "%5d\t%s\n", line, s.Bytes())
+		}
+		return nil, fmt.Errorf("%v: unparsable Go source: %v\n%v", g.path, err, src.String())
+	}
+	var out bytes.Buffer
+	if err = (&printer.Config{Mode: printer.TabIndent | printer.UseSpaces, Tabwidth: 8}).Fprint(&out, fset, ast); err != nil {
+		return nil, fmt.Errorf("%v: can not reformat Go source: %v", g.path, err)
+	}
+	// TODO: Patch annotation locations.
+	return out.Bytes(), nil
+
 }