reflect/protodesc: enforce strict validation

Hyrum's Law dictates that if we do not prevent naughty behavior,
people will rely on it. If we do not validate that the provided
file descriptor is correct today, it will be near impossible
to add proper validation checks later on.

The logic added validates that the provided file descriptor is
correct according to the same semantics as protoc,
which was reversed engineered to derive the set of rules implemented here.
The rules are unfortunately complicated because protobuf is a language
full of many non-orthogonal features. While our logic is complicated,
it is still 1/7th the size of the equivalent C++ code!

Change-Id: I6acc5dc3bd2e4c6bea6cd9e81214f8104402602a
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/184837
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/internal/filedesc/desc_list.go b/internal/filedesc/desc_list.go
index bbded60..cd92236 100644
--- a/internal/filedesc/desc_list.go
+++ b/internal/filedesc/desc_list.go
@@ -6,11 +6,15 @@
 
 import (
 	"fmt"
+	"math"
 	"sort"
 	"sync"
 
 	"google.golang.org/protobuf/internal/descfmt"
+	"google.golang.org/protobuf/internal/encoding/wire"
+	"google.golang.org/protobuf/internal/errors"
 	"google.golang.org/protobuf/internal/pragma"
+	"google.golang.org/protobuf/reflect/protoreflect"
 	pref "google.golang.org/protobuf/reflect/protoreflect"
 )
 
@@ -24,60 +28,57 @@
 type Names struct {
 	List []pref.Name
 	once sync.Once
-	has  map[pref.Name]struct{} // protected by once
+	has  map[pref.Name]int // protected by once
 }
 
-func (p *Names) Len() int            { return len(p.List) }
-func (p *Names) Get(i int) pref.Name { return p.List[i] }
-func (p *Names) Has(s pref.Name) bool {
+func (p *Names) Len() int                            { return len(p.List) }
+func (p *Names) Get(i int) pref.Name                 { return p.List[i] }
+func (p *Names) Has(s pref.Name) bool                { return p.lazyInit().has[s] > 0 }
+func (p *Names) Format(s fmt.State, r rune)          { descfmt.FormatList(s, r, p) }
+func (p *Names) ProtoInternal(pragma.DoNotImplement) {}
+func (p *Names) lazyInit() *Names {
 	p.once.Do(func() {
 		if len(p.List) > 0 {
-			p.has = make(map[pref.Name]struct{}, len(p.List))
+			p.has = make(map[pref.Name]int, len(p.List))
 			for _, s := range p.List {
-				p.has[s] = struct{}{}
+				p.has[s] = p.has[s] + 1
 			}
 		}
 	})
-	_, ok := p.has[s]
-	return ok
+	return p
 }
-func (p *Names) Format(s fmt.State, r rune)          { descfmt.FormatList(s, r, p) }
-func (p *Names) ProtoInternal(pragma.DoNotImplement) {}
+
+// CheckValid reports any errors with the set of names with an error message
+// that completes the sentence: "ranges is invalid because it has ..."
+func (p *Names) CheckValid() error {
+	for s, n := range p.lazyInit().has {
+		switch {
+		case n > 1:
+			return errors.New("duplicate name: %q", s)
+		case false && !s.IsValid():
+			// NOTE: The C++ implementation does not validate the identifier.
+			// See https://github.com/protocolbuffers/protobuf/issues/6335.
+			return errors.New("invalid name: %q", s)
+		}
+	}
+	return nil
+}
 
 type EnumRanges struct {
 	List   [][2]pref.EnumNumber // start inclusive; end inclusive
 	once   sync.Once
-	sorted [][2]pref.EnumNumber         // protected by once
-	has    map[pref.EnumNumber]struct{} // protected by once
+	sorted [][2]pref.EnumNumber // protected by once
 }
 
 func (p *EnumRanges) Len() int                     { return len(p.List) }
 func (p *EnumRanges) Get(i int) [2]pref.EnumNumber { return p.List[i] }
 func (p *EnumRanges) Has(n pref.EnumNumber) bool {
-	p.once.Do(func() {
-		for _, r := range p.List {
-			if r[0] == r[1]-0 {
-				if p.has == nil {
-					p.has = make(map[pref.EnumNumber]struct{}, len(p.List))
-				}
-				p.has[r[0]] = struct{}{}
-			} else {
-				p.sorted = append(p.sorted, r)
-			}
-		}
-		sort.Slice(p.sorted, func(i, j int) bool {
-			return p.sorted[i][0] < p.sorted[j][0]
-		})
-	})
-	if _, ok := p.has[n]; ok {
-		return true
-	}
-	for ls := p.sorted; len(ls) > 0; {
+	for ls := p.lazyInit().sorted; len(ls) > 0; {
 		i := len(ls) / 2
-		switch r := ls[i]; {
-		case n < r[0]:
+		switch r := enumRange(ls[i]); {
+		case n < r.Start():
 			ls = ls[:i] // search lower
-		case n > r[1]:
+		case n > r.End():
 			ls = ls[i+1:] // search upper
 		default:
 			return true
@@ -87,42 +88,60 @@
 }
 func (p *EnumRanges) Format(s fmt.State, r rune)          { descfmt.FormatList(s, r, p) }
 func (p *EnumRanges) ProtoInternal(pragma.DoNotImplement) {}
+func (p *EnumRanges) lazyInit() *EnumRanges {
+	p.once.Do(func() {
+		p.sorted = append(p.sorted, p.List...)
+		sort.Slice(p.sorted, func(i, j int) bool {
+			return p.sorted[i][0] < p.sorted[j][0]
+		})
+	})
+	return p
+}
+
+// CheckValid reports any errors with the set of names with an error message
+// that completes the sentence: "ranges is invalid because it has ..."
+func (p *EnumRanges) CheckValid() error {
+	var rp enumRange
+	for i, r := range p.lazyInit().sorted {
+		r := enumRange(r)
+		switch {
+		case !(r.Start() <= r.End()):
+			return errors.New("invalid range: %v", r)
+		case !(rp.End() < r.Start()) && i > 0:
+			return errors.New("overlapping ranges: %v with %v", rp, r)
+		}
+		rp = r
+	}
+	return nil
+}
+
+type enumRange [2]protoreflect.EnumNumber
+
+func (r enumRange) Start() protoreflect.EnumNumber { return r[0] } // inclusive
+func (r enumRange) End() protoreflect.EnumNumber   { return r[1] } // inclusive
+func (r enumRange) String() string {
+	if r.Start() == r.End() {
+		return fmt.Sprintf("%d", r.Start())
+	}
+	return fmt.Sprintf("%d to %d", r.Start(), r.End())
+}
 
 type FieldRanges struct {
 	List   [][2]pref.FieldNumber // start inclusive; end exclusive
 	once   sync.Once
-	sorted [][2]pref.FieldNumber         // protected by once
-	has    map[pref.FieldNumber]struct{} // protected by once
+	sorted [][2]pref.FieldNumber // protected by once
 }
 
 func (p *FieldRanges) Len() int                      { return len(p.List) }
 func (p *FieldRanges) Get(i int) [2]pref.FieldNumber { return p.List[i] }
 func (p *FieldRanges) Has(n pref.FieldNumber) bool {
-	p.once.Do(func() {
-		for _, r := range p.List {
-			if r[0] == r[1]-1 {
-				if p.has == nil {
-					p.has = make(map[pref.FieldNumber]struct{}, len(p.List))
-				}
-				p.has[r[0]] = struct{}{}
-			} else {
-				p.sorted = append(p.sorted, r)
-			}
-		}
-		sort.Slice(p.sorted, func(i, j int) bool {
-			return p.sorted[i][0] < p.sorted[j][0]
-		})
-	})
-	if _, ok := p.has[n]; ok {
-		return true
-	}
-	for ls := p.sorted; len(ls) > 0; {
+	for ls := p.lazyInit().sorted; len(ls) > 0; {
 		i := len(ls) / 2
-		switch r := ls[i]; {
-		case n < r[0]:
+		switch r := fieldRange(ls[i]); {
+		case n < r.Start():
 			ls = ls[:i] // search lower
-		case n >= r[1]:
-			ls = ls[i+1:] // search higher
+		case n > r.End():
+			ls = ls[i+1:] // search upper
 		default:
 			return true
 		}
@@ -131,6 +150,76 @@
 }
 func (p *FieldRanges) Format(s fmt.State, r rune)          { descfmt.FormatList(s, r, p) }
 func (p *FieldRanges) ProtoInternal(pragma.DoNotImplement) {}
+func (p *FieldRanges) lazyInit() *FieldRanges {
+	p.once.Do(func() {
+		p.sorted = append(p.sorted, p.List...)
+		sort.Slice(p.sorted, func(i, j int) bool {
+			return p.sorted[i][0] < p.sorted[j][0]
+		})
+	})
+	return p
+}
+
+// CheckValid reports any errors with the set of ranges with an error message
+// that completes the sentence: "ranges is invalid because it has ..."
+func (p *FieldRanges) CheckValid(isMessageSet bool) error {
+	var rp fieldRange
+	for i, r := range p.lazyInit().sorted {
+		r := fieldRange(r)
+		switch {
+		case !isValidFieldNumber(r.Start(), isMessageSet):
+			return errors.New("invalid field number: %d", r.Start())
+		case !isValidFieldNumber(r.End(), isMessageSet):
+			return errors.New("invalid field number: %d", r.End())
+		case !(r.Start() <= r.End()):
+			return errors.New("invalid range: %v", r)
+		case !(rp.End() < r.Start()) && i > 0:
+			return errors.New("overlapping ranges: %v with %v", rp, r)
+		}
+		rp = r
+	}
+	return nil
+}
+
+// isValidFieldNumber reports whether the field number is valid.
+// Unlike the FieldNumber.IsValid method, it allows ranges that cover the
+// reserved number range.
+func isValidFieldNumber(n protoreflect.FieldNumber, isMessageSet bool) bool {
+	if isMessageSet {
+		return wire.MinValidNumber <= n && n <= math.MaxInt32
+	}
+	return wire.MinValidNumber <= n && n <= wire.MaxValidNumber
+}
+
+// CheckOverlap reports an error if p and q overlap.
+func (p *FieldRanges) CheckOverlap(q *FieldRanges) error {
+	rps := p.lazyInit().sorted
+	rqs := q.lazyInit().sorted
+	for pi, qi := 0, 0; pi < len(rps) && qi < len(rqs); {
+		rp := fieldRange(rps[pi])
+		rq := fieldRange(rqs[qi])
+		if !(rp.End() < rq.Start() || rq.End() < rp.Start()) {
+			return errors.New("overlapping ranges: %v with %v", rp, rq)
+		}
+		if rp.Start() < rq.Start() {
+			pi++
+		} else {
+			qi++
+		}
+	}
+	return nil
+}
+
+type fieldRange [2]protoreflect.FieldNumber
+
+func (r fieldRange) Start() protoreflect.FieldNumber { return r[0] }     // inclusive
+func (r fieldRange) End() protoreflect.FieldNumber   { return r[1] - 1 } // inclusive
+func (r fieldRange) String() string {
+	if r.Start() == r.End() {
+		return fmt.Sprintf("%d", r.Start())
+	}
+	return fmt.Sprintf("%d to %d", r.Start(), r.End())
+}
 
 type FieldNumbers struct {
 	List []pref.FieldNumber
diff --git a/internal/filedesc/desc_test.go b/internal/filedesc/desc_test.go
index 5bea386..66ce518 100644
--- a/internal/filedesc/desc_test.go
+++ b/internal/filedesc/desc_test.go
@@ -39,22 +39,8 @@
 		MessageType: []*descriptorpb.DescriptorProto{{
 			Name: scalar.String("A"),
 			Options: &descriptorpb.MessageOptions{
-				MapEntry:   scalar.Bool(true),
 				Deprecated: scalar.Bool(true),
 			},
-			Field: []*descriptorpb.FieldDescriptorProto{{
-				Name:    scalar.String("key"),
-				Number:  scalar.Int32(1),
-				Options: &descriptorpb.FieldOptions{Deprecated: scalar.Bool(true)},
-				Label:   descriptorpb.FieldDescriptorProto_Label(pref.Optional).Enum(),
-				Type:    descriptorpb.FieldDescriptorProto_Type(pref.StringKind).Enum(),
-			}, {
-				Name:     scalar.String("value"),
-				Number:   scalar.Int32(2),
-				Label:    descriptorpb.FieldDescriptorProto_Label(pref.Optional).Enum(),
-				Type:     descriptorpb.FieldDescriptorProto_Type(pref.MessageKind).Enum(),
-				TypeName: scalar.String(".test.B"),
-			}},
 		}, {
 			Name: scalar.String("B"),
 			Field: []*descriptorpb.FieldDescriptorProto{{
@@ -86,7 +72,7 @@
 				Number:   scalar.Int32(4),
 				Label:    descriptorpb.FieldDescriptorProto_Label(pref.Repeated).Enum(),
 				Type:     descriptorpb.FieldDescriptorProto_Type(pref.MessageKind).Enum(),
-				TypeName: scalar.String(".test.A"),
+				TypeName: scalar.String(".test.B.FieldFourEntry"),
 			}, {
 				Name:    scalar.String("field_five"),
 				Number:  scalar.Int32(5),
@@ -119,6 +105,24 @@
 				{Start: scalar.Int32(1000), End: scalar.Int32(2000)},
 				{Start: scalar.Int32(3000), End: scalar.Int32(3001), Options: new(descriptorpb.ExtensionRangeOptions)},
 			},
+			NestedType: []*descriptorpb.DescriptorProto{{
+				Name: scalar.String("FieldFourEntry"),
+				Field: []*descriptorpb.FieldDescriptorProto{{
+					Name:   scalar.String("key"),
+					Number: scalar.Int32(1),
+					Label:  descriptorpb.FieldDescriptorProto_Label(pref.Optional).Enum(),
+					Type:   descriptorpb.FieldDescriptorProto_Type(pref.StringKind).Enum(),
+				}, {
+					Name:     scalar.String("value"),
+					Number:   scalar.Int32(2),
+					Label:    descriptorpb.FieldDescriptorProto_Label(pref.Optional).Enum(),
+					Type:     descriptorpb.FieldDescriptorProto_Type(pref.MessageKind).Enum(),
+					TypeName: scalar.String(".test.B"),
+				}},
+				Options: &descriptorpb.MessageOptions{
+					MapEntry: scalar.Bool(true),
+				},
+			}},
 		}, {
 			Name: scalar.String("C"),
 			NestedType: []*descriptorpb.DescriptorProto{{
@@ -168,9 +172,9 @@
 			Name:     scalar.String("X"),
 			Number:   scalar.Int32(1000),
 			Label:    descriptorpb.FieldDescriptorProto_Label(pref.Repeated).Enum(),
-			Type:     descriptorpb.FieldDescriptorProto_Type(pref.MessageKind).Enum(),
+			Type:     descriptorpb.FieldDescriptorProto_Type(pref.EnumKind).Enum(),
 			Options:  &descriptorpb.FieldOptions{Packed: scalar.Bool(true)},
-			TypeName: scalar.String(".test.C"),
+			TypeName: scalar.String(".test.E1"),
 			Extendee: scalar.String(".test.B"),
 		}},
 		Service: []*descriptorpb.ServiceDescriptorProto{{
@@ -239,57 +243,10 @@
 				"Name":          pref.Name("A"),
 				"FullName":      pref.FullName("test.A"),
 				"IsPlaceholder": false,
-				"IsMapEntry":    true,
+				"IsMapEntry":    false,
 				"Options": &descriptorpb.MessageOptions{
-					MapEntry:   scalar.Bool(true),
 					Deprecated: scalar.Bool(true),
 				},
-				"Fields": M{
-					"Len": 2,
-					"ByNumber:1": M{
-						"Parent":            M{"FullName": pref.FullName("test.A")},
-						"Index":             0,
-						"Name":              pref.Name("key"),
-						"FullName":          pref.FullName("test.A.key"),
-						"Number":            pref.FieldNumber(1),
-						"Cardinality":       pref.Optional,
-						"Kind":              pref.StringKind,
-						"Options":           &descriptorpb.FieldOptions{Deprecated: scalar.Bool(true)},
-						"HasJSONName":       false,
-						"JSONName":          "key",
-						"IsPacked":          false,
-						"IsList":            false,
-						"IsMap":             false,
-						"IsExtension":       false,
-						"IsWeak":            false,
-						"Default":           "",
-						"ContainingOneof":   nil,
-						"ContainingMessage": M{"FullName": pref.FullName("test.A")},
-						"Message":           nil,
-						"Enum":              nil,
-					},
-					"ByNumber:2": M{
-						"Parent":            M{"FullName": pref.FullName("test.A")},
-						"Index":             1,
-						"Name":              pref.Name("value"),
-						"FullName":          pref.FullName("test.A.value"),
-						"Number":            pref.FieldNumber(2),
-						"Cardinality":       pref.Optional,
-						"Kind":              pref.MessageKind,
-						"JSONName":          "value",
-						"IsPacked":          false,
-						"IsList":            false,
-						"IsMap":             false,
-						"IsExtension":       false,
-						"IsWeak":            false,
-						"Default":           nil,
-						"ContainingOneof":   nil,
-						"ContainingMessage": M{"FullName": pref.FullName("test.A")},
-						"Message":           M{"FullName": pref.FullName("test.B"), "IsPlaceholder": false},
-						"Enum":              nil,
-					},
-					"ByNumber:3": nil,
-				},
 				"Oneofs":          M{"Len": 0},
 				"RequiredNumbers": M{"Len": 0},
 				"ExtensionRanges": M{"Len": 0},
@@ -339,7 +296,7 @@
 						"MapKey":      M{"Kind": pref.StringKind},
 						"MapValue":    M{"Kind": pref.MessageKind, "Message": M{"FullName": pref.FullName("test.B")}},
 						"Default":     nil,
-						"Message":     M{"FullName": pref.FullName("test.A"), "IsPlaceholder": false},
+						"Message":     M{"FullName": pref.FullName("test.B.FieldFourEntry"), "IsPlaceholder": false},
 					},
 					"ByNumber:5": M{
 						"Cardinality": pref.Repeated,
@@ -417,6 +374,56 @@
 				},
 				"ExtensionRangeOptions:0": (*descriptorpb.ExtensionRangeOptions)(nil),
 				"ExtensionRangeOptions:1": new(descriptorpb.ExtensionRangeOptions),
+				"Messages": M{
+					"Get:0": M{
+						"Fields": M{
+							"Len": 2,
+							"ByNumber:1": M{
+								"Parent":            M{"FullName": pref.FullName("test.B.FieldFourEntry")},
+								"Index":             0,
+								"Name":              pref.Name("key"),
+								"FullName":          pref.FullName("test.B.FieldFourEntry.key"),
+								"Number":            pref.FieldNumber(1),
+								"Cardinality":       pref.Optional,
+								"Kind":              pref.StringKind,
+								"Options":           (*descriptorpb.FieldOptions)(nil),
+								"HasJSONName":       false,
+								"JSONName":          "key",
+								"IsPacked":          false,
+								"IsList":            false,
+								"IsMap":             false,
+								"IsExtension":       false,
+								"IsWeak":            false,
+								"Default":           "",
+								"ContainingOneof":   nil,
+								"ContainingMessage": M{"FullName": pref.FullName("test.B.FieldFourEntry")},
+								"Message":           nil,
+								"Enum":              nil,
+							},
+							"ByNumber:2": M{
+								"Parent":            M{"FullName": pref.FullName("test.B.FieldFourEntry")},
+								"Index":             1,
+								"Name":              pref.Name("value"),
+								"FullName":          pref.FullName("test.B.FieldFourEntry.value"),
+								"Number":            pref.FieldNumber(2),
+								"Cardinality":       pref.Optional,
+								"Kind":              pref.MessageKind,
+								"JSONName":          "value",
+								"IsPacked":          false,
+								"IsList":            false,
+								"IsMap":             false,
+								"IsExtension":       false,
+								"IsWeak":            false,
+								"Default":           nil,
+								"ContainingOneof":   nil,
+								"ContainingMessage": M{"FullName": pref.FullName("test.B.FieldFourEntry")},
+								"Message":           M{"FullName": pref.FullName("test.B"), "IsPlaceholder": false},
+								"Enum":              nil,
+							},
+							"ByNumber:3": nil,
+						},
+					},
+				},
 			},
 			"Get:2": M{
 				"Name":  pref.Name("C"),
@@ -475,7 +482,7 @@
 				"Name":              pref.Name("X"),
 				"Number":            pref.FieldNumber(1000),
 				"Cardinality":       pref.Repeated,
-				"Kind":              pref.MessageKind,
+				"Kind":              pref.EnumKind,
 				"IsExtension":       true,
 				"IsPacked":          true,
 				"IsList":            true,
@@ -484,7 +491,7 @@
 				"MapValue":          nil,
 				"ContainingOneof":   nil,
 				"ContainingMessage": M{"FullName": pref.FullName("test.B"), "IsPlaceholder": false},
-				"Message":           M{"FullName": pref.FullName("test.C"), "IsPlaceholder": false},
+				"Enum":              M{"FullName": pref.FullName("test.E1"), "IsPlaceholder": false},
 				"Options":           &descriptorpb.FieldOptions{Packed: scalar.Bool(true)},
 			},
 		},
@@ -598,22 +605,7 @@
 	Path:    "path/to/file.proto"
 	Package: test
 	Messages: [{
-		Name:       A
-		IsMapEntry: true
-		Fields: [{
-			Name:        key
-			Number:      1
-			Cardinality: optional
-			Kind:        string
-			JSONName:    "key"
-		}, {
-			Name:        value
-			Number:      2
-			Cardinality: optional
-			Kind:        message
-			JSONName:    "value"
-			Message:     test.B
-		}]
+		Name: A
 	}, {
 		Name: B
 		Fields: [{
@@ -680,6 +672,24 @@
 		ReservedRanges:  [100:200, 300]
 		RequiredNumbers: [6]
 		ExtensionRanges: [1000:2000, 3000]
+		Messages: [{
+			Name:       FieldFourEntry
+			IsMapEntry: true
+			Fields: [{
+				Name:        key
+				Number:      1
+				Cardinality: optional
+				Kind:        string
+				JSONName:    "key"
+			}, {
+				Name:        value
+				Number:      2
+				Cardinality: optional
+				Kind:        message
+				JSONName:    "value"
+				Message:     test.B
+			}]
+		}]
 	}, {
 		Name: C
 		Messages: [{
@@ -727,13 +737,13 @@
 		Name:        X
 		Number:      1000
 		Cardinality: repeated
-		Kind:        message
+		Kind:        enum
 		JSONName:    "X"
 		IsPacked:    true
 		IsExtension: true
 		IsList:      true
 		Extendee:    test.B
-		Message:     test.C
+		Enum:        test.E1
 	}]
 	Services: [{
 		Name: S