all: support enforce_utf8 override
In 2014, when proto3 was being developed, there were a number of early
adopters of the new syntax. Before the finalization of proto3 when
it was released in open-source in July 2016, a decision was made to
strictly validate strings in proto3. However, some of the early adopters
were already using invalid UTF-8 with string fields.
The google.protobuf.FieldOptions.enforce_utf8 option exists only to support
those grandfathered users, allowing them to opt out of the validation logic.
Practical use of that option in open source is impossible even if a user
specifies the proto1_legacy build tag since it requires a hacked
variant of descriptor.proto that is not externally available.
This CL supports enforce_utf8 by modifying internal/filedesc to
expose the flag if it detects it in the raw descriptor.
We add an strs.EnforceUTF8 function as a centralized place to determine
whether to perform validation. Validation opt-out is supported
only in builds with legacy support.
We implement support for validating UTF-8 in all proto3 string fields,
even if they are backed by a Go []byte.
Change-Id: I9c0628b84909bc7181125f09db730c80d490e485
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/186002
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/proto/decode_test.go b/proto/decode_test.go
index 5fa3a0f..ce2e1af 100644
--- a/proto/decode_test.go
+++ b/proto/decode_test.go
@@ -12,13 +12,20 @@
protoV1 "github.com/golang/protobuf/proto"
"google.golang.org/protobuf/encoding/prototext"
"google.golang.org/protobuf/internal/encoding/pack"
+ "google.golang.org/protobuf/internal/filedesc"
+ "google.golang.org/protobuf/internal/flags"
"google.golang.org/protobuf/proto"
+ "google.golang.org/protobuf/reflect/protodesc"
+ "google.golang.org/protobuf/reflect/protoreflect"
pref "google.golang.org/protobuf/reflect/protoreflect"
+ "google.golang.org/protobuf/reflect/prototype"
+ "google.golang.org/protobuf/runtime/protoimpl"
legacypb "google.golang.org/protobuf/internal/testprotos/legacy"
legacy1pb "google.golang.org/protobuf/internal/testprotos/legacy/proto2.v0.0.0-20160225-2fc053c5"
testpb "google.golang.org/protobuf/internal/testprotos/test"
test3pb "google.golang.org/protobuf/internal/testprotos/test3"
+ "google.golang.org/protobuf/types/descriptorpb"
)
type testProto struct {
@@ -85,6 +92,23 @@
}
}
+// TestDecodeNoEnforceUTF8 unmarshals the wire encoding of every entry in
+// noEnforceUTF8TestProtos into a fresh message of the expected Go type.
+//
+// When flags.Proto1Legacy is set, Unmarshal must succeed; otherwise it must
+// report an error. Only the error result is checked; the decoded contents are
+// not compared against the expected message.
+func TestDecodeNoEnforceUTF8(t *testing.T) {
+	for _, test := range noEnforceUTF8TestProtos {
+		for _, want := range test.decodeTo {
+			t.Run(fmt.Sprintf("%s (%T)", test.desc, want), func(t *testing.T) {
+				// Allocate a new, empty message of the same concrete type as want.
+				got := reflect.New(reflect.TypeOf(want).Elem()).Interface().(proto.Message)
+				err := proto.Unmarshal(test.wire, got)
+				switch {
+				case flags.Proto1Legacy && err != nil:
+					t.Errorf("Unmarshal returned unexpected error: %v\nMessage:\n%v", err, marshalText(want))
+				case !flags.Proto1Legacy && err == nil:
+					// err is nil in this branch, so it is not included in the message.
+					t.Errorf("Unmarshal did not return expected error for invalid UTF8\nMessage:\n%v", marshalText(want))
+				}
+			})
+		}
+	}
+}
+
var testProtos = []testProto{
{
desc: "basic scalar types",
@@ -1442,6 +1466,129 @@
},
}
+// noEnforceUTF8TestProtos lists wire encodings that place the invalid UTF-8
+// byte sequence "abc\xff" into each field of TestNoEnforceUTF8 in turn:
+// singular, repeated, and oneof, each in a Go string and a Go []byte variant.
+var noEnforceUTF8TestProtos = []testProto{
+	{
+		desc: "invalid UTF-8 in optional string field",
+		decodeTo: []proto.Message{&TestNoEnforceUTF8{
+			OptionalString: "abc\xff",
+		}},
+		wire: pack.Message{
+			pack.Tag{1, pack.BytesType}, pack.String("abc\xff"),
+		}.Marshal(),
+	},
+	{
+		desc: "invalid UTF-8 in optional string field of Go bytes",
+		decodeTo: []proto.Message{&TestNoEnforceUTF8{
+			OptionalBytes: []byte("abc\xff"),
+		}},
+		wire: pack.Message{
+			pack.Tag{2, pack.BytesType}, pack.String("abc\xff"),
+		}.Marshal(),
+	},
+	{
+		desc: "invalid UTF-8 in repeated string field",
+		decodeTo: []proto.Message{&TestNoEnforceUTF8{
+			RepeatedString: []string{"foo", "abc\xff"},
+		}},
+		wire: pack.Message{
+			pack.Tag{3, pack.BytesType}, pack.String("foo"),
+			pack.Tag{3, pack.BytesType}, pack.String("abc\xff"),
+		}.Marshal(),
+	},
+	{
+		desc: "invalid UTF-8 in repeated string field of Go bytes",
+		decodeTo: []proto.Message{&TestNoEnforceUTF8{
+			RepeatedBytes: [][]byte{[]byte("foo"), []byte("abc\xff")},
+		}},
+		wire: pack.Message{
+			pack.Tag{4, pack.BytesType}, pack.String("foo"),
+			pack.Tag{4, pack.BytesType}, pack.String("abc\xff"),
+		}.Marshal(),
+	},
+	{
+		desc: "invalid UTF-8 in oneof string field",
+		decodeTo: []proto.Message{&TestNoEnforceUTF8{
+			OneofField: &TestNoEnforceUTF8_OneofString{OneofString: "abc\xff"},
+		}},
+		wire: pack.Message{
+			pack.Tag{5, pack.BytesType}, pack.String("abc\xff"),
+		}.Marshal(),
+	},
+	{
+		desc: "invalid UTF-8 in oneof string field of Go bytes",
+		decodeTo: []proto.Message{&TestNoEnforceUTF8{
+			OneofField: &TestNoEnforceUTF8_OneofBytes{OneofBytes: []byte("abc\xff")},
+		}},
+		wire: pack.Message{
+			pack.Tag{6, pack.BytesType}, pack.String("abc\xff"),
+		}.Marshal(),
+	},
+}
+
+// isOneofField is implemented by the wrapper types that can be stored in
+// TestNoEnforceUTF8.OneofField.
+type isOneofField interface{ isOneofField() }
+
+type (
+	// TestNoEnforceUTF8_OneofString wraps field 5 (oneof_string), held as a Go string.
+	TestNoEnforceUTF8_OneofString struct {
+		OneofString string `protobuf:"bytes,5,opt,name=oneof_string,oneof"`
+	}
+
+	// TestNoEnforceUTF8_OneofBytes wraps field 6 (oneof_bytes), held as a Go []byte.
+	TestNoEnforceUTF8_OneofBytes struct {
+		OneofBytes []byte `protobuf:"bytes,6,opt,name=oneof_bytes,oneof"`
+	}
+)
+
+func (*TestNoEnforceUTF8_OneofString) isOneofField() {}
+func (*TestNoEnforceUTF8_OneofBytes) isOneofField()  {}
+
+// TestNoEnforceUTF8 is a hand-written message with singular, repeated, and
+// oneof fields, each in a Go string and a Go []byte flavor.
+type TestNoEnforceUTF8 struct {
+	OptionalString string       `protobuf:"bytes,1,opt,name=optional_string"`
+	OptionalBytes  []byte       `protobuf:"bytes,2,opt,name=optional_bytes"`
+	RepeatedString []string     `protobuf:"bytes,3,rep,name=repeated_string"`
+	RepeatedBytes  [][]byte     `protobuf:"bytes,4,rep,name=repeated_bytes"`
+	OneofField     isOneofField `protobuf_oneof:"oneof_field"`
+}
+
+// ProtoReflect returns the reflective view of m obtained from
+// messageInfo_TestNoEnforceUTF8.
+func (m *TestNoEnforceUTF8) ProtoReflect() pref.Message {
+	return messageInfo_TestNoEnforceUTF8.MessageOf(m)
+}
+
+// messageInfo_TestNoEnforceUTF8 holds the runtime message information for
+// TestNoEnforceUTF8. The message descriptor is built once, at package
+// initialization, from a text-format FileDescriptorProto; afterwards every
+// field of the message is marked with HasEnforceUTF8 = true and
+// EnforceUTF8 = false. Any failure while building the descriptor panics.
+var messageInfo_TestNoEnforceUTF8 = protoimpl.MessageInfo{
+	GoType: reflect.TypeOf((*TestNoEnforceUTF8)(nil)),
+	PBType: &prototype.Message{
+		MessageDescriptor: func() protoreflect.MessageDescriptor {
+			fileProto := new(descriptorpb.FileDescriptorProto)
+			if err := prototext.Unmarshal([]byte(`
+ syntax: "proto3"
+ name: "test.proto"
+ message_type: [{
+ name: "TestNoEnforceUTF8"
+ field: [
+ {name:"optional_string" number:1 label:LABEL_OPTIONAL type:TYPE_STRING},
+ {name:"optional_bytes" number:2 label:LABEL_OPTIONAL type:TYPE_STRING},
+ {name:"repeated_string" number:3 label:LABEL_REPEATED type:TYPE_STRING},
+ {name:"repeated_bytes" number:4 label:LABEL_REPEATED type:TYPE_STRING},
+ {name:"oneof_string" number:5 label:LABEL_OPTIONAL type:TYPE_STRING, oneof_index:0},
+ {name:"oneof_bytes" number:6 label:LABEL_OPTIONAL type:TYPE_STRING, oneof_index:0}
+ ]
+ oneof_decl: [{name:"oneof_field"}]
+ }]
+ `), fileProto); err != nil {
+				panic(err)
+			}
+			file, err := protodesc.NewFile(fileProto, nil)
+			if err != nil {
+				panic(err)
+			}
+			msgDesc := file.Messages().Get(0)
+			// Flip the enforce_utf8 flags directly on the concrete descriptor
+			// type behind each field.
+			fields := msgDesc.Fields()
+			for i, n := 0, fields.Len(); i < n; i++ {
+				field := fields.Get(i).(*filedesc.Field)
+				field.L1.HasEnforceUTF8 = true
+				field.L1.EnforceUTF8 = false
+			}
+			return msgDesc
+		}(),
+		NewMessage: func() pref.Message {
+			var m pref.ProtoMessage = new(TestNoEnforceUTF8)
+			return m.ProtoReflect()
+		},
+	},
+	OneofWrappers: []interface{}{
+		(*TestNoEnforceUTF8_OneofString)(nil),
+		(*TestNoEnforceUTF8_OneofBytes)(nil),
+	},
+}
+
func build(m proto.Message, opts ...buildOpt) proto.Message {
for _, opt := range opts {
opt(m)