all: support enforce_utf8 override
In 2014, when proto3 was being developed, there were a number of early
adopters of the new syntax. Before proto3 was finalized and released as
open source in July 2016, a decision was made to
strictly validate strings in proto3. However, some of the early adopters
were already using invalid UTF-8 with string fields.
The google.protobuf.FieldOptions.enforce_utf8 option only exists to support
those grandfathered users so that they can opt out of the validation logic.
Practical use of that option in open source is impossible even if a user
specifies the proto1_legacy build tag since it requires a hacked
variant of descriptor.proto that is not externally available.
This CL supports enforce_utf8 by modifying internal/filedesc to
expose the flag if it detects it in the raw descriptor.
We add a strs.EnforceUTF8 function as a centralized place to determine
whether to perform validation. Validation opt-out is supported
only in builds with legacy support.
We implement support for validating UTF-8 in all proto3 string fields,
even if they are backed by a Go []byte.
Change-Id: I9c0628b84909bc7181125f09db730c80d490e485
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/186002
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/internal/impl/codec_tables.go b/internal/impl/codec_tables.go
index 564187e..3ff4260 100644
--- a/internal/impl/codec_tables.go
+++ b/internal/impl/codec_tables.go
@@ -9,6 +9,7 @@
"reflect"
"google.golang.org/protobuf/internal/encoding/wire"
+ "google.golang.org/protobuf/internal/strs"
pref "google.golang.org/protobuf/reflect/protoreflect"
)
@@ -98,12 +99,15 @@
return coderDoubleSlice
}
case pref.StringKind:
- if ft.Kind() == reflect.String && fd.Syntax() == pref.Proto3 {
+ if ft.Kind() == reflect.String && strs.EnforceUTF8(fd) {
return coderStringSliceValidateUTF8
}
if ft.Kind() == reflect.String {
return coderStringSlice
}
+ if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 && strs.EnforceUTF8(fd) {
+ return coderBytesSliceValidateUTF8
+ }
if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 {
return coderBytesSlice
}
@@ -251,9 +255,15 @@
return coderDoubleNoZero
}
case pref.StringKind:
- if ft.Kind() == reflect.String {
+ if ft.Kind() == reflect.String && strs.EnforceUTF8(fd) {
return coderStringNoZeroValidateUTF8
}
+ if ft.Kind() == reflect.String {
+ return coderStringNoZero
+ }
+ if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 && strs.EnforceUTF8(fd) {
+ return coderBytesNoZeroValidateUTF8
+ }
if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 {
return coderBytesNoZero
}
@@ -392,12 +402,15 @@
return coderDouble
}
case pref.StringKind:
- if fd.Syntax() == pref.Proto3 && ft.Kind() == reflect.String {
+ if ft.Kind() == reflect.String && strs.EnforceUTF8(fd) {
return coderStringValidateUTF8
}
if ft.Kind() == reflect.String {
return coderString
}
+ if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 && strs.EnforceUTF8(fd) {
+ return coderBytesValidateUTF8
+ }
if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 {
return coderBytes
}
@@ -620,12 +633,15 @@
return coderDoubleIface
}
case pref.StringKind:
- if fd.Syntax() == pref.Proto3 && ft.Kind() == reflect.String {
+ if ft.Kind() == reflect.String && strs.EnforceUTF8(fd) {
return coderStringIfaceValidateUTF8
}
if ft.Kind() == reflect.String {
return coderStringIface
}
+ if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 && strs.EnforceUTF8(fd) {
+ return coderBytesIfaceValidateUTF8
+ }
if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 {
return coderBytesIface
}