all: support enforce_utf8 override

In 2014, when proto3 was being developed, there were a number of early
adopters of the new syntax. Before proto3 was finalized and released
as open source in July 2016, a decision was made to strictly validate
UTF-8 in proto3 string fields. However, some of the early adopters
were already using invalid UTF-8 with string fields.
The google.protobuf.FieldOptions.enforce_utf8 option exists only so that
those grandfathered users can opt out of the validation logic.
Practical use of that option in open source is impossible even if a user
specifies the proto1_legacy build tag since it requires a hacked
variant of descriptor.proto that is not externally available.

This CL supports enforce_utf8 by modifying internal/filedesc to
expose the flag if it detects it in the raw descriptor.
We add an strs.EnforceUTF8 function as a centralized place to determine
whether to perform validation. Validation opt-out is supported
only in builds with legacy support.

We implement support for validating UTF-8 in all proto3 string fields,
even if they are backed by a Go []byte.

Change-Id: I9c0628b84909bc7181125f09db730c80d490e485
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/186002
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/internal/impl/codec_tables.go b/internal/impl/codec_tables.go
index 564187e..3ff4260 100644
--- a/internal/impl/codec_tables.go
+++ b/internal/impl/codec_tables.go
@@ -9,6 +9,7 @@
 	"reflect"
 
 	"google.golang.org/protobuf/internal/encoding/wire"
+	"google.golang.org/protobuf/internal/strs"
 	pref "google.golang.org/protobuf/reflect/protoreflect"
 )
 
@@ -98,12 +99,15 @@
 				return coderDoubleSlice
 			}
 		case pref.StringKind:
-			if ft.Kind() == reflect.String && fd.Syntax() == pref.Proto3 {
+			if ft.Kind() == reflect.String && strs.EnforceUTF8(fd) {
 				return coderStringSliceValidateUTF8
 			}
 			if ft.Kind() == reflect.String {
 				return coderStringSlice
 			}
+			if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 && strs.EnforceUTF8(fd) {
+				return coderBytesSliceValidateUTF8
+			}
 			if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 {
 				return coderBytesSlice
 			}
@@ -251,9 +255,15 @@
 				return coderDoubleNoZero
 			}
 		case pref.StringKind:
-			if ft.Kind() == reflect.String {
+			if ft.Kind() == reflect.String && strs.EnforceUTF8(fd) {
 				return coderStringNoZeroValidateUTF8
 			}
+			if ft.Kind() == reflect.String {
+				return coderStringNoZero
+			}
+			if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 && strs.EnforceUTF8(fd) {
+				return coderBytesNoZeroValidateUTF8
+			}
 			if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 {
 				return coderBytesNoZero
 			}
@@ -392,12 +402,15 @@
 				return coderDouble
 			}
 		case pref.StringKind:
-			if fd.Syntax() == pref.Proto3 && ft.Kind() == reflect.String {
+			if ft.Kind() == reflect.String && strs.EnforceUTF8(fd) {
 				return coderStringValidateUTF8
 			}
 			if ft.Kind() == reflect.String {
 				return coderString
 			}
+			if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 && strs.EnforceUTF8(fd) {
+				return coderBytesValidateUTF8
+			}
 			if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 {
 				return coderBytes
 			}
@@ -620,12 +633,15 @@
 				return coderDoubleIface
 			}
 		case pref.StringKind:
-			if fd.Syntax() == pref.Proto3 && ft.Kind() == reflect.String {
+			if ft.Kind() == reflect.String && strs.EnforceUTF8(fd) {
 				return coderStringIfaceValidateUTF8
 			}
 			if ft.Kind() == reflect.String {
 				return coderStringIface
 			}
+			if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 && strs.EnforceUTF8(fd) {
+				return coderBytesIfaceValidateUTF8
+			}
 			if ft.Kind() == reflect.Slice && ft.Elem().Kind() == reflect.Uint8 {
 				return coderBytesIface
 			}