proto, internal/impl: store unknown MessageSet items in non-mset format

In the v1 implementation, unknown MessageSet items are stored in a
message's unknown fields section in non-MessageSet format. For example,
consider a MessageSet containing an item with type_id T and value V.
If the type_id is not resolvable, the item will be placed in the unknown
fields as a bytes-valued field with number T and contents V. This
conversion is then reversed when marshaling a MessageSet containing
unknown fields.

Preserve this behavior in v2.

One consequence of this change is that actual unknown fields in a
MessageSet (any field other than 1) are now discarded. This matches
the previous behavior.

Change-Id: I3d913613f84e0ae82481078dbc91cb25628651cc
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/205697
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
diff --git a/proto/messageset.go b/proto/messageset.go
index 0d88097..e27e0b7 100644
--- a/proto/messageset.go
+++ b/proto/messageset.go
@@ -20,7 +20,7 @@
 		size += wire.SizeBytes(sizeMessage(v.Message()))
 		return true
 	})
-	size += len(m.GetUnknown())
+	size += messageset.SizeUnknown(m.GetUnknown())
 	return size
 }
 
@@ -36,8 +36,7 @@
 	if err != nil {
 		return b, err
 	}
-	b = append(b, m.GetUnknown()...)
-	return b, nil
+	return messageset.AppendUnknown(b, m.GetUnknown())
 }
 
 func marshalMessageSetField(b []byte, fd protoreflect.FieldDescriptor, value protoreflect.Value, o MarshalOptions) ([]byte, error) {
@@ -56,48 +55,34 @@
 	if !flags.ProtoLegacy {
 		return errors.New("no support for message_set_wire_format")
 	}
-	md := m.Descriptor()
-	for len(b) > 0 {
-		err := func() error {
-			num, v, n, err := messageset.ConsumeField(b)
-			if err != nil {
-				// Not a message set field.
-				//
-				// Return errUnknown to try to add this to the unknown fields.
-				// If the field is completely unparsable, we'll catch it
-				// when trying to skip the field.
-				return errUnknown
-			}
-			if !md.ExtensionRanges().Has(num) {
-				return errUnknown
-			}
-			xt, err := o.Resolver.FindExtensionByNumber(md.FullName(), num)
-			if err == protoregistry.NotFound {
-				return errUnknown
-			}
-			if err != nil {
-				return err
-			}
-			xd := xt.TypeDescriptor()
-			if err := o.unmarshalMessage(v, m.Mutable(xd).Message()); err != nil {
-				// Contents cannot be unmarshaled.
-				return err
-			}
-			b = b[n:]
-			return nil
-		}()
+	return messageset.Unmarshal(b, false, func(num wire.Number, v []byte) error {
+		err := unmarshalMessageSetField(m, num, v, o)
 		if err == errUnknown {
-			_, _, n := wire.ConsumeField(b)
-			if n < 0 {
-				return wire.ParseError(n)
-			}
-			m.SetUnknown(append(m.GetUnknown(), b[:n]...))
-			b = b[n:]
-			continue
+			unknown := m.GetUnknown()
+			unknown = wire.AppendTag(unknown, num, wire.BytesType)
+			unknown = wire.AppendBytes(unknown, v)
+			m.SetUnknown(unknown)
+			return nil
 		}
-		if err != nil {
-			return err
-		}
+		return err
+	})
+}
+
+func unmarshalMessageSetField(m protoreflect.Message, num wire.Number, v []byte, o UnmarshalOptions) error {
+	md := m.Descriptor()
+	if !md.ExtensionRanges().Has(num) {
+		return errUnknown
+	}
+	xt, err := o.Resolver.FindExtensionByNumber(md.FullName(), num)
+	if err == protoregistry.NotFound {
+		return errUnknown
+	}
+	if err != nil {
+		return err
+	}
+	xd := xt.TypeDescriptor()
+	if err := o.unmarshalMessage(v, m.Mutable(xd).Message()); err != nil {
+		return err
 	}
 	return nil
 }