blob: 9d22dd76277b5933e6e1926c4b7bc0b5ebbb2750 [file] [log] [blame]
Damien Neil302cb322019-06-19 15:22:13 -07001// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package messageset encodes and decodes the obsolete MessageSet wire format.
6package messageset
7
8import (
9 "google.golang.org/protobuf/internal/encoding/wire"
10 "google.golang.org/protobuf/internal/errors"
11 pref "google.golang.org/protobuf/reflect/protoreflect"
Joe Tsai5ae10aa2019-07-11 18:23:08 -070012 preg "google.golang.org/protobuf/reflect/protoregistry"
Damien Neil302cb322019-06-19 15:22:13 -070013)
14
15// The MessageSet wire format is equivalent to a message defiend as follows,
16// where each Item defines an extension field with a field number of 'type_id'
17// and content of 'message'. MessageSet extensions must be non-repeated message
18// fields.
19//
20// message MessageSet {
21// repeated group Item = 1 {
22// required int32 type_id = 2;
23// required string message = 3;
24// }
25// }
26const (
27 FieldItem = wire.Number(1)
28 FieldTypeID = wire.Number(2)
29 FieldMessage = wire.Number(3)
30)
31
// ExtensionName is the name that every MessageSet extension field must have.
//
// A valid MessageSet extension is declared inside the message type that it
// carries, in the following form:
//
//	message MyMessage {
//		extend proto2.bridge.MessageSet {
//			optional MyMessage message_set_extension = 1234;
//		}
//		...
//	}
const ExtensionName = "message_set_extension"
42
Damien Neil302cb322019-06-19 15:22:13 -070043// IsMessageSet returns whether the message uses the MessageSet wire format.
44func IsMessageSet(md pref.MessageDescriptor) bool {
45 xmd, ok := md.(interface{ IsMessageSet() bool })
46 return ok && xmd.IsMessageSet()
47}
48
Joe Tsai5ae10aa2019-07-11 18:23:08 -070049// IsMessageSetExtension reports this field extends a MessageSet.
50func IsMessageSetExtension(fd pref.FieldDescriptor) bool {
51 if fd.Name() != ExtensionName {
52 return false
53 }
54 if fd.FullName().Parent() != fd.Message().FullName() {
55 return false
56 }
57 return IsMessageSet(fd.ContainingMessage())
58}
59
60// FindMessageSetExtension locates a MessageSet extension field by name.
61// In text and JSON formats, the extension name used is the message itself.
62// The extension field name is derived by appending ExtensionName.
63func FindMessageSetExtension(r preg.ExtensionTypeResolver, s pref.FullName) (pref.ExtensionType, error) {
Damien Neil01b51b42020-01-17 13:40:51 -080064 name := s.Append(ExtensionName)
65 xt, err := r.FindExtensionByName(name)
Joe Tsai5ae10aa2019-07-11 18:23:08 -070066 if err != nil {
Damien Neil01b51b42020-01-17 13:40:51 -080067 if err == preg.NotFound {
68 return nil, err
69 }
70 return nil, errors.Wrap(err, "%q", name)
Joe Tsai5ae10aa2019-07-11 18:23:08 -070071 }
Damien Neil79bfdbe2019-08-28 11:08:22 -070072 if !IsMessageSetExtension(xt.TypeDescriptor()) {
Joe Tsai5ae10aa2019-07-11 18:23:08 -070073 return nil, preg.NotFound
74 }
75 return xt, nil
76}
77
Damien Neil302cb322019-06-19 15:22:13 -070078// SizeField returns the size of a MessageSet item field containing an extension
79// with the given field number, not counting the contents of the message subfield.
80func SizeField(num wire.Number) int {
81 return 2*wire.SizeTag(FieldItem) + wire.SizeTag(FieldTypeID) + wire.SizeVarint(uint64(num))
82}
83
Damien Neilce3384c2019-11-06 13:18:28 -080084// Unmarshal parses a MessageSet.
85//
86// It calls fn with the type ID and value of each item in the MessageSet.
87// Unknown fields are discarded.
88//
89// If wantLen is true, the item values include the varint length prefix.
90// This is ugly, but simplifies the fast-path decoder in internal/impl.
91func Unmarshal(b []byte, wantLen bool, fn func(typeID wire.Number, value []byte) error) error {
92 for len(b) > 0 {
93 num, wtyp, n := wire.ConsumeTag(b)
94 if n < 0 {
95 return wire.ParseError(n)
96 }
97 b = b[n:]
98 if num != FieldItem || wtyp != wire.StartGroupType {
99 n := wire.ConsumeFieldValue(num, wtyp, b)
100 if n < 0 {
101 return wire.ParseError(n)
102 }
103 b = b[n:]
104 continue
105 }
Damien Neil9afe9bb2020-02-07 10:06:53 -0800106 typeID, value, n, err := ConsumeFieldValue(b, wantLen)
Damien Neilce3384c2019-11-06 13:18:28 -0800107 if err != nil {
108 return err
109 }
110 b = b[n:]
111 if typeID == 0 {
112 continue
113 }
114 if err := fn(typeID, value); err != nil {
115 return err
116 }
Damien Neil302cb322019-06-19 15:22:13 -0700117 }
Damien Neilce3384c2019-11-06 13:18:28 -0800118 return nil
Damien Neil302cb322019-06-19 15:22:13 -0700119}
120
Damien Neil9afe9bb2020-02-07 10:06:53 -0800121// ConsumeFieldValue parses b as a MessageSet item field value until and including
Damien Neil302cb322019-06-19 15:22:13 -0700122// the trailing end group marker. It assumes the start group tag has already been parsed.
123// It returns the contents of the type_id and message subfields and the total
124// item length.
125//
126// If wantLen is true, the returned message value includes the length prefix.
Damien Neil9afe9bb2020-02-07 10:06:53 -0800127func ConsumeFieldValue(b []byte, wantLen bool) (typeid wire.Number, message []byte, n int, err error) {
Damien Neil302cb322019-06-19 15:22:13 -0700128 ilen := len(b)
129 for {
130 num, wtyp, n := wire.ConsumeTag(b)
131 if n < 0 {
132 return 0, nil, 0, wire.ParseError(n)
133 }
134 b = b[n:]
135 switch {
136 case num == FieldItem && wtyp == wire.EndGroupType:
137 if wantLen && len(message) == 0 {
138 // The message field was missing, which should never happen.
139 // Be prepared for this case anyway.
140 message = wire.AppendVarint(message, 0)
141 }
142 return typeid, message, ilen - len(b), nil
143 case num == FieldTypeID && wtyp == wire.VarintType:
144 v, n := wire.ConsumeVarint(b)
145 if n < 0 {
146 return 0, nil, 0, wire.ParseError(n)
147 }
148 b = b[n:]
149 typeid = wire.Number(v)
150 case num == FieldMessage && wtyp == wire.BytesType:
151 m, n := wire.ConsumeBytes(b)
152 if n < 0 {
153 return 0, nil, 0, wire.ParseError(n)
154 }
155 if message == nil {
156 if wantLen {
157 message = b[:n]
158 } else {
159 message = m
160 }
161 } else {
162 // This case should never happen in practice, but handle it for
163 // correctness: The MessageSet item contains multiple message
164 // fields, which need to be merged.
165 //
166 // In the case where we're returning the length, this becomes
167 // quite inefficient since we need to strip the length off
168 // the existing data and reconstruct it with the combined length.
169 if wantLen {
170 _, nn := wire.ConsumeVarint(message)
171 m0 := message[nn:]
172 message = message[:0]
173 message = wire.AppendVarint(message, uint64(len(m0)+len(m)))
174 message = append(message, m0...)
175 message = append(message, m...)
176 } else {
177 message = append(message, m...)
178 }
179 }
180 b = b[n:]
181 }
182 }
183}
184
185// AppendFieldStart appends the start of a MessageSet item field containing
186// an extension with the given number. The caller must add the message
187// subfield (including the tag).
188func AppendFieldStart(b []byte, num wire.Number) []byte {
189 b = wire.AppendTag(b, FieldItem, wire.StartGroupType)
190 b = wire.AppendTag(b, FieldTypeID, wire.VarintType)
191 b = wire.AppendVarint(b, uint64(num))
192 return b
193}
194
195// AppendFieldEnd appends the trailing end group marker for a MessageSet item field.
196func AppendFieldEnd(b []byte) []byte {
197 return wire.AppendTag(b, FieldItem, wire.EndGroupType)
198}
Damien Neilce3384c2019-11-06 13:18:28 -0800199
200// SizeUnknown returns the size of an unknown fields section in MessageSet format.
201//
202// See AppendUnknown.
203func SizeUnknown(unknown []byte) (size int) {
204 for len(unknown) > 0 {
205 num, typ, n := wire.ConsumeTag(unknown)
206 if n < 0 || typ != wire.BytesType {
207 return 0
208 }
209 unknown = unknown[n:]
210 _, n = wire.ConsumeBytes(unknown)
211 if n < 0 {
212 return 0
213 }
214 unknown = unknown[n:]
215 size += SizeField(num) + wire.SizeTag(FieldMessage) + n
216 }
217 return size
218}
219
220// AppendUnknown appends unknown fields to b in MessageSet format.
221//
222// For historic reasons, unresolved items in a MessageSet are stored in a
223// message's unknown fields section in non-MessageSet format. That is, an
224// unknown item with typeID T and value V appears in the unknown fields as
225// a field with number T and value V.
226//
227// This function converts the unknown fields back into MessageSet form.
228func AppendUnknown(b, unknown []byte) ([]byte, error) {
229 for len(unknown) > 0 {
230 num, typ, n := wire.ConsumeTag(unknown)
231 if n < 0 || typ != wire.BytesType {
232 return nil, errors.New("invalid data in message set unknown fields")
233 }
234 unknown = unknown[n:]
235 _, n = wire.ConsumeBytes(unknown)
236 if n < 0 {
237 return nil, errors.New("invalid data in message set unknown fields")
238 }
239 b = AppendFieldStart(b, num)
240 b = wire.AppendTag(b, FieldMessage, wire.BytesType)
241 b = append(b, unknown[:n]...)
242 b = AppendFieldEnd(b)
243 unknown = unknown[n:]
244 }
245 return b, nil
246}