blob: b2a69b667cecad1bacd3259f08144d3e8cf32ec2 [file] [log] [blame]
Chris Fallin91473dc2014-12-12 15:58:26 -08001// Amalgamated source file
2#include "upb.h"
Chris Fallin91473dc2014-12-12 15:58:26 -08003
4
Josh Haberman78da6662016-01-13 19:05:43 -08005#include <ctype.h>
Chris Fallin91473dc2014-12-12 15:58:26 -08006#include <stdlib.h>
7#include <string.h>
8
/* A length-prefixed, heap-allocated string.  The struct is over-allocated so
 * that "str" holds "len" bytes of data followed by a NUL terminator. */
typedef struct {
  size_t len;
  char str[1];  /* Null-terminated string data follows. */
} str_t;

/* Allocates a new str_t containing a copy of the "len" bytes at "data".
 * The copy is always NUL-terminated.  "data" may be NULL only if len == 0.
 * Returns NULL if the allocation fails or the requested size would overflow
 * size_t.  The result must be released with freestr(). */
static str_t *newstr(const char *data, size_t len) {
  str_t *ret;

  /* sizeof(*ret) already accounts for the terminator byte (str[1]); refuse
   * lengths for which sizeof(*ret) + len would wrap around. */
  if (len > (size_t)-1 - sizeof(*ret)) return NULL;

  ret = malloc(sizeof(*ret) + len);
  if (!ret) return NULL;
  ret->len = len;
  /* memcpy() with a NULL source is undefined even for zero bytes. */
  if (len > 0) memcpy(ret->str, data, len);
  ret->str[len] = '\0';
  return ret;
}

/* Releases a string created by newstr().  Passing NULL is a no-op. */
static void freestr(str_t *s) { free(s); }
24
/* The <ctype.h> classifiers (isalpha() and friends) depend on the current
 * locale, which we do not want, so we use plain character-range tests. */

/* Returns true iff low <= c <= high. */
static bool upb_isbetween(char c, char low, char high) {
  if (c < low) return false;
  return c <= high;
}
29
/* Returns true iff "c" is an ASCII letter or an underscore. */
static bool upb_isletter(char c) {
  if (c == '_') return true;
  if (upb_isbetween(c, 'A', 'Z')) return true;
  return upb_isbetween(c, 'a', 'z');
}
33
/* Returns true iff "c" is an ASCII letter, an underscore, or a decimal
 * digit. */
static bool upb_isalphanum(char c) {
  if (upb_isbetween(c, '0', '9')) return true;
  return upb_isletter(c);
}
37
38static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) {
39 bool start = true;
Josh Habermane8ed0212015-06-08 17:56:03 -070040 size_t i;
41 for (i = 0; i < len; i++) {
Chris Fallin91473dc2014-12-12 15:58:26 -080042 char c = str[i];
43 if (c == '.') {
44 if (start || !full) {
45 upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
46 return false;
47 }
48 start = true;
49 } else if (start) {
50 if (!upb_isletter(c)) {
51 upb_status_seterrf(
52 s, "invalid name: path components must start with a letter (%s)",
53 str);
54 return false;
55 }
56 start = false;
57 } else {
58 if (!upb_isalphanum(c)) {
59 upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
60 str);
61 return false;
62 }
63 }
64 }
65 return !start;
66}
67
68
69/* upb_def ********************************************************************/
70
71upb_deftype_t upb_def_type(const upb_def *d) { return d->type; }
72
73const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
74
75bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) {
76 assert(!upb_def_isfrozen(def));
77 if (!upb_isident(fullname, strlen(fullname), true, s)) return false;
78 free((void*)def->fullname);
79 def->fullname = upb_strdup(fullname);
80 return true;
81}
82
83upb_def *upb_def_dup(const upb_def *def, const void *o) {
84 switch (def->type) {
85 case UPB_DEF_MSG:
Josh Habermane8ed0212015-06-08 17:56:03 -070086 return upb_msgdef_upcast_mutable(
87 upb_msgdef_dup(upb_downcast_msgdef(def), o));
Chris Fallin91473dc2014-12-12 15:58:26 -080088 case UPB_DEF_FIELD:
Josh Habermane8ed0212015-06-08 17:56:03 -070089 return upb_fielddef_upcast_mutable(
90 upb_fielddef_dup(upb_downcast_fielddef(def), o));
Chris Fallin91473dc2014-12-12 15:58:26 -080091 case UPB_DEF_ENUM:
Josh Habermane8ed0212015-06-08 17:56:03 -070092 return upb_enumdef_upcast_mutable(
93 upb_enumdef_dup(upb_downcast_enumdef(def), o));
Chris Fallin91473dc2014-12-12 15:58:26 -080094 default: assert(false); return NULL;
95 }
96}
97
Chris Fallin91473dc2014-12-12 15:58:26 -080098static bool upb_def_init(upb_def *def, upb_deftype_t type,
99 const struct upb_refcounted_vtbl *vtbl,
100 const void *owner) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700101 if (!upb_refcounted_init(upb_def_upcast_mutable(def), vtbl, owner)) return false;
Chris Fallin91473dc2014-12-12 15:58:26 -0800102 def->type = type;
103 def->fullname = NULL;
104 def->came_from_user = false;
105 return true;
106}
107
108static void upb_def_uninit(upb_def *def) {
109 free((void*)def->fullname);
110}
111
112static const char *msgdef_name(const upb_msgdef *m) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700113 const char *name = upb_def_fullname(upb_msgdef_upcast(m));
Chris Fallin91473dc2014-12-12 15:58:26 -0800114 return name ? name : "(anonymous)";
115}
116
117static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
118 if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
119 upb_status_seterrmsg(s, "fielddef must have name and number set");
120 return false;
121 }
122
123 if (!f->type_is_set_) {
124 upb_status_seterrmsg(s, "fielddef type was not initialized");
125 return false;
126 }
127
128 if (upb_fielddef_lazy(f) &&
129 upb_fielddef_descriptortype(f) != UPB_DESCRIPTOR_TYPE_MESSAGE) {
130 upb_status_seterrmsg(s,
131 "only length-delimited submessage fields may be lazy");
132 return false;
133 }
134
135 if (upb_fielddef_hassubdef(f)) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700136 const upb_def *subdef;
137
Chris Fallin91473dc2014-12-12 15:58:26 -0800138 if (f->subdef_is_symbolic) {
139 upb_status_seterrf(s, "field '%s.%s' has not been resolved",
140 msgdef_name(f->msg.def), upb_fielddef_name(f));
141 return false;
142 }
143
Josh Habermane8ed0212015-06-08 17:56:03 -0700144 subdef = upb_fielddef_subdef(f);
Chris Fallin91473dc2014-12-12 15:58:26 -0800145 if (subdef == NULL) {
146 upb_status_seterrf(s, "field %s.%s is missing required subdef",
147 msgdef_name(f->msg.def), upb_fielddef_name(f));
148 return false;
149 }
150
151 if (!upb_def_isfrozen(subdef) && !subdef->came_from_user) {
152 upb_status_seterrf(s,
153 "subdef of field %s.%s is not frozen or being frozen",
154 msgdef_name(f->msg.def), upb_fielddef_name(f));
155 return false;
156 }
157 }
158
159 if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
160 bool has_default_name = upb_fielddef_enumhasdefaultstr(f);
161 bool has_default_number = upb_fielddef_enumhasdefaultint32(f);
162
Josh Habermane8ed0212015-06-08 17:56:03 -0700163 /* Previously verified by upb_validate_enumdef(). */
Chris Fallin91473dc2014-12-12 15:58:26 -0800164 assert(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0);
165
Josh Habermane8ed0212015-06-08 17:56:03 -0700166 /* We've already validated that we have an associated enumdef and that it
167 * has at least one member, so at least one of these should be true.
168 * Because if the user didn't set anything, we'll pick up the enum's
169 * default, but if the user *did* set something we should at least pick up
170 * the one they set (int32 or string). */
Chris Fallin91473dc2014-12-12 15:58:26 -0800171 assert(has_default_name || has_default_number);
172
173 if (!has_default_name) {
174 upb_status_seterrf(s,
175 "enum default for field %s.%s (%d) is not in the enum",
176 msgdef_name(f->msg.def), upb_fielddef_name(f),
177 upb_fielddef_defaultint32(f));
178 return false;
179 }
180
181 if (!has_default_number) {
182 upb_status_seterrf(s,
183 "enum default for field %s.%s (%s) is not in the enum",
184 msgdef_name(f->msg.def), upb_fielddef_name(f),
185 upb_fielddef_defaultstr(f, NULL));
186 return false;
187 }
188
Josh Habermane8ed0212015-06-08 17:56:03 -0700189 /* Lift the effective numeric default into the field's default slot, in case
190 * we were only getting it "by reference" from the enumdef. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800191 upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
192 }
193
Josh Habermane8ed0212015-06-08 17:56:03 -0700194 /* Ensure that MapEntry submessages only appear as repeated fields, not
195 * optional/required (singular) fields. */
Chris Fallina5075922015-02-02 15:07:34 -0800196 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
197 upb_fielddef_msgsubdef(f) != NULL) {
198 const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
199 if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) {
200 upb_status_seterrf(s,
201 "Field %s refers to mapentry message but is not "
202 "a repeated field",
203 upb_fielddef_name(f) ? upb_fielddef_name(f) :
204 "(unnamed)");
205 return false;
206 }
207 }
208
Chris Fallin91473dc2014-12-12 15:58:26 -0800209 return true;
210}
211
212static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) {
213 if (upb_enumdef_numvals(e) == 0) {
214 upb_status_seterrf(s, "enum %s has no members (must have at least one)",
215 upb_enumdef_fullname(e));
216 return false;
217 }
218
219 return true;
220}
221
Josh Habermane8ed0212015-06-08 17:56:03 -0700222/* All submessage fields are lower than all other fields.
223 * Secondly, fields are increasing in order. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800224uint32_t field_rank(const upb_fielddef *f) {
225 uint32_t ret = upb_fielddef_number(f);
226 const uint32_t high_bit = 1 << 30;
227 assert(ret < high_bit);
228 if (!upb_fielddef_issubmsg(f))
229 ret |= high_bit;
230 return ret;
231}
232
233int cmp_fields(const void *p1, const void *p2) {
234 const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
235 const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
236 return field_rank(f1) - field_rank(f2);
237}
238
239static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700240 /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the
241 * lowest indexes, but we do not publicly guarantee this. */
242 upb_msg_field_iter j;
243 int i;
244 uint32_t selector;
Chris Fallin91473dc2014-12-12 15:58:26 -0800245 int n = upb_msgdef_numfields(m);
246 upb_fielddef **fields = malloc(n * sizeof(*fields));
247 if (!fields) return false;
248
Chris Fallin91473dc2014-12-12 15:58:26 -0800249 m->submsg_field_count = 0;
Chris Fallinfcd88892015-01-13 18:14:39 -0800250 for(i = 0, upb_msg_field_begin(&j, m);
251 !upb_msg_field_done(&j);
252 upb_msg_field_next(&j), i++) {
Chris Fallin91473dc2014-12-12 15:58:26 -0800253 upb_fielddef *f = upb_msg_iter_field(&j);
254 assert(f->msg.def == m);
255 if (!upb_validate_field(f, s)) {
256 free(fields);
257 return false;
258 }
259 if (upb_fielddef_issubmsg(f)) {
260 m->submsg_field_count++;
261 }
262 fields[i] = f;
263 }
264
265 qsort(fields, n, sizeof(*fields), cmp_fields);
266
Josh Habermane8ed0212015-06-08 17:56:03 -0700267 selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
Chris Fallin91473dc2014-12-12 15:58:26 -0800268 for (i = 0; i < n; i++) {
269 upb_fielddef *f = fields[i];
270 f->index_ = i;
271 f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
272 selector += upb_handlers_selectorcount(f);
273 }
274 m->selector_count = selector;
275
276#ifndef NDEBUG
Josh Habermane8ed0212015-06-08 17:56:03 -0700277 {
278 /* Verify that all selectors for the message are distinct. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800279#define TRY(type) \
Josh Habermane8ed0212015-06-08 17:56:03 -0700280 if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v);
Chris Fallin91473dc2014-12-12 15:58:26 -0800281
Josh Habermane8ed0212015-06-08 17:56:03 -0700282 upb_inttable t;
283 upb_value v;
284 upb_selector_t sel;
285
286 upb_inttable_init(&t, UPB_CTYPE_BOOL);
287 v = upb_value_bool(true);
288 upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
289 upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
290 for(upb_msg_field_begin(&j, m);
291 !upb_msg_field_done(&j);
292 upb_msg_field_next(&j)) {
293 upb_fielddef *f = upb_msg_iter_field(&j);
294 /* These calls will assert-fail in upb_table if the value already
295 * exists. */
296 TRY(UPB_HANDLER_INT32);
297 TRY(UPB_HANDLER_INT64)
298 TRY(UPB_HANDLER_UINT32)
299 TRY(UPB_HANDLER_UINT64)
300 TRY(UPB_HANDLER_FLOAT)
301 TRY(UPB_HANDLER_DOUBLE)
302 TRY(UPB_HANDLER_BOOL)
303 TRY(UPB_HANDLER_STARTSTR)
304 TRY(UPB_HANDLER_STRING)
305 TRY(UPB_HANDLER_ENDSTR)
306 TRY(UPB_HANDLER_STARTSUBMSG)
307 TRY(UPB_HANDLER_ENDSUBMSG)
308 TRY(UPB_HANDLER_STARTSEQ)
309 TRY(UPB_HANDLER_ENDSEQ)
310 }
311 upb_inttable_uninit(&t);
Chris Fallin91473dc2014-12-12 15:58:26 -0800312 }
Chris Fallin91473dc2014-12-12 15:58:26 -0800313#undef TRY
314#endif
315
316 free(fields);
317 return true;
318}
319
320bool upb_def_freeze(upb_def *const* defs, int n, upb_status *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700321 int i;
322 int maxdepth;
323 bool ret;
Chris Fallin91473dc2014-12-12 15:58:26 -0800324 upb_status_clear(s);
325
Josh Habermane8ed0212015-06-08 17:56:03 -0700326 /* First perform validation, in two passes so we can check that we have a
327 * transitive closure without needing to search. */
328 for (i = 0; i < n; i++) {
Chris Fallin91473dc2014-12-12 15:58:26 -0800329 upb_def *def = defs[i];
330 if (upb_def_isfrozen(def)) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700331 /* Could relax this requirement if it's annoying. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800332 upb_status_seterrmsg(s, "def is already frozen");
333 goto err;
334 } else if (def->type == UPB_DEF_FIELD) {
335 upb_status_seterrmsg(s, "standalone fielddefs can not be frozen");
336 goto err;
337 } else if (def->type == UPB_DEF_ENUM) {
338 if (!upb_validate_enumdef(upb_dyncast_enumdef(def), s)) {
339 goto err;
340 }
341 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -0700342 /* Set now to detect transitive closure in the second pass. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800343 def->came_from_user = true;
344 }
345 }
346
Josh Habermane8ed0212015-06-08 17:56:03 -0700347 /* Second pass of validation. Also assign selector bases and indexes, and
348 * compact tables. */
349 for (i = 0; i < n; i++) {
Chris Fallin91473dc2014-12-12 15:58:26 -0800350 upb_msgdef *m = upb_dyncast_msgdef_mutable(defs[i]);
351 upb_enumdef *e = upb_dyncast_enumdef_mutable(defs[i]);
352 if (m) {
353 upb_inttable_compact(&m->itof);
354 if (!assign_msg_indices(m, s)) {
355 goto err;
356 }
357 } else if (e) {
358 upb_inttable_compact(&e->iton);
359 }
360 }
361
Josh Habermane8ed0212015-06-08 17:56:03 -0700362 /* Def graph contains FieldDefs between each MessageDef, so double the
363 * limit. */
364 maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;
Chris Fallin91473dc2014-12-12 15:58:26 -0800365
Josh Habermane8ed0212015-06-08 17:56:03 -0700366 /* Validation all passed; freeze the defs. */
367 ret = upb_refcounted_freeze((upb_refcounted * const *)defs, n, s, maxdepth);
Chris Fallin91473dc2014-12-12 15:58:26 -0800368 assert(!(s && ret != upb_ok(s)));
369 return ret;
370
371err:
Josh Habermane8ed0212015-06-08 17:56:03 -0700372 for (i = 0; i < n; i++) {
Chris Fallin91473dc2014-12-12 15:58:26 -0800373 defs[i]->came_from_user = false;
374 }
375 assert(!(s && upb_ok(s)));
376 return false;
377}
378
379
380/* upb_enumdef ****************************************************************/
381
382static void upb_enumdef_free(upb_refcounted *r) {
383 upb_enumdef *e = (upb_enumdef*)r;
384 upb_inttable_iter i;
385 upb_inttable_begin(&i, &e->iton);
386 for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700387 /* To clean up the upb_strdup() from upb_enumdef_addval(). */
Chris Fallin91473dc2014-12-12 15:58:26 -0800388 free(upb_value_getcstr(upb_inttable_iter_value(&i)));
389 }
390 upb_strtable_uninit(&e->ntoi);
391 upb_inttable_uninit(&e->iton);
Josh Habermane8ed0212015-06-08 17:56:03 -0700392 upb_def_uninit(upb_enumdef_upcast_mutable(e));
Chris Fallin91473dc2014-12-12 15:58:26 -0800393 free(e);
394}
395
396upb_enumdef *upb_enumdef_new(const void *owner) {
397 static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_enumdef_free};
398 upb_enumdef *e = malloc(sizeof(*e));
399 if (!e) return NULL;
Josh Habermane8ed0212015-06-08 17:56:03 -0700400 if (!upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM, &vtbl, owner))
401 goto err2;
Chris Fallin91473dc2014-12-12 15:58:26 -0800402 if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2;
403 if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1;
404 return e;
405
406err1:
407 upb_strtable_uninit(&e->ntoi);
408err2:
409 free(e);
410 return NULL;
411}
412
413upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700414 upb_enum_iter i;
Chris Fallin91473dc2014-12-12 15:58:26 -0800415 upb_enumdef *new_e = upb_enumdef_new(owner);
416 if (!new_e) return NULL;
Chris Fallin91473dc2014-12-12 15:58:26 -0800417 for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
418 bool success = upb_enumdef_addval(
419 new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL);
420 if (!success) {
421 upb_enumdef_unref(new_e, owner);
422 return NULL;
423 }
424 }
425 return new_e;
426}
427
Chris Fallin91473dc2014-12-12 15:58:26 -0800428bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700429 upb_def *d = upb_enumdef_upcast_mutable(e);
Chris Fallin91473dc2014-12-12 15:58:26 -0800430 return upb_def_freeze(&d, 1, status);
431}
432
433const char *upb_enumdef_fullname(const upb_enumdef *e) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700434 return upb_def_fullname(upb_enumdef_upcast(e));
Chris Fallin91473dc2014-12-12 15:58:26 -0800435}
436
437bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
438 upb_status *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700439 return upb_def_setfullname(upb_enumdef_upcast_mutable(e), fullname, s);
Chris Fallin91473dc2014-12-12 15:58:26 -0800440}
441
442bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
443 upb_status *status) {
444 if (!upb_isident(name, strlen(name), false, status)) {
445 return false;
446 }
447 if (upb_enumdef_ntoiz(e, name, NULL)) {
448 upb_status_seterrf(status, "name '%s' is already defined", name);
449 return false;
450 }
451 if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) {
452 upb_status_seterrmsg(status, "out of memory");
453 return false;
454 }
455 if (!upb_inttable_lookup(&e->iton, num, NULL) &&
456 !upb_inttable_insert(&e->iton, num, upb_value_cstr(upb_strdup(name)))) {
457 upb_status_seterrmsg(status, "out of memory");
458 upb_strtable_remove(&e->ntoi, name, NULL);
459 return false;
460 }
461 if (upb_enumdef_numvals(e) == 1) {
462 bool ok = upb_enumdef_setdefault(e, num, NULL);
463 UPB_ASSERT_VAR(ok, ok);
464 }
465 return true;
466}
467
468int32_t upb_enumdef_default(const upb_enumdef *e) {
469 assert(upb_enumdef_iton(e, e->defaultval));
470 return e->defaultval;
471}
472
473bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s) {
474 assert(!upb_enumdef_isfrozen(e));
475 if (!upb_enumdef_iton(e, val)) {
476 upb_status_seterrf(s, "number '%d' is not in the enum.", val);
477 return false;
478 }
479 e->defaultval = val;
480 return true;
481}
482
483int upb_enumdef_numvals(const upb_enumdef *e) {
484 return upb_strtable_count(&e->ntoi);
485}
486
487void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700488 /* We iterate over the ntoi table, to account for duplicate numbers. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800489 upb_strtable_begin(i, &e->ntoi);
490}
491
492void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
493bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
494
495bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
496 size_t len, int32_t *num) {
497 upb_value v;
498 if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
499 return false;
500 }
501 if (num) *num = upb_value_getint32(v);
502 return true;
503}
504
505const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
506 upb_value v;
507 return upb_inttable_lookup32(&def->iton, num, &v) ?
508 upb_value_getcstr(v) : NULL;
509}
510
511const char *upb_enum_iter_name(upb_enum_iter *iter) {
512 return upb_strtable_iter_key(iter);
513}
514
515int32_t upb_enum_iter_number(upb_enum_iter *iter) {
516 return upb_value_getint32(upb_strtable_iter_value(iter));
517}
518
519
520/* upb_fielddef ***************************************************************/
521
522static void upb_fielddef_init_default(upb_fielddef *f);
523
524static void upb_fielddef_uninit_default(upb_fielddef *f) {
525 if (f->type_is_set_ && f->default_is_string && f->defaultval.bytes)
526 freestr(f->defaultval.bytes);
527}
528
Josh Haberman78da6662016-01-13 19:05:43 -0800529const char *upb_fielddef_fullname(const upb_fielddef *e) {
530 return upb_def_fullname(upb_fielddef_upcast(e));
531}
532
Chris Fallin91473dc2014-12-12 15:58:26 -0800533static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
534 void *closure) {
535 const upb_fielddef *f = (const upb_fielddef*)r;
536 if (upb_fielddef_containingtype(f)) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700537 visit(r, upb_msgdef_upcast2(upb_fielddef_containingtype(f)), closure);
Chris Fallin91473dc2014-12-12 15:58:26 -0800538 }
Chris Fallinfcd88892015-01-13 18:14:39 -0800539 if (upb_fielddef_containingoneof(f)) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700540 visit(r, upb_oneofdef_upcast2(upb_fielddef_containingoneof(f)), closure);
Chris Fallinfcd88892015-01-13 18:14:39 -0800541 }
Chris Fallin91473dc2014-12-12 15:58:26 -0800542 if (upb_fielddef_subdef(f)) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700543 visit(r, upb_def_upcast(upb_fielddef_subdef(f)), closure);
Chris Fallin91473dc2014-12-12 15:58:26 -0800544 }
545}
546
547static void freefield(upb_refcounted *r) {
548 upb_fielddef *f = (upb_fielddef*)r;
549 upb_fielddef_uninit_default(f);
550 if (f->subdef_is_symbolic)
551 free(f->sub.name);
Josh Habermane8ed0212015-06-08 17:56:03 -0700552 upb_def_uninit(upb_fielddef_upcast_mutable(f));
Chris Fallin91473dc2014-12-12 15:58:26 -0800553 free(f);
554}
555
556static const char *enumdefaultstr(const upb_fielddef *f) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700557 const upb_enumdef *e;
Chris Fallin91473dc2014-12-12 15:58:26 -0800558 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
Josh Habermane8ed0212015-06-08 17:56:03 -0700559 e = upb_fielddef_enumsubdef(f);
Chris Fallin91473dc2014-12-12 15:58:26 -0800560 if (f->default_is_string && f->defaultval.bytes) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700561 /* Default was explicitly set as a string. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800562 str_t *s = f->defaultval.bytes;
563 return s->str;
564 } else if (e) {
565 if (!f->default_is_string) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700566 /* Default was explicitly set as an integer; look it up in enumdef. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800567 const char *name = upb_enumdef_iton(e, f->defaultval.sint);
568 if (name) {
569 return name;
570 }
571 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -0700572 /* Default is completely unset; pull enumdef default. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800573 if (upb_enumdef_numvals(e) > 0) {
574 const char *name = upb_enumdef_iton(e, upb_enumdef_default(e));
575 assert(name);
576 return name;
577 }
578 }
579 }
580 return NULL;
581}
582
583static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700584 const upb_enumdef *e;
Chris Fallin91473dc2014-12-12 15:58:26 -0800585 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
Josh Habermane8ed0212015-06-08 17:56:03 -0700586 e = upb_fielddef_enumsubdef(f);
Chris Fallin91473dc2014-12-12 15:58:26 -0800587 if (!f->default_is_string) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700588 /* Default was explicitly set as an integer. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800589 *val = f->defaultval.sint;
590 return true;
591 } else if (e) {
592 if (f->defaultval.bytes) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700593 /* Default was explicitly set as a str; try to lookup corresponding int. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800594 str_t *s = f->defaultval.bytes;
595 if (upb_enumdef_ntoiz(e, s->str, val)) {
596 return true;
597 }
598 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -0700599 /* Default is unset; try to pull in enumdef default. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800600 if (upb_enumdef_numvals(e) > 0) {
601 *val = upb_enumdef_default(e);
602 return true;
603 }
604 }
605 }
606 return false;
607}
608
Josh Habermane8ed0212015-06-08 17:56:03 -0700609upb_fielddef *upb_fielddef_new(const void *o) {
Chris Fallin91473dc2014-12-12 15:58:26 -0800610 static const struct upb_refcounted_vtbl vtbl = {visitfield, freefield};
611 upb_fielddef *f = malloc(sizeof(*f));
612 if (!f) return NULL;
Josh Habermane8ed0212015-06-08 17:56:03 -0700613 if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD, &vtbl, o)) {
Chris Fallin91473dc2014-12-12 15:58:26 -0800614 free(f);
615 return NULL;
616 }
617 f->msg.def = NULL;
618 f->sub.def = NULL;
Chris Fallinfcd88892015-01-13 18:14:39 -0800619 f->oneof = NULL;
Chris Fallin91473dc2014-12-12 15:58:26 -0800620 f->subdef_is_symbolic = false;
621 f->msg_is_symbolic = false;
622 f->label_ = UPB_LABEL_OPTIONAL;
623 f->type_ = UPB_TYPE_INT32;
624 f->number_ = 0;
625 f->type_is_set_ = false;
626 f->tagdelim = false;
627 f->is_extension_ = false;
628 f->lazy_ = false;
629 f->packed_ = true;
630
Josh Habermane8ed0212015-06-08 17:56:03 -0700631 /* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
632 * with all integer types and is in some since more "default" since the most
633 * normal-looking proto2 types int32/int64/uint32/uint64 use variable.
634 *
635 * Other options to consider:
636 * - there is no default; users must set this manually (like type).
637 * - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
638 * be an optimal default for signed integers. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800639 f->intfmt = UPB_INTFMT_VARIABLE;
640 return f;
641}
642
643upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700644 const char *srcname;
Chris Fallin91473dc2014-12-12 15:58:26 -0800645 upb_fielddef *newf = upb_fielddef_new(owner);
646 if (!newf) return NULL;
647 upb_fielddef_settype(newf, upb_fielddef_type(f));
648 upb_fielddef_setlabel(newf, upb_fielddef_label(f));
649 upb_fielddef_setnumber(newf, upb_fielddef_number(f), NULL);
650 upb_fielddef_setname(newf, upb_fielddef_name(f), NULL);
651 if (f->default_is_string && f->defaultval.bytes) {
652 str_t *s = f->defaultval.bytes;
653 upb_fielddef_setdefaultstr(newf, s->str, s->len, NULL);
654 } else {
655 newf->default_is_string = f->default_is_string;
656 newf->defaultval = f->defaultval;
657 }
658
Chris Fallin91473dc2014-12-12 15:58:26 -0800659 if (f->subdef_is_symbolic) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700660 srcname = f->sub.name; /* Might be NULL. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800661 } else {
662 srcname = f->sub.def ? upb_def_fullname(f->sub.def) : NULL;
663 }
664 if (srcname) {
665 char *newname = malloc(strlen(f->sub.def->fullname) + 2);
666 if (!newname) {
667 upb_fielddef_unref(newf, owner);
668 return NULL;
669 }
670 strcpy(newname, ".");
671 strcat(newname, f->sub.def->fullname);
672 upb_fielddef_setsubdefname(newf, newname, NULL);
673 free(newname);
674 }
675
676 return newf;
677}
678
Chris Fallin91473dc2014-12-12 15:58:26 -0800679bool upb_fielddef_typeisset(const upb_fielddef *f) {
680 return f->type_is_set_;
681}
682
683upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
684 assert(f->type_is_set_);
685 return f->type_;
686}
687
688uint32_t upb_fielddef_index(const upb_fielddef *f) {
689 return f->index_;
690}
691
692upb_label_t upb_fielddef_label(const upb_fielddef *f) {
693 return f->label_;
694}
695
696upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) {
697 return f->intfmt;
698}
699
700bool upb_fielddef_istagdelim(const upb_fielddef *f) {
701 return f->tagdelim;
702}
703
704uint32_t upb_fielddef_number(const upb_fielddef *f) {
705 return f->number_;
706}
707
708bool upb_fielddef_isextension(const upb_fielddef *f) {
709 return f->is_extension_;
710}
711
712bool upb_fielddef_lazy(const upb_fielddef *f) {
713 return f->lazy_;
714}
715
716bool upb_fielddef_packed(const upb_fielddef *f) {
717 return f->packed_;
718}
719
720const char *upb_fielddef_name(const upb_fielddef *f) {
Josh Habermane8ed0212015-06-08 17:56:03 -0700721 return upb_def_fullname(upb_fielddef_upcast(f));
Chris Fallin91473dc2014-12-12 15:58:26 -0800722}
723
Josh Habermanf654d492016-02-18 11:07:51 -0800724size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len) {
Josh Haberman78da6662016-01-13 19:05:43 -0800725 const char *name = upb_fielddef_name(f);
Josh Habermanf654d492016-02-18 11:07:51 -0800726 size_t src, dst = 0;
Josh Haberman78da6662016-01-13 19:05:43 -0800727 bool ucase_next = false;
728
Josh Habermanf654d492016-02-18 11:07:51 -0800729#define WRITE(byte) \
730 ++dst; \
731 if (dst < len) buf[dst - 1] = byte; \
732 else if (dst == len) buf[dst - 1] = '\0'
733
734 if (!name) {
735 WRITE('\0');
736 return 0;
737 }
Josh Haberman78da6662016-01-13 19:05:43 -0800738
739 /* Implement the transformation as described in the spec:
740 * 1. upper case all letters after an underscore.
741 * 2. remove all underscores.
742 */
Josh Habermanf654d492016-02-18 11:07:51 -0800743 for (src = 0; name[src]; src++) {
744 if (name[src] == '_') {
Josh Haberman78da6662016-01-13 19:05:43 -0800745 ucase_next = true;
746 continue;
747 }
748
749 if (ucase_next) {
Josh Habermanf654d492016-02-18 11:07:51 -0800750 WRITE(toupper(name[src]));
Josh Haberman78da6662016-01-13 19:05:43 -0800751 ucase_next = false;
752 } else {
Josh Habermanf654d492016-02-18 11:07:51 -0800753 WRITE(name[src]);
Josh Haberman78da6662016-01-13 19:05:43 -0800754 }
755 }
756
Josh Habermanf654d492016-02-18 11:07:51 -0800757 WRITE('\0');
758 return dst;
759
760#undef WRITE
Josh Haberman78da6662016-01-13 19:05:43 -0800761}
762
Chris Fallin91473dc2014-12-12 15:58:26 -0800763const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
764 return f->msg_is_symbolic ? NULL : f->msg.def;
765}
766
Chris Fallinfcd88892015-01-13 18:14:39 -0800767const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
768 return f->oneof;
769}
770
Chris Fallin91473dc2014-12-12 15:58:26 -0800771upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) {
772 return (upb_msgdef*)upb_fielddef_containingtype(f);
773}
774
775const char *upb_fielddef_containingtypename(upb_fielddef *f) {
776 return f->msg_is_symbolic ? f->msg.name : NULL;
777}
778
779static void release_containingtype(upb_fielddef *f) {
780 if (f->msg_is_symbolic) free(f->msg.name);
781}
782
783bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
784 upb_status *s) {
785 assert(!upb_fielddef_isfrozen(f));
786 if (upb_fielddef_containingtype(f)) {
787 upb_status_seterrmsg(s, "field has already been added to a message.");
788 return false;
789 }
Josh Habermane8ed0212015-06-08 17:56:03 -0700790 /* TODO: validate name (upb_isident() doesn't quite work atm because this name
791 * may have a leading "."). */
Chris Fallin91473dc2014-12-12 15:58:26 -0800792 release_containingtype(f);
793 f->msg.name = upb_strdup(name);
794 f->msg_is_symbolic = true;
795 return true;
796}
797
798bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
Chris Fallinfcd88892015-01-13 18:14:39 -0800799 if (upb_fielddef_containingtype(f) || upb_fielddef_containingoneof(f)) {
800 upb_status_seterrmsg(s, "Already added to message or oneof");
801 return false;
802 }
Josh Habermane8ed0212015-06-08 17:56:03 -0700803 return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s);
Chris Fallin91473dc2014-12-12 15:58:26 -0800804}
805
806static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) {
807 UPB_UNUSED(f);
808 UPB_UNUSED(type);
809 assert(f->type_is_set_ && upb_fielddef_type(f) == type);
810}
811
812int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
813 chkdefaulttype(f, UPB_TYPE_INT64);
814 return f->defaultval.sint;
815}
816
817int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
818 if (f->type_is_set_ && upb_fielddef_type(f) == UPB_TYPE_ENUM) {
819 int32_t val;
820 bool ok = enumdefaultint32(f, &val);
821 UPB_ASSERT_VAR(ok, ok);
822 return val;
823 } else {
824 chkdefaulttype(f, UPB_TYPE_INT32);
825 return f->defaultval.sint;
826 }
827}
828
829uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
830 chkdefaulttype(f, UPB_TYPE_UINT64);
831 return f->defaultval.uint;
832}
833
834uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
835 chkdefaulttype(f, UPB_TYPE_UINT32);
836 return f->defaultval.uint;
837}
838
839bool upb_fielddef_defaultbool(const upb_fielddef *f) {
840 chkdefaulttype(f, UPB_TYPE_BOOL);
841 return f->defaultval.uint;
842}
843
844float upb_fielddef_defaultfloat(const upb_fielddef *f) {
845 chkdefaulttype(f, UPB_TYPE_FLOAT);
846 return f->defaultval.flt;
847}
848
849double upb_fielddef_defaultdouble(const upb_fielddef *f) {
850 chkdefaulttype(f, UPB_TYPE_DOUBLE);
851 return f->defaultval.dbl;
852}
853
854const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
855 assert(f->type_is_set_);
856 assert(upb_fielddef_type(f) == UPB_TYPE_STRING ||
857 upb_fielddef_type(f) == UPB_TYPE_BYTES ||
858 upb_fielddef_type(f) == UPB_TYPE_ENUM);
859
860 if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
861 const char *ret = enumdefaultstr(f);
862 assert(ret);
Josh Habermane8ed0212015-06-08 17:56:03 -0700863 /* Enum defaults can't have embedded NULLs. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800864 if (len) *len = strlen(ret);
865 return ret;
866 }
867
868 if (f->default_is_string) {
869 str_t *str = f->defaultval.bytes;
870 if (len) *len = str->len;
871 return str->str;
872 }
873
874 return NULL;
875}
876
877static void upb_fielddef_init_default(upb_fielddef *f) {
878 f->default_is_string = false;
879 switch (upb_fielddef_type(f)) {
880 case UPB_TYPE_DOUBLE: f->defaultval.dbl = 0; break;
881 case UPB_TYPE_FLOAT: f->defaultval.flt = 0; break;
882 case UPB_TYPE_INT32:
883 case UPB_TYPE_INT64: f->defaultval.sint = 0; break;
884 case UPB_TYPE_UINT64:
885 case UPB_TYPE_UINT32:
886 case UPB_TYPE_BOOL: f->defaultval.uint = 0; break;
887 case UPB_TYPE_STRING:
888 case UPB_TYPE_BYTES:
889 f->defaultval.bytes = newstr("", 0);
890 f->default_is_string = true;
891 break;
892 case UPB_TYPE_MESSAGE: break;
893 case UPB_TYPE_ENUM:
Josh Habermane8ed0212015-06-08 17:56:03 -0700894 /* This is our special sentinel that indicates "not set" for an enum. */
Chris Fallin91473dc2014-12-12 15:58:26 -0800895 f->default_is_string = true;
896 f->defaultval.bytes = NULL;
897 break;
898 }
899}
900
901const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
902 return f->subdef_is_symbolic ? NULL : f->sub.def;
903}
904
905const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
906 const upb_def *def = upb_fielddef_subdef(f);
907 return def ? upb_dyncast_msgdef(def) : NULL;
908}
909
910const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
911 const upb_def *def = upb_fielddef_subdef(f);
912 return def ? upb_dyncast_enumdef(def) : NULL;
913}
914
915upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
916 return (upb_def*)upb_fielddef_subdef(f);
917}
918
919const char *upb_fielddef_subdefname(const upb_fielddef *f) {
920 if (f->subdef_is_symbolic) {
921 return f->sub.name;
922 } else if (f->sub.def) {
923 return upb_def_fullname(f->sub.def);
924 } else {
925 return NULL;
926 }
927}
928
929bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s) {
930 if (upb_fielddef_containingtype(f)) {
931 upb_status_seterrmsg(
932 s, "cannot change field number after adding to a message");
933 return false;
934 }
935 if (number == 0 || number > UPB_MAX_FIELDNUMBER) {
936 upb_status_seterrf(s, "invalid field number (%u)", number);
937 return false;
938 }
939 f->number_ = number;
940 return true;
941}
942
943void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
944 assert(!upb_fielddef_isfrozen(f));
945 assert(upb_fielddef_checktype(type));
946 upb_fielddef_uninit_default(f);
947 f->type_ = type;
948 f->type_is_set_ = true;
949 upb_fielddef_init_default(f);
950}
951
952void upb_fielddef_setdescriptortype(upb_fielddef *f, int type) {
953 assert(!upb_fielddef_isfrozen(f));
954 switch (type) {
955 case UPB_DESCRIPTOR_TYPE_DOUBLE:
956 upb_fielddef_settype(f, UPB_TYPE_DOUBLE);
957 break;
958 case UPB_DESCRIPTOR_TYPE_FLOAT:
959 upb_fielddef_settype(f, UPB_TYPE_FLOAT);
960 break;
961 case UPB_DESCRIPTOR_TYPE_INT64:
962 case UPB_DESCRIPTOR_TYPE_SFIXED64:
963 case UPB_DESCRIPTOR_TYPE_SINT64:
964 upb_fielddef_settype(f, UPB_TYPE_INT64);
965 break;
966 case UPB_DESCRIPTOR_TYPE_UINT64:
967 case UPB_DESCRIPTOR_TYPE_FIXED64:
968 upb_fielddef_settype(f, UPB_TYPE_UINT64);
969 break;
970 case UPB_DESCRIPTOR_TYPE_INT32:
971 case UPB_DESCRIPTOR_TYPE_SFIXED32:
972 case UPB_DESCRIPTOR_TYPE_SINT32:
973 upb_fielddef_settype(f, UPB_TYPE_INT32);
974 break;
975 case UPB_DESCRIPTOR_TYPE_UINT32:
976 case UPB_DESCRIPTOR_TYPE_FIXED32:
977 upb_fielddef_settype(f, UPB_TYPE_UINT32);
978 break;
979 case UPB_DESCRIPTOR_TYPE_BOOL:
980 upb_fielddef_settype(f, UPB_TYPE_BOOL);
981 break;
982 case UPB_DESCRIPTOR_TYPE_STRING:
983 upb_fielddef_settype(f, UPB_TYPE_STRING);
984 break;
985 case UPB_DESCRIPTOR_TYPE_BYTES:
986 upb_fielddef_settype(f, UPB_TYPE_BYTES);
987 break;
988 case UPB_DESCRIPTOR_TYPE_GROUP:
989 case UPB_DESCRIPTOR_TYPE_MESSAGE:
990 upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
991 break;
992 case UPB_DESCRIPTOR_TYPE_ENUM:
993 upb_fielddef_settype(f, UPB_TYPE_ENUM);
994 break;
995 default: assert(false);
996 }
997
998 if (type == UPB_DESCRIPTOR_TYPE_FIXED64 ||
999 type == UPB_DESCRIPTOR_TYPE_FIXED32 ||
1000 type == UPB_DESCRIPTOR_TYPE_SFIXED64 ||
1001 type == UPB_DESCRIPTOR_TYPE_SFIXED32) {
1002 upb_fielddef_setintfmt(f, UPB_INTFMT_FIXED);
1003 } else if (type == UPB_DESCRIPTOR_TYPE_SINT64 ||
1004 type == UPB_DESCRIPTOR_TYPE_SINT32) {
1005 upb_fielddef_setintfmt(f, UPB_INTFMT_ZIGZAG);
1006 } else {
1007 upb_fielddef_setintfmt(f, UPB_INTFMT_VARIABLE);
1008 }
1009
1010 upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP);
1011}
1012
1013upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
1014 switch (upb_fielddef_type(f)) {
1015 case UPB_TYPE_FLOAT: return UPB_DESCRIPTOR_TYPE_FLOAT;
1016 case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE;
1017 case UPB_TYPE_BOOL: return UPB_DESCRIPTOR_TYPE_BOOL;
1018 case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING;
1019 case UPB_TYPE_BYTES: return UPB_DESCRIPTOR_TYPE_BYTES;
1020 case UPB_TYPE_ENUM: return UPB_DESCRIPTOR_TYPE_ENUM;
1021 case UPB_TYPE_INT32:
1022 switch (upb_fielddef_intfmt(f)) {
1023 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32;
1024 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED32;
1025 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT32;
1026 }
1027 case UPB_TYPE_INT64:
1028 switch (upb_fielddef_intfmt(f)) {
1029 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64;
1030 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED64;
1031 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT64;
1032 }
1033 case UPB_TYPE_UINT32:
1034 switch (upb_fielddef_intfmt(f)) {
1035 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32;
1036 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED32;
1037 case UPB_INTFMT_ZIGZAG: return -1;
1038 }
1039 case UPB_TYPE_UINT64:
1040 switch (upb_fielddef_intfmt(f)) {
1041 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64;
1042 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED64;
1043 case UPB_INTFMT_ZIGZAG: return -1;
1044 }
1045 case UPB_TYPE_MESSAGE:
1046 return upb_fielddef_istagdelim(f) ?
1047 UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE;
1048 }
1049 return 0;
1050}
1051
1052void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension) {
1053 assert(!upb_fielddef_isfrozen(f));
1054 f->is_extension_ = is_extension;
1055}
1056
1057void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) {
1058 assert(!upb_fielddef_isfrozen(f));
1059 f->lazy_ = lazy;
1060}
1061
1062void upb_fielddef_setpacked(upb_fielddef *f, bool packed) {
1063 assert(!upb_fielddef_isfrozen(f));
1064 f->packed_ = packed;
1065}
1066
1067void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
1068 assert(!upb_fielddef_isfrozen(f));
1069 assert(upb_fielddef_checklabel(label));
1070 f->label_ = label;
1071}
1072
1073void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) {
1074 assert(!upb_fielddef_isfrozen(f));
1075 assert(upb_fielddef_checkintfmt(fmt));
1076 f->intfmt = fmt;
1077}
1078
1079void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) {
1080 assert(!upb_fielddef_isfrozen(f));
1081 f->tagdelim = tag_delim;
1082 f->tagdelim = tag_delim;
1083}
1084
1085static bool checksetdefault(upb_fielddef *f, upb_fieldtype_t type) {
1086 if (!f->type_is_set_ || upb_fielddef_isfrozen(f) ||
1087 upb_fielddef_type(f) != type) {
1088 assert(false);
1089 return false;
1090 }
1091 if (f->default_is_string) {
1092 str_t *s = f->defaultval.bytes;
1093 assert(s || type == UPB_TYPE_ENUM);
1094 if (s) freestr(s);
1095 }
1096 f->default_is_string = false;
1097 return true;
1098}
1099
1100void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t value) {
1101 if (checksetdefault(f, UPB_TYPE_INT64))
1102 f->defaultval.sint = value;
1103}
1104
1105void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t value) {
1106 if ((upb_fielddef_type(f) == UPB_TYPE_ENUM &&
1107 checksetdefault(f, UPB_TYPE_ENUM)) ||
1108 checksetdefault(f, UPB_TYPE_INT32)) {
1109 f->defaultval.sint = value;
1110 }
1111}
1112
1113void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t value) {
1114 if (checksetdefault(f, UPB_TYPE_UINT64))
1115 f->defaultval.uint = value;
1116}
1117
1118void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t value) {
1119 if (checksetdefault(f, UPB_TYPE_UINT32))
1120 f->defaultval.uint = value;
1121}
1122
1123void upb_fielddef_setdefaultbool(upb_fielddef *f, bool value) {
1124 if (checksetdefault(f, UPB_TYPE_BOOL))
1125 f->defaultval.uint = value;
1126}
1127
1128void upb_fielddef_setdefaultfloat(upb_fielddef *f, float value) {
1129 if (checksetdefault(f, UPB_TYPE_FLOAT))
1130 f->defaultval.flt = value;
1131}
1132
1133void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) {
1134 if (checksetdefault(f, UPB_TYPE_DOUBLE))
1135 f->defaultval.dbl = value;
1136}
1137
1138bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
1139 upb_status *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001140 str_t *str2;
Chris Fallin91473dc2014-12-12 15:58:26 -08001141 assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM);
1142 if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false, s))
1143 return false;
1144
1145 if (f->default_is_string) {
1146 str_t *s = f->defaultval.bytes;
1147 assert(s || f->type_ == UPB_TYPE_ENUM);
1148 if (s) freestr(s);
1149 } else {
1150 assert(f->type_ == UPB_TYPE_ENUM);
1151 }
1152
Josh Habermane8ed0212015-06-08 17:56:03 -07001153 str2 = newstr(str, len);
Chris Fallin91473dc2014-12-12 15:58:26 -08001154 f->defaultval.bytes = str2;
1155 f->default_is_string = true;
1156 return true;
1157}
1158
1159void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
1160 upb_status *s) {
1161 assert(f->type_is_set_);
1162 upb_fielddef_setdefaultstr(f, str, str ? strlen(str) : 0, s);
1163}
1164
1165bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) {
Chris Fallin91473dc2014-12-12 15:58:26 -08001166 int32_t val;
Josh Habermane8ed0212015-06-08 17:56:03 -07001167 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
Chris Fallin91473dc2014-12-12 15:58:26 -08001168 return enumdefaultint32(f, &val);
1169}
1170
1171bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f) {
1172 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1173 return enumdefaultstr(f) != NULL;
1174}
1175
1176static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef,
1177 upb_status *s) {
1178 if (f->type_ == UPB_TYPE_MESSAGE) {
1179 if (upb_dyncast_msgdef(subdef)) return true;
1180 upb_status_seterrmsg(s, "invalid subdef type for this submessage field");
1181 return false;
1182 } else if (f->type_ == UPB_TYPE_ENUM) {
1183 if (upb_dyncast_enumdef(subdef)) return true;
1184 upb_status_seterrmsg(s, "invalid subdef type for this enum field");
1185 return false;
1186 } else {
1187 upb_status_seterrmsg(s, "only message and enum fields can have a subdef");
1188 return false;
1189 }
1190}
1191
1192static void release_subdef(upb_fielddef *f) {
1193 if (f->subdef_is_symbolic) {
1194 free(f->sub.name);
1195 } else if (f->sub.def) {
1196 upb_unref2(f->sub.def, f);
1197 }
1198}
1199
1200bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
1201 upb_status *s) {
1202 assert(!upb_fielddef_isfrozen(f));
1203 assert(upb_fielddef_hassubdef(f));
1204 if (subdef && !upb_subdef_typecheck(f, subdef, s)) return false;
1205 release_subdef(f);
1206 f->sub.def = subdef;
1207 f->subdef_is_symbolic = false;
1208 if (f->sub.def) upb_ref2(f->sub.def, f);
1209 return true;
1210}
1211
1212bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
1213 upb_status *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001214 return upb_fielddef_setsubdef(f, upb_msgdef_upcast(subdef), s);
Chris Fallin91473dc2014-12-12 15:58:26 -08001215}
1216
1217bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
1218 upb_status *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001219 return upb_fielddef_setsubdef(f, upb_enumdef_upcast(subdef), s);
Chris Fallin91473dc2014-12-12 15:58:26 -08001220}
1221
1222bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
1223 upb_status *s) {
1224 assert(!upb_fielddef_isfrozen(f));
1225 if (!upb_fielddef_hassubdef(f)) {
1226 upb_status_seterrmsg(s, "field type does not accept a subdef");
1227 return false;
1228 }
Josh Habermane8ed0212015-06-08 17:56:03 -07001229 /* TODO: validate name (upb_isident() doesn't quite work atm because this name
1230 * may have a leading "."). */
Chris Fallin91473dc2014-12-12 15:58:26 -08001231 release_subdef(f);
1232 f->sub.name = upb_strdup(name);
1233 f->subdef_is_symbolic = true;
1234 return true;
1235}
1236
1237bool upb_fielddef_issubmsg(const upb_fielddef *f) {
1238 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
1239}
1240
1241bool upb_fielddef_isstring(const upb_fielddef *f) {
1242 return upb_fielddef_type(f) == UPB_TYPE_STRING ||
1243 upb_fielddef_type(f) == UPB_TYPE_BYTES;
1244}
1245
1246bool upb_fielddef_isseq(const upb_fielddef *f) {
1247 return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
1248}
1249
1250bool upb_fielddef_isprimitive(const upb_fielddef *f) {
1251 return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f);
1252}
1253
Chris Fallina5075922015-02-02 15:07:34 -08001254bool upb_fielddef_ismap(const upb_fielddef *f) {
1255 return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
1256 upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
1257}
1258
Josh Haberman78da6662016-01-13 19:05:43 -08001259bool upb_fielddef_haspresence(const upb_fielddef *f) {
1260 if (upb_fielddef_isseq(f)) return false;
1261 if (upb_fielddef_issubmsg(f)) return true;
1262
1263 /* Primitive field: return true unless there is a message that specifies
1264 * presence should not exist. */
1265 if (f->msg_is_symbolic || !f->msg.def) return true;
1266 return f->msg.def->primitives_have_presence;
1267}
1268
Chris Fallin91473dc2014-12-12 15:58:26 -08001269bool upb_fielddef_hassubdef(const upb_fielddef *f) {
1270 return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
1271}
1272
/* True iff |x| lies in the closed interval [low, high].  An empty interval
 * (low > high) contains nothing. */
static bool between(int32_t x, int32_t low, int32_t high) {
  if (x < low) return false;
  return x <= high;
}
1276
/* True iff |label| is within the accepted numeric range [1, 3]. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}
/* True iff |type| is within the accepted numeric range [1, 11]. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}
/* True iff |fmt| is within the accepted numeric range [1, 3]. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
1280
/* True iff |type| is within the accepted numeric range [1, 18]. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  bool in_range = between(type, 1, 18);
  return in_range;
}
1284
1285/* upb_msgdef *****************************************************************/
1286
1287static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
1288 void *closure) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001289 upb_msg_oneof_iter o;
Chris Fallin91473dc2014-12-12 15:58:26 -08001290 const upb_msgdef *m = (const upb_msgdef*)r;
Chris Fallinfcd88892015-01-13 18:14:39 -08001291 upb_msg_field_iter i;
1292 for(upb_msg_field_begin(&i, m);
1293 !upb_msg_field_done(&i);
1294 upb_msg_field_next(&i)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08001295 upb_fielddef *f = upb_msg_iter_field(&i);
Josh Habermane8ed0212015-06-08 17:56:03 -07001296 visit(r, upb_fielddef_upcast2(f), closure);
Chris Fallin91473dc2014-12-12 15:58:26 -08001297 }
Chris Fallinfcd88892015-01-13 18:14:39 -08001298 for(upb_msg_oneof_begin(&o, m);
1299 !upb_msg_oneof_done(&o);
1300 upb_msg_oneof_next(&o)) {
1301 upb_oneofdef *f = upb_msg_iter_oneof(&o);
Josh Habermane8ed0212015-06-08 17:56:03 -07001302 visit(r, upb_oneofdef_upcast2(f), closure);
Chris Fallinfcd88892015-01-13 18:14:39 -08001303 }
Chris Fallin91473dc2014-12-12 15:58:26 -08001304}
1305
1306static void freemsg(upb_refcounted *r) {
1307 upb_msgdef *m = (upb_msgdef*)r;
Chris Fallinfcd88892015-01-13 18:14:39 -08001308 upb_strtable_uninit(&m->ntoo);
Chris Fallin91473dc2014-12-12 15:58:26 -08001309 upb_strtable_uninit(&m->ntof);
1310 upb_inttable_uninit(&m->itof);
Josh Habermane8ed0212015-06-08 17:56:03 -07001311 upb_def_uninit(upb_msgdef_upcast_mutable(m));
Chris Fallin91473dc2014-12-12 15:58:26 -08001312 free(m);
1313}
1314
1315upb_msgdef *upb_msgdef_new(const void *owner) {
1316 static const struct upb_refcounted_vtbl vtbl = {visitmsg, freemsg};
1317 upb_msgdef *m = malloc(sizeof(*m));
1318 if (!m) return NULL;
Josh Habermane8ed0212015-06-08 17:56:03 -07001319 if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &vtbl, owner))
1320 goto err2;
Chris Fallinfcd88892015-01-13 18:14:39 -08001321 if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err3;
1322 if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err2;
1323 if (!upb_strtable_init(&m->ntoo, UPB_CTYPE_PTR)) goto err1;
Chris Fallinfd1a3ff2015-01-06 15:44:09 -08001324 m->map_entry = false;
Josh Haberman78da6662016-01-13 19:05:43 -08001325 m->primitives_have_presence = true;
Chris Fallin91473dc2014-12-12 15:58:26 -08001326 return m;
1327
1328err1:
Chris Fallinfcd88892015-01-13 18:14:39 -08001329 upb_strtable_uninit(&m->ntof);
Chris Fallin91473dc2014-12-12 15:58:26 -08001330err2:
Chris Fallinfcd88892015-01-13 18:14:39 -08001331 upb_inttable_uninit(&m->itof);
1332err3:
Chris Fallin91473dc2014-12-12 15:58:26 -08001333 free(m);
1334 return NULL;
1335}
1336
1337upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001338 bool ok;
1339 upb_msg_field_iter i;
1340 upb_msg_oneof_iter o;
1341
Chris Fallin91473dc2014-12-12 15:58:26 -08001342 upb_msgdef *newm = upb_msgdef_new(owner);
1343 if (!newm) return NULL;
Josh Habermane8ed0212015-06-08 17:56:03 -07001344 ok = upb_def_setfullname(upb_msgdef_upcast_mutable(newm),
1345 upb_def_fullname(upb_msgdef_upcast(m)),
1346 NULL);
Chris Fallinfd1a3ff2015-01-06 15:44:09 -08001347 newm->map_entry = m->map_entry;
Josh Haberman78da6662016-01-13 19:05:43 -08001348 newm->primitives_have_presence = m->primitives_have_presence;
Chris Fallin91473dc2014-12-12 15:58:26 -08001349 UPB_ASSERT_VAR(ok, ok);
Chris Fallinfcd88892015-01-13 18:14:39 -08001350 for(upb_msg_field_begin(&i, m);
1351 !upb_msg_field_done(&i);
1352 upb_msg_field_next(&i)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08001353 upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f);
Josh Habermane8ed0212015-06-08 17:56:03 -07001354 /* Fields in oneofs are dup'd below. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001355 if (upb_fielddef_containingoneof(f)) continue;
Chris Fallin91473dc2014-12-12 15:58:26 -08001356 if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) {
1357 upb_msgdef_unref(newm, owner);
1358 return NULL;
1359 }
1360 }
Chris Fallinfcd88892015-01-13 18:14:39 -08001361 for(upb_msg_oneof_begin(&o, m);
1362 !upb_msg_oneof_done(&o);
1363 upb_msg_oneof_next(&o)) {
1364 upb_oneofdef *f = upb_oneofdef_dup(upb_msg_iter_oneof(&o), &f);
1365 if (!f || !upb_msgdef_addoneof(newm, f, &f, NULL)) {
1366 upb_msgdef_unref(newm, owner);
1367 return NULL;
1368 }
1369 }
Chris Fallin91473dc2014-12-12 15:58:26 -08001370 return newm;
1371}
1372
Chris Fallin91473dc2014-12-12 15:58:26 -08001373bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001374 upb_def *d = upb_msgdef_upcast_mutable(m);
Chris Fallin91473dc2014-12-12 15:58:26 -08001375 return upb_def_freeze(&d, 1, status);
1376}
1377
1378const char *upb_msgdef_fullname(const upb_msgdef *m) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001379 return upb_def_fullname(upb_msgdef_upcast(m));
Chris Fallin91473dc2014-12-12 15:58:26 -08001380}
1381
1382bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
1383 upb_status *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001384 return upb_def_setfullname(upb_msgdef_upcast_mutable(m), fullname, s);
Chris Fallin91473dc2014-12-12 15:58:26 -08001385}
1386
Josh Habermane8ed0212015-06-08 17:56:03 -07001387/* Helper: check that the field |f| is safe to add to msgdef |m|. Set an error
1388 * on status |s| and return false if not. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001389static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
1390 upb_status *s) {
1391 if (upb_fielddef_containingtype(f) != NULL) {
1392 upb_status_seterrmsg(s, "fielddef already belongs to a message");
1393 return false;
1394 } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
1395 upb_status_seterrmsg(s, "field name or number were not set");
1396 return false;
1397 } else if (upb_msgdef_ntofz(m, upb_fielddef_name(f)) ||
1398 upb_msgdef_itof(m, upb_fielddef_number(f))) {
1399 upb_status_seterrmsg(s, "duplicate field name or number for field");
1400 return false;
1401 }
1402 return true;
1403}
1404
1405static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
1406 release_containingtype(f);
1407 f->msg.def = m;
1408 f->msg_is_symbolic = false;
1409 upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
1410 upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));
1411 upb_ref2(f, m);
1412 upb_ref2(m, f);
1413 if (ref_donor) upb_fielddef_unref(f, ref_donor);
1414}
1415
Chris Fallin91473dc2014-12-12 15:58:26 -08001416bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
1417 upb_status *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001418 /* TODO: extensions need to have a separate namespace, because proto2 allows a
1419 * top-level extension (ie. one not in any package) to have the same name as a
1420 * field from the message.
1421 *
1422 * This also implies that there needs to be a separate lookup-by-name method
1423 * for extensions. It seems desirable for iteration to return both extensions
1424 * and non-extensions though.
1425 *
1426 * We also need to validate that the field number is in an extension range iff
1427 * it is an extension.
1428 *
1429 * This method is idempotent. Check if |f| is already part of this msgdef and
1430 * return immediately if so. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001431 if (upb_fielddef_containingtype(f) == m) {
1432 return true;
1433 }
1434
Josh Habermane8ed0212015-06-08 17:56:03 -07001435 /* Check constraints for all fields before performing any action. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001436 if (!check_field_add(m, f, s)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08001437 return false;
Chris Fallinfcd88892015-01-13 18:14:39 -08001438 } else if (upb_fielddef_containingoneof(f) != NULL) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001439 /* Fields in a oneof can only be added by adding the oneof to the msgdef. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001440 upb_status_seterrmsg(s, "fielddef is part of a oneof");
Chris Fallin91473dc2014-12-12 15:58:26 -08001441 return false;
1442 }
1443
Josh Habermane8ed0212015-06-08 17:56:03 -07001444 /* Constraint checks ok, perform the action. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001445 add_field(m, f, ref_donor);
1446 return true;
1447}
1448
1449bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
1450 upb_status *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001451 upb_oneof_iter it;
1452
1453 /* Check various conditions that would prevent this oneof from being added. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001454 if (upb_oneofdef_containingtype(o)) {
1455 upb_status_seterrmsg(s, "oneofdef already belongs to a message");
1456 return false;
1457 } else if (upb_oneofdef_name(o) == NULL) {
1458 upb_status_seterrmsg(s, "oneofdef name was not set");
1459 return false;
1460 } else if (upb_msgdef_ntooz(m, upb_oneofdef_name(o))) {
1461 upb_status_seterrmsg(s, "duplicate oneof name");
1462 return false;
1463 }
1464
Josh Habermane8ed0212015-06-08 17:56:03 -07001465 /* Check that all of the oneof's fields do not conflict with names or numbers
1466 * of fields already in the message. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001467 for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
1468 const upb_fielddef *f = upb_oneof_iter_field(&it);
1469 if (!check_field_add(m, f, s)) {
1470 return false;
1471 }
1472 }
1473
Josh Habermane8ed0212015-06-08 17:56:03 -07001474 /* Everything checks out -- commit now. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001475
Josh Habermane8ed0212015-06-08 17:56:03 -07001476 /* Add oneof itself first. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001477 o->parent = m;
1478 upb_strtable_insert(&m->ntoo, upb_oneofdef_name(o), upb_value_ptr(o));
1479 upb_ref2(o, m);
1480 upb_ref2(m, o);
1481
Josh Habermane8ed0212015-06-08 17:56:03 -07001482 /* Add each field of the oneof directly to the msgdef. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001483 for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
1484 upb_fielddef *f = upb_oneof_iter_field(&it);
1485 add_field(m, f, NULL);
1486 }
1487
1488 if (ref_donor) upb_oneofdef_unref(o, ref_donor);
Chris Fallin91473dc2014-12-12 15:58:26 -08001489
1490 return true;
1491}
1492
Josh Haberman78da6662016-01-13 19:05:43 -08001493void upb_msgdef_setprimitiveshavepresence(upb_msgdef *m, bool have_presence) {
1494 assert(!upb_msgdef_isfrozen(m));
1495 m->primitives_have_presence = have_presence;
1496}
1497
Chris Fallin91473dc2014-12-12 15:58:26 -08001498const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
1499 upb_value val;
1500 return upb_inttable_lookup32(&m->itof, i, &val) ?
1501 upb_value_getptr(val) : NULL;
1502}
1503
1504const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
1505 size_t len) {
1506 upb_value val;
1507 return upb_strtable_lookup2(&m->ntof, name, len, &val) ?
1508 upb_value_getptr(val) : NULL;
1509}
1510
Chris Fallinfcd88892015-01-13 18:14:39 -08001511const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
1512 size_t len) {
1513 upb_value val;
1514 return upb_strtable_lookup2(&m->ntoo, name, len, &val) ?
1515 upb_value_getptr(val) : NULL;
1516}
1517
Chris Fallin91473dc2014-12-12 15:58:26 -08001518int upb_msgdef_numfields(const upb_msgdef *m) {
1519 return upb_strtable_count(&m->ntof);
1520}
1521
Chris Fallinfcd88892015-01-13 18:14:39 -08001522int upb_msgdef_numoneofs(const upb_msgdef *m) {
1523 return upb_strtable_count(&m->ntoo);
1524}
1525
Chris Fallinfd1a3ff2015-01-06 15:44:09 -08001526void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
1527 assert(!upb_msgdef_isfrozen(m));
1528 m->map_entry = map_entry;
1529}
1530
1531bool upb_msgdef_mapentry(const upb_msgdef *m) {
1532 return m->map_entry;
1533}
1534
Chris Fallinfcd88892015-01-13 18:14:39 -08001535void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
Chris Fallin91473dc2014-12-12 15:58:26 -08001536 upb_inttable_begin(iter, &m->itof);
1537}
1538
Chris Fallinfcd88892015-01-13 18:14:39 -08001539void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); }
Chris Fallin91473dc2014-12-12 15:58:26 -08001540
Chris Fallinfcd88892015-01-13 18:14:39 -08001541bool upb_msg_field_done(const upb_msg_field_iter *iter) {
1542 return upb_inttable_done(iter);
1543}
Chris Fallin91473dc2014-12-12 15:58:26 -08001544
Chris Fallinfcd88892015-01-13 18:14:39 -08001545upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
Chris Fallin91473dc2014-12-12 15:58:26 -08001546 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
1547}
1548
Chris Fallinfcd88892015-01-13 18:14:39 -08001549void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
1550 upb_inttable_iter_setdone(iter);
1551}
1552
1553void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
1554 upb_strtable_begin(iter, &m->ntoo);
1555}
1556
1557void upb_msg_oneof_next(upb_msg_oneof_iter *iter) { upb_strtable_next(iter); }
1558
1559bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
1560 return upb_strtable_done(iter);
1561}
1562
1563upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
1564 return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter));
1565}
1566
1567void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
1568 upb_strtable_iter_setdone(iter);
1569}
1570
1571/* upb_oneofdef ***************************************************************/
1572
1573static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
1574 void *closure) {
1575 const upb_oneofdef *o = (const upb_oneofdef*)r;
1576 upb_oneof_iter i;
1577 for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
1578 const upb_fielddef *f = upb_oneof_iter_field(&i);
Josh Habermane8ed0212015-06-08 17:56:03 -07001579 visit(r, upb_fielddef_upcast2(f), closure);
Chris Fallinfcd88892015-01-13 18:14:39 -08001580 }
1581 if (o->parent) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001582 visit(r, upb_msgdef_upcast2(o->parent), closure);
Chris Fallinfcd88892015-01-13 18:14:39 -08001583 }
1584}
1585
1586static void freeoneof(upb_refcounted *r) {
1587 upb_oneofdef *o = (upb_oneofdef*)r;
1588 upb_strtable_uninit(&o->ntof);
1589 upb_inttable_uninit(&o->itof);
Josh Habermane8ed0212015-06-08 17:56:03 -07001590 upb_def_uninit(upb_oneofdef_upcast_mutable(o));
Chris Fallinfcd88892015-01-13 18:14:39 -08001591 free(o);
1592}
1593
1594upb_oneofdef *upb_oneofdef_new(const void *owner) {
1595 static const struct upb_refcounted_vtbl vtbl = {visitoneof, freeoneof};
1596 upb_oneofdef *o = malloc(sizeof(*o));
1597 o->parent = NULL;
1598 if (!o) return NULL;
Josh Habermane8ed0212015-06-08 17:56:03 -07001599 if (!upb_def_init(upb_oneofdef_upcast_mutable(o), UPB_DEF_ONEOF, &vtbl,
1600 owner))
1601 goto err2;
Chris Fallinfcd88892015-01-13 18:14:39 -08001602 if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
1603 if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
1604 return o;
1605
1606err1:
1607 upb_inttable_uninit(&o->itof);
1608err2:
1609 free(o);
1610 return NULL;
1611}
1612
1613upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001614 bool ok;
1615 upb_oneof_iter i;
Chris Fallinfcd88892015-01-13 18:14:39 -08001616 upb_oneofdef *newo = upb_oneofdef_new(owner);
1617 if (!newo) return NULL;
Josh Habermane8ed0212015-06-08 17:56:03 -07001618 ok = upb_def_setfullname(upb_oneofdef_upcast_mutable(newo),
1619 upb_def_fullname(upb_oneofdef_upcast(o)), NULL);
Chris Fallinfcd88892015-01-13 18:14:39 -08001620 UPB_ASSERT_VAR(ok, ok);
Chris Fallinfcd88892015-01-13 18:14:39 -08001621 for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
1622 upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f);
1623 if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) {
1624 upb_oneofdef_unref(newo, owner);
1625 return NULL;
1626 }
1627 }
1628 return newo;
1629}
1630
Chris Fallinfcd88892015-01-13 18:14:39 -08001631const char *upb_oneofdef_name(const upb_oneofdef *o) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001632 return upb_def_fullname(upb_oneofdef_upcast(o));
Chris Fallinfcd88892015-01-13 18:14:39 -08001633}
1634
1635bool upb_oneofdef_setname(upb_oneofdef *o, const char *fullname,
1636 upb_status *s) {
1637 if (upb_oneofdef_containingtype(o)) {
1638 upb_status_seterrmsg(s, "oneof already added to a message");
1639 return false;
1640 }
Josh Habermane8ed0212015-06-08 17:56:03 -07001641 return upb_def_setfullname(upb_oneofdef_upcast_mutable(o), fullname, s);
Chris Fallinfcd88892015-01-13 18:14:39 -08001642}
1643
1644const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
1645 return o->parent;
1646}
1647
1648int upb_oneofdef_numfields(const upb_oneofdef *o) {
1649 return upb_strtable_count(&o->ntof);
1650}
1651
1652bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
1653 const void *ref_donor,
1654 upb_status *s) {
1655 assert(!upb_oneofdef_isfrozen(o));
1656 assert(!o->parent || !upb_msgdef_isfrozen(o->parent));
1657
Josh Habermane8ed0212015-06-08 17:56:03 -07001658 /* This method is idempotent. Check if |f| is already part of this oneofdef
1659 * and return immediately if so. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001660 if (upb_fielddef_containingoneof(f) == o) {
1661 return true;
1662 }
1663
Josh Habermane8ed0212015-06-08 17:56:03 -07001664 /* The field must have an OPTIONAL label. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001665 if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
1666 upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
1667 return false;
1668 }
1669
Josh Habermane8ed0212015-06-08 17:56:03 -07001670 /* Check that no field with this name or number exists already in the oneof.
1671 * Also check that the field is not already part of a oneof. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001672 if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
1673 upb_status_seterrmsg(s, "field name or number were not set");
1674 return false;
1675 } else if (upb_oneofdef_itof(o, upb_fielddef_number(f)) ||
1676 upb_oneofdef_ntofz(o, upb_fielddef_name(f))) {
1677 upb_status_seterrmsg(s, "duplicate field name or number");
1678 return false;
1679 } else if (upb_fielddef_containingoneof(f) != NULL) {
1680 upb_status_seterrmsg(s, "fielddef already belongs to a oneof");
1681 return false;
1682 }
1683
Josh Habermane8ed0212015-06-08 17:56:03 -07001684 /* We allow adding a field to the oneof either if the field is not part of a
1685 * msgdef, or if it is and we are also part of the same msgdef. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001686 if (o->parent == NULL) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001687 /* If we're not in a msgdef, the field cannot be either. Otherwise we would
1688 * need to magically add this oneof to a msgdef to remain consistent, which
1689 * is surprising behavior. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001690 if (upb_fielddef_containingtype(f) != NULL) {
1691 upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
1692 "oneof does not");
1693 return false;
1694 }
1695 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -07001696 /* If we're in a msgdef, the user can add fields that either aren't in any
1697 * msgdef (in which case they're added to our msgdef) or already a part of
1698 * our msgdef. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001699 if (upb_fielddef_containingtype(f) != NULL &&
1700 upb_fielddef_containingtype(f) != o->parent) {
1701 upb_status_seterrmsg(s, "fielddef belongs to a different message "
1702 "than oneof");
1703 return false;
1704 }
1705 }
1706
Josh Habermane8ed0212015-06-08 17:56:03 -07001707 /* Commit phase. First add the field to our parent msgdef, if any, because
1708 * that may fail; then add the field to our own tables. */
Chris Fallinfcd88892015-01-13 18:14:39 -08001709
1710 if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL) {
1711 if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {
1712 return false;
1713 }
1714 }
1715
1716 release_containingtype(f);
1717 f->oneof = o;
1718 upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f));
1719 upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f));
1720 upb_ref2(f, o);
1721 upb_ref2(o, f);
1722 if (ref_donor) upb_fielddef_unref(f, ref_donor);
1723
1724 return true;
1725}
1726
1727const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
1728 const char *name, size_t length) {
1729 upb_value val;
1730 return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
1731 upb_value_getptr(val) : NULL;
1732}
1733
1734const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
1735 upb_value val;
1736 return upb_inttable_lookup32(&o->itof, num, &val) ?
1737 upb_value_getptr(val) : NULL;
1738}
1739
1740void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
1741 upb_inttable_begin(iter, &o->itof);
1742}
1743
1744void upb_oneof_next(upb_oneof_iter *iter) {
1745 upb_inttable_next(iter);
1746}
1747
1748bool upb_oneof_done(upb_oneof_iter *iter) {
1749 return upb_inttable_done(iter);
1750}
1751
1752upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
1753 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
1754}
1755
1756void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
Chris Fallin91473dc2014-12-12 15:58:26 -08001757 upb_inttable_iter_setdone(iter);
1758}
Chris Fallind3262772015-05-14 18:24:26 -07001759
1760
1761#include <stdlib.h>
1762#include <stdio.h>
1763#include <string.h>
1764
1765typedef struct cleanup_ent {
1766 upb_cleanup_func *cleanup;
1767 void *ud;
1768 struct cleanup_ent *next;
1769} cleanup_ent;
1770
/* Forward declaration: upb_env_malloc() compares against and calls
 * seeded_alloc(), which is defined further down in this file. */
static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size);
1772
1773/* Default allocator **********************************************************/
1774
Josh Habermane8ed0212015-06-08 17:56:03 -07001775/* Just use realloc, keeping all allocated blocks in a linked list to destroy at
1776 * the end. */
Chris Fallind3262772015-05-14 18:24:26 -07001777
/* Header that default_alloc() places immediately before every block it hands
 * out. */
typedef struct mem_block {
  /* The list is doubly linked so that a block relocated by realloc() can be
   * re-linked from both neighbours without walking the list. */
  struct mem_block *prev;
  struct mem_block *next;
#ifndef NDEBUG
  size_t size;  /* User-visible size; excludes this header. */
#endif
} mem_block;
1787
1788typedef struct {
1789 mem_block *head;
1790} default_alloc_ud;
1791
1792static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
Chris Fallind3262772015-05-14 18:24:26 -07001793 default_alloc_ud *ud = _ud;
Josh Habermane8ed0212015-06-08 17:56:03 -07001794 mem_block *from, *block;
1795 void *ret;
1796 UPB_UNUSED(oldsize);
Chris Fallind3262772015-05-14 18:24:26 -07001797
Josh Habermane8ed0212015-06-08 17:56:03 -07001798 from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL;
Chris Fallind3262772015-05-14 18:24:26 -07001799
1800#ifndef NDEBUG
1801 if (from) {
1802 assert(oldsize <= from->size);
1803 }
1804#endif
1805
Josh Habermane8ed0212015-06-08 17:56:03 -07001806 /* TODO(haberman): we probably need to provide even better alignment here,
1807 * like 16-byte alignment of the returned data pointer. */
1808 block = realloc(from, size + sizeof(mem_block));
Chris Fallind3262772015-05-14 18:24:26 -07001809 if (!block) return NULL;
Josh Habermane8ed0212015-06-08 17:56:03 -07001810 ret = (char*)block + sizeof(*block);
Chris Fallind3262772015-05-14 18:24:26 -07001811
1812#ifndef NDEBUG
1813 block->size = size;
1814#endif
1815
1816 if (from) {
1817 if (block != from) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001818 /* The block was moved, so pointers in next and prev blocks must be
1819 * updated to its new location. */
Chris Fallind3262772015-05-14 18:24:26 -07001820 if (block->next) block->next->prev = block;
1821 if (block->prev) block->prev->next = block;
Josh Habermanfb8ed702015-06-22 17:23:55 -07001822 if (ud->head == from) ud->head = block;
Chris Fallind3262772015-05-14 18:24:26 -07001823 }
1824 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -07001825 /* Insert at head of linked list. */
Chris Fallind3262772015-05-14 18:24:26 -07001826 block->prev = NULL;
1827 block->next = ud->head;
1828 if (block->next) block->next->prev = block;
1829 ud->head = block;
1830 }
1831
Josh Habermane8ed0212015-06-08 17:56:03 -07001832 return ret;
Chris Fallind3262772015-05-14 18:24:26 -07001833}
1834
1835static void default_alloc_cleanup(void *_ud) {
1836 default_alloc_ud *ud = _ud;
1837 mem_block *block = ud->head;
1838
1839 while (block) {
1840 void *to_free = block;
1841 block = block->next;
1842 free(to_free);
1843 }
1844}
1845
1846
1847/* Standard error functions ***************************************************/
1848
1849static bool default_err(void *ud, const upb_status *status) {
1850 UPB_UNUSED(ud);
Josh Habermanfb8ed702015-06-22 17:23:55 -07001851 UPB_UNUSED(status);
Chris Fallind3262772015-05-14 18:24:26 -07001852 return false;
1853}
1854
1855static bool write_err_to(void *ud, const upb_status *status) {
1856 upb_status *copy_to = ud;
1857 upb_status_copy(copy_to, status);
1858 return false;
1859}
1860
1861
1862/* upb_env ********************************************************************/
1863
1864void upb_env_init(upb_env *e) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001865 default_alloc_ud *ud = (default_alloc_ud*)&e->default_alloc_ud;
Chris Fallind3262772015-05-14 18:24:26 -07001866 e->ok_ = true;
1867 e->bytes_allocated = 0;
1868 e->cleanup_head = NULL;
1869
Chris Fallind3262772015-05-14 18:24:26 -07001870 ud->head = NULL;
1871
Josh Habermane8ed0212015-06-08 17:56:03 -07001872 /* Set default functions. */
Chris Fallind3262772015-05-14 18:24:26 -07001873 upb_env_setallocfunc(e, default_alloc, ud);
1874 upb_env_seterrorfunc(e, default_err, NULL);
1875}
1876
1877void upb_env_uninit(upb_env *e) {
1878 cleanup_ent *ent = e->cleanup_head;
1879
1880 while (ent) {
1881 ent->cleanup(ent->ud);
1882 ent = ent->next;
1883 }
1884
Josh Habermane8ed0212015-06-08 17:56:03 -07001885 /* Must do this after running cleanup functions, because this will delete
1886 the memory we store our cleanup entries in! */
Chris Fallind3262772015-05-14 18:24:26 -07001887 if (e->alloc == default_alloc) {
1888 default_alloc_cleanup(e->alloc_ud);
1889 }
1890}
1891
1892UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc,
1893 void *ud) {
1894 e->alloc = alloc;
1895 e->alloc_ud = ud;
1896}
1897
1898UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func,
1899 void *ud) {
1900 e->err = func;
1901 e->err_ud = ud;
1902}
1903
1904void upb_env_reporterrorsto(upb_env *e, upb_status *status) {
1905 e->err = write_err_to;
1906 e->err_ud = status;
1907}
1908
1909bool upb_env_ok(const upb_env *e) {
1910 return e->ok_;
1911}
1912
1913bool upb_env_reporterror(upb_env *e, const upb_status *status) {
1914 e->ok_ = false;
1915 return e->err(e->err_ud, status);
1916}
1917
1918bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
1919 cleanup_ent *ent = upb_env_malloc(e, sizeof(cleanup_ent));
1920 if (!ent) return false;
1921
1922 ent->cleanup = func;
1923 ent->ud = ud;
1924 ent->next = e->cleanup_head;
1925 e->cleanup_head = ent;
1926
1927 return true;
1928}
1929
1930void *upb_env_malloc(upb_env *e, size_t size) {
1931 e->bytes_allocated += size;
1932 if (e->alloc == seeded_alloc) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001933 /* This is equivalent to the next branch, but allows inlining for a
1934 * measurable perf benefit. */
Chris Fallind3262772015-05-14 18:24:26 -07001935 return seeded_alloc(e->alloc_ud, NULL, 0, size);
1936 } else {
1937 return e->alloc(e->alloc_ud, NULL, 0, size);
1938 }
1939}
1940
1941void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001942 char *ret;
Chris Fallind3262772015-05-14 18:24:26 -07001943 assert(oldsize <= size);
Josh Habermane8ed0212015-06-08 17:56:03 -07001944 ret = e->alloc(e->alloc_ud, ptr, oldsize, size);
Chris Fallind3262772015-05-14 18:24:26 -07001945
1946#ifndef NDEBUG
Josh Habermane8ed0212015-06-08 17:56:03 -07001947 /* Overwrite non-preserved memory to ensure callers are passing the oldsize
1948 * that they truly require. */
Chris Fallind3262772015-05-14 18:24:26 -07001949 memset(ret + oldsize, 0xff, size - oldsize);
1950#endif
1951
1952 return ret;
1953}
1954
1955size_t upb_env_bytesallocated(const upb_env *e) {
1956 return e->bytes_allocated;
1957}
1958
1959
1960/* upb_seededalloc ************************************************************/
1961
/* Alignment applied to every seeded allocation.  16 is a conservative choice
 * in case anyone is using SSE. */
static const size_t maxalign = 16;

/* Rounds |size| up to the next multiple of maxalign (values that are already
 * a multiple are returned unchanged). */
static size_t align_up(size_t size) {
  /* maxalign is a power of two, so clearing the low bits of the biased value
   * is the same as dividing and multiplying back. */
  size_t biased = size + (maxalign - 1);
  return biased & ~(maxalign - 1);
}
1968
1969UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize,
1970 size_t size) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001971 upb_seededalloc *a = ud;
Chris Fallind3262772015-05-14 18:24:26 -07001972
Chris Fallind3262772015-05-14 18:24:26 -07001973 size = align_up(size);
1974
1975 assert(a->mem_limit >= a->mem_ptr);
1976
1977 if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07001978 /* Fast path: we can satisfy from the initial allocation. */
Chris Fallind3262772015-05-14 18:24:26 -07001979 void *ret = a->mem_ptr;
1980 a->mem_ptr += size;
1981 return ret;
1982 } else {
Chris Fallind3262772015-05-14 18:24:26 -07001983 char *chptr = ptr;
Josh Habermane8ed0212015-06-08 17:56:03 -07001984 /* Slow path: fallback to other allocator. */
1985 a->need_cleanup = true;
1986 /* Is `ptr` part of the user-provided initial block? Don't pass it to the
1987 * default allocator if so; otherwise, it may try to realloc() the block. */
Chris Fallind3262772015-05-14 18:24:26 -07001988 if (chptr >= a->mem_base && chptr < a->mem_limit) {
Josh Habermanfb8ed702015-06-22 17:23:55 -07001989 void *ret;
1990 assert(chptr + oldsize <= a->mem_limit);
1991 ret = a->alloc(a->alloc_ud, NULL, 0, size);
1992 if (ret) memcpy(ret, ptr, oldsize);
1993 return ret;
Chris Fallind3262772015-05-14 18:24:26 -07001994 } else {
1995 return a->alloc(a->alloc_ud, ptr, oldsize, size);
1996 }
1997 }
1998}
1999
2000void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002001 default_alloc_ud *ud = (default_alloc_ud*)&a->default_alloc_ud;
Chris Fallind3262772015-05-14 18:24:26 -07002002 a->mem_base = mem;
2003 a->mem_ptr = mem;
2004 a->mem_limit = (char*)mem + len;
2005 a->need_cleanup = false;
2006 a->returned_allocfunc = false;
2007
Chris Fallind3262772015-05-14 18:24:26 -07002008 ud->head = NULL;
2009
2010 upb_seededalloc_setfallbackalloc(a, default_alloc, ud);
2011}
2012
2013void upb_seededalloc_uninit(upb_seededalloc *a) {
2014 if (a->alloc == default_alloc && a->need_cleanup) {
2015 default_alloc_cleanup(a->alloc_ud);
2016 }
2017}
2018
2019UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a,
2020 upb_alloc_func *alloc,
2021 void *ud) {
2022 assert(!a->returned_allocfunc);
2023 a->alloc = alloc;
2024 a->alloc_ud = ud;
2025}
2026
2027upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) {
2028 a->returned_allocfunc = true;
2029 return seeded_alloc;
2030}
2031/*
Josh Haberman181c7f22015-07-15 11:05:10 -07002032** TODO(haberman): it's unclear whether a lot of the consistency checks should
2033** assert() or return false.
2034*/
Chris Fallin91473dc2014-12-12 15:58:26 -08002035
2036
2037#include <stdlib.h>
2038#include <string.h>
2039
2040
Josh Habermane8ed0212015-06-08 17:56:03 -07002041
/* This object's value is never used; it exists only so that its address can
 * serve as the unique pointer value behind UPB_NO_CLOSURE. */
char _upb_noclosure;
2045
2046static void freehandlers(upb_refcounted *r) {
2047 upb_handlers *h = (upb_handlers*)r;
2048
2049 upb_inttable_iter i;
2050 upb_inttable_begin(&i, &h->cleanup_);
2051 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
2052 void *val = (void*)upb_inttable_iter_key(&i);
2053 upb_value func_val = upb_inttable_iter_value(&i);
2054 upb_handlerfree *func = upb_value_getfptr(func_val);
2055 func(val);
2056 }
2057
2058 upb_inttable_uninit(&h->cleanup_);
2059 upb_msgdef_unref(h->msg, h);
2060 free(h->sub);
2061 free(h);
2062}
2063
2064static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
2065 void *closure) {
2066 const upb_handlers *h = (const upb_handlers*)r;
Chris Fallinfcd88892015-01-13 18:14:39 -08002067 upb_msg_field_iter i;
2068 for(upb_msg_field_begin(&i, h->msg);
2069 !upb_msg_field_done(&i);
2070 upb_msg_field_next(&i)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08002071 upb_fielddef *f = upb_msg_iter_field(&i);
Josh Habermane8ed0212015-06-08 17:56:03 -07002072 const upb_handlers *sub;
Chris Fallin91473dc2014-12-12 15:58:26 -08002073 if (!upb_fielddef_issubmsg(f)) continue;
Josh Habermane8ed0212015-06-08 17:56:03 -07002074 sub = upb_handlers_getsubhandlers(h, f);
2075 if (sub) visit(r, upb_handlers_upcast(sub), closure);
Chris Fallin91473dc2014-12-12 15:58:26 -08002076 }
2077}
2078
2079static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
2080
2081typedef struct {
Josh Habermane8ed0212015-06-08 17:56:03 -07002082 upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002083 upb_handlers_callback *callback;
2084 const void *closure;
2085} dfs_state;
2086
Josh Habermane8ed0212015-06-08 17:56:03 -07002087/* TODO(haberman): discard upb_handlers* objects that do not actually have any
2088 * handlers set and cannot reach any upb_handlers* object that does. This is
2089 * slightly tricky to do correctly. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002090static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
2091 dfs_state *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002092 upb_msg_field_iter i;
Chris Fallin91473dc2014-12-12 15:58:26 -08002093 upb_handlers *h = upb_handlers_new(m, owner);
2094 if (!h) return NULL;
2095 if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;
2096
2097 s->callback(s->closure, h);
2098
Josh Habermane8ed0212015-06-08 17:56:03 -07002099 /* For each submessage field, get or create a handlers object and set it as
2100 * the subhandlers. */
Chris Fallinfcd88892015-01-13 18:14:39 -08002101 for(upb_msg_field_begin(&i, m);
2102 !upb_msg_field_done(&i);
2103 upb_msg_field_next(&i)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08002104 upb_fielddef *f = upb_msg_iter_field(&i);
Josh Habermane8ed0212015-06-08 17:56:03 -07002105 const upb_msgdef *subdef;
2106 upb_value subm_ent;
2107
Chris Fallin91473dc2014-12-12 15:58:26 -08002108 if (!upb_fielddef_issubmsg(f)) continue;
2109
Josh Habermane8ed0212015-06-08 17:56:03 -07002110 subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
Chris Fallin91473dc2014-12-12 15:58:26 -08002111 if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) {
2112 upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent));
2113 } else {
2114 upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s);
2115 if (!sub_mh) goto oom;
2116 upb_handlers_setsubhandlers(h, f, sub_mh);
2117 upb_handlers_unref(sub_mh, &sub_mh);
2118 }
2119 }
2120 return h;
2121
2122oom:
2123 upb_handlers_unref(h, owner);
2124 return NULL;
2125}
2126
/* Given a selector for a STARTSUBMSG handler, resolves to the slot holding
 * the subhandlers pointer for this submessage field.  The expansion is an
 * lvalue, so it can be assigned to.  Arguments are parenthesized so that
 * arbitrary expressions may be passed safely. */
#define SUBH(h, selector) ((h)->sub[(selector)])

/* The selector for a submessage field is the field index. */
#define SUBH_F(h, f) SUBH(h, (f)->index_)
2133
2134static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
2135 upb_handlertype_t type) {
2136 upb_selector_t sel;
2137 assert(!upb_handlers_isfrozen(h));
2138 if (upb_handlers_msgdef(h) != upb_fielddef_containingtype(f)) {
2139 upb_status_seterrf(
2140 &h->status_, "type mismatch: field %s does not belong to message %s",
2141 upb_fielddef_name(f), upb_msgdef_fullname(upb_handlers_msgdef(h)));
2142 return -1;
2143 }
2144 if (!upb_handlers_getselector(f, type, &sel)) {
2145 upb_status_seterrf(
2146 &h->status_,
2147 "type mismatch: cannot register handler type %d for field %s",
2148 type, upb_fielddef_name(f));
2149 return -1;
2150 }
2151 return sel;
2152}
2153
2154static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
2155 upb_handlertype_t type) {
2156 int32_t sel = trygetsel(h, f, type);
2157 assert(sel >= 0);
2158 return sel;
2159}
2160
2161static const void **returntype(upb_handlers *h, const upb_fielddef *f,
2162 upb_handlertype_t type) {
2163 return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type_;
2164}
2165
2166static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
2167 upb_handlertype_t type, upb_func *func,
2168 upb_handlerattr *attr) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002169 upb_handlerattr set_attr = UPB_HANDLERATTR_INITIALIZER;
2170 const void *closure_type;
2171 const void **context_closure_type;
2172
Chris Fallin91473dc2014-12-12 15:58:26 -08002173 assert(!upb_handlers_isfrozen(h));
2174
2175 if (sel < 0) {
2176 upb_status_seterrmsg(&h->status_,
2177 "incorrect handler type for this field.");
2178 return false;
2179 }
2180
2181 if (h->table[sel].func) {
2182 upb_status_seterrmsg(&h->status_,
2183 "cannot change handler once it has been set.");
2184 return false;
2185 }
2186
Chris Fallin91473dc2014-12-12 15:58:26 -08002187 if (attr) {
2188 set_attr = *attr;
2189 }
2190
Josh Habermane8ed0212015-06-08 17:56:03 -07002191 /* Check that the given closure type matches the closure type that has been
2192 * established for this context (if any). */
2193 closure_type = upb_handlerattr_closuretype(&set_attr);
Chris Fallin91473dc2014-12-12 15:58:26 -08002194
2195 if (type == UPB_HANDLER_STRING) {
2196 context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR);
2197 } else if (f && upb_fielddef_isseq(f) &&
2198 type != UPB_HANDLER_STARTSEQ &&
2199 type != UPB_HANDLER_ENDSEQ) {
2200 context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ);
2201 } else {
2202 context_closure_type = &h->top_closure_type;
2203 }
2204
2205 if (closure_type && *context_closure_type &&
2206 closure_type != *context_closure_type) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002207 /* TODO(haberman): better message for debugging. */
Chris Fallind3262772015-05-14 18:24:26 -07002208 if (f) {
2209 upb_status_seterrf(&h->status_,
2210 "closure type does not match for field %s",
2211 upb_fielddef_name(f));
2212 } else {
2213 upb_status_seterrmsg(
2214 &h->status_, "closure type does not match for message-level handler");
2215 }
Chris Fallin91473dc2014-12-12 15:58:26 -08002216 return false;
2217 }
2218
2219 if (closure_type)
2220 *context_closure_type = closure_type;
2221
Josh Habermane8ed0212015-06-08 17:56:03 -07002222 /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer
2223 * matches any pre-existing expectations about what type is expected. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002224 if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
2225 const void *return_type = upb_handlerattr_returnclosuretype(&set_attr);
2226 const void *table_return_type =
2227 upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2228 if (return_type && table_return_type && return_type != table_return_type) {
2229 upb_status_seterrmsg(&h->status_, "closure return type does not match");
2230 return false;
2231 }
2232
2233 if (table_return_type && !return_type)
2234 upb_handlerattr_setreturnclosuretype(&set_attr, table_return_type);
2235 }
2236
2237 h->table[sel].func = (upb_func*)func;
2238 h->table[sel].attr = set_attr;
2239 return true;
2240}
2241
Josh Habermane8ed0212015-06-08 17:56:03 -07002242/* Returns the effective closure type for this handler (which will propagate
2243 * from outer frames if this frame has no START* handler). Not implemented for
2244 * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL is
2245 * the effective closure type is unspecified (either no handler was registered
2246 * to specify it or the handler that was registered did not specify the closure
2247 * type). */
Chris Fallin91473dc2014-12-12 15:58:26 -08002248const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
2249 upb_handlertype_t type) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002250 const void *ret;
Chris Fallin91473dc2014-12-12 15:58:26 -08002251 upb_selector_t sel;
Josh Habermane8ed0212015-06-08 17:56:03 -07002252
2253 assert(type != UPB_HANDLER_STRING);
2254 ret = h->top_closure_type;
2255
Chris Fallin91473dc2014-12-12 15:58:26 -08002256 if (upb_fielddef_isseq(f) &&
2257 type != UPB_HANDLER_STARTSEQ &&
2258 type != UPB_HANDLER_ENDSEQ &&
2259 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
2260 ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2261 }
2262
2263 if (type == UPB_HANDLER_STRING &&
2264 h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
2265 ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2266 }
2267
Josh Habermane8ed0212015-06-08 17:56:03 -07002268 /* The effective type of the submessage; not used yet.
2269 * if (type == SUBMESSAGE &&
2270 * h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
2271 * ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2272 * } */
Chris Fallin91473dc2014-12-12 15:58:26 -08002273
2274 return ret;
2275}
2276
Josh Habermane8ed0212015-06-08 17:56:03 -07002277/* Checks whether the START* handler specified by f & type is missing even
2278 * though it is required to convert the established type of an outer frame
2279 * ("closure_type") into the established type of an inner frame (represented in
2280 * the return closure type of this handler's attr. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002281bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
2282 upb_status *status) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002283 const void *closure_type;
2284 const upb_handlerattr *attr;
2285 const void *return_closure_type;
2286
Chris Fallin91473dc2014-12-12 15:58:26 -08002287 upb_selector_t sel = handlers_getsel(h, f, type);
2288 if (h->table[sel].func) return true;
Josh Habermane8ed0212015-06-08 17:56:03 -07002289 closure_type = effective_closure_type(h, f, type);
2290 attr = &h->table[sel].attr;
2291 return_closure_type = upb_handlerattr_returnclosuretype(attr);
Chris Fallin91473dc2014-12-12 15:58:26 -08002292 if (closure_type && return_closure_type &&
2293 closure_type != return_closure_type) {
2294 upb_status_seterrf(status,
2295 "expected start handler to return sub type for field %f",
2296 upb_fielddef_name(f));
2297 return false;
2298 }
2299 return true;
2300}
2301
2302/* Public interface ***********************************************************/
2303
Chris Fallin91473dc2014-12-12 15:58:26 -08002304upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002305 int extra;
2306 upb_handlers *h;
2307
Chris Fallin91473dc2014-12-12 15:58:26 -08002308 assert(upb_msgdef_isfrozen(md));
2309
Josh Habermane8ed0212015-06-08 17:56:03 -07002310 extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
2311 h = calloc(sizeof(*h) + extra, 1);
Chris Fallin91473dc2014-12-12 15:58:26 -08002312 if (!h) return NULL;
2313
2314 h->msg = md;
2315 upb_msgdef_ref(h->msg, h);
2316 upb_status_clear(&h->status_);
2317 h->sub = calloc(md->submsg_field_count, sizeof(*h->sub));
2318 if (!h->sub) goto oom;
Josh Habermane8ed0212015-06-08 17:56:03 -07002319 if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner))
2320 goto oom;
Chris Fallin91473dc2014-12-12 15:58:26 -08002321 if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom;
2322
Josh Habermane8ed0212015-06-08 17:56:03 -07002323 /* calloc() above initialized all handlers to NULL. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002324 return h;
2325
2326oom:
Josh Habermane8ed0212015-06-08 17:56:03 -07002327 freehandlers(upb_handlers_upcast_mutable(h));
Chris Fallin91473dc2014-12-12 15:58:26 -08002328 return NULL;
2329}
2330
2331const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
2332 const void *owner,
2333 upb_handlers_callback *callback,
2334 const void *closure) {
2335 dfs_state state;
Josh Habermane8ed0212015-06-08 17:56:03 -07002336 upb_handlers *ret;
2337 bool ok;
2338 upb_refcounted *r;
2339
Chris Fallin91473dc2014-12-12 15:58:26 -08002340 state.callback = callback;
2341 state.closure = closure;
2342 if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
2343
Josh Habermane8ed0212015-06-08 17:56:03 -07002344 ret = newformsg(m, owner, &state);
Chris Fallin91473dc2014-12-12 15:58:26 -08002345
2346 upb_inttable_uninit(&state.tab);
2347 if (!ret) return NULL;
2348
Josh Habermane8ed0212015-06-08 17:56:03 -07002349 r = upb_handlers_upcast_mutable(ret);
2350 ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH);
Chris Fallin91473dc2014-12-12 15:58:26 -08002351 UPB_ASSERT_VAR(ok, ok);
2352
2353 return ret;
2354}
2355
2356const upb_status *upb_handlers_status(upb_handlers *h) {
2357 assert(!upb_handlers_isfrozen(h));
2358 return &h->status_;
2359}
2360
2361void upb_handlers_clearerr(upb_handlers *h) {
2362 assert(!upb_handlers_isfrozen(h));
2363 upb_status_clear(&h->status_);
2364}
2365
2366#define SETTER(name, handlerctype, handlertype) \
2367 bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \
2368 handlerctype func, upb_handlerattr *attr) { \
2369 int32_t sel = trygetsel(h, f, handlertype); \
2370 return doset(h, sel, f, handlertype, (upb_func*)func, attr); \
2371 }
2372
Josh Habermane8ed0212015-06-08 17:56:03 -07002373SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32)
2374SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64)
2375SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32)
2376SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64)
2377SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT)
2378SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE)
2379SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL)
2380SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR)
2381SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING)
2382SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR)
2383SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ)
2384SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG)
2385SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG)
2386SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ)
Chris Fallin91473dc2014-12-12 15:58:26 -08002387
2388#undef SETTER
2389
2390bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
2391 upb_handlerattr *attr) {
2392 return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
2393 (upb_func *)func, attr);
2394}
2395
2396bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
2397 upb_handlerattr *attr) {
2398 assert(!upb_handlers_isfrozen(h));
2399 return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
2400 (upb_func *)func, attr);
2401}
2402
2403bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
2404 const upb_handlers *sub) {
2405 assert(sub);
2406 assert(!upb_handlers_isfrozen(h));
2407 assert(upb_fielddef_issubmsg(f));
Josh Habermane8ed0212015-06-08 17:56:03 -07002408 if (SUBH_F(h, f)) return false; /* Can't reset. */
2409 if (upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08002410 return false;
2411 }
2412 SUBH_F(h, f) = sub;
2413 upb_ref2(sub, h);
2414 return true;
2415}
2416
2417const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
2418 const upb_fielddef *f) {
2419 assert(upb_fielddef_issubmsg(f));
2420 return SUBH_F(h, f);
2421}
2422
2423bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
2424 upb_handlerattr *attr) {
2425 if (!upb_handlers_gethandler(h, sel))
2426 return false;
2427 *attr = h->table[sel].attr;
2428 return true;
2429}
2430
2431const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
2432 upb_selector_t sel) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002433 /* STARTSUBMSG selector in sel is the field's selector base. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002434 return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
2435}
2436
2437const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
2438
2439bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002440 bool ok;
Chris Fallin91473dc2014-12-12 15:58:26 -08002441 if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) {
2442 return false;
2443 }
Josh Habermane8ed0212015-06-08 17:56:03 -07002444 ok = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
Chris Fallin91473dc2014-12-12 15:58:26 -08002445 UPB_ASSERT_VAR(ok, ok);
2446 return true;
2447}
2448
2449
2450/* "Static" methods ***********************************************************/
2451
2452bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002453 /* TODO: verify we have a transitive closure. */
2454 int i;
2455 for (i = 0; i < n; i++) {
2456 upb_msg_field_iter j;
Chris Fallin91473dc2014-12-12 15:58:26 -08002457 upb_handlers *h = handlers[i];
2458
2459 if (!upb_ok(&h->status_)) {
2460 upb_status_seterrf(s, "handlers for message %s had error status: %s",
2461 upb_msgdef_fullname(upb_handlers_msgdef(h)),
2462 upb_status_errmsg(&h->status_));
2463 return false;
2464 }
2465
Josh Habermane8ed0212015-06-08 17:56:03 -07002466 /* Check that there are no closure mismatches due to missing Start* handlers
2467 * or subhandlers with different type-level types. */
Chris Fallinfcd88892015-01-13 18:14:39 -08002468 for(upb_msg_field_begin(&j, h->msg);
2469 !upb_msg_field_done(&j);
2470 upb_msg_field_next(&j)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08002471
2472 const upb_fielddef *f = upb_msg_iter_field(&j);
2473 if (upb_fielddef_isseq(f)) {
2474 if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s))
2475 return false;
2476 }
2477
2478 if (upb_fielddef_isstring(f)) {
2479 if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s))
2480 return false;
2481 }
2482
2483 if (upb_fielddef_issubmsg(f)) {
2484 bool hashandler = false;
2485 if (upb_handlers_gethandler(
2486 h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) ||
2487 upb_handlers_gethandler(
2488 h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) {
2489 hashandler = true;
2490 }
2491
2492 if (upb_fielddef_isseq(f) &&
2493 (upb_handlers_gethandler(
2494 h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) ||
2495 upb_handlers_gethandler(
2496 h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) {
2497 hashandler = true;
2498 }
2499
2500 if (hashandler && !upb_handlers_getsubhandlers(h, f)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002501 /* For now we add an empty subhandlers in this case. It makes the
2502 * decoder code generator simpler, because it only has to handle two
2503 * cases (submessage has handlers or not) as opposed to three
2504 * (submessage has handlers in enclosing message but no subhandlers).
2505 *
2506 * This makes parsing less efficient in the case that we want to
2507 * notice a submessage but skip its contents (like if we're testing
2508 * for submessage presence or counting the number of repeated
2509 * submessages). In this case we will end up parsing the submessage
2510 * field by field and throwing away the results for each, instead of
2511 * skipping the whole delimited thing at once. If this is an issue we
2512 * can revisit it, but do remember that this only arises when you have
2513 * handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
2514 * submessage but no subhandlers. The uses cases for this are
2515 * limited. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002516 upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub);
2517 upb_handlers_setsubhandlers(h, f, sub);
2518 upb_handlers_unref(sub, &sub);
2519 }
2520
Josh Habermane8ed0212015-06-08 17:56:03 -07002521 /* TODO(haberman): check type of submessage.
2522 * This is slightly tricky; also consider whether we should check that
2523 * they match at setsubhandlers time. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002524 }
2525 }
2526 }
2527
2528 if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s,
2529 UPB_MAX_HANDLER_DEPTH)) {
2530 return false;
2531 }
2532
2533 return true;
2534}
2535
2536upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
2537 switch (upb_fielddef_type(f)) {
2538 case UPB_TYPE_INT32:
2539 case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
2540 case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
2541 case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
2542 case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
2543 case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
2544 case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
2545 case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
Josh Habermane8ed0212015-06-08 17:56:03 -07002546 default: assert(false); return -1; /* Invalid input. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002547 }
2548}
2549
2550bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
2551 upb_selector_t *s) {
2552 switch (type) {
2553 case UPB_HANDLER_INT32:
2554 case UPB_HANDLER_INT64:
2555 case UPB_HANDLER_UINT32:
2556 case UPB_HANDLER_UINT64:
2557 case UPB_HANDLER_FLOAT:
2558 case UPB_HANDLER_DOUBLE:
2559 case UPB_HANDLER_BOOL:
2560 if (!upb_fielddef_isprimitive(f) ||
2561 upb_handlers_getprimitivehandlertype(f) != type)
2562 return false;
2563 *s = f->selector_base;
2564 break;
2565 case UPB_HANDLER_STRING:
2566 if (upb_fielddef_isstring(f)) {
2567 *s = f->selector_base;
2568 } else if (upb_fielddef_lazy(f)) {
2569 *s = f->selector_base + 3;
2570 } else {
2571 return false;
2572 }
2573 break;
2574 case UPB_HANDLER_STARTSTR:
2575 if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
2576 *s = f->selector_base + 1;
2577 } else {
2578 return false;
2579 }
2580 break;
2581 case UPB_HANDLER_ENDSTR:
2582 if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
2583 *s = f->selector_base + 2;
2584 } else {
2585 return false;
2586 }
2587 break;
2588 case UPB_HANDLER_STARTSEQ:
2589 if (!upb_fielddef_isseq(f)) return false;
2590 *s = f->selector_base - 2;
2591 break;
2592 case UPB_HANDLER_ENDSEQ:
2593 if (!upb_fielddef_isseq(f)) return false;
2594 *s = f->selector_base - 1;
2595 break;
2596 case UPB_HANDLER_STARTSUBMSG:
2597 if (!upb_fielddef_issubmsg(f)) return false;
Josh Habermane8ed0212015-06-08 17:56:03 -07002598 /* Selectors for STARTSUBMSG are at the beginning of the table so that the
2599 * selector can also be used as an index into the "sub" array of
2600 * subhandlers. The indexes for the two into these two tables are the
2601 * same, except that in the handler table the static selectors come first. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002602 *s = f->index_ + UPB_STATIC_SELECTOR_COUNT;
2603 break;
2604 case UPB_HANDLER_ENDSUBMSG:
2605 if (!upb_fielddef_issubmsg(f)) return false;
2606 *s = f->selector_base;
2607 break;
2608 }
Chris Fallind3262772015-05-14 18:24:26 -07002609 assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
Chris Fallin91473dc2014-12-12 15:58:26 -08002610 return true;
2611}
2612
2613uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
2614 return upb_fielddef_isseq(f) ? 2 : 0;
2615}
2616
2617uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
2618 uint32_t ret = 1;
Josh Habermane8ed0212015-06-08 17:56:03 -07002619 if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */
2620 if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */
Chris Fallin91473dc2014-12-12 15:58:26 -08002621 if (upb_fielddef_issubmsg(f)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002622 /* ENDSUBMSG (STARTSUBMSG is at table beginning) */
Chris Fallin91473dc2014-12-12 15:58:26 -08002623 ret += 0;
2624 if (upb_fielddef_lazy(f)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002625 /* STARTSTR/ENDSTR/STRING (for lazy) */
Chris Fallin91473dc2014-12-12 15:58:26 -08002626 ret += 3;
2627 }
2628 }
2629 return ret;
2630}
2631
2632
2633/* upb_handlerattr ************************************************************/
2634
2635void upb_handlerattr_init(upb_handlerattr *attr) {
2636 upb_handlerattr from = UPB_HANDLERATTR_INITIALIZER;
2637 memcpy(attr, &from, sizeof(*attr));
2638}
2639
2640void upb_handlerattr_uninit(upb_handlerattr *attr) {
2641 UPB_UNUSED(attr);
2642}
2643
2644bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) {
2645 attr->handler_data_ = hd;
2646 return true;
2647}
2648
2649bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) {
2650 attr->closure_type_ = type;
2651 return true;
2652}
2653
2654const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) {
2655 return attr->closure_type_;
2656}
2657
2658bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr,
2659 const void *type) {
2660 attr->return_closure_type_ = type;
2661 return true;
2662}
2663
2664const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) {
2665 return attr->return_closure_type_;
2666}
2667
2668bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) {
2669 attr->alwaysok_ = alwaysok;
2670 return true;
2671}
2672
2673bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) {
2674 return attr->alwaysok_;
2675}
2676
2677/* upb_bufhandle **************************************************************/
2678
2679size_t upb_bufhandle_objofs(const upb_bufhandle *h) {
2680 return h->objofs_;
2681}
2682
2683/* upb_byteshandler ***********************************************************/
2684
2685void upb_byteshandler_init(upb_byteshandler* h) {
2686 memset(h, 0, sizeof(*h));
2687}
2688
Josh Habermane8ed0212015-06-08 17:56:03 -07002689/* For when we support handlerfree callbacks. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002690void upb_byteshandler_uninit(upb_byteshandler* h) {
2691 UPB_UNUSED(h);
2692}
2693
2694bool upb_byteshandler_setstartstr(upb_byteshandler *h,
2695 upb_startstr_handlerfunc *func, void *d) {
2696 h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func;
2697 h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d;
2698 return true;
2699}
2700
2701bool upb_byteshandler_setstring(upb_byteshandler *h,
2702 upb_string_handlerfunc *func, void *d) {
2703 h->table[UPB_STRING_SELECTOR].func = (upb_func*)func;
2704 h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d;
2705 return true;
2706}
2707
2708bool upb_byteshandler_setendstr(upb_byteshandler *h,
2709 upb_endfield_handlerfunc *func, void *d) {
2710 h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func;
2711 h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d;
2712 return true;
2713}
2714/*
Josh Haberman181c7f22015-07-15 11:05:10 -07002715** upb::RefCounted Implementation
2716**
2717** Our key invariants are:
2718** 1. reference cycles never span groups
2719** 2. for ref2(to, from), we increment to's count iff group(from) != group(to)
2720**
2721** The previous two are how we avoid leaking cycles. Other important
2722** invariants are:
2723** 3. for mutable objects "from" and "to", if there exists a ref2(to, from)
2724** this implies group(from) == group(to). (In practice, what we implement
2725** is even stronger; "from" and "to" will share a group if there has *ever*
2726** been a ref2(to, from), but all that is necessary for correctness is the
2727** weaker one).
2728** 4. mutable and immutable objects are never in the same group.
2729*/
Chris Fallin91473dc2014-12-12 15:58:26 -08002730
2731
2732#include <setjmp.h>
2733#include <stdlib.h>
2734
2735static void freeobj(upb_refcounted *o);
2736
/* UPB_UNTRACKED_REF is a sentinel owner value: the debug ref-tracking code
 * returns early when the owner equals it.  Only its address matters, so it
 * simply points at a dedicated byte. */
const char untracked_val;
const void *UPB_UNTRACKED_REF = &untracked_val;
2739
2740/* arch-specific atomic primitives *******************************************/
2741
/* atomic_inc() adds one to *a.  atomic_dec() subtracts one from *a and returns
 * true iff the new value is zero.  Every variant takes a uint32_t*, which is
 * what refgroup()/unrefgroup() pass in. */

#ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/

/* Plain, non-atomic updates. */
static void atomic_inc(uint32_t *a) { (*a)++; }
static bool atomic_dec(uint32_t *a) { return --(*a) == 0; }

#elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/

static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; }

#elif defined(_WIN32) || defined(WIN32) /*------------------------------------*/

#include <windows.h>

/* The Interlocked functions operate on LONG; fail the build if that is not
 * the same size as the uint32_t we hand them. */
typedef char upb_long_is_32_bits[sizeof(LONG) == sizeof(uint32_t) ? 1 : -1];

static void atomic_inc(uint32_t *a) {
  InterlockedIncrement((LONG volatile *)a);
}
static bool atomic_dec(uint32_t *a) {
  return InterlockedDecrement((LONG volatile *)a) == 0;
}

#else
#error Atomic primitives not defined for your platform/CPU.  \
       Implement them or compile with UPB_THREAD_UNSAFE.
#endif
2765
/* All static objects point to this refcount.
 * It is special-cased in ref/unref below: it is recognized by its address and
 * never modified. */
uint32_t static_refcount = UINT32_MAX;
2769
Josh Habermane8ed0212015-06-08 17:56:03 -07002770/* We can avoid atomic ops for statically-declared objects.
2771 * This is a minor optimization but nice since we can avoid degrading under
2772 * contention in this case. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002773
2774static void refgroup(uint32_t *group) {
2775 if (group != &static_refcount)
2776 atomic_inc(group);
2777}
2778
2779static bool unrefgroup(uint32_t *group) {
2780 if (group == &static_refcount) {
2781 return false;
2782 } else {
2783 return atomic_dec(group);
2784 }
2785}
2786
2787
2788/* Reference tracking (debug only) ********************************************/
2789
2790#ifdef UPB_DEBUG_REFS
2791
#ifdef UPB_THREAD_UNSAFE

/* Without threads there is nothing to protect: locking is a no-op. */
static void upb_lock(void) {}
static void upb_unlock(void) {}

#else

/* User must define functions that lock/unlock a global mutex and link this
 * file against them.  Declared with (void) so that they are real prototypes
 * rather than declarations with an unspecified parameter list. */
void upb_lock(void);
void upb_unlock(void);

#endif
2805
/* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
 * code-paths that can normally never fail, like upb_refcounted_ref().  Since
 * we have no way to propagate out-of-memory errors back to the user, and since
 * these errors can only occur in UPB_DEBUG_REFS mode, we immediately fail.
 *
 * The body is wrapped in do/while(0) so that "CHECK_OOM(x);" is a single
 * statement and can be used safely in an unbraced if/else.  "predicate" is
 * evaluated a second time on failure (for the assertion message), so it must
 * not have side effects. */
#define CHECK_OOM(predicate) \
  do {                       \
    if (!(predicate)) {      \
      assert(predicate);     \
      exit(1);               \
    }                        \
  } while (0)
2811
/* Bookkeeping record for the refs one owner holds on one object. */
typedef struct {
  /* How many refs there are (duplicates only allowed for ref2). */
  int count;
  /* Whether this record describes a ref2 rather than a regular ref. */
  bool is_ref2;
} trackedref;
2816
2817static trackedref *trackedref_new(bool is_ref2) {
2818 trackedref *ret = malloc(sizeof(*ret));
2819 CHECK_OOM(ret);
2820 ret->count = 1;
2821 ret->is_ref2 = is_ref2;
2822 return ret;
2823}
2824
2825static void track(const upb_refcounted *r, const void *owner, bool ref2) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002826 upb_value v;
2827
Chris Fallin91473dc2014-12-12 15:58:26 -08002828 assert(owner);
2829 if (owner == UPB_UNTRACKED_REF) return;
2830
2831 upb_lock();
Chris Fallin91473dc2014-12-12 15:58:26 -08002832 if (upb_inttable_lookupptr(r->refs, owner, &v)) {
2833 trackedref *ref = upb_value_getptr(v);
Josh Habermane8ed0212015-06-08 17:56:03 -07002834 /* Since we allow multiple ref2's for the same to/from pair without
2835 * allocating separate memory for each one, we lose the fine-grained
2836 * tracking behavior we get with regular refs. Since ref2s only happen
2837 * inside upb, we'll accept this limitation until/unless there is a really
2838 * difficult upb-internal bug that can't be figured out without it. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002839 assert(ref2);
2840 assert(ref->is_ref2);
2841 ref->count++;
2842 } else {
2843 trackedref *ref = trackedref_new(ref2);
2844 bool ok = upb_inttable_insertptr(r->refs, owner, upb_value_ptr(ref));
2845 CHECK_OOM(ok);
2846 if (ref2) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002847 /* We know this cast is safe when it is a ref2, because it's coming from
2848 * another refcounted object. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002849 const upb_refcounted *from = owner;
2850 assert(!upb_inttable_lookupptr(from->ref2s, r, NULL));
2851 ok = upb_inttable_insertptr(from->ref2s, r, upb_value_ptr(NULL));
2852 CHECK_OOM(ok);
2853 }
2854 }
2855 upb_unlock();
2856}
2857
2858static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002859 upb_value v;
2860 bool found;
2861 trackedref *ref;
2862
Chris Fallin91473dc2014-12-12 15:58:26 -08002863 assert(owner);
2864 if (owner == UPB_UNTRACKED_REF) return;
2865
2866 upb_lock();
Josh Habermane8ed0212015-06-08 17:56:03 -07002867 found = upb_inttable_lookupptr(r->refs, owner, &v);
2868 /* This assert will fail if an owner attempts to release a ref it didn't have. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002869 UPB_ASSERT_VAR(found, found);
Josh Habermane8ed0212015-06-08 17:56:03 -07002870 ref = upb_value_getptr(v);
Chris Fallin91473dc2014-12-12 15:58:26 -08002871 assert(ref->is_ref2 == ref2);
2872 if (--ref->count == 0) {
2873 free(ref);
2874 upb_inttable_removeptr(r->refs, owner, NULL);
2875 if (ref2) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002876 /* We know this cast is safe when it is a ref2, because it's coming from
2877 * another refcounted object. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002878 const upb_refcounted *from = owner;
2879 bool removed = upb_inttable_removeptr(from->ref2s, r, NULL);
2880 assert(removed);
2881 }
2882 }
2883 upb_unlock();
2884}
2885
2886static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
Chris Fallin91473dc2014-12-12 15:58:26 -08002887 upb_value v;
Josh Habermane8ed0212015-06-08 17:56:03 -07002888 bool found;
2889 trackedref *ref;
2890
2891 upb_lock();
2892 found = upb_inttable_lookupptr(r->refs, owner, &v);
Chris Fallin91473dc2014-12-12 15:58:26 -08002893 UPB_ASSERT_VAR(found, found);
Josh Habermane8ed0212015-06-08 17:56:03 -07002894 ref = upb_value_getptr(v);
Chris Fallin91473dc2014-12-12 15:58:26 -08002895 assert(ref->is_ref2 == ref2);
2896 upb_unlock();
2897}
2898
Josh Habermane8ed0212015-06-08 17:56:03 -07002899/* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
2900 * originate from the given owner. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002901static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
Chris Fallin91473dc2014-12-12 15:58:26 -08002902 upb_inttable_iter i;
Josh Habermane8ed0212015-06-08 17:56:03 -07002903
2904 upb_lock();
Chris Fallin91473dc2014-12-12 15:58:26 -08002905 upb_inttable_begin(&i, owner->ref2s);
2906 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002907 upb_value v;
2908 upb_value count;
2909 trackedref *ref;
2910 bool ok;
2911 bool found;
2912
Chris Fallin91473dc2014-12-12 15:58:26 -08002913 upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i);
2914
Josh Habermane8ed0212015-06-08 17:56:03 -07002915 /* To get the count we need to look in the target's table. */
2916 found = upb_inttable_lookupptr(to->refs, owner, &v);
Chris Fallin91473dc2014-12-12 15:58:26 -08002917 assert(found);
Josh Habermane8ed0212015-06-08 17:56:03 -07002918 ref = upb_value_getptr(v);
2919 count = upb_value_int32(ref->count);
Chris Fallin91473dc2014-12-12 15:58:26 -08002920
Josh Habermane8ed0212015-06-08 17:56:03 -07002921 ok = upb_inttable_insertptr(tab, to, count);
Chris Fallin91473dc2014-12-12 15:58:26 -08002922 CHECK_OOM(ok);
2923 }
2924 upb_unlock();
2925}
2926
2927typedef struct {
2928 upb_inttable ref2;
2929 const upb_refcounted *obj;
2930} check_state;
2931
2932static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
2933 void *closure) {
2934 check_state *s = closure;
Chris Fallin91473dc2014-12-12 15:58:26 -08002935 upb_inttable *ref2 = &s->ref2;
2936 upb_value v;
Josh Habermane8ed0212015-06-08 17:56:03 -07002937 bool removed;
2938 int32_t newcount;
2939
2940 assert(obj == s->obj);
2941 assert(subobj);
2942 removed = upb_inttable_removeptr(ref2, subobj, &v);
2943 /* The following assertion will fail if the visit() function visits a subobj
2944 * that it did not have a ref2 on, or visits the same subobj too many times. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002945 assert(removed);
Josh Habermane8ed0212015-06-08 17:56:03 -07002946 newcount = upb_value_getint32(v) - 1;
Chris Fallin91473dc2014-12-12 15:58:26 -08002947 if (newcount > 0) {
2948 upb_inttable_insert(ref2, (uintptr_t)subobj, upb_value_int32(newcount));
2949 }
2950}
2951
2952static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
2953 void *closure) {
Josh Habermane8ed0212015-06-08 17:56:03 -07002954 bool ok;
2955
2956 /* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
2957 * exactly the set of nodes that visit() should visit. So we verify visit()'s
2958 * correctness here. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002959 check_state state;
2960 state.obj = r;
Josh Habermane8ed0212015-06-08 17:56:03 -07002961 ok = upb_inttable_init(&state.ref2, UPB_CTYPE_INT32);
Chris Fallin91473dc2014-12-12 15:58:26 -08002962 CHECK_OOM(ok);
2963 getref2s(r, &state.ref2);
2964
Josh Habermane8ed0212015-06-08 17:56:03 -07002965 /* This should visit any children in the ref2 table. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002966 if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &state);
2967
Josh Habermane8ed0212015-06-08 17:56:03 -07002968 /* This assertion will fail if the visit() function missed any children. */
Chris Fallin91473dc2014-12-12 15:58:26 -08002969 assert(upb_inttable_count(&state.ref2) == 0);
2970 upb_inttable_uninit(&state.ref2);
2971 if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
2972}
2973
2974static bool trackinit(upb_refcounted *r) {
2975 r->refs = malloc(sizeof(*r->refs));
2976 r->ref2s = malloc(sizeof(*r->ref2s));
2977 if (!r->refs || !r->ref2s) goto err1;
2978
2979 if (!upb_inttable_init(r->refs, UPB_CTYPE_PTR)) goto err1;
2980 if (!upb_inttable_init(r->ref2s, UPB_CTYPE_PTR)) goto err2;
2981 return true;
2982
2983err2:
2984 upb_inttable_uninit(r->refs);
2985err1:
2986 free(r->refs);
2987 free(r->ref2s);
2988 return false;
2989}
2990
2991static void trackfree(const upb_refcounted *r) {
2992 upb_inttable_uninit(r->refs);
2993 upb_inttable_uninit(r->ref2s);
2994 free(r->refs);
2995 free(r->ref2s);
2996}
2997
2998#else
2999
3000static void track(const upb_refcounted *r, const void *owner, bool ref2) {
3001 UPB_UNUSED(r);
3002 UPB_UNUSED(owner);
3003 UPB_UNUSED(ref2);
3004}
3005
3006static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
3007 UPB_UNUSED(r);
3008 UPB_UNUSED(owner);
3009 UPB_UNUSED(ref2);
3010}
3011
3012static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
3013 UPB_UNUSED(r);
3014 UPB_UNUSED(owner);
3015 UPB_UNUSED(ref2);
3016}
3017
3018static bool trackinit(upb_refcounted *r) {
3019 UPB_UNUSED(r);
3020 return true;
3021}
3022
3023static void trackfree(const upb_refcounted *r) {
3024 UPB_UNUSED(r);
3025}
3026
3027static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
3028 void *closure) {
3029 if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
3030}
3031
Josh Habermane8ed0212015-06-08 17:56:03 -07003032#endif /* UPB_DEBUG_REFS */
Chris Fallin91473dc2014-12-12 15:58:26 -08003033
3034
3035/* freeze() *******************************************************************/
3036
Josh Habermane8ed0212015-06-08 17:56:03 -07003037/* The freeze() operation is by far the most complicated part of this scheme.
3038 * We compute strongly-connected components and then mutate the graph such that
3039 * we preserve the invariants documented at the top of this file. And we must
3040 * handle out-of-memory errors gracefully (without leaving the graph
3041 * inconsistent), which adds to the fun. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003042
Josh Habermane8ed0212015-06-08 17:56:03 -07003043/* The state used by the freeze operation (shared across many functions). */
Chris Fallin91473dc2014-12-12 15:58:26 -08003044typedef struct {
3045 int depth;
3046 int maxdepth;
3047 uint64_t index;
Josh Habermane8ed0212015-06-08 17:56:03 -07003048 /* Maps upb_refcounted* -> attributes (color, etc). attr layout varies by
3049 * color. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003050 upb_inttable objattr;
Josh Habermane8ed0212015-06-08 17:56:03 -07003051 upb_inttable stack; /* stack of upb_refcounted* for Tarjan's algorithm. */
3052 upb_inttable groups; /* array of uint32_t*, malloc'd refcounts for new groups */
Chris Fallin91473dc2014-12-12 15:58:26 -08003053 upb_status *status;
3054 jmp_buf err;
3055} tarjan;
3056
3057static void release_ref2(const upb_refcounted *obj,
3058 const upb_refcounted *subobj,
3059 void *closure);
3060
Josh Habermane8ed0212015-06-08 17:56:03 -07003061/* Node attributes -----------------------------------------------------------*/
Chris Fallin91473dc2014-12-12 15:58:26 -08003062
/* Node colors.  After our analysis phase all nodes will be either GRAY or
 * WHITE.  The numeric values matter: the color is stored in the low two bits
 * of a node's attributes, and BLACK (0) doubles as "no attributes yet". */
typedef enum {
  /* Object has not been seen. */
  BLACK = 0,
  /* Object has been found via a refgroup but may not be reachable. */
  GRAY,
  /* Object is reachable and is currently on the Tarjan stack. */
  GREEN,
  /* Object is reachable and has been assigned a group (SCC). */
  WHITE
} color_t;
3071
3072UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); }
3073UPB_NORETURN static void oom(tarjan *t) {
3074 upb_status_seterrmsg(t->status, "out of memory");
3075 err(t);
3076}
3077
3078static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) {
3079 upb_value v;
3080 return upb_inttable_lookupptr(&t->objattr, r, &v) ?
3081 upb_value_getuint64(v) : 0;
3082}
3083
3084static uint64_t getattr(const tarjan *t, const upb_refcounted *r) {
3085 upb_value v;
3086 bool found = upb_inttable_lookupptr(&t->objattr, r, &v);
3087 UPB_ASSERT_VAR(found, found);
3088 return upb_value_getuint64(v);
3089}
3090
3091static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
3092 upb_inttable_removeptr(&t->objattr, r, NULL);
3093 upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr));
3094}
3095
3096static color_t color(tarjan *t, const upb_refcounted *r) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003097 return trygetattr(t, r) & 0x3; /* Color is always stored in the low 2 bits. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003098}
3099
3100static void set_gray(tarjan *t, const upb_refcounted *r) {
3101 assert(color(t, r) == BLACK);
3102 setattr(t, r, GRAY);
3103}
3104
Josh Habermane8ed0212015-06-08 17:56:03 -07003105/* Pushes an obj onto the Tarjan stack and sets it to GREEN. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003106static void push(tarjan *t, const upb_refcounted *r) {
3107 assert(color(t, r) == BLACK || color(t, r) == GRAY);
Josh Habermane8ed0212015-06-08 17:56:03 -07003108 /* This defines the attr layout for the GREEN state. "index" and "lowlink"
3109 * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */
Chris Fallin91473dc2014-12-12 15:58:26 -08003110 setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
3111 if (++t->index == 0x80000000) {
3112 upb_status_seterrmsg(t->status, "too many objects to freeze");
3113 err(t);
3114 }
3115 upb_inttable_push(&t->stack, upb_value_ptr((void*)r));
3116}
3117
Josh Habermane8ed0212015-06-08 17:56:03 -07003118/* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
3119 * SCC group. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003120static upb_refcounted *pop(tarjan *t) {
3121 upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack));
3122 assert(color(t, r) == GREEN);
Josh Habermane8ed0212015-06-08 17:56:03 -07003123 /* This defines the attr layout for nodes in the WHITE state.
3124 * Top of group stack is [group, NULL]; we point at group. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003125 setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8);
3126 return r;
3127}
3128
3129static void tarjan_newgroup(tarjan *t) {
3130 uint32_t *group = malloc(sizeof(*group));
3131 if (!group) oom(t);
Josh Habermane8ed0212015-06-08 17:56:03 -07003132 /* Push group and empty group leader (we'll fill in leader later). */
Chris Fallin91473dc2014-12-12 15:58:26 -08003133 if (!upb_inttable_push(&t->groups, upb_value_ptr(group)) ||
3134 !upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
3135 free(group);
3136 oom(t);
3137 }
3138 *group = 0;
3139}
3140
3141static uint32_t idx(tarjan *t, const upb_refcounted *r) {
3142 assert(color(t, r) == GREEN);
3143 return (getattr(t, r) >> 2) & 0x7FFFFFFF;
3144}
3145
3146static uint32_t lowlink(tarjan *t, const upb_refcounted *r) {
3147 if (color(t, r) == GREEN) {
3148 return getattr(t, r) >> 33;
3149 } else {
3150 return UINT32_MAX;
3151 }
3152}
3153
3154static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
3155 assert(color(t, r) == GREEN);
3156 setattr(t, r, ((uint64_t)lowlink << 33) | (getattr(t, r) & 0x1FFFFFFFF));
3157}
3158
3159static uint32_t *group(tarjan *t, upb_refcounted *r) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003160 uint64_t groupnum;
Chris Fallin91473dc2014-12-12 15:58:26 -08003161 upb_value v;
Josh Habermane8ed0212015-06-08 17:56:03 -07003162 bool found;
3163
3164 assert(color(t, r) == WHITE);
3165 groupnum = getattr(t, r) >> 8;
3166 found = upb_inttable_lookup(&t->groups, groupnum, &v);
Chris Fallin91473dc2014-12-12 15:58:26 -08003167 UPB_ASSERT_VAR(found, found);
3168 return upb_value_getptr(v);
3169}
3170
Josh Habermane8ed0212015-06-08 17:56:03 -07003171/* If the group leader for this object's group has not previously been set,
3172 * the given object is assigned to be its leader. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003173static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003174 uint64_t leader_slot;
Chris Fallin91473dc2014-12-12 15:58:26 -08003175 upb_value v;
Josh Habermane8ed0212015-06-08 17:56:03 -07003176 bool found;
3177
3178 assert(color(t, r) == WHITE);
3179 leader_slot = (getattr(t, r) >> 8) + 1;
3180 found = upb_inttable_lookup(&t->groups, leader_slot, &v);
Chris Fallin91473dc2014-12-12 15:58:26 -08003181 UPB_ASSERT_VAR(found, found);
3182 if (upb_value_getptr(v)) {
3183 return upb_value_getptr(v);
3184 } else {
3185 upb_inttable_remove(&t->groups, leader_slot, NULL);
3186 upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r));
3187 return r;
3188 }
3189}
3190
3191
Josh Habermane8ed0212015-06-08 17:56:03 -07003192/* Tarjan's algorithm --------------------------------------------------------*/
Chris Fallin91473dc2014-12-12 15:58:26 -08003193
Josh Habermane8ed0212015-06-08 17:56:03 -07003194/* See:
3195 * http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */
Chris Fallin91473dc2014-12-12 15:58:26 -08003196static void do_tarjan(const upb_refcounted *obj, tarjan *t);
3197
3198static void tarjan_visit(const upb_refcounted *obj,
3199 const upb_refcounted *subobj,
3200 void *closure) {
3201 tarjan *t = closure;
3202 if (++t->depth > t->maxdepth) {
3203 upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
3204 err(t);
3205 } else if (subobj->is_frozen || color(t, subobj) == WHITE) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003206 /* Do nothing: we don't want to visit or color already-frozen nodes,
3207 * and WHITE nodes have already been assigned a SCC. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003208 } else if (color(t, subobj) < GREEN) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003209 /* Subdef has not yet been visited; recurse on it. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003210 do_tarjan(subobj, t);
3211 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
3212 } else if (color(t, subobj) == GREEN) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003213 /* Subdef is in the stack and hence in the current SCC. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003214 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
3215 }
3216 --t->depth;
3217}
3218
3219static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
3220 if (color(t, obj) == BLACK) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003221 /* We haven't seen this object's group; mark the whole group GRAY. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003222 const upb_refcounted *o = obj;
3223 do { set_gray(t, o); } while ((o = o->next) != obj);
3224 }
3225
3226 push(t, obj);
3227 visit(obj, tarjan_visit, t);
3228 if (lowlink(t, obj) == idx(t, obj)) {
3229 tarjan_newgroup(t);
3230 while (pop(t) != obj)
3231 ;
3232 }
3233}
3234
3235
Josh Habermane8ed0212015-06-08 17:56:03 -07003236/* freeze() ------------------------------------------------------------------*/
Chris Fallin91473dc2014-12-12 15:58:26 -08003237
3238static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
3239 void *_t) {
3240 tarjan *t = _t;
3241 assert(color(t, r) > BLACK);
3242 if (color(t, subobj) > BLACK && r->group != subobj->group) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003243 /* Previously this ref was not reflected in subobj->group because they
3244 * were in the same group; now that they are split a ref must be taken. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003245 refgroup(subobj->group);
3246 }
3247}
3248
3249static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
3250 int maxdepth) {
3251 volatile bool ret = false;
Josh Habermane8ed0212015-06-08 17:56:03 -07003252 int i;
3253 upb_inttable_iter iter;
Chris Fallin91473dc2014-12-12 15:58:26 -08003254
Josh Habermane8ed0212015-06-08 17:56:03 -07003255 /* We run in two passes so that we can allocate all memory before performing
3256 * any mutation of the input -- this allows us to leave the input unchanged
3257 * in the case of memory allocation failure. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003258 tarjan t;
3259 t.index = 0;
3260 t.depth = 0;
3261 t.maxdepth = maxdepth;
3262 t.status = s;
3263 if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
3264 if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
3265 if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
3266 if (setjmp(t.err) != 0) goto err4;
3267
3268
Josh Habermane8ed0212015-06-08 17:56:03 -07003269 for (i = 0; i < n; i++) {
Chris Fallin91473dc2014-12-12 15:58:26 -08003270 if (color(&t, roots[i]) < GREEN) {
3271 do_tarjan(roots[i], &t);
3272 }
3273 }
3274
Josh Habermane8ed0212015-06-08 17:56:03 -07003275 /* If we've made it this far, no further errors are possible so it's safe to
3276 * mutate the objects without risk of leaving them in an inconsistent state. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003277 ret = true;
3278
Josh Habermane8ed0212015-06-08 17:56:03 -07003279 /* The transformation that follows requires care. The preconditions are:
3280 * - all objects in attr map are WHITE or GRAY, and are in mutable groups
3281 * (groups of all mutable objs)
3282 * - no ref2(to, from) refs have incremented count(to) if both "to" and
3283 * "from" are in our attr map (this follows from invariants (2) and (3)) */
Chris Fallin91473dc2014-12-12 15:58:26 -08003284
Josh Habermane8ed0212015-06-08 17:56:03 -07003285 /* Pass 1: we remove WHITE objects from their mutable groups, and add them to
3286 * new groups according to the SCC's we computed. These new groups will
3287 * consist of only frozen objects. None will be immediately collectible,
3288 * because WHITE objects are by definition reachable from one of "roots",
3289 * which the caller must own refs on. */
3290 upb_inttable_begin(&iter, &t.objattr);
3291 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
3292 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
3293 /* Since removal from a singly-linked list requires access to the object's
3294 * predecessor, we consider obj->next instead of obj for moving. With the
3295 * while() loop we guarantee that we will visit every node's predecessor.
3296 * Proof:
3297 * 1. every node's predecessor is in our attr map.
3298 * 2. though the loop body may change a node's predecessor, it will only
3299 * change it to be the node we are currently operating on, so with a
3300 * while() loop we guarantee ourselves the chance to remove each node. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003301 while (color(&t, obj->next) == WHITE &&
3302 group(&t, obj->next) != obj->next->group) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003303 upb_refcounted *leader;
3304
3305 /* Remove from old group. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003306 upb_refcounted *move = obj->next;
3307 if (obj == move) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003308 /* Removing the last object from a group. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003309 assert(*obj->group == obj->individual_count);
3310 free(obj->group);
3311 } else {
3312 obj->next = move->next;
Josh Habermane8ed0212015-06-08 17:56:03 -07003313 /* This may decrease to zero; we'll collect GRAY objects (if any) that
3314 * remain in the group in the third pass. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003315 assert(*move->group >= move->individual_count);
3316 *move->group -= move->individual_count;
3317 }
3318
Josh Habermane8ed0212015-06-08 17:56:03 -07003319 /* Add to new group. */
3320 leader = groupleader(&t, move);
Chris Fallin91473dc2014-12-12 15:58:26 -08003321 if (move == leader) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003322 /* First object added to new group is its leader. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003323 move->group = group(&t, move);
3324 move->next = move;
3325 *move->group = move->individual_count;
3326 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -07003327 /* Group already has at least one object in it. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003328 assert(leader->group == group(&t, move));
3329 move->group = group(&t, move);
3330 move->next = leader->next;
3331 leader->next = move;
3332 *move->group += move->individual_count;
3333 }
3334
3335 move->is_frozen = true;
3336 }
3337 }
3338
Josh Habermane8ed0212015-06-08 17:56:03 -07003339 /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
3340 * increment count(to) if group(obj) != group(to) (which could now be the
3341 * case if "to" was just frozen). */
3342 upb_inttable_begin(&iter, &t.objattr);
3343 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
3344 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
Chris Fallin91473dc2014-12-12 15:58:26 -08003345 visit(obj, crossref, &t);
3346 }
3347
Josh Habermane8ed0212015-06-08 17:56:03 -07003348 /* Pass 3: GRAY objects are collected if their group's refcount dropped to
3349 * zero when we removed its white nodes. This can happen if they had only
3350 * been kept alive by virtue of sharing a group with an object that was just
3351 * frozen.
3352 *
3353 * It is important that we do this last, since the GRAY object's free()
3354 * function could call unref2() on just-frozen objects, which will decrement
3355 * refs that were added in pass 2. */
3356 upb_inttable_begin(&iter, &t.objattr);
3357 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
3358 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
Chris Fallin91473dc2014-12-12 15:58:26 -08003359 if (obj->group == NULL || *obj->group == 0) {
3360 if (obj->group) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003361 upb_refcounted *o;
3362
3363 /* We eagerly free() the group's count (since we can't easily determine
3364 * the group's remaining size it's the easiest way to ensure it gets
3365 * done). */
Chris Fallin91473dc2014-12-12 15:58:26 -08003366 free(obj->group);
3367
Josh Habermane8ed0212015-06-08 17:56:03 -07003368 /* Visit to release ref2's (done in a separate pass since release_ref2
3369 * depends on o->group being unmodified so it can test merged()). */
3370 o = obj;
Chris Fallin91473dc2014-12-12 15:58:26 -08003371 do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);
3372
Josh Habermane8ed0212015-06-08 17:56:03 -07003373 /* Mark "group" fields as NULL so we know to free the objects later in
3374 * this loop, but also don't try to delete the group twice. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003375 o = obj;
3376 do { o->group = NULL; } while ((o = o->next) != obj);
3377 }
3378 freeobj(obj);
3379 }
3380 }
3381
3382err4:
3383 if (!ret) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003384 upb_inttable_begin(&iter, &t.groups);
3385 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter))
3386 free(upb_value_getptr(upb_inttable_iter_value(&iter)));
Chris Fallin91473dc2014-12-12 15:58:26 -08003387 }
3388 upb_inttable_uninit(&t.groups);
3389err3:
3390 upb_inttable_uninit(&t.stack);
3391err2:
3392 upb_inttable_uninit(&t.objattr);
3393err1:
3394 return ret;
3395}
3396
3397
3398/* Misc internal functions ***************************************************/
3399
3400static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
3401 return r->group == r2->group;
3402}
3403
3404static void merge(upb_refcounted *r, upb_refcounted *from) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003405 upb_refcounted *base;
3406 upb_refcounted *tmp;
3407
Chris Fallin91473dc2014-12-12 15:58:26 -08003408 if (merged(r, from)) return;
3409 *r->group += *from->group;
3410 free(from->group);
Josh Habermane8ed0212015-06-08 17:56:03 -07003411 base = from;
Chris Fallin91473dc2014-12-12 15:58:26 -08003412
Josh Habermane8ed0212015-06-08 17:56:03 -07003413 /* Set all refcount pointers in the "from" chain to the merged refcount.
3414 *
3415 * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
3416 * if the user continuously extends a group by one object. Prevent this by
3417 * using one of the techniques in this paper:
3418 * ftp://www.ncedc.org/outgoing/geomorph/dino/orals/p245-tarjan.pdf */
Chris Fallin91473dc2014-12-12 15:58:26 -08003419 do { from->group = r->group; } while ((from = from->next) != base);
3420
Josh Habermane8ed0212015-06-08 17:56:03 -07003421 /* Merge the two circularly linked lists by swapping their next pointers. */
3422 tmp = r->next;
Chris Fallin91473dc2014-12-12 15:58:26 -08003423 r->next = base->next;
3424 base->next = tmp;
3425}
3426
3427static void unref(const upb_refcounted *r);
3428
3429static void release_ref2(const upb_refcounted *obj,
3430 const upb_refcounted *subobj,
3431 void *closure) {
3432 UPB_UNUSED(closure);
3433 untrack(subobj, obj, true);
3434 if (!merged(obj, subobj)) {
3435 assert(subobj->is_frozen);
3436 unref(subobj);
3437 }
3438}
3439
3440static void unref(const upb_refcounted *r) {
3441 if (unrefgroup(r->group)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003442 const upb_refcounted *o;
3443
Chris Fallin91473dc2014-12-12 15:58:26 -08003444 free(r->group);
3445
Josh Habermane8ed0212015-06-08 17:56:03 -07003446 /* In two passes, since release_ref2 needs a guarantee that any subobjs
3447 * are alive. */
3448 o = r;
Chris Fallin91473dc2014-12-12 15:58:26 -08003449 do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
3450
3451 o = r;
3452 do {
3453 const upb_refcounted *next = o->next;
3454 assert(o->is_frozen || o->individual_count == 0);
3455 freeobj((upb_refcounted*)o);
3456 o = next;
3457 } while(o != r);
3458 }
3459}
3460
3461static void freeobj(upb_refcounted *o) {
3462 trackfree(o);
3463 o->vtbl->free((upb_refcounted*)o);
3464}
3465
3466
3467/* Public interface ***********************************************************/
3468
3469bool upb_refcounted_init(upb_refcounted *r,
3470 const struct upb_refcounted_vtbl *vtbl,
3471 const void *owner) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003472#ifndef NDEBUG
3473 /* Endianness check. This is unrelated to upb_refcounted, it's just a
3474 * convenient place to put the check that we can be assured will run for
3475 * basically every program using upb. */
3476 const int x = 1;
3477#ifdef UPB_BIG_ENDIAN
3478 assert(*(char*)&x != 1);
3479#else
3480 assert(*(char*)&x == 1);
3481#endif
3482#endif
3483
Chris Fallin91473dc2014-12-12 15:58:26 -08003484 r->next = r;
3485 r->vtbl = vtbl;
3486 r->individual_count = 0;
3487 r->is_frozen = false;
3488 r->group = malloc(sizeof(*r->group));
3489 if (!r->group) return false;
3490 *r->group = 0;
3491 if (!trackinit(r)) {
3492 free(r->group);
3493 return false;
3494 }
3495 upb_refcounted_ref(r, owner);
3496 return true;
3497}
3498
3499bool upb_refcounted_isfrozen(const upb_refcounted *r) {
3500 return r->is_frozen;
3501}
3502
3503void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
3504 track(r, owner, false);
3505 if (!r->is_frozen)
3506 ((upb_refcounted*)r)->individual_count++;
3507 refgroup(r->group);
3508}
3509
3510void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
3511 untrack(r, owner, false);
3512 if (!r->is_frozen)
3513 ((upb_refcounted*)r)->individual_count--;
3514 unref(r);
3515}
3516
3517void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003518 assert(!from->is_frozen); /* Non-const pointer implies this. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003519 track(r, from, true);
3520 if (r->is_frozen) {
3521 refgroup(r->group);
3522 } else {
3523 merge((upb_refcounted*)r, from);
3524 }
3525}
3526
3527void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003528 assert(!from->is_frozen); /* Non-const pointer implies this. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003529 untrack(r, from, true);
3530 if (r->is_frozen) {
3531 unref(r);
3532 } else {
3533 assert(merged(r, from));
3534 }
3535}
3536
3537void upb_refcounted_donateref(
3538 const upb_refcounted *r, const void *from, const void *to) {
3539 assert(from != to);
3540 if (to != NULL)
3541 upb_refcounted_ref(r, to);
3542 if (from != NULL)
3543 upb_refcounted_unref(r, from);
3544}
3545
3546void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
3547 checkref(r, owner, false);
3548}
3549
3550bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
3551 int maxdepth) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003552 int i;
3553 for (i = 0; i < n; i++) {
Chris Fallin91473dc2014-12-12 15:58:26 -08003554 assert(!roots[i]->is_frozen);
3555 }
3556 return freeze(roots, n, s, maxdepth);
3557}
Chris Fallin91473dc2014-12-12 15:58:26 -08003558
3559
3560#include <stdlib.h>
3561
Josh Habermane8ed0212015-06-08 17:56:03 -07003562/* Fallback implementation if the shim is not specialized by the JIT. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003563#define SHIM_WRITER(type, ctype) \
3564 bool upb_shim_set ## type (void *c, const void *hd, ctype val) { \
3565 uint8_t *m = c; \
3566 const upb_shim_data *d = hd; \
3567 if (d->hasbit > 0) \
3568 *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8); \
3569 *(ctype*)&m[d->offset] = val; \
3570 return true; \
3571 } \
3572
3573SHIM_WRITER(double, double)
3574SHIM_WRITER(float, float)
3575SHIM_WRITER(int32, int32_t)
3576SHIM_WRITER(int64, int64_t)
3577SHIM_WRITER(uint32, uint32_t)
3578SHIM_WRITER(uint64, uint64_t)
3579SHIM_WRITER(bool, bool)
3580#undef SHIM_WRITER
3581
3582bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
3583 int32_t hasbit) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003584 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
3585 bool ok;
3586
Chris Fallin91473dc2014-12-12 15:58:26 -08003587 upb_shim_data *d = malloc(sizeof(*d));
3588 if (!d) return false;
3589 d->offset = offset;
3590 d->hasbit = hasbit;
3591
Chris Fallin91473dc2014-12-12 15:58:26 -08003592 upb_handlerattr_sethandlerdata(&attr, d);
3593 upb_handlerattr_setalwaysok(&attr, true);
3594 upb_handlers_addcleanup(h, d, free);
3595
3596#define TYPE(u, l) \
3597 case UPB_TYPE_##u: \
3598 ok = upb_handlers_set##l(h, f, upb_shim_set##l, &attr); break;
3599
Josh Habermane8ed0212015-06-08 17:56:03 -07003600 ok = false;
Chris Fallin91473dc2014-12-12 15:58:26 -08003601
3602 switch (upb_fielddef_type(f)) {
3603 TYPE(INT64, int64);
3604 TYPE(INT32, int32);
3605 TYPE(ENUM, int32);
3606 TYPE(UINT64, uint64);
3607 TYPE(UINT32, uint32);
3608 TYPE(DOUBLE, double);
3609 TYPE(FLOAT, float);
3610 TYPE(BOOL, bool);
3611 default: assert(false); break;
3612 }
3613#undef TYPE
3614
3615 upb_handlerattr_uninit(&attr);
3616 return ok;
3617}
3618
3619const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s,
3620 upb_fieldtype_t *type) {
3621 upb_func *f = upb_handlers_gethandler(h, s);
3622
3623 if ((upb_int64_handlerfunc*)f == upb_shim_setint64) {
3624 *type = UPB_TYPE_INT64;
3625 } else if ((upb_int32_handlerfunc*)f == upb_shim_setint32) {
3626 *type = UPB_TYPE_INT32;
3627 } else if ((upb_uint64_handlerfunc*)f == upb_shim_setuint64) {
3628 *type = UPB_TYPE_UINT64;
3629 } else if ((upb_uint32_handlerfunc*)f == upb_shim_setuint32) {
3630 *type = UPB_TYPE_UINT32;
3631 } else if ((upb_double_handlerfunc*)f == upb_shim_setdouble) {
3632 *type = UPB_TYPE_DOUBLE;
3633 } else if ((upb_float_handlerfunc*)f == upb_shim_setfloat) {
3634 *type = UPB_TYPE_FLOAT;
3635 } else if ((upb_bool_handlerfunc*)f == upb_shim_setbool) {
3636 *type = UPB_TYPE_BOOL;
3637 } else {
3638 return NULL;
3639 }
3640
3641 return (const upb_shim_data*)upb_handlers_gethandlerdata(h, s);
3642}
Chris Fallin91473dc2014-12-12 15:58:26 -08003643
3644
3645#include <stdlib.h>
3646#include <string.h>
3647
Chris Fallin91473dc2014-12-12 15:58:26 -08003648static void upb_symtab_free(upb_refcounted *r) {
3649 upb_symtab *s = (upb_symtab*)r;
3650 upb_strtable_iter i;
3651 upb_strtable_begin(&i, &s->symtab);
3652 for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
3653 const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
3654 upb_def_unref(def, s);
3655 }
3656 upb_strtable_uninit(&s->symtab);
3657 free(s);
3658}
3659
3660
3661upb_symtab *upb_symtab_new(const void *owner) {
3662 static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free};
3663 upb_symtab *s = malloc(sizeof(*s));
Josh Habermane8ed0212015-06-08 17:56:03 -07003664 upb_refcounted_init(upb_symtab_upcast_mutable(s), &vtbl, owner);
Chris Fallin91473dc2014-12-12 15:58:26 -08003665 upb_strtable_init(&s->symtab, UPB_CTYPE_PTR);
3666 return s;
3667}
3668
3669void upb_symtab_freeze(upb_symtab *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003670 upb_refcounted *r;
3671 bool ok;
3672
Chris Fallin91473dc2014-12-12 15:58:26 -08003673 assert(!upb_symtab_isfrozen(s));
Josh Habermane8ed0212015-06-08 17:56:03 -07003674 r = upb_symtab_upcast_mutable(s);
3675 /* The symtab does not take ref2's (see refcounted.h) on the defs, because
3676 * defs cannot refer back to the table and therefore cannot create cycles. So
3677 * 0 will suffice for maxdepth here. */
3678 ok = upb_refcounted_freeze(&r, 1, NULL, 0);
Chris Fallin91473dc2014-12-12 15:58:26 -08003679 UPB_ASSERT_VAR(ok, ok);
3680}
3681
3682const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
3683 upb_value v;
3684 upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ?
3685 upb_value_getptr(v) : NULL;
3686 return ret;
3687}
3688
3689const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
3690 upb_value v;
3691 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3692 upb_value_getptr(v) : NULL;
3693 return def ? upb_dyncast_msgdef(def) : NULL;
3694}
3695
3696const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
3697 upb_value v;
3698 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3699 upb_value_getptr(v) : NULL;
3700 return def ? upb_dyncast_enumdef(def) : NULL;
3701}
3702
Josh Habermane8ed0212015-06-08 17:56:03 -07003703/* Given a symbol and the base symbol inside which it is defined, find the
3704 * symbol's definition in t. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003705static upb_def *upb_resolvename(const upb_strtable *t,
3706 const char *base, const char *sym) {
3707 if(strlen(sym) == 0) return NULL;
3708 if(sym[0] == '.') {
Josh Habermane8ed0212015-06-08 17:56:03 -07003709 /* Symbols starting with '.' are absolute, so we do a single lookup.
3710 * Slice to omit the leading '.' */
Chris Fallin91473dc2014-12-12 15:58:26 -08003711 upb_value v;
3712 return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL;
3713 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -07003714 /* Remove components from base until we find an entry or run out.
3715 * TODO: This branch is totally broken, but currently not used. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003716 (void)base;
3717 assert(false);
3718 return NULL;
3719 }
3720}
3721
3722const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
3723 const char *sym) {
3724 upb_def *ret = upb_resolvename(&s->symtab, base, sym);
3725 return ret;
3726}
3727
Josh Habermanfb8ed702015-06-22 17:23:55 -07003728/* Starts a depth-first traversal at "def", recursing into any subdefs
3729 * (ie. submessage types). Adds duplicates of existing defs to addtab
3730 * wherever necessary, so that the resulting symtab will be consistent once
3731 * addtab is added.
Josh Habermane8ed0212015-06-08 17:56:03 -07003732 *
Josh Habermanfb8ed702015-06-22 17:23:55 -07003733 * More specifically, if any def D is found in the DFS that:
3734 *
3735 * 1. can reach a def that is being replaced by something in addtab, AND
3736 *
3737 * 2. is not itself being replaced already (ie. this name doesn't already
3738 * exist in addtab)
3739 *
3740 * ...then a duplicate (new copy) of D will be added to addtab.
3741 *
3742 * Returns true if this happened for any def reachable from "def."
3743 *
3744 * It is slightly tricky to do this correctly in the presence of cycles. If we
3745 * detect that our DFS has hit a cycle, we might not yet know if any SCCs on
3746 * our stack can reach a def in addtab or not. Once we figure this out, that
3747 * answer needs to apply to *all* defs in these SCCs, even if we visited them
3748 * already. So a straight up one-pass cycle-detecting DFS won't work.
3749 *
3750 * To work around this problem, we traverse each SCC (which we already
3751 * computed, since these defs are frozen) as a single node. We first compute
3752 * whether the SCC as a whole can reach any def in addtab, then we dup (or not)
3753 * the entire SCC. This requires breaking the encapsulation of upb_refcounted,
3754 * since that is where we get the data about what SCC we are in. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003755static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
3756 const void *new_owner, upb_inttable *seen,
3757 upb_status *s) {
Chris Fallin91473dc2014-12-12 15:58:26 -08003758 upb_value v;
Josh Habermane8ed0212015-06-08 17:56:03 -07003759 bool need_dup;
3760 const upb_def *base;
Josh Habermanfb8ed702015-06-22 17:23:55 -07003761 const void* memoize_key;
Josh Habermane8ed0212015-06-08 17:56:03 -07003762
Josh Habermanfb8ed702015-06-22 17:23:55 -07003763 /* Memoize results of this function for efficiency (since we're traversing a
3764 * DAG this is not needed to limit the depth of the search).
3765 *
3766 * We memoize by SCC instead of by individual def. */
3767 memoize_key = def->base.group;
3768
3769 if (upb_inttable_lookupptr(seen, memoize_key, &v))
Chris Fallin91473dc2014-12-12 15:58:26 -08003770 return upb_value_getbool(v);
3771
Josh Habermane8ed0212015-06-08 17:56:03 -07003772 /* Visit submessages for all messages in the SCC. */
3773 need_dup = false;
3774 base = def;
Chris Fallin91473dc2014-12-12 15:58:26 -08003775 do {
Josh Habermane8ed0212015-06-08 17:56:03 -07003776 upb_value v;
3777 const upb_msgdef *m;
3778
Chris Fallin91473dc2014-12-12 15:58:26 -08003779 assert(upb_def_isfrozen(def));
3780 if (def->type == UPB_DEF_FIELD) continue;
Chris Fallin91473dc2014-12-12 15:58:26 -08003781 if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) {
3782 need_dup = true;
3783 }
3784
Josh Habermanfb8ed702015-06-22 17:23:55 -07003785 /* For messages, continue the recursion by visiting all subdefs, but only
3786 * ones in different SCCs. */
Josh Habermane8ed0212015-06-08 17:56:03 -07003787 m = upb_dyncast_msgdef(def);
Chris Fallin91473dc2014-12-12 15:58:26 -08003788 if (m) {
Chris Fallinfcd88892015-01-13 18:14:39 -08003789 upb_msg_field_iter i;
3790 for(upb_msg_field_begin(&i, m);
3791 !upb_msg_field_done(&i);
3792 upb_msg_field_next(&i)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08003793 upb_fielddef *f = upb_msg_iter_field(&i);
Josh Habermanfb8ed702015-06-22 17:23:55 -07003794 const upb_def *subdef;
3795
Chris Fallin91473dc2014-12-12 15:58:26 -08003796 if (!upb_fielddef_hassubdef(f)) continue;
Josh Habermanfb8ed702015-06-22 17:23:55 -07003797 subdef = upb_fielddef_subdef(f);
3798
3799 /* Skip subdefs in this SCC. */
3800 if (def->base.group == subdef->base.group) continue;
3801
Josh Habermane8ed0212015-06-08 17:56:03 -07003802 /* |= to avoid short-circuit; we need its side-effects. */
Josh Habermanfb8ed702015-06-22 17:23:55 -07003803 need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s);
Chris Fallin91473dc2014-12-12 15:58:26 -08003804 if (!upb_ok(s)) return false;
3805 }
3806 }
3807 } while ((def = (upb_def*)def->base.next) != base);
3808
3809 if (need_dup) {
Josh Habermanfb8ed702015-06-22 17:23:55 -07003810 /* Dup all defs in this SCC that don't already have entries in addtab. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003811 def = base;
3812 do {
Josh Habermane8ed0212015-06-08 17:56:03 -07003813 const char *name;
3814
Chris Fallin91473dc2014-12-12 15:58:26 -08003815 if (def->type == UPB_DEF_FIELD) continue;
Josh Habermane8ed0212015-06-08 17:56:03 -07003816 name = upb_def_fullname(def);
Chris Fallin91473dc2014-12-12 15:58:26 -08003817 if (!upb_strtable_lookup(addtab, name, NULL)) {
3818 upb_def *newdef = upb_def_dup(def, new_owner);
3819 if (!newdef) goto oom;
3820 newdef->came_from_user = false;
3821 if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef)))
3822 goto oom;
3823 }
3824 } while ((def = (upb_def*)def->base.next) != base);
3825 }
3826
Josh Habermanfb8ed702015-06-22 17:23:55 -07003827 upb_inttable_insertptr(seen, memoize_key, upb_value_bool(need_dup));
Chris Fallin91473dc2014-12-12 15:58:26 -08003828 return need_dup;
3829
3830oom:
3831 upb_status_seterrmsg(s, "out of memory");
3832 return false;
3833}
3834
Josh Habermane8ed0212015-06-08 17:56:03 -07003835/* TODO(haberman): we need a lot more testing of error conditions.
3836 * The came_from_user stuff in particular is not tested. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003837bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
3838 upb_status *status) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003839 int i;
3840 upb_strtable_iter iter;
Chris Fallin91473dc2014-12-12 15:58:26 -08003841 upb_def **add_defs = NULL;
3842 upb_strtable addtab;
Josh Habermane8ed0212015-06-08 17:56:03 -07003843 upb_inttable seen;
3844
3845 assert(!upb_symtab_isfrozen(s));
Chris Fallin91473dc2014-12-12 15:58:26 -08003846 if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
3847 upb_status_seterrmsg(status, "out of memory");
3848 return false;
3849 }
3850
Josh Habermane8ed0212015-06-08 17:56:03 -07003851 /* Add new defs to our "add" set. */
3852 for (i = 0; i < n; i++) {
Chris Fallin91473dc2014-12-12 15:58:26 -08003853 upb_def *def = defs[i];
Josh Habermane8ed0212015-06-08 17:56:03 -07003854 const char *fullname;
3855 upb_fielddef *f;
3856
Chris Fallin91473dc2014-12-12 15:58:26 -08003857 if (upb_def_isfrozen(def)) {
3858 upb_status_seterrmsg(status, "added defs must be mutable");
3859 goto err;
3860 }
3861 assert(!upb_def_isfrozen(def));
Josh Habermane8ed0212015-06-08 17:56:03 -07003862 fullname = upb_def_fullname(def);
Chris Fallin91473dc2014-12-12 15:58:26 -08003863 if (!fullname) {
3864 upb_status_seterrmsg(
3865 status, "Anonymous defs cannot be added to a symtab");
3866 goto err;
3867 }
3868
Josh Habermane8ed0212015-06-08 17:56:03 -07003869 f = upb_dyncast_fielddef_mutable(def);
Chris Fallin91473dc2014-12-12 15:58:26 -08003870
3871 if (f) {
3872 if (!upb_fielddef_containingtypename(f)) {
3873 upb_status_seterrmsg(status,
3874 "Standalone fielddefs must have a containing type "
3875 "(extendee) name set");
3876 goto err;
3877 }
3878 } else {
3879 if (upb_strtable_lookup(&addtab, fullname, NULL)) {
3880 upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
3881 goto err;
3882 }
Josh Habermane8ed0212015-06-08 17:56:03 -07003883 /* We need this to back out properly, because if there is a failure we
3884 * need to donate the ref back to the caller. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003885 def->came_from_user = true;
3886 upb_def_donateref(def, ref_donor, s);
3887 if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
3888 goto oom_err;
3889 }
3890 }
3891
Josh Habermane8ed0212015-06-08 17:56:03 -07003892 /* Add standalone fielddefs (ie. extensions) to the appropriate messages.
3893 * If the appropriate message only exists in the existing symtab, duplicate
3894 * it so we have a mutable copy we can add the fields to. */
3895 for (i = 0; i < n; i++) {
Chris Fallin91473dc2014-12-12 15:58:26 -08003896 upb_def *def = defs[i];
3897 upb_fielddef *f = upb_dyncast_fielddef_mutable(def);
Josh Habermane8ed0212015-06-08 17:56:03 -07003898 const char *msgname;
3899 upb_value v;
3900 upb_msgdef *m;
3901
Chris Fallin91473dc2014-12-12 15:58:26 -08003902 if (!f) continue;
Josh Habermane8ed0212015-06-08 17:56:03 -07003903 msgname = upb_fielddef_containingtypename(f);
3904 /* We validated this earlier in this function. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003905 assert(msgname);
3906
Josh Habermane8ed0212015-06-08 17:56:03 -07003907 /* If the extendee name is absolutely qualified, move past the initial ".".
3908 * TODO(haberman): it is not obvious what it would mean if this was not
3909 * absolutely qualified. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003910 if (msgname[0] == '.') {
3911 msgname++;
3912 }
3913
Chris Fallin91473dc2014-12-12 15:58:26 -08003914 if (upb_strtable_lookup(&addtab, msgname, &v)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003915 /* Extendee is in the set of defs the user asked us to add. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003916 m = upb_value_getptr(v);
3917 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -07003918 /* Need to find and dup the extendee from the existing symtab. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003919 const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname);
3920 if (!frozen_m) {
3921 upb_status_seterrf(status,
3922 "Tried to extend message %s that does not exist "
3923 "in this SymbolTable.",
3924 msgname);
3925 goto err;
3926 }
3927 m = upb_msgdef_dup(frozen_m, s);
3928 if (!m) goto oom_err;
3929 if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) {
3930 upb_msgdef_unref(m, s);
3931 goto oom_err;
3932 }
3933 }
3934
3935 if (!upb_msgdef_addfield(m, f, ref_donor, status)) {
3936 goto err;
3937 }
3938 }
3939
Josh Habermane8ed0212015-06-08 17:56:03 -07003940 /* Add dups of any existing def that can reach a def with the same name as
3941 * anything in our "add" set. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003942 if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err;
Josh Habermane8ed0212015-06-08 17:56:03 -07003943 upb_strtable_begin(&iter, &s->symtab);
3944 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3945 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
Chris Fallin91473dc2014-12-12 15:58:26 -08003946 upb_resolve_dfs(def, &addtab, s, &seen, status);
3947 if (!upb_ok(status)) goto err;
3948 }
3949 upb_inttable_uninit(&seen);
3950
Josh Habermane8ed0212015-06-08 17:56:03 -07003951 /* Now using the table, resolve symbolic references for subdefs. */
3952 upb_strtable_begin(&iter, &addtab);
3953 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3954 const char *base;
3955 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
Chris Fallin91473dc2014-12-12 15:58:26 -08003956 upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
Chris Fallinfcd88892015-01-13 18:14:39 -08003957 upb_msg_field_iter j;
Josh Habermane8ed0212015-06-08 17:56:03 -07003958
3959 if (!m) continue;
3960 /* Type names are resolved relative to the message in which they appear. */
3961 base = upb_msgdef_fullname(m);
3962
Chris Fallinfcd88892015-01-13 18:14:39 -08003963 for(upb_msg_field_begin(&j, m);
3964 !upb_msg_field_done(&j);
3965 upb_msg_field_next(&j)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08003966 upb_fielddef *f = upb_msg_iter_field(&j);
3967 const char *name = upb_fielddef_subdefname(f);
3968 if (name && !upb_fielddef_subdef(f)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07003969 /* Try the lookup in the current set of to-be-added defs first. If not
3970 * there, try existing defs. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003971 upb_def *subdef = upb_resolvename(&addtab, base, name);
3972 if (subdef == NULL) {
3973 subdef = upb_resolvename(&s->symtab, base, name);
3974 }
3975 if (subdef == NULL) {
3976 upb_status_seterrf(
3977 status, "couldn't resolve name '%s' in message '%s'", name, base);
3978 goto err;
3979 } else if (!upb_fielddef_setsubdef(f, subdef, status)) {
3980 goto err;
3981 }
3982 }
3983 }
3984 }
3985
Josh Habermane8ed0212015-06-08 17:56:03 -07003986 /* We need an array of the defs in addtab, for passing to upb_def_freeze. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003987 add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab));
3988 if (add_defs == NULL) goto oom_err;
Josh Habermane8ed0212015-06-08 17:56:03 -07003989 upb_strtable_begin(&iter, &addtab);
3990 for (n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3991 add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
Chris Fallin91473dc2014-12-12 15:58:26 -08003992 }
3993
3994 if (!upb_def_freeze(add_defs, n, status)) goto err;
3995
Josh Habermane8ed0212015-06-08 17:56:03 -07003996 /* This must be delayed until all errors have been detected, since error
3997 * recovery code uses this table to cleanup defs. */
Chris Fallin91473dc2014-12-12 15:58:26 -08003998 upb_strtable_uninit(&addtab);
3999
Josh Habermane8ed0212015-06-08 17:56:03 -07004000 /* TODO(haberman) we don't properly handle errors after this point (like
4001 * OOM in upb_strtable_insert() below). */
4002 for (i = 0; i < n; i++) {
Chris Fallin91473dc2014-12-12 15:58:26 -08004003 upb_def *def = add_defs[i];
4004 const char *name = upb_def_fullname(def);
4005 upb_value v;
Josh Habermane8ed0212015-06-08 17:56:03 -07004006 bool success;
4007
Chris Fallin91473dc2014-12-12 15:58:26 -08004008 if (upb_strtable_remove(&s->symtab, name, &v)) {
4009 const upb_def *def = upb_value_getptr(v);
4010 upb_def_unref(def, s);
4011 }
Josh Habermane8ed0212015-06-08 17:56:03 -07004012 success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
Chris Fallin91473dc2014-12-12 15:58:26 -08004013 UPB_ASSERT_VAR(success, success == true);
4014 }
4015 free(add_defs);
4016 return true;
4017
4018oom_err:
4019 upb_status_seterrmsg(status, "out of memory");
4020err: {
Josh Habermane8ed0212015-06-08 17:56:03 -07004021 /* For defs the user passed in, we need to donate the refs back. For defs
4022 * we dup'd, we need to just unref them. */
4023 upb_strtable_begin(&iter, &addtab);
4024 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
4025 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
Chris Fallin91473dc2014-12-12 15:58:26 -08004026 bool came_from_user = def->came_from_user;
4027 def->came_from_user = false;
4028 if (came_from_user) {
4029 upb_def_donateref(def, s, ref_donor);
4030 } else {
4031 upb_def_unref(def, s);
4032 }
4033 }
4034 }
4035 upb_strtable_uninit(&addtab);
4036 free(add_defs);
4037 assert(!upb_ok(status));
4038 return false;
4039}
4040
Josh Habermane8ed0212015-06-08 17:56:03 -07004041/* Iteration. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004042
4043static void advance_to_matching(upb_symtab_iter *iter) {
4044 if (iter->type == UPB_DEF_ANY)
4045 return;
4046
4047 while (!upb_strtable_done(&iter->iter) &&
4048 iter->type != upb_symtab_iter_def(iter)->type) {
4049 upb_strtable_next(&iter->iter);
4050 }
4051}
4052
4053void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
4054 upb_deftype_t type) {
4055 upb_strtable_begin(&iter->iter, &s->symtab);
4056 iter->type = type;
4057 advance_to_matching(iter);
4058}
4059
4060void upb_symtab_next(upb_symtab_iter *iter) {
4061 upb_strtable_next(&iter->iter);
4062 advance_to_matching(iter);
4063}
4064
4065bool upb_symtab_done(const upb_symtab_iter *iter) {
4066 return upb_strtable_done(&iter->iter);
4067}
4068
4069const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
4070 return upb_value_getptr(upb_strtable_iter_value(&iter->iter));
4071}
4072/*
Josh Haberman181c7f22015-07-15 11:05:10 -07004073** upb_table Implementation
4074**
4075** Implementation is heavily inspired by Lua's ltable.c.
4076*/
Chris Fallin91473dc2014-12-12 15:58:26 -08004077
4078
4079#include <stdlib.h>
4080#include <string.h>
4081
Josh Habermane8ed0212015-06-08 17:56:03 -07004082#define UPB_MAXARRSIZE 16 /* 64k. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004083
Josh Habermane8ed0212015-06-08 17:56:03 -07004084/* From Chromium. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004085#define ARRAY_SIZE(x) \
4086 ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
4087
4088static const double MAX_LOAD = 0.85;
4089
Josh Habermane8ed0212015-06-08 17:56:03 -07004090/* The minimum utilization of the array part of a mixed hash/array table. This
4091 * is a speed/memory-usage tradeoff (though it's not straightforward because of
4092 * cache effects). The lower this is, the more memory we'll use. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004093static const double MIN_DENSITY = 0.1;
4094
/* Returns true if v is zero or has exactly one bit set. */
bool is_pow2(uint64_t v) {
  if (v == 0) return true;
  return (v & (v - 1)) == 0;
}
4096
4097int log2ceil(uint64_t v) {
4098 int ret = 0;
4099 bool pow2 = is_pow2(v);
4100 while (v >>= 1) ret++;
Josh Habermane8ed0212015-06-08 17:56:03 -07004101 ret = pow2 ? ret : ret + 1; /* Ceiling. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004102 return UPB_MIN(UPB_MAXARRSIZE, ret);
4103}
4104
/* Duplicates the NUL-terminated string s.  Returns whatever upb_strdup2()
 * returns for strlen(s) bytes of s. */
char *upb_strdup(const char *s) {
  size_t len = strlen(s);
  return upb_strdup2(s, len);
}
4108
/* Returns a malloc()'d copy of the first "len" bytes of s with a NUL byte
 * appended.  The input is treated as a raw buffer: it does not need to be
 * NUL-terminated and may contain embedded NULs.  Returns NULL if the
 * allocation fails or if len is SIZE_MAX. */
char *upb_strdup2(const char *s, size_t len) {
  char *copy;

  /* len + 1 below would wrap around to zero. */
  if (len == SIZE_MAX) return NULL;

  copy = malloc(len + 1);
  if (copy == NULL) return NULL;

  memcpy(copy, s, len);
  copy[len] = '\0';
  return copy;
}
4125
/* Key used when searching a table.  Integer tables fill in "num"; string
 * tables fill in "str", a pointer paired with an explicit byte length. */
typedef union {
  uintptr_t num;
  struct {
    const char *str;
    size_t len;
  } str;
} lookupkey_t;
4134
Chris Fallin91473dc2014-12-12 15:58:26 -08004135static lookupkey_t strkey2(const char *str, size_t len) {
4136 lookupkey_t k;
Josh Habermane8ed0212015-06-08 17:56:03 -07004137 k.str.str = str;
4138 k.str.len = len;
Chris Fallin91473dc2014-12-12 15:58:26 -08004139 return k;
4140}
4141
4142static lookupkey_t intkey(uintptr_t key) {
4143 lookupkey_t k;
Josh Habermane8ed0212015-06-08 17:56:03 -07004144 k.num = key;
Chris Fallin91473dc2014-12-12 15:58:26 -08004145 return k;
4146}
4147
4148typedef uint32_t hashfunc_t(upb_tabkey key);
4149typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
4150
4151/* Base table (shared code) ***************************************************/
4152
Josh Habermane8ed0212015-06-08 17:56:03 -07004153/* For when we need to cast away const. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004154static upb_tabent *mutable_entries(upb_table *t) {
4155 return (upb_tabent*)t->entries;
4156}
4157
4158static bool isfull(upb_table *t) {
Josh Habermanf654d492016-02-18 11:07:51 -08004159 if (upb_table_size(t) == 0) {
4160 return true;
4161 } else {
4162 return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD;
4163 }
Chris Fallin91473dc2014-12-12 15:58:26 -08004164}
4165
4166static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004167 size_t bytes;
4168
Chris Fallin91473dc2014-12-12 15:58:26 -08004169 t->count = 0;
4170 t->ctype = ctype;
4171 t->size_lg2 = size_lg2;
4172 t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
Josh Habermane8ed0212015-06-08 17:56:03 -07004173 bytes = upb_table_size(t) * sizeof(upb_tabent);
Chris Fallin91473dc2014-12-12 15:58:26 -08004174 if (bytes > 0) {
4175 t->entries = malloc(bytes);
4176 if (!t->entries) return false;
4177 memset(mutable_entries(t), 0, bytes);
4178 } else {
4179 t->entries = NULL;
4180 }
4181 return true;
4182}
4183
4184static void uninit(upb_table *t) { free(mutable_entries(t)); }
4185
4186static upb_tabent *emptyent(upb_table *t) {
4187 upb_tabent *e = mutable_entries(t) + upb_table_size(t);
4188 while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
4189}
4190
4191static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
4192 return (upb_tabent*)upb_getentry(t, hash);
4193}
4194
4195static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
4196 uint32_t hash, eqlfunc_t *eql) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004197 const upb_tabent *e;
4198
Chris Fallin91473dc2014-12-12 15:58:26 -08004199 if (t->size_lg2 == 0) return NULL;
Josh Habermane8ed0212015-06-08 17:56:03 -07004200 e = upb_getentry(t, hash);
Chris Fallin91473dc2014-12-12 15:58:26 -08004201 if (upb_tabent_isempty(e)) return NULL;
4202 while (1) {
4203 if (eql(e->key, key)) return e;
4204 if ((e = e->next) == NULL) return NULL;
4205 }
4206}
4207
4208static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
4209 uint32_t hash, eqlfunc_t *eql) {
4210 return (upb_tabent*)findentry(t, key, hash, eql);
4211}
4212
4213static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
4214 uint32_t hash, eqlfunc_t *eql) {
4215 const upb_tabent *e = findentry(t, key, hash, eql);
4216 if (e) {
4217 if (v) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004218 _upb_value_setval(v, e->val.val, t->ctype);
Chris Fallin91473dc2014-12-12 15:58:26 -08004219 }
4220 return true;
4221 } else {
4222 return false;
4223 }
4224}
4225
Josh Habermane8ed0212015-06-08 17:56:03 -07004226/* The given key must not already exist in the table. */
4227static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
4228 upb_value val, uint32_t hash,
4229 hashfunc_t *hashfunc, eqlfunc_t *eql) {
4230 upb_tabent *mainpos_e;
4231 upb_tabent *our_e;
4232
Chris Fallin91473dc2014-12-12 15:58:26 -08004233 UPB_UNUSED(eql);
Josh Habermane8ed0212015-06-08 17:56:03 -07004234 UPB_UNUSED(key);
Chris Fallin91473dc2014-12-12 15:58:26 -08004235 assert(findentry(t, key, hash, eql) == NULL);
4236 assert(val.ctype == t->ctype);
Josh Habermane8ed0212015-06-08 17:56:03 -07004237
Chris Fallin91473dc2014-12-12 15:58:26 -08004238 t->count++;
Josh Habermane8ed0212015-06-08 17:56:03 -07004239 mainpos_e = getentry_mutable(t, hash);
4240 our_e = mainpos_e;
4241
Chris Fallin91473dc2014-12-12 15:58:26 -08004242 if (upb_tabent_isempty(mainpos_e)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004243 /* Our main position is empty; use it. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004244 our_e->next = NULL;
4245 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -07004246 /* Collision. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004247 upb_tabent *new_e = emptyent(t);
Josh Habermane8ed0212015-06-08 17:56:03 -07004248 /* Head of collider's chain. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004249 upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
4250 if (chain == mainpos_e) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004251 /* Existing ent is in its main posisiton (it has the same hash as us, and
4252 * is the head of our chain). Insert to new ent and append to this chain. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004253 new_e->next = mainpos_e->next;
4254 mainpos_e->next = new_e;
4255 our_e = new_e;
4256 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -07004257 /* Existing ent is not in its main position (it is a node in some other
4258 * chain). This implies that no existing ent in the table has our hash.
4259 * Evict it (updating its chain) and use its ent for head of our chain. */
4260 *new_e = *mainpos_e; /* copies next. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004261 while (chain->next != mainpos_e) {
4262 chain = (upb_tabent*)chain->next;
4263 assert(chain);
4264 }
4265 chain->next = new_e;
4266 our_e = mainpos_e;
4267 our_e->next = NULL;
4268 }
4269 }
Josh Habermane8ed0212015-06-08 17:56:03 -07004270 our_e->key = tabkey;
4271 our_e->val.val = val.val;
Chris Fallin91473dc2014-12-12 15:58:26 -08004272 assert(findentry(t, key, hash, eql) == our_e);
4273}
4274
4275static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
4276 upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
4277 upb_tabent *chain = getentry_mutable(t, hash);
4278 if (upb_tabent_isempty(chain)) return false;
4279 if (eql(chain->key, key)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004280 /* Element to remove is at the head of its chain. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004281 t->count--;
4282 if (val) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004283 _upb_value_setval(val, chain->val.val, t->ctype);
Chris Fallin91473dc2014-12-12 15:58:26 -08004284 }
4285 if (chain->next) {
4286 upb_tabent *move = (upb_tabent*)chain->next;
4287 *chain = *move;
4288 if (removed) *removed = move->key;
Josh Habermane8ed0212015-06-08 17:56:03 -07004289 move->key = 0; /* Make the slot empty. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004290 } else {
4291 if (removed) *removed = chain->key;
Josh Habermane8ed0212015-06-08 17:56:03 -07004292 chain->key = 0; /* Make the slot empty. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004293 }
4294 return true;
4295 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -07004296 /* Element to remove is either in a non-head position or not in the
4297 * table. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004298 while (chain->next && !eql(chain->next->key, key))
4299 chain = (upb_tabent*)chain->next;
4300 if (chain->next) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004301 /* Found element to remove. */
4302 upb_tabent *rm;
4303
Chris Fallin91473dc2014-12-12 15:58:26 -08004304 if (val) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004305 _upb_value_setval(val, chain->next->val.val, t->ctype);
Chris Fallin91473dc2014-12-12 15:58:26 -08004306 }
Josh Habermane8ed0212015-06-08 17:56:03 -07004307 rm = (upb_tabent*)chain->next;
Chris Fallin91473dc2014-12-12 15:58:26 -08004308 if (removed) *removed = rm->key;
Josh Habermane8ed0212015-06-08 17:56:03 -07004309 rm->key = 0;
Chris Fallin91473dc2014-12-12 15:58:26 -08004310 chain->next = rm->next;
4311 t->count--;
4312 return true;
4313 } else {
4314 return false;
4315 }
4316 }
4317}
4318
4319static size_t next(const upb_table *t, size_t i) {
4320 do {
4321 if (++i >= upb_table_size(t))
4322 return SIZE_MAX;
4323 } while(upb_tabent_isempty(&t->entries[i]));
4324
4325 return i;
4326}
4327
4328static size_t begin(const upb_table *t) {
4329 return next(t, -1);
4330}
4331
4332
4333/* upb_strtable ***************************************************************/
4334
Josh Habermane8ed0212015-06-08 17:56:03 -07004335/* A simple "subclass" of upb_table that only adds a hash function for strings. */
4336
4337static upb_tabkey strcopy(lookupkey_t k2) {
4338 char *str = malloc(k2.str.len + sizeof(uint32_t) + 1);
4339 if (str == NULL) return 0;
4340 memcpy(str, &k2.str.len, sizeof(uint32_t));
4341 memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1);
4342 return (uintptr_t)str;
4343}
Chris Fallin91473dc2014-12-12 15:58:26 -08004344
4345static uint32_t strhash(upb_tabkey key) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004346 uint32_t len;
4347 char *str = upb_tabstr(key, &len);
4348 return MurmurHash2(str, len, 0);
Chris Fallin91473dc2014-12-12 15:58:26 -08004349}
4350
4351static bool streql(upb_tabkey k1, lookupkey_t k2) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004352 uint32_t len;
4353 char *str = upb_tabstr(k1, &len);
4354 return len == k2.str.len && memcmp(str, k2.str.str, len) == 0;
Chris Fallin91473dc2014-12-12 15:58:26 -08004355}
4356
4357bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
4358 return init(&t->t, ctype, 2);
4359}
4360
4361void upb_strtable_uninit(upb_strtable *t) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004362 size_t i;
4363 for (i = 0; i < upb_table_size(&t->t); i++)
4364 free((void*)t->t.entries[i].key);
Chris Fallin91473dc2014-12-12 15:58:26 -08004365 uninit(&t->t);
4366}
4367
4368bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
4369 upb_strtable new_table;
Josh Habermane8ed0212015-06-08 17:56:03 -07004370 upb_strtable_iter i;
4371
Chris Fallin91473dc2014-12-12 15:58:26 -08004372 if (!init(&new_table.t, t->t.ctype, size_lg2))
4373 return false;
Chris Fallin91473dc2014-12-12 15:58:26 -08004374 upb_strtable_begin(&i, t);
4375 for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
Chris Fallinfd1a3ff2015-01-06 15:44:09 -08004376 upb_strtable_insert2(
4377 &new_table,
4378 upb_strtable_iter_key(&i),
4379 upb_strtable_iter_keylength(&i),
4380 upb_strtable_iter_value(&i));
Chris Fallin91473dc2014-12-12 15:58:26 -08004381 }
4382 upb_strtable_uninit(t);
4383 *t = new_table;
4384 return true;
4385}
4386
Chris Fallinfd1a3ff2015-01-06 15:44:09 -08004387bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
4388 upb_value v) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004389 lookupkey_t key;
4390 upb_tabkey tabkey;
4391 uint32_t hash;
4392
Chris Fallin91473dc2014-12-12 15:58:26 -08004393 if (isfull(&t->t)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004394 /* Need to resize. New table of double the size, add old elements to it. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004395 if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) {
4396 return false;
4397 }
4398 }
Chris Fallin91473dc2014-12-12 15:58:26 -08004399
Josh Habermane8ed0212015-06-08 17:56:03 -07004400 key = strkey2(k, len);
4401 tabkey = strcopy(key);
4402 if (tabkey == 0) return false;
4403
4404 hash = MurmurHash2(key.str.str, key.str.len, 0);
4405 insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
Chris Fallin91473dc2014-12-12 15:58:26 -08004406 return true;
4407}
4408
4409bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
4410 upb_value *v) {
4411 uint32_t hash = MurmurHash2(key, len, 0);
4412 return lookup(&t->t, strkey2(key, len), v, hash, &streql);
4413}
4414
Chris Fallinfd1a3ff2015-01-06 15:44:09 -08004415bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
4416 upb_value *val) {
Chris Fallin91473dc2014-12-12 15:58:26 -08004417 uint32_t hash = MurmurHash2(key, strlen(key), 0);
4418 upb_tabkey tabkey;
Chris Fallinfd1a3ff2015-01-06 15:44:09 -08004419 if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004420 free((void*)tabkey);
Chris Fallin91473dc2014-12-12 15:58:26 -08004421 return true;
4422 } else {
4423 return false;
4424 }
4425}
4426
Josh Habermane8ed0212015-06-08 17:56:03 -07004427/* Iteration */
Chris Fallin91473dc2014-12-12 15:58:26 -08004428
4429static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
4430 return &i->t->t.entries[i->index];
4431}
4432
4433void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
4434 i->t = t;
4435 i->index = begin(&t->t);
4436}
4437
4438void upb_strtable_next(upb_strtable_iter *i) {
4439 i->index = next(&i->t->t, i->index);
4440}
4441
4442bool upb_strtable_done(const upb_strtable_iter *i) {
4443 return i->index >= upb_table_size(&i->t->t) ||
4444 upb_tabent_isempty(str_tabent(i));
4445}
4446
4447const char *upb_strtable_iter_key(upb_strtable_iter *i) {
4448 assert(!upb_strtable_done(i));
Josh Habermane8ed0212015-06-08 17:56:03 -07004449 return upb_tabstr(str_tabent(i)->key, NULL);
Chris Fallinfd1a3ff2015-01-06 15:44:09 -08004450}
4451
4452size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004453 uint32_t len;
Chris Fallinfd1a3ff2015-01-06 15:44:09 -08004454 assert(!upb_strtable_done(i));
Josh Habermane8ed0212015-06-08 17:56:03 -07004455 upb_tabstr(str_tabent(i)->key, &len);
4456 return len;
Chris Fallin91473dc2014-12-12 15:58:26 -08004457}
4458
4459upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
4460 assert(!upb_strtable_done(i));
Josh Habermane8ed0212015-06-08 17:56:03 -07004461 return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype);
Chris Fallin91473dc2014-12-12 15:58:26 -08004462}
4463
4464void upb_strtable_iter_setdone(upb_strtable_iter *i) {
4465 i->index = SIZE_MAX;
4466}
4467
4468bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
4469 const upb_strtable_iter *i2) {
4470 if (upb_strtable_done(i1) && upb_strtable_done(i2))
4471 return true;
4472 return i1->t == i2->t && i1->index == i2->index;
4473}
4474
4475
4476/* upb_inttable ***************************************************************/
4477
Josh Habermane8ed0212015-06-08 17:56:03 -07004478/* For inttables we use a hybrid structure where small keys are kept in an
4479 * array and large keys are put in the hash table. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004480
Josh Habermane8ed0212015-06-08 17:56:03 -07004481static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }
Chris Fallin91473dc2014-12-12 15:58:26 -08004482
4483static bool inteql(upb_tabkey k1, lookupkey_t k2) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004484 return k1 == k2.num;
Chris Fallin91473dc2014-12-12 15:58:26 -08004485}
4486
Josh Habermane8ed0212015-06-08 17:56:03 -07004487static upb_tabval *mutable_array(upb_inttable *t) {
4488 return (upb_tabval*)t->array;
Chris Fallin91473dc2014-12-12 15:58:26 -08004489}
4490
Josh Habermane8ed0212015-06-08 17:56:03 -07004491static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
Chris Fallin91473dc2014-12-12 15:58:26 -08004492 if (key < t->array_size) {
4493 return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
4494 } else {
4495 upb_tabent *e =
4496 findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
4497 return e ? &e->val : NULL;
4498 }
4499}
4500
Josh Habermane8ed0212015-06-08 17:56:03 -07004501static const upb_tabval *inttable_val_const(const upb_inttable *t,
Chris Fallin91473dc2014-12-12 15:58:26 -08004502 uintptr_t key) {
4503 return inttable_val((upb_inttable*)t, key);
4504}
4505
4506size_t upb_inttable_count(const upb_inttable *t) {
4507 return t->t.count + t->array_count;
4508}
4509
4510static void check(upb_inttable *t) {
4511 UPB_UNUSED(t);
4512#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
Josh Habermane8ed0212015-06-08 17:56:03 -07004513 {
4514 /* This check is very expensive (makes inserts/deletes O(N)). */
4515 size_t count = 0;
4516 upb_inttable_iter i;
4517 upb_inttable_begin(&i, t);
4518 for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) {
4519 assert(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL));
4520 }
4521 assert(count == upb_inttable_count(t));
Chris Fallin91473dc2014-12-12 15:58:26 -08004522 }
Chris Fallin91473dc2014-12-12 15:58:26 -08004523#endif
4524}
4525
4526bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
4527 size_t asize, int hsize_lg2) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004528 size_t array_bytes;
4529
Chris Fallin91473dc2014-12-12 15:58:26 -08004530 if (!init(&t->t, ctype, hsize_lg2)) return false;
Josh Habermane8ed0212015-06-08 17:56:03 -07004531 /* Always make the array part at least 1 long, so that we know key 0
4532 * won't be in the hash part, which simplifies things. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004533 t->array_size = UPB_MAX(1, asize);
4534 t->array_count = 0;
Josh Habermane8ed0212015-06-08 17:56:03 -07004535 array_bytes = t->array_size * sizeof(upb_value);
Chris Fallin91473dc2014-12-12 15:58:26 -08004536 t->array = malloc(array_bytes);
4537 if (!t->array) {
4538 uninit(&t->t);
4539 return false;
4540 }
4541 memset(mutable_array(t), 0xff, array_bytes);
4542 check(t);
4543 return true;
4544}
4545
4546bool upb_inttable_init(upb_inttable *t, upb_ctype_t ctype) {
4547 return upb_inttable_sizedinit(t, ctype, 0, 4);
4548}
4549
4550void upb_inttable_uninit(upb_inttable *t) {
4551 uninit(&t->t);
4552 free(mutable_array(t));
4553}
4554
4555bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004556 /* XXX: Table can't store value (uint64_t)-1. Need to somehow statically
4557 * guarantee that this is not necessary, or fix the limitation. */
4558 upb_tabval tabval;
4559 tabval.val = val.val;
4560 UPB_UNUSED(tabval);
4561 assert(upb_arrhas(tabval));
4562
Chris Fallin91473dc2014-12-12 15:58:26 -08004563 if (key < t->array_size) {
4564 assert(!upb_arrhas(t->array[key]));
4565 t->array_count++;
Josh Habermane8ed0212015-06-08 17:56:03 -07004566 mutable_array(t)[key].val = val.val;
Chris Fallin91473dc2014-12-12 15:58:26 -08004567 } else {
4568 if (isfull(&t->t)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004569 /* Need to resize the hash part, but we re-use the array part. */
4570 size_t i;
Chris Fallin91473dc2014-12-12 15:58:26 -08004571 upb_table new_table;
4572 if (!init(&new_table, t->t.ctype, t->t.size_lg2 + 1))
4573 return false;
Chris Fallin91473dc2014-12-12 15:58:26 -08004574 for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
4575 const upb_tabent *e = &t->t.entries[i];
Josh Habermane8ed0212015-06-08 17:56:03 -07004576 uint32_t hash;
Chris Fallin91473dc2014-12-12 15:58:26 -08004577 upb_value v;
Josh Habermane8ed0212015-06-08 17:56:03 -07004578
4579 _upb_value_setval(&v, e->val.val, t->t.ctype);
4580 hash = upb_inthash(e->key);
4581 insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
Chris Fallin91473dc2014-12-12 15:58:26 -08004582 }
4583
4584 assert(t->t.count == new_table.count);
4585
4586 uninit(&t->t);
4587 t->t = new_table;
4588 }
Josh Habermane8ed0212015-06-08 17:56:03 -07004589 insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
Chris Fallin91473dc2014-12-12 15:58:26 -08004590 }
4591 check(t);
4592 return true;
4593}
4594
4595bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004596 const upb_tabval *table_v = inttable_val_const(t, key);
Chris Fallin91473dc2014-12-12 15:58:26 -08004597 if (!table_v) return false;
Josh Habermane8ed0212015-06-08 17:56:03 -07004598 if (v) _upb_value_setval(v, table_v->val, t->t.ctype);
Chris Fallin91473dc2014-12-12 15:58:26 -08004599 return true;
4600}
4601
4602bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004603 upb_tabval *table_v = inttable_val(t, key);
Chris Fallin91473dc2014-12-12 15:58:26 -08004604 if (!table_v) return false;
Josh Habermane8ed0212015-06-08 17:56:03 -07004605 table_v->val = val.val;
Chris Fallin91473dc2014-12-12 15:58:26 -08004606 return true;
4607}
4608
4609bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
4610 bool success;
4611 if (key < t->array_size) {
4612 if (upb_arrhas(t->array[key])) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004613 upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
Chris Fallin91473dc2014-12-12 15:58:26 -08004614 t->array_count--;
4615 if (val) {
Josh Habermane8ed0212015-06-08 17:56:03 -07004616 _upb_value_setval(val, t->array[key].val, t->t.ctype);
Chris Fallin91473dc2014-12-12 15:58:26 -08004617 }
Chris Fallin91473dc2014-12-12 15:58:26 -08004618 mutable_array(t)[key] = empty;
4619 success = true;
4620 } else {
4621 success = false;
4622 }
4623 } else {
4624 upb_tabkey removed;
4625 uint32_t hash = upb_inthash(key);
4626 success = rm(&t->t, intkey(key), val, &removed, hash, &inteql);
4627 }
4628 check(t);
4629 return success;
4630}
4631
4632bool upb_inttable_push(upb_inttable *t, upb_value val) {
4633 return upb_inttable_insert(t, upb_inttable_count(t), val);
4634}
4635
4636upb_value upb_inttable_pop(upb_inttable *t) {
4637 upb_value val;
4638 bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
4639 UPB_ASSERT_VAR(ok, ok);
4640 return val;
4641}
4642
4643bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) {
4644 return upb_inttable_insert(t, (uintptr_t)key, val);
4645}
4646
4647bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
4648 upb_value *v) {
4649 return upb_inttable_lookup(t, (uintptr_t)key, v);
4650}
4651
4652bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
4653 return upb_inttable_remove(t, (uintptr_t)key, val);
4654}
4655
4656void upb_inttable_compact(upb_inttable *t) {
Josh Habermanf654d492016-02-18 11:07:51 -08004657 /* A power-of-two histogram of the table keys. */
4658 size_t counts[UPB_MAXARRSIZE + 1] = {0};
4659
4660 /* The max key in each bucket. */
4661 uintptr_t max[UPB_MAXARRSIZE + 1] = {0};
4662
Chris Fallin91473dc2014-12-12 15:58:26 -08004663 upb_inttable_iter i;
Josh Habermanf654d492016-02-18 11:07:51 -08004664 size_t arr_count;
4665 int size_lg2;
Josh Habermane8ed0212015-06-08 17:56:03 -07004666 upb_inttable new_t;
4667
Chris Fallin91473dc2014-12-12 15:58:26 -08004668 upb_inttable_begin(&i, t);
4669 for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
4670 uintptr_t key = upb_inttable_iter_key(&i);
Josh Habermanf654d492016-02-18 11:07:51 -08004671 int bucket = log2ceil(key);
4672 max[bucket] = UPB_MAX(max[bucket], key);
4673 counts[bucket]++;
Chris Fallin91473dc2014-12-12 15:58:26 -08004674 }
4675
Josh Habermanf654d492016-02-18 11:07:51 -08004676 /* Find the largest power of two that satisfies the MIN_DENSITY
4677 * definition (while actually having some keys). */
Josh Habermane8ed0212015-06-08 17:56:03 -07004678 arr_count = upb_inttable_count(t);
Chris Fallin91473dc2014-12-12 15:58:26 -08004679
Josh Habermanf654d492016-02-18 11:07:51 -08004680 for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
4681 if (counts[size_lg2] == 0) {
4682 /* We can halve again without losing any entries. */
4683 continue;
4684 } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
4685 break;
Chris Fallin91473dc2014-12-12 15:58:26 -08004686 }
Josh Habermanf654d492016-02-18 11:07:51 -08004687
4688 arr_count -= counts[size_lg2];
Chris Fallin91473dc2014-12-12 15:58:26 -08004689 }
4690
Josh Habermanf654d492016-02-18 11:07:51 -08004691 assert(arr_count <= upb_inttable_count(t));
Chris Fallin91473dc2014-12-12 15:58:26 -08004692
Josh Habermane8ed0212015-06-08 17:56:03 -07004693 {
4694 /* Insert all elements into new, perfectly-sized table. */
Josh Habermanf654d492016-02-18 11:07:51 -08004695 size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */
4696 size_t hash_count = upb_inttable_count(t) - arr_count;
4697 size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
4698 size_t hashsize_lg2 = log2ceil(hash_size);
Chris Fallin91473dc2014-12-12 15:58:26 -08004699
Josh Habermane8ed0212015-06-08 17:56:03 -07004700 upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2);
4701 upb_inttable_begin(&i, t);
4702 for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
4703 uintptr_t k = upb_inttable_iter_key(&i);
4704 upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i));
4705 }
4706 assert(new_t.array_size == arr_size);
4707 assert(new_t.t.size_lg2 == hashsize_lg2);
Chris Fallin91473dc2014-12-12 15:58:26 -08004708 }
Chris Fallin91473dc2014-12-12 15:58:26 -08004709 upb_inttable_uninit(t);
4710 *t = new_t;
4711}
4712
Josh Habermane8ed0212015-06-08 17:56:03 -07004713/* Iteration. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004714
4715static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
4716 assert(!i->array_part);
4717 return &i->t->t.entries[i->index];
4718}
4719
Josh Habermane8ed0212015-06-08 17:56:03 -07004720static upb_tabval int_arrent(const upb_inttable_iter *i) {
Chris Fallin91473dc2014-12-12 15:58:26 -08004721 assert(i->array_part);
4722 return i->t->array[i->index];
4723}
4724
4725void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
4726 i->t = t;
4727 i->index = -1;
4728 i->array_part = true;
4729 upb_inttable_next(i);
4730}
4731
4732void upb_inttable_next(upb_inttable_iter *iter) {
4733 const upb_inttable *t = iter->t;
4734 if (iter->array_part) {
4735 while (++iter->index < t->array_size) {
4736 if (upb_arrhas(int_arrent(iter))) {
4737 return;
4738 }
4739 }
4740 iter->array_part = false;
4741 iter->index = begin(&t->t);
4742 } else {
4743 iter->index = next(&t->t, iter->index);
4744 }
4745}
4746
4747bool upb_inttable_done(const upb_inttable_iter *i) {
4748 if (i->array_part) {
4749 return i->index >= i->t->array_size ||
4750 !upb_arrhas(int_arrent(i));
4751 } else {
4752 return i->index >= upb_table_size(&i->t->t) ||
4753 upb_tabent_isempty(int_tabent(i));
4754 }
4755}
4756
4757uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
4758 assert(!upb_inttable_done(i));
Josh Habermane8ed0212015-06-08 17:56:03 -07004759 return i->array_part ? i->index : int_tabent(i)->key;
Chris Fallin91473dc2014-12-12 15:58:26 -08004760}
4761
4762upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
4763 assert(!upb_inttable_done(i));
4764 return _upb_value_val(
Josh Habermane8ed0212015-06-08 17:56:03 -07004765 i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val,
Chris Fallin91473dc2014-12-12 15:58:26 -08004766 i->t->t.ctype);
4767}
4768
4769void upb_inttable_iter_setdone(upb_inttable_iter *i) {
4770 i->index = SIZE_MAX;
4771 i->array_part = false;
4772}
4773
4774bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
4775 const upb_inttable_iter *i2) {
4776 if (upb_inttable_done(i1) && upb_inttable_done(i2))
4777 return true;
4778 return i1->t == i2->t && i1->index == i2->index &&
4779 i1->array_part == i2->array_part;
4780}
4781
4782#ifdef UPB_UNALIGNED_READS_OK
/* -----------------------------------------------------------------------------
 * MurmurHash2, by Austin Appleby (released as public domain).
 * Reformatted and C99-ified by Joshua Haberman.
 * Note - This code makes a few assumptions about how your machine behaves -
 *   1. We can read a 4-byte value from any address without crashing.
 *   2. sizeof(int) == 4 (in upb this limitation is removed by using
 *      uint32_t).
 * And it has a few limitations -
 *   1. It will not work incrementally.
 *   2. It will not produce the same results on little-endian and big-endian
 *      machines.
 *
 * Hashes the |len| bytes at |key|, starting from |seed|, and returns the
 * 32-bit hash. */
uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
  /* 'm' and 'r' are mixing constants generated offline.
   * They're not really 'magic', they just happen to work well. */
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;

  /* Initialize the hash to a 'random' value (only the low 32 bits of the
   * length take part, exactly as the implicit conversion did before). */
  uint32_t h = seed ^ (uint32_t)len;

  /* Mix 4 bytes at a time into the hash. */
  const uint8_t *data = (const uint8_t *)key;
  while (len >= 4) {
    uint32_t k;

    /* memcpy() performs the (possibly unaligned) native-endian load without
     * casting away const or reading the bytes through an incompatible
     * lvalue type; it yields the same value as a direct 32-bit read. */
    memcpy(&k, data, sizeof(k));

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  /* Handle the last few bytes of the input array. */
  switch (len) {
    case 3: h ^= (uint32_t)data[2] << 16;  /* fallthrough */
    case 2: h ^= (uint32_t)data[1] << 8;   /* fallthrough */
    case 1: h ^= data[0]; h *= m;
  }

  /* Do a few final mixes of the hash to ensure the last few
   * bytes are well-incorporated. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
4833
Josh Habermane8ed0212015-06-08 17:56:03 -07004834#else /* !UPB_UNALIGNED_READS_OK */
Chris Fallin91473dc2014-12-12 15:58:26 -08004835
/* -----------------------------------------------------------------------------
 * MurmurHashAligned2, by Austin Appleby
 * Same algorithm as MurmurHash2, but every 32-bit load is issued from a
 * 4-byte-aligned address - should be safer on certain platforms.
 * Performance will be lower than MurmurHash2. */

/* Folds one 32-bit block |k| into the running hash |h| and returns the new
 * hash. */
static uint32_t murmur2_mix(uint32_t h, uint32_t k, uint32_t m, int32_t r) {
  k *= m;
  k ^= k >> r;
  k *= m;
  h *= m;
  return h ^ k;
}

/* Folds the trailing |len| bytes at |data| into |h|; lengths other than
 * 1, 2 or 3 leave |h| unchanged. */
static uint32_t murmur2_tail(uint32_t h, const uint8_t *data, size_t len,
                             uint32_t m) {
  switch (len) {
    case 3: h ^= data[2] << 16;  /* fallthrough */
    case 2: h ^= data[1] << 8;   /* fallthrough */
    case 1: h ^= data[0]; h *= m;
  }
  return h;
}

uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t *data = (const uint8_t *)key;
  uint32_t h = seed ^ len;
  uint8_t align = (uintptr_t)data & 3;

  if (align == 0 || len < 4) {
    /* Either the input is already aligned, or it is too short for any
     * 32-bit load: use the plain word-at-a-time loop. */
    for (; len >= 4; data += 4, len -= 4) {
      h = murmur2_mix(h, *(const uint32_t *)data, m, r);
    }

    /* ----------
     * Handle tail bytes */
    h = murmur2_tail(h, data, len, m);
  } else {
    /* Misaligned input: |carry| holds the bytes already consumed but not yet
     * mixed, |word| the most recent aligned load.  Each mixed block is
     * stitched together from the two with the shift pair (shr, shl). */
    uint32_t carry = 0;
    uint32_t word = 0;
    int32_t shl;
    int32_t shr;

    /* Pre-load the 4-align leading bytes, one at a time. */
    switch (align) {
      case 1: carry |= data[2] << 16;  /* fallthrough */
      case 2: carry |= data[1] << 8;   /* fallthrough */
      case 3: carry |= data[0];
    }

    carry <<= (8 * align);

    data += 4-align;
    len -= 4-align;

    shl = 8 * (4-align);
    shr = 8 * align;

    /* Mix */
    for (; len >= 4; data += 4, len -= 4) {
      word = *(const uint32_t *)data;
      h = murmur2_mix(h, (carry >> shr) | (word << shl), m, r);
      carry = word;
    }

    /* Handle leftover data in temp registers */
    word = 0;

    if (len >= align) {
      /* Enough bytes remain to complete one more full block. */
      switch (align) {
        case 3: word |= data[2] << 16;  /* fallthrough */
        case 2: word |= data[1] << 8;   /* fallthrough */
        case 1: word |= data[0];
      }

      h = murmur2_mix(h, (carry >> shr) | (word << shl), m, r);

      data += align;
      len -= align;

      /* ----------
       * Handle tail bytes */
      h = murmur2_tail(h, data, len, m);
    } else {
      /* Fewer than |align| bytes remain: fold them together with the carried
       * bytes as the tail. */
      switch (len) {
        case 3: word |= data[2] << 16;  /* fallthrough */
        case 2: word |= data[1] << 8;   /* fallthrough */
        case 1: word |= data[0];        /* fallthrough */
        case 0: h ^= (carry >> shr) | (word << shl); h *= m;
      }
    }
  }

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
4957
Josh Habermane8ed0212015-06-08 17:56:03 -07004958#endif /* UPB_UNALIGNED_READS_OK */
Chris Fallin91473dc2014-12-12 15:58:26 -08004959
4960#include <errno.h>
4961#include <stdarg.h>
4962#include <stddef.h>
4963#include <stdint.h>
4964#include <stdio.h>
4965#include <stdlib.h>
4966#include <string.h>
4967
4968bool upb_dumptostderr(void *closure, const upb_status* status) {
4969 UPB_UNUSED(closure);
4970 fprintf(stderr, "%s\n", upb_status_errmsg(status));
4971 return false;
4972}
4973
Josh Habermane8ed0212015-06-08 17:56:03 -07004974/* Guarantee null-termination and provide ellipsis truncation.
4975 * It may be tempting to "optimize" this by initializing these final
4976 * four bytes up-front and then being careful never to overwrite them,
4977 * this is safer and simpler. */
Chris Fallin91473dc2014-12-12 15:58:26 -08004978static void nullz(upb_status *status) {
4979 const char *ellipsis = "...";
4980 size_t len = strlen(ellipsis);
4981 assert(sizeof(status->msg) > len);
4982 memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len);
4983}
4984
4985void upb_status_clear(upb_status *status) {
Chris Fallin97b663a2015-01-09 16:15:22 -08004986 if (!status) return;
4987 status->ok_ = true;
4988 status->code_ = 0;
4989 status->msg[0] = '\0';
Chris Fallin91473dc2014-12-12 15:58:26 -08004990}
4991
4992bool upb_ok(const upb_status *status) { return status->ok_; }
4993
4994upb_errorspace *upb_status_errspace(const upb_status *status) {
4995 return status->error_space_;
4996}
4997
4998int upb_status_errcode(const upb_status *status) { return status->code_; }
4999
5000const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
5001
5002void upb_status_seterrmsg(upb_status *status, const char *msg) {
5003 if (!status) return;
5004 status->ok_ = false;
5005 strncpy(status->msg, msg, sizeof(status->msg));
5006 nullz(status);
5007}
5008
5009void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
5010 va_list args;
5011 va_start(args, fmt);
5012 upb_status_vseterrf(status, fmt, args);
5013 va_end(args);
5014}
5015
5016void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
5017 if (!status) return;
5018 status->ok_ = false;
Josh Habermane8ed0212015-06-08 17:56:03 -07005019 _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
Chris Fallin91473dc2014-12-12 15:58:26 -08005020 nullz(status);
5021}
5022
5023void upb_status_seterrcode(upb_status *status, upb_errorspace *space,
5024 int code) {
5025 if (!status) return;
5026 status->ok_ = false;
5027 status->error_space_ = space;
5028 status->code_ = code;
5029 space->set_message(status, code);
5030}
5031
5032void upb_status_copy(upb_status *to, const upb_status *from) {
5033 if (!to) return;
5034 *to = *from;
5035}
Josh Habermane8ed0212015-06-08 17:56:03 -07005036/* This file was generated by upbc (the upb compiler).
5037 * Do not edit -- your changes will be discarded when the file is
5038 * regenerated. */
Chris Fallin91473dc2014-12-12 15:58:26 -08005039
5040
/* Forward declarations of the generated static tables, so that their
 * initializers can take each other's addresses: the msgs[] entries point into
 * arrays[], strentries[], intentries[] and reftables[], and the fields[]
 * entries point back into msgs[] and enums[].
 *
 * reftables[] is only declared when UPB_DEBUG_REFS is defined.
 * NOTE(review): the initializers name &reftables[...] unconditionally, so the
 * *_INIT macros presumably drop those arguments otherwise - confirm in upb.h. */
Josh Haberman78da6662016-01-13 19:05:43 -08005041static const upb_msgdef msgs[22];
5042static const upb_fielddef fields[105];
5043static const upb_enumdef enums[5];
5044static const upb_tabent strentries[268];
5045static const upb_tabent intentries[18];
Josh Habermanf654d492016-02-18 11:07:51 -08005046static const upb_tabval arrays[184];
Chris Fallin91473dc2014-12-12 15:58:26 -08005047
5048#ifdef UPB_DEBUG_REFS
Josh Haberman78da6662016-01-13 19:05:43 -08005049static upb_inttable reftables[266];
Chris Fallin91473dc2014-12-12 15:58:26 -08005050#endif
5051
/* Generated table of 22 message definitions, one per google.protobuf.*
 * descriptor message type named below.  Each UPB_MSGDEF_INIT entry supplies
 * the message's full name, two integers, an int-table initializer backed by a
 * slice of arrays[] (plus intentries[] where present), a string-table
 * initializer backed by a slice of strentries[], and two reftables[] slots.
 * NOTE(review): the meaning of the numeric arguments is defined by
 * UPB_MSGDEF_INIT / UPB_INTTABLE_INIT / UPB_STRTABLE_INIT, which are not
 * visible here - consult upb.h before hand-editing (this file is generated). */
Josh Haberman78da6662016-01-13 19:05:43 -08005052static const upb_msgdef msgs[22] = {
5053 UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", 40, 8, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[0], 11, 10), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[0]),&reftables[0], &reftables[1]),
5054 UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", 4, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[11], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[16]),&reftables[2], &reftables[3]),
5055 UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ReservedRange", 4, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[14], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[20]),&reftables[4], &reftables[5]),
5056 UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[17], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[24]),&reftables[6], &reftables[7]),
Josh Habermanf654d492016-02-18 11:07:51 -08005057 UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 8, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[0], &arrays[21], 4, 2), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[28]),&reftables[8], &reftables[9]),
5058 UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", 8, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[25], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[32]),&reftables[10], &reftables[11]),
5059 UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", 7, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[2], &arrays[29], 2, 1), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[36]),&reftables[12], &reftables[13]),
5060 UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", 23, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[31], 11, 10), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[40]),&reftables[14], &reftables[15]),
5061 UPB_MSGDEF_INIT("google.protobuf.FieldOptions", 12, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[4], &arrays[42], 11, 6), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[56]),&reftables[16], &reftables[17]),
5062 UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", 42, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[53], 13, 12), UPB_STRTABLE_INIT(12, 15, UPB_CTYPE_PTR, 4, &strentries[72]),&reftables[18], &reftables[19]),
5063 UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[66], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[88]),&reftables[20], &reftables[21]),
5064 UPB_MSGDEF_INIT("google.protobuf.FileOptions", 31, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[6], &arrays[68], 39, 15), UPB_STRTABLE_INIT(16, 31, UPB_CTYPE_PTR, 5, &strentries[92]),&reftables[22], &reftables[23]),
5065 UPB_MSGDEF_INIT("google.protobuf.MessageOptions", 10, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[8], &arrays[107], 8, 4), UPB_STRTABLE_INIT(5, 7, UPB_CTYPE_PTR, 3, &strentries[124]),&reftables[24], &reftables[25]),
5066 UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", 15, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[115], 7, 6), UPB_STRTABLE_INIT(6, 7, UPB_CTYPE_PTR, 3, &strentries[132]),&reftables[26], &reftables[27]),
5067 UPB_MSGDEF_INIT("google.protobuf.MethodOptions", 7, 1, UPB_INTTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &intentries[10], &arrays[122], 1, 0), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[140]),&reftables[28], &reftables[29]),
5068 UPB_MSGDEF_INIT("google.protobuf.OneofDescriptorProto", 5, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[123], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[144]),&reftables[30], &reftables[31]),
5069 UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[125], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[148]),&reftables[32], &reftables[33]),
5070 UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", 7, 1, UPB_INTTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &intentries[14], &arrays[129], 1, 0), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[152]),&reftables[34], &reftables[35]),
5071 UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[130], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[156]),&reftables[36], &reftables[37]),
5072 UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", 19, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[132], 7, 5), UPB_STRTABLE_INIT(5, 7, UPB_CTYPE_PTR, 3, &strentries[160]),&reftables[38], &reftables[39]),
5073 UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", 18, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[139], 9, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[168]),&reftables[40], &reftables[41]),
5074 UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", 6, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[148], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[184]),&reftables[42], &reftables[43]),
Chris Fallin91473dc2014-12-12 15:58:26 -08005075};
5076
Josh Haberman78da6662016-01-13 19:05:43 -08005077static const upb_fielddef fields[105] = {
5078 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[20], NULL, 15, 6, {0},&reftables[44], &reftables[45]),
5079 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[4], NULL, 6, 1, {0},&reftables[46], &reftables[47]),
5080 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_enable_arenas", 31, &msgs[11], NULL, 23, 12, {0},&reftables[48], &reftables[49]),
5081 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[11], NULL, 17, 6, {0},&reftables[50], &reftables[51]),
5082 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "client_streaming", 5, &msgs[13], NULL, 13, 4, {0},&reftables[52], &reftables[53]),
5083 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "csharp_namespace", 37, &msgs[11], NULL, 27, 14, {0},&reftables[54], &reftables[55]),
5084 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[8], (const upb_def*)(&enums[2]), 6, 1, {0},&reftables[56], &reftables[57]),
5085 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[7], NULL, 16, 7, {0},&reftables[58], &reftables[59]),
5086 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[9], NULL, 30, 8, {0},&reftables[60], &reftables[61]),
5087 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 1, &msgs[6], NULL, 6, 1, {0},&reftables[62], &reftables[63]),
5088 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[4], NULL, 7, 2, {0},&reftables[64], &reftables[65]),
5089 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 33, &msgs[17], NULL, 6, 1, {0},&reftables[66], &reftables[67]),
5090 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[8], NULL, 8, 3, {0},&reftables[68], &reftables[69]),
5091 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 33, &msgs[14], NULL, 6, 1, {0},&reftables[70], &reftables[71]),
5092 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[12], NULL, 8, 3, {0},&reftables[72], &reftables[73]),
5093 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 23, &msgs[11], NULL, 21, 10, {0},&reftables[74], &reftables[75]),
5094 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[20], NULL, 11, 4, {0},&reftables[76], &reftables[77]),
5095 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[2], NULL, 3, 1, {0},&reftables[78], &reftables[79]),
5096 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[80], &reftables[81]),
5097 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[9], (const upb_def*)(&msgs[3]), 13, 1, {0},&reftables[82], &reftables[83]),
5098 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 4, &msgs[0], (const upb_def*)(&msgs[3]), 18, 2, {0},&reftables[84], &reftables[85]),
5099 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[7], NULL, 7, 2, {0},&reftables[86], &reftables[87]),
5100 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], (const upb_def*)(&msgs[7]), 24, 4, {0},&reftables[88], &reftables[89]),
5101 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[9], (const upb_def*)(&msgs[7]), 19, 3, {0},&reftables[90], &reftables[91]),
5102 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], (const upb_def*)(&msgs[1]), 21, 3, {0},&reftables[92], &reftables[93]),
5103 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], (const upb_def*)(&msgs[7]), 12, 0, {0},&reftables[94], &reftables[95]),
5104 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[10], (const upb_def*)(&msgs[9]), 5, 0, {0},&reftables[96], &reftables[97]),
5105 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[11], NULL, 14, 5, {0},&reftables[98], &reftables[99]),
5106 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[20], NULL, 6, 1, {0},&reftables[100], &reftables[101]),
5107 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[13], NULL, 7, 2, {0},&reftables[102], &reftables[103]),
5108 UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[21], NULL, 5, 1, {0},&reftables[104], &reftables[105]),
5109 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generate_equals_and_hash", 20, &msgs[11], NULL, 20, 9, {0},&reftables[106], &reftables[107]),
5110 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[11], NULL, 18, 7, {0},&reftables[108], &reftables[109]),
5111 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[11], NULL, 13, 4, {0},&reftables[110], &reftables[111]),
5112 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[11], NULL, 9, 2, {0},&reftables[112], &reftables[113]),
5113 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[11], NULL, 6, 1, {0},&reftables[114], &reftables[115]),
5114 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_string_check_utf8", 27, &msgs[11], NULL, 22, 11, {0},&reftables[116], &reftables[117]),
5115 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "javanano_use_deprecated_package", 38, &msgs[11], NULL, 30, 15, {0},&reftables[118], &reftables[119]),
5116 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "json_name", 10, &msgs[7], NULL, 20, 9, {0},&reftables[120], &reftables[121]),
5117 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "jstype", 6, &msgs[8], (const upb_def*)(&enums[3]), 10, 5, {0},&reftables[122], &reftables[123]),
5118 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[7], (const upb_def*)(&enums[0]), 11, 4, {0},&reftables[124], &reftables[125]),
5119 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[8], NULL, 9, 4, {0},&reftables[126], &reftables[127]),
5120 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[19], NULL, 8, 2, {0},&reftables[128], &reftables[129]),
5121 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "leading_detached_comments", 6, &msgs[19], NULL, 16, 4, {0},&reftables[130], &reftables[131]),
5122 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[18], (const upb_def*)(&msgs[19]), 5, 0, {0},&reftables[132], &reftables[133]),
5123 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "map_entry", 7, &msgs[12], NULL, 9, 4, {0},&reftables[134], &reftables[135]),
5124 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[12], NULL, 6, 1, {0},&reftables[136], &reftables[137]),
5125 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "message_type", 4, &msgs[9], (const upb_def*)(&msgs[0]), 10, 0, {0},&reftables[138], &reftables[139]),
5126 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, &msgs[16], (const upb_def*)(&msgs[13]), 6, 0, {0},&reftables[140], &reftables[141]),
5127 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[9], NULL, 22, 6, {0},&reftables[142], &reftables[143]),
5128 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[5], NULL, 4, 1, {0},&reftables[144], &reftables[145]),
5129 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[13], NULL, 4, 1, {0},&reftables[146], &reftables[147]),
5130 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 32, 8, {0},&reftables[148], &reftables[149]),
5131 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[3], NULL, 8, 2, {0},&reftables[150], &reftables[151]),
5132 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[15], NULL, 2, 0, {0},&reftables[152], &reftables[153]),
5133 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[16], NULL, 8, 2, {0},&reftables[154], &reftables[155]),
5134 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[7], NULL, 4, 1, {0},&reftables[156], &reftables[157]),
5135 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[20], (const upb_def*)(&msgs[21]), 5, 0, {0},&reftables[158], &reftables[159]),
5136 UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[21], NULL, 2, 0, {0},&reftables[160], &reftables[161]),
5137 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[20], NULL, 10, 3, {0},&reftables[162], &reftables[163]),
5138 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], (const upb_def*)(&msgs[0]), 15, 1, {0},&reftables[164], &reftables[165]),
5139 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[12], NULL, 7, 2, {0},&reftables[166], &reftables[167]),
5140 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[7], NULL, 10, 3, {0},&reftables[168], &reftables[169]),
5141 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[5], NULL, 7, 2, {0},&reftables[170], &reftables[171]),
5142 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "objc_class_prefix", 36, &msgs[11], NULL, 24, 13, {0},&reftables[172], &reftables[173]),
5143 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "oneof_decl", 8, &msgs[0], (const upb_def*)(&msgs[15]), 28, 6, {0},&reftables[174], &reftables[175]),
5144 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "oneof_index", 9, &msgs[7], NULL, 19, 8, {0},&reftables[176], &reftables[177]),
5145 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[11], (const upb_def*)(&enums[4]), 12, 3, {0},&reftables[178], &reftables[179]),
5146 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[13], (const upb_def*)(&msgs[14]), 3, 0, {0},&reftables[180], &reftables[181]),
5147 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[3], (const upb_def*)(&msgs[4]), 7, 1, {0},&reftables[182], &reftables[183]),
5148 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], (const upb_def*)(&msgs[12]), 25, 5, {0},&reftables[184], &reftables[185]),
5149 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[5], (const upb_def*)(&msgs[6]), 3, 0, {0},&reftables[186], &reftables[187]),
5150 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[9], (const upb_def*)(&msgs[11]), 20, 4, {0},&reftables[188], &reftables[189]),
5151 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[16], (const upb_def*)(&msgs[17]), 7, 1, {0},&reftables[190], &reftables[191]),
5152 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[7], (const upb_def*)(&msgs[8]), 3, 0, {0},&reftables[192], &reftables[193]),
5153 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[13], NULL, 10, 3, {0},&reftables[194], &reftables[195]),
5154 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[9], NULL, 25, 7, {0},&reftables[196], &reftables[197]),
5155 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[8], NULL, 7, 2, {0},&reftables[198], &reftables[199]),
5156 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[19], NULL, 4, 0, {0},&reftables[200], &reftables[201]),
5157 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[20], NULL, 9, 2, {0},&reftables[202], &reftables[203]),
5158 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[9], NULL, 35, 9, {0},&reftables[204], &reftables[205]),
5159 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[11], NULL, 19, 8, {0},&reftables[206], &reftables[207]),
5160 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "reserved_name", 10, &msgs[0], NULL, 37, 9, {0},&reftables[208], &reftables[209]),
5161 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "reserved_range", 9, &msgs[0], (const upb_def*)(&msgs[2]), 31, 7, {0},&reftables[210], &reftables[211]),
5162 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "server_streaming", 6, &msgs[13], NULL, 14, 5, {0},&reftables[212], &reftables[213]),
5163 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[9], (const upb_def*)(&msgs[16]), 16, 2, {0},&reftables[214], &reftables[215]),
5164 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[9], (const upb_def*)(&msgs[18]), 21, 5, {0},&reftables[216], &reftables[217]),
5165 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[19], NULL, 7, 1, {0},&reftables[218], &reftables[219]),
5166 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[220], &reftables[221]),
5167 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[2], NULL, 2, 0, {0},&reftables[222], &reftables[223]),
5168 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[20], NULL, 12, 5, {0},&reftables[224], &reftables[225]),
5169 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "syntax", 12, &msgs[9], NULL, 39, 11, {0},&reftables[226], &reftables[227]),
5170 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[19], NULL, 11, 3, {0},&reftables[228], &reftables[229]),
5171 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[7], (const upb_def*)(&enums[1]), 12, 5, {0},&reftables[230], &reftables[231]),
5172 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[7], NULL, 13, 6, {0},&reftables[232], &reftables[233]),
5173 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[17], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[234], &reftables[235]),
5174 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[12], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[236], &reftables[237]),
5175 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[8], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[238], &reftables[239]),
5176 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[14], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[240], &reftables[241]),
5177 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[242], &reftables[243]),
5178 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[4], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[244], &reftables[245]),
5179 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[6], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[246], &reftables[247]),
5180 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[3], (const upb_def*)(&msgs[5]), 6, 0, {0},&reftables[248], &reftables[249]),
5181 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[8], NULL, 11, 6, {0},&reftables[250], &reftables[251]),
5182 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[9], NULL, 38, 10, {0},&reftables[252], &reftables[253]),
Chris Fallin91473dc2014-12-12 15:58:26 -08005183};
5184
/* Static table of the five enum definitions used by this descriptor data:
 *   [0] google.protobuf.FieldDescriptorProto.Label
 *   [1] google.protobuf.FieldDescriptorProto.Type
 *   [2] google.protobuf.FieldOptions.CType
 *   [3] google.protobuf.FieldOptions.JSType
 *   [4] google.protobuf.FileOptions.OptimizeMode
 *
 * Each UPB_ENUMDEF_INIT entry supplies, in order: the fully-qualified enum
 * name, a UPB_STRTABLE_INIT whose last argument points at this enum's slice
 * of strentries[], a UPB_INTTABLE_INIT that points into arrays[], a literal 0,
 * and two reftables[] slots.  The strentries[] slices hold the value names
 * with their integer numbers (e.g. strentries[188..191] carry the LABEL_*
 * names).  The string table is declared with UPB_CTYPE_INT32 and the int table
 * with UPB_CTYPE_CSTR -- presumably name->number and number->name lookups
 * respectively; the exact meaning of the remaining numeric macro arguments is
 * defined where the macros are declared, outside this section (TODO confirm).
 *
 * The index of each entry is significant: fields[] and strentries[] refer to
 * these definitions as &enums[N], so entries must not be reordered.
 *
 * NOTE(review): the author/commit/date/line-number text fused onto the start
 * of the lines below is not C; it looks like residue from a blame-view
 * extraction.  Confirm against the upstream file before building. */
Josh Haberman78da6662016-01-13 19:05:43 -08005185static const upb_enumdef enums[5] = {
Josh Habermanf654d492016-02-18 11:07:51 -08005186 UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[188]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[151], 4, 3), 0, &reftables[254], &reftables[255]),
5187 UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_INT32, 5, &strentries[192]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[155], 19, 18), 0, &reftables[256], &reftables[257]),
5188 UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[224]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[174], 3, 3), 0, &reftables[258], &reftables[259]),
5189 UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.JSType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[228]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[177], 3, 3), 0, &reftables[260], &reftables[261]),
5190 UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[232]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[180], 4, 3), 0, &reftables[262], &reftables[263]),
Chris Fallin91473dc2014-12-12 15:58:26 -08005191};
5192
Josh Haberman78da6662016-01-13 19:05:43 -08005193static const upb_tabent strentries[268] = {
5194 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[22]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005195 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005196 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "reserved_name"), UPB_TABVALUE_PTR_INIT(&fields[82]), NULL},
5197 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[52]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005198 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5199 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5200 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005201 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "field"), UPB_TABVALUE_PTR_INIT(&fields[25]), &strentries[12]},
5202 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "extension_range"), UPB_TABVALUE_PTR_INIT(&fields[24]), &strentries[14]},
Josh Habermane8ed0212015-06-08 17:56:03 -07005203 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005204 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "nested_type"), UPB_TABVALUE_PTR_INIT(&fields[60]), NULL},
5205 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5206 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "reserved_range"), UPB_TABVALUE_PTR_INIT(&fields[83]), NULL},
5207 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[70]), NULL},
5208 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "oneof_decl"), UPB_TABVALUE_PTR_INIT(&fields[65]), NULL},
5209 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[20]), &strentries[13]},
5210 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[88]), NULL},
5211 {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[18]), NULL},
5212 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5213 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5214 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[89]), NULL},
5215 {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[17]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005216 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5217 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5218 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005219 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "value"), UPB_TABVALUE_PTR_INIT(&fields[102]), NULL},
5220 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[69]), NULL},
5221 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[53]), &strentries[26]},
5222 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[100]), NULL},
5223 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005224 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "allow_alias"), UPB_TABVALUE_PTR_INIT(&fields[1]), NULL},
5225 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005226 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[63]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005227 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005228 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
5229 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[50]), &strentries[34]},
5230 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[101]), NULL},
5231 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[9]), NULL},
5232 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5233 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5234 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "oneof_index"), UPB_TABVALUE_PTR_INIT(&fields[66]), NULL},
5235 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "label"), UPB_TABVALUE_PTR_INIT(&fields[40]), NULL},
5236 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5237 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[56]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005238 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5239 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5240 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5241 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005242 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[62]), &strentries[53]},
Josh Habermane8ed0212015-06-08 17:56:03 -07005243 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005244 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "extendee"), UPB_TABVALUE_PTR_INIT(&fields[21]), NULL},
5245 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "type_name"), UPB_TABVALUE_PTR_INIT(&fields[94]), NULL},
5246 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "json_name"), UPB_TABVALUE_PTR_INIT(&fields[38]), NULL},
5247 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "type"), UPB_TABVALUE_PTR_INIT(&fields[93]), &strentries[50]},
5248 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "default_value"), UPB_TABVALUE_PTR_INIT(&fields[7]), NULL},
5249 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
5250 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[97]), NULL},
5251 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5252 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "weak"), UPB_TABVALUE_PTR_INIT(&fields[103]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005253 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5254 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5255 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5256 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005257 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "packed"), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
5258 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "lazy"), UPB_TABVALUE_PTR_INIT(&fields[41]), NULL},
5259 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5260 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "ctype"), UPB_TABVALUE_PTR_INIT(&fields[6]), NULL},
5261 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5262 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "jstype"), UPB_TABVALUE_PTR_INIT(&fields[39]), NULL},
5263 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[12]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005264 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5265 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005266 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[23]), NULL},
5267 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "weak_dependency"), UPB_TABVALUE_PTR_INIT(&fields[104]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005268 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005269 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[49]), NULL},
5270 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "service"), UPB_TABVALUE_PTR_INIT(&fields[85]), NULL},
5271 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5272 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "source_code_info"), UPB_TABVALUE_PTR_INIT(&fields[86]), NULL},
5273 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5274 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5275 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "syntax"), UPB_TABVALUE_PTR_INIT(&fields[91]), NULL},
5276 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "dependency"), UPB_TABVALUE_PTR_INIT(&fields[8]), NULL},
5277 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "message_type"), UPB_TABVALUE_PTR_INIT(&fields[47]), NULL},
5278 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "package"), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
5279 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[72]), &strentries[86]},
5280 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[19]), NULL},
5281 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "public_dependency"), UPB_TABVALUE_PTR_INIT(&fields[80]), &strentries[85]},
5282 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5283 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "file"), UPB_TABVALUE_PTR_INIT(&fields[26]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005284 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5285 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5286 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5287 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005288 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "cc_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[3]), NULL},
5289 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "csharp_namespace"), UPB_TABVALUE_PTR_INIT(&fields[5]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005290 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5291 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5292 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5293 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5294 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005295 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5296 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5297 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "go_package"), UPB_TABVALUE_PTR_INIT(&fields[27]), NULL},
5298 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "java_package"), UPB_TABVALUE_PTR_INIT(&fields[35]), &strentries[120]},
5299 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5300 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5301 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "java_outer_classname"), UPB_TABVALUE_PTR_INIT(&fields[34]), NULL},
5302 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[99]), NULL},
5303 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5304 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5305 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5306 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "java_multiple_files"), UPB_TABVALUE_PTR_INIT(&fields[33]), &strentries[117]},
5307 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5308 {UPB_TABKEY_STR("\025", "\000", "\000", "\000", "java_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[32]), &strentries[118]},
5309 {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "java_generate_equals_and_hash"), UPB_TABVALUE_PTR_INIT(&fields[31]), NULL},
5310 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5311 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "javanano_use_deprecated_package"), UPB_TABVALUE_PTR_INIT(&fields[37]), &strentries[123]},
5312 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "py_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[81]), NULL},
5313 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "optimize_for"), UPB_TABVALUE_PTR_INIT(&fields[67]), NULL},
5314 {UPB_TABKEY_STR("\026", "\000", "\000", "\000", "java_string_check_utf8"), UPB_TABVALUE_PTR_INIT(&fields[36]), NULL},
5315 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[15]), &strentries[119]},
5316 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "objc_class_prefix"), UPB_TABVALUE_PTR_INIT(&fields[64]), NULL},
5317 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "cc_enable_arenas"), UPB_TABVALUE_PTR_INIT(&fields[2]), NULL},
5318 {UPB_TABKEY_STR("\027", "\000", "\000", "\000", "message_set_wire_format"), UPB_TABVALUE_PTR_INIT(&fields[46]), &strentries[128]},
5319 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5320 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5321 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5322 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[96]), NULL},
5323 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL},
5324 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "map_entry"), UPB_TABVALUE_PTR_INIT(&fields[45]), NULL},
5325 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "no_standard_descriptor_accessor"), UPB_TABVALUE_PTR_INIT(&fields[61]), NULL},
5326 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5327 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "client_streaming"), UPB_TABVALUE_PTR_INIT(&fields[4]), NULL},
5328 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "server_streaming"), UPB_TABVALUE_PTR_INIT(&fields[84]), NULL},
5329 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[51]), NULL},
5330 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "input_type"), UPB_TABVALUE_PTR_INIT(&fields[29]), NULL},
5331 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5332 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "output_type"), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
5333 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[68]), NULL},
5334 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[98]), NULL},
5335 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[13]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005336 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5337 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5338 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5339 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005340 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005341 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[54]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005342 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005343 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[73]), &strentries[150]},
5344 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "method"), UPB_TABVALUE_PTR_INIT(&fields[48]), NULL},
5345 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[55]), &strentries[149]},
5346 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[95]), NULL},
5347 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[11]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005348 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5349 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5350 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005351 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5352 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "location"), UPB_TABVALUE_PTR_INIT(&fields[44]), NULL},
5353 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5354 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5355 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5356 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5357 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "span"), UPB_TABVALUE_PTR_INIT(&fields[87]), &strentries[167]},
5358 {UPB_TABKEY_STR("\031", "\000", "\000", "\000", "leading_detached_comments"), UPB_TABVALUE_PTR_INIT(&fields[43]), &strentries[165]},
5359 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "trailing_comments"), UPB_TABVALUE_PTR_INIT(&fields[92]), NULL},
5360 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "leading_comments"), UPB_TABVALUE_PTR_INIT(&fields[42]), &strentries[164]},
5361 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "path"), UPB_TABVALUE_PTR_INIT(&fields[78]), NULL},
5362 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "double_value"), UPB_TABVALUE_PTR_INIT(&fields[16]), NULL},
5363 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5364 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5365 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[57]), NULL},
5366 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5367 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5368 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5369 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "negative_int_value"), UPB_TABVALUE_PTR_INIT(&fields[59]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005370 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "aggregate_value"), UPB_TABVALUE_PTR_INIT(&fields[0]), NULL},
5371 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5372 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5373 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5374 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005375 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "positive_int_value"), UPB_TABVALUE_PTR_INIT(&fields[79]), NULL},
5376 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "identifier_value"), UPB_TABVALUE_PTR_INIT(&fields[28]), NULL},
5377 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "string_value"), UPB_TABVALUE_PTR_INIT(&fields[90]), &strentries[182]},
Josh Habermane8ed0212015-06-08 17:56:03 -07005378 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5379 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005380 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "is_extension"), UPB_TABVALUE_PTR_INIT(&fields[30]), NULL},
5381 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "name_part"), UPB_TABVALUE_PTR_INIT(&fields[58]), NULL},
5382 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REQUIRED"), UPB_TABVALUE_INT_INIT(2), &strentries[190]},
Josh Habermane8ed0212015-06-08 17:56:03 -07005383 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5384 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REPEATED"), UPB_TABVALUE_INT_INIT(3), NULL},
5385 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_OPTIONAL"), UPB_TABVALUE_INT_INIT(1), NULL},
5386 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED64"), UPB_TABVALUE_INT_INIT(6), NULL},
5387 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5388 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5389 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5390 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5391 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_STRING"), UPB_TABVALUE_INT_INIT(9), NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005392 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_FLOAT"), UPB_TABVALUE_INT_INIT(2), &strentries[221]},
Josh Habermane8ed0212015-06-08 17:56:03 -07005393 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_DOUBLE"), UPB_TABVALUE_INT_INIT(1), NULL},
5394 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5395 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT32"), UPB_TABVALUE_INT_INIT(5), NULL},
5396 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED32"), UPB_TABVALUE_INT_INIT(15), NULL},
5397 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED32"), UPB_TABVALUE_INT_INIT(7), NULL},
5398 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005399 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_MESSAGE"), UPB_TABVALUE_INT_INIT(11), &strentries[222]},
Josh Habermane8ed0212015-06-08 17:56:03 -07005400 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5401 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005402 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT64"), UPB_TABVALUE_INT_INIT(3), &strentries[219]},
Josh Habermane8ed0212015-06-08 17:56:03 -07005403 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5404 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5405 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5406 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5407 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_ENUM"), UPB_TABVALUE_INT_INIT(14), NULL},
5408 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT32"), UPB_TABVALUE_INT_INIT(13), NULL},
5409 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005410 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT64"), UPB_TABVALUE_INT_INIT(4), &strentries[218]},
Josh Habermane8ed0212015-06-08 17:56:03 -07005411 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5412 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED64"), UPB_TABVALUE_INT_INIT(16), NULL},
5413 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_BYTES"), UPB_TABVALUE_INT_INIT(12), NULL},
5414 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT64"), UPB_TABVALUE_INT_INIT(18), NULL},
5415 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_BOOL"), UPB_TABVALUE_INT_INIT(8), NULL},
5416 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_GROUP"), UPB_TABVALUE_INT_INIT(10), NULL},
5417 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT32"), UPB_TABVALUE_INT_INIT(17), NULL},
5418 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5419 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "CORD"), UPB_TABVALUE_INT_INIT(1), NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005420 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "STRING"), UPB_TABVALUE_INT_INIT(0), &strentries[225]},
Josh Habermane8ed0212015-06-08 17:56:03 -07005421 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "STRING_PIECE"), UPB_TABVALUE_INT_INIT(2), NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005422 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5423 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_NORMAL"), UPB_TABVALUE_INT_INIT(0), NULL},
5424 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_NUMBER"), UPB_TABVALUE_INT_INIT(2), NULL},
5425 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_STRING"), UPB_TABVALUE_INT_INIT(1), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005426 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "CODE_SIZE"), UPB_TABVALUE_INT_INIT(2), NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005427 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "SPEED"), UPB_TABVALUE_INT_INIT(1), &strentries[235]},
Josh Habermane8ed0212015-06-08 17:56:03 -07005428 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5429 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "LITE_RUNTIME"), UPB_TABVALUE_INT_INIT(3), NULL},
5430 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5431 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005432 {UPB_TABKEY_STR("\047", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo.Location"), UPB_TABVALUE_PTR_INIT(&msgs[19]), NULL},
5433 {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.UninterpretedOption"), UPB_TABVALUE_PTR_INIT(&msgs[20]), NULL},
5434 {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.FileDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[9]), NULL},
5435 {UPB_TABKEY_STR("\045", "\000", "\000", "\000", "google.protobuf.MethodDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[13]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005436 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005437 {UPB_TABKEY_STR("\040", "\000", "\000", "\000", "google.protobuf.EnumValueOptions"), UPB_TABVALUE_PTR_INIT(&msgs[6]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005438 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5439 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005440 {UPB_TABKEY_STR("\055", "\000", "\000", "\000", "google.protobuf.DescriptorProto.ReservedRange"), UPB_TABVALUE_PTR_INIT(&msgs[2]), NULL},
5441 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "google.protobuf.DescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[0]), &strentries[248]},
5442 {UPB_TABKEY_STR("\041", "\000", "\000", "\000", "google.protobuf.FileDescriptorSet"), UPB_TABVALUE_PTR_INIT(&msgs[10]), &strentries[267]},
5443 {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo"), UPB_TABVALUE_PTR_INIT(&msgs[18]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005444 {UPB_TABKEY_STR("\051", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Type"), UPB_TABVALUE_PTR_INIT(&enums[1]), NULL},
5445 {UPB_TABKEY_STR("\056", "\000", "\000", "\000", "google.protobuf.DescriptorProto.ExtensionRange"), UPB_TABVALUE_PTR_INIT(&msgs[1]), NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005446 {UPB_TABKEY_STR("\044", "\000", "\000", "\000", "google.protobuf.OneofDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[15]), NULL},
5447 {UPB_TABKEY_STR("\046", "\000", "\000", "\000", "google.protobuf.ServiceDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[16]), NULL},
5448 {UPB_TABKEY_STR("\034", "\000", "\000", "\000", "google.protobuf.FieldOptions"), UPB_TABVALUE_PTR_INIT(&msgs[8]), NULL},
5449 {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.FileOptions"), UPB_TABVALUE_PTR_INIT(&msgs[11]), NULL},
5450 {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.EnumDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[3]), &strentries[265]},
Josh Habermane8ed0212015-06-08 17:56:03 -07005451 {UPB_TABKEY_STR("\052", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Label"), UPB_TABVALUE_PTR_INIT(&enums[0]), NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005452 {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.FileOptions.OptimizeMode"), UPB_TABVALUE_PTR_INIT(&enums[4]), NULL},
5453 {UPB_TABKEY_STR("\042", "\000", "\000", "\000", "google.protobuf.FieldOptions.CType"), UPB_TABVALUE_PTR_INIT(&enums[2]), &strentries[261]},
5454 {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.FieldOptions.JSType"), UPB_TABVALUE_PTR_INIT(&enums[3]), NULL},
5455 {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.EnumOptions"), UPB_TABVALUE_PTR_INIT(&msgs[4]), NULL},
5456 {UPB_TABKEY_STR("\044", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[7]), NULL},
5457 {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.EnumValueDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[5]), &strentries[258]},
5458 {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.ServiceOptions"), UPB_TABVALUE_PTR_INIT(&msgs[17]), NULL},
5459 {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.MessageOptions"), UPB_TABVALUE_PTR_INIT(&msgs[12]), NULL},
5460 {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "google.protobuf.MethodOptions"), UPB_TABVALUE_PTR_INIT(&msgs[14]), &strentries[253]},
5461 {UPB_TABKEY_STR("\054", "\000", "\000", "\000", "google.protobuf.UninterpretedOption.NamePart"), UPB_TABVALUE_PTR_INIT(&msgs[21]), NULL},
Chris Fallin91473dc2014-12-12 15:58:26 -08005462};
5463
Josh Haberman78da6662016-01-13 19:05:43 -08005464static const upb_tabent intentries[18] = {
Josh Habermane8ed0212015-06-08 17:56:03 -07005465 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005466 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[100]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005467 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005468 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[101]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005469 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005470 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[97]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005471 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005472 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[99]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005473 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005474 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[96]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005475 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005476 {UPB_TABKEY_NUM(33), UPB_TABVALUE_PTR_INIT(&fields[13]), NULL},
Josh Habermane8ed0212015-06-08 17:56:03 -07005477 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
Josh Haberman78da6662016-01-13 19:05:43 -08005478 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[98]), NULL},
5479 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5480 {UPB_TABKEY_NUM(33), UPB_TABVALUE_PTR_INIT(&fields[11]), NULL},
5481 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5482 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[95]), NULL},
Chris Fallin91473dc2014-12-12 15:58:26 -08005483};
5484
Josh Habermanf654d492016-02-18 11:07:51 -08005485static const upb_tabval arrays[184] = {
Josh Habermane8ed0212015-06-08 17:56:03 -07005486 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005487 UPB_TABVALUE_PTR_INIT(&fields[52]),
5488 UPB_TABVALUE_PTR_INIT(&fields[25]),
5489 UPB_TABVALUE_PTR_INIT(&fields[60]),
5490 UPB_TABVALUE_PTR_INIT(&fields[20]),
5491 UPB_TABVALUE_PTR_INIT(&fields[24]),
5492 UPB_TABVALUE_PTR_INIT(&fields[22]),
5493 UPB_TABVALUE_PTR_INIT(&fields[70]),
5494 UPB_TABVALUE_PTR_INIT(&fields[65]),
5495 UPB_TABVALUE_PTR_INIT(&fields[83]),
5496 UPB_TABVALUE_PTR_INIT(&fields[82]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005497 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005498 UPB_TABVALUE_PTR_INIT(&fields[88]),
5499 UPB_TABVALUE_PTR_INIT(&fields[18]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005500 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005501 UPB_TABVALUE_PTR_INIT(&fields[89]),
5502 UPB_TABVALUE_PTR_INIT(&fields[17]),
5503 UPB_TABVALUE_EMPTY_INIT,
5504 UPB_TABVALUE_PTR_INIT(&fields[53]),
5505 UPB_TABVALUE_PTR_INIT(&fields[102]),
5506 UPB_TABVALUE_PTR_INIT(&fields[69]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005507 UPB_TABVALUE_EMPTY_INIT,
5508 UPB_TABVALUE_EMPTY_INIT,
5509 UPB_TABVALUE_PTR_INIT(&fields[1]),
Josh Haberman78da6662016-01-13 19:05:43 -08005510 UPB_TABVALUE_PTR_INIT(&fields[10]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005511 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005512 UPB_TABVALUE_PTR_INIT(&fields[50]),
5513 UPB_TABVALUE_PTR_INIT(&fields[63]),
5514 UPB_TABVALUE_PTR_INIT(&fields[71]),
5515 UPB_TABVALUE_EMPTY_INIT,
5516 UPB_TABVALUE_PTR_INIT(&fields[9]),
5517 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005518 UPB_TABVALUE_PTR_INIT(&fields[56]),
5519 UPB_TABVALUE_PTR_INIT(&fields[21]),
5520 UPB_TABVALUE_PTR_INIT(&fields[62]),
5521 UPB_TABVALUE_PTR_INIT(&fields[40]),
5522 UPB_TABVALUE_PTR_INIT(&fields[93]),
5523 UPB_TABVALUE_PTR_INIT(&fields[94]),
5524 UPB_TABVALUE_PTR_INIT(&fields[7]),
5525 UPB_TABVALUE_PTR_INIT(&fields[74]),
5526 UPB_TABVALUE_PTR_INIT(&fields[66]),
5527 UPB_TABVALUE_PTR_INIT(&fields[38]),
5528 UPB_TABVALUE_EMPTY_INIT,
5529 UPB_TABVALUE_PTR_INIT(&fields[6]),
5530 UPB_TABVALUE_PTR_INIT(&fields[77]),
5531 UPB_TABVALUE_PTR_INIT(&fields[12]),
5532 UPB_TABVALUE_EMPTY_INIT,
Josh Habermane8ed0212015-06-08 17:56:03 -07005533 UPB_TABVALUE_PTR_INIT(&fields[41]),
Josh Haberman78da6662016-01-13 19:05:43 -08005534 UPB_TABVALUE_PTR_INIT(&fields[39]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005535 UPB_TABVALUE_EMPTY_INIT,
5536 UPB_TABVALUE_EMPTY_INIT,
5537 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005538 UPB_TABVALUE_PTR_INIT(&fields[103]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005539 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005540 UPB_TABVALUE_PTR_INIT(&fields[49]),
5541 UPB_TABVALUE_PTR_INIT(&fields[76]),
5542 UPB_TABVALUE_PTR_INIT(&fields[8]),
5543 UPB_TABVALUE_PTR_INIT(&fields[47]),
5544 UPB_TABVALUE_PTR_INIT(&fields[19]),
5545 UPB_TABVALUE_PTR_INIT(&fields[85]),
5546 UPB_TABVALUE_PTR_INIT(&fields[23]),
5547 UPB_TABVALUE_PTR_INIT(&fields[72]),
5548 UPB_TABVALUE_PTR_INIT(&fields[86]),
5549 UPB_TABVALUE_PTR_INIT(&fields[80]),
5550 UPB_TABVALUE_PTR_INIT(&fields[104]),
5551 UPB_TABVALUE_PTR_INIT(&fields[91]),
5552 UPB_TABVALUE_EMPTY_INIT,
5553 UPB_TABVALUE_PTR_INIT(&fields[26]),
5554 UPB_TABVALUE_EMPTY_INIT,
5555 UPB_TABVALUE_PTR_INIT(&fields[35]),
5556 UPB_TABVALUE_EMPTY_INIT,
5557 UPB_TABVALUE_EMPTY_INIT,
5558 UPB_TABVALUE_EMPTY_INIT,
5559 UPB_TABVALUE_EMPTY_INIT,
5560 UPB_TABVALUE_EMPTY_INIT,
5561 UPB_TABVALUE_EMPTY_INIT,
Josh Habermane8ed0212015-06-08 17:56:03 -07005562 UPB_TABVALUE_PTR_INIT(&fields[34]),
Josh Haberman78da6662016-01-13 19:05:43 -08005563 UPB_TABVALUE_PTR_INIT(&fields[67]),
5564 UPB_TABVALUE_PTR_INIT(&fields[33]),
5565 UPB_TABVALUE_PTR_INIT(&fields[27]),
5566 UPB_TABVALUE_EMPTY_INIT,
5567 UPB_TABVALUE_EMPTY_INIT,
5568 UPB_TABVALUE_EMPTY_INIT,
5569 UPB_TABVALUE_EMPTY_INIT,
5570 UPB_TABVALUE_PTR_INIT(&fields[3]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005571 UPB_TABVALUE_PTR_INIT(&fields[32]),
Josh Haberman78da6662016-01-13 19:05:43 -08005572 UPB_TABVALUE_PTR_INIT(&fields[81]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005573 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005574 UPB_TABVALUE_PTR_INIT(&fields[31]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005575 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005576 UPB_TABVALUE_EMPTY_INIT,
5577 UPB_TABVALUE_PTR_INIT(&fields[15]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005578 UPB_TABVALUE_EMPTY_INIT,
5579 UPB_TABVALUE_EMPTY_INIT,
5580 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005581 UPB_TABVALUE_PTR_INIT(&fields[36]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005582 UPB_TABVALUE_EMPTY_INIT,
5583 UPB_TABVALUE_EMPTY_INIT,
5584 UPB_TABVALUE_EMPTY_INIT,
5585 UPB_TABVALUE_PTR_INIT(&fields[2]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005586 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005587 UPB_TABVALUE_EMPTY_INIT,
5588 UPB_TABVALUE_EMPTY_INIT,
5589 UPB_TABVALUE_EMPTY_INIT,
5590 UPB_TABVALUE_PTR_INIT(&fields[64]),
5591 UPB_TABVALUE_PTR_INIT(&fields[5]),
5592 UPB_TABVALUE_PTR_INIT(&fields[37]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005593 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005594 UPB_TABVALUE_PTR_INIT(&fields[46]),
5595 UPB_TABVALUE_PTR_INIT(&fields[61]),
5596 UPB_TABVALUE_PTR_INIT(&fields[14]),
5597 UPB_TABVALUE_EMPTY_INIT,
5598 UPB_TABVALUE_EMPTY_INIT,
5599 UPB_TABVALUE_EMPTY_INIT,
Josh Habermane8ed0212015-06-08 17:56:03 -07005600 UPB_TABVALUE_PTR_INIT(&fields[45]),
5601 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005602 UPB_TABVALUE_PTR_INIT(&fields[51]),
5603 UPB_TABVALUE_PTR_INIT(&fields[29]),
5604 UPB_TABVALUE_PTR_INIT(&fields[75]),
5605 UPB_TABVALUE_PTR_INIT(&fields[68]),
5606 UPB_TABVALUE_PTR_INIT(&fields[4]),
5607 UPB_TABVALUE_PTR_INIT(&fields[84]),
5608 UPB_TABVALUE_EMPTY_INIT,
5609 UPB_TABVALUE_EMPTY_INIT,
Josh Habermane8ed0212015-06-08 17:56:03 -07005610 UPB_TABVALUE_PTR_INIT(&fields[54]),
5611 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005612 UPB_TABVALUE_PTR_INIT(&fields[55]),
5613 UPB_TABVALUE_PTR_INIT(&fields[48]),
5614 UPB_TABVALUE_PTR_INIT(&fields[73]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005615 UPB_TABVALUE_EMPTY_INIT,
5616 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005617 UPB_TABVALUE_PTR_INIT(&fields[44]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005618 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005619 UPB_TABVALUE_PTR_INIT(&fields[78]),
5620 UPB_TABVALUE_PTR_INIT(&fields[87]),
5621 UPB_TABVALUE_PTR_INIT(&fields[42]),
5622 UPB_TABVALUE_PTR_INIT(&fields[92]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005623 UPB_TABVALUE_EMPTY_INIT,
Josh Habermane8ed0212015-06-08 17:56:03 -07005624 UPB_TABVALUE_PTR_INIT(&fields[43]),
Josh Haberman78da6662016-01-13 19:05:43 -08005625 UPB_TABVALUE_EMPTY_INIT,
5626 UPB_TABVALUE_EMPTY_INIT,
5627 UPB_TABVALUE_PTR_INIT(&fields[57]),
5628 UPB_TABVALUE_PTR_INIT(&fields[28]),
5629 UPB_TABVALUE_PTR_INIT(&fields[79]),
5630 UPB_TABVALUE_PTR_INIT(&fields[59]),
5631 UPB_TABVALUE_PTR_INIT(&fields[16]),
5632 UPB_TABVALUE_PTR_INIT(&fields[90]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005633 UPB_TABVALUE_PTR_INIT(&fields[0]),
5634 UPB_TABVALUE_EMPTY_INIT,
Josh Haberman78da6662016-01-13 19:05:43 -08005635 UPB_TABVALUE_PTR_INIT(&fields[58]),
5636 UPB_TABVALUE_PTR_INIT(&fields[30]),
Josh Habermane8ed0212015-06-08 17:56:03 -07005637 UPB_TABVALUE_EMPTY_INIT,
5638 UPB_TABVALUE_PTR_INIT("LABEL_OPTIONAL"),
5639 UPB_TABVALUE_PTR_INIT("LABEL_REQUIRED"),
5640 UPB_TABVALUE_PTR_INIT("LABEL_REPEATED"),
5641 UPB_TABVALUE_EMPTY_INIT,
5642 UPB_TABVALUE_PTR_INIT("TYPE_DOUBLE"),
5643 UPB_TABVALUE_PTR_INIT("TYPE_FLOAT"),
5644 UPB_TABVALUE_PTR_INIT("TYPE_INT64"),
5645 UPB_TABVALUE_PTR_INIT("TYPE_UINT64"),
5646 UPB_TABVALUE_PTR_INIT("TYPE_INT32"),
5647 UPB_TABVALUE_PTR_INIT("TYPE_FIXED64"),
5648 UPB_TABVALUE_PTR_INIT("TYPE_FIXED32"),
5649 UPB_TABVALUE_PTR_INIT("TYPE_BOOL"),
5650 UPB_TABVALUE_PTR_INIT("TYPE_STRING"),
5651 UPB_TABVALUE_PTR_INIT("TYPE_GROUP"),
5652 UPB_TABVALUE_PTR_INIT("TYPE_MESSAGE"),
5653 UPB_TABVALUE_PTR_INIT("TYPE_BYTES"),
5654 UPB_TABVALUE_PTR_INIT("TYPE_UINT32"),
5655 UPB_TABVALUE_PTR_INIT("TYPE_ENUM"),
5656 UPB_TABVALUE_PTR_INIT("TYPE_SFIXED32"),
5657 UPB_TABVALUE_PTR_INIT("TYPE_SFIXED64"),
5658 UPB_TABVALUE_PTR_INIT("TYPE_SINT32"),
5659 UPB_TABVALUE_PTR_INIT("TYPE_SINT64"),
5660 UPB_TABVALUE_PTR_INIT("STRING"),
5661 UPB_TABVALUE_PTR_INIT("CORD"),
5662 UPB_TABVALUE_PTR_INIT("STRING_PIECE"),
Josh Haberman78da6662016-01-13 19:05:43 -08005663 UPB_TABVALUE_PTR_INIT("JS_NORMAL"),
5664 UPB_TABVALUE_PTR_INIT("JS_STRING"),
5665 UPB_TABVALUE_PTR_INIT("JS_NUMBER"),
Josh Habermane8ed0212015-06-08 17:56:03 -07005666 UPB_TABVALUE_EMPTY_INIT,
5667 UPB_TABVALUE_PTR_INIT("SPEED"),
5668 UPB_TABVALUE_PTR_INIT("CODE_SIZE"),
5669 UPB_TABVALUE_PTR_INIT("LITE_RUNTIME"),
Chris Fallin91473dc2014-12-12 15:58:26 -08005670};
5671
Josh Haberman78da6662016-01-13 19:05:43 -08005672static const upb_symtab symtab = UPB_SYMTAB_INIT(UPB_STRTABLE_INIT(27, 31, UPB_CTYPE_PTR, 5, &strentries[236]), &reftables[264], &reftables[265]);
Chris Fallin91473dc2014-12-12 15:58:26 -08005673
5674const upb_symtab *upbdefs_google_protobuf_descriptor(const void *owner) {
5675 upb_symtab_ref(&symtab, owner);
5676 return &symtab;
5677}
5678
#ifdef UPB_DEBUG_REFS
/* Storage for the ref-tracking tables, compiled only when UPB_DEBUG_REFS is
 * defined.  All 266 slots start out as the same empty pointer-valued
 * inttable, so they are written as 26 groups of ten plus six singles
 * instead of one initializer per line.  The count must match the array
 * bound exactly: a short initializer list would zero-fill the remaining
 * slots rather than give them the empty-table initializer. */
#define UPB_REFTAB_1 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR)
#define UPB_REFTAB_10                                         \
  UPB_REFTAB_1, UPB_REFTAB_1, UPB_REFTAB_1, UPB_REFTAB_1,     \
  UPB_REFTAB_1, UPB_REFTAB_1, UPB_REFTAB_1, UPB_REFTAB_1,     \
  UPB_REFTAB_1, UPB_REFTAB_1
static upb_inttable reftables[266] = {
  UPB_REFTAB_10,  /*   0 ..   9 */
  UPB_REFTAB_10,  /*  10 ..  19 */
  UPB_REFTAB_10,  /*  20 ..  29 */
  UPB_REFTAB_10,  /*  30 ..  39 */
  UPB_REFTAB_10,  /*  40 ..  49 */
  UPB_REFTAB_10,  /*  50 ..  59 */
  UPB_REFTAB_10,  /*  60 ..  69 */
  UPB_REFTAB_10,  /*  70 ..  79 */
  UPB_REFTAB_10,  /*  80 ..  89 */
  UPB_REFTAB_10,  /*  90 ..  99 */
  UPB_REFTAB_10,  /* 100 .. 109 */
  UPB_REFTAB_10,  /* 110 .. 119 */
  UPB_REFTAB_10,  /* 120 .. 129 */
  UPB_REFTAB_10,  /* 130 .. 139 */
  UPB_REFTAB_10,  /* 140 .. 149 */
  UPB_REFTAB_10,  /* 150 .. 159 */
  UPB_REFTAB_10,  /* 160 .. 169 */
  UPB_REFTAB_10,  /* 170 .. 179 */
  UPB_REFTAB_10,  /* 180 .. 189 */
  UPB_REFTAB_10,  /* 190 .. 199 */
  UPB_REFTAB_10,  /* 200 .. 209 */
  UPB_REFTAB_10,  /* 210 .. 219 */
  UPB_REFTAB_10,  /* 220 .. 229 */
  UPB_REFTAB_10,  /* 230 .. 239 */
  UPB_REFTAB_10,  /* 240 .. 249 */
  UPB_REFTAB_10,  /* 250 .. 259 */
  UPB_REFTAB_1,   /* 260 */
  UPB_REFTAB_1,   /* 261 */
  UPB_REFTAB_1,   /* 262 */
  UPB_REFTAB_1,   /* 263 */
  UPB_REFTAB_1,   /* 264 */
  UPB_REFTAB_1,   /* 265 */
};
#undef UPB_REFTAB_10
#undef UPB_REFTAB_1
#endif
5949
5950/*
Josh Haberman181c7f22015-07-15 11:05:10 -07005951** XXX: The routines in this file that consume a string do not currently
5952** support having the string span buffers. In the future, as upb_sink and
5953** its buffering/sharing functionality evolve there should be an easy and
5954** idiomatic way of correctly handling this case. For now, we accept this
5955** limitation since we currently only parse descriptors from single strings.
5956*/
Chris Fallin91473dc2014-12-12 15:58:26 -08005957
5958
5959#include <errno.h>
5960#include <stdlib.h>
5961#include <string.h>
5962
/* Compares a NUL-terminated string with a length-delimited buffer that need
 * not be NUL-terminated.
 *
 *   str: NUL-terminated string; must not be NULL.
 *   buf: start of the buffer; may be NULL only when n is 0.
 *   n:   number of bytes in buf.
 *
 * Returns true iff str is exactly n bytes long and those bytes equal buf. */
static bool upb_streq(const char *str, const char *buf, size_t n) {
  if (strlen(str) != n) return false;
  /* memcmp() requires valid pointers even for a zero length, so an empty
   * buffer is answered without touching buf at all. */
  if (n == 0) return true;
  return memcmp(str, buf, n) == 0;
}
5967
Josh Habermane8ed0212015-06-08 17:56:03 -07005968/* upb_deflist is an internal-only dynamic array for storing a growing list of
5969 * upb_defs. */
Chris Fallind3262772015-05-14 18:24:26 -07005970typedef struct {
5971 upb_def **defs;
5972 size_t len;
5973 size_t size;
5974 bool owned;
5975} upb_deflist;
5976
/* One entry of the reader's scope stack.  The stack holds the top-level
 * file scope plus every message scope currently open, which is what lets
 * the definitions inside be qualified correctly when a scope closes. */
typedef struct {
  /* Name of the message or package: a bare name, not qualified by any
   * enclosing scope. */
  char *name;
  /* Index of the first def that belongs to this scope.  For a message
   * scope the msgdef itself sits at start-1. */
  int start;
} upb_descreader_frame;
5987
/* Maximum depth of nested declarations the reader accepts, i.e. how deep
 * a chain such as
 *
 *   message Foo {
 *     message Bar {
 *       message Baz {
 *       }
 *     }
 *   }
 *
 * may go.  This is a resource limit: it fixes the size of the reader's
 * scope stack.
 * TODO: make this a runtime-settable property of the Reader instance. */
#define UPB_MAX_MESSAGE_NESTING 64
5999
6000struct upb_descreader {
6001 upb_sink sink;
6002 upb_deflist defs;
6003 upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
6004 int stack_len;
6005
Josh Haberman78da6662016-01-13 19:05:43 -08006006 bool primitives_have_presence;
6007 int file_start;
6008
Chris Fallind3262772015-05-14 18:24:26 -07006009 uint32_t number;
6010 char *name;
6011 bool saw_number;
6012 bool saw_name;
6013
6014 char *default_string;
6015
6016 upb_fielddef *f;
6017};
6018
/* Returns a newly malloc()ed, NUL-terminated copy of the first n bytes of
 * buf.  buf need not be NUL-terminated and may be NULL when n is 0.
 *
 * Returns NULL if the allocation fails or if n + 1 is not representable.
 * The caller owns the result and releases it with free(). */
static char *upb_strndup(const char *buf, size_t n) {
  size_t size = n + 1;  /* Room for the terminator. */
  char *ret;

  if (size == 0) return NULL;  /* n + 1 wrapped around. */
  ret = malloc(size);
  if (!ret) return NULL;
  /* memcpy() requires valid pointers even for a zero length. */
  if (n > 0) memcpy(ret, buf, n);
  ret[n] = '\0';
  return ret;
}
6026
/* Returns a newly allocated string that joins the inputs with a '.', for
 * example:
 *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 *   join("", "Baz") -> "Baz"
 *   join(NULL, "Baz") -> "Baz"
 *
 * name must not be NULL.  Returns NULL if memory cannot be allocated.
 * The caller owns the returned string and releases it with free(). */
static char *upb_join(const char *base, const char *name) {
  size_t base_len;
  size_t name_len;
  char *ret;

  if (!base || base[0] == '\0') {
    /* Nothing to prefix: the result is just a copy of the name. */
    return upb_strdup(name);
  }

  base_len = strlen(base);
  name_len = strlen(name);
  ret = malloc(base_len + name_len + 2);  /* '.' and the terminator. */
  if (!ret) return NULL;

  memcpy(ret, base, base_len);
  ret[base_len] = '.';
  memcpy(ret + base_len + 1, name, name_len + 1);  /* Copies the NUL too. */
  return ret;
}
6044
6045
6046/* upb_deflist ****************************************************************/
6047
6048void upb_deflist_init(upb_deflist *l) {
6049 l->size = 0;
6050 l->defs = NULL;
6051 l->len = 0;
6052 l->owned = true;
6053}
6054
6055void upb_deflist_uninit(upb_deflist *l) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006056 size_t i;
Chris Fallin91473dc2014-12-12 15:58:26 -08006057 if (l->owned)
Josh Habermane8ed0212015-06-08 17:56:03 -07006058 for(i = 0; i < l->len; i++)
Chris Fallin91473dc2014-12-12 15:58:26 -08006059 upb_def_unref(l->defs[i], l);
6060 free(l->defs);
6061}
6062
6063bool upb_deflist_push(upb_deflist *l, upb_def *d) {
6064 if(++l->len >= l->size) {
6065 size_t new_size = UPB_MAX(l->size, 4);
6066 new_size *= 2;
6067 l->defs = realloc(l->defs, new_size * sizeof(void *));
6068 if (!l->defs) return false;
6069 l->size = new_size;
6070 }
6071 l->defs[l->len - 1] = d;
6072 return true;
6073}
6074
6075void upb_deflist_donaterefs(upb_deflist *l, void *owner) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006076 size_t i;
Chris Fallin91473dc2014-12-12 15:58:26 -08006077 assert(l->owned);
Josh Habermane8ed0212015-06-08 17:56:03 -07006078 for (i = 0; i < l->len; i++)
Chris Fallin91473dc2014-12-12 15:58:26 -08006079 upb_def_donateref(l->defs[i], l, owner);
6080 l->owned = false;
6081}
6082
6083static upb_def *upb_deflist_last(upb_deflist *l) {
6084 return l->defs[l->len-1];
6085}
6086
Josh Habermane8ed0212015-06-08 17:56:03 -07006087/* Qualify the defname for all defs starting with offset "start" with "str". */
Chris Fallin91473dc2014-12-12 15:58:26 -08006088static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006089 uint32_t i;
6090 for (i = start; i < l->len; i++) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006091 upb_def *def = l->defs[i];
6092 char *name = upb_join(str, upb_def_fullname(def));
6093 upb_def_setfullname(def, name, NULL);
6094 free(name);
6095 }
6096}
6097
6098
6099/* upb_descreader ************************************************************/
6100
Chris Fallin91473dc2014-12-12 15:58:26 -08006101static upb_msgdef *upb_descreader_top(upb_descreader *r) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006102 int index;
Chris Fallin91473dc2014-12-12 15:58:26 -08006103 assert(r->stack_len > 1);
Josh Habermane8ed0212015-06-08 17:56:03 -07006104 index = r->stack[r->stack_len-1].start - 1;
Chris Fallin91473dc2014-12-12 15:58:26 -08006105 assert(index >= 0);
6106 return upb_downcast_msgdef_mutable(r->defs.defs[index]);
6107}
6108
6109static upb_def *upb_descreader_last(upb_descreader *r) {
6110 return upb_deflist_last(&r->defs);
6111}
6112
Josh Habermane8ed0212015-06-08 17:56:03 -07006113/* Start/end handlers for FileDescriptorProto and DescriptorProto (the two
6114 * entities that have names and can contain sub-definitions. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006115void upb_descreader_startcontainer(upb_descreader *r) {
6116 upb_descreader_frame *f = &r->stack[r->stack_len++];
6117 f->start = r->defs.len;
6118 f->name = NULL;
6119}
6120
6121void upb_descreader_endcontainer(upb_descreader *r) {
6122 upb_descreader_frame *f = &r->stack[--r->stack_len];
6123 upb_deflist_qualify(&r->defs, f->name, f->start);
6124 free(f->name);
6125 f->name = NULL;
6126}
6127
6128void upb_descreader_setscopename(upb_descreader *r, char *str) {
6129 upb_descreader_frame *f = &r->stack[r->stack_len-1];
6130 free(f->name);
6131 f->name = str;
6132}
6133
Josh Habermane8ed0212015-06-08 17:56:03 -07006134/* Handlers for google.protobuf.FileDescriptorProto. */
Josh Haberman78da6662016-01-13 19:05:43 -08006135static bool file_startmsg(void *closure, const void *hd) {
6136 upb_descreader *r = closure;
Chris Fallin91473dc2014-12-12 15:58:26 -08006137 UPB_UNUSED(hd);
6138 upb_descreader_startcontainer(r);
Josh Haberman78da6662016-01-13 19:05:43 -08006139 r->primitives_have_presence = true;
6140 r->file_start = r->defs.len;
Chris Fallin91473dc2014-12-12 15:58:26 -08006141 return true;
6142}
6143
6144static bool file_endmsg(void *closure, const void *hd, upb_status *status) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006145 upb_descreader *r = closure;
Chris Fallin91473dc2014-12-12 15:58:26 -08006146 UPB_UNUSED(hd);
6147 UPB_UNUSED(status);
Chris Fallin91473dc2014-12-12 15:58:26 -08006148 upb_descreader_endcontainer(r);
6149 return true;
6150}
6151
6152static size_t file_onpackage(void *closure, const void *hd, const char *buf,
6153 size_t n, const upb_bufhandle *handle) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006154 upb_descreader *r = closure;
Chris Fallin91473dc2014-12-12 15:58:26 -08006155 UPB_UNUSED(hd);
6156 UPB_UNUSED(handle);
Josh Habermane8ed0212015-06-08 17:56:03 -07006157 /* XXX: see comment at the top of the file. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006158 upb_descreader_setscopename(r, upb_strndup(buf, n));
6159 return n;
6160}
6161
Josh Haberman78da6662016-01-13 19:05:43 -08006162static size_t file_onsyntax(void *closure, const void *hd, const char *buf,
6163 size_t n, const upb_bufhandle *handle) {
6164 upb_descreader *r = closure;
6165 UPB_UNUSED(hd);
6166 UPB_UNUSED(handle);
6167 /* XXX: see comment at the top of the file. */
6168 if (upb_streq("proto2", buf, n)) {
6169 /* Technically we could verify that proto3 hadn't previously been seen. */
6170 } else if (upb_streq("proto3", buf, n)) {
6171 uint32_t i;
6172 /* Update messages created before the syntax was read. */
6173 for (i = r->file_start; i < r->defs.len; i++) {
6174 upb_msgdef *m = upb_dyncast_msgdef_mutable(r->defs.defs[i]);
6175 if (m) {
6176 upb_msgdef_setprimitiveshavepresence(m, false);
6177 }
6178 }
6179
6180 /* Set a flag for any future messages that will be created. */
6181 r->primitives_have_presence = false;
6182 } else {
6183 /* Error: neither proto3 nor proto3.
6184 * TODO(haberman): there should be a status object we can report this to. */
6185 return 0;
6186 }
6187
6188 return n;
6189}
6190
Josh Habermane8ed0212015-06-08 17:56:03 -07006191/* Handlers for google.protobuf.EnumValueDescriptorProto. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006192static bool enumval_startmsg(void *closure, const void *hd) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006193 upb_descreader *r = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -07006194 UPB_UNUSED(hd);
Chris Fallin91473dc2014-12-12 15:58:26 -08006195 r->saw_number = false;
6196 r->saw_name = false;
6197 return true;
6198}
6199
6200static size_t enumval_onname(void *closure, const void *hd, const char *buf,
6201 size_t n, const upb_bufhandle *handle) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006202 upb_descreader *r = closure;
Chris Fallin91473dc2014-12-12 15:58:26 -08006203 UPB_UNUSED(hd);
6204 UPB_UNUSED(handle);
Josh Habermane8ed0212015-06-08 17:56:03 -07006205 /* XXX: see comment at the top of the file. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006206 free(r->name);
6207 r->name = upb_strndup(buf, n);
6208 r->saw_name = true;
6209 return n;
6210}
6211
6212static bool enumval_onnumber(void *closure, const void *hd, int32_t val) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006213 upb_descreader *r = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -07006214 UPB_UNUSED(hd);
Chris Fallin91473dc2014-12-12 15:58:26 -08006215 r->number = val;
6216 r->saw_number = true;
6217 return true;
6218}
6219
6220static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006221 upb_descreader *r = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -07006222 upb_enumdef *e;
6223 UPB_UNUSED(hd);
6224
Chris Fallin91473dc2014-12-12 15:58:26 -08006225 if(!r->saw_number || !r->saw_name) {
6226 upb_status_seterrmsg(status, "Enum value missing name or number.");
6227 return false;
6228 }
Josh Habermane8ed0212015-06-08 17:56:03 -07006229 e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
Chris Fallin91473dc2014-12-12 15:58:26 -08006230 upb_enumdef_addval(e, r->name, r->number, status);
6231 free(r->name);
6232 r->name = NULL;
6233 return true;
6234}
6235
6236
Josh Habermane8ed0212015-06-08 17:56:03 -07006237/* Handlers for google.protobuf.EnumDescriptorProto. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006238static bool enum_startmsg(void *closure, const void *hd) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006239 upb_descreader *r = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -07006240 UPB_UNUSED(hd);
6241 upb_deflist_push(&r->defs,
6242 upb_enumdef_upcast_mutable(upb_enumdef_new(&r->defs)));
Chris Fallin91473dc2014-12-12 15:58:26 -08006243 return true;
6244}
6245
6246static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006247 upb_descreader *r = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -07006248 upb_enumdef *e;
6249 UPB_UNUSED(hd);
6250
6251 e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
Chris Fallin91473dc2014-12-12 15:58:26 -08006252 if (upb_def_fullname(upb_descreader_last(r)) == NULL) {
6253 upb_status_seterrmsg(status, "Enum had no name.");
6254 return false;
6255 }
6256 if (upb_enumdef_numvals(e) == 0) {
6257 upb_status_seterrmsg(status, "Enum had no values.");
6258 return false;
6259 }
6260 return true;
6261}
6262
6263static size_t enum_onname(void *closure, const void *hd, const char *buf,
6264 size_t n, const upb_bufhandle *handle) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006265 upb_descreader *r = closure;
6266 char *fullname = upb_strndup(buf, n);
Chris Fallin91473dc2014-12-12 15:58:26 -08006267 UPB_UNUSED(hd);
6268 UPB_UNUSED(handle);
Josh Habermane8ed0212015-06-08 17:56:03 -07006269 /* XXX: see comment at the top of the file. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006270 upb_def_setfullname(upb_descreader_last(r), fullname, NULL);
6271 free(fullname);
6272 return n;
6273}
6274
Josh Habermane8ed0212015-06-08 17:56:03 -07006275/* Handlers for google.protobuf.FieldDescriptorProto */
Chris Fallin91473dc2014-12-12 15:58:26 -08006276static bool field_startmsg(void *closure, const void *hd) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006277 upb_descreader *r = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -07006278 UPB_UNUSED(hd);
Chris Fallin91473dc2014-12-12 15:58:26 -08006279 r->f = upb_fielddef_new(&r->defs);
6280 free(r->default_string);
6281 r->default_string = NULL;
6282
Josh Habermane8ed0212015-06-08 17:56:03 -07006283 /* fielddefs default to packed, but descriptors default to non-packed. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006284 upb_fielddef_setpacked(r->f, false);
6285 return true;
6286}
6287
Josh Habermane8ed0212015-06-08 17:56:03 -07006288/* Converts the default value in string "str" into "d". Passes a ref on str.
6289 * Returns true on success. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006290static bool parse_default(char *str, upb_fielddef *f) {
6291 bool success = true;
6292 char *end;
6293 switch (upb_fielddef_type(f)) {
6294 case UPB_TYPE_INT32: {
6295 long val = strtol(str, &end, 0);
6296 if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
6297 success = false;
6298 else
6299 upb_fielddef_setdefaultint32(f, val);
6300 break;
6301 }
6302 case UPB_TYPE_INT64: {
Josh Habermane8ed0212015-06-08 17:56:03 -07006303 /* XXX: Need to write our own strtoll, since it's not available in c89. */
6304 long long val = strtol(str, &end, 0);
Chris Fallin91473dc2014-12-12 15:58:26 -08006305 if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end)
6306 success = false;
6307 else
6308 upb_fielddef_setdefaultint64(f, val);
6309 break;
6310 }
6311 case UPB_TYPE_UINT32: {
Chris Fallind3262772015-05-14 18:24:26 -07006312 unsigned long val = strtoul(str, &end, 0);
Chris Fallin91473dc2014-12-12 15:58:26 -08006313 if (val > UINT32_MAX || errno == ERANGE || *end)
6314 success = false;
6315 else
6316 upb_fielddef_setdefaultuint32(f, val);
6317 break;
6318 }
6319 case UPB_TYPE_UINT64: {
Josh Habermane8ed0212015-06-08 17:56:03 -07006320 /* XXX: Need to write our own strtoull, since it's not available in c89. */
6321 unsigned long long val = strtoul(str, &end, 0);
Chris Fallin91473dc2014-12-12 15:58:26 -08006322 if (val > UINT64_MAX || errno == ERANGE || *end)
6323 success = false;
6324 else
6325 upb_fielddef_setdefaultuint64(f, val);
6326 break;
6327 }
6328 case UPB_TYPE_DOUBLE: {
6329 double val = strtod(str, &end);
6330 if (errno == ERANGE || *end)
6331 success = false;
6332 else
6333 upb_fielddef_setdefaultdouble(f, val);
6334 break;
6335 }
6336 case UPB_TYPE_FLOAT: {
Josh Habermane8ed0212015-06-08 17:56:03 -07006337 /* XXX: Need to write our own strtof, since it's not available in c89. */
6338 float val = strtod(str, &end);
Chris Fallin91473dc2014-12-12 15:58:26 -08006339 if (errno == ERANGE || *end)
6340 success = false;
6341 else
6342 upb_fielddef_setdefaultfloat(f, val);
6343 break;
6344 }
6345 case UPB_TYPE_BOOL: {
6346 if (strcmp(str, "false") == 0)
6347 upb_fielddef_setdefaultbool(f, false);
6348 else if (strcmp(str, "true") == 0)
6349 upb_fielddef_setdefaultbool(f, true);
6350 else
6351 success = false;
6352 break;
6353 }
6354 default: abort();
6355 }
6356 return success;
6357}
6358
6359static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006360 upb_descreader *r = closure;
6361 upb_fielddef *f = r->f;
Josh Habermane8ed0212015-06-08 17:56:03 -07006362 UPB_UNUSED(hd);
6363
6364 /* TODO: verify that all required fields were present. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006365 assert(upb_fielddef_number(f) != 0);
6366 assert(upb_fielddef_name(f) != NULL);
6367 assert((upb_fielddef_subdefname(f) != NULL) == upb_fielddef_hassubdef(f));
6368
6369 if (r->default_string) {
6370 if (upb_fielddef_issubmsg(f)) {
6371 upb_status_seterrmsg(status, "Submessages cannot have defaults.");
6372 return false;
6373 }
6374 if (upb_fielddef_isstring(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM) {
6375 upb_fielddef_setdefaultcstr(f, r->default_string, NULL);
6376 } else {
6377 if (r->default_string && !parse_default(r->default_string, f)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006378 /* We don't worry too much about giving a great error message since the
6379 * compiler should have ensured this was correct. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006380 upb_status_seterrmsg(status, "Error converting default value.");
6381 return false;
6382 }
6383 }
6384 }
6385 return true;
6386}
6387
6388static bool field_onlazy(void *closure, const void *hd, bool val) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006389 upb_descreader *r = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -07006390 UPB_UNUSED(hd);
6391
Chris Fallin91473dc2014-12-12 15:58:26 -08006392 upb_fielddef_setlazy(r->f, val);
6393 return true;
6394}
6395
6396static bool field_onpacked(void *closure, const void *hd, bool val) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006397 upb_descreader *r = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -07006398 UPB_UNUSED(hd);
6399
Chris Fallin91473dc2014-12-12 15:58:26 -08006400 upb_fielddef_setpacked(r->f, val);
6401 return true;
6402}
6403
6404static bool field_ontype(void *closure, const void *hd, int32_t val) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006405 upb_descreader *r = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -07006406 UPB_UNUSED(hd);
6407
Chris Fallin91473dc2014-12-12 15:58:26 -08006408 upb_fielddef_setdescriptortype(r->f, val);
6409 return true;
6410}
6411
6412static bool field_onlabel(void *closure, const void *hd, int32_t val) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006413 upb_descreader *r = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -07006414 UPB_UNUSED(hd);
6415
Chris Fallin91473dc2014-12-12 15:58:26 -08006416 upb_fielddef_setlabel(r->f, val);
6417 return true;
6418}
6419
6420static bool field_onnumber(void *closure, const void *hd, int32_t val) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006421 upb_descreader *r = closure;
6422 bool ok = upb_fielddef_setnumber(r->f, val, NULL);
Josh Habermane8ed0212015-06-08 17:56:03 -07006423 UPB_UNUSED(hd);
6424
Chris Fallin91473dc2014-12-12 15:58:26 -08006425 UPB_ASSERT_VAR(ok, ok);
6426 return true;
6427}
6428
6429static size_t field_onname(void *closure, const void *hd, const char *buf,
6430 size_t n, const upb_bufhandle *handle) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006431 upb_descreader *r = closure;
6432 char *name = upb_strndup(buf, n);
Chris Fallin91473dc2014-12-12 15:58:26 -08006433 UPB_UNUSED(hd);
6434 UPB_UNUSED(handle);
Josh Habermane8ed0212015-06-08 17:56:03 -07006435
6436 /* XXX: see comment at the top of the file. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006437 upb_fielddef_setname(r->f, name, NULL);
6438 free(name);
6439 return n;
6440}
6441
6442static size_t field_ontypename(void *closure, const void *hd, const char *buf,
6443 size_t n, const upb_bufhandle *handle) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006444 upb_descreader *r = closure;
6445 char *name = upb_strndup(buf, n);
Chris Fallin91473dc2014-12-12 15:58:26 -08006446 UPB_UNUSED(hd);
6447 UPB_UNUSED(handle);
Josh Habermane8ed0212015-06-08 17:56:03 -07006448
6449 /* XXX: see comment at the top of the file. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006450 upb_fielddef_setsubdefname(r->f, name, NULL);
6451 free(name);
6452 return n;
6453}
6454
6455static size_t field_onextendee(void *closure, const void *hd, const char *buf,
6456 size_t n, const upb_bufhandle *handle) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006457 upb_descreader *r = closure;
6458 char *name = upb_strndup(buf, n);
Chris Fallin91473dc2014-12-12 15:58:26 -08006459 UPB_UNUSED(hd);
6460 UPB_UNUSED(handle);
Josh Habermane8ed0212015-06-08 17:56:03 -07006461
6462 /* XXX: see comment at the top of the file. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006463 upb_fielddef_setcontainingtypename(r->f, name, NULL);
6464 free(name);
6465 return n;
6466}
6467
6468static size_t field_ondefaultval(void *closure, const void *hd, const char *buf,
6469 size_t n, const upb_bufhandle *handle) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006470 upb_descreader *r = closure;
Chris Fallin91473dc2014-12-12 15:58:26 -08006471 UPB_UNUSED(hd);
6472 UPB_UNUSED(handle);
Josh Habermane8ed0212015-06-08 17:56:03 -07006473
6474 /* Have to convert from string to the correct type, but we might not know the
6475 * type yet, so we save it as a string until the end of the field.
6476 * XXX: see comment at the top of the file. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006477 free(r->default_string);
6478 r->default_string = upb_strndup(buf, n);
6479 return n;
6480}
6481
Josh Habermane8ed0212015-06-08 17:56:03 -07006482/* Handlers for google.protobuf.DescriptorProto (representing a message). */
Chris Fallin91473dc2014-12-12 15:58:26 -08006483static bool msg_startmsg(void *closure, const void *hd) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006484 upb_descreader *r = closure;
Josh Haberman78da6662016-01-13 19:05:43 -08006485 upb_msgdef *m;
Josh Habermane8ed0212015-06-08 17:56:03 -07006486 UPB_UNUSED(hd);
6487
Josh Haberman78da6662016-01-13 19:05:43 -08006488 m = upb_msgdef_new(&r->defs);
6489 upb_msgdef_setprimitiveshavepresence(m, r->primitives_have_presence);
6490 upb_deflist_push(&r->defs, upb_msgdef_upcast_mutable(m));
Chris Fallin91473dc2014-12-12 15:58:26 -08006491 upb_descreader_startcontainer(r);
6492 return true;
6493}
6494
6495static bool msg_endmsg(void *closure, const void *hd, upb_status *status) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006496 upb_descreader *r = closure;
6497 upb_msgdef *m = upb_descreader_top(r);
Josh Habermane8ed0212015-06-08 17:56:03 -07006498 UPB_UNUSED(hd);
6499
6500 if(!upb_def_fullname(upb_msgdef_upcast_mutable(m))) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006501 upb_status_seterrmsg(status, "Encountered message with no name.");
6502 return false;
6503 }
6504 upb_descreader_endcontainer(r);
6505 return true;
6506}
6507
6508static size_t msg_onname(void *closure, const void *hd, const char *buf,
6509 size_t n, const upb_bufhandle *handle) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006510 upb_descreader *r = closure;
6511 upb_msgdef *m = upb_descreader_top(r);
Josh Habermane8ed0212015-06-08 17:56:03 -07006512 /* XXX: see comment at the top of the file. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006513 char *name = upb_strndup(buf, n);
Josh Habermane8ed0212015-06-08 17:56:03 -07006514 UPB_UNUSED(hd);
6515 UPB_UNUSED(handle);
6516
6517 upb_def_setfullname(upb_msgdef_upcast_mutable(m), name, NULL);
6518 upb_descreader_setscopename(r, name); /* Passes ownership of name. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006519 return n;
6520}
6521
6522static bool msg_onendfield(void *closure, const void *hd) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006523 upb_descreader *r = closure;
6524 upb_msgdef *m = upb_descreader_top(r);
Josh Habermane8ed0212015-06-08 17:56:03 -07006525 UPB_UNUSED(hd);
6526
Chris Fallin91473dc2014-12-12 15:58:26 -08006527 upb_msgdef_addfield(m, r->f, &r->defs, NULL);
6528 r->f = NULL;
6529 return true;
6530}
6531
6532static bool pushextension(void *closure, const void *hd) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006533 upb_descreader *r = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -07006534 UPB_UNUSED(hd);
6535
Chris Fallin91473dc2014-12-12 15:58:26 -08006536 assert(upb_fielddef_containingtypename(r->f));
6537 upb_fielddef_setisextension(r->f, true);
Josh Habermane8ed0212015-06-08 17:56:03 -07006538 upb_deflist_push(&r->defs, upb_fielddef_upcast_mutable(r->f));
Chris Fallin91473dc2014-12-12 15:58:26 -08006539 r->f = NULL;
6540 return true;
6541}
6542
6543#define D(name) upbdefs_google_protobuf_ ## name(s)
6544
6545static void reghandlers(const void *closure, upb_handlers *h) {
6546 const upb_symtab *s = closure;
6547 const upb_msgdef *m = upb_handlers_msgdef(h);
6548
6549 if (m == D(DescriptorProto)) {
6550 upb_handlers_setstartmsg(h, &msg_startmsg, NULL);
6551 upb_handlers_setendmsg(h, &msg_endmsg, NULL);
6552 upb_handlers_setstring(h, D(DescriptorProto_name), &msg_onname, NULL);
6553 upb_handlers_setendsubmsg(h, D(DescriptorProto_field), &msg_onendfield,
6554 NULL);
6555 upb_handlers_setendsubmsg(h, D(DescriptorProto_extension), &pushextension,
6556 NULL);
6557 } else if (m == D(FileDescriptorProto)) {
6558 upb_handlers_setstartmsg(h, &file_startmsg, NULL);
6559 upb_handlers_setendmsg(h, &file_endmsg, NULL);
6560 upb_handlers_setstring(h, D(FileDescriptorProto_package), &file_onpackage,
6561 NULL);
Josh Haberman78da6662016-01-13 19:05:43 -08006562 upb_handlers_setstring(h, D(FileDescriptorProto_syntax), &file_onsyntax,
6563 NULL);
Chris Fallin91473dc2014-12-12 15:58:26 -08006564 upb_handlers_setendsubmsg(h, D(FileDescriptorProto_extension), &pushextension,
6565 NULL);
6566 } else if (m == D(EnumValueDescriptorProto)) {
6567 upb_handlers_setstartmsg(h, &enumval_startmsg, NULL);
6568 upb_handlers_setendmsg(h, &enumval_endmsg, NULL);
6569 upb_handlers_setstring(h, D(EnumValueDescriptorProto_name), &enumval_onname, NULL);
6570 upb_handlers_setint32(h, D(EnumValueDescriptorProto_number), &enumval_onnumber,
6571 NULL);
6572 } else if (m == D(EnumDescriptorProto)) {
6573 upb_handlers_setstartmsg(h, &enum_startmsg, NULL);
6574 upb_handlers_setendmsg(h, &enum_endmsg, NULL);
6575 upb_handlers_setstring(h, D(EnumDescriptorProto_name), &enum_onname, NULL);
6576 } else if (m == D(FieldDescriptorProto)) {
6577 upb_handlers_setstartmsg(h, &field_startmsg, NULL);
6578 upb_handlers_setendmsg(h, &field_endmsg, NULL);
6579 upb_handlers_setint32(h, D(FieldDescriptorProto_type), &field_ontype,
6580 NULL);
6581 upb_handlers_setint32(h, D(FieldDescriptorProto_label), &field_onlabel,
6582 NULL);
6583 upb_handlers_setint32(h, D(FieldDescriptorProto_number), &field_onnumber,
6584 NULL);
6585 upb_handlers_setstring(h, D(FieldDescriptorProto_name), &field_onname,
6586 NULL);
6587 upb_handlers_setstring(h, D(FieldDescriptorProto_type_name),
6588 &field_ontypename, NULL);
6589 upb_handlers_setstring(h, D(FieldDescriptorProto_extendee),
6590 &field_onextendee, NULL);
6591 upb_handlers_setstring(h, D(FieldDescriptorProto_default_value),
6592 &field_ondefaultval, NULL);
6593 } else if (m == D(FieldOptions)) {
6594 upb_handlers_setbool(h, D(FieldOptions_lazy), &field_onlazy, NULL);
6595 upb_handlers_setbool(h, D(FieldOptions_packed), &field_onpacked, NULL);
6596 }
6597}
6598
6599#undef D
6600
Chris Fallind3262772015-05-14 18:24:26 -07006601void descreader_cleanup(void *_r) {
6602 upb_descreader *r = _r;
6603 free(r->name);
6604 upb_deflist_uninit(&r->defs);
6605 free(r->default_string);
6606 while (r->stack_len > 0) {
6607 upb_descreader_frame *f = &r->stack[--r->stack_len];
6608 free(f->name);
6609 }
6610}
6611
6612
6613/* Public API ****************************************************************/
6614
6615upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
6616 upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader));
6617 if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) {
6618 return NULL;
6619 }
6620
6621 upb_deflist_init(&r->defs);
6622 upb_sink_reset(upb_descreader_input(r), h, r);
6623 r->stack_len = 0;
6624 r->name = NULL;
6625 r->default_string = NULL;
6626
6627 return r;
6628}
6629
6630upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
6631 *n = r->defs.len;
6632 upb_deflist_donaterefs(&r->defs, owner);
6633 return r->defs.defs;
6634}
6635
6636upb_sink *upb_descreader_input(upb_descreader *r) {
6637 return &r->sink;
6638}
6639
Chris Fallin91473dc2014-12-12 15:58:26 -08006640const upb_handlers *upb_descreader_newhandlers(const void *owner) {
6641 const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s);
6642 const upb_handlers *h = upb_handlers_newfrozen(
6643 upbdefs_google_protobuf_FileDescriptorSet(s), owner, reghandlers, s);
6644 upb_symtab_unref(s, &s);
6645 return h;
6646}
6647/*
Josh Haberman181c7f22015-07-15 11:05:10 -07006648** protobuf decoder bytecode compiler
6649**
6650** Code to compile a upb::Handlers into bytecode for decoding a protobuf
6651** according to that specific schema and destination handlers.
6652**
6653** Compiling to bytecode is always the first step. If we are using the
6654** interpreted decoder we leave it as bytecode and interpret that. If we are
6655** using a JIT decoder we use a code generator to turn the bytecode into native
6656** code, LLVM IR, etc.
6657**
6658** Bytecode definition is in decoder.int.h.
6659*/
Chris Fallin91473dc2014-12-12 15:58:26 -08006660
6661#include <stdarg.h>
6662
6663#ifdef UPB_DUMP_BYTECODE
6664#include <stdio.h>
6665#endif
6666
6667#define MAXLABEL 5
6668#define EMPTYLABEL -1
6669
6670/* mgroup *********************************************************************/
6671
6672static void freegroup(upb_refcounted *r) {
6673 mgroup *g = (mgroup*)r;
6674 upb_inttable_uninit(&g->methods);
6675#ifdef UPB_USE_JIT_X64
6676 upb_pbdecoder_freejit(g);
6677#endif
6678 free(g->bytecode);
6679 free(g);
6680}
6681
6682static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
6683 void *closure) {
6684 const mgroup *g = (const mgroup*)r;
6685 upb_inttable_iter i;
6686 upb_inttable_begin(&i, &g->methods);
6687 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
6688 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
Josh Habermane8ed0212015-06-08 17:56:03 -07006689 visit(r, upb_pbdecodermethod_upcast(method), closure);
Chris Fallin91473dc2014-12-12 15:58:26 -08006690 }
6691}
6692
6693mgroup *newgroup(const void *owner) {
6694 mgroup *g = malloc(sizeof(*g));
6695 static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
Josh Habermane8ed0212015-06-08 17:56:03 -07006696 upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner);
Chris Fallin91473dc2014-12-12 15:58:26 -08006697 upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
6698 g->bytecode = NULL;
6699 g->bytecode_end = NULL;
6700 return g;
6701}
6702
6703
6704/* upb_pbdecodermethod ********************************************************/
6705
6706static void freemethod(upb_refcounted *r) {
6707 upb_pbdecodermethod *method = (upb_pbdecodermethod*)r;
Chris Fallin91473dc2014-12-12 15:58:26 -08006708
6709 if (method->dest_handlers_) {
6710 upb_handlers_unref(method->dest_handlers_, method);
6711 }
6712
6713 upb_inttable_uninit(&method->dispatch);
6714 free(method);
6715}
6716
6717static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
6718 void *closure) {
6719 const upb_pbdecodermethod *m = (const upb_pbdecodermethod*)r;
6720 visit(r, m->group, closure);
6721}
6722
6723static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
6724 mgroup *group) {
6725 static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
6726 upb_pbdecodermethod *ret = malloc(sizeof(*ret));
Josh Habermane8ed0212015-06-08 17:56:03 -07006727 upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
Chris Fallin91473dc2014-12-12 15:58:26 -08006728 upb_byteshandler_init(&ret->input_handler_);
6729
Josh Habermane8ed0212015-06-08 17:56:03 -07006730 /* The method references the group and vice-versa, in a circular reference. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006731 upb_ref2(ret, group);
6732 upb_ref2(group, ret);
6733 upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
Josh Habermane8ed0212015-06-08 17:56:03 -07006734 upb_pbdecodermethod_unref(ret, &ret);
Chris Fallin91473dc2014-12-12 15:58:26 -08006735
Josh Habermane8ed0212015-06-08 17:56:03 -07006736 ret->group = mgroup_upcast_mutable(group);
Chris Fallin91473dc2014-12-12 15:58:26 -08006737 ret->dest_handlers_ = dest_handlers;
Josh Habermane8ed0212015-06-08 17:56:03 -07006738 ret->is_native_ = false; /* If we JIT, it will update this later. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006739 upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
6740
6741 if (ret->dest_handlers_) {
6742 upb_handlers_ref(ret->dest_handlers_, ret);
6743 }
6744 return ret;
6745}
6746
Chris Fallin91473dc2014-12-12 15:58:26 -08006747const upb_handlers *upb_pbdecodermethod_desthandlers(
6748 const upb_pbdecodermethod *m) {
6749 return m->dest_handlers_;
6750}
6751
6752const upb_byteshandler *upb_pbdecodermethod_inputhandler(
6753 const upb_pbdecodermethod *m) {
6754 return &m->input_handler_;
6755}
6756
6757bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
6758 return m->is_native_;
6759}
6760
6761const upb_pbdecodermethod *upb_pbdecodermethod_new(
6762 const upb_pbdecodermethodopts *opts, const void *owner) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006763 const upb_pbdecodermethod *ret;
Chris Fallin91473dc2014-12-12 15:58:26 -08006764 upb_pbcodecache cache;
Josh Habermane8ed0212015-06-08 17:56:03 -07006765
Chris Fallin91473dc2014-12-12 15:58:26 -08006766 upb_pbcodecache_init(&cache);
Josh Habermane8ed0212015-06-08 17:56:03 -07006767 ret = upb_pbcodecache_getdecodermethod(&cache, opts);
Chris Fallin91473dc2014-12-12 15:58:26 -08006768 upb_pbdecodermethod_ref(ret, owner);
6769 upb_pbcodecache_uninit(&cache);
6770 return ret;
6771}
6772
6773
6774/* bytecode compiler **********************************************************/
6775
Josh Habermane8ed0212015-06-08 17:56:03 -07006776/* Data used only at compilation time. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006777typedef struct {
6778 mgroup *group;
6779
6780 uint32_t *pc;
6781 int fwd_labels[MAXLABEL];
6782 int back_labels[MAXLABEL];
6783
Josh Habermane8ed0212015-06-08 17:56:03 -07006784 /* For fields marked "lazy", parse them lazily or eagerly? */
Chris Fallin91473dc2014-12-12 15:58:26 -08006785 bool lazy;
6786} compiler;
6787
6788static compiler *newcompiler(mgroup *group, bool lazy) {
6789 compiler *ret = malloc(sizeof(*ret));
Josh Habermane8ed0212015-06-08 17:56:03 -07006790 int i;
6791
Chris Fallin91473dc2014-12-12 15:58:26 -08006792 ret->group = group;
6793 ret->lazy = lazy;
Josh Habermane8ed0212015-06-08 17:56:03 -07006794 for (i = 0; i < MAXLABEL; i++) {
Chris Fallin91473dc2014-12-12 15:58:26 -08006795 ret->fwd_labels[i] = EMPTYLABEL;
6796 ret->back_labels[i] = EMPTYLABEL;
6797 }
6798 return ret;
6799}
6800
6801static void freecompiler(compiler *c) {
6802 free(c);
6803}
6804
/* Number of 32-bit words needed to hold one pointer. */
const size_t ptr_words = sizeof(void *) / sizeof(uint32_t);
6806
Josh Habermane8ed0212015-06-08 17:56:03 -07006807/* How many words an instruction is. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006808static int instruction_len(uint32_t instr) {
6809 switch (getop(instr)) {
6810 case OP_SETDISPATCH: return 1 + ptr_words;
6811 case OP_TAGN: return 3;
6812 case OP_SETBIGGROUPNUM: return 2;
6813 default: return 1;
6814 }
6815}
6816
6817bool op_has_longofs(int32_t instruction) {
6818 switch (getop(instruction)) {
6819 case OP_CALL:
6820 case OP_BRANCH:
6821 case OP_CHECKDELIM:
6822 return true;
Josh Habermane8ed0212015-06-08 17:56:03 -07006823 /* The "tag" instructions only have 8 bytes available for the jump target,
6824 * but that is ok because these opcodes only require short jumps. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006825 case OP_TAG1:
6826 case OP_TAG2:
6827 case OP_TAGN:
6828 return false;
6829 default:
6830 assert(false);
6831 return false;
6832 }
6833}
6834
/* Extracts the signed jump offset from "instruction": everything above the
 * low byte for long-offset opcodes, otherwise just the second byte. */
static int32_t getofs(uint32_t instruction) {
  if (!op_has_longofs(instruction)) {
    return (int8_t)(instruction >> 8);
  }
  return (int32_t)instruction >> 8;
}
6842
/* Stores jump offset "ofs" into "*instruction", keeping its opcode.
 *
 * Long-offset opcodes get the offset in everything above the low byte; the
 * others get its low 8 bits in the second byte, with all other bits kept.
 * The trailing assert catches offsets that do not fit the available bits. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  if (op_has_longofs(*instruction)) {
    /* Shift in unsigned arithmetic: left-shifting a negative signed value
     * (every backward jump) is undefined behavior in C. */
    *instruction = getop(*instruction) | ((uint32_t)ofs << 8);
  } else {
    *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
  }
  assert(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
}
6851
6852static uint32_t pcofs(compiler *c) { return c->pc - c->group->bytecode; }
6853
Josh Habermane8ed0212015-06-08 17:56:03 -07006854/* Defines a local label at the current PC location. All previous forward
6855 * references are updated to point to this location. The location is noted
6856 * for any future backward references. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006857static void label(compiler *c, unsigned int label) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006858 int val;
6859 uint32_t *codep;
6860
Chris Fallin91473dc2014-12-12 15:58:26 -08006861 assert(label < MAXLABEL);
Josh Habermane8ed0212015-06-08 17:56:03 -07006862 val = c->fwd_labels[label];
6863 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
Chris Fallin91473dc2014-12-12 15:58:26 -08006864 while (codep) {
6865 int ofs = getofs(*codep);
6866 setofs(codep, c->pc - codep - instruction_len(*codep));
6867 codep = ofs ? codep + ofs : NULL;
6868 }
6869 c->fwd_labels[label] = EMPTYLABEL;
6870 c->back_labels[label] = pcofs(c);
6871}
6872
Josh Habermane8ed0212015-06-08 17:56:03 -07006873/* Creates a reference to a numbered label; either a forward reference
6874 * (positive arg) or backward reference (negative arg). For forward references
6875 * the value returned now is actually a "next" pointer into a linked list of all
6876 * instructions that use this label and will be patched later when the label is
6877 * defined with label().
6878 *
6879 * The returned value is the offset that should be written into the instruction.
6880 */
Chris Fallin91473dc2014-12-12 15:58:26 -08006881static int32_t labelref(compiler *c, int label) {
6882 assert(label < MAXLABEL);
6883 if (label == LABEL_DISPATCH) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006884 /* No resolving required. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006885 return 0;
6886 } else if (label < 0) {
Josh Habermane8ed0212015-06-08 17:56:03 -07006887 /* Backward local label. Relative to the next instruction. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006888 uint32_t from = (c->pc + 1) - c->group->bytecode;
6889 return c->back_labels[-label] - from;
6890 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -07006891 /* Forward local label: prepend to (possibly-empty) linked list. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006892 int *lptr = &c->fwd_labels[label];
6893 int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
6894 *lptr = pcofs(c);
6895 return ret;
6896 }
6897}
6898
6899static void put32(compiler *c, uint32_t v) {
6900 mgroup *g = c->group;
6901 if (c->pc == g->bytecode_end) {
6902 int ofs = pcofs(c);
6903 size_t oldsize = g->bytecode_end - g->bytecode;
6904 size_t newsize = UPB_MAX(oldsize * 2, 64);
Josh Habermane8ed0212015-06-08 17:56:03 -07006905 /* TODO(haberman): handle OOM. */
Chris Fallin91473dc2014-12-12 15:58:26 -08006906 g->bytecode = realloc(g->bytecode, newsize * sizeof(uint32_t));
6907 g->bytecode_end = g->bytecode + newsize;
6908 c->pc = g->bytecode + ofs;
6909 }
6910 *c->pc++ = v;
6911}
6912
6913static void putop(compiler *c, opcode op, ...) {
6914 va_list ap;
6915 va_start(ap, op);
6916
6917 switch (op) {
6918 case OP_SETDISPATCH: {
6919 uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
6920 put32(c, OP_SETDISPATCH);
6921 put32(c, ptr);
6922 if (sizeof(uintptr_t) > sizeof(uint32_t))
6923 put32(c, (uint64_t)ptr >> 32);
6924 break;
6925 }
6926 case OP_STARTMSG:
6927 case OP_ENDMSG:
6928 case OP_PUSHLENDELIM:
6929 case OP_POP:
6930 case OP_SETDELIM:
6931 case OP_HALT:
6932 case OP_RET:
Chris Fallin97b663a2015-01-09 16:15:22 -08006933 case OP_DISPATCH:
Chris Fallin91473dc2014-12-12 15:58:26 -08006934 put32(c, op);
6935 break;
6936 case OP_PARSE_DOUBLE:
6937 case OP_PARSE_FLOAT:
6938 case OP_PARSE_INT64:
6939 case OP_PARSE_UINT64:
6940 case OP_PARSE_INT32:
6941 case OP_PARSE_FIXED64:
6942 case OP_PARSE_FIXED32:
6943 case OP_PARSE_BOOL:
6944 case OP_PARSE_UINT32:
6945 case OP_PARSE_SFIXED32:
6946 case OP_PARSE_SFIXED64:
6947 case OP_PARSE_SINT32:
6948 case OP_PARSE_SINT64:
6949 case OP_STARTSEQ:
6950 case OP_ENDSEQ:
6951 case OP_STARTSUBMSG:
6952 case OP_ENDSUBMSG:
6953 case OP_STARTSTR:
6954 case OP_STRING:
6955 case OP_ENDSTR:
6956 case OP_PUSHTAGDELIM:
6957 put32(c, op | va_arg(ap, upb_selector_t) << 8);
6958 break;
6959 case OP_SETBIGGROUPNUM:
6960 put32(c, op);
6961 put32(c, va_arg(ap, int));
6962 break;
6963 case OP_CALL: {
6964 const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
6965 put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
6966 break;
6967 }
6968 case OP_CHECKDELIM:
6969 case OP_BRANCH: {
6970 uint32_t instruction = op;
6971 int label = va_arg(ap, int);
6972 setofs(&instruction, labelref(c, label));
6973 put32(c, instruction);
6974 break;
6975 }
6976 case OP_TAG1:
6977 case OP_TAG2: {
6978 int label = va_arg(ap, int);
6979 uint64_t tag = va_arg(ap, uint64_t);
6980 uint32_t instruction = op | (tag << 16);
6981 assert(tag <= 0xffff);
6982 setofs(&instruction, labelref(c, label));
6983 put32(c, instruction);
6984 break;
6985 }
6986 case OP_TAGN: {
6987 int label = va_arg(ap, int);
6988 uint64_t tag = va_arg(ap, uint64_t);
6989 uint32_t instruction = op | (upb_value_size(tag) << 16);
6990 setofs(&instruction, labelref(c, label));
6991 put32(c, instruction);
6992 put32(c, tag);
6993 put32(c, tag >> 32);
6994 break;
6995 }
6996 }
6997
6998 va_end(ap);
6999}
7000
#if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)

/* Maps an opcode number to the stringified form of its OP_* identifier, or to
 * "<unknown op>" when the number matches no known opcode.  Only compiled in
 * for the JIT and for bytecode dumping. */
const char *upb_pbdecoder_getopname(unsigned int op) {
#define QUOTE(x) #x
#define EXPAND_AND_QUOTE(x) QUOTE(x)
#define OPNAME(x) OP_##x
#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
#define T(x) OP(PARSE_##x)
  /* Keep in sync with list in decoder.int.h. */
  switch ((opcode)op) {
    /* Primitive value parsers. */
    T(DOUBLE)
    T(FLOAT)
    T(INT64)
    T(UINT64)
    T(INT32)
    T(FIXED64)
    T(FIXED32)
    T(BOOL)
    T(UINT32)
    T(SFIXED32)
    T(SFIXED64)
    T(SINT32)
    T(SINT64)
    /* Everything else. */
    OP(STARTMSG)
    OP(ENDMSG)
    OP(STARTSEQ)
    OP(ENDSEQ)
    OP(STARTSUBMSG)
    OP(ENDSUBMSG)
    OP(STARTSTR)
    OP(STRING)
    OP(ENDSTR)
    OP(CALL)
    OP(RET)
    OP(PUSHLENDELIM)
    OP(PUSHTAGDELIM)
    OP(SETDELIM)
    OP(CHECKDELIM)
    OP(BRANCH)
    OP(TAG1)
    OP(TAG2)
    OP(TAGN)
    OP(SETDISPATCH)
    OP(POP)
    OP(SETBIGGROUPNUM)
    OP(DISPATCH)
    OP(HALT)
  }
  return "<unknown op>";
#undef OP
#undef T
}

#endif
7025
#ifdef UPB_DUMP_BYTECODE

/* Writes a human-readable listing of the bytecode in [p, end) to "f", one
 * instruction per line: the instruction's address, its word offset from the
 * start (hex), the opcode name, and any operands.  Jump targets are printed
 * as word offsets from the start of the listing. */
static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
  uint32_t *begin = p;

  while (p < end) {
    uint32_t instr;
    uint8_t op;

    /* "%p" requires a void pointer argument. */
    fprintf(f, "%p %8tx", (void *)p, p - begin);
    instr = *p++;
    op = getop(instr);
    fprintf(f, " %s", upb_pbdecoder_getopname(op));

    switch ((opcode)op) {
      case OP_SETDISPATCH: {
        const upb_inttable *dispatch;
        const upb_pbdecodermethod *method;

        /* The table pointer is stored inline after the opcode word. */
        memcpy(&dispatch, p, sizeof(void*));
        p += ptr_words;
        /* Recover the method that embeds this dispatch table. */
        method = (const void *)((const char *)dispatch -
                                offsetof(upb_pbdecodermethod, dispatch));
        fprintf(f, " %s", upb_msgdef_fullname(
                              upb_handlers_msgdef(method->dest_handlers_)));
        break;
      }
      case OP_DISPATCH:
      case OP_STARTMSG:
      case OP_ENDMSG:
      case OP_PUSHLENDELIM:
      case OP_POP:
      case OP_SETDELIM:
      case OP_HALT:
      case OP_RET:
        break;
      case OP_PARSE_DOUBLE:
      case OP_PARSE_FLOAT:
      case OP_PARSE_INT64:
      case OP_PARSE_UINT64:
      case OP_PARSE_INT32:
      case OP_PARSE_FIXED64:
      case OP_PARSE_FIXED32:
      case OP_PARSE_BOOL:
      case OP_PARSE_UINT32:
      case OP_PARSE_SFIXED32:
      case OP_PARSE_SFIXED64:
      case OP_PARSE_SINT32:
      case OP_PARSE_SINT64:
      case OP_STARTSEQ:
      case OP_ENDSEQ:
      case OP_STARTSUBMSG:
      case OP_ENDSUBMSG:
      case OP_STARTSTR:
      case OP_STRING:
      case OP_ENDSTR:
      case OP_PUSHTAGDELIM:
        /* Operand is unsigned; print it with a matching specifier. */
        fprintf(f, " %u", (unsigned)(instr >> 8));
        break;
      case OP_SETBIGGROUPNUM:
        fprintf(f, " %u", (unsigned)*p++);
        break;
      case OP_CHECKDELIM:
      case OP_CALL:
      case OP_BRANCH:
        fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
        break;
      case OP_TAG1:
      case OP_TAG2: {
        fprintf(f, " tag:0x%x", (unsigned)(instr >> 16));
        if (getofs(instr)) {
          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
        }
        break;
      }
      case OP_TAGN: {
        /* The 64-bit tag follows in two words, low half first. */
        uint64_t tag = *p++;
        tag |= (uint64_t)*p++ << 32;
        fprintf(f, " tag:0x%llx", (unsigned long long)tag);
        fprintf(f, " n:%u", (unsigned)(instr >> 16));
        if (getofs(instr)) {
          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
        }
        break;
      }
    }
    fputs("\n", f);
  }
}

#endif
7113
7114static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
7115 uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type;
7116 uint64_t encoded_tag = upb_vencode32(tag);
Josh Habermane8ed0212015-06-08 17:56:03 -07007117 /* No tag should be greater than 5 bytes. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007118 assert(encoded_tag <= 0xffffffffff);
7119 return encoded_tag;
7120}
7121
7122static void putchecktag(compiler *c, const upb_fielddef *f,
7123 int wire_type, int dest) {
7124 uint64_t tag = get_encoded_tag(f, wire_type);
7125 switch (upb_value_size(tag)) {
7126 case 1:
7127 putop(c, OP_TAG1, dest, tag);
7128 break;
7129 case 2:
7130 putop(c, OP_TAG2, dest, tag);
7131 break;
7132 default:
7133 putop(c, OP_TAGN, dest, tag);
7134 break;
7135 }
7136}
7137
7138static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
7139 upb_selector_t selector;
7140 bool ok = upb_handlers_getselector(f, type, &selector);
7141 UPB_ASSERT_VAR(ok, ok);
7142 return selector;
7143}
7144
Josh Habermane8ed0212015-06-08 17:56:03 -07007145/* Takes an existing, primary dispatch table entry and repacks it with a
7146 * different alternate wire type. Called when we are inserting a secondary
7147 * dispatch table entry for an alternate wire type. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007148static uint64_t repack(uint64_t dispatch, int new_wt2) {
7149 uint64_t ofs;
7150 uint8_t wt1;
7151 uint8_t old_wt2;
7152 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
Josh Habermane8ed0212015-06-08 17:56:03 -07007153 assert(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007154 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
7155}
7156
Josh Habermane8ed0212015-06-08 17:56:03 -07007157/* Marks the current bytecode position as the dispatch target for this message,
7158 * field, and wire type. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007159static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
7160 const upb_fielddef *f, int wire_type) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007161 /* Offset is relative to msg base. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007162 uint64_t ofs = pcofs(c) - method->code_base.ofs;
7163 uint32_t fn = upb_fielddef_number(f);
7164 upb_inttable *d = &method->dispatch;
7165 upb_value v;
7166 if (upb_inttable_remove(d, fn, &v)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007167 /* TODO: prioritize based on packed setting in .proto file. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007168 uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
7169 upb_inttable_insert(d, fn, upb_value_uint64(repacked));
7170 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
7171 } else {
7172 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
7173 upb_inttable_insert(d, fn, upb_value_uint64(val));
7174 }
7175}
7176
7177static void putpush(compiler *c, const upb_fielddef *f) {
7178 if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
7179 putop(c, OP_PUSHLENDELIM);
7180 } else {
7181 uint32_t fn = upb_fielddef_number(f);
7182 if (fn >= 1 << 24) {
7183 putop(c, OP_PUSHTAGDELIM, 0);
7184 putop(c, OP_SETBIGGROUPNUM, fn);
7185 } else {
7186 putop(c, OP_PUSHTAGDELIM, fn);
7187 }
7188 }
7189}
7190
7191static upb_pbdecodermethod *find_submethod(const compiler *c,
7192 const upb_pbdecodermethod *method,
7193 const upb_fielddef *f) {
7194 const upb_handlers *sub =
7195 upb_handlers_getsubhandlers(method->dest_handlers_, f);
7196 upb_value v;
7197 return upb_inttable_lookupptr(&c->group->methods, sub, &v)
7198 ? upb_value_getptr(v)
7199 : NULL;
7200}
7201
7202static void putsel(compiler *c, opcode op, upb_selector_t sel,
7203 const upb_handlers *h) {
7204 if (upb_handlers_gethandler(h, sel)) {
7205 putop(c, op, sel);
7206 }
7207}
7208
Josh Habermane8ed0212015-06-08 17:56:03 -07007209/* Puts an opcode to call a callback, but only if a callback actually exists for
7210 * this field and handler type. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007211static void maybeput(compiler *c, opcode op, const upb_handlers *h,
7212 const upb_fielddef *f, upb_handlertype_t type) {
7213 putsel(c, op, getsel(f, type), h);
7214}
7215
7216static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
7217 if (!upb_fielddef_lazy(f))
7218 return false;
7219
7220 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) ||
7221 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) ||
7222 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR));
7223}
7224
7225
7226/* bytecode compiler code generation ******************************************/
7227
/* Symbolic names for our local labels.  A label is defined with label() and
 * referenced through the jump-emitting putop() cases; passing the negated
 * value requests a backward reference to the most recent definition (see
 * labelref()). */
#define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
#define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
#define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
#define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007233
Josh Habermane8ed0212015-06-08 17:56:03 -07007234/* Generates bytecode to parse a single non-lazy message field. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007235static void generate_msgfield(compiler *c, const upb_fielddef *f,
7236 upb_pbdecodermethod *method) {
7237 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7238 const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
Josh Habermane8ed0212015-06-08 17:56:03 -07007239 int wire_type;
Chris Fallin91473dc2014-12-12 15:58:26 -08007240
7241 if (!sub_m) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007242 /* Don't emit any code for this field at all; it will be parsed as an
Josh Haberman78da6662016-01-13 19:05:43 -08007243 * unknown field.
7244 *
7245 * TODO(haberman): we should change this to parse it as a string field
7246 * instead. It will probably be faster, but more importantly, once we
7247 * start vending unknown fields, a field shouldn't be treated as unknown
7248 * just because it doesn't have subhandlers registered. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007249 return;
7250 }
7251
7252 label(c, LABEL_FIELD);
7253
Josh Habermane8ed0212015-06-08 17:56:03 -07007254 wire_type =
Chris Fallin91473dc2014-12-12 15:58:26 -08007255 (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
7256 ? UPB_WIRE_TYPE_DELIMITED
7257 : UPB_WIRE_TYPE_START_GROUP;
7258
7259 if (upb_fielddef_isseq(f)) {
7260 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7261 putchecktag(c, f, wire_type, LABEL_DISPATCH);
7262 dispatchtarget(c, method, f, wire_type);
7263 putop(c, OP_PUSHTAGDELIM, 0);
7264 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
7265 label(c, LABEL_LOOPSTART);
7266 putpush(c, f);
7267 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
7268 putop(c, OP_CALL, sub_m);
7269 putop(c, OP_POP);
7270 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
7271 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
7272 putop(c, OP_SETDELIM);
7273 }
7274 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7275 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
7276 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7277 label(c, LABEL_LOOPBREAK);
7278 putop(c, OP_POP);
7279 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
7280 } else {
7281 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7282 putchecktag(c, f, wire_type, LABEL_DISPATCH);
7283 dispatchtarget(c, method, f, wire_type);
7284 putpush(c, f);
7285 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
7286 putop(c, OP_CALL, sub_m);
7287 putop(c, OP_POP);
7288 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
7289 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
7290 putop(c, OP_SETDELIM);
7291 }
7292 }
7293}
7294
Josh Habermane8ed0212015-06-08 17:56:03 -07007295/* Generates bytecode to parse a single string or lazy submessage field. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007296static void generate_delimfield(compiler *c, const upb_fielddef *f,
7297 upb_pbdecodermethod *method) {
7298 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7299
7300 label(c, LABEL_FIELD);
7301 if (upb_fielddef_isseq(f)) {
7302 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7303 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7304 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7305 putop(c, OP_PUSHTAGDELIM, 0);
7306 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
7307 label(c, LABEL_LOOPSTART);
7308 putop(c, OP_PUSHLENDELIM);
7309 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
Josh Habermane8ed0212015-06-08 17:56:03 -07007310 /* Need to emit even if no handler to skip past the string. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007311 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
7312 putop(c, OP_POP);
7313 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
7314 putop(c, OP_SETDELIM);
7315 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7316 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
7317 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7318 label(c, LABEL_LOOPBREAK);
7319 putop(c, OP_POP);
7320 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
7321 } else {
7322 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7323 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7324 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7325 putop(c, OP_PUSHLENDELIM);
7326 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
7327 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
7328 putop(c, OP_POP);
7329 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
7330 putop(c, OP_SETDELIM);
7331 }
7332}
7333
Josh Habermane8ed0212015-06-08 17:56:03 -07007334/* Generates bytecode to parse a single primitive field. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007335static void generate_primitivefield(compiler *c, const upb_fielddef *f,
7336 upb_pbdecodermethod *method) {
Chris Fallin91473dc2014-12-12 15:58:26 -08007337 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7338 upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
Josh Habermane8ed0212015-06-08 17:56:03 -07007339 opcode parse_type;
7340 upb_selector_t sel;
7341 int wire_type;
Chris Fallin91473dc2014-12-12 15:58:26 -08007342
Josh Habermane8ed0212015-06-08 17:56:03 -07007343 label(c, LABEL_FIELD);
7344
7345 /* From a decoding perspective, ENUM is the same as INT32. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007346 if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
7347 descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
7348
Josh Habermane8ed0212015-06-08 17:56:03 -07007349 parse_type = (opcode)descriptor_type;
Chris Fallin91473dc2014-12-12 15:58:26 -08007350
Josh Habermane8ed0212015-06-08 17:56:03 -07007351 /* TODO(haberman): generate packed or non-packed first depending on "packed"
7352 * setting in the fielddef. This will favor (in speed) whichever was
7353 * specified. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007354
7355 assert((int)parse_type >= 0 && parse_type <= OP_MAX);
Josh Habermane8ed0212015-06-08 17:56:03 -07007356 sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
7357 wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
Chris Fallin91473dc2014-12-12 15:58:26 -08007358 if (upb_fielddef_isseq(f)) {
7359 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7360 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7361 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7362 putop(c, OP_PUSHLENDELIM);
Josh Habermane8ed0212015-06-08 17:56:03 -07007363 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */
Chris Fallin91473dc2014-12-12 15:58:26 -08007364 label(c, LABEL_LOOPSTART);
7365 putop(c, parse_type, sel);
7366 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7367 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7368 dispatchtarget(c, method, f, wire_type);
7369 putop(c, OP_PUSHTAGDELIM, 0);
Josh Habermane8ed0212015-06-08 17:56:03 -07007370 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */
Chris Fallin91473dc2014-12-12 15:58:26 -08007371 label(c, LABEL_LOOPSTART);
7372 putop(c, parse_type, sel);
7373 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7374 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
7375 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7376 label(c, LABEL_LOOPBREAK);
Josh Habermane8ed0212015-06-08 17:56:03 -07007377 putop(c, OP_POP); /* Packed and non-packed join. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007378 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
Josh Habermane8ed0212015-06-08 17:56:03 -07007379 putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007380 } else {
7381 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7382 putchecktag(c, f, wire_type, LABEL_DISPATCH);
7383 dispatchtarget(c, method, f, wire_type);
7384 putop(c, parse_type, sel);
7385 }
7386}
7387
Josh Habermane8ed0212015-06-08 17:56:03 -07007388/* Adds bytecode for parsing the given message to the given decoderplan,
7389 * while adding all dispatch targets to this message's dispatch table. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007390static void compile_method(compiler *c, upb_pbdecodermethod *method) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007391 const upb_handlers *h;
7392 const upb_msgdef *md;
7393 uint32_t* start_pc;
7394 upb_msg_field_iter i;
7395 upb_value val;
7396
Chris Fallin91473dc2014-12-12 15:58:26 -08007397 assert(method);
7398
Josh Habermane8ed0212015-06-08 17:56:03 -07007399 /* Clear all entries in the dispatch table. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007400 upb_inttable_uninit(&method->dispatch);
7401 upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
7402
Josh Habermane8ed0212015-06-08 17:56:03 -07007403 h = upb_pbdecodermethod_desthandlers(method);
7404 md = upb_handlers_msgdef(h);
Chris Fallin91473dc2014-12-12 15:58:26 -08007405
7406 method->code_base.ofs = pcofs(c);
7407 putop(c, OP_SETDISPATCH, &method->dispatch);
7408 putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
7409 label(c, LABEL_FIELD);
Josh Habermane8ed0212015-06-08 17:56:03 -07007410 start_pc = c->pc;
Chris Fallinfcd88892015-01-13 18:14:39 -08007411 for(upb_msg_field_begin(&i, md);
7412 !upb_msg_field_done(&i);
7413 upb_msg_field_next(&i)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08007414 const upb_fielddef *f = upb_msg_iter_field(&i);
7415 upb_fieldtype_t type = upb_fielddef_type(f);
7416
7417 if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
7418 generate_msgfield(c, f, method);
7419 } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
7420 type == UPB_TYPE_MESSAGE) {
7421 generate_delimfield(c, f, method);
7422 } else {
7423 generate_primitivefield(c, f, method);
7424 }
7425 }
7426
Josh Habermane8ed0212015-06-08 17:56:03 -07007427 /* If there were no fields, or if no handlers were defined, we need to
7428 * generate a non-empty loop body so that we can at least dispatch for unknown
7429 * fields and check for the end of the message. */
Chris Fallin97b663a2015-01-09 16:15:22 -08007430 if (c->pc == start_pc) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007431 /* Check for end-of-message. */
Chris Fallin97b663a2015-01-09 16:15:22 -08007432 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
Josh Habermane8ed0212015-06-08 17:56:03 -07007433 /* Unconditionally dispatch. */
Chris Fallin97b663a2015-01-09 16:15:22 -08007434 putop(c, OP_DISPATCH, 0);
7435 }
7436
Josh Habermane8ed0212015-06-08 17:56:03 -07007437 /* For now we just loop back to the last field of the message (or if none,
7438 * the DISPATCH opcode for the message). */
Chris Fallin91473dc2014-12-12 15:58:26 -08007439 putop(c, OP_BRANCH, -LABEL_FIELD);
7440
Josh Habermane8ed0212015-06-08 17:56:03 -07007441 /* Insert both a label and a dispatch table entry for this end-of-msg. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007442 label(c, LABEL_ENDMSG);
Josh Habermane8ed0212015-06-08 17:56:03 -07007443 val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
Chris Fallin91473dc2014-12-12 15:58:26 -08007444 upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
7445
7446 putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
7447 putop(c, OP_RET);
7448
7449 upb_inttable_compact(&method->dispatch);
7450}
7451
Josh Habermane8ed0212015-06-08 17:56:03 -07007452/* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
7453 * Returns the method for these handlers.
7454 *
7455 * Generates a new method for every destination handlers reachable from "h". */
Chris Fallin91473dc2014-12-12 15:58:26 -08007456static void find_methods(compiler *c, const upb_handlers *h) {
7457 upb_value v;
Josh Habermane8ed0212015-06-08 17:56:03 -07007458 upb_msg_field_iter i;
7459 const upb_msgdef *md;
7460
Chris Fallin91473dc2014-12-12 15:58:26 -08007461 if (upb_inttable_lookupptr(&c->group->methods, h, &v))
7462 return;
7463 newmethod(h, c->group);
7464
Josh Habermane8ed0212015-06-08 17:56:03 -07007465 /* Find submethods. */
7466 md = upb_handlers_msgdef(h);
Chris Fallinfcd88892015-01-13 18:14:39 -08007467 for(upb_msg_field_begin(&i, md);
7468 !upb_msg_field_done(&i);
7469 upb_msg_field_next(&i)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08007470 const upb_fielddef *f = upb_msg_iter_field(&i);
7471 const upb_handlers *sub_h;
7472 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
7473 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007474 /* We only generate a decoder method for submessages with handlers.
7475 * Others will be parsed as unknown fields. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007476 find_methods(c, sub_h);
7477 }
7478 }
7479}
7480
Josh Habermane8ed0212015-06-08 17:56:03 -07007481/* (Re-)compile bytecode for all messages in "msgs."
7482 * Overwrites any existing bytecode in "c". */
Chris Fallin91473dc2014-12-12 15:58:26 -08007483static void compile_methods(compiler *c) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007484 upb_inttable_iter i;
7485
7486 /* Start over at the beginning of the bytecode. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007487 c->pc = c->group->bytecode;
7488
Chris Fallin91473dc2014-12-12 15:58:26 -08007489 upb_inttable_begin(&i, &c->group->methods);
7490 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7491 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
7492 compile_method(c, method);
7493 }
7494}
7495
7496static void set_bytecode_handlers(mgroup *g) {
7497 upb_inttable_iter i;
7498 upb_inttable_begin(&i, &g->methods);
7499 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7500 upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i));
Josh Habermane8ed0212015-06-08 17:56:03 -07007501 upb_byteshandler *h = &m->input_handler_;
Chris Fallin91473dc2014-12-12 15:58:26 -08007502
7503 m->code_base.ptr = g->bytecode + m->code_base.ofs;
7504
Chris Fallin91473dc2014-12-12 15:58:26 -08007505 upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr);
7506 upb_byteshandler_setstring(h, upb_pbdecoder_decode, g);
7507 upb_byteshandler_setendstr(h, upb_pbdecoder_end, m);
7508 }
7509}
7510
7511
Chris Fallinfcd88892015-01-13 18:14:39 -08007512/* JIT setup. *****************************************************************/
Chris Fallin91473dc2014-12-12 15:58:26 -08007513
7514#ifdef UPB_USE_JIT_X64
7515
7516static void sethandlers(mgroup *g, bool allowjit) {
7517 g->jit_code = NULL;
7518 if (allowjit) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007519 /* Compile byte-code into machine code, create handlers. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007520 upb_pbdecoder_jit(g);
7521 } else {
7522 set_bytecode_handlers(g);
7523 }
7524}
7525
Josh Habermane8ed0212015-06-08 17:56:03 -07007526#else /* UPB_USE_JIT_X64 */
Chris Fallin91473dc2014-12-12 15:58:26 -08007527
7528static void sethandlers(mgroup *g, bool allowjit) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007529 /* No JIT compiled in; use bytecode handlers unconditionally. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007530 UPB_UNUSED(allowjit);
7531 set_bytecode_handlers(g);
7532}
7533
Josh Habermane8ed0212015-06-08 17:56:03 -07007534#endif /* UPB_USE_JIT_X64 */
Chris Fallin91473dc2014-12-12 15:58:26 -08007535
7536
Josh Habermane8ed0212015-06-08 17:56:03 -07007537/* TODO(haberman): allow this to be constructed for an arbitrary set of dest
7538 * handlers and other mgroups (but verify we have a transitive closure). */
Chris Fallin91473dc2014-12-12 15:58:26 -08007539const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
7540 const void *owner) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007541 mgroup *g;
7542 compiler *c;
7543
Chris Fallin91473dc2014-12-12 15:58:26 -08007544 UPB_UNUSED(allowjit);
7545 assert(upb_handlers_isfrozen(dest));
7546
Josh Habermane8ed0212015-06-08 17:56:03 -07007547 g = newgroup(owner);
7548 c = newcompiler(g, lazy);
Chris Fallin91473dc2014-12-12 15:58:26 -08007549 find_methods(c, dest);
7550
Josh Habermane8ed0212015-06-08 17:56:03 -07007551 /* We compile in two passes:
7552 * 1. all messages are assigned relative offsets from the beginning of the
7553 * bytecode (saved in method->code_base).
7554 * 2. forwards OP_CALL instructions can be correctly linked since message
7555 * offsets have been previously assigned.
7556 *
7557 * Could avoid the second pass by linking OP_CALL instructions somehow. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007558 compile_methods(c);
7559 compile_methods(c);
7560 g->bytecode_end = c->pc;
7561 freecompiler(c);
7562
7563#ifdef UPB_DUMP_BYTECODE
Josh Habermane8ed0212015-06-08 17:56:03 -07007564 {
Josh Habermanf654d492016-02-18 11:07:51 -08007565 FILE *f = fopen("/tmp/upb-bytecode", "w");
Josh Habermane8ed0212015-06-08 17:56:03 -07007566 assert(f);
7567 dumpbc(g->bytecode, g->bytecode_end, stderr);
7568 dumpbc(g->bytecode, g->bytecode_end, f);
7569 fclose(f);
Josh Habermanf654d492016-02-18 11:07:51 -08007570
7571 f = fopen("/tmp/upb-bytecode.bin", "wb");
7572 assert(f);
7573 fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
7574 fclose(f);
Josh Habermane8ed0212015-06-08 17:56:03 -07007575 }
Chris Fallin91473dc2014-12-12 15:58:26 -08007576#endif
7577
7578 sethandlers(g, allowjit);
7579 return g;
7580}
7581
7582
7583/* upb_pbcodecache ************************************************************/
7584
7585void upb_pbcodecache_init(upb_pbcodecache *c) {
7586 upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR);
7587 c->allow_jit_ = true;
7588}
7589
7590void upb_pbcodecache_uninit(upb_pbcodecache *c) {
7591 upb_inttable_iter i;
7592 upb_inttable_begin(&i, &c->groups);
7593 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7594 const mgroup *group = upb_value_getconstptr(upb_inttable_iter_value(&i));
Josh Habermane8ed0212015-06-08 17:56:03 -07007595 mgroup_unref(group, c);
Chris Fallin91473dc2014-12-12 15:58:26 -08007596 }
7597 upb_inttable_uninit(&c->groups);
7598}
7599
7600bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
7601 return c->allow_jit_;
7602}
7603
7604bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
7605 if (upb_inttable_count(&c->groups) > 0)
7606 return false;
7607 c->allow_jit_ = allow;
7608 return true;
7609}
7610
7611const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
7612 upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007613 upb_value v;
7614 bool ok;
7615
7616 /* Right now we build a new DecoderMethod every time.
7617 * TODO(haberman): properly cache methods by their true key. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007618 const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
7619 upb_inttable_push(&c->groups, upb_value_constptr(g));
7620
Josh Habermane8ed0212015-06-08 17:56:03 -07007621 ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
Chris Fallin91473dc2014-12-12 15:58:26 -08007622 UPB_ASSERT_VAR(ok, ok);
7623 return upb_value_getptr(v);
7624}
7625
7626
7627/* upb_pbdecodermethodopts ****************************************************/
7628
7629void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
7630 const upb_handlers *h) {
7631 opts->handlers = h;
7632 opts->lazy = false;
7633}
7634
7635void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) {
7636 opts->lazy = lazy;
7637}
7638/*
Josh Haberman181c7f22015-07-15 11:05:10 -07007639** upb::Decoder (Bytecode Decoder VM)
7640**
7641** Bytecode must previously have been generated using the bytecode compiler in
7642** compile_decoder.c. This decoder then walks through the bytecode op-by-op to
7643** parse the input.
7644**
7645** Decoding is fully resumable; we just keep a pointer to the current bytecode
7646** instruction and resume from there. A fair amount of the logic here is to
7647** handle the fact that values can span buffer seams and we have to be able to
7648** be capable of suspending/resuming from any byte in the stream. This
7649** sometimes requires keeping a few trailing bytes from the last buffer around
7650** in the "residual" buffer.
7651*/
Chris Fallin91473dc2014-12-12 15:58:26 -08007652
7653#include <inttypes.h>
Chris Fallin91473dc2014-12-12 15:58:26 -08007654#include <stddef.h>
Chris Fallin91473dc2014-12-12 15:58:26 -08007655
7656#ifdef UPB_DUMP_BYTECODE
7657#include <stdio.h>
7658#endif
7659
/* If "x" evaluates to false, suspends the decoder and returns the result of
 * upb_pbdecoder_suspend() from the enclosing function.  Requires a variable
 * named "d" (the decoder) in scope at the call site.
 *
 * Wrapped in do/while(0) so that the macro expands to exactly one statement:
 * "if (c) CHECK_SUSPEND(x); else ..." then parses as written instead of the
 * else binding to (or being orphaned by) the macro's internal "if". */
#define CHECK_SUSPEND(x) \
  do { \
    if (!(x)) return upb_pbdecoder_suspend(d); \
  } while (0)
7661
/* Error strings with external linkage; per the original note they are shared
 * between the bytecode decoder and the JIT decoder. */
const char *kPbDecoderStackOverflow = "Nesting too deep.";
const char *kPbDecoderSubmessageTooLong =
    "Submessage end extends past enclosing submessage.";

/* Error string used only inside this file. */
static const char *kUnterminatedVarint = "Unterminated varint.";
7669
7670/* upb_pbdecoder **************************************************************/
7671
7672static opcode halt = OP_HALT;
7673
Josh Haberman78da6662016-01-13 19:05:43 -08007674/* A dummy character we can point to when the user passes us a NULL buffer.
7675 * We need this because in C (NULL + 0) and (NULL - NULL) are undefined
7676 * behavior, which would invalidate functions like curbufleft(). */
7677static const char dummy_char;
7678
Josh Habermane8ed0212015-06-08 17:56:03 -07007679/* Whether an op consumes any of the input buffer. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007680static bool consumes_input(opcode op) {
7681 switch (op) {
7682 case OP_SETDISPATCH:
7683 case OP_STARTMSG:
7684 case OP_ENDMSG:
7685 case OP_STARTSEQ:
7686 case OP_ENDSEQ:
7687 case OP_STARTSUBMSG:
7688 case OP_ENDSUBMSG:
7689 case OP_STARTSTR:
7690 case OP_ENDSTR:
7691 case OP_PUSHTAGDELIM:
7692 case OP_POP:
7693 case OP_SETDELIM:
7694 case OP_SETBIGGROUPNUM:
7695 case OP_CHECKDELIM:
7696 case OP_CALL:
7697 case OP_RET:
7698 case OP_BRANCH:
7699 return false;
7700 default:
7701 return true;
7702 }
7703}
7704
Josh Haberman5bdf4a42015-08-03 15:51:31 -07007705static size_t stacksize(upb_pbdecoder *d, size_t entries) {
7706 UPB_UNUSED(d);
7707 return entries * sizeof(upb_pbdecoder_frame);
7708}
7709
7710static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
7711 UPB_UNUSED(d);
7712
7713#ifdef UPB_USE_JIT_X64
7714 if (d->method_->is_native_) {
7715 /* Each native stack frame needs two pointers, plus we need a few frames for
7716 * the enter/exit trampolines. */
7717 size_t ret = entries * sizeof(void*) * 2;
7718 ret += sizeof(void*) * 10;
7719 return ret;
7720 }
7721#endif
7722
7723 return entries * sizeof(uint32_t*);
7724}
7725
7726
Chris Fallin91473dc2014-12-12 15:58:26 -08007727static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
7728
Josh Habermane8ed0212015-06-08 17:56:03 -07007729/* It's unfortunate that we have to micro-manage the compiler with
7730 * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
7731 * specific to one hardware configuration. But empirically on a Core i7,
7732 * performance increases 30-50% with these annotations. Every instance where
7733 * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
7734 * benchmarks. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007735
7736static void seterr(upb_pbdecoder *d, const char *msg) {
Chris Fallind3262772015-05-14 18:24:26 -07007737 upb_status status = UPB_STATUS_INIT;
7738 upb_status_seterrmsg(&status, msg);
7739 upb_env_reporterror(d->env, &status);
Chris Fallin91473dc2014-12-12 15:58:26 -08007740}
7741
7742void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
7743 seterr(d, msg);
7744}
7745
7746
7747/* Buffering ******************************************************************/
7748
Josh Habermane8ed0212015-06-08 17:56:03 -07007749/* We operate on one buffer at a time, which is either the user's buffer passed
7750 * to our "decode" callback or some residual bytes from the previous buffer. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007751
Josh Habermane8ed0212015-06-08 17:56:03 -07007752/* How many bytes can be safely read from d->ptr without reading past end-of-buf
7753 * or past the current delimited end. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007754static size_t curbufleft(const upb_pbdecoder *d) {
7755 assert(d->data_end >= d->ptr);
7756 return d->data_end - d->ptr;
7757}
7758
Josh Haberman5bdf4a42015-08-03 15:51:31 -07007759/* How many bytes are available before end-of-buffer. */
7760static size_t bufleft(const upb_pbdecoder *d) {
7761 return d->end - d->ptr;
7762}
7763
Josh Habermane8ed0212015-06-08 17:56:03 -07007764/* Overall stream offset of d->ptr. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007765uint64_t offset(const upb_pbdecoder *d) {
7766 return d->bufstart_ofs + (d->ptr - d->buf);
7767}
7768
Josh Haberman5bdf4a42015-08-03 15:51:31 -07007769/* How many bytes are available before the end of this delimited region. */
7770size_t delim_remaining(const upb_pbdecoder *d) {
7771 return d->top->end_ofs - offset(d);
7772}
7773
Josh Habermane8ed0212015-06-08 17:56:03 -07007774/* Advances d->ptr. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007775static void advance(upb_pbdecoder *d, size_t len) {
7776 assert(curbufleft(d) >= len);
7777 d->ptr += len;
7778}
7779
/* True when p lies in [buf, end].  Note the range is inclusive of "end". */
static bool in_buf(const char *p, const char *buf, const char *end) {
  if (p < buf) return false;
  if (p > end) return false;
  return true;
}
7783
7784static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
7785 return in_buf(p, d->residual, d->residual_end);
7786}
7787
Josh Habermane8ed0212015-06-08 17:56:03 -07007788/* Calculates the delim_end value, which is affected by both the current buffer
7789 * and the parsing stack, so must be called whenever either is updated. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007790static void set_delim_end(upb_pbdecoder *d) {
7791 size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
Chris Fallind3262772015-05-14 18:24:26 -07007792 if (delim_ofs <= (size_t)(d->end - d->buf)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08007793 d->delim_end = d->buf + delim_ofs;
7794 d->data_end = d->delim_end;
7795 } else {
7796 d->data_end = d->end;
7797 d->delim_end = NULL;
7798 }
7799}
7800
7801static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
7802 d->ptr = buf;
7803 d->buf = buf;
7804 d->end = end;
7805 set_delim_end(d);
7806}
7807
7808static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
7809 assert(curbufleft(d) == 0);
7810 d->bufstart_ofs += (d->end - d->buf);
7811 switchtobuf(d, buf, buf + len);
7812}
7813
7814static void checkpoint(upb_pbdecoder *d) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007815 /* The assertion here is in the interests of efficiency, not correctness.
7816 * We are trying to ensure that we don't checkpoint() more often than
7817 * necessary. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007818 assert(d->checkpoint != d->ptr);
7819 d->checkpoint = d->ptr;
7820}
7821
Josh Haberman5bdf4a42015-08-03 15:51:31 -07007822/* Skips "bytes" bytes in the stream, which may be more than available. If we
7823 * skip more bytes than are available, we return a long read count to the caller
7824 * indicating how many bytes can be skipped over before passing actual data
7825 * again. Skipped bytes can pass a NULL buffer and the decoder guarantees they
7826 * won't actually be read.
7827 */
7828static int32_t skip(upb_pbdecoder *d, size_t bytes) {
7829 assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
7830 assert(d->skip == 0);
7831 if (bytes > delim_remaining(d)) {
7832 seterr(d, "Skipped value extended beyond enclosing submessage.");
7833 return upb_pbdecoder_suspend(d);
Josh Haberman78da6662016-01-13 19:05:43 -08007834 } else if (bufleft(d) >= bytes) {
Josh Haberman5bdf4a42015-08-03 15:51:31 -07007835 /* Skipped data is all in current buffer, and more is still available. */
7836 advance(d, bytes);
7837 d->skip = 0;
7838 return DECODE_OK;
7839 } else {
7840 /* Skipped data extends beyond currently available buffers. */
7841 d->pc = d->last;
7842 d->skip = bytes - curbufleft(d);
7843 d->bufstart_ofs += (d->end - d->buf);
7844 d->residual_end = d->residual;
7845 switchtobuf(d, d->residual, d->residual_end);
7846 return d->size_param + d->skip;
7847 }
7848}
7849
7850
Josh Habermane8ed0212015-06-08 17:56:03 -07007851/* Resumes the decoder from an initial state or from a previous suspend. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007852int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
7853 size_t size, const upb_bufhandle *handle) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007854 UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */
Josh Haberman5bdf4a42015-08-03 15:51:31 -07007855
Josh Haberman78da6662016-01-13 19:05:43 -08007856 /* d->skip and d->residual_end could probably elegantly be represented
7857 * as a single variable, to more easily represent this invariant. */
7858 assert(!(d->skip && d->residual_end > d->residual));
7859
7860 /* We need to remember the original size_param, so that the value we return
7861 * is relative to it, even if we do some skipping first. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007862 d->size_param = size;
7863 d->handle = handle;
Josh Haberman5bdf4a42015-08-03 15:51:31 -07007864
Josh Haberman78da6662016-01-13 19:05:43 -08007865 /* Have to handle this case specially (ie. not with skip()) because the user
7866 * is allowed to pass a NULL buffer here, which won't allow us to safely
7867 * calculate a d->end or use our normal functions like curbufleft(). */
7868 if (d->skip && d->skip >= size) {
7869 d->skip -= size;
7870 d->bufstart_ofs += size;
7871 buf = &dummy_char;
7872 size = 0;
7873
7874 /* We can't just return now, because we might need to execute some ops
7875 * like CHECKDELIM, which could call some callbacks and pop the stack. */
7876 }
7877
7878 /* We need to pretend that this was the actual buffer param, since some of the
7879 * calculations assume that d->ptr/d->buf is relative to this. */
7880 d->buf_param = buf;
7881
7882 if (!buf) {
7883 /* NULL buf is ok if its entire span is covered by the "skip" above, but
7884 * by this point we know that "skip" doesn't cover the buffer. */
7885 seterr(d, "Passed NULL buffer over non-skippable region.");
7886 return upb_pbdecoder_suspend(d);
7887 }
7888
Chris Fallin91473dc2014-12-12 15:58:26 -08007889 if (d->residual_end > d->residual) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007890 /* We have residual bytes from the last buffer. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007891 assert(d->ptr == d->residual);
7892 } else {
7893 switchtobuf(d, buf, buf + size);
7894 }
Josh Haberman5bdf4a42015-08-03 15:51:31 -07007895
Chris Fallin91473dc2014-12-12 15:58:26 -08007896 d->checkpoint = d->ptr;
Josh Haberman5bdf4a42015-08-03 15:51:31 -07007897
Josh Haberman78da6662016-01-13 19:05:43 -08007898 /* Handle skips that don't cover the whole buffer (as above). */
Josh Haberman5bdf4a42015-08-03 15:51:31 -07007899 if (d->skip) {
7900 size_t skip_bytes = d->skip;
7901 d->skip = 0;
7902 CHECK_RETURN(skip(d, skip_bytes));
Josh Haberman78da6662016-01-13 19:05:43 -08007903 checkpoint(d);
Josh Haberman5bdf4a42015-08-03 15:51:31 -07007904 }
7905
Josh Haberman78da6662016-01-13 19:05:43 -08007906 /* If we're inside an unknown group, continue to parse unknown values. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007907 if (d->top->groupnum < 0) {
7908 CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
Josh Haberman78da6662016-01-13 19:05:43 -08007909 checkpoint(d);
Chris Fallin91473dc2014-12-12 15:58:26 -08007910 }
Josh Haberman5bdf4a42015-08-03 15:51:31 -07007911
Chris Fallin91473dc2014-12-12 15:58:26 -08007912 return DECODE_OK;
7913}
7914
Josh Habermane8ed0212015-06-08 17:56:03 -07007915/* Suspends the decoder at the last checkpoint, without saving any residual
7916 * bytes. If there are any unconsumed bytes, returns a short byte count. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007917size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
7918 d->pc = d->last;
7919 if (d->checkpoint == d->residual) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007920 /* Checkpoint was in residual buf; no user bytes were consumed. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007921 d->ptr = d->residual;
7922 return 0;
7923 } else {
Josh Haberman78da6662016-01-13 19:05:43 -08007924 size_t ret = d->size_param - (d->end - d->checkpoint);
Chris Fallin91473dc2014-12-12 15:58:26 -08007925 assert(!in_residual_buf(d, d->checkpoint));
Josh Haberman78da6662016-01-13 19:05:43 -08007926 assert(d->buf == d->buf_param || d->buf == &dummy_char);
Josh Habermane8ed0212015-06-08 17:56:03 -07007927
Josh Haberman78da6662016-01-13 19:05:43 -08007928 d->bufstart_ofs += (d->checkpoint - d->buf);
Chris Fallin91473dc2014-12-12 15:58:26 -08007929 d->residual_end = d->residual;
7930 switchtobuf(d, d->residual, d->residual_end);
Josh Haberman78da6662016-01-13 19:05:43 -08007931 return ret;
Chris Fallin91473dc2014-12-12 15:58:26 -08007932 }
7933}
7934
Josh Habermane8ed0212015-06-08 17:56:03 -07007935/* Suspends the decoder at the last checkpoint, and saves any unconsumed
7936 * bytes in our residual buffer. This is necessary if we need more user
7937 * bytes to form a complete value, which might not be contiguous in the
7938 * user's buffers. Always consumes all user bytes. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007939static size_t suspend_save(upb_pbdecoder *d) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007940 /* We hit end-of-buffer before we could parse a full value.
7941 * Save any unconsumed bytes (if any) to the residual buffer. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007942 d->pc = d->last;
7943
7944 if (d->checkpoint == d->residual) {
Josh Habermane8ed0212015-06-08 17:56:03 -07007945 /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
Chris Fallin91473dc2014-12-12 15:58:26 -08007946 assert((d->residual_end - d->residual) + d->size_param <=
7947 sizeof(d->residual));
7948 if (!in_residual_buf(d, d->ptr)) {
7949 d->bufstart_ofs -= (d->residual_end - d->residual);
7950 }
7951 memcpy(d->residual_end, d->buf_param, d->size_param);
7952 d->residual_end += d->size_param;
7953 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -07007954 /* Checkpoint was in user buf; old residual bytes not needed. */
7955 size_t save;
Chris Fallin91473dc2014-12-12 15:58:26 -08007956 assert(!in_residual_buf(d, d->checkpoint));
Josh Habermane8ed0212015-06-08 17:56:03 -07007957
Chris Fallin91473dc2014-12-12 15:58:26 -08007958 d->ptr = d->checkpoint;
Josh Habermane8ed0212015-06-08 17:56:03 -07007959 save = curbufleft(d);
Chris Fallin91473dc2014-12-12 15:58:26 -08007960 assert(save <= sizeof(d->residual));
7961 memcpy(d->residual, d->ptr, save);
7962 d->residual_end = d->residual + save;
7963 d->bufstart_ofs = offset(d);
7964 }
7965
7966 switchtobuf(d, d->residual, d->residual_end);
7967 return d->size_param;
7968}
7969
Josh Habermane8ed0212015-06-08 17:56:03 -07007970/* Copies the next "bytes" bytes into "buf" and advances the stream.
7971 * Requires that this many bytes are available in the current buffer. */
Chris Fallind3262772015-05-14 18:24:26 -07007972UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
7973 size_t bytes) {
Chris Fallin91473dc2014-12-12 15:58:26 -08007974 assert(bytes <= curbufleft(d));
7975 memcpy(buf, d->ptr, bytes);
7976 advance(d, bytes);
7977}
7978
Josh Habermane8ed0212015-06-08 17:56:03 -07007979/* Slow path for getting the next "bytes" bytes, regardless of whether they are
7980 * available in the current buffer or not. Returns a status code as described
7981 * in decoder.int.h. */
Chris Fallind3262772015-05-14 18:24:26 -07007982UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7983 size_t bytes) {
Chris Fallin91473dc2014-12-12 15:58:26 -08007984 const size_t avail = curbufleft(d);
7985 consumebytes(d, buf, avail);
7986 bytes -= avail;
7987 assert(bytes > 0);
7988 if (in_residual_buf(d, d->ptr)) {
7989 advancetobuf(d, d->buf_param, d->size_param);
7990 }
7991 if (curbufleft(d) >= bytes) {
Chris Fallind3262772015-05-14 18:24:26 -07007992 consumebytes(d, (char *)buf + avail, bytes);
Chris Fallin91473dc2014-12-12 15:58:26 -08007993 return DECODE_OK;
7994 } else if (d->data_end == d->delim_end) {
7995 seterr(d, "Submessage ended in the middle of a value or group");
7996 return upb_pbdecoder_suspend(d);
7997 } else {
7998 return suspend_save(d);
7999 }
8000}
8001
Josh Habermane8ed0212015-06-08 17:56:03 -07008002/* Gets the next "bytes" bytes, regardless of whether they are available in the
8003 * current buffer or not. Returns a status code as described in decoder.int.h.
8004 */
Chris Fallind3262772015-05-14 18:24:26 -07008005UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
8006 size_t bytes) {
Chris Fallin91473dc2014-12-12 15:58:26 -08008007 if (curbufleft(d) >= bytes) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008008 /* Buffer has enough data to satisfy. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008009 consumebytes(d, buf, bytes);
8010 return DECODE_OK;
8011 } else {
8012 return getbytes_slow(d, buf, bytes);
8013 }
8014}
8015
Chris Fallind3262772015-05-14 18:24:26 -07008016UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
8017 size_t bytes) {
Chris Fallin91473dc2014-12-12 15:58:26 -08008018 size_t ret = curbufleft(d);
8019 memcpy(buf, d->ptr, ret);
8020 if (in_residual_buf(d, d->ptr)) {
8021 size_t copy = UPB_MIN(bytes - ret, d->size_param);
Chris Fallind3262772015-05-14 18:24:26 -07008022 memcpy((char *)buf + ret, d->buf_param, copy);
Chris Fallin91473dc2014-12-12 15:58:26 -08008023 ret += copy;
8024 }
8025 return ret;
8026}
8027
Chris Fallind3262772015-05-14 18:24:26 -07008028UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
8029 size_t bytes) {
Chris Fallin91473dc2014-12-12 15:58:26 -08008030 if (curbufleft(d) >= bytes) {
8031 memcpy(buf, d->ptr, bytes);
8032 return bytes;
8033 } else {
8034 return peekbytes_slow(d, buf, bytes);
8035 }
8036}
8037
8038
8039/* Decoding of wire types *****************************************************/
8040
Josh Habermane8ed0212015-06-08 17:56:03 -07008041/* Slow path for decoding a varint from the current buffer position.
8042 * Returns a status code as described in decoder.int.h. */
Chris Fallind3262772015-05-14 18:24:26 -07008043UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
8044 uint64_t *u64) {
Chris Fallin91473dc2014-12-12 15:58:26 -08008045 uint8_t byte = 0x80;
8046 int bitpos;
Josh Habermane8ed0212015-06-08 17:56:03 -07008047 *u64 = 0;
Chris Fallin91473dc2014-12-12 15:58:26 -08008048 for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
Josh Haberman78da6662016-01-13 19:05:43 -08008049 CHECK_RETURN(getbytes(d, &byte, 1));
Chris Fallin91473dc2014-12-12 15:58:26 -08008050 *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
8051 }
8052 if(bitpos == 70 && (byte & 0x80)) {
8053 seterr(d, kUnterminatedVarint);
8054 return upb_pbdecoder_suspend(d);
8055 }
8056 return DECODE_OK;
8057}
8058
Josh Habermane8ed0212015-06-08 17:56:03 -07008059/* Decodes a varint from the current buffer position.
8060 * Returns a status code as described in decoder.int.h. */
Chris Fallind3262772015-05-14 18:24:26 -07008061UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
Chris Fallin91473dc2014-12-12 15:58:26 -08008062 if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
8063 *u64 = *d->ptr;
8064 advance(d, 1);
8065 return DECODE_OK;
8066 } else if (curbufleft(d) >= 10) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008067 /* Fast case. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008068 upb_decoderet r = upb_vdecode_fast(d->ptr);
8069 if (r.p == NULL) {
8070 seterr(d, kUnterminatedVarint);
8071 return upb_pbdecoder_suspend(d);
8072 }
8073 advance(d, r.p - d->ptr);
8074 *u64 = r.val;
8075 return DECODE_OK;
8076 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -07008077 /* Slow case -- varint spans buffer seam. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008078 return upb_pbdecoder_decode_varint_slow(d, u64);
8079 }
8080}
8081
Josh Habermane8ed0212015-06-08 17:56:03 -07008082/* Decodes a 32-bit varint from the current buffer position.
8083 * Returns a status code as described in decoder.int.h. */
Chris Fallind3262772015-05-14 18:24:26 -07008084UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
Chris Fallin91473dc2014-12-12 15:58:26 -08008085 uint64_t u64;
8086 int32_t ret = decode_varint(d, &u64);
8087 if (ret >= 0) return ret;
8088 if (u64 > UINT32_MAX) {
8089 seterr(d, "Unterminated 32-bit varint");
Josh Habermane8ed0212015-06-08 17:56:03 -07008090 /* TODO(haberman) guarantee that this function return is >= 0 somehow,
8091 * so we know this path will always be treated as error by our caller.
8092 * Right now the size_t -> int32_t can overflow and produce negative values.
8093 */
Chris Fallin91473dc2014-12-12 15:58:26 -08008094 *u32 = 0;
8095 return upb_pbdecoder_suspend(d);
8096 }
8097 *u32 = u64;
8098 return DECODE_OK;
8099}
8100
Josh Habermane8ed0212015-06-08 17:56:03 -07008101/* Decodes a fixed32 from the current buffer position.
8102 * Returns a status code as described in decoder.int.h.
8103 * TODO: proper byte swapping for big-endian machines. */
Chris Fallind3262772015-05-14 18:24:26 -07008104UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
Chris Fallin91473dc2014-12-12 15:58:26 -08008105 return getbytes(d, u32, 4);
8106}
8107
Josh Habermane8ed0212015-06-08 17:56:03 -07008108/* Decodes a fixed64 from the current buffer position.
8109 * Returns a status code as described in decoder.int.h.
8110 * TODO: proper byte swapping for big-endian machines. */
Chris Fallind3262772015-05-14 18:24:26 -07008111UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
Chris Fallin91473dc2014-12-12 15:58:26 -08008112 return getbytes(d, u64, 8);
8113}
8114
Josh Habermane8ed0212015-06-08 17:56:03 -07008115/* Non-static versions of the above functions.
8116 * These are called by the JIT for fallback paths. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008117int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
8118 return decode_fixed32(d, u32);
8119}
8120
8121int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
8122 return decode_fixed64(d, u64);
8123}
8124
/* Reinterprets the 8 bytes of "n" as a double (bit copy, no numeric
 * conversion). */
static double as_double(uint64_t n) {
  double result;
  memcpy(&result, &n, sizeof(n));
  return result;
}
/* Reinterprets the 4 bytes of "n" as a float (bit copy, no numeric
 * conversion). */
static float as_float(uint32_t n) {
  float result;
  memcpy(&result, &n, sizeof(n));
  return result;
}
8127
Josh Habermane8ed0212015-06-08 17:56:03 -07008128/* Pushes a frame onto the decoder stack. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008129static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
8130 upb_pbdecoder_frame *fr = d->top;
8131
8132 if (end > fr->end_ofs) {
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008133 seterr(d, kPbDecoderSubmessageTooLong);
Chris Fallin91473dc2014-12-12 15:58:26 -08008134 return false;
Chris Fallind3262772015-05-14 18:24:26 -07008135 } else if (fr == d->limit) {
Chris Fallin91473dc2014-12-12 15:58:26 -08008136 seterr(d, kPbDecoderStackOverflow);
8137 return false;
8138 }
8139
8140 fr++;
8141 fr->end_ofs = end;
8142 fr->dispatch = NULL;
8143 fr->groupnum = 0;
8144 d->top = fr;
8145 return true;
8146}
8147
8148static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008149 /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
8150 * field number) prior to hitting any enclosing submessage end, pushing our
8151 * existing delim end prevents us from continuing to parse values from a
8152 * corrupt proto that doesn't give us an END tag in time. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008153 if (!decoder_push(d, d->top->end_ofs))
8154 return false;
8155 d->top->groupnum = arg;
8156 return true;
8157}
8158
Josh Habermane8ed0212015-06-08 17:56:03 -07008159/* Pops a frame from the decoder stack. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008160static void decoder_pop(upb_pbdecoder *d) { d->top--; }
8161
Chris Fallind3262772015-05-14 18:24:26 -07008162UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
8163 uint64_t expected) {
Chris Fallin91473dc2014-12-12 15:58:26 -08008164 uint64_t data = 0;
8165 size_t bytes = upb_value_size(expected);
8166 size_t read = peekbytes(d, &data, bytes);
8167 if (read == bytes && data == expected) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008168 /* Advance past matched bytes. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008169 int32_t ok = getbytes(d, &data, read);
8170 UPB_ASSERT_VAR(ok, ok < 0);
8171 return DECODE_OK;
8172 } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
8173 return suspend_save(d);
8174 } else {
8175 return DECODE_MISMATCH;
8176 }
8177}
8178
8179int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
8180 uint8_t wire_type) {
8181 if (fieldnum >= 0)
8182 goto have_tag;
8183
8184 while (true) {
8185 uint32_t tag;
8186 CHECK_RETURN(decode_v32(d, &tag));
8187 wire_type = tag & 0x7;
8188 fieldnum = tag >> 3;
8189
8190have_tag:
8191 if (fieldnum == 0) {
8192 seterr(d, "Saw invalid field number (0)");
8193 return upb_pbdecoder_suspend(d);
8194 }
8195
Josh Habermane8ed0212015-06-08 17:56:03 -07008196 /* TODO: deliver to unknown field callback. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008197 switch (wire_type) {
8198 case UPB_WIRE_TYPE_32BIT:
8199 CHECK_RETURN(skip(d, 4));
8200 break;
8201 case UPB_WIRE_TYPE_64BIT:
8202 CHECK_RETURN(skip(d, 8));
8203 break;
8204 case UPB_WIRE_TYPE_VARINT: {
8205 uint64_t u64;
8206 CHECK_RETURN(decode_varint(d, &u64));
8207 break;
8208 }
8209 case UPB_WIRE_TYPE_DELIMITED: {
8210 uint32_t len;
8211 CHECK_RETURN(decode_v32(d, &len));
8212 CHECK_RETURN(skip(d, len));
8213 break;
8214 }
8215 case UPB_WIRE_TYPE_START_GROUP:
8216 CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
8217 break;
8218 case UPB_WIRE_TYPE_END_GROUP:
8219 if (fieldnum == -d->top->groupnum) {
8220 decoder_pop(d);
8221 } else if (fieldnum == d->top->groupnum) {
8222 return DECODE_ENDGROUP;
8223 } else {
8224 seterr(d, "Unmatched ENDGROUP tag.");
8225 return upb_pbdecoder_suspend(d);
8226 }
8227 break;
8228 default:
8229 seterr(d, "Invalid wire type");
8230 return upb_pbdecoder_suspend(d);
8231 }
8232
8233 if (d->top->groupnum >= 0) {
8234 return DECODE_OK;
8235 }
8236
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008237 /* Unknown group -- continue looping over unknown fields. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008238 checkpoint(d);
8239 }
8240}
8241
8242static void goto_endmsg(upb_pbdecoder *d) {
8243 upb_value v;
8244 bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v);
8245 UPB_ASSERT_VAR(found, found);
8246 d->pc = d->top->base + upb_value_getuint64(v);
8247}
8248
Josh Habermane8ed0212015-06-08 17:56:03 -07008249/* Parses a tag and jumps to the corresponding bytecode instruction for this
8250 * field.
8251 *
8252 * If the tag is unknown (or the wire type doesn't match), parses the field as
8253 * unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode
8254 * instruction for the end of message. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008255static int32_t dispatch(upb_pbdecoder *d) {
8256 upb_inttable *dispatch = d->top->dispatch;
Chris Fallin91473dc2014-12-12 15:58:26 -08008257 uint32_t tag;
Josh Habermane8ed0212015-06-08 17:56:03 -07008258 uint8_t wire_type;
8259 uint32_t fieldnum;
Chris Fallin91473dc2014-12-12 15:58:26 -08008260 upb_value val;
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008261 int32_t retval;
Josh Habermane8ed0212015-06-08 17:56:03 -07008262
8263 /* Decode tag. */
8264 CHECK_RETURN(decode_v32(d, &tag));
8265 wire_type = tag & 0x7;
8266 fieldnum = tag >> 3;
8267
8268 /* Lookup tag. Because of packed/non-packed compatibility, we have to
8269 * check the wire type against two possibilities. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008270 if (fieldnum != DISPATCH_ENDMSG &&
8271 upb_inttable_lookup32(dispatch, fieldnum, &val)) {
8272 uint64_t v = upb_value_getuint64(val);
8273 if (wire_type == (v & 0xff)) {
8274 d->pc = d->top->base + (v >> 16);
8275 return DECODE_OK;
8276 } else if (wire_type == ((v >> 8) & 0xff)) {
8277 bool found =
8278 upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
8279 UPB_ASSERT_VAR(found, found);
8280 d->pc = d->top->base + upb_value_getuint64(val);
8281 return DECODE_OK;
8282 }
8283 }
8284
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008285 /* We have some unknown fields (or ENDGROUP) to parse. The DISPATCH or TAG
8286 * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
8287 * we need to back up to, so that when we're done skipping unknown data we
8288 * can re-check the delimited end. */
8289 d->last--; /* Necessary if we get suspended */
8290 d->pc = d->last;
8291 assert(getop(*d->last) == OP_CHECKDELIM);
Chris Fallin91473dc2014-12-12 15:58:26 -08008292
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008293 /* Unknown field or ENDGROUP. */
8294 retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
8295
8296 CHECK_RETURN(retval);
8297
8298 if (retval == DECODE_ENDGROUP) {
Chris Fallin91473dc2014-12-12 15:58:26 -08008299 goto_endmsg(d);
8300 return DECODE_OK;
Chris Fallin91473dc2014-12-12 15:58:26 -08008301 }
Chris Fallind3262772015-05-14 18:24:26 -07008302
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008303 return DECODE_OK;
Chris Fallin91473dc2014-12-12 15:58:26 -08008304}
8305
Josh Habermane8ed0212015-06-08 17:56:03 -07008306/* Callers know that the stack is more than one deep because the opcodes that
8307 * call this only occur after PUSH operations. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008308upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
8309 assert(d->top != d->stack);
8310 return d->top - 1;
8311}
8312
8313
8314/* The main decoding loop *****************************************************/
8315
Josh Habermane8ed0212015-06-08 17:56:03 -07008316/* The main decoder VM function. Uses traditional bytecode dispatch loop with a
8317 * switch() statement. */
/* Executes bytecode starting at d->pc until one of the opcodes returns.  Each
 * instruction word holds its opcode in the bits read by getop() and its
 * operand in the remaining bits (instruction >> 8).  OP_HALT returns
 * d->size_param; other opcodes may return earlier through CHECK_RETURN,
 * CHECK_SUSPEND or upb_pbdecoder_suspend(), which are defined outside this
 * function.  "group" is only referenced by the UPB_DUMP_BYTECODE tracing code;
 * "handle" is forwarded to upb_sink_putstring(). */
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008318size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
8319 const upb_bufhandle* handle) {
Chris Fallin91473dc2014-12-12 15:58:26 -08008320
/* VMCASE wraps the code for one opcode in a case label and calls checkpoint()
 * after it when consumes_input(op) is true.  PRIMITIVE_OP builds the case for
 * a scalar field: decode the wire value, convert it, and hand it to the sink
 * of the top frame. */
8321#define VMCASE(op, code) \
8322 case op: { code; if (consumes_input(op)) checkpoint(d); break; }
8323#define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
8324 VMCASE(OP_PARSE_ ## type, { \
8325 ctype val; \
8326 CHECK_RETURN(decode_ ## wt(d, &val)); \
8327 upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \
8328 })
8329
8330 while(1) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008331 int32_t instruction;
8332 opcode op;
8333 uint32_t arg;
8334 int32_t longofs;
8335
/* Remember where this instruction starts, then fetch it and split it into
 * opcode and operand.  "longofs" is the operand as a signed value; the jump
 * opcodes below add it to d->pc. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008336 d->last = d->pc;
Josh Habermane8ed0212015-06-08 17:56:03 -07008337 instruction = *d->pc++;
8338 op = getop(instruction);
8339 arg = instruction >> 8;
8340 longofs = arg;
Chris Fallin91473dc2014-12-12 15:58:26 -08008341 assert(d->ptr != d->residual_end);
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008342 UPB_UNUSED(group);
Chris Fallin91473dc2014-12-12 15:58:26 -08008343#ifdef UPB_DUMP_BYTECODE
8344 fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
8345 "%x %s (%d)\n",
8346 (int)offset(d),
8347 (int)(d->ptr - d->buf),
8348 (int)(d->data_end - d->ptr),
8349 (int)(d->end - d->ptr),
8350 (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
8351 (int)(d->pc - 1 - group->bytecode),
8352 upb_pbdecoder_getopname(op),
8353 arg);
8354#endif
8355 switch (op) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008356 /* Technically, we are losing data if we see a 32-bit varint that is not
8357 * properly sign-extended. We could detect this and error about the data
8358 * loss, but proto2 does not do this, so we pass. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008359 PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t)
8360 PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t)
8361 PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t)
8362 PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t)
8363 PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t)
8364 PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t)
8365 PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t)
8366 PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t)
8367 PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t)
8368 PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t)
8369 PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t)
8370 PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t)
8371 PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t)
8372
/* OP_SETDISPATCH is followed inline by a pointer-sized value: copy it into
 * the top frame and step d->pc past the words it occupies. */
8373 VMCASE(OP_SETDISPATCH,
8374 d->top->base = d->pc - 1;
8375 memcpy(&d->top->dispatch, d->pc, sizeof(void*));
8376 d->pc += sizeof(void*) / sizeof(uint32_t);
8377 )
8378 VMCASE(OP_STARTMSG,
8379 CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink));
8380 )
8381 VMCASE(OP_ENDMSG,
8382 CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
8383 )
8384 VMCASE(OP_STARTSEQ,
8385 upb_pbdecoder_frame *outer = outer_frame(d);
8386 CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink));
8387 )
8388 VMCASE(OP_ENDSEQ,
8389 CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg));
8390 )
8391 VMCASE(OP_STARTSUBMSG,
8392 upb_pbdecoder_frame *outer = outer_frame(d);
8393 CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink));
8394 )
8395 VMCASE(OP_ENDSUBMSG,
8396 CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
8397 )
8398 VMCASE(OP_STARTSTR,
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008399 uint32_t len = delim_remaining(d);
Chris Fallin91473dc2014-12-12 15:58:26 -08008400 upb_pbdecoder_frame *outer = outer_frame(d);
8401 CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
8402 if (len == 0) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008403 d->pc++; /* Skip OP_STRING. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008404 }
8405 )
/* OP_STRING offers the bytes left in the current buffer to the sink.  "n" is
 * the count the sink returned; a value above "len" is treated as a request to
 * skip ahead, which must not go past the end of the delimited region. */
8406 VMCASE(OP_STRING,
8407 uint32_t len = curbufleft(d);
8408 size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle);
8409 if (n > len) {
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008410 if (n > delim_remaining(d)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08008411 seterr(d, "Tried to skip past end of string.");
8412 return upb_pbdecoder_suspend(d);
8413 } else {
8414 int32_t ret = skip(d, n);
Josh Habermane8ed0212015-06-08 17:56:03 -07008415 /* This shouldn't return DECODE_OK, because n > len. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008416 assert(ret >= 0);
8417 return ret;
8418 }
8419 }
8420 advance(d, n);
8421 if (n < len || d->delim_end == NULL) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008422 /* We aren't finished with this string yet. */
8423 d->pc--; /* Repeat OP_STRING. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008424 if (n > 0) checkpoint(d);
8425 return upb_pbdecoder_suspend(d);
8426 }
8427 )
8428 VMCASE(OP_ENDSTR,
8429 CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
8430 )
8431 VMCASE(OP_PUSHTAGDELIM,
8432 CHECK_SUSPEND(pushtagdelim(d, arg));
8433 )
/* The group number is stored in the bytecode word after the instruction. */
8434 VMCASE(OP_SETBIGGROUPNUM,
8435 d->top->groupnum = *d->pc++;
8436 )
8437 VMCASE(OP_POP,
8438 assert(d->top > d->stack);
8439 decoder_pop(d);
8440 )
/* Read a 32-bit varint length and push a frame that ends that many bytes
 * past the current offset. */
8441 VMCASE(OP_PUSHLENDELIM,
8442 uint32_t len;
8443 CHECK_RETURN(decode_v32(d, &len));
8444 CHECK_SUSPEND(decoder_push(d, offset(d) + len));
8445 set_delim_end(d);
8446 )
8447 VMCASE(OP_SETDELIM,
8448 set_delim_end(d);
8449 )
8450 VMCASE(OP_CHECKDELIM,
Josh Habermane8ed0212015-06-08 17:56:03 -07008451 /* We are guaranteed of this assert because we never allow ourselves to
8452 * consume bytes beyond data_end, which covers delim_end when non-NULL.
8453 */
Chris Fallin91473dc2014-12-12 15:58:26 -08008454 assert(!(d->delim_end && d->ptr > d->delim_end));
8455 if (d->ptr == d->delim_end)
8456 d->pc += longofs;
8457 )
/* OP_CALL saves the address of the next instruction on the call stack and
 * jumps; OP_RET pops that address back into d->pc. */
8458 VMCASE(OP_CALL,
8459 d->callstack[d->call_len++] = d->pc;
8460 d->pc += longofs;
8461 )
8462 VMCASE(OP_RET,
8463 assert(d->call_len > 0);
8464 d->pc = d->callstack[--d->call_len];
8465 )
8466 VMCASE(OP_BRANCH,
8467 d->pc += longofs;
8468 )
/* The tag opcodes compare the upcoming input bytes with an expected tag.  On
 * a match the tag bytes are consumed.  On a mismatch control reaches the
 * "badtag" label inside OP_TAG1 (OP_TAG2 and OP_TAGN jump there too), where
 * the low byte of the operand is read as a signed offset: LABEL_DISPATCH
 * selects dispatch(), any other value is added to d->pc. */
8469 VMCASE(OP_TAG1,
Josh Habermane8ed0212015-06-08 17:56:03 -07008470 uint8_t expected;
Chris Fallin91473dc2014-12-12 15:58:26 -08008471 CHECK_SUSPEND(curbufleft(d) > 0);
Josh Habermane8ed0212015-06-08 17:56:03 -07008472 expected = (arg >> 8) & 0xff;
Chris Fallin91473dc2014-12-12 15:58:26 -08008473 if (*d->ptr == expected) {
8474 advance(d, 1);
8475 } else {
8476 int8_t shortofs;
8477 badtag:
8478 shortofs = arg;
8479 if (shortofs == LABEL_DISPATCH) {
8480 CHECK_RETURN(dispatch(d));
8481 } else {
8482 d->pc += shortofs;
Josh Habermane8ed0212015-06-08 17:56:03 -07008483 break; /* Avoid checkpoint(). */
Chris Fallin91473dc2014-12-12 15:58:26 -08008484 }
8485 }
8486 )
8487 VMCASE(OP_TAG2,
Josh Habermane8ed0212015-06-08 17:56:03 -07008488 uint16_t expected;
Chris Fallin91473dc2014-12-12 15:58:26 -08008489 CHECK_SUSPEND(curbufleft(d) > 0);
Josh Habermane8ed0212015-06-08 17:56:03 -07008490 expected = (arg >> 8) & 0xffff;
Chris Fallin91473dc2014-12-12 15:58:26 -08008491 if (curbufleft(d) >= 2) {
8492 uint16_t actual;
8493 memcpy(&actual, d->ptr, 2);
8494 if (expected == actual) {
8495 advance(d, 2);
8496 } else {
8497 goto badtag;
8498 }
8499 } else {
8500 int32_t result = upb_pbdecoder_checktag_slow(d, expected);
8501 if (result == DECODE_MISMATCH) goto badtag;
8502 if (result >= 0) return result;
8503 }
8504 )
/* For OP_TAGN the expected tag is stored in the next two 32-bit bytecode
 * words, which are copied out before d->pc is moved past them. */
8505 VMCASE(OP_TAGN, {
8506 uint64_t expected;
Josh Habermane8ed0212015-06-08 17:56:03 -07008507 int32_t result;
Chris Fallin91473dc2014-12-12 15:58:26 -08008508 memcpy(&expected, d->pc, 8);
8509 d->pc += 2;
Josh Habermane8ed0212015-06-08 17:56:03 -07008510 result = upb_pbdecoder_checktag_slow(d, expected);
Chris Fallin91473dc2014-12-12 15:58:26 -08008511 if (result == DECODE_MISMATCH) goto badtag;
8512 if (result >= 0) return result;
8513 })
Chris Fallin97b663a2015-01-09 16:15:22 -08008514 VMCASE(OP_DISPATCH, {
8515 CHECK_RETURN(dispatch(d));
8516 })
Chris Fallin91473dc2014-12-12 15:58:26 -08008517 VMCASE(OP_HALT, {
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008518 return d->size_param;
Chris Fallin91473dc2014-12-12 15:58:26 -08008519 })
8520 }
8521 }
8522}
8523
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008524
8525/* BytesHandler handlers ******************************************************/
8526
Chris Fallin91473dc2014-12-12 15:58:26 -08008527void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
8528 upb_pbdecoder *d = closure;
8529 UPB_UNUSED(size_hint);
Chris Fallind3262772015-05-14 18:24:26 -07008530 d->top->end_ofs = UINT64_MAX;
8531 d->bufstart_ofs = 0;
Chris Fallin91473dc2014-12-12 15:58:26 -08008532 d->call_len = 1;
Chris Fallind3262772015-05-14 18:24:26 -07008533 d->callstack[0] = &halt;
Chris Fallin91473dc2014-12-12 15:58:26 -08008534 d->pc = pc;
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008535 d->skip = 0;
Chris Fallin91473dc2014-12-12 15:58:26 -08008536 return d;
8537}
8538
8539void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008540 upb_pbdecoder *d = closure;
Chris Fallin91473dc2014-12-12 15:58:26 -08008541 UPB_UNUSED(hd);
8542 UPB_UNUSED(size_hint);
Chris Fallind3262772015-05-14 18:24:26 -07008543 d->top->end_ofs = UINT64_MAX;
8544 d->bufstart_ofs = 0;
Chris Fallin91473dc2014-12-12 15:58:26 -08008545 d->call_len = 0;
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008546 d->skip = 0;
Chris Fallin91473dc2014-12-12 15:58:26 -08008547 return d;
8548}
8549
8550bool upb_pbdecoder_end(void *closure, const void *handler_data) {
8551 upb_pbdecoder *d = closure;
8552 const upb_pbdecodermethod *method = handler_data;
Josh Habermane8ed0212015-06-08 17:56:03 -07008553 uint64_t end;
8554 char dummy;
Chris Fallin91473dc2014-12-12 15:58:26 -08008555
8556 if (d->residual_end > d->residual) {
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008557 seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
8558 return false;
8559 }
8560
8561 if (d->skip) {
8562 seterr(d, "Unexpected EOF inside skipped data");
Chris Fallin91473dc2014-12-12 15:58:26 -08008563 return false;
8564 }
8565
8566 if (d->top->end_ofs != UINT64_MAX) {
8567 seterr(d, "Unexpected EOF inside delimited string");
8568 return false;
8569 }
8570
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008571 /* The user's end() call indicates that the message ends here. */
Josh Habermane8ed0212015-06-08 17:56:03 -07008572 end = offset(d);
Chris Fallin91473dc2014-12-12 15:58:26 -08008573 d->top->end_ofs = end;
8574
Chris Fallin91473dc2014-12-12 15:58:26 -08008575#ifdef UPB_USE_JIT_X64
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008576 if (method->is_native_) {
8577 const mgroup *group = (const mgroup*)method->group;
Chris Fallin91473dc2014-12-12 15:58:26 -08008578 if (d->top != d->stack)
8579 d->stack->end_ofs = 0;
8580 group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
Josh Habermane8ed0212015-06-08 17:56:03 -07008581 } else
Chris Fallin91473dc2014-12-12 15:58:26 -08008582#endif
Josh Habermane8ed0212015-06-08 17:56:03 -07008583 {
Chris Fallin91473dc2014-12-12 15:58:26 -08008584 const uint32_t *p = d->pc;
Josh Habermane8ed0212015-06-08 17:56:03 -07008585 d->stack->end_ofs = end;
8586 /* Check the previous bytecode, but guard against beginning. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008587 if (p != method->code_base.ptr) p--;
8588 if (getop(*p) == OP_CHECKDELIM) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008589 /* Rewind from OP_TAG* to OP_CHECKDELIM. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008590 assert(getop(*d->pc) == OP_TAG1 ||
8591 getop(*d->pc) == OP_TAG2 ||
Chris Fallin97b663a2015-01-09 16:15:22 -08008592 getop(*d->pc) == OP_TAGN ||
Josh Habermane8ed0212015-06-08 17:56:03 -07008593 getop(*d->pc) == OP_DISPATCH);
Chris Fallin91473dc2014-12-12 15:58:26 -08008594 d->pc = p;
8595 }
8596 upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
Chris Fallin91473dc2014-12-12 15:58:26 -08008597 }
Chris Fallin91473dc2014-12-12 15:58:26 -08008598
8599 if (d->call_len != 0) {
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008600 seterr(d, "Unexpected EOF inside submessage or group");
Chris Fallin91473dc2014-12-12 15:58:26 -08008601 return false;
8602 }
8603
8604 return true;
8605}
8606
Josh Haberman5bdf4a42015-08-03 15:51:31 -07008607size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
8608 size_t size, const upb_bufhandle *handle) {
8609 int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
8610
8611 if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
8612 CHECK_RETURN(result);
8613
8614 return run_decoder_vm(decoder, group, handle);
8615}
8616
8617
8618/* Public API *****************************************************************/
8619
Chris Fallin91473dc2014-12-12 15:58:26 -08008620void upb_pbdecoder_reset(upb_pbdecoder *d) {
8621 d->top = d->stack;
Chris Fallin91473dc2014-12-12 15:58:26 -08008622 d->top->groupnum = 0;
Chris Fallin91473dc2014-12-12 15:58:26 -08008623 d->ptr = d->residual;
8624 d->buf = d->residual;
8625 d->end = d->residual;
8626 d->residual_end = d->residual;
Chris Fallind3262772015-05-14 18:24:26 -07008627}
8628
Chris Fallind3262772015-05-14 18:24:26 -07008629upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
8630 upb_sink *sink) {
8631 const size_t default_max_nesting = 64;
8632#ifndef NDEBUG
8633 size_t size_before = upb_env_bytesallocated(e);
8634#endif
8635
8636 upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
8637 if (!d) return NULL;
8638
8639 d->method_ = m;
8640 d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
8641 d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
8642 if (!d->stack || !d->callstack) {
8643 return NULL;
8644 }
8645
8646 d->env = e;
8647 d->limit = d->stack + default_max_nesting - 1;
8648 d->stack_size = default_max_nesting;
Josh Haberman78da6662016-01-13 19:05:43 -08008649 d->status = NULL;
Chris Fallind3262772015-05-14 18:24:26 -07008650
8651 upb_pbdecoder_reset(d);
8652 upb_bytessink_reset(&d->input_, &m->input_handler_, d);
8653
8654 assert(sink);
8655 if (d->method_->dest_handlers_) {
8656 if (sink->handlers != d->method_->dest_handlers_)
8657 return NULL;
8658 }
8659 upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
8660
Josh Habermane8ed0212015-06-08 17:56:03 -07008661 /* If this fails, increase the value in decoder.h. */
Chris Fallind3262772015-05-14 18:24:26 -07008662 assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
8663 return d;
Chris Fallin91473dc2014-12-12 15:58:26 -08008664}
8665
8666uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
8667 return offset(d);
8668}
8669
Chris Fallin91473dc2014-12-12 15:58:26 -08008670const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
8671 return d->method_;
8672}
8673
Chris Fallin91473dc2014-12-12 15:58:26 -08008674upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
8675 return &d->input_;
8676}
Chris Fallind3262772015-05-14 18:24:26 -07008677
8678size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
8679 return d->stack_size;
8680}
8681
8682bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
8683 assert(d->top >= d->stack);
8684
8685 if (max < (size_t)(d->top - d->stack)) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008686 /* Can't set a limit smaller than what we are currently at. */
Chris Fallind3262772015-05-14 18:24:26 -07008687 return false;
8688 }
8689
8690 if (max > d->stack_size) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008691 /* Need to reallocate stack and callstack to accommodate. */
Chris Fallind3262772015-05-14 18:24:26 -07008692 size_t old_size = stacksize(d, d->stack_size);
8693 size_t new_size = stacksize(d, max);
8694 void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
8695 if (!p) {
8696 return false;
8697 }
8698 d->stack = p;
8699
8700 old_size = callstacksize(d, d->stack_size);
8701 new_size = callstacksize(d, max);
8702 p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
8703 if (!p) {
8704 return false;
8705 }
8706 d->callstack = p;
8707
8708 d->stack_size = max;
8709 }
8710
8711 d->limit = d->stack + max - 1;
8712 return true;
8713}
Chris Fallin91473dc2014-12-12 15:58:26 -08008714/*
Josh Haberman181c7f22015-07-15 11:05:10 -07008715** upb::Encoder
8716**
8717** Since we are implementing pure handlers (ie. without any out-of-band access
8718** to pre-computed lengths), we have to buffer all submessages before we can
8719** emit even their first byte.
8720**
8721** Not knowing the size of submessages also means we can't write a perfect
8722** zero-copy implementation, even with buffering. Lengths are stored as
8723** varints, which means that we don't know how many bytes to reserve for the
8724** length until we know what the length is.
8725**
8726** This leaves us with three main choices:
8727**
8728** 1. buffer all submessage data in a temporary buffer, then copy it exactly
8729** once into the output buffer.
8730**
8731** 2. attempt to buffer data directly into the output buffer, estimating how
8732** many bytes each length will take. When our guesses are wrong, use
8733** memmove() to grow or shrink the allotted space.
8734**
8735** 3. buffer directly into the output buffer, allocating a max length
8736** ahead-of-time for each submessage length. If we overallocated, we waste
8737** space, but no memcpy() or memmove() is required. This approach requires
8738** defining a maximum size for submessages and rejecting submessages that
8739** exceed that size.
8740**
8741** (2) and (3) have the potential to have better performance, but they are more
8742** complicated and subtle to implement:
8743**
8744** (3) requires making an arbitrary choice of the maximum message size; it
8745** wastes space when submessages are shorter than this and fails
8746** completely when they are longer. This makes it more finicky and
8747** requires configuration based on the input. It also makes it impossible
8748** to perfectly match the output of reference encoders that always use the
8749** optimal amount of space for each length.
8750**
Josh Habermanf654d492016-02-18 11:07:51 -08008751** (2) requires guessing the size upfront, and if multiple lengths are
Josh Haberman181c7f22015-07-15 11:05:10 -07008752** guessed wrong the minimum required number of memmove() operations may
8753** be complicated to compute correctly. Implemented properly, it may have
8754** a useful amortized or average cost, but more investigation is required
8755** to determine this and what the optimal algorithm is to achieve it.
8756**
8757** (1) makes you always pay for exactly one copy, but its implementation is
8758** the simplest and its performance is predictable.
8759**
8760** So for now, we implement (1) only. If we wish to optimize later, we should
8761** be able to do it without affecting users.
8762**
8763** The strategy is to buffer the segments of data that do *not* depend on
8764** unknown lengths in one buffer, and keep a separate buffer of segment pointers
8765** and lengths. When the top-level submessage ends, we can go beginning to end,
8766** alternating the writing of lengths with memcpy() of the rest of the data.
8767** At the top level though, no buffering is required.
8768*/
Chris Fallin91473dc2014-12-12 15:58:26 -08008769
8770
8771#include <stdlib.h>
8772
Josh Habermane8ed0212015-06-08 17:56:03 -07008773/* The output buffer is divided into segments; a segment is a string of data
8774 * that is "ready to go" -- it does not need any varint lengths inserted into
8775 * the middle. The seams between segments are where varints will be inserted
8776 * once they are known.
8777 *
8778 * We also use the concept of a "run", which is a range of encoded bytes that
8779 * occur at a single submessage level. Every segment contains one or more runs.
8780 *
8781 * A segment can span messages. Consider:
8782 *
8783 * .--Submessage lengths---------.
8784 * | | |
8785 * | V V
8786 * V | |--------------- | |-----------------
8787 * Submessages: | |-----------------------------------------------
8788 * Top-level msg: ------------------------------------------------------------
8789 *
8790 * Segments: ----- ------------------- -----------------
8791 * Runs: *---- *--------------*--- *----------------
8792 * (* marks the start)
8793 *
8794 * Note that the top-level message is not in any segment because it does not
8795 * have any length preceding it.
8796 *
8797 * A segment is only interrupted when another length needs to be inserted. So
8798 * observe how the second segment spans both the inner submessage and part of
8799 * the next enclosing message. */
/* One entry in the encoder's segment list: a span of buffered bytes together
 * with the length value that has to be emitted in front of it. */
typedef struct {
  /* Value that is varint-encoded immediately before the segment's bytes. */
  uint32_t msglen;
  /* Number of buffered bytes that make up the segment itself. */
  uint32_t seglen;
} upb_pb_encoder_segment;
8804
8805struct upb_pb_encoder {
8806 upb_env *env;
8807
Josh Habermane8ed0212015-06-08 17:56:03 -07008808 /* Our input and output. */
Chris Fallind3262772015-05-14 18:24:26 -07008809 upb_sink input_;
8810 upb_bytessink *output_;
8811
Josh Habermane8ed0212015-06-08 17:56:03 -07008812 /* The "subclosure" -- used as the inner closure as part of the bytessink
8813 * protocol. */
Chris Fallind3262772015-05-14 18:24:26 -07008814 void *subc;
8815
Josh Habermane8ed0212015-06-08 17:56:03 -07008816 /* The output buffer and limit, and our current write position. "buf"
8817 * initially points to "initbuf", but is dynamically allocated if we need to
8818 * grow beyond the initial size. */
Chris Fallind3262772015-05-14 18:24:26 -07008819 char *buf, *ptr, *limit;
8820
Josh Habermane8ed0212015-06-08 17:56:03 -07008821 /* The beginning of the current run, or undefined if we are at the top
8822 * level. */
Chris Fallind3262772015-05-14 18:24:26 -07008823 char *runbegin;
8824
Josh Habermane8ed0212015-06-08 17:56:03 -07008825 /* The list of segments we are accumulating. */
Chris Fallind3262772015-05-14 18:24:26 -07008826 upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
8827
Josh Habermane8ed0212015-06-08 17:56:03 -07008828 /* The stack of enclosing submessages. Each entry in the stack points to the
8829 * segment where this submessage's length is being accumulated. */
Chris Fallind3262772015-05-14 18:24:26 -07008830 int *stack, *top, *stacklimit;
8831
Josh Habermane8ed0212015-06-08 17:56:03 -07008832 /* Depth of startmsg/endmsg calls. */
Chris Fallind3262772015-05-14 18:24:26 -07008833 int depth;
8834};
8835
Chris Fallin91473dc2014-12-12 15:58:26 -08008836/* low-level buffering ********************************************************/
8837
Josh Habermane8ed0212015-06-08 17:56:03 -07008838/* Low-level functions for interacting with the output buffer. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008839
Josh Habermane8ed0212015-06-08 17:56:03 -07008840/* TODO(haberman): handle pushback */
Chris Fallin91473dc2014-12-12 15:58:26 -08008841static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
8842 size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
8843 UPB_ASSERT_VAR(n, n == len);
8844}
8845
8846static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
8847 return &e->segbuf[*e->top];
8848}
8849
Josh Habermane8ed0212015-06-08 17:56:03 -07008850/* Call to ensure that at least "bytes" bytes are available for writing at
8851 * e->ptr. Returns false if the bytes could not be allocated. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008852static bool reserve(upb_pb_encoder *e, size_t bytes) {
Chris Fallind3262772015-05-14 18:24:26 -07008853 if ((size_t)(e->limit - e->ptr) < bytes) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008854 /* Grow buffer. */
8855 char *new_buf;
Chris Fallin91473dc2014-12-12 15:58:26 -08008856 size_t needed = bytes + (e->ptr - e->buf);
8857 size_t old_size = e->limit - e->buf;
Chris Fallind3262772015-05-14 18:24:26 -07008858
Chris Fallin91473dc2014-12-12 15:58:26 -08008859 size_t new_size = old_size;
Chris Fallind3262772015-05-14 18:24:26 -07008860
Chris Fallin91473dc2014-12-12 15:58:26 -08008861 while (new_size < needed) {
8862 new_size *= 2;
8863 }
8864
Josh Habermane8ed0212015-06-08 17:56:03 -07008865 new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
Chris Fallin91473dc2014-12-12 15:58:26 -08008866
8867 if (new_buf == NULL) {
8868 return false;
8869 }
8870
Chris Fallin91473dc2014-12-12 15:58:26 -08008871 e->ptr = new_buf + (e->ptr - e->buf);
8872 e->runbegin = new_buf + (e->runbegin - e->buf);
8873 e->limit = new_buf + new_size;
8874 e->buf = new_buf;
8875 }
8876
8877 return true;
8878}
8879
Josh Habermane8ed0212015-06-08 17:56:03 -07008880/* Call when "bytes" bytes have been writte at e->ptr. The caller *must* have
8881 * previously called reserve() with at least this many bytes. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008882static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
Chris Fallind3262772015-05-14 18:24:26 -07008883 assert((size_t)(e->limit - e->ptr) >= bytes);
Chris Fallin91473dc2014-12-12 15:58:26 -08008884 e->ptr += bytes;
8885}
8886
Josh Habermane8ed0212015-06-08 17:56:03 -07008887/* Call when all of the bytes for a handler have been written. Flushes the
8888 * bytes if possible and necessary, returning false if this failed. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008889static bool commit(upb_pb_encoder *e) {
8890 if (!e->top) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008891 /* We aren't inside a delimited region. Flush our accumulated bytes to
8892 * the output.
8893 *
8894 * TODO(haberman): in the future we may want to delay flushing for
8895 * efficiency reasons. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008896 putbuf(e, e->buf, e->ptr - e->buf);
8897 e->ptr = e->buf;
8898 }
8899
8900 return true;
8901}
8902
Josh Habermane8ed0212015-06-08 17:56:03 -07008903/* Writes the given bytes to the buffer, handling reserve/advance. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008904static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
8905 if (!reserve(e, len)) {
8906 return false;
8907 }
8908
8909 memcpy(e->ptr, data, len);
8910 encoder_advance(e, len);
8911 return true;
8912}
8913
Josh Habermane8ed0212015-06-08 17:56:03 -07008914/* Finish the current run by adding the run totals to the segment and message
8915 * length. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008916static void accumulate(upb_pb_encoder *e) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008917 size_t run_len;
Chris Fallin91473dc2014-12-12 15:58:26 -08008918 assert(e->ptr >= e->runbegin);
Josh Habermane8ed0212015-06-08 17:56:03 -07008919 run_len = e->ptr - e->runbegin;
Chris Fallin91473dc2014-12-12 15:58:26 -08008920 e->segptr->seglen += run_len;
8921 top(e)->msglen += run_len;
8922 e->runbegin = e->ptr;
8923}
8924
Josh Habermane8ed0212015-06-08 17:56:03 -07008925/* Call to indicate the start of delimited region for which the full length is
8926 * not yet known. All data will be buffered until the length is known.
8927 * Delimited regions may be nested; their lengths will all be tracked properly. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008928static bool start_delim(upb_pb_encoder *e) {
8929 if (e->top) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008930 /* We are already buffering, advance to the next segment and push it on the
8931 * stack. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008932 accumulate(e);
8933
8934 if (++e->top == e->stacklimit) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008935 /* TODO(haberman): grow stack? */
Chris Fallin91473dc2014-12-12 15:58:26 -08008936 return false;
8937 }
8938
8939 if (++e->segptr == e->seglimit) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008940 /* Grow segment buffer. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008941 size_t old_size =
8942 (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
8943 size_t new_size = old_size * 2;
Chris Fallind3262772015-05-14 18:24:26 -07008944 upb_pb_encoder_segment *new_buf =
8945 upb_env_realloc(e->env, e->segbuf, old_size, new_size);
Chris Fallin91473dc2014-12-12 15:58:26 -08008946
8947 if (new_buf == NULL) {
8948 return false;
8949 }
8950
Chris Fallin91473dc2014-12-12 15:58:26 -08008951 e->segptr = new_buf + (e->segptr - e->segbuf);
8952 e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
8953 e->segbuf = new_buf;
8954 }
8955 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -07008956 /* We were previously at the top level, start buffering. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008957 e->segptr = e->segbuf;
8958 e->top = e->stack;
8959 e->runbegin = e->ptr;
8960 }
8961
8962 *e->top = e->segptr - e->segbuf;
8963 e->segptr->seglen = 0;
8964 e->segptr->msglen = 0;
8965
8966 return true;
8967}
8968
Josh Habermane8ed0212015-06-08 17:56:03 -07008969/* Call to indicate the end of a delimited region. We now know the length of
8970 * the delimited region. If we are not nested inside any other delimited
8971 * regions, we can now emit all of the buffered data we accumulated. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008972static bool end_delim(upb_pb_encoder *e) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008973 size_t msglen;
Chris Fallin91473dc2014-12-12 15:58:26 -08008974 accumulate(e);
Josh Habermane8ed0212015-06-08 17:56:03 -07008975 msglen = top(e)->msglen;
Chris Fallin91473dc2014-12-12 15:58:26 -08008976
8977 if (e->top == e->stack) {
Josh Habermane8ed0212015-06-08 17:56:03 -07008978 /* All lengths are now available, emit all buffered data. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008979 char buf[UPB_PB_VARINT_MAX_LEN];
8980 upb_pb_encoder_segment *s;
8981 const char *ptr = e->buf;
8982 for (s = e->segbuf; s <= e->segptr; s++) {
8983 size_t lenbytes = upb_vencode64(s->msglen, buf);
8984 putbuf(e, buf, lenbytes);
8985 putbuf(e, ptr, s->seglen);
8986 ptr += s->seglen;
8987 }
8988
8989 e->ptr = e->buf;
8990 e->top = NULL;
8991 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -07008992 /* Need to keep buffering; propagate length info into enclosing
8993 * submessages. */
Chris Fallin91473dc2014-12-12 15:58:26 -08008994 --e->top;
8995 top(e)->msglen += msglen + upb_varint_size(msglen);
8996 }
8997
8998 return true;
8999}
9000
9001
9002/* tag_t **********************************************************************/
9003
/* A tag that was varint-encoded ahead of time, stored with its length so it
 * can be copied into the output as-is. */
typedef struct {
  /* Number of valid bytes in "tag". */
  uint8_t bytes;
  /* The encoded tag bytes. */
  char tag[7];
} tag_t;
9010
Josh Habermane8ed0212015-06-08 17:56:03 -07009011/* Allocates a new tag for this field, and sets it in these handlerattr. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009012static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
9013 upb_handlerattr *attr) {
9014 uint32_t n = upb_fielddef_number(f);
9015
9016 tag_t *tag = malloc(sizeof(tag_t));
9017 tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
9018
9019 upb_handlerattr_init(attr);
9020 upb_handlerattr_sethandlerdata(attr, tag);
9021 upb_handlers_addcleanup(h, tag, free);
9022}
9023
9024static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
9025 return encode_bytes(e, tag->tag, tag->bytes);
9026}
9027
9028
9029/* encoding of wire types *****************************************************/
9030
9031static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
Josh Habermane8ed0212015-06-08 17:56:03 -07009032 /* TODO(haberman): byte-swap for big endian. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009033 return encode_bytes(e, &val, sizeof(uint64_t));
9034}
9035
9036static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
Josh Habermane8ed0212015-06-08 17:56:03 -07009037 /* TODO(haberman): byte-swap for big endian. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009038 return encode_bytes(e, &val, sizeof(uint32_t));
9039}
9040
9041static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
9042 if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) {
9043 return false;
9044 }
9045
9046 encoder_advance(e, upb_vencode64(val, e->ptr));
9047 return true;
9048}
9049
/* Returns the object representation of "d" reinterpreted as a 64-bit unsigned
 * integer.  The bytes are copied rather than converted, so the numeric value
 * is not rounded or truncated. */
static uint64_t dbl2uint64(double d) {
  uint64_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
9055
/* Returns the object representation of "d" reinterpreted as a 32-bit unsigned
 * integer.  The bytes are copied rather than converted, so the numeric value
 * is not rounded or truncated. */
static uint32_t flt2uint32(float d) {
  uint32_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
9061
9062
9063/* encoding of proto types ****************************************************/
9064
9065static bool startmsg(void *c, const void *hd) {
9066 upb_pb_encoder *e = c;
9067 UPB_UNUSED(hd);
9068 if (e->depth++ == 0) {
9069 upb_bytessink_start(e->output_, 0, &e->subc);
9070 }
9071 return true;
9072}
9073
9074static bool endmsg(void *c, const void *hd, upb_status *status) {
9075 upb_pb_encoder *e = c;
9076 UPB_UNUSED(hd);
9077 UPB_UNUSED(status);
9078 if (--e->depth == 0) {
9079 upb_bytessink_end(e->output_);
9080 }
9081 return true;
9082}
9083
9084static void *encode_startdelimfield(void *c, const void *hd) {
9085 bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
9086 return ok ? c : UPB_BREAK;
9087}
9088
/* Ends a length-delimited field by closing the delimited region opened for
 * it.  The handler data is unused.  Returns the result of end_delim(). */
static bool encode_enddelimfield(void *c, const void *hd) {
  bool closed;
  UPB_UNUSED(hd);
  closed = end_delim(c);
  return closed;
}
9093
9094static void *encode_startgroup(void *c, const void *hd) {
9095 return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
9096}
9097
/* Ends a group: emits the tag passed as handler data and commits.
 * Returns true only if both steps succeed. */
static bool encode_endgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) return false;
  return commit(c);
}
9101
/* Start-of-string handler.  A string is emitted as an ordinary
 * length-delimited field, so this defers to encode_startdelimfield().
 * |size_hint| is ignored. */
static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
  void *subclosure;
  UPB_UNUSED(size_hint);
  subclosure = encode_startdelimfield(c, hd);
  return subclosure;
}
9106
9107static size_t encode_strbuf(void *c, const void *hd, const char *buf,
9108 size_t len, const upb_bufhandle *h) {
9109 UPB_UNUSED(hd);
9110 UPB_UNUSED(h);
9111 return encode_bytes(c, buf, len) ? len : 0;
9112}
9113
9114#define T(type, ctype, convert, encode) \
9115 static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
9116 return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e); \
9117 } \
9118 static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
9119 UPB_UNUSED(hd); \
9120 return encode(e, (convert)(val)); \
9121 }
9122
9123T(double, double, dbl2uint64, encode_fixed64)
Josh Habermane8ed0212015-06-08 17:56:03 -07009124T(float, float, flt2uint32, encode_fixed32)
9125T(int64, int64_t, uint64_t, encode_varint)
9126T(int32, int32_t, uint32_t, encode_varint)
9127T(fixed64, uint64_t, uint64_t, encode_fixed64)
9128T(fixed32, uint32_t, uint32_t, encode_fixed32)
9129T(bool, bool, bool, encode_varint)
9130T(uint32, uint32_t, uint32_t, encode_varint)
9131T(uint64, uint64_t, uint64_t, encode_varint)
9132T(enum, int32_t, uint32_t, encode_varint)
9133T(sfixed32, int32_t, uint32_t, encode_fixed32)
9134T(sfixed64, int64_t, uint64_t, encode_fixed64)
9135T(sint32, int32_t, upb_zzenc_32, encode_varint)
9136T(sint64, int64_t, upb_zzenc_64, encode_varint)
Chris Fallin91473dc2014-12-12 15:58:26 -08009137
9138#undef T
9139
9140
9141/* code to build the handlers *************************************************/
9142
9143static void newhandlers_callback(const void *closure, upb_handlers *h) {
Josh Habermane8ed0212015-06-08 17:56:03 -07009144 const upb_msgdef *m;
9145 upb_msg_field_iter i;
9146
Chris Fallin91473dc2014-12-12 15:58:26 -08009147 UPB_UNUSED(closure);
9148
9149 upb_handlers_setstartmsg(h, startmsg, NULL);
9150 upb_handlers_setendmsg(h, endmsg, NULL);
9151
Josh Habermane8ed0212015-06-08 17:56:03 -07009152 m = upb_handlers_msgdef(h);
Chris Fallinfcd88892015-01-13 18:14:39 -08009153 for(upb_msg_field_begin(&i, m);
9154 !upb_msg_field_done(&i);
9155 upb_msg_field_next(&i)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08009156 const upb_fielddef *f = upb_msg_iter_field(&i);
9157 bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
9158 upb_fielddef_packed(f);
9159 upb_handlerattr attr;
9160 upb_wiretype_t wt =
9161 packed ? UPB_WIRE_TYPE_DELIMITED
9162 : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
9163
Josh Habermane8ed0212015-06-08 17:56:03 -07009164 /* Pre-encode the tag for this field. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009165 new_tag(h, f, wt, &attr);
9166
9167 if (packed) {
9168 upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
9169 upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
9170 }
9171
9172#define T(upper, lower, upbtype) \
9173 case UPB_DESCRIPTOR_TYPE_##upper: \
9174 if (packed) { \
9175 upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
9176 } else { \
9177 upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
9178 } \
9179 break;
9180
9181 switch (upb_fielddef_descriptortype(f)) {
9182 T(DOUBLE, double, double);
9183 T(FLOAT, float, float);
9184 T(INT64, int64, int64);
9185 T(INT32, int32, int32);
9186 T(FIXED64, fixed64, uint64);
9187 T(FIXED32, fixed32, uint32);
9188 T(BOOL, bool, bool);
9189 T(UINT32, uint32, uint32);
9190 T(UINT64, uint64, uint64);
9191 T(ENUM, enum, int32);
9192 T(SFIXED32, sfixed32, int32);
9193 T(SFIXED64, sfixed64, int64);
9194 T(SINT32, sint32, int32);
9195 T(SINT64, sint64, int64);
9196 case UPB_DESCRIPTOR_TYPE_STRING:
9197 case UPB_DESCRIPTOR_TYPE_BYTES:
9198 upb_handlers_setstartstr(h, f, encode_startstr, &attr);
9199 upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
9200 upb_handlers_setstring(h, f, encode_strbuf, &attr);
9201 break;
9202 case UPB_DESCRIPTOR_TYPE_MESSAGE:
9203 upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
9204 upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
9205 break;
9206 case UPB_DESCRIPTOR_TYPE_GROUP: {
Josh Habermane8ed0212015-06-08 17:56:03 -07009207 /* Endgroup takes a different tag (wire_type = END_GROUP). */
Chris Fallin91473dc2014-12-12 15:58:26 -08009208 upb_handlerattr attr2;
9209 new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
9210
9211 upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
9212 upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);
9213
9214 upb_handlerattr_uninit(&attr2);
9215 break;
9216 }
9217 }
9218
9219#undef T
9220
9221 upb_handlerattr_uninit(&attr);
9222 }
9223}
9224
Chris Fallind3262772015-05-14 18:24:26 -07009225void upb_pb_encoder_reset(upb_pb_encoder *e) {
9226 e->segptr = NULL;
9227 e->top = NULL;
9228 e->depth = 0;
9229}
9230
Chris Fallin91473dc2014-12-12 15:58:26 -08009231
9232/* public API *****************************************************************/
9233
9234const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
9235 const void *owner) {
9236 return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
9237}
9238
Chris Fallind3262772015-05-14 18:24:26 -07009239upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
9240 upb_bytessink *output) {
9241 const size_t initial_bufsize = 256;
9242 const size_t initial_segbufsize = 16;
Josh Habermane8ed0212015-06-08 17:56:03 -07009243 /* TODO(haberman): make this configurable. */
Chris Fallind3262772015-05-14 18:24:26 -07009244 const size_t stack_size = 64;
9245#ifndef NDEBUG
9246 const size_t size_before = upb_env_bytesallocated(env);
9247#endif
Chris Fallin91473dc2014-12-12 15:58:26 -08009248
Chris Fallind3262772015-05-14 18:24:26 -07009249 upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
9250 if (!e) return NULL;
Chris Fallin91473dc2014-12-12 15:58:26 -08009251
Chris Fallind3262772015-05-14 18:24:26 -07009252 e->buf = upb_env_malloc(env, initial_bufsize);
9253 e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
9254 e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
9255
9256 if (!e->buf || !e->segbuf || !e->stack) {
9257 return NULL;
Chris Fallin91473dc2014-12-12 15:58:26 -08009258 }
9259
Chris Fallind3262772015-05-14 18:24:26 -07009260 e->limit = e->buf + initial_bufsize;
9261 e->seglimit = e->segbuf + initial_segbufsize;
9262 e->stacklimit = e->stack + stack_size;
Chris Fallin91473dc2014-12-12 15:58:26 -08009263
Chris Fallin91473dc2014-12-12 15:58:26 -08009264 upb_pb_encoder_reset(e);
Chris Fallind3262772015-05-14 18:24:26 -07009265 upb_sink_reset(&e->input_, h, e);
9266
9267 e->env = env;
Chris Fallin91473dc2014-12-12 15:58:26 -08009268 e->output_ = output;
9269 e->subc = output->closure;
Chris Fallind3262772015-05-14 18:24:26 -07009270 e->ptr = e->buf;
Chris Fallin91473dc2014-12-12 15:58:26 -08009271
Josh Habermane8ed0212015-06-08 17:56:03 -07009272 /* If this fails, increase the value in encoder.h. */
Chris Fallind3262772015-05-14 18:24:26 -07009273 assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
9274 return e;
Chris Fallin91473dc2014-12-12 15:58:26 -08009275}
9276
9277upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
Chris Fallin91473dc2014-12-12 15:58:26 -08009278
9279
9280#include <stdio.h>
9281#include <stdlib.h>
9282#include <string.h>
9283
9284upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
9285 void *owner, upb_status *status) {
Josh Habermane8ed0212015-06-08 17:56:03 -07009286 /* Create handlers. */
9287 const upb_pbdecodermethod *decoder_m;
Chris Fallin91473dc2014-12-12 15:58:26 -08009288 const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
Chris Fallind3262772015-05-14 18:24:26 -07009289 upb_env env;
Josh Habermane8ed0212015-06-08 17:56:03 -07009290 upb_pbdecodermethodopts opts;
9291 upb_pbdecoder *decoder;
9292 upb_descreader *reader;
9293 bool ok;
9294 upb_def **ret = NULL;
9295 upb_def **defs;
9296
9297 upb_pbdecodermethodopts_init(&opts, reader_h);
9298 decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
9299
Chris Fallind3262772015-05-14 18:24:26 -07009300 upb_env_init(&env);
9301 upb_env_reporterrorsto(&env, status);
Chris Fallin91473dc2014-12-12 15:58:26 -08009302
Josh Habermane8ed0212015-06-08 17:56:03 -07009303 reader = upb_descreader_create(&env, reader_h);
9304 decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
Chris Fallin91473dc2014-12-12 15:58:26 -08009305
Josh Habermane8ed0212015-06-08 17:56:03 -07009306 /* Push input data. */
9307 ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
Chris Fallin91473dc2014-12-12 15:58:26 -08009308
9309 if (!ok) goto cleanup;
Josh Habermane8ed0212015-06-08 17:56:03 -07009310 defs = upb_descreader_getdefs(reader, owner, n);
Chris Fallin91473dc2014-12-12 15:58:26 -08009311 ret = malloc(sizeof(upb_def*) * (*n));
9312 memcpy(ret, defs, sizeof(upb_def*) * (*n));
9313
9314cleanup:
Chris Fallind3262772015-05-14 18:24:26 -07009315 upb_env_uninit(&env);
Chris Fallin91473dc2014-12-12 15:58:26 -08009316 upb_handlers_unref(reader_h, &reader_h);
9317 upb_pbdecodermethod_unref(decoder_m, &decoder_m);
9318 return ret;
9319}
9320
9321bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len,
9322 upb_status *status) {
9323 int n;
Josh Habermane8ed0212015-06-08 17:56:03 -07009324 bool success;
Chris Fallin91473dc2014-12-12 15:58:26 -08009325 upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, &defs, status);
9326 if (!defs) return false;
Josh Habermane8ed0212015-06-08 17:56:03 -07009327 success = upb_symtab_add(s, defs, n, &defs, status);
Chris Fallin91473dc2014-12-12 15:58:26 -08009328 free(defs);
9329 return success;
9330}
9331
/* Reads the entire file |filename| into a newly malloc()ed buffer.
 *
 * On success returns the buffer, which the caller must free(), and stores the
 * file size in *len if |len| is non-NULL.  The buffer holds one extra byte
 * beyond the file contents, set to '\0'.
 *
 * Returns NULL if the file cannot be opened, sized or read, or if memory
 * cannot be allocated.  Nothing is leaked on failure. */
char *upb_readfile(const char *filename, size_t *len) {
  long size;
  char *buf = NULL;
  FILE *f = fopen(filename, "rb");
  if (!f) return NULL;
  if (fseek(f, 0, SEEK_END) != 0) goto error;
  size = ftell(f);
  if (size < 0) goto error;
  if (fseek(f, 0, SEEK_SET) != 0) goto error;
  buf = malloc((size_t)size + 1);
  if (!buf) goto error;
  /* fread() is skipped for an empty file, where it would return 0 items. */
  if (size && fread(buf, (size_t)size, 1, f) != 1) goto error;
  buf[size] = '\0';
  fclose(f);
  if (len) *len = (size_t)size;
  return buf;

error:
  free(buf);  /* No-op when the buffer was never allocated. */
  fclose(f);
  return NULL;
}
9351
9352bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
9353 upb_status *status) {
9354 size_t len;
Josh Habermane8ed0212015-06-08 17:56:03 -07009355 bool success;
Chris Fallin91473dc2014-12-12 15:58:26 -08009356 char *data = upb_readfile(fname, &len);
9357 if (!data) {
9358 if (status) upb_status_seterrf(status, "Couldn't read file: %s", fname);
9359 return false;
9360 }
Josh Habermane8ed0212015-06-08 17:56:03 -07009361 success = upb_load_descriptor_into_symtab(symtab, data, len, status);
Chris Fallin91473dc2014-12-12 15:58:26 -08009362 free(data);
9363 return success;
9364}
9365/*
Josh Haberman181c7f22015-07-15 11:05:10 -07009366 * upb::pb::TextPrinter
Chris Fallin91473dc2014-12-12 15:58:26 -08009367 *
9368 * OPT: This is not optimized at all. It uses printf() which parses the format
9369 * string every time, and it allocates memory for every put.
9370 */
9371
9372
9373#include <ctype.h>
9374#include <float.h>
9375#include <inttypes.h>
Josh Habermane8ed0212015-06-08 17:56:03 -07009376#include <stdarg.h>
Chris Fallin91473dc2014-12-12 15:58:26 -08009377#include <stdio.h>
9378#include <stdlib.h>
9379#include <string.h>
9380
9381
Chris Fallind3262772015-05-14 18:24:26 -07009382struct upb_textprinter {
9383 upb_sink input_;
9384 upb_bytessink *output_;
9385 int indent_depth_;
9386 bool single_line_;
9387 void *subc;
9388};
9389
/* Jumps to the enclosing function's "err" label when |expr| is negative.
 * The expansion already ends in ';'. */
#define CHECK(expr) if ((expr) < 0) goto err;
9391
/* Returns the part of |longname| after its last '.', or |longname| itself
 * when it contains no '.'.  The result points into |longname|. */
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
9396
9397static int indent(upb_textprinter *p) {
9398 int i;
9399 if (!p->single_line_)
9400 for (i = 0; i < p->indent_depth_; i++)
9401 upb_bytessink_putbuf(p->output_, p->subc, " ", 2, NULL);
9402 return 0;
9403}
9404
9405static int endfield(upb_textprinter *p) {
9406 const char ch = (p->single_line_ ? ' ' : '\n');
9407 upb_bytessink_putbuf(p->output_, p->subc, &ch, 1, NULL);
9408 return 0;
9409}
9410
9411static int putescaped(upb_textprinter *p, const char *buf, size_t len,
9412 bool preserve_utf8) {
Josh Habermane8ed0212015-06-08 17:56:03 -07009413 /* Based on CEscapeInternal() from Google's protobuf release. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009414 char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
9415 const char *end = buf + len;
9416
Josh Habermane8ed0212015-06-08 17:56:03 -07009417 /* I think hex is prettier and more useful, but proto2 uses octal; should
9418 * investigate whether it can parse hex also. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009419 const bool use_hex = false;
Josh Habermane8ed0212015-06-08 17:56:03 -07009420 bool last_hex_escape = false; /* true if last output char was \xNN */
Chris Fallin91473dc2014-12-12 15:58:26 -08009421
9422 for (; buf < end; buf++) {
Josh Habermane8ed0212015-06-08 17:56:03 -07009423 bool is_hex_escape;
9424
Chris Fallin91473dc2014-12-12 15:58:26 -08009425 if (dstend - dst < 4) {
9426 upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
9427 dst = dstbuf;
9428 }
9429
Josh Habermane8ed0212015-06-08 17:56:03 -07009430 is_hex_escape = false;
Chris Fallin91473dc2014-12-12 15:58:26 -08009431 switch (*buf) {
9432 case '\n': *(dst++) = '\\'; *(dst++) = 'n'; break;
9433 case '\r': *(dst++) = '\\'; *(dst++) = 'r'; break;
9434 case '\t': *(dst++) = '\\'; *(dst++) = 't'; break;
9435 case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
9436 case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
9437 case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
9438 default:
Josh Habermane8ed0212015-06-08 17:56:03 -07009439 /* Note that if we emit \xNN and the buf character after that is a hex
9440 * digit then that digit must be escaped too to prevent it being
9441 * interpreted as part of the character code by C. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009442 if ((!preserve_utf8 || (uint8_t)*buf < 0x80) &&
9443 (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) {
9444 sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf);
9445 is_hex_escape = use_hex;
9446 dst += 4;
9447 } else {
9448 *(dst++) = *buf; break;
9449 }
9450 }
9451 last_hex_escape = is_hex_escape;
9452 }
Josh Habermane8ed0212015-06-08 17:56:03 -07009453 /* Flush remaining data. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009454 upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
9455 return 0;
9456}
9457
9458bool putf(upb_textprinter *p, const char *fmt, ...) {
9459 va_list args;
Josh Habermane8ed0212015-06-08 17:56:03 -07009460 va_list args_copy;
9461 char *str;
9462 int written;
9463 int len;
9464 bool ok;
9465
Chris Fallin91473dc2014-12-12 15:58:26 -08009466 va_start(args, fmt);
9467
Josh Habermane8ed0212015-06-08 17:56:03 -07009468 /* Run once to get the length of the string. */
9469 _upb_va_copy(args_copy, args);
9470 len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
Chris Fallin91473dc2014-12-12 15:58:26 -08009471 va_end(args_copy);
9472
Josh Habermane8ed0212015-06-08 17:56:03 -07009473 /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */
9474 str = malloc(len + 1);
Chris Fallin91473dc2014-12-12 15:58:26 -08009475 if (!str) return false;
Josh Habermane8ed0212015-06-08 17:56:03 -07009476 written = vsprintf(str, fmt, args);
Chris Fallin91473dc2014-12-12 15:58:26 -08009477 va_end(args);
9478 UPB_ASSERT_VAR(written, written == len);
9479
Josh Habermane8ed0212015-06-08 17:56:03 -07009480 ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
Chris Fallin91473dc2014-12-12 15:58:26 -08009481 free(str);
9482 return ok;
9483}
9484
9485
9486/* handlers *******************************************************************/
9487
9488static bool textprinter_startmsg(void *c, const void *hd) {
Chris Fallin91473dc2014-12-12 15:58:26 -08009489 upb_textprinter *p = c;
Josh Habermane8ed0212015-06-08 17:56:03 -07009490 UPB_UNUSED(hd);
Chris Fallin91473dc2014-12-12 15:58:26 -08009491 if (p->indent_depth_ == 0) {
9492 upb_bytessink_start(p->output_, 0, &p->subc);
9493 }
9494 return true;
9495}
9496
9497static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -07009498 upb_textprinter *p = c;
Chris Fallin91473dc2014-12-12 15:58:26 -08009499 UPB_UNUSED(hd);
9500 UPB_UNUSED(s);
Chris Fallin91473dc2014-12-12 15:58:26 -08009501 if (p->indent_depth_ == 0) {
9502 upb_bytessink_end(p->output_);
9503 }
9504 return true;
9505}
9506
/* Defines textprinter_put<name>(), a value handler that prints
 * "<field name>: <value>" using printf conversion |valfmt|, preceded by the
 * current indentation and followed by the field terminator.  The handler
 * data is the field's upb_fielddef.  The handler returns false only if
 * indent() or endfield() reports a negative result. */
#define TYPE(name, ctype, valfmt)                                             \
  static bool textprinter_put ## name(void *closure, const void *handler_data, \
                                      ctype val) {                            \
    upb_textprinter *p = closure;                                             \
    const upb_fielddef *f = handler_data;                                     \
    CHECK(indent(p));                                                         \
    putf(p, "%s: " valfmt, upb_fielddef_name(f), val);                        \
    CHECK(endfield(p));                                                       \
    return true;                                                              \
  err:                                                                        \
    return false;                                                             \
}
9519
9520static bool textprinter_putbool(void *closure, const void *handler_data,
9521 bool val) {
9522 upb_textprinter *p = closure;
9523 const upb_fielddef *f = handler_data;
9524 CHECK(indent(p));
9525 putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
9526 CHECK(endfield(p));
9527 return true;
9528err:
9529 return false;
9530}
9531
9532#define STRINGIFY_HELPER(x) #x
9533#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)
9534
9535TYPE(int32, int32_t, "%" PRId32)
9536TYPE(int64, int64_t, "%" PRId64)
Josh Habermane8ed0212015-06-08 17:56:03 -07009537TYPE(uint32, uint32_t, "%" PRIu32)
Chris Fallin91473dc2014-12-12 15:58:26 -08009538TYPE(uint64, uint64_t, "%" PRIu64)
9539TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
9540TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
9541
9542#undef TYPE
9543
Josh Habermane8ed0212015-06-08 17:56:03 -07009544/* Output a symbolic value from the enum if found, else just print as int32. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009545static bool textprinter_putenum(void *closure, const void *handler_data,
9546 int32_t val) {
9547 upb_textprinter *p = closure;
9548 const upb_fielddef *f = handler_data;
9549 const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
9550 const char *label = upb_enumdef_iton(enum_def, val);
9551 if (label) {
9552 indent(p);
9553 putf(p, "%s: %s", upb_fielddef_name(f), label);
9554 endfield(p);
9555 } else {
9556 if (!textprinter_putint32(closure, handler_data, val))
9557 return false;
9558 }
9559 return true;
9560}
9561
9562static void *textprinter_startstr(void *closure, const void *handler_data,
9563 size_t size_hint) {
Josh Habermane8ed0212015-06-08 17:56:03 -07009564 upb_textprinter *p = closure;
Chris Fallin91473dc2014-12-12 15:58:26 -08009565 const upb_fielddef *f = handler_data;
9566 UPB_UNUSED(size_hint);
Chris Fallin91473dc2014-12-12 15:58:26 -08009567 indent(p);
9568 putf(p, "%s: \"", upb_fielddef_name(f));
9569 return p;
9570}
9571
9572static bool textprinter_endstr(void *closure, const void *handler_data) {
Chris Fallin91473dc2014-12-12 15:58:26 -08009573 upb_textprinter *p = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -07009574 UPB_UNUSED(handler_data);
Chris Fallin91473dc2014-12-12 15:58:26 -08009575 putf(p, "\"");
9576 endfield(p);
9577 return true;
9578}
9579
9580static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
9581 size_t len, const upb_bufhandle *handle) {
Chris Fallin91473dc2014-12-12 15:58:26 -08009582 upb_textprinter *p = closure;
9583 const upb_fielddef *f = hd;
Josh Habermane8ed0212015-06-08 17:56:03 -07009584 UPB_UNUSED(handle);
Chris Fallin91473dc2014-12-12 15:58:26 -08009585 CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
9586 return len;
9587err:
9588 return 0;
9589}
9590
9591static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
9592 upb_textprinter *p = closure;
9593 const char *name = handler_data;
9594 CHECK(indent(p));
9595 putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
9596 p->indent_depth_++;
9597 return p;
9598err:
9599 return UPB_BREAK;
9600}
9601
9602static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
Chris Fallin91473dc2014-12-12 15:58:26 -08009603 upb_textprinter *p = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -07009604 UPB_UNUSED(handler_data);
Chris Fallin91473dc2014-12-12 15:58:26 -08009605 p->indent_depth_--;
9606 CHECK(indent(p));
9607 upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
9608 CHECK(endfield(p));
9609 return true;
9610err:
9611 return false;
9612}
9613
Chris Fallin91473dc2014-12-12 15:58:26 -08009614static void onmreg(const void *c, upb_handlers *h) {
Chris Fallin91473dc2014-12-12 15:58:26 -08009615 const upb_msgdef *m = upb_handlers_msgdef(h);
Josh Habermane8ed0212015-06-08 17:56:03 -07009616 upb_msg_field_iter i;
9617 UPB_UNUSED(c);
Chris Fallin91473dc2014-12-12 15:58:26 -08009618
9619 upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
9620 upb_handlers_setendmsg(h, textprinter_endmsg, NULL);
9621
Chris Fallinfcd88892015-01-13 18:14:39 -08009622 for(upb_msg_field_begin(&i, m);
9623 !upb_msg_field_done(&i);
9624 upb_msg_field_next(&i)) {
Chris Fallin91473dc2014-12-12 15:58:26 -08009625 upb_fielddef *f = upb_msg_iter_field(&i);
9626 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
9627 upb_handlerattr_sethandlerdata(&attr, f);
9628 switch (upb_fielddef_type(f)) {
9629 case UPB_TYPE_INT32:
9630 upb_handlers_setint32(h, f, textprinter_putint32, &attr);
9631 break;
9632 case UPB_TYPE_INT64:
9633 upb_handlers_setint64(h, f, textprinter_putint64, &attr);
9634 break;
9635 case UPB_TYPE_UINT32:
9636 upb_handlers_setuint32(h, f, textprinter_putuint32, &attr);
9637 break;
9638 case UPB_TYPE_UINT64:
9639 upb_handlers_setuint64(h, f, textprinter_putuint64, &attr);
9640 break;
9641 case UPB_TYPE_FLOAT:
9642 upb_handlers_setfloat(h, f, textprinter_putfloat, &attr);
9643 break;
9644 case UPB_TYPE_DOUBLE:
9645 upb_handlers_setdouble(h, f, textprinter_putdouble, &attr);
9646 break;
9647 case UPB_TYPE_BOOL:
9648 upb_handlers_setbool(h, f, textprinter_putbool, &attr);
9649 break;
9650 case UPB_TYPE_STRING:
9651 case UPB_TYPE_BYTES:
9652 upb_handlers_setstartstr(h, f, textprinter_startstr, &attr);
9653 upb_handlers_setstring(h, f, textprinter_putstr, &attr);
9654 upb_handlers_setendstr(h, f, textprinter_endstr, &attr);
9655 break;
9656 case UPB_TYPE_MESSAGE: {
9657 const char *name =
9658 upb_fielddef_istagdelim(f)
9659 ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f)))
9660 : upb_fielddef_name(f);
9661 upb_handlerattr_sethandlerdata(&attr, name);
9662 upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr);
9663 upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr);
9664 break;
9665 }
9666 case UPB_TYPE_ENUM:
9667 upb_handlers_setint32(h, f, textprinter_putenum, &attr);
9668 break;
9669 }
9670 }
9671}
9672
Chris Fallind3262772015-05-14 18:24:26 -07009673static void textprinter_reset(upb_textprinter *p, bool single_line) {
9674 p->single_line_ = single_line;
9675 p->indent_depth_ = 0;
9676}
9677
9678
9679/* Public API *****************************************************************/
9680
9681upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
9682 upb_bytessink *output) {
9683 upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
9684 if (!p) return NULL;
9685
9686 p->output_ = output;
9687 upb_sink_reset(&p->input_, h, p);
9688 textprinter_reset(p, false);
9689
9690 return p;
9691}
9692
Chris Fallin91473dc2014-12-12 15:58:26 -08009693const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
9694 const void *owner) {
9695 return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
9696}
9697
9698upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
9699
Chris Fallin91473dc2014-12-12 15:58:26 -08009700void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
9701 p->single_line_ = single_line;
9702}
Chris Fallin91473dc2014-12-12 15:58:26 -08009703
9704
Josh Habermane8ed0212015-06-08 17:56:03 -07009705/* Index is descriptor type. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009706const uint8_t upb_pb_native_wire_types[] = {
Josh Habermane8ed0212015-06-08 17:56:03 -07009707 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */
9708 UPB_WIRE_TYPE_64BIT, /* DOUBLE */
9709 UPB_WIRE_TYPE_32BIT, /* FLOAT */
9710 UPB_WIRE_TYPE_VARINT, /* INT64 */
9711 UPB_WIRE_TYPE_VARINT, /* UINT64 */
9712 UPB_WIRE_TYPE_VARINT, /* INT32 */
9713 UPB_WIRE_TYPE_64BIT, /* FIXED64 */
9714 UPB_WIRE_TYPE_32BIT, /* FIXED32 */
9715 UPB_WIRE_TYPE_VARINT, /* BOOL */
9716 UPB_WIRE_TYPE_DELIMITED, /* STRING */
9717 UPB_WIRE_TYPE_START_GROUP, /* GROUP */
9718 UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */
9719 UPB_WIRE_TYPE_DELIMITED, /* BYTES */
9720 UPB_WIRE_TYPE_VARINT, /* UINT32 */
9721 UPB_WIRE_TYPE_VARINT, /* ENUM */
9722 UPB_WIRE_TYPE_32BIT, /* SFIXED32 */
9723 UPB_WIRE_TYPE_64BIT, /* SFIXED64 */
9724 UPB_WIRE_TYPE_VARINT, /* SINT32 */
9725 UPB_WIRE_TYPE_VARINT, /* SINT64 */
Chris Fallin91473dc2014-12-12 15:58:26 -08009726};
9727
Josh Habermane8ed0212015-06-08 17:56:03 -07009728/* A basic branch-based decoder, uses 32-bit values to get good performance
9729 * on 32-bit architectures (but performs well on 64-bits also).
9730 * This scheme comes from the original Google Protobuf implementation
9731 * (proto2). */
Chris Fallin91473dc2014-12-12 15:58:26 -08009732upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
9733 upb_decoderet err = {NULL, 0};
9734 const char *p = r.p;
9735 uint32_t low = (uint32_t)r.val;
9736 uint32_t high = 0;
9737 uint32_t b;
9738 b = *(p++); low |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
9739 b = *(p++); low |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
9740 b = *(p++); low |= (b & 0x7fU) << 28;
9741 high = (b & 0x7fU) >> 4; if (!(b & 0x80)) goto done;
9742 b = *(p++); high |= (b & 0x7fU) << 3; if (!(b & 0x80)) goto done;
9743 b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
9744 b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
9745 b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
9746 b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
9747 return err;
9748
9749done:
9750 r.val = ((uint64_t)high << 32) | low;
9751 r.p = p;
9752 return r;
9753}
9754
Josh Habermane8ed0212015-06-08 17:56:03 -07009755/* Like the previous, but uses 64-bit values. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009756upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
9757 const char *p = r.p;
9758 uint64_t val = r.val;
9759 uint64_t b;
9760 upb_decoderet err = {NULL, 0};
9761 b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
9762 b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
9763 b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
9764 b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
9765 b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
9766 b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
9767 b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
9768 b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
9769 return err;
9770
9771done:
9772 r.val = val;
9773 r.p = p;
9774 return r;
9775}
9776
/* Given eight encoded varint bytes packed into |v|, returns an integer with a
 * single bit set: the high bit of the lowest byte whose continuation bit is
 * clear, i.e. the byte that ends the varint.  Subtracting one from the result
 * yields a mask that keeps only the bits belonging to the varint.  Returns 0
 * if every byte has its continuation bit set (unterminated varint). */
static uint64_t upb_get_vstopbit(uint64_t v) {
  /* Fill in every payload bit so that only continuation bits can be 0. */
  const uint64_t filled = v | 0x7f7f7f7f7f7f7f7fULL;
  /* Adding one carries through the low run of 1s into the lowest 0 bit. */
  const uint64_t carried = filled + 1;
  return carried & ~filled;
}
9785
Josh Habermane8ed0212015-06-08 17:56:03 -07009786/* A branchless decoder. Credit to Pascal Massimino for the bit-twiddling. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009787upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
9788 uint64_t b;
Josh Habermane8ed0212015-06-08 17:56:03 -07009789 uint64_t stop_bit;
9790 upb_decoderet my_r;
Chris Fallin91473dc2014-12-12 15:58:26 -08009791 memcpy(&b, r.p, sizeof(b));
Josh Habermane8ed0212015-06-08 17:56:03 -07009792 stop_bit = upb_get_vstopbit(b);
Chris Fallin91473dc2014-12-12 15:58:26 -08009793 b = (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
9794 b += b & 0x007f007f007f007fULL;
9795 b += 3 * (b & 0x0000ffff0000ffffULL);
9796 b += 15 * (b & 0x00000000ffffffffULL);
9797 if (stop_bit == 0) {
Josh Habermane8ed0212015-06-08 17:56:03 -07009798 /* Error: unterminated varint. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009799 upb_decoderet err_r = {(void*)0, 0};
9800 return err_r;
9801 }
Josh Habermane8ed0212015-06-08 17:56:03 -07009802 my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
9803 r.val | (b << 7));
Chris Fallin91473dc2014-12-12 15:58:26 -08009804 return my_r;
9805}
9806
Josh Habermane8ed0212015-06-08 17:56:03 -07009807/* A branchless decoder. Credit to Daniel Wright for the bit-twiddling. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009808upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
9809 uint64_t b;
Josh Habermane8ed0212015-06-08 17:56:03 -07009810 uint64_t stop_bit;
9811 upb_decoderet my_r;
Chris Fallin91473dc2014-12-12 15:58:26 -08009812 memcpy(&b, r.p, sizeof(b));
Josh Habermane8ed0212015-06-08 17:56:03 -07009813 stop_bit = upb_get_vstopbit(b);
Chris Fallin91473dc2014-12-12 15:58:26 -08009814 b &= (stop_bit - 1);
9815 b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL);
9816 b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL);
9817 b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL);
9818 if (stop_bit == 0) {
Josh Habermane8ed0212015-06-08 17:56:03 -07009819 /* Error: unterminated varint. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009820 upb_decoderet err_r = {(void*)0, 0};
9821 return err_r;
9822 }
Josh Habermane8ed0212015-06-08 17:56:03 -07009823 my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
9824 r.val | (b << 14));
Chris Fallin91473dc2014-12-12 15:58:26 -08009825 return my_r;
9826}
9827
9828#line 1 "upb/json/parser.rl"
9829/*
Josh Haberman181c7f22015-07-15 11:05:10 -07009830** upb::json::Parser (upb_json_parser)
9831**
9832** A parser that uses the Ragel State Machine Compiler to generate
9833** the finite automata.
9834**
9835** Ragel only natively handles regular languages, but we can manually
9836** program it a bit to handle context-free languages like JSON, by using
9837** the "fcall" and "fret" constructs.
9838**
9839** This parser can handle the basics, but needs several things to be fleshed
9840** out:
9841**
9842** - handling of unicode escape sequences (including high surrogate pairs).
9843** - properly check and report errors for unknown fields, stack overflow,
9844** improper array nesting (or lack of nesting).
9845** - handling of base64 sequences with padding characters.
9846** - handling of push-back (non-success returns from sink functions).
9847** - handling of keys/escape-sequences/etc that span input buffers.
9848*/
Chris Fallin91473dc2014-12-12 15:58:26 -08009849
9850#include <stdio.h>
9851#include <stdint.h>
9852#include <assert.h>
9853#include <string.h>
9854#include <stdlib.h>
9855#include <errno.h>
9856
9857
/* Capacity, in entries, of the JSON parser's frame stack and of Ragel's
 * parser stack (both arrays in struct upb_json_parser are sized with it). */
#define UPB_JSON_MAX_DEPTH 64
9859
/* One entry of the parser's stack of JSON scopes. */
typedef struct {
  /* Sink that receives the values parsed while this frame is on top. */
  upb_sink sink;

  /* The current message in which we're parsing, and the field whose value we're
   * expecting next. */
  const upb_msgdef *m;
  const upb_fielddef *f;

  /* The table mapping json name to fielddef for this message. */
  upb_strtable *name_table;

  /* We are in a repeated-field context, ready to emit mapentries as
   * submessages. This flag alters the start-of-object (open-brace) behavior to
   * begin a sequence of mapentry messages rather than a single submessage. */
  bool is_map;

  /* We are in a map-entry message context. This flag is set when parsing the
   * value field of a single map entry and indicates to all value-field parsers
   * (subobjects, strings, numbers, and bools) that the map-entry submessage
   * should end as soon as the value is parsed. */
  bool is_mapentry;

  /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
   * message's map field that we're currently parsing. This differs from |f|
   * because |f| is the field in the *current* message (i.e., the map-entry
   * message itself), not the parent's field that leads to this map. */
  const upb_fielddef *mapfield;
} upb_jsonparser_frame;
9888
/* State of one JSON parse in progress. */
struct upb_json_parser {
  /* Environment used for reporting errors and for (re)allocating the
   * accumulate buffer. */
  upb_env *env;
  /* Parser method; supplies the per-message name tables. */
  const upb_json_parsermethod *method;
  upb_bytessink input_;

  /* Stack to track the JSON scopes we are in. */
  upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
  /* Current frame.  check_stack() refuses to push when top + 1 == limit. */
  upb_jsonparser_frame *top;
  upb_jsonparser_frame *limit;

  /* Holds the most recent error message before it is reported to |env|. */
  upb_status status;

  /* Ragel's internal parsing stack for the parsing state machine. */
  int current_state;
  int parser_stack[UPB_JSON_MAX_DEPTH];
  int parser_top;

  /* The handle for the current buffer. */
  const upb_bufhandle *handle;

  /* Accumulate buffer.  See details in parser.rl.
   * |accumulated| points either at aliased input or at |accumulate_buf|;
   * it is NULL when nothing has been accumulated. */
  const char *accumulated;
  size_t accumulated_len;
  char *accumulate_buf;
  size_t accumulate_buf_size;

  /* Multi-part text data.  See details in parser.rl.
   * |multipart_state| holds one of the MULTIPART_* values. */
  int multipart_state;
  upb_selector_t string_selector;

  /* Input capture.  See details in parser.rl.
   * NULL when no capture is active; the address of suspend_capture while a
   * capture is suspended at a buffer seam. */
  const char *capture;

  /* Intermediate result of parsing a unicode escape sequence. */
  uint32_t digit;
};
9925
/* Refcounted data describing how to parse JSON for one message type. */
struct upb_json_parsermethod {
  upb_refcounted base;

  upb_byteshandler input_handler_;

  /* Mainly for the purposes of refcounting, so all the fielddefs we point
   * to stay alive. */
  const upb_msgdef *msg;

  /* Keys are upb_msgdef*, values are upb_strtable (json_name -> fielddef).
   * set_name_table() looks a frame's message up here. */
  upb_inttable name_tables;
};
9938
/* Returns false from the enclosing function when x evaluates to false.
 * NOTE(review): this expands to a bare `if`, so it is unsafe directly in front
 * of an `else`. */
#define PARSER_CHECK_RETURN(x) if (!(x)) return false

/* Used to signal that a capture has been suspended: capture_suspend() stores
 * this object's address in p->capture and capture_resume() expects to find it
 * there.  The object's value is never read in the code shown here. */
static char suspend_capture;
9943
Chris Fallin91473dc2014-12-12 15:58:26 -08009944static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
9945 upb_handlertype_t type) {
9946 upb_selector_t sel;
9947 bool ok = upb_handlers_getselector(p->top->f, type, &sel);
9948 UPB_ASSERT_VAR(ok, ok);
9949 return sel;
9950}
9951
9952static upb_selector_t parser_getsel(upb_json_parser *p) {
9953 return getsel_for_handlertype(
9954 p, upb_handlers_getprimitivehandlertype(p->top->f));
9955}
9956
Chris Fallin91473dc2014-12-12 15:58:26 -08009957static bool check_stack(upb_json_parser *p) {
9958 if ((p->top + 1) == p->limit) {
Josh Haberman181c7f22015-07-15 11:05:10 -07009959 upb_status_seterrmsg(&p->status, "Nesting too deep");
9960 upb_env_reporterror(p->env, &p->status);
Chris Fallin91473dc2014-12-12 15:58:26 -08009961 return false;
9962 }
9963
9964 return true;
9965}
9966
Josh Haberman78da6662016-01-13 19:05:43 -08009967static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
9968 upb_value v;
9969 bool ok = upb_inttable_lookupptr(&p->method->name_tables, frame->m, &v);
9970 UPB_ASSERT_VAR(ok, ok);
9971 frame->name_table = upb_value_getptr(v);
9972}
9973
/* There are GCC/Clang built-ins for overflow checking which we could start
 * using if there was any performance benefit to it. */

/* Stores a + b in *c and returns true, or returns false (leaving *c
 * untouched) if the sum would not fit in a size_t. */
static bool checked_add(size_t a, size_t b, size_t *c) {
  if (b > SIZE_MAX - a) {
    return false;
  }

  *c = a + b;
  return true;
}
9982
/* Returns a * b, clamped to SIZE_MAX if the product would overflow. */
static size_t saturating_multiply(size_t a, size_t b) {
  /* For non-zero b the product overflows exactly when a > SIZE_MAX / b. */
  if (b != 0 && a > SIZE_MAX / b) {
    return SIZE_MAX;
  }
  return a * b;
}
9991
Chris Fallin91473dc2014-12-12 15:58:26 -08009992
Chris Fallin97b663a2015-01-09 16:15:22 -08009993/* Base64 decoding ************************************************************/
Chris Fallin91473dc2014-12-12 15:58:26 -08009994
Josh Habermane8ed0212015-06-08 17:56:03 -07009995/* TODO(haberman): make this streaming. */
Chris Fallin91473dc2014-12-12 15:58:26 -08009996
/* Base64 decoding table with one entry per byte value (32 rows of 8).  An
 * entry is the 6-bit value of that character, or -1 if the byte is not part
 * of the base64 alphabet.  Note that the padding character '=' maps to -1 as
 * well. */
static const signed char b64table[] = {
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
  52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
  60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
  -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
  07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
  15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
  23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
  -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
  33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
  41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
  49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
};
10031
Josh Habermane8ed0212015-06-08 17:56:03 -070010032/* Returns the table value sign-extended to 32 bits. Knowing that the upper
10033 * bits will be 1 for unrecognized characters makes it easier to check for
10034 * this error condition later (see below). */
Chris Fallin91473dc2014-12-12 15:58:26 -080010035int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
10036
/* Returns true if ch is neither a base64 alphabet character nor the padding
 * character '='. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;
  }
  return b64lookup(ch) == -1;
}
10040
10041static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
10042 size_t len) {
10043 const char *limit = ptr + len;
10044 for (; ptr < limit; ptr += 4) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010045 uint32_t val;
10046 char output[3];
10047
Chris Fallin91473dc2014-12-12 15:58:26 -080010048 if (limit - ptr < 4) {
Josh Haberman181c7f22015-07-15 11:05:10 -070010049 upb_status_seterrf(&p->status,
Chris Fallin91473dc2014-12-12 15:58:26 -080010050 "Base64 input for bytes field not a multiple of 4: %s",
10051 upb_fielddef_name(p->top->f));
Josh Haberman181c7f22015-07-15 11:05:10 -070010052 upb_env_reporterror(p->env, &p->status);
Chris Fallin91473dc2014-12-12 15:58:26 -080010053 return false;
10054 }
10055
Josh Habermane8ed0212015-06-08 17:56:03 -070010056 val = b64lookup(ptr[0]) << 18 |
10057 b64lookup(ptr[1]) << 12 |
10058 b64lookup(ptr[2]) << 6 |
10059 b64lookup(ptr[3]);
Chris Fallin91473dc2014-12-12 15:58:26 -080010060
Josh Habermane8ed0212015-06-08 17:56:03 -070010061 /* Test the upper bit; returns true if any of the characters returned -1. */
Chris Fallin91473dc2014-12-12 15:58:26 -080010062 if (val & 0x80000000) {
10063 goto otherchar;
10064 }
10065
Chris Fallin91473dc2014-12-12 15:58:26 -080010066 output[0] = val >> 16;
10067 output[1] = (val >> 8) & 0xff;
10068 output[2] = val & 0xff;
10069 upb_sink_putstring(&p->top->sink, sel, output, 3, NULL);
10070 }
10071 return true;
10072
10073otherchar:
10074 if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
10075 nonbase64(ptr[3]) ) {
Josh Haberman181c7f22015-07-15 11:05:10 -070010076 upb_status_seterrf(&p->status,
Chris Fallin91473dc2014-12-12 15:58:26 -080010077 "Non-base64 characters in bytes field: %s",
10078 upb_fielddef_name(p->top->f));
Josh Haberman181c7f22015-07-15 11:05:10 -070010079 upb_env_reporterror(p->env, &p->status);
Chris Fallin91473dc2014-12-12 15:58:26 -080010080 return false;
10081 } if (ptr[2] == '=') {
Josh Habermane8ed0212015-06-08 17:56:03 -070010082 uint32_t val;
10083 char output;
10084
10085 /* Last group contains only two input bytes, one output byte. */
Chris Fallin91473dc2014-12-12 15:58:26 -080010086 if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
10087 goto badpadding;
10088 }
10089
Josh Habermane8ed0212015-06-08 17:56:03 -070010090 val = b64lookup(ptr[0]) << 18 |
10091 b64lookup(ptr[1]) << 12;
Chris Fallin91473dc2014-12-12 15:58:26 -080010092
10093 assert(!(val & 0x80000000));
Josh Habermane8ed0212015-06-08 17:56:03 -070010094 output = val >> 16;
Chris Fallin91473dc2014-12-12 15:58:26 -080010095 upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL);
10096 return true;
10097 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -070010098 uint32_t val;
10099 char output[2];
10100
10101 /* Last group contains only three input bytes, two output bytes. */
Chris Fallin91473dc2014-12-12 15:58:26 -080010102 if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
10103 goto badpadding;
10104 }
10105
Josh Habermane8ed0212015-06-08 17:56:03 -070010106 val = b64lookup(ptr[0]) << 18 |
10107 b64lookup(ptr[1]) << 12 |
10108 b64lookup(ptr[2]) << 6;
Chris Fallin91473dc2014-12-12 15:58:26 -080010109
Chris Fallin91473dc2014-12-12 15:58:26 -080010110 output[0] = val >> 16;
10111 output[1] = (val >> 8) & 0xff;
10112 upb_sink_putstring(&p->top->sink, sel, output, 2, NULL);
10113 return true;
10114 }
10115
10116badpadding:
Josh Haberman181c7f22015-07-15 11:05:10 -070010117 upb_status_seterrf(&p->status,
Chris Fallin91473dc2014-12-12 15:58:26 -080010118 "Incorrect base64 padding for field: %s (%.*s)",
10119 upb_fielddef_name(p->top->f),
10120 4, ptr);
Josh Haberman181c7f22015-07-15 11:05:10 -070010121 upb_env_reporterror(p->env, &p->status);
Chris Fallin91473dc2014-12-12 15:58:26 -080010122 return false;
10123}
10124
Chris Fallin91473dc2014-12-12 15:58:26 -080010125
Chris Fallin97b663a2015-01-09 16:15:22 -080010126/* Accumulate buffer **********************************************************/
Chris Fallin91473dc2014-12-12 15:58:26 -080010127
Josh Habermane8ed0212015-06-08 17:56:03 -070010128/* Functionality for accumulating a buffer.
10129 *
10130 * Some parts of the parser need an entire value as a contiguous string. For
10131 * example, to look up a member name in a hash table, or to turn a string into
10132 * a number, the relevant library routines need the input string to be in
10133 * contiguous memory, even if the value spanned two or more buffers in the
10134 * input. These routines handle that.
10135 *
10136 * In the common case we can just point to the input buffer to get this
10137 * contiguous string and avoid any actual copy. So we optimistically begin
10138 * this way. But there are a few cases where we must instead copy into a
10139 * separate buffer:
10140 *
10141 * 1. The string was not contiguous in the input (it spanned buffers).
10142 *
10143 * 2. The string included escape sequences that need to be interpreted to get
10144 * the true value in a contiguous buffer. */
Chris Fallin91473dc2014-12-12 15:58:26 -080010145
/* Debug check that nothing is currently accumulated: no data pointer and a
 * zero length.  UPB_UNUSED(p) is there for builds where assert() compiles to
 * nothing. */
static void assert_accumulate_empty(upb_json_parser *p) {
  UPB_UNUSED(p);
  assert(p->accumulated == NULL);
  assert(p->accumulated_len == 0);
}
10151
10152static void accumulate_clear(upb_json_parser *p) {
10153 p->accumulated = NULL;
10154 p->accumulated_len = 0;
10155}
10156
Josh Habermane8ed0212015-06-08 17:56:03 -070010157/* Used internally by accumulate_append(). */
Chris Fallin97b663a2015-01-09 16:15:22 -080010158static bool accumulate_realloc(upb_json_parser *p, size_t need) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010159 void *mem;
Chris Fallind3262772015-05-14 18:24:26 -070010160 size_t old_size = p->accumulate_buf_size;
10161 size_t new_size = UPB_MAX(old_size, 128);
Chris Fallin97b663a2015-01-09 16:15:22 -080010162 while (new_size < need) {
10163 new_size = saturating_multiply(new_size, 2);
10164 }
10165
Josh Habermane8ed0212015-06-08 17:56:03 -070010166 mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
Chris Fallin97b663a2015-01-09 16:15:22 -080010167 if (!mem) {
Josh Haberman181c7f22015-07-15 11:05:10 -070010168 upb_status_seterrmsg(&p->status, "Out of memory allocating buffer.");
10169 upb_env_reporterror(p->env, &p->status);
Chris Fallin97b663a2015-01-09 16:15:22 -080010170 return false;
10171 }
10172
10173 p->accumulate_buf = mem;
10174 p->accumulate_buf_size = new_size;
10175 return true;
10176}
10177
Josh Habermane8ed0212015-06-08 17:56:03 -070010178/* Logically appends the given data to the append buffer.
10179 * If "can_alias" is true, we will try to avoid actually copying, but the buffer
10180 * must be valid until the next accumulate_append() call (if any). */
Chris Fallin97b663a2015-01-09 16:15:22 -080010181static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
10182 bool can_alias) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010183 size_t need;
10184
Chris Fallin97b663a2015-01-09 16:15:22 -080010185 if (!p->accumulated && can_alias) {
10186 p->accumulated = buf;
10187 p->accumulated_len = len;
10188 return true;
10189 }
10190
Chris Fallin97b663a2015-01-09 16:15:22 -080010191 if (!checked_add(p->accumulated_len, len, &need)) {
Josh Haberman181c7f22015-07-15 11:05:10 -070010192 upb_status_seterrmsg(&p->status, "Integer overflow.");
10193 upb_env_reporterror(p->env, &p->status);
Chris Fallin97b663a2015-01-09 16:15:22 -080010194 return false;
10195 }
10196
10197 if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
10198 return false;
10199 }
10200
10201 if (p->accumulated != p->accumulate_buf) {
10202 memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
10203 p->accumulated = p->accumulate_buf;
10204 }
10205
10206 memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
10207 p->accumulated_len += len;
10208 return true;
10209}
10210
/* Returns a pointer to the data accumulated since the last accumulate_clear()
 * call, and writes the length to *len.  This will point either to the input
 * buffer or a temporary accumulate buffer.  Asserts that something has been
 * accumulated. */
static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
  assert(p->accumulated);
  *len = p->accumulated_len;
  return p->accumulated;
}
10219
10220
/* Multi-part text data *******************************************************/
10222
Josh Habermane8ed0212015-06-08 17:56:03 -070010223/* When we have text data in the input, it can often come in multiple segments.
10224 * For example, there may be some raw string data followed by an escape
10225 * sequence. The two segments are processed with different logic. Also buffer
10226 * seams in the input can cause multiple segments.
10227 *
10228 * As we see segments, there are two main cases for how we want to process them:
10229 *
10230 * 1. we want to push the captured input directly to string handlers.
10231 *
10232 * 2. we need to accumulate all the parts into a contiguous buffer for further
10233 * processing (field name lookup, string->number conversion, etc). */
Chris Fallin97b663a2015-01-09 16:15:22 -080010234
/* This is the set of states for p->multipart_state. */
enum {
  /* We are not currently processing multipart data.  multipart_end() returns
   * to this state. */
  MULTIPART_INACTIVE = 0,

  /* We are processing multipart data by accumulating it into a contiguous
   * buffer.  Entered via multipart_startaccum(). */
  MULTIPART_ACCUMULATE = 1,

  /* We are processing multipart data by pushing each part directly to the
   * current string handlers.  Entered via multipart_start(). */
  MULTIPART_PUSHEAGERLY = 2
};
10248
/* Start a multi-part text value where we accumulate the data for processing at
 * the end.  Asserts that nothing is accumulated and that no multi-part value
 * is already in progress. */
static void multipart_startaccum(upb_json_parser *p) {
  assert_accumulate_empty(p);
  assert(p->multipart_state == MULTIPART_INACTIVE);
  p->multipart_state = MULTIPART_ACCUMULATE;
}
10256
Josh Habermane8ed0212015-06-08 17:56:03 -070010257/* Start a multi-part text value where we immediately push text data to a string
10258 * value with the given selector. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010259static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
10260 assert_accumulate_empty(p);
10261 assert(p->multipart_state == MULTIPART_INACTIVE);
10262 p->multipart_state = MULTIPART_PUSHEAGERLY;
10263 p->string_selector = sel;
10264}
10265
10266static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
10267 bool can_alias) {
10268 switch (p->multipart_state) {
10269 case MULTIPART_INACTIVE:
10270 upb_status_seterrmsg(
Josh Haberman181c7f22015-07-15 11:05:10 -070010271 &p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
10272 upb_env_reporterror(p->env, &p->status);
Chris Fallin91473dc2014-12-12 15:58:26 -080010273 return false;
Chris Fallin97b663a2015-01-09 16:15:22 -080010274
10275 case MULTIPART_ACCUMULATE:
10276 if (!accumulate_append(p, buf, len, can_alias)) {
10277 return false;
10278 }
10279 break;
10280
10281 case MULTIPART_PUSHEAGERLY: {
10282 const upb_bufhandle *handle = can_alias ? p->handle : NULL;
10283 upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
10284 break;
Chris Fallin91473dc2014-12-12 15:58:26 -080010285 }
Chris Fallin91473dc2014-12-12 15:58:26 -080010286 }
10287
10288 return true;
10289}
10290
Josh Habermane8ed0212015-06-08 17:56:03 -070010291/* Note: this invalidates the accumulate buffer! Call only after reading its
10292 * contents. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010293static void multipart_end(upb_json_parser *p) {
10294 assert(p->multipart_state != MULTIPART_INACTIVE);
10295 p->multipart_state = MULTIPART_INACTIVE;
10296 accumulate_clear(p);
10297}
Chris Fallin91473dc2014-12-12 15:58:26 -080010298
Chris Fallin91473dc2014-12-12 15:58:26 -080010299
Chris Fallin97b663a2015-01-09 16:15:22 -080010300/* Input capture **************************************************************/
Chris Fallin91473dc2014-12-12 15:58:26 -080010301
Josh Habermane8ed0212015-06-08 17:56:03 -070010302/* Functionality for capturing a region of the input as text. Gracefully
10303 * handles the case where a buffer seam occurs in the middle of the captured
10304 * region. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010305
/* Starts capturing input at |ptr|.  A multi-part value must already be in
 * progress and no other capture may be active (both asserted). */
static void capture_begin(upb_json_parser *p, const char *ptr) {
  assert(p->multipart_state != MULTIPART_INACTIVE);
  assert(p->capture == NULL);
  p->capture = ptr;
}
10311
10312static bool capture_end(upb_json_parser *p, const char *ptr) {
10313 assert(p->capture);
10314 if (multipart_text(p, p->capture, ptr - p->capture, true)) {
10315 p->capture = NULL;
Chris Fallin91473dc2014-12-12 15:58:26 -080010316 return true;
10317 } else {
Chris Fallin91473dc2014-12-12 15:58:26 -080010318 return false;
10319 }
Chris Fallin91473dc2014-12-12 15:58:26 -080010320}
10321
Josh Habermane8ed0212015-06-08 17:56:03 -070010322/* This is called at the end of each input buffer (ie. when we have hit a
10323 * buffer seam). If we are in the middle of capturing the input, this
10324 * processes the unprocessed capture region. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010325static void capture_suspend(upb_json_parser *p, const char **ptr) {
10326 if (!p->capture) return;
10327
10328 if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010329 /* We use this as a signal that we were in the middle of capturing, and
10330 * that capturing should resume at the beginning of the next buffer.
10331 *
10332 * We can't use *ptr here, because we have no guarantee that this pointer
10333 * will be valid when we resume (if the underlying memory is freed, then
10334 * using the pointer at all, even to compare to NULL, is likely undefined
10335 * behavior). */
Chris Fallin97b663a2015-01-09 16:15:22 -080010336 p->capture = &suspend_capture;
10337 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -070010338 /* Need to back up the pointer to the beginning of the capture, since
10339 * we were not able to actually preserve it. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010340 *ptr = p->capture;
Chris Fallin91473dc2014-12-12 15:58:26 -080010341 }
10342}
10343
Chris Fallin97b663a2015-01-09 16:15:22 -080010344static void capture_resume(upb_json_parser *p, const char *ptr) {
10345 if (p->capture) {
10346 assert(p->capture == &suspend_capture);
10347 p->capture = ptr;
Chris Fallin91473dc2014-12-12 15:58:26 -080010348 }
Chris Fallin91473dc2014-12-12 15:58:26 -080010349}
10350
Chris Fallin97b663a2015-01-09 16:15:22 -080010351
10352/* Callbacks from the parser **************************************************/
10353
Josh Habermane8ed0212015-06-08 17:56:03 -070010354/* These are the functions called directly from the parser itself.
10355 * We define these in the same order as their declarations in the parser. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010356
/* Maps the character following a backslash in a JSON string to the character
 * it stands for.  Any other input trips an assertion; with assertions
 * disabled 'x' is returned. */
static char escape_char(char in) {
  char out = 'x';

  switch (in) {
    case 'r':  out = '\r'; break;
    case 't':  out = '\t'; break;
    case 'n':  out = '\n'; break;
    case 'f':  out = '\f'; break;
    case 'b':  out = '\b'; break;
    case '/':  out = '/';  break;
    case '"':  out = '"';  break;
    case '\\': out = '\\'; break;
    default:
      assert(0);
      break;
  }

  return out;
}
10372
Chris Fallin97b663a2015-01-09 16:15:22 -080010373static bool escape(upb_json_parser *p, const char *ptr) {
Chris Fallin91473dc2014-12-12 15:58:26 -080010374 char ch = escape_char(*ptr);
Chris Fallin97b663a2015-01-09 16:15:22 -080010375 return multipart_text(p, &ch, 1, false);
Chris Fallin91473dc2014-12-12 15:58:26 -080010376}
10377
/* Resets the value accumulated by hexdigit() to zero. */
static void start_hex(upb_json_parser *p) {
  p->digit = 0;
}
10381
10382static void hexdigit(upb_json_parser *p, const char *ptr) {
10383 char ch = *ptr;
10384
10385 p->digit <<= 4;
10386
Chris Fallin91473dc2014-12-12 15:58:26 -080010387 if (ch >= '0' && ch <= '9') {
Chris Fallin97b663a2015-01-09 16:15:22 -080010388 p->digit += (ch - '0');
Chris Fallin91473dc2014-12-12 15:58:26 -080010389 } else if (ch >= 'a' && ch <= 'f') {
Chris Fallin97b663a2015-01-09 16:15:22 -080010390 p->digit += ((ch - 'a') + 10);
Chris Fallin91473dc2014-12-12 15:58:26 -080010391 } else {
10392 assert(ch >= 'A' && ch <= 'F');
Chris Fallin97b663a2015-01-09 16:15:22 -080010393 p->digit += ((ch - 'A') + 10);
Chris Fallin91473dc2014-12-12 15:58:26 -080010394 }
10395}
10396
Chris Fallin97b663a2015-01-09 16:15:22 -080010397static bool end_hex(upb_json_parser *p) {
10398 uint32_t codepoint = p->digit;
Chris Fallin91473dc2014-12-12 15:58:26 -080010399
Josh Habermane8ed0212015-06-08 17:56:03 -070010400 /* emit the codepoint as UTF-8. */
10401 char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */
Chris Fallin91473dc2014-12-12 15:58:26 -080010402 int length = 0;
10403 if (codepoint <= 0x7F) {
10404 utf8[0] = codepoint;
10405 length = 1;
10406 } else if (codepoint <= 0x07FF) {
10407 utf8[1] = (codepoint & 0x3F) | 0x80;
10408 codepoint >>= 6;
10409 utf8[0] = (codepoint & 0x1F) | 0xC0;
10410 length = 2;
10411 } else /* codepoint <= 0xFFFF */ {
10412 utf8[2] = (codepoint & 0x3F) | 0x80;
10413 codepoint >>= 6;
10414 utf8[1] = (codepoint & 0x3F) | 0x80;
10415 codepoint >>= 6;
10416 utf8[0] = (codepoint & 0x0F) | 0xE0;
10417 length = 3;
10418 }
Josh Habermane8ed0212015-06-08 17:56:03 -070010419 /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
10420 * we have to wait for the next escape to get the full code point). */
Chris Fallin91473dc2014-12-12 15:58:26 -080010421
Chris Fallin97b663a2015-01-09 16:15:22 -080010422 return multipart_text(p, utf8, length, false);
Chris Fallin91473dc2014-12-12 15:58:26 -080010423}
10424
/* Begins capturing a run of text at |ptr|. */
static void start_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10428
/* Ends the current text capture at |ptr|; returns capture_end()'s result. */
static bool end_text(upb_json_parser *p, const char *ptr) {
  return capture_end(p, ptr);
}
10432
/* Begins a number: switches to accumulate mode, then starts capturing at
 * |ptr|.  The order matters, since capture_begin() asserts that a multi-part
 * value is already in progress. */
static void start_number(upb_json_parser *p, const char *ptr) {
  multipart_startaccum(p);
  capture_begin(p, ptr);
}
10437
Chris Fallina5075922015-02-02 15:07:34 -080010438static bool parse_number(upb_json_parser *p);
10439
Chris Fallin97b663a2015-01-09 16:15:22 -080010440static bool end_number(upb_json_parser *p, const char *ptr) {
10441 if (!capture_end(p, ptr)) {
10442 return false;
10443 }
10444
Chris Fallina5075922015-02-02 15:07:34 -080010445 return parse_number(p);
10446}
10447
10448static bool parse_number(upb_json_parser *p) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010449 size_t len;
10450 const char *buf;
10451 const char *myend;
10452 char *end;
10453
10454 /* strtol() and friends unfortunately do not support specifying the length of
10455 * the input string, so we need to force a copy into a NULL-terminated buffer. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010456 if (!multipart_text(p, "\0", 1, false)) {
10457 return false;
10458 }
10459
Josh Habermane8ed0212015-06-08 17:56:03 -070010460 buf = accumulate_getptr(p, &len);
10461 myend = buf + len - 1; /* One for NULL. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010462
Josh Habermane8ed0212015-06-08 17:56:03 -070010463 /* XXX: We are using strtol to parse integers, but this is wrong as even
10464 * integers can be represented as 1e6 (for example), which strtol can't
10465 * handle correctly.
10466 *
10467 * XXX: Also, we can't handle large integers properly because strto[u]ll
10468 * isn't in C89.
10469 *
10470 * XXX: Also, we don't properly check floats for overflow, since strtof
10471 * isn't in C89. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010472 switch (upb_fielddef_type(p->top->f)) {
10473 case UPB_TYPE_ENUM:
10474 case UPB_TYPE_INT32: {
10475 long val = strtol(p->accumulated, &end, 0);
10476 if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
10477 goto err;
10478 else
10479 upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
10480 break;
10481 }
10482 case UPB_TYPE_INT64: {
Josh Habermane8ed0212015-06-08 17:56:03 -070010483 long long val = strtol(p->accumulated, &end, 0);
Chris Fallin97b663a2015-01-09 16:15:22 -080010484 if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
10485 goto err;
10486 else
10487 upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
10488 break;
10489 }
10490 case UPB_TYPE_UINT32: {
10491 unsigned long val = strtoul(p->accumulated, &end, 0);
10492 if (val > UINT32_MAX || errno == ERANGE || end != myend)
10493 goto err;
10494 else
10495 upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
10496 break;
10497 }
10498 case UPB_TYPE_UINT64: {
Josh Habermane8ed0212015-06-08 17:56:03 -070010499 unsigned long long val = strtoul(p->accumulated, &end, 0);
Chris Fallin97b663a2015-01-09 16:15:22 -080010500 if (val > UINT64_MAX || errno == ERANGE || end != myend)
10501 goto err;
10502 else
10503 upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
10504 break;
10505 }
10506 case UPB_TYPE_DOUBLE: {
10507 double val = strtod(p->accumulated, &end);
10508 if (errno == ERANGE || end != myend)
10509 goto err;
10510 else
10511 upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
10512 break;
10513 }
10514 case UPB_TYPE_FLOAT: {
Josh Habermane8ed0212015-06-08 17:56:03 -070010515 float val = strtod(p->accumulated, &end);
Chris Fallin97b663a2015-01-09 16:15:22 -080010516 if (errno == ERANGE || end != myend)
10517 goto err;
10518 else
10519 upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
10520 break;
10521 }
10522 default:
10523 assert(false);
10524 }
10525
10526 multipart_end(p);
Chris Fallina5075922015-02-02 15:07:34 -080010527
Chris Fallin97b663a2015-01-09 16:15:22 -080010528 return true;
10529
10530err:
Josh Haberman181c7f22015-07-15 11:05:10 -070010531 upb_status_seterrf(&p->status, "error parsing number: %s", buf);
10532 upb_env_reporterror(p->env, &p->status);
Chris Fallin97b663a2015-01-09 16:15:22 -080010533 multipart_end(p);
10534 return false;
10535}
10536
10537static bool parser_putbool(upb_json_parser *p, bool val) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010538 bool ok;
10539
Chris Fallin97b663a2015-01-09 16:15:22 -080010540 if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
Josh Haberman181c7f22015-07-15 11:05:10 -070010541 upb_status_seterrf(&p->status,
Chris Fallin97b663a2015-01-09 16:15:22 -080010542 "Boolean value specified for non-bool field: %s",
10543 upb_fielddef_name(p->top->f));
Josh Haberman181c7f22015-07-15 11:05:10 -070010544 upb_env_reporterror(p->env, &p->status);
Chris Fallin97b663a2015-01-09 16:15:22 -080010545 return false;
10546 }
10547
Josh Habermane8ed0212015-06-08 17:56:03 -070010548 ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
Chris Fallin97b663a2015-01-09 16:15:22 -080010549 UPB_ASSERT_VAR(ok, ok);
Chris Fallina5075922015-02-02 15:07:34 -080010550
Chris Fallin97b663a2015-01-09 16:15:22 -080010551 return true;
10552}
10553
10554static bool start_stringval(upb_json_parser *p) {
10555 assert(p->top->f);
10556
10557 if (upb_fielddef_isstring(p->top->f)) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010558 upb_jsonparser_frame *inner;
10559 upb_selector_t sel;
10560
Chris Fallin97b663a2015-01-09 16:15:22 -080010561 if (!check_stack(p)) return false;
10562
Josh Habermane8ed0212015-06-08 17:56:03 -070010563 /* Start a new parser frame: parser frames correspond one-to-one with
10564 * handler frames, and string events occur in a sub-frame. */
10565 inner = p->top + 1;
10566 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
Chris Fallin97b663a2015-01-09 16:15:22 -080010567 upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
10568 inner->m = p->top->m;
10569 inner->f = p->top->f;
Josh Haberman78da6662016-01-13 19:05:43 -080010570 inner->name_table = NULL;
Chris Fallina5075922015-02-02 15:07:34 -080010571 inner->is_map = false;
10572 inner->is_mapentry = false;
Chris Fallin97b663a2015-01-09 16:15:22 -080010573 p->top = inner;
10574
10575 if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010576 /* For STRING fields we push data directly to the handlers as it is
10577 * parsed. We don't do this yet for BYTES fields, because our base64
10578 * decoder is not streaming.
10579 *
10580 * TODO(haberman): make base64 decoding streaming also. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010581 multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
10582 return true;
10583 } else {
10584 multipart_startaccum(p);
10585 return true;
10586 }
10587 } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010588 /* No need to push a frame -- symbolic enum names in quotes remain in the
10589 * current parser frame.
10590 *
10591 * Enum string values must accumulate so we can look up the value in a table
10592 * once it is complete. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010593 multipart_startaccum(p);
10594 return true;
10595 } else {
Josh Haberman181c7f22015-07-15 11:05:10 -070010596 upb_status_seterrf(&p->status,
Chris Fallin97b663a2015-01-09 16:15:22 -080010597 "String specified for non-string/non-enum field: %s",
10598 upb_fielddef_name(p->top->f));
Josh Haberman181c7f22015-07-15 11:05:10 -070010599 upb_env_reporterror(p->env, &p->status);
Chris Fallin97b663a2015-01-09 16:15:22 -080010600 return false;
10601 }
10602}
10603
10604static bool end_stringval(upb_json_parser *p) {
10605 bool ok = true;
10606
10607 switch (upb_fielddef_type(p->top->f)) {
10608 case UPB_TYPE_BYTES:
10609 if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
10610 p->accumulated, p->accumulated_len)) {
10611 return false;
10612 }
Josh Habermane8ed0212015-06-08 17:56:03 -070010613 /* Fall through. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010614
10615 case UPB_TYPE_STRING: {
10616 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10617 upb_sink_endstr(&p->top->sink, sel);
10618 p->top--;
10619 break;
10620 }
10621
10622 case UPB_TYPE_ENUM: {
Josh Habermane8ed0212015-06-08 17:56:03 -070010623 /* Resolve enum symbolic name to integer value. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010624 const upb_enumdef *enumdef =
10625 (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
10626
10627 size_t len;
10628 const char *buf = accumulate_getptr(p, &len);
10629
10630 int32_t int_val = 0;
10631 ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);
10632
10633 if (ok) {
10634 upb_selector_t sel = parser_getsel(p);
10635 upb_sink_putint32(&p->top->sink, sel, int_val);
10636 } else {
Josh Haberman181c7f22015-07-15 11:05:10 -070010637 upb_status_seterrf(&p->status, "Enum value unknown: '%.*s'", len, buf);
10638 upb_env_reporterror(p->env, &p->status);
Chris Fallin97b663a2015-01-09 16:15:22 -080010639 }
10640
10641 break;
10642 }
10643
10644 default:
10645 assert(false);
Josh Haberman181c7f22015-07-15 11:05:10 -070010646 upb_status_seterrmsg(&p->status, "Internal error in JSON decoder");
10647 upb_env_reporterror(p->env, &p->status);
Chris Fallin97b663a2015-01-09 16:15:22 -080010648 ok = false;
10649 break;
10650 }
10651
10652 multipart_end(p);
Chris Fallina5075922015-02-02 15:07:34 -080010653
Chris Fallin97b663a2015-01-09 16:15:22 -080010654 return ok;
10655}
10656
10657static void start_member(upb_json_parser *p) {
10658 assert(!p->top->f);
10659 multipart_startaccum(p);
10660}
10661
Josh Habermane8ed0212015-06-08 17:56:03 -070010662/* Helper: invoked during parse_mapentry() to emit the mapentry message's key
10663 * field based on the current contents of the accumulate buffer. */
Chris Fallina5075922015-02-02 15:07:34 -080010664static bool parse_mapentry_key(upb_json_parser *p) {
10665
Chris Fallin97b663a2015-01-09 16:15:22 -080010666 size_t len;
10667 const char *buf = accumulate_getptr(p, &len);
10668
Josh Habermane8ed0212015-06-08 17:56:03 -070010669 /* Emit the key field. We do a bit of ad-hoc parsing here because the
10670 * parser state machine has already decided that this is a string field
10671 * name, and we are reinterpreting it as some arbitrary key type. In
10672 * particular, integer and bool keys are quoted, so we need to parse the
10673 * quoted string contents here. */
Chris Fallin97b663a2015-01-09 16:15:22 -080010674
Chris Fallina5075922015-02-02 15:07:34 -080010675 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
10676 if (p->top->f == NULL) {
Josh Haberman181c7f22015-07-15 11:05:10 -070010677 upb_status_seterrmsg(&p->status, "mapentry message has no key");
10678 upb_env_reporterror(p->env, &p->status);
Chris Fallin97b663a2015-01-09 16:15:22 -080010679 return false;
10680 }
Chris Fallina5075922015-02-02 15:07:34 -080010681 switch (upb_fielddef_type(p->top->f)) {
10682 case UPB_TYPE_INT32:
10683 case UPB_TYPE_INT64:
10684 case UPB_TYPE_UINT32:
10685 case UPB_TYPE_UINT64:
Josh Habermane8ed0212015-06-08 17:56:03 -070010686 /* Invoke end_number. The accum buffer has the number's text already. */
Chris Fallina5075922015-02-02 15:07:34 -080010687 if (!parse_number(p)) {
10688 return false;
10689 }
10690 break;
10691 case UPB_TYPE_BOOL:
10692 if (len == 4 && !strncmp(buf, "true", 4)) {
10693 if (!parser_putbool(p, true)) {
10694 return false;
10695 }
10696 } else if (len == 5 && !strncmp(buf, "false", 5)) {
10697 if (!parser_putbool(p, false)) {
10698 return false;
10699 }
10700 } else {
Josh Haberman181c7f22015-07-15 11:05:10 -070010701 upb_status_seterrmsg(&p->status,
Chris Fallina5075922015-02-02 15:07:34 -080010702 "Map bool key not 'true' or 'false'");
Josh Haberman181c7f22015-07-15 11:05:10 -070010703 upb_env_reporterror(p->env, &p->status);
Chris Fallina5075922015-02-02 15:07:34 -080010704 return false;
10705 }
10706 multipart_end(p);
10707 break;
10708 case UPB_TYPE_STRING:
10709 case UPB_TYPE_BYTES: {
10710 upb_sink subsink;
10711 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
10712 upb_sink_startstr(&p->top->sink, sel, len, &subsink);
10713 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
10714 upb_sink_putstring(&subsink, sel, buf, len, NULL);
10715 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10716 upb_sink_endstr(&subsink, sel);
10717 multipart_end(p);
10718 break;
10719 }
10720 default:
Josh Haberman181c7f22015-07-15 11:05:10 -070010721 upb_status_seterrmsg(&p->status, "Invalid field type for map key");
10722 upb_env_reporterror(p->env, &p->status);
Chris Fallina5075922015-02-02 15:07:34 -080010723 return false;
10724 }
Chris Fallin97b663a2015-01-09 16:15:22 -080010725
10726 return true;
10727}
10728
Josh Habermane8ed0212015-06-08 17:56:03 -070010729/* Helper: emit one map entry (as a submessage in the map field sequence). This
10730 * is invoked from end_membername(), at the end of the map entry's key string,
10731 * with the map key in the accumulate buffer. It parses the key from that
10732 * buffer, emits the handler calls to start the mapentry submessage (setting up
10733 * its subframe in the process), and sets up state in the subframe so that the
10734 * value parser (invoked next) will emit the mapentry's value field and then
10735 * end the mapentry message. */
Chris Fallina5075922015-02-02 15:07:34 -080010736
10737static bool handle_mapentry(upb_json_parser *p) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010738 const upb_fielddef *mapfield;
10739 const upb_msgdef *mapentrymsg;
10740 upb_jsonparser_frame *inner;
10741 upb_selector_t sel;
10742
10743 /* Map entry: p->top->sink is the seq frame, so we need to start a frame
10744 * for the mapentry itself, and then set |f| in that frame so that the map
10745 * value field is parsed, and also set a flag to end the frame after the
10746 * map-entry value is parsed. */
Chris Fallina5075922015-02-02 15:07:34 -080010747 if (!check_stack(p)) return false;
10748
Josh Habermane8ed0212015-06-08 17:56:03 -070010749 mapfield = p->top->mapfield;
10750 mapentrymsg = upb_fielddef_msgsubdef(mapfield);
Chris Fallina5075922015-02-02 15:07:34 -080010751
Josh Habermane8ed0212015-06-08 17:56:03 -070010752 inner = p->top + 1;
Chris Fallina5075922015-02-02 15:07:34 -080010753 p->top->f = mapfield;
Josh Habermane8ed0212015-06-08 17:56:03 -070010754 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
Chris Fallina5075922015-02-02 15:07:34 -080010755 upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
10756 inner->m = mapentrymsg;
Josh Haberman78da6662016-01-13 19:05:43 -080010757 inner->name_table = NULL;
Chris Fallina5075922015-02-02 15:07:34 -080010758 inner->mapfield = mapfield;
10759 inner->is_map = false;
10760
Josh Habermane8ed0212015-06-08 17:56:03 -070010761 /* Don't set this to true *yet* -- we reuse parsing handlers below to push
10762 * the key field value to the sink, and these handlers will pop the frame
10763 * if they see is_mapentry (when invoked by the parser state machine, they
10764 * would have just seen the map-entry value, not key). */
Chris Fallina5075922015-02-02 15:07:34 -080010765 inner->is_mapentry = false;
10766 p->top = inner;
10767
Josh Habermane8ed0212015-06-08 17:56:03 -070010768 /* send STARTMSG in submsg frame. */
Chris Fallina5075922015-02-02 15:07:34 -080010769 upb_sink_startmsg(&p->top->sink);
10770
10771 parse_mapentry_key(p);
10772
Josh Habermane8ed0212015-06-08 17:56:03 -070010773 /* Set up the value field to receive the map-entry value. */
Chris Fallina5075922015-02-02 15:07:34 -080010774 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
Josh Habermane8ed0212015-06-08 17:56:03 -070010775 p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */
Chris Fallina5075922015-02-02 15:07:34 -080010776 p->top->mapfield = mapfield;
10777 if (p->top->f == NULL) {
Josh Haberman181c7f22015-07-15 11:05:10 -070010778 upb_status_seterrmsg(&p->status, "mapentry message has no value");
10779 upb_env_reporterror(p->env, &p->status);
Chris Fallina5075922015-02-02 15:07:34 -080010780 return false;
10781 }
10782
10783 return true;
10784}
10785
10786static bool end_membername(upb_json_parser *p) {
10787 assert(!p->top->f);
10788
10789 if (p->top->is_map) {
10790 return handle_mapentry(p);
10791 } else {
10792 size_t len;
10793 const char *buf = accumulate_getptr(p, &len);
Josh Haberman78da6662016-01-13 19:05:43 -080010794 upb_value v;
Chris Fallina5075922015-02-02 15:07:34 -080010795
Josh Haberman78da6662016-01-13 19:05:43 -080010796 if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) {
10797 p->top->f = upb_value_getconstptr(v);
10798 multipart_end(p);
10799
10800 return true;
10801 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -070010802 /* TODO(haberman): Ignore unknown fields if requested/configured to do
10803 * so. */
Josh Haberman181c7f22015-07-15 11:05:10 -070010804 upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf);
10805 upb_env_reporterror(p->env, &p->status);
Chris Fallina5075922015-02-02 15:07:34 -080010806 return false;
10807 }
Chris Fallina5075922015-02-02 15:07:34 -080010808 }
10809}
10810
10811static void end_member(upb_json_parser *p) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010812 /* If we just parsed a map-entry value, end that frame too. */
Chris Fallina5075922015-02-02 15:07:34 -080010813 if (p->top->is_mapentry) {
Chris Fallina5075922015-02-02 15:07:34 -080010814 upb_status s = UPB_STATUS_INIT;
Chris Fallina5075922015-02-02 15:07:34 -080010815 upb_selector_t sel;
Josh Habermane8ed0212015-06-08 17:56:03 -070010816 bool ok;
10817 const upb_fielddef *mapfield;
10818
10819 assert(p->top > p->stack);
10820 /* send ENDMSG on submsg. */
10821 upb_sink_endmsg(&p->top->sink, &s);
10822 mapfield = p->top->mapfield;
10823
10824 /* send ENDSUBMSG in repeated-field-of-mapentries frame. */
10825 p->top--;
10826 ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
Chris Fallina5075922015-02-02 15:07:34 -080010827 UPB_ASSERT_VAR(ok, ok);
10828 upb_sink_endsubmsg(&p->top->sink, sel);
10829 }
10830
10831 p->top->f = NULL;
10832}
Chris Fallin97b663a2015-01-09 16:15:22 -080010833
10834static bool start_subobject(upb_json_parser *p) {
10835 assert(p->top->f);
10836
Chris Fallina5075922015-02-02 15:07:34 -080010837 if (upb_fielddef_ismap(p->top->f)) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010838 upb_jsonparser_frame *inner;
10839 upb_selector_t sel;
10840
10841 /* Beginning of a map. Start a new parser frame in a repeated-field
10842 * context. */
Chris Fallina5075922015-02-02 15:07:34 -080010843 if (!check_stack(p)) return false;
10844
Josh Habermane8ed0212015-06-08 17:56:03 -070010845 inner = p->top + 1;
10846 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
Chris Fallina5075922015-02-02 15:07:34 -080010847 upb_sink_startseq(&p->top->sink, sel, &inner->sink);
10848 inner->m = upb_fielddef_msgsubdef(p->top->f);
Josh Haberman78da6662016-01-13 19:05:43 -080010849 inner->name_table = NULL;
Chris Fallina5075922015-02-02 15:07:34 -080010850 inner->mapfield = p->top->f;
10851 inner->f = NULL;
10852 inner->is_map = true;
10853 inner->is_mapentry = false;
10854 p->top = inner;
10855
10856 return true;
10857 } else if (upb_fielddef_issubmsg(p->top->f)) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010858 upb_jsonparser_frame *inner;
10859 upb_selector_t sel;
10860
10861 /* Beginning of a subobject. Start a new parser frame in the submsg
10862 * context. */
Chris Fallina5075922015-02-02 15:07:34 -080010863 if (!check_stack(p)) return false;
10864
Josh Habermane8ed0212015-06-08 17:56:03 -070010865 inner = p->top + 1;
Chris Fallina5075922015-02-02 15:07:34 -080010866
Josh Habermane8ed0212015-06-08 17:56:03 -070010867 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
Chris Fallina5075922015-02-02 15:07:34 -080010868 upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
10869 inner->m = upb_fielddef_msgsubdef(p->top->f);
Josh Haberman78da6662016-01-13 19:05:43 -080010870 set_name_table(p, inner);
Chris Fallina5075922015-02-02 15:07:34 -080010871 inner->f = NULL;
10872 inner->is_map = false;
10873 inner->is_mapentry = false;
10874 p->top = inner;
10875
10876 return true;
10877 } else {
Josh Haberman181c7f22015-07-15 11:05:10 -070010878 upb_status_seterrf(&p->status,
Chris Fallin97b663a2015-01-09 16:15:22 -080010879 "Object specified for non-message/group field: %s",
10880 upb_fielddef_name(p->top->f));
Josh Haberman181c7f22015-07-15 11:05:10 -070010881 upb_env_reporterror(p->env, &p->status);
Chris Fallin97b663a2015-01-09 16:15:22 -080010882 return false;
10883 }
Chris Fallin97b663a2015-01-09 16:15:22 -080010884}
10885
10886static void end_subobject(upb_json_parser *p) {
Chris Fallina5075922015-02-02 15:07:34 -080010887 if (p->top->is_map) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010888 upb_selector_t sel;
Chris Fallina5075922015-02-02 15:07:34 -080010889 p->top--;
Josh Habermane8ed0212015-06-08 17:56:03 -070010890 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
Chris Fallina5075922015-02-02 15:07:34 -080010891 upb_sink_endseq(&p->top->sink, sel);
10892 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -070010893 upb_selector_t sel;
Chris Fallina5075922015-02-02 15:07:34 -080010894 p->top--;
Josh Habermane8ed0212015-06-08 17:56:03 -070010895 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
Chris Fallina5075922015-02-02 15:07:34 -080010896 upb_sink_endsubmsg(&p->top->sink, sel);
10897 }
Chris Fallin97b663a2015-01-09 16:15:22 -080010898}
10899
10900static bool start_array(upb_json_parser *p) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010901 upb_jsonparser_frame *inner;
10902 upb_selector_t sel;
10903
Chris Fallin97b663a2015-01-09 16:15:22 -080010904 assert(p->top->f);
10905
10906 if (!upb_fielddef_isseq(p->top->f)) {
Josh Haberman181c7f22015-07-15 11:05:10 -070010907 upb_status_seterrf(&p->status,
Chris Fallin97b663a2015-01-09 16:15:22 -080010908 "Array specified for non-repeated field: %s",
10909 upb_fielddef_name(p->top->f));
Josh Haberman181c7f22015-07-15 11:05:10 -070010910 upb_env_reporterror(p->env, &p->status);
Chris Fallin97b663a2015-01-09 16:15:22 -080010911 return false;
10912 }
10913
10914 if (!check_stack(p)) return false;
10915
Josh Habermane8ed0212015-06-08 17:56:03 -070010916 inner = p->top + 1;
10917 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
Chris Fallin97b663a2015-01-09 16:15:22 -080010918 upb_sink_startseq(&p->top->sink, sel, &inner->sink);
10919 inner->m = p->top->m;
Josh Haberman78da6662016-01-13 19:05:43 -080010920 inner->name_table = NULL;
Chris Fallin97b663a2015-01-09 16:15:22 -080010921 inner->f = p->top->f;
Chris Fallina5075922015-02-02 15:07:34 -080010922 inner->is_map = false;
10923 inner->is_mapentry = false;
Chris Fallin97b663a2015-01-09 16:15:22 -080010924 p->top = inner;
10925
10926 return true;
10927}
10928
10929static void end_array(upb_json_parser *p) {
Josh Habermane8ed0212015-06-08 17:56:03 -070010930 upb_selector_t sel;
10931
Chris Fallin97b663a2015-01-09 16:15:22 -080010932 assert(p->top > p->stack);
10933
10934 p->top--;
Josh Habermane8ed0212015-06-08 17:56:03 -070010935 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
Chris Fallin97b663a2015-01-09 16:15:22 -080010936 upb_sink_endseq(&p->top->sink, sel);
10937}
10938
10939static void start_object(upb_json_parser *p) {
Chris Fallina5075922015-02-02 15:07:34 -080010940 if (!p->top->is_map) {
10941 upb_sink_startmsg(&p->top->sink);
10942 }
Chris Fallin97b663a2015-01-09 16:15:22 -080010943}
10944
10945static void end_object(upb_json_parser *p) {
Chris Fallina5075922015-02-02 15:07:34 -080010946 if (!p->top->is_map) {
10947 upb_status status;
Josh Haberman181c7f22015-07-15 11:05:10 -070010948 upb_status_clear(&status);
Chris Fallina5075922015-02-02 15:07:34 -080010949 upb_sink_endmsg(&p->top->sink, &status);
Josh Haberman181c7f22015-07-15 11:05:10 -070010950 if (!upb_ok(&status)) {
10951 upb_env_reporterror(p->env, &status);
10952 }
Chris Fallina5075922015-02-02 15:07:34 -080010953 }
Chris Fallin97b663a2015-01-09 16:15:22 -080010954}
10955
10956
/* Jumps to the enclosing function's `error` label when |x| evaluates to
 * false.  Wrapped in do/while(0) so that the expansion is a single statement:
 * a bare `if` would capture a following `else` when the macro is used as the
 * body of an unbraced if/else. */
#define CHECK_RETURN_TOP(x) do { if (!(x)) goto error; } while (0)
10958
Chris Fallin97b663a2015-01-09 16:15:22 -080010959
10960/* The actual parser **********************************************************/
10961
Josh Habermane8ed0212015-06-08 17:56:03 -070010962/* What follows is the Ragel parser itself. The language is specified in Ragel
10963 * and the actions call our C functions above.
10964 *
10965 * Ragel has an extensive set of functionality, and we use only a small part of
10966 * it. There are many action types but we only use a few:
10967 *
10968 * ">" -- transition into a machine
10969 * "%" -- transition out of a machine
10970 * "@" -- transition into a final state of a machine.
10971 *
10972 * "@" transitions are tricky because a machine can transition into a final
10973 * state repeatedly. But in some cases we know this can't happen, for example
10974 * a string which is delimited by a final '"' can only transition into its
10975 * final state once, when the closing '"' is seen. */
Chris Fallin91473dc2014-12-12 15:58:26 -080010976
Chris Fallin97b663a2015-01-09 16:15:22 -080010977
Josh Haberman78da6662016-01-13 19:05:43 -080010978#line 1246 "upb/json/parser.rl"
Chris Fallin91473dc2014-12-12 15:58:26 -080010979
10980
10981
Josh Haberman78da6662016-01-13 19:05:43 -080010982#line 1158 "upb/json/parser.c"
/* Ragel-generated action lists.  _json_trans_actions holds offsets into this
 * array.  At each such offset parse() reads one count byte followed by that
 * many action ids, and dispatches every id through its action switch. */
Chris Fallin91473dc2014-12-12 15:58:26 -080010983static const char _json_actions[] = {
10984 0, 1, 0, 1, 2, 1, 3, 1,
Chris Fallin97b663a2015-01-09 16:15:22 -080010985 5, 1, 6, 1, 7, 1, 8, 1,
10986 10, 1, 12, 1, 13, 1, 14, 1,
10987 15, 1, 16, 1, 17, 1, 21, 1,
10988 25, 1, 27, 2, 3, 8, 2, 4,
10989 5, 2, 6, 2, 2, 6, 8, 2,
10990 11, 9, 2, 13, 15, 2, 14, 15,
10991 2, 18, 1, 2, 19, 27, 2, 20,
10992 9, 2, 22, 27, 2, 23, 27, 2,
10993 24, 27, 2, 26, 27, 3, 14, 11,
10994 9
Chris Fallin91473dc2014-12-12 15:58:26 -080010995};
10996
/* Ragel-generated.  Indexed by the current state: the offset into
 * _json_trans_keys at which that state's keys begin. */
10997static const unsigned char _json_key_offsets[] = {
Chris Fallin97b663a2015-01-09 16:15:22 -080010998 0, 0, 4, 9, 14, 15, 19, 24,
10999 29, 34, 38, 42, 45, 48, 50, 54,
11000 58, 60, 62, 67, 69, 71, 80, 86,
11001 92, 98, 104, 106, 115, 116, 116, 116,
11002 121, 126, 131, 132, 133, 134, 135, 135,
11003 136, 137, 138, 138, 139, 140, 141, 141,
11004 146, 151, 152, 156, 161, 166, 171, 175,
11005 175, 178, 178, 178
Chris Fallin91473dc2014-12-12 15:58:26 -080011006};
11007
/* Ragel-generated transition keys, compared against the current input
 * character (*p) in parse().  Each state's slice starts with
 * _json_single_lengths[state] single-character keys, followed by
 * _json_range_lengths[state] (low, high) pairs.  parse() binary-searches
 * both parts. */
11008static const char _json_trans_keys[] = {
11009 32, 123, 9, 13, 32, 34, 125, 9,
Chris Fallin97b663a2015-01-09 16:15:22 -080011010 13, 32, 34, 125, 9, 13, 34, 32,
11011 58, 9, 13, 32, 93, 125, 9, 13,
11012 32, 44, 125, 9, 13, 32, 44, 125,
11013 9, 13, 32, 34, 9, 13, 45, 48,
11014 49, 57, 48, 49, 57, 46, 69, 101,
11015 48, 57, 69, 101, 48, 57, 43, 45,
11016 48, 57, 48, 57, 48, 57, 46, 69,
11017 101, 48, 57, 34, 92, 34, 92, 34,
11018 47, 92, 98, 102, 110, 114, 116, 117,
11019 48, 57, 65, 70, 97, 102, 48, 57,
11020 65, 70, 97, 102, 48, 57, 65, 70,
11021 97, 102, 48, 57, 65, 70, 97, 102,
11022 34, 92, 34, 45, 91, 102, 110, 116,
11023 123, 48, 57, 34, 32, 93, 125, 9,
11024 13, 32, 44, 93, 9, 13, 32, 93,
11025 125, 9, 13, 97, 108, 115, 101, 117,
11026 108, 108, 114, 117, 101, 32, 34, 125,
11027 9, 13, 32, 34, 125, 9, 13, 34,
11028 32, 58, 9, 13, 32, 93, 125, 9,
11029 13, 32, 44, 125, 9, 13, 32, 44,
11030 125, 9, 13, 32, 34, 9, 13, 32,
11031 9, 13, 0
Chris Fallin91473dc2014-12-12 15:58:26 -080011032};
11033
/* Ragel-generated.  Indexed by the current state: the number of
 * single-character keys at the start of that state's slice of
 * _json_trans_keys. */
11034static const char _json_single_lengths[] = {
Chris Fallin97b663a2015-01-09 16:15:22 -080011035 0, 2, 3, 3, 1, 2, 3, 3,
Chris Fallin91473dc2014-12-12 15:58:26 -080011036 3, 2, 2, 1, 3, 0, 2, 2,
11037 0, 0, 3, 2, 2, 9, 0, 0,
Chris Fallin97b663a2015-01-09 16:15:22 -080011038 0, 0, 2, 7, 1, 0, 0, 3,
11039 3, 3, 1, 1, 1, 1, 0, 1,
Chris Fallin91473dc2014-12-12 15:58:26 -080011040 1, 1, 0, 1, 1, 1, 0, 3,
Chris Fallin97b663a2015-01-09 16:15:22 -080011041 3, 1, 2, 3, 3, 3, 2, 0,
Chris Fallin91473dc2014-12-12 15:58:26 -080011042 1, 0, 0, 0
11043};
11044
/* Ragel-generated.  Indexed by the current state: the number of (low, high)
 * range pairs that follow the single-character keys in that state's slice of
 * _json_trans_keys. */
11045static const char _json_range_lengths[] = {
Chris Fallin97b663a2015-01-09 16:15:22 -080011046 0, 1, 1, 1, 0, 1, 1, 1,
Chris Fallin91473dc2014-12-12 15:58:26 -080011047 1, 1, 1, 1, 0, 1, 1, 1,
11048 1, 1, 1, 0, 0, 0, 3, 3,
Chris Fallin97b663a2015-01-09 16:15:22 -080011049 3, 3, 0, 1, 0, 0, 0, 1,
11050 1, 1, 0, 0, 0, 0, 0, 0,
Chris Fallin91473dc2014-12-12 15:58:26 -080011051 0, 0, 0, 0, 0, 0, 0, 1,
Chris Fallin97b663a2015-01-09 16:15:22 -080011052 1, 0, 1, 1, 1, 1, 1, 0,
Chris Fallin91473dc2014-12-12 15:58:26 -080011053 1, 0, 0, 0
11054};
11055
/* Ragel-generated.  Indexed by the current state: the base position in
 * _json_indicies for that state.  parse() adds the position of the matched
 * key to this base before reading _json_indicies. */
11056static const short _json_index_offsets[] = {
Chris Fallin97b663a2015-01-09 16:15:22 -080011057 0, 0, 4, 9, 14, 16, 20, 25,
11058 30, 35, 39, 43, 46, 50, 52, 56,
11059 60, 62, 64, 69, 72, 75, 85, 89,
11060 93, 97, 101, 104, 113, 115, 116, 117,
11061 122, 127, 132, 134, 136, 138, 140, 141,
11062 143, 145, 147, 148, 150, 152, 154, 155,
11063 160, 165, 167, 171, 176, 181, 186, 190,
11064 191, 194, 195, 196
Chris Fallin91473dc2014-12-12 15:58:26 -080011065};
11066
/* Ragel-generated.  Maps a state's base index plus the matched key position
 * to a transition id, which is then used to index _json_trans_targs and
 * _json_trans_actions.  When no key matches, parse() reads the entry that
 * follows all of the state's single and range key entries. */
11067static const char _json_indicies[] = {
11068 0, 2, 0, 1, 3, 4, 5, 3,
Chris Fallin97b663a2015-01-09 16:15:22 -080011069 1, 6, 7, 8, 6, 1, 9, 1,
11070 10, 11, 10, 1, 11, 1, 1, 11,
11071 12, 13, 14, 15, 13, 1, 16, 17,
11072 8, 16, 1, 17, 7, 17, 1, 18,
11073 19, 20, 1, 19, 20, 1, 22, 23,
11074 23, 21, 24, 1, 23, 23, 24, 21,
11075 25, 25, 26, 1, 26, 1, 26, 21,
11076 22, 23, 23, 20, 21, 28, 29, 27,
11077 31, 32, 30, 33, 33, 33, 33, 33,
11078 33, 33, 33, 34, 1, 35, 35, 35,
11079 1, 36, 36, 36, 1, 37, 37, 37,
11080 1, 38, 38, 38, 1, 40, 41, 39,
11081 42, 43, 44, 45, 46, 47, 48, 43,
11082 1, 49, 1, 50, 51, 53, 54, 1,
Chris Fallin91473dc2014-12-12 15:58:26 -080011083 53, 52, 55, 56, 54, 55, 1, 56,
Chris Fallin97b663a2015-01-09 16:15:22 -080011084 1, 1, 56, 52, 57, 1, 58, 1,
11085 59, 1, 60, 1, 61, 62, 1, 63,
11086 1, 64, 1, 65, 66, 1, 67, 1,
11087 68, 1, 69, 70, 71, 72, 70, 1,
11088 73, 74, 75, 73, 1, 76, 1, 77,
11089 78, 77, 1, 78, 1, 1, 78, 79,
11090 80, 81, 82, 80, 1, 83, 84, 75,
11091 83, 1, 84, 74, 84, 1, 85, 86,
11092 86, 1, 1, 1, 1, 0
Chris Fallin91473dc2014-12-12 15:58:26 -080011093};
11094
/* Ragel-generated.  Indexed by transition id (as read from _json_indicies):
 * the state the machine moves to when that transition is taken. */
11095static const char _json_trans_targs[] = {
11096 1, 0, 2, 3, 4, 56, 3, 4,
Chris Fallin97b663a2015-01-09 16:15:22 -080011097 56, 5, 5, 6, 7, 8, 9, 56,
11098 8, 9, 11, 12, 18, 57, 13, 15,
11099 14, 16, 17, 20, 58, 21, 20, 58,
11100 21, 19, 22, 23, 24, 25, 26, 20,
11101 58, 21, 28, 30, 31, 34, 39, 43,
11102 47, 29, 59, 59, 32, 31, 29, 32,
11103 33, 35, 36, 37, 38, 59, 40, 41,
11104 42, 59, 44, 45, 46, 59, 48, 49,
11105 55, 48, 49, 55, 50, 50, 51, 52,
11106 53, 54, 55, 53, 54, 59, 56
Chris Fallin91473dc2014-12-12 15:58:26 -080011107};
11108
/* Ragel-generated.  Indexed by transition id: the offset into _json_actions
 * of the action list to run for that transition.  An entry of 0 means the
 * transition runs no actions. */
11109static const char _json_trans_actions[] = {
Chris Fallin97b663a2015-01-09 16:15:22 -080011110 0, 0, 0, 21, 77, 53, 0, 47,
11111 23, 17, 0, 0, 15, 19, 19, 50,
11112 0, 0, 0, 0, 0, 1, 0, 0,
11113 0, 0, 0, 3, 13, 0, 0, 35,
11114 5, 11, 0, 38, 7, 7, 7, 41,
11115 44, 9, 62, 56, 25, 0, 0, 0,
11116 31, 29, 33, 59, 15, 0, 27, 0,
11117 0, 0, 0, 0, 0, 68, 0, 0,
11118 0, 71, 0, 0, 0, 65, 21, 77,
11119 53, 0, 47, 23, 17, 0, 0, 15,
11120 19, 19, 50, 0, 0, 74, 0
Chris Fallin91473dc2014-12-12 15:58:26 -080011121};
11122
/* Ragel-generated state numbers.  The json_en_* values name the entry states
 * of the number, string, value and main machines.  The call actions in
 * parse() jump to the literal states 10, 19 and 27, which match the number,
 * string and value entries below.
 * NOTE(review): json_start is presumably the state a fresh parser begins
 * in -- confirm against the code that initializes current_state. */
11123static const int json_start = 1;
Chris Fallin91473dc2014-12-12 15:58:26 -080011124
11125static const int json_en_number_machine = 10;
11126static const int json_en_string_machine = 19;
11127static const int json_en_value_machine = 27;
11128static const int json_en_main = 1;
11129
11130
Josh Haberman78da6662016-01-13 19:05:43 -080011131#line 1249 "upb/json/parser.rl"
Chris Fallin91473dc2014-12-12 15:58:26 -080011132
11133size_t parse(void *closure, const void *hd, const char *buf, size_t size,
11134 const upb_bufhandle *handle) {
Chris Fallin91473dc2014-12-12 15:58:26 -080011135 upb_json_parser *parser = closure;
11136
Josh Habermane8ed0212015-06-08 17:56:03 -070011137 /* Variables used by Ragel's generated code. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011138 int cs = parser->current_state;
11139 int *stack = parser->parser_stack;
11140 int top = parser->parser_top;
11141
11142 const char *p = buf;
11143 const char *pe = buf + size;
11144
Josh Habermane8ed0212015-06-08 17:56:03 -070011145 parser->handle = handle;
11146
11147 UPB_UNUSED(hd);
11148 UPB_UNUSED(handle);
11149
Chris Fallin97b663a2015-01-09 16:15:22 -080011150 capture_resume(parser, buf);
11151
Chris Fallin91473dc2014-12-12 15:58:26 -080011152
Josh Haberman78da6662016-01-13 19:05:43 -080011153#line 1329 "upb/json/parser.c"
Chris Fallin91473dc2014-12-12 15:58:26 -080011154 {
11155 int _klen;
11156 unsigned int _trans;
11157 const char *_acts;
11158 unsigned int _nacts;
11159 const char *_keys;
11160
11161 if ( p == pe )
11162 goto _test_eof;
11163 if ( cs == 0 )
11164 goto _out;
11165_resume:
11166 _keys = _json_trans_keys + _json_key_offsets[cs];
11167 _trans = _json_index_offsets[cs];
11168
11169 _klen = _json_single_lengths[cs];
11170 if ( _klen > 0 ) {
11171 const char *_lower = _keys;
11172 const char *_mid;
11173 const char *_upper = _keys + _klen - 1;
11174 while (1) {
11175 if ( _upper < _lower )
11176 break;
11177
11178 _mid = _lower + ((_upper-_lower) >> 1);
11179 if ( (*p) < *_mid )
11180 _upper = _mid - 1;
11181 else if ( (*p) > *_mid )
11182 _lower = _mid + 1;
11183 else {
11184 _trans += (unsigned int)(_mid - _keys);
11185 goto _match;
11186 }
11187 }
11188 _keys += _klen;
11189 _trans += _klen;
11190 }
11191
11192 _klen = _json_range_lengths[cs];
11193 if ( _klen > 0 ) {
11194 const char *_lower = _keys;
11195 const char *_mid;
11196 const char *_upper = _keys + (_klen<<1) - 2;
11197 while (1) {
11198 if ( _upper < _lower )
11199 break;
11200
11201 _mid = _lower + (((_upper-_lower) >> 1) & ~1);
11202 if ( (*p) < _mid[0] )
11203 _upper = _mid - 2;
11204 else if ( (*p) > _mid[1] )
11205 _lower = _mid + 2;
11206 else {
11207 _trans += (unsigned int)((_mid - _keys)>>1);
11208 goto _match;
11209 }
11210 }
11211 _trans += _klen;
11212 }
11213
11214_match:
11215 _trans = _json_indicies[_trans];
11216 cs = _json_trans_targs[_trans];
11217
11218 if ( _json_trans_actions[_trans] == 0 )
11219 goto _again;
11220
11221 _acts = _json_actions + _json_trans_actions[_trans];
11222 _nacts = (unsigned int) *_acts++;
11223 while ( _nacts-- > 0 )
11224 {
11225 switch ( *_acts++ )
11226 {
11227 case 0:
Josh Haberman78da6662016-01-13 19:05:43 -080011228#line 1161 "upb/json/parser.rl"
Chris Fallin91473dc2014-12-12 15:58:26 -080011229 { p--; {cs = stack[--top]; goto _again;} }
11230 break;
11231 case 1:
Josh Haberman78da6662016-01-13 19:05:43 -080011232#line 1162 "upb/json/parser.rl"
Chris Fallin91473dc2014-12-12 15:58:26 -080011233 { p--; {stack[top++] = cs; cs = 10; goto _again;} }
11234 break;
11235 case 2:
Josh Haberman78da6662016-01-13 19:05:43 -080011236#line 1166 "upb/json/parser.rl"
Chris Fallin91473dc2014-12-12 15:58:26 -080011237 { start_text(parser, p); }
11238 break;
11239 case 3:
Josh Haberman78da6662016-01-13 19:05:43 -080011240#line 1167 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011241 { CHECK_RETURN_TOP(end_text(parser, p)); }
Chris Fallin91473dc2014-12-12 15:58:26 -080011242 break;
11243 case 4:
Josh Haberman78da6662016-01-13 19:05:43 -080011244#line 1173 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011245 { start_hex(parser); }
Chris Fallin91473dc2014-12-12 15:58:26 -080011246 break;
11247 case 5:
Josh Haberman78da6662016-01-13 19:05:43 -080011248#line 1174 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011249 { hexdigit(parser, p); }
Chris Fallin91473dc2014-12-12 15:58:26 -080011250 break;
11251 case 6:
Josh Haberman78da6662016-01-13 19:05:43 -080011252#line 1175 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011253 { CHECK_RETURN_TOP(end_hex(parser)); }
Chris Fallin91473dc2014-12-12 15:58:26 -080011254 break;
11255 case 7:
Josh Haberman78da6662016-01-13 19:05:43 -080011256#line 1181 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011257 { CHECK_RETURN_TOP(escape(parser, p)); }
Chris Fallin91473dc2014-12-12 15:58:26 -080011258 break;
11259 case 8:
Josh Haberman78da6662016-01-13 19:05:43 -080011260#line 1187 "upb/json/parser.rl"
Chris Fallin91473dc2014-12-12 15:58:26 -080011261 { p--; {cs = stack[--top]; goto _again;} }
11262 break;
Chris Fallin97b663a2015-01-09 16:15:22 -080011263 case 9:
Josh Haberman78da6662016-01-13 19:05:43 -080011264#line 1190 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011265 { {stack[top++] = cs; cs = 19; goto _again;} }
11266 break;
11267 case 10:
Josh Haberman78da6662016-01-13 19:05:43 -080011268#line 1192 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011269 { p--; {stack[top++] = cs; cs = 27; goto _again;} }
11270 break;
11271 case 11:
Josh Haberman78da6662016-01-13 19:05:43 -080011272#line 1197 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011273 { start_member(parser); }
11274 break;
11275 case 12:
Josh Haberman78da6662016-01-13 19:05:43 -080011276#line 1198 "upb/json/parser.rl"
Chris Fallina5075922015-02-02 15:07:34 -080011277 { CHECK_RETURN_TOP(end_membername(parser)); }
Chris Fallin97b663a2015-01-09 16:15:22 -080011278 break;
11279 case 13:
Josh Haberman78da6662016-01-13 19:05:43 -080011280#line 1201 "upb/json/parser.rl"
Chris Fallina5075922015-02-02 15:07:34 -080011281 { end_member(parser); }
Chris Fallin97b663a2015-01-09 16:15:22 -080011282 break;
11283 case 14:
Josh Haberman78da6662016-01-13 19:05:43 -080011284#line 1207 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011285 { start_object(parser); }
11286 break;
11287 case 15:
Josh Haberman78da6662016-01-13 19:05:43 -080011288#line 1210 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011289 { end_object(parser); }
11290 break;
11291 case 16:
Josh Haberman78da6662016-01-13 19:05:43 -080011292#line 1216 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011293 { CHECK_RETURN_TOP(start_array(parser)); }
11294 break;
11295 case 17:
Josh Haberman78da6662016-01-13 19:05:43 -080011296#line 1220 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011297 { end_array(parser); }
11298 break;
11299 case 18:
Josh Haberman78da6662016-01-13 19:05:43 -080011300#line 1225 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011301 { start_number(parser, p); }
11302 break;
11303 case 19:
Josh Haberman78da6662016-01-13 19:05:43 -080011304#line 1226 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011305 { CHECK_RETURN_TOP(end_number(parser, p)); }
11306 break;
11307 case 20:
Josh Haberman78da6662016-01-13 19:05:43 -080011308#line 1228 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011309 { CHECK_RETURN_TOP(start_stringval(parser)); }
11310 break;
11311 case 21:
Josh Haberman78da6662016-01-13 19:05:43 -080011312#line 1229 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011313 { CHECK_RETURN_TOP(end_stringval(parser)); }
11314 break;
11315 case 22:
Josh Haberman78da6662016-01-13 19:05:43 -080011316#line 1231 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011317 { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
11318 break;
11319 case 23:
Josh Haberman78da6662016-01-13 19:05:43 -080011320#line 1233 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011321 { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
11322 break;
11323 case 24:
Josh Haberman78da6662016-01-13 19:05:43 -080011324#line 1235 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011325 { /* null value */ }
11326 break;
11327 case 25:
Josh Haberman78da6662016-01-13 19:05:43 -080011328#line 1237 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011329 { CHECK_RETURN_TOP(start_subobject(parser)); }
11330 break;
11331 case 26:
Josh Haberman78da6662016-01-13 19:05:43 -080011332#line 1238 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011333 { end_subobject(parser); }
11334 break;
11335 case 27:
Josh Haberman78da6662016-01-13 19:05:43 -080011336#line 1243 "upb/json/parser.rl"
Chris Fallin97b663a2015-01-09 16:15:22 -080011337 { p--; {cs = stack[--top]; goto _again;} }
11338 break;
Josh Haberman78da6662016-01-13 19:05:43 -080011339#line 1515 "upb/json/parser.c"
Chris Fallin91473dc2014-12-12 15:58:26 -080011340 }
11341 }
11342
11343_again:
11344 if ( cs == 0 )
11345 goto _out;
11346 if ( ++p != pe )
11347 goto _resume;
11348 _test_eof: {}
11349 _out: {}
11350 }
11351
Josh Haberman78da6662016-01-13 19:05:43 -080011352#line 1270 "upb/json/parser.rl"
Chris Fallin91473dc2014-12-12 15:58:26 -080011353
11354 if (p != pe) {
Josh Haberman181c7f22015-07-15 11:05:10 -070011355 upb_status_seterrf(&parser->status, "Parse error at %s\n", p);
11356 upb_env_reporterror(parser->env, &parser->status);
Chris Fallin97b663a2015-01-09 16:15:22 -080011357 } else {
11358 capture_suspend(parser, &p);
Chris Fallin91473dc2014-12-12 15:58:26 -080011359 }
11360
11361error:
Josh Habermane8ed0212015-06-08 17:56:03 -070011362 /* Save parsing state back to parser. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011363 parser->current_state = cs;
11364 parser->parser_top = top;
11365
11366 return p - buf;
11367}
11368
11369bool end(void *closure, const void *hd) {
11370 UPB_UNUSED(closure);
11371 UPB_UNUSED(hd);
Chris Fallind3262772015-05-14 18:24:26 -070011372
Josh Habermane8ed0212015-06-08 17:56:03 -070011373 /* Prevent compile warning on unused static constants. */
Chris Fallind3262772015-05-14 18:24:26 -070011374 UPB_UNUSED(json_start);
11375 UPB_UNUSED(json_en_number_machine);
11376 UPB_UNUSED(json_en_string_machine);
11377 UPB_UNUSED(json_en_value_machine);
11378 UPB_UNUSED(json_en_main);
Chris Fallin91473dc2014-12-12 15:58:26 -080011379 return true;
11380}
11381
Chris Fallind3262772015-05-14 18:24:26 -070011382static void json_parser_reset(upb_json_parser *p) {
Josh Habermane8ed0212015-06-08 17:56:03 -070011383 int cs;
11384 int top;
11385
Chris Fallin91473dc2014-12-12 15:58:26 -080011386 p->top = p->stack;
11387 p->top->f = NULL;
Chris Fallina5075922015-02-02 15:07:34 -080011388 p->top->is_map = false;
11389 p->top->is_mapentry = false;
Chris Fallin91473dc2014-12-12 15:58:26 -080011390
Josh Habermane8ed0212015-06-08 17:56:03 -070011391 /* Emit Ragel initialization of the parser. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011392
Josh Haberman78da6662016-01-13 19:05:43 -080011393#line 1569 "upb/json/parser.c"
Chris Fallin91473dc2014-12-12 15:58:26 -080011394 {
11395 cs = json_start;
11396 top = 0;
11397 }
11398
Josh Haberman78da6662016-01-13 19:05:43 -080011399#line 1310 "upb/json/parser.rl"
Chris Fallin91473dc2014-12-12 15:58:26 -080011400 p->current_state = cs;
11401 p->parser_top = top;
Chris Fallin97b663a2015-01-09 16:15:22 -080011402 accumulate_clear(p);
11403 p->multipart_state = MULTIPART_INACTIVE;
11404 p->capture = NULL;
Chris Fallind3262772015-05-14 18:24:26 -070011405 p->accumulated = NULL;
Josh Haberman181c7f22015-07-15 11:05:10 -070011406 upb_status_clear(&p->status);
Chris Fallin91473dc2014-12-12 15:58:26 -080011407}
11408
Josh Haberman78da6662016-01-13 19:05:43 -080011409static void visit_json_parsermethod(const upb_refcounted *r,
11410 upb_refcounted_visit *visit,
11411 void *closure) {
11412 const upb_json_parsermethod *method = (upb_json_parsermethod*)r;
11413 visit(r, upb_msgdef_upcast2(method->msg), closure);
11414}
11415
11416static void free_json_parsermethod(upb_refcounted *r) {
11417 upb_json_parsermethod *method = (upb_json_parsermethod*)r;
11418
11419 upb_inttable_iter i;
11420 upb_inttable_begin(&i, &method->name_tables);
11421 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
11422 upb_value val = upb_inttable_iter_value(&i);
11423 upb_strtable *t = upb_value_getptr(val);
11424 upb_strtable_uninit(t);
11425 free(t);
11426 }
11427
11428 upb_inttable_uninit(&method->name_tables);
11429
11430 free(r);
11431}
11432
11433static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) {
11434 upb_msg_field_iter i;
11435 upb_strtable *t;
11436
Josh Habermanf654d492016-02-18 11:07:51 -080011437 /* It would be nice to stack-allocate this, but protobufs do not limit the
11438 * length of fields to any reasonable limit. */
11439 char *buf = NULL;
11440 size_t len = 0;
11441
Josh Haberman78da6662016-01-13 19:05:43 -080011442 if (upb_inttable_lookupptr(&m->name_tables, md, NULL)) {
11443 return;
11444 }
11445
11446 /* TODO(haberman): handle malloc failure. */
11447 t = malloc(sizeof(*t));
11448 upb_strtable_init(t, UPB_CTYPE_CONSTPTR);
11449 upb_inttable_insertptr(&m->name_tables, md, upb_value_ptr(t));
11450
11451 for(upb_msg_field_begin(&i, md);
11452 !upb_msg_field_done(&i);
11453 upb_msg_field_next(&i)) {
11454 const upb_fielddef *f = upb_msg_iter_field(&i);
Josh Habermanf654d492016-02-18 11:07:51 -080011455 size_t field_len = upb_fielddef_getjsonname(f, buf, len);
11456 if (field_len > len) {
11457 buf = realloc(buf, field_len);
11458 len = field_len;
11459 upb_fielddef_getjsonname(f, buf, len);
11460 }
Josh Haberman78da6662016-01-13 19:05:43 -080011461 upb_strtable_insert(t, buf, upb_value_constptr(f));
Josh Haberman78da6662016-01-13 19:05:43 -080011462
11463 if (upb_fielddef_issubmsg(f)) {
11464 add_jsonname_table(m, upb_fielddef_msgsubdef(f));
11465 }
11466 }
Josh Habermanf654d492016-02-18 11:07:51 -080011467
11468 free(buf);
Josh Haberman78da6662016-01-13 19:05:43 -080011469}
Chris Fallind3262772015-05-14 18:24:26 -070011470
11471/* Public API *****************************************************************/
11472
Josh Haberman78da6662016-01-13 19:05:43 -080011473upb_json_parser *upb_json_parser_create(upb_env *env,
11474 const upb_json_parsermethod *method,
11475 upb_sink *output) {
Chris Fallind3262772015-05-14 18:24:26 -070011476#ifndef NDEBUG
11477 const size_t size_before = upb_env_bytesallocated(env);
11478#endif
11479 upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
11480 if (!p) return false;
11481
11482 p->env = env;
Josh Haberman78da6662016-01-13 19:05:43 -080011483 p->method = method;
Chris Fallind3262772015-05-14 18:24:26 -070011484 p->limit = p->stack + UPB_JSON_MAX_DEPTH;
11485 p->accumulate_buf = NULL;
11486 p->accumulate_buf_size = 0;
Josh Haberman78da6662016-01-13 19:05:43 -080011487 upb_bytessink_reset(&p->input_, &method->input_handler_, p);
Chris Fallind3262772015-05-14 18:24:26 -070011488
11489 json_parser_reset(p);
11490 upb_sink_reset(&p->top->sink, output->handlers, output->closure);
11491 p->top->m = upb_handlers_msgdef(output->handlers);
Josh Haberman78da6662016-01-13 19:05:43 -080011492 set_name_table(p, p->top);
Chris Fallind3262772015-05-14 18:24:26 -070011493
Josh Haberman181c7f22015-07-15 11:05:10 -070011494 /* If this fails, uncomment and increase the value in parser.h. */
11495 /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
Chris Fallind3262772015-05-14 18:24:26 -070011496 assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
11497 return p;
Chris Fallin91473dc2014-12-12 15:58:26 -080011498}
11499
11500upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
11501 return &p->input_;
11502}
Josh Haberman78da6662016-01-13 19:05:43 -080011503
11504upb_json_parsermethod *upb_json_parsermethod_new(const upb_msgdef* md,
11505 const void* owner) {
11506 static const struct upb_refcounted_vtbl vtbl = {visit_json_parsermethod,
11507 free_json_parsermethod};
11508 upb_json_parsermethod *ret = malloc(sizeof(*ret));
11509 upb_refcounted_init(upb_json_parsermethod_upcast_mutable(ret), &vtbl, owner);
11510
11511 ret->msg = md;
11512 upb_ref2(md, ret);
11513
11514 upb_byteshandler_init(&ret->input_handler_);
11515 upb_byteshandler_setstring(&ret->input_handler_, parse, ret);
11516 upb_byteshandler_setendstr(&ret->input_handler_, end, ret);
11517
11518 upb_inttable_init(&ret->name_tables, UPB_CTYPE_PTR);
11519
11520 add_jsonname_table(ret, md);
11521
11522 return ret;
11523}
11524
11525const upb_byteshandler *upb_json_parsermethod_inputhandler(
11526 const upb_json_parsermethod *m) {
11527 return &m->input_handler_;
11528}
/*
** This currently uses snprintf() to format primitives, and could be optimized
** further.
*/
Chris Fallin91473dc2014-12-12 15:58:26 -080011533
11534
11535#include <stdlib.h>
11536#include <stdio.h>
11537#include <string.h>
11538#include <stdint.h>
11539
Chris Fallind3262772015-05-14 18:24:26 -070011540struct upb_json_printer {
11541 upb_sink input_;
Josh Habermane8ed0212015-06-08 17:56:03 -070011542 /* BytesSink closure. */
Chris Fallind3262772015-05-14 18:24:26 -070011543 void *subc_;
11544 upb_bytessink *output_;
11545
Josh Habermane8ed0212015-06-08 17:56:03 -070011546 /* We track the depth so that we know when to emit startstr/endstr on the
11547 * output. */
Chris Fallind3262772015-05-14 18:24:26 -070011548 int depth_;
11549
Josh Habermane8ed0212015-06-08 17:56:03 -070011550 /* Have we emitted the first element? This state is necessary to emit commas
11551 * without leaving a trailing comma in arrays/maps. We keep this state per
11552 * frame depth.
11553 *
11554 * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
11555 * We count frames (contexts in which we separate elements by commas) as both
11556 * repeated fields and messages (maps), and the worst case is a
11557 * message->repeated field->submessage->repeated field->... nesting. */
Chris Fallind3262772015-05-14 18:24:26 -070011558 bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
11559};
11560
/* StringPiece: a character pointer paired with a byte count. */
typedef struct {
  char *ptr;   /* Start of the character data. */
  size_t len;  /* Number of bytes in the piece. */
} strpc;
11566
Josh Haberman78da6662016-01-13 19:05:43 -080011567void freestrpc(void *ptr) {
11568 strpc *pc = ptr;
11569 free(pc->ptr);
11570 free(pc);
11571}
11572
11573/* Convert fielddef name to JSON name and return as a string piece. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011574strpc *newstrpc(upb_handlers *h, const upb_fielddef *f) {
Josh Haberman78da6662016-01-13 19:05:43 -080011575 /* TODO(haberman): handle malloc failure. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011576 strpc *ret = malloc(sizeof(*ret));
Josh Habermanf654d492016-02-18 11:07:51 -080011577 size_t len;
11578 ret->len = upb_fielddef_getjsonname(f, NULL, 0);
11579 ret->ptr = malloc(ret->len);
11580 len = upb_fielddef_getjsonname(f, ret->ptr, ret->len);
11581 UPB_ASSERT_VAR(len, len == ret->len);
11582 ret->len--; /* NULL */
Josh Haberman78da6662016-01-13 19:05:43 -080011583
11584 upb_handlers_addcleanup(h, ret, freestrpc);
Chris Fallin91473dc2014-12-12 15:58:26 -080011585 return ret;
11586}
11587
Josh Habermane8ed0212015-06-08 17:56:03 -070011588/* ------------ JSON string printing: values, maps, arrays ------------------ */
Chris Fallin91473dc2014-12-12 15:58:26 -080011589
11590static void print_data(
11591 upb_json_printer *p, const char *buf, unsigned int len) {
Josh Habermane8ed0212015-06-08 17:56:03 -070011592 /* TODO: Will need to change if we support pushback from the sink. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011593 size_t n = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
11594 UPB_ASSERT_VAR(n, n == len);
11595}
11596
11597static void print_comma(upb_json_printer *p) {
11598 if (!p->first_elem_[p->depth_]) {
11599 print_data(p, ",", 1);
11600 }
11601 p->first_elem_[p->depth_] = false;
11602}
11603
Josh Habermane8ed0212015-06-08 17:56:03 -070011604/* Helpers that print properly formatted elements to the JSON output stream. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011605
Josh Habermane8ed0212015-06-08 17:56:03 -070011606/* Used for escaping control chars in strings. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011607static const char kControlCharLimit = 0x20;
11608
Josh Habermane8ed0212015-06-08 17:56:03 -070011609UPB_INLINE bool is_json_escaped(char c) {
11610 /* See RFC 4627. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011611 unsigned char uc = (unsigned char)c;
11612 return uc < kControlCharLimit || uc == '"' || uc == '\\';
11613}
11614
Josh Habermanf654d492016-02-18 11:07:51 -080011615UPB_INLINE const char* json_nice_escape(char c) {
Chris Fallin91473dc2014-12-12 15:58:26 -080011616 switch (c) {
11617 case '"': return "\\\"";
11618 case '\\': return "\\\\";
11619 case '\b': return "\\b";
11620 case '\f': return "\\f";
11621 case '\n': return "\\n";
11622 case '\r': return "\\r";
11623 case '\t': return "\\t";
11624 default: return NULL;
11625 }
11626}
11627
Josh Habermane8ed0212015-06-08 17:56:03 -070011628/* Write a properly escaped string chunk. The surrounding quotes are *not*
11629 * printed; this is so that the caller has the option of emitting the string
11630 * content in chunks. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011631static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
11632 const char* unescaped_run = NULL;
Josh Habermane8ed0212015-06-08 17:56:03 -070011633 unsigned int i;
11634 for (i = 0; i < len; i++) {
Chris Fallin91473dc2014-12-12 15:58:26 -080011635 char c = buf[i];
Josh Habermane8ed0212015-06-08 17:56:03 -070011636 /* Handle escaping. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011637 if (is_json_escaped(c)) {
Josh Habermane8ed0212015-06-08 17:56:03 -070011638 /* Use a "nice" escape, like \n, if one exists for this character. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011639 const char* escape = json_nice_escape(c);
Josh Habermane8ed0212015-06-08 17:56:03 -070011640 /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
11641 * escape. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011642 char escape_buf[8];
11643 if (!escape) {
11644 unsigned char byte = (unsigned char)c;
Josh Habermane8ed0212015-06-08 17:56:03 -070011645 _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
Chris Fallin91473dc2014-12-12 15:58:26 -080011646 escape = escape_buf;
11647 }
11648
Josh Habermane8ed0212015-06-08 17:56:03 -070011649 /* N.B. that we assume that the input encoding is equal to the output
11650 * encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we
11651 * can simply pass the bytes through. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011652
Josh Habermane8ed0212015-06-08 17:56:03 -070011653 /* If there's a current run of unescaped chars, print that run first. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011654 if (unescaped_run) {
11655 print_data(p, unescaped_run, &buf[i] - unescaped_run);
11656 unescaped_run = NULL;
11657 }
Josh Habermane8ed0212015-06-08 17:56:03 -070011658 /* Then print the escape code. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011659 print_data(p, escape, strlen(escape));
11660 } else {
Josh Habermane8ed0212015-06-08 17:56:03 -070011661 /* Add to the current unescaped run of characters. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011662 if (unescaped_run == NULL) {
11663 unescaped_run = &buf[i];
11664 }
11665 }
11666 }
11667
Josh Habermane8ed0212015-06-08 17:56:03 -070011668 /* If the string ended in a run of unescaped characters, print that last run. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011669 if (unescaped_run) {
11670 print_data(p, unescaped_run, &buf[len] - unescaped_run);
11671 }
11672}
11673
/* Guard used by the fmt_* helpers below: makes the enclosing function return
 * -1 when condition x is false.  In those size_t-returning helpers the -1
 * converts to (size_t)-1, the value CHKFMT tests for. */
11674#define CHKLENGTH(x) if (!(x)) return -1;
11675
/* Helpers that format floating point values according to our custom formats.
 * Right now we use %.8g and %.17g for float/double, respectively, to match
 * proto2::util::JsonFormat's defaults.  May want to change this later. */

/* Formats val with "%.17g" into buf (capacity length).  Returns the number
 * of characters written, or (size_t)-1 if nothing was written or the output
 * did not fit. */
static size_t fmt_double(double val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%.17g", val);
  CHKLENGTH(written > 0 && written < length);
  return written;
}
11685
/* Formats val with "%.8g" into buf (capacity length).  Returns the number of
 * characters written, or (size_t)-1 if nothing was written or the output did
 * not fit. */
static size_t fmt_float(float val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%.8g", val);
  CHKLENGTH(written > 0 && written < length);
  return written;
}
11691
/* Writes "true" or "false" into buf (capacity length).  Returns the number
 * of characters written, or (size_t)-1 if nothing was written or the output
 * did not fit. */
static size_t fmt_bool(bool val, char* buf, size_t length) {
  const char *text = val ? "true" : "false";
  size_t written = _upb_snprintf(buf, length, "%s", text);
  CHKLENGTH(written > 0 && written < length);
  return written;
}
11697
/* Formats a signed integer in decimal into buf (capacity length).  Returns
 * the number of characters written, or (size_t)-1 if nothing was written or
 * the output did not fit.
 *
 * The parameter is long long (printed with "%lld") rather than long: this
 * helper is instantiated for int64_t and uint32_t values, and a long is only
 * 32 bits wide on some platforms, which would truncate those values. */
static size_t fmt_int64(long long val, char* buf, size_t length) {
  size_t n = _upb_snprintf(buf, length, "%lld", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}
11703
/* Formats an unsigned integer in decimal ("%llu") into buf (capacity
 * length).  Returns the number of characters written, or (size_t)-1 if
 * nothing was written or the output did not fit. */
static size_t fmt_uint64(unsigned long long val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%llu", val);
  CHKLENGTH(written > 0 && written < length);
  return written;
}
11709
Josh Habermane8ed0212015-06-08 17:56:03 -070011710/* Print a map key given a field name. Called by scalar field handlers and by
11711 * startseq for repeated fields. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011712static bool putkey(void *closure, const void *handler_data) {
11713 upb_json_printer *p = closure;
11714 const strpc *key = handler_data;
11715 print_comma(p);
11716 print_data(p, "\"", 1);
11717 putstring(p, key->ptr, key->len);
11718 print_data(p, "\":", 2);
11719 return true;
11720}
11721
/* CHKFMT: makes the enclosing function return false when val equals
 * (size_t)-1; it is applied to the length produced by a fmt_* helper.
 * CHK: makes the enclosing function return false when val is false/zero. */
Chris Fallind3262772015-05-14 18:24:26 -070011722#define CHKFMT(val) if ((val) == (size_t)-1) return false;
Chris Fallin91473dc2014-12-12 15:58:26 -080011723#define CHK(val) if (!(val)) return false;
11724
/* Defines three value handlers for `type`, each formatting with fmt_func:
 *   put<type>       formats the value into a 64-byte buffer and prints it;
 *                   returns false if formatting failed.
 *   scalar_<type>   prints the field key (via putkey) and then the value.
 *   repeated_<type> prints an element separator if needed, then the value. */
#define TYPE_HANDLERS(type, fmt_func)                                        \
  static bool put##type(void *closure, const void *handler_data, type val) { \
    upb_json_printer *printer = closure;                                     \
    char text[64];                                                           \
    size_t text_len = fmt_func(val, text, sizeof(text));                     \
    UPB_UNUSED(handler_data);                                                \
    if (text_len == (size_t)-1) {                                            \
      return false;                                                          \
    }                                                                        \
    print_data(printer, text, text_len);                                     \
    return true;                                                             \
  }                                                                          \
  static bool scalar_##type(void *closure, const void *handler_data,         \
                            type val) {                                      \
    return putkey(closure, handler_data) &&                                  \
           put##type(closure, handler_data, val);                            \
  }                                                                          \
  static bool repeated_##type(void *closure, const void *handler_data,       \
                              type val) {                                    \
    print_comma((upb_json_printer *)closure);                                \
    return put##type(closure, handler_data, val);                            \
  }
11748
/* Defines putmapkey_<type>, which prints a map key of `type` as a quoted
 * string followed by a colon, using put<type> for the value text.  Returns
 * false if put<type> fails.  (fmt_func is accepted but not used here.) */
#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
  static bool putmapkey_##type(void *closure, const void *handler_data,      \
                               type val) {                                   \
    upb_json_printer *printer = closure;                                     \
    print_data(printer, "\"", 1);                                            \
    if (!put##type(closure, handler_data, val)) {                            \
      return false;                                                          \
    }                                                                        \
    print_data(printer, "\":", 2);                                           \
    return true;                                                             \
  }
11758
Josh Habermane8ed0212015-06-08 17:56:03 -070011759TYPE_HANDLERS(double, fmt_double)
11760TYPE_HANDLERS(float, fmt_float)
11761TYPE_HANDLERS(bool, fmt_bool)
11762TYPE_HANDLERS(int32_t, fmt_int64)
11763TYPE_HANDLERS(uint32_t, fmt_int64)
11764TYPE_HANDLERS(int64_t, fmt_int64)
11765TYPE_HANDLERS(uint64_t, fmt_uint64)
Chris Fallin91473dc2014-12-12 15:58:26 -080011766
Josh Habermane8ed0212015-06-08 17:56:03 -070011767/* double and float are not allowed to be map keys. */
11768TYPE_HANDLERS_MAPKEY(bool, fmt_bool)
11769TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64)
11770TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64)
11771TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64)
11772TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64)
Chris Fallina5075922015-02-02 15:07:34 -080011773
Chris Fallin91473dc2014-12-12 15:58:26 -080011774#undef TYPE_HANDLERS
Chris Fallina5075922015-02-02 15:07:34 -080011775#undef TYPE_HANDLERS_MAPKEY
Chris Fallin91473dc2014-12-12 15:58:26 -080011776
11777typedef struct {
11778 void *keyname;
11779 const upb_enumdef *enumdef;
11780} EnumHandlerData;
11781
11782static bool scalar_enum(void *closure, const void *handler_data,
11783 int32_t val) {
11784 const EnumHandlerData *hd = handler_data;
11785 upb_json_printer *p = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -070011786 const char *symbolic_name;
11787
Chris Fallin91473dc2014-12-12 15:58:26 -080011788 CHK(putkey(closure, hd->keyname));
11789
Josh Habermane8ed0212015-06-08 17:56:03 -070011790 symbolic_name = upb_enumdef_iton(hd->enumdef, val);
Chris Fallin91473dc2014-12-12 15:58:26 -080011791 if (symbolic_name) {
11792 print_data(p, "\"", 1);
11793 putstring(p, symbolic_name, strlen(symbolic_name));
11794 print_data(p, "\"", 1);
11795 } else {
11796 putint32_t(closure, NULL, val);
11797 }
11798
11799 return true;
11800}
11801
Chris Fallina5075922015-02-02 15:07:34 -080011802static void print_enum_symbolic_name(upb_json_printer *p,
11803 const upb_enumdef *def,
11804 int32_t val) {
11805 const char *symbolic_name = upb_enumdef_iton(def, val);
11806 if (symbolic_name) {
11807 print_data(p, "\"", 1);
11808 putstring(p, symbolic_name, strlen(symbolic_name));
11809 print_data(p, "\"", 1);
11810 } else {
11811 putint32_t(p, NULL, val);
11812 }
11813}
11814
Chris Fallin91473dc2014-12-12 15:58:26 -080011815static bool repeated_enum(void *closure, const void *handler_data,
11816 int32_t val) {
11817 const EnumHandlerData *hd = handler_data;
11818 upb_json_printer *p = closure;
11819 print_comma(p);
11820
Chris Fallina5075922015-02-02 15:07:34 -080011821 print_enum_symbolic_name(p, hd->enumdef, val);
11822
11823 return true;
11824}
11825
11826static bool mapvalue_enum(void *closure, const void *handler_data,
11827 int32_t val) {
11828 const EnumHandlerData *hd = handler_data;
11829 upb_json_printer *p = closure;
11830
11831 print_enum_symbolic_name(p, hd->enumdef, val);
Chris Fallin91473dc2014-12-12 15:58:26 -080011832
11833 return true;
11834}
11835
11836static void *scalar_startsubmsg(void *closure, const void *handler_data) {
11837 return putkey(closure, handler_data) ? closure : UPB_BREAK;
11838}
11839
11840static void *repeated_startsubmsg(void *closure, const void *handler_data) {
Chris Fallin91473dc2014-12-12 15:58:26 -080011841 upb_json_printer *p = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -070011842 UPB_UNUSED(handler_data);
Chris Fallin91473dc2014-12-12 15:58:26 -080011843 print_comma(p);
11844 return closure;
11845}
11846
Chris Fallina5075922015-02-02 15:07:34 -080011847static void start_frame(upb_json_printer *p) {
11848 p->depth_++;
Chris Fallin91473dc2014-12-12 15:58:26 -080011849 p->first_elem_[p->depth_] = true;
11850 print_data(p, "{", 1);
Chris Fallina5075922015-02-02 15:07:34 -080011851}
11852
11853static void end_frame(upb_json_printer *p) {
11854 print_data(p, "}", 1);
11855 p->depth_--;
11856}
11857
11858static bool printer_startmsg(void *closure, const void *handler_data) {
Chris Fallina5075922015-02-02 15:07:34 -080011859 upb_json_printer *p = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -070011860 UPB_UNUSED(handler_data);
Chris Fallina5075922015-02-02 15:07:34 -080011861 if (p->depth_ == 0) {
11862 upb_bytessink_start(p->output_, 0, &p->subc_);
11863 }
11864 start_frame(p);
Chris Fallin91473dc2014-12-12 15:58:26 -080011865 return true;
11866}
11867
Chris Fallina5075922015-02-02 15:07:34 -080011868static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
Josh Habermane8ed0212015-06-08 17:56:03 -070011869 upb_json_printer *p = closure;
Chris Fallin91473dc2014-12-12 15:58:26 -080011870 UPB_UNUSED(handler_data);
11871 UPB_UNUSED(s);
Chris Fallina5075922015-02-02 15:07:34 -080011872 end_frame(p);
11873 if (p->depth_ == 0) {
Chris Fallin91473dc2014-12-12 15:58:26 -080011874 upb_bytessink_end(p->output_);
11875 }
Chris Fallin91473dc2014-12-12 15:58:26 -080011876 return true;
11877}
11878
11879static void *startseq(void *closure, const void *handler_data) {
11880 upb_json_printer *p = closure;
11881 CHK(putkey(closure, handler_data));
11882 p->depth_++;
11883 p->first_elem_[p->depth_] = true;
11884 print_data(p, "[", 1);
11885 return closure;
11886}
11887
11888static bool endseq(void *closure, const void *handler_data) {
Chris Fallin91473dc2014-12-12 15:58:26 -080011889 upb_json_printer *p = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -070011890 UPB_UNUSED(handler_data);
Chris Fallin91473dc2014-12-12 15:58:26 -080011891 print_data(p, "]", 1);
11892 p->depth_--;
11893 return true;
11894}
11895
Chris Fallina5075922015-02-02 15:07:34 -080011896static void *startmap(void *closure, const void *handler_data) {
11897 upb_json_printer *p = closure;
11898 CHK(putkey(closure, handler_data));
11899 p->depth_++;
11900 p->first_elem_[p->depth_] = true;
11901 print_data(p, "{", 1);
11902 return closure;
11903}
11904
11905static bool endmap(void *closure, const void *handler_data) {
Chris Fallina5075922015-02-02 15:07:34 -080011906 upb_json_printer *p = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -070011907 UPB_UNUSED(handler_data);
Chris Fallina5075922015-02-02 15:07:34 -080011908 print_data(p, "}", 1);
11909 p->depth_--;
11910 return true;
11911}
11912
Chris Fallin91473dc2014-12-12 15:58:26 -080011913static size_t putstr(void *closure, const void *handler_data, const char *str,
11914 size_t len, const upb_bufhandle *handle) {
Josh Habermane8ed0212015-06-08 17:56:03 -070011915 upb_json_printer *p = closure;
Chris Fallin91473dc2014-12-12 15:58:26 -080011916 UPB_UNUSED(handler_data);
11917 UPB_UNUSED(handle);
Chris Fallin91473dc2014-12-12 15:58:26 -080011918 putstring(p, str, len);
11919 return len;
11920}
11921
Josh Habermane8ed0212015-06-08 17:56:03 -070011922/* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011923static size_t putbytes(void *closure, const void *handler_data, const char *str,
11924 size_t len, const upb_bufhandle *handle) {
Chris Fallin91473dc2014-12-12 15:58:26 -080011925 upb_json_printer *p = closure;
11926
Josh Habermane8ed0212015-06-08 17:56:03 -070011927 /* This is the regular base64, not the "web-safe" version. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011928 static const char base64[] =
11929 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
11930
Josh Habermane8ed0212015-06-08 17:56:03 -070011931 /* Base64-encode. */
Chris Fallin91473dc2014-12-12 15:58:26 -080011932 char data[16000];
11933 const char *limit = data + sizeof(data);
11934 const unsigned char *from = (const unsigned char*)str;
11935 char *to = data;
11936 size_t remaining = len;
Josh Habermane8ed0212015-06-08 17:56:03 -070011937 size_t bytes;
11938
11939 UPB_UNUSED(handler_data);
11940 UPB_UNUSED(handle);
11941
Chris Fallin91473dc2014-12-12 15:58:26 -080011942 while (remaining > 2) {
Josh Habermane8ed0212015-06-08 17:56:03 -070011943 /* TODO(haberman): handle encoded lengths > sizeof(data) */
Chris Fallin91473dc2014-12-12 15:58:26 -080011944 UPB_ASSERT_VAR(limit, (limit - to) >= 4);
11945
11946 to[0] = base64[from[0] >> 2];
11947 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
11948 to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
11949 to[3] = base64[from[2] & 0x3f];
11950
11951 remaining -= 3;
11952 to += 4;
11953 from += 3;
11954 }
11955
11956 switch (remaining) {
11957 case 2:
11958 to[0] = base64[from[0] >> 2];
11959 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
11960 to[2] = base64[(from[1] & 0xf) << 2];
11961 to[3] = '=';
11962 to += 4;
11963 from += 2;
11964 break;
11965 case 1:
11966 to[0] = base64[from[0] >> 2];
11967 to[1] = base64[((from[0] & 0x3) << 4)];
11968 to[2] = '=';
11969 to[3] = '=';
11970 to += 4;
11971 from += 1;
11972 break;
11973 }
11974
Josh Habermane8ed0212015-06-08 17:56:03 -070011975 bytes = to - data;
Chris Fallin91473dc2014-12-12 15:58:26 -080011976 print_data(p, "\"", 1);
11977 putstring(p, data, bytes);
11978 print_data(p, "\"", 1);
11979 return len;
11980}
11981
11982static void *scalar_startstr(void *closure, const void *handler_data,
11983 size_t size_hint) {
Josh Habermane8ed0212015-06-08 17:56:03 -070011984 upb_json_printer *p = closure;
Chris Fallin91473dc2014-12-12 15:58:26 -080011985 UPB_UNUSED(handler_data);
11986 UPB_UNUSED(size_hint);
Chris Fallin91473dc2014-12-12 15:58:26 -080011987 CHK(putkey(closure, handler_data));
11988 print_data(p, "\"", 1);
11989 return p;
11990}
11991
11992static size_t scalar_str(void *closure, const void *handler_data,
11993 const char *str, size_t len,
11994 const upb_bufhandle *handle) {
11995 CHK(putstr(closure, handler_data, str, len, handle));
11996 return len;
11997}
11998
11999static bool scalar_endstr(void *closure, const void *handler_data) {
Chris Fallin91473dc2014-12-12 15:58:26 -080012000 upb_json_printer *p = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -070012001 UPB_UNUSED(handler_data);
Chris Fallin91473dc2014-12-12 15:58:26 -080012002 print_data(p, "\"", 1);
12003 return true;
12004}
12005
12006static void *repeated_startstr(void *closure, const void *handler_data,
12007 size_t size_hint) {
Josh Habermane8ed0212015-06-08 17:56:03 -070012008 upb_json_printer *p = closure;
Chris Fallin91473dc2014-12-12 15:58:26 -080012009 UPB_UNUSED(handler_data);
12010 UPB_UNUSED(size_hint);
Chris Fallin91473dc2014-12-12 15:58:26 -080012011 print_comma(p);
12012 print_data(p, "\"", 1);
12013 return p;
12014}
12015
12016static size_t repeated_str(void *closure, const void *handler_data,
12017 const char *str, size_t len,
12018 const upb_bufhandle *handle) {
12019 CHK(putstr(closure, handler_data, str, len, handle));
12020 return len;
12021}
12022
12023static bool repeated_endstr(void *closure, const void *handler_data) {
Chris Fallin91473dc2014-12-12 15:58:26 -080012024 upb_json_printer *p = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -070012025 UPB_UNUSED(handler_data);
Chris Fallin91473dc2014-12-12 15:58:26 -080012026 print_data(p, "\"", 1);
12027 return true;
12028}
12029
Chris Fallina5075922015-02-02 15:07:34 -080012030static void *mapkeyval_startstr(void *closure, const void *handler_data,
12031 size_t size_hint) {
Josh Habermane8ed0212015-06-08 17:56:03 -070012032 upb_json_printer *p = closure;
Chris Fallina5075922015-02-02 15:07:34 -080012033 UPB_UNUSED(handler_data);
12034 UPB_UNUSED(size_hint);
Chris Fallina5075922015-02-02 15:07:34 -080012035 print_data(p, "\"", 1);
12036 return p;
12037}
12038
12039static size_t mapkey_str(void *closure, const void *handler_data,
12040 const char *str, size_t len,
12041 const upb_bufhandle *handle) {
12042 CHK(putstr(closure, handler_data, str, len, handle));
12043 return len;
12044}
12045
12046static bool mapkey_endstr(void *closure, const void *handler_data) {
Chris Fallina5075922015-02-02 15:07:34 -080012047 upb_json_printer *p = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -070012048 UPB_UNUSED(handler_data);
Chris Fallina5075922015-02-02 15:07:34 -080012049 print_data(p, "\":", 2);
12050 return true;
12051}
12052
12053static bool mapvalue_endstr(void *closure, const void *handler_data) {
Chris Fallina5075922015-02-02 15:07:34 -080012054 upb_json_printer *p = closure;
Josh Habermane8ed0212015-06-08 17:56:03 -070012055 UPB_UNUSED(handler_data);
Chris Fallina5075922015-02-02 15:07:34 -080012056 print_data(p, "\"", 1);
12057 return true;
12058}
12059
Chris Fallin91473dc2014-12-12 15:58:26 -080012060static size_t scalar_bytes(void *closure, const void *handler_data,
12061 const char *str, size_t len,
12062 const upb_bufhandle *handle) {
12063 CHK(putkey(closure, handler_data));
12064 CHK(putbytes(closure, handler_data, str, len, handle));
12065 return len;
12066}
12067
12068static size_t repeated_bytes(void *closure, const void *handler_data,
12069 const char *str, size_t len,
12070 const upb_bufhandle *handle) {
12071 upb_json_printer *p = closure;
12072 print_comma(p);
12073 CHK(putbytes(closure, handler_data, str, len, handle));
12074 return len;
12075}
12076
Chris Fallina5075922015-02-02 15:07:34 -080012077static size_t mapkey_bytes(void *closure, const void *handler_data,
12078 const char *str, size_t len,
12079 const upb_bufhandle *handle) {
12080 upb_json_printer *p = closure;
12081 CHK(putbytes(closure, handler_data, str, len, handle));
12082 print_data(p, ":", 1);
12083 return len;
12084}
12085
12086static void set_enum_hd(upb_handlers *h,
12087 const upb_fielddef *f,
12088 upb_handlerattr *attr) {
12089 EnumHandlerData *hd = malloc(sizeof(EnumHandlerData));
12090 hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
12091 hd->keyname = newstrpc(h, f);
12092 upb_handlers_addcleanup(h, hd, free);
12093 upb_handlerattr_sethandlerdata(attr, hd);
12094}
12095
Josh Habermane8ed0212015-06-08 17:56:03 -070012096/* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
12097 * in a map).
12098 *
12099 * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
12100 * key or value cases properly. The right way to do this is to allocate a
12101 * temporary structure at the start of a mapentry submessage, store key and
12102 * value data in it as key and value handlers are called, and then print the
12103 * key/value pair once at the end of the submessage. If we don't do this, we
12104 * should at least detect the case and throw an error. However, so far all of
12105 * our sources that emit mapentry messages do so canonically (with one key
12106 * field, and then one value field), so this is not a pressing concern at the
12107 * moment. */
Chris Fallina5075922015-02-02 15:07:34 -080012108void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
Chris Fallina5075922015-02-02 15:07:34 -080012109 const upb_msgdef *md = upb_handlers_msgdef(h);
12110
Josh Habermane8ed0212015-06-08 17:56:03 -070012111 /* A mapentry message is printed simply as '"key": value'. Rather than
12112 * special-case key and value for every type below, we just handle both
12113 * fields explicitly here. */
Chris Fallina5075922015-02-02 15:07:34 -080012114 const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
12115 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
Chris Fallin91473dc2014-12-12 15:58:26 -080012116
12117 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
Chris Fallin91473dc2014-12-12 15:58:26 -080012118
Josh Habermane8ed0212015-06-08 17:56:03 -070012119 UPB_UNUSED(closure);
12120
Chris Fallina5075922015-02-02 15:07:34 -080012121 switch (upb_fielddef_type(key_field)) {
12122 case UPB_TYPE_INT32:
12123 upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
12124 break;
12125 case UPB_TYPE_INT64:
12126 upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
12127 break;
12128 case UPB_TYPE_UINT32:
12129 upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
12130 break;
12131 case UPB_TYPE_UINT64:
12132 upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
12133 break;
12134 case UPB_TYPE_BOOL:
12135 upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
12136 break;
12137 case UPB_TYPE_STRING:
12138 upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
12139 upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
12140 upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
12141 break;
12142 case UPB_TYPE_BYTES:
12143 upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
12144 break;
12145 default:
12146 assert(false);
12147 break;
12148 }
12149
12150 switch (upb_fielddef_type(value_field)) {
12151 case UPB_TYPE_INT32:
12152 upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
12153 break;
12154 case UPB_TYPE_INT64:
12155 upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
12156 break;
12157 case UPB_TYPE_UINT32:
12158 upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
12159 break;
12160 case UPB_TYPE_UINT64:
12161 upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
12162 break;
12163 case UPB_TYPE_BOOL:
12164 upb_handlers_setbool(h, value_field, putbool, &empty_attr);
12165 break;
12166 case UPB_TYPE_FLOAT:
12167 upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
12168 break;
12169 case UPB_TYPE_DOUBLE:
12170 upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
12171 break;
12172 case UPB_TYPE_STRING:
12173 upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
12174 upb_handlers_setstring(h, value_field, putstr, &empty_attr);
12175 upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
12176 break;
12177 case UPB_TYPE_BYTES:
12178 upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
12179 break;
12180 case UPB_TYPE_ENUM: {
12181 upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
12182 set_enum_hd(h, value_field, &enum_attr);
12183 upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
12184 upb_handlerattr_uninit(&enum_attr);
12185 break;
12186 }
12187 case UPB_TYPE_MESSAGE:
Josh Habermane8ed0212015-06-08 17:56:03 -070012188 /* No handler necessary -- the submsg handlers will print the message
12189 * as appropriate. */
Chris Fallina5075922015-02-02 15:07:34 -080012190 break;
12191 }
12192
12193 upb_handlerattr_uninit(&empty_attr);
12194}
12195
12196void printer_sethandlers(const void *closure, upb_handlers *h) {
Chris Fallina5075922015-02-02 15:07:34 -080012197 const upb_msgdef *md = upb_handlers_msgdef(h);
12198 bool is_mapentry = upb_msgdef_mapentry(md);
12199 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
Josh Habermane8ed0212015-06-08 17:56:03 -070012200 upb_msg_field_iter i;
12201
12202 UPB_UNUSED(closure);
Chris Fallina5075922015-02-02 15:07:34 -080012203
12204 if (is_mapentry) {
Josh Habermane8ed0212015-06-08 17:56:03 -070012205 /* mapentry messages are sufficiently different that we handle them
12206 * separately. */
Chris Fallina5075922015-02-02 15:07:34 -080012207 printer_sethandlers_mapentry(closure, h);
12208 return;
12209 }
12210
12211 upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
12212 upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
12213
12214#define TYPE(type, name, ctype) \
12215 case type: \
12216 if (upb_fielddef_isseq(f)) { \
12217 upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \
12218 } else { \
12219 upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \
12220 } \
Chris Fallin91473dc2014-12-12 15:58:26 -080012221 break;
12222
Chris Fallina5075922015-02-02 15:07:34 -080012223 upb_msg_field_begin(&i, md);
Chris Fallinfcd88892015-01-13 18:14:39 -080012224 for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
Chris Fallin91473dc2014-12-12 15:58:26 -080012225 const upb_fielddef *f = upb_msg_iter_field(&i);
12226
12227 upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
12228 upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f));
12229
Chris Fallina5075922015-02-02 15:07:34 -080012230 if (upb_fielddef_ismap(f)) {
12231 upb_handlers_setstartseq(h, f, startmap, &name_attr);
12232 upb_handlers_setendseq(h, f, endmap, &name_attr);
12233 } else if (upb_fielddef_isseq(f)) {
Chris Fallin91473dc2014-12-12 15:58:26 -080012234 upb_handlers_setstartseq(h, f, startseq, &name_attr);
12235 upb_handlers_setendseq(h, f, endseq, &empty_attr);
12236 }
12237
12238 switch (upb_fielddef_type(f)) {
12239 TYPE(UPB_TYPE_FLOAT, float, float);
12240 TYPE(UPB_TYPE_DOUBLE, double, double);
12241 TYPE(UPB_TYPE_BOOL, bool, bool);
12242 TYPE(UPB_TYPE_INT32, int32, int32_t);
12243 TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
12244 TYPE(UPB_TYPE_INT64, int64, int64_t);
12245 TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
12246 case UPB_TYPE_ENUM: {
Josh Habermane8ed0212015-06-08 17:56:03 -070012247 /* For now, we always emit symbolic names for enums. We may want an
12248 * option later to control this behavior, but we will wait for a real
12249 * need first. */
Chris Fallin91473dc2014-12-12 15:58:26 -080012250 upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
Chris Fallina5075922015-02-02 15:07:34 -080012251 set_enum_hd(h, f, &enum_attr);
Chris Fallin91473dc2014-12-12 15:58:26 -080012252
12253 if (upb_fielddef_isseq(f)) {
12254 upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
12255 } else {
12256 upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
12257 }
12258
12259 upb_handlerattr_uninit(&enum_attr);
12260 break;
12261 }
12262 case UPB_TYPE_STRING:
12263 if (upb_fielddef_isseq(f)) {
12264 upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
12265 upb_handlers_setstring(h, f, repeated_str, &empty_attr);
12266 upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
12267 } else {
12268 upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
12269 upb_handlers_setstring(h, f, scalar_str, &empty_attr);
12270 upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
12271 }
12272 break;
12273 case UPB_TYPE_BYTES:
Josh Habermane8ed0212015-06-08 17:56:03 -070012274 /* XXX: this doesn't support strings that span buffers yet. The base64
12275 * encoder will need to be made resumable for this to work properly. */
Chris Fallin91473dc2014-12-12 15:58:26 -080012276 if (upb_fielddef_isseq(f)) {
12277 upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
12278 } else {
12279 upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
12280 }
12281 break;
12282 case UPB_TYPE_MESSAGE:
12283 if (upb_fielddef_isseq(f)) {
12284 upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
12285 } else {
12286 upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
12287 }
12288 break;
12289 }
12290
12291 upb_handlerattr_uninit(&name_attr);
12292 }
12293
12294 upb_handlerattr_uninit(&empty_attr);
12295#undef TYPE
12296}
12297
Chris Fallind3262772015-05-14 18:24:26 -070012298static void json_printer_reset(upb_json_printer *p) {
12299 p->depth_ = 0;
12300}
12301
12302
Chris Fallin91473dc2014-12-12 15:58:26 -080012303/* Public API *****************************************************************/
12304
Chris Fallind3262772015-05-14 18:24:26 -070012305upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
12306 upb_bytessink *output) {
12307#ifndef NDEBUG
12308 size_t size_before = upb_env_bytesallocated(e);
12309#endif
Chris Fallin91473dc2014-12-12 15:58:26 -080012310
Chris Fallind3262772015-05-14 18:24:26 -070012311 upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
12312 if (!p) return NULL;
Chris Fallin91473dc2014-12-12 15:58:26 -080012313
Chris Fallin91473dc2014-12-12 15:58:26 -080012314 p->output_ = output;
Chris Fallind3262772015-05-14 18:24:26 -070012315 json_printer_reset(p);
12316 upb_sink_reset(&p->input_, h, p);
12317
Josh Habermane8ed0212015-06-08 17:56:03 -070012318 /* If this fails, increase the value in printer.h. */
Chris Fallind3262772015-05-14 18:24:26 -070012319 assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
12320 return p;
Chris Fallin91473dc2014-12-12 15:58:26 -080012321}
12322
12323upb_sink *upb_json_printer_input(upb_json_printer *p) {
12324 return &p->input_;
12325}
12326
12327const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
12328 const void *owner) {
12329 return upb_handlers_newfrozen(md, owner, printer_sethandlers, NULL);
12330}