Merge pull request #155 from cfallin/ruby-maps Support for maps in the MRI C Ruby extension.

commit: 5446deaea7ffc29f6e09368cb6238da083969123 [log] [tgz]
author: Joshua Haberman <jhaberman@gmail.com> Tue Jan 13 13:50:11 2015 -0800
committer: Joshua Haberman <jhaberman@gmail.com> Tue Jan 13 13:50:11 2015 -0800
tree: 994ba61d6441558edbff629b3da99d31552600bb
parent: 052e0205a76717f39fc65e303fd2b92ab1df3028 [diff]
parent: ace4212e60bf1abd46181c9ddb9fe31b6d9fac45 [diff]
diff --git a/ruby/ext/google/protobuf_c/defs.c b/ruby/ext/google/protobuf_c/defs.c
index bb6f10e..a18aaac 100644
--- a/ruby/ext/google/protobuf_c/defs.c
+++ b/ruby/ext/google/protobuf_c/defs.c

@@ -226,6 +226,7 @@
 void Descriptor_mark(void* _self) {
   Descriptor* self = _self;
   rb_gc_mark(self->klass);
+  rb_gc_mark(self->typeclass_references);  // classes pinned by map handlers
 }
 
 void Descriptor_free(void* _self) {
@@ -270,6 +271,7 @@
   self->fill_method = NULL;
   self->pb_serialize_handlers = NULL;
   self->json_serialize_handlers = NULL;
+  self->typeclass_references = rb_ary_new();
   return ret;
 }
 
@@ -923,6 +925,7 @@
 void MessageBuilderContext_mark(void* _self) {
   MessageBuilderContext* self = _self;
   rb_gc_mark(self->descriptor);
+  rb_gc_mark(self->builder);  // Builder handed to #initialize
 }
 
 void MessageBuilderContext_free(void* _self) {
@@ -935,6 +938,7 @@
   VALUE ret = TypedData_Wrap_Struct(
       klass, &_MessageBuilderContext_type, self);
   self->descriptor = Qnil;
+  self->builder = Qnil;
   return ret;
 }
 
@@ -943,24 +947,29 @@
       module, "MessageBuilderContext", rb_cObject);
   rb_define_alloc_func(klass, MessageBuilderContext_alloc);
   rb_define_method(klass, "initialize",
-                   MessageBuilderContext_initialize, 1);
+                   MessageBuilderContext_initialize, 2);
   rb_define_method(klass, "optional", MessageBuilderContext_optional, -1);
   rb_define_method(klass, "required", MessageBuilderContext_required, -1);
   rb_define_method(klass, "repeated", MessageBuilderContext_repeated, -1);
+  rb_define_method(klass, "map", MessageBuilderContext_map, -1);
   cMessageBuilderContext = klass;
   rb_gc_register_address(&cMessageBuilderContext);
 }
 
 /*
  * call-seq:
- *     MessageBuilderContext.new(desc) => context
+ *     MessageBuilderContext.new(desc, builder) => context
  *
- * Create a new builder context around the given message descriptor. This class
- * is intended to serve as a DSL context to be used with #instance_eval.
+ * Create a new message builder context around the given message descriptor and
+ * Builder. This class is intended to serve as a DSL context to be used
+ * with #instance_eval.
  */
-VALUE MessageBuilderContext_initialize(VALUE _self, VALUE msgdef) {
+VALUE MessageBuilderContext_initialize(VALUE _self,
+                                       VALUE msgdef,
+                                       VALUE builder) {
   DEFINE_SELF(MessageBuilderContext, self, _self);
   self->descriptor = msgdef;
+  self->builder = builder;  // #map queues map-entry descriptors on it
   return Qnil;
 }
 
@@ -1065,6 +1074,97 @@
                           name, type, number, type_class);
 }
 
+/*
+ * call-seq:
+ *     MessageBuilderContext.map(name, key_type, value_type, number,
+ *                               value_type_class = nil)
+ *
+ * Defines a new map field on this message type with the given key and value
+ * types, tag number, and type class (for message and enum value types). The key
+ * type must be :int32/:uint32/:int64/:uint64, :bool, or :string. The value type
+ * must be a symbol accepted by FieldDescriptor#type=; type_class, if present,
+ * must be a string accepted by FieldDescriptor#submsg_name=. Raises TypeError
+ * if name or key_type is not a Symbol, ArgumentError on an invalid key type.
+ */
+VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) {
+  DEFINE_SELF(MessageBuilderContext, self, _self);
+
+  if (argc < 4) {
+    rb_raise(rb_eArgError, "Expected at least 4 arguments.");
+  }
+  VALUE name = argv[0];
+  VALUE key_type = argv[1];
+  VALUE value_type = argv[2];
+  VALUE number = argv[3];
+  VALUE type_class = (argc > 4) ? argv[4] : Qnil;
+
+  // SYM2ID() is only meaningful for symbols, so reject anything else before
+  // using it. Enums, messages, and floats/doubles are excluded as map keys
+  // here; the field-descriptor setter below validates the remaining options.
+  Check_Type(name, T_SYMBOL);
+  Check_Type(key_type, T_SYMBOL);
+  ID key_id = SYM2ID(key_type);
+  if (key_id == rb_intern("float") || key_id == rb_intern("double") ||
+      key_id == rb_intern("enum") || key_id == rb_intern("message")) {
+    rb_raise(rb_eArgError,
+             "Cannot add a map field with a float, double, enum, or message "
+             "type.");
+  }
+
+  // Create a new message descriptor for the map entry message, and create a
+  // repeated submessage field here with that type.
+  VALUE mapentry_desc = rb_class_new_instance(0, NULL, cDescriptor);
+  VALUE mapentry_desc_name = rb_funcall(self->descriptor, rb_intern("name"), 0);
+  mapentry_desc_name = rb_str_cat2(mapentry_desc_name, "_MapEntry_");
+  mapentry_desc_name = rb_str_cat2(mapentry_desc_name,
+                                   rb_id2name(SYM2ID(name)));
+  Descriptor_name_set(mapentry_desc, mapentry_desc_name);
+
+  // The 'mapentry' attribute has no Ruby setter because we do not want the user
+  // attempting to DIY the setup below; we want to ensure that the fields are
+  // correct. So we reach into the msgdef here to set the bit manually.
+  Descriptor* mapentry_desc_self = ruby_to_Descriptor(mapentry_desc);
+  upb_msgdef_setmapentry((upb_msgdef*)mapentry_desc_self->msgdef, true);
+
+  // optional <type> key = 1;
+  VALUE key_field = rb_class_new_instance(0, NULL, cFieldDescriptor);
+  FieldDescriptor_name_set(key_field, rb_str_new2("key"));
+  FieldDescriptor_label_set(key_field, ID2SYM(rb_intern("optional")));
+  FieldDescriptor_number_set(key_field, INT2NUM(1));
+  FieldDescriptor_type_set(key_field, key_type);
+  Descriptor_add_field(mapentry_desc, key_field);
+
+  // optional <type> value = 2;
+  VALUE value_field = rb_class_new_instance(0, NULL, cFieldDescriptor);
+  FieldDescriptor_name_set(value_field, rb_str_new2("value"));
+  FieldDescriptor_label_set(value_field, ID2SYM(rb_intern("optional")));
+  FieldDescriptor_number_set(value_field, INT2NUM(2));
+  FieldDescriptor_type_set(value_field, value_type);
+  if (type_class != Qnil) {
+    VALUE submsg_name = rb_str_new2("."); // prepend '.' to make name absolute.
+    submsg_name = rb_str_append(submsg_name, type_class);
+    FieldDescriptor_submsg_name_set(value_field, submsg_name);
+  }
+  Descriptor_add_field(mapentry_desc, value_field);
+
+  // Add the map-entry message type to the current builder, and use the type to
+  // create the map field itself.
+  Builder* builder_self = ruby_to_Builder(self->builder);
+  rb_ary_push(builder_self->pending_list, mapentry_desc);
+
+  VALUE map_field = rb_class_new_instance(0, NULL, cFieldDescriptor);
+  FieldDescriptor_name_set(map_field, rb_str_new2(rb_id2name(SYM2ID(name))));
+  FieldDescriptor_number_set(map_field, number);
+  FieldDescriptor_label_set(map_field, ID2SYM(rb_intern("repeated")));
+  FieldDescriptor_type_set(map_field, ID2SYM(rb_intern("message")));
+  VALUE submsg_name = rb_str_new2("."); // prepend '.' to make name absolute.
+  submsg_name = rb_str_append(submsg_name, mapentry_desc_name);
+  FieldDescriptor_submsg_name_set(map_field, submsg_name);
+  Descriptor_add_field(self->descriptor, map_field);
+
+  return Qnil;
+}
+
 // -----------------------------------------------------------------------------
 // EnumBuilderContext.
 // -----------------------------------------------------------------------------
@@ -1190,7 +1290,8 @@
 VALUE Builder_add_message(VALUE _self, VALUE name) {
   DEFINE_SELF(Builder, self, _self);
   VALUE msgdef = rb_class_new_instance(0, NULL, cDescriptor);
-  VALUE ctx = rb_class_new_instance(1, &msgdef, cMessageBuilderContext);
+  VALUE args[2] = { msgdef, _self };
+  VALUE ctx = rb_class_new_instance(2, args, cMessageBuilderContext);
   VALUE block = rb_block_proc();
   rb_funcall(msgdef, rb_intern("name="), 1, name);
   rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block);

diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c
index 8aba3c9..e5e1514 100644
--- a/ruby/ext/google/protobuf_c/encode_decode.c
+++ b/ruby/ext/google/protobuf_c/encode_decode.c

@@ -64,7 +64,7 @@
 static void *startseq_handler(void* closure, const void* hd) {
   MessageHeader* msg = closure;
   const size_t *ofs = hd;
-  return (void*)DEREF(Message_data(msg), *ofs, VALUE);
+  return (void*)DEREF(msg, *ofs, VALUE);  // ofs counts from msg itself
 }
 
 // Handlers that append primitive values to a repeated field (a regular Ruby
@@ -115,7 +115,7 @@
   const size_t *ofs = hd;
   VALUE str = rb_str_new2("");
   rb_enc_associate(str, kRubyStringUtf8Encoding);
-  DEREF(Message_data(msg), *ofs, VALUE) = str;
+  DEREF(msg, *ofs, VALUE) = str;
   return (void*)str;
 }
 
@@ -127,7 +127,7 @@
   const size_t *ofs = hd;
   VALUE str = rb_str_new2("");
   rb_enc_associate(str, kRubyString8bitEncoding);
-  DEREF(Message_data(msg), *ofs, VALUE) = str;
+  DEREF(msg, *ofs, VALUE) = str;
   return (void*)str;
 }
 
@@ -163,20 +163,237 @@
       get_def_obj((void*)submsgdata->md);
   VALUE subklass = Descriptor_msgclass(subdesc);
 
-  if (DEREF(Message_data(msg), submsgdata->ofs, VALUE) == Qnil) {
-    DEREF(Message_data(msg), submsgdata->ofs, VALUE) =
+  if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) {
+    DEREF(msg, submsgdata->ofs, VALUE) =
         rb_class_new_instance(0, NULL, subklass);
   }
 
-  VALUE submsg_rb = DEREF(Message_data(msg), submsgdata->ofs, VALUE);
+  VALUE submsg_rb = DEREF(msg, submsgdata->ofs, VALUE);
   MessageHeader* submsg;
   TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
   return submsg;
 }
 
+// Handler data shared by startmapentry_handler and endmap_handler.
+typedef struct {
+  size_t ofs;  // offset of the map field's VALUE in the parent message
+  upb_fieldtype_t key_field_type;  // upb type of the entry's key field
+  upb_fieldtype_t value_field_type;  // upb type of the entry's value field
+  VALUE value_field_typeclass;  // field_type_class() of the value field
+} map_handlerdata_t;
+
+// Temporary frame for map parsing: at the beginning of a map entry message, a
+// submsg handler allocates a frame to hold (i) a reference to the Map object
+// into which this message will be inserted and (ii) storage slots to
+// temporarily hold the key and value for this map entry until the end of the
+// submessage. When the submessage ends, another handler is called to insert the
+// value into the map.
+typedef struct {
+  VALUE map;  // Map read from the parent message
+  char key_storage[NATIVE_SLOT_MAX_SIZE];  // slot for the decoded key
+  char value_storage[NATIVE_SLOT_MAX_SIZE];  // slot for the decoded value
+} map_parse_frame_t;
+
+// 'startsubmsg' handler installed on the message that owns a map field. It
+// builds the scratch frame that collects one entry's key and value.
+static void *startmapentry_handler(void *closure, const void *hd) {
+  const map_handlerdata_t* entry_info = hd;
+  MessageHeader* parent = closure;
+  VALUE target_map = DEREF(parent, entry_info->ofs, VALUE);
+
+  map_parse_frame_t* scratch = ALLOC(map_parse_frame_t);
+  scratch->map = target_map;
+
+  native_slot_init(entry_info->key_field_type, &scratch->key_storage);
+  native_slot_init(entry_info->value_field_type, &scratch->value_storage);
+
+  return scratch;
+}
+
+// 'endmsg' handler on the map entry msgdef: converts the frame's key and value
+// slots to Ruby values, stores them in the frame's Map and frees the frame.
+static bool endmap_handler(void *closure, const void *hd, upb_status* s) {
+  map_parse_frame_t* frame = closure;
+  const map_handlerdata_t* mapdata = hd;
+
+  VALUE key = native_slot_get(mapdata->key_field_type, Qnil,
+                              &frame->key_storage);
+  VALUE value = native_slot_get(
+      mapdata->value_field_type, mapdata->value_field_typeclass,
+      &frame->value_storage);
+
+  Map_index_set(frame->map, key, value);
+  // The frame was obtained with ALLOC() (xmalloc), so pair it with xfree().
+  xfree(frame);
+
+  return true;
+}
+
+// Allocates a new map_handlerdata_t given the map entry message definition. If
+// the offset of the field within the parent message is also given, that is
+// added to the handler data as well. Note that this is called *twice* per map
+// field: once in the parent message handler setup when setting the startsubmsg
+// handler and once in the map entry message handler setup when setting the
+// key/value and endmsg handlers. The reason is that there is no easy way to
+// pass the handlerdata down to the sub-message handler setup.
+static map_handlerdata_t* new_map_handlerdata(
+    size_t ofs,
+    const upb_msgdef* mapentry_def,
+    Descriptor* desc) {
+
+  map_handlerdata_t* hd = ALLOC(map_handlerdata_t);
+  hd->ofs = ofs;  // 0 when called for the map-entry msgdef itself
+  const upb_fielddef* key_field = upb_msgdef_itof(mapentry_def,
+                                                  MAP_KEY_FIELD);
+  assert(key_field != NULL);  // no-op when built with -DNDEBUG
+  hd->key_field_type = upb_fielddef_type(key_field);
+  const upb_fielddef* value_field = upb_msgdef_itof(mapentry_def,
+                                                    MAP_VALUE_FIELD);
+  assert(value_field != NULL);  // no-op when built with -DNDEBUG
+  hd->value_field_type = upb_fielddef_type(value_field);
+  hd->value_field_typeclass = field_type_class(value_field);
+
+  // Ensure that value_field_typeclass is properly GC-rooted. We must do this
+  // because we hold a reference to the Ruby class in the handlerdata, which is
+  // owned by the handlers. The handlers are owned by *this* message's Ruby
+  // object, but each Ruby object is rooted independently at the def -> Ruby
+  // object map. So we have to ensure that the Ruby objects we depend on will
+  // stick around as long as we're around.
+  if (hd->value_field_typeclass != Qnil) {
+    rb_ary_push(desc->typeclass_references, hd->value_field_typeclass);
+  }
+
+  return hd;  // caller registers the cleanup that releases hd
+}
+
+// Installs the startseq handler and the per-type append handlers for field f.
+static void add_handlers_for_repeated_field(upb_handlers *h,
+                                            const upb_fielddef *f,
+                                            size_t offset) {
+  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+  upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
+  upb_handlers_setstartseq(h, f, startseq_handler, &attr);
+  upb_handlerattr_uninit(&attr);
+
+  switch (upb_fielddef_type(f)) {
+
+#define SET_HANDLER(utype, ltype)                                 \
+  case utype:                                                     \
+    upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \
+    break;
+
+    SET_HANDLER(UPB_TYPE_BOOL,   bool);
+    SET_HANDLER(UPB_TYPE_INT32,  int32);
+    SET_HANDLER(UPB_TYPE_UINT32, uint32);
+    SET_HANDLER(UPB_TYPE_ENUM,   int32);  // enums share the int32 handler
+    SET_HANDLER(UPB_TYPE_FLOAT,  float);
+    SET_HANDLER(UPB_TYPE_INT64,  int64);
+    SET_HANDLER(UPB_TYPE_UINT64, uint64);
+    SET_HANDLER(UPB_TYPE_DOUBLE, double);
+
+#undef SET_HANDLER
+
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES: {
+      bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
+      upb_handlers_setstartstr(h, f, is_bytes ?
+                               appendbytes_handler : appendstr_handler,
+                               NULL);
+      upb_handlers_setstring(h, f, stringdata_handler, NULL);
+      break;  // the pre-refactor code fell through to the MESSAGE case here
+    }
+    case UPB_TYPE_MESSAGE: {
+      upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+      upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f));
+      upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
+      upb_handlerattr_uninit(&attr);
+      break;
+    }
+  }
+}
+
+// Sets up handlers that store a singular field's parsed value at `offset`.
+static void add_handlers_for_singular_field(upb_handlers *h,
+                                            const upb_fielddef *f,
+                                            size_t offset) {
+  switch (upb_fielddef_type(f)) {
+    case UPB_TYPE_BOOL:
+    case UPB_TYPE_INT32:
+    case UPB_TYPE_UINT32:
+    case UPB_TYPE_ENUM:
+    case UPB_TYPE_FLOAT:
+    case UPB_TYPE_INT64:
+    case UPB_TYPE_UINT64:
+    case UPB_TYPE_DOUBLE:
+      upb_shim_set(h, f, offset, -1);  // shim stores straight at offset
+      break;
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES: {
+      bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
+      upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+      upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
+      upb_handlers_setstartstr(h, f,
+                               is_bytes ? bytes_handler : str_handler,
+                               &attr);
+      upb_handlers_setstring(h, f, stringdata_handler, &attr);
+      upb_handlerattr_uninit(&attr);
+      break;
+    }
+    case UPB_TYPE_MESSAGE: {
+      upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+      upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f));
+      upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
+      upb_handlerattr_uninit(&attr);
+      break;
+    }
+  }
+}
+
+// Installs the 'startsubmsg' handler for map field `fielddef`, whose value
+// lives at `offset` in the parent message; hd is ALLOC()ed, so xfree() it.
+static void add_handlers_for_mapfield(upb_handlers* h,
+                                      const upb_fielddef* fielddef,
+                                      size_t offset, Descriptor* desc) {
+  const upb_msgdef* map_msgdef = upb_fielddef_msgsubdef(fielddef);
+  map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef, desc);
+  upb_handlers_addcleanup(h, hd, xfree);
+  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+  upb_handlerattr_sethandlerdata(&attr, hd);
+  upb_handlers_setstartsubmsg(h, fielddef, startmapentry_handler, &attr);
+  upb_handlerattr_uninit(&attr);
+}
+
+// Adds handlers to a map-entry msgdef: an 'endmsg' handler that commits the
+// entry plus key/value handlers that fill the map_parse_frame_t slots.
+static void add_handlers_for_mapentry(const upb_msgdef* msgdef,
+                                      upb_handlers* h,
+                                      Descriptor* desc) {
+  const upb_fielddef* key_field = map_entry_key(msgdef);
+  const upb_fielddef* value_field = map_entry_value(msgdef);
+  map_handlerdata_t* hd = new_map_handlerdata(0, msgdef, desc);
+  upb_handlers_addcleanup(h, hd, xfree);  // hd comes from ALLOC()
+  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+  upb_handlerattr_sethandlerdata(&attr, hd);
+  upb_handlers_setendmsg(h, endmap_handler, &attr);
+  upb_handlerattr_uninit(&attr);
+
+  add_handlers_for_singular_field(h, key_field,
+                                  offsetof(map_parse_frame_t, key_storage));
+  add_handlers_for_singular_field(h, value_field,
+                                  offsetof(map_parse_frame_t, value_storage));
+}
+
 static void add_handlers_for_message(const void *closure, upb_handlers *h) {
-  Descriptor* desc = ruby_to_Descriptor(
-      get_def_obj((void*)upb_handlers_msgdef(h)));
+  const upb_msgdef* msgdef = upb_handlers_msgdef(h);
+  Descriptor* desc = ruby_to_Descriptor(get_def_obj((void*)msgdef));
+
+  // If this is a mapentry message type, set up a special set of handlers and
+  // bail out of the normal (user-defined) message type handling.
+  if (upb_msgdef_mapentry(msgdef)) {
+    add_handlers_for_mapentry(msgdef, h, desc);
+    return;
+  }
+
   // Ensure layout exists. We may be invoked to create handlers for a given
   // message if we are included as a submsg of another message type before our
   // class is actually built, so to work around this, we just create the layout
@@ -191,82 +408,15 @@
        !upb_msg_done(&i);
        upb_msg_next(&i)) {
     const upb_fielddef *f = upb_msg_iter_field(&i);
-    size_t offset = desc->layout->offsets[upb_fielddef_index(f)];
+    size_t offset = desc->layout->offsets[upb_fielddef_index(f)] +
+        sizeof(MessageHeader);
 
-    if (upb_fielddef_isseq(f)) {
-      upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
-      upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
-      upb_handlers_setstartseq(h, f, startseq_handler, &attr);
-      upb_handlerattr_uninit(&attr);
-
-      switch (upb_fielddef_type(f)) {
-
-#define SET_HANDLER(utype, ltype)                                 \
-  case utype:                                                     \
-    upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \
-    break;
-
-        SET_HANDLER(UPB_TYPE_BOOL,   bool);
-        SET_HANDLER(UPB_TYPE_INT32,  int32);
-        SET_HANDLER(UPB_TYPE_UINT32, uint32);
-        SET_HANDLER(UPB_TYPE_ENUM,   int32);
-        SET_HANDLER(UPB_TYPE_FLOAT,  float);
-        SET_HANDLER(UPB_TYPE_INT64,  int64);
-        SET_HANDLER(UPB_TYPE_UINT64, uint64);
-        SET_HANDLER(UPB_TYPE_DOUBLE, double);
-
-#undef SET_HANDLER
-
-        case UPB_TYPE_STRING:
-        case UPB_TYPE_BYTES: {
-          bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
-          upb_handlers_setstartstr(h, f, is_bytes ?
-                                   appendbytes_handler : appendstr_handler,
-                                   NULL);
-          upb_handlers_setstring(h, f, stringdata_handler, NULL);
-        }
-        case UPB_TYPE_MESSAGE: {
-          upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
-          upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f));
-          upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
-          upb_handlerattr_uninit(&attr);
-          break;
-        }
-      }
-    }
-
-    switch (upb_fielddef_type(f)) {
-      case UPB_TYPE_BOOL:
-      case UPB_TYPE_INT32:
-      case UPB_TYPE_UINT32:
-      case UPB_TYPE_ENUM:
-      case UPB_TYPE_FLOAT:
-      case UPB_TYPE_INT64:
-      case UPB_TYPE_UINT64:
-      case UPB_TYPE_DOUBLE:
-        // The shim writes directly at the given offset (instead of using
-        // DEREF()) so we need to add the msg overhead.
-        upb_shim_set(h, f, offset + sizeof(MessageHeader), -1);
-        break;
-      case UPB_TYPE_STRING:
-      case UPB_TYPE_BYTES: {
-        bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
-        upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
-        upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
-        upb_handlers_setstartstr(h, f,
-                                 is_bytes ? bytes_handler : str_handler,
-                                 &attr);
-        upb_handlers_setstring(h, f, stringdata_handler, &attr);
-        upb_handlerattr_uninit(&attr);
-        break;
-      }
-      case UPB_TYPE_MESSAGE: {
-        upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
-        upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f));
-        upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
-        upb_handlerattr_uninit(&attr);
-        break;
-      }
+    if (is_map_field(f)) {
+      add_handlers_for_mapfield(h, f, offset, desc);
+    } else if (upb_fielddef_isseq(f)) {
+      add_handlers_for_repeated_field(h, f, offset);
+    } else {
+      add_handlers_for_singular_field(h, f, offset);
     }
   }
 }
@@ -558,6 +708,88 @@
   upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
 }
 
+// Emits one Ruby value to sink as field f (putmap uses it for entry keys and
+// values). type_class is only consulted to resolve enum symbols to numbers.
+static void put_ruby_value(VALUE value, const upb_fielddef *f,
+                           VALUE type_class, int depth, upb_sink *sink) {
+  upb_selector_t sel = 0;
+  if (upb_fielddef_isprimitive(f)) {
+    sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
+  }
+
+  switch (upb_fielddef_type(f)) {
+    case UPB_TYPE_INT32:
+      upb_sink_putint32(sink, sel, NUM2INT(value));
+      break;
+    case UPB_TYPE_INT64:
+      upb_sink_putint64(sink, sel, NUM2LL(value));
+      break;
+    case UPB_TYPE_UINT32:
+      upb_sink_putuint32(sink, sel, NUM2UINT(value));
+      break;
+    case UPB_TYPE_UINT64:
+      upb_sink_putuint64(sink, sel, NUM2ULL(value));
+      break;
+    case UPB_TYPE_FLOAT:
+      upb_sink_putfloat(sink, sel, NUM2DBL(value));
+      break;
+    case UPB_TYPE_DOUBLE:
+      upb_sink_putdouble(sink, sel, NUM2DBL(value));
+      break;
+    case UPB_TYPE_ENUM: {
+      if (TYPE(value) == T_SYMBOL) {
+        value = rb_funcall(type_class, rb_intern("resolve"), 1, value);
+      }
+      upb_sink_putint32(sink, sel, NUM2INT(value));
+      break;
+    }
+    case UPB_TYPE_BOOL:
+      upb_sink_putbool(sink, sel, value == Qtrue);
+      break;
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES:
+      putstr(value, f, sink);
+      break;
+    case UPB_TYPE_MESSAGE:
+      putsubmsg(value, f, sink, depth);
+      break;
+  }
+}
+
+static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
+                   int depth) {
+  if (map == Qnil) return;  // nothing to emit for a nil map
+  Map* self = ruby_to_Map(map);  // read for value_type_class below
+
+  upb_sink subsink;
+
+  upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
+
+  assert(upb_fielddef_type(f) == UPB_TYPE_MESSAGE);
+  const upb_fielddef* key_field = map_field_key(f);
+  const upb_fielddef* value_field = map_field_value(f);
+
+  Map_iter it;
+  for (Map_begin(map, &it); !Map_done(&it); Map_next(&it)) {
+    VALUE key = Map_iter_key(&it);
+    VALUE value = Map_iter_value(&it);
+
+    upb_sink entry_sink;  // sink for this one key/value entry
+    upb_sink_startsubmsg(&subsink, getsel(f, UPB_HANDLER_STARTSUBMSG), &entry_sink);
+    upb_sink_startmsg(&entry_sink);
+
+    put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink);
+    put_ruby_value(value, value_field, self->value_type_class, depth + 1,
+                   &entry_sink);
+
+    upb_status status;  // NOTE(review): passed to endmsg uninitialized
+    upb_sink_endmsg(&entry_sink, &status);
+    upb_sink_endsubmsg(&subsink, getsel(f, UPB_HANDLER_ENDSUBMSG));
+  }
+
+  upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
+}
+
 static void putmsg(VALUE msg_rb, const Descriptor* desc,
                    upb_sink *sink, int depth) {
   upb_sink_startmsg(sink);
@@ -571,33 +803,38 @@
 
   MessageHeader* msg;
   TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
-  void* msg_data = Message_data(msg);
 
   upb_msg_iter i;
   for (upb_msg_begin(&i, desc->msgdef);
        !upb_msg_done(&i);
        upb_msg_next(&i)) {
     upb_fielddef *f = upb_msg_iter_field(&i);
-    uint32_t offset = desc->layout->offsets[upb_fielddef_index(f)];
+    uint32_t offset =
+        desc->layout->offsets[upb_fielddef_index(f)] + sizeof(MessageHeader);
 
-    if (upb_fielddef_isseq(f)) {
-      VALUE ary = DEREF(msg_data, offset, VALUE);
+    if (is_map_field(f)) {
+      VALUE map = DEREF(msg, offset, VALUE);
+      if (map != Qnil) {
+        putmap(map, f, sink, depth);
+      }
+    } else if (upb_fielddef_isseq(f)) {
+      VALUE ary = DEREF(msg, offset, VALUE);
       if (ary != Qnil) {
         putary(ary, f, sink, depth);
       }
     } else if (upb_fielddef_isstring(f)) {
-      VALUE str = DEREF(msg_data, offset, VALUE);
+      VALUE str = DEREF(msg, offset, VALUE);
       if (RSTRING_LEN(str) > 0) {
         putstr(str, f, sink);
       }
     } else if (upb_fielddef_issubmsg(f)) {
-      putsubmsg(DEREF(msg_data, offset, VALUE), f, sink, depth);
+      putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth);
     } else {
       upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
 
 #define T(upbtypeconst, upbtype, ctype, default_value)                \
   case upbtypeconst: {                                                \
-      ctype value = DEREF(msg_data, offset, ctype);                   \
+      ctype value = DEREF(msg, offset, ctype);                        \
       if (value != default_value) {                                   \
         upb_sink_put##upbtype(sink, sel, value);                      \
       }                                                               \

diff --git a/ruby/ext/google/protobuf_c/extconf.rb b/ruby/ext/google/protobuf_c/extconf.rb
index 7cf7bf6..8d60392 100644
--- a/ruby/ext/google/protobuf_c/extconf.rb
+++ b/ruby/ext/google/protobuf_c/extconf.rb

@@ -5,6 +5,6 @@
 $CFLAGS += " -O3 -std=c99 -Wno-unused-function -DNDEBUG "
 
 $objs = ["protobuf.o", "defs.o", "storage.o", "message.o",
-         "repeated_field.o", "encode_decode.o", "upb.o"]
+         "repeated_field.o", "map.o", "encode_decode.o", "upb.o"]  # map.o: map.c
 
 create_makefile("google/protobuf_c")

diff --git a/ruby/ext/google/protobuf_c/map.c b/ruby/ext/google/protobuf_c/map.c
new file mode 100644
index 0000000..4ee71d1
--- /dev/null
+++ b/ruby/ext/google/protobuf_c/map.c

@@ -0,0 +1,805 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2014 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protobuf.h"
+
+// -----------------------------------------------------------------------------
+// Basic map operations on top of upb's strtable.
+//
+// Note that we roll our own `Map` container here because, as for
+// `RepeatedField`, we want a strongly-typed container. This is so that any user
+// errors due to incorrect map key or value types are raised as close as
+// possible to the error site, rather than at some deferred point (e.g.,
+// serialization).
+//
+// We build our `Map` on top of upb_strtable so that we're able to take
+// advantage of the native_slot storage abstraction, as RepeatedField does.
+// (This is not quite a perfect mapping -- see the key conversions below -- but
+// gives us full support and error-checking for all value types for free.)
+// -----------------------------------------------------------------------------
+
+// Map values are stored using the native_slot abstraction (as with repeated
+// field values), but keys are a bit special. Since we use a strtable, we need
+// to store keys as sequences of bytes such that equality of those bytes maps
+// one-to-one to equality of keys. We store strings directly (i.e., they map to
+// their own bytes) and integers as native integers (using the native_slot
+// abstraction).
+
+// Note that there is another tradeoff here in keeping string keys as native
+// strings rather than Ruby strings: traversing the Map requires conversion to
+// Ruby string values on every traversal, potentially creating more garbage. We
+// should consider ways to cache a Ruby version of the key if this becomes an
+// issue later.
+
+// Converts a Ruby key value into the byte sequence used to index the
+// underlying strtable. String and bytes keys are used verbatim (|out_key| then
+// points at the Ruby string's own bytes); boolean and integer keys are written
+// in native form into |buf|, which must provide TABLE_KEY_BUF_LENGTH bytes of
+// scratch space. The key bytes and their length are handed back through
+// |out_key| and |out_length|.
+#define TABLE_KEY_BUF_LENGTH 8  // sizeof(uint64_t)
+static void table_key(Map* self, VALUE key,
+                      char* buf,
+                      const char** out_key,
+                      size_t* out_length) {
+  const upb_fieldtype_t key_type = self->key_type;
+
+  if (key_type == UPB_TYPE_BYTES || key_type == UPB_TYPE_STRING) {
+    // String-like keys: the string content itself is the table key.
+    Check_Type(key, T_STRING);
+    native_slot_validate_string_encoding(key_type, key);
+    *out_key = RSTRING_PTR(key);
+    *out_length = RSTRING_LEN(key);
+    return;
+  }
+
+  if (key_type == UPB_TYPE_BOOL ||
+      key_type == UPB_TYPE_INT32 ||
+      key_type == UPB_TYPE_INT64 ||
+      key_type == UPB_TYPE_UINT32 ||
+      key_type == UPB_TYPE_UINT64) {
+    // Scalar keys: serialize the native representation into the scratch
+    // buffer and use those bytes as the key.
+    native_slot_set(key_type, Qnil, buf, key);
+    *out_key = buf;
+    *out_length = native_slot_size(key_type);
+    return;
+  }
+
+  // Map constructor should not allow a Map with another key type to be
+  // constructed.
+  assert(false);
+}
+
+// Rebuilds the Ruby key object from the raw key bytes stored in the strtable.
+// String and bytes keys become new Ruby strings tagged with the matching
+// encoding; boolean and integer keys are read back through native_slot_get.
+static VALUE table_key_to_ruby(Map* self, const char* buf, size_t length) {
+  const upb_fieldtype_t key_type = self->key_type;
+
+  if (key_type == UPB_TYPE_BYTES || key_type == UPB_TYPE_STRING) {
+    VALUE str = rb_str_new(buf, length);
+    if (key_type == UPB_TYPE_BYTES) {
+      rb_enc_associate(str, kRubyString8bitEncoding);
+    } else {
+      rb_enc_associate(str, kRubyStringUtf8Encoding);
+    }
+    return str;
+  }
+
+  if (key_type == UPB_TYPE_BOOL ||
+      key_type == UPB_TYPE_INT32 ||
+      key_type == UPB_TYPE_INT64 ||
+      key_type == UPB_TYPE_UINT32 ||
+      key_type == UPB_TYPE_UINT64) {
+    return native_slot_get(key_type, Qnil, buf);
+  }
+
+  // No other key type is accepted by the Map constructor.
+  assert(false);
+  return Qnil;
+}
+
+// Returns the address of the 64-bit payload inside |v|; Map reads and writes
+// native_slot values through this pointer.
+static void* value_memory(upb_value* v) {
+  void* payload = (void*)(&v->val.uint64);
+  return payload;
+}
+
+// -----------------------------------------------------------------------------
+// Map container type.
+// -----------------------------------------------------------------------------
+
+// Typed-data descriptor used when wrapping and unwrapping Map structs.
+const rb_data_type_t Map_type = {
+  "Google::Protobuf::Map",
+  {
+    Map_mark,  // mark hook
+    Map_free,  // free hook
+    NULL       // third hook left unset
+  },
+};
+
+// The Google::Protobuf::Map class object; assigned in Map_register().
+VALUE cMap;
+
+// Unwraps the Map struct held by the Ruby object |_self|, checking it against
+// Map_type.
+Map* ruby_to_Map(VALUE _self) {
+  Map* map;
+  TypedData_Get_Struct(_self, Map, &Map_type, map);
+  return map;
+}
+
+// GC mark hook. Marks the value type class and, when the value type is string,
+// bytes or message, every value currently stored in the table.
+void Map_mark(void* _self) {
+  Map* map = _self;
+
+  rb_gc_mark(map->value_type_class);
+
+  // Only these three value types get their slots marked.
+  if (map->value_type != UPB_TYPE_STRING &&
+      map->value_type != UPB_TYPE_BYTES &&
+      map->value_type != UPB_TYPE_MESSAGE) {
+    return;
+  }
+
+  upb_strtable_iter cursor;
+  upb_strtable_begin(&cursor, &map->table);
+  while (!upb_strtable_done(&cursor)) {
+    upb_value entry = upb_strtable_iter_value(&cursor);
+    native_slot_mark(map->value_type, value_memory(&entry));
+    upb_strtable_next(&cursor);
+  }
+}
+
+// GC free hook: tears down the strtable, then releases the Map struct itself.
+void Map_free(void* _self) {
+  Map* map = _self;
+  upb_strtable_uninit(&map->table);
+  xfree(map);
+}
+
+// Allocation function for Map: creates a zero-filled Map struct with no value
+// type class and wraps it in a Ruby object of class |klass|. The table itself
+// is set up later by Map_init().
+VALUE Map_alloc(VALUE klass) {
+  Map* map = ALLOC(Map);
+  memset(map, 0, sizeof(*map));
+  map->value_type_class = Qnil;
+  return TypedData_Wrap_Struct(klass, &Map_type, map);
+}
+
+// True for the value types (message and enum) that come with a type class.
+static bool needs_typeclass(upb_fieldtype_t type) {
+  return type == UPB_TYPE_MESSAGE || type == UPB_TYPE_ENUM;
+}
+
+/*
+ * call-seq:
+ *     Map.new(key_type, value_type, value_typeclass = nil, init_hashmap = {})
+ *     => new map
+ *
+ * Allocates a new Map container. This constructor may be called with 2, 3, or 4
+ * arguments. The first two arguments are always present and are symbols (taking
+ * on the same values as field-type symbols in message descriptors) that
+ * indicate the type of the map key and value fields.
+ *
+ * The supported key types are: :int32, :int64, :uint32, :uint64, :bool,
+ * :string, :bytes.
+ *
+ * The supported value types are: :int32, :int64, :uint32, :uint64, :bool,
+ * :string, :bytes, :enum, :message.
+ *
+ * The third argument, value_typeclass, must be present if value_type is :enum
+ * or :message. As in RepeatedField#new, this argument must be a message class
+ * (for :message) or enum module (for :enum). An ArgumentError is raised if it
+ * is missing.
+ *
+ * The last argument, if present, provides initial content for map. Note that
+ * this may be an ordinary Ruby hashmap or another Map instance with identical
+ * key and value types. Also note that this argument may be present whether or
+ * not value_typeclass is present (and it is unambiguously separate from
+ * value_typeclass because value_typeclass's presence is strictly determined by
+ * value_type). The contents of this initial hashmap or Map instance are
+ * shallow-copied into the new Map: the original map is unmodified, but
+ * references to underlying objects will be shared if the value type is a
+ * message type.
+ *
+ * Raises ArgumentError for a wrong argument count or an unsupported key type,
+ * and RuntimeError if the underlying table cannot be allocated.
+ */
+VALUE Map_init(int argc, VALUE* argv, VALUE _self) {
+  Map* self = ruby_to_Map(_self);
+
+  // We take either two args (:key_type, :value_type), three args (:key_type,
+  // :value_type, "ValueMessageType"), or four args (the above plus an initial
+  // hashmap).
+  if (argc < 2 || argc > 4) {
+    rb_raise(rb_eArgError, "Map constructor expects 2, 3 or 4 arguments.");
+  }
+
+  self->key_type = ruby_to_fieldtype(argv[0]);
+  self->value_type = ruby_to_fieldtype(argv[1]);
+
+  // Check that the key type is an allowed type.
+  switch (self->key_type) {
+    case UPB_TYPE_INT32:
+    case UPB_TYPE_INT64:
+    case UPB_TYPE_UINT32:
+    case UPB_TYPE_UINT64:
+    case UPB_TYPE_BOOL:
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES:
+      // These are OK.
+      break;
+    default:
+      rb_raise(rb_eArgError, "Invalid key type for map.");
+  }
+
+  int init_value_arg = 2;
+  if (needs_typeclass(self->value_type)) {
+    // Enum and message maps are documented to require a type class. Reject
+    // the call here instead of building a map whose value_type_class is nil.
+    if (argc < 3) {
+      rb_raise(rb_eArgError,
+               "Expected a value type class for enum or message value type.");
+    }
+    self->value_type_class = argv[2];
+    validate_type_class(self->value_type, self->value_type_class);
+    init_value_arg = 3;
+  }
+
+  // Table value type is always UINT64: this ensures enough space to store the
+  // native_slot value.
+  if (!upb_strtable_init(&self->table, UPB_CTYPE_UINT64)) {
+    rb_raise(rb_eRuntimeError, "Could not allocate table.");
+  }
+
+  // Any argument after the (optional) type class is the initial content.
+  if (argc > init_value_arg) {
+    Map_merge_into_self(_self, argv[init_value_arg]);
+  }
+
+  return Qnil;
+}
+
+/*
+ * call-seq:
+ *     Map.each(&block)
+ *
+ * Yields every |key, value| pair stored in the map to &block. The order of
+ * the pairs is unspecified. Because Map includes Enumerable, this is the
+ * method that makes a map behave like a normal Ruby sequence. Returns nil.
+ */
+VALUE Map_each(VALUE _self) {
+  Map* map = ruby_to_Map(_self);
+  upb_strtable_iter cursor;
+
+  upb_strtable_begin(&cursor, &map->table);
+  while (!upb_strtable_done(&cursor)) {
+    // Convert the stored key bytes and value slot back to Ruby objects.
+    const char* raw_key = upb_strtable_iter_key(&cursor);
+    size_t raw_key_len = upb_strtable_iter_keylength(&cursor);
+    VALUE rb_key = table_key_to_ruby(map, raw_key, raw_key_len);
+
+    upb_value entry = upb_strtable_iter_value(&cursor);
+    VALUE rb_value = native_slot_get(map->value_type,
+                                     map->value_type_class,
+                                     value_memory(&entry));
+
+    rb_yield_values(2, rb_key, rb_value);
+    upb_strtable_next(&cursor);
+  }
+
+  return Qnil;
+}
+
+/*
+ * call-seq:
+ *     Map.keys => [list_of_keys]
+ *
+ * Builds and returns a new array holding every key of the map. The order of
+ * the keys is unspecified.
+ */
+VALUE Map_keys(VALUE _self) {
+  Map* map = ruby_to_Map(_self);
+  VALUE keys = rb_ary_new();
+  upb_strtable_iter cursor;
+
+  upb_strtable_begin(&cursor, &map->table);
+  while (!upb_strtable_done(&cursor)) {
+    const char* raw_key = upb_strtable_iter_key(&cursor);
+    size_t raw_key_len = upb_strtable_iter_keylength(&cursor);
+    rb_ary_push(keys, table_key_to_ruby(map, raw_key, raw_key_len));
+    upb_strtable_next(&cursor);
+  }
+
+  return keys;
+}
+
+/*
+ * call-seq:
+ *     Map.values => [list_of_values]
+ *
+ * Builds and returns a new array holding every value of the map. The order of
+ * the values is unspecified.
+ */
+VALUE Map_values(VALUE _self) {
+  Map* map = ruby_to_Map(_self);
+  VALUE values = rb_ary_new();
+  upb_strtable_iter cursor;
+
+  upb_strtable_begin(&cursor, &map->table);
+  while (!upb_strtable_done(&cursor)) {
+    upb_value entry = upb_strtable_iter_value(&cursor);
+    VALUE rb_value = native_slot_get(map->value_type,
+                                     map->value_type_class,
+                                     value_memory(&entry));
+    rb_ary_push(values, rb_value);
+    upb_strtable_next(&cursor);
+  }
+
+  return values;
+}
+
+/*
+ * call-seq:
+ *     Map.[](key) => value
+ *
+ * Looks up the value stored under the given key. An exception is thrown when
+ * the key has the wrong type; nil is returned when the key is not in the map.
+ */
+VALUE Map_index(VALUE _self, VALUE key) {
+  Map* map = ruby_to_Map(_self);
+
+  char scratch[TABLE_KEY_BUF_LENGTH];
+  const char* key_bytes = NULL;
+  size_t key_len = 0;
+  table_key(map, key, scratch, &key_bytes, &key_len);
+
+  upb_value found;
+  if (!upb_strtable_lookup2(&map->table, key_bytes, key_len, &found)) {
+    return Qnil;
+  }
+  return native_slot_get(map->value_type,
+                         map->value_type_class,
+                         value_memory(&found));
+}
+
+/*
+ * call-seq:
+ *     Map.[]=(key, value) => value
+ *
+ * Stores the given value under the given key, replacing any value already
+ * present for that key. An exception is thrown when the key has the wrong
+ * type. The value that was passed in is returned.
+ */
+VALUE Map_index_set(VALUE _self, VALUE key, VALUE value) {
+  Map* map = ruby_to_Map(_self);
+
+  char scratch[TABLE_KEY_BUF_LENGTH];
+  const char* key_bytes = NULL;
+  size_t key_len = 0;
+  table_key(map, key, scratch, &key_bytes, &key_len);
+
+  // Convert the Ruby value into its native_slot form inside a table value.
+  upb_value slot;
+  native_slot_set(map->value_type,
+                  map->value_type_class,
+                  value_memory(&slot),
+                  value);
+
+  // Drop whatever is stored under this key, then insert the new entry.
+  upb_strtable_remove2(&map->table, key_bytes, key_len, NULL);
+  bool inserted = upb_strtable_insert2(&map->table, key_bytes, key_len, slot);
+  if (!inserted) {
+    rb_raise(rb_eRuntimeError, "Could not insert into table");
+  }
+
+  // Same return convention as Ruby hashmap's :[]= method.
+  return value;
+}
+
+/*
+ * call-seq:
+ *     Map.has_key?(key) => bool
+ *
+ * Reports whether the map holds an entry for the given key. An exception is
+ * thrown when the key has the wrong type.
+ */
+VALUE Map_has_key(VALUE _self, VALUE key) {
+  Map* map = ruby_to_Map(_self);
+
+  char scratch[TABLE_KEY_BUF_LENGTH];
+  const char* key_bytes = NULL;
+  size_t key_len = 0;
+  table_key(map, key, scratch, &key_bytes, &key_len);
+
+  bool present = upb_strtable_lookup2(&map->table, key_bytes, key_len, NULL);
+  return present ? Qtrue : Qfalse;
+}
+
+/*
+ * call-seq:
+ *     Map.delete(key) => old_value
+ *
+ * Removes the entry stored under the given key. Returns the value that was
+ * removed, or nil when the key was not present. An exception is thrown when
+ * the key has the wrong type.
+ */
+VALUE Map_delete(VALUE _self, VALUE key) {
+  Map* map = ruby_to_Map(_self);
+
+  char scratch[TABLE_KEY_BUF_LENGTH];
+  const char* key_bytes = NULL;
+  size_t key_len = 0;
+  table_key(map, key, scratch, &key_bytes, &key_len);
+
+  upb_value removed;
+  if (!upb_strtable_remove2(&map->table, key_bytes, key_len, &removed)) {
+    return Qnil;
+  }
+  return native_slot_get(map->value_type,
+                         map->value_type_class,
+                         value_memory(&removed));
+}
+
+/*
+ * call-seq:
+ *     Map.clear
+ *
+ * Removes all entries from the map. Returns nil. Raises RuntimeError if the
+ * emptied table cannot be re-allocated.
+ */
+VALUE Map_clear(VALUE _self) {
+  Map* self = ruby_to_Map(_self);
+
+  // Uninit and reinit the table -- this is faster than iterating and doing a
+  // delete-lookup on each key.
+  upb_strtable_uninit(&self->table);
+  // Use the same table value type as Map_init (UINT64) so the table keeps a
+  // single, consistent value type for its whole lifetime.
+  if (!upb_strtable_init(&self->table, UPB_CTYPE_UINT64)) {
+    rb_raise(rb_eRuntimeError, "Unable to re-initialize table");
+  }
+  return Qnil;
+}
+
+/*
+ * call-seq:
+ *     Map.length
+ *
+ * Reports how many key-value pairs the map currently holds.
+ */
+VALUE Map_length(VALUE _self) {
+  Map* map = ruby_to_Map(_self);
+  upb_strtable* table = &map->table;
+  return ULL2NUM(upb_strtable_count(table));
+}
+
+// Creates a new, empty map of the same class as |_self| by calling its `new`
+// method with |_self|'s key type and value type, plus its value type class
+// when one is set.
+static VALUE Map_new_this_type(VALUE _self) {
+  Map* map = ruby_to_Map(_self);
+  VALUE key_sym = fieldtype_to_ruby(map->key_type);
+  VALUE value_sym = fieldtype_to_ruby(map->value_type);
+
+  if (map->value_type_class == Qnil) {
+    return rb_funcall(CLASS_OF(_self), rb_intern("new"), 2,
+                      key_sym, value_sym);
+  }
+  return rb_funcall(CLASS_OF(_self), rb_intern("new"), 3,
+                    key_sym, value_sym, map->value_type_class);
+}
+
+/*
+ * call-seq:
+ *     Map.dup => new_map
+ *
+ * Returns a shallow copy of this map: a new map of the same type holding the
+ * same keys. Non-primitive element objects (e.g., submessages) are not
+ * copied; the new map refers to the same objects.
+ */
+VALUE Map_dup(VALUE _self) {
+  Map* src = ruby_to_Map(_self);
+  VALUE copy = Map_new_this_type(_self);
+  Map* dst = ruby_to_Map(copy);
+  upb_strtable_iter cursor;
+
+  upb_strtable_begin(&cursor, &src->table);
+  while (!upb_strtable_done(&cursor)) {
+    upb_value src_val = upb_strtable_iter_value(&cursor);
+    upb_value dst_val;
+    native_slot_dup(src->value_type,
+                    value_memory(&dst_val),
+                    value_memory(&src_val));
+
+    // Re-use the source entry's raw key bytes for the copy.
+    if (!upb_strtable_insert2(&dst->table,
+                              upb_strtable_iter_key(&cursor),
+                              upb_strtable_iter_keylength(&cursor),
+                              dst_val)) {
+      rb_raise(rb_eRuntimeError, "Error inserting value into new table");
+    }
+    upb_strtable_next(&cursor);
+  }
+
+  return copy;
+}
+
+// Backs Google::Protobuf.deep_copy for Map arguments; it is not registered as a
+// Ruby method. Builds a new map of the same type and fills it with
+// native_slot_deep_copy() copies of every value.
+VALUE Map_deep_copy(VALUE _self) {
+  Map* src = ruby_to_Map(_self);
+  VALUE copy = Map_new_this_type(_self);
+  Map* dst = ruby_to_Map(copy);
+  upb_strtable_iter cursor;
+
+  upb_strtable_begin(&cursor, &src->table);
+  while (!upb_strtable_done(&cursor)) {
+    upb_value src_val = upb_strtable_iter_value(&cursor);
+    upb_value dst_val;
+    native_slot_deep_copy(src->value_type,
+                          value_memory(&dst_val),
+                          value_memory(&src_val));
+
+    // Re-use the source entry's raw key bytes for the copy.
+    if (!upb_strtable_insert2(&dst->table,
+                              upb_strtable_iter_key(&cursor),
+                              upb_strtable_iter_keylength(&cursor),
+                              dst_val)) {
+      rb_raise(rb_eRuntimeError, "Error inserting value into new table");
+    }
+    upb_strtable_next(&cursor);
+  }
+
+  return copy;
+}
+
+/*
+ * call-seq:
+ *     Map.==(other) => boolean
+ *
+ * Compares this map to another. Maps are equal if they have identical key sets,
+ * and for each key, the values in both maps compare equal. Elements are
+ * compared as per normal Ruby semantics, by calling their :== methods (or
+ * performing a more efficient comparison for primitive types).
+ *
+ * Maps with dissimilar key types or value types/typeclasses are never equal,
+ * even if value comparison (for example, between integers and floats) would
+ * have otherwise indicated that every element has equal value.
+ *
+ * A Ruby hashmap is accepted as |other|; it is first converted to a temporary
+ * Map of this map's type. Any other kind of object compares as not equal.
+ */
+VALUE Map_eq(VALUE _self, VALUE _other) {
+  Map* self = ruby_to_Map(_self);
+
+  if (TYPE(_other) == T_HASH) {
+    // Allow comparisons to Ruby hashmaps by converting to a temporary Map
+    // instance. Slow, but workable.
+    VALUE other_map = Map_new_this_type(_self);
+    Map_merge_into_self(other_map, _other);
+    _other = other_map;
+  } else if (!(RB_TYPE_P(_other, T_DATA) && RTYPEDDATA_P(_other) &&
+               RTYPEDDATA_TYPE(_other) == &Map_type)) {
+    // Not a Map (e.g. nil or an Integer): report inequality rather than
+    // letting ruby_to_Map raise a TypeError out of ==.
+    return Qfalse;
+  }
+
+  Map* other = ruby_to_Map(_other);
+
+  if (self == other) {
+    return Qtrue;
+  }
+  if (self->key_type != other->key_type ||
+      self->value_type != other->value_type ||
+      self->value_type_class != other->value_type_class) {
+    return Qfalse;
+  }
+  if (upb_strtable_count(&self->table) != upb_strtable_count(&other->table)) {
+    return Qfalse;
+  }
+
+  // For each member of self, check that an equal member exists at the same key
+  // in other.
+  upb_strtable_iter it;
+  for (upb_strtable_begin(&it, &self->table);
+       !upb_strtable_done(&it);
+       upb_strtable_next(&it)) {
+
+    upb_value v = upb_strtable_iter_value(&it);
+    void* mem = value_memory(&v);
+    upb_value other_v;
+    void* other_mem = value_memory(&other_v);
+
+    if (!upb_strtable_lookup2(&other->table,
+                              upb_strtable_iter_key(&it),
+                              upb_strtable_iter_keylength(&it),
+                              &other_v)) {
+      // Not present in other map.
+      return Qfalse;
+    }
+
+    if (!native_slot_eq(self->value_type, mem, other_mem)) {
+      // Present, but value not equal.
+      return Qfalse;
+    }
+  }
+
+  // |other| is a raw pointer into the object behind |_other|, which may be the
+  // temporary Map created above. Keep that object reachable by the GC until
+  // the comparison loop has finished.
+  RB_GC_GUARD(_other);
+
+  return Qtrue;
+}
+
+/*
+ * call-seq:
+ *     Map.hash => hash_value
+ *
+ * Returns a hash value based on this map's contents. The value is computed
+ * from the :hash of every key and value and does not depend on the order in
+ * which the entries are visited.
+ */
+VALUE Map_hash(VALUE _self) {
+  Map* self = ruby_to_Map(_self);
+
+  st_index_t h = rb_hash_start(0);
+  // rb_intern yields an ID, not a VALUE.
+  ID hash_sym = rb_intern("hash");
+
+  upb_strtable_iter it;
+  for (upb_strtable_begin(&it, &self->table);
+       !upb_strtable_done(&it);
+       upb_strtable_next(&it)) {
+    VALUE key = table_key_to_ruby(
+        self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it));
+
+    upb_value v = upb_strtable_iter_value(&it);
+    void* mem = value_memory(&v);
+    VALUE value = native_slot_get(self->value_type,
+                                  self->value_type_class,
+                                  mem);
+
+    // Hash each entry on its own, then fold the entry hashes together with
+    // addition. Iteration order over the table is unspecified, so a
+    // commutative fold is what lets two maps with the same contents produce
+    // the same hash value.
+    st_index_t entry = rb_hash_start(0);
+    entry = rb_hash_uint(entry, NUM2LONG(rb_funcall(key, hash_sym, 0)));
+    entry = rb_hash_uint(entry, NUM2LONG(rb_funcall(value, hash_sym, 0)));
+    h += rb_hash_end(entry);
+  }
+
+  return INT2FIX(h);
+}
+
+/*
+ * call-seq:
+ *     Map.inspect => string
+ *
+ * Returns a string representing this map's elements. It will be formatted as
+ * "{key => value, key => value, ...}", with each key and value string
+ * representation computed by its own #inspect method. Entries appear in the
+ * table's iteration order.
+ */
+VALUE Map_inspect(VALUE _self) {
+  Map* self = ruby_to_Map(_self);
+
+  VALUE str = rb_str_new2("{");
+
+  bool first = true;
+  // rb_intern yields an ID, not a VALUE.
+  ID inspect_sym = rb_intern("inspect");
+
+  upb_strtable_iter it;
+  for (upb_strtable_begin(&it, &self->table);
+       !upb_strtable_done(&it);
+       upb_strtable_next(&it)) {
+    VALUE key = table_key_to_ruby(
+        self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it));
+
+    upb_value v = upb_strtable_iter_value(&it);
+    void* mem = value_memory(&v);
+    VALUE value = native_slot_get(self->value_type,
+                                  self->value_type_class,
+                                  mem);
+
+    // Separate entries with ", ", but emit nothing before the first one.
+    if (!first) {
+      str = rb_str_cat2(str, ", ");
+    } else {
+      first = false;
+    }
+    str = rb_str_append(str, rb_funcall(key, inspect_sym, 0));
+    str = rb_str_cat2(str, " => ");
+    str = rb_str_append(str, rb_funcall(value, inspect_sym, 0));
+  }
+
+  str = rb_str_cat2(str, "}");
+  return str;
+}
+
+/*
+ * call-seq:
+ *     Map.merge(other_map) => map
+ *
+ * Returns a new map made by duplicating this map and then copying every
+ * key/value pair of other_map into the duplicate. Where both maps have a
+ * value for a key, the value from other_map wins. This map itself is left
+ * untouched.
+ */
+VALUE Map_merge(VALUE _self, VALUE hashmap) {
+  return Map_merge_into_self(Map_dup(_self), hashmap);
+}
+
+// rb_hash_foreach callback: stores one |key|/|value| pair of the Ruby hash
+// into the map |self| and asks the iteration to continue.
+static int merge_into_self_callback(VALUE key, VALUE value, VALUE self) {
+  (void)Map_index_set(self, key, value);
+  return ST_CONTINUE;
+}
+
+// Used only internally -- shared by #merge and #initialize.
+//
+// Copies every entry of |hashmap| into the map |_self|, overwriting entries
+// that share a key, and returns |_self|. |hashmap| may be a Ruby Hash (each
+// pair goes through Map_index_set, so keys and values are type-checked) or
+// another Map with the same key type, value type and value type class (raw
+// table entries are copied). Raises ArgumentError for mismatching Map types or
+// for any other kind of argument, and RuntimeError if a table insertion
+// fails.
+VALUE Map_merge_into_self(VALUE _self, VALUE hashmap) {
+  if (TYPE(hashmap) == T_HASH) {
+    rb_hash_foreach(hashmap, merge_into_self_callback, _self);
+  } else if (RB_TYPE_P(hashmap, T_DATA) && RTYPEDDATA_P(hashmap) &&
+             RTYPEDDATA_TYPE(hashmap) == &Map_type) {
+
+    Map* self = ruby_to_Map(_self);
+    Map* other = ruby_to_Map(hashmap);
+
+    if (self == other) {
+      // Merging a map into itself changes nothing, and the loop below would
+      // otherwise remove from and insert into the table it is iterating.
+      return _self;
+    }
+
+    if (self->key_type != other->key_type ||
+        self->value_type != other->value_type ||
+        self->value_type_class != other->value_type_class) {
+      rb_raise(rb_eArgError, "Attempt to merge Map with mismatching types");
+    }
+
+    upb_strtable_iter it;
+    for (upb_strtable_begin(&it, &other->table);
+         !upb_strtable_done(&it);
+         upb_strtable_next(&it)) {
+
+      // Replace any existing value by issuing a 'remove' operation first. The
+      // removed value is not needed, so no output slot is passed.
+      upb_strtable_remove2(&self->table,
+                           upb_strtable_iter_key(&it),
+                           upb_strtable_iter_keylength(&it),
+                           NULL);
+
+      upb_value v = upb_strtable_iter_value(&it);
+      // Report a failed insertion the same way Map_index_set does instead of
+      // silently dropping the entry.
+      if (!upb_strtable_insert2(&self->table,
+                                upb_strtable_iter_key(&it),
+                                upb_strtable_iter_keylength(&it),
+                                v)) {
+        rb_raise(rb_eRuntimeError, "Could not insert into table");
+      }
+    }
+  } else {
+    rb_raise(rb_eArgError, "Unknown type merging into Map");
+  }
+  return _self;
+}
+
+// Internal method (used for serialization): points |iter| at the first entry
+// of the map wrapped by |_self|.
+void Map_begin(VALUE _self, Map_iter* iter) {
+  Map* map = ruby_to_Map(_self);
+  upb_strtable_begin(&iter->it, &map->table);
+  iter->self = map;
+}
+
+// Advances |iter| to the next map entry.
+void Map_next(Map_iter* iter) {
+  upb_strtable_iter* cursor = &iter->it;
+  upb_strtable_next(cursor);
+}
+
+// Reports whether |iter| has moved past the last map entry.
+bool Map_done(Map_iter* iter) {
+  upb_strtable_iter* cursor = &iter->it;
+  return upb_strtable_done(cursor);
+}
+
+// Returns the key of the entry |iter| currently points at, as a Ruby object.
+VALUE Map_iter_key(Map_iter* iter) {
+  const char* raw_key = upb_strtable_iter_key(&iter->it);
+  size_t raw_key_len = upb_strtable_iter_keylength(&iter->it);
+  return table_key_to_ruby(iter->self, raw_key, raw_key_len);
+}
+
+// Returns the value of the entry |iter| currently points at, as a Ruby
+// object.
+VALUE Map_iter_value(Map_iter* iter) {
+  Map* map = iter->self;
+  upb_value entry = upb_strtable_iter_value(&iter->it);
+  return native_slot_get(map->value_type,
+                         map->value_type_class,
+                         value_memory(&entry));
+}
+
+// Defines the Map class under |module|, records it in cMap (registered with
+// the GC), and installs its allocator, instance methods and Enumerable.
+void Map_register(VALUE module) {
+  cMap = rb_define_class_under(module, "Map", rb_cObject);
+  rb_gc_register_address(&cMap);
+  rb_define_alloc_func(cMap, Map_alloc);
+
+  // Construction.
+  rb_define_method(cMap, "initialize", Map_init, -1);
+
+  // Traversal.
+  rb_define_method(cMap, "each", Map_each, 0);
+  rb_define_method(cMap, "keys", Map_keys, 0);
+  rb_define_method(cMap, "values", Map_values, 0);
+
+  // Element access and mutation.
+  rb_define_method(cMap, "[]", Map_index, 1);
+  rb_define_method(cMap, "[]=", Map_index_set, 2);
+  rb_define_method(cMap, "has_key?", Map_has_key, 1);
+  rb_define_method(cMap, "delete", Map_delete, 1);
+  rb_define_method(cMap, "clear", Map_clear, 0);
+  rb_define_method(cMap, "length", Map_length, 0);
+
+  // Whole-map operations.
+  rb_define_method(cMap, "dup", Map_dup, 0);
+  rb_define_method(cMap, "==", Map_eq, 1);
+  rb_define_method(cMap, "hash", Map_hash, 0);
+  rb_define_method(cMap, "inspect", Map_inspect, 0);
+  rb_define_method(cMap, "merge", Map_merge, 1);
+
+  rb_include_module(cMap, rb_mEnumerable);
+}

diff --git a/ruby/ext/google/protobuf_c/message.c b/ruby/ext/google/protobuf_c/message.c
index 105b780..ee8881d 100644
--- a/ruby/ext/google/protobuf_c/message.c
+++ b/ruby/ext/google/protobuf_c/message.c

@@ -139,7 +139,14 @@
              "Unknown field name in initialization map entry.");
   }
 
-  if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) {
+  if (is_map_field(f)) {
+    if (TYPE(val) != T_HASH) {
+      rb_raise(rb_eArgError,
+               "Expected Hash object as initializer value for map field.");
+    }
+    VALUE map = layout_get(self->descriptor->layout, Message_data(self), f);
+    Map_merge_into_self(map, val);
+  } else if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) {
     if (TYPE(val) != T_ARRAY) {
       rb_raise(rb_eArgError,
                "Expected array as initializer value for repeated field.");
@@ -450,13 +457,15 @@
  * call-seq:
  *     Google::Protobuf.deep_copy(obj) => copy_of_obj
  *
- * Performs a deep copy of either a RepeatedField instance or a message object,
- * recursively copying its members.
+ * Performs a deep copy of a RepeatedField instance, a Map instance, or a
+ * message object, recursively copying its members.
  */
 VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj) {
   VALUE klass = CLASS_OF(obj);
   if (klass == cRepeatedField) {
     return RepeatedField_deep_copy(obj);
+  } else if (klass == cMap) {
+    return Map_deep_copy(obj);
   } else {
     return Message_deep_copy(obj);
   }

diff --git a/ruby/ext/google/protobuf_c/protobuf.c b/ruby/ext/google/protobuf_c/protobuf.c
index d586228..3055270 100644
--- a/ruby/ext/google/protobuf_c/protobuf.c
+++ b/ruby/ext/google/protobuf_c/protobuf.c

@@ -82,6 +82,7 @@
   EnumBuilderContext_register(internal);
   Builder_register(internal);
   RepeatedField_register(protobuf);
+  Map_register(protobuf);
 
   rb_define_singleton_method(protobuf, "encode", Google_Protobuf_encode, 1);
   rb_define_singleton_method(protobuf, "decode", Google_Protobuf_decode, 2);

diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h
index c3a5d65..88ae62e 100644
--- a/ruby/ext/google/protobuf_c/protobuf.h
+++ b/ruby/ext/google/protobuf_c/protobuf.h

@@ -110,6 +110,10 @@
   const upb_pbdecodermethod* fill_method;
   const upb_handlers* pb_serialize_handlers;
   const upb_handlers* json_serialize_handlers;
+  // Handlers hold type class references for sub-message fields directly in some
+  // cases. We need to keep these rooted because they might otherwise be
+  // collected.
+  VALUE typeclass_references;
 };
 
 struct FieldDescriptor {
@@ -123,6 +127,7 @@
 
 struct MessageBuilderContext {
   VALUE descriptor;
+  VALUE builder;
 };
 
 struct EnumBuilderContext {
@@ -213,10 +218,13 @@
 VALUE MessageBuilderContext_alloc(VALUE klass);
 void MessageBuilderContext_register(VALUE module);
 MessageBuilderContext* ruby_to_MessageBuilderContext(VALUE value);
-VALUE MessageBuilderContext_initialize(VALUE _self, VALUE descriptor);
+VALUE MessageBuilderContext_initialize(VALUE _self,
+                                       VALUE descriptor,
+                                       VALUE builder);
 VALUE MessageBuilderContext_optional(int argc, VALUE* argv, VALUE _self);
 VALUE MessageBuilderContext_required(int argc, VALUE* argv, VALUE _self);
 VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self);
+VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self);
 
 void EnumBuilderContext_mark(void* _self);
 void EnumBuilderContext_free(void* _self);
@@ -239,6 +247,8 @@
 // Native slot storage abstraction.
 // -----------------------------------------------------------------------------
 
+#define NATIVE_SLOT_MAX_SIZE sizeof(void*)
+
 size_t native_slot_size(upb_fieldtype_t type);
 void native_slot_set(upb_fieldtype_t type,
                      VALUE type_class,
@@ -246,7 +256,7 @@
                      VALUE value);
 VALUE native_slot_get(upb_fieldtype_t type,
                       VALUE type_class,
-                      void* memory);
+                      const void* memory);
 void native_slot_init(upb_fieldtype_t type, void* memory);
 void native_slot_mark(upb_fieldtype_t type, void* memory);
 void native_slot_dup(upb_fieldtype_t type, void* to, void* from);
@@ -254,11 +264,27 @@
 bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2);
 
 void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value);
+void native_slot_check_int_range_precision(upb_fieldtype_t type, VALUE value);
 
 extern rb_encoding* kRubyStringUtf8Encoding;
 extern rb_encoding* kRubyStringASCIIEncoding;
 extern rb_encoding* kRubyString8bitEncoding;
 
+VALUE field_type_class(const upb_fielddef* field);
+
+#define MAP_KEY_FIELD 1
+#define MAP_VALUE_FIELD 2
+
+// These operate on a map field (i.e., a repeated field of submessages whose
+// submessage type is a map-entry msgdef).
+bool is_map_field(const upb_fielddef* field);
+const upb_fielddef* map_field_key(const upb_fielddef* field);
+const upb_fielddef* map_field_value(const upb_fielddef* field);
+
+// These operate on a map-entry msgdef.
+const upb_fielddef* map_entry_key(const upb_msgdef* msgdef);
+const upb_fielddef* map_entry_value(const upb_msgdef* msgdef);
+
 // -----------------------------------------------------------------------------
 // Repeated field container type.
 // -----------------------------------------------------------------------------
@@ -282,7 +308,6 @@
 
 RepeatedField* ruby_to_RepeatedField(VALUE value);
 
-void RepeatedField_register(VALUE module);
 VALUE RepeatedField_each(VALUE _self);
 VALUE RepeatedField_index(VALUE _self, VALUE _index);
 void* RepeatedField_index_native(VALUE _self, int index);
@@ -302,6 +327,59 @@
 VALUE RepeatedField_inspect(VALUE _self);
 VALUE RepeatedField_plus(VALUE _self, VALUE list);
 
+// Defined in repeated_field.c; also used by Map.
+void validate_type_class(upb_fieldtype_t type, VALUE klass);
+
+// -----------------------------------------------------------------------------
+// Map container type.
+// -----------------------------------------------------------------------------
+
+typedef struct {
+  upb_fieldtype_t key_type;
+  upb_fieldtype_t value_type;
+  VALUE value_type_class;
+  upb_strtable table;
+} Map;
+
+void Map_mark(void* self);
+void Map_free(void* self);
+VALUE Map_alloc(VALUE klass);
+VALUE Map_init(int argc, VALUE* argv, VALUE self);
+void Map_register(VALUE module);
+
+extern const rb_data_type_t Map_type;
+extern VALUE cMap;
+
+Map* ruby_to_Map(VALUE value);
+
+VALUE Map_each(VALUE _self);
+VALUE Map_keys(VALUE _self);
+VALUE Map_values(VALUE _self);
+VALUE Map_index(VALUE _self, VALUE key);
+VALUE Map_index_set(VALUE _self, VALUE key, VALUE value);
+VALUE Map_has_key(VALUE _self, VALUE key);
+VALUE Map_delete(VALUE _self, VALUE key);
+VALUE Map_clear(VALUE _self);
+VALUE Map_length(VALUE _self);
+VALUE Map_dup(VALUE _self);
+VALUE Map_deep_copy(VALUE _self);
+VALUE Map_eq(VALUE _self, VALUE _other);
+VALUE Map_hash(VALUE _self);
+VALUE Map_inspect(VALUE _self);
+VALUE Map_merge(VALUE _self, VALUE hashmap);
+VALUE Map_merge_into_self(VALUE _self, VALUE hashmap);
+
+typedef struct {
+  Map* self;
+  upb_strtable_iter it;
+} Map_iter;
+
+void Map_begin(VALUE _self, Map_iter* iter);
+void Map_next(Map_iter* iter);
+bool Map_done(Map_iter* iter);
+VALUE Map_iter_key(Map_iter* iter);
+VALUE Map_iter_value(Map_iter* iter);
+
 // -----------------------------------------------------------------------------
 // Message layout / storage.
 // -----------------------------------------------------------------------------
@@ -315,7 +393,7 @@
 MessageLayout* create_layout(const upb_msgdef* msgdef);
 void free_layout(MessageLayout* layout);
 VALUE layout_get(MessageLayout* layout,
-                 void* storage,
+                 const void* storage,
                  const upb_fielddef* field);
 void layout_set(MessageLayout* layout,
                 void* storage,

diff --git a/ruby/ext/google/protobuf_c/repeated_field.c b/ruby/ext/google/protobuf_c/repeated_field.c
index 6bd13b0..6e3f0bc 100644
--- a/ruby/ext/google/protobuf_c/repeated_field.c
+++ b/ruby/ext/google/protobuf_c/repeated_field.c

@@ -324,6 +324,10 @@
  * element types are equal, their lengths are equal, and each element is equal.
  * Elements are compared as per normal Ruby semantics, by calling their :==
  * methods (or performing a more efficient comparison for primitive types).
+ *
+ * Repeated fields with dissimilar element types are never equal, even if value
+ * comparison (for example, between integers and floats) would have otherwise
+ * indicated that every element has equal value.
  */
 VALUE RepeatedField_eq(VALUE _self, VALUE _other) {
   if (_self == _other) {
@@ -458,7 +462,7 @@
   return dupped;
 }
 
-static void validate_type_class(upb_fieldtype_t type, VALUE klass) {
+void validate_type_class(upb_fieldtype_t type, VALUE klass) {
   if (rb_iv_get(klass, kDescriptorInstanceVar) == Qnil) {
     rb_raise(rb_eArgError,
              "Type class has no descriptor. Please pass a "

diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c
index c4d801a..14f49d4 100644
--- a/ruby/ext/google/protobuf_c/storage.c
+++ b/ruby/ext/google/protobuf_c/storage.c

@@ -57,7 +57,17 @@
   }
 }
 
-static void check_int_range_precision(upb_fieldtype_t type, VALUE val) {
+static bool is_ruby_num(VALUE value) {
+  return (TYPE(value) == T_FLOAT ||
+          TYPE(value) == T_FIXNUM ||
+          TYPE(value) == T_BIGNUM);
+}
+
+void native_slot_check_int_range_precision(upb_fieldtype_t type, VALUE val) {
+  if (!is_ruby_num(val)) {
+    rb_raise(rb_eTypeError, "Expected number type for integral field.");
+  }
+
   // NUM2{INT,UINT,LL,ULL} macros do the appropriate range checks on upper
   // bound; we just need to do precision checks (i.e., disallow rounding) and
   // check for < 0 on unsigned types.
@@ -76,12 +86,6 @@
   }
 }
 
-static bool is_ruby_num(VALUE value) {
-  return (TYPE(value) == T_FLOAT ||
-          TYPE(value) == T_FIXNUM ||
-          TYPE(value) == T_BIGNUM);
-}
-
 void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value) {
   bool bad_encoding = false;
   rb_encoding* string_encoding = rb_enc_from_index(ENCODING_GET(value));
@@ -156,14 +160,14 @@
       int32_t int_val = 0;
       if (TYPE(value) == T_SYMBOL) {
         // Ensure that the given symbol exists in the enum module.
-        VALUE lookup = rb_const_get(type_class, SYM2ID(value));
+        VALUE lookup = rb_funcall(type_class, rb_intern("resolve"), 1, value);
         if (lookup == Qnil) {
           rb_raise(rb_eRangeError, "Unknown symbol value for enum field.");
         } else {
           int_val = NUM2INT(lookup);
         }
       } else {
-        check_int_range_precision(UPB_TYPE_INT32, value);
+        native_slot_check_int_range_precision(UPB_TYPE_INT32, value);
         int_val = NUM2INT(value);
       }
       DEREF(memory, int32_t) = int_val;
@@ -173,10 +177,7 @@
     case UPB_TYPE_INT64:
     case UPB_TYPE_UINT32:
     case UPB_TYPE_UINT64:
-      if (!is_ruby_num(value)) {
-        rb_raise(rb_eTypeError, "Expected number type for integral field.");
-      }
-      check_int_range_precision(type, value);
+      native_slot_check_int_range_precision(type, value);
       switch (type) {
       case UPB_TYPE_INT32:
         DEREF(memory, int32_t) = NUM2INT(value);
@@ -199,7 +200,9 @@
   }
 }
 
-VALUE native_slot_get(upb_fieldtype_t type, VALUE type_class, void* memory) {
+VALUE native_slot_get(upb_fieldtype_t type,
+                      VALUE type_class,
+                      const void* memory) {
   switch (type) {
     case UPB_TYPE_FLOAT:
       return DBL2NUM(DEREF(memory, float));
@@ -210,7 +213,7 @@
     case UPB_TYPE_STRING:
     case UPB_TYPE_BYTES:
     case UPB_TYPE_MESSAGE:
-      return *((VALUE *)memory);
+      return DEREF(memory, VALUE);
     case UPB_TYPE_ENUM: {
       int32_t val = DEREF(memory, int32_t);
       VALUE symbol = enum_lookup(type_class, INT2NUM(val));
@@ -246,8 +249,9 @@
       break;
     case UPB_TYPE_STRING:
     case UPB_TYPE_BYTES:
-      // TODO(cfallin): set encoding appropriately
       DEREF(memory, VALUE) = rb_str_new2("");
+      rb_enc_associate(DEREF(memory, VALUE), (type == UPB_TYPE_BYTES) ?
+                       kRubyString8bitEncoding : kRubyStringUtf8Encoding);
       break;
     case UPB_TYPE_MESSAGE:
       DEREF(memory, VALUE) = Qnil;
@@ -322,6 +326,43 @@
 }
 
 // -----------------------------------------------------------------------------
+// Map field utilities.
+// -----------------------------------------------------------------------------
+
+bool is_map_field(const upb_fielddef* field) {
+  if (upb_fielddef_label(field) != UPB_LABEL_REPEATED ||
+      upb_fielddef_type(field) != UPB_TYPE_MESSAGE) {
+    return false;
+  }
+  const upb_msgdef* subdef = upb_fielddef_msgsubdef(field);
+  return upb_msgdef_mapentry(subdef);
+}
+
+const upb_fielddef* map_field_key(const upb_fielddef* field) {
+  assert(is_map_field(field));
+  const upb_msgdef* subdef = upb_fielddef_msgsubdef(field);
+  return map_entry_key(subdef);
+}
+
+const upb_fielddef* map_field_value(const upb_fielddef* field) {
+  assert(is_map_field(field));
+  const upb_msgdef* subdef = upb_fielddef_msgsubdef(field);
+  return map_entry_value(subdef);
+}
+
+const upb_fielddef* map_entry_key(const upb_msgdef* msgdef) {
+  const upb_fielddef* key_field = upb_msgdef_itof(msgdef, MAP_KEY_FIELD);
+  assert(key_field != NULL);
+  return key_field;
+}
+
+const upb_fielddef* map_entry_value(const upb_msgdef* msgdef) {
+  const upb_fielddef* value_field = upb_msgdef_itof(msgdef, MAP_VALUE_FIELD);
+  assert(value_field != NULL);
+  return value_field;
+}
+
+// -----------------------------------------------------------------------------
 // Memory layout management.
 // -----------------------------------------------------------------------------
 
@@ -334,9 +375,12 @@
   size_t off = 0;
   for (upb_msg_begin(&it, msgdef); !upb_msg_done(&it); upb_msg_next(&it)) {
     const upb_fielddef* field = upb_msg_iter_field(&it);
-    size_t field_size =
-        (upb_fielddef_label(field) == UPB_LABEL_REPEATED) ?
-        sizeof(VALUE) : native_slot_size(upb_fielddef_type(field));
+    size_t field_size = 0;
+    if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+      field_size = sizeof(VALUE);
+    } else {
+      field_size = native_slot_size(upb_fielddef_type(field));
+    }
     // align current offset
     off = (off + field_size - 1) & ~(field_size - 1);
     layout->offsets[upb_fielddef_index(field)] = off;
@@ -357,7 +401,7 @@
   xfree(layout);
 }
 
-static VALUE get_type_class(const upb_fielddef* field) {
+VALUE field_type_class(const upb_fielddef* field) {
   VALUE type_class = Qnil;
   if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE) {
     VALUE submsgdesc =
@@ -372,7 +416,7 @@
 }
 
 VALUE layout_get(MessageLayout* layout,
-                 void* storage,
+                 const void* storage,
                  const upb_fielddef* field) {
   void* memory = ((uint8_t *)storage) +
       layout->offsets[upb_fielddef_index(field)];
@@ -380,7 +424,7 @@
     return *((VALUE *)memory);
   } else {
     return native_slot_get(upb_fielddef_type(field),
-                           get_type_class(field),
+                           field_type_class(field),
                            memory);
   }
 }
@@ -398,9 +442,8 @@
     rb_raise(rb_eTypeError, "Repeated field array has wrong element type");
   }
 
-  if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE ||
-      upb_fielddef_type(field) == UPB_TYPE_ENUM) {
-    RepeatedField* self = ruby_to_RepeatedField(val);
+  if (self->field_type == UPB_TYPE_MESSAGE ||
+      self->field_type == UPB_TYPE_ENUM) {
     if (self->field_type_class !=
         get_def_obj(upb_fielddef_subdef(field))) {
       rb_raise(rb_eTypeError,
@@ -409,17 +452,48 @@
   }
 }
 
+static void check_map_field_type(VALUE val, const upb_fielddef* field) {
+  assert(is_map_field(field));
+  const upb_fielddef* key_field = map_field_key(field);
+  const upb_fielddef* value_field = map_field_value(field);
+
+  if (!RB_TYPE_P(val, T_DATA) || !RTYPEDDATA_P(val) ||
+      RTYPEDDATA_TYPE(val) != &Map_type) {
+    rb_raise(rb_eTypeError, "Expected Map instance");
+  }
+
+  Map* self = ruby_to_Map(val);
+  if (self->key_type != upb_fielddef_type(key_field)) {
+    rb_raise(rb_eTypeError, "Map key type does not match field's key type");
+  }
+  if (self->value_type != upb_fielddef_type(value_field)) {
+    rb_raise(rb_eTypeError, "Map value type does not match field's value type");
+  }
+  if (upb_fielddef_type(value_field) == UPB_TYPE_MESSAGE ||
+      upb_fielddef_type(value_field) == UPB_TYPE_ENUM) {
+    if (self->value_type_class !=
+        get_def_obj(upb_fielddef_subdef(value_field))) {
+      rb_raise(rb_eTypeError,
+               "Map value type has wrong message/enum class");
+    }
+  }
+}
+
+
 void layout_set(MessageLayout* layout,
                 void* storage,
                 const upb_fielddef* field,
                 VALUE val) {
   void* memory = ((uint8_t *)storage) +
       layout->offsets[upb_fielddef_index(field)];
-  if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+  if (is_map_field(field)) {
+    check_map_field_type(val, field);
+    DEREF(memory, VALUE) = val;
+  } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
     check_repeated_field_type(val, field);
-    *((VALUE *)memory) = val;
+    DEREF(memory, VALUE) = val;
   } else {
-    native_slot_set(upb_fielddef_type(field), get_type_class(field),
+    native_slot_set(upb_fielddef_type(field), field_type_class(field),
                     memory, val);
   }
 }
@@ -434,9 +508,34 @@
     void* memory = ((uint8_t *)storage) +
         layout->offsets[upb_fielddef_index(field)];
 
-    if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+    if (is_map_field(field)) {
+      VALUE map = Qnil;
+
+      const upb_fielddef* key_field = map_field_key(field);
+      const upb_fielddef* value_field = map_field_value(field);
+      VALUE type_class = field_type_class(value_field);
+
+      if (type_class != Qnil) {
+        VALUE args[3] = {
+          fieldtype_to_ruby(upb_fielddef_type(key_field)),
+          fieldtype_to_ruby(upb_fielddef_type(value_field)),
+          type_class,
+        };
+        map = rb_class_new_instance(3, args, cMap);
+      } else {
+        VALUE args[2] = {
+          fieldtype_to_ruby(upb_fielddef_type(key_field)),
+          fieldtype_to_ruby(upb_fielddef_type(value_field)),
+        };
+        map = rb_class_new_instance(2, args, cMap);
+      }
+
+      DEREF(memory, VALUE) = map;
+    } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
       VALUE ary = Qnil;
-      VALUE type_class = get_type_class(field);
+
+      VALUE type_class = field_type_class(field);
+
       if (type_class != Qnil) {
         VALUE args[2] = {
           fieldtype_to_ruby(upb_fielddef_type(field)),
@@ -447,7 +546,8 @@
         VALUE args[1] = { fieldtype_to_ruby(upb_fielddef_type(field)) };
         ary = rb_class_new_instance(1, args, cRepeatedField);
       }
-      *((VALUE *)memory) = ary;
+
+      DEREF(memory, VALUE) = ary;
     } else {
       native_slot_init(upb_fielddef_type(field), memory);
     }
@@ -464,7 +564,7 @@
         layout->offsets[upb_fielddef_index(field)];
 
     if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
-      rb_gc_mark(*((VALUE *)memory));
+      rb_gc_mark(DEREF(memory, VALUE));
     } else {
       native_slot_mark(upb_fielddef_type(field), memory);
     }
@@ -482,8 +582,10 @@
     void* from_memory = ((uint8_t *)from) +
         layout->offsets[upb_fielddef_index(field)];
 
-    if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
-      *((VALUE *)to_memory) = RepeatedField_dup(*((VALUE *)from_memory));
+    if (is_map_field(field)) {
+      DEREF(to_memory, VALUE) = Map_dup(DEREF(from_memory, VALUE));
+    } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+      DEREF(to_memory, VALUE) = RepeatedField_dup(DEREF(from_memory, VALUE));
     } else {
       native_slot_dup(upb_fielddef_type(field), to_memory, from_memory);
     }
@@ -501,8 +603,12 @@
     void* from_memory = ((uint8_t *)from) +
         layout->offsets[upb_fielddef_index(field)];
 
-    if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
-      *((VALUE *)to_memory) = RepeatedField_deep_copy(*((VALUE *)from_memory));
+    if (is_map_field(field)) {
+      DEREF(to_memory, VALUE) =
+          Map_deep_copy(DEREF(from_memory, VALUE));
+    } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+      DEREF(to_memory, VALUE) =
+          RepeatedField_deep_copy(DEREF(from_memory, VALUE));
     } else {
       native_slot_deep_copy(upb_fielddef_type(field), to_memory, from_memory);
     }
@@ -520,11 +626,17 @@
     void* msg2_memory = ((uint8_t *)msg2) +
         layout->offsets[upb_fielddef_index(field)];
 
-    if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
-      if (RepeatedField_eq(*((VALUE *)msg1_memory),
-                           *((VALUE *)msg2_memory)) == Qfalse) {
-        return Qfalse;
-      }
+    if (is_map_field(field)) {
+      // Only bail out on a mismatch; an equal map must not end the field scan.
+      if (Map_eq(DEREF(msg1_memory, VALUE),
+                 DEREF(msg2_memory, VALUE)) == Qfalse) {
+        return Qfalse;
+      }
+    } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+      if (RepeatedField_eq(DEREF(msg1_memory, VALUE),
+                           DEREF(msg2_memory, VALUE)) == Qfalse) {
+        return Qfalse;
+      }
     } else {
       if (!native_slot_eq(upb_fielddef_type(field),
                           msg1_memory, msg2_memory)) {

diff --git a/ruby/ext/google/protobuf_c/upb.c b/ruby/ext/google/protobuf_c/upb.c
index c9f4719..571c809 100644
--- a/ruby/ext/google/protobuf_c/upb.c
+++ b/ruby/ext/google/protobuf_c/upb.c

@@ -1269,6 +1269,7 @@
   if (!upb_def_init(UPB_UPCAST(m), UPB_DEF_MSG, &vtbl, owner)) goto err2;
   if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2;
   if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1;
+  m->map_entry = false;
   return m;
 
 err1:
@@ -1283,6 +1284,7 @@
   if (!newm) return NULL;
   bool ok = upb_def_setfullname(UPB_UPCAST(newm),
                                 upb_def_fullname(UPB_UPCAST(m)), NULL);
+  newm->map_entry = m->map_entry;
   UPB_ASSERT_VAR(ok, ok);
   upb_msg_iter i;
   for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
@@ -1386,6 +1388,15 @@
   return upb_strtable_count(&m->ntof);
 }
 
+void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
+  assert(!upb_msgdef_isfrozen(m));
+  m->map_entry = map_entry;
+}
+
+bool upb_msgdef_mapentry(const upb_msgdef *m) {
+  return m->map_entry;
+}
+
 void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) {
   upb_inttable_begin(iter, &m->itof);
 }
@@ -3401,31 +3412,32 @@
 }
 
 char *upb_strdup(const char *s) {
-  size_t n = strlen(s) + 1;
+  return upb_strdup2(s, strlen(s));
+}
+
+char *upb_strdup2(const char *s, size_t len) {
+  // Prevent overflow errors.
+  if (len == SIZE_MAX) return NULL;
+  // Always null-terminate, even if binary data; but don't rely on the input to
+  // have a null-terminating byte since it may be a raw binary buffer.
+  size_t n = len + 1;
   char *p = malloc(n);
-  if (p) memcpy(p, s, n);
+  if (p) {
+    memcpy(p, s, len);
+    p[len] = 0;
+  }
   return p;
 }
 
 // A type to represent the lookup key of either a strtable or an inttable.
-// This is like upb_tabkey, but can carry a size also to allow lookups of
-// non-NULL-terminated strings (we don't store string lengths in the table).
 typedef struct {
   upb_tabkey key;
-  uint32_t len;  // For string keys only.
 } lookupkey_t;
 
-static lookupkey_t strkey(const char *str) {
-  lookupkey_t k;
-  k.key.str = (char*)str;
-  k.len = strlen(str);
-  return k;
-}
-
 static lookupkey_t strkey2(const char *str, size_t len) {
   lookupkey_t k;
-  k.key.str = (char*)str;
-  k.len = len;
+  k.key.s.str = (char*)str;
+  k.key.s.length = len;
   return k;
 }
 
@@ -3607,11 +3619,12 @@
 // A simple "subclass" of upb_table that only adds a hash function for strings.
 
 static uint32_t strhash(upb_tabkey key) {
-  return MurmurHash2(key.str, strlen(key.str), 0);
+  return MurmurHash2(key.s.str, key.s.length, 0);
 }
 
 static bool streql(upb_tabkey k1, lookupkey_t k2) {
-  return strncmp(k1.str, k2.key.str, k2.len) == 0 && k1.str[k2.len] == '\0';
+  return k1.s.length == k2.key.s.length &&
+         memcmp(k1.s.str, k2.key.s.str, k1.s.length) == 0;
 }
 
 bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
@@ -3620,7 +3633,7 @@
 
 void upb_strtable_uninit(upb_strtable *t) {
   for (size_t i = 0; i < upb_table_size(&t->t); i++)
-    free((void*)t->t.entries[i].key.str);
+    free((void*)t->t.entries[i].key.s.str);
   uninit(&t->t);
 }
 
@@ -3631,26 +3644,30 @@
   upb_strtable_iter i;
   upb_strtable_begin(&i, t);
   for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
-    upb_strtable_insert(
-        &new_table, upb_strtable_iter_key(&i), upb_strtable_iter_value(&i));
+    upb_strtable_insert2(
+        &new_table,
+        upb_strtable_iter_key(&i),
+        upb_strtable_iter_keylength(&i),
+        upb_strtable_iter_value(&i));
   }
   upb_strtable_uninit(t);
   *t = new_table;
   return true;
 }
 
-bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
+bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
+                          upb_value v) {
   if (isfull(&t->t)) {
     // Need to resize.  New table of double the size, add old elements to it.
     if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) {
       return false;
     }
   }
-  if ((k = upb_strdup(k)) == NULL) return false;
+  if ((k = upb_strdup2(k, len)) == NULL) return false;
 
-  lookupkey_t key = strkey(k);
-  uint32_t hash = MurmurHash2(key.key.str, key.len, 0);
-  insert(&t->t, strkey(k), v, hash, &strhash, &streql);
+  lookupkey_t key = strkey2(k, len);
+  uint32_t hash = MurmurHash2(key.key.s.str, key.key.s.length, 0);
+  insert(&t->t, key, v, hash, &strhash, &streql);
   return true;
 }
 
@@ -3660,11 +3677,12 @@
   return lookup(&t->t, strkey2(key, len), v, hash, &streql);
 }
 
-bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) {
-  uint32_t hash = MurmurHash2(key, strlen(key), 0);
+bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
+                         upb_value *val) {
+  uint32_t hash = MurmurHash2(key, len, 0);  // key may not be NUL-terminated
   upb_tabkey tabkey;
-  if (rm(&t->t, strkey(key), val, &tabkey, hash, &streql)) {
-    free((void*)tabkey.str);
+  if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
+    free((void*)tabkey.s.str);
     return true;
   } else {
     return false;
@@ -3693,7 +3711,12 @@
 
 const char *upb_strtable_iter_key(upb_strtable_iter *i) {
   assert(!upb_strtable_done(i));
-  return str_tabent(i)->key.str;
+  return str_tabent(i)->key.s.str;
+}
+
+size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
+  assert(!upb_strtable_done(i));
+  return str_tabent(i)->key.s.length;
 }
 
 upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
@@ -4209,8 +4232,10 @@
 }
 
 void upb_status_clear(upb_status *status) {
-  upb_status blank = UPB_STATUS_INIT;
-  upb_status_copy(status, &blank);
+  if (!status) return;
+  status->ok_ = true;
+  status->code_ = 0;
+  status->msg[0] = '\0';
 }
 
 bool upb_ok(const upb_status *status) { return status->ok_; }
@@ -5977,6 +6002,7 @@
     case OP_SETDELIM:
     case OP_HALT:
     case OP_RET:
+    case OP_DISPATCH:
       put32(c, op);
       break;
     case OP_PARSE_DOUBLE:
@@ -6057,7 +6083,7 @@
     OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), OP(RET),
     OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM),
     OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP),
-    OP(SETBIGGROUPNUM), OP(HALT),
+    OP(SETBIGGROUPNUM), OP(DISPATCH), OP(HALT),
   };
   return op > OP_HALT ? names[0] : names[op];
 #undef OP
@@ -6089,6 +6115,7 @@
                               upb_handlers_msgdef(method->dest_handlers_)));
         break;
       }
+      case OP_DISPATCH:
       case OP_STARTMSG:
       case OP_ENDMSG:
       case OP_PUSHLENDELIM:
@@ -6434,6 +6461,7 @@
   putop(c, OP_SETDISPATCH, &method->dispatch);
   putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
  label(c, LABEL_FIELD);
+  uint32_t* start_pc = c->pc;
   upb_msg_iter i;
   for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
     const upb_fielddef *f = upb_msg_iter_field(&i);
@@ -6449,8 +6477,18 @@
     }
   }
 
+  // If there were no fields, or if no handlers were defined, we need to
+  // generate a non-empty loop body so that we can at least dispatch for unknown
+  // fields and check for the end of the message.
+  if (c->pc == start_pc) {
+    // Check for end-of-message.
+    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+    // Unconditionally dispatch.
+    putop(c, OP_DISPATCH, 0);
+  }
+
   // For now we just loop back to the last field of the message (or if none,
-  // the DISPATCH opcode for the message.
+  // the DISPATCH opcode for the message).
   putop(c, OP_BRANCH, -LABEL_FIELD);
 
   // Insert both a label and a dispatch table entry for this end-of-msg.
@@ -7434,6 +7472,9 @@
         if (result == DECODE_MISMATCH) goto badtag;
         if (result >= 0) return result;
       })
+      VMCASE(OP_DISPATCH, {
+        CHECK_RETURN(dispatch(d));
+      })
       VMCASE(OP_HALT, {
         return size;
       })
@@ -7492,7 +7533,8 @@
       // Rewind from OP_TAG* to OP_CHECKDELIM.
       assert(getop(*d->pc) == OP_TAG1 ||
              getop(*d->pc) == OP_TAG2 ||
-             getop(*d->pc) == OP_TAGN);
+             getop(*d->pc) == OP_TAGN ||
+             getop(*d->pc) == OP_DISPATCH);
       d->pc = p;
     }
     upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
@@ -8627,6 +8669,9 @@
 
 #define PARSER_CHECK_RETURN(x) if (!(x)) return false
 
+// Used to signal that a capture has been suspended.
+static char suspend_capture;
+
 static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
                                              upb_handlertype_t type) {
   upb_selector_t sel;
@@ -8640,41 +8685,6 @@
       p, upb_handlers_getprimitivehandlertype(p->top->f));
 }
 
-static void start_member(upb_json_parser *p) {
-  assert(!p->top->f);
-  assert(!p->accumulated);
-  p->accumulated_len = 0;
-}
-
-static bool end_member(upb_json_parser *p) {
-  // TODO(haberman): support keys that span buffers or have escape sequences.
-  assert(!p->top->f);
-  assert(p->accumulated);
-  const upb_fielddef *f =
-      upb_msgdef_ntof(p->top->m, p->accumulated, p->accumulated_len);
-
-  if (!f) {
-    // TODO(haberman): Ignore unknown fields if requested/configured to do so.
-    upb_status_seterrf(p->status, "No such field: %.*s\n",
-                       (int)p->accumulated_len, p->accumulated);
-    return false;
-  }
-
-  p->top->f = f;
-  p->accumulated = NULL;
-
-  return true;
-}
-
-static void start_object(upb_json_parser *p) {
-  upb_sink_startmsg(&p->top->sink);
-}
-
-static void end_object(upb_json_parser *p) {
-  upb_status status;
-  upb_sink_endmsg(&p->top->sink, &status);
-}
-
 static bool check_stack(upb_json_parser *p) {
   if ((p->top + 1) == p->limit) {
     upb_status_seterrmsg(p->status, "Nesting too deep");
@@ -8684,83 +8694,28 @@
   return true;
 }
 
-static bool start_subobject(upb_json_parser *p) {
-  assert(p->top->f);
+// There are GCC/Clang built-ins for overflow checking which we could start
+// using if there was any performance benefit to it.
 
-  if (!upb_fielddef_issubmsg(p->top->f)) {
-    upb_status_seterrf(p->status,
-                       "Object specified for non-message/group field: %s",
-                       upb_fielddef_name(p->top->f));
-    return false;
-  }
-
-  if (!check_stack(p)) return false;
-
-  upb_jsonparser_frame *inner = p->top + 1;
-
-  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
-  upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
-  inner->m = upb_fielddef_msgsubdef(p->top->f);
-  inner->f = NULL;
-  p->top = inner;
-
+static bool checked_add(size_t a, size_t b, size_t *c) {
+  if (SIZE_MAX - a < b) return false;
+  *c = a + b;
   return true;
 }
 
-static void end_subobject(upb_json_parser *p) {
-  p->top--;
-  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
-  upb_sink_endsubmsg(&p->top->sink, sel);
-}
-
-static bool start_array(upb_json_parser *p) {
-  assert(p->top->f);
-
-  if (!upb_fielddef_isseq(p->top->f)) {
-    upb_status_seterrf(p->status,
-                       "Array specified for non-repeated field: %s",
-                       upb_fielddef_name(p->top->f));
-    return false;
+static size_t saturating_multiply(size_t a, size_t b) {
+  // size_t is unsigned, so this is defined behavior even on overflow.
+  size_t ret = a * b;
+  if (b != 0 && ret / b != a) {
+    ret = SIZE_MAX;
   }
-
-  if (!check_stack(p)) return false;
-
-  upb_jsonparser_frame *inner = p->top + 1;
-  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
-  upb_sink_startseq(&p->top->sink, sel, &inner->sink);
-  inner->m = p->top->m;
-  inner->f = p->top->f;
-  p->top = inner;
-
-  return true;
+  return ret;
 }
 
-static void end_array(upb_json_parser *p) {
-  assert(p->top > p->stack);
 
-  p->top--;
-  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
-  upb_sink_endseq(&p->top->sink, sel);
-}
+/* Base64 decoding ************************************************************/
 
-static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
-
-static bool parser_putbool(upb_json_parser *p, bool val) {
-  if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
-    upb_status_seterrf(p->status,
-                       "Boolean value specified for non-bool field: %s",
-                       upb_fielddef_name(p->top->f));
-    return false;
-  }
-
-  bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
-  UPB_ASSERT_VAR(ok, ok);
-  return true;
-}
-
-static void start_text(upb_json_parser *p, const char *ptr) {
-  p->text_begin = ptr;
-}
+// TODO(haberman): make this streaming.
 
 static const signed char b64table[] = {
   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
@@ -8880,148 +8835,231 @@
   return false;
 }
 
-static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) {
-  assert(!p->accumulated);  // TODO: handle this case.
-  p->accumulated = p->text_begin;
-  p->accumulated_len = ptr - p->text_begin;
 
-  if (p->top->f && upb_fielddef_isstring(p->top->f)) {
-    // This is a string field (as opposed to a member name).
-    upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
-    if (upb_fielddef_type(p->top->f) == UPB_TYPE_BYTES) {
-      PARSER_CHECK_RETURN(base64_push(p, sel, p->accumulated,
-                                      p->accumulated_len));
-    } else {
-      upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL);
-    }
-    p->accumulated = NULL;
-  } else if (p->top->f &&
-             upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM &&
-             !is_num) {
+/* Accumulate buffer **********************************************************/
 
-    // Enum case: resolve enum symbolic name to integer value.
-    const upb_enumdef *enumdef =
-        (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
+// Functionality for accumulating a buffer.
+//
+// Some parts of the parser need an entire value as a contiguous string.  For
+// example, to look up a member name in a hash table, or to turn a string into
+// a number, the relevant library routines need the input string to be in
+// contiguous memory, even if the value spanned two or more buffers in the
+// input.  These routines handle that.
+//
+// In the common case we can just point to the input buffer to get this
+// contiguous string and avoid any actual copy.  So we optimistically begin
+// this way.  But there are a few cases where we must instead copy into a
+// separate buffer:
+//
+//   1. The string was not contiguous in the input (it spanned buffers).
+//
+//   2. The string included escape sequences that need to be interpreted to get
+//      the true value in a contiguous buffer.
 
-    int32_t int_val = 0;
-    if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len,
-                         &int_val)) {
-      upb_selector_t sel = parser_getsel(p);
-      upb_sink_putint32(&p->top->sink, sel, int_val);
-    } else {
-      upb_status_seterrmsg(p->status, "Enum value name unknown");
+static void assert_accumulate_empty(upb_json_parser *p) {
+  assert(p->accumulated == NULL);
+  assert(p->accumulated_len == 0);
+}
+
+static void accumulate_clear(upb_json_parser *p) {
+  p->accumulated = NULL;
+  p->accumulated_len = 0;
+}
+
+// Used internally by accumulate_append().
+static bool accumulate_realloc(upb_json_parser *p, size_t need) {
+  size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
+  while (new_size < need) {
+    new_size = saturating_multiply(new_size, 2);
+  }
+
+  void *mem = realloc(p->accumulate_buf, new_size);
+  if (!mem) {
+    upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
+    return false;
+  }
+
+  p->accumulate_buf = mem;
+  p->accumulate_buf_size = new_size;
+  return true;
+}
+
+// Logically appends the given data to the append buffer.
+// If "can_alias" is true, we will try to avoid actually copying, but the buffer
+// must be valid until the next accumulate_append() call (if any).
+static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
+                              bool can_alias) {
+  if (!p->accumulated && can_alias) {
+    p->accumulated = buf;
+    p->accumulated_len = len;
+    return true;
+  }
+
+  size_t need;
+  if (!checked_add(p->accumulated_len, len, &need)) {
+    upb_status_seterrmsg(p->status, "Integer overflow.");
+    return false;
+  }
+
+  if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
+    return false;
+  }
+
+  if (p->accumulated != p->accumulate_buf) {
+    memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
+    p->accumulated = p->accumulate_buf;
+  }
+
+  memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
+  p->accumulated_len += len;
+  return true;
+}
+
+// Returns a pointer to the data accumulated since the last accumulate_clear()
+// call, and writes the length to *len.  This will point either to the input
+// buffer or a temporary accumulate buffer.
+static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
+  assert(p->accumulated);
+  *len = p->accumulated_len;
+  return p->accumulated;
+}
+
+
+/* Multi-part text data *******************************************************/
+
+// When we have text data in the input, it can often come in multiple segments.
+// For example, there may be some raw string data followed by an escape
+// sequence.  The two segments are processed with different logic.  Also buffer
+// seams in the input can cause multiple segments.
+//
+// As we see segments, there are two main cases for how we want to process them:
+//
+//  1. we want to push the captured input directly to string handlers.
+//
+//  2. we need to accumulate all the parts into a contiguous buffer for further
+//     processing (field name lookup, string->number conversion, etc).
+
+// This is the set of states for p->multipart_state.
+enum {
+  // We are not currently processing multipart data.
+  MULTIPART_INACTIVE = 0,
+
+  // We are processing multipart data by accumulating it into a contiguous
+  // buffer.
+  MULTIPART_ACCUMULATE = 1,
+
+  // We are processing multipart data by pushing each part directly to the
+  // current string handlers.
+  MULTIPART_PUSHEAGERLY = 2
+};
+
+// Start a multi-part text value where we accumulate the data for processing at
+// the end.
+static void multipart_startaccum(upb_json_parser *p) {
+  assert_accumulate_empty(p);
+  assert(p->multipart_state == MULTIPART_INACTIVE);
+  p->multipart_state = MULTIPART_ACCUMULATE;
+}
+
+// Start a multi-part text value where we immediately push text data to a string
+// value with the given selector.
+static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
+  assert_accumulate_empty(p);
+  assert(p->multipart_state == MULTIPART_INACTIVE);
+  p->multipart_state = MULTIPART_PUSHEAGERLY;
+  p->string_selector = sel;
+}
+
+static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
+                           bool can_alias) {
+  switch (p->multipart_state) {
+    case MULTIPART_INACTIVE:
+      upb_status_seterrmsg(
+          p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
       return false;
+
+    case MULTIPART_ACCUMULATE:
+      if (!accumulate_append(p, buf, len, can_alias)) {
+        return false;
+      }
+      break;
+
+    case MULTIPART_PUSHEAGERLY: {
+      const upb_bufhandle *handle = can_alias ? p->handle : NULL;
+      upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
+      break;
     }
-    p->accumulated = NULL;
   }
 
   return true;
 }
 
-static bool start_stringval(upb_json_parser *p) {
-  assert(p->top->f);
+// Note: this invalidates the accumulate buffer!  Call only after reading its
+// contents.
+static void multipart_end(upb_json_parser *p) {
+  assert(p->multipart_state != MULTIPART_INACTIVE);
+  p->multipart_state = MULTIPART_INACTIVE;
+  accumulate_clear(p);
+}
 
-  if (upb_fielddef_isstring(p->top->f)) {
-    if (!check_stack(p)) return false;
 
-    // Start a new parser frame: parser frames correspond one-to-one with
-    // handler frames, and string events occur in a sub-frame.
-    upb_jsonparser_frame *inner = p->top + 1;
-    upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
-    upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
-    inner->m = p->top->m;
-    inner->f = p->top->f;
-    p->top = inner;
+/* Input capture **************************************************************/
 
-    return true;
-  } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
-    // Do nothing -- symbolic enum names in quotes remain in the
-    // current parser frame.
+// Functionality for capturing a region of the input as text.  Gracefully
+// handles the case where a buffer seam occurs in the middle of the captured
+// region.
+
+static void capture_begin(upb_json_parser *p, const char *ptr) {
+  assert(p->multipart_state != MULTIPART_INACTIVE);
+  assert(p->capture == NULL);
+  p->capture = ptr;
+}
+
+static bool capture_end(upb_json_parser *p, const char *ptr) {
+  assert(p->capture);
+  if (multipart_text(p, p->capture, ptr - p->capture, true)) {
+    p->capture = NULL;
     return true;
   } else {
-    upb_status_seterrf(p->status,
-                       "String specified for non-string/non-enum field: %s",
-                       upb_fielddef_name(p->top->f));
     return false;
   }
-
 }
 
-static void end_stringval(upb_json_parser *p) {
-  if (upb_fielddef_isstring(p->top->f)) {
-    upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
-    upb_sink_endstr(&p->top->sink, sel);
-    p->top--;
+// This is called at the end of each input buffer (ie. when we have hit a
+// buffer seam).  If we are in the middle of capturing the input, this
+// processes the unprocessed capture region.
+static void capture_suspend(upb_json_parser *p, const char **ptr) {
+  if (!p->capture) return;
+
+  if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
+    // We use this as a signal that we were in the middle of capturing, and
+    // that capturing should resume at the beginning of the next buffer.
+    //
+    // We can't use *ptr here, because we have no guarantee that this pointer
+    // will be valid when we resume (if the underlying memory is freed, then
+    // using the pointer at all, even to compare to NULL, is likely undefined
+    // behavior).
+    p->capture = &suspend_capture;
+  } else {
+    // Need to back up the pointer to the beginning of the capture, since
+    // we were not able to actually preserve it.
+    *ptr = p->capture;
   }
 }
 
-static void start_number(upb_json_parser *p, const char *ptr) {
-  start_text(p, ptr);
-  assert(p->accumulated == NULL);
-}
-
-static void end_number(upb_json_parser *p, const char *ptr) {
-  end_text(p, ptr, true);
-  const char *myend = p->accumulated + p->accumulated_len;
-  char *end;
-
-  switch (upb_fielddef_type(p->top->f)) {
-    case UPB_TYPE_ENUM:
-    case UPB_TYPE_INT32: {
-      long val = strtol(p->accumulated, &end, 0);
-      if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
-        assert(false);
-      else
-        upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
-      break;
-    }
-    case UPB_TYPE_INT64: {
-      long long val = strtoll(p->accumulated, &end, 0);
-      if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
-        assert(false);
-      else
-        upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
-      break;
-    }
-    case UPB_TYPE_UINT32: {
-      unsigned long val = strtoul(p->accumulated, &end, 0);
-      if (val > UINT32_MAX || errno == ERANGE || end != myend)
-        assert(false);
-      else
-        upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
-      break;
-    }
-    case UPB_TYPE_UINT64: {
-      unsigned long long val = strtoull(p->accumulated, &end, 0);
-      if (val > UINT64_MAX || errno == ERANGE || end != myend)
-        assert(false);
-      else
-        upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
-      break;
-    }
-    case UPB_TYPE_DOUBLE: {
-      double val = strtod(p->accumulated, &end);
-      if (errno == ERANGE || end != myend)
-        assert(false);
-      else
-        upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
-      break;
-    }
-    case UPB_TYPE_FLOAT: {
-      float val = strtof(p->accumulated, &end);
-      if (errno == ERANGE || end != myend)
-        assert(false);
-      else
-        upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
-      break;
-    }
-    default:
-      assert(false);
+static void capture_resume(upb_json_parser *p, const char *ptr) {
+  if (p->capture) {
+    assert(p->capture == &suspend_capture);
+    p->capture = ptr;
   }
-
-  p->accumulated = NULL;
 }
 
+
+/* Callbacks from the parser **************************************************/
+
+// These are the functions called directly from the parser itself.
+// We define these in the same order as their declarations in the parser.
+
 static char escape_char(char in) {
   switch (in) {
     case 'r': return '\r';
@@ -9038,35 +9076,33 @@
   }
 }
 
-static void escape(upb_json_parser *p, const char *ptr) {
+static bool escape(upb_json_parser *p, const char *ptr) {
   char ch = escape_char(*ptr);
-  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
-  upb_sink_putstring(&p->top->sink, sel, &ch, 1, NULL);
+  return multipart_text(p, &ch, 1, false);
 }
 
-static uint8_t hexdigit(char ch) {
+static void start_hex(upb_json_parser *p) {
+  p->digit = 0;
+}
+
+static void hexdigit(upb_json_parser *p, const char *ptr) {
+  char ch = *ptr;
+
+  p->digit <<= 4;
+
   if (ch >= '0' && ch <= '9') {
-    return ch - '0';
+    p->digit += (ch - '0');
   } else if (ch >= 'a' && ch <= 'f') {
-    return ch - 'a' + 10;
+    p->digit += ((ch - 'a') + 10);
   } else {
     assert(ch >= 'A' && ch <= 'F');
-    return ch - 'A' + 10;
+    p->digit += ((ch - 'A') + 10);
   }
 }
 
-static void start_hex(upb_json_parser *p, const char *ptr) {
-  start_text(p, ptr);
-}
+static bool end_hex(upb_json_parser *p) {
+  uint32_t codepoint = p->digit;
 
-static void hex(upb_json_parser *p, const char *end) {
-  const char *start = p->text_begin;
-  UPB_ASSERT_VAR(end, end - start == 4);
-  uint16_t codepoint =
-      (hexdigit(start[0]) << 12) |
-      (hexdigit(start[1]) << 8) |
-      (hexdigit(start[2]) << 4) |
-      hexdigit(start[3]);
   // emit the codepoint as UTF-8.
   char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
   int length = 0;
@@ -9089,160 +9125,466 @@
   // TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
   // we have to wait for the next escape to get the full code point).
 
-  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
-  upb_sink_putstring(&p->top->sink, sel, utf8, length, NULL);
+  return multipart_text(p, utf8, length, false);
 }
 
+static void start_text(upb_json_parser *p, const char *ptr) {
+  capture_begin(p, ptr);
+}
+
+static bool end_text(upb_json_parser *p, const char *ptr) {
+  return capture_end(p, ptr);
+}
+
+static void start_number(upb_json_parser *p, const char *ptr) {
+  multipart_startaccum(p);
+  capture_begin(p, ptr);
+}
+
+// Parser callback: ends the number begun by start_number(), converts the
+// accumulated text per the current field's type and pushes it to that field's
+// handler.  Returns false (after setting p->status) on any failure.
+static bool end_number(upb_json_parser *p, const char *ptr) {
+  if (!capture_end(p, ptr)) return false;
+
+  // strtol() and friends unfortunately do not support specifying the length of
+  // the input string, so we need to force a copy into a NULL-terminated buffer.
+  if (!multipart_text(p, "\0", 1, false)) return false;
+
+  size_t len;
+  const char *buf = accumulate_getptr(p, &len);
+  const char *myend = buf + len - 1;  // One for NULL.
+  char *end;
+  errno = 0;  // strto*() never clear errno; drop any stale ERANGE first.
+
+  switch (upb_fielddef_type(p->top->f)) {
+    case UPB_TYPE_ENUM:
+    case UPB_TYPE_INT32: {
+      long val = strtol(buf, &end, 0);
+      if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
+        goto err;
+      else
+        upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
+      break;
+    }
+    case UPB_TYPE_INT64: {
+      long long val = strtoll(buf, &end, 0);
+      if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
+        goto err;
+      else
+        upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
+      break;
+    }
+    case UPB_TYPE_UINT32: {
+      unsigned long val = strtoul(buf, &end, 0);
+      if (val > UINT32_MAX || errno == ERANGE || end != myend)
+        goto err;
+      else
+        upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
+      break;
+    }
+    case UPB_TYPE_UINT64: {
+      unsigned long long val = strtoull(buf, &end, 0);
+      if (val > UINT64_MAX || errno == ERANGE || end != myend)
+        goto err;
+      else
+        upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
+      break;
+    }
+    case UPB_TYPE_DOUBLE: {
+      double val = strtod(buf, &end);
+      if (errno == ERANGE || end != myend)
+        goto err;
+      else
+        upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
+      break;
+    }
+    case UPB_TYPE_FLOAT: {
+      float val = strtof(buf, &end);
+      if (errno == ERANGE || end != myend)
+        goto err;
+      else
+        upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
+      break;
+    }
+    default:
+      assert(false);
+  }
+
+  multipart_end(p);
+  return true;
+
+err:
+  upb_status_seterrf(p->status, "error parsing number: %s", buf);
+  multipart_end(p);
+  return false;
+}
+
+static bool parser_putbool(upb_json_parser *p, bool val) {
+  if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
+    upb_status_seterrf(p->status,
+                       "Boolean value specified for non-bool field: %s",
+                       upb_fielddef_name(p->top->f));
+    return false;
+  }
+
+  bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
+  UPB_ASSERT_VAR(ok, ok);
+  return true;
+}
+
+static bool start_stringval(upb_json_parser *p) {
+  assert(p->top->f);
+
+  if (upb_fielddef_isstring(p->top->f)) {
+    if (!check_stack(p)) return false;
+
+    // Start a new parser frame: parser frames correspond one-to-one with
+    // handler frames, and string events occur in a sub-frame.
+    upb_jsonparser_frame *inner = p->top + 1;
+    upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
+    upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
+    inner->m = p->top->m;
+    inner->f = p->top->f;
+    p->top = inner;
+
+    if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
+      // For STRING fields we push data directly to the handlers as it is
+      // parsed.  We don't do this yet for BYTES fields, because our base64
+      // decoder is not streaming.
+      //
+      // TODO(haberman): make base64 decoding streaming also.
+      multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
+      return true;
+    } else {
+      multipart_startaccum(p);
+      return true;
+    }
+  } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
+    // No need to push a frame -- symbolic enum names in quotes remain in the
+    // current parser frame.
+    //
+    // Enum string values must accumulate so we can look up the value in a table
+    // once it is complete.
+    multipart_startaccum(p);
+    return true;
+  } else {
+    upb_status_seterrf(p->status,
+                       "String specified for non-string/non-enum field: %s",
+                       upb_fielddef_name(p->top->f));
+    return false;
+  }
+}
+
+// Parser callback: ends the quoted value begun by start_stringval().  BYTES
+// data goes through base64_push(), STRING/BYTES frames are closed and popped,
+// and ENUM names are pushed as their int32 number.  Returns false on failure.
+static bool end_stringval(upb_json_parser *p) {
+  bool ok = true;
+
+  switch (upb_fielddef_type(p->top->f)) {
+    case UPB_TYPE_BYTES:
+      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
+                       p->accumulated, p->accumulated_len)) {
+        return false;
+      }
+      // Fall through.
+    case UPB_TYPE_STRING: {
+      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
+      upb_sink_endstr(&p->top->sink, sel);
+      p->top--;
+      break;
+    }
+
+    case UPB_TYPE_ENUM: {
+      // Resolve enum symbolic name to integer value.
+      const upb_enumdef *enumdef =
+          (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
+      size_t len;
+      const char *buf = accumulate_getptr(p, &len);
+      int32_t int_val = 0;
+
+      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);
+      if (ok) {
+        upb_sink_putint32(&p->top->sink, parser_getsel(p), int_val);
+      } else {
+        // "%.*s" consumes an int precision, so the size_t must be narrowed.
+        upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", (int)len,
+                           buf);
+      }
+      break;
+    }
+
+    default:
+      assert(false);
+      upb_status_seterrmsg(p->status, "Internal error in JSON decoder");
+      ok = false;
+      break;
+  }
+
+  multipart_end(p);
+  return ok;
+}
+
+static void start_member(upb_json_parser *p) {
+  assert(!p->top->f);
+  multipart_startaccum(p);
+}
+
+static bool end_member(upb_json_parser *p) {
+  assert(!p->top->f);
+  size_t len;
+  const char *buf = accumulate_getptr(p, &len);
+
+  const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
+
+  if (!f) {
+    // TODO(haberman): Ignore unknown fields if requested/configured to do so.
+    upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
+    return false;
+  }
+
+  p->top->f = f;
+  multipart_end(p);
+
+  return true;
+}
+
+static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
+
+static bool start_subobject(upb_json_parser *p) {
+  assert(p->top->f);
+
+  if (!upb_fielddef_issubmsg(p->top->f)) {
+    upb_status_seterrf(p->status,
+                       "Object specified for non-message/group field: %s",
+                       upb_fielddef_name(p->top->f));
+    return false;
+  }
+
+  if (!check_stack(p)) return false;
+
+  upb_jsonparser_frame *inner = p->top + 1;
+
+  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
+  upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
+  inner->m = upb_fielddef_msgsubdef(p->top->f);
+  inner->f = NULL;
+  p->top = inner;
+
+  return true;
+}
+
+static void end_subobject(upb_json_parser *p) {
+  p->top--;
+  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
+  upb_sink_endsubmsg(&p->top->sink, sel);
+}
+
+static bool start_array(upb_json_parser *p) {
+  assert(p->top->f);
+
+  if (!upb_fielddef_isseq(p->top->f)) {
+    upb_status_seterrf(p->status,
+                       "Array specified for non-repeated field: %s",
+                       upb_fielddef_name(p->top->f));
+    return false;
+  }
+
+  if (!check_stack(p)) return false;
+
+  upb_jsonparser_frame *inner = p->top + 1;
+  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
+  upb_sink_startseq(&p->top->sink, sel, &inner->sink);
+  inner->m = p->top->m;
+  inner->f = p->top->f;
+  p->top = inner;
+
+  return true;
+}
+
+static void end_array(upb_json_parser *p) {
+  assert(p->top > p->stack);
+
+  p->top--;
+  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
+  upb_sink_endseq(&p->top->sink, sel);
+}
+
+static void start_object(upb_json_parser *p) {
+  upb_sink_startmsg(&p->top->sink);
+}
+
+static void end_object(upb_json_parser *p) {  // Ends the current frame's message.
+  upb_status status;  // NOTE(review): never initialized or checked -- confirm.
+  upb_sink_endmsg(&p->top->sink, &status);
+}
+
+
 #define CHECK_RETURN_TOP(x) if (!(x)) goto error
 
+
+/* The actual parser **********************************************************/
+
 // What follows is the Ragel parser itself.  The language is specified in Ragel
 // and the actions call our C functions above.
+//
+// Ragel has an extensive set of functionality, and we use only a small part of
+// it.  There are many action types but we only use a few:
+//
+//   ">" -- transition into a machine
+//   "%" -- transition out of a machine
+//   "@" -- transition into a final state of a machine.
+//
+// "@" transitions are tricky because a machine can transition into a final
+// state repeatedly.  But in some cases we know this can't happen, for example
+// a string which is delimited by a final '"' can only transition into its
+// final state once, when the closing '"' is seen.
 
-#line 596 "upb/json/parser.rl"
+
+#line 904 "upb/json/parser.rl"
 
 
 
-#line 514 "upb/json/parser.c"
+#line 816 "upb/json/parser.c"
 static const char _json_actions[] = {
 	0, 1, 0, 1, 2, 1, 3, 1, 
-	4, 1, 5, 1, 6, 1, 7, 1, 
-	9, 1, 11, 1, 12, 1, 13, 1, 
-	14, 1, 15, 1, 16, 1, 24, 1, 
-	26, 2, 3, 7, 2, 5, 2, 2, 
-	5, 7, 2, 10, 8, 2, 12, 14, 
-	2, 13, 14, 2, 17, 1, 2, 18, 
-	26, 2, 19, 8, 2, 20, 26, 2, 
-	21, 26, 2, 22, 26, 2, 23, 26, 
-	2, 25, 26, 3, 13, 10, 8
+	5, 1, 6, 1, 7, 1, 8, 1, 
+	10, 1, 12, 1, 13, 1, 14, 1, 
+	15, 1, 16, 1, 17, 1, 21, 1, 
+	25, 1, 27, 2, 3, 8, 2, 4, 
+	5, 2, 6, 2, 2, 6, 8, 2, 
+	11, 9, 2, 13, 15, 2, 14, 15, 
+	2, 18, 1, 2, 19, 27, 2, 20, 
+	9, 2, 22, 27, 2, 23, 27, 2, 
+	24, 27, 2, 26, 27, 3, 14, 11, 
+	9
 };
 
 static const unsigned char _json_key_offsets[] = {
-	0, 0, 4, 9, 14, 18, 22, 27, 
-	32, 37, 41, 45, 48, 51, 53, 57, 
-	61, 63, 65, 70, 72, 74, 83, 89, 
-	95, 101, 107, 109, 118, 118, 118, 123, 
-	128, 133, 133, 134, 135, 136, 137, 137, 
-	138, 139, 140, 140, 141, 142, 143, 143, 
-	148, 153, 157, 161, 166, 171, 176, 180, 
-	180, 183, 183, 183
+	0, 0, 4, 9, 14, 15, 19, 24, 
+	29, 34, 38, 42, 45, 48, 50, 54, 
+	58, 60, 62, 67, 69, 71, 80, 86, 
+	92, 98, 104, 106, 115, 116, 116, 116, 
+	121, 126, 131, 132, 133, 134, 135, 135, 
+	136, 137, 138, 138, 139, 140, 141, 141, 
+	146, 151, 152, 156, 161, 166, 171, 175, 
+	175, 178, 178, 178
 };
 
 static const char _json_trans_keys[] = {
 	32, 123, 9, 13, 32, 34, 125, 9, 
-	13, 32, 34, 125, 9, 13, 32, 58, 
-	9, 13, 32, 58, 9, 13, 32, 93, 
-	125, 9, 13, 32, 44, 125, 9, 13, 
-	32, 44, 125, 9, 13, 32, 34, 9, 
-	13, 45, 48, 49, 57, 48, 49, 57, 
-	46, 69, 101, 48, 57, 69, 101, 48, 
-	57, 43, 45, 48, 57, 48, 57, 48, 
-	57, 46, 69, 101, 48, 57, 34, 92, 
-	34, 92, 34, 47, 92, 98, 102, 110, 
-	114, 116, 117, 48, 57, 65, 70, 97, 
-	102, 48, 57, 65, 70, 97, 102, 48, 
-	57, 65, 70, 97, 102, 48, 57, 65, 
-	70, 97, 102, 34, 92, 34, 45, 91, 
-	102, 110, 116, 123, 48, 57, 32, 93, 
-	125, 9, 13, 32, 44, 93, 9, 13, 
-	32, 93, 125, 9, 13, 97, 108, 115, 
-	101, 117, 108, 108, 114, 117, 101, 32, 
-	34, 125, 9, 13, 32, 34, 125, 9, 
-	13, 32, 58, 9, 13, 32, 58, 9, 
-	13, 32, 93, 125, 9, 13, 32, 44, 
-	125, 9, 13, 32, 44, 125, 9, 13, 
-	32, 34, 9, 13, 32, 9, 13, 0
+	13, 32, 34, 125, 9, 13, 34, 32, 
+	58, 9, 13, 32, 93, 125, 9, 13, 
+	32, 44, 125, 9, 13, 32, 44, 125, 
+	9, 13, 32, 34, 9, 13, 45, 48, 
+	49, 57, 48, 49, 57, 46, 69, 101, 
+	48, 57, 69, 101, 48, 57, 43, 45, 
+	48, 57, 48, 57, 48, 57, 46, 69, 
+	101, 48, 57, 34, 92, 34, 92, 34, 
+	47, 92, 98, 102, 110, 114, 116, 117, 
+	48, 57, 65, 70, 97, 102, 48, 57, 
+	65, 70, 97, 102, 48, 57, 65, 70, 
+	97, 102, 48, 57, 65, 70, 97, 102, 
+	34, 92, 34, 45, 91, 102, 110, 116, 
+	123, 48, 57, 34, 32, 93, 125, 9, 
+	13, 32, 44, 93, 9, 13, 32, 93, 
+	125, 9, 13, 97, 108, 115, 101, 117, 
+	108, 108, 114, 117, 101, 32, 34, 125, 
+	9, 13, 32, 34, 125, 9, 13, 34, 
+	32, 58, 9, 13, 32, 93, 125, 9, 
+	13, 32, 44, 125, 9, 13, 32, 44, 
+	125, 9, 13, 32, 34, 9, 13, 32, 
+	9, 13, 0
 };
 
 static const char _json_single_lengths[] = {
-	0, 2, 3, 3, 2, 2, 3, 3, 
+	0, 2, 3, 3, 1, 2, 3, 3, 
 	3, 2, 2, 1, 3, 0, 2, 2, 
 	0, 0, 3, 2, 2, 9, 0, 0, 
-	0, 0, 2, 7, 0, 0, 3, 3, 
-	3, 0, 1, 1, 1, 1, 0, 1, 
+	0, 0, 2, 7, 1, 0, 0, 3, 
+	3, 3, 1, 1, 1, 1, 0, 1, 
 	1, 1, 0, 1, 1, 1, 0, 3, 
-	3, 2, 2, 3, 3, 3, 2, 0, 
+	3, 1, 2, 3, 3, 3, 2, 0, 
 	1, 0, 0, 0
 };
 
 static const char _json_range_lengths[] = {
-	0, 1, 1, 1, 1, 1, 1, 1, 
+	0, 1, 1, 1, 0, 1, 1, 1, 
 	1, 1, 1, 1, 0, 1, 1, 1, 
 	1, 1, 1, 0, 0, 0, 3, 3, 
-	3, 3, 0, 1, 0, 0, 1, 1, 
-	1, 0, 0, 0, 0, 0, 0, 0, 
+	3, 3, 0, 1, 0, 0, 0, 1, 
+	1, 1, 0, 0, 0, 0, 0, 0, 
 	0, 0, 0, 0, 0, 0, 0, 1, 
-	1, 1, 1, 1, 1, 1, 1, 0, 
+	1, 0, 1, 1, 1, 1, 1, 0, 
 	1, 0, 0, 0
 };
 
 static const short _json_index_offsets[] = {
-	0, 0, 4, 9, 14, 18, 22, 27, 
-	32, 37, 41, 45, 48, 52, 54, 58, 
-	62, 64, 66, 71, 74, 77, 87, 91, 
-	95, 99, 103, 106, 115, 116, 117, 122, 
-	127, 132, 133, 135, 137, 139, 141, 142, 
-	144, 146, 148, 149, 151, 153, 155, 156, 
-	161, 166, 170, 174, 179, 184, 189, 193, 
-	194, 197, 198, 199
+	0, 0, 4, 9, 14, 16, 20, 25, 
+	30, 35, 39, 43, 46, 50, 52, 56, 
+	60, 62, 64, 69, 72, 75, 85, 89, 
+	93, 97, 101, 104, 113, 115, 116, 117, 
+	122, 127, 132, 134, 136, 138, 140, 141, 
+	143, 145, 147, 148, 150, 152, 154, 155, 
+	160, 165, 167, 171, 176, 181, 186, 190, 
+	191, 194, 195, 196
 };
 
 static const char _json_indicies[] = {
 	0, 2, 0, 1, 3, 4, 5, 3, 
-	1, 6, 7, 8, 6, 1, 9, 10, 
-	9, 1, 11, 12, 11, 1, 12, 1, 
-	1, 12, 13, 14, 15, 16, 14, 1, 
-	17, 18, 8, 17, 1, 18, 7, 18, 
-	1, 19, 20, 21, 1, 20, 21, 1, 
-	23, 24, 24, 22, 25, 1, 24, 24, 
-	25, 22, 26, 26, 27, 1, 27, 1, 
-	27, 22, 23, 24, 24, 21, 22, 29, 
-	30, 28, 32, 33, 31, 34, 34, 34, 
-	34, 34, 34, 34, 34, 35, 1, 36, 
-	36, 36, 1, 37, 37, 37, 1, 38, 
-	38, 38, 1, 39, 39, 39, 1, 41, 
-	42, 40, 43, 44, 45, 46, 47, 48, 
-	49, 44, 1, 50, 51, 53, 54, 1, 
+	1, 6, 7, 8, 6, 1, 9, 1, 
+	10, 11, 10, 1, 11, 1, 1, 11, 
+	12, 13, 14, 15, 13, 1, 16, 17, 
+	8, 16, 1, 17, 7, 17, 1, 18, 
+	19, 20, 1, 19, 20, 1, 22, 23, 
+	23, 21, 24, 1, 23, 23, 24, 21, 
+	25, 25, 26, 1, 26, 1, 26, 21, 
+	22, 23, 23, 20, 21, 28, 29, 27, 
+	31, 32, 30, 33, 33, 33, 33, 33, 
+	33, 33, 33, 34, 1, 35, 35, 35, 
+	1, 36, 36, 36, 1, 37, 37, 37, 
+	1, 38, 38, 38, 1, 40, 41, 39, 
+	42, 43, 44, 45, 46, 47, 48, 43, 
+	1, 49, 1, 50, 51, 53, 54, 1, 
 	53, 52, 55, 56, 54, 55, 1, 56, 
-	1, 1, 56, 52, 57, 58, 1, 59, 
-	1, 60, 1, 61, 1, 62, 63, 1, 
-	64, 1, 65, 1, 66, 67, 1, 68, 
-	1, 69, 1, 70, 71, 72, 73, 71, 
-	1, 74, 75, 76, 74, 1, 77, 78, 
-	77, 1, 79, 80, 79, 1, 80, 1, 
-	1, 80, 81, 82, 83, 84, 82, 1, 
-	85, 86, 76, 85, 1, 86, 75, 86, 
-	1, 87, 88, 88, 1, 1, 1, 1, 
-	0
+	1, 1, 56, 52, 57, 1, 58, 1, 
+	59, 1, 60, 1, 61, 62, 1, 63, 
+	1, 64, 1, 65, 66, 1, 67, 1, 
+	68, 1, 69, 70, 71, 72, 70, 1, 
+	73, 74, 75, 73, 1, 76, 1, 77, 
+	78, 77, 1, 78, 1, 1, 78, 79, 
+	80, 81, 82, 80, 1, 83, 84, 75, 
+	83, 1, 84, 74, 84, 1, 85, 86, 
+	86, 1, 1, 1, 1, 0
 };
 
 static const char _json_trans_targs[] = {
 	1, 0, 2, 3, 4, 56, 3, 4, 
-	56, 5, 6, 5, 6, 7, 8, 9, 
-	56, 8, 9, 11, 12, 18, 57, 13, 
-	15, 14, 16, 17, 20, 58, 21, 20, 
-	58, 21, 19, 22, 23, 24, 25, 26, 
-	20, 58, 21, 28, 29, 30, 34, 39, 
-	43, 47, 59, 59, 31, 30, 33, 31, 
-	32, 59, 35, 36, 37, 38, 59, 40, 
-	41, 42, 59, 44, 45, 46, 59, 48, 
-	49, 55, 48, 49, 55, 50, 51, 50, 
-	51, 52, 53, 54, 55, 53, 54, 59, 
-	56
+	56, 5, 5, 6, 7, 8, 9, 56, 
+	8, 9, 11, 12, 18, 57, 13, 15, 
+	14, 16, 17, 20, 58, 21, 20, 58, 
+	21, 19, 22, 23, 24, 25, 26, 20, 
+	58, 21, 28, 30, 31, 34, 39, 43, 
+	47, 29, 59, 59, 32, 31, 29, 32, 
+	33, 35, 36, 37, 38, 59, 40, 41, 
+	42, 59, 44, 45, 46, 59, 48, 49, 
+	55, 48, 49, 55, 50, 50, 51, 52, 
+	53, 54, 55, 53, 54, 59, 56
 };
 
 static const char _json_trans_actions[] = {
-	0, 0, 0, 21, 75, 48, 0, 42, 
-	23, 17, 17, 0, 0, 15, 19, 19, 
-	45, 0, 0, 0, 0, 0, 1, 0, 
-	0, 0, 0, 0, 3, 13, 0, 0, 
-	33, 5, 11, 0, 7, 0, 0, 0, 
-	36, 39, 9, 57, 51, 25, 0, 0, 
-	0, 29, 60, 54, 15, 0, 27, 0, 
-	0, 31, 0, 0, 0, 0, 66, 0, 
-	0, 0, 69, 0, 0, 0, 63, 21, 
-	75, 48, 0, 42, 23, 17, 17, 0, 
-	0, 15, 19, 19, 45, 0, 0, 72, 
-	0
+	0, 0, 0, 21, 77, 53, 0, 47, 
+	23, 17, 0, 0, 15, 19, 19, 50, 
+	0, 0, 0, 0, 0, 1, 0, 0, 
+	0, 0, 0, 3, 13, 0, 0, 35, 
+	5, 11, 0, 38, 7, 7, 7, 41, 
+	44, 9, 62, 56, 25, 0, 0, 0, 
+	31, 29, 33, 59, 15, 0, 27, 0, 
+	0, 0, 0, 0, 0, 68, 0, 0, 
+	0, 71, 0, 0, 0, 65, 21, 77, 
+	53, 0, 47, 23, 17, 0, 0, 15, 
+	19, 19, 50, 0, 0, 74, 0
 };
 
 static const int json_start = 1;
@@ -9255,13 +9597,14 @@
 static const int json_en_main = 1;
 
 
-#line 599 "upb/json/parser.rl"
+#line 907 "upb/json/parser.rl"
 
 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
              const upb_bufhandle *handle) {
   UPB_UNUSED(hd);
   UPB_UNUSED(handle);
   upb_json_parser *parser = closure;
+  parser->handle = handle;
 
   // Variables used by Ragel's generated code.
   int cs = parser->current_state;
@@ -9271,8 +9614,10 @@
   const char *p = buf;
   const char *pe = buf + size;
 
+  capture_resume(parser, buf);
+
   
-#line 684 "upb/json/parser.c"
+#line 987 "upb/json/parser.c"
 	{
 	int _klen;
 	unsigned int _trans;
@@ -9347,114 +9692,118 @@
 		switch ( *_acts++ )
 		{
 	case 0:
-#line 517 "upb/json/parser.rl"
+#line 819 "upb/json/parser.rl"
 	{ p--; {cs = stack[--top]; goto _again;} }
 	break;
 	case 1:
-#line 518 "upb/json/parser.rl"
+#line 820 "upb/json/parser.rl"
 	{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
 	break;
 	case 2:
-#line 522 "upb/json/parser.rl"
+#line 824 "upb/json/parser.rl"
 	{ start_text(parser, p); }
 	break;
 	case 3:
-#line 523 "upb/json/parser.rl"
-	{ CHECK_RETURN_TOP(end_text(parser, p, false)); }
+#line 825 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(end_text(parser, p)); }
 	break;
 	case 4:
-#line 529 "upb/json/parser.rl"
-	{ start_hex(parser, p); }
+#line 831 "upb/json/parser.rl"
+	{ start_hex(parser); }
 	break;
 	case 5:
-#line 530 "upb/json/parser.rl"
-	{ hex(parser, p); }
+#line 832 "upb/json/parser.rl"
+	{ hexdigit(parser, p); }
 	break;
 	case 6:
-#line 536 "upb/json/parser.rl"
-	{ escape(parser, p); }
+#line 833 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(end_hex(parser)); }
 	break;
 	case 7:
-#line 539 "upb/json/parser.rl"
-	{ {cs = stack[--top]; goto _again;} }
+#line 839 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(escape(parser, p)); }
 	break;
 	case 8:
-#line 540 "upb/json/parser.rl"
-	{ {stack[top++] = cs; cs = 19; goto _again;} }
-	break;
-	case 9:
-#line 542 "upb/json/parser.rl"
-	{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
-	break;
-	case 10:
-#line 547 "upb/json/parser.rl"
-	{ start_member(parser); }
-	break;
-	case 11:
-#line 548 "upb/json/parser.rl"
-	{ CHECK_RETURN_TOP(end_member(parser)); }
-	break;
-	case 12:
-#line 551 "upb/json/parser.rl"
-	{ clear_member(parser); }
-	break;
-	case 13:
-#line 557 "upb/json/parser.rl"
-	{ start_object(parser); }
-	break;
-	case 14:
-#line 560 "upb/json/parser.rl"
-	{ end_object(parser); }
-	break;
-	case 15:
-#line 566 "upb/json/parser.rl"
-	{ CHECK_RETURN_TOP(start_array(parser)); }
-	break;
-	case 16:
-#line 570 "upb/json/parser.rl"
-	{ end_array(parser); }
-	break;
-	case 17:
-#line 575 "upb/json/parser.rl"
-	{ start_number(parser, p); }
-	break;
-	case 18:
-#line 576 "upb/json/parser.rl"
-	{ end_number(parser, p); }
-	break;
-	case 19:
-#line 578 "upb/json/parser.rl"
-	{ CHECK_RETURN_TOP(start_stringval(parser)); }
-	break;
-	case 20:
-#line 579 "upb/json/parser.rl"
-	{ end_stringval(parser); }
-	break;
-	case 21:
-#line 581 "upb/json/parser.rl"
-	{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
-	break;
-	case 22:
-#line 583 "upb/json/parser.rl"
-	{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
-	break;
-	case 23:
-#line 585 "upb/json/parser.rl"
-	{ /* null value */ }
-	break;
-	case 24:
-#line 587 "upb/json/parser.rl"
-	{ CHECK_RETURN_TOP(start_subobject(parser)); }
-	break;
-	case 25:
-#line 588 "upb/json/parser.rl"
-	{ end_subobject(parser); }
-	break;
-	case 26:
-#line 593 "upb/json/parser.rl"
+#line 845 "upb/json/parser.rl"
 	{ p--; {cs = stack[--top]; goto _again;} }
 	break;
-#line 866 "upb/json/parser.c"
+	case 9:
+#line 848 "upb/json/parser.rl"
+	{ {stack[top++] = cs; cs = 19; goto _again;} }
+	break;
+	case 10:
+#line 850 "upb/json/parser.rl"
+	{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
+	break;
+	case 11:
+#line 855 "upb/json/parser.rl"
+	{ start_member(parser); }
+	break;
+	case 12:
+#line 856 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(end_member(parser)); }
+	break;
+	case 13:
+#line 859 "upb/json/parser.rl"
+	{ clear_member(parser); }
+	break;
+	case 14:
+#line 865 "upb/json/parser.rl"
+	{ start_object(parser); }
+	break;
+	case 15:
+#line 868 "upb/json/parser.rl"
+	{ end_object(parser); }
+	break;
+	case 16:
+#line 874 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(start_array(parser)); }
+	break;
+	case 17:
+#line 878 "upb/json/parser.rl"
+	{ end_array(parser); }
+	break;
+	case 18:
+#line 883 "upb/json/parser.rl"
+	{ start_number(parser, p); }
+	break;
+	case 19:
+#line 884 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(end_number(parser, p)); }
+	break;
+	case 20:
+#line 886 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(start_stringval(parser)); }
+	break;
+	case 21:
+#line 887 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(end_stringval(parser)); }
+	break;
+	case 22:
+#line 889 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
+	break;
+	case 23:
+#line 891 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
+	break;
+	case 24:
+#line 893 "upb/json/parser.rl"
+	{ /* null value */ }
+	break;
+	case 25:
+#line 895 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(start_subobject(parser)); }
+	break;
+	case 26:
+#line 896 "upb/json/parser.rl"
+	{ end_subobject(parser); }
+	break;
+	case 27:
+#line 901 "upb/json/parser.rl"
+	{ p--; {cs = stack[--top]; goto _again;} }
+	break;
+#line 1173 "upb/json/parser.c"
 		}
 	}
 
@@ -9467,10 +9816,12 @@
 	_out: {}
 	}
 
-#line 615 "upb/json/parser.rl"
+#line 926 "upb/json/parser.rl"
 
   if (p != pe) {
     upb_status_seterrf(parser->status, "Parse error at %s\n", p);
+  } else {
+    capture_suspend(parser, &p);
   }
 
 error:
@@ -9487,8 +9838,13 @@
   return true;
 }
 
+
+/* Public API *****************************************************************/
+
 void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
   p->limit = p->stack + UPB_JSON_MAX_DEPTH;
+  p->accumulate_buf = NULL;
+  p->accumulate_buf_size = 0;
   upb_byteshandler_init(&p->input_handler_);
   upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
   upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
@@ -9498,6 +9854,7 @@
 
 // Tears down a parser set up by upb_json_parser_init(): uninitializes the
 // input bytes handler and releases the accumulate buffer.
 void upb_json_parser_uninit(upb_json_parser *p) {
   upb_byteshandler_uninit(&p->input_handler_);
+  // accumulate_buf starts out NULL in upb_json_parser_init(), and free(NULL)
+  // is a no-op, so no "was it ever allocated?" check is needed here.
+  free(p->accumulate_buf);
 }
 
 void upb_json_parser_reset(upb_json_parser *p) {
@@ -9508,18 +9865,18 @@
   int top;
   // Emit Ragel initialization of the parser.
   
-#line 920 "upb/json/parser.c"
+#line 1235 "upb/json/parser.c"
 	{
 	cs = json_start;
 	top = 0;
 	}
 
-#line 655 "upb/json/parser.rl"
+#line 974 "upb/json/parser.rl"
   p->current_state = cs;
   p->parser_top = top;
-  p->text_begin = NULL;
-  p->accumulated = NULL;
-  p->accumulated_len = 0;
+  accumulate_clear(p);
+  p->multipart_state = MULTIPART_INACTIVE;
+  p->capture = NULL;
 }
 
 void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {

diff --git a/ruby/ext/google/protobuf_c/upb.h b/ruby/ext/google/protobuf_c/upb.h
index 150aef1..fbcb8e9 100644
--- a/ruby/ext/google/protobuf_c/upb.h
+++ b/ruby/ext/google/protobuf_c/upb.h

@@ -600,6 +600,9 @@
 
 // Like strdup(), which isn't always available since it's not ANSI C.
 char *upb_strdup(const char *s);
+// Variant that works with a length-delimited rather than NUL-terminated string,
+// as supported by strtable.
+char *upb_strdup2(const char *s, size_t len);
 
 UPB_INLINE void _upb_value_setval(upb_value *v, _upb_value val,
                                   upb_ctype_t ctype) {
@@ -654,12 +657,24 @@
 
 typedef union {
   uintptr_t num;
-  const char *str;  // We own, nullz.
+  struct {
+    // We own this. NUL-terminated but may also contain binary data; see
+    // explicit length below.
+    // TODO: move the length to the start of the string in order to reduce
+    // tabkey's size (to one machine word) in a way that supports static
+    // initialization.
+    const char *str;
+    size_t length;
+  } s;
 } upb_tabkey;
 
 #define UPB_TABKEY_NUM(n) {n}
 #ifdef UPB_C99
-#define UPB_TABKEY_STR(s) {.str = s}
+// Given that |strval| is a string literal, sizeof(strval) - 1 gives us a
+// compile-time-constant strlen(). We must ensure that this works for static
+// data initializers.
+#define UPB_TABKEY_STR(strval) { .s = { .str = strval,                    \
+                                        .length = sizeof(strval) - 1 } }
 #endif
 // TODO(haberman): C++
 #define UPB_TABKEY_NONE {0}
@@ -765,7 +780,14 @@
 // If a table resize was required but memory allocation failed, false is
 // returned and the table is unchanged.
 bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val);
-bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val);
+bool upb_strtable_insert2(upb_strtable *t, const char *key, size_t len,
+                          upb_value val);
+
+// Convenience wrapper for NUL-terminated keys: measures |key| with strlen()
+// and forwards to upb_strtable_insert2(), returning its result.
+UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key,
+                                    upb_value val) {
+  return upb_strtable_insert2(t, key, strlen(key), val);
+}
 
 // Looks up key in this table, returning "true" if the key was found.
 // If v is non-NULL, copies the value for this key into *v.
@@ -782,7 +804,14 @@
 // Removes an item from the table.  Returns true if the remove was successful,
 // and stores the removed item in *val if non-NULL.
 bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
-bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val);
+bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
+                          upb_value *val);
+
+// Convenience wrapper for NUL-terminated keys: measures |key| with strlen()
+// and forwards to upb_strtable_remove2(), returning its result.
+UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key,
+                                    upb_value *v) {
+  return upb_strtable_remove2(t, key, strlen(key), v);
+}
 
 // Updates an existing entry in an inttable.  If the entry does not exist,
 // returns false and does nothing.  Unlike insert/remove, this does not
@@ -876,6 +905,7 @@
 void upb_strtable_next(upb_strtable_iter *i);
 bool upb_strtable_done(const upb_strtable_iter *i);
 const char *upb_strtable_iter_key(upb_strtable_iter *i);
+size_t upb_strtable_iter_keylength(upb_strtable_iter *i);
 upb_value upb_strtable_iter_value(const upb_strtable_iter *i);
 void upb_strtable_iter_setdone(upb_strtable_iter *i);
 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
@@ -1777,6 +1807,10 @@
   // just be moved into symtab.c?
   MessageDef* Dup(const void* owner) const;
 
+  // Is this message a map entry?
+  void setmapentry(bool map_entry);
+  bool mapentry() const;
+
   // Iteration over fields.  The order is undefined.
   class iterator : public std::iterator<std::forward_iterator_tag, FieldDef*> {
    public:
@@ -1823,6 +1857,11 @@
   upb_inttable itof;  // int to field
   upb_strtable ntof;  // name to field
 
+  // Is this a map-entry message?
+  // TODO: set this flag properly for static descriptors; regenerate
+  // descriptor.upb.c.
+  bool map_entry;
+
   // TODO(haberman): proper extension ranges (there can be multiple).
 ));
 
@@ -1830,7 +1869,7 @@
                         refs, ref2s)                                          \
   {                                                                           \
     UPB_DEF_INIT(name, UPB_DEF_MSG, refs, ref2s), selector_count,             \
-        submsg_field_count, itof, ntof                                        \
+        submsg_field_count, itof, ntof, false                                 \
   }
 
 UPB_BEGIN_EXTERN_C  // {
@@ -1878,6 +1917,9 @@
   return (upb_fielddef *)upb_msgdef_ntof(m, name, len);
 }
 
+void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry);
+bool upb_msgdef_mapentry(const upb_msgdef *m);
+
 // upb_msg_iter i;
 // for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
 //   upb_fielddef *f = upb_msg_iter_field(&i);
@@ -2331,6 +2373,12 @@
 inline MessageDef* MessageDef::Dup(const void *owner) const {
   return upb_msgdef_dup(this, owner);
 }
+inline void MessageDef::setmapentry(bool map_entry) {
+  upb_msgdef_setmapentry(this, map_entry);
+}
+inline bool MessageDef::mapentry() const {
+  return upb_msgdef_mapentry(this);
+}
 inline MessageDef::iterator MessageDef::begin() { return iterator(this); }
 inline MessageDef::iterator MessageDef::end() { return iterator::end(this); }
 inline MessageDef::const_iterator MessageDef::begin() const {
@@ -6614,7 +6662,9 @@
                            //   | unused (24)         | opc |
                            //   | upb_inttable* (32 or 64)  |
 
-  OP_HALT           = 36,  // No arg.
+  OP_DISPATCH       = 36,  // No arg.
+
+  OP_HALT           = 37,  // No arg.
 } opcode;
 
 #define OP_MAX OP_HALT
@@ -7291,15 +7341,24 @@
   int parser_stack[UPB_JSON_MAX_DEPTH];
   int parser_top;
 
-  // A pointer to the beginning of whatever text we are currently parsing.
-  const char *text_begin;
+  // The handle for the current buffer.
+  const upb_bufhandle *handle;
 
-  // We have to accumulate text for member names, integers, unicode escapes, and
-  // base64 partial results.
+  // Accumulate buffer.  See details in parser.rl.
   const char *accumulated;
   size_t accumulated_len;
-  // TODO: add members and code for allocating a buffer when necessary (when the
-  // member spans input buffers or contains escapes).
+  char *accumulate_buf;
+  size_t accumulate_buf_size;
+
+  // Multi-part text data.  See details in parser.rl.
+  int multipart_state;
+  upb_selector_t string_selector;
+
+  // Input capture.  See details in parser.rl.
+  const char *capture;
+
+  // Intermediate result of parsing a unicode escape sequence.
+  uint32_t digit;
 ));
 
 UPB_BEGIN_EXTERN_C

diff --git a/ruby/tests/basic.rb b/ruby/tests/basic.rb
index 05b3a0f..9265503 100644
--- a/ruby/tests/basic.rb
+++ b/ruby/tests/basic.rb

@@ -36,23 +36,43 @@
     add_message "TestMessage2" do
       optional :foo, :int32, 1
     end
+
     add_message "Recursive1" do
       optional :foo, :message, 1, "Recursive2"
     end
     add_message "Recursive2" do
       optional :foo, :message, 1, "Recursive1"
     end
+
     add_enum "TestEnum" do
       value :Default, 0
       value :A, 1
       value :B, 2
       value :C, 3
     end
+
     add_message "BadFieldNames" do
       optional :dup, :int32, 1
       optional :class, :int32, 2
       optional :"a.b", :int32, 3
     end
+
+    add_message "MapMessage" do
+      map :map_string_int32, :string, :int32, 1
+      map :map_string_msg, :string, :message, 2, "TestMessage2"
+    end
+    add_message "MapMessageWireEquiv" do
+      repeated :map_string_int32, :message, 1, "MapMessageWireEquiv_entry1"
+      repeated :map_string_msg, :message, 2, "MapMessageWireEquiv_entry2"
+    end
+    add_message "MapMessageWireEquiv_entry1" do
+      optional :key, :string, 1
+      optional :value, :int32, 2
+    end
+    add_message "MapMessageWireEquiv_entry2" do
+      optional :key, :string, 1
+      optional :value, :message, 2, "TestMessage2"
+    end
   end
 
   TestMessage = pool.lookup("TestMessage").msgclass
@@ -61,6 +81,12 @@
   Recursive2 = pool.lookup("Recursive2").msgclass
   TestEnum = pool.lookup("TestEnum").enummodule
   BadFieldNames = pool.lookup("BadFieldNames").msgclass
+  MapMessage = pool.lookup("MapMessage").msgclass
+  MapMessageWireEquiv = pool.lookup("MapMessageWireEquiv").msgclass
+  MapMessageWireEquiv_entry1 =
+    pool.lookup("MapMessageWireEquiv_entry1").msgclass
+  MapMessageWireEquiv_entry2 =
+    pool.lookup("MapMessageWireEquiv_entry2").msgclass
 
 # ------------ test cases ---------------
 
@@ -300,7 +326,7 @@
       l.push :B
       l.push :C
       assert l.count == 3
-      assert_raise NameError do
+      assert_raise RangeError do
         l.push :D
       end
       assert l[0] == :A
@@ -324,12 +350,244 @@
       end
     end
 
+    # Exercises the core Google::Protobuf::Map API on a string => int32 map:
+    # element assignment and lookup, comparison against a plain Hash, has_key?,
+    # length, dup, hash, each, delete, inspect, keys, values and clear, plus
+    # the errors raised for a wrongly typed key and an out-of-range value.
+    def test_map_basic
+      # allowed key types:
+      # :int32, :int64, :uint32, :uint64, :bool, :string, :bytes.
+
+      m = Google::Protobuf::Map.new(:string, :int32)
+      m["asdf"] = 1
+      assert m["asdf"] == 1
+      m["jkl;"] = 42
+      assert m == { "jkl;" => 42, "asdf" => 1 }
+      assert m.has_key?("asdf")
+      assert !m.has_key?("qwerty")
+      assert m.length == 2
+
+      # A dup must compare equal to, and hash the same as, the original.
+      m2 = m.dup
+      assert m == m2
+      assert m.hash != 0
+      assert m.hash == m2.hash
+
+      # Invert the map while iterating so the result can be compared as a Hash
+      # without depending on iteration order.
+      collected = {}
+      m.each { |k,v| collected[v] = k }
+      assert collected == { 42 => "jkl;", 1 => "asdf" }
+
+      # delete returns the removed value.
+      assert m.delete("asdf") == 1
+      assert !m.has_key?("asdf")
+      assert m["asdf"] == nil
+      # Re-check after the lookup above: the key must still be reported absent.
+      assert !m.has_key?("asdf")
+
+      # We only assert on inspect value when there is one map entry because the
+      # order in which elements appear is unspecified (depends on the internal
+      # hash function). We don't want a brittle test.
+      assert m.inspect == "{\"jkl;\" => 42}"
+
+      assert m.keys == ["jkl;"]
+      assert m.values == [42]
+
+      m.clear
+      assert m.length == 0
+      assert m == {}
+
+      # Integer key on a :string-keyed map.
+      assert_raise TypeError do
+        m[1] = 1
+      end
+      # 0x1_0000_0000 is 2**32, which does not fit in an int32 value.
+      assert_raise RangeError do
+        m["asdf"] = 0x1_0000_0000
+      end
+    end
+
+    # Builds a Map pre-populated from a Hash passed as the last constructor
+    # argument, then compares it with a Hash literal that lists the same pairs
+    # in a different order.
+    def test_map_ctor
+      m = Google::Protobuf::Map.new(:string, :int32,
+                                    {"a" => 1, "b" => 2, "c" => 3})
+      assert m == {"a" => 1, "c" => 3, "b" => 2}
+    end
+
+    # For each key type a Map accepts, checks that in-range keys of the right
+    # Ruby type are accepted and that out-of-range or wrongly typed keys raise
+    # RangeError or TypeError respectively.
+    def test_map_keytypes
+      # :int32 keys -- 0x8000_0000 is 2**31, one past the largest int32.
+      m = Google::Protobuf::Map.new(:int32, :int32)
+      m[1] = 42
+      m[-1] = 42
+      assert_raise RangeError do
+        m[0x8000_0000] = 1
+      end
+      assert_raise TypeError do
+        m["asdf"] = 1
+      end
+
+      # :int64 keys -- 2**60 is accepted, 2**64 is out of range.
+      m = Google::Protobuf::Map.new(:int64, :int32)
+      m[0x1000_0000_0000_0000] = 1
+      assert_raise RangeError do
+        m[0x1_0000_0000_0000_0000] = 1
+      end
+      assert_raise TypeError do
+        m["asdf"] = 1
+      end
+
+      # :uint32 keys -- 2**31 is accepted; 2**32 and negatives are out of range.
+      m = Google::Protobuf::Map.new(:uint32, :int32)
+      m[0x8000_0000] = 1
+      assert_raise RangeError do
+        m[0x1_0000_0000] = 1
+      end
+      assert_raise RangeError do
+        m[-1] = 1
+      end
+
+      # :uint64 keys -- 2**63 is accepted; 2**64 and negatives are out of range.
+      m = Google::Protobuf::Map.new(:uint64, :int32)
+      m[0x8000_0000_0000_0000] = 1
+      assert_raise RangeError do
+        m[0x1_0000_0000_0000_0000] = 1
+      end
+      assert_raise RangeError do
+        m[-1] = 1
+      end
+
+      # :bool keys -- only true and false; integers and strings are rejected.
+      m = Google::Protobuf::Map.new(:bool, :int32)
+      m[true] = 1
+      m[false] = 2
+      assert_raise TypeError do
+        m[1] = 1
+      end
+      assert_raise TypeError do
+        m["asdf"] = 1
+      end
+
+      # :string keys -- integers are rejected, and so is the packed two-byte
+      # 0xFF 0xFF string (presumably because of its binary encoding -- confirm
+      # against the Map key type checks).
+      m = Google::Protobuf::Map.new(:string, :int32)
+      m["asdf"] = 1
+      assert_raise TypeError do
+        m[1] = 1
+      end
+      assert_raise TypeError do
+        bytestring = ["FFFF"].pack("H*")
+        m[bytestring] = 1
+      end
+
+      # :bytes keys -- the packed byte string is accepted, while an ordinary
+      # string literal and an integer are rejected.
+      m = Google::Protobuf::Map.new(:bytes, :int32)
+      bytestring = ["FFFF"].pack("H*")
+      m[bytestring] = 1
+      assert_raise TypeError do
+        m["asdf"] = 1
+      end
+      assert_raise TypeError do
+        m[1] = 1
+      end
+    end
+
+    # Covers maps whose values are messages or enums: the value class/module is
+    # passed as a third constructor argument, optionally followed by an initial
+    # Hash, and values of the wrong type are rejected.
+    def test_map_msg_enum_valuetypes
+      # A TestMessage-valued map must reject a TestMessage2 instance.
+      m = Google::Protobuf::Map.new(:string, :message, TestMessage)
+      m["asdf"] = TestMessage.new
+      assert_raise TypeError do
+        m["jkl;"] = TestMessage2.new
+      end
+
+      m = Google::Protobuf::Map.new(
+        :string, :message, TestMessage,
+        { "a" => TestMessage.new(:optional_int32 => 42),
+          "b" => TestMessage.new(:optional_int32 => 84) })
+      assert m.length == 2
+      # Sort because the order of values is not relied upon.
+      assert m.values.map{|msg| msg.optional_int32}.sort == [42, 84]
+
+      m = Google::Protobuf::Map.new(:string, :enum, TestEnum,
+                                    { "x" => :A, "y" => :B, "z" => :C })
+      assert m.length == 3
+      assert m["z"] == :C
+      # An integer that matches a declared enum value reads back as its symbol.
+      m["z"] = 2
+      assert m["z"] == :B
+      # TestEnum declares no value numbered 4; it reads back as the integer.
+      m["z"] = 4
+      assert m["z"] == 4
+      # An unknown symbol is a RangeError; a String is a TypeError.
+      assert_raise RangeError do
+        m["z"] = :Z
+      end
+      assert_raise TypeError do
+        m["z"] = "z"
+      end
+    end
+
+    # Contrasts Map#dup with Google::Protobuf.deep_copy on a message-valued
+    # map: both yield a distinct, equal Map, but dup keeps the very same value
+    # objects while deep_copy produces new ones.
+    def test_map_dup_deep_copy
+      m = Google::Protobuf::Map.new(
+        :string, :message, TestMessage,
+        { "a" => TestMessage.new(:optional_int32 => 42),
+          "b" => TestMessage.new(:optional_int32 => 84) })
+
+      # Shallow copy: new container, shared values.
+      m2 = m.dup
+      assert m == m2
+      assert m.object_id != m2.object_id
+      assert m["a"].object_id == m2["a"].object_id
+      assert m["b"].object_id == m2["b"].object_id
+
+      # Deep copy: new container and new values.
+      m2 = Google::Protobuf.deep_copy(m)
+      assert m == m2
+      assert m.object_id != m2.object_id
+      assert m["a"].object_id != m2["a"].object_id
+      assert m["b"].object_id != m2["b"].object_id
+    end
+
+    # Exercises map fields declared on a message (MapMessage): default value,
+    # initialization from Hashes in the constructor, in-place mutation through
+    # the accessor, and type checking on both element and whole-field
+    # assignment.
+    def test_map_field
+      # Unset map fields compare equal to an empty Hash.
+      m = MapMessage.new
+      assert m.map_string_int32 == {}
+      assert m.map_string_msg == {}
+
+      m = MapMessage.new(
+        :map_string_int32 => {"a" => 1, "b" => 2},
+        :map_string_msg => {"a" => TestMessage2.new(:foo => 1),
+                            "b" => TestMessage2.new(:foo => 2)})
+      assert m.map_string_int32.keys.sort == ["a", "b"]
+      assert m.map_string_int32["a"] == 1
+      assert m.map_string_msg["b"].foo == 2
+
+      # Mutations made through the accessor are visible on later reads.
+      m.map_string_int32["c"] = 3
+      assert m.map_string_int32["c"] == 3
+      m.map_string_msg["c"] = TestMessage2.new(:foo => 3)
+      assert m.map_string_msg["c"] == TestMessage2.new(:foo => 3)
+      m.map_string_msg.delete("b")
+      m.map_string_msg.delete("c")
+      assert m.map_string_msg == { "a" => TestMessage2.new(:foo => 1) }
+
+      assert_raise TypeError do
+        m.map_string_msg["e"] = TestMessage.new # wrong value type
+      end
+      # ensure nothing was added by the above
+      assert m.map_string_msg == { "a" => TestMessage2.new(:foo => 1) }
+
+      # Whole-field assignment accepts a Map with matching key/value types and
+      # rejects a Map with a different value type as well as a plain Hash.
+      m.map_string_int32 = Google::Protobuf::Map.new(:string, :int32)
+      assert_raise TypeError do
+        m.map_string_int32 = Google::Protobuf::Map.new(:string, :int64)
+      end
+      assert_raise TypeError do
+        m.map_string_int32 = {}
+      end
+
+      # Constructor Hashes are type-checked too: here both the key and the
+      # value have the wrong type for a string => int32 map.
+      assert_raise TypeError do
+        m = MapMessage.new(:map_string_int32 => { 1 => "I am not a number" })
+      end
+    end
+
+    # Round-trips a MapMessage through encode/decode, then decodes the same
+    # bytes as MapMessageWireEquiv, which declares each map field as a repeated
+    # message with a key (field 1) and a value (field 2), and checks that the
+    # decoded entries carry the original key/value pairs.
+    def test_map_encode_decode
+      m = MapMessage.new(
+        :map_string_int32 => {"a" => 1, "b" => 2},
+        :map_string_msg => {"a" => TestMessage2.new(:foo => 1),
+                            "b" => TestMessage2.new(:foo => 2)})
+      m2 = MapMessage.decode(MapMessage.encode(m))
+      assert m == m2
+
+      m3 = MapMessageWireEquiv.decode(MapMessage.encode(m))
+      assert m3.map_string_int32.length == 2
+
+      # Collect the entries into a Hash so the comparison does not depend on
+      # the order in which the map entries were serialized. The blocks run only
+      # for their side effect, so iterate with each rather than map.
+      kv = {}
+      m3.map_string_int32.each { |msg| kv[msg.key] = msg.value }
+      assert kv == {"a" => 1, "b" => 2}
+
+      kv = {}
+      m3.map_string_msg.each { |msg| kv[msg.key] = msg.value }
+      assert kv == {"a" => TestMessage2.new(:foo => 1),
+                    "b" => TestMessage2.new(:foo => 2)}
+    end
+
     def test_enum_field
       m = TestMessage.new
       assert m.optional_enum == :Default
       m.optional_enum = :A
       assert m.optional_enum == :A
-      assert_raise NameError do
+      assert_raise RangeError do
         m.optional_enum = :ASDF
       end
       m.optional_enum = 1
@@ -384,7 +642,8 @@
                           :repeated_string => ["a", "b", "c"],
                           :repeated_int32 => [42, 43, 44],
                           :repeated_enum => [:A, :B, :C, 100],
-                          :repeated_msg => [TestMessage2.new(:foo => 1), TestMessage2.new(:foo => 2)])
+                          :repeated_msg => [TestMessage2.new(:foo => 1),
+                                            TestMessage2.new(:foo => 2)])
       data = TestMessage.encode m
       m2 = TestMessage.decode data
       assert m == m2
commit	5446deaea7ffc29f6e09368cb6238da083969123	[log] [tgz]
author	Joshua Haberman <jhaberman@gmail.com>	Tue Jan 13 13:50:11 2015 -0800
committer	Joshua Haberman <jhaberman@gmail.com>	Tue Jan 13 13:50:11 2015 -0800
tree	994ba61d6441558edbff629b3da99d31552600bb
parent	052e0205a76717f39fc65e303fd2b92ab1df3028 [diff]
parent	ace4212e60bf1abd46181c9ddb9fe31b6d9fac45 [diff]