blob: b2b3242ad92d8d92e30820e562aa0a0504f1cb14 [file] [log] [blame]
Feng Xiaoe96ff302015-06-15 18:21:48 -07001// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: jschorr@google.com (Joseph Schorr)
32// Based on original Protocol Buffers design by
33// Sanjay Ghemawat, Jeff Dean, and others.
34//
35// This file defines static methods and classes for comparing Protocol
36// Messages (see //google/protobuf/util/message_differencer.h for more
37// information).
38
39#include <google/protobuf/util/message_differencer.h>
40
41#include <algorithm>
42#include <memory>
43#ifndef _SHARED_PTR_H
44#include <google/protobuf/stubs/shared_ptr.h>
45#endif
46#include <utility>
47
Feng Xiaoeee38b02015-08-22 18:25:48 -070048#include <google/protobuf/stubs/callback.h>
Feng Xiaoe96ff302015-06-15 18:21:48 -070049#include <google/protobuf/stubs/common.h>
Jisi Liu3b3c8ab2016-03-30 11:39:59 -070050#include <google/protobuf/stubs/logging.h>
Feng Xiaoe96ff302015-06-15 18:21:48 -070051#include <google/protobuf/stubs/stringprintf.h>
52#include <google/protobuf/any.h>
53#include <google/protobuf/io/printer.h>
54#include <google/protobuf/io/zero_copy_stream.h>
55#include <google/protobuf/io/zero_copy_stream_impl.h>
56#include <google/protobuf/dynamic_message.h>
57#include <google/protobuf/text_format.h>
58#include <google/protobuf/util/field_comparator.h>
59#include <google/protobuf/stubs/strutil.h>
60
61namespace google {
62namespace protobuf {
63
64namespace util {
65
66// When comparing a repeated field as map, MultipleFieldMapKeyComparator can
67// be used to specify multiple fields as key for key comparison.
68// Two elements of a repeated field will be regarded as having the same key
69// iff they have the same value for every specified key field.
70// Note that you can also specify only one field as key.
71class MessageDifferencer::MultipleFieldsMapKeyComparator
72 : public MessageDifferencer::MapKeyComparator {
73 public:
74 MultipleFieldsMapKeyComparator(
75 MessageDifferencer* message_differencer,
76 const vector<vector<const FieldDescriptor*> >& key_field_paths)
77 : message_differencer_(message_differencer),
78 key_field_paths_(key_field_paths) {
79 GOOGLE_CHECK(!key_field_paths_.empty());
80 for (int i = 0; i < key_field_paths_.size(); ++i) {
81 GOOGLE_CHECK(!key_field_paths_[i].empty());
82 }
83 }
84 MultipleFieldsMapKeyComparator(
85 MessageDifferencer* message_differencer,
86 const FieldDescriptor* key)
87 : message_differencer_(message_differencer) {
88 vector<const FieldDescriptor*> key_field_path;
89 key_field_path.push_back(key);
90 key_field_paths_.push_back(key_field_path);
91 }
Feng Xiao818c5ee2015-06-15 21:42:57 -070092 virtual bool IsMatch(
Feng Xiaoe96ff302015-06-15 18:21:48 -070093 const Message& message1,
94 const Message& message2,
Feng Xiao818c5ee2015-06-15 21:42:57 -070095 const vector<SpecificField>& parent_fields) const {
Feng Xiaoe96ff302015-06-15 18:21:48 -070096 for (int i = 0; i < key_field_paths_.size(); ++i) {
97 if (!IsMatchInternal(message1, message2, parent_fields,
98 key_field_paths_[i], 0)) {
99 return false;
100 }
101 }
102 return true;
103 }
104 private:
105 bool IsMatchInternal(
106 const Message& message1,
107 const Message& message2,
108 const vector<SpecificField>& parent_fields,
109 const vector<const FieldDescriptor*>& key_field_path,
110 int path_index) const {
111 const FieldDescriptor* field = key_field_path[path_index];
112 vector<SpecificField> current_parent_fields(parent_fields);
113 if (path_index == key_field_path.size() - 1) {
114 if (field->is_repeated()) {
115 if (!message_differencer_->CompareRepeatedField(
116 message1, message2, field, &current_parent_fields)) {
117 return false;
118 }
119 } else {
120 if (!message_differencer_->CompareFieldValueUsingParentFields(
121 message1, message2, field, -1, -1, &current_parent_fields)) {
122 return false;
123 }
124 }
125 return true;
126 } else {
127 const Reflection* reflection1 = message1.GetReflection();
128 const Reflection* reflection2 = message2.GetReflection();
129 bool has_field1 = reflection1->HasField(message1, field);
130 bool has_field2 = reflection2->HasField(message2, field);
131 if (!has_field1 && !has_field2) {
132 return true;
133 }
134 if (has_field1 != has_field2) {
135 return false;
136 }
137 SpecificField specific_field;
138 specific_field.field = field;
139 current_parent_fields.push_back(specific_field);
140 return IsMatchInternal(
141 reflection1->GetMessage(message1, field),
142 reflection2->GetMessage(message2, field),
143 current_parent_fields,
144 key_field_path,
145 path_index + 1);
146 }
147 }
148 MessageDifferencer* message_differencer_;
149 vector<vector<const FieldDescriptor*> > key_field_paths_;
150 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MultipleFieldsMapKeyComparator);
151};
152
153bool MessageDifferencer::Equals(const Message& message1,
154 const Message& message2) {
155 MessageDifferencer differencer;
156
157 return differencer.Compare(message1, message2);
158}
159
160bool MessageDifferencer::Equivalent(const Message& message1,
161 const Message& message2) {
162 MessageDifferencer differencer;
163 differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
164
165 return differencer.Compare(message1, message2);
166}
167
168bool MessageDifferencer::ApproximatelyEquals(const Message& message1,
169 const Message& message2) {
170 MessageDifferencer differencer;
171 differencer.set_float_comparison(
172 MessageDifferencer::APPROXIMATE);
173
174 return differencer.Compare(message1, message2);
175}
176
177bool MessageDifferencer::ApproximatelyEquivalent(const Message& message1,
178 const Message& message2) {
179 MessageDifferencer differencer;
180 differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
181 differencer.set_float_comparison(MessageDifferencer::APPROXIMATE);
182
183 return differencer.Compare(message1, message2);
184}
185
186// ===========================================================================
187
188MessageDifferencer::MessageDifferencer()
189 : reporter_(NULL),
190 field_comparator_(NULL),
191 message_field_comparison_(EQUAL),
192 scope_(FULL),
193 repeated_field_comparison_(AS_LIST),
194 report_matches_(false),
195 output_string_(NULL) { }
196
197MessageDifferencer::~MessageDifferencer() {
198 for (int i = 0; i < owned_key_comparators_.size(); ++i) {
199 delete owned_key_comparators_[i];
200 }
201 for (int i = 0; i < ignore_criteria_.size(); ++i) {
202 delete ignore_criteria_[i];
203 }
204}
205
206void MessageDifferencer::set_field_comparator(FieldComparator* comparator) {
207 GOOGLE_CHECK(comparator) << "Field comparator can't be NULL.";
208 field_comparator_ = comparator;
209}
210
211void MessageDifferencer::set_message_field_comparison(
212 MessageFieldComparison comparison) {
213 message_field_comparison_ = comparison;
214}
215
216void MessageDifferencer::set_scope(Scope scope) {
217 scope_ = scope;
218}
219
220MessageDifferencer::Scope MessageDifferencer::scope() {
221 return scope_;
222}
223
224void MessageDifferencer::set_float_comparison(FloatComparison comparison) {
225 default_field_comparator_.set_float_comparison(
226 comparison == EXACT ?
227 DefaultFieldComparator::EXACT : DefaultFieldComparator::APPROXIMATE);
228}
229
230void MessageDifferencer::set_repeated_field_comparison(
231 RepeatedFieldComparison comparison) {
232 repeated_field_comparison_ = comparison;
233}
234
235void MessageDifferencer::TreatAsSet(const FieldDescriptor* field) {
236 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
237 << field->full_name();
238 const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
239 GOOGLE_CHECK(key_comparator == NULL)
240 << "Cannot treat this repeated field as both Map and Set for"
241 << " comparison. Field name is: " << field->full_name();
Feng Xiaoe841bac2015-12-11 17:09:20 -0800242 GOOGLE_CHECK(list_fields_.find(field) == list_fields_.end())
243 << "Cannot treat the same field as both SET and LIST. Field name is: "
244 << field->full_name();
Feng Xiaoe96ff302015-06-15 18:21:48 -0700245 set_fields_.insert(field);
246}
247
Feng Xiaoe841bac2015-12-11 17:09:20 -0800248void MessageDifferencer::TreatAsList(const FieldDescriptor* field) {
249 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
250 << field->full_name();
251 const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
252 GOOGLE_CHECK(key_comparator == NULL)
253 << "Cannot treat this repeated field as both Map and Set for"
254 << " comparison. Field name is: " << field->full_name();
255 GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
256 << "Cannot treat the same field as both SET and LIST. Field name is: "
257 << field->full_name();
258 list_fields_.insert(field);
259}
260
Feng Xiaoe96ff302015-06-15 18:21:48 -0700261void MessageDifferencer::TreatAsMap(const FieldDescriptor* field,
262 const FieldDescriptor* key) {
263 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
264 << field->full_name();
265 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
266 << "Field has to be message type. Field name is: "
267 << field->full_name();
268 GOOGLE_CHECK(key->containing_type() == field->message_type())
269 << key->full_name()
270 << " must be a direct subfield within the repeated field "
271 << field->full_name() << ", not " << key->containing_type()->full_name();
272 GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
273 << "Cannot treat this repeated field as both Map and Set for "
274 << "comparison.";
Feng Xiaoe841bac2015-12-11 17:09:20 -0800275 GOOGLE_CHECK(list_fields_.find(field) == list_fields_.end())
276 << "Cannot treat this repeated field as both Map and List for "
277 << "comparison.";
Feng Xiaoe96ff302015-06-15 18:21:48 -0700278 MapKeyComparator* key_comparator =
279 new MultipleFieldsMapKeyComparator(this, key);
280 owned_key_comparators_.push_back(key_comparator);
281 map_field_key_comparator_[field] = key_comparator;
282}
283
284void MessageDifferencer::TreatAsMapWithMultipleFieldsAsKey(
285 const FieldDescriptor* field,
286 const vector<const FieldDescriptor*>& key_fields) {
287 vector<vector<const FieldDescriptor*> > key_field_paths;
288 for (int i = 0; i < key_fields.size(); ++i) {
289 vector<const FieldDescriptor*> key_field_path;
290 key_field_path.push_back(key_fields[i]);
291 key_field_paths.push_back(key_field_path);
292 }
293 TreatAsMapWithMultipleFieldPathsAsKey(field, key_field_paths);
294}
295
296void MessageDifferencer::TreatAsMapWithMultipleFieldPathsAsKey(
297 const FieldDescriptor* field,
298 const vector<vector<const FieldDescriptor*> >& key_field_paths) {
299 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
300 << field->full_name();
301 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
302 << "Field has to be message type. Field name is: "
303 << field->full_name();
304 for (int i = 0; i < key_field_paths.size(); ++i) {
305 const vector<const FieldDescriptor*>& key_field_path = key_field_paths[i];
306 for (int j = 0; j < key_field_path.size(); ++j) {
307 const FieldDescriptor* parent_field =
308 j == 0 ? field : key_field_path[j - 1];
309 const FieldDescriptor* child_field = key_field_path[j];
310 GOOGLE_CHECK(child_field->containing_type() == parent_field->message_type())
311 << child_field->full_name()
312 << " must be a direct subfield within the field: "
313 << parent_field->full_name();
314 if (j != 0) {
315 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, parent_field->cpp_type())
316 << parent_field->full_name() << " has to be of type message.";
317 GOOGLE_CHECK(!parent_field->is_repeated())
318 << parent_field->full_name() << " cannot be a repeated field.";
319 }
320 }
321 }
322 GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
323 << "Cannot treat this repeated field as both Map and Set for "
324 << "comparison.";
325 MapKeyComparator* key_comparator =
326 new MultipleFieldsMapKeyComparator(this, key_field_paths);
327 owned_key_comparators_.push_back(key_comparator);
328 map_field_key_comparator_[field] = key_comparator;
329}
330
331void MessageDifferencer::TreatAsMapUsingKeyComparator(
332 const FieldDescriptor* field,
333 const MapKeyComparator* key_comparator) {
334 GOOGLE_CHECK(field->is_repeated()) << "Field must be repeated: "
335 << field->full_name();
336 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
337 << "Field has to be message type. Field name is: "
338 << field->full_name();
339 GOOGLE_CHECK(set_fields_.find(field) == set_fields_.end())
340 << "Cannot treat this repeated field as both Map and Set for "
341 << "comparison.";
342 map_field_key_comparator_[field] = key_comparator;
343}
344
345void MessageDifferencer::AddIgnoreCriteria(IgnoreCriteria* ignore_criteria) {
346 ignore_criteria_.push_back(ignore_criteria);
347}
348
349void MessageDifferencer::IgnoreField(const FieldDescriptor* field) {
350 ignored_fields_.insert(field);
351}
352
353void MessageDifferencer::SetFractionAndMargin(const FieldDescriptor* field,
354 double fraction, double margin) {
355 default_field_comparator_.SetFractionAndMargin(field, fraction, margin);
356}
357
358void MessageDifferencer::ReportDifferencesToString(string* output) {
359 GOOGLE_DCHECK(output) << "Specified output string was NULL";
360
361 output_string_ = output;
362 output_string_->clear();
363}
364
365void MessageDifferencer::ReportDifferencesTo(Reporter* reporter) {
366 // If an output string is set, clear it to prevent
367 // it superceding the specified reporter.
368 if (output_string_) {
369 output_string_ = NULL;
370 }
371
372 reporter_ = reporter;
373}
374
375bool MessageDifferencer::FieldBefore(const FieldDescriptor* field1,
376 const FieldDescriptor* field2) {
377 // Handle sentinel values (i.e. make sure NULLs are always ordered
378 // at the end of the list).
379 if (field1 == NULL) {
380 return false;
381 }
382
383 if (field2 == NULL) {
384 return true;
385 }
386
387 // Always order fields by their tag number
388 return (field1->number() < field2->number());
389}
390
391bool MessageDifferencer::Compare(const Message& message1,
392 const Message& message2) {
393 vector<SpecificField> parent_fields;
394
395 bool result = false;
396
397 // Setup the internal reporter if need be.
398 if (output_string_) {
399 io::StringOutputStream output_stream(output_string_);
400 StreamReporter reporter(&output_stream);
401 reporter_ = &reporter;
402 result = Compare(message1, message2, &parent_fields);
403 reporter_ = NULL;
404 } else {
405 result = Compare(message1, message2, &parent_fields);
406 }
407
408 return result;
409}
410
411bool MessageDifferencer::CompareWithFields(
412 const Message& message1,
413 const Message& message2,
414 const vector<const FieldDescriptor*>& message1_fields_arg,
415 const vector<const FieldDescriptor*>& message2_fields_arg) {
416 if (message1.GetDescriptor() != message2.GetDescriptor()) {
417 GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
418 << "descriptors.";
419 return false;
420 }
421
422 vector<SpecificField> parent_fields;
423
424 bool result = false;
425
426 vector<const FieldDescriptor*> message1_fields(message1_fields_arg);
427 vector<const FieldDescriptor*> message2_fields(message2_fields_arg);
428
429 std::sort(message1_fields.begin(), message1_fields.end(), FieldBefore);
430 std::sort(message2_fields.begin(), message2_fields.end(), FieldBefore);
431 // Append NULL sentinel values.
432 message1_fields.push_back(NULL);
433 message2_fields.push_back(NULL);
434
435 // Setup the internal reporter if need be.
436 if (output_string_) {
437 io::StringOutputStream output_stream(output_string_);
438 StreamReporter reporter(&output_stream);
439 reporter_ = &reporter;
440 result = CompareRequestedFieldsUsingSettings(
441 message1, message2, message1_fields, message2_fields, &parent_fields);
442 reporter_ = NULL;
443 } else {
444 result = CompareRequestedFieldsUsingSettings(
445 message1, message2, message1_fields, message2_fields, &parent_fields);
446 }
447
448 return result;
449}
450
451bool MessageDifferencer::Compare(
452 const Message& message1,
453 const Message& message2,
454 vector<SpecificField>* parent_fields) {
455 const Descriptor* descriptor1 = message1.GetDescriptor();
456 const Descriptor* descriptor2 = message2.GetDescriptor();
457 if (descriptor1 != descriptor2) {
458 GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
459 << "descriptors.";
460 return false;
461 }
462 // Expand google.protobuf.Any payload if possible.
463 if (descriptor1->full_name() == internal::kAnyFullTypeName) {
464 google::protobuf::scoped_ptr<Message> data1;
465 google::protobuf::scoped_ptr<Message> data2;
466 if (UnpackAny(message1, &data1) && UnpackAny(message2, &data2)) {
467 return Compare(*data1, *data2, parent_fields);
468 }
469 }
470 const Reflection* reflection1 = message1.GetReflection();
471 const Reflection* reflection2 = message2.GetReflection();
472
473 // Retrieve all the set fields, including extensions.
474 vector<const FieldDescriptor*> message1_fields;
475 vector<const FieldDescriptor*> message2_fields;
476
477 reflection1->ListFields(message1, &message1_fields);
478 reflection2->ListFields(message2, &message2_fields);
479
480 // Add sentinel values to deal with the
481 // case where the number of the fields in
482 // each list are different.
483 message1_fields.push_back(NULL);
484 message2_fields.push_back(NULL);
485
486 bool unknown_compare_result = true;
487 // Ignore unknown fields in EQUIVALENT mode
488 if (message_field_comparison_ != EQUIVALENT) {
489 const google::protobuf::UnknownFieldSet* unknown_field_set1 =
490 &reflection1->GetUnknownFields(message1);
491 const google::protobuf::UnknownFieldSet* unknown_field_set2 =
492 &reflection2->GetUnknownFields(message2);
493 if (!CompareUnknownFields(message1, message2,
494 *unknown_field_set1, *unknown_field_set2,
495 parent_fields)) {
496 if (reporter_ == NULL) {
497 return false;
498 };
499 unknown_compare_result = false;
500 }
501 }
502
503 return CompareRequestedFieldsUsingSettings(
504 message1, message2,
505 message1_fields, message2_fields,
506 parent_fields) && unknown_compare_result;
507}
508
509bool MessageDifferencer::CompareRequestedFieldsUsingSettings(
510 const Message& message1,
511 const Message& message2,
512 const vector<const FieldDescriptor*>& message1_fields,
513 const vector<const FieldDescriptor*>& message2_fields,
514 vector<SpecificField>* parent_fields) {
515 if (scope_ == FULL) {
516 if (message_field_comparison_ == EQUIVALENT) {
517 // We need to merge the field lists of both messages (i.e.
518 // we are merely checking for a difference in field values,
519 // rather than the addition or deletion of fields).
520 vector<const FieldDescriptor*> fields_union;
521 CombineFields(message1_fields, FULL, message2_fields, FULL,
522 &fields_union);
523 return CompareWithFieldsInternal(message1, message2, fields_union,
524 fields_union, parent_fields);
525 } else {
526 // Simple equality comparison, use the unaltered field lists.
527 return CompareWithFieldsInternal(message1, message2, message1_fields,
528 message2_fields, parent_fields);
529 }
530 } else {
531 if (message_field_comparison_ == EQUIVALENT) {
532 // We use the list of fields for message1 for both messages when
533 // comparing. This way, extra fields in message2 are ignored,
534 // and missing fields in message2 use their default value.
535 return CompareWithFieldsInternal(message1, message2, message1_fields,
536 message1_fields, parent_fields);
537 } else {
538 // We need to consider the full list of fields for message1
539 // but only the intersection for message2. This way, any fields
540 // only present in message2 will be ignored, but any fields only
541 // present in message1 will be marked as a difference.
542 vector<const FieldDescriptor*> fields_intersection;
543 CombineFields(message1_fields, PARTIAL, message2_fields, PARTIAL,
544 &fields_intersection);
545 return CompareWithFieldsInternal(message1, message2, message1_fields,
546 fields_intersection, parent_fields);
547 }
548 }
549}
550
551void MessageDifferencer::CombineFields(
552 const vector<const FieldDescriptor*>& fields1,
553 Scope fields1_scope,
554 const vector<const FieldDescriptor*>& fields2,
555 Scope fields2_scope,
556 vector<const FieldDescriptor*>* combined_fields) {
557
558 int index1 = 0;
559 int index2 = 0;
560
561 while (index1 < fields1.size() && index2 < fields2.size()) {
562 const FieldDescriptor* field1 = fields1[index1];
563 const FieldDescriptor* field2 = fields2[index2];
564
565 if (FieldBefore(field1, field2)) {
566 if (fields1_scope == FULL) {
567 combined_fields->push_back(fields1[index1]);
568 }
569 ++index1;
570 } else if (FieldBefore(field2, field1)) {
571 if (fields2_scope == FULL) {
572 combined_fields->push_back(fields2[index2]);
573 }
574 ++index2;
575 } else {
576 combined_fields->push_back(fields1[index1]);
577 ++index1;
578 ++index2;
579 }
580 }
581}
582
583bool MessageDifferencer::CompareWithFieldsInternal(
584 const Message& message1,
585 const Message& message2,
586 const vector<const FieldDescriptor*>& message1_fields,
587 const vector<const FieldDescriptor*>& message2_fields,
588 vector<SpecificField>* parent_fields) {
589 bool isDifferent = false;
590 int field_index1 = 0;
591 int field_index2 = 0;
592
593 const Reflection* reflection1 = message1.GetReflection();
594 const Reflection* reflection2 = message2.GetReflection();
595
596 while (true) {
597 const FieldDescriptor* field1 = message1_fields[field_index1];
598 const FieldDescriptor* field2 = message2_fields[field_index2];
599
600 // Once we have reached sentinel values, we are done the comparison.
601 if (field1 == NULL && field2 == NULL) {
602 break;
603 }
604
605 // Check for differences in the field itself.
606 if (FieldBefore(field1, field2)) {
607 // Field 1 is not in the field list for message 2.
608 if (IsIgnored(message1, message2, field1, *parent_fields)) {
609 // We are ignoring field1. Report the ignore and move on to
610 // the next field in message1_fields.
611 if (reporter_ != NULL) {
612 SpecificField specific_field;
613 specific_field.field = field1;
614
615 parent_fields->push_back(specific_field);
616 reporter_->ReportIgnored(message1, message2, *parent_fields);
617 parent_fields->pop_back();
618 }
619 ++field_index1;
620 continue;
621 }
622
623 if (reporter_ != NULL) {
624 int count = field1->is_repeated() ?
625 reflection1->FieldSize(message1, field1) : 1;
626
627 for (int i = 0; i < count; ++i) {
628 SpecificField specific_field;
629 specific_field.field = field1;
630 specific_field.index = field1->is_repeated() ? i : -1;
631
632 parent_fields->push_back(specific_field);
633 reporter_->ReportDeleted(message1, message2, *parent_fields);
634 parent_fields->pop_back();
635 }
636
637 isDifferent = true;
638 } else {
639 return false;
640 }
641
642 ++field_index1;
643 continue;
644 } else if (FieldBefore(field2, field1)) {
645 // Field 2 is not in the field list for message 1.
646 if (IsIgnored(message1, message2, field2, *parent_fields)) {
647 // We are ignoring field2. Report the ignore and move on to
648 // the next field in message2_fields.
649 if (reporter_ != NULL) {
650 SpecificField specific_field;
651 specific_field.field = field2;
652
653 parent_fields->push_back(specific_field);
654 reporter_->ReportIgnored(message1, message2, *parent_fields);
655 parent_fields->pop_back();
656 }
657 ++field_index2;
658 continue;
659 }
660
661 if (reporter_ != NULL) {
662 int count = field2->is_repeated() ?
663 reflection2->FieldSize(message2, field2) : 1;
664
665 for (int i = 0; i < count; ++i) {
666 SpecificField specific_field;
667 specific_field.field = field2;
668 specific_field.index = field2->is_repeated() ? i : -1;
669 specific_field.new_index = specific_field.index;
670
671 parent_fields->push_back(specific_field);
672 reporter_->ReportAdded(message1, message2, *parent_fields);
673 parent_fields->pop_back();
674 }
675
676 isDifferent = true;
677 } else {
678 return false;
679 }
680
681 ++field_index2;
682 continue;
683 }
684
685 // By this point, field1 and field2 are guarenteed to point to the same
686 // field, so we can now compare the values.
687 if (IsIgnored(message1, message2, field1, *parent_fields)) {
688 // Ignore this field. Report and move on.
689 if (reporter_ != NULL) {
690 SpecificField specific_field;
691 specific_field.field = field1;
692
693 parent_fields->push_back(specific_field);
694 reporter_->ReportIgnored(message1, message2, *parent_fields);
695 parent_fields->pop_back();
696 }
697
698 ++field_index1;
699 ++field_index2;
700 continue;
701 }
702
703 bool fieldDifferent = false;
704 if (field1->is_repeated()) {
705 fieldDifferent = !CompareRepeatedField(message1, message2, field1,
706 parent_fields);
707 if (fieldDifferent) {
708 if (reporter_ == NULL) return false;
709 isDifferent = true;
710 }
711 } else {
712 fieldDifferent = !CompareFieldValueUsingParentFields(
713 message1, message2, field1, -1, -1, parent_fields);
714
715 // If we have found differences, either report them or terminate if
716 // no reporter is present.
717 if (fieldDifferent && reporter_ == NULL) {
718 return false;
719 }
720
721 if (reporter_ != NULL) {
722 SpecificField specific_field;
723 specific_field.field = field1;
724 parent_fields->push_back(specific_field);
725 if (fieldDifferent) {
726 reporter_->ReportModified(message1, message2, *parent_fields);
727 isDifferent = true;
728 } else if (report_matches_) {
729 reporter_->ReportMatched(message1, message2, *parent_fields);
730 }
731 parent_fields->pop_back();
732 }
733 }
734 // Increment the field indicies.
735 ++field_index1;
736 ++field_index2;
737 }
738
739 return !isDifferent;
740}
741
742bool MessageDifferencer::IsMatch(const FieldDescriptor* repeated_field,
743 const MapKeyComparator* key_comparator,
744 const Message* message1,
745 const Message* message2,
746 const vector<SpecificField>& parent_fields,
747 int index1, int index2) {
748 vector<SpecificField> current_parent_fields(parent_fields);
749 if (repeated_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
750 return CompareFieldValueUsingParentFields(
751 *message1, *message2, repeated_field, index1, index2,
752 &current_parent_fields);
753 }
754 // Back up the Reporter and output_string_. They will be reset in the
755 // following code.
756 Reporter* backup_reporter = reporter_;
757 string* output_string = output_string_;
758 reporter_ = NULL;
759 output_string_ = NULL;
760 bool match;
761
762 if (key_comparator == NULL) {
763 match = CompareFieldValueUsingParentFields(
764 *message1, *message2, repeated_field, index1, index2,
765 &current_parent_fields);
766 } else {
767 const Reflection* reflection1 = message1->GetReflection();
768 const Reflection* reflection2 = message2->GetReflection();
769 const Message& m1 =
770 reflection1->GetRepeatedMessage(*message1, repeated_field, index1);
771 const Message& m2 =
772 reflection2->GetRepeatedMessage(*message2, repeated_field, index2);
773 SpecificField specific_field;
774 specific_field.field = repeated_field;
775 current_parent_fields.push_back(specific_field);
776 match = key_comparator->IsMatch(m1, m2, current_parent_fields);
777 }
778
779 reporter_ = backup_reporter;
780 output_string_ = output_string;
781 return match;
782}
783
784bool MessageDifferencer::CompareRepeatedField(
785 const Message& message1,
786 const Message& message2,
787 const FieldDescriptor* repeated_field,
788 vector<SpecificField>* parent_fields) {
789 // the input FieldDescriptor is guaranteed to be repeated field.
790 const Reflection* reflection1 = message1.GetReflection();
791 const Reflection* reflection2 = message2.GetReflection();
792 const int count1 = reflection1->FieldSize(message1, repeated_field);
793 const int count2 = reflection2->FieldSize(message2, repeated_field);
794 const bool treated_as_subset = IsTreatedAsSubset(repeated_field);
795
796 // If the field is not treated as subset and no detailed reports is needed,
797 // we do a quick check on the number of the elements to avoid unnecessary
798 // comparison.
799 if (count1 != count2 && reporter_ == NULL && !treated_as_subset) {
800 return false;
801 }
802 // A match can never be found if message1 has more items than message2.
803 if (count1 > count2 && reporter_ == NULL) {
804 return false;
805 }
806
807 // These two list are used for store the index of the correspondent
808 // element in peer repeated field.
809 vector<int> match_list1;
810 vector<int> match_list2;
811
812 // Try to match indices of the repeated fields. Return false if match fails
813 // and there's no detailed report needed.
814 if (!MatchRepeatedFieldIndices(message1, message2, repeated_field,
815 *parent_fields, &match_list1, &match_list2) &&
816 reporter_ == NULL) {
817 return false;
818 }
819
820 bool fieldDifferent = false;
821 SpecificField specific_field;
822 specific_field.field = repeated_field;
823
824 // At this point, we have already matched pairs of fields (with the reporting
825 // to be done later). Now to check if the paired elements are different.
826 for (int i = 0; i < count1; i++) {
827 if (match_list1[i] == -1) continue;
828 specific_field.index = i;
829 specific_field.new_index = match_list1[i];
830
831 const bool result = CompareFieldValueUsingParentFields(
832 message1, message2, repeated_field, i, specific_field.new_index,
833 parent_fields);
834
835 // If we have found differences, either report them or terminate if
836 // no reporter is present. Note that ReportModified, ReportMoved, and
837 // ReportMatched are all mutually exclusive.
838 if (!result) {
839 if (reporter_ == NULL) return false;
840 parent_fields->push_back(specific_field);
841 reporter_->ReportModified(message1, message2, *parent_fields);
842 parent_fields->pop_back();
843 fieldDifferent = true;
844 } else if (reporter_ != NULL &&
845 specific_field.index != specific_field.new_index) {
846 parent_fields->push_back(specific_field);
847 reporter_->ReportMoved(message1, message2, *parent_fields);
848 parent_fields->pop_back();
849 } else if (report_matches_ && reporter_ != NULL) {
850 parent_fields->push_back(specific_field);
851 reporter_->ReportMatched(message1, message2, *parent_fields);
852 parent_fields->pop_back();
853 }
854 }
855
856 // Report any remaining additions or deletions.
857 for (int i = 0; i < count2; ++i) {
858 if (match_list2[i] != -1) continue;
859 if (!treated_as_subset) {
860 fieldDifferent = true;
861 }
862
863 if (reporter_ == NULL) continue;
864 specific_field.index = i;
865 specific_field.new_index = i;
866 parent_fields->push_back(specific_field);
867 reporter_->ReportAdded(message1, message2, *parent_fields);
868 parent_fields->pop_back();
869 }
870
871 for (int i = 0; i < count1; ++i) {
872 if (match_list1[i] != -1) continue;
873 specific_field.index = i;
874 parent_fields->push_back(specific_field);
875 reporter_->ReportDeleted(message1, message2, *parent_fields);
876 parent_fields->pop_back();
877 fieldDifferent = true;
878 }
879 return !fieldDifferent;
880}
881
882bool MessageDifferencer::CompareFieldValue(const Message& message1,
883 const Message& message2,
884 const FieldDescriptor* field,
885 int index1,
886 int index2) {
887 return CompareFieldValueUsingParentFields(message1, message2, field, index1,
888 index2, NULL);
889}
890
891bool MessageDifferencer::CompareFieldValueUsingParentFields(
892 const Message& message1, const Message& message2,
893 const FieldDescriptor* field, int index1, int index2,
894 vector<SpecificField>* parent_fields) {
895 FieldContext field_context(parent_fields);
896 FieldComparator::ComparisonResult result = GetFieldComparisonResult(
897 message1, message2, field, index1, index2, &field_context);
898
899 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
900 result == FieldComparator::RECURSE) {
901 // Get the nested messages and compare them using one of the Compare
902 // methods.
903 const Reflection* reflection1 = message1.GetReflection();
904 const Reflection* reflection2 = message2.GetReflection();
905 const Message& m1 = field->is_repeated() ?
906 reflection1->GetRepeatedMessage(message1, field, index1) :
907 reflection1->GetMessage(message1, field);
908 const Message& m2 = field->is_repeated() ?
909 reflection2->GetRepeatedMessage(message2, field, index2) :
910 reflection2->GetMessage(message2, field);
911
912 // parent_fields is used in calls to Reporter methods.
913 if (parent_fields != NULL) {
914 // Append currently compared field to the end of parent_fields.
915 SpecificField specific_field;
916 specific_field.field = field;
917 specific_field.index = index1;
918 specific_field.new_index = index2;
919 parent_fields->push_back(specific_field);
920 const bool compare_result = Compare(m1, m2, parent_fields);
921 parent_fields->pop_back();
922 return compare_result;
923 } else {
924 // Recreates parent_fields as if m1 and m2 had no parents.
925 return Compare(m1, m2);
926 }
927 } else {
928 return (result == FieldComparator::SAME);
929 }
930}
931
932bool MessageDifferencer::CheckPathChanged(
933 const vector<SpecificField>& field_path) {
934 for (int i = 0; i < field_path.size(); ++i) {
935 if (field_path[i].index != field_path[i].new_index) return true;
936 }
937 return false;
938}
939
940bool MessageDifferencer::IsTreatedAsSet(const FieldDescriptor* field) {
941 if (!field->is_repeated()) return false;
942 if (field->is_map()) return true;
Feng Xiaoe841bac2015-12-11 17:09:20 -0800943 if (repeated_field_comparison_ == AS_SET)
944 return list_fields_.find(field) == list_fields_.end();
Feng Xiaoe96ff302015-06-15 18:21:48 -0700945 return (set_fields_.find(field) != set_fields_.end());
946}
947
948bool MessageDifferencer::IsTreatedAsSubset(const FieldDescriptor* field) {
949 return scope_ == PARTIAL &&
950 (IsTreatedAsSet(field) || GetMapKeyComparator(field) != NULL);
951}
952
953bool MessageDifferencer::IsIgnored(
954 const Message& message1,
955 const Message& message2,
956 const FieldDescriptor* field,
957 const vector<SpecificField>& parent_fields) {
958 if (ignored_fields_.find(field) != ignored_fields_.end()) {
959 return true;
960 }
961 for (int i = 0; i < ignore_criteria_.size(); ++i) {
962 if (ignore_criteria_[i]->IsIgnored(message1, message2, field,
963 parent_fields)) {
964 return true;
965 }
966 }
967 return false;
968}
969
Jisi Liu46e8ff62015-10-05 11:59:43 -0700970bool MessageDifferencer::IsUnknownFieldIgnored(
971 const Message& message1, const Message& message2,
972 const SpecificField& field, const vector<SpecificField>& parent_fields) {
973 for (int i = 0; i < ignore_criteria_.size(); ++i) {
974 if (ignore_criteria_[i]->IsUnknownFieldIgnored(message1, message2, field,
975 parent_fields)) {
976 return true;
977 }
978 }
979 return false;
980}
981
Feng Xiaoe96ff302015-06-15 18:21:48 -0700982const MessageDifferencer::MapKeyComparator* MessageDifferencer
983 ::GetMapKeyComparator(const FieldDescriptor* field) {
984 if (!field->is_repeated()) return NULL;
985 if (map_field_key_comparator_.find(field) !=
986 map_field_key_comparator_.end()) {
987 return map_field_key_comparator_[field];
988 }
989 return NULL;
990}
991
992namespace {
993
994typedef pair<int, const UnknownField*> IndexUnknownFieldPair;
995
996struct UnknownFieldOrdering {
997 inline bool operator()(const IndexUnknownFieldPair& a,
998 const IndexUnknownFieldPair& b) const {
999 if (a.second->number() < b.second->number()) return true;
1000 if (a.second->number() > b.second->number()) return false;
1001 return a.second->type() < b.second->type();
1002 }
1003};
1004
1005} // namespace
1006
1007bool MessageDifferencer::UnpackAny(const Message& any,
1008 google::protobuf::scoped_ptr<Message>* data) {
1009 const Reflection* reflection = any.GetReflection();
1010 const FieldDescriptor* type_url_field;
1011 const FieldDescriptor* value_field;
1012 if (!internal::GetAnyFieldDescriptors(any, &type_url_field, &value_field)) {
1013 return false;
1014 }
1015 const string& type_url = reflection->GetString(any, type_url_field);
1016 string full_type_name;
1017 if (!internal::ParseAnyTypeUrl(type_url, &full_type_name)) {
1018 return false;
1019 }
1020
1021 const google::protobuf::Descriptor* desc =
1022 any.GetDescriptor()->file()->pool()->FindMessageTypeByName(
1023 full_type_name);
1024 if (desc == NULL) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001025 GOOGLE_DLOG(ERROR) << "Proto type '" << full_type_name << "' not found";
Feng Xiaoe96ff302015-06-15 18:21:48 -07001026 return false;
1027 }
1028
1029 if (dynamic_message_factory_ == NULL) {
1030 dynamic_message_factory_.reset(new DynamicMessageFactory());
1031 }
1032 data->reset(dynamic_message_factory_->GetPrototype(desc)->New());
1033 string serialized_value = reflection->GetString(any, value_field);
1034 if (!(*data)->ParseFromString(serialized_value)) {
Jisi Liu3b3c8ab2016-03-30 11:39:59 -07001035 GOOGLE_DLOG(ERROR) << "Failed to parse value for " << full_type_name;
Feng Xiaoe96ff302015-06-15 18:21:48 -07001036 return false;
1037 }
1038 return true;
1039}
1040
1041bool MessageDifferencer::CompareUnknownFields(
1042 const Message& message1, const Message& message2,
1043 const google::protobuf::UnknownFieldSet& unknown_field_set1,
1044 const google::protobuf::UnknownFieldSet& unknown_field_set2,
1045 vector<SpecificField>* parent_field) {
1046 // Ignore unknown fields in EQUIVALENT mode.
1047 if (message_field_comparison_ == EQUIVALENT) return true;
1048
1049 if (unknown_field_set1.empty() && unknown_field_set2.empty()) {
1050 return true;
1051 }
1052
1053 bool is_different = false;
1054
1055 // We first sort the unknown fields by field number and type (in other words,
1056 // in tag order), making sure to preserve ordering of values with the same
1057 // tag. This allows us to report only meaningful differences between the
1058 // two sets -- that is, differing values for the same tag. We use
1059 // IndexUnknownFieldPairs to keep track of the field's original index for
1060 // reporting purposes.
1061 vector<IndexUnknownFieldPair> fields1; // unknown_field_set1, sorted
1062 vector<IndexUnknownFieldPair> fields2; // unknown_field_set2, sorted
1063 fields1.reserve(unknown_field_set1.field_count());
1064 fields2.reserve(unknown_field_set2.field_count());
1065
1066 for (int i = 0; i < unknown_field_set1.field_count(); i++) {
1067 fields1.push_back(std::make_pair(i, &unknown_field_set1.field(i)));
1068 }
1069 for (int i = 0; i < unknown_field_set2.field_count(); i++) {
1070 fields2.push_back(std::make_pair(i, &unknown_field_set2.field(i)));
1071 }
1072
1073 UnknownFieldOrdering is_before;
1074 std::stable_sort(fields1.begin(), fields1.end(), is_before);
1075 std::stable_sort(fields2.begin(), fields2.end(), is_before);
1076
1077 // In order to fill in SpecificField::index, we have to keep track of how
1078 // many values we've seen with the same field number and type.
1079 // current_repeated points at the first field in this range, and
1080 // current_repeated_start{1,2} are the indexes of the first field in the
1081 // range within fields1 and fields2.
1082 const UnknownField* current_repeated = NULL;
1083 int current_repeated_start1 = 0;
1084 int current_repeated_start2 = 0;
1085
1086 // Now that we have two sorted lists, we can detect fields which appear only
1087 // in one list or the other by traversing them simultaneously.
1088 int index1 = 0;
1089 int index2 = 0;
1090 while (index1 < fields1.size() || index2 < fields2.size()) {
1091 enum { ADDITION, DELETION, MODIFICATION, COMPARE_GROUPS,
1092 NO_CHANGE } change_type;
1093
1094 // focus_field is the field we're currently reporting on. (In the case
1095 // of a modification, it's the field on the left side.)
1096 const UnknownField* focus_field;
1097 bool match = false;
1098
1099 if (index2 == fields2.size() ||
1100 (index1 < fields1.size() &&
1101 is_before(fields1[index1], fields2[index2]))) {
1102 // fields1[index1] is not present in fields2.
1103 change_type = DELETION;
1104 focus_field = fields1[index1].second;
1105 } else if (index1 == fields1.size() ||
1106 is_before(fields2[index2], fields1[index1])) {
1107 // fields2[index2] is not present in fields1.
1108 if (scope_ == PARTIAL) {
1109 // Ignore.
1110 ++index2;
1111 continue;
1112 }
1113 change_type = ADDITION;
1114 focus_field = fields2[index2].second;
1115 } else {
1116 // Field type and number are the same. See if the values differ.
1117 change_type = MODIFICATION;
1118 focus_field = fields1[index1].second;
1119
1120 switch (focus_field->type()) {
1121 case UnknownField::TYPE_VARINT:
1122 match = fields1[index1].second->varint() ==
1123 fields2[index2].second->varint();
1124 break;
1125 case UnknownField::TYPE_FIXED32:
1126 match = fields1[index1].second->fixed32() ==
1127 fields2[index2].second->fixed32();
1128 break;
1129 case UnknownField::TYPE_FIXED64:
1130 match = fields1[index1].second->fixed64() ==
1131 fields2[index2].second->fixed64();
1132 break;
1133 case UnknownField::TYPE_LENGTH_DELIMITED:
1134 match = fields1[index1].second->length_delimited() ==
1135 fields2[index2].second->length_delimited();
1136 break;
1137 case UnknownField::TYPE_GROUP:
1138 // We must deal with this later, after building the SpecificField.
1139 change_type = COMPARE_GROUPS;
1140 break;
1141 }
1142 if (match && change_type != COMPARE_GROUPS) {
1143 change_type = NO_CHANGE;
1144 }
1145 }
1146
1147 if (current_repeated == NULL ||
1148 focus_field->number() != current_repeated->number() ||
1149 focus_field->type() != current_repeated->type()) {
1150 // We've started a new repeated field.
1151 current_repeated = focus_field;
1152 current_repeated_start1 = index1;
1153 current_repeated_start2 = index2;
1154 }
1155
1156 if (change_type == NO_CHANGE && reporter_ == NULL) {
1157 // Fields were already compared and matched and we have no reporter.
1158 ++index1;
1159 ++index2;
1160 continue;
1161 }
1162
Feng Xiaoe96ff302015-06-15 18:21:48 -07001163 // Build the SpecificField. This is slightly complicated.
1164 SpecificField specific_field;
1165 specific_field.unknown_field_number = focus_field->number();
1166 specific_field.unknown_field_type = focus_field->type();
1167
1168 specific_field.unknown_field_set1 = &unknown_field_set1;
1169 specific_field.unknown_field_set2 = &unknown_field_set2;
1170
1171 if (change_type != ADDITION) {
1172 specific_field.unknown_field_index1 = fields1[index1].first;
1173 }
1174 if (change_type != DELETION) {
1175 specific_field.unknown_field_index2 = fields2[index2].first;
1176 }
1177
1178 // Calculate the field index.
1179 if (change_type == ADDITION) {
1180 specific_field.index = index2 - current_repeated_start2;
1181 specific_field.new_index = index2 - current_repeated_start2;
1182 } else {
1183 specific_field.index = index1 - current_repeated_start1;
1184 specific_field.new_index = index2 - current_repeated_start2;
1185 }
1186
Jisi Liu46e8ff62015-10-05 11:59:43 -07001187 if (IsUnknownFieldIgnored(message1, message2, specific_field,
1188 *parent_field)) {
1189 if (reporter_ != NULL) {
1190 parent_field->push_back(specific_field);
1191 reporter_->ReportUnknownFieldIgnored(message1, message2, *parent_field);
1192 parent_field->pop_back();
1193 }
1194 return true;
1195 }
1196
1197 if (change_type == ADDITION || change_type == DELETION ||
1198 change_type == MODIFICATION) {
1199 if (reporter_ == NULL) {
1200 // We found a difference and we have no reproter.
1201 return false;
1202 }
1203 is_different = true;
1204 }
1205
Feng Xiaoe96ff302015-06-15 18:21:48 -07001206 parent_field->push_back(specific_field);
1207
1208 switch (change_type) {
1209 case ADDITION:
1210 reporter_->ReportAdded(message1, message2, *parent_field);
1211 ++index2;
1212 break;
1213 case DELETION:
1214 reporter_->ReportDeleted(message1, message2, *parent_field);
1215 ++index1;
1216 break;
1217 case MODIFICATION:
1218 reporter_->ReportModified(message1, message2, *parent_field);
1219 ++index1;
1220 ++index2;
1221 break;
1222 case COMPARE_GROUPS:
1223 if (!CompareUnknownFields(message1, message2,
1224 fields1[index1].second->group(),
1225 fields2[index2].second->group(),
1226 parent_field)) {
1227 if (reporter_ == NULL) return false;
1228 is_different = true;
1229 reporter_->ReportModified(message1, message2, *parent_field);
1230 }
1231 ++index1;
1232 ++index2;
1233 break;
1234 case NO_CHANGE:
1235 ++index1;
1236 ++index2;
1237 if (report_matches_) {
1238 reporter_->ReportMatched(message1, message2, *parent_field);
1239 }
1240 }
1241
1242 parent_field->pop_back();
1243 }
1244
1245 return !is_different;
1246}
1247
1248namespace {
1249
1250// Find maximum bipartite matching using the argumenting path algorithm.
1251class MaximumMatcher {
1252 public:
1253 typedef ResultCallback2<bool, int, int> NodeMatchCallback;
1254 // MaximumMatcher takes ownership of the passed in callback and uses it to
1255 // determine whether a node on the left side of the bipartial graph matches
1256 // a node on the right side. count1 is the number of nodes on the left side
1257 // of the graph and count2 to is the number of nodes on the right side.
1258 // Every node is referred to using 0-based indices.
1259 // If a maximum match is found, the result will be stored in match_list1 and
1260 // match_list2. match_list1[i] == j means the i-th node on the left side is
1261 // matched to the j-th node on the right side and match_list2[x] == y means
1262 // the x-th node on the right side is matched to y-th node on the left side.
1263 // match_list1[i] == -1 means the node is not matched. Same with match_list2.
1264 MaximumMatcher(int count1, int count2, NodeMatchCallback* callback,
1265 vector<int>* match_list1, vector<int>* match_list2);
1266 // Find a maximum match and return the number of matched node pairs.
1267 // If early_return is true, this method will return 0 immediately when it
1268 // finds that not all nodes on the left side can be matched.
1269 int FindMaximumMatch(bool early_return);
1270 private:
1271 // Determines whether the node on the left side of the bipartial graph
1272 // matches the one on the right side.
1273 bool Match(int left, int right);
1274 // Find an argumenting path starting from the node v on the left side. If a
1275 // path can be found, update match_list2_ to reflect the path and return
1276 // true.
1277 bool FindArgumentPathDFS(int v, vector<bool>* visited);
1278
1279 int count1_;
1280 int count2_;
1281 google::protobuf::scoped_ptr<NodeMatchCallback> match_callback_;
1282 map<pair<int, int>, bool> cached_match_results_;
1283 vector<int>* match_list1_;
1284 vector<int>* match_list2_;
1285 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MaximumMatcher);
1286};
1287
1288MaximumMatcher::MaximumMatcher(int count1, int count2,
1289 NodeMatchCallback* callback,
1290 vector<int>* match_list1,
1291 vector<int>* match_list2)
1292 : count1_(count1), count2_(count2), match_callback_(callback),
1293 match_list1_(match_list1), match_list2_(match_list2) {
1294 match_list1_->assign(count1, -1);
1295 match_list2_->assign(count2, -1);
1296}
1297
1298int MaximumMatcher::FindMaximumMatch(bool early_return) {
1299 int result = 0;
1300 for (int i = 0; i < count1_; ++i) {
1301 vector<bool> visited(count1_);
1302 if (FindArgumentPathDFS(i, &visited)) {
1303 ++result;
1304 } else if (early_return) {
1305 return 0;
1306 }
1307 }
1308 // Backfill match_list1_ as we only filled match_list2_ when finding
1309 // argumenting pathes.
1310 for (int i = 0; i < count2_; ++i) {
1311 if ((*match_list2_)[i] != -1) {
1312 (*match_list1_)[(*match_list2_)[i]] = i;
1313 }
1314 }
1315 return result;
1316}
1317
1318bool MaximumMatcher::Match(int left, int right) {
1319 pair<int, int> p(left, right);
1320 map<pair<int, int>, bool>::iterator it = cached_match_results_.find(p);
1321 if (it != cached_match_results_.end()) {
1322 return it->second;
1323 }
1324 cached_match_results_[p] = match_callback_->Run(left, right);
1325 return cached_match_results_[p];
1326}
1327
1328bool MaximumMatcher::FindArgumentPathDFS(int v, vector<bool>* visited) {
1329 (*visited)[v] = true;
1330 // We try to match those un-matched nodes on the right side first. This is
1331 // the step that the navie greedy matching algorithm uses. In the best cases
1332 // where the greedy algorithm can find a maximum matching, we will always
1333 // find a match in this step and the performance will be identical to the
1334 // greedy algorithm.
1335 for (int i = 0; i < count2_; ++i) {
1336 int matched = (*match_list2_)[i];
1337 if (matched == -1 && Match(v, i)) {
1338 (*match_list2_)[i] = v;
1339 return true;
1340 }
1341 }
1342 // Then we try those already matched nodes and see if we can find an
1343 // alternaive match for the node matched to them.
1344 // The greedy algorithm will stop before this and fail to produce the
1345 // correct result.
1346 for (int i = 0; i < count2_; ++i) {
1347 int matched = (*match_list2_)[i];
1348 if (matched != -1 && Match(v, i)) {
1349 if (!(*visited)[matched] && FindArgumentPathDFS(matched, visited)) {
1350 (*match_list2_)[i] = v;
1351 return true;
1352 }
1353 }
1354 }
1355 return false;
1356}
1357
1358} // namespace
1359
1360bool MessageDifferencer::MatchRepeatedFieldIndices(
1361 const Message& message1,
1362 const Message& message2,
1363 const FieldDescriptor* repeated_field,
1364 const vector<SpecificField>& parent_fields,
1365 vector<int>* match_list1,
1366 vector<int>* match_list2) {
1367 const int count1 =
1368 message1.GetReflection()->FieldSize(message1, repeated_field);
1369 const int count2 =
1370 message2.GetReflection()->FieldSize(message2, repeated_field);
1371 const MapKeyComparator* key_comparator = GetMapKeyComparator(repeated_field);
1372
1373 match_list1->assign(count1, -1);
1374 match_list2->assign(count2, -1);
1375
1376 SpecificField specific_field;
1377 specific_field.field = repeated_field;
1378
1379 bool success = true;
1380 // Find potential match if this is a special repeated field.
1381 if (key_comparator != NULL || IsTreatedAsSet(repeated_field)) {
1382 if (scope_ == PARTIAL) {
1383 // When partial matching is enabled, Compare(a, b) && Compare(a, c)
1384 // doesn't neccessarily imply Compare(b, c). Therefore a naive greedy
1385 // algorithm will fail to find a maximum matching.
1386 // Here we use the argumenting path algorithm.
Bo Yang7c14dc82015-09-15 18:25:02 -07001387 MaximumMatcher::NodeMatchCallback* callback =
1388 google::protobuf::internal::NewPermanentCallback(
1389 this, &MessageDifferencer::IsMatch,
1390 repeated_field, key_comparator,
1391 &message1, &message2, parent_fields);
Feng Xiaoe96ff302015-06-15 18:21:48 -07001392 MaximumMatcher matcher(count1, count2, callback, match_list1,
1393 match_list2);
1394 // If diff info is not needed, we should end the matching process as
1395 // soon as possible if not all items can be matched.
1396 bool early_return = (reporter_ == NULL);
1397 int match_count = matcher.FindMaximumMatch(early_return);
1398 if (match_count != count1 && reporter_ == NULL) return false;
1399 success = success && (match_count == count1);
1400 } else {
1401 for (int i = 0; i < count1; ++i) {
1402 // Indicates any matched elements for this repeated field.
1403 bool match = false;
1404
1405 specific_field.index = i;
1406 specific_field.new_index = i;
1407
1408 for (int j = 0; j < count2; j++) {
1409 if (match_list2->at(j) != -1) continue;
1410 specific_field.index = i;
1411 specific_field.new_index = j;
1412
1413 match = IsMatch(repeated_field, key_comparator,
1414 &message1, &message2, parent_fields, i, j);
1415
1416 if (match) {
1417 match_list1->at(specific_field.index) = specific_field.new_index;
1418 match_list2->at(specific_field.new_index) = specific_field.index;
1419 break;
1420 }
1421 }
1422 if (!match && reporter_ == NULL) return false;
1423 success = success && match;
1424 }
1425 }
1426 } else {
1427 // If this field should be treated as list, just label the match_list.
1428 for (int i = 0; i < count1 && i < count2; i++) {
1429 match_list1->at(i) = i;
1430 match_list2->at(i) = i;
1431 }
1432 }
1433
1434 return success;
1435}
1436
1437FieldComparator::ComparisonResult MessageDifferencer::GetFieldComparisonResult(
1438 const Message& message1, const Message& message2,
1439 const FieldDescriptor* field, int index1, int index2,
1440 const FieldContext* field_context) {
1441 FieldComparator* comparator = field_comparator_ != NULL ?
1442 field_comparator_ : &default_field_comparator_;
1443 return comparator->Compare(message1, message2, field,
1444 index1, index2, field_context);
1445}
1446
1447// ===========================================================================
1448
1449MessageDifferencer::Reporter::Reporter() { }
1450MessageDifferencer::Reporter::~Reporter() {}
1451
1452// ===========================================================================
1453
1454MessageDifferencer::MapKeyComparator::MapKeyComparator() {}
1455MessageDifferencer::MapKeyComparator::~MapKeyComparator() {}
1456
1457// ===========================================================================
1458
1459MessageDifferencer::IgnoreCriteria::IgnoreCriteria() {}
1460MessageDifferencer::IgnoreCriteria::~IgnoreCriteria() {}
1461
1462// ===========================================================================
1463
1464// Note that the printer's delimiter is not used, because if we are given a
1465// printer, we don't know its delimiter.
1466MessageDifferencer::StreamReporter::StreamReporter(
1467 io::ZeroCopyOutputStream* output) : printer_(new io::Printer(output, '$')),
1468 delete_printer_(true),
1469 report_modified_aggregates_(false) { }
1470
1471MessageDifferencer::StreamReporter::StreamReporter(
1472 io::Printer* printer) : printer_(printer),
1473 delete_printer_(false),
1474 report_modified_aggregates_(false) { }
1475
1476MessageDifferencer::StreamReporter::~StreamReporter() {
1477 if (delete_printer_) delete printer_;
1478}
1479
1480void MessageDifferencer::StreamReporter::PrintPath(
1481 const vector<SpecificField>& field_path, bool left_side) {
1482 for (int i = 0; i < field_path.size(); ++i) {
1483 if (i > 0) {
1484 printer_->Print(".");
1485 }
1486
1487 SpecificField specific_field = field_path[i];
1488
1489 if (specific_field.field != NULL) {
1490 if (specific_field.field->is_extension()) {
1491 printer_->Print("($name$)", "name",
1492 specific_field.field->full_name());
1493 } else {
1494 printer_->PrintRaw(specific_field.field->name());
1495 }
1496 } else {
1497 printer_->PrintRaw(SimpleItoa(specific_field.unknown_field_number));
1498 }
1499 if (left_side && specific_field.index >= 0) {
1500 printer_->Print("[$name$]", "name", SimpleItoa(specific_field.index));
1501 }
1502 if (!left_side && specific_field.new_index >= 0) {
1503 printer_->Print("[$name$]", "name", SimpleItoa(specific_field.new_index));
1504 }
1505 }
1506}
1507
1508void MessageDifferencer::
1509StreamReporter::PrintValue(const Message& message,
1510 const vector<SpecificField>& field_path,
1511 bool left_side) {
1512 const SpecificField& specific_field = field_path.back();
1513 const FieldDescriptor* field = specific_field.field;
1514 if (field != NULL) {
1515 string output;
1516 int index = left_side ? specific_field.index : specific_field.new_index;
1517 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1518 const Reflection* reflection = message.GetReflection();
1519 const Message& field_message = field->is_repeated() ?
1520 reflection->GetRepeatedMessage(message, field, index) :
1521 reflection->GetMessage(message, field);
1522 output = field_message.ShortDebugString();
1523 if (output.empty()) {
1524 printer_->Print("{ }");
1525 } else {
1526 printer_->Print("{ $name$ }", "name", output);
1527 }
1528 } else {
1529 TextFormat::PrintFieldValueToString(message, field, index, &output);
1530 printer_->PrintRaw(output);
1531 }
1532 } else {
1533 const UnknownFieldSet* unknown_fields =
1534 (left_side ?
1535 specific_field.unknown_field_set1 :
1536 specific_field.unknown_field_set2);
1537 const UnknownField* unknown_field = &unknown_fields->field(
1538 left_side ?
1539 specific_field.unknown_field_index1 :
1540 specific_field.unknown_field_index2);
1541 PrintUnknownFieldValue(unknown_field);
1542 }
1543}
1544
1545void MessageDifferencer::
1546StreamReporter::PrintUnknownFieldValue(const UnknownField* unknown_field) {
1547 GOOGLE_CHECK(unknown_field != NULL) << " Cannot print NULL unknown_field.";
1548
1549 string output;
1550 switch (unknown_field->type()) {
1551 case UnknownField::TYPE_VARINT:
1552 output = SimpleItoa(unknown_field->varint());
1553 break;
1554 case UnknownField::TYPE_FIXED32:
1555 output = StrCat("0x", strings::Hex(unknown_field->fixed32(),
1556 strings::ZERO_PAD_8));
1557 break;
1558 case UnknownField::TYPE_FIXED64:
1559 output = StrCat("0x", strings::Hex(unknown_field->fixed64(),
1560 strings::ZERO_PAD_16));
1561 break;
1562 case UnknownField::TYPE_LENGTH_DELIMITED:
1563 output = StringPrintf("\"%s\"",
1564 CEscape(unknown_field->length_delimited()).c_str());
1565 break;
1566 case UnknownField::TYPE_GROUP:
1567 // TODO(kenton): Print the contents of the group like we do for
1568 // messages. Requires an equivalent of ShortDebugString() for
1569 // UnknownFieldSet.
1570 output = "{ ... }";
1571 break;
1572 }
1573 printer_->PrintRaw(output);
1574}
1575
1576void MessageDifferencer::StreamReporter::Print(const string& str) {
1577 printer_->Print(str.c_str());
1578}
1579
1580void MessageDifferencer::StreamReporter::ReportAdded(
1581 const Message& message1,
1582 const Message& message2,
1583 const vector<SpecificField>& field_path) {
1584 printer_->Print("added: ");
1585 PrintPath(field_path, false);
1586 printer_->Print(": ");
1587 PrintValue(message2, field_path, false);
1588 printer_->Print("\n"); // Print for newlines.
1589}
1590
1591void MessageDifferencer::StreamReporter::ReportDeleted(
1592 const Message& message1,
1593 const Message& message2,
1594 const vector<SpecificField>& field_path) {
1595 printer_->Print("deleted: ");
1596 PrintPath(field_path, true);
1597 printer_->Print(": ");
1598 PrintValue(message1, field_path, true);
1599 printer_->Print("\n"); // Print for newlines
1600}
1601
1602void MessageDifferencer::StreamReporter::ReportModified(
1603 const Message& message1,
1604 const Message& message2,
1605 const vector<SpecificField>& field_path) {
1606 if (!report_modified_aggregates_ && field_path.back().field == NULL) {
1607 if (field_path.back().unknown_field_type == UnknownField::TYPE_GROUP) {
1608 // Any changes to the subfields have already been printed.
1609 return;
1610 }
1611 } else if (!report_modified_aggregates_) {
1612 if (field_path.back().field->cpp_type() ==
1613 FieldDescriptor::CPPTYPE_MESSAGE) {
1614 // Any changes to the subfields have already been printed.
1615 return;
1616 }
1617 }
1618
1619 printer_->Print("modified: ");
1620 PrintPath(field_path, true);
1621 if (CheckPathChanged(field_path)) {
1622 printer_->Print(" -> ");
1623 PrintPath(field_path, false);
1624 }
1625 printer_->Print(": ");
1626 PrintValue(message1, field_path, true);
1627 printer_->Print(" -> ");
1628 PrintValue(message2, field_path, false);
1629 printer_->Print("\n"); // Print for newlines.
1630}
1631
1632void MessageDifferencer::StreamReporter::ReportMoved(
1633 const Message& message1,
1634 const Message& message2,
1635 const vector<SpecificField>& field_path) {
1636 printer_->Print("moved: ");
1637 PrintPath(field_path, true);
1638 printer_->Print(" -> ");
1639 PrintPath(field_path, false);
1640 printer_->Print(" : ");
1641 PrintValue(message1, field_path, true);
1642 printer_->Print("\n"); // Print for newlines.
1643}
1644
1645void MessageDifferencer::StreamReporter::ReportMatched(
1646 const Message& message1,
1647 const Message& message2,
1648 const vector<SpecificField>& field_path) {
1649 printer_->Print("matched: ");
1650 PrintPath(field_path, true);
1651 if (CheckPathChanged(field_path)) {
1652 printer_->Print(" -> ");
1653 PrintPath(field_path, false);
1654 }
1655 printer_->Print(" : ");
1656 PrintValue(message1, field_path, true);
1657 printer_->Print("\n"); // Print for newlines.
1658}
1659
1660void MessageDifferencer::StreamReporter::ReportIgnored(
1661 const Message& message1,
1662 const Message& message2,
1663 const vector<SpecificField>& field_path) {
1664 printer_->Print("ignored: ");
1665 PrintPath(field_path, true);
1666 if (CheckPathChanged(field_path)) {
1667 printer_->Print(" -> ");
1668 PrintPath(field_path, false);
1669 }
1670 printer_->Print("\n"); // Print for newlines.
1671}
1672
Jisi Liu46e8ff62015-10-05 11:59:43 -07001673void MessageDifferencer::StreamReporter::ReportUnknownFieldIgnored(
1674 const Message& message1, const Message& message2,
1675 const vector<SpecificField>& field_path) {
1676 printer_->Print("ignored: ");
1677 PrintPath(field_path, true);
1678 if (CheckPathChanged(field_path)) {
1679 printer_->Print(" -> ");
1680 PrintPath(field_path, false);
1681 }
1682 printer_->Print("\n"); // Print for newlines.
1683}
1684
Feng Xiaoe96ff302015-06-15 18:21:48 -07001685} // namespace util
1686} // namespace protobuf
1687} // namespace google