blob: fd907dc00922f3a7a82e5eee8ebe0a8b6be0d65e [file] [log] [blame]
Ben Murdocheb525c52013-07-10 11:40:50 +01001// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/autofill/core/browser/form_structure.h"
6
7#include <utility>
8
9#include "base/basictypes.h"
10#include "base/command_line.h"
11#include "base/logging.h"
12#include "base/memory/scoped_ptr.h"
13#include "base/sha1.h"
14#include "base/strings/string_number_conversions.h"
15#include "base/strings/string_util.h"
16#include "base/strings/stringprintf.h"
17#include "base/strings/utf_string_conversions.h"
18#include "base/time/time.h"
19#include "components/autofill/content/browser/autocheckout_page_meta_data.h"
20#include "components/autofill/core/browser/autofill_metrics.h"
21#include "components/autofill/core/browser/autofill_type.h"
22#include "components/autofill/core/browser/autofill_xml_parser.h"
23#include "components/autofill/core/browser/field_types.h"
24#include "components/autofill/core/browser/form_field.h"
25#include "components/autofill/core/common/autofill_constants.h"
26#include "components/autofill/core/common/form_data.h"
27#include "components/autofill/core/common/form_data_predictions.h"
28#include "components/autofill/core/common/form_field_data.h"
29#include "components/autofill/core/common/form_field_data_predictions.h"
Torne (Richard Coles)a36e5922013-08-05 13:57:33 +010030#include "third_party/icu/source/i18n/unicode/regex.h"
Ben Murdocheb525c52013-07-10 11:40:50 +010031#include "third_party/libjingle/source/talk/xmllite/xmlelement.h"
32
33namespace autofill {
34namespace {
35
// Value of a form's method attribute (lower-cased) that selects POST.
const char kFormMethodPost[] = "post";

// Client identification and the XML prolog prepended to encoded requests.
const char kClientVersion[] = "6.1.1715.1442/en (GGLL)";
const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";

// Values for the "accepts" attribute of a query request.
const char kAcceptedFeaturesExperiment[] = "e";  // e=experiments
const char kAcceptedFeaturesAutocheckoutExperiment[] = "a,e";  // a=autocheckout

// XML element names.
const char kXMLElementAutofillQuery[] = "autofillquery";
const char kXMLElementAutofillUpload[] = "autofillupload";
const char kXMLElementFieldAssignments[] = "fieldassignments";
const char kXMLElementField[] = "field";
const char kXMLElementFields[] = "fields";
const char kXMLElementForm[] = "form";

// XML attribute names.
const char kAttributeAcceptedFeatures[] = "accepts";
const char kAttributeAutofillUsed[] = "autofillused";
const char kAttributeAutofillType[] = "autofilltype";
const char kAttributeClientVersion[] = "clientversion";
const char kAttributeDataPresent[] = "datapresent";
const char kAttributeFieldID[] = "fieldid";
const char kAttributeFieldType[] = "fieldtype";
const char kAttributeFormSignature[] = "formsignature";
const char kAttributeName[] = "name";
const char kAttributeSignature[] = "signature";
const char kAttributeUrlprefixSignature[] = "urlprefixsignature";

// Mode tokens.
const char kBillingMode[] = "billing";
const char kShippingMode[] = "shipping";

// Strip away >= 5 consecutive digits.
const char kIgnorePatternInFieldName[] = "\\d{5,}+";
65
Ben Murdocheb525c52013-07-10 11:40:50 +010066// Helper for |EncodeUploadRequest()| that creates a bit field corresponding to
67// |available_field_types| and returns the hex representation as a string.
Ben Murdoch32409262013-08-07 11:04:47 +010068std::string EncodeFieldTypes(const ServerFieldTypeSet& available_field_types) {
Ben Murdocheb525c52013-07-10 11:40:50 +010069 // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte,
70 // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field.
71 const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8;
72
73 // Pack the types in |available_field_types| into |bit_field|.
74 std::vector<uint8> bit_field(kNumBytes, 0);
Ben Murdoch32409262013-08-07 11:04:47 +010075 for (ServerFieldTypeSet::const_iterator field_type =
76 available_field_types.begin();
Ben Murdocheb525c52013-07-10 11:40:50 +010077 field_type != available_field_types.end();
78 ++field_type) {
79 // Set the appropriate bit in the field. The bit we set is the one
80 // |field_type| % 8 from the left of the byte.
81 const size_t byte = *field_type / 8;
82 const size_t bit = 0x80 >> (*field_type % 8);
83 DCHECK(byte < bit_field.size());
84 bit_field[byte] |= bit;
85 }
86
87 // Discard any trailing zeroes.
88 // If there are no available types, we return the empty string.
89 size_t data_end = bit_field.size();
90 for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) {
91 }
92
93 // Print all meaningfull bytes into a string.
94 std::string data_presence;
95 data_presence.reserve(data_end * 2 + 1);
96 for (size_t i = 0; i < data_end; ++i) {
97 base::StringAppendF(&data_presence, "%02x", bit_field[i]);
98 }
99
100 return data_presence;
101}
102
103// Helper for |EncodeFormRequest()| that creates XmlElements for the given field
104// in upload xml, and also add them to the parent XmlElement.
105void EncodeFieldForUpload(const AutofillField& field,
106 buzz::XmlElement* parent) {
107 // Don't upload checkable fields.
108 if (field.is_checkable)
109 return;
110
Ben Murdoch32409262013-08-07 11:04:47 +0100111 ServerFieldTypeSet types = field.possible_types();
Ben Murdocheb525c52013-07-10 11:40:50 +0100112 // |types| could be empty in unit-tests only.
Ben Murdoch32409262013-08-07 11:04:47 +0100113 for (ServerFieldTypeSet::iterator field_type = types.begin();
Ben Murdocheb525c52013-07-10 11:40:50 +0100114 field_type != types.end(); ++field_type) {
115 buzz::XmlElement *field_element = new buzz::XmlElement(
116 buzz::QName(kXMLElementField));
117
118 field_element->SetAttr(buzz::QName(kAttributeSignature),
119 field.FieldSignature());
120 field_element->SetAttr(buzz::QName(kAttributeAutofillType),
121 base::IntToString(*field_type));
122 parent->AddElement(field_element);
123 }
124}
125
126// Helper for |EncodeFormRequest()| that creates XmlElement for the given field
127// in query xml, and also add it to the parent XmlElement.
128void EncodeFieldForQuery(const AutofillField& field,
129 buzz::XmlElement* parent) {
130 buzz::XmlElement *field_element = new buzz::XmlElement(
131 buzz::QName(kXMLElementField));
132 field_element->SetAttr(buzz::QName(kAttributeSignature),
133 field.FieldSignature());
134 parent->AddElement(field_element);
135}
136
137// Helper for |EncodeFormRequest()| that creates XmlElements for the given field
138// in field assignments xml, and also add them to the parent XmlElement.
139void EncodeFieldForFieldAssignments(const AutofillField& field,
140 buzz::XmlElement* parent) {
Ben Murdoch32409262013-08-07 11:04:47 +0100141 ServerFieldTypeSet types = field.possible_types();
142 for (ServerFieldTypeSet::iterator field_type = types.begin();
Ben Murdocheb525c52013-07-10 11:40:50 +0100143 field_type != types.end(); ++field_type) {
144 buzz::XmlElement *field_element = new buzz::XmlElement(
145 buzz::QName(kXMLElementFields));
146
147 field_element->SetAttr(buzz::QName(kAttributeFieldID),
148 field.FieldSignature());
149 field_element->SetAttr(buzz::QName(kAttributeFieldType),
150 base::IntToString(*field_type));
151 field_element->SetAttr(buzz::QName(kAttributeName),
152 UTF16ToUTF8(field.name));
153 parent->AddElement(field_element);
154 }
155}
156
// Returns |true| iff |token| is one of the contact type hints described in the
// implementation section of http://is.gd/whatwg_autocomplete that Chrome
// recognizes. "fax" and "pager" are deliberately absent from the list, as
// Chrome does not support filling either type of information.
bool IsContactTypeHint(const std::string& token) {
  static const char* const kContactTypeHints[] = { "home", "work", "mobile" };
  const size_t kNumHints =
      sizeof(kContactTypeHints) / sizeof(kContactTypeHints[0]);

  for (size_t i = 0; i < kNumHints; ++i) {
    if (token == kContactTypeHints[i])
      return true;
  }
  return false;
}
164
165// Returns |true| iff the |token| is a type hint appropriate for a field of the
166// given |field_type|, as specified in the implementation section of
167// http://is.gd/whatwg_autocomplete
168bool ContactTypeHintMatchesFieldType(const std::string& token,
Ben Murdochbb1529c2013-08-08 10:24:53 +0100169 HtmlFieldType field_type) {
Ben Murdocheb525c52013-07-10 11:40:50 +0100170 // The "home" and "work" type hints are only appropriate for email and phone
171 // number field types.
172 if (token == "home" || token == "work") {
Ben Murdochbb1529c2013-08-08 10:24:53 +0100173 return field_type == HTML_TYPE_EMAIL ||
174 (field_type >= HTML_TYPE_TEL &&
175 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX);
Ben Murdocheb525c52013-07-10 11:40:50 +0100176 }
177
178 // The "mobile" type hint is only appropriate for phone number field types.
179 // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
180 // support filling either type of information.
181 if (token == "mobile") {
Ben Murdochbb1529c2013-08-08 10:24:53 +0100182 return field_type >= HTML_TYPE_TEL &&
183 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX;
Ben Murdocheb525c52013-07-10 11:40:50 +0100184 }
185
186 return false;
187}
188
189// Returns the Chrome Autofill-supported field type corresponding to the given
Ben Murdochbb1529c2013-08-08 10:24:53 +0100190// |autocomplete_attribute_value|, if there is one, in the context of the given
191// |field|. Chrome Autofill supports a subset of the field types listed at
Ben Murdocheb525c52013-07-10 11:40:50 +0100192// http://is.gd/whatwg_autocomplete
Ben Murdochbb1529c2013-08-08 10:24:53 +0100193HtmlFieldType FieldTypeFromAutocompleteAttributeValue(
194 const std::string& autocomplete_attribute_value,
Ben Murdocheb525c52013-07-10 11:40:50 +0100195 const AutofillField& field) {
Ben Murdochbb1529c2013-08-08 10:24:53 +0100196 if (autocomplete_attribute_value == "name")
197 return HTML_TYPE_NAME;
Ben Murdocheb525c52013-07-10 11:40:50 +0100198
Ben Murdochbb1529c2013-08-08 10:24:53 +0100199 if (autocomplete_attribute_value == "given-name")
200 return HTML_TYPE_GIVEN_NAME;
Ben Murdocheb525c52013-07-10 11:40:50 +0100201
Ben Murdochbb1529c2013-08-08 10:24:53 +0100202 if (autocomplete_attribute_value == "additional-name") {
Ben Murdocheb525c52013-07-10 11:40:50 +0100203 if (field.max_length == 1)
Ben Murdochbb1529c2013-08-08 10:24:53 +0100204 return HTML_TYPE_ADDITIONAL_NAME_INITIAL;
Ben Murdocheb525c52013-07-10 11:40:50 +0100205 else
Ben Murdochbb1529c2013-08-08 10:24:53 +0100206 return HTML_TYPE_ADDITIONAL_NAME;
Ben Murdocheb525c52013-07-10 11:40:50 +0100207 }
208
Ben Murdochbb1529c2013-08-08 10:24:53 +0100209 if (autocomplete_attribute_value == "family-name")
210 return HTML_TYPE_FAMILY_NAME;
Ben Murdocheb525c52013-07-10 11:40:50 +0100211
Ben Murdochbb1529c2013-08-08 10:24:53 +0100212 if (autocomplete_attribute_value == "organization")
213 return HTML_TYPE_ORGANIZATION;
Ben Murdocheb525c52013-07-10 11:40:50 +0100214
Ben Murdochbb1529c2013-08-08 10:24:53 +0100215 if (autocomplete_attribute_value == "address-line1")
216 return HTML_TYPE_ADDRESS_LINE1;
Ben Murdocheb525c52013-07-10 11:40:50 +0100217
Ben Murdochbb1529c2013-08-08 10:24:53 +0100218 if (autocomplete_attribute_value == "address-line2")
219 return HTML_TYPE_ADDRESS_LINE2;
Ben Murdocheb525c52013-07-10 11:40:50 +0100220
Ben Murdochbb1529c2013-08-08 10:24:53 +0100221 if (autocomplete_attribute_value == "locality")
222 return HTML_TYPE_LOCALITY;
Ben Murdocheb525c52013-07-10 11:40:50 +0100223
Ben Murdochbb1529c2013-08-08 10:24:53 +0100224 if (autocomplete_attribute_value == "region")
225 return HTML_TYPE_REGION;
Ben Murdocheb525c52013-07-10 11:40:50 +0100226
Ben Murdochbb1529c2013-08-08 10:24:53 +0100227 if (autocomplete_attribute_value == "country")
228 return HTML_TYPE_COUNTRY_CODE;
Ben Murdocheb525c52013-07-10 11:40:50 +0100229
Ben Murdochbb1529c2013-08-08 10:24:53 +0100230 if (autocomplete_attribute_value == "country-name")
231 return HTML_TYPE_COUNTRY_NAME;
Ben Murdocheb525c52013-07-10 11:40:50 +0100232
Ben Murdochbb1529c2013-08-08 10:24:53 +0100233 if (autocomplete_attribute_value == "postal-code")
234 return HTML_TYPE_POSTAL_CODE;
Ben Murdocheb525c52013-07-10 11:40:50 +0100235
Ben Murdochbb1529c2013-08-08 10:24:53 +0100236 if (autocomplete_attribute_value == "cc-name")
237 return HTML_TYPE_CREDIT_CARD_NAME;
Ben Murdocheb525c52013-07-10 11:40:50 +0100238
Ben Murdochbb1529c2013-08-08 10:24:53 +0100239 if (autocomplete_attribute_value == "cc-number")
240 return HTML_TYPE_CREDIT_CARD_NUMBER;
Ben Murdocheb525c52013-07-10 11:40:50 +0100241
Ben Murdochbb1529c2013-08-08 10:24:53 +0100242 if (autocomplete_attribute_value == "cc-exp") {
Ben Murdocheb525c52013-07-10 11:40:50 +0100243 if (field.max_length == 5)
Ben Murdochbb1529c2013-08-08 10:24:53 +0100244 return HTML_TYPE_CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
245 else if (field.max_length == 7)
246 return HTML_TYPE_CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR;
Ben Murdocheb525c52013-07-10 11:40:50 +0100247 else
Ben Murdochbb1529c2013-08-08 10:24:53 +0100248 return HTML_TYPE_CREDIT_CARD_EXP;
Ben Murdocheb525c52013-07-10 11:40:50 +0100249 }
250
Ben Murdochbb1529c2013-08-08 10:24:53 +0100251 if (autocomplete_attribute_value == "cc-exp-month")
252 return HTML_TYPE_CREDIT_CARD_EXP_MONTH;
Ben Murdocheb525c52013-07-10 11:40:50 +0100253
Ben Murdochbb1529c2013-08-08 10:24:53 +0100254 if (autocomplete_attribute_value == "cc-exp-year") {
Ben Murdocheb525c52013-07-10 11:40:50 +0100255 if (field.max_length == 2)
Ben Murdochbb1529c2013-08-08 10:24:53 +0100256 return HTML_TYPE_CREDIT_CARD_EXP_2_DIGIT_YEAR;
257 else if (field.max_length == 4)
258 return HTML_TYPE_CREDIT_CARD_EXP_4_DIGIT_YEAR;
Ben Murdocheb525c52013-07-10 11:40:50 +0100259 else
Ben Murdochbb1529c2013-08-08 10:24:53 +0100260 return HTML_TYPE_CREDIT_CARD_EXP_YEAR;
Ben Murdocheb525c52013-07-10 11:40:50 +0100261 }
262
Ben Murdochbb1529c2013-08-08 10:24:53 +0100263 if (autocomplete_attribute_value == "cc-csc")
264 return HTML_TYPE_CREDIT_CARD_VERIFICATION_CODE;
Ben Murdocheb525c52013-07-10 11:40:50 +0100265
Ben Murdochbb1529c2013-08-08 10:24:53 +0100266 if (autocomplete_attribute_value == "cc-type")
267 return HTML_TYPE_CREDIT_CARD_TYPE;
Ben Murdocheb525c52013-07-10 11:40:50 +0100268
Ben Murdochbb1529c2013-08-08 10:24:53 +0100269 if (autocomplete_attribute_value == "tel")
270 return HTML_TYPE_TEL;
Ben Murdocheb525c52013-07-10 11:40:50 +0100271
Ben Murdochbb1529c2013-08-08 10:24:53 +0100272 if (autocomplete_attribute_value == "tel-country-code")
273 return HTML_TYPE_TEL_COUNTRY_CODE;
Ben Murdocheb525c52013-07-10 11:40:50 +0100274
Ben Murdochbb1529c2013-08-08 10:24:53 +0100275 if (autocomplete_attribute_value == "tel-national")
276 return HTML_TYPE_TEL_NATIONAL;
Ben Murdocheb525c52013-07-10 11:40:50 +0100277
Ben Murdochbb1529c2013-08-08 10:24:53 +0100278 if (autocomplete_attribute_value == "tel-area-code")
279 return HTML_TYPE_TEL_AREA_CODE;
Ben Murdocheb525c52013-07-10 11:40:50 +0100280
Ben Murdochbb1529c2013-08-08 10:24:53 +0100281 if (autocomplete_attribute_value == "tel-local")
282 return HTML_TYPE_TEL_LOCAL;
Ben Murdocheb525c52013-07-10 11:40:50 +0100283
Ben Murdochbb1529c2013-08-08 10:24:53 +0100284 if (autocomplete_attribute_value == "tel-local-prefix")
285 return HTML_TYPE_TEL_LOCAL_PREFIX;
Ben Murdocheb525c52013-07-10 11:40:50 +0100286
Ben Murdochbb1529c2013-08-08 10:24:53 +0100287 if (autocomplete_attribute_value == "tel-local-suffix")
288 return HTML_TYPE_TEL_LOCAL_SUFFIX;
Ben Murdocheb525c52013-07-10 11:40:50 +0100289
Ben Murdochbb1529c2013-08-08 10:24:53 +0100290 if (autocomplete_attribute_value == "email")
291 return HTML_TYPE_EMAIL;
Ben Murdocheb525c52013-07-10 11:40:50 +0100292
Ben Murdochbb1529c2013-08-08 10:24:53 +0100293 return HTML_TYPE_UNKNOWN;
Ben Murdocheb525c52013-07-10 11:40:50 +0100294}
295
Torne (Richard Coles)a36e5922013-08-05 13:57:33 +0100296std::string StripDigitsIfRequired(const base::string16& input) {
297 UErrorCode status = U_ZERO_ERROR;
298 CR_DEFINE_STATIC_LOCAL(icu::UnicodeString, icu_pattern,
299 (kIgnorePatternInFieldName));
300 CR_DEFINE_STATIC_LOCAL(icu::RegexMatcher, matcher,
301 (icu_pattern, UREGEX_CASE_INSENSITIVE, status));
302 DCHECK_EQ(status, U_ZERO_ERROR);
303
304 icu::UnicodeString icu_input(input.data(), input.length());
305 matcher.reset(icu_input);
306
307 icu::UnicodeString replaced_string = matcher.replaceAll("", status);
308
309 std::string return_string;
310 status = U_ZERO_ERROR;
311 UTF16ToUTF8(replaced_string.getBuffer(),
312 static_cast<size_t>(replaced_string.length()),
313 &return_string);
314 if (status != U_ZERO_ERROR) {
315 DVLOG(1) << "Couldn't strip digits in " << UTF16ToUTF8(input);
316 return UTF16ToUTF8(input);
317 }
318
319 return return_string;
320}
321
Ben Murdocheb525c52013-07-10 11:40:50 +0100322} // namespace
323
324FormStructure::FormStructure(const FormData& form,
325 const std::string& autocheckout_url_prefix)
326 : form_name_(form.name),
327 source_url_(form.origin),
328 target_url_(form.action),
329 autofill_count_(0),
330 active_field_count_(0),
331 upload_required_(USE_UPLOAD_RATES),
332 server_experiment_id_("no server response"),
333 has_author_specified_types_(false),
334 autocheckout_url_prefix_(autocheckout_url_prefix),
335 filled_by_autocheckout_(false) {
336 // Copy the form fields.
337 std::map<base::string16, size_t> unique_names;
338 for (std::vector<FormFieldData>::const_iterator field =
339 form.fields.begin();
340 field != form.fields.end(); field++) {
341
342 if (!ShouldSkipField(*field)) {
343 // Add all supported form fields (including with empty names) to the
344 // signature. This is a requirement for Autofill servers.
345 form_signature_field_names_.append("&");
Torne (Richard Coles)a36e5922013-08-05 13:57:33 +0100346 form_signature_field_names_.append(StripDigitsIfRequired(field->name));
Ben Murdocheb525c52013-07-10 11:40:50 +0100347
348 ++active_field_count_;
349 }
350
351 // Generate a unique name for this field by appending a counter to the name.
352 // Make sure to prepend the counter with a non-numeric digit so that we are
353 // guaranteed to avoid collisions.
354 if (!unique_names.count(field->name))
355 unique_names[field->name] = 1;
356 else
357 ++unique_names[field->name];
358 base::string16 unique_name = field->name + ASCIIToUTF16("_") +
359 base::IntToString16(unique_names[field->name]);
360 fields_.push_back(new AutofillField(*field, unique_name));
361 }
362
363 std::string method = UTF16ToUTF8(form.method);
364 if (StringToLowerASCII(method) == kFormMethodPost) {
365 method_ = POST;
366 } else {
367 // Either the method is 'get', or we don't know. In this case we default
368 // to GET.
369 method_ = GET;
370 }
371}
372
373FormStructure::~FormStructure() {}
374
375void FormStructure::DetermineHeuristicTypes(
376 const AutofillMetrics& metric_logger) {
377 // First, try to detect field types based on each field's |autocomplete|
378 // attribute value. If there is at least one form field that specifies an
379 // autocomplete type hint, don't try to apply other heuristics to match fields
380 // in this form.
381 bool has_author_specified_sections;
Ben Murdochbb1529c2013-08-08 10:24:53 +0100382 ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_,
Ben Murdocheb525c52013-07-10 11:40:50 +0100383 &has_author_specified_sections);
384
385 if (!has_author_specified_types_) {
Ben Murdoch32409262013-08-07 11:04:47 +0100386 ServerFieldTypeMap field_type_map;
Ben Murdocheb525c52013-07-10 11:40:50 +0100387 FormField::ParseFormFields(fields_.get(), &field_type_map);
Ben Murdoch32409262013-08-07 11:04:47 +0100388 for (size_t i = 0; i < field_count(); ++i) {
389 AutofillField* field = fields_[i];
390 ServerFieldTypeMap::iterator iter =
391 field_type_map.find(field->unique_name());
Ben Murdocheb525c52013-07-10 11:40:50 +0100392 if (iter != field_type_map.end())
393 field->set_heuristic_type(iter->second);
394 }
395 }
396
397 UpdateAutofillCount();
398 IdentifySections(has_author_specified_sections);
399
400 if (IsAutofillable(true)) {
401 metric_logger.LogDeveloperEngagementMetric(
402 AutofillMetrics::FILLABLE_FORM_PARSED);
403 if (has_author_specified_types_) {
404 metric_logger.LogDeveloperEngagementMetric(
405 AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS);
406 }
407 }
408}
409
410bool FormStructure::EncodeUploadRequest(
Ben Murdoch32409262013-08-07 11:04:47 +0100411 const ServerFieldTypeSet& available_field_types,
Ben Murdocheb525c52013-07-10 11:40:50 +0100412 bool form_was_autofilled,
413 std::string* encoded_xml) const {
414 DCHECK(ShouldBeCrowdsourced());
415
416 // Verify that |available_field_types| agrees with the possible field types we
417 // are uploading.
418 for (std::vector<AutofillField*>::const_iterator field = begin();
419 field != end();
420 ++field) {
Ben Murdoch32409262013-08-07 11:04:47 +0100421 for (ServerFieldTypeSet::const_iterator type =
422 (*field)->possible_types().begin();
Ben Murdocheb525c52013-07-10 11:40:50 +0100423 type != (*field)->possible_types().end();
424 ++type) {
425 DCHECK(*type == UNKNOWN_TYPE ||
426 *type == EMPTY_TYPE ||
427 available_field_types.count(*type));
428 }
429 }
430
431 // Set up the <autofillupload> element and its attributes.
432 buzz::XmlElement autofill_request_xml(
433 (buzz::QName(kXMLElementAutofillUpload)));
434 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
435 kClientVersion);
436 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
437 FormSignature());
438 autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed),
439 form_was_autofilled ? "true" : "false");
440 autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent),
441 EncodeFieldTypes(available_field_types).c_str());
442
443 if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml))
444 return false; // Malformed form, skip it.
445
446 // Obtain the XML structure as a string.
447 *encoded_xml = kXMLDeclaration;
448 *encoded_xml += autofill_request_xml.Str().c_str();
449
450 // To enable this logging, run with the flag --vmodule="form_structure=2".
451 VLOG(2) << "\n" << *encoded_xml;
452
453 return true;
454}
455
456bool FormStructure::EncodeFieldAssignments(
Ben Murdoch32409262013-08-07 11:04:47 +0100457 const ServerFieldTypeSet& available_field_types,
Ben Murdocheb525c52013-07-10 11:40:50 +0100458 std::string* encoded_xml) const {
459 DCHECK(ShouldBeCrowdsourced());
460
461 // Set up the <fieldassignments> element and its attributes.
462 buzz::XmlElement autofill_request_xml(
463 (buzz::QName(kXMLElementFieldAssignments)));
464 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
465 FormSignature());
466
467 if (!EncodeFormRequest(FormStructure::FIELD_ASSIGNMENTS,
468 &autofill_request_xml))
469 return false; // Malformed form, skip it.
470
471 // Obtain the XML structure as a string.
472 *encoded_xml = kXMLDeclaration;
473 *encoded_xml += autofill_request_xml.Str().c_str();
474
475 return true;
476}
477
478// static
479bool FormStructure::EncodeQueryRequest(
480 const std::vector<FormStructure*>& forms,
481 std::vector<std::string>* encoded_signatures,
482 std::string* encoded_xml) {
483 DCHECK(encoded_signatures);
484 DCHECK(encoded_xml);
485 encoded_xml->clear();
486 encoded_signatures->clear();
487 encoded_signatures->reserve(forms.size());
488
489 // Set up the <autofillquery> element and attributes.
490 buzz::XmlElement autofill_request_xml(
491 (buzz::QName(kXMLElementAutofillQuery)));
492 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
493 kClientVersion);
494
495 // autocheckout_url_prefix tells the Autofill server where the forms in the
496 // request came from, and the the Autofill server checks internal status and
497 // decide to enable Autocheckout or not and may return Autocheckout related
498 // data in the response accordingly.
499 // There is no page/frame level object associated with FormStructure that
500 // we could extract URL prefix from. But, all the forms should come from the
501 // same frame, so they should have the same Autocheckout URL prefix. Thus we
502 // use URL prefix from the first form with Autocheckout enabled.
503 std::string autocheckout_url_prefix;
504
505 // Some badly formatted web sites repeat forms - detect that and encode only
506 // one form as returned data would be the same for all the repeated forms.
507 std::set<std::string> processed_forms;
508 for (ScopedVector<FormStructure>::const_iterator it = forms.begin();
509 it != forms.end();
510 ++it) {
511 std::string signature((*it)->FormSignature());
512 if (processed_forms.find(signature) != processed_forms.end())
513 continue;
514 processed_forms.insert(signature);
515 scoped_ptr<buzz::XmlElement> encompassing_xml_element(
516 new buzz::XmlElement(buzz::QName(kXMLElementForm)));
517 encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature),
518 signature);
519
520 if (!(*it)->EncodeFormRequest(FormStructure::QUERY,
521 encompassing_xml_element.get()))
522 continue; // Malformed form, skip it.
523
524 if ((*it)->IsAutocheckoutEnabled()) {
525 if (autocheckout_url_prefix.empty()) {
526 autocheckout_url_prefix = (*it)->autocheckout_url_prefix_;
527 } else {
528 // Making sure all the forms in the request has the same url_prefix.
529 DCHECK_EQ(autocheckout_url_prefix, (*it)->autocheckout_url_prefix_);
530 }
531 }
532
533 autofill_request_xml.AddElement(encompassing_xml_element.release());
534 encoded_signatures->push_back(signature);
535 }
536
537 if (!encoded_signatures->size())
538 return false;
539
540 if (autocheckout_url_prefix.empty()) {
541 autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures),
542 kAcceptedFeaturesExperiment);
543 } else {
544 autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures),
545 kAcceptedFeaturesAutocheckoutExperiment);
546 autofill_request_xml.SetAttr(buzz::QName(kAttributeUrlprefixSignature),
547 Hash64Bit(autocheckout_url_prefix));
548 }
549
550 // Obtain the XML structure as a string.
551 *encoded_xml = kXMLDeclaration;
552 *encoded_xml += autofill_request_xml.Str().c_str();
553
554 return true;
555}
556
557// static
558void FormStructure::ParseQueryResponse(
559 const std::string& response_xml,
560 const std::vector<FormStructure*>& forms,
561 autofill::AutocheckoutPageMetaData* page_meta_data,
562 const AutofillMetrics& metric_logger) {
563 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_RECEIVED);
564
565 // Parse the field types from the server response to the query.
566 std::vector<AutofillServerFieldInfo> field_infos;
567 UploadRequired upload_required;
568 std::string experiment_id;
569 AutofillQueryXmlParser parse_handler(&field_infos,
570 &upload_required,
571 &experiment_id,
572 page_meta_data);
573 buzz::XmlParser parser(&parse_handler);
574 parser.Parse(response_xml.c_str(), response_xml.length(), true);
575 if (!parse_handler.succeeded())
576 return;
577
578 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED);
579 metric_logger.LogServerExperimentIdForQuery(experiment_id);
580
581 bool heuristics_detected_fillable_field = false;
582 bool query_response_overrode_heuristics = false;
583
584 // Copy the field types into the actual form.
585 std::vector<AutofillServerFieldInfo>::iterator current_info =
586 field_infos.begin();
587 for (std::vector<FormStructure*>::const_iterator iter = forms.begin();
588 iter != forms.end(); ++iter) {
589 FormStructure* form = *iter;
590 form->upload_required_ = upload_required;
591 form->server_experiment_id_ = experiment_id;
592
593 for (std::vector<AutofillField*>::iterator field = form->fields_.begin();
594 field != form->fields_.end(); ++field) {
595 if (form->ShouldSkipField(**field))
596 continue;
597
598 // In some cases *successful* response does not return all the fields.
599 // Quit the update of the types then.
600 if (current_info == field_infos.end())
601 break;
602
603 // UNKNOWN_TYPE is reserved for use by the client.
604 DCHECK_NE(current_info->field_type, UNKNOWN_TYPE);
605
Ben Murdoch32409262013-08-07 11:04:47 +0100606 ServerFieldType heuristic_type = (*field)->heuristic_type();
Ben Murdocheb525c52013-07-10 11:40:50 +0100607 if (heuristic_type != UNKNOWN_TYPE)
608 heuristics_detected_fillable_field = true;
609
610 (*field)->set_server_type(current_info->field_type);
Ben Murdochbb1529c2013-08-08 10:24:53 +0100611 if (heuristic_type != (*field)->Type().GetStorableType())
Ben Murdocheb525c52013-07-10 11:40:50 +0100612 query_response_overrode_heuristics = true;
613
614 // Copy default value into the field if available.
615 if (!current_info->default_value.empty())
616 (*field)->set_default_value(current_info->default_value);
617
618 ++current_info;
619 }
620
621 form->UpdateAutofillCount();
622 form->IdentifySections(false);
623 }
624
625 AutofillMetrics::ServerQueryMetric metric;
626 if (query_response_overrode_heuristics) {
627 if (heuristics_detected_fillable_field) {
628 metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS;
629 } else {
630 metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS;
631 }
632 } else {
633 metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS;
634 }
635 metric_logger.LogServerQueryMetric(metric);
636}
637
638// static
639void FormStructure::GetFieldTypePredictions(
640 const std::vector<FormStructure*>& form_structures,
641 std::vector<FormDataPredictions>* forms) {
642 forms->clear();
643 forms->reserve(form_structures.size());
644 for (size_t i = 0; i < form_structures.size(); ++i) {
645 FormStructure* form_structure = form_structures[i];
646 FormDataPredictions form;
647 form.data.name = form_structure->form_name_;
648 form.data.method =
649 ASCIIToUTF16((form_structure->method_ == POST) ? "POST" : "GET");
650 form.data.origin = form_structure->source_url_;
651 form.data.action = form_structure->target_url_;
652 form.signature = form_structure->FormSignature();
653 form.experiment_id = form_structure->server_experiment_id_;
654
655 for (std::vector<AutofillField*>::const_iterator field =
656 form_structure->fields_.begin();
657 field != form_structure->fields_.end(); ++field) {
658 form.data.fields.push_back(FormFieldData(**field));
659
660 FormFieldDataPredictions annotated_field;
661 annotated_field.signature = (*field)->FieldSignature();
662 annotated_field.heuristic_type =
Ben Murdochbb1529c2013-08-08 10:24:53 +0100663 AutofillType((*field)->heuristic_type()).ToString();
Ben Murdocheb525c52013-07-10 11:40:50 +0100664 annotated_field.server_type =
Ben Murdochbb1529c2013-08-08 10:24:53 +0100665 AutofillType((*field)->server_type()).ToString();
666 annotated_field.overall_type = (*field)->Type().ToString();
Ben Murdocheb525c52013-07-10 11:40:50 +0100667 form.fields.push_back(annotated_field);
668 }
669
670 forms->push_back(form);
671 }
672}
673
674std::string FormStructure::FormSignature() const {
675 std::string scheme(target_url_.scheme());
676 std::string host(target_url_.host());
677
678 // If target host or scheme is empty, set scheme and host of source url.
679 // This is done to match the Toolbar's behavior.
680 if (scheme.empty() || host.empty()) {
681 scheme = source_url_.scheme();
682 host = source_url_.host();
683 }
684
685 std::string form_string = scheme + "://" + host + "&" +
686 UTF16ToUTF8(form_name_) +
687 form_signature_field_names_;
688
689 return Hash64Bit(form_string);
690}
691
692bool FormStructure::IsAutocheckoutEnabled() const {
693 return !autocheckout_url_prefix_.empty();
694}
695
696bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
697 return (field.is_checkable || field.form_control_type == "password") &&
698 !IsAutocheckoutEnabled();
699}
700
701size_t FormStructure::RequiredFillableFields() const {
702 return IsAutocheckoutEnabled() ? 0 : kRequiredAutofillFields;
703}
704
705bool FormStructure::IsAutofillable(bool require_method_post) const {
706 if (autofill_count() < RequiredFillableFields())
707 return false;
708
709 return ShouldBeParsed(require_method_post);
710}
711
712void FormStructure::UpdateAutofillCount() {
713 autofill_count_ = 0;
714 for (std::vector<AutofillField*>::const_iterator iter = begin();
715 iter != end(); ++iter) {
716 AutofillField* field = *iter;
717 if (field && field->IsFieldFillable())
718 ++autofill_count_;
719 }
720}
721
722bool FormStructure::ShouldBeParsed(bool require_method_post) const {
723 if (active_field_count() < RequiredFillableFields())
724 return false;
725
726 // Rule out http(s)://*/search?...
727 // e.g. http://www.google.com/search?q=...
728 // http://search.yahoo.com/search?p=...
729 if (target_url_.path() == "/search")
730 return false;
731
732 if (!IsAutocheckoutEnabled()) {
733 // Make sure there is at least one text field when Autocheckout is
734 // not enabled.
735 bool has_text_field = false;
736 for (std::vector<AutofillField*>::const_iterator it = begin();
737 it != end() && !has_text_field; ++it) {
738 has_text_field |= (*it)->form_control_type != "select-one";
739 }
740 if (!has_text_field)
741 return false;
742 }
743
744 return !require_method_post || (method_ == POST);
745}
746
747bool FormStructure::ShouldBeCrowdsourced() const {
748 // Allow all forms in Autocheckout flow to be crowdsourced.
749 return (!has_author_specified_types_ && ShouldBeParsed(true)) ||
750 IsAutocheckoutEnabled();
751}
752
753void FormStructure::UpdateFromCache(const FormStructure& cached_form) {
754 // Map from field signatures to cached fields.
755 std::map<std::string, const AutofillField*> cached_fields;
756 for (size_t i = 0; i < cached_form.field_count(); ++i) {
757 const AutofillField* field = cached_form.field(i);
758 cached_fields[field->FieldSignature()] = field;
759 }
760
761 for (std::vector<AutofillField*>::const_iterator iter = begin();
762 iter != end(); ++iter) {
763 AutofillField* field = *iter;
764
765 std::map<std::string, const AutofillField*>::const_iterator
766 cached_field = cached_fields.find(field->FieldSignature());
767 if (cached_field != cached_fields.end()) {
768 if (field->form_control_type != "select-one" &&
769 field->value == cached_field->second->value) {
770 // From the perspective of learning user data, text fields containing
771 // default values are equivalent to empty fields.
772 field->value = base::string16();
773 }
774
775 field->set_heuristic_type(cached_field->second->heuristic_type());
776 field->set_server_type(cached_field->second->server_type());
777 }
778 }
779
780 UpdateAutofillCount();
781
782 filled_by_autocheckout_ = cached_form.filled_by_autocheckout();
783 server_experiment_id_ = cached_form.server_experiment_id();
784
785 // The form signature should match between query and upload requests to the
786 // server. On many websites, form elements are dynamically added, removed, or
787 // rearranged via JavaScript between page load and form submission, so we
788 // copy over the |form_signature_field_names_| corresponding to the query
789 // request.
790 DCHECK_EQ(cached_form.form_name_, form_name_);
791 DCHECK_EQ(cached_form.source_url_, source_url_);
792 DCHECK_EQ(cached_form.target_url_, target_url_);
793 form_signature_field_names_ = cached_form.form_signature_field_names_;
794}
795
796void FormStructure::LogQualityMetrics(
797 const AutofillMetrics& metric_logger,
798 const base::TimeTicks& load_time,
799 const base::TimeTicks& interaction_time,
800 const base::TimeTicks& submission_time) const {
801 std::string experiment_id = server_experiment_id();
802 metric_logger.LogServerExperimentIdForUpload(experiment_id);
803
804 size_t num_detected_field_types = 0;
805 bool did_autofill_all_possible_fields = true;
806 bool did_autofill_some_possible_fields = false;
807 for (size_t i = 0; i < field_count(); ++i) {
808 const AutofillField* field = this->field(i);
809 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_SUBMITTED,
810 experiment_id);
811
812 // No further logging for empty fields nor for fields where the entered data
813 // does not appear to already exist in the user's stored Autofill data.
Ben Murdoch32409262013-08-07 11:04:47 +0100814 const ServerFieldTypeSet& field_types = field->possible_types();
Ben Murdocheb525c52013-07-10 11:40:50 +0100815 DCHECK(!field_types.empty());
816 if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE))
817 continue;
818
819 ++num_detected_field_types;
820 if (field->is_autofilled)
821 did_autofill_some_possible_fields = true;
822 else
823 did_autofill_all_possible_fields = false;
824
825 // Collapse field types that Chrome treats as identical, e.g. home and
826 // billing address fields.
Ben Murdoch32409262013-08-07 11:04:47 +0100827 ServerFieldTypeSet collapsed_field_types;
828 for (ServerFieldTypeSet::const_iterator it = field_types.begin();
Ben Murdocheb525c52013-07-10 11:40:50 +0100829 it != field_types.end();
830 ++it) {
831 // Since we currently only support US phone numbers, the (city code + main
832 // digits) number is almost always identical to the whole phone number.
833 // TODO(isherman): Improve this logic once we add support for
834 // international numbers.
835 if (*it == PHONE_HOME_CITY_AND_NUMBER)
836 collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER);
837 else
Ben Murdochbb1529c2013-08-08 10:24:53 +0100838 collapsed_field_types.insert(AutofillType(*it).GetStorableType());
Ben Murdocheb525c52013-07-10 11:40:50 +0100839 }
840
841 // Capture the field's type, if it is unambiguous.
Ben Murdoch32409262013-08-07 11:04:47 +0100842 ServerFieldType field_type = UNKNOWN_TYPE;
Ben Murdocheb525c52013-07-10 11:40:50 +0100843 if (collapsed_field_types.size() == 1)
844 field_type = *collapsed_field_types.begin();
845
Ben Murdochbb1529c2013-08-08 10:24:53 +0100846 ServerFieldType heuristic_type =
847 AutofillType(field->heuristic_type()).GetStorableType();
848 ServerFieldType server_type =
849 AutofillType(field->server_type()).GetStorableType();
850 ServerFieldType predicted_type = field->Type().GetStorableType();
Ben Murdocheb525c52013-07-10 11:40:50 +0100851
852 // Log heuristic, server, and overall type quality metrics, independently of
853 // whether the field was autofilled.
854 if (heuristic_type == UNKNOWN_TYPE) {
855 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
856 field_type, experiment_id);
857 } else if (field_types.count(heuristic_type)) {
858 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH,
859 field_type, experiment_id);
860 } else {
861 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MISMATCH,
862 field_type, experiment_id);
863 }
864
865 if (server_type == NO_SERVER_DATA) {
866 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
867 field_type, experiment_id);
868 } else if (field_types.count(server_type)) {
869 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MATCH,
870 field_type, experiment_id);
871 } else {
872 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH,
873 field_type, experiment_id);
874 }
875
876 if (predicted_type == UNKNOWN_TYPE) {
877 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
878 field_type, experiment_id);
879 } else if (field_types.count(predicted_type)) {
880 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH,
881 field_type, experiment_id);
882 } else {
883 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH,
884 field_type, experiment_id);
885 }
886
887 // TODO(isherman): <select> fields don't support |is_autofilled()|, so we
888 // have to skip them for the remaining metrics.
889 if (field->form_control_type == "select-one")
890 continue;
891
892 if (field->is_autofilled) {
893 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_AUTOFILLED,
894 experiment_id);
895 } else {
896 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_NOT_AUTOFILLED,
897 experiment_id);
898
899 if (heuristic_type == UNKNOWN_TYPE) {
900 metric_logger.LogQualityMetric(
901 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_UNKNOWN,
902 experiment_id);
903 } else if (field_types.count(heuristic_type)) {
904 metric_logger.LogQualityMetric(
905 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MATCH,
906 experiment_id);
907 } else {
908 metric_logger.LogQualityMetric(
909 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MISMATCH,
910 experiment_id);
911 }
912
913 if (server_type == NO_SERVER_DATA) {
914 metric_logger.LogQualityMetric(
915 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_UNKNOWN,
916 experiment_id);
917 } else if (field_types.count(server_type)) {
918 metric_logger.LogQualityMetric(
919 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MATCH,
920 experiment_id);
921 } else {
922 metric_logger.LogQualityMetric(
923 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MISMATCH,
924 experiment_id);
925 }
926 }
927 }
928
929 if (num_detected_field_types < RequiredFillableFields()) {
930 metric_logger.LogUserHappinessMetric(
931 AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM);
932 } else {
933 if (did_autofill_all_possible_fields) {
934 metric_logger.LogUserHappinessMetric(
935 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL);
936 } else if (did_autofill_some_possible_fields) {
937 metric_logger.LogUserHappinessMetric(
938 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME);
939 } else {
940 metric_logger.LogUserHappinessMetric(
941 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE);
942 }
943
944 // Unlike the other times, the |submission_time| should always be available.
945 DCHECK(!submission_time.is_null());
946
947 // The |load_time| might be unset, in the case that the form was dynamically
948 // added to the DOM.
949 if (!load_time.is_null()) {
950 // Submission should always chronologically follow form load.
951 DCHECK(submission_time > load_time);
952 base::TimeDelta elapsed = submission_time - load_time;
953 if (did_autofill_some_possible_fields)
954 metric_logger.LogFormFillDurationFromLoadWithAutofill(elapsed);
955 else
956 metric_logger.LogFormFillDurationFromLoadWithoutAutofill(elapsed);
957 }
958
959 // The |interaction_time| might be unset, in the case that the user
960 // submitted a blank form.
961 if (!interaction_time.is_null()) {
962 // Submission should always chronologically follow interaction.
963 DCHECK(submission_time > interaction_time);
964 base::TimeDelta elapsed = submission_time - interaction_time;
965 if (did_autofill_some_possible_fields) {
966 metric_logger.LogFormFillDurationFromInteractionWithAutofill(elapsed);
967 } else {
968 metric_logger.LogFormFillDurationFromInteractionWithoutAutofill(
969 elapsed);
970 }
971 }
972 }
973}
974
975const AutofillField* FormStructure::field(size_t index) const {
976 if (index >= fields_.size()) {
977 NOTREACHED();
978 return NULL;
979 }
980
981 return fields_[index];
982}
983
984AutofillField* FormStructure::field(size_t index) {
985 return const_cast<AutofillField*>(
986 static_cast<const FormStructure*>(this)->field(index));
987}
988
989size_t FormStructure::field_count() const {
990 return fields_.size();
991}
992
993size_t FormStructure::active_field_count() const {
994 return active_field_count_;
995}
996
997std::string FormStructure::server_experiment_id() const {
998 return server_experiment_id_;
999}
1000
1001FormData FormStructure::ToFormData() const {
1002 // |data.user_submitted| will always be false.
1003 FormData data;
1004 data.name = form_name_;
1005 data.origin = source_url_;
1006 data.action = target_url_;
1007 data.method = ASCIIToUTF16(method_ == POST ? "POST" : "GET");
1008
1009 for (size_t i = 0; i < fields_.size(); ++i) {
1010 data.fields.push_back(FormFieldData(*fields_[i]));
1011 }
1012
1013 return data;
1014}
1015
1016bool FormStructure::operator==(const FormData& form) const {
1017 // TODO(jhawkins): Is this enough to differentiate a form?
1018 if (form_name_ == form.name &&
1019 source_url_ == form.origin &&
1020 target_url_ == form.action) {
1021 return true;
1022 }
1023
1024 // TODO(jhawkins): Compare field names, IDs and labels once we have labels
1025 // set up.
1026
1027 return false;
1028}
1029
1030bool FormStructure::operator!=(const FormData& form) const {
1031 return !operator==(form);
1032}
1033
1034std::string FormStructure::Hash64Bit(const std::string& str) {
1035 std::string hash_bin = base::SHA1HashString(str);
1036 DCHECK_EQ(20U, hash_bin.length());
1037
1038 uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) |
1039 (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) |
1040 (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) |
1041 (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) |
1042 (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) |
1043 (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) |
1044 (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) |
1045 ((static_cast<uint64>(hash_bin[7])) & 0xFF);
1046
1047 return base::Uint64ToString(hash64);
1048}
1049
1050bool FormStructure::EncodeFormRequest(
1051 FormStructure::EncodeRequestType request_type,
1052 buzz::XmlElement* encompassing_xml_element) const {
1053 if (!field_count()) // Nothing to add.
1054 return false;
1055
1056 // Some badly formatted web sites repeat fields - limit number of fields to
1057 // 48, which is far larger than any valid form and XML still fits into 2K.
1058 // Do not send requests for forms with more than this many fields, as they are
1059 // near certainly not valid/auto-fillable.
1060 const size_t kMaxFieldsOnTheForm = 48;
1061 if (field_count() > kMaxFieldsOnTheForm)
1062 return false;
1063
1064 // Add the child nodes for the form fields.
1065 for (size_t index = 0; index < field_count(); ++index) {
1066 const AutofillField* field = fields_[index];
1067 switch (request_type) {
1068 case FormStructure::UPLOAD:
1069 EncodeFieldForUpload(*field, encompassing_xml_element);
1070 break;
1071 case FormStructure::QUERY:
1072 if (ShouldSkipField(*field))
1073 continue;
1074 EncodeFieldForQuery(*field, encompassing_xml_element);
1075 break;
1076 case FormStructure::FIELD_ASSIGNMENTS:
1077 EncodeFieldForFieldAssignments(*field, encompassing_xml_element);
1078 break;
1079 }
1080 }
1081 return true;
1082}
1083
1084void FormStructure::ParseFieldTypesFromAutocompleteAttributes(
1085 bool* found_types,
1086 bool* found_sections) {
1087 const std::string kDefaultSection = "-default";
1088
1089 *found_types = false;
1090 *found_sections = false;
1091 for (std::vector<AutofillField*>::iterator it = fields_.begin();
1092 it != fields_.end(); ++it) {
1093 AutofillField* field = *it;
1094
1095 // To prevent potential section name collisions, add a default suffix for
1096 // other fields. Without this, 'autocomplete' attribute values
1097 // "section--shipping street-address" and "shipping street-address" would be
1098 // parsed identically, given the section handling code below. We do this
1099 // before any validation so that fields with invalid attributes still end up
1100 // in the default section. These default section names will be overridden
1101 // by subsequent heuristic parsing steps if there are no author-specified
1102 // section names.
1103 field->set_section(kDefaultSection);
1104
1105 // Canonicalize the attribute value by trimming whitespace, collapsing
1106 // non-space characters (e.g. tab) to spaces, and converting to lowercase.
1107 std::string autocomplete_attribute =
1108 CollapseWhitespaceASCII(field->autocomplete_attribute, false);
1109 autocomplete_attribute = StringToLowerASCII(autocomplete_attribute);
1110
1111 // The autocomplete attribute is overloaded: it can specify either a field
1112 // type hint or whether autocomplete should be enabled at all. Ignore the
1113 // latter type of attribute value.
1114 if (autocomplete_attribute.empty() ||
1115 autocomplete_attribute == "on" ||
1116 autocomplete_attribute == "off") {
1117 continue;
1118 }
1119
1120 // Any other value, even it is invalid, is considered to be a type hint.
1121 // This allows a website's author to specify an attribute like
1122 // autocomplete="other" on a field to disable all Autofill heuristics for
1123 // the form.
1124 *found_types = true;
1125
1126 // Tokenize the attribute value. Per the spec, the tokens are parsed in
1127 // reverse order.
1128 std::vector<std::string> tokens;
1129 Tokenize(autocomplete_attribute, " ", &tokens);
1130
1131 // The final token must be the field type.
1132 // If it is not one of the known types, abort.
1133 DCHECK(!tokens.empty());
1134 std::string field_type_token = tokens.back();
1135 tokens.pop_back();
Ben Murdochbb1529c2013-08-08 10:24:53 +01001136 HtmlFieldType field_type =
1137 FieldTypeFromAutocompleteAttributeValue(field_type_token, *field);
1138 if (field_type == HTML_TYPE_UNKNOWN)
Ben Murdocheb525c52013-07-10 11:40:50 +01001139 continue;
1140
1141 // The preceding token, if any, may be a type hint.
1142 if (!tokens.empty() && IsContactTypeHint(tokens.back())) {
1143 // If it is, it must match the field type; otherwise, abort.
1144 // Note that an invalid token invalidates the entire attribute value, even
1145 // if the other tokens are valid.
1146 if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type))
1147 continue;
1148
1149 // Chrome Autofill ignores these type hints.
1150 tokens.pop_back();
1151 }
1152
1153 // The preceding token, if any, may be a fixed string that is either
1154 // "shipping" or "billing". Chrome Autofill treats these as implicit
1155 // section name suffixes.
1156 DCHECK_EQ(kDefaultSection, field->section());
1157 std::string section = field->section();
Ben Murdochbb1529c2013-08-08 10:24:53 +01001158 HtmlFieldMode mode = HTML_MODE_NONE;
1159 if (!tokens.empty()) {
1160 if (tokens.back() == kShippingMode)
1161 mode = HTML_MODE_SHIPPING;
1162 else if (tokens.back() == kBillingMode)
1163 mode = HTML_MODE_BILLING;
1164 }
Ben Murdoch7dbb3d52013-07-17 14:55:54 +01001165
Ben Murdochbb1529c2013-08-08 10:24:53 +01001166 if (mode != HTML_MODE_NONE) {
Ben Murdocheb525c52013-07-10 11:40:50 +01001167 section = "-" + tokens.back();
1168 tokens.pop_back();
1169 }
1170
1171 // The preceding token, if any, may be a named section.
1172 const std::string kSectionPrefix = "section-";
1173 if (!tokens.empty() &&
1174 StartsWithASCII(tokens.back(), kSectionPrefix, true)) {
1175 // Prepend this section name to the suffix set in the preceding block.
1176 section = tokens.back().substr(kSectionPrefix.size()) + section;
1177 tokens.pop_back();
1178 }
1179
1180 // No other tokens are allowed. If there are any remaining, abort.
1181 if (!tokens.empty())
1182 continue;
1183
1184 if (section != kDefaultSection) {
1185 *found_sections = true;
1186 field->set_section(section);
1187 }
1188
1189 // No errors encountered while parsing!
1190 // Update the |field|'s type based on what was parsed from the attribute.
Ben Murdochbb1529c2013-08-08 10:24:53 +01001191 field->SetHtmlType(field_type, mode);
Ben Murdocheb525c52013-07-10 11:40:50 +01001192 }
1193}
1194
1195void FormStructure::IdentifySections(bool has_author_specified_sections) {
1196 if (fields_.empty())
1197 return;
1198
1199 if (!has_author_specified_sections) {
1200 // Name sections after the first field in the section.
1201 base::string16 current_section = fields_.front()->unique_name();
1202
1203 // Keep track of the types we've seen in this section.
Ben Murdoch32409262013-08-07 11:04:47 +01001204 std::set<ServerFieldType> seen_types;
1205 ServerFieldType previous_type = UNKNOWN_TYPE;
Ben Murdocheb525c52013-07-10 11:40:50 +01001206
1207 for (std::vector<AutofillField*>::iterator field = fields_.begin();
1208 field != fields_.end(); ++field) {
Ben Murdochbb1529c2013-08-08 10:24:53 +01001209 const ServerFieldType current_type = (*field)->Type().GetStorableType();
Ben Murdocheb525c52013-07-10 11:40:50 +01001210
1211 bool already_saw_current_type = seen_types.count(current_type) > 0;
1212
1213 // Forms often ask for multiple phone numbers -- e.g. both a daytime and
1214 // evening phone number. Our phone number detection is also generally a
1215 // little off. Hence, ignore this field type as a signal here.
Ben Murdoch2385ea32013-08-06 11:01:04 +01001216 if (AutofillType(current_type).group() == PHONE_HOME)
Ben Murdocheb525c52013-07-10 11:40:50 +01001217 already_saw_current_type = false;
1218
1219 // Some forms have adjacent fields of the same type. Two common examples:
1220 // * Forms with two email fields, where the second is meant to "confirm"
1221 // the first.
1222 // * Forms with a <select> menu for states in some countries, and a
1223 // freeform <input> field for states in other countries. (Usually,
1224 // only one of these two will be visible for any given choice of
1225 // country.)
1226 // Generally, adjacent fields of the same type belong in the same logical
1227 // section.
1228 if (current_type == previous_type)
1229 already_saw_current_type = false;
1230
1231 previous_type = current_type;
1232
1233 if (current_type != UNKNOWN_TYPE && already_saw_current_type) {
1234 // We reached the end of a section, so start a new section.
1235 seen_types.clear();
1236 current_section = (*field)->unique_name();
1237 }
1238
1239 seen_types.insert(current_type);
1240 (*field)->set_section(UTF16ToUTF8(current_section));
1241 }
1242 }
1243
1244 // Ensure that credit card and address fields are in separate sections.
1245 // This simplifies the section-aware logic in autofill_manager.cc.
1246 for (std::vector<AutofillField*>::iterator field = fields_.begin();
1247 field != fields_.end(); ++field) {
Ben Murdoch32409262013-08-07 11:04:47 +01001248 FieldTypeGroup field_type_group = (*field)->Type().group();
Ben Murdoch2385ea32013-08-06 11:01:04 +01001249 if (field_type_group == CREDIT_CARD)
Ben Murdocheb525c52013-07-10 11:40:50 +01001250 (*field)->set_section((*field)->section() + "-cc");
1251 else
1252 (*field)->set_section((*field)->section() + "-default");
1253 }
1254}
1255
1256} // namespace autofill