Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions |
| 6 | * are met: |
| 7 | * 1. Redistributions of source code must retain the above copyright |
| 8 | * notice, this list of conditions and the following disclaimer. |
| 9 | * 2. Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * |
| 13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 24 | */ |
| 25 | |
| 26 | #include "config.h" |
| 27 | #include "core/dom/DecodedDataDocumentParser.h" |
| 28 | |
Ben Murdoch | df95704 | 2013-08-06 11:01:27 +0100 | [diff] [blame] | 29 | #include "bindings/v8/ExceptionStatePlaceholder.h" |
Ben Murdoch | 591b958 | 2013-07-10 11:41:44 +0100 | [diff] [blame] | 30 | #include "core/dom/Document.h" |
Ben Murdoch | 8375017 | 2013-07-24 10:36:59 +0100 | [diff] [blame] | 31 | #include "core/dom/Element.h" |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 32 | #include "core/loader/TextResourceDecoder.h" |
Ben Murdoch | 8375017 | 2013-07-24 10:36:59 +0100 | [diff] [blame] | 33 | #include "wtf/text/TextEncodingRegistry.h" |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 34 | |
| 35 | namespace WebCore { |
| 36 | |
Ben Murdoch | 8375017 | 2013-07-24 10:36:59 +0100 | [diff] [blame] | 37 | namespace { |
| 38 | |
| 39 | class TitleEncodingFixer { |
| 40 | public: |
| 41 | explicit TitleEncodingFixer(Document* document) |
| 42 | : m_document(document) |
| 43 | , m_firstEncoding(document->decoder()->encoding()) |
| 44 | { |
| 45 | } |
| 46 | |
| 47 | // It's possible for the encoding of the document to change while we're decoding |
| 48 | // data. That can only occur while we're processing the <head> portion of the |
| 49 | // document. There isn't much user-visible content in the <head>, but there is |
| 50 | // the <title> element. This function detects that situation and re-decodes the |
| 51 | // document's title so that the user doesn't see an incorrectly decoded title |
| 52 | // in the title bar. |
| 53 | inline void fixTitleEncodingIfNeeded() |
| 54 | { |
| 55 | if (m_firstEncoding == m_document->decoder()->encoding()) |
| 56 | return; // In the common case, the encoding doesn't change and there isn't any work to do. |
| 57 | fixTitleEncoding(); |
| 58 | } |
| 59 | |
| 60 | private: |
| 61 | void fixTitleEncoding(); |
| 62 | |
| 63 | Document* m_document; |
| 64 | WTF::TextEncoding m_firstEncoding; |
| 65 | }; |
| 66 | |
| 67 | void TitleEncodingFixer::fixTitleEncoding() |
| 68 | { |
| 69 | RefPtr<Element> titleElement = m_document->titleElement(); |
| 70 | if (!titleElement |
| 71 | || titleElement->firstElementChild() |
| 72 | || m_firstEncoding != Latin1Encoding() |
| 73 | || !titleElement->textContent().containsOnlyLatin1()) |
| 74 | return; // Either we don't have a title yet or something bizzare as happened and we give up. |
| 75 | CString originalBytes = titleElement->textContent().latin1(); |
| 76 | OwnPtr<TextCodec> codec = newTextCodec(m_document->decoder()->encoding()); |
| 77 | String correctlyDecodedTitle = codec->decode(originalBytes.data(), originalBytes.length(), true); |
Ben Murdoch | 1fad5ca | 2013-08-07 11:05:11 +0100 | [diff] [blame] | 78 | titleElement->setTextContent(correctlyDecodedTitle, IGNORE_EXCEPTION); |
Ben Murdoch | 8375017 | 2013-07-24 10:36:59 +0100 | [diff] [blame] | 79 | } |
| 80 | |
| 81 | } |
| 82 | |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 83 | DecodedDataDocumentParser::DecodedDataDocumentParser(Document* document) |
| 84 | : DocumentParser(document) |
| 85 | { |
| 86 | } |
| 87 | |
Ben Murdoch | 591b958 | 2013-07-10 11:41:44 +0100 | [diff] [blame] | 88 | size_t DecodedDataDocumentParser::appendBytes(const char* data, size_t length) |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 89 | { |
| 90 | if (!length) |
Ben Murdoch | 591b958 | 2013-07-10 11:41:44 +0100 | [diff] [blame] | 91 | return 0; |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 92 | |
Ben Murdoch | 8375017 | 2013-07-24 10:36:59 +0100 | [diff] [blame] | 93 | TitleEncodingFixer encodingFixer(document()); |
| 94 | |
Ben Murdoch | 591b958 | 2013-07-10 11:41:44 +0100 | [diff] [blame] | 95 | String decoded = document()->decoder()->decode(data, length); |
Ben Murdoch | 8375017 | 2013-07-24 10:36:59 +0100 | [diff] [blame] | 96 | |
| 97 | encodingFixer.fixTitleEncodingIfNeeded(); |
| 98 | |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 99 | if (decoded.isEmpty()) |
Ben Murdoch | 591b958 | 2013-07-10 11:41:44 +0100 | [diff] [blame] | 100 | return 0; |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 101 | |
Ben Murdoch | 591b958 | 2013-07-10 11:41:44 +0100 | [diff] [blame] | 102 | size_t consumedChars = decoded.length(); |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 103 | append(decoded.releaseImpl()); |
Ben Murdoch | 591b958 | 2013-07-10 11:41:44 +0100 | [diff] [blame] | 104 | |
| 105 | return consumedChars; |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 106 | } |
| 107 | |
Ben Murdoch | 591b958 | 2013-07-10 11:41:44 +0100 | [diff] [blame] | 108 | size_t DecodedDataDocumentParser::flush() |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 109 | { |
Ben Murdoch | 591b958 | 2013-07-10 11:41:44 +0100 | [diff] [blame] | 110 | // null decoder indicates there is no data received. |
| 111 | // We have nothing to do in that case. |
| 112 | TextResourceDecoder* decoder = document()->decoder(); |
| 113 | if (!decoder) |
| 114 | return 0; |
| 115 | String remainingData = decoder->flush(); |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 116 | if (remainingData.isEmpty()) |
Ben Murdoch | 591b958 | 2013-07-10 11:41:44 +0100 | [diff] [blame] | 117 | return 0; |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 118 | |
Ben Murdoch | 591b958 | 2013-07-10 11:41:44 +0100 | [diff] [blame] | 119 | size_t consumedChars = remainingData.length(); |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 120 | append(remainingData.releaseImpl()); |
Ben Murdoch | 591b958 | 2013-07-10 11:41:44 +0100 | [diff] [blame] | 121 | |
| 122 | return consumedChars; |
Torne (Richard Coles) | 53e740f | 2013-05-09 18:38:43 +0100 | [diff] [blame] | 123 | } |
| 124 | |
| 125 | }; |