blob: 7ceaabe6d582fc797f5350d51b1ba5743c7f734d [file] [log] [blame]
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +01001/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "core/dom/DecodedDataDocumentParser.h"
28
Ben Murdochdf957042013-08-06 11:01:27 +010029#include "bindings/v8/ExceptionStatePlaceholder.h"
Ben Murdoch591b9582013-07-10 11:41:44 +010030#include "core/dom/Document.h"
Ben Murdoch83750172013-07-24 10:36:59 +010031#include "core/dom/Element.h"
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +010032#include "core/loader/TextResourceDecoder.h"
Ben Murdoch83750172013-07-24 10:36:59 +010033#include "wtf/text/TextEncodingRegistry.h"
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +010034
35namespace WebCore {
36
Ben Murdoch83750172013-07-24 10:36:59 +010037namespace {
38
39class TitleEncodingFixer {
40public:
41 explicit TitleEncodingFixer(Document* document)
42 : m_document(document)
43 , m_firstEncoding(document->decoder()->encoding())
44 {
45 }
46
47 // It's possible for the encoding of the document to change while we're decoding
48 // data. That can only occur while we're processing the <head> portion of the
49 // document. There isn't much user-visible content in the <head>, but there is
50 // the <title> element. This function detects that situation and re-decodes the
51 // document's title so that the user doesn't see an incorrectly decoded title
52 // in the title bar.
53 inline void fixTitleEncodingIfNeeded()
54 {
55 if (m_firstEncoding == m_document->decoder()->encoding())
56 return; // In the common case, the encoding doesn't change and there isn't any work to do.
57 fixTitleEncoding();
58 }
59
60private:
61 void fixTitleEncoding();
62
63 Document* m_document;
64 WTF::TextEncoding m_firstEncoding;
65};
66
67void TitleEncodingFixer::fixTitleEncoding()
68{
69 RefPtr<Element> titleElement = m_document->titleElement();
70 if (!titleElement
71 || titleElement->firstElementChild()
72 || m_firstEncoding != Latin1Encoding()
73 || !titleElement->textContent().containsOnlyLatin1())
74 return; // Either we don't have a title yet or something bizzare as happened and we give up.
75 CString originalBytes = titleElement->textContent().latin1();
76 OwnPtr<TextCodec> codec = newTextCodec(m_document->decoder()->encoding());
77 String correctlyDecodedTitle = codec->decode(originalBytes.data(), originalBytes.length(), true);
Ben Murdoch1fad5ca2013-08-07 11:05:11 +010078 titleElement->setTextContent(correctlyDecodedTitle, IGNORE_EXCEPTION);
Ben Murdoch83750172013-07-24 10:36:59 +010079}
80
81}
82
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +010083DecodedDataDocumentParser::DecodedDataDocumentParser(Document* document)
84 : DocumentParser(document)
85{
86}
87
Ben Murdoch591b9582013-07-10 11:41:44 +010088size_t DecodedDataDocumentParser::appendBytes(const char* data, size_t length)
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +010089{
90 if (!length)
Ben Murdoch591b9582013-07-10 11:41:44 +010091 return 0;
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +010092
Ben Murdoch83750172013-07-24 10:36:59 +010093 TitleEncodingFixer encodingFixer(document());
94
Ben Murdoch591b9582013-07-10 11:41:44 +010095 String decoded = document()->decoder()->decode(data, length);
Ben Murdoch83750172013-07-24 10:36:59 +010096
97 encodingFixer.fixTitleEncodingIfNeeded();
98
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +010099 if (decoded.isEmpty())
Ben Murdoch591b9582013-07-10 11:41:44 +0100100 return 0;
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +0100101
Ben Murdoch591b9582013-07-10 11:41:44 +0100102 size_t consumedChars = decoded.length();
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +0100103 append(decoded.releaseImpl());
Ben Murdoch591b9582013-07-10 11:41:44 +0100104
105 return consumedChars;
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +0100106}
107
Ben Murdoch591b9582013-07-10 11:41:44 +0100108size_t DecodedDataDocumentParser::flush()
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +0100109{
Ben Murdoch591b9582013-07-10 11:41:44 +0100110 // null decoder indicates there is no data received.
111 // We have nothing to do in that case.
112 TextResourceDecoder* decoder = document()->decoder();
113 if (!decoder)
114 return 0;
115 String remainingData = decoder->flush();
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +0100116 if (remainingData.isEmpty())
Ben Murdoch591b9582013-07-10 11:41:44 +0100117 return 0;
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +0100118
Ben Murdoch591b9582013-07-10 11:41:44 +0100119 size_t consumedChars = remainingData.length();
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +0100120 append(remainingData.releaseImpl());
Ben Murdoch591b9582013-07-10 11:41:44 +0100121
122 return consumedChars;
Torne (Richard Coles)53e740f2013-05-09 18:38:43 +0100123}
124
125};