Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 1 | // Copyright 2016 PDFium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | |
dsinclair | 488b7ad | 2016-10-04 11:55:50 -0700 | [diff] [blame] | 7 | #include "core/fpdfapi/parser/cpdf_stream.h" |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 8 | |
tsepez | 9e05ee1 | 2016-11-21 13:19:10 -0800 | [diff] [blame] | 9 | #include <utility> |
| 10 | |
Lei Zhang | 2617056 | 2018-04-17 17:01:52 +0000 | [diff] [blame^] | 11 | #include "constants/stream_dict_common.h" |
dsinclair | 488b7ad | 2016-10-04 11:55:50 -0700 | [diff] [blame] | 12 | #include "core/fpdfapi/parser/cpdf_dictionary.h" |
tsepez | 0e606b5 | 2016-11-18 16:22:41 -0800 | [diff] [blame] | 13 | #include "core/fpdfapi/parser/cpdf_number.h" |
dsinclair | 488b7ad | 2016-10-04 11:55:50 -0700 | [diff] [blame] | 14 | #include "core/fpdfapi/parser/cpdf_stream_acc.h" |
| 15 | #include "core/fpdfapi/parser/fpdf_parser_decode.h" |
Dan Sinclair | bcd1e70 | 2017-08-31 13:19:18 -0400 | [diff] [blame] | 16 | #include "core/fxcrt/fx_stream.h" |
tsepez | e6db16e | 2016-09-19 10:45:09 -0700 | [diff] [blame] | 17 | #include "third_party/base/numerics/safe_conversions.h" |
tsepez | a9caab9 | 2016-12-14 05:57:10 -0800 | [diff] [blame] | 18 | #include "third_party/base/ptr_util.h" |
weili | a470b5e | 2016-08-23 22:08:37 -0700 | [diff] [blame] | 19 | #include "third_party/base/stl_util.h" |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 20 | |
tsepez | e6db16e | 2016-09-19 10:45:09 -0700 | [diff] [blame] | 21 | CPDF_Stream::CPDF_Stream() {} |
| 22 | |
tsepez | 47fb8c0 | 2016-12-15 13:51:34 -0800 | [diff] [blame] | 23 | CPDF_Stream::CPDF_Stream(std::unique_ptr<uint8_t, FxFreeDeleter> pData, |
tsepez | 9e05ee1 | 2016-11-21 13:19:10 -0800 | [diff] [blame] | 24 | uint32_t size, |
| 25 | std::unique_ptr<CPDF_Dictionary> pDict) |
Artem Strygin | 4fde70e | 2017-09-04 17:01:41 +0300 | [diff] [blame] | 26 | : m_pDict(std::move(pDict)) { |
| 27 | SetData(std::move(pData), size); |
| 28 | } |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 29 | |
tsepez | 836f7d5 | 2016-10-10 14:31:05 -0700 | [diff] [blame] | 30 | CPDF_Stream::~CPDF_Stream() { |
| 31 | m_ObjNum = kInvalidObjNum; |
| 32 | if (m_pDict && m_pDict->GetObjNum() == kInvalidObjNum) |
| 33 | m_pDict.release(); // lowercase release, release ownership. |
| 34 | } |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 35 | |
| 36 | CPDF_Object::Type CPDF_Stream::GetType() const { |
| 37 | return STREAM; |
| 38 | } |
| 39 | |
| 40 | CPDF_Dictionary* CPDF_Stream::GetDict() const { |
tsepez | e6db16e | 2016-09-19 10:45:09 -0700 | [diff] [blame] | 41 | return m_pDict.get(); |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 42 | } |
| 43 | |
| 44 | bool CPDF_Stream::IsStream() const { |
| 45 | return true; |
| 46 | } |
| 47 | |
| 48 | CPDF_Stream* CPDF_Stream::AsStream() { |
| 49 | return this; |
| 50 | } |
| 51 | |
| 52 | const CPDF_Stream* CPDF_Stream::AsStream() const { |
| 53 | return this; |
| 54 | } |
| 55 | |
tsepez | 596fc4c | 2016-06-07 06:41:50 -0700 | [diff] [blame] | 56 | void CPDF_Stream::InitStream(const uint8_t* pData, |
tsepez | b5e8f14 | 2016-03-25 15:18:35 -0700 | [diff] [blame] | 57 | uint32_t size, |
tsepez | 9e05ee1 | 2016-11-21 13:19:10 -0800 | [diff] [blame] | 58 | std::unique_ptr<CPDF_Dictionary> pDict) { |
| 59 | m_pDict = std::move(pDict); |
Artem Strygin | 4fde70e | 2017-09-04 17:01:41 +0300 | [diff] [blame] | 60 | SetData(pData, size); |
tsepez | e6db16e | 2016-09-19 10:45:09 -0700 | [diff] [blame] | 61 | } |
| 62 | |
tsepez | 833619b | 2016-12-07 09:21:17 -0800 | [diff] [blame] | 63 | void CPDF_Stream::InitStreamFromFile( |
Dan Sinclair | 0b95042 | 2017-09-21 15:49:49 -0400 | [diff] [blame] | 64 | const RetainPtr<IFX_SeekableReadStream>& pFile, |
tsepez | 833619b | 2016-12-07 09:21:17 -0800 | [diff] [blame] | 65 | std::unique_ptr<CPDF_Dictionary> pDict) { |
tsepez | 9e05ee1 | 2016-11-21 13:19:10 -0800 | [diff] [blame] | 66 | m_pDict = std::move(pDict); |
tsepez | e6db16e | 2016-09-19 10:45:09 -0700 | [diff] [blame] | 67 | m_bMemoryBased = false; |
| 68 | m_pDataBuf.reset(); |
| 69 | m_pFile = pFile; |
| 70 | m_dwSize = pdfium::base::checked_cast<uint32_t>(pFile->GetSize()); |
| 71 | if (m_pDict) |
tsepez | 0e606b5 | 2016-11-18 16:22:41 -0800 | [diff] [blame] | 72 | m_pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(m_dwSize)); |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 73 | } |
| 74 | |
tsepez | 335cf09 | 2016-11-09 13:28:26 -0800 | [diff] [blame] | 75 | std::unique_ptr<CPDF_Object> CPDF_Stream::Clone() const { |
weili | a470b5e | 2016-08-23 22:08:37 -0700 | [diff] [blame] | 76 | return CloneObjectNonCyclic(false); |
| 77 | } |
| 78 | |
tsepez | 335cf09 | 2016-11-09 13:28:26 -0800 | [diff] [blame] | 79 | std::unique_ptr<CPDF_Object> CPDF_Stream::CloneNonCyclic( |
weili | a470b5e | 2016-08-23 22:08:37 -0700 | [diff] [blame] | 80 | bool bDirect, |
| 81 | std::set<const CPDF_Object*>* pVisited) const { |
| 82 | pVisited->insert(this); |
Tom Sepez | afd0d1f | 2017-04-04 14:37:18 -0700 | [diff] [blame] | 83 | auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(this); |
Lei Zhang | 07401ba | 2017-12-11 22:12:08 +0000 | [diff] [blame] | 84 | pAcc->LoadAllDataRaw(); |
tsepez | 9e05ee1 | 2016-11-21 13:19:10 -0800 | [diff] [blame] | 85 | |
Tom Sepez | afd0d1f | 2017-04-04 14:37:18 -0700 | [diff] [blame] | 86 | uint32_t streamSize = pAcc->GetSize(); |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 87 | CPDF_Dictionary* pDict = GetDict(); |
tsepez | 9e05ee1 | 2016-11-21 13:19:10 -0800 | [diff] [blame] | 88 | std::unique_ptr<CPDF_Dictionary> pNewDict; |
weili | a470b5e | 2016-08-23 22:08:37 -0700 | [diff] [blame] | 89 | if (pDict && !pdfium::ContainsKey(*pVisited, pDict)) { |
tsepez | 9e05ee1 | 2016-11-21 13:19:10 -0800 | [diff] [blame] | 90 | pNewDict = ToDictionary( |
| 91 | static_cast<CPDF_Object*>(pDict)->CloneNonCyclic(bDirect, pVisited)); |
weili | a470b5e | 2016-08-23 22:08:37 -0700 | [diff] [blame] | 92 | } |
Tom Sepez | afd0d1f | 2017-04-04 14:37:18 -0700 | [diff] [blame] | 93 | return pdfium::MakeUnique<CPDF_Stream>(pAcc->DetachData(), streamSize, |
tsepez | 9e05ee1 | 2016-11-21 13:19:10 -0800 | [diff] [blame] | 94 | std::move(pNewDict)); |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 95 | } |
| 96 | |
Artem Strygin | 90555e0 | 2017-07-28 19:41:59 +0300 | [diff] [blame] | 97 | void CPDF_Stream::SetDataAndRemoveFilter(const uint8_t* pData, uint32_t size) { |
| 98 | SetData(pData, size); |
| 99 | m_pDict->RemoveFor("Filter"); |
Lei Zhang | 2617056 | 2018-04-17 17:01:52 +0000 | [diff] [blame^] | 100 | m_pDict->RemoveFor(pdfium::stream::kDecodeParms); |
Artem Strygin | 90555e0 | 2017-07-28 19:41:59 +0300 | [diff] [blame] | 101 | } |
| 102 | |
| 103 | void CPDF_Stream::SetDataAndRemoveFilter(std::ostringstream* stream) { |
Henrique Nakashima | aa1c787 | 2018-01-30 19:12:10 +0000 | [diff] [blame] | 104 | if (stream->tellp() <= 0) { |
| 105 | SetDataAndRemoveFilter(nullptr, 0); |
| 106 | return; |
| 107 | } |
| 108 | |
Artem Strygin | 90555e0 | 2017-07-28 19:41:59 +0300 | [diff] [blame] | 109 | SetDataAndRemoveFilter( |
| 110 | reinterpret_cast<const uint8_t*>(stream->str().c_str()), stream->tellp()); |
| 111 | } |
| 112 | |
tsepez | e6db16e | 2016-09-19 10:45:09 -0700 | [diff] [blame] | 113 | void CPDF_Stream::SetData(const uint8_t* pData, uint32_t size) { |
Artem Strygin | 4fde70e | 2017-09-04 17:01:41 +0300 | [diff] [blame] | 114 | std::unique_ptr<uint8_t, FxFreeDeleter> data_copy; |
| 115 | if (pData) { |
| 116 | data_copy.reset(FX_Alloc(uint8_t, size)); |
| 117 | memcpy(data_copy.get(), pData, size); |
| 118 | } |
| 119 | SetData(std::move(data_copy), size); |
| 120 | } |
| 121 | |
| 122 | void CPDF_Stream::SetData(std::unique_ptr<uint8_t, FxFreeDeleter> pData, |
| 123 | uint32_t size) { |
tsepez | e6db16e | 2016-09-19 10:45:09 -0700 | [diff] [blame] | 124 | m_bMemoryBased = true; |
Artem Strygin | 4fde70e | 2017-09-04 17:01:41 +0300 | [diff] [blame] | 125 | m_pFile = nullptr; |
| 126 | m_pDataBuf = std::move(pData); |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 127 | m_dwSize = size; |
| 128 | if (!m_pDict) |
tsepez | a9caab9 | 2016-12-14 05:57:10 -0800 | [diff] [blame] | 129 | m_pDict = pdfium::MakeUnique<CPDF_Dictionary>(); |
tsepez | 0e606b5 | 2016-11-18 16:22:41 -0800 | [diff] [blame] | 130 | m_pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(size)); |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 131 | } |
| 132 | |
Henrique Nakashima | d39443e | 2017-06-27 16:13:17 -0400 | [diff] [blame] | 133 | void CPDF_Stream::SetData(std::ostringstream* stream) { |
Henrique Nakashima | aa1c787 | 2018-01-30 19:12:10 +0000 | [diff] [blame] | 134 | if (stream->tellp() <= 0) { |
| 135 | SetData(nullptr, 0); |
| 136 | return; |
| 137 | } |
| 138 | |
Henrique Nakashima | d39443e | 2017-06-27 16:13:17 -0400 | [diff] [blame] | 139 | SetData(reinterpret_cast<const uint8_t*>(stream->str().c_str()), |
| 140 | stream->tellp()); |
| 141 | } |
| 142 | |
tsepez | 12f3e4a | 2016-11-02 15:17:29 -0700 | [diff] [blame] | 143 | bool CPDF_Stream::ReadRawData(FX_FILESIZE offset, |
| 144 | uint8_t* buf, |
| 145 | uint32_t size) const { |
Graeme Connell | baf318f | 2017-02-06 10:52:35 -0700 | [diff] [blame] | 146 | if (!m_bMemoryBased && m_pFile) |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 147 | return m_pFile->ReadBlock(buf, offset, size); |
| 148 | |
| 149 | if (m_pDataBuf) |
Dan Sinclair | 1c5d0b4 | 2017-04-03 15:05:11 -0400 | [diff] [blame] | 150 | memcpy(buf, m_pDataBuf.get() + offset, size); |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 151 | |
tsepez | 12f3e4a | 2016-11-02 15:17:29 -0700 | [diff] [blame] | 152 | return true; |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 153 | } |
| 154 | |
tsepez | 430ab83 | 2016-11-18 14:48:21 -0800 | [diff] [blame] | 155 | bool CPDF_Stream::HasFilter() const { |
| 156 | return m_pDict && m_pDict->KeyExist("Filter"); |
| 157 | } |
| 158 | |
Ryan Harrison | 275e260 | 2017-09-18 14:23:18 -0400 | [diff] [blame] | 159 | WideString CPDF_Stream::GetUnicodeText() const { |
Tom Sepez | afd0d1f | 2017-04-04 14:37:18 -0700 | [diff] [blame] | 160 | auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(this); |
Lei Zhang | 07401ba | 2017-12-11 22:12:08 +0000 | [diff] [blame] | 161 | pAcc->LoadAllDataFiltered(); |
Tom Sepez | afd0d1f | 2017-04-04 14:37:18 -0700 | [diff] [blame] | 162 | return PDF_DecodeText(pAcc->GetData(), pAcc->GetSize()); |
Tom Sepez | 5fc239a | 2016-03-10 14:10:38 -0800 | [diff] [blame] | 163 | } |
Dan Sinclair | c68b1e7 | 2017-05-08 16:59:54 -0400 | [diff] [blame] | 164 | |
Dan Sinclair | 5b59033 | 2017-05-10 13:59:14 -0400 | [diff] [blame] | 165 | bool CPDF_Stream::WriteTo(IFX_ArchiveStream* archive) const { |
| 166 | if (!GetDict()->WriteTo(archive) || !archive->WriteString("stream\r\n")) |
Dan Sinclair | c68b1e7 | 2017-05-08 16:59:54 -0400 | [diff] [blame] | 167 | return false; |
Dan Sinclair | c68b1e7 | 2017-05-08 16:59:54 -0400 | [diff] [blame] | 168 | |
| 169 | auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(this); |
Lei Zhang | 07401ba | 2017-12-11 22:12:08 +0000 | [diff] [blame] | 170 | pAcc->LoadAllDataRaw(); |
Dan Sinclair | 5b59033 | 2017-05-10 13:59:14 -0400 | [diff] [blame] | 171 | return archive->WriteBlock(pAcc->GetData(), pAcc->GetSize()) && |
| 172 | archive->WriteString("\r\nendstream"); |
Dan Sinclair | c68b1e7 | 2017-05-08 16:59:54 -0400 | [diff] [blame] | 173 | } |