blob: dd0a76d5b22564d2eb0c42bea102fe5271694eed [file] [log] [blame]
// Copyright 2016 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

7#include "core/fpdfapi/page/cpdf_streamparser.h"
8
kumarashishg826308d2023-06-23 13:21:22 +00009#include <ctype.h>
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070010
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070011#include <algorithm>
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070012#include <memory>
13#include <utility>
14
Haibo Huang49cc9302020-04-27 16:14:24 -070015#include "constants/stream_dict_common.h"
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070016#include "core/fpdfapi/page/cpdf_docpagedata.h"
17#include "core/fpdfapi/parser/cpdf_array.h"
18#include "core/fpdfapi/parser/cpdf_boolean.h"
19#include "core/fpdfapi/parser/cpdf_dictionary.h"
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070020#include "core/fpdfapi/parser/cpdf_name.h"
21#include "core/fpdfapi/parser/cpdf_null.h"
22#include "core/fpdfapi/parser/cpdf_number.h"
23#include "core/fpdfapi/parser/cpdf_stream.h"
24#include "core/fpdfapi/parser/cpdf_string.h"
25#include "core/fpdfapi/parser/fpdf_parser_decode.h"
26#include "core/fpdfapi/parser/fpdf_parser_utility.h"
Haibo Huang49cc9302020-04-27 16:14:24 -070027#include "core/fxcodec/jpeg/jpegmodule.h"
28#include "core/fxcodec/scanlinedecoder.h"
kumarashishg826308d2023-06-23 13:21:22 +000029#include "core/fxcrt/data_vector.h"
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070030#include "core/fxcrt/fx_extension.h"
Haibo Huang49cc9302020-04-27 16:14:24 -070031#include "core/fxcrt/fx_memory_wrappers.h"
32#include "core/fxcrt/fx_safe_types.h"
kumarashishg826308d2023-06-23 13:21:22 +000033#include "core/fxcrt/span_util.h"
34#include "core/fxge/calculate_pitch.h"
35#include "third_party/base/check.h"
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070036
37namespace {
38
Philip P. Moltmann33357ca2017-05-11 09:25:13 -070039const uint32_t kMaxNestedParsingLevel = 512;
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070040const size_t kMaxStringLength = 32767;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070041
Haibo Huang49cc9302020-04-27 16:14:24 -070042const char kTrue[] = "true";
43const char kFalse[] = "false";
44const char kNull[] = "null";
45
46uint32_t DecodeAllScanlines(std::unique_ptr<ScanlineDecoder> pDecoder) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070047 if (!pDecoder)
48 return FX_INVALID_OFFSET;
Haibo Huang49cc9302020-04-27 16:14:24 -070049
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070050 int ncomps = pDecoder->CountComps();
51 int bpc = pDecoder->GetBPC();
52 int width = pDecoder->GetWidth();
53 int height = pDecoder->GetHeight();
Haibo Huang49cc9302020-04-27 16:14:24 -070054 if (width <= 0 || height <= 0)
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070055 return FX_INVALID_OFFSET;
56
kumarashishg826308d2023-06-23 13:21:22 +000057 absl::optional<uint32_t> maybe_size =
58 fxge::CalculatePitch8(bpc, ncomps, width);
59 if (!maybe_size.has_value())
60 return FX_INVALID_OFFSET;
61
62 FX_SAFE_UINT32 size = maybe_size.value();
Haibo Huang49cc9302020-04-27 16:14:24 -070063 size *= height;
64 if (size.ValueOrDefault(0) == 0)
65 return FX_INVALID_OFFSET;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070066
Haibo Huang49cc9302020-04-27 16:14:24 -070067 for (int row = 0; row < height; ++row) {
kumarashishg826308d2023-06-23 13:21:22 +000068 if (pDecoder->GetScanline(row).empty())
Haibo Huang49cc9302020-04-27 16:14:24 -070069 break;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070070 }
71 return pDecoder->GetSrcOffset();
72}
73
Haibo Huang49cc9302020-04-27 16:14:24 -070074uint32_t DecodeInlineStream(pdfium::span<const uint8_t> src_span,
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070075 int width,
76 int height,
77 const ByteString& decoder,
kumarashishg826308d2023-06-23 13:21:22 +000078 RetainPtr<const CPDF_Dictionary> pParam,
Haibo Huang49cc9302020-04-27 16:14:24 -070079 uint32_t orig_size) {
80 // |decoder| should not be an abbreviation.
kumarashishg826308d2023-06-23 13:21:22 +000081 DCHECK(decoder != "A85");
82 DCHECK(decoder != "AHx");
83 DCHECK(decoder != "CCF");
84 DCHECK(decoder != "DCT");
85 DCHECK(decoder != "Fl");
86 DCHECK(decoder != "LZW");
87 DCHECK(decoder != "RL");
Haibo Huang49cc9302020-04-27 16:14:24 -070088
89 std::unique_ptr<uint8_t, FxFreeDeleter> ignored_result;
90 uint32_t ignored_size;
91 if (decoder == "FlateDecode") {
kumarashishg826308d2023-06-23 13:21:22 +000092 return FlateOrLZWDecode(false, src_span, pParam.Get(), orig_size,
93 &ignored_result, &ignored_size);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070094 }
Haibo Huang49cc9302020-04-27 16:14:24 -070095 if (decoder == "LZWDecode") {
kumarashishg826308d2023-06-23 13:21:22 +000096 return FlateOrLZWDecode(true, src_span, pParam.Get(), 0, &ignored_result,
Haibo Huang49cc9302020-04-27 16:14:24 -070097 &ignored_size);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070098 }
Haibo Huang49cc9302020-04-27 16:14:24 -070099 if (decoder == "DCTDecode") {
kumarashishg826308d2023-06-23 13:21:22 +0000100 std::unique_ptr<ScanlineDecoder> pDecoder = JpegModule::CreateDecoder(
101 src_span, width, height, 0,
102 !pParam || pParam->GetIntegerFor("ColorTransform", 1));
Haibo Huang49cc9302020-04-27 16:14:24 -0700103 return DecodeAllScanlines(std::move(pDecoder));
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700104 }
Haibo Huang49cc9302020-04-27 16:14:24 -0700105 if (decoder == "CCITTFaxDecode") {
106 std::unique_ptr<ScanlineDecoder> pDecoder =
kumarashishg826308d2023-06-23 13:21:22 +0000107 CreateFaxDecoder(src_span, width, height, pParam.Get());
Haibo Huang49cc9302020-04-27 16:14:24 -0700108 return DecodeAllScanlines(std::move(pDecoder));
109 }
110
111 if (decoder == "ASCII85Decode")
112 return A85Decode(src_span, &ignored_result, &ignored_size);
113 if (decoder == "ASCIIHexDecode")
114 return HexDecode(src_span, &ignored_result, &ignored_size);
115 if (decoder == "RunLengthDecode")
116 return RunLengthDecode(src_span, &ignored_result, &ignored_size);
117
118 return FX_INVALID_OFFSET;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700119}
120
121} // namespace
122
Haibo Huang49cc9302020-04-27 16:14:24 -0700123CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span)
124 : m_pBuf(span) {}
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700125
Haibo Huang49cc9302020-04-27 16:14:24 -0700126CPDF_StreamParser::CPDF_StreamParser(pdfium::span<const uint8_t> span,
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700127 const WeakPtr<ByteStringPool>& pPool)
Haibo Huang49cc9302020-04-27 16:14:24 -0700128 : m_pPool(pPool), m_pBuf(span) {}
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700129
kumarashishg826308d2023-06-23 13:21:22 +0000130CPDF_StreamParser::~CPDF_StreamParser() = default;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700131
Haibo Huang49cc9302020-04-27 16:14:24 -0700132RetainPtr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream(
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700133 CPDF_Document* pDoc,
Haibo Huang49cc9302020-04-27 16:14:24 -0700134 RetainPtr<CPDF_Dictionary> pDict,
135 const CPDF_Object* pCSObj) {
136 if (m_Pos < m_pBuf.size() && PDFCharIsWhitespace(m_pBuf[m_Pos]))
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700137 m_Pos++;
138
Haibo Huang49cc9302020-04-27 16:14:24 -0700139 if (m_Pos == m_pBuf.size())
140 return nullptr;
141
142 ByteString decoder;
kumarashishg826308d2023-06-23 13:21:22 +0000143 RetainPtr<const CPDF_Dictionary> pParam;
144 RetainPtr<const CPDF_Object> pFilter = pDict->GetDirectObjectFor("Filter");
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700145 if (pFilter) {
Haibo Huang49cc9302020-04-27 16:14:24 -0700146 const CPDF_Array* pArray = pFilter->AsArray();
147 if (pArray) {
kumarashishg826308d2023-06-23 13:21:22 +0000148 decoder = pArray->GetByteStringAt(0);
149 RetainPtr<const CPDF_Array> pParams =
Haibo Huang49cc9302020-04-27 16:14:24 -0700150 pDict->GetArrayFor(pdfium::stream::kDecodeParms);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700151 if (pParams)
152 pParam = pParams->GetDictAt(0);
153 } else {
Haibo Huang49cc9302020-04-27 16:14:24 -0700154 decoder = pFilter->GetString();
155 pParam = pDict->GetDictFor(pdfium::stream::kDecodeParms);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700156 }
157 }
158 uint32_t width = pDict->GetIntegerFor("Width");
159 uint32_t height = pDict->GetIntegerFor("Height");
Haibo Huang49cc9302020-04-27 16:14:24 -0700160 uint32_t bpc = 1;
161 uint32_t nComponents = 1;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700162 if (pCSObj) {
Haibo Huang49cc9302020-04-27 16:14:24 -0700163 RetainPtr<CPDF_ColorSpace> pCS =
164 CPDF_DocPageData::FromDocument(pDoc)->GetColorSpace(pCSObj, nullptr);
165 nComponents = pCS ? pCS->CountComponents() : 3;
166 bpc = pDict->GetIntegerFor("BitsPerComponent");
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700167 }
kumarashishg826308d2023-06-23 13:21:22 +0000168 absl::optional<uint32_t> maybe_size =
169 fxge::CalculatePitch8(bpc, nComponents, width);
170 if (!maybe_size.has_value())
171 return nullptr;
172
173 FX_SAFE_UINT32 size = maybe_size.value();
Haibo Huang49cc9302020-04-27 16:14:24 -0700174 size *= height;
175 if (!size.IsValid())
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700176 return nullptr;
177
Haibo Huang49cc9302020-04-27 16:14:24 -0700178 uint32_t dwOrigSize = size.ValueOrDie();
kumarashishg826308d2023-06-23 13:21:22 +0000179 DataVector<uint8_t> data;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700180 uint32_t dwStreamSize;
Haibo Huang49cc9302020-04-27 16:14:24 -0700181 if (decoder.IsEmpty()) {
182 dwOrigSize = std::min<uint32_t>(dwOrigSize, m_pBuf.size() - m_Pos);
kumarashishg826308d2023-06-23 13:21:22 +0000183 auto src_span = m_pBuf.subspan(m_Pos, dwOrigSize);
184 data = DataVector<uint8_t>(src_span.begin(), src_span.end());
Haibo Huang49cc9302020-04-27 16:14:24 -0700185 dwStreamSize = dwOrigSize;
186 m_Pos += dwOrigSize;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700187 } else {
Haibo Huang49cc9302020-04-27 16:14:24 -0700188 dwStreamSize = DecodeInlineStream(m_pBuf.subspan(m_Pos), width, height,
kumarashishg826308d2023-06-23 13:21:22 +0000189 decoder, std::move(pParam), dwOrigSize);
Haibo Huang49cc9302020-04-27 16:14:24 -0700190 if (!pdfium::base::IsValueInRangeForNumericType<int>(dwStreamSize))
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700191 return nullptr;
192
193 uint32_t dwSavePos = m_Pos;
194 m_Pos += dwStreamSize;
kumarashishg826308d2023-06-23 13:21:22 +0000195 while (true) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700196 uint32_t dwPrevPos = m_Pos;
kumarashishg826308d2023-06-23 13:21:22 +0000197 ElementType type = ParseNextElement();
198 if (type == ElementType::kEndOfData)
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700199 break;
200
kumarashishg826308d2023-06-23 13:21:22 +0000201 if (type != ElementType::kKeyword) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700202 dwStreamSize += m_Pos - dwPrevPos;
203 continue;
204 }
Philip P. Moltmann33357ca2017-05-11 09:25:13 -0700205 if (GetWord() == "EI") {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700206 m_Pos = dwPrevPos;
207 break;
208 }
209 dwStreamSize += m_Pos - dwPrevPos;
210 }
211 m_Pos = dwSavePos;
kumarashishg826308d2023-06-23 13:21:22 +0000212 auto src_span = m_pBuf.subspan(m_Pos, dwStreamSize);
213 data = DataVector<uint8_t>(src_span.begin(), src_span.end());
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700214 m_Pos += dwStreamSize;
215 }
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700216 pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(dwStreamSize));
kumarashishg826308d2023-06-23 13:21:22 +0000217 return pdfium::MakeRetain<CPDF_Stream>(std::move(data), std::move(pDict));
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700218}
219
kumarashishg826308d2023-06-23 13:21:22 +0000220CPDF_StreamParser::ElementType CPDF_StreamParser::ParseNextElement() {
Haibo Huang49cc9302020-04-27 16:14:24 -0700221 m_pLastObj.Reset();
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700222 m_WordSize = 0;
223 if (!PositionIsInBounds())
kumarashishg826308d2023-06-23 13:21:22 +0000224 return ElementType::kEndOfData;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700225
Haibo Huang49cc9302020-04-27 16:14:24 -0700226 uint8_t ch = m_pBuf[m_Pos++];
kumarashishg826308d2023-06-23 13:21:22 +0000227 while (true) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700228 while (PDFCharIsWhitespace(ch)) {
229 if (!PositionIsInBounds())
kumarashishg826308d2023-06-23 13:21:22 +0000230 return ElementType::kEndOfData;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700231
232 ch = m_pBuf[m_Pos++];
233 }
234
235 if (ch != '%')
236 break;
237
kumarashishg826308d2023-06-23 13:21:22 +0000238 while (true) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700239 if (!PositionIsInBounds())
kumarashishg826308d2023-06-23 13:21:22 +0000240 return ElementType::kEndOfData;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700241
242 ch = m_pBuf[m_Pos++];
243 if (PDFCharIsLineEnding(ch))
244 break;
245 }
246 }
247
248 if (PDFCharIsDelimiter(ch) && ch != '/') {
249 m_Pos--;
Philip P. Moltmann33357ca2017-05-11 09:25:13 -0700250 m_pLastObj = ReadNextObject(false, false, 0);
kumarashishg826308d2023-06-23 13:21:22 +0000251 return ElementType::kOther;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700252 }
253
254 bool bIsNumber = true;
kumarashishg826308d2023-06-23 13:21:22 +0000255 while (true) {
Haibo Huang49cc9302020-04-27 16:14:24 -0700256 if (m_WordSize < kMaxWordLength)
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700257 m_WordBuffer[m_WordSize++] = ch;
258
259 if (!PDFCharIsNumeric(ch))
260 bIsNumber = false;
261
262 if (!PositionIsInBounds())
263 break;
264
265 ch = m_pBuf[m_Pos++];
266
267 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
268 m_Pos--;
269 break;
270 }
271 }
272
273 m_WordBuffer[m_WordSize] = 0;
274 if (bIsNumber)
kumarashishg826308d2023-06-23 13:21:22 +0000275 return ElementType::kNumber;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700276
277 if (m_WordBuffer[0] == '/')
kumarashishg826308d2023-06-23 13:21:22 +0000278 return ElementType::kName;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700279
280 if (m_WordSize == 4) {
kumarashishg826308d2023-06-23 13:21:22 +0000281 if (GetWord() == kTrue) {
Haibo Huang49cc9302020-04-27 16:14:24 -0700282 m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(true);
kumarashishg826308d2023-06-23 13:21:22 +0000283 return ElementType::kOther;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700284 }
kumarashishg826308d2023-06-23 13:21:22 +0000285 if (GetWord() == kNull) {
Haibo Huang49cc9302020-04-27 16:14:24 -0700286 m_pLastObj = pdfium::MakeRetain<CPDF_Null>();
kumarashishg826308d2023-06-23 13:21:22 +0000287 return ElementType::kOther;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700288 }
289 } else if (m_WordSize == 5) {
kumarashishg826308d2023-06-23 13:21:22 +0000290 if (GetWord() == kFalse) {
Haibo Huang49cc9302020-04-27 16:14:24 -0700291 m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(false);
kumarashishg826308d2023-06-23 13:21:22 +0000292 return ElementType::kOther;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700293 }
294 }
kumarashishg826308d2023-06-23 13:21:22 +0000295 return ElementType::kKeyword;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700296}
297
Haibo Huang49cc9302020-04-27 16:14:24 -0700298RetainPtr<CPDF_Object> CPDF_StreamParser::ReadNextObject(
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700299 bool bAllowNestedArray,
Philip P. Moltmann33357ca2017-05-11 09:25:13 -0700300 bool bInArray,
301 uint32_t dwRecursionLevel) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700302 bool bIsNumber;
Philip P. Moltmann33357ca2017-05-11 09:25:13 -0700303 // Must get the next word before returning to avoid infinite loops.
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700304 GetNextWord(bIsNumber);
Philip P. Moltmann33357ca2017-05-11 09:25:13 -0700305 if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel)
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700306 return nullptr;
307
308 if (bIsNumber) {
309 m_WordBuffer[m_WordSize] = 0;
kumarashishg826308d2023-06-23 13:21:22 +0000310 return pdfium::MakeRetain<CPDF_Number>(GetWord());
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700311 }
312
313 int first_char = m_WordBuffer[0];
314 if (first_char == '/') {
kumarashishg826308d2023-06-23 13:21:22 +0000315 ByteString name = PDF_NameDecode(GetWord().Substr(1));
Haibo Huang49cc9302020-04-27 16:14:24 -0700316 return pdfium::MakeRetain<CPDF_Name>(m_pPool, name);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700317 }
318
319 if (first_char == '(') {
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700320 ByteString str = ReadString();
Haibo Huang49cc9302020-04-27 16:14:24 -0700321 return pdfium::MakeRetain<CPDF_String>(m_pPool, str, false);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700322 }
323
324 if (first_char == '<') {
325 if (m_WordSize == 1)
Haibo Huang49cc9302020-04-27 16:14:24 -0700326 return pdfium::MakeRetain<CPDF_String>(m_pPool, ReadHexString(), true);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700327
Haibo Huang49cc9302020-04-27 16:14:24 -0700328 auto pDict = pdfium::MakeRetain<CPDF_Dictionary>(m_pPool);
kumarashishg826308d2023-06-23 13:21:22 +0000329 while (true) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700330 GetNextWord(bIsNumber);
331 if (m_WordSize == 2 && m_WordBuffer[0] == '>')
332 break;
333
334 if (!m_WordSize || m_WordBuffer[0] != '/')
335 return nullptr;
336
kumarashishg826308d2023-06-23 13:21:22 +0000337 ByteString key = PDF_NameDecode(GetWord().Substr(1));
Haibo Huang49cc9302020-04-27 16:14:24 -0700338 RetainPtr<CPDF_Object> pObj =
Philip P. Moltmann33357ca2017-05-11 09:25:13 -0700339 ReadNextObject(true, bInArray, dwRecursionLevel + 1);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700340 if (!pObj)
341 return nullptr;
342
kumarashishg826308d2023-06-23 13:21:22 +0000343 pDict->SetFor(key, std::move(pObj));
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700344 }
Haibo Huang49cc9302020-04-27 16:14:24 -0700345 return pDict;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700346 }
347
348 if (first_char == '[') {
Philip P. Moltmann33357ca2017-05-11 09:25:13 -0700349 if ((!bAllowNestedArray && bInArray))
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700350 return nullptr;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700351
Haibo Huang49cc9302020-04-27 16:14:24 -0700352 auto pArray = pdfium::MakeRetain<CPDF_Array>();
kumarashishg826308d2023-06-23 13:21:22 +0000353 while (true) {
Haibo Huang49cc9302020-04-27 16:14:24 -0700354 RetainPtr<CPDF_Object> pObj =
Philip P. Moltmann33357ca2017-05-11 09:25:13 -0700355 ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700356 if (pObj) {
kumarashishg826308d2023-06-23 13:21:22 +0000357 pArray->Append(std::move(pObj));
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700358 continue;
359 }
360 if (!m_WordSize || m_WordBuffer[0] == ']')
361 break;
362 }
Haibo Huang49cc9302020-04-27 16:14:24 -0700363 return pArray;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700364 }
365
kumarashishg826308d2023-06-23 13:21:22 +0000366 if (GetWord() == kFalse)
Haibo Huang49cc9302020-04-27 16:14:24 -0700367 return pdfium::MakeRetain<CPDF_Boolean>(false);
kumarashishg826308d2023-06-23 13:21:22 +0000368 if (GetWord() == kTrue)
Haibo Huang49cc9302020-04-27 16:14:24 -0700369 return pdfium::MakeRetain<CPDF_Boolean>(true);
kumarashishg826308d2023-06-23 13:21:22 +0000370 if (GetWord() == kNull)
Haibo Huang49cc9302020-04-27 16:14:24 -0700371 return pdfium::MakeRetain<CPDF_Null>();
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700372 return nullptr;
373}
374
375// TODO(npm): the following methods are almost identical in cpdf_syntaxparser
376void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
377 m_WordSize = 0;
378 bIsNumber = true;
379 if (!PositionIsInBounds())
380 return;
381
Haibo Huang49cc9302020-04-27 16:14:24 -0700382 uint8_t ch = m_pBuf[m_Pos++];
kumarashishg826308d2023-06-23 13:21:22 +0000383 while (true) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700384 while (PDFCharIsWhitespace(ch)) {
385 if (!PositionIsInBounds()) {
386 return;
387 }
388 ch = m_pBuf[m_Pos++];
389 }
390
391 if (ch != '%')
392 break;
393
kumarashishg826308d2023-06-23 13:21:22 +0000394 while (true) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700395 if (!PositionIsInBounds())
396 return;
397 ch = m_pBuf[m_Pos++];
398 if (PDFCharIsLineEnding(ch))
399 break;
400 }
401 }
402
403 if (PDFCharIsDelimiter(ch)) {
404 bIsNumber = false;
405 m_WordBuffer[m_WordSize++] = ch;
406 if (ch == '/') {
kumarashishg826308d2023-06-23 13:21:22 +0000407 while (true) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700408 if (!PositionIsInBounds())
409 return;
410 ch = m_pBuf[m_Pos++];
411 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
412 m_Pos--;
413 return;
414 }
Haibo Huang49cc9302020-04-27 16:14:24 -0700415 if (m_WordSize < kMaxWordLength)
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700416 m_WordBuffer[m_WordSize++] = ch;
417 }
418 } else if (ch == '<') {
419 if (!PositionIsInBounds())
420 return;
421 ch = m_pBuf[m_Pos++];
422 if (ch == '<')
423 m_WordBuffer[m_WordSize++] = ch;
424 else
425 m_Pos--;
426 } else if (ch == '>') {
427 if (!PositionIsInBounds())
428 return;
429 ch = m_pBuf[m_Pos++];
430 if (ch == '>')
431 m_WordBuffer[m_WordSize++] = ch;
432 else
433 m_Pos--;
434 }
435 return;
436 }
437
kumarashishg826308d2023-06-23 13:21:22 +0000438 while (true) {
Haibo Huang49cc9302020-04-27 16:14:24 -0700439 if (m_WordSize < kMaxWordLength)
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700440 m_WordBuffer[m_WordSize++] = ch;
441 if (!PDFCharIsNumeric(ch))
442 bIsNumber = false;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700443 if (!PositionIsInBounds())
444 return;
Haibo Huang49cc9302020-04-27 16:14:24 -0700445
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700446 ch = m_pBuf[m_Pos++];
447 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
448 m_Pos--;
449 break;
450 }
451 }
452}
453
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700454ByteString CPDF_StreamParser::ReadString() {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700455 if (!PositionIsInBounds())
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700456 return ByteString();
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700457
kumarashishg826308d2023-06-23 13:21:22 +0000458 ByteString buf;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700459 int parlevel = 0;
460 int status = 0;
461 int iEscCode = 0;
kumarashishg826308d2023-06-23 13:21:22 +0000462 uint8_t ch = m_pBuf[m_Pos++];
463 while (true) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700464 switch (status) {
465 case 0:
466 if (ch == ')') {
467 if (parlevel == 0) {
kumarashishg826308d2023-06-23 13:21:22 +0000468 return buf.First(std::min(buf.GetLength(), kMaxStringLength));
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700469 }
470 parlevel--;
kumarashishg826308d2023-06-23 13:21:22 +0000471 buf += ')';
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700472 } else if (ch == '(') {
473 parlevel++;
kumarashishg826308d2023-06-23 13:21:22 +0000474 buf += '(';
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700475 } else if (ch == '\\') {
476 status = 1;
477 } else {
kumarashishg826308d2023-06-23 13:21:22 +0000478 buf += static_cast<char>(ch);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700479 }
480 break;
481 case 1:
Haibo Huang49cc9302020-04-27 16:14:24 -0700482 if (FXSYS_IsOctalDigit(ch)) {
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700483 iEscCode = FXSYS_DecimalCharToInt(static_cast<char>(ch));
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700484 status = 2;
485 break;
486 }
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700487 if (ch == '\r') {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700488 status = 4;
489 break;
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700490 }
491 if (ch == '\n') {
492 // Do nothing.
493 } else if (ch == 'n') {
kumarashishg826308d2023-06-23 13:21:22 +0000494 buf += '\n';
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700495 } else if (ch == 'r') {
kumarashishg826308d2023-06-23 13:21:22 +0000496 buf += '\r';
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700497 } else if (ch == 't') {
kumarashishg826308d2023-06-23 13:21:22 +0000498 buf += '\t';
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700499 } else if (ch == 'b') {
kumarashishg826308d2023-06-23 13:21:22 +0000500 buf += '\b';
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700501 } else if (ch == 'f') {
kumarashishg826308d2023-06-23 13:21:22 +0000502 buf += '\f';
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700503 } else {
kumarashishg826308d2023-06-23 13:21:22 +0000504 buf += static_cast<char>(ch);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700505 }
506 status = 0;
507 break;
508 case 2:
Haibo Huang49cc9302020-04-27 16:14:24 -0700509 if (FXSYS_IsOctalDigit(ch)) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700510 iEscCode =
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700511 iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700512 status = 3;
513 } else {
kumarashishg826308d2023-06-23 13:21:22 +0000514 buf += static_cast<char>(iEscCode);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700515 status = 0;
516 continue;
517 }
518 break;
519 case 3:
Haibo Huang49cc9302020-04-27 16:14:24 -0700520 if (FXSYS_IsOctalDigit(ch)) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700521 iEscCode =
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700522 iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
kumarashishg826308d2023-06-23 13:21:22 +0000523 buf += static_cast<char>(iEscCode);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700524 status = 0;
525 } else {
kumarashishg826308d2023-06-23 13:21:22 +0000526 buf += static_cast<char>(iEscCode);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700527 status = 0;
528 continue;
529 }
530 break;
531 case 4:
532 status = 0;
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700533 if (ch != '\n')
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700534 continue;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700535 break;
536 }
537 if (!PositionIsInBounds())
kumarashishg826308d2023-06-23 13:21:22 +0000538 return buf.First(std::min(buf.GetLength(), kMaxStringLength));
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700539
540 ch = m_pBuf[m_Pos++];
541 }
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700542}
543
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700544ByteString CPDF_StreamParser::ReadHexString() {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700545 if (!PositionIsInBounds())
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700546 return ByteString();
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700547
kumarashishg826308d2023-06-23 13:21:22 +0000548 ByteString buf;
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700549 bool bFirst = true;
550 int code = 0;
551 while (PositionIsInBounds()) {
Haibo Huang49cc9302020-04-27 16:14:24 -0700552 uint8_t ch = m_pBuf[m_Pos++];
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700553 if (ch == '>')
554 break;
555
kumarashishg826308d2023-06-23 13:21:22 +0000556 if (!isxdigit(ch))
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700557 continue;
558
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700559 int val = FXSYS_HexCharToInt(ch);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700560 if (bFirst) {
561 code = val * 16;
562 } else {
563 code += val;
kumarashishg826308d2023-06-23 13:21:22 +0000564 buf += static_cast<uint8_t>(code);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700565 }
566 bFirst = !bFirst;
567 }
568 if (!bFirst)
kumarashishg826308d2023-06-23 13:21:22 +0000569 buf += static_cast<char>(code);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700570
kumarashishg826308d2023-06-23 13:21:22 +0000571 return buf.First(std::min<size_t>(buf.GetLength(), kMaxStringLength));
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700572}
573
574bool CPDF_StreamParser::PositionIsInBounds() const {
Haibo Huang49cc9302020-04-27 16:14:24 -0700575 return m_Pos < m_pBuf.size();
576}