blob: 67ac9485fb7de10105b571d856043c7474d6afca [file] [log] [blame]
kumarashishg826308d2023-06-23 13:21:22 +00001// Copyright 2017 The PDFium Authors
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -07002// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfdoc/cpdf_structtree.h"
8
kumarashishg826308d2023-06-23 13:21:22 +00009#include <utility>
10
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070011#include "core/fpdfapi/parser/cpdf_array.h"
Haibo Huang49cc9302020-04-27 16:14:24 -070012#include "core/fpdfapi/parser/cpdf_dictionary.h"
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070013#include "core/fpdfapi/parser/cpdf_document.h"
14#include "core/fpdfapi/parser/cpdf_number.h"
15#include "core/fpdfapi/parser/cpdf_reference.h"
16#include "core/fpdfdoc/cpdf_numbertree.h"
17#include "core/fpdfdoc/cpdf_structelement.h"
kumarashishg826308d2023-06-23 13:21:22 +000018#include "core/fxcrt/stl_util.h"
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070019
20namespace {
21
22bool IsTagged(const CPDF_Document* pDoc) {
kumarashishg826308d2023-06-23 13:21:22 +000023 RetainPtr<const CPDF_Dictionary> pMarkInfo =
24 pDoc->GetRoot()->GetDictFor("MarkInfo");
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070025 return pMarkInfo && pMarkInfo->GetIntegerFor("Marked");
26}
27
28} // namespace
29
30// static
31std::unique_ptr<CPDF_StructTree> CPDF_StructTree::LoadPage(
32 const CPDF_Document* pDoc,
kumarashishg826308d2023-06-23 13:21:22 +000033 RetainPtr<const CPDF_Dictionary> pPageDict) {
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070034 if (!IsTagged(pDoc))
35 return nullptr;
36
kumarashishg826308d2023-06-23 13:21:22 +000037 auto pTree = std::make_unique<CPDF_StructTree>(pDoc);
38 pTree->LoadPageTree(std::move(pPageDict));
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070039 return pTree;
40}
41
42CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc)
43 : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")),
Haibo Huang49cc9302020-04-27 16:14:24 -070044 m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr) {}
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070045
Haibo Huang49cc9302020-04-27 16:14:24 -070046CPDF_StructTree::~CPDF_StructTree() = default;
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070047
kumarashishg826308d2023-06-23 13:21:22 +000048ByteString CPDF_StructTree::GetRoleMapNameFor(const ByteString& type) const {
49 if (m_pRoleMap) {
50 ByteString mapped = m_pRoleMap->GetNameFor(type);
51 if (!mapped.IsEmpty())
52 return mapped;
53 }
54 return type;
55}
56
57void CPDF_StructTree::LoadPageTree(RetainPtr<const CPDF_Dictionary> pPageDict) {
58 m_pPage = std::move(pPageDict);
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070059 if (!m_pTreeRoot)
60 return;
61
kumarashishg826308d2023-06-23 13:21:22 +000062 RetainPtr<const CPDF_Object> pKids = m_pTreeRoot->GetDirectObjectFor("K");
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070063 if (!pKids)
64 return;
65
66 uint32_t dwKids = 0;
67 if (pKids->IsDictionary())
68 dwKids = 1;
Haibo Huang49cc9302020-04-27 16:14:24 -070069 else if (const CPDF_Array* pArray = pKids->AsArray())
kumarashishg826308d2023-06-23 13:21:22 +000070 dwKids = fxcrt::CollectionSize<uint32_t>(*pArray);
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070071 else
72 return;
73
74 m_Kids.clear();
75 m_Kids.resize(dwKids);
kumarashishg826308d2023-06-23 13:21:22 +000076
77 RetainPtr<const CPDF_Dictionary> pParentTree =
78 m_pTreeRoot->GetDictFor("ParentTree");
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070079 if (!pParentTree)
80 return;
81
kumarashishg826308d2023-06-23 13:21:22 +000082 CPDF_NumberTree parent_tree(std::move(pParentTree));
83 int parents_id = m_pPage->GetIntegerFor("StructParents", -1);
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070084 if (parents_id < 0)
85 return;
86
kumarashishg826308d2023-06-23 13:21:22 +000087 RetainPtr<const CPDF_Array> pParentArray =
88 ToArray(parent_tree.LookupValue(parents_id));
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070089 if (!pParentArray)
90 return;
91
Haibo Huang49cc9302020-04-27 16:14:24 -070092 StructElementMap element_map;
93 for (size_t i = 0; i < pParentArray->size(); i++) {
kumarashishg826308d2023-06-23 13:21:22 +000094 RetainPtr<const CPDF_Dictionary> pParent = pParentArray->GetDictAt(i);
95 if (pParent)
96 AddPageNode(std::move(pParent), &element_map, 0);
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070097 }
98}
99
100RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode(
kumarashishg826308d2023-06-23 13:21:22 +0000101 RetainPtr<const CPDF_Dictionary> pDict,
Haibo Huang49cc9302020-04-27 16:14:24 -0700102 StructElementMap* map,
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700103 int nLevel) {
104 static constexpr int kStructTreeMaxRecursion = 32;
105 if (nLevel > kStructTreeMaxRecursion)
106 return nullptr;
107
108 auto it = map->find(pDict);
109 if (it != map->end())
110 return it->second;
111
kumarashishg826308d2023-06-23 13:21:22 +0000112 RetainPtr<const CPDF_Dictionary> key(pDict);
113 auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, pDict);
114 (*map)[key] = pElement;
115 RetainPtr<const CPDF_Dictionary> pParent = pDict->GetDictFor("P");
116 if (!pParent || pParent->GetNameFor("Type") == "StructTreeRoot") {
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700117 if (!AddTopLevelNode(pDict, pElement))
kumarashishg826308d2023-06-23 13:21:22 +0000118 map->erase(key);
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700119 return pElement;
120 }
121
122 RetainPtr<CPDF_StructElement> pParentElement =
kumarashishg826308d2023-06-23 13:21:22 +0000123 AddPageNode(std::move(pParent), map, nLevel + 1);
124 if (!pParentElement)
125 return pElement;
126
127 if (!pParentElement->UpdateKidIfElement(pDict, pElement.Get()))
128 map->erase(key);
129
130 pElement->SetParent(pParentElement.Get());
131
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700132 return pElement;
133}
134
135bool CPDF_StructTree::AddTopLevelNode(
Haibo Huang49cc9302020-04-27 16:14:24 -0700136 const CPDF_Dictionary* pDict,
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700137 const RetainPtr<CPDF_StructElement>& pElement) {
kumarashishg826308d2023-06-23 13:21:22 +0000138 RetainPtr<const CPDF_Object> pObj = m_pTreeRoot->GetDirectObjectFor("K");
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700139 if (!pObj)
140 return false;
141
142 if (pObj->IsDictionary()) {
143 if (pObj->GetObjNum() != pDict->GetObjNum())
144 return false;
145 m_Kids[0] = pElement;
146 }
147
Haibo Huang49cc9302020-04-27 16:14:24 -0700148 const CPDF_Array* pTopKids = pObj->AsArray();
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700149 if (!pTopKids)
150 return true;
151
152 bool bSave = false;
Haibo Huang49cc9302020-04-27 16:14:24 -0700153 for (size_t i = 0; i < pTopKids->size(); i++) {
kumarashishg826308d2023-06-23 13:21:22 +0000154 RetainPtr<const CPDF_Reference> pKidRef =
155 ToReference(pTopKids->GetObjectAt(i));
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -0700156 if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) {
157 m_Kids[i] = pElement;
158 bSave = true;
159 }
160 }
161 return bSave;
162}