kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 1 | // Copyright 2017 The PDFium Authors |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | |
| 7 | #include "core/fpdfdoc/cpdf_structtree.h" |
| 8 | |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 9 | #include <utility> |
| 10 | |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 11 | #include "core/fpdfapi/parser/cpdf_array.h" |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 12 | #include "core/fpdfapi/parser/cpdf_dictionary.h" |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 13 | #include "core/fpdfapi/parser/cpdf_document.h" |
| 14 | #include "core/fpdfapi/parser/cpdf_number.h" |
| 15 | #include "core/fpdfapi/parser/cpdf_reference.h" |
| 16 | #include "core/fpdfdoc/cpdf_numbertree.h" |
| 17 | #include "core/fpdfdoc/cpdf_structelement.h" |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 18 | #include "core/fxcrt/stl_util.h" |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 19 | |
| 20 | namespace { |
| 21 | |
| 22 | bool IsTagged(const CPDF_Document* pDoc) { |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 23 | RetainPtr<const CPDF_Dictionary> pMarkInfo = |
| 24 | pDoc->GetRoot()->GetDictFor("MarkInfo"); |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 25 | return pMarkInfo && pMarkInfo->GetIntegerFor("Marked"); |
| 26 | } |
| 27 | |
| 28 | } // namespace |
| 29 | |
| 30 | // static |
| 31 | std::unique_ptr<CPDF_StructTree> CPDF_StructTree::LoadPage( |
| 32 | const CPDF_Document* pDoc, |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 33 | RetainPtr<const CPDF_Dictionary> pPageDict) { |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 34 | if (!IsTagged(pDoc)) |
| 35 | return nullptr; |
| 36 | |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 37 | auto pTree = std::make_unique<CPDF_StructTree>(pDoc); |
| 38 | pTree->LoadPageTree(std::move(pPageDict)); |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 39 | return pTree; |
| 40 | } |
| 41 | |
| 42 | CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc) |
| 43 | : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")), |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 44 | m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr) {} |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 45 | |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 46 | CPDF_StructTree::~CPDF_StructTree() = default; |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 47 | |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 48 | ByteString CPDF_StructTree::GetRoleMapNameFor(const ByteString& type) const { |
| 49 | if (m_pRoleMap) { |
| 50 | ByteString mapped = m_pRoleMap->GetNameFor(type); |
| 51 | if (!mapped.IsEmpty()) |
| 52 | return mapped; |
| 53 | } |
| 54 | return type; |
| 55 | } |
| 56 | |
| 57 | void CPDF_StructTree::LoadPageTree(RetainPtr<const CPDF_Dictionary> pPageDict) { |
| 58 | m_pPage = std::move(pPageDict); |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 59 | if (!m_pTreeRoot) |
| 60 | return; |
| 61 | |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 62 | RetainPtr<const CPDF_Object> pKids = m_pTreeRoot->GetDirectObjectFor("K"); |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 63 | if (!pKids) |
| 64 | return; |
| 65 | |
| 66 | uint32_t dwKids = 0; |
| 67 | if (pKids->IsDictionary()) |
| 68 | dwKids = 1; |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 69 | else if (const CPDF_Array* pArray = pKids->AsArray()) |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 70 | dwKids = fxcrt::CollectionSize<uint32_t>(*pArray); |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 71 | else |
| 72 | return; |
| 73 | |
| 74 | m_Kids.clear(); |
| 75 | m_Kids.resize(dwKids); |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 76 | |
| 77 | RetainPtr<const CPDF_Dictionary> pParentTree = |
| 78 | m_pTreeRoot->GetDictFor("ParentTree"); |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 79 | if (!pParentTree) |
| 80 | return; |
| 81 | |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 82 | CPDF_NumberTree parent_tree(std::move(pParentTree)); |
| 83 | int parents_id = m_pPage->GetIntegerFor("StructParents", -1); |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 84 | if (parents_id < 0) |
| 85 | return; |
| 86 | |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 87 | RetainPtr<const CPDF_Array> pParentArray = |
| 88 | ToArray(parent_tree.LookupValue(parents_id)); |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 89 | if (!pParentArray) |
| 90 | return; |
| 91 | |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 92 | StructElementMap element_map; |
| 93 | for (size_t i = 0; i < pParentArray->size(); i++) { |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 94 | RetainPtr<const CPDF_Dictionary> pParent = pParentArray->GetDictAt(i); |
| 95 | if (pParent) |
| 96 | AddPageNode(std::move(pParent), &element_map, 0); |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 97 | } |
| 98 | } |
| 99 | |
| 100 | RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode( |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 101 | RetainPtr<const CPDF_Dictionary> pDict, |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 102 | StructElementMap* map, |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 103 | int nLevel) { |
| 104 | static constexpr int kStructTreeMaxRecursion = 32; |
| 105 | if (nLevel > kStructTreeMaxRecursion) |
| 106 | return nullptr; |
| 107 | |
| 108 | auto it = map->find(pDict); |
| 109 | if (it != map->end()) |
| 110 | return it->second; |
| 111 | |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 112 | RetainPtr<const CPDF_Dictionary> key(pDict); |
| 113 | auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, pDict); |
| 114 | (*map)[key] = pElement; |
| 115 | RetainPtr<const CPDF_Dictionary> pParent = pDict->GetDictFor("P"); |
| 116 | if (!pParent || pParent->GetNameFor("Type") == "StructTreeRoot") { |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 117 | if (!AddTopLevelNode(pDict, pElement)) |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 118 | map->erase(key); |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 119 | return pElement; |
| 120 | } |
| 121 | |
| 122 | RetainPtr<CPDF_StructElement> pParentElement = |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 123 | AddPageNode(std::move(pParent), map, nLevel + 1); |
| 124 | if (!pParentElement) |
| 125 | return pElement; |
| 126 | |
| 127 | if (!pParentElement->UpdateKidIfElement(pDict, pElement.Get())) |
| 128 | map->erase(key); |
| 129 | |
| 130 | pElement->SetParent(pParentElement.Get()); |
| 131 | |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 132 | return pElement; |
| 133 | } |
| 134 | |
| 135 | bool CPDF_StructTree::AddTopLevelNode( |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 136 | const CPDF_Dictionary* pDict, |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 137 | const RetainPtr<CPDF_StructElement>& pElement) { |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 138 | RetainPtr<const CPDF_Object> pObj = m_pTreeRoot->GetDirectObjectFor("K"); |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 139 | if (!pObj) |
| 140 | return false; |
| 141 | |
| 142 | if (pObj->IsDictionary()) { |
| 143 | if (pObj->GetObjNum() != pDict->GetObjNum()) |
| 144 | return false; |
| 145 | m_Kids[0] = pElement; |
| 146 | } |
| 147 | |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 148 | const CPDF_Array* pTopKids = pObj->AsArray(); |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 149 | if (!pTopKids) |
| 150 | return true; |
| 151 | |
| 152 | bool bSave = false; |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 153 | for (size_t i = 0; i < pTopKids->size(); i++) { |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 154 | RetainPtr<const CPDF_Reference> pKidRef = |
| 155 | ToReference(pTopKids->GetObjectAt(i)); |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 156 | if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) { |
| 157 | m_Kids[i] = pElement; |
| 158 | bSave = true; |
| 159 | } |
| 160 | } |
| 161 | return bSave; |
| 162 | } |