Add APIs for limited use of document tagged code.
BUG=pdfium:568
Review-Url: https://codereview.chromium.org/2519343002
diff --git a/BUILD.gn b/BUILD.gn
index 84a415c..29572e7 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -97,6 +97,7 @@
"fpdfsdk/fpdf_flatten.cpp",
"fpdfsdk/fpdf_progressive.cpp",
"fpdfsdk/fpdf_searchex.cpp",
+ "fpdfsdk/fpdf_structtree.cpp",
"fpdfsdk/fpdf_sysfontinfo.cpp",
"fpdfsdk/fpdf_transformpage.cpp",
"fpdfsdk/fpdfdoc.cpp",
@@ -124,6 +125,7 @@
"public/fpdf_progressive.h",
"public/fpdf_save.h",
"public/fpdf_searchex.h",
+ "public/fpdf_structtree.h",
"public/fpdf_sysfontinfo.h",
"public/fpdf_text.h",
"public/fpdf_transformpage.h",
@@ -1797,6 +1799,7 @@
"core/fxge/ge/fx_ge_text_embeddertest.cpp",
"fpdfsdk/fpdf_dataavail_embeddertest.cpp",
"fpdfsdk/fpdf_flatten_embeddertest.cpp",
+ "fpdfsdk/fpdf_structtree_embeddertest.cpp",
"fpdfsdk/fpdfdoc_embeddertest.cpp",
"fpdfsdk/fpdfedit_embeddertest.cpp",
"fpdfsdk/fpdfext_embeddertest.cpp",
diff --git a/fpdfsdk/fpdf_structtree.cpp b/fpdfsdk/fpdf_structtree.cpp
new file mode 100644
index 0000000..541c46b
--- /dev/null
+++ b/fpdfsdk/fpdf_structtree.cpp
@@ -0,0 +1,88 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "public/fpdf_structtree.h"
+
+#include "core/fpdfapi/page/cpdf_page.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfdoc/fpdf_tagged.h"
+#include "fpdfsdk/fsdk_define.h"
+
+namespace {
+
+IPDF_StructTree* ToStructTree(FPDF_STRUCTTREE struct_tree) {
+ return reinterpret_cast<IPDF_StructTree*>(struct_tree);  // Unchecked cast of the public handle.
+}
+
+IPDF_StructElement* ToStructTreeElement(FPDF_STRUCTELEMENT struct_element) {
+ return reinterpret_cast<IPDF_StructElement*>(struct_element);  // Unchecked cast of the public handle.
+}
+
+} // namespace
+
+DLLEXPORT FPDF_STRUCTTREE STDCALL FPDF_StructTree_GetForPage(FPDF_PAGE page) {
+  // A page handle that does not resolve to a CPDF_Page yields no tree.
+  CPDF_Page* const src = CPDFPageFromFPDFPage(page);
+  return src ? IPDF_StructTree::LoadPage(src->m_pDocument, src->m_pFormDict)
+             : nullptr;
+}
+
+DLLEXPORT void STDCALL FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree) {
+ delete ToStructTree(struct_tree);  // Destroys the tree; a null handle is a no-op.
+}
+
+DLLEXPORT int STDCALL
+FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree) {
+  IPDF_StructTree* const root = ToStructTree(struct_tree);
+  return root == nullptr ? -1 : root->CountTopElements();  // -1: null handle.
+}
+
+DLLEXPORT FPDF_STRUCTELEMENT STDCALL
+FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree, int index) {
+  IPDF_StructTree* const root = ToStructTree(struct_tree);
+  // Accept only a non-null tree and an index in [0, CountTopElements()).
+  const bool valid = root && index >= 0 && index < root->CountTopElements();
+  return valid ? root->GetTopElement(index) : nullptr;
+}
+
+// Writes the element's "Alt" entry to |buffer| as UTF-16LE if it fits in
+// |buflen| bytes. Returns the encoded byte length regardless, or 0 when the
+// handle is null, the element has no dictionary, or the text is empty.
+DLLEXPORT unsigned long STDCALL
+FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,
+                              void* buffer,
+                              unsigned long buflen) {
+  IPDF_StructElement* elem = ToStructTreeElement(struct_element);
+  CPDF_Dictionary* dict = elem ? elem->GetDict() : nullptr;
+  if (!dict)
+    return 0;
+
+  CFX_WideString str = dict->GetUnicodeTextFor("Alt");  // Reuse checked |dict|.
+  if (str.IsEmpty())
+    return 0;
+
+  CFX_ByteString encodedStr = str.UTF16LE_Encode();
+  const unsigned long len = encodedStr.GetLength();
+  if (buffer && len <= buflen)  // Leave |buffer| untouched unless it all fits.
+    FXSYS_memcpy(buffer, encodedStr.c_str(), len);
+  return len;
+}
+
+DLLEXPORT int STDCALL
+FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element) {
+  IPDF_StructElement* const node = ToStructTreeElement(struct_element);
+  return node == nullptr ? -1 : node->CountKids();  // -1: null handle.
+}
+
+DLLEXPORT FPDF_STRUCTELEMENT STDCALL
+FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element,
+                                   int index) {
+  IPDF_StructElement* const parent = ToStructTreeElement(struct_element);
+  if (!(parent && index >= 0 && index < parent->CountKids()))
+    return nullptr;
+  // A kid of any type other than Element is reported as nullptr.
+  CPDF_StructKid child = parent->GetKid(index);
+  const bool is_element = child.m_Type == CPDF_StructKid::Element;
+  return is_element ? child.m_Element.m_pElement : nullptr;
+}
diff --git a/fpdfsdk/fpdf_structtree_embeddertest.cpp b/fpdfsdk/fpdf_structtree_embeddertest.cpp
new file mode 100644
index 0000000..58b3172
--- /dev/null
+++ b/fpdfsdk/fpdf_structtree_embeddertest.cpp
@@ -0,0 +1,70 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fxcrt/fx_string.h"
+#include "public/fpdf_structtree.h"
+#include "testing/embedder_test.h"
+#include "testing/test_support.h"
+
+class FPDFStructTreeEmbeddertest : public EmbedderTest, public TestSaver {};  // Fixture for the TEST_F below.
+
+TEST_F(FPDFStructTreeEmbeddertest, GetAltText) {  // Walks three levels down to the alt text.
+ ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ ASSERT_TRUE(page);
+
+ FPDF_STRUCTTREE struct_tree = FPDF_StructTree_GetForPage(page);
+ ASSERT_TRUE(struct_tree);
+ ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree));
+
+ FPDF_STRUCTELEMENT element = FPDF_StructTree_GetChildAtIndex(struct_tree, -1);
+ EXPECT_EQ(nullptr, element);  // Negative index is rejected.
+ element = FPDF_StructTree_GetChildAtIndex(struct_tree, 1);
+ EXPECT_EQ(nullptr, element);  // Index equal to the child count is rejected.
+ element = FPDF_StructTree_GetChildAtIndex(struct_tree, 0);
+ ASSERT_NE(nullptr, element);
+ EXPECT_EQ(0U, FPDF_StructElement_GetAltText(element, nullptr, 0));  // No alt text at this level.
+
+ ASSERT_EQ(1, FPDF_StructElement_CountChildren(element));
+ FPDF_STRUCTELEMENT child_element =
+ FPDF_StructElement_GetChildAtIndex(element, -1);
+ EXPECT_EQ(nullptr, child_element);
+ child_element = FPDF_StructElement_GetChildAtIndex(element, 1);
+ EXPECT_EQ(nullptr, child_element);
+ child_element = FPDF_StructElement_GetChildAtIndex(element, 0);
+ ASSERT_NE(nullptr, child_element);
+ EXPECT_EQ(0U, FPDF_StructElement_GetAltText(child_element, nullptr, 0));  // No alt text here either.
+
+ ASSERT_EQ(1, FPDF_StructElement_CountChildren(child_element));
+ FPDF_STRUCTELEMENT gchild_element =
+ FPDF_StructElement_GetChildAtIndex(child_element, -1);
+ EXPECT_EQ(nullptr, gchild_element);
+ gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, 1);
+ EXPECT_EQ(nullptr, gchild_element);
+ gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, 0);
+ ASSERT_NE(nullptr, gchild_element);
+ ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, nullptr, 0));  // Size query only.
+
+ unsigned short buffer[12];  // Room for "Black Image" plus a terminating NUL.
+ memset(buffer, 0, sizeof(buffer));
+ // Deliberately pass in a small buffer size to make sure |buffer| remains
+ // untouched.
+ ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, buffer, 1));
+ for (size_t i = 0; i < FX_ArraySize(buffer); ++i)
+ EXPECT_EQ(0U, buffer[i]);
+
+ ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, buffer,
+ sizeof(buffer)));
+ const FX_WCHAR kExpected[] = L"Black Image";
+ EXPECT_EQ(CFX_WideString(kExpected),
+ CFX_WideString::FromUTF16LE(buffer, FXSYS_len(kExpected)));
+
+ ASSERT_EQ(1, FPDF_StructElement_CountChildren(gchild_element));
+ FPDF_STRUCTELEMENT ggchild_element =
+ FPDF_StructElement_GetChildAtIndex(gchild_element, 0);
+ EXPECT_EQ(nullptr, ggchild_element);  // The one kid is counted but is not returned as an element.
+
+ FPDF_StructTree_Close(struct_tree);
+ FPDF_ClosePage(page);
+}
diff --git a/fpdfsdk/fpdfdoc.cpp b/fpdfsdk/fpdfdoc.cpp
index 254be3f..2dcf606 100644
--- a/fpdfsdk/fpdfdoc.cpp
+++ b/fpdfsdk/fpdfdoc.cpp
@@ -64,7 +64,7 @@
unsigned long buflen) {
CFX_ByteString encodedText = text.UTF16LE_Encode();
unsigned long len = encodedText.GetLength();
- if (buffer && buflen >= len)
+ if (buffer && len <= buflen)
FXSYS_memcpy(buffer, encodedText.c_str(), len);
return len;
}
@@ -186,7 +186,7 @@
CPDF_Action action(ToDictionary(static_cast<CPDF_Object*>(pDict)));
CFX_ByteString path = action.GetFilePath().UTF8Encode();
unsigned long len = path.GetLength() + 1;
- if (buffer && buflen >= len)
+ if (buffer && len <= buflen)
FXSYS_memcpy(buffer, path.c_str(), len);
return len;
}
@@ -203,7 +203,7 @@
CPDF_Action action(ToDictionary(static_cast<CPDF_Object*>(pDict)));
CFX_ByteString path = action.GetURI(pDoc);
unsigned long len = path.GetLength() + 1;
- if (buffer && buflen >= len)
+ if (buffer && len <= buflen)
FXSYS_memcpy(buffer, path.c_str(), len);
return len;
}
diff --git a/fpdfsdk/fpdfview.cpp b/fpdfsdk/fpdfview.cpp
index 959bf14..3f5115a 100644
--- a/fpdfsdk/fpdfview.cpp
+++ b/fpdfsdk/fpdfview.cpp
@@ -1110,7 +1110,7 @@
int len = utf16Name.GetLength();
if (!buffer) {
*buflen = len;
- } else if (*buflen >= len) {
+ } else if (len <= *buflen) {
memcpy(buffer, utf16Name.c_str(), len);
*buflen = len;
} else {
diff --git a/fpdfsdk/fpdfview_c_api_test.c b/fpdfsdk/fpdfview_c_api_test.c
index 5e6c36f..ed9a3fa 100644
--- a/fpdfsdk/fpdfview_c_api_test.c
+++ b/fpdfsdk/fpdfview_c_api_test.c
@@ -20,6 +20,7 @@
#include "public/fpdf_progressive.h"
#include "public/fpdf_save.h"
#include "public/fpdf_searchex.h"
+#include "public/fpdf_structtree.h"
#include "public/fpdf_sysfontinfo.h"
#include "public/fpdf_text.h"
#include "public/fpdf_transformpage.h"
@@ -154,6 +155,15 @@
// fpdf_searchex.h
CHK(FPDFText_GetCharIndexFromTextIndex);
+ // fpdf_structtree.h
+ CHK(FPDF_StructTree_GetForPage);
+ CHK(FPDF_StructTree_Close);
+ CHK(FPDF_StructTree_CountChildren);
+ CHK(FPDF_StructTree_GetChildAtIndex);
+ CHK(FPDF_StructElement_GetAltText);
+ CHK(FPDF_StructElement_CountChildren);
+ CHK(FPDF_StructElement_GetChildAtIndex);
+
// fpdf_sysfontinfo.h
CHK(FPDF_GetDefaultTTFMap);
CHK(FPDF_AddInstalledFont);
diff --git a/public/fpdf_doc.h b/public/fpdf_doc.h
index b245d46..10f8995 100644
--- a/public/fpdf_doc.h
+++ b/public/fpdf_doc.h
@@ -68,7 +68,7 @@
// |buflen| parameters.
//
// Regardless of the platform, the |buffer| is always in UTF-16LE encoding. The
-// string is terminated by a UTF16 NUL character. If |buflen| is less then the
+// string is terminated by a UTF16 NUL character. If |buflen| is less than the
// required length, or |buffer| is NULL, |buffer| will not be modified.
DLLEXPORT unsigned long STDCALL FPDFBookmark_GetTitle(FPDF_BOOKMARK bookmark,
void* buffer,
@@ -142,7 +142,7 @@
// NUL character.
//
// Regardless of the platform, the |buffer| is always in UTF-16LE encoding.
-// If |buflen| is less then the returned length, or |buffer| is NULL, |buffer|
+// If |buflen| is less than the returned length, or |buffer| is NULL, |buffer|
// will not be modified.
DLLEXPORT unsigned long STDCALL
FPDFAction_GetFilePath(FPDF_ACTION action, void* buffer, unsigned long buflen);
@@ -156,7 +156,7 @@
//
// Returns the number of bytes in the URI path, including trailing zeros.
//
-// The |buffer| is always encoded in 7-bit ASCII. If |buflen| is less then the
+// The |buffer| is always encoded in 7-bit ASCII. If |buflen| is less than the
// returned length, or |buffer| is NULL, |buffer| will not be modified.
DLLEXPORT unsigned long STDCALL FPDFAction_GetURIPath(FPDF_DOCUMENT document,
FPDF_ACTION action,
diff --git a/public/fpdf_formfill.h b/public/fpdf_formfill.h
index ada87d3..09b80ea 100644
--- a/public/fpdf_formfill.h
+++ b/public/fpdf_formfill.h
@@ -149,10 +149,8 @@
* The filePath should be always input in local encoding.
*
* The return value always indicated number of bytes required for the
- * buffer, even when there is
- * no buffer specified, or the buffer size is less then required. In this
- * case, the buffer will not
- * be modified.
+ * buffer, even when there is no buffer specified, or the buffer size is
+ * less than required. In this case, the buffer will not be modified.
*/
int (*Doc_getFilePath)(struct _IPDF_JsPlatform* pThis,
void* filePath,
diff --git a/public/fpdf_structtree.h b/public/fpdf_structtree.h
new file mode 100644
index 0000000..3d4da40
--- /dev/null
+++ b/public/fpdf_structtree.h
@@ -0,0 +1,103 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef PUBLIC_FPDF_STRUCTTREE_H_
+#define PUBLIC_FPDF_STRUCTTREE_H_
+
+// NOLINTNEXTLINE(build/include)
+#include "fpdfview.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Function: FPDF_StructTree_GetForPage
+// Get the structure tree for a page.
+// Parameters:
+// page - Handle to the page. Returned by FPDF_LoadPage
+// function.
+// Return value:
+// A handle to the structure tree or NULL on error.
+DLLEXPORT FPDF_STRUCTTREE STDCALL FPDF_StructTree_GetForPage(FPDF_PAGE page);
+
+// Function: FPDF_StructTree_Close
+// Release the resources allocated by FPDF_StructTree_GetForPage.
+// Parameters:
+// struct_tree - Handle to the struct tree. Returned by
+// FPDF_StructTree_GetForPage function.
+// Return value:
+// None.
+DLLEXPORT void STDCALL FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree);
+
+// Function: FPDF_StructTree_CountChildren
+// Count the number of children for the structure tree.
+// Parameters:
+// struct_tree - Handle to the struct tree. Returned by
+// FPDF_StructTree_GetForPage function.
+// Return value:
+// The number of children, or -1 on error.
+DLLEXPORT int STDCALL
+FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree);
+
+// Function: FPDF_StructTree_GetChildAtIndex
+// Get a child in the structure tree.
+// Parameters:
+// struct_tree - Handle to the struct tree. Returned by
+// FPDF_StructTree_GetForPage function.
+// index - The index for the child, 0-based.
+// Return value:
+// The child at the n-th index or NULL on error.
+DLLEXPORT FPDF_STRUCTELEMENT STDCALL
+FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree, int index);
+
+// Function: FPDF_StructElement_GetAltText
+// Get the alt text for a given element.
+// Parameters:
+// struct_element - Handle to the struct element.
+// buffer - A buffer for outputting the alt text. May be NULL.
+// buflen - The length of the buffer, in bytes. May be 0.
+// Return value:
+// The number of bytes in the alt text, including the terminating NUL
+// character. The number of bytes is returned regardless of the
+// |buffer| and |buflen| parameters.
+// Comments:
+// Regardless of the platform, the |buffer| is always in UTF-16LE
+// encoding. The string is terminated by a UTF16 NUL character. If
+// |buflen| is less than the required length, or |buffer| is NULL,
+// |buffer| will not be modified.
+DLLEXPORT unsigned long STDCALL
+FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,
+ void* buffer,
+ unsigned long buflen);
+
+// Function: FPDF_StructElement_CountChildren
+// Count the number of children for the structure element.
+// Parameters:
+// struct_element - Handle to the struct element.
+// Return value:
+// The number of children, or -1 on error.
+DLLEXPORT int STDCALL
+FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element);
+
+// Function: FPDF_StructElement_GetChildAtIndex
+// Get a child in the structure element.
+// Parameters:
+// struct_element - Handle to the struct element.
+// index - The index for the child, 0-based.
+// Return value:
+// The child at the n-th index or NULL on error.
+// Comments:
+// If the child exists but is not an element, then this function will
+// return NULL. This will also return NULL for out of bounds indices.
+DLLEXPORT FPDF_STRUCTELEMENT STDCALL
+FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element,
+ int index);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // PUBLIC_FPDF_STRUCTTREE_H_
diff --git a/public/fpdfview.h b/public/fpdfview.h
index 469053c..581951c 100644
--- a/public/fpdfview.h
+++ b/public/fpdfview.h
@@ -39,6 +39,8 @@
typedef void* FPDF_PATH;
typedef void* FPDF_RECORDER;
typedef void* FPDF_SCHHANDLE;
+typedef void* FPDF_STRUCTELEMENT;
+typedef void* FPDF_STRUCTTREE;
typedef void* FPDF_TEXTPAGE;
#ifdef PDF_ENABLE_XFA
diff --git a/testing/resources/tagged_alt_text.pdf b/testing/resources/tagged_alt_text.pdf
new file mode 100644
index 0000000..a899ce1
--- /dev/null
+++ b/testing/resources/tagged_alt_text.pdf
Binary files differ