Add APIs for limited use of document tagged code.

BUG=pdfium:568

Review-Url: https://codereview.chromium.org/2519343002
diff --git a/BUILD.gn b/BUILD.gn
index 84a415c..29572e7 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -97,6 +97,7 @@
     "fpdfsdk/fpdf_flatten.cpp",
     "fpdfsdk/fpdf_progressive.cpp",
     "fpdfsdk/fpdf_searchex.cpp",
+    "fpdfsdk/fpdf_structtree.cpp",
     "fpdfsdk/fpdf_sysfontinfo.cpp",
     "fpdfsdk/fpdf_transformpage.cpp",
     "fpdfsdk/fpdfdoc.cpp",
@@ -124,6 +125,7 @@
     "public/fpdf_progressive.h",
     "public/fpdf_save.h",
     "public/fpdf_searchex.h",
+    "public/fpdf_structtree.h",
     "public/fpdf_sysfontinfo.h",
     "public/fpdf_text.h",
     "public/fpdf_transformpage.h",
@@ -1797,6 +1799,7 @@
     "core/fxge/ge/fx_ge_text_embeddertest.cpp",
     "fpdfsdk/fpdf_dataavail_embeddertest.cpp",
     "fpdfsdk/fpdf_flatten_embeddertest.cpp",
+    "fpdfsdk/fpdf_structtree_embeddertest.cpp",
     "fpdfsdk/fpdfdoc_embeddertest.cpp",
     "fpdfsdk/fpdfedit_embeddertest.cpp",
     "fpdfsdk/fpdfext_embeddertest.cpp",
diff --git a/fpdfsdk/fpdf_structtree.cpp b/fpdfsdk/fpdf_structtree.cpp
new file mode 100644
index 0000000..541c46b
--- /dev/null
+++ b/fpdfsdk/fpdf_structtree.cpp
@@ -0,0 +1,88 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "public/fpdf_structtree.h"
+
+#include "core/fpdfapi/page/cpdf_page.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfdoc/fpdf_tagged.h"
+#include "fpdfsdk/fsdk_define.h"
+
+namespace {
+
+IPDF_StructTree* ToStructTree(FPDF_STRUCTTREE struct_tree) {
+  return static_cast<IPDF_StructTree*>(struct_tree);  // void* handle.
+}
+
+IPDF_StructElement* ToStructTreeElement(FPDF_STRUCTELEMENT struct_element) {
+  return static_cast<IPDF_StructElement*>(struct_element);  // void* handle.
+}
+
+}  // namespace
+
+DLLEXPORT FPDF_STRUCTTREE STDCALL FPDF_StructTree_GetForPage(FPDF_PAGE page) {
+  CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
+  if (!pPage)
+    return nullptr;  // |page| did not resolve to a CPDF_Page.
+  return IPDF_StructTree::LoadPage(pPage->m_pDocument, pPage->m_pFormDict);
+}  // The returned tree is destroyed by FPDF_StructTree_Close().
+
+DLLEXPORT void STDCALL FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree) {
+  delete ToStructTree(struct_tree);  // Deleting a null handle is a no-op.
+}
+
+DLLEXPORT int STDCALL
+FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree) {
+  IPDF_StructTree* tree = ToStructTree(struct_tree);
+  return tree ? tree->CountTopElements() : -1;  // -1 for a null handle.
+}
+
+DLLEXPORT FPDF_STRUCTELEMENT STDCALL
+FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree, int index) {
+  IPDF_StructTree* tree = ToStructTree(struct_tree);
+  if (!tree || index < 0 || index >= tree->CountTopElements())
+    return nullptr;  // Null handle or |index| out of range.
+  return tree->GetTopElement(index);
+}
+
+DLLEXPORT unsigned long STDCALL
+FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,
+                              void* buffer,
+                              unsigned long buflen) {
+  IPDF_StructElement* elem = ToStructTreeElement(struct_element);
+  if (!elem)
+    return 0;  // Null handle.
+
+  CPDF_Dictionary* dict = elem->GetDict();
+  if (!dict)
+    return 0;  // No dictionary to read "Alt" from.
+
+  CFX_WideString str = dict->GetUnicodeTextFor("Alt");
+  if (str.IsEmpty())
+    return 0;  // No alt text to report.
+
+  CFX_ByteString encodedStr = str.UTF16LE_Encode();
+  const unsigned long len = encodedStr.GetLength();
+  if (buffer && len <= buflen)  // Never write a partial string.
+    FXSYS_memcpy(buffer, encodedStr.c_str(), len);
+  return len;  // Bytes required, whether or not anything was copied.
+}
+
+DLLEXPORT int STDCALL
+FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element) {
+  IPDF_StructElement* elem = ToStructTreeElement(struct_element);
+  return elem ? elem->CountKids() : -1;  // -1 for a null handle.
+}
+
+DLLEXPORT FPDF_STRUCTELEMENT STDCALL
+FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element,
+                                   int index) {
+  IPDF_StructElement* elem = ToStructTreeElement(struct_element);
+  if (!elem || index < 0 || index >= elem->CountKids())
+    return nullptr;  // Null handle or |index| out of range.
+
+  CPDF_StructKid kid = elem->GetKid(index);
+  return kid.m_Type == CPDF_StructKid::Element ? kid.m_Element.m_pElement
+                                               : nullptr;  // Not an element.
+}
diff --git a/fpdfsdk/fpdf_structtree_embeddertest.cpp b/fpdfsdk/fpdf_structtree_embeddertest.cpp
new file mode 100644
index 0000000..58b3172
--- /dev/null
+++ b/fpdfsdk/fpdf_structtree_embeddertest.cpp
@@ -0,0 +1,70 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fxcrt/fx_string.h"
+#include "public/fpdf_structtree.h"
+#include "testing/embedder_test.h"
+#include "testing/test_support.h"
+
+class FPDFStructTreeEmbeddertest : public EmbedderTest, public TestSaver {};
+
+TEST_F(FPDFStructTreeEmbeddertest, GetAltText) {
+  ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+
+  FPDF_STRUCTTREE struct_tree = FPDF_StructTree_GetForPage(page);
+  ASSERT_TRUE(struct_tree);
+  ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree));
+
+  FPDF_STRUCTELEMENT element = FPDF_StructTree_GetChildAtIndex(struct_tree, -1);
+  EXPECT_EQ(nullptr, element);  // Negative index.
+  element = FPDF_StructTree_GetChildAtIndex(struct_tree, 1);
+  EXPECT_EQ(nullptr, element);  // Index == child count.
+  element = FPDF_StructTree_GetChildAtIndex(struct_tree, 0);
+  ASSERT_NE(nullptr, element);
+  EXPECT_EQ(0U, FPDF_StructElement_GetAltText(element, nullptr, 0));
+
+  ASSERT_EQ(1, FPDF_StructElement_CountChildren(element));
+  FPDF_STRUCTELEMENT child_element =
+      FPDF_StructElement_GetChildAtIndex(element, -1);
+  EXPECT_EQ(nullptr, child_element);  // Negative index.
+  child_element = FPDF_StructElement_GetChildAtIndex(element, 1);
+  EXPECT_EQ(nullptr, child_element);  // Index == child count.
+  child_element = FPDF_StructElement_GetChildAtIndex(element, 0);
+  ASSERT_NE(nullptr, child_element);
+  EXPECT_EQ(0U, FPDF_StructElement_GetAltText(child_element, nullptr, 0));
+
+  ASSERT_EQ(1, FPDF_StructElement_CountChildren(child_element));
+  FPDF_STRUCTELEMENT gchild_element =
+      FPDF_StructElement_GetChildAtIndex(child_element, -1);
+  EXPECT_EQ(nullptr, gchild_element);  // Negative index.
+  gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, 1);
+  EXPECT_EQ(nullptr, gchild_element);  // Index == child count.
+  gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, 0);
+  ASSERT_NE(nullptr, gchild_element);
+  ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, nullptr, 0));
+
+  unsigned short buffer[12];  // 24 bytes, matching the size reported above.
+  memset(buffer, 0, sizeof(buffer));
+  // Deliberately pass in a small buffer size to make sure |buffer| remains
+  // untouched.
+  ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, buffer, 1));
+  for (size_t i = 0; i < FX_ArraySize(buffer); ++i)
+    EXPECT_EQ(0U, buffer[i]);
+
+  ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, buffer,
+                                               sizeof(buffer)));
+  const FX_WCHAR kExpected[] = L"Black Image";
+  EXPECT_EQ(CFX_WideString(kExpected),
+            CFX_WideString::FromUTF16LE(buffer, FXSYS_len(kExpected)));
+
+  ASSERT_EQ(1, FPDF_StructElement_CountChildren(gchild_element));
+  FPDF_STRUCTELEMENT ggchild_element =
+      FPDF_StructElement_GetChildAtIndex(gchild_element, 0);
+  EXPECT_EQ(nullptr, ggchild_element);  // In range, but not an element kid.
+
+  FPDF_StructTree_Close(struct_tree);
+  FPDF_ClosePage(page);  // NOTE(review): or UnloadPage(page)? confirm.
+}
diff --git a/fpdfsdk/fpdfdoc.cpp b/fpdfsdk/fpdfdoc.cpp
index 254be3f..2dcf606 100644
--- a/fpdfsdk/fpdfdoc.cpp
+++ b/fpdfsdk/fpdfdoc.cpp
@@ -64,7 +64,7 @@
                                                   unsigned long buflen) {
   CFX_ByteString encodedText = text.UTF16LE_Encode();
   unsigned long len = encodedText.GetLength();
-  if (buffer && buflen >= len)
+  if (buffer && len <= buflen)
     FXSYS_memcpy(buffer, encodedText.c_str(), len);
   return len;
 }
@@ -186,7 +186,7 @@
   CPDF_Action action(ToDictionary(static_cast<CPDF_Object*>(pDict)));
   CFX_ByteString path = action.GetFilePath().UTF8Encode();
   unsigned long len = path.GetLength() + 1;
-  if (buffer && buflen >= len)
+  if (buffer && len <= buflen)
     FXSYS_memcpy(buffer, path.c_str(), len);
   return len;
 }
@@ -203,7 +203,7 @@
   CPDF_Action action(ToDictionary(static_cast<CPDF_Object*>(pDict)));
   CFX_ByteString path = action.GetURI(pDoc);
   unsigned long len = path.GetLength() + 1;
-  if (buffer && buflen >= len)
+  if (buffer && len <= buflen)
     FXSYS_memcpy(buffer, path.c_str(), len);
   return len;
 }
diff --git a/fpdfsdk/fpdfview.cpp b/fpdfsdk/fpdfview.cpp
index 959bf14..3f5115a 100644
--- a/fpdfsdk/fpdfview.cpp
+++ b/fpdfsdk/fpdfview.cpp
@@ -1110,7 +1110,7 @@
   int len = utf16Name.GetLength();
   if (!buffer) {
     *buflen = len;
-  } else if (*buflen >= len) {
+  } else if (len <= *buflen) {
     memcpy(buffer, utf16Name.c_str(), len);
     *buflen = len;
   } else {
diff --git a/fpdfsdk/fpdfview_c_api_test.c b/fpdfsdk/fpdfview_c_api_test.c
index 5e6c36f..ed9a3fa 100644
--- a/fpdfsdk/fpdfview_c_api_test.c
+++ b/fpdfsdk/fpdfview_c_api_test.c
@@ -20,6 +20,7 @@
 #include "public/fpdf_progressive.h"
 #include "public/fpdf_save.h"
 #include "public/fpdf_searchex.h"
+#include "public/fpdf_structtree.h"
 #include "public/fpdf_sysfontinfo.h"
 #include "public/fpdf_text.h"
 #include "public/fpdf_transformpage.h"
@@ -154,6 +155,15 @@
     // fpdf_searchex.h
     CHK(FPDFText_GetCharIndexFromTextIndex);
 
+    // fpdf_structtree.h
+    CHK(FPDF_StructTree_GetForPage);
+    CHK(FPDF_StructTree_Close);
+    CHK(FPDF_StructTree_CountChildren);
+    CHK(FPDF_StructTree_GetChildAtIndex);
+    CHK(FPDF_StructElement_GetAltText);
+    CHK(FPDF_StructElement_CountChildren);
+    CHK(FPDF_StructElement_GetChildAtIndex);
+
     // fpdf_sysfontinfo.h
     CHK(FPDF_GetDefaultTTFMap);
     CHK(FPDF_AddInstalledFont);
diff --git a/public/fpdf_doc.h b/public/fpdf_doc.h
index b245d46..10f8995 100644
--- a/public/fpdf_doc.h
+++ b/public/fpdf_doc.h
@@ -68,7 +68,7 @@
 // |buflen| parameters.
 //
 // Regardless of the platform, the |buffer| is always in UTF-16LE encoding. The
-// string is terminated by a UTF16 NUL character. If |buflen| is less then the
+// string is terminated by a UTF16 NUL character. If |buflen| is less than the
 // required length, or |buffer| is NULL, |buffer| will not be modified.
 DLLEXPORT unsigned long STDCALL FPDFBookmark_GetTitle(FPDF_BOOKMARK bookmark,
                                                       void* buffer,
@@ -142,7 +142,7 @@
 // NUL character.
 //
 // Regardless of the platform, the |buffer| is always in UTF-16LE encoding.
-// If |buflen| is less then the returned length, or |buffer| is NULL, |buffer|
+// If |buflen| is less than the returned length, or |buffer| is NULL, |buffer|
 // will not be modified.
 DLLEXPORT unsigned long STDCALL
 FPDFAction_GetFilePath(FPDF_ACTION action, void* buffer, unsigned long buflen);
@@ -156,7 +156,7 @@
 //
 // Returns the number of bytes in the URI path, including trailing zeros.
 //
-// The |buffer| is always encoded in 7-bit ASCII. If |buflen| is less then the
+// The |buffer| is always encoded in 7-bit ASCII. If |buflen| is less than the
 // returned length, or |buffer| is NULL, |buffer| will not be modified.
 DLLEXPORT unsigned long STDCALL FPDFAction_GetURIPath(FPDF_DOCUMENT document,
                                                       FPDF_ACTION action,
diff --git a/public/fpdf_formfill.h b/public/fpdf_formfill.h
index ada87d3..09b80ea 100644
--- a/public/fpdf_formfill.h
+++ b/public/fpdf_formfill.h
@@ -149,10 +149,8 @@
   *       The filePath should be always input in local encoding.
   *
   *       The return value always indicated number of bytes required for the
-  * buffer, even when there is
-  *       no buffer specified, or the buffer size is less then required. In this
-  * case, the buffer will not
-  *       be modified.
+  *       buffer, even when there is no buffer specified, or the buffer size is
+  *       less than required. In this case, the buffer will not be modified.
   */
   int (*Doc_getFilePath)(struct _IPDF_JsPlatform* pThis,
                          void* filePath,
diff --git a/public/fpdf_structtree.h b/public/fpdf_structtree.h
new file mode 100644
index 0000000..3d4da40
--- /dev/null
+++ b/public/fpdf_structtree.h
@@ -0,0 +1,103 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef PUBLIC_FPDF_STRUCTTREE_H_
+#define PUBLIC_FPDF_STRUCTTREE_H_
+
+// NOLINTNEXTLINE(build/include)
+#include "fpdfview.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Function: FPDF_StructTree_GetForPage
+//          Get the structure tree for a page.
+// Parameters:
+//          page        -   Handle to the page. Returned by FPDF_LoadPage
+//          function.
+// Return value:
+//          A handle to the structure tree or NULL on error.
+DLLEXPORT FPDF_STRUCTTREE STDCALL FPDF_StructTree_GetForPage(FPDF_PAGE page);
+
+// Function: FPDF_StructTree_Close
+//          Release the resources allocated by FPDF_StructTree_GetForPage.
+// Parameters:
+//          struct_tree -   Handle to the struct tree. Returned by
+//          FPDF_StructTree_GetForPage function.
+// Return value:
+//          None.
+DLLEXPORT void STDCALL FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree);
+
+// Function: FPDF_StructTree_CountChildren
+//          Count the number of children for the structure tree.
+// Parameters:
+//          struct_tree -   Handle to the struct tree. Returned by
+//          FPDF_StructTree_GetForPage function.
+// Return value:
+//          The number of children, or -1 on error.
+DLLEXPORT int STDCALL
+FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree);
+
+// Function: FPDF_StructTree_GetChildAtIndex
+//          Get a child in the structure tree.
+// Parameters:
+//          struct_tree -   Handle to the struct tree. Returned by
+//          FPDF_StructTree_GetForPage function.
+//          index       -   The index for the child, 0-based.
+// Return value:
+//          The child at the n-th index or NULL on error.
+DLLEXPORT FPDF_STRUCTELEMENT STDCALL
+FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree, int index);
+
+// Function: FPDF_StructElement_GetAltText
+//          Get the alt text for a given element.
+// Parameters:
+//          struct_element -   Handle to the struct element.
+//          buffer         -   A buffer to receive the alt text. May be NULL.
+//          buflen         -   The length of the buffer, in bytes. May be 0.
+// Return value:
+//          The number of bytes in the alt text, including the terminating NUL
+//          character. The number of bytes is returned regardless of the
+//          |buffer| and |buflen| parameters.
+// Comments:
+//          Regardless of the platform, the |buffer| is always in UTF-16LE
+//          encoding. The string is terminated by a UTF16 NUL character. If
+//          |buflen| is less than the required length, or |buffer| is NULL,
+//          |buffer| will not be modified.
+DLLEXPORT unsigned long STDCALL
+FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,
+                              void* buffer,
+                              unsigned long buflen);
+
+// Function: FPDF_StructElement_CountChildren
+//          Count the number of children for the structure element.
+// Parameters:
+//          struct_element -   Handle to the struct element.
+// Return value:
+//          The number of children, or -1 on error.
+DLLEXPORT int STDCALL
+FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element);
+
+// Function: FPDF_StructElement_GetChildAtIndex
+//          Get a child in the structure element.
+// Parameters:
+//          struct_element -   Handle to the struct element.
+//          index          -   The index for the child, 0-based.
+// Return value:
+//          The child at the n-th index or NULL on error.
+// Comments:
+//          If the child exists but is not an element, then this function will
+//          return NULL. This will also return NULL for out of bounds indices.
+DLLEXPORT FPDF_STRUCTELEMENT STDCALL
+FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element,
+                                   int index);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // PUBLIC_FPDF_STRUCTTREE_H_
diff --git a/public/fpdfview.h b/public/fpdfview.h
index 469053c..581951c 100644
--- a/public/fpdfview.h
+++ b/public/fpdfview.h
@@ -39,6 +39,8 @@
 typedef void* FPDF_PATH;
 typedef void* FPDF_RECORDER;
 typedef void* FPDF_SCHHANDLE;
+typedef void* FPDF_STRUCTELEMENT;
+typedef void* FPDF_STRUCTTREE;
 typedef void* FPDF_TEXTPAGE;
 
 #ifdef PDF_ENABLE_XFA
diff --git a/testing/resources/tagged_alt_text.pdf b/testing/resources/tagged_alt_text.pdf
new file mode 100644
index 0000000..a899ce1
--- /dev/null
+++ b/testing/resources/tagged_alt_text.pdf
Binary files differ