Basic APIs and tests for extracting attachments

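1. Added APIs for extracting attachment properties and data:
   FPDFDoc_GetAttachment() and the FPDFAttachment_*() accessors, which
   replace FPDFDoc_GetAttachmentName().
  * Moved CFXByteStringFromFPDFWideString() and
    Utf16EncodeMaybeCopyAndReturnLength() into fpdfview.cpp and declared
    them in fsdk_define.h so the new APIs can share them.
  * Expanded the embedder test to cover all the new APIs.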

Bug=pdfium:174

Change-Id: I09bffd412410e9aea45faca442d2b72eefafef4e
Reviewed-on: https://pdfium-review.googlesource.com/7790
Reviewed-by: dsinclair <dsinclair@chromium.org>
Commit-Queue: dsinclair <dsinclair@chromium.org>
diff --git a/fpdfsdk/fpdfannot.cpp b/fpdfsdk/fpdfannot.cpp
index 412c80b..1c4345a 100644
--- a/fpdfsdk/fpdfannot.cpp
+++ b/fpdfsdk/fpdfannot.cpp
@@ -170,10 +170,6 @@
   return !!FPDFDOC_GetAnnotAP(pAnnotDict, CPDF_Annot::AppearanceMode::Normal);
 }
 
-CFX_ByteString CFXByteStringFromFPDFWideString(FPDF_WIDESTRING text) {
-  return CFX_WideString::FromUTF16LE(text, CFX_WideString::WStringLength(text))
-      .UTF8Encode();
-}
 void UpdateContentStream(CPDF_Form* pForm, CPDF_Stream* pStream) {
   ASSERT(pForm);
   ASSERT(pStream);
@@ -760,14 +756,9 @@
   if (!pAnnotDict)
     return 0;
 
-  CFX_ByteString contents =
-      pAnnotDict->GetUnicodeTextFor(CFXByteStringFromFPDFWideString(key))
-          .UTF16LE_Encode();
-  unsigned long len = contents.GetLength();
-  if (buffer && buflen >= len)
-    memcpy(buffer, contents.c_str(), len);
-
-  return len;
+  return Utf16EncodeMaybeCopyAndReturnLength(
+      pAnnotDict->GetUnicodeTextFor(CFXByteStringFromFPDFWideString(key)),
+      buffer, buflen);
 }
 
 DLLEXPORT int STDCALL FPDFAnnot_GetFlags(FPDF_ANNOTATION annot) {
diff --git a/fpdfsdk/fpdfattachment.cpp b/fpdfsdk/fpdfattachment.cpp
index e07d15b..337ab35 100644
--- a/fpdfsdk/fpdfattachment.cpp
+++ b/fpdfsdk/fpdfattachment.cpp
@@ -4,7 +4,10 @@
 
 #include "public/fpdf_attachment.h"
 
+#include "core/fpdfapi/page/cpdf_streamparser.h"
 #include "core/fpdfapi/parser/cpdf_document.h"
+#include "core/fpdfapi/parser/cpdf_string.h"
+#include "core/fpdfapi/parser/fpdf_parser_decode.h"
 #include "core/fpdfdoc/cpdf_filespec.h"
 #include "core/fpdfdoc/cpdf_nametree.h"
 #include "fpdfsdk/fsdk_define.h"
@@ -17,28 +20,119 @@
   return CPDF_NameTree(pDoc, "EmbeddedFiles").GetCount();
 }
 
-DLLEXPORT unsigned long STDCALL
-FPDFDoc_GetAttachmentName(FPDF_DOCUMENT document,
-                          int index,
-                          void* buffer,
-                          unsigned long buflen) {
+DLLEXPORT FPDF_ATTACHMENT STDCALL FPDFDoc_GetAttachment(FPDF_DOCUMENT document,
+                                                        int index) {
   CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
   if (!pDoc || index < 0)
-    return 0;
+    return nullptr;
 
   CPDF_NameTree nameTree(pDoc, "EmbeddedFiles");
   if (static_cast<size_t>(index) >= nameTree.GetCount())
-    return 0;
+    return nullptr;
 
   CFX_ByteString csName;
-  CPDF_Object* pFile = nameTree.LookupValueAndName(index, &csName);
+  return nameTree.LookupValueAndName(index, &csName);
+}
+
+DLLEXPORT unsigned long STDCALL
+FPDFAttachment_GetName(FPDF_ATTACHMENT attachment,
+                       void* buffer,
+                       unsigned long buflen) {
+  CPDF_Object* pFile = CPDFObjectFromFPDFAttachment(attachment);
   if (!pFile)
     return 0;
 
-  CFX_ByteString name = CPDF_FileSpec(pFile).GetFileName().UTF16LE_Encode();
-  unsigned long len = name.GetLength();
-  if (buffer && buflen >= len)
-    memcpy(buffer, name.c_str(), len);
+  return Utf16EncodeMaybeCopyAndReturnLength(CPDF_FileSpec(pFile).GetFileName(),
+                                             buffer, buflen);
+}
 
-  return len;
+DLLEXPORT FPDF_BOOL STDCALL FPDFAttachment_HasKey(FPDF_ATTACHMENT attachment,
+                                                  FPDF_WIDESTRING key) {
+  CPDF_Object* pFile = CPDFObjectFromFPDFAttachment(attachment);
+  if (!pFile)
+    return 0;
+
+  CPDF_Dictionary* pParamsDict = CPDF_FileSpec(pFile).GetParamsDict();
+  if (!pParamsDict)
+    return 0;
+
+  return pParamsDict->KeyExist(CFXByteStringFromFPDFWideString(key));
+}
+
+DLLEXPORT FPDF_OBJECT_TYPE STDCALL
+FPDFAttachment_GetValueType(FPDF_ATTACHMENT attachment, FPDF_WIDESTRING key) {
+  if (!FPDFAttachment_HasKey(attachment, key))
+    return FPDF_OBJECT_UNKNOWN;
+
+  CPDF_Object* pObj = CPDF_FileSpec(CPDFObjectFromFPDFAttachment(attachment))
+                          .GetParamsDict()
+                          ->GetObjectFor(CFXByteStringFromFPDFWideString(key));
+  if (!pObj)
+    return FPDF_OBJECT_UNKNOWN;
+
+  return pObj->GetType();
+}
+
+DLLEXPORT unsigned long STDCALL
+FPDFAttachment_GetStringValue(FPDF_ATTACHMENT attachment,
+                              FPDF_WIDESTRING key,
+                              void* buffer,
+                              unsigned long buflen) {
+  CPDF_Object* pFile = CPDFObjectFromFPDFAttachment(attachment);
+  if (!pFile)
+    return 0;
+
+  CPDF_Dictionary* pParamsDict = CPDF_FileSpec(pFile).GetParamsDict();
+  if (!pParamsDict)
+    return 0;
+
+  CFX_ByteString bsKey = CFXByteStringFromFPDFWideString(key);
+  CFX_WideString value = pParamsDict->GetUnicodeTextFor(bsKey);
+  if (bsKey == "CheckSum") {
+    CPDF_String* stringValue = pParamsDict->GetObjectFor(bsKey)->AsString();
+    if (stringValue->IsHex()) {
+      value =
+          CPDF_String(nullptr, PDF_EncodeString(stringValue->GetString(), true),
+                      false)
+              .GetUnicodeText();
+    }
+  }
+
+  return Utf16EncodeMaybeCopyAndReturnLength(value, buffer, buflen);
+}
+
+DLLEXPORT unsigned long STDCALL
+FPDFAttachment_GetFile(FPDF_ATTACHMENT attachment,
+                       void* buffer,
+                       unsigned long buflen) {
+  CPDF_Object* pFile = CPDFObjectFromFPDFAttachment(attachment);
+  if (!pFile)
+    return 0;
+
+  CPDF_Stream* pFileStream = CPDF_FileSpec(pFile).GetFileStream();
+  if (!pFileStream)
+    return 0;
+
+  uint8_t* data = pFileStream->GetRawData();
+  uint32_t len = pFileStream->GetRawSize();
+  CPDF_Dictionary* pFileDict = pFileStream->GetDict();
+  if (!pFileDict || pFileDict->GetStringFor("Filter").IsEmpty()) {
+    if (buffer && buflen >= len)
+      memcpy(buffer, data, len);
+
+    return len;
+  }
+
+  // Decode the stream if a stream filter is specified.
+  uint8_t* decodedData = nullptr;
+  uint32_t decodedLen = 0;
+  CPDF_StreamParser::DecodeInlineStream(
+      data, len, pFileDict->GetIntegerFor("Width"),
+      pFileDict->GetIntegerFor("Height"), pFileDict->GetStringFor("Filter"),
+      pFileDict->GetDictFor("DecodeParms"), &decodedData, &decodedLen);
+  if (buffer && buflen >= decodedLen)
+    memcpy(buffer, decodedData, decodedLen);
+
+  FX_Free(decodedData);
+  return decodedLen;
 }
diff --git a/fpdfsdk/fpdfattachment_embeddertest.cpp b/fpdfsdk/fpdfattachment_embeddertest.cpp
index 2cbda8a..d873d9b 100644
--- a/fpdfsdk/fpdfattachment_embeddertest.cpp
+++ b/fpdfsdk/fpdfattachment_embeddertest.cpp
@@ -3,6 +3,7 @@
 // found in the LICENSE file.
 
 #include "public/fpdf_attachment.h"
+#include "public/fpdfview.h"
 #include "testing/embedder_test.h"
 
 class FPDFAttachmentEmbeddertest : public EmbedderTest {};
@@ -12,11 +13,75 @@
   ASSERT_TRUE(OpenDocument("embedded_attachments.pdf"));
   EXPECT_EQ(2, FPDFDoc_GetAttachmentCount(document()));
 
+  // Retrieve the first attachment.
+  FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(document(), 0);
+  ASSERT_TRUE(attachment);
+
   // Check that the name of the first attachment is correct.
-  unsigned long len = FPDFDoc_GetAttachmentName(document(), 0, nullptr, 0);
+  unsigned long len = FPDFAttachment_GetName(attachment, nullptr, 0);
   std::vector<char> buf(len);
-  EXPECT_EQ(12u, FPDFDoc_GetAttachmentName(document(), 0, buf.data(), len));
+  EXPECT_EQ(12u, FPDFAttachment_GetName(attachment, buf.data(), len));
   EXPECT_STREQ(L"1.txt",
                GetPlatformWString(reinterpret_cast<unsigned short*>(buf.data()))
                    .c_str());
+
+  // Check that the content of the first attachment is correct.
+  len = FPDFAttachment_GetFile(attachment, nullptr, 0);
+  buf.clear();
+  buf.resize(len);
+  ASSERT_EQ(4u, FPDFAttachment_GetFile(attachment, buf.data(), len));
+  EXPECT_EQ(std::string("test"), std::string(buf.data(), 4));
+
+  // Check that a non-existent key does not exist.
+  EXPECT_FALSE(
+      FPDFAttachment_HasKey(attachment, GetFPDFWideString(L"none").get()));
+
+  // Check that the string value of a non-string dictionary entry is empty.
+  std::unique_ptr<unsigned short, pdfium::FreeDeleter> size_key =
+      GetFPDFWideString(L"Size");
+  EXPECT_EQ(FPDF_OBJECT_NUMBER,
+            FPDFAttachment_GetValueType(attachment, size_key.get()));
+  EXPECT_EQ(2u, FPDFAttachment_GetStringValue(attachment, size_key.get(),
+                                              nullptr, 0));
+
+  // Check that the creation date of the first attachment is correct.
+  std::unique_ptr<unsigned short, pdfium::FreeDeleter> date_key =
+      GetFPDFWideString(L"CreationDate");
+  len = FPDFAttachment_GetStringValue(attachment, date_key.get(), nullptr, 0);
+  buf.clear();
+  buf.resize(len);
+  EXPECT_EQ(48u, FPDFAttachment_GetStringValue(attachment, date_key.get(),
+                                               buf.data(), len));
+  EXPECT_STREQ(L"D:20170712214438-07'00'",
+               GetPlatformWString(reinterpret_cast<unsigned short*>(buf.data()))
+                   .c_str());
+
+  // Retrieve the second attachment.
+  attachment = FPDFDoc_GetAttachment(document(), 1);
+  ASSERT_TRUE(attachment);
+
+  // Retrieve the second attachment file.
+  len = FPDFAttachment_GetFile(attachment, nullptr, 0);
+  buf.clear();
+  buf.resize(len);
+  EXPECT_EQ(5869u, FPDFAttachment_GetFile(attachment, buf.data(), len));
+
+  // Check that the calculated checksum of the file data matches expectation.
+  const char kCheckSum[] = "72afcddedf554dda63c0c88e06f1ce18";
+  const wchar_t kCheckSumW[] = L"<72AFCDDEDF554DDA63C0C88E06F1CE18>";
+  const std::string generated_checksum =
+      GenerateMD5Base16(reinterpret_cast<uint8_t*>(buf.data()), len);
+  EXPECT_EQ(kCheckSum, generated_checksum);
+
+  // Check that the stored checksum matches expectation.
+  std::unique_ptr<unsigned short, pdfium::FreeDeleter> checksum_key =
+      GetFPDFWideString(L"CheckSum");
+  len =
+      FPDFAttachment_GetStringValue(attachment, checksum_key.get(), nullptr, 0);
+  buf.clear();
+  buf.resize(len);
+  EXPECT_EQ(70u, FPDFAttachment_GetStringValue(attachment, checksum_key.get(),
+                                               buf.data(), len));
+  EXPECT_EQ(kCheckSumW,
+            GetPlatformWString(reinterpret_cast<unsigned short*>(buf.data())));
 }
diff --git a/fpdfsdk/fpdfdoc.cpp b/fpdfsdk/fpdfdoc.cpp
index 7be53a6..b608860 100644
--- a/fpdfsdk/fpdfdoc.cpp
+++ b/fpdfsdk/fpdfdoc.cpp
@@ -60,16 +60,6 @@
   return pHolder->get();
 }
 
-unsigned long Utf16EncodeMaybeCopyAndReturnLength(const CFX_WideString& text,
-                                                  void* buffer,
-                                                  unsigned long buflen) {
-  CFX_ByteString encodedText = text.UTF16LE_Encode();
-  unsigned long len = encodedText.GetLength();
-  if (buffer && len <= buflen)
-    memcpy(buffer, encodedText.c_str(), len);
-  return len;
-}
-
 }  // namespace
 
 DLLEXPORT FPDF_BOOKMARK STDCALL
diff --git a/fpdfsdk/fpdfview.cpp b/fpdfsdk/fpdfview.cpp
index 2e52ad6..06e72b3 100644
--- a/fpdfsdk/fpdfview.cpp
+++ b/fpdfsdk/fpdfview.cpp
@@ -320,10 +320,30 @@
   return static_cast<CPDF_PageObject*>(page_object);
 }
 
+CPDF_Object* CPDFObjectFromFPDFAttachment(FPDF_ATTACHMENT attachment) {
+  return static_cast<CPDF_Object*>(attachment);
+}
+
+CFX_ByteString CFXByteStringFromFPDFWideString(FPDF_WIDESTRING wide_string) {
+  return CFX_WideString::FromUTF16LE(wide_string,
+                                     CFX_WideString::WStringLength(wide_string))
+      .UTF8Encode();
+}
+
 CFX_DIBitmap* CFXBitmapFromFPDFBitmap(FPDF_BITMAP bitmap) {
   return static_cast<CFX_DIBitmap*>(bitmap);
 }
 
+unsigned long Utf16EncodeMaybeCopyAndReturnLength(const CFX_WideString& text,
+                                                  void* buffer,
+                                                  unsigned long buflen) {
+  CFX_ByteString encodedText = text.UTF16LE_Encode();
+  unsigned long len = encodedText.GetLength();
+  if (buffer && len <= buflen)
+    memcpy(buffer, encodedText.c_str(), len);
+  return len;
+}
+
 CFX_RetainPtr<IFX_SeekableReadStream> MakeSeekableReadStream(
     FPDF_FILEACCESS* pFileAccess) {
   return pdfium::MakeRetain<CPDF_CustomAccess>(pFileAccess);
diff --git a/fpdfsdk/fpdfview_c_api_test.c b/fpdfsdk/fpdfview_c_api_test.c
index 2fcaf74..6753e66 100644
--- a/fpdfsdk/fpdfview_c_api_test.c
+++ b/fpdfsdk/fpdfview_c_api_test.c
@@ -64,7 +64,12 @@
 
     // fpdf_attachment.h
     CHK(FPDFDoc_GetAttachmentCount);
-    CHK(FPDFDoc_GetAttachmentName);
+    CHK(FPDFDoc_GetAttachment);
+    CHK(FPDFAttachment_GetName);
+    CHK(FPDFAttachment_HasKey);
+    CHK(FPDFAttachment_GetValueType);
+    CHK(FPDFAttachment_GetStringValue);
+    CHK(FPDFAttachment_GetFile);
 
     // fpdf_dataavail.h
     CHK(FPDFAvail_Create);
diff --git a/fpdfsdk/fsdk_define.h b/fpdfsdk/fsdk_define.h
index 4cfe344..610b854 100644
--- a/fpdfsdk/fsdk_define.h
+++ b/fpdfsdk/fsdk_define.h
@@ -67,8 +67,16 @@
 
 CPDF_PageObject* CPDFPageObjectFromFPDFPageObject(FPDF_PAGEOBJECT page_object);
 
+CPDF_Object* CPDFObjectFromFPDFAttachment(FPDF_ATTACHMENT attachment);
+
+CFX_ByteString CFXByteStringFromFPDFWideString(FPDF_WIDESTRING wide_string);
+
 CFX_DIBitmap* CFXBitmapFromFPDFBitmap(FPDF_BITMAP bitmap);
 
+unsigned long Utf16EncodeMaybeCopyAndReturnLength(const CFX_WideString& text,
+                                                  void* buffer,
+                                                  unsigned long buflen);
+
 void FSDK_SetSandBoxPolicy(FPDF_DWORD policy, FPDF_BOOL enable);
 FPDF_BOOL FSDK_IsSandBoxPolicyEnabled(FPDF_DWORD policy);
 void FPDF_RenderPage_Retail(CPDF_PageRenderContext* pContext,