Add a simple check for V4 cross reference table

Some unknown software generates cross reference tables in which the
claimed object numbers are all off by one. Add a simple verification
function to detect this scenario, so that we have a chance to rebuild
the correct cross reference table.

To avoid unnecessary checks and a potential performance hit, we only
check the very first non-free entry.

BUG=602650

Review URL: https://codereview.chromium.org/1910063004
diff --git a/DEPS b/DEPS
index 855dfe2..2e6f801 100644
--- a/DEPS
+++ b/DEPS
@@ -67,6 +67,13 @@
   '+third_party/base',
 ]
 
+specific_include_rules = {
+  # Allow embedder tests to use public APIs.
+  "(.*embeddertest\.cpp)": [
+      "+public",
+  ]
+}
+
 hooks = [
   # Pull GN binaries. This needs to be before running GYP below.
   {
diff --git a/core/fpdfapi/fpdf_parser/cpdf_parser.cpp b/core/fpdfapi/fpdf_parser/cpdf_parser.cpp
index 1ec912e..acf51de 100644
--- a/core/fpdfapi/fpdf_parser/cpdf_parser.cpp
+++ b/core/fpdfapi/fpdf_parser/cpdf_parser.cpp
@@ -316,6 +316,32 @@
   return 0;
 }
 
+// Ideally, all the cross reference entries should be verified.
+// In reality, we rarely see well-formed cross references that don't match
+// the objects. crbug/602650 showed a case where object numbers
+// in the cross reference table are all off by one.
+bool CPDF_Parser::VerifyCrossRefV4() {
+  for (const auto& it : m_ObjectInfo) {
+    if (it.second.pos == 0)
+      continue;
+    // Find the first non-zero position.
+    FX_FILESIZE SavedPos = m_pSyntax->SavePos();
+    m_pSyntax->RestorePos(it.second.pos);
+    bool is_num = false;
+    CFX_ByteString num_str = m_pSyntax->GetNextWord(&is_num);
+    m_pSyntax->RestorePos(SavedPos);
+    if (!is_num || num_str.IsEmpty() ||
+        FXSYS_atoui(num_str.c_str()) != it.first) {
+      // If the object number read doesn't match the one stored,
+      // something is wrong with the cross reference table.
+      return false;
+    } else {
+      return true;
+    }
+  }
+  return true;
+}
+
 FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
   if (!LoadCrossRefV4(xrefpos, 0, TRUE))
     return FALSE;
@@ -366,6 +392,8 @@
   for (size_t i = 0; i < CrossRefList.size(); ++i) {
     if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))
       return FALSE;
+    if (i == 0 && !VerifyCrossRefV4())
+      return FALSE;
   }
   return TRUE;
 }
diff --git a/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp b/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp
index f427ec5..042b221 100644
--- a/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp
+++ b/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp
@@ -2,6 +2,7 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include "public/fpdf_text.h"
 #include "testing/embedder_test.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
@@ -38,3 +39,18 @@
   EXPECT_FALSE(OpenDocument("bug_325_a.pdf"));
   EXPECT_FALSE(OpenDocument("bug_325_b.pdf"));
 }
+
+TEST_F(CPDFParserEmbeddertest, Bug_602650) {
+  // Test the case where the cross reference entries are well formed
+  // but do not match the objects.
+  EXPECT_TRUE(OpenDocument("bug_602650.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  EXPECT_NE(nullptr, page);
+  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
+  EXPECT_NE(nullptr, text_page);
+  // The page should not be blank.
+  EXPECT_LT(0, FPDFText_CountChars(text_page));
+
+  FPDFText_ClosePage(text_page);
+  UnloadPage(page);
+}
diff --git a/core/fpdfapi/fpdf_parser/include/cpdf_parser.h b/core/fpdfapi/fpdf_parser/include/cpdf_parser.h
index 5331cca..48511e9 100644
--- a/core/fpdfapi/fpdf_parser/include/cpdf_parser.h
+++ b/core/fpdfapi/fpdf_parser/include/cpdf_parser.h
@@ -114,6 +114,9 @@
   FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, uint32_t offset);
   void SetEncryptDictionary(CPDF_Dictionary* pDict);
   void ShrinkObjectMap(uint32_t size);
+  // A simple check of whether the cross reference table matches
+  // the objects.
+  bool VerifyCrossRefV4();
 
   CPDF_Document* m_pDocument;
   std::unique_ptr<CPDF_SyntaxParser> m_pSyntax;
diff --git a/testing/resources/bug_602650.pdf b/testing/resources/bug_602650.pdf
new file mode 100644
index 0000000..fc915f3
--- /dev/null
+++ b/testing/resources/bug_602650.pdf
@@ -0,0 +1,105 @@
+%PDF-1.3
+% ò¤ô
+1 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Times-Roman
+>>
+endobj
+2 0 obj <<
+  /Type /Font
+  /Subtype /Type1
+  /BaseFont /Helvetica
+>>
+endobj
+3 0 obj <<
+>>
+stream
+BT
+20 50 Td
+/F1 12 Tf
+(Hello, world!) Tj
+0 50 Td
+/F2 16 Tf
+(Goodbye, world!) Tj
+ET
+endstream
+endobj
+4 0 obj <<
+  /Type /Catalog
+  /Pages 5 0 R
+>>
+5 0 obj <<
+  /Type /Pages
+  /MediaBox [ 0 0 200 200 ]
+  /Count 1
+  /Kids [ 6 0 R ]
+>>
+endobj
+6 0 obj <<
+  /Type /Page
+  /Parent 5 0 R
+  /Resources <<
+    /Font <<
+      /F1 1 0 R
+      /F2 2 0 R
+    >>
+  >>
+  /Contents 3 0 R
+>>
+endobj
+xref
+1 7
+0000000000 65535 f 
+0000000015 00000 n 
+0000000093 00000 n 
+0000000169 00000 n 
+0000000290 00000 n 
+0000000336 00000 n 
+0000000429 00000 n 
+trailer <<
+  /Size 7
+  /Root 4 0 R
+>>
+startxref
+571
+%%EOF
+4 0 obj <<
+  /Type /Catalog
+  /Pages 5 0 R
+>>
+endobj
+5 0 obj <<
+  /Type /Pages
+  /MediaBox [ 1 1 250 250 ]
+  /Count 1
+  /Kids [ 6 0 R ]
+>>
+endobj
+6 0 obj <<
+  /Type /Page
+  /Parent 5 0 R
+  /Resources <<
+    /Font <<
+      /F1 1 0 R
+      /F2 2 0 R
+    >>
+  >>
+  /Contents 3 0 R
+>>
+endobj
+xref
+0 1
+0000000000 65535 f 
+4 3
+0000000778 00000 n 
+0000000831 00000 n 
+0000000924 00000 n 
+trailer <<
+  /Size 7
+  /Root 4 0 R
+  /Prev 571
+>>
+startxref
+1066
+%%EOF