Move the classes in fpdf_text_int.cpp into their own files: CPDF_TextPageFind and CPDF_LinkExtract are moved to new files (cpdf_textpagefind.cpp and cpdf_linkextract.cpp), and fpdf_text_int.cpp is renamed to cpdf_textpage.cpp to hold the CPDF_TextPage definition. Review-Url: https://codereview.chromium.org/2286723003

commit: 2d396ac157bcd6da78190def936e5eaf278a6ca7 [log] [tgz]
author: npm <npm@chromium.org> Fri Aug 26 10:00:25 2016 -0700
committer: Commit bot <commit-bot@chromium.org> Fri Aug 26 10:00:25 2016 -0700
tree: 0b2416b9e93dd770b4180735a96a3763e9f25eff
parent: 18df9f5d1ff48fb3a225a84920fe46b63fba46a3 [diff]
diff --git a/BUILD.gn b/BUILD.gn
index ca2338f..f603099 100644
--- a/BUILD.gn
+++ b/BUILD.gn

@@ -538,7 +538,9 @@
 
 static_library("fpdftext") {
   sources = [
-    "core/fpdftext/fpdf_text_int.cpp",
+    "core/fpdftext/cpdf_linkextract.cpp",
+    "core/fpdftext/cpdf_textpage.cpp",
+    "core/fpdftext/cpdf_textpagefind.cpp",
     "core/fpdftext/include/cpdf_linkextract.h",
     "core/fpdftext/include/cpdf_textpage.h",
     "core/fpdftext/include/cpdf_textpagefind.h",

diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp
new file mode 100644
index 0000000..1677b67
--- /dev/null
+++ b/core/fpdftext/cpdf_linkextract.cpp

@@ -0,0 +1,173 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/fpdftext/include/cpdf_linkextract.h"
+
+#include <vector>
+
+#include "core/fpdftext/include/cpdf_textpage.h"
+#include "core/fxcrt/include/fx_ext.h"
+#include "core/fxcrt/include/fx_string.h"
+#include "core/fxcrt/include/fx_system.h"
+
+CPDF_LinkExtract::CPDF_LinkExtract(const CPDF_TextPage* pTextPage)
+    : m_pTextPage(pTextPage) {}  // Only stores the pointer; see ExtractLinks().
+
+CPDF_LinkExtract::~CPDF_LinkExtract() {}  // Empty body: no explicit cleanup.
+
+void CPDF_LinkExtract::ExtractLinks() {  // Rebuilds m_LinkArray from scratch.
+  m_LinkArray.clear();  // Drop results from any previous call.
+  if (!m_pTextPage->IsParsed())
+    return;
+  // Cache the page text requested with (0, -1); stop if the page has no text.
+  m_strPageText = m_pTextPage->GetPageText(0, -1);
+  if (m_strPageText.IsEmpty())
+    return;
+  // Walk the page's characters and append every detected link to m_LinkArray.
+  ParseLink();
+}
+
+void CPDF_LinkExtract::ParseLink() {  // Splits page chars into tokens; keeps links.
+  int start = 0, pos = 0;  // start: first char of the current token; pos: cursor.
+  int TotalChar = m_pTextPage->CountChars();
+  while (pos < TotalChar) {
+    FPDF_CHAR_INFO pageChar;
+    m_pTextPage->GetCharInfo(pos, &pageChar);
+    if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED ||  // Token boundary: generated
+        pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) {  // char, space, or end.
+      int nCount = pos - start;
+      if (pos == TotalChar - 1)
+        nCount++;  // The final char is counted as part of the last token.
+      CFX_WideString strBeCheck;
+      strBeCheck = m_pTextPage->GetPageText(start, nCount);
+      if (strBeCheck.GetLength() > 5) {  // Tokens of 5 chars or fewer are skipped.
+        while (strBeCheck.GetLength() > 0) {  // Strip trailing ')', ',', '>', '.'.
+          FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1);
+          if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') {
+            strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1);
+            nCount--;  // Shrink the recorded count along with the trimmed text.
+          } else {
+            break;
+          }
+        }
+        if (nCount > 5 &&  // Web check runs first; mail check only if it fails.
+            (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) {
+          m_LinkArray.push_back({start, nCount, strBeCheck});  // strBeCheck = URL.
+        }
+      }
+      start = ++pos;  // The next token starts after the boundary char.
+    } else {
+      pos++;
+    }
+  }
+}
+
+bool CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {  // In/out arg.
+  CFX_WideString str = strBeCheck;
+  str.MakeLower();  // Search a lowercased copy; strBeCheck keeps its own case.
+  if (str.Find(L"http://www.") != -1) {  // Each hit keeps text from the match on
+    strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www."));
+    return true;  // (assumes Right(n) returns the last n chars — confirm).
+  }
+  if (str.Find(L"http://") != -1) {
+    strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://"));
+    return true;
+  }
+  if (str.Find(L"https://www.") != -1) {
+    strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www."));
+    return true;
+  }
+  if (str.Find(L"https://") != -1) {
+    strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://"));
+    return true;
+  }
+  if (str.Find(L"www.") != -1) {  // Scheme-less host: "http://" is prepended.
+    strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www."));
+    strBeCheck = L"http://" + strBeCheck;
+    return true;
+  }
+  return false;  // No known prefix found; strBeCheck is left unmodified.
+}
+
+bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {  // str is in/out.
+  int aPos = str.Find(L'@');
+  // Invalid when there is no '@' or when '@' is the first character.
+  if (aPos < 1)
+    return false;
+
+  // Check the local part.
+  int pPos = aPos;  // Used to track the position of '@' or '.'.
+  for (int i = aPos - 1; i >= 0; i--) {  // Scan backwards from just before '@'.
+    FX_WCHAR ch = str.GetAt(i);
+    if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch))
+      continue;
+
+    if (ch != L'.' || i == pPos - 1 || i == 0) {
+      if (i == aPos - 1) {
+        // There is '.' or invalid char before '@'.
+        return false;
+      }
+      // End extracting for other invalid chars, '.' at the beginning, or
+      // consecutive '.'.
+      int removed_len = i == pPos - 1 ? i + 2 : i + 1;
+      str = str.Right(str.GetLength() - removed_len);
+      break;
+    }
+    // Found a valid '.'.
+    pPos = i;
+  }
+
+  // Check the domain name part.
+  aPos = str.Find(L'@');  // Re-locate '@': str may have been shortened above.
+  if (aPos < 1)
+    return false;
+
+  str.TrimRight(L'.');
+  // At least one '.' in domain name, but not at the beginning.
+  // TODO(weili): RFC5322 allows domain names to be a local name without '.'.
+  // Check whether we should remove this check.
+  int ePos = str.Find(L'.', aPos + 1);
+  if (ePos == -1 || ePos == aPos + 1)
+    return false;
+
+  // Validate all other chars in domain name.
+  int nLen = str.GetLength();
+  pPos = 0;  // Used to track the position of '.'.
+  for (int i = aPos + 1; i < nLen; i++) {
+    FX_WCHAR wch = str.GetAt(i);
+    if (wch == L'-' || FXSYS_iswalnum(wch))
+      continue;
+
+    if (wch != L'.' || i == pPos + 1) {
+      // Domain name should end before invalid char.
+      int host_end = i == pPos + 1 ? i - 2 : i - 1;
+      if (pPos > 0 && host_end - aPos >= 3) {
+        // Trim the ending invalid chars if there is at least one '.' and name.
+        str = str.Left(host_end + 1);
+        break;
+      }
+      return false;
+    }
+    pPos = i;
+  }
+
+  if (str.Find(L"mailto:") == -1)  // Add the scheme unless already present.
+    str = L"mailto:" + str;
+
+  return true;
+}
+
+CFX_WideString CPDF_LinkExtract::GetURL(size_t index) const {  // Link URL by index.
+  return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L"";  // "" if out of range.
+}
+
+std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const {
+  if (index >= m_LinkArray.size())
+    return std::vector<CFX_FloatRect>();  // Out-of-range index: empty result.
+  // Ask the text page for the rects of the link's recorded start/count range.
+  return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start,
+                                   m_LinkArray[index].m_Count);
+}

diff --git a/core/fpdftext/fpdf_text_int.cpp b/core/fpdftext/cpdf_textpage.cpp
similarity index 75%
rename from core/fpdftext/fpdf_text_int.cpp
rename to core/fpdftext/cpdf_textpage.cpp
index fbd9c9c..3981cfe 100644
--- a/core/fpdftext/fpdf_text_int.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp

@@ -4,10 +4,9 @@
 
 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
 
+#include "core/fpdftext/include/cpdf_textpage.h"
+
 #include <algorithm>
-#include <cctype>
-#include <cwctype>
-#include <memory>
 #include <utility>
 #include <vector>
 
@@ -19,35 +18,12 @@
 #include "core/fpdfapi/fpdf_page/include/cpdf_textobject.h"
 #include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h"
 #include "core/fpdfapi/fpdf_parser/include/cpdf_string.h"
-#include "core/fpdftext/include/cpdf_linkextract.h"
-#include "core/fpdftext/include/cpdf_textpage.h"
-#include "core/fpdftext/include/cpdf_textpagefind.h"
 #include "core/fpdftext/unicodenormalizationdata.h"
 #include "core/fxcrt/fx_bidi.h"
 #include "core/fxcrt/include/fx_ext.h"
 #include "core/fxcrt/include/fx_ucd.h"
 #include "third_party/base/stl_util.h"
 
-#define FPDFTEXT_MATCHCASE 0x00000001
-#define FPDFTEXT_MATCHWHOLEWORD 0x00000002
-#define FPDFTEXT_CONSECUTIVE 0x00000004
-
-#define FPDFTEXT_CHAR_ERROR -1
-#define FPDFTEXT_CHAR_NORMAL 0
-#define FPDFTEXT_CHAR_GENERATED 1
-#define FPDFTEXT_CHAR_UNUNICODE 2
-#define FPDFTEXT_CHAR_HYPHEN 3
-#define FPDFTEXT_CHAR_PIECE 4
-
-#define TEXT_SPACE_CHAR L' '
-#define TEXT_LINEFEED_CHAR L'\n'
-#define TEXT_RETURN_CHAR L'\r'
-#define TEXT_EMPTY L""
-#define TEXT_SPACE L" "
-#define TEXT_RETURN_LINEFEED L"\r\n"
-#define TEXT_LINEFEED L"\n"
-#define TEXT_CHARRATIO_GAPDELTA 0.070
-
 namespace {
 
 const FX_FLOAT kDefaultFontSize = 1.0f;
@@ -55,22 +31,6 @@
     nullptr, g_UnicodeData_Normalization_Map1, g_UnicodeData_Normalization_Map2,
     g_UnicodeData_Normalization_Map3, g_UnicodeData_Normalization_Map4};
 
-FX_BOOL IsIgnoreSpaceCharacter(FX_WCHAR curChar) {
-  if (curChar < 255)
-    return FALSE;
-  if ((curChar >= 0x0600 && curChar <= 0x06FF) ||
-      (curChar >= 0xFE70 && curChar <= 0xFEFF) ||
-      (curChar >= 0xFB50 && curChar <= 0xFDFF) ||
-      (curChar >= 0x0400 && curChar <= 0x04FF) ||
-      (curChar >= 0x0500 && curChar <= 0x052F) ||
-      (curChar >= 0xA640 && curChar <= 0xA69F) ||
-      (curChar >= 0x2DE0 && curChar <= 0x2DFF) || curChar == 8467 ||
-      (curChar >= 0x2000 && curChar <= 0x206F)) {
-    return FALSE;
-  }
-  return TRUE;
-}
-
 FX_FLOAT NormalizeThreshold(FX_FLOAT threshold) {
   if (threshold < 300)
     return threshold / 2.0f;
@@ -1587,563 +1547,3 @@
   rect.Intersect(rect2);
   return !rect.IsEmpty();
 }
-
-CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage)
-    : m_pTextPage(pTextPage),
-      m_flags(0),
-      m_findNextStart(-1),
-      m_findPreStart(-1),
-      m_bMatchCase(FALSE),
-      m_bMatchWholeWord(FALSE),
-      m_resStart(0),
-      m_resEnd(-1),
-      m_IsFind(FALSE) {
-  m_strText = m_pTextPage->GetPageText();
-  int nCount = pTextPage->CountChars();
-  if (nCount) {
-    m_CharIndex.push_back(0);
-  }
-  for (int i = 0; i < nCount; i++) {
-    FPDF_CHAR_INFO info;
-    pTextPage->GetCharInfo(i, &info);
-    int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
-    if (info.m_Flag == FPDFTEXT_CHAR_NORMAL ||
-        info.m_Flag == FPDFTEXT_CHAR_GENERATED) {
-      if (indexSize % 2) {
-        m_CharIndex.push_back(1);
-      } else {
-        if (indexSize <= 0) {
-          continue;
-        }
-        m_CharIndex[indexSize - 1] += 1;
-      }
-    } else {
-      if (indexSize % 2) {
-        if (indexSize <= 0) {
-          continue;
-        }
-        m_CharIndex[indexSize - 1] = i + 1;
-      } else {
-        m_CharIndex.push_back(i + 1);
-      }
-    }
-  }
-  int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
-  if (indexSize % 2) {
-    m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1);
-  }
-}
-
-CPDF_TextPageFind::~CPDF_TextPageFind() {}
-
-int CPDF_TextPageFind::GetCharIndex(int index) const {
-  return m_pTextPage->CharIndexFromTextIndex(index);
-}
-
-FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat,
-                                     int flags,
-                                     int startPos) {
-  if (!m_pTextPage) {
-    return FALSE;
-  }
-  if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) {
-    m_strText = m_pTextPage->GetPageText();
-  }
-  CFX_WideString findwhatStr = findwhat;
-  m_findWhat = findwhatStr;
-  m_flags = flags;
-  m_bMatchCase = flags & FPDFTEXT_MATCHCASE;
-  if (m_strText.IsEmpty()) {
-    m_IsFind = FALSE;
-    return TRUE;
-  }
-  FX_STRSIZE len = findwhatStr.GetLength();
-  if (!m_bMatchCase) {
-    findwhatStr.MakeLower();
-    m_strText.MakeLower();
-  }
-  m_bMatchWholeWord = flags & FPDFTEXT_MATCHWHOLEWORD;
-  m_findNextStart = startPos;
-  if (startPos == -1) {
-    m_findPreStart = m_strText.GetLength() - 1;
-  } else {
-    m_findPreStart = startPos;
-  }
-  m_csFindWhatArray.clear();
-  int i = 0;
-  while (i < len) {
-    if (findwhatStr.GetAt(i) != ' ') {
-      break;
-    }
-    i++;
-  }
-  if (i < len) {
-    ExtractFindWhat(findwhatStr);
-  } else {
-    m_csFindWhatArray.push_back(findwhatStr);
-  }
-  if (m_csFindWhatArray.empty()) {
-    return FALSE;
-  }
-  m_IsFind = TRUE;
-  m_resStart = 0;
-  m_resEnd = -1;
-  return TRUE;
-}
-
-FX_BOOL CPDF_TextPageFind::FindNext() {
-  if (!m_pTextPage) {
-    return FALSE;
-  }
-  m_resArray.clear();
-  if (m_findNextStart == -1) {
-    return FALSE;
-  }
-  if (m_strText.IsEmpty()) {
-    m_IsFind = FALSE;
-    return m_IsFind;
-  }
-  int strLen = m_strText.GetLength();
-  if (m_findNextStart > strLen - 1) {
-    m_IsFind = FALSE;
-    return m_IsFind;
-  }
-  int nCount = pdfium::CollectionSize<int>(m_csFindWhatArray);
-  int nResultPos = 0;
-  int nStartPos = 0;
-  nStartPos = m_findNextStart;
-  FX_BOOL bSpaceStart = FALSE;
-  for (int iWord = 0; iWord < nCount; iWord++) {
-    CFX_WideString csWord = m_csFindWhatArray[iWord];
-    if (csWord.IsEmpty()) {
-      if (iWord == nCount - 1) {
-        FX_WCHAR strInsert = m_strText.GetAt(nStartPos);
-        if (strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_SPACE_CHAR ||
-            strInsert == TEXT_RETURN_CHAR || strInsert == 160) {
-          nResultPos = nStartPos + 1;
-          break;
-        }
-        iWord = -1;
-      } else if (iWord == 0) {
-        bSpaceStart = TRUE;
-      }
-      continue;
-    }
-    int endIndex;
-    nResultPos = m_strText.Find(csWord.c_str(), nStartPos);
-    if (nResultPos == -1) {
-      m_IsFind = FALSE;
-      return m_IsFind;
-    }
-    endIndex = nResultPos + csWord.GetLength() - 1;
-    if (iWord == 0) {
-      m_resStart = nResultPos;
-    }
-    FX_BOOL bMatch = TRUE;
-    if (iWord != 0 && !bSpaceStart) {
-      int PreResEndPos = nStartPos;
-      int curChar = csWord.GetAt(0);
-      CFX_WideString lastWord = m_csFindWhatArray[iWord - 1];
-      int lastChar = lastWord.GetAt(lastWord.GetLength() - 1);
-      if (nStartPos == nResultPos &&
-          !(IsIgnoreSpaceCharacter(lastChar) ||
-            IsIgnoreSpaceCharacter(curChar))) {
-        bMatch = FALSE;
-      }
-      for (int d = PreResEndPos; d < nResultPos; d++) {
-        FX_WCHAR strInsert = m_strText.GetAt(d);
-        if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
-            strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
-          bMatch = FALSE;
-          break;
-        }
-      }
-    } else if (bSpaceStart) {
-      if (nResultPos > 0) {
-        FX_WCHAR strInsert = m_strText.GetAt(nResultPos - 1);
-        if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
-            strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
-          bMatch = FALSE;
-          m_resStart = nResultPos;
-        } else {
-          m_resStart = nResultPos - 1;
-        }
-      }
-    }
-    if (m_bMatchWholeWord && bMatch) {
-      bMatch = IsMatchWholeWord(m_strText, nResultPos, endIndex);
-    }
-    nStartPos = endIndex + 1;
-    if (!bMatch) {
-      iWord = -1;
-      if (bSpaceStart) {
-        nStartPos = m_resStart + m_csFindWhatArray[1].GetLength();
-      } else {
-        nStartPos = m_resStart + m_csFindWhatArray[0].GetLength();
-      }
-    }
-  }
-  m_resEnd = nResultPos + m_csFindWhatArray.back().GetLength() - 1;
-  m_IsFind = TRUE;
-  int resStart = GetCharIndex(m_resStart);
-  int resEnd = GetCharIndex(m_resEnd);
-  m_resArray = m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1);
-  if (m_flags & FPDFTEXT_CONSECUTIVE) {
-    m_findNextStart = m_resStart + 1;
-    m_findPreStart = m_resEnd - 1;
-  } else {
-    m_findNextStart = m_resEnd + 1;
-    m_findPreStart = m_resStart - 1;
-  }
-  return m_IsFind;
-}
-
-FX_BOOL CPDF_TextPageFind::FindPrev() {
-  if (!m_pTextPage) {
-    return FALSE;
-  }
-  m_resArray.clear();
-  if (m_strText.IsEmpty() || m_findPreStart < 0) {
-    m_IsFind = FALSE;
-    return m_IsFind;
-  }
-  CPDF_TextPageFind findEngine(m_pTextPage);
-  FX_BOOL ret = findEngine.FindFirst(m_findWhat, m_flags);
-  if (!ret) {
-    m_IsFind = FALSE;
-    return m_IsFind;
-  }
-  int order = -1, MatchedCount = 0;
-  while (ret) {
-    ret = findEngine.FindNext();
-    if (ret) {
-      int order1 = findEngine.GetCurOrder();
-      int MatchedCount1 = findEngine.GetMatchedCount();
-      if (((order1 + MatchedCount1) - 1) > m_findPreStart) {
-        break;
-      }
-      order = order1;
-      MatchedCount = MatchedCount1;
-    }
-  }
-  if (order == -1) {
-    m_IsFind = FALSE;
-    return m_IsFind;
-  }
-  m_resStart = m_pTextPage->TextIndexFromCharIndex(order);
-  m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1);
-  m_IsFind = TRUE;
-  m_resArray = m_pTextPage->GetRectArray(order, MatchedCount);
-  if (m_flags & FPDFTEXT_CONSECUTIVE) {
-    m_findNextStart = m_resStart + 1;
-    m_findPreStart = m_resEnd - 1;
-  } else {
-    m_findNextStart = m_resEnd + 1;
-    m_findPreStart = m_resStart - 1;
-  }
-  return m_IsFind;
-}
-
-void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) {
-  if (findwhat.IsEmpty()) {
-    return;
-  }
-  int index = 0;
-  while (1) {
-    CFX_WideString csWord = TEXT_EMPTY;
-    int ret =
-        ExtractSubString(csWord, findwhat.c_str(), index, TEXT_SPACE_CHAR);
-    if (csWord.IsEmpty()) {
-      if (ret) {
-        m_csFindWhatArray.push_back(L"");
-        index++;
-        continue;
-      } else {
-        break;
-      }
-    }
-    int pos = 0;
-    while (pos < csWord.GetLength()) {
-      CFX_WideString curStr = csWord.Mid(pos, 1);
-      FX_WCHAR curChar = csWord.GetAt(pos);
-      if (IsIgnoreSpaceCharacter(curChar)) {
-        if (pos > 0 && curChar == 0x2019) {
-          pos++;
-          continue;
-        }
-        if (pos > 0) {
-          m_csFindWhatArray.push_back(csWord.Mid(0, pos));
-        }
-        m_csFindWhatArray.push_back(curStr);
-        if (pos == csWord.GetLength() - 1) {
-          csWord.clear();
-          break;
-        }
-        csWord = csWord.Right(csWord.GetLength() - pos - 1);
-        pos = 0;
-        continue;
-      }
-      pos++;
-    }
-    if (!csWord.IsEmpty()) {
-      m_csFindWhatArray.push_back(csWord);
-    }
-    index++;
-  }
-}
-
-FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText,
-                                            int startPos,
-                                            int endPos) {
-  FX_WCHAR char_left = 0;
-  FX_WCHAR char_right = 0;
-  int char_count = endPos - startPos + 1;
-  if (char_count < 1) {
-    return FALSE;
-  }
-  if (char_count == 1 && csPageText.GetAt(startPos) > 255) {
-    return TRUE;
-  }
-  if (startPos - 1 >= 0) {
-    char_left = csPageText.GetAt(startPos - 1);
-  }
-  if (startPos + char_count < csPageText.GetLength()) {
-    char_right = csPageText.GetAt(startPos + char_count);
-  }
-  if ((char_left > 'A' && char_left < 'a') ||
-      (char_left > 'a' && char_left < 'z') ||
-      (char_left > 0xfb00 && char_left < 0xfb06) || std::iswdigit(char_left) ||
-      (char_right > 'A' && char_right < 'a') ||
-      (char_right > 'a' && char_right < 'z') ||
-      (char_right > 0xfb00 && char_right < 0xfb06) ||
-      std::iswdigit(char_right)) {
-    return FALSE;
-  }
-  if (!(('A' > char_left || char_left > 'Z') &&
-        ('a' > char_left || char_left > 'z') &&
-        ('A' > char_right || char_right > 'Z') &&
-        ('a' > char_right || char_right > 'z'))) {
-    return FALSE;
-  }
-  if (char_count > 0) {
-    if (csPageText.GetAt(startPos) >= L'0' &&
-        csPageText.GetAt(startPos) <= L'9' && char_left >= L'0' &&
-        char_left <= L'9') {
-      return FALSE;
-    }
-    if (csPageText.GetAt(endPos) >= L'0' && csPageText.GetAt(endPos) <= L'9' &&
-        char_right >= L'0' && char_right <= L'9') {
-      return FALSE;
-    }
-  }
-  return TRUE;
-}
-
-FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString,
-                                            const FX_WCHAR* lpszFullString,
-                                            int iSubString,
-                                            FX_WCHAR chSep) {
-  if (!lpszFullString) {
-    return FALSE;
-  }
-  while (iSubString--) {
-    lpszFullString = wcschr(lpszFullString, chSep);
-    if (!lpszFullString) {
-      rString.clear();
-      return FALSE;
-    }
-    lpszFullString++;
-    while (*lpszFullString == chSep) {
-      lpszFullString++;
-    }
-  }
-  const FX_WCHAR* lpchEnd = wcschr(lpszFullString, chSep);
-  int nLen = lpchEnd ? (int)(lpchEnd - lpszFullString)
-                     : (int)FXSYS_wcslen(lpszFullString);
-  ASSERT(nLen >= 0);
-  FXSYS_memcpy(rString.GetBuffer(nLen), lpszFullString,
-               nLen * sizeof(FX_WCHAR));
-  rString.ReleaseBuffer();
-  return TRUE;
-}
-
-CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) {
-  CFX_WideString str2;
-  str2.clear();
-  int nlen = str.GetLength();
-  for (int i = nlen - 1; i >= 0; i--) {
-    str2 += str.GetAt(i);
-  }
-  return str2;
-}
-
-int CPDF_TextPageFind::GetCurOrder() const {
-  return GetCharIndex(m_resStart);
-}
-
-int CPDF_TextPageFind::GetMatchedCount() const {
-  int resStart = GetCharIndex(m_resStart);
-  int resEnd = GetCharIndex(m_resEnd);
-  return resEnd - resStart + 1;
-}
-
-CPDF_LinkExtract::CPDF_LinkExtract(const CPDF_TextPage* pTextPage)
-    : m_pTextPage(pTextPage) {}
-
-CPDF_LinkExtract::~CPDF_LinkExtract() {
-}
-
-void CPDF_LinkExtract::ExtractLinks() {
-  m_LinkArray.clear();
-  if (!m_pTextPage->IsParsed())
-    return;
-
-  m_strPageText = m_pTextPage->GetPageText(0, -1);
-  if (m_strPageText.IsEmpty())
-    return;
-
-  ParseLink();
-}
-
-void CPDF_LinkExtract::ParseLink() {
-  int start = 0, pos = 0;
-  int TotalChar = m_pTextPage->CountChars();
-  while (pos < TotalChar) {
-    FPDF_CHAR_INFO pageChar;
-    m_pTextPage->GetCharInfo(pos, &pageChar);
-    if (pageChar.m_Flag == FPDFTEXT_CHAR_GENERATED ||
-        pageChar.m_Unicode == 0x20 || pos == TotalChar - 1) {
-      int nCount = pos - start;
-      if (pos == TotalChar - 1) {
-        nCount++;
-      }
-      CFX_WideString strBeCheck;
-      strBeCheck = m_pTextPage->GetPageText(start, nCount);
-      if (strBeCheck.GetLength() > 5) {
-        while (strBeCheck.GetLength() > 0) {
-          FX_WCHAR ch = strBeCheck.GetAt(strBeCheck.GetLength() - 1);
-          if (ch == L')' || ch == L',' || ch == L'>' || ch == L'.') {
-            strBeCheck = strBeCheck.Mid(0, strBeCheck.GetLength() - 1);
-            nCount--;
-          } else {
-            break;
-          }
-        }
-        if (nCount > 5 &&
-            (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) {
-          m_LinkArray.push_back({start, nCount, strBeCheck});
-        }
-      }
-      start = ++pos;
-    } else {
-      pos++;
-    }
-  }
-}
-
-bool CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
-  CFX_WideString str = strBeCheck;
-  str.MakeLower();
-  if (str.Find(L"http://www.") != -1) {
-    strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www."));
-    return true;
-  }
-  if (str.Find(L"http://") != -1) {
-    strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://"));
-    return true;
-  }
-  if (str.Find(L"https://www.") != -1) {
-    strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www."));
-    return true;
-  }
-  if (str.Find(L"https://") != -1) {
-    strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://"));
-    return true;
-  }
-  if (str.Find(L"www.") != -1) {
-    strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www."));
-    strBeCheck = L"http://" + strBeCheck;
-    return true;
-  }
-  return false;
-}
-
-bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
-  int aPos = str.Find(L'@');
-  // Invalid when no '@'.
-  if (aPos < 1)
-    return false;
-
-  // Check the local part.
-  int pPos = aPos;  // Used to track the position of '@' or '.'.
-  for (int i = aPos - 1; i >= 0; i--) {
-    FX_WCHAR ch = str.GetAt(i);
-    if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch))
-      continue;
-
-    if (ch != L'.' || i == pPos - 1 || i == 0) {
-      if (i == aPos - 1) {
-        // There is '.' or invalid char before '@'.
-        return FALSE;
-      }
-      // End extracting for other invalid chars, '.' at the beginning, or
-      // consecutive '.'.
-      int removed_len = i == pPos - 1 ? i + 2 : i + 1;
-      str = str.Right(str.GetLength() - removed_len);
-      break;
-    }
-    // Found a valid '.'.
-    pPos = i;
-  }
-
-  // Check the domain name part.
-  aPos = str.Find(L'@');
-  if (aPos < 1)
-    return false;
-
-  str.TrimRight(L'.');
-  // At least one '.' in domain name, but not at the beginning.
-  // TODO(weili): RFC5322 allows domain names to be a local name without '.'.
-  // Check whether we should remove this check.
-  int ePos = str.Find(L'.', aPos + 1);
-  if (ePos == -1 || ePos == aPos + 1)
-    return false;
-
-  // Validate all other chars in domain name.
-  int nLen = str.GetLength();
-  pPos = 0;  // Used to track the position of '.'.
-  for (int i = aPos + 1; i < nLen; i++) {
-    FX_WCHAR wch = str.GetAt(i);
-    if (wch == L'-' || FXSYS_iswalnum(wch))
-      continue;
-
-    if (wch != L'.' || i == pPos + 1) {
-      // Domain name should end before invalid char.
-      int host_end = i == pPos + 1 ? i - 2 : i - 1;
-      if (pPos > 0 && host_end - aPos >= 3) {
-        // Trim the ending invalid chars if there is at least one '.' and name.
-        str = str.Left(host_end + 1);
-        break;
-      }
-      return false;
-    }
-    pPos = i;
-  }
-
-  if (str.Find(L"mailto:") == -1)
-    str = L"mailto:" + str;
-
-  return true;
-}
-
-CFX_WideString CPDF_LinkExtract::GetURL(size_t index) const {
-  return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L"";
-}
-
-std::vector<CFX_FloatRect> CPDF_LinkExtract::GetRects(size_t index) const {
-  if (index >= m_LinkArray.size())
-    return std::vector<CFX_FloatRect>();
-
-  return m_pTextPage->GetRectArray(m_LinkArray[index].m_Start,
-                                   m_LinkArray[index].m_Count);
-}

diff --git a/core/fpdftext/cpdf_textpagefind.cpp b/core/fpdftext/cpdf_textpagefind.cpp
new file mode 100644
index 0000000..a67bdf1
--- /dev/null
+++ b/core/fpdftext/cpdf_textpagefind.cpp

@@ -0,0 +1,407 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/fpdftext/include/cpdf_textpagefind.h"
+
+#include <cwchar>
+#include <cwctype>
+#include <vector>
+
+#include "core/fpdftext/include/cpdf_textpage.h"
+#include "core/fxcrt/include/fx_string.h"
+#include "core/fxcrt/include/fx_system.h"
+#include "third_party/base/stl_util.h"
+
+namespace {
+
+// Classifies |curChar| by code point. Returns FALSE for values below 255, for
+// 8467, and for anything inside one of the ranges in the table below; returns
+// TRUE for every other value.
+FX_BOOL IsIgnoreSpaceCharacter(FX_WCHAR curChar) {
+  // Inclusive {first, last} code-point ranges that yield FALSE.
+  static const int kExcludedRanges[][2] = {
+      {0x0400, 0x04FF}, {0x0500, 0x052F}, {0x0600, 0x06FF}, {0x2000, 0x206F},
+      {0x2DE0, 0x2DFF}, {0xA640, 0xA69F}, {0xFB50, 0xFDFF}, {0xFE70, 0xFEFF},
+  };
+  if (curChar < 255 || curChar == 8467)
+    return FALSE;
+  for (const auto& range : kExcludedRanges) {
+    if (curChar >= range[0] && curChar <= range[1])
+      return FALSE;
+  }
+  return TRUE;
+}
+
+}  // namespace
+
+// Builds a finder over |pTextPage|. Caches the page text in m_strText and
+// fills m_CharIndex with (start, count) pairs, one pair per run of consecutive
+// characters whose flag is FPDFTEXT_CHAR_NORMAL or FPDFTEXT_CHAR_GENERATED.
+// |pTextPage| is dereferenced unconditionally here, so it must not be null.
+CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage)
+    : m_pTextPage(pTextPage),
+      m_flags(0),
+      m_findNextStart(-1),
+      m_findPreStart(-1),
+      m_bMatchCase(FALSE),
+      m_bMatchWholeWord(FALSE),
+      m_resStart(0),
+      m_resEnd(-1),
+      m_IsFind(FALSE) {
+  m_strText = m_pTextPage->GetPageText();
+  const int nCount = pTextPage->CountChars();
+  // Seed the first run start so the vector is never empty inside the loop.
+  if (nCount)
+    m_CharIndex.push_back(0);
+  for (int i = 0; i < nCount; i++) {
+    FPDF_CHAR_INFO info;
+    pTextPage->GetCharInfo(i, &info);
+    // An odd size means the last entry is a run start still waiting for its
+    // count; an even size means the last entry is the count of an open run.
+    const bool bPendingStart = m_CharIndex.size() % 2 != 0;
+    const bool bCountable = info.m_Flag == FPDFTEXT_CHAR_NORMAL ||
+                            info.m_Flag == FPDFTEXT_CHAR_GENERATED;
+    if (bCountable) {
+      if (bPendingStart)
+        m_CharIndex.push_back(1);  // First character of a new run.
+      else
+        m_CharIndex.back() += 1;  // Extend the open run.
+    } else if (bPendingStart) {
+      // No run has begun yet: slide the pending start past this character.
+      m_CharIndex.back() = i + 1;
+    } else {
+      // The open run ends here; the next candidate start is the next index.
+      m_CharIndex.push_back(i + 1);
+    }
+  }
+  // Drop a trailing start that never received a count.
+  if (m_CharIndex.size() % 2 != 0)
+    m_CharIndex.pop_back();
+}
+
+CPDF_TextPageFind::~CPDF_TextPageFind() {}
+
+// Converts |index|, a position in the page text string, into a character
+// index by delegating to CPDF_TextPage::CharIndexFromTextIndex().
+int CPDF_TextPageFind::GetCharIndex(int index) const {
+  return m_pTextPage->CharIndexFromTextIndex(index);
+}
+
+// Prepares a search for |findwhat| using the FPDFTEXT_* bits in |flags|.
+// |startPos| becomes m_findNextStart; when it is -1, m_findPreStart is set to
+// the last position of the page text, otherwise to |startPos|.
+// Returns FALSE when there is no text page or when no search terms could be
+// produced. Returns TRUE otherwise, including the case where the page text is
+// empty (m_IsFind is then FALSE).
+FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat,
+                                     int flags,
+                                     int startPos) {
+  if (!m_pTextPage)
+    return FALSE;
+
+  // Reload the page text if none is cached or the case mode changes, because
+  // a case-insensitive search lower-cases the cached copy in place below.
+  const int nCaseBit = flags & FPDFTEXT_MATCHCASE;
+  if (m_strText.IsEmpty() || m_bMatchCase != nCaseBit)
+    m_strText = m_pTextPage->GetPageText();
+
+  CFX_WideString csNeedle = findwhat;
+  m_findWhat = csNeedle;
+  m_flags = flags;
+  m_bMatchCase = nCaseBit;
+  if (m_strText.IsEmpty()) {
+    m_IsFind = FALSE;
+    return TRUE;
+  }
+
+  const FX_STRSIZE nNeedleLen = csNeedle.GetLength();
+  if (!m_bMatchCase) {
+    csNeedle.MakeLower();
+    m_strText.MakeLower();
+  }
+  m_bMatchWholeWord = flags & FPDFTEXT_MATCHWHOLEWORD;
+  m_findNextStart = startPos;
+  m_findPreStart = startPos == -1 ? m_strText.GetLength() - 1 : startPos;
+  m_csFindWhatArray.clear();
+
+  // A needle that is empty or made only of spaces is kept as a single term;
+  // anything else is split into terms by ExtractFindWhat().
+  bool bOnlySpaces = true;
+  for (FX_STRSIZE pos = 0; pos < nNeedleLen; ++pos) {
+    if (csNeedle.GetAt(pos) != ' ') {
+      bOnlySpaces = false;
+      break;
+    }
+  }
+  if (bOnlySpaces)
+    m_csFindWhatArray.push_back(csNeedle);
+  else
+    ExtractFindWhat(csNeedle);
+  if (m_csFindWhatArray.empty())
+    return FALSE;
+
+  m_IsFind = TRUE;
+  m_resStart = 0;
+  m_resEnd = -1;
+  return TRUE;
+}
+
+// Searches forward from m_findNextStart for the next occurrence of the terms
+// in m_csFindWhatArray. On success it stores the match in m_resStart/m_resEnd
+// (positions in m_strText), fills m_resArray with the match rectangles,
+// advances m_findNextStart/m_findPreStart and returns TRUE. Returns FALSE when
+// there is no text page, m_findNextStart is -1, the text is empty, the start
+// lies past the end of the text, or no further match exists.
+FX_BOOL CPDF_TextPageFind::FindNext() {
+  if (!m_pTextPage)
+    return FALSE;
+  m_resArray.clear();
+  if (m_findNextStart == -1)
+    return FALSE;
+  if (m_strText.IsEmpty()) {
+    m_IsFind = FALSE;
+    return m_IsFind;
+  }
+  int strLen = m_strText.GetLength();
+  if (m_findNextStart > strLen - 1) {
+    m_IsFind = FALSE;
+    return m_IsFind;
+  }
+  int nCount = pdfium::CollectionSize<int>(m_csFindWhatArray);
+  int nResultPos = 0;
+  int nStartPos = 0;
+  nStartPos = m_findNextStart;
+  bool bSpaceStart = false;
+  // Match the terms one after another. A term that fails resets iWord to -1 so
+  // the whole sequence is retried further along the text.
+  for (int iWord = 0; iWord < nCount; iWord++) {
+    CFX_WideString csWord = m_csFindWhatArray[iWord];
+    // ExtractFindWhat() stores an empty entry for an empty space-delimited
+    // piece of the search string (a leading or trailing space).
+    if (csWord.IsEmpty()) {
+      if (iWord == nCount - 1) {
+        // Trailing empty entry: the character after the previous term must be
+        // whitespace (line feed, space, carriage return or 160, i.e. U+00A0).
+        // NOTE(review): nStartPos can equal the text length here when the
+        // previous term ended on the last character; confirm GetAt() tolerates
+        // that index.
+        FX_WCHAR strInsert = m_strText.GetAt(nStartPos);
+        if (strInsert == TEXT_LINEFEED_CHAR || strInsert == TEXT_SPACE_CHAR ||
+            strInsert == TEXT_RETURN_CHAR || strInsert == 160) {
+          nResultPos = nStartPos + 1;
+          break;
+        }
+        // NOTE(review): if the array holds only this one empty entry (FindFirst
+        // pushes the raw string when it is empty), nStartPos never changes and
+        // this reset appears to loop forever; confirm callers reject an empty
+        // search string.
+        iWord = -1;
+      } else if (iWord == 0) {
+        // Leading empty entry: the match must be preceded by whitespace.
+        bSpaceStart = true;
+      }
+      continue;
+    }
+    int endIndex;
+    nResultPos = m_strText.Find(csWord.c_str(), nStartPos);
+    if (nResultPos == -1) {
+      m_IsFind = FALSE;
+      return m_IsFind;
+    }
+    endIndex = nResultPos + csWord.GetLength() - 1;
+    if (iWord == 0)
+      m_resStart = nResultPos;
+    FX_BOOL bMatch = TRUE;
+    if (iWord != 0 && !bSpaceStart) {
+      int PreResEndPos = nStartPos;
+      int curChar = csWord.GetAt(0);
+      CFX_WideString lastWord = m_csFindWhatArray[iWord - 1];
+      int lastChar = lastWord.GetAt(lastWord.GetLength() - 1);
+      // Two terms touching with no gap only match when at least one of the
+      // boundary characters satisfies IsIgnoreSpaceCharacter().
+      if (nStartPos == nResultPos &&
+          !(IsIgnoreSpaceCharacter(lastChar) ||
+            IsIgnoreSpaceCharacter(curChar))) {
+        bMatch = FALSE;
+      }
+      // Everything between the previous term and this one must be whitespace.
+      for (int d = PreResEndPos; d < nResultPos; d++) {
+        FX_WCHAR strInsert = m_strText.GetAt(d);
+        if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
+            strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
+          bMatch = FALSE;
+          break;
+        }
+      }
+    } else if (bSpaceStart) {
+      // The search string began with a space: require whitespace just before
+      // the term and include that character in the reported match.
+      if (nResultPos > 0) {
+        FX_WCHAR strInsert = m_strText.GetAt(nResultPos - 1);
+        if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
+            strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
+          bMatch = FALSE;
+          m_resStart = nResultPos;
+        } else {
+          m_resStart = nResultPos - 1;
+        }
+      }
+    }
+    if (m_bMatchWholeWord && bMatch) {
+      bMatch = IsMatchWholeWord(m_strText, nResultPos, endIndex);
+    }
+    nStartPos = endIndex + 1;
+    if (!bMatch) {
+      // Restart the sequence just past the first non-empty term of the
+      // rejected candidate. With bSpaceStart, entry 0 is the empty one, so the
+      // first real term is entry 1.
+      iWord = -1;
+      if (bSpaceStart)
+        nStartPos = m_resStart + m_csFindWhatArray[1].GetLength();
+      else
+        nStartPos = m_resStart + m_csFindWhatArray[0].GetLength();
+    }
+  }
+  m_resEnd = nResultPos + m_csFindWhatArray.back().GetLength() - 1;
+  m_IsFind = TRUE;
+  int resStart = GetCharIndex(m_resStart);
+  int resEnd = GetCharIndex(m_resEnd);
+  m_resArray = m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1);
+  // With FPDFTEXT_CONSECUTIVE the next search resumes one position after the
+  // start of this match; otherwise it resumes after the end of this match.
+  if (m_flags & FPDFTEXT_CONSECUTIVE) {
+    m_findNextStart = m_resStart + 1;
+    m_findPreStart = m_resEnd - 1;
+  } else {
+    m_findNextStart = m_resEnd + 1;
+    m_findPreStart = m_resStart - 1;
+  }
+  return m_IsFind;
+}
+
+// Finds the match preceding the current position. It runs a fresh forward
+// search over the same page with a temporary finder and keeps the last match
+// whose end does not pass m_findPreStart. On success it updates
+// m_resStart/m_resEnd, m_resArray and the next/previous start positions and
+// returns TRUE; otherwise it sets m_IsFind to FALSE and returns FALSE.
+FX_BOOL CPDF_TextPageFind::FindPrev() {
+  if (!m_pTextPage)
+    return FALSE;
+  m_resArray.clear();
+  if (m_strText.IsEmpty() || m_findPreStart < 0) {
+    m_IsFind = FALSE;
+    return m_IsFind;
+  }
+
+  CPDF_TextPageFind findEngine(m_pTextPage);
+  if (!findEngine.FindFirst(m_findWhat, m_flags)) {
+    m_IsFind = FALSE;
+    return m_IsFind;
+  }
+
+  // Walk forward through all matches, remembering the most recent one that
+  // still ends at or before m_findPreStart.
+  // NOTE(review): nOrder/nMatched are character indices (via GetCharIndex),
+  // while m_findPreStart is assigned from text positions elsewhere in this
+  // file; confirm the two index spaces are meant to be compared directly.
+  int nLastOrder = -1;
+  int nLastMatched = 0;
+  while (findEngine.FindNext()) {
+    const int nOrder = findEngine.GetCurOrder();
+    const int nMatched = findEngine.GetMatchedCount();
+    if (nOrder + nMatched - 1 > m_findPreStart)
+      break;
+    nLastOrder = nOrder;
+    nLastMatched = nMatched;
+  }
+  if (nLastOrder == -1) {
+    m_IsFind = FALSE;
+    return m_IsFind;
+  }
+
+  m_resStart = m_pTextPage->TextIndexFromCharIndex(nLastOrder);
+  m_resEnd = m_pTextPage->TextIndexFromCharIndex(nLastOrder + nLastMatched - 1);
+  m_IsFind = TRUE;
+  m_resArray = m_pTextPage->GetRectArray(nLastOrder, nLastMatched);
+
+  const bool bConsecutive = (m_flags & FPDFTEXT_CONSECUTIVE) != 0;
+  m_findNextStart = bConsecutive ? m_resStart + 1 : m_resEnd + 1;
+  m_findPreStart = bConsecutive ? m_resEnd - 1 : m_resStart - 1;
+  return m_IsFind;
+}
+
+// Splits |findwhat| into search terms and appends them to m_csFindWhatArray.
+// The string is first cut at spaces; an empty piece is stored as an empty
+// term. Within each piece, every character for which IsIgnoreSpaceCharacter()
+// is TRUE becomes a term of its own, and the text before it becomes a
+// separate term.
+void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) {
+  if (findwhat.IsEmpty())
+    return;
+
+  for (int nPiece = 0;; ++nPiece) {
+    CFX_WideString csPiece = TEXT_EMPTY;
+    const FX_BOOL bExtracted =
+        ExtractSubString(csPiece, findwhat.c_str(), nPiece, TEXT_SPACE_CHAR);
+    if (csPiece.IsEmpty()) {
+      // No piece at this index means the string is exhausted.
+      if (!bExtracted)
+        break;
+      m_csFindWhatArray.push_back(L"");
+      continue;
+    }
+
+    int pos = 0;
+    while (pos < csPiece.GetLength()) {
+      const FX_WCHAR ch = csPiece.GetAt(pos);
+      // NOTE(review): 0x2019 lies in 0x2000-0x206F, a range for which
+      // IsIgnoreSpaceCharacter() in this file returns FALSE, so the 0x2019
+      // exemption below looks unreachable.
+      const bool bStaysInTerm =
+          !IsIgnoreSpaceCharacter(ch) || (pos > 0 && ch == 0x2019);
+      if (bStaysInTerm) {
+        ++pos;
+        continue;
+      }
+      // Emit the text before |ch| (if any), then |ch| as its own term.
+      if (pos > 0)
+        m_csFindWhatArray.push_back(csPiece.Mid(0, pos));
+      m_csFindWhatArray.push_back(csPiece.Mid(pos, 1));
+      if (pos == csPiece.GetLength() - 1) {
+        csPiece.clear();
+        break;
+      }
+      // Keep scanning the remainder of the piece from its beginning.
+      csPiece = csPiece.Right(csPiece.GetLength() - pos - 1);
+      pos = 0;
+    }
+    if (!csPiece.IsEmpty())
+      m_csFindWhatArray.push_back(csPiece);
+  }
+}
+
+// Decides whether the span [startPos, endPos] of |csPageText| stands alone as
+// a whole word, judging by the characters immediately before and after it
+// (treated as 0 when the span touches either end of the text).
+// Returns FALSE for an empty span. Returns TRUE for a one-character span
+// whose character is above 255. Otherwise returns FALSE when either
+// neighbour is an ASCII letter, a digit, a character between 'Z' and 'a', or
+// a code point strictly between 0xfb00 and 0xfb06; returns TRUE if not.
+FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText,
+                                            int startPos,
+                                            int endPos) {
+  const int nSpanLen = endPos - startPos + 1;
+  if (nSpanLen < 1)
+    return FALSE;
+  if (nSpanLen == 1 && csPageText.GetAt(startPos) > 255)
+    return TRUE;
+
+  FX_WCHAR chBefore = 0;
+  if (startPos >= 1)
+    chBefore = csPageText.GetAt(startPos - 1);
+  FX_WCHAR chAfter = 0;
+  if (startPos + nSpanLen < csPageText.GetLength())
+    chAfter = csPageText.GetAt(startPos + nSpanLen);
+
+  // First neighbour test. The bounds are exclusive, so 'A', 'a' and 'z'
+  // themselves are only caught by the letter test further down.
+  const auto bJoinsWord = [](FX_WCHAR ch) -> bool {
+    return (ch > 'A' && ch < 'a') || (ch > 'a' && ch < 'z') ||
+           (ch > 0xfb00 && ch < 0xfb06) || std::iswdigit(ch);
+  };
+  if (bJoinsWord(chBefore) || bJoinsWord(chAfter))
+    return FALSE;
+
+  const auto bAsciiLetter = [](FX_WCHAR ch) -> bool {
+    return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
+  };
+  if (bAsciiLetter(chBefore) || bAsciiLetter(chAfter))
+    return FALSE;
+
+  // A digit at either edge of the span must not touch another digit.
+  const auto bAsciiDigit = [](FX_WCHAR ch) -> bool {
+    return ch >= L'0' && ch <= L'9';
+  };
+  if (bAsciiDigit(csPageText.GetAt(startPos)) && bAsciiDigit(chBefore))
+    return FALSE;
+  if (bAsciiDigit(csPageText.GetAt(endPos)) && bAsciiDigit(chAfter))
+    return FALSE;
+  return TRUE;
+}
+
+// Copies the |iSubString|-th piece of |lpszFullString|, split at |chSep|,
+// into |rString|. Runs of consecutive separators after a piece are skipped as
+// one. Returns FALSE when |lpszFullString| is null, or when fewer pieces
+// exist (|rString| is then cleared); returns TRUE otherwise. The extracted
+// piece may be empty.
+FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString,
+                                            const FX_WCHAR* lpszFullString,
+                                            int iSubString,
+                                            FX_WCHAR chSep) {
+  if (!lpszFullString)
+    return FALSE;
+
+  // Step over |iSubString| pieces, each followed by one or more separators.
+  const FX_WCHAR* pCursor = lpszFullString;
+  for (int nRemaining = iSubString; nRemaining != 0; --nRemaining) {
+    pCursor = std::wcschr(pCursor, chSep);
+    if (!pCursor) {
+      rString.clear();
+      return FALSE;
+    }
+    do {
+      ++pCursor;
+    } while (*pCursor == chSep);
+  }
+
+  // The piece runs up to the next separator, or to the end of the string.
+  const FX_WCHAR* pPieceEnd = std::wcschr(pCursor, chSep);
+  int nLen;
+  if (pPieceEnd)
+    nLen = static_cast<int>(pPieceEnd - pCursor);
+  else
+    nLen = static_cast<int>(FXSYS_wcslen(pCursor));
+  ASSERT(nLen >= 0);
+  FXSYS_memcpy(rString.GetBuffer(nLen), pCursor, nLen * sizeof(FX_WCHAR));
+  rString.ReleaseBuffer();
+  return TRUE;
+}
+
+// Returns a copy of |str| with its characters in reverse order.
+CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) {
+  CFX_WideString csReversed;
+  int i = str.GetLength();
+  // Append from the last character down to the first.
+  while (i-- > 0)
+    csReversed += str.GetAt(i);
+  return csReversed;
+}
+
+// Returns the character index that corresponds to the start of the current
+// match (m_resStart converted through GetCharIndex()).
+int CPDF_TextPageFind::GetCurOrder() const {
+  return GetCharIndex(m_resStart);
+}
+
+// Returns the number of character indices spanned by the current match: the
+// inclusive distance between the character indices of m_resStart and m_resEnd.
+int CPDF_TextPageFind::GetMatchedCount() const {
+  return GetCharIndex(m_resEnd) - GetCharIndex(m_resStart) + 1;
+}

diff --git a/core/fpdftext/include/cpdf_textpage.h b/core/fpdftext/include/cpdf_textpage.h
index 7f25fd7..64b1613 100644
--- a/core/fpdftext/include/cpdf_textpage.h
+++ b/core/fpdftext/include/cpdf_textpage.h

@@ -20,6 +20,26 @@
 class CPDF_Page;
 class CPDF_TextObject;
 
+// Search option bits, tested against the |flags| value handed to
+// CPDF_TextPageFind::FindFirst().
+#define FPDFTEXT_MATCHCASE 0x00000001
+#define FPDFTEXT_MATCHWHOLEWORD 0x00000002
+#define FPDFTEXT_CONSECUTIVE 0x00000004
+
+// Values compared against FPDF_CHAR_INFO::m_Flag. CPDF_TextPageFind counts
+// only NORMAL and GENERATED characters when building its run table.
+// NOTE(review): the meaning of the remaining values is not visible here.
+#define FPDFTEXT_CHAR_ERROR -1
+#define FPDFTEXT_CHAR_NORMAL 0
+#define FPDFTEXT_CHAR_GENERATED 1
+#define FPDFTEXT_CHAR_UNUNICODE 2
+#define FPDFTEXT_CHAR_HYPHEN 3
+#define FPDFTEXT_CHAR_PIECE 4
+
+// Wide-character and wide-string literals for whitespace and line breaks.
+#define TEXT_SPACE_CHAR L' '
+#define TEXT_LINEFEED_CHAR L'\n'
+#define TEXT_RETURN_CHAR L'\r'
+#define TEXT_EMPTY L""
+#define TEXT_SPACE L" "
+#define TEXT_RETURN_LINEFEED L"\r\n"
+#define TEXT_LINEFEED L"\n"
+// NOTE(review): not referenced in the code visible here; presumably a ratio
+// threshold used during text extraction - confirm against its users.
+#define TEXT_CHARRATIO_GAPDELTA 0.070
+
 enum class FPDFText_MarkedContent { Pass = 0, Done, Delay };
 
 enum class FPDFText_Direction { Left = -1, Right = 1 };
commit	2d396ac157bcd6da78190def936e5eaf278a6ca7	[log] [tgz]
author	npm <npm@chromium.org>	Fri Aug 26 10:00:25 2016 -0700
committer	Commit bot <commit-bot@chromium.org>	Fri Aug 26 10:00:25 2016 -0700
tree	0b2416b9e93dd770b4180735a96a3763e9f25eff
parent	18df9f5d1ff48fb3a225a84920fe46b63fba46a3 [diff]