blob: 1c3b22d7f3db617912c5e6d547c0dd9b04374fb8 [file] [log] [blame]
// Copyright 2017 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <map>
#include <memory>
#include <utility>
#include <vector>
#include "core/fpdfapi/cpdf_modulemgr.h"
#include "core/fpdfapi/font/cpdf_font.h"
#include "core/fpdfapi/font/cpdf_type1font.h"
#include "core/fpdfapi/page/cpdf_docpagedata.h"
#include "core/fpdfapi/page/cpdf_textobject.h"
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxge/cfx_fontmgr.h"
#include "core/fxge/fx_font.h"
#include "fpdfsdk/fsdk_define.h"
#include "public/fpdf_edit.h"
namespace {
CPDF_Dictionary* LoadFontDesc(CPDF_Document* pDoc,
const CFX_ByteString& font_name,
CFX_Font* pFont,
const uint8_t* data,
uint32_t size,
int font_type) {
CPDF_Dictionary* fontDesc = pDoc->NewIndirect<CPDF_Dictionary>();
fontDesc->SetNewFor<CPDF_Name>("Type", "FontDescriptor");
fontDesc->SetNewFor<CPDF_Name>("FontName", font_name);
int flags = 0;
if (FXFT_Is_Face_fixedwidth(pFont->GetFace()))
flags |= FXFONT_FIXED_PITCH;
if (font_name.Find("Serif") > -1)
flags |= FXFONT_SERIF;
if (FXFT_Is_Face_Italic(pFont->GetFace()))
flags |= FXFONT_ITALIC;
if (FXFT_Is_Face_Bold(pFont->GetFace()))
flags |= FXFONT_BOLD;
// TODO(npm): How do I know if a font is symbolic, script, allcap, smallcap
flags |= FXFONT_NONSYMBOLIC;
fontDesc->SetNewFor<CPDF_Number>("Flags", flags);
FX_RECT bbox;
pFont->GetBBox(bbox);
auto pBBox = pdfium::MakeUnique<CPDF_Array>();
pBBox->AddNew<CPDF_Number>(bbox.left);
pBBox->AddNew<CPDF_Number>(bbox.bottom);
pBBox->AddNew<CPDF_Number>(bbox.right);
pBBox->AddNew<CPDF_Number>(bbox.top);
fontDesc->SetFor("FontBBox", std::move(pBBox));
// TODO(npm): calculate italic angle correctly
fontDesc->SetNewFor<CPDF_Number>("ItalicAngle", pFont->IsItalic() ? -12 : 0);
fontDesc->SetNewFor<CPDF_Number>("Ascent", pFont->GetAscent());
fontDesc->SetNewFor<CPDF_Number>("Descent", pFont->GetDescent());
// TODO(npm): calculate the capheight, stemV correctly
fontDesc->SetNewFor<CPDF_Number>("CapHeight", pFont->GetAscent());
fontDesc->SetNewFor<CPDF_Number>("StemV", pFont->IsBold() ? 120 : 70);
CPDF_Stream* pStream = pDoc->NewIndirect<CPDF_Stream>();
pStream->SetData(data, size);
CFX_ByteString fontFile =
font_type == FPDF_FONT_TYPE1 ? "FontFile" : "FontFile2";
fontDesc->SetNewFor<CPDF_Reference>(fontFile, pDoc, pStream->GetObjNum());
return fontDesc;
}
const char ToUnicodeStart[] =
"/CIDInit /ProcSet findresource begin\n"
"12 dict begin\n"
"begincmap\n"
"/CIDSystemInfo\n"
"<</Registry (Adobe)\n"
"/Ordering (Identity)\n"
"/Supplement 0\n"
">> def\n"
"/CMapName /Adobe-Identity-H def\n"
"CMapType 2 def\n"
"1 begincodespacerange\n"
"<0000> <FFFFF>\n"
"endcodespacerange\n";
const char ToUnicodeEnd[] =
"endcmap\n"
"CMapName currentdict /CMap defineresource pop\n"
"end\n"
"end\n";
void AddCharcode(CFX_ByteTextBuf* pBuffer, uint32_t number) {
ASSERT(number <= 0xFFFF);
*pBuffer << "<";
char ans[4];
FXSYS_IntToFourHexChars(number, ans);
for (size_t i = 0; i < 4; ++i)
pBuffer->AppendChar(ans[i]);
*pBuffer << ">";
}
// PDF spec 1.7 Section 5.9.2: "Unicode character sequences as expressed in
// UTF-16BE encoding." See https://en.wikipedia.org/wiki/UTF-16#Description
void AddUnicode(CFX_ByteTextBuf* pBuffer, uint32_t unicode) {
char ans[8];
*pBuffer << "<";
size_t numChars = FXSYS_ToUTF16BE(unicode, ans);
for (size_t i = 0; i < numChars; ++i)
pBuffer->AppendChar(ans[i]);
*pBuffer << ">";
}
// Loads the charcode to unicode mapping into a stream
CPDF_Stream* LoadUnicode(CPDF_Document* pDoc,
const std::map<uint32_t, uint32_t>& to_unicode) {
// A map charcode->unicode
std::map<uint32_t, uint32_t> char_to_uni;
// A map <char_start, char_end> to vector v of unicode characters of size (end
// - start + 1). This abbreviates: start->v[0], start+1->v[1], etc. PDF spec
// 1.7 Section 5.9.2 says that only the last byte of the unicode may change.
std::map<std::pair<uint32_t, uint32_t>, std::vector<uint32_t>>
map_range_vector;
// A map <start, end> -> unicode
// This abbreviates: start->unicode, start+1->unicode+1, etc.
// PDF spec 1.7 Section 5.9.2 says that only the last byte of the unicode may
// change.
std::map<std::pair<uint32_t, uint32_t>, uint32_t> map_range;
// Calculate the maps
for (auto iter = to_unicode.begin(); iter != to_unicode.end(); ++iter) {
uint32_t firstCharcode = iter->first;
uint32_t firstUnicode = iter->second;
if (std::next(iter) == to_unicode.end() ||
firstCharcode + 1 != std::next(iter)->first) {
char_to_uni[firstCharcode] = firstUnicode;
continue;
}
++iter;
uint32_t curCharcode = iter->first;
uint32_t curUnicode = iter->second;
if (curCharcode % 256 == 0) {
char_to_uni[firstCharcode] = firstUnicode;
char_to_uni[curCharcode] = curUnicode;
continue;
}
const size_t maxExtra = 255 - (curCharcode % 256);
auto next_it = std::next(iter);
if (firstUnicode + 1 != curUnicode) {
// Consecutive charcodes mapping to non-consecutive unicodes
std::vector<uint32_t> unicodes;
unicodes.push_back(firstUnicode);
unicodes.push_back(curUnicode);
for (size_t i = 0; i < maxExtra; ++i) {
if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first)
break;
++iter;
++curCharcode;
unicodes.push_back(iter->second);
next_it = std::next(iter);
}
ASSERT(iter->first - firstCharcode + 1 == unicodes.size());
map_range_vector[std::make_pair(firstCharcode, iter->first)] = unicodes;
continue;
}
// Consecutive charcodes mapping to consecutive unicodes
for (size_t i = 0; i < maxExtra; ++i) {
if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first ||
curUnicode + 1 != next_it->second) {
break;
}
++iter;
++curCharcode;
++curUnicode;
next_it = std::next(iter);
}
map_range[std::make_pair(firstCharcode, curCharcode)] = firstUnicode;
}
CFX_ByteTextBuf buffer;
buffer << ToUnicodeStart;
// Add maps to buffer
buffer << static_cast<uint32_t>(char_to_uni.size()) << " beginbfchar\n";
for (const auto& iter : char_to_uni) {
AddCharcode(&buffer, iter.first);
buffer << " ";
AddUnicode(&buffer, iter.second);
buffer << "\n";
}
buffer << "endbfchar\n"
<< static_cast<uint32_t>(map_range_vector.size() + map_range.size())
<< " beginbfrange\n";
for (const auto& iter : map_range_vector) {
const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first;
AddCharcode(&buffer, charcodeRange.first);
buffer << " ";
AddCharcode(&buffer, charcodeRange.second);
buffer << " [";
const std::vector<uint32_t>& unicodes = iter.second;
for (size_t i = 0; i < unicodes.size(); ++i) {
uint32_t uni = unicodes[i];
AddUnicode(&buffer, uni);
if (i != unicodes.size() - 1)
buffer << " ";
}
buffer << "]\n";
}
for (const auto& iter : map_range) {
const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first;
AddCharcode(&buffer, charcodeRange.first);
buffer << " ";
AddCharcode(&buffer, charcodeRange.second);
buffer << " ";
AddUnicode(&buffer, iter.second);
buffer << "\n";
}
buffer << "endbfrange\n";
buffer << ToUnicodeEnd;
// TODO(npm): Encrypt / Compress?
uint32_t bufferSize = buffer.GetSize();
auto pDict = pdfium::MakeUnique<CPDF_Dictionary>();
pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(bufferSize));
return pDoc->NewIndirect<CPDF_Stream>(buffer.DetachBuffer(), bufferSize,
std::move(pDict));
}
const uint32_t kMaxSimpleFontChar = 0xFF;
void* LoadSimpleFont(CPDF_Document* pDoc,
std::unique_ptr<CFX_Font> pFont,
const uint8_t* data,
uint32_t size,
int font_type) {
CPDF_Dictionary* fontDict = pDoc->NewIndirect<CPDF_Dictionary>();
fontDict->SetNewFor<CPDF_Name>("Type", "Font");
fontDict->SetNewFor<CPDF_Name>(
"Subtype", font_type == FPDF_FONT_TYPE1 ? "Type1" : "TrueType");
CFX_ByteString name = pFont->GetFaceName();
if (name.IsEmpty())
name = "Unnamed";
fontDict->SetNewFor<CPDF_Name>("BaseFont", name);
uint32_t glyphIndex;
uint32_t currentChar = FXFT_Get_First_Char(pFont->GetFace(), &glyphIndex);
if (currentChar > kMaxSimpleFontChar || glyphIndex == 0)
return nullptr;
fontDict->SetNewFor<CPDF_Number>("FirstChar", static_cast<int>(currentChar));
CPDF_Array* widthsArray = pDoc->NewIndirect<CPDF_Array>();
while (true) {
widthsArray->AddNew<CPDF_Number>(pFont->GetGlyphWidth(glyphIndex));
uint32_t nextChar =
FXFT_Get_Next_Char(pFont->GetFace(), currentChar, &glyphIndex);
// Simple fonts have 1-byte charcodes only.
if (nextChar > kMaxSimpleFontChar || glyphIndex == 0)
break;
for (uint32_t i = currentChar + 1; i < nextChar; i++)
widthsArray->AddNew<CPDF_Number>(0);
currentChar = nextChar;
}
fontDict->SetNewFor<CPDF_Number>("LastChar", static_cast<int>(currentChar));
fontDict->SetNewFor<CPDF_Reference>("Widths", pDoc, widthsArray->GetObjNum());
CPDF_Dictionary* fontDesc =
LoadFontDesc(pDoc, name, pFont.get(), data, size, font_type);
fontDict->SetNewFor<CPDF_Reference>("FontDescriptor", pDoc,
fontDesc->GetObjNum());
return pDoc->LoadFont(fontDict);
}
const uint32_t kMaxUnicode = 0x10FFFF;
void* LoadCompositeFont(CPDF_Document* pDoc,
std::unique_ptr<CFX_Font> pFont,
const uint8_t* data,
uint32_t size,
int font_type) {
CPDF_Dictionary* fontDict = pDoc->NewIndirect<CPDF_Dictionary>();
fontDict->SetNewFor<CPDF_Name>("Type", "Font");
fontDict->SetNewFor<CPDF_Name>("Subtype", "Type0");
// TODO(npm): Get the correct encoding, if it's not identity.
CFX_ByteString encoding = "Identity-H";
fontDict->SetNewFor<CPDF_Name>("Encoding", encoding);
CFX_ByteString name = pFont->GetFaceName();
if (name.IsEmpty())
name = "Unnamed";
fontDict->SetNewFor<CPDF_Name>(
"BaseFont", font_type == FPDF_FONT_TYPE1 ? name + "-" + encoding : name);
CPDF_Dictionary* pCIDFont = pDoc->NewIndirect<CPDF_Dictionary>();
pCIDFont->SetNewFor<CPDF_Name>("Type", "Font");
pCIDFont->SetNewFor<CPDF_Name>("Subtype", font_type == FPDF_FONT_TYPE1
? "CIDFontType0"
: "CIDFontType2");
pCIDFont->SetNewFor<CPDF_Name>("BaseFont", name);
// TODO(npm): Maybe use FT_Get_CID_Registry_Ordering_Supplement to get the
// CIDSystemInfo
CPDF_Dictionary* pCIDSystemInfo = pDoc->NewIndirect<CPDF_Dictionary>();
pCIDSystemInfo->SetNewFor<CPDF_Name>("Registry", "Adobe");
pCIDSystemInfo->SetNewFor<CPDF_Name>("Ordering", "Identity");
pCIDSystemInfo->SetNewFor<CPDF_Number>("Supplement", 0);
pCIDFont->SetNewFor<CPDF_Reference>("CIDSystemInfo", pDoc,
pCIDSystemInfo->GetObjNum());
CPDF_Dictionary* fontDesc =
LoadFontDesc(pDoc, name, pFont.get(), data, size, font_type);
pCIDFont->SetNewFor<CPDF_Reference>("FontDescriptor", pDoc,
fontDesc->GetObjNum());
uint32_t glyphIndex;
uint32_t currentChar = FXFT_Get_First_Char(pFont->GetFace(), &glyphIndex);
// If it doesn't have a single char, just fail
if (glyphIndex == 0 || currentChar > kMaxUnicode)
return nullptr;
std::map<uint32_t, uint32_t> to_unicode;
std::map<uint32_t, uint32_t> widths;
while (true) {
if (currentChar > kMaxUnicode)
break;
widths[glyphIndex] = pFont->GetGlyphWidth(glyphIndex);
to_unicode[glyphIndex] = currentChar;
currentChar =
FXFT_Get_Next_Char(pFont->GetFace(), currentChar, &glyphIndex);
if (glyphIndex == 0)
break;
}
CPDF_Array* widthsArray = pDoc->NewIndirect<CPDF_Array>();
for (auto it = widths.begin(); it != widths.end(); ++it) {
int ch = it->first;
int w = it->second;
if (std::next(it) == widths.end()) {
// Only one char left, use format c [w]
auto oneW = pdfium::MakeUnique<CPDF_Array>();
oneW->AddNew<CPDF_Number>(w);
widthsArray->AddNew<CPDF_Number>(ch);
widthsArray->Add(std::move(oneW));
break;
}
++it;
int next_ch = it->first;
int next_w = it->second;
if (next_ch == ch + 1 && next_w == w) {
// The array can have a group c_first c_last w: all CIDs in the range from
// c_first to c_last will have width w
widthsArray->AddNew<CPDF_Number>(ch);
ch = next_ch;
while (true) {
auto next_it = std::next(it);
if (next_it == widths.end() || next_it->first != it->first + 1 ||
next_it->second != it->second) {
break;
}
++it;
ch = it->first;
}
widthsArray->AddNew<CPDF_Number>(ch);
widthsArray->AddNew<CPDF_Number>(w);
continue;
}
// Otherwise we can have a group of the form c [w1 w2 ...]: c has width
// w1, c+1 has width w2, etc.
widthsArray->AddNew<CPDF_Number>(ch);
auto curWidthArray = pdfium::MakeUnique<CPDF_Array>();
curWidthArray->AddNew<CPDF_Number>(w);
curWidthArray->AddNew<CPDF_Number>(next_w);
while (true) {
auto next_it = std::next(it);
if (next_it == widths.end() || next_it->first != it->first + 1)
break;
++it;
curWidthArray->AddNew<CPDF_Number>(static_cast<int>(it->second));
}
widthsArray->Add(std::move(curWidthArray));
}
pCIDFont->SetNewFor<CPDF_Reference>("W", pDoc, widthsArray->GetObjNum());
// TODO(npm): Support vertical writing
auto pDescendant = pdfium::MakeUnique<CPDF_Array>();
pDescendant->AddNew<CPDF_Reference>(pDoc, pCIDFont->GetObjNum());
fontDict->SetFor("DescendantFonts", std::move(pDescendant));
CPDF_Stream* toUnicodeStream = LoadUnicode(pDoc, to_unicode);
fontDict->SetNewFor<CPDF_Reference>("ToUnicode", pDoc,
toUnicodeStream->GetObjNum());
return pDoc->LoadFont(fontDict);
}
} // namespace
DLLEXPORT FPDF_PAGEOBJECT STDCALL FPDFPageObj_NewTextObj(FPDF_DOCUMENT document,
FPDF_BYTESTRING font,
float font_size) {
CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
if (!pDoc)
return nullptr;
CPDF_Font* pFont = CPDF_Font::GetStockFont(pDoc, CFX_ByteStringC(font));
if (!pFont)
return nullptr;
auto pTextObj = pdfium::MakeUnique<CPDF_TextObject>();
pTextObj->m_TextState.SetFont(pFont);
pTextObj->m_TextState.SetFontSize(font_size);
pTextObj->DefaultStates();
return pTextObj.release(); // Caller takes ownership.
}
DLLEXPORT FPDF_BOOL STDCALL FPDFText_SetText(FPDF_PAGEOBJECT text_object,
FPDF_WIDESTRING text) {
auto* pTextObj = static_cast<CPDF_TextObject*>(text_object);
if (!pTextObj)
return false;
FX_STRSIZE len = CFX_WideString::WStringLength(text);
CFX_WideString encodedText = CFX_WideString::FromUTF16LE(text, len);
CFX_ByteString byteText;
for (wchar_t wc : encodedText) {
pTextObj->GetFont()->AppendChar(
&byteText, pTextObj->GetFont()->CharCodeFromUnicode(wc));
}
pTextObj->SetText(byteText);
return true;
}
DLLEXPORT FPDF_FONT STDCALL FPDFText_LoadFont(FPDF_DOCUMENT document,
const uint8_t* data,
uint32_t size,
int font_type,
FPDF_BOOL cid) {
CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
if (!pDoc || !data || size == 0 ||
(font_type != FPDF_FONT_TYPE1 && font_type != FPDF_FONT_TRUETYPE)) {
return nullptr;
}
auto pFont = pdfium::MakeUnique<CFX_Font>();
// TODO(npm): Maybe use FT_Get_X11_Font_Format to check format? Otherwise, we
// are allowing giving any font that can be loaded on freetype and setting it
// as any font type.
if (!pFont->LoadEmbedded(data, size))
return nullptr;
return cid ? LoadCompositeFont(pDoc, std::move(pFont), data, size, font_type)
: LoadSimpleFont(pDoc, std::move(pFont), data, size, font_type);
}
DLLEXPORT FPDF_BOOL STDCALL FPDFText_SetFillColor(FPDF_PAGEOBJECT text_object,
unsigned int R,
unsigned int G,
unsigned int B,
unsigned int A) {
return FPDFPageObj_SetFillColor(text_object, R, G, B, A);
}
DLLEXPORT void STDCALL FPDFFont_Close(FPDF_FONT font) {
CPDF_Font* pFont = static_cast<CPDF_Font*>(font);
if (!pFont)
return;
CPDF_Document* pDoc = pFont->GetDocument();
if (!pDoc)
return;
CPDF_DocPageData* pPageData = pDoc->GetPageData();
if (!pPageData->IsForceClear())
pPageData->ReleaseFont(pFont->GetFontDict());
}
DLLEXPORT FPDF_PAGEOBJECT STDCALL
FPDFPageObj_CreateTextObj(FPDF_DOCUMENT document,
FPDF_FONT font,
float font_size) {
CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
CPDF_Font* pFont = static_cast<CPDF_Font*>(font);
if (!pDoc || !pFont)
return nullptr;
auto pTextObj = pdfium::MakeUnique<CPDF_TextObject>();
pTextObj->m_TextState.SetFont(pDoc->LoadFont(pFont->GetFontDict()));
pTextObj->m_TextState.SetFontSize(font_size);
pTextObj->DefaultStates();
return pTextObj.release();
}