blob: b359a8d33d257e76a1be2fd734c876585f4027b3 [file] [log] [blame]
#include "SkNativeParsedPDF.h"
#include "SkPdfNativeTokenizer.h"
#include "SkPdfBasics.h"
#include "SkPdfParser.h"
#include "SkPdfObject.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "SkPdfFileTrailerDictionary_autogen.h"
#include "SkPdfCatalogDictionary_autogen.h"
#include "SkPdfPageObjectDictionary_autogen.h"
#include "SkPdfPageTreeNodeDictionary_autogen.h"
#include "SkPdfMapper_autogen.h"
// Returns the size in bytes of the file at `filename`, or -1 when stat() fails
// (for example because the path does not exist).
static long getFileSize(const char* filename)
{
    struct stat info;
    if (stat(filename, &info) != 0) {
        return -1;
    }
    return (long)info.st_size;
}
// Walks `current` backwards until it sits on the first byte of its line, i.e.
// until the byte just before it is a PDF end-of-line character or `start` is
// reached. Returns the resulting position.
static unsigned char* lineHome(unsigned char* start, unsigned char* current) {
    for (; current > start; --current) {
        if (isPdfEOL(current[-1])) {
            break;
        }
    }
    return current;
}
// Given `current` at the start of a line, returns the start of the line before
// it. Never moves before `start`.
static unsigned char* previousLineHome(unsigned char* start, unsigned char* current) {
    // Step over the end-of-line byte that terminates the previous line.
    if (current > start && isPdfEOL(current[-1])) {
        --current;
    }

    // Step over the second byte of a two-byte terminator.
    // allows CR+LF, LF+CR but not two CR+CR or LF+LF
    if (current > start && isPdfEOL(current[-1]) && current[0] != current[-1]) {
        --current;
    }

    // Rewind to the first byte of that line.
    for (; current > start && !isPdfEOL(current[-1]); --current) {
    }
    return current;
}
// Skips the rest of the line that `current` is on and returns the first byte
// of the following line. A two-byte terminator made of two different EOL
// characters (CR+LF or LF+CR) is consumed as one line ending.
//
// The result never advances past `end`: if no end-of-line byte is found before
// `end` (or `current` is already at/after `end`), the position where scanning
// stopped is returned instead of stepping one byte beyond the buffer.
static unsigned char* ignoreLine(unsigned char* current, unsigned char* end) {
    while (current < end && !isPdfEOL(*current)) {
        current++;
    }

    if (current >= end) {
        // Ran out of input without seeing a terminator; nothing to consume.
        return current;
    }

    // Consume the terminator byte itself.
    current++;

    // Consume the second byte of a CR+LF / LF+CR pair, but not CR+CR or LF+LF,
    // which are two separate (empty) lines.
    if (current < end && isPdfEOL(*current) && *current != *(current - 1)) {
        current++;
    }
    return current;
}
// TODO(edisonn): NYI
// TODO(edisonn): 3 constructors: from URL, from stream, from file ...
// TODO(edisonn): write one that accepts errors in the file and ignores/fixes them
// TODO(edisonn): testing:
// 1) run on a lot of files
// 2) recoverable corrupt file: remove endobj, endstream, remove other keywords, use other white spaces, insert comments randomly, ...
// 3) irrecoverable corrupt file
// Loads the PDF at `path` fully into memory, reads the cross reference
// section(s) and trailer(s) reachable from the "startxref" entry at the end of
// the file, resolves the root catalog and collects the page list.
//
// On any I/O failure (missing/empty file, short read) the object is left in an
// empty but valid state: no file content, no catalog, no pages.
SkNativeParsedPDF::SkNativeParsedPDF(const char* path) : fAllocator(new SkPdfAllocator()) {
    // Start from a well-defined state so every early return below leaves the
    // object safe to query and to destroy (delete[] NULL is a no-op).
    fFileContent = NULL;
    fContentLength = 0;
    fRootCatalogRef = NULL;
    fRootCatalog = NULL;

    const long fileSize = getFileSize(path);
    if (fileSize <= 0) {
        // TODO(edisonn): report/issue - file missing or empty
        return;
    }

    // PDF is a binary format: open in binary mode so no newline translation
    // shifts the byte offsets stored in the cross reference table.
    FILE* file = fopen(path, "rb");
    if (file == NULL) {
        // TODO(edisonn): report/issue - cannot open file
        return;
    }

    // One extra byte holds a NUL terminator, so the C string helpers used on
    // the buffer below (strncmp/atol) cannot run past the end of the buffer.
    unsigned char* content = new unsigned char[fileSize + 1];
    const size_t bytesRead = fread(content, 1, (size_t)fileSize, file);
    fclose(file);
    file = NULL;

    if (bytesRead != (size_t)fileSize) {
        // TODO(edisonn): report/issue - short read
        delete[] content;
        return;
    }
    content[fileSize] = '\0';

    fFileContent = content;
    fContentLength = fileSize;

    // The file is expected to end with:
    //   startxref
    //   <byte offset of the last cross reference section>
    //   %%EOF
    unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLength - 1);
    unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine);
    unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine);

    // Compare only the keyword itself: the line is followed by its terminator
    // and the rest of the file, so a whole-string comparison can never match.
    if (strncmp((const char*)xrefstartKeywordLine, "startxref", strlen("startxref")) != 0) {
        // TODO(edisonn): report/issue
    }

    long xrefByteOffset = atol((const char*)xrefByteOffsetLine);

    // Cross reference data must lie before the startxref line; reject offsets
    // outside that range instead of parsing memory outside the section.
    const long xrefLimit = (long)(xrefstartKeywordLine - fFileContent);

    // Only the first (most recent) trailer supplies the root catalog.
    bool storeCatalog = true;
    // TODO(edisonn): a corrupt file whose /Prev entries form a cycle would loop forever here.
    while (xrefByteOffset >= 0 && xrefByteOffset < xrefLimit) {
        unsigned char* trailerStart = readCrossReferenceSection(fFileContent + xrefByteOffset, xrefstartKeywordLine);
        xrefByteOffset = readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog);
        storeCatalog = false;
    }

    // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration
    // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper
    // load catalog
    fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
    if (fRootCatalog == NULL) {
        // TODO(edisonn): report/issue - no usable root catalog
        return;
    }

    SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
    if (tree != NULL) {
        fillPages(tree);
    }

    // now actually read all objects if we want, or do it lazily
    // and resolve references?... or not ...
}
// TODO(edisonn): NYI
// Releases the resources acquired by the constructor.
SkNativeParsedPDF::~SkNativeParsedPDF() {
// The file buffer was allocated with new[] in the constructor.
delete[] fFileContent;
// The allocator was created with new in the constructor's initializer list.
delete fAllocator;
}
unsigned char* SkNativeParsedPDF::readCrossReferenceSection(unsigned char* xrefStart, unsigned char* trailerEnd) {
unsigned char* current = ignoreLine(xrefStart, trailerEnd); // TODO(edisonn): verify next keyord is "xref", use nextObject here
SkPdfObject token;
while (current < trailerEnd) {
token.reset();
unsigned char* previous = current;
current = nextObject(current, trailerEnd, &token, NULL);
if (!token.isInteger()) {
return previous;
}
int startId = (int)token.intValue();
token.reset();
current = nextObject(current, trailerEnd, &token, NULL);
if (!token.isInteger()) {
// TODO(edisonn): report/warning
return current;
}
int entries = (int)token.intValue();
for (int i = 0; i < entries; i++) {
token.reset();
current = nextObject(current, trailerEnd, &token, NULL);
if (!token.isInteger()) {
// TODO(edisonn): report/warning
return current;
}
int offset = (int)token.intValue();
token.reset();
current = nextObject(current, trailerEnd, &token, NULL);
if (!token.isInteger()) {
// TODO(edisonn): report/warning
return current;
}
int generation = (int)token.intValue();
token.reset();
current = nextObject(current, trailerEnd, &token, NULL);
if (!token.isKeyword() || token.len() != 1 || (*token.c_str() != 'f' && *token.c_str() != 'n')) {
// TODO(edisonn): report/warning
return current;
}
addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f');
}
}
// TODO(edisonn): it should never get here? there is no trailer?
return current;
}
long SkNativeParsedPDF::readTrailer(unsigned char* trailerStart, unsigned char* trailerEnd, bool storeCatalog) {
unsigned char* current = ignoreLine(trailerStart, trailerEnd); // TODO(edisonn): verify next keyord is "trailer" use nextObject here
SkPdfObject token;
current = nextObject(current, trailerEnd, &token, fAllocator);
SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token;
if (storeCatalog) {
const SkPdfObject* ref = trailer->Root(NULL);
if (ref == NULL || !ref->isReference()) {
// TODO(edisonn): oops, we have to fix the corrup pdf file
return -1;
}
fRootCatalogRef = ref;
}
if (trailer->has_Prev()) {
return (long)trailer->Prev(NULL);
}
return -1;
}
// Records the file offset of object `id`, growing fObjects as needed so that
// index `id` exists, and clears any object previously loaded for that id.
//
// `id` comes straight from the (untrusted) file; negative ids are ignored so
// they can never be used as an index into fObjects.
//
// `generation` and `isFreed` are currently not used.
// NOTE(review): entries are overwritten unconditionally; confirm this gives
// the intended precedence when several cross reference sections list the same
// id.
// TODO(edisonn): security here - a huge id still makes fObjects grow to that size
void SkNativeParsedPDF::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) {
    if (id < 0) {
        // TODO(edisonn): report/warning
        return;
    }

    // `count() <= id` rather than `count() < id + 1`: avoids signed overflow
    // when id is INT_MAX.
    while (fObjects.count() <= id) {
        reset(fObjects.append());
    }

    fObjects[id].fOffset = offset;
    fObjects[id].fObj = NULL;
}
// Parses the indirect object `id` from the file buffer, starting at the offset
// recorded for it in fObjects.
//
// The expected layout is "<id> <generation> obj <value> ...". The value is
// parsed into an object obtained from fAllocator and returned.
//
// Returns NULL when `id` is not a valid index, when the recorded offset lies
// outside the loaded file, or when the buffer ends before the value starts.
// A mismatching id/generation or a missing "obj" keyword is currently
// tolerated (see the TODOs below).
SkPdfObject* SkNativeParsedPDF::readObject(int id/*, int expectedGeneration*/) const {
    if (id < 0 || id >= fObjects.count()) {
        // TODO(edisonn): report warning/error
        return NULL;
    }

    const long startOffset = fObjects[id].fOffset;

    // The offset was read from the file and cannot be trusted: refuse to form
    // a pointer outside the loaded buffer.
    if (fFileContent == NULL || startOffset < 0 || startOffset >= (long)fContentLength) {
        // TODO(edisonn): report warning/error
        return NULL;
    }

    //long endOffset = fObjects[id].fOffsetEnd;
    // TODO(edisonn): use hinted endOffset
    // TODO(edisonn): current implementation will result in a lot of memory usage
    // to decrease memory usage, we either need to be smart and know where objects end, and we will
    // allocate only the chunks needed, or the tokenizer will not make copies, but then it needs to
    // cache the results so it does not go twice on the same buffer
    unsigned char* current = fFileContent + startOffset;
    unsigned char* end = fFileContent + fContentLength;

    // NOTE(review): this tokenizer is never used below; it is kept only in
    // case its construction has effects that are not visible from this file.
    SkPdfNativeTokenizer tokenizer(current, end - current, fMapper, fAllocator);

    SkPdfObject idObj;
    SkPdfObject generationObj;
    SkPdfObject objKeyword;

    current = nextObject(current, end, &idObj, NULL);
    if (current >= end) {
        // TODO(edisonn): report warning/error
        return NULL;
    }

    current = nextObject(current, end, &generationObj, NULL);
    if (current >= end) {
        // TODO(edisonn): report warning/error
        return NULL;
    }

    current = nextObject(current, end, &objKeyword, NULL);
    if (current >= end) {
        // TODO(edisonn): report warning/error
        return NULL;
    }

    if (!idObj.isInteger() || !generationObj.isInteger() || id != idObj.intValue()/* || generation != generationObj.intValue()*/) {
        // TODO(edisonn): report warning/error
    }

    if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) {
        // TODO(edisonn): report warning/error
    }

    // Allocate the result only once the header has been read, so the early
    // returns above do not consume an allocator slot.
    SkPdfObject* dict = fAllocator->allocObject();
    current = nextObject(current, end, dict, fAllocator);

    // TODO(edisonn): report warning/error - verify last token is endobj

    return dict;
}
// Walks the page tree rooted at `tree` and appends the pages found to fPages.
//
// A node without a Kids array is stored as a page. For a node with Kids, each
// kid is resolved and either stored as a page or recursed into, depending on
// what the mapper reports for it.
// NOTE(review): a kid is stored as a page when the mapper result is *not*
// kPageObjectDictionary_SkPdfObjectType and recursed into when it is - this
// reads inverted; confirm against the mapper's contract.
void SkNativeParsedPDF::fillPages(SkPdfPageTreeNodeDictionary* tree) {
    const SkPdfArray* children = tree->Kids(this);
    if (children == NULL) {
        *fPages.append() = (SkPdfPageObjectDictionary*)tree;
        return;
    }

    const int childCount = children->size();
    for (int index = 0; index < childCount; ++index) {
        const SkPdfObject* child = resolveReference(children->objAtAIndex(index));

        const bool recurse =
                fMapper->mapPageObjectDictionary(child) == kPageObjectDictionary_SkPdfObjectType;
        if (recurse) {
            // TODO(edisonn): verify that it is a page tree indeed
            fillPages((SkPdfPageTreeNodeDictionary*)child);
        } else {
            *fPages.append() = (SkPdfPageObjectDictionary*)child;
        }
    }
}
// Returns the number of pages collected in fPages.
int SkNativeParsedPDF::pages() const {
return fPages.count();
}
// Returns the Resources entry of page `page`, as reported by the page
// dictionary. `page` is used to index fPages directly and is not range-checked.
SkPdfResourceDictionary* SkNativeParsedPDF::pageResources(int page) {
    SkPdfPageObjectDictionary* pageDict = fPages[page];
    return pageDict->Resources(this);
}
// TODO(edisonn): Partial implemented. Move the logics directly in the code generator for inheritable and default value?
// Returns the MediaBox of page `page`. When the page itself has no MediaBox,
// its Parent chain is followed until a node with a MediaBox (or without a
// Parent) is found. Returns an empty rect when the chain ends in a NULL
// parent.
SkRect SkNativeParsedPDF::MediaBox(int page) const {
    SkPdfPageObjectDictionary* current = fPages[page];

    // Parent() may hand back NULL, so `current` must be checked on every
    // iteration before it is dereferenced.
    // TODO(edisonn): a corrupt file with a cyclic Parent chain would loop forever here.
    while (current != NULL && !current->has_MediaBox() && current->has_Parent()) {
        current = (SkPdfPageObjectDictionary*)current->Parent(this);
    }

    if (current != NULL) {
        return current->MediaBox(this);
    }
    return SkRect::MakeEmpty();
}
// TODO(edisonn): stream or array ... ? for now only array
// Returns a newly created tokenizer over the content stream of page `page`,
// or NULL when the page's Contents is not a single stream.
SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfPage(int page) const {
    if (!fPages[page]->isContentsAStream(this)) {
        // TODO(edisonn): NYI, we need to concatenate all streams in the array or make the tokenizer smart
        // so we don't allocate new memory
        return NULL;
    }
    return tokenizerOfStream(fPages[page]->getContentsAsStream(this));
}
// Creates a tokenizer over `stream` with new; the caller is responsible for
// deleting it. Returns NULL when `stream` is NULL.
SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfStream(SkPdfObject* stream) const {
    if (stream != NULL) {
        return new SkPdfNativeTokenizer(stream, fMapper, fAllocator);
    }
    return NULL;
}
// TODO(edisonn): NYI
// Creates a tokenizer (with new) over the `len` bytes starting at `buffer`.
// The returned object must be deleted by the caller.
SkPdfNativeTokenizer* SkNativeParsedPDF::tokenizerOfBuffer(unsigned char* buffer, size_t len) const {
// Warning: two calls on the same buffer are not tracked, and the buffer is updated!
// Make a clean copy if needed!
return new SkPdfNativeTokenizer(buffer, len, fMapper, fAllocator);
}
// Returns the number of slots in fObjects (one per object id recorded from the
// cross reference sections, including any gaps filled while growing the table).
size_t SkNativeParsedPDF::objects() const {
return fObjects.count();
}
// Returns object number `i`, reading it from the file on first access and
// caching the result in fObjects. Returns NULL when `i` is outside
// [0, objects()); the result may also be NULL when the object cannot be read.
SkPdfObject* SkNativeParsedPDF::object(int i) {
    // Valid indices are 0 .. count() - 1, so `i == count()` must be rejected.
    SkASSERT(!(i < 0 || i >= fObjects.count()));
    if (i < 0 || i >= fObjects.count()) {
        return NULL;
    }

    if (fObjects[i].fObj == NULL) {
        // TODO(edisonn): when we read the cross reference sections, store the start of the next object
        // and fill fOffsetEnd
        fObjects[i].fObj = readObject(i);
    }

    return fObjects[i].fObj;
}
// Returns the mapper held in fMapper.
// NOTE(review): fMapper is not assigned anywhere in this file's constructor - confirm where it is initialized.
const SkPdfMapper* SkNativeParsedPDF::mapper() const {
return fMapper;
}
// Obtains a fresh object from fAllocator, initializes it as a real holding
// `value`, and returns it.
SkPdfReal* SkNativeParsedPDF::createReal(double value) const {
    SkPdfObject* realObj = fAllocator->allocObject();
    SkPdfObject::makeReal(value, realObj);
    return (SkPdfReal*)realObj;
}
// Obtains a fresh object from fAllocator, initializes it as an integer holding
// `value`, and returns it.
SkPdfInteger* SkNativeParsedPDF::createInteger(int value) const {
    SkPdfObject* integerObj = fAllocator->allocObject();
    SkPdfObject::makeInteger(value, integerObj);
    return (SkPdfInteger*)integerObj;
}
// Obtains a fresh object from fAllocator, initializes it as a string from the
// `len` bytes at `sz`, and returns it.
SkPdfString* SkNativeParsedPDF::createString(unsigned char* sz, size_t len) const {
    SkPdfObject* stringObj = fAllocator->allocObject();
    SkPdfObject::makeString(sz, len, stringObj);
    return (SkPdfString*)stringObj;
}
// Global pointer to a PdfContext. drawPage() points it at the context it
// creates for the page being drawn; it is NULL until the first drawPage() call.
PdfContext* gPdfContext = NULL;
void SkNativeParsedPDF::drawPage(int page, SkCanvas* canvas) {
SkPdfNativeTokenizer* tokenizer = tokenizerOfPage(page);
PdfContext pdfContext(this);
pdfContext.fOriginalMatrix = SkMatrix::I();
pdfContext.fGraphicsState.fResources = pageResources(page);
gPdfContext = &pdfContext;
// TODO(edisonn): get matrix stuff right.
// TODO(edisonn): add DPI/scale/zoom.
SkScalar z = SkIntToScalar(0);
SkRect rect = MediaBox(page);
SkScalar w = rect.width();
SkScalar h = rect.height();
SkPoint pdfSpace[4] = {SkPoint::Make(z, z), SkPoint::Make(w, z), SkPoint::Make(w, h), SkPoint::Make(z, h)};
// SkPoint skiaSpace[4] = {SkPoint::Make(z, h), SkPoint::Make(w, h), SkPoint::Make(w, z), SkPoint::Make(z, z)};
// TODO(edisonn): add flag for this app to create sourunding buffer zone
// TODO(edisonn): add flagg for no clipping.
// Use larger image to make sure we do not draw anything outside of page
// could be used in tests.
#ifdef PDF_DEBUG_3X
SkPoint skiaSpace[4] = {SkPoint::Make(w+z, h+h), SkPoint::Make(w+w, h+h), SkPoint::Make(w+w, h+z), SkPoint::Make(w+z, h+z)};
#else
SkPoint skiaSpace[4] = {SkPoint::Make(z, h), SkPoint::Make(w, h), SkPoint::Make(w, z), SkPoint::Make(z, z)};
#endif
//SkPoint pdfSpace[2] = {SkPoint::Make(z, z), SkPoint::Make(w, h)};
//SkPoint skiaSpace[2] = {SkPoint::Make(w, z), SkPoint::Make(z, h)};
//SkPoint pdfSpace[2] = {SkPoint::Make(z, z), SkPoint::Make(z, h)};
//SkPoint skiaSpace[2] = {SkPoint::Make(z, h), SkPoint::Make(z, z)};
//SkPoint pdfSpace[3] = {SkPoint::Make(z, z), SkPoint::Make(z, h), SkPoint::Make(w, h)};
//SkPoint skiaSpace[3] = {SkPoint::Make(z, h), SkPoint::Make(z, z), SkPoint::Make(w, 0)};
SkAssertResult(pdfContext.fOriginalMatrix.setPolyToPoly(pdfSpace, skiaSpace, 4));
SkTraceMatrix(pdfContext.fOriginalMatrix, "Original matrix");
pdfContext.fGraphicsState.fMatrix = pdfContext.fOriginalMatrix;
pdfContext.fGraphicsState.fMatrixTm = pdfContext.fGraphicsState.fMatrix;
pdfContext.fGraphicsState.fMatrixTlm = pdfContext.fGraphicsState.fMatrix;
canvas->setMatrix(pdfContext.fOriginalMatrix);
#ifndef PDF_DEBUG_NO_PAGE_CLIPING
canvas->clipRect(SkRect::MakeXYWH(z, z, w, h), SkRegion::kIntersect_Op, true);
#endif
// erase with red before?
// SkPaint paint;
// paint.setColor(SK_ColorRED);
// canvas->drawRect(rect, paint);
PdfMainLooper looper(NULL, tokenizer, &pdfContext, canvas);
looper.loop();
delete tokenizer;
canvas->flush();
}
// Returns the allocator created in the constructor. It is deleted by the
// destructor, so callers must not delete it.
SkPdfAllocator* SkNativeParsedPDF::allocator() const {
return fAllocator;
}
// Non-const convenience overload: forwards to the const SkPdfObject* overload
// and returns its result.
SkPdfObject* SkNativeParsedPDF::resolveReference(SkPdfObject* ref) const {
// The explicit cast to const selects the other overload instead of recursing into this one.
return (SkPdfObject*)resolveReference((const SkPdfObject*)ref);
}
// Follows `ref` to the object it ultimately refers to.
//
// If `ref` is NULL or not a reference, it is returned unchanged (with const
// cast away). Otherwise the referenced object is loaded on demand, references
// to references are followed, and the final target is cached in
// fObjects[id].fResolvedReference.
//
// Returns NULL when the referenced id is outside [0, fObjects.count()), when
// the object cannot be read, or when the object is a reference to itself.
//
// TODO(edisonn): fix infinite loop for reference cycles longer than one object
// (only a direct self reference is detected below).
// TODO(edisonn): perf, fix refs at load, and resolve will simply return fResolvedReference?
SkPdfObject* SkNativeParsedPDF::resolveReference(const SkPdfObject* ref) const {
    if (ref == NULL || !ref->isReference()) {
        // TODO(edisonn): fix the mess with const, probably we need to remove it pretty much everywhere
        return (SkPdfObject*)ref;
    }

    int id = ref->referenceId();
    // TODO(edisonn): generation/updates not supported now
    //int gen = ref->referenceGeneration();

    // Valid ids are 0 .. count() - 1, so `id == count()` must be rejected.
    SkASSERT(!(id < 0 || id >= fObjects.count()));
    if (id < 0 || id >= fObjects.count()) {
        return NULL;
    }

    // TODO(edisonn): verify id and gen expected

    if (fObjects[id].fResolvedReference != NULL) {
        return fObjects[id].fResolvedReference;
    }

    if (fObjects[id].fObj == NULL) {
        fObjects[id].fObj = readObject(id);
    }

    if (fObjects[id].fObj == NULL) {
        // readObject() could not produce the object; nothing to resolve to.
        return NULL;
    }

    if (!fObjects[id].fObj->isReference()) {
        fObjects[id].fResolvedReference = fObjects[id].fObj;
    } else if (fObjects[id].fObj->referenceId() == id) {
        // The object is a reference to itself; following it would recurse forever.
        return NULL;
    } else {
        fObjects[id].fResolvedReference = resolveReference(fObjects[id].fObj);
    }

    return fObjects[id].fResolvedReference;
}