| #!/usr/bin/env python |
| # Copyright 2014 The PDFium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Expands a hand-written PDF testcase (template) into a valid PDF file. |
| |
| There are several places in a PDF file where byte-offsets are required. This |
| script replaces {{name}}-style variables in the input with calculated results |
| |
| {{header}} - expands to the header comment required for PDF files. |
| {{xref}} - expands to a generated xref table, noting the offset. |
| {{trailer}} - expands to a standard trailer with "1 0 R" as the /Root. |
| {{startxref} - expands to a startxref directive followed by correct offset. |
| {{object x y}} - expands to |x y obj| declaration, noting the offset. |
| """ |
| |
| import optparse |
| import os |
| import re |
| import sys |
| |
| class TemplateProcessor: |
| HEADER_TOKEN = '{{header}}' |
| HEADER_REPLACEMENT = '%PDF-1.7\n%\xa0\xf2\xa4\xf4' |
| |
| XREF_TOKEN = '{{xref}}' |
| XREF_REPLACEMENT = 'xref\n%d %d\n' |
| |
| XREF_REPLACEMENT_N = '%010d %05d n \n' |
| XREF_REPLACEMENT_F = '0000000000 65535 f \n' |
| # XREF rows must be exactly 20 bytes - space required. |
| assert(len(XREF_REPLACEMENT_F) == 20) |
| |
| TRAILER_TOKEN = '{{trailer}}' |
| TRAILER_REPLACEMENT = 'trailer<< /Root 1 0 R /Size %d >>' |
| |
| STARTXREF_TOKEN= '{{startxref}}' |
| STARTXREF_REPLACEMENT = 'startxref\n%d' |
| |
| OBJECT_PATTERN = r'\{\{object\s+(\d+)\s+(\d+)\}\}' |
| OBJECT_REPLACEMENT = r'\1 \2 obj' |
| |
| def __init__(self): |
| self.offset = 0 |
| self.xref_offset = 0 |
| self.max_object_number = 0 |
| self.objects = { } |
| |
| def insert_xref_entry(self, object_number, generation_number): |
| self.objects[object_number] = (self.offset, generation_number) |
| self.max_object_number = max(self.max_object_number, object_number) |
| |
| def generate_xref_table(self): |
| result = self.XREF_REPLACEMENT % (0, self.max_object_number + 1) |
| for i in range(0, self.max_object_number + 1): |
| if i in self.objects: |
| result += self.XREF_REPLACEMENT_N % self.objects[i] |
| else: |
| result += self.XREF_REPLACEMENT_F |
| return result |
| |
| def process_line(self, line): |
| if self.HEADER_TOKEN in line: |
| line = line.replace(self.HEADER_TOKEN, self.HEADER_REPLACEMENT) |
| if self.XREF_TOKEN in line: |
| self.xref_offset = self.offset |
| line = self.generate_xref_table() |
| if self.TRAILER_TOKEN in line: |
| replacement = self.TRAILER_REPLACEMENT % (self.max_object_number + 1) |
| line = line.replace(self.TRAILER_TOKEN, replacement) |
| if self.STARTXREF_TOKEN in line: |
| replacement = self.STARTXREF_REPLACEMENT % self.xref_offset |
| line = line.replace(self.STARTXREF_TOKEN, replacement) |
| match = re.match(self.OBJECT_PATTERN, line) |
| if match: |
| self.insert_xref_entry(int(match.group(1)), int(match.group(2))) |
| line = re.sub(self.OBJECT_PATTERN, self.OBJECT_REPLACEMENT, line) |
| self.offset += len(line) |
| return line |
| |
| |
| def expand_file(input_path, output_path): |
| processor = TemplateProcessor() |
| try: |
| with open(input_path, 'rb') as infile: |
| with open(output_path, 'wb') as outfile: |
| for line in infile: |
| outfile.write(processor.process_line(line)) |
| except IOError: |
| print >> sys.stderr, 'failed to process %s' % input_path |
| |
| |
| def main(): |
| parser = optparse.OptionParser() |
| parser.add_option('--output-dir', default='') |
| options, args = parser.parse_args() |
| for testcase_path in args: |
| testcase_filename = os.path.basename(testcase_path) |
| testcase_root, _ = os.path.splitext(testcase_filename) |
| output_dir = os.path.dirname(testcase_path) |
| if options.output_dir: |
| output_dir = options.output_dir |
| output_path = os.path.join(output_dir, testcase_root + '.pdf') |
| expand_file(testcase_path, output_path) |
| return 0 |
| |
| |
| if __name__ == '__main__': |
| sys.exit(main()) |