Tom Sepez | c8f6ab6 | 2015-01-22 11:20:06 -0800 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # Copyright 2014 The PDFium Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | |
| 6 | """Expands a hand-written PDF testcase (template) into a valid PDF file. |
| 7 | |
| 8 | There are several places in a PDF file where byte-offsets are required. This |
| 9 | script replaces {{name}}-style variables in the input with calculated results: |
| 10 | |
| 11 | {{header}} - expands to the header comment required for PDF files. |
| 12 | {{xref}} - expands to a generated xref table, noting the offset. |
Lei Zhang | bdb330e | 2017-06-26 12:23:51 -0700 | [diff] [blame^] | 13 | {{trailer}} - expands to a standard trailer with "1 0 R" as the /Root. |
Tom Sepez | c8f6ab6 | 2015-01-22 11:20:06 -0800 | [diff] [blame] | 14 | {{startxref}} - expands to a startxref directive followed by correct offset. |
Lei Zhang | bdb330e | 2017-06-26 12:23:51 -0700 | [diff] [blame^] | 15 | {{object x y}} - expands to |x y obj| declaration, noting the offset. |
| 16 | """ |
Tom Sepez | c8f6ab6 | 2015-01-22 11:20:06 -0800 | [diff] [blame] | 17 | |
| 18 | import optparse |
| 19 | import os |
| 20 | import re |
| 21 | import sys |
| 22 | |
class TemplateProcessor:
    """Expands {{name}}-style tokens in a PDF template, one line at a time.

    The processor keeps a running count of the bytes it has emitted so far
    (`offset`), so that the offsets recorded for `{{object x y}}` lines and
    for the `{{xref}}` line can be written into the generated xref table and
    the startxref directive.

    Every token, replacement and pattern is a bytes literal: lines are
    expected to come from a file opened in binary mode. On Python 2 bytes
    and str are the same type; on Python 3 (3.5+ for bytes %-formatting)
    native str constants would raise TypeError when tested against bytes
    lines.
    """

    HEADER_TOKEN = b'{{header}}'
    HEADER_REPLACEMENT = b'%PDF-1.7\n%\xa0\xf2\xa4\xf4'

    XREF_TOKEN = b'{{xref}}'
    XREF_REPLACEMENT = b'xref\n%d %d\n'

    XREF_REPLACEMENT_N = b'%010d %05d n \n'
    XREF_REPLACEMENT_F = b'0000000000 65535 f \n'
    # XREF rows must be exactly 20 bytes - space required.
    assert len(XREF_REPLACEMENT_F) == 20

    TRAILER_TOKEN = b'{{trailer}}'
    TRAILER_REPLACEMENT = b'trailer<< /Root 1 0 R /Size %d >>'

    STARTXREF_TOKEN = b'{{startxref}}'
    STARTXREF_REPLACEMENT = b'startxref\n%d'

    OBJECT_PATTERN = br'\{\{object\s+(\d+)\s+(\d+)\}\}'
    OBJECT_REPLACEMENT = br'\1 \2 obj'

    def __init__(self):
        # Number of output bytes produced by process_line() so far.
        self.offset = 0
        # Value of `offset` when the {{xref}} line was encountered.
        self.xref_offset = 0
        # Highest object number seen in any {{object x y}} line.
        self.max_object_number = 0
        # Maps object number -> (byte offset, generation number).
        self.objects = {}

    def insert_xref_entry(self, object_number, generation_number):
        """Records the current offset as the location of the given object."""
        self.objects[object_number] = (self.offset, generation_number)
        self.max_object_number = max(self.max_object_number, object_number)

    def generate_xref_table(self):
        """Returns the xref section (as bytes) for the objects seen so far.

        The table covers object numbers 0 through max_object_number. Numbers
        with no recorded object (including 0, unless a template declared it)
        get the fixed free-entry row.
        """
        rows = [self.XREF_REPLACEMENT % (0, self.max_object_number + 1)]
        for number in range(self.max_object_number + 1):
            if number in self.objects:
                rows.append(self.XREF_REPLACEMENT_N % self.objects[number])
            else:
                rows.append(self.XREF_REPLACEMENT_F)
        return b''.join(rows)

    def process_line(self, line):
        """Expands any template tokens in `line` and returns the result.

        Args:
          line: one line of the template, as bytes (including its newline).

        Returns:
          The expanded line as bytes. `offset` is advanced by its length.
        """
        if self.HEADER_TOKEN in line:
            line = line.replace(self.HEADER_TOKEN, self.HEADER_REPLACEMENT)
        if self.XREF_TOKEN in line:
            # The whole line (not just the token) is replaced by the table,
            # and the table is taken to start where this line starts.
            self.xref_offset = self.offset
            line = self.generate_xref_table()
        if self.TRAILER_TOKEN in line:
            replacement = self.TRAILER_REPLACEMENT % (self.max_object_number + 1)
            line = line.replace(self.TRAILER_TOKEN, replacement)
        if self.STARTXREF_TOKEN in line:
            replacement = self.STARTXREF_REPLACEMENT % self.xref_offset
            line = line.replace(self.STARTXREF_TOKEN, replacement)
        # re.match anchors at the start of the line, so an object token is
        # only recognized (and its offset recorded) when it begins the line.
        match = re.match(self.OBJECT_PATTERN, line)
        if match:
            self.insert_xref_entry(int(match.group(1)), int(match.group(2)))
            line = re.sub(self.OBJECT_PATTERN, self.OBJECT_REPLACEMENT, line)
        self.offset += len(line)
        return line
| 81 | |
Tom Sepez | b7cb36a | 2015-02-13 16:54:48 -0800 | [diff] [blame] | 82 | |
def expand_file(input_path, output_path):
    """Expands the template at `input_path` into a PDF at `output_path`.

    Both files are opened in binary mode and the template is processed line
    by line through a fresh TemplateProcessor.

    Args:
      input_path: path of the template file to read.
      output_path: path of the PDF file to write (created or truncated).

    An IOError while opening, reading or writing is not propagated; a
    message naming `input_path` is written to stderr instead. If the failure
    happens after the output file was opened, whatever was written so far is
    left in place.
    """
    processor = TemplateProcessor()
    try:
        with open(input_path, 'rb') as infile:
            with open(output_path, 'wb') as outfile:
                for line in infile:
                    outfile.write(processor.process_line(line))
    except IOError:
        # sys.stderr.write works on Python 2 and 3 alike, unlike the
        # Python 2-only `print >> sys.stderr` statement form.
        sys.stderr.write('failed to process %s\n' % input_path)
| 92 | |
Tom Sepez | c8f6ab6 | 2015-01-22 11:20:06 -0800 | [diff] [blame] | 93 | |
def main():
    """Expands every template named on the command line into a .pdf file.

    Each positional argument is a template path. The output file takes the
    template's base name with its extension replaced by '.pdf', and is
    written next to the template unless --output-dir is given.

    Returns:
      0, always.
    """
    option_parser = optparse.OptionParser()
    option_parser.add_option('--output-dir', default='')
    opts, template_paths = option_parser.parse_args()
    for template_path in template_paths:
        stem = os.path.splitext(os.path.basename(template_path))[0]
        # An explicit --output-dir overrides the template's own directory.
        target_dir = opts.output_dir or os.path.dirname(template_path)
        expand_file(template_path, os.path.join(target_dir, stem + '.pdf'))
    return 0
| 107 | |
Tom Sepez | b7cb36a | 2015-02-13 16:54:48 -0800 | [diff] [blame] | 108 | |
if __name__ == '__main__':
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)