Benjamin Peterson | 90f5ba5 | 2010-03-11 22:53:45 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Marc-André Lemburg | 92b201d | 2005-10-21 13:47:03 +0000 | [diff] [blame] | 2 | |
| 3 | """ Compare the output of two codecs. |
| 4 | |
| 5 | (c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com). |
| 6 | |
| 7 | Licensed to PSF under a Contributor Agreement. |
| 8 | |
| 9 | """ |
| 10 | import sys |
| 11 | |
def compare_codecs(encoding1, encoding2):
    """Print every input on which two codecs produce different results.

    Two passes are made:

    * encoding: every code point from 0 through sys.maxunicode is
      encoded with both codecs;
    * decoding: every single byte value 0..255 is decoded with both
      codecs.

    An input that a codec rejects with UnicodeError is recorded as the
    marker string '<undefined>', so two codecs that both reject the same
    input count as agreeing on it.  Each disagreement is printed, followed
    by a total or a note that the codecs are identical.

    Arguments:
        encoding1, encoding2 -- codec names as accepted by str.encode()
            and bytes.decode().

    Returns None; all results are written to standard output.  Only
    UnicodeError is caught, so any other failure (for example an
    unknown codec name) propagates to the caller.
    """
    print('Comparing encoding/decoding of %r and %r' % (encoding1, encoding2))
    mismatch = 0
    # Check encoding
    # sys.maxunicode is itself a valid code point, so the range has to
    # include it (hence the + 1).
    for i in range(sys.maxunicode + 1):
        u = chr(i)
        try:
            c1 = u.encode(encoding1)
        except UnicodeError:
            c1 = '<undefined>'
        try:
            c2 = u.encode(encoding2)
        except UnicodeError:
            c2 = '<undefined>'
        if c1 != c2:
            print(' * encoding mismatch for 0x%04X: %-14r != %r' %
                  (i, c1, c2))
            mismatch += 1
    # Check decoding
    for i in range(256):
        # Decoding works on bytes: chr(i) would give a str, which has no
        # decode() method in Python 3.
        c = bytes([i])
        try:
            u1 = c.decode(encoding1)
        except UnicodeError:
            u1 = '<undefined>'
        try:
            u2 = c.decode(encoding2)
        except UnicodeError:
            u2 = '<undefined>'
        if u1 != u2:
            print(' * decoding mismatch for 0x%04X: %-14r != %r' %
                  (i, u1, u2))
            mismatch += 1
    if mismatch:
        print()
        print('Found %i mismatches' % mismatch)
    else:
        print('-> Codecs are identical.')
Marc-André Lemburg | 92b201d | 2005-10-21 13:47:03 +0000 | [diff] [blame] | 51 | |
if __name__ == '__main__':
    # Two codec names are required on the command line; report a usage
    # error instead of failing with an IndexError traceback when they are
    # missing.  Extra arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit('usage: %s encoding1 encoding2' % sys.argv[0])
    compare_codecs(sys.argv[1], sys.argv[2])