blob: 0f5c1e24a3a26345a48b0ad0622201936539919a [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Marc-André Lemburg92b201d2005-10-21 13:47:03 +00002
""" Compare the output of two codecs.

(c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com).

    Licensed to PSF under a Contributor Agreement.

"""
10import sys
11
def compare_codecs(encoding1, encoding2, max_codepoint=None):
    """Print a report of where two codecs encode or decode differently.

    Encoding is checked for every code point from 0 through
    *max_codepoint* (inclusive).  Decoding is checked for each of the
    256 possible single-byte inputs.  A value a codec cannot handle is
    reported as the placeholder string '<undefined>', so two codecs that
    both reject the same input count as agreeing on it.

    Parameters:
        encoding1, encoding2: codec names as accepted by str.encode()
            and bytes.decode().
        max_codepoint: highest code point to try when encoding.
            Defaults to sys.maxunicode, i.e. the full Unicode range.

    Returns None; the report is written to standard output.

    Raises:
        LookupError: if either codec name is unknown (only UnicodeError
            is treated as "undefined").
        ValueError: if max_codepoint is above sys.maxunicode, because
            chr() rejects it.
    """
    if max_codepoint is None:
        max_codepoint = sys.maxunicode

    print('Comparing encoding/decoding of %r and %r' % (encoding1, encoding2))
    mismatch = 0

    # Check encoding.  The bound is inclusive so that the last code point
    # (sys.maxunicode itself by default) is compared as well.
    for i in range(max_codepoint + 1):
        u = chr(i)
        c1 = _encode_or_undefined(u, encoding1)
        c2 = _encode_or_undefined(u, encoding2)
        if c1 != c2:
            print(' * encoding mismatch for 0x%04X: %-14r != %r' % \
                  (i, c1, c2))
            mismatch += 1

    # Check decoding.  Decoding works on bytes, so build a one-byte
    # bytes object; a str (as returned by chr()) has no decode() method.
    for i in range(256):
        c = bytes([i])
        u1 = _decode_or_undefined(c, encoding1)
        u2 = _decode_or_undefined(c, encoding2)
        if u1 != u2:
            print(' * decoding mismatch for 0x%04X: %-14r != %r' % \
                  (i, u1, u2))
            mismatch += 1

    if mismatch:
        print()
        print('Found %i mismatches' % mismatch)
    else:
        print('-> Codecs are identical.')


def _encode_or_undefined(text, encoding):
    """Return text encoded with *encoding*, or '<undefined>' on UnicodeError."""
    try:
        return text.encode(encoding)
    except UnicodeError:
        return '<undefined>'


def _decode_or_undefined(data, encoding):
    """Return data decoded with *encoding*, or '<undefined>' on UnicodeError."""
    try:
        return data.decode(encoding)
    except UnicodeError:
        return '<undefined>'
Marc-André Lemburg92b201d2005-10-21 13:47:03 +000051
if __name__ == '__main__':
    # Script entry point: the two codec names to compare are taken from
    # the first two command-line arguments.  Without both of them, exit
    # with a usage message instead of an IndexError traceback.  Any extra
    # arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit('usage: %s encoding1 encoding2' % sys.argv[0])
    compare_codecs(sys.argv[1], sys.argv[2])