Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 1 | """ Python 'utf-16' Codec |
| 2 | |
| 3 | |
| 4 | Written by Marc-Andre Lemburg (mal@lemburg.com). |
| 5 | |
| 6 | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. |
| 7 | |
| 8 | """ |
Marc-André Lemburg | 92b550c | 2001-06-19 20:07:51 +0000 | [diff] [blame] | 9 | import codecs, sys |
Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 10 | |
| 11 | ### Codec APIs |
| 12 | |
# Stateless encoder: a direct alias of the C-level UTF-16 encoder, so it
# takes (input, errors) and returns a (bytes, length_consumed) tuple.
encode = codecs.utf_16_encode
Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 14 | |
def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in one shot.

    The third argument passed to ``codecs.utf_16_decode`` is the *final*
    flag; it is always true here because this stateless entry point is
    handed the whole input at once.

    Returns the ``(text, bytes_consumed)`` tuple produced by the codec.
    """
    is_final = True
    decoded = codecs.utf_16_decode(input, errors, is_final)
    return decoded
Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 17 | |
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    ``self.encoder`` is None until the first chunk has been encoded.  That
    first chunk goes through ``codecs.utf_16_encode``; every later chunk goes
    through the explicit little- or big-endian encoder matching
    ``sys.byteorder``.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None means "nothing encoded yet since construction/reset".
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return the resulting bytes."""
        if self.encoder is not None:
            # Not the first chunk: use the fixed-endianness encoder.
            return self.encoder(input, self.errors)[0]

        # First chunk: encode with the generic codec, and only once that
        # succeeded switch to the platform-order encoder for later chunks.
        data = codecs.utf_16_encode(input, self.errors)[0]
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return data

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer.

        0: stream is in natural order for this platform
        2: endianness hasn't been determined yet
        (this encoder never writes in unnatural order)
        """
        if self.encoder is None:
            return 2
        return 0

    def setstate(self, state):
        """Restore a state previously returned by getstate().

        Any true value means "endianness not determined yet"; a false value
        selects the encoder for the platform's byte order.
        """
        if state:
            self.encoder = None
        elif sys.byteorder == 'little':
            self.encoder = codecs.utf_16_le_encode
        else:
            self.encoder = codecs.utf_16_be_encode
| 52 | |
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that picks its byte order from the BOM.

    ``self.decoder`` is None until a byte order has been detected; after
    that it is the little- or big-endian decoding function.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None means the byte order has not been determined yet.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input* and return ``(text, bytes_consumed)``.

        Raises UnicodeError when at least two bytes were consumed without
        a byte order having been detected.
        """
        if self.decoder is not None:
            # Byte order already known: delegate directly.
            return self.decoder(input, self.errors, final)

        text, used, order = codecs.utf_16_ex_decode(input, errors, 0, final)
        if order == -1:
            self.decoder = codecs.utf_16_le_decode
        elif order == 1:
            self.decoder = codecs.utf_16_be_decode
        elif used >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (text, used)

    def reset(self):
        """Discard buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return ``(buffered_bytes, order_flag)``.

        order_flag values:
        0: stream is in natural order for this platform
        1: stream is in unnatural order
        2: endianness hasn't been determined yet
        """
        # Only the buffered bytes are taken from the base class state; its
        # additional state info must be None here, as it isn't passed along
        # to the caller.
        pending = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        if self.decoder is None:
            return (pending, 2)
        platform_is_big = sys.byteorder == "big"
        stream_is_big = self.decoder is codecs.utf_16_be_decode
        return (pending, int(platform_is_big != stream_is_big))

    def setstate(self, state):
        """Restore a ``(buffered_bytes, order_flag)`` pair from getstate()."""
        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        flag = state[1]
        platform_is_big = sys.byteorder == "big"
        if flag == 0:
            # natural order: same endianness as the platform
            want_big = platform_is_big
        elif flag == 1:
            # unnatural order: the opposite endianness
            want_big = not platform_is_big
        else:
            self.decoder = None
            return
        if want_big:
            self.decoder = codecs.utf_16_be_decode
        else:
            self.decoder = codecs.utf_16_le_decode
| 103 | |
class StreamWriter(codecs.StreamWriter):
    """UTF-16 stream writer.

    ``self.encoder`` is None until the first chunk has been encoded.  That
    first chunk goes through ``codecs.utf_16_encode``; later chunks use the
    explicit little- or big-endian encoder matching ``sys.byteorder``.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None means "nothing encoded yet since construction/reset".
        self.encoder = None

    def reset(self):
        """Forget that anything was written, so the next chunk is 'first' again."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input* and return the codec's ``(bytes, length)`` tuple."""
        if self.encoder is not None:
            return self.encoder(input, errors)

        # First chunk: encode with the generic codec, and only once that
        # succeeded switch to the platform-order encoder for later chunks.
        first_chunk = codecs.utf_16_encode(input, errors)
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return first_chunk
Tim Peters | 469cdad | 2002-08-08 20:19:19 +0000 | [diff] [blame] | 123 | |
class StreamReader(codecs.StreamReader):
    """UTF-16 stream reader that picks its byte order from the BOM.

    Once a byte order is detected, ``decode`` is overridden on the instance
    with the matching fixed-endianness decoding function.
    """

    def reset(self):
        """Reset the reader and drop any per-instance ``decode`` override."""
        codecs.StreamReader.reset(self)
        try:
            del self.decode
        except AttributeError:
            # No override was installed yet; nothing to undo.
            pass

    def decode(self, input, errors='strict'):
        """Decode *input* and return ``(text, bytes_consumed)``.

        Raises UnicodeError when at least two bytes were consumed without
        a byte order having been detected.
        """
        text, used, order = codecs.utf_16_ex_decode(input, errors, 0, False)
        if order == -1:
            self.decode = codecs.utf_16_le_decode
        elif order == 1:
            self.decode = codecs.utf_16_be_decode
        elif used >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (text, used)
Marc-André Lemburg | 3ccb09c | 2002-04-05 12:12:00 +0000 | [diff] [blame] | 143 | |
Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 144 | ### encodings module API |
| 145 | |
def getregentry():
    """Return the ``codecs.CodecInfo`` record describing this codec.

    The record bundles the module's stateless encode/decode functions with
    its incremental and stream classes under the name 'utf-16'.
    """
    components = dict(
        name='utf-16',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
    return codecs.CodecInfo(**components)