| Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 1 | """ Python 'utf-16' Codec | 
 | 2 |  | 
 | 3 |  | 
 | 4 | Written by Marc-Andre Lemburg (mal@lemburg.com). | 
 | 5 |  | 
 | 6 | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. | 
 | 7 |  | 
 | 8 | """ | 
| Marc-André Lemburg | 92b550c | 2001-06-19 20:07:51 +0000 | [diff] [blame] | 9 | import codecs, sys | 
| Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 10 |  | 
 | 11 | ### Codec APIs | 
 | 12 |  | 
# Stateless encoder: a direct alias of the C-level UTF-16 encoder, so it
# takes (input, errors) and returns a (bytes, length_consumed) tuple.
encode = codecs.utf_16_encode
| Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 14 |  | 
def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in one shot.

    input  -- the bytes-like object to decode.
    errors -- error handling scheme forwarded to the underlying decoder.

    Returns the (text, bytes_consumed) tuple produced by
    codecs.utf_16_decode.
    """
    # Stateless decoding always sees the whole input, so the decoder is
    # told that no further data will follow.
    final = True
    return codecs.utf_16_decode(input, errors, final)
| Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 17 |  | 
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk after construction or reset() goes through the generic
    codecs.utf_16_encode; every later chunk goes through the fixed-order
    encoder that matches sys.byteorder.  self.encoder holds that
    fixed-order encoder, or None while no chunk has been encoded yet.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None means "nothing encoded yet".
        self.encoder = None

    def encode(self, input, final=False):
        """Encode input and return the resulting bytes.

        The final flag is accepted for interface compatibility and is not
        consulted here.
        """
        if self.encoder is not None:
            # Not the first chunk: use the platform-order encoder.
            return self.encoder(input, self.errors)[0]

        # First chunk: use the generic encoder, and only once it has
        # succeeded switch to the platform-order encoder for later chunks.
        first_chunk = codecs.utf_16_encode(input, self.errors)[0]
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return first_chunk

    def reset(self):
        """Return to the initial "nothing encoded yet" state."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer.

        0 -- stream is in natural order for this platform
        2 -- endianness hasn't been determined yet

        (Unnatural order is never written, so 1 is never returned.)
        """
        if self.encoder is None:
            return 2
        return 0

    def setstate(self, state):
        """Restore a state previously obtained from getstate().

        Any true value selects the "nothing encoded yet" state; a false
        value selects the platform-order encoder.
        """
        if state:
            self.encoder = None
            return
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
 | 52 |  | 
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that picks its byte order from the BOM.

    self.decoder is None until the byte order has been determined; after
    that it is codecs.utf_16_le_decode or codecs.utf_16_be_decode.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None means "byte order not determined yet".
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode input and return (text, bytes_consumed).

        Raises UnicodeError if at least two bytes were consumed and no
        byte order was reported by the decoder.
        """
        if self.decoder is not None:
            # Byte order already known: delegate to the fixed-order decoder.
            return self.decoder(input, self.errors, final)

        text, used, order = codecs.utf_16_ex_decode(input, errors, 0, final)
        # -1 selects little endian, 1 selects big endian; any other value
        # means no byte order has been detected.
        chosen = {-1: codecs.utf_16_le_decode,
                  1: codecs.utf_16_be_decode}.get(order)
        if chosen is not None:
            self.decoder = chosen
        elif used >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (text, used)

    def reset(self):
        """Forget buffered input and the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return (buffered_bytes, flag) describing the decoder state.

        flag is:
        0 -- stream is in natural order for this platform
        1 -- stream is in unnatural order
        2 -- endianness hasn't been determined yet
        """
        # Additional state info from the base class must be None here, as
        # it isn't passed along to the caller; only the buffer is kept.
        pending = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        if self.decoder is None:
            return (pending, 2)
        platform_is_big = (sys.byteorder == "big")
        stream_is_big = (self.decoder is codecs.utf_16_be_decode)
        return (pending, int(platform_is_big != stream_is_big))

    def setstate(self, state):
        """Restore a state previously obtained from getstate()."""
        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        flag = state[1]
        if sys.byteorder == "big":
            natural = codecs.utf_16_be_decode
            unnatural = codecs.utf_16_le_decode
        else:
            natural = codecs.utf_16_le_decode
            unnatural = codecs.utf_16_be_decode
        if flag == 0:
            self.decoder = natural
        elif flag == 1:
            self.decoder = unnatural
        else:
            self.decoder = None
 | 103 |  | 
class StreamWriter(codecs.StreamWriter):
    """UTF-16 stream writer.

    The first encode() call after construction or reset() goes through the
    generic codecs.utf_16_encode; later calls go through the fixed-order
    encoder that matches sys.byteorder.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None means "nothing encoded yet".
        self.encoder = None

    def reset(self):
        """Return to the initial "nothing encoded yet" state."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode input and return a (bytes, length_consumed) tuple."""
        if self.encoder is not None:
            # Not the first chunk: use the platform-order encoder.
            return self.encoder(input, errors)

        # First chunk: use the generic encoder, and only once it has
        # succeeded switch to the platform-order encoder for later chunks.
        first_chunk = codecs.utf_16_encode(input, errors)
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return first_chunk
| Tim Peters | 469cdad | 2002-08-08 20:19:19 +0000 | [diff] [blame] | 123 |  | 
class StreamReader(codecs.StreamReader):
    """UTF-16 stream reader that picks its byte order from the BOM.

    Once decode() has determined the byte order it shadows itself with an
    instance attribute bound to the matching fixed-order decoder; reset()
    removes that attribute so detection happens again.
    """

    def reset(self):
        """Reset the reader and forget any detected byte order."""
        codecs.StreamReader.reset(self)
        try:
            # Remove the instance-level override installed by decode().
            del self.decode
        except AttributeError:
            # No byte order had been detected yet; nothing to remove.
            pass

    def decode(self, input, errors='strict'):
        """Decode input and return (text, bytes_consumed).

        Raises UnicodeError if at least two bytes were consumed and no
        byte order was reported by the decoder.
        """
        text, used, order = codecs.utf_16_ex_decode(input, errors, 0, False)
        # -1 selects little endian, 1 selects big endian; any other value
        # means no byte order has been detected.
        chosen = {-1: codecs.utf_16_le_decode,
                  1: codecs.utf_16_be_decode}.get(order)
        if chosen is not None:
            self.decode = chosen
        elif used >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (text, used)
| Marc-André Lemburg | 3ccb09c | 2002-04-05 12:12:00 +0000 | [diff] [blame] | 143 |  | 
| Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 144 | ### encodings module API | 
 | 145 |  | 
 | 146 | def getregentry(): | 
| Thomas Wouters | a977329 | 2006-04-21 09:43:23 +0000 | [diff] [blame] | 147 |     return codecs.CodecInfo( | 
 | 148 |         name='utf-16', | 
 | 149 |         encode=encode, | 
 | 150 |         decode=decode, | 
 | 151 |         incrementalencoder=IncrementalEncoder, | 
 | 152 |         incrementaldecoder=IncrementalDecoder, | 
 | 153 |         streamreader=StreamReader, | 
 | 154 |         streamwriter=StreamWriter, | 
 | 155 |     ) |