Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 1 | """ Python 'utf-16' Codec |
| 2 | |
| 3 | |
| 4 | Written by Marc-Andre Lemburg (mal@lemburg.com). |
| 5 | |
| 6 | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. |
| 7 | |
| 8 | """ |
Marc-André Lemburg | 92b550c | 2001-06-19 20:07:51 +0000 | [diff] [blame] | 9 | import codecs, sys |
Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 10 | |
| 11 | ### Codec APIs |
| 12 | |
# Stateless encoder: a direct alias of the builtin codec function, so it is
# called with the same arguments as codecs.utf_16_encode itself.
encode = codecs.utf_16_encode
Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 14 | |
def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in one shot.

    The input is handed to codecs.utf_16_decode with the ``final`` flag
    set, i.e. it is treated as the whole of the data rather than as one
    chunk of a longer stream.  Returns whatever codecs.utf_16_decode
    returns for (input, errors, True).
    """
    is_final = True
    return codecs.utf_16_decode(input, errors, is_final)
Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 17 | |
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk after construction or reset() is encoded with
    codecs.utf_16_encode; every later chunk is encoded with the
    endian-specific encoder matching sys.byteorder.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None means no chunk has been encoded yet.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode one chunk and return the resulting bytes."""
        if self.encoder is not None:
            return self.encoder(input, self.errors)[0]
        # First chunk: encode with the generic codec before committing to the
        # endian-specific encoder, so a failure here leaves the state as-is.
        encoded = codecs.utf_16_encode(input, self.errors)[0]
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return encoded

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer.

        0: stream is in natural order for this platform
        2: endianness hasn't been determined yet
        (output is never written in unnatural order)
        """
        if self.encoder is None:
            return 2
        return 0

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        if not state:
            # Falsy state: the stream is already in native byte order.
            self.encoder = (codecs.utf_16_le_encode
                            if sys.byteorder == 'little'
                            else codecs.utf_16_be_encode)
            return
        self.encoder = None
| 52 | |
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that picks the byte order from the BOM.

    Until a byte order has been detected, chunks go through
    codecs.utf_16_ex_decode; afterwards the matching endian-specific
    decoder is used directly.  The detected byte order is part of the
    state returned by getstate(), so it survives a getstate()/setstate()
    round trip (including onto a different decoder instance).
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None means the byte order has not been determined yet.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode as much of ``input`` as possible.

        Returns an (output, consumed) pair.  Raises UnicodeError when at
        least two bytes were consumed but no BOM was found.
        """
        if self.decoder is None:
            (output, consumed, byteorder) = \
                codecs.utf_16_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_16_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_16_be_decode
            elif consumed >= 2:
                raise UnicodeError("UTF-16 stream does not start with BOM")
            return (output, consumed)
        return self.decoder(input, self.errors, final)

    def reset(self):
        """Drop buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return (buffered_input, byte_order_flag).

        byte_order_flag is:
        0: stream is in natural order for this platform
        1: stream is in unnatural order
        2: endianness hasn't been determined yet
        """
        # Only the buffered bytes are taken from the base class; its own
        # flag is replaced by the byte-order flag computed below.
        buffered = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        if self.decoder is None:
            return (buffered, 2)
        platform_is_big = sys.byteorder == 'big'
        stream_is_big = self.decoder is codecs.utf_16_be_decode
        return (buffered, int(platform_is_big != stream_is_big))

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        # The base class restores the buffered bytes from state[0].
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        flag = state[1]
        platform_is_big = sys.byteorder == 'big'
        if flag == 0:
            # Natural order for this platform.
            self.decoder = (codecs.utf_16_be_decode if platform_is_big
                            else codecs.utf_16_le_decode)
        elif flag == 1:
            # Unnatural (byte-swapped) order for this platform.
            self.decoder = (codecs.utf_16_le_decode if platform_is_big
                            else codecs.utf_16_be_decode)
        else:
            self.decoder = None
| 74 | |
class StreamWriter(codecs.StreamWriter):
    """UTF-16 stream writer.

    The first encode() call after construction or reset() goes through
    codecs.utf_16_encode; later calls use the endian-specific encoder
    matching sys.byteorder.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None means nothing has been encoded since construction/reset.
        self.encoder = None

    def reset(self):
        """Reset the writer so the next encode() is treated as the first."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode ``input`` and return the encoder's result unchanged."""
        if self.encoder is not None:
            return self.encoder(input, errors)
        # First call: encode with the generic codec, then commit to the
        # endian-specific encoder for all following calls.
        first_result = codecs.utf_16_encode(input, errors)
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return first_result
Tim Peters | 469cdad | 2002-08-08 20:19:19 +0000 | [diff] [blame] | 94 | |
class StreamReader(codecs.StreamReader):
    """UTF-16 stream reader that picks the byte order from the BOM.

    The decode() method below handles input until a byte order has been
    detected.  It then shadows itself with an instance attribute bound to
    the matching endian-specific decoder, which handles all later calls.
    """

    def reset(self):
        """Reset the reader and forget any detected byte order."""
        codecs.StreamReader.reset(self)
        # Removing the instance attribute set by decode() re-exposes the
        # BOM-detecting method defined on the class.
        try:
            del self.decode
        except AttributeError:
            # No byte order was detected yet, so there is nothing to undo.
            pass

    def decode(self, input, errors='strict'):
        """Decode ``input`` while looking for the BOM.

        Returns an (output, consumed) pair.  Raises UnicodeError when at
        least two bytes were consumed but no BOM was found.
        """
        (output, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_16_be_decode
        elif consumed >= 2:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (output, consumed)
Marc-André Lemburg | 3ccb09c | 2002-04-05 12:12:00 +0000 | [diff] [blame] | 114 | |
Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 115 | ### encodings module API |
| 116 | |
def getregentry():
    """Return the codecs.CodecInfo record describing this 'utf-16' codec.

    The record bundles this module's stateless encode/decode functions
    with its incremental and stream classes.
    """
    info = codecs.CodecInfo(
        encode=encode,
        decode=decode,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        name='utf-16',
    )
    return info