""" Python 'utf-16' Codec


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
Marc-André Lemburg | 92b550c | 2001-06-19 20:07:51 +0000 | [diff] [blame] | 9 | import codecs, sys |
Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 10 | |
| 11 | ### Codec APIs |
| 12 | |
# Stateless encoder: the C-level implementation is exposed under the
# module-level name without any wrapping.
encode = codecs.utf_16_encode


def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in a single call.

    input  -- the bytes to decode.
    errors -- error handling scheme forwarded to the C decoder.

    Returns the result of codecs.utf_16_decode() unchanged.  The input is
    always passed as final, i.e. it is treated as the whole message rather
    than as one chunk of a longer stream.
    """
    is_final = True
    return codecs.utf_16_decode(input, errors, is_final)
Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 17 | |
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk is encoded with codecs.utf_16_encode(); every later
    chunk is encoded with the byte-order-specific encoder that matches
    sys.byteorder.  reset() returns the object to its initial state.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None means no chunk has been encoded since construction/reset.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode one chunk of text and return the encoded data.

        `final` is accepted for interface compatibility; it is not used.
        """
        if self.encoder is not None:
            return self.encoder(input, self.errors)[0]

        # First chunk: use the generic encoder, and only after it succeeded
        # switch to the native-order encoder for all following chunks.
        first_chunk = codecs.utf_16_encode(input, self.errors)[0]
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return first_chunk

    def reset(self):
        """Forget the selected encoder so the next chunk is a first chunk."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None
| 36 | |
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that picks its byte order from the BOM.

    Until a byte order has been detected, data is run through
    codecs.utf_16_ex_decode(); afterwards the matching little- or
    big-endian decoder is used directly.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None until the byte order has been detected.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode `input` and return a (text, bytes_consumed) pair.

        Raises UnicodeError when at least two bytes were consumed and no
        byte order was reported by the detecting decoder.
        """
        if self.decoder is not None:
            return self.decoder(input, self.errors, final)

        text, used, order = codecs.utf_16_ex_decode(input, errors, 0, final)
        by_order = {
            -1: codecs.utf_16_le_decode,
            1: codecs.utf_16_be_decode,
        }
        chosen = by_order.get(order)
        if chosen is not None:
            self.decoder = chosen
        elif used >= 2:
            # Data was decoded but no byte order mark was seen.
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (text, used)

    def reset(self):
        """Discard buffered input and the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None
| 58 | |
class StreamWriter(codecs.StreamWriter):
    """Stream writer for UTF-16.

    The first encode() call after construction or reset() goes through
    codecs.utf_16_encode(); subsequent calls use the byte-order-specific
    encoder matching sys.byteorder.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None means nothing has been encoded since construction/reset.
        self.encoder = None

    def reset(self):
        """Reset the writer so the next encode() is treated as the first."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode `input` and return the encoder's result tuple unchanged."""
        if self.encoder is not None:
            return self.encoder(input, errors)

        # First write: encode with the generic encoder, then remember the
        # native-order encoder for everything that follows.
        encoded = codecs.utf_16_encode(input, errors)
        little_endian = (sys.byteorder == 'little')
        if little_endian:
            self.encoder = codecs.utf_16_le_encode
        else:
            self.encoder = codecs.utf_16_be_encode
        return encoded
Tim Peters | 469cdad | 2002-08-08 20:19:19 +0000 | [diff] [blame] | 78 | |
class StreamReader(codecs.StreamReader):
    """Stream reader for UTF-16 that detects the byte order from the BOM.

    decode() starts out as the method defined below.  Once a byte order has
    been detected it installs the matching byte-order-specific decoder as an
    instance attribute named `decode`, so later calls bypass the detection
    step.  reset() removes that instance attribute again.
    """

    def reset(self):
        """Reset the reader and re-enable byte order detection."""
        codecs.StreamReader.reset(self)
        try:
            # Remove the instance-level override installed by decode() so
            # that the class-level decode() is used for the next chunk.
            del self.decode
        except AttributeError:
            # No byte order had been detected yet; nothing to remove.
            pass

    def decode(self, input, errors='strict'):
        """Decode `input`, detecting the byte order on the way.

        Returns a (text, bytes_consumed) pair.

        Raises UnicodeError when at least two bytes were consumed without a
        byte order being reported by codecs.utf_16_ex_decode().
        """
        (output, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_16_be_decode
        elif consumed >= 2:
            # Call form of raise: valid on both Python 2 and Python 3, and
            # consistent with the incremental decoder in this module.
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (output, consumed)
Marc-André Lemburg | 3ccb09c | 2002-04-05 12:12:00 +0000 | [diff] [blame] | 98 | |
Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 99 | ### encodings module API |
| 100 | |
def getregentry():
    """Return the codecs.CodecInfo record describing this 'utf-16' codec.

    The record bundles the module's stateless encode/decode functions with
    its incremental and stream-based encoder/decoder classes.
    """
    entry = codecs.CodecInfo(
        encode=encode,
        decode=decode,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        name='utf-16',
    )
    return entry