"""
Python 'utf-32' Codec
"""
import codecs
import sys
 | 5 |  | 
### Codec APIs
 | 7 |  | 
 | 8 | encode = codecs.utf_32_encode | 
 | 9 |  | 
 | 10 | def decode(input, errors='strict'): | 
 | 11 |     return codecs.utf_32_decode(input, errors, True) | 
 | 12 |  | 
 | 13 | class IncrementalEncoder(codecs.IncrementalEncoder): | 
 | 14 |     def __init__(self, errors='strict'): | 
 | 15 |         codecs.IncrementalEncoder.__init__(self, errors) | 
 | 16 |         self.encoder = None | 
 | 17 |  | 
 | 18 |     def encode(self, input, final=False): | 
 | 19 |         if self.encoder is None: | 
 | 20 |             result = codecs.utf_32_encode(input, self.errors)[0] | 
 | 21 |             if sys.byteorder == 'little': | 
 | 22 |                 self.encoder = codecs.utf_32_le_encode | 
 | 23 |             else: | 
 | 24 |                 self.encoder = codecs.utf_32_be_encode | 
 | 25 |             return result | 
 | 26 |         return self.encoder(input, self.errors)[0] | 
 | 27 |  | 
 | 28 |     def reset(self): | 
 | 29 |         codecs.IncrementalEncoder.reset(self) | 
 | 30 |         self.encoder = None | 
 | 31 |  | 
 | 32 |     def getstate(self): | 
 | 33 |         # state info we return to the caller: | 
 | 34 |         # 0: stream is in natural order for this platform | 
 | 35 |         # 2: endianness hasn't been determined yet | 
 | 36 |         # (we're never writing in unnatural order) | 
 | 37 |         return (2 if self.encoder is None else 0) | 
 | 38 |  | 
 | 39 |     def setstate(self, state): | 
 | 40 |         if state: | 
 | 41 |             self.encoder = None | 
 | 42 |         else: | 
 | 43 |             if sys.byteorder == 'little': | 
 | 44 |                 self.encoder = codecs.utf_32_le_encode | 
 | 45 |             else: | 
 | 46 |                 self.encoder = codecs.utf_32_be_encode | 
 | 47 |  | 
 | 48 | class IncrementalDecoder(codecs.BufferedIncrementalDecoder): | 
 | 49 |     def __init__(self, errors='strict'): | 
 | 50 |         codecs.BufferedIncrementalDecoder.__init__(self, errors) | 
 | 51 |         self.decoder = None | 
 | 52 |  | 
 | 53 |     def _buffer_decode(self, input, errors, final): | 
 | 54 |         if self.decoder is None: | 
 | 55 |             (output, consumed, byteorder) = \ | 
 | 56 |                 codecs.utf_32_ex_decode(input, errors, 0, final) | 
 | 57 |             if byteorder == -1: | 
 | 58 |                 self.decoder = codecs.utf_32_le_decode | 
 | 59 |             elif byteorder == 1: | 
 | 60 |                 self.decoder = codecs.utf_32_be_decode | 
 | 61 |             elif consumed >= 4: | 
 | 62 |                 raise UnicodeError("UTF-32 stream does not start with BOM") | 
 | 63 |             return (output, consumed) | 
 | 64 |         return self.decoder(input, self.errors, final) | 
 | 65 |  | 
 | 66 |     def reset(self): | 
 | 67 |         codecs.BufferedIncrementalDecoder.reset(self) | 
 | 68 |         self.decoder = None | 
 | 69 |  | 
 | 70 |     def getstate(self): | 
 | 71 |         # additonal state info from the base class must be None here, | 
 | 72 |         # as it isn't passed along to the caller | 
 | 73 |         state = codecs.BufferedIncrementalDecoder.getstate(self)[0] | 
 | 74 |         # additional state info we pass to the caller: | 
 | 75 |         # 0: stream is in natural order for this platform | 
 | 76 |         # 1: stream is in unnatural order | 
 | 77 |         # 2: endianness hasn't been determined yet | 
 | 78 |         if self.decoder is None: | 
 | 79 |             return (state, 2) | 
 | 80 |         addstate = int((sys.byteorder == "big") != | 
 | 81 |                        (self.decoder is codecs.utf_32_be_decode)) | 
 | 82 |         return (state, addstate) | 
 | 83 |  | 
 | 84 |     def setstate(self, state): | 
 | 85 |         # state[1] will be ignored by BufferedIncrementalDecoder.setstate() | 
 | 86 |         codecs.BufferedIncrementalDecoder.setstate(self, state) | 
 | 87 |         state = state[1] | 
 | 88 |         if state == 0: | 
 | 89 |             self.decoder = (codecs.utf_32_be_decode | 
 | 90 |                             if sys.byteorder == "big" | 
 | 91 |                             else codecs.utf_32_le_decode) | 
 | 92 |         elif state == 1: | 
 | 93 |             self.decoder = (codecs.utf_32_le_decode | 
 | 94 |                             if sys.byteorder == "big" | 
 | 95 |                             else codecs.utf_32_be_decode) | 
 | 96 |         else: | 
 | 97 |             self.decoder = None | 
 | 98 |  | 
 | 99 | class StreamWriter(codecs.StreamWriter): | 
 | 100 |     def __init__(self, stream, errors='strict'): | 
| Victor Stinner | a92ad7e | 2010-05-22 16:59:09 +0000 | [diff] [blame] | 101 |         self.encoder = None | 
| Walter Dörwald | 41980ca | 2007-08-16 21:55:45 +0000 | [diff] [blame] | 102 |         codecs.StreamWriter.__init__(self, stream, errors) | 
 | 103 |  | 
| Victor Stinner | a92ad7e | 2010-05-22 16:59:09 +0000 | [diff] [blame] | 104 |     def reset(self): | 
 | 105 |         codecs.StreamWriter.reset(self) | 
 | 106 |         self.encoder = None | 
 | 107 |  | 
| Walter Dörwald | 41980ca | 2007-08-16 21:55:45 +0000 | [diff] [blame] | 108 |     def encode(self, input, errors='strict'): | 
| Victor Stinner | a92ad7e | 2010-05-22 16:59:09 +0000 | [diff] [blame] | 109 |         if self.encoder is None: | 
 | 110 |             result = codecs.utf_32_encode(input, errors) | 
 | 111 |             if sys.byteorder == 'little': | 
 | 112 |                 self.encoder = codecs.utf_32_le_encode | 
 | 113 |             else: | 
 | 114 |                 self.encoder = codecs.utf_32_be_encode | 
 | 115 |             return result | 
| Walter Dörwald | 41980ca | 2007-08-16 21:55:45 +0000 | [diff] [blame] | 116 |         else: | 
| Victor Stinner | a92ad7e | 2010-05-22 16:59:09 +0000 | [diff] [blame] | 117 |             return self.encoder(input, errors) | 
| Walter Dörwald | 41980ca | 2007-08-16 21:55:45 +0000 | [diff] [blame] | 118 |  | 
 | 119 | class StreamReader(codecs.StreamReader): | 
 | 120 |  | 
 | 121 |     def reset(self): | 
 | 122 |         codecs.StreamReader.reset(self) | 
 | 123 |         try: | 
 | 124 |             del self.decode | 
 | 125 |         except AttributeError: | 
 | 126 |             pass | 
 | 127 |  | 
 | 128 |     def decode(self, input, errors='strict'): | 
 | 129 |         (object, consumed, byteorder) = \ | 
 | 130 |             codecs.utf_32_ex_decode(input, errors, 0, False) | 
 | 131 |         if byteorder == -1: | 
 | 132 |             self.decode = codecs.utf_32_le_decode | 
 | 133 |         elif byteorder == 1: | 
| Walter Dörwald | 19e6238 | 2007-08-17 16:23:21 +0000 | [diff] [blame] | 134 |             self.decode = codecs.utf_32_be_decode | 
| Walter Dörwald | 41980ca | 2007-08-16 21:55:45 +0000 | [diff] [blame] | 135 |         elif consumed>=4: | 
| Collin Winter | 4902e69 | 2007-08-30 18:18:27 +0000 | [diff] [blame] | 136 |             raise UnicodeError("UTF-32 stream does not start with BOM") | 
| Walter Dörwald | 41980ca | 2007-08-16 21:55:45 +0000 | [diff] [blame] | 137 |         return (object, consumed) | 
 | 138 |  | 
### encodings module API
 | 140 |  | 
 | 141 | def getregentry(): | 
 | 142 |     return codecs.CodecInfo( | 
 | 143 |         name='utf-32', | 
 | 144 |         encode=encode, | 
 | 145 |         decode=decode, | 
 | 146 |         incrementalencoder=IncrementalEncoder, | 
 | 147 |         incrementaldecoder=IncrementalDecoder, | 
 | 148 |         streamreader=StreamReader, | 
 | 149 |         streamwriter=StreamWriter, | 
 | 150 |     ) |