blob: f0b7709a23c7efa7b4ba179b52618aa2fbff8b87 [file] [log] [blame]
"""
Python 'utf-32' Codec
"""
4import codecs, sys
5
### Codec APIs
7
# Stateless encoder: delegate straight to the C implementation.
encode = codecs.utf_32_encode


def decode(input, errors='strict'):
    """Decode a complete UTF-32 byte string in a single call.

    The data is handed to ``codecs.utf_32_decode`` with its third
    (``final``) argument set to true, i.e. *input* is treated as the
    whole stream rather than one chunk of it.

    *errors* names the error handling scheme (default ``'strict'``).
    The return value is whatever ``codecs.utf_32_decode`` returns.
    """
    final = True
    return codecs.utf_32_decode(input, errors, final)
12
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-32 encoder.

    The first chunk is encoded with ``codecs.utf_32_encode``; every later
    chunk goes through the little- or big-endian encoder selected from
    ``sys.byteorder``.  ``self.encoder`` is ``None`` until that first
    chunk has been produced.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None means "nothing encoded yet since construction/reset".
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return the resulting bytes."""
        if self.encoder is not None:
            return self.encoder(input, self.errors)[0]

        # First chunk.  Encode before committing to an encoder so that a
        # failed encode leaves the object in its initial state.
        data = codecs.utf_32_encode(input, self.errors)[0]
        self.encoder = (codecs.utf_32_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_32_be_encode)
        return data

    def reset(self):
        """Return to the initial state (no encoder selected)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer.

        0: stream is in natural order for this platform
        2: endianness hasn't been determined yet
        (the stream is never written in unnatural order)
        """
        if self.encoder is None:
            return 2
        return 0

    def setstate(self, state):
        """Restore a state previously returned by getstate().

        Any true value clears the encoder; a false value selects the
        encoder matching ``sys.byteorder``.
        """
        if state:
            self.encoder = None
            return
        self.encoder = (codecs.utf_32_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_32_be_encode)
47
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-32 decoder that picks its byte order from the BOM.

    ``self.decoder`` is ``None`` until a BOM has been seen; afterwards it
    is the little- or big-endian decoder used for the rest of the stream.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode as much of *input* as possible.

        Returns ``(text, bytes_consumed)``.  Raises UnicodeError when at
        least four bytes were consumed without a byte order being
        detected.
        """
        if self.decoder is not None:
            return self.decoder(input, self.errors, final)

        text, used, order = codecs.utf_32_ex_decode(input, errors, 0, final)
        chosen = {
            -1: codecs.utf_32_le_decode,
            1: codecs.utf_32_be_decode,
        }.get(order)
        if chosen is not None:
            self.decoder = chosen
        elif used >= 4:
            raise UnicodeError("UTF-32 stream does not start with BOM")
        return (text, used)

    def reset(self):
        """Drop buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return ``(buffered_bytes, flag)`` describing the decoder.

        Only the first item of the base class state is kept; the second
        item is replaced by our own flag:

        0: stream is in natural order for this platform
        1: stream is in unnatural order
        2: endianness hasn't been determined yet
        """
        pending = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        if self.decoder is None:
            return (pending, 2)
        native_is_big = sys.byteorder == "big"
        decoding_big = self.decoder is codecs.utf_32_be_decode
        return (pending, int(native_is_big != decoding_big))

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        # The base class restores the buffer from state[0]; state[1] is
        # the byte order flag documented in getstate().
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        flag = state[1]
        if flag not in (0, 1):
            self.decoder = None
            return
        native_is_big = sys.byteorder == "big"
        # Flag 0 keeps the platform order, flag 1 selects the opposite.
        want_big = native_is_big if flag == 0 else not native_is_big
        self.decoder = (codecs.utf_32_be_decode
                        if want_big
                        else codecs.utf_32_le_decode)
98
class StreamWriter(codecs.StreamWriter):
    """Stream writer that emits a BOM before the first encoded chunk.

    ``bom_written`` records whether the BOM has already been produced.
    It is cleared again by reset() (which the base class also calls from
    ``seek(0)``), so a stream that is rewound and rewritten starts with a
    fresh BOM instead of silently omitting it.
    """

    def __init__(self, stream, errors='strict'):
        self.bom_written = False
        codecs.StreamWriter.__init__(self, stream, errors)

    def reset(self):
        """Reset the writer so the next write starts with a BOM again."""
        codecs.StreamWriter.reset(self)
        self.bom_written = False

    def encode(self, input, errors='strict'):
        """Encode *input*; return ``(bytes, length_consumed)``.

        The first call after construction or reset() goes through
        ``codecs.utf_32_encode``; later calls use the little- or
        big-endian encoder selected from ``sys.byteorder``.
        """
        if not self.bom_written:
            result = codecs.utf_32_encode(input, errors)
            # Only mark the BOM as written once encoding succeeded, so a
            # failed first write does not lose the BOM.
            self.bom_written = True
            return result
        if sys.byteorder == 'little':
            return codecs.utf_32_le_encode(input, errors)
        return codecs.utf_32_be_encode(input, errors)
112
class StreamReader(codecs.StreamReader):
    """Stream reader that selects its byte order from the leading BOM.

    Once the BOM has been seen, ``decode`` is rebound on the instance to
    the matching little- or big-endian decoder; reset() removes that
    instance attribute so the class-level method is used again.
    """

    def reset(self):
        """Reset the reader and forget the detected byte order."""
        codecs.StreamReader.reset(self)
        # Drop the per-instance override, if one was installed.
        if 'decode' in self.__dict__:
            del self.decode

    def decode(self, input, errors='strict'):
        """Decode *input* before the byte order is known.

        Returns ``(text, bytes_consumed)``.  Raises UnicodeError when at
        least four bytes were consumed without a byte order being
        detected.
        """
        text, used, order = codecs.utf_32_ex_decode(input, errors, 0, False)
        if order == -1:
            self.decode = codecs.utf_32_le_decode
        elif order == 1:
            self.decode = codecs.utf_32_be_decode
        elif used >= 4:
            raise UnicodeError("UTF-32 stream does not start with BOM")
        return (text, used)
132
### encodings module API
134
def getregentry():
    """Return the ``codecs.CodecInfo`` record describing this codec.

    The record bundles the module's stateless encode/decode functions
    with its incremental and stream classes under the name 'utf-32'.
    """
    info = codecs.CodecInfo(
        name='utf-32',
        encode=encode,
        decode=decode,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
    )
    return info
144 )