blob: c61248242be8c7a2157df12d6f62ce824d31939c [file] [log] [blame]
""" Python 'utf-16' Codec


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
Marc-André Lemburg92b550c2001-06-19 20:07:51 +00009import codecs, sys
Guido van Rossum0229bf62000-03-10 23:17:24 +000010
### Codec APIs
12
# The stateless encoder is a plain alias for codecs.utf_16_encode; no
# wrapper is needed because the call signature is used as-is.
encode = codecs.utf_16_encode
Guido van Rossum0229bf62000-03-10 23:17:24 +000014
def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in a single call.

    Forwards *input* and *errors* to ``codecs.utf_16_decode`` with the
    ``final`` flag set to True and returns that function's result
    unchanged.
    """
    final = True  # stateless API: *input* is treated as the whole stream
    result = codecs.utf_16_decode(input, errors, final)
    return result
Guido van Rossum0229bf62000-03-10 23:17:24 +000017
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk after creation or reset() is encoded with
    ``codecs.utf_16_encode``; every later chunk goes through the encoder
    matching ``sys.byteorder`` (``utf_16_le_encode`` or
    ``utf_16_be_encode``).
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None until the first chunk has been encoded.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return the resulting bytes.

        *final* is accepted for interface compatibility and is not used.
        """
        if self.encoder is not None:
            return self.encoder(input, self.errors)[0]
        # First chunk: encode before switching, so that a failing encode
        # leaves the encoder in its initial state.
        first_chunk = codecs.utf_16_encode(input, self.errors)[0]
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return first_chunk

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer.

        0: stream is in natural order for this platform
        2: endianness hasn't been determined yet
        (we're never writing in unnatural order)
        """
        if self.encoder is None:
            return 2
        return 0

    def setstate(self, state):
        """Restore a state previously returned by getstate().

        Any true value selects the initial state; a false value selects
        the platform-native encoder.
        """
        if state:
            self.encoder = None
            return
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
52
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that picks its byte order from the BOM.

    Until a byte order has been detected, input is passed to
    ``codecs.utf_16_ex_decode``; afterwards the matching little- or
    big-endian decoder is used directly.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None until the byte order has been detected.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input* and return ``(output, consumed)``.

        Raises UnicodeError when at least two bytes were consumed and no
        byte order was reported by ``codecs.utf_16_ex_decode``.
        """
        if self.decoder is None:
            (output, consumed, byteorder) = \
                codecs.utf_16_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_16_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_16_be_decode
            elif consumed >= 2:
                raise UnicodeError("UTF-16 stream does not start with BOM")
            return (output, consumed)
        # Honour the *errors* argument here as well, so both paths of this
        # method apply the same error handling.
        return self.decoder(input, errors, final)

    def reset(self):
        """Drop buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return ``(buffered_input, byte_order_flag)``."""
        # additional state info from the base class must be None here,
        # as it isn't passed along to the caller
        state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        # additional state info we pass to the caller:
        # 0: stream is in natural order for this platform
        # 1: stream is in unnatural order
        # 2: endianness hasn't been determined yet
        if self.decoder is None:
            return (state, 2)
        addstate = int((sys.byteorder == "big") !=
                       (self.decoder is codecs.utf_16_be_decode))
        return (state, addstate)

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        state = state[1]
        if state == 0:
            self.decoder = (codecs.utf_16_be_decode
                            if sys.byteorder == "big"
                            else codecs.utf_16_le_decode)
        elif state == 1:
            self.decoder = (codecs.utf_16_le_decode
                            if sys.byteorder == "big"
                            else codecs.utf_16_be_decode)
        else:
            self.decoder = None
103
class StreamWriter(codecs.StreamWriter):
    """UTF-16 stream writer.

    The first encode() call after creation or reset() uses
    ``codecs.utf_16_encode``; later calls use the encoder matching
    ``sys.byteorder``.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None until the first chunk has been encoded.
        self.encoder = None

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input* and return the underlying encoder's result."""
        if self.encoder is not None:
            return self.encoder(input, errors)
        # First chunk: encode before switching, so that a failing encode
        # leaves the writer in its initial state.
        encoded = codecs.utf_16_encode(input, errors)
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return encoded
Tim Peters469cdad2002-08-08 20:19:19 +0000123
class StreamReader(codecs.StreamReader):
    """UTF-16 stream reader that picks its byte order from the BOM.

    Once a byte order has been detected, ``decode`` is overridden on the
    instance with the matching little- or big-endian decoder; reset()
    removes that override again.
    """

    def reset(self):
        """Reset the reader and discard any detected byte order."""
        codecs.StreamReader.reset(self)
        # Removing the per-instance override makes the class-level
        # decode() (with BOM detection) visible again.
        self.__dict__.pop("decode", None)

    def decode(self, input, errors='strict'):
        """Decode *input* while detecting the byte order.

        Returns ``(text, consumed)``. Raises UnicodeError when at least
        two bytes were consumed and no byte order was reported.
        """
        text, consumed, byteorder = codecs.utf_16_ex_decode(
            input, errors, 0, False)
        chosen = {
            -1: codecs.utf_16_le_decode,
            1: codecs.utf_16_be_decode,
        }.get(byteorder)
        if chosen is not None:
            self.decode = chosen
        elif consumed >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (text, consumed)
Marc-André Lemburg3ccb09c2002-04-05 12:12:00 +0000143
### encodings module API
145
def getregentry():
    """Return the ``codecs.CodecInfo`` entry describing this codec.

    The entry is named 'utf-16' and bundles the module's stateless,
    incremental and stream encoder/decoder implementations.
    """
    registration = dict(
        name='utf-16',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
    return codecs.CodecInfo(**registration)