blob: eff08f387822c7dd36bf939bdc49d8db04543db7 [file] [log] [blame]
""" Python 'utf-16' Codec


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
Marc-André Lemburg92b550c2001-06-19 20:07:51 +00009import codecs, sys
Guido van Rossum0229bf62000-03-10 23:17:24 +000010
### Codec APIs
12
# Stateless encoder: the C-level UTF-16 encoder is exposed unchanged.
encode = codecs.utf_16_encode

def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in one shot.

    *errors* names the error handling scheme.  The trailing True passed
    to codecs.utf_16_decode marks the input as final, i.e. the whole
    input is handed over at once.  Returns whatever
    codecs.utf_16_decode returns (a tuple of decoded text and the
    number of bytes consumed).
    """
    decoded = codecs.utf_16_decode(input, errors, True)
    return decoded
Guido van Rossum0229bf62000-03-10 23:17:24 +000017
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder that emits a BOM with the first chunk only.

    self.encoder is None until the first chunk has been encoded; after
    that it holds the BOM-less codec function matching sys.byteorder.
    """

    def __init__(self, errors='strict'):
        """Set up the encoder with the given error handling scheme."""
        codecs.IncrementalEncoder.__init__(self, errors)
        # None means no chunk has been encoded since creation/reset.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return only the encoded data.

        *final* is accepted for interface compatibility and is not used.
        """
        # Fast path: a byte-order-specific encoder was already selected.
        if self.encoder is not None:
            return self.encoder(input, self.errors)[0]

        # First chunk: the generic encoder produces the BOM-prefixed output.
        first_chunk = codecs.utf_16_encode(input, self.errors)[0]

        # Later chunks use the explicit-endian encoder for the native order,
        # so the BOM is not repeated.
        if sys.byteorder != 'little':
            native_encoder = codecs.utf_16_be_encode
        else:
            native_encoder = codecs.utf_16_le_encode
        self.encoder = native_encoder
        return first_chunk

    def reset(self):
        """Forget the selected encoder so the next chunk gets a BOM again."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None
36
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that picks its byte order from the BOM.

    self.decoder is None until a byte order has been detected; after
    that it holds the matching explicit-endian decode function.
    """

    def __init__(self, errors='strict'):
        """Set up the decoder with the given error handling scheme."""
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None means the byte order has not been determined yet.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input* and return a (text, bytes consumed) tuple.

        Raises UnicodeError when at least two bytes were consumed but no
        byte order was reported by the decoder.
        """
        # Byte order already known: delegate straight to that decoder.
        if self.decoder is not None:
            return self.decoder(input, self.errors, final)

        text, used, order = codecs.utf_16_ex_decode(input, errors, 0, final)

        # Reported byte order -> decoder to use for all following chunks.
        by_order = {
            -1: codecs.utf_16_le_decode,
            1: codecs.utf_16_be_decode,
        }
        chosen = by_order.get(order)
        if chosen is not None:
            self.decoder = chosen
        elif used >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (text, used)

    def reset(self):
        """Drop buffered input and the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None
58
class StreamWriter(codecs.StreamWriter):
    """Stream writer for UTF-16 that emits a BOM before the first data.

    The first call to encode() produces output through
    codecs.utf_16_encode and then shadows encode() on the instance with
    the explicit-endian encoder matching sys.byteorder, so later writes
    do not repeat the BOM.  reset() removes that shadow again so the
    next write starts a fresh, BOM-prefixed stream.
    """

    def __init__(self, stream, errors='strict'):
        """Wrap *stream*; *errors* names the error handling scheme."""
        # Public flag: True once the first (BOM-emitting) encode succeeded.
        self.bom_written = False
        codecs.StreamWriter.__init__(self, stream, errors)

    def reset(self):
        """Return the writer to its initial state.

        Without this, the instance-level encode() shadow installed by the
        first write would survive a reset and no BOM would ever be
        written again.
        """
        codecs.StreamWriter.reset(self)
        self.bom_written = False
        try:
            del self.encode
        except AttributeError:
            # Nothing was written yet, so there is no shadow to remove.
            pass

    def encode(self, input, errors='strict'):
        """Encode the first chunk and switch to BOM-less encoding.

        Returns the result of codecs.utf_16_encode(input, errors)
        unchanged.
        """
        result = codecs.utf_16_encode(input, errors)
        # Only record the BOM as written once encoding actually succeeded.
        self.bom_written = True
        if sys.byteorder == 'little':
            self.encode = codecs.utf_16_le_encode
        else:
            self.encode = codecs.utf_16_be_encode
        return result
Tim Peters469cdad2002-08-08 20:19:19 +000072
class StreamReader(codecs.StreamReader):
    """Stream reader for UTF-16 that picks its byte order from the BOM.

    The first successful byte-order detection shadows decode() on the
    instance with the matching explicit-endian decoder; reset() removes
    that shadow so detection runs again.
    """

    def reset(self):
        """Reset the reader and forget any detected byte order."""
        codecs.StreamReader.reset(self)
        try:
            del self.decode
        except AttributeError:
            # No byte order was detected yet, so there is no shadow to remove.
            pass

    def decode(self, input, errors='strict'):
        """Decode *input* and return a (text, bytes consumed) tuple.

        Raises UnicodeError when at least two bytes were consumed but no
        byte order was reported by the decoder.
        """
        (output, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_16_be_decode
        elif consumed >= 2:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (output, consumed)
Marc-André Lemburg3ccb09c2002-04-05 12:12:00 +000092
### encodings module API
94
def getregentry():
    """Return the codecs.CodecInfo record for this module's 'utf-16' codec."""
    # Collect the registry fields first, then build the record in one call.
    entry_fields = {
        'name': 'utf-16',
        'encode': encode,
        'decode': decode,
        'incrementalencoder': IncrementalEncoder,
        'incrementaldecoder': IncrementalDecoder,
        'streamreader': StreamReader,
        'streamwriter': StreamWriter,
    }
    return codecs.CodecInfo(**entry_fields)