""" Python 'utf-16' Codec


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
import codecs, sys

### Codec APIs

# Stateless encoder entry point: a direct alias of codecs.utf_16_encode,
# handed to codecs.CodecInfo by getregentry() below.
encode = codecs.utf_16_encode
def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in a single call.

    input  -- the encoded data to decode.
    errors -- error handling scheme forwarded to the decoder
              (defaults to 'strict').

    Returns the result of codecs.utf_16_decode unchanged.  The third
    argument (final) is always True because the caller supplies the
    whole input at once.
    """
    return codecs.utf_16_decode(input, errors, True)
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk goes through codecs.utf_16_encode (the generic
    encoder, which writes the byte order mark).  Every later chunk goes
    through the explicit-endianness encoder matching sys.byteorder, so
    the rest of the stream stays in the platform's native order.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None until the first chunk has been encoded.
        self.encoder = None

    def _use_native_encoder(self):
        """Bind self.encoder to the LE or BE encoder for this platform."""
        if sys.byteorder == 'little':
            self.encoder = codecs.utf_16_le_encode
        else:
            self.encoder = codecs.utf_16_be_encode

    def encode(self, input, final=False):
        """Encode one chunk and return only the encoded data.

        The final flag is accepted for interface compatibility but is
        not consulted.
        """
        if self.encoder is None:
            # Encode first so that a failure leaves the encoder unset.
            result = codecs.utf_16_encode(input, self.errors)[0]
            self._use_native_encoder()
            return result
        return self.encoder(input, self.errors)[0]

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return 2 before the first chunk has been encoded, else 0."""
        # state info we return to the caller:
        # 0: stream is in natural order for this platform
        # 2: endianness hasn't been determined yet
        # (we're never writing in unnatural order)
        return (2 if self.encoder is None else 0)

    def setstate(self, state):
        """Restore a state previously obtained from getstate().

        Any true value means "nothing encoded yet"; a false value (0)
        selects the native-order encoder.
        """
        if state:
            self.encoder = None
        else:
            self._use_native_encoder()
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that requires a leading BOM.

    Until the byte order is known, input is passed to
    codecs.utf_16_ex_decode.  Once that call reports a byte order, the
    matching LE or BE decoder is remembered and used for all later input.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None until the byte order has been detected.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode buffered input and return (output, consumed).

        Raises UnicodeError when at least two bytes were consumed and no
        byte order was reported, i.e. the stream does not start with a BOM.
        """
        if self.decoder is not None:
            # NOTE: this path uses self.errors, not the errors argument.
            return self.decoder(input, self.errors, final)

        (output, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, final)
        if byteorder == -1:
            self.decoder = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decoder = codecs.utf_16_be_decode
        elif consumed >= 2:
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (output, consumed)

    def reset(self):
        """Forget the detected byte order and clear the base-class state."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None
class StreamWriter(codecs.StreamWriter):
    """UTF-16 stream writer.

    The first encode() call after construction or reset() uses
    codecs.utf_16_encode (the generic encoder, which writes the byte
    order mark).  Later calls use the explicit-endianness encoder
    matching sys.byteorder.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None until the first chunk has been encoded.
        self.encoder = None

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode input and return the underlying encoder's full result."""
        if self.encoder is not None:
            return self.encoder(input, errors)

        # Encode first so that a failure leaves the encoder unset.
        result = codecs.utf_16_encode(input, errors)
        if sys.byteorder == 'little':
            self.encoder = codecs.utf_16_le_encode
        else:
            self.encoder = codecs.utf_16_be_encode
        return result
class StreamReader(codecs.StreamReader):
    """UTF-16 stream reader that requires a leading BOM.

    decode() starts out as the method below.  Once the byte order is
    known, it rebinds self.decode on the instance to the matching LE or
    BE decoder, so later calls go straight to that decoder.
    """

    def reset(self):
        """Reset the reader and restore byte-order detection."""
        codecs.StreamReader.reset(self)
        # Drop the per-instance decoder, if one was installed, so the
        # class-level decode() below is used again.  The attribute is
        # absent when no byte order was detected yet; that is fine.
        try:
            del self.decode
        except AttributeError:
            pass

    def decode(self, input, errors='strict'):
        """Decode input while detecting the byte order from the BOM.

        Returns (output, consumed).  Raises UnicodeError when at least
        two bytes were consumed and no byte order was reported, i.e. the
        stream does not start with a BOM.
        """
        (output, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_16_be_decode
        elif consumed >= 2:
            # Call form is valid on both Python 2 and Python 3.
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (output, consumed)

### encodings module API

def getregentry():
    """Return the codecs.CodecInfo entry describing the 'utf-16' codec.

    The entry bundles this module's stateless encode/decode functions
    with its incremental and stream classes.
    """
    return codecs.CodecInfo(
        name='utf-16',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )