Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 1 | """ Python 'utf-16' Codec |
| 2 | |
| 3 | |
| 4 | Written by Marc-Andre Lemburg (mal@lemburg.com). |
| 5 | |
| 6 | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. |
| 7 | |
| 8 | """ |
Marc-André Lemburg | 92b550c | 2001-06-19 20:07:51 +0000 | [diff] [blame] | 9 | import codecs, sys |
Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 10 | |
| 11 | ### Codec APIs |
| 12 | |
class Codec(codecs.Codec):
    """Stateless UTF-16 codec built directly on the C codec functions.

    ``encode`` and ``decode`` are the functions ``codecs.utf_16_encode``
    and ``codecs.utf_16_decode`` themselves; each is called as
    ``func(input[, errors])`` and returns an ``(output, length)`` tuple.
    """

    # The attributes below are bound to C-level functions on purpose:
    # unlike Python functions, they are not turned into methods by the
    # class, so no ``self`` is passed when they are looked up on an
    # instance.
    decode = codecs.utf_16_decode
    encode = codecs.utf_16_encode
| 19 | |
class StreamWriter(Codec,codecs.StreamWriter):
    """UTF-16 stream writer that emits the byte order mark only once.

    The first ``write()`` goes through the encoder inherited from
    ``Codec`` (``codecs.utf_16_encode``).  Once that write has succeeded,
    the instance's ``encode`` attribute is replaced by the encoder for the
    platform's native byte order (``codecs.utf_16_le_encode`` or
    ``codecs.utf_16_be_encode``), so later writes no longer go through
    the generic UTF-16 encoder.
    """

    def __init__(self, stream, errors='strict'):
        # bom_written: 0 until the first successful write(), 1 afterwards.
        self.bom_written = 0
        codecs.StreamWriter.__init__(self, stream, errors)

    def write(self, data):
        """Encode *data*, write it to the stream and return the base result."""
        written = codecs.StreamWriter.write(self, data)
        if self.bom_written:
            return written

        # First successful write: swap in the native byte order encoder
        # for every subsequent call on this instance.
        if sys.byteorder != 'little':
            native_encode = codecs.utf_16_be_encode
        else:
            native_encode = codecs.utf_16_le_encode
        self.encode = native_encode
        self.bom_written = 1
        return written
Tim Peters | 469cdad | 2002-08-08 20:19:19 +0000 | [diff] [blame^] | 34 | |
class StreamReader(Codec,codecs.StreamReader):
    """UTF-16 stream reader that selects its decoder from the leading BOM.

    On the first ``read()`` the first two bytes of the underlying stream
    are consumed and compared against ``codecs.BOM_BE`` / ``codecs.BOM_LE``;
    the matching byte-order-specific decoder is then installed on the
    instance as ``decode``.
    """

    def __init__(self, stream, errors='strict'):
        # bom_read: 0 until the BOM has been consumed and checked, 1 afterwards.
        self.bom_read = 0
        codecs.StreamReader.__init__(self, stream, errors)

    def read(self, size=-1):
        """Read and decode data from the stream.

        size -- passed on to ``codecs.StreamReader.read``; on the first
                call it is reduced by the two BOM bytes already consumed
                (never below 0).  Negative values are passed through
                unchanged.

        Raises UnicodeError if the first two bytes of the stream are
        neither ``codecs.BOM_BE`` nor ``codecs.BOM_LE``.
        """
        if not self.bom_read:
            signature = self.stream.read(2)
            if signature == codecs.BOM_BE:
                self.decode = codecs.utf_16_be_decode
            elif signature == codecs.BOM_LE:
                self.decode = codecs.utf_16_le_decode
            else:
                # Call form of raise: valid on Python 2 and Python 3 alike.
                raise UnicodeError("UTF-16 stream does not start with BOM")
            # The two signature bytes count against the caller's budget.
            if size > 2:
                size -= 2
            elif size >= 0:
                size = 0
            self.bom_read = 1
        return codecs.StreamReader.read(self, size)

    def readline(self, size=None):
        """Not supported for UTF-16; always raises NotImplementedError."""
        raise NotImplementedError('.readline() is not implemented for UTF-16')
| 58 | |
Guido van Rossum | 0229bf6 | 2000-03-10 23:17:24 +0000 | [diff] [blame] | 59 | ### encodings module API |
| 60 | |
def getregentry():
    """Return the codec registry entry for this module.

    The result is the 4-tuple
    ``(encoder, decoder, stream_reader_class, stream_writer_class)``.
    """
    encoder = Codec.encode
    decoder = Codec.decode
    return (encoder, decoder, StreamReader, StreamWriter)