Blame - Lib/codecs.py - platform/external/python/cpython3

2000-03-10 23:20:43 +0000

[diff] [blame]

1

""" codecs -- Python Codec Registry, API and helpers.

2

3

4

Written by Marc-Andre Lemburg (mal@lemburg.com).

"""#"

Marc-André Lemburg

2002-12-12 17:37:50 +0000

[diff] [blame]

10

import __builtin__, sys

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

11

12

### Registry and builtin stateless codec functions

13

Guido van Rossum

b95de4f

2000-03-31 17:25:23 +0000

[diff] [blame]

14

try:

15

from _codecs import *

Tim Peters

2001-05-15 17:19:16 +0000

[diff] [blame]

16

except ImportError, why:

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame^]

17

raise SystemError('Failed to load the builtin codecs: %s' % why)

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

18

Tim Peters

2001-05-15 17:19:16 +0000

[diff] [blame]

19

__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",

Walter Dörwald

2002-06-04 15:16:29 +0000

[diff] [blame]

20

"BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",

21

"BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",

Walter Dörwald

3aeb632

2002-09-02 13:14:32 +0000

[diff] [blame]

22

"BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",

23

"strict_errors", "ignore_errors", "replace_errors",

24

"xmlcharrefreplace_errors",

25

"register_error", "lookup_error"]

Skip Montanaro

e99d5ea

2001-01-20 19:54:20 +0000

[diff] [blame]

26

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

27

### Constants

28

29

#

Walter Dörwald

2002-06-04 15:16:29 +0000

[diff] [blame]

30

# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)

31

# and its possible byte string values

32

# for UTF8/UTF16/UTF32 output and little/big endian machines

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

33

#

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

34

Walter Dörwald

2002-06-04 15:16:29 +0000

[diff] [blame]

35

# Fixed byte order marks: U+FEFF encoded in the named encoding and
# byte order.

# UTF-8
BOM_UTF8 = '\xef\xbb\xbf'

# UTF-16, little and big endian
BOM_UTF16_LE = '\xff\xfe'
BOM_UTF16_BE = '\xfe\xff'

# UTF-32, little and big endian
BOM_UTF32_LE = '\xff\xfe\x00\x00'
BOM_UTF32_BE = '\x00\x00\xfe\xff'

# Short spellings of the UTF-16 marks
BOM_LE = BOM_UTF16_LE
BOM_BE = BOM_UTF16_BE

# Native endianness variants, chosen from the byte order reported by
# the running interpreter
if sys.byteorder != 'little':
    BOM_UTF16 = BOM_UTF16_BE
    BOM_UTF32 = BOM_UTF32_BE
else:
    BOM_UTF16 = BOM_UTF16_LE
    BOM_UTF32 = BOM_UTF32_LE
BOM = BOM_UTF16

# Old broken names (don't use in new code)
BOM32_LE = BOM_UTF16_LE
BOM32_BE = BOM_UTF16_BE
BOM64_LE = BOM_UTF32_LE
BOM64_BE = BOM_UTF32_BE

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

71

72

73

### Codec base classes (defining the API)

74

Thomas Wouters

2006-04-21 09:43:23 +0000

[diff] [blame]

75

class CodecInfo(tuple):

    """ Describes one codec: a 4-tuple with named attributes.

        The tuple part is (encode, decode, streamreader, streamwriter).
        The same four objects, the two incremental factories and the
        codec name are additionally stored as instance attributes.

    """
    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
        incrementalencoder=None, incrementaldecoder=None, name=None):
        info = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
        # Attribute order is kept as-is so that the instance __dict__
        # is populated in the same sequence as before.
        attributes = (
            ("name", name),
            ("encode", encode),
            ("decode", decode),
            ("incrementalencoder", incrementalencoder),
            ("incrementaldecoder", incrementaldecoder),
            ("streamwriter", streamwriter),
            ("streamreader", streamreader),
        )
        for attribute, value in attributes:
            setattr(info, attribute, value)
        return info

    def __repr__(self):
        klass = self.__class__
        return "<%s.%s object for encoding %s at 0x%x>" % (
            klass.__module__, klass.__name__, self.name, id(self))

91

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

92

class Codec:

    """ Interface definition for stateless encoders/decoders.

        Both .encode() and .decode() accept an errors argument that
        names the error handling scheme to use.  These string values
        are predefined:

         'strict' - raise a ValueError error (or a subclass)
         'ignore' - ignore the character and continue with the next
         'replace' - replace with a suitable replacement character;
                    Python will use the official U+FFFD REPLACEMENT
                    CHARACTER for the builtin Unicode codecs on
                    decoding and '?' on encoding.
         'xmlcharrefreplace' - Replace with the appropriate XML
                               character reference (only for encoding).
         'backslashreplace'  - Replace with backslashed escape sequences
                               (only for encoding).

        More values can be made available via register_error.

    """
    def encode(self, input, errors='strict'):

        """ Encode the object input; return a tuple
            (output object, length consumed).

            errors selects the error handling scheme and defaults to
            'strict'.

            Implementations may not keep state on the Codec instance.
            Codecs that need state between calls should use
            StreamWriter instead.

            Zero length input must be accepted; the result is then an
            empty object of the output object type.

            This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def decode(self, input, errors='strict'):

        """ Decode the object input; return a tuple
            (output object, length consumed).

            input must be an object which provides the bf_getreadbuf
            buffer slot.  Python strings, buffer objects and memory
            mapped files are examples of objects providing this slot.

            errors selects the error handling scheme and defaults to
            'strict'.

            Implementations may not keep state on the Codec instance.
            Codecs that need state between calls should use
            StreamReader instead.

            Zero length input must be accepted; the result is then an
            empty object of the output object type.

            This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

155

Thomas Wouters

2006-04-21 09:43:23 +0000

[diff] [blame]

156

class IncrementalEncoder(object):
    """
    Base class for encoders that process their input in several steps.

    The input is handed piece by piece to encode(); the encoder keeps
    whatever state it needs between those calls.
    """
    def __init__(self, errors='strict'):
        """
        Create an IncrementalEncoder instance.

        errors names the error handling scheme to use; the predefined
        names are listed in the Codec class docstring.
        """
        self.errors, self.buffer = errors, ""

    def encode(self, input, final=False):
        """
        Encode input and return the resulting object.

        Must be provided by subclasses; this base version raises
        NotImplementedError.
        """
        raise NotImplementedError()

    def reset(self):
        """
        Reset the encoder to the initial state.

        The base class has nothing to reset.
        """
        pass

183

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame^]

184

class BufferedIncrementalEncoder(IncrementalEncoder):
    """
    IncrementalEncoder base class for encoders that cannot always
    consume all of their input: whatever _buffer_encode() leaves
    unconsumed is stored and prepended to the input of the next
    encode() call.
    """
    def __init__(self, errors='strict'):
        IncrementalEncoder.__init__(self, errors)
        # input not yet encoded, carried over to the next encode() call
        self.buffer = ""

    def _buffer_encode(self, input, errors, final):
        # Hook for subclasses: encode input and return an
        # (output, length consumed) tuple
        raise NotImplementedError()

    def encode(self, input, final=False):
        # prepend what the previous call could not consume
        pending = self.buffer + input
        (output, used) = self._buffer_encode(pending, self.errors, final)
        # remember the unconsumed tail for the next call
        self.buffer = pending[used:]
        return output

    def reset(self):
        IncrementalEncoder.reset(self)
        self.buffer = ""

210

Thomas Wouters

2006-04-21 09:43:23 +0000

[diff] [blame]

211

class IncrementalDecoder(object):
    """
    Base class for decoders that process their input in several steps.

    The input is handed piece by piece to decode(); the decoder keeps
    whatever state it needs between those calls.
    """
    def __init__(self, errors='strict'):
        """
        Create an IncrementalDecoder instance.

        errors names the error handling scheme to use; the predefined
        names are listed in the Codec class docstring.
        """
        self.errors = errors

    def decode(self, input, final=False):
        """
        Decode input and return the resulting object.

        Must be provided by subclasses; this base version raises
        NotImplementedError.
        """
        raise NotImplementedError()

    def reset(self):
        """
        Reset the decoder to the initial state.

        The base class has nothing to reset.
        """
        pass

237

238

class BufferedIncrementalDecoder(IncrementalDecoder):
    """
    IncrementalDecoder base class for decoders that have to cope with
    incomplete byte sequences: whatever _buffer_decode() leaves
    unconsumed is stored and prepended to the input of the next
    decode() call.
    """
    def __init__(self, errors='strict'):
        IncrementalDecoder.__init__(self, errors)
        # input not yet decoded, carried over to the next decode() call
        self.buffer = ""

    def _buffer_decode(self, input, errors, final):
        # Hook for subclasses: decode input and return an
        # (output, length consumed) tuple
        raise NotImplementedError()

    def decode(self, input, final=False):
        # prepend what the previous call could not consume
        pending = self.buffer + input
        (output, used) = self._buffer_decode(pending, self.errors, final)
        # remember the unconsumed tail for the next call
        self.buffer = pending[used:]
        return output

    def reset(self):
        IncrementalDecoder.reset(self)
        self.buffer = ""

Thomas Wouters

2006-04-21 09:43:23 +0000

[diff] [blame]

264

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

265

#

266

# The StreamWriter and StreamReader class provide generic working

Andrew M. Kuchling

97c5635

2001-09-18 20:29:48 +0000

[diff] [blame]

267

# interfaces which can be used to implement new encoding submodules

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

268

# very easily. See encodings/utf_8.py for an example on how this is

269

# done.

Guido van Rossum

2000-04-11 15:41:38 +0000

[diff] [blame]

270

#

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

271

272

class StreamWriter(Codec):

    def __init__(self, stream, errors='strict'):

        """ Create a StreamWriter instance.

            stream must be a file-like object open for writing
            (binary) data.

            errors names the error handling scheme used when
            encoding.  These values are predefined:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace'- replace with a suitable replacement character
             'xmlcharrefreplace' - Replace with the appropriate XML
                                   character reference.
             'backslashreplace'  - Replace with backslashed escape
                                   sequences (only for encoding).

            More values can be made available via register_error.
        """
        self.stream = stream
        self.errors = errors

    def write(self, object):

        """ Encode object with self.encode() and write the result to
            self.stream.
        """
        # the consumed count returned by encode() is not used here
        encoded, _consumed = self.encode(object, self.errors)
        self.stream.write(encoded)

    def writelines(self, list):

        """ Join the strings in list and write the result with a
            single .write() call.
        """
        joined = ''.join(list)
        self.write(joined)

    def reset(self):

        """ Flush and reset the codec buffers used for keeping state.

            After this call the output should be in a clean state
            that allows new data to be appended without rescanning
            the whole stream to recover state.

            The base class keeps no state, so nothing is done here.
        """
        pass

    def __getattr__(self, name,
                    getattr=getattr):

        """ Look up every attribute not found on the writer on the
            underlying stream.
        """
        return getattr(self.stream, name)

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

###

class StreamReader(Codec):

    def __init__(self, stream, errors='strict'):

        """ Creates a StreamReader instance.

            stream must be a file-like object open for reading
            (binary) data.

            The StreamReader may use different error handling
            schemes by providing the errors keyword argument. These
            parameters are predefined:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace'- replace with a suitable replacement character;

            The set of allowed parameter values can be extended via
            register_error.
        """
        self.stream = stream
        self.errors = errors
        # undecoded input left over from the previous read
        self.bytebuffer = ""
        # For str->str decoding this will stay a str
        # For str->unicode decoding the first read will promote it to unicode
        self.charbuffer = ""
        # list of already split lines cached by readline(), or None
        self.linebuffer = None

    def decode(self, input, errors='strict'):
        """ Decodes input and returns (output object, length consumed).

            Must be provided by subclasses; this version raises
            NotImplementedError.
        """
        raise NotImplementedError

    def read(self, size=-1, chars=-1, firstline=False):

        """ Decodes data from the stream self.stream and returns the
            resulting object.

            chars indicates the number of characters to read from the
            stream. read() will never return more than chars
            characters, but it might return less, if there are not enough
            characters available.

            size indicates the approximate maximum number of bytes to
            read from the stream for decoding purposes. The decoder
            can modify this setting as appropriate. The default value
            -1 indicates to read and decode as much as possible.  size
            is intended to prevent having to decode huge files in one
            step.

            If firstline is true, and a UnicodeDecodeError happens
            after the first line terminator in the input only the first line
            will be returned, the rest of the input will be kept until the
            next call to read().

            The method should use a greedy read strategy meaning that
            it should read as much data as is allowed within the
            definition of the encoding and the given size, e.g.  if
            optional encoding endings or state markers are available
            on the stream, these should be read too.
        """
        # If we have lines cached, first merge them back into characters
        if self.linebuffer:
            self.charbuffer = "".join(self.linebuffer)
            self.linebuffer = None

        # read until we get the required number of characters (if available)
        while True:
            # can the request be satisfied from the character buffer?
            if chars < 0:
                if size < 0:
                    if self.charbuffer:
                        break
                elif len(self.charbuffer) >= size:
                    break
            else:
                if len(self.charbuffer) >= chars:
                    break
            # we need more data
            if size < 0:
                newdata = self.stream.read()
            else:
                newdata = self.stream.read(size)
            # decode bytes (those remaining from the last call included)
            data = self.bytebuffer + newdata
            try:
                newchars, decodedbytes = self.decode(data, self.errors)
            except UnicodeDecodeError:
                # The exception is fetched via sys.exc_info() rather than
                # bound in the except clause, because this spelling is
                # accepted by every Python version.
                exc = sys.exc_info()[1]
                if firstline:
                    # retry with only the input in front of the error; this
                    # is acceptable if it contains at least one full line
                    newchars, decodedbytes = self.decode(data[:exc.start], self.errors)
                    lines = newchars.splitlines(True)
                    if len(lines)<=1:
                        raise
                else:
                    raise
            # keep undecoded bytes until the next call
            self.bytebuffer = data[decodedbytes:]
            # put new characters in the character buffer
            self.charbuffer += newchars
            # there was no data available
            if not newdata:
                break
        if chars < 0:
            # Return everything we've got
            result = self.charbuffer
            self.charbuffer = ""
        else:
            # Return the first chars characters
            result = self.charbuffer[:chars]
            self.charbuffer = self.charbuffer[chars:]
        return result

    def readline(self, size=None, keepends=True):

        """ Read one line from the input stream and return the
            decoded data.

            size, if given, is passed as size argument to the
            read() method; read() is then called only once.

            If keepends is false, the line terminator is stripped
            from the returned line.
        """
        # If we have lines cached from an earlier read, return
        # them unconditionally
        if self.linebuffer:
            line = self.linebuffer[0]
            del self.linebuffer[0]
            if len(self.linebuffer) == 1:
                # revert to charbuffer mode; we might need more data
                # next time
                self.charbuffer = self.linebuffer[0]
                self.linebuffer = None
            if not keepends:
                line = line.splitlines(False)[0]
            return line

        # initial chunk size; doubled on every further attempt (see below)
        readsize = size or 72
        line = ""
        # If size is given, we call read() only once
        while True:
            data = self.read(readsize, firstline=True)
            if data:
                # If we're at a "\r" read one extra character (which might
                # be a "\n") to get a proper line ending. If the stream is
                # temporarily exhausted we return the wrong line ending.
                if data.endswith("\r"):
                    data += self.read(size=1, chars=1)

            line += data
            lines = line.splitlines(True)
            if lines:
                if len(lines) > 1:
                    # More than one line result; the first line is a full line
                    # to return
                    line = lines[0]
                    del lines[0]
                    if len(lines) > 1:
                        # cache the remaining lines
                        lines[-1] += self.charbuffer
                        self.linebuffer = lines
                        self.charbuffer = None
                    else:
                        # only one remaining line, put it back into charbuffer
                        self.charbuffer = lines[0] + self.charbuffer
                    if not keepends:
                        line = line.splitlines(False)[0]
                    break
                line0withend = lines[0]
                line0withoutend = lines[0].splitlines(False)[0]
                if line0withend != line0withoutend: # We really have a line end
                    # Put the rest back together and keep it until the next call
                    self.charbuffer = "".join(lines[1:]) + self.charbuffer
                    if keepends:
                        line = line0withend
                    else:
                        line = line0withoutend
                    break
            # we didn't get anything or this was our only try
            if not data or size is not None:
                if line and not keepends:
                    line = line.splitlines(False)[0]
                break
            # no line end found yet: try again with a bigger chunk
            if readsize<8000:
                readsize *= 2
        return line

    def readlines(self, sizehint=None, keepends=True):

        """ Read all lines available on the input stream
            and return them as list of lines.

            Line breaks are implemented using the codec's decoder
            method and are included in the list entries unless
            keepends is false.

            sizehint, if given, is ignored since there is no efficient
            way of finding the true end-of-line.
        """
        data = self.read()
        return data.splitlines(keepends)

    def reset(self):

        """ Resets the codec buffers used for keeping state.

            Note that no stream repositioning should take place.
            This method is primarily intended to be able to recover
            from decoding errors.
        """
        self.bytebuffer = ""
        # Same empty value as in __init__() and read(), so that
        # str->str decoding still yields str after a reset; a
        # str->unicode decoder promotes the buffer on its next read.
        self.charbuffer = ""
        self.linebuffer = None

    def seek(self, offset, whence=0):
        """ Set the input stream's current position.

            Resets the codec buffers used for keeping state.
        """
        # Reposition first: if the stream refuses to seek, the exception
        # propagates and the buffered data is still intact.
        self.stream.seek(offset, whence)
        self.reset()

    def next(self):

        """ Return the next decoded line from the input stream.

            Raises StopIteration once readline() returns an empty
            result.
        """
        line = self.readline()
        if line:
            return line
        raise StopIteration

    def __iter__(self):
        return self

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

###

class StreamReaderWriter:

574

Fred Drake

2000-04-13 14:11:21 +0000

[diff] [blame]

575

""" StreamReaderWriter instances allow wrapping streams which

576

work in both read and write modes.

577

578

The design is such that one can use the factory functions

Thomas Wouters

7e47402

2000-07-16 12:04:32 +0000

[diff] [blame]

579

returned by the codec.lookup() function to construct the

Fred Drake

2000-04-13 14:11:21 +0000

[diff] [blame]

580

instance.

581

582

"""

Guido van Rossum

2000-04-11 15:37:43 +0000

[diff] [blame]

583

# Optional attributes set by the file wrappers below

584

encoding = 'unknown'

585

Tim Peters

2001-05-15 17:19:16 +0000

[diff] [blame]

586

def __init__(self, stream, Reader, Writer, errors='strict'):

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

587

588

""" Creates a StreamReaderWriter instance.

589

590

stream must be a Stream-like object.

591

592

Reader, Writer must be factory functions or classes

593

providing the StreamReader, StreamWriter interface resp.

594

595

Error handling is done in the same way as defined for the

596

StreamWriter/Readers.

"""

self.stream = stream

self.reader = Reader(stream, errors)

601

self.writer = Writer(stream, errors)

602

self.errors = errors

603

Tim Peters

2001-05-15 17:19:16 +0000

[diff] [blame]

604

def read(self, size=-1):

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

605

606

return self.reader.read(size)

607

Guido van Rossum

d58c26f

2000-05-01 16:17:32 +0000

[diff] [blame]

608

def readline(self, size=None):

Guido van Rossum

2000-04-11 15:37:43 +0000

[diff] [blame]

609

610

return self.reader.readline(size)

611

Guido van Rossum

d58c26f

2000-05-01 16:17:32 +0000

[diff] [blame]

612

def readlines(self, sizehint=None):

Guido van Rossum

2000-04-11 15:37:43 +0000

[diff] [blame]

613

614

return self.reader.readlines(sizehint)

615

Walter Dörwald

2002-11-06 16:53:44 +0000

[diff] [blame]

616

def next(self):

617

618

""" Return the next decoded line from the input stream."""

619

return self.reader.next()

def __iter__(self):

return self

Tim Peters

2001-05-15 17:19:16 +0000

[diff] [blame]

624

def write(self, data):

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

625

626

return self.writer.write(data)

627

Tim Peters

2001-05-15 17:19:16 +0000

[diff] [blame]

628

def writelines(self, list):

Guido van Rossum

2000-04-11 15:37:43 +0000

[diff] [blame]

629

630

return self.writer.writelines(list)

631

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

def reset(self):

self.reader.reset()

self.writer.reset()

Tim Peters

2001-05-15 17:19:16 +0000

[diff] [blame]

637

def __getattr__(self, name,

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

638

getattr=getattr):

639

640

""" Inherit all other methods from the underlying stream.

641

"""

Tim Peters

2001-05-15 17:19:16 +0000

[diff] [blame]

642

return getattr(self.stream, name)

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

###

class StreamRecoder:

    """ StreamRecoder instances offer a frontend/backend view of
        encoded data.

        All the APIs returned by codecs.lookup() are used to do the
        work: the stateless encode/decode functions serve the frontend
        (the caller's side), the Reader/Writer classes serve the
        backend (the wrapped stream).

        Data written to the recoder is first decoded into an
        intermediate format (which depends on the codec combination)
        and then written to the stream through the Writer instance.

        In the other direction, data is read from the stream through
        the Reader instance and handed back to the caller after being
        encoded.

    """
    # Optional attributes set by the file wrappers below
    data_encoding = 'unknown'
    file_encoding = 'unknown'

    def __init__(self, stream, encode, decode, Reader, Writer,
                 errors='strict'):

        """ Create a StreamRecoder that converts in both directions.

            encode and decode work on the frontend (the output of
            .read() and the input to .write()), while Reader and
            Writer work on the backend (reading from and writing to
            the stream).

            These objects allow transparent direct recodings, e.g.
            from latin-1 to utf-8 and back.

            stream must be a file-like object.

            encode and decode must adhere to the Codec interface;
            Reader and Writer must be factory functions or classes
            providing the StreamReader and StreamWriter interface
            respectively.  Both factories are called as
            factory(stream, errors).

            Unicode is used as the intermediate encoding.

            Error handling is done in the same way as defined for the
            StreamWriter/Readers.

        """
        self.errors = errors
        self.stream = stream
        self.encode = encode
        self.decode = decode
        # The reader is created before the writer.
        self.reader = Reader(stream, errors)
        self.writer = Writer(stream, errors)

    def read(self, size=-1):

        """ Read through the backend reader and return the data
            re-encoded for the frontend.
        """
        raw = self.reader.read(size)
        recoded, _consumed = self.encode(raw, self.errors)
        return recoded

    def readline(self, size=None):

        """ Read one line through the backend reader and return it
            re-encoded for the frontend.

            size is only passed on to the reader when it is not None.
        """
        if size is not None:
            raw = self.reader.readline(size)
        else:
            raw = self.reader.readline()
        recoded, _consumed = self.encode(raw, self.errors)
        return recoded

    def readlines(self, sizehint=None):

        """ Read everything the reader has left, re-encode it and
            return it as a list of lines with line ends kept.

            sizehint is accepted for interface compatibility but is
            not used.
        """
        raw = self.reader.read()
        recoded, _consumed = self.encode(raw, self.errors)
        return recoded.splitlines(True)

    def next(self):

        """ Return the next line from the input stream, re-encoded
            for the frontend.
        """
        raw = self.reader.next()
        recoded, _consumed = self.encode(raw, self.errors)
        return recoded

    def __iter__(self):
        """ Return self: the recoder is its own iterator."""
        return self

    def write(self, data):

        """ Decode frontend data and write the result through the
            backend writer; return the writer's result.
        """
        intermediate, _consumed = self.decode(data, self.errors)
        return self.writer.write(intermediate)

    def writelines(self, list):

        """ Join the given strings, decode the result in one go and
            write it through the backend writer.
        """
        joined = ''.join(list)
        intermediate, _consumed = self.decode(joined, self.errors)
        return self.writer.write(intermediate)

    def reset(self):

        """ Reset the backend reader, then the backend writer."""
        reader = self.reader
        reader.reset()
        writer = self.writer
        writer.reset()

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        stream = self.stream
        return getattr(stream, name)

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

### Shortcuts

Marc-André Lemburg

2000-06-21 21:21:04 +0000

[diff] [blame]

756

def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):

    """ Open an encoded file using the given mode and return
        a wrapped version providing transparent encoding/decoding.

        Note: The wrapped version will only accept the object format
        defined by the codecs, i.e. Unicode objects for most builtin
        codecs. Output is also codec dependent and will usually be
        Unicode as well.

        Files are always opened in binary mode when an encoding is
        given, even if no binary mode was specified. This is done to
        avoid data loss due to encodings using 8-bit values. The
        default file mode is 'rb' meaning to open the file in binary
        read mode.

        encoding specifies the encoding which is to be used for the
        file. If it is None, the plain file object returned by the
        builtin open() is handed back unwrapped.

        errors may be given to define the error handling. It defaults
        to 'strict' which causes ValueErrors to be raised in case an
        encoding error occurs.

        buffering has the same meaning as for the builtin open() API.
        It defaults to line buffered.

        The returned wrapped file object provides an extra attribute
        .encoding which allows querying the used encoding. This
        attribute is only available if an encoding was specified as
        parameter.

        If looking up the encoding or building the wrapper fails, the
        file that was just opened is closed again before the exception
        is re-raised, so no file handle is leaked.

    """
    if encoding is not None and \
       'b' not in mode:
        # Force opening of the file in binary mode
        mode = mode + 'b'
    file = __builtin__.open(filename, mode, buffering)
    if encoding is None:
        return file
    try:
        info = lookup(encoding)
        srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors)
        # Add attributes to simplify introspection
        srw.encoding = encoding
        return srw
    except:
        # The caller never gets a reference to the file on failure, so
        # it has to be closed here; the original exception is re-raised
        # unchanged.
        file.close()
        raise

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

799

Guido van Rossum

2000-04-11 15:37:43 +0000

[diff] [blame]

800

def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):

    """ Return a wrapped version of file which provides transparent
        encoding translation.

        Strings written to the wrapped file are interpreted according
        to the given data_encoding and then written to the original
        file as string using file_encoding. The intermediate encoding
        will usually be Unicode but depends on the specified codecs.

        Strings are read from the file using file_encoding and then
        passed back to the caller as string using data_encoding.

        If file_encoding is not given, it defaults to data_encoding.

        errors may be given to define the error handling. It defaults
        to 'strict' which causes ValueErrors to be raised in case an
        encoding error occurs.

        The returned wrapped file object provides two extra attributes
        .data_encoding and .file_encoding which reflect the given
        parameters of the same name. The attributes can be used for
        introspection by Python programs.

        A LookupError is raised if either encoding cannot be found.

    """
    if file_encoding is None:
        file_encoding = data_encoding
    # Two separate lookups are required: the stateless encode/decode
    # functions face the caller (data_encoding) while the stream
    # reader/writer face the file (file_encoding).  Taking all four
    # from the data_encoding codec would silently ignore file_encoding.
    data_info = lookup(data_encoding)
    file_info = lookup(file_encoding)
    sr = StreamRecoder(file, data_info.encode, data_info.decode,
                       file_info.streamreader, file_info.streamwriter, errors)
    # Add attributes to simplify introspection
    sr.data_encoding = data_encoding
    sr.file_encoding = file_encoding
    return sr

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

834

Marc-André Lemburg

2001-09-19 11:24:48 +0000

[diff] [blame]

835

### Helpers for codec lookup

836

837

def getencoder(encoding):

    """ Look up the codec for the given encoding and return
        its encoder function.

        Raises a LookupError in case the encoding cannot be found.

    """
    info = lookup(encoding)
    return info.encode

Marc-André Lemburg

2001-09-19 11:24:48 +0000

[diff] [blame]

846

847

def getdecoder(encoding):

    """ Look up the codec for the given encoding and return
        its decoder function.

        Raises a LookupError in case the encoding cannot be found.

    """
    info = lookup(encoding)
    return info.decode

856

857

def getincrementalencoder(encoding):

    """ Look up the codec for the given encoding and return
        its IncrementalEncoder class or factory function.

        Raises a LookupError in case the encoding cannot be found
        or the codec doesn't provide an incremental encoder.

    """
    info = lookup(encoding)
    factory = info.incrementalencoder
    if factory is not None:
        return factory
    # The codec exists but has no incremental encoder.
    raise LookupError(encoding)

870

871

def getincrementaldecoder(encoding):

    """ Look up the codec for the given encoding and return
        its IncrementalDecoder class or factory function.

        Raises a LookupError in case the encoding cannot be found
        or the codec doesn't provide an incremental decoder.

    """
    info = lookup(encoding)
    factory = info.incrementaldecoder
    if factory is not None:
        return factory
    # The codec exists but has no incremental decoder.
    raise LookupError(encoding)

Marc-André Lemburg

2001-09-19 11:24:48 +0000

[diff] [blame]

884

885

def getreader(encoding):

    """ Look up the codec for the given encoding and return
        its StreamReader class or factory function.

        Raises a LookupError in case the encoding cannot be found.

    """
    info = lookup(encoding)
    return info.streamreader

Marc-André Lemburg

2001-09-19 11:24:48 +0000

[diff] [blame]

894

895

def getwriter(encoding):

    """ Look up the codec for the given encoding and return
        its StreamWriter class or factory function.

        Raises a LookupError in case the encoding cannot be found.

    """
    info = lookup(encoding)
    return info.streamwriter

904

905

def iterencode(iterator, encoding, errors='strict', **kwargs):
    """
    Encoding iterator.

    Feeds each input string from the iterator to an IncrementalEncoder
    and yields every non-empty piece of output.  After the input is
    exhausted the encoder is flushed with a final empty string.

    errors and kwargs are passed through to the IncrementalEncoder
    constructor.
    """
    factory = getincrementalencoder(encoding)
    encoder = factory(errors, **kwargs)
    for chunk in iterator:
        encoded = encoder.encode(chunk)
        if encoded:
            yield encoded
    # Final call so the encoder can emit whatever it still buffers.
    tail = encoder.encode("", True)
    if tail:
        yield tail

def iterdecode(iterator, encoding, errors='strict', **kwargs):
    """
    Decoding iterator.

    Feeds each input string from the iterator to an IncrementalDecoder
    and yields every non-empty piece of output.  After the input is
    exhausted the decoder is flushed with a final empty string.

    errors and kwargs are passed through to the IncrementalDecoder
    constructor.
    """
    factory = getincrementaldecoder(encoding)
    decoder = factory(errors, **kwargs)
    for chunk in iterator:
        decoded = decoder.decode(chunk)
        if decoded:
            yield decoded
    # Final call so the decoder can emit whatever it still buffers.
    tail = decoder.decode("", True)
    if tail:
        yield tail

Marc-André Lemburg

2001-09-19 11:24:48 +0000

[diff] [blame]

940

Marc-André Lemburg

a866df8

2001-01-03 21:29:14 +0000

[diff] [blame]

941

### Helpers for charmap-based codecs

942

943

def make_identity_dict(rng):

    """ make_identity_dict(rng) -> dict

        Return a dictionary in which every element of the rng sequence
        is both a key and its own value.

    """
    return dict((item, item) for item in rng)

Marc-André Lemburg

2001-05-16 09:41:45 +0000

[diff] [blame]

956

def make_encoding_map(decoding_map):

    """ Creates an encoding map from a decoding map.

        The decoding map is inverted: each of its values becomes a key
        that maps back to the original key.

        If a target mapping in the decoding map occurs multiple
        times, then that target is mapped to None (undefined mapping),
        causing an exception when encountered by the charmap codec
        during translation.

        One example where this happens is cp875.py which decodes
        multiple characters to U+001A.

    """
    encoding_map = {}
    for code, target in decoding_map.items():
        if target in encoding_map:
            # Target seen before: the reverse mapping is ambiguous.
            encoding_map[target] = None
        else:
            encoding_map[target] = code
    return encoding_map

Tim Peters

3a2ab1a

2001-05-29 06:06:54 +0000

[diff] [blame]

976

Walter Dörwald

3aeb632

2002-09-02 13:14:32 +0000

[diff] [blame]

977

### error handlers

978

Martin v. Löwis

e2713be

2005-03-08 15:03:08 +0000

[diff] [blame]

979

try:
    # Fetch the predefined error handlers by name, in this order.  The
    # names are only bound once every lookup has succeeded.
    (strict_errors,
     ignore_errors,
     replace_errors,
     xmlcharrefreplace_errors,
     backslashreplace_errors) = map(lookup_error,
                                    ("strict",
                                     "ignore",
                                     "replace",
                                     "xmlcharrefreplace",
                                     "backslashreplace"))
except LookupError:
    # In --disable-unicode builds, these error handlers are missing;
    # all five names are then bound to None.
    strict_errors = ignore_errors = replace_errors = None
    xmlcharrefreplace_errors = backslashreplace_errors = None

Walter Dörwald

3aeb632

2002-09-02 13:14:32 +0000

[diff] [blame]

992

Martin v. Löwis

6cd441d

2001-07-31 08:54:55 +0000

[diff] [blame]

993

# Tell modulefinder that using codecs probably needs the encodings
# package.  The import below never executes because _false is 0; it is
# only there so that the import statement is present in this module.
# NOTE(review): the condition is presumably a name rather than a
# literal so that the compiler does not drop the dead branch - confirm
# before simplifying.
_false = 0
if _false:
    import encodings

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

999

### Tests

Guido van Rossum

2000-04-11 15:41:38 +0000

[diff] [blame]

1000

Guido van Rossum

2000-03-10 23:20:43 +0000

[diff] [blame]

1001

if __name__ == '__main__':

1002

Guido van Rossum

2000-04-11 15:37:43 +0000

[diff] [blame]

1003

# Make stdout translate Latin-1 output into UTF-8 output

1004

sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8')

Guido van Rossum

2000-04-11 15:41:38 +0000

[diff] [blame]

1005

Guido van Rossum