Blame - Lib/bz2.py - platform/external/python/cpython3

2011-04-03 17:05:46 +0200

[diff] [blame]

1

"""Interface to the libbzip2 compression library.

2

3

This module provides a file interface, classes for incremental

4

(de)compression, and functions for one-shot (de)compression.

5

"""

6

Nadeem Vawda

2012-06-04 23:32:38 +0200

[diff] [blame]

7

__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",

8

"open", "compress", "decompress"]

Antoine Pitrou

2011-04-03 17:05:46 +0200

[diff] [blame]

9

10

__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"

11

Serhiy Storchaka

cf4a2f2

2015-03-11 17:18:03 +0200

[diff] [blame]

12

from builtins import open as _builtin_open

Antoine Pitrou

2011-04-03 17:05:46 +0200

[diff] [blame]

13

import io

Antoine Pitrou

2011-04-03 17:05:46 +0200

[diff] [blame]

14

import warnings

15

Nadeem Vawda

72750a8

2012-01-18 01:57:14 +0200

[diff] [blame]

16

try:

17

from threading import RLock

Brett Cannon

cd171c8

2013-07-04 17:43:24 -0400

[diff] [blame]

18

except ImportError:

Nadeem Vawda

72750a8

2012-01-18 01:57:14 +0200

[diff] [blame]

19

from dummy_threading import RLock

20

Antoine Pitrou

2011-04-03 17:05:46 +0200

[diff] [blame]

21

from _bz2 import BZ2Compressor, BZ2Decompressor

# Values stored in BZ2File._mode.
_MODE_CLOSED = 0
_MODE_READ = 1
_MODE_READ_EOF = 2   # Open for reading; _fill_buffer() has hit end of input.
_MODE_WRITE = 3

# Number of compressed bytes requested from the underlying file per read.
_BUFFER_SIZE = 8192


class BZ2File(io.BufferedIOBase):

    """A file object providing transparent bzip2 (de)compression.

    A BZ2File can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that BZ2File provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be given as bytes.
    """

    def __init__(self, filename, mode="r", buffering=None, compresslevel=9):
        """Open a bzip2-compressed file.

        If filename is a str or bytes object, it gives the name
        of the file to be opened. Otherwise, it should be a file object,
        which will be used to read or write the compressed data.

        mode can be 'r' for reading (default), 'w' for (over)writing,
        'x' for creating exclusively, or 'a' for appending. These can
        equivalently be given as 'rb', 'wb', 'xb', and 'ab'.

        buffering is ignored. Its use is deprecated.

        If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
        and 9 specifying the level of compression: 1 produces the least
        compression, and 9 (default) produces the most compression.

        If mode is 'r', the input file may be the concatenation of
        multiple compressed streams.

        Raises ValueError for an out-of-range compresslevel or an
        unrecognised mode, and TypeError if filename is neither a
        str/bytes object nor an object with a read() or write() method.
        """
        # This lock must be recursive, so that BufferedIOBase's
        # readline(), readlines() and writelines() don't deadlock.
        self._lock = RLock()
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED
        # Current position in the *uncompressed* stream.
        self._pos = 0
        # Total uncompressed size; -1 until the end of input has been seen.
        self._size = -1

        if buffering is not None:
            # stacklevel=2 attributes the warning to the code that called
            # BZ2File(), not to this module.
            warnings.warn("Use of 'buffering' argument is deprecated",
                          DeprecationWarning, stacklevel=2)

        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
            self._decompressor = BZ2Decompressor()
            # Readahead buffer of decompressed data, and the index of the
            # first byte in it that has not yet been handed to the caller.
            self._buffer = b""
            self._buffer_offset = 0
        elif mode in ("w", "wb"):
            mode = "wb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("x", "xb"):
            mode = "xb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("a", "ab"):
            mode = "ab"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        else:
            raise ValueError("Invalid mode: %r" % (mode,))

        if isinstance(filename, (str, bytes)):
            self._fp = _builtin_open(filename, mode)
            # We opened the file ourselves, so close() must close it too.
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str or bytes object, or a file")

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        with self._lock:
            if self._mode == _MODE_CLOSED:
                return
            try:
                if self._mode in (_MODE_READ, _MODE_READ_EOF):
                    self._decompressor = None
                elif self._mode == _MODE_WRITE:
                    # Emit whatever the compressor is still holding back.
                    self._fp.write(self._compressor.flush())
                    self._compressor = None
            finally:
                # Always release the underlying file and reset our state,
                # even if the final flush or the close itself failed.
                try:
                    if self._closefp:
                        self._fp.close()
                finally:
                    self._fp = None
                    self._closefp = False
                    self._mode = _MODE_CLOSED
                    self._buffer = b""
                    self._buffer_offset = 0

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._fp.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode in (_MODE_READ, _MODE_READ_EOF)

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    # Mode-checking helper functions.

    def _check_not_closed(self):
        if self.closed:
            raise ValueError("I/O operation on closed file")

    def _check_can_read(self):
        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
            # A closed file reports ValueError in preference to
            # UnsupportedOperation.
            self._check_not_closed()
            raise io.UnsupportedOperation("File not open for reading")

    def _check_can_write(self):
        if self._mode != _MODE_WRITE:
            self._check_not_closed()
            raise io.UnsupportedOperation("File not open for writing")

    def _check_can_seek(self):
        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
            self._check_not_closed()
            raise io.UnsupportedOperation("Seeking is only supported "
                                          "on files open for reading")
        if not self._fp.seekable():
            raise io.UnsupportedOperation("The underlying file object "
                                          "does not support seeking")

    # Fill the readahead buffer if it is empty. Returns False on EOF.
    def _fill_buffer(self):
        if self._mode == _MODE_READ_EOF:
            return False
        # Depending on the input data, our call to the decompressor may not
        # return any data. In this case, try again after reading another block.
        while self._buffer_offset == len(self._buffer):
            # Bytes left over after the end of a stream are consumed before
            # anything more is read from the underlying file.
            rawblock = (self._decompressor.unused_data or
                        self._fp.read(_BUFFER_SIZE))

            if not rawblock:
                if self._decompressor.eof:
                    # End-of-stream marker and end of file. We're good.
                    self._mode = _MODE_READ_EOF
                    self._size = self._pos
                    return False
                else:
                    # Problem - we were expecting more compressed data.
                    raise EOFError("Compressed file ended before the "
                                   "end-of-stream marker was reached")

            if self._decompressor.eof:
                # Continue to next stream.
                self._decompressor = BZ2Decompressor()
                try:
                    self._buffer = self._decompressor.decompress(rawblock)
                except OSError:
                    # Trailing data isn't a valid bzip2 stream. We're done here.
                    self._mode = _MODE_READ_EOF
                    self._size = self._pos
                    return False
            else:
                self._buffer = self._decompressor.decompress(rawblock)
            self._buffer_offset = 0
        return True

    # Read data until EOF.
    # If return_data is false, consume the data without returning it.
    def _read_all(self, return_data=True):
        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
        self._buffer = self._buffer[self._buffer_offset:]
        self._buffer_offset = 0

        blocks = []
        while self._fill_buffer():
            if return_data:
                blocks.append(self._buffer)
            self._pos += len(self._buffer)
            self._buffer = b""
        if return_data:
            return b"".join(blocks)

    # Read a block of up to n bytes.
    # If return_data is false, consume the data without returning it.
    def _read_block(self, n, return_data=True):
        # A non-positive count consumes nothing. Without this guard a
        # negative n would satisfy the fast path below and store a negative
        # _buffer_offset, making later reads return b"" as if at EOF.
        if n <= 0:
            return b"" if return_data else None

        # If we have enough data buffered, return immediately.
        end = self._buffer_offset + n
        if end <= len(self._buffer):
            data = self._buffer[self._buffer_offset : end]
            self._buffer_offset = end
            self._pos += len(data)
            return data if return_data else None

        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
        self._buffer = self._buffer[self._buffer_offset:]
        self._buffer_offset = 0

        blocks = []
        while n > 0 and self._fill_buffer():
            if n < len(self._buffer):
                data = self._buffer[:n]
                self._buffer_offset = n
            else:
                data = self._buffer
                self._buffer = b""
            if return_data:
                blocks.append(data)
            self._pos += len(data)
            n -= len(data)
        if return_data:
            return b"".join(blocks)

    def peek(self, n=0):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified; n is ignored.
        """
        with self._lock:
            self._check_can_read()
            if not self._fill_buffer():
                return b""
            return self._buffer[self._buffer_offset:]

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b'' if the file is already at EOF.
        """
        with self._lock:
            self._check_can_read()
            if size == 0:
                return b""
            elif size < 0:
                return self._read_all()
            else:
                return self._read_block(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream.

        Returns b'' if the file is at EOF.
        """
        # Usually, read1() calls _fp.read() at most once. However, sometimes
        # this does not give enough data for the decompressor to make progress.
        # In this case we make multiple reads, to avoid returning b"".
        with self._lock:
            self._check_can_read()
            if (size == 0 or
                # Only call _fill_buffer() if the buffer is actually empty.
                # This gives a significant speedup if *size* is small.
                (self._buffer_offset == len(self._buffer) and not self._fill_buffer())):
                return b""
            if size > 0:
                data = self._buffer[self._buffer_offset :
                                    self._buffer_offset + size]
                self._buffer_offset += len(data)
            else:
                # Negative size: hand over everything currently buffered.
                data = self._buffer[self._buffer_offset:]
                self._buffer = b""
                self._buffer_offset = 0
            self._pos += len(data)
            return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        with self._lock:
            return io.BufferedIOBase.readinto(self, b)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.

        Raises TypeError if size is not an int and has no __index__().
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            self._check_can_read()
            # Shortcut for the common case - the whole line is in the buffer.
            if size < 0:
                # find() returns -1 when there is no newline, so end == 0
                # means "not found".
                end = self._buffer.find(b"\n", self._buffer_offset) + 1
                if end > 0:
                    line = self._buffer[self._buffer_offset : end]
                    self._buffer_offset = end
                    self._pos += len(line)
                    return line
            return io.BufferedIOBase.readline(self, size)

    def readlines(self, size=-1):
        """Read a list of lines of uncompressed bytes from the file.

        size can be specified to control the number of lines read: no
        further lines will be read once the total size of the lines read
        so far equals or exceeds size.

        Raises TypeError if size is not an int and has no __index__().
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            return io.BufferedIOBase.readlines(self, size)

    def write(self, data):
        """Write a byte string to the file.

        Returns the number of uncompressed bytes written, which is
        always the length of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until close()
        is called.
        """
        with self._lock:
            self._check_can_write()
            if isinstance(data, (bytes, bytearray)):
                length = len(data)
            else:
                # Any other buffer-protocol object: len() would count items,
                # not bytes (e.g. array.array("I")), so measure via nbytes.
                data = memoryview(data)
                length = data.nbytes
            compressed = self._compressor.compress(data)
            self._fp.write(compressed)
            self._pos += length
            return length

    def writelines(self, seq):
        """Write a sequence of byte strings to the file.

        Returns the number of uncompressed bytes written.
        seq can be any iterable yielding byte strings.

        Line separators are not added between the written byte strings.
        """
        with self._lock:
            return io.BufferedIOBase.writelines(self, seq)

    # Rewind the file to the beginning of the data stream.
    def _rewind(self):
        self._fp.seek(0, 0)
        self._mode = _MODE_READ
        self._pos = 0
        self._decompressor = BZ2Decompressor()
        self._buffer = b""
        self._buffer_offset = 0

    def seek(self, offset, whence=0):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Values for whence are:

            0: start of stream (default); offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position. A target position before the
        start of the stream is treated as position 0.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        with self._lock:
            self._check_can_seek()

            # Recalculate offset as an absolute file position.
            if whence == 0:
                pass
            elif whence == 1:
                offset = self._pos + offset
            elif whence == 2:
                # Seeking relative to EOF - we need to know the file's size.
                if self._size < 0:
                    self._read_all(return_data=False)
                offset = self._size + offset
            else:
                raise ValueError("Invalid value for whence: %s" % (whence,))

            # Clamp to the start of the stream so that a negative count is
            # never handed to _read_block().
            if offset < 0:
                offset = 0

            # Make it so that offset is the number of bytes to skip forward.
            if offset < self._pos:
                self._rewind()
            else:
                offset -= self._pos

            # Read and discard data until we reach the desired position.
            self._read_block(offset, return_data=False)

            return self._pos

    def tell(self):
        """Return the current file position."""
        with self._lock:
            self._check_not_closed()
            return self._pos

Nadeem Vawda

2012-06-04 23:32:38 +0200

[diff] [blame]

449

def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a bzip2-compressed file in binary or text mode.

    filename is either the name of a file (a str or bytes object) or an
    already-open file object that will be read from or written to.

    mode is one of "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for
    binary access, or "rt", "wt", "xt" or "at" for text access. It
    defaults to "rb"; compresslevel defaults to 9.

    In binary mode the result is the same as calling
    BZ2File(filename, mode, compresslevel), and passing encoding, errors
    or newline raises ValueError.

    In text mode a BZ2File is created and wrapped in an io.TextIOWrapper
    configured with the given encoding, error handling and line ending(s).
    Combining "t" and "b" in mode raises ValueError.
    """
    wants_text = "t" in mode

    if wants_text:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        # The text-only arguments are rejected in binary mode, checked in
        # signature order.
        text_only_args = (
            (encoding, "Argument 'encoding' not supported in binary mode"),
            (errors, "Argument 'errors' not supported in binary mode"),
            (newline, "Argument 'newline' not supported in binary mode"),
        )
        for value, message in text_only_args:
            if value is not None:
                raise ValueError(message)

    # BZ2File only understands binary modes, so drop the "t" flag.
    raw_file = BZ2File(filename, mode.replace("t", ""),
                       compresslevel=compresslevel)

    if not wants_text:
        return raw_file
    return io.TextIOWrapper(raw_file, encoding, errors, newline)

Antoine Pitrou

2011-04-03 17:05:46 +0200

[diff] [blame]

489

def compress(data, compresslevel=9):
    """Compress a block of data in one shot.

    compresslevel, if given, must be a number between 1 and 9.

    To compress data incrementally, use a BZ2Compressor object instead.
    """
    compressor = BZ2Compressor(compresslevel)
    body = compressor.compress(data)
    # flush() emits whatever the compressor is still holding and ends the
    # stream.
    trailer = compressor.flush()
    return body + trailer

498

499

500

def decompress(data):

501

"""Decompress a block of data.

502

503

For incremental decompression, use a BZ2Decompressor object instead.

504

"""

Nadeem Vawda

98838ba

2011-05-30 01:12:24 +0200

[diff] [blame]

505

results = []

Nadeem Vawda

2013-12-04 23:01:15 +0100

[diff] [blame]

506

while data:

Nadeem Vawda

2011-05-27 01:52:15 +0200

[diff] [blame]

507

decomp = BZ2Decompressor()

Nadeem Vawda

2013-12-04 23:01:15 +0100

[diff] [blame]

508

try:

509

res = decomp.decompress(data)

510

except OSError:

511

if results:

512

break # Leftover data is not a valid bzip2 stream; ignore it.

513

else:

514

raise # Error on the first iteration; bail out.

515

results.append(res)

Nadeem Vawda

2011-05-27 01:52:15 +0200

[diff] [blame]

516

if not decomp.eof:

517

raise ValueError("Compressed data ended before the "

518

"end-of-stream marker was reached")

Nadeem Vawda

2011-05-27 01:52:15 +0200

[diff] [blame]

519

data = decomp.unused_data

Nadeem Vawda