blob: 920a464d4f3cb97200399283c741b7579891b4ac [file] [log] [blame]
"""A dumb and slow but simple dbm clone.
For database spam, spam.dir contains the index (a text file),
spam.bak *may* contain a backup of the index (also a text file),
while spam.dat contains the data (a binary file).
XXX TO DO:
- seems to contain a bug when updating...
- reclaim free space (currently, space once occupied by deleted or expanded
items is never reused)
- support concurrent access (currently, if two processes take turns making
updates, they can mess up the index)
- support efficient access to large databases (currently, the whole index
is read when the database is opened, and some updates rewrite the whole index)
- support opening for read-only (flag = 'm')
"""
import os as _os
import __builtin__
_open = __builtin__.open
_BLOCKSIZE = 512
error = IOError # For anydbm
class _Database:
def __init__(self, file, mode):
self._mode = mode
self._dirfile = file + _os.extsep + 'dir'
self._datfile = file + _os.extsep + 'dat'
self._bakfile = file + _os.extsep + 'bak'
# Mod by Jack: create data file if needed
try:
f = _open(self._datfile, 'r')
except IOError:
f = _open(self._datfile, 'w', self._mode)
f.close()
self._update()
def _update(self):
self._index = {}
try:
f = _open(self._dirfile)
except IOError:
pass
else:
while 1:
line = f.readline().rstrip()
if not line: break
key, (pos, siz) = eval(line)
self._index[key] = (pos, siz)
f.close()
def _commit(self):
try: _os.unlink(self._bakfile)
except _os.error: pass
try: _os.rename(self._dirfile, self._bakfile)
except _os.error: pass
f = _open(self._dirfile, 'w', self._mode)
for key, (pos, siz) in self._index.items():
f.write("%s, (%s, %s)\n" % (`key`, `pos`, `siz`))
f.close()
def __getitem__(self, key):
pos, siz = self._index[key] # may raise KeyError
f = _open(self._datfile, 'rb')
f.seek(pos)
dat = f.read(siz)
f.close()
return dat
def _addval(self, val):
f = _open(self._datfile, 'rb+')
f.seek(0, 2)
pos = int(f.tell())
## Does not work under MW compiler
## pos = ((pos + _BLOCKSIZE - 1) / _BLOCKSIZE) * _BLOCKSIZE
## f.seek(pos)
npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
f.write('\0'*(npos-pos))
pos = npos
f.write(val)
f.close()
return (pos, len(val))
def _setval(self, pos, val):
f = _open(self._datfile, 'rb+')
f.seek(pos)
f.write(val)
f.close()
return (pos, len(val))
def _addkey(self, key, (pos, siz)):
self._index[key] = (pos, siz)
f = _open(self._dirfile, 'a', self._mode)
f.write("%s, (%s, %s)\n" % (`key`, `pos`, `siz`))
f.close()
def __setitem__(self, key, val):
if not type(key) == type('') == type(val):
raise TypeError, "keys and values must be strings"
if not self._index.has_key(key):
(pos, siz) = self._addval(val)
self._addkey(key, (pos, siz))
else:
pos, siz = self._index[key]
oldblocks = (siz + _BLOCKSIZE - 1) / _BLOCKSIZE
newblocks = (len(val) + _BLOCKSIZE - 1) / _BLOCKSIZE
if newblocks <= oldblocks:
pos, siz = self._setval(pos, val)
self._index[key] = pos, siz
else:
pos, siz = self._addval(val)
self._index[key] = pos, siz
def __delitem__(self, key):
del self._index[key]
self._commit()
def keys(self):
return self._index.keys()
def has_key(self, key):
return self._index.has_key(key)
def __contains__(self, key):
return self._index.has_key(key)
def iterkeys(self):
return self._index.iterkeys()
__iter__ = iterkeys
def __len__(self):
return len(self._index)
def close(self):
self._commit()
self._index = None
self._datfile = self._dirfile = self._bakfile = None
def __del__(self):
if self._index is not None:
self._commit()
def open(file, flag=None, mode=0666):
# flag, mode arguments are currently ignored
return _Database(file, mode)