blob: cef580e5cdca8a7f81d3c8a88a9415ce77ab087f [file] [log] [blame]
"""Manage shelves of pickled objects.

A "shelf" is a persistent, dictionary-like object. The difference
with dbm databases is that the values (not the keys!) in a shelf can
be essentially arbitrary Python objects -- anything that the "pickle"
module can handle. This includes most class instances, recursive data
types, and objects containing lots of shared sub-objects. The keys
are ordinary strings.

To summarize the interface (key is a string, data is an arbitrary
object):

        import shelve
        d = shelve.open(filename) # open, with (g)dbm filename -- no suffix

        d[key] = data   # store data at key (overwrites old data if
                        # using an existing key)
        data = d[key]   # retrieve a COPY of the data at key (raise
                        # KeyError if no such key) -- NOTE that this
                        # access returns a *copy* of the entry!
        del d[key]      # delete data stored at key (raises KeyError
                        # if no such key)
        flag = key in d # true if the key exists
        list = d.keys() # a list of all existing keys (slow!)

        d.close()       # close it

Depending on the implementation, closing a persistent dictionary may
or may not be necessary to flush changes to disk.

Normally, d[key] returns a COPY of the entry. This needs care when
mutable entries are mutated: for example, if d[key] is a list,
        d[key].append(anitem)
does NOT modify the entry d[key] itself, as stored in the persistent
mapping -- it only modifies the copy, which is then immediately
discarded, so that the append has NO effect whatsoever. To append an
item to d[key] in a way that will affect the persistent mapping, use:
        data = d[key]
        data.append(anitem)
        d[key] = data

To avoid the problem with mutable entries, you may pass the keyword
argument writeback=True in the call to shelve.open. When you use:
        d = shelve.open(filename, writeback=True)
then d keeps a cache of all entries you access, and writes them all back
to the persistent mapping when you call d.close(). This ensures that
such usage as d[key].append(anitem) works as intended.

However, using keyword argument writeback=True may consume vast amounts
of memory for the cache, and it may make d.close() very slow, if you
access many of d's entries after opening it in this way: d has no way to
check which of the entries you access are mutable and/or which ones you
actually mutate, so it must cache, and write back at close, all of the
entries that you access. You can call d.sync() to write back all the
entries in the cache, and empty the cache (d.sync() also synchronizes
the persistent dictionary on disk, if feasible).
"""
Guido van Rossuma48061a1995-01-10 00:31:14 +000058
import collections
import collections.abc
from io import BytesIO
from pickle import Pickler, Unpickler
Raymond Hettinger79947162002-11-15 06:46:14 +000063
Andrew Svetlovac0f9652012-10-06 18:38:30 +030064__all__ = ["Shelf", "BsdDbShelf", "DbfilenameShelf", "open"]
Guido van Rossumcc6764c1995-02-09 17:18:10 +000065
class _ClosedDict(collections.abc.MutableMapping):
    """Marker for a closed dict.  Access attempts raise a ValueError.

    An instance of this class replaces a shelf's backing mapping once the
    shelf has been closed, so that any later use fails loudly instead of
    touching a closed database.
    """

    def closed(self, *args):
        """Reject the operation; raises ValueError unconditionally."""
        raise ValueError('invalid operation on closed shelf')

    # Every abstract MutableMapping method (plus keys()) is routed to the
    # same raising stub, which also makes the class instantiable.
    __iter__ = __len__ = __getitem__ = __setitem__ = __delitem__ = keys = closed

    def __repr__(self):
        return '<Closed Dictionary>'
75
Georg Brandl732324a2010-12-04 11:12:43 +000076
class Shelf(collections.abc.MutableMapping):
    """Base class for shelf implementations.

    This is initialized with a dictionary-like object.
    See the module's __doc__ string for an overview of the interface.

    At this level keys are str objects; they are encoded with
    ``keyencoding`` before being handed to the backing mapping.  Values
    are pickled into bytes on the way in and unpickled on the way out.
    """

    def __init__(self, dict, protocol=None, writeback=False,
                 keyencoding="utf-8"):
        """Wrap the mapping *dict*.

        dict        -- backing mapping holding encoded keys and pickled values
        protocol    -- pickle protocol for stored values (None selects 3)
        writeback   -- when true, keep accessed entries in ``self.cache``
                       and write them back on sync()/close()
        keyencoding -- codec used to turn str keys into backend keys
        """
        self.dict = dict
        if protocol is None:
            protocol = 3
        self._protocol = protocol
        self.writeback = writeback
        self.cache = {}
        self.keyencoding = keyencoding

    def __iter__(self):
        """Yield every key of the backing mapping, decoded to str."""
        for k in self.dict.keys():
            yield k.decode(self.keyencoding)

    def __len__(self):
        return len(self.dict)

    def __contains__(self, key):
        return key.encode(self.keyencoding) in self.dict

    def get(self, key, default=None):
        """Return the value stored under *key*, or *default* if absent."""
        if key.encode(self.keyencoding) in self.dict:
            return self[key]
        return default

    def __getitem__(self, key):
        """Return the unpickled value for *key* (KeyError if missing).

        The write-back cache is consulted first; a value loaded from the
        backing mapping is added to the cache only when writeback is on.
        """
        try:
            value = self.cache[key]
        except KeyError:
            f = BytesIO(self.dict[key.encode(self.keyencoding)])
            value = Unpickler(f).load()
            if self.writeback:
                self.cache[key] = value
        return value

    def __setitem__(self, key, value):
        """Pickle *value* and store it under the encoded *key*."""
        if self.writeback:
            self.cache[key] = value
        f = BytesIO()
        p = Pickler(f, self._protocol)
        p.dump(value)
        self.dict[key.encode(self.keyencoding)] = f.getvalue()

    def __delitem__(self, key):
        """Remove *key* from the backing mapping and from the cache."""
        del self.dict[key.encode(self.keyencoding)]
        try:
            del self.cache[key]
        except KeyError:
            pass

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def close(self):
        """Write back cached entries, close the backend, mark the shelf closed.

        The shelf is marked closed even when sync() or the backend's
        close() raises; the exception still propagates to the caller.
        """
        if self.dict is None:
            # Already torn down by an earlier close() during shutdown.
            return
        try:
            self.sync()
            try:
                self.dict.close()
            except AttributeError:
                # The backing mapping has no close() method.
                pass
        finally:
            # Catch errors that may happen when close is called from __del__
            # because CPython is in interpreter shutdown.
            try:
                self.dict = _ClosedDict()
            except (NameError, TypeError):
                self.dict = None

    def __del__(self):
        if not hasattr(self, 'writeback'):
            # __init__ didn't succeed, so don't bother closing
            # see http://bugs.python.org/issue1339007 for details
            return
        self.close()

    def sync(self):
        """Write cached entries back, empty the cache, sync the backend.

        If a write-back fails the exception propagates, the cache is left
        intact, and ``self.writeback`` is restored to True.
        """
        if self.writeback and self.cache:
            # Turn caching off while re-storing so that __setitem__ does not
            # mutate self.cache during the iteration below.
            self.writeback = False
            try:
                for key, entry in self.cache.items():
                    self[key] = entry
            finally:
                self.writeback = True
            self.cache = {}
        if hasattr(self.dict, 'sync'):
            self.dict.sync()
169
Guido van Rossumcc6764c1995-02-09 17:18:10 +0000170
class BsdDbShelf(Shelf):
    """Shelf implementation using the "BSD" db interface.

    This adds methods first(), next(), previous(), last() and
    set_location() that have no counterpart in [g]dbm databases.

    The actual database must be opened using one of the "bsddb"
    modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen or
    bsddb.rnopen) and passed to the constructor.

    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False,
                 keyencoding="utf-8"):
        Shelf.__init__(self, dict, protocol, writeback, keyencoding)

    def _decode_record(self, record):
        # Turn a raw (key, pickled value) pair from the database into a
        # (str key, unpickled object) pair.
        raw_key, raw_value = record
        stream = BytesIO(raw_value)
        return (raw_key.decode(self.keyencoding), Unpickler(stream).load())

    def set_location(self, key):
        """Position the database at *key* and return the decoded record.

        NOTE(review): *key* is handed to the database as given, without
        the keyencoding step used elsewhere -- confirm callers pass the
        form the backend expects.
        """
        return self._decode_record(self.dict.set_location(key))

    def next(self):
        """Return the decoded record obtained from next() on the database."""
        return self._decode_record(next(self.dict))

    def previous(self):
        """Return the decoded record from the database's previous()."""
        return self._decode_record(self.dict.previous())

    def first(self):
        """Return the decoded record from the database's first()."""
        return self._decode_record(self.dict.first())

    def last(self):
        """Return the decoded record from the database's last()."""
        return self._decode_record(self.dict.last())
Guido van Rossumabad1cc1995-08-11 14:19:16 +0000212
213
class DbfilenameShelf(Shelf):
    """Shelf implementation using the "dbm" generic dbm interface.

    The constructor receives the filename of the dbm database and opens
    it itself, instead of taking an already-open mapping.
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, filename, flag='c', protocol=None, writeback=False):
        # dbm is imported at call time, so it is only loaded once a
        # filename-based shelf is actually created.
        import dbm
        database = dbm.open(filename, flag)
        Shelf.__init__(self, database, protocol, writeback)
Guido van Rossumcc6764c1995-02-09 17:18:10 +0000224
225
def open(filename, flag='c', protocol=None, writeback=False):
    """Open a persistent dictionary for reading and writing.

    filename is the base filename for the underlying database; as a
    side-effect, an extension may be added to it and more than one file
    may be created.  flag has the same interpretation as the flag
    parameter of dbm.open().  protocol is the pickle protocol version
    used for stored values, and writeback enables the write-back cache;
    all four arguments are forwarded unchanged to DbfilenameShelf.

    See the module's __doc__ string for an overview of the interface.
    """
    shelf = DbfilenameShelf(filename, flag, protocol, writeback)
    return shelf