"""Manage shelves of pickled objects.

A "shelf" is a persistent, dictionary-like object.  The difference
with dbm databases is that the values (not the keys!) in a shelf can
be essentially arbitrary Python objects -- anything that the "pickle"
module can handle.  This includes most class instances, recursive data
types, and objects containing lots of shared sub-objects.  The keys
are ordinary strings.

To summarize the interface (key is a string, data is an arbitrary
object):

        import shelve
        d = shelve.open(filename) # open, with (g)dbm filename -- no suffix

        d[key] = data   # store data at key (overwrites old data if
                        # using an existing key)
        data = d[key]   # retrieve a COPY of the data at key (raise
                        # KeyError if no such key) -- NOTE that this
                        # access returns a *copy* of the entry!
        del d[key]      # delete data stored at key (raises KeyError
                        # if no such key)
        flag = key in d # true if the key exists
        list = d.keys() # a list of all existing keys (slow!)

        d.close()       # close it

Depending on the implementation, closing a persistent dictionary may
or may not be necessary to flush changes to disk.

Normally, d[key] returns a COPY of the entry.  This needs care when
mutable entries are mutated: for example, if d[key] is a list,
        d[key].append(anitem)
does NOT modify the entry d[key] itself, as stored in the persistent
mapping -- it only modifies the copy, which is then immediately
discarded, so that the append has NO effect whatsoever.  To append an
item to d[key] in a way that will affect the persistent mapping, use:
        data = d[key]
        data.append(anitem)
        d[key] = data

To avoid the problem with mutable entries, you may pass the keyword
argument writeback=True in the call to shelve.open.  When you use:
        d = shelve.open(filename, writeback=True)
then d keeps a cache of all entries you access, and writes them all back
to the persistent mapping when you call d.close().  This ensures that
such usage as d[key].append(anitem) works as intended.

However, using keyword argument writeback=True may consume a vast amount
of memory for the cache, and it may make d.close() very slow, if you
access many of d's entries after opening it in this way: d has no way to
check which of the entries you access are mutable and/or which ones you
actually mutate, so it must cache, and write back at close, all of the
entries that you access.  You can call d.sync() to write back all the
entries in the cache, and empty the cache (d.sync() also synchronizes
the persistent dictionary on disk, if feasible).
"""

# Prefer cPickle and cStringIO when they can be imported; otherwise fall
# back to the pickle and StringIO modules, which expose the same names.

try:
    from cPickle import Pickler, Unpickler
except ImportError:
    from pickle import Pickler, Unpickler

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

import UserDict
import warnings

# Names exported by "from shelve import *".
__all__ = ["Shelf","BsdDbShelf","DbfilenameShelf","open"]

class Shelf(UserDict.DictMixin):
    """Base class for shelf implementations.

    This is initialized with a dictionary-like object.
    See the module's __doc__ string for an overview of the interface.

    Values are pickled before they are stored in the underlying
    dictionary and unpickled when they are read back.  When writeback
    is true, every entry that is read or assigned is also kept in
    self.cache and is written back by sync() and close().
    """

    def __init__(self, dict, protocol=None, writeback=False):
        """Wrap the dictionary-like object dict.

        protocol is the pickle protocol used for stored values; None
        selects protocol 0.  writeback enables the in-memory cache of
        accessed entries.
        """
        self.dict = dict
        if protocol is None:
            protocol = 0
        self._protocol = protocol
        self.writeback = writeback
        self.cache = {}
        self._closed = False

    def keys(self):
        """Return the keys of the underlying dictionary."""
        return self.dict.keys()

    def __len__(self):
        """Return the number of entries in the underlying dictionary."""
        return len(self.dict)

    def __contains__(self, key):
        """Return true if key is present in the underlying dictionary."""
        return key in self.dict

    def get(self, key, default=None):
        """Return the value stored at key, or default if key is absent."""
        if key in self.dict:
            return self[key]
        return default

    def __getitem__(self, key):
        """Return the value stored at key.

        The cache is consulted first; on a miss the pickled value is
        read from the underlying dictionary (raising KeyError if the
        key is absent) and unpickled.  The result is cached only when
        writeback is enabled.
        """
        try:
            value = self.cache[key]
        except KeyError:
            # NOTE(review): this unpickles whatever the database holds;
            # unpickling data from an untrusted source is unsafe.
            f = StringIO(self.dict[key])
            value = Unpickler(f).load()
            if self.writeback:
                self.cache[key] = value
        return value

    def __setitem__(self, key, value):
        """Pickle value and store it at key, overwriting any old entry."""
        if self.writeback:
            self.cache[key] = value
        f = StringIO()
        p = Pickler(f, self._protocol)
        p.dump(value)
        self.dict[key] = f.getvalue()

    def __delitem__(self, key):
        """Delete the entry at key (KeyError if there is no such key)."""
        del self.dict[key]
        # The key may never have been cached; that is not an error.
        self.cache.pop(key, None)

    def close(self):
        """Write back cached entries and close the underlying dictionary.

        The underlying dictionary is closed even if sync() raises, and
        the exception from sync() is then propagated.  Calling close()
        again afterwards does nothing, which keeps __del__ harmless
        after an explicit close().
        """
        if getattr(self, '_closed', False):
            return
        try:
            self.sync()
        finally:
            self._closed = True
            try:
                self.dict.close()
            except AttributeError:
                # The wrapped object has no close() method.
                pass
            self.dict = 0

    def __del__(self):
        if not hasattr(self, 'writeback'):
            # __init__ didn't succeed, so don't bother closing
            return
        self.close()

    def sync(self):
        """Write all cached entries back and empty the cache.

        Also calls sync() on the underlying dictionary when it has such
        a method.  If writing an entry back fails, the cache is kept and
        the writeback flag is restored before the exception propagates.
        """
        if self.writeback and self.cache:
            # Disable writeback while storing so that __setitem__ does
            # not touch the cache that is being iterated over.
            self.writeback = False
            try:
                for key, entry in self.cache.items():
                    self[key] = entry
            finally:
                self.writeback = True
            self.cache = {}
        if hasattr(self.dict, 'sync'):
            self.dict.sync()


class BsdDbShelf(Shelf):
    """Shelf implementation using the "BSD" db interface.

    On top of the regular shelf interface this class offers the cursor
    style methods first(), next(), previous(), last() and
    set_location(), which have no counterpart in [g]dbm databases.
    Each of them returns a (key, value) pair with the value unpickled.

    The actual database must be opened using one of the "bsddb"
    modules "open" routines (i.e. bsddb.hashopen, bsddb.btopen or
    bsddb.rnopen) and passed to the constructor.

    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, dict, protocol=None, writeback=False):
        Shelf.__init__(self, dict, protocol, writeback)

    def _unpickle_pair(self, pair):
        # Turn a (key, pickled value) pair into (key, unpickled value).
        (key, pickled) = pair
        stream = StringIO(pickled)
        return (key, Unpickler(stream).load())

    def set_location(self, key):
        """Call set_location(key) on the database and unpickle the value."""
        return self._unpickle_pair(self.dict.set_location(key))

    def next(self):
        """Call next() on the database and unpickle the value."""
        return self._unpickle_pair(self.dict.next())

    def previous(self):
        """Call previous() on the database and unpickle the value."""
        return self._unpickle_pair(self.dict.previous())

    def first(self):
        """Call first() on the database and unpickle the value."""
        return self._unpickle_pair(self.dict.first())

    def last(self):
        """Call last() on the database and unpickle the value."""
        return self._unpickle_pair(self.dict.last())


class DbfilenameShelf(Shelf):
    """Shelf implementation using the "anydbm" generic dbm interface.

    The constructor takes the filename of the dbm database, opens it
    through anydbm.open() and wraps the result.
    See the module's __doc__ string for an overview of the interface.
    """

    def __init__(self, filename, flag='c', protocol=None, writeback=False):
        import anydbm
        database = anydbm.open(filename, flag)
        Shelf.__init__(self, database, protocol, writeback)


def open(filename, flag='c', protocol=None, writeback=False):
    """Open a persistent dictionary for reading and writing.

    filename is the base filename for the underlying database; as a
    side-effect, an extension may be added to the filename and more
    than one file may be created.  flag is interpreted exactly like
    the flag parameter of anydbm.open().  protocol selects the version
    of the pickle protocol (0, 1, or 2).  writeback is handed on
    unchanged to the shelf that is created.

    See the module's __doc__ string for an overview of the interface.
    """
    shelf = DbfilenameShelf(filename, flag, protocol, writeback)
    return shelf
Raymond Hettinger1bc82f82004-12-05 03:58:17 +0000222 return DbfilenameShelf(filename, flag, protocol, writeback)