blob: 1409eb1040090a484429665be81aa76fc99edac7 [file] [log] [blame]
Martin v. Löwis6aa4a1f2002-11-19 08:09:52 +00001#-----------------------------------------------------------------------
2#
3# Copyright (C) 2000, 2001 by Autonomous Zone Industries
Martin v. Löwisb2c7aff2002-11-23 11:26:07 +00004# Copyright (C) 2002 Gregory P. Smith
Martin v. Löwis6aa4a1f2002-11-19 08:09:52 +00005#
6# License: This is free software. You may use this software for any
7# purpose including modification/redistribution, so long as
8# this header remains intact and that you do not claim any
9# rights of ownership or authorship of this software. This
10# software has been tested, but no warranty is expressed or
11# implied.
12#
13# -- Gregory P. Smith <greg@electricrain.com>
14
15# This provides a simple database table interface built on top of
16# the Python BerkeleyDB 3 interface.
17#
18_cvsid = '$Id$'
19
20import string
21import sys
22try:
23 import cPickle
24 pickle = cPickle
25except ImportError:
26 import pickle
27import whrandom
28import xdrlib
29import re
30import copy
31
Martin v. Löwis7a3bae42002-11-19 17:48:49 +000032from bsddb.db import *
Martin v. Löwis6aa4a1f2002-11-19 08:09:52 +000033
34
class TableDBError(StandardError):
    """Base class for every error raised by the table layer in this module."""
class TableAlreadyExists(TableDBError):
    """Raised by CreateTable when a table of the requested name is present."""
37
38
class Cond:
    """Base condition callable: accepts every value it is given.

    Instances are used as per-column predicates; subclasses override
    __call__ to restrict which data strings match.
    """
    def __call__(self, s):
        # The argument is deliberately ignored: everything matches.
        matches = 1
        return matches
43
class ExactCond(Cond):
    """Condition callable that matches only values equal to a fixed string."""
    def __init__(self, strtomatch):
        # Remember the value every candidate is compared against.
        self.strtomatch = strtomatch

    def __call__(self, s):
        wanted = self.strtomatch
        return s == wanted
50
class PrefixCond(Cond):
    """Condition callable that matches values beginning with a given prefix."""
    def __init__(self, prefix):
        self.prefix = prefix

    def __call__(self, s):
        # Compare the leading slice of the candidate with the prefix.
        wanted = self.prefix
        head = s[:len(wanted)]
        return head == wanted
57
class PostfixCond(Cond):
    """Condition callable that matches values ending with a given postfix.

    An empty postfix matches every string, mirroring PrefixCond('').
    """
    def __init__(self, postfix):
        self.postfix = postfix

    def __call__(self, s):
        # str.endswith is used instead of s[-len(postfix):] because a
        # zero-length postfix turns that slice into s[-0:], i.e. the whole
        # string, which wrongly rejected every non-empty value.
        return s.endswith(self.postfix)
64
class LikeCond(Cond):
    """
    Condition callable that matches using an SQL 'LIKE' style string.

    '%' is the only wildcard (it matches any run of characters, possibly
    empty); every other character is matched literally.  Matching is case
    insensitive unless different re_flags are supplied.

    Attributes:
      likestr -- the regular expression body derived from the LIKE string
      re      -- the compiled, fully anchored pattern

    This isn't perfect but it should work for the simple common cases
    (for example there is no way to match a literal '%').
    """
    def __init__(self, likestr, re_flags=re.IGNORECASE):
        # Escape each literal segment between the % wildcards with
        # re.escape so that *all* regex metacharacters (including
        # \ ^ $ | { }) are neutralised, then rejoin the segments with
        # the regex equivalent of %.
        literal_parts = [re.escape(part) for part in likestr.split('%')]
        self.likestr = '.*'.join(literal_parts)
        self.re = re.compile('^' + self.likestr + '$', re_flags)

    def __call__(self, s):
        # Returns a match object (true) or None (false).
        return self.re.match(s)
81
82#
83# keys used to store database metadata
84#
85_table_names_key = '__TABLE_NAMES__' # list of the tables in this db
86_columns = '._COLUMNS__' # table_name+this key contains a list of columns
87def _columns_key(table) : return table + _columns
88
89#
90# these keys are found within table sub databases
91#
92_data = '._DATA_.' # this+column+this+rowid key contains table data
93_rowid = '._ROWID_.' # this+rowid+this key contains a unique entry for each
94 # row in the table. (no data is stored)
95_rowid_str_len = 8 # length in bytes of the unique rowid strings
96def _data_key(table, col, rowid) : return table + _data + col + _data + rowid
97def _search_col_data_key(table, col) : return table + _data + col + _data
98def _search_all_data_key(table) : return table + _data
99def _rowid_key(table, rowid) : return table + _rowid + rowid + _rowid
100def _search_rowid_key(table) : return table + _rowid
101
def contains_metastrings(s):
    """Tell whether `s` contains any reserved metadata marker.

    Returns 1 when the string contains one of the marker strings used to
    build database keys (which would interfere with dbtables database
    operation if used in a table or column name), otherwise 0.
    """
    reserved = (_table_names_key, _columns, _data, _rowid)
    for marker in reserved:
        if s.find(marker) >= 0:
            return 1
    return 0
113
114
115class bsdTableDB :
Guido van Rossum7a98e802002-12-02 16:17:46 +0000116
117 # Save close() from bombing out if __init__() failed
118 db = None
119 env = None
120
Martin v. Löwis6aa4a1f2002-11-19 08:09:52 +0000121 def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600, recover=0, dbflags=0) :
122 """bsdTableDB.open(filename, dbhome, create=0, truncate=0, mode=0600)
123 Open database name in the dbhome BerkeleyDB directory.
124 Use keyword arguments when calling this constructor.
125 """
126 myflags = DB_THREAD
127 if create :
128 myflags = myflags | DB_CREATE
129 flagsforenv = DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN | dbflags
130 if recover :
131 flagsforenv = flagsforenv | DB_RECOVER
132 self.env = DBEnv()
133 self.env.set_lk_detect(DB_LOCK_DEFAULT) # enable auto deadlock avoidance
134 self.env.open(dbhome, myflags | flagsforenv)
135 if truncate :
136 myflags = myflags | DB_TRUNCATE
137 self.db = DB(self.env)
138 self.db.set_flags(DB_DUP) # allow duplicate entries [warning: be careful w/ metadata]
139 self.db.open(filename, DB_BTREE, myflags, mode)
140
141 self.dbfilename = filename
142
143 # Initialize the table names list if this is a new database
144 if not self.db.has_key(_table_names_key) :
145 self.db.put(_table_names_key, pickle.dumps([], 1))
146
147 # TODO verify more of the database's metadata?
148
149 self.__tablecolumns = {}
150
151 def __del__(self):
152 self.close()
153
154 def close(self):
155 if self.db is not None:
156 self.db.close()
157 self.db = None
158 if self.env is not None:
159 self.env.close()
160 self.env = None
161
162 def checkpoint(self, mins=0):
163 try:
164 self.env.txn_checkpoint(mins)
165 except DBIncompleteError:
166 pass
167
168 def sync(self):
169 try:
170 self.db.sync()
171 except DBIncompleteError:
172 pass
173
174 def _db_print(self) :
175 """Print the database to stdout for debugging"""
176 print "******** Printing raw database for debugging ********"
177 cur = self.db.cursor()
178 try:
179 key, data = cur.first()
180 while 1 :
181 print `{key: data}`
182 next = cur.next()
183 if next:
184 key, data = next
185 else:
186 cur.close()
187 return
188 except DBNotFoundError:
189 cur.close()
190
191
192 def CreateTable(self, table, columns) :
193 """CreateTable(table, columns) - Create a new table in the database
194 raises TableDBError if it already exists or for other DB errors.
195 """
196 assert type(columns) == type([])
197 txn = None
198 try:
199 # checking sanity of the table and column names here on
200 # table creation will prevent problems elsewhere.
201 if contains_metastrings(table) :
202 raise ValueError, "bad table name: contains reserved metastrings"
203 for column in columns :
204 if contains_metastrings(column) :
205 raise ValueError, "bad column name: contains reserved metastrings"
206
207 columnlist_key = _columns_key(table)
208 if self.db.has_key(columnlist_key) :
209 raise TableAlreadyExists, "table already exists"
210
211 txn = self.env.txn_begin()
212 # store the table's column info
213 self.db.put(columnlist_key, pickle.dumps(columns, 1), txn=txn)
214
215 # add the table name to the tablelist
216 tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn, flags=DB_RMW))
217 tablelist.append(table)
218 self.db.delete(_table_names_key, txn) # delete 1st, incase we opened with DB_DUP
219 self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
220
221 txn.commit()
222 txn = None
223
224 except DBError, dberror:
225 if txn :
226 txn.abort()
227 raise TableDBError, dberror[1]
228
229
230 def ListTableColumns(self, table):
231 """Return a list of columns in the given table. [] if the table doesn't exist.
232 """
233 assert type(table) == type('')
234 if contains_metastrings(table) :
235 raise ValueError, "bad table name: contains reserved metastrings"
236
237 columnlist_key = _columns_key(table)
238 if not self.db.has_key(columnlist_key):
239 return []
240 pickledcolumnlist = self.db.get(columnlist_key)
241 if pickledcolumnlist:
242 return pickle.loads(pickledcolumnlist)
243 else:
244 return []
245
246 def ListTables(self):
247 """Return a list of tables in this database."""
248 pickledtablelist = self.db.get(_table_names_key)
249 if pickledtablelist:
250 return pickle.loads(pickledtablelist)
251 else:
252 return []
253
254 def CreateOrExtendTable(self, table, columns):
255 """CreateOrExtendTable(table, columns) - Create a new table in the database.
256 If a table of this name already exists, extend it to have any
257 additional columns present in the given list as well as
258 all of its current columns.
259 """
260 assert type(columns) == type([])
261 try:
262 self.CreateTable(table, columns)
263 except TableAlreadyExists:
264 # the table already existed, add any new columns
265 txn = None
266 try:
267 columnlist_key = _columns_key(table)
268 txn = self.env.txn_begin()
269
270 # load the current column list
271 oldcolumnlist = pickle.loads(self.db.get(columnlist_key, txn=txn, flags=DB_RMW))
272 # create a hash table for fast lookups of column names in the loop below
273 oldcolumnhash = {}
274 for c in oldcolumnlist:
275 oldcolumnhash[c] = c
276
277 # create a new column list containing both the old and new column names
278 newcolumnlist = copy.copy(oldcolumnlist)
279 for c in columns:
280 if not oldcolumnhash.has_key(c):
281 newcolumnlist.append(c)
282
283 # store the table's new extended column list
284 if newcolumnlist != oldcolumnlist :
285 # delete the old one first since we opened with DB_DUP
286 self.db.delete(columnlist_key, txn)
287 self.db.put(columnlist_key, pickle.dumps(newcolumnlist, 1), txn=txn)
288
289 txn.commit()
290 txn = None
291
292 self.__load_column_info(table)
293 except DBError, dberror:
294 if txn:
295 txn.abort()
296 raise TableDBError, dberror[1]
297
298
299 def __load_column_info(self, table) :
300 """initialize the self.__tablecolumns dict"""
301 # check the column names
302 try:
303 tcolpickles = self.db.get(_columns_key(table))
304 except DBNotFoundError:
305 raise TableDBError, "unknown table: " + `table`
306 if not tcolpickles:
307 raise TableDBError, "unknown table: " + `table`
308 self.__tablecolumns[table] = pickle.loads(tcolpickles)
309
310 def __new_rowid(self, table, txn=None) :
311 """Create a new unique row identifier"""
312 unique = 0
313 while not unique :
314 # Generate a random 64-bit row ID string
315 # (note: this code has <64 bits of randomness
316 # but it's plenty for our database id needs!)
317 p = xdrlib.Packer()
318 p.pack_int(int(whrandom.random()*2147483647))
319 p.pack_int(int(whrandom.random()*2147483647))
320 newid = p.get_buffer()
321
322 # Guarantee uniqueness by adding this key to the database
323 try:
324 self.db.put(_rowid_key(table, newid), None, txn=txn, flags=DB_NOOVERWRITE)
325 except DBKeyExistsError:
326 pass
327 else:
328 unique = 1
329
330 return newid
331
332
333 def Insert(self, table, rowdict) :
334 """Insert(table, datadict) - Insert a new row into the table
335 using the keys+values from rowdict as the column values.
336 """
337 txn = None
338 try:
339 if not self.db.has_key(_columns_key(table)) :
340 raise TableDBError, "unknown table"
341
342 # check the validity of each column name
343 if not self.__tablecolumns.has_key(table) :
344 self.__load_column_info(table)
345 for column in rowdict.keys() :
346 if not self.__tablecolumns[table].count(column) :
347 raise TableDBError, "unknown column: "+`column`
348
349 # get a unique row identifier for this row
350 rowid = self.__new_rowid(table)
351
352 txn = self.env.txn_begin()
353
354 # insert the row values into the table database
355 for column, dataitem in rowdict.items() :
356 # store the value
357 self.db.put(_data_key(table, column, rowid), dataitem, txn=txn)
358
359 txn.commit()
360 txn = None
361
362 except DBError, dberror:
363 if txn :
364 txn.abort()
365 self.db.delete(_rowid_key(table, rowid))
366 raise TableDBError, dberror[1]
367
368
369 def Modify(self, table, conditions={}, mappings={}) :
370 """Modify(table, conditions) - Modify in rows matching 'conditions'
371 using mapping functions in 'mappings'
372 * conditions is a dictionary keyed on column names
373 containing condition functions expecting the data string as an
374 argument and returning a boolean.
375 * mappings is a dictionary keyed on column names containint condition
376 functions expecting the data string as an argument and returning the
377 new string for that column.
378 """
379 try:
380 matching_rowids = self.__Select(table, [], conditions)
381
382 # modify only requested columns
383 columns = mappings.keys()
384 for rowid in matching_rowids.keys() :
385 txn = None
386 try:
387 for column in columns :
388 txn = self.env.txn_begin()
389 # modify the requested column
390 try:
391 dataitem = self.db.get(_data_key(table, column, rowid), txn)
392 self.db.delete(_data_key(table, column, rowid), txn)
393 except DBNotFoundError:
394 dataitem = None # XXXXXXX row key somehow didn't exist, assume no error
395 dataitem = mappings[column](dataitem)
396 if dataitem <> None:
397 self.db.put(_data_key(table, column, rowid), dataitem, txn=txn)
398 txn.commit()
399 txn = None
400
401 except DBError, dberror:
402 if txn :
403 txn.abort()
404 raise
405
406 except DBError, dberror:
407 raise TableDBError, dberror[1]
408
409 def Delete(self, table, conditions={}) :
410 """Delete(table, conditions) - Delete items matching the given
411 conditions from the table.
412 * conditions is a dictionary keyed on column names
413 containing condition functions expecting the data string as an
414 argument and returning a boolean.
415 """
416 try:
417 matching_rowids = self.__Select(table, [], conditions)
418
419 # delete row data from all columns
420 columns = self.__tablecolumns[table]
421 for rowid in matching_rowids.keys() :
422 txn = None
423 try:
424 txn = self.env.txn_begin()
425 for column in columns :
426 # delete the data key
427 try:
428 self.db.delete(_data_key(table, column, rowid), txn)
429 except DBNotFoundError:
430 pass # XXXXXXX column may not exist, assume no error
431
432 try:
433 self.db.delete(_rowid_key(table, rowid), txn)
434 except DBNotFoundError:
435 pass # XXXXXXX row key somehow didn't exist, assume no error
436 txn.commit()
437 txn = None
438 except DBError, dberror:
439 if txn :
440 txn.abort()
441 raise
442
443 except DBError, dberror:
444 raise TableDBError, dberror[1]
445
446
447 def Select(self, table, columns, conditions={}) :
448 """Select(table, conditions) - retrieve specific row data
449 Returns a list of row column->value mapping dictionaries.
450 * columns is a list of which column data to return. If
451 columns is None, all columns will be returned.
452 * conditions is a dictionary keyed on column names
453 containing callable conditions expecting the data string as an
454 argument and returning a boolean.
455 """
456 try:
457 if not self.__tablecolumns.has_key(table) :
458 self.__load_column_info(table)
459 if columns is None :
460 columns = self.__tablecolumns[table]
461 matching_rowids = self.__Select(table, columns, conditions)
462 except DBError, dberror:
463 raise TableDBError, dberror[1]
464
465 # return the matches as a list of dictionaries
466 return matching_rowids.values()
467
468
469 def __Select(self, table, columns, conditions) :
470 """__Select() - Used to implement Select and Delete (above)
471 Returns a dictionary keyed on rowids containing dicts
472 holding the row data for columns listed in the columns param
473 that match the given conditions.
474 * conditions is a dictionary keyed on column names
475 containing callable conditions expecting the data string as an
476 argument and returning a boolean.
477 """
478 # check the validity of each column name
479 if not self.__tablecolumns.has_key(table) :
480 self.__load_column_info(table)
481 if columns is None :
482 columns = self.tablecolumns[table]
483 for column in (columns + conditions.keys()) :
484 if not self.__tablecolumns[table].count(column) :
485 raise TableDBError, "unknown column: "+`column`
486
487 # keyed on rows that match so far, containings dicts keyed on
488 # column names containing the data for that row and column.
489 matching_rowids = {}
490
491 rejected_rowids = {} # keys are rowids that do not match
492
493 # attempt to sort the conditions in such a way as to minimize full column lookups
494 def cmp_conditions(atuple, btuple):
495 a = atuple[1]
496 b = btuple[1]
497 if type(a) == type(b) :
498 if isinstance(a, PrefixCond) and isinstance(b, PrefixCond):
499 return cmp(len(b.prefix), len(a.prefix)) # longest prefix first
500 if isinstance(a, LikeCond) and isinstance(b, LikeCond):
501 return cmp(len(b.likestr), len(a.likestr)) # longest likestr first
502 return 0
503 if isinstance(a, ExactCond):
504 return -1
505 if isinstance(b, ExactCond):
506 return 1
507 if isinstance(a, PrefixCond):
508 return -1
509 if isinstance(b, PrefixCond):
510 return 1
511 # leave all unknown condition callables alone as equals
512 return 0
513
514 conditionlist = conditions.items()
515 conditionlist.sort(cmp_conditions)
516
517 # Apply conditions to column data to find what we want
518 cur = self.db.cursor()
519 column_num = -1
520 for column, condition in conditionlist :
521 column_num = column_num + 1
522 searchkey = _search_col_data_key(table, column)
523 # speedup: don't linear search columns within loop
524 if column in columns :
525 savethiscolumndata = 1 # save the data for return
526 else :
527 savethiscolumndata = 0 # data only used for selection
528
529 try:
530 key, data = cur.set_range(searchkey)
531 while key[:len(searchkey)] == searchkey :
532 # extract the rowid from the key
533 rowid = key[-_rowid_str_len:]
534
535 if not rejected_rowids.has_key(rowid) :
536 # if no condition was specified or the condition
537 # succeeds, add row to our match list.
538 if not condition or condition(data) :
Martin v. Löwisb2c7aff2002-11-23 11:26:07 +0000539 if not matching_rowids.has_key(rowid) :
540 matching_rowids[rowid] = {}
541 if savethiscolumndata :
542 matching_rowids[rowid][column] = data
Martin v. Löwis6aa4a1f2002-11-19 08:09:52 +0000543 else :
544 if matching_rowids.has_key(rowid) :
545 del matching_rowids[rowid]
546 rejected_rowids[rowid] = rowid
547
548 key, data = cur.next()
549
550 except DBError, dberror:
551 if dberror[0] != DB_NOTFOUND :
552 raise
553 continue
554
555 cur.close()
556
557 # we're done selecting rows, garbage collect the reject list
558 del rejected_rowids
559
560 # extract any remaining desired column data from the
561 # database for the matching rows.
562 if len(columns) > 0 :
563 for rowid, rowdata in matching_rowids.items() :
564 for column in columns :
565 if rowdata.has_key(column) :
566 continue
567 try:
568 rowdata[column] = self.db.get(_data_key(table, column, rowid))
569 except DBError, dberror:
570 if dberror[0] != DB_NOTFOUND :
571 raise
572 rowdata[column] = None
573
574 # return the matches
575 return matching_rowids
576
577
578 def Drop(self, table) :
579 """Remove an entire table from the database
580 """
581 txn = None
582 try:
583 txn = self.env.txn_begin()
584
585 # delete the column list
586 self.db.delete(_columns_key(table), txn)
587
588 cur = self.db.cursor(txn)
589
590 # delete all keys containing this tables column and row info
591 table_key = _search_all_data_key(table)
592 while 1 :
593 try:
594 key, data = cur.set_range(table_key)
595 except DBNotFoundError:
596 break
597 # only delete items in this table
598 if key[:len(table_key)] != table_key :
599 break
600 cur.delete()
601
602 # delete all rowids used by this table
603 table_key = _search_rowid_key(table)
604 while 1 :
605 try:
606 key, data = cur.set_range(table_key)
607 except DBNotFoundError:
608 break
609 # only delete items in this table
610 if key[:len(table_key)] != table_key :
611 break
612 cur.delete()
613
614 cur.close()
615
616 # delete the tablename from the table name list
617 tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn, flags=DB_RMW))
618 try:
619 tablelist.remove(table)
620 except ValueError:
621 pass # hmm, it wasn't there, oh well, that's what we want.
622 self.db.delete(_table_names_key, txn) # delete 1st, incase we opened with DB_DUP
623 self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
624
625 txn.commit()
626 txn = None
627
628 if self.__tablecolumns.has_key(table) :
629 del self.__tablecolumns[table]
630
631 except DBError, dberror:
632 if txn :
633 txn.abort()
634 raise TableDBError, dberror[1]