blob: f1e88f23831c63a6e17f4030f064900818ebbd7b [file] [log] [blame]
Martin v. Löwis6aa4a1f2002-11-19 08:09:52 +00001#-----------------------------------------------------------------------
2#
3# Copyright (C) 2000, 2001 by Autonomous Zone Industries
4#
5# License: This is free software. You may use this software for any
6# purpose including modification/redistribution, so long as
7# this header remains intact and that you do not claim any
8# rights of ownership or authorship of this software. This
9# software has been tested, but no warranty is expressed or
10# implied.
11#
12# -- Gregory P. Smith <greg@electricrain.com>
13
14# This provides a simple database table interface built on top of
15# the Python BerkeleyDB 3 interface.
16#
17_cvsid = '$Id$'
18
19import string
20import sys
21try:
22 import cPickle
23 pickle = cPickle
24except ImportError:
25 import pickle
26import whrandom
27import xdrlib
28import re
29import copy
30
Martin v. Löwis7a3bae42002-11-19 17:48:49 +000031from bsddb.db import *
Martin v. Löwis6aa4a1f2002-11-19 08:09:52 +000032
33
34class TableDBError(StandardError): pass
class TableAlreadyExists(TableDBError):
    """Raised when a table is created under a name that is already in use."""
36
37
class Cond:
    """Base condition callable: accepts every value it is given."""

    def __call__(self, s):
        # The argument is ignored; any data string counts as a match.
        return 1
42
class ExactCond(Cond):
    """Condition callable that matches only one specific string."""

    def __init__(self, strtomatch):
        # The value every candidate is compared against.
        self.strtomatch = strtomatch

    def __call__(self, s):
        wanted = self.strtomatch
        return s == wanted
49
class PrefixCond(Cond):
    """Condition callable that matches strings beginning with a prefix."""

    def __init__(self, prefix):
        self.prefix = prefix

    def __call__(self, s):
        # Compare the leading slice of the candidate with the prefix.
        wanted = self.prefix
        head = s[:len(wanted)]
        return head == wanted
56
class LikeCond(Cond):
    """Condition callable that matches using an SQL 'LIKE' style string.

    '%' in the pattern is a wild card matching any run of characters;
    every other character is matched literally.  Matching is case
    insensitive unless different `re_flags` are supplied.

    Attributes:
        likestr -- the regular expression source built from the pattern
                   (without the surrounding '^' and '$' anchors)
        re      -- the compiled, anchored regular expression
    """

    def __init__(self, likestr, re_flags=re.IGNORECASE):
        # Escape each literal segment with re.escape() instead of a
        # hand-picked character list, so that characters such as
        # '\\', '|', '^', '$', '{' and '}' are also taken literally,
        # then join the segments with the regex form of the % wild card.
        literal_parts = [re.escape(part) for part in likestr.split('%')]
        self.likestr = '.*'.join(literal_parts)
        self.re = re.compile('^' + self.likestr + '$', re_flags)

    def __call__(self, s):
        # Returns a match object (true) or None (false).
        return self.re.match(s)
73
74#
75# keys used to store database metadata
76#
77_table_names_key = '__TABLE_NAMES__' # list of the tables in this db
78_columns = '._COLUMNS__' # table_name+this key contains a list of columns
79def _columns_key(table) : return table + _columns
80
81#
82# these keys are found within table sub databases
83#
84_data = '._DATA_.' # this+column+this+rowid key contains table data
85_rowid = '._ROWID_.' # this+rowid+this key contains a unique entry for each
86 # row in the table. (no data is stored)
87_rowid_str_len = 8 # length in bytes of the unique rowid strings
88def _data_key(table, col, rowid) : return table + _data + col + _data + rowid
89def _search_col_data_key(table, col) : return table + _data + col + _data
90def _search_all_data_key(table) : return table + _data
91def _rowid_key(table, rowid) : return table + _rowid + rowid + _rowid
92def _search_rowid_key(table) : return table + _rowid
93
def contains_metastrings(s):
    """Report whether `s` contains any reserved metadata string.

    Returns 1 when `s` contains one of the metadata strings that might
    interfere with dbtables database operation, and 0 otherwise.
    """
    for reserved in (_table_names_key, _columns, _data, _rowid):
        if s.find(reserved) >= 0:
            return 1
    return 0
105
106
107class bsdTableDB :
108 def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600, recover=0, dbflags=0) :
109 """bsdTableDB.open(filename, dbhome, create=0, truncate=0, mode=0600)
110 Open database name in the dbhome BerkeleyDB directory.
111 Use keyword arguments when calling this constructor.
112 """
113 myflags = DB_THREAD
114 if create :
115 myflags = myflags | DB_CREATE
116 flagsforenv = DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN | dbflags
117 if recover :
118 flagsforenv = flagsforenv | DB_RECOVER
119 self.env = DBEnv()
120 self.env.set_lk_detect(DB_LOCK_DEFAULT) # enable auto deadlock avoidance
121 self.env.open(dbhome, myflags | flagsforenv)
122 if truncate :
123 myflags = myflags | DB_TRUNCATE
124 self.db = DB(self.env)
125 self.db.set_flags(DB_DUP) # allow duplicate entries [warning: be careful w/ metadata]
126 self.db.open(filename, DB_BTREE, myflags, mode)
127
128 self.dbfilename = filename
129
130 # Initialize the table names list if this is a new database
131 if not self.db.has_key(_table_names_key) :
132 self.db.put(_table_names_key, pickle.dumps([], 1))
133
134 # TODO verify more of the database's metadata?
135
136 self.__tablecolumns = {}
137
138 def __del__(self):
139 self.close()
140
141 def close(self):
142 if self.db is not None:
143 self.db.close()
144 self.db = None
145 if self.env is not None:
146 self.env.close()
147 self.env = None
148
149 def checkpoint(self, mins=0):
150 try:
151 self.env.txn_checkpoint(mins)
152 except DBIncompleteError:
153 pass
154
155 def sync(self):
156 try:
157 self.db.sync()
158 except DBIncompleteError:
159 pass
160
161 def _db_print(self) :
162 """Print the database to stdout for debugging"""
163 print "******** Printing raw database for debugging ********"
164 cur = self.db.cursor()
165 try:
166 key, data = cur.first()
167 while 1 :
168 print `{key: data}`
169 next = cur.next()
170 if next:
171 key, data = next
172 else:
173 cur.close()
174 return
175 except DBNotFoundError:
176 cur.close()
177
178
179 def CreateTable(self, table, columns) :
180 """CreateTable(table, columns) - Create a new table in the database
181 raises TableDBError if it already exists or for other DB errors.
182 """
183 assert type(columns) == type([])
184 txn = None
185 try:
186 # checking sanity of the table and column names here on
187 # table creation will prevent problems elsewhere.
188 if contains_metastrings(table) :
189 raise ValueError, "bad table name: contains reserved metastrings"
190 for column in columns :
191 if contains_metastrings(column) :
192 raise ValueError, "bad column name: contains reserved metastrings"
193
194 columnlist_key = _columns_key(table)
195 if self.db.has_key(columnlist_key) :
196 raise TableAlreadyExists, "table already exists"
197
198 txn = self.env.txn_begin()
199 # store the table's column info
200 self.db.put(columnlist_key, pickle.dumps(columns, 1), txn=txn)
201
202 # add the table name to the tablelist
203 tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn, flags=DB_RMW))
204 tablelist.append(table)
205 self.db.delete(_table_names_key, txn) # delete 1st, incase we opened with DB_DUP
206 self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
207
208 txn.commit()
209 txn = None
210
211 except DBError, dberror:
212 if txn :
213 txn.abort()
214 raise TableDBError, dberror[1]
215
216
217 def ListTableColumns(self, table):
218 """Return a list of columns in the given table. [] if the table doesn't exist.
219 """
220 assert type(table) == type('')
221 if contains_metastrings(table) :
222 raise ValueError, "bad table name: contains reserved metastrings"
223
224 columnlist_key = _columns_key(table)
225 if not self.db.has_key(columnlist_key):
226 return []
227 pickledcolumnlist = self.db.get(columnlist_key)
228 if pickledcolumnlist:
229 return pickle.loads(pickledcolumnlist)
230 else:
231 return []
232
233 def ListTables(self):
234 """Return a list of tables in this database."""
235 pickledtablelist = self.db.get(_table_names_key)
236 if pickledtablelist:
237 return pickle.loads(pickledtablelist)
238 else:
239 return []
240
241 def CreateOrExtendTable(self, table, columns):
242 """CreateOrExtendTable(table, columns) - Create a new table in the database.
243 If a table of this name already exists, extend it to have any
244 additional columns present in the given list as well as
245 all of its current columns.
246 """
247 assert type(columns) == type([])
248 try:
249 self.CreateTable(table, columns)
250 except TableAlreadyExists:
251 # the table already existed, add any new columns
252 txn = None
253 try:
254 columnlist_key = _columns_key(table)
255 txn = self.env.txn_begin()
256
257 # load the current column list
258 oldcolumnlist = pickle.loads(self.db.get(columnlist_key, txn=txn, flags=DB_RMW))
259 # create a hash table for fast lookups of column names in the loop below
260 oldcolumnhash = {}
261 for c in oldcolumnlist:
262 oldcolumnhash[c] = c
263
264 # create a new column list containing both the old and new column names
265 newcolumnlist = copy.copy(oldcolumnlist)
266 for c in columns:
267 if not oldcolumnhash.has_key(c):
268 newcolumnlist.append(c)
269
270 # store the table's new extended column list
271 if newcolumnlist != oldcolumnlist :
272 # delete the old one first since we opened with DB_DUP
273 self.db.delete(columnlist_key, txn)
274 self.db.put(columnlist_key, pickle.dumps(newcolumnlist, 1), txn=txn)
275
276 txn.commit()
277 txn = None
278
279 self.__load_column_info(table)
280 except DBError, dberror:
281 if txn:
282 txn.abort()
283 raise TableDBError, dberror[1]
284
285
286 def __load_column_info(self, table) :
287 """initialize the self.__tablecolumns dict"""
288 # check the column names
289 try:
290 tcolpickles = self.db.get(_columns_key(table))
291 except DBNotFoundError:
292 raise TableDBError, "unknown table: " + `table`
293 if not tcolpickles:
294 raise TableDBError, "unknown table: " + `table`
295 self.__tablecolumns[table] = pickle.loads(tcolpickles)
296
297 def __new_rowid(self, table, txn=None) :
298 """Create a new unique row identifier"""
299 unique = 0
300 while not unique :
301 # Generate a random 64-bit row ID string
302 # (note: this code has <64 bits of randomness
303 # but it's plenty for our database id needs!)
304 p = xdrlib.Packer()
305 p.pack_int(int(whrandom.random()*2147483647))
306 p.pack_int(int(whrandom.random()*2147483647))
307 newid = p.get_buffer()
308
309 # Guarantee uniqueness by adding this key to the database
310 try:
311 self.db.put(_rowid_key(table, newid), None, txn=txn, flags=DB_NOOVERWRITE)
312 except DBKeyExistsError:
313 pass
314 else:
315 unique = 1
316
317 return newid
318
319
320 def Insert(self, table, rowdict) :
321 """Insert(table, datadict) - Insert a new row into the table
322 using the keys+values from rowdict as the column values.
323 """
324 txn = None
325 try:
326 if not self.db.has_key(_columns_key(table)) :
327 raise TableDBError, "unknown table"
328
329 # check the validity of each column name
330 if not self.__tablecolumns.has_key(table) :
331 self.__load_column_info(table)
332 for column in rowdict.keys() :
333 if not self.__tablecolumns[table].count(column) :
334 raise TableDBError, "unknown column: "+`column`
335
336 # get a unique row identifier for this row
337 rowid = self.__new_rowid(table)
338
339 txn = self.env.txn_begin()
340
341 # insert the row values into the table database
342 for column, dataitem in rowdict.items() :
343 # store the value
344 self.db.put(_data_key(table, column, rowid), dataitem, txn=txn)
345
346 txn.commit()
347 txn = None
348
349 except DBError, dberror:
350 if txn :
351 txn.abort()
352 self.db.delete(_rowid_key(table, rowid))
353 raise TableDBError, dberror[1]
354
355
356 def Modify(self, table, conditions={}, mappings={}) :
357 """Modify(table, conditions) - Modify in rows matching 'conditions'
358 using mapping functions in 'mappings'
359 * conditions is a dictionary keyed on column names
360 containing condition functions expecting the data string as an
361 argument and returning a boolean.
362 * mappings is a dictionary keyed on column names containint condition
363 functions expecting the data string as an argument and returning the
364 new string for that column.
365 """
366 try:
367 matching_rowids = self.__Select(table, [], conditions)
368
369 # modify only requested columns
370 columns = mappings.keys()
371 for rowid in matching_rowids.keys() :
372 txn = None
373 try:
374 for column in columns :
375 txn = self.env.txn_begin()
376 # modify the requested column
377 try:
378 dataitem = self.db.get(_data_key(table, column, rowid), txn)
379 self.db.delete(_data_key(table, column, rowid), txn)
380 except DBNotFoundError:
381 dataitem = None # XXXXXXX row key somehow didn't exist, assume no error
382 dataitem = mappings[column](dataitem)
383 if dataitem <> None:
384 self.db.put(_data_key(table, column, rowid), dataitem, txn=txn)
385 txn.commit()
386 txn = None
387
388 except DBError, dberror:
389 if txn :
390 txn.abort()
391 raise
392
393 except DBError, dberror:
394 raise TableDBError, dberror[1]
395
396 def Delete(self, table, conditions={}) :
397 """Delete(table, conditions) - Delete items matching the given
398 conditions from the table.
399 * conditions is a dictionary keyed on column names
400 containing condition functions expecting the data string as an
401 argument and returning a boolean.
402 """
403 try:
404 matching_rowids = self.__Select(table, [], conditions)
405
406 # delete row data from all columns
407 columns = self.__tablecolumns[table]
408 for rowid in matching_rowids.keys() :
409 txn = None
410 try:
411 txn = self.env.txn_begin()
412 for column in columns :
413 # delete the data key
414 try:
415 self.db.delete(_data_key(table, column, rowid), txn)
416 except DBNotFoundError:
417 pass # XXXXXXX column may not exist, assume no error
418
419 try:
420 self.db.delete(_rowid_key(table, rowid), txn)
421 except DBNotFoundError:
422 pass # XXXXXXX row key somehow didn't exist, assume no error
423 txn.commit()
424 txn = None
425 except DBError, dberror:
426 if txn :
427 txn.abort()
428 raise
429
430 except DBError, dberror:
431 raise TableDBError, dberror[1]
432
433
434 def Select(self, table, columns, conditions={}) :
435 """Select(table, conditions) - retrieve specific row data
436 Returns a list of row column->value mapping dictionaries.
437 * columns is a list of which column data to return. If
438 columns is None, all columns will be returned.
439 * conditions is a dictionary keyed on column names
440 containing callable conditions expecting the data string as an
441 argument and returning a boolean.
442 """
443 try:
444 if not self.__tablecolumns.has_key(table) :
445 self.__load_column_info(table)
446 if columns is None :
447 columns = self.__tablecolumns[table]
448 matching_rowids = self.__Select(table, columns, conditions)
449 except DBError, dberror:
450 raise TableDBError, dberror[1]
451
452 # return the matches as a list of dictionaries
453 return matching_rowids.values()
454
455
456 def __Select(self, table, columns, conditions) :
457 """__Select() - Used to implement Select and Delete (above)
458 Returns a dictionary keyed on rowids containing dicts
459 holding the row data for columns listed in the columns param
460 that match the given conditions.
461 * conditions is a dictionary keyed on column names
462 containing callable conditions expecting the data string as an
463 argument and returning a boolean.
464 """
465 # check the validity of each column name
466 if not self.__tablecolumns.has_key(table) :
467 self.__load_column_info(table)
468 if columns is None :
469 columns = self.tablecolumns[table]
470 for column in (columns + conditions.keys()) :
471 if not self.__tablecolumns[table].count(column) :
472 raise TableDBError, "unknown column: "+`column`
473
474 # keyed on rows that match so far, containings dicts keyed on
475 # column names containing the data for that row and column.
476 matching_rowids = {}
477
478 rejected_rowids = {} # keys are rowids that do not match
479
480 # attempt to sort the conditions in such a way as to minimize full column lookups
481 def cmp_conditions(atuple, btuple):
482 a = atuple[1]
483 b = btuple[1]
484 if type(a) == type(b) :
485 if isinstance(a, PrefixCond) and isinstance(b, PrefixCond):
486 return cmp(len(b.prefix), len(a.prefix)) # longest prefix first
487 if isinstance(a, LikeCond) and isinstance(b, LikeCond):
488 return cmp(len(b.likestr), len(a.likestr)) # longest likestr first
489 return 0
490 if isinstance(a, ExactCond):
491 return -1
492 if isinstance(b, ExactCond):
493 return 1
494 if isinstance(a, PrefixCond):
495 return -1
496 if isinstance(b, PrefixCond):
497 return 1
498 # leave all unknown condition callables alone as equals
499 return 0
500
501 conditionlist = conditions.items()
502 conditionlist.sort(cmp_conditions)
503
504 # Apply conditions to column data to find what we want
505 cur = self.db.cursor()
506 column_num = -1
507 for column, condition in conditionlist :
508 column_num = column_num + 1
509 searchkey = _search_col_data_key(table, column)
510 # speedup: don't linear search columns within loop
511 if column in columns :
512 savethiscolumndata = 1 # save the data for return
513 else :
514 savethiscolumndata = 0 # data only used for selection
515
516 try:
517 key, data = cur.set_range(searchkey)
518 while key[:len(searchkey)] == searchkey :
519 # extract the rowid from the key
520 rowid = key[-_rowid_str_len:]
521
522 if not rejected_rowids.has_key(rowid) :
523 # if no condition was specified or the condition
524 # succeeds, add row to our match list.
525 if not condition or condition(data) :
526 # only create new entries in matcing_rowids on
527 # the first pass, otherwise reject the
528 # rowid as it must not have matched
529 # the previous passes
530 if column_num == 0 :
531 if not matching_rowids.has_key(rowid) :
532 matching_rowids[rowid] = {}
533 if savethiscolumndata :
534 matching_rowids[rowid][column] = data
535 else :
536 rejected_rowids[rowid] = rowid
537 else :
538 if matching_rowids.has_key(rowid) :
539 del matching_rowids[rowid]
540 rejected_rowids[rowid] = rowid
541
542 key, data = cur.next()
543
544 except DBError, dberror:
545 if dberror[0] != DB_NOTFOUND :
546 raise
547 continue
548
549 cur.close()
550
551 # we're done selecting rows, garbage collect the reject list
552 del rejected_rowids
553
554 # extract any remaining desired column data from the
555 # database for the matching rows.
556 if len(columns) > 0 :
557 for rowid, rowdata in matching_rowids.items() :
558 for column in columns :
559 if rowdata.has_key(column) :
560 continue
561 try:
562 rowdata[column] = self.db.get(_data_key(table, column, rowid))
563 except DBError, dberror:
564 if dberror[0] != DB_NOTFOUND :
565 raise
566 rowdata[column] = None
567
568 # return the matches
569 return matching_rowids
570
571
572 def Drop(self, table) :
573 """Remove an entire table from the database
574 """
575 txn = None
576 try:
577 txn = self.env.txn_begin()
578
579 # delete the column list
580 self.db.delete(_columns_key(table), txn)
581
582 cur = self.db.cursor(txn)
583
584 # delete all keys containing this tables column and row info
585 table_key = _search_all_data_key(table)
586 while 1 :
587 try:
588 key, data = cur.set_range(table_key)
589 except DBNotFoundError:
590 break
591 # only delete items in this table
592 if key[:len(table_key)] != table_key :
593 break
594 cur.delete()
595
596 # delete all rowids used by this table
597 table_key = _search_rowid_key(table)
598 while 1 :
599 try:
600 key, data = cur.set_range(table_key)
601 except DBNotFoundError:
602 break
603 # only delete items in this table
604 if key[:len(table_key)] != table_key :
605 break
606 cur.delete()
607
608 cur.close()
609
610 # delete the tablename from the table name list
611 tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn, flags=DB_RMW))
612 try:
613 tablelist.remove(table)
614 except ValueError:
615 pass # hmm, it wasn't there, oh well, that's what we want.
616 self.db.delete(_table_names_key, txn) # delete 1st, incase we opened with DB_DUP
617 self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
618
619 txn.commit()
620 txn = None
621
622 if self.__tablecolumns.has_key(table) :
623 del self.__tablecolumns[table]
624
625 except DBError, dberror:
626 if txn :
627 txn.abort()
628 raise TableDBError, dberror[1]
629