blob: 4841c283a5591734aa3998c0c6b2528f1e80ec2b [file] [log] [blame]
Martin v. Löwis6aa4a1f2002-11-19 08:09:52 +00001#-----------------------------------------------------------------------
2#
3# Copyright (C) 2000, 2001 by Autonomous Zone Industries
Martin v. Löwisb2c7aff2002-11-23 11:26:07 +00004# Copyright (C) 2002 Gregory P. Smith
Martin v. Löwis6aa4a1f2002-11-19 08:09:52 +00005#
6# License: This is free software. You may use this software for any
7# purpose including modification/redistribution, so long as
8# this header remains intact and that you do not claim any
9# rights of ownership or authorship of this software. This
10# software has been tested, but no warranty is expressed or
11# implied.
12#
13# -- Gregory P. Smith <greg@electricrain.com>
14
15# This provides a simple database table interface built on top of
16# the Python BerkeleyDB 3 interface.
17#
18_cvsid = '$Id$'
19
20import string
21import sys
22try:
23 import cPickle
24 pickle = cPickle
25except ImportError:
26 import pickle
27import whrandom
28import xdrlib
29import re
30import copy
31
Martin v. Löwis7a3bae42002-11-19 17:48:49 +000032from bsddb.db import *
Martin v. Löwis6aa4a1f2002-11-19 08:09:52 +000033
34
class TableDBError(StandardError):
    """Base class for the errors raised by this table layer."""


class TableAlreadyExists(TableDBError):
    """Raised when a table is created under a name that is already taken."""
37
38
class Cond:
    """Base condition: accepts every value.

    A condition object is called with one value and answers whether
    that value is acceptable; this base class accepts all of them.
    """

    def __call__(self, s):
        # Unconditional match, whatever `s` is.
        return 1
43
class ExactCond(Cond):
    """Condition that accepts only values equal to a fixed string."""

    def __init__(self, strtomatch):
        # The value every candidate is compared against.
        self.strtomatch = strtomatch

    def __call__(self, s):
        wanted = self.strtomatch
        return s == wanted
50
class PrefixCond(Cond):
    """Condition that accepts values starting with a fixed prefix."""

    def __init__(self, prefix):
        self.prefix = prefix

    def __call__(self, s):
        prefix = self.prefix
        # Compare the leading slice of the same length as the prefix.
        head = s[:len(prefix)]
        return head == prefix
57
class PostfixCond(Cond):
    """Acts as a condition function for matching a string postfix.

    An empty postfix matches every value, mirroring how PrefixCond
    treats an empty prefix.
    """

    def __init__(self, postfix):
        self.postfix = postfix

    def __call__(self, s):
        length = len(self.postfix)
        if length == 0:
            # s[-0:] is the whole string, so the slice comparison below
            # would wrongly reject every non-empty value.
            return 1
        return s[-length:] == self.postfix
64
class LikeCond(Cond):
    """
    Acts as a function that will match using an SQL 'LIKE' style
    string.  Case insensitive (by default) and % signs are wild cards.
    This isn't perfect but it should work for the simple common cases.

    Attributes:
      likestr - the regular expression source built from the LIKE string
      re      - the compiled, fully anchored pattern
    """
    def __init__(self, likestr, re_flags=re.IGNORECASE):
        # Everything between % signs is literal text.  Escape each piece
        # wholesale so that no regular expression metacharacter
        # (including ^ $ | { } and the backslash) can leak through.
        literal_parts = [re.escape(part) for part in likestr.split('%')]
        # convert %s to wildcards
        self.likestr = '.*'.join(literal_parts)
        self.re = re.compile('^' + self.likestr + '$', re_flags)

    def __call__(self, s):
        # Returns the match object (true) or None (false).
        return self.re.match(s)
81
82#
83# keys used to store database metadata
84#
85_table_names_key = '__TABLE_NAMES__' # list of the tables in this db
86_columns = '._COLUMNS__' # table_name+this key contains a list of columns
87def _columns_key(table) : return table + _columns
88
89#
90# these keys are found within table sub databases
91#
92_data = '._DATA_.' # this+column+this+rowid key contains table data
93_rowid = '._ROWID_.' # this+rowid+this key contains a unique entry for each
94 # row in the table. (no data is stored)
95_rowid_str_len = 8 # length in bytes of the unique rowid strings
96def _data_key(table, col, rowid) : return table + _data + col + _data + rowid
97def _search_col_data_key(table, col) : return table + _data + col + _data
98def _search_all_data_key(table) : return table + _data
99def _rowid_key(table, rowid) : return table + _rowid + rowid + _rowid
100def _search_rowid_key(table) : return table + _rowid
101
def contains_metastrings(s):
    """Report whether `s` embeds one of the reserved metadata markers.

    Returns 1 when any marker used to build database keys occurs
    somewhere in `s` (such a name could interfere with dbtables
    database operation), 0 otherwise.
    """
    reserved = (_table_names_key, _columns, _data, _rowid)
    for marker in reserved:
        if s.find(marker) >= 0:
            return 1
    return 0
113
114
115class bsdTableDB :
116 def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600, recover=0, dbflags=0) :
117 """bsdTableDB.open(filename, dbhome, create=0, truncate=0, mode=0600)
118 Open database name in the dbhome BerkeleyDB directory.
119 Use keyword arguments when calling this constructor.
120 """
121 myflags = DB_THREAD
122 if create :
123 myflags = myflags | DB_CREATE
124 flagsforenv = DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN | dbflags
125 if recover :
126 flagsforenv = flagsforenv | DB_RECOVER
127 self.env = DBEnv()
128 self.env.set_lk_detect(DB_LOCK_DEFAULT) # enable auto deadlock avoidance
129 self.env.open(dbhome, myflags | flagsforenv)
130 if truncate :
131 myflags = myflags | DB_TRUNCATE
132 self.db = DB(self.env)
133 self.db.set_flags(DB_DUP) # allow duplicate entries [warning: be careful w/ metadata]
134 self.db.open(filename, DB_BTREE, myflags, mode)
135
136 self.dbfilename = filename
137
138 # Initialize the table names list if this is a new database
139 if not self.db.has_key(_table_names_key) :
140 self.db.put(_table_names_key, pickle.dumps([], 1))
141
142 # TODO verify more of the database's metadata?
143
144 self.__tablecolumns = {}
145
146 def __del__(self):
147 self.close()
148
149 def close(self):
150 if self.db is not None:
151 self.db.close()
152 self.db = None
153 if self.env is not None:
154 self.env.close()
155 self.env = None
156
157 def checkpoint(self, mins=0):
158 try:
159 self.env.txn_checkpoint(mins)
160 except DBIncompleteError:
161 pass
162
163 def sync(self):
164 try:
165 self.db.sync()
166 except DBIncompleteError:
167 pass
168
169 def _db_print(self) :
170 """Print the database to stdout for debugging"""
171 print "******** Printing raw database for debugging ********"
172 cur = self.db.cursor()
173 try:
174 key, data = cur.first()
175 while 1 :
176 print `{key: data}`
177 next = cur.next()
178 if next:
179 key, data = next
180 else:
181 cur.close()
182 return
183 except DBNotFoundError:
184 cur.close()
185
186
187 def CreateTable(self, table, columns) :
188 """CreateTable(table, columns) - Create a new table in the database
189 raises TableDBError if it already exists or for other DB errors.
190 """
191 assert type(columns) == type([])
192 txn = None
193 try:
194 # checking sanity of the table and column names here on
195 # table creation will prevent problems elsewhere.
196 if contains_metastrings(table) :
197 raise ValueError, "bad table name: contains reserved metastrings"
198 for column in columns :
199 if contains_metastrings(column) :
200 raise ValueError, "bad column name: contains reserved metastrings"
201
202 columnlist_key = _columns_key(table)
203 if self.db.has_key(columnlist_key) :
204 raise TableAlreadyExists, "table already exists"
205
206 txn = self.env.txn_begin()
207 # store the table's column info
208 self.db.put(columnlist_key, pickle.dumps(columns, 1), txn=txn)
209
210 # add the table name to the tablelist
211 tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn, flags=DB_RMW))
212 tablelist.append(table)
213 self.db.delete(_table_names_key, txn) # delete 1st, incase we opened with DB_DUP
214 self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
215
216 txn.commit()
217 txn = None
218
219 except DBError, dberror:
220 if txn :
221 txn.abort()
222 raise TableDBError, dberror[1]
223
224
225 def ListTableColumns(self, table):
226 """Return a list of columns in the given table. [] if the table doesn't exist.
227 """
228 assert type(table) == type('')
229 if contains_metastrings(table) :
230 raise ValueError, "bad table name: contains reserved metastrings"
231
232 columnlist_key = _columns_key(table)
233 if not self.db.has_key(columnlist_key):
234 return []
235 pickledcolumnlist = self.db.get(columnlist_key)
236 if pickledcolumnlist:
237 return pickle.loads(pickledcolumnlist)
238 else:
239 return []
240
241 def ListTables(self):
242 """Return a list of tables in this database."""
243 pickledtablelist = self.db.get(_table_names_key)
244 if pickledtablelist:
245 return pickle.loads(pickledtablelist)
246 else:
247 return []
248
249 def CreateOrExtendTable(self, table, columns):
250 """CreateOrExtendTable(table, columns) - Create a new table in the database.
251 If a table of this name already exists, extend it to have any
252 additional columns present in the given list as well as
253 all of its current columns.
254 """
255 assert type(columns) == type([])
256 try:
257 self.CreateTable(table, columns)
258 except TableAlreadyExists:
259 # the table already existed, add any new columns
260 txn = None
261 try:
262 columnlist_key = _columns_key(table)
263 txn = self.env.txn_begin()
264
265 # load the current column list
266 oldcolumnlist = pickle.loads(self.db.get(columnlist_key, txn=txn, flags=DB_RMW))
267 # create a hash table for fast lookups of column names in the loop below
268 oldcolumnhash = {}
269 for c in oldcolumnlist:
270 oldcolumnhash[c] = c
271
272 # create a new column list containing both the old and new column names
273 newcolumnlist = copy.copy(oldcolumnlist)
274 for c in columns:
275 if not oldcolumnhash.has_key(c):
276 newcolumnlist.append(c)
277
278 # store the table's new extended column list
279 if newcolumnlist != oldcolumnlist :
280 # delete the old one first since we opened with DB_DUP
281 self.db.delete(columnlist_key, txn)
282 self.db.put(columnlist_key, pickle.dumps(newcolumnlist, 1), txn=txn)
283
284 txn.commit()
285 txn = None
286
287 self.__load_column_info(table)
288 except DBError, dberror:
289 if txn:
290 txn.abort()
291 raise TableDBError, dberror[1]
292
293
294 def __load_column_info(self, table) :
295 """initialize the self.__tablecolumns dict"""
296 # check the column names
297 try:
298 tcolpickles = self.db.get(_columns_key(table))
299 except DBNotFoundError:
300 raise TableDBError, "unknown table: " + `table`
301 if not tcolpickles:
302 raise TableDBError, "unknown table: " + `table`
303 self.__tablecolumns[table] = pickle.loads(tcolpickles)
304
305 def __new_rowid(self, table, txn=None) :
306 """Create a new unique row identifier"""
307 unique = 0
308 while not unique :
309 # Generate a random 64-bit row ID string
310 # (note: this code has <64 bits of randomness
311 # but it's plenty for our database id needs!)
312 p = xdrlib.Packer()
313 p.pack_int(int(whrandom.random()*2147483647))
314 p.pack_int(int(whrandom.random()*2147483647))
315 newid = p.get_buffer()
316
317 # Guarantee uniqueness by adding this key to the database
318 try:
319 self.db.put(_rowid_key(table, newid), None, txn=txn, flags=DB_NOOVERWRITE)
320 except DBKeyExistsError:
321 pass
322 else:
323 unique = 1
324
325 return newid
326
327
328 def Insert(self, table, rowdict) :
329 """Insert(table, datadict) - Insert a new row into the table
330 using the keys+values from rowdict as the column values.
331 """
332 txn = None
333 try:
334 if not self.db.has_key(_columns_key(table)) :
335 raise TableDBError, "unknown table"
336
337 # check the validity of each column name
338 if not self.__tablecolumns.has_key(table) :
339 self.__load_column_info(table)
340 for column in rowdict.keys() :
341 if not self.__tablecolumns[table].count(column) :
342 raise TableDBError, "unknown column: "+`column`
343
344 # get a unique row identifier for this row
345 rowid = self.__new_rowid(table)
346
347 txn = self.env.txn_begin()
348
349 # insert the row values into the table database
350 for column, dataitem in rowdict.items() :
351 # store the value
352 self.db.put(_data_key(table, column, rowid), dataitem, txn=txn)
353
354 txn.commit()
355 txn = None
356
357 except DBError, dberror:
358 if txn :
359 txn.abort()
360 self.db.delete(_rowid_key(table, rowid))
361 raise TableDBError, dberror[1]
362
363
364 def Modify(self, table, conditions={}, mappings={}) :
365 """Modify(table, conditions) - Modify in rows matching 'conditions'
366 using mapping functions in 'mappings'
367 * conditions is a dictionary keyed on column names
368 containing condition functions expecting the data string as an
369 argument and returning a boolean.
370 * mappings is a dictionary keyed on column names containint condition
371 functions expecting the data string as an argument and returning the
372 new string for that column.
373 """
374 try:
375 matching_rowids = self.__Select(table, [], conditions)
376
377 # modify only requested columns
378 columns = mappings.keys()
379 for rowid in matching_rowids.keys() :
380 txn = None
381 try:
382 for column in columns :
383 txn = self.env.txn_begin()
384 # modify the requested column
385 try:
386 dataitem = self.db.get(_data_key(table, column, rowid), txn)
387 self.db.delete(_data_key(table, column, rowid), txn)
388 except DBNotFoundError:
389 dataitem = None # XXXXXXX row key somehow didn't exist, assume no error
390 dataitem = mappings[column](dataitem)
391 if dataitem <> None:
392 self.db.put(_data_key(table, column, rowid), dataitem, txn=txn)
393 txn.commit()
394 txn = None
395
396 except DBError, dberror:
397 if txn :
398 txn.abort()
399 raise
400
401 except DBError, dberror:
402 raise TableDBError, dberror[1]
403
404 def Delete(self, table, conditions={}) :
405 """Delete(table, conditions) - Delete items matching the given
406 conditions from the table.
407 * conditions is a dictionary keyed on column names
408 containing condition functions expecting the data string as an
409 argument and returning a boolean.
410 """
411 try:
412 matching_rowids = self.__Select(table, [], conditions)
413
414 # delete row data from all columns
415 columns = self.__tablecolumns[table]
416 for rowid in matching_rowids.keys() :
417 txn = None
418 try:
419 txn = self.env.txn_begin()
420 for column in columns :
421 # delete the data key
422 try:
423 self.db.delete(_data_key(table, column, rowid), txn)
424 except DBNotFoundError:
425 pass # XXXXXXX column may not exist, assume no error
426
427 try:
428 self.db.delete(_rowid_key(table, rowid), txn)
429 except DBNotFoundError:
430 pass # XXXXXXX row key somehow didn't exist, assume no error
431 txn.commit()
432 txn = None
433 except DBError, dberror:
434 if txn :
435 txn.abort()
436 raise
437
438 except DBError, dberror:
439 raise TableDBError, dberror[1]
440
441
442 def Select(self, table, columns, conditions={}) :
443 """Select(table, conditions) - retrieve specific row data
444 Returns a list of row column->value mapping dictionaries.
445 * columns is a list of which column data to return. If
446 columns is None, all columns will be returned.
447 * conditions is a dictionary keyed on column names
448 containing callable conditions expecting the data string as an
449 argument and returning a boolean.
450 """
451 try:
452 if not self.__tablecolumns.has_key(table) :
453 self.__load_column_info(table)
454 if columns is None :
455 columns = self.__tablecolumns[table]
456 matching_rowids = self.__Select(table, columns, conditions)
457 except DBError, dberror:
458 raise TableDBError, dberror[1]
459
460 # return the matches as a list of dictionaries
461 return matching_rowids.values()
462
463
464 def __Select(self, table, columns, conditions) :
465 """__Select() - Used to implement Select and Delete (above)
466 Returns a dictionary keyed on rowids containing dicts
467 holding the row data for columns listed in the columns param
468 that match the given conditions.
469 * conditions is a dictionary keyed on column names
470 containing callable conditions expecting the data string as an
471 argument and returning a boolean.
472 """
473 # check the validity of each column name
474 if not self.__tablecolumns.has_key(table) :
475 self.__load_column_info(table)
476 if columns is None :
477 columns = self.tablecolumns[table]
478 for column in (columns + conditions.keys()) :
479 if not self.__tablecolumns[table].count(column) :
480 raise TableDBError, "unknown column: "+`column`
481
482 # keyed on rows that match so far, containings dicts keyed on
483 # column names containing the data for that row and column.
484 matching_rowids = {}
485
486 rejected_rowids = {} # keys are rowids that do not match
487
488 # attempt to sort the conditions in such a way as to minimize full column lookups
489 def cmp_conditions(atuple, btuple):
490 a = atuple[1]
491 b = btuple[1]
492 if type(a) == type(b) :
493 if isinstance(a, PrefixCond) and isinstance(b, PrefixCond):
494 return cmp(len(b.prefix), len(a.prefix)) # longest prefix first
495 if isinstance(a, LikeCond) and isinstance(b, LikeCond):
496 return cmp(len(b.likestr), len(a.likestr)) # longest likestr first
497 return 0
498 if isinstance(a, ExactCond):
499 return -1
500 if isinstance(b, ExactCond):
501 return 1
502 if isinstance(a, PrefixCond):
503 return -1
504 if isinstance(b, PrefixCond):
505 return 1
506 # leave all unknown condition callables alone as equals
507 return 0
508
509 conditionlist = conditions.items()
510 conditionlist.sort(cmp_conditions)
511
512 # Apply conditions to column data to find what we want
513 cur = self.db.cursor()
514 column_num = -1
515 for column, condition in conditionlist :
516 column_num = column_num + 1
517 searchkey = _search_col_data_key(table, column)
518 # speedup: don't linear search columns within loop
519 if column in columns :
520 savethiscolumndata = 1 # save the data for return
521 else :
522 savethiscolumndata = 0 # data only used for selection
523
524 try:
525 key, data = cur.set_range(searchkey)
526 while key[:len(searchkey)] == searchkey :
527 # extract the rowid from the key
528 rowid = key[-_rowid_str_len:]
529
530 if not rejected_rowids.has_key(rowid) :
531 # if no condition was specified or the condition
532 # succeeds, add row to our match list.
533 if not condition or condition(data) :
Martin v. Löwisb2c7aff2002-11-23 11:26:07 +0000534 if not matching_rowids.has_key(rowid) :
535 matching_rowids[rowid] = {}
536 if savethiscolumndata :
537 matching_rowids[rowid][column] = data
Martin v. Löwis6aa4a1f2002-11-19 08:09:52 +0000538 else :
539 if matching_rowids.has_key(rowid) :
540 del matching_rowids[rowid]
541 rejected_rowids[rowid] = rowid
542
543 key, data = cur.next()
544
545 except DBError, dberror:
546 if dberror[0] != DB_NOTFOUND :
547 raise
548 continue
549
550 cur.close()
551
552 # we're done selecting rows, garbage collect the reject list
553 del rejected_rowids
554
555 # extract any remaining desired column data from the
556 # database for the matching rows.
557 if len(columns) > 0 :
558 for rowid, rowdata in matching_rowids.items() :
559 for column in columns :
560 if rowdata.has_key(column) :
561 continue
562 try:
563 rowdata[column] = self.db.get(_data_key(table, column, rowid))
564 except DBError, dberror:
565 if dberror[0] != DB_NOTFOUND :
566 raise
567 rowdata[column] = None
568
569 # return the matches
570 return matching_rowids
571
572
573 def Drop(self, table) :
574 """Remove an entire table from the database
575 """
576 txn = None
577 try:
578 txn = self.env.txn_begin()
579
580 # delete the column list
581 self.db.delete(_columns_key(table), txn)
582
583 cur = self.db.cursor(txn)
584
585 # delete all keys containing this tables column and row info
586 table_key = _search_all_data_key(table)
587 while 1 :
588 try:
589 key, data = cur.set_range(table_key)
590 except DBNotFoundError:
591 break
592 # only delete items in this table
593 if key[:len(table_key)] != table_key :
594 break
595 cur.delete()
596
597 # delete all rowids used by this table
598 table_key = _search_rowid_key(table)
599 while 1 :
600 try:
601 key, data = cur.set_range(table_key)
602 except DBNotFoundError:
603 break
604 # only delete items in this table
605 if key[:len(table_key)] != table_key :
606 break
607 cur.delete()
608
609 cur.close()
610
611 # delete the tablename from the table name list
612 tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn, flags=DB_RMW))
613 try:
614 tablelist.remove(table)
615 except ValueError:
616 pass # hmm, it wasn't there, oh well, that's what we want.
617 self.db.delete(_table_names_key, txn) # delete 1st, incase we opened with DB_DUP
618 self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
619
620 txn.commit()
621 txn = None
622
623 if self.__tablecolumns.has_key(table) :
624 del self.__tablecolumns[table]
625
626 except DBError, dberror:
627 if txn :
628 txn.abort()
629 raise TableDBError, dberror[1]