0001 #----------------------------------------------------------------------- 0002 # 0003 # Copyright (C) 2000, 2001 by Autonomous Zone Industries 0004 # Copyright (C) 2002 Gregory P. Smith 0005 # 0006 # License: This is free software. You may use this software for any 0007 # purpose including modification/redistribution, so long as 0008 # this header remains intact and that you do not claim any 0009 # rights of ownership or authorship of this software. This 0010 # software has been tested, but no warranty is expressed or 0011 # implied. 0012 # 0013 # -- Gregory P. Smith <greg@electricrain.com> 0014 0015 # This provides a simple database table interface built on top of 0016 # the Python BerkeleyDB 3 interface. 0017 # 0018 _cvsid = '$Id: dbtables.py,v 1.11 2004/08/08 00:54:20 tim_one Exp $' 0019 0020 import re 0021 import sys 0022 import copy 0023 import xdrlib 0024 import random 0025 from types import ListType, StringType 0026 import cPickle as pickle 0027 0028 try: 0029 # For Pythons w/distutils pybsddb 0030 from bsddb3.db import * 0031 except ImportError: 0032 # For Python 2.3 0033 from bsddb.db import * 0034 0035 0036 class TableDBError(StandardError): 0037 pass 0038 class TableAlreadyExists(TableDBError): 0039 pass 0040 0041 0042 class Cond: 0043 """This condition matches everything""" 0044 def __call__(self, s): 0045 return 1 0046 0047 class ExactCond(Cond): 0048 """Acts as an exact match condition function""" 0049 def __init__(self, strtomatch): 0050 self.strtomatch = strtomatch 0051 def __call__(self, s): 0052 return s == self.strtomatch 0053 0054 class PrefixCond(Cond): 0055 """Acts as a condition function for matching a string prefix""" 0056 def __init__(self, prefix): 0057 self.prefix = prefix 0058 def __call__(self, s): 0059 return s[:len(self.prefix)] == self.prefix 0060 0061 class PostfixCond(Cond): 0062 """Acts as a condition function for matching a string postfix""" 0063 def __init__(self, postfix): 0064 self.postfix = postfix 0065 def __call__(self, s): 0066 return s[-len(self.postfix):] == self.postfix 0067 0068 class LikeCond(Cond): 0069 """ 0070 Acts as a function that will match using an SQL 'LIKE' style 0071 string. Case insensitive and % signs are wild cards. 0072 This isn't perfect but it should work for the simple common cases. 0073 """ 0074 def __init__(self, likestr, re_flags=re.IGNORECASE): 0075 # escape python re characters 0076 chars_to_escape = '.*+()[]?' 0077 for char in chars_to_escape : 0078 likestr = likestr.replace(char, '\\'+char) 0079 # convert %s to wildcards 0080 self.likestr = likestr.replace('%', '.*') 0081 self.re = re.compile('^'+self.likestr+'$', re_flags) 0082 def __call__(self, s): 0083 return self.re.match(s) 0084 0085 # 0086 # keys used to store database metadata 0087 # 0088 _table_names_key = '__TABLE_NAMES__' # list of the tables in this db 0089 _columns = '._COLUMNS__' # table_name+this key contains a list of columns 0090 0091 def _columns_key(table): 0092 return table + _columns 0093 0094 # 0095 # these keys are found within table sub databases 0096 # 0097 _data = '._DATA_.' # this+column+this+rowid key contains table data 0098 _rowid = '._ROWID_.' # this+rowid+this key contains a unique entry for each 0099 # row in the table. (no data is stored) 0100 _rowid_str_len = 8 # length in bytes of the unique rowid strings 0101 0102 def _data_key(table, col, rowid): 0103 return table + _data + col + _data + rowid 0104 0105 def _search_col_data_key(table, col): 0106 return table + _data + col + _data 0107 0108 def _search_all_data_key(table): 0109 return table + _data 0110 0111 def _rowid_key(table, rowid): 0112 return table + _rowid + rowid + _rowid 0113 0114 def _search_rowid_key(table): 0115 return table + _rowid 0116 0117 def contains_metastrings(s) : 0118 """Verify that the given string does not contain any 0119 metadata strings that might interfere with dbtables database operation. 0120 """ 0121 if (s.find(_table_names_key) >= 0 or 0122 s.find(_columns) >= 0 or 0123 s.find(_data) >= 0 or 0124 s.find(_rowid) >= 0): 0125 # Then 0126 return 1 0127 else: 0128 return 0 0129 0130 0131 class bsdTableDB : 0132 def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600, 0133 recover=0, dbflags=0): 0134 """bsdTableDB.open(filename, dbhome, create=0, truncate=0, mode=0600) 0135 Open database name in the dbhome BerkeleyDB directory. 0136 Use keyword arguments when calling this constructor. 0137 """ 0138 self.db = None 0139 myflags = DB_THREAD 0140 if create: 0141 myflags |= DB_CREATE 0142 flagsforenv = (DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG | 0143 DB_INIT_TXN | dbflags) 0144 # DB_AUTO_COMMIT isn't a valid flag for env.open() 0145 try: 0146 dbflags |= DB_AUTO_COMMIT 0147 except AttributeError: 0148 pass 0149 if recover: 0150 flagsforenv = flagsforenv | DB_RECOVER 0151 self.env = DBEnv() 0152 # enable auto deadlock avoidance 0153 self.env.set_lk_detect(DB_LOCK_DEFAULT) 0154 self.env.open(dbhome, myflags | flagsforenv) 0155 if truncate: 0156 myflags |= DB_TRUNCATE 0157 self.db = DB(self.env) 0158 # this code relies on DBCursor.set* methods to raise exceptions 0159 # rather than returning None 0160 self.db.set_get_returns_none(1) 0161 # allow duplicate entries [warning: be careful w/ metadata] 0162 self.db.set_flags(DB_DUP) 0163 self.db.open(filename, DB_BTREE, dbflags | myflags, mode) 0164 self.dbfilename = filename 0165 # Initialize the table names list if this is a new database 0166 txn = self.env.txn_begin() 0167 try: 0168 if not self.db.has_key(_table_names_key, txn): 0169 self.db.put(_table_names_key, pickle.dumps([], 1), txn=txn) 0170 # Yes, bare except 0171 except: 0172 txn.abort() 0173 raise 0174 else: 0175 txn.commit() 0176 # TODO verify more of the database's metadata? 0177 self.__tablecolumns = {} 0178 0179 def __del__(self): 0180 self.close() 0181 0182 def close(self): 0183 if self.db is not None: 0184 self.db.close() 0185 self.db = None 0186 if self.env is not None: 0187 self.env.close() 0188 self.env = None 0189 0190 def checkpoint(self, mins=0): 0191 try: 0192 self.env.txn_checkpoint(mins) 0193 except DBIncompleteError: 0194 pass 0195 0196 def sync(self): 0197 try: 0198 self.db.sync() 0199 except DBIncompleteError: 0200 pass 0201 0202 def _db_print(self) : 0203 """Print the database to stdout for debugging""" 0204 print "******** Printing raw database for debugging ********" 0205 cur = self.db.cursor() 0206 try: 0207 key, data = cur.first() 0208 while 1: 0209 print repr({key: data}) 0210 next = cur.next() 0211 if next: 0212 key, data = next 0213 else: 0214 cur.close() 0215 return 0216 except DBNotFoundError: 0217 cur.close() 0218 0219 0220 def CreateTable(self, table, columns): 0221 """CreateTable(table, columns) - Create a new table in the database 0222 raises TableDBError if it already exists or for other DB errors. 0223 """ 0224 assert isinstance(columns, ListType) 0225 txn = None 0226 try: 0227 # checking sanity of the table and column names here on 0228 # table creation will prevent problems elsewhere. 0229 if contains_metastrings(table): 0230 raise ValueError( 0231 "bad table name: contains reserved metastrings") 0232 for column in columns : 0233 if contains_metastrings(column): 0234 raise ValueError( 0235 "bad column name: contains reserved metastrings") 0236 0237 columnlist_key = _columns_key(table) 0238 if self.db.has_key(columnlist_key): 0239 raise TableAlreadyExists, "table already exists" 0240 0241 txn = self.env.txn_begin() 0242 # store the table's column info 0243 self.db.put(columnlist_key, pickle.dumps(columns, 1), txn=txn) 0244 0245 # add the table name to the tablelist 0246 tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn, 0247 flags=DB_RMW)) 0248 tablelist.append(table) 0249 # delete 1st, in case we opened with DB_DUP 0250 self.db.delete(_table_names_key, txn) 0251 self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn) 0252 0253 txn.commit() 0254 txn = None 0255 except DBError, dberror: 0256 if txn: 0257 txn.abort() 0258 raise TableDBError, dberror[1] 0259 0260 0261 def ListTableColumns(self, table): 0262 """Return a list of columns in the given table. 0263 [] if the table doesn't exist. 0264 """ 0265 assert isinstance(table, StringType) 0266 if contains_metastrings(table): 0267 raise ValueError, "bad table name: contains reserved metastrings" 0268 0269 columnlist_key = _columns_key(table) 0270 if not self.db.has_key(columnlist_key): 0271 return [] 0272 pickledcolumnlist = self.db.get(columnlist_key) 0273 if pickledcolumnlist: 0274 return pickle.loads(pickledcolumnlist) 0275 else: 0276 return [] 0277 0278 def ListTables(self): 0279 """Return a list of tables in this database.""" 0280 pickledtablelist = self.db.get(_table_names_key) 0281 if pickledtablelist: 0282 return pickle.loads(pickledtablelist) 0283 else: 0284 return [] 0285 0286 def CreateOrExtendTable(self, table, columns): 0287 """CreateOrExtendTable(table, columns) 0288 0289 - Create a new table in the database. 0290 If a table of this name already exists, extend it to have any 0291 additional columns present in the given list as well as 0292 all of its current columns. 0293 """ 0294 assert isinstance(columns, ListType) 0295 try: 0296 self.CreateTable(table, columns) 0297 except TableAlreadyExists: 0298 # the table already existed, add any new columns 0299 txn = None 0300 try: 0301 columnlist_key = _columns_key(table) 0302 txn = self.env.txn_begin() 0303 0304 # load the current column list 0305 oldcolumnlist = pickle.loads( 0306 self.db.get(columnlist_key, txn=txn, flags=DB_RMW)) 0307 # create a hash table for fast lookups of column names in the 0308 # loop below 0309 oldcolumnhash = {} 0310 for c in oldcolumnlist: 0311 oldcolumnhash[c] = c 0312 0313 # create a new column list containing both the old and new 0314 # column names 0315 newcolumnlist = copy.copy(oldcolumnlist) 0316 for c in columns: 0317 if not oldcolumnhash.has_key(c): 0318 newcolumnlist.append(c) 0319 0320 # store the table's new extended column list 0321 if newcolumnlist != oldcolumnlist : 0322 # delete the old one first since we opened with DB_DUP 0323 self.db.delete(columnlist_key, txn) 0324 self.db.put(columnlist_key, 0325 pickle.dumps(newcolumnlist, 1), 0326 txn=txn) 0327 0328 txn.commit() 0329 txn = None 0330 0331 self.__load_column_info(table) 0332 except DBError, dberror: 0333 if txn: 0334 txn.abort() 0335 raise TableDBError, dberror[1] 0336 0337 0338 def __load_column_info(self, table) : 0339 """initialize the self.__tablecolumns dict""" 0340 # check the column names 0341 try: 0342 tcolpickles = self.db.get(_columns_key(table)) 0343 except DBNotFoundError: 0344 raise TableDBError, "unknown table: %r" % (table,) 0345 if not tcolpickles: 0346 raise TableDBError, "unknown table: %r" % (table,) 0347 self.__tablecolumns[table] = pickle.loads(tcolpickles) 0348 0349 def __new_rowid(self, table, txn) : 0350 """Create a new unique row identifier""" 0351 unique = 0 0352 while not unique: 0353 # Generate a random 64-bit row ID string 0354 # (note: this code has <64 bits of randomness 0355 # but it's plenty for our database id needs!) 0356 p = xdrlib.Packer() 0357 p.pack_int(int(random.random()*2147483647)) 0358 p.pack_int(int(random.random()*2147483647)) 0359 newid = p.get_buffer() 0360 0361 # Guarantee uniqueness by adding this key to the database 0362 try: 0363 self.db.put(_rowid_key(table, newid), None, txn=txn, 0364 flags=DB_NOOVERWRITE) 0365 except DBKeyExistError: 0366 pass 0367 else: 0368 unique = 1 0369 0370 return newid 0371 0372 0373 def Insert(self, table, rowdict) : 0374 """Insert(table, datadict) - Insert a new row into the table 0375 using the keys+values from rowdict as the column values. 0376 """ 0377 txn = None 0378 try: 0379 if not self.db.has_key(_columns_key(table)): 0380 raise TableDBError, "unknown table" 0381 0382 # check the validity of each column name 0383 if not self.__tablecolumns.has_key(table): 0384 self.__load_column_info(table) 0385 for column in rowdict.keys() : 0386 if not self.__tablecolumns[table].count(column): 0387 raise TableDBError, "unknown column: %r" % (column,) 0388 0389 # get a unique row identifier for this row 0390 txn = self.env.txn_begin() 0391 rowid = self.__new_rowid(table, txn=txn) 0392 0393 # insert the row values into the table database 0394 for column, dataitem in rowdict.items(): 0395 # store the value 0396 self.db.put(_data_key(table, column, rowid), dataitem, txn=txn) 0397 0398 txn.commit() 0399 txn = None 0400 0401 except DBError, dberror: 0402 # WIBNI we could just abort the txn and re-raise the exception? 0403 # But no, because TableDBError is not related to DBError via 0404 # inheritance, so it would be backwards incompatible. Do the next 0405 # best thing. 0406 info = sys.exc_info() 0407 if txn: 0408 txn.abort() 0409 self.db.delete(_rowid_key(table, rowid)) 0410 raise TableDBError, dberror[1], info[2] 0411 0412 0413 def Modify(self, table, conditions={}, mappings={}): 0414 """Modify(table, conditions) - Modify in rows matching 'conditions' 0415 using mapping functions in 'mappings' 0416 * conditions is a dictionary keyed on column names 0417 containing condition functions expecting the data string as an 0418 argument and returning a boolean. 0419 * mappings is a dictionary keyed on column names containint condition 0420 functions expecting the data string as an argument and returning the 0421 new string for that column. 0422 """ 0423 try: 0424 matching_rowids = self.__Select(table, [], conditions) 0425 0426 # modify only requested columns 0427 columns = mappings.keys() 0428 for rowid in matching_rowids.keys(): 0429 txn = None 0430 try: 0431 for column in columns: 0432 txn = self.env.txn_begin() 0433 # modify the requested column 0434 try: 0435 dataitem = self.db.get( 0436 _data_key(table, column, rowid), 0437 txn) 0438 self.db.delete( 0439 _data_key(table, column, rowid), 0440 txn) 0441 except DBNotFoundError: 0442 # XXXXXXX row key somehow didn't exist, assume no 0443 # error 0444 dataitem = None 0445 dataitem = mappings[column](dataitem) 0446 if dataitem <> None: 0447 self.db.put( 0448 _data_key(table, column, rowid), 0449 dataitem, txn=txn) 0450 txn.commit() 0451 txn = None 0452 0453 except DBError, dberror: 0454 if txn: 0455 txn.abort() 0456 raise 0457 0458 except DBError, dberror: 0459 raise TableDBError, dberror[1] 0460 0461 def Delete(self, table, conditions={}): 0462 """Delete(table, conditions) - Delete items matching the given 0463 conditions from the table. 0464 * conditions is a dictionary keyed on column names 0465 containing condition functions expecting the data string as an 0466 argument and returning a boolean. 0467 """ 0468 try: 0469 matching_rowids = self.__Select(table, [], conditions) 0470 0471 # delete row data from all columns 0472 columns = self.__tablecolumns[table] 0473 for rowid in matching_rowids.keys(): 0474 txn = None 0475 try: 0476 txn = self.env.txn_begin() 0477 for column in columns: 0478 # delete the data key 0479 try: 0480 self.db.delete(_data_key(table, column, rowid), 0481 txn) 0482 except DBNotFoundError: 0483 # XXXXXXX column may not exist, assume no error 0484 pass 0485 0486 try: 0487 self.db.delete(_rowid_key(table, rowid), txn) 0488 except DBNotFoundError: 0489 # XXXXXXX row key somehow didn't exist, assume no error 0490 pass 0491 txn.commit() 0492 txn = None 0493 except DBError, dberror: 0494 if txn: 0495 txn.abort() 0496 raise 0497 except DBError, dberror: 0498 raise TableDBError, dberror[1] 0499 0500 0501 def Select(self, table, columns, conditions={}): 0502 """Select(table, conditions) - retrieve specific row data 0503 Returns a list of row column->value mapping dictionaries. 0504 * columns is a list of which column data to return. If 0505 columns is None, all columns will be returned. 0506 * conditions is a dictionary keyed on column names 0507 containing callable conditions expecting the data string as an 0508 argument and returning a boolean. 0509 """ 0510 try: 0511 if not self.__tablecolumns.has_key(table): 0512 self.__load_column_info(table) 0513 if columns is None: 0514 columns = self.__tablecolumns[table] 0515 matching_rowids = self.__Select(table, columns, conditions) 0516 except DBError, dberror: 0517 raise TableDBError, dberror[1] 0518 # return the matches as a list of dictionaries 0519 return matching_rowids.values() 0520 0521 0522 def __Select(self, table, columns, conditions): 0523 """__Select() - Used to implement Select and Delete (above) 0524 Returns a dictionary keyed on rowids containing dicts 0525 holding the row data for columns listed in the columns param 0526 that match the given conditions. 0527 * conditions is a dictionary keyed on column names 0528 containing callable conditions expecting the data string as an 0529 argument and returning a boolean. 0530 """ 0531 # check the validity of each column name 0532 if not self.__tablecolumns.has_key(table): 0533 self.__load_column_info(table) 0534 if columns is None: 0535 columns = self.tablecolumns[table] 0536 for column in (columns + conditions.keys()): 0537 if not self.__tablecolumns[table].count(column): 0538 raise TableDBError, "unknown column: %r" % (column,) 0539 0540 # keyed on rows that match so far, containings dicts keyed on 0541 # column names containing the data for that row and column. 0542 matching_rowids = {} 0543 # keys are rowids that do not match 0544 rejected_rowids = {} 0545 0546 # attempt to sort the conditions in such a way as to minimize full 0547 # column lookups 0548 def cmp_conditions(atuple, btuple): 0549 a = atuple[1] 0550 b = btuple[1] 0551 if type(a) is type(b): 0552 if isinstance(a, PrefixCond) and isinstance(b, PrefixCond): 0553 # longest prefix first 0554 return cmp(len(b.prefix), len(a.prefix)) 0555 if isinstance(a, LikeCond) and isinstance(b, LikeCond): 0556 # longest likestr first 0557 return cmp(len(b.likestr), len(a.likestr)) 0558 return 0 0559 if isinstance(a, ExactCond): 0560 return -1 0561 if isinstance(b, ExactCond): 0562 return 1 0563 if isinstance(a, PrefixCond): 0564 return -1 0565 if isinstance(b, PrefixCond): 0566 return 1 0567 # leave all unknown condition callables alone as equals 0568 return 0 0569 0570 conditionlist = conditions.items() 0571 conditionlist.sort(cmp_conditions) 0572 0573 # Apply conditions to column data to find what we want 0574 cur = self.db.cursor() 0575 column_num = -1 0576 for column, condition in conditionlist: 0577 column_num = column_num + 1 0578 searchkey = _search_col_data_key(table, column) 0579 # speedup: don't linear search columns within loop 0580 if column in columns: 0581 savethiscolumndata = 1 # save the data for return 0582 else: 0583 savethiscolumndata = 0 # data only used for selection 0584 0585 try: 0586 key, data = cur.set_range(searchkey) 0587 while key[:len(searchkey)] == searchkey: 0588 # extract the rowid from the key 0589 rowid = key[-_rowid_str_len:] 0590 0591 if not rejected_rowids.has_key(rowid): 0592 # if no condition was specified or the condition 0593 # succeeds, add row to our match list. 0594 if not condition or condition(data): 0595 if not matching_rowids.has_key(rowid): 0596 matching_rowids[rowid] = {} 0597 if savethiscolumndata: 0598 matching_rowids[rowid][column] = data 0599 else: 0600 if matching_rowids.has_key(rowid): 0601 del matching_rowids[rowid] 0602 rejected_rowids[rowid] = rowid 0603 0604 key, data = cur.next() 0605 0606 except DBError, dberror: 0607 if dberror[0] != DB_NOTFOUND: 0608 raise 0609 continue 0610 0611 cur.close() 0612 0613 # we're done selecting rows, garbage collect the reject list 0614 del rejected_rowids 0615 0616 # extract any remaining desired column data from the 0617 # database for the matching rows. 0618 if len(columns) > 0: 0619 for rowid, rowdata in matching_rowids.items(): 0620 for column in columns: 0621 if rowdata.has_key(column): 0622 continue 0623 try: 0624 rowdata[column] = self.db.get( 0625 _data_key(table, column, rowid)) 0626 except DBError, dberror: 0627 if dberror[0] != DB_NOTFOUND: 0628 raise 0629 rowdata[column] = None 0630 0631 # return the matches 0632 return matching_rowids 0633 0634 0635 def Drop(self, table): 0636 """Remove an entire table from the database""" 0637 txn = None 0638 try: 0639 txn = self.env.txn_begin() 0640 0641 # delete the column list 0642 self.db.delete(_columns_key(table), txn) 0643 0644 cur = self.db.cursor(txn) 0645 0646 # delete all keys containing this tables column and row info 0647 table_key = _search_all_data_key(table) 0648 while 1: 0649 try: 0650 key, data = cur.set_range(table_key) 0651 except DBNotFoundError: 0652 break 0653 # only delete items in this table 0654 if key[:len(table_key)] != table_key: 0655 break 0656 cur.delete() 0657 0658 # delete all rowids used by this table 0659 table_key = _search_rowid_key(table) 0660 while 1: 0661 try: 0662 key, data = cur.set_range(table_key) 0663 except DBNotFoundError: 0664 break 0665 # only delete items in this table 0666 if key[:len(table_key)] != table_key: 0667 break 0668 cur.delete() 0669 0670 cur.close() 0671 0672 # delete the tablename from the table name list 0673 tablelist = pickle.loads( 0674 self.db.get(_table_names_key, txn=txn, flags=DB_RMW)) 0675 try: 0676 tablelist.remove(table) 0677 except ValueError: 0678 # hmm, it wasn't there, oh well, that's what we want. 0679 pass 0680 # delete 1st, incase we opened with DB_DUP 0681 self.db.delete(_table_names_key, txn) 0682 self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn) 0683 0684 txn.commit() 0685 txn = None 0686 0687 if self.__tablecolumns.has_key(table): 0688 del self.__tablecolumns[table] 0689 0690 except DBError, dberror: 0691 if txn: 0692 txn.abort() 0693 raise TableDBError, dberror[1] 0694
Generated by PyXR 0.9.4