PyXR

c:\python24\lib \ bsddb \ dbtables.py



0001 #-----------------------------------------------------------------------
0002 #
0003 # Copyright (C) 2000, 2001 by Autonomous Zone Industries
0004 # Copyright (C) 2002 Gregory P. Smith
0005 #
0006 # License:      This is free software.  You may use this software for any
0007 #               purpose including modification/redistribution, so long as
0008 #               this header remains intact and that you do not claim any
0009 #               rights of ownership or authorship of this software.  This
0010 #               software has been tested, but no warranty is expressed or
0011 #               implied.
0012 #
0013 #   --  Gregory P. Smith <greg@electricrain.com>
0014 
0015 # This provides a simple database table interface built on top of
0016 # the Python BerkeleyDB 3 interface.
0017 #
0018 _cvsid = '$Id: dbtables.py,v 1.11 2004/08/08 00:54:20 tim_one Exp $'
0019 
0020 import re
0021 import sys
0022 import copy
0023 import xdrlib
0024 import random
0025 from types import ListType, StringType
0026 import cPickle as pickle
0027 
0028 try:
0029     # For Pythons w/distutils pybsddb
0030     from bsddb3.db import *
0031 except ImportError:
0032     # For Python 2.3
0033     from bsddb.db import *
0034 
0035 
0036 class TableDBError(StandardError):
0037     pass
0038 class TableAlreadyExists(TableDBError):
0039     pass
0040 
0041 
0042 class Cond:
0043     """This condition matches everything"""
0044     def __call__(self, s):
0045         return 1
0046 
0047 class ExactCond(Cond):
0048     """Acts as an exact match condition function"""
0049     def __init__(self, strtomatch):
0050         self.strtomatch = strtomatch
0051     def __call__(self, s):
0052         return s == self.strtomatch
0053 
0054 class PrefixCond(Cond):
0055     """Acts as a condition function for matching a string prefix"""
0056     def __init__(self, prefix):
0057         self.prefix = prefix
0058     def __call__(self, s):
0059         return s[:len(self.prefix)] == self.prefix
0060 
0061 class PostfixCond(Cond):
0062     """Acts as a condition function for matching a string postfix"""
0063     def __init__(self, postfix):
0064         self.postfix = postfix
0065     def __call__(self, s):
0066         return s[-len(self.postfix):] == self.postfix
0067 
0068 class LikeCond(Cond):
0069     """
0070     Acts as a function that will match using an SQL 'LIKE' style
0071     string.  Case insensitive and % signs are wild cards.
0072     This isn't perfect but it should work for the simple common cases.
0073     """
0074     def __init__(self, likestr, re_flags=re.IGNORECASE):
0075         # escape python re characters
0076         chars_to_escape = '.*+()[]?'
0077         for char in chars_to_escape :
0078             likestr = likestr.replace(char, '\\'+char)
0079         # convert %s to wildcards
0080         self.likestr = likestr.replace('%', '.*')
0081         self.re = re.compile('^'+self.likestr+'$', re_flags)
0082     def __call__(self, s):
0083         return self.re.match(s)
0084 
0085 #
0086 # keys used to store database metadata
0087 #
0088 _table_names_key = '__TABLE_NAMES__'  # list of the tables in this db
0089 _columns = '._COLUMNS__'  # table_name+this key contains a list of columns
0090 
0091 def _columns_key(table):
0092     return table + _columns
0093 
0094 #
0095 # these keys are found within table sub databases
0096 #
0097 _data =  '._DATA_.'  # this+column+this+rowid key contains table data
0098 _rowid = '._ROWID_.' # this+rowid+this key contains a unique entry for each
0099                      # row in the table.  (no data is stored)
0100 _rowid_str_len = 8   # length in bytes of the unique rowid strings
0101 
0102 def _data_key(table, col, rowid):
0103     return table + _data + col + _data + rowid
0104 
0105 def _search_col_data_key(table, col):
0106     return table + _data + col + _data
0107 
0108 def _search_all_data_key(table):
0109     return table + _data
0110 
0111 def _rowid_key(table, rowid):
0112     return table + _rowid + rowid + _rowid
0113 
0114 def _search_rowid_key(table):
0115     return table + _rowid
0116 
0117 def contains_metastrings(s) :
0118     """Verify that the given string does not contain any
0119     metadata strings that might interfere with dbtables database operation.
0120     """
0121     if (s.find(_table_names_key) >= 0 or
0122         s.find(_columns) >= 0 or
0123         s.find(_data) >= 0 or
0124         s.find(_rowid) >= 0):
0125         # Then
0126         return 1
0127     else:
0128         return 0
0129 
0130 
0131 class bsdTableDB :
0132     def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600,
0133                  recover=0, dbflags=0):
0134         """bsdTableDB.open(filename, dbhome, create=0, truncate=0, mode=0600)
0135         Open database name in the dbhome BerkeleyDB directory.
0136         Use keyword arguments when calling this constructor.
0137         """
0138         self.db = None
0139         myflags = DB_THREAD
0140         if create:
0141             myflags |= DB_CREATE
0142         flagsforenv = (DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_LOG |
0143                        DB_INIT_TXN | dbflags)
0144         # DB_AUTO_COMMIT isn't a valid flag for env.open()
0145         try:
0146             dbflags |= DB_AUTO_COMMIT
0147         except AttributeError:
0148             pass
0149         if recover:
0150             flagsforenv = flagsforenv | DB_RECOVER
0151         self.env = DBEnv()
0152         # enable auto deadlock avoidance
0153         self.env.set_lk_detect(DB_LOCK_DEFAULT)
0154         self.env.open(dbhome, myflags | flagsforenv)
0155         if truncate:
0156             myflags |= DB_TRUNCATE
0157         self.db = DB(self.env)
0158         # this code relies on DBCursor.set* methods to raise exceptions
0159         # rather than returning None
0160         self.db.set_get_returns_none(1)
0161         # allow duplicate entries [warning: be careful w/ metadata]
0162         self.db.set_flags(DB_DUP)
0163         self.db.open(filename, DB_BTREE, dbflags | myflags, mode)
0164         self.dbfilename = filename
0165         # Initialize the table names list if this is a new database
0166         txn = self.env.txn_begin()
0167         try:
0168             if not self.db.has_key(_table_names_key, txn):
0169                 self.db.put(_table_names_key, pickle.dumps([], 1), txn=txn)
0170         # Yes, bare except
0171         except:
0172             txn.abort()
0173             raise
0174         else:
0175             txn.commit()
0176         # TODO verify more of the database's metadata?
0177         self.__tablecolumns = {}
0178 
0179     def __del__(self):
0180         self.close()
0181 
0182     def close(self):
0183         if self.db is not None:
0184             self.db.close()
0185             self.db = None
0186         if self.env is not None:
0187             self.env.close()
0188             self.env = None
0189 
0190     def checkpoint(self, mins=0):
0191         try:
0192             self.env.txn_checkpoint(mins)
0193         except DBIncompleteError:
0194             pass
0195 
0196     def sync(self):
0197         try:
0198             self.db.sync()
0199         except DBIncompleteError:
0200             pass
0201 
0202     def _db_print(self) :
0203         """Print the database to stdout for debugging"""
0204         print "******** Printing raw database for debugging ********"
0205         cur = self.db.cursor()
0206         try:
0207             key, data = cur.first()
0208             while 1:
0209                 print repr({key: data})
0210                 next = cur.next()
0211                 if next:
0212                     key, data = next
0213                 else:
0214                     cur.close()
0215                     return
0216         except DBNotFoundError:
0217             cur.close()
0218 
0219 
0220     def CreateTable(self, table, columns):
0221         """CreateTable(table, columns) - Create a new table in the database
0222         raises TableDBError if it already exists or for other DB errors.
0223         """
0224         assert isinstance(columns, ListType)
0225         txn = None
0226         try:
0227             # checking sanity of the table and column names here on
0228             # table creation will prevent problems elsewhere.
0229             if contains_metastrings(table):
0230                 raise ValueError(
0231                     "bad table name: contains reserved metastrings")
0232             for column in columns :
0233                 if contains_metastrings(column):
0234                     raise ValueError(
0235                         "bad column name: contains reserved metastrings")
0236 
0237             columnlist_key = _columns_key(table)
0238             if self.db.has_key(columnlist_key):
0239                 raise TableAlreadyExists, "table already exists"
0240 
0241             txn = self.env.txn_begin()
0242             # store the table's column info
0243             self.db.put(columnlist_key, pickle.dumps(columns, 1), txn=txn)
0244 
0245             # add the table name to the tablelist
0246             tablelist = pickle.loads(self.db.get(_table_names_key, txn=txn,
0247                                                  flags=DB_RMW))
0248             tablelist.append(table)
0249             # delete 1st, in case we opened with DB_DUP
0250             self.db.delete(_table_names_key, txn)
0251             self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
0252 
0253             txn.commit()
0254             txn = None
0255         except DBError, dberror:
0256             if txn:
0257                 txn.abort()
0258             raise TableDBError, dberror[1]
0259 
0260 
0261     def ListTableColumns(self, table):
0262         """Return a list of columns in the given table.
0263         [] if the table doesn't exist.
0264         """
0265         assert isinstance(table, StringType)
0266         if contains_metastrings(table):
0267             raise ValueError, "bad table name: contains reserved metastrings"
0268 
0269         columnlist_key = _columns_key(table)
0270         if not self.db.has_key(columnlist_key):
0271             return []
0272         pickledcolumnlist = self.db.get(columnlist_key)
0273         if pickledcolumnlist:
0274             return pickle.loads(pickledcolumnlist)
0275         else:
0276             return []
0277 
0278     def ListTables(self):
0279         """Return a list of tables in this database."""
0280         pickledtablelist = self.db.get(_table_names_key)
0281         if pickledtablelist:
0282             return pickle.loads(pickledtablelist)
0283         else:
0284             return []
0285 
0286     def CreateOrExtendTable(self, table, columns):
0287         """CreateOrExtendTable(table, columns)
0288 
0289         - Create a new table in the database.
0290         If a table of this name already exists, extend it to have any
0291         additional columns present in the given list as well as
0292         all of its current columns.
0293         """
0294         assert isinstance(columns, ListType)
0295         try:
0296             self.CreateTable(table, columns)
0297         except TableAlreadyExists:
0298             # the table already existed, add any new columns
0299             txn = None
0300             try:
0301                 columnlist_key = _columns_key(table)
0302                 txn = self.env.txn_begin()
0303 
0304                 # load the current column list
0305                 oldcolumnlist = pickle.loads(
0306                     self.db.get(columnlist_key, txn=txn, flags=DB_RMW))
0307                 # create a hash table for fast lookups of column names in the
0308                 # loop below
0309                 oldcolumnhash = {}
0310                 for c in oldcolumnlist:
0311                     oldcolumnhash[c] = c
0312 
0313                 # create a new column list containing both the old and new
0314                 # column names
0315                 newcolumnlist = copy.copy(oldcolumnlist)
0316                 for c in columns:
0317                     if not oldcolumnhash.has_key(c):
0318                         newcolumnlist.append(c)
0319 
0320                 # store the table's new extended column list
0321                 if newcolumnlist != oldcolumnlist :
0322                     # delete the old one first since we opened with DB_DUP
0323                     self.db.delete(columnlist_key, txn)
0324                     self.db.put(columnlist_key,
0325                                 pickle.dumps(newcolumnlist, 1),
0326                                 txn=txn)
0327 
0328                 txn.commit()
0329                 txn = None
0330 
0331                 self.__load_column_info(table)
0332             except DBError, dberror:
0333                 if txn:
0334                     txn.abort()
0335                 raise TableDBError, dberror[1]
0336 
0337 
0338     def __load_column_info(self, table) :
0339         """initialize the self.__tablecolumns dict"""
0340         # check the column names
0341         try:
0342             tcolpickles = self.db.get(_columns_key(table))
0343         except DBNotFoundError:
0344             raise TableDBError, "unknown table: %r" % (table,)
0345         if not tcolpickles:
0346             raise TableDBError, "unknown table: %r" % (table,)
0347         self.__tablecolumns[table] = pickle.loads(tcolpickles)
0348 
0349     def __new_rowid(self, table, txn) :
0350         """Create a new unique row identifier"""
0351         unique = 0
0352         while not unique:
0353             # Generate a random 64-bit row ID string
0354             # (note: this code has <64 bits of randomness
0355             # but it's plenty for our database id needs!)
0356             p = xdrlib.Packer()
0357             p.pack_int(int(random.random()*2147483647))
0358             p.pack_int(int(random.random()*2147483647))
0359             newid = p.get_buffer()
0360 
0361             # Guarantee uniqueness by adding this key to the database
0362             try:
0363                 self.db.put(_rowid_key(table, newid), None, txn=txn,
0364                             flags=DB_NOOVERWRITE)
0365             except DBKeyExistError:
0366                 pass
0367             else:
0368                 unique = 1
0369 
0370         return newid
0371 
0372 
0373     def Insert(self, table, rowdict) :
0374         """Insert(table, datadict) - Insert a new row into the table
0375         using the keys+values from rowdict as the column values.
0376         """
0377         txn = None
0378         try:
0379             if not self.db.has_key(_columns_key(table)):
0380                 raise TableDBError, "unknown table"
0381 
0382             # check the validity of each column name
0383             if not self.__tablecolumns.has_key(table):
0384                 self.__load_column_info(table)
0385             for column in rowdict.keys() :
0386                 if not self.__tablecolumns[table].count(column):
0387                     raise TableDBError, "unknown column: %r" % (column,)
0388 
0389             # get a unique row identifier for this row
0390             txn = self.env.txn_begin()
0391             rowid = self.__new_rowid(table, txn=txn)
0392 
0393             # insert the row values into the table database
0394             for column, dataitem in rowdict.items():
0395                 # store the value
0396                 self.db.put(_data_key(table, column, rowid), dataitem, txn=txn)
0397 
0398             txn.commit()
0399             txn = None
0400 
0401         except DBError, dberror:
0402             # WIBNI we could just abort the txn and re-raise the exception?
0403             # But no, because TableDBError is not related to DBError via
0404             # inheritance, so it would be backwards incompatible.  Do the next
0405             # best thing.
0406             info = sys.exc_info()
0407             if txn:
0408                 txn.abort()
0409                 self.db.delete(_rowid_key(table, rowid))
0410             raise TableDBError, dberror[1], info[2]
0411 
0412 
0413     def Modify(self, table, conditions={}, mappings={}):
0414         """Modify(table, conditions) - Modify in rows matching 'conditions'
0415         using mapping functions in 'mappings'
0416         * conditions is a dictionary keyed on column names
0417         containing condition functions expecting the data string as an
0418         argument and returning a boolean.
0419         * mappings is a dictionary keyed on column names containint condition
0420         functions expecting the data string as an argument and returning the
0421         new string for that column.
0422         """
0423         try:
0424             matching_rowids = self.__Select(table, [], conditions)
0425 
0426             # modify only requested columns
0427             columns = mappings.keys()
0428             for rowid in matching_rowids.keys():
0429                 txn = None
0430                 try:
0431                     for column in columns:
0432                         txn = self.env.txn_begin()
0433                         # modify the requested column
0434                         try:
0435                             dataitem = self.db.get(
0436                                 _data_key(table, column, rowid),
0437                                 txn)
0438                             self.db.delete(
0439                                 _data_key(table, column, rowid),
0440                                 txn)
0441                         except DBNotFoundError:
0442                              # XXXXXXX row key somehow didn't exist, assume no
0443                              # error
0444                             dataitem = None
0445                         dataitem = mappings[column](dataitem)
0446                         if dataitem <> None:
0447                             self.db.put(
0448                                 _data_key(table, column, rowid),
0449                                 dataitem, txn=txn)
0450                         txn.commit()
0451                         txn = None
0452 
0453                 except DBError, dberror:
0454                     if txn:
0455                         txn.abort()
0456                     raise
0457 
0458         except DBError, dberror:
0459             raise TableDBError, dberror[1]
0460 
0461     def Delete(self, table, conditions={}):
0462         """Delete(table, conditions) - Delete items matching the given
0463         conditions from the table.
0464         * conditions is a dictionary keyed on column names
0465         containing condition functions expecting the data string as an
0466         argument and returning a boolean.
0467         """
0468         try:
0469             matching_rowids = self.__Select(table, [], conditions)
0470 
0471             # delete row data from all columns
0472             columns = self.__tablecolumns[table]
0473             for rowid in matching_rowids.keys():
0474                 txn = None
0475                 try:
0476                     txn = self.env.txn_begin()
0477                     for column in columns:
0478                         # delete the data key
0479                         try:
0480                             self.db.delete(_data_key(table, column, rowid),
0481                                            txn)
0482                         except DBNotFoundError:
0483                             # XXXXXXX column may not exist, assume no error
0484                             pass
0485 
0486                     try:
0487                         self.db.delete(_rowid_key(table, rowid), txn)
0488                     except DBNotFoundError:
0489                         # XXXXXXX row key somehow didn't exist, assume no error
0490                         pass
0491                     txn.commit()
0492                     txn = None
0493                 except DBError, dberror:
0494                     if txn:
0495                         txn.abort()
0496                     raise
0497         except DBError, dberror:
0498             raise TableDBError, dberror[1]
0499 
0500 
0501     def Select(self, table, columns, conditions={}):
0502         """Select(table, conditions) - retrieve specific row data
0503         Returns a list of row column->value mapping dictionaries.
0504         * columns is a list of which column data to return.  If
0505           columns is None, all columns will be returned.
0506         * conditions is a dictionary keyed on column names
0507           containing callable conditions expecting the data string as an
0508           argument and returning a boolean.
0509         """
0510         try:
0511             if not self.__tablecolumns.has_key(table):
0512                 self.__load_column_info(table)
0513             if columns is None:
0514                 columns = self.__tablecolumns[table]
0515             matching_rowids = self.__Select(table, columns, conditions)
0516         except DBError, dberror:
0517             raise TableDBError, dberror[1]
0518         # return the matches as a list of dictionaries
0519         return matching_rowids.values()
0520 
0521 
0522     def __Select(self, table, columns, conditions):
0523         """__Select() - Used to implement Select and Delete (above)
0524         Returns a dictionary keyed on rowids containing dicts
0525         holding the row data for columns listed in the columns param
0526         that match the given conditions.
0527         * conditions is a dictionary keyed on column names
0528         containing callable conditions expecting the data string as an
0529         argument and returning a boolean.
0530         """
0531         # check the validity of each column name
0532         if not self.__tablecolumns.has_key(table):
0533             self.__load_column_info(table)
0534         if columns is None:
0535             columns = self.tablecolumns[table]
0536         for column in (columns + conditions.keys()):
0537             if not self.__tablecolumns[table].count(column):
0538                 raise TableDBError, "unknown column: %r" % (column,)
0539 
0540         # keyed on rows that match so far, containings dicts keyed on
0541         # column names containing the data for that row and column.
0542         matching_rowids = {}
0543         # keys are rowids that do not match
0544         rejected_rowids = {}
0545 
0546         # attempt to sort the conditions in such a way as to minimize full
0547         # column lookups
0548         def cmp_conditions(atuple, btuple):
0549             a = atuple[1]
0550             b = btuple[1]
0551             if type(a) is type(b):
0552                 if isinstance(a, PrefixCond) and isinstance(b, PrefixCond):
0553                     # longest prefix first
0554                     return cmp(len(b.prefix), len(a.prefix))
0555                 if isinstance(a, LikeCond) and isinstance(b, LikeCond):
0556                     # longest likestr first
0557                     return cmp(len(b.likestr), len(a.likestr))
0558                 return 0
0559             if isinstance(a, ExactCond):
0560                 return -1
0561             if isinstance(b, ExactCond):
0562                 return 1
0563             if isinstance(a, PrefixCond):
0564                 return -1
0565             if isinstance(b, PrefixCond):
0566                 return 1
0567             # leave all unknown condition callables alone as equals
0568             return 0
0569 
0570         conditionlist = conditions.items()
0571         conditionlist.sort(cmp_conditions)
0572 
0573         # Apply conditions to column data to find what we want
0574         cur = self.db.cursor()
0575         column_num = -1
0576         for column, condition in conditionlist:
0577             column_num = column_num + 1
0578             searchkey = _search_col_data_key(table, column)
0579             # speedup: don't linear search columns within loop
0580             if column in columns:
0581                 savethiscolumndata = 1  # save the data for return
0582             else:
0583                 savethiscolumndata = 0  # data only used for selection
0584 
0585             try:
0586                 key, data = cur.set_range(searchkey)
0587                 while key[:len(searchkey)] == searchkey:
0588                     # extract the rowid from the key
0589                     rowid = key[-_rowid_str_len:]
0590 
0591                     if not rejected_rowids.has_key(rowid):
0592                         # if no condition was specified or the condition
0593                         # succeeds, add row to our match list.
0594                         if not condition or condition(data):
0595                             if not matching_rowids.has_key(rowid):
0596                                 matching_rowids[rowid] = {}
0597                             if savethiscolumndata:
0598                                 matching_rowids[rowid][column] = data
0599                         else:
0600                             if matching_rowids.has_key(rowid):
0601                                 del matching_rowids[rowid]
0602                             rejected_rowids[rowid] = rowid
0603 
0604                     key, data = cur.next()
0605 
0606             except DBError, dberror:
0607                 if dberror[0] != DB_NOTFOUND:
0608                     raise
0609                 continue
0610 
0611         cur.close()
0612 
0613         # we're done selecting rows, garbage collect the reject list
0614         del rejected_rowids
0615 
0616         # extract any remaining desired column data from the
0617         # database for the matching rows.
0618         if len(columns) > 0:
0619             for rowid, rowdata in matching_rowids.items():
0620                 for column in columns:
0621                     if rowdata.has_key(column):
0622                         continue
0623                     try:
0624                         rowdata[column] = self.db.get(
0625                             _data_key(table, column, rowid))
0626                     except DBError, dberror:
0627                         if dberror[0] != DB_NOTFOUND:
0628                             raise
0629                         rowdata[column] = None
0630 
0631         # return the matches
0632         return matching_rowids
0633 
0634 
0635     def Drop(self, table):
0636         """Remove an entire table from the database"""
0637         txn = None
0638         try:
0639             txn = self.env.txn_begin()
0640 
0641             # delete the column list
0642             self.db.delete(_columns_key(table), txn)
0643 
0644             cur = self.db.cursor(txn)
0645 
0646             # delete all keys containing this tables column and row info
0647             table_key = _search_all_data_key(table)
0648             while 1:
0649                 try:
0650                     key, data = cur.set_range(table_key)
0651                 except DBNotFoundError:
0652                     break
0653                 # only delete items in this table
0654                 if key[:len(table_key)] != table_key:
0655                     break
0656                 cur.delete()
0657 
0658             # delete all rowids used by this table
0659             table_key = _search_rowid_key(table)
0660             while 1:
0661                 try:
0662                     key, data = cur.set_range(table_key)
0663                 except DBNotFoundError:
0664                     break
0665                 # only delete items in this table
0666                 if key[:len(table_key)] != table_key:
0667                     break
0668                 cur.delete()
0669 
0670             cur.close()
0671 
0672             # delete the tablename from the table name list
0673             tablelist = pickle.loads(
0674                 self.db.get(_table_names_key, txn=txn, flags=DB_RMW))
0675             try:
0676                 tablelist.remove(table)
0677             except ValueError:
0678                 # hmm, it wasn't there, oh well, that's what we want.
0679                 pass
0680             # delete 1st, incase we opened with DB_DUP
0681             self.db.delete(_table_names_key, txn)
0682             self.db.put(_table_names_key, pickle.dumps(tablelist, 1), txn=txn)
0683 
0684             txn.commit()
0685             txn = None
0686 
0687             if self.__tablecolumns.has_key(table):
0688                 del self.__tablecolumns[table]
0689 
0690         except DBError, dberror:
0691             if txn:
0692                 txn.abort()
0693             raise TableDBError, dberror[1]
0694 

Generated by PyXR 0.9.4
SourceForge.net Logo