PyXR

c:\python24\lib \ dumbdbm.py



0001 """A dumb and slow but simple dbm clone.
0002 
0003 For database spam, spam.dir contains the index (a text file),
0004 spam.bak *may* contain a backup of the index (also a text file),
0005 while spam.dat contains the data (a binary file).
0006 
0007 XXX TO DO:
0008 
0009 - seems to contain a bug when updating...
0010 
0011 - reclaim free space (currently, space once occupied by deleted or expanded
0012 items is never reused)
0013 
0014 - support concurrent access (currently, if two processes take turns making
0015 updates, they can mess up the index)
0016 
0017 - support efficient access to large databases (currently, the whole index
0018 is read when the database is opened, and some updates rewrite the whole index)
0019 
0020 - support opening for read-only (flag = 'm')
0021 
0022 """
0023 
0024 import os as _os
0025 import __builtin__
0026 import UserDict
0027 
# Keep a private handle on the builtin open():  this module defines its own
# open() function further down, which shadows the builtin name.
_open = __builtin__.open

# Values in the data file start at byte offsets that are multiples of this.
_BLOCKSIZE = 512

error = IOError                         # For anydbm
0033 
class _Database(UserDict.DictMixin):
    """Dictionary-like string store kept in a directory file and a data file.

    Keys and values must both be plain strings.  An in-memory index maps
    each key to a (pos, siz) pair locating its value in the data file; the
    directory file is the on-disk copy of that index.
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _open = _open   # for _commit()

    def __init__(self, filebasename, mode):
        """Open the database named filebasename, creating it if needed.

        filebasename is the path without extension.  mode is the
        permission mode requested for any file this object has to create.
        """
        self._mode = mode

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + _os.extsep + 'dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + _os.extsep + 'dat'
        self._bakfile = filebasename + _os.extsep + 'bak'

        # The index is an in-memory dict, mirroring the directory file.
        self._index = None  # maps keys to (pos, siz) pairs

        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            f = self._open_writable(self._datfile, 'w')
        f.close()
        self._update()

    def _open_writable(self, path, flag):
        """Open path with flag 'w' or 'a', creating it with self._mode.

        The builtin open() takes no permission argument (its third
        parameter is the buffer size), so the file is opened through
        os.open() instead.  os.open() applies self._mode, as modified by
        the umask, only when it has to create the file.  Returns an
        ordinary file object.
        """
        # CAUTION:  this is called from _commit(), so it must not reference
        # any module globals either.
        os_ = self._os
        if flag == 'a':
            how = os_.O_WRONLY | os_.O_CREAT | os_.O_APPEND
        else:
            how = os_.O_WRONLY | os_.O_CREAT | os_.O_TRUNC
        try:
            fd = os_.open(path, how, self._mode)
        except os_.error:
            # os.open() reports failures as OSError, whereas this class has
            # always let the builtin open()'s IOError escape.  Retry with
            # the builtin so a failure keeps that exception type.
            return self._open(path, flag)
        return os_.fdopen(fd, flag)

    # Read directory file into the in-memory index dict.
    def _update(self):
        """Rebuild self._index from the directory file, if there is one."""
        self._index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            pass
        else:
            try:
                for line in f:
                    # SECURITY NOTE: eval() executes whatever expression
                    # the directory file contains.  Only open databases
                    # whose files come from a trusted source.
                    key, pos_and_siz_pair = eval(line)
                    self._index[key] = pos_and_siz_pair
            finally:
                f.close()

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        """Rewrite the directory file from the in-memory index."""
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except self._os.error:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except self._os.error:
            pass

        f = self._open_writable(self._dirfile, 'w')
        try:
            for key, pos_and_siz_pair in self._index.iteritems():
                f.write("%r, %r\n" % (key, pos_and_siz_pair))
        finally:
            f.close()

    sync = _commit

    def __getitem__(self, key):
        """Return the value stored for key; raise KeyError if absent."""
        pos, siz = self._index[key]     # may raise KeyError
        f = _open(self._datfile, 'rb')
        try:
            f.seek(pos)
            dat = f.read(siz)
        finally:
            f.close()
        return dat

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(0, 2)
            pos = int(f.tell())
            # Round the end-of-file offset up to the next block boundary.
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write('\0'*(npos-pos))
            pos = npos
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        f = _open(self._datfile, 'rb+')
        try:
            f.seek(pos)
            f.write(val)
        finally:
            f.close()
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        self._index[key] = pos_and_siz_pair
        f = self._open_writable(self._dirfile, 'a')
        try:
            f.write("%r, %r\n" % (key, pos_and_siz_pair))
        finally:
            f.close()

    def __setitem__(self, key, val):
        """Store val under key; both must be plain strings (else TypeError)."""
        # Exact type check on purpose:  the key is written to the directory
        # file with %r and read back with eval().
        if not type(key) == type('') == type(val):
            raise TypeError("keys and values must be strings")
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        """Remove key (KeyError if absent) and rewrite the directory file."""
        # The blocks used by the associated value are lost.
        del self._index[key]
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        return self._index.keys()

    def has_key(self, key):
        return key in self._index

    def __contains__(self, key):
        return key in self._index

    def iterkeys(self):
        return self._index.iterkeys()
    __iter__ = iterkeys

    def __len__(self):
        return len(self._index)

    def close(self):
        """Commit the index, then drop it and the file names."""
        self._commit()
        # With _index set to None, a later _commit() (for example the one
        # triggered by __del__) returns without doing anything.
        self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close
0215 
0216 
0217 
def open(file, flag=None, mode=0666):
    """Open the database with base name file and return the corresponding
    object.

    file is the path without extension; the database itself lives in the
    files file.dir (index), file.dat (data) and file.bak (index backup).

    The flag argument, used to control how the database is opened in the
    other DBM implementations, is ignored in the dumbdbm module; the
    database is always opened for update, and will be created if it does
    not exist.

    The optional mode argument is the UNIX mode of the file, used only when
    the database has to be created.  It defaults to octal code 0666 (and
    will be modified by the prevailing umask).

    """
    # flag argument is currently ignored; mode is handed to _Database as is
    return _Database(file, mode)
0233 

Generated by PyXR 0.9.4
SourceForge.net Logo