0001 """A dumb and slow but simple dbm clone. 0002 0003 For database spam, spam.dir contains the index (a text file), 0004 spam.bak *may* contain a backup of the index (also a text file), 0005 while spam.dat contains the data (a binary file). 0006 0007 XXX TO DO: 0008 0009 - seems to contain a bug when updating... 0010 0011 - reclaim free space (currently, space once occupied by deleted or expanded 0012 items is never reused) 0013 0014 - support concurrent access (currently, if two processes take turns making 0015 updates, they can mess up the index) 0016 0017 - support efficient access to large databases (currently, the whole index 0018 is read when the database is opened, and some updates rewrite the whole index) 0019 0020 - support opening for read-only (flag = 'm') 0021 0022 """ 0023 0024 import os as _os 0025 import __builtin__ 0026 import UserDict 0027 0028 _open = __builtin__.open 0029 0030 _BLOCKSIZE = 512 0031 0032 error = IOError # For anydbm 0033 0034 class _Database(UserDict.DictMixin): 0035 0036 # The on-disk directory and data files can remain in mutually 0037 # inconsistent states for an arbitrarily long time (see comments 0038 # at the end of __setitem__). This is only repaired when _commit() 0039 # gets called. One place _commit() gets called is from __del__(), 0040 # and if that occurs at program shutdown time, module globals may 0041 # already have gotten rebound to None. Since it's crucial that 0042 # _commit() finish successfully, we can't ignore shutdown races 0043 # here, and _commit() must not reference any globals. 0044 _os = _os # for _commit() 0045 _open = _open # for _commit() 0046 0047 def __init__(self, filebasename, mode): 0048 self._mode = mode 0049 0050 # The directory file is a text file. Each line looks like 0051 # "%r, (%d, %d)\n" % (key, pos, siz) 0052 # where key is the string key, pos is the offset into the dat 0053 # file of the associated value's first byte, and siz is the number 0054 # of bytes in the associated value. 0055 self._dirfile = filebasename + _os.extsep + 'dir' 0056 0057 # The data file is a binary file pointed into by the directory 0058 # file, and holds the values associated with keys. Each value 0059 # begins at a _BLOCKSIZE-aligned byte offset, and is a raw 0060 # binary 8-bit string value. 0061 self._datfile = filebasename + _os.extsep + 'dat' 0062 self._bakfile = filebasename + _os.extsep + 'bak' 0063 0064 # The index is an in-memory dict, mirroring the directory file. 0065 self._index = None # maps keys to (pos, siz) pairs 0066 0067 # Mod by Jack: create data file if needed 0068 try: 0069 f = _open(self._datfile, 'r') 0070 except IOError: 0071 f = _open(self._datfile, 'w', self._mode) 0072 f.close() 0073 self._update() 0074 0075 # Read directory file into the in-memory index dict. 0076 def _update(self): 0077 self._index = {} 0078 try: 0079 f = _open(self._dirfile) 0080 except IOError: 0081 pass 0082 else: 0083 for line in f: 0084 key, pos_and_siz_pair = eval(line) 0085 self._index[key] = pos_and_siz_pair 0086 f.close() 0087 0088 # Write the index dict to the directory file. The original directory 0089 # file (if any) is renamed with a .bak extension first. If a .bak 0090 # file currently exists, it's deleted. 0091 def _commit(self): 0092 # CAUTION: It's vital that _commit() succeed, and _commit() can 0093 # be called from __del__(). Therefore we must never reference a 0094 # global in this routine. 0095 if self._index is None: 0096 return # nothing to do 0097 0098 try: 0099 self._os.unlink(self._bakfile) 0100 except self._os.error: 0101 pass 0102 0103 try: 0104 self._os.rename(self._dirfile, self._bakfile) 0105 except self._os.error: 0106 pass 0107 0108 f = self._open(self._dirfile, 'w', self._mode) 0109 for key, pos_and_siz_pair in self._index.iteritems(): 0110 f.write("%r, %r\n" % (key, pos_and_siz_pair)) 0111 f.close() 0112 0113 sync = _commit 0114 0115 def __getitem__(self, key): 0116 pos, siz = self._index[key] # may raise KeyError 0117 f = _open(self._datfile, 'rb') 0118 f.seek(pos) 0119 dat = f.read(siz) 0120 f.close() 0121 return dat 0122 0123 # Append val to the data file, starting at a _BLOCKSIZE-aligned 0124 # offset. The data file is first padded with NUL bytes (if needed) 0125 # to get to an aligned offset. Return pair 0126 # (starting offset of val, len(val)) 0127 def _addval(self, val): 0128 f = _open(self._datfile, 'rb+') 0129 f.seek(0, 2) 0130 pos = int(f.tell()) 0131 npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE 0132 f.write('\0'*(npos-pos)) 0133 pos = npos 0134 f.write(val) 0135 f.close() 0136 return (pos, len(val)) 0137 0138 # Write val to the data file, starting at offset pos. The caller 0139 # is responsible for ensuring that there's enough room starting at 0140 # pos to hold val, without overwriting some other value. Return 0141 # pair (pos, len(val)). 0142 def _setval(self, pos, val): 0143 f = _open(self._datfile, 'rb+') 0144 f.seek(pos) 0145 f.write(val) 0146 f.close() 0147 return (pos, len(val)) 0148 0149 # key is a new key whose associated value starts in the data file 0150 # at offset pos and with length siz. Add an index record to 0151 # the in-memory index dict, and append one to the directory file. 0152 def _addkey(self, key, pos_and_siz_pair): 0153 self._index[key] = pos_and_siz_pair 0154 f = _open(self._dirfile, 'a', self._mode) 0155 f.write("%r, %r\n" % (key, pos_and_siz_pair)) 0156 f.close() 0157 0158 def __setitem__(self, key, val): 0159 if not type(key) == type('') == type(val): 0160 raise TypeError, "keys and values must be strings" 0161 if key not in self._index: 0162 self._addkey(key, self._addval(val)) 0163 else: 0164 # See whether the new value is small enough to fit in the 0165 # (padded) space currently occupied by the old value. 0166 pos, siz = self._index[key] 0167 oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE 0168 newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE 0169 if newblocks <= oldblocks: 0170 self._index[key] = self._setval(pos, val) 0171 else: 0172 # The new value doesn't fit in the (padded) space used 0173 # by the old value. The blocks used by the old value are 0174 # forever lost. 0175 self._index[key] = self._addval(val) 0176 0177 # Note that _index may be out of synch with the directory 0178 # file now: _setval() and _addval() don't update the directory 0179 # file. This also means that the on-disk directory and data 0180 # files are in a mutually inconsistent state, and they'll 0181 # remain that way until _commit() is called. Note that this 0182 # is a disaster (for the database) if the program crashes 0183 # (so that _commit() never gets called). 0184 0185 def __delitem__(self, key): 0186 # The blocks used by the associated value are lost. 0187 del self._index[key] 0188 # XXX It's unclear why we do a _commit() here (the code always 0189 # XXX has, so I'm not changing it). _setitem__ doesn't try to 0190 # XXX keep the directory file in synch. Why should we? Or 0191 # XXX why shouldn't __setitem__? 0192 self._commit() 0193 0194 def keys(self): 0195 return self._index.keys() 0196 0197 def has_key(self, key): 0198 return key in self._index 0199 0200 def __contains__(self, key): 0201 return key in self._index 0202 0203 def iterkeys(self): 0204 return self._index.iterkeys() 0205 __iter__ = iterkeys 0206 0207 def __len__(self): 0208 return len(self._index) 0209 0210 def close(self): 0211 self._commit() 0212 self._index = self._datfile = self._dirfile = self._bakfile = None 0213 0214 __del__ = close 0215 0216 0217 0218 def open(file, flag=None, mode=0666): 0219 """Open the database file, filename, and return corresponding object. 0220 0221 The flag argument, used to control how the database is opened in the 0222 other DBM implementations, is ignored in the dumbdbm module; the 0223 database is always opened for update, and will be created if it does 0224 not exist. 0225 0226 The optional mode argument is the UNIX mode of the file, used only when 0227 the database has to be created. It defaults to octal code 0666 (and 0228 will be modified by the prevailing umask). 0229 0230 """ 0231 # flag argument is currently ignored 0232 return _Database(file, mode) 0233
Generated by PyXR 0.9.4