"""A collection of string operations (most are no longer used).

Warning: most of the code you see here isn't normally used nowadays.
Beginning with Python 1.6, many of these functions are implemented as
methods on the standard string object.  They used to be implemented by
a built-in module called strop, but strop is now obsolete itself.

Public module variables:

whitespace -- a string containing all characters considered whitespace
lowercase -- a string containing all characters considered lowercase letters
uppercase -- a string containing all characters considered uppercase letters
letters -- a string containing all characters considered letters
ascii_lowercase -- the lowercase letters 'a' through 'z'
ascii_uppercase -- the uppercase letters 'A' through 'Z'
ascii_letters -- ascii_lowercase followed by ascii_uppercase
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits
punctuation -- a string containing all characters considered punctuation
printable -- a string containing all characters considered printable

"""

# Character-class strings in the style of <ctype.h>
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
ascii_lowercase = lowercase
ascii_uppercase = uppercase
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdefABCDEF'
octdigits = digits[:8]
# The backslash is written as an explicit escape; the value is unchanged.
punctuation = """!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"""
printable = digits + letters + punctuation + whitespace

# Case conversion helpers
# _idmap is the identity table: the 256 characters chr(0)..chr(255) in order.
# str('') is used for the join so the result is a plain str even when
# literals are Unicode (-U).
# Note that Cookie.py bogusly uses _idmap :(
_idmap = str('').join(map(chr, range(256)))

# Functions which aren't available as string methods.
0044 0045 # Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def". 0046 # See also regsub.capwords(). 0047 def capwords(s, sep=None): 0048 """capwords(s, [sep]) -> string 0049 0050 Split the argument into words using split, capitalize each 0051 word using capitalize, and join the capitalized words using 0052 join. Note that this replaces runs of whitespace characters by 0053 a single space. 0054 0055 """ 0056 return (sep or ' ').join([x.capitalize() for x in s.split(sep)]) 0057 0058 0059 # Construct a translation string 0060 _idmapL = None 0061 def maketrans(fromstr, tostr): 0062 """maketrans(frm, to) -> string 0063 0064 Return a translation table (a string of 256 bytes long) 0065 suitable for use in string.translate. The strings frm and to 0066 must be of the same length. 0067 0068 """ 0069 if len(fromstr) != len(tostr): 0070 raise ValueError, "maketrans arguments must have same length" 0071 global _idmapL 0072 if not _idmapL: 0073 _idmapL = map(None, _idmap) 0074 L = _idmapL[:] 0075 fromstr = map(ord, fromstr) 0076 for i in range(len(fromstr)): 0077 L[fromstr[i]] = tostr[i] 0078 return ''.join(L) 0079 0080 0081 0082 #################################################################### 0083 import re as _re 0084 0085 class _multimap: 0086 """Helper class for combining multiple mappings. 0087 0088 Used by .{safe_,}substitute() to combine the mapping and keyword 0089 arguments. 
0090 """ 0091 def __init__(self, primary, secondary): 0092 self._primary = primary 0093 self._secondary = secondary 0094 0095 def __getitem__(self, key): 0096 try: 0097 return self._primary[key] 0098 except KeyError: 0099 return self._secondary[key] 0100 0101 0102 class _TemplateMetaclass(type): 0103 pattern = r""" 0104 %(delim)s(?: 0105 (?P<escaped>%(delim)s) | # Escape sequence of two delimiters 0106 (?P<named>%(id)s) | # delimiter and a Python identifier 0107 {(?P<braced>%(id)s)} | # delimiter and a braced identifier 0108 (?P<invalid>) # Other ill-formed delimiter exprs 0109 ) 0110 """ 0111 0112 def __init__(cls, name, bases, dct): 0113 super(_TemplateMetaclass, cls).__init__(name, bases, dct) 0114 if 'pattern' in dct: 0115 pattern = cls.pattern 0116 else: 0117 pattern = _TemplateMetaclass.pattern % { 0118 'delim' : _re.escape(cls.delimiter), 0119 'id' : cls.idpattern, 0120 } 0121 cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE) 0122 0123 0124 class Template: 0125 """A string class for supporting $-substitutions.""" 0126 __metaclass__ = _TemplateMetaclass 0127 0128 delimiter = '$' 0129 idpattern = r'[_a-z][_a-z0-9]*' 0130 0131 def __init__(self, template): 0132 self.template = template 0133 0134 # Search for $$, $identifier, ${identifier}, and any bare $'s 0135 0136 def _invalid(self, mo): 0137 i = mo.start('invalid') 0138 lines = self.template[:i].splitlines(True) 0139 if not lines: 0140 colno = 1 0141 lineno = 1 0142 else: 0143 colno = i - len(''.join(lines[:-1])) 0144 lineno = len(lines) 0145 raise ValueError('Invalid placeholder in string: line %d, col %d' % 0146 (lineno, colno)) 0147 0148 def substitute(self, *args, **kws): 0149 if len(args) > 1: 0150 raise TypeError('Too many positional arguments') 0151 if not args: 0152 mapping = kws 0153 elif kws: 0154 mapping = _multimap(kws, args[0]) 0155 else: 0156 mapping = args[0] 0157 # Helper function for .sub() 0158 def convert(mo): 0159 # Check the most common path first. 
0160 named = mo.group('named') or mo.group('braced') 0161 if named is not None: 0162 val = mapping[named] 0163 # We use this idiom instead of str() because the latter will 0164 # fail if val is a Unicode containing non-ASCII characters. 0165 return '%s' % val 0166 if mo.group('escaped') is not None: 0167 return self.delimiter 0168 if mo.group('invalid') is not None: 0169 self._invalid(mo) 0170 raise ValueError('Unrecognized named group in pattern', 0171 self.pattern) 0172 return self.pattern.sub(convert, self.template) 0173 0174 def safe_substitute(self, *args, **kws): 0175 if len(args) > 1: 0176 raise TypeError('Too many positional arguments') 0177 if not args: 0178 mapping = kws 0179 elif kws: 0180 mapping = _multimap(kws, args[0]) 0181 else: 0182 mapping = args[0] 0183 # Helper function for .sub() 0184 def convert(mo): 0185 named = mo.group('named') 0186 if named is not None: 0187 try: 0188 # We use this idiom instead of str() because the latter 0189 # will fail if val is a Unicode containing non-ASCII 0190 return '%s' % mapping[named] 0191 except KeyError: 0192 return self.delimiter + named 0193 braced = mo.group('braced') 0194 if braced is not None: 0195 try: 0196 return '%s' % mapping[braced] 0197 except KeyError: 0198 return self.delimiter + '{' + braced + '}' 0199 if mo.group('escaped') is not None: 0200 return self.delimiter 0201 if mo.group('invalid') is not None: 0202 return self.delimiter 0203 raise ValueError('Unrecognized named group in pattern', 0204 self.pattern) 0205 return self.pattern.sub(convert, self.template) 0206 0207 0208 0209 #################################################################### 0210 # NOTE: Everything below here is deprecated. Use string methods instead. 0211 # This stuff will go away in Python 3.0. 
0212 0213 # Backward compatible names for exceptions 0214 index_error = ValueError 0215 atoi_error = ValueError 0216 atof_error = ValueError 0217 atol_error = ValueError 0218 0219 # convert UPPER CASE letters to lower case 0220 def lower(s): 0221 """lower(s) -> string 0222 0223 Return a copy of the string s converted to lowercase. 0224 0225 """ 0226 return s.lower() 0227 0228 # Convert lower case letters to UPPER CASE 0229 def upper(s): 0230 """upper(s) -> string 0231 0232 Return a copy of the string s converted to uppercase. 0233 0234 """ 0235 return s.upper() 0236 0237 # Swap lower case letters and UPPER CASE 0238 def swapcase(s): 0239 """swapcase(s) -> string 0240 0241 Return a copy of the string s with upper case characters 0242 converted to lowercase and vice versa. 0243 0244 """ 0245 return s.swapcase() 0246 0247 # Strip leading and trailing tabs and spaces 0248 def strip(s, chars=None): 0249 """strip(s [,chars]) -> string 0250 0251 Return a copy of the string s with leading and trailing 0252 whitespace removed. 0253 If chars is given and not None, remove characters in chars instead. 0254 If chars is unicode, S will be converted to unicode before stripping. 0255 0256 """ 0257 return s.strip(chars) 0258 0259 # Strip leading tabs and spaces 0260 def lstrip(s, chars=None): 0261 """lstrip(s [,chars]) -> string 0262 0263 Return a copy of the string s with leading whitespace removed. 0264 If chars is given and not None, remove characters in chars instead. 0265 0266 """ 0267 return s.lstrip(chars) 0268 0269 # Strip trailing tabs and spaces 0270 def rstrip(s, chars=None): 0271 """rstrip(s [,chars]) -> string 0272 0273 Return a copy of the string s with trailing whitespace removed. 0274 If chars is given and not None, remove characters in chars instead. 
0275 0276 """ 0277 return s.rstrip(chars) 0278 0279 0280 # Split a string into a list of space/tab-separated words 0281 def split(s, sep=None, maxsplit=-1): 0282 """split(s [,sep [,maxsplit]]) -> list of strings 0283 0284 Return a list of the words in the string s, using sep as the 0285 delimiter string. If maxsplit is given, splits at no more than 0286 maxsplit places (resulting in at most maxsplit+1 words). If sep 0287 is not specified or is None, any whitespace string is a separator. 0288 0289 (split and splitfields are synonymous) 0290 0291 """ 0292 return s.split(sep, maxsplit) 0293 splitfields = split 0294 0295 # Split a string into a list of space/tab-separated words 0296 def rsplit(s, sep=None, maxsplit=-1): 0297 """rsplit(s [,sep [,maxsplit]]) -> list of strings 0298 0299 Return a list of the words in the string s, using sep as the 0300 delimiter string, starting at the end of the string and working 0301 to the front. If maxsplit is given, at most maxsplit splits are 0302 done. If sep is not specified or is None, any whitespace string 0303 is a separator. 0304 """ 0305 return s.rsplit(sep, maxsplit) 0306 0307 # Join fields with optional separator 0308 def join(words, sep = ' '): 0309 """join(list [,sep]) -> string 0310 0311 Return a string composed of the words in list, with 0312 intervening occurrences of sep. The default separator is a 0313 single space. 0314 0315 (joinfields and join are synonymous) 0316 0317 """ 0318 return sep.join(words) 0319 joinfields = join 0320 0321 # Find substring, raise exception if not found 0322 def index(s, *args): 0323 """index(s, sub [,start [,end]]) -> int 0324 0325 Like find but raises ValueError when the substring is not found. 0326 0327 """ 0328 return s.index(*args) 0329 0330 # Find last substring, raise exception if not found 0331 def rindex(s, *args): 0332 """rindex(s, sub [,start [,end]]) -> int 0333 0334 Like rfind but raises ValueError when the substring is not found. 
0335 0336 """ 0337 return s.rindex(*args) 0338 0339 # Count non-overlapping occurrences of substring 0340 def count(s, *args): 0341 """count(s, sub[, start[,end]]) -> int 0342 0343 Return the number of occurrences of substring sub in string 0344 s[start:end]. Optional arguments start and end are 0345 interpreted as in slice notation. 0346 0347 """ 0348 return s.count(*args) 0349 0350 # Find substring, return -1 if not found 0351 def find(s, *args): 0352 """find(s, sub [,start [,end]]) -> in 0353 0354 Return the lowest index in s where substring sub is found, 0355 such that sub is contained within s[start,end]. Optional 0356 arguments start and end are interpreted as in slice notation. 0357 0358 Return -1 on failure. 0359 0360 """ 0361 return s.find(*args) 0362 0363 # Find last substring, return -1 if not found 0364 def rfind(s, *args): 0365 """rfind(s, sub [,start [,end]]) -> int 0366 0367 Return the highest index in s where substring sub is found, 0368 such that sub is contained within s[start,end]. Optional 0369 arguments start and end are interpreted as in slice notation. 0370 0371 Return -1 on failure. 0372 0373 """ 0374 return s.rfind(*args) 0375 0376 # for a bit of speed 0377 _float = float 0378 _int = int 0379 _long = long 0380 0381 # Convert string to float 0382 def atof(s): 0383 """atof(s) -> float 0384 0385 Return the floating point number represented by the string s. 0386 0387 """ 0388 return _float(s) 0389 0390 0391 # Convert string to integer 0392 def atoi(s , base=10): 0393 """atoi(s [,base]) -> int 0394 0395 Return the integer represented by the string s in the given 0396 base, which defaults to 10. The string s must consist of one 0397 or more digits, possibly preceded by a sign. If base is 0, it 0398 is chosen from the leading characters of s, 0 for octal, 0x or 0399 0X for hexadecimal. If base is 16, a preceding 0x or 0X is 0400 accepted. 
0401 0402 """ 0403 return _int(s, base) 0404 0405 0406 # Convert string to long integer 0407 def atol(s, base=10): 0408 """atol(s [,base]) -> long 0409 0410 Return the long integer represented by the string s in the 0411 given base, which defaults to 10. The string s must consist 0412 of one or more digits, possibly preceded by a sign. If base 0413 is 0, it is chosen from the leading characters of s, 0 for 0414 octal, 0x or 0X for hexadecimal. If base is 16, a preceding 0415 0x or 0X is accepted. A trailing L or l is not accepted, 0416 unless base is 0. 0417 0418 """ 0419 return _long(s, base) 0420 0421 0422 # Left-justify a string 0423 def ljust(s, width, *args): 0424 """ljust(s, width[, fillchar]) -> string 0425 0426 Return a left-justified version of s, in a field of the 0427 specified width, padded with spaces as needed. The string is 0428 never truncated. If specified the fillchar is used instead of spaces. 0429 0430 """ 0431 return s.ljust(width, *args) 0432 0433 # Right-justify a string 0434 def rjust(s, width, *args): 0435 """rjust(s, width[, fillchar]) -> string 0436 0437 Return a right-justified version of s, in a field of the 0438 specified width, padded with spaces as needed. The string is 0439 never truncated. If specified the fillchar is used instead of spaces. 0440 0441 """ 0442 return s.rjust(width, *args) 0443 0444 # Center a string 0445 def center(s, width, *args): 0446 """center(s, width[, fillchar]) -> string 0447 0448 Return a center version of s, in a field of the specified 0449 width. padded with spaces as needed. The string is never 0450 truncated. If specified the fillchar is used instead of spaces. 0451 0452 """ 0453 return s.center(width, *args) 0454 0455 # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03' 0456 # Decadent feature: the argument may be a string or a number 0457 # (Use of this is deprecated; it should be a string as with ljust c.s.) 
def zfill(x, width):
    """zfill(x, width) -> string

    Left-pad the numeric string x with zeros until it fills a field of
    the given width.  x is never truncated.  A value that is not a
    string is first converted with repr().

    """
    if isinstance(x, basestring):
        return x.zfill(width)
    return repr(x).zfill(width)

# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of s in which every tab character has been replaced
    by the appropriate number of spaces, depending on the current
    column and on tabsize (default 8).

    """
    return s.expandtabs(tabsize)

# Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletions]) -> string

    Return a copy of s from which every character occurring in the
    optional argument deletions has been removed, and whose remaining
    characters have been mapped through the given translation table,
    which must be a string of length 256.  The deletions argument is
    not allowed for Unicode strings.

    """
    if not deletions:
        # Add s[:0] so that if s is Unicode and table is an 8-bit string,
        # table is converted to Unicode.  This means that table *cannot*
        # be a dictionary -- for that feature, use u.translate() directly.
        return s.translate(table + s[:0])
    return s.translate(table, deletions)

# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Return the result of s.capitalize(): a copy of s with only its
    first character capitalized.

    """
    return s.capitalize()

# Substring replacement (global)
def replace(s, old, new, maxsplit=-1):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str in which every occurrence of the
    substring old has been replaced by new.  When the optional
    argument maxsplit is given, only the first maxsplit occurrences
    are replaced.

    """
    return s.replace(old, new, maxsplit)


# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
    from strop import maketrans, lowercase, uppercase, whitespace
except ImportError:
    pass                                          # Use the original versions
else:
    # Rebuild letters from the values strop supplied.
    letters = lowercase + uppercase
# Generated by PyXR 0.9.4