PyXR

c:\python24\lib \ string.py


0001 """A collection of string operations (most are no longer used).
0002 
0003 Warning: most of the code you see here isn't normally used nowadays.
0004 Beginning with Python 1.6, many of these functions are implemented as
0005 methods on the standard string object. They used to be implemented by
0006 a built-in module called strop, but strop is now obsolete itself.
0007 
0008 Public module variables:
0009 
0010 whitespace -- a string containing all characters considered whitespace
0011 lowercase -- a string containing all characters considered lowercase letters
0012 uppercase -- a string containing all characters considered uppercase letters
0013 letters -- a string containing all characters considered letters
0014 digits -- a string containing all characters considered decimal digits
0015 hexdigits -- a string containing all characters considered hexadecimal digits
0016 octdigits -- a string containing all characters considered octal digits
0017 punctuation -- a string containing all characters considered punctuation
0018 printable -- a string containing all characters considered printable
0019 
0020 """
0021 
0022 # Some strings for ctype-style character classification
# Character-class strings in the spirit of <ctype.h>.
whitespace = ' \t\n\r\v\f'
# The ascii_* names start out bound to the very same string objects.
ascii_lowercase = lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
octdigits = '01234567'
hexdigits = digits + 'abcdefABCDEF'
punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + letters + punctuation + whitespace
0035 
0036 # Case conversion helpers
0037 # Use str to convert Unicode literal in case of -U
0038 # Note that Cookie.py bogusly uses _idmap :(
0039 l = map(chr, xrange(256))
0040 _idmap = str('').join(l)
0041 del l
0042 
0043 # Functions which aren't available as string methods.
0044 
0045 # Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
0046 # See also regsub.capwords().
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with split(sep), capitalize() every word, and
    glue the words back together with sep (a single space when sep is
    None or empty).  With the default separator, runs of whitespace
    therefore collapse to one space.

    """
    words = [word.capitalize() for word in s.split(sep)]
    glue = sep or ' '
    return glue.join(words)
0057 
0058 
0059 # Construct a translation string
# List form of _idmap, built on the first maketrans() call and then reused.
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 characters) suitable
    for use in string.translate.  The character at each position of frm
    is mapped to the character at the same position of to; every other
    entry is left as in the identity table.  The strings frm and to
    must be of the same length.

    Raises ValueError when the two arguments differ in length.

    """
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    global _idmapL
    if not _idmapL:
        # Split the identity string into a list once; later calls copy it.
        _idmapL = list(_idmap)
    table = _idmapL[:]
    # When a character repeats in fromstr, its last pairing wins.
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return ''.join(table)
0079 
0080 
0081 
0082 ####################################################################
0083 import re as _re
0084 
0085 class _multimap:
0086     """Helper class for combining multiple mappings.
0087 
0088     Used by .{safe_,}substitute() to combine the mapping and keyword
0089     arguments.
0090     """
0091     def __init__(self, primary, secondary):
0092         self._primary = primary
0093         self._secondary = secondary
0094 
0095     def __getitem__(self, key):
0096         try:
0097             return self._primary[key]
0098         except KeyError:
0099             return self._secondary[key]
0100 
0101 
0102 class _TemplateMetaclass(type):
0103     pattern = r"""
0104     %(delim)s(?:
0105       (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
0106       (?P<named>%(id)s)      |   # delimiter and a Python identifier
0107       {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
0108       (?P<invalid>)              # Other ill-formed delimiter exprs
0109     )
0110     """
0111 
0112     def __init__(cls, name, bases, dct):
0113         super(_TemplateMetaclass, cls).__init__(name, bases, dct)
0114         if 'pattern' in dct:
0115             pattern = cls.pattern
0116         else:
0117             pattern = _TemplateMetaclass.pattern % {
0118                 'delim' : _re.escape(cls.delimiter),
0119                 'id'    : cls.idpattern,
0120                 }
0121         cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
0122 
0123 
class Template:
    """A string class for supporting $-substitutions."""
    __metaclass__ = _TemplateMetaclass

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # The compiled pattern looks for $$, $identifier, ${identifier}, and
    # any bare $'s.

    def _invalid(self, mo):
        """Raise ValueError reporting where the bad placeholder sits."""
        offset = mo.start('invalid')
        preceding = self.template[:offset].splitlines(True)
        if preceding:
            lineno = len(preceding)
            # Subtract the length of every complete line before this one.
            colno = offset - len(''.join(preceding[:-1]))
        else:
            lineno = 1
            colno = 1
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Values come from an optional positional mapping and/or keyword
        arguments; keywords are consulted first when both are given.
        A failed lookup propagates the mapping's own error, and an
        ill-formed placeholder raises ValueError.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if args and kws:
            mapping = _multimap(kws, args[0])
        elif args:
            mapping = args[0]
        else:
            mapping = kws
        # Replacement callback handed to .sub()
        def convert(mo):
            # Named and braced placeholders are by far the usual case.
            name = mo.group('named') or mo.group('braced')
            if name is not None:
                # '%s' % value is used instead of str() because the latter
                # fails on a Unicode value containing non-ASCII characters.
                return '%s' % mapping[name]
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but leave unresolved placeholders in place.

        A placeholder whose lookup raises KeyError is copied to the
        output unchanged, and an ill-formed delimiter is emitted as the
        delimiter itself instead of raising.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if args and kws:
            mapping = _multimap(kws, args[0])
        elif args:
            mapping = args[0]
        else:
            mapping = kws
        # Replacement callback handed to .sub()
        def convert(mo):
            # Plain and braced names differ only in how an unresolved
            # placeholder is written back out.
            for group, leftover in (('named', '%s'), ('braced', '{%s}')):
                name = mo.group(group)
                if name is not None:
                    try:
                        # '%s' % value is used instead of str() because the
                        # latter fails on non-ASCII Unicode values.
                        return '%s' % mapping[name]
                    except KeyError:
                        return self.delimiter + leftover % name
            if (mo.group('escaped') is not None or
                    mo.group('invalid') is not None):
                return self.delimiter
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
0206 
0207 
0208 
0209 ####################################################################
0210 # NOTE: Everything below here is deprecated.  Use string methods instead.
0211 # This stuff will go away in Python 3.0.
0212 
0213 # Backward compatible names for exceptions
# Legacy exception names; every one of them is simply ValueError.
index_error = atoi_error = atof_error = atol_error = ValueError
0218 
0219 # convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return the result of s.lower(): a copy of s converted to lowercase.

    """
    lowered = s.lower()
    return lowered
0227 
0228 # Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return the result of s.upper(): a copy of s converted to uppercase.

    """
    raised = s.upper()
    return raised
0236 
0237 # Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Return the result of s.swapcase(): a copy of s in which upper case
    characters become lowercase and vice versa.

    """
    swapped = s.swapcase()
    return swapped
0246 
0247 # Strip leading and trailing tabs and spaces
def strip(s, chars=None):
    """strip(s [,chars]) -> string

    Return the result of s.strip(chars): a copy of s without leading
    and trailing whitespace.
    When chars is given and not None, the characters in chars are
    removed instead.
    If chars is unicode, s will be converted to unicode before stripping.

    """
    trimmed = s.strip(chars)
    return trimmed
0258 
0259 # Strip leading tabs and spaces
def lstrip(s, chars=None):
    """lstrip(s [,chars]) -> string

    Return the result of s.lstrip(chars): a copy of s without leading
    whitespace.  When chars is given and not None, the characters in
    chars are removed instead.

    """
    trimmed = s.lstrip(chars)
    return trimmed
0268 
0269 # Strip trailing tabs and spaces
def rstrip(s, chars=None):
    """rstrip(s [,chars]) -> string

    Return the result of s.rstrip(chars): a copy of s without trailing
    whitespace.  When chars is given and not None, the characters in
    chars are removed instead.

    """
    trimmed = s.rstrip(chars)
    return trimmed
0278 
0279 
0280 # Split a string into a list of space/tab-separated words
def split(s, sep=None, maxsplit=-1):
    """split(s [,sep [,maxsplit]]) -> list of strings

    Return the list of words produced by s.split(sep, maxsplit), with
    sep as the delimiter string.  When maxsplit is given, at most
    maxsplit splits are made (so at most maxsplit+1 words result).
    When sep is omitted or None, any whitespace string separates words.

    (split and splitfields are synonymous)

    """
    words = s.split(sep, maxsplit)
    return words
splitfields = split
0294 
0295 # Split a string into a list of words, working from the end of the string
def rsplit(s, sep=None, maxsplit=-1):
    """rsplit(s [,sep [,maxsplit]]) -> list of strings

    Return the list of words produced by s.rsplit(sep, maxsplit), with
    sep as the delimiter string and splitting done from the end of the
    string towards the front.  When maxsplit is given, at most maxsplit
    splits are made.  When sep is omitted or None, any whitespace
    string separates words.
    """
    words = s.rsplit(sep, maxsplit)
    return words
0306 
0307 # Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return sep.join(words): the words of list concatenated with sep
    between neighbours.  sep defaults to a single space.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined
joinfields = join
0320 
0321 # Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Forward the arguments to s.index(); behaves like find except that
    ValueError is raised when the substring is not found.

    """
    lookup = s.index
    return lookup(*args)
0329 
0330 # Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Forward the arguments to s.rindex(); behaves like rfind except that
    ValueError is raised when the substring is not found.

    """
    lookup = s.rindex
    return lookup(*args)
0338 
0339 # Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Forward the arguments to s.count(): the number of occurrences of
    substring sub in s[start:end].  The optional start and end are
    interpreted as in slice notation.

    """
    counter = s.count
    return counter(*args)
0349 
0350 # Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Forward the arguments to s.find(): the lowest index in s where
    substring sub is found such that sub is contained within
    s[start:end].  The optional start and end are interpreted as in
    slice notation.

    Return -1 on failure.

    """
    lookup = s.find
    return lookup(*args)
0362 
0363 # Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Forward the arguments to s.rfind(): the highest index in s where
    substring sub is found such that sub is contained within
    s[start:end].  The optional start and end are interpreted as in
    slice notation.

    Return -1 on failure.

    """
    lookup = s.rfind
    return lookup(*args)
0375 
0376 # for a bit of speed
0377 _float = float
0378 _int = int
0379 _long = long
0380 
0381 # Convert string to float
def atof(s):
    """atof(s) -> float

    Convert the string s with float() and return the resulting
    floating point number.

    """
    number = _float(s)
    return number
0389 
0390 
0391 # Convert string to integer
def atoi(s , base=10):
    """atoi(s [,base]) -> int

    Convert the string s with int(s, base) and return the resulting
    integer; base defaults to 10.  The string s must consist of one or
    more digits, possibly preceded by a sign.  If base is 0, it is
    chosen from the leading characters of s, 0 for octal, 0x or 0X for
    hexadecimal.  If base is 16, a preceding 0x or 0X is accepted.

    """
    number = _int(s, base)
    return number
0404 
0405 
0406 # Convert string to long integer
def atol(s, base=10):
    """atol(s [,base]) -> long

    Convert the string s with long(s, base) and return the resulting
    long integer; base defaults to 10.  The string s must consist of
    one or more digits, possibly preceded by a sign.  If base is 0, it
    is chosen from the leading characters of s, 0 for octal, 0x or 0X
    for hexadecimal.  If base is 16, a preceding 0x or 0X is accepted.
    A trailing L or l is not accepted, unless base is 0.

    """
    number = _long(s, base)
    return number
0420 
0421 
0422 # Left-justify a string
def ljust(s, width, *args):
    """ljust(s, width[, fillchar]) -> string

    Return s.ljust(width[, fillchar]): s left-justified in a field of
    the given width and padded with spaces as needed.  The string is
    never truncated.  A fillchar, if supplied, is used in place of
    spaces.

    """
    padded = s.ljust(width, *args)
    return padded
0432 
0433 # Right-justify a string
def rjust(s, width, *args):
    """rjust(s, width[, fillchar]) -> string

    Return s.rjust(width[, fillchar]): s right-justified in a field of
    the given width and padded with spaces as needed.  The string is
    never truncated.  A fillchar, if supplied, is used in place of
    spaces.

    """
    padded = s.rjust(width, *args)
    return padded
0443 
0444 # Center a string
def center(s, width, *args):
    """center(s, width[, fillchar]) -> string

    Return s.center(width[, fillchar]): s centered in a field of the
    given width and padded with spaces as needed.  The string is never
    truncated.  A fillchar, if supplied, is used in place of spaces.

    """
    padded = s.center(width, *args)
    return padded
0454 
0455 # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
0456 # Decadent feature: the argument may be a string or a number
0457 # (Use of this is deprecated; it should be a string, as with ljust and friends.)
0458 def zfill(x, width):
0459     """zfill(x, width) -> string
0460 
0461     Pad a numeric string x with zeros on the left, to fill a field
0462     of the specified width.  The string x is never truncated.
0463 
0464     """
0465     if not isinstance(x, basestring):
0466         x = repr(x)
0467     return x.zfill(width)
0468 
0469 # Expand tabs in a string.
0470 # Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return s.expandtabs(tabsize): a copy of s in which each tab
    character is replaced by the number of spaces appropriate for the
    current column and the tabsize (default 8).

    """
    expanded = s.expandtabs(tabsize)
    return expanded
0480 
0481 # Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletions]) -> string

    Return a copy of s from which every character found in the optional
    deletions argument has been removed, and whose remaining characters
    have been mapped through the translation table, which must be a
    string of length 256.  The deletions argument is not allowed for
    Unicode strings.

    """
    if not deletions:
        # Adding s[:0] converts an 8-bit table to Unicode when s is
        # Unicode.  As a consequence table *cannot* be a dictionary
        # here -- call u.translate() directly for that feature.
        return s.translate(table + s[:0])
    return s.translate(table, deletions)
0499 
0500 # Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Return the result of s.capitalize(): a copy of s with only its
    first character capitalized.

    """
    capped = s.capitalize()
    return capped
0509 
0510 # Substring replacement (global)
def replace(s, old, new, maxsplit=-1):
    """replace (str, old, new[, maxsplit]) -> string

    Return s.replace(old, new, maxsplit): a copy of string str in which
    the occurrences of substring old are replaced by new.  When the
    optional argument maxsplit is given, only the first maxsplit
    occurrences are replaced.

    """
    replaced = s.replace(old, new, maxsplit)
    return replaced
0520 
0521 
0522 # Try importing optional built-in module "strop" -- if it exists,
0523 # it redefines some string operations that are 100-1000 times faster.
0524 # It also defines values for whitespace, lowercase and uppercase
0525 # that match <ctype.h>'s definitions.
0526 
# If the optional "strop" module can be imported, its maketrans, lowercase,
# uppercase and whitespace rebind the module-level names defined earlier in
# this file, and letters is rebuilt from the imported lowercase/uppercase.
# The ascii_* names are not touched here.
try:
    from strop import maketrans, lowercase, uppercase, whitespace
    letters = lowercase + uppercase
except ImportError:
    # strop is unavailable: keep the pure-Python definitions.
    pass                                          # Use the original versions
0532
Generated by PyXR 0.9.4