PyXR

c:\python24\lib \ encodings

Subpackages:

Modules

Init code



0001 """ Standard "encodings" Package
0002 
0003     Standard Python encoding modules are stored in this package
0004     directory.
0005 
0006     Codec modules must have names corresponding to normalized encoding
0007     names as defined in the normalize_encoding() function below, e.g.
0008     'utf-8' must be implemented by the module 'utf_8.py'.
0009 
0010     Each codec module must export the following interface:
0011 
0012     * getregentry() -> (encoder, decoder, stream_reader, stream_writer)
0013     The getregentry() API must return callable objects which adhere to
0014     the Python Codec Interface Standard.
0015 
0016     In addition, a module may optionally also define the following
0017     APIs which are then used by the package's codec search function:
0018 
0019     * getaliases() -> sequence of encoding name strings to use as aliases
0020 
0021     Alias names returned by getaliases() must be normalized encoding
0022     names as defined by normalize_encoding().
0023 
0024 Written by Marc-Andre Lemburg (mal@lemburg.com).
0025 
0026 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
0027 
0028 """#"
0029 
0030 import codecs, exceptions, types, aliases
0031 
# Maps an encoding name, exactly as passed to search_function(), to its
# codec registry entry, or to None when no codec module was found.
_cache = {}
# Sentinel default for _cache.get(); lets search_function() tell a name
# that was never looked up apart from a cached None.
_unknown = '--unknown--'
# Passed as the "fromlist" argument of __import__() in search_function().
_import_tail = ['*']
# 256-character table for the 8-bit str.translate(): ASCII letters,
# digits and the dot map to themselves, every other character maps to
# a space.  The position of each character in the table is significant.
_norm_encoding_map = ('                                              . '
                      '0123456789       ABCDEFGHIJKLMNOPQRSTUVWXYZ     '
                      ' abcdefghijklmnopqrstuvwxyz                     '
                      '                                                '
                      '                                                '
                      '                ')
# Alias name -> module name mapping.  search_function() reads it and
# also adds the aliases reported by codec modules to it.
_aliases = aliases.aliases
0042 
class CodecRegistryError(LookupError, SystemError):

    """ Error raised for a codec module with a bad registry entry.

        search_function() raises it when a module's getregentry()
        does not return exactly four callable objects.  The class
        derives from both LookupError and SystemError, so a handler
        for either base class catches it.

    """
0046 
def normalize_encoding(encoding):

    """ Normalize an encoding name.

        Every run of characters that are neither ASCII letters, ASCII
        digits nor the dot (kept for Python package names) is
        collapsed into a single underscore, e.g. 'iso  -;#8859'
        becomes 'iso_8859'.  Leading and trailing runs are removed
        altogether.

        Note that encoding names should be ASCII only; if they do use
        non-ASCII characters, these must be Latin-1 compatible,
        because Unicode names are encoded to Latin-1 first.

        Returns the normalized name as an 8-bit string.

    """
    # Make sure we have an 8-bit string, because .translate() works
    # differently for Unicode strings.  isinstance() is used instead
    # of an exact type comparison so that instances of unicode
    # subclasses are converted as well.
    if isinstance(encoding, types.UnicodeType):
        # Note that .encode('latin-1') does *not* use the codec
        # registry, so this call doesn't recurse. (See unicodeobject.c
        # PyUnicode_AsEncodedString() for details)
        encoding = encoding.encode('latin-1')
    # Separators were mapped to spaces by the table; split() drops the
    # empty pieces, so runs collapse and the ends are trimmed.
    return '_'.join(encoding.translate(_norm_encoding_map).split())
0068 
def search_function(encoding):

    """ Codec search function for the modules in this package.

        Returns the tuple built from the codec module's getregentry()
        result, or None if no codec module could be imported for
        *encoding* or the imported module has no getregentry().  Both
        outcomes are cached under the encoding name as passed in.

        Raises CodecRegistryError if the registry entry does not
        consist of exactly four callable objects.

        As a side effect, aliases reported by the module's optional
        getaliases() are added to the alias table, without
        overwriting existing aliases.

    """
    # Cache lookup
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding
    # name and lookup the module using the aliased name, then try to
    # lookup the module using the standard import scheme, i.e. first
    # try in the encodings package, then at top-level.
    #
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding,
                    norm_encoding]
    else:
        modnames = [norm_encoding]

    # NOTE(review): normalization keeps dots, so a caller-supplied
    # dotted name reaches __import__() unchanged -- confirm that
    # callers never pass untrusted encoding names.
    mod = None
    for modname in modnames:
        if not modname:
            continue
        try:
            mod = __import__(modname,
                             globals(), locals(), _import_tail)
        except ImportError:
            # Try the next candidate name, if any
            continue
        # The first module that imports wins; "modname" keeps its
        # name for the alias registration below.
        break

    if mod is not None:
        try:
            getregentry = mod.getregentry
        except AttributeError:
            # Not a codec module
            mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = tuple(getregentry())
    if len(entry) != 4:
        raise CodecRegistryError(
            'module "%s" (%s) failed to register' %
            (mod.__name__, mod.__file__))
    for obj in entry:
        if not callable(obj):
            raise CodecRegistryError(
                'incompatible codecs in module "%s" (%s)' %
                (mod.__name__, mod.__file__))

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        # getaliases() is optional
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                _aliases[alias] = modname

    # Return the registry entry
    return entry
0143 
# Register the search_function in the Python codec registry.  This is
# a module-level statement, so it runs when the package is imported.
codecs.register(search_function)
0146 

Generated by PyXR 0.9.4
SourceForge.net Logo