""" Standard "encodings" Package

    Standard Python encoding modules are stored in this package
    directory.

    Codec modules must have names corresponding to normalized encoding
    names as defined in the normalize_encoding() function below, e.g.
    'utf-8' must be implemented by the module 'utf_8.py'.

    Each codec module must export the following interface:

    * getregentry() -> (encoder, decoder, stream_reader, stream_writer)
    The getregentry() API must return callable objects which adhere to
    the Python Codec Interface Standard.

    In addition, a module may optionally also define the following
    APIs which are then used by the package's codec search function:

    * getaliases() -> sequence of encoding name strings to use as aliases

    Alias names returned by getaliases() must be normalized encoding
    names as defined by normalize_encoding().

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"

import codecs, exceptions, types, aliases

# Maps the encoding name exactly as passed to search_function() to either
# the codec 4-tuple or None (negative results are cached as well).
_cache = {}

# Sentinel for "not in _cache"; needed because None is a valid cached value.
_unknown = '--unknown--'

# Non-empty fromlist so that __import__() returns the imported module itself.
_import_tail = ['*']

# 256-entry translation table for str.translate(): ASCII letters, digits
# and the dot map to themselves, every other byte maps to a space.  The
# run lengths are spelled out so the table cannot be silently corrupted
# by whitespace-mangling tools (46+1+1+10+7+26+6+26+133 == 256).
_norm_encoding_map = (
    ' ' * 46 + '.' + ' ' +                      # 0x00-0x2F: only '.' kept
    '0123456789' + ' ' * 7 +                    # 0x30-0x40
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + ' ' * 6 +    # 0x41-0x60
    'abcdefghijklmnopqrstuvwxyz' + ' ' * 133    # 0x61-0xFF
)

# Shared alias table; search_function() adds entries to it in place.
_aliases = aliases.aliases


class CodecRegistryError(exceptions.LookupError,
                         exceptions.SystemError):
    """ Raised by search_function() when a codec module returns an
        invalid registry entry.
    """
    pass


def normalize_encoding(encoding):

    """ Normalize an encoding name.

        Normalization works as follows: all non-alphanumeric
        characters except the dot used for Python package names are
        collapsed and replaced with a single underscore, e.g. '  -;#'
        becomes '_'. Leading and trailing underscores are removed.

        Note that encoding names should be ASCII only; if they do use
        non-ASCII characters, these must be Latin-1 compatible.

        Returns the normalized name as an 8-bit string.

    """
    # Make sure we have an 8-bit string, because .translate() works
    # differently for Unicode strings.  isinstance() is used so that
    # subclasses of the Unicode type are converted too.
    if isinstance(encoding, types.UnicodeType):
        # Note that .encode('latin-1') does *not* use the codec
        # registry, so this call doesn't recurse. (See unicodeobject.c
        # PyUnicode_AsEncodedString() for details)
        encoding = encoding.encode('latin-1')
    # Every rejected character became a space; split() drops leading and
    # trailing runs and collapses inner runs, join() inserts one '_' each.
    return '_'.join(encoding.translate(_norm_encoding_map).split())


def search_function(encoding):

    """ Codec search function registered with the codecs module.

        Looks up the codec module for the given encoding name and
        returns its registry entry, a 4-tuple of callables as returned
        by the module's getregentry().  Returns None if no module can
        be imported for the name or if the imported module does not
        define getregentry().

        Both positive and negative results are cached under the
        encoding name exactly as passed in.  Aliases announced by the
        module's optional getaliases() are added to the alias table
        unless an alias of that name already exists.

        Raises CodecRegistryError if getregentry() does not return
        exactly four callable objects.

    """
    # Cache lookup
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding
    # name and lookup the module using the aliased name, then try to
    # lookup the module using the standard import scheme, i.e. first
    # try in the encodings package, then at top-level.
    #
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding,
                    norm_encoding]
    else:
        modnames = [norm_encoding]
    for modname in modnames:
        if not modname:
            # Empty names (e.g. an encoding made of punctuation only)
            # cannot be imported.
            continue
        try:
            mod = __import__(modname,
                             globals(), locals(), _import_tail)
        except ImportError:
            pass
        else:
            break
    else:
        # No candidate could be imported
        mod = None

    if mod is not None:
        try:
            getregentry = mod.getregentry
        except AttributeError:
            # Not a codec module
            mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = tuple(getregentry())
    if len(entry) != 4:
        raise CodecRegistryError(
            'module "%s" (%s) failed to register' %
            (mod.__name__, mod.__file__))
    for obj in entry:
        if not callable(obj):
            raise CodecRegistryError(
                'incompatible codecs in module "%s" (%s)' %
                (mod.__name__, mod.__file__))

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                # modname is the name under which the import succeeded
                _aliases[alias] = modname

    # Return the registry entry
    return entry

# Register the search_function in the Python codec registry
codecs.register(search_function)

# Generated by PyXR 0.9.4