"""Internationalization and localization support.

This module provides internationalization (I18N) and localization (L10N)
support for your Python programs by providing an interface to the GNU gettext
message catalog library.

I18N refers to the operation by which a program is made aware of multiple
languages.  L10N refers to the adaptation of your program, once
internationalized, to the local language and cultural habits.

"""

# This module represents the integration of work, contributions, feedback, and
# suggestions from the following people:
#
# Martin von Loewis, who wrote the initial implementation of the underlying
# C-based libintlmodule (later renamed _gettext), along with a skeletal
# gettext.py implementation.
#
# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
# which also included a pure-Python implementation to read .mo files if
# intlmodule wasn't available.
#
# James Henstridge, who also wrote a gettext.py module, which has some
# interesting, but currently unsupported experimental features: the notion of
# a Catalog class and instances, and the ability to add to a catalog file via
# a Python API.
#
# Barry Warsaw integrated these modules, wrote the .install() API and code,
# and conformed all C and Python code to Python's coding standards.
#
# Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
# module.
#
# J. David Ibanez implemented plural forms.  Bruno Haible fixed some bugs.
#
# TODO:
# - Lazy loading of .mo files.  Currently the entire catalog is loaded into
#   memory, but that's probably bad for large translated programs.  Instead,
#   the lexical sort of original strings in GNU .mo files should be exploited
#   to do binary searches and lazy initializations.  Or you might want to use
#   the undocumented double-hash algorithm for .mo files with hash tables, but
#   you'll need to study the GNU gettext code to do this.
#
# - Support Solaris .mo file formats.  Unfortunately, we've been unable to
#   find this format documented anywhere.


import locale
import copy
import os
import re
import struct
import sys
from errno import ENOENT


__all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
           'find', 'translation', 'install', 'textdomain', 'bindtextdomain',
           'dgettext', 'dngettext', 'gettext', 'ngettext',
           ]

_default_localedir = os.path.join(sys.prefix, 'share', 'locale')


def test(condition, true, false):
    """
    Implements the C expression:

      condition ? true : false

    Required to correctly interpret plural forms.  The source text built by
    c2py() calls this function by name, so it must stay a module global.
    Unlike the C operator, both branches are already evaluated by the time
    this function is called.
    """
    if condition:
        return true
    else:
        return false


def c2py(plural):
    """Gets a C expression as used in PO files for plural forms and returns a
    Python lambda function that implements an equivalent expression.

    plural -- the C expression text; the only identifier it may use is "n".

    Returns a one-argument function mapping n to int(<expression>).

    Raises ValueError if the expression cannot be tokenized, uses any
    identifier other than "n", or has an unmatched closing parenthesis.
    """
    # Security check, allow only the "n" identifier
    from StringIO import StringIO
    import token, tokenize
    tokens = tokenize.generate_tokens(StringIO(plural).readline)
    try:
        danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
    except tokenize.TokenError:
        raise ValueError(
            'plural forms expression error, maybe unbalanced parenthesis')
    else:
        if danger:
            raise ValueError('plural forms expression could be dangerous')

    # Replace some C operators by their Python equivalents
    plural = plural.replace('&&', ' and ')
    plural = plural.replace('||', ' or ')

    # "!x" becomes " not x"; the [^=] keeps "!=" untouched.
    negation = re.compile(r'\!([^=])')
    plural = negation.sub(' not \\1', plural)

    # Regular expression and replacement function used to transform
    # "a?b:c" to "test(a,b,c)".  The replacement recurses into the "c" part
    # so that chained conditionals (a?b:c?d:e) are converted as well.
    ternary = re.compile(r'(.*?)\?(.*?):(.*)')
    def repl(x):
        return "test(%s, %s, %s)" % (x.group(1), x.group(2),
                                     ternary.sub(repl, x.group(3)))

    # Code to transform the plural expression, taking care of parentheses:
    # each "(" opens a new buffer on the stack, each ")" converts the
    # innermost buffer and appends it, parenthesized, to its parent.
    stack = ['']
    for c in plural:
        if c == '(':
            stack.append('')
        elif c == ')':
            if len(stack) == 1:
                # Actually, we never reach this code, because unbalanced
                # parentheses get caught in the security check at the
                # beginning.
                raise ValueError('unbalanced parenthesis in plural form')
            s = ternary.sub(repl, stack.pop())
            stack[-1] += '(%s)' % s
        else:
            stack[-1] += c
    plural = ternary.sub(repl, stack.pop())

    # NOTE(security): this evaluates text that originates from the message
    # catalog.  The only guard is the identifier check above (nothing but
    # "n" may appear as a name); do not weaken that check.
    return eval('lambda n: int(%s)' % plural)



def _expand_lang(locale):
    """Return the list of catalog directory names to try for a locale name.

    The name is first passed through locale.normalize() and then split into
    language, "_territory", ".codeset" and "@modifier" parts.  Every
    combination of the parts that are present is generated, always starting
    with the language; the list is ordered from the most specific
    combination down to the bare language.
    """
    from locale import normalize
    locale = normalize(locale)
    COMPONENT_CODESET   = 1 << 0
    COMPONENT_TERRITORY = 1 << 1
    COMPONENT_MODIFIER  = 1 << 2
    # split up the locale into its base components
    mask = 0
    pos = locale.find('@')
    if pos >= 0:
        modifier = locale[pos:]
        locale = locale[:pos]
        mask |= COMPONENT_MODIFIER
    else:
        modifier = ''
    pos = locale.find('.')
    if pos >= 0:
        codeset = locale[pos:]
        locale = locale[:pos]
        mask |= COMPONENT_CODESET
    else:
        codeset = ''
    pos = locale.find('_')
    if pos >= 0:
        territory = locale[pos:]
        locale = locale[:pos]
        mask |= COMPONENT_TERRITORY
    else:
        territory = ''
    language = locale
    ret = []
    for i in range(mask+1):
        if not (i & ~mask):  # if all components for this combo exist ...
            val = language
            if i & COMPONENT_TERRITORY: val += territory
            if i & COMPONENT_CODESET:   val += codeset
            if i & COMPONENT_MODIFIER:  val += modifier
            ret.append(val)
    # Built in increasing bit-mask order; reverse so the fullest name is first.
    ret.reverse()
    return ret



class NullTranslations:
    """Base translations class that performs no translation of its own.

    Every lookup method first delegates to the fallback object, if one has
    been added with add_fallback(); otherwise it returns the message it was
    given (the singular form when n == 1, else the plural form).
    """

    def __init__(self, fp=None):
        """Initialize the instance; if fp is not None, call self._parse(fp)."""
        self._info = {}
        self._charset = None
        self._output_charset = None
        self._fallback = None
        if fp is not None:
            self._parse(fp)

    def _parse(self, fp):
        """Hook for subclasses to read a catalog from fp; does nothing here."""
        pass

    def add_fallback(self, fallback):
        """Append fallback to the end of this object's fallback chain."""
        if self._fallback:
            self._fallback.add_fallback(fallback)
        else:
            self._fallback = fallback

    def gettext(self, message):
        """Return the fallback's gettext(message), or message unchanged."""
        if self._fallback:
            return self._fallback.gettext(message)
        return message

    def lgettext(self, message):
        """Return the fallback's lgettext(message), or message unchanged."""
        if self._fallback:
            return self._fallback.lgettext(message)
        return message

    def ngettext(self, msgid1, msgid2, n):
        """Return the fallback's ngettext() result, else msgid1 if n == 1
        and msgid2 otherwise."""
        if self._fallback:
            return self._fallback.ngettext(msgid1, msgid2, n)
        if n == 1:
            return msgid1
        else:
            return msgid2

    def lngettext(self, msgid1, msgid2, n):
        """Return the fallback's lngettext() result, else msgid1 if n == 1
        and msgid2 otherwise."""
        if self._fallback:
            return self._fallback.lngettext(msgid1, msgid2, n)
        if n == 1:
            return msgid1
        else:
            return msgid2

    def ugettext(self, message):
        """Return the fallback's ugettext(message), or unicode(message)."""
        if self._fallback:
            return self._fallback.ugettext(message)
        return unicode(message)

    def ungettext(self, msgid1, msgid2, n):
        """Return the fallback's ungettext() result, else unicode(msgid1) if
        n == 1 and unicode(msgid2) otherwise."""
        if self._fallback:
            return self._fallback.ungettext(msgid1, msgid2, n)
        if n == 1:
            return unicode(msgid1)
        else:
            return unicode(msgid2)

    def info(self):
        """Return the catalog metadata dictionary (self._info)."""
        return self._info

    def charset(self):
        """Return the catalog charset recorded by _parse(), or None."""
        return self._charset

    def output_charset(self):
        """Return the charset set with set_output_charset(), or None."""
        return self._output_charset

    def set_output_charset(self, charset):
        """Set the charset used to encode translated messages on output."""
        self._output_charset = charset

    def install(self, unicode=False):
        """Bind '_' in the __builtin__ namespace to self.ugettext when the
        unicode flag is true, and to self.gettext otherwise."""
        import __builtin__
        if unicode:
            translate = self.ugettext
        else:
            translate = self.gettext
        __builtin__.__dict__['_'] = translate


class GNUTranslations(NullTranslations):
    """Translations read from a GNU .mo catalog file.

    _parse() fills self._catalog, which maps a message id to its
    translation; plural entries are stored under (msgid1, index) keys.
    """

    # Magic number of .mo files.  No 'L' suffix is needed: a hexadecimal
    # literal of this size is a positive (long) integer without it.
    LE_MAGIC = 0x950412de
    BE_MAGIC = 0xde120495

    def _parse(self, fp):
        """Override this method to support alternative .mo formats.

        Reads all of fp, fills self._catalog, and updates self._info,
        self._charset and self.plural from the catalog's metadata entry
        (the entry whose message id is empty).

        Raises IOError for an unknown magic number or when a table entry
        points outside the file.
        """
        unpack = struct.unpack
        filename = getattr(fp, 'name', '')
        # Parse the .mo file header: the magic number followed by four more
        # 32 bit words, in the byte order that the magic number reveals.
        self._catalog = catalog = {}
        self.plural = lambda n: int(n != 1) # germanic plural by default
        buf = fp.read()
        buflen = len(buf)
        # Are we big endian or little endian?
        magic = unpack('<I', buf[:4])[0]
        if magic == self.LE_MAGIC:
            version, msgcount, masteridx, transidx = unpack('<4I', buf[4:20])
            ii = '<II'
        elif magic == self.BE_MAGIC:
            version, msgcount, masteridx, transidx = unpack('>4I', buf[4:20])
            ii = '>II'
        else:
            raise IOError(0, 'Bad magic number', filename)
        # Now put all messages from the .mo file buffer into the catalog
        # dictionary.
        for _ in xrange(0, msgcount):
            mlen, moff = unpack(ii, buf[masteridx:masteridx+8])
            mend = moff + mlen
            tlen, toff = unpack(ii, buf[transidx:transidx+8])
            tend = toff + tlen
            if mend < buflen and tend < buflen:
                msg = buf[moff:mend]
                tmsg = buf[toff:tend]
            else:
                raise IOError(0, 'File is corrupt', filename)
            # See if we're looking at GNU .mo conventions for metadata
            if mlen == 0:
                # Catalog description
                lastk = None
                for item in tmsg.splitlines():
                    item = item.strip()
                    if not item:
                        continue
                    if ':' in item:
                        k, v = item.split(':', 1)
                        k = k.strip().lower()
                        v = v.strip()
                        self._info[k] = v
                        lastk = k
                        # Interpret the header only on the line that carries
                        # it.  Doing this for continuation lines as well
                        # would reuse the previous key and value, and after
                        # a plural-forms header that value is already a list.
                        if k == 'content-type':
                            self._charset = v.split('charset=')[1]
                        elif k == 'plural-forms':
                            v = v.split(';')
                            plural = v[1].split('plural=')[1]
                            self.plural = c2py(plural)
                    elif lastk:
                        # Continuation line: append to the previous header.
                        self._info[lastk] += '\n' + item
            # Note: we unconditionally convert both msgids and msgstrs to
            # Unicode using the character encoding specified in the charset
            # parameter of the Content-Type header.  The gettext documentation
            # strongly encourages msgids to be us-ascii, but some applications
            # require alternative encodings (e.g. Zope's ZCML and ZPT).  For
            # traditional gettext applications, the msgid conversion will
            # cause no problems since us-ascii should always be a subset of
            # the charset encoding.  We may want to fall back to 8-bit msgids
            # if the Unicode conversion fails.
            if '\x00' in msg:
                # Plural forms: "msgid1\0msgid2" maps to "form0\0form1\0..."
                msgid1, msgid2 = msg.split('\x00')
                tmsg = tmsg.split('\x00')
                if self._charset:
                    msgid1 = unicode(msgid1, self._charset)
                    tmsg = [unicode(x, self._charset) for x in tmsg]
                for index, form in enumerate(tmsg):
                    catalog[(msgid1, index)] = form
            else:
                if self._charset:
                    msg = unicode(msg, self._charset)
                    tmsg = unicode(tmsg, self._charset)
                catalog[msg] = tmsg
            # advance to next entry in the seek tables
            masteridx += 8
            transidx += 8

    def gettext(self, message):
        """Return the translation of message, encoded with the output
        charset if one is set, else with the catalog charset if known.

        A message missing from the catalog is passed to the fallback, or
        returned unchanged when there is no fallback.
        """
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.gettext(message)
            return message
        # Encode the Unicode tmsg back to an 8-bit string, if possible
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        elif self._charset:
            return tmsg.encode(self._charset)
        return tmsg

    def lgettext(self, message):
        """Like gettext(), but when no output charset is set the result is
        encoded with locale.getpreferredencoding()."""
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.lgettext(message)
            return message
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        return tmsg.encode(locale.getpreferredencoding())

    def ngettext(self, msgid1, msgid2, n):
        """Return the plural form of msgid1 selected by self.plural(n),
        encoded as in gettext().

        When the catalog has no such entry, use the fallback, or return
        msgid1 if n == 1 and msgid2 otherwise.
        """
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.ngettext(msgid1, msgid2, n)
            if n == 1:
                return msgid1
            else:
                return msgid2
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        elif self._charset:
            return tmsg.encode(self._charset)
        return tmsg

    def lngettext(self, msgid1, msgid2, n):
        """Like ngettext(), but when no output charset is set the result is
        encoded with locale.getpreferredencoding()."""
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.lngettext(msgid1, msgid2, n)
            if n == 1:
                return msgid1
            else:
                return msgid2
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        return tmsg.encode(locale.getpreferredencoding())

    def ugettext(self, message):
        """Return the catalog entry for message without encoding it; if it
        is missing, use the fallback or return unicode(message)."""
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.ugettext(message)
            return unicode(message)
        return tmsg

    def ungettext(self, msgid1, msgid2, n):
        """Return the plural catalog entry selected by self.plural(n)
        without encoding it; if it is missing, use the fallback or return
        unicode(msgid1) if n == 1 and unicode(msgid2) otherwise."""
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.ungettext(msgid1, msgid2, n)
            if n == 1:
                tmsg = unicode(msgid1)
            else:
                tmsg = unicode(msgid2)
        return tmsg


# Locate a .mo file using the gettext strategy
def find(domain, localedir=None, languages=None, all=0):
    """Locate the .mo file(s) for domain.

    domain    -- catalog name; the file looked for is "<domain>.mo".
    localedir -- root directory of the catalogs; defaults to
                 _default_localedir.
    languages -- list of language names to try, in order.  When None, the
                 first non-empty value among the LANGUAGE, LC_ALL,
                 LC_MESSAGES and LANG environment variables is split on ':'
                 and 'C' is appended if it is not already present.
    all       -- when true, collect every match instead of only the first.

    Each language is expanded with _expand_lang() and the file
    <localedir>/<lang>/LC_MESSAGES/<domain>.mo is tested for existence.
    The search stops when the language 'C' is reached.

    Returns the first existing path, or None if there is none; when all is
    true, returns the (possibly empty) list of existing paths.
    """
    # Get some reasonable defaults for arguments that were not supplied
    if localedir is None:
        localedir = _default_localedir
    if languages is None:
        languages = []
        for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
            val = os.environ.get(envar)
            if val:
                languages = val.split(':')
                break
        if 'C' not in languages:
            languages.append('C')
    # now normalize and expand the languages
    nelangs = []
    for lang in languages:
        for nelang in _expand_lang(lang):
            if nelang not in nelangs:
                nelangs.append(nelang)
    # select a language
    if all:
        result = []
    else:
        result = None
    for lang in nelangs:
        if lang == 'C':
            break
        mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
        if os.path.exists(mofile):
            if all:
                result.append(mofile)
            else:
                return mofile
    return result



# a mapping between absolute .mo file path and Translation object
_translations = {}

def translation(domain, localedir=None, languages=None,
                class_=None, fallback=False, codeset=None):
    """Return a translations object for domain.

    domain, localedir, languages -- passed to find() with all=1.
    class_   -- class instantiated with the open .mo file; defaults to
                GNUTranslations.
    fallback -- when true, return a NullTranslations instance instead of
                raising if no .mo file is found.
    codeset  -- if true, set as the output charset of every returned object.

    The object for the first .mo file found is returned; objects for the
    remaining files are chained to it as fallbacks.  Parsed catalogs are
    cached in _translations by absolute path and handed out as shallow
    copies.

    Raises IOError(ENOENT, ...) when no file is found and fallback is false.
    """
    if class_ is None:
        class_ = GNUTranslations
    mofiles = find(domain, localedir, languages, all=1)
    if not mofiles:
        if fallback:
            return NullTranslations()
        raise IOError(ENOENT, 'No translation file found for domain', domain)
    # Avoid opening, reading, and parsing the .mo file after it's been done
    # once.
    result = None
    for mofile in mofiles:
        key = os.path.abspath(mofile)
        t = _translations.get(key)
        if t is None:
            # Close the file as soon as the catalog has been constructed
            # from it, instead of leaving that to the garbage collector.
            fp = open(mofile, 'rb')
            try:
                t = _translations.setdefault(key, class_(fp))
            finally:
                fp.close()
        # Copy the translation object to allow setting fallbacks and
        # output charset. All other instance data is shared with the
        # cached object.
        t = copy.copy(t)
        if codeset:
            t.set_output_charset(codeset)
        if result is None:
            result = t
        else:
            result.add_fallback(t)
    return result


def install(domain, localedir=None, unicode=False, codeset=None):
    """Look up the translations for domain (with fallback enabled, so a
    missing catalog is not an error) and call their install() method."""
    t = translation(domain, localedir, fallback=True, codeset=codeset)
    t.install(unicode)



# a mapping b/w domains and locale directories
_localedirs = {}
# a mapping b/w domains and codesets
_localecodesets = {}
# current global domain, `messages' used for compatibility w/ GNU gettext
_current_domain = 'messages'


def textdomain(domain=None):
    """Set the current global domain if domain is not None; return the
    current global domain."""
    global _current_domain
    if domain is not None:
        _current_domain = domain
    return _current_domain


def bindtextdomain(domain, localedir=None):
    """Bind domain to localedir if localedir is not None; return the
    directory bound to domain, or _default_localedir if there is none."""
    global _localedirs
    if localedir is not None:
        _localedirs[domain] = localedir
    return _localedirs.get(domain, _default_localedir)


def bind_textdomain_codeset(domain, codeset=None):
    """Bind domain to codeset if codeset is not None; return the codeset
    bound to domain, or None if there is none."""
    global _localecodesets
    if codeset is not None:
        _localecodesets[domain] = codeset
    return _localecodesets.get(domain)


def dgettext(domain, message):
    """Translate message using the catalog for domain; return message
    unchanged if translation() raises IOError."""
    try:
        t = translation(domain, _localedirs.get(domain, None),
                        codeset=_localecodesets.get(domain))
    except IOError:
        return message
    return t.gettext(message)

def ldgettext(domain, message):
    """Like dgettext(), but uses the catalog's lgettext() method."""
    try:
        t = translation(domain, _localedirs.get(domain, None),
                        codeset=_localecodesets.get(domain))
    except IOError:
        return message
    return t.lgettext(message)

def dngettext(domain, msgid1, msgid2, n):
    """Plural-aware dgettext(); if translation() raises IOError, return
    msgid1 when n == 1 and msgid2 otherwise."""
    try:
        t = translation(domain, _localedirs.get(domain, None),
                        codeset=_localecodesets.get(domain))
    except IOError:
        if n == 1:
            return msgid1
        else:
            return msgid2
    return t.ngettext(msgid1, msgid2, n)

def ldngettext(domain, msgid1, msgid2, n):
    """Like dngettext(), but uses the catalog's lngettext() method."""
    try:
        t = translation(domain, _localedirs.get(domain, None),
                        codeset=_localecodesets.get(domain))
    except IOError:
        if n == 1:
            return msgid1
        else:
            return msgid2
    return t.lngettext(msgid1, msgid2, n)

def gettext(message):
    """dgettext() for the current global domain."""
    return dgettext(_current_domain, message)

def lgettext(message):
    """ldgettext() for the current global domain."""
    return ldgettext(_current_domain, message)

def ngettext(msgid1, msgid2, n):
    """dngettext() for the current global domain."""
    return dngettext(_current_domain, msgid1, msgid2, n)

def lngettext(msgid1, msgid2, n):
    """ldngettext() for the current global domain."""
    return ldngettext(_current_domain, msgid1, msgid2, n)

# dcgettext() has been deemed unnecessary and is not implemented.

# James Henstridge's Catalog constructor from GNOME gettext.  Documented usage
# was:
#
#    import gettext
#    cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
#    _ = cat.gettext
#    print _('Hello World')

# The resulting catalog object does not currently support access through a
# dictionary API, which was supported (but apparently unused) in GNOME
# gettext.

Catalog = translation
# Generated by PyXR 0.9.4