PyXR

c:\python24\lib \ email \ _parseaddr.py


0001 # Copyright (C) 2002-2004 Python Software Foundation
0002 # Contact: email-sig@python.org
0003 
0004 """Email address parsing code.
0005 
0006 Lifted directly from rfc822.py.  This should eventually be rewritten.
0007 """
0008 
0009 import time
0010 
# Small string constants, named so that join() calls read naturally.
EMPTYSTRING = ''
SPACE = ' '
COMMASPACE = ', '

# Tables used when parsing a date field.
#
# Month names, all lower case: the twelve three-letter abbreviations first,
# then the twelve full names.  An entry's position modulo 12 is therefore the
# zero-based month number, whichever spelling was matched.
_monthnames = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
    'january', 'february', 'march', 'april', 'may', 'june',
    'july', 'august', 'september', 'october', 'november', 'december',
]

# Lower-case day-of-week abbreviations, Monday first.
_daynames = 'mon tue wed thu fri sat sun'.split()

# Named time zones and their offsets from UTC, written as signed hhmm
# numbers (e.g. -500 means five hours behind UTC).
#
# The military time zones defined in RFC822 are left out on purpose, apart
# from Z.  According to RFC1123, the description in RFC822 gets the signs
# wrong, so such zones cannot be relied upon.  RFC1123 recommends numeric
# timezone indicators over timezone names.
_timezones = {
    # Universal time
    'UT': 0,
    'UTC': 0,
    'GMT': 0,
    'Z': 0,
    # Atlantic (used in Canada)
    'AST': -400,
    'ADT': -300,
    # Eastern
    'EST': -500,
    'EDT': -400,
    # Central
    'CST': -600,
    'CDT': -500,
    # Mountain
    'MST': -700,
    'MDT': -600,
    # Pacific
    'PST': -800,
    'PDT': -700,
}
0036 
0037 
def parsedate_tz(data):
    """Convert a date string to a 10-item time tuple.

    The result is (year, month, day, hour, minute, second, 0, 1, 0, tzoffset).
    The first six items are integers; the year is returned exactly as
    written (a two-digit year is not expanded).  The next three items are
    fixed placeholders.  tzoffset is the zone's offset from UTC in seconds,
    or None when the string carries no zone that could be understood.

    Zone names are looked up in _timezones; anything else is tried as a
    signed numeric hhmm offset.  Military time zones other than Z are not
    recognized.

    Returns None when the string cannot be parsed, including when it is
    empty or contains only whitespace.
    """
    data = data.split()
    if not data:
        # Nothing to parse; indexing data[0] below would raise IndexError.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time with no space ("19:12:08-0500").
        # Split it off, keeping the sign so negative offsets survive.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Maybe the month comes before the day ("Nov 20"); try swapping.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    # _monthnames holds abbreviations then full names, so fold 13..24
    # back onto 1..12.
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        mm -= 12
    # endswith()/slicing are used instead of indexing so that an empty token
    # (possible after splitting an RFC 850 date on '-') cannot raise
    # IndexError; such input falls through to the int() check below.
    if dd.endswith(','):
        dd = dd[:-1]
    if yy.find(':') > 0:
        # The year and time fields are in the opposite order.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # What looked like the year is really the zone name.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone name: leave tzoffset as None.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
0122 
0123 
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    The string is parsed with parsedate_tz() and the trailing timezone
    offset is dropped.  Whatever non-tuple value parsedate_tz() returns
    (None for an unparseable string) is passed through unchanged.
    """
    parsed = parsedate_tz(data)
    if not isinstance(parsed, tuple):
        return parsed
    return parsed[:9]
0131 
0132 
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset from UTC in seconds, or None when the zone
    is unknown.  With no zone the fields are interpreted as local time via
    time.mktime(); otherwise the fields are converted arithmetically as if
    they were UTC and the zone offset is then subtracted.

    Returns the timestamp as a float.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # Do not route a zoned date through local time: subtracting the current
    # time.timezone is wrong for dates on which the local zone had a
    # different UTC offset, and mktime() may reject dates the platform
    # cannot represent locally.  timegm() is pure calendar arithmetic.
    import calendar
    return float(calendar.timegm(data) - data[9])
0141 
0142 
def quote(str):
    """Escape a string for use inside a double-quoted string.

    Every backslash is doubled and every double quote is preceded by a
    backslash.  No surrounding quote characters are added.
    """
    # Backslashes must be doubled first, otherwise the backslashes added in
    # front of the double quotes would themselves be doubled.
    escaped = str.replace('\\', '\\\\')
    return escaped.replace('"', '\\"')
0146 
0147 
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser keeps a cursor (`pos') into the header text (`field').  Every
    get*() method starts reading at the cursor and leaves it just past what
    it consumed.  Comments met along the way are gathered in `commentlist'.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip whitespace and comments up to the next significant character.

        Each comment skipped is appended to self.commentlist.
        """
        skippable = self.LWS + '\n\r'
        while self.pos < len(self.field):
            if self.field[self.pos] in skippable:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list of (name, address) pairs.  A stretch of the field
        that yields no address contributes an ('', '') pair.
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) pairs: one pair for a single
        address, several for a group, and an empty list when nothing usable
        was found.  A trailing comma separator is consumed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot (not alias) the comments seen so far.  getphraselist()
        # appends to self.commentlist in place, so without a copy the
        # "restore" below would keep those comments and the re-parse would
        # then collect them a second time.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(SPACE.join(plist) + ' (' +
                               SPACE.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so the caller's loop advances.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a `<', and the empty string if
        the closing `>' is reached before any addrspec.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # Domain of a source route entry: parse it and throw it away.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character following the addrspec (the
                # closing `>' in well-formed input).
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the local part, followed by `@' and the domain when an `@'
        is present.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return EMPTYSTRING.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address.

        Spaces and tabs are skipped and comments are moved to
        self.commentlist; parsing stops at the first other atom-ending
        character.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        Returns the fragment's text without the delimiters.  A backslash is
        dropped and the character after it is taken literally.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                # getcomment() has already moved pos past the nested
                # comment; advancing again would swallow the next character
                # (possibly our own closing delimiter).
                continue
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Returns the list of words; whitespace
        between them is dropped and comments are moved to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
0425 
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (name, address) pairs are kept in the `addresslist'
    attribute.  The arithmetic operators treat two lists as sets of pairs.
    """
    def __init__(self, field):
        """Parse `field'; a false value (None, '') gives an empty list."""
        AddrlistClass.__init__(self, field)
        if not field:
            self.addresslist = []
        else:
            self.addresslist = self.getaddrlist()

    def __len__(self):
        """Return the number of parsed addresses."""
        return len(self.addresslist)

    def __add__(self, other):
        """Return a new list: our entries, then other's that we lack."""
        # Set union
        union = AddressList(None)
        extras = [pair for pair in other.addresslist
                  if pair not in self.addresslist]
        union.addresslist = self.addresslist[:] + extras
        return union

    def __iadd__(self, other):
        """Append, in place, the entries of other that we lack."""
        # Set union, in-place
        for pair in other.addresslist:
            if pair not in self.addresslist:
                self.addresslist.append(pair)
        return self

    def __sub__(self, other):
        """Return a new list holding our entries that other lacks."""
        # Set difference
        difference = AddressList(None)
        difference.addresslist = [pair for pair in self.addresslist
                                  if pair not in other.addresslist]
        return difference

    def __isub__(self, other):
        """Remove, in place, each entry of other that we contain."""
        # Set difference, in-place
        for pair in other.addresslist:
            if pair in self.addresslist:
                self.addresslist.remove(pair)
        return self

    def __getitem__(self, index):
        """Return the entry (or slice of entries) at `index'."""
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
0472
Generated by PyXR 0.9.4