# Copyright (C) 2002-2004 Python Software Foundation
# Contact: email-sig@python.org

"""Email address parsing code.

Lifted directly from rfc822.py.  This should eventually be rewritten.
"""

import calendar
import time

SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '

# Parse a date field
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    `data' is a date string such as 'Mon, 20 Nov 1995 19:12:08 -0500'.  An
    optional leading day name is skipped, the day and month may appear in
    either order, and the deprecated RFC 850 'dd-mon-yy' form is accepted.

    Returns None when the string cannot be parsed (including when it is
    empty).  Otherwise returns
    (year, month, day, hour, minute, second, 0, 1, 0, tzoffset); the first
    nine items have the layout time.mktime() expects, with the weekday,
    yearday and DST slots filled with the constants 0, 1, 0.  tzoffset is
    the zone offset in seconds east of UTC, or None when no recognizable
    zone was given.  Zone names are looked up in _timezones, which contains
    no military zones other than Z; anything else must be numeric.

    The year is returned exactly as written; two-digit years are not
    expanded.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: there is no first token to inspect.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here.  Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3:  # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ('19:12:08-0500'); split it off,
        # keeping the sign so that int() below yields the right offset.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('')  # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps the month comes first ('Nov 20 ...'); try swapping.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names occupy the second half of the table.
        mm -= 12
    # endswith()/slicing are used instead of indexing so that an empty token
    # (possible with malformed RFC 850 dates) cannot raise IndexError.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are transposed ('... 19:12:08 1995').
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # The year slot holds a zone name; the real year is in the zone slot.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone name: leave tzoffset as None.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)


def parsedate(data):
    """Convert a time string to a time tuple.

    Same as parsedate_tz() but the result is truncated to the first nine
    items (the zone offset is dropped).  Returns None if parsing fails.
    """
    t = parsedate_tz(data)
    if isinstance(t, tuple):
        return t[:9]
    else:
        return t


def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    If data[9] (the zone offset in seconds) is None the fields are treated
    as local time and handed to time.mktime().  Otherwise the fields are
    converted as if they were UTC and the offset is subtracted, so the
    result does not depend on the local timezone of the machine.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    else:
        # calendar.timegm() only looks at the first six fields.  float() keeps
        # the return type the same as the time.mktime() branch above.
        return float(calendar.timegm(data)) - data[9]


def quote(str):
    """Escape a string for use inside a quoted-string.

    Backslashes and double quotes are each preceded by a backslash.  The
    surrounding double quotes are not added.
    """
    return str.replace('\\', '\\\\').replace('"', '\\"')


class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser keeps a cursor (self.pos) into the header text (self.field);
    every get*() method consumes text starting at the cursor and leaves the
    cursor after what it consumed.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; any comments encountered are
        appended to self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each as a
        (name, address) pair.  A stretch of the field that yields no address
        contributes an ('', '') placeholder.
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) pairs: empty if nothing usable was
        found, one pair for a single mailbox, or several for a group.
        """
        self.commentlist = []
        self.gotonext()

        # Remember where we started so an addr-spec can be re-parsed from
        # the beginning once we know that is what we are looking at.
        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so the caller's loop makes progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # The domain following an `@' in the route is discarded.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character following the addr-spec
                # (normally the closing `>').
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the local part, followed by `@' and the domain when an `@'
        is present.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return EMPTYSTRING.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address.

        Whitespace inside the domain is dropped and comments are moved to
        self.commentlist.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        Returns the text between the delimiters with backslash escapes
        resolved; the delimiters themselves (and those of any embedded
        comment) are not included.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                # Character after a backslash is taken literally.
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                # getcomment() has already moved the cursor past the nested
                # comment; advancing again would drop the next character.
                continue
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments are moved to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist


class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (name, address) pairs are stored in self.addresslist.  The
    +, +=, - and -= operators implement set union and difference on those
    pairs while preserving list order.
    """
    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # A false `field' (None or '') yields an empty list without parsing.
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
# Generated by PyXR 0.9.4