"""RFC 2822 message manipulation.

Note: This is only a very rough sketch of a full RFC-822 parser; in particular
the tokenizing of addresses does not adhere to all the quoting rules.

Note: RFC 2822 is a long awaited update to RFC 822.  This module should
conform to RFC 2822, and is thus mis-named (it's not worth renaming it).  Some
effort at RFC 2822 updates have been made, but a thorough audit has not been
performed.  Consider any RFC 2822 non-conformance to be a bug.

    RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
    RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)

Directions for use:

To create a Message object: first open a file, e.g.:

    fp = open(file, 'r')

You can use any other legal way of getting an open file object, e.g. use
sys.stdin or call os.popen().  Then pass the open file object to the Message()
constructor:

    m = Message(fp)

This class can work with any input object that supports a readline method.  If
the input object has seek and tell capability, the rewindbody method will
work; also illegal lines will be pushed back onto the input stream.  If the
input object lacks seek but has an `unread' method that can push back a line
of input, Message will use that to push back illegal lines.  Thus this class
can be used to parse messages coming from a buffered stream.

The optional `seekable' argument is provided as a workaround for certain stdio
libraries in which tell() discards buffered data before discovering that the
lseek() system call doesn't work.  For maximum portability, you should set the
seekable argument to zero to prevent that initial tell() when passing in
an unseekable object such as a file object created from a socket object.  If
it is 1 on entry -- which it is by default -- the tell() method of the open
file object is called once; if this raises an exception, seekable is reset to
0.  For other nonzero values of seekable, this test is not made.

To get the text of a particular header there are several methods:

    str = m.getheader(name)
    str = m.getrawheader(name)

where name is the name of the header, e.g. 'Subject'.  The difference is that
getheader() strips the leading and trailing whitespace, while getrawheader()
doesn't.  Both functions retain embedded whitespace (including newlines)
exactly as they are specified in the header, and leave the case of the text
unchanged.

For addresses and address lists there are functions

    realname, mailaddress = m.getaddr(name)
    list = m.getaddrlist(name)

where the latter returns a list of (realname, mailaddr) tuples.

There is also a method

    time = m.getdate(name)

which parses a Date-like field and returns a time-compatible tuple,
i.e. a tuple such as returned by time.localtime() or accepted by
time.mktime().

See the class definition for lower level access methods.

There are also some utility functions here.
"""
# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>

import calendar
import time

__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]

_blanklines = ('\r\n', '\n')            # Optimization for islast()


class Message:
    """Represents a single RFC 2822-compliant message."""

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is any object with a readline() method.  `seekable' tells
        whether fp supports tell()/seek(): when it is exactly 1 (the
        default) tell() is probed once and seekable is reset to 0 if the
        probe fails; any other value is trusted as given.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file is not seekable.
        """
        if not self.seekable:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = headerlines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Prefer an explicit unread() for pushback; otherwise remember the
        # position of each line so that we can seek() back to it.
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                headerlines.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                headerlines.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        else:
            return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A new, different header ends the current match.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """

        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop "name:" from the first line; continuation lines stay intact.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the value being built.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                # Separate the addresses of successive headers with a comma.
                if raw:
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    addr = h[i+1:]
                raw.append(addr)
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime(), or None if the header is missing or
        cannot be parsed.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is missing or cannot be parsed.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        doomed = []
        hit = 0
        for i, line in enumerate(self.headers):
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                doomed.append(i)
        # Delete from the end so that the remaining indices stay valid.
        for i in reversed(doomed):
            del self.headers[i]

    def setdefault(self, name, default=""):
        """Return the value of header `name', adding it as `default' if absent."""
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        else:
            text = name + ": " + default
            for line in text.split("\n"):
                self.headers.append(line + "\n")
            self.dict[lowername] = default
            return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lower-cased) header field names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined back into one string."""
        return ''.join(self.headers)


# Utility functions
# -----------------

# XXX Should fix unquote() and quote() to be really conformant.
# XXX The inverses of the parse functions may also be useful.


def unquote(str):
    """Remove quotes from a string.

    Strips one pair of surrounding double quotes (undoing backslash escapes)
    or one pair of surrounding angle brackets; anything else is returned
    unchanged.
    """
    if len(str) > 1:
        if str.startswith('"') and str.endswith('"'):
            return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
        if str.startswith('<') and str.endswith('>'):
            return str[1:-1]
    return str


def quote(str):
    """Add quotes around a string.

    Only the backslash-escaping of backslashes and double quotes is done
    here; the caller supplies the surrounding quote characters.
    """
    return str.replace('\\', '\\\\').replace('"', '\\"')


def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Returns (None, None) if no address could be parsed.
    """
    a = AddressList(address)
    addrs = a.addresslist
    if not addrs:
        return (None, None)
    else:
        return addrs[0]


class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC 2822 in front of you.

    http://www.faqs.org/rfcs/rfc2822.html

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing one or more
        addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address."""
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else: break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.
        """
        result = []
        while True:
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                break
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one entry for a plain
        address, several for a group, and an empty list when nothing could
        be parsed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' + \
                         ' '.join(self.commentlist) + ')', routeaddr)]
            else: returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so the parser keeps making progress.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the parser is not positioned at a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = ""
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                # Step over the closing `>'.
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else: aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # No domain part: return the local part alone.
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else: sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.  If self is not
        looking at an instance of `beginchar' then getdelimited returns the
        empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            if quote == 1:
                # Previous character was a backslash: take this one literally.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                # getcomment() has already advanced pos past the nested
                # comment; advancing again would drop the next character.
                continue
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else: atomlist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist

class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
    def __init__(self, field):
        """Parse `field' into self.addresslist; a false field gives []."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return ", ".join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]

def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form."""
    if pair[0]:
        return '"' + pair[0] + '" <' + pair[1] + '>'
    else:
        return pair[1]

# Parse a date field

_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple: the first nine items are compatible with
    time.mktime() (weekday, yearday and isdst are filled with the dummy
    values 0, 1, 0) and the tenth is the zone offset from UTC in seconds,
    or None when the zone is missing or unrecognized.  Returns None if the
    string cannot be parsed.  Of the military zones only `Z' is recognized.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: nothing to parse.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time, e.g. "08:49:37+0100" or
        # "08:49:37-0500"; split it off, keeping the sign.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps day and month are swapped ("Nov 6" instead of "6 Nov").
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12: mm = mm - 12       # full month names follow the short ones
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are swapped (asctime-like layouts).
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
    if not yy[0].isdigit():
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)


def parsedate(data):
    """Convert a time string to a time tuple.

    Same as parsedate_tz() but without the trailing zone offset; returns
    None if the string cannot be parsed.
    """
    t = parsedate_tz(data)
    if isinstance(t, tuple):
        return t[:9]
    else: return t


def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    else:
        # Interpret the fields as UTC and then remove the zone offset.
        # Going through time.mktime() and time.timezone instead gives wrong
        # answers whenever the local zone's DST rules or historical offsets
        # differ from its current standard offset.
        return calendar.timegm(data[:9]) - data[9]

def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generated
    non-English names.

    `timeval' is a timestamp in seconds since the epoch; the current time
    is used when it is None.
    """
    if timeval is None:
        timeval = time.time()
    timeval = time.gmtime(timeval)
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
            ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"][timeval[6]],
            timeval[2],
            ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][timeval[1]-1],
                                timeval[0], timeval[3], timeval[4], timeval[5])


# When used as script, run a small test program.
# The first command line argument must be a filename containing one
# message in RFC-822 format.
if __name__ == '__main__':
    # Small self-test: parse one message file and print what was found.
    # Every print() call takes a single pre-formatted string so that the
    # output is the same under Python 2 and Python 3.
    import os
    import sys

    # Only fall back to $HOME when no filename was given, so a missing
    # HOME does not break an explicit invocation.
    if sys.argv[1:]:
        file = sys.argv[1]
    else:
        file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    f = open(file, 'r')
    try:
        m = Message(f)
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            # Read the zone only after we know the date parsed; the zone
            # itself may still be None when it was missing or unknown.
            tz = date[-1]
            parts = ['ParsedDate:',
                     time.asctime(time.localtime(mktime_tz(date)))]
            if tz is not None:
                hhmm, ss = divmod(tz, 60)
                hh, mm = divmod(hhmm, 60)
                parts.append("%+03d%02d" % (hh, mm))
                if ss:
                    parts.append(".%02d" % ss)
            print(' '.join(parts))
        else:
            print('ParsedDate: %s' % (None,))
        m.rewindbody()
        n = 0
        while f.readline():
            n = n + 1
        print('Lines: %s' % (n,))
        print('-'*70)
        print('len = %s' % (len(m),))
        if 'Date' in m: print('Date = %s' % (m['Date'],))
        if 'X-Nonsense' in m: pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        f.close()
# Generated by PyXR 0.9.4