PyXR

c:\python24\lib \ rfc822.py



0001 """RFC 2822 message manipulation.
0002 
0003 Note: This is only a very rough sketch of a full RFC-822 parser; in particular
0004 the tokenizing of addresses does not adhere to all the quoting rules.
0005 
0006 Note: RFC 2822 is a long awaited update to RFC 822.  This module should
0007 conform to RFC 2822, and is thus mis-named (it's not worth renaming it).  Some
0008 effort at RFC 2822 updates has been made, but a thorough audit has not been
0009 performed.  Consider any RFC 2822 non-conformance to be a bug.
0010 
0011     RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
0012     RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
0013 
0014 Directions for use:
0015 
0016 To create a Message object: first open a file, e.g.:
0017 
0018   fp = open(file, 'r')
0019 
0020 You can use any other legal way of getting an open file object, e.g. use
0021 sys.stdin or call os.popen().  Then pass the open file object to the Message()
0022 constructor:
0023 
0024   m = Message(fp)
0025 
0026 This class can work with any input object that supports a readline method.  If
0027 the input object has seek and tell capability, the rewindbody method will
0028 work; also illegal lines will be pushed back onto the input stream.  If the
0029 input object lacks seek but has an `unread' method that can push back a line
0030 of input, Message will use that to push back illegal lines.  Thus this class
0031 can be used to parse messages coming from a buffered stream.
0032 
0033 The optional `seekable' argument is provided as a workaround for certain stdio
0034 libraries in which tell() discards buffered data before discovering that the
0035 lseek() system call doesn't work.  For maximum portability, you should set the
0036 seekable argument to zero to prevent that initial tell() call when passing in
0037 an unseekable object such as a file object created from a socket object.  If
0038 it is 1 on entry -- which it is by default -- the tell() method of the open
0039 file object is called once; if this raises an exception, seekable is reset to
0040 0.  For other nonzero values of seekable, this test is not made.
0041 
0042 To get the text of a particular header there are several methods:
0043 
0044   str = m.getheader(name)
0045   str = m.getrawheader(name)
0046 
0047 where name is the name of the header, e.g. 'Subject'.  The difference is that
0048 getheader() strips the leading and trailing whitespace, while getrawheader()
0049 doesn't.  Both functions retain embedded whitespace (including newlines)
0050 exactly as they are specified in the header, and leave the case of the text
0051 unchanged.
0052 
0053 For addresses and address lists there are functions
0054 
0055   realname, mailaddress = m.getaddr(name)
0056   list = m.getaddrlist(name)
0057 
0058 where the latter returns a list of (realname, mailaddr) tuples.
0059 
0060 There is also a method
0061 
0062   time = m.getdate(name)
0063 
0064 which parses a Date-like field and returns a time-compatible tuple,
0065 i.e. a tuple such as returned by time.localtime() or accepted by
0066 time.mktime().
0067 
0068 See the class definition for lower level access methods.
0069 
0070 There are also some utility functions here.
0071 """
0072 # Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
0073 
0074 import time
0075 
# Names exported by "from rfc822 import *".
__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]

# The two spellings of an empty line (CRLF- and LF-terminated) that
# Message.islast() accepts as the end of the header section.
_blanklines = ('\r\n', '\n')            # Optimization for islast()
0079 
0080 
class Message:
    """Represents a single RFC 2822-compliant message.

    The headers are read from the input object when the instance is created.
    Afterwards these attributes are available:

    fp              -- the input object the headers were read from
    seekable        -- true if fp.tell()/fp.seek() are believed to work
    startofheaders  -- fp position before the headers were read, or None
    startofbody     -- fp position after the headers were read, or None
    headers         -- raw header lines, in order, line endings included
    dict            -- maps lowercased header name to its stripped value;
                       for a repeated header the last occurrence wins
    unixfrom        -- leading Unix "From " line(s), or ''
    status          -- '' if the headers parsed cleanly, else an error text

    The instance also behaves like a read/write mapping keyed by header name
    (case-insensitive), backed by `dict'.
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        fp is any object with a readline() method.  When seekable is exactly
        1 (the default), fp.tell() is tried once and seekable is reset to 0
        if that raises AttributeError or IOError.  Any other value is taken
        at face value without probing.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        # Remember where the headers start, if the stream lets us.
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        self.readheaders()
        # Remember where the body starts so rewindbody() can return here.
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the input object is not seekable.
        """
        if not self.seekable:
            # Call form of raise: it builds the same exception as the old
            # "raise IOError, msg" statement but parses on every Python.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Pick the push-back strategy for an illegal line: prefer the
        # stream's own unread(), otherwise remember positions with tell().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lines.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats with special header formats.

        This implementation returns the lowercased text before the first
        colon, or None when there is no colon after the first column.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        else:
            return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \r\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A new, different header ends the current run; a line that
                # starts with whitespace continues whatever came before it.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                # Past the first match: stop at the first line that is not
                # one of its continuation lines.
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop "<name>:" from the first line only.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the value being built.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                # A new occurrence starts; flush the previous one first.
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                if raw:
                    raw.append(', ')
                # Take everything after the first colon, the same way
                # getheaders() does.  Slicing unconditionally means a line
                # with no colon contributes its own text instead of reusing
                # the previous header's value or hitting an unbound local.
                raw.append(h[h.find(':') + 1:])
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime().  Returns None if the header is
        missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary.

        Raises KeyError if the header is not present.
        """
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        # One raw header line per line of the value.
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        doomed = []
        hit = 0
        for i, line in enumerate(self.headers):
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                doomed.append(i)
        # Delete from the back so the remaining indices stay valid.
        for i in reversed(doomed):
            del self.headers[i]

    def setdefault(self, name, default=""):
        """Return the value of a header, adding it first if it is missing.

        If the header is present its stored value is returned unchanged.
        Otherwise the header is appended with the value `default', which is
        also returned.
        """
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        else:
            text = name + ": " + default
            for line in text.split("\n"):
                self.headers.append(line + "\n")
            self.dict[lowername] = default
            return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lowercased) header field names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined back into one string."""
        return ''.join(self.headers)
0467 
0468 
0469 # Utility functions
0470 # -----------------
0471 
0472 # XXX Should fix unquote() and quote() to be really conformant.
0473 # XXX The inverses of the parse functions may also be useful.
0474 
0475 
def unquote(str):
    """Strip one enclosing pair of double quotes or angle brackets.

    For a double-quoted string the backslash escapes for backslash and
    double quote are undone as well.  Anything else, including strings
    shorter than two characters, is returned unchanged.
    """
    if len(str) <= 1:
        return str
    opener, closer = str[0], str[-1]
    inner = str[1:-1]
    if opener == '"' and closer == '"':
        unescaped = inner.replace('\\\\', '\\')
        return unescaped.replace('\\"', '"')
    if opener == '<' and closer == '>':
        return inner
    return str
0484 
0485 
def quote(str):
    """Escape a string for use inside a quoted-string.

    Every backslash is doubled and every double quote is preceded by a
    backslash.  No surrounding quote characters are added.
    """
    escaped = str.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return escaped
0489 
0490 
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found is returned; (None, None) is returned
    when nothing could be parsed.
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
0499 
0500 
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC 2822 in front of you.

    http://www.faqs.org/rfcs/rfc2822.html

    The parser is a cursor (`pos') moving over the text in `field'; each
    get*() method consumes the construct it is named after and leaves the
    cursor just past it.  Comments met on the way are collected in
    `commentlist'.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing one or more
        addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; any comment met on the way is
        appended to self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each one a
        (realname, mailaddr) tuple.  Empty list elements (as in
        "a@b, , c@d", which RFC 2822 accepts as obsolete syntax) are skipped
        rather than ending the parse, so addresses after them are kept.
        """
        result = []
        while self.pos < len(self.field):
            before = self.pos
            ad = self.getaddress()
            if ad:
                result += ad
            elif self.pos == before:
                # Nothing parsed and nothing consumed: stop instead of
                # looping forever (guards overriding subclasses).
                break
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one entry for a
        single mailbox, several for a group, and an empty list when nothing
        address-like was found at the current position.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot, not alias: getphraselist() appends to self.commentlist,
        # and the addr-spec branch below re-parses the same text.  Restoring
        # a copy keeps each comment from being collected twice.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < fieldlen:
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Stray special character: step over it.
                self.pos += 1

        self.gotonext()
        # Consume the comma that separates this address from the next.
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a '<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos += 1
        self.gotonext()
        adlist = ""
        while self.pos < len(self.field):
            if expectroute:
                # The domain after an '@' in the route is parsed and dropped.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that follows the addr-spec
                # (the closing '>' in well-formed input).
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the local part alone when no '@' follows it, otherwise
        local-part + '@' + domain.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.  If self is not
        looking at an instance of `beginchar' then getdelimited returns the
        empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        Returns the text between the delimiters with backslash escapes
        resolved; the delimiters themselves are not included.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        escaped = 0     # true when the previous character was a backslash
        self.pos += 1
        while self.pos < len(self.field):
            if escaped == 1:
                # Character after a backslash is taken literally.
                slist.append(self.field[self.pos])
                escaped = 0
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
            elif self.field[self.pos] == '\\':
                escaped = 1
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        if atomends is None:
            atomends = self.atomends

        # Advance to the first delimiter (or the end) and slice once.
        start = self.pos
        end = len(self.field)
        while self.pos < end and self.field[self.pos] not in atomends:
            self.pos += 1

        return self.field[start:self.pos]

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are appended to
        self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
0775 
class AddressList(AddrlistClass):
    """A parsed list of RFC 2822 addresses with set-like operators.

    The parsed (realname, mailaddr) tuples are kept in self.addresslist.
    `+' and `-' build a new AddressList holding the union or difference of
    two lists; `+=' and `-=' update the left operand in place.
    """

    def __init__(self, field):
        """Parse `field'; an empty or None field gives an empty list."""
        AddrlistClass.__init__(self, field)
        if not field:
            self.addresslist = []
        else:
            self.addresslist = self.getaddrlist()

    def __len__(self):
        """Return the number of parsed addresses."""
        return len(self.addresslist)

    def __str__(self):
        """Return the addresses in canonical form, joined by ", "."""
        dumped = [dump_address_pair(pair) for pair in self.addresslist]
        return ", ".join(dumped)

    def __add__(self, other):
        """Return a new AddressList: self plus other's addresses not in self."""
        # Set union
        union = AddressList(None)
        extra = [addr for addr in other.addresslist
                 if addr not in self.addresslist]
        union.addresslist = self.addresslist[:] + extra
        return union

    def __iadd__(self, other):
        """Append other's addresses that are not already present; return self."""
        # Set union, in-place
        for addr in other.addresslist:
            if addr in self.addresslist:
                continue
            self.addresslist.append(addr)
        return self

    def __sub__(self, other):
        """Return a new AddressList: self's addresses that are not in other."""
        # Set difference
        difference = AddressList(None)
        difference.addresslist = [addr for addr in self.addresslist
                                  if addr not in other.addresslist]
        return difference

    def __isub__(self, other):
        """Remove one occurrence of each of other's addresses; return self."""
        # Set difference, in-place
        for addr in other.addresslist:
            if addr not in self.addresslist:
                continue
            self.addresslist.remove(addr)
        return self

    def __getitem__(self, index):
        """Return the address (or slice of addresses) at `index'."""
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
0825 
def dump_address_pair(pair):
    """Format a (name, address) pair in canonical form.

    With a non-empty name the result is '"name" <address>'; otherwise the
    bare address is returned.
    """
    realname = pair[0]
    if not realname:
        return pair[1]
    return '"' + realname + '" <' + pair[1] + '>'
0832 
0833 # Parse a date field
0834 
0835 _monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
0836                'aug', 'sep', 'oct', 'nov', 'dec',
0837                'january', 'february', 'march', 'april', 'may', 'june', 'july',
0838                'august', 'september', 'october', 'november', 'december']
0839 _daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
0840 
0841 # The timezone table does not include the military time zones defined
0842 # in RFC822, other than Z.  According to RFC1123, the description in
0843 # RFC822 gets the signs wrong, so we can't rely on any such time
0844 # zones.  RFC1123 recommends that numeric timezone indicators be used
0845 # instead of timezone names.
0846 
0847 _timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
0848               'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
0849               'EST': -500, 'EDT': -400,  # Eastern
0850               'CST': -600, 'CDT': -500,  # Central
0851               'MST': -700, 'MDT': -600,  # Mountain
0852               'PST': -800, 'PDT': -700   # Pacific
0853               }
0854 
0855 
def parsedate_tz(data):
    """Convert a date string to a 10-item time tuple.

    data is the text of a date header, e.g.
    "Sun, 06 Nov 1994 08:49:37 GMT"; a leading day name is skipped, the
    deprecated RFC 850 form "06-Nov-94 08:49:37 GMT" is accepted, and the
    day/month and year/time fields may appear in either order.

    Returns None when data is empty, contains only whitespace, or cannot
    be parsed.  Otherwise returns

        (year, month, day, hour, minute, second, 0, 1, 0, tzoffset)

    where items 6-8 are fixed placeholders and tzoffset is the zone's
    offset from UTC in seconds (negative west of Greenwich), or None when
    no usable zone was found.  The year is returned exactly as written;
    two-digit years are not expanded.

    Zones are recognised either as a numeric "+hhmm"/"-hhmm" offset or as
    one of the names in _timezones; of the military zones only Z is known.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input splits to nothing; there is no data[0].
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued onto the time ("08:49:37+0100" or
        # "08:49:37-0500"); split it off, keeping the sign with the zone.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps the month came first ("Nov 6 ..."); try the other order.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12: mm = mm - 12    # full month names follow the abbreviations
    # endswith()/slicing are used below instead of indexing because a field
    # can be empty after an RFC 850 split (e.g. "-Nov-94"); such input then
    # falls through to the int() conversions and yields None.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # The time was given before the year; swap them.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # The "year" slot actually holds the zone; swap them.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone name or the dummy ''; leave tzoffset as None.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
0935 
0936 
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    Parses data with parsedate_tz() and drops the trailing timezone
    offset, returning the first nine items of its result.  Anything
    parsedate_tz() returns that is not a tuple (None, for a date that
    cannot be parsed) is passed through unchanged.
    """
    t = parsedate_tz(data)
    if isinstance(t, tuple):
        return t[:9]
    return t
0943 
0944 
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None when the zone is
    unknown; the first eight items are handed to time.mktime().
    """
    zone_offset = data[9]
    fields = data[:8]
    if zone_offset is not None:
        # Convert as local time with the DST flag forced to 0, then remove
        # both the message's own offset and the local zone's offset.
        local_seconds = time.mktime(fields + (0,))
        return local_seconds - zone_offset - time.timezone
    # No zone info, so localtime is better assumption than GMT; a DST flag
    # of -1 leaves the daylight-saving decision to mktime().
    return time.mktime(fields + (-1,))
0953 
def formatdate(timeval=None):
    """Return a time in the format preferred for Internet standards.

    Example:

        Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    timeval is a number of seconds since the epoch; when it is None the
    current time is used.  The result is always expressed in GMT.

    RFC 1123 requires English day and month names, which is why the names
    are taken from fixed tables here instead of from strftime():
    strftime() honors the locale and could generate non-English names.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = time.gmtime(timeval)[:7]
    day_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    month_names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        day_names[weekday], day, month_names[month - 1],
        year, hour, minute, second)
0973 
0974 
0975 # When used as script, run a small test program.
0976 # The first command line argument must be a filename containing one
0977 # message in RFC-822 format.
0978 
0979 if __name__ == '__main__':
0980     import sys, os
0981     file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
0982     if sys.argv[1:]: file = sys.argv[1]
0983     f = open(file, 'r')
0984     m = Message(f)
0985     print 'From:', m.getaddr('from')
0986     print 'To:', m.getaddrlist('to')
0987     print 'Subject:', m.getheader('subject')
0988     print 'Date:', m.getheader('date')
0989     date = m.getdate_tz('date')
0990     tz = date[-1]
0991     date = time.localtime(mktime_tz(date))
0992     if date:
0993         print 'ParsedDate:', time.asctime(date),
0994         hhmmss = tz
0995         hhmm, ss = divmod(hhmmss, 60)
0996         hh, mm = divmod(hhmm, 60)
0997         print "%+03d%02d" % (hh, mm),
0998         if ss: print ".%02d" % ss,
0999         print
1000     else:
1001         print 'ParsedDate:', None
1002     m.rewindbody()
1003     n = 0
1004     while f.readline():
1005         n = n + 1
1006     print 'Lines:', n
1007     print '-'*70
1008     print 'len =', len(m)
1009     if 'Date' in m: print 'Date =', m['Date']
1010     if 'X-Nonsense' in m: pass
1011     print 'keys =', m.keys()
1012     print 'values =', m.values()
1013     print 'items =', m.items()
1014 

Generated by PyXR 0.9.4
SourceForge.net Logo