PyXR

c:\python24\lib \ urllib.py



0001 """Open an arbitrary URL.
0002 
0003 See the following document for more info on URLs:
0004 "Names and Addresses, URIs, URLs, URNs, URCs", at
0005 http://www.w3.org/pub/WWW/Addressing/Overview.html
0006 
0007 See also the HTTP spec (from which the error codes are derived):
0008 "HTTP - Hypertext Transfer Protocol", at
0009 http://www.w3.org/pub/WWW/Protocols/
0010 
0011 Related standards and specs:
0012 - RFC1808: the "relative URL" spec. (authoritative status)
0013 - RFC1738 - the "URL standard". (authoritative status)
0014 - RFC1630 - the "URI spec". (informational status)
0015 
0016 The object returned by URLopener().open(file) will differ per
0017 protocol.  All you know is that it has methods read(), readline(),
0018 readlines(), fileno(), close() and info().  The read*(), fileno()
0019 and close() methods work like those of open files.
0020 The info() method returns a mimetools.Message object which can be
0021 used to query various info about the object, if available.
0022 (mimetools.Message objects are queried with the getheader() method.)
0023 """
0024 
0025 import string
0026 import socket
0027 import os
0028 import time
0029 import sys
0030 from urlparse import urljoin as basejoin
0031 
# Public names exported by "from urllib import *".
__all__ = [
    "urlopen", "URLopener", "FancyURLopener", "urlretrieve", "urlcleanup",
    "quote", "quote_plus", "unquote", "unquote_plus", "urlencode",
    "url2pathname", "pathname2url", "splittag", "localhost", "thishost",
    "ftperrors", "basejoin", "unwrap", "splittype", "splithost",
    "splituser", "splitpasswd", "splitport", "splitnport", "splitquery",
    "splitattr", "splitvalue", "splitgophertype", "getproxies",
]

# XXX This version is not always updated :-(
__version__ = '1.16'

# Trim the ftp cache beyond this size
MAXFTPCACHE = 10

# url2pathname() / pathname2url(): platforms with their own path
# conventions supply a dedicated module; everything else falls back to
# plain quoting and unquoting.
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Convert the path part of a URL to a local path by unquoting it."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a local path to the path part of a URL by quoting it."""
        return quote(pathname)
0056 
0057 # This really consists of two pieces:
0058 # (1) a class which handles opening of all sorts of URLs
0059 #     (plus assorted utilities etc.)
0060 # (2) a set of functions for parsing URLs
0061 # XXX Should these be separated out into different modules?
0062 
0063 
0064 # Shortcut for basic usage
# Module-wide opener, created lazily and shared by the shortcut functions.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    When proxies is given, a fresh FancyURLopener using that mapping
    handles the request and the shared module-level opener is left
    untouched.  Otherwise the shared opener is used (and created on
    first use).  data, when not None, is passed on to the opener's
    open() method.
    """
    global _urlopener
    if proxies is None:
        if not _urlopener:
            _urlopener = FancyURLopener()
        handler = _urlopener
    else:
        handler = FancyURLopener(proxies=proxies)
    if data is not None:
        return handler.open(url, data)
    return handler.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the shared opener's retrieve() method.

    The shared module-level opener is created on first use; all
    arguments are forwarded unchanged and retrieve()'s result is
    returned.
    """
    global _urlopener
    shared = _urlopener
    if not shared:
        shared = _urlopener = FancyURLopener()
    return shared.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared opener, if one has been created."""
    shared = _urlopener
    if not shared:
        return
    shared.cleanup()
0088 
0089 
0090 ftpcache = {}
0091 class URLopener:
0092     """Class to open URLs.
0093     This is a class rather than just a subroutine because we may need
0094     more than one set of global protocol-specific options.
0095     Note -- this is a base class for those who don't want the
0096     automatic handling of errors type 302 (relocated) and 401
0097     (authorization needed)."""
0098 
0099     __tempfiles = None
0100 
0101     version = "Python-urllib/%s" % __version__
0102 
0103     # Constructor
0104     def __init__(self, proxies=None, **x509):
0105         if proxies is None:
0106             proxies = getproxies()
0107         assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
0108         self.proxies = proxies
0109         self.key_file = x509.get('key_file')
0110         self.cert_file = x509.get('cert_file')
0111         self.addheaders = [('User-agent', self.version)]
0112         self.__tempfiles = []
0113         self.__unlink = os.unlink # See cleanup()
0114         self.tempcache = None
0115         # Undocumented feature: if you assign {} to tempcache,
0116         # it is used to cache files retrieved with
0117         # self.retrieve().  This is not enabled by default
0118         # since it does not work for changing documents (and I
0119         # haven't got the logic to check expiration headers
0120         # yet).
0121         self.ftpcache = ftpcache
0122         # Undocumented feature: you can use a different
0123         # ftp cache by assigning to the .ftpcache member;
0124         # in case you want logically independent URL openers
0125         # XXX This is not threadsafe.  Bah.
0126 
0127     def __del__(self):
0128         self.close()
0129 
0130     def close(self):
0131         self.cleanup()
0132 
0133     def cleanup(self):
0134         # This code sometimes runs when the rest of this module
0135         # has already been deleted, so it can't use any globals
0136         # or import anything.
0137         if self.__tempfiles:
0138             for file in self.__tempfiles:
0139                 try:
0140                     self.__unlink(file)
0141                 except OSError:
0142                     pass
0143             del self.__tempfiles[:]
0144         if self.tempcache:
0145             self.tempcache.clear()
0146 
0147     def addheader(self, *args):
0148         """Add a header to be used by the HTTP interface only
0149         e.g. u.addheader('Accept', 'sound/basic')"""
0150         self.addheaders.append(args)
0151 
0152     # External interface
0153     def open(self, fullurl, data=None):
0154         """Use URLopener().open(file) instead of open(file, 'r')."""
0155         fullurl = unwrap(toBytes(fullurl))
0156         if self.tempcache and fullurl in self.tempcache:
0157             filename, headers = self.tempcache[fullurl]
0158             fp = open(filename, 'rb')
0159             return addinfourl(fp, headers, fullurl)
0160         urltype, url = splittype(fullurl)
0161         if not urltype:
0162             urltype = 'file'
0163         if urltype in self.proxies:
0164             proxy = self.proxies[urltype]
0165             urltype, proxyhost = splittype(proxy)
0166             host, selector = splithost(proxyhost)
0167             url = (host, fullurl) # Signal special case to open_*()
0168         else:
0169             proxy = None
0170         name = 'open_' + urltype
0171         self.type = urltype
0172         name = name.replace('-', '_')
0173         if not hasattr(self, name):
0174             if proxy:
0175                 return self.open_unknown_proxy(proxy, fullurl, data)
0176             else:
0177                 return self.open_unknown(fullurl, data)
0178         try:
0179             if data is None:
0180                 return getattr(self, name)(url)
0181             else:
0182                 return getattr(self, name)(url, data)
0183         except socket.error, msg:
0184             raise IOError, ('socket error', msg), sys.exc_info()[2]
0185 
0186     def open_unknown(self, fullurl, data=None):
0187         """Overridable interface to open unknown URL type."""
0188         type, url = splittype(fullurl)
0189         raise IOError, ('url error', 'unknown url type', type)
0190 
0191     def open_unknown_proxy(self, proxy, fullurl, data=None):
0192         """Overridable interface to open unknown URL type."""
0193         type, url = splittype(fullurl)
0194         raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
0195 
0196     # External interface
0197     def retrieve(self, url, filename=None, reporthook=None, data=None):
0198         """retrieve(url) returns (filename, headers) for a local object
0199         or (tempfilename, headers) for a remote object."""
0200         url = unwrap(toBytes(url))
0201         if self.tempcache and url in self.tempcache:
0202             return self.tempcache[url]
0203         type, url1 = splittype(url)
0204         if filename is None and (not type or type == 'file'):
0205             try:
0206                 fp = self.open_local_file(url1)
0207                 hdrs = fp.info()
0208                 del fp
0209                 return url2pathname(splithost(url1)[1]), hdrs
0210             except IOError, msg:
0211                 pass
0212         fp = self.open(url, data)
0213         headers = fp.info()
0214         if filename:
0215             tfp = open(filename, 'wb')
0216         else:
0217             import tempfile
0218             garbage, path = splittype(url)
0219             garbage, path = splithost(path or "")
0220             path, garbage = splitquery(path or "")
0221             path, garbage = splitattr(path or "")
0222             suffix = os.path.splitext(path)[1]
0223             (fd, filename) = tempfile.mkstemp(suffix)
0224             self.__tempfiles.append(filename)
0225             tfp = os.fdopen(fd, 'wb')
0226         result = filename, headers
0227         if self.tempcache is not None:
0228             self.tempcache[url] = result
0229         bs = 1024*8
0230         size = -1
0231         blocknum = 1
0232         if reporthook:
0233             if "content-length" in headers:
0234                 size = int(headers["Content-Length"])
0235             reporthook(0, bs, size)
0236         block = fp.read(bs)
0237         if reporthook:
0238             reporthook(1, bs, size)
0239         while block:
0240             tfp.write(block)
0241             block = fp.read(bs)
0242             blocknum = blocknum + 1
0243             if reporthook:
0244                 reporthook(blocknum, bs, size)
0245         fp.close()
0246         tfp.close()
0247         del fp
0248         del tfp
0249         return result
0250 
0251     # Each method named open_<type> knows how to open that type of URL
0252 
0253     def open_http(self, url, data=None):
0254         """Use HTTP protocol."""
0255         import httplib
0256         user_passwd = None
0257         if isinstance(url, str):
0258             host, selector = splithost(url)
0259             if host:
0260                 user_passwd, host = splituser(host)
0261                 host = unquote(host)
0262             realhost = host
0263         else:
0264             host, selector = url
0265             urltype, rest = splittype(selector)
0266             url = rest
0267             user_passwd = None
0268             if urltype.lower() != 'http':
0269                 realhost = None
0270             else:
0271                 realhost, rest = splithost(rest)
0272                 if realhost:
0273                     user_passwd, realhost = splituser(realhost)
0274                 if user_passwd:
0275                     selector = "%s://%s%s" % (urltype, realhost, rest)
0276                 if proxy_bypass(realhost):
0277                     host = realhost
0278 
0279             #print "proxy via http:", host, selector
0280         if not host: raise IOError, ('http error', 'no host given')
0281         if user_passwd:
0282             import base64
0283             auth = base64.encodestring(user_passwd).strip()
0284         else:
0285             auth = None
0286         h = httplib.HTTP(host)
0287         if data is not None:
0288             h.putrequest('POST', selector)
0289             h.putheader('Content-type', 'application/x-www-form-urlencoded')
0290             h.putheader('Content-length', '%d' % len(data))
0291         else:
0292             h.putrequest('GET', selector)
0293         if auth: h.putheader('Authorization', 'Basic %s' % auth)
0294         if realhost: h.putheader('Host', realhost)
0295         for args in self.addheaders: h.putheader(*args)
0296         h.endheaders()
0297         if data is not None:
0298             h.send(data)
0299         errcode, errmsg, headers = h.getreply()
0300         fp = h.getfile()
0301         if errcode == 200:
0302             return addinfourl(fp, headers, "http:" + url)
0303         else:
0304             if data is None:
0305                 return self.http_error(url, fp, errcode, errmsg, headers)
0306             else:
0307                 return self.http_error(url, fp, errcode, errmsg, headers, data)
0308 
0309     def http_error(self, url, fp, errcode, errmsg, headers, data=None):
0310         """Handle http errors.
0311         Derived class can override this, or provide specific handlers
0312         named http_error_DDD where DDD is the 3-digit error code."""
0313         # First check if there's a specific handler for this error
0314         name = 'http_error_%d' % errcode
0315         if hasattr(self, name):
0316             method = getattr(self, name)
0317             if data is None:
0318                 result = method(url, fp, errcode, errmsg, headers)
0319             else:
0320                 result = method(url, fp, errcode, errmsg, headers, data)
0321             if result: return result
0322         return self.http_error_default(url, fp, errcode, errmsg, headers)
0323 
0324     def http_error_default(self, url, fp, errcode, errmsg, headers):
0325         """Default error handler: close the connection and raise IOError."""
0326         void = fp.read()
0327         fp.close()
0328         raise IOError, ('http error', errcode, errmsg, headers)
0329 
0330     if hasattr(socket, "ssl"):
0331         def open_https(self, url, data=None):
0332             """Use HTTPS protocol."""
0333             import httplib
0334             user_passwd = None
0335             if isinstance(url, str):
0336                 host, selector = splithost(url)
0337                 if host:
0338                     user_passwd, host = splituser(host)
0339                     host = unquote(host)
0340                 realhost = host
0341             else:
0342                 host, selector = url
0343                 urltype, rest = splittype(selector)
0344                 url = rest
0345                 user_passwd = None
0346                 if urltype.lower() != 'https':
0347                     realhost = None
0348                 else:
0349                     realhost, rest = splithost(rest)
0350                     if realhost:
0351                         user_passwd, realhost = splituser(realhost)
0352                     if user_passwd:
0353                         selector = "%s://%s%s" % (urltype, realhost, rest)
0354                 #print "proxy via https:", host, selector
0355             if not host: raise IOError, ('https error', 'no host given')
0356             if user_passwd:
0357                 import base64
0358                 auth = base64.encodestring(user_passwd).strip()
0359             else:
0360                 auth = None
0361             h = httplib.HTTPS(host, 0,
0362                               key_file=self.key_file,
0363                               cert_file=self.cert_file)
0364             if data is not None:
0365                 h.putrequest('POST', selector)
0366                 h.putheader('Content-type',
0367                             'application/x-www-form-urlencoded')
0368                 h.putheader('Content-length', '%d' % len(data))
0369             else:
0370                 h.putrequest('GET', selector)
0371             if auth: h.putheader('Authorization', 'Basic %s' % auth)
0372             if realhost: h.putheader('Host', realhost)
0373             for args in self.addheaders: h.putheader(*args)
0374             h.endheaders()
0375             if data is not None:
0376                 h.send(data)
0377             errcode, errmsg, headers = h.getreply()
0378             fp = h.getfile()
0379             if errcode == 200:
0380                 return addinfourl(fp, headers, "https:" + url)
0381             else:
0382                 if data is None:
0383                     return self.http_error(url, fp, errcode, errmsg, headers)
0384                 else:
0385                     return self.http_error(url, fp, errcode, errmsg, headers,
0386                                            data)
0387 
0388     def open_gopher(self, url):
0389         """Use Gopher protocol."""
0390         import gopherlib
0391         host, selector = splithost(url)
0392         if not host: raise IOError, ('gopher error', 'no host given')
0393         host = unquote(host)
0394         type, selector = splitgophertype(selector)
0395         selector, query = splitquery(selector)
0396         selector = unquote(selector)
0397         if query:
0398             query = unquote(query)
0399             fp = gopherlib.send_query(selector, query, host)
0400         else:
0401             fp = gopherlib.send_selector(selector, host)
0402         return addinfourl(fp, noheaders(), "gopher:" + url)
0403 
0404     def open_file(self, url):
0405         """Use local file or FTP depending on form of URL."""
0406         if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
0407             return self.open_ftp(url)
0408         else:
0409             return self.open_local_file(url)
0410 
0411     def open_local_file(self, url):
0412         """Use local file."""
0413         import mimetypes, mimetools, email.Utils, StringIO
0414         host, file = splithost(url)
0415         localname = url2pathname(file)
0416         try:
0417             stats = os.stat(localname)
0418         except OSError, e:
0419             raise IOError(e.errno, e.strerror, e.filename)
0420         size = stats.st_size
0421         modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
0422         mtype = mimetypes.guess_type(url)[0]
0423         headers = mimetools.Message(StringIO.StringIO(
0424             'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
0425             (mtype or 'text/plain', size, modified)))
0426         if not host:
0427             urlfile = file
0428             if file[:1] == '/':
0429                 urlfile = 'file://' + file
0430             return addinfourl(open(localname, 'rb'),
0431                               headers, urlfile)
0432         host, port = splitport(host)
0433         if not port \
0434            and socket.gethostbyname(host) in (localhost(), thishost()):
0435             urlfile = file
0436             if file[:1] == '/':
0437                 urlfile = 'file://' + file
0438             return addinfourl(open(localname, 'rb'),
0439                               headers, urlfile)
0440         raise IOError, ('local file error', 'not on local host')
0441 
0442     def open_ftp(self, url):
0443         """Use FTP protocol."""
0444         import mimetypes, mimetools, StringIO
0445         host, path = splithost(url)
0446         if not host: raise IOError, ('ftp error', 'no host given')
0447         host, port = splitport(host)
0448         user, host = splituser(host)
0449         if user: user, passwd = splitpasswd(user)
0450         else: passwd = None
0451         host = unquote(host)
0452         user = unquote(user or '')
0453         passwd = unquote(passwd or '')
0454         host = socket.gethostbyname(host)
0455         if not port:
0456             import ftplib
0457             port = ftplib.FTP_PORT
0458         else:
0459             port = int(port)
0460         path, attrs = splitattr(path)
0461         path = unquote(path)
0462         dirs = path.split('/')
0463         dirs, file = dirs[:-1], dirs[-1]
0464         if dirs and not dirs[0]: dirs = dirs[1:]
0465         if dirs and not dirs[0]: dirs[0] = '/'
0466         key = user, host, port, '/'.join(dirs)
0467         # XXX thread unsafe!
0468         if len(self.ftpcache) > MAXFTPCACHE:
0469             # Prune the cache, rather arbitrarily
0470             for k in self.ftpcache.keys():
0471                 if k != key:
0472                     v = self.ftpcache[k]
0473                     del self.ftpcache[k]
0474                     v.close()
0475         try:
0476             if not key in self.ftpcache:
0477                 self.ftpcache[key] = \
0478                     ftpwrapper(user, passwd, host, port, dirs)
0479             if not file: type = 'D'
0480             else: type = 'I'
0481             for attr in attrs:
0482                 attr, value = splitvalue(attr)
0483                 if attr.lower() == 'type' and \
0484                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
0485                     type = value.upper()
0486             (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
0487             mtype = mimetypes.guess_type("ftp:" + url)[0]
0488             headers = ""
0489             if mtype:
0490                 headers += "Content-Type: %s\n" % mtype
0491             if retrlen is not None and retrlen >= 0:
0492                 headers += "Content-Length: %d\n" % retrlen
0493             headers = mimetools.Message(StringIO.StringIO(headers))
0494             return addinfourl(fp, headers, "ftp:" + url)
0495         except ftperrors(), msg:
0496             raise IOError, ('ftp error', msg), sys.exc_info()[2]
0497 
0498     def open_data(self, url, data=None):
0499         """Use "data" URL."""
0500         # ignore POSTed data
0501         #
0502         # syntax of data URLs:
0503         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
0504         # mediatype := [ type "/" subtype ] *( ";" parameter )
0505         # data      := *urlchar
0506         # parameter := attribute "=" value
0507         import StringIO, mimetools
0508         try:
0509             [type, data] = url.split(',', 1)
0510         except ValueError:
0511             raise IOError, ('data error', 'bad data URL')
0512         if not type:
0513             type = 'text/plain;charset=US-ASCII'
0514         semi = type.rfind(';')
0515         if semi >= 0 and '=' not in type[semi:]:
0516             encoding = type[semi+1:]
0517             type = type[:semi]
0518         else:
0519             encoding = ''
0520         msg = []
0521         msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
0522                                             time.gmtime(time.time())))
0523         msg.append('Content-type: %s' % type)
0524         if encoding == 'base64':
0525             import base64
0526             data = base64.decodestring(data)
0527         else:
0528             data = unquote(data)
0529         msg.append('Content-length: %d' % len(data))
0530         msg.append('')
0531         msg.append(data)
0532         msg = '\n'.join(msg)
0533         f = StringIO.StringIO(msg)
0534         headers = mimetools.Message(f, 0)
0535         f.fileno = None     # needed for addinfourl
0536         return addinfourl(f, headers, url)
0537 
0538 
class FancyURLopener(URLopener):
    """URLopener that follows redirects and retries with basic auth."""

    def __init__(self, *args, **kwargs):
        """Initialise the base opener, then the redirect/auth state."""
        URLopener.__init__(self, *args, **kwargs)
        self.maxtries = 10      # redirect limit checked in http_error_302()
        self.tries = 0          # redirects followed so far
        self.auth_cache = {}    # 'realm@host' -> (user, passwd)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Hand back the error response as a normal result; never raise."""
        return addinfourl(fp, headers, "http:" + url)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect, unless the number of redirects reaches
        self.maxtries; then the response is reported as a 500
        "Redirect Recursion" error instead.
        """
        self.tries += 1
        if not (self.maxtries and self.tries >= self.maxtries):
            outcome = self.redirect_internal(url, fp, errcode, errmsg,
                                             headers, data)
            self.tries = 0
            return outcome
        # Too many redirects in a row.
        if hasattr(self, "http_error_500"):
            handler = self.http_error_500
        else:
            handler = self.http_error_default
        self.tries = 0
        return handler(url, fp, 500,
                       "Internal Server Error: Redirect Recursion", headers)

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the URL named by the Location (or URI) header.

        Returns None when neither header is present.
        """
        for field in ('location', 'uri'):
            if field in headers:
                target = headers[field]
                break
        else:
            return
        fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        target = basejoin(self.type + ":" + url, target)
        return self.open(target)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- relocated (permanently); handled like 302."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- relocated; handled like 302."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated; with POST data it is reported as an error."""
        if data is not None:
            return self.http_error_default(url, fp, errcode, errmsg, headers)
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        See this URL for a description of the basic authentication scheme:
        http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt

        Anything other than a well-formed 'basic' challenge is passed to
        URLopener.http_error_default(); otherwise the request is retried
        through retry_<type>_basic_auth()."""
        import re

        def give_up():
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)

        if 'www-authenticate' not in headers:
            give_up()
        challenge = headers['www-authenticate']
        found = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', challenge)
        if not found:
            give_up()
        scheme, realm = found.groups()
        if scheme.lower() != 'basic':
            give_up()
        retry = getattr(self, 'retry_' + self.type + '_basic_auth')
        if data is not None:
            return retry(url, realm, data)
        return retry(url, realm)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Re-open an http URL with user:password@ added to the host.

        Returns None when no user name or password is obtained.
        """
        host, selector = splithost(url)
        at = host.find('@') + 1
        host = host[at:]
        # A non-zero 'at' means credentials were already present in the
        # URL; it is passed as the clear_cache argument.
        user, passwd = self.get_user_passwd(host, realm, at)
        if not user and not passwd:
            return None
        credentials = quote(user, safe='') + ':' + quote(passwd, safe='')
        newurl = 'http://' + credentials + '@' + host + selector
        if data is not None:
            return self.open(newurl, data)
        return self.open(newurl)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Re-open an https URL with user:password@ added to the host.

        Returns None when no user name or password is obtained.
        """
        host, selector = splithost(url)
        at = host.find('@') + 1
        host = host[at:]
        user, passwd = self.get_user_passwd(host, realm, at)
        if not user and not passwd:
            return None
        credentials = quote(user, safe='') + ':' + quote(passwd, safe='')
        newurl = '//' + credentials + '@' + host + selector
        return self.open_https(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for realm at host, prompting if needed.

        A cached pair is returned unless clear_cache is true, in which
        case it is dropped and the user is asked again.  A new pair is
        cached when either part is non-empty.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if not clear_cache:
                return self.auth_cache[key]
            del self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd:
            self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Asks for user name and password on the terminal; returns
        (None, None) when interrupted with KeyboardInterrupt."""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            prompt = "Enter password for %s in %s at %s: " % (user, realm,
                                                              host)
            passwd = getpass.getpass(prompt)
        except KeyboardInterrupt:
            print
            return None, None
        return user, passwd
0665 
0666 
0667 # Utility functions
0668 
# Result of the first lookup; filled in by localhost().
_localhost = None


def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once; later calls return the remembered value.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
0676 
# Result of the first lookup; filled in by thishost().
_thishost = None


def thishost():
    """Return the IP address of the current host.

    The lookup is done once; later calls return the remembered value.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
0684 
# ftplib.all_errors, fetched on first use by ftperrors().
_ftperrors = None


def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported only when the function is first called.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
0693 
0694 _noheaders = None
0695 def noheaders():
0696     """Return an empty mimetools.Message object."""
0697     global _noheaders
0698     if _noheaders is None:
0699         import mimetools
0700         import StringIO
0701         _noheaders = mimetools.Message(StringIO.StringIO(), 0)
0702         _noheaders.fp.close()   # Recycle file descriptor
0703     return _noheaders
0704 
0705 
0706 # Utility classes
0707 
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    Each instance owns one ftplib.FTP session that has been logged in
    and moved into the directory described by `dirs`.
    """

    def __init__(self, user, passwd, host, port, dirs):
        # Keep the connection parameters around: init() uses them, and
        # retrfile() calls init() again when the session has gone stale.
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.init()

    def init(self):
        """Connect, log in and change into each directory of self.dirs."""
        import ftplib
        # No transfer is pending on a freshly opened session.
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start a transfer and return (file object, retrieval length).

        `type` is appended to the FTP 'TYPE' command, except that 'd'
        or 'D' asks for a directory listing (transferred with 'TYPE A').
        When `file` is empty, or RETR is refused with a 550 reply, a
        LIST of `file` is returned instead of the file contents.

        The returned file object is wrapped in addclosehook so that
        closing it calls endtransfer().  The second element is conn[1]
        as returned by ftplib's ntransfercmd().

        Raises IOError ('ftp error', reason) when the NLST existence
        check fails with a permanent error, or when RETR fails with a
        permanent error other than 550.
        """
        import ftplib
        # Finish any transfer that is still pending on this session.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed; rebuild the session and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Use nlst to see if the file exists at all
            try:
                self.ftp.nlst(file)
            except ftplib.error_perm, reason:
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
            # Restore the transfer mode!
            self.ftp.voidcmd(cmd)
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A 550 reply is not fatal here: conn stays None and the
                # directory listing below is tried instead.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file: cmd = 'LIST ' + file
            else: cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        # A transfer is now in progress; endtransfer() clears the flag.
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])

    def endtransfer(self):
        """Read the pending reply of a transfer started by retrfile().

        Does nothing when no transfer is marked as busy.  Errors listed
        by ftperrors() are ignored.
        """
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """End any pending transfer and close the FTP connection.

        Errors listed by ftperrors() while closing are ignored.
        """
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
0780 
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its reading methods as
    attributes of the wrapper.
    """

    def __init__(self, fp):
        self.fp = fp
        # read() and readline() are mandatory on the wrapped object.
        self.read = fp.read
        self.readline = fp.readline
        # The remaining methods are forwarded only when available.
        for optional in ("readlines", "fileno"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped file."""
        self.read = self.readline = self.readlines = self.fileno = None
        wrapped = self.fp
        if wrapped:
            wrapped.close()
        self.fp = None
0806 
class addclosehook(addbase):
    """Wrap an open file so that a callback runs when it is closed."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook, self.hookargs = closehook, hookargs

    def close(self):
        """Close the wrapped file, then call closehook(*hookargs)."""
        # The file is closed before the hook runs.
        addbase.close(self)
        hook = self.closehook
        if not hook:
            return
        hook(*self.hookargs)
        # Forget the hook so that a later close() does not call it again.
        self.closehook = None
        self.hookargs = None
0821 
class addinfo(addbase):
    """Wrap an open file and attach headers, exposed through info()."""

    def __init__(self, fp, headers):
        self.headers = headers
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers
0831 
class addinfourl(addbase):
    """Wrap an open file and attach headers and a URL.

    The headers are exposed through info(), the URL through geturl().
    """

    def __init__(self, fp, headers, url):
        self.headers, self.url = headers, url
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL passed to the constructor."""
        return self.url
0845 
0846 
0847 # Utilities to parse URLs (most of these return None for missing parts):
0848 # unwrap('<URL:type://host/path>') --> 'type://host/path'
0849 # splittype('type:opaquestring') --> 'type', 'opaquestring'
0850 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
0851 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
0852 # splitpasswd('user:passwd') -> 'user', 'passwd'
0853 # splitport('host:port') --> 'host', 'port'
0854 # splitquery('/path?query') --> '/path', 'query'
0855 # splittag('/path#tag') --> '/path', 'tag'
0856 # splitattr('/path;attr1=value1;attr2=value2;...') ->
0857 #   '/path', ['attr1=value1', 'attr2=value2', ...]
0858 # splitvalue('attr=value') --> 'attr', 'value'
0859 # splitgophertype('/Xselector') --> 'X', 'selector'
0860 # unquote('abc%20def') -> 'abc def'
0861 # quote('abc def') -> 'abc%20def'
0862 
0863 try:
0864     unicode
0865 except NameError:
0866     def _is_unicode(x):
0867         return 0
0868 else:
0869     def _is_unicode(x):
0870         return isinstance(x, unicode)
0871 
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode arguments are returned unchanged.  Raises UnicodeError
    when a unicode URL contains non-ASCII characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
0883 
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, one pair of angle brackets and a leading
    'URL:' marker are removed, in that order.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
0891 
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case; it is None when the URL has no
    scheme prefix.
    """
    global _typeprog
    if _typeprog is None:
        # Compile the pattern on first use and keep it for later calls.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    # The match covers the scheme plus the colon that follows it.
    rest = url[found.end():]
    return found.group(1).lower(), rest
0905 
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host is None when the URL does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compile the pattern on first use and keep it for later calls.
        import re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.group(1, 2)
0917 
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    When an '@' is present, both parts are passed through unquote() and
    returned as a list; otherwise the tuple (None, host) is returned.
    """
    global _userprog
    if _userprog is None:
        # Compile the pattern on first use and keep it for later calls.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
0929 
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The password is None when there is no ':' in the argument.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile the pattern on first use and keep it for later calls.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)
0941 
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  It is None, and the
    host is returned unchanged, when the argument does not end in a
    colon followed by at least one digit.
    """
    global _portprog
    if _portprog is None:
        # Compile the pattern on first use and keep it for later calls.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.group(1, 2)
0954 
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return the given default port if no ':' is found; defaults to -1.
    Return the numerical port if a valid number is found after ':'.
    Return None as the port if there is a ':' but no valid number.
    """
    global _nportprog
    if _nportprog is None:
        # Compile the pattern on first use and keep it for later calls.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    # An empty or non-numeric port is reported as None.
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
0976 
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; the query is None when the URL
    contains no '?'.
    """
    global _queryprog
    if _queryprog is None:
        # Compile the pattern on first use and keep it for later calls.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
0988 
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; the tag is None when the URL
    contains no '#'.
    """
    global _tagprog
    if _tagprog is None:
        # Compile the pattern on first use and keep it for later calls.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
1000 
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    pieces = url.split(';')
    # The first piece is the path; whatever is left are the attributes.
    path = pieces.pop(0)
    return path, pieces
1006 
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='; the value is None when the
    argument contains no '='.
    """
    global _valueprog
    if _valueprog is None:
        # Compile the pattern on first use and keep it for later calls.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)
1018 
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    The type is None, and the selector is returned unchanged, unless
    the selector starts with '/' followed by at least one character.
    """
    has_type = selector[:1] == '/' and selector[1:2]
    if not has_type:
        return None, selector
    return selector[1], selector[2:]
1024 
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two characters forming a hexadecimal
    number is left in the result unchanged.
    """
    pieces = s.split('%')
    # Everything before the first '%' is copied as is.
    decoded = pieces[:1]
    for chunk in pieces[1:]:
        # Default: put the '%' back and keep the text untouched.
        text = '%' + chunk
        if chunk[1:2]:
            try:
                text = chr(int(chunk[:2], 16)) + chunk[2:]
            except ValueError:
                pass
        decoded.append(text)
    return "".join(decoded)
1043 
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is turned into a space first.
    """
    return unquote(s.replace('+', ' '))
1048 
1049 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1050                'abcdefghijklmnopqrstuvwxyz'
1051                '0123456789' '_.-')
1052 
1053 _fast_safe_test = always_safe + '/'
1054 _fast_safe = None
1055 
1056 def _fast_quote(s):
1057     global _fast_safe
1058     if _fast_safe is None:
1059         _fast_safe = {}
1060         for c in _fast_safe_test:
1061             _fast_safe[c] = c
1062     res = list(s)
1063     for i in range(len(res)):
1064         c = res[i]
1065         if not c in _fast_safe:
1066             res[i] = '%%%02X' % ord(c)
1067     return ''.join(res)
1068 
def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Every character that is neither in always_safe nor in `safe` is
    replaced by a '%XX' escape.

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL, so '/' is left unescaped: on a path, the existing
    slashes are already acting as reserved characters.
    """
    unreserved = always_safe + safe
    if unreserved == _fast_safe_test:
        # Default safe set: use the dictionary-based implementation.
        return _fast_quote(s)
    quoted = []
    for ch in s:
        if ch in unreserved:
            quoted.append(ch)
        else:
            quoted.append('%%%02X' % ord(ch))
    return ''.join(quoted)
1099 
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Quote each space-separated word on its own, then rejoin with '+'.
    return '+'.join([quote(word, safe) for word in s.split(' ')])
1109 
1110 def urlencode(query,doseq=0):
1111     """Encode a sequence of two-element tuples or dictionary into a URL query string.
1112 
1113     If any values in the query arg are sequences and doseq is true, each
1114     sequence element is converted to a separate parameter.
1115 
1116     If the query arg is a sequence of two-element tuples, the order of the
1117     parameters in the output will match the order of parameters in the
1118     input.
1119     """
1120 
1121     if hasattr(query,"items"):
1122         # mapping objects
1123         query = query.items()
1124     else:
1125         # it's a bother at times that strings and string-like objects are
1126         # sequences...
1127         try:
1128             # non-sequence items should not work with len()
1129             # non-empty strings will fail this
1130             if len(query) and not isinstance(query[0], tuple):
1131                 raise TypeError
1132             # zero-length sequences of all types will get here and succeed,
1133             # but that's a minor nit - since the original implementation
1134             # allowed empty dicts that type of behavior probably should be
1135             # preserved for consistency
1136         except TypeError:
1137             ty,va,tb = sys.exc_info()
1138             raise TypeError, "not a valid non-string sequence or mapping object", tb
1139 
1140     l = []
1141     if not doseq:
1142         # preserve old behavior
1143         for k, v in query:
1144             k = quote_plus(str(k))
1145             v = quote_plus(str(v))
1146             l.append(k + '=' + v)
1147     else:
1148         for k, v in query:
1149             k = quote_plus(str(k))
1150             if isinstance(v, str):
1151                 v = quote_plus(v)
1152                 l.append(k + '=' + v)
1153             elif _is_unicode(v):
1154                 # is there a reasonable way to convert to ASCII?
1155                 # encode generates a string, but "replace" or "ignore"
1156                 # lose information and "strict" can raise UnicodeError
1157                 v = quote_plus(v.encode("ASCII","replace"))
1158                 l.append(k + '=' + v)
1159             else:
1160                 try:
1161                     # is this a sufficient test for sequence-ness?
1162                     x = len(v)
1163                 except TypeError:
1164                     # not a sequence
1165                     v = quote_plus(str(v))
1166                     l.append(k + '=' + v)
1167                 else:
1168                     # loop over the sequence
1169                     for elt in v:
1170                         l.append(k + '=' + quote_plus(str(elt)))
1171     return '&'.join(l)
1172 
1173 # Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for non-empty variables named <scheme>_proxy
    (in any letter case); this seems to be the standard convention.
    If you need a different way, you can pass a proxies dictionary to
    the [Fancy]URLopener constructor.

    """
    suffix = '_proxy'
    found = {}
    for variable, setting in os.environ.items():
        if not setting:
            continue
        variable = variable.lower()
        if variable.endswith(suffix):
            # The part in front of the suffix is the scheme.
            found[variable[:-len(suffix)]] = setting
    return found
1189 
1190 if sys.platform == 'darwin':
1191     def getproxies_internetconfig():
1192         """Return a dictionary of scheme -> proxy server URL mappings.
1193 
1194         By convention the mac uses Internet Config to store
1195         proxies.  An HTTP proxy, for instance, is stored under
1196         the HttpProxy key.
1197 
1198         """
1199         try:
1200             import ic
1201         except ImportError:
1202             return {}
1203 
1204         try:
1205             config = ic.IC()
1206         except ic.error:
1207             return {}
1208         proxies = {}
1209         # HTTP:
1210         if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
1211             try:
1212                 value = config['HTTPProxyHost']
1213             except ic.error:
1214                 pass
1215             else:
1216                 proxies['http'] = 'http://%s' % value
1217         # FTP: XXXX To be done.
1218         # Gopher: XXXX To be done.
1219         return proxies
1220 
1221     def proxy_bypass(x):
1222         return 0
1223 
1224     def getproxies():
1225         return getproxies_environment() or getproxies_internetconfig()
1226 
1227 elif os.name == 'nt':
1228     def getproxies_registry():
1229         """Return a dictionary of scheme -> proxy server URL mappings.
1230 
1231         Win32 uses the registry to store proxies.
1232 
1233         """
1234         proxies = {}
1235         try:
1236             import _winreg
1237         except ImportError:
1238             # Std module, so should be around - but you never know!
1239             return proxies
1240         try:
1241             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1242                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1243             proxyEnable = _winreg.QueryValueEx(internetSettings,
1244                                                'ProxyEnable')[0]
1245             if proxyEnable:
1246                 # Returned as Unicode but problems if not converted to ASCII
1247                 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1248                                                        'ProxyServer')[0])
1249                 if '=' in proxyServer:
1250                     # Per-protocol settings
1251                     for p in proxyServer.split(';'):
1252                         protocol, address = p.split('=', 1)
1253                         # See if address has a type:// prefix
1254                         import re
1255                         if not re.match('^([^/:]+)://', address):
1256                             address = '%s://%s' % (protocol, address)
1257                         proxies[protocol] = address
1258                 else:
1259                     # Use one setting for all protocols
1260                     if proxyServer[:5] == 'http:':
1261                         proxies['http'] = proxyServer
1262                     else:
1263                         proxies['http'] = 'http://%s' % proxyServer
1264                         proxies['ftp'] = 'ftp://%s' % proxyServer
1265             internetSettings.Close()
1266         except (WindowsError, ValueError, TypeError):
1267             # Either registry key not found etc, or the value in an
1268             # unexpected format.
1269             # proxies already set up to be empty so nothing to do
1270             pass
1271         return proxies
1272 
1273     def getproxies():
1274         """Return a dictionary of scheme -> proxy server URL mappings.
1275 
1276         Returns settings gathered from the environment, if specified,
1277         or the registry.
1278 
1279         """
1280         return getproxies_environment() or getproxies_registry()
1281 
1282     def proxy_bypass(host):
1283         try:
1284             import _winreg
1285             import re
1286         except ImportError:
1287             # Std modules, so should be around - but you never know!
1288             return 0
1289         try:
1290             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1291                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1292             proxyEnable = _winreg.QueryValueEx(internetSettings,
1293                                                'ProxyEnable')[0]
1294             proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1295                                                      'ProxyOverride')[0])
1296             # ^^^^ Returned as Unicode but problems if not converted to ASCII
1297         except WindowsError:
1298             return 0
1299         if not proxyEnable or not proxyOverride:
1300             return 0
1301         # try to make a host list from name and IP address.
1302         host = [host]
1303         try:
1304             addr = socket.gethostbyname(host[0])
1305             if addr != host:
1306                 host.append(addr)
1307         except socket.error:
1308             pass
1309         # make a check value list from the registry entry: replace the
1310         # '<local>' string by the localhost entry and the corresponding
1311         # canonical entry.
1312         proxyOverride = proxyOverride.split(';')
1313         i = 0
1314         while i < len(proxyOverride):
1315             if proxyOverride[i] == '<local>':
1316                 proxyOverride[i:i+1] = ['localhost',
1317                                         '127.0.0.1',
1318                                         socket.gethostname(),
1319                                         socket.gethostbyname(
1320                                             socket.gethostname())]
1321             i += 1
1322         # print proxyOverride
1323         # now check if we match one of the registry values.
1324         for test in proxyOverride:
1325             test = test.replace(".", r"\.")     # mask dots
1326             test = test.replace("*", r".*")     # change glob sequence
1327             test = test.replace("?", r".")      # change glob char
1328             for val in host:
1329                 # print "%s <--> %s" %( test, val )
1330                 if re.match(test, val, re.I):
1331                     return 1
1332         return 0
1333 
1334 else:
1335     # By default use environment variables
1336     getproxies = getproxies_environment
1337 
1338     def proxy_bypass(host):
1339         return 0
1340 
1341 # Test and time quote() and unquote()
1342 def test1():
1343     s = ''
1344     for i in range(256): s = s + chr(i)
1345     s = s*4
1346     t0 = time.time()
1347     qs = quote(s)
1348     uqs = unquote(qs)
1349     t1 = time.time()
1350     if uqs != s:
1351         print 'Wrong!'
1352     print repr(s)
1353     print repr(qs)
1354     print repr(uqs)
1355     print round(t1 - t0, 3), 'sec'
1356 
1357 
1358 def reporthook(blocknum, blocksize, totalsize):
1359     # Report during remote transfers
1360     print "Block number: %d, Block size: %d, Total size: %d" % (
1361         blocknum, blocksize, totalsize)
1362 
1363 # Test program
1364 def test(args=[]):
1365     if not args:
1366         args = [
1367             '/etc/passwd',
1368             'file:/etc/passwd',
1369             'file://localhost/etc/passwd',
1370             'ftp://ftp.python.org/pub/python/README',
1371 ##          'gopher://gopher.micro.umn.edu/1/',
1372             'http://www.python.org/index.html',
1373             ]
1374         if hasattr(URLopener, "open_https"):
1375             args.append('https://synergy.as.cmu.edu/~geek/')
1376     try:
1377         for url in args:
1378             print '-'*10, url, '-'*10
1379             fn, h = urlretrieve(url, None, reporthook)
1380             print fn
1381             if h:
1382                 print '======'
1383                 for k in h.keys(): print k + ':', h[k]
1384                 print '======'
1385             fp = open(fn, 'rb')
1386             data = fp.read()
1387             del fp
1388             if '\r' in data:
1389                 table = string.maketrans("", "")
1390                 data = data.translate(table, "\r")
1391             print data
1392             fn, h = None, None
1393         print '-'*40
1394     finally:
1395         urlcleanup()
1396 
1397 def main():
1398     import getopt, sys
1399     try:
1400         opts, args = getopt.getopt(sys.argv[1:], "th")
1401     except getopt.error, msg:
1402         print msg
1403         print "Use -h for help"
1404         return
1405     t = 0
1406     for o, a in opts:
1407         if o == '-t':
1408             t = t + 1
1409         if o == '-h':
1410             print "Usage: python urllib.py [-t] [url ...]"
1411             print "-t runs self-test;",
1412             print "otherwise, contents of urls are printed"
1413             return
1414     if t:
1415         if t > 1:
1416             test1()
1417         test(args)
1418     else:
1419         if not args:
1420             print "Use -h for help"
1421         for url in args:
1422             print urlopen(url).read(),
1423 
# Run the command-line test driver when this file is executed as a
# script; nothing happens when the module is merely imported.
if __name__ == '__main__':
    main()
1427 

Generated by PyXR 0.9.4
SourceForge.net Logo