"""Open an arbitrary URL.

See the following document for more info on URLs:
"Names and Addresses, URIs, URLs, URNs, URCs", at
http://www.w3.org/pub/WWW/Addressing/Overview.html

See also the HTTP spec (from which the error codes are derived):
"HTTP - Hypertext Transfer Protocol", at
http://www.w3.org/pub/WWW/Protocols/

Related standards and specs:
- RFC1808: the "relative URL" spec. (authoritative status)
- RFC1738 - the "URL standard". (authoritative status)
- RFC1630 - the "URI spec". (informational status)

The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
readlines(), fileno(), close() and info().  The read*(), fileno()
and close() methods work like those of open files.
The info() method returns a mimetools.Message object which can be
used to query various info about the object, if available.
(mimetools.Message objects are queried with the getheader() method.)
"""

import string
import socket
import os
import time
import sys
from urlparse import urljoin as basejoin

__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "splitgophertype", "getproxies"]

__version__ = '1.16'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems
# On 'mac', 'nt' and 'riscos' the conversion functions come from a
# platform-specific module; everywhere else the fallbacks below are used.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Convert a URL path to a local path by applying unquote()."""
        return unquote(pathname)
    def pathname2url(pathname):
        """Convert a local path to a URL path by applying quote()."""
        return quote(pathname)

# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?
0062 0063 0064 # Shortcut for basic usage 0065 _urlopener = None 0066 def urlopen(url, data=None, proxies=None): 0067 """urlopen(url [, data]) -> open file-like object""" 0068 global _urlopener 0069 if proxies is not None: 0070 opener = FancyURLopener(proxies=proxies) 0071 elif not _urlopener: 0072 opener = FancyURLopener() 0073 _urlopener = opener 0074 else: 0075 opener = _urlopener 0076 if data is None: 0077 return opener.open(url) 0078 else: 0079 return opener.open(url, data) 0080 def urlretrieve(url, filename=None, reporthook=None, data=None): 0081 global _urlopener 0082 if not _urlopener: 0083 _urlopener = FancyURLopener() 0084 return _urlopener.retrieve(url, filename, reporthook, data) 0085 def urlcleanup(): 0086 if _urlopener: 0087 _urlopener.cleanup() 0088 0089 0090 ftpcache = {} 0091 class URLopener: 0092 """Class to open URLs. 0093 This is a class rather than just a subroutine because we may need 0094 more than one set of global protocol-specific options. 0095 Note -- this is a base class for those who don't want the 0096 automatic handling of errors type 302 (relocated) and 401 0097 (authorization needed).""" 0098 0099 __tempfiles = None 0100 0101 version = "Python-urllib/%s" % __version__ 0102 0103 # Constructor 0104 def __init__(self, proxies=None, **x509): 0105 if proxies is None: 0106 proxies = getproxies() 0107 assert hasattr(proxies, 'has_key'), "proxies must be a mapping" 0108 self.proxies = proxies 0109 self.key_file = x509.get('key_file') 0110 self.cert_file = x509.get('cert_file') 0111 self.addheaders = [('User-agent', self.version)] 0112 self.__tempfiles = [] 0113 self.__unlink = os.unlink # See cleanup() 0114 self.tempcache = None 0115 # Undocumented feature: if you assign {} to tempcache, 0116 # it is used to cache files retrieved with 0117 # self.retrieve(). This is not enabled by default 0118 # since it does not work for changing documents (and I 0119 # haven't got the logic to check expiration headers 0120 # yet). 
0121 self.ftpcache = ftpcache 0122 # Undocumented feature: you can use a different 0123 # ftp cache by assigning to the .ftpcache member; 0124 # in case you want logically independent URL openers 0125 # XXX This is not threadsafe. Bah. 0126 0127 def __del__(self): 0128 self.close() 0129 0130 def close(self): 0131 self.cleanup() 0132 0133 def cleanup(self): 0134 # This code sometimes runs when the rest of this module 0135 # has already been deleted, so it can't use any globals 0136 # or import anything. 0137 if self.__tempfiles: 0138 for file in self.__tempfiles: 0139 try: 0140 self.__unlink(file) 0141 except OSError: 0142 pass 0143 del self.__tempfiles[:] 0144 if self.tempcache: 0145 self.tempcache.clear() 0146 0147 def addheader(self, *args): 0148 """Add a header to be used by the HTTP interface only 0149 e.g. u.addheader('Accept', 'sound/basic')""" 0150 self.addheaders.append(args) 0151 0152 # External interface 0153 def open(self, fullurl, data=None): 0154 """Use URLopener().open(file) instead of open(file, 'r').""" 0155 fullurl = unwrap(toBytes(fullurl)) 0156 if self.tempcache and fullurl in self.tempcache: 0157 filename, headers = self.tempcache[fullurl] 0158 fp = open(filename, 'rb') 0159 return addinfourl(fp, headers, fullurl) 0160 urltype, url = splittype(fullurl) 0161 if not urltype: 0162 urltype = 'file' 0163 if urltype in self.proxies: 0164 proxy = self.proxies[urltype] 0165 urltype, proxyhost = splittype(proxy) 0166 host, selector = splithost(proxyhost) 0167 url = (host, fullurl) # Signal special case to open_*() 0168 else: 0169 proxy = None 0170 name = 'open_' + urltype 0171 self.type = urltype 0172 name = name.replace('-', '_') 0173 if not hasattr(self, name): 0174 if proxy: 0175 return self.open_unknown_proxy(proxy, fullurl, data) 0176 else: 0177 return self.open_unknown(fullurl, data) 0178 try: 0179 if data is None: 0180 return getattr(self, name)(url) 0181 else: 0182 return getattr(self, name)(url, data) 0183 except socket.error, msg: 0184 raise 
IOError, ('socket error', msg), sys.exc_info()[2] 0185 0186 def open_unknown(self, fullurl, data=None): 0187 """Overridable interface to open unknown URL type.""" 0188 type, url = splittype(fullurl) 0189 raise IOError, ('url error', 'unknown url type', type) 0190 0191 def open_unknown_proxy(self, proxy, fullurl, data=None): 0192 """Overridable interface to open unknown URL type.""" 0193 type, url = splittype(fullurl) 0194 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy) 0195 0196 # External interface 0197 def retrieve(self, url, filename=None, reporthook=None, data=None): 0198 """retrieve(url) returns (filename, headers) for a local object 0199 or (tempfilename, headers) for a remote object.""" 0200 url = unwrap(toBytes(url)) 0201 if self.tempcache and url in self.tempcache: 0202 return self.tempcache[url] 0203 type, url1 = splittype(url) 0204 if filename is None and (not type or type == 'file'): 0205 try: 0206 fp = self.open_local_file(url1) 0207 hdrs = fp.info() 0208 del fp 0209 return url2pathname(splithost(url1)[1]), hdrs 0210 except IOError, msg: 0211 pass 0212 fp = self.open(url, data) 0213 headers = fp.info() 0214 if filename: 0215 tfp = open(filename, 'wb') 0216 else: 0217 import tempfile 0218 garbage, path = splittype(url) 0219 garbage, path = splithost(path or "") 0220 path, garbage = splitquery(path or "") 0221 path, garbage = splitattr(path or "") 0222 suffix = os.path.splitext(path)[1] 0223 (fd, filename) = tempfile.mkstemp(suffix) 0224 self.__tempfiles.append(filename) 0225 tfp = os.fdopen(fd, 'wb') 0226 result = filename, headers 0227 if self.tempcache is not None: 0228 self.tempcache[url] = result 0229 bs = 1024*8 0230 size = -1 0231 blocknum = 1 0232 if reporthook: 0233 if "content-length" in headers: 0234 size = int(headers["Content-Length"]) 0235 reporthook(0, bs, size) 0236 block = fp.read(bs) 0237 if reporthook: 0238 reporthook(1, bs, size) 0239 while block: 0240 tfp.write(block) 0241 block = fp.read(bs) 0242 blocknum = 
blocknum + 1 0243 if reporthook: 0244 reporthook(blocknum, bs, size) 0245 fp.close() 0246 tfp.close() 0247 del fp 0248 del tfp 0249 return result 0250 0251 # Each method named open_<type> knows how to open that type of URL 0252 0253 def open_http(self, url, data=None): 0254 """Use HTTP protocol.""" 0255 import httplib 0256 user_passwd = None 0257 if isinstance(url, str): 0258 host, selector = splithost(url) 0259 if host: 0260 user_passwd, host = splituser(host) 0261 host = unquote(host) 0262 realhost = host 0263 else: 0264 host, selector = url 0265 urltype, rest = splittype(selector) 0266 url = rest 0267 user_passwd = None 0268 if urltype.lower() != 'http': 0269 realhost = None 0270 else: 0271 realhost, rest = splithost(rest) 0272 if realhost: 0273 user_passwd, realhost = splituser(realhost) 0274 if user_passwd: 0275 selector = "%s://%s%s" % (urltype, realhost, rest) 0276 if proxy_bypass(realhost): 0277 host = realhost 0278 0279 #print "proxy via http:", host, selector 0280 if not host: raise IOError, ('http error', 'no host given') 0281 if user_passwd: 0282 import base64 0283 auth = base64.encodestring(user_passwd).strip() 0284 else: 0285 auth = None 0286 h = httplib.HTTP(host) 0287 if data is not None: 0288 h.putrequest('POST', selector) 0289 h.putheader('Content-type', 'application/x-www-form-urlencoded') 0290 h.putheader('Content-length', '%d' % len(data)) 0291 else: 0292 h.putrequest('GET', selector) 0293 if auth: h.putheader('Authorization', 'Basic %s' % auth) 0294 if realhost: h.putheader('Host', realhost) 0295 for args in self.addheaders: h.putheader(*args) 0296 h.endheaders() 0297 if data is not None: 0298 h.send(data) 0299 errcode, errmsg, headers = h.getreply() 0300 fp = h.getfile() 0301 if errcode == 200: 0302 return addinfourl(fp, headers, "http:" + url) 0303 else: 0304 if data is None: 0305 return self.http_error(url, fp, errcode, errmsg, headers) 0306 else: 0307 return self.http_error(url, fp, errcode, errmsg, headers, data) 0308 0309 def 
http_error(self, url, fp, errcode, errmsg, headers, data=None): 0310 """Handle http errors. 0311 Derived class can override this, or provide specific handlers 0312 named http_error_DDD where DDD is the 3-digit error code.""" 0313 # First check if there's a specific handler for this error 0314 name = 'http_error_%d' % errcode 0315 if hasattr(self, name): 0316 method = getattr(self, name) 0317 if data is None: 0318 result = method(url, fp, errcode, errmsg, headers) 0319 else: 0320 result = method(url, fp, errcode, errmsg, headers, data) 0321 if result: return result 0322 return self.http_error_default(url, fp, errcode, errmsg, headers) 0323 0324 def http_error_default(self, url, fp, errcode, errmsg, headers): 0325 """Default error handler: close the connection and raise IOError.""" 0326 void = fp.read() 0327 fp.close() 0328 raise IOError, ('http error', errcode, errmsg, headers) 0329 0330 if hasattr(socket, "ssl"): 0331 def open_https(self, url, data=None): 0332 """Use HTTPS protocol.""" 0333 import httplib 0334 user_passwd = None 0335 if isinstance(url, str): 0336 host, selector = splithost(url) 0337 if host: 0338 user_passwd, host = splituser(host) 0339 host = unquote(host) 0340 realhost = host 0341 else: 0342 host, selector = url 0343 urltype, rest = splittype(selector) 0344 url = rest 0345 user_passwd = None 0346 if urltype.lower() != 'https': 0347 realhost = None 0348 else: 0349 realhost, rest = splithost(rest) 0350 if realhost: 0351 user_passwd, realhost = splituser(realhost) 0352 if user_passwd: 0353 selector = "%s://%s%s" % (urltype, realhost, rest) 0354 #print "proxy via https:", host, selector 0355 if not host: raise IOError, ('https error', 'no host given') 0356 if user_passwd: 0357 import base64 0358 auth = base64.encodestring(user_passwd).strip() 0359 else: 0360 auth = None 0361 h = httplib.HTTPS(host, 0, 0362 key_file=self.key_file, 0363 cert_file=self.cert_file) 0364 if data is not None: 0365 h.putrequest('POST', selector) 0366 
h.putheader('Content-type', 0367 'application/x-www-form-urlencoded') 0368 h.putheader('Content-length', '%d' % len(data)) 0369 else: 0370 h.putrequest('GET', selector) 0371 if auth: h.putheader('Authorization', 'Basic %s' % auth) 0372 if realhost: h.putheader('Host', realhost) 0373 for args in self.addheaders: h.putheader(*args) 0374 h.endheaders() 0375 if data is not None: 0376 h.send(data) 0377 errcode, errmsg, headers = h.getreply() 0378 fp = h.getfile() 0379 if errcode == 200: 0380 return addinfourl(fp, headers, "https:" + url) 0381 else: 0382 if data is None: 0383 return self.http_error(url, fp, errcode, errmsg, headers) 0384 else: 0385 return self.http_error(url, fp, errcode, errmsg, headers, 0386 data) 0387 0388 def open_gopher(self, url): 0389 """Use Gopher protocol.""" 0390 import gopherlib 0391 host, selector = splithost(url) 0392 if not host: raise IOError, ('gopher error', 'no host given') 0393 host = unquote(host) 0394 type, selector = splitgophertype(selector) 0395 selector, query = splitquery(selector) 0396 selector = unquote(selector) 0397 if query: 0398 query = unquote(query) 0399 fp = gopherlib.send_query(selector, query, host) 0400 else: 0401 fp = gopherlib.send_selector(selector, host) 0402 return addinfourl(fp, noheaders(), "gopher:" + url) 0403 0404 def open_file(self, url): 0405 """Use local file or FTP depending on form of URL.""" 0406 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': 0407 return self.open_ftp(url) 0408 else: 0409 return self.open_local_file(url) 0410 0411 def open_local_file(self, url): 0412 """Use local file.""" 0413 import mimetypes, mimetools, email.Utils, StringIO 0414 host, file = splithost(url) 0415 localname = url2pathname(file) 0416 try: 0417 stats = os.stat(localname) 0418 except OSError, e: 0419 raise IOError(e.errno, e.strerror, e.filename) 0420 size = stats.st_size 0421 modified = email.Utils.formatdate(stats.st_mtime, usegmt=True) 0422 mtype = mimetypes.guess_type(url)[0] 0423 
headers = mimetools.Message(StringIO.StringIO( 0424 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % 0425 (mtype or 'text/plain', size, modified))) 0426 if not host: 0427 urlfile = file 0428 if file[:1] == '/': 0429 urlfile = 'file://' + file 0430 return addinfourl(open(localname, 'rb'), 0431 headers, urlfile) 0432 host, port = splitport(host) 0433 if not port \ 0434 and socket.gethostbyname(host) in (localhost(), thishost()): 0435 urlfile = file 0436 if file[:1] == '/': 0437 urlfile = 'file://' + file 0438 return addinfourl(open(localname, 'rb'), 0439 headers, urlfile) 0440 raise IOError, ('local file error', 'not on local host') 0441 0442 def open_ftp(self, url): 0443 """Use FTP protocol.""" 0444 import mimetypes, mimetools, StringIO 0445 host, path = splithost(url) 0446 if not host: raise IOError, ('ftp error', 'no host given') 0447 host, port = splitport(host) 0448 user, host = splituser(host) 0449 if user: user, passwd = splitpasswd(user) 0450 else: passwd = None 0451 host = unquote(host) 0452 user = unquote(user or '') 0453 passwd = unquote(passwd or '') 0454 host = socket.gethostbyname(host) 0455 if not port: 0456 import ftplib 0457 port = ftplib.FTP_PORT 0458 else: 0459 port = int(port) 0460 path, attrs = splitattr(path) 0461 path = unquote(path) 0462 dirs = path.split('/') 0463 dirs, file = dirs[:-1], dirs[-1] 0464 if dirs and not dirs[0]: dirs = dirs[1:] 0465 if dirs and not dirs[0]: dirs[0] = '/' 0466 key = user, host, port, '/'.join(dirs) 0467 # XXX thread unsafe! 
0468 if len(self.ftpcache) > MAXFTPCACHE: 0469 # Prune the cache, rather arbitrarily 0470 for k in self.ftpcache.keys(): 0471 if k != key: 0472 v = self.ftpcache[k] 0473 del self.ftpcache[k] 0474 v.close() 0475 try: 0476 if not key in self.ftpcache: 0477 self.ftpcache[key] = \ 0478 ftpwrapper(user, passwd, host, port, dirs) 0479 if not file: type = 'D' 0480 else: type = 'I' 0481 for attr in attrs: 0482 attr, value = splitvalue(attr) 0483 if attr.lower() == 'type' and \ 0484 value in ('a', 'A', 'i', 'I', 'd', 'D'): 0485 type = value.upper() 0486 (fp, retrlen) = self.ftpcache[key].retrfile(file, type) 0487 mtype = mimetypes.guess_type("ftp:" + url)[0] 0488 headers = "" 0489 if mtype: 0490 headers += "Content-Type: %s\n" % mtype 0491 if retrlen is not None and retrlen >= 0: 0492 headers += "Content-Length: %d\n" % retrlen 0493 headers = mimetools.Message(StringIO.StringIO(headers)) 0494 return addinfourl(fp, headers, "ftp:" + url) 0495 except ftperrors(), msg: 0496 raise IOError, ('ftp error', msg), sys.exc_info()[2] 0497 0498 def open_data(self, url, data=None): 0499 """Use "data" URL.""" 0500 # ignore POSTed data 0501 # 0502 # syntax of data URLs: 0503 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data 0504 # mediatype := [ type "/" subtype ] *( ";" parameter ) 0505 # data := *urlchar 0506 # parameter := attribute "=" value 0507 import StringIO, mimetools 0508 try: 0509 [type, data] = url.split(',', 1) 0510 except ValueError: 0511 raise IOError, ('data error', 'bad data URL') 0512 if not type: 0513 type = 'text/plain;charset=US-ASCII' 0514 semi = type.rfind(';') 0515 if semi >= 0 and '=' not in type[semi:]: 0516 encoding = type[semi+1:] 0517 type = type[:semi] 0518 else: 0519 encoding = '' 0520 msg = [] 0521 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT', 0522 time.gmtime(time.time()))) 0523 msg.append('Content-type: %s' % type) 0524 if encoding == 'base64': 0525 import base64 0526 data = base64.decodestring(data) 0527 else: 0528 data = 
unquote(data) 0529 msg.append('Content-length: %d' % len(data)) 0530 msg.append('') 0531 msg.append(data) 0532 msg = '\n'.join(msg) 0533 f = StringIO.StringIO(msg) 0534 headers = mimetools.Message(f, 0) 0535 f.fileno = None # needed for addinfourl 0536 return addinfourl(f, headers, url) 0537 0538 0539 class FancyURLopener(URLopener): 0540 """Derived class with handlers for errors we can handle (perhaps).""" 0541 0542 def __init__(self, *args, **kwargs): 0543 URLopener.__init__(self, *args, **kwargs) 0544 self.auth_cache = {} 0545 self.tries = 0 0546 self.maxtries = 10 0547 0548 def http_error_default(self, url, fp, errcode, errmsg, headers): 0549 """Default error handling -- don't raise an exception.""" 0550 return addinfourl(fp, headers, "http:" + url) 0551 0552 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): 0553 """Error 302 -- relocated (temporarily).""" 0554 self.tries += 1 0555 if self.maxtries and self.tries >= self.maxtries: 0556 if hasattr(self, "http_error_500"): 0557 meth = self.http_error_500 0558 else: 0559 meth = self.http_error_default 0560 self.tries = 0 0561 return meth(url, fp, 500, 0562 "Internal Server Error: Redirect Recursion", headers) 0563 result = self.redirect_internal(url, fp, errcode, errmsg, headers, 0564 data) 0565 self.tries = 0 0566 return result 0567 0568 def redirect_internal(self, url, fp, errcode, errmsg, headers, data): 0569 if 'location' in headers: 0570 newurl = headers['location'] 0571 elif 'uri' in headers: 0572 newurl = headers['uri'] 0573 else: 0574 return 0575 void = fp.read() 0576 fp.close() 0577 # In case the server sent a relative URL, join with original: 0578 newurl = basejoin(self.type + ":" + url, newurl) 0579 return self.open(newurl) 0580 0581 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): 0582 """Error 301 -- also relocated (permanently).""" 0583 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 0584 0585 def http_error_303(self, url, fp, errcode, 
errmsg, headers, data=None): 0586 """Error 303 -- also relocated (essentially identical to 302).""" 0587 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 0588 0589 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): 0590 """Error 307 -- relocated, but turn POST into error.""" 0591 if data is None: 0592 return self.http_error_302(url, fp, errcode, errmsg, headers, data) 0593 else: 0594 return self.http_error_default(url, fp, errcode, errmsg, headers) 0595 0596 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): 0597 """Error 401 -- authentication required. 0598 See this URL for a description of the basic authentication scheme: 0599 http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt""" 0600 if not 'www-authenticate' in headers: 0601 URLopener.http_error_default(self, url, fp, 0602 errcode, errmsg, headers) 0603 stuff = headers['www-authenticate'] 0604 import re 0605 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) 0606 if not match: 0607 URLopener.http_error_default(self, url, fp, 0608 errcode, errmsg, headers) 0609 scheme, realm = match.groups() 0610 if scheme.lower() != 'basic': 0611 URLopener.http_error_default(self, url, fp, 0612 errcode, errmsg, headers) 0613 name = 'retry_' + self.type + '_basic_auth' 0614 if data is None: 0615 return getattr(self,name)(url, realm) 0616 else: 0617 return getattr(self,name)(url, realm, data) 0618 0619 def retry_http_basic_auth(self, url, realm, data=None): 0620 host, selector = splithost(url) 0621 i = host.find('@') + 1 0622 host = host[i:] 0623 user, passwd = self.get_user_passwd(host, realm, i) 0624 if not (user or passwd): return None 0625 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host 0626 newurl = 'http://' + host + selector 0627 if data is None: 0628 return self.open(newurl) 0629 else: 0630 return self.open(newurl, data) 0631 0632 def retry_https_basic_auth(self, url, realm, data=None): 0633 host, selector = 
splithost(url) 0634 i = host.find('@') + 1 0635 host = host[i:] 0636 user, passwd = self.get_user_passwd(host, realm, i) 0637 if not (user or passwd): return None 0638 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host 0639 newurl = '//' + host + selector 0640 return self.open_https(newurl, data) 0641 0642 def get_user_passwd(self, host, realm, clear_cache = 0): 0643 key = realm + '@' + host.lower() 0644 if key in self.auth_cache: 0645 if clear_cache: 0646 del self.auth_cache[key] 0647 else: 0648 return self.auth_cache[key] 0649 user, passwd = self.prompt_user_passwd(host, realm) 0650 if user or passwd: self.auth_cache[key] = (user, passwd) 0651 return user, passwd 0652 0653 def prompt_user_passwd(self, host, realm): 0654 """Override this in a GUI environment!""" 0655 import getpass 0656 try: 0657 user = raw_input("Enter username for %s at %s: " % (realm, 0658 host)) 0659 passwd = getpass.getpass("Enter password for %s in %s at %s: " % 0660 (user, realm, host)) 0661 return user, passwd 0662 except KeyboardInterrupt: 0663 print 0664 return None, None 0665 0666 0667 # Utility functions 0668 0669 _localhost = None 0670 def localhost(): 0671 """Return the IP address of the magic hostname 'localhost'.""" 0672 global _localhost 0673 if _localhost is None: 0674 _localhost = socket.gethostbyname('localhost') 0675 return _localhost 0676 0677 _thishost = None 0678 def thishost(): 0679 """Return the IP address of the current host.""" 0680 global _thishost 0681 if _thishost is None: 0682 _thishost = socket.gethostbyname(socket.gethostname()) 0683 return _thishost 0684 0685 _ftperrors = None 0686 def ftperrors(): 0687 """Return the set of errors raised by the FTP class.""" 0688 global _ftperrors 0689 if _ftperrors is None: 0690 import ftplib 0691 _ftperrors = ftplib.all_errors 0692 return _ftperrors 0693 0694 _noheaders = None 0695 def noheaders(): 0696 """Return an empty mimetools.Message object.""" 0697 global _noheaders 0698 if _noheaders is None: 0699 
import mimetools 0700 import StringIO 0701 _noheaders = mimetools.Message(StringIO.StringIO(), 0) 0702 _noheaders.fp.close() # Recycle file descriptor 0703 return _noheaders 0704 0705 0706 # Utility classes 0707 0708 class ftpwrapper: 0709 """Class used by open_ftp() for cache of open FTP connections.""" 0710 0711 def __init__(self, user, passwd, host, port, dirs): 0712 self.user = user 0713 self.passwd = passwd 0714 self.host = host 0715 self.port = port 0716 self.dirs = dirs 0717 self.init() 0718 0719 def init(self): 0720 import ftplib 0721 self.busy = 0 0722 self.ftp = ftplib.FTP() 0723 self.ftp.connect(self.host, self.port) 0724 self.ftp.login(self.user, self.passwd) 0725 for dir in self.dirs: 0726 self.ftp.cwd(dir) 0727 0728 def retrfile(self, file, type): 0729 import ftplib 0730 self.endtransfer() 0731 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 0732 else: cmd = 'TYPE ' + type; isdir = 0 0733 try: 0734 self.ftp.voidcmd(cmd) 0735 except ftplib.all_errors: 0736 self.init() 0737 self.ftp.voidcmd(cmd) 0738 conn = None 0739 if file and not isdir: 0740 # Use nlst to see if the file exists at all 0741 try: 0742 self.ftp.nlst(file) 0743 except ftplib.error_perm, reason: 0744 raise IOError, ('ftp error', reason), sys.exc_info()[2] 0745 # Restore the transfer mode! 0746 self.ftp.voidcmd(cmd) 0747 # Try to retrieve as a file 0748 try: 0749 cmd = 'RETR ' + file 0750 conn = self.ftp.ntransfercmd(cmd) 0751 except ftplib.error_perm, reason: 0752 if str(reason)[:3] != '550': 0753 raise IOError, ('ftp error', reason), sys.exc_info()[2] 0754 if not conn: 0755 # Set transfer mode to ASCII! 
0756 self.ftp.voidcmd('TYPE A') 0757 # Try a directory listing 0758 if file: cmd = 'LIST ' + file 0759 else: cmd = 'LIST' 0760 conn = self.ftp.ntransfercmd(cmd) 0761 self.busy = 1 0762 # Pass back both a suitably decorated object and a retrieval length 0763 return (addclosehook(conn[0].makefile('rb'), 0764 self.endtransfer), conn[1]) 0765 def endtransfer(self): 0766 if not self.busy: 0767 return 0768 self.busy = 0 0769 try: 0770 self.ftp.voidresp() 0771 except ftperrors(): 0772 pass 0773 0774 def close(self): 0775 self.endtransfer() 0776 try: 0777 self.ftp.close() 0778 except ftperrors(): 0779 pass 0780 0781 class addbase: 0782 """Base class for addinfo and addclosehook.""" 0783 0784 def __init__(self, fp): 0785 self.fp = fp 0786 self.read = self.fp.read 0787 self.readline = self.fp.readline 0788 if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines 0789 if hasattr(self.fp, "fileno"): self.fileno = self.fp.fileno 0790 if hasattr(self.fp, "__iter__"): 0791 self.__iter__ = self.fp.__iter__ 0792 if hasattr(self.fp, "next"): 0793 self.next = self.fp.next 0794 0795 def __repr__(self): 0796 return '<%s at %r whose fp = %r>' % (self.__class__.__name__, 0797 id(self), self.fp) 0798 0799 def close(self): 0800 self.read = None 0801 self.readline = None 0802 self.readlines = None 0803 self.fileno = None 0804 if self.fp: self.fp.close() 0805 self.fp = None 0806 0807 class addclosehook(addbase): 0808 """Class to add a close hook to an open file.""" 0809 0810 def __init__(self, fp, closehook, *hookargs): 0811 addbase.__init__(self, fp) 0812 self.closehook = closehook 0813 self.hookargs = hookargs 0814 0815 def close(self): 0816 addbase.close(self) 0817 if self.closehook: 0818 self.closehook(*self.hookargs) 0819 self.closehook = None 0820 self.hookargs = None 0821 0822 class addinfo(addbase): 0823 """class to add an info() method to an open file.""" 0824 0825 def __init__(self, fp, headers): 0826 addbase.__init__(self, fp) 0827 self.headers = headers 0828 0829 def 
info(self): 0830 return self.headers 0831 0832 class addinfourl(addbase): 0833 """class to add info() and geturl() methods to an open file.""" 0834 0835 def __init__(self, fp, headers, url): 0836 addbase.__init__(self, fp) 0837 self.headers = headers 0838 self.url = url 0839 0840 def info(self): 0841 return self.headers 0842 0843 def geturl(self): 0844 return self.url 0845 0846 0847 # Utilities to parse URLs (most of these return None for missing parts): 0848 # unwrap('<URL:type://host/path>') --> 'type://host/path' 0849 # splittype('type:opaquestring') --> 'type', 'opaquestring' 0850 # splithost('//host[:port]/path') --> 'host[:port]', '/path' 0851 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' 0852 # splitpasswd('user:passwd') -> 'user', 'passwd' 0853 # splitport('host:port') --> 'host', 'port' 0854 # splitquery('/path?query') --> '/path', 'query' 0855 # splittag('/path#tag') --> '/path', 'tag' 0856 # splitattr('/path;attr1=value1;attr2=value2;...') -> 0857 # '/path', ['attr1=value1', 'attr2=value2', ...] 0858 # splitvalue('attr=value') --> 'attr', 'value' 0859 # splitgophertype('/Xselector') --> 'X', 'selector' 0860 # unquote('abc%20def') -> 'abc def' 0861 # quote('abc def') -> 'abc%20def') 0862 0863 try: 0864 unicode 0865 except NameError: 0866 def _is_unicode(x): 0867 return 0 0868 else: 0869 def _is_unicode(x): 0870 return isinstance(x, unicode) 0871 0872 def toBytes(url): 0873 """toBytes(u"URL") --> 'URL'.""" 0874 # Most URL schemes require ASCII. 
If that changes, the conversion 0875 # can be relaxed 0876 if _is_unicode(url): 0877 try: 0878 url = url.encode("ASCII") 0879 except UnicodeError: 0880 raise UnicodeError("URL " + repr(url) + 0881 " contains non-ASCII characters") 0882 return url 0883 0884 def unwrap(url): 0885 """unwrap('<URL:type://host/path>') --> 'type://host/path'.""" 0886 url = url.strip() 0887 if url[:1] == '<' and url[-1:] == '>': 0888 url = url[1:-1].strip() 0889 if url[:4] == 'URL:': url = url[4:].strip() 0890 return url 0891 0892 _typeprog = None 0893 def splittype(url): 0894 """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" 0895 global _typeprog 0896 if _typeprog is None: 0897 import re 0898 _typeprog = re.compile('^([^/:]+):') 0899 0900 match = _typeprog.match(url) 0901 if match: 0902 scheme = match.group(1) 0903 return scheme.lower(), url[len(scheme) + 1:] 0904 return None, url 0905 0906 _hostprog = None 0907 def splithost(url): 0908 """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" 0909 global _hostprog 0910 if _hostprog is None: 0911 import re 0912 _hostprog = re.compile('^//([^/]*)(.*)$') 0913 0914 match = _hostprog.match(url) 0915 if match: return match.group(1, 2) 0916 return None, url 0917 0918 _userprog = None 0919 def splituser(host): 0920 """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" 0921 global _userprog 0922 if _userprog is None: 0923 import re 0924 _userprog = re.compile('^(.*)@(.*)$') 0925 0926 match = _userprog.match(host) 0927 if match: return map(unquote, match.group(1, 2)) 0928 return None, host 0929 0930 _passwdprog = None 0931 def splitpasswd(user): 0932 """splitpasswd('user:passwd') -> 'user', 'passwd'.""" 0933 global _passwdprog 0934 if _passwdprog is None: 0935 import re 0936 _passwdprog = re.compile('^([^:]*):(.*)$') 0937 0938 match = _passwdprog.match(user) 0939 if match: return match.group(1, 2) 0940 return user, None 0941 0942 # splittag('/path#tag') --> '/path', 'tag' 0943 _portprog = None 
# Cache slot for the compiled pattern used by splitport().  Declared next to
# its only user so the function never depends on a declaration elsewhere.
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  If host does not end in
    ':' followed by at least one digit, (host, None) is returned.
    """
    global _portprog
    if _portprog is None:
        # compiled on first use so that importing this module stays cheap
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    match = _portprog.match(host)
    if match:
        return match.group(1, 2)
    return host, None

_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match is None:
        return host, defport
    host, port = match.group(1, 2)
    try:
        # int('') raises ValueError as well, so an empty port ("host:")
        # needs no special case
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport

_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Returns (url, None) when url
    contains no '?'.
    """
    global _queryprog
    if _queryprog is None:
        import re
        # raw string: '\?' is a regex escape, not a string escape
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match:
        return match.group(1, 2)
    return url, None

_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Returns (url, None) when url
    contains no '#'.
    """
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    match = _tagprog.match(url)
    if match:
        return match.group(1, 2)
    return url, None

def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    words = url.split(';')
    return words[0], words[1:]

_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Returns (attr, None) when attr
    contains no '='.
    """
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    match = _valueprog.match(attr)
    if match:
        return match.group(1, 2)
    return attr, None

def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless selector starts with '/' followed by
    at least one more character.
    """
    if selector[:1] == '/' and selector[1:2]:
        return selector[1], selector[2:]
    return None, selector

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two characters int() accepts as a base-16
    number is left in the output unchanged.
    """
    parts = s.split('%')
    # everything before the first '%' is literal text
    res = [parts[0]]
    myappend = res.append
    for item in parts[1:]:
        if item[1:2]:
            try:
                myappend(chr(int(item[:2], 16)) + item[2:])
            except ValueError:
                # not a hex escape: put the '%' back
                myappend('%' + item)
        else:
            # fewer than two characters follow the '%'
            myappend('%' + item)
    return "".join(res)

def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'"""
    s = s.replace('+', ' ')
    return unquote(s)

always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

_fast_safe_test = always_safe + '/'
_fast_safe = None

def _quote_chars(s, safe):
    """Return s with every character not contained in safe written as %XX.

    safe is any container supporting 'in' for single characters (a string
    or a dictionary keyed by character).
    """
    res = []
    for c in s:
        if c in safe:
            res.append(c)
        else:
            res.append('%%%02X' % ord(c))
    return ''.join(res)

def _fast_quote(s):
    """Quote s using the default safe set (always_safe plus '/').

    The safe set is kept as a dictionary, built on first use, so that the
    per-character membership test is a hash lookup.
    """
    global _fast_safe
    if _fast_safe is None:
        _fast_safe = {}
        for c in _fast_safe_test:
            _fast_safe[c] = c
    return _quote_chars(s, _fast_safe)

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters in always_safe are never quoted; safe names additional
    characters to leave alone.
    """
    safe = always_safe + safe
    if _fast_safe_test == safe:
        # default safe set: use the dictionary-based path
        return _fast_quote(s)
    return _quote_chars(s, safe)

def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' in s:
        # quote each space-separated piece, then rejoin with '+'
        return '+'.join([quote(piece, safe) for piece in s.split(' ')])
    return quote(s, safe)

def urlencode(query,doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Raises TypeError if query is neither a mapping (an object with an
    items() method) nor a sequence whose first element is a tuple.
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            raise TypeError("not a valid non-string sequence or mapping object")

    l = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            l.append(quote_plus(str(k)) + '=' + quote_plus(str(v)))
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                l.append(k + '=' + quote_plus(v))
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                l.append(k + '=' + quote_plus(v.encode("ASCII","replace")))
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    l.append(k + '=' + quote_plus(str(v)))
                else:
                    # loop over the sequence
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)

# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are lower-cased before matching and variables with an
    empty value are ignored.

    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    return proxies

if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        Returns an empty dictionary when the ic module cannot be imported
        or an ic.IC() instance cannot be created.

        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return proxies

    def proxy_bypass(x):
        """Return 0: no host bypasses the proxy on this platform."""
        return 0

    def getproxies():
        """Return proxies from the environment or, if none, Internet Config."""
        return getproxies_environment() or getproxies_internetconfig()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        Registry errors and values in an unexpected format are ignored;
        whatever was collected up to that point is returned.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                           'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        import re
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                # release the key even when a lookup or parse step fails
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass(host):
        """Return 1 if host matches the registry's ProxyOverride list, else 0.

        host is checked both by name and, when it resolves to something
        different, by address.  Each ';'-separated override entry is used
        as a case-insensitive glob ('*' and '?') matched from the start of
        the value.  Returns 0 when the registry cannot be read, proxying
        is disabled or the override list is empty.
        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                # Returned as Unicode but problems if not converted to ASCII
                proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                         'ProxyOverride')[0])
            finally:
                internetSettings.Close()
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        host = [host]
        try:
            addr = socket.gethostbyname(host[0])
            # compare with the name itself, not with the list holding it
            if addr != host[0]:
                host.append(addr)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        patterns = []
        for entry in proxyOverride.split(';'):
            if entry != '<local>':
                patterns.append(entry)
                continue
            localname = socket.gethostname()
            patterns.extend(['localhost', '127.0.0.1', localname])
            try:
                patterns.append(socket.gethostbyname(localname))
            except socket.error:
                # local name does not resolve: just leave its address out
                pass
        # now check if we match one of the registry values.
        for test in patterns:
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

else:
    # By default use environment variables
    getproxies = getproxies_environment

    def proxy_bypass(host):
        """Return 0: no host bypasses the proxy on this platform."""
        return 0

# Test and time quote() and unquote()
def test1():
    """Round-trip all 256 character codes through quote()/unquote().

    Prints 'Wrong!' if the round trip does not reproduce the input, then
    the input, the quoted form, the unquoted form and the elapsed time.
    """
    s = ''.join([chr(i) for i in range(256)]) * 4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print('Wrong!')
    print(repr(s))
    print(repr(qs))
    print(repr(uqs))
    print('%s sec' % round(t1 - t0, 3))


def reporthook(blocknum, blocksize, totalsize):
    """Print the three progress values passed in (used by test())."""
    # Report during remote transfers
    print("Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize))

# Test program
def test(args=None):
    """Retrieve each URL in args and print its headers and contents.

    With no (or empty) args a built-in list of sample URLs is used.  An
    https URL is added when URLopener has an open_https attribute.  The
    caller's list is never modified.  urlcleanup() is always called at
    the end.
    """
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.python.org/pub/python/README',
##          'gopher://gopher.micro.umn.edu/1/',
            'http://www.python.org/index.html',
            ]
    else:
        # work on a copy so the append below stays local
        args = list(args)
    if hasattr(URLopener, "open_https"):
        args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print('%s %s %s' % ('-'*10, url, '-'*10))
            fn, h = urlretrieve(url, None, reporthook)
            print(fn)
            if h:
                print('======')
                for k in h.keys():
                    print('%s %s' % (k + ':', h[k]))
                print('======')
            fp = open(fn, 'rb')
            try:
                data = fp.read()
            finally:
                fp.close()
            # drop carriage returns before printing
            data = data.replace('\r', '')
            print(data)
            fn, h = None, None
        print('-'*40)
    finally:
        urlcleanup()

# test() and test1() are manual self-test entry points, not unit tests; keep
# test collectors (nose, pytest) from running them when this module's names
# end up in a test module's namespace.
test.__test__ = False
test1.__test__ = False

def main():
    """Command line entry point: python urllib.py [-t] [url ...].

    -h prints usage.  -t runs test() on the given URLs; giving -t more
    than once runs test1() first.  Without -t the contents of each URL
    are written to standard output unmodified.
    """
    import getopt
    import sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error:
        # fetched via exc_info() so the clause parses on every Python version
        msg = sys.exc_info()[1]
        print(msg)
        print("Use -h for help")
        return
    t = 0
    for o, a in opts:
        if o == '-t':
            t = t + 1
        if o == '-h':
            print("Usage: python urllib.py [-t] [url ...]")
            print("-t runs self-test; otherwise, contents of urls are printed")
            return
    if t:
        if t > 1:
            test1()
        test(args)
    else:
        if not args:
            print("Use -h for help")
        for url in args:
            sys.stdout.write(urlopen(url).read())

# Run test program when run as a script
if __name__ == '__main__':
    main()
Generated by PyXR 0.9.4