"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

An HTTPConnection goes through a number of "states", which define when a
client may legally make another request or fetch the response for a
particular request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""

import errno
import mimetools
import socket
from urlparse import urlsplit

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

__all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error"]

# default TCP ports for plain and SSL connections
HTTP_PORT = 80
HTTPS_PORT = 443

# placeholder for response attributes that have not been determined yet
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411 0135 PRECONDITION_FAILED = 412 0136 REQUEST_ENTITY_TOO_LARGE = 413 0137 REQUEST_URI_TOO_LONG = 414 0138 UNSUPPORTED_MEDIA_TYPE = 415 0139 REQUESTED_RANGE_NOT_SATISFIABLE = 416 0140 EXPECTATION_FAILED = 417 0141 UNPROCESSABLE_ENTITY = 422 0142 LOCKED = 423 0143 FAILED_DEPENDENCY = 424 0144 UPGRADE_REQUIRED = 426 0145 0146 # server error 0147 INTERNAL_SERVER_ERROR = 500 0148 NOT_IMPLEMENTED = 501 0149 BAD_GATEWAY = 502 0150 SERVICE_UNAVAILABLE = 503 0151 GATEWAY_TIMEOUT = 504 0152 HTTP_VERSION_NOT_SUPPORTED = 505 0153 INSUFFICIENT_STORAGE = 507 0154 NOT_EXTENDED = 510 0155 0156 class HTTPMessage(mimetools.Message): 0157 0158 def addheader(self, key, value): 0159 """Add header for field key handling repeats.""" 0160 prev = self.dict.get(key) 0161 if prev is None: 0162 self.dict[key] = value 0163 else: 0164 combined = ", ".join((prev, value)) 0165 self.dict[key] = combined 0166 0167 def addcontinue(self, key, more): 0168 """Add more field data from a continuation line.""" 0169 prev = self.dict[key] 0170 self.dict[key] = prev + "\n " + more 0171 0172 def readheaders(self): 0173 """Read header lines. 0174 0175 Read header lines up to the entirely blank line that terminates them. 0176 The (normally blank) line that ends the headers is skipped, but not 0177 included in the returned list. If a non-header line ends the headers, 0178 (which is an error), an attempt is made to backspace over it; it is 0179 never included in the returned list. 0180 0181 The variable self.status is set to the empty string if all went well, 0182 otherwise it is an error message. The variable self.headers is a 0183 completely uninterpreted list of lines contained in the header (so 0184 printing them will reproduce the header exactly as it appears in the 0185 file). 
0186 0187 If multiple header fields with the same name occur, they are combined 0188 according to the rules in RFC 2616 sec 4.2: 0189 0190 Appending each subsequent field-value to the first, each separated 0191 by a comma. The order in which header fields with the same field-name 0192 are received is significant to the interpretation of the combined 0193 field value. 0194 """ 0195 # XXX The implementation overrides the readheaders() method of 0196 # rfc822.Message. The base class design isn't amenable to 0197 # customized behavior here so the method here is a copy of the 0198 # base class code with a few small changes. 0199 0200 self.dict = {} 0201 self.unixfrom = '' 0202 self.headers = hlist = [] 0203 self.status = '' 0204 headerseen = "" 0205 firstline = 1 0206 startofline = unread = tell = None 0207 if hasattr(self.fp, 'unread'): 0208 unread = self.fp.unread 0209 elif self.seekable: 0210 tell = self.fp.tell 0211 while True: 0212 if tell: 0213 try: 0214 startofline = tell() 0215 except IOError: 0216 startofline = tell = None 0217 self.seekable = 0 0218 line = self.fp.readline() 0219 if not line: 0220 self.status = 'EOF in headers' 0221 break 0222 # Skip unix From name time lines 0223 if firstline and line.startswith('From '): 0224 self.unixfrom = self.unixfrom + line 0225 continue 0226 firstline = 0 0227 if headerseen and line[0] in ' \t': 0228 # XXX Not sure if continuation lines are handled properly 0229 # for http and/or for repeating headers 0230 # It's a continuation line. 0231 hlist.append(line) 0232 self.addcontinue(headerseen, line.strip()) 0233 continue 0234 elif self.iscomment(line): 0235 # It's a comment. Ignore it. 0236 continue 0237 elif self.islast(line): 0238 # Note! No pushback here! The delimiter line gets eaten. 0239 break 0240 headerseen = self.isheader(line) 0241 if headerseen: 0242 # It's a legal header line, save it. 
0243 hlist.append(line) 0244 self.addheader(headerseen, line[len(headerseen)+1:].strip()) 0245 continue 0246 else: 0247 # It's not a header line; throw it back and stop here. 0248 if not self.dict: 0249 self.status = 'No headers' 0250 else: 0251 self.status = 'Non-header line where header expected' 0252 # Try to undo the read. 0253 if unread: 0254 unread(line) 0255 elif tell: 0256 self.fp.seek(startofline) 0257 else: 0258 self.status = self.status + '; bad seek' 0259 break 0260 0261 class HTTPResponse: 0262 0263 # strict: If true, raise BadStatusLine if the status line can't be 0264 # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is 0265 # false because it prevents clients from talking to HTTP/0.9 0266 # servers. Note that a response with a sufficiently corrupted 0267 # status line will look like an HTTP/0.9 response. 0268 0269 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. 0270 0271 def __init__(self, sock, debuglevel=0, strict=0, method=None): 0272 self.fp = sock.makefile('rb', 0) 0273 self.debuglevel = debuglevel 0274 self.strict = strict 0275 self._method = method 0276 0277 self.msg = None 0278 0279 # from the Status-Line of the response 0280 self.version = _UNKNOWN # HTTP-Version 0281 self.status = _UNKNOWN # Status-Code 0282 self.reason = _UNKNOWN # Reason-Phrase 0283 0284 self.chunked = _UNKNOWN # is "chunked" being used? 0285 self.chunk_left = _UNKNOWN # bytes left to read in current chunk 0286 self.length = _UNKNOWN # number of bytes left in response 0287 self.will_close = _UNKNOWN # conn will close at end of response 0288 0289 def _read_status(self): 0290 # Initialize with Simple-Response defaults 0291 line = self.fp.readline() 0292 if self.debuglevel > 0: 0293 print "reply:", repr(line) 0294 if not line: 0295 # Presumably, the server closed the connection before 0296 # sending a valid response. 
0297 raise BadStatusLine(line) 0298 try: 0299 [version, status, reason] = line.split(None, 2) 0300 except ValueError: 0301 try: 0302 [version, status] = line.split(None, 1) 0303 reason = "" 0304 except ValueError: 0305 # empty version will cause next test to fail and status 0306 # will be treated as 0.9 response. 0307 version = "" 0308 if not version.startswith('HTTP/'): 0309 if self.strict: 0310 self.close() 0311 raise BadStatusLine(line) 0312 else: 0313 # assume it's a Simple-Response from an 0.9 server 0314 self.fp = LineAndFileWrapper(line, self.fp) 0315 return "HTTP/0.9", 200, "" 0316 0317 # The status code is a three-digit number 0318 try: 0319 status = int(status) 0320 if status < 100 or status > 999: 0321 raise BadStatusLine(line) 0322 except ValueError: 0323 raise BadStatusLine(line) 0324 return version, status, reason 0325 0326 def begin(self): 0327 if self.msg is not None: 0328 # we've already started reading the response 0329 return 0330 0331 # read until we get a non-100 response 0332 while True: 0333 version, status, reason = self._read_status() 0334 if status != CONTINUE: 0335 break 0336 # skip the header from the 100 response 0337 while True: 0338 skip = self.fp.readline().strip() 0339 if not skip: 0340 break 0341 if self.debuglevel > 0: 0342 print "header:", skip 0343 0344 self.status = status 0345 self.reason = reason.strip() 0346 if version == 'HTTP/1.0': 0347 self.version = 10 0348 elif version.startswith('HTTP/1.'): 0349 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 0350 elif version == 'HTTP/0.9': 0351 self.version = 9 0352 else: 0353 raise UnknownProtocol(version) 0354 0355 if self.version == 9: 0356 self.chunked = 0 0357 self.will_close = 1 0358 self.msg = HTTPMessage(StringIO()) 0359 return 0360 0361 self.msg = HTTPMessage(self.fp, 0) 0362 if self.debuglevel > 0: 0363 for hdr in self.msg.headers: 0364 print "header:", hdr, 0365 0366 # don't let the msg keep an fp 0367 self.msg.fp = None 0368 0369 # are we using the 
chunked-style of transfer encoding? 0370 tr_enc = self.msg.getheader('transfer-encoding') 0371 if tr_enc and tr_enc.lower() == "chunked": 0372 self.chunked = 1 0373 self.chunk_left = None 0374 else: 0375 self.chunked = 0 0376 0377 # will the connection close at the end of the response? 0378 self.will_close = self._check_close() 0379 0380 # do we have a Content-Length? 0381 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" 0382 length = self.msg.getheader('content-length') 0383 if length and not self.chunked: 0384 try: 0385 self.length = int(length) 0386 except ValueError: 0387 self.length = None 0388 else: 0389 self.length = None 0390 0391 # does the body have a fixed length? (of zero) 0392 if (status == NO_CONTENT or status == NOT_MODIFIED or 0393 100 <= status < 200 or # 1xx codes 0394 self._method == 'HEAD'): 0395 self.length = 0 0396 0397 # if the connection remains open, and we aren't using chunked, and 0398 # a content-length was not provided, then assume that the connection 0399 # WILL close. 0400 if not self.will_close and \ 0401 not self.chunked and \ 0402 self.length is None: 0403 self.will_close = 1 0404 0405 def _check_close(self): 0406 conn = self.msg.getheader('connection') 0407 if self.version == 11: 0408 # An HTTP/1.1 proxy is assumed to stay open unless 0409 # explicitly closed. 0410 conn = self.msg.getheader('connection') 0411 if conn and "close" in conn.lower(): 0412 return True 0413 return False 0414 0415 # Some HTTP/1.0 implementations have support for persistent 0416 # connections, using rules different than HTTP/1.1. 0417 0418 # For older HTTP, Keep-Alive indiciates persistent connection. 0419 if self.msg.getheader('keep-alive'): 0420 return False 0421 0422 # At least Akamai returns a "Connection: Keep-Alive" header, 0423 # which was supposed to be sent by the client. 0424 if conn and "keep-alive" in conn.lower(): 0425 return False 0426 0427 # Proxy-Connection is a netscape hack. 
0428 pconn = self.msg.getheader('proxy-connection') 0429 if pconn and "keep-alive" in pconn.lower(): 0430 return False 0431 0432 # otherwise, assume it will close 0433 return True 0434 0435 def close(self): 0436 if self.fp: 0437 self.fp.close() 0438 self.fp = None 0439 0440 def isclosed(self): 0441 # NOTE: it is possible that we will not ever call self.close(). This 0442 # case occurs when will_close is TRUE, length is None, and we 0443 # read up to the last byte, but NOT past it. 0444 # 0445 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be 0446 # called, meaning self.isclosed() is meaningful. 0447 return self.fp is None 0448 0449 # XXX It would be nice to have readline and __iter__ for this, too. 0450 0451 def read(self, amt=None): 0452 if self.fp is None: 0453 return '' 0454 0455 if self.chunked: 0456 return self._read_chunked(amt) 0457 0458 if amt is None: 0459 # unbounded read 0460 if self.will_close: 0461 s = self.fp.read() 0462 else: 0463 s = self._safe_read(self.length) 0464 self.close() # we read everything 0465 return s 0466 0467 if self.length is not None: 0468 if amt > self.length: 0469 # clip the read to the "end of response" 0470 amt = self.length 0471 self.length -= amt 0472 0473 # we do not use _safe_read() here because this may be a .will_close 0474 # connection, and the user is reading more bytes than will be provided 0475 # (for example, reading in 1k chunks) 0476 s = self.fp.read(amt) 0477 0478 return s 0479 0480 def _read_chunked(self, amt): 0481 assert self.chunked != _UNKNOWN 0482 chunk_left = self.chunk_left 0483 value = '' 0484 0485 # XXX This accumulates chunks by repeated string concatenation, 0486 # which is not efficient as the number or size of chunks gets big. 
0487 while True: 0488 if chunk_left is None: 0489 line = self.fp.readline() 0490 i = line.find(';') 0491 if i >= 0: 0492 line = line[:i] # strip chunk-extensions 0493 chunk_left = int(line, 16) 0494 if chunk_left == 0: 0495 break 0496 if amt is None: 0497 value += self._safe_read(chunk_left) 0498 elif amt < chunk_left: 0499 value += self._safe_read(amt) 0500 self.chunk_left = chunk_left - amt 0501 return value 0502 elif amt == chunk_left: 0503 value += self._safe_read(amt) 0504 self._safe_read(2) # toss the CRLF at the end of the chunk 0505 self.chunk_left = None 0506 return value 0507 else: 0508 value += self._safe_read(chunk_left) 0509 amt -= chunk_left 0510 0511 # we read the whole chunk, get another 0512 self._safe_read(2) # toss the CRLF at the end of the chunk 0513 chunk_left = None 0514 0515 # read and discard trailer up to the CRLF terminator 0516 ### note: we shouldn't have any trailers! 0517 while True: 0518 line = self.fp.readline() 0519 if line == '\r\n': 0520 break 0521 0522 # we read everything; close the "file" 0523 self.close() 0524 0525 return value 0526 0527 def _safe_read(self, amt): 0528 """Read the number of bytes requested, compensating for partial reads. 0529 0530 Normally, we have a blocking socket, but a read() can be interrupted 0531 by a signal (resulting in a partial read). 0532 0533 Note that we cannot distinguish between EOF and an interrupt when zero 0534 bytes have been read. IncompleteRead() will be raised in this 0535 situation. 0536 0537 This function should be used when <amt> bytes "should" be present for 0538 reading. If the bytes are truly not available (due to EOF), then the 0539 IncompleteRead exception can be used to detect the problem. 
0540 """ 0541 s = '' 0542 while amt > 0: 0543 chunk = self.fp.read(amt) 0544 if not chunk: 0545 raise IncompleteRead(s) 0546 s += chunk 0547 amt -= len(chunk) 0548 return s 0549 0550 def getheader(self, name, default=None): 0551 if self.msg is None: 0552 raise ResponseNotReady() 0553 return self.msg.getheader(name, default) 0554 0555 def getheaders(self): 0556 """Return list of (header, value) tuples.""" 0557 if self.msg is None: 0558 raise ResponseNotReady() 0559 return self.msg.items() 0560 0561 0562 class HTTPConnection: 0563 0564 _http_vsn = 11 0565 _http_vsn_str = 'HTTP/1.1' 0566 0567 response_class = HTTPResponse 0568 default_port = HTTP_PORT 0569 auto_open = 1 0570 debuglevel = 0 0571 strict = 0 0572 0573 def __init__(self, host, port=None, strict=None): 0574 self.sock = None 0575 self._buffer = [] 0576 self.__response = None 0577 self.__state = _CS_IDLE 0578 self._method = None 0579 0580 self._set_hostport(host, port) 0581 if strict is not None: 0582 self.strict = strict 0583 0584 def _set_hostport(self, host, port): 0585 if port is None: 0586 i = host.rfind(':') 0587 j = host.rfind(']') # ipv6 addresses have [...] 
0588 if i > j: 0589 try: 0590 port = int(host[i+1:]) 0591 except ValueError: 0592 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) 0593 host = host[:i] 0594 else: 0595 port = self.default_port 0596 if host and host[0] == '[' and host[-1] == ']': 0597 host = host[1:-1] 0598 self.host = host 0599 self.port = port 0600 0601 def set_debuglevel(self, level): 0602 self.debuglevel = level 0603 0604 def connect(self): 0605 """Connect to the host and port specified in __init__.""" 0606 msg = "getaddrinfo returns an empty list" 0607 for res in socket.getaddrinfo(self.host, self.port, 0, 0608 socket.SOCK_STREAM): 0609 af, socktype, proto, canonname, sa = res 0610 try: 0611 self.sock = socket.socket(af, socktype, proto) 0612 if self.debuglevel > 0: 0613 print "connect: (%s, %s)" % (self.host, self.port) 0614 self.sock.connect(sa) 0615 except socket.error, msg: 0616 if self.debuglevel > 0: 0617 print 'connect fail:', (self.host, self.port) 0618 if self.sock: 0619 self.sock.close() 0620 self.sock = None 0621 continue 0622 break 0623 if not self.sock: 0624 raise socket.error, msg 0625 0626 def close(self): 0627 """Close the connection to the HTTP server.""" 0628 if self.sock: 0629 self.sock.close() # close it manually... there may be other refs 0630 self.sock = None 0631 if self.__response: 0632 self.__response.close() 0633 self.__response = None 0634 self.__state = _CS_IDLE 0635 0636 def send(self, str): 0637 """Send `str' to the server.""" 0638 if self.sock is None: 0639 if self.auto_open: 0640 self.connect() 0641 else: 0642 raise NotConnected() 0643 0644 # send the data to the server. if we get a broken pipe, then close 0645 # the socket. we want to reconnect when somebody tries to send again. 0646 # 0647 # NOTE: we DO propagate the error, though, because we cannot simply 0648 # ignore the error... the caller will know if they can retry. 
0649 if self.debuglevel > 0: 0650 print "send:", repr(str) 0651 try: 0652 self.sock.sendall(str) 0653 except socket.error, v: 0654 if v[0] == 32: # Broken pipe 0655 self.close() 0656 raise 0657 0658 def _output(self, s): 0659 """Add a line of output to the current request buffer. 0660 0661 Assumes that the line does *not* end with \\r\\n. 0662 """ 0663 self._buffer.append(s) 0664 0665 def _send_output(self): 0666 """Send the currently buffered request and clear the buffer. 0667 0668 Appends an extra \\r\\n to the buffer. 0669 """ 0670 self._buffer.extend(("", "")) 0671 msg = "\r\n".join(self._buffer) 0672 del self._buffer[:] 0673 self.send(msg) 0674 0675 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): 0676 """Send a request to the server. 0677 0678 `method' specifies an HTTP request method, e.g. 'GET'. 0679 `url' specifies the object being requested, e.g. '/index.html'. 0680 `skip_host' if True does not add automatically a 'Host:' header 0681 `skip_accept_encoding' if True does not add automatically an 0682 'Accept-Encoding:' header 0683 """ 0684 0685 # if a prior response has been completed, then forget about it. 0686 if self.__response and self.__response.isclosed(): 0687 self.__response = None 0688 0689 0690 # in certain cases, we cannot issue another request on this connection. 0691 # this occurs when: 0692 # 1) we are in the process of sending a request. (_CS_REQ_STARTED) 0693 # 2) a response to a previous request has signalled that it is going 0694 # to close the connection upon completion. 0695 # 3) the headers for the previous response have not been read, thus 0696 # we cannot determine whether point (2) is true. (_CS_REQ_SENT) 0697 # 0698 # if there is no prior response, then we can request at will. 0699 # 0700 # if point (2) is true, then we will have passed the socket to the 0701 # response (effectively meaning, "there is no prior response"), and 0702 # will open a new one when a new request is made. 
0703 # 0704 # Note: if a prior response exists, then we *can* start a new request. 0705 # We are not allowed to begin fetching the response to this new 0706 # request, however, until that prior response is complete. 0707 # 0708 if self.__state == _CS_IDLE: 0709 self.__state = _CS_REQ_STARTED 0710 else: 0711 raise CannotSendRequest() 0712 0713 # Save the method we use, we need it later in the response phase 0714 self._method = method 0715 if not url: 0716 url = '/' 0717 str = '%s %s %s' % (method, url, self._http_vsn_str) 0718 0719 self._output(str) 0720 0721 if self._http_vsn == 11: 0722 # Issue some standard headers for better HTTP/1.1 compliance 0723 0724 if not skip_host: 0725 # this header is issued *only* for HTTP/1.1 0726 # connections. more specifically, this means it is 0727 # only issued when the client uses the new 0728 # HTTPConnection() class. backwards-compat clients 0729 # will be using HTTP/1.0 and those clients may be 0730 # issuing this header themselves. we should NOT issue 0731 # it twice; some web servers (such as Apache) barf 0732 # when they see two Host: headers 0733 0734 # If we need a non-standard port,include it in the 0735 # header. If the request is going through a proxy, 0736 # but the host of the actual URL, not the host of the 0737 # proxy. 0738 0739 netloc = '' 0740 if url.startswith('http'): 0741 nil, netloc, nil, nil, nil = urlsplit(url) 0742 0743 if netloc: 0744 self.putheader('Host', netloc.encode("idna")) 0745 elif self.port == HTTP_PORT: 0746 self.putheader('Host', self.host.encode("idna")) 0747 else: 0748 self.putheader('Host', "%s:%s" % (self.host.encode("idna"), self.port)) 0749 0750 # note: we are assuming that clients will not attempt to set these 0751 # headers since *this* library must deal with the 0752 # consequences. this also means that when the supporting 0753 # libraries are updated to recognize other forms, then this 0754 # code should be changed (removed or updated). 
0755 0756 # we only want a Content-Encoding of "identity" since we don't 0757 # support encodings such as x-gzip or x-deflate. 0758 if not skip_accept_encoding: 0759 self.putheader('Accept-Encoding', 'identity') 0760 0761 # we can accept "chunked" Transfer-Encodings, but no others 0762 # NOTE: no TE header implies *only* "chunked" 0763 #self.putheader('TE', 'chunked') 0764 0765 # if TE is supplied in the header, then it must appear in a 0766 # Connection header. 0767 #self.putheader('Connection', 'TE') 0768 0769 else: 0770 # For HTTP/1.0, the server will assume "not chunked" 0771 pass 0772 0773 def putheader(self, header, value): 0774 """Send a request header line to the server. 0775 0776 For example: h.putheader('Accept', 'text/html') 0777 """ 0778 if self.__state != _CS_REQ_STARTED: 0779 raise CannotSendHeader() 0780 0781 str = '%s: %s' % (header, value) 0782 self._output(str) 0783 0784 def endheaders(self): 0785 """Indicate that the last header line has been sent to the server.""" 0786 0787 if self.__state == _CS_REQ_STARTED: 0788 self.__state = _CS_REQ_SENT 0789 else: 0790 raise CannotSendHeader() 0791 0792 self._send_output() 0793 0794 def request(self, method, url, body=None, headers={}): 0795 """Send a complete request to the server.""" 0796 0797 try: 0798 self._send_request(method, url, body, headers) 0799 except socket.error, v: 0800 # trap 'Broken pipe' if we're allowed to automatically reconnect 0801 if v[0] != 32 or not self.auto_open: 0802 raise 0803 # try one more time 0804 self._send_request(method, url, body, headers) 0805 0806 def _send_request(self, method, url, body, headers): 0807 # honour explicitly requested Host: and Accept-Encoding headers 0808 header_names = dict.fromkeys([k.lower() for k in headers]) 0809 skips = {} 0810 if 'host' in header_names: 0811 skips['skip_host'] = 1 0812 if 'accept-encoding' in header_names: 0813 skips['skip_accept_encoding'] = 1 0814 0815 self.putrequest(method, url, **skips) 0816 0817 if body and 
('content-length' not in header_names): 0818 self.putheader('Content-Length', str(len(body))) 0819 for hdr, value in headers.iteritems(): 0820 self.putheader(hdr, value) 0821 self.endheaders() 0822 0823 if body: 0824 self.send(body) 0825 0826 def getresponse(self): 0827 "Get the response from the server." 0828 0829 # if a prior response has been completed, then forget about it. 0830 if self.__response and self.__response.isclosed(): 0831 self.__response = None 0832 0833 # 0834 # if a prior response exists, then it must be completed (otherwise, we 0835 # cannot read this response's header to determine the connection-close 0836 # behavior) 0837 # 0838 # note: if a prior response existed, but was connection-close, then the 0839 # socket and response were made independent of this HTTPConnection 0840 # object since a new request requires that we open a whole new 0841 # connection 0842 # 0843 # this means the prior response had one of two states: 0844 # 1) will_close: this connection was reset and the prior socket and 0845 # response operate independently 0846 # 2) persistent: the response was retained and we await its 0847 # isclosed() status to become true. 0848 # 0849 if self.__state != _CS_REQ_SENT or self.__response: 0850 raise ResponseNotReady() 0851 0852 if self.debuglevel > 0: 0853 response = self.response_class(self.sock, self.debuglevel, 0854 strict=self.strict, 0855 method=self._method) 0856 else: 0857 response = self.response_class(self.sock, strict=self.strict, 0858 method=self._method) 0859 0860 response.begin() 0861 assert response.will_close != _UNKNOWN 0862 self.__state = _CS_IDLE 0863 0864 if response.will_close: 0865 # this effectively passes the connection to the response 0866 self.close() 0867 else: 0868 # remember this, so we can tell when it is complete 0869 self.__response = response 0870 0871 return response 0872 0873 # The next several classes are used to define FakeSocket,a socket-like 0874 # interface to an SSL connection. 

# The primary complexity comes from faking a makefile() method.  The
# standard socket makefile() implementation calls dup() on the socket
# file descriptor.  As a consequence, clients can call close() on the
# parent socket and its makefile children in any order.  The underlying
# socket isn't closed until they are all closed.

# The implementation uses reference counting to keep the socket open
# until the last client calls close().  SharedSocket keeps track of
# the reference counting and SharedSocketClient provides an constructor
# and close() method that call incref() and decref() correctly.

class SharedSocket:
    """Reference-counted holder for a socket shared by several clients."""

    def __init__(self, sock):
        self.sock = sock
        self._refcnt = 0

    def incref(self):
        """Register one more user of the socket."""
        self._refcnt += 1

    def decref(self):
        """Drop one user; close the socket when the count reaches zero."""
        self._refcnt -= 1
        assert self._refcnt >= 0
        if self._refcnt == 0:
            self.sock.close()

    def __del__(self):
        # close the socket when the holder itself goes away
        self.sock.close()

class SharedSocketClient:
    """Base class for objects holding one reference on a SharedSocket."""

    def __init__(self, shared):
        self._closed = 0
        self._shared = shared
        self._shared.incref()
        self._sock = shared.sock

    def close(self):
        """Release this client's reference; later calls do nothing."""
        if not self._closed:
            self._shared.decref()
            self._closed = 1
            self._shared = None

class SSLFile(SharedSocketClient):
    """File-like object wrapping an SSL socket."""

    BUFSIZE = 8192

    def __init__(self, sock, ssl, bufsize=None):
        # `sock' is handed to SharedSocketClient, so it must be a
        # SharedSocket; `ssl' is the object whose read() supplies the data
        SharedSocketClient.__init__(self, sock)
        self._ssl = ssl
        self._buf = ''
        self._bufsize = bufsize or self.__class__.BUFSIZE

    def _read(self):
        """Read up to self._bufsize bytes; return '' at end of data."""
        buf = ''
        # put in a loop so that we retry on transient errors
        while True:
            try:
                buf = self._ssl.read(self._bufsize)
            except socket.sslerror, err:
                if (err[0] == socket.SSL_ERROR_WANT_READ
                    or err[0] == socket.SSL_ERROR_WANT_WRITE):
                    continue
                if (err[0] == socket.SSL_ERROR_ZERO_RETURN
                    or err[0] == socket.SSL_ERROR_EOF):
                    # treated as end of data: return the empty buf
                    break
                raise
            except socket.error, err:
                if err[0] == errno.EINTR:
                    continue
                if err[0] == errno.EBADF:
                    # XXX socket was closed?
                    break
                raise
            else:
                break
        return buf

    def read(self, size=None):
        """Return up to `size' bytes, or everything until EOF if None.

        Data read beyond `size' is kept in self._buf for the next call.
        """
        L = [self._buf]
        avail = len(self._buf)
        while size is None or avail < size:
            s = self._read()
            if s == '':
                break
            L.append(s)
            avail += len(s)
        all = "".join(L)
        if size is None:
            self._buf = ''
            return all
        else:
            self._buf = all[size:]
            return all[:size]

    def readline(self):
        """Return one line including its "\\n", or the remaining data at EOF."""
        L = [self._buf]
        self._buf = ''
        while 1:
            # only the most recently read piece needs searching
            i = L[-1].find("\n")
            if i >= 0:
                break
            s = self._read()
            if s == '':
                break
            L.append(s)
        if i == -1:
            # loop exited because there is no more data
            return "".join(L)
        else:
            all = "".join(L)
            # XXX could do enough bookkeeping not to do a 2nd search
            i = all.find("\n") + 1
            line = all[:i]
            self._buf = all[i:]
            return line

    def readlines(self, sizehint=0):
        """Return a list of lines, stopping at EOF or once the total
        length reaches a non-zero `sizehint'."""
        total = 0
        list = []
        while True:
            line = self.readline()
            if not line:
                break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list

    def fileno(self):
        """Return the file descriptor of the underlying socket."""
        return self._sock.fileno()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at EOF."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

class FakeSocket(SharedSocketClient):
    """Socket-like object that sends and receives through an SSL object."""

    class _closedsocket:
        # stand-in installed by close(): any attribute access raises
        def __getattr__(self, name):
            raise error(9, 'Bad file descriptor')

    def __init__(self, sock, ssl):
        # wrap the raw socket so makefile() children can share it
        sock = SharedSocket(sock)
        SharedSocketClient.__init__(self, sock)
        self._ssl = ssl

    def close(self):
        """Release the shared socket and make further socket use fail."""
        SharedSocketClient.close(self)
        self._sock = self.__class__._closedsocket()

    def makefile(self, mode, bufsize=None):
        """Return an SSLFile for reading.

        Only modes 'r' and 'rb' are supported; anything else raises
        UnimplementedFileMode.
        """
        if mode != 'r' and mode != 'rb':
            raise UnimplementedFileMode()
        return SSLFile(self._shared, self._ssl, bufsize)

    def send(self, stuff, flags = 0):
        """Write `stuff' through the SSL object; `flags' is ignored."""
        return self._ssl.write(stuff)

    sendall = send

    def recv(self, len = 1024, flags = 0):
        """Read up to `len' bytes through the SSL object; `flags' is ignored."""
        return self._ssl.read(len)

    def __getattr__(self, attr):
        # everything else is delegated to the underlying socket
        return getattr(self._sock, attr)


class HTTPSConnection(HTTPConnection):
    "This class allows communication via SSL."

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None):
        HTTPConnection.__init__(self, host, port, strict)
        self.key_file = key_file
        self.cert_file = cert_file

    def connect(self):
        "Connect to a host on a given (SSL) port."

        # NOTE(review): the socket is created as AF_INET only, without a
        # getaddrinfo() lookup -- confirm whether that is intended.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect((self.host, self.port))
        ssl = socket.ssl(sock, self.key_file, self.cert_file)
        self.sock = FakeSocket(sock, ssl)


class HTTP:
    "Compatibility class with httplib.py from 1.5."

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        self._setup(self._connection_class(host, port, strict))

    def _setup(self, conn):
        """Adopt `conn' as the wrapped connection and expose its methods."""
        self._conn = conn

        # set up delegation to flesh out interface
        self.send = conn.send
        self.putrequest = conn.putrequest
        self.endheaders = conn.endheaders
        self.set_debuglevel = conn.set_debuglevel

        # make the wrapped connection use this class's HTTP version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def putheader(self, header, *values):
        "The superclass allows only one value argument."
        # multiple values are joined into one header value with
        # CRLF + tab between them
        self._conn.putheader(header, '\r\n\t'.join(values))

    def getreply(self):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        On a BadStatusLine the tuple is (-1, <offending line>, None).
        """
        try:
            response = self._conn.getresponse()
        except BadStatusLine, e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            self.file = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            self.close()

            self.headers = None
            return -1, e.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        """Close the wrapped connection and drop the file reference."""
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None

if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # provide a default host, pass the X509 cert info

            # urf. compensate for bad input.
            if port == 0:
                port = None
            self._setup(self._connection_class(host, port, key_file,
                                               cert_file, strict))

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file


class HTTPException(Exception):
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.
1190 pass 1191 1192 class NotConnected(HTTPException): 1193 pass 1194 1195 class InvalidURL(HTTPException): 1196 pass 1197 1198 class UnknownProtocol(HTTPException): 1199 def __init__(self, version): 1200 self.args = version, 1201 self.version = version 1202 1203 class UnknownTransferEncoding(HTTPException): 1204 pass 1205 1206 class UnimplementedFileMode(HTTPException): 1207 pass 1208 1209 class IncompleteRead(HTTPException): 1210 def __init__(self, partial): 1211 self.args = partial, 1212 self.partial = partial 1213 1214 class ImproperConnectionState(HTTPException): 1215 pass 1216 1217 class CannotSendRequest(ImproperConnectionState): 1218 pass 1219 1220 class CannotSendHeader(ImproperConnectionState): 1221 pass 1222 1223 class ResponseNotReady(ImproperConnectionState): 1224 pass 1225 1226 class BadStatusLine(HTTPException): 1227 def __init__(self, line): 1228 self.args = line, 1229 self.line = line 1230 1231 # for backwards compatibility 1232 error = HTTPException 1233 1234 class LineAndFileWrapper: 1235 """A limited file-like object for HTTP/0.9 responses.""" 1236 1237 # The status-line parsing code calls readline(), which normally 1238 # get the HTTP status line. For a 0.9 response, however, this is 1239 # actually the first line of the body! Clients need to get a 1240 # readable file object that contains that line. 1241 1242 def __init__(self, line, file): 1243 self._line = line 1244 self._file = file 1245 self._line_consumed = 0 1246 self._line_offset = 0 1247 self._line_left = len(line) 1248 1249 def __getattr__(self, attr): 1250 return getattr(self._file, attr) 1251 1252 def _done(self): 1253 # called when the last byte is read from the line. After the 1254 # call, all read methods are delegated to the underlying file 1255 # object. 
1256 self._line_consumed = 1 1257 self.read = self._file.read 1258 self.readline = self._file.readline 1259 self.readlines = self._file.readlines 1260 1261 def read(self, amt=None): 1262 if self._line_consumed: 1263 return self._file.read(amt) 1264 assert self._line_left 1265 if amt is None or amt > self._line_left: 1266 s = self._line[self._line_offset:] 1267 self._done() 1268 if amt is None: 1269 return s + self._file.read() 1270 else: 1271 return s + self._file.read(amt - len(s)) 1272 else: 1273 assert amt <= self._line_left 1274 i = self._line_offset 1275 j = i + amt 1276 s = self._line[i:j] 1277 self._line_offset = j 1278 self._line_left -= amt 1279 if self._line_left == 0: 1280 self._done() 1281 return s 1282 1283 def readline(self): 1284 if self._line_consumed: 1285 return self._file.readline() 1286 assert self._line_left 1287 s = self._line[self._line_offset:] 1288 self._done() 1289 return s 1290 1291 def readlines(self, size=None): 1292 if self._line_consumed: 1293 return self._file.readlines(size) 1294 assert self._line_left 1295 L = [self._line[self._line_offset:]] 1296 self._done() 1297 if size is None: 1298 return L + self._file.readlines() 1299 else: 1300 return L + self._file.readlines(size) 1301 1302 def test(): 1303 """Test this module. 1304 1305 A hodge podge of tests collected here, because they have too many 1306 external dependencies for the regular test suite. 
1307 """ 1308 1309 import sys 1310 import getopt 1311 opts, args = getopt.getopt(sys.argv[1:], 'd') 1312 dl = 0 1313 for o, a in opts: 1314 if o == '-d': dl = dl + 1 1315 host = 'www.python.org' 1316 selector = '/' 1317 if args[0:]: host = args[0] 1318 if args[1:]: selector = args[1] 1319 h = HTTP() 1320 h.set_debuglevel(dl) 1321 h.connect(host) 1322 h.putrequest('GET', selector) 1323 h.endheaders() 1324 status, reason, headers = h.getreply() 1325 print 'status =', status 1326 print 'reason =', reason 1327 print "read", len(h.getfile().read()) 1328 print 1329 if headers: 1330 for header in headers.headers: print header.strip() 1331 print 1332 1333 # minimal test that code to extract host from url works 1334 class HTTP11(HTTP): 1335 _http_vsn = 11 1336 _http_vsn_str = 'HTTP/1.1' 1337 1338 h = HTTP11('www.python.org') 1339 h.putrequest('GET', 'http://www.python.org/~jeremy/') 1340 h.endheaders() 1341 h.getreply() 1342 h.close() 1343 1344 if hasattr(socket, 'ssl'): 1345 1346 for host, selector in (('sourceforge.net', '/projects/python'), 1347 ): 1348 print "https://%s%s" % (host, selector) 1349 hs = HTTPS() 1350 hs.set_debuglevel(dl) 1351 hs.connect(host) 1352 hs.putrequest('GET', selector) 1353 hs.endheaders() 1354 status, reason, headers = hs.getreply() 1355 print 'status =', status 1356 print 'reason =', reason 1357 print "read", len(hs.getfile().read()) 1358 print 1359 if headers: 1360 for header in headers.headers: print header.strip() 1361 print 1362 1363 if __name__ == '__main__': 1364 test() 1365
Generated by PyXR 0.9.4