PyXR

c:\python24\lib \ httplib.py



0001 """HTTP/1.1 client library
0002 
0003 <intro stuff goes here>
0004 <other stuff, too>
0005 
0006 An HTTPConnection goes through a number of "states", which define when a
0007 client may legally make another request or fetch the response for a
0008 particular request. This diagram details these state transitions:
0009 
0010     (null)
0011       |
0012       | HTTPConnection()
0013       v
0014     Idle
0015       |
0016       | putrequest()
0017       v
0018     Request-started
0019       |
0020       | ( putheader() )*  endheaders()
0021       v
0022     Request-sent
0023       |
0024       | response = getresponse()
0025       v
0026     Unread-response   [Response-headers-read]
0027       |\____________________
0028       |                     |
0029       | response.read()     | putrequest()
0030       v                     v
0031     Idle                  Req-started-unread-response
0032                      ______/|
0033                    /        |
0034    response.read() |        | ( putheader() )*  endheaders()
0035                    v        v
0036        Request-started    Req-sent-unread-response
0037                             |
0038                             | response.read()
0039                             v
0040                           Request-sent
0041 
0042 This diagram presents the following rules:
0043   -- a second request may not be started until {response-headers-read}
0044   -- a response [object] cannot be retrieved until {request-sent}
0045   -- there is no differentiation between an unread response body and a
0046      partially read response body
0047 
0048 Note: this enforcement is applied by the HTTPConnection class. The
0049       HTTPResponse class does not enforce this state machine, which
0050       implies sophisticated clients may accelerate the request/response
0051       pipeline. Caution should be taken, though: accelerating the states
0052       beyond the above pattern may imply knowledge of the server's
0053       connection-close behavior for certain requests. For example, it
0054       is impossible to tell whether the server will close the connection
0055       UNTIL the response headers have been read; this means that further
0056       requests cannot be placed into the pipeline until it is known that
0057       the server will NOT be closing the connection.
0058 
0059 Logical State                  __state            __response
0060 -------------                  -------            ----------
0061 Idle                           _CS_IDLE           None
0062 Request-started                _CS_REQ_STARTED    None
0063 Request-sent                   _CS_REQ_SENT       None
0064 Unread-response                _CS_IDLE           <response_class>
0065 Req-started-unread-response    _CS_REQ_STARTED    <response_class>
0066 Req-sent-unread-response       _CS_REQ_SENT       <response_class>
0067 """
0068 
0069 import errno
0070 import mimetools
0071 import socket
0072 from urlparse import urlsplit
0073 
0074 try:
0075     from cStringIO import StringIO
0076 except ImportError:
0077     from StringIO import StringIO
0078 
# Public names of this module.
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error"]

# Well-known ports.  HTTP_PORT is HTTPConnection.default_port; HTTPS_PORT is
# presumably the HTTPSConnection default (that class is not in this excerpt).
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes until begin() has parsed the
# status line and headers.
_UNKNOWN = 'UNKNOWN'

# connection states
# (values of HTTPConnection.__state; see the table in the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
0155 
0156 class HTTPMessage(mimetools.Message):
0157 
0158     def addheader(self, key, value):
0159         """Add header for field key handling repeats."""
0160         prev = self.dict.get(key)
0161         if prev is None:
0162             self.dict[key] = value
0163         else:
0164             combined = ", ".join((prev, value))
0165             self.dict[key] = combined
0166 
0167     def addcontinue(self, key, more):
0168         """Add more field data from a continuation line."""
0169         prev = self.dict[key]
0170         self.dict[key] = prev + "\n " + more
0171 
0172     def readheaders(self):
0173         """Read header lines.
0174 
0175         Read header lines up to the entirely blank line that terminates them.
0176         The (normally blank) line that ends the headers is skipped, but not
0177         included in the returned list.  If a non-header line ends the headers,
0178         (which is an error), an attempt is made to backspace over it; it is
0179         never included in the returned list.
0180 
0181         The variable self.status is set to the empty string if all went well,
0182         otherwise it is an error message.  The variable self.headers is a
0183         completely uninterpreted list of lines contained in the header (so
0184         printing them will reproduce the header exactly as it appears in the
0185         file).
0186 
0187         If multiple header fields with the same name occur, they are combined
0188         according to the rules in RFC 2616 sec 4.2:
0189 
0190         Appending each subsequent field-value to the first, each separated
0191         by a comma. The order in which header fields with the same field-name
0192         are received is significant to the interpretation of the combined
0193         field value.
0194         """
0195         # XXX The implementation overrides the readheaders() method of
0196         # rfc822.Message.  The base class design isn't amenable to
0197         # customized behavior here so the method here is a copy of the
0198         # base class code with a few small changes.
0199 
0200         self.dict = {}
0201         self.unixfrom = ''
0202         self.headers = hlist = []
0203         self.status = ''
0204         headerseen = ""
0205         firstline = 1
0206         startofline = unread = tell = None
0207         if hasattr(self.fp, 'unread'):
0208             unread = self.fp.unread
0209         elif self.seekable:
0210             tell = self.fp.tell
0211         while True:
0212             if tell:
0213                 try:
0214                     startofline = tell()
0215                 except IOError:
0216                     startofline = tell = None
0217                     self.seekable = 0
0218             line = self.fp.readline()
0219             if not line:
0220                 self.status = 'EOF in headers'
0221                 break
0222             # Skip unix From name time lines
0223             if firstline and line.startswith('From '):
0224                 self.unixfrom = self.unixfrom + line
0225                 continue
0226             firstline = 0
0227             if headerseen and line[0] in ' \t':
0228                 # XXX Not sure if continuation lines are handled properly
0229                 # for http and/or for repeating headers
0230                 # It's a continuation line.
0231                 hlist.append(line)
0232                 self.addcontinue(headerseen, line.strip())
0233                 continue
0234             elif self.iscomment(line):
0235                 # It's a comment.  Ignore it.
0236                 continue
0237             elif self.islast(line):
0238                 # Note! No pushback here!  The delimiter line gets eaten.
0239                 break
0240             headerseen = self.isheader(line)
0241             if headerseen:
0242                 # It's a legal header line, save it.
0243                 hlist.append(line)
0244                 self.addheader(headerseen, line[len(headerseen)+1:].strip())
0245                 continue
0246             else:
0247                 # It's not a header line; throw it back and stop here.
0248                 if not self.dict:
0249                     self.status = 'No headers'
0250                 else:
0251                     self.status = 'Non-header line where header expected'
0252                 # Try to undo the read.
0253                 if unread:
0254                     unread(line)
0255                 elif tell:
0256                     self.fp.seek(startofline)
0257                 else:
0258                     self.status = self.status + '; bad seek'
0259                 break
0260 
class HTTPResponse:
    """Reader for one HTTP response arriving on a socket.

    begin() parses the status line and headers and decides how the body is
    delimited (chunked, Content-Length, or end of connection).  read() then
    returns body data and closes the response's file once the body has been
    consumed, which is what isclosed() reports.
    """

    # strict: If true, raise BadStatusLine if the status line can't be
    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
    # false because it prevents clients from talking to HTTP/0.9
    # servers.  Note that a response with a sufficiently corrupted
    # status line will look like an HTTP/0.9 response.

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        """Prepare to read a response from sock; nothing is read yet.

        debuglevel > 0 prints the status line and headers as they are read.
        strict selects the status-line policy described above.  method is
        the request method; 'HEAD' makes begin() treat the body as empty.
        """
        self.fp = sock.makefile('rb', 0)
        self.debuglevel = debuglevel
        self.strict = strict
        self._method = method

        self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN # HTTP-Version
        self.status = _UNKNOWN  # Status-Code
        self.reason = _UNKNOWN  # Reason-Phrase

        self.chunked = _UNKNOWN         # is "chunked" being used?
        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
        self.length = _UNKNOWN          # number of bytes left in response
        self.will_close = _UNKNOWN      # conn will close at end of response

    def _read_status(self):
        """Read one status line and return (version, status, reason).

        Raises BadStatusLine when the line is empty, when the status code is
        not an integer in 100..999, or (strict mode only) when the line does
        not start with 'HTTP/'.  In non-strict mode such a line is taken to
        be the start of an HTTP/0.9 body: it is pushed back in front of the
        file and ("HTTP/0.9", 200, "") is returned.
        """
        # Initialize with Simple-Response defaults
        line = self.fp.readline()
        if self.debuglevel > 0:
            print("reply: %s" % repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail and status
                # will be treated as 0.9 response.
                version = ""
        if not version.startswith('HTTP/'):
            if self.strict:
                self.close()
                raise BadStatusLine(line)
            else:
                # assume it's a Simple-Response from an 0.9 server
                self.fp = LineAndFileWrapper(line, self.fp)
                return "HTTP/0.9", 200, ""

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Read the status line and headers and set up body framing.

        Does nothing if the headers were already read.  Interim 100
        responses are skipped.  Sets status, reason, version, msg, chunked,
        will_close and length.  Raises UnknownProtocol for a version string
        that is neither HTTP/0.9 nor HTTP/1.x.
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline().strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print("header: %s" % skip)

        self.status = status
        self.reason = reason.strip()
        if version == 'HTTP/1.0':
            self.version = 10
        elif version.startswith('HTTP/1.'):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == 'HTTP/0.9':
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # A 0.9 response is body only and ends when the connection
            # closes.  length must become None here: left at _UNKNOWN (a
            # string), read(amt) would fail doing arithmetic on it.
            self.length = None
            self.chunked = 0
            self.will_close = 1
            self.msg = HTTPMessage(StringIO())
            return

        self.msg = HTTPMessage(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                # trailing comma: no newline is added after the raw header
                print("header: %s" % hdr),

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == 'HEAD'):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1

    def _check_close(self):
        """Return True if the headers say the connection closes afterwards."""
        conn = self.msg.getheader('connection')
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.msg.getheader('keep-alive'):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.msg.getheader('proxy-connection')
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def close(self):
        """Close the response's file object, if it is still open."""
        if self.fp:
            self.fp.close()
            self.fp = None

    def isclosed(self):
        """Return True once the response's file object has been released."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    # XXX It would be nice to have readline and __iter__ for this, too.

    def read(self, amt=None):
        """Return body data: everything left, or at most amt bytes.

        Returns '' once the response is closed.  When the body length is
        known, a sized read is clipped to the bytes remaining and the
        response is closed as soon as the last byte has been delivered.
        """
        if self.fp is None:
            return ''

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.will_close:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
            self.close()        # we read everything
            return s

        if self.length is not None and amt > self.length:
            # clip the read to the "end of response"
            amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)

        if self.length is not None:
            # Count what actually arrived: a read may return fewer bytes
            # than were asked for.
            self.length -= len(s)
            if not self.length:
                # body exhausted; keeps the isclosed() invariant true
                self.close()

        return s

    def _read_chunked(self, amt):
        """Read from a chunked body: all of it, or at most amt bytes.

        self.chunk_left carries the unread remainder of the current chunk
        between calls (None means the next thing on the wire is a chunk-size
        line).  After the terminating zero-size chunk the trailer is
        discarded and the response is closed.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        pieces = []             # joined once at the end

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                chunk_left = int(line, 16)
                if chunk_left == 0:
                    break
            if amt is None:
                pieces.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                pieces.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(pieces)
            elif amt == chunk_left:
                pieces.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(pieces)
            else:
                pieces.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # EOF before the final CRLF; without this check the loop
                # would never terminate
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(pieces)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        pieces = []
        while amt > 0:
            chunk = self.fp.read(amt)
            if not chunk:
                raise IncompleteRead(''.join(pieces))
            pieces.append(chunk)
            amt -= len(chunk)
        return ''.join(pieces)

    def getheader(self, name, default=None):
        """Return the value of header name, or default if it is absent.

        Raises ResponseNotReady if the headers have not been read yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)

    def getheaders(self):
        """Return list of (header, value) tuples.

        Raises ResponseNotReady if the headers have not been read yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.items()
0560 
0561 
0562 class HTTPConnection:
0563 
0564     _http_vsn = 11
0565     _http_vsn_str = 'HTTP/1.1'
0566 
0567     response_class = HTTPResponse
0568     default_port = HTTP_PORT
0569     auto_open = 1
0570     debuglevel = 0
0571     strict = 0
0572 
0573     def __init__(self, host, port=None, strict=None):
0574         self.sock = None
0575         self._buffer = []
0576         self.__response = None
0577         self.__state = _CS_IDLE
0578         self._method = None
0579 
0580         self._set_hostport(host, port)
0581         if strict is not None:
0582             self.strict = strict
0583 
0584     def _set_hostport(self, host, port):
0585         if port is None:
0586             i = host.rfind(':')
0587             j = host.rfind(']')         # ipv6 addresses have [...]
0588             if i > j:
0589                 try:
0590                     port = int(host[i+1:])
0591                 except ValueError:
0592                     raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
0593                 host = host[:i]
0594             else:
0595                 port = self.default_port
0596             if host and host[0] == '[' and host[-1] == ']':
0597                 host = host[1:-1]
0598         self.host = host
0599         self.port = port
0600 
0601     def set_debuglevel(self, level):
0602         self.debuglevel = level
0603 
0604     def connect(self):
0605         """Connect to the host and port specified in __init__."""
0606         msg = "getaddrinfo returns an empty list"
0607         for res in socket.getaddrinfo(self.host, self.port, 0,
0608                                       socket.SOCK_STREAM):
0609             af, socktype, proto, canonname, sa = res
0610             try:
0611                 self.sock = socket.socket(af, socktype, proto)
0612                 if self.debuglevel > 0:
0613                     print "connect: (%s, %s)" % (self.host, self.port)
0614                 self.sock.connect(sa)
0615             except socket.error, msg:
0616                 if self.debuglevel > 0:
0617                     print 'connect fail:', (self.host, self.port)
0618                 if self.sock:
0619                     self.sock.close()
0620                 self.sock = None
0621                 continue
0622             break
0623         if not self.sock:
0624             raise socket.error, msg
0625 
0626     def close(self):
0627         """Close the connection to the HTTP server."""
0628         if self.sock:
0629             self.sock.close()   # close it manually... there may be other refs
0630             self.sock = None
0631         if self.__response:
0632             self.__response.close()
0633             self.__response = None
0634         self.__state = _CS_IDLE
0635 
0636     def send(self, str):
0637         """Send `str' to the server."""
0638         if self.sock is None:
0639             if self.auto_open:
0640                 self.connect()
0641             else:
0642                 raise NotConnected()
0643 
0644         # send the data to the server. if we get a broken pipe, then close
0645         # the socket. we want to reconnect when somebody tries to send again.
0646         #
0647         # NOTE: we DO propagate the error, though, because we cannot simply
0648         #       ignore the error... the caller will know if they can retry.
0649         if self.debuglevel > 0:
0650             print "send:", repr(str)
0651         try:
0652             self.sock.sendall(str)
0653         except socket.error, v:
0654             if v[0] == 32:      # Broken pipe
0655                 self.close()
0656             raise
0657 
0658     def _output(self, s):
0659         """Add a line of output to the current request buffer.
0660 
0661         Assumes that the line does *not* end with \\r\\n.
0662         """
0663         self._buffer.append(s)
0664 
0665     def _send_output(self):
0666         """Send the currently buffered request and clear the buffer.
0667 
0668         Appends an extra \\r\\n to the buffer.
0669         """
0670         self._buffer.extend(("", ""))
0671         msg = "\r\n".join(self._buffer)
0672         del self._buffer[:]
0673         self.send(msg)
0674 
0675     def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
0676         """Send a request to the server.
0677 
0678         `method' specifies an HTTP request method, e.g. 'GET'.
0679         `url' specifies the object being requested, e.g. '/index.html'.
0680         `skip_host' if True does not add automatically a 'Host:' header
0681         `skip_accept_encoding' if True does not add automatically an
0682            'Accept-Encoding:' header
0683         """
0684 
0685         # if a prior response has been completed, then forget about it.
0686         if self.__response and self.__response.isclosed():
0687             self.__response = None
0688 
0689 
0690         # in certain cases, we cannot issue another request on this connection.
0691         # this occurs when:
0692         #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
0693         #   2) a response to a previous request has signalled that it is going
0694         #      to close the connection upon completion.
0695         #   3) the headers for the previous response have not been read, thus
0696         #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
0697         #
0698         # if there is no prior response, then we can request at will.
0699         #
0700         # if point (2) is true, then we will have passed the socket to the
0701         # response (effectively meaning, "there is no prior response"), and
0702         # will open a new one when a new request is made.
0703         #
0704         # Note: if a prior response exists, then we *can* start a new request.
0705         #       We are not allowed to begin fetching the response to this new
0706         #       request, however, until that prior response is complete.
0707         #
0708         if self.__state == _CS_IDLE:
0709             self.__state = _CS_REQ_STARTED
0710         else:
0711             raise CannotSendRequest()
0712 
0713         # Save the method we use, we need it later in the response phase
0714         self._method = method
0715         if not url:
0716             url = '/'
0717         str = '%s %s %s' % (method, url, self._http_vsn_str)
0718 
0719         self._output(str)
0720 
0721         if self._http_vsn == 11:
0722             # Issue some standard headers for better HTTP/1.1 compliance
0723 
0724             if not skip_host:
0725                 # this header is issued *only* for HTTP/1.1
0726                 # connections. more specifically, this means it is
0727                 # only issued when the client uses the new
0728                 # HTTPConnection() class. backwards-compat clients
0729                 # will be using HTTP/1.0 and those clients may be
0730                 # issuing this header themselves. we should NOT issue
0731                 # it twice; some web servers (such as Apache) barf
0732                 # when they see two Host: headers
0733 
0734                 # If we need a non-standard port,include it in the
0735                 # header.  If the request is going through a proxy,
0736                 # but the host of the actual URL, not the host of the
0737                 # proxy.
0738 
0739                 netloc = ''
0740                 if url.startswith('http'):
0741                     nil, netloc, nil, nil, nil = urlsplit(url)
0742 
0743                 if netloc:
0744                     self.putheader('Host', netloc.encode("idna"))
0745                 elif self.port == HTTP_PORT:
0746                     self.putheader('Host', self.host.encode("idna"))
0747                 else:
0748                     self.putheader('Host', "%s:%s" % (self.host.encode("idna"), self.port))
0749 
0750             # note: we are assuming that clients will not attempt to set these
0751             #       headers since *this* library must deal with the
0752             #       consequences. this also means that when the supporting
0753             #       libraries are updated to recognize other forms, then this
0754             #       code should be changed (removed or updated).
0755 
0756             # we only want a Content-Encoding of "identity" since we don't
0757             # support encodings such as x-gzip or x-deflate.
0758             if not skip_accept_encoding:
0759                 self.putheader('Accept-Encoding', 'identity')
0760 
0761             # we can accept "chunked" Transfer-Encodings, but no others
0762             # NOTE: no TE header implies *only* "chunked"
0763             #self.putheader('TE', 'chunked')
0764 
0765             # if TE is supplied in the header, then it must appear in a
0766             # Connection header.
0767             #self.putheader('Connection', 'TE')
0768 
0769         else:
0770             # For HTTP/1.0, the server will assume "not chunked"
0771             pass
0772 
0773     def putheader(self, header, value):
0774         """Send a request header line to the server.
0775 
0776         For example: h.putheader('Accept', 'text/html')
0777         """
0778         if self.__state != _CS_REQ_STARTED:
0779             raise CannotSendHeader()
0780 
0781         str = '%s: %s' % (header, value)
0782         self._output(str)
0783 
0784     def endheaders(self):
0785         """Indicate that the last header line has been sent to the server."""
0786 
0787         if self.__state == _CS_REQ_STARTED:
0788             self.__state = _CS_REQ_SENT
0789         else:
0790             raise CannotSendHeader()
0791 
0792         self._send_output()
0793 
0794     def request(self, method, url, body=None, headers={}):
0795         """Send a complete request to the server."""
0796 
0797         try:
0798             self._send_request(method, url, body, headers)
0799         except socket.error, v:
0800             # trap 'Broken pipe' if we're allowed to automatically reconnect
0801             if v[0] != 32 or not self.auto_open:
0802                 raise
0803             # try one more time
0804             self._send_request(method, url, body, headers)
0805 
0806     def _send_request(self, method, url, body, headers):
0807         # honour explicitly requested Host: and Accept-Encoding headers
0808         header_names = dict.fromkeys([k.lower() for k in headers])
0809         skips = {}
0810         if 'host' in header_names:
0811             skips['skip_host'] = 1
0812         if 'accept-encoding' in header_names:
0813             skips['skip_accept_encoding'] = 1
0814 
0815         self.putrequest(method, url, **skips)
0816 
0817         if body and ('content-length' not in header_names):
0818             self.putheader('Content-Length', str(len(body)))
0819         for hdr, value in headers.iteritems():
0820             self.putheader(hdr, value)
0821         self.endheaders()
0822 
0823         if body:
0824             self.send(body)
0825 
0826     def getresponse(self):
0827         """Get the response to the request just sent; ResponseNotReady is
0828         raised if none was sent or a prior response is not yet complete."""
0829         # if a prior response has been completed, then forget about it.
0830         if self.__response and self.__response.isclosed():
0831             self.__response = None
0832 
0833         #
0834         # if a prior response exists, then it must be completed (otherwise, we
0835         # cannot read this response's header to determine the connection-close
0836         # behavior)
0837         #
0838         # note: if a prior response existed, but was connection-close, then the
0839         # socket and response were made independent of this HTTPConnection
0840         # object since a new request requires that we open a whole new
0841         # connection
0842         #
0843         # this means the prior response had one of two states:
0844         #   1) will_close: this connection was reset and the prior socket and
0845         #                  response operate independently
0846         #   2) persistent: the response was retained and we await its
0847         #                  isclosed() status to become true.
0848         #
0849         if self.__state != _CS_REQ_SENT or self.__response:
0850             raise ResponseNotReady()
0851 
0852         if self.debuglevel > 0:
0853             response = self.response_class(self.sock, self.debuglevel,
0854                                            strict=self.strict,
0855                                            method=self._method)
0856         else:
0857             response = self.response_class(self.sock, strict=self.strict,
0858                                            method=self._method)
0859 
0860         response.begin()    # NOTE(review): expected to settle will_close
0861         assert response.will_close != _UNKNOWN
0862         self.__state = _CS_IDLE    # a new putrequest() is legal from here on
0863 
0864         if response.will_close:
0865             # this effectively passes the connection to the response
0866             self.close()
0867         else:
0868             # remember this, so we can tell when it is complete
0869             self.__response = response
0870 
0871         return response
0872 
0873 # The next several classes are used to define FakeSocket, a socket-like
0874 # interface to an SSL connection.
0875 
0876 # The primary complexity comes from faking a makefile() method.  The
0877 # standard socket makefile() implementation calls dup() on the socket
0878 # file descriptor.  As a consequence, clients can call close() on the
0879 # parent socket and its makefile children in any order.  The underlying
0880 # socket isn't closed until they are all closed.
0881 
0882 # The implementation uses reference counting to keep the socket open
0883 # until the last client calls close().  SharedSocket keeps track of
0884 # the reference counting and SharedSocketClient provides a constructor
0885 # and close() method that call incref() and decref() correctly.
0886 
0887 class SharedSocket:
0888 
0889     def __init__(self, sock):
0890         self.sock = sock
0891         self._refcnt = 0
0892 
0893     def incref(self):
0894         self._refcnt += 1
0895 
0896     def decref(self):
0897         self._refcnt -= 1
0898         assert self._refcnt >= 0
0899         if self._refcnt == 0:
0900             self.sock.close()
0901 
0902     def __del__(self):
0903         self.sock.close()
0904 
0905 class SharedSocketClient:
0906 
0907     def __init__(self, shared):
0908         self._closed = 0
0909         self._shared = shared
0910         self._shared.incref()
0911         self._sock = shared.sock
0912 
0913     def close(self):
0914         if not self._closed:
0915             self._shared.decref()
0916             self._closed = 1
0917             self._shared = None
0918 
0919 class SSLFile(SharedSocketClient):
0920     """File-like object wrapping an SSL socket; offers read methods only."""
0921 
0922     BUFSIZE = 8192    # default number of bytes requested per SSL read
0923 
0924     def __init__(self, sock, ssl, bufsize=None):    # sock: shared socket holder
0925         SharedSocketClient.__init__(self, sock)
0926         self._ssl = ssl
0927         self._buf = ''    # data already read but not yet handed to a caller
0928         self._bufsize = bufsize or self.__class__.BUFSIZE    # 0/None -> default
0929 
0930     def _read(self):    # returns one chunk, or '' when no more data is available
0931         buf = ''
0932         # put in a loop so that we retry on transient errors
0933         while True:
0934             try:
0935                 buf = self._ssl.read(self._bufsize)
0936             except socket.sslerror, err:
0937                 if (err[0] == socket.SSL_ERROR_WANT_READ
0938                     or err[0] == socket.SSL_ERROR_WANT_WRITE):
0939                     continue    # retry immediately
0940                 if (err[0] == socket.SSL_ERROR_ZERO_RETURN
0941                     or err[0] == socket.SSL_ERROR_EOF):
0942                     break    # treated as end of data: buf is still ''
0943                 raise
0944             except socket.error, err:
0945                 if err[0] == errno.EINTR:
0946                     continue    # interrupted call: retry
0947                 if err[0] == errno.EBADF:
0948                     # XXX socket was closed?
0949                     break
0950                 raise
0951             else:
0952                 break    # successful read
0953         return buf
0954 
0955     def read(self, size=None):    # size=None: read until _read() returns ''
0956         L = [self._buf]
0957         avail = len(self._buf)
0958         while size is None or avail < size:
0959             s = self._read()
0960             if s == '':
0961                 break
0962             L.append(s)
0963             avail += len(s)
0964         all = "".join(L)
0965         if size is None:
0966             self._buf = ''
0967             return all
0968         else:
0969             self._buf = all[size:]    # keep the surplus for the next call
0970             return all[:size]
0971 
0972     def readline(self):    # returns through the first "\n", or all data left
0973         L = [self._buf]
0974         self._buf = ''
0975         while 1:
0976             i = L[-1].find("\n")    # only the newest chunk needs searching
0977             if i >= 0:
0978                 break
0979             s = self._read()
0980             if s == '':
0981                 break
0982             L.append(s)
0983         if i == -1:
0984             # loop exited because there is no more data
0985             return "".join(L)
0986         else:
0987             all = "".join(L)
0988             # XXX could do enough bookkeeping not to do a 2nd search
0989             i = all.find("\n") + 1
0990             line = all[:i]
0991             self._buf = all[i:]    # remainder after the newline is buffered
0992             return line
0993 
0994     def readlines(self, sizehint=0):    # sizehint=0 means: no size limit
0995         total = 0
0996         list = []
0997         while True:
0998             line = self.readline()
0999             if not line:
1000                 break
1001             list.append(line)
1002             total += len(line)
1003             if sizehint and total >= sizehint:
1004                 break    # stop once at least sizehint characters were read
1005         return list
1006 
1007     def fileno(self):
1008         return self._sock.fileno()
1009 
1010     def __iter__(self):    # iterating yields lines, see next()
1011         return self
1012 
1013     def next(self):
1014         line = self.readline()
1015         if not line:
1016             raise StopIteration
1017         return line
1018 
1019 class FakeSocket(SharedSocketClient):
1020 
1021     class _closedsocket:
1022         def __getattr__(self, name):
1023             raise error(9, 'Bad file descriptor')
1024 
1025     def __init__(self, sock, ssl):
1026         sock = SharedSocket(sock)
1027         SharedSocketClient.__init__(self, sock)
1028         self._ssl = ssl
1029 
1030     def close(self):
1031         SharedSocketClient.close(self)
1032         self._sock = self.__class__._closedsocket()
1033 
1034     def makefile(self, mode, bufsize=None):
1035         if mode != 'r' and mode != 'rb':
1036             raise UnimplementedFileMode()
1037         return SSLFile(self._shared, self._ssl, bufsize)
1038 
1039     def send(self, stuff, flags = 0):
1040         return self._ssl.write(stuff)
1041 
1042     sendall = send
1043 
1044     def recv(self, len = 1024, flags = 0):
1045         return self._ssl.read(len)
1046 
1047     def __getattr__(self, attr):
1048         return getattr(self._sock, attr)
1049 
1050 
1051 class HTTPSConnection(HTTPConnection):
1052     "This class allows communication via SSL."
1053 
1054     default_port = HTTPS_PORT
1055 
1056     def __init__(self, host, port=None, key_file=None, cert_file=None,
1057                  strict=None):
1058         HTTPConnection.__init__(self, host, port, strict)
1059         self.key_file = key_file
1060         self.cert_file = cert_file
1061 
1062     def connect(self):
1063         "Connect to a host on a given (SSL) port."
1064 
1065         sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1066         sock.connect((self.host, self.port))
1067         ssl = socket.ssl(sock, self.key_file, self.cert_file)
1068         self.sock = FakeSocket(sock, ssl)
1069 
1070 
1071 class HTTP:
1072     "Compatibility class with httplib.py from 1.5."
1073 
1074     _http_vsn = 10
1075     _http_vsn_str = 'HTTP/1.0'
1076 
1077     debuglevel = 0
1078 
1079     _connection_class = HTTPConnection
1080 
1081     def __init__(self, host='', port=None, strict=None):
1082         "Provide a default host, since the superclass requires one."
1083 
1084         # some joker passed 0 explicitly, meaning default port
1085         if port == 0:
1086             port = None
1087 
1088         # Note that we may pass an empty string as the host; this will throw
1089         # an error when we attempt to connect. Presumably, the client code
1090         # will call connect before then, with a proper host.
1091         self._setup(self._connection_class(host, port, strict))
1092 
1093     def _setup(self, conn):
1094         self._conn = conn
1095 
1096         # set up delegation to flesh out interface
1097         self.send = conn.send
1098         self.putrequest = conn.putrequest
1099         self.endheaders = conn.endheaders
1100         self.set_debuglevel = conn.set_debuglevel
1101 
1102         conn._http_vsn = self._http_vsn
1103         conn._http_vsn_str = self._http_vsn_str
1104 
1105         self.file = None
1106 
1107     def connect(self, host=None, port=None):
1108         "Accept arguments to set the host/port, since the superclass doesn't."
1109 
1110         if host is not None:
1111             self._conn._set_hostport(host, port)
1112         self._conn.connect()
1113 
1114     def getfile(self):
1115         "Provide a getfile, since the superclass' does not use this concept."
1116         return self.file
1117 
1118     def putheader(self, header, *values):
1119         "The superclass allows only one value argument."
1120         self._conn.putheader(header, '\r\n\t'.join(values))
1121 
1122     def getreply(self):
1123         """Compat definition since superclass does not define it.
1124 
1125         Returns a tuple consisting of:
1126         - server status code (e.g. '200' if all goes well)
1127         - server "reason" corresponding to status code
1128         - any RFC822 headers in the response from the server
1129         """
1130         try:
1131             response = self._conn.getresponse()
1132         except BadStatusLine, e:
1133             ### hmm. if getresponse() ever closes the socket on a bad request,
1134             ### then we are going to have problems with self.sock
1135 
1136             ### should we keep this behavior? do people use it?
1137             # keep the socket open (as a file), and return it
1138             self.file = self._conn.sock.makefile('rb', 0)
1139 
1140             # close our socket -- we want to restart after any protocol error
1141             self.close()
1142 
1143             self.headers = None
1144             return -1, e.line, None
1145 
1146         self.headers = response.msg
1147         self.file = response.fp
1148         return response.status, response.reason, response.msg
1149 
1150     def close(self):
1151         self._conn.close()
1152 
1153         # note that self.file == response.fp, which gets closed by the
1154         # superclass. just clear the object ref here.
1155         ### hmm. messy. if status==-1, then self.file is owned by us.
1156         ### well... we aren't explicitly closing, but losing this ref will
1157         ### do it
1158         self.file = None
1159 
1160 if hasattr(socket, 'ssl'):
1161     class HTTPS(HTTP):
1162         """Compatibility with 1.5 httplib interface
1163 
1164         Python 1.5.2 did not have an HTTPS class, but it defined an
1165         interface for sending http requests that is also useful for
1166         https.
1167         """
1168 
1169         _connection_class = HTTPSConnection
1170 
1171         def __init__(self, host='', port=None, key_file=None, cert_file=None,
1172                      strict=None):
1173             # provide a default host, pass the X509 cert info
1174 
1175             # urf. compensate for bad input.
1176             if port == 0:
1177                 port = None
1178             self._setup(self._connection_class(host, port, key_file,
1179                                                cert_file, strict))
1180 
1181             # we never actually use these for anything, but we keep them
1182             # here for compatibility with post-1.5.2 CVS.
1183             self.key_file = key_file
1184             self.cert_file = cert_file
1185 
1186 
1187 class HTTPException(Exception):
1188     # Subclasses that define an __init__ must call Exception.__init__
1189     # or define self.args.  Otherwise, str() will fail.
1190     pass
1191 
1192 class NotConnected(HTTPException):
1193     pass
1194 
1195 class InvalidURL(HTTPException):
1196     pass
1197 
1198 class UnknownProtocol(HTTPException):
1199     def __init__(self, version):
1200         self.args = version,
1201         self.version = version
1202 
1203 class UnknownTransferEncoding(HTTPException):
1204     pass
1205 
1206 class UnimplementedFileMode(HTTPException):
1207     pass
1208 
1209 class IncompleteRead(HTTPException):
1210     def __init__(self, partial):
1211         self.args = partial,
1212         self.partial = partial
1213 
1214 class ImproperConnectionState(HTTPException):
1215     pass
1216 
1217 class CannotSendRequest(ImproperConnectionState):
1218     pass
1219 
1220 class CannotSendHeader(ImproperConnectionState):
1221     pass
1222 
1223 class ResponseNotReady(ImproperConnectionState):
1224     pass
1225 
1226 class BadStatusLine(HTTPException):
1227     def __init__(self, line):
1228         self.args = line,
1229         self.line = line
1230 
1231 # for backwards compatibility (FakeSocket._closedsocket also raises `error`)
1232 error = HTTPException
1233 
1234 class LineAndFileWrapper:
1235     """A limited file-like object for HTTP/0.9 responses."""
1236 
1237     # The status-line parsing code calls readline(), which normally
1238     # gets the HTTP status line.  For a 0.9 response, however, this is
1239     # actually the first line of the body!  Clients need to get a
1240     # readable file object that contains that line.
1241 
1242     def __init__(self, line, file):    # line: text already read from `file`
1243         self._line = line
1244         self._file = file
1245         self._line_consumed = 0    # becomes 1 once all of `line` was returned
1246         self._line_offset = 0    # index of the first unreturned char of `line`
1247         self._line_left = len(line)    # number of chars of `line` not yet returned
1248 
1249     def __getattr__(self, attr):    # anything not defined here: ask the file
1250         return getattr(self._file, attr)
1251 
1252     def _done(self):
1253         # called when the last byte is read from the line.  After the
1254         # call, all read methods are delegated to the underlying file
1255         # object.
1256         self._line_consumed = 1
1257         self.read = self._file.read    # instance attributes shadow the methods
1258         self.readline = self._file.readline
1259         self.readlines = self._file.readlines
1260 
1261     def read(self, amt=None):    # amt=None: rest of the line plus whole file
1262         if self._line_consumed:
1263             return self._file.read(amt)
1264         assert self._line_left
1265         if amt is None or amt > self._line_left:
1266             s = self._line[self._line_offset:]    # rest of the saved line
1267             self._done()
1268             if amt is None:
1269                 return s + self._file.read()
1270             else:
1271                 return s + self._file.read(amt - len(s))    # top up from the file
1272         else:
1273             assert amt <= self._line_left
1274             i = self._line_offset
1275             j = i + amt
1276             s = self._line[i:j]    # request is served from the saved line alone
1277             self._line_offset = j
1278             self._line_left -= amt
1279             if self._line_left == 0:
1280                 self._done()
1281             return s
1282 
1283     def readline(self):    # first call returns what is left of the saved line
1284         if self._line_consumed:
1285             return self._file.readline()
1286         assert self._line_left
1287         s = self._line[self._line_offset:]
1288         self._done()
1289         return s
1290 
1291     def readlines(self, size=None):    # saved line first, then the file's lines
1292         if self._line_consumed:
1293             return self._file.readlines(size)
1294         assert self._line_left
1295         L = [self._line[self._line_offset:]]
1296         self._done()
1297         if size is None:
1298             return L + self._file.readlines()
1299         else:
1300             return L + self._file.readlines(size)
1301 
1302 def test():
1303     """Test this module.
1304 
1305     A hodge podge of tests collected here, because they have too many
1306     external dependencies for the regular test suite.  Command line:
1307     [-d]... [host [selector]]; each -d raises the debug level by one."""
1308 
1309     import sys
1310     import getopt
1311     opts, args = getopt.getopt(sys.argv[1:], 'd')
1312     dl = 0    # debug level handed to set_debuglevel()
1313     for o, a in opts:
1314         if o == '-d': dl = dl + 1
1315     host = 'www.python.org'    # defaults, used when no arguments are given
1316     selector = '/'
1317     if args[0:]: host = args[0]
1318     if args[1:]: selector = args[1]
1319     h = HTTP()    # first test: GET through the 1.5-compatible interface
1320     h.set_debuglevel(dl)
1321     h.connect(host)
1322     h.putrequest('GET', selector)
1323     h.endheaders()
1324     status, reason, headers = h.getreply()
1325     print 'status =', status
1326     print 'reason =', reason
1327     print "read", len(h.getfile().read())
1328     print
1329     if headers:
1330         for header in headers.headers: print header.strip()
1331     print
1332 
1333     # minimal test that code to extract host from url works
1334     class HTTP11(HTTP):
1335         _http_vsn = 11
1336         _http_vsn_str = 'HTTP/1.1'
1337 
1338     h = HTTP11('www.python.org')
1339     h.putrequest('GET', 'http://www.python.org/~jeremy/')
1340     h.endheaders()
1341     h.getreply()
1342     h.close()
1343 
1344     if hasattr(socket, 'ssl'):    # HTTPS is only defined in that case
1345 
1346         for host, selector in (('sourceforge.net', '/projects/python'),
1347                                ):
1348             print "https://%s%s" % (host, selector)
1349             hs = HTTPS()    # same GET sequence as above, over SSL
1350             hs.set_debuglevel(dl)
1351             hs.connect(host)
1352             hs.putrequest('GET', selector)
1353             hs.endheaders()
1354             status, reason, headers = hs.getreply()
1355             print 'status =', status
1356             print 'reason =', reason
1357             print "read", len(hs.getfile().read())
1358             print
1359             if headers:
1360                 for header in headers.headers: print header.strip()
1361             print
1362 
1363 if __name__ == '__main__':    # executed as a script: run the network tests
1364     test()
1365 

Generated by PyXR 0.9.4
SourceForge.net Logo