0001 """HTTP server base class. 0002 0003 Note: the class in this module doesn't implement any HTTP request; see 0004 SimpleHTTPServer for simple implementations of GET, HEAD and POST 0005 (including CGI scripts). It does, however, optionally implement HTTP/1.1 0006 persistent connections, as of version 0.3. 0007 0008 Contents: 0009 0010 - BaseHTTPRequestHandler: HTTP request handler base class 0011 - test: test function 0012 0013 XXX To do: 0014 0015 - log requests even later (to capture byte count) 0016 - log user-agent header and other interesting goodies 0017 - send error log to separate file 0018 """ 0019 0020 0021 # See also: 0022 # 0023 # HTTP Working Group T. Berners-Lee 0024 # INTERNET-DRAFT R. T. Fielding 0025 # <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen 0026 # Expires September 8, 1995 March 8, 1995 0027 # 0028 # URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt 0029 # 0030 # and 0031 # 0032 # Network Working Group R. Fielding 0033 # Request for Comments: 2616 et al 0034 # Obsoletes: 2068 June 1999 0035 # Category: Standards Track 0036 # 0037 # URL: http://www.faqs.org/rfcs/rfc2616.html 0038 0039 # Log files 0040 # --------- 0041 # 0042 # Here's a quote from the NCSA httpd docs about log file format. 0043 # 0044 # | The logfile format is as follows. Each line consists of: 0045 # | 0046 # | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb 0047 # | 0048 # | host: Either the DNS name or the IP number of the remote client 0049 # | rfc931: Any information returned by identd for this person, 0050 # | - otherwise. 0051 # | authuser: If user sent a userid for authentication, the user name, 0052 # | - otherwise. 0053 # | DD: Day 0054 # | Mon: Month (calendar name) 0055 # | YYYY: Year 0056 # | hh: hour (24-hour format, the machine's timezone) 0057 # | mm: minutes 0058 # | ss: seconds 0059 # | request: The first line of the HTTP request as sent by the client. 0060 # | ddd: the status code returned by the server, - if not available. 0061 # | bbbb: the total number of bytes sent, 0062 # | *not including the HTTP/1.0 header*, - if not available 0063 # | 0064 # | You can determine the name of the file accessed through request. 0065 # 0066 # (Actually, the latter is only true if you know the server configuration 0067 # at the time the request was made!) 0068 0069 __version__ = "0.3" 0070 0071 __all__ = ["HTTPServer", "BaseHTTPRequestHandler"] 0072 0073 import sys 0074 import time 0075 import socket # For gethostbyaddr() 0076 import mimetools 0077 import SocketServer 0078 0079 # Default error message 0080 DEFAULT_ERROR_MESSAGE = """\ 0081 <head> 0082 <title>Error response</title> 0083 </head> 0084 <body> 0085 <h1>Error response</h1> 0086 <p>Error code %(code)d. 0087 <p>Message: %(message)s. 0088 <p>Error code explanation: %(code)s = %(explain)s. 0089 </body> 0090 """ 0091 0092 0093 class HTTPServer(SocketServer.TCPServer): 0094 0095 allow_reuse_address = 1 # Seems to make sense in testing environment 0096 0097 def server_bind(self): 0098 """Override server_bind to store the server name.""" 0099 SocketServer.TCPServer.server_bind(self) 0100 host, port = self.socket.getsockname()[:2] 0101 self.server_name = socket.getfqdn(host) 0102 self.server_port = port 0103 0104 0105 class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler): 0106 0107 """HTTP request handler base class. 0108 0109 The following explanation of HTTP serves to guide you through the 0110 code as well as to expose any misunderstandings I may have about 0111 HTTP (so you don't need to read the code to figure out I'm wrong 0112 :-). 0113 0114 HTTP (HyperText Transfer Protocol) is an extensible protocol on 0115 top of a reliable stream transport (e.g. TCP/IP). The protocol 0116 recognizes three parts to a request: 0117 0118 1. One line identifying the request type and path 0119 2. An optional set of RFC-822-style headers 0120 3. An optional data part 0121 0122 The headers and data are separated by a blank line. 0123 0124 The first line of the request has the form 0125 0126 <command> <path> <version> 0127 0128 where <command> is a (case-sensitive) keyword such as GET or POST, 0129 <path> is a string containing path information for the request, 0130 and <version> should be the string "HTTP/1.0" or "HTTP/1.1". 0131 <path> is encoded using the URL encoding scheme (using %xx to signify 0132 the ASCII character with hex code xx). 0133 0134 The specification specifies that lines are separated by CRLF but 0135 for compatibility with the widest range of clients recommends 0136 servers also handle LF. Similarly, whitespace in the request line 0137 is treated sensibly (allowing multiple spaces between components 0138 and allowing trailing whitespace). 0139 0140 Similarly, for output, lines ought to be separated by CRLF pairs 0141 but most clients grok LF characters just fine. 0142 0143 If the first line of the request has the form 0144 0145 <command> <path> 0146 0147 (i.e. <version> is left out) then this is assumed to be an HTTP 0148 0.9 request; this form has no optional headers and data part and 0149 the reply consists of just the data. 0150 0151 The reply form of the HTTP 1.x protocol again has three parts: 0152 0153 1. One line giving the response code 0154 2. An optional set of RFC-822-style headers 0155 3. The data 0156 0157 Again, the headers and data are separated by a blank line. 0158 0159 The response code line has the form 0160 0161 <version> <responsecode> <responsestring> 0162 0163 where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), 0164 <responsecode> is a 3-digit response code indicating success or 0165 failure of the request, and <responsestring> is an optional 0166 human-readable string explaining what the response code means. 0167 0168 This server parses the request and the headers, and then calls a 0169 function specific to the request type (<command>). Specifically, 0170 a request SPAM will be handled by a method do_SPAM(). If no 0171 such method exists the server sends an error response to the 0172 client. If it exists, it is called with no arguments: 0173 0174 do_SPAM() 0175 0176 Note that the request name is case sensitive (i.e. SPAM and spam 0177 are different requests). 0178 0179 The various request details are stored in instance variables: 0180 0181 - client_address is the client IP address in the form (host, 0182 port); 0183 0184 - command, path and version are the broken-down request line; 0185 0186 - headers is an instance of mimetools.Message (or a derived 0187 class) containing the header information; 0188 0189 - rfile is a file object open for reading positioned at the 0190 start of the optional input data part; 0191 0192 - wfile is a file object open for writing. 0193 0194 IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! 0195 0196 The first thing to be written must be the response line. Then 0197 follow 0 or more header lines, then a blank line, and then the 0198 actual data (if any). The meaning of the header lines depends on 0199 the command executed by the server; in most cases, when data is 0200 returned, there should be at least one header line of the form 0201 0202 Content-type: <type>/<subtype> 0203 0204 where <type> and <subtype> should be registered MIME types, 0205 e.g. "text/html" or "text/plain". 0206 0207 """ 0208 0209 # The Python system version, truncated to its first component. 0210 sys_version = "Python/" + sys.version.split()[0] 0211 0212 # The server software version. You may want to override this. 0213 # The format is multiple whitespace-separated strings, 0214 # where each string is of the form name[/version]. 0215 server_version = "BaseHTTP/" + __version__ 0216 0217 def parse_request(self): 0218 """Parse a request (internal). 0219 0220 The request should be stored in self.raw_requestline; the results 0221 are in self.command, self.path, self.request_version and 0222 self.headers. 0223 0224 Return True for success, False for failure; on failure, an 0225 error is sent back. 0226 0227 """ 0228 self.command = None # set in case of error on the first line 0229 self.request_version = version = "HTTP/0.9" # Default 0230 self.close_connection = 1 0231 requestline = self.raw_requestline 0232 if requestline[-2:] == '\r\n': 0233 requestline = requestline[:-2] 0234 elif requestline[-1:] == '\n': 0235 requestline = requestline[:-1] 0236 self.requestline = requestline 0237 words = requestline.split() 0238 if len(words) == 3: 0239 [command, path, version] = words 0240 if version[:5] != 'HTTP/': 0241 self.send_error(400, "Bad request version (%r)" % version) 0242 return False 0243 try: 0244 base_version_number = version.split('/', 1)[1] 0245 version_number = base_version_number.split(".") 0246 # RFC 2145 section 3.1 says there can be only one "." and 0247 # - major and minor numbers MUST be treated as 0248 # separate integers; 0249 # - HTTP/2.4 is a lower version than HTTP/2.13, which in 0250 # turn is lower than HTTP/12.3; 0251 # - Leading zeros MUST be ignored by recipients. 0252 if len(version_number) != 2: 0253 raise ValueError 0254 version_number = int(version_number[0]), int(version_number[1]) 0255 except (ValueError, IndexError): 0256 self.send_error(400, "Bad request version (%r)" % version) 0257 return False 0258 if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": 0259 self.close_connection = 0 0260 if version_number >= (2, 0): 0261 self.send_error(505, 0262 "Invalid HTTP Version (%s)" % base_version_number) 0263 return False 0264 elif len(words) == 2: 0265 [command, path] = words 0266 self.close_connection = 1 0267 if command != 'GET': 0268 self.send_error(400, 0269 "Bad HTTP/0.9 request type (%r)" % command) 0270 return False 0271 elif not words: 0272 return False 0273 else: 0274 self.send_error(400, "Bad request syntax (%r)" % requestline) 0275 return False 0276 self.command, self.path, self.request_version = command, path, version 0277 0278 # Examine the headers and look for a Connection directive 0279 self.headers = self.MessageClass(self.rfile, 0) 0280 0281 conntype = self.headers.get('Connection', "") 0282 if conntype.lower() == 'close': 0283 self.close_connection = 1 0284 elif (conntype.lower() == 'keep-alive' and 0285 self.protocol_version >= "HTTP/1.1"): 0286 self.close_connection = 0 0287 return True 0288 0289 def handle_one_request(self): 0290 """Handle a single HTTP request. 0291 0292 You normally don't need to override this method; see the class 0293 __doc__ string for information on how to handle specific HTTP 0294 commands such as GET and POST. 0295 0296 """ 0297 self.raw_requestline = self.rfile.readline() 0298 if not self.raw_requestline: 0299 self.close_connection = 1 0300 return 0301 if not self.parse_request(): # An error code has been sent, just exit 0302 return 0303 mname = 'do_' + self.command 0304 if not hasattr(self, mname): 0305 self.send_error(501, "Unsupported method (%r)" % self.command) 0306 return 0307 method = getattr(self, mname) 0308 method() 0309 0310 def handle(self): 0311 """Handle multiple requests if necessary.""" 0312 self.close_connection = 1 0313 0314 self.handle_one_request() 0315 while not self.close_connection: 0316 self.handle_one_request() 0317 0318 def send_error(self, code, message=None): 0319 """Send and log an error reply. 0320 0321 Arguments are the error code, and a detailed message. 0322 The detailed message defaults to the short entry matching the 0323 response code. 0324 0325 This sends an error response (so it must be called before any 0326 output has been generated), logs the error, and finally sends 0327 a piece of HTML explaining the error to the user. 0328 0329 """ 0330 0331 try: 0332 short, long = self.responses[code] 0333 except KeyError: 0334 short, long = '???', '???' 0335 if message is None: 0336 message = short 0337 explain = long 0338 self.log_error("code %d, message %s", code, message) 0339 content = (self.error_message_format % 0340 {'code': code, 'message': message, 'explain': explain}) 0341 self.send_response(code, message) 0342 self.send_header("Content-Type", "text/html") 0343 self.send_header('Connection', 'close') 0344 self.end_headers() 0345 if self.command != 'HEAD' and code >= 200 and code not in (204, 304): 0346 self.wfile.write(content) 0347 0348 error_message_format = DEFAULT_ERROR_MESSAGE 0349 0350 def send_response(self, code, message=None): 0351 """Send the response header and log the response code. 0352 0353 Also send two standard headers with the server software 0354 version and the current date. 0355 0356 """ 0357 self.log_request(code) 0358 if message is None: 0359 if code in self.responses: 0360 message = self.responses[code][0] 0361 else: 0362 message = '' 0363 if self.request_version != 'HTTP/0.9': 0364 self.wfile.write("%s %d %s\r\n" % 0365 (self.protocol_version, code, message)) 0366 # print (self.protocol_version, code, message) 0367 self.send_header('Server', self.version_string()) 0368 self.send_header('Date', self.date_time_string()) 0369 0370 def send_header(self, keyword, value): 0371 """Send a MIME header.""" 0372 if self.request_version != 'HTTP/0.9': 0373 self.wfile.write("%s: %s\r\n" % (keyword, value)) 0374 0375 if keyword.lower() == 'connection': 0376 if value.lower() == 'close': 0377 self.close_connection = 1 0378 elif value.lower() == 'keep-alive': 0379 self.close_connection = 0 0380 0381 def end_headers(self): 0382 """Send the blank line ending the MIME headers.""" 0383 if self.request_version != 'HTTP/0.9': 0384 self.wfile.write("\r\n") 0385 0386 def log_request(self, code='-', size='-'): 0387 """Log an accepted request. 0388 0389 This is called by send_reponse(). 0390 0391 """ 0392 0393 self.log_message('"%s" %s %s', 0394 self.requestline, str(code), str(size)) 0395 0396 def log_error(self, *args): 0397 """Log an error. 0398 0399 This is called when a request cannot be fulfilled. By 0400 default it passes the message on to log_message(). 0401 0402 Arguments are the same as for log_message(). 0403 0404 XXX This should go to the separate error log. 0405 0406 """ 0407 0408 self.log_message(*args) 0409 0410 def log_message(self, format, *args): 0411 """Log an arbitrary message. 0412 0413 This is used by all other logging functions. Override 0414 it if you have specific logging wishes. 0415 0416 The first argument, FORMAT, is a format string for the 0417 message to be logged. If the format string contains 0418 any % escapes requiring parameters, they should be 0419 specified as subsequent arguments (it's just like 0420 printf!). 0421 0422 The client host and current date/time are prefixed to 0423 every message. 0424 0425 """ 0426 0427 sys.stderr.write("%s - - [%s] %s\n" % 0428 (self.address_string(), 0429 self.log_date_time_string(), 0430 format%args)) 0431 0432 def version_string(self): 0433 """Return the server software version string.""" 0434 return self.server_version + ' ' + self.sys_version 0435 0436 def date_time_string(self): 0437 """Return the current date and time formatted for a message header.""" 0438 now = time.time() 0439 year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now) 0440 s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( 0441 self.weekdayname[wd], 0442 day, self.monthname[month], year, 0443 hh, mm, ss) 0444 return s 0445 0446 def log_date_time_string(self): 0447 """Return the current time formatted for logging.""" 0448 now = time.time() 0449 year, month, day, hh, mm, ss, x, y, z = time.localtime(now) 0450 s = "%02d/%3s/%04d %02d:%02d:%02d" % ( 0451 day, self.monthname[month], year, hh, mm, ss) 0452 return s 0453 0454 weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] 0455 0456 monthname = [None, 0457 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 0458 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] 0459 0460 def address_string(self): 0461 """Return the client address formatted for logging. 0462 0463 This version looks up the full hostname using gethostbyaddr(), 0464 and tries to find a name that contains at least one dot. 0465 0466 """ 0467 0468 host, port = self.client_address[:2] 0469 return socket.getfqdn(host) 0470 0471 # Essentially static class variables 0472 0473 # The version of the HTTP protocol we support. 0474 # Set this to HTTP/1.1 to enable automatic keepalive 0475 protocol_version = "HTTP/1.0" 0476 0477 # The Message-like class used to parse headers 0478 MessageClass = mimetools.Message 0479 0480 # Table mapping response codes to messages; entries have the 0481 # form {code: (shortmessage, longmessage)}. 0482 # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html 0483 responses = { 0484 100: ('Continue', 'Request received, please continue'), 0485 101: ('Switching Protocols', 0486 'Switching to new protocol; obey Upgrade header'), 0487 0488 200: ('OK', 'Request fulfilled, document follows'), 0489 201: ('Created', 'Document created, URL follows'), 0490 202: ('Accepted', 0491 'Request accepted, processing continues off-line'), 0492 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), 0493 204: ('No response', 'Request fulfilled, nothing follows'), 0494 205: ('Reset Content', 'Clear input form for further input.'), 0495 206: ('Partial Content', 'Partial content follows.'), 0496 0497 300: ('Multiple Choices', 0498 'Object has several resources -- see URI list'), 0499 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), 0500 302: ('Found', 'Object moved temporarily -- see URI list'), 0501 303: ('See Other', 'Object moved -- see Method and URL list'), 0502 304: ('Not modified', 0503 'Document has not changed since given time'), 0504 305: ('Use Proxy', 0505 'You must use proxy specified in Location to access this ' 0506 'resource.'), 0507 307: ('Temporary Redirect', 0508 'Object moved temporarily -- see URI list'), 0509 0510 400: ('Bad request', 0511 'Bad request syntax or unsupported method'), 0512 401: ('Unauthorized', 0513 'No permission -- see authorization schemes'), 0514 402: ('Payment required', 0515 'No payment -- see charging schemes'), 0516 403: ('Forbidden', 0517 'Request forbidden -- authorization will not help'), 0518 404: ('Not Found', 'Nothing matches the given URI'), 0519 405: ('Method Not Allowed', 0520 'Specified method is invalid for this server.'), 0521 406: ('Not Acceptable', 'URI not available in preferred format.'), 0522 407: ('Proxy Authentication Required', 'You must authenticate with ' 0523 'this proxy before proceeding.'), 0524 408: ('Request Time-out', 'Request timed out; try again later.'), 0525 409: ('Conflict', 'Request conflict.'), 0526 410: ('Gone', 0527 'URI no longer exists and has been permanently removed.'), 0528 411: ('Length Required', 'Client must specify Content-Length.'), 0529 412: ('Precondition Failed', 'Precondition in headers is false.'), 0530 413: ('Request Entity Too Large', 'Entity is too large.'), 0531 414: ('Request-URI Too Long', 'URI is too long.'), 0532 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), 0533 416: ('Requested Range Not Satisfiable', 0534 'Cannot satisfy request range.'), 0535 417: ('Expectation Failed', 0536 'Expect condition could not be satisfied.'), 0537 0538 500: ('Internal error', 'Server got itself in trouble'), 0539 501: ('Not Implemented', 0540 'Server does not support this operation'), 0541 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), 0542 503: ('Service temporarily overloaded', 0543 'The server cannot process the request due to a high load'), 0544 504: ('Gateway timeout', 0545 'The gateway server did not receive a timely response'), 0546 505: ('HTTP Version not supported', 'Cannot fulfill request.'), 0547 } 0548 0549 0550 def test(HandlerClass = BaseHTTPRequestHandler, 0551 ServerClass = HTTPServer, protocol="HTTP/1.0"): 0552 """Test the HTTP request handler class. 0553 0554 This runs an HTTP server on port 8000 (or the first command line 0555 argument). 0556 0557 """ 0558 0559 if sys.argv[1:]: 0560 port = int(sys.argv[1]) 0561 else: 0562 port = 8000 0563 server_address = ('', port) 0564 0565 HandlerClass.protocol_version = protocol 0566 httpd = ServerClass(server_address, HandlerClass) 0567 0568 sa = httpd.socket.getsockname() 0569 print "Serving HTTP on", sa[0], "port", sa[1], "..." 0570 httpd.serve_forever() 0571 0572 0573 if __name__ == '__main__': 0574 test() 0575
Generated by PyXR 0.9.4