python之HTTPServer学习

来源:互联网 发布:印度女人知乎 编辑:程序博客网 时间:2024/05/21 07:56

背景

最近在研究python轻量级web开发框架-Flask,代码量不多,但是开发web服务器却很方便,很好奇这么小的体量内部有什么秘密。经一步步分析,发现Flask内核其实是werkzeug库,而该库底层实现使用了python的HTTPServer包,因此追本溯源,最终还得从HTTPServer包开始。

werkzeug分析

直接开门见山,使用werkzeug库的总入口为函数run_simple,定义如下:

def run_simple(hostname, port, application, use_reloader=False,               use_debugger=False, use_evalex=True,               extra_files=None, reloader_interval=1,               reloader_type='auto', threaded=False,               processes=1, request_handler=None, static_files=None,               passthrough_errors=False, ssl_context=None):
其中核心参数应该就是入参application,该对象有一套规范,后面分析时慢慢揭开,其他参数按照默认来进行后续分析

run_simple的核心处理如下:

def inner():
    """Create the server and serve requests until it stops.

    Nested helper of ``run_simple``: ``hostname``, ``port``,
    ``application`` and the other server options are free variables taken
    from the enclosing scope.
    """
    # WERKZEUG_SERVER_FD, when present and numeric, is forwarded to
    # make_server() as ``fd``; a missing or malformed value means None.
    try:
        inherited_fd = int(os.environ['WERKZEUG_SERVER_FD'])
    except (LookupError, ValueError):
        inherited_fd = None

    server = make_server(
        hostname, port, application, threaded, processes,
        request_handler, passthrough_errors, ssl_context,
        fd=inherited_fd
    )

    # The startup message is only logged when no descriptor was supplied.
    if inherited_fd is None:
        log_startup(server.socket)

    server.serve_forever()

继续分析make_server:

def make_server(host=None, port=None, app=None, threaded=False, processes=1,
                request_handler=None, passthrough_errors=False,
                ssl_context=None, fd=None):
    """Create a new server instance that is either threaded, or forks
    or just processes one request after another.

    Returns a ``ThreadedWSGIServer`` when ``threaded`` is true, a
    ``ForkingWSGIServer`` when ``processes`` is greater than one, and a
    ``BaseWSGIServer`` otherwise.

    Raises ``ValueError`` when both ``threaded`` and ``processes > 1`` are
    requested, since the two modes are mutually exclusive.
    """
    multi_process = processes > 1

    if threaded and multi_process:
        raise ValueError("cannot have a multithreaded and "
                         "multi process server.")

    if threaded:
        return ThreadedWSGIServer(host, port, app, request_handler,
                                  passthrough_errors, ssl_context, fd=fd)

    if multi_process:
        return ForkingWSGIServer(host, port, app, processes, request_handler,
                                 passthrough_errors, ssl_context, fd=fd)

    return BaseWSGIServer(host, port, app, request_handler,
                          passthrough_errors, ssl_context, fd=fd)

默认情况下返回BaseWSGIServer

class BaseWSGIServer(HTTPServer, object):
    """Simple single-threaded, single-process WSGI server."""

    # Both concurrency flags are off for this base server class.
    multithread = False
    multiprocess = False
    request_queue_size = LISTEN_QUEUE

    def __init__(self, host, port, app, handler=None,
                 passthrough_errors=False, ssl_context=None, fd=None):
        # When the caller supplies no handler class, WSGIRequestHandler
        # is used.
        if handler is None:
            handler = WSGIRequestHandler
        ......  # the remainder of __init__ is elided in the article

    def serve_forever(self):
        """Run HTTPServer.serve_forever() and close the server afterwards.

        A KeyboardInterrupt is swallowed; server_close() is called in all
        cases once the loop ends.
        """
        self.shutdown_signal = False
        try:
            HTTPServer.serve_forever(self)
        except KeyboardInterrupt:
            pass
        finally:
            self.server_close()

默认情况下,请求处理器为WSGIRequestHandler,核心处理是HTTPServer.serve_forever(self)


HTTPServer

在python安装路径的Lib目录下,BaseHTTPServer.py定义了HTTPServer:

class HTTPServer(SocketServer.TCPServer):
    """TCP server that remembers its own host name and port once bound."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Override server_bind to store the server name.

        After the base class has bound the socket, the first two items of
        ``getsockname()`` are taken as host and port; the host is resolved
        with ``socket.getfqdn`` and stored as ``server_name``, the port as
        ``server_port``.
        """
        SocketServer.TCPServer.server_bind(self)
        bound_address = self.socket.getsockname()[:2]
        host, port = bound_address
        self.server_name = socket.getfqdn(host)
        self.server_port = port

def serve_forever(self, poll_interval=0.5):
    """Handle one request at a time until shutdown.

    Polls for shutdown every poll_interval seconds. Ignores
    self.timeout. If you need to do periodic tasks, do them in
    another thread.
    """
    self.__is_shut_down.clear()
    try:
        while not self.__shutdown_request:
            # XXX: Consider using another file descriptor or
            # connecting to the socket to wake this up instead of
            # polling. Polling reduces our responsiveness to a
            # shutdown request and wastes cpu at all other times.
            readable, _writable, _errored = _eintr_retry(
                select.select, [self], [], [], poll_interval)
            if self in readable:
                self._handle_request_noblock()
    finally:
        # Reset the request flag and signal that the loop has ended,
        # whether it ended normally or through an exception.
        self.__shutdown_request = False
        self.__is_shut_down.set()
    def _handle_request_noblock(self):
        """Handle one request, without blocking.

        I assume that select.select has returned that the socket is
        readable before this function was called, so there should be
        no risk of blocking in get_request().
        """
        try:
            request, client_address = self.get_request()
        except socket.error:
            # Nothing could be accepted; there is no request to process.
            return

        # Rejected requests are shut down without being processed.
        if not self.verify_request(request, client_address):
            self.shutdown_request(request)
            return

        try:
            self.process_request(request, client_address)
        except:
            # Any failure is reported through handle_error(), after which
            # the request is shut down.
            self.handle_error(request, client_address)
            self.shutdown_request(request)


最后核心处理落入process_request---》finish_request---》RequestHandlerClass(request, client_address, self),也即最后调用进入默认请求处理器WSGIRequestHandler中进行处理

请求处理器

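请求处理器继承体系如下(原文此处为一张图,这里以文字表示,箭头左侧为父类):BaseRequestHandler → StreamRequestHandler → BaseHTTPRequestHandler → WSGIRequestHandler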


核心在BaseRequestHandler的__init__方法中,这里完成了整个处理框架:

class BaseRequestHandler:
    """Skeleton for request handlers; the whole lifecycle runs in __init__.

    Constructing an instance stores the request, client address and
    server, then calls ``setup()``, ``handle()`` and ``finish()``.
    Subclasses override those three hooks; here they do nothing.
    """

    def __init__(self, request, client_address, server):
        """Store the arguments and drive setup -> handle -> finish.

        ``finish()`` runs even when ``handle()`` raises; it does not run
        when ``setup()`` raises.
        """
        self.request, self.client_address, self.server = (
            request, client_address, server)
        self.setup()
        try:
            self.handle()
        finally:
            self.finish()

    def setup(self):
        """Hook called before handle(); no-op by default."""
        pass

    def handle(self):
        """Hook that processes the request; no-op by default."""
        pass

    def finish(self):
        """Hook called after handle(); no-op by default."""
        pass

StreamRequestHandler类实现了setup函数和finish函数:
class StreamRequestHandler(BaseRequestHandler):
    """Request handler exposing the connection as ``rfile`` / ``wfile``."""

    # When true, setup() sets TCP_NODELAY on the connection.
    disable_nagle_algorithm = False

    # When not None, setup() applies it to the connection via settimeout().
    timeout = None

    # Buffer sizes handed to makefile() for the read and write sides.
    rbufsize = -1
    wbufsize = 0

    def setup(self):
        """Configure the connection and open the two file objects."""
        self.connection = self.request

        if self.timeout is not None:
            self.connection.settimeout(self.timeout)

        if self.disable_nagle_algorithm:
            self.connection.setsockopt(
                socket.IPPROTO_TCP, socket.TCP_NODELAY, True)

        self.rfile = self.connection.makefile('rb', self.rbufsize)
        self.wfile = self.connection.makefile('wb', self.wbufsize)

    def finish(self):
        """Flush pending output if possible, then close both file objects."""
        if not self.wfile.closed:
            try:
                self.wfile.flush()
            except socket.error:
                # A final socket error may have occurred here, such as
                # the local error ECONNABORTED.
                pass

        self.wfile.close()
        self.rfile.close()

其中self.request是通过Server的get_request()获取的,在BaseWSGIServer中get_request()实现如下:
    def get_request(self):
        """Accept one connection on the server socket.

        Returns the ``(connection, client_info)`` pair produced by
        ``self.socket.accept()`` as a tuple.
        """
        connection, client_info = self.socket.accept()
        return connection, client_info

也即通过监听套接字accept()得到的一个新的连接socket


BaseHTTPRequestHandler类实现了handle函数:

    def handle(self):
        """Handle multiple requests if necessary.

        ``close_connection`` is set to 1 up front, so exactly one request
        is served unless ``handle_one_request()`` clears the flag; the
        loop ends as soon as the flag is true after a request.
        """
        self.close_connection = 1
        while True:
            self.handle_one_request()
            if self.close_connection:
                break
    def handle_one_request(self):
        """Read one request line and dispatch it to the matching do_XXX().

        The request line is read from ``self.rfile``:

        * longer than 65536 bytes: reply with error 414 and return;
        * empty: set ``close_connection`` to 1 and return;
        * ``parse_request()`` false: return (it has already replied);
        * no ``do_<command>`` attribute: reply with error 501 and return;
        * otherwise call ``do_<command>()`` and flush ``self.wfile``.

        A ``socket.timeout`` raised anywhere in that sequence is logged
        and marks the connection for closing.
        """
        try:
            # Read one byte more than the limit so an over-long line can
            # be told apart from one that is exactly at the limit.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                self.close_connection = 1
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(501, "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            # actually send the response if not already done.
            self.wfile.flush()
        except socket.timeout as e:
            # a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = 1
            return

从上面可以看出,解析出请求行后,剩下的就是调用处理器自身的do_XXX()函数,其中XXX是http请求的方法名,到这里也可以看出,我们如果有特殊的处理方式时可以继承自BaseHTTPRequestHandler并且自己实现对应的do_XXX()函数即可。

接下来继续看WSGIRequestHandler类,该类继承了BaseHTTPRequestHandler类,并且重写了部分方法的实现:

class WSGIRequestHandler(BaseHTTPRequestHandler, object):
    # Request handler that hands each parsed request to the server's WSGI
    # application (``self.server.app``) instead of do_XXX() methods.

    def handle(self):
        """Handles a request ignoring dropped connections."""
        rv = None
        try:
            rv = BaseHTTPRequestHandler.handle(self)
        except (socket.error, socket.timeout) as e:
            self.connection_dropped(e)
        except Exception:
            # Re-raised unless an SSL context is configured and
            # is_ssl_error() reports true.
            if self.server.ssl_context is None or not is_ssl_error():
                raise
        if self.server.shutdown_signal:
            self.initiate_shutdown()
        return rv

    def handle_one_request(self):
        """Handle a single HTTP request."""
        self.raw_requestline = self.rfile.readline()
        if not self.raw_requestline:
            # Nothing was read: mark the connection for closing.
            self.close_connection = 1
        elif self.parse_request():
            return self.run_wsgi()

    def run_wsgi(self):
        # Runs self.server.app as a WSGI application: builds the environ,
        # provides start_response()/write(), and streams the application's
        # output to self.wfile.
        if self.headers.get('Expect', '').lower().strip() == '100-continue':
            self.wfile.write(b'HTTP/1.1 100 Continue\r\n\r\n')

        self.environ = environ = self.make_environ()
        # headers_set holds [status, response_headers] recorded by
        # start_response(); headers_sent is filled from it once the
        # status line and headers have actually been written.
        headers_set = []
        headers_sent = []

        def write(data):
            assert headers_set, 'write() before start_response'
            if not headers_sent:
                # First write: copy headers_set into headers_sent and
                # emit the status line and headers before any body data.
                status, response_headers = headers_sent[:] = headers_set
                try:
                    code, msg = status.split(None, 1)
                except ValueError:
                    # Status without a reason phrase.
                    code, msg = status, ""
                self.send_response(int(code), msg)
                header_keys = set()
                for key, value in response_headers:
                    self.send_header(key, value)
                    key = key.lower()
                    header_keys.add(key)
                # Without a Content-Length the connection is closed after
                # the response.
                if 'content-length' not in header_keys:
                    self.close_connection = True
                    self.send_header('Connection', 'close')
                if 'server' not in header_keys:
                    self.send_header('Server', self.version_string())
                if 'date' not in header_keys:
                    self.send_header('Date', self.date_time_string())
                self.end_headers()

            assert isinstance(data, bytes), 'applications must write bytes'
            self.wfile.write(data)
            self.wfile.flush()

        def start_response(status, response_headers, exc_info=None):
            if exc_info:
                try:
                    # Headers already on the wire cannot be replaced, so
                    # the original error is re-raised.
                    if headers_sent:
                        reraise(*exc_info)
                finally:
                    exc_info = None
            elif headers_set:
                raise AssertionError('Headers already set')
            headers_set[:] = [status, response_headers]
            return write

        def execute(app):
            application_iter = app(environ, start_response)
            try:
                for data in application_iter:
                    write(data)
                # Make sure the headers go out even when the application
                # produced no body chunks.
                if not headers_sent:
                    write(b'')
            finally:
                if hasattr(application_iter, 'close'):
                    application_iter.close()
                application_iter = None

        try:
            execute(self.server.app)
        except (socket.error, socket.timeout) as e:
            self.connection_dropped(e, environ)
        except Exception:
            if self.server.passthrough_errors:
                raise
            from werkzeug.debug.tbtools import get_current_traceback
            traceback = get_current_traceback(ignore_system_exceptions=True)
            try:
                # if we haven't yet sent the headers but they are set
                # we roll back to be able to set them again.
                if not headers_sent:
                    del headers_set[:]
                execute(InternalServerError())
            except Exception:
                pass
            self.server.log('error', 'Error on request:\n%s',
                            traceback.plaintext)

    ......  # the remaining methods of the class are elided in the article

WSGIRequestHandler重写了handle_one_request函数,因此最后的处理落到了app(environ, start_response)中,所以如果用户定制app时至少需要处理这2个入参

Flask

前面走读了werkzeug源码的处理,该回到Flask的处理了。

class Flask(_PackageBoundObject):
    # Excerpt of the Flask application class. An instance is itself the
    # WSGI application: __call__ forwards to wsgi_app().

    request_class = Request
    response_class = Response
    url_rule_class = Rule

    def run(self, host=None, port=None, debug=None, **options):
        # Starts werkzeug's run_simple() with this object as the
        # application. host defaults to '127.0.0.1'; port defaults to the
        # port part of config['SERVER_NAME'] when it has one, else 5000.
        from werkzeug.serving import run_simple
        if host is None:
            host = '127.0.0.1'
        if port is None:
            server_name = self.config['SERVER_NAME']
            if server_name and ':' in server_name:
                port = int(server_name.rsplit(':', 1)[1])
            else:
                port = 5000
        if debug is not None:
            self.debug = bool(debug)
        # Reloader and debugger follow the debug flag unless the caller
        # passed explicit values in options.
        options.setdefault('use_reloader', self.debug)
        options.setdefault('use_debugger', self.debug)
        try:
            run_simple(host, port, self, **options)
        finally:
            # reset the first request information if the development server
            # reset normally.  This makes it possible to restart the server
            # without reloader and that stuff from an interactive shell.
            self._got_first_request = False

    def add_url_rule(self, rule, endpoint=None, view_func=None, **options):
        ......  # body elided in the article

    def route(self, rule, **options):
        ......  # body elided in the article

    def endpoint(self, endpoint):
    # (the article shows only the signature of endpoint(); no body is given)

    def dispatch_request(self):
        # Looks up the view function for the matched URL rule of the
        # current request and returns its result.
        req = _request_ctx_stack.top.request
        if req.routing_exception is not None:
            self.raise_routing_exception(req)
        rule = req.url_rule
        # if we provide automatic options for this URL and the
        # request came with the OPTIONS method, reply automatically
        if getattr(rule, 'provide_automatic_options', False) \
           and req.method == 'OPTIONS':
            return self.make_default_options_response()
        # otherwise dispatch to the handler for that endpoint
        return self.view_functions[rule.endpoint](**req.view_args)

    def full_dispatch_request(self):
        """Dispatches the request and on top of that performs request
        pre and postprocessing as well as HTTP exception catching and
        error handling.

        .. versionadded:: 0.7
        """
        self.try_trigger_before_first_request_functions()
        try:
            request_started.send(self)
            rv = self.preprocess_request()
            # A non-None result from preprocessing is used as the
            # response value and skips the view dispatch.
            if rv is None:
                rv = self.dispatch_request()
        except Exception as e:
            rv = self.handle_user_exception(e)
        return self.finalize_request(rv)

    def wsgi_app(self, environ, start_response):
        # The actual WSGI entry point: pushes a request context, obtains
        # a response via full_dispatch_request(), and calls that response
        # with (environ, start_response). The context is popped in all
        # cases.
        ctx = self.request_context(environ)
        ctx.push()
        error = None
        try:
            try:
                response = self.full_dispatch_request()
            except Exception as e:
                error = e
                response = self.handle_exception(e)
            except:
                # Non-Exception errors are recorded for the context pop
                # and then propagated.
                error = sys.exc_info()[1]
                raise
            return response(environ, start_response)
        finally:
            if self.should_ignore_error(error):
                error = None
            ctx.auto_pop(error)

    def __call__(self, environ, start_response):
        """Shortcut for :attr:`wsgi_app`."""
        return self.wsgi_app(environ, start_response)