boa源码解析（1）-接收请求，发送html的流程

来源：互联网发布：大数据挖掘系统编辑：程序博客网时间：2024/06/08 06:02

最近接触到boa，记录下来以便复习

本篇记录服务器接受请求，向客户端发送指定文件的流程

1.首先从boa.c的main函数开始

while ((c = getopt(argc, argv, "c:r:d")) != -1) {        switch (c) {        case 'c':            if (server_root)                free(server_root);            server_root = strdup(optarg);            if (!server_root) {                perror("strdup (for server_root)");                exit(1);            }            break;        case 'r':            if (chdir(optarg) == -1) {                log_error_time();                perror("chdir (to chroot)");                exit(1);            }            if (chroot(optarg) == -1) {                log_error_time();                perror("chroot");                exit(1);            }            if (chdir("/") == -1) {                log_error_time();                perror("chdir (after chroot)");                exit(1);            }            break;        case 'd':            do_fork = 0;            break;        default:            fprintf(stderr, "Usage: %s [-c serverroot] [-r chroot] [-d]\n", argv[0]);            exit(1);        }

刚开始会对传入的参数进行解析，例如-c /opt/app/，则server_root=/opt/app/（即strdup(optarg)的结果）。server_root用于指定配置文件的所在位置，即boa.conf在/opt/app/目录下!!!

    /*
     * One-time startup sequence, executed in exactly this order.
     * NOTE(review): the helper bodies are not shown in this excerpt; the
     * per-call remarks below follow the surrounding article text and the
     * other excerpts, and should be confirmed against the helpers.
     */
    fixup_server_root();    /* per the article: chdir() into server_root */
    read_config_files();    /* per the article: load the configuration file */
    open_logs();
    /* server_s is the descriptor later watched by select() and handed to
     * get_request() in the request loop. */
    server_s = create_server_socket();
    init_signals();         /* per the article: install signal handlers */
    drop_privs();
    create_common_env();
    build_needs_escape();

fixup_server_root():该函数中最重要的语句是chdir(server_root)，即跳转到配置文件所在的目录下，然后read_config_files,即读取配置文件!!!之后代码中很多参数的值都是从配置文件中获取的。

boa服务器创建socket连接，无非就是创建socket,将套接字设置为非阻塞，设置断开后端口可立即使用，绑定socket，设置监听。

init_signals:设置相应的信号及信号处理函数。

    /* Put the server in the background when do_fork is set. */
    if (do_fork) {
        pid_t child = fork();

        if (child == -1) {
            /* fork failed: report the reason and abort startup */
            perror("fork");
            exit(1);
        }
        if (child != 0) {
            /* parent: nothing left to do, the child carries on as the server */
            exit(0);
        }
        /* child: fall through and continue with normal operation */
    }

守护进程：当终端被关闭后，利用该终端开启的相应的进程也会被关闭。守护进程就是为了冲破这种障碍。它从被执行的时候开始运转，直到整个系统关闭才退出（当然可以人为地杀死相应的守护进程）。如果想让某个进程不因为用户、终端或其他变化而受到影响，那么就必须把这个进程变成一个守护进程。

守护进程的步骤：

1.使用fork，创建子进程，父进程退出
2.调用setsid，摆脱其他进程的控制
3.改变当前工作目录（通常切换到根目录）
4.重设文件权限掩码

这里只做了第一步

程序总算是初始化完毕了。

接下来就是接收处理数据了。关键函数select_loop(server_s);boa会进入一个循环，不断地接收处理数据

/* any blocked req's move from request_ready to request_block */process_requests(server_s);if (!sigterm_flag && total_connections < (max_connections - 10)) {    BOA_FD_SET(server_s, &block_read_fdset); /* server always set */}req_timeout.tv_sec = (request_ready ? 0 :(ka_timeout ? ka_timeout : REQUEST_TIMEOUT));req_timeout.tv_usec = 0l;   /* reset timeout */if (select(max_fd + 1, &block_read_fdset,           &block_write_fdset, NULL,           (request_ready || request_block ? &req_timeout : NULL)) == -1) {/* what is the appropriate thing to do here on EBADF */if (errno == EINTR)   continue;   /* while(1) */else if (errno != EBADF) {    DIE("select");   }}time(&t_time);if (FD_ISSET(server_s, &block_read_fdset))     pending_requests = 1;}

刚开始，pending_requests为0。从上面的代码可以看到，程序首先会运行process_requests，但因为pending_requests=0，程序只是进去逛了一圈，什么事情也不会做。

程序会一直阻塞在select，直到有新的客户端连接进来。当有新的客户端连接到服务器后，监听套接字server_s变为可读，select不再阻塞，pending_requests将变成1，然后就会再次进入process_requests中

 /*
  * pending_requests is raised by the select loop when server_s is reported
  * readable; when it is set, hand the listening socket to get_request().
  */
 if (pending_requests) {
        get_request(server_s);
#ifdef ORIGINAL_BEHAVIOR
        /* Only this build variant clears the flag right here.
         * NOTE(review): in the default build the flag must be cleared
         * elsewhere (presumably inside get_request) - not visible here. */
        pending_requests = 0;
#endif
    }

这时我们将会执行get_request来获取连接请求。在get_request中，首先执行accept，然后新建一个request

/* Allocate a request object for the descriptor fd obtained earlier in this
 * function (per the article, from accept() - not shown in this excerpt). */
conn = new_request();
if (!conn) {
    /* No request object available: drop the connection and give up. */
    close(fd);
    return;
}
conn->fd = fd;
/* READ_HEADER makes the dispatch loop route this request to read_header(). */
conn->status = READ_HEADER;
/* The first header line starts at the beginning of the client buffer. */
conn->header_line = conn->client_stream;
conn->time_last = current_time;
conn->kacount = ka_max;

初始状态为READ_HEADER。最后把conn加入request_ready队列中：enqueue(&request_ready, conn);

执行完get_request函数后，程序继续执行process_requests

/*
 * Walk the request_ready list once.  Each request is advanced by one step
 * according to its status, and the step's result (retval) decides what
 * happens to the request:
 *   -1  blocked     -> handed to block_request()
 *    0  complete    -> released with free_request()
 *    1  more to do  -> left on the ready list for the next pass
 */
current = request_ready;
while (current) {
    time(&t_time);
    if (current->buffer_end && /* there is data in the buffer */
        current->status != DEAD && current->status != DONE) {
        /* Pending output takes priority over advancing the state machine. */
        retval = req_flush(current);
        /*
         * retval can be -2=error, -1=blocked, or bytes left
         */
        if (retval == -2) { /* error */
            current->status = DEAD;
            retval = 0;
        } else if (retval >= 0) {
            /* notice the >= which is different from below?
               Here, we may just be flushing headers.
               We don't want to return 0 because we are not DONE
               or DEAD */
            retval = 1;
        }
    } else {
        /* Nothing buffered (or request is finishing): dispatch on status. */
        switch (current->status) {
        case READ_HEADER:
        case ONE_CR:
        case ONE_LF:
        case TWO_CR:
            /* all four are header-parsing sub-states */
            retval = read_header(current);
            break;
        case BODY_READ:
            retval = read_body(current);
            break;
        case BODY_WRITE:
            retval = write_body(current);
            break;
        case WRITE:
            retval = process_get(current);
            break;
        case PIPE_READ:
            retval = read_from_pipe(current);
            break;
        case PIPE_WRITE:
            retval = write_from_pipe(current);
            break;
        case DONE:
            /* a non-status that will terminate the request */
            retval = req_flush(current);
            /*
             * retval can be -2=error, -1=blocked, or bytes left
             */
            if (retval == -2) { /* error */
                current->status = DEAD;
                retval = 0;
            } else if (retval > 0) {
                /* bytes still left: stay on the ready list; a result of 0
                 * is kept as-is and completes the request below */
                retval = 1;
            }
            break;
        case DEAD:
            retval = 0;
            current->buffer_end = 0;
            SQUASH_KA(current);
            break;
        default:
            retval = 0;
            fprintf(stderr, "Unknown status (%d), "
                    "closing!\n", current->status);
            current->status = DEAD;
            break;
        }
    }

    if (sigterm_flag)
        SQUASH_KA(current);

    /* we put this here instead of after the switch so that
     * if we are on the last request, and get_request is successful,
     * current->next is valid!
     */
    if (pending_requests)
        get_request(server_s);

    /* Act on the step result; current is always advanced before the
     * finished/blocked node (trailer) is handed off. */
    switch (retval) {
    case -1:               /* request blocked */
        trailer = current;
        current = current->next;
        block_request(trailer);
        break;
    case 0:                /* request complete */
        current->time_last = current_time;
        trailer = current;
        current = current->next;
        free_request(&request_ready, trailer);
        break;
    case 1:                /* more to do */
        current->time_last = current_time;
        current = current->next;
        break;
    default:
        log_error_time();
        fprintf(stderr, "Unknown retval in process.c - "
                "Status: %d, retval: %d\n", current->status, retval);
        current = current->next;
        break;
    }
}

接下来开始循环处理刚才接收到的请求。刚开始由于没有数据，将会执行else中的内容，由于初始状态为READ_HEADER，程序将执行read_header，将会执行read读取报文，并返回1。由代码注释（more to do）我们也可以看出，我们只是获取了数据，并没有进行处理，然后current=current->next。如果只有一个请求的话，这时候current已经是NULL了，将会退出while循环，但数据并没有消失，仍然保存在request_ready队列中。因为request_ready还在，所以select(max_fd + 1, &block_read_fdset, &block_write_fdset, NULL,(request_ready || request_block ? &req_timeout : NULL))并不会一直阻塞，程序将再一次执行process_requests函数，这时候current再次等于request_ready，处理刚才未处理的数据；这时候将再次执行read_header函数，不过不再是读取数据了，而是进行处理数据了。

/*
 * Scan the bytes in [check, buffer + bytes) one at a time, driving a small
 * end-of-line state machine kept in req->status:
 *   READ_HEADER -> ONE_CR -> ONE_LF   one header line has ended
 *   ONE_LF -> TWO_CR -> BODY_READ     an empty line: end of all headers
 * Whenever a line ends it is NUL-terminated in place and handed to
 * process_logline() (first line) or process_option_line() (later lines).
 */
while (check < (buffer + bytes)) {
    switch (req->status) {
    case READ_HEADER:
        if (*check == '\r') {
            req->status = ONE_CR;
            req->header_end = check;
        } else if (*check == '\n') {
            /* a bare LF also ends a line */
            req->status = ONE_LF;
            req->header_end = check;
        }
        break;

    case ONE_CR:
        if (*check == '\n')
            req->status = ONE_LF;
        else if (*check != '\r')
            req->status = READ_HEADER;
        break;

    case ONE_LF:
        /* if here, we've found the end (for sure) of a header */
        if (*check == '\r') /* could be end o headers */
            req->status = TWO_CR;
        else if (*check == '\n')
            req->status = BODY_READ;
        else
            req->status = READ_HEADER;
        break;

    case TWO_CR:
        if (*check == '\n')
            req->status = BODY_READ;
        else if (*check != '\r')
            req->status = READ_HEADER;
        break;

    default:
        break;
    }

#ifdef VERY_FASCIST_LOGGING
    log_error_time();
    fprintf(stderr, "status, check: %d, %d\n", req->status, *check);
#endif

    req->parse_pos++;       /* update parse position */
    check++;

    if (req->status == ONE_LF) {
        *req->header_end = '\0';

        /* terminate string that begins at req->header_line */
        if (req->logline) {
            /* request line already recorded: this is an option line */
            if (process_option_line(req) == 0) {
                return 0;
            }
        } else {
            /* no request line recorded yet: this line is the request line */
            if (process_logline(req) == 0)
                return 0;
            /* a "simple" request finishes here without waiting for an
             * empty line */
            if (req->simple)
                return process_header_end(req);
        }
        /* set header_line to point to beginning of new header */
        req->header_line = check;
    } else if (req->status == BODY_READ) {
#ifdef VERY_FASCIST_LOGGING
        int retval;
        log_error_time();
        fprintf(stderr, "%s:%d -- got to body read.\n",
                __FILE__, __LINE__);
        retval = process_header_end(req);
#else
        int retval = process_header_end(req);
#endif
        /* process_header_end inits non-POST cgi's */

        if (retval && req->method == M_POST) {
            /* for body_{read,write}, set header_line to start of data,
               and header_end to end of data */
            req->header_line = check;
            req->header_end =
                req->client_stream + req->client_stream_pos;

            req->status = BODY_WRITE;
            /* so write it */
            /* have to write first, or read will be confused
             * because of the special case where the
             * filesize is less than we have already read.
             */

            /*
               As quoted from RFC1945:

               A valid Content-Length is required on all HTTP/1.0 POST requests. An
               HTTP/1.0 server should respond with a 400 (bad request) message if it
               cannot determine the length of the request message's content.
             */

            if (req->content_length) {
                int content_length;

                content_length = boa_atoi(req->content_length);
                /* Is a content-length of 0 legal? */
                if (content_length <= 0) {
                    log_error_time();
                    fprintf(stderr, "Invalid Content-Length [%s] on POST!\n",
                            req->content_length);
                    send_r_bad_request(req);
                    return 0;
                }
                /* a single_post_limit of 0 disables this size check */
                if (single_post_limit && content_length > single_post_limit) {
                    log_error_time();
                    fprintf(stderr, "Content-Length [%d] > SinglePostLimit [%d] on POST!\n",
                            content_length, single_post_limit);
                    send_r_bad_request(req);
                    return 0;
                }
                req->filesize = content_length;
                req->filepos = 0;
                /* more bytes buffered than the declared length: clamp the
                 * data window to exactly filesize bytes */
                if (req->header_end - req->header_line > req->filesize) {
                    req->header_end = req->header_line + req->filesize;
                }
            } else {
                log_error_time();
                fprintf(stderr, "Unknown Content-Length POST!\n");
                send_r_bad_request(req);
                return 0;
            }
        }                   /* either process_header_end failed or req->method != POST */
        return retval;      /* 0 - close it done, 1 - keep on ready */
    }                       /* req->status == BODY_READ */
}

首先，读取http第一行数据，即请求行。因为刚开始req->logline为NULL，所以程序会执行process_logline，该函数用于解析请求行数据。之后logline不再为NULL，之后读取的请求头部都会执行process_option_line。请求头读取完毕后，http报文会空一行，也就是会有两个连续的换行符，状态变更为BODY_READ，执行process_header_end。

/*
 * Name: process_header_end
 *
 * Description: called once the request headers are complete.  Decodes and
 * cleans the request URI, translates it, then prepares the request for its
 * next stage: a temporary file for POST data, init_cgi() for CGI requests,
 * or init_get() (status WRITE) for GET and HEAD.
 *
 * Return values:
 *   0: the request failed and should be closed
 *   1: success for POST; otherwise whatever init_cgi()/init_get() return
 */
int process_header_end(request * req)
{
    /* Without a recorded request line there is nothing to serve. */
    if (!req->logline) {
        send_r_error(req);
        return 0;
    }

    /* Percent-decode the URI; this also fills in the query string. */
    if (!unescape_uri(req->request_uri, &req->query_string)) {
        log_error_doc(req);
        fputs("Problem unescaping uri\n", stderr);
        send_r_bad_request(req);
        return 0;
    }

    /* Clean the path; afterwards it must still begin with '/'. */
    clean_pathname(req->request_uri);
    if (*req->request_uri != '/') {
        send_r_bad_request(req);
        return 0;
    }

    /* Translate the URI; on failure squash keepalive and close down. */
    if (!translate_uri(req)) {
        SQUASH_KA(req);
        return 0;
    }

    /* POST: open a temporary file for the body; 0 means it failed. */
    if (req->method == M_POST) {
        req->post_data_fd = create_temporary_file(1, NULL, 0);
        return req->post_data_fd ? 1 : 0;
    }

    /* Non-CGI GET and HEAD requests move on to the WRITE stage. */
    if (!req->is_cgi) {
        req->status = WRITE;
        return init_get(req);
    }

    return init_cgi(req);
}

该函数会对请求行的url进行解析。对于普通的GET/HEAD请求，状态变更为WRITE，执行init_get，读取要发送的文件数据。然后程序回到process_requests，因为状态为WRITE，程序便会执行process_get，将文件发送到客户端。

阅读全文

0 0