获取网页的基本流程

来源:互联网 发布:问道手游刷道挂机软件 编辑:程序博客网 时间:2024/06/05 06:45
 
1. 获得ip地址(表达格式)的基本方式

    struct hostent* host_ent;
    host_ent = gethostbyname(host_addr); // host_addr是一个字符串,例如“net.pku.edu.cn”

    char abuf[INET_ADDRSTRLEN];
    inet_ntop(AF_INET, host_ent->h_addr_list[0], abuf, sizeof(abuf));

    // 这样,abuf中放置的就是ip的表达格式,字串“162.105.129.12”
    // 注意:inet_ntop写入的结果已经以'\0'结尾,是合法的c-str(所以下面可以直接对它调用strlen)。
    // 之所以重新放置,是因为abuf是局部缓冲区;如果结果要在其作用域之外使用,需要拷贝一份到堆上。

    int len = strlen(abuf);
    char* result = new char[len + 1];
    memset( result, 0, len+1 );
    memcpy( result, abuf, len );

    // result中即为最后的结果

2. 建立socket连接

   A. 建立Socket,参考http_fetcher的makeSocket

   B. 构造请求buffer,一个简单的例子如下:

       char buffer[1024];

       buffer[0] = 0;
       strcat(buffer, "GET /~liuyuan/main.html HTTP/1.0\r\n");
       strcat(buffer, "Host: net.pku.edu.cn\r\n");
       strcat(buffer, "User-Agent: HTTP Fetcher/1.0\r\n");
       strcat(buffer, "Connection: Close\r\n\r\n");

   C. 读取头信息,参考http_fetcher的_http_read_header函数。


//////////////////////////////////////////////

extern "C" {

#include <http_fetcher.h>

}

#include <iostream>

using namespace std;

#define HTTP_VERSION             "HTTP/1.0"

int
main(int argc, char** argv)
{
    if (argc < 2)
    {
        cout << "More parameters needed!" << endl;
        return 0;
    }

    char* download_file;

    int sock;
   
    sock = makeSocket(argv[1]);

    char *requestBuf = (char *)malloc(1024);
    requestBuf[0] = 0;
//     snprintf(requestBuf, 1024, "GET / %s\r\n", HTTP_VERSION);
    snprintf(requestBuf, 1024, "GET /~liuyuan/main.html %s\r\n", HTTP_VERSION);
//     strcat(requestBuf, "Host: ");
//     strcat(requestBuf, argv[1]);
//     strcat(requestBuf, "\r\n");
//     strcat(requestBuf, "User-Agent: ");
//     strcat(requestBuf, "HTTP-Fether");
//     strcat(requestBuf, "/");
//     strcat(requestBuf, "1.0");
//     strcat(requestBuf, "\r\n");
    strcat(requestBuf, "Connection: Close\r\n\r\n");

    char *tmp = (char *)realloc(requestBuf, strlen(requestBuf) + 1);
    requestBuf = tmp;

    if(write(sock, requestBuf, strlen(requestBuf)) == -1)
    {
        close(sock);
        free(requestBuf);
        cout << "Write error!" << endl;
    }
    else
    {
        cout << "Write to socket:" << endl;
        cout << requestBuf << endl;       
    }
    free(requestBuf);
    requestBuf = NULL;

    char *headerBuf = (char *)malloc(1024);
    char *header_pt = headerBuf;
    int ret = _http_read_header(sock, header_pt); /* errorSource set within */
   
    char *content    = (char *)malloc(10240); // 10K
    char *content_pt = content;
    memset(content, 0, 10240);
   
    while (read(sock, content_pt, 1) > 0)
    {
        ++content_pt;
    }

    cout << content;

    free(headerBuf);           
    close(sock);
}
原创粉丝点击