Linux下使用Socket实现http文件下载

来源:互联网 发布:东翌编程 编辑:程序博客网 时间:2024/06/03 14:00
//test.cpp
// Downloads a single file over plain HTTP/1.1 using a raw TCP socket (Linux).
// Written so that it builds both as C (C99 or later) and as C++.
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <unistd.h>
#include <netdb.h>

#define TARGET_URL "http://seopic.699pic.com/photo/50010/8515.jpg_wh1200.jpg"
#define TARGET_HOST "seopic.699pic.com"
#define TARGET_PORT 80 //the default port 80

enum {
    HEADER_BUF_SIZE = 4096,   /* room for the response header plus its NUL */
    CONTENT_TYPE_SIZE = 1024, /* capacity get_resp_header() assumes for content_type */
    BODY_CHUNK_SIZE = 1024    /* bytes read from the socket per recv() while saving the body */
};

static void GetIPfromDNS(char* ip_addr);
static void get_resp_header(const char *response, int *status_code, char*content_type, long* content_length);

/*
 * Sends exactly `len` bytes, retrying after short sends and EINTR.
 * Returns false if send() reports an error.
 */
static bool send_all(int sock, const char *data, size_t len)
{
    while (len > 0) {
        ssize_t sent = send(sock, data, len, 0);
        if (sent < 0) {
            if (errno == EINTR) {
                continue;
            }
            return false;
        }
        data += sent;
        len -= (size_t)sent;
    }
    return true;
}

/*
 * Writes exactly `len` bytes to `fd`, retrying after short writes and EINTR.
 * Returns false if write() reports an error.
 */
static bool write_all(int fd, const unsigned char *data, size_t len)
{
    while (len > 0) {
        ssize_t put = write(fd, data, len);
        if (put < 0) {
            if (errno == EINTR) {
                continue;
            }
            return false;
        }
        data += put;
        len -= (size_t)put;
    }
    return true;
}

/*
 * Receives one byte, appends it to buf (kept NUL-terminated) and stores it in *out.
 * Returns false when the peer closed the connection, recv() failed, or the
 * buffer has no room left for another byte plus the terminating NUL.
 */
static bool recv_header_byte(int sock, char *buf, size_t cap, size_t *len, char *out)
{
    if (*len + 1 >= cap) {
        return false;
    }
    ssize_t got;
    do {
        got = recv(sock, out, 1, 0);
    } while (got < 0 && errno == EINTR);
    if (got <= 0) {
        return false;
    }
    buf[(*len)++] = *out;
    buf[*len] = '\0';
    return true;
}

/*
 * Reads the response header byte by byte until the blank line ("\r\n\r\n")
 * that separates it from the body, so that no body byte is consumed.
 * Four consecutive checks ('\r', '\n', '\r', '\n') detect the terminator; a
 * mismatching byte that is itself '\r' is re-tested as a possible new start.
 * Returns true once the whole header (terminator included) is in buf.
 */
static bool read_response_header(int sock, char *buf, size_t cap)
{
    size_t len = 0;
    char c;

    if (cap == 0) {
        return false;
    }
    buf[0] = '\0';

    while (recv_header_byte(sock, buf, cap, &len, &c)) {
        while (c == '\r') {
            if (!recv_header_byte(sock, buf, cap, &len, &c)) {
                return false;
            }
            if (c != '\n') {
                continue;
            }
            if (!recv_header_byte(sock, buf, cap, &len, &c)) {
                return false;
            }
            if (c != '\r') {
                break;
            }
            if (!recv_header_byte(sock, buf, cap, &len, &c)) {
                return false;
            }
            if (c != '\n') {
                continue;
            }
            return true;
        }
    }
    return false;
}

/*
 * Copies the response body from the socket into fd.
 * When content_length is non-negative exactly that many bytes are read, so a
 * keep-alive connection does not block after the last byte; when it is
 * negative (header absent or unparsable) the body is read until the peer
 * closes the connection.
 * Returns false on an I/O error or when fewer bytes than announced arrived.
 */
static bool receive_body(int sock, int fd, long content_length)
{
    unsigned char chunk[BODY_CHUNK_SIZE];
    long written = 0;

    while (content_length < 0 || written < content_length) {
        size_t want = sizeof chunk;
        if (content_length >= 0 && content_length - written < (long)want) {
            want = (size_t)(content_length - written);
        }
        ssize_t got = recv(sock, chunk, want, 0);
        if (got < 0) {
            if (errno == EINTR) {
                continue;
            }
            perror("recv");
            return false;
        }
        if (got == 0) {
            break;
        }
        if (!write_all(fd, chunk, (size_t)got)) {
            perror("write");
            return false;
        }
        written += got;
    }

    if (content_length >= 0 && written != content_length) {
        fprintf(stderr, "incomplete download: got %ld of %ld bytes\n", written, content_length);
        return false;
    }
    return true;
}

/*
 * Resolves TARGET_HOST, connects the socket, sends the GET request, prints
 * the response header and stores the body in "mydownload.jpg".
 * Returns EXIT_SUCCESS or EXIT_FAILURE; the caller keeps ownership of the socket.
 */
static int run_download(int client_socket)
{
    struct sockaddr_in addr;
    char ip_addr[64];

    memset(&addr, 0, sizeof(addr));
    GetIPfromDNS(ip_addr);
    if (ip_addr[0] == '\0') {
        fprintf(stderr, "cannot resolve host : %s\n", TARGET_HOST);
        return EXIT_FAILURE;
    }
    addr.sin_family = AF_INET;
    addr.sin_port = htons(TARGET_PORT);
    if (inet_pton(AF_INET, ip_addr, &addr.sin_addr) != 1) {
        fprintf(stderr, "invalid IPv4 address : %s\n", ip_addr);
        return EXIT_FAILURE;
    }

    int res = connect(client_socket, (struct sockaddr *) &addr, sizeof(addr));
    if (res == -1) {
        printf("connect failed : %d\n", res);
        return EXIT_FAILURE;
    }

    char sendbuf[1024] = {0};
    int request_len = snprintf(sendbuf, sizeof sendbuf,
        "GET %s HTTP/1.1\r\n"
        "User-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\r\n"
        "Accept: */*\r\n"
        "Host:%s\r\n"
        "\r\n",
        TARGET_URL, TARGET_HOST);
    if (request_len < 0 || (size_t)request_len >= sizeof sendbuf) {
        fprintf(stderr, "request does not fit into the send buffer\n");
        return EXIT_FAILURE;
    }
    if (!send_all(client_socket, sendbuf, (size_t)request_len)) {
        perror("send");
        return EXIT_FAILURE;
    }

    char recvbuf[HEADER_BUF_SIZE] = {0};
    if (!read_response_header(client_socket, recvbuf, sizeof recvbuf)) {
        fprintf(stderr, "failed to receive a complete response header\n");
        return EXIT_FAILURE;
    }
    printf("\n\nSUCCESS GET HEAD\n\n");

    printf("---------------\n");
    printf("#### %zu ####\n\n", strlen(recvbuf));
    printf("%s", recvbuf);
    printf("---------------\n\n\n");
    fflush(stdout);

    int status_code;
    char content_type[CONTENT_TYPE_SIZE];
    long content_length;
    get_resp_header(recvbuf, &status_code, content_type, &content_length);
    printf("%d, %s, %ld\n", status_code, content_type, content_length);

    /* Do not store an error page under the image's file name. */
    if (status_code != 200) {
        fprintf(stderr, "unexpected HTTP status : %d\n", status_code);
        return EXIT_FAILURE;
    }

    printf("Start write file to local disk ... .... \n");
    fflush(stdout);

    /* O_TRUNC: a shorter download must not leave bytes of an older file behind. */
    int fd = open("mydownload.jpg", O_CREAT | O_WRONLY | O_TRUNC,
                  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
    if (fd < 0) {
        perror("open mydownload.jpg");
        return EXIT_FAILURE;
    }

    bool ok = receive_body(client_socket, fd, content_length);
    printf("\n\nTHE END\n\n");
    if (close(fd) != 0) {
        perror("close mydownload.jpg");
        ok = false;
    }
    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}

/*
 * Entry point: creates the TCP socket, runs the download and releases the
 * socket on every path. Exits with EXIT_FAILURE if any step failed.
 */
int main(void)
{
    int client_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (client_socket < 0) {
        printf("invalid socket : %d\n", client_socket);
        return EXIT_FAILURE;
    }

    int rc = run_download(client_socket);
    close(client_socket);
    return rc;
}

/*
 * Resolves TARGET_HOST and writes its first IPv4 address, in dotted-decimal
 * form, into ip_addr.
 *
 * ip_addr must point to at least INET_ADDRSTRLEN bytes. On failure the
 * buffer is set to the empty string so the caller can detect it.
 */
static void GetIPfromDNS(char* ip_addr)
{
    if (!ip_addr) {
        return;
    }
    ip_addr[0] = '\0';

    struct hostent *host = gethostbyname(TARGET_HOST);
    if (!host || host->h_addrtype != AF_INET || !host->h_addr_list[0]) {
        return;
    }

    /* Copy instead of casting: h_addr_list entries are plain char buffers. */
    struct in_addr first;
    memcpy(&first, host->h_addr_list[0], sizeof first);
    snprintf(ip_addr, INET_ADDRSTRLEN, "%s", inet_ntoa(first));
}

/*
 * Finds the header field `name` (without the colon) at the start of a line
 * of the NUL-terminated response, comparing case-insensitively.
 * Returns a pointer to the first non-blank character of its value, or NULL
 * if the field is absent.
 */
static const char *find_header_value(const char *response, const char *name)
{
    size_t name_len = strlen(name);
    const char *line = response;

    while (line && *line) {
        size_t i = 0;
        while (i < name_len &&
               tolower((unsigned char)line[i]) == tolower((unsigned char)name[i])) {
            i++;
        }
        if (i == name_len && line[i] == ':') {
            const char *value = line + i + 1;
            while (*value == ' ' || *value == '\t') {
                value++;
            }
            return value;
        }
        line = strstr(line, "\r\n");
        if (line) {
            line += 2;
        }
    }
    return NULL;
}

/*
 * Extracts the fields the downloader needs from a NUL-terminated HTTP
 * response header.
 *
 * status_code    : e.g. 200, 503; set to 0 when there is no status line.
 * content_type   : e.g. image/jpeg (first whitespace-delimited token of the
 *                  value); set to "" when absent. The buffer must hold at
 *                  least CONTENT_TYPE_SIZE (1024) bytes.
 * content_length : e.g. 560437 (bytes); set to -1 when absent or invalid.
 */
static void get_resp_header(const char *response, int *status_code, char*content_type, long* content_length)
{
    *status_code = 0;
    content_type[0] = '\0';
    *content_length = -1;

    if (strncmp(response, "HTTP/", 5) == 0) {
        if (sscanf(response, "%*s %d", status_code) != 1) {
            *status_code = 0;
        }
    }

    const char *value = find_header_value(response, "Content-Type");
    if (value) {
        /* Field width is CONTENT_TYPE_SIZE - 1 so the copy cannot overflow. */
        if (sscanf(value, "%1023s", content_type) != 1) {
            content_type[0] = '\0';
        }
    }

    value = find_header_value(response, "Content-Length");
    if (value) {
        char *end = NULL;
        errno = 0;
        long parsed = strtol(value, &end, 10);
        if (end != value && errno != ERANGE && parsed >= 0) {
            *content_length = parsed;
        }
    }
}

运行结果

$ g++ test.cpp$ ./a.out SUCCESS GET HEAD---------------#### 563 ####HTTP/1.1 200 OKServer: marco/1.6Date: Fri, 18 Aug 2017 03:06:52 GMTContent-Type: image/jpegContent-Length: 560437Connection: keep-aliveX-Request-Id: 96aaeffc8c0ece839ba5495988d22dc5; 54f5daf9ca0effd3deacdcc88a5e54daX-Source: U/304ETag: "82e871eb8d245fef907c9e5ef8cd8809"X-Slice-Complete-Length: 560437Last-Modified: Thu, 06 Apr 2017 12:59:05 GMTX-Slice-Size: 65536Expires: Wed, 23 Aug 2017 17:02:27 GMTCache-Control: max-age=691200Accept-Ranges: bytesAge: 458936Via: T.2424.H.1, V.mix-gd-can-008, T.141134.R.1, M.cun-gd-zhs-131---------------200, image/jpeg, 560437Start write file to local disk ... .... THE END

主要注意两个点。
1 组织HTTP协议的应用层数据包发起请求。

这里写图片描述

2 利用服务器返回的数据格式中连续两次\r\n解析出头部信息(包含文件大小)和文件原始数据(字节流);
(例程中接收响应头的循环,即原文代码排版的51行到70行,连续用了4个判断语句直接找出连续的\r\n)

这里写图片描述

补充一点
这里使用了gethostbyname系统函数向DNS服务器查询IP地址,但是帮助文档(man page)中已经说明这个函数不再推荐使用了:

The gethostbyname*() and gethostbyaddr*() functions are obsolete.  Applications should use getaddrinfo(3) and getnameinfo(3) instead.

HTTP使用了TCP作为传输层协议,所以会有3次握手的过程,标准的3次握手过程:

这里写图片描述