发一个多线程通过 HTTP 下载文件的类(Linux下的实现)
来源:互联网 发布:mac打不开flash网站 编辑:程序博客网 时间:2024/05/07 09:06
多线程下载文件这个话题已经是老汤了。
在HTTP协议1.1中,允许通过增加一个HTTP Header“Range”来指定下载文件的区间。
所以一般的做法都是:
- 首先获取到文件大小(通过Content-Length)
- 开一个线程池再进行分块下载。
而在具体怎么实现的流程上,还是有差别的。
1. 标准的做法是:首先用一个线程发送HTTP GET指令,服务器会返回Content-Length,并能够根据协议判断出服务器是否支持Range。如果支持Range,则调配其它线程对后续部分分块下载。第一个线程继续下载第1块。
2. 还有一种做法:首先发送HTTP HEAD指令,通过返回的Content-Length进行分块,再调配线程进行下载。
这里提供一个类,属于第2种。
为了提高IO性能,类中可以使用内存映射文件方式进行操作。
- #ifndef CHTTPFILEDOWNLOADER_H_
- #define CHTTPFILEDOWNLOADER_H_
- #include "Generic.h"
// Downloads one HTTP resource into a local file using several threads.
//
// downloadUrlToFile() starts a "leader" thread and returns at once; the
// leader issues a HEAD request to learn Content-Length, maps the target file
// into memory and hands fixed-size byte ranges to worker threads that fetch
// them with "Range:" requests. waitForCompletion() blocks until the leader
// has finished and reports the overall result.
class CHttpFileDownloader {
public:
    CHttpFileDownloader();
    virtual ~CHttpFileDownloader();

    // Starts the asynchronous download of lpszUrl into lpszFile.
    // Returns false when the leader thread could not be created.
    bool downloadUrlToFile( const char * lpszUrl, const char * lpszFile);

    // Blocks until the leader thread signals completion; returns whether
    // the whole file was downloaded successfully.
    bool waitForCompletion(void);

private:
    // Not copyable: the object owns POSIX semaphores and a mutex, and a copy
    // of those primitives must not be used for synchronization. Declared but
    // intentionally left undefined.
    CHttpFileDownloader(const CHttpFileDownloader &);
    CHttpFileDownloader & operator=(const CHttpFileDownloader &);

    std::string m_strLocalFile;               // path of the file being written
    pthread_t m_lLeaderThread;                // thread running leaderThread()
    struct sockaddr_in m_stServerAddr;        // resolved server address, shared by all workers
    char m_szResourceURI[1024];               // path part of the URL (from the first '/')
    char m_szDomain[1024];                    // host name without the port
    char m_szHost[1024];                      // host[:port], sent in the Host header
    char m_szUrl[1024];                       // the URL as passed by the caller
    sem_t m_stDownSem;                        // limits the number of concurrent workers
    pthread_mutex_t m_stDownloadThreadMutex;  // guards m_nDownloadThreadCnt
    int m_nDownloadThreadCnt;                 // number of workers currently alive
    bool m_bFailed;                           // set once any block fails; stops further work
    sem_t m_stCompleteSem;                    // posted by the leader when the download ends
    bool m_bSuccess;                          // overall result, valid after m_stCompleteSem is posted

    static void * leaderThread(void* param);
    static void * downloadThread(void* param);
    bool downloadProcess(void);
    void downloadBlock(unsigned char * pMemory, int nRangeStart, int nRangeSize);
    bool sendBuffer( int nSocket, char * pBuf, int nSize);
    bool sendStringStream(int nSocket, std::stringstream & oStream);
    int recvStringStream(int nSocket, std::stringstream & oStream);
    std::vector<std::string> parseResponse(std::string strResponse);
    bool isHttpStatusSuccess(std::string & strHttpResponse);
    std::string getHeaderValueByName(const char * lpszHeader, std::vector<std::string> & vItems);
};
- #endif /* CHTTPFILEDOWNLOADER_H_ */
- /*
- * CHttpFileDownloader.cpp
- *
- * Created on: 2008-12-15
- * Author: root
- */
- #include "Generic.h"
- #include "CMainApp.h"
- #include "CHttpFileDownloader.h"
// Number of bytes fetched by one download thread: 512 KiB.
// The body is parenthesized so the macro stays a single value inside larger
// expressions such as `x % DOWNLOAD_BLOCK_SIZE`.
#define DOWNLOAD_BLOCK_SIZE (1024 * 512)
// Initial value of the semaphore that limits how many download threads run
// at the same time.
#define MAX_DOWNLOAD_THREAD 5
- typedef struct _tagDownloadTask
- {
- CHttpFileDownloader * pThis;
- unsigned char * pStart;
- int nSize;
- int nRangeStart;
- }tagDownloadTask, *LPDownloadTask;
- CHttpFileDownloader::CHttpFileDownloader() {
- sem_init( &m_stCompleteSem, 0, 0);
- }
- CHttpFileDownloader::~CHttpFileDownloader() {
- sem_destroy( &m_stCompleteSem );
- }
- bool CHttpFileDownloader::sendStringStream(int nSocket, stringstream & oStream)
- {
- int nSize = oStream.str().length() * sizeof(char);
- char * pBuffer = new char[nSize];
- memcpy( pBuffer, oStream.str().c_str(), nSize);
- int nSent = 0;
- while(nSent < nSize)
- {
- int nRet = send( nSocket, (char*)(pBuffer + nSent), nSize - nSent, 0);
- if( nRet == -1 )
- break;
- nSent += nRet;
- }
- delete [] pBuffer;
- return (nSent == nSize);
- }
- bool CHttpFileDownloader::sendBuffer( int nSocket, char * pBuf, int nSize)
- {
- int nSent = 0;
- while(nSent < nSize)
- {
- int nRet = send( nSocket, (char*)(pBuf + nSent), nSize - nSent, 0);
- if( nRet == -1 )
- break;
- nSent += nRet;
- }
- return (nSent == nSize);
- }
- int CHttpFileDownloader::recvStringStream(int nSocket, stringstream & oStream)
- {
- int nReceived = 0;
- while(true)
- {
- char szBuf[1025] = {0};
- int nRet = recv( nSocket, szBuf, 1024, 0);
- if( nRet == 0 )
- break;
- if( nRet < 0 )
- break;
- oStream << szBuf;
- nReceived += nRet;
- if( oStream.str().find("/r/n/r/n") != string::npos )
- break;
- }
- return nReceived;
- }
- std::vector<string> CHttpFileDownloader::parseResponse(string strResponse)
- {
- std::vector<string> vItems;
- size_t nLast = strResponse.find("/r/n/r/n");
- if( nLast >= strResponse.length() )
- return vItems;
- size_t nPos = 0;
- while(nPos < nLast)
- {
- size_t nFind = strResponse.find("/r/n", nPos);
- if( nFind > nLast )
- break;
- vItems.push_back( strResponse.substr( nPos, nFind-nPos));
- nPos = nFind + 2;
- }
- return vItems;
- }
- bool CHttpFileDownloader::isHttpStatusSuccess(string & strHttpResponse)
- {
- size_t nBegin = strHttpResponse.find(' ');
- if( nBegin > strHttpResponse.length() )
- return false;
- size_t nEnd = strHttpResponse.find_last_of(' ');
- if( nEnd > strHttpResponse.length() )
- return false;
- string strStatusCode = strHttpResponse.substr( nBegin+1, nEnd-nBegin-1);
- int nStatusCode = atoi(strStatusCode.c_str());
- return (nStatusCode >= 200 && nStatusCode < 300);
- }
- string CHttpFileDownloader::getHeaderValueByName(const char * lpszHeader, std::vector<string> & vItems)
- {
- string strHeader = lpszHeader;
- std::transform( strHeader.begin(), strHeader.end(), strHeader.begin(), (int (*)(int))std::tolower);
- strHeader.append(":");
- string strValue = "";
- std::vector<string>::iterator iter;
- for( iter = vItems.begin(); iter != vItems.end(); iter++)
- {
- string strItem = *iter;
- std::transform( strItem.begin(), strItem.end(), strItem.begin(), (int (*)(int))std::tolower);
- if( strItem.find(strHeader) != 0 )
- continue;
- strValue = strItem.substr( strHeader.length() );
- break;
- }
- return strValue.erase( 0, strValue.find_first_not_of(' '));
- }
- bool CHttpFileDownloader::downloadUrlToFile( const char * lpszUrl, const char * lpszFile)
- {
- memset( m_szUrl, 0, 1024);
- memcpy( m_szUrl, lpszUrl, strlen(lpszUrl));
- m_strLocalFile = lpszFile;
- // create thread
- int nErr = pthread_create( &m_lLeaderThread
- , NULL
- , &leaderThread
- , this
- );
- if( nErr != 0 )
- {
- CMainApp::getSingleton()->log("Error: pthread_create download leader thread failed. Return=%d, Message=%s"
- , nErr
- , strerror(nErr)
- );
- return false;
- }
- return true;
- }
- bool CHttpFileDownloader::waitForCompletion(void)
- {
- sem_wait(&m_stCompleteSem);
- return m_bSuccess;
- }
- void * CHttpFileDownloader::leaderThread(void* param)
- {
- CHttpFileDownloader * pThis = static_cast<CHttpFileDownloader*>(param);
- CMainApp::getSingleton()->log("Info: download file /"%s/" start..."
- , pThis->m_szUrl
- );
- pThis->m_bSuccess = pThis->downloadProcess();
- sem_post( &pThis->m_stCompleteSem );
- CMainApp::getSingleton()->log("Info: download file /"%s/" %s..."
- , pThis->m_szUrl
- , pThis->m_bSuccess ? "success" : "failed"
- );
- return NULL;
- }
- bool CHttpFileDownloader::downloadProcess(void)
- {
- // parse the url and port
- string strUrl = m_szUrl;
- std::transform( strUrl.begin(), strUrl.end(), strUrl.begin(), (int (*)(int))std::tolower);
- size_t uFind = strUrl.find("http://");
- if( uFind != 0 )
- {
- CMainApp::getSingleton()->log("Error: Invalid URL:%s"
- , m_szUrl
- );
- return false;
- }
- int nLen = string("http://").length();
- uFind = strUrl.find('/', nLen);
- if( uFind > strUrl.length() )
- {
- CMainApp::getSingleton()->log("Error: Invalid URL:%s"
- , m_szUrl
- );
- return false;
- }
- strUrl = m_szUrl;
- string strHost = strUrl.substr( nLen, uFind - nLen);
- string strResourceURI = strUrl.substr(uFind);
- string strDomain = strHost;
- uint uPort = 80;
- uFind = strHost.find(':');
- if( uFind < strHost.length() )
- {
- strDomain = strHost.substr( 0, uFind);
- uPort = atoi( strHost.substr(uFind+1).c_str() );
- }
- struct hostent * pHostent = gethostbyname(strDomain.c_str());
- if( pHostent == NULL )
- {
- CMainApp::getSingleton()->log("Error: failed to resolve the IP address for the URL:%s"
- , m_szUrl
- );
- return false;
- }
- memset( &m_stServerAddr, 0, sizeof(m_stServerAddr));
- m_stServerAddr.sin_family = AF_INET;
- m_stServerAddr.sin_port = htons((short)uPort);
- memcpy( (char*)&m_stServerAddr.sin_addr.s_addr, pHostent->h_addr_list[0], pHostent->h_length);
- int sock = socket( AF_INET, SOCK_STREAM, 0);
- if( sock == -1 )
- {
- CMainApp::getSingleton()->log("Error: socket failed. error=%s"
- , strerror(errno)
- );
- return false;
- }
- memset( m_szResourceURI, 0, 1024);
- memcpy( m_szResourceURI, strResourceURI.c_str(), strlen(strResourceURI.c_str()));
- memset( m_szHost, 0, 1024);
- memcpy( m_szHost, strHost.c_str(), strlen(strHost.c_str()));
- memset( m_szDomain, 0, 1024);
- memcpy( m_szDomain, strDomain.c_str(), strlen(strDomain.c_str()));
- // populate the HTTP HEAD request
- stringstream strHttp;
- strHttp << "HEAD " << m_szResourceURI << " HTTP/1.1/r/n";
- strHttp << "User-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)/r/n";
- strHttp << "Host:" << m_szHost << "/r/n";
- strHttp << "Cache-Control: no-cache/r/n";
- strHttp << "Pragma: no-cache/r/n";
- strHttp << "Connection: Keep-Alive/r/n";
- strHttp << "Accept: */*/r/n";
- strHttp << "/r/n";
- int nRet = connect( sock
- , (struct sockaddr *)&m_stServerAddr
- , sizeof(struct sockaddr)
- );
- if( nRet == -1 )
- {
- CMainApp::getSingleton()->log("Error: failed to connect to URL:%s"
- , m_szUrl
- );
- return false;
- }
- struct timeval tv = {0};
- tv.tv_sec = 15;
- if (setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv)))
- {
- CMainApp::getSingleton()->log("Error: setsockopt failed(1). error=%s"
- , strerror(errno)
- );
- return false;
- }
- if( !sendStringStream( sock, strHttp) )
- {
- CMainApp::getSingleton()->log("Error: failed to send the HTTP HEAD request to URL:%s"
- , m_szUrl
- );
- return false;
- }
- stringstream strResponse;
- recvStringStream( sock, strResponse);
- shutdown(sock, SHUT_RDWR);
- close(sock);
- // parse the response
- std::vector<string> vItems = parseResponse(strResponse.str());
- if( vItems.size() == 0 )
- {
- CMainApp::getSingleton()->log("Error: the HTTP HEAD response contains nothing. URL:%s"
- , m_szUrl
- );
- return false;
- }
- if( !isHttpStatusSuccess(vItems[0]) )
- {
- CMainApp::getSingleton()->log("Error: %s. URL:%s"
- , vItems[0].c_str()
- , m_szUrl
- );
- return false;
- }
- string strContentLen = getHeaderValueByName( "Content-Length", vItems);
- if( strContentLen.length() == 0 )
- {
- CMainApp::getSingleton()->log("Error: Invalid Content-Length in HTTP HEAD response. URL:%s"
- , m_szUrl
- );
- return false;
- }
- int nContentLength = atoi(strContentLen.c_str());
- // create file
- std::ofstream outStream;
- outStream.open( m_strLocalFile.c_str(), ios_base::out | ios_base::binary | ios_base::trunc);
- outStream.seekp(nContentLength-1);
- outStream.put('/0');
- outStream.close();
- int fd = open( m_strLocalFile.c_str(), O_RDWR);
- if( fd == -1 )
- {
- CMainApp::getSingleton()->log("Error: can not create file /"%s/". %s"
- , m_strLocalFile.c_str()
- , strerror(errno)
- );
- return false;
- }
- unsigned char * pMemory = (unsigned char *)mmap(NULL, nContentLength, PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_NONBLOCK, fd, 0);
- close(fd);
- if( pMemory == MAP_FAILED )
- {
- CMainApp::getSingleton()->log("Error: failed to map the file /"%s/" into memory; size=%d; error=%s"
- , m_strLocalFile.c_str()
- , nContentLength
- , strerror(errno)
- );
- return false;
- }
- mlock(pMemory, nContentLength);
- // create thread
- sem_init( &m_stDownSem, 0, MAX_DOWNLOAD_THREAD);
- pthread_mutex_init( &m_stDownloadThreadMutex, NULL);
- m_bFailed = false;
- int nDownloadLength = 0;
- m_nDownloadThreadCnt = 0;
- while(true)
- {
- sem_wait(&m_stDownSem);
- if( nDownloadLength >= nContentLength ||
- m_bFailed )
- {
- if( m_nDownloadThreadCnt == 0 )
- break;
- else
- continue;
- }
- LPDownloadTask pTask = (LPDownloadTask)malloc(sizeof(tagDownloadTask));
- pTask->pStart = (unsigned char*)(pMemory + nDownloadLength);
- pTask->nSize = ((DOWNLOAD_BLOCK_SIZE + nDownloadLength) > nContentLength)
- ? (nContentLength - nDownloadLength) : DOWNLOAD_BLOCK_SIZE;
- pTask->nRangeStart = nDownloadLength;
- pTask->pThis = this;
- nDownloadLength += pTask->nSize;
- // create thread
- pthread_t lThread;
- int nErr = pthread_create( &lThread
- , NULL
- , &downloadThread
- , pTask
- );
- if( nErr != 0 )
- {
- CMainApp::getSingleton()->log("Error: pthread_create download thread failed. Error=%d, Message=%s"
- , nErr
- , strerror(nErr)
- );
- m_bFailed = true;
- }
- else
- {
- pthread_mutex_lock(&m_stDownloadThreadMutex);
- m_nDownloadThreadCnt ++;
- pthread_mutex_unlock(&m_stDownloadThreadMutex);
- }
- }
- pthread_mutex_destroy( &m_stDownloadThreadMutex);
- sem_destroy(&m_stDownSem);
- if( msync( pMemory, nContentLength, MS_SYNC) == -1 )
- {
- CMainApp::getSingleton()->log("Error: failed to msync the file /"%s/" from memory; size=%d; error=%s"
- , m_strLocalFile.c_str()
- , nContentLength
- , strerror(errno)
- );
- m_bFailed = true;
- }
- munlock(pMemory, nContentLength);
- munmap( pMemory, nContentLength);
- return !m_bFailed;
- }
- void * CHttpFileDownloader::downloadThread(void* param)
- {
- LPDownloadTask pTask = static_cast<LPDownloadTask>(param);
- pTask->pThis->downloadBlock( pTask->pStart
- , pTask->nRangeStart
- , pTask->nSize
- );
- pthread_mutex_lock(&(pTask->pThis->m_stDownloadThreadMutex));
- pTask->pThis->m_nDownloadThreadCnt --;
- pthread_mutex_unlock(&(pTask->pThis->m_stDownloadThreadMutex));
- sem_post(&(pTask->pThis->m_stDownSem));
- free(pTask);
- return NULL;
- }
- void CHttpFileDownloader::downloadBlock(unsigned char * pMemory, int nRangeStart, int nRangeSize)
- {
- CMainApp::getSingleton()->log("Info: download block /"%s/" [%08d-%08d] start..."
- , m_szUrl
- , nRangeStart
- , nRangeStart + nRangeSize - 1
- );
- int nReceived = 0;
- int nErrorTimes = 0;
- while( nReceived < nRangeSize && nErrorTimes < 5 && !m_bFailed )
- {
- int nSocket = socket( AF_INET, SOCK_STREAM, 0);
- if( nSocket == -1 )
- {
- nErrorTimes++;
- continue;
- }
- int nRet = connect( nSocket
- , (struct sockaddr *)&m_stServerAddr
- , sizeof(struct sockaddr)
- );
- if( nRet == -1 )
- {
- nErrorTimes++;
- CMainApp::getSingleton()->log("Error: failed to connect to URL:%s"
- , m_szUrl
- );
- continue;
- }
- {
- struct timeval tv = {0};
- tv.tv_sec = 15;
- if (setsockopt(nSocket, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv)))
- {
- nErrorTimes++;
- CMainApp::getSingleton()->log("Error: setsockopt failed(2). error=%s"
- , strerror(errno)
- );
- continue;
- }
- }
- {
- char szRequest[4096] = {0};
- sprintf( szRequest, "GET %s HTTP/1.1/r/n"
- "User-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)/r/n"
- "Host: %s/r/n"
- "Cache-Control: no-cache/r/n"
- "Pragma: no-cache/r/n"
- "Connection: Keep-Alive/r/n"
- "Accept: */*/r/n"
- "Range: bytes=%d-%d/r/n"
- "/r/n"
- , m_szResourceURI
- , m_szHost
- , nRangeStart+nReceived
- , nRangeStart+nRangeSize-1
- );
- if( !sendBuffer( nSocket, szRequest, strlen(szRequest)) )
- {
- shutdown(nSocket, SHUT_RDWR);
- close(nSocket);
- CMainApp::getSingleton()->log("Error: failed to send the HTTP GET request to URL:%s"
- , m_szUrl
- );
- nErrorTimes++;
- continue;
- }
- }
- char szBuf[1024] = {0};
- nRet = recv( nSocket, szBuf, 1024, 0);
- if( nRet <= 0 )
- {
- shutdown(nSocket, SHUT_RDWR);
- close(nSocket);
- CMainApp::getSingleton()->log("Error: recv failed(1). return code=%d, error=%s, URL=%s"
- , nRet
- , strerror(errno)
- , m_szUrl
- );
- nErrorTimes++;
- continue;
- }
- string strHttpResponse;
- int nRemain = 0;
- int nIndex = 0;
- for(nIndex = 0; nIndex < nRet; nIndex++)
- {
- if( szBuf[nIndex] == '/r' &&
- szBuf[nIndex+1] == '/n' &&
- szBuf[nIndex+2] == '/r' &&
- szBuf[nIndex+3] == '/n' )
- {
- char szTemp[1025] = {0};
- memcpy( szTemp, szBuf, nIndex+4);
- strHttpResponse = szTemp;
- nRemain = nRet -(nIndex+4);
- break;
- }
- }
- if( strHttpResponse.length() == 0 )
- {
- shutdown(nSocket, SHUT_RDWR);
- close(nSocket);
- nErrorTimes++;
- CMainApp::getSingleton()->log("Error: the response does not contain a HTTP header(1), URL:%s"
- , m_szUrl
- );
- continue;
- }
- std::vector<string> vItems = parseResponse(strHttpResponse);
- if( vItems.size() == 0 )
- {
- shutdown(nSocket, SHUT_RDWR);
- close(nSocket);
- nErrorTimes++;
- CMainApp::getSingleton()->log("Error: the response does not contain a HTTP header(2). URL:%s"
- , m_szUrl
- );
- continue;
- }
- if( !isHttpStatusSuccess(vItems[0]) )
- {
- shutdown(nSocket, SHUT_RDWR);
- close(nSocket);
- nErrorTimes++;
- CMainApp::getSingleton()->log("Error: %s. URL:%s"
- , vItems[0].c_str()
- , m_szUrl
- );
- continue;
- }
- if( nRemain > 0 )
- {
- memcpy( (unsigned char*)(pMemory+nReceived), &(szBuf[nIndex+4]), nRemain);
- nReceived += nRemain;
- }
- while( (nReceived < nRangeSize) && !m_bFailed )
- {
- nRet = recv( nSocket, (unsigned char*)(pMemory+nReceived), nRangeSize - nReceived, 0);
- if( nRet <= 0 )
- {
- CMainApp::getSingleton()->log("Error: recv falied(2). return code=%d, error=%s, URL=%s"
- , nRet
- , strerror(errno)
- , m_szUrl
- );
- nErrorTimes++;
- break;
- }
- nReceived += nRet;
- }
- shutdown(nSocket, SHUT_RDWR);
- close(nSocket);
- }// while
- m_bFailed = m_bFailed ? m_bFailed : (nReceived != nRangeSize);
- CMainApp::getSingleton()->log("Info: download block /"%s/" [%08d-%08d] %s."
- , m_szUrl
- , nRangeStart
- , nRangeStart + nRangeSize - 1
- , (nReceived != nRangeSize) ? "Failed" : "Success"
- );
- }
- 发一个多线程通过 HTTP 下载文件的类(Linux下的实现)
- linux下http协议 多线程下载实现
- 在Http协议下实现多线程断点的下载
- 利用HTTP协议实现文件下载的多线程断点续传
- 利用HTTP协议实现文件下载的多线程断点续传
- 基于HTTP的多线程文件下载功能实现
- Linux下通过ftp命令下载windows下的文件
- Http的多线程下载的实现
- Http多线程下载文件的处理机制
- 5个linux下通过命令行下载文件的命令
- LINUX命令行下以HTTP方式下载文件的方法
- 通过WINET实现的http断点下载
- 通过HTTP协议实现多线程下载
- Android--通过Http协议实现多线程下载
- Android通过HTTP协议实现多线程下载
- Android通过HTTP协议实现多线程下载
- 通过HTTP协议实现多线程下载
- Android通过HTTP协议实现多线程下载
- J2ME:Canvas的图型操作
- 特殊字符编码
- As3.0 一些好书连接
- JS替换字符串
- AOP 详细描述
- 发一个多线程通过 HTTP 下载文件的类(Linux下的实现)
- test the difference between "DEFAULT NULL" and "DEFAULT 0"
- 遗传算法导论
- J2ME:模拟短信发送界面
- 2008年度一个下岗程序员的真实经历
- SqlHelper 类实现详细信息
- 桌面程序调用Web Service应用实例
- 打通NTFS权限 文件共享各取所需
- 为了你,我的儿子