torrent文件解析器

来源:互联网 发布:郑州程序员招聘信息 编辑:程序博客网 时间:2024/05/22 18:11

第二步工作是解析torrent文件,有了bencoding编码解析器 解析torrent文件当然是易如反掌的任务了.
实现的封装类CTorrentParser,完成的主要任务有:
1.判断torrent文件是否有效
2.得到如下的重要信息:
        tracker服务器列表
        文件列表
        分块尺寸
        分块个数
        分块sha1的数组
3.其他的一些次要信息如发布者,发布日期,注释等
4.计算infohash
        

Metainfo files are bencoded dictionaries with the following keys:

announce
The URL of the tracker.

info
This maps to a dictionary, with keys described below.

The name key maps to a string which is the suggested name to save the file (or directory) as. It is purely advisory.

piece length maps to the number of bytes in each piece the file is split into. For the purposes of transfer, files are split into fixed-size pieces which are all the same length except for possibly the last one which may be truncated. Piece length is almost always a power of two, most commonly 2^18 = 256 K (BitTorrent prior to version 3.2 uses 2^20 = 1 M as default).

pieces maps to a string whose length is a multiple of 20. It is to be subdivided into strings of length 20, each of which is the SHA1 hash of the piece at the corresponding index.

There is also a key length or a key files, but not both or neither. If length is present then the download represents a single file, otherwise it represents a set of files which go in a directory structure.

In the single file case, length maps to the length of the file in bytes.

For the purposes of the other keys, the multi-file case is treated as only having a single file by concatenating the files in the order they appear in the files list. The files list is the value files maps to, and is a list of dictionaries containing the following keys:

length
The length of the file, in bytes.
path
A list of strings corresponding to subdirectory names, the last of which is the actual file name (a zero length list is an error case).
In the single file case, the name key is the name of a file; in the multiple file case, it's the name of a directory.

下面是torrent文件解析器的VC++源代码:

// TorrentParser.h: interface for the CTorrentParser class.////////////////////////////////////////////////////////////////////////#if !defined(AFX_TORRENTPARSER_H__7E67DA03_B65C_427F_A241_24230BCD6D54__INCLUDED_)#define AFX_TORRENTPARSER_H__7E67DA03_B65C_427F_A241_24230BCD6D54__INCLUDED_#if _MSC_VER > 1000#pragma once#endif // _MSC_VER > 1000//torrent文件解析器#include "BEncode.h"#include "cyfile.h"#include "cyhash.h"class CTorrentParser  {public:class CFileInfo{public:string strfilename;//文件相对路径double dbfilelen;//文件长度};class CPieceSha1{public:string getstring();bool isempty();BYTE btData[20];};CTorrentParser();virtual ~CTorrentParser();void clear();bool parse(const char * szFilename);bool isvalid();CBEncodeObjectBase * getvalue(const char * szname);public:bool getinfo();list<string> m_listAnnounce;//tracker服务器列表list<CFileInfo> m_listFile;//文件列表string m_strName;//建议的默认文件名或者路径名int m_iPiecelength;//每个分块的长度CPieceSha1m_Infohash;//info字段的sha1CPieceSha1* m_pPieceSha1;//块数组指针int m_iPiececount;//块个数int m_iCreationDate;//创建日期string m_strComment;//注释string m_strPublisher;//发布者string m_strPublisherurl;//发布者网址string m_strCreatedBy;//创建工具private:string m_strfilename;CCyFile m_cyfile;CBEncode m_bencode;CBEncodeDict * m_pRootDict;//解析得到的根节点};#endif // !defined(AFX_TORRENTPARSER_H__7E67DA03_B65C_427F_A241_24230BCD6D54__INCLUDED_)

// TorrentParser.cpp: implementation of the CTorrentParser class.////////////////////////////////////////////////////////////////////////#include "stdafx.h"#include "TorrentParser.h"//////////////////////////////////////////////////////////////////////// Construction/Destruction//////////////////////////////////////////////////////////////////////CTorrentParser::CTorrentParser(){m_pRootDict = NULL;}CTorrentParser::~CTorrentParser(){clear();}bool CTorrentParser::parse(const char *szFilename){clear();if(szFilename)m_strfilename = szFilename;if(!m_cyfile.IsExist(m_strfilename.c_str()))return false;if(m_cyfile.OpenFile(m_strfilename.c_str()))//打开文件{BYTE* pData= m_cyfile.GetData();//读取文件数据m_cyfile.CloseFile();m_bencode.parse((const char*)pData);//得到根节点list<CBEncodeObjectBase*>::iterator it;for(it = m_bencode.m_listObj.begin();it!=m_bencode.m_listObj.end();++it){if((*it)->m_type == enum_BEncodeType_Dict){m_pRootDict= (CBEncodeDict*)(*it);break;}}CBEncodeString* pEOBAnnounce = (CBEncodeString*)getvalue("announce");CBEncodeDict* pEOBInfo = (CBEncodeDict*)getvalue("info");if(pEOBAnnounce && pEOBInfo)true;}return false;}void CTorrentParser::clear(){m_bencode.clear();m_pRootDict = NULL;m_cyfile.ReleaseData();}//检查是否有效的torrent文件bool CTorrentParser::isvalid(){//检查是否存在announce和info字段CBEncodeObjectBase* pEOBAnnounce = getvalue("announce");CBEncodeObjectBase* pEOBInfo = getvalue("info");if(pEOBAnnounce && pEOBInfo)return true;return false;}CBEncodeObjectBase * CTorrentParser::getvalue(const char *szname){if(m_pRootDict)return m_pRootDict->getvalue(szname);return NULL;}//开始获取信息bool CTorrentParser::getinfo(){string strValue;CBEncodeString* pEOBAnnounce = (CBEncodeString*)getvalue("announce");CBEncodeDict* pEOBInfo = (CBEncodeDict*)getvalue("info");if(pEOBAnnounce && pEOBInfo){//得到缺省tracker服务器if(pEOBAnnounce->getstring(strValue))m_listAnnounce.push_back(strValue);//得到备用tracker列表CBEncodeList* pEOBAnnounceList = (CBEncodeList*)getvalue("announce-list");if(pEOBAnnounceList && 
pEOBAnnounceList->m_type == enum_BEncodeType_List){list<CBEncodeObjectBase *>::iterator it;for(it = pEOBAnnounceList->m_listObj.begin();it!=pEOBAnnounceList->m_listObj.end();++it){if((*it)->m_type == enum_BEncodeType_List){CBEncodeList* pEOBAnnounceList2 = (CBEncodeList*)(*it);if(pEOBAnnounceList2->m_listObj.begin()!=pEOBAnnounceList2->m_listObj.end()){if(((CBEncodeString*)(*pEOBAnnounceList2->m_listObj.begin()))->getstring(strValue))m_listAnnounce.push_back(strValue);}}}}CBEncodeString* pEOBTmp;CBEncodeInt * pEOBInt;//得到创建日期m_iCreationDate = 0;pEOBInt = (CBEncodeInt*)getvalue("creation date");if(pEOBInt && pEOBInt->m_type == enum_BEncodeType_Int)m_iCreationDate = (int)pEOBInt->m_fValue;//得到注释pEOBTmp= (CBEncodeString*)getvalue("comment");if(pEOBTmp)pEOBTmp->getstring(m_strComment);//得到创建工具pEOBTmp = (CBEncodeString*)getvalue("createdby");if(pEOBTmp)pEOBTmp->getstring(m_strCreatedBy);//得到发布者pEOBTmp = (CBEncodeString*)getvalue("publisher");if(pEOBTmp)pEOBTmp->getstring(m_strPublisher);pEOBTmp = (CBEncodeString*)getvalue("publisher-url");if(pEOBTmp)pEOBTmp->getstring(m_strPublisherurl);//计算infohashif(pEOBInfo->m_error == enm_BEncodeErr_noerr){CCyHash ch;BYTE szSha1[21];if(ch.GetHash(CALG_SHA1,(BYTE*)pEOBInfo->szPos,pEOBInfo->ilen,szSha1))memcpy(m_Infohash.btData,szSha1,20);elsememset(m_Infohash.btData,0,20);//得到建议的默认文件名或者路径名pEOBTmp = (CBEncodeString*)pEOBInfo->getvalue("name");if(pEOBTmp)pEOBTmp->getstring(m_strName);//如果没有得到发布者,再次尝试获取if(m_strPublisher.empty()){pEOBTmp = (CBEncodeString*)pEOBInfo->getvalue("publisher");if(pEOBTmp)pEOBTmp->getstring(m_strPublisher);pEOBTmp = (CBEncodeString*)pEOBInfo->getvalue("publisher-url");if(pEOBTmp)pEOBTmp->getstring(m_strPublisherurl);}//得到分块长度m_iPiecelength = 0;pEOBInt = (CBEncodeInt*)pEOBInfo->getvalue("piece length");if(pEOBInt && pEOBInt->m_type == enum_BEncodeType_Int)m_iPiecelength = (int)pEOBInt->m_fValue;//得到piece的sha1数组及其piece个数pEOBTmp = (CBEncodeString*)pEOBInfo->getvalue("pieces");if(pEOBTmp->m_type == 
enum_BEncodeType_String && pEOBTmp->m_error == enm_BEncodeErr_noerr && pEOBTmp->m_szData){m_pPieceSha1 = (CPieceSha1*)pEOBTmp->m_szData;m_iPiececount = pEOBTmp->m_ilen/20;}//得到文件列表CFileInfo fi;pEOBInt = (CBEncodeInt*)pEOBInfo->getvalue("length");if(pEOBInt && pEOBInt->m_type == enum_BEncodeType_Int){fi.dbfilelen = (double)pEOBInt->m_fValue;fi.strfilename = m_strName;if(fi.strfilename.size()>0&&fi.dbfilelen>0)m_listFile.push_back(fi);}else{CBEncodeList * pEOBList = (CBEncodeList*)pEOBInfo->getvalue("files");if(pEOBList && pEOBList->m_type == enum_BEncodeType_List){list<CBEncodeObjectBase *>::iterator it;for(it = pEOBList ->m_listObj.begin();it!=pEOBList ->m_listObj.end();++it){if((*it)->m_type == enum_BEncodeType_Dict){fi.dbfilelen = 0;fi.strfilename = "";pEOBInt = (CBEncodeInt*)((CBEncodeDict*)(*it))->getvalue("length");if(pEOBInt && pEOBInt->m_type == enum_BEncodeType_Int)fi.dbfilelen = (double)pEOBInt->m_fValue;CBEncodeList* pEOBPathList = (CBEncodeList*)((CBEncodeDict*)(*it))->getvalue("path");if(pEOBPathList && pEOBPathList->m_type == enum_BEncodeType_List){list<CBEncodeObjectBase *>::iterator it2;for(it2 = pEOBPathList->m_listObj.begin();it2 != pEOBPathList->m_listObj.end();++it2){if(((CBEncodeString*)(*it2))->getstring(strValue))fi.strfilename +="//"+ strValue;else{fi.strfilename = "";break;}}}if(fi.strfilename.size()>0&&fi.dbfilelen>0)m_listFile.push_back(fi);}}}}//检查主要的信息是否正确获取,如果是就返回成功if(m_listAnnounce.size()==0 ||m_listFile.size()==0 ||m_Infohash.isempty() ||m_iPiececount == 0 ||m_iPiecelength == 0 ||m_pPieceSha1 == NULL)return false;return true;}}return false;}//检查CPieceSha1是否为空bool CTorrentParser::CPieceSha1::isempty(){CPieceSha1 pstmp;memset(pstmp.btData,0,sizeof(pstmp));if(memcmp(pstmp.btData,btData,sizeof(pstmp))==0)return true;return false;}string CTorrentParser::CPieceSha1::getstring(){char sz[100];char * szTmp = sz;BYTE bt;for(int i = 0;i<sizeof(CPieceSha1);i++){bt = btData[i];if((bt>='a' && bt<='z')||(bt>='A' && bt<='Z')||(bt>='0' && 
bt<='9')){sprintf(szTmp,"%c",bt);szTmp +=1;}else{sprintf(szTmp,"%%%02X",bt);szTmp +=3;}}*szTmp = 0;return sz;}
在逸搜论坛有一个解析torrent文件的小工具可以看到测试效果.