爬虫
来源:互联网 发布:办公一般用什么软件 编辑:程序博客网 时间:2024/04/30 00:26
void CXhDlg::OnButton1()
{
// TODO: Add your control notification handler code here
CInternetSession session;
CHttpFile *myHttpFile = NULL;
_ConnectionPtr m_pConnection;
m_pConnection.CreateInstance(__uuidof(Connection));
CString conn;
conn.Format("driver={SQL Server};Server=*.*.*.*;UID=*;PWD=*;DATABASE=*");
m_pConnection->Open(_bstr_t(conn),"","",adModeUnknown);
_variant_t RecordsAffected;
int type=0;
while( type++ < 80 )
{
bool flag = true;
int page=1;
while(flag)
{
CString m_SiteName;
m_SiteName.Format("%s%d%s%d","http://mmsg.qq.com/cgi-bin/gddylist?Type=",type,"&Sort=1&Page=",page++);
CString m_SiteInfo;
CString myData;
try
{
myHttpFile = (CHttpFile*)session.OpenURL(m_SiteName);
}
catch(CInternetException* m_pException)
{
delete myHttpFile;
m_pException->Delete();
return ;
}
if ( myHttpFile )
{
while( myHttpFile->ReadString( myData ) )
{
m_SiteInfo+=myData;
}
}
int x1 = m_SiteInfo.Find("<div align=/"center/">");
int x2 = m_SiteInfo.Find("</div>",x1);
int x3 = m_SiteInfo.Find("/",x1);
CString s1 = m_SiteInfo.Mid(x1+20,x3-x1-20);
CString s2 = m_SiteInfo.Mid(x3+1,x2-x3-1);
if (s1 =="0" )
break;
if (s1 == s2)
{
flag=false;
}
int t1 = m_SiteInfo.Find(">>",x1-100)+8;
int t2 = m_SiteInfo.Find("</td>",t1);
CString xtype = m_SiteInfo.Mid(t1,t2-t1);
int find=0,find1=m_SiteInfo.Find("<td width=/"35%/" bgcolor=",x1)+20;
m_SiteInfo = m_SiteInfo.Right(m_SiteInfo.GetLength()-find1);
while( (find=m_SiteInfo.Find( "<td width=/"35%/" bgcolor=" , 1)) > 0 )
{
int find1 = m_SiteInfo.Find( ">",find)+1;
int find2 = m_SiteInfo.Find( "</td>" , find1);
CString content=m_SiteInfo.Mid( find1,find2-find1 );//主体
content.Replace("'","''");
content.Replace("/"","/"/"");
m_SiteInfo = m_SiteInfo.Right(m_SiteInfo.GetLength()-find2);
CString sql;
sql.Format("insert into xh (xtype,content) values ('%s','%s')",xtype,content);
m_pConnection->Execute(_bstr_t(sql), &RecordsAffected, adCmdText);
::Sleep(200);
}
}
}
myHttpFile->Close();
session.Close();
MessageBox("完成");
return;
}
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- 爬虫
- Apache Ant 101:使Java项目生成易如反掌
- 页面打印
- 为你的blog增加精美flash时钟
- Windows C++ 程序员如何过度到Symbian OS C++ 程序员?
- 软件模型设计基础-图(Class diagram)
- 爬虫
- 软件模型设计基础-接口(Interface)
- 软件模型设计基础-行为事物
- 经典SQL之多条记录单行显示
- 我终于知道,我的ASP。NET为什么一直都用不了了。
- 软件模型设计基础-关系事物
- 执行Shell命令的方法。
- C++初学者
- Version7.2 For Flash MX 2004 update发布