爬虫

来源:互联网 发布:办公一般用什么软件 编辑:程序博客网 时间:2024/04/30 00:26

void CXhDlg::OnButton1()
{
 // TODO: Add your control notification handler code here
 CInternetSession session;
 CHttpFile *myHttpFile = NULL;
 
 _ConnectionPtr m_pConnection;
 m_pConnection.CreateInstance(__uuidof(Connection));
 CString conn;
 conn.Format("driver={SQL Server};Server=*.*.*.*;UID=*;PWD=*;DATABASE=*");
 m_pConnection->Open(_bstr_t(conn),"","",adModeUnknown);
 _variant_t RecordsAffected;
 
 int type=0;
 while( type++ < 80 )
 {
  bool flag = true;
  int page=1;
  while(flag)
  {
   CString m_SiteName;
   m_SiteName.Format("%s%d%s%d","http://mmsg.qq.com/cgi-bin/gddylist?Type=",type,"&Sort=1&Page=",page++);
   
   CString m_SiteInfo;
   CString myData;
   try
   {
    myHttpFile = (CHttpFile*)session.OpenURL(m_SiteName);
   }
   catch(CInternetException* m_pException)
   {
    delete myHttpFile;
    m_pException->Delete();
    return ;
   }
   if ( myHttpFile )
   {
    while( myHttpFile->ReadString( myData ) )
    {
     m_SiteInfo+=myData;
    }
   }
   
   int x1 = m_SiteInfo.Find("<div align=/"center/">");
   int x2 = m_SiteInfo.Find("</div>",x1);
   int x3 = m_SiteInfo.Find("/",x1);
   CString s1 = m_SiteInfo.Mid(x1+20,x3-x1-20);
   CString s2 = m_SiteInfo.Mid(x3+1,x2-x3-1);
   if (s1 =="0" )
    break;
   if (s1 == s2)
   {
    flag=false;
   }

   int t1 = m_SiteInfo.Find("&gt;&gt;",x1-100)+8;
   int t2 = m_SiteInfo.Find("</td>",t1);
   CString xtype = m_SiteInfo.Mid(t1,t2-t1);

   int find=0,find1=m_SiteInfo.Find("<td width=/"35%/" bgcolor=",x1)+20;
   m_SiteInfo = m_SiteInfo.Right(m_SiteInfo.GetLength()-find1);
   while( (find=m_SiteInfo.Find( "<td width=/"35%/" bgcolor=" , 1)) > 0 )
   {
    int find1 = m_SiteInfo.Find( ">",find)+1;
    int find2 = m_SiteInfo.Find( "</td>" , find1);
    CString content=m_SiteInfo.Mid( find1,find2-find1 );//主体
    content.Replace("'","''");
    content.Replace("/"","/"/"");
    
    m_SiteInfo = m_SiteInfo.Right(m_SiteInfo.GetLength()-find2);

    CString sql;
    sql.Format("insert into xh (xtype,content) values ('%s','%s')",xtype,content);
    m_pConnection->Execute(_bstr_t(sql), &RecordsAffected, adCmdText);
    ::Sleep(200);
   }
  }
 }
 myHttpFile->Close();
 session.Close();
 MessageBox("完成");
 return;
}

原创粉丝点击