VC获取网页标题,解决乱码问题

来源:互联网 发布:java导出压缩包 编辑:程序博客网 时间:2024/05/20 04:51

//效果截图如下(文章后面附有本工程的VS2008下载地址):


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

主要代码如下:

//取网页标题void CGetWebTitleDlg::OnBnClickedBtnGetTitle(){m_HtmlCode.SetWindowText(_T(""));//clearCInternetSession mySession(NULL,0);CHttpFile* htmlFile=NULL;CString strLine,url,strHtml;TCHAR sRecv[1024];UINT CodePage=65001;//CP_UTF8:65001 CP_ACP:0m_Url.GetWindowText(url);TRY {htmlFile=(CHttpFile*)mySession.OpenURL(url);//打开连接//获取网页编码while(htmlFile->ReadString(sRecv,1024)){//先用UTF8来进行转换,如果html页面编码是gbk或gb2312,转换后中文字符为//乱码,但英文字符显示正常,我们判断html页码编码,通过寻找英文就可以了int nBufferSize = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)sRecv, -1, NULL, 0);wchar_t *pBuffer = new wchar_t[nBufferSize+1];memset(pBuffer,0,(nBufferSize+1)*sizeof(wchar_t)); MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)sRecv, -1 , pBuffer, nBufferSize*sizeof(wchar_t)); strHtml=pBuffer;if (-1!=strHtml.Find(_T("charset=gbk"))){CodePage=0;delete pBuffer;break;}if (-1!=strHtml.Find(_T("charset=GBK")))//http://www.sohu.com{CodePage=0;delete pBuffer;break;}if (-1!=strHtml.Find(_T("charset=gb2312"))){CodePage=0;delete pBuffer;break;}if (-1!=strHtml.Find(_T("charset=GB2312"))){CodePage=0;delete pBuffer;break;}if (-1!=strHtml.Find(_T("charset=utf-8"))){CodePage=65001;delete pBuffer;break;}if (-1!=strHtml.Find(_T("charset=UTF-8"))){CodePage=65001;delete pBuffer;break;}delete pBuffer;}strHtml=_T("");//获取网页源码htmlFile=(CHttpFile*)mySession.OpenURL(url);//重新打开连接while(htmlFile->ReadString(sRecv,1024)){// 编码转换,可解决中文乱码问题//gb2312转为unicode,则用CP_ACP//gbk转为unicode,也用CP_ACP//utf-8转为unicode,则用CP_UTF8int nBufferSize = MultiByteToWideChar(CodePage, 0, (LPCSTR)sRecv, -1, NULL, 0);wchar_t *pBuffer = new wchar_t[nBufferSize+1];memset(pBuffer,0,(nBufferSize+1)*sizeof(wchar_t)); //gb2312转为unicode,则用CP_ACP//gbk转为unicode,也用CP_ACP//utf-8转为unicode,则用CP_UTF8MultiByteToWideChar(CodePage, 0, (LPCSTR)sRecv, -1 , pBuffer, nBufferSize*sizeof(wchar_t)); strHtml+=pBuffer;strHtml+="\r\n";delete pBuffer;}htmlFile->Close();mySession.Close() ;delete htmlFile;m_HtmlCode.SetWindowText(strHtml);//显示网页源码//获取网页标题 CString szTitle=strHtml.GetString();int 
nStart=szTitle.Find(_T("<title>"));int nEnd=szTitle.Find(_T("</title>"));szTitle=szTitle.Mid(nStart+7,nEnd-nStart-7);this->SetWindowText(_T("获取到的网页标题为【")+szTitle+_T("】   By︶风不冷丶"));}CATCH (CException, e){TCHAR err[1024];e->GetErrorMessage(err,1024);m_HtmlCode.SetWindowText(err);}END_CATCH}

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

本例子,VS2008工程下载地址:

http://download.csdn.net/detail/friendan/6288523

友情提醒:直接在VS2008中运行程序时,会获取不到网页标题,错误信息为:无法解析服务器的名称或地址

这个我现在也不知道是什么原因,知道的望告知一二,不胜感激。不过你去生成程序的目录,直接运行程序,就不会有以上错误了。

在VS2008直接运行工程获取网页标题,错误截图如下:


-------------------------------------------------------------------------------------------------------------


您的十分满意是我追求的宗旨。

您的一点建议是我后续的动力。







原创粉丝点击