网站收录查询代码

来源:互联网 发布:layout是什么软件 编辑:程序博客网 时间:2024/04/28 03:40

今天做个批量查询百度和google收录的小工具

一开始不知道怎么做,想了想,决定直接抓取搜索引擎的查询结果页面,再从页面中提取收录总数。

效果如下图

导入txt文本,文本里每行一个网址:

/// <summary>
/// Lets the user pick a text file and appends one grid row per line of that file.
/// The line goes into the first cell; the second and third cells are left empty
/// so the query methods can fill them in later.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
private void button4_Click(object sender, EventArgs e)
        {
            openFileDialog1.Filter = "超级文本(*.txt)|*.txt|(All   Files)|*.*|文档|*.rtf  ";
            if (openFileDialog1.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            // openFileDialog1 is a member shared across clicks, so it is deliberately
            // NOT disposed here; the next click reuses the same instance.
            // The using block guarantees the file handle is released even if
            // reading or adding a row throws.
            using (StreamReader reader = new StreamReader(openFileDialog1.FileName))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Three values per row: the line read from the file plus two empty result cells.
                    dataGridView1.Rows.Add(new object[] { line, null, null });
                }
            }
        }

查询代码

//public delegate string MethodCaller(string name);//定义个代理 
        /// <summary>
        /// 多线程查询
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void button1_Click(object sender, EventArgs e)
        {
            for (int i = 0; i < dataGridView1.RowCount; i++)
            {
                if (dataGridView1.Rows[i].Cells[0].Value != null && dataGridView1.Rows[i].Cells[0].Value.ToString().Trim()!="")
                {
                    string url = dataGridView1.Rows[i].Cells[0].Value.ToString().Trim();
                    //MethodCaller mc = new MethodCaller(getBaiduCount);
                    //IAsyncResult result = mc.BeginInvoke(url, null, null);//输出参数
                    //MethodCaller mg = new MethodCaller(getGoogleCount);
                    //IAsyncResult result1 = mg.BeginInvoke(url, null, null);
                    //dataGridView1.Rows[i].Cells[1].Value = mc.EndInvoke(result);
                    //dataGridView1.Rows[i].Cells[2].Value = mg.EndInvoke(result1);
                    ParameterizedThreadStart ParStart = new ParameterizedThreadStart(getBaiduCount);
                    Thread myThread = new Thread(ParStart);
                    object o = (object)(url+"_"+i.ToString()+"_1");

                    myThread.Start(o);
                    ParameterizedThreadStart ParStart1 = new ParameterizedThreadStart(getGoogleCount);
                    Thread myThread1 = new Thread(ParStart1);
                    object o1 = (object)(url + "_" + i.ToString() + "_2");

                    myThread1.Start(o1);
                    //dataGridView1.Rows[i].Cells[1].Value = getBaiduCount(url);
                    //dataGridView1.Rows[i].Cells[2].Value = getGoogleCount(url);

                }
            }
        }

 /// <summary>
        /// google收录查询
        /// </summary>
        /// <param name="url"></param>
        /// <returns></returns>
        public void getGoogleCount(object url)
        {
            string[] arr = url.ToString().Split('_');
            string ss = WebFunc.GetHtmlEx("http://www.google.com.hk/search?hl=zh-CN&newwindow=1&safe=strict&biw=1440&bih=506&q=site%3A"+arr[0]+"&btnG=Google+%E6%90%9C%E7%B4%A2&aq=f&aqi=&aql=&oq=" );
            string start = "<div id=resultStats>找到约 ";
            if (ss.Contains(start))
            {
                //start = ss.Substring(ss.IndexOf(start) + start.Length, 20);
                //string result = StringSplit(start, "条结果")[0].Replace(",", "");
                string result = StringSplit(StringSplit(ss, start)[1], "条结果")[0].Replace(",", "");
                dataGridView1.Rows[int.Parse(arr[1])].Cells[2].Value = result;
                //return result;
            }
            else
            {
                //return "0";
            }
        }
      /// <summary>
        /// 百度收录查询
        /// </summary>
        /// <param name="url"></param>
        /// <returns></returns>
        public void getBaiduCount(object url)
        {
            string[] arr = url.ToString().Split('_');
            string ss = "";
            if (url.ToString().Replace(".", ".").Split('.')[0] == "www")
            {
                //string ss = WebFunc.GetHtmlEx("http://www.baidu.com/s?wd=site%3A"+url);
                ss = WebFunc.GetHtmlEx("http://www.baidu.com/s?wd=site%3A" + arr[0]);
            }
            else
            {
                ss = WebFunc.GetHtmlEx("http://www.baidu.com/s?bs=site%3A" + arr[0].Replace(arr[0].Split('.')[0],"www")+"&f=8&wd=site%3A" + arr[0]);
            }
            string start = "<span class=\"nums\" style=\"margin-left:120px\">找到相关结果";
            if (ss.Contains(start))
            {
                //start = StringSplit(StringSplit(ss, start)[1], "个")[0].Replace(",", ""); //ss.Substring(ss.IndexOf(start) + start.Length, 20);
                //string result = StringSplit(start, "个")[0].Replace(",", "");
                string result=StringSplit(StringSplit(ss, start)[1], "个")[0].Replace(",", "").Replace("约","");
                dataGridView1.Rows[int.Parse(arr[1])].Cells[1].Value = result;
                //return result;
            }
            else
            {
                dataGridView1.Rows[int.Parse(arr[1])].Cells[1].Value = "0";
            }
        }
        /// <summary>
        /// 将字符串分割成数组
        /// </summary>
        /// <param name="strSource"></param>
        /// <param name="strSplit"></param>
        /// <returns></returns>
        public static string[] StringSplit(string strSource, string strSplit)
        {
            string[] strtmp = new string[1];
            int index = strSource.IndexOf(strSplit, 0);
            if (index < 0)
            {
                strtmp[0] = strSource;
                return strtmp;
            }
            else
            {
                strtmp[0] = strSource.Substring(0, index);
                return StringSplit(strSource.Substring(index + strSplit.Length), strSplit, strtmp);
            }
        }
        /// <summary>
        /// 采用递归将字符串分割成数组
        /// </summary>
        /// <param name="strSource"></param>
        /// <param name="strSplit"></param>
        /// <param name="attachArray"></param>
        /// <returns></returns>
        public static string[] StringSplit(string strSource, string strSplit, string[] attachArray)
        {
            string[] strtmp = new string[attachArray.Length + 1];
            attachArray.CopyTo(strtmp, 0);

            int index = strSource.IndexOf(strSplit, 0);
            if (index < 0)
            {
                strtmp[attachArray.Length] = strSource;
                return strtmp;
            }
            else
            {
                strtmp[attachArray.Length] = strSource.Substring(0, index);
                return StringSplit(strSource.Substring(index + strSplit.Length), strSplit, strtmp);
            }
        }

导出excel代码

 /// <summary>
        /// 另存为excel文件
        /// </summary>
        private void SaveAs() //另存新档按钮   导出成Excel
 
         {
 
             SaveFileDialog saveFileDialog = new SaveFileDialog();
 
             saveFileDialog.Filter = "Execl files (*.xls)|*.xls";
 
             saveFileDialog.FilterIndex = 0;
 
             saveFileDialog.RestoreDirectory = true;
 
             saveFileDialog.CreatePrompt = true;
 
             saveFileDialog.Title = "Export Excel File To"; 
 
 
             saveFileDialog.ShowDialog();
 
 
             Stream myStream;
 
             myStream = saveFileDialog.OpenFile();
 
             //StreamWriter sw = new StreamWriter(myStream, System.Text.Encoding.GetEncoding("gb2312"));
 
             StreamWriter sw = new StreamWriter(myStream, System.Text.Encoding.GetEncoding(-0));
 
             string str = "";
 
             try
 
             {
 
                //写标题
 
                 for (int i = 0; i < dataGridView1.ColumnCount; i++)
 
                 {
 
                     if (i > 0)
 
                     {
 
                         str += "\t";
 
                     }
                     str += dataGridView1.Columns[i].HeaderText;
                 }
                 sw.WriteLine(str);
                 //写内容
 
                 for (int j = 0; j < dataGridView1.Rows.Count; j++)
 
                 {
 
                     string tempStr = "";
 
                     for (int k = 0; k < dataGridView1.Columns.Count; k++)
 
                     {
 
                         if (k > 0)
 
                         {
 
                             tempStr += "\t";
 
                         }
                         if (dataGridView1.Rows[j].Cells[k].Value != null)
                         {
                             tempStr += dataGridView1.Rows[j].Cells[k].Value.ToString();
                         }
                     }
                     sw.WriteLine(tempStr);                    
 
                 }
 
                sw.Close();

                myStream.Close();

            }
            catch (Exception e)

            {

                MessageBox.Show(e.ToString());

            }
            finally

            {
                sw.Close();
                myStream.Close();
            }           

      }

根据网址获取源代码

/// <summary>
/// Minimal page downloader: issues HTTP GET requests with a fixed browser-like
/// set of headers and a cookie container shared by all calls.
/// </summary>
static class WebFunc
    {
        // Shared across every request so cookies set by one response are sent with the next.
        private static readonly CookieContainer cookie = new CookieContainer();
        private static readonly string contentType = "application/x-www-form-urlencoded";
        private static readonly string accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-silverlight, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";
        private static readonly string userAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

        /// <summary>
        /// Downloads the body of <paramref name="url"/> as text, decoded with the
        /// charset announced in the response headers (UTF-8 when none is announced
        /// or the announced one is not supported).
        /// </summary>
        /// <param name="url">Absolute HTTP/HTTPS address to fetch.</param>
        /// <returns>
        /// The response body, or an empty string if the request could not be created,
        /// sent or read. Failures are deliberately not thrown: callers treat an empty
        /// page as "no result".
        /// </returns>
        public static string GetHtmlEx(string url)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.UserAgent = userAgent;
                request.ContentType = contentType;
                request.CookieContainer = cookie;
                request.Accept = accept;
                request.Method = "GET";

                // using blocks release the connection even when reading fails midway.
                using (WebResponse response = request.GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream, DetectEncoding(response)))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                // Best-effort contract: report the failure to the debug output and
                // hand back an empty page instead of throwing.
                System.Diagnostics.Debug.WriteLine("GetHtmlEx failed for '" + url + "': " + ex.Message);
                return "";
            }
        }

        /// <summary>
        /// Picks the text encoding for a response from the first header value that
        /// contains "charset=...". Never returns null.
        /// </summary>
        private static Encoding DetectEncoding(WebResponse response)
        {
            for (int i = 0; i < response.Headers.Count; i++)
            {
                string headerValue = response.Headers[i] ?? "";
                // Stop at space, ';' or a quote so a trailing parameter or quoting does
                // not end up inside the charset name.
                Match m = Regex.Match(headerValue, "(?i)(?<=charset=)[^ ;\"']+");
                if (!m.Success)
                {
                    continue;
                }

                try
                {
                    return Encoding.GetEncoding(m.Value);
                }
                catch (ArgumentException)
                {
                    // Unknown or unsupported charset name: fall through to the default.
                    break;
                }
            }

            // No usable charset announced. A null encoding would make StreamReader
            // throw, which previously turned every such page into an empty result.
            return Encoding.UTF8;
        }

    }

不过查得太频繁的话,Google会返回503错误,呵呵

 

0 0