从 HTML 文本中查找 <img> 标签并替换

来源:互联网 发布:单片机lcd显示屏原理 编辑:程序博客网 时间:2024/06/04 01:22

 

 


 

// News body loaded from the database; it contains the <img> tags to process.
string strcon = news.content;

// Matches a whole <img ...> tag (group "imgobj") and captures the value of its
// src attribute (group "imgUrl"). The src value may be wrapped in single
// quotes, double quotes, or left unquoted; the trailing "/" of a self-closing
// tag is optional. The escapes must be backslashes (\b, \s, ...): with forward
// slashes the pattern would look for the literal text "/b" and never match.
Regex regImg = new Regex(@"(?<imgobj><img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>)", RegexOptions.IgnoreCase);

// Find every <img> tag in the content.
MatchCollection matches = regImg.Matches(strcon);

// Parallel arrays: img[i] holds the full tag text and sUrlList[i] holds the
// src URL captured from that same tag.
string[] sUrlList = new string[matches.Count];
string[] img = new string[matches.Count];

for (int k = 0; k < matches.Count; k++)
{
    Match match = matches[k];
    img[k] = match.Groups["imgobj"].Value;      // the whole IMG tag
    sUrlList[k] = match.Groups["imgUrl"].Value; // the IMG src address
}

 

 string imgLocalPath = @"D:/web/LoadImge";//存放下载图片的路径
 

 

  for (int i = 0; i < sUrlList.Length; i++)
            {

                string fileName = sUrlList[i].Substring(sUrlList[i].LastIndexOf("/") + 1, sUrlList[i].Length - sUrlList[i].LastIndexOf("/") - 1);


                 string urlName = sUrlList[i];
               

                    WebClient wc = new WebClient();


                    if (!System.IO.File.Exists(imgLocalPath + "//" + fileName))
                    {
                        wc.DownloadFile(urlName, imgLocalPath + "//" + fileName);

                    }
                   


                   //替换掉整个IMG 标签
                    strcon = strcon.Replace(img[i],"<img border=/"0/" alt=/"/" width=/"220/"  src=/""+"http://localhost/CathImge/"  + fileName+ "/""+" />");

 

               //只替换Url

               strcon = strcon.Repalce(urlName,@"http://localhost/CathImge/"  + fileName);

                }

            }