C# 特殊数据处理:去除 HTML 标签、只保留字符串中的数字和小数点、把带中文的日期转换为 DateTime

来源:互联网 发布:淘宝店铺案例 编辑:程序博客网 时间:2024/05/24 03:27


命名空间

using System.Text.RegularExpressions;




1. 采集到的数据中,有些日期很不规则并且带有中文,比如"2013年5月8日"、"2013年5月"。下面提供一个把这类不规则日期转换成 DateTime 类型的方法。




 /// <summary>
 /// Splits text on Chinese date markers (年 / 月 / 日). Because the pattern has
 /// capturing groups, Regex.Split returns the captured numbers between the
 /// surrounding text fragments.
 /// </summary>
 private static readonly Regex ChineseDateRegex = new Regex(
     @"(\d{2,4})年(\d{1,2})月(\d{1,2})日*|(\d{2,4})年(\d{1,2})月*|(\d{1,2})月(\d{1,2})日*|(\d{2,4})年*",
     RegexOptions.None);

 /// <summary>Finds dash-separated date fragments such as "2013-5-8".</summary>
 private static readonly Regex DashedDateRegex = new Regex(@"([1-9]\d*\-{1}\d*\-{1}\d*)");

 /// <summary>
 /// Converts an irregular date string (for example "2013年5月8日", "2013年5月"
 /// or "发布时间:2013-5-8") to a <see cref="DateTime"/>.
 /// </summary>
 /// <param name="a">The raw text that contains a date.</param>
 /// <returns>
 /// The parsed date. <c>null</c> yields <see cref="DateTime.MinValue"/>, which is
 /// what <c>Convert.ToDateTime((string)null)</c> returns.
 /// </returns>
 /// <exception cref="FormatException">No usable date could be extracted.</exception>
 public DateTime ConvertTime(string a)
 {
     if (a == null)
     {
         return DateTime.MinValue;
     }

     DateTime dt;

     // Stage 1: anything the current culture can already parse as-is.
     if (DateTime.TryParse(a, out dt))
     {
         return dt;
     }

     // Stage 2: a "yyyy-M-d" fragment embedded in other text.
     if (TryExtractDashedDate(a, out dt))
     {
         return dt;
     }

     // Stage 3: rebuild a dash-separated date from the Chinese markers.
     // An empty result makes Convert.ToDateTime throw FormatException.
     return Convert.ToDateTime(BuildDashedDateFromChinese(a));
 }

 /// <summary>
 /// Extracts the first dash-separated date (for example "2013-5-8") found in
 /// <paramref name="a"/> and converts it to a <see cref="DateTime"/>.
 /// </summary>
 /// <param name="a">The raw text that contains a date.</param>
 /// <returns>The first fragment that parses as a date.</returns>
 /// <exception cref="FormatException">No fragment parses as a date.</exception>
 public DateTime ClearDate(string a)
 {
     DateTime dt;
     if (TryExtractDashedDate(a, out dt))
     {
         return dt;
     }

     throw new FormatException("The string does not contain a recognizable date.");
 }

 /// <summary>
 /// Tries every dash-separated fragment in order and stops at the first one that
 /// parses. Fragments are parsed one at a time; gluing them together never
 /// produces a valid date when the text holds more than one.
 /// </summary>
 private static bool TryExtractDashedDate(string a, out DateTime result)
 {
     for (Match m = DashedDateRegex.Match(a); m.Success; m = m.NextMatch())
     {
         if (DateTime.TryParse(m.Value, out result))
         {
             return true;
         }
     }

     result = DateTime.MinValue;
     return false;
 }

 /// <summary>
 /// Builds a "y-M-d" style string from the numeric pieces produced by splitting
 /// on the Chinese date markers. Returns an empty string when nothing numeric
 /// is found.
 /// </summary>
 private static string BuildDashedDateFromChinese(string a)
 {
     string[] pieces = ChineseDateRegex.Split(a);

     // Only the first four pieces are inspected: leading text plus up to
     // three captured numbers.
     int limit = Math.Min(pieces.Length, 4);
     string[] parts = new string[limit];
     int count = 0;
     int ignored;
     for (int i = 0; i < limit; i++)
     {
         if (int.TryParse(pieces[i], out ignored))
         {
             parts[count++] = pieces[i];
         }
     }

     if (count == 0)
     {
         return string.Empty;
     }

     if (count == 1)
     {
         // A lone number is treated as a year; default to January 1st.
         return parts[0] + "-1-1";
     }

     for (int i = 0; i < count; i++)
     {
         // A zero component is not a valid date part; fall back to 1.
         if (parts[i] == "0" || parts[i] == "00")
         {
             parts[i] = "1";
         }
     }

     return string.Join("-", parts, 0, count);
 }


直接调用ConvertTime就可完成转换  





2.带中文的字符串只保留数字和小数点


  /// <summary>
  /// Strips everything except digits and the decimal point from a string (for
  /// example "价格12.5元" becomes 12.5) and converts the remainder to a double.
  /// </summary>
  /// <param name="a">Text that contains a number mixed with other characters.</param>
  /// <returns>
  /// The numeric value, or 0 when <paramref name="a"/> is null, empty or contains
  /// no matching digits.
  /// </returns>
  /// <exception cref="FormatException">
  /// The collected digits do not form a single number (for example two decimal points).
  /// </exception>
  public double ConvertNumber(string a)
  {
      if (string.IsNullOrEmpty(a))
      {
          return 0.0;
      }

      Regex r = new Regex(@"([1-9]\d*\.?\d*)|(0\.\d*[1-9])");

      // Concatenate every numeric fragment; separators such as "," between
      // fragments are dropped on purpose.
      string digits = "";
      for (Match m = r.Match(a); m.Success; m = m.NextMatch())
      {
          digits += m.Value;
      }

      if (digits == "")
      {
          return 0.0;
      }

      // The pattern always uses "." as the decimal point, so parse with the
      // invariant culture. The current culture may treat "." as a group
      // separator and turn "12.5" into 125.
      return Convert.ToDouble(digits, System.Globalization.CultureInfo.InvariantCulture);
  }






3. 去除 HTML 标签

   /// <summary>
   /// Removes script blocks, tags and comment markers from an HTML fragment,
   /// decodes a handful of common entities and returns the trimmed plain text.
   /// </summary>
   /// <param name="Htmlstring">The HTML fragment to clean.</param>
   /// <returns>
   /// The text with markup removed, or an empty string when
   /// <paramref name="Htmlstring"/> is null or empty.
   /// </returns>
   public static string NoHTML(string Htmlstring)
   {
       if (string.IsNullOrEmpty(Htmlstring))
       {
           return string.Empty;
       }

       // Remove scripts. Singleline lets "." cross line breaks; without it a
       // multi-line script body is left behind as text.
       Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",
           RegexOptions.IgnoreCase | RegexOptions.Singleline);

       // Remove tags.
       Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

       // Remove a line break together with the whitespace that follows it.
       Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

       // Remove leftover comment markers.
       Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
       Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

       // Decode common entities (pattern, replacement), in this order.
       string[,] entities =
       {
           { @"&(quot|#34);", "\"" },
           { @"&(amp|#38);", "&" },
           { @"&(lt|#60);", "<" },
           { @"&(gt|#62);", ">" },
           { @"&(nbsp|#160);", "   " },
           { @"&(iexcl|#161);", "\u00A1" },
           { @"&(cent|#162);", "\u00A2" },
           { @"&(pound|#163);", "\u00A3" },
           { @"&(copy|#169);", "\u00A9" },
           // Any other numeric entity is dropped.
           { @"&#(\d+);", "" }
       };
       for (int i = 0; i < entities.GetLength(0); i++)
       {
           Htmlstring = Regex.Replace(Htmlstring, entities[i, 0], entities[i, 1],
               RegexOptions.IgnoreCase);
       }

       // Strings are immutable: the result of Replace must be assigned back,
       // otherwise these calls do nothing.
       Htmlstring = Htmlstring.Replace("<", "");
       Htmlstring = Htmlstring.Replace(">", "");
       Htmlstring = Htmlstring.Replace("\r\n", "");

       return Htmlstring.Trim();
   }