网络神采关键词过滤.NET插件

来源:互联网 发布:vb读取硬盘序列号 编辑:程序博客网 时间:2024/06/06 09:59

此处代码仅供参考,完整代码请下载附件阅读。

不说废话,直接贴代码:

插件接口实现:

using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Text;
using System.Windows.Forms;
using System.IO;
using Bget.Plugin;

namespace HX_Plug
{
    /// <summary>
    /// Plugin entry point implementing the <c>IBget</c> plugin interface.
    /// Most callbacks only write a log line and return a placeholder value;
    /// the real work happens in <see cref="ProcessResultRow"/>, which runs the
    /// keyword filter over the second column of every extracted row.
    /// </summary>
    public class Plug : IBget
    {
        /// <summary>
        /// Called when the plugin is created. Only logs.
        /// </summary>
        /// <remarks>
        /// <c>Action</c> is written fully qualified because both <c>System</c> and
        /// <c>Bget.Plugin</c> are imported and each can supply a type named
        /// <c>Action</c>; the bare name is ambiguous (CS0104) on frameworks that
        /// define <c>System.Action</c>. <see cref="Dispose"/> already uses this form.
        /// </remarks>
        public void Create(string taskPath, string pluginPath, BgetInformation bgetInfo, Bget.Plugin.Action action, bool firstCall)
        {
            this.WriteLog("创建插件...");
        }

        /// <summary>
        /// Called when the plugin is destroyed. Only logs.
        /// </summary>
        public void Dispose(Bget.Plugin.Action action)
        {
            this.WriteLog("销毁插件...");
        }

        /// <summary>
        /// Content-file download callback. Only logs; nothing is downloaded here.
        /// </summary>
        public void DownloadContentFile(string url, string path, bool skipIfFileExisted, string cookie, string referer)
        {
            this.WriteLog("正在下载内容文件...");
        }

        /// <summary>
        /// Standalone-file download callback. Logs and returns
        /// <paramref name="fileNamePrefix"/> followed by the file-name part of
        /// <paramref name="path"/>.
        /// </summary>
        public string DownloadSingleFile(string url, string path, string fileNamePrefix, bool skipIfFileExisted, string cookie, string referer)
        {
            this.WriteLog("正在下载独立文件...");
            return fileNamePrefix + Path.GetFileName(path);
        }

        /// <summary>
        /// Result-extraction callback. Logs and returns an empty string.
        /// </summary>
        public string ExtractResult(string extractionRule, string dataColumn, string htmlText, string url)
        {
            this.WriteLog("提取结果...");
            return "";
        }

        /// <summary>
        /// Result-filtering callback. Logs and always returns <c>true</c>.
        /// </summary>
        public bool Filter(string result, string extractionRule, string dataColumn, System.Data.DataRow extractingResultRow)
        {
            this.WriteLog("正在进行采集结果筛选...");
            return true;
        }

        /// <summary>
        /// Reports the options this plugin requires. Logs and returns
        /// <c>RequiredOptions.None</c>.
        /// </summary>
        public RequiredOptions GetRequiredOptions()
        {
            this.WriteLog("所需选项...");
            return RequiredOptions.None;
        }

        /// <summary>
        /// Returns a new instance of the plugin's settings form (<c>hx_Plug</c>).
        /// </summary>
        public Form GetSettingForm(string taskPath, string pluginPath, Bget.Plugin.BgetInformation bgetInfo)
        {
            return new hx_Plug();
        }

        /// <summary>
        /// Proxy lookup callback. Logs and returns <c>null</c>.
        /// </summary>
        public BgetWebProxy GetWebProxy(string requestingUrl, int retryTimes)
        {
            this.WriteLog("获取代理...");
            return null;
        }

        /// <summary>
        /// Starting-URL callback. Logs and returns a fixed URL;
        /// <paramref name="position"/> is left untouched.
        /// </summary>
        public string LoadStartingUrl(string template, ref int position, string cookie)
        {
            this.WriteLog("从数据库载入起始地址...");
            return "http://www.sensite.cn";
        }

        /// <summary>
        /// Login callback. Logs and returns an empty string.
        /// </summary>
        public string Login(string url)
        {
            this.WriteLog("正在登录...");
            return "";
        }

        /// <summary>
        /// Next-layer URL selection callback. Returns <c>null</c>.
        /// </summary>
        public StringCollection PickNextLayerUrls(string htmlText, string layer, string url, string cookie)
        {
            return null;
        }

        /// <summary>
        /// Next-page URL selection callback. Returns an empty string.
        /// </summary>
        public string PickNextPageUrl(string htmlText, string layer, string url, string cookie)
        {
            return "";
        }

        /// <summary>
        /// Post-download callback for content files. Only logs.
        /// </summary>
        public void ProcessContentFile(string path, bool skipped)
        {
            this.WriteLog("正在处理下载后的内容文件...");
        }

        /// <summary>
        /// Runs the keyword filter over column 1 of the extracted row and writes
        /// the filtered text back into that column. Column 0 is only logged.
        /// </summary>
        /// <param name="extractedResultRow">Row to filter in place.</param>
        /// <returns>Always <c>true</c>.</returns>
        public bool ProcessResultRow(System.Data.DataRow extractedResultRow)
        {
            this.WriteLog("《红星关键字过滤插件 V1.0》");

            // Columns 0 and 1 are indexed below; a missing row or a narrower
            // table would otherwise throw, so such rows are passed through as-is.
            if (extractedResultRow == null || extractedResultRow.Table.Columns.Count < 2)
            {
                return true;
            }

            this.WriteLog(string.Format("过滤:{0}", extractedResultRow[0].ToString()));

            // A fresh KeywordFilter is built per row; its constructor reloads the
            // replacement rules from the database each time.
            KeywordFilter keyFilter = new KeywordFilter();
            extractedResultRow[1] = keyFilter.On_Filter(extractedResultRow[1].ToString());
            return true;
        }

        /// <summary>
        /// Post-download callback for standalone files. Logs and returns
        /// <paramref name="fileNamePrefix"/> followed by the file-name part of
        /// <paramref name="path"/>.
        /// </summary>
        public string ProcessSingleFile(string path, string fileNamePrefix, bool skipped)
        {
            this.WriteLog("正在处理下载后的独立文件...");
            return fileNamePrefix + Path.GetFileName(path);
        }

        /// <summary>
        /// URL request callback. Logs the URL and returns a fixed test document.
        /// </summary>
        public string Visit(string url, byte[] postData, string layer, string cookie, string referer)
        {
            this.WriteLog("正在请求URL: " + url);
            return "<html>test</html>";
        }

        /// <summary>
        /// Raised for every log message the plugin emits.
        /// </summary>
        public event LogEventHanlder Log;

        /// <summary>
        /// Raises <see cref="Log"/> with the given message, if anyone is subscribed.
        /// </summary>
        private void WriteLog(string message)
        {
            // Copy to a local so a concurrent unsubscribe between the null check
            // and the invocation cannot cause a NullReferenceException.
            LogEventHanlder handler = this.Log;
            if (handler != null)
            {
                handler(this, new LogEventArgs(message));
            }
        }

        /// <summary>
        /// Raises <see cref="Log"/> with the given message and indent value,
        /// if anyone is subscribed.
        /// </summary>
        private void WriteLog(string message, int indent)
        {
            LogEventHanlder handler = this.Log;
            if (handler != null)
            {
                handler(this, new LogEventArgs(message, indent));
            }
        }
    }
}

具体实现功能代码:

using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Windows.Forms;

namespace HX_Plug
{
    /// <summary>
    /// Article content filter: applies keyword replacements loaded from the
    /// <c>Filter</c> table, truncates the text, shuffles some sentences and
    /// re-assembles the result with <c>&lt;h3&gt;</c>/<c>&lt;p&gt;</c> markup.
    /// </summary>
    public class KeywordFilter
    {
        /// <summary>Maximum number of characters kept by <see cref="On_Filter"/>.</summary>
        private const int MaxContentLength = 2000;

        /// <summary>Content shorter than this is discarded by <see cref="SubContent"/>.</summary>
        private const int MinContentLength = 300;

        /// <summary>Number of characters taken from a sentence to build a heading.</summary>
        private const int HeadingLength = 8;

        /// <summary>A new heading/paragraph starts every this many sentences.</summary>
        private const int SentencesPerParagraph = 5;

        /// <summary>
        /// Characters removed from a sentence before it is used as a heading.
        /// The original code stripped the ASCII comma twice; the second one was
        /// presumably the full-width comma lost in transcoding, so both are listed.
        /// </summary>
        private static readonly string[] HeadingNoise = new string[] { ",", "，", "\"", "“", "”", " " };

        /// <summary>
        /// Shared random source. A time-seeded <c>new Random()</c> per call can
        /// repeat the same sequence for calls made in quick succession.
        /// </summary>
        private static readonly Random SharedRandom = new Random();

        /// <summary>Guards <see cref="SharedRandom"/>, which is not thread-safe.</summary>
        private static readonly object RandomGate = new object();

        private List<FilterStruct> _filter = new List<FilterStruct>();

        /// <summary>
        /// Loads the keyword replacement rules from the <c>Filter</c> table.
        /// If the query fails (the data layer returns <c>null</c>) the rule list
        /// stays empty.
        /// </summary>
        public KeywordFilter()
        {
            DataTable dt;
            using (DBase db = new DBase())
            {
                dt = db.GetDataTable("select oldValue,newValue from Filter");
            }

            if (dt == null)
            {
                return;
            }

            foreach (DataRow row in dt.Rows)
            {
                string oldValue = row[0].ToString();

                // string.Replace throws ArgumentException for an empty search
                // string, so blank/NULL keywords are skipped.
                if (oldValue.Length == 0)
                {
                    continue;
                }

                FilterStruct fil = new FilterStruct();
                fil.OldValue = oldValue;
                fil.NewValue = row[1].ToString();
                _filter.Add(fil);
            }
        }

        /// <summary>
        /// Runs the full filter pipeline over an article.
        /// </summary>
        /// <param name="Content">Article text; <c>null</c> is treated as empty.</param>
        /// <returns>
        /// The filtered text followed by the product signature, or an empty
        /// string when the input is empty or shorter than the minimum length.
        /// </returns>
        public string On_Filter(string Content)
        {
            if (string.IsNullOrEmpty(Content))
            {
                return string.Empty;
            }

            Content = ReplaceKeyword(Content);              // plain keyword replacement
            Content = SubContent(Content, MaxContentLength); // truncate / drop short text
            Content = SpltParagraph(Content);               // shuffle sentences, add markup

            if (Content != string.Empty)
            {
                Content += "《红星关键字过滤系统V1.0》";
            }

            return Content;
        }

        /// <summary>
        /// Applies every loaded replacement rule to the text, in load order.
        /// </summary>
        /// <param name="Content">Text to process.</param>
        /// <returns>Text with all keywords replaced.</returns>
        private string ReplaceKeyword(string Content)
        {
            foreach (FilterStruct rule in _filter)
            {
                Content = Content.Replace(rule.OldValue, rule.NewValue);
            }

            // The original appended the rule count to the article text here,
            // which leaked a stray number into the output; it is not appended.
            return Content;
        }

        /// <summary>
        /// Splits the text on '。', randomly swaps a few sentences and joins them
        /// back, inserting a heading and opening a paragraph every
        /// <see cref="SentencesPerParagraph"/> sentences.
        /// </summary>
        /// <param name="Content">Original text.</param>
        /// <returns>
        /// The re-assembled text, or <paramref name="Content"/> unchanged when it
        /// has five sentences or fewer.
        /// </returns>
        private string SpltParagraph(string Content)
        {
            string[] sentences = Content.Split('。');
            if (sentences.Length <= 5)
            {
                return Content;
            }

            // One swap per twenty sentences.
            int swapCount = sentences.Length / 20;
            lock (RandomGate)
            {
                for (int i = 0; i < swapCount; i++)
                {
                    sentences = RandomParagraph(
                        SharedRandom.Next(sentences.Length),
                        SharedRandom.Next(sentences.Length),
                        sentences);
                }
            }

            // NOTE(review): as in the original, the '。' separators are only
            // re-emitted at paragraph boundaries and the last <p> is left open —
            // confirm whether that is intended before changing it.
            StringBuilder result = new StringBuilder(Content.Length + 64);
            for (int i = 0; i < sentences.Length; i++)
            {
                if (i % SentencesPerParagraph == 0)
                {
                    if (i != 0)
                    {
                        result.Append("。</p>");
                    }

                    result.Append(BuildHeading(sentences[i]));
                    result.Append("<p>");
                }

                result.Append(sentences[i]);
            }

            return result.ToString();
        }

        /// <summary>
        /// Builds an <c>&lt;h3&gt;</c> heading from the start of a sentence.
        /// </summary>
        /// <param name="sentence">Sentence that opens a paragraph.</param>
        /// <returns>
        /// The heading markup, or an empty string when the sentence is too short
        /// to need one (the original emitted such a sentence twice) or contains
        /// nothing but stripped punctuation.
        /// </returns>
        private static string BuildHeading(string sentence)
        {
            if (sentence.Length <= HeadingLength)
            {
                return string.Empty;
            }

            string text = sentence;
            foreach (string noise in HeadingNoise)
            {
                text = text.Replace(noise, string.Empty);
            }

            if (text.Length == 0)
            {
                return string.Empty;
            }

            // The length must be re-checked after stripping: the original took
            // Substring(0, 8) unconditionally and threw when fewer than eight
            // characters remained.
            if (text.Length > HeadingLength)
            {
                text = text.Substring(0, HeadingLength);
            }

            return "<h3>" + text + "</h3>";
        }

        /// <summary>
        /// Swaps two entries of the sentence array in place.
        /// </summary>
        /// <param name="start">Index of the first entry.</param>
        /// <param name="end">Index of the second entry.</param>
        /// <param name="Paragraph">Sentence array to modify.</param>
        /// <returns>
        /// The same array instance; unchanged when the indices are equal or
        /// either is at or beyond the array length.
        /// </returns>
        private string[] RandomParagraph(int start, int end, string[] Paragraph)
        {
            if (start != end && start < Paragraph.Length && end < Paragraph.Length)
            {
                string swap = Paragraph[start];
                Paragraph[start] = Paragraph[end];
                Paragraph[end] = swap;
            }

            return Paragraph;
        }

        /// <summary>
        /// Truncates the text to a maximum length and discards text that is
        /// too short.
        /// </summary>
        /// <param name="Content">Article text.</param>
        /// <param name="length">Maximum number of characters to keep.</param>
        /// <returns>
        /// The first <paramref name="length"/> characters when the text is
        /// longer than that; otherwise an empty string when it is shorter than
        /// the minimum length; otherwise the text unchanged.
        /// </returns>
        private string SubContent(string Content, int length)
        {
            if (Content.Length > length)
            {
                return Content.Substring(0, length);
            }

            if (Content.Length < MinContentLength)
            {
                return string.Empty;
            }

            return Content;
        }
    }

    /// <summary>
    /// One keyword replacement rule.
    /// </summary>
    public struct FilterStruct
    {
        /// <summary>
        /// Text to be replaced.
        /// </summary>
        public string OldValue;

        /// <summary>
        /// Replacement text.
        /// </summary>
        public string NewValue;
    }
}

数据库底层连接类:(我把这个类写成了个通用的DLL,N久都没换过了)

using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Data.OleDb;
using System.Windows.Forms;

namespace HX_Plug
{
    /// <summary>
    /// Basic data-layer helper for an Access database accessed through OLE DB.
    /// Usage: 1. construct; 2. check <see cref="Is_OpenState"/>; 3. query;
    /// 4. dispose (preferably with a <c>using</c> statement).
    /// </summary>
    public class DBase : IDisposable
    {
        /// <summary>
        /// Whether the connection was opened successfully: <c>true</c> on
        /// success, <c>false</c> on failure or after disposal.
        /// </summary>
        public bool Is_OpenState = false;

        /// <summary>
        /// Access connection string.
        /// </summary>
        private string strOleConn;

        /// <summary>
        /// Access connection object.
        /// </summary>
        private OleDbConnection oleConn;

        /// <summary>
        /// Set once <see cref="Dispose"/> has run, so repeated calls are no-ops.
        /// </summary>
        private bool disposed;

        /// <summary>
        /// Creates the connection and tries to open it; the outcome is stored in
        /// <see cref="Is_OpenState"/> rather than thrown.
        /// </summary>
        public DBase()
        {
            // NOTE(review): the data source is a relative file name — confirm
            // which directory it is expected to resolve against.
            strOleConn = "Provider=Microsoft.Jet.OLEDB.4.0;Data source=User.mdb;";
            oleConn = new OleDbConnection(strOleConn);
            Is_OpenState = Open();
        }

        /// <summary>
        /// Opens the connection if it is currently closed.
        /// </summary>
        /// <returns><c>true</c> if no error occurred; <c>false</c> otherwise.</returns>
        private bool Open()
        {
            try
            {
                if (oleConn.State == ConnectionState.Closed)
                {
                    oleConn.Open();
                }

                return true;
            }
            catch
            {
                // Failure is reported through the return value by design.
                return false;
            }
        }

        /// <summary>
        /// Closes the connection if it is currently open.
        /// </summary>
        /// <returns><c>true</c> if no error occurred; <c>false</c> otherwise.</returns>
        private bool Close()
        {
            try
            {
                if (oleConn.State == ConnectionState.Open)
                {
                    oleConn.Close();
                }

                return true;
            }
            catch
            {
                // Failure is reported through the return value by design.
                return false;
            }
        }

        /// <summary>
        /// Closes and disposes the connection. Safe to call more than once.
        /// </summary>
        /// <remarks>
        /// This is the only <c>IDisposable.Dispose</c> implementation. The class
        /// previously also declared an empty explicit
        /// <c>void IDisposable.Dispose()</c>, which is the one a <c>using</c>
        /// statement binds to, so <c>using (DBase db = ...)</c> never closed the
        /// connection. The finalizer was removed as well: it called
        /// <c>Close</c>/<c>Dispose</c> on a managed connection object, which must
        /// not be touched during finalization.
        /// </remarks>
        public void Dispose()
        {
            if (disposed)
            {
                return;
            }

            disposed = true;

            Close();
            if (oleConn != null)
            {
                oleConn.Dispose();
            }

            Is_OpenState = false;
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Executes a SQL statement and returns the first result table.
        /// </summary>
        /// <param name="sqlCommand">SQL text to execute.</param>
        /// <returns>
        /// The first <see cref="DataTable"/> of the result, or <c>null</c> when
        /// the command is empty, produces no table, or fails.
        /// </returns>
        public DataTable GetDataTable(string sqlCommand)
        {
            if (string.IsNullOrEmpty(sqlCommand))
            {
                return null;
            }

            try
            {
                using (OleDbDataAdapter da = new OleDbDataAdapter(sqlCommand, oleConn))
                {
                    DataSet ds = new DataSet();
                    da.Fill(ds);
                    return ds.Tables.Count > 0 ? ds.Tables[0] : null;
                }
            }
            catch
            {
                // Callers treat null as "query failed".
                return null;
            }
        }
    }
}

悲哀,没有找到上传附件的功能。需要的话给我留个消息吧,我给你发过去。

附修改:

由于上网时间比较少,急需源代码的童鞋可以直接发送邮件To:549015917@qq.com;注明标题和内容,这样可以得到最快的处理!


原创粉丝点击