网络神采关键词过滤NET插件
来源:互联网 发布:vb读取硬盘序列号 编辑:程序博客网 时间:2024/06/06 09:59
此处代码仅供参考,完整代码请下载附件阅读。
不说废话,直接贴代码:
插件接口实现:
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Text;
using System.Windows.Forms;
using System.IO;
using Bget.Plugin;

namespace HX_Plug
{
    /// <summary>
    /// Implementation of the <c>IBget</c> plugin interface.
    /// Most callbacks only write a log line and return a fixed value; the one
    /// callback with real behavior is <see cref="ProcessResultRow"/>, which runs
    /// column 1 of each result row through <see cref="KeywordFilter"/>.
    /// </summary>
    public class Plug : IBget
    {
        /// <summary>
        /// Plugin creation callback. Only writes a log line.
        /// </summary>
        /// <remarks>
        /// The <paramref name="action"/> type is fully qualified because this file
        /// imports both <c>System</c> and <c>Bget.Plugin</c>; when <c>System.Action</c>
        /// is visible the bare name <c>Action</c> is an ambiguous reference.
        /// <see cref="Dispose"/> already uses the qualified name.
        /// </remarks>
        public void Create(string taskPath, string pluginPath, BgetInformation bgetInfo, Bget.Plugin.Action action, bool firstCall)
        {
            this.WriteLog("创建插件...");
        }

        /// <summary>
        /// Plugin destruction callback. Only writes a log line.
        /// </summary>
        public void Dispose(Bget.Plugin.Action action)
        {
            this.WriteLog("销毁插件...");
        }

        /// <summary>
        /// Content-file download callback. Only writes a log line; nothing is downloaded here.
        /// </summary>
        public void DownloadContentFile(string url, string path, bool skipIfFileExisted, string cookie, string referer)
        {
            this.WriteLog("正在下载内容文件...");
        }

        /// <summary>
        /// Standalone-file download callback. Writes a log line and returns
        /// <paramref name="fileNamePrefix"/> followed by the file-name part of <paramref name="path"/>.
        /// </summary>
        public string DownloadSingleFile(string url, string path, string fileNamePrefix, bool skipIfFileExisted, string cookie, string referer)
        {
            this.WriteLog("正在下载独立文件...");
            return fileNamePrefix + Path.GetFileName(path);
        }

        /// <summary>
        /// Result extraction callback. Writes a log line and always returns an empty string.
        /// </summary>
        public string ExtractResult(string extractionRule, string dataColumn, string htmlText, string url)
        {
            this.WriteLog("提取结果...");
            return "";
        }

        /// <summary>
        /// Result filtering callback. Writes a log line and always returns <c>true</c>.
        /// </summary>
        public bool Filter(string result, string extractionRule, string dataColumn, System.Data.DataRow extractingResultRow)
        {
            this.WriteLog("正在进行采集结果筛选...");
            return true;
        }

        /// <summary>
        /// Writes a log line and reports <c>RequiredOptions.None</c>.
        /// </summary>
        public RequiredOptions GetRequiredOptions()
        {
            this.WriteLog("所需选项...");
            return RequiredOptions.None;
        }

        /// <summary>
        /// Returns a new instance of the plugin's settings form (<c>hx_Plug</c>).
        /// </summary>
        public Form GetSettingForm(string taskPath, string pluginPath, Bget.Plugin.BgetInformation bgetInfo)
        {
            return new hx_Plug();
        }

        /// <summary>
        /// Proxy lookup callback. Writes a log line and always returns <c>null</c>.
        /// </summary>
        public BgetWebProxy GetWebProxy(string requestingUrl, int retryTimes)
        {
            this.WriteLog("获取代理...");
            return null;
        }

        /// <summary>
        /// Starting-URL callback. Writes a log line and returns a fixed URL;
        /// <paramref name="position"/> is left untouched.
        /// </summary>
        public string LoadStartingUrl(string template, ref int position, string cookie)
        {
            this.WriteLog("从数据库载入起始地址...");
            return "http://www.sensite.cn";
        }

        /// <summary>
        /// Login callback. Writes a log line and always returns an empty string.
        /// </summary>
        public string Login(string url)
        {
            this.WriteLog("正在登录...");
            return "";
        }

        /// <summary>
        /// Next-layer URL selection callback. Always returns <c>null</c>.
        /// </summary>
        public StringCollection PickNextLayerUrls(string htmlText, string layer, string url, string cookie)
        {
            return null;
        }

        /// <summary>
        /// Next-page URL selection callback. Always returns an empty string.
        /// </summary>
        public string PickNextPageUrl(string htmlText, string layer, string url, string cookie)
        {
            return "";
        }

        /// <summary>
        /// Post-download callback for content files. Only writes a log line.
        /// </summary>
        public void ProcessContentFile(string path, bool skipped)
        {
            this.WriteLog("正在处理下载后的内容文件...");
        }

        /// <summary>
        /// Result-row callback: logs column 0 and replaces column 1 with its
        /// keyword-filtered form.
        /// </summary>
        /// <param name="extractedResultRow">Row to process; columns 0 and 1 are accessed by index.</param>
        /// <returns>Always <c>true</c>.</returns>
        public bool ProcessResultRow(System.Data.DataRow extractedResultRow)
        {
            this.WriteLog("《红星关键字过滤插件 V1.0》");
            this.WriteLog(string.Format("过滤:{0}", extractedResultRow[0].ToString()));

            // NOTE(review): a new KeywordFilter is built for every row, and its
            // constructor reloads the rule table from the database each time.
            // Kept as-is so rule edits are picked up immediately; cache the
            // instance if that is not required.
            KeywordFilter keyFilter = new KeywordFilter();
            extractedResultRow[1] = keyFilter.On_Filter(extractedResultRow[1].ToString());
            return true;
        }

        /// <summary>
        /// Post-download callback for standalone files. Writes a log line and returns
        /// <paramref name="fileNamePrefix"/> followed by the file-name part of <paramref name="path"/>.
        /// </summary>
        public string ProcessSingleFile(string path, string fileNamePrefix, bool skipped)
        {
            this.WriteLog("正在处理下载后的独立文件...");
            return fileNamePrefix + Path.GetFileName(path);
        }

        /// <summary>
        /// URL request callback. Logs the URL and returns a fixed HTML stub.
        /// </summary>
        public string Visit(string url, byte[] postData, string layer, string cookie, string referer)
        {
            this.WriteLog("正在请求URL: " + url);
            return "<html>test</html>";
        }

        /// <summary>
        /// Raised by <c>WriteLog</c> for every log message this plugin emits.
        /// </summary>
        public event LogEventHanlder Log;

        /// <summary>
        /// Raises <see cref="Log"/> with the given message, if anyone is subscribed.
        /// </summary>
        private void WriteLog(string message)
        {
            // Snapshot the delegate so an unsubscribe between the null check
            // and the invocation cannot cause a NullReferenceException.
            LogEventHanlder handler = this.Log;
            if (handler != null)
            {
                handler(this, new LogEventArgs(message));
            }
        }

        /// <summary>
        /// Raises <see cref="Log"/> with the given message and indent value, if anyone is subscribed.
        /// </summary>
        private void WriteLog(string message, int indent)
        {
            LogEventHanlder handler = this.Log;
            if (handler != null)
            {
                handler(this, new LogEventArgs(message, indent));
            }
        }
    }
}
具体实现功能代码:
using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Windows.Forms;

namespace HX_Plug
{
    /// <summary>
    /// Article content filter: applies keyword replacements loaded from the
    /// <c>Filter</c> table, truncates the text, then shuffles some sentences and
    /// wraps the result in simple HTML headings and paragraphs.
    /// </summary>
    public class KeywordFilter
    {
        /// <summary>Content longer than this is truncated to this many characters.</summary>
        private const int MaxContentLength = 2000;

        /// <summary>Content shorter than this (and not truncated) is discarded.</summary>
        private const int MinContentLength = 300;

        /// <summary>Maximum number of characters taken for a generated heading.</summary>
        private const int HeadingLength = 8;

        /// <summary>A new heading/paragraph is started every this many sentences.</summary>
        private const int SentencesPerParagraph = 5;

        /// <summary>One random swap is performed per this many sentences.</summary>
        private const int SentencesPerSwap = 20;

        private List<FilterStruct> _filter = new List<FilterStruct>();

        /// <summary>
        /// Loads the replacement rules (<c>oldValue</c>, <c>newValue</c>) from the
        /// <c>Filter</c> table. If the query fails (<c>GetDataTable</c> returns
        /// <c>null</c>) the rule list stays empty.
        /// </summary>
        public KeywordFilter()
        {
            DataTable dt;
            using (DBase db = new DBase())
            {
                dt = db.GetDataTable("select oldValue,newValue from Filter");
            }

            if (dt == null)
            {
                return;
            }

            foreach (DataRow row in dt.Rows)
            {
                FilterStruct fil = new FilterStruct();
                fil.OldValue = row[0].ToString();
                fil.NewValue = row[1].ToString();
                _filter.Add(fil);
            }
        }

        /// <summary>
        /// Runs the full pipeline: keyword replacement, length limiting, sentence
        /// shuffling/markup, and finally appending the product signature to
        /// non-empty results.
        /// </summary>
        /// <param name="Content">Text to filter; <c>null</c> is treated as empty.</param>
        /// <returns>The filtered text, or an empty string when the text is discarded.</returns>
        public string On_Filter(string Content)
        {
            if (Content == null)
            {
                return string.Empty;
            }

            Content = ReplaceKeyword(Content);
            Content = SubContent(Content, MaxContentLength);
            Content = SpltParagraph(Content);

            if (Content != string.Empty)
            {
                Content += "《红星关键字过滤系统V1.0》";
            }
            return Content;
        }

        /// <summary>
        /// Applies every loaded replacement rule to the text, in load order.
        /// </summary>
        /// <param name="Content">Text to process.</param>
        /// <returns>The text with all replacements applied.</returns>
        private string ReplaceKeyword(string Content)
        {
            foreach (FilterStruct rule in _filter)
            {
                // String.Replace throws ArgumentException for an empty search
                // string, so a blank row in the rule table is skipped.
                if (string.IsNullOrEmpty(rule.OldValue))
                {
                    continue;
                }
                Content = Content.Replace(rule.OldValue, rule.NewValue);
            }

            // The rule count is no longer appended to the text: that was debug
            // output leaking into the article body.
            return Content;
        }

        /// <summary>
        /// Splits the text on '。', randomly swaps a few of the resulting sentences
        /// and reassembles them with an <c>&lt;h3&gt;</c> heading and a new
        /// <c>&lt;p&gt;</c> every <c>SentencesPerParagraph</c> sentences.
        /// Text with five or fewer pieces is returned unchanged.
        /// </summary>
        /// <param name="Content">Original text.</param>
        /// <returns>The shuffled, marked-up text.</returns>
        private string SpltParagraph(string Content)
        {
            string[] Paragraph = Content.Split('。');
            if (Paragraph.Length <= SentencesPerParagraph)
            {
                return Content;
            }

            // Randomly swap a share of the sentences.
            Random r = new Random();
            int swapCount = Paragraph.Length / SentencesPerSwap;
            for (int i = 0; i < swapCount; i++)
            {
                Paragraph = RandomParagraph(r.Next(Paragraph.Length), r.Next(Paragraph.Length), Paragraph);
            }

            // Reassemble. As in the original output format, the '。' separators are
            // only re-emitted at paragraph boundaries and the last <p> is left open.
            StringBuilder src = new StringBuilder(Content.Length + 64);
            for (int i = 0; i < Paragraph.Length; i++)
            {
                if (i % SentencesPerParagraph == 0)
                {
                    if (i != 0)
                    {
                        src.Append("。</p>");
                    }
                    src.Append(BuildHeading(Paragraph[i]));
                    src.Append("<p>");
                }
                src.Append(Paragraph[i]);
            }
            return src.ToString();
        }

        /// <summary>
        /// Builds the heading placed in front of a paragraph from its first sentence.
        /// Sentences longer than <c>HeadingLength</c> are stripped of commas, quotes
        /// and spaces, cut to at most <c>HeadingLength</c> characters and wrapped in
        /// <c>&lt;h3&gt;</c>; shorter sentences are returned as-is, without tags.
        /// </summary>
        private static string BuildHeading(string sentence)
        {
            if (sentence.Length <= HeadingLength)
            {
                // NOTE(review): short sentences are emitted untagged, which
                // duplicates them in the output — preserved from the original.
                return sentence;
            }

            // NOTE(review): the original stripped "," twice; one of the two was
            // presumably meant to be the full-width comma — confirm with the author.
            string title = sentence
                .Replace(",", string.Empty)
                .Replace("\"", string.Empty)
                .Replace("“", string.Empty)
                .Replace("”", string.Empty)
                .Replace(" ", string.Empty);

            // Stripping can shorten the text below HeadingLength; clamp so that
            // Substring cannot throw ArgumentOutOfRangeException.
            if (title.Length > HeadingLength)
            {
                title = title.Substring(0, HeadingLength);
            }
            return "<h3>" + title + "</h3>";
        }

        /// <summary>
        /// Swaps two entries of the sentence array in place when the indexes differ
        /// and are both below the array length.
        /// </summary>
        /// <param name="start">Index of the first entry.</param>
        /// <param name="end">Index of the second entry.</param>
        /// <param name="Paragraph">Sentence array.</param>
        /// <returns>The same array instance, after the swap (if any).</returns>
        private string[] RandomParagraph(int start, int end, string[] Paragraph)
        {
            if (start != end && start < Paragraph.Length && end < Paragraph.Length)
            {
                string swap = Paragraph[start];
                Paragraph[start] = Paragraph[end];
                Paragraph[end] = swap;
            }
            return Paragraph;
        }

        /// <summary>
        /// Limits the text length: text longer than <paramref name="length"/> is cut
        /// to that length, text shorter than <c>MinContentLength</c> is discarded,
        /// anything else is returned unchanged.
        /// </summary>
        /// <param name="Content">Article text.</param>
        /// <param name="length">Maximum length to keep.</param>
        /// <returns>The truncated text, an empty string, or the original text.</returns>
        private string SubContent(string Content, int length)
        {
            if (Content.Length > length)
            {
                return Content.Substring(0, length);
            }
            if (Content.Length < MinContentLength)
            {
                return string.Empty;
            }
            return Content;
        }
    }

    /// <summary>
    /// One keyword replacement rule.
    /// </summary>
    public struct FilterStruct
    {
        /// <summary>
        /// Text to be replaced.
        /// </summary>
        public string OldValue;

        /// <summary>
        /// Replacement text.
        /// </summary>
        public string NewValue;
    }
}
数据库底层连接类:(我把这个类写成了个通用的DLL,N久都没换过了)
using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Data.OleDb;
using System.Windows.Forms;

namespace HX_Plug
{
    /// <summary>
    /// Basic Access database helper built on <see cref="OleDbConnection"/>.
    /// The constructor tries to open the connection; dispose the instance
    /// (for example with a <c>using</c> statement) to close it.
    /// </summary>
    public class DBase : IDisposable
    {
        /// <summary>
        /// Whether opening the connection succeeded in the constructor
        /// (<c>true</c> on success, <c>false</c> on failure).
        /// </summary>
        public bool Is_OpenState = false;

        /// <summary>
        /// Access connection string.
        /// </summary>
        private string strOleConn;

        /// <summary>
        /// Access connection object.
        /// </summary>
        private OleDbConnection oleConn;

        /// <summary>
        /// Set once <see cref="Dispose"/> has run, so repeated calls are harmless.
        /// </summary>
        private bool disposed;

        /// <summary>
        /// Creates the connection and attempts to open it; the outcome is stored
        /// in <see cref="Is_OpenState"/>.
        /// Usage: 1. construct; 2. check <see cref="Is_OpenState"/>; 3. query;
        /// 4. dispose.
        /// </summary>
        public DBase()
        {
            // The data source is a relative path, so it is resolved against the
            // process's current directory.
            strOleConn = "Provider=Microsoft.Jet.OLEDB.4.0;Data source=User.mdb;";
            oleConn = new OleDbConnection(strOleConn);
            Is_OpenState = Open();
        }

        /// <summary>
        /// Opens the connection if it is currently closed.
        /// </summary>
        /// <returns><c>true</c> if no error occurred; <c>false</c> if opening threw.</returns>
        private bool Open()
        {
            try
            {
                if (oleConn.State == ConnectionState.Closed)
                {
                    oleConn.Open();
                }
                return true;
            }
            catch (Exception)
            {
                // Failure is reported through the return value / Is_OpenState.
                return false;
            }
        }

        /// <summary>
        /// Closes the connection if it is currently open.
        /// </summary>
        /// <returns><c>true</c> if no error occurred; <c>false</c> if closing threw.</returns>
        private bool Close()
        {
            try
            {
                if (oleConn.State == ConnectionState.Open)
                {
                    oleConn.Close();
                }
                return true;
            }
            catch (Exception)
            {
                return false;
            }
        }

        /// <summary>
        /// Closes and disposes the connection. Safe to call more than once.
        /// </summary>
        /// <remarks>
        /// This public method is the only <see cref="IDisposable.Dispose"/>
        /// implementation. The previous empty explicit interface implementation
        /// made <c>using (DBase db = ...)</c> a no-op that never closed the
        /// connection. The finalizer was removed as well: the class owns only
        /// managed objects, and those must not be touched from a finalizer.
        /// </remarks>
        public void Dispose()
        {
            if (disposed)
            {
                return;
            }
            disposed = true;

            Close();
            if (oleConn != null)
            {
                oleConn.Dispose();
            }

            // The class is not sealed; keep a derived type's finalizer from
            // running after an explicit Dispose.
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Executes a query and returns the first result table.
        /// </summary>
        /// <param name="sqlCommand">SQL text to execute.</param>
        /// <returns>The first <see cref="DataTable"/> of the result, or <c>null</c> if anything fails.</returns>
        public DataTable GetDataTable(string sqlCommand)
        {
            try
            {
                DataSet ds = new DataSet();
                using (OleDbDataAdapter da = new OleDbDataAdapter(sqlCommand, oleConn))
                {
                    da.Fill(ds);
                }
                return ds.Tables[0];
            }
            catch (Exception)
            {
                // Documented contract: callers check for null instead of catching.
                return null;
            }
        }
    }
}
悲哀,没有找到上传附件的功能。需要的话给我留个消息吧,我给你发过去。
附修改:
由于上网时间比较少,急需源代码的童鞋可以直接发送邮件To:549015917@qq.com;注明标题和内容,这样可以得到最快的处理!
- 网络神采关键词过滤NET插件
- 网络神采 网站数据采集软件
- 关键词过滤
- 关键词过滤
- 《网络神采4》技术大揭密之:DedeCMS存储过程
- php关键词过滤
- php 敏感关键词过滤
- [Vbs]_[过滤重复关键词]
- flex 关键词过滤 2.5.35
- C#实现文本关键词过滤
- php扩展trie_filter 过滤关键词
- .net关键词中英文对照
- 使用flex 做关键词、正则表达式过滤
- 求助 VC++ 基于关键词的文本过滤
- 邮件透明过滤-中文关键词的提取
- 阿里云文本关键词过滤检测
- 对文本内容进行关键词过滤
- Java敏感词过滤、java关键词过滤工具包原理
- Java中关于类的初始化
- 如何分辨主板的USB3.0接口是不是真的
- C++类中显式定义内联函数
- 从别人那里转的20条格言
- 在存储过程中执行3种oracle循环语句
- 网络神采关键词过滤NET插件
- config.status: error: cannot find input file
- Oracle 10G 如何使用超过1.7G的内存
- 黑马程序员-java 交通灯管理系统
- emacs 快捷键(包括C/C++模式)
- oracle 错误列表
- 亦经典语录
- 让你的代码变的更加强大(Making your C++ code robust)
- ie9不显示flash的解决方法