用C#使用HtmlParser.NET的例子。
来源:互联网 发布:造价预算软件 编辑:程序博客网 时间:2024/05/21 05:06
using System;
using System.IO;
using Winista.Text.HtmlParser;
using Winista.Text.HtmlParser.Lex;
using Winista.Text.HtmlParser.Util;
using Winista.Text.HtmlParser.Tags;
/// <summary>
/// Click handler: parses the HTML text held in textBox1 and rebuilds
/// treeView1 so that it shows the parsed tag hierarchy under a "root" node.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // The lexer can also be fed from a stream (e.g. a local html file)
    // or from a uri (a web page) instead of a plain string:
    //   byte[] htmlBytes = Encoding.UTF8.GetBytes(this.textBox1.Text);
    //   MemoryStream memStream = new MemoryStream(htmlBytes);
    //   InputStreamSource input = new InputStreamSource(memStream, "utf-8");
    //   Page page = new Page(input);
    //   Lexer lex = new Lexer(page);
    string html = this.textBox1.Text;
    if (html.Length > 0)
    {
        // Here the html comes straight from the textbox.
        Parser htmlParser = new Parser(new Lexer(html));

        // No filter is supplied; presumably this yields every top-level node.
        NodeList topLevelNodes = htmlParser.Parse(null);

        // Start from an empty tree holding only the artificial "root" entry.
        this.treeView1.Nodes.Clear();
        TreeNode rootEntry = this.treeView1.Nodes.Add("root");

        // Siblings are not requested here because this loop already visits
        // each top-level node itself.
        for (int index = 0; index < topLevelNodes.Count; index++)
        {
            this.RecursionHtmlNode(rootEntry, topLevelNodes[index], false);
        }
    }
}
/// <summary>
/// Recursively copies an HTML node and its descendants into the tree view.
/// Start tags become tree nodes labelled with the tag name plus any
/// id / class / style / href attributes; other nodes add no tree node of
/// their own, but their children are still visited.
/// </summary>
/// <param name="treeNode">Tree node that receives the entries created for <paramref name="htmlNode"/>.</param>
/// <param name="htmlNode">HTML node to process; nothing happens when it is null.</param>
/// <param name="siblingRequired">
/// When true, every following sibling of <paramref name="htmlNode"/> is also
/// processed under the same <paramref name="treeNode"/>.
/// </param>
private void RecursionHtmlNode(TreeNode treeNode, INode htmlNode, bool siblingRequired)
{
    if (htmlNode == null || treeNode == null)
    {
        return;
    }

    // Nodes that are not start tags create no tree node, so their children
    // are attached directly to the incoming parent.
    TreeNode current = treeNode;

    // The current node: a single 'as' cast replaces the is-check plus cast.
    ITag tag = htmlNode as ITag;
    if (tag != null && !tag.IsEndTag())
    {
        string nodeString = tag.TagName;
        if (tag.Attributes != null && tag.Attributes.Count > 0)
        {
            // Attributes are looked up by upper-case key and displayed
            // with a lower-case name, in this fixed order.
            string[] shownAttributes = { "ID", "CLASS", "STYLE", "HREF" };
            foreach (string attributeName in shownAttributes)
            {
                // Look the value up once instead of once to test and once to print.
                object attributeValue = tag.Attributes[attributeName];
                if (attributeValue != null)
                {
                    nodeString = nodeString + " { " + attributeName.ToLowerInvariant()
                        + "=\"" + attributeValue.ToString() + "\" }";
                }
            }
        }

        current = new TreeNode(nodeString);
        treeNode.Nodes.Add(current);
    }

    // The children nodes: start at the first child and let that call walk
    // the remaining children through the sibling loop below.
    if (htmlNode.Children != null && htmlNode.Children.Count > 0)
    {
        this.RecursionHtmlNode(current, htmlNode.FirstChild, true);
    }

    // The sibling nodes: each is processed with siblingRequired == false so
    // that it does not walk the sibling chain again itself.
    if (siblingRequired)
    {
        INode sibling = htmlNode.NextSibling;
        while (sibling != null)
        {
            this.RecursionHtmlNode(treeNode, sibling, false);
            sibling = sibling.NextSibling;
        }
    }
}
using System.IO;
using Winista.Text.HtmlParser;
using Winista.Text.HtmlParser.Lex;
using Winista.Text.HtmlParser.Util;
using Winista.Text.HtmlParser.Tags;
/// <summary>
/// Click handler: parses the HTML text held in textBox1 and rebuilds
/// treeView1 so that it shows the parsed tag hierarchy under a "root" node.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // The lexer can also be fed from a stream (e.g. a local html file)
    // or from a uri (a web page) instead of a plain string:
    //   byte[] htmlBytes = Encoding.UTF8.GetBytes(this.textBox1.Text);
    //   MemoryStream memStream = new MemoryStream(htmlBytes);
    //   InputStreamSource input = new InputStreamSource(memStream, "utf-8");
    //   Page page = new Page(input);
    //   Lexer lex = new Lexer(page);
    string html = this.textBox1.Text;
    if (html.Length > 0)
    {
        // Here the html comes straight from the textbox.
        Parser htmlParser = new Parser(new Lexer(html));

        // No filter is supplied; presumably this yields every top-level node.
        NodeList topLevelNodes = htmlParser.Parse(null);

        // Start from an empty tree holding only the artificial "root" entry.
        this.treeView1.Nodes.Clear();
        TreeNode rootEntry = this.treeView1.Nodes.Add("root");

        // Siblings are not requested here because this loop already visits
        // each top-level node itself.
        for (int index = 0; index < topLevelNodes.Count; index++)
        {
            this.RecursionHtmlNode(rootEntry, topLevelNodes[index], false);
        }
    }
}
/// <summary>
/// Recursively copies an HTML node and its descendants into the tree view.
/// Start tags become tree nodes labelled with the tag name plus any
/// id / class / style / href attributes; other nodes add no tree node of
/// their own, but their children are still visited.
/// </summary>
/// <param name="treeNode">Tree node that receives the entries created for <paramref name="htmlNode"/>.</param>
/// <param name="htmlNode">HTML node to process; nothing happens when it is null.</param>
/// <param name="siblingRequired">
/// When true, every following sibling of <paramref name="htmlNode"/> is also
/// processed under the same <paramref name="treeNode"/>.
/// </param>
private void RecursionHtmlNode(TreeNode treeNode, INode htmlNode, bool siblingRequired)
{
    if (htmlNode == null || treeNode == null)
    {
        return;
    }

    // Nodes that are not start tags create no tree node, so their children
    // are attached directly to the incoming parent.
    TreeNode current = treeNode;

    // The current node: a single 'as' cast replaces the is-check plus cast.
    ITag tag = htmlNode as ITag;
    if (tag != null && !tag.IsEndTag())
    {
        string nodeString = tag.TagName;
        if (tag.Attributes != null && tag.Attributes.Count > 0)
        {
            // Attributes are looked up by upper-case key and displayed
            // with a lower-case name, in this fixed order.
            string[] shownAttributes = { "ID", "CLASS", "STYLE", "HREF" };
            foreach (string attributeName in shownAttributes)
            {
                // Look the value up once instead of once to test and once to print.
                object attributeValue = tag.Attributes[attributeName];
                if (attributeValue != null)
                {
                    nodeString = nodeString + " { " + attributeName.ToLowerInvariant()
                        + "=\"" + attributeValue.ToString() + "\" }";
                }
            }
        }

        current = new TreeNode(nodeString);
        treeNode.Nodes.Add(current);
    }

    // The children nodes: start at the first child and let that call walk
    // the remaining children through the sibling loop below.
    if (htmlNode.Children != null && htmlNode.Children.Count > 0)
    {
        this.RecursionHtmlNode(current, htmlNode.FirstChild, true);
    }

    // The sibling nodes: each is processed with siblingRequired == false so
    // that it does not walk the sibling chain again itself.
    if (siblingRequired)
    {
        INode sibling = htmlNode.NextSibling;
        while (sibling != null)
        {
            this.RecursionHtmlNode(treeNode, sibling, false);
            sibling = sibling.NextSibling;
        }
    }
}
Screenshot of the example:
The parser's fault tolerance is very good, as the picture below shows (it could handle malformed HTML more intelligently, but I think this is good enough for practical use):
- 用C#使用HtmlParser.NET的例子。
- Python HTMLParser的使用例子
- 一个Python HTMLParser的使用例子
- 使用HTMLParser提取新闻的例子
- Htmlparser 使用例子
- lucene.net 中htmlparser 的使用
- lucene.net 中htmlparser 的使用
- c#信息抓取二:HTMLParser.net使用详解
- HtmlParser的使用
- htmlparser的简单使用
- HTMLparser的初步使用
- HTMLPARSER的使用
- .net C# 反射技术使用例子
- .net C# 反射技术使用例子
- 使用htmlparser简单抓取京东图书信息存入数据库的小例子
- 使用HtmlParser解析HTML (C#版)
- 使用HtmlParser解析HTML (C#版)
- HTMLParser的两种使用
- Linux下查看Nginx Apache MySQL的并发连接数和连接状态
- oracle用户解锁与加锁
- fedora15安装NVIDIA官方显卡驱动
- 【C++】友元类和友元函数(转)
- VC_HWND和CWND的概念以及转换
- 用C#使用HtmlParser.NET的例子。
- liunx下完全卸载oracle10G
- 一个使用FFmpeg库读取3gp视频的例子-Android中使用FFmpeg媒体库(三)
- 求两个日期的间隔天数
- 修改临时表空间
- 【转载】消息钩子函数入门篇
- s3c6410 framebuffer分析
- oracle tablespace management
- Boost库的命名规则及静态编译命令。