使用Winista.Text.HtmlParser采集南京价格信息网

来源:互联网 发布:adc0809 51单片机程序 编辑:程序博客网 时间:2024/04/29 02:23

数据库:MySQL

平台:.NET Framework 2.0 (C#)

组件:Winista.Text.HtmlParser

演示页面:2013年3月7日南京市各区县农贸市场主副食品价格对比表  http://www.njprice.com/col71/col464/articleinfo.php?infoid=44181

2013年2月28日南京市各区县农贸市场主副食品价格对比表   http://www.njprice.com/col71/col464/articleinfo.php?infoid=44079

以及所有其他日期发布的《南京市各区县农贸市场主副食品价格对比表》

using System;
using System.Collections.Generic;
using System.Text;
using nanjing_price.WebUtility;
using Winista.Text.HtmlParser;
using Winista.Text.HtmlParser.Lex;
using Winista.Text.HtmlParser.Util;
using Winista.Text.HtmlParser.Tags;
using Winista.Text.HtmlParser.Filters;
using org.nutlab;

namespace nanjing_price.Fuction
{
    /// <summary>
    /// Scrapes one price comparison page: downloads the HTML behind a URL,
    /// drills down to the nested price table and calls
    /// <c>good_price.Add()</c> once for every price cell found in it.
    /// All of the work is triggered by the constructor.
    /// </summary>
    class NanjingMain
    {
        // Encoding handed to every Parser.CreateParser call.
        const string PageEncoding = "gb2312";

        // Row layout of the price table, as relied upon by inputDB().
        const int PublishDateRow = 2;   // first cell holds the publish date text
        const int DistrictRow = 4;      // cell j is copied to district_name for price column j
        const int MarketRow = 5;        // cell j is copied to market_name for price column j
        const int FirstDataRow = 6;     // first row that is read as goods data

        // Column layout of a data row.
        const int NameColumn = 0;
        const int StandardColumn = 1;
        const int UnitColumn = 2;
        const int FirstPriceColumn = 3; // every cell from here on is one price

        // Number of leading characters dropped from the publish-date cell
        // before the remainder is parsed as a date.
        const int PublishDatePrefixLength = 3;

        string content;
        string urlStr;

        /// <summary>
        /// Downloads the page at <paramref name="url"/> and immediately
        /// processes it; any exception raised while downloading, parsing or
        /// adding records propagates to the caller.
        /// </summary>
        /// <param name="url">Address of the page to scrape; also recorded on every price record as <c>get_url</c>.</param>
        public NanjingMain(string url)
        {
            this.urlStr = url;
            getContent();
            inputDB();
        }

        /// <summary>
        /// Fetches the page via <c>webUtility.getContent</c> and keeps the
        /// result of passing it through <c>Tools.filterScript</c>.
        /// </summary>
        void getContent()
        {
            webUtility web = new webUtility();
            content = Tools.filterScript(web.getContent(urlStr));
        }

        /// <summary>
        /// Walks the price table and calls <c>good_price.Add()</c> once per
        /// (goods row, price column) pair.
        /// </summary>
        void inputDB()
        {
            // NOTE(review): a single good_price instance is reused for every
            // Add() call, as in the original code — confirm Add() does not
            // keep per-record state on the instance.
            good_price price = new good_price();

            INode priceTableNode = findPriceTable();
            Console.WriteLine(priceTableNode.ToHtml());

            // The node was selected by a "table" tag-name filter, so it is cast to TableTag.
            TableTag tableTag = (TableTag)priceTableNode;
            // All <tr> rows of that table.
            Winista.Text.HtmlParser.Tags.TableRow[] rows = tableTag.Rows;

            // NOTE(review): Convert.ToDateTime(string) parses with the current
            // culture — confirm the host culture accepts the page's date format.
            string publishCell = rows[PublishDateRow].Columns[0].ToPlainTextString().Trim();
            DateTime publishTime = Convert.ToDateTime(publishCell.Substring(PublishDatePrefixLength));
            Console.WriteLine(publishTime);

            if (rows.Length <= FirstDataRow)
            {
                // No data rows: nothing to add, and the header rows below may not exist.
                return;
            }

            // The header rows are the same for every data row, so read them once
            // instead of once per price cell.
            TableColumn[] districts = rows[DistrictRow].Columns;
            TableColumn[] markets = rows[MarketRow].Columns;

            for (int i = FirstDataRow; i < rows.Length; i++)
            {
                TableColumn[] cells = rows[i].Columns;
                if (cells.Length <= FirstPriceColumn)
                {
                    // Row has no price cells; skip it without touching its
                    // leading columns, which may not all be present.
                    continue;
                }

                // These three values are shared by every price cell of the row.
                string name = cells[NameColumn].ToPlainTextString().Trim();
                string standard = cells[StandardColumn].ToPlainTextString().Trim();
                string unit = cells[UnitColumn].ToPlainTextString().Trim();

                for (int j = FirstPriceColumn; j < cells.Length; j++)
                {
                    // Every field is reassigned before each Add(), exactly as before.
                    price.name = name;
                    price.standard = standard;
                    price.unit = unit;
                    price.district_name = districts[j].ToPlainTextString().Trim();
                    price.market_name = markets[j].ToPlainTextString().Trim();
                    price.amount = cells[j].ToPlainTextString().Trim();
                    price.publish_time = publishTime;
                    price.get_time = System.DateTime.Now;
                    price.get_url = urlStr;
                    price.Add();
                }
            }
        }

        /// <summary>
        /// Drills down through the nested tables of the downloaded page and
        /// returns the node that inputDB() treats as the price table.
        /// </summary>
        /// <returns>The third "table" match of the innermost re-parse.</returns>
        INode findPriceTable()
        {
            NodeFilter tableFilter = new TagNameFilter("table");

            // The fixed indices below depend on the exact node structure of the
            // page, so the input is prepared exactly as before.
            string html = Tools.filterTableAttribute(content.Replace(System.Environment.NewLine, ""));
            Parser parser = Parser.CreateParser(html, PageEncoding);
            INode node = parser.Parse(tableFilter)[2].Children[3];

            // Each level is re-parsed from its own HTML so that the table
            // indices are counted relative to that fragment.
            parser = Parser.CreateParser(node.ToHtml(), PageEncoding);
            node = parser.Parse(tableFilter)[3];

            parser = Parser.CreateParser(node.ToHtml(), PageEncoding);
            return parser.Parse(tableFilter)[2];
        }
    }
}

源代码下载:点击下载

SVN: Google Code