C#获取网页信息示例
来源:互联网 发布:令狐冲 知乎 编辑:程序博客网 时间:2024/06/06 03:00
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using System.Web;
using System.Net;
using System.Text.RegularExpressions;
namespace ConsoleApplication2
{
/// <summary>
/// Console entry point. Downloads the page at a hard-coded URL, narrows the HTML
/// down to the table whose id is <c>comInfo1</c> (when present), and prints the
/// cleaned text of every table cell on its own line.
/// </summary>
class Program
{
    // The output always contains at least this many lines; slots without a
    // matching cell are printed as the placeholder value.
    private const int MinimumRows = 100;
    private const string EmptySlot = "-1";

    // The patterns are written without quotes or spaces because Main strips both
    // from the downloaded HTML before matching.
    private static readonly Regex TableRegex = new Regex(
        @"<tableid=comInfo1width=100%>(.*?)</table>",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    private static readonly Regex CellRegex = new Regex(
        @"<td.*?>(.*?)</td>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Removes anchor open/close tags and any remaining whitespace from a cell.
    private static readonly Regex AnchorOrWhitespaceRegex = new Regex(
        @"<a.*?href=.*?>|</a>|\s",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    /// <summary>
    /// Fetches the page, extracts the table cells and writes them to the console.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        string url = "http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CorpInfo/stockid/600866.phtml";
        String sourcecode = Htmlsource.getHtmlsource(url);
        sourcecode = sourcecode.Replace("\"", "").Replace(" ", "");

        // Restrict the search to the target table; if it is not found, the cell
        // scan below runs over the whole page.
        Match table = TableRegex.Match(sourcecode);
        if (table.Success)
        {
            sourcecode = table.Value;
        }

        // A growable list is used instead of a fixed 100-element array so that a
        // page with more than 100 cells no longer throws IndexOutOfRangeException.
        List<string> cells = new List<string>();
        foreach (Match m in CellRegex.Matches(sourcecode))
        {
            cells.Add(AnchorOrWhitespaceRegex.Replace(m.Groups[1].Value, ""));
        }

        // Pad with the placeholder so the output keeps its historical minimum length.
        while (cells.Count < MinimumRows)
        {
            cells.Add(EmptySlot);
        }

        foreach (string cell in cells)
        {
            Console.WriteLine(cell);
        }
    }
}
/// <summary>
/// Helper that downloads a web page and returns its content as a string.
/// </summary>
class Htmlsource
{
    /// <summary>
    /// Fetches the web page at <paramref name="url"/> and decodes the response
    /// body as gb2312 text.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The page content as a string.</returns>
    public static String getHtmlsource(String url)
    {
        Encoding gb2312 = System.Text.Encoding.GetEncoding("gb2312");
        string strHtml;

        // The using statements guarantee the client, the response stream and the
        // reader are released even when the download or the read throws, and they
        // dispose the reader before the stream it wraps.
        using (WebClient webclient = new WebClient())
        using (Stream stream = webclient.OpenRead(url))
        using (StreamReader reader = new StreamReader(stream, gb2312))
        {
            strHtml = reader.ReadToEnd();
        }

        strHtml = TransferEncoding(gb2312, System.Text.Encoding.GetEncoding("utf-8"), strHtml);
        return strHtml;
    }

    /// <summary>
    /// Transcodes <paramref name="srcStr"/>: encodes it to bytes with
    /// <paramref name="srcEncoding"/>, converts those bytes to
    /// <paramref name="dstEncoding"/>, and decodes the result back to a string.
    /// </summary>
    /// <param name="srcEncoding">Encoding used to turn the input string into bytes.</param>
    /// <param name="dstEncoding">Encoding the bytes are converted to and decoded with.</param>
    /// <param name="srcStr">Text to transcode.</param>
    /// <returns>The string decoded from the converted bytes.</returns>
    // NOTE(review): for characters representable in srcEncoding this round trip
    // appears to return the same text it was given; kept so existing output does
    // not change. Confirm whether it is still needed.
    private static string TransferEncoding(Encoding srcEncoding, Encoding dstEncoding, string srcStr)
    {
        byte[] srcBytes = srcEncoding.GetBytes(srcStr);
        byte[] bytes = Encoding.Convert(srcEncoding, dstEncoding, srcBytes);
        return dstEncoding.GetString(bytes);
    }
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using System.Web;
using System.Net;
using System.Text.RegularExpressions;
namespace ConsoleApplication2
{
/// <summary>
/// Console entry point. Downloads the page at a hard-coded URL, narrows the HTML
/// down to the table whose id is <c>comInfo1</c> (when present), and prints the
/// cleaned text of every table cell on its own line.
/// </summary>
class Program
{
    // The output always contains at least this many lines; slots without a
    // matching cell are printed as the placeholder value.
    private const int MinimumRows = 100;
    private const string EmptySlot = "-1";

    // The patterns are written without quotes or spaces because Main strips both
    // from the downloaded HTML before matching.
    private static readonly Regex TableRegex = new Regex(
        @"<tableid=comInfo1width=100%>(.*?)</table>",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    private static readonly Regex CellRegex = new Regex(
        @"<td.*?>(.*?)</td>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Removes anchor open/close tags and any remaining whitespace from a cell.
    private static readonly Regex AnchorOrWhitespaceRegex = new Regex(
        @"<a.*?href=.*?>|</a>|\s",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    /// <summary>
    /// Fetches the page, extracts the table cells and writes them to the console.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        string url = "http://vip.stock.finance.sina.com.cn/corp/go.php/vCI_CorpInfo/stockid/600866.phtml";
        String sourcecode = Htmlsource.getHtmlsource(url);
        sourcecode = sourcecode.Replace("\"", "").Replace(" ", "");

        // Restrict the search to the target table; if it is not found, the cell
        // scan below runs over the whole page.
        Match table = TableRegex.Match(sourcecode);
        if (table.Success)
        {
            sourcecode = table.Value;
        }

        // A growable list is used instead of a fixed 100-element array so that a
        // page with more than 100 cells no longer throws IndexOutOfRangeException.
        List<string> cells = new List<string>();
        foreach (Match m in CellRegex.Matches(sourcecode))
        {
            cells.Add(AnchorOrWhitespaceRegex.Replace(m.Groups[1].Value, ""));
        }

        // Pad with the placeholder so the output keeps its historical minimum length.
        while (cells.Count < MinimumRows)
        {
            cells.Add(EmptySlot);
        }

        foreach (string cell in cells)
        {
            Console.WriteLine(cell);
        }
    }
}
/// <summary>
/// Helper that downloads a web page and returns its content as a string.
/// </summary>
class Htmlsource
{
    /// <summary>
    /// Fetches the web page at <paramref name="url"/> and decodes the response
    /// body as gb2312 text.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The page content as a string.</returns>
    public static String getHtmlsource(String url)
    {
        Encoding gb2312 = System.Text.Encoding.GetEncoding("gb2312");
        string strHtml;

        // The using statements guarantee the client, the response stream and the
        // reader are released even when the download or the read throws, and they
        // dispose the reader before the stream it wraps.
        using (WebClient webclient = new WebClient())
        using (Stream stream = webclient.OpenRead(url))
        using (StreamReader reader = new StreamReader(stream, gb2312))
        {
            strHtml = reader.ReadToEnd();
        }

        strHtml = TransferEncoding(gb2312, System.Text.Encoding.GetEncoding("utf-8"), strHtml);
        return strHtml;
    }

    /// <summary>
    /// Transcodes <paramref name="srcStr"/>: encodes it to bytes with
    /// <paramref name="srcEncoding"/>, converts those bytes to
    /// <paramref name="dstEncoding"/>, and decodes the result back to a string.
    /// </summary>
    /// <param name="srcEncoding">Encoding used to turn the input string into bytes.</param>
    /// <param name="dstEncoding">Encoding the bytes are converted to and decoded with.</param>
    /// <param name="srcStr">Text to transcode.</param>
    /// <returns>The string decoded from the converted bytes.</returns>
    // NOTE(review): for characters representable in srcEncoding this round trip
    // appears to return the same text it was given; kept so existing output does
    // not change. Confirm whether it is still needed.
    private static string TransferEncoding(Encoding srcEncoding, Encoding dstEncoding, string srcStr)
    {
        byte[] srcBytes = srcEncoding.GetBytes(srcStr);
        byte[] bytes = Encoding.Convert(srcEncoding, dstEncoding, srcBytes);
        return dstEncoding.GetString(bytes);
    }
}
}
0 0
- C#获取网页信息示例
- C#获取进程的信息示例
- .NET C#获取当前网页地址信息
- .NET C#获取当前网页地址信息
- .NET C#获取当前网页地址信息
- C# 抓取网页类(获取网页中所有信息)
- 如何在C#中获取指定网页源码的示例
- C# winfrom WebRequest获取html网页信息 引用using
- C# HttpWebRequest 绝技 根据URL地址获取网页信息
- C# HttpWebRequest 绝技 根据URL地址获取网页信息
- C#获取网页信息如:地址,参数等
- 正则表达式相关:C# 抓取网页类(获取网页中所有信息)
- 正则表达式相关:C# 抓取网页类(获取网页中所有信息)
- 正则表达式相关:C# 抓取网页类(获取网页中所有信息)
- 正则表达式相关:C# 抓取网页类(获取网页中所有信息)
- C#网页源代码获取
- C#获取网页内容
- c# 获取网页内容
- unity__摄像机
- leetcode:Construct Binary Tree from Inorder and Postorder Traversal
- android 中的dumpsys
- C#及出版第五天(流程控制2)
- 利用数组获取表单元素并循环插入数据库
- C#获取网页信息示例
- ztree点滴累计
- IPhone之AVAudioRecorder AVplayer
- c#基础班第六天(复杂数据类型)
- Java语言发展史
- 创建线程时,undefined reference to 'pthread_create'问题解决
- SharePoint Workflow架构(一)SharePoint 和Workflow Runtime的结合
- C#基础班第七天(函数)
- mybatis分页拦截器