读取淘宝页面字节流,提取宝贝图片地址、宝贝标题和宝贝价格
来源:互联网 发布:linux安装过程 编辑:程序博客网 时间:2024/04/28 02:03
/// <summary>
/// Helpers that download a Taobao item page and pull the item image address,
/// title and price out of the HTML with regular expressions.
/// </summary>
public static class taobao_message
{
    // The patterns are fixed, so the Regex objects are built once and reused.
    // Matching on a shared Regex instance is safe across threads.
    private static readonly Regex img_regex = new Regex(@"J_ImgBooth\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>");
    private static readonly Regex title_regex = new Regex("<h3>(<a[^>]*>)?([^<]*)(</a>)?</h3>");
    private static readonly Regex price_regex = new Regex("J_StrPrice[^>]*>([^<>]*)(</)");

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and decodes its bytes as GB2312 text.
    /// </summary>
    /// <param name="url">
    /// Page address. When it does not start with <c>http://</c> or <c>https://</c>,
    /// <c>http://</c> is prepended.
    /// </param>
    /// <returns>The downloaded bytes decoded with the GB2312 encoding.</returns>
    public static string webclinet_content(string url)
    {
        // Only look at the start of the address. Stripping every "http://" would
        // also mangle addresses embedded in the query string, and blindly
        // prepending a scheme would break https links.
        bool has_scheme = url.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase)
            || url.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase);
        string address = has_scheme ? url : "http://" + url;

        // WebClient is IDisposable; release it as soon as the download finishes.
        using (System.Net.WebClient client = new System.Net.WebClient())
        {
            byte[] page = client.DownloadData(address);
            // The original author notes that Taobao pages are encoded as GB2312.
            return System.Text.Encoding.GetEncoding("GB2312").GetString(page);
        }
    }

    /// <summary>
    /// Reads the information of a Taobao item.
    /// </summary>
    /// <param name="url">Item page address.</param>
    /// <returns>A three-element array: { image address, title, price }.</returns>
    public static string[] baobei_mess(string url)
    {
        string content = webclinet_content(url);
        string baobei_img = get_taobao(content, 1);
        string baobei_title = get_taobao(content, 2);
        string baobei_price = get_taobao(content, 3);
        return new string[] { baobei_img, baobei_title, baobei_price };
    }

    /// <summary>
    /// Extracts the text of one specific tag from page HTML.
    /// </summary>
    /// <param name="content">HTML text to search.</param>
    /// <param name="type">
    /// What to extract: 0 = nothing, 1 = item image address, 2 = item title, 3 = item price.
    /// Any other value yields an empty string.
    /// </param>
    /// <returns>
    /// The captured text of every match, concatenated in document order,
    /// or an empty string when nothing matches.
    /// </returns>
    public static string get_taobao(string content, int type)
    {
        Regex re;
        string group;
        switch (type)
        {
            case 1: re = img_regex; group = "imgUrl"; break;
            // A numbered group can be addressed by the string form of its number.
            case 2: re = title_regex; group = "2"; break;
            case 3: re = price_regex; group = "1"; break;
            default: return "";
        }

        System.Text.StringBuilder result = new System.Text.StringBuilder();
        foreach (Match match in re.Matches(content))
        {
            result.Append(match.Groups[group].Value);
        }
        return result.ToString();
    }
}
{
/// <summary>
/// Downloads the page at <paramref name="url"/> and decodes its bytes as GB2312 text.
/// </summary>
/// <param name="url">
/// Page address. When it does not start with <c>http://</c> or <c>https://</c>,
/// <c>http://</c> is prepended.
/// </param>
/// <returns>The downloaded bytes decoded with the GB2312 encoding.</returns>
public static string webclinet_content(string url)
{
    // Only look at the start of the address. Stripping every "http://" would
    // also mangle addresses embedded in the query string, and blindly
    // prepending a scheme would break https links.
    bool has_scheme = url.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase)
        || url.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase);
    string address = has_scheme ? url : "http://" + url;

    // WebClient is IDisposable; release it as soon as the download finishes.
    using (System.Net.WebClient client = new System.Net.WebClient())
    {
        byte[] page = client.DownloadData(address);
        // The original author notes that Taobao pages are encoded as GB2312.
        return System.Text.Encoding.GetEncoding("GB2312").GetString(page);
    }
}
/// <summary>
/// Fetches a Taobao item page and extracts its key fields.
/// </summary>
/// <param name="url">Item page address.</param>
/// <returns>A three-element array: { image address, title, price }.</returns>
public static string[] baobei_mess(string url)
{
    string page = webclinet_content(url);

    // Field order is part of the contract: image first, then title, then price.
    return new string[]
    {
        get_taobao(page, 1),
        get_taobao(page, 2),
        get_taobao(page, 3)
    };
}
/// <summary>
/// Extracts the text of one specific tag from page HTML.
/// </summary>
/// <param name="content">HTML text to search.</param>
/// <param name="type">
/// What to extract: 0 = nothing, 1 = item image address, 2 = item title, 3 = item price.
/// </param>
/// <returns>
/// The captured text of every match, concatenated in document order;
/// an empty string when nothing matches or the type is not 1, 2 or 3.
/// </returns>
public static string get_taobao(string content, int type)
{
    if (type == 0)
    {
        return "";
    }

    // Choose the pattern and the capture group that holds the wanted text.
    string pattern = "";
    string wanted_group = null;
    if (type == 1)
    {
        pattern = @"J_ImgBooth\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>";
        wanted_group = "imgUrl";
    }
    else if (type == 2)
    {
        pattern = "<h3>(<a[^>]*>)?([^<]*)(</a>)?</h3>";
        wanted_group = "2";
    }
    else if (type == 3)
    {
        pattern = "J_StrPrice[^>]*>([^<>]*)(</)";
        wanted_group = "1";
    }

    Regex matcher = new Regex(pattern);
    MatchCollection hits = matcher.Matches(content);

    string collected = "";
    if (wanted_group == null)
    {
        // Unrecognised type: nothing is read from the matches.
        return collected;
    }

    foreach (Match hit in hits)
    {
        collected += hit.Groups[wanted_group].Value;
    }
    return collected;
}
}
- 读淘宝页面字节流提取宝贝图片地址宝贝标题宝贝价格
- 淘宝api例子 通过宝贝地址取宝贝标题价格图片
- 淘宝api例子 通过宝贝地址取宝贝标题价格图片
- 宝贝
- 宝贝
- 宝贝
- 宝贝
- 宝贝
- 宝贝宝贝宝贝宝贝
- 宝贝
- 淘宝宝贝标题的优化设置技巧
- 淘宝宝贝美工视屏地址
- 淘宝宝贝图片批量下载教程
- 【淘宝SEO技巧】淘宝宝贝标题关键字优化
- 宝贝de宝贝
- 亲亲宝贝亲亲宝贝
- 修改淘宝标题是否会重新发布宝贝?2014.8.23
- 搞定淘宝宝贝图片尺寸标注
- 动态菜单DWZ
- 电影
- 使用 GetOleDbSchemaTable 和 Visual C# .NET 检索架构信息-----http://support.microsoft.com/kb/309681/zh-cn#
- Token 令牌
- 【IO】文件读写实用工具
- 读淘宝页面字节流提取宝贝图片地址宝贝标题宝贝价格
- JVM内存分析及导致内存溢出的不健壮代码及解决办法
- vs2005编译opencv 2.1小记[CMake 2.8.4 Python 2.6.5]
- builder模式
- JBOSS安全域
- 忘掉蜗居努力10000小时
- Java程序练习-回文平方数
- 谷歌数据导出服务开始支持Google Voice
- 慎用守护线程Daemon