用 ScrapySharp 并行下载天涯图片
来源:互联网 发布:vps修改远程登录端口 编辑:程序博客网 时间:2024/05/19 17:08
用 ScrapySharp 并行下载天涯图片
#r "HtmlAgilityPack.dll"
#r "ScrapySharp.dll"
open System
open System.Threading.Tasks
open HtmlAgilityPack
open ScrapySharp.Extensions
// Thread page on bbs.tianya.cn that this script reads.
let url = "http://bbs.tianya.cn/post-12-563201-1.shtml"

// Browser object used to fetch the thread page's markup.
let web = new ScrapySharp.Network.ScrapingBrowser()

// Markup of the thread page, downloaded as a single string.
let html =
    let pageUri = Uri url
    web.DownloadString pageUri

// Parse the downloaded markup into an HtmlAgilityPack document.
let doc = HtmlDocument()
doc.LoadHtml html
// Image addresses taken from the "original" attribute of every <img> that is
// a direct child of div.bbs-content in the parsed page.
// NOTE(review): "original" presumably holds the real image address while
// "src" is a lazy-load placeholder — confirm against the page markup.
let urls =
    doc.DocumentNode.CssSelect("div.bbs-content > img")
    |> Seq.map (fun i -> i.GetAttributeValue("original"))
    // Drop nodes that yield no usable value; a null/blank entry would make
    // `Uri` throw later when the address is downloaded.
    |> Seq.filter (fun address -> not (String.IsNullOrWhiteSpace address))
    // Materialize once so the page is not re-queried on every enumeration.
    |> Seq.toList

// Reference value of `urls` recorded for the page above. It is kept as a
// comment because binding `urls` a second time at script top level is a
// duplicate definition and would discard the scraped result:
//
// [ "http://img3.laibafile.cn/p/m/166829011.jpg";
//   "http://img3.laibafile.cn/p/m/166829027.jpg";
//   "http://img3.laibafile.cn/p/m/166829000.jpg";
//   "http://img3.laibafile.cn/p/m/166829039.jpg";
//   "http://img3.laibafile.cn/p/m/166829034.jpg";
//   "http://img3.laibafile.cn/p/m/166829030.jpg";
//   "http://img3.laibafile.cn/p/m/166829016.jpg";
//   "http://img3.laibafile.cn/p/m/166829024.jpg" ]
/// Downloads the image at `url` and writes it into a directory derived from
/// `filePath`.
///
/// filePath: a path whose text before the last '.' names the target
///           directory (i.e. the path with its extension removed). When it
///           contains no '.', it is used unchanged.
/// url:      absolute address of the image. Everything from its last '/'
///           onward (slash included) is appended to the directory to form
///           the output file path.
///
/// Prints `url` to stdout before writing the file. Exceptions raised by
/// `Uri`, the browser, or the file system are not caught here.
let GetPicture (filePath: string) (url: string) =
    // Target directory: `filePath` without its extension. Guard the
    // "no dot" case, where Substring(0, -1) would throw.
    let path =
        match filePath.LastIndexOf('.') with
        | -1 -> filePath
        | dot -> filePath.Substring(0, dot)
    // Site root to visit before requesting the image itself.
    let ty =
        let t = Uri url
        let host = t.Authority
        if host.Contains("laibafile.cn") || host.Contains("tianya.cn") then
            "http://bbs.tianya.cn"
        else
            t.Scheme + "://" + t.Authority
    // A fresh browser per call, so parallel invocations share no state.
    let web = new ScrapySharp.Network.ScrapingBrowser()
    // NOTE(review): presumably this first visit makes the image host accept
    // the following request (referrer/cookies) — confirm.
    web.NavigateToPage(Uri ty) |> ignore
    // CreateDirectory is a no-op when the directory already exists, so no
    // separate Exists check is needed (such a check is racy when several
    // downloads run in parallel).
    IO.Directory.CreateDirectory(path) |> ignore
    let file = url.Substring(url.LastIndexOf('/'))
    let pic = web.NavigateToPage(Uri url).RawResponse.Body
    printfn "%s" url
    // `File` alone is not in scope with only `open System`; qualify it the
    // same way as `IO.Directory` above.
    IO.File.WriteAllBytes(path + file, pic)
// `filePath` was never bound in this script. Derive it from the thread URL:
// "<current directory>/<page file name>". GetPicture strips the extension,
// so images land in a directory named after the page.
// NOTE(review): the intended location is not shown in the script — adjust
// this path if a different download directory is wanted.
let filePath =
    let pageUri = Uri url
    IO.Path.Combine(Environment.CurrentDirectory, IO.Path.GetFileName(pageUri.LocalPath))

// Partially apply the target path so the result takes just an image URL.
let outPic = GetPicture filePath

// Download all images in parallel; the loop result is not needed.
Parallel.ForEach(urls, outPic) |> ignore
#r "HtmlAgilityPack.dll"
#r "ScrapySharp.dll"
open System
open System.Threading.Tasks
open HtmlAgilityPack
open ScrapySharp.Extensions
// Thread page on bbs.tianya.cn that this script reads.
let url = "http://bbs.tianya.cn/post-12-563201-1.shtml"

// Browser object used to fetch the thread page's markup.
let web = new ScrapySharp.Network.ScrapingBrowser()

// Markup of the thread page, downloaded as a single string.
let html =
    let pageUri = Uri url
    web.DownloadString pageUri

// Parse the downloaded markup into an HtmlAgilityPack document.
let doc = HtmlDocument()
doc.LoadHtml html
// Image addresses taken from the "original" attribute of every <img> that is
// a direct child of div.bbs-content in the parsed page.
// NOTE(review): "original" presumably holds the real image address while
// "src" is a lazy-load placeholder — confirm against the page markup.
let urls =
    doc.DocumentNode.CssSelect("div.bbs-content > img")
    |> Seq.map (fun i -> i.GetAttributeValue("original"))
    // Drop nodes that yield no usable value; a null/blank entry would make
    // `Uri` throw later when the address is downloaded.
    |> Seq.filter (fun address -> not (String.IsNullOrWhiteSpace address))
    // Materialize once so the page is not re-queried on every enumeration.
    |> Seq.toList

// Reference value of `urls` recorded for the page above. It is kept as a
// comment because binding `urls` a second time at script top level is a
// duplicate definition and would discard the scraped result:
//
// [ "http://img3.laibafile.cn/p/m/166829011.jpg";
//   "http://img3.laibafile.cn/p/m/166829027.jpg";
//   "http://img3.laibafile.cn/p/m/166829000.jpg";
//   "http://img3.laibafile.cn/p/m/166829039.jpg";
//   "http://img3.laibafile.cn/p/m/166829034.jpg";
//   "http://img3.laibafile.cn/p/m/166829030.jpg";
//   "http://img3.laibafile.cn/p/m/166829016.jpg";
//   "http://img3.laibafile.cn/p/m/166829024.jpg" ]
/// Downloads the image at `url` and writes it into a directory derived from
/// `filePath`.
///
/// filePath: a path whose text before the last '.' names the target
///           directory (i.e. the path with its extension removed). When it
///           contains no '.', it is used unchanged.
/// url:      absolute address of the image. Everything from its last '/'
///           onward (slash included) is appended to the directory to form
///           the output file path.
///
/// Prints `url` to stdout before writing the file. Exceptions raised by
/// `Uri`, the browser, or the file system are not caught here.
let GetPicture (filePath: string) (url: string) =
    // Target directory: `filePath` without its extension. Guard the
    // "no dot" case, where Substring(0, -1) would throw.
    let path =
        match filePath.LastIndexOf('.') with
        | -1 -> filePath
        | dot -> filePath.Substring(0, dot)
    // Site root to visit before requesting the image itself.
    let ty =
        let t = Uri url
        let host = t.Authority
        if host.Contains("laibafile.cn") || host.Contains("tianya.cn") then
            "http://bbs.tianya.cn"
        else
            t.Scheme + "://" + t.Authority
    // A fresh browser per call, so parallel invocations share no state.
    let web = new ScrapySharp.Network.ScrapingBrowser()
    // NOTE(review): presumably this first visit makes the image host accept
    // the following request (referrer/cookies) — confirm.
    web.NavigateToPage(Uri ty) |> ignore
    // CreateDirectory is a no-op when the directory already exists, so no
    // separate Exists check is needed (such a check is racy when several
    // downloads run in parallel).
    IO.Directory.CreateDirectory(path) |> ignore
    let file = url.Substring(url.LastIndexOf('/'))
    let pic = web.NavigateToPage(Uri url).RawResponse.Body
    printfn "%s" url
    // `File` alone is not in scope with only `open System`; qualify it the
    // same way as `IO.Directory` above.
    IO.File.WriteAllBytes(path + file, pic)
// `filePath` was never bound in this script. Derive it from the thread URL:
// "<current directory>/<page file name>". GetPicture strips the extension,
// so images land in a directory named after the page.
// NOTE(review): the intended location is not shown in the script — adjust
// this path if a different download directory is wanted.
let filePath =
    let pageUri = Uri url
    IO.Path.Combine(Environment.CurrentDirectory, IO.Path.GetFileName(pageUri.LocalPath))

// Partially apply the target path so the result takes just an image URL.
let outPic = GetPicture filePath

// Download all images in parallel; the loop result is not needed.
Parallel.ForEach(urls, outPic) |> ignore
0 0
- 用 ScrapySharp 并行下载天涯图片
- 用 ScrapySharp 下载天涯图片
- 图片并行下载和顺序下载
- 天涯文章下载器
- node.js爬虫之下载图片,批量下载图片,控制下载图片并行上限
- 天涯
- 天涯
- 并行下载JavaScript
- 多线程并行下载,断点续传
- 用C#下载图片
- HTML Agility Pack 搭配 ScrapySharp
- 浏览器并行下载资源个数
- 用VBA下载google图片
- android用http下载图片
- 用ImageLoader下载网络图片
- 用python批量下载图片
- 用Python3下载网页图片
- 用线程下载网络图片
- 网摘》窗体、子窗体
- hdu1232 并查集
- c#线性表
- SSH2配置事务的两种方式
- 蓝桥杯 【基础练习】 十六进制转八进制
- 用 ScrapySharp 并行下载天涯图片
- <C++>内存存储区域划分
- Hello,World!
- SQL 2000 级联更新(约束健值更新)
- openfire
- IDL image display in window
- C++内存管理
- 让Tomcat启动时加载所有hbase的jar包,避免出现NoClassDefFoundError错误
- 树洞