go写的爬虫小程序
来源:互联网 发布:java链表反序 编辑:程序博客网 时间:2024/05/22 11:30
package main
import (
"fmt"
"io/ioutil"
"math/rand"
"net/http"
"regexp"
"runtime"
"time"
)
// userAgent is the pool of User-Agent header values from which
// GetRandomUserAgent picks one for each outgoing request.
//
// The MSIE 9.0 entry used to end in "Trident/5.0," with its parenthesis left
// open (a truncated string); it is now properly terminated.
var userAgent = []string{
	"Mozilla/5.0 (compatible, MSIE 10.0, Windows NT, DigExt)",
	"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, 360SE)",
	"Mozilla/4.0 (compatible, MSIE 8.0, Windows NT 6.0, Trident/4.0)",
	"Mozilla/5.0 (compatible, MSIE 9.0, Windows NT 6.1, Trident/5.0)",
	"Opera/9.80 (Windows NT 6.1, U, en) Presto/2.8.131 Version/11.11",
	"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, TencentTraveler 4.0)",
	"Mozilla/5.0 (Windows, U, Windows NT 6.1, en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
	"Mozilla/5.0 (Macintosh, Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
	"Mozilla/5.0 (Macintosh, U, Intel Mac OS X 10_6_8, en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
	"Mozilla/5.0 (Linux, U, Android 3.0, en-us, Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
	"Mozilla/5.0 (iPad, U, CPU OS 4_3_3 like Mac OS X, en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
	"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, Trident/4.0, SE 2.X MetaSr 1.0, SE 2.X MetaSr 1.0, .NET CLR 2.0.50727, SE 2.X MetaSr 1.0)",
	"Mozilla/5.0 (iPhone, U, CPU iPhone OS 4_3_3 like Mac OS X, en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
	"MQQBrowser/26 Mozilla/5.0 (Linux, U, Android 2.3.7, zh-cn, MB200 Build/GRJ22, CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
}
// r is the pseudo-random source used to pick a User-Agent; it is seeded from
// the current time when the package is initialised.
// NOTE(review): a *rand.Rand obtained from rand.New is not safe for
// concurrent use; guard it if calls to it can ever overlap.
var r = rand.New(rand.NewSource(time.Now().UnixNano()))

// Regular expressions used to pull links out of a fetched page.
//
// atagRegExp matches a complete <a ...>text</a> element that carries an href
// attribute (any letter case, single- or double-quoted value). The previous
// pattern wrote its alternatives inside [...], which is a character class and
// not a group, so it matched unrelated tags such as <abbr title="...">.
var atagRegExp = regexp.MustCompile(`(?i)<a\b[^>]*\shref\s*=\s*(?:"[^"]*"|'[^']*')[^>]*>[^<]*</a>`)

// hrefRegExp matches a single href attribute including its quotes. Submatch 1
// holds a double-quoted value, submatch 2 a single-quoted one. The value is
// matched with a negated class instead of a greedy ".+" so the match stops at
// the closing quote of the href rather than at the last quote in the tag.
var hrefRegExp = regexp.MustCompile(`(?i)href\s*=\s*(?:"([^"]*)"|'([^']*)')`)
// main runs 3000 fetches through GetContent, adds up the values the fetches
// report on a shared channel and prints the total.
//
// Each goroutine is awaited with a receive immediately after it is started,
// so the next fetch begins only once the previous one has reported. The loop
// therefore relies on every GetContent call sending exactly one value.
func main() {
	runtime.GOMAXPROCS(4)

	results := make(chan int, 1000)
	total := 0
	for remaining := 3000; remaining > 0; remaining-- {
		go GetContent(results)
		total += <-results
	}

	fmt.Println(total)
	// Pause before the process exits.
	time.Sleep(5 * time.Second)
}
// GetContent fetches the trade page once with a randomly chosen User-Agent,
// prints the size of the response body followed by the href attribute of
// every anchor element found in it, and reports the body size on c.
//
// Exactly one value is sent on c per call, on every return path (0 when the
// request fails or the status is not 200), so a caller doing one receive per
// call is never left waiting.
//
// It returns 0 when the request could not be built, sent or read, and 1
// otherwise.
func GetContent(c chan int) int {
	length := 0
	// Deferred so that the failure paths also deliver a result to the receiver.
	defer func() { c <- length }()

	url := "http://www.trade.com/trade.php"
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		fmt.Printf("Get请求%s返回错误:%s\n", url, err)
		return 0
	}
	req.Header.Set("User-Agent", GetRandomUserAgent())

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// fmt.Errorf only builds an error value; print the message so the
		// failure is actually visible.
		fmt.Printf("Get请求%s返回错误:%s\n", url, err)
		return 0
	}
	// Close the body for every status code, not only for 200, so the
	// underlying connection is released.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return 1
	}

	raw, err := ioutil.ReadAll(res.Body)
	if err != nil {
		fmt.Printf("Get请求%s返回错误:%s\n", url, err)
		return 0
	}
	page := string(raw)
	length = len(page)
	fmt.Println(length)

	// FindAllString yields the matched anchor text itself; the index variant
	// returns [][]int, which cannot be passed to FindString.
	for _, tag := range atagRegExp.FindAllString(page, -1) {
		fmt.Println(hrefRegExp.FindString(tag))
	}
	return 1
}
func GetRandomUserAgent() string {
return userAgent[r.Intn(len(userAgent))] //范围内随机数
}
import (
"fmt"
"io/ioutil"
"math/rand"
"net/http"
"regexp"
"runtime"
"time"
)
// userAgent is the pool of User-Agent header values from which
// GetRandomUserAgent picks one for each outgoing request.
//
// The MSIE 9.0 entry used to end in "Trident/5.0," with its parenthesis left
// open (a truncated string); it is now properly terminated.
var userAgent = []string{
	"Mozilla/5.0 (compatible, MSIE 10.0, Windows NT, DigExt)",
	"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, 360SE)",
	"Mozilla/4.0 (compatible, MSIE 8.0, Windows NT 6.0, Trident/4.0)",
	"Mozilla/5.0 (compatible, MSIE 9.0, Windows NT 6.1, Trident/5.0)",
	"Opera/9.80 (Windows NT 6.1, U, en) Presto/2.8.131 Version/11.11",
	"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, TencentTraveler 4.0)",
	"Mozilla/5.0 (Windows, U, Windows NT 6.1, en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
	"Mozilla/5.0 (Macintosh, Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
	"Mozilla/5.0 (Macintosh, U, Intel Mac OS X 10_6_8, en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
	"Mozilla/5.0 (Linux, U, Android 3.0, en-us, Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
	"Mozilla/5.0 (iPad, U, CPU OS 4_3_3 like Mac OS X, en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
	"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, Trident/4.0, SE 2.X MetaSr 1.0, SE 2.X MetaSr 1.0, .NET CLR 2.0.50727, SE 2.X MetaSr 1.0)",
	"Mozilla/5.0 (iPhone, U, CPU iPhone OS 4_3_3 like Mac OS X, en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
	"MQQBrowser/26 Mozilla/5.0 (Linux, U, Android 2.3.7, zh-cn, MB200 Build/GRJ22, CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
}
// r is the pseudo-random source used to pick a User-Agent; it is seeded from
// the current time when the package is initialised.
// NOTE(review): a *rand.Rand obtained from rand.New is not safe for
// concurrent use; guard it if calls to it can ever overlap.
var r = rand.New(rand.NewSource(time.Now().UnixNano()))

// Regular expressions used to pull links out of a fetched page.
//
// atagRegExp matches a complete <a ...>text</a> element that carries an href
// attribute (any letter case, single- or double-quoted value). The previous
// pattern wrote its alternatives inside [...], which is a character class and
// not a group, so it matched unrelated tags such as <abbr title="...">.
var atagRegExp = regexp.MustCompile(`(?i)<a\b[^>]*\shref\s*=\s*(?:"[^"]*"|'[^']*')[^>]*>[^<]*</a>`)

// hrefRegExp matches a single href attribute including its quotes. Submatch 1
// holds a double-quoted value, submatch 2 a single-quoted one. The value is
// matched with a negated class instead of a greedy ".+" so the match stops at
// the closing quote of the href rather than at the last quote in the tag.
var hrefRegExp = regexp.MustCompile(`(?i)href\s*=\s*(?:"([^"]*)"|'([^']*)')`)
// main runs 3000 fetches through GetContent, adds up the values the fetches
// report on a shared channel and prints the total.
//
// Each goroutine is awaited with a receive immediately after it is started,
// so the next fetch begins only once the previous one has reported. The loop
// therefore relies on every GetContent call sending exactly one value.
func main() {
	runtime.GOMAXPROCS(4)

	results := make(chan int, 1000)
	total := 0
	for remaining := 3000; remaining > 0; remaining-- {
		go GetContent(results)
		total += <-results
	}

	fmt.Println(total)
	// Pause before the process exits.
	time.Sleep(5 * time.Second)
}
// GetContent fetches the trade page once with a randomly chosen User-Agent,
// prints the size of the response body followed by the href attribute of
// every anchor element found in it, and reports the body size on c.
//
// Exactly one value is sent on c per call, on every return path (0 when the
// request fails or the status is not 200), so a caller doing one receive per
// call is never left waiting.
//
// It returns 0 when the request could not be built, sent or read, and 1
// otherwise.
func GetContent(c chan int) int {
	length := 0
	// Deferred so that the failure paths also deliver a result to the receiver.
	defer func() { c <- length }()

	url := "http://www.trade.com/trade.php"
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		fmt.Printf("Get请求%s返回错误:%s\n", url, err)
		return 0
	}
	req.Header.Set("User-Agent", GetRandomUserAgent())

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// fmt.Errorf only builds an error value; print the message so the
		// failure is actually visible.
		fmt.Printf("Get请求%s返回错误:%s\n", url, err)
		return 0
	}
	// Close the body for every status code, not only for 200, so the
	// underlying connection is released.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return 1
	}

	raw, err := ioutil.ReadAll(res.Body)
	if err != nil {
		fmt.Printf("Get请求%s返回错误:%s\n", url, err)
		return 0
	}
	page := string(raw)
	length = len(page)
	fmt.Println(length)

	// FindAllString yields the matched anchor text itself; the index variant
	// returns [][]int, which cannot be passed to FindString.
	for _, tag := range atagRegExp.FindAllString(page, -1) {
		fmt.Println(hrefRegExp.FindString(tag))
	}
	return 1
}
func GetRandomUserAgent() string {
return userAgent[r.Intn(len(userAgent))] //范围内随机数
}
阅读全文
0 0
- go写的爬虫小程序
- 用python写了个小的爬虫程序
- 使用beautifulsoup写的第一个小爬虫程序
- 用Go写了一个可以用来访问google类似Proxy的小程序
- 用c/c++语言写的一个小的“爬虫”程序学习过程总结
- 关于Python3爬虫之写为朋友设计签名的小程序
- Python写的deviantArt小爬虫
- GO语言的TCP小程序
- Java写的爬虫的基本程序
- Java写的爬虫的基本程序
- Java写的爬虫的基本程序
- Java写的爬虫的基本程序
- 一个C#写的爬虫程序
- 一个C#写的爬虫程序
- 两个php写的爬虫程序
- Python写的一个爬虫程序
- Python写的网络爬虫程序
- selenium2java写一个小小的爬虫程序
- plsql无法登录,提示:监听程序: 所有适用例程都无法建立新连接
- 微信 for win10有必要更新吗?
- 第一周9.16
- spring boot学习笔记(三):controller用法及数据库操作
- ICPC2017南宁邀请赛1005&&HDU6197 (模拟)
- go写的爬虫小程序
- 编写程序数一下 1到 100 的所有整数中出现多少次数字9
- android ble 开发的各种坑
- Windows下动过批处理指令在浏览器中打开指定文件中的URL地址
- 二进制存图至SqlServer
- Android 绘图Shader之BitmapShader
- iOS 11正式版即将来袭,这25个新功能你知道吗?
- Windows学习(012)--HOOK系列知识点
- 《android开发艺术探索笔记》Part7、Android动画深入分析