使用golang抓取京东商品数据
来源:互联网 发布:郁乎苍苍的乎 编辑:程序博客网 时间:2024/04/30 11:38
// Command main scrapes a single JD.com product page, extracts the product
// attributes embedded in the page's JavaScript, looks up the current price,
// stores the result in a MySQL table and saves the raw page to jd.html.
package main

import (
	"database/sql"
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"regexp"
	"strconv"
	"strings"
	"time"

	_ "github.com/go-sql-driver/mysql"
)

// JdProduct holds the attributes scraped from one product page. Every scalar
// is kept as the raw string captured from the page; cat holds the parsed
// numeric category ids.
type JdProduct struct {
	skuid        string
	name         string
	skuidkey     string
	href         string
	src          string
	cat          []int64
	catName      []string
	brand        string
	pType        string
	venderId     string
	shopId       string
	specialAttrs string
	price        string
}

const (
	// offTheShelf is the marker searched for in specialAttrs.
	offTheShelf = "SaleNo"
)

// db is not assigned anywhere in this file; the functions below receive the
// handle explicitly. It is kept only so that other files in the package that
// may reference it keep compiling.
var db *sql.DB

// One pattern per attribute of the page's embedded product object. Each has
// exactly one named capture group, which capture reports and returns.
var (
	productSkuidFetch        = regexp.MustCompile(`[[:space:]]*skuid:[[:space:]]+(?P<skuid>[0-9]+),`)
	productNameFetch         = regexp.MustCompile(`[[:space:]]*name:[[:space:]]+\'(?P<name>.*)\',`)
	productSkuidkeyFetch     = regexp.MustCompile(`[[:space:]]*skuidkey:[[:space:]]*\'(?P<skuidkey>.*)\',`)
	productHrefFetch         = regexp.MustCompile(`[[:space:]]*href:[[:space:]]+\'(?P<href>.*)\',`)
	productSrcFetch          = regexp.MustCompile(`[[:space:]]*src:[[:space:]]+\'(?P<src>.*)\',`)
	productCatFetch          = regexp.MustCompile(`[[:space:]]*cat:[[:space:]]+\[(?P<cat>.*)\],`)
	productCatNameFetch      = regexp.MustCompile(`[[:space:]]*catName:[[:space:]]+\[(?P<catname>.*)\],`)
	productBrandFetch        = regexp.MustCompile(`[[:space:]]*brand:[[:space:]]+(?P<brand>[0-9]+),`)
	productPTypeFetch        = regexp.MustCompile(`[[:space:]]*pType:[[:space:]]+(?P<ptype>[0-9]+),`)
	productVenderIdFetch     = regexp.MustCompile(`[[:space:]]*venderId:[[:space:]]*(?P<venderId>[0-9]+),`)
	productShopIdFetch       = regexp.MustCompile(`[[:space:]]*shopId:[[:space:]]*\'(?P<shopId>[0-9]+)\',`)
	productSpecialAttrsFetch = regexp.MustCompile(`[[:space:]]*specialAttrs:[[:space:]]*\[(?P<specialAttrs>.*)\],`)

	// unicodeEscapeRun matches one or more consecutive \uXXXX escapes.
	unicodeEscapeRun = regexp.MustCompile(`(?:\\u[0-9a-fA-F]{4})+`)
)

// httpClient is shared by all requests and bounds each one with a timeout so
// a stalled server cannot hang the program.
var httpClient = &http.Client{Timeout: 30 * time.Second}

// connectDB opens the MySQL database "jd" and verifies that it is reachable.
// sql.Open only validates its arguments, so an explicit Ping is needed to
// detect a bad DSN or an unreachable server. The caller owns the returned
// handle and must Close it.
//
// NOTE(review): the credentials are hard-coded in the DSN; consider reading
// them from the environment.
func connectDB() (db *sql.DB, err error) {
	db, err = sql.Open("mysql", "root:leeweop@/jd")
	if err != nil {
		return nil, fmt.Errorf("opening database: %v", err)
	}
	if err = db.Ping(); err != nil {
		db.Close()
		return nil, fmt.Errorf("connecting to database: %v", err)
	}
	return db, nil
}

// quoteIdent quotes name as a MySQL identifier, doubling embedded backticks,
// so it can be spliced into DDL where placeholders are not allowed.
func quoteIdent(name string) string {
	return "`" + strings.Replace(name, "`", "``", -1) + "`"
}

// createDatabase creates the database called name (UTF-8, general collation)
// if it does not exist yet.
func createDatabase(db *sql.DB, name string) error {
	_, err := db.Exec("CREATE DATABASE IF NOT EXISTS " + quoteIdent(name) + " DEFAULT CHARSET utf8 COLLATE utf8_general_ci")
	if err != nil {
		return fmt.Errorf("creating database %q: %v", name, err)
	}
	return nil
}

// createTable creates the product table called name if it does not exist yet
// and prints the DDL statement it executes.
func createTable(db *sql.DB, name string) error {
	command := "CREATE TABLE IF NOT EXISTS " + quoteIdent(name) + " (skuid BIGINT(64) NOT NULL PRIMARY KEY, name VARCHAR(256) NOT NULL, skuidkey VARCHAR(64) NOT NULL, href VARCHAR(128) NOT NULL, src VARCHAR(128) NOT NULL, cat1 INT(32) NOT NULL, cat2 INT(32) NOT NULL, cat3 INT(32) NOT NULL, brand VARCHAR(128) NOT NULL, pType INT(32) NOT NULL, venderId VARCHAR(64) NOT NULL, shopId VARCHAR(64) NOT NULL, specialAttrs VARCHAR(256) NULL, price DOUBLE NOT NULL)"
	fmt.Println(command)
	if _, err := db.Exec(command); err != nil {
		return fmt.Errorf("creating table %q: %v", name, err)
	}
	return nil
}

// insertIntoDB stores product as one row of the "test" table. It returns an
// error, rather than panicking on an out-of-range index, when the product
// carries fewer than the three category ids the table requires.
func insertIntoDB(db *sql.DB, product *JdProduct) error {
	if len(product.cat) < 3 {
		return errors.New("product has fewer than 3 category ids")
	}
	_, err := db.Exec("INSERT INTO test (skuid, name, skuidkey, href, src, cat1, cat2, cat3, brand, pType, venderId, shopId, specialAttrs, price)values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
		product.skuid, product.name, product.skuidkey, product.href, product.src,
		product.cat[0], product.cat[1], product.cat[2],
		product.brand, product.pType, product.venderId, product.shopId,
		product.specialAttrs, product.price)
	if err != nil {
		return fmt.Errorf("inserting product %s: %v", product.skuid, err)
	}
	return nil
}

// dumpDatabase prints every row of the "test" table to stdout. It has no
// error return, so a query or scan failure panics.
func dumpDatabase(db *sql.DB) {
	rows, err := db.Query("select * from test")
	if err != nil {
		panic(err.Error())
	}
	defer rows.Close()

	for rows.Next() {
		var p JdProduct
		p.cat = make([]int64, 3)
		err := rows.Scan(&p.skuid, &p.name, &p.skuidkey, &p.href, &p.src,
			&p.cat[0], &p.cat[1], &p.cat[2],
			&p.brand, &p.pType, &p.venderId, &p.shopId, &p.specialAttrs, &p.price)
		if err != nil {
			panic(err.Error())
		}
		fmt.Printf("%+v\n", p)
	}
	// Next returns false both at end of data and on failure; Err tells them apart.
	if err := rows.Err(); err != nil {
		panic(err.Error())
	}
}

// flushDatabase drops the "test" table.
func flushDatabase(db *sql.DB) {
	// Best effort: the error is ignored on purpose because the table does
	// not exist on a first run.
	_, _ = db.Exec("drop table test")
}

// fetch GETs url and returns the complete response body. A non-200 status is
// reported as an error.
func fetch(url string) ([]byte, error) {
	resp, err := httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("GET %s: %v", url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading %s: %v", url, err)
	}
	return body, nil
}

// capture returns the first capture group of re's first match in page. On a
// match it also prints the group's name and value, one per line. The boolean
// is false, and nothing is printed, when re does not match.
func capture(re *regexp.Regexp, page string) (string, bool) {
	m := re.FindStringSubmatch(page)
	if m == nil {
		return "", false
	}
	fmt.Println(re.SubexpNames()[1])
	fmt.Println(m[1])
	return m[1], true
}

// parseCats parses a comma-separated list of decimal category ids, tolerating
// whitespace around each id. An empty list yields no ids.
func parseCats(list string) ([]int64, error) {
	if strings.TrimSpace(list) == "" {
		return nil, nil
	}
	fields := strings.Split(list, ",")
	cats := make([]int64, 0, len(fields))
	for _, f := range fields {
		id, err := strconv.ParseInt(strings.TrimSpace(f), 10, 32)
		if err != nil {
			return nil, fmt.Errorf("parsing category id %q: %v", f, err)
		}
		cats = append(cats, id)
	}
	return cats, nil
}

// decodeUnicodeEscapes replaces every \uXXXX escape in s with the character
// it denotes and leaves all other text untouched. A high/low surrogate pair
// is combined into a single code point.
func decodeUnicodeEscapes(s string) string {
	return unicodeEscapeRun.ReplaceAllStringFunc(s, func(run string) string {
		// Each escape is exactly six bytes: `\u` plus four hex digits.
		units := make([]rune, 0, len(run)/6)
		for i := 0; i+6 <= len(run); i += 6 {
			v, err := strconv.ParseUint(run[i+2:i+6], 16, 16)
			if err != nil {
				return run // unreachable: the pattern only admits hex digits
			}
			units = append(units, rune(v))
		}

		out := make([]rune, 0, len(units))
		for i := 0; i < len(units); i++ {
			u := units[i]
			if u >= 0xD800 && u < 0xDC00 && i+1 < len(units) &&
				units[i+1] >= 0xDC00 && units[i+1] < 0xE000 {
				out = append(out, 0x10000+((u-0xD800)<<10)+(units[i+1]-0xDC00))
				i++
				continue
			}
			out = append(out, u)
		}
		return string(out)
	})
}

// parseProduct extracts every attribute it can find in page. Attributes whose
// pattern does not match are left at their zero value. The price is not part
// of the page and is left empty.
func parseProduct(page string) (JdProduct, error) {
	var p JdProduct

	p.skuid, _ = capture(productSkuidFetch, page)
	p.name, _ = capture(productNameFetch, page)
	p.skuidkey, _ = capture(productSkuidkeyFetch, page)
	p.href, _ = capture(productHrefFetch, page)
	p.src, _ = capture(productSrcFetch, page)

	if list, ok := capture(productCatFetch, page); ok {
		cats, err := parseCats(list)
		if err != nil {
			return JdProduct{}, err
		}
		p.cat = cats
	}
	if list, ok := capture(productCatNameFetch, page); ok {
		p.catName = append(p.catName, strings.Split(list, ",")...)
	}

	p.brand, _ = capture(productBrandFetch, page)
	p.pType, _ = capture(productPTypeFetch, page)
	p.venderId, _ = capture(productVenderIdFetch, page)
	p.shopId, _ = capture(productShopIdFetch, page)
	p.specialAttrs, _ = capture(productSpecialAttrsFetch, page)

	return p, nil
}

// fetchPrice queries the price service for the given sku and product type and
// returns the "p" field of the first entry. The result is empty when that
// field is absent or not a string.
func fetchPrice(skuid, pType string) (string, error) {
	url := "http://p.3.cn/prices/mgets?skuIds=J_" + skuid + "&type=" + pType
	fmt.Println(url)

	raw, err := fetch(url)
	if err != nil {
		return "", err
	}
	fmt.Println(string(raw))

	var entries []map[string]interface{}
	if err := json.Unmarshal(raw, &entries); err != nil {
		return "", fmt.Errorf("decoding price response: %v", err)
	}
	if len(entries) == 0 {
		return "", errors.New("price response contains no entries")
	}
	price, _ := entries[0]["p"].(string)
	return price, nil
}

// run performs the whole scrape for one hard-coded product and returns the
// first error encountered. To scrape a range of products, call the body in a
// loop over "http://item.jd.com/<id>.html" URLs.
func run() error {
	db, err := connectDB()
	if err != nil {
		return err
	}
	defer db.Close()

	flushDatabase(db)
	if err := createDatabase(db, "jd"); err != nil {
		return err
	}
	if err := createTable(db, "test"); err != nil {
		return err
	}

	body, err := fetch("http://item.jd.com/2788767.html")
	if err != nil {
		return err
	}

	product, err := parseProduct(string(body))
	if err != nil {
		return err
	}
	if strings.Contains(product.specialAttrs, offTheShelf) {
		// Placeholder kept from the original single-product flow, where the
		// loop-based variant had a commented-out `continue` at this point.
		fmt.Println("")
	}

	if product.price, err = fetchPrice(product.skuid, product.pType); err != nil {
		return err
	}

	// The page stores the name as \uXXXX escapes; decode them to UTF-8.
	product.name = decodeUnicodeEscapes(product.name)
	fmt.Println(product.name)

	if err := insertIntoDB(db, &product); err != nil {
		return err
	}
	dumpDatabase(db)

	if err := ioutil.WriteFile("jd.html", body, 0666); err != nil {
		return fmt.Errorf("writing jd.html: %v", err)
	}
	return nil
}

func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
1 0
- 使用golang抓取京东商品数据
- 使用golang抓取京东全部商品分类信息
- 京东商品数据抓取
- 使用golang从雅虎抓取股票交易数据
- 蘑菇街商品数据抓取
- goLang 多线程抓取网页数据
- PYTHON抓取当当网商品数据
- CasperJS 抓取京东商品详情页中的商品信息
- [爬虫]使用python抓取京东全站数据(商品,店铺,分类,评论)
- golang 使用 goquery 抓取 知乎周刊
- 抓取京东评论数据
- 求助:关于淘宝商品详情数据的抓取问题
- java爬虫抓取天猫商品的价格数据
- python淘宝爬虫基于requests抓取淘宝商品数据
- 京东商品及评论 数据采集
- 使用Jsoup抓取数据
- 使用Selenium模拟浏览器抓取淘宝商品美食信息
- 京东数据抓取-抓取所有图书名称
- 最全整理浏览器兼容性问题与解决方案
- Android技巧之封装Log工具类
- iOS开发网络篇—数据缓存
- android蓝牙开发---与蓝牙模块进行通信
- PhalGo-介绍
- 使用golang抓取京东商品数据
- MT4中Dll调用详解
- ActiveMQ使用步骤--- 开发测试环境搭建及Java用例
- Android控件系列之RadioButton&RadioGroup
- 虚幻4开发环境安装
- 手机滚屏页面下拉自动加载内容
- Android自定义ViewGroup(四、打造自己的布局容器)
- viewpager fragment
- 为Android Studio2.1.1增加注释函数