使用golang抓取京东商品数据

来源:互联网 发布:郁乎苍苍的乎 编辑:程序博客网 时间:2024/04/30 11:38
// Command main fetches a single JD.com product page, extracts the product
// metadata embedded in the page's JavaScript, looks up the current price and
// stores the result in a local MySQL database. The raw page is also saved to
// jd.html for inspection.
package main

import (
	"database/sql"
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"regexp"
	"strconv"
	"strings"
	"time"
	"unicode/utf16"

	_ "github.com/go-sql-driver/mysql"
)

// JdProduct holds the fields scraped from a product page plus the price
// obtained from the price endpoint. All scalar values are kept as the raw
// strings found in the page.
type JdProduct struct {
	skuid        string
	name         string
	skuidkey     string
	href         string
	src          string
	cat          []int64
	catName      []string
	brand        string
	pType        string
	venderId     string
	shopId       string
	specialAttrs string
	price        string
}

// JdPrice is one element of the JSON array returned by the price endpoint.
// Only P is consumed by this program.
type JdPrice struct {
	ID string `json:"id"`
	P  string `json:"p"`
	M  string `json:"m"`
}

const (
	// offTheShelf is the marker looked for in specialAttrs.
	offTheShelf = "SaleNo"

	productURL     = "http://item.jd.com/2788767.html"
	priceURLPrefix = "http://p.3.cn/prices/mgets?skuIds=J_"

	// httpTimeout bounds every HTTP request so a stalled server cannot hang
	// the program forever.
	httpTimeout = 30 * time.Second

	// replacementChar is what utf16.DecodeRune returns for an invalid pair.
	replacementChar = '\uFFFD'
)

// Patterns for the "key: value," lines of the product object embedded in the
// page. The first capture group of each pattern is the value.
var (
	productSkuidFetch        = regexp.MustCompile(`[[:space:]]*skuid:[[:space:]]+(?P<skuid>[0-9]+),`)
	productNameFetch         = regexp.MustCompile(`[[:space:]]*name:[[:space:]]+\'(?P<name>.*)\',`)
	productSkuidkeyFetch     = regexp.MustCompile(`[[:space:]]*skuidkey:[[:space:]]*\'(?P<skuidkey>.*)\',`)
	productHrefFetch         = regexp.MustCompile(`[[:space:]]*href:[[:space:]]+\'(?P<href>.*)\',`)
	productSrcFetch          = regexp.MustCompile(`[[:space:]]*src:[[:space:]]+\'(?P<src>.*)\',`)
	productCatFetch          = regexp.MustCompile(`[[:space:]]*cat:[[:space:]]+\[(?P<cat>.*)\],`)
	productCatNameFetch      = regexp.MustCompile(`[[:space:]]*catName:[[:space:]]+\[(?P<catname>.*)\],`)
	productBrandFetch        = regexp.MustCompile(`[[:space:]]*brand:[[:space:]]+(?P<brand>[0-9]+),`)
	productPTypeFetch        = regexp.MustCompile(`[[:space:]]*pType:[[:space:]]+(?P<ptype>[0-9]+),`)
	productVenderIdFetch     = regexp.MustCompile(`[[:space:]]*venderId:[[:space:]]*(?P<venderId>[0-9]+),`)
	productShopIdFetch       = regexp.MustCompile(`[[:space:]]*shopId:[[:space:]]*\'(?P<shopId>[0-9]+)\',`)
	productSpecialAttrsFetch = regexp.MustCompile(`[[:space:]]*specialAttrs:[[:space:]]*\[(?P<specialAttrs>.*)\],`)
)

// httpClient is shared by all requests; unlike http.DefaultClient it has a
// timeout.
var httpClient = &http.Client{Timeout: httpTimeout}

// quoteIdent wraps name in backticks (doubling embedded backticks) so it can
// be spliced into a statement as an identifier; placeholders cannot be used
// for database or table names.
func quoteIdent(name string) string {
	return "`" + strings.Replace(name, "`", "``", -1) + "`"
}

// connectDB opens the MySQL handle and verifies that the server is reachable.
// sql.Open alone only validates its arguments, so Ping is what actually
// surfaces connection problems here instead of at the first query.
//
// NOTE(review): the DSN, including credentials, is hard-coded.
func connectDB() (db *sql.DB, err error) {
	db, err = sql.Open("mysql", "root:leeweop@/jd")
	if err != nil {
		return nil, fmt.Errorf("open mysql handle: %w", err)
	}
	if err = db.Ping(); err != nil {
		db.Close()
		return nil, fmt.Errorf("connect to db: %w", err)
	}
	return db, nil
}

// createDatabase creates the database called name if it does not exist yet.
func createDatabase(db *sql.DB, name string) error {
	command := "CREATE DATABASE IF NOT EXISTS " + quoteIdent(name) + " DEFAULT CHARSET utf8 COLLATE utf8_general_ci"
	if _, err := db.Exec(command); err != nil {
		return fmt.Errorf("create database %q: %w", name, err)
	}
	return nil
}

// createTable creates the product table called name if it does not exist yet
// and echoes the statement to stdout.
func createTable(db *sql.DB, name string) error {
	command := "CREATE TABLE IF NOT EXISTS " + quoteIdent(name) + " (skuid BIGINT(64) NOT NULL PRIMARY KEY, name VARCHAR(256) NOT NULL, skuidkey VARCHAR(64) NOT NULL, href VARCHAR(128) NOT NULL, src VARCHAR(128) NOT NULL, cat1 INT(32) NOT NULL, cat2 INT(32) NOT NULL, cat3 INT(32) NOT NULL, brand VARCHAR(128) NOT NULL, pType INT(32) NOT NULL, venderId VARCHAR(64) NOT NULL, shopId VARCHAR(64) NOT NULL, specialAttrs VARCHAR(256) NULL, price DOUBLE NOT NULL)"
	fmt.Println(command)
	if _, err := db.Exec(command); err != nil {
		return fmt.Errorf("create table %q: %w", name, err)
	}
	return nil
}

// insertIntoDB stores product as one row of table test. It returns an error
// rather than panicking when the product carries fewer than the three
// category ids the table requires.
func insertIntoDB(db *sql.DB, product *JdProduct) error {
	if product == nil {
		return errors.New("insert product: nil product")
	}
	if len(product.cat) < 3 {
		return fmt.Errorf("insert product %q: need 3 category ids, have %d", product.skuid, len(product.cat))
	}
	_, err := db.Exec(
		"INSERT INTO test (skuid, name, skuidkey, href, src, cat1, cat2, cat3, brand, pType, venderId, shopId, specialAttrs, price)values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
		product.skuid, product.name, product.skuidkey, product.href, product.src,
		product.cat[0], product.cat[1], product.cat[2],
		product.brand, product.pType, product.venderId, product.shopId,
		product.specialAttrs, product.price,
	)
	if err != nil {
		return fmt.Errorf("insert product %q: %w", product.skuid, err)
	}
	return nil
}

// dumpDatabase prints every row of table test to stdout, one product per
// line. Failures are reported on stderr because the function has no error
// result.
func dumpDatabase(db *sql.DB) {
	rows, err := db.Query("select * from test")
	if err != nil {
		fmt.Fprintln(os.Stderr, "dump database:", err)
		return
	}
	defer rows.Close()

	for rows.Next() {
		var (
			p     JdProduct
			attrs sql.NullString // the specialAttrs column is nullable
		)
		p.cat = make([]int64, 3)
		err := rows.Scan(&p.skuid, &p.name, &p.skuidkey, &p.href, &p.src,
			&p.cat[0], &p.cat[1], &p.cat[2],
			&p.brand, &p.pType, &p.venderId, &p.shopId, &attrs, &p.price)
		if err != nil {
			fmt.Fprintln(os.Stderr, "dump database:", err)
			return
		}
		p.specialAttrs = attrs.String
		fmt.Printf("%+v\n", p)
	}
	if err := rows.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "dump database:", err)
	}
}

// flushDatabase drops table test so each run starts from an empty table.
// This is best effort: a failure is reported on stderr and otherwise ignored.
func flushDatabase(db *sql.DB) {
	if _, err := db.Exec("DROP TABLE IF EXISTS test"); err != nil {
		fmt.Fprintln(os.Stderr, "drop table test:", err)
	}
}

// fetch GETs url and returns the whole response body. Any status other than
// 200 is treated as an error.
func fetch(url string) ([]byte, error) {
	resp, err := httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("get %s: %w", url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("get %s: unexpected status %s", url, resp.Status)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read %s: %w", url, err)
	}
	return body, nil
}

// firstSubmatch returns the first capture group of the first match of re in
// page. On a match it also prints the group's name and value to stdout.
func firstSubmatch(re *regexp.Regexp, page string) (string, bool) {
	match := re.FindStringSubmatch(page)
	if match == nil {
		return "", false
	}
	fmt.Println(re.SubexpNames()[1])
	fmt.Println(match[1])
	return match[1], true
}

// parseCategoryIDs converts a comma-separated list of decimal ids into
// integers. Whitespace around an id is ignored; anything else that is not a
// 32-bit decimal number is an error.
func parseCategoryIDs(list string) ([]int64, error) {
	fields := strings.Split(list, ",")
	ids := make([]int64, 0, len(fields))
	for _, field := range fields {
		id, err := strconv.ParseInt(strings.TrimSpace(field), 10, 32)
		if err != nil {
			return nil, fmt.Errorf("parse category id %q: %w", field, err)
		}
		ids = append(ids, id)
	}
	return ids, nil
}

// parseProduct extracts the product fields from the page source. Fields whose
// pattern does not match are left at their zero value; a page without a skuid
// is rejected because nothing useful can be looked up or stored for it.
func parseProduct(page string) (JdProduct, error) {
	var product JdProduct
	assign := func(re *regexp.Regexp, dst *string) {
		if value, ok := firstSubmatch(re, page); ok {
			*dst = value
		}
	}

	assign(productSkuidFetch, &product.skuid)
	assign(productNameFetch, &product.name)
	assign(productSkuidkeyFetch, &product.skuidkey)
	assign(productHrefFetch, &product.href)
	assign(productSrcFetch, &product.src)
	if list, ok := firstSubmatch(productCatFetch, page); ok {
		ids, err := parseCategoryIDs(list)
		if err != nil {
			return JdProduct{}, err
		}
		product.cat = ids
	}
	if list, ok := firstSubmatch(productCatNameFetch, page); ok {
		product.catName = strings.Split(list, ",")
	}
	assign(productBrandFetch, &product.brand)
	assign(productPTypeFetch, &product.pType)
	assign(productVenderIdFetch, &product.venderId)
	assign(productShopIdFetch, &product.shopId)
	assign(productSpecialAttrsFetch, &product.specialAttrs)

	if product.skuid == "" {
		return JdProduct{}, errors.New("parse product: skuid not found in page")
	}
	return product, nil
}

// parseEscape reports the UTF-16 code unit encoded by a leading \uXXXX escape
// in s and the number of bytes it occupies, or (0, 0) if s does not start
// with a well-formed escape.
func parseEscape(s string) (rune, int) {
	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
		return 0, 0
	}
	unit, err := strconv.ParseUint(s[2:6], 16, 16)
	if err != nil {
		return 0, 0
	}
	return rune(unit), 6
}

// decodeUnicodeEscapes replaces every \uXXXX escape in s with the character
// it denotes, combining surrogate pairs, and copies all other bytes through
// unchanged.
func decodeUnicodeEscapes(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	for i := 0; i < len(s); {
		r, n := parseEscape(s[i:])
		if n == 0 {
			b.WriteByte(s[i])
			i++
			continue
		}
		if utf16.IsSurrogate(r) {
			// A high surrogate is only meaningful together with the
			// low surrogate escape that follows it.
			if low, m := parseEscape(s[i+n:]); m > 0 {
				if pair := utf16.DecodeRune(r, low); pair != replacementChar {
					r, n = pair, n+m
				}
			}
		}
		b.WriteRune(r)
		i += n
	}
	return b.String()
}

// fetchPrice asks the price endpoint for the price of the given sku and
// returns the "p" field of the first entry. The request URL and the raw
// response are echoed to stdout.
func fetchPrice(skuid, pType string) (string, error) {
	url := priceURLPrefix + skuid + "&type=" + pType
	fmt.Println(url)

	raw, err := fetch(url)
	if err != nil {
		return "", err
	}
	fmt.Println(string(raw))

	var prices []JdPrice
	if err := json.Unmarshal(raw, &prices); err != nil {
		return "", fmt.Errorf("decode price response: %w", err)
	}
	if len(prices) == 0 {
		return "", errors.New("decode price response: empty price list")
	}
	return prices[0].P, nil
}

// run performs the whole scrape: reset the table, download and parse the
// product page, look up the price, store and dump the row, and finally save
// the raw page to jd.html.
func run() error {
	db, err := connectDB()
	if err != nil {
		return err
	}
	defer db.Close()

	flushDatabase(db)
	if err := createDatabase(db, "jd"); err != nil {
		return err
	}
	if err := createTable(db, "test"); err != nil {
		return err
	}

	body, err := fetch(productURL)
	if err != nil {
		return err
	}
	product, err := parseProduct(string(body))
	if err != nil {
		return err
	}
	if strings.Contains(product.specialAttrs, offTheShelf) {
		// Placeholder kept from the original single-product version; a
		// batch loop over sku ids would skip such products here.
		fmt.Println("")
	}

	product.price, err = fetchPrice(product.skuid, product.pType)
	if err != nil {
		return err
	}

	// The page stores the name as \uXXXX escapes; store readable text.
	product.name = decodeUnicodeEscapes(product.name)
	fmt.Println(product.name)

	if err := insertIntoDB(db, &product); err != nil {
		return err
	}
	dumpDatabase(db)

	if err := ioutil.WriteFile("jd.html", body, 0666); err != nil {
		return fmt.Errorf("write jd.html: %w", err)
	}
	return nil
}

func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}


1 0
原创粉丝点击