Scrapy pipelines: synchronous and asynchronous writes to a MySQL database

Source: internet article (scraped programming-blog post, dated 2024/05/16 04:32).
# Asynchronous MySQL writes (Twisted adbapi connection pool + MySQLdb)
from twisted.enterprise import  adbapi
from MySQLdb import cursors

class MysqlTwistedPipeline(object):
    """Scrapy pipeline that inserts items into MySQL asynchronously.

    Uses Twisted's adbapi connection pool so database inserts do not
    block the crawling reactor.
    """

    @classmethod
    def from_settings(cls, settings):
        # Called automatically by Scrapy: build the pipeline from the
        # project settings (MYSQL_* keys must be defined in settings.py).
        db_params = dict(
            host=settings["MYSQL_HOST"],
            port=settings["MYSQL_PORT"],
            user=settings["MYSQL_USER"],
            passwd=settings["MYSQL_PASSWD"],
            charset=settings["MYSQL_CHARSET"],
            db=settings["MYSQL_DBNAME"],
            use_unicode=True,
            # DictCursor returns rows as dicts instead of tuples.
            cursorclass=cursors.DictCursor,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **db_params)
        return cls(dbpool)

    def __init__(self, dbpool):
        # dbpool: twisted.enterprise.adbapi.ConnectionPool
        self.dbpool = dbpool

    def process_item(self, item, spider):
        # Schedule the insert on the pool; failures are routed to
        # handle_error via the Deferred's errback chain.
        query = self.dbpool.runInteraction(self.do_insert, item)
        query.addErrback(self.handle_error, item, spider)
        # BUGFIX: Scrapy's pipeline contract requires returning the item
        # so later pipelines still receive it; the original returned None,
        # silently dropping every item for downstream pipelines.
        return item

    def handle_error(self, failure, item, spider):
        # Log the Twisted Failure. print(...) is valid in both Python 2
        # and 3; the original `print failure` statement was Python 2 only.
        print(failure)

    def do_insert(self, cursor, item):
        # Runs inside a pool thread with its own cursor/transaction.
        # Parameterized query: values are escaped by the driver.
        sql = 'insert into bole_blogs(title,blog_url,img_src,blog_date,tags,like_count,comment_count,bookmark_count,img_path)VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        cursor.execute(sql, (item["title"], item["blog_url"], item["img_src"][0], item["blog_date"], item["tags"], item["like_count"], item["comment_count"], item["bookmark_count"], item["img_path"]))

from twisted.enterprise import adbapi
from MySQLdb import cursors
class MysqlTwistedSavePipeline(object):
    """Asynchronous MySQL save pipeline (duplicate of MysqlTwistedPipeline).

    Keeps inserts off the reactor thread by running them through a
    Twisted adbapi connection pool.
    """

    @classmethod
    def from_settings(cls, settings):
        # Scrapy calls this to construct the pipeline from project settings.
        db_params = dict(
            host=settings["MYSQL_HOST"],
            db=settings["MYSQL_DBNAME"],
            port=settings["MYSQL_PORT"],
            user=settings["MYSQL_USER"],
            passwd=settings["MYSQL_PASSWD"],
            charset=settings["MYSQL_CHARSET"],
            use_unicode=True,
            # Rows come back as dicts rather than positional tuples.
            cursorclass=cursors.DictCursor,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **db_params)
        return cls(dbpool)

    def __init__(self, dbpool):
        self.dbpool = dbpool

    def process_item(self, item, spider):
        # Queue the insert; errors flow into handle_error.
        query = self.dbpool.runInteraction(self.do_insert, item)
        query.addErrback(self.handle_error, item, spider)
        # BUGFIX: return the item so downstream pipelines keep working;
        # the original implicitly returned None and dropped it.
        return item

    def handle_error(self, failure, item, spider):
        # print(...) works on Python 2 and 3; `print failure` was 2-only.
        print(failure)

    def do_insert(self, cursor, item):
        # Executed in a pool thread; parameterized to avoid SQL injection.
        sql = 'insert into bole_blogs(title,blog_url,img_src,blog_date,tags,like_count,comment_count,bookmark_count,img_path)VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        cursor.execute(sql, (item["title"], item["blog_url"], item["img_src"][0], item["blog_date"], item["tags"], item["like_count"], item["comment_count"], item["bookmark_count"], item["img_path"]))




# Write items to the database.
# Synchronous writing is fine for small volumes of data.

import MySQLdb
class MysqlPipeine(object):
    """Synchronous MySQL pipeline: one blocking insert + commit per item.

    Suitable for small crawls; for high throughput prefer the
    Twisted-based asynchronous pipelines above.
    (NOTE(review): class name keeps the original "Pipeine" typo so
    existing ITEM_PIPELINES settings referencing it still resolve.)
    """

    def __init__(self):
        # Common default ports for reference: HTTP 80, HTTPS 443,
        # MySQL 3306, Flask 5000, Django 8000.
        self.conn = MySQLdb.connect(
            host='localhost',
            port=3306,
            user='root',
            passwd='123456',
            db='jobbole',
            use_unicode=True,
            charset='utf8',
        )
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        # Parameterized insert; the driver escapes all values.
        sql = 'insert into bole_blogs(title,blog_url,img_src,blog_date,tags,like_count,comment_count,bookmark_count,img_path)VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        try:
            self.cursor.execute(sql, (item["title"], item["blog_url"], item["img_src"][0], item["blog_date"], item["tags"], item["like_count"], item["comment_count"], item["bookmark_count"], item["img_path"]))
            self.conn.commit()
        except Exception:
            # BUGFIX: roll back the failed transaction so the connection
            # is left in a clean state, then re-raise for Scrapy to log.
            self.conn.rollback()
            raise
        # BUGFIX: Scrapy pipelines must return the item for downstream
        # pipelines; the original returned None and dropped it.
        return item

    def close_spider(self, spider):
        # Release the cursor and connection when the spider finishes.
        self.cursor.close()
        self.conn.close()



(End of article — original blog-site footer.)