scrapy pipeline 同步和异步写入数据库
来源:互联网 发布:淘宝店被投诉知识产权 编辑:程序博客网 时间:2024/05/16 04:32
# 异步写入mysql数据库
from twisted.enterprise import adbapi
from MySQLdb import cursors
class MysqlTwistedPipeline(object):
    """Scrapy item pipeline that writes items to MySQL asynchronously.

    Inserts are dispatched through a Twisted ``adbapi.ConnectionPool`` so
    that ``process_item`` does not wait for the database round trip.
    """

    # This function is called automatically.
    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the project settings.

        Reads ``MYSQL_HOST``, ``MYSQL_PORT``, ``MYSQL_USER``,
        ``MYSQL_PASSWD``, ``MYSQL_CHARSET`` and ``MYSQL_DBNAME`` from
        ``settings`` and returns an instance wrapping a new connection pool.
        """
        db_params = dict(
            host=settings["MYSQL_HOST"],
            port=settings["MYSQL_PORT"],
            user=settings["MYSQL_USER"],
            passwd=settings["MYSQL_PASSWD"],
            charset=settings["MYSQL_CHARSET"],
            db=settings["MYSQL_DBNAME"],
            use_unicode=True,
            cursorclass=cursors.DictCursor,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **db_params)
        return cls(dbpool)

    def __init__(self, dbpool):
        """Store the connection pool used for every insert."""
        self.dbpool = dbpool

    def process_item(self, item, spider):
        """Schedule the insert for ``item`` and hand the item onwards.

        Returns:
            The same ``item``, so that later pipelines and exporters still
            receive it (the original returned ``None`` implicitly).
        """
        query = self.dbpool.runInteraction(self.do_insert, item)
        # Attach the error handler; insert failures are reported there.
        query.addErrback(self.handle_error, item, spider)
        return item

    def handle_error(self, failure, item, spider):
        """Print the failure raised by an asynchronous insert."""
        # print(...) with one argument behaves the same on Python 2 and 3.
        print(failure)

    def do_insert(self, cursor, item):
        """Execute the parameterized INSERT for ``item`` on ``cursor``.

        Only the first element of ``item["img_src"]`` is stored.
        """
        sql = (
            'insert into bole_blogs(title,blog_url,img_src,blog_date,tags,'
            'like_count,comment_count,bookmark_count,img_path)'
            'VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        )
        params = (
            item["title"],
            item["blog_url"],
            item["img_src"][0],
            item["blog_date"],
            item["tags"],
            item["like_count"],
            item["comment_count"],
            item["bookmark_count"],
            item["img_path"],
        )
        cursor.execute(sql, params)
from twisted.enterprise import adbapi
from MySQLdb import cursors
class MysqlTwistedSavePipeline(object):
    """Scrapy item pipeline that saves items to MySQL asynchronously.

    Inserts are dispatched through a Twisted ``adbapi.ConnectionPool`` so
    that ``process_item`` does not wait for the database round trip.
    """

    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the project settings.

        Reads ``MYSQL_HOST``, ``MYSQL_DBNAME``, ``MYSQL_PORT``,
        ``MYSQL_USER``, ``MYSQL_PASSWD`` and ``MYSQL_CHARSET`` from
        ``settings`` and returns an instance wrapping a new connection pool.
        """
        db_params = dict(
            host=settings["MYSQL_HOST"],
            db=settings["MYSQL_DBNAME"],
            port=settings["MYSQL_PORT"],
            user=settings["MYSQL_USER"],
            passwd=settings["MYSQL_PASSWD"],
            charset=settings["MYSQL_CHARSET"],
            use_unicode=True,
            cursorclass=cursors.DictCursor,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **db_params)
        return cls(dbpool)

    def __init__(self, dbpool):
        """Store the connection pool used for every insert."""
        self.dbpool = dbpool

    def process_item(self, item, spider):
        """Schedule the insert for ``item`` and hand the item onwards.

        Returns:
            The same ``item``, so that later pipelines and exporters still
            receive it (the original returned ``None`` implicitly).
        """
        query = self.dbpool.runInteraction(self.do_insert, item)
        # Attach the error handler; insert failures are reported there.
        query.addErrback(self.handle_error, item, spider)
        return item

    def handle_error(self, failure, item, spider):
        """Print the failure raised by an asynchronous insert."""
        # print(...) with one argument behaves the same on Python 2 and 3.
        print(failure)

    def do_insert(self, cursor, item):
        """Execute the parameterized INSERT for ``item`` on ``cursor``.

        Only the first element of ``item["img_src"]`` is stored.
        """
        sql = (
            'insert into bole_blogs(title,blog_url,img_src,blog_date,tags,'
            'like_count,comment_count,bookmark_count,img_path)'
            'VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        )
        params = (
            item["title"],
            item["blog_url"],
            item["img_src"][0],
            item["blog_date"],
            item["tags"],
            item["like_count"],
            item["comment_count"],
            item["bookmark_count"],
            item["img_path"],
        )
        cursor.execute(sql, params)
# 将item写入数据库
# 小数据可以使用同步写入
import MySQLdb
class MysqlPipeine(object):
    """Scrapy item pipeline that writes items to MySQL synchronously.

    Each item is inserted and committed inside ``process_item`` over a
    single connection opened in ``__init__``.
    """

    def __init__(self):
        """Open the MySQL connection and create the cursor used for inserts."""
        # NOTE(review): connection parameters and credentials are hard-coded
        # here; consider reading them from the Scrapy settings instead.
        self.conn = MySQLdb.connect(
            host='localhost',
            # HTTP default port: 80
            # HTTPS default port: 443
            # MySQL default port: 3306
            # Flask port: 5000
            # Django port: 8000
            port=3306,
            user='root',
            passwd='123456',
            db='jobbole',
            use_unicode=True,
            charset='utf8',
        )
        self.cursor = self.conn.cursor()

    # Function that handles each item.
    def process_item(self, item, spider):
        """Insert ``item`` into ``bole_blogs``, commit, and return the item.

        Only the first element of ``item["img_src"]`` is stored.

        Returns:
            The same ``item``, so that later pipelines and exporters still
            receive it (the original returned ``None`` implicitly).
        """
        # Prepare the SQL statement.
        sql = (
            'insert into bole_blogs(title,blog_url,img_src,blog_date,tags,'
            'like_count,comment_count,bookmark_count,img_path)'
            'VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        )
        params = (
            item["title"],
            item["blog_url"],
            item["img_src"][0],
            item["blog_date"],
            item["tags"],
            item["like_count"],
            item["comment_count"],
            item["bookmark_count"],
            item["img_path"],
        )
        self.cursor.execute(sql, params)
        self.conn.commit()
        return item

    def close_spider(self, spider):
        """Close the cursor and then the connection."""
        self.cursor.close()
        self.conn.close()
from twisted.enterprise import adbapi
from MySQLdb import cursors
class MysqlTwistedPipeline(object):
    """Scrapy item pipeline that writes items to MySQL asynchronously.

    Inserts are dispatched through a Twisted ``adbapi.ConnectionPool`` so
    that ``process_item`` does not wait for the database round trip.
    """

    # This function is called automatically.
    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the project settings.

        Reads ``MYSQL_HOST``, ``MYSQL_PORT``, ``MYSQL_USER``,
        ``MYSQL_PASSWD``, ``MYSQL_CHARSET`` and ``MYSQL_DBNAME`` from
        ``settings`` and returns an instance wrapping a new connection pool.
        """
        db_params = dict(
            host=settings["MYSQL_HOST"],
            port=settings["MYSQL_PORT"],
            user=settings["MYSQL_USER"],
            passwd=settings["MYSQL_PASSWD"],
            charset=settings["MYSQL_CHARSET"],
            db=settings["MYSQL_DBNAME"],
            use_unicode=True,
            cursorclass=cursors.DictCursor,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **db_params)
        return cls(dbpool)

    def __init__(self, dbpool):
        """Store the connection pool used for every insert."""
        self.dbpool = dbpool

    def process_item(self, item, spider):
        """Schedule the insert for ``item`` and hand the item onwards.

        Returns:
            The same ``item``, so that later pipelines and exporters still
            receive it (the original returned ``None`` implicitly).
        """
        query = self.dbpool.runInteraction(self.do_insert, item)
        # Attach the error handler; insert failures are reported there.
        query.addErrback(self.handle_error, item, spider)
        return item

    def handle_error(self, failure, item, spider):
        """Print the failure raised by an asynchronous insert."""
        # print(...) with one argument behaves the same on Python 2 and 3.
        print(failure)

    def do_insert(self, cursor, item):
        """Execute the parameterized INSERT for ``item`` on ``cursor``.

        Only the first element of ``item["img_src"]`` is stored.
        """
        sql = (
            'insert into bole_blogs(title,blog_url,img_src,blog_date,tags,'
            'like_count,comment_count,bookmark_count,img_path)'
            'VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        )
        params = (
            item["title"],
            item["blog_url"],
            item["img_src"][0],
            item["blog_date"],
            item["tags"],
            item["like_count"],
            item["comment_count"],
            item["bookmark_count"],
            item["img_path"],
        )
        cursor.execute(sql, params)
from twisted.enterprise import adbapi
from MySQLdb import cursors
class MysqlTwistedSavePipeline(object):
    """Scrapy item pipeline that saves items to MySQL asynchronously.

    Inserts are dispatched through a Twisted ``adbapi.ConnectionPool`` so
    that ``process_item`` does not wait for the database round trip.
    """

    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the project settings.

        Reads ``MYSQL_HOST``, ``MYSQL_DBNAME``, ``MYSQL_PORT``,
        ``MYSQL_USER``, ``MYSQL_PASSWD`` and ``MYSQL_CHARSET`` from
        ``settings`` and returns an instance wrapping a new connection pool.
        """
        db_params = dict(
            host=settings["MYSQL_HOST"],
            db=settings["MYSQL_DBNAME"],
            port=settings["MYSQL_PORT"],
            user=settings["MYSQL_USER"],
            passwd=settings["MYSQL_PASSWD"],
            charset=settings["MYSQL_CHARSET"],
            use_unicode=True,
            cursorclass=cursors.DictCursor,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **db_params)
        return cls(dbpool)

    def __init__(self, dbpool):
        """Store the connection pool used for every insert."""
        self.dbpool = dbpool

    def process_item(self, item, spider):
        """Schedule the insert for ``item`` and hand the item onwards.

        Returns:
            The same ``item``, so that later pipelines and exporters still
            receive it (the original returned ``None`` implicitly).
        """
        query = self.dbpool.runInteraction(self.do_insert, item)
        # Attach the error handler; insert failures are reported there.
        query.addErrback(self.handle_error, item, spider)
        return item

    def handle_error(self, failure, item, spider):
        """Print the failure raised by an asynchronous insert."""
        # print(...) with one argument behaves the same on Python 2 and 3.
        print(failure)

    def do_insert(self, cursor, item):
        """Execute the parameterized INSERT for ``item`` on ``cursor``.

        Only the first element of ``item["img_src"]`` is stored.
        """
        sql = (
            'insert into bole_blogs(title,blog_url,img_src,blog_date,tags,'
            'like_count,comment_count,bookmark_count,img_path)'
            'VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        )
        params = (
            item["title"],
            item["blog_url"],
            item["img_src"][0],
            item["blog_date"],
            item["tags"],
            item["like_count"],
            item["comment_count"],
            item["bookmark_count"],
            item["img_path"],
        )
        cursor.execute(sql, params)
# 将item写入数据库
# 小数据可以使用同步写入
import MySQLdb
class MysqlPipeine(object):
    """Scrapy item pipeline that writes items to MySQL synchronously.

    Each item is inserted and committed inside ``process_item`` over a
    single connection opened in ``__init__``.
    """

    def __init__(self):
        """Open the MySQL connection and create the cursor used for inserts."""
        # NOTE(review): connection parameters and credentials are hard-coded
        # here; consider reading them from the Scrapy settings instead.
        self.conn = MySQLdb.connect(
            host='localhost',
            # HTTP default port: 80
            # HTTPS default port: 443
            # MySQL default port: 3306
            # Flask port: 5000
            # Django port: 8000
            port=3306,
            user='root',
            passwd='123456',
            db='jobbole',
            use_unicode=True,
            charset='utf8',
        )
        self.cursor = self.conn.cursor()

    # Function that handles each item.
    def process_item(self, item, spider):
        """Insert ``item`` into ``bole_blogs``, commit, and return the item.

        Only the first element of ``item["img_src"]`` is stored.

        Returns:
            The same ``item``, so that later pipelines and exporters still
            receive it (the original returned ``None`` implicitly).
        """
        # Prepare the SQL statement.
        sql = (
            'insert into bole_blogs(title,blog_url,img_src,blog_date,tags,'
            'like_count,comment_count,bookmark_count,img_path)'
            'VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        )
        params = (
            item["title"],
            item["blog_url"],
            item["img_src"][0],
            item["blog_date"],
            item["tags"],
            item["like_count"],
            item["comment_count"],
            item["bookmark_count"],
            item["img_path"],
        )
        self.cursor.execute(sql, params)
        self.conn.commit()
        return item

    def close_spider(self, spider):
        """Close the cursor and then the connection."""
        self.cursor.close()
        self.conn.close()
阅读全文
0 0
- scrapy pipeline 同步和异步写入数据库
- Scrapy Pipeline之与数据库交互
- Scrapy入门教程之写入数据库
- scrapy 的 item pipeline
- Scrapy框架学习(二)----Item Pipeline(管道)和Scrapy Shell
- DWR异步和同步
- 同步和异步
- 同步和异步转换
- 同步和异步
- 同步和异步
- 同步和异步
- 同步和异步学习
- 同步和异步
- 同步和异步
- 同步和异步
- java 同步和异步
- 同步和异步
- 同步和异步
- 决策树
- PHP基础3-变量和常量
- kali 安装virtualbox tools出错ERROR: UNABLE TO FIND THE SOURCES OF YOUR CURRENT LINUX KERNEL.
- 大话设计模式之工厂模式与策略模式
- java实现附件预览(openoffice+swftools+flexpaper)
- scrapy pipeline 同步和异步写入数据库
- java线程池
- 羊羊吃草
- python学习一:编译工具pycharm的下载安装
- 敏捷开发
- 野指针【C++】
- 排序算法
- 贪心算法
- Python -练习6