Python3 BeautifulSoup pymysql
来源:互联网 发布:淘宝雪花代码在线生成 编辑:程序博客网 时间:2024/05/22 15:44
import http.cookiejar
import logging
import time
import urllib.parse
import urllib.request
from contextlib import closing

import pymysql
import xlrd
from bs4 import BeautifulSoup


class SalesSpider():
    """Log in to a site, run one query per key and store each result in MySQL.

    Keys are read from the first column of a spreadsheet sheet
    (``getQueryKeysFromXls``); ``loginAndQuery`` then logs in and calls
    ``submit`` for each key, skipping keys that already have a row in the
    ``sales`` table.

    NOTE(review): every URL literal and the expected login page title are
    empty strings in this listing; they look redacted and must be filled in
    before the spider can work.
    """

    def __init__(self, userId, passWord, logFile=None):
        """Configure logging and install a cookie-aware global URL opener.

        Args:
            userId: Account name sent as the ``userId`` login field.
            passWord: Password sent as the ``password`` login field.
            logFile: Optional path of a log file. When omitted, records go
                to the console, which is what the original code effectively
                did because it passed the path as ``filemode``.
        """
        # %(asctime)s stamps each record when it is emitted; the previous
        # strftime() call froze the start-up time into the format string.
        logging.basicConfig(
            level=logging.INFO,
            format="[%(asctime)s][SalesSpider]-----%(message)s------",
            datefmt="%Y-%m-%d %H:%M:%S",
            filename=logFile,
        )
        self.userId = userId
        self.passWord = passWord
        # Defined up front so loginAndQuery() works (as a no-op) even when
        # getQueryKeysFromXls() has not been called yet.
        self.queryKeys = []
        cj = http.cookiejar.LWPCookieJar()
        self.opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(cj))
        # Installing the opener makes urllib.request.urlopen() share the
        # same cookie jar as self.opener.
        urllib.request.install_opener(self.opener)

    def request(self, url, postData):
        """Fetch ``url`` and return the response parsed into Python objects.

        Args:
            url: Address to request.
            postData: Encoded form body as bytes for a POST, or ``None`` for
                a GET.

        Returns:
            Whatever the response text evaluates to (the callers index it
            like a dict).
        """
        # urlopen() sends a GET when data is None and a POST otherwise.
        with urllib.request.urlopen(url, postData) as response:
            body = response.read().decode('utf-8')
        # SECURITY(review): eval() executes whatever the server returns.
        # The dict subclass used as the namespace answers every name lookup
        # with the name itself, so bare identifiers such as unquoted keys
        # presumably come back as strings. That is not strict JSON, so
        # json.loads() is not a drop-in replacement. Only use this against
        # a trusted endpoint.
        js = eval(body, type('Dummy', (dict,), dict(__getitem__=lambda s, n: n))())
        logging.info("url[%s]=%s", url, js)
        return js

    def login(self):
        """Post the credentials and report whether the login succeeded.

        Returns:
            ``True`` when the returned page's ``<title>`` text equals the
            expected value, ``False`` otherwise (including when the page
            has no ``<title>`` element).
        """
        params = {
            "userId": self.userId,
            "password": self.passWord,
        }
        loginUrl = ""
        postData = urllib.parse.urlencode(params).encode(encoding='UTF8')
        with self.opener.open(loginUrl, postData) as response:
            html = response.read()
        soup = BeautifulSoup(html, "lxml")
        title = soup.title
        # A page without <title> used to raise AttributeError; treat it as
        # a failed login instead.
        if title is not None and "" == title.string:
            logging.info("login success!")
            return True
        logging.info("login failed!")
        return False

    def submit(self, times, recordid):
        """Run the query for one key and store the result if it has rows.

        Does nothing when ``recordid`` is already present in the database.

        Args:
            times: 1-based position of the key, used only for logging.
            recordid: Key sent as the ``AtomValue`` of the query.
        """
        if self.findFromDB(recordid):
            return
        # Lazy %s formatting also copes with non-string keys (spreadsheet
        # cells are not guaranteed to be str).
        logging.info("No.%s,key=%s", times, recordid)
        # 50392,50396,50397,50015,50021,50394,50395,50009,50790
        # NOTE(review): urlencode() serialises the paramSetValue list with
        # str(), i.e. as a Python repr rather than JSON - confirm that this
        # is what the server expects.
        params = {
            "ObjSelect": "50392,50396,50397,50015,50021,50394,50395,50009,50790",
            "viewMonth": "2017-05-01#2017-07-01",
            "viewDay": "2017-06-18#2017-07-18",
            "Slt_Latn_Id": "",
            "paramSetValue": [
                {"AtomId": 50007, "AtomValue": recordid,
                 "TimeWindow": "2017-05-01#2017-05-01", "maxDataDt": "",
                 "atomcopyid": "0", "operatorId": "0",
                 "operatorName": "\u9ed8\u8ba4", "grpId": "", "grpPath": "",
                 "showTypeId": "", "id": ""}]
        }
        jobUrl = ""
        postData = urllib.parse.urlencode(params).encode(encoding='UTF8')
        job = self.request(jobUrl, postData)
        if "true" == job["success"]:
            # NOTE(review): jobId is unused because the URLs below are
            # blank; presumably they were built from it.
            jobId = str(job["queryJobId"])
            headerUrl = ""
            # The header response itself is not used; the request is kept
            # because the server may require it before the data request.
            self.request(headerUrl, None)
            saleDataUrl = ""
            saleData = self.request(saleDataUrl, None)
            if "0" != saleData["total"]:
                self.insertIntoDB(recordid, str(saleData))

    def loginAndQuery(self):
        """Log in, then submit every loaded key, retrying failures twice.

        Returns immediately when the initial login fails. A key whose
        submission raises is retried up to two more times, logging in again
        before each retry; if it still fails the key is skipped.
        """
        if not self.login():
            return
        for times, key in enumerate(self.queryKeys, start=1):
            try:
                self.submit(times, key)
            except Exception:
                # Exception (not a bare except) lets Ctrl-C and SystemExit
                # through; exc_info records why the submission failed.
                logging.info("retry login 2 times!", exc_info=True)
                for attempt in (1, 2):
                    try:
                        self.login()  # log in again before retrying
                        self.submit(times, key)
                        break
                    except Exception:
                        logging.info("reTry login %s time also is failed!",
                                     attempt, exc_info=True)

    def getQueryKeysFromXls(self, path, sheetName):
        """Load the query keys from the first column of a spreadsheet sheet.

        Args:
            path: Path of the workbook to open with ``xlrd``.
            sheetName: Name of the sheet holding the keys.
        """
        workbook = xlrd.open_workbook(path)
        sheet = workbook.sheet_by_name(sheetName)
        self.queryKeys = [sheet.row_values(row)[0]
                          for row in range(sheet.nrows)]

    def _connect(self):
        """Open a new connection to the local ``test1`` MySQL database."""
        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration.
        return pymysql.connect(host='localhost', user='root', passwd='root',
                               db='test1', port=3306, charset='utf8')

    def findFromDB(self, recordid):
        """Return ``True`` when ``sales`` already has a row for ``recordid``."""
        sql = ' select * from sales where recordid = %s '
        with closing(self._connect()) as db, closing(db.cursor()) as cursor:
            cursor.execute(sql, (recordid,))
            # Fetch while the cursor and connection are still open.
            return cursor.fetchone() is not None

    def insertIntoDB(self, recordid, js):
        """Insert one ``(recordid, js)`` row into ``sales`` and commit."""
        sql = " insert into sales(recordid , js) values(%s , %s) "
        with closing(self._connect()) as db, closing(db.cursor()) as cursor:
            cursor.execute(sql, (recordid, js))
            db.commit()


if __name__ == '__main__':
    spider = SalesSpider("", "",
                         logFile="C:\\Users\\liyang\\Desktop\\salespider.log")
    logging.info("spider is begin!")
    spider.getQueryKeysFromXls("C:\\Users\\liyang\\Desktop\\buy.xls", "buy")
    spider.loginAndQuery()
    logging.info("spider is end!")
阅读全文
0 0
- Python3 BeautifulSoup pymysql
- Python3,pymysql
- python3+pymysql
- python3 :pymysql
- BeautifulSoup Python3
- python3配置pyMysql
- Python3 pymysql 操作笔记
- python3 pymysql模块
- Python3.x连接Pymysql
- python3.6 安装pymysql
- Python3 pymysql连接MySQL数据库
- python3 安装mysql库 pymysql
- python3 安装pymysql连接模块
- python3.6安装tensorflow、pymysql
- python3 使用 pymysql操作mysql
- Python3 win7安装BeautifulSoup
- Python3 win7安装BeautifulSoup
- python3.5安装BeautifulSoup
- scp
- PCB新手养成日记
- 集合类Map ConcurrentHashMap Collections.synchronizedMap()
- 如何利用Open Live Writer发布CSDN博客
- ExceptionUtil
- Python3 BeautifulSoup pymysql
- NOIP C++ 生涯
- pythonGui 对话框
- facebook登录和分享
- LeetCode 125:Valid Palindrome(c++)
- 2017技术书单
- 【LeetCode】5. Longest Palindromic Substring
- 实现"输入框"获得焦点时外边框变蓝
- 【面试题】剑指offer12--打印1到最大的n位数