pyspider递归调用
来源:互联网 发布:sql server安装包下载 编辑:程序博客网 时间:2024/06/05 18:03
代码如下
def predeal_page(self, response): page_qq = int(response.doc("html").find("#form\:j_id258").val()) page_size = 1000 if response.save['isFirst']==0: page_count = int(response.doc("html").find("#form\:j_id260").val()) else: page_count = response.save['count'] print("pagecount="+str(page_count)) start_time = time.strftime('%Y%m%d',time.localtime(time.time() - 2*24*60*60)) end_time = time.strftime('%Y%m%d',time.localtime(time.time()-1*24*60*60)) print("共搜索" + str(page_count) + "条记录") actionUrl = "http://xxx.com/pages/erpt/rpt_main_report.jsf" dic={} dic["P_AUTHTAG"]={"index":1,"value":"1111","name":"1111","label":"","displayCondition":""} dic["P_DATE_TYPE"] ={"index":2,"value":"1","name":"订单时间","label":"日期类型","displayCondition":"true"} dic["P_START_DATE"] ={"index":3,"value":start_time,"name":start_time,"label":"订单开始日期","displayCondition":"true"} dic["P_END_DATE"] ={"index":4,"value":end_time,"name":end_time,"label":"订单结束日期","displayCondition":"true"} formj_id23 = str(base64.b64encode(str.encode(json.dumps(dic,ensure_ascii = False).replace(' ', ''))),encoding = "utf-8") print(formj_id23) headers = {} headers["Content-Type"]="application/x-www-form-urlencoded" headers["Origin"] = "http://report.mall.10010.com" data = {} data["AJAXREQUEST"] = "_viewRoot" data["_authKey"] = "" data["form:j_id23"] = formj_id23 data["rpt_id"] = "TRADE_004" data["form:j_id26"] = "TRADE_004" data["form:j_id27"] = "/mallrpt/pages/trade_004.xhtml" data["form:j_id258"] = str(page_size) data["form:j_id259"] = "0" data["form:j_id260"] = "0" data["form:P_BROWSER_TYPE"] = "Chrome" data["form:P_BROWSER_VERSION"] = "43.0.2357.130" data["form:exportFormat"] = "" data["form:motionFlag"] = "query" data["form_SUBMIT"] = "1" data["javax.faces.ViewState"] = "rO0ABXVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAN0AAVqX2lkMXB0ACEv cGFnZXMvZXJwdC9ycHRfbWFpbl9yZXBvcnQueGh0bWw=" #判断是否是第一次调用 if response.save['isFirst'] == 0: data["form:j_id28"] = "form:j_id28" if response.save['isFirst'] == 1: data["form:j_id259"] = str(page_size*(response.save['num']-1)) data["param1"] = "next" data["form:j_id255"] = "form:j_id255" data["form:j_id260"] = str(page_count) #循环递归调用 times = int((page_count-1)/page_size) + 1 print("times="+str(times)) if response.save['num'] < times: self.crawl(actionUrl+"?id="+str(response.save['num']),data=data,cookies=response.cookies, callback=self.detail_page,headers=headers,method="POST",priority=1) num = response.save['num'] + 1 next=actionUrl+"?id2="+str(num) self.crawl(next,data=data,cookies=response.cookies, callback=self.predeal_page,headers=headers,method="POST",save={'isFirst':1,'num':num,'count':page_count})
阅读全文
0 0
- pyspider递归调用
- pyspider
- pyspider
- pyspider
- pyspider
- 递归调用
- 递归调用
- 递归调用
- 递归调用
- 递归调用
- 递归调用
- 递归调用
- 递归调用
- 递归调用
- 递归调用
- 递归调用
- 递归调用
- 递归调用
- linux环境mysql安装
- C++语言编程题1
- 装饰器之基本方法
- 十大Intellij IDEA快捷键
- 第三篇:显示cifar-10中的图片
- pyspider递归调用
- Smallest Minimum Cut(HDU 6214 dinic模板)
- bean之后创建dao
- CCF—日期计算 20150902
- 实验一线性表的基本操作实现及其应用
- Glide 一个专注于平滑滚动的图片加载和缓存库__辉
- spring profiles实现不同的环境配置的一键切换
- kotlin 在安卓开发时 软键盘自动弹出的尝试
- 2.排序之快速排序