pyspider递归调用

来源:互联网 发布:sql server安装包下载 编辑:程序博客网 时间:2024/06/05 18:03

代码如下

    def predeal_page(self, response):        page_qq = int(response.doc("html").find("#form\:j_id258").val())        page_size = 1000        if response.save['isFirst']==0:            page_count = int(response.doc("html").find("#form\:j_id260").val())        else:            page_count = response.save['count']        print("pagecount="+str(page_count))        start_time = time.strftime('%Y%m%d',time.localtime(time.time() - 2*24*60*60))        end_time = time.strftime('%Y%m%d',time.localtime(time.time()-1*24*60*60))        print("共搜索" + str(page_count) + "条记录")        actionUrl = "http://xxx.com/pages/erpt/rpt_main_report.jsf"        dic={}        dic["P_AUTHTAG"]={"index":1,"value":"1111","name":"1111","label":"","displayCondition":""}        dic["P_DATE_TYPE"] ={"index":2,"value":"1","name":"订单时间","label":"日期类型","displayCondition":"true"}        dic["P_START_DATE"] ={"index":3,"value":start_time,"name":start_time,"label":"订单开始日期","displayCondition":"true"}        dic["P_END_DATE"] ={"index":4,"value":end_time,"name":end_time,"label":"订单结束日期","displayCondition":"true"}        formj_id23 = str(base64.b64encode(str.encode(json.dumps(dic,ensure_ascii = False).replace(' ', ''))),encoding = "utf-8")        print(formj_id23)        headers = {}          headers["Content-Type"]="application/x-www-form-urlencoded"         headers["Origin"] = "http://report.mall.10010.com"        data = {}        data["AJAXREQUEST"] = "_viewRoot"        data["_authKey"] = ""        data["form:j_id23"] = formj_id23        data["rpt_id"] = "TRADE_004"        data["form:j_id26"] = "TRADE_004"        data["form:j_id27"] = "/mallrpt/pages/trade_004.xhtml"        data["form:j_id258"] = str(page_size)        data["form:j_id259"] = "0"        data["form:j_id260"] = "0"        data["form:P_BROWSER_TYPE"] = "Chrome"        data["form:P_BROWSER_VERSION"] = "43.0.2357.130"        data["form:exportFormat"] = ""        data["form:motionFlag"] = "query"        data["form_SUBMIT"] = "1"        data["javax.faces.ViewState"] = "rO0ABXVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAN0AAVqX2lkMXB0ACEv cGFnZXMvZXJwdC9ycHRfbWFpbl9yZXBvcnQueGh0bWw="        #判断是否是第一次调用        if response.save['isFirst'] == 0:            data["form:j_id28"] = "form:j_id28"        if response.save['isFirst'] == 1:            data["form:j_id259"] = str(page_size*(response.save['num']-1))            data["param1"] = "next"            data["form:j_id255"] = "form:j_id255"            data["form:j_id260"] = str(page_count)         #循环递归调用           times = int((page_count-1)/page_size) + 1        print("times="+str(times))        if response.save['num'] < times:            self.crawl(actionUrl+"?id="+str(response.save['num']),data=data,cookies=response.cookies, callback=self.detail_page,headers=headers,method="POST",priority=1)            num = response.save['num'] + 1            next=actionUrl+"?id2="+str(num)            self.crawl(next,data=data,cookies=response.cookies, callback=self.predeal_page,headers=headers,method="POST",save={'isFirst':1,'num':num,'count':page_count})