【python 爬虫】selenium+phantomjs 用法
来源:互联网 发布:小说软件哪个好 编辑:程序博客网 时间:2024/05/29 03:20
程序1:输入病员号,查询报告列表信息
# encoding: utf-8from selenium import webdriverimport sysreload(sys)sys.setdefaultencoding('utf-8')from lxml import etreeimport pandas as pdimport timetime1=time.time()driver=webdriver.PhantomJS(executable_path='D:\\Program Files\\Python27\\Scripts\\phantomjs.exe')xuhao0=[]xuhao1=[]xuhao2=[]ideintity1=[]name1=[]sex1=[]age1=[]group1=[]apply_name=[]apply_time=[]status=[]apply_num=[]def spider(number): try: url = "http://211.83.161.4:8000/XHlisWebReport.aspx" html=driver.get(url) driver.find_element_by_id('txtoutpatient_id').send_keys(number) driver.find_element_by_id('btnConfirm').click() time.sleep(3) html=driver.page_source selector=etree.HTML(html) num0=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[2]/span/text()') for each in num0: print each xuhao0.append(each) num1=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[3]/text()') for each in num1: print each xuhao1.append(each) num2=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[4]/text()') for each in num2: print each xuhao2.append(each) ideintity=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[6]/text()') for each in ideintity: print each ideintity1.append(each) name=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[7]/text()') for each in name: print each name1.append(each) sex=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[8]/text()') for each in sex: print each sex1.append(each) age=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[9]/text()') for each in age: print each age1.append(each) group=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[12]/text()') for each in group: print each group1.append(each) apply_name1= selector.xpath('//*[@id="GridView1"]/tbody/tr/td[13]/text()') for each in apply_name1: print each apply_name.append(each) apply_time1= selector.xpath('//*[@id="GridView1"]/tbody/tr/td[14]/text()') for each in apply_time1: print each apply_time.append(each) status1= selector.xpath('//*[@id="GridView1"]/tbody/tr/td[15]/text()') for each in status1: print each status.append(each) apply_num1= 
selector.xpath('//*[@id="GridView1"]/tbody/tr/td[16]/text()') for each in apply_num1: print each apply_num.append(each) except: passif __name__ == '__main__': #####病员号 number = '0000201091' spider(number) data=pd.DataFrame({"序号":xuhao0,"检验单":xuhao1,"病员号":xuhao2,"送检目的":ideintity1,"姓名":name1,"性别":sex1,"年龄":age1,\ "工作组":group1,"审核人员":apply_name,"审核时间":apply_time,"状态":status,"申请单号":apply_num}) print data # 写出excel writer = pd.ExcelWriter(r'C:\\XHlisWebReport.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) data.to_excel(writer, index=False) writer.close() time2 = time.time() print u'ok,爬虫结束!' print u'总共耗时:' + str(time2 - time1) + 's' driver.close()
程序2:输入申请号,查询报告详情
# encoding: utf-8from selenium import webdriverimport sysreload(sys)sys.setdefaultencoding('utf-8')from lxml import etreeimport pandas as pdimport timetime1=time.time()driver=webdriver.PhantomJS(executable_path='D:\\Program Files\\Python27\\Scripts\\phantomjs.exe')number1=[]No=[]test_project=[]result=[]host=[]values=[]phone=[]status=[]def spider(number): try: url="http://211.83.161.4:8000/XHlisWebReport.aspx" driver.get(url) driver.find_element_by_id('txtrequisition_id').send_keys(number) driver.find_element_by_id('btnConfirm').click() time.sleep(3) driver.find_element_by_xpath('//*[@id="GridView1"]/tbody/tr[2]').click() html2=driver.page_source selector=etree.HTML(html2) No1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[1]/text()') for each in No1: print each number1.append(number) No.append(each) test_project1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[2]/text()') for each in test_project1: print each test_project.append(each) result1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[3]/text()') for each in result1: print each result.append(each) host1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[4]/text()') for each in host1: print each host.append(each) status1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[5]/text()') for each in status1: print each status.append(each) values1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[6]/text()') for each in values1: print each values.append(each) phone1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[7]/text()') for each in phone1: print each phone.append(each) except: passif __name__ == '__main__': ########条码号################ number = '1166372801' spider(number) data = pd.DataFrame({"条码号":number1,"NO":No,"检验项目":test_project,"结果":result,"单位":host,"参考值":values,"代号":phone,"状态":status}) print data # 写出excel writer = pd.ExcelWriter(r'C:\\Reportdetail.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) data.to_excel(writer, index=False) writer.close() time2 = time.time() print u'ok,爬虫结束!' 
print u'总共耗时:' + str(time2 - time1) + 's' driver.close()
阅读全文
1 0
- 【python 爬虫】selenium+phontomjs 用法
- python selenium+phontomjs的详细用法及简单案例
- python爬虫 使用selenium+phontomjs 模拟点击输入 获取东航加载后的源码 机票价格
- python 针对selenium+phontomjs等模拟浏览器爬虫的反爬技术点
- 【python 爬虫】python淘宝爬虫实战(selenum+phontomjs)
- python phontomjs爬虫项目 如何使用代理IP
- Python爬虫利器之Selenium的用法
- Python爬虫利器五之Selenium的用法
- Python爬虫利器五之Selenium的用法
- Python爬虫利器五之Selenium的用法
- Python爬虫利器五之Selenium的用法
- Python selenium爬虫
- python+scrapy+selenium爬虫
- Python + Selenium 爬虫小记
- phantomjs+selenium+python爬虫
- python爬虫step1:selenium
- Python爬虫Selenium安装
- Python爬虫Selenium使用
- source insight 常用配置参数
- Code128A、B、C及Auto的区别
- adb shell 启动 android App
- 微信 {"errcode":48001,"errmsg":"api unauthorized, hints: [ req_id: 1QoCla0699ns81 ]"}
- 记录平时Android开发中的一些小技巧
- 【python 爬虫】selenium+phontomjs 用法
- vivado设置自定义编辑器与notepad++设置高亮方法
- WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED
- C# Dictionary的按下标访问的方法
- PHP数组函数整理
- JavaWeb三大组件(Servlet、Filter、Listener)
- Python3 类包装实现多线程
- 注解@PostConstruct与@PreDestroy详解及实例
- 第三章 授权——跟我学习springmvc shiro mybatis