#!/usr/bin/pythonimport osimport timeimport datetimeimport codecs#from lxml import etreefrom selenium import webdriver#import csvfrom bs4 import BeautifulSoup as bpimport reimport ioimport syssys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030') #cmd编码尝试 无弹窗就要在cmd下运行,则必须这样设置def zsk(): #抓取招行各种类信用卡 zs = [] for i in range(1,20): url = 'http://ccclub.cmbchina.com/ccproduct/cardlist.aspx?PageNo=%s'%i #从第一页中以获取最大页数 driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('zsk%s.png'%i) #截图保存 driver.quit() soup = bp(html,'html.parser') soup = soup.find_all('h2',{'class':'cardinfotitle'}) #print('不知道这个打印出来是什么============================================',soup) for s in soup: content = s.text.strip() content = ''.join(content) print('_____________________________________________________',content) zs.append(content) print('最终结果的列表_____________________________________________________',zs) for k in zs: print('信用卡有_____________________________________________________',k) list1 = [] string0 = '\n\n————————————招商银行信用卡有————————————:\n' string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' xrtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) xrtime = ''.join(xrtime) string1 = string1 + xrtime + '\n\n' string0 = string0 + xrtime + '\n\n' #-------------------新增的文件需要的时间------------------ xrtime0 = time.strftime('%Y-%m-%d',time.localtime()) xrtime0 = ''.join(xrtime0) if (os.path.exists('D:\招商银行信用卡.doc')): #是否存在信用卡文件 命名为不带时间后缀的名字 with open('D:\招商银行信用卡.doc','r+',encoding='utf-8') as f: #存在即打开,读入 lines = f.readlines() #print('为什么未读进?!!!!!!!!!',lines) for line in lines: line = line.strip() list1.append(line) #print('原来的卡打印一下!!!!!!!!!!!!!!!',line) #print('原来的的卡列表是》》》》》》》》》》》》》》》》》》',list1) with open('D:\招商银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: #将本次与上一次比较,有新增则写入,本次命名为当前时间 f.write(string0) for k in zs: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(zs))) #得到卡数目,并写入卡数目 xinka = [] for k in zs: if k not in list1: #print('有没有将新增的卡筛选出来????????????????????',k) xinka.append(k) print(xinka) if xinka != [] : #判断本次是否有新卡 f.write('本信用卡产品所在的网页链接是:' + url) f.write(string1) for k in xinka: f.write(k) f.write('\n') f.write('\n\n新增卡数目为:') f.write(str(len(xinka))) else: #若不存在显示本次运行不存在新卡 f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n') else: #若不存在文件则创建 with open('D:\招商银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: f.write('本信用卡产品所在的网页链接是:' + url) f.write(string0) for k in zs: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(zs))) #得到卡数目,并写入卡数目 def zxk(): #抓取中信各种类信用卡 zx = [] #中信银行特色信用卡____________________________________________________________________ url = 'https://creditcard.ecitic.com/shenqing/tesezhutika.shtml' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('zxtsk.png') #截图保存 driver.quit() ts = [] soup = bp(html,'html.parser') soup = soup.find_all('p',{'class':'card_name'}) #print('不知道这个打印出来是什么============================================',soup) for s in soup: content = s.text.strip() content = ''.join(content) print('_____________________________________________________',content) ts.append(content) ts.pop() ts.pop() print('最终结果的列表_____________________________________________________',ts) for i in ts: print('特色信用卡有_____________________________________________________',i) # 中信银行网友推荐top10信用卡_________________________________________________________ url = 'https://creditcard.ecitic.com/shenqing/index.shtml' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('zxtp10k.png') #截图保存 driver.quit() top = [] soup = bp(html,'html.parser') soup = soup.find_all('p',{'class':'card_name'}) #print('不知道这个打印出来是什么============================================',soup) for s in soup: content = s.text.strip() content = ''.join(content) print('_____________________________________________________',content) top.append(content) top.pop() top.pop() print('最终结果的列表_____________________________________________________',top) for i in top: print('top10卡有_____________________________________________________',i) # 中信银行标准信用卡_________________________________________________________ url = 'https://creditcard.ecitic.com/shenqing/biaozhunka2.shtml' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('zxbzk.png') #截图保存 driver.quit() bz = [] soup = bp(html,'html.parser') soup = soup.find_all('p',{'class':'card_name'}) #print('不知道这个打印出来是什么============================================',soup) for s in soup: content = s.text.strip() content = ''.join(content) print('_____________________________________________________',content) bz.append(content) bz.pop() bz.pop() print('最终结果的列表_____________________________________________________',bz) for i in bz: print('标准卡有_____________________________________________________',i) # 中信银行航空商旅信用卡_________________________________________________________ url = 'https://creditcard.ecitic.com/shenqing/hangkongshanglv.shtml' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('zxhkslk.png') #截图保存 driver.quit() hk = [] soup = bp(html,'html.parser') soup = soup.find_all('p',{'class':'card_name'}) #print('不知道这个打印出来是什么============================================',soup) for s in soup: content = s.text.strip() content = ''.join(content) print('_____________________________________________________',content) hk.append(content) hk.pop() hk.pop() print('最终结果的列表_____________________________________________________',hk) for i in hk: print('航空商旅卡有_____________________________________________________',i) # 中信银行高端白金信用卡_________________________________________________________ url = 'https://creditcard.ecitic.com/shenqing/gaoduanbaijin.shtml' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('zxgdbjk.png') #截图保存 driver.quit() bj = [] soup = bp(html,'html.parser') soup = soup.find_all('p',{'class':'card_name'}) #print('不知道这个打印出来是什么============================================',soup) for s in soup: content = s.text.strip() content = ''.join(content) print('_____________________________________________________',content) bj.append(content) bj.pop() bj.pop() print('最终结果的列表_____________________________________________________',bj) for i in bj: print('高端白金卡有_____________________________________________________',i) # 中信银行出国专享信用卡_________________________________________________________ url = 'https://creditcard.ecitic.com/shenqing/chuguozhuanxiang.shtml' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('zxcgzxk.png') #截图保存 driver.quit() cg = [] soup = bp(html,'html.parser') soup = soup.find_all('p',{'class':'card_name'}) #print('不知道这个打印出来是什么============================================',soup) for s in soup: content = s.text.strip() content = ''.join(content) print('_____________________________________________________',content) cg.append(content) cg.pop() cg.pop() print('最终结果的列表_____________________________________________________',cg) for i in cg: print('出国专享卡有_____________________________________________________',i) for i in (ts + top + bz + hk + bj + cg) : if i not in zx: zx.append(i) list1 = [] string0 = '\n\n————————————中信银行信用卡有————————————:\n' string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' xrtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) xrtime = ''.join(xrtime) string1 = string1 + xrtime + '\n\n' string0 = string0 + xrtime + '\n\n' #-------------------新增的文件需要的时间------------------ xrtime0 = time.strftime('%Y-%m-%d',time.localtime()) xrtime0 = ''.join(xrtime0) if (os.path.exists('D:\中信银行信用卡.doc')): #是否存在信用卡文件 命名为不带时间后缀的名字 with open('D:\中信银行信用卡.doc','r+',encoding='utf-8') as f: #存在即打开,读入 lines = f.readlines() #print('为什么未读进?!!!!!!!!!',lines) for line in lines: line = line.strip() list1.append(line) #print('原来的卡打印一下!!!!!!!!!!!!!!!',line) #print('原来的的卡列表是》》》》》》》》》》》》》》》》》》',list1) with open('D:\中信银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: #将本次与上一次比较,有新增则写入,本次命名为当前时间 f.write(string0) for k in zx: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(zx))) #得到卡数目,并写入卡数目 xinka = [] for k in zx: if k not in list1: #print('有没有将新增的卡筛选出来????????????????????',k) xinka.append(k) print(xinka) if xinka != [] : #判断本次是否有新卡 f.write('本信用卡产品所在的网页链接是:' + url) f.write(string1) for k in xinka: f.write(k) f.write('\n') f.write('\n\n新增卡数目为:') f.write(str(len(xinka))) else: #若不存在显示本次运行不存在新卡 f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n') else: #若不存在文件则创建 with open('D:\中信银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: f.write('本信用卡产品所在的网页链接是:' + url) f.write(string0) for k in zx: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(zx))) #得到卡数目,并写入卡数目def pfk(): #抓取浦发各种类信用卡 url = 'http://ccc.spdb.com.cn/apply_for_credit_cards/card/' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('pfk.png') #截图保存 driver.quit() pf = [] soup = bp(html,'html.parser') soup1 = soup.find_all('div',{'class':'ttd'}) #print('不知道这个打印出来是什么============================================',soup) for s1 in soup1: content1 = s1.text.strip() content1 = ''.join(content1) print('_____________________________________________________',content1) pf.append(content1) soup2 = soup.find_all('div',{'class':'ttdf'}) #print('不知道这个打印出来是什么============================================',soup) for s2 in soup2: content2 = s2.text.strip() content2 = ''.join(content2) print('_____________________________________________________',content2) pf.append(content2) print('最终结果的列表_____________________________________________________',pf) for i in pf: print('信用卡有_____________________________________________________',i) list1 = [] string0 = '\n\n————————————浦发银行信用卡有————————————:\n' string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' xrtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) xrtime = ''.join(xrtime) string1 = string1 + xrtime + '\n\n' string0 = string0 + xrtime + '\n\n' #-------------------新增的文件需要的时间------------------ xrtime0 = time.strftime('%Y-%m-%d',time.localtime()) xrtime0 = ''.join(xrtime0) if (os.path.exists('D:\浦发银行信用卡.doc')): #是否存在信用卡文件 命名为不带时间后缀的名字 with open('D:\浦发银行信用卡.doc','r+',encoding='utf-8') as f: #存在即打开,读入 lines = f.readlines() #print('为什么未读进?!!!!!!!!!',lines) for line in lines: line = line.strip() list1.append(line) #print('原来的卡打印一下!!!!!!!!!!!!!!!',line) #print('原来的的卡列表是》》》》》》》》》》》》》》》》》》',list1) with open('D:\浦发银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: #将本次与上一次比较,有新增则写入,本次命名为当前时间 f.write(string0) for k in pf: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(pf))) #得到卡数目,并写入卡数目 xinka = [] for k in pf: if k not in list1: #print('有没有将新增的卡筛选出来????????????????????',k) xinka.append(k) print(xinka) if xinka != [] : #判断本次是否有新卡 f.write('本信用卡产品所在的网页链接是:' + url) f.write(string1) for k in xinka: f.write(k) f.write('\n') f.write('\n\n新增卡数目为:') f.write(str(len(xinka))) else: #若不存在显示本次运行不存在新卡 f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n') else: #若不存在文件则创建 with open('D:\浦发银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: f.write('本信用卡产品所在的网页链接是:' + url) f.write(string0) for k in pf: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(pf))) #得到卡数目,并写入卡数目def msk(): #抓取民生各种类信用卡 ms = [] for i in range(1,5): url = 'https://creditcard.cmbc.com.cn/fe/Channel/14349?page=%s'%i driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('msk%s.png'%i) #截图保存 driver.quit() soup = bp(html,'html.parser') soup = soup.find_all('h3',{'class':'ms_home_page_product_content_li_title ms-textEllipsis'}) #print('不知道这个打印出来是什么============================================',soup) for s in soup: content = s.text.strip() content = ''.join(content) print('_____________________________________________________',content) ms.append(content) print('最终结果的列表_____________________________________________________',ms) for k in ms: print('信用卡有_____________________________________________________',k) list1 = [] string0 = '\n\n————————————民生银行信用卡有————————————:\n' string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' xrtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) xrtime = ''.join(xrtime) string1 = string1 + xrtime + '\n\n' string0 = string0 + xrtime + '\n\n' #-------------------新增的文件需要的时间------------------ xrtime0 = time.strftime('%Y-%m-%d',time.localtime()) xrtime0 = ''.join(xrtime0) if (os.path.exists('D:\民生银行信用卡.doc')): #是否存在信用卡文件 命名为不带时间后缀的名字 with open('D:\民生银行信用卡.doc','r+',encoding='utf-8') as f: #存在即打开,读入 lines = f.readlines() #print('为什么未读进?!!!!!!!!!',lines) for line in lines: line = line.strip() list1.append(line) #print('原来的卡打印一下!!!!!!!!!!!!!!!',line) #print('原来的的卡列表是》》》》》》》》》》》》》》》》》》',list1) with open('D:\民生银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: #将本次与上一次比较,有新增则写入,本次命名为当前时间 f.write(string0) for k in ms: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(ms))) #得到卡数目,并写入卡数目 xinka = [] for k in ms: if k not in list1: #print('有没有将新增的卡筛选出来????????????????????',k) xinka.append(k) print(xinka) if xinka != [] : #判断本次是否有新卡 f.write('本信用卡产品所在的网页链接是:' + url) f.write(string1) for k in xinka: f.write(k) f.write('\n') f.write('\n\n新增卡数目为:') f.write(str(len(xinka))) else: #若不存在显示本次运行不存在新卡 f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n') else: #若不存在文件则创建 with open('D:\民生银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: f.write('本信用卡产品所在的网页链接是:' + url) f.write(string0) for k in ms: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(ms))) #得到卡数目,并写入卡数目def xyk(): #抓取兴业各种类信用卡 url = 'http://creditcard.cib.com.cn/apply/products/BJseries/xing1.html' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('xyk.png') #截图保存 driver.quit() xy = [] soup = bp(html,'html.parser') soup1 = soup.find_all('a',{'class':'third level '}) #print('不知道这个打印出来是什么============================================',soup) for s1 in soup1: content1 = s1.text.strip() content1 = ''.join(content1) print('_____________________________________________________',content1) if content1 not in xy: xy.append(content1) soup2 = soup.find_all('a',{'class':'four level '}) #print('不知道这个打印出来是什么============================================',soup) for s2 in soup2: content2 = s2.text.strip() content2 = ''.join(content2) print('_____________________________________________________',content2) if content2 not in xy: xy.append(content2) print('最终结果的列表_____________________________________________________',xy) for i in xy: print('信用卡有_____________________________________________________',i) list1 = [] string0 = '\n\n————————————兴业银行信用卡有————————————:\n' string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' xrtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) xrtime = ''.join(xrtime) string1 = string1 + xrtime + '\n\n' string0 = string0 + xrtime + '\n\n' #-------------------新增的文件需要的时间------------------ xrtime0 = time.strftime('%Y-%m-%d',time.localtime()) xrtime0 = ''.join(xrtime0) if (os.path.exists('D:\兴业银行信用卡.doc')): #是否存在信用卡文件 命名为不带时间后缀的名字 with open('D:\兴业银行信用卡.doc','r+',encoding='utf-8') as f: #存在即打开,读入 lines = f.readlines() #print('为什么未读进?!!!!!!!!!',lines) for line in lines: line = line.strip() list1.append(line) #print('原来的卡打印一下!!!!!!!!!!!!!!!',line) #print('原来的的卡列表是》》》》》》》》》》》》》》》》》》',list1) with open('D:\兴业银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: #将本次与上一次比较,有新增则写入,本次命名为当前时间 f.write(string0) for k in xy: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(xy))) #得到卡数目,并写入卡数目 xinka = [] for k in xy: if k not in list1: #print('有没有将新增的卡筛选出来????????????????????',k) xinka.append(k) print(xinka) if xinka != [] : #判断本次是否有新卡 f.write('本信用卡产品所在的网页链接是:' + url) f.write(string1) for k in xinka: f.write(k) f.write('\n') f.write('\n\n新增卡数目为:') f.write(str(len(xinka))) else: #若不存在显示本次运行不存在新卡 f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n') else: #若不存在文件则创建 with open('D:\兴业银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: f.write('本信用卡产品所在的网页链接是:' + url) f.write(string0) for k in xy: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(xy))) #得到卡数目,并写入卡数目def gdk(): #抓取光大各种类信用卡 url = 'http://xyk.cebbank.com/home/ps/index.htm' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('gdk.png') #截图保存 driver.quit() gd = [] soup = bp(html,'html.parser') soup = soup.find_all('h4') #print('不知道这个打印出来是什么============================================',soup) for s in soup: content = s.text.strip() content = ''.join(content) print('_____________________________________________________',content) if content not in gd: gd.append(content) print('没有去掉冗余的结果是:==============================================',gd) for j in range(3): gd.pop(0) for j in range(7): gd.pop() print('最终结果的列表_____________________________________________________',gd) for i in gd: print('信用卡有_____________________________________________________',i) list1 = [] string0 = '\n\n————————————光大银行信用卡有————————————:\n' string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' xrtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) xrtime = ''.join(xrtime) string1 = string1 + xrtime + '\n\n' string0 = string0 + xrtime + '\n\n' #-------------------新增的文件需要的时间------------------ xrtime0 = time.strftime('%Y-%m-%d',time.localtime()) xrtime0 = ''.join(xrtime0) if (os.path.exists('D:\光大银行信用卡.doc')): #是否存在信用卡文件 命名为不带时间后缀的名字 with open('D:\光大银行信用卡.doc','r+',encoding='utf-8') as f: #存在即打开,读入 lines = f.readlines() #print('为什么未读进?!!!!!!!!!',lines) for line in lines: line = line.strip() list1.append(line) #print('原来的卡打印一下!!!!!!!!!!!!!!!',line) #print('原来的的卡列表是》》》》》》》》》》》》》》》》》》',list1) with open('D:\光大银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: #将本次与上一次比较,有新增则写入,本次命名为当前时间 f.write(string0) for k in gd: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(gd))) #得到卡数目,并写入卡数目 xinka = [] for k in gd: if k not in list1: #print('有没有将新增的卡筛选出来????????????????????',k) xinka.append(k) print(xinka) if xinka != [] : #判断本次是否有新卡 f.write('本信用卡产品所在的网页链接是:' + url) f.write(string1) for k in xinka: f.write(k) f.write('\n') f.write('\n\n新增卡数目为:') f.write(str(len(xinka))) else: #若不存在显示本次运行不存在新卡 f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n') else: #若不存在文件则创建 with open('D:\光大银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: f.write('本信用卡产品所在的网页链接是:' + url) f.write(string0) for k in gd: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(gd))) #得到卡数目,并写入卡数目def gfk(): #抓取广发各种类信用卡 gf = [] #广发银行推荐信用卡____________________________________________________________________ url = 'http://card.cgbchina.com.cn/Channel/11712350' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('gftjk.png') #截图保存 driver.quit() tj = [] soup = bp(html,'html.parser') soup = soup.find_all('div',{'class','text'}) #print('class = text有什么————————————————:',soup) for soup in soup: soup = soup.find_all('a')[0] print('不知道这个打印出来是什么============================================',soup) content = soup.get('title') print(content) print(type(content)) print('_____________________________________________________',content) tj.append(content) print('最终结果的列表_____________________________________________________',tj) for i in tj: print('推荐信用卡有_____________________________________________________',i) # 广发银行都会白领信用卡_________________________________________________________ url = 'http://card.cgbchina.com.cn/Channel/11714125' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('gfblk.png') #截图保存 driver.quit() bl = [] soup = bp(html,'html.parser') soup = soup.find_all('div',{'class','text'}) #print('class = text有什么————————————————:',soup) for soup in soup: soup = soup.find_all('a')[0] print('不知道这个打印出来是什么============================================',soup) content = soup.get('title') print(content) print(type(content)) print('_____________________________________________________',content) bl.append(content) print('最终结果的列表_____________________________________________________',bl) for i in bl: print('都会白领卡有_____________________________________________________',i) # 广发银行爱车达人信用卡_________________________________________________________ url = 'http://card.cgbchina.com.cn/Channel/11713864' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('gfack.png') #截图保存 driver.quit() ac = [] soup = bp(html,'html.parser') soup = soup.find_all('div',{'class','text'}) #print('class = text有什么————————————————:',soup) for soup in soup: soup = soup.find_all('a')[0] print('不知道这个打印出来是什么============================================',soup) content = soup.get('title') print(content) print(type(content)) print('_____________________________________________________',content) ac.append(content) print('最终结果的列表_____________________________________________________',ac) for i in ac: print('爱车达人卡有_____________________________________________________',i) # 广发银行商旅优悦信用卡_________________________________________________________ url = 'http://card.cgbchina.com.cn/Channel/11713032' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('gfslk.png') #截图保存 driver.quit() sl = [] soup = bp(html,'html.parser') soup = soup.find_all('div',{'class','text'}) #print('class = text有什么————————————————:',soup) for soup in soup: soup = soup.find_all('a')[0] print('不知道这个打印出来是什么============================================',soup) content = soup.get('title') print(content) print(type(content)) print('_____________________________________________________',content) sl.append(content) print('最终结果的列表_____________________________________________________',sl) for i in sl: print('商旅优悦卡有_____________________________________________________',i) # 广发银行至尊精英信用卡_________________________________________________________ url = 'http://card.cgbchina.com.cn/Channel/11712561' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('gfzzk.png') #截图保存 driver.quit() zz = [] soup = bp(html,'html.parser') soup = soup.find_all('div',{'class','text'}) #print('class = text有什么————————————————:',soup) for soup in soup: soup = soup.find_all('a')[0] print('不知道这个打印出来是什么============================================',soup) content = soup.get('title') print(content) print(type(content)) print('_____________________________________________________',content) zz.append(content) print('最终结果的列表_____________________________________________________',zz) for i in zz: print('至尊精英卡有_____________________________________________________',i) # 广发银行联名信用卡_________________________________________________________ url = 'http://card.cgbchina.com.cn/Channel/15354893' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('gflmk.png') #截图保存 driver.quit() lm = [] soup = bp(html,'html.parser') soup = soup.find_all('div',{'class','text'}) #print('class = text有什么————————————————:',soup) for soup in soup: soup = soup.find_all('a')[0] print('不知道这个打印出来是什么============================================',soup) content = soup.get('title') print(content) print(type(content)) print('_____________________________________________________',content) lm.append(content) print('最终结果的列表_____________________________________________________',lm) for i in lm: print('联名卡有_____________________________________________________',i) for i in (tj + bl + ac + sl + zz + lm) : if i not in gf: gf.append(i) print('_____________________________________________________',i) list1 = [] string0 = '\n\n————————————广发银行信用卡有————————————:\n' string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' xrtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) xrtime = ''.join(xrtime) string1 = string1 + xrtime + '\n\n' string0 = string0 + xrtime + '\n\n' #-------------------新增的文件需要的时间------------------ xrtime0 = time.strftime('%Y-%m-%d',time.localtime()) xrtime0 = ''.join(xrtime0) if (os.path.exists('D:\广发银行信用卡.doc')): #是否存在信用卡文件 命名为不带时间后缀的名字 with open('D:\广发银行信用卡.doc','r+',encoding='utf-8') as f: #存在即打开,读入 lines = f.readlines() #print('为什么未读进?!!!!!!!!!',lines) for line in lines: line = line.strip() list1.append(line) #print('原来的卡打印一下!!!!!!!!!!!!!!!',line) #print('原来的的卡列表是》》》》》》》》》》》》》》》》》》',list1) with open('D:\广发银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: #将本次与上一次比较,有新增则写入,本次命名为当前时间 f.write(string0) for k in gf: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(gf))) #得到卡数目,并写入卡数目 xinka = [] for k in gf: if k not in list1: #print('有没有将新增的卡筛选出来????????????????????',k) xinka.append(k) print(xinka) if xinka != [] : #判断本次是否有新卡 f.write('本信用卡产品所在的网页链接是:' + url) f.write(string1) for k in xinka: f.write(k) f.write('\n') f.write('\n\n新增卡数目为:') f.write(str(len(xinka))) else: #若不存在显示本次运行不存在新卡 f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n') else: #若不存在文件则创建 with open('D:\广发银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: f.write('本信用卡产品所在的网页链接是:' + url) f.write(string0) for k in gf: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(gf))) #得到卡数目,并写入卡数目def hxk(): #抓取华夏各种类信用卡 hx = [] #华夏白金系列信用卡____________________________________________________________________ url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/platinum/list.shtml' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('hxbjk.png') #截图保存 driver.quit() bj = [] soup = bp(html,'html.parser') soup = soup.find_all('div',{'class':'Product_img_text'}) for soup in soup: soup = soup.find_all('a')[1] #print('有什么————————————————:',soup) for content in soup: content = content.string.strip() content = ''.join(content) bj.append(content) print('最终结果的列表_____________________________________________________',bj) for i in bj: print('白金信用卡有_____________________________________________________',i) # 华夏银行钛金系列信用卡_________________________________________________________ url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/titanium/list.shtml' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('hxtjk.png') #截图保存 driver.quit() tj = [] soup = bp(html,'html.parser') soup = soup.find_all('div',{'class':'Product_img_text'}) for soup in soup: soup = soup.find_all('a')[1] #print('class = text有什么————————————————:',soup) for content in soup: content = content.string.strip() content = ''.join(content) tj.append(content) print('最终结果的列表_____________________________________________________',tj) for i in tj: print('钛金系列卡有_____________________________________________________',i) # 华夏银行财智系列信用卡_________________________________________________________ url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/wisdom/list.shtml' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('hxczk.png') #截图保存 driver.quit() cz = [] soup = bp(html,'html.parser') soup = soup.find_all('div',{'class':'Product_img_text'}) for soup in soup: soup = soup.find_all('a')[1] #print('class = text有什么————————————————:',soup) for content in soup: content = content.string.strip() content = ''.join(content) cz.append(content) print('最终结果的列表_____________________________________________________',cz) for i in cz: print('财智卡有_____________________________________________________',i) # 华夏银行标准系列信用卡_________________________________________________________ url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/standard/list.shtml' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('hxbzk.png') #截图保存 driver.quit() bz = [] soup = bp(html,'html.parser') soup = soup.find_all('div',{'class':'Product_img_text'}) for soup in soup: soup = soup.find_all('a')[1] #print('class = text有什么————————————————:',soup) for content in soup: content = content.string.strip() content = ''.join(content) bz.append(content) print('最终结果的列表_____________________________________________________',bz) for i in bz: print('标准系列卡有_____________________________________________________',i) # 华夏银行联名系列信用卡_________________________________________________________ url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/joint/list.shtml' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('hxsmk.png') #截图保存 driver.quit() lm = [] soup = bp(html,'html.parser') soup = soup.find_all('div',{'class':'Product_img_text'}) for soup in soup: soup = soup.find_all('a')[1] #print('class = text有什么————————————————:',soup) for content in soup: content = content.string.strip() content = ''.join(content) lm.append(content) print('最终结果的列表_____________________________________________________',lm) for i in lm: print('联名系列卡有_____________________________________________________',i) # 华夏银行公务系列信用卡_________________________________________________________ url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/official/list.shtml' driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('hxgwk.png') #截图保存 driver.quit() gw = [] soup = bp(html,'html.parser') soup = soup.find_all('div',{'class':'Product_img_text'}) for soup in soup: soup = soup.find_all('a')[1] #print('class = text有什么————————————————:',soup) for content in soup: content = content.string.strip() content = ''.join(content) gw.append(content) print('最终结果的列表_____________________________________________________',gw) for i in gw: print('公务系列卡有_____________________________________________________',i) # 华夏银行缤纷系列信用卡_________________________________________________________ bf = [] for n in range(1,5): url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/colorful/list_%s.shtml'%n driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe') html = driver.get(url) driver.maximize_window() # 将浏览器最大化显示 html = driver.page_source time.sleep(5) print('_____________________________________________________对应html是_____________________________________________________',html) #driver.save_screenshot('hxbfk.png') #截图保存 driver.quit() soup = bp(html,'html.parser') soup = soup.find_all('div',{'class':'Product_img_text'}) for soup in soup: soup = soup.find_all('a')[1] #print('class = text有什么————————————————:',soup) for content in soup: content = content.string.strip() content = ''.join(content) bf.append(content) print('最终结果的列表_____________________________________________________',bf) for i in bf: print('缤纷系列卡有_____________________________________________________',i) for i in (bj + tj + cz + bz + lm + gw + bf) : if i not in hx: hx.append(i) print('_____________________________________________________',i) list1 = [] string0 = '\n\n————————————华夏银行信用卡有————————————:\n' string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' xrtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) xrtime = ''.join(xrtime) string1 = string1 + xrtime + '\n\n' string0 = string0 + xrtime + '\n\n' #-------------------新增的文件需要的时间------------------ xrtime0 = time.strftime('%Y-%m-%d',time.localtime()) xrtime0 = ''.join(xrtime0) if (os.path.exists('D:\华夏银行信用卡.doc')): #是否存在信用卡文件 命名为不带时间后缀的名字 with open('D:\华夏银行信用卡.doc','r+',encoding='utf-8') as f: #存在即打开,读入 lines = f.readlines() #print('为什么未读进?!!!!!!!!!',lines) for line in lines: line = line.strip() list1.append(line) #print('原来的卡打印一下!!!!!!!!!!!!!!!',line) #print('原来的的卡列表是》》》》》》》》》》》》》》》》》》',list1) with open('D:\华夏银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: #将本次与上一次比较,有新增则写入,本次命名为当前时间 f.write(string0) for k in hx: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(hx))) #得到卡数目,并写入卡数目 xinka = [] for k in hx: if k not in list1: #print('有没有将新增的卡筛选出来????????????????????',k) xinka.append(k) print(xinka) if xinka != [] : #判断本次是否有新卡 f.write('本信用卡产品所在的网页链接是:' + url) f.write(string1) for k in xinka: f.write(k) f.write('\n') f.write('\n\n新增卡数目为:') f.write(str(len(xinka))) else: #若不存在显示本次运行不存在新卡 f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n') else: #若不存在文件则创建 with open('D:\华夏银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: f.write('本信用卡产品所在的网页链接是:' + url) f.write(string0) for k in hx: f.write(k) f.write('\n') f.write('\n\n卡数目为:') f.write(str(len(hx))) #得到卡数目,并写入卡数目if __name__ == '__main__': bank = input('请输入希望搜寻信用卡产品的银行:') bank = bank.split(',') for ban in bank: if ban == '招商银行': zsk() elif ban == '浦发银行': pfk() elif ban == '中信银行': zxk() elif ban == '民生银行': msk() elif ban == '兴业银行': xyk() elif ban == '广发银行': gfk() elif ban == '光大银行': gdk() elif ban == '华夏银行': hxk() else : zsk() pfk() zxk() msk() xyk() gfk() gdk() hxk() exit = input("运行完毕!请输入任意键退出……")