xyk

来源：互联网 发布：淘宝贷款最多贷多少钱 编辑：程序博客网 时间：2024/06/05 15:13
#!/usr/bin/python
"""Scrape credit-card product names from several Chinese banks' websites.

Each public function (``zsk``, ``zxk``, ``pfk``, ``msk``, ``xyk``, ``gdk``,
``gfk``) renders one bank's product pages in a headless PhantomJS browser,
extracts the card names with BeautifulSoup and appends a report to a dated
``.doc`` text file on drive ``D:``.  When an undated baseline file for the
bank already exists, cards whose names do not appear in it are additionally
listed as "new since the last run".
"""
import os
import time
import datetime
import codecs
# from lxml import etree
from selenium import webdriver
# import csv
from bs4 import BeautifulSoup as bp
import re
import io
import sys

# Re-wrap stdout as gb18030 so the Chinese debug output can be printed when the
# script runs in a Windows cmd window (required when running without a GUI).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')

_PHANTOMJS_PATH = 'D:/phantomjs/bin/phantomjs.exe'
_RULE = '_____________________________________________________'


def _fetch_html(url):
    """Render *url* in PhantomJS and return the page source.

    The 5-second pause happens *before* the source is read so that scripts on
    the page get a chance to run; the driver is always shut down, even when
    loading the page raises.
    """
    driver = webdriver.PhantomJS(executable_path=_PHANTOMJS_PATH)
    try:
        driver.get(url)
        driver.maximize_window()  # maximise the browser window
        time.sleep(5)
        html = driver.page_source
    finally:
        driver.quit()
    print(_RULE + '对应html是' + _RULE, html)
    return html


def _tag_texts(html, tag, attrs=None):
    """Return the stripped text of every *tag* element in *html* matching *attrs*."""
    soup = bp(html, 'html.parser')
    texts = []
    for node in soup.find_all(tag, attrs if attrs is not None else {}):
        text = node.text.strip()
        print(_RULE, text)
        texts.append(text)
    return texts


def _anchor_titles(html):
    """Return the ``title`` of the first link inside each matching ``div``.

    Blocks without a link, and links without a ``title`` attribute, are
    skipped: a missing title cannot be written to the report file.
    """
    soup = bp(html, 'html.parser')
    titles = []
    # NOTE(review): the filter is a *set*, not a dict; presumably
    # {'class': 'text'} was intended.  Kept unchanged -- confirm against the
    # BeautifulSoup documentation before altering it.
    for box in soup.find_all('div', {'class', 'text'}):
        anchor = box.find('a')
        if anchor is None:
            continue
        print('不知道这个打印出来是什么============================================', anchor)
        title = anchor.get('title')
        print(title)
        print(type(title))
        print(_RULE, title)
        if title is not None:
            titles.append(title)
    return titles


def _unique(items):
    """Return *items* without duplicates, keeping first-seen order."""
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result


def _print_cards(label, cards):
    """Print the whole list and then one line per card (debug output)."""
    print('最终结果的列表' + _RULE, cards)
    for card in cards:
        print(label + _RULE, card)


def _write_cards(f, cards):
    """Write one card per line followed by the total count."""
    for card in cards:
        f.write(card)
        f.write('\n')
    f.write('\n\n卡数目为：')
    f.write(str(len(cards)))


def _write_report(bank, cards, url):
    """Append this run's card list for *bank* to today's report file.

    The report goes to ``D:\\<bank>信用卡<YYYY-MM-DD>.doc``.  If the undated
    baseline ``D:\\<bank>信用卡.doc`` exists, every stripped line of it is
    treated as a known card name and the cards missing from it are listed as
    new.  The baseline is only read here; nothing in this script creates it.
    """
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    today = time.strftime('%Y-%m-%d', time.localtime())
    all_header = '\n\n————————————%s信用卡有————————————：\n' % bank + stamp + '\n\n'
    new_header = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' + stamp + '\n\n'
    link_line = '本信用卡产品所在的网页链接是：' + url
    # Explicit backslash: the original literals relied on an invalid escape.
    baseline_path = 'D:\\%s信用卡.doc' % bank
    report_path = 'D:\\%s信用卡%s.doc' % (bank, today)

    if os.path.exists(baseline_path):
        with open(baseline_path, 'r', encoding='utf-8') as f:
            known = {line.strip() for line in f}
        with open(report_path, 'a', encoding='utf-8') as f:
            f.write(all_header)
            _write_cards(f, cards)
            new_cards = [card for card in cards if card not in known]
            print(new_cards)
            if new_cards:
                f.write(link_line)
                f.write(new_header)
                for card in new_cards:
                    f.write(card)
                    f.write('\n')
                f.write('\n\n新增卡数目为：')
                f.write(str(len(new_cards)))
            else:
                f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n')
    else:
        # No baseline yet: just record the full list.
        with open(report_path, 'a', encoding='utf-8') as f:
            f.write(link_line)
            f.write(all_header)
            _write_cards(f, cards)


def zsk():
    """Scrape China Merchants Bank card names (list pages 1-19) and report them."""
    zs = []
    for i in range(1, 20):
        url = 'http://ccclub.cmbchina.com/ccproduct/cardlist.aspx?PageNo=%s' % i
        zs.extend(_tag_texts(_fetch_html(url), 'h2', {'class': 'cardinfotitle'}))
        _print_cards('信用卡有', zs)
    # The report records the URL of the last page fetched.
    _write_report('招商银行', zs, url)


def zxk():
    """Scrape China CITIC Bank card names from six category pages and report them."""
    pages = (
        ('特色信用卡有', 'https://creditcard.ecitic.com/shenqing/tesezhutika.shtml'),
        ('top10卡有', 'https://creditcard.ecitic.com/shenqing/index.shtml'),
        ('标准卡有', 'https://creditcard.ecitic.com/shenqing/biaozhunka2.shtml'),
        ('航空商旅卡有', 'https://creditcard.ecitic.com/shenqing/hangkongshanglv.shtml'),
        ('高端白金卡有', 'https://creditcard.ecitic.com/shenqing/gaoduanbaijin.shtml'),
        ('出国专享卡有', 'https://creditcard.ecitic.com/shenqing/chuguozhuanxiang.shtml'),
    )
    collected = []
    for label, url in pages:
        # The last two matches on every page are discarded, as in the original;
        # slicing (unlike pop) tolerates pages that yield fewer than two.
        names = _tag_texts(_fetch_html(url), 'p', {'class': 'card_name'})[:-2]
        _print_cards(label, names)
        collected.extend(names)
    zx = _unique(collected)
    _write_report('中信银行', zx, url)


def pfk():
    """Scrape SPD Bank card names and report them."""
    url = 'http://ccc.spdb.com.cn/apply_for_credit_cards/card/'
    html = _fetch_html(url)
    pf = _tag_texts(html, 'div', {'class': 'ttd'})
    pf += _tag_texts(html, 'div', {'class': 'ttdf'})
    _print_cards('信用卡有', pf)
    _write_report('浦发银行', pf, url)


def msk():
    """Scrape China Minsheng Bank card names (list pages 1-4) and report them."""
    ms = []
    for i in range(1, 5):
        url = 'https://creditcard.cmbc.com.cn/fe/Channel/14349?page=%s' % i
        ms.extend(_tag_texts(
            _fetch_html(url), 'h3',
            {'class': 'ms_home_page_product_content_li_title ms-textEllipsis'}))
        _print_cards('信用卡有', ms)
    _write_report('民生银行', ms, url)


def xyk():
    """Scrape Industrial Bank card names (de-duplicated) and report them."""
    url = 'http://creditcard.cib.com.cn/apply/products/BJseries/xing1.html'
    html = _fetch_html(url)
    # The trailing space in the class strings is part of the original filter.
    found = _tag_texts(html, 'a', {'class': 'third level '})
    found += _tag_texts(html, 'a', {'class': 'four level '})
    xy = _unique(found)
    _print_cards('信用卡有', xy)
    _write_report('兴业银行', xy, url)


def gdk():
    """Scrape China Everbright Bank card names and report them."""
    url = 'http://xyk.cebbank.com/home/ps/index.htm'
    gd = _unique(_tag_texts(_fetch_html(url), 'h4'))
    print('没有去掉冗余的结果是：==============================================', gd)
    # Drop the first three and last seven headings, as in the original; slicing
    # simply yields an empty list when the page has fewer than ten headings.
    gd = gd[3:-7]
    _print_cards('信用卡有', gd)
    _write_report('光大银行', gd, url)


def gfk():
    """Scrape China Guangfa Bank card names from six channel pages and report them.

    NOTE(review): the source available for this function is cut off shortly
    after its report-writing section begins.  The visible part is identical to
    the six sibling functions, so the remainder is assumed to follow the same
    pattern -- confirm against the complete file.
    """
    pages = (
        ('推荐信用卡有', 'http://card.cgbchina.com.cn/Channel/11712350'),
        ('都会白领卡有', 'http://card.cgbchina.com.cn/Channel/11714125'),
        ('爱车达人卡有', 'http://card.cgbchina.com.cn/Channel/11713864'),
        ('商旅优悦卡有', 'http://card.cgbchina.com.cn/Channel/11713032'),
        ('至尊精英卡有', 'http://card.cgbchina.com.cn/Channel/11712561'),
        ('联名卡有', 'http://card.cgbchina.com.cn/Channel/15354893'),
    )
    collected = []
    for label, url in pages:
        titles = _anchor_titles(_fetch_html(url))
        _print_cards(label, titles)
        collected.extend(titles)
    gf = _unique(collected)
    for card in gf:
        print(_RULE, card)
    _write_report('广发银行', gf, url)
          f.write('\n')            f.write('\n\n卡数目为：')              f.write(str(len(gf)))  #得到卡数目，并写入卡数目            xinka = []                        for k in gf:                              if k not in list1:                                        #print('有没有将新增的卡筛选出来？？？？？？？？？？？？？？？？？？？？',k)                    xinka.append(k)            print(xinka)                        if xinka != [] :   #判断本次是否有新卡                f.write('本信用卡产品所在的网页链接是：' + url)                f.write(string1)                for k in xinka:                    f.write(k)                    f.write('\n')                f.write('\n\n新增卡数目为：')                f.write(str(len(xinka)))            else:                  #若不存在显示本次运行不存在新卡                f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n')    else:                    #若不存在文件则创建                with open('D:\广发银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f:            f.write('本信用卡产品所在的网页链接是：' + url)                        f.write(string0)            for k in gf:                f.write(k)                f.write('\n')            f.write('\n\n卡数目为：')              f.write(str(len(gf)))  #得到卡数目，并写入卡数目def hxk():   #抓取华夏各种类信用卡    hx = []         #华夏白金系列信用卡____________________________________________________________________    url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/platinum/list.shtml'        driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')    html = driver.get(url)    driver.maximize_window()   # 将浏览器最大化显示       html = driver.page_source        time.sleep(5)        print('_____________________________________________________对应html是_____________________________________________________',html)             #driver.save_screenshot('hxbjk.png')   #截图保存    driver.quit()    bj = []              soup = bp(html,'html.parser')    soup = soup.find_all('div',{'class':'Product_img_text'})    for soup in soup:        soup = soup.find_all('a')[1]        #print('有什么————————————————：',soup)        
for content in soup:            content = content.string.strip()            content = ''.join(content)             bj.append(content)    print('最终结果的列表_____________________________________________________',bj)    for i in bj:        print('白金信用卡有_____________________________________________________',i)    # 华夏银行钛金系列信用卡_________________________________________________________    url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/titanium/list.shtml'         driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')    html = driver.get(url)    driver.maximize_window()   # 将浏览器最大化显示       html = driver.page_source        time.sleep(5)        print('_____________________________________________________对应html是_____________________________________________________',html)        #driver.save_screenshot('hxtjk.png')   #截图保存    driver.quit()    tj = []              soup = bp(html,'html.parser')     soup = soup.find_all('div',{'class':'Product_img_text'})    for soup in soup:        soup = soup.find_all('a')[1]        #print('class = text有什么————————————————：',soup)        for content in soup:            content = content.string.strip()            content = ''.join(content)             tj.append(content)                    print('最终结果的列表_____________________________________________________',tj)    for i in tj:        print('钛金系列卡有_____________________________________________________',i)    # 华夏银行财智系列信用卡_________________________________________________________    url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/wisdom/list.shtml'         driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')    html = driver.get(url)    driver.maximize_window()   # 将浏览器最大化显示       html = driver.page_source        time.sleep(5)        print('_____________________________________________________对应html是_____________________________________________________',html)        #driver.save_screenshot('hxczk.png')   #截图保存    
driver.quit()    cz = []              soup = bp(html,'html.parser')    soup = soup.find_all('div',{'class':'Product_img_text'})    for soup in soup:        soup = soup.find_all('a')[1]        #print('class = text有什么————————————————：',soup)        for content in soup:            content = content.string.strip()            content = ''.join(content)                     cz.append(content)    print('最终结果的列表_____________________________________________________',cz)    for i in cz:        print('财智卡有_____________________________________________________',i)    # 华夏银行标准系列信用卡_________________________________________________________    url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/standard/list.shtml'         driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')    html = driver.get(url)    driver.maximize_window()   # 将浏览器最大化显示       html = driver.page_source        time.sleep(5)        print('_____________________________________________________对应html是_____________________________________________________',html)        #driver.save_screenshot('hxbzk.png')   #截图保存    driver.quit()    bz = []              soup = bp(html,'html.parser')     soup = soup.find_all('div',{'class':'Product_img_text'})    for soup in soup:        soup = soup.find_all('a')[1]        #print('class = text有什么————————————————：',soup)        for content in soup:            content = content.string.strip()            content = ''.join(content)                     bz.append(content)       print('最终结果的列表_____________________________________________________',bz)    for i in bz:        print('标准系列卡有_____________________________________________________',i)            # 华夏银行联名系列信用卡_________________________________________________________    url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/joint/list.shtml'         driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')    html = driver.get(url)    driver.maximize_window()   
# 将浏览器最大化显示       html = driver.page_source        time.sleep(5)        print('_____________________________________________________对应html是_____________________________________________________',html)        #driver.save_screenshot('hxsmk.png')   #截图保存    driver.quit()    lm = []              soup = bp(html,'html.parser')     soup = soup.find_all('div',{'class':'Product_img_text'})    for soup in soup:        soup = soup.find_all('a')[1]        #print('class = text有什么————————————————：',soup)        for content in soup:            content = content.string.strip()            content = ''.join(content)             lm.append(content)        print('最终结果的列表_____________________________________________________',lm)    for i in lm:        print('联名系列卡有_____________________________________________________',i)    # 华夏银行公务系列信用卡_________________________________________________________    url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/official/list.shtml'         driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')    html = driver.get(url)    driver.maximize_window()   # 将浏览器最大化显示       html = driver.page_source        time.sleep(5)        print('_____________________________________________________对应html是_____________________________________________________',html)        #driver.save_screenshot('hxgwk.png')   #截图保存    driver.quit()    gw = []              soup = bp(html,'html.parser')     soup = soup.find_all('div',{'class':'Product_img_text'})    for soup in soup:        soup = soup.find_all('a')[1]        #print('class = text有什么————————————————：',soup)        for content in soup:            content = content.string.strip()            content = ''.join(content)                     gw.append(content)    print('最终结果的列表_____________________________________________________',gw)    for i in gw:        print('公务系列卡有_____________________________________________________',i)    # 
华夏银行缤纷系列信用卡_________________________________________________________    bf = []    for n in range(1,5):        url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/colorful/list_%s.shtml'%n             driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')        html = driver.get(url)        driver.maximize_window()   # 将浏览器最大化显示           html = driver.page_source            time.sleep(5)            print('_____________________________________________________对应html是_____________________________________________________',html)            #driver.save_screenshot('hxbfk.png')   #截图保存        driver.quit()                  soup = bp(html,'html.parser')         soup = soup.find_all('div',{'class':'Product_img_text'})        for soup in soup:            soup = soup.find_all('a')[1]            #print('class = text有什么————————————————：',soup)            for content in soup:                content = content.string.strip()                content = ''.join(content)                         bf.append(content)    print('最终结果的列表_____________________________________________________',bf)    for i in bf:        print('缤纷系列卡有_____________________________________________________',i)    for i in (bj + tj + cz + bz + lm + gw + bf) :        if i not in hx:            hx.append(i)            print('_____________________________________________________',i)    list1 = []        string0 = '\n\n————————————华夏银行信用卡有————————————：\n'    string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n'    xrtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())    xrtime = ''.join(xrtime)    string1 = string1 + xrtime + '\n\n'    string0 = string0 + xrtime + '\n\n'    #-------------------新增的文件需要的时间------------------    xrtime0 = time.strftime('%Y-%m-%d',time.localtime())    xrtime0 = ''.join(xrtime0)            if (os.path.exists('D:\华夏银行信用卡.doc')):    #是否存在信用卡文件  命名为不带时间后缀的名字        with open('D:\华夏银行信用卡.doc','r+',encoding='utf-8') as f:  #存在即打开，读入            
lines = f.readlines()            #print('为什么未读进?！！！！！！！！！',lines)                            for line in lines:                line = line.strip()                                    list1.append(line)                #print('原来的卡打印一下！！！！！！！！！！！！！！！',line)            #print('原来的的卡列表是》》》》》》》》》》》》》》》》》》',list1)        with open('D:\华夏银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f:      #将本次与上一次比较，有新增则写入,本次命名为当前时间                        f.write(string0)            for k in hx:                f.write(k)                f.write('\n')            f.write('\n\n卡数目为：')              f.write(str(len(hx)))  #得到卡数目，并写入卡数目            xinka = []                        for k in hx:                              if k not in list1:                                        #print('有没有将新增的卡筛选出来？？？？？？？？？？？？？？？？？？？？',k)                    xinka.append(k)            print(xinka)                        if xinka != [] :   #判断本次是否有新卡                f.write('本信用卡产品所在的网页链接是：' + url)                f.write(string1)                for k in xinka:                    f.write(k)                    f.write('\n')                f.write('\n\n新增卡数目为：')                f.write(str(len(xinka)))            else:                  #若不存在显示本次运行不存在新卡                f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n')    else:                    #若不存在文件则创建                with open('D:\华夏银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f:            f.write('本信用卡产品所在的网页链接是：' + url)                        f.write(string0)            for k in hx:                f.write(k)                f.write('\n')            f.write('\n\n卡数目为：')              f.write(str(len(hx)))  #得到卡数目，并写入卡数目if __name__ == '__main__':    bank = input('请输入希望搜寻信用卡产品的银行：')    bank = bank.split('，')    for ban in bank:        if ban == '招商银行':            zsk()        elif ban == '浦发银行':            pfk()        elif ban == '中信银行':            zxk()        elif ban == '民生银行':            msk()        elif ban == '兴业银行':            xyk()        elif ban 
== '广发银行':            gfk()        elif ban == '光大银行':            gdk()        elif ban == '华夏银行':            hxk()        else :            zsk()            pfk()            zxk()            msk()            xyk()            gfk()            gdk()            hxk()    exit = input("运行完毕！请输入任意键退出……")
阅读全文
0 0