# 诸天至尊小说 -- chapter downloader script for this web novel.
#
# 来源:互联网 发布:大麦盒子解除网络限制 编辑:程序博客网 时间:2024/04/28 01:08
import re,os,random
from urllib import request
from bs4 import BeautifulSoup
from functools import reduce
from urllib.parse import urljoin  # needed below to resolve chapter links

# Fetch and parse the table-of-contents page of the novel.
url = 'http://www.aiquxs.com/read/50/50271/index.html'
req = request.Request(url)
# Read the body inside a context manager so the HTTP response is closed.
with request.urlopen(req) as resp:
    res = resp.read()
soup = BeautifulSoup(res, 'lxml')

# The heading text minus its last three characters is used as the book name
# (presumably a fixed suffix on the page title -- confirm against the site).
name = soup.div.h3.get_text()[:-3]

# Output folder for this book. makedirs also creates a missing parent folder
# and does not fail when the folder already exists.
t = 'e://电子书//%s' % name
os.makedirs(t, exist_ok=True)

# Collect chapter titles (m) and absolute chapter URLs (n) from every link
# inside the first <dl> of the first <div>.
z = soup.div.dl
data = z.find_all('a')
m, n = [], []
for i in data:
    href = i.get('href')
    if not href:
        # Anchors without an href cannot be downloaded; skip them.
        continue
    m.append(i.get_text())
    # urljoin resolves relative and absolute hrefs against the index URL
    # without interpreting the href as a regex replacement template.
    n.append(urljoin(url, href))
def h():
    """Return one of the predefined request-header dicts, picked at random.

    Each candidate is a dict with a single ``"User-Agent"`` key.
    """
    user_agents = (
        "Mozilla/5.0 (Windows; U; Win 9x 4.90; en-GB; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1",
        "Mozilla/5.0 (X11; U; SunOS sun4u; en-US; rv:1.6) Gecko/20040503",
    )
    candidates = [{"User-Agent": agent} for agent in user_agents]
    return random.choice(candidates)


def get(url,t):
    """Download one chapter page and save its filtered text to a file.

    Args:
        url: Address of the chapter page to fetch.
        t: Path of the text file to write; it is opened in ``'w'`` mode, so
            an existing file is overwritten.

    The page is parsed with BeautifulSoup, the ``find_all("div", "content")``
    result is converted to a string, and only these characters are kept:
    the marks U+300A / U+300B, the range U+4E00-U+9FA5, the range
    U+FF00-U+FFEF, and the ASCII characters ``,`` ``.`` ``!``. Everything
    else (tags, whitespace, other punctuation) is dropped. When nothing
    matches, an empty file is written.
    """
    # Attach the randomly chosen User-Agent to the request; it used to be
    # built and then discarded.
    req = request.Request(url, headers=h())
    # Close the HTTP response as soon as the body has been read.
    with request.urlopen(req) as resp:
        res = resp.read()
    soup = BeautifulSoup(res, 'lxml')
    c = soup.find_all("div", "content")
    # One raw-string character class, equivalent to the former alternation
    # of single-character classes but without invalid string escapes.
    kept = re.findall(r"[\u300a\u300b\u4e00-\u9fa5\uFF00-\uFFEF,.!]", str(c))
    # join returns "" for an empty match list, where reduce raised TypeError.
    text = "".join(kept)
    # Explicit UTF-8 so the write does not depend on the platform's default
    # codec, which may be unable to encode some of the kept characters.
    with open(t, 'w', encoding='utf-8') as out:
        out.write(text)


# Walk the chapter titles and URLs in lockstep and fetch every chapter whose
# text file is not on disk yet.
for title, link in zip(m, n):
    t = 'e://电子书//%s//%s.txt' % (name, title)
    if os.path.isfile(t):
        # Already saved: print the marker line and go to the next chapter.
        print('0000000000000000')
        continue
    get(link, t)
    print('正在下载%s' % title)
    
# 0 0
# 原创粉丝点击