Python 批量保存网页中超链接网址的源代码

来源:互联网 发布:安信网络身份认证 编辑:程序博客网 时间:2024/04/29 09:51
"""Collect the hyperlinks shown on a range of paginated pages into a file.

Every page of ``BASE_URL`` from ``FIRST_PAGE`` up to (but not including)
``STOP_PAGE`` is downloaded, each ``http``/``https`` URL that appears as
element text (directly between ``>`` and ``<``) is extracted, and the URLs
are appended to ``OUTPUT_PATH``, one per line.

The script runs on Python 2 (``urllib2``) and Python 3 (``urllib.request``).
"""
import os
import re
import time  # not used below; retained in case other code relies on it
from contextlib import closing

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

BASE_URL = "http://www.example.com/archive?page="
OUTPUT_PATH = 'all2.txt'
FIRST_PAGE = 5
STOP_PAGE = 194  # exclusive upper bound, as given to range()
TIMEOUT_SECONDS = 10

# Group 1 captures the whole URL; group 2 only captures the literal "http".
# A bytes pattern is used because the page body is handled as raw bytes.
LINK_PATTERN = re.compile(b'>((http)s?://.*?)<')

# The output file is opened in binary mode so the raw page bytes can be
# written unchanged on both Python versions.  Terminating each link with the
# platform line separator yields the same file content that writing "\n" in
# text mode would produce.
_LINE_END = os.linesep.encode("ascii")


def page_url(page):
    """Return the URL of result page number *page*."""
    return BASE_URL + str(page)


def fetch_page(page):
    """Download result page *page* and return its body as bytes.

    Network errors (including timeouts) are not caught here; they propagate
    to the caller.  The HTTP response is always closed.
    """
    with closing(urlopen(page_url(page), timeout=TIMEOUT_SECONDS)) as response:
        return response.read()


def extract_links(html):
    """Return every URL found by ``LINK_PATTERN`` in *html*, in order.

    *html* must be a bytes object; the returned URLs are bytes as well.
    """
    return [match.group(1) for match in LINK_PATTERN.finditer(html)]


def main():
    """Fetch each page in the configured range and append its links."""
    # The context manager guarantees the file is flushed and closed even
    # when a request fails part-way through the run.
    with open(OUTPUT_PATH, 'ab') as out:
        for page in range(FIRST_PAGE, STOP_PAGE):
            for link in extract_links(fetch_page(page)):
                out.write(link + _LINE_END)
            # Progress output: the number of the page that follows the one
            # just processed, then blank lines.
            print(page + 1)
            print("\n")


if __name__ == "__main__":
    main()

0 0