Python 批量下载色影无忌和蜂鸟的图片:爬虫小应用

来源:互联网 发布:sql server as用法 编辑:程序博客网 时间:2024/04/28 18:06
代码里保留了一些冗余内容(之前测试正则表达式时留下的,一直没有删掉),不过不影响使用。
# -*- coding: utf-8 -*-
"""Small crawler helpers: list the links on a page and bulk-download its JPEGs.

Run as a script with the page URL as the first command-line argument.
The code is written to run unchanged on both Python 2 and Python 3.
"""
import os
import re
import sys
import time
from contextlib import closing

try:  # Python 3
    from urllib.parse import urljoin
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2
    from urllib import urlopen, urlretrieve
    from urlparse import urljoin

_DEFAULT_DIRECTORY = "C:/Tools/source"
_DEFAULT_SINGLE_PATH = "C:/Tools/a.jpg"

# href value of every <a href="..." ...> tag.
_LINK_PATTERN = re.compile(r'<a href="(.+?)".+?>')
# An absolute web link: http: or https: followed by at least one character.
_ABSOLUTE_URL_PATTERN = re.compile(r'https?:.')
# src value of every self-closing <img src="....jpg" ... /> tag.
_IMAGE_PATTERN = re.compile(r'<img src="(.+?\.jpg)".+?/>')
# File name made only of word characters, directly after a slash.
_NAME_PATTERN = re.compile(r'/(\w+?\.jpg)')


def _url_from_argv():
    """Return the URL given as first command-line argument, or exit with usage."""
    if len(sys.argv) < 2:
        raise SystemExit("usage: %s <url>" % sys.argv[0])
    return sys.argv[1]


def _fetch_bytes(url):
    """Return the raw body of *url*, always closing the connection."""
    with closing(urlopen(url)) as connection:
        return connection.read()


def _fetch_text(url):
    """Return the body of *url* as text.

    Decoding is lenient (undecodable bytes are replaced) because the text is
    only scanned for tag attributes, so the page's real charset is not needed.
    """
    return _fetch_bytes(url).decode("utf-8", "replace")


def _filename_from_url(url):
    """Return a local file name for the image at *url*.

    Names matching ``_NAME_PATTERN`` are used as they are.  Anything else
    (for example ``my-pic.jpg``) falls back to the last URL segment with
    characters other than word characters, dots and hyphens replaced by
    underscores, so a name is always produced.
    """
    matches = _NAME_PATTERN.findall(url)
    if matches:
        return matches[0]
    tail = url.rsplit("/", 1)[-1]
    return re.sub(r"[^\w.\-]", "_", tail)


def getAllUrl(entry=None):
    """Print and return every absolute http/https link found on a page.

    Args:
        entry: URL of the page to scan.  Defaults to ``sys.argv[1]``.

    Returns:
        The list of matching href values, in page order.
    """
    if entry is None:
        entry = _url_from_argv()
    page = _fetch_text(entry)
    links = [href for href in _LINK_PATTERN.findall(page)
             if _ABSOLUTE_URL_PATTERN.match(href)]
    for href in links:
        print(href)
    print("done")
    return links


def download_pic(url=None, directory=_DEFAULT_DIRECTORY, delay=1):
    """Download every ``.jpg`` referenced by an ``<img src=...>`` tag on a page.

    Args:
        url: URL of the page to scan.  Defaults to ``sys.argv[1]``.
        directory: Folder the images are written to; created, including any
            missing parents, when it does not exist.
        delay: Seconds to pause after each saved image.

    Returns:
        The list of local paths that were written.
    """
    if url is None:
        url = _url_from_argv()
    print("Waiting to get data")
    page = _fetch_text(url)
    # Resolve relative src values against the page URL so they can be fetched.
    image_urls = [urljoin(url, src) for src in _IMAGE_PATTERN.findall(page)]

    if not os.path.isdir(directory):
        os.makedirs(directory)

    saved = []
    for image_url in image_urls:
        print(image_url)
        name = _filename_from_url(image_url)
        print(name)
        local_path = os.path.join(directory, name)
        try:
            jpeg = _fetch_bytes(image_url)
        except IOError as err:
            # One unreachable image must not abort the rest of the batch.
            print("failed: %s (%s)" % (image_url, err))
            continue
        with open(local_path, "wb") as target:
            target.write(jpeg)
        saved.append(local_path)
        print("waiting")
        time.sleep(delay)
    print("Done")
    return saved


def download_pic_2(url=None, local_path=_DEFAULT_SINGLE_PATH):
    """Download a single resource straight to a local file.

    Args:
        url: URL to fetch.  Defaults to ``sys.argv[1]``.
        local_path: Destination file path.

    Returns:
        *local_path*.
    """
    if url is None:
        url = _url_from_argv()
    urlretrieve(url, local_path)
    print("Done")
    return local_path


def regulation():
    """Print the matches of a few sample regular expressions (regex scratchpad)."""
    str1 = "abc123*GBK1024abc*defb1kc12*addd"
    samples = (
        r'abc',
        r'a.c',
        r'abc\*',
        r'[abc]12',
        r'\d\*',
        r'a[^\d]',
        r'a[^\d]*',
        r'[a-zA-Z]+(\d+)',
    )
    for sample in samples:
        print(re.findall(sample, str1))

    str2 = "dadfae ef <img atl=\"500\" src=\"www.qq.com/1.jpg\" width=\"700\"> asdfe aa<ima"
    urls = re.findall(r'<img .+ src="(.+)" .+>', str2)
    print(urls)
    for url in urls:
        print(url)


if __name__ == "__main__":
    # Other entry points: getAllUrl(), download_pic_2(), regulation().
    download_pic()


#######后续

后来修改了代码,改用 BeautifulSoup,可以更大范围地下载图片。

代码如下: http://www.30daydo.com/article/56


0 0
原创粉丝点击