Python in Practice

```python
# Download an image
import urllib.request as ur

response = ur.urlopen("http://placekitten.com/g/500/600")
cat_img = response.read()
with open('cat_500_600.jpg', 'wb') as f:
    f.write(cat_img)
```
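For a one-off download like this, urllib also offers `urlretrieve`, which writes the response straight to disk. A minimal equivalent sketch:

```python
import urllib.request as ur

# Fetch the same kitten image and save it under the same file name.
ur.urlretrieve("http://placekitten.com/g/500/600", "cat_500_600.jpg")
```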
```python
# Simulate a Youdao Dictionary translation request
import urllib.request as ur
import urllib.parse as up
import json

while True:
    # Read the text to translate from the user
    content = input("Enter the text to translate (type q to quit): ")
    if content == 'q':
        break
    url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=null"
    # Build the form data to submit
    data = {}
    data['type'] = "Auto"
    data["i"] = content
    data["doctype"] = "json"
    data["xmlVersion"] = "1.8"
    data["keyfrom"] = "fanyi.web"
    data["ue"] = "UTF-8"
    data["typeResult"] = "true"
    # urlencode the form, then encode: str (unicode) -> utf-8 bytes
    data = up.urlencode(data).encode("utf-8")
    response = ur.urlopen(url, data)
    # decode: utf-8 bytes -> str (unicode)
    html = response.read().decode("utf-8")
    # Parse the JSON response into a dict
    target = json.loads(html)
    print("Translation: %s" % target["translateResult"][0][0]["tgt"])
```

Sample run:

```
Enter the text to translate (type q to quit): 中国
Translation: China
```
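The encode/decode pair is the step that trips people up most often. This standalone sketch (with a made-up two-field form) shows what each conversion produces:

```python
import urllib.parse as up

data = {"i": "中国", "doctype": "json"}
# urlencode builds the query string; encode turns it into utf-8 bytes
encoded = up.urlencode(data).encode("utf-8")
print(encoded)                  # b'i=%E4%B8%AD%E5%9B%BD&doctype=json'
# decode converts the bytes back into a str
print(encoded.decode("utf-8"))  # i=%E4%B8%AD%E5%9B%BD&doctype=json
```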
```python
# Simulate a browser when calling the translation API
import urllib.request as ur
import urllib.parse as up
import json

# Read the text to translate from the user
content = input("Enter the text to translate (type q to quit): ")
# Fake a browser User-Agent header
head = {}
head["User-Agent"] = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=null"
# Build the form data to submit
data = {}
data['type'] = "Auto"
data["i"] = content
data["doctype"] = "json"
data["xmlVersion"] = "1.8"
data["keyfrom"] = "fanyi.web"
data["ue"] = "UTF-8"
data["typeResult"] = "true"
# urlencode the form, then encode: str (unicode) -> utf-8 bytes
data = up.urlencode(data).encode("utf-8")
req = ur.Request(url, data, head)
response = ur.urlopen(req)
# decode: utf-8 bytes -> str (unicode)
html = response.read().decode("utf-8")
# Parse the JSON response into a dict
target = json.loads(html)
print("Translation: %s" % target["translateResult"][0][0]["tgt"])
```

The browser header we added can be inspected afterwards:

```
>>> req.headers
{'User-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'}
```

There are two ways to set or modify headers:

- through the `headers` parameter of `Request`
- through `Request.add_header()`
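A short sketch showing both approaches side by side (the URL here is only a placeholder):

```python
import urllib.request as ur

ua = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"

# 1. Pass headers via the Request constructor's headers parameter.
req1 = ur.Request("http://fanyi.youdao.com/translate", headers={"User-Agent": ua})

# 2. Add a header after construction with Request.add_header().
req2 = ur.Request("http://fanyi.youdao.com/translate")
req2.add_header("User-Agent", ua)
```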

Proxies

```python
# Check whether a proxy IP works
import urllib.request as ur
import random

url = "http://www.whatismyip.com.tw"
# Pool of proxy addresses
iplist = ["124.240.187.77:80", "180.166.112.47:8888", "223.19.230.181:80"]
proxy_support = ur.ProxyHandler({"http": random.choice(iplist)})
# Create an opener that routes requests through the proxy
opener = ur.build_opener(proxy_support)
# Add a browser User-Agent header
opener.addheaders = [("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36")]
# Install the opener so urlopen() uses it
ur.install_opener(opener)
# Verify: the page should report the proxy's IP, not ours
response = ur.urlopen(url)
html = response.read().decode("utf-8")
print(html)
```
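Note that `install_opener()` makes the proxy global for every later `urlopen()` call. If the proxy is only needed for a few requests, calling the opener directly is an option; a sketch, reusing the same `iplist` as above:

```python
import urllib.request as ur
import random

iplist = ["124.240.187.77:80", "180.166.112.47:8888", "223.19.230.181:80"]
opener = ur.build_opener(ur.ProxyHandler({"http": random.choice(iplist)}))
# Use the opener for this one request, without installing it globally.
response = opener.open("http://www.whatismyip.com.tw")
print(response.read().decode("utf-8"))
```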
Scraping images

```python
# coding: UTF-8
import urllib.request
import os


def url_open(url):
    """Open a URL while pretending to be a browser; return the raw bytes."""
    req = urllib.request.Request(url)
    # Fake a browser User-Agent header
    req.add_header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36")
    response = urllib.request.urlopen(req)
    return response.read()


def get_page(url):
    """Extract the current page number from the page's HTML."""
    html = url_open(url).decode("utf-8")
    # The page number sits between 'current-comment-page">[' and ']';
    # 23 = len('current-comment-page') + len('">[')
    a = html.find("current-comment-page") + 23
    b = html.find("]", a)
    return html[a:b]


def find_imgs(url):
    """Collect all .jpg image addresses found on one page."""
    html = url_open(url).decode("utf-8")
    img_addrs = []
    a = html.find("img src=")
    while a != -1:
        # An image URL should not be longer than 255 characters
        b = html.find(".jpg", a, a + 255)
        if b != -1:
            img_addrs.append(html[a + 9 : b + 4])
        else:
            b = a + 9
        # Continue searching right after the current match
        a = html.find("img src=", b)
    return img_addrs


def save_img(folder, img_addrs):
    """Download every image address and write it into the current folder."""
    for each in img_addrs:
        # The file name is the last path component of the URL
        filename = each.split('/')[-1]
        img = url_open(each)
        with open(filename, 'wb') as f:
            f.write(img)


def download_mm(folder="mm", pages=10):
    # Create and enter the target folder
    os.mkdir(folder)
    os.chdir(folder)
    url = "http://jandan.net/ooxx/"
    # The front page tells us the newest page number
    page_num = int(get_page(url))
    for i in range(pages):
        # Browsing shows each earlier page number is the previous one minus 1
        page_url = url + 'page-' + str(page_num - i) + "#comments"
        # Gather all image addresses on this page and save them
        img_addrs = find_imgs(page_url)
        save_img(folder, img_addrs)


if __name__ == "__main__":
    download_mm()
```
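To try the scraper without pulling ten full pages, the page count can be dialed down (the folder name here is arbitrary, and jandan.net's markup may have changed since this was written):

```python
# Fetch only the two most recent pages into a throwaway folder.
download_mm(folder="mm_test", pages=2)
```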