python爬虫(20160803)

来源：互联网 | 发布：电脑绑定域名 | 编辑：程序博客网 | 时间：2024/06/01 20:46

# Download a resource with Python (this example saves a single image).

from urllib.request import urlopen

# urlopen() returns a response object; read() yields the raw bytes.
# Nothing is decoded here: an image must stay binary.
# The with-block closes the connection once the body has been read.
with urlopen("http://placekitten.com/g/500/600") as response:
    pic = response.read()

# "wb" creates (or truncates) the file and writes the bytes unchanged.
with open("cat_pic.jpg", "wb") as f:
    f.write(pic)

# Open a URL, read the response body and decode it to text.

from urllib.request import urlopen

# Read the raw bytes, closing the connection as soon as the body is consumed.
with urlopen("http://www.baidu.com") as response:
    raw = response.read()

# Decode the bytes to str; the page is assumed to be UTF-8 encoded.
html = raw.decode("utf-8")
print(html)

# Some useful methods of the response object returned by urlopen().

from urllib.request import urlopen

# Keep the response open only while it is being inspected.
with urlopen("http://baidu.com") as response:
    print(response.info())     # 1: the response headers
    print("\n")
    print(response.getcode())  # 2: the HTTP status code
    print("\n")
    print(response.geturl())   # 3: the URL actually retrieved

# Online translation with Python (posts a form to the Youdao web endpoint).

from urllib.request import Request
from urllib.request import urlopen
from urllib.parse import urlencode
from json import loads
import time  # used to pause between submissions so we are not rejected as a bot

# The endpoint and headers never change, so build them once outside the loop.
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=sogouse'
# A browser-like User-Agent so the request does not look like a script.
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586',
}

while True:
    content = input("input(q to break):")
    if content == 'q':
        break

    # Form fields sent with the POST; 'i' carries the text to translate.
    # NOTE(review): 'ZH_CN2JA' presumably selects Chinese -> Japanese - confirm.
    form = {
        'action': 'FY_BY_CLICKBUTTON',
        'doctype': 'json',
        'i': content,
        'keyfrom': 'fanyi.web',
        'type': 'ZH_CN2JA',
        'typoResult': 'true',
        'ue': 'UTF-8',
        'xmlVersion': '1.8',
    }
    # Request bodies must be bytes, so url-encode the form and then encode it.
    data = urlencode(form).encode('utf-8')

    # Passing data makes this a POST request; the with-block closes the
    # connection once the reply has been read.
    rq = Request(url, data, head)
    with urlopen(rq) as response:
        body = response.read().decode('utf-8')

    # Parse the JSON reply into a dict and print the translated text.
    html = loads(body)
    print(html['translateResult'][0][0]['tgt'])

    # Throttle: wait a second before the next submission.
    time.sleep(1)

# (Continuing the previous example) route requests through a proxy IP so
# that repeated automated access is less likely to be blocked.

from urllib.request import urlopen
from urllib.request import ProxyHandler
from urllib.request import build_opener
from urllib.request import install_opener
import random

# Candidate HTTP proxies in "host:port" form.
iplist = ['120.25.171.183:8080', '218.244.149.184:8888', '119.6.136.122:80', '122.96.59.105:82']

# Pick one proxy at random and install it globally: after install_opener(),
# every later urlopen() call in this process goes through that opener.
proxy = random.choice(iplist)
install_opener(build_opener(ProxyHandler({'http': proxy})))

# Fetch a "what is my IP" page and print it to see the proxy's effect.
# The with-block closes the connection after the body is read.
with urlopen('http://whatismyip.com.tw') as response:
    page = response.read().decode('utf-8')
print(page)

0 0