python httplib urllib urllib2区别（一撇）

来源：互联网发布：第一版主网小说网网络编辑：程序博客网时间：2024/06/15 09:22

目录：

urlencode & quote & unquote (url 中带中文参数)

python httplib urllib urllib2区别（一撇）

python post请求实例 & json -- str互相转化（application/x-www-form-urlencoded \ multipart/form-data）

1, 前言：

python提供很多种非常友好的访问网页内容的方法：python2.x 中有 httplib、urllib 和 urllib2；python3.x 则把它们整合为 http.client、urllib.request 等模块（此外还有第三方库 requests）。同时，每种方法下面又分为 get、post、put、delete 等 method。

一时间江湖上充斥着“五门八派”的各种，令初学者眼花缭乱，不知如何下手，如何学起。

但是，有一点需要提醒的是：无论哪一种方案或方法，存在即有其合理性；哪一种方法用着顺手就用哪一种，得心应手才是王道！！！

2, 下面我们比较一下python2.x 中的三种方法，先上实例，之后分析

（1）实例

import jsonimport sysimport hashlibimport urllibimport httplib ### none using now def generate_json_list():    reload(sys)    sys.setdefaultencoding('gbk')    print "[",    flag=False    for line in sys.stdin:        if flag:            print ",",        else:            flag=True        line=line.strip()        items=line.split("\t")        out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}        out["createdAt"]=items[0]        out["scale"]=items[1]        out["channel"]=items[2]        out["word"]=items[3]        print json.dumps(out,encoding="gbk").decode("unicode-escape"),    print "]"import urllib2def import_out_hotwords(key, json_str, out):    HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSingle"    #HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSin"    #print "2--", json_str    value={"configKey":key,"configValue":json_str}    data=urllib.urlencode(value)    print >> sys.stderr, "### 3params", value, data    req = urllib2.Request(HOST, data)    req.add_header("content-type", "application/x-www-form-urlencoded")    req.get_method = lambda : 'PUT'    response = None     try:        response = urllib2.urlopen(req, timeout=5)        if response.code == 200:            print "insertSingle Succ: ", out["word"], out["channel"], out["key"]            response.close()    except urllib2.URLError as e:        if hasattr(e, 'code'):            print 'Error code:',e.code        elif hasattr(e, 'reason'):            print 'Reason:',e.reason    finally:        if response:            response.close()def import_out_hotwords_2(key, json_str, out):    HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSingle"    #HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSin"    #print "2--", json_str    value={"configKey":key,"configValue":json_str}    data=urllib.urlencode(value)    print >> sys.stderr, "## 2params", value, data    req = urllib2.Request(HOST, data)    req.add_header("content-type", 
"application/x-www-form-urlencoded")    req.get_method = lambda : 'PUT'    response = None     try:        response = urllib2.urlopen(req, timeout=5)        if response.code == 200:            print "insertSingle Succ: ", out["word"], out["channel"], out["key"]            response.close()    except urllib2.URLError as e:        if hasattr(e, 'code'):            print 'Error code:',e.code        elif hasattr(e, 'reason'):            print 'Reason:',e.reason    finally:        if response:            response.close()    def import_out_hotwords_old(key, json_str, out):    HOST = "10.129.232.109:5005"    conn = httplib.HTTPConnection(HOST)    #print "2--", json_str    value={"configKey":key,"configValue":json_str}    data=urllib.urlencode(value)    #print data    headers = {            'content-type': 'application/x-www-form-urlencoded',            'cache-control': 'no-cache'            }    conn.request("PUT", "/api/externalHotWords/insertSingle", body=data, headers=headers)    handler = conn.getresponse()    if handler.status == 200:        print "insertSingle Succ: ", out["word"], out["channel"], out["key"]    #if handler.read().decode('utf8').encode('gbk')[0] == "OK":    #    print "insertSingle Succ: ", json_str    conn.close()def generate_json():    reload(sys)    sys.setdefaultencoding('gbk')    for line in sys.stdin:        line=line.strip()        items=line.split("\t")        if len(items) < 4:            continue        out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}        out["createdAt"]=items[0]        #out["scale"]=items[1]        out["channel"]=items[2]        out["word"]=items[3]        key = hashlib.md5((items[3] + items[2])).hexdigest()        key = "externalHotWords_" + key        out["key"] =  key        json_str = json.dumps(out,encoding="gbk")#.decode("unicode-escape")        #import_out_hotwords(key, urllib.quote(json_str.decode('gbk', 'ignore').encode('utf8')), out)        import_out_hotwords_2(key, json_str, out)def 
generate_json_old():    reload(sys)    sys.setdefaultencoding('gbk')    for line in sys.stdin:        line=line.strip()        items=line.split("\t")        if len(items) < 4:            continue        out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}        out["createdAt"]=items[0]        #out["scale"]=items[1]        out["channel"]=items[2]        out["word"]=items[3]        key = hashlib.md5((items[3] + items[2])).hexdigest()        out["key"] = "externalHotWords_" + key        json_str = json.dumps(out,encoding="gbk").decode("unicode-escape")        #json_str = out        #print "1--", json_str        ## return 'req=' +  urllib.quote(reqinfo.decode('gbk', 'ignore').encode('utf8'))        import_out_hotwords(key, urllib.quote(json_str.decode('gbk', 'ignore').encode('utf8')), out)        #import_out_hotwords(key, json_str)if __name__=="__main__":    #generate_json_list()    generate_json()

下面的实例存在一个小问题：二次编码问题，首先对out进行json.dumps() 的json_str转化（正确），之后对json_str进行urllib.quote() （第一次编码）；最后在

value={"configKey":key,"configValue":json_str}  之后有urllib.urlencode() （第二次编码）

格式一：configValue=%7B%27scale%27%3A+%27%27%2C+%27word%27%3A+%27%5Cxb2%5Cxe2%5Cxca%5Cxd4soso%27%2C+%27channel%27%3A+%27360_%5Cxca%5Cxb5%5Cxca%5Cxb1%5Cxc8%5Cxc8%5Cxb5%5Cxe3%27%2C+%27key%27%3A+%27externalHotWords_ed9f4ea3b7ff116c67366f7a576bcb08%27%2C+%27type%27%3A+%27%27%2C+%27createdAt%27%3A+%272017-06-07+11%3A22%3A32%27%7D&configKey=ed9f4ea3b7ff116c67366f7a576bcb08

格式二：configValue=%257B%2522scale%2522%253A%2520%2522%2522%252C%2520%2522word%2522%253A%2520%2522%25E6%25B5%258B%25E8%25AF%2595soso%2522%252C%2520%2522channel%2522%253A%2520%2522360_%25E5%25AE%259E%25E6%2597%25B6%25E7%2583%25AD%25E7%2582%25B9%2522%252C%2520%2522key%2522%253A%2520%2522externalHotWords_ed9f4ea3b7ff116c67366f7a576bcb08%2522%252C%2520%2522type%2522%253A%2520%2522%2522%252C%2520%2522createdAt%2522%253A%2520%25222017-06-07%252011%253A22%253A32%2522%257D&configKey=ed9f4ea3b7ff116c67366f7a576bcb08

显然格式二是对格式一再次进行了编码（因为{ --> %7B;  % --> %25; ）

import jsonimport sysimport hashlibimport urllibimport httplib ### none using now def generate_json_list():    reload(sys)    sys.setdefaultencoding('gbk')    print "[",    flag=False    for line in sys.stdin:        if flag:            print ",",        else:            flag=True        line=line.strip()        items=line.split("\t")        out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}        out["createdAt"]=items[0]        out["scale"]=items[1]        out["channel"]=items[2]        out["word"]=items[3]        print json.dumps(out,encoding="gbk").decode("unicode-escape"),    print "]"import urllib2def import_out_hotwords(key, json_str, out):    HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSingle"    #HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSin"    #print "2--", json_str    value={"configKey":key,"configValue":json_str}    data=urllib.urlencode(value)    req = urllib2.Request(HOST, data)    req.add_header("content-type", "application/x-www-form-urlencoded")    req.get_method = lambda : 'PUT'    response = None     try:        response = urllib2.urlopen(req, timeout=5)        if response.code == 200:            print "insertSingle Succ: ", out["word"], out["channel"], out["key"]            response.close()    except urllib2.URLError as e:        if hasattr(e, 'code'):            print 'Error code:',e.code        elif hasattr(e, 'reason'):            print 'Reason:',e.reason    finally:        if response:            response.close()    def import_out_hotwords_old(key, json_str, out):    HOST = "10.129.232.109:5005"    conn = httplib.HTTPConnection(HOST)    #print "2--", json_str    value={"configKey":key,"configValue":json_str}    data=urllib.urlencode(value)    #print data    headers = {            'content-type': 'application/x-www-form-urlencoded',            'cache-control': 'no-cache'            }    conn.request("PUT", "/api/externalHotWords/insertSingle", body=data, headers=headers)    
handler = conn.getresponse()    if handler.status == 200:        print "insertSingle Succ: ", out["word"], out["channel"], out["key"]    #if handler.read().decode('utf8').encode('gbk')[0] == "OK":    #    print "insertSingle Succ: ", json_str    conn.close()def generate_json():    reload(sys)    sys.setdefaultencoding('gbk')    for line in sys.stdin:        line=line.strip()        items=line.split("\t")        if len(items) < 4:            continue        out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}        out["createdAt"]=items[0]        #out["scale"]=items[1]        out["channel"]=items[2]        out["word"]=items[3]        key = hashlib.md5((items[3] + items[2])).hexdigest()        out["key"] = "externalHotWords_" + key        json_str = json.dumps(out,encoding="gbk").decode("unicode-escape")        #json_str = out        #print "1--", json_str        ## return 'req=' +  urllib.quote(reqinfo.decode('gbk', 'ignore').encode('utf8'))        import_out_hotwords(key, urllib.quote(json_str.decode('gbk', 'ignore').encode('utf8')), out)        #import_out_hotwords(key, json_str)if __name__=="__main__":    #generate_json_list()    generate_json()cat

CMD: cat tmp | python generate_json2.py

[@10.134.105.160 HotRankingLoggers]# vi tmp
2017-06-07 11:22:32 6964 360_实时热点测试APP
2017-06-07 11:22:32 6498 360_实时热点测试soso

（2）分析（参考python的httplib、urllib和urllib2的区别及用）

urllib和urllib2

urllib 和urllib2都是接受URL请求的相关模块，但是urllib2可以接受一个Request类的实例来设置URL请求的headers，urllib仅可以接受URL。

这意味着，只使用urllib时，你不可以伪装你的User Agent字符串等请求头信息。

urllib提供urlencode方法用来生成GET查询字符串，而urllib2没有。这就是urllib常和urllib2一起使用的原因。

目前的大部分http请求都是通过urllib2来访问的

httplib

httplib实现了HTTP和HTTPS的客户端协议，一般不直接使用，在python更高层的封装模块中（urllib,urllib2）使用了它的http实现。

（3）详解

urllib简单用法

1. google = urllib.urlopen('http://www.google.com')

2. print 'http header:\n', google.info()

3. print 'http status:', google.getcode()

4. print 'url:', google.geturl()

5. for line in google: # 就像在操作本地文件

6. print line,

7. google.close()

urllib2简单用法

1. import urllib2

2. response=urllib2.urlopen('http://www.douban.com')

3. html=response.read()

实际步骤：

1、urllib2.Request()的功能是构造一个请求信息，返回的req就是一个构造好的请求

2、urllib2.urlopen()的功能是发送刚刚构造好的请求req，并返回一个文件类的对象response，包括了所有的返回信息。

3、通过response.read()可以读取到response里面的html，通过response.info()可以读到一些额外的信息。如下：

1. #!/usr/bin/env python

2. import urllib2

3. req = urllib2.Request("http://www.douban.com")

4. response = urllib2.urlopen(req)

5. html = response.read()

6. print html

有时你会碰到，程序也对，但是服务器拒绝你的访问。这是为什么呢?问题出在请求中的头信息(header)。有的服务端有洁癖，不喜欢程序来触摸它。这个时候你需要将你的程序伪装成浏览器来发出请求。请求的方式就包含在header中。常见的情形：

1. import urllib

2. import urllib2

3. url = 'http://www.someserver.com/cgi-bin/register.cgi'

4. user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'# 将user_agent写入头信息

5. values = {'name' : 'who','password':'123456'}

6. headers = { 'User-Agent' : user_agent }

7. data = urllib.urlencode(values)

8. req = urllib2.Request(url, data, headers)

9. response = urllib2.urlopen(req)

10. the_page = response.read()

values是post数据

GET方法

例如百度：

百度是通过http://www.baidu.com/s?wd=XXX 来进行查询的，这样我们需要将{‘wd’:’xxx’}这个字典进行urlencode

1. #coding:utf-8

2. import urllib

3. import urllib2

4. url = 'http://www.baidu.com/s'

5. values = {'wd':'D_in'}

6. data = urllib.urlencode(values)

7. print data

8. url2 = url+'?'+data

9. response = urllib2.urlopen(url2)

10. the_page = response.read()

11. print the_page

POST方法

1. import urllib

2. import urllib2

3. url = 'http://www.someserver.com/cgi-bin/register.cgi'

4. user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' # 将user_agent写入头信息

5. values = {'name' : 'who','password':'123456'} # post数据

6. headers = { 'User-Agent' : user_agent }

7. data = urllib.urlencode(values) # 对post数据进行url编码

8. req = urllib2.Request(url, data, headers)

9. response = urllib2.urlopen(req)

10. the_page = response.read()

urllib2带cookie的使用

1. #coding:utf-8

2. import urllib2,urllib

3. import cookielib

5. url = r'http://www.renren.com/ajaxLogin'

7. #创建一个cj的cookie的容器

8. cj = cookielib.CookieJar()

9. opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

10. #将要POST出去的数据进行编码

11. data = urllib.urlencode({"email":email,"password":password})

12. r = opener.open(url,data)

13. print cj

httplib简单用法

1. #!/usr/bin/env python

2. # -*- coding: utf-8 -*-

3. import httplib

4. import urllib

6. def sendhttp():

7. data = urllib.urlencode({'@number': 12524, '@type': 'issue', '@action': 'show'})

8. headers = {"Content-type": "application/x-www-form-urlencoded",

9. "Accept": "text/plain"}

10. conn = httplib.HTTPConnection('bugs.python.org')

11. conn.request('POST', '/', data, headers)

12. httpres = conn.getresponse()

13. print httpres.status

14. print httpres.reason

15. print httpres.read()

16.

17. if __name__ == '__main__':

18. sendhttp()

3，get put post delete 方法（参考自 python urllib2对http的get，put，post，delete）

#GET：
#!/usr/bin/env python
# -- coding:utf-8 --
import urllib2
def get():
    """Fetch the Baidu home page with an HTTP GET and return its body.

    Returns:
        The response body as returned by ``response.read()``.
    """
    # urlopen needs a complete URL including the scheme; a bare host name
    # such as 'www.baidu.com' is rejected as an unknown URL type.
    URL = 'http://www.baidu.com'  # page address
    response = urllib2.urlopen(URL)  # send the GET request
    try:
        return response.read()  # page content returned by the server
    finally:
        response.close()
#POST：
#!/usr/bin/env python
# -- coding:utf-8 --
import urllib
import urllib2
def post():
    """Submit the EIT catalog query form with an HTTP POST.

    The form fields are URL-encoded, echoed to stdout for inspection, and
    sent as the request body.

    Returns:
        The response body as returned by ``response.read()``.
    """
    URL = 'http://umbra.nascom.nasa.gov/cgi-bin/eit-catalog.cgi'  # page address
    values = {  # form fields to post
        'obs_year': '2011', 'obs_month': 'March',
        'obs_day': '8', 'start_year': '2011',
        'start_month': 'March', 'start_day': '8',
        'start_hour': 'All Hours', 'stop_year': '2011',
        'stop_month': 'March', 'stop_day': '8',
        'stop_hour': 'All Hours', 'xsize': 'All',
        'ysize': 'All', 'wave': 'all',
        'filter': 'all', 'object': 'all',
        'xbin': 'all', 'ybin': 'all',
        'highc': 'all',
    }
    data = urllib.urlencode(values)  # URL-encode the form fields
    print(data)  # show the encoded payload
    req = urllib2.Request(URL, data)  # passing data makes this a POST
    response = urllib2.urlopen(req)  # send the request
    try:
        return response.read()  # page content returned by the server
    finally:
        response.close()
#PUT
import urllib2

# Build the request with its body and Content-Type header in one call.
request = urllib2.Request(
    'http://example.org',
    data='your_put_data',
    headers={'Content-Type': 'your/contenttype'},
)
# Overriding get_method makes urlopen issue a PUT for this request.
request.get_method = lambda: 'PUT'
response = urllib2.urlopen(request)
#DELETE
import urllib2
# NOTE: `uri` is a placeholder that this snippet does not define; assign the
# target URL to it before running.
request = urllib2.Request(uri)
# Overriding get_method makes urlopen issue a DELETE for this request.
request.get_method = lambda: 'DELETE'
response = urllib2.urlopen(request)

阅读全文

0 0