python网页提取

来源:互联网 发布:国民党真实抗战知乎 编辑:程序博客网 时间:2024/06/06 04:42
#!/usr/bin/python
# -*- coding: utf-8 -*-
#encoding=utf-8
#Filename:urllib2-header.py
  
import urllib2
import sys
  
url = 'http://notepad.cc/share/W7Cgs95rxW'
  
req = urllib2.Request(url)
#req.add_header('Referer','http://notepad.cc/lianghui')
req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0')
r = urllib2.urlopen(req)


html = r.read()
receive_header = r.info()
  
html = html.decode('utf-8').encode(sys.getfilesystemencoding())
  
#print receive_header
#print '#####################################'
print html
0 0
原创粉丝点击