python学习笔记 第十一章

来源:互联网 发布:微信浏览器打开淘宝 编辑:程序博客网 时间:2024/05/21 10:07
# Download a text file from the web with the requests module.
import requests

# Send an HTTP GET request for the plain-text play.
res = requests.get('http://www.gutenberg.org/cache/epub/1112/pg1112.txt')
# Raise an exception if the download failed (4xx/5xx status).
res.raise_for_status()
# Also check the status code explicitly.
print(res.status_code == requests.codes.ok)
print(len(res.text))
# Print the first 250 characters of the downloaded text.
print(res.text[:250])
# Write the response body to disk in binary mode; the `with` block
# guarantees the file is closed even if a write raises.
with open('RomeoAndJuliet.txt', 'wb') as playFile:
    # iter_content() yields the body in bytes chunks of the given size,
    # so a large download never has to fit in memory at once.
    for chunk in res.iter_content(100000):
        playFile.write(chunk)
# NOTE: requests fetches the page's raw data; for a non-txt URL this
# would save HTML or other markup verbatim.

# Demonstrate handling a failed download: raise_for_status() throws for
# a 4xx/5xx response, and we report the problem instead of crashing.
import requests

res = requests.get('http://inventwithpython.com/page_that_does_not_exist')
try:
    res.raise_for_status()
except Exception as exc:
    print('There was a problem: %s' % (exc))
# Parse HTML with Beautiful Soup: one document fetched from the web and
# one read from a local file, then query it with CSS selectors.
import requests, bs4

# Fetch and parse a live page.
res = requests.get('http://nostarch.com')
res.raise_for_status()
noStarchSoup = bs4.BeautifulSoup(res.text, "lxml")
print(noStarchSoup)

# Parse a local HTML file.  The original opened 'example.html' twice and
# never closed either handle; a single `with` block fixes the leak, and
# the first parse's result was immediately overwritten anyway.
with open('example.html') as exampleFile:
    exampleSoup = bs4.BeautifulSoup(exampleFile.read(), "lxml")

# select() takes a CSS selector and returns a list of matching Tag objects.
elems = exampleSoup.select('#author')
print(type(elems))
print(len(elems))
print(str(elems[0]))
print(elems[0].attrs)

pElems = exampleSoup.select('p')
print(str(pElems[0]))
print(pElems[0].getText())



原创粉丝点击