BeautifulSoup 使用笔记

来源：互联网 | 发布：vs 编程入门视频教程 | 编辑：程序博客网 | 时间：2024/06/06 02:06

1.在线文档 http://www.crummy.com/software/BeautifulSoup/bs4/doc/

2.常用方法

选择器 find_all(name, attrs, recursive, text, limit, **kwargs)

3.主要调用方法

4.完整代码

# -*- coding: utf-8 -*-
"""Demonstrate common BeautifulSoup queries on a small sample document."""
from bs4 import BeautifulSoup


def main():
    """Parse a sample HTML snippet and print the result of each query.

    Every query result is written to standard output; nothing is returned.
    Single-argument ``print(...)`` calls are used so the script behaves the
    same under Python 2 and Python 3.
    """
    # Adjacent literals keep the sample markup on readable lines while
    # producing exactly one string with no extra whitespace inserted.
    html = (
        "<html><head><title>The Dormouse's story</title></head> "
        '<p class="title"><b>The Dormouse\'s story</b></p> '
        '<p class="story">Once upon a time there were three little sisters; '
        'and their names were'
        '<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,'
        '<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and'
        '<a href="http://example.com/tillie" class="cla" id="link3">Tillie</a>;'
        'and they lived at the bottom of a well.</p> '
        '<p class="story">...</p>'
    )

    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed (and warns), so output could differ between machines.
    soup = BeautifulSoup(html, "html.parser")

    print(soup.get_text())  # all text in the document
    print(soup.find_all('title'))  # the <title> element
    print(soup.find_all('a'))  # every <a> link
    print(soup.find_all(id="link2"))  # look up elements by id
    print(soup.find_all("a", class_="cla"))  # look up <a> elements by class

    # Select by the class attribute (keyword form and CSS selector form).
    print(soup.find_all("a", class_="sister"))
    print(soup.select("p.title"))

    # Select by passing the attributes as a dictionary.
    print(soup.find_all("a", attrs={"class": "sister"}))

    # Select by text content: a single string, then a list of candidates.
    # NOTE(review): newer bs4 releases name this argument `string`; `text`
    # is kept here to match the signature quoted in these notes.
    print(soup.find_all(text="Elsie"))
    print(soup.find_all(text=["Tillie", "Elsie", "Lacie"]))

    # Limit the number of results returned.
    print(soup.find_all("a", limit=2))


if __name__ == '__main__':
    main()

0 0