爬虫系列15、为链接添加一个回调函数类

来源:互联网 发布:二姐alice的淘宝号码 编辑:程序博客网 时间:2024/06/16 09:49
class ScrapeCallback:
    """Callable that extracts table fields from a page and appends them to a CSV.

    Creating an instance opens ``countries.csv`` in the current working
    directory (truncating any existing file) and writes the header row.
    Calling the instance with ``(url, html)`` writes one data row when the
    URL contains ``/view/``; any other URL is ignored.

    The instance owns the output file. Call :meth:`close` when finished, or
    use the instance as a context manager, so the file is released
    deterministically.
    """

    def __init__(self):
        # newline='' is required by the csv module: without it, platforms
        # that translate '\n' on write (Windows) emit '\r\r\n' and the
        # output shows a blank line after every row.
        self._file = open('countries.csv', 'w', newline='')
        self.writer = csv.writer(self._file)
        # Header row; the same names are used to build the CSS selectors.
        self.fields = ('area', 'ranked', 'university')
        self.writer.writerow(self.fields)
        self._file.flush()

    def __call__(self, url, html):
        """Write one CSV row for ``html`` if ``url`` contains ``/view/``.

        Args:
            url: Address of the downloaded page; only used for filtering.
            html: Page markup, parsed with ``lxml.html.fromstring``.

        Raises:
            IndexError: If the page has no cell matching one of the fields.
        """
        if not re.search('/view/', url):
            return
        tree = lxml.html.fromstring(html)
        row = []
        for field in self.fields:
            selector = 'table > tr#places_{}_row > td.w2p_fw'.format(field)
            # Only the first matching cell is used; a page without the cell
            # raises IndexError rather than silently producing blank data.
            row.append(tree.cssselect(selector)[0].text_content())
        self.writer.writerow(row)
        # Flush per row so already-scraped data survives an interrupted run.
        self._file.flush()

    def close(self):
        """Flush and close the CSV file; safe to call more than once."""
        if not self._file.closed:
            self._file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so exceptions raised inside the with-block propagate.
        self.close()
阅读全文
0 0
原创粉丝点击