根据用户ID爬取Twitter数据

来源:互联网 发布:pic16f877a单片机 编辑:程序博客网 时间:2024/06/05 14:02
我需要爬取的用户ID存放在一个.csv文件里(每行第一列是一个用户ID)。先在Twitter开发者官网注册一个APP,获得你的key和secret,把它们填入下边代码中对应的空字符串,就可以爬取tweets了。
每个ID对应的tweets会输出并保存到一个单独的.csv文件里(文件名为"用户ID_tweets.csv"),这个.csv文件就在你运行这段代码的文件夹下。
#!/usr/bin/env python
# encoding: utf-8
import tweepy
import csv

consumer_key = ""
consumer_secret = ""
access_key = ""
access_secret = ""


def get_all_tweets(user_id):
    """Download a user's timeline and save it as ``<user_id>_tweets.csv``.

    The timeline is fetched with repeated ``api.user_timeline`` calls
    (``count=200`` each), walking backwards through ``max_id`` until a call
    returns nothing. For every tweet the id, creation time and UTF-8 encoded
    text are written to a CSV file in the current working directory.

    If the very first request returns no tweets, a message is printed and no
    file is written (previously this case raised ``IndexError``).

    This script targets Python 2: the text is encoded to bytes and the CSV is
    opened in binary mode, as the Python 2 ``csv`` module expects.

    Args:
        user_id: The Twitter user ID, passed to tweepy as ``user_id`` and
            used as the prefix of the output file name.

    Errors raised by the tweepy calls are not caught here and propagate to
    the caller.
    """
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # Accumulates every tweet fetched for this user.
    alltweets = []

    # The first request has no max_id, so it starts from the newest tweet.
    new_tweets = api.user_timeline(user_id=user_id, count=200)
    if not new_tweets:
        # Nothing to index with alltweets[-1] and nothing to save.
        print("no tweets returned for user %s, nothing written" % user_id)
        return

    alltweets.extend(new_tweets)

    # Id of the oldest tweet seen so far, less one.
    oldest = alltweets[-1].id - 1

    # Keep grabbing tweets until a request comes back empty.
    while len(new_tweets) > 0:
        print("getting tweets before %s" % (oldest))

        # All subsequent requests use max_id to prevent duplicates.
        new_tweets = api.user_timeline(user_id=user_id, count=200, max_id=oldest)
        alltweets.extend(new_tweets)

        # Move the cursor just below the oldest tweet collected so far.
        oldest = alltweets[-1].id - 1

        print("...%s tweets downloaded so far" % (len(alltweets)))

    # Transform the tweepy tweets into a 2D array that will populate the csv.
    outtweets = [[tweet.id_str, tweet.created_at, tweet.text.encode("utf-8")] for tweet in alltweets]

    # Write the csv next to where the script is run.
    with open('%s_tweets.csv' % user_id, 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(["tweet_id", "created_at", "text"])
        writer.writerows(outtweets)


if __name__ == '__main__':
    # Location of the CSV file that lists the user IDs to crawl; the first
    # column of each row is used as the ID. Replace this example path with
    # the location of your own file.
    user_id_file = 'e:/file/userID.csv'

    # 'rb' because this script uses the Python 2 csv module.
    with open(user_id_file, 'rb') as f:
        for row in csv.reader(f):
            # Errors are caught per user so that one problematic user ID is
            # skipped instead of aborting the whole run.
            try:
                get_all_tweets(row[0])
            except tweepy.TweepError:
                print('Failed to run the command on that user, Skipping...')
            except IndexError:
                # Raised for an empty row (no row[0]) or from inside
                # get_all_tweets.
                print('List index out of range, Skipping...')
           

1 0
原创粉丝点击