提供一个Twitter的API（Python）

来源：互联网　发布：个人软件著作权申请　编辑：程序博客网　时间：2024/06/05 10:26

Twitter 的 API 有很多，但总感觉不给力，总是抠抠搜搜，不肯把数据给全。我总结了一下，自己封装了几个 API。

代码如下：

'''
Created on Jun 22, 2013

@author: Yang

Helpers on top of python-twitter for collecting as many tweets as possible:
paged search by query or by location, and the tweets a user posted around a
given tweet.
'''
import datetime
import json
import math
import time
from email.utils import mktime_tz, parsedate, parsedate_tz

try:
    # None of the helpers below touch the ``twitter`` module directly; they
    # only use the client object handed to them.  Keeping the import optional
    # lets the module be imported (and tested with a stand-in client) on a
    # machine where python-twitter is not installed.
    import twitter
except ImportError:
    twitter = None

# Building the client (python-twitter).  Never commit real credentials to
# source control; load them from the environment or a private config file:
#
#   t = twitter.Api(consumer_key, consumer_secret, token, token_secret)

# Page size requested from the search endpoint, unchanged from the original.
# NOTE(review): Twitter's search endpoint is documented to cap ``count`` at
# 100, so the service may return fewer results per page -- confirm.
_SEARCH_PAGE_SIZE = 200

# Number of follow-up pages requested after the first search page.
_EXTRA_SEARCH_PAGES = 5


def timestamp(str):
    """Convert a date string such as a tweet's ``created_at`` to Unix time.

    The UTC offset embedded in the string (e.g. ``+0000``) is honoured, so
    the result does not depend on the local time zone of the machine.  When
    the string carries no offset the fields are interpreted as local time.

    Args:
        str: RFC 2822 style date string, e.g.
            ``"Sat Jun 22 10:26:00 +0000 2013"``.  (The parameter name is
            kept for backward compatibility although it shadows the builtin.)

    Returns:
        Seconds since the epoch, as a float.

    Raises:
        ValueError: if the string cannot be parsed as a date.
    """
    parsed = parsedate_tz(str)
    if parsed is None:
        raise ValueError('unparseable date string: %r' % (str,))
    return float(mktime_tz(parsed))


def _status_to_dict(status):
    """Decode one status object into a plain dict.

    Relies on ``str(status)`` being a JSON document, as the rest of this
    module always has.
    """
    return json.loads(str(status))


def _paged_search(t, **criteria):
    """Run ``t.GetSearch(**criteria)`` and walk back through older pages.

    Fetches the first page plus up to ``_EXTRA_SEARCH_PAGES`` older pages and
    stops early as soon as a page comes back empty.
    """
    params = dict(criteria, count=_SEARCH_PAGE_SIZE)
    collected = []
    for _ in range(1 + _EXTRA_SEARCH_PAGES):
        batch = [_status_to_dict(status) for status in t.GetSearch(**params)]
        if not batch:
            break
        collected.extend(batch)
        # max_id is inclusive: ask for ids strictly below the oldest tweet
        # seen so far, otherwise that tweet is returned again on every page.
        params['max_id'] = min(tweet['id'] for tweet in batch) - 1
    return collected


def SearchQuery(query, t):
    """Search tweets matching a query string.

    Args:
        query: search term passed to ``GetSearch`` as ``term``.
        t: API client object providing ``GetSearch``.

    Returns:
        List of tweets as dicts, newest page first; empty if nothing matched.
    """
    return _paged_search(t, term=query)


def SearchLocation(geo, t):
    """Search tweets by location.

    Args:
        geo: value passed unchanged to ``GetSearch`` as ``geocode``.
        t: API client object providing ``GetSearch``.

    Returns:
        List of tweets as dicts, newest page first; empty if nothing matched.
    """
    return _paged_search(t, geocode=geo)


def GetUsertweets(id, tweetid, tweettime, delay=24*60*60, t=None):
    """Collect a user's tweets posted within ``delay`` seconds of a tweet.

    The timeline is queried twice around the reference tweet: once with
    ``max_id=tweetid`` (that tweet and older ones) and once with
    ``since_id=tweetid`` (newer ones).  Only tweets whose ``created_at`` lies
    within ``delay`` seconds of ``tweettime`` are kept.

    Args:
        id: user identifier, passed as the first positional argument of
            ``GetUserTimeline``.
        tweetid: id of the reference tweet.
        tweettime: ``created_at`` string of the reference tweet.
        delay: half-width of the time window in seconds (default: one day).
        t: API client object providing ``GetUserTimeline``.  Required; it
            only has a default because it follows ``delay`` in the signature.

    Returns:
        List of tweets as dicts, older-side results first.

    Raises:
        TypeError: if ``t`` is not supplied.
        ValueError: if ``tweettime`` or a tweet's ``created_at`` is not a
            parseable date.
    """
    if t is None:
        raise TypeError("GetUsertweets() requires the API client argument 't'")

    # Parse the reference time first so a bad value fails before any request.
    anchor = timestamp(tweettime)

    nearby = []
    for bound in ({'max_id': tweetid}, {'since_id': tweetid}):
        for status in t.GetUserTimeline(id, count=100, **bound):
            tweet = _status_to_dict(status)
            created = timestamp(str(tweet['created_at']))
            if abs(created - anchor) <= delay:
                nearby.append(tweet)
    return nearby

这段代码里主要有两类函数：搜索函数（SearchQuery、SearchLocation）和 GetUsertweets。

其中搜索函数反复使用 max_id 向更早的结果翻页，GetUsertweets 则同时使用 max_id 和 since_id，这样就能尽可能多地抓取 tweets。