python新浪微博模拟登陆

来源:互联网 发布:淘宝搜什么能买到片 编辑:程序博客网 时间:2024/04/29 09:02

    一直苦恼于新浪微博API开放的接口限制太多。只能用爬虫获取数据,然后我花了很长时间去找模拟登陆代码,根本没有一个能成功的。于是我就参考了某篇文章,对代码做了些小修改,终于可以用了。

相关参考可以看http://blog.csdn.net/ta790799213/article/details/44205351

二:在模拟登陆的时候出现了个retcode=4049,解决办法:在http://login.sina.com.cn/?r=%2Fmember%2Fsecurity%2Fprotect.php登陆进去后,设置部分区域登陆不用验证码。如果不行,登陆微博,在登陆保护页同样设置一下区域登陆不用验证码。

三:代码如下:

# -*- coding: utf-8 -*-import requestsimport base64import reimport urllibimport urllib2import rsaimport jsonimport binasciiimport stringfrom weibo import Clientimport randomimport timeimport logging, logging.handlerscode = "5f9f84b2aa3198032416963c84c2d182"app_key = "1110261163"app_secret = "0de95a319a66c755c008b6332d7dd063"redirect_uri = "https://api.weibo.com/oauth2/default.html"class SinaCrawler:    def __init__(self, max_page):        self.session = None        self.MAX_PAGE = max_page        token = {u'access_token': u'2.00pE39sBn1UT7E61e7174d95TdYVED', u'remind_in': u'157679999', u'uid': u'1720813027', u'expires_at': 1575304674}        self.client = Client(app_key, app_secret, redirect_uri, token)        self.f = open("data", "w")    def __del__(self):        self.f.close()    def userlogin(self,username,password):        session = requests.Session()        url_prelogin = 'http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su=&rsakt=mod&client=ssologin.js(v1.4.18)&_=1430736851146'        url_login = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.8)'        #get servertime,nonce, pubkey,rsakv        resp = session.get(url_prelogin)        print resp.content        p = re.compile('{.*}')        json_data  = re.search(p, resp.content).group()        print json_data        data       = eval(json_data)        servertime = data['servertime']        print 'servertime:',servertime        nonce      = data['nonce']        pubkey     = data['pubkey']        rsakv      = data['rsakv']        # calculate su        su  = base64.b64encode(urllib.quote(username))        #calculate sp        rsaPublickey= int(pubkey,16)        key = rsa.PublicKey(rsaPublickey,65537)        message = str(servertime) +'\t' + str(nonce) + '\n' + str(password)        sp = binascii.b2a_hex(rsa.encrypt(message,key))        postdata = {            'entry': 'weibo',            'gateway': '1',            'from': '',            
'savestate': '7',            'userticket': '1',            'ssosimplelogin': '1',            'vsnf': '1',            'vsnval': '',            'su': su,            'service': 'miniblog',            'servertime': servertime,            'nonce': nonce,            'pwencode': 'rsa2',            'sp': sp,            'encoding': 'UTF-8',            'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',            'returntype': 'META',            'rsakv' : rsakv,        }        resp = session.post(url_login,data = postdata)        # print resp.headers        print resp.content        login_url = re.findall('{location\.replace\(\'(.+?)\'\);}',resp.content)        print 'login_url',login_url,type(login_url)        respo = session.get(login_url[0])        print respo.content        self.session = session            def do_search(self, query):        """ do search         Args:            query : str indicating the query         Return:            None        """        self.f.write('screen_name\tgender\trelated_msg\tregister_time\tlocation\n')        for page in range(1, self.MAX_PAGE + 1):            time.sleep(random.random())            self.do_search_page(page, query)    def do_search_page(self, page, query):        """ get search result of the page in the search html page         Args:            page : int indicating the number of the page        Return:            None        """        search_url  = "http://s.weibo.com/wb/%s&page=%d" % (query, page)        html_page = self.session.get(search_url)        print html_page.content#         print all_results#         res_cnt = 1#         for res in all_results:#             print 'page %d result %d done' % (page, res_cnt)#             res_cnt += 1#             information = self.get_person_info(res)#     def get_search_result(self, html_content):#         """ get search result from the html content #             #         Args:#             html_content: str for 
storing html content of the search page#         #         Return:#             None#         """#         #content = re.findall(r"\"pid\":\"pl_user_feedList\"(?P<tips>[\w\W]*?)", html_content)#         html_content = html_content.strip()#         content = re.findall(r"\"pid\":\"pl_wb_feedlist\"(?P<tips>[\w\W]*?)</script>", html_content)[0]        #         clean_content = string.replace(content, "\\\\", "\\")#         search_result = re.findall(r"<div class=\\\"WB_cardwrap S_bg2 clearfix\\\" >(?P<tips>[\w\W]*?)<\\/div>\\n<\\/div>", clean_content)#         return search_result               if __name__ == '__main__':    sina_crawler = SinaCrawler(2)    sina_crawler.userlogin('18650306405', 'lkz881199')    query = 'iphone'    #print type(query)    q = string.replace(str(urllib.quote(query)), "%", "%25")    print q    sina_crawler.do_search(q)
四:结果如图:

模拟登陆成功后,在每个页面上都会有你的个人信息,如微博昵称:这个程序员不太冷2,还有uid。


0 0