豆瓣电台加心歌曲自动下载(python实现)
来源:互联网 发布:linux wifi 配置 编辑:程序博客网 时间:2024/05/16 18:36
最近写了个自动下载豆瓣个人电台加心歌曲的小程序,基本能够下载,但需要先手动将"http://douban.fm/mine?type=liked"的各个分页全部保存到程序所在目录(依次命名为 0.html、1.html……,程序固定读取 17 页),比较麻烦。由于还没有实现程序登录豆瓣的功能,暂时先这样用吧。
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-
"""Download the songs "liked" on Douban FM by searching music.yahoo.cn.

Usage: save every page of http://douban.fm/mine?type=liked by hand as
0.html, 1.html, ... in the working directory, then run this script.
Downloaded files are written to ./down/.

The script was written for Python 2.7; the import shim and the text helpers
below let the same source run on Python 3 as well.
"""
import os
import re
import socket
import sys

try:  # Python 2
    import cookielib
    import urllib2
    from urllib import quote_plus, unquote, urlencode, urlopen
except ImportError:  # Python 3
    import http.cookiejar as cookielib
    import urllib.request as urllib2
    from urllib.parse import quote_plus, unquote, urlencode
    from urllib.request import urlopen

# Per-operation socket timeout (seconds) applied to every request below.
socket.setdefaulttimeout(1)

_PY2 = sys.version_info[0] == 2

# Number of saved "liked" pages (0.html .. 16.html) read by main().
PAGE_COUNT = 17

_DOWNLOAD_DIR = "./down/"
_YAHOO_SEARCH_URL = "http://music.yahoo.cn/s?q={0}&m=0"

_TAG_OR_ENTITY = re.compile(r'(<.*?>)|(&.*?;)', re.S)

# Full-width parentheses: on Python 2 the pages are UTF-8 byte strings, so the
# pattern has to spell out the three encoded bytes; on Python 3 the text is
# decoded and the code points themselves are matched.
if _PY2:
    _FW_OPEN, _FW_CLOSE = r'\xef\xbc\x88', r'\xef\xbc\x89'
else:
    _FW_OPEN, _FW_CLOSE = u'\uff08', u'\uff09'
_BRACKETED = re.compile(
    r'(?:[\(\[{]|' + _FW_OPEN + r').*?(?:[\)\]}]|' + _FW_CLOSE + r')')

_LIKED_TABLE = re.compile(
    r'<table\s*class="olts"\s*width="100%">.*?</table>', re.S)
_LIKED_TBODY = re.compile(r'<tbody>.*?</tbody>', re.S)
_LIKED_ROW = re.compile(
    r'<tr>\s*<td>(.*?)</td>.*?<span>(.*?)</span>.*?</tr>', re.S)

_RESULT_TABLE = re.compile(r'<div class="yst-music">.*?</table>', re.S)
_RESULT_ROW = re.compile(
    r'<tr>\s*<td class="m_song">\s*<a href=".*?url=(.*?)"'
    r'.*?>(.*?)</a>'
    r'.*?<td class="m_singer">.*?>(.*?)</a>'
    r'.*?<td.*?<td>(.*?)</td>'
    r'.*?<td>(.*?)[mM][bB]'
    r'.*?</tr>',
    re.S)


def _to_text(data):
    """Return *data* as the interpreter's native ``str`` type.

    On Python 3 raw bytes are decoded as UTF-8 (undecodable bytes are
    replaced); on Python 2 byte strings are already ``str`` and pass through.
    """
    if not _PY2 and isinstance(data, bytes):
        return data.decode("utf-8", "replace")
    return data


def getpag(url):
    """Fetch *url* and return the response body, or "" on any failure."""
    try:
        response = urlopen(url)
        try:
            return _to_text(response.read())
        finally:
            response.close()
    except Exception:
        # Best effort: callers treat an empty page as "no result".
        print("error in getpag({0})".format(url))
        return ""


def removehtml(s):
    """Return *s* with HTML tags and ``&...;`` entities removed."""
    return _TAG_OR_ENTITY.sub("", s)


def removeotherword(s):
    """Return *s* without bracketed remarks and surrounding whitespace.

    Text enclosed in (), [], {} or full-width parentheses is dropped.  The
    whitespace is stripped *after* the brackets are removed, so that
    "Song (live)" yields "Song" rather than "Song " with a trailing space.
    """
    return _BRACKETED.sub("", s).strip()


# login douban & save cookie
# todo
def logindouban(email='vodmaker@gmail.com', password='xxx'):
    """Post the Douban login form and print the URL the server lands on.

    Unfinished (see the todo above): nothing else in this script calls it.
    An opener with a cookie jar is installed globally via urllib2.

    NOTE(review): the default credentials are hard-coded in source; pass
    real ones as arguments instead of editing them in.  The encoded form is
    deliberately not printed because it contains the password.
    """
    loginurl = "http://www.douban.com/accounts/login"
    data = urlencode({
        'source': 'simple',
        'form_email': email,
        'form_password': password,
        'remember': 'on',
    })
    if not isinstance(data, bytes):
        # Python 3 requires the POST body to be bytes.
        data = data.encode("ascii")
    cj = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)
    opener.addheaders = [("User-agent", "Mozilla/5.0 (X11; Linux i686; rv:2.0.1) Gecko/20100101 Firefox/4.0.1")]
    req = urllib2.Request(loginurl, data)
    print(urllib2.urlopen(req).geturl())


# s1 match exactly to s2
def matchexact(s1, s2):
    """Return True if *s1* occurs literally in *s2*, ignoring case.

    *s1* is escaped first: titles such as "C++" or "What?" contain regex
    metacharacters and must not be interpreted as patterns.
    """
    return re.search(re.escape(s1), s2, re.I) is not None


# s1 match to s2
def matchmost(s1, s2):
    """Return True if *s1* occurs in *s2*, ignoring case and whitespace runs.

    Every run of whitespace in *s1* may match any amount of whitespace
    (including none) in *s2*; everything else is matched literally.
    """
    pattern = r'\s*'.join(re.escape(part) for part in re.split(r'\s+', s1))
    return re.search(pattern, s2, re.I) is not None


# [[name, artist], ...] per list_pag
def getmusiclist_perpag(list_pag):
    """Extract ``[[name, artist], ...]`` from one saved "liked" page.

    Returns an empty list when the page does not contain the expected
    ``<table class="olts">`` / ``<tbody>`` structure.
    """
    table = _LIKED_TABLE.search(list_pag)
    if table is None:
        return []
    tbody = _LIKED_TBODY.search(table.group())
    if tbody is None:
        return []
    musiclist = []
    for m in _LIKED_ROW.finditer(tbody.group()):
        music = removeotherword(m.group(1))
        artist = removeotherword(m.group(2))
        print("music:" + music + "artist:" + artist)
        musiclist.append([music, artist])
    return musiclist


def _parse_size(text):
    """Return the size in MB parsed from *text*, or 0.0 if it is not a number."""
    try:
        return float(text)
    except ValueError:
        return 0.0


def _safe_filename(name):
    """Replace path separators so *name* stays inside the download directory."""
    return name.replace("/", "_").replace("\\", "_")


def _parse_candidates(htable):
    """Return ``[(url, title, artist, filetype, size_mb), ...]`` for each result row."""
    candidates = []
    for m in _RESULT_ROW.finditer(htable):
        candidates.append((
            unquote(m.group(1)),
            removeotherword(removehtml(m.group(2))),
            removeotherword(removehtml(m.group(3))),
            removeotherword(removehtml(m.group(4))),
            _parse_size(removeotherword(removehtml(m.group(5)))),
        ))
    return candidates


def _download_first_match(candidates, music, artist, matcher, min_size_mb):
    """Download the first candidate accepted by *matcher*; return True on success.

    A candidate qualifies when its title and artist match the wanted ones
    and it is larger than *min_size_mb*.  A failed download is reported and
    the next qualifying candidate is tried.
    """
    for downurl, music_t, artist_t, type_t, size_mb in candidates:
        if not (matcher(music_t, music) and matcher(artist_t, artist)
                and size_mb > min_size_mb):
            continue
        print("download from :" + downurl + "")
        try:
            response = urlopen(downurl)
            try:
                music_stream = response.read()
            finally:
                response.close()
            if not os.path.isdir(_DOWNLOAD_DIR):
                os.makedirs(_DOWNLOAD_DIR)
            target = os.path.join(
                _DOWNLOAD_DIR,
                _safe_filename(music) + "." + _safe_filename(type_t))
            with open(target, "wb") as out:
                out.write(music_stream)
        except Exception as e:
            # Report the failure, then fall through to the next candidate.
            print(e)
            continue
        print("download success: music:{0}, artist:{1}".format(music, artist))
        return True
    return False


# download music from mp3.yahoo.com
# parameter musiclist [[name, artist], ...]
def downloadfromyahoo(musiclist):
    """Search music.yahoo.cn for every ``[name, artist]`` pair and download it.

    For each song the result list is scanned twice: first for a literal
    title/artist match larger than 2 MB, then for a whitespace-tolerant
    match larger than 1 MB.  Files are written to ./down/<name>.<type>.
    """
    for ma in musiclist:
        music = ma[0]
        artist = ma[1]
        print("Music:\t" + music + "\tArtist:\t" + artist + "is Downloading...")
        listpag = getpag(_YAHOO_SEARCH_URL.format(quote_plus(music)))
        table = _RESULT_TABLE.search(listpag)
        if table is None:
            print("No search result of {0} in yahoo.cn".format(music))
            continue
        candidates = _parse_candidates(table.group())
        find = (_download_first_match(candidates, music, artist, matchexact, 2)
                or _download_first_match(candidates, music, artist, matchmost, 1))
        if not find:
            print("download failed: music:{0}, artist:{1}".format(music, artist))
# end downloadfromyahoo func


def main():
    """Read the saved pages 0.html .. PAGE_COUNT-1 and download every song."""
    musiclist = []
    for i in range(PAGE_COUNT):
        with open("{0}.html".format(i), "rb") as f:
            musiclist += getmusiclist_perpag(_to_text(f.read()))
    downloadfromyahoo(musiclist)


if __name__ == "__main__":
    main()
豆瓣似乎有屏蔽程序访问页面的措施,目前我仍卡在如何实现登录这一部分,不能保证一定能实现"登录豆瓣并自动抓取加心页面"的功能,程序的更新期限未知。
- 豆瓣电台加心歌曲自动下载(python实现)
- 豆瓣电台歌曲链接信息
- 豆瓣电台总结(三)歌曲列表展现
- Python爬虫学习记录(3)——用Python获取虾米加心歌曲,并获取MP3下载地址
- 学习豆瓣电台总结(一)
- 豆瓣FM电台Chrome扩展——下载
- swift实战-豆瓣电台
- 豆瓣电台api
- Swift3豆瓣电台
- (python)下载喜马拉雅电台的音频
- 打算写个下载豆瓣fm歌曲的小程序
- 豆瓣FM加心音乐批量下载
- 豆瓣FM歌曲播放圆形进度的实现
- ios端豆瓣电台 DoubanFM
- 豆瓣机器人 自动加入/退出小组、自动在小组发帖/删帖、自动回复 Python实现代码
- 获取豆瓣FM的数据(频道列表,歌曲列表)
- python 百度top100和top500歌曲下载
- Python爬虫下载QQ音乐网站歌曲
- 检索 COM 类工厂中 CLSID 为 {00024500-0000-0000-C000-000000000046} 的组件失败
- 面试题
- holdlock rowslock
- c#文件读写
- 脚本收缩数据库日志
- 豆瓣电台加心歌曲自动下载(python实现)
- 百度逾三亿美元战略投资“去哪儿网”
- PS技巧
- HDU 1757 A Simple Math Problem
- MFC DestroyWindow
- 动态代理
- 数制和码制及其之间的转换关系
- Android 开发
- Solaris 10 如何解决:/usr/include/sys/siginfo.h:259: error: 'ctid_t' is used as a type, but is not