1. 运行 wget -i Baidump3URL.txt,下载下面列出的各个歌曲索引页面。
2. 运行 BaiduMp3.py > baidump3.txt,从下载的页面中提取歌曲名字。
3. 运行 del *.htm?,删除下载的页面文件。
最终得到的 baidump3.txt 即为 baidu 所有歌曲的名字列表。


Baidump3URL.txt:


http://list.mp3.baidu.com/song/A.htm
http://list.mp3.baidu.com/song/B.htm
http://list.mp3.baidu.com/song/C.htm
http://list.mp3.baidu.com/song/D.htm
http://list.mp3.baidu.com/song/E.htm
http://list.mp3.baidu.com/song/F.htm
http://list.mp3.baidu.com/song/G.htm
http://list.mp3.baidu.com/song/H.htm
http://list.mp3.baidu.com/song/J.htm
http://list.mp3.baidu.com/song/K.htm
http://list.mp3.baidu.com/song/L.htm
http://list.mp3.baidu.com/song/M.htm
http://list.mp3.baidu.com/song/N.htm
http://list.mp3.baidu.com/song/O.htm
http://list.mp3.baidu.com/song/P.htm
http://list.mp3.baidu.com/song/Q.htm
http://list.mp3.baidu.com/song/R.htm
http://list.mp3.baidu.com/song/S.htm
http://list.mp3.baidu.com/song/T.htm
http://list.mp3.baidu.com/song/W.htm
http://list.mp3.baidu.com/song/X.htm
http://list.mp3.baidu.com/song/Y.htm
http://list.mp3.baidu.com/song/Z.htm


BaiduMp3.py:


#!/usr/bin/pythonimport urllibimport stringimport redef GetContent (url):  try:    URLFile=urllib.urlopen(url)  except IOError:    print "\nCan not retrieve ",url,"!\nThe connection cannot be made!\n"  else:    HTMLText=URLFile.read()    URLFile.close()    return HTMLTextif(__name__=="__main__"):  file=open('Baidump3URL.txt','r')  fileread=file.read()  urls=fileread.split('\n')    queue=[]  #  regexp=re.compile(r'" target=_blank>(.*?)</[aA]></td>')  for url in urls:    #print url    url=re.sub('http://list.mp3.baidu.com/song','.',url)    #print url        content=open(url,'r').read()    lines=content.split('\n')        for line in lines:      #print line      ccc=regexp.search(line)      if(ccc):        word=ccc.groups()[0]        if word in queue:          pass        else:          queue.append(word)              #print url  file.close()    regexp1=re.compile(r'[- ](.*)')    for w in queue:      w = unicode(w,'cp936')    w = w.encode('utf8')        ccc=regexp1.search(w)    if(ccc):      w=ccc.groups()[0]      #print w            w=re.sub('《|》|,|\.|·|!','',w)    if(''==w):      continue    print w