[导入]百度歌曲MP3 top 500批量下载

参考 http://www.wespoke.com/archives/000978.html,用 Python 重写了一下,可以通过 -t 指定下载线程数,通过 -d 指定保存路径。

#! /usr/bin/python
import urllib, re,sys,os,os.path,getopt

# Defaults; both can be overridden on the command line:
#   -t N    number of download threads
#   -d DIR  directory the .mp3 files are saved into
threadNum = 20
savePath = "mp3-2"
optlist, left = getopt.getopt(sys.argv[1:], "t:d:")
for opt in optlist:
    # Each entry is an (option, value) pair, e.g. ('-t', '5').
    print(opt)
    if opt[0] == '-t':
        threadNum = int(opt[1])
    if opt[0] == '-d':
        savePath = opt[1]

print("threadnum=" + str(threadNum))
print("savePath=" + savePath)

# Create the output directory (and any missing parents) on first use.
if not os.path.exists(savePath):
    os.makedirs(savePath)

# `base` is prepended to the relative song-page links found on the index
# page at `url`.
base = "http://list.mp3.baidu.com/topso/"
url = "http://list.mp3.baidu.com/topso/mp3topsong.html"

def getUrlData(url):
    """Fetch `url` and return its content as a list of lines.

    The request is attempted up to three times. If every attempt fails,
    an empty list is returned instead of raising, so callers can treat a
    failed fetch as "no data".
    """
    for attempt in range(3):
        try:
            f = urllib.urlopen(url)
            try:
                return f.readlines()
            finally:
                # Close the handle even when reading fails part-way.
                f.close()
        except Exception:
            # Best-effort fetch: swallow the error and retry. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
    return []

data = getUrlData(url)
pattern = re.compile( r'href="(.*?tsomp3.htm)' )
target = [];
for line in data:
    if ( line.find( "tsomp3.htm" )!=-1 ):
        items = pattern.findall( line )
        for item in items:
            target.append( item )

print "find ",len( target )," mp3 "

mp3Pattern = re.compile( r'href="(.*?\.mp3)"' )
titlePattern = re.compile( r'<title>.*?_(.*?)\s+</title>' )

import threading

lock = threading.Lock()

def getMp3():
    """Worker loop: download songs until the shared queue is empty.

    Repeatedly takes one song-page link from the module-level `target`
    list (under `lock`), fetches that page, extracts the song title and
    candidate .mp3 links, and tries the candidates in order, saving to
    ``savePath/<title>.mp3``. A download counts as successful only when
    the saved file is larger than 500 KB; otherwise the next candidate is
    tried and overwrites the same file. Returns when `target` is empty.
    """
    while True:
        # Take the next page off the queue. The lock is released in
        # `finally` so that returning on an empty queue cannot leave it
        # held and block every other worker forever.
        lock.acquire()
        try:
            if not target:
                return
            page = target.pop(0)
        finally:
            lock.release()

        data = getUrlData(base + page)

        # The title is taken from the first line matching titlePattern.
        # NOTE(review): if no title is found, `title` stays empty and the
        # file is written as "<savePath>/.mp3".
        title = ""
        for line in data:
            if line.find("title") != -1:
                m = titlePattern.search(line)
                if m:
                    title = m.group(1)
                    break

        # Collect candidate links; stop scanning once more than 10 have
        # been gathered.
        mp3Target = []
        for line in data:
            if len(mp3Target) > 10:
                break
            if line.find(".mp3") != -1:
                mp3Target.extend(mp3Pattern.findall(line))

        filename = savePath + "/" + title + ".mp3"
        for mp3Url in mp3Target:
            try:
                print("try to get " + title + ".mp3,url= " + mp3Url)
                urllib.urlretrieve(mp3Url, filename)
                # Files of 500 KB or less are treated as bad downloads.
                if os.path.getsize(filename) > 500 * 1024:
                    print("done:" + title + ".mp3")
                    break
            except Exception:
                # Best-effort: report and move on to the next candidate.
                print("fail to get " + title + ".mp3 with url " + mp3Url)

for num in range(threadNum):
    thread = threading.Thread( None, getMp3 )
    thread.start()
    print "start thread ",num

文章来源:http://spaces.msn.com/members/zzzhc/Blog/cns!1pPbKg7hHgS7AKKQm6CWG1ZQ!125.entry

posted on 2005-12-23 15:13 zzzhc 阅读(1047) 评论(0) 编辑收藏

新用户注册刷新评论列表


只有注册用户登录后才能发表评论。




网站导航: 博客园博客园最新博文博问管理

My Links

Blog Stats

常用链接

留言簿(1)

随笔分类(1)

随笔档案(8)

文章分类

搜索

最新评论

阅读排行榜

评论排行榜

[导入]百度歌曲MP3 top 500批量下载