下载 8000 首儿歌的 Python 代码：
代码如下：
# -*- coding: utf-8 -*-
import logging
import os
import re
import sys
import urllib

from lxml import etree
from pyquery import PyQuery as py
def format(filename):
    """Return *filename* with blanks and apostrophes replaced by ``_``.

    The downloader passes the generated ``<id>_<title>.mp3`` name through
    this function before the file is written.

    NOTE: the function name shadows the built-in ``format``; it is kept
    unchanged because the rest of the file calls it by this name.

    :param filename: candidate file name (text).
    :returns: the text with every unwanted character replaced by ``_``.
    """
    # NOTE(review): the middle entry of the original tuple is garbled in the
    # published listing (it reads as three single quotes).  It is presumably
    # the typographic apostrophe U+2019 -- confirm against real song titles.
    unwanted = (u' ', u'\u2019', u"'")
    for char in unwanted:
        # str.replace is already a no-op when the character is absent, so the
        # original find() pre-check is unnecessary.
        filename = filename.replace(char, u'_')
    return filename
def download_mp3(mp3_url, filename, dir, logger=None):
    """Download *mp3_url* and store it as *filename* inside directory *dir*.

    Nothing is downloaded when the target file already exists.  The outcome
    (existed / downloaded / not downloaded) is reported through *logger* at
    DEBUG level; failures are logged, never raised.

    :param mp3_url: URL of the file to fetch.
    :param filename: name of the file to create inside *dir*.
    :param dir: existing directory that receives the file.
    :param logger: logger used for reporting; defaults to the logger named
        ``"simple"``, the name the script entry point configures.
    :returns: ``None``.
    """
    # Function-scope import so the block works on Python 3 as well as on the
    # Python 2 API (urllib.urlopen) the script was originally written for.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    if logger is None:
        logger = logging.getLogger("simple")

    # os.path.join instead of a hard-coded backslash keeps this portable.
    f = os.path.join(dir, filename)
    if os.path.exists(f):
        logger.debug(f + " is existed.")
        return

    try:
        # Fetch first and open the target only afterwards: opening the file
        # before the download left an empty file behind on failure, and the
        # "already exists" check above then skipped it on every later run.
        response = urlopen(mp3_url)
        try:
            data = response.read()
        finally:
            response.close()
        with open(f, 'wb') as out:
            out.write(data)
    except Exception:
        # Best effort: drop a partially written file so a retry is possible.
        if os.path.exists(f):
            try:
                os.remove(f)
            except OSError:
                pass
        logger.debug(filename + ' is not downloaded.')
        return
    logger.debug(filename + ' is downloaded.')
def download_all_mp3(start, end, dir, logger):
    """Download the songs whose page ids lie in ``range(start, end)``.

    For every id the page ``http://www.youban.com/mp3-d<id>.html`` is loaded,
    the song title and download link are extracted from its
    ``.mp3downloadbox`` element, and the file is saved into *dir* as
    ``<id>_<title>.mp3`` (after clean-up by ``format``).  A page that cannot
    be loaded or parsed is logged and skipped; it never aborts the run.

    :param start: first page id (inclusive).
    :param end: last page id (exclusive).
    :param dir: existing directory that receives the files.
    :param logger: logger used for DEBUG progress messages.
    :returns: ``None``.
    """
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so the
    # element can be serialised to text without naming either type.
    text_type = type(u'')

    # NOTE(review): the published listing lost the HTML tags inside this
    # pattern.  Only ".*", "(.*)", ".*downloadboxlist.*?" and the use of
    # group(1) as title / group(2) as link survive.  The <h1> and <a href>
    # markup below is a reconstruction -- verify it against a real page.
    # Compiled once here instead of on every loop iteration.
    pattern = re.compile(
        u'.*<h1>(.*)</h1>.*downloadboxlist.*?<a href="(.*?)"', re.DOTALL)

    for x in range(start, end):
        url = "http://www.youban.com/mp3-d" + str(x) + ".html"
        try:
            logger.debug(str(x) + ": " + url)
            doc = py(url=url)
            e = doc('.mp3downloadbox')
            # A selector with no hits yields an empty (falsy) result, never
            # None or ''.  Skip this id rather than stopping the whole run.
            if not e:
                logger.debug(url + " is not existed.")
                continue

            m = pattern.search(text_type(e))
            title = m.group(1).strip() if m is not None else ''
            link = m.group(2) if m is not None else ''
            if link == '' or title == '':
                logger.debug(url + " is not useful")
                continue

            title2 = format(str(x) + "_" + title + ".mp3")
            logger.debug(str(x) + ": " + link)
            download_mp3(link, title2, dir)
        except Exception:
            # ``except Exception`` (not a bare except) so that Ctrl-C can
            # still interrupt a long run; the traceback goes to the log.
            logger.debug(url + " met exception.", exc_info=True)
            continue
if __name__ == "__main__":
    # Usage: <script> [start end [dir_root]]
    #   start, end -- page id range, end exclusive (default 1 8000)
    #   dir_root   -- directory under which the "<start>-<end>" download
    #                 folder is created (default e:\song)
    cli_args = sys.argv[1:]

    dir_root = "e:\\song"
    # Guard on the argument count: indexing sys.argv[3] directly raised
    # IndexError whenever the optional arguments were left out.
    if len(cli_args) >= 3 and cli_args[2] != '':
        dir_root = cli_args[2]

    start, end = 1, 8000
    if len(cli_args) >= 2:
        # Convert before comparing: the original compared the raw argument
        # strings with 0, which is meaningless on Python 2 and a TypeError on
        # Python 3.
        first, last = int(cli_args[0]), int(cli_args[1])
        if first >= 0 and last >= 0:
            start, end = first, last
    print("download from %s to %s.\n" % (start, end))

    out_dir = os.path.join(dir_root, str(start) + "-" + str(end))
    if not os.path.isdir(out_dir):
        # makedirs also creates a missing dir_root; mkdir would fail there.
        os.makedirs(out_dir)
    print("download to " + out_dir + ".\n")

    # ``logger`` stays a module-level name because download_mp3 looks it up
    # as a global.
    logger = logging.getLogger("simple")
    logger.setLevel(logging.DEBUG)
    # Song titles are not ASCII, so give the log file an explicit encoding.
    fh = logging.FileHandler(os.path.join(out_dir, "download.log"),
                             encoding="utf-8")
    ch = logging.StreamHandler()
    formatter = logging.Formatter("%(message)s")
    ch.setFormatter(formatter)
    fh.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)

    download_all_mp3(start, end, out_dir, logger)
有需要的读者可以参考以上代码并继续修改。