import requests
from bs4 import BeautifulSoup
import time
# HTTP headers sent with every request; the User-Agent string identifies
# the client as a desktop Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/63.0.3239.132 Safari/537.36"
    ),
}
def get_info(url):
    """Download one chart page and print a record for every song entry on it.

    The page is fetched with the module-level ``headers`` and parsed with
    BeautifulSoup's ``lxml`` parser.  Song links and their companion time
    elements are paired in document order; pairing stops at the shorter of
    the two result lists.

    Each printed record is a dict with three keys:

    * ``'title'`` -- the link text before its first ``'-'``
    * ``'song'``  -- the link text after its first ``'-'`` (an empty string
      when the text contains no ``'-'``)
    * ``'time'``  -- the whitespace-stripped text of the time element

    Args:
        url: Address of the chart page to fetch.

    Returns:
        None.  Records are written to standard output.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    # A timeout keeps a stalled connection from blocking the scraper forever.
    wb_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    # Rank selector, currently unused:
    # ranks = soup.select('#rankWrap > div.pc_temp_songlist > ul > li > span.pc_temp_num > strong')
    titles = soup.select('#rankWrap > div.pc_temp_songlist > ul > li > a')
    times = soup.select(
        '#rankWrap > div.pc_temp_songlist > ul > li > span.pc_temp_tips_r > span'
    )
    # The loop variable is deliberately not called ``time`` so that it does
    # not shadow the imported ``time`` module.
    for title_tag, time_tag in zip(titles, times):
        # partition() splits on the first '-' only: text without a hyphen no
        # longer raises IndexError, and text with further hyphens keeps
        # everything after the first one instead of being truncated.
        before, _, after = title_tag.get_text().partition('-')
        data = {
            'title': before,
            'song': after,
            'time': time_tag.get_text().strip(),
        }
        print(data)
# Main entry point (scrapes the Kugou TOP500 song chart).
if __name__ == '__main__':
    page_url = 'https://www.kugou.com/yy/rank/home/{}-8888.html'
    # NOTE(review): only chart pages 1 and 2 are requested here, not the
    # whole chart -- widen the range if the full list is wanted.
    for page in range(1, 3):
        get_info(page_url.format(page))
        # Pause for one second after each page.
        time.sleep(1)