ToB企服应用市场:ToB评测及商务社交产业平台
标题:
音乐爬虫(调试中)
[打印本页]
作者:
半亩花草
时间:
昨天 22:26
标题:
音乐爬虫(调试中)
获取包含音乐链接的 JSON 数据
import requests
from pyquery import PyQuery
import re
def get_music_index(name):
    """Search music.2t58.com for *name* and return up to seven song IDs.

    The text of the first seven ``.name`` elements on the result page is
    printed as numbered options, then song IDs are extracted from the
    ``/song/<id>.html`` anchors found in the raw HTML.

    Args:
        name: Song name to search for; it is URL-quoted before being placed
            in the search URL.

    Returns:
        A list of at most seven song-ID strings, or an empty list when the
        HTTP request fails.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
        'Referer': 'http://music.2t58.com/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    cookies = {
        'Hm_lvt_b8f2e33447143b75e7e4463e224d6b7f': '1737002552',
        'HMACCOUNT': 'F76AECD34A294FCC',
        'Hm_lpvt_b8f2e33447143b75e7e4463e224d6b7f': '1737006718',
    }
    # The search term goes into the URL path, so it has to be percent-encoded.
    encoded_name = requests.utils.quote(name)
    url = f'http://music.2t58.com/so/{encoded_name}.html'

    # Only the network call can raise RequestException; keep the try minimal.
    try:
        response = requests.get(url, headers=headers, cookies=cookies, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"请求出错: {e}")
        return []

    doc = PyQuery(response.content)
    for index, item in enumerate(doc(".name").items(), start=1):
        print(f"选项 {index}: {item.text().strip()}")
        if index == 7:
            break

    # NOTE(review): the previous pattern hard-coded an absolute
    # "https://www.cnblogs.com/song/..." link, which looks like an artifact of
    # link rewriting when the snippet was republished -- confirm against the
    # live page. The pattern below accepts a relative "/song/<id>.html" link
    # as well as an absolute one on any host, and escapes the dot in ".html".
    ex = r'<a href="(?:https?://[^"/]+)?/song/([^"]+?)\.html" target="_mp3">.*?</a>'
    song_ids = re.findall(ex, response.text, re.S)
    return song_ids[:7]
def download_music(song_id):
    """Fetch and print the play-info JSON for one song from www.2t58.com.

    Sends a form-encoded POST to ``/js/play.php`` with the song ID and prints
    the decoded JSON response.

    Args:
        song_id: Song identifier as returned by ``get_music_index``.

    Returns:
        The decoded JSON payload. Earlier revisions returned ``None``; callers
        that ignore the return value are unaffected.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts
            or a non-success HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    # Browser-like headers for the XHR endpoint. Content-Length is deliberately
    # not listed: requests derives it from the encoded form body, so the old
    # hard-coded value of 26 was only correct for one particular ID length.
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Cookie': 'Hm_lvt_b8f2e33447143b75e7e4463e224d6b7f=1690974946; Hm_lpvt_b8f2e33447143b75e7e4463e224d6b7f=1690976158',
        'Host': 'www.2t58.com',
        'Origin': 'https://www.2t58.com',
        'Referer': 'https://www.2t58.com/song/bWhzc3hud25u.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }
    # Form fields of the POST request.
    data = {'id': song_id, 'type': 'music'}
    url2 = 'https://www.2t58.com/js/play.php'
    # A timeout keeps the script from hanging forever on a stalled server.
    resp2 = requests.post(url=url2, headers=headers, data=data, timeout=10)
    # Surface HTTP errors directly instead of as a confusing JSON decode error.
    resp2.raise_for_status()
    json_data = resp2.json()
    print(json_data)
    return json_data
if __name__ == "__main__":
    # Interactive flow: search by name, let the user pick one of the listed
    # results, then request and print its play info.
    name = input("请输入歌曲名称: ")
    music_index = get_music_index(name)
    if not music_index:
        # Nothing to choose from (request failed or no link matched).
        print("没有找到相关歌曲。")
    else:
        try:
            num = int(input("请输入歌曲的序列:"))
        except ValueError:
            num = 0  # Non-numeric input is treated as an out-of-range choice.
        # Reject 0 and negatives explicitly: music_index[-1] would otherwise
        # silently select the last song.
        if 1 <= num <= len(music_index):
            download_music(music_index[num - 1])
        else:
            print(f"无效的序列,请输入 1 到 {len(music_index)} 之间的数字。")
    print("按回车直接退出...")
    # Keep the console open until the user submits an empty line (bare Enter).
    while input() != "":
        pass
复制代码
实现下载音乐(目前默认保存为 mp3 格式)
import requests
from pyquery import PyQuery
import re
def get_music_index(name):
    """Look up *name* on music.2t58.com and return at most seven song IDs.

    Prints the text of the first seven ``.name`` elements of the result page
    as numbered options, then collects song IDs from the ``/song/<id>.html``
    anchors in the response HTML.

    Args:
        name: Song name to search for; URL-quoted before it is inserted into
            the search URL.

    Returns:
        Up to seven song-ID strings in page order, or ``[]`` if the HTTP
        request fails.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
        'Referer': 'http://music.2t58.com/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    cookies = {
        'Hm_lvt_b8f2e33447143b75e7e4463e224d6b7f': '1737002552',
        'HMACCOUNT': 'F76AECD34A294FCC',
        'Hm_lpvt_b8f2e33447143b75e7e4463e224d6b7f': '1737006718',
    }
    # Percent-encode the search term because it becomes part of the URL path.
    encoded_name = requests.utils.quote(name)
    url = f'http://music.2t58.com/so/{encoded_name}.html'

    # Guard only the network call; parsing errors are not RequestExceptions.
    try:
        response = requests.get(url, headers=headers, cookies=cookies, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"请求出错: {e}")
        return []

    doc = PyQuery(response.content)
    for index, item in enumerate(doc(".name").items(), start=1):
        print(f"选项 {index}: {item.text().strip()}")
        if index == 7:
            break

    # NOTE(review): the earlier pattern required an absolute
    # "https://www.cnblogs.com/song/..." href, presumably introduced by link
    # rewriting when the code was reposted -- verify against the live page.
    # This pattern matches a relative "/song/<id>.html" href or an absolute
    # one on any host, and treats the dot before "html" literally.
    ex = r'<a href="(?:https?://[^"/]+)?/song/([^"]+?)\.html" target="_mp3">.*?</a>'
    song_ids = re.findall(ex, response.text, re.S)
    return song_ids[:7]
def download_music(song_id):
    """Request the play-info JSON for one song from www.2t58.com.

    Sends a form-encoded POST to ``/js/play.php`` carrying the song ID.

    Args:
        song_id: Song identifier as returned by ``get_music_index``.

    Returns:
        The decoded JSON payload of the response.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts
            or a non-success HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    # Browser-like headers for the XHR endpoint. Content-Length is left to
    # requests, which computes it from the encoded form body; the previous
    # hard-coded 26 only matched one specific ID length.
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Cookie': 'Hm_lvt_b8f2e33447143b75e7e4463e224d6b7f=1690974946; Hm_lpvt_b8f2e33447143b75e7e4463e224d6b7f=1690976158',
        'Host': 'www.2t58.com',
        'Origin': 'https://www.2t58.com',
        'Referer': 'https://www.2t58.com/song/bWhzc3hud25u.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }
    # Form fields of the POST request.
    data = {'id': song_id, 'type': 'music'}
    url2 = 'https://www.2t58.com/js/play.php'
    # Without a timeout a stalled server would block the script indefinitely.
    resp2 = requests.post(url=url2, headers=headers, data=data, timeout=10)
    # Fail on HTTP errors here rather than on an unrelated JSON decode error.
    resp2.raise_for_status()
    return resp2.json()
# Download a file and save it to disk.
def download_song(url, filename):
    """Download *url* and write the response body to *filename*.

    The body is fetched completely before the file is opened, so a failed
    request does not leave a file behind. Request failures are reported on
    stdout instead of being raised.

    Args:
        url: Address of the audio file to download.
        filename: Path of the file to create; it is opened in binary write
            mode.

    Returns:
        None.
    """
    try:
        # The timeout bounds connection setup and each wait for data, so a
        # stalled media server cannot hang the script forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raises for 4xx/5xx status codes.
        with open(filename, 'wb') as file:
            file.write(response.content)
        print(f"歌曲已成功下载并保存为 {filename}")
    except requests.exceptions.RequestException as e:
        print(f"下载过程中发生错误: {e}")
if __name__ == "__main__":
    # Interactive flow: search, pick a result, resolve its media URL, download.
    name = input("请输入歌曲名称: ")
    music_index = get_music_index(name)
    if not music_index:
        # Request failed or no song link matched; there is nothing to select.
        raise SystemExit("没有找到相关歌曲。")

    try:
        num = int(input("请输入歌曲的序列:"))
    except ValueError:
        num = 0  # Non-numeric input is handled as an out-of-range choice.
    # Reject 0 and negatives explicitly: music_index[-1] would otherwise
    # silently select the last song.
    if not 1 <= num <= len(music_index):
        raise SystemExit(f"无效的序列,请输入 1 到 {len(music_index)} 之间的数字。")

    json_data = download_music(music_index[num - 1])
    # Extract the media URL and title; the payload may lack them (for example
    # an error reply), so guard the lookups.
    try:
        url = json_data['url']
        title = json_data['title']
    except (KeyError, TypeError):
        raise SystemExit(f"返回的数据中缺少 url 或 title: {json_data}") from None
    if not url or not title:
        raise SystemExit(f"返回的数据中缺少 url 或 title: {json_data}")

    # Strip the decorative brackets as before, and map the path separator plus
    # the characters that Windows forbids in file names to underscores.
    cleanup = str.maketrans({
        '《': None, '》': None, '[': None, ']': None,
        '/': '_', '\\': '_', ':': '_', '*': '_', '?': '_',
        '"': '_', '<': '_', '>': '_', '|': '_',
    })
    filename = (str(title) + '.mp3').translate(cleanup)
    print("歌曲URL: ", url)
    print("保存为文件: ", filename)
    download_song(url, filename)
复制代码
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息请访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。
欢迎光临 ToB企服应用市场:ToB评测及商务社交产业平台 (https://dis.qidao123.com/)
Powered by Discuz! X3.4