B站视频

  |  

摘要: Python 实现的 B 站视频下载器

【对数据分析、人工智能、金融科技、风控服务感兴趣的同学,欢迎关注我哈,阅读更多原创文章】
我的网站:潮汐朝夕的生活实验室
我的公众号:潮汐朝夕
我的知乎:潮汐朝夕
我的github:FennelDumplings
我的leetcode:FennelDumplings


在代码中主要需要注意 HTTP 请求头中的 referer 和 origin。

  • referer 用于指示发出请求的页面的URL。它的主要目的是帮助服务器确定请求的上下文,以便更好地处理请求或收集统计信息。
  • Origin 提供的信息更为概括,不包含具体的路径或查询字符串。它的值是请求发起页面的源(origin),由协议(如 HTTP 或 HTTPS)、域名和端口(如果有)组成,例如 https://search.bilibili.com。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os
import requests
import json
import re
from bs4 import BeautifulSoup
import subprocess


class BilibiliVideoAudio:
    """Download the separate video and audio streams of one Bilibili video.

    The class requests the public video page for a BV id, reads the stream
    URLs from the ``window.__playinfo__`` JSON embedded in that page,
    saves both streams to disk and merges them with ``ffmpeg``.
    """

    def __init__(self, bvid, timeout=30):
        """Store the video id and the headers sent with every request.

        Args:
            bvid: BV identifier of the video, e.g. ``'BV1oE421T7jC'``.
            timeout: Value passed to ``requests`` as ``timeout`` (seconds),
                so that a stalled connection cannot hang the program.
        """
        self.bvid = bvid
        self.timeout = timeout
        # "referer" carries the URL of the page that issued the request and
        # "origin" only its scheme + host; both point at bilibili.com here.
        # NOTE(review): 'br' is advertised below; requests can presumably
        # only decode Brotli bodies when a brotli package is installed --
        # confirm, or drop 'br' if responses come back undecodable.
        self.headers = {
            "referer": "https://search.bilibili.com/all?keyword=%E4%B8%BB%E6%92%AD%E8%AF%B4%E8%81%94%E6%92%AD&from_source=webtop_search&spm_id_from=333.1007&search_source=5&page=4&o=90",
            "origin": "https://search.bilibili.com",
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
            'Accept-Encoding': 'gzip, deflate, br'
        }

    def get_video_audio(self):
        """Fetch the video page and extract its title and stream URLs.

        Returns:
            A dict with the keys ``'title'``, ``'video_url'`` and
            ``'audio_url'``. The URLs are the ``base_url`` of the first
            entry in the page's DASH ``video`` and ``audio`` lists.

        Raises:
            requests.RequestException: The page request failed or returned
                an HTTP error status.
            ValueError: The page has no title meta tag or no
                ``window.__playinfo__`` block.
            KeyError, IndexError: The play info lacks the expected DASH
                entries.
        """
        # Build the video page URL and download the page.
        url = f'https://www.bilibili.com/video/{self.bvid}/?spm_id_from=333.337.search-card.all.click&vd_source=14378ecd144bed421affe1fe0ddd8981'
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        # Fail here on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        content = response.content.decode('utf-8')
        soup = BeautifulSoup(content, 'html.parser')

        title = self._extract_title(soup)
        data = self._extract_playinfo(content)

        dash = data['data']['dash']
        return {
            'title': title,
            'video_url': dash['video'][0]['base_url'],
            'audio_url': dash['audio'][0]['base_url']
        }

    @staticmethod
    def _extract_title(soup):
        """Return the ``content`` of ``<meta name="title">`` in the page head."""
        head = soup.head
        meta_tag = None
        if head is not None:
            meta_tag = head.find('meta', attrs={'name': 'title'})
        if meta_tag is None or meta_tag.get('content') is None:
            raise ValueError('title meta tag not found in the video page')
        return meta_tag['content']

    @staticmethod
    def _extract_playinfo(content):
        """Parse the JSON assigned to ``window.__playinfo__`` in the page HTML."""
        pattern = r'window\.__playinfo__=({.*?})\s*</script>'
        match = re.search(pattern, content)
        if match is None:
            raise ValueError('window.__playinfo__ not found in the video page')
        return json.loads(match.group(1))

    def download_video_audio(self, url, fold, filename):
        """Download ``url`` into the directory ``fold`` under ``filename``.

        The file name is passed through :meth:`sanitize_filename` first.
        The body is streamed to a temporary ``.part`` file and renamed only
        after the transfer completed, so a failed download never leaves a
        truncated file under the final name.

        Args:
            url: Address of the video or audio stream.
            fold: Existing directory to write into.
            filename: Desired file name (sanitized before use).

        Returns:
            ``True`` when the file was written, ``False`` when the download
            failed (the error is printed, not raised).
        """
        # Clean the file name of characters that are not allowed in paths.
        filename = self.sanitize_filename(filename)
        download_path = os.path.join(fold, filename)
        partial_path = download_path + '.part'
        try:
            with requests.get(url, headers=self.headers, stream=True,
                              timeout=self.timeout) as resp:
                # Without this check an HTTP error body would be saved as media.
                resp.raise_for_status()
                with open(partial_path, mode='wb') as file:
                    # Stream in 1 MiB chunks instead of buffering the whole file.
                    for chunk in resp.iter_content(chunk_size=1024 * 1024):
                        file.write(chunk)
            os.replace(partial_path, download_path)
            print("{:*^30}".format("下载完成:{}".format(filename)))
            return True
        except Exception as e:
            # Best effort by design: report the problem and let the caller go on.
            print(e)
            try:
                os.remove(partial_path)
            except OSError:
                # Nothing was written yet, or the file is already gone.
                pass
            return False

    def sanitize_filename(self, filename):
        """Return ``filename`` with path-unsafe characters replaced by spaces.

        The replaced characters are the double quote, asterisk, angle
        brackets, question mark, backslash, pipe, slash, colon and comma.
        """
        invalid_chars_regex = r'[\"*<>?\\|/:,]'
        return re.sub(invalid_chars_regex, ' ', filename)

    def merge_video_audio(self, video_path, audio_path, output_path):
        """Merge a video file and an audio file into one file using ffmpeg.

        Args:
            video_path: Path of the downloaded video stream.
            audio_path: Path of the downloaded audio stream.
            output_path: Path of the merged file; overwritten if it exists.

        A non-zero ffmpeg exit status is printed, not raised.
        """
        try:
            command = [
                'ffmpeg',
                '-y',  # overwrite the output file if it already exists
                '-i', video_path,  # input video
                '-i', audio_path,  # input audio
                # NOTE: '-c', 'copy' (copy the streams without transcoding)
                # is deliberately left out here, as in the original listing.
                output_path  # merged output
            ]
            subprocess.run(command, check=True)
            print("视频和音频合并完成:{}".format(output_path))
        except subprocess.CalledProcessError as e:
            print("合并失败: {}".format(e))


def main(video_bvid):
    """Download one Bilibili video and merge its video and audio streams.

    The streams are saved into the ``processed_videos`` directory (created
    if missing) as ``<title>.mp4`` and ``<title>.mp3`` and merged into
    ``<title>-combined.mp4``.

    Args:
        video_bvid: BV identifier of the video to process.

    Any exception is caught and reported on stdout instead of propagating.
    """
    try:
        # Only a single bvid is processed.
        bilibili = BilibiliVideoAudio(video_bvid)
        video_audio_info = bilibili.get_video_audio()

        # download_video_audio() sanitizes the name it writes to, so every
        # path handed to ffmpeg must be built from the same sanitized title;
        # otherwise a title containing e.g. '/' or ':' makes the merge step
        # look for files that were never created.
        title = bilibili.sanitize_filename(video_audio_info['title'])
        video_url = video_audio_info['video_url']
        audio_url = video_audio_info['audio_url']

        processed_videos_fold = 'processed_videos'
        os.makedirs(processed_videos_fold, exist_ok=True)

        video_filename = "{}.mp4".format(title)
        audio_filename = "{}.mp3".format(title)
        output_filename = "{}-combined.mp4".format(title)

        video_file_path = os.path.join(processed_videos_fold, video_filename)
        audio_file_path = os.path.join(processed_videos_fold, audio_filename)
        output_file_path = os.path.join(processed_videos_fold, output_filename)

        bilibili.download_video_audio(video_url, processed_videos_fold, video_filename)  # download the video stream
        bilibili.download_video_audio(audio_url, processed_videos_fold, audio_filename)  # download the audio stream
        bilibili.merge_video_audio(video_file_path, audio_file_path, output_file_path)  # merge both streams

        # The separate video and audio files are left on disk after merging;
        # call os.remove() on video_file_path / audio_file_path here to
        # delete them.

    except Exception as ex:
        print("Failed to process video/audio for {}: {}".format(video_bvid, ex))



if __name__ == "__main__":
    # BV identifier of the video processed when the file is run as a script.
    video_bvid = 'BV1oE421T7jC'
    main(video_bvid=video_bvid)

Share