• python爬取B站视频


    参考:https://cloud.tencent.com/developer/article/1768680

    参考文章中的代码有点问题:请求头(headers)需要修改。修改后的完整代码如下:

    1. import requests
    2. import re # 正则表达式
    3. import pprint
    4. import json
    5. from moviepy.editor import AudioFileClip, VideoFileClip
    6. from bs4 import BeautifulSoup as bs
    # Request headers sent with every download.
    headers = {
        # Hotlink protection: tells the server which page the request was
        # navigated from.
        'referer': 'https://www.bilibili.com/a',
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/86.0.4240.198 Safari/537.36'
        ),
    }
    def send_request(url, timeout=30):
        """Send a GET request using the module-level ``headers``.

        :param url: address to fetch
        :param timeout: seconds to wait for the server before giving up;
            without a timeout a stalled connection would block forever
        :return: the ``requests.Response`` object
        :raises requests.HTTPError: if the server answers with an error status
        """
        response = requests.get(url=url, headers=headers, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of handing an error page to the
        # caller, which would otherwise be parsed or saved as media data.
        response.raise_for_status()
        return response
    def get_video_data(html_data):
        """Parse the video page and extract the title and media URLs.

        :param html_data: HTML text of the video page
        :return: list ``[title, audio_url, video_url]``
        :raises ValueError: if the title element or the embedded play-info
            JSON cannot be found in the page
        """
        # Extract the video title.
        soup = bs(html_data, 'lxml')
        title_tags = soup.find_all(name='h1', attrs={"class": "video-title special-text-indent"})
        if not title_tags:
            raise ValueError('video title <h1> not found in page')
        title = title_tags[0].get_text()

        # Extract the JSON blob embedded in the page.  The pattern was lost
        # in the published listing (an empty pattern only ever yields '').
        # NOTE(review): pattern reconstructed from the referenced article;
        # confirm it still matches the current page layout.
        matches = re.findall(r'<script>window\.__playinfo__=(.*?)</script>', html_data)
        if not matches:
            raise ValueError('window.__playinfo__ JSON not found in page')
        json_data = json.loads(matches[0])
        pprint.pprint(json_data)

        dash = json_data['data']['dash']
        # Audio stream URL.
        audio_url = dash['audio'][0]['backupUrl'][0]
        print('解析到的音频地址:', audio_url)
        # Video (picture only) stream URL.
        video_url = dash['video'][0]['backupUrl'][0]
        print('解析到的视频地址:', video_url)

        return [title, audio_url, video_url]
    def save_data(file_name, audio_url, video_url):
        """Download the audio and video streams and write them to disk.

        The audio is written to ``file_name + '.mp3'`` and the video to
        ``file_name + '.mp4'``.

        :param file_name: base name (without extension) of the output files
        :param audio_url: address of the audio stream
        :param video_url: address of the video stream
        """
        # Fetch both payloads first, then write them out.
        print('正在请求音频数据')
        audio_bytes = send_request(audio_url).content
        print('正在请求视频数据')
        video_bytes = send_request(video_url).content

        outputs = (
            ('.mp3', audio_bytes, '正在保存音频数据'),
            ('.mp4', video_bytes, '正在保存视频数据'),
        )
        for suffix, payload, message in outputs:
            with open(file_name + suffix, mode='wb') as out:
                out.write(payload)
                print(message)
    def merge_data(video_name):
        """Mux the downloaded audio and video files into one MP4.

        Reads ``video_name + '.mp3'`` and ``video_name + '.mp4'``, trims the
        longer of the two to the shorter duration, and writes the result to
        ``output.mp4`` in the current directory.

        :param video_name: base name (without extension) of the input files
        """
        print('视频合成开始:', video_name)
        # Context managers make sure the underlying readers are closed even
        # if encoding fails part-way.
        with AudioFileClip(video_name + '.mp3') as audioclip, \
                VideoFileClip(video_name + '.mp4') as videoclip:
            # Durations of both streams.
            video_time = videoclip.duration
            audio_time = audioclip.duration
            # Trim whichever stream is longer.
            if video_time > audio_time:
                # Video is longer than audio: cut the video.
                videoclip_new = videoclip.subclip(0, audio_time)
                audioclip_new = audioclip
            else:
                # Audio is at least as long as the video: cut the audio.
                videoclip_new = videoclip
                audioclip_new = audioclip.subclip(0, video_time)
            # Attach the audio track to the video.
            video_with_new_audio = videoclip_new.set_audio(audioclip_new)
            # Encode into a new file.
            video_with_new_audio.write_videofile(
                "output.mp4",
                codec='libx264',
                audio_codec='aac',
                temp_audiofile='temp-video.m4a',
                remove_temp=True,
            )
        print('视频合成结束:', video_name)
    # Download one video page, save its streams and mux them together.
    url = 'https://www.bilibili.com/video/BV1bK421a7qG/?spm_id_from=333.1007.tianma.6-4-22.click'
    response = send_request(url)
    # Prefer the charset declared inside the page; fall back to the detected
    # encoding when the page declares none (indexing an empty list would
    # raise IndexError).
    encodings = requests.utils.get_encodings_from_content(response.text)
    response.encoding = encodings[0] if encodings else response.apparent_encoding
    html_data = response.text
    video_data = get_video_data(html_data)  # [title, audio_url, video_url]
    save_data(*video_data)
    merge_data(video_data[0])

    效果

    小姐姐挺靓,就是左下角有水印,想办法去除水印,参考:python实战之去除视频水印&字幕_python 去除视频水印-CSDN博客

    1. import os
    2. import sys
    3. import cv2
    4. import numpy
    5. from moviepy import editor
    # Scratch file holding the silent, inpainted video before audio is re-attached.
    TEMP_VIDEO = 'temp.mp4'


    class WatermarkRemover():
        '''
        Remove a static watermark or subtitles from every video in a folder.

        The region is picked interactively on the first frame of the first
        video; bright pixels inside it are thresholded into a mask, and each
        frame is repaired with OpenCV inpainting.  Results are written to the
        output folder with the original audio track re-attached.
        '''

        def __init__(self, video_path, output, threshold: int, kernel_size: int):
            '''
            :param video_path: folder containing the input videos
            :param output: folder receiving the processed videos
            :param threshold: threshold used for binarisation
            :param kernel_size: side length of the square dilation kernel
            '''
            self.threshold = threshold
            self.kernel_size = kernel_size
            self.video_path = video_path
            self.output = output

        @staticmethod
        def _sampling_step(frame_count) -> int:
            '''
            Return the frame interval used to sample roughly five frames.

            Always at least 1, so very short videos (or containers reporting
            a zero/negative frame count) do not cause a modulo by zero.
            '''
            return max(1, int(frame_count) // 5)

        @staticmethod
        def _read_first_frame(video_path: str) -> numpy.ndarray:
            '''
            Read the first frame of a video.

            :raises ValueError: if no frame can be read from the file
            '''
            video = cv2.VideoCapture(video_path)
            try:
                success, frame = video.read()
            finally:
                video.release()
            if not success:
                raise ValueError('cannot read video: %s' % video_path)
            return frame

        def _output_file(self, name: str, suffix: str) -> str:
            '''
            Build the output path ``<output>/<stem><suffix>.mp4`` for an input file.

            Only the last extension is stripped, so names such as ``a.1.mp4``
            and ``a.2.mp4`` do not map to the same output file.
            '''
            stem = os.path.splitext(os.path.basename(name))[0]
            return os.path.join(self.output, stem + suffix + '.mp4')

        def _inpaint_video(self, name: str, suffix: str, mask_for_frame) -> None:
            '''
            Inpaint every frame of one video and write the result with audio.

            :param name: input video path
            :param suffix: text appended to the input's base name
            :param mask_for_frame: callable returning the mask for a given frame
            '''
            video = cv2.VideoCapture(name)
            fps = video.get(cv2.CAP_PROP_FPS)
            size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            video_writer = cv2.VideoWriter(TEMP_VIDEO, cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
            try:
                # Process frame by frame.
                success, frame = video.read()
                while success:
                    video_writer.write(self.inpaint_image(frame, mask_for_frame(frame)))
                    success, frame = video.read()
            finally:
                video.release()
                video_writer.release()
            try:
                # Re-attach the original audio track.
                self.merge_audio(name, self._output_file(name, suffix), TEMP_VIDEO)
            finally:
                # Remove the scratch file even when muxing fails.
                if os.path.exists(TEMP_VIDEO):
                    os.remove(TEMP_VIDEO)

        def select_roi(self, img: numpy.ndarray, hint: str) -> list:
            '''
            Let the user drag a box around the watermark or subtitle area.
            Press SPACE or ENTER to confirm.

            :param img: image to display
            :param hint: window title
            :return: selected region as [x, y, width, height] in full-size pixels
            '''
            COFF = 0.7  # the preview is shown at 70% of the original size
            w, h = int(COFF * img.shape[1]), int(COFF * img.shape[0])
            resize_img = cv2.resize(img, (w, h))
            roi = cv2.selectROI(hint, resize_img, False, False)
            cv2.destroyAllWindows()
            # Scale the selection back to full-size coordinates.
            return [int(value / COFF) for value in roi]

        def dilate_mask(self, mask: numpy.ndarray) -> numpy.ndarray:
            '''
            Dilate a mask to enlarge the masked area.

            :param mask: mask image
            :return: dilated mask
            '''
            kernel = numpy.ones((self.kernel_size, self.kernel_size), numpy.uint8)
            return cv2.dilate(mask, kernel)

        def generate_single_mask(self, img: numpy.ndarray, roi: list, threshold: int) -> numpy.ndarray:
            '''
            Build the mask of a single frame from the selected region.

            :param img: single BGR frame
            :param roi: selected region as [x, y, width, height]
            :param threshold: binarisation threshold
            :return: binary mask (255 where a ROI pixel is brighter than threshold)
            '''
            # Invalid region: exit the program.
            if len(roi) != 4:
                print('NULL ROI!')
                sys.exit()

            # Copy the grayscale pixels inside the ROI; everything else stays 0.
            top, bottom = int(roi[1]), int(roi[1] + roi[3])
            left, right = int(roi[0]), int(roi[0] + roi[2])
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            roi_img = numpy.zeros((img.shape[0], img.shape[1]), numpy.uint8)
            roi_img[top:bottom, left:right] = gray[top:bottom, left:right]

            # Threshold segmentation.
            _, mask = cv2.threshold(roi_img, threshold, 255, cv2.THRESH_BINARY)
            return mask

        def generate_watermark_mask(self, video_path: str) -> numpy.ndarray:
            '''
            Build the watermark mask by AND-ing the masks of several sampled frames.

            :param video_path: video file path
            :return: dilated watermark mask
            :raises ValueError: if no frame can be read from the video
            '''
            video = cv2.VideoCapture(video_path)
            try:
                success, frame = video.read()
                if not success:
                    raise ValueError('cannot read video: %s' % video_path)
                roi = self.select_roi(frame, 'select watermark ROI')
                mask = numpy.full((frame.shape[0], frame.shape[1]), 255, numpy.uint8)
                step = self._sampling_step(video.get(cv2.CAP_PROP_FRAME_COUNT))
                index = 0
                while success:
                    if index % step == 0:
                        mask = cv2.bitwise_and(mask, self.generate_single_mask(frame, roi, self.threshold))
                    success, frame = video.read()
                    index += 1
            finally:
                video.release()
            return self.dilate_mask(mask)

        def generate_subtitle_mask(self, frame: numpy.ndarray, roi: list) -> numpy.ndarray:
            '''
            Build the subtitle mask of a single frame from the selected region.

            :param frame: single frame
            :param roi: selected region as [x, y, width, height]
            :return: dilated subtitle mask
            '''
            # Only the vertical extent of the ROI is used; the band spans the full width.
            full_width_band = [0, roi[1], frame.shape[1], roi[3]]
            mask = self.generate_single_mask(frame, full_width_band, self.threshold)
            return self.dilate_mask(mask)

        def inpaint_image(self, img: numpy.ndarray, mask: numpy.ndarray) -> numpy.ndarray:
            '''
            Repair the masked pixels of an image.

            :param img: single frame
            :param mask: mask of the pixels to repair
            :return: repaired image
            '''
            return cv2.inpaint(img, mask, 1, cv2.INPAINT_TELEA)

        def merge_audio(self, input_path: str, output_path: str, temp_path: str):
            '''
            Combine the original audio with the processed (silent) video.

            :param input_path: original video file path (audio source)
            :param output_path: path of the muxed output file
            :param temp_path: path of the silent processed video
            '''
            # The source clip must stay open while its audio is being encoded.
            with editor.VideoFileClip(input_path) as video:
                audio = video.audio
                with editor.VideoFileClip(temp_path) as opencv_video:
                    clip = opencv_video.set_audio(audio)
                    clip.write_videofile(output_path)

        def remove_video_watermark(self):
            '''
            Remove the watermark from every video in the input folder.

            The mask is generated once, from the first video, and reused for
            all of them.
            '''
            os.makedirs(self.output, exist_ok=True)
            filenames = [os.path.join(self.video_path, i) for i in os.listdir(self.video_path)]
            if not filenames:
                return

            # Generate the watermark mask.
            mask = self.generate_watermark_mask(filenames[0])

            def fixed_mask(_frame):
                return mask

            for name in filenames:
                self._inpaint_video(name, '_no_watermark', fixed_mask)

        def remove_video_subtitle(self):
            '''
            Remove subtitles from every video in the input folder.

            The region is selected once, on the first frame of the first
            video; the mask itself is recomputed for every frame.
            '''
            os.makedirs(self.output, exist_ok=True)
            filenames = [os.path.join(self.video_path, i) for i in os.listdir(self.video_path)]
            if not filenames:
                return

            roi = self.select_roi(self._read_first_frame(filenames[0]), 'select subtitle ROI')

            def subtitle_mask(frame):
                return self.generate_subtitle_mask(frame, roi)

            for name in filenames:
                self._inpaint_video(name, '_no_sub', subtitle_mask)
    # Remove the watermark from every video in the 'video' folder and write
    # the results to the 'output' folder.
    video_path = 'video'
    output_path = 'output'
    remover = WatermarkRemover(
        video_path,
        output_path,
        threshold=80,
        kernel_size=5,
    )
    remover.remove_video_watermark()
    # To remove subtitles instead, build the remover the same way and call:
    # remover.remove_video_subtitle()

    效果一般吧:

  • 相关阅读:
    [Java项目论文+PPT+源码等]S2SH+mysql水费管理系统
    Java多线程并发编程
    【C++初阶】类和对象——构造函数&&析构函数&&拷贝构造函数
    数据库的索引
    有什么拍照识别文字的软件?这篇文章看到就有收获哦
    设计模式连环问,你能坚持到第几问?
    CAN2无法通信问题
    [含文档+PPT+源码等]精品基于ssm的足球联赛管理系统的设计与实现vue[包运行成功]计算机Java毕业设计SSM项目源码
    2022亚太赛题浅评
    手机号码携号转网API接口,轻松实现用户号码流转
  • 原文地址:https://blog.csdn.net/sinat_38653840/article/details/137201680