• Java爬取哔哩哔哩视频(可视化)


    讲解视频链接:https://www.bilibili.com/video/BV14e411Q7oG/
    本文仅供学术用途

    先上图

    在这里插入图片描述

    代码

    爬虫核心

    import com.alibaba.fastjson2.JSON;
    import com.alibaba.fastjson2.JSONObject;
    import com.gargoylesoftware.htmlunit.*;
    import org.apache.commons.exec.CommandLine;
    import org.apache.commons.exec.DefaultExecutor;
    import org.apache.commons.exec.Executor;
    import org.apache.commons.exec.PumpStreamHandler;
    import org.apache.commons.io.IOUtils;
    
    import java.io.*;
    import java.net.URL;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    class Spider {
        public void catchvideo(String url,String addr) throws IOException {
            //TODO 建立无头浏览器
            WebClient webClient = new WebClient();
            webClient.getOptions().setJavaScriptEnabled(false);
            webClient.getOptions().setCssEnabled(false);
            webClient.getOptions().setThrowExceptionOnFailingStatusCode(true);
            webClient.getOptions().setThrowExceptionOnScriptError(true);
            webClient.addRequestHeader("Referer", "https://www.bilibili.com/index.html");
            webClient.addRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.40");
            //TODO 设置请求参数,建立请求
            WebRequest webRequest = new WebRequest(new URL(url), HttpMethod.GET);
    
            //TODO 获取响应体
            Page page = webClient.getPage(webRequest);
            WebResponse webResponse = page.getWebResponse();
            String contentAsString = webResponse.getContentAsString();
    //        System.out.println(contentAsString);
    
            //TODO 模式匹配找视频总数
            Pattern pattern = Pattern.compile("");
                matcher = pattern.matcher(contentAsString);
                String s2 = null;
                if (matcher.find())
                    s2 = matcher.group(1);
                else
                    System.out.println("没有找到");
                String videolink = JSON.parseObject(s2).getJSONObject("data").getJSONObject("dash").getJSONArray("video").getJSONObject(0).getString("baseUrl");
                String audiolink = JSON.parseObject(s2).getJSONObject("data").getJSONObject("dash").getJSONArray("audio").getJSONObject(0).getString("baseUrl");
    //            System.out.println("视频下载链接\n" + videolink);
    //            System.out.println("音频下载链接\n" + audiolink);
    
                //TODO 获取视频名称
                pattern = Pattern.compile("(.*?)_哔哩哔哩_bilibili");
                matcher = pattern.matcher(contentAsString);
                String s3 = null;
                if (matcher.find())
                    s3 = matcher.group(1);
                else
                    System.out.println("没有找到");
                //目录名去除./&*这些字符
                String videoname = s3.replaceAll("[/&*_,《》\\s+]", "");
                System.out.println(i + "_________________________" + videoname);
                String videofile = dir + "tmp_" + videoname + ".mp4";
                String audiofile = dir + "tmp_" + videoname + ".mp3";
    
                //TODO 下载视频
                webRequest = new WebRequest(new URL(videolink), HttpMethod.GET);
                page = webClient.getPage(webRequest);
                webResponse = page.getWebResponse();
                InputStream inputStream = webResponse.getContentAsStream();
                OutputStream outputStream = new FileOutputStream(videofile);
                IOUtils.copy(inputStream, outputStream);
                inputStream.close();
                outputStream.close();
    
    
                //TODO 下载音频
                webRequest = new WebRequest(new URL(audiolink), HttpMethod.GET);
                page = webClient.getPage(webRequest);
                webResponse = page.getWebResponse();
                inputStream = webResponse.getContentAsStream();
    
                outputStream = new FileOutputStream(audiofile);
                IOUtils.copy(inputStream, outputStream);
                inputStream.close();
                outputStream.close();
    
                //TODO 执行合并命令
    
                // 创建命令行
                CommandLine commandLine = CommandLine.parse("ffmpeg -i " + videofile + " -i " + audiofile + " -c:v copy -c:a aac -strict experimental " + dir + i + "_" + videoname + ".mp4"); // 使用 Windows cmd 命令作为示例
                // 创建执行器
                Executor executor = new DefaultExecutor();
                // 设置输出流处理器(可选)
                PumpStreamHandler streamHandler = new PumpStreamHandler(System.out, System.err); // 将标准输出和错误输出重定向到控制台
                executor.setStreamHandler(streamHandler);
                // 执行命令
    //            System.out.println(commandLine);
                executor.execute(commandLine);
    //            int exitValue = executor.execute(commandLine); // 执行命令并获取退出值
    //            System.out.println("Exit value: " + exitValue); // 打印退出值(通常为0表示成功)
    
                File file = new File(audiofile);
                file.delete();
    
                file = new File(videofile);
                file.delete();
            }
        }
    }
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142

    可视化代码

    import javax.swing.*;
    import java.awt.*;
    import java.awt.event.ActionEvent;
    import java.awt.event.ActionListener;
    import java.io.File;
    import java.io.IOException;
    
    /**
     * Small Swing front end for {@code Spider}: one row for the video URL with a
     * download button, one row for the target directory with a folder chooser.
     */
    public class SwingDemo {
        /** Entry point; builds the UI on the event dispatch thread as Swing requires. */
        public static void main(String[] args) {
            SwingUtilities.invokeLater(SwingDemo::createAndShowUi);
        }

        /** Creates the window, wires the two buttons and shows the frame. */
        private static void createAndShowUi() {
            JFrame jFrame = new JFrame("Swing frame");
            // Closing the window terminates the program.
            jFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
            JPanel panel = new JPanel();
            jFrame.setContentPane(panel);
            panel.setLayout(new FlowLayout());

            JLabel jLabel = new JLabel("下载地址");
            JTextField jTextField = new JTextField(20);
            jTextField.setToolTipText("下载地址");
            JButton download = new JButton("下载");

            panel.add(jLabel);
            panel.add(jTextField);
            panel.add(download);

            JLabel jLabel1 = new JLabel("文件保存位置");
            JTextField jTextField1 = new JTextField(20);
            jTextField1.setText("D:\\videos\\");
            jTextField1.setToolTipText("文件保存位置");
            JButton fileaddr = new JButton("选择文件夹");

            panel.add(jLabel1);
            panel.add(jTextField1);
            panel.add(fileaddr);

            fileaddr.addActionListener(e -> {
                JFileChooser fileChooser = new JFileChooser();
                fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
                int returnValue = fileChooser.showOpenDialog(jFrame);
                if (returnValue == JFileChooser.APPROVE_OPTION) {
                    String chosen = fileChooser.getSelectedFile().getAbsolutePath();
                    // Keep a trailing separator, like the default value, so the path
                    // can be safely prefixed to a file name.
                    jTextField1.setText(chosen.endsWith(File.separator) ? chosen : chosen + File.separator);
                }
            });

            download.addActionListener(e -> {
                String input = jTextField.getText().trim();
                if (input.isEmpty()) {
                    JOptionPane.showMessageDialog(jFrame, "请输入下载地址");
                    return;
                }
                // Avoid producing a double slash when the user already typed one.
                String url = input.endsWith("/") ? input : input + "/";
                String fileAddr = jTextField1.getText();
                System.out.println(url);
                System.out.println(fileAddr);

                // The download and the ffmpeg merge can take a long time; run them off
                // the event dispatch thread so the window stays responsive.
                download.setEnabled(false);
                Thread worker = new Thread(() -> {
                    String error = null;
                    try {
                        Spider spider = new Spider();
                        spider.catchvideo(url, fileAddr);
                    } catch (IOException | RuntimeException ex) {
                        ex.printStackTrace();
                        error = ex.toString();
                    }
                    final String failure = error;
                    // Swing components may only be touched from the event dispatch thread.
                    SwingUtilities.invokeLater(() -> {
                        download.setEnabled(true);
                        if (failure != null) {
                            JOptionPane.showMessageDialog(jFrame, "下载失败: " + failure,
                                    "错误", JOptionPane.ERROR_MESSAGE);
                        }
                    });
                }, "video-download");
                worker.start();
            });

            // Size the frame to its contents first; centring needs the final size.
            jFrame.pack();
            jFrame.setLocationRelativeTo(null);
            jFrame.setVisible(true);
        }
    }
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
  • 相关阅读:
    TypeError: __init__() got an unexpected keyword argument ‘autocompletion‘
    简单博客网页
    element-plus使用el-date-picker组件时,如何禁止用户选择当前时间之后的日时分秒
    解决reudx中的异步问题 applyMiddleware thunk
    【原创】ZLMediaKit学习(二):Android播流 - 大盘站
    动手学深度学习(pytorch版)第二章-2.3线性代数Note-linear-algebra
    Windows与网络基础-16-Windows共享
    Beacon帧
    heygen模型接口 简单使用 java版
    QT连接mysql数据库
  • 原文地址:https://blog.csdn.net/m0_53683186/article/details/134424331