• 集群节点状态监控和flink作业监控


    架构:yarn+flink+supervisor+pushgateway+prometheus+node_exporter+grafana

    下载如下离线包,安装

    grafana-enterprise-9.2.3.linux-amd64.tar.gz
    node_exporter-1.4.0.linux-amd64.tar.gz
    prometheus-2.39.1.linux-amd64.tar.gz
    pushgateway-1.4.3.linux-amd64.tar.gz

    在supervisor中配置上面组件的启动文件

    [baiyun@hadoop102 supervisord.d]$ ll
    总用量 16
    -rw-rw-r–. 1 baiyun baiyun 371 11月 2 23:32 grafana.ini
    -rw-rw-r–. 1 baiyun baiyun 276 11月 3 00:57 node_exporter.ini
    -rw-rw-r–. 1 baiyun baiyun 448 11月 3 00:02 prometheus.ini
    -rw-rw-r–. 1 baiyun baiyun 293 11月 3 00:01 pushgateway.ini

    grafana.ini

    [baiyun@hadoop102 supervisord.d]$ cat grafana.ini 
    [program:grafana]
    command=/home/baiyun/my_soft/grafana/bin/grafana-server --homepath /home/baiyun/my_soft/grafana web
    user=root
    autostart=true
    autorestart=true
    startsecs=10
    exitcodes=0
    redirect_stderr=true
    stdout_logfile=/home/baiyun/my_soft/grafana/log/grafana.log
    stdout_logfile_backups=5
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    node_exporter.ini

    [baiyun@hadoop102 supervisord.d]$ cat node_exporter.ini 
    [program:node_exporter]
    command=/home/baiyun/my_soft/node_exporter/node_exporter
    user=root
    autostart=true
    autorestart=true
    startsecs=10
    exitcodes=0
    redirect_stderr=true
    stdout_logfile=/home/baiyun/my_soft/node_exporter/log/node_exporter.log
    stdout_logfile_backups=5
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    prometheus.ini

    [baiyun@hadoop102 supervisord.d]$ cat prometheus.ini 
    [program:prometheus]
    command=/home/baiyun/my_soft/prometheus/prometheus --config.file=/home/baiyun/my_soft/prometheus/prometheus.yml --storage.tsdb.path /home/baiyun/my_soft/prometheus/data --storage.tsdb.retention.time 1d --enable-feature=promql-at-modifier
    user=root
    autostart=true
    autorestart=true
    startsecs=10
    exitcodes=0
    redirect_stderr=true
    stdout_logfile=/home/baiyun/my_soft/prometheus/log/prometheus.log
    stdout_logfile_backups=5
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    pushgateway.ini

    [baiyun@hadoop102 supervisord.d]$ cat pushgateway.ini 
    [program:pushgateway]
    command=/home/baiyun/my_soft/pushgateway/pushgateway --web.listen-address :9091
    user=root
    autostart=true
    autorestart=true
    startsecs=10
    exitcodes=0
    redirect_stderr=true
    stdout_logfile=/home/baiyun/my_soft/pushgateway/log/pushgateway.log
    stdout_logfile_backups=5
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    supervisord.conf

    [baiyun@hadoop102 my_etc]$ cat supervisord.conf 
    ; Sample supervisor config file.
    ;
    ; For more information on the config file, please see:
    ; http://supervisord.org/configuration.html
    ;
    ; Notes:
    ;  - Shell expansion ("~" or "$HOME") is not supported.  Environment
    ;    variables can be expanded using this syntax: "%(ENV_HOME)s".
    ;  - Quotes around values are not supported, except in the case of
    ;    the environment= options as shown below.
    ;  - Comments must have a leading space: "a=b ;comment" not "a=b;comment".
    ;  - Command will be truncated if it looks like a config file comment, e.g.
    ;    "command=bash -c 'foo ; bar'" will truncate to "command=bash -c 'foo ".
    ;
    ; Warning:
    ;  Paths throughout this example file use /tmp because it is available on most
    ;  systems.  You will likely need to change these to locations more appropriate
    ;  for your system.  Some systems periodically delete older files in /tmp.
    ;  Notably, if the socket file defined in the [unix_http_server] section below
    ;  is deleted, supervisorctl will be unable to connect to supervisord.
    
    [unix_http_server]
    file=/var/run/supervisor/supervisor.sock    ; the path to the socket file
    ;chmod=0700                 ; socket file mode (default 0700)
    ;chown=nobody:nogroup       ; socket file uid:gid owner
    ;username=user              ; default is no username (open server)
    ;password=123               ; default is no password (open server)
    
    ; Security Warning:
    ;  The inet HTTP server is not enabled by default.  The inet HTTP server is
    ;  enabled by uncommenting the [inet_http_server] section below.  The inet
    ;  HTTP server is intended for use within a trusted environment only.  It
    ;  should only be bound to localhost or only accessible from within an
    ;  isolated, trusted network.  The inet HTTP server does not support any
    ;  form of encryption.  The inet HTTP server does not use authentication
    ;  by default (see the username= and password= options to add authentication).
    ;  Never expose the inet HTTP server to the public internet.
    
    [inet_http_server]         ; inet (TCP) server disabled by default
    port=0.0.0.0:9001        ; ip_address:port specifier, *:port for all iface
    ;username=user              ; default is no username (open server)
    ;password=123               ; default is no password (open server)
    
    [supervisord]
    logfile=/var/log/supervisor/supervisord.log ; main log file; default $CWD/supervisord.log
    logfile_maxbytes=50MB        ; max main logfile bytes b4 rotation; default 50MB
    logfile_backups=10           ; # of main logfile backups; 0 means none, default 10
    loglevel=info                ; log level; default info; others: debug,warn,trace
    pidfile=/var/run/supervisord.pid ; supervisord pidfile; default supervisord.pid
    nodaemon=false               ; start in foreground if true; default false
    silent=false                 ; no logs to stdout if true; default false
    minfds=1024                  ; min. avail startup file descriptors; default 1024
    minprocs=200                 ; min. avail process descriptors;default 200
    ;umask=022                   ; process file creation umask; default 022
    ;user=supervisord            ; setuid to this UNIX account at startup; recommended if root
    ;identifier=supervisor       ; supervisord identifier, default is 'supervisor'
    ;directory=/tmp              ; default is not to cd during start
    ;nocleanup=true              ; don't clean up tempfiles at start; default false
    ;childlogdir=/tmp            ; 'AUTO' child log dir, default $TEMP
    ;environment=KEY="value"     ; key value pairs to add to environment
    ;strip_ansi=false            ; strip ansi escape codes in logs; def. false
    
    ; The rpcinterface:supervisor section must remain in the config file for
    ; RPC (supervisorctl/web interface) to work.  Additional interfaces may be
    ; added by defining them in separate [rpcinterface:x] sections.
    
    [rpcinterface:supervisor]
    supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
    
    ; The supervisorctl section configures how supervisorctl will connect to
    ; supervisord.  configure it match the settings in either the unix_http_server
    ; or inet_http_server section.
    
    [supervisorctl]
    serverurl=unix:///var/run/supervisor/supervisor.sock  ; use a unix:// URL  for a unix socket
    ;serverurl=http://127.0.0.1:9001 ; use an http:// url to specify an inet socket
    ;username=chris              ; should be same as in [*_http_server] if set
    ;password=123                ; should be same as in [*_http_server] if set
    ;prompt=mysupervisor         ; cmd line prompt (default "supervisor")
    ;history_file=~/.sc_history  ; use readline history if available
    
    ; The sample program section below shows all possible program subsection values.
    ; Create one or more 'real' program: sections to be able to control them under
    ; supervisor.
    
    ;[program:theprogramname]
    ;command=/bin/cat              ; the program (relative uses PATH, can take args)
    ;process_name=%(program_name)s ; process_name expr (default %(program_name)s)
    ;numprocs=1                    ; number of processes copies to start (def 1)
    ;directory=/tmp                ; directory to cwd to before exec (def no cwd)
    ;umask=022                     ; umask for process (default None)
    ;priority=999                  ; the relative start priority (default 999)
    ;autostart=true                ; start at supervisord start (default: true)
    ;startsecs=1                   ; # of secs prog must stay up to be running (def. 1)
    ;startretries=3                ; max # of serial start failures when starting (default 3)
    ;autorestart=unexpected        ; when to restart if exited after running (def: unexpected)
    ;exitcodes=0                   ; 'expected' exit codes used with autorestart (default 0)
    ;stopsignal=QUIT               ; signal used to kill process (default TERM)
    ;stopwaitsecs=10               ; max num secs to wait b4 SIGKILL (default 10)
    ;stopasgroup=false             ; send stop signal to the UNIX process group (default false)
    ;killasgroup=false             ; SIGKILL the UNIX process group (def false)
    ;user=chrism                   ; setuid to this UNIX account to run the program
    ;redirect_stderr=true          ; redirect proc stderr to stdout (default false)
    ;stdout_logfile=/a/path        ; stdout log path, NONE for none; default AUTO
    ;stdout_logfile_maxbytes=1MB   ; max # logfile bytes b4 rotation (default 50MB)
    ;stdout_logfile_backups=10     ; # of stdout logfile backups (0 means none, default 10)
    ;stdout_capture_maxbytes=1MB   ; number of bytes in 'capturemode' (default 0)
    ;stdout_events_enabled=false   ; emit events on stdout writes (default false)
    ;stdout_syslog=false           ; send stdout to syslog with process name (default false)
    ;stderr_logfile=/a/path        ; stderr log path, NONE for none; default AUTO
    ;stderr_logfile_maxbytes=1MB   ; max # logfile bytes b4 rotation (default 50MB)
    ;stderr_logfile_backups=10     ; # of stderr logfile backups (0 means none, default 10)
    ;stderr_capture_maxbytes=1MB   ; number of bytes in 'capturemode' (default 0)
    ;stderr_events_enabled=false   ; emit events on stderr writes (default false)
    ;stderr_syslog=false           ; send stderr to syslog with process name (default false)
    ;environment=A="1",B="2"       ; process environment additions (def no adds)
    ;serverurl=AUTO                ; override serverurl computation (childutils)
    
    ; The sample eventlistener section below shows all possible eventlistener
    ; subsection values.  Create one or more 'real' eventlistener: sections to be
    ; able to handle event notifications sent by supervisord.
    
    ;[eventlistener:theeventlistenername]
    ;command=/bin/eventlistener    ; the program (relative uses PATH, can take args)
    ;process_name=%(program_name)s ; process_name expr (default %(program_name)s)
    ;numprocs=1                    ; number of processes copies to start (def 1)
    ;events=EVENT                  ; event notif. types to subscribe to (req'd)
    ;buffer_size=10                ; event buffer queue size (default 10)
    ;directory=/tmp                ; directory to cwd to before exec (def no cwd)
    ;umask=022                     ; umask for process (default None)
    ;priority=-1                   ; the relative start priority (default -1)
    ;autostart=true                ; start at supervisord start (default: true)
    ;startsecs=1                   ; # of secs prog must stay up to be running (def. 1)
    ;startretries=3                ; max # of serial start failures when starting (default 3)
    ;autorestart=unexpected        ; autorestart if exited after running (def: unexpected)
    ;exitcodes=0                   ; 'expected' exit codes used with autorestart (default 0)
    ;stopsignal=QUIT               ; signal used to kill process (default TERM)
    ;stopwaitsecs=10               ; max num secs to wait b4 SIGKILL (default 10)
    ;stopasgroup=false             ; send stop signal to the UNIX process group (default false)
    ;killasgroup=false             ; SIGKILL the UNIX process group (def false)
    ;user=chrism                   ; setuid to this UNIX account to run the program
    ;redirect_stderr=false         ; redirect_stderr=true is not allowed for eventlisteners
    ;stdout_logfile=/a/path        ; stdout log path, NONE for none; default AUTO
    ;stdout_logfile_maxbytes=1MB   ; max # logfile bytes b4 rotation (default 50MB)
    ;stdout_logfile_backups=10     ; # of stdout logfile backups (0 means none, default 10)
    ;stdout_events_enabled=false   ; emit events on stdout writes (default false)
    ;stdout_syslog=false           ; send stdout to syslog with process name (default false)
    ;stderr_logfile=/a/path        ; stderr log path, NONE for none; default AUTO
    ;stderr_logfile_maxbytes=1MB   ; max # logfile bytes b4 rotation (default 50MB)
    ;stderr_logfile_backups=10     ; # of stderr logfile backups (0 means none, default 10)
    ;stderr_events_enabled=false   ; emit events on stderr writes (default false)
    ;stderr_syslog=false           ; send stderr to syslog with process name (default false)
    ;environment=A="1",B="2"       ; process environment additions
    ;serverurl=AUTO                ; override serverurl computation (childutils)
    
    ; The sample group section below shows all possible group values.  Create one
    ; or more 'real' group: sections to create "heterogeneous" process groups.
    
    ;[group:thegroupname]
    ;programs=progname1,progname2  ; each refers to 'x' in [program:x] definitions
    ;priority=999                  ; the relative start priority (default 999)
    
    ; The [include] section can just contain the "files" setting.  This
    ; setting can list multiple files (separated by whitespace or
    ; newlines).  It can also contain wildcards.  The filenames are
    ; interpreted as relative to this file.  Included files *cannot*
    ; include files themselves.
    
    [include]
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142
    • 143
    • 144
    • 145
    • 146
    • 147
    • 148
    • 149
    • 150
    • 151
    • 152
    • 153
    • 154
    • 155
    • 156
    • 157
    • 158
    • 159
    • 160
    • 161
    • 162
    • 163
    • 164
    • 165
    • 166
    • 167
    • 168
    • 169
    • 170

    prometheus.yml

    [baiyun@hadoop102 prometheus]$ cat prometheus.yml 
    # my global config
    global:
      scrape_interval: 30s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
      evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
      # scrape_timeout is set to the global default (10s).
    
    # Alertmanager configuration
    alerting:
      alertmanagers:
        - static_configs:
            - targets:
              # - alertmanager:9093
    
    # Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
    rule_files:
      # - "first_rules.yml"
      # - "second_rules.yml"
    
    # A scrape configuration containing exactly one endpoint to scrape:
    # Here it's Prometheus itself.
    scrape_configs:
       - job_name: "prometheus"
         static_configs:
         - targets: ["localhost:9090"]
    
       - job_name: 'pushgateway'
         static_configs:
          - targets: ['localhost:9091']
            labels:
                 instance: pushgateway
    
       - job_name: 'node exporter'
         static_configs:
          - targets: ['localhost:9100']
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35

    flink-conf.yaml

    classloader.check-leaked-classloader: false
    ##### 与Prometheus集成配置 #####
    metrics.reporter.promgateway.class: org.apache.flink.metrics.prometheus.PrometheusPushGatewayReporter
    ## PushGateway的主机名与端口号
    metrics.reporter.promgateway.host: xxx
    metrics.reporter.promgateway.port: 9091
    ## Flink metric在前端展示的标签(前缀)与随机后缀
    metrics.reporter.promgateway.jobName: flink-metrics
    metrics.reporter.promgateway.randomJobNameSuffix: true
    metrics.reporter.promgateway.deleteOnShutdown: false
    metrics.reporter.promgateway.interval: 30 SECONDS
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    webui

    http://192.168.138.102:9090/

    在这里插入图片描述

    http://192.168.138.102:3000/

    在这里插入图片描述

    http://192.168.138.102:9001/

    在这里插入图片描述

  • 相关阅读:
    python教程:封装与解构
    PEG/蛋白Protein/抗体antibody 功能化修饰硫化锌量子点 ZnS QDs
    人才网招聘网程序人才招聘系统/招聘APP/H5/求职招聘兼职/招聘企业职位求职招聘小程序
    精英荟聚,入海捉蛟 | 2022年全国水下机器人大赛线上赛圆满举办
    数据优化 | CnOpenData中国工业企业专利及引用被引用数据
    K8s---HPA弹性伸缩
    简易介绍如何使用淘宝商品详情 API(关键词搜索商品列表API)
    弄懂Rust编程中的Trait
    linux下的io_uring和windows下的overlapped均可实现异步I/O,有什么异同?
    安装Docker后的一些配置
  • 原文地址:https://blog.csdn.net/JavaBigData/article/details/127700651