Chproxy 是一个用于 ClickHouse 数据库的 HTTP 代理和负载均衡器,具有以下特性(具体详情可到 Chproxy 官网查看):
- 可以根据输入用户将请求代理到多个不同的 ClickHouse 集群。比如,把来自 appserver 用户的请求代理到 stats-raw 集群,把来自 reportserver 用户的请求代理到 stats-aggregate 集群。
- 使用 least loaded 和 round robin 技术实现请求在副本和节点间的负载均衡。
- 在将请求代理到 ClickHouse 之前,预先将 User-Agent 请求头与远程/本地地址,和输入/输出的用户名进行关联,因此这些信息可以在 system.query_log.http_user_agent 中查询到。
- 无需重启即可更新配置,只需向 chproxy 进程发送一个 SIGHUP 信号即可。
- 易于管理和运行,只需将配置文件路径传递给 chproxy 即可。
- 安装简单,只需下载最新的包、解压即可启动。
解压文件到 /opt/module/chproxy/ 目录
# config.yml
#
# Whether to print debug logs.
#
# By default debug logs are disabled.
log_debug: true
# Whether to ignore security checks during config parsing.
#
# By default security checks are enabled.
# NOTE(review): setting this to true skips those checks; confirm this is
# intended before using the config outside a test environment.
hack_me_please: true
# Optional response cache configs.
#
# Multiple distinct caches with different settings may be configured.
caches:
  # Cache name, which may be passed into `cache` option on the `user` level.
  #
  # Multiple users may share the same cache.
  - name: "longterm"
    # Cache mode, either [[file_system]] or [[redis]]
    mode: "file_system"
    # Applicable for cache mode: file_system
    file_system:
      # Path to directory where cached responses will be stored.
      dir: "/opt/module/chproxy/longterm/cachedir"
      # Maximum cache size.
      # `Kb`, `Mb`, `Gb` and `Tb` suffixes may be used.
      max_size: 512Mb
    # Expiration time for cached responses.
    expire: 1h
# Named list of parameters to apply to each query.
# The params are sent along with every request proxied to ClickHouse and
# override ClickHouse's own settings.
param_groups:
  # Group name, which may be passed into `params` option on the `user` level.
  - name: "default_param_setting"
    # List of key-value params to send
    params:
      - key: "replication_alter_partitions_sync"
        value: "2"
      - key: "max_memory_usage"
        value: "3000000000"
      - key: "max_bytes_before_external_group_by"
        value: "3000000000"
      - key: "max_bytes_before_external_sort"
        value: "3000000000"
# Settings for `chproxy` input interfaces.
server:
  # Configs for input http interface.
  # The interface works only if this section is present.
  http:
    # TCP address to listen to for http.
    # May be in the form IP:port . IP part is optional.
    listen_addr: ":9090"
    # List of allowed networks or network_groups.
    # Each item may contain IP address, IP subnet mask or a name
    # from `network_groups`.
    # By default requests are accepted from all the IPs.
    # allowed_networks: ["0.0.0.0"]
    # ReadTimeout is the maximum duration for proxy to reading the entire
    # request, including the body.
    # Default value is 1m
    read_timeout: 5m
    # WriteTimeout is the maximum duration for proxy before timing out
    # writes of the response.
    # Default is largest MaxExecutionTime + MaxQueueTime value from
    # Users or Clusters
    write_timeout: 10m
    # IdleTimeout is the maximum amount of time for proxy to wait for
    # the next request.
    # Default is 10m
    idle_timeout: 20m
# Configs for input users.
# NOTE(review): the passwords below look like example values; replace them
# before real use.
users:
  # Name and password are used to authorize access via BasicAuth or
  # via `user`/`password` query params.
  # Password is optional. By default empty password is used.
  - name: "default"
    password: "123456"
    to_cluster: "my_cluster"
    to_user: "default"
    params: "default_param_setting"
  - name: "writer"
    password: "123456"
    # Requests from the user are routed to this cluster.
    to_cluster: "my_cluster"
    # Input user is substituted by the given output user from `to_cluster`
    # before proxying the request.
    to_user: "default"
    # Maximum number of concurrent queries.
    # max_concurrent_queries: 1
    # Maximum duration of query execution for the user. Per the original
    # note there is no limit by default (verify against your chproxy version).
    # Chproxy automatically kills queries exceeding the max_execution_time
    # limit.
    # max_execution_time: 2s
    # Requests per minute limit for the given input user.
    # By default there is no per-minute limit.
    # NOTE(review): the original note says that when the same limit is also
    # set elsewhere (the name was lost in the source text), the smaller value
    # takes effect - confirm which setting was meant.
    # requests_per_minute: 6
    # The maximum number of requests that may wait for their chance
    # to be executed because they cannot run now due to the current limits.
    # Used together with `max_queue_time` below: this one bounds the queue
    # length, the other bounds how long a queued request may wait.
    #
    # This option may be useful for handling request bursts from `tabix`
    # or `clickhouse-grafana`.
    #
    # By default all the requests are immediately executed without
    # waiting in the queue.
    max_queue_size: 1
    # The maximum duration the queued requests may wait for their chance
    # to be executed.
    # This option makes sense only if max_queue_size is set.
    # By default requests wait for up to 10 seconds in the queue.
    max_queue_time: 35s
    # Optional group of params name to send to ClickHouse with each proxied
    # request from this input user. The params override ClickHouse's own
    # settings.
    # By default no additional params are sent to ClickHouse.
    params: "default_param_setting"
    # Response cache config name to use.
    # By default responses aren't cached
    # cache: "longterm"
# Configs for ClickHouse clusters.
clusters:
  # The cluster name is used in `to_cluster`.
  - name: "my_cluster"
    # Protocol to use for communicating with cluster nodes.
    # Currently supported values are `http` or `https`.
    # By default `http` is used.
    scheme: "http"
    replicas:
      - name: "replica1"
        nodes: ["172.26.20.120:8123", "172.26.20.121:8123"]
      - name: "replica2"
        nodes: ["172.26.20.122:8123", "172.26.20.123:8123"]
    # User configuration for heart beat requests.
    # Credentials of the first user in clusters.users will be used for
    # heart beat requests to clickhouse.
    heartbeat:
      # An interval for checking all cluster nodes for availability
      # By default each node is checked for every 5 seconds.
      interval: 5s
      # A timeout of wait response from cluster nodes
      # By default 3s
      timeout: 10s
      # The parameter to set the URI to request in a health check
      # By default "/?query=SELECT%201"
      request: "/?query=SELECT%201%2B1"
      # Reference response from clickhouse on health check request
      # By default "1\n"
      response: "2\n"
    # Timed out queries are killed using this user.
    # By default `default` user is used.
    kill_query_user:
      name: "default"
      password: "123456"
    # Configuration for cluster users.
    users:
      # The user name is used in `to_user`.
      - name: "default"
        password: "123456"
        # Maximum number of concurrent queries for the user.
        # max_concurrent_queries: 1
        # Maximum duration of query execution for the user.
        # max_execution_time: 5s
        # Maximum number of requests per minute for the user.
        # NOTE(review): the original note says that when the same limit is
        # also set elsewhere (the name was lost in the source text), the
        # smaller value takes effect - confirm which setting was meant.
        # requests_per_minute: 5
        # The maximum number of requests that may wait in the queue.
        max_queue_size: 1
        # The maximum duration the queued requests may wait in the queue.
        max_queue_time: 10s
# Run chproxy in the foreground using the config.yml shown above.
/opt/module/chproxy/chproxy -config=/opt/module/chproxy/config.yml
#!/bin/bash
# start.sh: launch chproxy in the background and record its PID.

# Resolve the directory that contains this script so it can be run from
# any working directory.
baseDir=$(cd "$(dirname "$0")" && pwd)

# The log redirect below fails if the directory does not exist yet.
mkdir -p "$baseDir/logs"

nohup "$baseDir/chproxy" -config="$baseDir/config.yml" > "$baseDir/logs/chproxy.log" 2>&1 &
# $! is the PID of the background job started on the previous line.
echo $! > "$baseDir/pid"
#!/bin/bash
# shutdown.sh: stop the chproxy process whose PID is stored in the pid file.

baseDir=$(cd "$(dirname "$0")" && pwd)
pidFile="$baseDir/pid"

# Without a pid file there is nothing to stop; `kill` with no argument
# would only print a usage error.
if [ ! -f "$pidFile" ]; then
    echo "pid file not found: $pidFile" >&2
    exit 1
fi

kill -9 "$(cat "$pidFile")"
# Drop the stale pid file so a later run cannot kill an unrelated process
# that happens to reuse the same PID.
rm -f "$pidFile"
#!/bin/bash
# restart.sh: stop the running chproxy (if any) and start a new one.

baseDir=$(cd "$(dirname "$0")" && pwd)
pidFile="$baseDir/pid"

# Only attempt the kill when a PID was recorded; a missing pid file just
# means there is nothing to stop, so the start below still proceeds.
if [ -f "$pidFile" ]; then
    kill -9 "$(cat "$pidFile")"
fi

# The log redirect below fails if the directory does not exist yet.
mkdir -p "$baseDir/logs"

nohup "$baseDir/chproxy" -config="$baseDir/config.yml" > "$baseDir/logs/chproxy.log" 2>&1 &
# $! is the PID of the background job started on the previous line.
echo $! > "$pidFile"
# Follow the chproxy log file as new lines are appended.
tail -f /opt/module/chproxy/logs/chproxy.log