
# mysql

```sql
-- Check whether the component ids and link-node ids are normal (rows whose chooseId is duplicated)
select a.*
from levelinfo_view a
inner join (
    select chooseId, count(1) as n
    from levelinfo_view
    group by chooseId
    having n > 1
) b on a.chooseId = b.chooseId
order by a.chooseId
```

# ck

## Monitoring

```bash
# Plugin
wget https://github.com/Vertamedia/clickhouse-grafana/releases/download/v2.3.1/vertamedia-clickhouse-datasource-2.3.1.zip
# Unzip into the Grafana plugin directory
unzip vertamedia-clickhouse-datasource-2.3.1.zip -d /var/lib/grafana/plugins/
ls /var/lib/grafana/plugins/
vertamedia-clickhouse-datasource

# Dashboard templates
https://grafana.com/grafana/dashboards/13606
https://grafana.com/grafana/dashboards/2515
```

## sql

```sql
-- List failed mutations
select * from system.mutations where is_done = 0;
-- Kill a failed mutation
KILL MUTATION WHERE database = 'easyviews' AND table = 'bpm_base_local';

-- What secondIndex corresponds to a given time
select dateDiff('second', toStartOfDay(toDateTime('2023-10-20 09:40:00')), toDateTime('2023-10-20 09:40:00')) secondIndex

-- Check whether a given function code exists in the database
SELECT bb.componentId, bb.componentName, count(*)
from easyviews.bpm_base bb
where date = today() and bb.dimension7 in ('270998', '213998')
group by bb.componentId, bb.componentName

-- Maximum transaction count in a single time slot
SELECT datetime, SUM(allTransCount) as num
FROM easyviews.bpm_indicator
WHERE `date` = today() and `level` = 3
GROUP by datetime
order by num desc
limit 1;

-- Maximum transaction count in a single time slot for a single event; set levelUuid to the event id
SELECT datetime, SUM(allTransCount) as num
FROM easyviews.bpm_indicator
WHERE `date` = today() and levelUuid = 267 AND `level` = 4
GROUP by datetime
order by num desc
limit 1;

-- 1. Group by function code and count the requests per function code per second,
--    then sort descending and take the top function code
WITH max_counts AS (
    SELECT dimension7, MAX(count) AS max_count
    FROM (
        SELECT dimension7, secondIndex, COUNT(*) AS count
        FROM easyviews.bpm_base
        WHERE `date` = today() and `datetime` < '2024-03-14 09:35:00' and `datetime` > '2024-03-14 09:26:00'
        GROUP BY dimension7, secondIndex
    )
    GROUP BY dimension7
)
SELECT b.dimension7, b.secondIndex, a.max_count
FROM max_counts a
JOIN (
    SELECT dimension7, secondIndex, COUNT(*) AS count
    FROM easyviews.bpm_base
    WHERE `date` = today() and `datetime` < '2024-03-14 09:35:00' and `datetime` > '2024-03-14 09:26:00'
    GROUP BY dimension7, secondIndex
) b ON a.dimension7 = b.dimension7 AND a.max_count = b.count
ORDER BY a.max_count DESC;
```

```bash
# If there are many SQL statements, or very long ones, put them in a file and run them in batch:
clickhouse-client --user <user> --password <password> -d <database> --multiquery < /root/temp.sql

# Inspect how a query executes (server-side trace logs):
clickhouse-client -h localhost --send_logs_level=trace <<<"SQL statement" >/dev/null

# Export to a csv file:
clickhouse-client --query="select * from default.t_city" > city.csv
# or
# echo 'select * from default.t_city' | curl localhost:8123?database=default -udefault:password -d @- > table_name.sql

# Import a csv file
cat city.csv | clickhouse-client --query "insert into city FORMAT CSV"

# For small data volumes, export a csv with the column names, then import it
clickhouse> select * from default.t_city INTO OUTFILE '/data/t_city.csv' FORMAT CSVWithNames;
```

[gitee ck other](https://gitee.com/zhouguanyulovejiadanyang/learnnodel/tree/master)

31123 and 31124 are NodePorts; 38123 and 39000 are hostPorts.

```sql
-- Top 10 slowest queries in a time window
SELECT query, query_duration_ms, event_time
FROM system.query_log AS ql
WHERE (event_time >= '2023-04-20 16:00:00') AND (event_time < '2023-04-20 16:15:00')
ORDER BY query_duration_ms DESC
LIMIT 10

-- Slowest query patterns (normalized) that ran more than 10 times
SELECT
    normalizeQuery(query) AS q,
    count(),
    any(query),
    avg(read_rows) / 10000000 AS read_rows_kw,
    avg(query_duration_ms),
    max(query_duration_ms)
FROM system.query_log
WHERE (event_date = '2023-10-25') AND (event_time > '2023-10-25 14:00:00') AND (event_time < '2023-10-25 18:30:00') AND (type = 2)
GROUP BY q
HAVING count() > 10
ORDER BY avg(query_duration_ms) DESC
LIMIT 10

-- Currently running queries
select count() from system.processes
select query from system.processes
```
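If `system.processes` shows a runaway query, it can be cancelled by its `query_id`. A minimal sketch (the `query_id` value below is a placeholder to be taken from `system.processes`):

```sql
-- Find the query_id of the offending query
SELECT query_id, elapsed, query FROM system.processes ORDER BY elapsed DESC LIMIT 5;

-- Cancel it; ASYNC returns immediately instead of waiting for the query to stop
KILL QUERY WHERE query_id = 'fe1f2dd8-0000-0000-0000-000000000000' ASYNC;
```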
Table and partition sizes (run these on every node):

```sql
-- 1. Row count, on-disk size and compression ratio of every table in the easyviews database
SELECT
    table AS table_name,
    sum(rows) AS row_num,
    formatReadableSize(sum(data_uncompressed_bytes)) AS org_size,
    formatReadableSize(sum(data_compressed_bytes)) AS compress_size,
    round((sum(data_compressed_bytes) / sum(data_uncompressed_bytes)) * 100, 0) AS compress_ratio
FROM system.parts
WHERE database = 'easyviews'
GROUP BY table
order by sum(rows) desc;

-- 2. Size of every table in the database
select table, sum(bytes_on_disk)/1000/1000/1000 as GB from system.parts group by table order by GB desc

-- 3. Size and row count of every partition, across all tables
select `partition`, sum(rows) as row, formatReadableSize(sum(bytes_on_disk)) as used_disk from system.parts group by `partition` order by partition

-- 4. Size and row count of every partition of a given table
select `partition`, sum(rows) as row, formatReadableSize(sum(bytes_on_disk)) as used_disk from system.parts where `table` = 'bpm_base_local' group by `partition` order by partition

-- Truncate a table
truncate table easyviews.packet_base_local on cluster '{cluster}'

-- Manually compute the five-day average
select `datetime`, `date`, levelUuid, sumMerge(allTransCount) as sm
from easyviews.mv_bpm_indicator_y mbiy
where `level` = 4
  and levelUuid = 6042
  and `date` in ('2023-06-26','2023-06-27','2023-06-25','2023-06-20','2023-06-21')
  and timeIndex = 15*60*60/5
GROUP by levelUuid, `datetime`, centerId, linkId, datasetId, `date`, `level`
ORDER by `date`;
-- 4878 + 4497 + 330 + 4636 + 5164 = 19505
-- 19505 / 5 = 3901

-- Query the five-day average of event 6042
SELECT yDataAvgValueBy5day
FROM easyviews.daily_statistics
where `date` = today() and timeIndex = 15*60*60/5 and levelUuid = 6042;

-- Query the historical peaks
SELECT timeIndex, yDataHistoryMaxValue, yDataHistoryMaxValueDate, insertTime
FROM easyviews.daily_statistics
WHERE date = today()
ORDER BY insertTime desc
LIMIT 100;

-- Query request and response payloads
SELECT
    a.`time`,
    a.`uuid` as rid,
    max(case when a.direction = 0 then a.`data` else null end) as request,
    max(case when a.direction = 1 then a.`data` else null end) as response
from easyviews.bpm_original a
where a.applicationId = 6061 and a.`date` BETWEEN '2023-07-14' and '2023-07-18'
group by a.`time`, a.`uuid`
```
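When a partition listed above is no longer needed, it can be dropped directly instead of truncating the whole table. A sketch, assuming `bpm_base_local` is partitioned by date and that '2023-06-01' is one of the partition values returned by `system.parts`:

```sql
-- Confirm the exact partition value first
select distinct `partition` from system.parts where `table` = 'bpm_base_local';

-- Drop it on every node of the cluster
alter table easyviews.bpm_base_local on cluster '{cluster}' drop partition '2023-06-01';
```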
```sql
-- Compute the success rate
SELECT
    sumMerge(allTransCount) AS allTransCount,
    round(if(isNaN(sumMerge(allStart2ends) / sumMerge(responseTransCount)), null, sumMerge(allStart2ends) / sumMerge(responseTransCount)), 2) AS responseTime,
    round(if(isNaN(sumMerge(responseTransCount) / allTransCount), null, sumMerge(responseTransCount) / allTransCount) * 100, 2) AS responseRate,
    round(if(isNaN(sumMerge(successTransCount) / sumMerge(responseTransCount)), null, sumMerge(successTransCount) / sumMerge(responseTransCount)) * 100, 2) AS successRate
FROM mv_bpm_indicator_hour
WHERE date = '2023-04-12'
  AND time >= toUnixTimestamp('2023-04-12 00:00:00', 'Asia/Shanghai') * 1000
  AND time <= toUnixTimestamp('2023-04-13 00:00:00', 'Asia/Shanghai') * 1000
  AND levelUuid = 5939

-- Count the transactions of several components, grouped by function code
SELECT bb.dimension7, count(*) as ct
from easyviews.bpm_base bb
where
    -- date >= '2024-01-04' and date <= '2024-01-10'
    date in ('2024-01-04','2024-01-05')
    and bb.componentId in (209,210,211)
group by bb.dimension7
order by ct desc
```

```sql
-- time         kept to preserve the full timestamp precision
-- datetime     data timestamp (request time)
-- probeTime    probe output timestamp: when the reassembled data was written to kafka
-- decodeTime   decode output timestamp
-- pairingTime  pairing output timestamp
-- responseTime response time
-- insertTime   time the row was inserted into ClickHouse
-- secondIndex  second of the day; useful for second-level filtering

-- Filter out rows whose responseTime or pairingTime is 0
SELECT
    concat(toString(toDateTime(intDiv(time, 1000000000), 'Asia/Shanghai')), concat('.', toString(time % 1000000000))) as request_time,
    concat(toString(toDateTime(intDiv(responseTime, 1000000000), 'Asia/Shanghai')), concat('.', toString(responseTime % 1000000000))) as respon_Time,
    concat(toString(toDateTime(intDiv(probeTime, 1000000000), 'Asia/Shanghai')), concat('.', toString(probeTime % 1000000000))) as pro_Time,
    concat(toString(toDateTime(intDiv(decodeTime, 1000000000), 'Asia/Shanghai')), concat('.', toString(decodeTime % 1000000000))) as dec_Time,
    concat(toString(toDateTime(intDiv(pairingTime, 1000000000), 'Asia/Shanghai')), concat('.', toString(pairingTime % 1000000000))) as pair_Time,
    insertTime
from easyviews.bpm_base
where date = today()
  and datetime > '2023-09-11 13:38:00'
  and responseTime not in (0)
  and pairingTime not in (0)
limit 10;

-- Per-stage processing delay, worst first
select
    datetime, insertTime,
    toDateTime(probeTime / 1000000000, 'Asia/Shanghai') as probeT,
    toDateTime(decodeTime / 1000000000, 'Asia/Shanghai') as decodeT,
    toDateTime(pairingTime / 1000000000, 'Asia/Shanghai') as pairT,
    toDateTime(consumerTime / 1000000000, 'Asia/Shanghai') as conInsertT,
    toDateTime(JSONExtract(consumerInfo, 'consumerPullTime', 'UInt64') / 1000000000, 'Asia/Shanghai') as conPullT,
    toDateTime(JSONExtract(consumerInfo, 'eventPushTime', 'UInt64') / 1000000000, 'Asia/Shanghai') as eventT,
    (probeT - datetime) as delay,
    componentId, componentName
from easyviews.bpm_base
where date = today()
  and pairingTime != 0
  and `datetime` >= '2023-11-09 17:00:00'
  and `datetime` <= '2023-11-09 18:40:00'
order by delay desc
limit 10;
```

# Replicating MySQL data into ClickHouse in real time

## 1. MySQL database engine

The MySQL database engine maps the tables of a remote MySQL server into ClickHouse and allows INSERT and SELECT against them, which makes it easy to exchange data between ClickHouse and MySQL. The engine does not actually copy MySQL data into ClickHouse storage: ClickHouse acts as a shell that exposes the MySQL tables as ClickHouse tables, so MySQL data can be queried from ClickHouse, and CRUD operations performed in MySQL are reflected in ClickHouse as well.

The engine translates queries into MySQL syntax and sends them to the MySQL server, so operations such as SHOW TABLES or SHOW CREATE TABLE work, but creating tables, altering tables, deleting data and renaming are not allowed.

The MySQL database engine syntax is:

```sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MySQL('host:port', ['database' | database], 'user', 'password')
```
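As a concrete illustration of the syntax above, a sketch that maps the `easyviews` MySQL database used later in this document (the database name `mysql_easyviews` is an arbitrary choice):

```sql
CREATE DATABASE IF NOT EXISTS mysql_easyviews
ENGINE = MySQL('172.24.61.103:30306', 'easyviews', 'root', 'ssqj@easyviews.pw');

-- The MySQL tables are now visible and queryable from ClickHouse
SHOW TABLES FROM mysql_easyviews;
SELECT * FROM mysql_easyviews.sys_version;
```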
## 2. MySQL table engine

Besides the MySQL database engine, which maps a whole MySQL database into ClickHouse, ClickHouse also supports the MySQL table engine, which maps a single MySQL table into ClickHouse. As with the database engine, the mapped table only supports SELECT and INSERT; UPDATE and DELETE are not supported.

The MySQL table engine syntax is:

```sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
    ...
) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']);
```

In both cases ClickHouse simply translates the SQL into MySQL syntax and sends it to the MySQL server; the data is not stored in ClickHouse.

```
[root@hadoop103 mysql]# mysql -h 172.24.61.104 -P 30306 -uroot -pssqj@easyviews.pw easyviews -e "SHOW CREATE TABLE sys_version\G"
*************************** 1. row ***************************
       Table: sys_version
Create Table: CREATE TABLE `sys_version` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `versionNumber` varchar(20) NOT NULL COMMENT '版本号',
  `title` varchar(20) NOT NULL COMMENT '标题',
  `content` varchar(2000) NOT NULL COMMENT '升级内容',
  `insertTime` bigint(20) NOT NULL COMMENT '更新时间',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8 COMMENT='版本信息表'

[root@hadoop103 mysql]# mysql -h 172.24.61.104 -P 30306 -uroot -pssqj@easyviews.pw easyviews -e "select * from sys_version;"
+----+---------------+----------+---------+---------------+
| id | versionNumber | title    | content | insertTime    |
+----+---------------+----------+---------+---------------+
|  1 | V5.2.0.4 | 功能修改 | 【通用】树菜单改造 【多维分析】添加分段查询逻辑 【pcap下载管理】数据包下载排序 【元数据分析】数据包下载支持纳秒 【xml模板】 解析模式新增类型:转发模式 【新建&编辑场景】增加邮件功能 【定制】银河证券告警推送 | 1700928000000 |
|  2 | V5.2.0.5 | 功能修改 | 【综合分析】计算表达式支持默认规则 【告警策略管理】通知策略支持次数压缩 【综合分析】三维kpi报表增加90线、95线、中位数、最小值 【告警模拟】树菜单改造 【多维分析】事件查询优化 【kpi分析】累计模型查询优化 【kpi分析】增加峰值趋势模型 【通用】wireshark支持容器部署 【npm】npm数据集新增指标 【etl】支持日志数据 | 1700928000000 |
|  3 | V5.2.0.5 | bug修复 | 【多维分析】新建事件个数限制问题 【多维分析】npm下钻颗粒度提示超过时长限制问题 【元数据分析】修复npm事件查询报错 | 1700928000000 |
+----+---------------+----------+---------+---------------+

chi-gt-business-0-0-0.chi-gt-business-0-0.default.svc.cluster.local :) CREATE TABLE sys_version
(
    `id` Int32,
    `versionNumber` String,
    `title` String,
    `content` String,
    `insertTime` Int64
)
ENGINE = MySQL('172.24.61.103:30306', 'easyviews', 'sys_version', 'root', 'ssqj@easyviews.pw');

CREATE TABLE sys_version
(
    `id` Int32,
    `versionNumber` String,
    `title` String,
    `content` String,
    `insertTime` Int64
)
ENGINE = MySQL('172.24.61.103:30306', 'easyviews', 'sys_version', 'root', 'ssqj@easyviews.pw')

Query id: 74a99149-e24b-4991-9884-637e5b767ee8

Ok.

0 rows in set. Elapsed: 0.050 sec.

chi-gt-business-0-0-0.chi-gt-business-0-0.default.svc.cluster.local :) select * from sys_version;

SELECT *
FROM sys_version

Query id: 01e2ae7f-9bab-4e53-bcb0-71183de35042

┌─id─┬─versionNumber─┬─title────┬─content─┬────insertTime─┐
│  1 │ V5.2.0.4      │ 功能修改 │ 【通用】树菜单改造 【多维分析】添加分段查询逻辑 【pcap下载管理】数据包下载排序 【元数据分析】数据包下载支持纳秒 【xml模板】 解析模式新增类型:转发模式 【新建&编辑场景】增加邮件功能 【定制】银河证券告警推送 │ 1700928000000 │
│  2 │ V5.2.0.5      │ 功能修改 │ 【综合分析】计算表达式支持默认规则 【告警策略管理】通知策略支持次数压缩 【综合分析】三维kpi报表增加90线、95线、中位数、最小值 【告警模拟】树菜单改造 【多维分析】事件查询优化 【kpi分析】累计模型查询优化 【kpi分析】增加峰值趋势模型 【通用】wireshark支持容器部署 【npm】npm数据集新增指标 【etl】支持日志数据 │ 1700928000000 │
│  3 │ V5.2.0.5      │ bug修复  │ 【多维分析】新建事件个数限制问题 【多维分析】npm下钻颗粒度提示超过时长限制问题 【元数据分析】修复npm事件查询报错 │ 1700928000000 │
└────┴───────────────┴──────────┴─────────┴───────────────┘

3 rows in set. Elapsed: 0.810 sec.
```
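Because the mapped table also accepts inserts, a row written through ClickHouse ends up in the underlying MySQL table. A small sketch with made-up values:

```sql
INSERT INTO sys_version (id, versionNumber, title, content, insertTime)
VALUES (4, 'V5.2.0.6', 'test', 'row inserted through the ClickHouse MySQL table engine', 1700928000000);

-- The new row is visible both from ClickHouse and from MySQL
SELECT * FROM sys_version WHERE id = 4;
```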
## 3. MaterializeMySQL engine

ClickHouse 20.8.2 added the MaterializeMySQL engine, which replicates all tables of a MySQL database into ClickHouse, both the initial full copy and incremental changes, in real time. This makes it possible to run efficient OLAP analysis over MySQL data in ClickHouse, reduce the load on the production MySQL instance, and combine OLTP and OLAP workloads.

MaterializeMySQL works by acting as a MySQL replica: it reads the MySQL binlog and materializes the data into ClickHouse, creating a ReplacingMergeTree table for every table of the mapped MySQL database.

MaterializeMySQL characteristics:

- Supports database-level synchronization only, not table-level.
- Tables of the mapped MySQL database are automatically created in ClickHouse as ReplacingMergeTree tables.
- Supports full plus incremental synchronization: a full copy is taken when the database engine is first created, after which incremental changes are applied by following the binlog.
- Supports MySQL 5.6, 5.7 and 8.0.
- Compatible with most common MySQL statements: insert, update, delete, alter, create, drop, truncate, etc. Renaming tables and modifying columns are not supported; adding and dropping columns are.
- Supports GTID-based replication. MySQL 5.6.5 introduced GTID-based replication: a GTID gives every transaction committed on the primary a unique id within the cluster, which strengthens primary/replica consistency, failure recovery and fault tolerance.

### 1. Enable the MySQL binlog

1.1 Check whether binlog is already enabled:

```
MySQL [(none)]> show variables like 'log_%';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| log_bin       | ON    |
```

1.2 Enable the MySQL binlog by adding the following under `[mysqld]` in /etc/my.cnf:

```
[mysqld]
# A server id that must not clash with any other machine in the cluster
server-id=123
# binlog directory; setting this automatically enables binlog and writes it there
log-bin=/var/lib/mysql/mysql-bin
# Use row-based binlog format
binlog_format=ROW
# Enable GTID
gtid_mode=on
# Enforce strong primary/replica consistency
enforce_gtid_consistency=1
```

Then restart the MySQL service.

### 2. Use the MaterializedMySQL engine

The MaterializedMySQL engine syntax is:

```
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
```

Enable the MaterializedMySQL engine in ClickHouse. The engine is still experimental, so in current ClickHouse versions it has to be switched on explicitly before use:

```
set allow_experimental_database_materialized_mysql=1;
```

Create the MaterializedMySQL database in ClickHouse ([parameter reference](https://clickhouse.com/docs/zh/engines/database-engines/materialized-mysql)):

```sql
-- Create the MaterializeMySQL database
CREATE DATABASE ck_mysql ENGINE = MaterializedMySQL('172.24.61.103:30306', 'easyviews', 'root', 'ssqj@easyviews.pw')
SETTINGS allows_query_when_mysql_lost = true, max_wait_time_when_mysql_unavailable = 10000;
```
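Once the materialized database exists it can be queried like any other ClickHouse database, and changes made on the MySQL side show up after the corresponding binlog events are applied. A quick verification sketch:

```sql
-- The mapped tables are created automatically (backed by ReplacingMergeTree)
SHOW TABLES FROM ck_mysql;

-- Query the replicated data directly in ClickHouse
SELECT count() FROM ck_mysql.sys_version;

-- After an INSERT/UPDATE/DELETE in MySQL, re-running the query reflects the change
```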
# Backup

## 1. Text-file export and import

Manually export the data in a given format, then import it again. This approach is NOT recommended.

```bash
# Export:
clickhouse-client --password helloword --query="select * from iot_dev.t_indicator_record FORMAT CSV" > iot_dev.t_indicator_record.csv
# Import
cat iot_dev.t_indicator_record.csv | clickhouse-client --port 9008 --password helloword --query="INSERT INTO iot_dev.t_indicator_record FORMAT CSV"
```

## 2. Copy the data directory

This is the same idea as a MySQL cold-copy restore: copy the ClickHouse data to another machine, adjust the relevant configuration, and the server can be started directly. Take a careful look at ClickHouse's directory layout on the file system (the path is configured in /etc/clickhouse-server/config.xml); for readability only the data and metadata directories are shown:

```bash
.
├── data
│   ├── default
│   ├── system
│   │   ├── asynchronous_metric_log
│   │   ├── metric_log
│   │   ├── query_log
│   │   ├── query_thread_log
│   │   └── trace_log
├── metadata
│   ├── default
│   │   └── v_table_size.sql
│   ├── default.sql
│   ├── system
│   │   ├── asynchronous_metric_log.sql
│   │   ├── metric_log.sql
│   │   ├── query_log.sql
│   │   ├── query_thread_log.sql
│   │   └── trace_log.sql
```

The data directory holds the data: one directory per database, with one subdirectory per table. The metadata directory holds the metadata, i.e. the database and table definitions:

- `<database>.sql` is the DDL that creates the database (ATTACH DATABASE default ENGINE = Ordinary)
- `<database>/<table>.sql` is the DDL that creates the table (ATTACH TABLE ...)

Based on this, copying the data and metadata directories (excluding the system database) to the new cluster is enough to migrate the data.

Steps:

```
1. Stop the source ClickHouse server and pack up the data and metadata directories of the databases or tables you need.
2. Copy them into the corresponding directory of the target ClickHouse server, e.g. /var/lib/clickhouse.
3. Fix the ownership:
   chown -Rf clickhouse:clickhouse /var/lib/clickhouse/*
   chown -Rf clickhouse:clickhouse /var/lib/clickhouse
4. Restart the target ClickHouse server.
5. Verify the data: select count(1) from iot_dev.t_indicator_record;
```

## 3. clickhouse-backup

Project address: `https://github.com/AlexAkulov/clickhouse-backup/`

```bash
wget https://github.com/Altinity/clickhouse-backup/releases/download/v2.4.34/clickhouse-backup-2.4.34-1.x86_64.rpm
rpm -ivh clickhouse-backup-2.4.34-1.x86_64.rpm --nodeps
```

**Limitations:**

- Requires ClickHouse 1.1.54390 or newer
- MergeTree-family table engines only
- Backing up tiered storage / storage_policy is not supported
- Maximum backup size on cloud storage is 5TB
- Maximum number of parts on AWS S3 is 10,000

```bash
# github: https://github.com/AlexAkulov/clickhouse-backup
# download: https://github.com/AlexAkulov/clickhouse-backup/releases/download/v1.0.0/clickhouse-backup.tar.gz
# Unpack the archive
tar -xf clickhouse-backup.tar.gz -C /root
# Copy the binary into the system path
cp /root/clickhouse-backup /usr/bin
# Create the clickhouse-backup config directory
mkdir -p /etc/clickhouse-backup
# Copy the template config file into it
cp /root/config.yml /etc/clickhouse-backup/
```

Edit the clickhouse-backup config file config.yml to match your ClickHouse installation: data directory, database password, listen address and port, and so on ([official configuration reference](https://gitcode.com/AlexAkulov/clickhouse-backup/blob/master/ReadMe.md)).

Besides backing up to the local machine, clickhouse-backup also supports remote backups to S3 (object storage), FTP and SFTP, and it can be driven through an API.

The config used here is the simplest possible one: back up locally, then push the backup files to a backup host with a script.

```bash
# cat config.yml
general:
  max_file_size: 1099511627776
  disable_progress_bar: false
  backups_to_keep_local: 0
  backups_to_keep_remote: 0
  log_level: info
  allow_empty_backups: false
clickhouse:
  username: admin
  password: "ssqj@easyviews.pw"
  host: 172.24.61.103
  port: 39000
  disk_mapping: {}
  skip_tables:
    - system.*
    - default.*
  timeout: 5m
  freeze_by_part: false
  secure: false
  skip_verify: false
  sync_replicated_tables: true
  skip_sync_replica_timeouts: true
  log_sql_queries: false

# Print the default config
clickhouse-backup default-config

# List the tables that can be backed up (system and default are already excluded via the config)
[root@hadoop103 clickhouse-backup]# clickhouse-backup tables
easyviews.bpm_original_local        104.72GiB  default  full
easyviews.bpm_base_local             71.06GiB  default  full
easyviews.npm_base_local             15.32GiB  default  full
easyviews.daily_statistics_local      6.07GiB  default  full
easyviews.npm_indicator_local         1.50GiB  default  full
easyviews..inner_id.85f64f39-b848-423e-9222-f225b891fb75  671.44MiB  default  full
easyviews.mv_npm_indicator_y_local   671.43MiB  default  full
...
```

Create a backup:

```bash
# Full backup
[root@hadoop103 clickhouse-backup]# clickhouse-backup create
2024/03/19 23:29:24.273349 info done backup=2024-03-19T15-29-19 logger=backuper operation=create table=easyviews.daily_statistics_local
...

# List existing backups
[root@hadoop103 ~]# clickhouse-backup list
2024-03-19T15-29-19   384.25KiB   19/03/2024 15:32:56   local

# Delete a backup
[root@hadoop103 ~]# clickhouse-backup delete local 2024-03-19T15-29-19

# Restore data
[root@hadoop103 clickhouse-backup]# clickhouse-backup restore 2024-03-19T15-29-19 -s -d --rm
# --table   restore only specific tables; regular expressions are allowed, e.g. --table=dbname.* for a whole database
# --schema  restore the table schemas only
# --data    restore the data only
# --rm      drop the tables to be restored before restoring
```

!!! Note: if ClickHouse runs in a container, clickhouse-backup must be installed inside that container and the data path must be /var/lib/clickhouse, otherwise the restore will fail.

```bash
[root@hadoop103 clickhouse]# clickhouse-backup restore 2024-03-20T01-42-59 -s
2024/03/20 12:16:01.191420 warn can't create table 'easyviews.npm_indicator_local': code: 57, message: Directory for table data store/e44/e445d849-98cf-46a4-9da9-55b500e762be/ already exists, will try again backup=2024-03-20T01-42-59 operation=restore
...
# If creating the schemas fails as above: drop database if exists dbname, delete the data, metadata and store directories, restart the ClickHouse service and retry
[root@hadoop103 clickhouse]# clickhouse-backup restore 2024-03-20T01-42-59 -s
2024/03/20 12:20:06.642093 info done backup=2024-03-20T01-42-59 operation=restore

root@chi-gt-business-0-0-0:/var/lib/clickhouse# clickhouse-backup restore 2024-03-20T01-42-59 --data
2024/03/20 12:25:10 info done backup=2024-03-20T01-42-59 operation=restore table=easyviews.log_indicator_daily_local
2024/03/20 12:25:15 info done backup=2024-03-20T01-42-59 operation=restore table=easyviews.npm_base_local
...
2024/03/20 12:25:15 info done backup=2024-03-20T01-42-59 operation=restore table=easyviews.bpm_original
2024/03/20 12:25:15 info done backup=2024-03-20T01-42-59 operation=restore
```

# [Backing up and restoring data with clickhouse-backup — hands-on](https://www.51cto.com/article/681609.html)
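After a restore it is worth sanity-checking that the restored tables contain the expected amount of data; a small sketch reusing the `system.parts` query from earlier in these notes:

```sql
SELECT table, sum(rows) AS row_num, formatReadableSize(sum(bytes_on_disk)) AS used_disk
FROM system.parts
WHERE database = 'easyviews' AND active
GROUP BY table
ORDER BY row_num DESC;
```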