Doris-Datax数据备份与恢复

Doris-Datax数据备份与恢复

datax下载

https://github.com/alibaba/DataX

下载最新版本,本测试使用202303版本

https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/202303/datax.tar.gz

datax使用方法

1
python2 datax.py aaa.json 

Doris测试数据创建

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
--创建数据库
create database test1;

--创建测试表,以data_partition作为分区字段,并启用动态分区
CREATE TABLE if not exists test1.test_work_log (
message_deviceId varchar(64) NULL ,
message_taskId varchar(32) NULL ,
date_partition int(11) NULL COMMENT "时间分区",
message_txt string NULL
) ENGINE=OLAP
UNIQUE KEY(message_deviceId,message_taskId, date_partition)
PARTITION BY RANGE(date_partition)(
PARTITION p_other VALUES LESS THAN ("1"),
PARTITION p_history_20220714 VALUES LESS THAN ("20220714"),
PARTITION p20230501 VALUES [("20230501"), ("20230502")),
PARTITION p20230502 VALUES [("20230502"), ("20230503")),
PARTITION p20230503 VALUES [("20230503"), ("20230504")),
PARTITION p20230504 VALUES [("20230504"), ("20230505")),
PARTITION p20230505 VALUES [("20230505"), ("20230506")),
PARTITION p20230506 VALUES [("20230506"), ("20230507"))
)
DISTRIBUTED BY HASH( message_deviceId,message_taskId) BUCKETS 16
PROPERTIES (
"replication_num" = "1",
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.time_zone" = "Asia/Shanghai",
"dynamic_partition.start" = "-2147483648",
"dynamic_partition.end" = "3",
"dynamic_partition.prefix" = "p",
"dynamic_partition.replication_allocation" = "tag.location.default: 3",
"dynamic_partition.buckets" = "16",
"dynamic_partition.create_history_partition" = "true",
"dynamic_partition.history_partition_num" = "36",
"dynamic_partition.hot_partition_num" = "0",
"dynamic_partition.reserved_history_periods" = "NULL",
"in_memory" = "false",
"storage_format" = "V2"
);

--插入测试数据
insert into test1.test_work_log VALUES ('1','2',-1,'aaa');
insert into test1.test_work_log VALUES ('2','1',20230504,'aaa');
insert into test1.test_work_log VALUES ('3','1',20230505,'bbb');

--查看所有数据
select * from test1.test_work_log;

--查看分区情况
show partitions from test1.test_work_log;

--查看某分区数据
select * from test1.test_work_log PARTITION p20230504;

报错

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
2023-05-04 14:27:08.179 [main] ERROR Engine - 

经DataX智能分析,该任务最可能的错误原因是:
com.alibaba.datax.common.exception.DataXException: Code:[Common-00], Describe:[您提供的配置文件存在错误信息,请检查您的作业配置 .] - 配置信息错误. 您提供的配置信息不是合法的JSON格式: syntax error, string . 请按照标准json格式提供配置信息.
at com.alibaba.datax.common.exception.DataXException.asDataXException(DataXException.java:26)
at com.alibaba.datax.common.util.Configuration.<init>(Configuration.java:1066)
at com.alibaba.datax.common.util.Configuration.from(Configuration.java:79)
at com.alibaba.datax.core.util.ConfigParser.parseJobConfig(ConfigParser.java:75)
at com.alibaba.datax.core.util.ConfigParser.parse(ConfigParser.java:26)
at com.alibaba.datax.core.Engine.entry(Engine.java:137)
at com.alibaba.datax.core.Engine.main(Engine.java:204)

## 解决办法
find /home/mqxdata/datax/plugin/reader -type f -name "._*er" |xargs rm -f
find /home/mqxdata/datax/plugin/writer -type f -name "._*er" |xargs rm -f

doris导出到csv

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
{
"job": {
"content": [{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "root",
"connection": [
{
"querySql": [
"SELECT * from test1.test_work_log where date_partition=20230504;"
],
"jdbcUrl": [
"jdbc:mysql://hxkj07:19030/test1"
]
}
]
}
},
"writer": {
"name": "txtfilewriter",
"parameter": {
"path": "./",
"fileName": "test_work_log.csv.gz",
"writeMode": "append",
"dateFormat": "yyyy-MM-dd",
"fileFormat": "csv",
}
}
}],
"setting": {
"speed": {
"channel": "1"
}
}
}
}

导出效果

1
2
3
cat test_work_log.csv.gz__ca303326_6445_4443_9e17_6c973ca5efa4 
2,1,20230504,aaa

csv导入到doris

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
{
"job": {
"content": [{

"reader": {
"name": "txtfilereader",
"parameter": {
"path": ["./"],
"encoding": "UTF-8",
"column": [
{
"index": 0,
"type": "String"
},
{
"index": 1,
"type": "String"
},
{
"index": 2,
"type": "long"
},
{
"index": 3,
"type": "string"
}
],
"fieldDelimiter": ","
}
},


"writer": {
"name": "doriswriter",
"parameter": {
"loadUrl": ["hxkj07:18030"],
"loadProps": {
},
"column": ["message_deviceId", "message_taskId", "date_partition","message_txt"],
"username": "root",
"password": "root",
"postSql": [],
"preSql": [],
"flushInterval":30000,
"connection": [
{
"jdbcUrl": "jdbc:mysql://hxkj07:19030/test1",
"selectedDatabase": "test1",
"table": ["test_work_log"]
}
],
"loadProps": {
"format": "json",
"strip_outer_array": true
}
}
}

}],
"setting": {
"speed": {
"channel": "1"
}
}
}
}

导入效果

修改csv,把aaa改成new,再导入

生产环境配置(doris–>hdfs)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
{
"job": {
"content": [{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "xxx",
"connection": [
{
"querySql": [
"SELECT message_deviceId,message_taskId,message_generateTime,date_partition,message_realSpeed,message_driverMode,message_reportTime,message_taskIndex,message_curPathNum,message_impleWorkState,message_chassisType,message_chassisInfo_chassisType,message_chassisInfo_engineSpeed,message_chassisInfo_fuelOilLevel,message_chassisInfo_remainFuel,message_chassisInfo_engineInstantaneousFuelEconomy,message_chassisInfo_ptoGearSta,message_chassisInfo_ptoRotationRate,message_chassisInfo_upgradeSysSta,message_chassisInfo_hydraulicOilLevel,message_chassisInfo_hydraulicOilTemp,message_chassisInfo_hydraulicOilPressure,message_chassisInfo_engOilTemp,message_chassisInfo_grainTankWeightKg,message_chassisInfo_grainTankFullSts,message_chassisInfo_unloadClutchSts,message_chassisInfo_grainUnloadTubLvl,message_chassisInfo_oilPressure,message_chassisInfo_coolantTemperature,message_chassisInfo_realOil,message_chassisInfo_remainingOil,message_chassisInfo_currentWheelAngle,message_chassisInfo_steerMotorState,message_chassisInfo_remainingOilPercent,message_chassisInfo_threePointPromote,message_chassisInfo_tillingDepthFb,message_positionInfo_yaw,message_positionInfo_rtkStatus,message_positionInfo_pitchAngle,message_positionInfo_rollAngle,message_positionInfo_position_longitude,message_positionInfo_position_latitude,message_positionInfo_xyPosition_longitude,message_positionInfo_xyPosition_latitude,message_positionInfo_xyImpl_longitude,message_positionInfo_xyImpl_latitude,message_positionInfo_gnssNum,message_positionInfo_networkSignal,message_controlTaskInfo_fieldId,message_controlTaskInfo_fieldName,message_controlTaskInfo_fieldCode,message_controlTaskInfo_typeName,message_controlTaskInfo_totalArea,message_controlTaskInfo_operatedArea,message_controlTaskInfo_noOperateArea,message_controlTaskInfo_totalUsedTime,message_controlTaskInfo_percent,message_controlTaskInfo_widthOfCloth,message_controlTaskInfo_totalMileage,message_controlTaskInfo_operatedMileage,message_controlTaskInfo_noOperatedMileage,message_controlTaskInfo_speedPerHour,message_controlTaskInfo_operatedOffset,message_controlTaskInfo_operatedOffsetDirection,message_controlTaskInfo_needLastTime,message_controlTaskInfo_machineName,message_controlTaskInfo_implementName,message_controlTaskInfo_startTime,message_controlTaskInfo_operateStatus,message_controlTaskInfo_segmentPercent,message_statusInfo_deviceId,message_statusInfo_taskId,message_statusInfo_connected,message_statusInfo_ready,message_statusInfo_canTask,message_statusInfo_status,gis_farmcode,gis_farmlandcode,message_gsmStatusInfo_signalRssi,message_gsmStatusInfo_optGsm,message_gsmStatusInfo_networkType,message_gsmStatusInfo_simNumber,message_gsmStatusInfo_cardStatus,gis_is_exception FROM hx_farm_sit.ods_machine_work_log_uq where date_partition=20230503;"
],
"jdbcUrl": [
"jdbc:mysql://bigdata01:9030/hx_farm_sit"
]
}
]
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"column": [
{"name": "message_deviceId","type": "string"},{"name": "message_taskId","type": "string"},{"name": "message_generateTime","type": "bigint"},{"name": "date_partition","type": "int"},{"name": "message_realSpeed","type": "string"},{"name": "message_driverMode","type": "int"},{"name": "message_reportTime","type": "string"},{"name": "message_taskIndex","type": "int"},{"name": "message_curPathNum","type": "int"},{"name": "message_impleWorkState","type": "int"},{"name": "message_chassisType","type": "string"},{"name": "message_chassisInfo_chassisType","type": "string"},{"name": "message_chassisInfo_engineSpeed","type": "int"},{"name": "message_chassisInfo_fuelOilLevel","type": "int"},{"name": "message_chassisInfo_remainFuel","type": "int"},{"name": "message_chassisInfo_engineInstantaneousFuelEconomy","type": "string"},{"name": "message_chassisInfo_ptoGearSta","type": "int"},{"name": "message_chassisInfo_ptoRotationRate","type": "int"},{"name": "message_chassisInfo_upgradeSysSta","type": "int"},{"name": "message_chassisInfo_hydraulicOilLevel","type": "int"},{"name": "message_chassisInfo_hydraulicOilTemp","type": "int"},{"name": "message_chassisInfo_hydraulicOilPressure","type": "int"},{"name": "message_chassisInfo_engOilTemp","type": "int"},{"name": "message_chassisInfo_grainTankWeightKg","type": "int"},{"name": "message_chassisInfo_grainTankFullSts","type": "int"},{"name": "message_chassisInfo_unloadClutchSts","type": "int"},{"name": "message_chassisInfo_grainUnloadTubLvl","type": "int"},{"name": "message_chassisInfo_oilPressure","type": "string"},{"name": "message_chassisInfo_coolantTemperature","type": "string"},{"name": "message_chassisInfo_realOil","type": "string"},{"name": "message_chassisInfo_remainingOil","type": "string"},{"name": "message_chassisInfo_currentWheelAngle","type": "string"},{"name": "message_chassisInfo_steerMotorState","type": "int"},{"name": "message_chassisInfo_remainingOilPercent","type": "string"},{"name": "message_chassisInfo_threePointPromote","type": "string"},{"name": "message_chassisInfo_tillingDepthFb","type": "int"},{"name": "message_positionInfo_yaw","type": "string"},{"name": "message_positionInfo_rtkStatus","type": "int"},{"name": "message_positionInfo_pitchAngle","type": "string"},{"name": "message_positionInfo_rollAngle","type": "string"},{"name": "message_positionInfo_position_longitude","type": "string"},{"name": "message_positionInfo_position_latitude","type": "string"},{"name": "message_positionInfo_xyPosition_longitude","type": "string"},{"name": "message_positionInfo_xyPosition_latitude","type": "string"},{"name": "message_positionInfo_xyImpl_longitude","type": "string"},{"name": "message_positionInfo_xyImpl_latitude","type": "string"},{"name": "message_positionInfo_gnssNum","type": "int"},{"name": "message_positionInfo_networkSignal","type": "int"},{"name": "message_controlTaskInfo_fieldId","type": "bigint"},{"name": "message_controlTaskInfo_fieldName","type": "string"},{"name": "message_controlTaskInfo_fieldCode","type": "string"},{"name": "message_controlTaskInfo_typeName","type": "string"},{"name": "message_controlTaskInfo_totalArea","type": "string"},{"name": "message_controlTaskInfo_operatedArea","type": "string"},{"name": "message_controlTaskInfo_noOperateArea","type": "string"},{"name": "message_controlTaskInfo_totalUsedTime","type": "int"},{"name": "message_controlTaskInfo_percent","type": "string"},{"name": "message_controlTaskInfo_widthOfCloth","type": "string"},{"name": "message_controlTaskInfo_totalMileage","type": "string"},{"name": "message_controlTaskInfo_operatedMileage","type": "string"},{"name": "message_controlTaskInfo_noOperatedMileage","type": "string"},{"name": "message_controlTaskInfo_speedPerHour","type": "string"},{"name": "message_controlTaskInfo_operatedOffset","type": "string"},{"name": "message_controlTaskInfo_operatedOffsetDirection","type": "int"},{"name": "message_controlTaskInfo_needLastTime","type": "int"},{"name": "message_controlTaskInfo_machineName","type": "string"},{"name": "message_controlTaskInfo_implementName","type": "string"},{"name": "message_controlTaskInfo_startTime","type": "string"},{"name": "message_controlTaskInfo_operateStatus","type": "int"},{"name": "message_controlTaskInfo_segmentPercent","type": "string"},{"name": "message_statusInfo_deviceId","type": "string"},{"name": "message_statusInfo_taskId","type": "string"},{"name": "message_statusInfo_connected","type": "int"},{"name": "message_statusInfo_ready","type": "int"},{"name": "message_statusInfo_canTask","type": "int"},{"name": "message_statusInfo_status","type": "int"},{"name": "gis_farmcode","type": "string"},{"name": "gis_farmlandcode","type": "string"},{"name": "message_gsmStatusInfo_signalRssi","type": "string"},{"name": "message_gsmStatusInfo_optGsm","type": "string"},{"name": "message_gsmStatusInfo_networkType","type": "string"},{"name": "message_gsmStatusInfo_simNumber","type": "string"},{"name": "message_gsmStatusInfo_cardStatus","type": "int"},{"name": "gis_is_exception","type": "int"}
],
"defaultFS": "hdfs://nameservice1",
"fieldDelimiter": "",
"compress":"SNAPPY",
"fileName": "ods_machine_work_log_uq.orc",
"fileType": "orc",
"path": "/bigdata/doris/hx_farm_sit.db/ods_machine_work_log_uq/dt=20230503/",
"writeMode": "append",
"hadoopConfig":{
"dfs.nameservices" : "nameservice1",
"dfs.ha.namenodes.nameservice1" : "bigdata01,bigdata02",
"dfs.namenode.rpc-address.nameservice1.bigdata01" : "bigdata01:8020",
"dfs.namenode.rpc-address.nameservice1.bigdata02" : "bigdata02:8020",
"dfs.client.failover.proxy.provider.nameservice1" : "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
}
}
}
}],
"setting": {
"speed": {
"channel": "1"
}
}
}
}


生产环境配置(hdfs–>doris),基于datax-202303版本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
{
"job": {
"setting": {
"speed": {
"channel": 3
}
},
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"path": "/tmp/test/*/*",
"defaultFS": "hdfs://nameservice1",
"fileType": "orc",
"fieldDelimiter": "",
"encoding": "UTF-8",
"hadoopConfig":{
"dfs.nameservices" : "nameservice1",
"dfs.ha.namenodes.nameservice1" : "bigdata2",
"dfs.namenode.rpc-address.nameservice1.bigdata2" : "bigdata2:8020",
"dfs.client.failover.proxy.provider.nameservice1" : "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
},
"column": [
{"index": "0","type": "string"},{"index": "1","type": "string"},{"index": "2","type": "long"},{"index": "3","type": "long"},{"index": "4","type": "string"},{"index": "5","type": "long"},{"index": "6","type": "string"},{"index": "7","type": "long"},{"index": "8","type": "long"},{"index": "9","type": "long"},{"index": "10","type": "string"},{"index": "11","type": "string"},{"index": "12","type": "long"},{"index": "13","type": "long"},{"index": "14","type": "long"},{"index": "15","type": "string"},{"index": "16","type": "long"},{"index": "17","type": "long"},{"index": "18","type": "long"},{"index": "19","type": "long"},{"index": "20","type": "long"},{"index": "21","type": "long"},{"index": "22","type": "long"},{"index": "23","type": "long"},{"index": "24","type": "long"},{"index": "25","type": "long"},{"index": "26","type": "long"},{"index": "27","type": "string"},{"index": "28","type": "string"},{"index": "29","type": "string"},{"index": "30","type": "string"},{"index": "31","type": "string"},{"index": "32","type": "long"},{"index": "33","type": "string"},{"index": "34","type": "string"},{"index": "35","type": "long"},{"index": "36","type": "string"},{"index": "37","type": "long"},{"index": "38","type": "string"},{"index": "39","type": "string"},{"index": "40","type": "string"},{"index": "41","type": "string"},{"index": "42","type": "string"},{"index": "43","type": "string"},{"index": "44","type": "string"},{"index": "45","type": "string"},{"index": "46","type": "long"},{"index": "47","type": "long"},{"index": "48","type": "long"},{"index": "49","type": "string"},{"index": "50","type": "string"},{"index": "51","type": "string"},{"index": "52","type": "string"},{"index": "53","type": "string"},{"index": "54","type": "string"},{"index": "55","type": "long"},{"index": "56","type": "string"},{"index": "57","type": "string"},{"index": "58","type": "string"},{"index": "59","type": "string"},{"index": "60","type": "string"},{"index": "61","type": "string"},{"index": "62","type": "string"},{"index": "63","type": "long"},{"index": "64","type": "long"},{"index": "65","type": "string"},{"index": "66","type": "string"},{"index": "67","type": "string"},{"index": "68","type": "long"},{"index": "69","type": "string"},{"index": "70","type": "string"},{"index": "71","type": "string"},{"index": "72","type": "long"},{"index": "73","type": "long"},{"index": "74","type": "long"},{"index": "75","type": "long"},{"index": "76","type": "string"},{"index": "77","type": "string"},{"index": "78","type": "string"},{"index": "79","type": "string"},{"index": "80","type": "string"},{"index": "81","type": "string"},{"index": "82","type": "long"},{"index": "83","type": "long"}
]
}

},
"writer": {
"name": "doriswriter",
"parameter": {
"username": "root",
"password": "root",
"database": "hx_farm_sit",
"table": "ods_machine_work_log_uq",
"column": ["message_deviceId","message_taskId","message_generateTime","date_partition","message_realSpeed","message_driverMode","message_reportTime","message_taskIndex","message_curPathNum","message_impleWorkState","message_chassisType","message_chassisInfo_chassisType","message_chassisInfo_engineSpeed","message_chassisInfo_fuelOilLevel","message_chassisInfo_remainFuel","message_chassisInfo_engineInstantaneousFuelEconomy","message_chassisInfo_ptoGearSta","message_chassisInfo_ptoRotationRate","message_chassisInfo_upgradeSysSta","message_chassisInfo_hydraulicOilLevel","message_chassisInfo_hydraulicOilTemp","message_chassisInfo_hydraulicOilPressure","message_chassisInfo_engOilTemp","message_chassisInfo_grainTankWeightKg","message_chassisInfo_grainTankFullSts","message_chassisInfo_unloadClutchSts","message_chassisInfo_grainUnloadTubLvl","message_chassisInfo_oilPressure","message_chassisInfo_coolantTemperature","message_chassisInfo_realOil","message_chassisInfo_remainingOil","message_chassisInfo_currentWheelAngle","message_chassisInfo_steerMotorState","message_chassisInfo_remainingOilPercent","message_chassisInfo_threePointPromote","message_chassisInfo_tillingDepthFb","message_positionInfo_yaw","message_positionInfo_rtkStatus","message_positionInfo_pitchAngle","message_positionInfo_rollAngle","message_positionInfo_position_longitude","message_positionInfo_position_latitude","message_positionInfo_xyPosition_longitude","message_positionInfo_xyPosition_latitude","message_positionInfo_xyImpl_longitude","message_positionInfo_xyImpl_latitude","message_positionInfo_gnssNum","message_positionInfo_networkSignal","message_controlTaskInfo_fieldId","message_controlTaskInfo_fieldName","message_controlTaskInfo_fieldCode","message_controlTaskInfo_typeName","message_controlTaskInfo_totalArea","message_controlTaskInfo_operatedArea","message_controlTaskInfo_noOperateArea","message_controlTaskInfo_totalUsedTime","message_controlTaskInfo_percent","message_controlTaskInfo_widthOfCloth","message_controlTaskInfo_totalMileage","message_controlTaskInfo_operatedMileage","message_controlTaskInfo_noOperatedMileage","message_controlTaskInfo_speedPerHour","message_controlTaskInfo_operatedOffset","message_controlTaskInfo_operatedOffsetDirection","message_controlTaskInfo_needLastTime","message_controlTaskInfo_machineName","message_controlTaskInfo_implementName","message_controlTaskInfo_startTime","message_controlTaskInfo_operateStatus","message_controlTaskInfo_segmentPercent","message_statusInfo_deviceId","message_statusInfo_taskId","message_statusInfo_connected","message_statusInfo_ready","message_statusInfo_canTask","message_statusInfo_status","gis_farmcode","gis_farmlandcode","message_gsmStatusInfo_signalRssi","message_gsmStatusInfo_optGsm","message_gsmStatusInfo_networkType","message_gsmStatusInfo_simNumber","message_gsmStatusInfo_cardStatus","gis_is_exception"],
"preSql": [],
"postSql": [],
"connection": [
{
"jdbcUrl":"jdbc:mysql://bigdata1:19030/"
"table":["ods_machine_work_log_uq"],
"selectedDatabase":"hx_farm_sit"
}
],
"loadUrl": ["bigdata1:18030", "bigdata2:18030", "bigdata3:18030"],
"loadProps": {},
"maxBatchRows": 10000,
"maxBatchByteSize": 10857600
}
}
}
]
}
}