@@ -0,0 +1,204 @@
+package com.fs.fssync.listener;
+
+import com.fs.fssync.config.FlinkConfig;
+import com.ververica.cdc.connectors.mysql.source.MySqlSource;
+import com.ververica.cdc.connectors.mysql.table.StartupOptions;
+import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.flink.api.common.eventtime.WatermarkStrategy;
+import org.apache.flink.api.common.restartstrategy.RestartStrategies;
+import org.apache.flink.api.common.time.Time;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.configuration.PipelineOptionsInternal;
+import org.apache.flink.runtime.state.filesystem.FsStateBackend;
+import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
+import org.apache.flink.runtime.state.storage.FileSystemCheckpointStorage;
+import org.apache.flink.streaming.api.CheckpointingMode;
+import org.apache.flink.streaming.api.datastream.DataStreamSource;
+import org.apache.flink.streaming.api.environment.CheckpointConfig;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.graph.StreamGraph;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.CommandLineRunner;
+import org.springframework.stereotype.Component;
+
+import javax.annotation.PreDestroy;
+import java.io.File;
+import java.util.*;
+
+/**
+ * MySQL event listener: starts a Flink CDC job that captures MySQL change events and syncs them downstream.
+ *
+ * @author jokey
+ * @since 2024-3-14
+ */
+@Slf4j
+@Component
+public class MySqlEventListener implements CommandLineRunner {
+
+    @Autowired
+    private FlinkConfig flinkConfig;
+
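+    // Fixed job ID and checkpoint directory: keeping the job ID stable across restarts lets the
+    // application locate and resume from its own checkpoints under CHECKPOINT_DIR/<job id>/chk-<n>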
+    private static final String JOB_ID = "1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d";
+    private static final String CHECKPOINT_DIR = "file:///d:/data/flink/checkpoints";
+
+    @Override
+    public void run(String... args) throws Exception {
+
+        // Create and configure the execution environment
+        final StreamExecutionEnvironment env = configureEnvironment();
+
+        FlinkConfig.MySqlConfig mysqlConfig = flinkConfig.getCdc().getMysql();
+
+        // Debezium properties
+        Properties debeziumProps = createDebeziumProperties();
+
+        // Create the MySQL CDC source
+        MySqlSource<String> mySqlSource = createMySqlSource(mysqlConfig, debeziumProps);
+
+        // Create the data stream
+        DataStreamSource<String> streamSource = env.fromSource(
+                mySqlSource,
+                WatermarkStrategy.noWatermarks(),
+                "MySQL Source"
+        ).setParallelism(flinkConfig.getParallelism().getSource());
+
+        // Add the custom sink
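+        // CustomSink is assumed to live in the same package (it is not part of this diff); judging by
+        // the operator name "syncToRedis" below, it presumably writes the JSON change events to Redis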
+        CustomSink customSink = new CustomSink();
+        streamSource.addSink(customSink)
+                .name("syncToRedis")
+                .setParallelism(flinkConfig.getParallelism().getSink());
+
+        // Build the stream graph explicitly so the job name can be set and more details captured
+        StreamGraph streamGraph = env.getStreamGraph();
+        streamGraph.setJobName("MySQL-CDC-Sync-Job");
+
+        // Log additional details before execution
+        log.info("About to execute CDC job, job name: {}, job ID: {}, checkpoint dir: {}",
+                streamGraph.getJobName(), JOB_ID, CHECKPOINT_DIR);
+
+        // Execute the job
+        env.execute(streamGraph);
+    }
+
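+    /**
+     * Debezium type-handling options for the CDC source, chosen so that numeric, temporal, binary and
+     * spatial column values serialize in a JSON-friendly way and unknown datatypes are excluded.
+     */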
+    private Properties createDebeziumProperties() {
+        Map<String, String> configMap = new HashMap<>();
+        configMap.put("decimal.handling.mode", "string");
+        configMap.put("bigint.unsigned.handling.mode", "long");
+        configMap.put("time.precision.mode", "adaptive_time_microseconds");
+        configMap.put("datetime.handling.mode", "string");
+        configMap.put("binary.handling.mode", "bytes");
+        configMap.put("json.handling.mode", "string");
+        configMap.put("geometry.handling.mode", "string");
+        configMap.put("include.unknown.datatypes", "false");
+
+        Properties props = new Properties();
+        props.putAll(configMap);
+        return props;
+    }
+
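+    /**
+     * Builds the MySQL CDC source from the externalized configuration: connection details, the
+     * database and table lists, credentials, startup mode and server time zone. Change events are
+     * deserialized to JSON strings.
+     */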
+    private MySqlSource<String> createMySqlSource(FlinkConfig.MySqlConfig mysqlConfig, Properties debeziumProps) {
+        return MySqlSource.<String>builder()
+                .debeziumProperties(debeziumProps)
+                .hostname(mysqlConfig.getHostname())
+                .port(mysqlConfig.getPort())
+                .databaseList(mysqlConfig.getDatabaseList())
+                .tableList(mysqlConfig.getTableList())
+                .username(mysqlConfig.getUsername())
+                .password(mysqlConfig.getPassword())
+                .deserializer(new JsonDebeziumDeserializationSchema())
+                .startupOptions(getStartupOptions(mysqlConfig.getStartupOptions()))
+                .serverTimeZone(mysqlConfig.getServerTimeZone())
+                .build();
+    }
+
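+    /**
+     * Creates and configures the execution environment: pins the job ID, resumes from the newest local
+     * checkpoint when one exists, uses the HashMap state backend with filesystem checkpoint storage,
+     * and sets the checkpointing and restart policies.
+     */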
+    private StreamExecutionEnvironment configureEnvironment() {
+        Configuration configuration = new Configuration();
+        configuration.setString(PipelineOptionsInternal.PIPELINE_FIXED_JOB_ID, JOB_ID);
+
+        // Point the job at the most recent checkpoint, if one exists
+        String latestCheckpoint = findLatestCheckpoint();
+        if (latestCheckpoint != null) {
+            configuration.setString("execution.savepoint.path", latestCheckpoint);
+            log.info("Restoring from checkpoint: {}", latestCheckpoint);
+        }
+        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(configuration);
+
+        // Configure the state backend
+        try {
+            // HashMap state backend: working state in memory, checkpoints stored on the filesystem
+            env.setStateBackend(new HashMapStateBackend());
+            env.getCheckpointConfig().setCheckpointStorage(new FileSystemCheckpointStorage(CHECKPOINT_DIR));
+            log.info("Checkpoint storage set to: {}", CHECKPOINT_DIR);
+        } catch (Exception e) {
+            log.error("Failed to configure state backend", e);
+        }
+
+        // Checkpoint configuration
+        env.enableCheckpointing(flinkConfig.getCheckpoint().getInterval());
+        CheckpointConfig checkpointConfig = env.getCheckpointConfig();
+        checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
+        checkpointConfig.setMinPauseBetweenCheckpoints(500);
+        checkpointConfig.setCheckpointTimeout(60000);
+        checkpointConfig.setMaxConcurrentCheckpoints(1);
+        checkpointConfig.setTolerableCheckpointFailureNumber(3);
+        checkpointConfig.setExternalizedCheckpointCleanup(
+                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
+        checkpointConfig.enableUnalignedCheckpoints();
+
+        // Restart strategy
+        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.seconds(10)));
+
+        log.info("Flink environment configured, checkpoint interval: {}ms, job ID: {}",
+                flinkConfig.getCheckpoint().getInterval(), JOB_ID);
+
+        return env;
+    }
+
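+    /**
+     * Scans CHECKPOINT_DIR/<job id> for chk-* directories and returns the URI of the newest completed
+     * checkpoint (one that contains a _metadata file), or null if none is found.
+     */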
+    private String findLatestCheckpoint() {
+        // CHECKPOINT_DIR is a file:// URI for Flink; strip the scheme to get the local directory for this job
+        File checkpointDir = new File(CHECKPOINT_DIR.replace("file:///", "") + "/" + JOB_ID);
+        if (!checkpointDir.exists() || !checkpointDir.isDirectory()) {
+            return null;
+        }
+
+        // Find the latest checkpoint
+        File[] checkpoints = checkpointDir.listFiles(file ->
+                file.isDirectory() && file.getName().startsWith("chk-"));
+
+        if (checkpoints == null || checkpoints.length == 0) {
+            return null;
+        }
+
+        // Sort by checkpoint ID (the numeric part of the directory name)
+        Arrays.sort(checkpoints, (f1, f2) -> {
+            int id1 = Integer.parseInt(f1.getName().substring(4));
+            int id2 = Integer.parseInt(f2.getName().substring(4));
+            return Integer.compare(id2, id1); // descending
+        });
+
+        // Only use the newest checkpoint if its _metadata file exists, i.e. the checkpoint completed
+        File latest = checkpoints[0];
+        File metadata = new File(latest, "_metadata");
+        if (metadata.exists() && metadata.isFile()) {
+            return CHECKPOINT_DIR + "/" + JOB_ID + "/" + latest.getName();
+        }
+
+        return null;
+    }
+
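+    /**
+     * Maps the configured startup mode string to a CDC StartupOptions value: defaults to earliest()
+     * when no mode is configured and to latest() for unrecognized values.
+     */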
+    private StartupOptions getStartupOptions(String option) {
+        // When there is no checkpoint to resume from, the configured startup mode decides where to start
+        log.info("Using configured startup mode: {}", option);
+        if (option == null) {
+            return StartupOptions.earliest();
+        }
+        switch (option.toLowerCase()) {
+            case "initial": return StartupOptions.initial();
+            case "latest": return StartupOptions.latest();
+            case "earliest": return StartupOptions.earliest();
+            default: return StartupOptions.latest();
+        }
+    }
+
+}