发布时间:2025-12-10 11:48:14 浏览次数:5
https://dlcdn.apache.org/incubator/seatunnel/2.3.1/apache-seatunnel-incubating-2.3.1-bin.tar.gz
下载完毕之后上传到服务器上面并解压
# 解压到 /opt/module 目录下
tar -zxvf apache-seatunnel-incubating-2.3.1-bin.tar.gz -C /opt/module

在 Apache 的 Maven 仓库下载相应的 connector(下载时每个 jar 包在不同的路径下面),放到 seatunnel-2.3.1/connectors/seatunnel 目录下:
https://repo.maven.apache.org/maven2/org/apache/seatunnel/
connector-assert-2.3.1.jar
connector-cdc-mysql-2.3.1.jar
connector-console-2.3.1.jar   # 自带的
connector-doris-2.3.1.jar
connector-elasticsearch-2.3.1.jar
connector-fake-2.3.1.jar      # 自带的
connector-file-hadoop-2.3.1.jar
connector-file-local-2.3.1.jar
connector-hive-2.3.1.jar
connector-iceberg-2.3.1.jar
connector-jdbc-2.3.1.jar
connector-kafka-2.3.1.jar
connector-redis-2.3.1.jar

配置安装 seatunnel 的插件
vim seatunnel-2.3.1/config/plugin_config

调用安装脚本的时候会在 Maven 的中央仓库下载对应的 jar 包,尽量少放,下载太慢了,我放了这些:
--connectors-v2--
connector-assert
connector-cdc-mysql
connector-jdbc
connector-fake
connector-console
--end--

整个过程非常慢……应该是从 Maven 中央仓库下载东西。
中间由于其他缘故我拷贝了一个hive框架/lib目录下的libfb303-0.9.3.jar放到seatunnel的lib目录下了。
seatunnel-2.3.1/config/v2.batch.config.template
env {execution.parallelism = 2job.mode = "BATCH"checkpoint.interval = 10000}source {FakeSource {parallelism = 2result_table_name = "fake"row.num = 16schema = {fields {name = "string"age = "int"}}}}sink {Console {}}运行命令
cd /opt/module/seatunnel-2.3.1
./bin/seatunnel.sh --config ./config/v2.batch.config.template -e local

运行成功的话可以在 console 看到打印的测试数据。
我新建了一个用来放运行配置的目录/opt/module/seatunnel-2.3.1/job
vim mysql_2console.conf
env {execution.parallelism = 2job.mode = "BATCH"checkpoint.interval = 10000}source{Jdbc {url = "jdbc:mysql://hadoop102/dim_db?useUnicode=true&characterEncoding=utf8&useSSL=false"driver = "com.mysql.cj.jdbc.Driver"connection_check_timeout_sec = 100user = "root"password = "xxxxxx"query = "select * from dim_basicdata_date_a_d where date < '2010-12-31'"}}sink {Console {}}查询的是一张日期维表的数据
建表语句:
CREATE DATABASE dim_db DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;drop table if exists dim_db.dim_basicdata_date_a_d;create table if not exists dim_db.dim_basicdata_date_a_d(`date` varchar(40) comment '日期',`year` varchar(40) comment '年',`quarter` varchar(40) comment '季度(1/2/3/4)',`season` varchar(40) comment '季节(春季/夏季/秋季/冬季)',`month` varchar(40) comment '月',`day` varchar(40) comment '日',`week` varchar(40) comment '年内第几周',`weekday` varchar(40) comment '周几(1-周一/2-周二/3-周三/4-周四/5-周五/6-周六/7-周日)',`is_workday` varchar(40) comment '是否是工作日(1-是,0-否)',`date_type` varchar(40) comment '节假日类型(工作日/法定上班[调休]/周末/节假日)',`update_date` varchar(40) comment '更新日期');可以自己插入几条数据试试
运行命令
cd /opt/module/seatunnel-2.3.1
./bin/seatunnel.sh --config ./job/mysql_2console.conf -e local

创建一张 hive 表
CREATE database db_hive;drop table if exists db_hive.dim_basicdata_date_a_d;create table if not exists db_hive.dim_basicdata_date_a_d(`date` string comment '日期',`year` string comment '年',`quarter` string comment '季度(1/2/3/4)',`season` string comment '季节(春季/夏季/秋季/冬季)',`month` string comment '月',`day` string comment '日',`week` string comment '年内第几周',`weekday` string comment '周几(1-周一/2-周二/3-周三/4-周四/5-周五/6-周六/7-周日)',`is_workday` string comment '是否是工作日(1-是,0-否)',`date_type` string comment '节假日类型(工作日/法定上班[调休]/周末/节假日)',`update_date` string comment '更新日期');自行插入几条数据
创建配置文件hive_2console.conf
env {execution.parallelism = 2job.mode = "BATCH"checkpoint.interval = 10000}source{Hive {table_name = "db_hive.dim_basicdata_date_a_d"metastore_uri = "thrift://hadoop102:9083"}}sink {Console {}}这里我使用的hive连接方式是jdbc访问元数据,所以metastore_uri = "jdbc:hive2://hadoop102:10000"也可以正常使用。
修改 hive-site.xml 配置文件(有可能你已经配置好了):
<!-- 为了方便连接,采用直连的方式连接到hive数据库,注释掉下面三条配置信息 --><!-- 指定存储元数据要连接的地址 --><property><name>hive.metastore.uris</name><value>thrift://hadoop102:9083</value></property><!-- 指定 hiveserver2 连接的 host --><property><name>hive.server2.thrift.bind.host</name><value>hadoop102</value></property><!-- 指定 hiveserver2 连接的端口号 --><property><name>hive.server2.thrift.port</name><value>10000</value></property>运行命令
cd /opt/module/seatunnel-2.3.1
./bin/seatunnel.sh --config ./job/hive_2console.conf -e local

创建配置文件
dim_basicdate_mysql_2hive.conf
env {execution.parallelism = 2job.mode = "BATCH"checkpoint.interval = 10000}source{Jdbc {url = "jdbc:mysql://hadoop102/dim_db?useUnicode=true&characterEncoding=utf8&useSSL=false"driver = "com.mysql.cj.jdbc.Driver"connection_check_timeout_sec = 100user = "root"password = "111111"query = "select * from dim_basicdata_date_a_d"}}sink {Hive {table_name = "db_hive.dim_basicdata_date_a_d"metastore_uri = "thrift://hadoop102:9083"}}运行命令
cd /opt/module/seatunnel-2.3.1
./bin/seatunnel.sh --config ./job/dim_basicdate_mysql_2hive.conf -e local

邮箱:1104566414@qq.com