Connecting to MySQL

-- Create the database
create database test
DEFAULT CHARACTER SET utf8
DEFAULT COLLATE utf8_general_ci;

show databases;

-- Create the account
create user 'test'@'%' identified by 'test';

grant all privileges on test.*
to 'test'@'%'
identified by 'test' with grant option;

-- Reload the privilege tables
flush privileges;
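
To confirm that the new account works, you can log in with it from the shell. A minimal check, assuming the mysql client is on the PATH and that 'test'@'%' also matches local connections on this host:

mysql -utest -ptest -e 'show databases;'   # should list the test database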

Installation

# 1. Copy the installer tarball and the MySQL JDBC jar to /root, then extract
tar -zvxf sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz -C /opt/module/

# 2. Configure environment variables
vim /etc/profile.d/my_env.sh

# Add the Sqoop paths
#SQOOP_HOME
export SQOOP_HOME=/opt/module/sqoop-1.4.6.bin__hadoop-2.0.4-alpha
export PATH=$PATH:$SQOOP_HOME/bin

# Reload the profile
source /etc/profile

# 3. Configure Sqoop
cd $SQOOP_HOME/conf
cp sqoop-env-template.sh sqoop-env.sh

vim sqoop-env.sh

export HADOOP_COMMON_HOME=/opt/module/hadoop-3.1.3
export HADOOP_MAPRED_HOME=/opt/module/hadoop-3.1.3
export HIVE_HOME=/opt/module/apache-hive-3.1.2-bin

# Copy the JDBC driver into Sqoop's lib directory
cp mysql-connector-java-5.1.27-bin.jar /opt/module/sqoop-1.4.6.bin__hadoop-2.0.4-alpha/lib/

# 4. Test the connection
sqoop list-databases --connect jdbc:mysql://localhost:3306/ --username root --password root
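
If the connection works, the command prints every database visible to root. The same check can be run with the test account created earlier; a sketch, assuming the grants above are in place:

sqoop list-databases --connect jdbc:mysql://localhost:3306/ --username test --password test
sqoop list-tables --connect jdbc:mysql://localhost:3306/test --username test --password test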

Creating Tables

use test; -- switch to the test database

create table emp(
empno int,
ename varchar(50),
job varchar(50),
mgr int,
hiredate varchar(100),
sal decimal(6,2),
comm decimal(6,2),
deptno int
);

create table student(
name varchar(50),
id varchar(50),
class_name varchar(50),
age int,
chinese int,
math int,
english int,
sex varchar(10)
);

Write the script sqoop_create_table.sh; it wraps the two create-hive-table commands below (a combined sketch follows them)

emp table

sqoop create-hive-table \
--connect jdbc:mysql://hadoop100:3306/test \
--username test \
--password test \
--table emp \
--create-hive-table \
--hive-database db_hive \
--hive-table sqoop_emp \
--fields-terminated-by '\t' \
--lines-terminated-by '\n'

student table

sqoop create-hive-table \
--connect jdbc:mysql://hadoop100:3306/test \
--username test \
--password test \
--table student \
--create-hive-table \
--hive-database db_hive \
--hive-table sqoop_student \
--fields-terminated-by '\t' \
--lines-terminated-by '\n'
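
Folding the two commands into the single sqoop_create_table.sh named above might look like the following sketch; the shebang and the loop over table pairs are the only additions:

#!/bin/bash
# sqoop_create_table.sh - create both Hive tables from the MySQL schemas.
# Each pair is <mysql_table>:<hive_table>, matching the commands above.
for pair in emp:sqoop_emp student:sqoop_student
do
  mysql_tbl=${pair%%:*}
  hive_tbl=${pair##*:}
  sqoop create-hive-table \
  --connect jdbc:mysql://hadoop100:3306/test \
  --username test \
  --password test \
  --table "$mysql_tbl" \
  --create-hive-table \
  --hive-database db_hive \
  --hive-table "$hive_tbl" \
  --fields-terminated-by '\t' \
  --lines-terminated-by '\n'
done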

After running the script, execute show tables in the db_hive database to check that the sqoop_emp / sqoop_student tables exist.
Use desc formatted sqoop_emp to inspect the table structure.
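
Both checks can also be run non-interactively; a sketch, reusing the beeline endpoint that appears later in this post:

beeline -u jdbc:hive2://hadoop100:10000/db_hive -n root -p root \
-e 'show tables; desc formatted sqoop_emp;'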

Loading Data

Loading data into the mysql.test tables

Write the script sqoop_exp_emp.sh

sqoop export \
--connect jdbc:mysql://hadoop100:3306/test \
--username test \
--password test \
--table emp \
--export-dir '/emp' \
-m 1 \
--fields-terminated-by ';'

After it runs, query the emp table in the mysql test database to confirm the data was loaded.
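
Note that the export simply reads whatever files already sit under /emp in HDFS, with fields separated by ';'. A hypothetical staging step (the sample row is made up) plus the verification query:

# Hypothetical sample data: one emp row with fields separated by ';'
echo '7369;SMITH;CLERK;7902;1980-12-17;800.00;0.00;20' > emp.txt
hdfs dfs -mkdir -p /emp
hdfs dfs -put -f emp.txt /emp/
# After the export, check that the row landed in MySQL
mysql -utest -ptest --database=test -e 'select * from emp;'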

sqoop_exp_student.sh

sqoop export \
--connect "jdbc:mysql://hadoop100:3306/test?useUnicode=true&characterEncoding=utf-8" \
--username test \
--password test \
--table student \
--export-dir '/student' \
-m 1 \
--fields-terminated-by ','

After it runs, query the student table in the mysql test database to confirm the data was loaded.
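
The useUnicode/characterEncoding parameters matter here because the student data typically contains Chinese names; one way to confirm they survived the round trip, as a sketch:

mysql -utest -ptest --default-character-set=utf8 --database=test \
-e 'select name, class_name from student limit 5;'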

Loading data into the sqoop tables in the db_hive database

Write the script sqoop_imp_emp.sh

#!/bin/bash
sqoop import \
--connect "jdbc:mysql://hadoop100:3306/test?useUnicode=true&characterEncoding=utf-8" \
--username test \
--password test \
--table emp \
--hive-import \
--delete-target-dir \
--hive-database db_hive \
--fields-terminated-by "\t" \
--target-dir "/user/hive/warehouse/db_hive.db/sqoop_emp" \
--hive-table sqoop_emp \
-m 1

After it runs, query the sqoop_emp table in the db_hive database to confirm the data was loaded.
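
A non-interactive spot check of the Hive side, again via the beeline endpoint used elsewhere in this post:

beeline -u jdbc:hive2://hadoop100:10000/db_hive -n root -p root \
-e 'select count(*) from sqoop_emp;'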

sqoop_imp_student.sh

#!/bin/bash
sqoop import \
--connect "jdbc:mysql://hadoop100:3306/test?useUnicode=true&characterEncoding=utf-8" \
--username test \
--password test \
--table student \
--hive-import \
--delete-target-dir \
--hive-database db_hive \
--fields-terminated-by "\t" \
--target-dir "/user/hive/warehouse/db_hive.db/sqoop_student" \
--hive-table sqoop_student \
-m 1

After it runs, query the sqoop_student table in the db_hive database to confirm the data was loaded.
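
Since the transfers should be lossless, comparing row counts on both sides is a cheap sanity check; a sketch:

mysql -utest -ptest --database=test -Ne 'select count(*) from student;'
beeline -u jdbc:hive2://hadoop100:10000/db_hive -n root -p root \
--outputformat=csv2 --showHeader=false -e 'select count(*) from sqoop_student;'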

Full Export and Import of a Partitioned Table

Create the table

Create the table in mysql.test

create table supermarket(
id varchar(100) comment 'row ID',
ord_id varchar(100) comment 'order ID',
ord_date varchar(100) comment 'order date',
exch_date varchar(100) comment 'ship date',
exch_type varchar(100) comment 'shipping method',
cust_id varchar(100) comment 'customer ID',
cust_name varchar(100) comment 'customer name',
d_type varchar(100) comment 'segment',
city varchar(100) comment 'city',
prov varchar(100) comment 'province/autonomous region',
country varchar(100) comment 'country',
area varchar(100) comment 'region',
pro_id varchar(100) comment 'product ID',
type1 varchar(100) comment 'category',
type2 varchar(100) comment 'subcategory',
pro_name varchar(100) comment 'product name',
sales decimal(12,4) comment 'sales amount',
count1 int comment 'quantity',
discount decimal(12,4) comment 'discount',
profit decimal(12,4) comment 'profit'
);

Export

Write the sqoop_exp_supermarket.sh script.
It exports the data of the Hive table supermarket_p1, one partition at a time, into the MySQL supermarket table. The script assumes supermarket_p1 already exists in db_hive; a hypothetical reconstruction of its DDL is sketched below.
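
The post never shows the DDL of supermarket_p1, so the following is only a guess: the column list mirrors the MySQL table above, while the type_p partition key and the tab delimiter are inferred from the import and export scripts.

# Hypothetical DDL for supermarket_p1, issued through beeline
beeline -u jdbc:hive2://hadoop100:10000/db_hive -n root -p root -e "
create table supermarket_p1(
id string, ord_id string, ord_date string, exch_date string, exch_type string,
cust_id string, cust_name string, d_type string, city string, prov string,
country string, area string, pro_id string, type1 string, type2 string,
pro_name string, sales decimal(12,4), count1 int, discount decimal(12,4),
profit decimal(12,4))
partitioned by (type_p string)
row format delimited fields terminated by '\t';"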

#!/bin/bash
# Export one Hive partition directory into the MySQL supermarket table
sqoop_maket(){
sqoop export \
--connect "jdbc:mysql://hadoop100:3306/test?useUnicode=true&characterEncoding=utf-8" \
--username test \
--password test \
-m 1 \
--table supermarket \
--input-fields-terminated-by '\t' \
--export-dir "/user/hive/warehouse/db_hive.db/supermarket_p1/$val/"
}

# List the partitions of supermarket_p1, then export each one in turn
part=`beeline -u jdbc:hive2://hadoop100:10000/db_hive \
-n root -p root --outputformat=csv2 --showHeader=false \
-e 'show partitions supermarket_p1;'`
for val in $part
do
echo $val
sqoop_maket
done

After it runs, query the supermarket table in the mysql test database to confirm the data was loaded.
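
Because the loop exports one partition per pass, a per-category row count shows at a glance whether every partition made it across; a sketch:

mysql -utest -ptest --database=test \
-e 'select type1, count(*) from supermarket group by type1;'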

Import

Write the sqoop_imp_supermarket.sh script.
Its Sqoop import function selects the rows of one category (type1) from the MySQL supermarket table and loads them into the corresponding partition of a Hive table.

#!/bin/bash
# Import one category of the MySQL supermarket table into a Hive partition.
# Note: --create-hive-table is omitted here because it makes the job fail as
# soon as the table already exists (i.e. from the second loop iteration on);
# the Hive import creates the table on the first run anyway.
sqoop_imp(){
sqoop import \
--connect "jdbc:mysql://hadoop100:3306/test?useUnicode=true&characterEncoding=utf-8" \
--username test \
--password test \
--query "select * from supermarket where type1 ='$part' and \$CONDITIONS" \
--hive-import \
--hive-overwrite \
--fields-terminated-by "\t" \
--hive-database db_hive \
--hive-table supermarket_sqoop \
--target-dir "/user/hive/warehouse/db_hive.db/supermarket_sqoop/type_p=$part_v/" \
--hive-partition-key type_p \
--hive-partition-value "$part_v" \
-m 1
}

# Each row is "<type1>|<ascii value>": the Chinese category name drives the
# WHERE clause and the romanized value names the Hive partition.
for p in `mysql -uroot -proot --database=test -Ne \
"SELECT
CONCAT(type1,'|',
CASE WHEN type1 = '家具' THEN 'jiajv'
WHEN type1 = '办公用品' THEN 'bgyp'
WHEN type1 = '技术' THEN 'jishu' END)
FROM supermarket group by type1"`
do
part=`echo $p | awk -F '|' '{print $1}'`
part_v=`echo $p | awk -F '|' '{print $2}'`
echo '***************************'$part $part_v '*****************************'
sqoop_imp
done

After it runs, query the supermarket_sqoop table in the db_hive database to confirm the data was loaded.
If the table exists but has no rows, run the following statement against db_hive and query again.

-- Rebuild the partition metadata
msck repair table supermarket_sqoop;
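
After the repair, the partitions should be registered and the data queryable; a quick check via beeline:

beeline -u jdbc:hive2://hadoop100:10000/db_hive -n root -p root \
-e 'show partitions supermarket_sqoop; select count(*) from supermarket_sqoop;'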