#1.清洗数据《infotest.txt》保存成《infotest.csv》 sed 's/\[{//g' /root/sh/infotest.txt | #掐头 sed 's/}\]//g' | #去尾 sed 's/""/null/g' | #置空值 sed 's/"//g' | #删引号 #删除元素说明 sed 's/catchTime://g' | sed 's/commentCount://g' | sed 's/content://g' | sed 's/createTime://g' | sed 's/pic_list://g' | sed 's/praiseCount://g' | sed 's/reportCount://g' | sed 's/source://g' | #以逗号分割,添加列名,保存到csv awk -v FS="," -v OFS=","'BEGIN{print "catchTime,commentCount,content,createTime,pic_list,praiseCount,reportCount,source"}{print $1,$2,$3,$4,$5,$6,$7,$8}' \ > /root/sh/infotest.csv
#2.清洗数据《douban.txt》保存成《douban.csv》 sed 's/{"subjects":\[{//g' /root/sh/douban.txt | #掐头 sed 's/}\]}//g' | #去尾 sed 's/},{/\n/g' | #换行 sed 's/\\//g' | #删\号 sed 's/""/null/g' | #置空值 sed 's/"//g' | #删引号 #删除元素说明 sed 's/id://g' | sed 's/title://g' | sed 's/episodes_info://g' | sed 's/rate://g' | sed 's/cover_x://g' | sed 's/url://g' | sed 's/playable://g' | sed 's/cover://g' | sed 's/cover_y://g' | sed 's/is_new://g' | #以逗号分割,添加列名,保存到csv awk -v FS="," -v OFS=","'BEGIN{print "id,title,episodes_info,rate,cover_x,url,playable,cover,cover_y,is_new"}{print $8,$4,$1,$2,$3,$5,$6,$7,$9,$10}' \ >/root/sh/douban.csv
#如果douban表不存在则新建 sql2="create table if not exists douban(" for i in$names do if [ $i == "$lastname" ] then sql2="$sql2$i varchar(200))" else sql2="$sql2$i varchar(200)," fi done mysql -h$dbhost -P$dbprot -u$dbuser -p$dbpass$db -e "$sql2"
#导入数据 sql3="LOAD DATA INFILE '/usr/local/mysql/data/douban.csv' INTO TABLE douban CHARACTER SET utf8 FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 LINES" mysql -h$dbhost -P$dbprot -u$dbuser -p$dbpass$db -e "$sql3"
#查看表和数据 sql4="select * from douban" mysql -h$dbhost -P$dbprot -u$dbuser -p$dbpass$db -e "$sql4"