大数据平台新加接口表(或接口表表结构变更)脚本更新及数据初始化

来源:互联网 发布:p2p 网络摄像机 编辑:程序博客网 时间:2024/06/01 08:39


与大数据平台用于生成数据跑批脚本的脚本(version3.0)对应。
1、目录路径
[hs@master manual_proc]$ pwd
/home/hs/opt/dw-etl/manual_proc
[hs@master manual_proc]$ tree -f
.
├── ./cstab_str+data_update.sh
├── ./manual_update_tab.list
└── ./table_manual_isupdated.log

0 directories, 3 files
[hs@master manual_proc]$

2、要更新的表的列表清单
[hs@master manual_proc]$ cat manual_update_tab.list
text_tab0
test_tab1

3、更新的主体shell
[hs@master manual_proc]$ cat cstab_str+data_update.sh
#!/bin/bash
# Date context for this run: today / yesterday / day-before-yesterday in
# Y-m-d form, plus the compact yyyymmdd form of yesterday ($ytd) used in
# generated file and partition names.
export today=$(date +%Y-%m-%d)
export yesterday=$(date -d last-day +%Y-%m-%d)
export beforeytd=$(date -d "2 days ago" +%Y-%m-%d)
export ytd=${yesterday//-/}

#0.update generate all table create and etl script ...
# Refresh the table list first, then run every DDL/ETL script generator in
# the same fixed order as before (create-table generators, data-movement
# generators, then the big-table and index-SQL generators).
script_gen_dir=/home/hs/opt/dw-etl/script_generate

sh $script_gen_dir/etl_table_list_update.sh

for gen_script in \
    src-create_rdstab_script.sh \
    ssa-create_hivetab_script.sh \
    sor-close2source_create_rdstab_script.sh \
    src-create_src2dwsrc_script.sh \
    ssa-create_dwsrc2ssa_script.sh \
    sor-close2source_data_pull_from_ssa_script.sh \
    sor-close2source_create_hive2mysql_script.sh \
    bigtable-create_allstage_suppertab_int_script.sh \
    rds-increment_tab_should_create_idx_sql.sh
do
    sh "$script_gen_dir/$gen_script"
done

# Per-table rebuild loop: for every table named in manual_update_tab.list,
# recreate its structures in all three stages (RDS src, Hive ssa, RDS sor)
# and fully re-pull its data through the pipeline.
while read tab_name
do
# timestamp taken at iteration start; written to the log at the end
export currtime=`date "+%Y-%m-%d %H:%M:%S"`

# Pull this table's pipe-delimited metadata row from the master batch list
# into a one-line temp file (whole-word match on the table name).
cat /home/hs/opt/dw-etl/script_generate/script_dir/list_dir/batch_table_userpass.list |grep -w $tab_name > /home/hs/opt/dw-etl/manual_proc/table_info_only.list

# Split the metadata fields: table name, source schema, source db ip,
# mapped sor RDS table name, load type, and source db credentials.
tab_name=$(awk -F "|" 'NR==1 {print $1}' /home/hs/opt/dw-etl/manual_proc/table_info_only.list)
src_dbname=$(awk -F "|" 'NR==1 {print $2}' /home/hs/opt/dw-etl/manual_proc/table_info_only.list)
src_dbip=$(awk -F "|" 'NR==1 {print $3}' /home/hs/opt/dw-etl/manual_proc/table_info_only.list)
rds_map_tab_name=$(awk -F "|" 'NR==1 {print $4}' /home/hs/opt/dw-etl/manual_proc/table_info_only.list)
load_type=$(awk -F "|" 'NR==1 {print $5}' /home/hs/opt/dw-etl/manual_proc/table_info_only.list)
src_user=$(awk -F "|" 'NR==1 {print $6}' /home/hs/opt/dw-etl/manual_proc/table_info_only.list)
src_pass=$(awk -F "|" 'NR==1 {print $7}' /home/hs/opt/dw-etl/manual_proc/table_info_only.list)

# Warehouse (src layer) connection info via the shared helper.
dw_dbip=`sh /home/hs/opt/public-function/getdwinfo.fc dbip src`
dw_user=`sh /home/hs/opt/public-function/getdwinfo.fc user src`
dw_pass=`sh /home/hs/opt/public-function/getdwinfo.fc pass src`

#1.srcdw-rds,ssa-hive,sor-rds create table ...
# Run the previously generated per-table DDL in each stage.

mysql -h$dw_dbip -u$dw_user -p$dw_pass -D src < "/home/hs/opt/dw-etl/script_generate/script_dir/rds-src_create_tab_script/"$tab_name".sql"

hive -f "/home/hs/opt/dw-etl/script_generate/script_dir/hive-ssa_create_tab_script/"$tab_name".hql"

mysql -h$dw_dbip -u$dw_user -p$dw_pass -D sor < "/home/hs/opt/dw-etl/script_generate/script_dir/rds-sor_close2source_create_tab_script/"$tab_name".sql"

#2.from src to dwsrc,from dwsrc to ssa,from ssa to sor,sor from hive to mysql ...
#get dwsrc data from src
# The next two mysql calls use SQL CONCAT over information_schema to WRITE a
# throwaway shell script (onetab-exec.sh). The literal token "huanhang"
# (pinyin for "newline") is a placeholder that the sed below turns into real
# newlines, and the multiply-escaped backticks are collapsed by the other sed.
# First statement: export the full source table to a pipe-delimited .dat file,
# stripping CR/LF and '|' out of varchar/char columns so the delimiter stays safe.
rm -rf /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh
mysql -h$src_dbip -u$src_user -p$src_pass -N -e "set session group_concat_max_len=20000000;
select CONCAT('#!/bin/bashhuanhanghuanhangecho -e \`date +\\\"%Y-%m-%d %H:%M:%S\\\"\`,src-src2dwsrc $tab_name data $load_type load proc start ... >> /home/hs/opt/dw-etl/run_log_dir/src-src2dwsrc_run.loghuanhang#userpass info get ...huanhangsrc_dbip=\`sh /home/hs/opt/public-function/getsrcinfo.fc dbip $tab_name\`huanhangsrc_user=\`sh /home/hs/opt/public-function/getsrcinfo.fc user $tab_name\`huanhangsrc_pass=\`sh /home/hs/opt/public-function/getsrcinfo.fc pass $tab_name\`huanhanghuanhangdw_dbip=\`sh /home/hs/opt/public-function/getdwinfo.fc dbip src\`huanhangdw_user=\`sh /home/hs/opt/public-function/getdwinfo.fc user src\`huanhangdw_pass=\`sh /home/hs/opt/public-function/getdwinfo.fc pass src\`huanhanghuanhang#$tab_name total data download from src ...huanhang/usr/local/bin/mysql -h\$src_dbip -u\$src_user -p\$src_pass -N -e\"'
,'select CONCAT(',trim(TRAILING ',\'|\'' FROM GROUP_CONCAT(CONCAT('ifnull(',case when data_type in ('varchar','char') then concat('replace(replace(replace(\\\\\`',COLUMN_NAME,'\\\\\`,char(13),\'\'),char(10),\'\'),\'|\',\'\')') else concat('\\\\\`',COLUMN_NAME,'\\\\\`') end,',\'\')',',\'|\''))) ,') from ',TABLE_SCHEMA,'.',table_name,';'
,'\" >/home/hs/opt/dw-etl/data/src2dwsrc_data/',TABLE_SCHEMA,'.',table_name,'.dathuanhang'
,'') mysql_export_and_dwrds_load_shell
from information_schema.COLUMNS
where TABLE_SCHEMA='$src_dbname' and table_name='$tab_name';"  >> /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh

# Second statement appends the load half: truncate the dw src table, then
# LOAD DATA LOCAL INFILE the exported .dat file into it.
mysql -h$src_dbip -u$src_user -p$src_pass -N -e "set session group_concat_max_len=20000000;
select
concat('#','$tab_name',' table total data upload to dw-src ...huanhang/usr/local/bin/mysql -h\$dw_dbip -u\$dw_user -p\$dw_pass -e \"use src;truncate table ','$tab_name',';\"huanhang#load data file to dw-rds huanhang/usr/local/bin/mysql -h\$dw_dbip -u\$dw_user -p\$dw_pass -e \"use src;load data local infile \'/home/hs/opt/dw-etl/data/src2dwsrc_data/','$src_dbname','.','$tab_name','.dat\' into table ','$tab_name',' fields terminated by \'|\' enclosed by \'\' lines terminated by \'\\n\' ignore 0 lines;\"huanhanghuanhangecho -e \`date +\\\"%Y-%m-%d %H:%M:%S\\\"\`,src-src2dwsrc $tab_name data $load_type load proc finished! >> /home/hs/opt/dw-etl/run_log_dir/src-src2dwsrc_run.loghuanhang') mysql_export_and_dwrds_load_shell
from information_schema.TABLES t
where t.TABLE_SCHEMA='$src_dbname' and t.TABLE_NAME='$tab_name';"  >> /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh

# strip the escaping backslash in front of backticks in the generated script
sed -i "s/\\\\\`/\\\`/g" /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh

# turn every "huanhang" placeholder into a real newline
sed -i "s/huanhang/\n/g" /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh

chmod 750 /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh

# run the generated script: full export from source db + load into dw src
sh /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh

#get ssa data from dwsrc
# Same generate-then-run trick: export the dw src table (normalizing varchar
# CR/LF/'|' and converting *time/*date epoch-int columns to datetime text),
# then load the .dat file into the Hive ssa table.
rm -rf /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh
mysql -h$src_dbip -u$src_user -p$src_pass -N -e "set session group_concat_max_len=20000000;
select CONCAT('#!/bin/bashhuanhang#$tab_name total data proc ...huanhang/usr/local/bin/mysql -h$dw_dbip -u$dw_user -p$dw_pass -N -e\"'
,'select CONCAT(',trim(TRAILING ',\'|\'' FROM GROUP_CONCAT(CONCAT('ifnull(',case when data_type in ('varchar') then concat('replace(replace(replace(\\\\\`',COLUMN_NAME,'\\\\\`,char(13),\'\'),char(10),\'\'),\'|\',\'\')') when (COLUMN_NAME like '%time' or COLUMN_NAME like '%date') and data_type like '%int' then concat('from_unixtime(\\\\\`',COLUMN_NAME,'\\\\\`,\'%Y-%m-%d %H:%i:%s\')') else concat('\\\\\`',COLUMN_NAME,'\\\\\`') end,',\'\')',',\'|\''))) ,') from src.',table_name,';'
,'\" >/home/hs/opt/dw-etl/data/dwsrc2ssa_data/',TABLE_SCHEMA,'.',table_name,'_total_\$1.dathuanhang'
,'/home/hs/opt/hive-1.2.1/bin/hive -e \"use ssa;load data local inpath \'/home/hs/opt/dw-etl/data/dwsrc2ssa_data/',TABLE_SCHEMA,'.',table_name,'_total_\$1.dat\' overwrite into table ',table_name,';\"') mysql_export_and_hive_load_shell
from information_schema.COLUMNS
where TABLE_SCHEMA='$src_dbname' and table_name='$tab_name';"  >> /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh

# same post-processing: unescape backticks, expand "huanhang" to newlines
sed -i "s/\\\\\`/\\\`/g" /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh
sed -i 's/huanhang/\n/g' /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh

chmod 750 /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh

# $1 ($yesterday) becomes the date suffix in the generated .dat file name;
# $2 ($today) is passed through as well
sh /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh $yesterday $today

#get ssa data from dwsrc -> sor: rebuild the sor Hive table as a full copy
#get sor data from ssa
/home/hs/opt/hive-1.2.1/bin/hive -e "drop table if exists sor.$tab_name;create table sor.$tab_name as select * from ssa.$tab_name a1;"

#sor data hive to mysql
# Generate and run a script that dumps the Hive sor table to local chunk
# files and LOAD DATAs each chunk into the mapped sor RDS table.
rm -rf /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh
echo "#!/bin/bash
#export yesterday=\`date -d last-day +%Y-%m-%d\`
export yesterday=\$1

" > /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh

mysql -h$src_dbip -u$src_user -p$src_pass -N -e "set session group_concat_max_len=20000000;
select
concat('#',t.TABLE_NAME,' table data download from hive and upload to sor rds...huanhang/home/hs/opt/hive-1.2.1/bin/hive -e \"use sor;insert overwrite local directory \'/home/hs/opt/dw-etl/data/sor-hive2mysql_data/',t.TABLE_NAME,'\' row format delimited fields terminated by \'|\' select * from ',t.TABLE_NAME,';\"huanhang/usr/local/bin/mysql -h$dw_dbip -u$dw_user -p$dw_pass -e \"use sor;truncate table ','$rds_map_tab_name',';\"huanhang#loop load dir file to rds huanhangfor tabdt_path in /home/hs/opt/dw-etl/data/sor-hive2mysql_data/',t.TABLE_NAME,'/*;huanhangdohuanhang/usr/local/bin/mysql -h$dw_dbip -u$dw_user -p$dw_pass -e \"use sor;load data local infile \'\$tabdt_path\' into table ','$rds_map_tab_name',' fields terminated by \'|\' enclosed by \'\' lines terminated by \'\\n\' ignore 0 lines;\"huanhangdonehuanhang') sql_text
from information_schema.TABLES t
where t.TABLE_SCHEMA='$src_dbname' and t.TABLE_NAME='$tab_name';"  >> /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh

sed -i 's/huanhang/\n/g' /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh
chmod 750 /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh

sh /home/hs/opt/dw-etl/manual_proc/onetab-exec.sh $yesterday


#table is updated log info record ...
echo "current time:"$currtime,$tab_name" is updated!"  >> /home/hs/opt/dw-etl/manual_proc/table_manual_isupdated.log

done  < /home/hs/opt/dw-etl/manual_proc/manual_update_tab.list

#delete temp script file ...
# Remove the per-table temp files left over from the last loop iteration.
for tmp_file in table_info_only.list onetab-exec.sh
do
rm -rf "/home/hs/opt/dw-etl/manual_proc/$tmp_file"
done
[hs@master manual_proc]$

4、每日批量检查及调度shell
[hs@master dw-etl]$ pwd
/home/hs/opt/dw-etl
[hs@master dw-etl]$ cat dw-update.sh
#!/bin/bash
# yesterday in Y-m-d form plus its compact yyyymmdd form ($ytd).
export yesterday=$(date -d last-day +%Y-%m-%d)
export ytd=${yesterday//-/}

# manual override for re-running a past date (normally commented out)
#export yesterday='2016-04-28'

echo -e "`date "+%Y-%m-%d %H:%M:%S"`,dw table and data update proc start ..."  >> /home/hs/opt/dw-etl/run_log_dir/dw-update_run.log

#userpass info get ...
# warehouse (sor layer) connection info via the shared helper
dw_dbip=`sh /home/hs/opt/public-function/getdwinfo.fc dbip sor`
dw_user=`sh /home/hs/opt/public-function/getdwinfo.fc user sor`
dw_pass=`sh /home/hs/opt/public-function/getdwinfo.fc pass sor`

#update manual_update_tab.list ...
# Build the change list: tables whose structure changed yesterday (new table
# or new column) that exist in the src schema, excluding a hard-coded set of
# very large tables that must be handled manually.
echo -e "`date "+%Y-%m-%d %H:%M:%S"`,update manual_update_tab.list start ..."  >> /home/hs/opt/dw-etl/run_log_dir/dw-update_run.log
/usr/local/bin/mysql -h$dw_dbip -u$dw_user -p$dw_pass -N -e "select a.table_name from dm.srctab_change_info a inner join information_schema.tables b on a.table_name=b.table_name where a.change_date='$yesterday' and a.add_type in ('table','column') and b.table_schema='src' and a.table_name not in('59_order','59_orderfood','59_dormitem','59_coupon') group by a.table_name;" > /home/hs/opt/dw-etl/manual_proc/manual_update_tab.list

echo -e "`date "+%Y-%m-%d %H:%M:%S"`,update dwtab_original_index.sql start ..."  >> /home/hs/opt/dw-etl/run_log_dir/dw-update_run.log
#update dwtab_original_index.sql ...
# start from a clean index-DDL file for this run
rm -rf /home/hs/opt/dw-etl/manual_proc/dwtab_original_index.sql

# Capture the current index DDL of every table in the change list, so the
# indexes can be re-created after the tables are rebuilt (recreating a table
# drops its indexes). Reads the list line-by-line.
#
# Fixes vs the original:
#  - `for tab in $(cat list)` word-split lines and then re-scanned the whole
#    file with `awk NR==i` on every iteration; a while-read loop reads each
#    line exactly once and is safe against word-splitting.
#  - The generated DDL concatenated 'unique' directly to 'index ', producing
#    invalid `add uniqueindex ...` statements for unique indexes; the case
#    branch now yields 'unique ' with a trailing space.
while IFS= read -r tab_name
do
# skip blank lines (the old for-loop never produced empty words)
[ -n "$tab_name" ] || continue

/usr/local/bin/mysql -h$dw_dbip -u$dw_user -p$dw_pass -N -e "select CONCAT('alter table ',table_schema,'.',table_name,' add ',case when non_unique=0 then 'unique ' when non_unique=1 then '' end,'index ','\`',index_name,'\`','(\`',col_list,'\`) using btree;') from (select a.table_schema,a.table_name,a.non_unique,a.index_name,GROUP_CONCAT(column_name order by seq_in_index) col_list from information_schema.STATISTICS a where a.INDEX_SCHEMA in('src','sor') and table_name like '%$tab_name' group by a.table_schema,a.table_name,a.index_name) x;" >> /home/hs/opt/dw-etl/manual_proc/dwtab_original_index.sql

done < /home/hs/opt/dw-etl/manual_proc/manual_update_tab.list

echo -e "`date "+%Y-%m-%d %H:%M:%S"`,initialize src change table pull again start ..."  >> /home/hs/opt/dw-etl/run_log_dir/dw-update_run.log
#initialize src change table pull again ...
# Only run the heavy rebuild/re-pull driver when the change list is non-empty.
if [ -s /home/hs/opt/dw-etl/manual_proc/manual_update_tab.list ]; then
sh /home/hs/opt/dw-etl/manual_proc/cstab_str+data_update.sh
fi

echo -e "`date "+%Y-%m-%d %H:%M:%S"`,add index on rds table(src,sor) start ..."  >> /home/hs/opt/dw-etl/run_log_dir/dw-update_run.log
#add index on rds table(src,sor) ...
# Re-apply the index DDL captured before the rebuild, then discard the file.
index_file=/home/hs/opt/dw-etl/manual_proc/dwtab_original_index.sql
if [ -s $index_file ]; then
/usr/local/bin/mysql -h$dw_dbip -u$dw_user -p$dw_pass < $index_file
fi

rm -rf /home/hs/opt/dw-etl/manual_proc/dwtab_original_index.sql

echo -e "`date "+%Y-%m-%d %H:%M:%S"`,dw table and data update proc end !"  >> /home/hs/opt/dw-etl/run_log_dir/dw-update_run.log
# blank separator lines so consecutive runs are easy to tell apart in the log
echo -e "\n\n\n\n\n"  >> /home/hs/opt/dw-etl/run_log_dir/dw-update_run.log
[hs@master dw-etl]$


5、更新日志记录
[hs@master manual_proc]$ cat table_manual_isupdated.log
current time:2016-06-15 11:09:00,99_city is updated!
current time:2016-06-15 14:22:36,99_dormshoptime is updated!
current time:2016-06-16 10:29:30,charge_banner is updated!
current time:2016-06-16 10:30:26,game_card_area_server is updated!
current time:2016-06-16 10:31:22,game_card_product is updated!
current time:2016-06-16 10:32:16,game_card_promotion is updated!
current time:2016-06-16 10:32:40,game_charge_order_item is updated!
current time:2016-06-16 10:33:37,phone_charge_order_item is updated!
current time:2016-06-16 10:34:33,phone_charge_product is updated!
current time:2016-06-16 10:35:29,phone_charge_promotion is updated!
current time:2016-06-16 10:35:54,phone_charge_rule is updated!
current time:2016-06-16 10:36:18,phone_flow_order_item is updated!
current time:2016-06-16 10:37:13,phone_flow_product is updated!
current time:2016-06-16 10:38:09,phone_flow_promotion is updated!
current time:2016-06-16 10:38:34,phone_flow_rule is updated!
current time:2016-06-16 10:38:99,risk_charge_total_record is updated!

6、后记说明
此组脚本可以实现两个功能:源表字段更新的处理和新加接口表的处理。
6.1、源表字段更新的处理
已有的接口表表结构更新(新加字段等),通过每日跑批进行自动核查,将更新了结构的表清单写到manual_update_tab.list文件中去,然后调【cstab_str+data_update.sh 】实现表结构、数据及etl一系列脚本的更新与初始化。在此处剔除了一些数据量级在千万级的超大的表,这类表需要进行手工处理。
6.2、新加接口表的处理
如果是新加接口表,则需要手动维护【batch_table.list】清单和【manual_update_tab.list】,如有需要,还需要维护【src_userpass_info.list】。然后,手工调【cstab_str+data_update.sh 】脚本,实现新接口表的新加工作。当然,【batch_table.list】来自于手工维护的excel接口表信息。


0 0
原创粉丝点击