部署微博自动爬取组件
来源:互联网 发布:mac已安装软件桌面 编辑:程序博客网 时间:2024/05/21 14:45
#!/bin/bash
# Nightly Weibo user-id pipeline: build temp input tables, run the Spark
# storage job, load the result into Impala, then clean up.
# Usage: <script> <yyyymmdd>
THIS_PATH='/home/bigdata/script/weibo-userid'
# Abort if the working directory is missing; everything below uses relative paths.
cd "$THIS_PATH" || { echo "cannot cd to $THIS_PATH" >&2; exit 1; }
if [ -z "$1" ]; then
  echo "please input date,The data format is yyyymmdd." >&2
  exit 1
fi
DAY=$1
YEAR=${DAY:0:4}    # yyyy
MONTH=${DAY:0:6}   # yyyymm
# Comma-separated list consumed both by the temp-table loop and the Spark job.
TMP_INPUT_TABLE=weibo.click_tmp,weibo.impression_tmp,weibo.pageview_tmp
OUTPUT_HIVE_TABLE=weibo.WeiboUserEventsData
OUTPUT_HBASE_TABLE=NewWeiboId:CF
WEIBO_USERDATA_HBASE_TABLE=WeiboUserData:CF
OUTPUT_TABLE_DATA_LOCATION=/user/bigdata/weibooutput/weibousereventsdata
param_date=$(date +'%Y-%m-%d %H:%M:%S')
echo "$param_date weibo-userid begin"
# Build the temporary input tables: for each table, instantiate its SQL
# template (substituting $$day with the run date) and execute it via Impala.
INPUT_ARR=(${TMP_INPUT_TABLE//,/ })  # intentional word-split on commas
for i in "${INPUT_ARR[@]}"
do
  # Fail early if the template is missing instead of running sed/impala on nothing.
  cp "./template/$i" "./$i.tmp" || { echo "$i template copy failed" >&2; exit 1; }
  sed -i "s#\$\$day#${DAY}#g" "./$i.tmp"
  impala-shell -f "./$i.tmp"
  err=$?
  if [ $err -ne 0 ]; then
    echo "$i insert temp table failed"
    exit 1
  fi
done
# Build the Java classpath: HBase/Hive conf dirs, all HBase jars, and the
# Hive jars minus the ones that conflict with versions bundled in the fat jar.
HBASE_HOME=/opt/cloudera/parcels/CDH/lib/hbase
HIVE_HOME=/opt/cloudera/parcels/CDH/lib/hive
CLASSPATH=.:$HBASE_HOME/conf:$HIVE_HOME/conf
for jar_file_name in "${HBASE_HOME}"/lib/*.jar
do
  CLASSPATH=$CLASSPATH:$jar_file_name
done
# Exclusion globs (assignment does not expand them; they are matched as
# patterns below).
HIVE_EXEC=$HIVE_HOME/lib/hive-exec-*.jar
LANG3=$HIVE_HOME/lib/commons-lang3-*.jar
HIVE_SERD=$HIVE_HOME/lib/hive-serde-*.jar
HIVE_METADATA=$HIVE_HOME/lib/hive-metastore-*.jar
# BUG FIX: the jar on disk is lowercase "groovy-all-*.jar"; the original
# uppercase pattern could never match, so the conflicting jar slipped in.
GROOVY=$HIVE_HOME/lib/groovy-all-*.jar
for jar_file_name in "${HIVE_HOME}"/lib/*.jar
do
  # BUG FIX: patterns must be UNQUOTED on the RHS of == so [[ ]] performs
  # glob matching; the original quoted them, making every comparison literal
  # and the exclusion list dead code.
  if [[ "$jar_file_name" == $LANG3 || "$jar_file_name" == $HIVE_EXEC || "$jar_file_name" == $HIVE_SERD || "$jar_file_name" == $HIVE_METADATA || "$jar_file_name" == $GROOVY ]]; then
    echo "excluded:" "$jar_file_name"
  else
    CLASSPATH=$CLASSPATH:$jar_file_name
  fi
done
# Run the Spark storage job (reads the temp tables, writes HDFS + HBase).
# BUG FIX: the original chained "export ... &&" straight into spark-submit;
# the export is now a plain standalone statement.
export SPARK_CLASSPATH=$CLASSPATH
spark-submit --class com.gridsum.weibo.userId.storage.readTable.Driver --executor-memory 30g \
  --total-executor-cores 80 \
  --master spark://gs-server-1000:7077 \
  weibo-userid-storage-1.3-jar-with-dependencies.jar \
  "$TMP_INPUT_TABLE" \
  "$DAY" \
  "$OUTPUT_TABLE_DATA_LOCATION" \
  "$OUTPUT_HBASE_TABLE" \
  "$WEIBO_USERDATA_HBASE_TABLE"
err=$?
if [ $err -ne 0 ]; then
  echo "weibo-userid-storage job failed"
  exit 1
fi
## Load the job output into Impala: instantiate the load template with
## year/month/day and execute it.
cp ./template/loadweibousereventsdata ./loadweibousereventsdata.tmp \
  || { echo "loadweibousereventsdata template copy failed" >&2; exit 1; }
# Single sed pass with three substitutions (was three separate passes).
sed -i -e "s#\$\$day#${DAY}#g" \
       -e "s#\$\$month#${MONTH}#g" \
       -e "s#\$\$year#${YEAR}#g" ./loadweibousereventsdata.tmp
impala-shell -f ./loadweibousereventsdata.tmp
err=$?
if [ $err -ne 0 ]; then
  echo "loadweibousereventsdata failed"
  exit 1
fi
# Drop the temporary input tables.
for i in "${INPUT_ARR[@]}"
do
  impala-shell -q "drop table $i"
  err=$?
  if [ $err -ne 0 ]; then
    echo "$i drop temp table failed"
    exit 1
  fi
done
# Remove the day's intermediate output from HDFS.
hadoop fs -rm -r "$OUTPUT_TABLE_DATA_LOCATION/$DAY"
err=$?
if [ $err -ne 0 ]; then
  echo "delete $OUTPUT_TABLE_DATA_LOCATION/$DAY failed"
  exit 1
fi
param_date=$(date +'%Y-%m-%d %H:%M:%S')
echo "$param_date finish weibo-userid"
# NOTE(review): lines below are a verbatim duplicate of the whole script,
# introduced by the web-page extraction this file was copied from. The
# duplicate would re-run the entire pipeline a second time in one invocation
# (and the second pass would fail on the already-dropped temp tables) —
# confirm whether it should be removed entirely. The same correctness fixes
# are applied here so the copy is at least internally consistent.
THIS_PATH='/home/bigdata/script/weibo-userid'
cd "$THIS_PATH" || { echo "cannot cd to $THIS_PATH" >&2; exit 1; }
if [ -z "$1" ]; then
  echo "please input date,The data format is yyyymmdd." >&2
  exit 1
fi
DAY=$1
YEAR=${DAY:0:4}    # yyyy
MONTH=${DAY:0:6}   # yyyymm
TMP_INPUT_TABLE=weibo.click_tmp,weibo.impression_tmp,weibo.pageview_tmp
OUTPUT_HIVE_TABLE=weibo.WeiboUserEventsData
OUTPUT_HBASE_TABLE=NewWeiboId:CF
WEIBO_USERDATA_HBASE_TABLE=WeiboUserData:CF
OUTPUT_TABLE_DATA_LOCATION=/user/bigdata/weibooutput/weibousereventsdata
param_date=$(date +'%Y-%m-%d %H:%M:%S')
echo "$param_date weibo-userid begin"
# Build the temporary input tables from their SQL templates.
INPUT_ARR=(${TMP_INPUT_TABLE//,/ })  # intentional word-split on commas
for i in "${INPUT_ARR[@]}"
do
  cp "./template/$i" "./$i.tmp" || { echo "$i template copy failed" >&2; exit 1; }
  sed -i "s#\$\$day#${DAY}#g" "./$i.tmp"
  impala-shell -f "./$i.tmp"
  err=$?
  if [ $err -ne 0 ]; then
    echo "$i insert temp table failed"
    exit 1
  fi
done
# Build the Java classpath (HBase jars + Hive jars minus conflicting ones).
HBASE_HOME=/opt/cloudera/parcels/CDH/lib/hbase
HIVE_HOME=/opt/cloudera/parcels/CDH/lib/hive
CLASSPATH=.:$HBASE_HOME/conf:$HIVE_HOME/conf
for jar_file_name in "${HBASE_HOME}"/lib/*.jar
do
  CLASSPATH=$CLASSPATH:$jar_file_name
done
# Exclusion globs; matched unquoted below so [[ ]] treats them as patterns.
HIVE_EXEC=$HIVE_HOME/lib/hive-exec-*.jar
LANG3=$HIVE_HOME/lib/commons-lang3-*.jar
HIVE_SERD=$HIVE_HOME/lib/hive-serde-*.jar
HIVE_METADATA=$HIVE_HOME/lib/hive-metastore-*.jar
# BUG FIX: lowercase "groovy-all" matches the actual CDH jar name.
GROOVY=$HIVE_HOME/lib/groovy-all-*.jar
for jar_file_name in "${HIVE_HOME}"/lib/*.jar
do
  # BUG FIX: unquoted RHS enables glob matching (quoted RHS compared literally).
  if [[ "$jar_file_name" == $LANG3 || "$jar_file_name" == $HIVE_EXEC || "$jar_file_name" == $HIVE_SERD || "$jar_file_name" == $HIVE_METADATA || "$jar_file_name" == $GROOVY ]]; then
    echo "excluded:" "$jar_file_name"
  else
    CLASSPATH=$CLASSPATH:$jar_file_name
  fi
done
# Run the Spark storage job.
export SPARK_CLASSPATH=$CLASSPATH
spark-submit --class com.gridsum.weibo.userId.storage.readTable.Driver --executor-memory 30g \
  --total-executor-cores 80 \
  --master spark://gs-server-1000:7077 \
  weibo-userid-storage-1.3-jar-with-dependencies.jar \
  "$TMP_INPUT_TABLE" \
  "$DAY" \
  "$OUTPUT_TABLE_DATA_LOCATION" \
  "$OUTPUT_HBASE_TABLE" \
  "$WEIBO_USERDATA_HBASE_TABLE"
err=$?
if [ $err -ne 0 ]; then
  echo "weibo-userid-storage job failed"
  exit 1
fi
## Load the job output into Impala.
cp ./template/loadweibousereventsdata ./loadweibousereventsdata.tmp \
  || { echo "loadweibousereventsdata template copy failed" >&2; exit 1; }
sed -i -e "s#\$\$day#${DAY}#g" \
       -e "s#\$\$month#${MONTH}#g" \
       -e "s#\$\$year#${YEAR}#g" ./loadweibousereventsdata.tmp
impala-shell -f ./loadweibousereventsdata.tmp
err=$?
if [ $err -ne 0 ]; then
  echo "loadweibousereventsdata failed"
  exit 1
fi
# Drop the temporary input tables.
for i in "${INPUT_ARR[@]}"
do
  impala-shell -q "drop table $i"
  err=$?
  if [ $err -ne 0 ]; then
    echo "$i drop temp table failed"
    exit 1
  fi
done
# Remove the day's intermediate output from HDFS.
hadoop fs -rm -r "$OUTPUT_TABLE_DATA_LOCATION/$DAY"
err=$?
if [ $err -ne 0 ]; then
  echo "delete $OUTPUT_TABLE_DATA_LOCATION/$DAY failed"
  exit 1
fi
param_date=$(date +'%Y-%m-%d %H:%M:%S')
echo "$param_date finish weibo-userid"
0 0
- 部署微博自动爬取组件
- eclipse使用Ant组件自动部署项目到tomcat6.0
- GIT实现自动拉取代码(可实现自动部署)
- webclient 爬取网页 禁止自动跳转
- Python Scrapy 学习----自动爬取网页
- 爬取小说并且自动下载
- 编写自动爬取网页的爬虫
- 自动爬取网站并截图
- 数据库组件 Hxj.Data (三十)(Asp.Net 页面自动赋值与取值)
- python自动投票源码(自动爬取更换ip)
- 如果制作一个web项目安装部署包,并自动安装一些必备的组件?
- 如何部署flex组件
- wordpress 自动取图
- Tfs 自动部署 部署图
- liferay 热部署、自动部署
- Linux系统下tomcat自动拉取代码打包发布的配置部署和脚本编写(转朋友)
- Python可自动登录爬取图片的网络爬虫
- python简单爬取热门文字段子并自动浏览
- Windows 7 x64系统安装CRFsuite给Python使用
- Antlr v4入门教程和实例
- 闲聊DTD语法(二)
- android 接口回调
- 设计模式思考----适配器模式
- 部署微博自动爬取组件
- 新浪微博开发之二十八(原创微博frame)
- ROS新兵上路(二)ROS框架与文件系统
- 黑马程序员————浅谈面向对象
- 【机房收费系统】泛型与Datatable
- 使用 SharedPreferences 读写数据
- leetcode Next Permutation
- hdu 1203 I NEED A OFFER!
- 4月英语——just say it