#!/bin/bash
# Deploy the Weibo automatic crawling component.
# (Original scraped-page header text removed: the shebang must be the very
# first line of the file for the interpreter line to take effect.)
#
# Usage: <script> yyyymmdd
# Runs the daily weibo-userid pipeline: fill temp tables, run the Spark
# storage job, load results into Impala, then clean up.
THIS_PATH='/home/bigdata/script/weibo-userid'
# All template/jar paths below are relative, so a failed cd must abort.
cd "$THIS_PATH" || exit 1

if [ -z "$1" ]; then
   echo "please input date,The data format is yyyymmdd."
   exit 1
fi
DAY=$1
YEAR=${DAY:0:4}     # yyyy  (first 4 chars of yyyymmdd)
MONTH=${DAY:0:6}    # yyyymm (first 6 chars — year+month, not month alone)


# Comma-separated list of the per-day temporary input tables.
TMP_INPUT_TABLE=weibo.click_tmp,weibo.impression_tmp,weibo.pageview_tmp


# Output locations: Hive table, HBase "table:columnfamily" targets, and the
# HDFS directory the Spark job writes its intermediate data to.
OUTPUT_HIVE_TABLE=weibo.WeiboUserEventsData
OUTPUT_HBASE_TABLE=NewWeiboId:CF
WEIBO_USERDATA_HBASE_TABLE=WeiboUserData:CF
OUTPUT_TABLE_DATA_LOCATION=/user/bigdata/weibooutput/weibousereventsdata


param_date=$(date +'%Y-%m-%d %H:%M:%S')
echo "$param_date weibo-userid begin"


# Split the comma-separated table list into an array (one temp table per item).
INPUT_ARR=(${TMP_INPUT_TABLE//,/ })


# Populate the temporary tables: render each SQL template for $DAY
# (the template uses the placeholder "$$day") and execute it via impala-shell.
for i in "${INPUT_ARR[@]}"
do
  cp "./template/$i" "./$i.tmp"
  sed -i "s#\$\$day#${DAY}#g" "./$i.tmp"
  impala-shell -f "./$i.tmp"
  err=$?
  if [ "$err" != 0 ]; then
     echo "$i insert temp  table failed"
     exit 1
  fi
done


#?兼.绫诲?
# Build the Java classpath: HBase/Hive conf dirs plus all HBase jars, plus
# all Hive jars EXCEPT a handful that conflict with classes bundled in the
# job's fat jar.
HBASE_HOME=/opt/cloudera/parcels/CDH/lib/hbase
HIVE_HOME=/opt/cloudera/parcels/CDH/lib/hive
CLASSPATH=.:$HBASE_HOME/conf:$HIVE_HOME/conf
for jar_file_name in "${HBASE_HOME}"/lib/*.jar
do
  CLASSPATH=$CLASSPATH:$jar_file_name
done
# Exclusion patterns. Assignments never glob-expand, so these variables hold
# the literal patterns used by the [[ == ]] matches below.
HIVE_EXEC=$HIVE_HOME/lib/hive-exec-*.jar
LANG3=$HIVE_HOME/lib/commons-lang3-*.jar
HIVE_SERD=$HIVE_HOME/lib/hive-serde-*.jar
HIVE_METADATA=$HIVE_HOME/lib/hive-metastore-*.jar
GROOVY=$HIVE_HOME/lib/groovy-all-*.jar   # was GROOVY-all-*: the shipped jar name is lowercase
for jar_file_name in "${HIVE_HOME}"/lib/*.jar
do
 # The right-hand sides are intentionally UNQUOTED so [[ == ]] performs glob
 # matching. The previous code quoted them, which forces a literal string
 # comparison against the unexpanded pattern — so no jar was ever excluded.
 if [[ "$jar_file_name" == $LANG3 || "$jar_file_name" == $HIVE_EXEC || "$jar_file_name" == $HIVE_SERD || "$jar_file_name" == $HIVE_METADATA || "$jar_file_name" == $GROOVY ]]; then
   echo "excluded:" $jar_file_name
 else
   CLASSPATH=$CLASSPATH:$jar_file_name
 fi
done


#?ц?spark绋.?
# Run the Spark storage job: reads the temp tables for $DAY and writes the
# results to HDFS ($OUTPUT_TABLE_DATA_LOCATION) and the two HBase tables.
# (Previously the export was chained to spark-submit with a stray '&&'.)
export SPARK_CLASSPATH=$CLASSPATH
spark-submit --class com.gridsum.weibo.userId.storage.readTable.Driver \
 --executor-memory 30g \
 --total-executor-cores 80 \
 --master spark://gs-server-1000:7077 \
 weibo-userid-storage-1.3-jar-with-dependencies.jar \
 "$TMP_INPUT_TABLE" \
 "$DAY" \
 "$OUTPUT_TABLE_DATA_LOCATION" \
 "$OUTPUT_HBASE_TABLE" \
 "$WEIBO_USERDATA_HBASE_TABLE"


err=$?
if [ "$err" != 0 ]; then
  echo "weibo-userid-storage job failed"
  exit 1
fi


## Load the job output into Impala: render the load script template
## (placeholders "$$day"/"$$month"/"$$year") and execute it.
cp ./template/loadweibousereventsdata ./loadweibousereventsdata.tmp
sed -i "s#\$\$day#${DAY}#g" ./loadweibousereventsdata.tmp
sed -i "s#\$\$month#${MONTH}#g" ./loadweibousereventsdata.tmp
sed -i "s#\$\$year#${YEAR}#g" ./loadweibousereventsdata.tmp
impala-shell -f ./loadweibousereventsdata.tmp
err=$?
if [ $err != 0 ];then
   echo "loadweibousereventsdata failed"
   exit 1
fi




# Drop the temporary tables now that their data has been loaded.
for i in "${INPUT_ARR[@]}"
do
  impala-shell -q "drop table $i"
  err=$?
  if [ "$err" != 0 ]; then
     echo "$i drop temp table failed"
     exit 1
  fi
done


# Remove the job's intermediate output for this day from HDFS.
hadoop fs -rm -r "$OUTPUT_TABLE_DATA_LOCATION/$DAY"
err=$?
if [ "$err" != 0 ]; then
   echo "delete $OUTPUT_TABLE_DATA_LOCATION/$DAY  failed"
   exit 1
fi


param_date=$(date +'%Y-%m-%d %H:%M:%S')
echo "$param_date finish  weibo-userid"
# NOTE(review): removed scraped web-page residue that followed this line
# ("0 0" and "原创粉丝点击") — as shell lines they would execute as unknown
# commands and make the script's exit status non-zero.