hadoop getfile exercise

来源:互联网 发布:红警mac版 迅雷下载 编辑:程序博客网 时间:2024/05/16 11:26

#!/bin/sh

# Locations used by this script.
# localPath  - local directory the script changes into before creating the
#              per-day output directory
# hadoopPath - HDFS directory path (not referenced by the code below; the
#              listing command spells the same path out literally)
# hadoop     - path of the hadoop launcher
# hive       - path of the hive launcher
localPath='/work/tda/yunion/unionfactfile'
hadoopPath='/workspace/tda/yunion/thive'
hadoop='/opt/hadoop-1.0.3/bin/hadoop'
hive='/opt/hadoop-1.0.3/hive/bin/hive'

# Reformat a compact date (YYYYMMDD) as YYYY-MM-DD.
# Arguments: $1 - the date as eight characters, e.g. 20120109
# Outputs:   the reformatted date on stdout
formatDate()
{
    # "$1" is used directly and quoted: no global helper variable is left
    # behind, and the value is neither word-split nor glob-expanded on its
    # way into awk.
    printf '%s\n' "$1" |
        awk '{print substr($0,1,4) "-" substr($0,5,2) "-" substr($0,7,2)}'
}

# Export the hadoop and hive launcher paths so child processes inherit them.
export hadoop="${hadoop}" hive="${hive}"


# Pick the day to export: yesterday unless exactly one YYYYMMDD argument is
# given on the command line.
fdate=$(date -d yesterday +%F)
date=$(date -d yesterday +%Y%m%d)
if [ "$#" -eq 1 ]; then
    date=$1
    # ${date} is used below as the name of a directory handed to `rm -rf`,
    # so accept nothing but exactly eight digits.
    case ${date} in
        [0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]) ;;
        *)
            echo "invalid date '${date}': expected YYYYMMDD" >&2
            exit 1
            ;;
    esac
    fdate=$(formatDate "${date}")
    echo "${date} ${fdate}"
fi

# Stop if the working directory cannot be entered: the rm below must never
# run from an unexpected location.
cd "${localPath}" || { echo "cannot cd to ${localPath}" >&2; exit 1; }
rm -rf -- "${date:?}"
mkdir -p -- "${date}" || exit 1

# Counts the matching HDFS directories; only used in the progress output.
num=0

# List the entries below ${hadoopPath} and keep those whose path contains
# "f_" and the requested date.
#  - The glob is quoted so that hadoop expands it on HDFS rather than the local
#    shell expanding it against the local filesystem.
#  - awk prints only the path column of real listing rows (8 or more columns),
#    so header lines such as "Found N items" and metadata columns (owner, size)
#    can no longer be mistaken for a path.
#  - The command substitution is intentionally unquoted to get one word per
#    path; paths containing whitespace are not supported.
for file_tmp in $("${hadoop}" fs -ls "${hadoopPath}/*" |
    awk -v day="${date}" 'NF >= 8 && $NF ~ /f_/ && index($NF, day) {print $NF}'); do
    num=$((num + 1))
    echo "${file_tmp}"

    # Inside each matching directory, take the entries whose path contains "000".
    for file in $("${hadoop}" fs -ls "${file_tmp}/*" |
        awk 'NF >= 8 && index($NF, "000") {print $NF}'); do
        # The output file is named after the first path component below
        # ${hadoopPath}, lower-cased. With the default ${hadoopPath} this is
        # the 6th "/"-separated field of the path.
        rel=${file#"${hadoopPath}"/}
        fileName1=$(printf '%s' "${rel%%/*}" | tr '[:upper:]' '[:lower:]')
        if [ -z "${fileName1}" ]; then
            echo "skipping ${file}: not below ${hadoopPath}" >&2
            continue
        fi
        echo "${num} .. ${file} ${fileName1}"

        # Keep tab-separated rows whose first column equals the date, whose
        # last column is greater than 0, and which contain neither a literal
        # \N nor a double quote. The date is passed with -v instead of being
        # spliced into the awk program text.
        "${hadoop}" fs -cat "${file}" |
            awk -F'\t' -v day="${date}" \
                '$1 == day && $NF > 0 && $0 !~ /\\N/ && $0 !~ /"/' \
                >> "${date}/${fileName1}"
    done
done

#tar czvf ${date}.tgz ${date}/
#sz ${date}.tgz

0 0
原创粉丝点击