word_count

来源:互联网 发布:淘宝商城保健品 编辑:程序博客网 时间:2024/05/21 09:59
#!/bin/bash
#
# Launches a Hadoop Streaming job that runs mapper1.py / reducer1.py over two
# input paths and writes the result to OUTPUT_PATH. Any existing OUTPUT_PATH
# is removed before the job is submitted.
#
# Expects mapper1.py and reducer1.py in the current working directory (they
# are shipped to the job with -file).

# Abort on the first failing command or unset variable so the job is never
# submitted after a failed cleanup step.
set -euo pipefail

HADOOP_HOME=/usr/local/webserver/hadoop
INPUT_PATH=/data/archive/app_oeudjgn5872a7c3aaa54_datamine/george/inputs
INPUT_PATH2=/user/resys/projects/image/koudai_img/d_img/reduced/2015-taobao
OUTPUT_PATH=/data/archive/app_oeudjgn5872a7c3aaa54_datamine/george/output/may30
#OUTPUT_PATH=output

#echo "Clearing output path: $OUTPUT_PATH"
# `-rm -r` replaces the deprecated `-rmr`; `-f` keeps a missing output path
# from being reported as an error on the first run.
"${HADOOP_HOME}/bin/hadoop" fs -rm -r -f "${OUTPUT_PATH}"

# Streaming options are collected in an array so each argument stays a single
# word and no line-continuation backslashes are needed.
# NOTE(review): Hadoop 2.x appears to warn that -file / -jobconf are
# deprecated in favour of the generic -files / -D options; kept as-is here to
# preserve the original invocation -- confirm before migrating.
streaming_args=(
  -file mapper1.py
  -file reducer1.py
  -mapper mapper1.py
  -reducer reducer1.py
  -input "${INPUT_PATH}"
  -input "${INPUT_PATH2}"
  -output "${OUTPUT_PATH}"
  -jobconf mapreduce.jobtracker.split.metainfo.maxsize=-1
  -jobconf mapred.job.queue.name="offline"
  -jobconf mapred.job.priority="NORMAL"
  -jobconf mapred.reduce.tasks="10"
)

"${HADOOP_HOME}/bin/hadoop" jar \
  "${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-streaming-2.6.0.jar" \
  "${streaming_args[@]}"
0 0
原创粉丝点击