hadoop实例
来源:互联网 发布:sql游标的用法 编辑:程序博客网 时间:2024/06/05 14:54
#!/bin/bash
#
# Launches a Hadoop Streaming job that runs a Python mapper and reducer over
# ${hdp_input} and writes the result to ${hdp_output}.
#
# HADOOP_HOME, HADOOP_STREAMING_HOME, STREAMING_JAR and PYTHON_LIB are not
# assigned in this script; presumably they come from the environment or from
# ../yew_functions.sh -- TODO confirm.

# Note: this path is resolved relative to the current working directory.
source "../yew_functions.sh"

# Fail early with a clear message instead of building broken paths below.
: "${HADOOP_HOME:?HADOOP_HOME must be set}"
: "${HADOOP_STREAMING_HOME:?HADOOP_STREAMING_HOME must be set}"
: "${STREAMING_JAR:?STREAMING_JAR must be set}"
: "${PYTHON_LIB:?PYTHON_LIB must be set}"

hdp_input="/file/stat.bz2"
hdp_output="/user/out"
hadoop="${HADOOP_HOME}/bin/hadoop"

# Remove any previous output so the job can recreate the directory.
# The exit status is deliberately not checked: the path may simply not exist.
"${hadoop}" fs -rmr "${hdp_output}"

# The script passed to -file must be the same one the task command executes.
# NOTE(review): the original ran "mapper.py" while shipping
# "parseuv_mapper.py", and ran "parseuv_reducer.py" while shipping
# "reducer.py". The names are unified on parseuv_* here -- confirm that these
# match the files on disk.
mapper_file="parseuv_mapper.py"
reducer_file="parseuv_reducer.py"

# python27/ is the link name given to the unpacked archive by -cacheArchive.
mapper_cmd="python27/bin/python ${mapper_file}"
reducer_cmd="python27/bin/python ${reducer_file}"

"${hadoop}" jar "${HADOOP_STREAMING_HOME}/${STREAMING_JAR}" \
  -D mapred.job.name="[test]" \
  -D mapred.reduce.tasks="1" \
  -cacheArchive "${PYTHON_LIB}/python27.tar.gz#python27" \
  -mapper "${mapper_cmd}" \
  -reducer "${reducer_cmd}" \
  -input "${hdp_input}" \
  -output "${hdp_output}" \
  -file "${mapper_file}" \
  -file "${reducer_file}"
# coding:utf8
"""Hadoop Streaming mapper: emit the uid found on each input line.

Each line read from stdin is split on whitespace. The first token that starts
with "id" is expected to look like "<key>:<value>"; its value is written to
stdout on its own line. Lines with no such token, or whose first "id" token
does not split into exactly two parts on ":", produce no output.
"""
import sys


def extract_uid(line):
    """Return the uid carried by ``line``, or None if the line has none.

    Only the first whitespace-separated token starting with "id" is
    considered. If that token does not contain exactly one ":" the line is
    rejected outright; later tokens are not examined.
    """
    for token in line.strip().split():
        if token.startswith("id"):
            parts = token.split(":")
            if len(parts) != 2:
                # Malformed id token: give up on the whole line.
                return None
            return parts[1]
    return None


def main():
    """Read lines from stdin and write one uid per matching line to stdout."""
    for line in sys.stdin:
        uid = extract_uid(line)
        if uid is not None:
            # sys.stdout.write behaves the same on Python 2 and Python 3,
            # unlike the Python 2-only print statement.
            sys.stdout.write(uid + "\n")


if __name__ == "__main__":
    main()
# coding:utf8
"""Hadoop Streaming reducer: count runs of identical lines on stdin.

The counter is incremented every time a stripped line differs from the
previous one, so on sorted input the result is the number of distinct values.
The total is printed once, after all input has been consumed.
"""
import sys


def count_uid_runs(lines):
    """Return how many times the stripped value changes across ``lines``.

    NOTE(review): the comparison starts from the empty string, so blank lines
    at the very beginning of the input are not counted. This matches the
    original behaviour -- confirm that it is intended.
    """
    count = 0
    previous = ""
    for raw in lines:
        uid = raw.strip()
        if uid != previous:
            count += 1
            previous = uid
    return count


def main():
    """Count the uid runs on stdin and write the summary line to stdout."""
    total = count_uid_runs(sys.stdin)
    # sys.stdout.write behaves the same on Python 2 and Python 3,
    # unlike the Python 2-only print statement.
    sys.stdout.write("Number of records:%s\n" % total)


if __name__ == "__main__":
    main()
0 0
- hadoop实例
- Hadoop RPC 实例
- Hadoop排序实例
- hadoop wordcount运行实例
- 五、Hadoop+HBase实例
- hadoop实例 RandomWriter
- hadoop实例sort
- Hadoop实战实例
- Hadoop RPC 实例
- Hadoop实战实例
- hadoop实例之HELLOWORLD
- hadoop实例之HELLOWORLD
- Hadoop RPC 实例
- hadoop MapReduce实例解析
- hadoop Partition使用实例
- Hadoop RPC 实例
- Hadoop-MultipleInputs实例<转>
- hadoop 进阶实例
- POJ 3083 Children of the Candy Corn
- Socket.IO进阶
- css 背景图片自适应元素大小
- Solr In Action 中文版 第一章(三)
- HDU 1166 敌兵布阵
- hadoop实例
- 腾讯互娱旗下工作室一览
- 位运算
- linux-android移植开发
- maven 与jdk相关的报错 1.3……
- POJ-2192-Polygon
- requestWindowFeature使用详解
- ubuntu升级firefox
- 一键获取所有网址前面的小标志