重复ip(单词计数)不同语言实现对比
来源:互联网 发布:黑米软件官方 编辑:程序博客网 时间:2024/06/06 10:39
Shell
ip.txt
192.168.0.1 zhangxc
192.168.0.1 zhangxc1
192.168.0.1 zhangxc3
192.168.0.2 zhangc
192.168.0.2 zhangc1
192.168.0.3 zhangc
192.168.0.3 zhangc
192.168.0.3 zhangxc
192.168.0.3 zhangxc
192.168.0.0 zhang
192.168.0.5 zhang
192.168.0.0 zhang
192.168.0.0 zhang
192.168.0.0 zhang2
# Tally how many input lines share the same first whitespace-separated field,
# then print "<field> <count>" for every distinct value (order is unspecified).
awk '{ hits[$1]++ } END { for (ip in hits) print ip, hits[ip] }' ip.txt
Python
a.log
#111.172.249.84 - - [12/Dec/2011:05:33:36 +0800] "GET /images/i/goTop.png HTTP/1.0" 200 486 "http://wh.xxxx.com/" "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)"
#111.172.249.84 - - [12/Dec/2011:05:33:36 +0800] "GET /images/i/goTop.png HTTP/1.0" 200 486 "http://wh.xxxx.com/" "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)"
#111.172.249.85 - - [12/Dec/2011:05:33:36 +0800] "GET /images/i/goTop.png HTTP/1.0" 200 486 "http://wh.xxxx.com/" "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)"
#111.172.249.86 - - [12/Dec/2011:05:33:36 +0800] "GET /images/i/goTop.png HTTP/1.0" 200 486 "http://wh.xxxx.com/" "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)"
import re

# Matches a leading '#' immediately followed by a dotted-quad IPv4 address;
# group(1) is the address itself. Compiled once at import time instead of
# being rebuilt for every line of the log.
IP_PATTERN = re.compile(r'^#(((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?))')


def count_ips(lines):
    """Count how many lines start with '#<ipv4 address>', grouped by address.

    Args:
        lines: any iterable of strings (a list of lines or an open text file).

    Returns:
        dict mapping each matched address string to its number of occurrences.
        Lines that do not start with '#' followed by an address are ignored.
    """
    counts = {}
    for line in lines:
        match = IP_PATTERN.match(line)
        if match:
            ip = match.group(1)
            # dict.get replaces has_key(), which no longer exists in Python 3.
            counts[ip] = counts.get(ip, 0) + 1
    return counts


def main():
    """Read ./a.log, print the raw count dict, then one 'ip count' line per address."""
    # 'with' guarantees the file is closed even if reading raises.
    with open("./a.log", "r") as f:
        arr = count_ips(f)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(arr)
    for key in arr:
        print(key + " " + str(arr[key]))


if __name__ == "__main__":
    main()
Java
package p;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.HashMap;
import java.util.Map;

/**
 * Counts how many lines of {@code F://ip.txt} share the same first
 * space-separated field and prints the resulting map.
 *
 * <p>Steps: read the file line by line, store each first field as a key,
 * increment its counter, print the map.
 */
public class T {

    // first field of a line (the IP) -> number of lines it appeared on
    static Map<String, Integer> map = new HashMap<String, Integer>();

    /**
     * Reads {@code F://ip.txt}, tallies the first field of every line into
     * {@link #map} and prints the map to standard output.
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        BufferedReader br = new BufferedReader(new FileReader("F://ip.txt"));
        try {
            String str;
            while ((str = br.readLine()) != null) {
                String[] split = str.split(" ");
                // A line made only of spaces splits to an empty array; treat it
                // like an empty line (key "") instead of throwing on split[0].
                String key = split.length == 0 ? "" : split[0];
                Integer value = map.get(key);
                map.put(key, value == null ? 1 : value + 1);
            }
            System.out.println(map);
        } finally {
            // Closing the BufferedReader also closes the wrapped FileReader;
            // doing it in finally releases the handle even when reading fails.
            br.close();
        }
    }
}
Hadoop MapReduce
package org.apache.hadoop.examples;import java.io.IOException;import java.io.PrintStream;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Mapper.Context;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.Reducer.Context;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;public class WordCount{ public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2); } Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit((job.waitForCompletion(true)) ? 
0 : 1); } public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> { private IntWritable result; public IntSumReducer() { this.result = new IntWritable(); } public void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable val : values) sum += val.get(); this.result.set(sum); context.write(key, this.result); } } public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> { private static final IntWritable one = new IntWritable(1); private Text word; public TokenizerMapper() { this.word = new Text(); } public void map(Object key, Text value, Mapper<Object, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { StringTokenizer itr = new StringTokenizer(value.toString()); while (itr.hasMoreTokens()) { this.word.set(itr.nextToken()); context.write(this.word, one); } } }}
Spark
// Load the input file from HDFS as a collection of lines.
scala> val file = sc.textFile("hdfs://bigdata1:9000/wordcount/wc_in/test1.txt")
// Break every line into tab-separated tokens.
scala> val tokens = file.flatMap(row => row.split("\t"))
// Pair each token with 1 and add the ones up per token.
scala> val count = tokens.map(token => (token, 1)).reduceByKey((a, b) => a + b)
// Bring the totals back to the shell, then store them on HDFS.
scala> count.collect()
scala> count.saveAsTextFile("hdfs://bigdata1:9000/wordcount/wc_out6")
参考url
http://blog.csdn.net/jiedushi/article/details/7403365
0 0
- 重复ip(单词计数)不同语言实现对比
- C语言二叉排序树单词计数程序实现
- c语言 单词计数
- storm trident实现单词计数
- Spark实现WordCount单词计数
- C语言实现对输入中的行数,单词个数,字符的个数进行计数
- C语言用二叉树实现对输入各个单词的个数进行计数
- 计数排序算法(C语言实现)
- python实现单词计数的mapreduce
- 013-通过trident实现单词计数功能
- scala 两种方法实现单词计数
- spark1.6.1入门api实现单词计数
- 使用Scala实现文件单词计数
- Linux 命令实现单词计数功能
- 单词翻转(C语言实现)
- 单词计数
- 单词计数
- 单词计数
- <初级>安卓中事件点击的几种方式
- 数据结构 — 6.顺序队列(循环)实现二叉树层次遍历
- 因滚动条出现而导致页面晃动的解决方案
- Android中Touch事件分发过程全解析
- 由于应用程序配置不正确,应用程序未能启动。重新安装应用程序可能会纠正这个问题
- 重复ip(单词计数)不同语言实现对比
- OOD软件架构设计原则
- mysql通过sql文件创建数据库
- SMW0:上传EXCEL模板
- Objective-c学习笔记之集合
- 好的事情诡异诡异诡异诡异英语
- 欢迎使用CSDN-markdown编辑器
- ios开发弹框提示(适配系统7,8,9)
- Leetcode || Roman to Integer