MapReduce编程之倒排索引
来源:互联网 发布:华为荣耀5a手机壳淘宝 编辑:程序博客网 时间:2024/05/17 02:22
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import
java.io.IOException;
import
java.util.StringTokenizer;
import
org.apache.hadoop.conf.Configuration;
import
org.apache.hadoop.fs.Path;
import
org.apache.hadoop.io.IntWritable;
import
org.apache.hadoop.io.LongWritable;
import
org.apache.hadoop.io.Text;
import
org.apache.hadoop.mapreduce.Job;
import
org.apache.hadoop.mapreduce.Mapper;
import
org.apache.hadoop.mapreduce.Reducer;
import
org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public
class
Test2 {
enum
Counter
{
LINESKIP,
//记录出错的行
}
public
static
class
Map
extends
Mapper<LongWritable, Text, Text, Text>{
public
void
map(LongWritable key, Text value, Context context)
throws
IOException, InterruptedException {
String line = value.toString();
//读取源数据
try
{
//数据处理
String [] lineSplit = line.split(
" "
);
//18661629496,110
String anum = lineSplit[
0
];
String bnum = lineSplit[
1
];
//输出格式:110,18661629496
context.write(
new
Text(bnum),
new
Text(anum));
}
catch
(ArrayIndexOutOfBoundsException e)
{
context.getCounter(Counter.LINESKIP).increment(
1
);
//出错时计数器+1
return
;
}
}
}
public
static
class
Reduce
extends
Reducer<Text, Text, Text, Text> {
public
void
reduce(Text key, Iterable<Text> values, Context context)
throws
IOException, InterruptedException {
String valueString;
String out=
""
;
for
(Text value:values)
{
valueString=value.toString();
out+=valueString+
"|"
;
}
context.write(key,
new
Text(out));
}
}
public
static
void
main(String[] args)
throws
Exception {
Configuration conf =
new
Configuration();
if
(args.length !=
2
) {
System.err.println(
"请配置输入输出路径 "
);
System.exit(
2
);
}
//各种配置
Job job =
new
Job(conf,
"telephone "
);
//作业名称配置
//类配置
job.setJarByClass(Test2.
class
);
job.setMapperClass(Map.
class
);
job.setReducerClass(Reduce.
class
);
//map输出格式配置
job.setMapOutputKeyClass(Text.
class
);
job.setMapOutputValueClass(Text.
class
);
//作业输出格式配置
job.setOutputKeyClass(Text.
class
);
job.setOutputValueClass(Text.
class
);
//增加输入输出路径
FileInputFormat.addInputPath(job,
new
Path(args[
0
]));
FileOutputFormat.setOutputPath(job,
new
Path(args[
1
]));
//任务完成时退出
System.exit(job.waitForCompletion(
true
) ?
0
:
1
);
}
}
0 0
- MapReduce 编程之 倒排索引
- MapReduce编程之倒排索引
- MapReduce编程之倒排索引的实现
- MapReduce编程实例之倒排索引 1
- MapReduce编程之倒排索引的实现
- MapReduce编程——倒排索引
- MapReduce编程(七) 倒排索引构建
- MapReduce--倒排索引
- mapreduce--倒排索引
- Hadoop之MapReduce-倒排索引案例
- mapreduce之倒排索引代码
- 浅谈MapReduce之倒排索引
- mapreduce实现倒排索引
- MapReduce实现倒排索引
- mapreduce实现倒排索引
- MapReduce倒排索引概要
- MapReduce实战--倒排索引
- MapReduce倒排索引实现
- [050] 微信公众平台开发入门视频教程已发布
- background-size属性
- 大家好
- 解决UISearchDisplayController搜索结果滚动时,顶部透明的问题
- 对于JAVAWeb开发的最新解
- MapReduce编程之倒排索引
- centos7配置网卡
- [051] 微信公众平台开发教程第22篇-如何保证access_token长期有效
- linux2.6 makefiles.txt学习及实例分析
- flexbuilder 4.6破解
- 天声人語
- Java实现的堆排序算法
- 【北大青鸟视频学习】---过渡知识小总
- 深度学习笔记三:Softmax Regression