Hadoop Mapper 阶段将数据直接从 HDFS 导入 Hbase
来源:互联网 发布:ubuntu 桌面假死 编辑:程序博客网 时间:2024/05/22 03:17
数据源格式如下:
1	20130512	1-1-1	13802	1	2013-05-12 07:26:22
2	20130512	1-1-1	13802	1	2013-05-12 11:18:24
代码如下:
01
package
WebsiteAnalysis;
02
03
import
java.io.IOException;
04
05
import
org.apache.hadoop.conf.Configuration;
06
import
org.apache.hadoop.fs.Path;
07
import
org.apache.hadoop.hbase.HBaseConfiguration;
08
import
org.apache.hadoop.hbase.KeyValue;
09
import
org.apache.hadoop.hbase.client.Put;
10
import
org.apache.hadoop.hbase.io.ImmutableBytesWritable;
11
import
org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
12
import
org.apache.hadoop.hbase.util.Bytes;
13
import
org.apache.hadoop.io.LongWritable;
14
import
org.apache.hadoop.io.Text;
15
import
org.apache.hadoop.io.Writable;
16
import
org.apache.hadoop.mapreduce.Job;
17
import
org.apache.hadoop.mapreduce.Mapper;
18
import
org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
19
import
org.apache.hadoop.util.GenericOptionsParser;
20
21
public
class
Map2Hdfs {
22
public
static
final
String NAME =
"ImportFromFile"
;
23
24
public
enum
Counters {
25
LINES
26
}
27
28
static
class
ImportMapper
extends
Mapper<LongWritable, Text, ImmutableBytesWritable, Writable> {
29
private
byte
[] family =
null
;
30
private
byte
[] qualifier =
null
;
31
32
@Override
33
protected
void
setup(Context context)
throws
IOException, InterruptedException {
34
String column = context.getConfiguration().get(
"conf.column"
);
35
byte
[][] colkey = KeyValue.parseColumn(Bytes.toBytes(column));
36
family = colkey[
0
];
37
if
(colkey.length >
1
) {
38
qualifier = colkey[
1
];
39
}
40
}
41
42
@Override
43
public
void
map(LongWritable offset, Text line, Context context)
throws
IOException {
44
try
{
45
String[] lineArr = line.toString().split(
"\t"
);
46
Put put =
new
Put(Bytes.toBytes(offset +
""
));
47
put.add(family, Bytes.toBytes(
"time"
), Bytes.toBytes(lineArr[lineArr.length -
1
]));
48
context.write(
new
ImmutableBytesWritable(Bytes.toBytes(offset +
""
)), put);
49
context.getCounter(Counters.LINES).increment(
1
);
50
}
catch
(Exception e) {
51
e.printStackTrace();
52
}
53
}
54
}
55
56
public
static
void
main(String[] args)
throws
Exception {
57
Configuration conf = HBaseConfiguration.create();
58
String[] otherArgs =
new
GenericOptionsParser(conf, args).getRemainingArgs();
59
conf.set(
"conf.column"
,
"cf"
);
60
String inputPath =
"/dsap/middata/lj/ooxx/pv"
;
61
Job job =
new
Job(conf,
"TestMap2Hdfs"
);
62
63
job.setJarByClass(Map2Hdfs.
class
);
64
job.setMapperClass(ImportMapper.
class
);
65
job.setOutputFormatClass(TableOutputFormat.
class
);
66
job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE,
"TestMap2Hdfs"
);
67
job.setOutputKeyClass(ImmutableBytesWritable.
class
);
68
job.setOutputValueClass(Writable.
class
);
69
job.setNumReduceTasks(
0
);
70
FileInputFormat.addInputPath(job,
new
Path(inputPath +
"/"
+ otherArgs[
0
]));
71
System.exit(job.waitForCompletion(
true
) ?
0
:
1
);
72
}
73
}
http://stackoverflow.com/questions/11061854/hadoop-writing-to-hbase-directly-from-the-mapper
http://blog.sina.com.cn/s/blog_62a9902f0101904h.html 新建表的方式写入
hbase-hdfs MapReduce 数据读写总结
http://blog.pureisle.net/archives/1938.html hbase hdfs MR 读写的几种情况总结
http://blog.csdn.net/kirayuan/article/details/7001278 hbase表拷贝样例代码
0 0
- Hadoop Mapper 阶段将数据直接从 HDFS 导入 Hbase
- Hadoop Mapper 阶段将数据直接从 HDFS 导入 Hbase
- Hadoop Mapper 阶段将数据直接从 HDFS 导入 Hbase
- Hadoop Mapper 阶段将数据直接从 HDFS 导入 Hbase数据源格式如下:
- Hadoop MapReduce将HDFS文本数据导入HBase
- 将HDFS中的数据导入HBase
- Spark将HDFS数据导入到HBase
- Spark将HDFS数据导入到HBase
- Spark将HDFS数据导入到HBase
- 从HDFS导入数据到HBASE
- Hadoop 0.20.2+Sqoop 1.44+HBase 0.94从MySql往HDFS导入数据报错
- MapReduce将HDFS文本数据导入HBase中
- 将Hdfs数据往Hbase表中导入
- 1007-使用MapReduce把数据从HDFS导入到HBase
- Sqoop2 从MySQL导入数据到Hadoop HDFS
- 利用SQOOP将数据从数据库导入到HDFS
- 利用SQOOP将数据从数据库导入到HDFS
- 利用SQOOP将数据从数据库导入到HDFS
- HBase 写优化之 BulkLoad 实现数据快速入库
- C语言运算符优先级和口诀
- 一网打尽2013最常用的NoSQL数据库
- 17--2--3去除指定字符
- Android rtsp流媒体音视频帧的处理流程
- Hadoop Mapper 阶段将数据直接从 HDFS 导入 Hbase
- MySQL逗号分割字段的行列转换技巧
- hbase shell 基础和常用命令详解
- Eclipse :Access restriction: The type JPEGImageEncoder is not accessible due to restriction on requi
- [Eclipse]_[显示gc按钮减少内存占用]
- java中集合的遍历
- Hbase 介绍
- 【IOS基础知识】格式输出
- NSInvocationOperation的cancelAllOperations不会取消正在运行的operation