用MapReduce把HDFS数据写入HBase中
来源:互联网 发布:大月薰 知乎 编辑:程序博客网 时间:2024/05/21 22:28
1.使用Map+Reduce方式
public class MapReduceImport { /** * Mapper */ static class HMapper extends Mapper<LongWritable, Text, LongWritable, Text> { Text v2 = new Text(); protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException { String[] splited = value.toString().split(" "); if (splited.length != 6) return; try { v2.set(GetRowKey.getRowKeyString(splited[2], splited[4]) + " " + value.toString()); context.write(key, v2); } catch (NumberFormatException e) { System.out.println("出错了" + e.getMessage()); } } } /** * Reducer */ static class HReducer extends TableReducer<LongWritable, Text, NullWritable> { protected void reduce(LongWritable key, java.lang.Iterable<Text> values, Context context) throws java.io.IOException, InterruptedException { for (Text text : values) { String[] splited = text.toString().split(" "); Put put = new Put(Bytes.toBytes(splited[0])); for (int j = 1; j < splited.length; j++) { put.addColumn(Bytes.toBytes(HConfiguration.colFamily), Bytes.toBytes("log" + j), Bytes.toBytes(splited[j])); } context.write(NullWritable.get(), put); } } } /** * Main * * @param args * @throws Exception */ public static void main(String[] args) throws Exception { Configuration configuration = new Configuration(); configuration.set("hbase.zookeeper.quorum", HConfiguration.hbase_zookeeper_quorum); configuration.set("hbase.zookeeper.property.clientPort", "2181"); configuration.set(TableOutputFormat.OUTPUT_TABLE, HConfiguration.tableName); configuration.set("dfs.socket.timeout", "180000"); MRDriver myDriver = MRDriver.getInstance(); try { myDriver.createTableIfExistDelete(HConfiguration.tableName, HConfiguration.colFamily); } catch (Exception e) { e.printStackTrace(); } Job job = new Job(configuration, "Map+ReduceImport"); job.setMapperClass(HMapper.class); job.setReducerClass(HReducer.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setInputFormatClass(TextInputFormat.class); 
job.setOutputFormatClass(TableOutputFormat.class); FileInputFormat.setInputPaths(job, HConfiguration.mapreduce_inputPath); job.waitForCompletion(true); }}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
———————-分—-割—-线————————–
2.只使用Map的方式
public class OnlyMapImport { /** * Mapper */ static class ImportMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> { @Override public void map(LongWritable offset, Text value, Context context) { String[] splited = value.toString().split(" "); if (splited.length != 6) return; try { byte[] rowkey = Bytes.toBytes(GetRowKey.getRowKeyString(splited[2], splited[4])); Put put = new Put(rowkey); for (int j = 0; j < splited.length; j++) { put.addColumn(Bytes.toBytes(HConfiguration.colFamily), Bytes.toBytes("log" + j), Bytes.toBytes(splited[j])); } context.write(new ImmutableBytesWritable(rowkey), put); } catch (NumberFormatException e) { System.out.println("出错了" + e.getMessage()); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } } } /** * Main * * @param args * @throws Exception */ public static void main(String[] args) throws Exception { Configuration configuration = new Configuration(); configuration.set("hbase.zookeeper.quorum", HConfiguration.hbase_zookeeper_quorum); configuration.set("hbase.zookeeper.property.clientPort", "2181"); configuration.set(TableOutputFormat.OUTPUT_TABLE, HConfiguration.tableName); configuration.set("dfs.socket.timeout", "180000"); MRDriver myDriver = MRDriver.getInstance(); try { myDriver.createTableIfExistDelete(HConfiguration.tableName, HConfiguration.colFamily); } catch (Exception e) { e.printStackTrace(); } Job job = new Job(configuration, "HBaseBatchImport"); job.setJarByClass(OnlyMapImport.class); job.setMapperClass(ImportMapper.class); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(Writeable.class); job.setNumReduceTasks(0); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TableOutputFormat.class); FileInputFormat.setInputPaths(job, HConfiguration.mapreduce_inputPath); job.waitForCompletion(true); }}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
经过测试,与Map+Reduce方式相比,只使用Map的方式(省去了Reduce阶段)导入时间明显减少。
1 0