hadoop2.6数据导入elasticsearch2.2(解析hbase导出数据)
来源:互联网 发布:数据库 migration 编辑:程序博客网 时间:2024/06/10 18:32
参考网址:
https://www.elastic.co/guide/en/elasticsearch/hadoop/current/mapreduce.html
1.下载依赖jar
elasticsearch-hadoop2.2.0.jar这个从私服下载吧。
2.数据流向是:
hbase导出数据 -> hdfs -> es2
3.以下直接粘贴代码
<span style="font-weight: bold;"></span>import java.util.Map.Entry;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.Path;import org.apache.hadoop.hbase.mapreduce.MutationSerialization;import org.apache.hadoop.hbase.mapreduce.ResultSerialization;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;import org.apache.hadoop.util.GenericOptionsParser;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;import org.elasticsearch.hadoop.mr.EsOutputFormat;import org.elasticsearch.hadoop.mr.LinkedMapWritable;public class MyJob extends Configured implements Tool {@Overridepublic int run(String[] args) throws Exception {Path input = new Path(args[0]);Configuration conf = getConf(); conf.setBoolean("mapred.map.tasks.speculative.execution", false); conf.setBoolean("mapred.reduce.tasks.speculative.execution", false); conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName());//如果是hbase0.9导出数据就不需要这个参数了,1以上需要//conf.set("es.nodes", "host228"); // index or indices used for storing dataconf.set("es.port", "9200"); // index or indices used for storing data//conf.set("es.resource", "ehlindex/tr_plate"); // index or indices used for storing dataGenericOptionsParser parser = new GenericOptionsParser(conf, args); for (Entry<String, String> entry : conf) { System.out.printf("%s=%s\n", entry.getKey(), entry.getValue()); } Job job = Job.getInstance(conf, "hfile 2 es");job.setJarByClass(MyJob.class);FileInputFormat.addInputPath(job, input);job.setInputFormatClass(SequenceFileInputFormat.class);job.setOutputFormatClass(EsOutputFormat.class);job.setMapOutputValueClass(LinkedMapWritable.class); job.setNumReduceTasks(0);job.setMapperClass(MyMaper.class);return job.waitForCompletion(true)?0:1;}public static void 
main(String[] args) throws Exception {int run = ToolRunner.run( new MyJob(), args);System.exit(run);}}<strong></strong>
<span style="font-weight: bold;"></span>import java.io.IOException;import org.apache.hadoop.hbase.client.Result;import org.apache.hadoop.hbase.io.ImmutableBytesWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;import org.elasticsearch.hadoop.mr.LinkedMapWritable;import com.ehl.im.transfer.TRFieldEnum;import com.ehl.im.transfer.TravelRecord;public class MyMaper extends Mapper<ImmutableBytesWritable, Result, NullWritable, LinkedMapWritable>{ protected void map(ImmutableBytesWritable key, Result value,Context context) throws IOException, InterruptedException {if("true".equals( context.getConfiguration().get("notinsert"))){return ;} try {LinkedMapWritable linkObj = result2Map(value);context.write(NullWritable.get(), linkObj);} catch (Exception e) {e.printStackTrace();} } private LinkedMapWritable result2Map(Result r){ LinkedMapWritable linkObj = new LinkedMapWritable();byte[] passCarRowValue = r.getValue("cf".getBytes(), null);TravelRecord record = new TravelRecord(passCarRowValue);linkObj.put(new Text("timestamp"),new LongWritable(Long.valueOf(record.getStringValue(TRFieldEnum.TIMESTAMP))));linkObj.put(new Text("car_plate_number"),new Text(record.getStringValue(TRFieldEnum.CAR_PLATE_NUMBER)));try {linkObj.put(new Text("carplateindex"),new Text(CarPlateCommonUtil.produceCarPlateIndexStr( record.getStringValue(TRFieldEnum.CAR_PLATE_NUMBER))) );} catch (Exception e) {e.printStackTrace();}linkObj.put(new Text("speed"), new LongWritable(Long.valueOf(record.getStringValue(TRFieldEnum.SPEED))));linkObj.put(new Text("lane_id"),new Text(record.getStringValue(TRFieldEnum.LANE_ID)));linkObj.put(new Text("camera_location"),new Text(record.getStringValue(TRFieldEnum.CAMERA_LOCATION)));linkObj.put(new Text("bay_id"),new Text(record.getStringValue(TRFieldEnum.BAY_ID)));linkObj.put(new Text("camera_orientation"),new 
Text(record.getStringValue(TRFieldEnum.CAMERA_ORIENTATION)));linkObj.put(new Text("car_brand"),new Text(record.getStringValue(TRFieldEnum.CAR_BRAND)));linkObj.put(new Text("car_color"),new Text(record.getStringValue(TRFieldEnum.CAR_COLOR)));linkObj.put(new Text("car_plate_color"),new Text(record.getStringValue(TRFieldEnum.CAR_PLATE_COLOR)));linkObj.put(new Text("car_plate_type"),new Text(record.getStringValue(TRFieldEnum.CAR_PLATE_TYPE)));linkObj.put(new Text("car_status"),new Text(record.getStringValue(TRFieldEnum.CAR_STATUS)));linkObj.put(new Text("travel_orientation"),new Text(record.getStringValue(TRFieldEnum.TRAVEL_ORIENTATION)));linkObj.put(new Text("plate_coordinates"),new Text(record.getStringValue(TRFieldEnum.PLATE_COORDINATES)));linkObj.put(new Text("driver_coordinates"),new Text(record.getStringValue(TRFieldEnum.DRIVER_COORDINATES)));String[] imgUrls = record.getStringArrayValue(TRFieldEnum.IMAGE_URLS);if (imgUrls != null) {if (imgUrls.length >= 1 && imgUrls[0] != null && !"".equals(imgUrls[0])) {linkObj.put(new Text("tp1"),new Text(imgUrls[0]));}if (imgUrls.length >= 2 && imgUrls[1] != null && !"".equals(imgUrls[1])) {linkObj.put(new Text("tp2"),new Text(imgUrls[1]));}if (imgUrls.length >= 3 && imgUrls[2] != null && !"".equals(imgUrls[2])) {linkObj.put(new Text("tp3"),new Text(imgUrls[2]));}}return linkObj; } }<strong></strong>
以下是建立es的索引
curl -XPOST host213:9200/ehlindex -d '{ "settings" : { "number_of_shards" : 20,"number_of_replicas" : 0 }, "mappings" : { "tr_plate" : { "properties" : { "timestamp" : { "type" : "long", "index" : "not_analyzed" } , "car_plate_number" : { "type" : "string", "index" : "not_analyzed" } , "speed" : { "type" : "long", "index" : "not_analyzed" } , "lane_id" : { "type" : "string", "index" : "not_analyzed" } , "camera_location" : { "type" : "string", "index" : "not_analyzed" } , "bay_id" : { "type" : "string", "index" : "not_analyzed" } , "camera_orientation" : { "type" : "string", "index" : "not_analyzed" } , "car_brand" : { "type" : "string", "index" : "not_analyzed" } ,"car_color" : { "type" : "string", "index" : "not_analyzed" } ,"car_plate_color" : { "type" : "string", "index" : "not_analyzed" } ,"car_plate_type" : { "type" : "string", "index" : "not_analyzed" } ,"tp1" : { "type" : "string", "index" : "not_analyzed" } ,"tp2" : { "type" : "string", "index" : "not_analyzed" } ,"tp3" : { "type" : "string", "index" : "not_analyzed" } ,"car_status" : { "type" : "string", "index" : "not_analyzed" } ,"travel_orientation" : { "type" : "string", "index" : "not_analyzed" } ,"plate_coordinates" : { "type" : "string", "index" : "not_analyzed" } ,"driver_coordinates" : { "type" : "string", "index" : "not_analyzed" } , "carplateindex" : { "type" : "string", "index" : "analyzed" } } } } }'
hadoop jar downloads/Hfile2Es-0.0.1-SNAPSHOT-jar-with-dependencies.jar -D es.resource=ehlindex/tr_plate -D es.nodes=host228 /yangxTest/qhd_data1/qhd_data1
这些参数在MyJob中都能获取,所以比较灵活,纯粹干货自己消化吧
0 1
- hadoop2.6数据导入elasticsearch2.2(解析hbase导出数据)
- Hbase导入导出数据
- Hbase数据导入导出
- hbase导入导出数据
- Hbase学习笔记2@数据导入导出
- HBase数据的导入导出
- HBase数据的导入导出
- hbase 数据的导入导出
- hbase 导入导出/数据迁移
- HBase数据的导入和导出
- hbase数据的导入与导出
- hbase数据导入导出工具使用
- HBase数据的导入和导出
- Hbase表数据的导入和导出
- HBase中数据的导入导出
- sqoop与hbase导入导出数据
- hbase数据导出导入,数据备份,数据迁移(仅解决非通信集群)
- sqoop1.4.6离线部署于hadoop2.6之上与hive导入导出数据
- ExtJS Ext.get()和Ext.getCmp()的区别
- C++实验1-计算两个数的和与差
- 如何控制Android控件的样式
- 自定义假搜索框(TF)
- Swift中的闭包
- hadoop2.6数据导入elasticsearch2.2(解析hbase导出数据)
- oracle 查看表和字段
- 【转】VS 2005中实现对Python 2.5.2的模块扩展实验
- 函数模板和模板函数
- AVPlayer 简易媒体播放器
- ionic入门教程第二课-从Tabs案例看Ionic的MVC架构
- ny860(又见01背包)
- php/js 用JQuery.Datatable 实现数据的分页加载
- iOS下的Masonry适配