HDPCD-Java-复习笔记(23)- lab
来源:互联网 发布:罗技anywhere2 mac 编辑:程序博客网 时间:2024/06/06 04:40
Java lab booklet
Lab: Importing Data into HBase
The stock prices of the stocks whose symbols start with "A" are imported into a table named 'stocks' in HBase.
package hbase;public class StockConstants {public static final byte [] PRICE_COLUMN_FAMILY = "p".getBytes();public static final byte [] HIGH_QUALIFIER = "high".getBytes();public static final byte [] LOW_QUALIFIER = "low".getBytes();public static final byte [] CLOSING_QUALIFIER = "close".getBytes();public static final byte [] VOLUME_QUALIFIER = "vol".getBytes();}
package hbase;import static hbase.StockConstants.CLOSING_QUALIFIER;import static hbase.StockConstants.HIGH_QUALIFIER;import static hbase.StockConstants.LOW_QUALIFIER;import static hbase.StockConstants.PRICE_COLUMN_FAMILY;import static hbase.StockConstants.VOLUME_QUALIFIER;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.Path;import org.apache.hadoop.hbase.client.Put;import org.apache.hadoop.hbase.io.ImmutableBytesWritable;import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;import org.apache.hadoop.hbase.util.Bytes;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;public class StockImporter extends Configured implements Tool { public static class StockImporterMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> { private final String COMMA = ","; @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] words = value.toString().split(COMMA); if (words[0].equals("exchange")) return; String symbol = words[1]; String date = words[2]; double highPrice = Double.parseDouble(words[4]); double lowPrice = Double.parseDouble(words[5]); double closingPrice = Double.parseDouble(words[6]); double volume = Double.parseDouble(words[7]); byte[] stockRowKey = Bytes.add(date.getBytes(), symbol.getBytes()); Put put = new Put(stockRowKey); put.add(PRICE_COLUMN_FAMILY, HIGH_QUALIFIER, Bytes.toBytes(highPrice)); put.add(PRICE_COLUMN_FAMILY, LOW_QUALIFIER, Bytes.toBytes(lowPrice)); put.add(PRICE_COLUMN_FAMILY, CLOSING_QUALIFIER, Bytes.toBytes(closingPrice)); put.add(PRICE_COLUMN_FAMILY, VOLUME_QUALIFIER, 
Bytes.toBytes(volume)); context.write(null, put); } } @Override public int run(String[] args) throws Exception { Job job = Job.getInstance(getConf(), "StockImportJob"); job.setJarByClass(StockImporter.class); FileInputFormat.setInputPaths(job, new Path("stocksA")); job.setMapperClass(StockImporterMapper.class); job.setInputFormatClass(TextInputFormat.class); job.setNumReduceTasks(0); TableMapReduceUtil.initTableReducerJob("stocks", null, job); TableMapReduceUtil.addDependencyJars(job); return job.waitForCompletion(true) ? 0 : 1; } public static void main(String[] args) { int result = 0; try { result = ToolRunner.run(new Configuration(), new StockImporter(), args); } catch (Exception e) { e.printStackTrace(); } System.exit(result); }}
Hbase cmd
# hbase shell
hbase (main):001:0> create 'stocks', {NAME => 'p', VERSIONS => 1}
hbase (main):002:0> list
hbase (main):003:0> describe 'stocks'
# hadoop fs -mkdir stocksA
# cd ~/java/workspace/HBaseImport/
# hadoop fs -put stocksA/* stocksA/
# export HADOOP_CLASSPATH=`hbase classpath`
# yarn jar hbaseimport.jar
hbase (main):002:0> scan 'stocks',{COLUMNS=>['p:high:toDouble', 'p:low:toDouble', 'p:close:toDouble', 'p:vol:toDouble'], LIMIT=>100}
Lab: An HBase MapReduce Job
A table in HBase that contains, for each stock processed by the MapReduce job, the highest closing price and the date on which it occurred.
package hbasemr;import static hbasemr.StockConstants.CLOSING_QUALIFIER;import static hbasemr.StockConstants.DATE_QUALIFIER;import static hbasemr.StockConstants.INFO_COLUMN_FAMILY;import static hbasemr.StockConstants.PRICE_COLUMN_FAMILY;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.hbase.Cell;import org.apache.hadoop.hbase.CellUtil;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.client.Put;import org.apache.hadoop.hbase.client.Result;import org.apache.hadoop.hbase.client.Scan;import org.apache.hadoop.hbase.io.ImmutableBytesWritable;import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;import org.apache.hadoop.hbase.mapreduce.TableMapper;import org.apache.hadoop.hbase.mapreduce.TableReducer;import org.apache.hadoop.hbase.util.Bytes;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;public class MaxClosingPriceJob extends Configured implements Tool {public static class MaxClosingPriceMapper extends TableMapper<Text, Text> {private Text outputKey = new Text();private Text outputValue = new Text();@Overrideprotected void map(ImmutableBytesWritable key, Result value,Context context) throws IOException, InterruptedException {Cell closingPrice = value.getColumnLatestCell(PRICE_COLUMN_FAMILY, CLOSING_QUALIFIER);String keyString = Bytes.toString(key.get());String symbol = keyString.substring(0, keyString.length() - 10);String date = keyString.substring(keyString.length() - 10, keyString.length());outputKey.set(symbol);outputValue.set(date + Bytes.toDouble(CellUtil.cloneValue(closingPrice)));context.write(outputKey, outputValue);}}public static class MaxClosingPriceReducer extends TableReducer<Text, Text, Text> {@Overrideprotected void reduce(Text key, Iterable<Text> values,Context context)throws IOException, InterruptedException {double max = 
0.0;String maxDate = "";for(Text value : values) {String current = value.toString();double currentPrice = Double.parseDouble(current.substring(10, current.length()));if(currentPrice > max) {max = currentPrice;maxDate = current.substring(0,10);}}Put put = new Put(key.getBytes());put.add(INFO_COLUMN_FAMILY, CLOSING_QUALIFIER, Bytes.toBytes(max));put.add(INFO_COLUMN_FAMILY, DATE_QUALIFIER, Bytes.toBytes(maxDate));context.write(key, put);}}@Overridepublic int run(String[] args) throws Exception {Configuration conf = HBaseConfiguration.create(getConf());Job job = Job.getInstance(conf, "MaxClosingPriceJob");job.setJarByClass(MaxClosingPriceJob.class);TableMapReduceUtil.addDependencyJars(job);Scan scan = new Scan();scan.addColumn(PRICE_COLUMN_FAMILY, CLOSING_QUALIFIER);TableMapReduceUtil.initTableMapperJob("stocks", scan, MaxClosingPriceMapper.class, Text.class, Text.class, job);TableMapReduceUtil.initTableReducerJob("stockhighs", MaxClosingPriceReducer.class, job);return job.waitForCompletion(true)?0:1;}public static void main(String[] args) {int result = 0;try {result = ToolRunner.run(new Configuration(), new MaxClosingPriceJob(), args);} catch (Exception e) {e.printStackTrace();}System.exit(result);}}
package hbasemr;public class StockConstants {public static final byte [] PRICE_COLUMN_FAMILY = "p".getBytes();public static final byte [] HIGH_QUALIFIER = "high".getBytes();public static final byte [] LOW_QUALIFIER = "low".getBytes();public static final byte [] CLOSING_QUALIFIER = "close".getBytes();public static final byte [] VOLUME_QUALIFIER = "vol".getBytes();public static final byte [] INFO_COLUMN_FAMILY = "info".getBytes();public static final byte [] DATE_QUALIFIER = "date".getBytes();}
hbase(main):001:0> create 'stockhighs', {NAME => 'info', VERSIONS => 1}
# cd ~/java/workspace/HBaseMR/
# yarn jar hbasemr.jar
hbase(main):002:0> scan 'stockhighs', {COLUMNS=>['info:date', 'info:close:toDouble']}
- HDPCD-Java-复习笔记(23)- lab
- HDPCD-Java-复习笔记(3)-lab
- HDPCD-Java-复习笔记(7)- lab
- HDPCD-Java-复习笔记(8)- lab
- HDPCD-Java-复习笔记(9)-lab
- HDPCD-Java-复习笔记(10)-lab
- HDPCD-Java-复习笔记(13)- lab
- HDPCD-Java-复习笔记(14)- lab
- HDPCD-Java-复习笔记(21)- lab
- HDPCD-Java-复习笔记(22)- lab
- HDPCD-Java-复习笔记(1)
- HDPCD-Java-复习笔记(2)
- HDPCD-Java-复习笔记(4)
- HDPCD-Java-复习笔记(5)
- HDPCD-Java-复习笔记(6)
- HDPCD-Java-复习笔记(11)
- HDPCD-Java-复习笔记(12)
- HDPCD-Java-复习笔记(15)
- 【工具】如何在使用pc上的pycharm远程调试Ubuntu服务器上的python工程
- java虚拟机中的invokevirtual所造成的问题
- Arcpy查找指定用户的所有要素类并列举其属性信息
- POJ 2386 Lake Counting dfs
- C++ STL next_permutation() prev_permutation(a,a+n)用法。
- HDPCD-Java-复习笔记(23)- lab
- TensorFlow计算图、张量、回话详细介绍
- POJ 3617 Best Cow Line 贪心算法
- POJ 3253 Fence Repair C++ STL multiset 可解
- ดาวนำทาง : คณาคำ อภิรดี导航星——《会更好的》插曲
- 下载安装
- Test这是题目
- 一步一步写算法(如何判断一个素数和一段素数)
- JavaScript数据类型检测的四种方式