HDPCD-Java-复习笔记(18)
来源:互联网 发布:mt4编程书籍 编辑:程序博客网 时间:2024/06/08 00:52
Java lab booklet
Understanding Pig
root@ubuntu:~/java/labs/demos# pig
grunt> copyFromLocal /root/java/labs/demos/pigdemo.txt demos/
grunt> cd demos
Define the employees relation, using a schema:
grunt> employees = LOAD 'pigdemo.txt' AS (state,name);
grunt> describe employees;
employees: {state: bytearray,name: bytearray}
grunt> DUMP employees;
· (SD,Rich)
· (NV,Barry)
· (CO,George)
· (CA,Ulf)
· (IL,Danielle)
· (OH,Tom)
· (CA,manish)
· (CA,Brian)
· (CO,Mark)
grunt> emp_group = GROUP employees BY state;
grunt> describe emp_group;
emp_group: {group: bytearray,employees: {(state: bytearray,name: bytearray)}}
grunt> DUMP emp_group;
· The output is:
· (CA,{(CA,Ulf),(CA,manish),(CA,Brian)})
· (CO,{(CO,George),(CO,Mark)})
· (IL,{(IL,Danielle)})
· (NV,{(NV,Barry)})
· (OH,{(OH,Tom)})
· (SD,{(SD,Rich)})
grunt> STORE emp_group INTO 'emp_group_csv' USING PigStorage(',');
grunt> cat emp_group_csv/part-r-00000
CA,{(CA,Brian),(CA,manish),(CA,Ulf)}
CO,{(CO,Mark),(CO,George)}
IL,{(IL,Danielle)}
NV,{(NV,Barry)}
OH,{(OH,Tom)}
SD,{(SD,Rich)}
The aliases command shows a list of currently defined aliases:
grunt> aliases
aliases: [1-41, ca_only, 1-39, emp_group, employees]
Lab: Writing a Pig User Defined Function (UDF)
A Pig script that computes the on-balance volume of a specified stock.
package stockudfs;import java.io.IOException;import org.apache.pig.EvalFunc;import org.apache.pig.data.Tuple;public class OnBalanceVolume extends EvalFunc<Long> {private long previousObv = 0;private double previousClose = 0;@Overridepublic Long exec(Tuple input) throws IOException {long volume = Long.parseLong(input.get(0).toString());double currentClose = Double.parseDouble(input.get(1).toString());long obv;if (currentClose > previousClose) {obv = previousObv + volume;}else if (currentClose < previousClose) {obv = previousObv - volume;}else {obv = previousObv;}return obv;}}stockvolume.pig
-- stockvolume.pig: prints the on-balance volume of one stock, one row per trading day.
-- The stock is chosen through the "symbol" parameter, which must be supplied when the
-- script is run.
register stockudfs.jar;
stockdata = LOAD 'stocksA' using PigStorage(',') AS (exchange:chararray,symbol:chararray,
date:chararray,open:float,high:float,low:float,close:float,volume:int);
-- Keep only the columns the UDF and the output need.
stock_all = FOREACH stockdata GENERATE symbol,date,close,volume;
stock_filter = FILTER stock_all BY symbol == '$symbol';
-- The UDF keeps running state between rows, so feed it the rows in date order.
stock_sorted = ORDER stock_filter BY date ASC;
obv_result = FOREACH stock_sorted GENERATE symbol, date, stockudfs.OnBalanceVolume(volume, close) AS obv;
dump obv_result;
-- STORE obv_result INTO 'obv_result';
结果输出:
(AVA,2009-12-02,245700)
(AVA,2009-12-03,426000)
(AVA,2009-12-04,262200)
(AVA,2009-12-07,131300)
(AVA,2009-12-08,190900)
(AVA,2009-12-09,138700)
(AVA,2009-12-10,217300)
(AVA,2009-12-11,139200)
(AVA,2009-12-14,165500)
(AVA,2009-12-15,311900)
(AVA,2009-12-16,177800)
(AVA,2009-12-17,177900)
(AVA,2009-12-18,532500)
(AVA,2009-12-21,177100)
Lab: Writing an Accumulator User Defined Function(UDF)
A Pig script that outputs a stock’s highest closing price, along with the 4 closing prices following the highest close.
package stockudfs;import java.io.IOException;import java.util.ArrayList;import java.util.Iterator;import java.util.List;import org.apache.pig.AccumulatorEvalFunc;import org.apache.pig.data.DataBag;import org.apache.pig.data.Tuple;public class HighestClosingPriceWindow extends AccumulatorEvalFunc<String> {private int windowSize;private int accumulatedPrices;private List<String> highDates;private List<Float> highCloses;private float highClose;public HighestClosingPriceWindow(String size) {int winSize = Integer.parseInt(size);if (winSize > 0) {windowSize = winSize;}else {windowSize = 1;}highDates = new ArrayList<String>();highCloses = new ArrayList<Float>();}@Overridepublic void accumulate(Tuple b) throws IOException {DataBag values = (DataBag) b.get(0);Iterator<Tuple> iterator = values.iterator();float currentClose;while (iterator.hasNext()) {Tuple tuple = (Tuple) iterator.next();currentClose = Float.parseFloat(tuple.get(2).toString());if (currentClose > highClose) {highClose = currentClose;highCloses.add(0, currentClose);highDates.add(0, tuple.get(1).toString());accumulatedPrices = 1;}else if (accumulatedPrices < windowSize) {highCloses.add(accumulatedPrices, Float.parseFloat(tuple.get(2).toString()));highDates.add(accumulatedPrices, tuple.get(1).toString());accumulatedPrices ++;}}}@Overridepublic void cleanup() {highClose = 0;highCloses.clear();highDates.clear();accumulatedPrices = 0;}@Overridepublic String getValue() {StringBuilder builder = new StringBuilder();for (int i = 0; i < highCloses.size(); i++) {builder.append(highDates.get(i) + " " + highCloses.get(i) + "\n");}return builder.toString();}}highclose.pig
-- highclose.pig: for every stock symbol, print the window of closing prices that
-- starts at the stock's highest close.
REGISTER stockudfs.jar;
-- Bind the UDF to a short name with a window size of 4.
DEFINE HighestClosingPriceWindow stockudfs.HighestClosingPriceWindow('4');

stockdata = LOAD 'stocksA' USING PigStorage(',')
    AS (exchange:chararray, symbol:chararray, date:chararray,
        open:float, high:float, low:float, close:float, volume:int);

-- The UDF reads the date from field 1 and the close from field 2, so keep this order.
stocks_all = FOREACH stockdata GENERATE symbol, date, close;
stocks_group = GROUP stocks_all BY symbol;
stocks_high = FOREACH stocks_group {
    -- Hand the rows of each symbol to the UDF in ascending date order.
    sorted = ORDER stocks_all BY date ASC;
    GENERATE group AS symbol, HighestClosingPriceWindow(sorted) AS result;
}
DUMP stocks_high;
结果:
(AVT,2000-04-28 78.62
2000-05-01 78.44
2000-05-02 78.31
2000-05-03 74.06
2000-04-27 76.25
2000-04-26 74.19
2000-04-25 73.44
1997-12-08 72.16)
(AXE,2007-07-24 86.11
2007-07-25 83.8
2007-07-26 82.25
2007-07-27 81.3)
- HDPCD-Java-复习笔记(18)
- HDPCD-Java-复习笔记(1)
- HDPCD-Java-复习笔记(2)
- HDPCD-Java-复习笔记(3)-lab
- HDPCD-Java-复习笔记(4)
- HDPCD-Java-复习笔记(5)
- HDPCD-Java-复习笔记(6)
- HDPCD-Java-复习笔记(7)- lab
- HDPCD-Java-复习笔记(8)- lab
- HDPCD-Java-复习笔记(9)-lab
- HDPCD-Java-复习笔记(10)-lab
- HDPCD-Java-复习笔记(11)
- HDPCD-Java-复习笔记(12)
- HDPCD-Java-复习笔记(13)- lab
- HDPCD-Java-复习笔记(14)- lab
- HDPCD-Java-复习笔记(15)
- HDPCD-Java-复习笔记(16)
- HDPCD-Java-复习笔记(17)
- checkStyle、findbugs、PMD各自特点
- 《大话设计模式》读书笔记之C++实现--chapter15抽象工厂模式
- COM学习(二)——COM的注册和卸载
- 隐马尔科夫(HMM)模型
- python中出现非正定“Input X must be non-negative”的问题
- HDPCD-Java-复习笔记(18)
- Java---String类
- Java--多线程
- create-react-app路由的实现原理
- Java--线程的常用方法
- 3435==1853==3488 三连发
- 中国大学MOOC-陈越、何钦铭-数据结构:06-图3 六度空间(链式前向星、BFS)
- 树形索引(B+树)
- ES6中的解构赋值