Cascading Each Pipe 使用

来源:互联网 发布:vpn提供商 windows内置 编辑:程序博客网 时间:2024/05/16 07:33

Cascading 基本Pipe - Each 使用

 

Each,说白了就是对 Pipe 中的每一条记录(Tuple)逐条应用一个 Function 或 Filter。话不多说,直接上代码:下面的例子用 Insert 函数给每条记录追加一个常量列 sourceflag。

  

package cascading;

import cascading.flow.FlowConnector;
import cascading.flow.FlowDef;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.operation.Insert;
import cascading.pipe.Each;
import cascading.pipe.Pipe;
import cascading.scheme.Scheme;
import cascading.scheme.hadoop.TextDelimited;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;
import cascading.tuple.Fields;

public class EachPipe {

 public static void main(String[] args) {
  // defile input file column and delimeter
  Scheme inScheme = new TextDelimited(new Fields("ID", "Name"), ";");

  // define input file path
  Tap intap1 = new Hfs(inScheme, args[0]);

  // defile output path
  Tap outap = new Hfs(inScheme, args[1], SinkMode.REPLACE);
  Pipe inputPipe = new Pipe("inputPipe");

  // use each pipe to add a new column for each record
  inputPipe = new Each(inputPipe,
    new Insert(new Fields("sourceflag"), 1), Fields.ALL);
  FlowDef flowDef = FlowDef.flowDef().addSource(inputPipe, intap1)
    .addTailSink(inputPipe, outap);
  FlowConnector flowConnector = new HadoopFlowConnector();
  flowConnector.connect(flowDef).complete();
 }

}