Gobblin CLI

来源:互联网 发布:汉仪南宫体简下载 mac 编辑:程序博客网 时间:2024/06/05 05:26

Gobblin CLI提供了在终端运行jar程序的功能,该功能通过bin目录下的gobblin脚本实现。gobblin命令会自动寻找job的类路径;如果设置了环境变量HADOOP_HOME,gobblin还会找到hadoop的类路径。bin/gobblin -h可列出所有可用的参数。

bin/gobblin命令的使用方法如下:bin/gobblin run [listQuickApps] [<quick-app>] -jobName <jobName> [OPTIONS]

-listQuickApps:列出所有可用的应用列表

-quick-app:应用名称

bin/gobblin run <quick-app-name> -h:列出应用的参数列表


开发自定义命令行应用

1.自定义类继承EmbeddedGobblin类

2.定义静态内部类CliFactory继承PublicMethodsGobblinCliFactory,重写方法constructEmbeddedGobblin(CommandLine cli)用来根据命令行参数构造实例,重写方法getUsageString()用来提示使用方法。

如果不希望在命令行中暴露模板选项,可在自定义类中重写setTemplate(String templateURI)并加上@NotOnCli注解(见下面实例)。

3.为CliFactory类添加@Alias注解(指定应用名称和描述),gobblin会自动扫描到该Gobblin CLI类。


实例:EmbeddedGobblinDistcp 用于分布式环境拷贝数据

/**
 * Embedded version of distcp.
 * Usage:
 * new EmbeddedGobblinDistcp(new Path("/source"), new Path("/dest")).run();
 */
public class EmbeddedGobblinDistcp extends EmbeddedGobblin {


  @Alias(value = "distcp", description = "Distributed copy between Hadoop compatibly file systems.")
  public static class CliFactory extends PublicMethodsGobblinCliFactory {


    public CliFactory() {
      super(EmbeddedGobblinDistcp.class);
    }


    @Override
    public EmbeddedGobblin constructEmbeddedGobblin(CommandLine cli) throws JobTemplate.TemplateException, IOException {
      String[] leftoverArgs = cli.getArgs();
      if (leftoverArgs.length != 2) {
        throw new RuntimeException("Unexpected number of arguments.");
      }
      Path from = new Path(leftoverArgs[0]);
      Path to = new Path(leftoverArgs[1]);
      return new EmbeddedGobblinDistcp(from, to);
    }


    @Override
    public String getUsageString() {
      return "[OPTIONS] <source> <target>";
    }
  }


  public EmbeddedGobblinDistcp(Path from, Path to) throws JobTemplate.TemplateException, IOException {
    super("Distcp");
    try {
      setTemplate(ResourceBasedJobTemplate.forResourcePath("templates/distcp.template"));
    } catch (URISyntaxException | SpecNotFoundException exc) {
      throw new RuntimeException("Could not instantiate an " + EmbeddedGobblinDistcp.class.getName(), exc);
    }
    this.setConfiguration("from", from.toString());
    this.setConfiguration("to", to.toString());
    // Infer source and target fs uris from the input paths
    this.setConfiguration(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, from.getFileSystem(new Configuration()).getUri().toString());
    this.setConfiguration(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, to.getFileSystem(new Configuration()).getUri().toString());


    // add gobblin-data-management jar to distributed jars
    this.distributeJar(ClassUtil.findContainingJar(CopySource.class));
  }


  /**
   * Specifies that files in the target should be updated if they have changed in the source. Equivalent to -update
   * option in Hadoop distcp.
   */
  @EmbeddedGobblinCliOption(description = "Specifies files should be updated if they're different in the source.")
  public EmbeddedGobblinDistcp update() {
    this.setConfiguration(RecursiveCopyableDataset.UPDATE_KEY, Boolean.toString(true));
    return this;
  }


  /**
   * Specifies that files in the target that don't exist in the source should be deleted. Equivalent to -delete
   * option in Hadoop distcp.
   */
  @EmbeddedGobblinCliOption(description = "Delete files in target that don't exist on source.")
  public EmbeddedGobblinDistcp delete() {
    this.setConfiguration(RecursiveCopyableDataset.DELETE_KEY, Boolean.toString(true));
    return this;
  }


  /**
   * If {@link #delete()} is used, specifies that newly empty parent directories should also be deleted.
   */
  @EmbeddedGobblinCliOption(description = "If deleting files on target, also delete newly empty parent directories.")
  public EmbeddedGobblinDistcp deleteEmptyParentDirectories() {
    this.setConfiguration(RecursiveCopyableDataset.DELETE_EMPTY_DIRECTORIES_KEY, Boolean.toString(true));
    return this;
  }


  /**
   * Run in simulate mode. Will log everythin it would copy, but not actually copy anything.
   */
  public EmbeddedGobblinDistcp simulate() {
    this.setConfiguration(CopySource.SIMULATE, Boolean.toString(true));
    return this;
  }


  // Remove template from CLI
  @Override
  @NotOnCli
  public EmbeddedGobblin setTemplate(String templateURI)
      throws URISyntaxException, SpecNotFoundException, JobTemplate.TemplateException {
    return super.setTemplate(templateURI);
  }
}