SPARKSQL读SPARK表

来源:互联网 发布:易语言验证码同步源码 编辑:程序博客网 时间:2024/06/03 18:50

SPARK2.0.2版本

1. 读取SPARK表

创建 SparkSession(启用 Hive 支持)并执行 SQL;先把查询结果的每一行(Row)转换为 Object[],再把每条记录包装成 WindowedValue,放入全局窗口并附加时间戳(BoundedWindow.TIMESTAMP_MIN_VALUE)。

public RDD<WindowedValue<Object[]>> getSourceRDD(PipelineOptions pipelineOptions, SparkContext sparkContext)
    {
      SparkSession sparkSession = SparkSession.builder().appName("XXX").master(sparkContext.master())
        .enableHiveSupport().getOrCreate();


      sparkSession.sql("use  " + this.read.database); //数据库名,默认是default
      Dataset rowset = sparkSession.sql(this.read.sql); //SQL 语句
      RDD rows = rowset.rdd().map(new JavaMapRow2ObjectArray()
      {
        public Object[] call(Row row) {
          Object[] colArr = new Object[row.size()];
          for (int i = 0; i < row.size(); ++i) {
            colArr[i] = row.get(i);
          }
          return colArr; }
      }
      , ScalaUtil.getClassTag([Ljava.lang.Object.class));


      RDD windowrows = rows.map(new JavaMap2WindowValue()
      {
        public WindowedValue<Object[]> call(Object[] record) {
          return WindowedValue.timestampedValueInGlobalWindow(record, BoundedWindow.TIMESTAMP_MIN_VALUE); }
      }
      , ClassTag..MODULE$.apply(WindowedValue.class));


      return windowrows;
    }


转换函数类,用于给每行记录加时间戳

public abstract class JavaMap2WindowValue<T> extends AbstractFunction1<T, WindowedValue<T>>
  implements Serializable
{
  public WindowedValue<T> apply(T record)
  {
    return call(record);
  }


  public abstract WindowedValue<T> call(T paramT);
}


工具类 ScalaUtil:封装从 Java 调用 Scala API 时常用的辅助方法(获取 ClassTag、Manifest,以及把 Java List 转换为 Scala Seq)

public class ScalaUtil
{
  public static <T> ClassTag<T> getClassTag(Class<T> clazz)
  {
    return ClassTag..MODULE$.apply(clazz);
  }


  public static <K, V> ClassTag<Tuple2<K, V>> getTuple2ClassTag()
  {
    return ((ClassTag)getClassTag(Tuple2.class));
  }


  public static <T> Seq<T> toScalaSeq(List<T> list) {
    return JavaConversions.asScalaBuffer(list);
  }


  public static void main(String[] args) {
    System.out.print(ClassTag..MODULE$.apply(Tuple2.class)); }


  public static <T> Manifest<T> getManifest(Class<T> clazz) {
    return ManifestFactory.classType(clazz);
  }
}

原创粉丝点击