spark sql简单示例
来源:互联网 发布:黑色星期五海淘 知乎 编辑:程序博客网 时间:2024/06/07 00:02
运行环境
集群环境:CDH5.3.0
具体JAR版本如下:
spark版本:1.2.0-cdh5.3.0
hive版本:0.13.1-cdh5.3.0
hadoop版本:2.5.0-cdh5.3.0
spark sql的JAVA版简单示例
spark sql直接查询JSON格式的数据
spark sql的自定义函数
spark sql查询hive上面的表
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import
java.util.ArrayList;
import
java.util.List;
import
org.apache.spark.SparkConf;
import
org.apache.spark.api.java.JavaRDD;
import
org.apache.spark.api.java.JavaSparkContext;
import
org.apache.spark.api.java.function.Function;
import
org.apache.spark.sql.api.java.DataType;
import
org.apache.spark.sql.api.java.JavaSQLContext;
import
org.apache.spark.sql.api.java.JavaSchemaRDD;
import
org.apache.spark.sql.api.java.Row;
import
org.apache.spark.sql.api.java.UDF1;
import
org.apache.spark.sql.hive.api.java.JavaHiveContext;
/**
 * Simple Spark SQL (1.2.x / CDH 5.3.0) examples: querying a JSON file
 * directly, registering a Java UDF, and querying a table stored in Hive.
 *
 * Notes when using {@code JavaHiveContext}:
 * 1. hive-site.xml, core-site.xml and hdfs-site.xml must be on the classpath.
 * 2. A postgresql or mysql JDBC driver dependency is required.
 * 3. hive-jdbc and hive-exec dependencies are required.
 */
public class SimpleDemo {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("simpledemo").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaSQLContext sqlCtx = new JavaSQLContext(sc);
        JavaHiveContext hiveCtx = new JavaHiveContext(sc);
        // testQueryJson(sqlCtx);
        // testUDF(sc, sqlCtx);
        testHive(hiveCtx);
        // stop() releases all context resources; the original additionally
        // called close() afterwards, which is redundant because close()
        // simply delegates to stop().
        sc.stop();
    }

    /**
     * Demonstrates querying a JSON file directly with Spark SQL.
     *
     * @param sqlCtx SQL context used to load the file and run queries
     */
    public static void testQueryJson(JavaSQLContext sqlCtx) {
        JavaSchemaRDD rdd = sqlCtx.jsonFile("file:///D:/tmp/tmp/json.txt");
        rdd.printSchema();

        // Register the input schema RDD as a temporary table.
        rdd.registerTempTable("account");

        JavaSchemaRDD accs =
                sqlCtx.sql("SELECT address, email,id,name FROM account ORDER BY id LIMIT 10");
        List<Row> result = accs.collect();
        for (Row row : result) {
            System.out.println(row.getString(0) + "," + row.getString(1) + ","
                    + row.getInt(2) + "," + row.getString(3));
        }

        // Positional column access: index 3 is the "name" column of the
        // projection selected above.
        JavaRDD<String> names = accs.map(new Function<Row, String>() {
            @Override
            public String call(Row row) throws Exception {
                return row.getString(3);
            }
        });
        System.out.println(names.collect());
    }

    /**
     * Demonstrates registering and using a custom UDF ("strlength") that
     * returns the length of a string column.
     *
     * @param sc     Spark context used to parallelize the sample data
     * @param sqlCtx SQL context used to register the UDF and run queries
     */
    public static void testUDF(JavaSparkContext sc, JavaSQLContext sqlCtx) {
        // Create one sample account and turn it into a schema RDD.
        ArrayList<AccountBean> accList = new ArrayList<AccountBean>();
        accList.add(new AccountBean(1, "lily", "lily@163.com", "gz tianhe"));
        JavaRDD<AccountBean> accRDD = sc.parallelize(accList);
        JavaSchemaRDD rdd = sqlCtx.applySchema(accRDD, AccountBean.class);
        rdd.registerTempTable("acc");

        // Register the custom UDF.
        sqlCtx.registerFunction("strlength", new UDF1<String, Integer>() {
            @Override
            public Integer call(String str) throws Exception {
                return str.length();
            }
        }, DataType.IntegerType);

        // BUG FIX: the original query used strlength('name'), which applies
        // the UDF to the string literal "name" (always 4) rather than to the
        // name column the demo clearly intends to measure.
        List<Row> result =
                sqlCtx.sql("SELECT strlength(name),name,address FROM acc LIMIT 10").collect();
        for (Row row : result) {
            System.out.println(row.getInt(0) + "," + row.getString(1) + ","
                    + row.getString(2));
        }
    }

    /**
     * Demonstrates querying a table that lives in Hive.
     *
     * @param hiveCtx Hive-aware SQL context; see the class-level notes for the
     *                configuration files and dependencies this requires
     */
    public static void testHive(JavaHiveContext hiveCtx) {
        List<Row> result = hiveCtx.sql("SELECT foo,bar,name from pokes2 limit 10").collect();
        for (Row row : result) {
            System.out.println(row.getString(0) + "," + row.getString(1) + ","
                    + row.getString(2));
        }
    }
}
0 0
- spark sql简单示例
- Spark sql 简单示例
- spark sql简单示例java
- spark简单代码示例
- Spark SQL 应用示例
- Spark SQL 编程示例
- Flex4 Spark 简单组件示例
- spark-shell简单使用示例
- Spark streaming 应用简单示例
- Spark SQL连接MySQL示例
- Spark SQL 简单使用
- Flex4 Spark组件数据驱动简单示例
- Spark本地安装和简单示例
- Spark的join与cogroup简单示例
- Spark SQL UDF和UDAF示例
- 基于Hbase的Spark Sql示例 一
- Spark SQL 最简单例子
- pl/sql 游标 简单示例
- 一些算法题
- GMM:高斯混合模型的数学推导笔记(上)
- RecyclerView
- activiti流程图动态创建
- 剑指Offer--028-字符串的排列
- spark sql简单示例
- KindEditor得不到textarea值的解决方法
- 个人常用iOS第三方库
- android BitmapFactroy
- 介绍 GDB 调试 Go
- GetKeyState、GetAsyncKeyState、GetKeyboardState函数的区别 以及虚拟键值
- Android实现发送短信的功能
- CSS布局方式
- Centos7 系统下node.js+Nginx+MongoDB的安装与配置