Spark Advanced (Part 4)
1. Spark with MySQL (and other relational databases)
package scala

import java.sql.{Connection, DriverManager, PreparedStatement}
import org.apache.spark.{SparkConf, SparkContext}

object RDDtoMysql {

  // Case class mirroring the blog table; adjust to your schema as needed
  case class Blog(name: String, count: Int)

  // Write one partition's records to the database
  // (MySQL's NOW() could be added to the SQL to store a timestamp)
  def myFun(iterator: Iterator[(String, Int)]): Unit = {
    var conn: Connection = null
    var ps: PreparedStatement = null
    val sql = "insert into blog(name, count) values (?, ?)"
    try {
      conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/spark", "root", "123456")
      // Prepare the statement once and reuse it for every record,
      // rather than re-preparing (and leaking) one per row
      ps = conn.prepareStatement(sql)
      iterator.foreach(data => {
        ps.setString(1, data._1)
        ps.setInt(2, data._2)
        ps.executeUpdate()
      })
    } catch {
      case e: Exception => println("Mysql Exception: " + e.getMessage)
    } finally {
      if (ps != null) ps.close()
      if (conn != null) conn.close()
    }
  }

  def main(args: Array[String]) {
    val conf = new SparkConf().setAppName("RDDToMysql").setMaster("local")
    val sc = new SparkContext(conf)
    val data = sc.parallelize(List(("www", 10), ("iteblog", 20), ("com", 30)))
    // foreachPartition opens one connection per partition instead of per record
    data.foreachPartition(myFun)
  }
}
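The pattern above can be pushed further with JDBC batching, so each partition sends rows to MySQL in groups rather than making one round trip per record. Below is a minimal sketch of that variation, assuming the same blog table and an arbitrary batch size of 500 (neither detail is from the original post):

import java.sql.{Connection, DriverManager, PreparedStatement}

// Sketch: batched variant of the per-partition writer above
def batchedFun(iterator: Iterator[(String, Int)]): Unit = {
  var conn: Connection = null
  var ps: PreparedStatement = null
  try {
    conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/spark", "root", "123456")
    conn.setAutoCommit(false)  // commit once per batch instead of per row
    ps = conn.prepareStatement("insert into blog(name, count) values (?, ?)")
    iterator.grouped(500).foreach { batch =>  // 500 is an assumed batch size
      batch.foreach { case (name, count) =>
        ps.setString(1, name)
        ps.setInt(2, count)
        ps.addBatch()
      }
      ps.executeBatch()
      conn.commit()
    }
  } finally {
    if (ps != null) ps.close()
    if (conn != null) conn.close()
  }
}

It is used exactly like myFun: data.foreachPartition(batchedFun).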
Hadoop dependencies needed when using Maven with a Spring MVC project:
<!-- hadoop -->
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-client</artifactId>
  <version>2.6.0</version>
</dependency>
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-common</artifactId>
  <version>2.6.0</version>
</dependency>
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-hdfs</artifactId>
  <version>2.6.0</version>
</dependency>
MySQL and Gson dependencies:
<dependency>
  <groupId>mysql</groupId>
  <artifactId>mysql-connector-java</artifactId>
  <version>5.1.24</version>
</dependency>
<dependency>
  <groupId>com.google.code.gson</groupId>
  <artifactId>gson</artifactId>
  <version>2.7</version>
</dependency>
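If the project is built with sbt rather than Maven, the same dependencies would look roughly like this in build.sbt (a sketch using the versions above; not part of the original post):

libraryDependencies ++= Seq(
  "org.apache.hadoop"    % "hadoop-client"        % "2.6.0",
  "org.apache.hadoop"    % "hadoop-common"        % "2.6.0",
  "org.apache.hadoop"    % "hadoop-hdfs"          % "2.6.0",
  "mysql"                % "mysql-connector-java" % "5.1.24",
  "com.google.code.gson" % "gson"                 % "2.7"
)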
Runnable example:
package sql

import java.util.Properties
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * In production, submit the job with:
 * spark-submit --class sql.SparkSqlMysqlDatasource --master yarn-cluster --executor-memory 2G
 *   --num-executors 2 --driver-memory 1g --executor-cores 1 /data1/e_heyutao/sparktest/sparkEnn.jar
 */
object SparkSqlMysqlDatasource {
  // Database configuration
  lazy val url = "jdbc:mysql://your_ip:3306/my_test"
  lazy val username = "root"
  lazy val password = "secret_password"

  def main(args: Array[String]) {
    // val sparkConf = new SparkConf().setAppName("sparkSqlTest").setMaster("local[2]").set("spark.app.id", "sql")
    val sparkConf = new SparkConf().setAppName("sparkSqlTest").setMaster("yarn-cluster").set("spark.app.id", "sqlTest")
    // Serialization settings
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    sparkConf.set("spark.kryoserializer.buffer", "256m")
    sparkConf.set("spark.kryoserializer.buffer.max", "2046m")
    sparkConf.set("spark.akka.frameSize", "500")
    sparkConf.set("spark.rpc.askTimeout", "30")

    // Get the contexts
    val sc = new SparkContext(sparkConf)
    val sqlContext = new SQLContext(sc)
    // Import implicit conversions so Spark SQL built-in functions can be used
    import sqlContext.implicits._

    // Build the JDBC connection string
    val uri = url + "?user=" + username + "&password=" + password + "&useUnicode=true&characterEncoding=UTF-8"
    val prop = new Properties()
    // Note: when running on a cluster this line is required,
    // otherwise the job fails with a "MySQL driver not found" error
    prop.put("driver", "com.mysql.jdbc.Driver")

    // Load the MySQL tables
    val df_test1: DataFrame = sqlContext.read.jdbc(uri, "user_t", prop)
    val df_test2: DataFrame = sqlContext.read.jdbc(uri, "t_user2", prop)

    // Select the required columns from the DataFrame
    df_test2.select("id", "name", "age").collect()
      .foreach(row => {
        println("id " + row(0) + " ,name " + row(1) + ", age " + row(2))
      })

    // Register as a temporary table
    df_test1.registerTempTable("temp_table")
    val total_sql = "select * from temp_table"
    val total_df: DataFrame = sqlContext.sql(total_sql)

    // Write the result back to the database
    val properties = new Properties()
    properties.setProperty("user", "root")
    properties.setProperty("password", "secret_password")
    total_df.write.mode("append").jdbc("jdbc:mysql://your_ip:3306/my_test?useUnicode=true&characterEncoding=UTF-8", "t_result", properties)

    /**
     * Note: the accepted save modes can be seen in the source of DataFrameWriter.mode:
     * def mode(saveMode: String): DataFrameWriter = {
     *   this.mode = saveMode.toLowerCase match {
     *     case "overwrite" => SaveMode.Overwrite
     *     case "append" => SaveMode.Append
     *     case "ignore" => SaveMode.Ignore
     *     case "error" | "default" => SaveMode.ErrorIfExists
     *     case _ => throw new IllegalArgumentException(s"Unknown save mode: $saveMode. " +
     *       "Accepted modes are 'overwrite', 'append', 'ignore', 'error'.")
     *   }
     * }
     */

    // Group by name and compute the average age
    total_df.groupBy("name").avg("age").collect().foreach(x => {
      println("name " + x(0))
      println("age " + x(1))
    })
  }
}
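When the source table is large, the plain read above pulls everything through a single JDBC connection. DataFrameReader also offers a partitioned jdbc overload that splits the scan across executors on a numeric column; here is a sketch reusing the uri and prop values above (the id column, its bounds, and the partition count are assumptions for illustration):

// Sketch: partitioned JDBC read; Spark issues 4 parallel queries, splitting
// the range [1, 100000] of the numeric "id" column across partitions
val partitioned: DataFrame = sqlContext.read.jdbc(
  uri,       // same JDBC connection string as above
  "user_t",  // table to read
  "id",      // numeric partition column (assumed to exist)
  1L,        // lowerBound (assumed)
  100000L,   // upperBound (assumed)
  4,         // numPartitions
  prop)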
2. Scala Language Notes
Regular expressions: build a Regex object by calling the r method on a string: val regx = "[0-9]+".r. findAllIn returns an iterator over all matches: val matchiterator = regx.findAllIn("string to search"). findFirstIn returns the first match anywhere in the string, while findPrefixOf only matches at the beginning of the string.
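A small self-contained example of these three methods (the sample strings are made up for illustration):

val regx = "[0-9]+".r
val text = "order 66 shipped in 3 days"

val matchiterator = regx.findAllIn(text)  // iterator over all matches
println(matchiterator.mkString(", "))     // prints: 66, 3

println(regx.findFirstIn(text))           // Some(66): first match anywhere
println(regx.findPrefixOf(text))          // None: text does not start with digits
println(regx.findPrefixOf("42abc"))       // Some(42): matches at the start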