Using Spark with Spring


Step 1: Maven configuration


<!-- Spark dependencies -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>1.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-mllib_2.11</artifactId>
    <version>1.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>1.6.0</version>
</dependency>
<!-- Scala runtime matching the _2.11 Spark artifacts above -->
<dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-library</artifactId>
    <version>2.11.0</version>
</dependency>
<dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-compiler</artifactId>
    <version>2.11.0</version>
</dependency>
<dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-reflect</artifactId>
    <version>2.11.0</version>
</dependency>
<!-- end Spark dependencies -->

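The service in Step 4 reads from and writes to MySQL over JDBC, so the MySQL driver also has to be on the classpath. The article does not list it; the coordinates below are an assumption, with the version to be aligned to your MySQL server:

<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.38</version> <!-- assumed version; align with your MySQL server -->
</dependency>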

Step 2: Spring configuration

<bean id="sparkConf" class="org.apache.spark.SparkConf">
           <property name="AppName" value="SparkForSpring" />
           <property name="Master" value="local" />          
</bean>

<bean id="javaSparkContext" class="org.apache.spark.api.java.JavaSparkContext">
           <constructor-arg type="SparkConf" ref="sparkConf" />
</bean>

<bean id="sqlContext" class="org.apache.spark.sql.SQLContext">
           <constructor-arg type="JavaSparkContext" ref="javaSparkContext" />
</bean>


<!-- Load the property files -->
<bean id="propertyConfigurer"
      class="org.springframework.beans.factory.config.PropertyPlaceholderConfigurer">
    <property name="locations">
        <list>
            <value>classpath:jdbc.properties</value>
            <value>classpath:spark.properties</value>
        </list>
    </property>
</bean>
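For clarity, the three Spark beans above amount to the following plain-Java construction, a minimal sketch against the Spark 1.6 API (you do not need this class if you use the XML):

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;

public class SparkBeans {
    public static SQLContext build() {
        // SparkConf's setters are fluent (they return the SparkConf itself);
        // Spring 3.1+ can still inject them as bean properties
        SparkConf conf = new SparkConf()
                .setAppName("SparkForSpring")
                .setMaster("local");                      // the sparkConf bean
        JavaSparkContext sc = new JavaSparkContext(conf); // the javaSparkContext bean
        return new SQLContext(sc);                        // the sqlContext bean
    }
}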


Step 3: Add a properties file, spark.properties

spark.master=local
spark.url=jdbc:mysql://192.168.0.202:3306/spark?useUnicode=true&characterEncoding=UTF-8
spark.table=testtable
spark.username=root
spark.password=mysql 
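Note that spark.master duplicates the master value hard-coded into the sparkConf bean in Step 2. Since PropertyPlaceholderConfigurer also resolves placeholders inside bean definitions, one possible cleanup (not in the original article) is to reference the property there instead:

<property name="master" value="${spark.master}" />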


Step 4: Write the code

package com.harleycorp.service.impl;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

import javax.annotation.Resource;

import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SaveMode;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import com.harleycorp.pojo.SparkUser;
import com.harleycorp.service.ISparkUpperService;

/**
 * @author kevin
 */
@Service
public class SparkUpperServiceImpl implements ISparkUpperService {

    private Logger logger = Logger.getLogger(SparkUpperServiceImpl.class);

    @Value("${spark.master}")
    public String master; // e.g. "local"

    @Value("${spark.url}")
    public String url; // e.g. "jdbc:mysql://192.168.0.202:3306/spark?useUnicode=true&characterEncoding=UTF-8"

    @Value("${spark.table}")
    public String table; // e.g. "testtable"

    @Value("${spark.username}")
    public String username; // e.g. "root"

    //@Value("${spark.password}")
    public String password = "mysql";

    @Resource
    public SQLContext sqlContext;

    @Resource
    public JavaSparkContext sc;

    public Properties getConnectionProperties() {
        Properties connectionProperties = new Properties();
        connectionProperties.setProperty("dbtable", table);
        connectionProperties.setProperty("user", username);     // database user
        connectionProperties.setProperty("password", password); // database password
        return connectionProperties;
    }

    public String query() {
        logger.info("======================= this url: " + this.url);
        logger.info("======================= this table: " + this.table);
        logger.info("======================= this master: " + this.master);
        logger.info("======================= this username: " + this.username);
        logger.info("======================= this password: " + this.password);

        // Replace the connection settings with your actual database configuration
        DataFrame df = sqlContext.read().jdbc(url, table, getConnectionProperties());
        df.registerTempTable(table);
        String result = sqlContext.sql("select * from " + table).javaRDD().collect().toString();
        logger.info("===================== spark mysql: " + result);
        return result;
    }

    public String queryByCon() {
        logger.info("======================= this url: " + this.url);
        logger.info("======================= this table: " + this.table);
        logger.info("======================= this master: " + this.master);
        logger.info("======================= this username: " + this.username);
        logger.info("======================= this password: " + this.password);

        // Each predicate string becomes the WHERE clause of one partition of the read
        DataFrame df = sqlContext.read().jdbc(url, table, new String[]{"password=000000"}, getConnectionProperties());
        String result = df.collectAsList().toString();
        logger.info("===================== spark mysql: " + result);
        return result;
    }

    public void add() {
        List<SparkUser> list = new ArrayList<SparkUser>();
        SparkUser us = new SparkUser();
        us.setUsername("kevin");
        us.setPassword("000000");
        list.add(us);
        SparkUser us2 = new SparkUser();
        us2.setUsername("Lisa");
        us2.setPassword("666666");
        list.add(us2);

        // Parallelize the list into an RDD, map it to a DataFrame, and append it to the JDBC table
        JavaRDD<SparkUser> personsRDD = sc.parallelize(list);
        DataFrame userDf = sqlContext.createDataFrame(personsRDD, SparkUser.class);
        userDf.write().mode(SaveMode.Append).jdbc(url, table, getConnectionProperties());
    }
}
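The SparkUser POJO referenced above is not shown in the article. createDataFrame(rdd, SparkUser.class) derives the DataFrame schema from the bean's getters, so a minimal JavaBean along these lines should work (the field names are assumed from the setters used above):

package com.harleycorp.pojo;

import java.io.Serializable;

// Must be serializable so Spark can ship instances to executors
public class SparkUser implements Serializable {

    private static final long serialVersionUID = 1L;

    private String username;
    private String password;

    // Bean-style getters/setters: Spark uses them to infer the schema
    public String getUsername() { return username; }
    public void setUsername(String username) { this.username = username; }

    public String getPassword() { return password; }
    public void setPassword(String password) { this.password = password; }
}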


Step 5: Call it from JUnit

package com.harleycorp.testmybatis;

import javax.annotation.Resource;

import org.apache.log4j.Logger;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

import com.harleycorp.service.ISparkUpperService;

@RunWith(SpringJUnit4ClassRunner.class) // run with Spring's JUnit4 test runner
@ContextConfiguration(locations = {"classpath:spring-mybatis.xml"})
public class TestSpark {

    private static Logger logger = Logger.getLogger(TestSpark.class);

    @Resource
    private ISparkUpperService sparkUpperService = null;

    @Test
    public void test1() {
        sparkUpperService.query();
    }

    @Test
    public void test2() {
        sparkUpperService.add();
    }

    @Test
    public void test3() {
        sparkUpperService.queryByCon();
    }
}


Step 6: Run
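Run the tests from your IDE or via Maven, for example (the command line below is one common way to run a single test class; adjust it to your build setup):

mvn test -Dtest=TestSpark

Because master is set to local, Spark runs inside the test JVM and no cluster is required: test2 appends the two users to the MySQL table, and test1/test3 read them back.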