Using Spark with MongoDB
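This post walks through a small Java program that uses the mongo-hadoop connector to read a MongoDB collection into Spark as an RDD, print a couple of fields from each document, and write the documents back out to a second collection.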
package org.spark.mongo;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.bson.BSONObject;
import com.mongodb.hadoop.MongoInputFormat;
import com.mongodb.hadoop.MongoOutputFormat;
import scala.Tuple2;
public class SparkMongotoMongo {
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("mongotomongo");
        // Standalone cluster with two masters (active/standby failover).
        sparkConf.setMaster("spark://dwrj5114:7077,dwrj5113:7077");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);
        // Ship the application jar to the executors.
        sc.addJar("F:\\jars\\sparkmongo.jar");
        // MongoDB I/O settings for the mongo-hadoop connector.
        // URI format: mongodb://host:port/database.collection
        Configuration config = new Configuration();
        config.set("mongo.input.uri", "mongodb://10.1.50.124:27017/ligf.student");
        config.set("mongo.output.uri", "mongodb://10.1.50.124:27017/ligf.test");
JavaPairRDD<Object, BSONObject> mongoRDD = sc.newAPIHadoopRDD(config, com.mongodb.hadoop.MongoInputFormat.class, Object.class, BSONObject.class);
// Input contains tuples of (ObjectId, BSONObject)
JavaRDD<String> words = mongoRDD.flatMap(new FlatMapFunction<Tuple2<Object, BSONObject>, String>() {
private static final long serialVersionUID = 7780225729554937350L;
@Override
public Iterable<String> call(Tuple2<Object, BSONObject> document) {
String id=(String)document._2.get("id");
String zhuanye=(String)document._2.get("zhuanye");
if(id.equals("")&&zhuanye.equals("")){
return Collections.emptyList();
}else{
String total=id+","+zhuanye;
return Arrays.asList(total);
}
// if (o instanceof BSONObject) {
// BSONObject bso = (BSONObject) o;
// String street = (String)bso.get("street");
// String city = (String)bso.get("city");
// String state = (String)bso.get("state");
// String address = street+", "+city+", "+state;
// return Arrays.asList(address);
// } else {
// return Collections.emptyList();
// }
}
});
        // Collect the results to the driver and print each student's record.
        List<String> outputs = words.collect();
        if (outputs.isEmpty()) {
            System.out.println("No matching records found.");
        }
        for (String out : outputs) {
            System.out.println("output:" + out);
        }
        // Write the unmodified (ObjectId, BSONObject) pairs to ligf.test.
        // The path argument is ignored; MongoOutputFormat writes to the
        // "mongo.output.uri" set in config.
        mongoRDD.saveAsNewAPIHadoopFile("file:///inputDir", Object.class, Object.class,
                MongoOutputFormat.class, config);
        sc.close();
    }
}
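Note that the job saves mongoRDD, not the transformed words RDD, so the output collection receives the input documents unchanged; words is only collected for printing. Running the job requires mongo-hadoop-core and the MongoDB Java driver on the classpath alongside Spark. Once it finishes, the copied documents land in ligf.test. The sketch below, which is not part of the original post, checks the write using the legacy MongoDB Java driver API and the same host and collection names as above:

import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.MongoClient;

public class VerifyMongoOutput {
    public static void main(String[] args) {
        // Connect to the same mongod the Spark job wrote to.
        MongoClient client = new MongoClient("10.1.50.124", 27017);
        try {
            DB db = client.getDB("ligf");
            DBCollection out = db.getCollection("test");
            System.out.println("documents in ligf.test: " + out.count());
            // Print a few of the copied documents.
            DBCursor cursor = out.find().limit(5);
            while (cursor.hasNext()) {
                System.out.println(cursor.next());
            }
        } finally {
            client.close();
        }
    }
}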