使用 Spark Streaming 的 transform 算子实现广告计费日志实时黑名单过滤(Java版本)
来源:互联网 发布:创优网络 编辑:程序博客网 时间:2024/05/22 03:50
package gh.spark.SparkStreaming;
import java.util.ArrayList;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.Optional;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;
/**
* 广告计费日志实时黑名单过滤
* @author Administrator
*
*/
public class TransformDemo {
public static void main(String[] args) throws Exception {
SparkConf conf=new SparkConf()
.setAppName("TransformDemo")
.setMaster("local[2]");
JavaStreamingContext jsc=
new JavaStreamingContext(conf,Durations.seconds(5));
//创建一份黑名单
ArrayList<Tuple2<String, Boolean>> blackList =
new ArrayList<Tuple2<String, Boolean>>();
blackList.add(new Tuple2<String, Boolean>("leo", true));
//注意,定义时需要添加final关键字
final JavaPairRDD<String, Boolean> blackRDD =
jsc.sparkContext().parallelizePairs(blackList);
JavaReceiverInputDStream<String> linesDtream =
jsc.socketTextStream("tgmaster", 9999);
//日志格式:date username,比如:2016-11-07 jack
//(jack,2016-11-07 jack)
JavaPairDStream<String, String> mapToPairDtream = linesDtream.mapToPair(new PairFunction<String, String, String>() {
private static final long serialVersionUID = 1L;
public Tuple2<String, String> call(String line) throws Exception {
return new Tuple2<String, String>(line.split(" ")[1], line);
}
});
//实时黑名单过滤
JavaDStream<String> resultDtream = mapToPairDtream.transform(new Function<JavaPairRDD<String,String>, JavaRDD<String>>() {
private static final long serialVersionUID = 1L;
public JavaRDD<String> call(JavaPairRDD<String, String> userClickLogRDD)
throws Exception {
/**
* 在此处,我们使用leftOuterJoin左外连接的方式进行join
* 左外连接之后的结果中既有黑名单人员,又有非黑名单人员
*/
JavaPairRDD<String, Tuple2<String, Optional<Boolean>>> joinRDD =
userClickLogRDD.leftOuterJoin(blackRDD);
/**
* 接下来进行filter过滤操作
* 将黑名单用户过滤出来
*/
JavaPairRDD<String, Tuple2<String, Optional<Boolean>>> filterRDD = joinRDD.filter(new Function<Tuple2<String,Tuple2<String,Optional<Boolean>>>, Boolean>() {
private static final long serialVersionUID = 1L;
public Boolean call(
Tuple2<String, Tuple2<String, Optional<Boolean>>> log)
throws Exception {
if(log._2._2.isPresent() && log._2._2.get()){
return false; //此时是一个黑名单用户
}
return true; //非黑名单用户
}
});
//找出非黑名单日志中的用户名username
JavaRDD<String> mapRDD = filterRDD.map(new Function<Tuple2<String,Tuple2<String,Optional<Boolean>>>, String>() {
private static final long serialVersionUID = 1L;
public String call(
Tuple2<String, Tuple2<String, Optional<Boolean>>> log)
throws Exception {
return log._2._1; //返回用名username的日志
}
});
return mapRDD;
}
});
resultDtream.print();
jsc.start();
jsc.awaitTermination();
jsc.close();
}
}
import java.util.ArrayList;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.Optional;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;
/**
* 广告计费日志实时黑名单过滤
* @author Administrator
*
*/
public class TransformDemo {
public static void main(String[] args) throws Exception {
SparkConf conf=new SparkConf()
.setAppName("TransformDemo")
.setMaster("local[2]");
JavaStreamingContext jsc=
new JavaStreamingContext(conf,Durations.seconds(5));
//创建一份黑名单
ArrayList<Tuple2<String, Boolean>> blackList =
new ArrayList<Tuple2<String, Boolean>>();
blackList.add(new Tuple2<String, Boolean>("leo", true));
//注意,定义时需要添加final关键字
final JavaPairRDD<String, Boolean> blackRDD =
jsc.sparkContext().parallelizePairs(blackList);
JavaReceiverInputDStream<String> linesDtream =
jsc.socketTextStream("tgmaster", 9999);
//日志格式:date username,比如:2016-11-07 jack
//(jack,2016-11-07 jack)
JavaPairDStream<String, String> mapToPairDtream = linesDtream.mapToPair(new PairFunction<String, String, String>() {
private static final long serialVersionUID = 1L;
public Tuple2<String, String> call(String line) throws Exception {
return new Tuple2<String, String>(line.split(" ")[1], line);
}
});
//实时黑名单过滤
JavaDStream<String> resultDtream = mapToPairDtream.transform(new Function<JavaPairRDD<String,String>, JavaRDD<String>>() {
private static final long serialVersionUID = 1L;
public JavaRDD<String> call(JavaPairRDD<String, String> userClickLogRDD)
throws Exception {
/**
* 在此处,我们使用leftOuterJoin左外连接的方式进行join
* 左外连接之后的结果中既有黑名单人员,又有非黑名单人员
*/
JavaPairRDD<String, Tuple2<String, Optional<Boolean>>> joinRDD =
userClickLogRDD.leftOuterJoin(blackRDD);
/**
* 接下来进行filter过滤操作
* 将黑名单用户过滤出来
*/
JavaPairRDD<String, Tuple2<String, Optional<Boolean>>> filterRDD = joinRDD.filter(new Function<Tuple2<String,Tuple2<String,Optional<Boolean>>>, Boolean>() {
private static final long serialVersionUID = 1L;
public Boolean call(
Tuple2<String, Tuple2<String, Optional<Boolean>>> log)
throws Exception {
if(log._2._2.isPresent() && log._2._2.get()){
return false; //此时是一个黑名单用户
}
return true; //非黑名单用户
}
});
//找出非黑名单日志中的用户名username
JavaRDD<String> mapRDD = filterRDD.map(new Function<Tuple2<String,Tuple2<String,Optional<Boolean>>>, String>() {
private static final long serialVersionUID = 1L;
public String call(
Tuple2<String, Tuple2<String, Optional<Boolean>>> log)
throws Exception {
return log._2._1; //返回用名username的日志
}
});
return mapRDD;
}
});
resultDtream.print();
jsc.start();
jsc.awaitTermination();
jsc.close();
}
}
0 0
- transform实现广告计费日志实时黑名单过滤(Java版本)
- transform实现广告计费日志实时黑名单过滤(Scala版本)
- SparkStreaming 实现广告计费系统中在线黑名单过滤实战
- SparkStreaming 实现广告计费系统中在线黑名单过滤实战
- Spark 实现黑名单实时过滤
- 大数据IMF传奇行动绝密课程第94课:SparkStreaming实现广告计费系统中在线黑名单过滤实战
- Spark-Streaming之transform操作,实时黑名单过滤案例
- 12.transform以及实时黑名单过滤案例实战
- 用spark streaming实现黑名单实时过滤
- sparkstreaming实现过滤黑名单
- Spark(黑名单过滤)
- 第106课: Spark Streaming电商广告点击综合案例黑名单过滤实现
- 第106讲: Spark Streaming电商广告点击综合案例黑名单过滤实现
- 广告计费
- 第108课: Spark Streaming电商广告点击综合案例动态黑名单过滤真正的实现代码
- 第108讲: Spark Streaming电商广告点击综合案例动态黑名单过滤真正的实现代码
- Spark-Spark Streaming-广告点击的在线黑名单过滤
- 大数据Spark “蘑菇云”行动第57课: Spark 2.0.1稳定版本解析及广告点击案例数据库和动态黑名单过滤代码
- 斐波那契数列
- Android 源码编译时collect2: ld terminated with signal 9 [Killed] 错误的处理
- android 使用shape自定义圆角矩形
- Android_Note(三)——添加、更新、保存记事本功能
- mac下卸载软件
- transform实现广告计费日志实时黑名单过滤(Java版本)
- 硬件设计中的30个错误想法与原因分析
- fork创建子进程
- 【置顶】个人介绍
- Android 特效库 - 下拉刷新(二)
- Mongodb 中 _id (ObjectId) 设计思路
- JavaMail - javax.mail.MessagingException 解决java邮件有时候发送失败的错误
- a.toString()和Arrays.toString(a)的区别
- SpringMVC上传入门