kafka中写入avro数据
来源:互联网 发布:如何建大数据系统 编辑:程序博客网 时间:2024/05/17 09:17
import java.io.ByteArrayOutputStream;import java.util.ArrayList;import java.util.List;import java.util.Properties;import java.util.concurrent.TimeUnit;import java.util.regex.Matcher;import java.util.regex.Pattern;import org.apache.avro.generic.GenericData;import org.apache.avro.generic.GenericDatumWriter;import org.apache.avro.generic.GenericRecord;import org.apache.avro.io.BinaryEncoder;import org.apache.avro.io.EncoderFactory;import com.cnpc.soc.avro.Log;import kafka.javaapi.producer.Producer;import kafka.producer.KeyedMessage;import kafka.producer.ProducerConfig;public class kafkaProducer2 extends Thread {private String topic;public kafkaProducer2(String topic) {super();this.topic = topic;}@Overridepublic void run() {Producer<String, byte[]> producer = createProducer();while (true) {String regex = "^([0-9.]+)\\s([\\w.-]+)\\s([\\w.-]+)\\s(\\[[^\\[\\]]+\\])\\s\"((?:[^\"]|\\\")+)\"\\s(\\d{3})\\s(\\d+|-)\\s\"((?:[^\"]|\\\")+)\"\\s\"((?:[^\"]|\\\")+)\"$";List<String> list = TextFile.readToList("access.log");Pattern pattern = Pattern.compile(regex);List<KeyedMessage<String, byte[]>> list1 = new ArrayList<KeyedMessage<String, byte[]>>();for (String s : list) {Matcher matcher = pattern.matcher(s);if (matcher.find()) {String ip = matcher.group(1);String identity = matcher.group(2);String userid = matcher.group(3);String time = matcher.group(4);String requestInfo = matcher.group(5);String state = matcher.group(6);String responce = matcher.group(7);String referer = matcher.group(8);String useragent = matcher.group(9);GenericRecord record = new GenericData.Record(Log.getClassSchema());record.put("ip", ip);record.put("identity", identity);record.put("userid", userid);record.put("time", time);record.put("requestInfo", requestInfo);record.put("state", state);record.put("responce", responce);record.put("referer", referer);record.put("useragent", useragent);try {byte[] serializedValue = serializeEvent(record);list1.add(new KeyedMessage<String, byte[]>(topic, serializedValue));} catch (Exception e) {e.printStackTrace();}}}producer.send(list1);try {TimeUnit.SECONDS.sleep(1);} catch (InterruptedException e) {e.printStackTrace();}}}protected byte[] serializeEvent(GenericRecord record) throws Exception {ByteArrayOutputStream bos = null;try {bos = new ByteArrayOutputStream();BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bos, null);GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());writer.write(record, encoder);encoder.flush();byte[] serializedValue = bos.toByteArray();return serializedValue;} catch (Exception ex) {throw ex;} finally {if (bos != null) {try {bos.close();} catch (Exception e) {bos = null;}}}}private Producer<String, byte[]> createProducer() {Properties properties = new Properties();properties.put("zookeeper.connect", "xxx.xxx.xxx:2181,xxx.xxx.xxx:2181,xxx.xxx.xxx:2181");// 声明zkproperties.put("metadata.broker.list", "xxx.xxx.xxx:6667,xxx.xxx.xxx:6667,xxx.xxx.xxx:6667");// 声明kafka brokerreturn new Producer<String, byte[]>(new ProducerConfig(properties));}public static void main(String[] args) {new kafkaProducer2("test_log_2").start();}}
0 0
- kafka中写入avro数据
- Kafka 生产消费 Avro 序列化数据
- 从kafka中获取数据写入到redis中
- kafka Confluent数据写入BUG
- spark向kafka写入数据
- Avro kafka(Producer-Consumer)
- Avro技术应用_5. 利用 Camus 来将 Avro 数据从 Kafka 拷贝到 HDFS -- 待完善
- flume读取日志数据写入kafka
- flume读取日志数据写入kafka
- rdkafka(kafka C lib) 数据写入流程
- kafka producer无法将数据写入broker
- Spark Streaming 读取Kafka数据写入Elasticsearch
- flume读取日志数据写入kafka 然后kafka+storm整合
- Avro技术应用_4. 在 Pig 中实现对 Avro 数据的读写
- Avro技术应用_7. 将 Avro 数据读写到一个 Parquet 文件中 -- 待完善
- Avro技术应用_12. 将 Avro 数据加载到 Spark 中
- Avro 向已存在的文件中append数据
- Avro数据序列化
- 小算法--数组中元素的移动
- 一台电脑上存多个Java版本时,多个Tomcat启动窗口闪退而服务没有启动的情况
- Java8 的lambda 和 Stream
- UVA 1625 Color Length
- Spring MVC多个文件上传
- kafka中写入avro数据
- 1
- mongo vue 批量删除表数据
- UAC遭禁用?UAC知识了解
- Swift - 根据图片URL获取图片的大小
- Android Activity生命周期 博客链接
- 实战篇:案例解析-如何对企业网站SEO优化把脉?
- 哈大神
- 程序自杀