[Kafka]

来源:互联网 发布:关联规则算法图 编辑:程序博客网 时间:2024/06/05 02:07

根据业务需要可以使用Kafka提供的Java Producer API进行产生数据,并将产生的数据发送到Kafka对应Topic的对应分区中,入口类为:Producer

Kafka的Producer API主要提供下列三个方法:

  public void send(KeyedMessage<K,V> message) 发送单条数据到Kafka集群

  public void send(List<KeyedMessage<K,V>> messages) 发送多条数据(数据集)到Kafka集群

  public void close() 关闭Kafka连接资源

======================================================================

一、JavaKafkaProducerPartitioner:自定义的数据分区器,功能是:决定输入的key/value键值对的message发送到Topic的那个分区中,返回分区id,范围:[0,分区数量); 这里的实现比较简单,根据key中的数字决定分区的值。具体代码如下:

复制代码
import kafka.producer.Partitioner;import kafka.utils.VerifiableProperties;/** * Created by gerry on 12/21. */public class JavaKafkaProducerPartitioner implements Partitioner {    /**     * 无参构造函数     */    public JavaKafkaProducerPartitioner() {        this(new VerifiableProperties());    }    /**     * 构造函数,必须给定     *     * @param properties 上下文     */    public JavaKafkaProducerPartitioner(VerifiableProperties properties) {        // nothings    }    @Override    public int partition(Object key, int numPartitions) {        int num = Integer.valueOf(((String) key).replaceAll("key_", "").trim());        return num % numPartitions;    }}
复制代码

 

二、 JavaKafkaProducer:通过Kafka提供的API进行数据产生操作的测试类;具体代码如下:

复制代码
import kafka.javaapi.producer.Producer;import kafka.producer.KeyedMessage;import kafka.producer.ProducerConfig;import org.apache.log4j.Logger;import java.util.Properties;import java.util.concurrent.ExecutorService;import java.util.concurrent.Executors;import java.util.concurrent.TimeUnit;import java.util.concurrent.atomic.AtomicBoolean;import java.util.concurrent.ThreadLocalRandom;/** * Created by gerry on 12/21. */public class JavaKafkaProducer {    private Logger logger = Logger.getLogger(JavaKafkaProducer.class);    public static final String TOPIC_NAME = "test";    public static final char[] charts = "qazwsxedcrfvtgbyhnujmikolp1234567890".toCharArray();    public static final int chartsLength = charts.length;    public static void main(String[] args) {        String brokerList = "192.168.187.149:9092";        brokerList = "192.168.187.149:9092,192.168.187.149:9093,192.168.187.149:9094,192.168.187.149:9095";        brokerList = "192.168.187.146:9092";        Properties props = new Properties();        props.put("metadata.broker.list", brokerList);        /**         * 0表示不等待结果返回<br/>         * 1表示等待至少有一个服务器返回数据接收标识<br/>         * -1表示必须接收到所有的服务器返回标识,及同步写入<br/>         * */        props.put("request.required.acks", "0");        /**         * 内部发送数据是异步还是同步         * sync:同步, 默认         * async:异步         */        props.put("producer.type", "async");        /**         * 设置序列化的类         * 可选:kafka.serializer.StringEncoder         * 默认:kafka.serializer.DefaultEncoder         */        props.put("serializer.class", "kafka.serializer.StringEncoder");        /**         * 设置分区类         * 根据key进行数据分区         * 默认是:kafka.producer.DefaultPartitioner ==> 安装key的hash进行分区         * 可选:kafka.serializer.ByteArrayPartitioner ==> 转换为字节数组后进行hash分区         */        props.put("partitioner.class", "JavaKafkaProducerPartitioner");        // 重试次数        props.put("message.send.max.retries", "3");        // 异步提交的时候(async),并发提交的记录数        props.put("batch.num.messages", "200");        // 设置缓冲区大小,默认10KB        props.put("send.buffer.bytes", "102400");        // 2. 构建Kafka Producer Configuration上下文        ProducerConfig config = new ProducerConfig(props);        // 3. 构建Producer对象        final Producer<String, String> producer = new Producer<String, String>(config);        // 4. 发送数据到服务器,并发线程发送        final AtomicBoolean flag = new AtomicBoolean(true);        int numThreads = 50;        ExecutorService pool = Executors.newFixedThreadPool(numThreads);        for (int i = 0; i < 5; i++) {            pool.submit(new Thread(new Runnable() {                @Override                public void run() {                    while (flag.get()) {                        // 发送数据                        KeyedMessage message = generateKeyedMessage();                        producer.send(message);                        System.out.println("发送数据:" + message);                        // 休眠一下                        try {                            int least = 10;                            int bound = 100;                            Thread.sleep(ThreadLocalRandom.current().nextInt(least, bound));                        } catch (InterruptedException e) {                            e.printStackTrace();                        }                    }                    System.out.println(Thread.currentThread().getName() + " shutdown....");                }            }, "Thread-" + i));        }        // 5. 等待执行完成        long sleepMillis = 600000;        try {            Thread.sleep(sleepMillis);        } catch (InterruptedException e) {            e.printStackTrace();        }        flag.set(false);        // 6. 关闭资源        pool.shutdown();        try {            pool.awaitTermination(6, TimeUnit.SECONDS);        } catch (InterruptedException e) {        } finally {            producer.close(); // 最后之后调用        }    }    /**     * 产生一个消息     *     * @return     */    private static KeyedMessage<String, String> generateKeyedMessage() {        String key = "key_" + ThreadLocalRandom.current().nextInt(10, 99);        StringBuilder sb = new StringBuilder();        int num = ThreadLocalRandom.current().nextInt(1, 5);        for (int i = 0; i < num; i++) {            sb.append(generateStringMessage(ThreadLocalRandom.current().nextInt(3, 20))).append(" ");        }        String message = sb.toString().trim();        return new KeyedMessage(TOPIC_NAME, key, message);    }    /**     * 产生一个给定长度的字符串     *     * @param numItems     * @return     */    private static String generateStringMessage(int numItems) {        StringBuilder sb = new StringBuilder();        for (int i = 0; i < numItems; i++) {            sb.append(charts[ThreadLocalRandom.current().nextInt(chartsLength)]);        }        return sb.toString();    }}
复制代码

 

三、Pom.xml依赖配置如下

复制代码
<properties>    <kafka.version>0.8.2.1</kafka.version></properties><dependencies>    <dependency>        <groupId>org.apache.kafka</groupId>        <artifactId>kafka_2.10</artifactId>        <version>${kafka.version}</version>    </dependency></dependencies>
复制代码