【Apache Avro】quick start快速开始,示例教程

来源:互联网 发布:linux shell exit 1 编辑:程序博客网 时间:2024/05/01 15:01

本教程涉及Avro的安装和示例代码的运行,avro与thrift有许多相似之处,可以参考alidata的这篇文章:http://www.alidata.org/archives/1307

本教程是参照Apache avro官网的quick start写的:http://avro.apache.org/docs/current/gettingstartedjava.html

1.下载avro-tools-1.7.5.jar

http://mirrors.cnnic.cn/apache/avro/avro-1.7.5/java/

2.使用avro-tools生成java代码

编写Avro schema示例文件user.avsc

{"namespace": "example.avro", "type": "record", "name": "User", "fields": [     {"name": "name", "type": "string"},     {"name": "favorite_number",  "type": ["int", "null"]},     {"name": "favorite_color", "type": ["string", "null"]} ]}
在user.avsc所在的目录下执行:

java -jar /path/to/avro-tools-1.7.5.jar compile schema user.avsc .
将在该目录下生成./example/avro/User.java程序
3.maven构建eclipse工程编写java程序序列化和反序列化avro文件
maven的pom.xml文件如下:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the Avro quick-start sample project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Project coordinates -->
  <groupId>com.iflytek.cpcloud</groupId>
  <artifactId>avro-test</artifactId>
  <version>0.1.0-SNAPSHOT</version>

  <dependencies>
    <!-- Logging and command-line helpers -->
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.17</version>
    </dependency>
    <dependency>
      <groupId>commons-logging</groupId>
      <artifactId>commons-logging</artifactId>
      <version>1.1.1</version>
    </dependency>
    <dependency>
      <groupId>commons-cli</groupId>
      <artifactId>commons-cli</artifactId>
      <version>1.2</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
    </dependency>
    <!-- Avro runtime; same version as the avro-tools jar used to generate User.java -->
    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro</artifactId>
      <version>1.7.5</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Assembly plugin configured with the predefined jar-with-dependencies descriptor -->
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.2-beta-5</version>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
      </plugin>
      <!-- Compile as Java 1.6 with UTF-8 source encoding -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>2.3.2</version>
        <configuration>
          <source>1.6</source>
          <target>1.6</target>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>

mvn构建eclipse工程
mvn eclipse:eclipse
在example.avro包中导入上面使用avro-tools生成的User.java文件
将生成的工程导入到eclipse中
编写序列化和反序列化avro文件的程序Test.java
package example.avro;

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;

/**
 * Avro quick-start sample: writes a few {@link User} records to an Avro data
 * file and then reads them back, printing each record to standard output.
 */
public class Test {

  /** Name of the Avro data file, resolved against the current working directory. */
  private static final String USERS_FILE = "users.avro";

  /**
   * Serializes the sample users, then deserializes and prints them.
   *
   * @param args ignored
   * @throws IOException if the data file cannot be written or read
   */
  public static void main(String[] args) throws IOException {
    code();
    decode();
  }

  /**
   * Builds four sample users (via setters, the all-args constructor and the
   * builder) and serializes them to {@code users.avro}.
   *
   * @throws IOException if the data file cannot be created or written
   */
  public static void code() throws IOException {
    User user1 = new User();
    user1.setName("Alyssa");
    user1.setFavoriteNumber(256);
    // Leave favorite color null

    // Alternate constructor
    User user2 = new User("Ben", 7, "red");

    // Construct via builder
    User user3 = User.newBuilder()
        .setName("Charlie")
        .setFavoriteColor("blue")
        .setFavoriteNumber(null)
        .build();

    User user4 = new User("Jimmy", 7, "yellow");

    // Serialize all four users to disk
    File file = new File(USERS_FILE);
    DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
    DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter);
    // try/finally (not try-with-resources) because the project targets Java 1.6.
    try {
      dataFileWriter.create(user1.getSchema(), file);
      dataFileWriter.append(user1);
      dataFileWriter.append(user2);
      dataFileWriter.append(user3);
      dataFileWriter.append(user4);
    } finally {
      // Always close so the file handle is released even if a write fails.
      dataFileWriter.close();
    }
  }

  /**
   * Deserializes every user stored in {@code users.avro} and prints each one
   * with {@code System.out.println}.
   *
   * @throws IOException if the data file cannot be opened or read
   */
  public static void decode() throws IOException {
    // Deserialize Users from disk
    DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class);
    File file = new File(USERS_FILE);
    DataFileReader<User> dataFileReader = new DataFileReader<User>(file, userDatumReader);
    try {
      User user = null;
      while (dataFileReader.hasNext()) {
        // Reuse user object by passing it to next(). This saves us from
        // allocating and garbage collecting many objects for files with
        // many items.
        user = dataFileReader.next(user);
        System.out.println(user);
      }
    } finally {
      // The original never closed the reader, leaking the file handle.
      dataFileReader.close();
    }
  }
}

运行结果如下:
{"name": "Alyssa", "favorite_number": 256, "favorite_color": null}
{"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"}
{"name": "Jimmy", "favorite_number": 7, "favorite_color": "yellow"}

会在eclipse工程的根目录下生成users.avro文件


3 0