Hadoop自定义可序列化的类

来源：互联网 | 发布：php统计字符串长度 | 编辑：程序博客网 | 时间：2024/05/18 13:45

本文演示如何在 Hadoop 框架中实现一个可以被序列化的自定义类。

[java] view plaincopy
package com.rpc.nefu;  
  
import java.io.DataInput;  
import java.io.DataOutput;  
import java.io.IOException;  
  
import org.apache.hadoop.io.IntWritable;  
import org.apache.hadoop.io.Text;  
import org.apache.hadoop.io.WritableComparable;  
  
//自定义的序列化方法，只需要实现WritableComparable接口,重写一些方法即可 主要就是 readFields() write() compareTo()  
public class personWritable implements WritableComparable<personWritable>{  
    private Text name;  
    private IntWritable age;  
    private Text sex;  
    public personWritable(){  
        set("name",-1,"female");  
    }  
    public personWritable(String _name,int _age,String _sex){  
        set(_name,_age,_sex);  
    }  
    public void set(String name,int age,String sex){  
        this.name = new Text(name);  
        this.age = new IntWritable(age);  
        this.sex = new Text(sex);  
    }  
    //反序列化过程，将datainput的内容还原为hadoop对象  
    @Override  
    public void readFields(DataInput in) throws IOException {  
        // TODO Auto-generated method stub  
        name.readFields(in);  
        age.readFields(in);  
        sex.readFields(in);  
    }  
    //序列化过程  
    @Override  
    public void write(DataOutput out) throws IOException {  
        // TODO Auto-generated method stub  
        name.write(out);  
        age.write(out);  
        sex.write(out);  
    }  
  
    @Override  
    public int compareTo(personWritable other) {  
        // TODO Auto-generated method stub  
        int cmp1 = name.compareTo(other.name);  
        if(cmp1!=0){  
            return cmp1;  
        }  
        int cmp2 = age.compareTo(other.age);  
        if(cmp2!=0){  
            return cmp2;  
        }  
        int cmp3 = sex.compareTo(other.sex);  
        return cmp3;  
    }  
    //判断是否相等  
    public boolean equals(Object o){  
        if(o instanceof personWritable){  
            personWritable pw = (personWritable) o;  
            return name.equals(pw.name)&&age.equals(pw.age)&&sex.equals(pw.sex);  
        }  
        return false;  
    }  
    //哈希值  
    public int hashCode(){  
        return name.hashCode()*3+age.hashCode()*5+sex.hashCode()*7;  
    }  
    public String toString(){  
        StringBuffer sb = new StringBuffer();  
        sb.append("--");  
        sb.append("姓名:"+name+"_");  
        sb.append("年龄:"+age+"_");  
        sb.append("性别:"+sex+"_");  
        sb.append("--");  
        return sb.toString();     
    }  
}  

[java] view plaincopy
package com.rpc.nefu;  
  
import java.io.ByteArrayInputStream;  
import java.io.ByteArrayOutputStream;  
import java.io.DataInputStream;  
import java.io.DataOutputStream;  
import java.io.IOException;  
  
//import org.apache.hadoop.io.Writable;  
  
//将序列化的对象的内容返回到一个字节数组中去 记录序列的过程  
public class hadoopSerializable {  
    public static byte[] serialize(personWritable writable) throws IOException{  
        //创建一个字节数组  
        ByteArrayOutputStream out = new ByteArrayOutputStream();  
        //创建一个DataOutputStream,将字节数组传递进去，保存输出的序列化后的内容  
        DataOutputStream dataout =  new DataOutputStream(out);   
        //让参数的Hadoop对象序列化到字节流中   
        writable.write(dataout);   
        dataout.close();   
        //返回序列化后的字节流   
        return out.toByteArray();   
    }  
      
           /** 
            *这个方法用于反序列化一个字节数组成Hadoop Writable对象  
            *@param writable 反序列化后的Writable对象存放在这个参数中  
            *@param bytes 被反序列化的字节数组 对应于上面序列化的bytes 
            **/  
public static void deserialize(personWritable writable,byte[] bytes) throws Exception{   
          
        ByteArrayInputStream in = new ByteArrayInputStream(bytes);   
        //创建一个DataInputStream   
        DataInputStream datain = new DataInputStream(in);   
        //让Hadoop框架反序列化这个字节数组，还原后的Writable对象存放到第一个参数中   
        writable.readFields(datain);   
          
        datain.close();   
    }   
}  

[java] view plaincopy
package com.rpc.nefu;  
  
import org.apache.hadoop.util.StringUtils;  
  
public class serializeTest {  
    public static void main(String [] args) throws Exception{   
          
        /*把我们自定义的Hadoop可序列化对象进行序列化 */  
        System.out.println("Hadoop--对象序列化");   
        personWritable pw = new personWritable("XD",23,"Male");   
        String imformation= "自定义可序列化Hadoop类型为： "+pw.getClass().getName()+"\n";   
        String primaryPersonWritableInfo = "序列化前对象为：  "+pw.toString()+"\n";   
        //开始序列化过程   
        byte[] serializedValue =hadoopSerializable.serialize(pw);   
        String lengthInfo= "序列化后的字节数组长度为： "+serializedValue.length+"\n";   
        String serializeValueInfo= "序列化后的值为: " +StringUtils.byteToHexString(serializedValue)+"\n";   
   
        System.out.println(imformation+primaryPersonWritableInfo+lengthInfo+serializeValueInfo+"\n");   
           
        System.out.println();   
        //把我们序列化之后的字节数组反序列化为原始Hadoop对象   
        System.out.println("反序列化--Hadoop");   
        personWritable reversePersonWritable = new personWritable();   
        /*StringUtils.byteToHexString 类似将自己数组转化为字符串*/  
        String originalByteArrayInfo="被反序列化的字节数组内容为： "+StringUtils.byteToHexString(serializedValue)+"\n";  
        //开始反序列化过程   
        hadoopSerializable.deserialize(reversePersonWritable, serializedValue);   
        String restoredValueInfo = "反序列化之后的Writable对象为: "+reversePersonWritable.toString();   
        System.out.println(originalByteArrayInfo+restoredValueInfo+"\n");   
    }   
}  

0 0