Hadoop matrix multiplication with a single map/reduce
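In brief: the left matrix (matrix1) is shipped to every mapper through the DistributedCache and held in memory, while the right matrix (matrix2) is the job input. For input row i of matrix2, the mapper emits the partial product A[j][i] * B[i][k] keyed by the output cell (j, k), and the reducer sums these partial products:

C[j][k] = sum over i of A[j][i] * B[i][k]

so the whole product comes out of one map/reduce pass.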

package com.matrix;
import java.io.BufferedReader;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.util.*;
/**************************************
 *
 * @author  Lincolnfather
 * @date    20131107
 * Hadoop matrix multiplication.
 * Tested on http://hadoop.nchc.org.tw/
 *
 */
public class MatrixMR {
    public static class MatrixMapper extends Mapper<Object, Text, MatrixPair, IntWritable> {

        // The left matrix (matrix1), loaded from the DistributedCache in setup().
        private List<List<Integer>> cache = new ArrayList<List<Integer>>();
        // Index of the current input row of the right matrix. This counter relies
        // on the right matrix arriving as a single split, with rows in file order.
        private int i = 0;

        @Override
        public void setup(Context context) throws IOException {
            Path[] cacheFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(new FileInputStream(cacheFiles[0].toString())));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    // Each cached line is one row of matrix1, values separated by '#'.
                    List<Integer> is = new ArrayList<Integer>();
                    for (String str : line.split("#")) {
                        is.add(Integer.parseInt(str));
                    }
                    cache.add(is);
                }
            } finally {
                br.close();
            }
        }

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // One row of matrix2; row i of matrix2 pairs with column i of matrix1.
            String[] vals = value.toString().split("#");
            if (i < cache.get(0).size()) {
                for (int j = 0; j < cache.size(); j++) {
                    for (int k = 0; k < vals.length; k++) {
                        // Emit the partial product A[j][i] * B[i][k], keyed by the
                        // output cell (j, k); the reducer sums these into C[j][k].
                        MatrixPair mp = new MatrixPair();
                        mp.setRowindex(j);
                        mp.setColumnindex(k);
                        context.write(mp,
                                new IntWritable(cache.get(j).get(i) * Integer.parseInt(vals[k])));
                    }
                }
            }
            i++;
        }
    }
    public static class MatrixReducer extends Reducer<MatrixPair, IntWritable, Text, Text> {

        // Accumulates one output row at a time. This works because keys arrive
        // sorted by (row, column) and a single reduce task sees every cell.
        private StringBuffer sb = new StringBuffer();

        @Override
        public void reduce(MatrixPair key, Iterable<IntWritable> value, Context context)
                throws IOException, InterruptedException {
            // Sum the partial products for output cell (rowindex, columnindex).
            int sum = 0;
            for (IntWritable i : value) {
                sum += i.get();
            }
            sb.append(sum);
            if (key.getColumnindex() == 4) {
                // Last column of the five-column right matrix: flush the finished row.
                context.write(new Text(sb.toString()), new Text(""));
                sb = new StringBuffer();
            } else {
                sb.append("#");
            }
        }
    }


    public static class MatrixPair implements WritableComparable<MatrixPair> {

        Integer rowindex;
        Integer columnindex;

        @Override
        public void readFields(DataInput read) throws IOException {
            rowindex = read.readInt();
            columnindex = read.readInt();
        }

        @Override
        public void write(DataOutput write) throws IOException {
            write.writeInt(rowindex);
            write.writeInt(columnindex);
        }

        @Override
        public int compareTo(MatrixPair o) {
            // Sort by row first, then by column, so the reducer sees each output
            // row's cells left to right.
            int cmp = rowindex.compareTo(o.getRowindex());
            return cmp != 0 ? cmp : columnindex.compareTo(o.getColumnindex());
        }

        @Override
        public int hashCode() {
            // Needed by the default HashPartitioner so equal cells reach the
            // same reducer if more than one reduce task is used.
            return rowindex * 163 + columnindex;
        }

        @Override
        public boolean equals(Object obj) {
            if (!(obj instanceof MatrixPair)) return false;
            MatrixPair other = (MatrixPair) obj;
            return rowindex.equals(other.rowindex) && columnindex.equals(other.columnindex);
        }

        public Integer getRowindex() {
            return rowindex;
        }

        public void setRowindex(Integer rowindex) {
            this.rowindex = rowindex;
        }

        public Integer getColumnindex() {
            return columnindex;
        }

        public void setColumnindex(Integer columnindex) {
            this.columnindex = columnindex;
        }
    }
    public static void main(String args[]) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: matrix multiply <in> <out>");
            System.exit(2);
        }
        // Ship matrix1 to every mapper via the DistributedCache; the job input
        // <in> is matrix2.
        DistributedCache.addCacheFile(new Path("hdfs://hadoop.nchc.org.tw/user/h3969/matrix1.txt").toUri(), conf);
        Job job = new Job(conf, "my matrix multiply");
        job.setJarByClass(MatrixMR.class);
        job.setMapperClass(MatrixMapper.class);
        job.setReducerClass(MatrixReducer.class);
        job.setMapOutputKeyClass(MatrixPair.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
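To run it on a cluster (the jar name and HDFS paths below are illustrative; matrix2.txt is the job input, while the cached matrix1.txt path is hardcoded in main(), so adjust it to your own HDFS location before packaging):

hadoop jar matrixmr.jar com.matrix.MatrixMR /user/h3969/matrix2.txt /user/h3969/matrix_out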

Data: matrix1.txt

3#4#1
2#4#8
1#2#7
5#6#5

Data: matrix2.txt

5#8#7#4#9
2#6#5#4#2
9#1#3#4#7
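
For reference, the product of the two sample matrices (4×3 times 3×5) worked out by hand, which is what the job's output rows should contain; e.g. the top-left entry is 3*5 + 4*2 + 1*9 = 32. Note the reducer's hardcoded columnindex == 4 assumes exactly this five-column right matrix:

32#49#44#32#42
90#48#58#56#82
72#27#38#40#62
82#81#80#64#92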


This algorithm is suited to multiplying one matrix of up to a few hundred million rows by another matrix of unrestricted size. To multiply two matrices of unrestricted size, the data must be split into blocks before it is added to the DistributedCache.
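A minimal sketch of that pre-split step, under the assumption that the left matrix is cut into row blocks on HDFS, one MatrixMR job is run per block (each block cached as its matrix1), and the per-block outputs are concatenated in block order. The MatrixSplitter class, its arguments, and the ".blockN" naming are hypothetical, not part of the original code:

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MatrixSplitter {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path src = new Path(args[0]);                 // e.g. matrix1.txt on HDFS (hypothetical)
        int rowsPerBlock = Integer.parseInt(args[1]); // rows per cached block (hypothetical)
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(src)));
        int row = 0, block = 0;
        FSDataOutputStream out = fs.create(new Path(src + ".block" + block));
        String line;
        while ((line = br.readLine()) != null) {
            if (row > 0 && row % rowsPerBlock == 0) {
                // Current block is full: close it and start the next block file.
                out.close();
                out = fs.create(new Path(src + ".block" + (++block)));
            }
            out.writeBytes(line + "\n");
            row++;
        }
        out.close();
        br.close();
    }
}

Because each block is a band of rows of the left matrix, its product with the full right matrix is the corresponding band of rows of the final result, so concatenating the per-block job outputs in block order reassembles the full product.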