A Hadoop Implementation of the KMeans Clustering Algorithm


Original article: http://blog.csdn.net/jdplus/article/details/23960127/
The implementation consists of four classes: Assistance (reads centers from HDFS and tests for convergence), KMeansDriver (submits one MapReduce job per iteration), KMeansMapper (assigns each sample to its nearest center), and KMeansReducer (averages each cluster's samples into new centers).

Assistance.java — helper class; see the comments for details.

```java
package KMeans;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

import java.io.IOException;
import java.util.*;

public class Assistance {
    // Read the cluster centers: each line holds a center ID followed by the center's coordinates.
    public static List<ArrayList<Float>> getCenters(String inputpath){
        List<ArrayList<Float>> result = new ArrayList<ArrayList<Float>>();
        Configuration conf = new Configuration();
        try {
            FileSystem hdfs = FileSystem.get(conf);
            Path in = new Path(inputpath);
            FSDataInputStream fsIn = hdfs.open(in);
            LineReader lineIn = new LineReader(fsIn, conf);
            Text line = new Text();
            while (lineIn.readLine(line) > 0){
                String record = line.toString();
                /*
                Hadoop writes a tab between the key and the value of each
                output record, so replace it with a space before splitting.
                */
                String[] fields = record.replace("\t", " ").split(" ");
                ArrayList<Float> tmplist = new ArrayList<Float>();
                for (int i = 0; i < fields.length; ++i){
                    tmplist.add(Float.parseFloat(fields[i]));
                }
                result.add(tmplist);
            }
            fsIn.close();
        } catch (IOException e){
            e.printStackTrace();
        }
        return result;
    }

    // Delete the output of the previous MapReduce job.
    public static void deleteLastResult(String path){
        Configuration conf = new Configuration();
        try {
            FileSystem hdfs = FileSystem.get(conf);
            Path path1 = new Path(path);
            hdfs.delete(path1, true);
        } catch (IOException e){
            e.printStackTrace();
        }
    }

    // Compute the distance between the centers of two consecutive iterations
    // and decide whether the termination condition is met.
    public static boolean isFinished(String oldpath, String newpath, int k, float threshold)
    throws IOException{
        List<ArrayList<Float>> oldcenters = Assistance.getCenters(oldpath);
        List<ArrayList<Float>> newcenters = Assistance.getCenters(newpath);
        float distance = 0;
        for (int i = 0; i < k; ++i){
            // j starts at 1 because index 0 holds the center ID, not a coordinate.
            for (int j = 1; j < oldcenters.get(i).size(); ++j){
                float tmp = Math.abs(oldcenters.get(i).get(j) - newcenters.get(i).get(j));
                distance += Math.pow(tmp, 2);
            }
        }
        System.out.println("Distance = " + distance + " Threshold = " + threshold);
        if (distance < threshold)
            return true;
        /*
        If the termination condition is not met, replace the old centers
        with the centers produced by this iteration.
        */
        Assistance.deleteLastResult(oldpath);
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        // Note: the local staging path is hard-coded here.
        hdfs.copyToLocalFile(new Path(newpath), new Path("/home/hadoop/class/oldcenter.data"));
        hdfs.delete(new Path(oldpath), true);
        hdfs.moveFromLocalFile(new Path("/home/hadoop/class/oldcenter.data"), new Path(oldpath));
        return false;
    }
}
```
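A quick note on the termination test: isFinished simply sums the squared coordinate differences between matching old and new centers over all k centers and compares the total against the threshold. The self-contained sketch below (my addition, with made-up 2-D centers; not part of the original post) isolates that arithmetic as a pure function so it can be sanity-checked without HDFS. It assumes the same row layout getCenters produces: index 0 holds the center ID, coordinates start at index 1.

```java
import java.util.Arrays;
import java.util.List;

public class ConvergenceCheckSketch {
    // Sum of squared differences over coordinates (index 0 is the center ID, so skip it).
    static float squaredShift(List<float[]> oldCenters, List<float[]> newCenters) {
        float distance = 0;
        for (int i = 0; i < oldCenters.size(); ++i) {
            for (int j = 1; j < oldCenters.get(i).length; ++j) {
                float diff = oldCenters.get(i)[j] - newCenters.get(i)[j];
                distance += diff * diff;
            }
        }
        return distance;
    }

    public static void main(String[] args) {
        // Hypothetical 2-D centers in the parsed layout: {id, x, y}.
        List<float[]> oldC = Arrays.asList(new float[]{0, 1.0f, 2.0f}, new float[]{1, 5.0f, 5.0f});
        List<float[]> newC = Arrays.asList(new float[]{0, 1.1f, 2.0f}, new float[]{1, 5.0f, 4.9f});
        float shift = squaredShift(oldC, newC);
        System.out.println("shift = " + shift + ", converged = " + (shift < 0.0001f));
    }
}
```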
KMeansDriver.java — the job driver class.

```java
package KMeans;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;

public class KMeansDriver{
    public static void main(String[] args) throws Exception{
        int repeated = 0;

        /*
        Keep submitting MapReduce jobs until the distance between the centers
        of two consecutive iterations falls below the threshold, or until the
        iteration cap is reached.
        */
        do {
            Configuration conf = new Configuration();
            String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
            if (otherArgs.length != 6){
                System.err.println("Usage: <in> <out> <oldcenters> <newcenters> <k> <threshold>");
                System.exit(2);
            }
            conf.set("centerpath", otherArgs[2]);
            conf.set("kpath", otherArgs[4]);
            Job job = new Job(conf, "KMeansCluster");  // create the MapReduce job
            job.setJarByClass(KMeansDriver.class);     // set the job's main class

            Path in = new Path(otherArgs[0]);
            Path out = new Path(otherArgs[1]);
            FileInputFormat.addInputPath(job, in);     // set the input path
            FileSystem fs = FileSystem.get(conf);
            if (fs.exists(out)){                       // delete the output path if it already exists
                fs.delete(out, true);
            }
            FileOutputFormat.setOutputPath(job, out);  // set the output path

            job.setMapperClass(KMeansMapper.class);    // set the Mapper class
            job.setReducerClass(KMeansReducer.class);  // set the Reducer class

            job.setOutputKeyClass(IntWritable.class);  // set the output key class
            job.setOutputValueClass(Text.class);       // set the output value class

            job.waitForCompletion(true);               // run the job

            ++repeated;
            System.out.println("We have repeated " + repeated + " times.");
            // Note: raw args are used here; this matches otherArgs as long as
            // no generic options are passed on the command line.
        } while (repeated < 10 && !Assistance.isFinished(args[2], args[3], Integer.parseInt(args[4]), Float.parseFloat(args[5])));
        // Cluster the data set using the final centers.
        Cluster(args);
    }

    public static void Cluster(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException{
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 6){
            System.err.println("Usage: <in> <out> <oldcenters> <newcenters> <k> <threshold>");
            System.exit(2);
        }
        conf.set("centerpath", otherArgs[2]);
        conf.set("kpath", otherArgs[4]);
        Job job = new Job(conf, "KMeansCluster");
        job.setJarByClass(KMeansDriver.class);

        Path in = new Path(otherArgs[0]);
        Path out = new Path(otherArgs[1]);
        FileInputFormat.addInputPath(job, in);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(out)){
            fs.delete(out, true);
        }
        FileOutputFormat.setOutputPath(job, out);

        // This pass only assigns samples to clusters, so no custom Reducer is
        // set; Hadoop's default identity reducer passes each
        // (clusterID, sample) pair straight through to the output.
        job.setMapperClass(KMeansMapper.class);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        job.waitForCompletion(true);
    }
}
```
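For reference, the run shown in the log below launches the driver as `hadoop jar KMeans.jar KMeans.KMeansDriver input/iris.data output input/oldcenter.data output/part-r-00000 3 0.0001`, i.e. <in> = input/iris.data, <out> = output, <oldcenters> = input/oldcenter.data, <newcenters> = output/part-r-00000, <k> = 3, <threshold> = 0.0001. The initial <oldcenters> file must already use the layout the reducer emits and getCenters parses: one center per line, an integer center ID followed by its coordinates, separated by spaces (or a tab after the ID, as Hadoop writes it). For the 4-feature iris data it would look something like this (the values here are made up for illustration, not the actual initial centers used in the run):

```
0 5.1 3.5 1.4 0.2
1 6.2 2.9 4.3 1.3
2 6.8 3.0 5.5 2.1
```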
KMeansMapper.java

```java
package KMeans;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class KMeansMapper extends Mapper<Object, Text, IntWritable, Text> {
    @Override
    public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException{
        String line = value.toString();
        String[] fields = line.split(" ");
        List<ArrayList<Float>> centers = Assistance.getCenters(context.getConfiguration().get("centerpath"));
        int k = Integer.parseInt(context.getConfiguration().get("kpath"));
        float minDist = Float.MAX_VALUE;
        int centerIndex = k;
        // Compute the distance from the sample to every center and assign the
        // sample to the nearest one.
        for (int i = 0; i < k; ++i){
            float currentDist = 0;
            for (int j = 0; j < fields.length; ++j){
                // get(j + 1): index 0 of a center row is its ID; coordinates start at 1.
                float tmp = Math.abs(centers.get(i).get(j + 1) - Float.parseFloat(fields[j]));
                currentDist += Math.pow(tmp, 2);
            }
            if (minDist > currentDist){
                minDist = currentDist;
                centerIndex = i;
            }
        }
        context.write(new IntWritable(centerIndex), new Text(value));
    }
}
```

KMeansReducer.java

```java
package KMeans;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class KMeansReducer extends Reducer<IntWritable, Text, IntWritable, Text> {
    @Override
    public void reduce(IntWritable key, Iterable<Text> value, Context context)
    throws IOException, InterruptedException{
        List<ArrayList<Float>> assistList = new ArrayList<ArrayList<Float>>();
        String tmpResult = "";
        for (Text val : value){
            String line = val.toString();
            String[] fields = line.split(" ");
            ArrayList<Float> tmpList = new ArrayList<Float>();
            for (int i = 0; i < fields.length; ++i){
                tmpList.add(Float.parseFloat(fields[i]));
            }
            assistList.add(tmpList);
        }
        // Compute the new center: average each coordinate over all samples in the cluster.
        for (int i = 0; i < assistList.get(0).size(); ++i){
            float sum = 0;
            for (int j = 0; j < assistList.size(); ++j){
                sum += assistList.get(j).get(i);
            }
            float tmp = sum / assistList.size();
            if (i == 0){
                tmpResult += tmp;
            }
            else{
                tmpResult += " " + tmp;
            }
        }
        Text result = new Text(tmpResult);
        context.write(key, result);
    }
}
```
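Taken together, the mapper and reducer implement one Lloyd iteration: map assigns each sample to its nearest center (by squared Euclidean distance), and reduce averages each cluster's samples into a new center. The sketch below (my addition, with hypothetical 2-D data and the center-ID column dropped for brevity) replays both steps in plain Java, which can be handy for checking the arithmetic before debugging on a cluster. The HashMap grouping stands in for Hadoop's shuffle phase.

```java
import java.util.*;

public class LloydStepSketch {
    // Map step: index of the nearest center (squared Euclidean distance).
    static int nearest(float[] point, List<float[]> centers) {
        int best = 0;
        float bestDist = Float.MAX_VALUE;
        for (int i = 0; i < centers.size(); ++i) {
            float d = 0;
            for (int j = 0; j < point.length; ++j) {
                float diff = centers.get(i)[j] - point[j];
                d += diff * diff;
            }
            if (d < bestDist) { bestDist = d; best = i; }
        }
        return best;
    }

    // Reduce step: coordinate-wise mean of the points assigned to one cluster.
    static float[] mean(List<float[]> points) {
        float[] m = new float[points.get(0).length];
        for (float[] p : points)
            for (int j = 0; j < m.length; ++j) m[j] += p[j];
        for (int j = 0; j < m.length; ++j) m[j] /= points.size();
        return m;
    }

    public static void main(String[] args) {
        List<float[]> centers = Arrays.asList(new float[]{0f, 0f}, new float[]{10f, 10f});
        List<float[]> data = Arrays.asList(
            new float[]{1f, 1f}, new float[]{2f, 0f},
            new float[]{9f, 11f}, new float[]{10f, 9f});
        // Group points by nearest center (the shuffle phase in the real job).
        Map<Integer, List<float[]>> groups = new HashMap<Integer, List<float[]>>();
        for (float[] p : data) {
            int c = nearest(p, centers);
            if (!groups.containsKey(c)) groups.put(c, new ArrayList<float[]>());
            groups.get(c).add(p);
        }
        for (Map.Entry<Integer, List<float[]>> e : groups.entrySet())
            System.out.println(e.getKey() + " -> " + Arrays.toString(mean(e.getValue())));
    }
}
```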
Job run output:

```
hadoop@shaobo-ThinkPad-E420:~/class$ hadoop jar KMeans.jar KMeans.KMeansDriver input/iris.data output input/oldcenter.data output/part-r-00000 3 0.0001
14/04/17 16:15:50 INFO input.FileInputFormat: Total input paths to process : 1
14/04/17 16:15:51 INFO mapred.JobClient: Running job: job_201404171511_0012
14/04/17 16:15:52 INFO mapred.JobClient:  map 0% reduce 0%
14/04/17 16:16:07 INFO mapred.JobClient:  map 100% reduce 0%
14/04/17 16:16:19 INFO mapred.JobClient:  map 100% reduce 100%
14/04/17 16:16:24 INFO mapred.JobClient: Job complete: job_201404171511_0012
14/04/17 16:16:24 INFO mapred.JobClient: Counters: 25
14/04/17 16:16:24 INFO mapred.JobClient:   Job Counters
14/04/17 16:16:24 INFO mapred.JobClient:     Launched reduce tasks=1
14/04/17 16:16:24 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=12041
14/04/17 16:16:24 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/04/17 16:16:24 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/04/17 16:16:24 INFO mapred.JobClient:     Launched map tasks=1
14/04/17 16:16:24 INFO mapred.JobClient:     Data-local map tasks=1
14/04/17 16:16:24 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=10030
14/04/17 16:16:24 INFO mapred.JobClient:   File Output Format Counters
14/04/17 16:16:24 INFO mapred.JobClient:     Bytes Written=125
14/04/17 16:16:24 INFO mapred.JobClient:   FileSystemCounters
14/04/17 16:16:24 INFO mapred.JobClient:     FILE_BYTES_READ=3306
14/04/17 16:16:24 INFO mapred.JobClient:     HDFS_BYTES_READ=11214
14/04/17 16:16:24 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=48901
14/04/17 16:16:24 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=125
14/04/17 16:16:24 INFO mapred.JobClient:   File Input Format Counters
14/04/17 16:16:24 INFO mapred.JobClient:     Bytes Read=2550
14/04/17 16:16:24 INFO mapred.JobClient:   Map-Reduce Framework
14/04/17 16:16:24 INFO mapred.JobClient:     Reduce input groups=3
14/04/17 16:16:24 INFO mapred.JobClient:     Map output materialized bytes=3306
14/04/17 16:16:24 INFO mapred.JobClient:     Combine output records=0
14/04/17 16:16:24 INFO mapred.JobClient:     Map input records=150
14/04/17 16:16:24 INFO mapred.JobClient:     Reduce shuffle bytes=0
14/04/17 16:16:24 INFO mapred.JobClient:     Reduce output records=3
14/04/17 16:16:24 INFO mapred.JobClient:     Spilled Records=300
14/04/17 16:16:24 INFO mapred.JobClient:     Map output bytes=3000
14/04/17 16:16:24 INFO mapred.JobClient:     Combine input records=0
14/04/17 16:16:24 INFO mapred.JobClient:     Map output records=150
14/04/17 16:16:24 INFO mapred.JobClient:     SPLIT_RAW_BYTES=114
14/04/17 16:16:24 INFO mapred.JobClient:     Reduce input records=150
We have repeated 1 times.
Distance = 0.35025704 Threshold = 1.0E-4
14/04/17 16:16:24 INFO input.FileInputFormat: Total input paths to process : 1
14/04/17 16:16:25 INFO mapred.JobClient: Running job: job_201404171511_0013
14/04/17 16:16:26 INFO mapred.JobClient:  map 0% reduce 0%
14/04/17 16:16:40 INFO mapred.JobClient:  map 100% reduce 0%
14/04/17 16:16:52 INFO mapred.JobClient:  map 100% reduce 100%
14/04/17 16:16:57 INFO mapred.JobClient: Job complete: job_201404171511_0013
14/04/17 16:16:57 INFO mapred.JobClient: Counters: 25
14/04/17 16:16:57 INFO mapred.JobClient:   Job Counters
14/04/17 16:16:57 INFO mapred.JobClient:     Launched reduce tasks=1
14/04/17 16:16:57 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=12077
14/04/17 16:16:57 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/04/17 16:16:57 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/04/17 16:16:57 INFO mapred.JobClient:     Launched map tasks=1
14/04/17 16:16:57 INFO mapred.JobClient:     Data-local map tasks=1
14/04/17 16:16:57 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=10048
14/04/17 16:16:57 INFO mapred.JobClient:   File Output Format Counters
14/04/17 16:16:57 INFO mapred.JobClient:     Bytes Written=116
14/04/17 16:16:57 INFO mapred.JobClient:   FileSystemCounters
14/04/17 16:16:57 INFO mapred.JobClient:     FILE_BYTES_READ=3306
14/04/17 16:16:57 INFO mapred.JobClient:     HDFS_BYTES_READ=21414
14/04/17 16:16:57 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=48901
14/04/17 16:16:57 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=116
14/04/17 16:16:57 INFO mapred.JobClient:   File Input Format Counters
14/04/17 16:16:57 INFO mapred.JobClient:     Bytes Read=2550
14/04/17 16:16:57 INFO mapred.JobClient:   Map-Reduce Framework
14/04/17 16:16:57 INFO mapred.JobClient:     Reduce input groups=3
14/04/17 16:16:57 INFO mapred.JobClient:     Map output materialized bytes=3306
14/04/17 16:16:57 INFO mapred.JobClient:     Combine output records=0
14/04/17 16:16:57 INFO mapred.JobClient:     Map input records=150
14/04/17 16:16:57 INFO mapred.JobClient:     Reduce shuffle bytes=3306
14/04/17 16:16:57 INFO mapred.JobClient:     Reduce output records=3
14/04/17 16:16:57 INFO mapred.JobClient:     Spilled Records=300
14/04/17 16:16:57 INFO mapred.JobClient:     Map output bytes=3000
14/04/17 16:16:57 INFO mapred.JobClient:     Combine input records=0
14/04/17 16:16:57 INFO mapred.JobClient:     Map output records=150
14/04/17 16:16:57 INFO mapred.JobClient:     SPLIT_RAW_BYTES=114
14/04/17 16:16:57 INFO mapred.JobClient:     Reduce input records=150
We have repeated 2 times.
Distance = 0.006297064 Threshold = 1.0E-4
14/04/17 16:16:57 INFO input.FileInputFormat: Total input paths to process : 1
14/04/17 16:16:58 INFO mapred.JobClient: Running job: job_201404171511_0014
14/04/17 16:16:59 INFO mapred.JobClient:  map 0% reduce 0%
14/04/17 16:17:14 INFO mapred.JobClient:  map 100% reduce 0%
14/04/17 16:17:25 INFO mapred.JobClient:  map 100% reduce 100%
14/04/17 16:17:30 INFO mapred.JobClient: Job complete: job_201404171511_0014
14/04/17 16:17:30 INFO mapred.JobClient: Counters: 25
14/04/17 16:17:30 INFO mapred.JobClient:   Job Counters
14/04/17 16:17:30 INFO mapred.JobClient:     Launched reduce tasks=1
14/04/17 16:17:30 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=12046
14/04/17 16:17:30 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/04/17 16:17:30 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/04/17 16:17:30 INFO mapred.JobClient:     Launched map tasks=1
14/04/17 16:17:30 INFO mapred.JobClient:     Data-local map tasks=1
14/04/17 16:17:30 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=10051
14/04/17 16:17:30 INFO mapred.JobClient:   File Output Format Counters
14/04/17 16:17:30 INFO mapred.JobClient:     Bytes Written=116
14/04/17 16:17:30 INFO mapred.JobClient:   FileSystemCounters
14/04/17 16:17:30 INFO mapred.JobClient:     FILE_BYTES_READ=3306
14/04/17 16:17:30 INFO mapred.JobClient:     HDFS_BYTES_READ=20064
14/04/17 16:17:30 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=48901
14/04/17 16:17:30 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=116
14/04/17 16:17:30 INFO mapred.JobClient:   File Input Format Counters
14/04/17 16:17:30 INFO mapred.JobClient:     Bytes Read=2550
14/04/17 16:17:30 INFO mapred.JobClient:   Map-Reduce Framework
14/04/17 16:17:30 INFO mapred.JobClient:     Reduce input groups=3
14/04/17 16:17:30 INFO mapred.JobClient:     Map output materialized bytes=3306
14/04/17 16:17:30 INFO mapred.JobClient:     Combine output records=0
14/04/17 16:17:30 INFO mapred.JobClient:     Map input records=150
14/04/17 16:17:30 INFO mapred.JobClient:     Reduce shuffle bytes=0
14/04/17 16:17:30 INFO mapred.JobClient:     Reduce output records=3
14/04/17 16:17:30 INFO mapred.JobClient:     Spilled Records=300
14/04/17 16:17:30 INFO mapred.JobClient:     Map output bytes=3000
14/04/17 16:17:30 INFO mapred.JobClient:     Combine input records=0
14/04/17 16:17:30 INFO mapred.JobClient:     Map output records=150
14/04/17 16:17:30 INFO mapred.JobClient:     SPLIT_RAW_BYTES=114
14/04/17 16:17:30 INFO mapred.JobClient:     Reduce input records=150
We have repeated 3 times.
Distance = 0.0 Threshold = 1.0E-4
14/04/17 16:17:30 INFO input.FileInputFormat: Total input paths to process : 1
14/04/17 16:17:30 INFO mapred.JobClient: Running job: job_201404171511_0015
14/04/17 16:17:31 INFO mapred.JobClient:  map 0% reduce 0%
14/04/17 16:17:47 INFO mapred.JobClient:  map 100% reduce 0%
14/04/17 16:17:59 INFO mapred.JobClient:  map 100% reduce 100%
14/04/17 16:18:04 INFO mapred.JobClient: Job complete: job_201404171511_0015
14/04/17 16:18:04 INFO mapred.JobClient: Counters: 25
14/04/17 16:18:04 INFO mapred.JobClient:   Job Counters
14/04/17 16:18:04 INFO mapred.JobClient:     Launched reduce tasks=1
14/04/17 16:18:04 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=12036
14/04/17 16:18:04 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/04/17 16:18:04 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/04/17 16:18:04 INFO mapred.JobClient:     Launched map tasks=1
14/04/17 16:18:04 INFO mapred.JobClient:     Data-local map tasks=1
14/04/17 16:18:04 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=10050
14/04/17 16:18:04 INFO mapred.JobClient:   File Output Format Counters
14/04/17 16:18:04 INFO mapred.JobClient:     Bytes Written=2700
14/04/17 16:18:04 INFO mapred.JobClient:   FileSystemCounters
14/04/17 16:18:04 INFO mapred.JobClient:     FILE_BYTES_READ=3306
14/04/17 16:18:04 INFO mapred.JobClient:     HDFS_BYTES_READ=20064
14/04/17 16:18:04 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=48717
14/04/17 16:18:04 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=2700
14/04/17 16:18:04 INFO mapred.JobClient:   File Input Format Counters
14/04/17 16:18:04 INFO mapred.JobClient:     Bytes Read=2550
14/04/17 16:18:04 INFO mapred.JobClient:   Map-Reduce Framework
14/04/17 16:18:04 INFO mapred.JobClient:     Reduce input groups=3
14/04/17 16:18:04 INFO mapred.JobClient:     Map output materialized bytes=3306
14/04/17 16:18:04 INFO mapred.JobClient:     Combine output records=0
14/04/17 16:18:04 INFO mapred.JobClient:     Map input records=150
14/04/17 16:18:04 INFO mapred.JobClient:     Reduce shuffle bytes=0
14/04/17 16:18:04 INFO mapred.JobClient:     Reduce output records=150
14/04/17 16:18:04 INFO mapred.JobClient:     Spilled Records=300
14/04/17 16:18:04 INFO mapred.JobClient:     Map output bytes=3000
14/04/17 16:18:04 INFO mapred.JobClient:     Combine input records=0
14/04/17 16:18:04 INFO mapred.JobClient:     Map output records=150
14/04/17 16:18:04 INFO mapred.JobClient:     SPLIT_RAW_BYTES=114
14/04/17 16:18:04 INFO mapred.JobClient:     Reduce input records=150
```

The first three jobs are center-update iterations: each writes only 3 output records, one line per center, and convergence is reached after the third iteration (Distance = 0.0 < 1.0E-4). The fourth job is the final clustering pass launched by Cluster(): since no custom Reducer is set, the identity reducer writes all 150 (clusterID, sample) pairs, which is why its Reduce output records and bytes written are much larger.