时间序列数据(收盘价):

时间序列   日期        收盘价
1          2013-10-01  10
2          2013-10-02  18
3          2013-10-03  20
4          2013-10-04  30
5          2013-10-07  24
6          2013-10-08  33
7          2013-10-09  27

要计算3天的移动平均数:

时间序列   日期        移动平均   如何计算
1          2013-10-01  10.00      = 10/1
2          2013-10-02  14.00      = (10+18)/2
3          2013-10-03  16.00      = (10+18+20)/3
4          2013-10-04  22.66      = (18+20+30)/3

例子2:时间序列数据(URL访问数)

计算一个特定时间窗口内各个日期访问不同URL的不同访问者人数的移动平均数。

URL    日期        不同访问者人数
------------------------
URL1   2013-10-01  400
URL1   2013-10-02  200
URL1   2013-10-03  300
URL1   2013-10-04  700
URL1   2013-10-05  800
URL2   2013-10-01  10

3天的URL访问数的移动平均数:

URL    日期        移动平均数
-----------------------
URL1   2013-10-01  400
URL1   2013-10-02  300
URL1   2013-10-03  300
URL1   2013-10-04  400
URL1   2013-10-05  600
URL2   2013-10-01  10

一、POJO移动平均解决方案

解决方案1:使用队列

维护一个特定窗口大小的队列和一个累加和sum。对于每一个元素,先将其值累加到sum中并将其加入队尾:
- 如果加入该元素后队列的大小没有超过特定窗口大小,则继续处理下一个元素;
- 如果加入该元素后队列的大小超过了特定窗口大小,则将队首元素移除,【同时将sum减去队首元素的值】,这样可以保证累加和随窗口滑动。

移动平均的计算:当队列不为空时,移动平均 = 累加和 / 队列大小。

package yidongpingjun.pojo;import java.util.Queue;import java.util.LinkedList;/**  * Simple moving average by using a queue data structure. * * @author Mahmoud Parsian * */public class SimpleMovingAverage {    private double sum = 0.0;    private final int period;    private final Queue<Double> window = new LinkedList<Double>();     public SimpleMovingAverage(int period) {        if (period < 1) {           throw new IllegalArgumentException("period must be > 0");        }        this.period = period;    }     public void addNewNumber(double number) {        sum += number;        window.add(number);        if (window.size() > period) {            sum -= window.remove();        }    }     public double getMovingAverage() {        if (window.isEmpty()) {            throw new IllegalArgumentException("average is undefined");        }        return sum / window.size();    }}


package yidongpingjun.pojo;/**  * Simple moving average by using an array data structure. * * @author Mahmoud Parsian * */public class SimpleMovingAverageUsingArray {    private double sum = 0.0;    private final int period;    private double[] window = null;    private int pointer = 0;    private int size = 0;     public SimpleMovingAverageUsingArray(int period) {        if (period < 1) {           throw new IllegalArgumentException("period must be > 0");        }        this.period = period;        window = new double[period];    }     public void addNewNumber(double number) {        sum += number;        if (size < period) {            window[pointer++] = number;            size++;        }        else {            // size = period (size cannot be > period)            pointer = pointer % period;            sum -=  window[pointer];            window[pointer++] = number;        }    }     public double getMovingAverage() {        if (size == 0) {            throw new IllegalArgumentException("average is undefined");        }        return sum / size;    }}


package yidongpingjun.pojo;import org.apache.log4j.Logger;import org.apache.log4j.BasicConfigurator;/**  * Basic testing of Simple moving average. * * @author Mahmoud Parsian * */public class TestSimpleMovingAverage {     private static final Logger THE_LOGGER = Logger.getLogger(TestSimpleMovingAverage.class);    public static void main(String[] args) {        // The invocation of the BasicConfigurator.configure method         // creates a rather simple log4j setup. This method is hardwired         // to add to the root logger a ConsoleAppender.        BasicConfigurator.configure();                // time series        1   2   3  4   5   6   7        double[] testData = {10, 18, 20, 30, 24, 33, 27};        int[] allWindowSizes = {3, 4};        for (int windowSize : allWindowSizes) {            SimpleMovingAverage sma = new SimpleMovingAverage(windowSize);            THE_LOGGER.info("windowSize = " + windowSize);            for (double x : testData) {                sma.addNewNumber(x);                THE_LOGGER.info("Next number = " + x + ", SMA = " + sma.getMovingAverage());            }            THE_LOGGER.info("---");        }    }}


package yidongpingjun;import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import java.text.SimpleDateFormat;import org.apache.hadoop.io.Writable;/** *  * TimeSeriesData represents a pair of  *  (time-series-timestamp, time-series-value). *   * @author Mahmoud Parsian * */public class TimeSeriesData    implements Writable, Comparable<TimeSeriesData> {private long timestamp;private double value;public static TimeSeriesData copy(TimeSeriesData tsd) {return new TimeSeriesData(tsd.timestamp, tsd.value);}public TimeSeriesData(long timestamp, double value) {set(timestamp, value);}public TimeSeriesData() {}public void set(long timestamp, double value) {this.timestamp = timestamp;this.value = value;}public long getTimestamp() {return this.timestamp;}public double getValue() {return this.value;}/** * Deserializes the point from the underlying data. * @param in a DataInput object to read the point from. */public void readFields(DataInput in) throws IOException {this.timestamp  = in.readLong();this.value  = in.readDouble();}/** * Convert a binary data into TimeSeriesData *  * @param in A DataInput object to read from. * @return A TimeSeriesData object * @throws IOException */public static TimeSeriesData read(DataInput in) throws IOException {TimeSeriesData tsData = new TimeSeriesData();tsData.readFields(in);return tsData;}public String getDate() {return DateUtil.getDateAsString(this.timestamp);}   /**    * Creates a clone of this object    */    public TimeSeriesData clone() {       return new TimeSeriesData(timestamp, value);    }@Overridepublic void write(DataOutput out) throws IOException {out.writeLong(this.timestamp );out.writeDouble(this.value );}/** * Used in sorting the data in the reducer */@Overridepublic int compareTo(TimeSeriesData data) {if (this.timestamp  < data.timestamp ) {return -1;} else if (this.timestamp  > data.timestamp ) {return 1;}else {   return 0;}}public String toString() {       return "("+timestamp+","+value+")";    }}

package yidongpingjun.memorysort;import java.util.Date;import java.io.IOException;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.mapreduce.Mapper;import org.apache.commons.lang.StringUtils;import yidongpingjun.DateUtil;import yidongpingjun.TimeSeriesData;/*** *  * @author chenjie *输入: *GOOG,2004-11-04,184.70    GOOG,2004-11-03,191.67    GOOG,2004-11-02,194.87    AAPL,2013-10-09,486.59    AAPL,2013-10-08,480.94    AAPL,2013-10-07,487.75    AAPL,2013-10-04,483.03    AAPL,2013-10-03,483.41    IBM,2013-09-30,185.18    IBM,2013-09-27,186.92    IBM,2013-09-26,190.22    IBM,2013-09-25,189.47    GOOG,2013-07-19,896.60    GOOG,2013-07-18,910.68    GOOG,2013-07-17,918.55 * * */public class SortInMemory_MovingAverageMapper        extends Mapper<LongWritable, Text, Text, TimeSeriesData> {    private final Text reducerKey = new Text();   private final TimeSeriesData reducerValue = new TimeSeriesData();         /**    * value:GOOG,2004-11-04,184.70    */   public void map(LongWritable key, Text value, Context context)       throws IOException, InterruptedException {       String record = value.toString();       if ((record == null) || (record.length() == 0)) {          return;       }       String[] tokens = StringUtils.split(record.trim(), ",");       if (tokens.length == 3) {          Date date = DateUtil.getDate(tokens[1]);//2004-11-04,          if (date == null) {           return;          }          reducerKey.set(tokens[0]); // GOOG          reducerValue.set(date.getTime(), Double.parseDouble(tokens[2]));          context.write(reducerKey, reducerValue);       }       else {          // log as error, not enough tokens       }   }}

package yidongpingjun.memorysort;import java.io.IOException;import java.util.List;import java.util.ArrayList;import java.util.Collections;//import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.Reducer.Context;//import yidongpingjun.DateUtil;import yidongpingjun.TimeSeriesData;public class SortInMemory_MovingAverageReducer    extends Reducer<Text, TimeSeriesData, Text, Text> {    int windowSize = 5; // default window size   /** *  will be run only once  *  get parameters from Hadoop's configuration */public void setup(Context context)        throws IOException, InterruptedException {        this.windowSize = context.getConfiguration().getInt("moving.average.window.size", 5);        System.out.println("setup(): key="+windowSize);    }public void reduce(Text key, Iterable<TimeSeriesData> values, Context context)throws IOException, InterruptedException {               System.out.println("reduce(): key="+key.toString());// build the unsorted list of timeseriesList<TimeSeriesData> timeseries = new ArrayList<TimeSeriesData>();for (TimeSeriesData tsData : values) {TimeSeriesData copy = TimeSeriesData.copy(tsData);timeseries.add(copy);} // sort the timeseries data in memory and        // apply moving average algorithm to sorted timeseries        Collections.sort(timeseries);        System.out.println("reduce(): timeseries="+timeseries.toString());                        // calculate prefix sum        double sum = 0.0;        for (int i=0; i < windowSize-1; i++) {        sum += timeseries.get(i).getValue();        }                // now we have enough timeseries data to calculate moving averageText outputValue = new Text(); // reuse object        for (int i = windowSize-1; i < timeseries.size(); i++) {            System.out.println("reduce(): key="+key.toString() + "  i="+i);        sum += timeseries.get(i).getValue();        double movingAverage = sum / windowSize;        long timestamp = 
timeseries.get(i).getTimestamp();        outputValue.set(DateUtil.getDateAsString(timestamp) + "," + movingAverage);        // send output to HDFS        context.write(key, outputValue);                // prepare for next iteration        sum -= timeseries.get(i-windowSize+1).getValue();        }} // reduce}


package yidongpingjun.memorysort;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.Text;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.util.GenericOptionsParser;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;//import yidongpingjun.HadoopUtil;import yidongpingjun.TimeSeriesData;/** * MapReduce job for moving averages of time series data  * by using in memory sort (without secondary sort). * * @author Mahmoud Parsian * */public class SortInMemory_MovingAverageDriver {     private static final String INPATH = "input/gupiao1.txt";// 输入文件路径    private static final String OUTPATH = "output/gupiao1";// 输出文件路径        public static void main(String[] args) throws Exception {       Configuration conf = new Configuration();       String[] otherArgs = new String[3];       otherArgs[0] = "2";       otherArgs[1] = INPATH;       otherArgs[2] = OUTPATH;       if (otherArgs.length != 3) {          System.err.println("Usage: SortInMemory_MovingAverageDriver <window_size> <input> <output>");          System.exit(1);       }       System.out.println("args[0]: <window_size>="+otherArgs[0]);       System.out.println("args[1]: <input>="+otherArgs[1]);       System.out.println("args[2]: <output>="+otherArgs[2]);              Job job = new Job(conf, "SortInMemory_MovingAverageDriver");       // add jars to distributed cache     //  HadoopUtil.addJarsToDistributedCache(job, "/lib/");              // set mapper/reducer       job.setMapperClass(SortInMemory_MovingAverageMapper.class);       job.setReducerClass(SortInMemory_MovingAverageReducer.class);              // define mapper's output key-value       job.setMapOutputKeyClass(Text.class);       job.setMapOutputValueClass(TimeSeriesData.class);                     // 
define reducer's output key-value       job.setOutputKeyClass(Text.class);       job.setOutputValueClass(Text.class);              // set window size for moving average calculation       int windowSize = Integer.parseInt(otherArgs[0]);       job.getConfiguration().setInt("moving.average.window.size", windowSize);                    // define I/O       FileInputFormat.addInputPath(job, new Path(otherArgs[1]));       FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));              job.setInputFormatClass(TextInputFormat.class);        job.setOutputFormatClass(TextOutputFormat.class);              System.exit(job.waitForCompletion(true) ? 0 : 1);    }}


AAPL   2013-10-04,483.22
AAPL   2013-10-07,485.39
AAPL   2013-10-08,484.345
AAPL   2013-10-09,483.765
GOOG   2004-11-03,193.26999999999998
GOOG   2004-11-04,188.18499999999997
GOOG   2013-07-17,551.625
GOOG   2013-07-18,914.615
GOOG   2013-07-19,903.6400000000001
IBM    2013-09-26,189.845
IBM    2013-09-27,188.57
IBM    2013-09-30,186.05


package yidongpingjun.secondarysort;import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;//import org.apache.hadoop.io.WritableComparable;import org.apache.hadoop.io.WritableComparator;public class CompositeKey implements WritableComparable<CompositeKey> {    // natural key is (name)    // composite key is a pair (name, timestamp)private String name;private long timestamp;public CompositeKey(String name, long timestamp) {set(name, timestamp);}public CompositeKey() {}public void set(String name, long timestamp) {this.name = name;this.timestamp = timestamp;}public String getName() {return this.name;}public long getTimestamp() {return this.timestamp;}@Overridepublic void readFields(DataInput in) throws IOException {this.name = in.readUTF();this.timestamp = in.readLong();}@Overridepublic void write(DataOutput out) throws IOException {out.writeUTF(this.name);out.writeLong(this.timestamp);}@Overridepublic int compareTo(CompositeKey other) {if (this.name.compareTo(other.name) != 0) {return this.name.compareTo(other.name);} else if (this.timestamp != other.timestamp) {return timestamp < other.timestamp ? -1 : 1;} else {return 0;}}public static class CompositeKeyComparator extends WritableComparator {public CompositeKeyComparator() {super(CompositeKey.class);}public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {return compareBytes(b1, s1, l1, b2, s2, l2);}}static { // register this comparatorWritableComparator.define(CompositeKey.class,new CompositeKeyComparator());}}

package yidongpingjun.secondarysort;import java.util.Date;import java.io.IOException;//import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapred.MapReduceBase;import org.apache.hadoop.mapred.Mapper;import org.apache.hadoop.mapred.OutputCollector;import org.apache.hadoop.mapred.Reporter;import org.apache.commons.lang.StringUtils;//import yidongpingjun.DateUtil;import yidongpingjun.TimeSeriesData;public class SortByMRF_MovingAverageMapper extends MapReduceBase        implements Mapper<LongWritable, Text, CompositeKey, TimeSeriesData> {    // reuse Hadoop's Writable objects    private final CompositeKey reducerKey = new CompositeKey();    private final TimeSeriesData reducerValue = new TimeSeriesData();    @Override    public void map(LongWritable inkey, Text value,            OutputCollector<CompositeKey, TimeSeriesData> output,            Reporter reporter) throws IOException {        String record = value.toString();        if ((record == null) || (record.length() == 0)) {            return;        }        String[] tokens = StringUtils.split(record, ",");        if (tokens.length == 3) {            // tokens[0] = name of timeseries as string            // tokens[1] = timestamp            // tokens[2] = value of timeseries as double            Date date = DateUtil.getDate(tokens[1]);            if (date == null) {                return;            }            long timestamp = date.getTime();            reducerKey.set(tokens[0], timestamp);            reducerValue.set(timestamp, Double.parseDouble(tokens[2]));            // emit key-value pair            output.collect(reducerKey, reducerValue);        }         else {            // log as error, not enough tokens        }    }}

package yidongpingjun.secondarysort;import org.apache.hadoop.io.WritableComparable;import org.apache.hadoop.io.WritableComparator;public class CompositeKeyComparator extends WritableComparator {    protected CompositeKeyComparator() {        super(CompositeKey.class, true);    }    @Override    public int compare(WritableComparable w1, WritableComparable w2) {        CompositeKey key1 = (CompositeKey) w1;        CompositeKey key2 = (CompositeKey) w2;        int comparison = key1.getName().compareTo(key2.getName());        if (comparison == 0) {            // names are equal here            if (key1.getTimestamp() == key2.getTimestamp()) {                return 0;            } else if (key1.getTimestamp() < key2.getTimestamp()) {                return -1;            } else {                return 1;            }        }         else {            return comparison;        }    }}

package yidongpingjun.secondarysort;import org.apache.hadoop.mapred.JobConf;import org.apache.hadoop.mapred.Partitioner;import yidongpingjun.TimeSeriesData;public class NaturalKeyPartitioner implements        Partitioner<CompositeKey, TimeSeriesData> {    @Override    public int getPartition(CompositeKey key,            TimeSeriesData value,            int numberOfPartitions) {        return Math.abs((int) (hash(key.getName()) % numberOfPartitions));    }    @Override    public void configure(JobConf jobconf) {    }    /**     * adapted from String.hashCode()     */    static long hash(String str) {        long h = 1125899906842597L; // prime        int length = str.length();        for (int i = 0; i < length; i++) {            h = 31 * h + str.charAt(i);        }        return h;    }}

package yidongpingjun.secondarysort;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator for {@link CompositeKey}: compares names only, so keys
 * that differ just in timestamp compare as equal.
 */
public class NaturalKeyGroupingComparator extends WritableComparator {

    protected NaturalKeyGroupingComparator() {
        super(CompositeKey.class, true);
    }

    /**
     * @param w1 first key; must be a {@link CompositeKey}
     * @param w2 second key; must be a {@link CompositeKey}
     * @return the result of comparing the two names
     */
    @Override
    public int compare(WritableComparable w1, WritableComparable w2) {
        final CompositeKey left = (CompositeKey) w1;
        final CompositeKey right = (CompositeKey) w2;
        final String leftName = left.getName();
        final String rightName = right.getName();
        return leftName.compareTo(rightName);
    }
}

package yidongpingjun.secondarysort;import java.util.Iterator;import java.io.IOException;//import org.apache.hadoop.io.Text;import org.apache.hadoop.mapred.MapReduceBase;import org.apache.hadoop.mapred.OutputCollector;import org.apache.hadoop.mapred.Reducer;import org.apache.hadoop.mapred.Reporter;import org.apache.hadoop.mapred.JobConf;//import yidongpingjun.DateUtil;import yidongpingjun.TimeSeriesData;public class SortByMRF_MovingAverageReducer extends MapReduceBase        implements Reducer<CompositeKey, TimeSeriesData, Text, Text> {    int windowSize = 5; // default window size    /**     * will be run only once get parameters from Hadoop's configuration     */    @Override    public void configure(JobConf jobconf) {        this.windowSize = jobconf.getInt("moving.average.window.size", 5);    }    @Override    public void reduce(CompositeKey key,            Iterator<TimeSeriesData> values,            OutputCollector<Text, Text> output,            Reporter reporter)            throws IOException {        // note that values are sorted.        // apply moving average algorithm to sorted timeseries        Text outputKey = new Text();        Text outputValue = new Text();        MovingAverage ma = new MovingAverage(this.windowSize);        while (values.hasNext()) {            TimeSeriesData data = values.next();            ma.addNewNumber(data.getValue());            double movingAverage = ma.getMovingAverage();            long timestamp = data.getTimestamp();            String dateAsString = DateUtil.getDateAsString(timestamp);            //THE_LOGGER.info("Next number = " + x + ", SMA = " + sma.getMovingAverage());            outputValue.set(dateAsString + "," + movingAverage);            outputKey.set(key.getName());            output.collect(outputKey, outputValue);        }        //    } }

package yidongpingjun.secondarysort;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapred.FileInputFormat;import org.apache.hadoop.mapred.FileOutputFormat;import org.apache.hadoop.mapred.TextInputFormat;import org.apache.hadoop.mapred.TextOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;import org.apache.hadoop.mapred.JobConf;import org.apache.hadoop.mapred.JobClient;//import yidongpingjun.HadoopUtil;import yidongpingjun.TimeSeriesData;public class SortByMRF_MovingAverageDriver {    private static final String INPATH = "input/gupiao1.txt";// 输入文件路径    private static final String OUTPATH = "output/gupiao2";// 输出文件路径        public static void main(String[] args) throws Exception {        Configuration conf = new Configuration();JobConf jobconf = new JobConf(conf, SortByMRF_MovingAverageDriver.class);jobconf.setJobName("SortByMRF_MovingAverageDriver");    String[] otherArgs = new String[3];       otherArgs[0] = "2";       otherArgs[1] = INPATH;       otherArgs[2] = OUTPATH;       if (otherArgs.length != 3) {          System.err.println("Usage: SortByMRF_MovingAverageDriver <window_size> <input> <output>");          System.exit(1);       }       // add jars to distributed cache     //  HadoopUtil.addJarsToDistributedCache(conf, "/lib/");              // set mapper/reducer       jobconf.setMapperClass(SortByMRF_MovingAverageMapper.class);       jobconf.setReducerClass(SortByMRF_MovingAverageReducer.class);              // define mapper's output key-value       jobconf.setMapOutputKeyClass(CompositeKey.class);       jobconf.setMapOutputValueClass(TimeSeriesData.class);                     // define reducer's output key-value       jobconf.setOutputKeyClass(Text.class);       jobconf.setOutputValueClass(Text.class);       // set window size for moving average calculation       int windowSize = Integer.parseInt(otherArgs[0]);       
jobconf.setInt("moving.average.window.size", windowSize);                    // define I/O   FileInputFormat.setInputPaths(jobconf, new Path(otherArgs[1]));   FileOutputFormat.setOutputPath(jobconf, new Path(otherArgs[2]));              jobconf.setInputFormat(TextInputFormat.class);        jobconf.setOutputFormat(TextOutputFormat.class);   jobconf.setCompressMapOutput(true);                     // the following 3 setting are needed for "secondary sorting"       // Partitioner decides which mapper output goes to which reducer        // based on mapper output key. In general, different key is in        // different group (Iterator at the reducer side). But sometimes,        // we want different key in the same group. This is the time for        // Output Value Grouping Comparator, which is used to group mapper        // output (similar to group by condition in SQL).  The Output Key        // Comparator is used during sort stage for the mapper output key.       jobconf.setPartitionerClass(NaturalKeyPartitioner.class);       jobconf.setOutputKeyComparatorClass(CompositeKeyComparator.class);       jobconf.setOutputValueGroupingComparator(NaturalKeyGroupingComparator.class);              JobClient.runJob(jobconf);    }}

package yidongpingjun;import java.text.SimpleDateFormat;import java.util.Date;public class DateUtil {static final String DATE_FORMAT = "yyyy-MM-dd";static final SimpleDateFormat SIMPLE_DATE_FORMAT =    new SimpleDateFormat(DATE_FORMAT);    /**     *  Returns the Date from a given dateAsString     */public static Date getDate(String dateAsString)  {        try {        return SIMPLE_DATE_FORMAT.parse(dateAsString);        }        catch(Exception e) {        return null;        }}    /**     *  Returns the number of milliseconds since January 1, 1970,      *  00:00:00 GMT represented by this Date object.     */public static long getDateAsMilliSeconds(Date date) throws Exception {        return date.getTime();}    /**     *  Returns the number of milliseconds since January 1, 1970,      *  00:00:00 GMT represented by this Date object.     */public static long getDateAsMilliSeconds(String dateAsString) throws Exception {Date date = getDate(dateAsString);        return date.getTime();}public static String getDateAsString(long timestamp) {        return SIMPLE_DATE_FORMAT.format(timestamp);}}

package yidongpingjun;import java.util.List;import java.util.ArrayList;import java.util.Arrays;import java.io.IOException;//import org.apache.hadoop.fs.Path;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.filecache.DistributedCache;public class HadoopUtil {   /**    * Add all jar files to HDFS's distributed cache    *    * @param job job which will be run    * @param hdfsJarDirectory a directory which has all required jar files    */    public static void addJarsToDistributedCache(Job job,                                                 String hdfsJarDirectory)       throws IOException {      if (job == null) {         return;      }      addJarsToDistributedCache(job.getConfiguration(), hdfsJarDirectory);   }   /**    * Add all jar files to HDFS's distributed cache    *    * @param Configuration conf which will be run    * @param hdfsJarDirectory a directory which has all required jar files    */    public static void addJarsToDistributedCache(Configuration conf,                                                 String hdfsJarDirectory)       throws IOException {      if (conf == null) {         return;      }      FileSystem fs = FileSystem.get(conf);      List<FileStatus> jars = getDirectoryListing(hdfsJarDirectory, fs);      for (FileStatus jar : jars) {         Path jarPath = jar.getPath();         DistributedCache.addFileToClassPath(jarPath, conf, fs);      }   }      /**    * Get list of files from a given HDFS directory    * @param directory an HDFS directory name    * @param fs an HDFS FileSystem    */       public static List<FileStatus> getDirectoryListing(String directory,                                                        FileSystem fs)        throws IOException {       Path dir = new Path(directory);        FileStatus[] fstatus = fs.listStatus(dir);        return Arrays.asList(fstatus);    }        public static 
List<String> listDirectoryAsListOfString(String directory,                                                            FileSystem fs)        throws IOException {       Path path = new Path(directory);        FileStatus fstatus[] = fs.listStatus(path);       List<String> listing = new ArrayList<String>();       for (FileStatus f: fstatus) {           listing.add(f.getPath().toUri().getPath());       }       return listing;    }           /**    * Return true, if HDFS path doers exist; otherwise return false.    *     */   public static boolean pathExists(Path path, FileSystem fs)  {      if (path == null) {         return false;      }            try {         return fs.exists(path);      }      catch(Exception e) {          return false;      }   }      }

