// threaduser
// 来源:互联网 发布:淘宝主图厂家直销 编辑:程序博客网 时间:2024/05/23 13:34
import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;public class ThreadUser {enum Counter{LINESKIP;}public static class TUMapper extends Mapper<LongWritable,Text,Text,Text>{private String str;private String[] arr;private Text userid=new Text();private Text threadid=new Text();public void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException{str=value.toString();arr=str.split(" ");try{userid.set(arr[1]);threadid.set(arr[2]);context.write(userid, threadid);}catch(Exception e){System.out.println(e.getStackTrace());context.getCounter(Counter.LINESKIP).increment(1);return;}}}public static class TUReducer extends Reducer<Text,Text,Text,Text>{ public void reduce(Text key,Iterable<Text> values,Context context) throws IOException,InterruptedException{StringBuffer sb=new StringBuffer();for(Text val:values){sb.append(val);sb.append(",");}String[] strarr;if (sb.indexOf("14280")!=-1){ strarr=sb.toString().split(","); for(int i=0;i<strarr.length;i++) context.write(new Text(strarr[i]), key);}//if (values.iterator().toString().indexOf("14280")!=-1)//for(Text val:values){//context.write(new Text(values.iterator().toString()), key);//}}}public static class TUMapper2 extends Mapper<LongWritable,Text,Text,Text>{private String str;private String[] arr;private Text userid=new Text();private Text threadid=new Text();public void map(LongWritable key,Text value,Context context) throws 
IOException,InterruptedException{str=value.toString();arr=str.split("\t");try{userid.set(arr[1]);threadid.set(arr[0]);context.write(threadid , userid);}catch(Exception e){System.out.println(e.getStackTrace());context.getCounter(Counter.LINESKIP).increment(1);return;}}}public static class TUReducer2 extends Reducer<Text,Text,Text,IntWritable>{private IntWritable sum=new IntWritable();public void reduce(Text key,Iterable<Text> values,Context context) throws IOException,InterruptedException{ int num=0;for(Text val:values){num++;}sum.set(num);context.write(key, sum);}}public static void main(String[] args) throws Exception {Configuration conf = new Configuration(); conf.set("mapred.job.tracker", "192.168.1.23:9001"); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: Thread <in> <out>"); System.exit(2); } Job job = new Job(conf, "Thread user"); job.setJarByClass(ThreadUser.class); job.setMapperClass(TUMapper.class); //job.setCombinerClass(TSReducer.class); job.setReducerClass(TUReducer.class); // FileSystem fs=FileSystem.get(conf); Path out=new Path("hdfs://192.168.1.23:9000/user/hadoop/temp"); if(fs.exists(out)) fs.delete(out); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, out); //job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); boolean res=job.waitForCompletion(true); Job job2 = new Job(conf, "Thread user2"); job2.setJarByClass(ThreadUser.class); job2.setMapperClass(TUMapper2.class); //job.setCombinerClass(TSReducer.class); job2.setReducerClass(TUReducer2.class); // Path out2=new Path(args[1]); if(fs.exists(out2)) fs.delete(out2); FileInputFormat.addInputPath(job2, out); FileOutputFormat.setOutputPath(job2, out2); //job.setOutputFormatClass(TextOutputFormat.class); job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(Text.class); boolean 
res2=job2.waitForCompletion(true); // Print out Job finishing status System.out.println( "Job2 Name: " + job2.getJobName() ); System.out.println( "Job2 Successful: " + ( job2.isSuccessful() ? "Yes" : "No" ) ); System.out.println( "Lines2 of Mapper Input: " + job2.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue() ); System.out.println( "Lines2 of Reducer Output: " + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue() ); System.out.println( "Lines2 skipped: " + job2.getCounters().findCounter(Counter.LINESKIP).getValue() ); if (res) System.exit(0); else System.exit(1);}}