threaduser

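The program below chains two MapReduce jobs. Job 1 reads space-separated view records, groups thread ids by user, and, for every user who viewed thread 14280, re-emits each of that user's (threadid, userid) pairs into a temporary HDFS directory. Job 2 reads that tab-separated intermediate output and counts the users per thread, effectively producing a "users who viewed thread 14280 also viewed" co-view count.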
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class ThreadUser {

    // Counts input lines that could not be parsed and were skipped.
    enum Counter { LINESKIP }

    // Job 1 mapper: each input line is split on spaces; field 1 is the user id
    // and field 2 is the thread id. Emits (userid, threadid).
    public static class TUMapper extends Mapper<LongWritable, Text, Text, Text> {
        private final Text userid = new Text();
        private final Text threadid = new Text();

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] arr = value.toString().split(" ");
            try {
                userid.set(arr[1]);
                threadid.set(arr[2]);
                context.write(userid, threadid);
            } catch (Exception e) {
                // Was System.out.println(e.getStackTrace()), which only prints
                // an array reference; printStackTrace() shows the actual trace.
                e.printStackTrace();
                context.getCounter(Counter.LINESKIP).increment(1);
            }
        }
    }

    // Job 1 reducer: concatenates all thread ids a user viewed. If the user
    // viewed thread 14280, re-emits every (threadid, userid) pair. Note that
    // indexOf("14280") is a substring match, so an id such as 142800 would
    // also pass; the original code accepts that imprecision.
    public static class TUReducer extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            for (Text val : values) {
                sb.append(val);
                sb.append(",");
            }
            if (sb.indexOf("14280") != -1) {
                String[] strarr = sb.toString().split(",");
                for (int i = 0; i < strarr.length; i++) {
                    context.write(new Text(strarr[i]), key);
                }
            }
        }
    }

    // Job 2 mapper: reads job 1's tab-separated output (threadid \t userid)
    // and re-emits (threadid, userid).
    public static class TUMapper2 extends Mapper<LongWritable, Text, Text, Text> {
        private final Text userid = new Text();
        private final Text threadid = new Text();

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] arr = value.toString().split("\t");
            try {
                threadid.set(arr[0]);
                userid.set(arr[1]);
                context.write(threadid, userid);
            } catch (Exception e) {
                e.printStackTrace();
                context.getCounter(Counter.LINESKIP).increment(1);
            }
        }
    }

    // Job 2 reducer: counts how many users are associated with each thread.
    public static class TUReducer2 extends Reducer<Text, Text, Text, IntWritable> {
        private final IntWritable sum = new IntWritable();

        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            int num = 0;
            for (Text val : values) {
                num++;
            }
            sum.set(num);
            context.write(key, sum);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("mapred.job.tracker", "192.168.1.23:9001");
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: Thread <in> <out>");
            System.exit(2);
        }

        // Job 1: find every thread viewed by users who also viewed thread 14280.
        Job job = new Job(conf, "Thread user");
        job.setJarByClass(ThreadUser.class);
        job.setMapperClass(TUMapper.class);
        job.setReducerClass(TUReducer.class);

        FileSystem fs = FileSystem.get(conf);
        Path out = new Path("hdfs://192.168.1.23:9000/user/hadoop/temp");
        if (fs.exists(out)) {
            fs.delete(out, true);  // recursive delete; the one-argument delete(Path) is deprecated
        }
        // Use the parsed otherArgs, not the raw args, after GenericOptionsParser
        // has consumed any generic options.
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, out);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        boolean res = job.waitForCompletion(true);

        // Job 2: count the users per thread from job 1's intermediate output.
        Job job2 = new Job(conf, "Thread user2");
        job2.setJarByClass(ThreadUser.class);
        job2.setMapperClass(TUMapper2.class);
        job2.setReducerClass(TUReducer2.class);

        Path out2 = new Path(otherArgs[1]);
        if (fs.exists(out2)) {
            fs.delete(out2, true);
        }
        FileInputFormat.addInputPath(job2, out);
        FileOutputFormat.setOutputPath(job2, out2);
        // TUReducer2 emits IntWritable values while TUMapper2 emits Text, so the
        // map output classes must be declared separately. The original set the
        // final output value class to Text, which conflicts with the reducer's
        // IntWritable and would fail at runtime.
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(IntWritable.class);
        boolean res2 = job2.waitForCompletion(true);

        // Print out job finishing status.
        System.out.println("Job2 Name: " + job2.getJobName());
        System.out.println("Job2 Successful: " + (job2.isSuccessful() ? "Yes" : "No"));
        System.out.println("Lines2 of Mapper Input: " + job2.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
        // The original read this counter from job instead of job2.
        System.out.println("Lines2 of Reducer Output: " + job2.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue());
        System.out.println("Lines2 skipped: " + job2.getCounters().findCounter(Counter.LINESKIP).getValue());

        // The original only checked job 1's result; both jobs must succeed.
        System.exit(res && res2 ? 0 : 1);
    }
}
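A minimal walk-through with hypothetical data (the field layout matches what the mappers parse, but these records, user ids, and the jar/path names below are illustrative assumptions, not taken from the post). Input records, split on single spaces, with field 1 the user id and field 2 the thread id:

    20240101 u1 14280
    20240101 u1 300
    20240102 u2 300
    20240102 u2 14280
    20240103 u3 300

Job 1 writes (threadid, userid) pairs to the temp directory for u1 and u2 only, since u3 never viewed thread 14280:

    14280   u1
    300     u1
    300     u2
    14280   u2

Job 2 then counts the users per thread:

    14280   2
    300     2

Assuming the class is packaged as threaduser.jar, the two-job chain could be launched with:

    hadoop jar threaduser.jar ThreadUser /user/hadoop/input /user/hadoop/final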
