hadoop的Context简单使用

来源:互联网 发布:近年淘宝发展历程概述 编辑:程序博客网 时间:2024/06/08 11:22
hadoop的Context简单使用


作业输入:
hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath1.txt
hadoop  a
spark   a
hive    a
hbase   a
tachyon a
storm   a
redis   a
hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath2.txt
hadoop  b
spark   b
kafka   b
tachyon b
oozie   b
flume   b
sqoop   b
solr    b
hadoop@hadoop:/home/hadoop/blb$


代码:
import java.io.IOException;import java.net.URI;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.JobID;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.TaskAttemptID;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.FileSplit;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;public class GetIDMapReduce {public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {Configuration conf = new Configuration();String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();if(otherArgs.length!=2){System.err.println("Usage databaseV1 <inputpath> <outputpath>");}Job job = Job.getInstance(conf, GetIDMapReduce.class.getSimpleName() + "1");job.setJarByClass(GetIDMapReduce.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(Text.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(NullWritable.class);job.setMapperClass(MyMapper1.class);job.setNumReduceTasks(0);job.setInputFormatClass(TextInputFormat.class);job.setOutputFormatClass(TextOutputFormat.class);FileInputFormat.addInputPath(job, new Path(otherArgs[0]));FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));job.waitForCompletion(true);}public static class MyMapper1 extends Mapper<LongWritable, Text, Text, NullWritable>{@Overrideprotected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, NullWritable>.Context context)throws IOException, InterruptedException {FileSplit fileSplit=(FileSplit) context.getInputSplit();String 
pathname=fileSplit.getPath().getName();JobID jobid=context.getJobID();//获取jobidLongWritable keyout=context.getCurrentKey(); //获取key偏移量Text valueout=context.getCurrentValue();  //获取一行的值String jobname=context.getJobName();//获得job名字TaskAttemptID taskid=context.getTaskAttemptID();//获得taskidfloat progress=context.getProgress();//获取任务执行进度String jar = context.getJar();//作业运行之前,往集群拷贝的作业资源jar//String status = context.getStatus();String user = context.getUser(); //获取当前用户//String[] fileTimestamps = context.getFileTimestamps();int numReduceTasks = context.getNumReduceTasks(); //获得reduce的数量//Path[] fileClassPaths = context.getFileClassPaths();Configuration configuration = context.getConfiguration();//获得作业配置文件//RawComparator<?> groupingComparator = context.getGroupingComparator();boolean jobSetupCleanupNeeded = context.getJobSetupCleanupNeeded(); //Get whether job-setup and job-cleanup is needed for the job int maxMapAttempts = context.getMaxMapAttempts(); //the max number of attempts per map taskint maxReduceAttempts = context.getMaxReduceAttempts(); //he max number of attempts per reduce task.//@SuppressWarnings("deprecation")//Path[] localCacheFiles = context.getLocalCacheFiles();//OutputCommitter outputCommitter = context.getOutputCommitter();Path workingDirectory = context.getWorkingDirectory(); //工作目录boolean nextKeyValue = context.nextKeyValue();//下一个键值对//URI[] cacheFiles = context.getCacheFiles();URI[] cacheArchives = context.getCacheArchives(); //Get cache archives set in the ConfigurationPath[] archiveClassPaths = context.getArchiveClassPaths();//Get the archive entries in classpath as an array of Pathboolean profileEnabled = context.getProfileEnabled();//Get whether the task profiling is enabled.//String profileParams = context.getProfileParams();@SuppressWarnings("deprecation")boolean symlink = context.getSymlink();// Originally intended to check if symlinks should be used, but currently symlinks cannot be disabled//RawComparator<?> sortComparator = 
context.getSortComparator();//int hashCode = context.hashCode();context.write(new Text("===================================================================================="), NullWritable.get());context.write(new Text("pathname--"+pathname), NullWritable.get());context.write(new Text("jobid--"+jobid.toString()), NullWritable.get());context.write(new Text("keyout--"+keyout.toString()), NullWritable.get());context.write(new Text("keyout--"+valueout), NullWritable.get());context.write(new Text("jobname--"+jobname), NullWritable.get());context.write(new Text("taskid--"+taskid.toString()), NullWritable.get());context.write(new Text("progress--"+progress), NullWritable.get());context.write(new Text("jar--"+jar.toString()), NullWritable.get());//context.write(new Text("status--"+status), NullWritable.get());context.write(new Text("user--"+user), NullWritable.get());//context.write(new Text("fileTimestamps--"+fileTimestamps), NullWritable.get());context.write(new Text("numReduceTasks--"+numReduceTasks), NullWritable.get());//context.write(new Text("fileClassPaths--"+fileClassPaths), NullWritable.get());context.write(new Text("configuration--"+configuration), NullWritable.get());//context.write(new Text("groupingComparator--"+groupingComparator), NullWritable.get());context.write(new Text("jobSetupCleanupNeeded--"+jobSetupCleanupNeeded), NullWritable.get());context.write(new Text("maxMapAttempts--"+maxMapAttempts), NullWritable.get());context.write(new Text("maxReduceAttempts--"+maxReduceAttempts), NullWritable.get());//context.write(new Text("localCacheFiles--"+localCacheFiles), NullWritable.get());//context.write(new Text("outputCommitter--"+outputCommitter), NullWritable.get());context.write(new Text("workingDirectory--"+workingDirectory), NullWritable.get());context.write(new Text("nextKeyValue--"+nextKeyValue), NullWritable.get());//context.write(new Text("cacheFiles--"+cacheFiles), NullWritable.get());context.write(new Text("cacheArchives--"+cacheArchives), 
NullWritable.get());context.write(new Text("archiveClassPaths--"+archiveClassPaths), NullWritable.get());context.write(new Text("profileEnabled--"+profileEnabled), NullWritable.get());//context.write(new Text("profileParams--"+profileParams), NullWritable.get());context.write(new Text("symlink--"+symlink), NullWritable.get());//context.write(new Text("sortComparator--"+sortComparator), NullWritable.get());//context.write(new Text("hashCode--"+hashCode), NullWritable.get());}}}


输出结果:
-rw-r--r--   2 hadoop hadoop          0 2016-03-29 17:38 /user/hadoop/libin/out1/_SUCCESS
-rw-r--r--   2 hadoop hadoop       3058 2016-03-29 17:38 /user/hadoop/libin/out1/part-m-00000
-rw-r--r--   2 hadoop hadoop       3045 2016-03-29 17:38 /user/hadoop/libin/out1/part-m-00001


part-m-00000文件结果:
====================================================================================pathname--inputpath2.txtjobid--job_1446086163035_21583keyout--9keyout--spark   bjobname--GetIDMapReduce1taskid--attempt_1446086163035_21583_m_000000_0progress--0.13636364jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jaruser--hadoopnumReduceTasks--0configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xmljobSetupCleanupNeeded--truemaxMapAttempts--4maxReduceAttempts--4workingDirectory--hdfs://hadoop:9000/user/hadoopnextKeyValue--truecacheArchives--nullarchiveClassPaths--nullprofileEnabled--falsesymlink--true====================================================================================pathname--inputpath2.txtjobid--job_1446086163035_21583keyout--25keyout--tachyon bjobname--GetIDMapReduce1taskid--attempt_1446086163035_21583_m_000000_0progress--0.37878788jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jaruser--hadoopnumReduceTasks--0configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xmljobSetupCleanupNeeded--truemaxMapAttempts--4maxReduceAttempts--4workingDirectory--hdfs://hadoop:9000/user/hadoopnextKeyValue--truecacheArchives--nullarchiveClassPaths--nullprofileEnabled--falsesymlink--true====================================================================================pathname--inputpath2.txtjobid--job_1446086163035_21583keyout--43keyout--flume   bjobname--GetIDMapReduce1taskid--attempt_1446086163035_21583_m_000000_0progress--0.6515151jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jaruser--hadoopnumReduceTasks--0configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, 
hdfs-site.xml, job.xmljobSetupCleanupNeeded--truemaxMapAttempts--4maxReduceAttempts--4workingDirectory--hdfs://hadoop:9000/user/hadoopnextKeyValue--truecacheArchives--nullarchiveClassPaths--nullprofileEnabled--falsesymlink--true====================================================================================pathname--inputpath2.txtjobid--job_1446086163035_21583keyout--59keyout--solr    bjobname--GetIDMapReduce1taskid--attempt_1446086163035_21583_m_000000_0progress--0.8939394jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jaruser--hadoopnumReduceTasks--0configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xmljobSetupCleanupNeeded--truemaxMapAttempts--4maxReduceAttempts--4workingDirectory--hdfs://hadoop:9000/user/hadoopnextKeyValue--truecacheArchives--nullarchiveClassPaths--nullprofileEnabled--falsesymlink--truehadoop@hadoop:/home/hadoop/blb$ 


part-m-00001文件结果:
====================================================================================pathname--inputpath1.txtjobid--job_1446086163035_21583keyout--9keyout--spark   ajobname--GetIDMapReduce1taskid--attempt_1446086163035_21583_m_000001_0progress--0.15517241jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jaruser--hadoopnumReduceTasks--0configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xmljobSetupCleanupNeeded--truemaxMapAttempts--4maxReduceAttempts--4workingDirectory--hdfs://hadoop:9000/user/hadoopnextKeyValue--truecacheArchives--nullarchiveClassPaths--nullprofileEnabled--falsesymlink--true====================================================================================pathname--inputpath1.txtjobid--job_1446086163035_21583keyout--24keyout--hbase   ajobname--GetIDMapReduce1taskid--attempt_1446086163035_21583_m_000001_0progress--0.41379312jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jaruser--hadoopnumReduceTasks--0configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xmljobSetupCleanupNeeded--truemaxMapAttempts--4maxReduceAttempts--4workingDirectory--hdfs://hadoop:9000/user/hadoopnextKeyValue--truecacheArchives--nullarchiveClassPaths--nullprofileEnabled--falsesymlink--true====================================================================================pathname--inputpath1.txtjobid--job_1446086163035_21583keyout--42keyout--storm   ajobname--GetIDMapReduce1taskid--attempt_1446086163035_21583_m_000001_0progress--0.7241379jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jaruser--hadoopnumReduceTasks--0configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, 
hdfs-site.xml, job.xmljobSetupCleanupNeeded--truemaxMapAttempts--4maxReduceAttempts--4workingDirectory--hdfs://hadoop:9000/user/hadoopnextKeyValue--truecacheArchives--nullarchiveClassPaths--nullprofileEnabled--falsesymlink--true====================================================================================pathname--inputpath1.txtjobid--job_1446086163035_21583keyout--58keyout--jobname--GetIDMapReduce1taskid--attempt_1446086163035_21583_m_000001_0progress--1.0jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jaruser--hadoopnumReduceTasks--0configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xmljobSetupCleanupNeeded--truemaxMapAttempts--4maxReduceAttempts--4workingDirectory--hdfs://hadoop:9000/user/hadoopnextKeyValue--falsecacheArchives--nullarchiveClassPaths--nullprofileEnabled--falsesymlink--true


0 0