MapReduce PV


Analyzing the basic website metric PV

Basic website metrics

1.     PV (Page View): page views, i.e. the number of times pages are viewed or clicked; every refresh by a user is counted once. If a user refreshes a page 100 times, the traffic tool will report 100 PVs. That is the specific definition of PV used in traffic statistics.

2.     UV (Unique Visitor): the number of distinct visitors to the site within one day, identified by cookie or a unique user ID; multiple visits by the same visitor within one day count as a single visitor.

3.     VV (Visit View): the number of visits, i.e. how many times all visitors accessed the site within one day. A visit ends when the visitor closes all of the site's pages; the same visitor may visit several times in one day, and the visits add up.

4.     IP (independent IP): the number of distinct IP addresses that accessed the site within one day; however many pages a single IP visits, it still counts as one independent IP.
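To make the difference between these metrics concrete, here is a minimal, self-contained Java sketch (the cookie IDs and IP addresses are invented for illustration) that computes PV, UV, and independent-IP counts from a handful of access records:

import java.util.HashSet;
import java.util.Set;

public class MetricsDemo {
    public static void main(String[] args) {
        // hypothetical access records: {cookieId, ip}
        String[][] logs = {
                {"c1", "1.1.1.1"}, {"c1", "1.1.1.1"},
                {"c2", "1.1.1.1"}, {"c3", "2.2.2.2"}
        };
        long pv = logs.length;                 // every record is one page view
        Set<String> cookies = new HashSet<String>();
        Set<String> ips = new HashSet<String>();
        for (String[] record : logs) {
            cookies.add(record[0]);            // UV: de-duplicate by cookie
            ips.add(record[1]);                // IP: de-duplicate by address
        }
        // prints PV=4 UV=3 IP=2
        System.out.println("PV=" + pv + " UV=" + cookies.size() + " IP=" + ips.size());
    }
}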

PV analysis and writing the program

The PV metric is simply the site's page-view count, so counting the log lines for a given time period gives the PV of that period. Below we compute the PV of each province for a given time period.

1.     Check the field documentation to locate the province field.

2.     Use the province ID as the key of the map output, with a count of 1 as the value.

3.     Create the input directory:

bin/hdfs dfs -mkdir -p /user/liangman/mapreduce/webpv/input

4.     Upload the log file:

bin/hdfs dfs -put /home/liangman/2015082818 /user/liangman/mapreduce/webpv/input


5.     Program code


import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * @author liangman
 */
public class WebPVMapReduce extends Configured implements Tool {

    // 1. mapper: emit (provinceId, 1) for every valid log line
    public static class PVMap extends
            Mapper<LongWritable, Text, IntWritable, IntWritable> {

        private final static IntWritable mapOutputValue = new IntWritable(1);
        private IntWritable mapOutputKey = new IntWritable();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {

            String linevalue = value.toString();
            String[] tokenarray = linevalue.split("\t");

            // skip (and count) records with fewer than 30 fields
            if (tokenarray.length < 30) {
                context.getCounter("WEB_PV_MAP_COUNTERS", "LENGTH_LT_30_COUNTER").increment(1L);
                return;
            }
            // skip records whose url field is blank
            if (StringUtils.isBlank(tokenarray[1])) {
                context.getCounter("WEB_PV_MAP_COUNTERS", "NULL_OF_URI").increment(1L);
                return;
            }
            // province id is field 24 (index 23)
            String provinceID = tokenarray[23];
            if (StringUtils.isBlank(provinceID)) {
                context.getCounter("WEB_PV_MAP_COUNTERS", "NULL_OF_PROVINCEID").increment(1L);
                return;
            }
            int pID;
            try {
                pID = Integer.parseInt(provinceID);
            } catch (Exception e) {
                context.getCounter("WEB_PV_MAP_COUNTERS", "PARSEINT_ERROR").increment(1L);
                return;
            }
            // map output: (provinceId, 1)
            mapOutputKey.set(pID);
            context.write(mapOutputKey, mapOutputValue);
        }
    }

    // combiner: pre-aggregate the 1s on the map side to reduce shuffle traffic
    public static class PVCombiner extends
            Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

        private IntWritable outputvalue = new IntWritable();

        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable intWritable : values) {
                sum += intWritable.get();
            }
            outputvalue.set(sum);
            context.write(key, outputvalue);
        }
    }

    // 2. reducer: sum the counts for each province
    public static class PVReducer extends
            Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

        private IntWritable outputvalue = new IntWritable();

        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable intWritable : values) {
                sum += intWritable.get();
            }
            outputvalue.set(sum);
            context.write(key, outputvalue);
        }
    }

    // 3. driver
    public int run(String[] args) throws Exception {
        Configuration configuration = this.getConf();
        // one application is one job
        Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
        // the jar to run
        job.setJarByClass(WebPVMapReduce.class);

        // input path
        Path inpath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inpath);
        // output path
        Path outpath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outpath);

        // mapper
        job.setMapperClass(PVMap.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // ################################ shuffle
        // 1. partition
        // job.setPartitionerClass(cls);
        // 2. sort
        // job.setSortComparatorClass(cls);
        // 3. group
        // job.setGroupingComparatorClass(cls);

        job.setCombinerClass(PVCombiner.class);

        // reducer
        job.setReducerClass(PVReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        // job.setNumReduceTasks(tasks);

        // submit the job and wait for it to finish
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // optionally compress the map output
        // configuration.set("mapreduce.map.output.compress", "true");
        // configuration.set("mapreduce.map.output.compress.codec",
        //         "org.apache.hadoop.io.compress.SnappyCodec");

        args = new String[] {
                "hdfs://hadoop02-linux.alibaba.com:8020/user/liangman/mapreduce/webpv/input",
                "hdfs://hadoop02-linux.alibaba.com:8020/user/liangman/mapreduce/webpv/output"
        };
        // run the job
        int status = ToolRunner.run(configuration, new WebPVMapReduce(), args);
        // exit with the job status
        System.exit(status);
    }
}
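To submit the job, the class has to be packaged into a jar first. A minimal sketch of the submit command, assuming the jar is named webpv.jar (the jar name is not given in the original; since the input and output paths are hardcoded in main(), no arguments are passed):

bin/yarn jar webpv.jar WebPVMapReduce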

 

6.     Run results

Custom job counters

●      Defining a custom counter (group name, counter name):

context.getCounter("WEB_PV_MAP_COUNTERS", "LENGTH_LT_30_COUNTER").increment(1L);

●      Custom counter output
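Besides the counter summary printed by the job, the values can also be read programmatically in the driver once the job has finished. A minimal sketch, not part of the original run() method, that could be added after waitForCompletion():

// read a custom counter back from the finished job (sketch, not in the original driver)
long badLines = job.getCounters()
        .findCounter("WEB_PV_MAP_COUNTERS", "LENGTH_LT_30_COUNTER")
        .getValue();
System.out.println("lines with fewer than 30 fields: " + badLines);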


The counter output shows that many records have incomplete fields and cannot be analyzed properly; we need to communicate with the relevant teams and clean the data.
