用Awk处理Access log请求信息

来源:互联网 发布:华为网盘windows版 编辑:程序博客网 时间:2024/06/05 19:20
接上上文,《用Python和AWK结合处理Access log请求信息》--->http://blog.csdn.net/babyfish13/article/details/53436844及上文《用Python处理Access log请求信息》--->http://blog.csdn.net/babyfish13/article/details/53452995
上上文中,基本上相当于用Python处理access log列的展示问题,而用awk处理access log中的条件过滤问题;上文作了一定的改进,全部用Python来进行数据的处理;本文则采用另一种方式,不借助Python,全部用Shell的grep及awk进行access log数据的处理。
1、数据处理的Shell脚本
/Users/nisj/PycharmProjects/EsDataProc/awk_log_proc.sh
#!/usr/bin/env bash
#
# Extract the appSource, appkey, identifier and token request parameters from
# the GET requests of an access log and print one line per request in the form
#   appSource|appkey|identifier|token
#
# Usage: awk_log_proc.sh [LOG_FILE]
#   LOG_FILE  access log to read (default: DEFAULT_LOG_FILE below)
#
# The exit status of the script is the return status of main.
set -u

readonly DEFAULT_LOG_FILE='/Users/nisj/Desktop/new-demand/x-log-100.txt'

#######################################
# Filter access-log lines and extract the four request parameters.
# Arguments: none
# Inputs:    access-log lines on stdin
# Outputs:   "appSource|appkey|identifier|token" per kept line on stdout;
#            a parameter that is absent from a line is printed as empty
# Returns:   exit status of the last awk stage
#######################################
extract_fields() {
  # Stage 1: drop heartbeat requests (fixed-string match, "." is literal).
  # Stage 2: keep GET requests that carry an appSource parameter.
  # Stage 3: split on "?"; take field 2 when it contains "htm", else field 3.
  #          The literal must be a double-quoted awk string: single quotes
  #          here would close the shell quoting and leave a bare, unset awk
  #          variable, i.e. a comparison against the empty string.
  # Stage 4: keep only the first whitespace-delimited word (the query string).
  # Stage 5: split the query on "&" and pick the parameters by key name.
  grep -vF -- '/api/room/heartbeat.htm' \
    | grep -- 'GET.*appSource' \
    | awk -F '?' '{ if (index($2, "htm") != 0) print $2; else print $3 }' \
    | awk '{ print $1 }' \
    | awk -F '&' '
        # Value part of a "key=value" pair (the whole pair if it has no "=").
        function value_of(pair) {
          return substr(pair, index(pair, "=") + 1)
        }
        {
          # Reset per record so a missing parameter prints as empty instead
          # of repeating the value seen on an earlier line.
          token = appSource = appkey = identifier = ""
          for (i = 1; i <= NF; i++) {
            # Match on the key only, so a value that merely contains e.g.
            # "token" is not mistaken for the token parameter.
            eq = index($i, "=")
            key = (eq > 0) ? substr($i, 1, eq - 1) : $i
            if (index(key, "token")) {
              token = $i
            } else if (index(key, "appSource")) {
              appSource = $i
            } else if (index(key, "appkey")) {
              appkey = $i
            } else if (index(key, "identifier")) {
              identifier = $i
            }
          }
          print value_of(appSource) "|" value_of(appkey) "|" value_of(identifier) "|" value_of(token)
        }'
}

#######################################
# Entry point: run the extraction over a log file.
# Arguments: $1 - log file path (optional, defaults to DEFAULT_LOG_FILE)
# Outputs:   extracted fields on stdout, diagnostics on stderr
# Returns:   1 if the log file is not readable, else extract_fields status
#######################################
main() {
  local log_file=${1:-$DEFAULT_LOG_FILE}

  if [[ ! -r "$log_file" ]]; then
    printf 'error: cannot read log file: %s\n' "$log_file" >&2
    return 1
  fi

  extract_fields <"$log_file"
}

main "$@"
                
附:结果示例数据
/usr/bin/env bash /Users/nisj/PycharmProjects/EsDataProc/awk_log_proc.sh
811|CSIos|8A210F03-1791-49C1-AB31-4475A4666015|20a3fbc39ccd4c16g2e08f916
811|CSIos|8A210F03-1791-49C1-AB31-4475A4666015|20a3fbc39ccd4c16g2e08f916
208|CSAndroid|1468100392|375aa831f7aac93fg2c779f3f
209|CSAndroid|654209805|9f90de0f3940e6ceg34b49b32
811|CSIos|F60FF480-32E5-4C1A-A5D1-35DEC10D496D|410067a122ddbfc7g2c0ea8c7
209|CSAndroid|163893806|a5136df76b4fa1c5g13c6c33b
811|CSIos|F60FF480-32E5-4C1A-A5D1-35DEC10D496D|410067a122ddbfc7g2c0ea8c7
100|CSAndroid|1984460430|fca730177003ecbg23a2b2cb
421|CSAndroid|306124934|5bd231a6d31c01b6g1b3304b6
811|CSIos|F60FF480-32E5-4C1A-A5D1-35DEC10D496D|410067a122ddbfc7g2c0ea8c7
100|CSAndroid|1826891085|4955c60a812e2b6g1370b6
417|CSAndroid|2088845545|4955c60a812e2b6g1370b6
811|CSIos|F60FF480-32E5-4C1A-A5D1-35DEC10D496D|410067a122ddbfc7g2c0ea8c7
100|CSAndroid|1163727596|fff35bd008892a01gbedeeff
209|CSAndroid|1210858214|6e6e0ab339f23415g10f47215
811|CSIos|F60FF480-32E5-4C1A-A5D1-35DEC10D496D|410067a122ddbfc7g2c0ea8c7
811|CSIos|F60FF480-32E5-4C1A-A5D1-35DEC10D496D|410067a122ddbfc7g2c0ea8c7

Process finished with exit code 0

2、借助hadoop执行的Shell脚本
/Users/nisj/PycharmProjects/EsDataProc/awk_log_proc.sh
#!/usr/bin/env bash
#
# Read an access log from HDFS, extract the appSource, appkey, identifier and
# token request parameters of its GET requests, and write the result back to
# HDFS, one line per request in the form
#   <appSource pair> <appkey pair> <identifier pair> <token pair>
# where each pair is the raw "key=value" text from the query string.
#
# Usage: awk_log_proc.sh [HDFS_INPUT [HDFS_OUTPUT]]
#
# The exit status of the script is the return status of main.
set -u

readonly DEFAULT_HDFS_INPUT='/tmp/oss_old/localhost_access_log.2016-11-29.00.txt'
# NOTE(review): "%s" looks like an unfilled format placeholder; as written the
# result is stored in a file literally named "%s.txt". The value is kept as the
# default for backward compatibility; pass HDFS_OUTPUT to choose a real name.
readonly DEFAULT_HDFS_OUTPUT='/tmp/oss_old/%s.txt'

#######################################
# Filter access-log lines and extract the four request parameters.
# Arguments: none
# Inputs:    access-log lines on stdin
# Outputs:   the four "key=value" pairs separated by single spaces, one line
#            per kept input line; an absent parameter leaves its slot empty
# Returns:   exit status of the last awk stage
#######################################
extract_fields() {
  # Stage 1: drop heartbeat requests (fixed-string match, "." is literal).
  # Stage 2: keep GET requests that carry an appSource parameter.
  # Stage 3: split on "?"; take field 2 when it contains "htm", else field 3.
  #          The literal must be a double-quoted awk string: single quotes
  #          here would close the shell quoting and leave a bare, unset awk
  #          variable, i.e. a comparison against the empty string.
  # Stage 4: keep only the first whitespace-delimited word (the query string).
  # Stage 5: split the query on "&" and pick the parameters by key name.
  grep -vF -- '/api/room/heartbeat.htm' \
    | grep -- 'GET.*appSource' \
    | awk -F '?' '{ if (index($2, "htm") != 0) print $2; else print $3 }' \
    | awk '{ print $1 }' \
    | awk -F '&' '
        {
          # Reset per record so a missing parameter stays empty instead of
          # repeating the value seen on an earlier line.
          token = appSource = appkey = identifier = ""
          for (i = 1; i <= NF; i++) {
            # Match on the key only, so a value that merely contains e.g.
            # "token" is not mistaken for the token parameter.
            eq = index($i, "=")
            key = (eq > 0) ? substr($i, 1, eq - 1) : $i
            if (index(key, "token")) {
              token = $i
            } else if (index(key, "appSource")) {
              appSource = $i
            } else if (index(key, "appkey")) {
              appkey = $i
            } else if (index(key, "identifier")) {
              identifier = $i
            }
          }
          print appSource " " appkey " " identifier " " token
        }'
}

#######################################
# Entry point: stream HDFS_INPUT through extract_fields into HDFS_OUTPUT.
# Arguments: $1 - HDFS input path (optional, default DEFAULT_HDFS_INPUT)
#            $2 - HDFS output path (optional, default DEFAULT_HDFS_OUTPUT)
# Outputs:   diagnostics on stderr
# Returns:   127 if hadoop is unavailable, the status of the first failing
#            pipeline stage otherwise, 0 on success
#######################################
main() {
  local input=${1:-$DEFAULT_HDFS_INPUT}
  local output=${2:-$DEFAULT_HDFS_OUTPUT}
  local -a stage_status
  local rc

  if ! command -v hadoop >/dev/null 2>&1; then
    printf 'error: hadoop command not found\n' >&2
    return 127
  fi

  hadoop fs -cat "$input" | extract_fields | hadoop fs -put - "$output"
  # Capture every stage: a plain pipeline only reports the last command, which
  # would hide a failed "hadoop fs -cat".
  stage_status=("${PIPESTATUS[@]}")

  for rc in "${stage_status[@]}"; do
    if ((rc != 0)); then
      printf 'error: pipeline %s -> %s failed (stage statuses: %s)\n' \
        "$input" "$output" "${stage_status[*]}" >&2
      return "$rc"
    fi
  done
}

main "$@"
  
附:结果示例数据
/usr/bin/env bash /Users/nisj/PycharmProjects/EsDataProc/awk_log_proc.sh
_appSource=811 _appkey=CSIos _identifier=8A210F03-1791-49C1-AB31-4475A4666015 token=20a3fbc39ccd4c16g2e08f916
_appSource=811 _appkey=CSIos _identifier=8A210F03-1791-49C1-AB31-4475A4666015 token=20a3fbc39ccd4c16g2e08f916
_appSource=208 _appkey=CSAndroid _identifier=1468100392 token=375aa831f7aac93fg2c779f3f
_appSource=209 _appkey=CSAndroid _identifier=654209805 token=9f90de0f3940e6ceg34b49b32
_appSource=811 _appkey=CSIos _identifier=F60FF480-32E5-4C1A-A5D1-35DEC10D496D token=410067a122ddbfc7g2c0ea8c7
_appSource=209 _appkey=CSAndroid _identifier=163893806 token=a5136df76b4fa1c5g13c6c33b
_appSource=811 _appkey=CSIos _identifier=F60FF480-32E5-4C1A-A5D1-35DEC10D496D token=410067a122ddbfc7g2c0ea8c7
_appSource=100 _appkey=CSAndroid _identifier=1984460430 token=fca730177003ecbg23a2b2cb
_appSource=421 _appkey=CSAndroid _identifier=306124934 token=5bd231a6d31c01b6g1b3304b6
_appSource=811 _appkey=CSIos _identifier=F60FF480-32E5-4C1A-A5D1-35DEC10D496D token=410067a122ddbfc7g2c0ea8c7
_appSource=100 _appkey=CSAndroid _identifier=1826891085 token=4955c60a812e2b6g1370b6
_appSource=417 _appkey=CSAndroid _identifier=2088845545 token=4955c60a812e2b6g1370b6
_appSource=811 _appkey=CSIos _identifier=F60FF480-32E5-4C1A-A5D1-35DEC10D496D token=410067a122ddbfc7g2c0ea8c7
_appSource=100 _appkey=CSAndroid _identifier=1163727596 token=fff35bd008892a01gbedeeff
_appSource=209 _appkey=CSAndroid _identifier=1210858214 token=6e6e0ab339f23415g10f47215
_appSource=811 _appkey=CSIos _identifier=F60FF480-32E5-4C1A-A5D1-35DEC10D496D token=410067a122ddbfc7g2c0ea8c7
_appSource=811 _appkey=CSIos _identifier=F60FF480-32E5-4C1A-A5D1-35DEC10D496D token=410067a122ddbfc7g2c0ea8c7

Process finished with exit code 0
0 0