查看access.log文件访问的状态码:
[root@localhost logs]# cat access.log | awk -F '" ' '{print $2}' | cut -d " " -f 1 | awk '{++S[$1]} END {for (key in S) print S[key],key}'
6181 200
400 400
421 304
37 206
1642 404
19 405
1、查看当天有多少个IP访问:
awk '{print $1}' |sort|uniq|wc -l
2、查看某一个页面被访问的次数:
grep "/index.php" | wc -l
3、查看每一个IP访问了多少个页面:
awk '{++S[$1]} END {for (a in S) print a,S[a]}'
4、将每个IP访问的页面数进行从大到小排序并显示前10名:
awk '{++S[$1]} END {for (a in S) print S[a],a}' | sort -rn | head -10
5、查看某一个IP访问了哪些页面:
grep ^111.111.111.111 | awk '{print $1,$7}'
6、去掉搜索引擎统计当天的页面:
awk '{print $12,$1}' | grep ^\"Mozilla | awk '{print $2}' |sort | uniq | wc -l
7、查看2009年6月21日14时这一个小时内有多少IP访问:
awk '{print $4,$1}' | grep 21/Jun/2009:14 | awk '{print $2}'| sort | uniq | wc -l
20.2.1.1. 刪除日志
刪除一个月前的日志
rm -f /www/logs/access.log.$(date -d '-1 month' +'%Y-%m')*
20.2.1.2. 统计爬虫
grep -E 'Googlebot|Baiduspider' /www/logs/www.example.com/access.2011-02-23.log | awk '{ print $1 }' |sort | uniq
20.2.1.3. 统计浏览器
cat /www/logs/example.com/access.2010-09-20.log | grep -v -E 'MSIE|Firefox|Chrome|Opera|Safari|Gecko|Maxthon' | sort | uniq -c | sort -r -n | head -n 100
20.2.1.4. IP 统计
# grep '22/May/2012' /tmp/myid.access.log | awk '{print$1}' | awk -F'.' '{print $1"."$2"."$3"."$4}' |sort | uniq -c | sort -r -n | head -n 10
2206 219.136.134.13
1497 182.34.15.248
1431 211.140.143.100
1431 119.145.149.106
1427 61.183.15.179
1427 218.6.8.189
1422 124.232.150.171
1421 106.187.47.224
1420 61.160.220.252
1418 114.80.201.18
[root@feelingirldress wwwlogs]# cat feelingirldress.com-access_log | awk '{print$1}' | awk -F'.' '{print $1"."$2"."$3"."$4}' |sort | uniq -c | sort -r -n | head -n 10
49778 27.154.242.206
39241 141.8.142.140
37615 107.183.138.90
28250 47.75.35.191
15651 66.249.93.20
15191 66.249.93.21
13928 104.52.37.70
13814 66.249.93.22
13639 95.108.213.3
10741 46.229.168.144
[root@feelingirldress wwwlogs]# cat lover-beauty.com-access_log | grep "SemrushBot" | awk '{print $1}' | awk -F'.' '{print $1"."$2"."$3"."$4}' | sort | uniq -c | sort -r -n
17223 46.229.168.144
17120 46.229.168.139
17107 46.229.168.141
17098 46.229.168.149
17025 46.229.168.143
16839 46.229.168.142
16771 46.229.168.140
15346 46.229.168.145
14335 46.229.168.132
14320 46.229.168.130
统计网段
# cat /www/logs/www/access.2010-09-20.log | awk '{print$1}' | awk -F'.' '{print $1"."$2"."$3".0"}' |sort | uniq -c | sort -r -n | head -n 200
压缩文件处理
zcat www.example.com.access.log-20130627.gz | grep '/xml/data.json' | awk '{print $1}' | awk -F'.' '{print $1"."$2"."$3"."$4}' | sort | uniq -c | sort -r -n | head -n 20
20.2.1.5. 统计域名
# cat /www/logs/access.2011-07-27.log | awk '{print $2}' | sort | uniq -c | sort -rn | more
20.2.1.6. HTTP Status
# cat /www/logs/access.2011-07-27.log | awk '{print $9}' | sort | uniq -c | sort -rn | more
5056585 304
1125579 200
7602 400
5 301
20.2.1.7. URL 统计
cat /www/logs/access.2011-07-27.log | awk '{print $7}' | sort | uniq -c | sort -rn | more
20.2.1.8. 文件流量统计
cat /www/logs/access.2011-08-03.log | awk '{sum[$7]+=$10}END{for(i in sum){print sum[i],i}}' | sort -rn | more
grep ' 200 ' /www/logs/access.2011-08-03.log | awk '{sum[$7]+=$10}END{for(i in sum){print sum[i],i}}' | sort -rn | more
20.2.1.9. URL访问量统计
# cat www.access.log | awk '{print $7}' | egrep '\?|&' | sort | uniq -c |sort -rn | more
20.2.1.10. 脚本运行速度
查出运行速度最慢的脚本
grep -v 0$ access.2010-11-05.log | awk -F '\" ' '{print $4" " $1}' web.log | awk '{print $1" "$8}' | sort -n -k 1 -r | uniq > /tmp/slow_url.txt
20.2.1.11. IP, URL 抽取
# tail -f /www/logs/www.365wine.com/access.2012-01-04.log | grep '/test.html' | awk '{print $1" "$7}'