最後活躍 1750583367

通过 Python 实现 CDN 访问日志筛选

main.py 原始檔案
1import os
2import re
3import csv
4from collections import defaultdict
5
def analyze_logs(log_directory, target_path):
    """
    Count requests per client IP across every log file under a directory.

    Each line is expected to be whitespace-separated: a numeric timestamp in
    column 1, the client IP in column 2, the domain in column 3 and the
    request path in column 4, followed by at least one more column. Lines
    that do not match this layout are skipped.

    :param log_directory: directory that is walked recursively for log files
    :param target_path: request path whose hits are counted separately
        (compared with exact string equality)
    :return: defaultdict mapping each IP to ``{'total': int, 'target': int}``
    """
    # A missing IP starts with both counters at zero.
    ip_stats = defaultdict(lambda: {'total': 0, 'target': 0})

    # Pattern for one log line; only the IP and the path are captured.
    log_pattern = re.compile(
        r'^\d+\s+'   # timestamp (ignored)
        r'(\S+)\s+'  # IP address (column 2)
        r'\S+\s+'    # domain (ignored)
        r'(\S+)\s+'  # request path (column 4)
        r'.*$'       # rest of the line (ignored)
    )

    for root, _dirs, files in os.walk(log_directory):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            try:
                # errors='replace' keeps a stray non-UTF-8 byte from raising
                # mid-file, which would drop every remaining line of the file
                # from the statistics.
                with open(file_path, 'r', encoding='utf-8',
                          errors='replace') as f:
                    for line in f:
                        match = log_pattern.match(line.strip())
                        if not match:
                            continue
                        ip, path = match.groups()

                        entry = ip_stats[ip]
                        entry['total'] += 1
                        if path == target_path:
                            entry['target'] += 1
            except OSError as e:
                # Best effort: report an unreadable file and move on.
                print(f"处理文件 {file_path} 时出错: {e}")

    return ip_stats
45
def save_results(ip_stats, output_file):
    """
    Write the per-IP statistics to a CSV report, busiest IPs first.

    :param ip_stats: mapping of IP -> dict with 'total' and 'target' counts
    :param output_file: path of the CSV file to create or overwrite
    """
    def _as_row(ip, counts):
        # One CSV row: IP, both counters, and the target share as a percentage.
        total = counts['total']
        target = counts['target']
        if total > 0:
            percent = (target / total) * 100
        else:
            percent = 0
        return [ip, total, target, f"{percent:.2f}%"]

    # Order by total request count, highest first.
    ranked = list(ip_stats.items())
    ranked.sort(key=lambda item: item[1]['total'], reverse=True)

    with open(output_file, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        # Header row, then one row per IP.
        writer.writerow(["IP地址", "总请求数", "特定路径请求数", "特定路径占比"])
        writer.writerows(_as_row(ip, counts) for ip, counts in ranked)
67
if __name__ == "__main__":
    # Script settings: the CSV report to produce, the request path to single
    # out, and the folder that holds the log files.
    output_file = 'ip_request_stats.csv'
    target_path = '/apple-touch-icon.png'
    log_dir = './log'

    # Step 1: gather the per-IP counters from the logs.
    print("开始分析日志文件...")
    stats = analyze_logs(log_dir, target_path)

    # Step 2: write them out as CSV.
    print("保存统计结果...")
    save_results(stats, output_file)

    print(f"分析完成!结果已保存到 {output_file}")