最終更新 1750583367

通过Python实现CDN访问日志筛选

LiuShen's Avatar LiuShen revised this gist 1750583367. Go to revision

1 file changed, 80 insertions

main.py(file created)

@@ -0,0 +1,80 @@
1 + import os
2 + import re
3 + import csv
4 + from collections import defaultdict
5 +
def analyze_logs(log_directory, target_path):
    """
    Count, per client IP, the total requests and the requests for one path.

    Every file found under ``log_directory`` (recursively) is read line by
    line. A line is counted only if it starts with a numeric timestamp
    column followed by IP, domain, request-path columns and at least one
    further column, all whitespace-separated; other lines are skipped.

    :param log_directory: directory that is walked recursively for log files
    :param target_path: request path counted separately (exact string match)
    :return: defaultdict mapping IP -> {'total': int, 'target': int}
    """
    ip_stats = defaultdict(lambda: {'total': 0, 'target': 0})

    # Column layout of one log line.
    log_pattern = re.compile(
        r'^\d+\s+'     # timestamp (ignored)
        r'(\S+)\s+'    # client IP (column 2)
        r'\S+\s+'      # domain (ignored)
        r'(\S+)\s+'    # request path (column 4)
        r'.*$'         # remaining columns (ignored)
    )

    for root, _dirs, files in os.walk(log_directory):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            try:
                # errors='replace': a stray non-UTF-8 byte (e.g. in a
                # user-agent or URL) must not abort the whole file and
                # silently drop every request in it.
                with open(file_path, 'r', encoding='utf-8',
                          errors='replace') as f:
                    for line in f:
                        match = log_pattern.match(line.strip())
                        if not match:
                            continue
                        ip, path = match.groups()
                        entry = ip_stats[ip]
                        entry['total'] += 1
                        if path == target_path:
                            entry['target'] += 1
            except OSError as e:
                # Best effort: report unreadable files and keep going.
                print(f"处理文件 {file_path} 时出错: {e}")

    return ip_stats
45 +
def save_results(ip_stats, output_file):
    """
    Write the per-IP statistics to a CSV file.

    Rows are ordered by total requests (descending). Ties are broken by
    target-path requests (descending) and then by IP string (ascending),
    so the output does not depend on the order in which the log files
    happened to be read.

    :param ip_stats: mapping IP -> {'total': int, 'target': int}
    :param output_file: path of the CSV file to create or overwrite
    """
    def sort_key(item):
        ip, counts = item
        return (-counts['total'], -counts['target'], ip)

    ordered = sorted(ip_stats.items(), key=sort_key)

    # newline='' lets the csv module control line endings itself.
    with open(output_file, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        # Header row
        writer.writerow(["IP地址", "总请求数", "特定路径请求数", "特定路径占比"])

        for ip, counts in ordered:
            total = counts['total']
            target = counts['target']
            # Guard against division by zero for an entry with no requests.
            ratio = (target / total) * 100 if total > 0 else 0
            writer.writerow([ip, total, target, f"{ratio:.2f}%"])
if __name__ == "__main__":
    # Run configuration
    log_dir = './log'                       # directory containing the log files
    target_path = '/apple-touch-icon.png'   # request path counted separately
    output_file = 'ip_request_stats.csv'    # CSV report to write

    # os.walk yields nothing for a missing directory, so without this check
    # a wrong log_dir would produce an empty report and a success message.
    if not os.path.isdir(log_dir):
        raise SystemExit(f"日志目录不存在: {log_dir}")

    print("开始分析日志文件...")
    stats = analyze_logs(log_dir, target_path)

    print("保存统计结果...")
    save_results(stats, output_file)

    print(f"分析完成!结果已保存到 {output_file}")
Newer Older