LiuShen revised this gist 4 months ago. Go to revision
1 file changed, 80 insertions
main.py(file created)
| @@ -0,0 +1,80 @@ | |||
| 1 | + | import os | |
| 2 | + | import re | |
| 3 | + | import csv | |
| 4 | + | from collections import defaultdict | |
| 5 | + | ||
def analyze_logs(log_directory, target_path):
    """Count requests per client IP across every file under a directory.

    Each log line is expected to be whitespace-separated, with a numeric
    timestamp in column 1, the client IP in column 2, a domain in column 3
    and the request path in column 4. Any further columns are ignored, and
    lines that do not fit this layout are skipped.

    :param log_directory: Directory that is walked recursively for log files.
    :param target_path: Request path whose hits are counted separately; it is
        compared for exact equality with column 4.
    :return: ``defaultdict`` mapping each IP to
        ``{'total': <all requests>, 'target': <requests for target_path>}``.
    """
    ip_stats = defaultdict(lambda: {'total': 0, 'target': 0})

    # The trailing columns are optional: every line is stripped before it is
    # matched, so a line that ends right after the path column carries no
    # trailing whitespace and must still be counted.
    log_pattern = re.compile(
        r'^\d+\s+'       # timestamp (ignored)
        r'(\S+)\s+'      # client IP (column 2)
        r'\S+\s+'        # domain (ignored)
        r'(\S+)'         # request path (column 4)
        r'(?:\s+.*)?$'   # remaining columns, if any (ignored)
    )

    for root, _dirs, files in os.walk(log_directory):
        for name in files:
            file_path = os.path.join(root, name)
            try:
                # errors='replace' keeps a single undecodable byte from
                # aborting the rest of the file.
                with open(file_path, 'r', encoding='utf-8',
                          errors='replace') as f:
                    for line in f:
                        match = log_pattern.match(line.strip())
                        if not match:
                            continue
                        ip, path = match.groups()
                        entry = ip_stats[ip]
                        entry['total'] += 1
                        if path == target_path:
                            entry['target'] += 1
            except OSError as e:
                # Best effort: report the unreadable file and keep going.
                print(f"处理文件 {file_path} 时出错: {e}")

    return ip_stats
| 45 | + | ||
def save_results(ip_stats, output_file):
    """Write the per-IP statistics to a CSV file, busiest IPs first.

    :param ip_stats: Mapping of IP -> ``{'total': int, 'target': int}``.
    :param output_file: Path of the CSV file to create or overwrite.
    """
    def request_count(entry):
        # entry is an (ip, counters) pair
        return entry[1]['total']

    # Order by total request count, highest first (stable for ties).
    ranked = list(ip_stats.items())
    ranked.sort(key=request_count, reverse=True)

    def data_rows():
        for address, counters in ranked:
            hits = counters['total']
            matched = counters['target']
            # Guard against division by zero for IPs with no requests.
            if hits > 0:
                percent = (matched / hits) * 100
            else:
                percent = 0
            yield [address, hits, matched, f"{percent:.2f}%"]

    with open(output_file, 'w', encoding='utf-8', newline='') as out:
        sink = csv.writer(out)
        # Header row first, then one row per IP.
        sink.writerow(["IP地址", "总请求数", "特定路径请求数", "特定路径占比"])
        sink.writerows(data_rows())
| 67 | + | ||
if __name__ == "__main__":
    # Script settings
    output_file = 'ip_request_stats.csv'      # CSV report to write
    watched_path = '/apple-touch-icon.png'    # request path counted separately
    logs_root = './log'                       # directory holding the log files

    # Step 1: scan the logs and collect per-IP counters.
    print("开始分析日志文件...")
    per_ip = analyze_logs(logs_root, watched_path)

    # Step 2: write the report.
    print("保存统计结果...")
    save_results(per_ip, output_file)

    print(f"分析完成!结果已保存到 {output_file}")
Newer
Older