import os
import re
import csv
from collections import defaultdict

def analyze_logs(log_directory, target_path):
    """
    Count requests per client IP across all log files under a directory.

    Every file below ``log_directory`` (searched recursively) is read line
    by line. A line is counted when it consists of whitespace-separated
    columns where column 1 is an all-digit timestamp, column 2 is the
    client IP, column 3 is a domain and column 4 is the request path.
    Any further columns are optional and ignored. Lines that do not fit
    this layout are skipped.

    :param log_directory: Root directory that is walked for log files.
    :param target_path: Request path that is counted separately; it is
        compared to column 4 by exact string equality.
    :return: A ``defaultdict`` mapping each IP to a dict of the form
        ``{'total': <all requests>, 'target': <requests for target_path>}``.
    """
    # Per-IP counters, created on first sight of an IP.
    ip_stats = defaultdict(lambda: {'total': 0, 'target': 0})

    # The pattern stops right after the path column. The greedy ``\S+``
    # always consumes the whole path token, so records with exactly four
    # columns match as well as records that carry extra trailing columns.
    log_pattern = re.compile(
        r'^\d+\s+'   # timestamp (ignored)
        r'(\S+)\s+'  # client IP (column 2)
        r'\S+\s+'    # domain (ignored)
        r'(\S+)'     # request path (column 4)
    )

    for root, _dirs, files in os.walk(log_directory):
        for filename in files:
            file_path = os.path.join(root, filename)
            try:
                # errors='replace' keeps one undecodable byte from aborting
                # the file and leaving its requests out of the totals.
                with open(file_path, 'r', encoding='utf-8',
                          errors='replace') as f:
                    for line in f:
                        match = log_pattern.match(line.strip())
                        if not match:
                            continue
                        ip, path = match.groups()

                        counters = ip_stats[ip]
                        counters['total'] += 1
                        if path == target_path:
                            counters['target'] += 1
            except OSError as e:
                # Best effort: report the unreadable file and carry on.
                print(f"处理文件 {file_path} 时出错: {e}")

    return ip_stats

def save_results(ip_stats, output_file):
    """
    Write the per-IP statistics to a CSV file, busiest IPs first.

    The file starts with a header row, followed by one row per IP holding
    the IP, its total request count, its target-path request count and the
    target share formatted as a percentage with two decimals.

    :param ip_stats: Mapping of IP to a dict with ``'total'`` and
        ``'target'`` counters.
    :param output_file: Path of the CSV file to create or overwrite.
    """
    header = ["IP地址", "总请求数", "特定路径请求数", "特定路径占比"]

    # Rank by total request count, highest first.
    ranked = list(ip_stats.items())
    ranked.sort(key=lambda entry: entry[1]['total'], reverse=True)

    def as_row(ip, counters):
        # Build one CSV row; a zero total yields a 0 share instead of dividing.
        hits = counters['total']
        matched = counters['target']
        if hits > 0:
            share = (matched / hits) * 100
        else:
            share = 0
        return [ip, hits, matched, f"{share:.2f}%"]

    with open(output_file, 'w', encoding='utf-8', newline='') as out:
        sheet = csv.writer(out)
        sheet.writerow(header)
        sheet.writerows(as_row(ip, counters) for ip, counters in ranked)

if __name__ == "__main__":
    # Script settings: where the logs live, which request path gets its
    # own counter, and where the CSV report is written.
    source_dir = './log'
    watched_path = '/apple-touch-icon.png'
    report_file = 'ip_request_stats.csv'

    # Step 1: aggregate the per-IP counters from the log files.
    print("开始分析日志文件...")
    collected = analyze_logs(source_dir, watched_path)

    # Step 2: write the aggregated counters to the CSV report.
    print("保存统计结果...")
    save_results(collected, report_file)

    print(f"分析完成！结果已保存到 {report_file}")