#!/usr/bin/env python3
"""API script that aggregates lead gen data into data.json"""

import csv
import glob
import json
import os
from collections import defaultdict
from datetime import datetime

# Project root: parent of the directory containing this script.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Aggregated dashboard payload, written next to this script.
OUTPUT_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data.json")

def count_csv_rows(filepath):
    """Return the number of data rows in a CSV file (header excluded).

    Args:
        filepath: Path to the CSV file.

    Returns:
        Line count minus one for the header, clamped at 0; returns 0
        when the file is missing, empty, or unreadable.
    """
    try:
        with open(filepath, 'r') as f:
            # Count lazily instead of materializing all lines in memory.
            total = sum(1 for _ in f)
        return max(0, total - 1)  # subtract the header line
    except OSError:
        # Missing/unreadable file counts as "no rows". Narrowed from a
        # bare except so real bugs (e.g. NameError) are not swallowed.
        return 0

def read_csv_data(filepath):
    """Read a CSV file and return its rows as a list of dicts.

    Uses the stdlib ``csv`` module, which correctly handles quoted
    fields containing commas *and embedded newlines* — the previous
    hand-rolled line-by-line parser broke on multi-line quoted fields.

    Args:
        filepath: Path to the CSV file.

    Returns:
        List of ``{header: value}`` dicts with whitespace-stripped
        headers and values; ``[]`` when the file is empty, missing,
        or unreadable.
    """
    try:
        # newline='' is the documented mode for the csv module.
        with open(filepath, 'r', newline='') as f:
            reader = csv.reader(f)
            try:
                headers = [h.strip() for h in next(reader)]
            except StopIteration:
                return []  # empty file: no header row

            rows = []
            for values in reader:
                # Skip truly blank / whitespace-only lines (a line like
                # ",," still yields a row of empty fields, as before).
                if not values or (len(values) == 1 and not values[0].strip()):
                    continue
                row = {}
                for i, h in enumerate(headers):
                    # Rows shorter than the header are padded with ''.
                    row[h] = values[i].strip() if i < len(values) else ''
                rows.append(row)
            return rows
    except Exception as e:
        # Best-effort reader: log and return [] so one bad file does
        # not abort the whole aggregation run.
        print(f"Error reading {filepath}: {e}")
        return []

def get_stats():
    """Compute pipeline counters by scanning the lead CSV directories.

    Returns a dict with scraped/enriched lead counts, pending-draft and
    sent-email counts, and reply/hot-lead counts.
    """
    leads_root = f'{BASE_DIR}/leads'

    # Raw and enriched leads: just row counts across every CSV.
    scraped = sum(
        count_csv_rows(p) for p in glob.glob(f'{leads_root}/raw/*.csv')
    )
    enriched = sum(
        count_csv_rows(p) for p in glob.glob(f'{leads_root}/enriched/*.csv')
    )

    # Drafts: split rows by their workflow status.
    pending = sent = 0
    for path in glob.glob(f'{leads_root}/drafts/*.csv'):
        for row in read_csv_data(path):
            status = row.get('status')
            if status == 'pending_approval':
                pending += 1
            elif status in ('approved', 'sent'):
                sent += 1

    # Replies: total count plus those classified as interested.
    reply_count = hot = 0
    for path in glob.glob(f'{leads_root}/replies/*.csv'):
        for row in read_csv_data(path):
            reply_count += 1
            if row.get('classification') == 'INTERESTED':
                hot += 1

    return {
        'leads_scraped': scraped,
        'leads_enriched': enriched,
        'drafts_pending': pending,
        'emails_sent': sent,
        'replies': reply_count,
        'hot_leads': hot,
    }

def get_costs():
    """Read cost data from reports/daily/costs.json.

    Returns:
        The parsed cost dict, or a zeroed default structure when the
        file is missing or contains invalid JSON.
    """
    try:
        with open(f'{BASE_DIR}/reports/daily/costs.json', 'r') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        # Narrowed from a bare except: only a missing/unreadable file or
        # corrupt JSON falls back to defaults; other errors surface.
        return {
            'daily_limit': 5.00,
            'monthly_limit': 100.00,
            'today': {'total': 0, 'minimax': 0, 'gemini_flash_lite': 0, 'deepseek': 0, 'sonnet': 0},
            'month_total': 0
        }

def get_leads():
    """Return every enriched lead row across leads/enriched/*.csv."""
    pattern = f'{BASE_DIR}/leads/enriched/*.csv'
    return [
        row
        for path in glob.glob(pattern)
        for row in read_csv_data(path)
    ]

def get_drafts():
    """Return draft rows still awaiting approval (status == 'pending_approval')."""
    pattern = f'{BASE_DIR}/leads/drafts/*.csv'
    return [
        row
        for path in glob.glob(pattern)
        for row in read_csv_data(path)
        if row.get('status') == 'pending_approval'
    ]

def get_replies():
    """Return every reply row across leads/replies/*.csv."""
    pattern = f'{BASE_DIR}/leads/replies/*.csv'
    collected = []
    for path in glob.glob(pattern):
        collected += read_csv_data(path)
    return collected

def generate_activity():
    """Build a short activity feed (list of {time, text, type} dicts)
    from the current pipeline stats, newest-first fixed ordering."""
    timestamp = datetime.now().isoformat()
    stats = get_stats()

    # (stats key, label, feed color) — an entry is emitted only when
    # the corresponding counter is nonzero.
    feed_spec = (
        ('leads_scraped', 'Leads scraped', 'green'),
        ('leads_enriched', 'Leads enriched', 'green'),
        ('drafts_pending', 'Drafts pending', 'yellow'),
        ('hot_leads', 'Hot leads', 'red'),
    )

    activity = [
        {'time': timestamp, 'text': f'{label}: {stats[key]}', 'type': color}
        for key, label, color in feed_spec
        if stats[key] > 0
    ]

    # Always close the feed with a refresh marker.
    activity.append({
        'time': timestamp,
        'text': 'Dashboard data refreshed',
        'type': 'blue'
    })

    # Cap the feed length (defensive; at most 5 entries are built above).
    return activity[:10]

def get_files():
    """Scan the pipeline directories and return per-folder file listings.

    Returns a dict mapping folder label -> list of file entries, each
    with name, BASE_DIR-relative path, size (bytes), and mtime (ISO).
    """
    folders = {
        'raw': f'{BASE_DIR}/leads/raw',
        'enriched': f'{BASE_DIR}/leads/enriched',
        'drafts': f'{BASE_DIR}/leads/drafts',
        'approved': f'{BASE_DIR}/leads/approved',
        'replies': f'{BASE_DIR}/leads/replies',
        'weekly_reports': f'{BASE_DIR}/reports/weekly',
        'daily_reports': f'{BASE_DIR}/reports/daily'
    }

    listing = {}
    for label, directory in folders.items():
        entries = []
        if os.path.exists(directory):
            for path in glob.glob(f'{directory}/*'):
                # Skip subdirectories; only plain files are listed.
                if not os.path.isfile(path):
                    continue
                info = os.stat(path)
                entries.append({
                    'name': os.path.basename(path),
                    'path': path.replace(BASE_DIR, '').lstrip('/'),
                    'size': info.st_size,
                    'modified': datetime.fromtimestamp(info.st_mtime).isoformat()
                })
        listing[label] = entries

    return listing

def main():
    """Aggregate all pipeline data and write data.json and files.json.

    Side effects: writes OUTPUT_FILE and BASE_DIR/reports/files.json,
    creating their parent directories if needed, and prints a summary.
    """
    data = {
        'generated_at': datetime.now().isoformat(),
        'stats': get_stats(),
        'costs': get_costs(),
        'leads': get_leads(),
        'drafts': get_drafts(),
        'replies': get_replies(),
        'activity': generate_activity()
    }

    # Ensure the output directory exists so a fresh checkout doesn't crash.
    os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
    with open(OUTPUT_FILE, 'w') as f:
        json.dump(data, f, indent=2)

    # Also generate files.json with the directory listings.
    files_data = {
        'generated_at': datetime.now().isoformat(),
        'files': get_files()
    }

    reports_dir = f'{BASE_DIR}/reports'
    os.makedirs(reports_dir, exist_ok=True)
    with open(f'{reports_dir}/files.json', 'w') as f:
        json.dump(files_data, f, indent=2)

    print(f"Data generated: {OUTPUT_FILE}")
    print(f"  - Leads scraped: {data['stats']['leads_scraped']}")
    print(f"  - Leads enriched: {data['stats']['leads_enriched']}")
    print(f"  - Drafts pending: {data['stats']['drafts_pending']}")
    print(f"  - Hot leads: {data['stats']['hot_leads']}")
    print(f"  - Daily spend: ${data['costs']['today']['total']:.2f}")

# Entry point: regenerate the dashboard JSON when run as a script.
if __name__ == '__main__':
    main()
