#!/usr/bin/env python3
"""
Script para analizar el archivo Excel de extensiones usando openpyxl
"""
import openpyxl
import json
import csv

def _unique_headers(raw_headers):
    """Build one distinct, non-empty column name per header cell.

    Blank header cells (``None`` or whitespace-only text) become ``Col_<n>``,
    where ``<n>`` is the 1-based column position. Repeated names receive a
    numeric suffix (``_2``, ``_3``, ...) so that no column overwrites another
    when rows are stored as dictionaries keyed by header.
    """
    headers = []
    taken = set()
    for position, raw in enumerate(raw_headers, 1):
        # Test for None/blank rather than truthiness so that legitimate
        # falsy headers such as 0 keep their own name.
        is_blank = raw is None or not str(raw).strip()
        base = f"Col_{position}" if is_blank else str(raw)
        name = base
        suffix = 2
        while name in taken:
            name = f"{base}_{suffix}"
            suffix += 1
        taken.add(name)
        headers.append(name)
    return headers


def _sorted_for_display(values):
    """Return *values* as a sorted list, tolerating mixed types."""
    try:
        return sorted(values)
    except TypeError:
        # A column mixing e.g. numbers and text cannot be ordered directly;
        # fall back to grouping by type name, then by text representation.
        return sorted(values, key=lambda v: (type(v).__name__, str(v)))


def _json_default(value):
    """Convert a cell value that ``json`` cannot encode natively.

    Objects exposing ``isoformat()`` (dates, times, datetimes) are written in
    ISO 8601 form; anything else falls back to ``str(value)``. This is a
    deliberate choice for a diagnostic dump, where losing the exact type is
    preferable to aborting the export.
    """
    to_iso = getattr(value, "isoformat", None)
    if callable(to_iso):
        return to_iso()
    return str(value)


def analyze_excel(file_path, output_dir='/var/www/html/docs'):
    """Analyze an Excel workbook and export its active sheet.

    Prints a report to stdout (sheet names, dimensions, headers, the first
    10 records and per-column statistics) and writes two files into
    *output_dir*:

    * ``excel_analysis.json`` - summary plus the first 20 records.
    * ``extensiones_data.csv`` - every record of the active sheet.

    Args:
        file_path: Path of the workbook to load.
        output_dir: Existing directory that receives the generated files.
            Defaults to the location the script has always written to.

    Returns:
        None. Any exception is caught, reported on stdout together with its
        traceback, and not re-raised.
    """
    import os

    wb = None
    try:
        # data_only=True yields cached formula results instead of formulas.
        wb = openpyxl.load_workbook(file_path, data_only=True)

        print("=== ANÁLISIS DEL ARCHIVO EXCEL ===")
        print(f"\nArchivo: {file_path}")
        print(f"Hojas disponibles: {wb.sheetnames}")

        # Work on the workbook's active sheet (not necessarily the first).
        sheet = wb.active
        print(f"\nHoja activa: {sheet.title}")

        max_row = sheet.max_row
        max_col = sheet.max_column
        print(f"Filas: {max_row}")
        print(f"Columnas: {max_col}")

        # The first row holds the headers.
        first_row = next(
            sheet.iter_rows(
                min_row=1, max_row=1, max_col=max_col, values_only=True
            ),
            (),
        )
        headers = _unique_headers(first_row)

        print("\n=== COLUMNAS ENCONTRADAS ===")
        for i, header in enumerate(headers, 1):
            print(f"{i}. {header}")

        # Every row after the header becomes a dict keyed by column name.
        data = [
            dict(zip(headers, row_values))
            for row_values in sheet.iter_rows(
                min_row=2, max_row=max_row, max_col=max_col, values_only=True
            )
        ]

        print("\n=== DATOS ENCONTRADOS ===")
        print(f"Total de registros: {len(data)}")

        print("\n=== PRIMEROS 10 REGISTROS ===")
        for i, record in enumerate(data[:10], 1):
            print(f"\nRegistro {i}:")
            for key, value in record.items():
                if value is not None:
                    print(f"  {key}: {value}")

        print("\n=== ANÁLISIS POR COLUMNA ===")
        for header in headers:
            values = [row[header] for row in data if row[header] is not None]
            unique_values = set(values)
            print(f"\n{header}:")
            print(f"  - Valores no nulos: {len(values)}")
            print(f"  - Valores únicos: {len(unique_values)}")
            # Only list the distinct values for low-cardinality columns.
            if len(unique_values) <= 10:
                print(f"  - Valores: {_sorted_for_display(unique_values)}")

        summary = {
            "file": file_path,
            "sheet": sheet.title,
            "total_rows": len(data),  # Header row excluded
            "total_columns": max_col,
            "headers": headers,
            "data": data[:20],  # First 20 records as a sample
        }

        json_path = os.path.join(output_dir, 'excel_analysis.json')
        with open(json_path, 'w', encoding='utf-8') as f:
            # default= keeps date/time cells from aborting the export.
            json.dump(
                summary, f, indent=2, ensure_ascii=False, default=_json_default
            )

        csv_path = os.path.join(output_dir, 'extensiones_data.csv')
        with open(csv_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=headers)
            writer.writeheader()
            writer.writerows(data)

        print("\n=== ARCHIVOS GENERADOS ===")
        print("- excel_analysis.json: Análisis completo en JSON")
        print("- extensiones_data.csv: Datos en formato CSV")

    except Exception as e:
        # Top-level boundary of the script: report the failure, do not raise.
        print(f"Error al analizar el archivo: {str(e)}")
        import traceback
        traceback.print_exc()
    finally:
        # Release the workbook even when the analysis failed midway.
        if wb is not None:
            wb.close()

if __name__ == "__main__":
    import sys

    # Analyze the workbook given as the first command-line argument; with no
    # argument, fall back to the location the script has always used.
    default_path = '/var/www/html/docs/Extensionesparacentralita.xlsx'
    analyze_excel(sys.argv[1] if len(sys.argv) > 1 else default_path)