# EUDR INGESTOR
"""Gradio ingestion module that sends GeoJSON uploads to the WHISP API.

The WHISP response (a Column/Value table) is rendered as a Spanish Markdown
summary intended to be used as context in a RAG pipeline.
"""
import logging
import os
import tempfile
from datetime import datetime
from pathlib import Path

import gradio as gr
import pandas as pd
from gradio_client import Client, handle_file

# Local imports
from .utils import getconfig

config = getconfig("params.cfg")

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

hf_token = os.getenv('HF_TOKEN')
if not hf_token:
    raise ValueError("HF_TOKEN environment variable not found")

# WHISP API configuration
WHISP_API_URL = config.get('whisp', 'WHISP_API_URL', fallback="https://giz-chatfed-whisp.hf.space/")

# Lower-cased string markers that mean "no value". "no disponible" is included
# because get_value() returns "No disponible" when a column is absent.
_MISSING_MARKERS = frozenset({"", "not available", "no disponible"})

# Country code -> display name used in the generated summary.
_COUNTRY_NAMES = {
    'HND': 'Honduras',
    'GTM': 'Guatemala',
    'ECU': 'Ecuador',
    'COL': 'Colombia',
    'PER': 'Peru',
    'BRA': 'Brasil',
    'BOL': 'Bolivia',
    'CRI': 'Costa Rica',
    'PAN': 'Panamá',
    'NIC': 'Nicaragua',
}

# Normalised (lower-cased, stripped) risk level -> Markdown label.
_RISK_LABELS = {
    "low": "*riesgo bajo*",
    "medium": "*riesgo medio*",
    "high": "*riesgo alto*",
    "very high": "*riesgo muy alto*",
    "more_info_needed": "*Se necesita más información.*",
}


def get_value(df, colname):
    """Fetch value from WhispAPI-style Column/Value dataframe.

    Args:
        df: DataFrame expected to have "Column" and "Value" columns.
        colname: Name to look up in the "Column" column.

    Returns:
        The first matching entry of "Value", or the string "No disponible"
        when the frame lacks those columns or no row matches.
    """
    if "Column" in df.columns and "Value" in df.columns:
        match = df.loc[df["Column"] == colname, "Value"]
        if not match.empty:
            return match.values[0]
    return "No disponible"


def _is_missing(value):
    """Return True when *value* is None/NaN or one of the missing-value strings."""
    if isinstance(value, str):
        return value.strip().lower() in _MISSING_MARKERS
    try:
        return bool(pd.isna(value))
    except (TypeError, ValueError):
        # Non-scalar input: pd.isna returns an array whose truth value is
        # ambiguous. Such a value is present, just not a plain scalar.
        return False


def _format_area(area_raw):
    """Render the area value as text, with precision depending on magnitude."""
    if _is_missing(area_raw):
        return "No disponible"
    try:
        area_num = float(area_raw)
    except (TypeError, ValueError):
        # Not numeric: show whatever the API returned.
        return str(area_raw)
    if area_num < 1:
        return f"{area_num:.3f} hectáreas"
    if area_num < 100:
        return f"{area_num:.2f} hectáreas"
    return f"{area_num:,.1f} hectáreas"


def _format_risk(risk_val):
    """Render a risk level as a Markdown label."""
    # Check _is_missing first so NA-like values never reach the truthiness test.
    if _is_missing(risk_val) or not risk_val:
        return "**No disponible**"
    if isinstance(risk_val, str):
        label = _RISK_LABELS.get(risk_val.lower().strip())
        if label is not None:
            return label
        # Unknown level: surface it rather than hiding it.
        return f"ℹ️ **{risk_val.title()}**"
    return str(risk_val)


def _format_deforestation(def_val):
    """Render a deforestation value as Markdown text.

    A numeric zero is a real measurement ("no deforestation detected"), so
    only None/NaN and the missing-value strings count as unavailable.
    """
    if _is_missing(def_val):
        return "*No disponible*"
    try:
        def_num = float(def_val)
    except (TypeError, ValueError):
        return f"ℹ️ **{def_val}**"
    if def_num == 0:
        return "* No se detectó deforestación.*"
    if def_num < 0.1:
        return f"*{def_num:.3f} hectáreas*"
    return f"*{def_num:.2f} hectáreas*"


def format_whisp_statistics(df):
    """Format WhispAPI statistics into readable text for RAG context.

    Args:
        df: Column/Value DataFrame built from the WHISP API response.

    Returns:
        A Markdown string (in Spanish) summarising location, risk levels and
        deforestation figures. If formatting fails, a string starting with
        "Error en el análisis geográfico:" is returned instead of raising.
    """
    try:
        country_raw = get_value(df, "Country")
        country = _COUNTRY_NAMES.get(country_raw, country_raw)
        admin_level = get_value(df, "Admin_Level_1")
        area_text = _format_area(get_value(df, "Area"))

        # Risk assessments
        risk_pcrop = get_value(df, "risk_pcrop")
        risk_acrop = get_value(df, "risk_acrop")
        risk_timber = get_value(df, "risk_timber")

        def_after_2020_raw = get_value(df, "TMF_def_after_2020")
        def_before_2020_raw = get_value(df, "TMF_def_before_2020")

        # Format for RAG context. The template is kept at column 0 so no
        # leading whitespace ends up in the generated Markdown.
        context = f"""
**Respuesta generada mediante inteligencia artificíal:** \n\n
**Resultados del análisis geográfico** \n\n
La siguiente información ha sido generada por la [WhispAPI creada por Forest Data Partnership (FDaP)](https://openforis.org/solutions/whisp/).

📍 **Detalles de la ubicación:**
- País: *{country}*
- Región administrativa: *{admin_level}*
- Área total: *{area_text}*

⚠️ **Evaluación del riesgo de deforestación:**
Los niveles de riesgo se basan en patrones históricos, factores ambientales y datos sobre el uso del suelo.

- Cultivos permanentes (Café, cacao, aceite de palma): {_format_risk(risk_pcrop)}
- Cultivos anuales (Soja, maíz, arroz): {_format_risk(risk_acrop)}
- Extracción de madera: {_format_risk(risk_timber)}

🌳 **Datos de deforestación:**
- Deforestación antes de 2020: {_format_deforestation(def_before_2020_raw)}
- Deforestación después de 2020: {_format_deforestation(def_after_2020_raw)}

Fuente: Forest Data Partnership (FDaP) WhispAPI
Fecha de análisis: {datetime.now().isoformat()}"""

        return context
    except Exception as e:
        # Callers expect a string in every case, so report the failure inline.
        return f"Error en el análisis geográfico: {str(e)}"


def process_geojson_whisp(file_content: bytes, filename: str) -> tuple[str, dict]:
    """Process GeoJSON file through WHISP API and return formatted context.

    Args:
        file_content: Raw bytes of the GeoJSON file.
        filename: Original file name (currently not used by the processing).

    Returns:
        A ``(context, metadata)`` tuple: the Markdown summary produced by
        ``format_whisp_statistics`` and a dict with the raw country, admin
        level, area and risk values.

    Raises:
        Exception: Wrapping any failure while writing the temporary file,
            calling the API or building the result.
    """
    tmp_file_path = None
    try:
        # The API client takes a file path, so spill the bytes to disk first.
        # The path is recorded before writing so a failed write is cleaned up.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.geojson') as tmp_file:
            tmp_file_path = tmp_file.name
            tmp_file.write(file_content)

        # Call WHISP API with authentication
        client = Client(WHISP_API_URL, hf_token=hf_token)
        result = client.predict(
            file=handle_file(tmp_file_path),
            api_name="/get_statistics"
        )

        # Convert result to DataFrame
        df = pd.DataFrame(result['data'], columns=result['headers'])

        # Format for RAG context
        formatted_context = format_whisp_statistics(df)

        metadata = {
            "analysis_type": "whisp_geojson",
            "country": get_value(df, "Country"),
            "admin_level": get_value(df, "Admin_Level_1"),
            "area": get_value(df, "Area"),
            "risk_levels": {
                "pcrop": get_value(df, "risk_pcrop"),
                "acrop": get_value(df, "risk_acrop"),
                "timber": get_value(df, "risk_timber")
            }
        }

        return formatted_context, metadata
    except Exception as e:
        logger.error("WHISP API error: %s", e)
        raise Exception(f"Failed to process GeoJSON through WHISP API: {str(e)}") from e
    finally:
        # Clean up temporary file; a cleanup failure must not mask the result
        # or the original error.
        if tmp_file_path is not None:
            try:
                os.unlink(tmp_file_path)
            except OSError as cleanup_error:
                logger.warning("Could not remove temporary file %s: %s", tmp_file_path, cleanup_error)


def ingest(file):
    """Main ingestion function - processes GeoJSON file and returns WHISP analysis context.

    Args:
        file: The uploaded file as delivered by the Gradio File component:
            either a filesystem path or an object exposing the path as
            ``.name``. ``None`` when nothing was uploaded.

    Returns:
        The Markdown context string, or "No file uploaded" when *file* is None.

    Raises:
        Exception: Wrapping any validation or processing failure.
    """
    if file is None:
        # Single string: the interface has exactly one output component.
        return "No file uploaded"

    try:
        file_path = file if isinstance(file, (str, os.PathLike)) else file.name
        filename = os.path.basename(file_path)

        # Check file extension before reading the file
        file_extension = os.path.splitext(filename)[1].lower()
        if file_extension not in ['.geojson', '.json']:
            raise ValueError(f"Unsupported file type: {file_extension}. Only GeoJSON files are supported.")

        with open(file_path, 'rb') as f:
            file_content = f.read()

        # Process through WHISP API; only the text context is returned to the UI
        context, _metadata = process_geojson_whisp(file_content, filename)

        logger.info("Successfully processed GeoJSON %s through WHISP API", filename)

        return context

    except Exception as e:
        logger.error("GeoJSON processing failed: %s", e)
        raise Exception(f"Processing failed: {str(e)}") from e


if __name__ == "__main__":
    ui = gr.Interface(
        fn=ingest,
        inputs=gr.File(
            label="GeoJSON Upload",
            file_types=[".geojson", ".json"]
        ),
        outputs=gr.Textbox(
            label="WHISP Analysis Context",
            lines=15,
            show_copy_button=True
        ),
        title="EUDR Ingestion Module - WHISP API",
        description="Processes GeoJSON files through WHISP API and returns geographic analysis context for RAG pipelines.",
        api_name="ingest"
    )

    ui.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )