# Page residue from the hosting site, kept as a comment so it is not parsed as code:
#   leavoigt's picture
#   Update app/main.py
#   3e2d62e verified
# EUDR INGESTOR
import gradio as gr
import os
import logging
from datetime import datetime
from pathlib import Path
from gradio_client import Client, handle_file
import pandas as pd
# Local imports
from .utils import getconfig
config = getconfig("params.cfg")

# Configure logging: INFO and above, timestamped, on the root logger.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# The token is passed to the WHISP client later on, so refuse to start
# without it instead of failing on the first request.
hf_token = os.environ.get('HF_TOKEN')
if hf_token is None or hf_token == "":
    raise ValueError("HF_TOKEN environment variable not found")

# WHISP API configuration: params.cfg wins, the public Space URL is the fallback.
WHISP_API_URL = config.get(
    'whisp',
    'WHISP_API_URL',
    fallback="https://giz-chatfed-whisp.hf.space/",
)
def get_value(df, colname):
    """Look up one entry in a WhispAPI-style ``Column``/``Value`` table.

    Args:
        df: DataFrame expected to carry a ``"Column"`` column with field
            names and a ``"Value"`` column with the matching values.
        colname: Field name to search for in ``df["Column"]``.

    Returns:
        The ``Value`` of the first row whose ``Column`` equals ``colname``,
        or the string ``"No disponible"`` when the table lacks either
        column or no row matches.
    """
    has_expected_layout = "Column" in df.columns and "Value" in df.columns
    if not has_expected_layout:
        return "No disponible"

    hits = df.loc[df["Column"] == colname, "Value"]
    if hits.empty:
        return "No disponible"

    # Duplicated field names resolve to the first occurrence.
    return hits.values[0]
# Spellings that mean "no data". get_value() reports a missing entry as
# "No disponible"; "Not available" is the English form this formatter has
# always recognised. Compared case-insensitively after stripping.
_MISSING_MARKERS = frozenset({"", "no disponible", "not available"})

# Three-letter country codes mapped to their Spanish display names.
_COUNTRY_NAMES = {
    'HND': 'Honduras', 'GTM': 'Guatemala', 'ECU': 'Ecuador',
    'COL': 'Colombia', 'PER': 'Peru', 'BRA': 'Brasil',
    'BOL': 'Bolivia', 'CRI': 'Costa Rica', 'PAN': 'Panamá',
    'NIC': 'Nicaragua',
}

# Normalised (lower-cased, stripped) risk level -> Markdown label.
_RISK_LABELS = {
    "low": "*riesgo bajo*",
    "medium": "*riesgo medio*",
    "high": "*riesgo alto*",
    "very high": "*riesgo muy alto*",
    "more_info_needed": "*Se necesita más información.*",
}


def _is_missing(value):
    """Return True when ``value`` carries no data (None, NaN or a marker string)."""
    if value is None:
        return True
    if isinstance(value, str):
        return value.strip().lower() in _MISSING_MARKERS
    if isinstance(value, float):
        # NaN is the only float that does not compare equal to itself.
        return value != value
    # Deliberately NOT a truthiness test: a numeric 0 is real data.
    return False


def _format_area(area_raw):
    """Render an area in hectares, with more decimals for smaller plots."""
    if _is_missing(area_raw):
        return "No disponible"
    try:
        area_num = float(area_raw)
    except (TypeError, ValueError):
        # Not numeric: show whatever the API sent rather than hiding it.
        return str(area_raw)
    if area_num < 1:
        return f"{area_num:.3f} hectáreas"
    if area_num < 100:
        return f"{area_num:.2f} hectáreas"
    return f"{area_num:,.1f} hectáreas"


def _format_risk(risk_val):
    """Translate a WHISP risk level into its Spanish Markdown label."""
    if _is_missing(risk_val):
        return "**No disponible**"
    if not isinstance(risk_val, str):
        return str(risk_val)
    label = _RISK_LABELS.get(risk_val.lower().strip())
    if label is not None:
        return label
    # Unknown level: pass it through, flagged as informational.
    return f"ℹ️ **{risk_val.title()}**"


def _format_deforestation(def_val):
    """Render a deforestation figure in hectares, or a no-deforestation note for 0."""
    if _is_missing(def_val):
        return "*No disponible*"
    try:
        def_num = float(def_val)
    except (TypeError, ValueError):
        return f"ℹ️ **{def_val}**"
    if def_num == 0:
        return "* No se detectó deforestación.*"
    if def_num < 0.1:
        return f"*{def_num:.3f} hectáreas*"
    return f"*{def_num:.2f} hectáreas*"


def format_whisp_statistics(df):
    """Format WhispAPI statistics into readable text for RAG context.

    Args:
        df: ``Column``/``Value`` table as read by ``get_value``. The fields
            used are Country, Admin_Level_1, Area, risk_pcrop, risk_acrop,
            risk_timber, TMF_def_before_2020 and TMF_def_after_2020.

    Returns:
        A Spanish Markdown summary (location, risk levels, deforestation
        before/after 2020, source and timestamp). This function does not
        raise: if anything fails, the returned string is an error message
        starting with "Error en el análisis geográfico:".
    """
    try:
        country_raw = get_value(df, "Country")
        # Unknown codes are shown as received.
        country = _COUNTRY_NAMES.get(country_raw, country_raw)
        admin_level = get_value(df, "Admin_Level_1")
        area_text = _format_area(get_value(df, "Area"))

        # Risk assessments
        risk_pcrop = get_value(df, "risk_pcrop")
        risk_acrop = get_value(df, "risk_acrop")
        risk_timber = get_value(df, "risk_timber")

        def_after_2020_raw = get_value(df, "TMF_def_after_2020")
        def_before_2020_raw = get_value(df, "TMF_def_before_2020")

        # The template lines start at column 0 on purpose: any indentation
        # here would become part of the emitted text.
        context = f"""
**Respuesta generada mediante inteligencia artificíal:** \n\n
**Resultados del análisis geográfico** \n\n
La siguiente información ha sido generada por la [WhispAPI creada por Forest Data Partnership (FDaP)](https://openforis.org/solutions/whisp/).
📍 **Detalles de la ubicación:**
- País: *{country}*
- Región administrativa: *{admin_level}*
- Área total: *{area_text}*
⚠️ **Evaluación del riesgo de deforestación:**
Los niveles de riesgo se basan en patrones históricos, factores ambientales y datos sobre el uso del suelo.
- Cultivos permanentes (Café, cacao, aceite de palma): {_format_risk(risk_pcrop)}
- Cultivos anuales (Soja, maíz, arroz): {_format_risk(risk_acrop)}
- Extracción de madera: {_format_risk(risk_timber)}
🌳 **Datos de deforestación:**
- Deforestación antes de 2020: {_format_deforestation(def_before_2020_raw)}
- Deforestación después de 2020: {_format_deforestation(def_after_2020_raw)}
Fuente: Forest Data Partnership (FDaP) WhispAPI
Fecha de análisis: {datetime.now().isoformat()}"""
        return context
    except Exception as e:
        # Callers embed the result directly as text, so report instead of raising.
        return f"Error en el análisis geográfico: {str(e)}"
def process_geojson_whisp(file_content: bytes, filename: str) -> tuple[str, dict]:
    """Process GeoJSON bytes through the WHISP API and return formatted context.

    The bytes are written to a temporary ``.geojson`` file, which is handed
    to the Gradio client via ``handle_file`` and removed again afterwards,
    whether or not the call succeeded.

    Args:
        file_content: Raw bytes of the GeoJSON document.
        filename: Name of the uploaded file. Not used by this function; kept
            so existing callers keep working.

    Returns:
        A ``(formatted_context, metadata)`` pair: the text produced by
        ``format_whisp_statistics`` and a dict with ``analysis_type``,
        ``country``, ``admin_level``, ``area`` and ``risk_levels``.

    Raises:
        Exception: On any failure (temp file, API call, result parsing);
            the original error is attached as ``__cause__``.
    """
    import tempfile

    tmp_file_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.geojson') as tmp_file:
            # Record the path before writing so a failed write is still cleaned up.
            tmp_file_path = tmp_file.name
            tmp_file.write(file_content)

        # Call WHISP API with authentication
        client = Client(WHISP_API_URL, hf_token=hf_token)
        result = client.predict(
            file=handle_file(tmp_file_path),
            api_name="/get_statistics"
        )

        # The response is read as a table payload: rows under 'data',
        # column names under 'headers'.
        df = pd.DataFrame(result['data'], columns=result['headers'])

        formatted_context = format_whisp_statistics(df)
        metadata = {
            "analysis_type": "whisp_geojson",
            "country": get_value(df, "Country"),
            "admin_level": get_value(df, "Admin_Level_1"),
            "area": get_value(df, "Area"),
            "risk_levels": {
                "pcrop": get_value(df, "risk_pcrop"),
                "acrop": get_value(df, "risk_acrop"),
                "timber": get_value(df, "risk_timber")
            }
        }
        return formatted_context, metadata
    except Exception as e:
        logger.error(f"WHISP API error: {str(e)}")
        raise Exception(f"Failed to process GeoJSON through WHISP API: {str(e)}") from e
    finally:
        if tmp_file_path is not None:
            try:
                os.unlink(tmp_file_path)
            except OSError as cleanup_error:
                # A leftover temp file must not turn a finished analysis into a failure.
                logger.warning(f"Could not remove temporary file {tmp_file_path}: {cleanup_error}")
def ingest(file):
    """Main ingestion function: run an uploaded GeoJSON file through WHISP.

    Args:
        file: The upload to process. ``None`` when nothing was uploaded;
            otherwise either a path string or an object whose ``name``
            attribute is the path of the file on disk.

    Returns:
        The WHISP analysis context as a single string, or the string
        "No file uploaded" when ``file`` is ``None``.

    Raises:
        Exception: If the extension is not ``.geojson``/``.json``, the file
            cannot be read, or WHISP processing fails. The original error is
            attached as ``__cause__``.
    """
    if file is None:
        # One string, matching the success path and the single output component.
        return "No file uploaded"

    try:
        # Accept both a plain path and a file-like wrapper exposing `.name`.
        file_path = getattr(file, "name", file)
        filename = os.path.basename(file_path)

        # Check the extension before reading, so unsupported uploads are
        # rejected without loading them into memory.
        file_extension = os.path.splitext(filename)[1].lower()
        if file_extension not in ['.geojson', '.json']:
            raise ValueError(f"Unsupported file type: {file_extension}. Only GeoJSON files are supported.")

        with open(file_path, 'rb') as f:
            file_content = f.read()

        # Process through WHISP API; the metadata is not part of this function's output.
        context, _metadata = process_geojson_whisp(file_content, filename)
        logger.info(f"Successfully processed GeoJSON {filename} through WHISP API")
        return context
    except Exception as e:
        logger.error(f"GeoJSON processing failed: {str(e)}")
        raise Exception(f"Processing failed: {str(e)}") from e
if __name__ == "__main__":
    # Build the two components first so the Interface call stays short.
    geojson_input = gr.File(
        label="GeoJSON Upload",
        file_types=[".geojson", ".json"],
    )
    context_output = gr.Textbox(
        label="WHISP Analysis Context",
        lines=15,
        show_copy_button=True,
    )

    # Single-input / single-output app around ingest(), exposed under the API name "ingest".
    ui = gr.Interface(
        fn=ingest,
        inputs=geojson_input,
        outputs=context_output,
        title="EUDR Ingestion Module - WHISP API",
        description="Processes GeoJSON files through WHISP API and returns geographic analysis context for RAG pipelines.",
        api_name="ingest",
    )

    # Listen on all interfaces at port 7860 and surface errors in the UI.
    ui.launch(server_name="0.0.0.0", server_port=7860, show_error=True)