Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 8,457 Bytes
1582855 4296589 dcd4fb7 4296589 8eb3c54 1582855 33a80f5 1582855 33a80f5 9f971bd 33a80f5 276b7df 3e2d62e 1582855 33a80f5 276b7df 47d6fa2 1582855 33a80f5 276b7df 47d6fa2 1582855 4296589 e926953 1582855 22e749b 1582855 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 |
# EUDR INGESTOR
import gradio as gr
import os
import logging
from datetime import datetime
from pathlib import Path
from gradio_client import Client, handle_file
import pandas as pd
# Local imports
from .utils import getconfig
# Settings object produced by the project-local getconfig helper for params.cfg.
config = getconfig("params.cfg")

# Root logging setup: timestamped records at INFO level and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# The token is mandatory: abort at import time when it is unset or empty.
hf_token = os.environ.get('HF_TOKEN')
if not hf_token:
    raise ValueError("HF_TOKEN environment variable not found")

# WHISP endpoint: value from the [whisp] config section, else the default URL.
WHISP_API_URL = config.get(
    'whisp',
    'WHISP_API_URL',
    fallback="https://giz-chatfed-whisp.hf.space/",
)
def get_value(df, colname):
    """Look up ``colname`` in a ``Column``/``Value`` style dataframe.

    Args:
        df: DataFrame expected to carry a ``Column`` column holding field
            names and a ``Value`` column holding the matching values.
        colname: Field name to search for in ``Column``.

    Returns:
        The ``Value`` entry of the first row whose ``Column`` equals
        ``colname``; the placeholder ``"No disponible"`` when either column
        is absent or no row matches.
    """
    placeholder = "No disponible"

    # Without both columns there is nothing to search.
    has_layout = all(name in df.columns for name in ("Column", "Value"))
    if not has_layout:
        return placeholder

    row_mask = df["Column"] == colname
    hits = df.loc[row_mask, "Value"]
    if hits.empty:
        return placeholder

    # Several rows may match; only the first one is reported.
    return hits.values[0]
# ISO 3166-1 alpha-3 codes mapped to the Spanish country names shown to users.
_COUNTRY_NAMES = {
    'HND': 'Honduras', 'GTM': 'Guatemala', 'ECU': 'Ecuador',
    'COL': 'Colombia', 'PER': 'Peru', 'BRA': 'Brasil',
    'BOL': 'Bolivia', 'CRI': 'Costa Rica', 'PAN': 'Panamá',
    'NIC': 'Nicaragua',
}

# Placeholder strings (lower-cased) that mean "no value". "no disponible" is
# what get_value() returns for an absent field.
_MISSING_MARKERS = frozenset({"", "not available", "no disponible"})

# Known risk levels (lower-cased) mapped to their Spanish display text.
_RISK_LABELS = {
    "low": "*riesgo bajo*",
    "medium": "*riesgo medio*",
    "high": "*riesgo alto*",
    "very high": "*riesgo muy alto*",
    "more_info_needed": "*Se necesita más información.*",
}


def _is_missing(value):
    """Return True when ``value`` carries no usable information."""
    if value is None:
        return True
    if isinstance(value, str):
        return value.strip().lower() in _MISSING_MARKERS
    # NaN is the only float that differs from itself; a numeric 0 is real data.
    return isinstance(value, float) and value != value


def _format_area(area_raw):
    """Render an area in hectares with precision that depends on its size."""
    if _is_missing(area_raw):
        return "No disponible"
    try:
        area_num = float(area_raw)
    except (TypeError, ValueError):
        # Not numeric: show the raw value rather than failing the report.
        return str(area_raw)
    if area_num < 1:
        return f"{area_num:.3f} hectáreas"
    if area_num < 100:
        return f"{area_num:.2f} hectáreas"
    return f"{area_num:,.1f} hectáreas"


def _format_risk(risk_val):
    """Translate a risk level into its Spanish Markdown label."""
    if _is_missing(risk_val):
        return "**No disponible**"
    if not isinstance(risk_val, str):
        return str(risk_val)
    label = _RISK_LABELS.get(risk_val.lower().strip())
    if label is not None:
        return label
    # Unknown level: surface it verbatim (title-cased) instead of hiding it.
    return f"ℹ️ **{risk_val.title()}**"


def _format_deforestation(def_val):
    """Render a deforested area in hectares as Markdown text."""
    if _is_missing(def_val):
        return "*No disponible*"
    try:
        def_num = float(def_val)
    except (TypeError, ValueError):
        return f"ℹ️ **{def_val}**"
    if def_num == 0:
        # No space after the opening asterisk, otherwise Markdown does not
        # treat the text as emphasis and prints the asterisks literally.
        return "*No se detectó deforestación.*"
    if def_num < 0.1:
        return f"*{def_num:.3f} hectáreas*"
    return f"*{def_num:.2f} hectáreas*"


def format_whisp_statistics(df):
    """Format WhispAPI statistics into readable Spanish text for RAG context.

    Args:
        df: Column/Value style DataFrame; fields are read with ``get_value``
            (Country, Admin_Level_1, Area, risk_pcrop, risk_acrop,
            risk_timber, TMF_def_before_2020, TMF_def_after_2020).

    Returns:
        A Markdown string with location details, risk levels and
        deforestation figures, stamped with the current local time. If
        anything raises while building it, a string starting with
        ``"Error en el análisis geográfico: "`` is returned instead; this
        function does not propagate exceptions.
    """
    try:
        country_raw = get_value(df, "Country")
        # Unknown codes are shown as-is.
        country = _COUNTRY_NAMES.get(country_raw, country_raw)
        admin_level = get_value(df, "Admin_Level_1")
        area_text = _format_area(get_value(df, "Area"))

        # Risk assessments
        risk_pcrop = _format_risk(get_value(df, "risk_pcrop"))
        risk_acrop = _format_risk(get_value(df, "risk_acrop"))
        risk_timber = _format_risk(get_value(df, "risk_timber"))

        # Each deforestation line must report its own period's figure.
        def_before_2020 = _format_deforestation(
            get_value(df, "TMF_def_before_2020")
        )
        def_after_2020 = _format_deforestation(
            get_value(df, "TMF_def_after_2020")
        )

        # Format for RAG context
        context = f"""
**Respuesta generada mediante inteligencia artificíal:** \n\n
**Resultados del análisis geográfico** \n\n
La siguiente información ha sido generada por la [WhispAPI creada por Forest Data Partnership (FDaP)](https://openforis.org/solutions/whisp/).
📍 **Detalles de la ubicación:**
- País: *{country}*
- Región administrativa: *{admin_level}*
- Área total: *{area_text}*
⚠️ **Evaluación del riesgo de deforestación:**
Los niveles de riesgo se basan en patrones históricos, factores ambientales y datos sobre el uso del suelo.
- Cultivos permanentes (Café, cacao, aceite de palma): {risk_pcrop}
- Cultivos anuales (Soja, maíz, arroz): {risk_acrop}
- Extracción de madera: {risk_timber}
🌳 **Datos de deforestación:**
- Deforestación antes de 2020: {def_before_2020}
- Deforestación después de 2020: {def_after_2020}
Fuente: Forest Data Partnership (FDaP) WhispAPI
Fecha de análisis: {datetime.now().isoformat()}"""
        return context
    except Exception as e:
        # Best-effort boundary: report the failure as text instead of raising.
        return f"Error en el análisis geográfico: {str(e)}"
def process_geojson_whisp(file_content: bytes, filename: str) -> tuple[str, dict]:
    """Process GeoJSON content through the WHISP API and return formatted context.

    The bytes are written to a temporary ``.geojson`` file, which is sent to
    the ``/get_statistics`` endpoint of the service at ``WHISP_API_URL``.
    The response is read as a mapping with ``data`` and ``headers`` entries
    and turned into a DataFrame.

    Args:
        file_content: Raw bytes of the uploaded GeoJSON document.
        filename: Original file name. Currently unused; kept so existing
            callers keep working.

    Returns:
        A ``(formatted_context, metadata)`` tuple: the text produced by
        ``format_whisp_statistics`` and a dict with the analysis type,
        country, admin level, area and the three risk levels.

    Raises:
        Exception: If writing the temporary file, calling the API or parsing
            its response fails. The original error is attached as the cause.
    """
    import tempfile

    tmp_file_path = None
    try:
        # delete=False: the file must outlive the ``with`` block so the client
        # can upload it by path. The path is recorded before writing so that
        # a failed write is still cleaned up in ``finally``.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.geojson') as tmp_file:
            tmp_file_path = tmp_file.name
            tmp_file.write(file_content)

        # Call WHISP API with authentication
        client = Client(WHISP_API_URL, hf_token=hf_token)
        result = client.predict(
            file=handle_file(tmp_file_path),
            api_name="/get_statistics"
        )

        # Convert result to DataFrame
        df = pd.DataFrame(result['data'], columns=result['headers'])

        # Format for RAG context
        formatted_context = format_whisp_statistics(df)

        metadata = {
            "analysis_type": "whisp_geojson",
            "country": get_value(df, "Country"),
            "admin_level": get_value(df, "Admin_Level_1"),
            "area": get_value(df, "Area"),
            "risk_levels": {
                "pcrop": get_value(df, "risk_pcrop"),
                "acrop": get_value(df, "risk_acrop"),
                "timber": get_value(df, "risk_timber")
            }
        }

        return formatted_context, metadata
    except Exception as e:
        logger.error(f"WHISP API error: {str(e)}")
        raise Exception(f"Failed to process GeoJSON through WHISP API: {str(e)}") from e
    finally:
        # Clean up temporary file. A cleanup failure is logged only, so it
        # cannot hide a successful result or replace the real error.
        if tmp_file_path is not None:
            try:
                os.unlink(tmp_file_path)
            except OSError as cleanup_error:
                logger.warning(
                    f"Could not remove temporary file {tmp_file_path}: {cleanup_error}"
                )
def ingest(file):
    """Main ingestion function: run a GeoJSON upload through the WHISP analysis.

    Args:
        file: The uploaded file, either an object exposing the path as
            ``.name`` or a plain path string. ``None`` means nothing was
            uploaded.

    Returns:
        The formatted WHISP analysis context as a single string, or
        ``"No file uploaded"`` when ``file`` is ``None``. Every path returns
        exactly one value, matching the single output component.

    Raises:
        Exception: If the extension is not ``.geojson``/``.json``, the file
            cannot be read, or the WHISP processing fails. The original
            error is attached as the cause.
    """
    if file is None:
        return "No file uploaded"

    try:
        # Accept both an object with ``.name`` and a bare path string.
        file_path = getattr(file, "name", file)
        filename = os.path.basename(file_path)

        # Check the extension before reading, so unsupported uploads are
        # rejected without loading them into memory.
        file_extension = os.path.splitext(filename)[1].lower()
        if file_extension not in ('.geojson', '.json'):
            raise ValueError(f"Unsupported file type: {file_extension}. Only GeoJSON files are supported.")

        with open(file_path, 'rb') as f:
            file_content = f.read()

        # Process through WHISP API; the metadata is not used here.
        context, _metadata = process_geojson_whisp(file_content, filename)

        logger.info(f"Successfully processed GeoJSON {filename} through WHISP API")
        return context
    except Exception as e:
        logger.error(f"GeoJSON processing failed: {str(e)}")
        raise Exception(f"Processing failed: {str(e)}") from e
if __name__ == "__main__":
    # Input: one file upload restricted to GeoJSON/JSON extensions.
    upload_input = gr.File(
        file_types=[".geojson", ".json"],
        label="GeoJSON Upload",
    )

    # Output: multi-line text box holding the generated context.
    context_output = gr.Textbox(
        show_copy_button=True,
        lines=15,
        label="WHISP Analysis Context",
    )

    ui = gr.Interface(
        fn=ingest,
        inputs=upload_input,
        outputs=context_output,
        title="EUDR Ingestion Module - WHISP API",
        description="Processes GeoJSON files through WHISP API and returns geographic analysis context for RAG pipelines.",
        api_name="ingest",
    )

    # Listen on all interfaces, port 7860, with errors shown in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    ui.launch(**launch_options)