Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from langchain_openai import ChatOpenAI | |
| from langchain_community.llms import Ollama | |
| from langchain_community.utilities import SQLDatabase | |
| from langchain.chains import create_sql_query_chain | |
| import geopandas as gpd | |
| import ibis | |
| from ibis import _ | |
| geoparquet = "https://data.source.coop/fiboa/be-vlg/be_vlg.parquet" | |
| con = ibis.duckdb.connect("duck.db", extensions = ["spatial"]) | |
| #con.raw_sql(f'CREATE OR REPLACE VIEW crops AS SELECT *, ST_GEOMFROMWKB(geometry) AS "geometry" FROM read_parquet("{geoparquet}")') | |
| crops = con.read_parquet(geoparquet, "crops").cast({"geometry": "geometry"}) | |
| # df = crops.to_pandas() | |
| # + | |
| # df = crops.to_pandas() | |
| # + | |
| #gdf = gpd.read_parquet("be_vlg.parquet") | |
| #gdf.crs | |
| # - | |
| st.set_page_config( | |
| page_title="fiboa chat tool", | |
| page_icon="🦜", | |
| ) | |
| st.title("FiobaGPT Prototype") | |
| # + | |
| # from langchain.chains.sql_database.prompt import PROMPT # peek at the default | |
| from langchain_core.prompts.prompt import PromptTemplate | |
| new_prompt = PromptTemplate(input_variables=['dialect', 'input', 'table_info', 'top_k'], | |
| template= | |
| ''' | |
| Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query | |
| and return the answer. Only limit for {top_k} when asked for "some" or "examples". | |
| This duckdb database includes full support for spatial queries, so it will understand most PostGIS-type | |
| queries as well. Remember that you must cast blob column to a geom type using ST_GeomFromWKB(geometry) | |
| before any spatial operations. | |
| If you are asked to "map" or "show on a map", then be select the "geometry" column in your query. | |
| If asked to show a "table", you must not include the "geometry" column from the query results. | |
| Use the following format: return only the SQLQuery to run. DO NOT use the prefix with "SQLQuery:". | |
| Do not include an explanation. | |
| Pay attention to use only the column names that you can see in the schema description. Be careful to | |
| not query for columns that do not exist. Also, pay attention to which column is in which table. | |
| Tables include {table_info}. The data you should use always comes from the table called "crops". | |
| Only use that table, do not use the "testing" table. | |
| Question: {input} | |
| ''' | |
| ) | |
| # - | |
| llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]) | |
| # + | |
| # Create the SQL query chain with the custom prompt | |
| db = SQLDatabase.from_uri("duckdb:///duck.db", view_support=True) | |
| chain = create_sql_query_chain(llm, db, prompt=new_prompt, k= 11) | |
| ## testing | |
| #user_input = "Show on a map the 10 largest fields?" | |
| #sql_query = chain.invoke({"question": user_input}) | |
| #print(sql_query) | |
| # | |
| # - | |
| # + | |
| import geopandas as gpd | |
| from ibis import _ | |
| import re | |
| import leafmap.maplibregl as leafmap | |
| m = leafmap.Map() | |
| def as_geopandas(response): | |
| response = re.sub(";$", "", response) | |
| sql_query = f"CREATE OR REPLACE VIEW testing AS ({response})" | |
| con.raw_sql(sql_query) | |
| gdf = con.table("testing") | |
| if 'geometry' in gdf.columns: | |
| gdf = (gdf | |
| .cast({"geometry": "geometry"}) | |
| .mutate(geometry = _.geometry.convert("EPSG:31370", "EPSG:4326")) | |
| .to_pandas() | |
| ).set_crs(epsg=4326, inplace=True) | |
| return gdf | |
| return gdf.to_pandas() | |
| # - | |
| response = "SELECT geometry, area FROM crops ORDER BY area DESC LIMIT 10;" | |
| as_geopandas(response) | |
| #if 'geometry' in gdf.columns: | |
| # m.add_gdf(gdf) | |
| # m | |
| #gdf | |
| # + | |
| ''' | |
| Ask me about fiboa data! Request "a map" to get map output, or table for tabular output, e.g. | |
| - "Show a map with the 10 largest fields" | |
| - "Show a table of the total area by crop type" | |
| - "Compute the perimeters of all fields and determine which have the longest" | |
| ''' | |
| example = "Which are the 10 largest fields?" | |
| with st.container(): | |
| if prompt := st.chat_input(example, key="chain"): | |
| st.chat_message("user").write(prompt) | |
| with st.chat_message("assistant"): | |
| response = chain.invoke({"question": prompt}) | |
| st.write(response) | |
| gdf = as_geopandas(response) | |
| if 'geometry' in gdf.columns: | |
| m.add_gdf(gdf) | |
| m.to_streamlit() | |
| else: | |
| st.dataframe(gdf) | |
| # + | |
| st.divider() | |
| ''' | |
| Data sources: https://beta.source.coop/fiboa/be-vlg | |
| Software License: BSD | |
| ''' | |