import streamlit as st
from cng.h3 import *
from ibis import _
import importlib
from datetime import time
# also used below (pd.DataFrame, traceback.format_exc, openai error classes);
# imported explicitly here in case the star imports above do not re-export them
import pandas as pd
import traceback
import openai
st.set_page_config(layout="wide",
                   page_title="TPL Conservation Almanac",
                   page_icon=":globe:")
from utils import *
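# NOTE: the star imports above are expected to supply the helpers and data objects used
# below (e.g. basemaps, style_options, states, llm_options, error_messages, get_counties,
# get_legend, filter_data, tpl_style, tpl_style_default, get_bounds, get_colorbar, get_bar,
# group_data, tpl_summary, minio_logger, extract_columns, tooltip_template, CustomTooltip,
# con, pmtiles, and the tpl_* / landvote_* / carbon_* / svi_* / mobi_* tables).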
'''
# TPL Conservation Almanac
A data visualization tool built for the Trust for Public Land
'''
with st.sidebar:
    leafmap_choice = st.selectbox("Leafmap module", ['maplibregl', 'foliumap'])

if leafmap_choice == "maplibregl":
    leafmap = importlib.import_module("leafmap.maplibregl")
    m = leafmap.Map(style="positron", use_message_queue=True)
else:
    leafmap = importlib.import_module("leafmap.foliumap")
    m = leafmap.Map(center=[35, -100], zoom=5, layers_control=True, fullscreen_control=True)
with st.sidebar:
    b = st.selectbox("Basemap", basemaps, index=3)
    m.add_basemap(b)
    st.divider()

    style_choice = st.radio("Color by:", style_options)
    paint = style_options[style_choice]
    st.divider()

    year_range = st.slider(
        "Year", min_value=1988, max_value=2025, value=(1988, 2025)
    )
    st.divider()

    state_choice = st.selectbox("State", states, index=6, placeholder='Pick a state')
    one_state = state_choice != 'All'
    counties = get_counties(state_choice)
    if one_state:
        county_choice = st.selectbox("County", counties, index=0, placeholder='Select a county')
    else:
        county_choice = 'All'
    st.divider()
legend, position, bg_color, fontsize, shape_type, controls = get_legend(paint, leafmap_choice)
# get all the ids that correspond to the filter
gdf = filter_data(tpl_table, state_choice, county_choice, year_range)
gdf_landvote = filter_data(landvote_table, state_choice, county_choice, year_range)
unique_ids = gdf.select('fid').distinct().execute()['fid'].to_list()
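# unique_ids (the fids matching the sidebar filters) is reused further down to restrict
# the PMTiles layer style when a single state is selected.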
##### Chatbot stuff
chatbot_container = st.container()
with chatbot_container:
    llm_left_col, llm_right_col = st.columns([5, 1], vertical_alignment="bottom")
    with llm_left_col:
        with st.popover("💬 Example Queries"):
            '''
            Mapping queries:
            - Show me the most expensive protected site
            - Show me sites owned, managed or sponsored by the Trust for Public Land
            '''
            '''
            Exploratory data queries:
            - Which states have the highest average cost per acre?
            '''
        st.info('If the map appears blank, queried data may be too small to see at the default zoom level. Check the table below the map, as query results will also be displayed there.', icon="ℹ️")
    with llm_right_col:
        llm_choice = st.selectbox("Select LLM:", llm_options, key="llm", help="Select which model to use.")
        llm = llm_options[llm_choice]
from pydantic import BaseModel, Field

class SQLResponse(BaseModel):
    """Defines the structure for SQL response."""
    sql_query: str = Field(description="The SQL query generated by the assistant.")
    explanation: str = Field(description="A detailed explanation of how the SQL query answers the input question.")
with open('app/system_prompt.txt', 'r') as file:
    template = file.read()
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages([
    ("system", template),
    ("human", "{input}")
]).partial(dialect="duckdb", conservation_almanac=tpl_z8.schema(),
           landvote=landvote_z8.schema(), carbon=carbon_z8.schema(),
           svi=svi_z8.schema(), mobi=mobi_z8.schema())
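# Pipe the prompt template into the selected chat model; with_structured_output constrains
# every response to parse into a SQLResponse (sql_query + explanation).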
structured_llm = llm.with_structured_output(SQLResponse)
few_shot_structured_llm = prompt | structured_llm
@st.cache_data(show_spinner=False)
def run_sql(query, llm_choice):
    """
    Filter data based on an LLM-generated SQL query and return matching IDs.

    Args:
        query (str): The natural language query to filter the data.
        llm_choice (str): Name of the selected model; included so cached results
            are keyed per model.
    """
    output = few_shot_structured_llm.invoke(query)
    sql_query = output.sql_query
    explanation = output.explanation
    if not sql_query:  # the chatbot couldn't generate a SQL query
        return pd.DataFrame({'fid': []}), '', explanation
    result = con.sql(sql_query).distinct().execute()
    if result.empty:
        explanation = "This query did not return any results. Please try again with a different query."
    if 'geom' in result.columns:
        return result.drop('geom', axis=1), sql_query, explanation
    return result, sql_query, explanation
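# Example: run_sql("Show me the most expensive protected site", llm_choice) returns
# (DataFrame of matching rows, the generated SQL string, a plain-language explanation).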
with chatbot_container:
    with llm_left_col:
        example_query = "👋 Input query here"
        prompt = st.chat_input(example_query, key="chain", max_chars=300)
        _, log_query_col, _ = st.columns([.001, 5, 1], vertical_alignment="top")
        with log_query_col:
            log_queries = st.checkbox("Save query", value=True, help="Saving your queries helps improve this tool and guide conservation efforts. Your data is stored in a private location. For more details, see 'Why save your queries?' at the bottom of this page.")
# new container for output so it doesn't mess with the alignment of llm options
with st.container():
    if prompt:
        st.chat_message("user").write(prompt)
        try:
            with st.chat_message("assistant"):
                with st.spinner("Invoking query..."):
                    llm_output, sql_query, llm_explanation = run_sql(prompt, llm_choice)
                minio_logger(log_queries, prompt, sql_query, llm_explanation, llm_choice, 'query_log.csv', "shared-tpl")
                # no sql query generated by chatbot
                if sql_query == '':
                    st.success(llm_explanation)
                    not_mapping = True
                else:
                    # sql query generated but no results returned
                    if llm_output.empty:
                        st.warning(llm_explanation, icon="⚠️")
                        st.caption("SQL Query:")
                        st.code(sql_query, language="sql")
                        st.stop()
                    # output without mapping columns (fid, geom)
                    elif "fid" not in llm_output.columns and "geom" not in llm_output.columns:
                        st.write(llm_output)
                        not_mapping = True
                # print results
                with st.popover("Explanation"):
                    st.write(llm_explanation)
                    if sql_query != '':
                        st.caption("SQL Query:")
                        st.code(sql_query, language="sql")
            # extract ids, columns, bounds if present
            if "fid" in llm_output.columns and not llm_output.empty:
                ids = list(set(llm_output['fid'].tolist()))
                llm_cols = extract_columns(sql_query)
                bounds = llm_output.total_bounds.tolist()
            else:
                ids, llm_cols = [], []
                not_mapping = True
        except Exception as e:
            tb_str = traceback.format_exc()  # full multiline traceback string
            if isinstance(e, openai.BadRequestError):
                st.error(error_messages["bad_request"](llm_choice, e, tb_str), icon="🚨")
            elif isinstance(e, openai.RateLimitError):
                st.error(error_messages["bad_request"](llm_choice, e, tb_str), icon="🚨")
            elif isinstance(e, openai.APIStatusError):
                st.error(error_messages["bad_request"](llm_choice, e, tb_str), icon="🚨")
            elif isinstance(e, openai.InternalServerError):
                st.error(error_messages["internal_server_error"](llm_choice, e, tb_str), icon="🚨")
            elif isinstance(e, openai.NotFoundError):
                st.error(error_messages["internal_server_error"](llm_choice, e, tb_str), icon="🚨")
            else:
                prompt = prompt.replace('\n', '')
                st.error(error_messages["unexpected_llm_error"](prompt, e, tb_str))
            st.stop()
##### end of chatbot code
# define PMTiles style dict (if we didn't already do so using the chatbot)
if 'style' not in locals():
    if one_state:
        # filter to ids in that state
        style = tpl_style(unique_ids, paint, pmtiles)
    else:
        # selected all states, so no need to filter
        style = tpl_style_default(paint, pmtiles)
    bounds = get_bounds(state_choice, county_choice, m)
# add pmtiles to map (using user-specified module)
if leafmap_choice == "maplibregl":
    m.fit_bounds(bounds)
    m.add_pmtiles(pmtiles, style=style,
                  name="Conservation Almanac",
                  attribution="Trust for Public Land (2025)", tooltip=True,
                  template=tooltip_template)
    m.fit_bounds(bounds)
    if style_choice == "Acquisition Cost":
        colors, vmin, vmax, orientation, position, label, height, width = get_colorbar(gdf, paint)
        m.add_colorbar(palette=colors, vmin=vmin, vmax=vmax,
                       orientation=orientation, position=position, transparent=True,
                       height=height, width=width)
    else:
        m.add_legend(title='', legend_dict=legend, fontsize=fontsize,
                     bg_color=bg_color, position=position,
                     shape_type=shape_type)
else:
    m.add_pmtiles(pmtiles, style=style,
                  name="Conservation Almanac by Trust for Public Land",
                  tooltip=False, zoom_to_layer=True)
    # add custom tooltip to pmtiles layer
    for layer in m._children.values():
        if isinstance(layer, leafmap.PMTilesLayer):
            pmtiles_layer = layer
            break
    pmtiles_layer.add_child(CustomTooltip())
    style = {'background-color': 'rgba(255, 255, 255, 1)'}
    m.add_legend(title='', legend_dict=legend, style=style,
                 position=position, shape_type=shape_type, draggable=False)
    m.zoom_to_bounds(bounds)
m.to_streamlit()
with st.expander("🔍 View/download data"):  # adding data table
    if 'llm_output' not in locals():
        # cols = ['tpl_id','municipality','date','geom']
        group_cols = ['sponsor', 'program', 'year']
        gdf_grouped = (gdf.head(100).execute().groupby(group_cols)
                       .agg({col: ('sum' if col in ['acres', 'amount'] else 'first')
                             for col in gdf.columns if col not in group_cols})).reset_index()
        cols = ['fid', 'state', 'county', 'site', 'acres', 'year', 'amount', 'owner', 'owner_type',
                'manager', 'manager_type', 'purchase_type', 'easement', 'easement_type', 'access_type',
                'purpose_type', 'duration_type', 'data_provider', 'data_source', 'source_date',
                'data_aggregator', 'comments', 'program_id', 'program', 'sponsor_id', 'sponsor', 'sponsor_type']
        st.dataframe(gdf_grouped[cols], use_container_width=True)
    else:
        if ('geom' in llm_output.columns) and (not llm_output.empty):
            llm_output = llm_output.drop('geom', axis=1)
        st.dataframe(llm_output, use_container_width=True)
public_dollars, private_dollars, total_dollars = tpl_summary(gdf)
with st.container():
    col1, col2, col3 = st.columns(3)
    col1.metric(label="Public", value=f"${public_dollars:,}")
    col2.metric(label="Private", value=f"${private_dollars:,}")
    col3.metric(label="Total", value=f"${total_dollars:,}")
st.markdown('#')
col1, col2 = st.columns(2)
with col1:
    gdf_tpl = group_data(gdf, 'Acquisition Cost')
    get_bar(gdf_tpl, style_choice, 'year', 'total_amount', paint, 'Year', 'Acquisition Cost ($)', "Yearly investment ($) in protected area")
with col2:
    gdf_landvote = group_data(gdf_landvote.filter(_.status == 'Pass'), 'Measure Cost')
    get_bar(gdf_landvote, style_choice, 'year', 'total_amount', paint, 'Year', 'Funds Approved ($)', 'Yearly funds from conservation ballot measures')
st.divider()
st.markdown('''
## Data Sources
PRIVATE DRAFT. Developed at UC Berkeley. All data © Trust for Public Land. See <https://conservationalmanac.org/> for details.
''')