Spaces:
Running
Running
cassiebuhler
changing to public s3 bucket. but this will be broken until we fix those permissions
d3f808a
| import streamlit as st | |
| from cng.h3 import * | |
| from ibis import _ | |
| import importlib | |
| from datetime import time | |
| st.set_page_config(layout="wide", | |
| page_title="TPL Conservation Almanac", | |
| page_icon=":globe:") | |
| from utils import * | |
| ''' | |
| # TPL Conservation Almanac | |
| A data visualization tool built for the Trust for Public Land | |
| ''' | |
| with st.sidebar: | |
| leafmap_choice = st.selectbox("Leafmap module", ['maplibregl','foliumap']) | |
| if leafmap_choice == "maplibregl": | |
| leafmap = importlib.import_module("leafmap.maplibregl") | |
| m = leafmap.Map(style="positron", use_message_queue=True) | |
| else: | |
| leafmap = importlib.import_module("leafmap.foliumap") | |
| m = leafmap.Map(center=[35, -100], zoom=5, layers_control=True, fullscreen_control=True) | |
| with st.sidebar: | |
| b = st.selectbox("Basemap", basemaps, index= 3) | |
| m.add_basemap(b) | |
| st.divider() | |
| style_choice = st.radio("Color by:", style_options) | |
| paint = style_options[style_choice] | |
| st.divider() | |
| year_range = st.slider( | |
| "Year", min_value = 1988, max_value = 2025, value=(1988, 2025) | |
| ) | |
| st.divider() | |
| state_choice = st.selectbox("State", states,index = 6, placeholder='Pick a state') | |
| one_state = state_choice != 'All' | |
| counties = get_counties(state_choice) | |
| if one_state: | |
| county_choice = st.selectbox("County", counties, index = 0, placeholder='Select a county') | |
| else: | |
| county_choice = 'All' | |
| st.divider() | |
| legend, position, bg_color, fontsize, shape_type, controls = get_legend(paint, leafmap_choice) | |
| # get all the ids that correspond to the filter | |
| gdf = filter_data(tpl_table, state_choice, county_choice, year_range) | |
| gdf_landvote = filter_data(landvote_table, state_choice, county_choice, year_range) | |
| unique_ids = gdf.select('fid').distinct().execute()['fid'].to_list() | |
| ##### Chatbot stuff | |
| chatbot_container = st.container() | |
| with chatbot_container: | |
| llm_left_col, llm_right_col = st.columns([5,1], vertical_alignment = "bottom") | |
| with llm_left_col: | |
| with st.popover("π¬ Example Queries"): | |
| ''' | |
| Mapping queries: | |
| - Show me the most expensive protected site | |
| - Show me sites owned, managed or sponsored by the Trust for Public Land | |
| ''' | |
| ''' | |
| Exploratory data queries: | |
| - Which states have the highest average cost per acre? | |
| ''' | |
| st.info('If the map appears blank, queried data may be too small to see at the default zoom level. Check the table below the map, as query results will also be displayed there.', icon="βΉοΈ") | |
| with llm_right_col: | |
| llm_choice = st.selectbox("Select LLM:", llm_options, key = "llm", help = "Select which model to use.") | |
| llm = llm_options[llm_choice] | |
| from pydantic import BaseModel, Field | |
| class SQLResponse(BaseModel): | |
| """Defines the structure for SQL response.""" | |
| sql_query: str = Field(description="The SQL query generated by the assistant.") | |
| explanation: str = Field(description="A detailed explanation of how the SQL query answers the input question.") | |
| with open('app/system_prompt.txt', 'r') as file: | |
| template = file.read() | |
| from langchain_openai import ChatOpenAI | |
| from langchain_core.prompts import ChatPromptTemplate | |
| prompt = ChatPromptTemplate.from_messages([ | |
| ("system", template), | |
| ("human", "{input}") | |
| ]).partial(dialect="duckdb", conservation_almanac = tpl_z8.schema(), | |
| landvote = landvote_z8.schema(), carbon = carbon_z8.schema(), | |
| svi = svi_z8.schema(), mobi = mobi_z8.schema()) | |
| structured_llm = llm.with_structured_output(SQLResponse) | |
| few_shot_structured_llm = prompt | structured_llm | |
| def run_sql(query, llm_choice): | |
| """ | |
| Filter data based on an LLM-generated SQL query and return matching IDs. | |
| Args: query (str): The natural language query to filter the data. | |
| """ | |
| output = few_shot_structured_llm.invoke(query) | |
| sql_query = output.sql_query | |
| explanation =output.explanation | |
| if not sql_query: # if the chatbot can't generate a SQL query. | |
| return pd.DataFrame({'fid' : []}),'', explanation | |
| result = con.sql(sql_query).distinct().execute() | |
| if result.empty: | |
| explanation = "This query did not return any results. Please try again with a different query." | |
| if 'geom' in result.columns: | |
| return result.drop('geom',axis = 1), sql_query, explanation | |
| else: | |
| return result, sql_query, explanation | |
| return result, sql_query, explanation | |
| with chatbot_container: | |
| with llm_left_col: | |
| example_query = "π Input query here" | |
| prompt = st.chat_input(example_query, key="chain", max_chars=300) | |
| _,log_query_col, _ = st.columns([.001, 5,1], vertical_alignment = "top") | |
| with log_query_col: | |
| log_queries = st.checkbox("Save query", value = True, help = "Saving your queries helps improve this tool and guide conservation efforts. Your data is stored in a private location. For more details, see 'Why save your queries?' at the bottom of this page.") | |
| # new container for output so it doesn't mess with the alignment of llm options | |
| with st.container(): | |
| if prompt: | |
| st.chat_message("user").write(prompt) | |
| try: | |
| with st.chat_message("assistant"): | |
| with st.spinner("Invoking query..."): | |
| llm_output, sql_query, llm_explanation = run_sql(prompt, llm_choice) | |
| minio_logger(log_queries, prompt, sql_query, llm_explanation, llm_choice, 'query_log.csv', "shared-tpl") | |
| # no sql query generated by chatbot | |
| if sql_query == '': | |
| st.success(llm_explanation) | |
| not_mapping = True | |
| else: | |
| # sql query generated but no results returned | |
| if llm_output.empty: | |
| st.warning(llm_explanation, icon="β οΈ") | |
| st.caption("SQL Query:") | |
| st.code(sql_query, language="sql") | |
| st.stop() | |
| # output without mapping columns (id, geom) | |
| elif "fid" not in llm_output.columns and "geom" not in llm_output.columns: | |
| st.write(llm_output) | |
| not_mapping = True | |
| # print results | |
| with st.popover("Explanation"): | |
| st.write(llm_explanation) | |
| if sql_query != '': | |
| st.caption("SQL Query:") | |
| st.code(sql_query,language = "sql") | |
| # extract ids, columns, bounds if present | |
| if "fid" in llm_output.columns and not llm_output.empty: | |
| ids = list(set(llm_output['fid'].tolist())) | |
| llm_cols = extract_columns(sql_query) | |
| bounds = llm_output.total_bounds.tolist() | |
| else: | |
| ids, llm_cols = [], [] | |
| not_mapping = True | |
| except Exception as e: | |
| tb_str = traceback.format_exc() # full multiline traceback string | |
| if isinstance(e, openai.BadRequestError): | |
| st.error(error_messages["bad_request"](llm_choice, e, tb_str), icon="π¨") | |
| elif isinstance(e, openai.RateLimitError): | |
| st.error(error_messages["bad_request"](llm_choice, e, tb_str), icon="π¨") | |
| elif isinstance(e, openai.APIStatusError): | |
| st.error(error_messages["bad_request"](llm_choice, e, tb_str), icon="π¨") | |
| elif isinstance(e, openai.InternalServerError): | |
| st.error(error_messages["internal_server_error"](llm_choice, e, tb_str), icon="π¨") | |
| elif isinstance(e, openai.NotFoundError): | |
| st.error(error_messages["internal_server_error"](llm_choice, e, tb_str), icon="π¨") | |
| else: | |
| prompt = prompt.replace('\n', '') | |
| st.error(error_messages["unexpected_llm_error"](prompt, e, tb_str)) | |
| st.stop() | |
| ##### end of chatbot code | |
| # define PMTiles style dict (if we didn't already do so using the chatbot) | |
| if 'style' not in locals(): | |
| if one_state: | |
| # filter to ids in that state | |
| style = tpl_style(unique_ids, paint, pmtiles) | |
| else: | |
| # selected all states, so no need to filter | |
| style=tpl_style_default(paint, pmtiles) | |
| bounds = get_bounds(state_choice, county_choice, m) | |
| # add pmtiles to map (using user-specified module) | |
| if leafmap_choice == "maplibregl": | |
| m.fit_bounds(bounds) | |
| m.add_pmtiles(pmtiles, style=style, | |
| name="Conservation Almanac", | |
| attribution = "Trust for Public Land (2025)", tooltip=True, | |
| template = tooltip_template) | |
| m.fit_bounds(bounds) | |
| if style_choice == "Acquisition Cost": | |
| colors, vmin, vmax, orientation, position, label, height, width = get_colorbar(gdf,paint) | |
| m.add_colorbar(palette = colors, vmin=vmin, vmax=vmax, | |
| orientation = orientation, position = position, transparent = True, | |
| height = height, width = width) | |
| else: | |
| m.add_legend(title = '', legend_dict = legend, fontsize = fontsize, | |
| bg_color = bg_color, position = position, | |
| shape_type = shape_type) | |
| else: | |
| m.add_pmtiles(pmtiles, style=style, | |
| name="Conservation Almanac by Trust for Public Land", | |
| tooltip=False, zoom_to_layer=True) | |
| # add custom tooltip to pmtiles layer | |
| for layer in m._children.values(): | |
| if isinstance(layer, leafmap.PMTilesLayer): | |
| pmtiles_layer = layer | |
| break | |
| pmtiles_layer.add_child(CustomTooltip()) | |
| style = {'background-color': 'rgba(255, 255, 255, 1)'} | |
| m.add_legend(title = '', legend_dict = legend, style = style, | |
| position = position, shape_type = shape_type, draggable = False) | |
| m.zoom_to_bounds(bounds) | |
| m.to_streamlit() | |
| with st.expander("π View/download data"): # adding data table | |
| if 'llm_output' not in locals(): | |
| # cols = ['tpl_id','municipality','date','geom'] | |
| group_cols = ['sponsor','program','year'] | |
| gdf_grouped = (gdf.head(100).execute().groupby(group_cols) | |
| .agg({col: ('sum' if col in ['acres','amount'] else 'first') | |
| for col in gdf.columns if col not in group_cols})).reset_index() | |
| cols = ['fid', 'state', 'county','site', 'acres', 'year', 'amount', 'owner', 'owner_type', | |
| 'manager', 'manager_type', 'purchase_type', 'easement', 'easement_type', 'access_type', | |
| 'purpose_type', 'duration_type', 'data_provider', 'data_source', 'source_date', | |
| 'data_aggregator', 'comments', 'program_id', 'program', 'sponsor_id', 'sponsor', 'sponsor_type'] | |
| st.dataframe(gdf_grouped[cols], use_container_width = True) | |
| else: | |
| if ('geom' in llm_output.columns) and (not llm_output.empty): | |
| llm_output = llm_output.drop('geom',axis = 1) | |
| st.dataframe(llm_output, use_container_width = True) | |
| public_dollars, private_dollars, total_dollars = tpl_summary(gdf) | |
| with st.container(): | |
| col1, col2, col3 = st.columns(3) | |
| col1.metric(label=f"Public", value=f"${public_dollars:,}") | |
| col2.metric(label=f"Private", value=f"${private_dollars:,}") | |
| col3.metric(label=f"Total", value=f"${total_dollars:,}") | |
| st.markdown('#') | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| gdf_tpl = group_data(gdf, 'Acquisition Cost') | |
| get_bar(gdf_tpl, style_choice, 'year', 'total_amount', paint,'Year','Acquisition Cost ($)',"Yearly investment ($) in protected area") | |
| with col2: | |
| gdf_landvote = group_data(gdf_landvote.filter(_.status == 'Pass'), 'Measure Cost') | |
| get_bar(gdf_landvote, style_choice, 'year', 'total_amount', paint, 'Year','Funds Approved ($)','Yearly funds from conservation ballot measures') | |
| st.divider() | |
| st.markdown(''' | |
| ## Data Sources | |
| PRIVATE DRAFT. Developed at UC Berkeley. All data copyright to Trust for Public Land. See <https://conservationalmanac.org/> for details. | |
| ''') | |