# Page-scrape residue (not part of the program): "Spaces: Sleeping Sleeping"
import io
import logging
import re

import numpy as np
import pandas as pd
import streamlit as st
from docx import Document
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
class CarbonCreditDocGenerator:
    """Build a Carbon Credit Project Document (.docx) from structured text.

    The pipeline is: parse the numbered-section input into a nested dict,
    retrieve the most similar knowledge-base sentences for each output
    section, let a GPT-2 text-generation pipeline draft the section, then
    append any input facts or knowledge sentences the draft omitted.
    """

    # Fixed list of headings written to the output document, in order.
    SECTIONS = (
        "Project Overview",
        "Seller/Proponent Information",
        "Carbon Credit Specifications",
        "Financial & Pricing Information",
        "Project Impact and Sustainability",
        "Risks & Mitigation Strategies",
        "Supporting Documentation",
        "Declarations and Acknowledgements",
    )

    # A section marker ("1. ", "12. ") is only recognised at the start of a
    # line. Without the anchor, ordinary sentences such as "Phase 1. Planting
    # begins in 2024. ..." were cut into bogus sections.
    _SECTION_MARKER = re.compile(r'^[ \t]*\d+\.\s+', re.MULTILINE)

    def __init__(self):
        """Load the embedding model, the text generator and the knowledge base."""
        self.sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.nlg_pipeline = pipeline("text-generation", model="gpt2", max_length=1000)
        self.knowledge_base = self.load_knowledge_base()

    def load_knowledge_base(self):
        """Return the built-in list of carbon-credit reference sentences."""
        return [
            "Carbon credits represent the reduction of one metric ton of carbon dioxide emissions.",
            "Afforestation projects involve planting trees in areas where there were none before.",
            "The Verified Carbon Standard (VCS) is a widely recognized certification for carbon credits.",
            "Carbon credit projects must demonstrate additionality, meaning the reductions wouldn't have occurred without the project.",
            "Monitoring, reporting, and verification (MRV) are crucial components of carbon credit projects.",
            "Project developers must provide detailed information about project location, type, and expected carbon sequestration.",
            "Carbon credit pricing can vary based on project type, location, and additional benefits.",
            "Environmental Impact Assessments (EIA) are often required for carbon credit projects.",
            "Community engagement and social benefits are important aspects of many carbon credit projects.",
            "Risk assessment and mitigation strategies are crucial for project success and credibility.",
        ]

    def process_input_data(self, input_text):
        """Parse numbered-section text into ``{section title: {key: value}}``.

        A section starts at a line beginning with ``<number>. `` and its first
        line is the title. Following ``key: value`` lines become fields; a
        non-blank line without a colon is appended to the most recent field.
        Text before the first marker is ignored, as are sections whose title
        is empty. A later section with the same title replaces the earlier one.

        Args:
            input_text: Raw project description.

        Returns:
            Nested dict of sections and their fields; empty when no numbered
            section is found.
        """
        # Drop a leading BOM and normalise line endings so the line-anchored
        # marker and the line split below behave the same on every platform.
        text = input_text.lstrip('\ufeff').replace('\r\n', '\n').replace('\r', '\n')
        data = {}
        for chunk in self._SECTION_MARKER.split(text)[1:]:
            lines = chunk.strip().split('\n')
            title = lines[0].strip()
            if not title:
                continue
            fields = data[title] = {}
            last_key = None
            for raw_line in lines[1:]:
                line = raw_line.strip()
                if not line:
                    # Blank lines carry no data; skipping them avoids trailing
                    # spaces being appended to the previous value.
                    continue
                key, colon, value = line.partition(':')
                if colon:
                    last_key = key.strip()
                    fields[last_key] = value.strip()
                elif last_key is not None:
                    # Continuation of a wrapped value.
                    fields[last_key] += " " + line
        return data

    def _get_knowledge_embeddings(self):
        """Return embeddings for the knowledge base, encoding it at most once.

        The cache is keyed on a snapshot of the knowledge base, so replacing
        or editing ``self.knowledge_base`` triggers a re-encode.
        """
        snapshot = tuple(self.knowledge_base)
        cached = getattr(self, "_knowledge_cache", None)
        if cached is None or cached[0] != snapshot:
            cached = (snapshot, self.sbert_model.encode(list(snapshot)))
            self._knowledge_cache = cached
        return cached[1]

    def retrieve_relevant_knowledge(self, query, top_k=3):
        """Return up to ``top_k`` knowledge sentences most similar to ``query``.

        Similarity is cosine similarity between sentence embeddings; results
        are ordered from most to least similar. A non-positive ``top_k`` or an
        empty knowledge base yields an empty list.
        """
        # The original slice ``[-top_k:]`` returned *everything* for top_k == 0.
        if top_k <= 0 or not self.knowledge_base:
            return []
        query_embedding = self.sbert_model.encode([query])[0]
        knowledge_embeddings = self._get_knowledge_embeddings()
        similarities = cosine_similarity([query_embedding], knowledge_embeddings)[0]
        top_indices = np.argsort(similarities)[::-1][:top_k]
        return [self.knowledge_base[i] for i in top_indices]

    def generate_section_content(self, section_title, input_data, max_length=1000):
        """Draft the text of one document section and correct it against the input.

        Args:
            section_title: Heading of the section to write.
            input_data: Parsed input as returned by ``process_input_data``.
                When it has no entry for ``section_title`` the whole dict is
                used as context instead.
            max_length: Forwarded to the text-generation pipeline.

        Returns:
            The generated text with missing input facts and knowledge
            sentences appended.
        """
        query = f"Generate content for the '{section_title}' section of a carbon credit document."
        relevant_knowledge = self.retrieve_relevant_knowledge(query)
        section_data = input_data.get(section_title, input_data)
        context = f"Input data: {section_data}\n\nRelevant knowledge: {' '.join(relevant_knowledge)}"
        prompt = f"{context}\n\nTask: {query}\n\nContent:"
        # By default the pipeline returns prompt + continuation. That put the
        # prompt scaffold into the document and, because the prompt already
        # contains the input values and knowledge sentences, made the
        # corrective step below almost never fire. Keep the continuation only.
        outputs = self.nlg_pipeline(
            prompt,
            max_length=max_length,
            num_return_sequences=1,
            return_full_text=False,
        )
        generated_text = outputs[0]['generated_text'].strip()
        return self.apply_corrective_rag(generated_text, section_data, relevant_knowledge)

    @staticmethod
    def _iter_facts(data):
        """Yield ``(key, value)`` leaf pairs of a possibly nested dict, skipping ``None``."""
        for key, value in data.items():
            if isinstance(value, dict):
                yield from CarbonCreditDocGenerator._iter_facts(value)
            elif value is not None:
                yield key, value

    def apply_corrective_rag(self, generated_text, input_data, relevant_knowledge):
        """Append input facts and knowledge sentences missing from the text.

        A fact counts as present when its value occurs in the text,
        compared case-insensitively; otherwise `` key: value.`` is appended.
        Knowledge sentences are handled the same way. Values are converted
        with ``str`` so non-string input does not raise.

        Returns:
            ``generated_text`` followed by every appended fragment.
        """
        corrected_text = generated_text
        for key, value in self._iter_facts(input_data):
            value_text = str(value)
            # Re-lowercase on each pass so text appended earlier is searched too.
            if value_text.lower() not in corrected_text.lower():
                corrected_text += f" {key}: {value_text}."
        for knowledge in relevant_knowledge:
            if knowledge.lower() not in corrected_text.lower():
                corrected_text += f" {knowledge}"
        return corrected_text

    def create_document(self, input_text):
        """Return a python-docx document with one generated paragraph per section."""
        doc = Document()
        doc.add_heading('Carbon Credit Project Document', 0)
        input_data = self.process_input_data(input_text)
        for section in self.SECTIONS:
            doc.add_heading(section, level=1)
            doc.add_paragraph(self.generate_section_content(section, input_data))
        return doc

    def generate_document(self, input_text):
        """Generate the document and return it as a ``BytesIO`` rewound to the start."""
        doc_io = io.BytesIO()
        self.create_document(input_text).save(doc_io)
        doc_io.seek(0)
        return doc_io
# Streamlit app
def main():
    """Render the Streamlit UI and generate the document on request.

    The user either uploads a UTF-8 text file or pastes the project details.
    Pressing "Generate Document" builds the .docx in memory and offers it as
    a download. Failures are shown in the page and logged with a traceback.
    """
    # The two emoji literals were mojibake in the source; they are restored
    # as escapes. NOTE(review): herb / inbox-tray are a best guess from the
    # surviving bytes - confirm against the original file.
    st.set_page_config(page_title="Carbon Credit Document Generator", page_icon="\U0001F33F")
    st.title("Carbon Credit Document Generator")
    st.markdown("""
    This app generates a comprehensive Carbon Credit Project Document based on your input.
    Upload a text file or paste your project details below.
    """)

    # Always bound, so pressing the button before a file is uploaded shows
    # the "please provide input" message instead of raising UnboundLocalError.
    input_text = ""
    input_method = st.radio("Choose input method:", ("Upload File", "Paste Text"))
    if input_method == "Upload File":
        uploaded_file = st.file_uploader("Choose a text file", type="txt")
        if uploaded_file is not None:
            try:
                # utf-8-sig also accepts plain UTF-8 and drops a leading BOM.
                input_text = uploaded_file.read().decode("utf-8-sig")
            except UnicodeDecodeError:
                st.error("The uploaded file is not valid UTF-8 text.")
            else:
                st.text_area("File Contents (Read-only)", input_text, height=300, disabled=True)
    else:
        input_text = st.text_area(
            "Paste your project details here:",
            height=400,
            help="Enter your project details in a structured format, similar to the Carbon Credit Project Submission Form.",
        )

    if st.button("Generate Document"):
        if not input_text.strip():
            st.error("Please provide input data before generating the document.")
        else:
            try:
                # NOTE(review): the models are reloaded on every click; if the
                # deployed Streamlit version offers resource caching, consider
                # caching the generator instance.
                generator = CarbonCreditDocGenerator()
                with st.spinner("Generating document... This may take a few moments."):
                    doc_io = generator.generate_document(input_text)
                st.success("Document generated successfully!")
                st.download_button(
                    label="\U0001F4E5 Download Carbon Credit Document",
                    data=doc_io.getvalue(),
                    file_name="carbon_credit_document.docx",
                    mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                )
                st.info("Your document is ready for download. Click the button above to save it.")
            except Exception as e:
                # Top-level UI boundary: report to the user, keep the
                # traceback in the server log for diagnosis.
                logging.getLogger(__name__).exception("Document generation failed")
                st.error(f"An error occurred while generating the document: {str(e)}")
                st.info("Please try again or contact support if the problem persists.")

    st.markdown("---")
    st.markdown("Developed by Carbon Connect")
# Script entry point: start the app only when this file is executed directly.
if __name__ == "__main__":
    main()