Spaces:
Sleeping
Sleeping
| """ | |
| Groq Llama 3.3 70B integration component | |
| """ | |
import json
import os
import re
from collections import Counter
from datetime import datetime
from typing import Any, Dict, List, Optional

from groq import Groq
from langchain.llms.base import LLM
from langchain.schema import Document
from pydantic import Field

from .config import config
class GroqLlamaLLM(LLM):
    """LangChain-compatible wrapper for Groq Llama 3.3 70B.

    Each prompt is sent as a single ``user`` message to the Groq chat
    completions endpoint using the sampling settings stored on the instance.
    """

    # Key handed to the Groq client built in ``__init__``.
    api_key: str = Field(...)
    # Groq client instance; populated in ``__init__``.
    groq_client: Any = Field(default=None)
    model_name: str = Field(default="llama-3.3-70b-versatile")
    temperature: float = Field(default=0.7)
    max_tokens: int = Field(default=2000)
    top_p: float = Field(default=0.9)

    class Config:
        arbitrary_types_allowed = True

    def __init__(self, **kwargs):
        """Validate the declared fields, then build the Groq client."""
        super().__init__(**kwargs)
        self.groq_client = Groq(api_key=self.api_key)

    @property
    def _llm_type(self) -> str:
        """Return the identifier LangChain uses for this LLM type."""
        # LangChain's base class reads this as an attribute, so it has to be
        # a property rather than a plain method.
        return "groq_llama"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to Groq and return the generated text.

        Args:
            prompt: Text sent as the only ``user`` message.
            stop: Optional stop sequences forwarded to the API.
            run_manager: Accepted for compatibility with the LangChain
                ``_call`` hook; not used here.
            **kwargs: Accepted for compatibility with the LangChain ``_call``
                hook; not forwarded to Groq.

        Returns:
            The content of the first choice, or a string of the form
            ``"Error: <message>"`` if the request raised.
        """
        try:
            response = self.groq_client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt}],
                temperature=self.temperature,
                max_tokens=self.max_tokens,
                top_p=self.top_p,
                stop=stop,
            )
            # Guard against a missing content value so the declared ``str``
            # return type always holds.
            return response.choices[0].message.content or ""
        except Exception as e:
            # Best-effort behaviour kept from the original: failures are
            # reported as text instead of being raised.
            return f"Error: {str(e)}"

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return the parameters that identify this LLM configuration."""
        return {
            "model_name": self.model_name,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
            "top_p": self.top_p,
        }
class GroqProcessor:
    """Enhanced Groq Llama processor with research capabilities.

    Wraps a Groq chat client and exposes helpers that build a prompt, send it
    through ``generate_response`` and post-process the text: paper summaries,
    trend analysis, question answering, literature reviews, paper
    classification and research recommendations.
    """

    # Header phrases (lower-case) mapped to the summary section they open.
    _SECTION_KEYWORDS = (
        ('main summary', 'summary'),
        ('key contributions', 'contributions'),
        ('key findings', 'findings'),
        ('methodology', 'methodology'),
        ('limitations', 'limitations'),
        ('contributions', 'contributions'),
        ('findings', 'findings'),
        ('summary', 'summary'),
    )
    # Leading markup of a header line: "#"s, a list number, opening bold.
    _HEADER_PREFIX = re.compile(r'^(?:#+\s*)?(?:\d+\s*[.)]\s*)?(?:\*\*|__)?\s*')
    # What may follow the header phrase: closing bold, a parenthetical hint,
    # and optionally a colon followed by inline content (captured).
    _HEADER_TAIL = re.compile(
        r'^\s*(?:\*\*|__)?\s*(?:\([^)]*\))?\s*(?:\*\*|__)?\s*'
        r'(?::\s*(?:\*\*|__)?\s*(.*))?$'
    )
    # Common words ignored by ``_extract_keywords``.
    _STOP_WORDS = frozenset(
        {'the', 'and', 'for', 'are', 'with', 'this', 'that', 'from', 'they', 'have'}
    )

    def __init__(self, config_obj=None):
        """Create the Groq client and the LangChain wrapper.

        Args:
            config_obj: Configuration object to use; when ``None`` the
                module-level ``config`` is used.

        Raises:
            ValueError: If the configuration has no ``GROQ_API_KEY``.
        """
        # Use passed config or default config
        self.config = config_obj if config_obj is not None else config
        if not self.config.GROQ_API_KEY:
            raise ValueError("Groq API key not found! Please set GROQ_API_KEY environment variable.")
        self.groq_client = Groq(api_key=self.config.GROQ_API_KEY)
        self.llm = GroqLlamaLLM(
            api_key=self.config.GROQ_API_KEY,
            model_name=self.config.LLAMA_MODEL,
            temperature=self.config.TEMPERATURE,
            max_tokens=self.config.MAX_OUTPUT_TOKENS,
            top_p=self.config.TOP_P,
        )
        print("Groq Llama 3.3 70B initialized successfully!")

    def generate_response(self, prompt: str, max_tokens: int = 2000) -> str:
        """Generate a response using Groq Llama.

        Args:
            prompt: Text sent as the only ``user`` message.
            max_tokens: Token limit passed to the API for this call.

        Returns:
            The stripped content of the first choice, or a string of the form
            ``"Error: <message>"`` if the request raised.
        """
        try:
            response = self.groq_client.chat.completions.create(
                model=self.config.LLAMA_MODEL,
                messages=[{"role": "user", "content": prompt}],
                temperature=self.config.TEMPERATURE,
                max_tokens=max_tokens,
                top_p=self.config.TOP_P,
            )
            # A missing content value is treated as an empty answer rather
            # than surfacing an AttributeError message to the caller.
            return (response.choices[0].message.content or "").strip()
        except Exception as e:
            # Best-effort: callers receive the failure as text.
            return f"Error: {str(e)}"

    def summarize_paper(self, title: str, abstract: str, content: str) -> Dict[str, str]:
        """Generate a structured summary of a paper.

        Args:
            title: Paper title, echoed back in the result.
            abstract: Paper abstract, echoed back in the result.
            content: Paper body; truncated before being placed in the prompt.

        Returns:
            A dict with the keys ``summary``, ``contributions``,
            ``methodology``, ``findings``, ``limitations``, ``title`` and
            ``abstract``. On failure ``summary`` carries the error message and
            the other analysis fields are ``'N/A'``.
        """
        try:
            if len(content) > self.config.MAX_PAPER_LENGTH:
                content = content[:self.config.MAX_PAPER_LENGTH] + "..."
            prompt = f"""Analyze this research paper and provide a structured summary:
Title: {title}
Abstract: {abstract}
Content: {content[:8000]}
Provide a comprehensive summary with these sections:
1. **MAIN SUMMARY** (2-3 sentences)
2. **KEY CONTRIBUTIONS** (3-5 bullet points)
3. **METHODOLOGY** (brief description)
4. **KEY FINDINGS** (3-5 bullet points)
5. **LIMITATIONS** (if mentioned)
Format your response clearly with section headers."""
            response = self.generate_response(prompt, max_tokens=self.config.MAX_SUMMARY_LENGTH)
            if response.startswith("Error:"):
                # generate_response reports failures as text; surface them in
                # the same shape as the exception branch below instead of
                # returning a silently empty summary.
                detail = response[len("Error:"):].strip()
                return self._summary_error(detail, title, abstract)
            return self._parse_summary_response(response, title, abstract)
        except Exception as e:
            return self._summary_error(str(e), title, abstract)

    def _summary_error(self, detail: str, title: str, abstract: str) -> Dict[str, str]:
        """Build the summary dict returned when summarization fails."""
        return {
            'summary': f'Error generating summary: {detail}',
            'contributions': 'N/A',
            'methodology': 'N/A',
            'findings': 'N/A',
            'limitations': 'N/A',
            'title': title,
            'abstract': abstract,
        }

    def _match_section_header(self, line: str) -> Optional[tuple]:
        """Recognise a section header line.

        A header is a known section phrase at the start of the line (after
        optional ``#``, list number and bold markers) followed only by closing
        markup, a parenthetical hint and/or a colon with inline text.

        Returns:
            ``(section_key, inline_text)`` for a header line, else ``None``.
        """
        remainder = line[self._HEADER_PREFIX.match(line).end():]
        lowered = remainder.lower()
        for phrase, section in self._SECTION_KEYWORDS:
            if not lowered.startswith(phrase):
                continue
            tail = self._HEADER_TAIL.match(remainder[len(phrase):])
            if tail is not None:
                return section, (tail.group(1) or '').strip()
        return None

    def _parse_summary_response(self, response: str, title: str, abstract: str) -> Dict[str, str]:
        """Parse the model response into structured summary sections.

        Lines are assigned to the section opened by the most recent header;
        text before any header goes to ``summary``. Header detection is
        anchored to the start of the line so that numbers or section words
        inside ordinary sentences do not switch sections.

        Args:
            response: Raw text returned by ``generate_response``.
            title: Paper title, copied into the result.
            abstract: Paper abstract, copied into the result.

        Returns:
            A dict with the five section strings plus ``title`` and
            ``abstract``. All sections are empty when ``response`` is an
            ``"Error: ..."`` string.
        """
        section_names = ('summary', 'contributions', 'methodology', 'findings', 'limitations')
        sections = {name: '' for name in section_names}
        sections['title'] = title
        sections['abstract'] = abstract
        # Only a response that *is* an error string is rejected; a summary
        # that merely mentions "Error:" is still parsed.
        if response.startswith("Error:"):
            return sections

        collected = {name: [] for name in section_names}
        current_section = 'summary'
        for raw_line in response.split('\n'):
            line = raw_line.strip()
            if not line:
                continue
            header = self._match_section_header(line)
            if header is not None:
                current_section, inline_text = header
                # Keep text written on the same line as the header.
                if inline_text:
                    collected[current_section].append(inline_text)
                continue
            if line.startswith('#'):
                # Unrecognised Markdown heading: not section content.
                continue
            collected[current_section].append(line)

        for name, parts in collected.items():
            sections[name] = ' '.join(parts)
        return sections

    def analyze_trends(self, texts: List[str]) -> Dict:
        """Analyze research trends from multiple texts.

        Args:
            texts: Source texts; only the first ten are joined, and only the
                first 5000 characters of the result are put in the prompt.

        Returns:
            A dict with ``trend_analysis`` (model output), ``texts_analyzed``
            (``len(texts)``), ``analysis_date`` (ISO timestamp) and
            ``keywords``. On an exception ``trend_analysis`` carries the
            error, ``texts_analyzed`` is 0 and ``keywords`` is empty.
        """
        try:
            combined_text = ' '.join(texts[:10])  # Limit to avoid token limits
            prompt = f"""Analyze research trends in this collection of texts:
{combined_text[:5000]}
Identify:
1. Key research themes and topics
2. Emerging trends and directions
3. Frequently mentioned technologies/methods
4. Research gaps or opportunities
Provide analysis as structured points."""
            response = self.generate_response(prompt, max_tokens=1500)
            return {
                'trend_analysis': response,
                'texts_analyzed': len(texts),
                'analysis_date': datetime.now().isoformat(),
                'keywords': self._extract_keywords(combined_text),
            }
        except Exception as e:
            return {
                'trend_analysis': f'Error: {str(e)}',
                'texts_analyzed': 0,
                'analysis_date': datetime.now().isoformat(),
                'keywords': [],
            }

    def _extract_keywords(self, text: str) -> List[str]:
        """Return up to 20 of the most frequent words in ``text``.

        Words are lower-cased alphabetic runs longer than three characters
        that are not in the stop-word set. Ties keep first-seen order.
        """
        words = re.findall(r'\b[a-zA-Z]+\b', text.lower())
        counts = Counter(
            word for word in words
            if len(word) > 3 and word not in self._STOP_WORDS
        )
        return [word for word, _ in counts.most_common(20)]

    def answer_question(self, question: str, context: str = "") -> str:
        """Answer a question with optional context.

        Args:
            question: The question placed in the prompt.
            context: Optional supporting text; only the first 4000 characters
                are used, and a placeholder is sent when it is empty.

        Returns:
            The model's answer, or an error string.
        """
        try:
            prompt = f"""Answer this research question based on the provided context:
Question: {question}
Context: {context[:4000] if context else 'No specific context provided'}
Provide a clear, informative answer based on the context and your knowledge."""
            return self.generate_response(prompt, max_tokens=1000)
        except Exception as e:
            return f"Error answering question: {str(e)}"

    def generate_literature_review(self, papers: List[Dict], research_question: str) -> str:
        """Generate a literature review from papers.

        Args:
            papers: Dicts read via ``get('title')`` and ``get('abstract')``;
                only the first ten are used.
            research_question: Question the review is framed around.

        Returns:
            The model's review text, or an error string.
        """
        try:
            papers_text = "\n".join(
                f"Title: {paper.get('title', '')}\nAbstract: {paper.get('abstract', '')}\n"
                for paper in papers[:10]
            )
            prompt = f"""Generate a comprehensive literature review for the research question: "{research_question}"
Based on these papers:
{papers_text}
Provide a structured review with:
1. Introduction to the research area
2. Key themes and methodologies
3. Major findings and contributions
4. Research gaps and limitations
5. Future research directions
6. Conclusion
Keep it academic and well-structured."""
            return self.generate_response(prompt, max_tokens=3000)
        except Exception as e:
            return f"Error generating literature review: {str(e)}"

    def _parse_classification(self, response: str, title: str) -> Dict[str, Any]:
        """Turn the classification response into a dict.

        Tries the whole response as JSON first, then the outermost ``{...}``
        span (covers answers wrapped in Markdown fences or surrounding text).
        Anything that does not yield a JSON object falls back to a dict
        carrying the raw text under ``classification``.
        """
        candidates = [response]
        start, end = response.find('{'), response.rfind('}')
        if 0 <= start < end:
            candidates.append(response[start:end + 1])
        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError
                continue
            if isinstance(parsed, dict):
                return parsed
        return {
            "classification": response,
            "title": title,
            "processed_at": datetime.now().isoformat(),
        }

    def classify_paper(self, title: str, abstract: str) -> Dict[str, Any]:
        """Classify a paper into research categories.

        Args:
            title: Paper title.
            abstract: Paper abstract.

        Returns:
            The parsed JSON object from the model when one can be extracted;
            otherwise a dict with the raw text under ``classification``. On an
            exception, a dict with an ``error`` entry.
        """
        try:
            prompt = f"""Classify this research paper:
Title: {title}
Abstract: {abstract}
Provide classification in JSON format:
{{
"primary_field": "field name",
"subfields": ["subfield1", "subfield2"],
"methodology": "methodology type",
"application_area": "application area",
"novelty_score": 1-10,
"impact_potential": "high/medium/low"
}}"""
            response = self.generate_response(prompt, max_tokens=500)
            # Try to parse as JSON, fallback to structured text
            return self._parse_classification(response, title)
        except Exception as e:
            return {
                "error": f"Classification error: {str(e)}",
                "title": title,
                "processed_at": datetime.now().isoformat(),
            }

    def get_research_recommendations(self, interests: List[str], recent_papers: List[Dict]) -> str:
        """Get personalized research recommendations.

        Args:
            interests: Interest strings, joined with ``", "`` in the prompt.
            recent_papers: Dicts read via ``get('title')``; only the first
                ten are listed.

        Returns:
            The model's recommendations, or an error string.
        """
        try:
            interests_text = ", ".join(interests)
            papers_text = "\n".join(
                f"- {paper.get('title', '')}"
                for paper in recent_papers[:10]
            )
            prompt = f"""Based on these research interests: {interests_text}
And these recent papers:
{papers_text}
Provide personalized research recommendations including:
1. Trending topics to explore
2. Potential research gaps
3. Collaboration opportunities
4. Methodological approaches to consider
5. Future research directions
Keep recommendations specific and actionable."""
            return self.generate_response(prompt, max_tokens=1500)
        except Exception as e:
            return f"Error generating recommendations: {str(e)}"