Commit 
							
							·
						
						716c07f
	
1
								Parent(s):
							
							0e70948
								
assigned
Browse files
- app.py +2 -1
- web2json/ai_extractor.py +3 -1
    	
        app.py
    CHANGED
    
    | @@ -171,7 +171,8 @@ def webpage_to_json(content: str, is_url: bool, schema: BaseModel) -> Dict[str, | |
| 171 | 
             
                - Return the extracted data in the format specified by the schema"""
         | 
| 172 |  | 
| 173 | 
             
                # Initialize pipeline components
         | 
| 174 | 
            -
                 | 
|  | |
| 175 | 
             
                try:
         | 
| 176 | 
             
                    llm = GeminiLLMClient(config={'api_key': os.getenv('GEMINI_API_KEY')})
         | 
| 177 | 
             
                except Exception as e:
         | 
|  | |
| 171 | 
             
                - Return the extracted data in the format specified by the schema"""
         | 
| 172 |  | 
| 173 | 
             
                # Initialize pipeline components
         | 
| 174 | 
            +
                # TODO: improve the RAG system and optimize (don't instantiate every time)
         | 
| 175 | 
            +
                preprocessor = BasicPreprocessor(config={'keep_tags': False}) 
         | 
| 176 | 
             
                try:
         | 
| 177 | 
             
                    llm = GeminiLLMClient(config={'api_key': os.getenv('GEMINI_API_KEY')})
         | 
| 178 | 
             
                except Exception as e:
         | 
    	
        web2json/ai_extractor.py
    CHANGED
    
    | @@ -123,4 +123,6 @@ class AIExtractor: | |
| 123 | 
             
                    response = self.llm_client.call_api(prompt)
         | 
| 124 | 
             
                    return response
         | 
| 125 |  | 
| 126 | 
            -
             | 
|  | |
|  | 
|  | |
| 123 | 
             
                    response = self.llm_client.call_api(prompt)
         | 
| 124 | 
             
                    return response
         | 
| 125 |  | 
| 126 | 
            +
            # TODO: RAGExtractor class
         | 
| 127 | 
            +
            class RAGExtractor(AIExtractor):
         | 
| 128 | 
            +
                pass
         | 

