Spaces:
Paused
Paused
| from langchain_community.retrievers import ArxivRetriever | |
class ArxivScraper:
    """Fetch the text of an arXiv paper identified by a link.

    The last ``/``-separated segment of the link is used as the search
    query handed to ``ArxivRetriever``.
    """

    def __init__(self, link, session=None):
        """Store the target link and an optional session.

        Args:
            link: URL (or path) whose last ``/``-separated segment is the
                arXiv query, typically the paper identifier.
            session: Optional session object. It is kept on the instance
                but is not used by ``scrape``.
        """
        self.link = link
        self.session = session

    def scrape(self):
        """Return the page content of the first document matching the link.

        Returns:
            The ``page_content`` of the first document returned by
            ``ArxivRetriever`` for the query extracted from the link.

        Raises:
            IndexError: If the retriever returns no documents for the query.
        """
        # Strip trailing slashes first so ".../abs/2301.00001/" still yields
        # the identifier instead of an empty query.
        query = self.link.rstrip("/").split("/")[-1]
        retriever = ArxivRetriever(load_max_docs=2, doc_content_chars_max=None)
        # invoke() takes the query as its first positional argument (named
        # ``input``); passing it as ``query=`` leaves that argument unfilled.
        docs = retriever.invoke(query)
        if not docs:
            # Same exception type an empty result produced before, but with
            # a message that says what actually went wrong.
            raise IndexError(f"No arXiv documents found for query {query!r}")
        return docs[0].page_content