Learn

Sleeping

hanzla committed on Apr 21, 2024

Commit

f0b26a9

1 Parent(s): 283e7c7

testing with llama3

Files changed (2) hide show

src/interface.py CHANGED Viewed

@@ -11,7 +11,7 @@ def create_demo():
         with gr.Row():
             # Add sliders here
             with gr.Column():  # Adjust scale as needed
-                slider1 = gr.Slider(minimum=0, maximum=100, value=50, label="Chunk Size")
         with gr.Row():
             with gr.Column(scale=0.60):
                 text_input = gr.Textbox(

         with gr.Row():
             # Add sliders here
             with gr.Column():  # Adjust scale as needed
+                slider1 = gr.Slider(minimum=256, maximum=1024, value=50, label="Chunk Size")
         with gr.Row():
             with gr.Column(scale=0.60):
                 text_input = gr.Textbox(

src/pdfchatbot.py CHANGED Viewed

@@ -36,6 +36,7 @@ class PDFChatBot:
         self.model = None
         self.pipeline = None
         self.chain = None
     def load_config(self, file_path):
         """
@@ -92,7 +93,7 @@ class PDFChatBot:
         """
         Load the vector database from the documents and embeddings.
         """
-        text_splitter = CharacterTextSplitter(chunk_size=256, chunk_overlap=0)
         docs = text_splitter.split_documents(self.documents)
         self.vectordb = Chroma.from_documents(docs, self.embeddings)
@@ -181,7 +182,7 @@ class PDFChatBot:
             history[-1][-1] += char
         return history, " "
-    def render_file(self, file):
         """
         Renders a specific page of a PDF file as an image.
@@ -191,8 +192,10 @@ class PDFChatBot:
         Returns:
             PIL.Image.Image: The rendered page as an image.
         """
         doc = fitz.open(file.name)
         page = doc[self.page]
         pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
         image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
         return image

         self.model = None
         self.pipeline = None
         self.chain = None
+        self.chunk_size = None
     def load_config(self, file_path):
         """
         """
         Load the vector database from the documents and embeddings.
         """
+        text_splitter = CharacterTextSplitter(chunk_size=self.chunk_size, chunk_overlap=256)
         docs = text_splitter.split_documents(self.documents)
         self.vectordb = Chroma.from_documents(docs, self.embeddings)
             history[-1][-1] += char
         return history, " "
+    def render_file(self, file,chunk_size):
         """
         Renders a specific page of a PDF file as an image.
         Returns:
             PIL.Image.Image: The rendered page as an image.
         """
+        print(chunk_size)
         doc = fitz.open(file.name)
         page = doc[self.page]
+        self.chunk_size = chunk_size
         pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
         image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
         return image