testing with llama3
Browse files
- src/interface.py +1 -1
- src/pdfchatbot.py +5 -2
src/interface.py
CHANGED
|
@@ -11,7 +11,7 @@ def create_demo():
|
|
| 11 |
with gr.Row():
|
| 12 |
# Add sliders here
|
| 13 |
with gr.Column(): # Adjust scale as needed
|
| 14 |
-
slider1 = gr.Slider(minimum=
|
| 15 |
with gr.Row():
|
| 16 |
with gr.Column(scale=0.60):
|
| 17 |
text_input = gr.Textbox(
|
|
|
|
| 11 |
with gr.Row():
|
| 12 |
# Add sliders here
|
| 13 |
with gr.Column(): # Adjust scale as needed
|
| 14 |
+
slider1 = gr.Slider(minimum=256, maximum=1024, value=50, label="Chunk Size")
|
| 15 |
with gr.Row():
|
| 16 |
with gr.Column(scale=0.60):
|
| 17 |
text_input = gr.Textbox(
|
src/pdfchatbot.py
CHANGED
|
@@ -36,6 +36,7 @@ class PDFChatBot:
|
|
| 36 |
self.model = None
|
| 37 |
self.pipeline = None
|
| 38 |
self.chain = None
|
|
|
|
| 39 |
|
| 40 |
def load_config(self, file_path):
|
| 41 |
"""
|
|
@@ -92,7 +93,7 @@ class PDFChatBot:
|
|
| 92 |
"""
|
| 93 |
Load the vector database from the documents and embeddings.
|
| 94 |
"""
|
| 95 |
-
text_splitter = CharacterTextSplitter(chunk_size=
|
| 96 |
docs = text_splitter.split_documents(self.documents)
|
| 97 |
self.vectordb = Chroma.from_documents(docs, self.embeddings)
|
| 98 |
|
|
@@ -181,7 +182,7 @@ class PDFChatBot:
|
|
| 181 |
history[-1][-1] += char
|
| 182 |
return history, " "
|
| 183 |
|
| 184 |
-
def render_file(self, file):
|
| 185 |
"""
|
| 186 |
Renders a specific page of a PDF file as an image.
|
| 187 |
|
|
@@ -191,8 +192,10 @@ class PDFChatBot:
|
|
| 191 |
Returns:
|
| 192 |
PIL.Image.Image: The rendered page as an image.
|
| 193 |
"""
|
|
|
|
| 194 |
doc = fitz.open(file.name)
|
| 195 |
page = doc[self.page]
|
|
|
|
| 196 |
pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
|
| 197 |
image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
|
| 198 |
return image
|
|
|
|
| 36 |
self.model = None
|
| 37 |
self.pipeline = None
|
| 38 |
self.chain = None
|
| 39 |
+
self.chunk_size = None
|
| 40 |
|
| 41 |
def load_config(self, file_path):
|
| 42 |
"""
|
|
|
|
| 93 |
"""
|
| 94 |
Load the vector database from the documents and embeddings.
|
| 95 |
"""
|
| 96 |
+
text_splitter = CharacterTextSplitter(chunk_size=self.chunk_size, chunk_overlap=256)
|
| 97 |
docs = text_splitter.split_documents(self.documents)
|
| 98 |
self.vectordb = Chroma.from_documents(docs, self.embeddings)
|
| 99 |
|
|
|
|
| 182 |
history[-1][-1] += char
|
| 183 |
return history, " "
|
| 184 |
|
| 185 |
+
def render_file(self, file,chunk_size):
|
| 186 |
"""
|
| 187 |
Renders a specific page of a PDF file as an image.
|
| 188 |
|
|
|
|
| 192 |
Returns:
|
| 193 |
PIL.Image.Image: The rendered page as an image.
|
| 194 |
"""
|
| 195 |
+
print(chunk_size)
|
| 196 |
doc = fitz.open(file.name)
|
| 197 |
page = doc[self.page]
|
| 198 |
+
self.chunk_size = chunk_size
|
| 199 |
pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
|
| 200 |
image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
|
| 201 |
return image
|