Spaces:

albhu
/

legalgeek

Runtime error

App Files Files Community

albhu committed on Apr 25, 2024

Commit

4d48a61

verified ·

1 Parent(s): 112973e

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -30

app.py CHANGED Viewed

@@ -17,53 +17,51 @@ if st.checkbox("Korábbi Beszélgetések Törlése"):
 tokenizer = AutoTokenizer.from_pretrained("pile-of-law/legalbert-large-1.7M-2")
 model = AutoModelForCausalLM.from_pretrained("apple/OpenELM-3B-Instruct", trust_remote_code=True)
-# Definiáljuk a válaszgeneráló függvényt
-def generate_response(input_text, tokenizer, model):
-    # Bemeneti szöveg tisztítása
-    cleaned_input = input_text.strip()  # Eltávolítjuk a felesleges szóközöket és sortöréseket
-    # Tokenizálás
-    inputs = tokenizer(cleaned_input, return_tensors="pt", max_length=1024)
-    # Modell használata a válasz generálásához
-    outputs = model(**inputs)
-    # Válasz visszaadása
-    response = tokenizer.decode(outputs.logits.argmax(dim=1)[0])
-    return response
-# Dokumentum feltöltése drag and drop segítségével
-document_file = st.file_uploader("Húzd ide a dokumentumot vagy kattints a feltöltéshez", type=["pdf", "docx", "doc"])
-if document_file is not None:
     document_text = ""
     if document_file.type == "application/pdf":
-        # PDF fájl feldolgozása
         with pdfplumber.open(document_file) as pdf:
             for page in pdf.pages:
                 text = page.extract_text()
                 if text:
-                    document_text += text.strip()  # Tisztítjuk a kinyert szöveget
-                    document_text += "\n\n"  # Új sor hozzáadása a bekezdések közé
     elif document_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
-        # DOCX fájl feldolgozása
         docx_file = docx.Document(document_file)
         for paragraph in docx_file.paragraphs:
             text = paragraph.text
             if text:
-                document_text += text.strip()  # Tisztítjuk a kinyert szöveget
-                document_text += "\n\n"  # Új sor hozzáadása a bekezdések közé
     elif document_file.type == "application/msword":
-        # DOC fájl feldolgozása
         doc_file = docx.Document(document_file)
         for paragraph in doc_file.paragraphs:
             text = paragraph.text
             if text:
-                document_text += text.strip()  # Tisztítjuk a kinyert szöveget
-                document_text += "\n\n"  # Új sor hozzáadása a bekezdések közé
     else:
         st.error("A fájltípus nem támogatott. Kérlek válassz ki egy PDF, DOCX vagy DOC fájlt!")
     # Előző beszélgetésekhez csatolható kontextus
     context = st.text_area("Korábbi Beszélgetéshez Tartozó Kontextus", "")
@@ -79,7 +77,7 @@ if document_file is not None:
     # Válasz generálása csak akkor, ha a felhasználó elküldi a promptot
     if input_text.strip() != "":
-        response = generate_response(input_text, tokenizer, model)
         st.subheader("Generált Válasz:")
         st.write(response)

 tokenizer = AutoTokenizer.from_pretrained("pile-of-law/legalbert-large-1.7M-2")
 model = AutoModelForCausalLM.from_pretrained("apple/OpenELM-3B-Instruct", trust_remote_code=True)
+# Dokumentumfeldolgozó függvény
+def process_document(document_file):
     document_text = ""
     if document_file.type == "application/pdf":
         with pdfplumber.open(document_file) as pdf:
             for page in pdf.pages:
                 text = page.extract_text()
                 if text:
+                    document_text += text.strip() + "\n\n"
     elif document_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
         docx_file = docx.Document(document_file)
         for paragraph in docx_file.paragraphs:
             text = paragraph.text
             if text:
+                document_text += text.strip() + "\n\n"
     elif document_file.type == "application/msword":
         doc_file = docx.Document(document_file)
         for paragraph in doc_file.paragraphs:
             text = paragraph.text
             if text:
+                document_text += text.strip() + "\n\n"
     else:
         st.error("A fájltípus nem támogatott. Kérlek válassz ki egy PDF, DOCX vagy DOC fájlt!")
+    return document_text
+# Válaszgeneráló függvény
+def generate_response(input_text):
+    # Bemeneti szöveg tisztítása
+    cleaned_input = input_text.strip()
+    # Tokenizálás
+    inputs = tokenizer(cleaned_input, return_tensors="pt", max_length=1024)
+    # Modell használata a válasz generálásához
+    outputs = model(**inputs)
+    # Válasz visszaadása
+    response = tokenizer.decode(outputs.logits.argmax(dim=1)[0])
+    return response
+# Dokumentum feltöltése drag and drop segítségével
+document_file = st.file_uploader("Húzd ide a dokumentumot vagy kattints a feltöltéshez", type=["pdf", "docx", "doc"])
+if document_file is not None:
+    document_text = process_document(document_file)
     # Előző beszélgetésekhez csatolható kontextus
     context = st.text_area("Korábbi Beszélgetéshez Tartozó Kontextus", "")
     # Válasz generálása csak akkor, ha a felhasználó elküldi a promptot
     if input_text.strip() != "":
+        response = generate_response(input_text)
         st.subheader("Generált Válasz:")
         st.write(response)