Spaces:
Runtime error
Runtime error
Merge remote-tracking branch 'origin/main'
Browse files
- .github/workflows/check.yml +0 -16
- .github/workflows/hugging_face.yml +3 -0
- Dockerfile +1 -1
- README.md +13 -4
- backend/generate_metadata.py +9 -8
- flake.nix +11 -3
- frontend/.vite/deps_temp_eb58ea19/package.json +3 -0
- frontend/src/components/Features.tsx +1 -1
.github/workflows/check.yml
DELETED
|
@@ -1,16 +0,0 @@
|
|
| 1 |
-
name: Check file size
|
| 2 |
-
on:
|
| 3 |
-
pull_request:
|
| 4 |
-
branches: [main]
|
| 5 |
-
|
| 6 |
-
# to run this workflow manually from the Actions tab
|
| 7 |
-
workflow_dispatch:
|
| 8 |
-
|
| 9 |
-
jobs:
|
| 10 |
-
sync-to-hub:
|
| 11 |
-
runs-on: ubuntu-latest
|
| 12 |
-
steps:
|
| 13 |
-
- name: Check large files
|
| 14 |
-
uses: ActionsDesk/lfs-warning@v2.0
|
| 15 |
-
with:
|
| 16 |
-
filesizelimit: 10485760 # this is 10MB
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.github/workflows/hugging_face.yml
CHANGED
|
@@ -13,7 +13,10 @@ jobs:
|
|
| 13 |
with:
|
| 14 |
fetch-depth: 0
|
| 15 |
lfs: true
|
|
|
|
|
|
|
| 16 |
- name: Push to hub
|
| 17 |
env:
|
| 18 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 19 |
run: git push https://AIhackathons:$HF_TOKEN@huggingface.co/spaces/AIhackathons/docverifyrag main
|
|
|
|
|
|
| 13 |
with:
|
| 14 |
fetch-depth: 0
|
| 15 |
lfs: true
|
| 16 |
+
- name: Navigate to frontend directory
|
| 17 |
+
run: cd ./frontend
|
| 18 |
- name: Push to hub
|
| 19 |
env:
|
| 20 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 21 |
run: git push https://AIhackathons:$HF_TOKEN@huggingface.co/spaces/AIhackathons/docverifyrag main
|
| 22 |
+
|
Dockerfile
CHANGED
|
@@ -25,7 +25,7 @@ COPY backend .
|
|
| 25 |
|
| 26 |
# Install backend dependencies
|
| 27 |
COPY backend/requirements.txt .
|
| 28 |
-
RUN pip install --no-cache-dir -r requirements.txt
|
| 29 |
|
| 30 |
# Stage 3: Serve frontend and backend using nginx and gunicorn
|
| 31 |
FROM nginx:latest AS production
|
|
|
|
| 25 |
|
| 26 |
# Install backend dependencies
|
| 27 |
COPY backend/requirements.txt .
|
| 28 |
+
RUN pip install --no-cache-dir -r requirements.txt --vvv
|
| 29 |
|
| 30 |
# Stage 3: Serve frontend and backend using nginx and gunicorn
|
| 31 |
FROM nginx:latest AS production
|
README.md
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
<!-- PROJECT TITLE -->
|
| 2 |
<h1 align="center">DocVerifyRAG: Document Verification and Anomaly Detection</h1>
|
| 3 |
<div id="header" align="center">
|
|
@@ -108,8 +120,6 @@ To deploy DocVerifyRAG using Docker, follow these steps:
|
|
| 108 |
### Usage
|
| 109 |
|
| 110 |
Access the web interface and follow the prompts to upload documents, classify them, and verify metadata. The AI-powered anomaly detection system will automatically flag any discrepancies or errors in the document metadata, providing accurate and reliable document management solutions for hospitals.
|
| 111 |
-
|
| 112 |
-
|
| 113 |
## Authors
|
| 114 |
|
| 115 |
| Name | Link |
|
|
@@ -119,8 +129,7 @@ Access the web interface and follow the prompts to upload documents, classify th
|
|
| 119 |
| Carlos Salgado | [GitHub](https://github.com/salgadev) |
|
| 120 |
| Abdul Qadeer | [GitHub](https://github.com/AbdulQadeer-55) |
|
| 121 |
|
|
|
|
| 122 |
## License
|
| 123 |
|
| 124 |
[License](https://github.com/eliawaefler/DocVerifyRAG/blob/main/LICENSE)
|
| 125 |
-
____
|
| 126 |
-
|
|
|
|
| 1 |
+
|
| 2 |
+
---
|
| 3 |
+
title: DocVerifyRAG
|
| 4 |
+
emoji: 🐠
|
| 5 |
+
colorFrom: pink
|
| 6 |
+
colorTo: green
|
| 7 |
+
sdk: streamlit
|
| 8 |
+
sdk_version: 1.27.0
|
| 9 |
+
app_file: app.py
|
| 10 |
+
pinned: false
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
<!-- PROJECT TITLE -->
|
| 14 |
<h1 align="center">DocVerifyRAG: Document Verification and Anomaly Detection</h1>
|
| 15 |
<div id="header" align="center">
|
|
|
|
| 120 |
### Usage
|
| 121 |
|
| 122 |
Access the web interface and follow the prompts to upload documents, classify them, and verify metadata. The AI-powered anomaly detection system will automatically flag any discrepancies or errors in the document metadata, providing accurate and reliable document management solutions for hospitals.
|
|
|
|
|
|
|
| 123 |
## Authors
|
| 124 |
|
| 125 |
| Name | Link |
|
|
|
|
| 129 |
| Carlos Salgado | [GitHub](https://github.com/salgadev) |
|
| 130 |
| Abdul Qadeer | [GitHub](https://github.com/AbdulQadeer-55) |
|
| 131 |
|
| 132 |
+
|
| 133 |
## License
|
| 134 |
|
| 135 |
[License](https://github.com/eliawaefler/DocVerifyRAG/blob/main/LICENSE)
|
|
|
|
|
|
backend/generate_metadata.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
import argparse
|
| 3 |
import json
|
| 4 |
import openai
|
|
@@ -12,13 +13,13 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
| 12 |
load_dotenv()
|
| 13 |
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
if
|
| 19 |
-
loader = UnstructuredPDFLoader(
|
| 20 |
-
elif
|
| 21 |
-
loader = TextLoader(
|
| 22 |
else:
|
| 23 |
raise NotImplementedError('Only .txt or .pdf files are supported')
|
| 24 |
|
|
@@ -29,7 +30,7 @@ def ingest(file_path):
|
|
| 29 |
"\n\n",
|
| 30 |
"\n",
|
| 31 |
" ",
|
| 32 |
-
",",
|
| 33 |
"\uff0c", # Fullwidth comma
|
| 34 |
"\u3001", # Ideographic comma
|
| 35 |
"\uff0e", # Fullwidth full stop
|
|
|
|
| 1 |
import os
|
| 2 |
+
import io
|
| 3 |
import argparse
|
| 4 |
import json
|
| 5 |
import openai
|
|
|
|
| 13 |
load_dotenv()
|
| 14 |
|
| 15 |
|
| 16 |
+
import io
|
| 17 |
+
|
| 18 |
+
def ingest(file_obj, file_ext='pdf'):
|
| 19 |
+
if file_ext == 'pdf':
|
| 20 |
+
loader = UnstructuredPDFLoader(file_obj)
|
| 21 |
+
elif file_ext == 'txt':
|
| 22 |
+
loader = TextLoader(file_obj)
|
| 23 |
else:
|
| 24 |
raise NotImplementedError('Only .txt or .pdf files are supported')
|
| 25 |
|
|
|
|
| 30 |
"\n\n",
|
| 31 |
"\n",
|
| 32 |
" ",
|
| 33 |
+
",",
|
| 34 |
"\uff0c", # Fullwidth comma
|
| 35 |
"\u3001", # Ideographic comma
|
| 36 |
"\uff0e", # Fullwidth full stop
|
flake.nix
CHANGED
|
@@ -14,6 +14,9 @@
|
|
| 14 |
devShells.${system}.default = pkgs.mkShell {
|
| 15 |
packages = [
|
| 16 |
(pkgs.python311.withPackages (python-pkgs: [
|
|
|
|
|
|
|
|
|
|
| 17 |
python-pkgs.numpy
|
| 18 |
python-pkgs.pandas
|
| 19 |
python-pkgs.scipy
|
|
@@ -23,15 +26,20 @@
|
|
| 23 |
python-pkgs.langchain
|
| 24 |
python-pkgs.langchain-text-splitters
|
| 25 |
python-pkgs.unstructured
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
python-pkgs.openai
|
| 27 |
python-pkgs.pydantic
|
| 28 |
python-pkgs.python-dotenv
|
| 29 |
python-pkgs.configargparse
|
| 30 |
python-pkgs.streamlit
|
| 31 |
-
python-pkgs.pip
|
| 32 |
python-pkgs.lark
|
| 33 |
-
python-pkgs.jupyter
|
| 34 |
-
python-pkgs.notebook
|
| 35 |
python-pkgs.sentence-transformers
|
| 36 |
pkgs.unstructured-api
|
| 37 |
]))
|
|
|
|
| 14 |
devShells.${system}.default = pkgs.mkShell {
|
| 15 |
packages = [
|
| 16 |
(pkgs.python311.withPackages (python-pkgs: [
|
| 17 |
+
python-pkgs.pip # VsCode starts
|
| 18 |
+
python-pkgs.jupyter
|
| 19 |
+
python-pkgs.notebook # VsCode ends
|
| 20 |
python-pkgs.numpy
|
| 21 |
python-pkgs.pandas
|
| 22 |
python-pkgs.scipy
|
|
|
|
| 26 |
python-pkgs.langchain
|
| 27 |
python-pkgs.langchain-text-splitters
|
| 28 |
python-pkgs.unstructured
|
| 29 |
+
python-pkgs.wrapt # unstructured[local-inference] starts
|
| 30 |
+
python-pkgs.iso-639
|
| 31 |
+
python-pkgs.emoji
|
| 32 |
+
python-pkgs.pillow-heif
|
| 33 |
+
python-pkgs.magic
|
| 34 |
+
python-pkgs.poppler-qt5
|
| 35 |
+
python-pkgs.pytesseract
|
| 36 |
+
python-pkgs.langdetect # unstructured[local-inference] ends
|
| 37 |
python-pkgs.openai
|
| 38 |
python-pkgs.pydantic
|
| 39 |
python-pkgs.python-dotenv
|
| 40 |
python-pkgs.configargparse
|
| 41 |
python-pkgs.streamlit
|
|
|
|
| 42 |
python-pkgs.lark
|
|
|
|
|
|
|
| 43 |
python-pkgs.sentence-transformers
|
| 44 |
pkgs.unstructured-api
|
| 45 |
]))
|
frontend/.vite/deps_temp_eb58ea19/package.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"type": "module"
|
| 3 |
+
}
|
frontend/src/components/Features.tsx
CHANGED
|
@@ -58,7 +58,7 @@ export const Features = () => {
|
|
| 58 |
</CardHeader>
|
| 59 |
<CardFooter className="flex flex-wrap md:justify-center gap-4">
|
| 60 |
<iframe
|
| 61 |
-
src="https://
|
| 62 |
width="850"
|
| 63 |
style={{ border: 'none' }}
|
| 64 |
height="750"
|
|
|
|
| 58 |
</CardHeader>
|
| 59 |
<CardFooter className="flex flex-wrap md:justify-center gap-4">
|
| 60 |
<iframe
|
| 61 |
+
src="https://aihackathons-docverifyrag.hf.space"
|
| 62 |
width="850"
|
| 63 |
style={{ border: 'none' }}
|
| 64 |
height="750"
|