Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +82 -0
- .idea/.gitignore +8 -0
- .idea/customChatBotDemo.iml +10 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +4 -0
- .idea/modules.xml +8 -0
- .idea/workspace.xml +81 -0
- README.md +1 -7
- __pycache__/constants.cpython-311.pyc +0 -0
- constants.py +1 -0
- data/data.txt +47 -0
- flagged/log.csv +2 -0
- main.py +53 -0
- requirements.txt +12 -0
- venv/.gitignore +2 -0
- venv/bin/__pycache__/dumppdf.cpython-311.pyc +0 -0
- venv/bin/__pycache__/pdf2txt.cpython-311.pyc +0 -0
- venv/bin/activate +83 -0
- venv/bin/activate.csh +55 -0
- venv/bin/activate.fish +100 -0
- venv/bin/activate.nu +92 -0
- venv/bin/activate.ps1 +60 -0
- venv/bin/activate_this.py +31 -0
- venv/bin/chardetect +8 -0
- venv/bin/coloredlogs +8 -0
- venv/bin/deactivate.nu +32 -0
- venv/bin/dotenv +8 -0
- venv/bin/dumppdf.py +473 -0
- venv/bin/f2py +8 -0
- venv/bin/f2py3 +8 -0
- venv/bin/f2py3.11 +8 -0
- venv/bin/filetype +8 -0
- venv/bin/fonttools +8 -0
- venv/bin/gradio +8 -0
- venv/bin/httpx +8 -0
- venv/bin/huggingface-cli +8 -0
- venv/bin/humanfriendly +8 -0
- venv/bin/ipython +8 -0
- venv/bin/ipython3 +8 -0
- venv/bin/isympy +8 -0
- venv/bin/jsonschema +8 -0
- venv/bin/langchain-server +8 -0
- venv/bin/langsmith +8 -0
- venv/bin/markdown-it +8 -0
- venv/bin/nltk +8 -0
- venv/bin/normalizer +8 -0
- venv/bin/onnxruntime_test +8 -0
- venv/bin/openai +8 -0
- venv/bin/pai +8 -0
- venv/bin/pdf2txt.py +317 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,85 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
venv/bin/python filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
venv/bin/python3 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
venv/bin/python3.11 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
venv/lib/python3.11/site-packages/Pillow.libs/libfreetype-cb9caf6f.so.6.19.0 filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
venv/lib/python3.11/site-packages/Pillow.libs/libharfbuzz-3543f599.so.0.60710.0 filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
venv/lib/python3.11/site-packages/_cffi_backend.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
venv/lib/python3.11/site-packages/_pulsar.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
venv/lib/python3.11/site-packages/aiohttp/_http_parser.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
venv/lib/python3.11/site-packages/altair/vegalite/v5/schema/__pycache__/core.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
venv/lib/python3.11/site-packages/bcrypt/_bcrypt.abi3.so filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
venv/lib/python3.11/site-packages/cryptography/hazmat/bindings/_rust.abi3.so filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
venv/lib/python3.11/site-packages/emoji/unicode_codes/__pycache__/data_dict.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
venv/lib/python3.11/site-packages/faiss/_swigfaiss.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
venv/lib/python3.11/site-packages/faiss/_swigfaiss_avx2.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
venv/lib/python3.11/site-packages/faiss_cpu.libs/libgfortran-040039e1.so.5.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
venv/lib/python3.11/site-packages/fontTools/cu2qu/cu2qu.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
venv/lib/python3.11/site-packages/fontTools/feaLib/lexer.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
venv/lib/python3.11/site-packages/fontTools/misc/bezierTools.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
venv/lib/python3.11/site-packages/fontTools/pens/momentsPen.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
venv/lib/python3.11/site-packages/fontTools/qu2cu/qu2cu.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
venv/lib/python3.11/site-packages/fontTools/varLib/iup.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
venv/lib/python3.11/site-packages/gradio/frpc_linux_amd64_v0.2 filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
venv/lib/python3.11/site-packages/gradio/templates/cdn/assets/index-1d5a0c4a.js.map filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
venv/lib/python3.11/site-packages/gradio/templates/frontend/assets/index-a9692bd2.js.map filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
venv/lib/python3.11/site-packages/greenlet/_greenlet.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
venv/lib/python3.11/site-packages/hnswlib.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
venv/lib/python3.11/site-packages/kiwisolver/_cext.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
venv/lib/python3.11/site-packages/lxml/etree.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
venv/lib/python3.11/site-packages/lxml/objectify.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
venv/lib/python3.11/site-packages/matplotlib/_image.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
venv/lib/python3.11/site-packages/matplotlib/_path.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
venv/lib/python3.11/site-packages/matplotlib/_qhull.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
venv/lib/python3.11/site-packages/matplotlib/backends/_backend_agg.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
venv/lib/python3.11/site-packages/matplotlib/ft2font.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
venv/lib/python3.11/site-packages/numexpr/interpreter.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
venv/lib/python3.11/site-packages/numpy/core/_multiarray_umath.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
venv/lib/python3.11/site-packages/numpy/core/_simd.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
venv/lib/python3.11/site-packages/numpy/random/_generator.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
venv/lib/python3.11/site-packages/numpy.libs/libgfortran-040039e1.so.5.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
venv/lib/python3.11/site-packages/numpy.libs/libopenblas64_p-r0-5007b62f.3.23.dev.so filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
venv/lib/python3.11/site-packages/onnxruntime/capi/onnxruntime_pybind11_state.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
venv/lib/python3.11/site-packages/pandas/_libs/algos.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
venv/lib/python3.11/site-packages/pandas/_libs/groupby.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
venv/lib/python3.11/site-packages/pandas/_libs/hashtable.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
venv/lib/python3.11/site-packages/pandas/_libs/interval.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
venv/lib/python3.11/site-packages/pandas/_libs/join.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
venv/lib/python3.11/site-packages/pulsar_client.libs/libpulsar-07bd3920.so filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
venv/lib/python3.11/site-packages/pyarrow/_compute.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
venv/lib/python3.11/site-packages/pyarrow/_flight.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
venv/lib/python3.11/site-packages/pyarrow/lib.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
venv/lib/python3.11/site-packages/pyarrow/libarrow.so.1300 filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
venv/lib/python3.11/site-packages/pyarrow/libarrow_acero.so.1300 filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
venv/lib/python3.11/site-packages/pyarrow/libarrow_dataset.so.1300 filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
venv/lib/python3.11/site-packages/pyarrow/libarrow_flight.so.1300 filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
venv/lib/python3.11/site-packages/pyarrow/libarrow_python.so filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
venv/lib/python3.11/site-packages/pyarrow/libarrow_substrait.so.1300 filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
venv/lib/python3.11/site-packages/pyarrow/libparquet.so.1300 filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
venv/lib/python3.11/site-packages/pydantic_core/_pydantic_core.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
venv/lib/python3.11/site-packages/pypdfium2/pdfium filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
venv/lib/python3.11/site-packages/regex/_regex.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
venv/lib/python3.11/site-packages/rpds/rpds.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
venv/lib/python3.11/site-packages/scipy/fft/_pocketfft/pypocketfft.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
venv/lib/python3.11/site-packages/scipy/linalg/_flapack.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
venv/lib/python3.11/site-packages/scipy/misc/face.dat filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
venv/lib/python3.11/site-packages/scipy/optimize/_highs/_highs_wrapper.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
venv/lib/python3.11/site-packages/scipy/sparse/_sparsetools.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
venv/lib/python3.11/site-packages/scipy/spatial/_ckdtree.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
venv/lib/python3.11/site-packages/scipy/spatial/_qhull.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
venv/lib/python3.11/site-packages/scipy/special/_ufuncs.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
venv/lib/python3.11/site-packages/scipy/special/cython_special.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
venv/lib/python3.11/site-packages/scipy/stats/_unuran/unuran_wrapper.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
venv/lib/python3.11/site-packages/scipy.libs/libgfortran-040039e1.so.5.0.0 filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
venv/lib/python3.11/site-packages/scipy.libs/libopenblasp-r0-23e5df77.3.21.dev.so filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
venv/lib/python3.11/site-packages/sqlalchemy/cyextension/collections.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
venv/lib/python3.11/site-packages/sympy/polys/benchmarks/__pycache__/bench_solvers.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
venv/lib/python3.11/site-packages/tiktoken/_tiktoken.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
venv/lib/python3.11/site-packages/tokenizers/tokenizers.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
venv/lib/python3.11/site-packages/tokenizers.libs/libcrypto-d3570994.so.1.0.2k filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
venv/lib/python3.11/site-packages/tokenizers.libs/libkrb5-fcafa220.so.3.3 filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
venv/lib/python3.11/site-packages/uvloop/loop.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
venv/lib/python3.11/site-packages/watchfiles/_rust_notify.abi3.so filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
venv/lib/python3.11/site-packages/yaml/_yaml.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
.idea/.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Default ignored files
|
| 2 |
+
/shelf/
|
| 3 |
+
/workspace.xml
|
| 4 |
+
# Editor-based HTTP Client requests
|
| 5 |
+
/httpRequests/
|
| 6 |
+
# Datasource local storage ignored files
|
| 7 |
+
/dataSources/
|
| 8 |
+
/dataSources.local.xml
|
.idea/customChatBotDemo.iml
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<module type="PYTHON_MODULE" version="4">
|
| 3 |
+
<component name="NewModuleRootManager">
|
| 4 |
+
<content url="file://$MODULE_DIR$">
|
| 5 |
+
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
| 6 |
+
</content>
|
| 7 |
+
<orderEntry type="inheritedJdk" />
|
| 8 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
| 9 |
+
</component>
|
| 10 |
+
</module>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<settings>
|
| 3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
| 4 |
+
<version value="1.0" />
|
| 5 |
+
</settings>
|
| 6 |
+
</component>
|
.idea/misc.xml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (customChatBotDemo)" project-jdk-type="Python SDK" />
|
| 4 |
+
</project>
|
.idea/modules.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectModuleManager">
|
| 4 |
+
<modules>
|
| 5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/customChatBotDemo.iml" filepath="$PROJECT_DIR$/.idea/customChatBotDemo.iml" />
|
| 6 |
+
</modules>
|
| 7 |
+
</component>
|
| 8 |
+
</project>
|
.idea/workspace.xml
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="AutoImportSettings">
|
| 4 |
+
<option name="autoReloadType" value="SELECTIVE" />
|
| 5 |
+
</component>
|
| 6 |
+
<component name="ChangeListManager">
|
| 7 |
+
<list default="true" id="389d532e-4eaf-4367-8bd6-095ab4d8b955" name="Changes" comment="" />
|
| 8 |
+
<option name="SHOW_DIALOG" value="false" />
|
| 9 |
+
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
| 10 |
+
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
| 11 |
+
<option name="LAST_RESOLUTION" value="IGNORE" />
|
| 12 |
+
</component>
|
| 13 |
+
<component name="FileTemplateManagerImpl">
|
| 14 |
+
<option name="RECENT_TEMPLATES">
|
| 15 |
+
<list>
|
| 16 |
+
<option value="Python Script" />
|
| 17 |
+
</list>
|
| 18 |
+
</option>
|
| 19 |
+
</component>
|
| 20 |
+
<component name="ProjectColorInfo">{
|
| 21 |
+
"associatedIndex": 6
|
| 22 |
+
}</component>
|
| 23 |
+
<component name="ProjectId" id="2UfxHqM9G5VUMOOUNXWACjL1huw" />
|
| 24 |
+
<component name="ProjectViewState">
|
| 25 |
+
<option name="hideEmptyMiddlePackages" value="true" />
|
| 26 |
+
<option name="showLibraryContents" value="true" />
|
| 27 |
+
</component>
|
| 28 |
+
<component name="PropertiesComponent">{
|
| 29 |
+
"keyToString": {
|
| 30 |
+
"RunOnceActivity.OpenProjectViewOnStart": "true",
|
| 31 |
+
"RunOnceActivity.ShowReadmeOnStart": "true",
|
| 32 |
+
"WebServerToolWindowFactoryState": "false",
|
| 33 |
+
"node.js.detected.package.eslint": "true",
|
| 34 |
+
"node.js.detected.package.tslint": "true",
|
| 35 |
+
"node.js.selected.package.eslint": "(autodetect)",
|
| 36 |
+
"node.js.selected.package.tslint": "(autodetect)",
|
| 37 |
+
"settings.editor.selected.configurable": "preferences.pluginManager",
|
| 38 |
+
"vue.rearranger.settings.migration": "true"
|
| 39 |
+
}
|
| 40 |
+
}</component>
|
| 41 |
+
<component name="RunManager">
|
| 42 |
+
<configuration name="main" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
|
| 43 |
+
<module name="customChatBotDemo" />
|
| 44 |
+
<option name="INTERPRETER_OPTIONS" value="" />
|
| 45 |
+
<option name="PARENT_ENVS" value="true" />
|
| 46 |
+
<envs>
|
| 47 |
+
<env name="PYTHONUNBUFFERED" value="1" />
|
| 48 |
+
</envs>
|
| 49 |
+
<option name="SDK_HOME" value="" />
|
| 50 |
+
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
| 51 |
+
<option name="IS_MODULE_SDK" value="true" />
|
| 52 |
+
<option name="ADD_CONTENT_ROOTS" value="true" />
|
| 53 |
+
<option name="ADD_SOURCE_ROOTS" value="true" />
|
| 54 |
+
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
| 55 |
+
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/main.py" />
|
| 56 |
+
<option name="PARAMETERS" value="" />
|
| 57 |
+
<option name="SHOW_COMMAND_LINE" value="false" />
|
| 58 |
+
<option name="EMULATE_TERMINAL" value="false" />
|
| 59 |
+
<option name="MODULE_MODE" value="false" />
|
| 60 |
+
<option name="REDIRECT_INPUT" value="false" />
|
| 61 |
+
<option name="INPUT_FILE" value="" />
|
| 62 |
+
<method v="2" />
|
| 63 |
+
</configuration>
|
| 64 |
+
</component>
|
| 65 |
+
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
|
| 66 |
+
<component name="TaskManager">
|
| 67 |
+
<task active="true" id="Default" summary="Default task">
|
| 68 |
+
<changelist id="389d532e-4eaf-4367-8bd6-095ab4d8b955" name="Changes" comment="" />
|
| 69 |
+
<created>1693344351359</created>
|
| 70 |
+
<option name="number" value="Default" />
|
| 71 |
+
<option name="presentableId" value="Default" />
|
| 72 |
+
<updated>1693344351359</updated>
|
| 73 |
+
<workItem from="1693344352959" duration="1601000" />
|
| 74 |
+
<workItem from="1693345962391" duration="234000" />
|
| 75 |
+
</task>
|
| 76 |
+
<servers />
|
| 77 |
+
</component>
|
| 78 |
+
<component name="TypeScriptGeneratedFilesManager">
|
| 79 |
+
<option name="version" value="3" />
|
| 80 |
+
</component>
|
| 81 |
+
</project>
|
README.md
CHANGED
|
@@ -1,12 +1,6 @@
|
|
| 1 |
---
|
| 2 |
title: PonchoBotDemo
|
| 3 |
-
|
| 4 |
-
colorFrom: purple
|
| 5 |
-
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 3.41.2
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
title: PonchoBotDemo
|
| 3 |
+
app_file: main.py
|
|
|
|
|
|
|
| 4 |
sdk: gradio
|
| 5 |
sdk_version: 3.41.2
|
|
|
|
|
|
|
| 6 |
---
|
|
|
|
|
|
__pycache__/constants.cpython-311.pyc
ADDED
|
Binary file (228 Bytes). View file
|
|
|
constants.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
import os

# SECURITY: an OpenAI secret key was previously hard-coded on this line and
# published with the repository. That key must be treated as compromised and
# revoked. Secrets are never committed; the key is read from the environment
# instead (for example, a Space secret named OPENAI_API_KEY).
# An empty string means that no key has been configured.
APIKEY = os.environ.get("OPENAI_API_KEY", "")
|
data/data.txt
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
SOBRE MÍ - EL BOT DEL BANCO ESTRELLA
|
| 2 |
+
|
| 3 |
+
¡Hola! Soy EstrellaBot, la inteligencia artificial del Banco de Poncho. Estoy aquí para ayudarte en todo lo relacionado con nuestros servicios y productos financieros. Fui diseñado para ofrecer respuestas precisas y rápidas, garantizando que tu experiencia con nosotros sea brillante. Si tienes alguna duda o consulta, no dudes en preguntarme. ¡Estoy aquí para iluminar tu camino financiero!
|
| 4 |
+
|
| 5 |
+
INFORMACIÓN GENERAL DEL BANCO
|
| 6 |
+
|
| 7 |
+
Nombre del banco: Banco de Poncho.
|
| 8 |
+
Misión: Facilitar soluciones financieras brillantes a todos nuestros clientes, guiándolos hacia un futuro próspero.
|
| 9 |
+
Visión: Ser el banco líder en innovación y servicio al cliente en el sector financiero.
|
| 10 |
+
PRODUCTOS Y SERVICIOS
|
| 11 |
+
|
| 12 |
+
Cuentas Bancarias:
|
| 13 |
+
|
| 14 |
+
Cuenta Corriente: Sin comisiones por manejo de cuenta y acceso a banca en línea.
|
| 15 |
+
Cuenta de Ahorro Estelar: Tasa de interés del 3.5% anual, sin comisiones si mantienes un mínimo de [X dinero].
|
| 16 |
+
Opciones de Inversión:
|
| 17 |
+
|
| 18 |
+
Fondos Mutuos Dorados: Diversificación de inversiones con rentabilidad estimada entre 5-8% anual.
|
| 19 |
+
Depósitos a Plazo: Duración flexible con tasas de interés competitivas.
|
| 20 |
+
Préstamos:
|
| 21 |
+
|
| 22 |
+
Préstamos Personales: Desde [X dinero] hasta [X dinero] con tasas a partir del 5% anual.
|
| 23 |
+
Hipotecarios: Financiamiento hasta el 80% del valor de la propiedad, con tasas desde el 4% anual.
|
| 24 |
+
Para Empresas: Soluciones personalizadas para pequeñas y grandes empresas.
|
| 25 |
+
Tarjetas de Crédito:
|
| 26 |
+
|
| 27 |
+
Tarjeta Estrella: Sin cuota anual el primer año, programa de recompensas con acumulación de puntos por cada compra.
|
| 28 |
+
Tarjeta Estrella Gold: Beneficios premium, acceso a salas VIP en aeropuertos y asistencia personalizada.
|
| 29 |
+
SERVICIOS EN LÍNEA
|
| 30 |
+
|
| 31 |
+
Banca en línea: Acceso 24/7 a tu cuenta, transferencias, consulta de saldos y movimientos.
|
| 32 |
+
Aplicación móvil: Realiza operaciones desde tu smartphone, localiza cajeros y sucursales cercanas.
|
| 33 |
+
Asistencia virtual: Nuestro bot, siempre disponible para responder tus dudas y brindarte la mejor experiencia.
|
| 34 |
+
SEGURIDAD
|
| 35 |
+
|
| 36 |
+
Protección: Contamos con sistemas avanzados de cifrado y protección contra fraudes.
|
| 37 |
+
Alertas: Notificaciones en tiempo real para cualquier actividad inusual en tus cuentas.
|
| 38 |
+
Autenticación: Procesos de verificación en dos pasos para garantizar tu seguridad en línea.
|
| 39 |
+
CONTACTO
|
| 40 |
+
|
| 41 |
+
Atención al cliente: Llámanos al [Número de teléfono] o escríbenos a [Email de atención al cliente].
|
| 42 |
+
Sucursales: Más de [X número] de sucursales en todo el país. Encuentra la más cercana en nuestro sitio web.
|
| 43 |
+
PREGUNTAS FRECUENTES
|
| 44 |
+
|
| 45 |
+
¿Cómo abro una cuenta?: Solo necesitas tu identificación, comprobante de domicilio y acudir a tu sucursal más cercana.
|
| 46 |
+
¿Puedo transferir dinero a otros bancos?: Sí, mediante nuestro sistema en línea puedes realizar transferencias interbancarias.
|
| 47 |
+
¿Qué hago si pierdo mi tarjeta?: Llámanos de inmediato al [Número de emergencia] para bloquearla y proteger tu dinero.
|
flagged/log.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
question,output,flag,username,timestamp
|
| 2 |
+
quien eres,"¡Hola! Soy EstrellaBot, la inteligencia artificial del Banco de Poncho. Estoy aquí para ayudarte en todo lo relacionado con nuestros servicios y productos financieros.",,,2023-08-29 15:57:16.002129
|
main.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
import openai
|
| 4 |
+
from langchain.chains import ConversationalRetrievalChain
|
| 5 |
+
from langchain.chat_models import ChatOpenAI
|
| 6 |
+
from langchain.document_loaders import DirectoryLoader
|
| 7 |
+
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
|
| 8 |
+
from langchain.indexes import VectorstoreIndexCreator
|
| 9 |
+
from langchain.llms import OpenAI
|
| 10 |
+
from langchain.embeddings import OpenAIEmbeddings
|
| 11 |
+
from langchain.vectorstores.chroma import Chroma
|
| 12 |
+
import constants
|
| 13 |
+
|
| 14 |
+
# Export the key through the environment variable OPENAI_API_KEY.
# NOTE(review): presumably this is where the OpenAI clients below pick it up.
os.environ["OPENAI_API_KEY"] = constants.APIKEY

# When True, the vector index is stored in ./persist and reused on later runs
# instead of being rebuilt from the documents each time.
PERSIST = False

if PERSIST and os.path.exists("persist"):
    print("Reusing index...\n")
    # Re-open the Chroma collection stored in ./persist and wrap it.
    # No further "load" step is performed: VectorStoreIndexWrapper does not
    # define a load() method, so calling index.load("persist") here would
    # raise AttributeError as soon as PERSIST is enabled.
    vectorstore = Chroma(persist_directory="persist", embedding_function=OpenAIEmbeddings())
    index = VectorStoreIndexWrapper(vectorstore=vectorstore)
else:
    # Build the index from the files under data/.
    loader = DirectoryLoader("data/")
    if PERSIST:
        index = VectorstoreIndexCreator(
            vectorstore_kwargs={"persist_directory": "persist"}
        ).from_loaders([loader])
    else:
        index = VectorstoreIndexCreator().from_loaders([loader])

# Model selection: flip use_gpt_4 to fall back to gpt-3.5-turbo.
use_gpt_4 = True
model_name = "gpt-4" if use_gpt_4 else "gpt-3.5-turbo"

# The retriever is configured with k=1 (one retrieved document per question).
chain = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(model=model_name),
    retriever=index.vectorstore.as_retriever(search_kwargs={"k": 1}),
)

# Conversation memory as (question, answer) tuples. It is a module-level list,
# so it is shared by every caller in this process.
chat_history = []
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# Upper bound on the number of past (question, answer) pairs kept in
# chat_history and sent back to the chain with each new question.
MAX_HISTORY_TURNS = 10


def answer_question(question):
    """Answer *question* with the retrieval chain and record the exchange.

    Args:
        question: The user's question as plain text.

    Returns:
        The value stored under the "answer" key of the chain's result.

    Side effects:
        Appends ``(question, answer)`` to the module-level ``chat_history``
        list and trims it to the most recent ``MAX_HISTORY_TURNS`` entries.
        Without the trim the history grows on every call and is resent in
        full, which would eventually exceed the model's context window.
        Because ``chat_history`` is a module-level list, it is shared by
        every caller in the process.
    """
    result = chain({"question": question, "chat_history": chat_history})
    answer = result["answer"]
    chat_history.append((question, answer))
    # Trim in place so the module-level list object keeps its identity;
    # this is a no-op while the list holds MAX_HISTORY_TURNS entries or fewer.
    del chat_history[:-MAX_HISTORY_TURNS]
    return answer
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# Single text-in / text-out web UI wired to answer_question.
iface = gr.Interface(
    fn=answer_question,
    inputs="text",
    outputs="text",
    title="Demo IA Conversacional",
    description="Pregúntame lo que quieras, te responderé con lo que sé.",
)

# Start the server only when this file is run as a script, so importing the
# module does not launch a web server as a side effect.
if __name__ == "__main__":
    # Hugging Face Spaces already serves the app publicly and does not support
    # share links, so a tunnel is requested only when running elsewhere.
    # Spaces sets the SPACE_ID environment variable.
    iface.launch(share="SPACE_ID" not in os.environ)
|
requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
langchain==0.0.276
openai==0.27.9
pandasai==1.0.9
pdfplumber
pypdf==3.15.4
streamlit==1.26.0
streamlit-chat==0.1.1
tiktoken==0.4.0
youtube-transcript-api==0.6.1
faiss-cpu
gradio
unstructured
# main.py builds its index with langchain's Chroma vector store, which needs
# the chromadb package at runtime; without it the app fails on startup.
chromadb
|
venv/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# created by virtualenv automatically
|
| 2 |
+
*
|
venv/bin/__pycache__/dumppdf.cpython-311.pyc
ADDED
|
Binary file (22.7 kB). View file
|
|
|
venv/bin/__pycache__/pdf2txt.cpython-311.pyc
ADDED
|
Binary file (12.1 kB). View file
|
|
|
venv/bin/activate
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file must be used with "source bin/activate" *from bash*
|
| 2 |
+
# you cannot run it directly
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
if [ "${BASH_SOURCE-}" = "$0" ]; then
|
| 6 |
+
echo "You must source this script: \$ source $0" >&2
|
| 7 |
+
exit 33
|
| 8 |
+
fi
|
| 9 |
+
|
| 10 |
+
deactivate () {
|
| 11 |
+
unset -f pydoc >/dev/null 2>&1 || true
|
| 12 |
+
|
| 13 |
+
# reset old environment variables
|
| 14 |
+
# ! [ -z ${VAR+_} ] returns true if VAR is declared at all
|
| 15 |
+
if ! [ -z "${_OLD_VIRTUAL_PATH:+_}" ] ; then
|
| 16 |
+
PATH="$_OLD_VIRTUAL_PATH"
|
| 17 |
+
export PATH
|
| 18 |
+
unset _OLD_VIRTUAL_PATH
|
| 19 |
+
fi
|
| 20 |
+
if ! [ -z "${_OLD_VIRTUAL_PYTHONHOME+_}" ] ; then
|
| 21 |
+
PYTHONHOME="$_OLD_VIRTUAL_PYTHONHOME"
|
| 22 |
+
export PYTHONHOME
|
| 23 |
+
unset _OLD_VIRTUAL_PYTHONHOME
|
| 24 |
+
fi
|
| 25 |
+
|
| 26 |
+
# The hash command must be called to get it to forget past
|
| 27 |
+
# commands. Without forgetting past commands the $PATH changes
|
| 28 |
+
# we made may not be respected
|
| 29 |
+
hash -r 2>/dev/null
|
| 30 |
+
|
| 31 |
+
if ! [ -z "${_OLD_VIRTUAL_PS1+_}" ] ; then
|
| 32 |
+
PS1="$_OLD_VIRTUAL_PS1"
|
| 33 |
+
export PS1
|
| 34 |
+
unset _OLD_VIRTUAL_PS1
|
| 35 |
+
fi
|
| 36 |
+
|
| 37 |
+
unset VIRTUAL_ENV
|
| 38 |
+
if [ ! "${1-}" = "nondestructive" ] ; then
|
| 39 |
+
# Self destruct!
|
| 40 |
+
unset -f deactivate
|
| 41 |
+
fi
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
# unset irrelevant variables
|
| 45 |
+
deactivate nondestructive
|
| 46 |
+
|
| 47 |
+
VIRTUAL_ENV='/home/draco/PycharmProjects/customChatBotDemo/venv'
|
| 48 |
+
if ([ "$OSTYPE" = "cygwin" ] || [ "$OSTYPE" = "msys" ]) && $(command -v cygpath &> /dev/null) ; then
|
| 49 |
+
VIRTUAL_ENV=$(cygpath -u "$VIRTUAL_ENV")
|
| 50 |
+
fi
|
| 51 |
+
export VIRTUAL_ENV
|
| 52 |
+
|
| 53 |
+
_OLD_VIRTUAL_PATH="$PATH"
|
| 54 |
+
PATH="$VIRTUAL_ENV/bin:$PATH"
|
| 55 |
+
export PATH
|
| 56 |
+
|
| 57 |
+
# unset PYTHONHOME if set
|
| 58 |
+
if ! [ -z "${PYTHONHOME+_}" ] ; then
|
| 59 |
+
_OLD_VIRTUAL_PYTHONHOME="$PYTHONHOME"
|
| 60 |
+
unset PYTHONHOME
|
| 61 |
+
fi
|
| 62 |
+
|
| 63 |
+
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT-}" ] ; then
|
| 64 |
+
_OLD_VIRTUAL_PS1="${PS1-}"
|
| 65 |
+
if [ "x" != x ] ; then
|
| 66 |
+
PS1="() ${PS1-}"
|
| 67 |
+
else
|
| 68 |
+
PS1="(`basename \"$VIRTUAL_ENV\"`) ${PS1-}"
|
| 69 |
+
fi
|
| 70 |
+
export PS1
|
| 71 |
+
fi
|
| 72 |
+
|
| 73 |
+
# Make sure to unalias pydoc if it's already there
|
| 74 |
+
alias pydoc 2>/dev/null >/dev/null && unalias pydoc || true
|
| 75 |
+
|
| 76 |
+
pydoc () {
|
| 77 |
+
python -m pydoc "$@"
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
# The hash command must be called to get it to forget past
|
| 81 |
+
# commands. Without forgetting past commands the $PATH changes
|
| 82 |
+
# we made may not be respected
|
| 83 |
+
hash -r 2>/dev/null
|
venv/bin/activate.csh
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file must be used with "source bin/activate.csh" *from csh*.
|
| 2 |
+
# You cannot run it directly.
|
| 3 |
+
# Created by Davide Di Blasi <davidedb@gmail.com>.
|
| 4 |
+
|
| 5 |
+
set newline='\
|
| 6 |
+
'
|
| 7 |
+
|
| 8 |
+
alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH:q" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT:q" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; test "\!:*" != "nondestructive" && unalias deactivate && unalias pydoc'
|
| 9 |
+
|
| 10 |
+
# Unset irrelevant variables.
|
| 11 |
+
deactivate nondestructive
|
| 12 |
+
|
| 13 |
+
setenv VIRTUAL_ENV '/home/draco/PycharmProjects/customChatBotDemo/venv'
|
| 14 |
+
|
| 15 |
+
set _OLD_VIRTUAL_PATH="$PATH:q"
|
| 16 |
+
setenv PATH "$VIRTUAL_ENV:q/bin:$PATH:q"
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
if ('' != "") then
|
| 21 |
+
set env_name = '() '
|
| 22 |
+
else
|
| 23 |
+
set env_name = '('"$VIRTUAL_ENV:t:q"') '
|
| 24 |
+
endif
|
| 25 |
+
|
| 26 |
+
if ( $?VIRTUAL_ENV_DISABLE_PROMPT ) then
|
| 27 |
+
if ( $VIRTUAL_ENV_DISABLE_PROMPT == "" ) then
|
| 28 |
+
set do_prompt = "1"
|
| 29 |
+
else
|
| 30 |
+
set do_prompt = "0"
|
| 31 |
+
endif
|
| 32 |
+
else
|
| 33 |
+
set do_prompt = "1"
|
| 34 |
+
endif
|
| 35 |
+
|
| 36 |
+
if ( $do_prompt == "1" ) then
|
| 37 |
+
# Could be in a non-interactive environment,
|
| 38 |
+
# in which case, $prompt is undefined and we wouldn't
|
| 39 |
+
# care about the prompt anyway.
|
| 40 |
+
if ( $?prompt ) then
|
| 41 |
+
set _OLD_VIRTUAL_PROMPT="$prompt:q"
|
| 42 |
+
if ( "$prompt:q" =~ *"$newline:q"* ) then
|
| 43 |
+
:
|
| 44 |
+
else
|
| 45 |
+
set prompt = "$env_name:q$prompt:q"
|
| 46 |
+
endif
|
| 47 |
+
endif
|
| 48 |
+
endif
|
| 49 |
+
|
| 50 |
+
unset env_name
|
| 51 |
+
unset do_prompt
|
| 52 |
+
|
| 53 |
+
alias pydoc python -m pydoc
|
| 54 |
+
|
| 55 |
+
rehash
|
venv/bin/activate.fish
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file must be used using `source bin/activate.fish` *within a running fish ( http://fishshell.com ) session*.
# Do not run it directly.

# Join all arguments into a single colon-separated string and print it.
function _bashify_path -d "Converts a fish path to something bash can recognize"
    set fishy_path $argv
    set bashy_path $fishy_path[1]
    for path_part in $fishy_path[2..-1]
        set bashy_path "$bashy_path:$path_part"
    end
    echo $bashy_path
end

# Print the arguments with every ':' replaced by a newline.
function _fishify_path -d "Converts a bash path to something fish can recognize"
    echo $argv | tr ':' '\n'
end

# Restore PATH, PYTHONHOME and the prompt from their backups and unset
# VIRTUAL_ENV. Unless the first argument is 'nondestructive', also remove the
# helper functions defined by this file (including `deactivate` itself).
function deactivate -d 'Exit virtualenv mode and return to the normal environment.'
    # reset old environment variables
    if test -n "$_OLD_VIRTUAL_PATH"
        # https://github.com/fish-shell/fish-shell/issues/436 altered PATH handling
        # The backup is a colon-joined string when the first character of
        # $FISH_VERSION is below 3 (see the matching branch at activation).
        if test (echo $FISH_VERSION | head -c 1) -lt 3
            set -gx PATH (_fishify_path "$_OLD_VIRTUAL_PATH")
        else
            set -gx PATH $_OLD_VIRTUAL_PATH
        end
        set -e _OLD_VIRTUAL_PATH
    end

    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
        set -gx PYTHONHOME "$_OLD_VIRTUAL_PYTHONHOME"
        set -e _OLD_VIRTUAL_PYTHONHOME
    end

    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
        and functions -q _old_fish_prompt
        # Set an empty local `$fish_function_path` to allow the removal of `fish_prompt` using `functions -e`.
        set -l fish_function_path

        # Erase virtualenv's `fish_prompt` and restore the original.
        functions -e fish_prompt
        functions -c _old_fish_prompt fish_prompt
        functions -e _old_fish_prompt
        set -e _OLD_FISH_PROMPT_OVERRIDE
    end

    set -e VIRTUAL_ENV

    if test "$argv[1]" != 'nondestructive'
        # Self-destruct!
        functions -e pydoc
        functions -e deactivate
        functions -e _bashify_path
        functions -e _fishify_path
    end
end

# Unset irrelevant variables.
deactivate nondestructive

# Absolute path of this environment, fixed when the file was generated.
set -gx VIRTUAL_ENV '/home/draco/PycharmProjects/customChatBotDemo/venv'

# https://github.com/fish-shell/fish-shell/issues/436 altered PATH handling
# Back up PATH (colon-joined when the leading version character is below 3),
# then put the environment's bin directory first.
if test (echo $FISH_VERSION | head -c 1) -lt 3
    set -gx _OLD_VIRTUAL_PATH (_bashify_path $PATH)
else
    set -gx _OLD_VIRTUAL_PATH $PATH
end
set -gx PATH "$VIRTUAL_ENV"'/bin' $PATH

# Unset `$PYTHONHOME` if set.
if set -q PYTHONHOME
    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
    set -e PYTHONHOME
end

function pydoc
    python -m pydoc $argv
end

# The prompt is wrapped only when VIRTUAL_ENV_DISABLE_PROMPT is empty or unset.
if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
    # Copy the current `fish_prompt` function as `_old_fish_prompt`.
    functions -c fish_prompt _old_fish_prompt

    function fish_prompt
        # Run the user's prompt first; it might depend on (pipe)status.
        set -l prompt (_old_fish_prompt)

        # Prompt override provided?
        # If not, just prepend the environment name.
        # (The tested literal is empty in this generated file, so the else
        # branch always runs.)
        if test -n ''
            printf '(%s) ' ''
        else
            printf '(%s) ' (basename "$VIRTUAL_ENV")
        end

        string join -- \n $prompt # handle multi-line prompts
    end

    # Marker that `deactivate` checks before restoring the original prompt.
    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
end
|
venv/bin/activate.nu
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This command prepares the required environment variables
def-env activate-virtualenv [] {
    # True when the value's described type is exactly 'string'.
    def is-string [x] {
        ($x | describe) == 'string'
    }

    # True when an environment variable with this name exists.
    def has-env [name: string] {
        $name in (env).name
    }

    let is_windows = ((sys).host.name | str downcase) == 'windows'
    # Absolute path of this environment, fixed when the file was generated.
    let virtual_env = '/home/draco/PycharmProjects/customChatBotDemo/venv'
    let bin = 'bin'
    let path_sep = ':'
    # On Windows the variable may be spelled 'Path'; everywhere else it is 'PATH'.
    let path_name = if $is_windows {
        if (has-env 'Path') {
            'Path'
        } else {
            'PATH'
        }
    } else {
        'PATH'
    }

    # Current search path, normalised to a list.
    let old_path = (
        if $is_windows {
            if (has-env 'Path') {
                $env.Path
            } else {
                $env.PATH
            }
        } else {
            $env.PATH
        } | if (is-string $in) {
            # if Path/PATH is a string, make it a list
            $in | split row $path_sep | path expand
        } else {
            $in
        }
    )

    # New search path: the environment's bin directory first, joined back into a string.
    let venv_path = ([$virtual_env $bin] | path join)
    let new_path = ($old_path | prepend $venv_path | str collect $path_sep)

    # Creating the new prompt for the session
    # (Both literals are empty in this generated file, so the first branch
    # always runs and the prefix is the environment's directory name in
    # parentheses.)
    let virtual_prompt = if ('' == '') {
        $'(char lparen)($virtual_env | path basename)(char rparen) '
    } else {
        '() '
    }

    # Back up the old prompt builder
    # If an environment is already active and has a saved prompt, reuse that
    # saved value rather than the currently wrapped PROMPT_COMMAND.
    let old_prompt_command = if (has-env 'VIRTUAL_ENV') && (has-env '_OLD_PROMPT_COMMAND') {
        $env._OLD_PROMPT_COMMAND
    } else {
        if (has-env 'PROMPT_COMMAND') {
            $env.PROMPT_COMMAND
        } else {
            ''
        }
    }

    # If there is no default prompt, then only the env is printed in the prompt
    let new_prompt = if (has-env 'PROMPT_COMMAND') {
        # A block prompt is executed; any other value is interpolated as-is.
        if ($old_prompt_command | describe) == 'block' {
            { $'($virtual_prompt)(do $old_prompt_command)' }
        } else {
            { $'($virtual_prompt)($old_prompt_command)' }
        }
    } else {
        { $'($virtual_prompt)' }
    }

    # Environment variables that will be batched loaded to the virtual env
    let new_env = {
        $path_name : $new_path
        VIRTUAL_ENV : $virtual_env
        _OLD_VIRTUAL_PATH : ($old_path | str collect $path_sep)
        _OLD_PROMPT_COMMAND : $old_prompt_command
        PROMPT_COMMAND : $new_prompt
        VIRTUAL_PROMPT : $virtual_prompt
    }

    # Activate the environment variables
    load-env $new_env
}

# Activate the virtualenv
activate-virtualenv

alias pydoc = python -m pydoc
# `deactivate` sources the companion script by its absolute, generation-time path.
alias deactivate = source '/home/draco/PycharmProjects/customChatBotDemo/venv/bin/deactivate.nu'
|
venv/bin/activate.ps1
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Location of this script, and the directory two levels above it, which is
# used below as the virtual environment root.
$script:THIS_PATH = $myinvocation.mycommand.path
$script:BASE_DIR = Split-Path (Resolve-Path "$THIS_PATH/..") -Parent

# Restore PATH and the prompt from their backups and remove VIRTUAL_ENV.
# Without -NonDestructive it also removes the `deactivate` and `pydoc`
# functions themselves.
function global:deactivate([switch] $NonDestructive) {
    if (Test-Path variable:_OLD_VIRTUAL_PATH) {
        $env:PATH = $variable:_OLD_VIRTUAL_PATH
        Remove-Variable "_OLD_VIRTUAL_PATH" -Scope global
    }

    if (Test-Path function:_old_virtual_prompt) {
        $function:prompt = $function:_old_virtual_prompt
        Remove-Item function:\_old_virtual_prompt
    }

    if ($env:VIRTUAL_ENV) {
        Remove-Item env:VIRTUAL_ENV -ErrorAction SilentlyContinue
    }

    if (!$NonDestructive) {
        # Self destruct!
        Remove-Item function:deactivate
        Remove-Item function:pydoc
    }
}

function global:pydoc {
    python -m pydoc $args
}

# unset irrelevant variables
deactivate -nondestructive

$VIRTUAL_ENV = $BASE_DIR
$env:VIRTUAL_ENV = $VIRTUAL_ENV

# Back up the current PATH in a global variable for `deactivate` to restore.
New-Variable -Scope global -Name _OLD_VIRTUAL_PATH -Value $env:PATH

# Prepend the environment's bin directory; note the hard-coded "/bin" and ":"
# separator in this generated file.
$env:PATH = "$env:VIRTUAL_ENV/bin:" + $env:PATH
if (!$env:VIRTUAL_ENV_DISABLE_PROMPT) {
    # Declare the backup function first, then overwrite its body with the
    # current prompt function.
    function global:_old_virtual_prompt {
        ""
    }
    $function:_old_virtual_prompt = $function:prompt

    # Both operands are empty literals here, so this condition is always false
    # and the else branch defines the prompt.
    if ("" -ne "") {
        function global:prompt {
            # Add the custom prefix to the existing prompt
            $previous_prompt_value = & $function:_old_virtual_prompt
            ("() " + $previous_prompt_value)
        }
    }
    else {
        function global:prompt {
            # Add a prefix to the current prompt, but don't discard it.
            $previous_prompt_value = & $function:_old_virtual_prompt
            $new_prompt_value = "($( Split-Path $env:VIRTUAL_ENV -Leaf )) "
            ($new_prompt_value + $previous_prompt_value)
        }
    }
}
|
venv/bin/activate_this.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Activate virtualenv for current interpreter:
|
| 2 |
+
|
| 3 |
+
Use exec(open(this_file).read(), {'__file__': this_file}).
|
| 4 |
+
|
| 5 |
+
This can be used when you must use an existing Python interpreter, not the virtualenv bin/python.
|
| 6 |
+
"""
|
| 7 |
+
import os
|
| 8 |
+
import site
|
| 9 |
+
import sys
|
| 10 |
+
|
| 11 |
+
# `__file__` only exists when the caller supplied it in the exec() globals;
# without it the script cannot locate the environment, so fail loudly.
try:
    abs_file = os.path.abspath(__file__)
except NameError:
    raise AssertionError("You must use exec(open(this_file).read(), {'__file__': this_file}))")

bin_dir = os.path.dirname(abs_file)
base = bin_dir[: -len("bin") - 1]  # strip away the bin part from the __file__, plus the path separator

# prepend bin to PATH (this file is inside the bin directory)
os.environ["PATH"] = os.pathsep.join([bin_dir] + os.environ.get("PATH", "").split(os.pathsep))
os.environ["VIRTUAL_ENV"] = base  # virtual env is right above bin directory

# add the virtual environments libraries to the host python import mechanism
prev_length = len(sys.path)
for lib in "../lib/python3.11/site-packages".split(os.pathsep):
    path = os.path.realpath(os.path.join(bin_dir, lib))
    # The condition is an empty string literal, which is always falsy, so
    # `path` is passed through unchanged and `.decode` is never called.
    site.addsitedir(path.decode("utf-8") if "" else path)
# Move the entries appended since `prev_length` to the front of sys.path so
# they are searched before the entries that were already there.
sys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length]

# Keep the original prefix under `sys.real_prefix`, then point `sys.prefix`
# at the environment root.
sys.real_prefix = sys.prefix
sys.prefix = base
|
venv/bin/chardetect
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
# -*- coding: utf-8 -*-
# Console launcher: hands control to chardet.cli.chardetect.main and exits
# with whatever that function returns.
import re
import sys

from chardet.cli.chardetect import main

if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" launcher suffix from the
    # program name before the real entry point reads sys.argv.
    program_name = sys.argv[0]
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', program_name)
    exit_status = main()
    sys.exit(exit_status)
|
venv/bin/coloredlogs
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
# -*- coding: utf-8 -*-
# Console launcher: hands control to coloredlogs.cli.main and exits with
# whatever that function returns.
import re
import sys

from coloredlogs.cli import main

if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" launcher suffix from the
    # program name before the real entry point reads sys.argv.
    program_name = sys.argv[0]
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', program_name)
    exit_status = main()
    sys.exit(exit_status)
|
venv/bin/deactivate.nu
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Restore the search path and prompt saved at activation time, then hide the
# bookkeeping variables that activation created.
def-env deactivate-virtualenv [] {
    # True when an environment variable with this name exists.
    def has-env [name: string] {
        $name in (env).name
    }

    let is_windows = ((sys).host.name | str downcase) == 'windows'

    # On Windows the variable may be spelled 'Path'; everywhere else it is 'PATH'.
    let path_name = if $is_windows {
        if (has-env 'Path') {
            'Path'
        } else {
            'PATH'
        }
    } else {
        'PATH'
    }

    # Put back the search path that was saved in _OLD_VIRTUAL_PATH.
    load-env { $path_name : $env._OLD_VIRTUAL_PATH }

    # Put back the prompt that was saved in _OLD_PROMPT_COMMAND.
    let-env PROMPT_COMMAND = $env._OLD_PROMPT_COMMAND

    # Hiding the environment variables that were created when activating the env
    hide _OLD_VIRTUAL_PATH
    hide _OLD_PROMPT_COMMAND
    hide VIRTUAL_ENV
    hide VIRTUAL_PROMPT
}

deactivate-virtualenv

# Finally remove the two names themselves: `pydoc` and `deactivate`.
hide pydoc
hide deactivate
|
venv/bin/dotenv
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
# -*- coding: utf-8 -*-
# Console launcher: hands control to dotenv.__main__.cli and exits with
# whatever that function returns.
import re
import sys

from dotenv.__main__ import cli

if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" launcher suffix from the
    # program name before the real entry point reads sys.argv.
    program_name = sys.argv[0]
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', program_name)
    exit_status = cli()
    sys.exit(exit_status)
|
venv/bin/dumppdf.py
ADDED
|
@@ -0,0 +1,473 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
"""Extract pdf structure in XML format"""
|
| 3 |
+
import logging
|
| 4 |
+
import os.path
|
| 5 |
+
import re
|
| 6 |
+
import sys
|
| 7 |
+
from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, Union, cast
|
| 8 |
+
from argparse import ArgumentParser
|
| 9 |
+
|
| 10 |
+
import pdfminer
|
| 11 |
+
from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback
|
| 12 |
+
from pdfminer.pdfpage import PDFPage
|
| 13 |
+
from pdfminer.pdfparser import PDFParser
|
| 14 |
+
from pdfminer.pdftypes import PDFObjectNotFound, PDFValueError
|
| 15 |
+
from pdfminer.pdftypes import PDFStream, PDFObjRef, resolve1, stream_value
|
| 16 |
+
from pdfminer.psparser import PSKeyword, PSLiteral, LIT
|
| 17 |
+
from pdfminer.utils import isnumber
|
| 18 |
+
|
| 19 |
+
logging.basicConfig()
|
| 20 |
+
logger = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
# Characters replaced by numeric character references: control characters,
# the markup characters & < > ( ) " ' \ and everything from 0x7F to 0xFF.
ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')


def escape(s: Union[str, bytes]) -> str:
    """Return ``s`` with every character matched by ``ESC_PAT`` escaped.

    Each matched character becomes a decimal character reference of the form
    ``&#N;``. Bytes input is decoded as latin-1 first, so every byte maps to
    exactly one character.

    Args:
        s: Text or raw bytes to escape.

    Returns:
        The escaped text.
    """
    text = s.decode("latin-1") if isinstance(s, bytes) else s

    def _char_reference(match):
        return "&#%d;" % ord(match.group(0))

    return ESC_PAT.sub(_char_reference, text)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _write_stream_bytes(out: TextIO, data: Optional[bytes]) -> None:
    """Write raw stream bytes to ``out`` even when it is a text-mode stream.

    ``out.write(data)`` is tried first so callers that pass a binary stream
    keep working unchanged. A text stream rejects ``bytes`` with
    ``TypeError``; in that case the bytes go to the stream's underlying
    binary ``buffer`` when it has one, and are otherwise decoded as latin-1
    (one character per byte).
    """
    try:
        out.write(data)  # type: ignore [arg-type]
        return
    except TypeError:
        if not isinstance(data, (bytes, bytearray)):
            # Not a bytes-vs-text mismatch (e.g. ``None``): keep the error.
            raise
    binary_out = getattr(out, "buffer", None)
    if binary_out is None:
        out.write(data.decode("latin-1"))
        return
    # Flush pending text first so the bytes are not emitted ahead of it.
    out.flush()
    binary_out.write(data)


def dumpxml(out: TextIO, obj: object, codec: Optional[str] = None) -> None:
    """Serialise one PDF object to ``out`` as an XML fragment.

    Dicts and lists are written recursively; note that the recursive calls do
    not forward ``codec``. Strings and bytes are escaped with ``escape``.

    Args:
        out: Destination stream.
        obj: Object to dump: ``None``, dict, list, str/bytes, ``PDFStream``,
            ``PDFObjRef``, ``PSKeyword``, ``PSLiteral`` or a number.
        codec: Only consulted for ``PDFStream`` objects. ``"raw"`` writes
            ``obj.get_rawdata()`` and ``"binary"`` writes ``obj.get_data()``
            with no XML wrapper; ``"text"`` writes the stream properties plus
            an escaped ``<data>`` element; any other value writes the
            properties only.

    Raises:
        TypeError: If ``obj`` is none of the supported kinds.
    """
    if obj is None:
        out.write("<null />")
        return

    if isinstance(obj, dict):
        out.write('<dict size="%d">\n' % len(obj))
        for (k, v) in obj.items():
            out.write("<key>%s</key>\n" % k)
            out.write("<value>")
            dumpxml(out, v)
            out.write("</value>\n")
        out.write("</dict>")
        return

    if isinstance(obj, list):
        out.write('<list size="%d">\n' % len(obj))
        for v in obj:
            dumpxml(out, v)
            out.write("\n")
        out.write("</list>")
        return

    if isinstance(obj, (str, bytes)):
        out.write('<string size="%d">%s</string>' % (len(obj), escape(obj)))
        return

    if isinstance(obj, PDFStream):
        if codec == "raw":
            # Stream payloads are bytes; a plain out.write() on a text stream
            # raised TypeError, so go through the bytes-aware helper.
            _write_stream_bytes(out, obj.get_rawdata())
        elif codec == "binary":
            _write_stream_bytes(out, obj.get_data())
        else:
            out.write("<stream>\n<props>\n")
            dumpxml(out, obj.attrs)
            out.write("\n</props>\n")
            if codec == "text":
                data = obj.get_data()
                out.write('<data size="%d">%s</data>\n' % (len(data), escape(data)))
            out.write("</stream>")
        return

    if isinstance(obj, PDFObjRef):
        out.write('<ref id="%d" />' % obj.objid)
        return

    if isinstance(obj, PSKeyword):
        # Likely bug: obj.name is bytes, not str
        out.write("<keyword>%s</keyword>" % obj.name)  # type: ignore [str-bytes-safe]
        return

    if isinstance(obj, PSLiteral):
        # Likely bug: obj.name may be bytes, not str
        out.write("<literal>%s</literal>" % obj.name)  # type: ignore [str-bytes-safe]
        return

    if isnumber(obj):
        out.write("<number>%s</number>" % obj)
        return

    raise TypeError(obj)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def dumptrailers(
    out: TextIO, doc: PDFDocument, show_fallback_xref: bool = False
) -> None:
    """Write the trailer of each xref in ``doc`` as a ``<trailer>`` element.

    Fallback xrefs (``PDFXRefFallback``) are skipped unless
    ``show_fallback_xref`` is true. When they are being skipped and the
    document has nothing but fallback xrefs, a warning is logged instead.

    Args:
        out: Destination stream.
        doc: Document whose ``xrefs`` are inspected.
        show_fallback_xref: Also dump trailers of fallback xrefs.
    """
    for xref in doc.xrefs:
        if isinstance(xref, PDFXRefFallback) and not show_fallback_xref:
            continue
        out.write("<trailer>\n")
        dumpxml(out, xref.get_trailer())
        out.write("\n</trailer>\n\n")

    if show_fallback_xref:
        return

    only_fallbacks = all(isinstance(xref, PDFXRefFallback) for xref in doc.xrefs)
    if only_fallbacks:
        logger.warning(
            "This PDF does not have an xref. Use --show-fallback-xref if "
            "you want to display the content of a fallback xref that "
            "contains all objects."
        )
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def dumpallobjs(
    out: TextIO,
    doc: PDFDocument,
    codec: Optional[str] = None,
    show_fallback_xref: bool = False,
) -> None:
    """Dump every object of ``doc`` and then its trailers inside ``<pdf>``.

    Object ids are taken from each xref in turn; an id that was already seen
    in an earlier xref is skipped, as is an id that resolves to ``None``.

    Args:
        out: Destination stream.
        doc: Document to dump.
        codec: Passed to ``dumpxml`` for each top-level object.
        show_fallback_xref: Passed to ``dumptrailers``.
    """
    visited = set()
    out.write("<pdf>")
    for xref in doc.xrefs:
        for objid in xref.get_objids():
            if objid in visited:
                continue
            visited.add(objid)
            try:
                obj = doc.getobj(objid)
                if obj is None:
                    continue
                out.write('<object id="%d">\n' % objid)
                dumpxml(out, obj, codec=codec)
                out.write("\n</object>\n\n")
            except PDFObjectNotFound as e:
                # Report through the module logger, as dumptrailers does; a
                # print() to stdout would land inside the XML whenever `out`
                # is stdout.
                logger.warning("not found: %r", e)
    dumptrailers(out, doc, show_fallback_xref)
    out.write("</pdf>")
    return
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def dumpoutline(
    outfp: TextIO,
    fname: str,
    objids: Any,
    pagenos: Container[int],
    password: str = "",
    dumpall: bool = False,
    codec: Optional[str] = None,
    extractdir: Optional[str] = None,
) -> None:
    """Write the outline of the PDF at ``fname`` to ``outfp`` as XML.

    Each outline entry becomes an ``<outline>`` element with its level and
    escaped title, plus the resolved destination and its 1-based page number
    when those can be determined. A document that raises ``PDFNoOutlines``
    is accepted silently.

    Args:
        outfp: Destination stream.
        fname: Path of the PDF file to read.
        objids: Not used by this function.
        pagenos: Not used by this function.
        password: Password handed to ``PDFDocument``.
        dumpall: Not used by this function.
        codec: Not used by this function.
        extractdir: Not used by this function.
    """
    # `with` and `finally` release the file and the parser on every exit
    # path, not only on success or PDFNoOutlines.
    with open(fname, "rb") as fp:
        parser = PDFParser(fp)
        try:
            doc = PDFDocument(parser, password)
            # Map each page's object id to its 1-based page number.
            pages = {
                page.pageid: pageno
                for (pageno, page) in enumerate(PDFPage.create_pages(doc), 1)
            }

            def resolve_dest(dest: object) -> Any:
                # Named destinations are looked up in the document first.
                if isinstance(dest, (str, bytes)):
                    dest = resolve1(doc.get_dest(dest))
                elif isinstance(dest, PSLiteral):
                    dest = resolve1(doc.get_dest(dest.name))
                if isinstance(dest, dict):
                    dest = dest["D"]
                if isinstance(dest, PDFObjRef):
                    dest = dest.resolve()
                return dest

            try:
                outlines = doc.get_outlines()
                outfp.write("<outlines>\n")
                for (level, title, dest, a, se) in outlines:
                    pageno = None
                    if dest:
                        dest = resolve_dest(dest)
                        pageno = pages[dest[0].objid]
                    elif a:
                        action = a
                        if isinstance(action, dict):
                            subtype = action.get("S")
                            # Only GoTo actions carrying a destination are followed.
                            if subtype and repr(subtype) == "/'GoTo'" and action.get("D"):
                                dest = resolve_dest(action["D"])
                                pageno = pages[dest[0].objid]
                    s = escape(title)
                    outfp.write('<outline level="{!r}" title="{}">\n'.format(level, s))
                    if dest is not None:
                        outfp.write("<dest>")
                        dumpxml(outfp, dest)
                        outfp.write("</dest>\n")
                    if pageno is not None:
                        outfp.write("<pageno>%r</pageno>\n" % pageno)
                    outfp.write("</outline>\n")
                outfp.write("</outlines>\n")
            except PDFNoOutlines:
                pass
        finally:
            parser.close()
    return
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
LITERAL_FILESPEC = LIT("Filespec")
|
| 206 |
+
LITERAL_EMBEDDEDFILE = LIT("EmbeddedFile")
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def extractembedded(fname: str, password: str, extractdir: str) -> None:
    """Save every embedded file of the PDF at ``fname`` into ``extractdir``.

    All objects of all xrefs are scanned; each dict whose ``Type`` is
    ``LITERAL_FILESPEC`` is extracted once, to a file named
    ``<objid zero-padded to 6 digits>-<base name>``.

    Args:
        fname: Path of the PDF file to read.
        password: Password handed to ``PDFDocument``.
        extractdir: Directory that receives the extracted files; it is
            created when missing.

    Raises:
        PDFValueError: If a file reference is not a ``PDFStream`` or its
            ``Type`` is not ``LITERAL_EMBEDDEDFILE``.
        IOError: If the target path already exists.
    """

    def extract1(objid: int, obj: Dict[str, Any]) -> None:
        # Only the base name is kept, so a path stored in the PDF cannot
        # place the file outside `extractdir`.
        filename = os.path.basename(obj.get("UF") or cast(bytes, obj.get("F")).decode())
        fileref = obj["EF"].get("UF") or obj["EF"].get("F")
        fileobj = doc.getobj(fileref.objid)
        if not isinstance(fileobj, PDFStream):
            error_msg = (
                "unable to process PDF: reference for %r is not a "
                "PDFStream" % filename
            )
            raise PDFValueError(error_msg)
        if fileobj.get("Type") is not LITERAL_EMBEDDEDFILE:
            raise PDFValueError(
                "unable to process PDF: reference for %r "
                "is not an EmbeddedFile" % (filename)
            )
        path = os.path.join(extractdir, "%.6d-%s" % (objid, filename))
        if os.path.exists(path):
            raise IOError("file exists: %r" % path)
        print("extracting: %r" % path)
        os.makedirs(os.path.dirname(path), exist_ok=True)
        # `with` closes the output file even when get_data() or write() fails.
        with open(path, "wb") as out:
            out.write(fileobj.get_data())
        return

    with open(fname, "rb") as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser, password)
        extracted_objids = set()
        for xref in doc.xrefs:
            for objid in xref.get_objids():
                obj = doc.getobj(objid)
                if (
                    objid not in extracted_objids
                    and isinstance(obj, dict)
                    and obj.get("Type") is LITERAL_FILESPEC
                ):
                    extracted_objids.add(objid)
                    extract1(objid, obj)
    return
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def dumppdf(
    outfp: TextIO,
    fname: str,
    objids: Iterable[int],
    pagenos: Container[int],
    password: str = "",
    dumpall: bool = False,
    codec: Optional[str] = None,
    extractdir: Optional[str] = None,
    show_fallback_xref: bool = False,
) -> None:
    """Dump selected parts of the PDF at ``fname`` to ``outfp``.

    The requested object ids, pages and/or all objects are written in that
    order. When none of the three is requested, only the trailers are
    written. A final newline is added unless ``codec`` is ``"raw"`` or
    ``"binary"``.

    Args:
        outfp: Destination stream.
        fname: Path of the PDF file to read.
        objids: Object ids to dump with ``dumpxml``.
        pagenos: Zero-based page indices to dump. With a codec the page's
            content streams are dumped, otherwise the page attributes.
        password: Password handed to ``PDFDocument``.
        dumpall: Dump every object via ``dumpallobjs``.
        codec: Stream codec forwarded to ``dumpxml`` / ``dumpallobjs``.
        extractdir: Not used by this function.
        show_fallback_xref: Forwarded to ``dumpallobjs`` / ``dumptrailers``.
    """
    # `with` closes the input file even when parsing or dumping raises.
    with open(fname, "rb") as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser, password)
        if objids:
            for objid in objids:
                obj = doc.getobj(objid)
                dumpxml(outfp, obj, codec=codec)
        if pagenos:
            for (pageno, page) in enumerate(PDFPage.create_pages(doc)):
                if pageno in pagenos:
                    if codec:
                        for obj in page.contents:
                            obj = stream_value(obj)
                            dumpxml(outfp, obj, codec=codec)
                    else:
                        dumpxml(outfp, page.attrs)
        if dumpall:
            dumpallobjs(outfp, doc, codec, show_fallback_xref)
        if (not objids) and (not pagenos) and (not dumpall):
            dumptrailers(outfp, doc, show_fallback_xref)
    if codec not in ("raw", "binary"):
        outfp.write("\n")
    return
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
def create_parser() -> ArgumentParser:
    """Build the command-line parser for this script.

    The parser takes one or more PDF paths plus options in three sets: a
    mutually exclusive choice between outline and embedded-file extraction,
    a "Parser" group (pages, objects, password, fallback xref) and an
    "Output" group (output file and a mutually exclusive stream codec).

    Returns:
        The configured ``ArgumentParser``.
    """
    cli = ArgumentParser(description=__doc__, add_help=True)
    cli.add_argument(
        "files", type=str, default=None, nargs="+",
        help="One or more paths to PDF files.",
    )
    cli.add_argument(
        "--version", "-v", action="version",
        version=f"pdfminer.six v{pdfminer.__version__}",
    )
    cli.add_argument(
        "--debug", "-d", default=False, action="store_true",
        help="Use debug logging level.",
    )

    # --extract-toc and --extract-embedded cannot be combined.
    mode_group = cli.add_mutually_exclusive_group()
    mode_group.add_argument(
        "--extract-toc", "-T", default=False, action="store_true",
        help="Extract structure of outline",
    )
    mode_group.add_argument(
        "--extract-embedded", "-E", type=str, help="Extract embedded files"
    )

    parsing_group = cli.add_argument_group(
        "Parser", description="Used during PDF parsing"
    )
    parsing_group.add_argument(
        "--page-numbers", type=int, default=None, nargs="+",
        help="A space-seperated list of page numbers to parse.",
    )
    parsing_group.add_argument(
        "--pagenos", "-p", type=str,
        help="A comma-separated list of page numbers to parse. Included for "
        "legacy applications, use --page-numbers for more idiomatic "
        "argument entry.",
    )
    parsing_group.add_argument(
        "--objects", "-i", type=str,
        help="Comma separated list of object numbers to extract",
    )
    parsing_group.add_argument(
        "--all", "-a", default=False, action="store_true",
        help="If the structure of all objects should be extracted",
    )
    parsing_group.add_argument(
        "--show-fallback-xref", action="store_true",
        help="Additionally show the fallback xref. Use this if the PDF "
        "has zero or only invalid xref's. This setting is ignored if "
        "--extract-toc or --extract-embedded is used.",
    )
    parsing_group.add_argument(
        "--password", "-P", type=str, default="",
        help="The password to use for decrypting PDF file.",
    )

    output_group = cli.add_argument_group(
        "Output", description="Used during output generation."
    )
    output_group.add_argument(
        "--outfile", "-o", type=str, default="-",
        help='Path to file where output is written. Or "-" (default) to '
        "write to stdout.",
    )

    # At most one stream codec flag may be given.
    stream_group = output_group.add_mutually_exclusive_group()
    stream_flags = (
        ("--raw-stream", "-r", "Write stream objects without encoding"),
        ("--binary-stream", "-b", "Write stream objects with binary encoding"),
        ("--text-stream", "-t", "Write stream objects as plain text"),
    )
    for long_flag, short_flag, description in stream_flags:
        stream_group.add_argument(
            long_flag, short_flag, default=False, action="store_true",
            help=description,
        )

    return cli
|
| 405 |
+
|
| 406 |
+
|
| 407 |
+
def main(argv: Optional[List[str]] = None) -> None:
    """Run the dumppdf command line tool.

    Parses ``argv`` with ``create_parser()``, converts the object and page
    selections to the forms the dump helpers expect, then processes every
    input file with ``dumpoutline``, ``extractembedded`` or ``dumppdf``
    depending on the selected mode.

    :param argv: Argument list to parse; ``None`` lets argparse fall back to
        ``sys.argv[1:]``.
    """
    parser = create_parser()
    args = parser.parse_args(args=argv)

    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Convert the selections *before* opening the output file so that a
    # malformed value cannot leave a truncated or unclosed output file behind.
    # Empty entries (e.g. a trailing comma) are skipped instead of crashing.
    if args.objects:
        objids = [int(x) for x in args.objects.split(",") if x.strip()]
    else:
        objids = []

    # Page numbers are given 1-based on the command line and converted to
    # 0-based here; --page-numbers takes precedence over the legacy --pagenos.
    if args.page_numbers:
        pagenos = {x - 1 for x in args.page_numbers}
    elif args.pagenos:
        pagenos = {int(x) - 1 for x in args.pagenos.split(",") if x.strip()}
    else:
        pagenos = set()

    password = args.password

    if args.raw_stream:
        codec: Optional[str] = "raw"
    elif args.binary_stream:
        codec = "binary"
    elif args.text_stream:
        codec = "text"
    else:
        codec = None

    write_to_stdout = args.outfile == "-"
    outfp = sys.stdout if write_to_stdout else open(args.outfile, "w")
    try:
        for fname in args.files:
            if args.extract_toc:
                dumpoutline(
                    outfp,
                    fname,
                    objids,
                    pagenos,
                    password=password,
                    dumpall=args.all,
                    codec=codec,
                    extractdir=None,
                )
            elif args.extract_embedded:
                extractembedded(
                    fname, password=password, extractdir=args.extract_embedded
                )
            else:
                dumppdf(
                    outfp,
                    fname,
                    objids,
                    pagenos,
                    password=password,
                    dumpall=args.all,
                    codec=codec,
                    extractdir=None,
                    show_fallback_xref=args.show_fallback_xref,
                )
    finally:
        # Only close a file this function opened; the process-wide stdout is
        # flushed and left usable for callers that invoke main() in-process.
        if write_to_stdout:
            outfp.flush()
        else:
            outfp.close()
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
if __name__ == "__main__":
|
| 473 |
+
main()
|
venv/bin/f2py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from numpy.f2py.f2py2e import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/f2py3
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from numpy.f2py.f2py2e import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/f2py3.11
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from numpy.f2py.f2py2e import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/filetype
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from filetype.__main__ import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/fonttools
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from fontTools.__main__ import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/gradio
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from gradio.cli import cli
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(cli())
|
venv/bin/httpx
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from httpx import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/huggingface-cli
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from huggingface_hub.commands.huggingface_cli import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/humanfriendly
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from humanfriendly.cli import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/ipython
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from IPython import start_ipython
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(start_ipython())
|
venv/bin/ipython3
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from IPython import start_ipython
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(start_ipython())
|
venv/bin/isympy
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from isympy import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/jsonschema
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from jsonschema.cli import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/langchain-server
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from langchain.server import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/langsmith
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from langsmith.cli.main import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/markdown-it
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from markdown_it.cli.parse import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/nltk
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from nltk.cli import cli
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(cli())
|
venv/bin/normalizer
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from charset_normalizer.cli.normalizer import cli_detect
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(cli_detect())
|
venv/bin/onnxruntime_test
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from onnxruntime.tools.onnxruntime_test import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/openai
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from openai._openai_scripts import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/pai
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
from pai.__main__ import main
|
| 6 |
+
if __name__ == '__main__':
|
| 7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 8 |
+
sys.exit(main())
|
venv/bin/pdf2txt.py
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/home/draco/PycharmProjects/customChatBotDemo/venv/bin/python
|
| 2 |
+
"""A command line tool for extracting text and images from PDF and
|
| 3 |
+
output it to plain text, html, xml or tags."""
|
| 4 |
+
import argparse
|
| 5 |
+
import logging
|
| 6 |
+
import sys
|
| 7 |
+
from typing import Any, Container, Iterable, List, Optional
|
| 8 |
+
|
| 9 |
+
import pdfminer.high_level
|
| 10 |
+
from pdfminer.layout import LAParams
|
| 11 |
+
from pdfminer.utils import AnyIO
|
| 12 |
+
|
| 13 |
+
logging.basicConfig()
|
| 14 |
+
|
| 15 |
+
OUTPUT_TYPES = ((".htm", "html"), (".html", "html"), (".xml", "xml"), (".tag", "tag"))
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def float_or_disabled(x: str) -> Optional[float]:
    """Parse an argparse value that is either a float or the word ``disabled``.

    :param x: Raw command line value.
    :return: ``None`` when ``x`` is ``disabled`` (case-insensitive, surrounding
        whitespace ignored), otherwise ``float(x)``.
    :raises argparse.ArgumentTypeError: If ``x`` is neither ``disabled`` nor a
        valid float literal.
    """
    if x.lower().strip() == "disabled":
        return None
    try:
        return float(x)
    except ValueError as err:
        # Chain explicitly so the original conversion error stays attached as
        # the cause instead of showing up as an incidental "during handling".
        raise argparse.ArgumentTypeError(
            "invalid float value: {}".format(x)
        ) from err
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def extract_text(
    files: Iterable[str] = (),
    outfile: str = "-",
    laparams: Optional[LAParams] = None,
    output_type: str = "text",
    codec: str = "utf-8",
    strip_control: bool = False,
    maxpages: int = 0,
    page_numbers: Optional[Container[int]] = None,
    password: str = "",
    scale: float = 1.0,
    rotation: int = 0,
    layoutmode: str = "normal",
    output_dir: Optional[str] = None,
    debug: bool = False,
    disable_caching: bool = False,
    **kwargs: Any
) -> AnyIO:
    """Extract every file in ``files`` into a single output stream.

    :param files: Paths of the PDF files to process, in order.
    :param outfile: Output path, or ``"-"`` to write to ``sys.stdout``.
    :param output_type: Output format; when left at ``"text"`` and ``outfile``
        ends with one of the suffixes in ``OUTPUT_TYPES``, the matching type is
        used instead.
    :param codec: Output encoding; forced to ``"utf-8"`` when writing to a
        stdout that reports an encoding.
    :return: The stream that was written to. It is returned open; the caller
        is responsible for closing it.
    :raises ValueError: If ``files`` is empty.

    The remaining parameters are not used here directly; they are forwarded to
    ``pdfminer.high_level.extract_text_to_fp``.
    """
    if not files:
        raise ValueError("Must provide files to work upon!")

    if output_type == "text" and outfile != "-":
        for override, alttype in OUTPUT_TYPES:
            if outfile.endswith(override):
                output_type = alttype

    if outfile == "-":
        outfp: AnyIO = sys.stdout
        if sys.stdout.encoding is not None:
            codec = "utf-8"
    else:
        outfp = open(outfile, "wb")

    try:
        for fname in files:
            with open(fname, "rb") as fp:
                # NOTE(review): **locals() forwards every local by name
                # (including files, outfile, fname, fp and kwargs itself);
                # this presumably relies on extract_text_to_fp accepting and
                # ignoring unknown keywords - confirm before adding locals.
                pdfminer.high_level.extract_text_to_fp(fp, **locals())
    except BaseException:
        # Do not leak the output file when an input cannot be opened or
        # extraction fails; stdout is never closed here.
        if outfp is not sys.stdout:
            outfp.close()
        raise
    return outfp
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def create_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the pdf2txt command line tool.

    Arguments are organised in three groups besides the general options:
    "Parser" (which pages to read and how to open the file), "Layout
    analysis" (defaults taken from a fresh ``LAParams()``) and "Output".

    :return: The configured parser; nothing is parsed here.
    """
    parser = argparse.ArgumentParser(description=__doc__, add_help=True)
    parser.add_argument(
        "files",
        type=str,
        default=None,
        nargs="+",
        help="One or more paths to PDF files.",
    )

    parser.add_argument(
        "--version",
        "-v",
        action="version",
        version="pdfminer.six v{}".format(pdfminer.__version__),
    )
    parser.add_argument(
        "--debug",
        "-d",
        default=False,
        action="store_true",
        help="Use debug logging level.",
    )
    parser.add_argument(
        "--disable-caching",
        "-C",
        default=False,
        action="store_true",
        help="If caching of resources, such as fonts, should be disabled.",
    )

    parse_params = parser.add_argument_group(
        "Parser", description="Used during PDF parsing"
    )
    parse_params.add_argument(
        "--page-numbers",
        type=int,
        default=None,
        nargs="+",
        help="A space-separated list of page numbers to parse.",
    )
    parse_params.add_argument(
        "--pagenos",
        "-p",
        type=str,
        help="A comma-separated list of page numbers to parse. "
        "Included for legacy applications, use --page-numbers "
        "for more idiomatic argument entry.",
    )
    parse_params.add_argument(
        "--maxpages",
        "-m",
        type=int,
        default=0,
        help="The maximum number of pages to parse.",
    )
    parse_params.add_argument(
        "--password",
        "-P",
        type=str,
        default="",
        help="The password to use for decrypting PDF file.",
    )
    parse_params.add_argument(
        "--rotation",
        "-R",
        default=0,
        type=int,
        help="The number of degrees to rotate the PDF "
        "before other types of processing.",
    )

    la_params = LAParams()  # will be used for defaults
    la_param_group = parser.add_argument_group(
        "Layout analysis", description="Used during layout analysis."
    )
    la_param_group.add_argument(
        "--no-laparams",
        "-n",
        default=False,
        action="store_true",
        help="If layout analysis parameters should be ignored.",
    )
    # store_true flags below can only switch a setting on; they cannot turn
    # off a value that the LAParams default already enables.
    la_param_group.add_argument(
        "--detect-vertical",
        "-V",
        default=la_params.detect_vertical,
        action="store_true",
        help="If vertical text should be considered during layout analysis",
    )
    la_param_group.add_argument(
        "--line-overlap",
        type=float,
        default=la_params.line_overlap,
        help="If two characters have more overlap than this they "
        "are considered to be on the same line. The overlap is specified "
        "relative to the minimum height of both characters.",
    )
    la_param_group.add_argument(
        "--char-margin",
        "-M",
        type=float,
        default=la_params.char_margin,
        help="If two characters are closer together than this margin they "
        "are considered to be part of the same line. The margin is "
        "specified relative to the width of the character.",
    )
    la_param_group.add_argument(
        "--word-margin",
        "-W",
        type=float,
        default=la_params.word_margin,
        help="If two characters on the same line are further apart than this "
        "margin then they are considered to be two separate words, and "
        "an intermediate space will be added for readability. The margin "
        "is specified relative to the width of the character.",
    )
    la_param_group.add_argument(
        "--line-margin",
        "-L",
        type=float,
        default=la_params.line_margin,
        help="If two lines are close together they are considered to "
        "be part of the same paragraph. The margin is specified "
        "relative to the height of a line.",
    )
    la_param_group.add_argument(
        "--boxes-flow",
        "-F",
        type=float_or_disabled,
        default=la_params.boxes_flow,
        help="Specifies how much a horizontal and vertical position of a "
        "text matters when determining the order of lines. The value "
        "should be within the range of -1.0 (only horizontal position "
        "matters) to +1.0 (only vertical position matters). You can also "
        "pass `disabled` to disable advanced layout analysis, and "
        "instead return text based on the position of the bottom left "
        "corner of the text box.",
    )
    la_param_group.add_argument(
        "--all-texts",
        "-A",
        default=la_params.all_texts,
        action="store_true",
        help="If layout analysis should be performed on text in figures.",
    )

    output_params = parser.add_argument_group(
        "Output", description="Used during output generation."
    )
    output_params.add_argument(
        "--outfile",
        "-o",
        type=str,
        default="-",
        help="Path to file where output is written. "
        'Or "-" (default) to write to stdout.',
    )
    output_params.add_argument(
        "--output_type",
        "-t",
        type=str,
        default="text",
        help="Type of output to generate {text,html,xml,tag}.",
    )
    output_params.add_argument(
        "--codec",
        "-c",
        type=str,
        default="utf-8",
        help="Text encoding to use in output file.",
    )
    output_params.add_argument(
        "--output-dir",
        "-O",
        default=None,
        help="The output directory to put extracted images in. If not given, "
        "images are not extracted.",
    )
    output_params.add_argument(
        "--layoutmode",
        "-Y",
        default="normal",
        type=str,
        help="Type of layout to use when generating html "
        "{normal,exact,loose}. If normal, each line is"
        " positioned separately in the html. If exact"
        ", each character is positioned separately in"
        " the html. If loose, same result as normal "
        "but with an additional newline after each "
        "text line. Only used when output_type is html.",
    )
    output_params.add_argument(
        "--scale",
        "-s",
        type=float,
        default=1.0,
        help="The amount of zoom to use when generating html file. "
        "Only used when output_type is html.",
    )
    output_params.add_argument(
        "--strip-control",
        "-S",
        default=False,
        action="store_true",
        help="Remove control statement from text. "
        "Only used when output_type is xml.",
    )

    return parser
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def parse_args(args: Optional[List[str]]) -> argparse.Namespace:
    """Parse command line arguments and derive the values extraction needs.

    On top of plain argparse parsing this:

    * sets ``laparams`` to ``None`` (``--no-laparams``) or to an ``LAParams``
      built from the layout options;
    * converts the 1-based page selections to a 0-based set stored in
      ``page_numbers``; when both are given, ``--pagenos`` replaces
      ``--page-numbers``;
    * switches ``output_type`` according to the ``outfile`` suffix when the
      type was left at ``"text"``.

    :param args: Argument list; ``None`` lets argparse use ``sys.argv[1:]``.
    :return: The namespace with the derived attributes added.
    """
    parsed_args = create_parser().parse_args(args=args)

    # Propagate parsed layout parameters to LAParams object
    if parsed_args.no_laparams:
        parsed_args.laparams = None
    else:
        parsed_args.laparams = LAParams(
            line_overlap=parsed_args.line_overlap,
            char_margin=parsed_args.char_margin,
            line_margin=parsed_args.line_margin,
            word_margin=parsed_args.word_margin,
            boxes_flow=parsed_args.boxes_flow,
            detect_vertical=parsed_args.detect_vertical,
            all_texts=parsed_args.all_texts,
        )

    if parsed_args.page_numbers:
        parsed_args.page_numbers = {x - 1 for x in parsed_args.page_numbers}

    if parsed_args.pagenos:
        # Skip empty entries so a stray or trailing comma ("1,2,") does not
        # abort with a bare ValueError from int("").
        parsed_args.page_numbers = {
            int(x) - 1 for x in parsed_args.pagenos.split(",") if x.strip()
        }

    if parsed_args.output_type == "text" and parsed_args.outfile != "-":
        for override, alttype in OUTPUT_TYPES:
            if parsed_args.outfile.endswith(override):
                parsed_args.output_type = alttype

    return parsed_args
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def main(args: Optional[List[str]] = None) -> int:
    """Run the pdf2txt command line tool.

    :param args: Argument list; ``None`` lets argparse use ``sys.argv[1:]``.
    :return: ``0`` on success, suitable for ``sys.exit``.
    """
    parsed_args = parse_args(args)
    outfp = extract_text(**vars(parsed_args))
    # extract_text hands back sys.stdout itself when writing to "-"; flush it
    # rather than closing the process-wide stream, so in-process callers can
    # keep printing afterwards.
    if outfp is sys.stdout:
        outfp.flush()
    else:
        outfp.close()
    return 0
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
if __name__ == "__main__":
|
| 317 |
+
sys.exit(main())
|