Commit dd1baa0
Parent(s): 0d57a92
changes

Files changed:
- .gitignore +2 -1
- Dockerfile +10 -1
- __pycache__/app.cpython-312.pyc +0 -0
- app.py +32 -18
- download.py +0 -1
.gitignore
CHANGED

@@ -1,2 +1,3 @@
 qwen2.5-1.5b-instruct-q4_k_m.gguf
-qwen2.5-1.5b-instruct-q5_k_m.gguf
+qwen2.5-1.5b-instruct-q5_k_m.gguf
+*.gguf
Dockerfile
CHANGED

@@ -10,6 +10,15 @@ WORKDIR /app
 COPY --chown=user ./requirements.txt requirements.txt
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
-RUN
+RUN python -c "\
+from huggingface_hub import hf_hub_download; \
+hf_hub_download(\
+repo_id='Qwen/Qwen2.5-1.5B-Instruct-GGUF', \
+filename='qwen2.5-1.5b-instruct-q4_k_m.gguf', \
+local_dir='.', \
+local_dir_use_symlinks=False, \
+token='$HF_TOKEN'\
+)"
+
 COPY --chown=user . /app
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
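For reference, the inline python -c step above is essentially the same download that download.py in this repo performs. Below is a minimal standalone sketch of that step (not part of this commit); it assumes huggingface_hub is installed and that HF_TOKEN is available in the build environment.

# Sketch only: the Dockerfile's inline download written as a standalone script.
# Assumes huggingface_hub is installed and HF_TOKEN is exported in the environment.
import os
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
    filename="qwen2.5-1.5b-instruct-q4_k_m.gguf",
    local_dir=".",
    token=os.environ.get("HF_TOKEN"),
)
print(f"Model downloaded to: {model_path}")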
__pycache__/app.cpython-312.pyc
CHANGED

Binary files a/__pycache__/app.cpython-312.pyc and b/__pycache__/app.cpython-312.pyc differ
app.py
CHANGED

@@ -20,6 +20,10 @@ from langchain_core.tools import tool
 from langchain_core.callbacks import StreamingStdOutCallbackHandler, CallbackManager
 from langchain_core.callbacks.base import BaseCallbackHandler
 
+import os
+from fastapi.responses import PlainTextResponse
+from fastapi import FastAPI, Request
+from fastapi.staticfiles import StaticFiles
 # ------------------------ Model Inference Wrapper ------------------------
 
 class ChatQwen:

@@ -57,7 +61,7 @@ class ChatQwen:
             model_path=model_path,
             temperature=self.temperature,
             # n_ctx=512,
-            n_ctx=
+            n_ctx=8192,
             n_threads=4,  # Adjust as needed
             batch_size=512,
             verbose=False,

@@ -406,8 +410,8 @@ def generate_response(state: dict) -> dict:
     messages.append({"role": "system", "content": "When responding, use only the provided property details."})
 
     # Add conversation history
-    # Truncate conversation history (last
-    truncated_history = state.get("messages", [])[-
+    # Truncate conversation history (last 4 exchanges)
+    truncated_history = state.get("messages", [])[-8:]  # Last 4 user+assistant pairs
     for msg in truncated_history:
         messages.append({"role": msg["role"], "content": msg["content"]})
 

@@ -689,7 +693,7 @@ def stream_query(query: str, connection_id: str, loop):
         # Always update current_properties from final state
         conv_manager.current_properties = final_state.get("current_properties", [])
         # Keep conversation history bounded
-        conv_manager.conversation_history = conv_manager.conversation_history[-
+        conv_manager.conversation_history = conv_manager.conversation_history[-12:]  # Last 6 exchanges
 
     except Exception as e:
         error_msg = f"Error processing query: {str(e)}"

@@ -730,20 +734,30 @@ async def post_query(query: str):
     return {"response": response}
 
 
-@app.get("/setup")
-async def setup():
-    import os
-    from huggingface_hub import hf_hub_download
-    repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF"
-    filename = "qwen2.5-1.5b-instruct-q4_k_m.gguf"
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    model_path = hf_hub_download(
-        repo_id=repo_id,
-        filename=filename,
-        local_dir=script_dir,
-        local_dir_use_symlinks=False,
-    )
-    return model_path
 
 
+model_url = "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf"
+async def async_download():
+    import aiohttp
+    async with aiohttp.ClientSession() as session:
+        async with session.get(model_url) as response:
+            with open(model_path, "wb") as f:
+                while True:
+                    chunk = await response.content.read(1024)
+                    if not chunk:
+                        break
+                    f.write(chunk)
+
+@app.middleware("http")
+async def check_model_middleware(request: Request, call_next):
+    if not os.path.exists(model_path):
+        await async_download()
+        print("successfully downloaded")
+    else:
+        print("already downloaded")
+    return await call_next(request)
+
 
+@app.get("/")
+async def home():
+    return PlainTextResponse("Space is running. Model ready!")
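Design note on the app.py change: the model is fetched lazily inside an HTTP middleware, so the existence check runs on every request. A common alternative is to run the check once when the app starts. The sketch below is hypothetical and not part of the commit; it reuses the model_path and async_download() already defined in app.py.

# Hypothetical alternative (not in this commit): perform the download once at
# startup instead of checking os.path.exists() on every request.
# Reuses model_path and async_download() from app.py.
@app.on_event("startup")
async def ensure_model():
    if not os.path.exists(model_path):
        await async_download()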
download.py
CHANGED

@@ -8,7 +8,6 @@ model_path = hf_hub_download(
     repo_id=repo_id,
     filename=filename,
     local_dir=script_dir,
-    local_dir_use_symlinks=False,  # optional: don't use symlinks
 )
 
 print(f"Model downloaded to: {model_path}")
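Note on the download.py change: in recent huggingface_hub releases the local_dir_use_symlinks argument is deprecated (files are materialized directly in local_dir), so dropping it should not change behavior there. The resulting call reduces to the sketch below; repo_id, filename, and script_dir are defined earlier in download.py.

# Resulting call after this change (sketch; variables defined earlier in download.py).
model_path = hf_hub_download(
    repo_id=repo_id,
    filename=filename,
    local_dir=script_dir,
)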