Spaces:
Sleeping
Sleeping
Commit
·
0d57a92
1
Parent(s):
af3021b
fix upload
Browse files
- Dockerfile +1 -0
- app.py +9 -8
Dockerfile
CHANGED
|
@@ -10,5 +10,6 @@ WORKDIR /app
|
|
| 10 |
COPY --chown=user ./requirements.txt requirements.txt
|
| 11 |
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 12 |
|
|
|
|
| 13 |
COPY --chown=user . /app
|
| 14 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 10 |
COPY --chown=user ./requirements.txt requirements.txt
|
| 11 |
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 12 |
|
| 13 |
+
RUN wget https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf -O qwen2.5-1.5b-instruct-q4_k_m.gguf
|
| 14 |
COPY --chown=user . /app
|
| 15 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
CHANGED
|
@@ -60,6 +60,7 @@ class ChatQwen:
|
|
| 60 |
n_ctx=2048,
|
| 61 |
n_threads=4, # Adjust as needed
|
| 62 |
batch_size=512,
|
|
|
|
| 63 |
)
|
| 64 |
|
| 65 |
def build_prompt(self, messages: list) -> str:
|
|
@@ -170,14 +171,14 @@ class WebSocketStreamingCallbackHandler(BaseCallbackHandler):
|
|
| 170 |
# ------------------------ Instantiate the LLM ------------------------
|
| 171 |
# Choose one mode: local (set use_server=False) or server (set use_server=True).
|
| 172 |
model_path="qwen2.5-1.5b-instruct-q4_k_m.gguf"
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
#
|
| 180 |
-
|
| 181 |
|
| 182 |
# ------------------------ FAISS and Sentence Transformer Setup ------------------------
|
| 183 |
|
|
|
|
| 60 |
n_ctx=2048,
|
| 61 |
n_threads=4, # Adjust as needed
|
| 62 |
batch_size=512,
|
| 63 |
+
verbose=False,
|
| 64 |
)
|
| 65 |
|
| 66 |
def build_prompt(self, messages: list) -> str:
|
|
|
|
| 171 |
# ------------------------ Instantiate the LLM ------------------------
|
| 172 |
# Choose one mode: local (set use_server=False) or server (set use_server=True).
|
| 173 |
model_path="qwen2.5-1.5b-instruct-q4_k_m.gguf"
|
| 174 |
+
llm = ChatQwen(
|
| 175 |
+
temperature=0.3,
|
| 176 |
+
streaming=True,
|
| 177 |
+
max_new_tokens=512,
|
| 178 |
+
use_server=False,
|
| 179 |
+
model_path=model_path,
|
| 180 |
+
# server_url="http://localhost:8000" # Uncomment and set if using server mode.
|
| 181 |
+
)
|
| 182 |
|
| 183 |
# ------------------------ FAISS and Sentence Transformer Setup ------------------------
|
| 184 |
|