Commit dd1baa0
Parent(s): 0d57a92
changes

Files changed:
- .gitignore +2 -1
- Dockerfile +10 -1
- __pycache__/app.cpython-312.pyc +0 -0
- app.py +32 -18
- download.py +0 -1
.gitignore
CHANGED

@@ -1,2 +1,3 @@
 qwen2.5-1.5b-instruct-q4_k_m.gguf
-qwen2.5-1.5b-instruct-q5_k_m.gguf
+qwen2.5-1.5b-instruct-q5_k_m.gguf
+*.gguf
Dockerfile
CHANGED

@@ -10,6 +10,15 @@ WORKDIR /app
 COPY --chown=user ./requirements.txt requirements.txt
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
-RUN
+RUN python -c "\
+from huggingface_hub import hf_hub_download; \
+hf_hub_download(\
+repo_id='Qwen/Qwen2.5-1.5B-Instruct-GGUF', \
+filename='qwen2.5-1.5b-instruct-q4_k_m.gguf', \
+local_dir='.', \
+local_dir_use_symlinks=False, \
+token='$HF_TOKEN'\
+)"
+
 COPY --chown=user . /app
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
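For reference, the inline python -c step above is essentially the same download that download.py in this repo performs. Below is a minimal standalone sketch of that step (not part of this commit); it assumes huggingface_hub is installed and that HF_TOKEN is available in the build environment.

# Sketch only: the Dockerfile's inline download written as a standalone script.
# Assumes huggingface_hub is installed and HF_TOKEN is exported in the environment.
import os
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
    filename="qwen2.5-1.5b-instruct-q4_k_m.gguf",
    local_dir=".",
    token=os.environ.get("HF_TOKEN"),
)
print(f"Model downloaded to: {model_path}")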
__pycache__/app.cpython-312.pyc
CHANGED

Binary files a/__pycache__/app.cpython-312.pyc and b/__pycache__/app.cpython-312.pyc differ
app.py
CHANGED

@@ -20,6 +20,10 @@ from langchain_core.tools import tool
 from langchain_core.callbacks import StreamingStdOutCallbackHandler, CallbackManager
 from langchain_core.callbacks.base import BaseCallbackHandler
 
+import os
+from fastapi.responses import PlainTextResponse
+from fastapi import FastAPI, Request
+from fastapi.staticfiles import StaticFiles
 # ------------------------ Model Inference Wrapper ------------------------
 
 class ChatQwen:

@@ -57,7 +61,7 @@ class ChatQwen:
             model_path=model_path,
             temperature=self.temperature,
             # n_ctx=512,
-            n_ctx=
+            n_ctx=8192,
             n_threads=4,  # Adjust as needed
             batch_size=512,
             verbose=False,

@@ -406,8 +410,8 @@ def generate_response(state: dict) -> dict:
     messages.append({"role": "system", "content": "When responding, use only the provided property details."})
 
     # Add conversation history
-    # Truncate conversation history (last
-    truncated_history = state.get("messages", [])[-
+    # Truncate conversation history (last 4 exchanges)
+    truncated_history = state.get("messages", [])[-8:]  # Last 4 user+assistant pairs
     for msg in truncated_history:
         messages.append({"role": msg["role"], "content": msg["content"]})
 

@@ -689,7 +693,7 @@ def stream_query(query: str, connection_id: str, loop):
         # Always update current_properties from final state
         conv_manager.current_properties = final_state.get("current_properties", [])
         # Keep conversation history bounded
-        conv_manager.conversation_history = conv_manager.conversation_history[-
+        conv_manager.conversation_history = conv_manager.conversation_history[-12:]  # Last 6 exchanges
 
     except Exception as e:
         error_msg = f"Error processing query: {str(e)}"

@@ -730,20 +734,30 @@ async def post_query(query: str):
     return {"response": response}
 
 
-@app.get("/setup")
-async def setup():
-    import os
-    from huggingface_hub import hf_hub_download
-    repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF"
-    filename = "qwen2.5-1.5b-instruct-q4_k_m.gguf"
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    model_path = hf_hub_download(
-        repo_id=repo_id,
-        filename=filename,
-        local_dir=script_dir,
-        local_dir_use_symlinks=False,
-    )
-    return model_path
 
 
+model_url = "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf"
+async def async_download():
+    import aiohttp
+    async with aiohttp.ClientSession() as session:
+        async with session.get(model_url) as response:
+            with open(model_path, "wb") as f:
+                while True:
+                    chunk = await response.content.read(1024)
+                    if not chunk:
+                        break
+                    f.write(chunk)
+
+@app.middleware("http")
+async def check_model_middleware(request: Request, call_next):
+    if not os.path.exists(model_path):
+        await async_download()
+        print("successfully downloaded")
+    else:
+        print("already downloaded")
+    return await call_next(request)
+
 
+@app.get("/")
+async def home():
+    return PlainTextResponse("Space is running. Model ready!")
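Design note on the app.py change: the model is fetched lazily inside an HTTP middleware, so the existence check runs on every request. A common alternative is to run the check once when the app starts. The sketch below is hypothetical and not part of the commit; it reuses the model_path and async_download() already defined in app.py.

# Hypothetical alternative (not in this commit): perform the download once at
# startup instead of checking os.path.exists() on every request.
# Reuses model_path and async_download() from app.py.
@app.on_event("startup")
async def ensure_model():
    if not os.path.exists(model_path):
        await async_download()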
download.py
CHANGED

@@ -8,7 +8,6 @@ model_path = hf_hub_download(
     repo_id=repo_id,
     filename=filename,
     local_dir=script_dir,
-    local_dir_use_symlinks=False,  # optional: don't use symlinks
 )
 
 print(f"Model downloaded to: {model_path}")
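Note on the download.py change: in recent huggingface_hub releases the local_dir_use_symlinks argument is deprecated (files are materialized directly in local_dir), so dropping it should not change behavior there. The resulting call reduces to the sketch below; repo_id, filename, and script_dir are defined earlier in download.py.

# Resulting call after this change (sketch; variables defined earlier in download.py).
model_path = hf_hub_download(
    repo_id=repo_id,
    filename=filename,
    local_dir=script_dir,
)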