Spaces:
Sleeping
Sleeping
Commit
·
6ce998e
1
Parent(s):
9ce7793
Add application file
Browse files- Dockerfile +14 -0
- app.py +34 -0
- faiss.index +0 -0
- metadata.pkl +3 -0
- requirements.txt +0 -0
- tools.py +217 -0
Dockerfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
FROM python:3.9
|
| 3 |
+
|
| 4 |
+
RUN useradd -m -u 1000 user
|
| 5 |
+
USER user
|
| 6 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
| 7 |
+
|
| 8 |
+
WORKDIR /app
|
| 9 |
+
|
| 10 |
+
COPY --chown=user ./requirements.txt requirements.txt
|
| 11 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 12 |
+
|
| 13 |
+
COPY --chown=user . /app
|
| 14 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 3 |
+
from langchain.llms import HuggingFacePipeline
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
app = FastAPI()
|
| 7 |
+
|
| 8 |
+
# --- LLM Initialization using Hugging Face ---
|
| 9 |
+
# Model checkpoint pulled from the Hugging Face Hub at import time.
model_id = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # max_new_tokens bounds only the generated continuation; max_length also
    # counts the prompt tokens, so a long prompt could leave no room to answer.
    max_new_tokens=256,
    # temperature is only honoured in sampling mode; without do_sample=True
    # generation is greedy and the temperature setting is ignored.
    do_sample=True,
    temperature=0.3,
)
# LangChain wrapper around the transformers pipeline, used by the endpoints.
llm = HuggingFacePipeline(pipeline=generator)
|
| 24 |
+
|
| 25 |
+
# Example endpoint using the new llm
|
| 26 |
+
@app.post("/query")
async def post_query(query: str):
    """Answer ``query`` with the module-level LLM.

    Args:
        query: Free-text user question (FastAPI reads it as a query parameter).

    Returns:
        A dict ``{"response": <LLM output>}``.
    """
    # Local import keeps this block self-contained; asyncio is stdlib.
    import asyncio

    # Create a simple prompt structure
    prompt = f"Answer the following query:\n\n{query}\n"
    # The LLM call is synchronous and slow. Running it directly inside an
    # async handler would block the event loop (and every other request)
    # until generation finishes, so hand it to a worker thread instead.
    response = await asyncio.to_thread(llm, prompt)
    return {"response": response}
|
| 33 |
+
|
| 34 |
+
# (Keep your WebSocket endpoint and other code mostly unchanged)
|
faiss.index
ADDED
|
Binary file (15.4 kB). View file
|
|
|
metadata.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fae26b569be47c1dfae3aff3cd9ec583aaf3c7c1529e05d568278ab461fd64cf
|
| 3 |
+
size 15500
|
requirements.txt
ADDED
|
Binary file (1.16 kB). View file
|
|
|
tools.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
import json
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
# if no document found suggest some ...
|
| 6 |
+
# " - Remove currency symbol if present, convert currency to AED if user mentioned currency symbol other than AED.\n\n"
|
| 7 |
+
def extract_json_from_response(response):
    """
    Extract the first JSON object embedded in a text response.

    Parsing starts at the first '{' in ``response`` and uses the JSON decoder
    itself to find where the object ends, so braces that appear inside string
    values (e.g. ``{"a": "}"}``) do not truncate the object, and any text
    after the object is ignored.

    Args:
        response: Text that may contain a JSON object.

    Returns:
        The decoded object as a dict, or ``{}`` when ``response`` is not a
        string, contains no '{', or the text at the first '{' is not valid
        JSON.
    """
    if not isinstance(response, str):
        return {}

    start_index = response.find('{')
    if start_index == -1:
        return {}

    try:
        # raw_decode parses one JSON value starting at start_index and
        # tolerates trailing text after it.
        parsed, _end_index = json.JSONDecoder().raw_decode(response, start_index)
    except json.JSONDecodeError as e:
        print("Error parsing candidate JSON:", e)
        return {}
    return parsed
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def rule_based_extract(query):
    """
    Extract simple filters from a free-text query using regular expressions.

    Detects:
      * a cost ceiling such as "under 43k", "below 1.5m" or
        "less than 2 million" -> ``totalCosts`` (int)
      * a property type: "<n>bhk", "villa", "apartment" or "studio"
        -> ``propertyType`` (lower-cased str)

    Args:
        query: The user's query text.

    Returns:
        A dict containing only the keys that were detected.
    """
    result = {}
    q_lower = query.lower()

    # Look for cost threshold phrases such as "under 43k".
    # The trailing \b stops the magnitude letter from being taken out of a
    # longer word: without it "under 3bhk" is read as 3 billion.
    cost_pattern = re.compile(
        r'(?:under|below|less than)\s*(\d+(?:\.\d+)?)'
        r'(?:\s*(thousand|million|billion|mn|bn)|([kmb]))\b'
    )
    cost_match = cost_pattern.search(q_lower)
    if cost_match:
        suffix = cost_match.group(2) or cost_match.group(3)
        # The first letter of the suffix identifies the magnitude.
        multipliers = {'k': 1_000, 't': 1_000, 'm': 1_000_000, 'b': 1_000_000_000}
        # round() rather than int(): avoids truncating values that land just
        # below an integer because of float representation.
        result['totalCosts'] = round(float(cost_match.group(1)) * multipliers[suffix[0]])

    # Detect property type if mentioned
    prop_type_match = re.search(r'\b(\d+bhk|villa|apartment|studio)\b', q_lower)
    if prop_type_match:
        result['propertyType'] = prop_type_match.group(1)

    return result
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def apply_filters_partial(docs, filters):
    """
    Rank documents by how many of the given filters they satisfy.

    Matching rules per filter key present in a document:
      * ``totalCosts``: the document's cost must be numerically <= the
        filter value; values that cannot be converted to float do not match.
      * string document fields: case-insensitive substring match of the
        filter value (non-string filter values are compared via ``str()``).
      * anything else: plain equality.

    Args:
        docs: Iterable of dict-like documents.
        filters: Mapping of field name -> desired value.

    Returns:
        Documents matching at least one filter, sorted by descending match
        count; documents with equal counts keep their input order.
    """
    scored_docs = []

    for doc in docs:
        score = 0
        for key, value in filters.items():
            if key not in doc:
                continue

            doc_value = doc[key]

            # For cost thresholds, compare numerically.
            if key == "totalCosts":
                try:
                    if float(doc_value) <= float(value):
                        score += 1
                except (TypeError, ValueError, OverflowError):
                    # Non-numeric cost on either side: treat as "no match".
                    continue
            elif isinstance(doc_value, str):
                # str(value) so a non-string filter (e.g. an int) cannot crash
                # on .lower(); a None filter never matches.
                if value is not None and str(value).lower() in doc_value.lower():
                    score += 1
            elif doc_value == value:
                score += 1

        if score > 0:
            scored_docs.append((score, doc))

    # list.sort is stable, also with reverse=True, so ties keep input order.
    scored_docs.sort(key=lambda pair: pair[0], reverse=True)
    return [doc for _score, doc in scored_docs]
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def format_property_data(properties: list) -> str:
    """Convert property JSON data into a structured string for LLM.

    Each property becomes a labelled block ("Property <n>:" followed by one
    "- Field: value" line per field); blocks are joined with a newline.
    Missing fields are rendered as "N/A".

    Args:
        properties: List of property dicts.

    Returns:
        The formatted text, or an empty string for an empty list.
    """
    formatted = []

    for idx, prop in enumerate(properties, 1):
        # Build the cost text up front. Writing the conditional inline between
        # adjacent f-string literals makes the `if/else` apply to the whole
        # concatenated string, which drops most of the fields.
        total_cost = prop.get('totalCosts', 'N/A')
        if isinstance(total_cost, (int, float)):
            cost_text = f"{total_cost:,}"  # thousands separators
        else:
            cost_text = f"{total_cost}"

        amenities = prop.get('amenities')
        if not amenities:
            amenities_text = 'N/A'
        elif isinstance(amenities, str):
            # A plain string is already display-ready; joining it would
            # interleave ", " between its characters.
            amenities_text = amenities
        else:
            amenities_text = ', '.join(str(item) for item in amenities)

        formatted.append(
            f"Property {idx}:\n"
            f"- Property Type: {prop.get('propertyType', 'N/A')}\n"
            f"- Total Cost: AED {cost_text}\n"
            f"- Size: {prop.get('propertySize', 'N/A')} sqft\n"
            f"- Property Address: {prop.get('propertyAddress', 'N/A')}\n"
            f"- Surrounding Area: {prop.get('surroundingArea', 'N/A')}\n"
            f"- Project Name: {prop.get('projectName', 'N/A')}\n"
            f"- Ownership: {prop.get('ownershipType', 'N/A')}\n"
            f"- Rental Yield: {prop.get('expectedRentalYield', 'N/A')}%\n"
            f"- Amenities: {amenities_text}\n"
            f"- Legal Details: {prop.get('legal', 'N/A')}\n"
        )

    return "\n".join(formatted)
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
estateKeywords = [
|
| 130 |
+
# Property Types
|
| 131 |
+
"apartment", "condo", "condominium", "townhouse", "villa", "duplex", "penthouse", "studio",
|
| 132 |
+
"loft", "bungalow", "cottage", "mansion", "house", "residence", "residential", "ranch", "estate",
|
| 133 |
+
"farmhouse", "row house", "micro-apartment", "annex", "flat", "high-rise", "low-rise", "mid-rise",
|
| 134 |
+
"complex", "housing", "subdivision", "manor", "castle", "chalet", "detached", "semi-detached",
|
| 135 |
+
"terraced", "multi-family", "loft-style", "penthouse suite", "garden apartment", "luxury apartment",
|
| 136 |
+
"2bhk", "1bhk", "3bhk", "4bhk", "5bhk", "6bhk", "7bhk",
|
| 137 |
+
|
| 138 |
+
# Transaction & Financing Terms
|
| 139 |
+
"buy", "sell", "purchase", "rent", "lease", "mortgage", "financing", "investment", "appraisal",
|
| 140 |
+
"valuation", "listing", "offer", "down payment", "closing costs", "commission", "escrow",
|
| 141 |
+
"interest rate", "loan", "refinance", "pre-approval", "subsidy", "foreclosure", "buyer",
|
| 142 |
+
"seller", "renter", "lender", "broker", "realtor", "agent", "property tax", "assessment",
|
| 143 |
+
"price", "cost", "expense",
|
| 144 |
+
|
| 145 |
+
# Legal & Regulatory
|
| 146 |
+
"contract", "agreement", "title", "deed", "ownership", "legal", "zoning", "regulation", "lien",
|
| 147 |
+
"disclosure", "covenant", "restriction", "mortgage deed", "notary", "fiduciary", "amortization",
|
| 148 |
+
"leasehold", "freehold", "easement", "encumbrance", "compliance", "bylaw", "permit", "license",
|
| 149 |
+
"inspection", "certification", "survey", "boundary", "deed restriction", "eminent domain",
|
| 150 |
+
"expropriation", "title insurance", "closing statement", "settlement statement", "property assessment",
|
| 151 |
+
"tax deduction", "legal fees",
|
| 152 |
+
|
| 153 |
+
# Building Services & Amenities
|
| 154 |
+
"maintenance", "security", "concierge", "cleaning", "HVAC", "elevator", "parking", "garage", "pool",
|
| 155 |
+
"gym", "clubhouse", "garden", "landscaping", "utility", "service charge", "facility", "building management",
|
| 156 |
+
"doorman", "reception", "lobby", "front desk", "maintenance fee", "cleaner", "janitorial", "waste management",
|
| 157 |
+
"recycling", "water supply", "electricity", "gas", "internet", "cable", "satellite", "fire alarm",
|
| 158 |
+
"sprinkler", "CCTV", "access control", "smart home", "automation", "security system", "alarm system",
|
| 159 |
+
|
| 160 |
+
# Property Features & Specifications
|
| 161 |
+
"size", "area", "square feet", "sq ft", "square meter", "sqm", "layout", "floor plan", "bedrooms", "beds",
|
| 162 |
+
"bathrooms", "baths", "kitchen", "balcony", "view", "furnished", "unfurnished", "modern", "renovated",
|
| 163 |
+
"new", "old", "under construction", "pre-construction", "storage", "fireplace", "insulation", "windows",
|
| 164 |
+
"doors", "tile", "hardwood", "carpet", "luxury", "energy efficient", "solar panels", "waterproof",
|
| 165 |
+
"air-conditioned", "heating", "cooling", "soundproof", "smart features", "double glazing", "open plan",
|
| 166 |
+
"loft", "studio", "number of floors", "flooring", "ceiling height", "curb appeal", "landscaped", "patio",
|
| 167 |
+
"deck", "terrace", "roof", "basement", "attic", "renovation", "refurbishment", "architectural", "design",
|
| 168 |
+
"blueprint", "structural integrity", "energy rating", "EPC", "green building", "LEED certification",
|
| 169 |
+
|
| 170 |
+
# Location & Infrastructure
|
| 171 |
+
"location", "neighborhood", "district", "community", "proximity", "access", "landmark", "street",
|
| 172 |
+
"boulevard", "region", "central", "suburban", "urban", "rural", "metro", "vicinity", "road", "avenue",
|
| 173 |
+
"block", "postcode", "zipcode", "local", "zone", "map", "transit", "bus", "subway", "highway",
|
| 174 |
+
"railway", "airport", "shopping center", "mall", "public transport", "commute", "walkability", "bike path",
|
| 175 |
+
"pedestrian", "infrastructure", "urban planning", "master plan", "road access", "public amenities",
|
| 176 |
+
"school", "hospital", "park", "recreation", "community center", "shopping", "restaurant", "cafe", "dining",
|
| 177 |
+
"entertainment", "cultural center", "museum", "cinema", "theater", "library",
|
| 178 |
+
|
| 179 |
+
# Additional Keywords
|
| 180 |
+
"pet-friendly", "smoke-free", "homeowners association", "HOA", "amenities", "market trends", "rental yield",
|
| 181 |
+
"occupancy", "resale", "investment potential", "appreciation", "listing price", "market value", "open house",
|
| 182 |
+
"virtual tour", "3D tour", "drone footage", "photography", "staging", "showing", "signage", "sales office",
|
| 183 |
+
"walk score", "neighborhood watch", "property management", "utilities", "land", "lot", "acreage", "fenced",
|
| 184 |
+
"gated", "seaview", "mountain view", "city view", "waterfront", "lakefront", "beachfront", "vacation rental",
|
| 185 |
+
"holiday home", "timeshare", "co-op", "shared ownership", "land bank", "infill", "revitalization",
|
| 186 |
+
"urban renewal", "gentrification", "brownfield", "greenfield", "tax increment financing", "TIF",
|
| 187 |
+
"economic zone", "special economic zone", "business improvement district", "BID", "asset management",
|
| 188 |
+
"capital improvement", "utility corridor", "utility easement", "land lease", "lease option", "seller financing",
|
| 189 |
+
"buyer financing", "interest", "escrow account", "comparative market analysis", "CMA", "brokerage", "MLS",
|
| 190 |
+
"multiple listing service", "digital listing", "virtual staging", "marketing", "advertising", "sales strategy",
|
| 191 |
+
"client", "customer", "inquiry", "valuation report", "property survey", "geodetic", "topographical", "parcel",
|
| 192 |
+
"lot size", "gross floor area", "GFA", "buildable area", "usable area", "constructible area", "occupancy certificate",
|
| 193 |
+
"completion certificate", "energy performance certificate", "EPC", "retrofitting", "upgrading", "furniture",
|
| 194 |
+
"fixtures", "equipment", "FF&E", "soft costs", "hard costs", "build cost", "construction cost", "land cost",
|
| 195 |
+
"tax assessment", "expropriation", "eminent domain", "title search", "title insurance", "closing statement",
|
| 196 |
+
"settlement statement", "financial statement", "profitability", "operating expense", "CAPEX", "OPEX", "debt service",
|
| 197 |
+
"capitalization rate", "effective gross income", "net operating income", "NOI", "cash-on-cash return", "discount rate",
|
| 198 |
+
"internal rate of return", "IRR", "term sheet", "memorandum", "offering memorandum", "investment memorandum",
|
| 199 |
+
"property brochure", "marketing materials", "customer inquiry", "buyer inquiry", "seller inquiry", "agent commission",
|
| 200 |
+
"valuation model", "property portfolio", "realty", "real estate market", "property market", "property trends",
|
| 201 |
+
"rental market", "commercial real estate", "residential real estate", "real estate investment trust", "REIT",
|
| 202 |
+
"vacancy rate", "absorption rate", "lease renewal", "option to renew", "property turnover", "asset", "liability",
|
| 203 |
+
"equity", "net worth", "investment property", "tax benefit", "depreciation", "capital gain", "capital loss",
|
| 204 |
+
"market analysis", "risk assessment", "due diligence", "investment analysis", "financial analysis", "cash flow",
|
| 205 |
+
"profit margin", "return on investment", "ROI", "exit strategy", "hold period", "leasing commission", "broker fee",
|
| 206 |
+
"real estate agent fee", "property listing", "sales contract", "rent roll", "occupancy rate", "turnover", "tenant",
|
| 207 |
+
"landlord", "lease agreement", "sublease", "rental agreement", "utility bill", "property management fee",
|
| 208 |
+
"service charge fee", "annual fee", "maintenance budget", "repair cost", "operating cost", "management expense",
|
| 209 |
+
"vacancy", "absorption", "market rental rate", "submarket", "investment strategy", "property acquisition",
|
| 210 |
+
"development", "speculative development", "planned unit development", "PUD", "real estate development",
|
| 211 |
+
"site development", "land development", "construction management", "contractor", "builder",
|
| 212 |
+
"real estate consultant", "property consultant", "market research", "economic indicator", "demographics",
|
| 213 |
+
"population density", "employment rate", "income level", "consumer confidence", "building code", "sustainability",
|
| 214 |
+
"green building", "LEED", "BREEAM", "smart city", "innovation", "technology", "internet of things", "IoT",
|
| 215 |
+
"big data", "data analytics", "virtual reality", "VR", "augmented reality", "AR", "3D modeling", "drone survey",
|
| 216 |
+
"aerial photography", "satellite imagery", "market forecast", "property forecast"
|
| 217 |
+
]
|