rayymaxx committed
Commit b66d06d · verified · 1 Parent(s): 4ee0ddc

Update app

Files changed (1)
  1. app.py +5 -67
app.py CHANGED
@@ -1,72 +1,10 @@
- # app.py (safe, use /tmp for cache)
- import os
- import logging
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
- import tempfile
-
- # --- Put caches in a writable temp dir to avoid permission errors ---
- TMP_CACHE = os.environ.get("HF_CACHE_DIR", os.path.join(tempfile.gettempdir(), "hf_cache"))
- try:
-     os.makedirs(TMP_CACHE, exist_ok=True)
- except Exception as e:
-     # if even this fails, fall back to tempfile.gettempdir()
-     TMP_CACHE = tempfile.gettempdir()
-
- # export environment vars before importing transformers
- os.environ["TRANSFORMERS_CACHE"] = TMP_CACHE
- os.environ["HF_HOME"] = TMP_CACHE
- os.environ["HF_DATASETS_CACHE"] = TMP_CACHE
- os.environ["HF_METRICS_CACHE"] = TMP_CACHE
-
- app = FastAPI(title="DirectEd LoRA API (safe startup)")
+ from fastapi import FastAPI
+ app = FastAPI()
 
  @app.get("/health")
  def health():
      return {"ok": True}

- class Request(BaseModel):
-     prompt: str
-     max_new_tokens: int = 150
-     temperature: float = 0.7
-
- pipe = None
-
- @app.on_event("startup")
- def load_model():
-     global pipe
-     try:
-         # heavy imports done during startup
-         from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-         from peft import PeftModel
-
-         BASE_MODEL = "unsloth/llama-3-8b-Instruct-bnb-4bit"
-         ADAPTER_REPO = "rayymaxx/DirectEd-AI-LoRA"  # <-- replace with your adapter repo
-
-         tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
-         base_model = AutoModelForCausalLM.from_pretrained(
-             BASE_MODEL,
-             device_map="auto",
-             low_cpu_mem_usage=True,
-             torch_dtype="auto",
-         )
-
-         model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
-         model.eval()
-
-         pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
-         logging.info("Model and adapter loaded successfully.")
-     except Exception as e:
-         logging.exception("Failed to load model at startup: %s", e)
-         pipe = None
-
- @app.post("/generate")
- def generate(req: Request):
-     if pipe is None:
-         raise HTTPException(status_code=503, detail="Model not loaded. Check logs.")
-     try:
-         out = pipe(req.prompt, max_new_tokens=req.max_new_tokens, temperature=req.temperature, do_sample=True)
-         return {"response": out[0]["generated_text"]}
-     except Exception as e:
-         logging.exception("Generation failed: %s", e)
-         raise HTTPException(status_code=500, detail=str(e))
+ @app.get("/")
+ def root():
+     return {"message": "Minimal code running"}