Zubiiiiiii294 committed
Commit 728ab04 · verified · 1 Parent(s): 87e3aba

Update app.py

Files changed (1):
  1. app.py +55 -41
app.py CHANGED
@@ -1,50 +1,64 @@
- import os
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+ import os
import gradio as gr
- from huggingface_hub import login
-
- # Log in with HF token
- login(os.environ.get("HF_TOKEN"))
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
+ import torch

- # Model details
- model_name = "mistralai/Mistral-7B-Instruct-v0.3"
-
- # Quantization config
- quantization_config = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_compute_dtype=torch.float16,
-     bnb_4bit_quant_type="nf4",
-     bnb_4bit_use_double_quant=True
- )
+ # Model ID from environment variable, with a quantized GPTQ fallback
+ model_id = os.getenv("MODEL_ID", "TheBloke/Mistral-7B-Instruct-v0.3-GPTQ")

# Load tokenizer and model
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
-     model_name,
-     quantization_config=quantization_config,
+     model_id,
    device_map="auto",
-     low_cpu_mem_usage=True,
-     trust_remote_code=True
+     torch_dtype=torch.float16,
)

- # Chat function
- def chat(input_text):
-     inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
-     outputs = model.generate(
-         inputs.input_ids,
-         max_length=100,
-         pad_token_id=tokenizer.eos_token_id
-     )
-     return tokenizer.decode(outputs[0], skip_special_tokens=True)
-
- # Gradio UI
- iface = gr.Interface(
-     fn=chat,
-     inputs=gr.Textbox(placeholder="Type your message here..."),
-     outputs="text",
-     title="Vynix AI",
-     description="Chat with Vynix AI — powered by Mistral 7B"
- )
+ # Streamer that echoes generated tokens to the Space logs as they arrive
+ streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+
+ # Chat function: rebuild the conversation in the role/content format that
+ # apply_chat_template expects (gr.ChatInterface passes history as
+ # [user, assistant] pairs)
+ def chat(message, history):
+     history = history or []
+     conversation = []
+     for user_msg, bot_msg in history:
+         conversation.append({"role": "user", "content": user_msg})
+         conversation.append({"role": "assistant", "content": bot_msg})
+     conversation.append({"role": "user", "content": message})
+
+     prompt = tokenizer.apply_chat_template(
+         conversation,
+         tokenize=False,
+         add_generation_prompt=True
+     )
+
+     # The chat template already inserts special tokens, so skip them here
+     inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
+
+     with torch.no_grad():
+         output_ids = model.generate(
+             **inputs,
+             max_new_tokens=512,
+             do_sample=True,
+             temperature=0.7,
+             top_p=0.95,
+             streamer=streamer,
+             pad_token_id=tokenizer.eos_token_id,
+         )
+
+     # Decode only the newly generated tokens, not the echoed prompt
+     reply = tokenizer.decode(
+         output_ids[0][inputs["input_ids"].shape[-1]:],
+         skip_special_tokens=True
+     ).strip()
+
+     return reply
+
+ # Build Gradio UI
+ interface = gr.ChatInterface(fn=chat, title="🧠 Vynix AI")

- iface.launch()
+ # Launch App
+ if __name__ == "__main__":
+     interface.launch()
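A note on the history handling in the new chat function: tokenizer.apply_chat_template consumes a list of {"role": ..., "content": ...} dicts, while gr.ChatInterface hands its callback the history as [user, assistant] pairs, hence the conversion loop above. A minimal sketch of what the template produces, assuming access to the mistralai/Mistral-7B-Instruct-v0.3 tokenizer (the repo is gated, so a logged-in HF token may be required):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")

conversation = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi! How can I help?"},
    {"role": "user", "content": "Tell me a joke."},
]

# Renders the turns into the model's own prompt markup; add_generation_prompt
# appends the tokens that cue the assistant's next reply.
print(tok.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True))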
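Also worth noting: TextStreamer only prints to the process stdout, so the "live output" lands in the Space logs rather than the chat window. Streaming into the Gradio UI itself would take TextIteratorStreamer plus a generator callback. A minimal sketch reusing the tokenizer and model from app.py (single-turn only; the history conversion from chat() would carry over unchanged):

from threading import Thread
from transformers import TextIteratorStreamer
from app import tokenizer, model

def chat_streaming(message, history):
    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": message}],
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)

    # The iterator streamer yields decoded text chunks as generate() produces
    # them on a background thread.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(target=model.generate,
           kwargs=dict(**inputs, max_new_tokens=512, streamer=streamer)).start()

    # gr.ChatInterface accepts generator callbacks and re-renders on every yield.
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial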
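Finally, since model_id is read from the environment at import time, a quick local smoke test can override it before importing the module; the __name__ guard added above keeps the import from auto-launching the UI. The override model below is only an illustration:

import os

# Must be set before importing app.py, which reads MODEL_ID at import time.
os.environ["MODEL_ID"] = "mistralai/Mistral-7B-Instruct-v0.3"

from app import chat  # loads the tokenizer and model as a side effect

history = [["Hello", "Hi! How can I help you today?"]]
print(chat("Give me a one-line summary of our chat so far.", history))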