Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import spaces | |
| import os | |
| import spaces | |
| import torch | |
| import random | |
| import time | |
| import re | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer | |
| import transformers | |
# Read the Hugging Face access token from the environment (None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Device probe: create a tensor, move it to CUDA, and report where it lives.
# The original author observed 'cpu' being printed here at import time.
# NOTE(review): presumably this is the ZeroGPU start-up behaviour, where a
# real GPU is only attached inside `@spaces.GPU` functions -- confirm.
zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' 🤔

model_id = 'FINGU-AI/Qwen-Orpo-v1'

# Load the checkpoint in bfloat16 with PyTorch SDPA attention
# (attn_implementation="flash_attention_2" is the alternative the author
# noted). The token is passed explicitly so HF_TOKEN is actually used;
# with HF_TOKEN unset this is the same as the default anonymous access.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    attn_implementation="sdpa",
    torch_dtype=torch.bfloat16,
    token=HF_TOKEN,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model.to('cuda')

# Keyword arguments forwarded to `model.generate` on every request.
generation_params = {
    'max_new_tokens': 1000,  # upper bound on the length of a reply
    'use_cache': True,
    'do_sample': True,       # sample instead of greedy decoding
    'temperature': 0.7,
    'top_p': 0.9,
    # 'top_k': 50,
}
@spaces.GPU
def inference(query):
    """Generate the assistant's reply to a single user query.

    The query is wrapped in a two-message chat (fixed system prompt + user
    turn), rendered with the tokenizer's chat template, and passed to
    `model.generate` using the module-level `generation_params`.

    Args:
        query: The user's question; it is interpolated into the user turn.

    Returns:
        The generated reply as plain text, with the prompt and all special
        tokens removed and surrounding whitespace stripped.
    """
    # `@spaces.GPU` marks this as the GPU-using entry point; ZeroGPU Spaces
    # only attach a GPU while such a function runs. The `spaces` docs
    # describe the decorator as effect-free on other hardware.
    messages = [
        {"role": "system", "content": """You are ai trader, invester helpfull assistant."""},
        {"role": "user", "content": f"{query}"},
    ]

    # return_dict=True yields both input_ids and attention_mask, so that
    # generate() does not have to guess the mask.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to("cuda")
    prompt_length = inputs["input_ids"].shape[-1]

    outputs = model.generate(**inputs, **generation_params)

    # generate() returns prompt + continuation. Slice the prompt off by
    # length instead of splitting the decoded text on a marker string, and
    # let the tokenizer drop every special token (not only <|im_end|>).
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
# Sample prompts offered as one-click examples in the chat UI.
examples = [
    "How can options strategies such as straddles, strangles, and spreads be used to hedge against market volatility?",
    "How do changes in interest rates, inflation, and GDP growth impact stock and bond markets?",
    "What are the key components and strategies involved in developing an effective algorithmic trading system?",
    "How can investors integrate environmental, social, and governance (ESG) factors into their investment decisions to achieve both financial returns and social impact?",
    "How do geopolitical events such as trade wars, political instability, and international conflicts affect global financial markets?",
    "How does blockchain technology have the potential to disrupt financial markets and investment practices?",
]
def response(message, history):
    """Chat callback: return the model's reply to `message`.

    `history` is accepted as part of the callback signature but is not
    used; every message is answered on its own, without earlier turns.
    """
    return inference(message)
# Build the chat UI around `response`, offer the example prompts, and serve it.
chat_ui = gr.ChatInterface(fn=response, examples=examples)
chat_ui.launch()