from threading import Thread
import gradio as gr
import inspect
from gradio import routes
from typing import List, Type

from petals import AutoDistributedModelForCausalLM
from transformers import AutoTokenizer

import requests, os, re, asyncio, json

loop = asyncio.get_event_loop()
# init code
def get_types(cls_set: List[Type], component: str):
    docset = []
    types = []
    if component == "input":
        for cls in cls_set:
            doc = inspect.getdoc(cls)
            doc_lines = doc.split("\n")
            docset.append(doc_lines[1].split(":")[-1])
            types.append(doc_lines[1].split(")")[0].split("(")[-1])
    else:
        for cls in cls_set:
            doc = inspect.getdoc(cls)
            doc_lines = doc.split("\n")
            docset.append(doc_lines[-1].split(":")[-1])
            types.append(doc_lines[-1].split(")")[0].split("(")[-1])
    return docset, types

# Monkey-patch gradio's routes.get_types with the docstring-parsing version above.
routes.get_types = get_types
# App code
model_name = "petals-team/StableBeluga2"
# daekeun-ml/Llama-2-ko-instruct-13B
# quantumaikr/llama-2-70b-fb16-korean
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = None

# history[npc][user_id] accumulates the "###"-separated turns of each conversation.
history = {
    "": {
    }
}

# npc_story maps each NPC name to its backstory text. Its actual contents are not
# included in this file; this placeholder only keeps the references below importable.
npc_story = {}
def check(model_name):
    # Return True if the public Petals swarm currently reports this model as healthy.
    data = requests.get("https://health.petals.dev/api/v1/state").json()
    for d in data['model_reports']:
        if d['name'] == model_name:
            if d['state'] == "healthy":
                return True
    return False
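# For reference, check() only reads two fields from each entry in "model_reports";
# the response shape it assumes looks roughly like this (values illustrative only):
#   {"model_reports": [{"name": "petals-team/StableBeluga2", "state": "healthy", ...}, ...]}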
def init():
    # Lazily join the Petals swarm and load the distributed model once it is healthy.
    global model
    if check(model_name):
        model = AutoDistributedModelForCausalLM.from_pretrained(model_name)
def chat(id, npc, text):
    if model is None:
        init()
        return "no model"

    # get_coin endpoint: ask the api-for-unity Space how many coins this user has left
    response = requests.post("https://ldhldh-api-for-unity.hf.space/run/predict_6", json={
        "data": [
            id,
        ]}).json()
    coin = response["data"][0]
    if int(coin) == 0:
        return "no coin"

    # model inference
    if not check(model_name):
        return "no model"

    global history
    if npc not in npc_story:
        return "no npc"
    if npc not in history:
        history[npc] = {}
    if id not in history[npc]:
        history[npc][id] = ""
    # Keep only the most recent "###"-separated turns of this user's history
    if len(history[npc][id].split("###")) > 10:
        history[npc][id] = "###" + history[npc][id].split("###", 3)[3]
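    # Trimming example: if the history were "###a###b###c###d###e", then
    # split("###", 3)[3] == "c###d###e", so the line above would keep "###c###d###e",
    # i.e. everything except the two oldest turns.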
    npc_list = str([k for k in npc_story.keys()]).replace('\'', '')
    town_story = f"""[{id}'s village]
In a remote spot on a small island, a handful of residents live together.
Currently {npc_list} are living here."""

    system_message = f"""1. You are fluent in Korean.
2. You are role-playing right now. Imagine {npc}'s reaction and express it in an appealing way.
3. You are {npc}. Think and speak from {npc}'s point of view.
4. Based on the given information, write plausible and lively lines for {npc}.
5. Read the given information about {npc} carefully, and play the character plainly, without exaggeration.
6. Never intrude on the User's role. Do not repeat the same words.
7. Keep {npc}'s tone of speech."""

    prom = f"""<<SYS>>
{system_message}<</SYS>>
{town_story}
### Character information: {npc_story[npc]}
### Instruction:
Referring to {npc}'s information, write {npc}'s next line naturally, fitting the situation.
{history[npc][id]}
### User:
{text}
### {npc}:
"""
    # Run generation over the Petals swarm
    inputs = tokenizer(prom, return_tensors="pt")["input_ids"]
    outputs = model.generate(inputs, do_sample=True, temperature=0.6, top_p=0.75, max_new_tokens=100)
    # Strip the prompt from the decoded text and keep only the NPC's reply
    output = tokenizer.decode(outputs[0])[len(prom)+3:-1].split("<")[0].split("###")[0].replace(". ", ".\n")
    print(outputs)
    print(output)
    # add_transaction endpoint: record this inference (prompt and reply) for the user
    response = requests.post("https://ldhldh-api-for-unity.hf.space/run/predict_5", json={
        "data": [
            id,
            "inference",
            "### input:\n" + prom + "\n\n### output:\n" + output
        ]}).json()
    d = response["data"][0]

    return output
with gr.Blocks() as demo:
    count = 0
    aa = gr.Interface(
        fn=chat,
        inputs=["text", "text", "text"],
        outputs="text",
        description="chat: returns the AI reply. Creates a transaction internally.\n /run/predict",
    )

demo.queue(max_size=32).launch(enable_queue=True)
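# A minimal client-side sketch of how a caller (for example, the game client) might hit
# this endpoint once the Space is up. The Space URL is a placeholder assumption; the
# request shape mirrors the gradio /run/predict convention already used above for the
# api-for-unity Space. Kept as a comment so this file remains a server-only script.
#
#   import requests
#   reply = requests.post(
#       "https://<your-space>.hf.space/run/predict",
#       json={"data": ["player_1", "some_npc", "Hello!"]},
#   ).json()["data"][0]
#   print(reply)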