luna-playground

Runtime error

App Files Files Community

Terry Zhuo committed on Mar 12, 2024

Commit

6e12956

1 Parent(s): 2d714f8

update

Browse files

Files changed (1) hide show

app.py +114 -58

app.py CHANGED Viewed

@@ -5,15 +5,82 @@ import requests
 import gradio as gr
 from huggingface_hub import Repository
-from text_generation import Client
 from share_btn import community_icon_html, loading_icon_html, share_js, share_btn_css
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-API_URL = "https://api-inference.huggingface.co/models/bigcode/starcoder"
-API_URL_BASE ="https://api-inference.huggingface.co/models/bigcode/starcoderbase"
-API_URL_PLUS = "https://api-inference.huggingface.co/models/bigcode/starcoderplus"
 FIM_PREFIX = "<fim_prefix>"
 FIM_MIDDLE = "<fim_middle>"
@@ -75,26 +142,19 @@ theme = gr.themes.Monochrome(
     ],
 )
-client = Client(
-    API_URL,
-    headers={"Authorization": f"Bearer {HF_TOKEN}"},
-)
-client_base = Client(
-    API_URL_BASE, headers={"Authorization": f"Bearer {HF_TOKEN}"},
-)
-client_plus = Client(
-    API_URL_PLUS, headers={"Authorization": f"Bearer {HF_TOKEN}"},
-)
 def generate(
-    prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, version="StarCoder",
 ):
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
     top_p = float(top_p)
-    fim_mode = False
     generate_kwargs = dict(
         temperature=temperature,
@@ -105,37 +165,21 @@ def generate(
         seed=42,
     )
-    if FIM_INDICATOR in prompt:
-        fim_mode = True
-        try:
-            prefix, suffix = prompt.split(FIM_INDICATOR)
-        except:
-            raise ValueError(f"Only one {FIM_INDICATOR} allowed in prompt!")
-        prompt = f"{FIM_PREFIX}{prefix}{FIM_SUFFIX}{suffix}{FIM_MIDDLE}"
-    if version == "StarCoder":
-        stream = client.generate_stream(prompt, **generate_kwargs)
-    elif version == "StarCoderPlus":
-        stream = client_plus.generate_stream(prompt, **generate_kwargs)
     else:
-        stream = client_base.generate_stream(prompt, **generate_kwargs)
-    if fim_mode:
-        output = prefix
-    else:
-        output = prompt
-    previous_token = ""
-    for response in stream:
-        if response.token.text == "<|endoftext|>":
-            if fim_mode:
-                output += suffix
-            else:
-                return output
-        else:
-            output += response.token.text
-        previous_token = response.token.text
-        yield output
     return output
@@ -167,16 +211,20 @@ css += share_btn_css + monospace_css + ".gradio-container {color: black}"
 description = """
 <div style="text-align: center;">
-    <h1> ⭐ StarCoder <span style='color: #e6b800;'>Models</span> Playground</h1>
 </div>
 <div style="text-align: left;">
-    <p>This is a demo to generate text and code with the following StarCoder models:</p>
     <ul>
-        <li><a href="https://huggingface.co/bigcode/starcoderplus" style='color: #e6b800;'>StarCoderPlus</a>: A finetuned version of StarCoderBase on English web data, making it strong in both English text and code generation.</li>
-        <li><a href="https://huggingface.co/bigcode/starcoderbase" style='color: #e6b800;'>StarCoderBase</a>: A code generation model trained on 80+ programming languages, providing broad language coverage for code generation tasks.</li>
-        <li><a href="https://huggingface.co/bigcode/starcoder" style='color: #e6b800;'>StarCoder</a>: A finetuned version of StarCoderBase specifically focused on Python, while also maintaining strong performance on other programming languages.</li>
     </ul>
-    <p><b>Please note:</b> These models are not designed for instruction purposes. If you're looking for instruction or want to chat with a fine-tuned model, you can visit the <a href="https://huggingface.co/spaces/HuggingFaceH4/starchat-playground">StarChat Playground</a>.</p>
 </div>
 """
 disclaimer = """⚠️<b>Any use or sharing of this demo constitues your acceptance of the BigCode [OpenRAIL-M](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) License Agreement and the use restrictions included within.</b>\
@@ -186,11 +234,18 @@ with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
     with gr.Column():
         gr.Markdown(description)
         with gr.Row():
-            version = gr.Dropdown(
-                        ["StarCoderPlus", "StarCoderBase", "StarCoder"],
-                        value="StarCoder",
                         label="Model",
-                        info="Choose a model from the list",
                         )
         with gr.Row():
             with gr.Column():
@@ -264,8 +319,9 @@ with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
     submit.click(
         generate,
-        inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty, version],
         outputs=[output],
     )
-    share_button.click(None, [], [], _js=share_js)
-demo.queue(concurrency_count=16).launch(share=True, debug=True)

 import gradio as gr
 from huggingface_hub import Repository
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
 from share_btn import community_icon_html, loading_icon_html, share_js, share_btn_css
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
+# Hub repo ids ("namespace/name") of the base checkpoint and its variants.
+# from_pretrained() resolves a repo id or a local directory, not the web URL
+# of a model page, so the "https://huggingface.co/" prefix is left out.
+# The *_URL names are kept because the loading code below refers to them.
+CHECKPOINT_URL = "Salesforce/codegen-350M-mono"
+SQLMODEL_PREFIX_URL = "luna-code/sqlmodel-codegen-350M-mono-prefix"
+SFEPY_PREFIX_URL = "luna-code/sfepy-codegen-350M-mono-prefix"
+MEGENGINE_PREFIX_URL = "luna-code/megengine-codegen-350M-mono-prefix"
+MAIN_EVO_PREFIX_URL = "luna-code/codegen-350M-mono-evo-prefix"
+SQLMODEL_FFT_URL = "luna-code/sqlmodel-codegen-350M-mono-fft"
+SFEPY_FFT_URL = "luna-code/sfepy-codegen-350M-mono-fft"
+MEGENGINE_FFT_URL = "luna-code/megengine-codegen-350M-mono-fft"
+MAIN_EVO_FFT_URL = "luna-code/codegen-350M-mono-evo-fft"
+MAIN_FD_FFT_URL = "luna-code/codegen-350M-mono-fd-fft"
+LANGCHAIN_PREFIX_URL = "luna-code/langchain-codegen-350M-mono-prefix"
+LLAMAINDEX_PREFIX_URL = "luna-code/llamaindex-codegen-350M-mono-prefix"
+DSPY_PREFIX_URL = "luna-code/dspy-codegen-350M-mono-prefix"
+CS_EVO_PREFIX_URL = "luna-code/cs-codegen-350M-mono-evo-prefix"
+tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_URL)
+basemodel = AutoModelForCausalLM.from_pretrained(CHECKPOINT_URL, device_map="auto")
+sql_prefix = PeftModel.from_pretrained(basemodel, SQLMODEL_PREFIX_URL, device_map="auto")
+sfepy_prefix = PeftModel.from_pretrained(basemodel, SFEPY_PREFIX_URL, device_map="auto")
+megengine_prefix = PeftModel.from_pretrained(basemodel, MEGENGINE_PREFIX_URL, device_map="auto")
+main_evo_prefix = PeftModel.from_pretrained(basemodel, MAIN_EVO_PREFIX_URL, device_map="auto")
+sqlmodel_fft = AutoModelForCausalLM.from_pretrained(SQLMODEL_FFT_URL, device_map="auto")
+sfepy_fft = AutoModelForCausalLM.from_pretrained(SFEPY_FFT_URL, device_map="auto")
+megengine_fft = AutoModelForCausalLM.from_pretrained(MEGENGINE_FFT_URL, device_map="auto")
+main_evo_fft = AutoModelForCausalLM.from_pretrained(MAIN_EVO_FFT_URL, device_map="auto")
+main_fd_fft = AutoModelForCausalLM.from_pretrained(MAIN_FD_FFT_URL, device_map="auto")
+langchain_prefix = PeftModel.from_pretrained(basemodel, LANGCHAIN_PREFIX_URL, device_map="auto")
+llamaindex_prefix = PeftModel.from_pretrained(basemodel, LLAMAINDEX_PREFIX_URL, device_map="auto")
+dspy_prefix = PeftModel.from_pretrained(basemodel, DSPY_PREFIX_URL, device_map="auto")
+cs_evo_prefix = PeftModel.from_pretrained(basemodel, CS_EVO_PREFIX_URL, device_map="auto")
+# basemodel = ""
+# sql_prefix = ""
+# sfepy_prefix = ""
+# megengine_prefix = ""
+# main_evo_prefix = ""
+# sqlmodel_fft = ""
+# sfepy_fft = ""
+# megengine_fft = ""
+# main_evo_fft = ""
+# main_fd_fft = ""
+# langchain_prefix = ""
+# llamaindex_prefix = ""
+# dspy_prefix = ""
+# cs_evo_prefix = ""
+model_map = {
+    "Base": basemodel,
+    "SQLModel Prefix": sql_prefix,
+    "SfePy Prefix": sfepy_prefix,
+    "MegEngine Prefix": megengine_prefix,
+    "Main Evo Prefix": main_evo_prefix,
+    "SQLModel FFT": sqlmodel_fft,
+    "SfePy FFT": sfepy_fft,
+    "MegEngine FFT": megengine_fft,
+    "Main Evo FFT": main_evo_fft,
+    "Main FD FFT": main_fd_fft,
+    "LangChain Prefix": langchain_prefix,
+    "LlamaIndex Prefix": llamaindex_prefix,
+    "DSpy Prefix": dspy_prefix,
+    "CS Evo Prefix": cs_evo_prefix,
+}
 FIM_PREFIX = "<fim_prefix>"
 FIM_MIDDLE = "<fim_middle>"
     ],
 )
+def stream(model, code, generate_kwargs):
+    """Return the text that ``model`` generates as a continuation of ``code``.
+
+    Despite the name, nothing is streamed: a single ``model.generate`` call
+    is made and only the newly generated tokens are decoded.
+
+    Args:
+        model: Object exposing ``generate`` and ``device``.
+        code: Prompt text to complete.
+        generate_kwargs: Extra keyword arguments forwarded to
+            ``model.generate``.
+
+    Returns:
+        The decoded continuation with special tokens removed and surrounding
+        whitespace stripped; the prompt itself is not included.
+    """
+    # Follow the model's own device instead of assuming CUDA: the models in
+    # this file are loaded with device_map="auto", so a GPU may not exist.
+    inputs = tokenizer(code, return_tensors="pt").to(model.device)
+    generated_ids = model.generate(**inputs, **generate_kwargs)
+    # The output sequence starts with the prompt tokens; slice them off.
+    prompt_length = inputs["input_ids"].shape[1]
+    return tokenizer.decode(generated_ids[0][prompt_length:], skip_special_tokens=True).strip()
 def generate(
+    prompt, temperature=0.6, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, library="LangChain", method="Prefix"
 ):
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
     top_p = float(top_p)
     generate_kwargs = dict(
         temperature=temperature,
         seed=42,
     )
+    if method == "Base":
+        output = stream(basemodel, prompt, generate_kwargs)
+    elif method == "Prefix":
+        output = stream(model_map[library + " Prefix"], prompt, generate_kwargs)
+    elif method == "Evo Prefix" and library in ["SQLModel", "SfePy", "MegEngine"]:
+        output = stream(model_map["Main Evo Prefix"], prompt, generate_kwargs)
+    elif method == "FFT" and library in ["SQLModel", "SfePy", "MegEngine"]:
+        output = stream(model_map[library + " FFT"], prompt, generate_kwargs)
+    elif method == "Evo FFT" and library in ["SQLModel", "SfePy", "MegEngine"]:
+        output = stream(model_map["Main Evo FFT"], prompt, generate_kwargs)
+    elif method == "Full Data FFT" and library in ["SQLModel", "SfePy", "MegEngine"]:
+        output = stream(model_map["Main FD FFT"], prompt, generate_kwargs)
     else:
+        output = ""
     return output
 description = """
 <div style="text-align: center;">
+    <h1> 🌙 LUNA Models Playground</h1>
 </div>
 <div style="text-align: left;">
+    <p>This is a demo to generate text and code with unknown libraries. The supported based model is <a href="https://huggingface.co/Salesforce/codegen-350M-mono" style='color: #e6b800;'>CodeGen-350M-mono</a></p>
+    <p>The supported libraries are:</p>
     <ul>
+        <li><a href="https://sqlmodel.tiangolo.com" style='color: #e6b800;'>SQLModel</a></li>
+        <li><a href="https://sfepy.org" style='color: #e6b800;'>SfePy</a></li>
+        <li><a href="https://megengine.org" style='color: #e6b800;'>MegEngine</a></li>
+        <li><a href="https://www.langchain.com/" style='color: #e6b800;'>LangChain</a></li>
+        <li><a href="https://www.llamaindex.ai/" style='color: #e6b800;'>LlamaIndex</a></li>
+        <li><a href="https://dspy-docs.vercel.app/" style='color: #e6b800;'>DSpy</a></li>
     </ul>
+    <p><b>Please note:</b> These models are not designed for instruction purposes.</p>
 </div>
 """
 disclaimer = """⚠️<b>Any use or sharing of this demo constitues your acceptance of the BigCode [OpenRAIL-M](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) License Agreement and the use restrictions included within.</b>\
     with gr.Column():
         gr.Markdown(description)
         with gr.Row():
+            library = gr.Dropdown(
+                        ["SQLModel", "SfePy", "MegEngine", "LangChain", "LlamaIndex", "DSpy"],
+                        value="LangChain",
+                        label="Library",
+                        info="Choose a library from the list",
+                        )
+        with gr.Row():
+            method = gr.Dropdown(
+                        ["Base", "Prefix", "Evo Prefix", "FFT", "Evo FFT", "Full Data FFT"],
+                        value="Prefix",
                         label="Model",
+                        info="Choose an expert from the list",
                         )
         with gr.Row():
             with gr.Column():
     submit.click(
         generate,
+        inputs=[instruction, temperature, max_new_tokens, top_p, repetition_penalty, library, method],
         outputs=[output],
+        concurrency_limit=16
     )
+    share_button.click(None, [], [])
+demo.queue().launch(debug=True)