ShaderCoder

Runtime error

App Files Community

Vipitis committed on Jul 12, 2023

Commit

012c551

1 Parent(s): 791c9fd

refactor generation utils

Browse files

Files changed (3) hide show

app.py +9 -57
utils/__init__.py +5 -3
utils/generation.py +58 -0

app.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import gradio as gr
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import datasets
 import numpy as np
 import torch
-from threading import Thread
 from utils.tree_utils import parse_functions, get_docstrings, grab_before_comments, line_chr2char, node_str_idx, replace_function
 from utils.html_utils import make_iframe, construct_embed
 PIPE = None
 intro_text = """
@@ -99,35 +99,6 @@ def _make_pipeline(model_cp = "Vipitis/santacoder-finetuned-Shadertoys-fine"): #
     print(f"loaded model {model_cp} as a pipline")
     return pipe
-def _run_generation(model_ctx:str, pipe, gen_kwargs:dict):
-    """
-    Text generation function
-    Args:
-        model_ctx (str): The context to start generation from.
-        pipe (Pipeline): The pipeline to use for generation.
-        gen_kwargs (dict): The generation kwargs.
-    Returns:
-        str: The generated text. (it iterates over time)
-    """
-    # Tokenize the model_context
-    model_inputs = pipe.tokenizer(model_ctx, return_tensors="pt")
-    # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
-    # in the main thread. Adds timeout to the streamer to handle exceptions in the generation thread.
-    streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15.0)
-    generate_kwargs = dict(model_inputs, streamer=streamer, **gen_kwargs)
-    t = Thread(target=pipe.model.generate, kwargs=generate_kwargs)
-    t.start()
-    # Pull the generated text from the streamer, and update the model output.
-    model_output = ""
-    for new_text in streamer:
-        # print("step", end="")
-        model_output += new_text
-        yield model_output
-    streamer.on_finalized_text("stream reached the end.")
-    return model_output #is this ever reached?
 def process_retn(retn):
     return retn.split(";")[0].strip()
@@ -167,7 +138,7 @@ def alter_return(orig_code, func_idx, temperature, max_new_tokens, top_p, repeti
     else:
         raise gr.Error(f"func_idx must be int or str, not {type(func_idx)}")
-    generation_kwargs = _combine_generation_kwargs(temperature, max_new_tokens, top_p, repetition_penalty)
     retrns = []
     retrn_start_idx = orig_code.find("return")
@@ -189,14 +160,6 @@ def alter_return(orig_code, func_idx, temperature, max_new_tokens, top_p, repeti
     return altered_code
-def _combine_generation_kwargs(temperature, max_new_tokens, top_p, repetition_penalty):
-    gen_kwargs = {}
-    gen_kwargs["temperature"] = temperature
-    gen_kwargs["max_new_tokens"] = max_new_tokens
-    gen_kwargs["top_p"] = top_p
-    gen_kwargs["repetition_penalty"] = repetition_penalty
-    return gen_kwargs
 def alter_body(old_code, func_id, funcs_list: list, prompt="", temperature=0.2, max_new_tokens=512, top_p=.95, repetition_penalty=1.2, pipeline=PIPE):
     """
     Replaces the body of a function with a generated one.
@@ -223,27 +186,16 @@ def alter_body(old_code, func_id, funcs_list: list, prompt="", temperature=0.2,
     func_node = funcs_list[func_id]
     print(f"using for generation: {func_node=}")
-    generation_kwargs = _combine_generation_kwargs(temperature, max_new_tokens, top_p, repetition_penalty)
-    func_start_idx = line_chr2char(old_code, func_node.start_point[0], func_node.start_point[1])
-    identifier_str = func_node.child_by_field_name("type").text.decode() + " " + func_node.child_by_field_name("declarator").text.decode() #func_start_idx:body_start_idx?
     body_node = func_node.child_by_field_name("body")
     body_start_idx, body_end_idx = node_str_idx(body_node)
-    model_context = identifier_str # base case
-    docstring = get_docstrings(func_node) #might be empty?
-    if docstring:
-        model_context = model_context + "\n" + docstring
-    model_context = grab_before_comments(func_node) + model_context #prepend comments
-    if prompt != "":
-        model_context = f"//avialable functions: {','.join([n.child_by_field_name('declarator').text.decode() for n in funcs_list])}\n" + model_context #prepend available functions
-        model_context = "//Title: " + prompt + "\n" + model_context #prepend user prompt/title
-        model_context = "//Language: Shadertoy GLSL fragment shader\n" + model_context #prepend system prompt, language hint
-    print(f"{model_context=}")
     # generation = pipeline(model_context, return_full_text=False, **generation_kwargs)[0]["generated_text"]
-    generation = _run_generation(model_context, pipeline, generation_kwargs)
     for i in generation:
-        print(f"{i=}")
         yield model_context + i #fix in between, do all the stuff in the end?
     generation = i[:] #seems to work
     print(f"{generation=}")
@@ -253,7 +205,7 @@ def alter_body(old_code, func_id, funcs_list: list, prompt="", temperature=0.2,
         first_gened_func = parse_functions(ctx_with_generation)[0] # truncate generation to a single function?
     except IndexError:
         print("generation wasn't a full function.")
-        altered_code = old_code[:func_start_idx] + model_context + generation + "//the generation didn't complete the function!\n" + old_code[body_end_idx:] #needs a newline to break out of the comment.
         return altered_code
     altered_code = replace_function(func_node, first_gened_func)
     yield altered_code #yield once so it updates? -> works... gg but doesn't seem to do it for the dropdown

 import gradio as gr
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 import datasets
 import numpy as np
 import torch
 from utils.tree_utils import parse_functions, get_docstrings, grab_before_comments, line_chr2char, node_str_idx, replace_function
 from utils.html_utils import make_iframe, construct_embed
+from utils.generation import combine_generation_kwargs, stream_generation, construct_model_context
 PIPE = None
 intro_text = """
     print(f"loaded model {model_cp} as a pipline")
     return pipe
 def process_retn(retn):
     return retn.split(";")[0].strip()
     else:
         raise gr.Error(f"func_idx must be int or str, not {type(func_idx)}")
+    generation_kwargs = combine_generation_kwargs(temperature, max_new_tokens, top_p, repetition_penalty)
     retrns = []
     retrn_start_idx = orig_code.find("return")
     return altered_code
 def alter_body(old_code, func_id, funcs_list: list, prompt="", temperature=0.2, max_new_tokens=512, top_p=.95, repetition_penalty=1.2, pipeline=PIPE):
     """
     Replaces the body of a function with a generated one.
     func_node = funcs_list[func_id]
     print(f"using for generation: {func_node=}")
+    generation_kwargs = combine_generation_kwargs(temperature, max_new_tokens, top_p, repetition_penalty)
+    model_context = construct_model_context(func_node, prompt=prompt)
+    print(f"{model_context=}")
     body_node = func_node.child_by_field_name("body")
     body_start_idx, body_end_idx = node_str_idx(body_node)
     # generation = pipeline(model_context, return_full_text=False, **generation_kwargs)[0]["generated_text"]
+    generation = stream_generation(model_context, pipeline, generation_kwargs)
     for i in generation:
+        # print(f"{i=}")
         yield model_context + i #fix in between, do all the stuff in the end?
     generation = i[:] #seems to work
     print(f"{generation=}")
         first_gened_func = parse_functions(ctx_with_generation)[0] # truncate generation to a single function?
     except IndexError:
         print("generation wasn't a full function.")
+        altered_code = old_code[:body_start_idx] + generation + "//the generation didn't complete the function!\n" + old_code[body_end_idx:] #needs a newline to break out of the comment.
         return altered_code
     altered_code = replace_function(func_node, first_gened_func)
     yield altered_code #yield once so it updates? -> works... gg but doesn't seem to do it for the dropdown

utils/__init__.py CHANGED Viewed

@@ -1,7 +1,9 @@
-from .tree_utils import (parse_functions, get_docstrings, grab_before_comments, line_chr2char)
 from .html_utils import (make_iframe, make_script, construct_embed)
-tree_funcs = ["parse_functions", "get_docstrings", "grab_before_comments", "line_chr2char"]
 html_funcs = ["make_iframe", "make_script", "construct_embed"]
-__all__ = tree_funcs + html_funcs

+from .tree_utils import (parse_functions, get_docstrings, grab_before_comments, line_chr2char, replace_function, get_root, node_str_idx, give_tree)
 from .html_utils import (make_iframe, make_script, construct_embed)
+from .generation import (combine_generation_kwargs, stream_generation, construct_model_context)
+# Each list names the helpers imported above from one submodule (tree_utils, html_utils, generation);
+# their concatenation below is the package's public API.
+tree_funcs = ["parse_functions", "get_docstrings", "grab_before_comments", "line_chr2char", "replace_function", "get_root", "node_str_idx", "give_tree"]
 html_funcs = ["make_iframe", "make_script", "construct_embed"]
+gen_funcs = ["combine_generation_kwargs", "stream_generation", "construct_model_context"]
+__all__ = tree_funcs + html_funcs + gen_funcs

utils/generation.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from transformers import TextIteratorStreamer
+from threading import Thread
+from utils.tree_utils import get_docstrings, grab_before_comments
+def combine_generation_kwargs(temperature, max_new_tokens, top_p, repetition_penalty):
+    """
+    Combines the generation kwargs into a single dict.
+    Args:
+        temperature: Stored under the "temperature" key.
+        max_new_tokens: Stored under the "max_new_tokens" key.
+        top_p: Stored under the "top_p" key.
+        repetition_penalty: Stored under the "repetition_penalty" key.
+    Returns:
+        dict: A new dict holding exactly these four keys; the values are passed through unchanged.
+    """
+    gen_kwargs = {}
+    gen_kwargs["temperature"] = temperature
+    gen_kwargs["max_new_tokens"] = max_new_tokens
+    gen_kwargs["top_p"] = top_p
+    gen_kwargs["repetition_penalty"] = repetition_penalty
+    return gen_kwargs
+def stream_generation(prompt:str, pipe, gen_kwargs:dict):
+    """
+    Text generation function (a generator that streams the output as it is produced).
+    Args:
+        prompt (str): The context to start generation from.
+        pipe (Pipeline): The pipeline to use for generation; its ``tokenizer`` and ``model`` attributes are used.
+        gen_kwargs (dict): The generation kwargs, forwarded as keyword arguments to ``pipe.model.generate``.
+    Yields:
+        str: The text generated so far. Each yielded value is the full accumulated output,
+        not just the newest piece, so the last yielded value is the complete generation.
+    """
+    # Tokenize the model_context
+    model_inputs = pipe.tokenizer(prompt, return_tensors="pt")
+    # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
+    # in the main thread. Adds timeout to the streamer to handle exceptions in the generation thread.
+    streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15.0)
+    # Merge the tokenized inputs, the streamer and the user supplied kwargs into one kwargs dict for generate().
+    generate_kwargs = dict(model_inputs, streamer=streamer, **gen_kwargs)
+    # NOTE(review): the thread is started but never joined in this function - confirm that is intended.
+    t = Thread(target=pipe.model.generate, kwargs=generate_kwargs)
+    t.start()
+    # Pull the generated text from the streamer, and update the model output.
+    model_output = ""
+    for new_text in streamer:
+        # print("step", end="")
+        model_output += new_text
+        yield model_output
+    # NOTE(review): this runs after the streamer iteration above has already ended, and nothing in this
+    # function reads from the streamer afterwards - presumably a leftover; confirm whether it is needed.
+    streamer.on_finalized_text("stream reached the end.")
+    return model_output # reached once the stream is exhausted; a generator's return value travels in StopIteration.value, so a plain for-loop caller never sees it.
+def construct_model_context(func_node, prompt="") -> str:
+    """
+    Constructs the model context from a function node.
+
+    The resulting string is assembled in this order (top to bottom):
+        1. "//Language: Shadertoy GLSL fragment shader" line (only if ``prompt`` is not empty)
+        2. "//Title: <prompt>" line (only if ``prompt`` is not empty)
+        3. whatever ``grab_before_comments(func_node)`` returns
+        4. the text of the node's "type" field, a space, and the text of its "declarator" field
+        5. a newline followed by the result of ``get_docstrings(func_node)`` (only if that result is truthy)
+    Args:
+        func_node: Node exposing ``child_by_field_name`` with "type" and "declarator" fields
+            (presumably a tree-sitter function definition node - TODO confirm).
+        prompt (str): Optional title supplied by the user; an empty string skips the two header lines.
+    Returns:
+        str: The context string to start generation from.
+    """
+    model_context = func_node.child_by_field_name("type").text.decode() + " " + func_node.child_by_field_name("declarator").text.decode() #func_start_idx:body_start_idx?
+    docstring = get_docstrings(func_node) #might be empty?
+    if docstring:
+        model_context = model_context + "\n" + docstring
+    model_context = grab_before_comments(func_node) + model_context #prepend comments
+    if prompt != "":
+        # Each prepend goes in front of the previous one, so the language hint ends up above the title.
+        model_context = "//Title: " + prompt + "\n" + model_context #prepend user prompt/title
+        model_context = "//Language: Shadertoy GLSL fragment shader\n" + model_context #prepend system prompt, language hint
+    return model_context