Spaces:
Runtime error
Runtime error
additional model context
Browse files
app.py
CHANGED
|
@@ -269,10 +269,10 @@ outro_text ="""
|
|
| 269 |
- [] dropdown for model selection (from curated list or all supported models?)
|
| 270 |
- [] generation history stating which function and orig/generated returns. (use State ??). do it as comments in the code?
|
| 271 |
- [~] display errors/issues to the user (raise gr.Error could be one idea, but highlighting in the code would be awesome) currently adds a comment to the code.
|
| 272 |
-
- [] generate whole shaders (via prompts guidance, recursive from errors)
|
| 273 |
- [x] accordion with generation parameters (as pipeline_kwargs?) look up starcoder playground and take "inspiration" from there (implemented for both buttons, untested)
|
| 274 |
- [] support FIM task for better model context
|
| 275 |
-
- [x] include some context for prompt (title, comments before a function) - now takes all comments directly before a function as well as all comments at the beginning inside a function.
|
| 276 |
- [] gradio examples
|
| 277 |
- [] use GPU if available, respect memory restrictions.
|
| 278 |
- [x] stream model generation (maybe in a new window?) - janky solution and only sometimes hangs up
|
|
@@ -320,7 +320,7 @@ def grab_sample(sample_idx):
|
|
| 320 |
# funcs = _parse_functions(sample_code)
|
| 321 |
# func_identifiers = [f"{idx:2d}: {n.child_by_field_name('declarator').text.decode()}" for idx, n in enumerate(funcs)]
|
| 322 |
# print(f"updating drop down to:{func_identifiers}")
|
| 323 |
-
return sample_pass, sample_code, source_iframe, funcs#, gr.Dropdown.update(choices=func_identifiers) #, sample_title, sample_auhtor
|
| 324 |
|
| 325 |
|
| 326 |
def _parse_functions(in_code):
|
|
@@ -395,6 +395,10 @@ def alter_return(orig_code, func_idx, temperature, max_new_tokens, top_p, repeti
|
|
| 395 |
Args:
|
| 396 |
orig_code (str): The original code.
|
| 397 |
func_idx (int): The index of the function to replace the return statement of.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
pipeline (Pipeline): The pipeline to use for generation.
|
| 399 |
Returns:
|
| 400 |
str: The altered code.
|
|
@@ -472,22 +476,29 @@ def _get_docstrings(func_node):
|
|
| 472 |
returns the docstring of a function node
|
| 473 |
"""
|
| 474 |
docstring = ""
|
| 475 |
-
for node in func_node.child_by_field_name("body").children
|
| 476 |
-
if node.type == "comment":
|
| 477 |
docstring += node.text.decode() + "\n"
|
| 478 |
else:
|
| 479 |
return docstring
|
| 480 |
return docstring
|
| 481 |
|
| 482 |
-
def alter_body(old_code, func_id, funcs_list: list, temperature, max_new_tokens, top_p, repetition_penalty, pipeline=PIPE):
|
| 483 |
"""
|
| 484 |
Replaces the body of a function with a generated one.
|
| 485 |
Args:
|
| 486 |
old_code (str): The original code.
|
| 487 |
func_node (Node): The node of the function to replace the body of.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
pipeline (Pipeline): The pipeline to use for generation.
|
| 489 |
Returns:
|
| 490 |
str: The altered code.
|
|
|
|
| 491 |
"""
|
| 492 |
if isinstance(func_id, str):
|
| 493 |
print(f"{func_id=}")
|
|
@@ -517,8 +528,12 @@ def alter_body(old_code, func_id, funcs_list: list, temperature, max_new_tokens,
|
|
| 517 |
# second_child = func_node.child_by_field_name("body").children[1] #might error out?
|
| 518 |
docstring = _get_docstrings(func_node) #might be empty?
|
| 519 |
if docstring:
|
| 520 |
-
model_context = model_context + "\n
|
| 521 |
-
model_context = _grab_before_comments(func_node) + model_context
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
print(f"{model_context=}")
|
| 523 |
# generation = pipeline(model_context, return_full_text=False, **generation_kwargs)[0]["generated_text"]
|
| 524 |
generation = _run_generation(model_context, pipeline, generation_kwargs)
|
|
@@ -568,13 +583,14 @@ with gr.Blocks() as site:
|
|
| 568 |
model_cp = gr.Textbox(value="Vipitis/santacoder-finetuned-Shadertoys-fine", label="Model Checkpoint (Enter to load!)", interactive=True)
|
| 569 |
sample_idx = gr.Slider(minimum=0, maximum=num_samples, value=3211, label="pick sample from dataset", step=1.0)
|
| 570 |
func_dropdown = gr.Dropdown(value=["0: edit the Code (or load a shader) to update this dropdown"], label="chose a function to modify") #breaks if I add a string in before that? #TODO: use type="index" to get int - always gives None?
|
|
|
|
| 571 |
with gr.Accordion("Advanced settings", open=False): # from: https://huggingface.co/spaces/bigcode/bigcode-playground/blob/main/app.py
|
| 572 |
with gr.Row():
|
| 573 |
column_1, column_2 = gr.Column(), gr.Column()
|
| 574 |
with column_1:
|
| 575 |
temperature = gr.Slider(
|
| 576 |
label="Temperature",
|
| 577 |
-
value=0.
|
| 578 |
minimum=0.0,
|
| 579 |
maximum=1.0,
|
| 580 |
step=0.05,
|
|
@@ -583,7 +599,7 @@ with gr.Blocks() as site:
|
|
| 583 |
)
|
| 584 |
max_new_tokens = gr.Slider(
|
| 585 |
label="Max new tokens",
|
| 586 |
-
value=
|
| 587 |
minimum=0,
|
| 588 |
maximum=2048, #this could be inferred from the model?
|
| 589 |
step=32,
|
|
@@ -593,7 +609,7 @@ with gr.Blocks() as site:
|
|
| 593 |
with column_2:
|
| 594 |
top_p = gr.Slider(
|
| 595 |
label="Top-p (nucleus sampling)",
|
| 596 |
-
value=0.
|
| 597 |
minimum=0.0,
|
| 598 |
maximum=1,
|
| 599 |
step=0.05,
|
|
@@ -610,8 +626,8 @@ with gr.Blocks() as site:
|
|
| 610 |
info="Penalize repeated tokens",
|
| 611 |
)
|
| 612 |
with gr.Row():
|
| 613 |
-
gen_return_button = gr.Button("generate a alternate return statement", label="generate return")
|
| 614 |
-
gen_func_button = gr.Button("generate an alternate function body", label="generate function")
|
| 615 |
with gr.Row():
|
| 616 |
with gr.Column():
|
| 617 |
source_embed = gr.HTML('<iframe width="640" height="360" frameborder="0" src="" allowfullscreen></iframe>', label="How this shader originally renders")
|
|
@@ -627,9 +643,9 @@ with gr.Blocks() as site:
|
|
| 627 |
# history_table = gr.JSON()
|
| 628 |
|
| 629 |
model_cp.submit(fn=_make_pipeline, inputs=[model_cp], outputs=[pipe]) # how can we trigger this on load?
|
| 630 |
-
sample_idx.release(fn=grab_sample, inputs=[sample_idx], outputs=[sample_pass, sample_code, source_embed])
|
| 631 |
gen_return_button.click(fn=alter_return, inputs=[sample_code, func_dropdown, pipe], outputs=[sample_code])
|
| 632 |
-
gen_func_button.click(fn=alter_body, inputs=[sample_code, func_dropdown, funcs, temperature, max_new_tokens, top_p, repetition_penalty, pipe], outputs=[sample_code, pipe]).then(
|
| 633 |
fn=list_dropdown, inputs=[sample_code], outputs=[funcs, func_dropdown]
|
| 634 |
)
|
| 635 |
sample_code.change(fn=list_dropdown, inputs=[sample_code], outputs=[funcs, func_dropdown]).then(
|
|
|
|
| 269 |
- [] dropdown for model selection (from curated list or all supported models?)
|
| 270 |
- [] generation history stating which function and orig/generated returns. (use State ??). do it as comments in the code?
|
| 271 |
- [~] display errors/issues to the user (raise gr.Error could be one idea, but highlighting in the code would be awesome) currently adds a comment to the code.
|
| 272 |
+
- [~] generate whole shaders (via prompts guidance, recursive from errors) - prompt context is in progress.
|
| 273 |
- [x] accordion with generation parameters (as pipeline_kwargs?) look up starcoder playground and take "inspiration" from there (implemented for both buttons, untested)
|
| 274 |
- [] support FIM task for better model context
|
| 275 |
+
- [x] include some context for prompt (title, comments before a function) - now takes all comments directly before a function as well as all comments at the beginning inside a function. (misses comments between argument list and body)
|
| 276 |
- [] gradio examples
|
| 277 |
- [] use GPU if available, respect memory restrictions.
|
| 278 |
- [x] stream model generation (maybe in a new window?) - janky solution and only sometimes hangs up
|
|
|
|
| 320 |
# funcs = _parse_functions(sample_code)
|
| 321 |
# func_identifiers = [f"{idx:2d}: {n.child_by_field_name('declarator').text.decode()}" for idx, n in enumerate(funcs)]
|
| 322 |
# print(f"updating drop down to:{func_identifiers}")
|
| 323 |
+
return sample_pass, sample_code, sample_title, source_iframe, funcs#, gr.Dropdown.update(choices=func_identifiers) #, sample_title, sample_auhtor
|
| 324 |
|
| 325 |
|
| 326 |
def _parse_functions(in_code):
|
|
|
|
| 395 |
Args:
|
| 396 |
orig_code (str): The original code.
|
| 397 |
func_idx (int): The index of the function to replace the return statement of.
|
| 398 |
+
temperature (float): The temperature to use for generation.
|
| 399 |
+
max_new_tokens (int): The maximum number of tokens to generate.
|
| 400 |
+
top_p (float): The top_p to use for generation.
|
| 401 |
+
repetition_penalty (float): The repetition_penalty to use for generation.
|
| 402 |
pipeline (Pipeline): The pipeline to use for generation.
|
| 403 |
Returns:
|
| 404 |
str: The altered code.
|
|
|
|
| 476 |
returns the docstring of a function node
|
| 477 |
"""
|
| 478 |
docstring = ""
|
| 479 |
+
for node in func_node.child_by_field_name("body").children:
|
| 480 |
+
if node.type == "comment" or node.type == "{":
|
| 481 |
docstring += node.text.decode() + "\n"
|
| 482 |
else:
|
| 483 |
return docstring
|
| 484 |
return docstring
|
| 485 |
|
| 486 |
+
def alter_body(old_code, func_id, funcs_list: list, prompt, temperature, max_new_tokens, top_p, repetition_penalty, pipeline=PIPE):
|
| 487 |
"""
|
| 488 |
Replaces the body of a function with a generated one.
|
| 489 |
Args:
|
| 490 |
old_code (str): The original code.
|
| 491 |
func_node (Node): The node of the function to replace the body of.
|
| 492 |
+
funcs_list (list): The list of all functions in the code.
|
| 493 |
+
prompt (str): The prompt(title) to use for generation.
|
| 494 |
+
temperature (float): The temperature to use for generation.
|
| 495 |
+
max_new_tokens (int): The maximum number of tokens to generate.
|
| 496 |
+
top_p (float): The top_p to use for generation.
|
| 497 |
+
repetition_penalty (float): The repetition_penalty to use for generation.
|
| 498 |
pipeline (Pipeline): The pipeline to use for generation.
|
| 499 |
Returns:
|
| 500 |
str: The altered code.
|
| 501 |
+
pipeline (Pipeline): The pipeline to update the state
|
| 502 |
"""
|
| 503 |
if isinstance(func_id, str):
|
| 504 |
print(f"{func_id=}")
|
|
|
|
| 528 |
# second_child = func_node.child_by_field_name("body").children[1] #might error out?
|
| 529 |
docstring = _get_docstrings(func_node) #might be empty?
|
| 530 |
if docstring:
|
| 531 |
+
model_context = model_context + "\n" + docstring
|
| 532 |
+
model_context = _grab_before_comments(func_node) + model_context #prepend comments
|
| 533 |
+
if prompt != "":
|
| 534 |
+
model_context = f"//avialable functions: {','.join([n.child_by_field_name('declarator').text.decode() for n in funcs_list])}\n" + model_context #prepend available functions
|
| 535 |
+
model_context = "//Title: " + prompt + "\n" + model_context #prepend user prompt/title
|
| 536 |
+
model_context = "//Language: Shadertoy GLSL fragment shader\n" + model_context #prepend system prompt, language hint
|
| 537 |
print(f"{model_context=}")
|
| 538 |
# generation = pipeline(model_context, return_full_text=False, **generation_kwargs)[0]["generated_text"]
|
| 539 |
generation = _run_generation(model_context, pipeline, generation_kwargs)
|
|
|
|
| 583 |
model_cp = gr.Textbox(value="Vipitis/santacoder-finetuned-Shadertoys-fine", label="Model Checkpoint (Enter to load!)", interactive=True)
|
| 584 |
sample_idx = gr.Slider(minimum=0, maximum=num_samples, value=3211, label="pick sample from dataset", step=1.0)
|
| 585 |
func_dropdown = gr.Dropdown(value=["0: edit the Code (or load a shader) to update this dropdown"], label="chose a function to modify") #breaks if I add a string in before that? #TODO: use type="index" to get int - always gives None?
|
| 586 |
+
prompt_text = gr.Textbox(value="the title used by the model has generation hint", label="prompt text", info="leave blank to skip", interactive=True)
|
| 587 |
with gr.Accordion("Advanced settings", open=False): # from: https://huggingface.co/spaces/bigcode/bigcode-playground/blob/main/app.py
|
| 588 |
with gr.Row():
|
| 589 |
column_1, column_2 = gr.Column(), gr.Column()
|
| 590 |
with column_1:
|
| 591 |
temperature = gr.Slider(
|
| 592 |
label="Temperature",
|
| 593 |
+
value=0.2, #start out at 0 to do greedy? or will there be an error?
|
| 594 |
minimum=0.0,
|
| 595 |
maximum=1.0,
|
| 596 |
step=0.05,
|
|
|
|
| 599 |
)
|
| 600 |
max_new_tokens = gr.Slider(
|
| 601 |
label="Max new tokens",
|
| 602 |
+
value=265,
|
| 603 |
minimum=0,
|
| 604 |
maximum=2048, #this could be inferred from the model?
|
| 605 |
step=32,
|
|
|
|
| 609 |
with column_2:
|
| 610 |
top_p = gr.Slider(
|
| 611 |
label="Top-p (nucleus sampling)",
|
| 612 |
+
value=0.90,
|
| 613 |
minimum=0.0,
|
| 614 |
maximum=1,
|
| 615 |
step=0.05,
|
|
|
|
| 626 |
info="Penalize repeated tokens",
|
| 627 |
)
|
| 628 |
with gr.Row():
|
| 629 |
+
gen_return_button = gr.Button("generate a alternate return statement", label="generate return", scale=0)
|
| 630 |
+
gen_func_button = gr.Button("generate an alternate function body", label="generate function", scale=1)
|
| 631 |
with gr.Row():
|
| 632 |
with gr.Column():
|
| 633 |
source_embed = gr.HTML('<iframe width="640" height="360" frameborder="0" src="" allowfullscreen></iframe>', label="How this shader originally renders")
|
|
|
|
| 643 |
# history_table = gr.JSON()
|
| 644 |
|
| 645 |
model_cp.submit(fn=_make_pipeline, inputs=[model_cp], outputs=[pipe]) # how can we trigger this on load?
|
| 646 |
+
sample_idx.release(fn=grab_sample, inputs=[sample_idx], outputs=[sample_pass, sample_code, prompt_text, source_embed]) #funcs here?
|
| 647 |
gen_return_button.click(fn=alter_return, inputs=[sample_code, func_dropdown, pipe], outputs=[sample_code])
|
| 648 |
+
gen_func_button.click(fn=alter_body, inputs=[sample_code, func_dropdown, funcs, prompt_text, temperature, max_new_tokens, top_p, repetition_penalty, pipe], outputs=[sample_code, pipe]).then(
|
| 649 |
fn=list_dropdown, inputs=[sample_code], outputs=[funcs, func_dropdown]
|
| 650 |
)
|
| 651 |
sample_code.change(fn=list_dropdown, inputs=[sample_code], outputs=[funcs, func_dropdown]).then(
|