Spaces:

kotlarmilos
/

repository-learning

Sleeping

App Files Files Community

kotlarmilos committed on Jul 29

Commit

9cbcd4d

verified ·

1 Parent(s): eb5ef66

Upload app.py

Browse files

Files changed (1) hide show

app.py +77 -59

app.py CHANGED Viewed

@@ -705,6 +705,40 @@ class InferencePipeline:
         return fig
 def get_current_logs():
     # Return whatever `log_stream.getvalue()` currently yields; `log_stream` is defined outside this view.
     return log_stream.getvalue()
@@ -734,83 +768,67 @@ def analyze_pr_streaming(pr_url):
     code_description = pipeline.search_code_snippets(data["diff_hunks"])
     # Base prompt
-    base_prompt = f"""You are an expert code reviewer. Analyze the PR below and provide detailed feedback with specific line-by-line suggestions:
-        Provide a detailed code review including:
-        1. **Code Quality Issues**: Point out specific lines with problems
-        2. **Suggested Fixes**: Provide exact code suggestions with diff format
-        3. **Security Concerns**: Highlight any security vulnerabilities
-        4. **Performance Issues**: Identify potential performance problems
-        5. **Best Practices**: Suggest improvements following coding standards
-        6. **Testing Recommendations**: What tests should be added/modified
-        Format your suggestions like this for each issue:
-        **File: `filename.ext` Line X**
-        Problem: [description]
-        ```diff
-        - old code line
-        + suggested new code line
-        ```
-        TITLE: {data['title']}
-        DESCRIPTION: {data['body']}
-        CHANGED FILES: {', '.join(data['changed_files'])}
         """
-    similar_file_groups_formatted = []
-    for i, group in enumerate(similar_file_groups):
-        files_str = ", ".join(group['files'])
-        similar_file_groups_formatted.append(f"group {i}: {files_str}")
-    anomalous_files_formatted = []
-    for anomaly in anomalous_files:
-        anomalous_files_formatted.append(f"anomaly: {anomaly['file']} (reason: {anomaly['reason']}, strength: {anomaly['anomaly_strength']})")
-    grounding_formatted = ""
-    for entry in code_description:
-        file_name = entry['file_name']
-        overlapping_functions = entry['overlapping_functions']
-        diff_hunk = entry['diff_hunk']
-        if len(overlapping_functions) > 0:
-            grounding_formatted += f"In file {file_name}, the following changes were made: {diff_hunk}\n"
-            grounding_formatted += f"These changes affected the following functions:\n"
-            for func in overlapping_functions:
-                grounding_formatted += f"{func['function_name']} - {func['function_description']}\n"
-        else:
-            grounding_formatted += f"In file {file_name}, the following changes were made: {diff_hunk}\n"
-        grounding_formatted += "\n"
-    # Create formatted strings for f-string
-    similar_groups_text = "\n".join(similar_file_groups_formatted)
-    anomalous_files_text = "\n".join(anomalous_files_formatted)
-    # TODO: Add local LLM reasoning
-    # TODO: Add relevant files from the directory not included
-    comprehensive_prompt = f"""{base_prompt}
-        FILES THAT ARE SEMANTICALLY CLOSE CHANGED IN THIS PR:
-        {similar_groups_text}
-        UNEXPECTED CHANGES IN FILES:
-        {anomalous_files_text}
-        GROUNDING DATA: The following provides specific information about which functions are affected by each diff hunk:
-        {grounding_formatted}
-        """
     base_prompt += f"""
         DIFF: {data['diff']}
     """
     logger.info(f"Base prompt word count: {len(base_prompt.split())}")
-    # logger.info(f"Base prompt: {base_prompt}")
     logger.info(f"Comprehensive prompt word count: {len(comprehensive_prompt.split())}")
-    # logger.info(f"Comprehensive prompt: {comprehensive_prompt}")
     logger.info("Calling Azure OpenAI...")
     yield "", "", get_current_logs(), visualization

         return fig
+    def build_structured_prompt(self, data: dict, sim_analysis: dict, code_desc: list) -> str:
         # Assemble the review prompt as a list of text lines and return them joined with "\n".
         #
         # data:         read here for the 'title' and 'body' keys only.
         # sim_analysis: read for 'similar_file_groups' (each item needs 'cluster_id', 'files'
         #               and 'avg_similarity') and for 'anomalous_files' (each item needs
         #               'file' and 'reason'; 'anomaly_strength' is optional).
         # code_desc:    iterable of entries with 'file_name', 'diff_hunk' and
         #               'overlapping_functions' (each with 'function_name' and
         #               'function_description').
         # `self` is not referenced anywhere in this body.
         # A missing required key raises KeyError, since plain subscripting is used.
+        # Group clusters
+        clusters = sim_analysis['similar_file_groups']
+        anomalies = sim_analysis['anomalous_files']
+        # Header
+        prompt = []
+        prompt.append("You are an expert reviewer. First give group summaries, then detailed line-by-line feedback.")
+        prompt.append(f"Title: {data['title']}")
+        prompt.append(f"Description: {data['body']}")
+        # Clusters
+        for c in clusters:
             # 'avg_similarity' must accept the `.2f` format spec (i.e. be a real number).
             # NOTE(review): the file list is emitted twice per group - once comma-joined in
             # this heading and again as bullets under "Files:" - confirm that is intended.
+            prompt.append(f"## Group {c['cluster_id']} ({len(c['files'])} files, avg_sim={c['avg_similarity']:.2f}): {', '.join(c['files'])}")
+            prompt.append("Files:")
+            for f in c['files']:
+                prompt.append(f"- {f}")
             # The summary line is a fixed string, identical for every group.
+            prompt.append(f"Summary: Changes in these files share semantic pattern. Focus on shared logic.")
+        # Anomalies
         # The whole section, heading included, is omitted when `anomalies` is empty/falsy.
+        if anomalies:
+            prompt.append("## Isolated Files (low similarity with changed files)")
+            for a in anomalies:
                 # `.get` means a missing 'anomaly_strength' is rendered as the text "None".
+                prompt.append(f"- {a['file']} (reason: {a['reason']}, strength: {a.get('anomaly_strength')})")
+        # Grounding diffs per cluster/files
         # This heading is always appended, even when `code_desc` is empty.
+        prompt.append("## Diff Hunks and Context:")
+        for entry in code_desc:
+            prompt.append(f"File: {entry['file_name']}\n{entry['diff_hunk']}")
             # The function list is added only when 'overlapping_functions' is non-empty.
+            if entry['overlapping_functions']:
+                prompt.append("Affected functions:")
                 # `f` is reused here as the loop variable for a function entry (it named a
                 # file path in the clusters loop above).
+                for f in entry['overlapping_functions']:
+                    prompt.append(f"- {f['function_name']}: {f['function_description']}")
+        # Request
+        prompt.append("Provide feedback on groups, then isolated files. After that provide line-by-line feedback in diff format.")
+        return "\n".join(prompt)
 def get_current_logs():
     # Return whatever `log_stream.getvalue()` currently yields; `log_stream` is defined outside this view.
     return log_stream.getvalue()
     code_description = pipeline.search_code_snippets(data["diff_hunks"])
+    comprehensive_prompt = pipeline.build_structured_prompt(data, similarity_analysis, code_description)
     # Base prompt
+    base_prompt = f"""You are an expert reviewer. Provide detailed line-by-line feedback.
+        Title: {data['title']}
+        Description: {data['body']}
+        Diff: {data['diff']}
         """
+    # similar_file_groups_formatted = []
+    # for i, group in enumerate(similar_file_groups):
+    #     files_str = ", ".join(group['files'])
+    #     similar_file_groups_formatted.append(f"group {i}: {files_str}")
+    # anomalous_files_formatted = []
+    # for anomaly in anomalous_files:
+    #     anomalous_files_formatted.append(f"anomaly: {anomaly['file']} (reason: {anomaly['reason']}, strength: {anomaly['anomaly_strength']})")
+    # grounding_formatted = ""
+    # for entry in code_description:
+    #     file_name = entry['file_name']
+    #     overlapping_functions = entry['overlapping_functions']
+    #     diff_hunk = entry['diff_hunk']
+    #     if len(overlapping_functions) > 0:
+    #         grounding_formatted += f"In file {file_name}, the following changes were made: {diff_hunk}\n"
+    #         grounding_formatted += f"These changes affected the following functions:\n"
+    #         for func in overlapping_functions:
+    #             grounding_formatted += f"{func['function_name']} - {func['function_description']}\n"
+    #     else:
+    #         grounding_formatted += f"In file {file_name}, the following changes were made: {diff_hunk}\n"
+    #     grounding_formatted += "\n"
+    # # Create formatted strings for f-string
+    # similar_groups_text = "\n".join(similar_file_groups_formatted)
+    # anomalous_files_text = "\n".join(anomalous_files_formatted)
+    # # TODO: Add local LLM reasoning
+    # # TODO: Add relevant files from the directory not included
+    # comprehensive_prompt = f"""{base_prompt}
+    #     FILES THAT ARE SEMANTICALLY CLOSE CHANGED IN THIS PR:
+    #     {similar_groups_text}
+    #     UNEXPECTED CHANGES IN FILES:
+    #     {anomalous_files_text}
+    #     GROUNDING DATA: The following provides specific information about which functions are affected by each diff hunk:
+    #     {grounding_formatted}
+    #     """
     base_prompt += f"""
         DIFF: {data['diff']}
     """
     logger.info(f"Base prompt word count: {len(base_prompt.split())}")
+    logger.info(f"Base prompt: {base_prompt}")
     logger.info(f"Comprehensive prompt word count: {len(comprehensive_prompt.split())}")
+    logger.info(f"Comprehensive prompt: {comprehensive_prompt}")
     logger.info("Calling Azure OpenAI...")
     yield "", "", get_current_logs(), visualization