Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -705,6 +705,40 @@ class InferencePipeline:
|
|
| 705 |
|
| 706 |
return fig
|
| 707 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 708 |
def get_current_logs():
    """Return the text currently held by ``log_stream``.

    ``log_stream`` is defined elsewhere in the file; presumably an in-memory
    buffer the logger writes to -- confirm against its definition.
    """
    captured = log_stream.getvalue()
    return captured
|
| 710 |
|
|
@@ -734,83 +768,67 @@ def analyze_pr_streaming(pr_url):
|
|
| 734 |
|
| 735 |
code_description = pipeline.search_code_snippets(data["diff_hunks"])
|
| 736 |
|
|
|
|
|
|
|
| 737 |
# Base prompt
|
| 738 |
-
base_prompt = f"""You are an expert
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
3. **Security Concerns**: Highlight any security vulnerabilities
|
| 743 |
-
4. **Performance Issues**: Identify potential performance problems
|
| 744 |
-
5. **Best Practices**: Suggest improvements following coding standards
|
| 745 |
-
6. **Testing Recommendations**: What tests should be added/modified
|
| 746 |
-
|
| 747 |
-
Format your suggestions like this for each issue:
|
| 748 |
-
**File: `filename.ext` Line X**
|
| 749 |
-
Problem: [description]
|
| 750 |
-
```diff
|
| 751 |
-
- old code line
|
| 752 |
-
+ suggested new code line
|
| 753 |
-
```
|
| 754 |
-
|
| 755 |
-
TITLE: {data['title']}
|
| 756 |
-
|
| 757 |
-
DESCRIPTION: {data['body']}
|
| 758 |
-
|
| 759 |
-
CHANGED FILES: {', '.join(data['changed_files'])}
|
| 760 |
"""
|
| 761 |
|
| 762 |
-
similar_file_groups_formatted = []
|
| 763 |
-
for i, group in enumerate(similar_file_groups):
|
| 764 |
-
|
| 765 |
-
|
| 766 |
|
| 767 |
-
anomalous_files_formatted = []
|
| 768 |
-
for anomaly in anomalous_files:
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
grounding_formatted = ""
|
| 772 |
-
for entry in code_description:
|
| 773 |
-
file_name = entry['file_name']
|
| 774 |
-
overlapping_functions = entry['overlapping_functions']
|
| 775 |
-
diff_hunk = entry['diff_hunk']
|
| 776 |
-
|
| 777 |
-
if len(overlapping_functions) > 0:
|
| 778 |
-
grounding_formatted += f"In file {file_name}, the following changes were made: {diff_hunk}\n"
|
| 779 |
-
grounding_formatted += f"These changes affected the following functions:\n"
|
| 780 |
-
for func in overlapping_functions:
|
| 781 |
-
grounding_formatted += f"{func['function_name']} - {func['function_description']}\n"
|
| 782 |
-
else:
|
| 783 |
-
grounding_formatted += f"In file {file_name}, the following changes were made: {diff_hunk}\n"
|
| 784 |
|
| 785 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 786 |
|
| 787 |
-
#
|
| 788 |
-
similar_groups_text = "\n".join(similar_file_groups_formatted)
|
| 789 |
-
anomalous_files_text = "\n".join(anomalous_files_formatted)
|
| 790 |
|
|
|
|
|
|
|
|
|
|
| 791 |
|
| 792 |
-
# TODO: Add local LLM reasoning
|
| 793 |
-
# TODO: Add relevant files from the directory not included
|
| 794 |
-
comprehensive_prompt = f"""{base_prompt}
|
| 795 |
-
FILES THAT ARE SEMANTICALLY CLOSE CHANGED IN THIS PR:
|
| 796 |
-
{similar_groups_text}
|
| 797 |
|
| 798 |
-
|
| 799 |
-
|
|
|
|
|
|
|
|
|
|
| 800 |
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
|
|
|
|
|
|
|
|
|
| 804 |
|
| 805 |
base_prompt += f"""
|
| 806 |
DIFF: {data['diff']}
|
| 807 |
"""
|
| 808 |
|
| 809 |
logger.info(f"Base prompt word count: {len(base_prompt.split())}")
|
| 810 |
-
|
| 811 |
|
| 812 |
logger.info(f"Comprehensive prompt word count: {len(comprehensive_prompt.split())}")
|
| 813 |
-
|
| 814 |
|
| 815 |
logger.info("Calling Azure OpenAI...")
|
| 816 |
yield "", "", get_current_logs(), visualization
|
|
|
|
| 705 |
|
| 706 |
return fig
|
| 707 |
|
| 708 |
+
def build_structured_prompt(self, data: dict, sim_analysis: dict, code_desc: list) -> str:
    """Assemble the grouped review prompt for a pull request.

    The prompt is built as a list of lines and joined with newlines, in this
    order: reviewer instruction, PR title and description, one section per
    similar-file group, an optional section for isolated files, the diff
    hunks with any affected functions, and the closing request.

    Args:
        data: PR metadata. ``title`` is required; ``body`` may be missing or
            ``None`` and is then rendered as an empty description.
        sim_analysis: Similarity results. ``similar_file_groups`` entries are
            read for ``cluster_id``, ``files`` and ``avg_similarity``;
            ``anomalous_files`` entries for ``file``, ``reason`` and
            ``anomaly_strength``. A missing or ``None`` list is treated as
            empty.
        code_desc: Entries read for ``file_name``, ``diff_hunk`` and
            ``overlapping_functions`` (each function entry is read for
            ``function_name`` and ``function_description``).

    Returns:
        The prompt text as a single newline-joined string.
    """
    clusters = sim_analysis.get('similar_file_groups') or []
    anomalies = sim_analysis.get('anomalous_files') or []

    # A PR without a description would otherwise be rendered as the literal
    # text "None", which is misleading input for the model.
    description = data.get('body') or ""

    # Header
    prompt = [
        "You are an expert reviewer. First give group summaries, then detailed line-by-line feedback.",
        f"Title: {data['title']}",
        f"Description: {description}",
    ]

    # Clusters. File names appear both in the heading and as bullets; this
    # mirrors the existing prompt format and is kept unchanged.
    for cluster in clusters:
        files = cluster['files']
        prompt.append(
            f"## Group {cluster['cluster_id']} ({len(files)} files, "
            f"avg_sim={cluster['avg_similarity']:.2f}): {', '.join(files)}"
        )
        prompt.append("Files:")
        prompt.extend(f"- {path}" for path in files)
        prompt.append("Summary: Changes in these files share semantic pattern. Focus on shared logic.")

    # Anomalies: the section heading is only emitted when there is something to list.
    if anomalies:
        prompt.append("## Isolated Files (low similarity with changed files)")
        prompt.extend(
            f"- {anomaly['file']} (reason: {anomaly.get('reason')}, "
            f"strength: {anomaly.get('anomaly_strength')})"
            for anomaly in anomalies
        )

    # Grounding: each diff hunk, followed by the functions it overlaps (if any).
    prompt.append("## Diff Hunks and Context:")
    for entry in code_desc:
        prompt.append(f"File: {entry['file_name']}\n{entry['diff_hunk']}")
        functions = entry.get('overlapping_functions') or []
        if functions:
            prompt.append("Affected functions:")
            prompt.extend(
                f"- {func['function_name']}: {func['function_description']}"
                for func in functions
            )

    # Request
    prompt.append("Provide feedback on groups, then isolated files. After that provide line-by-line feedback in diff format.")
    return "\n".join(prompt)
|
| 741 |
+
|
| 742 |
def get_current_logs():
    """Return the text currently held by ``log_stream``.

    ``log_stream`` is defined elsewhere in the file; presumably an in-memory
    buffer the logger writes to -- confirm against its definition.
    """
    captured = log_stream.getvalue()
    return captured
|
| 744 |
|
|
|
|
| 768 |
|
| 769 |
code_description = pipeline.search_code_snippets(data["diff_hunks"])
|
| 770 |
|
| 771 |
+
comprehensive_prompt = pipeline.build_structured_prompt(data, similarity_analysis, code_description)
|
| 772 |
+
|
| 773 |
# Base prompt
|
| 774 |
+
base_prompt = f"""You are an expert reviewer. Provide detailed line-by-line feedback.
|
| 775 |
+
Title: {data['title']}
|
| 776 |
+
Description: {data['body']}
|
| 777 |
+
Diff: {data['diff']}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 778 |
"""
|
| 779 |
|
| 780 |
+
# similar_file_groups_formatted = []
|
| 781 |
+
# for i, group in enumerate(similar_file_groups):
|
| 782 |
+
# files_str = ", ".join(group['files'])
|
| 783 |
+
# similar_file_groups_formatted.append(f"group {i}: {files_str}")
|
| 784 |
|
| 785 |
+
# anomalous_files_formatted = []
|
| 786 |
+
# for anomaly in anomalous_files:
|
| 787 |
+
# anomalous_files_formatted.append(f"anomaly: {anomaly['file']} (reason: {anomaly['reason']}, strength: {anomaly['anomaly_strength']})")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 788 |
|
| 789 |
+
# grounding_formatted = ""
|
| 790 |
+
# for entry in code_description:
|
| 791 |
+
# file_name = entry['file_name']
|
| 792 |
+
# overlapping_functions = entry['overlapping_functions']
|
| 793 |
+
# diff_hunk = entry['diff_hunk']
|
| 794 |
+
|
| 795 |
+
# if len(overlapping_functions) > 0:
|
| 796 |
+
# grounding_formatted += f"In file {file_name}, the following changes were made: {diff_hunk}\n"
|
| 797 |
+
# grounding_formatted += f"These changes affected the following functions:\n"
|
| 798 |
+
# for func in overlapping_functions:
|
| 799 |
+
# grounding_formatted += f"{func['function_name']} - {func['function_description']}\n"
|
| 800 |
+
# else:
|
| 801 |
+
# grounding_formatted += f"In file {file_name}, the following changes were made: {diff_hunk}\n"
|
| 802 |
|
| 803 |
+
# grounding_formatted += "\n"
|
|
|
|
|
|
|
| 804 |
|
| 805 |
+
# # Create formatted strings for f-string
|
| 806 |
+
# similar_groups_text = "\n".join(similar_file_groups_formatted)
|
| 807 |
+
# anomalous_files_text = "\n".join(anomalous_files_formatted)
|
| 808 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 809 |
|
| 810 |
+
# # TODO: Add local LLM reasoning
|
| 811 |
+
# # TODO: Add relevant files from the directory not included
|
| 812 |
+
# comprehensive_prompt = f"""{base_prompt}
|
| 813 |
+
# FILES THAT ARE SEMANTICALLY CLOSE CHANGED IN THIS PR:
|
| 814 |
+
# {similar_groups_text}
|
| 815 |
|
| 816 |
+
# UNEXPECTED CHANGES IN FILES:
|
| 817 |
+
# {anomalous_files_text}
|
| 818 |
+
|
| 819 |
+
# GROUNDING DATA: The following provides specific information about which functions are affected by each diff hunk:
|
| 820 |
+
# {grounding_formatted}
|
| 821 |
+
# """
|
| 822 |
|
| 823 |
base_prompt += f"""
|
| 824 |
DIFF: {data['diff']}
|
| 825 |
"""
|
| 826 |
|
| 827 |
logger.info(f"Base prompt word count: {len(base_prompt.split())}")
|
| 828 |
+
logger.info(f"Base prompt: {base_prompt}")
|
| 829 |
|
| 830 |
logger.info(f"Comprehensive prompt word count: {len(comprehensive_prompt.split())}")
|
| 831 |
+
logger.info(f"Comprehensive prompt: {comprehensive_prompt}")
|
| 832 |
|
| 833 |
logger.info("Calling Azure OpenAI...")
|
| 834 |
yield "", "", get_current_logs(), visualization
|