kotlarmilos committed on
Commit
9cbcd4d
·
verified ·
1 Parent(s): eb5ef66

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -59
app.py CHANGED
@@ -705,6 +705,40 @@ class InferencePipeline:
705
 
706
  return fig
707
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
708
  def get_current_logs():
709
  return log_stream.getvalue()
710
 
@@ -734,83 +768,67 @@ def analyze_pr_streaming(pr_url):
734
 
735
  code_description = pipeline.search_code_snippets(data["diff_hunks"])
736
 
 
 
737
  # Base prompt
738
- base_prompt = f"""You are an expert code reviewer. Analyze the PR below and provide detailed feedback with specific line-by-line suggestions:
739
- Provide a detailed code review including:
740
- 1. **Code Quality Issues**: Point out specific lines with problems
741
- 2. **Suggested Fixes**: Provide exact code suggestions with diff format
742
- 3. **Security Concerns**: Highlight any security vulnerabilities
743
- 4. **Performance Issues**: Identify potential performance problems
744
- 5. **Best Practices**: Suggest improvements following coding standards
745
- 6. **Testing Recommendations**: What tests should be added/modified
746
-
747
- Format your suggestions like this for each issue:
748
- **File: `filename.ext` Line X**
749
- Problem: [description]
750
- ```diff
751
- - old code line
752
- + suggested new code line
753
- ```
754
-
755
- TITLE: {data['title']}
756
-
757
- DESCRIPTION: {data['body']}
758
-
759
- CHANGED FILES: {', '.join(data['changed_files'])}
760
  """
761
 
762
- similar_file_groups_formatted = []
763
- for i, group in enumerate(similar_file_groups):
764
- files_str = ", ".join(group['files'])
765
- similar_file_groups_formatted.append(f"group {i}: {files_str}")
766
 
767
- anomalous_files_formatted = []
768
- for anomaly in anomalous_files:
769
- anomalous_files_formatted.append(f"anomaly: {anomaly['file']} (reason: {anomaly['reason']}, strength: {anomaly['anomaly_strength']})")
770
-
771
- grounding_formatted = ""
772
- for entry in code_description:
773
- file_name = entry['file_name']
774
- overlapping_functions = entry['overlapping_functions']
775
- diff_hunk = entry['diff_hunk']
776
-
777
- if len(overlapping_functions) > 0:
778
- grounding_formatted += f"In file {file_name}, the following changes were made: {diff_hunk}\n"
779
- grounding_formatted += f"These changes affected the following functions:\n"
780
- for func in overlapping_functions:
781
- grounding_formatted += f"{func['function_name']} - {func['function_description']}\n"
782
- else:
783
- grounding_formatted += f"In file {file_name}, the following changes were made: {diff_hunk}\n"
784
 
785
- grounding_formatted += "\n"
 
 
 
 
 
 
 
 
 
 
 
 
786
 
787
- # Create formatted strings for f-string
788
- similar_groups_text = "\n".join(similar_file_groups_formatted)
789
- anomalous_files_text = "\n".join(anomalous_files_formatted)
790
 
 
 
 
791
 
792
- # TODO: Add local LLM reasoning
793
- # TODO: Add relevant files from the directory not included
794
- comprehensive_prompt = f"""{base_prompt}
795
- FILES THAT ARE SEMANTICALLY CLOSE CHANGED IN THIS PR:
796
- {similar_groups_text}
797
 
798
- UNEXPECTED CHANGES IN FILES:
799
- {anomalous_files_text}
 
 
 
800
 
801
- GROUNDING DATA: The following provides specific information about which functions are affected by each diff hunk:
802
- {grounding_formatted}
803
- """
 
 
 
804
 
805
  base_prompt += f"""
806
  DIFF: {data['diff']}
807
  """
808
 
809
  logger.info(f"Base prompt word count: {len(base_prompt.split())}")
810
- # logger.info(f"Base prompt: {base_prompt}")
811
 
812
  logger.info(f"Comprehensive prompt word count: {len(comprehensive_prompt.split())}")
813
- # logger.info(f"Comprehensive prompt: {comprehensive_prompt}")
814
 
815
  logger.info("Calling Azure OpenAI...")
816
  yield "", "", get_current_logs(), visualization
 
705
 
706
  return fig
707
 
708
def build_structured_prompt(self, data: dict, sim_analysis: dict, code_desc: list) -> str:
    """Assemble the structured review prompt as one newline-joined string.

    Sections are emitted in this order: reviewer instructions, PR title and
    description, one "## Group" section per similar-file cluster, an
    "## Isolated Files" section (only when anomalies exist), a
    "## Diff Hunks and Context:" section (only when ``code_desc`` is
    non-empty), and a closing request line.

    Args:
        data: PR metadata; ``data['title']`` and ``data['body']`` are read.
        sim_analysis: Mapping that may hold ``'similar_file_groups'`` and
            ``'anomalous_files'`` lists. Every group must have ``'files'``;
            ``'cluster_id'`` and ``'avg_similarity'`` are used when present.
            Every anomaly must have ``'file'`` and ``'reason'``;
            ``'anomaly_strength'`` is optional.
        code_desc: Entries with ``'file_name'``, ``'diff_hunk'`` and
            ``'overlapping_functions'`` (items carrying ``'function_name'``
            and ``'function_description'``).

    Returns:
        The prompt text.
    """
    clusters = sim_analysis.get('similar_file_groups') or []
    anomalies = sim_analysis.get('anomalous_files') or []

    # Header
    prompt = [
        "You are an expert reviewer. First give group summaries, then detailed line-by-line feedback.",
        f"Title: {data['title']}",
        f"Description: {data['body']}",
    ]

    # Clusters
    for position, cluster in enumerate(clusters):
        files = cluster['files']
        # Fall back to the positional index when the cluster carries no id.
        cluster_id = cluster.get('cluster_id', position)
        stats = f"{len(files)} files"
        avg_similarity = cluster.get('avg_similarity')
        if avg_similarity is not None:
            stats += f", avg_sim={avg_similarity:.2f}"
        prompt.append(f"## Group {cluster_id} ({stats}): {', '.join(files)}")
        prompt.append("Files:")
        prompt.extend(f"- {path}" for path in files)
        prompt.append("Summary: Changes in these files share semantic pattern. Focus on shared logic.")

    # Anomalies
    if anomalies:
        prompt.append("## Isolated Files (low similarity with changed files)")
        prompt.extend(
            f"- {anomaly['file']} (reason: {anomaly['reason']}, strength: {anomaly.get('anomaly_strength')})"
            for anomaly in anomalies
        )

    # Grounding diffs; skip the header entirely when there is nothing to list,
    # mirroring how the anomalies section is handled.
    if code_desc:
        prompt.append("## Diff Hunks and Context:")
        for entry in code_desc:
            prompt.append(f"File: {entry['file_name']}\n{entry['diff_hunk']}")
            if entry['overlapping_functions']:
                prompt.append("Affected functions:")
                prompt.extend(
                    f"- {func['function_name']}: {func['function_description']}"
                    for func in entry['overlapping_functions']
                )

    # Request
    prompt.append("Provide feedback on groups, then isolated files. After that provide line-by-line feedback in diff format.")
    return "\n".join(prompt)
741
+
742
  def get_current_logs():
743
  return log_stream.getvalue()
744
 
 
768
 
769
  code_description = pipeline.search_code_snippets(data["diff_hunks"])
770
 
771
+ comprehensive_prompt = pipeline.build_structured_prompt(data, similarity_analysis, code_description)
772
+
773
  # Base prompt
774
+ base_prompt = f"""You are an expert reviewer. Provide detailed line-by-line feedback.
775
+ Title: {data['title']}
776
+ Description: {data['body']}
777
+ Diff: {data['diff']}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
778
  """
779
 
780
+ # similar_file_groups_formatted = []
781
+ # for i, group in enumerate(similar_file_groups):
782
+ # files_str = ", ".join(group['files'])
783
+ # similar_file_groups_formatted.append(f"group {i}: {files_str}")
784
 
785
+ # anomalous_files_formatted = []
786
+ # for anomaly in anomalous_files:
787
+ # anomalous_files_formatted.append(f"anomaly: {anomaly['file']} (reason: {anomaly['reason']}, strength: {anomaly['anomaly_strength']})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
788
 
789
+ # grounding_formatted = ""
790
+ # for entry in code_description:
791
+ # file_name = entry['file_name']
792
+ # overlapping_functions = entry['overlapping_functions']
793
+ # diff_hunk = entry['diff_hunk']
794
+
795
+ # if len(overlapping_functions) > 0:
796
+ # grounding_formatted += f"In file {file_name}, the following changes were made: {diff_hunk}\n"
797
+ # grounding_formatted += f"These changes affected the following functions:\n"
798
+ # for func in overlapping_functions:
799
+ # grounding_formatted += f"{func['function_name']} - {func['function_description']}\n"
800
+ # else:
801
+ # grounding_formatted += f"In file {file_name}, the following changes were made: {diff_hunk}\n"
802
 
803
+ # grounding_formatted += "\n"
 
 
804
 
805
+ # # Create formatted strings for f-string
806
+ # similar_groups_text = "\n".join(similar_file_groups_formatted)
807
+ # anomalous_files_text = "\n".join(anomalous_files_formatted)
808
 
 
 
 
 
 
809
 
810
+ # # TODO: Add local LLM reasoning
811
+ # # TODO: Add relevant files from the directory not included
812
+ # comprehensive_prompt = f"""{base_prompt}
813
+ # FILES THAT ARE SEMANTICALLY CLOSE CHANGED IN THIS PR:
814
+ # {similar_groups_text}
815
 
816
+ # UNEXPECTED CHANGES IN FILES:
817
+ # {anomalous_files_text}
818
+
819
+ # GROUNDING DATA: The following provides specific information about which functions are affected by each diff hunk:
820
+ # {grounding_formatted}
821
+ # """
822
 
823
  base_prompt += f"""
824
  DIFF: {data['diff']}
825
  """
826
 
827
  logger.info(f"Base prompt word count: {len(base_prompt.split())}")
828
+ logger.info(f"Base prompt: {base_prompt}")
829
 
830
  logger.info(f"Comprehensive prompt word count: {len(comprehensive_prompt.split())}")
831
+ logger.info(f"Comprehensive prompt: {comprehensive_prompt}")
832
 
833
  logger.info("Calling Azure OpenAI...")
834
  yield "", "", get_current_logs(), visualization