Spaces:
Sleeping
Sleeping
Za-heer
commited on
Commit
·
ae6148e
1
Parent(s):
c766e3c
Complete AI & Data Science assignment Checker
Browse files- .gitignore +47 -0
- .pylintrc +13 -16
- README.md +53 -1
- app.py +137 -72
- models/__init__.py +0 -0
- models/codebert.py +65 -0
- models/issues.py +71 -0
- requirements.txt +2 -2
- scripts/__init__.py +0 -0
- scripts/logger.py +20 -0
- static/style.css +0 -84
- templates/upload.html +0 -26
- testpylint.py +0 -14
- uploads/assignment_01.ipynb +0 -71
- uploads/assignment_01.py +0 -12
- uploads/assignment_02.py +0 -17
- uploads/assignment_03.py +0 -27
- uploads/assignment_04.ipynb +0 -52
- uploads/assignment_04.py +0 -16
- uploads/assignment_05.ipynb +0 -49
- uploads/assignment_06.py +0 -19
- uploads/assignment_07.py +0 -15
- uploads/assignment_09.py +0 -15
- uploads/test.py +0 -6
- utils/__init__.py +0 -0
- utils/code_utils.py +65 -0
.gitignore
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python-specific ignores
|
| 2 |
+
*.pyc
|
| 3 |
+
__pycache__/
|
| 4 |
+
*.pyo
|
| 5 |
+
*.pyd
|
| 6 |
+
.Python
|
| 7 |
+
env/
|
| 8 |
+
venv/
|
| 9 |
+
.venv/
|
| 10 |
+
ENV/
|
| 11 |
+
*.egg-info/
|
| 12 |
+
dist/
|
| 13 |
+
build/
|
| 14 |
+
*.whl
|
| 15 |
+
|
| 16 |
+
# Streamlit-specific ignores
|
| 17 |
+
.streamlit/
|
| 18 |
+
streamlit-geh.log
|
| 19 |
+
|
| 20 |
+
# Jupyter Notebook ignores
|
| 21 |
+
*.ipynb_checkpoints/
|
| 22 |
+
*.ipynb_checkpoints
|
| 23 |
+
|
| 24 |
+
# Pylint-related ignores
|
| 25 |
+
.pylintrc
|
| 26 |
+
# Temporary files and uploads
|
| 27 |
+
static/uploads/*
|
| 28 |
+
*.utf8
|
| 29 |
+
*.tmp
|
| 30 |
+
|
| 31 |
+
# Logs
|
| 32 |
+
*.log
|
| 33 |
+
logs/
|
| 34 |
+
|
| 35 |
+
# Environment and IDE files
|
| 36 |
+
.env
|
| 37 |
+
.vscode/
|
| 38 |
+
.idea/
|
| 39 |
+
*.swp
|
| 40 |
+
*.swo
|
| 41 |
+
|
| 42 |
+
# OS-specific files
|
| 43 |
+
.DS_Store
|
| 44 |
+
Thumbs.db
|
| 45 |
+
|
| 46 |
+
# Dependency directories
|
| 47 |
+
node_modules/
|
.pylintrc
CHANGED
|
@@ -1,20 +1,17 @@
|
|
| 1 |
[MASTER]
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
C0114, # Missing module docstring
|
| 5 |
-
C0115, # Missing class docstring
|
| 6 |
-
C0116, # Missing function docstring
|
| 7 |
-
R0903, # Too few public methods
|
| 8 |
-
W0311, # Bad indentation
|
| 9 |
-
W0703, # Broad-except
|
| 10 |
-
C0303, # Trailing whitespace
|
| 11 |
-
C0301, # Line too long
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
|
|
|
| 1 |
[MASTER]
|
| 2 |
+
ignore=venv
|
| 3 |
+
jobs=1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
+
[MESSAGES CONTROL]
|
| 6 |
+
disable=all
|
| 7 |
+
enable=E
|
| 8 |
+
|
| 9 |
+
[REPORTS]
|
| 10 |
+
output-format=colorized
|
| 11 |
+
reports=no
|
| 12 |
|
| 13 |
+
[LOGGING]
|
| 14 |
+
logging-modules=logging
|
| 15 |
|
| 16 |
+
[FORMAT]
|
| 17 |
+
max-line-length=120
|
README.md
CHANGED
|
@@ -1 +1,53 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Code Analyzer
|
| 2 |
+
A Streamlit-based application to analyze Python (.py) and Jupyter Notebook (.ipynb) files using CodeBERT and Pylint.
|
| 3 |
+
Project Structure
|
| 4 |
+
|
| 5 |
+
app.py: Main Streamlit application.
|
| 6 |
+
models/codebert.py: CodeBERT model loading and analysis logic.
|
| 7 |
+
utils/code_utils.py: Pylint and Jupyter Notebook extraction utilities.
|
| 8 |
+
static/uploads/: Folder for uploaded files.
|
| 9 |
+
requirements.txt: Project dependencies.
|
| 10 |
+
|
| 11 |
+
Setup Instructions
|
| 12 |
+
|
| 13 |
+
Clone or Set Up the Project:
|
| 14 |
+
|
| 15 |
+
Create a project folder named code_analyzer.
|
| 16 |
+
Place the files in the structure described above.
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
Install Dependencies:
|
| 20 |
+
|
| 21 |
+
Ensure Python 3.8+ is installed.
|
| 22 |
+
Run: pip install -r requirements.txt
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
Run the Application:
|
| 28 |
+
|
| 29 |
+
Navigate to the code_analyzer directory.
|
| 30 |
+
Run: streamlit run app.py
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
Open the provided URL (usually http://localhost:8501) in your browser.
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
Usage:
|
| 37 |
+
|
| 38 |
+
Upload a .py or .ipynb file via the Streamlit UI.
|
| 39 |
+
View the analysis report with CodeBERT and Pylint feedback.
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
Notes
|
| 44 |
+
|
| 45 |
+
The static/uploads folder is created automatically when app.py starts; create it manually only if the app cannot create it itself.
|
| 46 |
+
If using a GPU, CodeBERT will utilize CUDA if available.
|
| 47 |
+
Check the log file at ./log/app.log for debugging information.
|
| 48 |
+
|
| 49 |
+
Troubleshooting
|
| 50 |
+
|
| 51 |
+
Module Not Found: Verify all dependencies are installed (pip install -r requirements.txt).
|
| 52 |
+
File Upload Issues: Ensure files are valid .py or .ipynb.
|
| 53 |
+
CodeBERT Errors: Check internet connection for model downloading or GPU compatibility.
|
app.py
CHANGED
|
@@ -1,80 +1,145 @@
|
|
| 1 |
-
|
| 2 |
import os
|
| 3 |
-
|
| 4 |
-
import
|
| 5 |
-
from
|
| 6 |
-
from
|
| 7 |
-
from
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
|
| 10 |
-
UPLOAD_FOLDER = 'uploads'
|
| 11 |
-
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
|
| 12 |
-
app.static_folder = 'static'
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
model = AutoModel.from_pretrained("microsoft/codebert-base")
|
| 18 |
-
except Exception as e:
|
| 19 |
-
print(f"Error loading CodeBERT: {e}")
|
| 20 |
-
tokenizer = None
|
| 21 |
-
model = None
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
-
|
| 34 |
-
try:
|
| 35 |
-
# Read the code file
|
| 36 |
-
with open(file_path, 'r', encoding='utf-8') as file:
|
| 37 |
-
code = file.read()
|
| 38 |
-
|
| 39 |
-
# CodeBERT analysis
|
| 40 |
-
codebert_feedback = "CodeBERT not loaded."
|
| 41 |
-
if tokenizer and model:
|
| 42 |
-
inputs = tokenizer(code, return_tensors="pt", truncation=True, max_length=512)
|
| 43 |
-
with torch.no_grad():
|
| 44 |
-
outputs = model(**inputs)
|
| 45 |
-
codebert_feedback = f"Code analyzed with CodeBERT. Length: {len(code)} characters."
|
| 46 |
-
|
| 47 |
-
# Pylint analysis
|
| 48 |
-
pylint_feedback = pylint_check(file_path)
|
| 49 |
-
|
| 50 |
-
# Combine and format feedback
|
| 51 |
-
feedback = f"<h3>Analysis Report</h3><p><strong>CodeBERT Feedback:</strong> {codebert_feedback}</p><p><strong>Pylint Feedback:</strong><br><pre>{pylint_feedback}</pre></p>"
|
| 52 |
-
return feedback
|
| 53 |
-
except Exception as e:
|
| 54 |
-
return f"Error analyzing file: {str(e)}"
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
return render_template('upload.html')
|
| 59 |
|
| 60 |
-
|
| 61 |
-
def upload_file():
|
| 62 |
-
try:
|
| 63 |
-
if 'file' not in request.files:
|
| 64 |
-
return 'No file uploaded', 400
|
| 65 |
-
file = request.files['file']
|
| 66 |
-
if file.filename == '':
|
| 67 |
-
return 'No file selected', 400
|
| 68 |
-
if file and (file.filename.endswith('.py') or file.filename.endswith('.ipynb')):
|
| 69 |
-
file_path = os.path.abspath(os.path.join(app.config['UPLOAD_FOLDER'], file.filename))
|
| 70 |
-
file.save(file_path)
|
| 71 |
-
feedback = analyze_code(file_path)
|
| 72 |
-
return f'<h2>File {file.filename} uploaded successfully!</h2>{feedback}'
|
| 73 |
-
return 'Invalid file type', 400
|
| 74 |
-
except Exception as e:
|
| 75 |
-
return f'Error during upload: {str(e)}', 500
|
| 76 |
|
| 77 |
-
if
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
import os
|
| 3 |
+
import tempfile
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
from scripts.logger import get_logger
|
| 7 |
+
from models.codebert import analyze_with_codebert
|
| 8 |
+
from utils.code_utils import pylint_check, extract_code_from_ipynb
|
| 9 |
+
from charset_normalizer import detect
|
| 10 |
|
| 11 |
+
logger = get_logger(__name__)

# Uploaded files, transcoded copies and generated Excel reports all live here.
UPLOAD_FOLDER = 'static/uploads'
# exist_ok=True creates the folder when missing and is a no-op otherwise,
# without the check-then-create race of a separate os.path.exists() test.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Page title rendered as raw HTML/CSS (hence unsafe_allow_html): a gradient
# clipped to the text and shifted by the `rainbow` keyframes animation.
st.markdown("""
<style>
.rainbow {
font-size: 55px;
font-weight: bold;
text-align: center;
font-family: "Comic Sans MS", cursive;
animation: rainbow 5s infinite;
background: linear-gradient(90deg, red, orange, yellow, green, blue, indigo, violet);
background-size: 400%;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
@keyframes rainbow {
0% { background-position: 0%; }
100% { background-position: 400%; }
}
</style>
<div class="rainbow">Assignment Checker</div>
""", unsafe_allow_html=True)

st.write("Upload a Python (.py) or Jupyter Notebook (.ipynb) file to analyze its code quality.")

# Input for student ID or name; the value is copied into every result row.
student_id = st.text_input("Enter Student ID or Name", value="Unknown")

# Several files may be uploaded at once; each one is analyzed separately.
uploaded_files = st.file_uploader("Choose a file", type=['py', 'ipynb'], accept_multiple_files=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
+
if uploaded_files is not None:
    # One dict per analyzed file; becomes one row of the Excel report.
    results = []
    for uploaded_file in uploaded_files:
        # Keep only the last path component so a crafted upload name cannot
        # point outside UPLOAD_FOLDER.
        safe_name = os.path.basename(uploaded_file.name)
        file_path = os.path.join(UPLOAD_FOLDER, safe_name)
        # Files created only for this analysis; always removed in `finally`.
        temp_paths = []
        try:
            # Save uploaded file
            with open(file_path, 'wb') as f:
                f.write(uploaded_file.getvalue())

            st.write(f"File **{uploaded_file.name}** uploaded successfully!")
            is_ipynb = uploaded_file.name.endswith('.ipynb')

            with st.spinner("Analyzing code..."):
                # Path handed to Pylint; replaced below when a converted copy is needed.
                analysis_file_path = file_path

                if is_ipynb:
                    code = extract_code_from_ipynb(file_path)
                    if code is None:
                        st.error("Error: Could not extract code from .ipynb file.")
                        results.append({
                            'Student ID/Name': student_id,
                            'File Name': uploaded_file.name,
                            'CodeBERT Feedback': 'Error: Could not extract code from .ipynb file.',
                            'Pylint Feedback': 'N/A'
                        })
                        continue

                    # Pylint is given a file path, so the extracted notebook
                    # code is written to a temporary .py file.
                    with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as temp_file:
                        analysis_file_path = temp_file.name
                        temp_paths.append(analysis_file_path)
                        temp_file.write(code)
                    logger.debug(f"Temporary file content:\n{code}")
                else:
                    # Read file and detect encoding
                    with open(file_path, 'rb') as f:
                        raw_content = f.read()
                    # The detected encoding can be None when detection fails;
                    # fall back to UTF-8 instead of crashing in decode().
                    encoding = detect(raw_content)['encoding'] or 'utf-8'
                    logger.debug(f"Detected encoding for {file_path}: {encoding}")

                    if encoding.lower() not in ('utf-8', 'ascii'):
                        code = raw_content.decode(encoding)
                        # Save as UTF-8 for Pylint, then analyze that copy.
                        # (Previously this branch recorded a bogus ".ipynb"
                        # error and skipped the file entirely.)
                        temp_file_path = os.path.join(UPLOAD_FOLDER, f"utf8_{safe_name}")
                        temp_paths.append(temp_file_path)
                        with open(temp_file_path, 'w', encoding='utf-8') as f:
                            f.write(code)
                        analysis_file_path = temp_file_path
                    else:
                        code = raw_content.decode('utf-8')

                # Analyze with CodeBERT
                codebert_feedback = analyze_with_codebert(code)
                # Run Pylint
                pylint_feedback = pylint_check(analysis_file_path)

            results.append({
                'Student ID/Name': student_id,
                'File Name': uploaded_file.name,
                'CodeBERT Feedback': codebert_feedback,
                'Pylint Feedback': pylint_feedback
            })
            # Display results
            st.markdown(f"**Analysis Report**\n\n**CodeBERT Feedback:**\n{codebert_feedback}\n\n**Pylint Feedback:**\n```\n{pylint_feedback}\n```")
        except Exception as e:
            # Top-level boundary for one file: report it and move on to the next upload.
            logger.error(f"Error processing file: {e}")
            st.error(f"Error processing file: {str(e)}")
        finally:
            # Runs on success, on `continue` and on errors, so converted
            # copies never pile up.
            for temp_path in temp_paths:
                try:
                    os.unlink(temp_path)
                except OSError as e:
                    logger.warning(f"Failed to delete temporary file {temp_path}: {e}")

    # Save results to Excel
    if results:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        excel_path = os.path.join(UPLOAD_FOLDER, f"analysis_results_{timestamp}.xlsx")
        try:
            df = pd.DataFrame(results)
            # NOTE(review): writing .xlsx needs an Excel engine such as
            # openpyxl, which requirements.txt does not list - confirm it is
            # installed in the deployment environment.
            df.to_excel(excel_path, index=False)
            st.success(f"Analysis results saved to {excel_path}")
            # Provide download link
            with open(excel_path, 'rb') as f:
                st.download_button(
                    label="Download Analysis Results",
                    data=f,
                    file_name=f"analysis_results_{timestamp}.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )
        except Exception as e:
            logger.error(f"Error saving results to Excel: {e}")
            st.error(f"Error saving results to Excel: {str(e)}")
|
models/__init__.py
ADDED
|
File without changes
|
models/codebert.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import AutoTokenizer, AutoModel
|
| 2 |
+
import torch
|
| 3 |
+
import re
|
| 4 |
+
from .issues import check_common_issues, check_ml_issues
|
| 5 |
+
from scripts.logger import get_logger
|
| 6 |
+
|
| 7 |
+
logger = get_logger(__name__)
|
| 8 |
+
|
| 9 |
+
# Holds the (tokenizer, model) pair after the first successful load.
_CODEBERT_CACHE = {}


def load_codebert():
    """Load the CodeBERT tokenizer and model, reusing them after the first success.

    The ``microsoft/codebert-base`` tokenizer and model are loaded once per
    process and kept in a module-level cache, so repeated analyses do not
    reload them. A failed load is not cached; the next call tries again.

    Returns:
        A ``(tokenizer, model)`` tuple, or ``(None, None)`` when loading
        fails (the error is logged).
    """
    cached = _CODEBERT_CACHE.get("pair")
    if cached is not None:
        return cached

    try:
        tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
        model = AutoModel.from_pretrained("microsoft/codebert-base")
        if torch.cuda.is_available():
            model.to('cuda')
        logger.info("CodeBERT loaded successfully.")
    except Exception as e:
        # Boundary with the model-loading library: report the failure and let
        # the caller degrade gracefully.
        logger.error(f"Error loading CodeBERT: {e}")
        return None, None

    _CODEBERT_CACHE["pair"] = (tokenizer, model)
    return tokenizer, model
|
| 20 |
+
|
| 21 |
+
def strip_comments(code):
    """Remove ``#`` comments and triple-quoted blocks from ``code``.

    The removal is purely textual: everything from a ``#`` to the end of its
    line is dropped (including a ``#`` that sits inside a string literal),
    and every triple-quoted block is dropped whether it uses double or
    single quotes.

    Args:
        code: Python source text.

    Returns:
        The source text with those spans deleted; line breaks outside the
        removed spans are kept.
    """
    # Remove single-line comments
    code = re.sub(r'#.*$', '', code, flags=re.MULTILINE)
    # Remove triple-quoted blocks. The single-quote alternative is written as
    # its own raw string so it cannot collapse into an empty-match pattern.
    code = re.sub(r'"""[\s\S]*?"""' r"|'''[\s\S]*?'''", '', code)
    return code
|
| 28 |
+
|
| 29 |
+
def analyze_with_codebert(code):
    """Build textual feedback for a piece of Python source.

    The code is tokenized and passed through CodeBERT, then checked with
    ``check_common_issues`` and, when an ML library name appears in the
    text, with ``check_ml_issues``.

    Args:
        code: Python source text to analyze.

    Returns:
        The feedback lines joined with newlines; ``"CodeBERT not loaded."``
        when the model is unavailable; or ``"Error analyzing code: ..."``
        when any step raises.
    """
    tokenizer, model = load_codebert()
    if tokenizer is None or model is None:
        return "CodeBERT not loaded."

    try:
        # Comment-free copy for the heuristic checks.
        # NOTE(review): the tokenizer below still receives the original
        # `code`, comments included - confirm that is intended.
        clean_code = strip_comments(code)

        # Tokenize and run the code through CodeBERT.
        inputs = tokenizer(code, return_tensors="pt", truncation=True, max_length=512, padding=True)
        if torch.cuda.is_available():
            inputs = {k: v.to('cuda') for k, v in inputs.items()}
        with torch.no_grad():
            # The feedback below never reads the model output, so it is not
            # pooled or copied back to the CPU.
            model(**inputs)

        # Basic feedback based on code length
        feedback = [f"Code analyzed with CodeBERT. Length: {len(code)} characters."]
        if len(code) > 1000:
            feedback.append("Warning: Code is lengthy (>1000 characters), consider refactoring for readability.")

        # Heuristic checks for common issues
        feedback.extend(check_common_issues(clean_code))

        # Machine learning-specific checks if relevant
        if any(lib in code for lib in ["sklearn", "tensorflow", "torch"]):
            feedback.extend(check_ml_issues(code))

        # Only the header line is present: nothing was flagged.
        if len(feedback) == 1:
            feedback.append("No critical issues detected by heuristic checks.")
        return "\n".join(feedback)
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"Error analyzing code with CodeBERT: {e}")
        return f"Error analyzing code: {str(e)}"
|
| 65 |
+
|
models/issues.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ast
|
| 2 |
+
import re
|
| 3 |
+
import keyword
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def check_common_issues(code):
    """Check Python source text for a few common coding issues.

    Checks, in order: ``pd.``/``np.`` used without the matching import,
    names that are read but never defined, bare ``except:`` clauses, and
    lines longer than 120 characters (measured after stripping surrounding
    whitespace).

    Args:
        code: Python source text.

    Returns:
        A list of human-readable issue messages; empty when nothing is found.
    """
    # Imported here so the real module is used: inside an imported module the
    # `__builtins__` name may be a plain dict, and dir() of that lists dict
    # methods instead of built-in names.
    import builtins

    issues = []

    # Alias used without its import. The look-behind keeps `inp.` or `x.np.`
    # from counting as a use of the `np` alias.
    if re.search(r'(?<![\w.])pd\.', code) and "import pandas" not in code:
        issues.append("Error: 'pd' used but 'pandas' not imported.")
    if re.search(r'(?<![\w.])np\.', code) and "import numpy" not in code:
        issues.append("Error: 'np' used but 'numpy' not imported.")

    # Check for undefined variables using AST
    try:
        tree = ast.parse(code)
    except SyntaxError as e:
        issues.append(f"Warning: Syntax error in code: {str(e)}. Unable to check for undefined variables.")
    else:
        bound, used, has_star_import = _collect_bound_and_used_names(tree)
        # A star import can define any name, so the check would only guess.
        if not has_star_import:
            excluded = (
                set(keyword.kwlist)
                | set(dir(builtins))
                | {'numpy', 'pandas', 'sklearn', 'torch', 'tensorflow'}
            )
            # Sorted so the message is stable from run to run.
            undefined_vars = sorted(used - bound - excluded)
            if undefined_vars:
                issues.append(f"Warning: Undefined variables detected: {', '.join(undefined_vars)}.")

    # Bare except clauses: reported even when typed handlers also exist.
    if re.search(r'^[ \t]*except[ \t]*:', code, flags=re.MULTILINE):
        issues.append("Warning: Bare 'except:' clause detected. Specify exception type for better error handling.")

    # Check for overly long lines
    long_lines = [
        number
        for number, line in enumerate(code.split('\n'), start=1)
        if len(line.strip()) > 120
    ]
    if long_lines:
        issues.append(f"Warning: Lines {', '.join(map(str, long_lines))} exceed 120 characters. Consider reformatting.")

    return issues


def _collect_bound_and_used_names(tree):
    """Return ``(bound, used, has_star_import)`` for the names in ``tree``.

    ``bound`` holds names introduced by assignment targets, function and
    class definitions, function arguments, imports and ``except ... as``;
    ``used`` holds names that are read.
    """
    bound = set()
    used = set()
    has_star_import = False
    for node in ast.walk(tree):
        if isinstance(node, ast.Name):
            if isinstance(node.ctx, ast.Store):
                bound.add(node.id)
            elif isinstance(node.ctx, ast.Load):
                used.add(node.id)
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            bound.add(node.name)
        elif isinstance(node, ast.arg):
            bound.add(node.arg)
        elif isinstance(node, (ast.Import, ast.ImportFrom)):
            for alias in node.names:
                if alias.name == "*":
                    has_star_import = True
                else:
                    # `import a.b` binds `a`; `... as x` binds `x`.
                    bound.add(alias.asname or alias.name.split(".")[0])
        elif isinstance(node, ast.ExceptHandler) and node.name:
            bound.add(node.name)
    return bound, used, has_star_import
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def check_ml_issues(code):
    """Flag machine-learning workflow gaps found by substring search.

    Looks only for specific identifiers in the text of ``code``: an
    estimator name without scaling, without a train/test split or without
    cross-validation, and ``.predict(`` without a train/test split.

    Args:
        code: Python source text.

    Returns:
        A list of warning messages; empty when none of the patterns match.
    """
    # Facts about the text, gathered once and combined below.
    has_split = "train_test_split" in code
    has_cross_validation = "cross_val_score" in code or "GridSearchCV" in code
    uses_estimator = any(
        name in code for name in ("LogisticRegression", "RandomForest", "SVC")
    )

    findings = []

    # Unscaled input to logistic regression.
    if "LogisticRegression" in code and "StandardScaler" not in code:
        findings.append("Warning: LogisticRegression used without data scaling. Consider using StandardScaler for better performance.")

    # Estimator present but no held-out split.
    if uses_estimator and not has_split:
        findings.append("Warning: No train-test split detected. Use sklearn.model_selection.train_test_split to evaluate model performance.")

    # Estimator present but no cross-validation helper.
    if uses_estimator and not has_cross_validation:
        findings.append("Warning: No cross-validation detected. Consider using cross_val_score or GridSearchCV for robust model evaluation.")

    # Predictions made without a held-out split.
    if ".predict(" in code and not has_split:
        findings.append("Warning: Model prediction used without train-test split. Validate model on separate test data to avoid overfitting.")

    return findings
|
requirements.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
flask==3.0.3
|
| 2 |
transformers==4.44.2
|
| 3 |
torch==2.4.1
|
| 4 |
pandas==2.2.2
|
| 5 |
notebook==7.2.2
|
| 6 |
-
pylint==3.2.7
|
|
|
|
|
|
|
|
|
| 1 |
transformers==4.44.2
|
| 2 |
torch==2.4.1
|
| 3 |
pandas==2.2.2
|
| 4 |
notebook==7.2.2
|
| 5 |
+
pylint==3.2.7
|
| 6 |
+
streamlit==1.38.0
|
scripts/__init__.py
ADDED
|
File without changes
|
scripts/logger.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import warnings
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
# Silence every warning category for the whole process.
warnings.filterwarnings(action="ignore")

# The log folder must exist before basicConfig is pointed at a file inside it.
os.makedirs("./log", exist_ok=True)

# Root logger: INFO and above, written to ./log/app.log with a timestamp and
# level in front of each message.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filename="./log/app.log",
)
|
| 17 |
+
|
| 18 |
+
def get_logger(name):
    """Look up the logger registered under ``name`` via ``logging.getLogger``."""
    named_logger = logging.getLogger(name)
    return named_logger
|
static/style.css
DELETED
|
@@ -1,84 +0,0 @@
|
|
| 1 |
-
/* General Styling */
|
| 2 |
-
body {
|
| 3 |
-
background-color: #f8f9fa;
|
| 4 |
-
font-family: 'Arial', sans-serif;
|
| 5 |
-
}
|
| 6 |
-
|
| 7 |
-
/* Card Styling for Form and Feedback */
|
| 8 |
-
.card {
|
| 9 |
-
border-radius: 10px;
|
| 10 |
-
background-color: #ffffff;
|
| 11 |
-
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
|
| 12 |
-
}
|
| 13 |
-
|
| 14 |
-
/* Form Styling */
|
| 15 |
-
.form-label {
|
| 16 |
-
font-weight: bold;
|
| 17 |
-
color: #333;
|
| 18 |
-
}
|
| 19 |
-
|
| 20 |
-
.form-control {
|
| 21 |
-
border: 2px solid #ced4da;
|
| 22 |
-
border-radius: 5px;
|
| 23 |
-
transition: border-color 0.3s;
|
| 24 |
-
}
|
| 25 |
-
|
| 26 |
-
.form-control:focus {
|
| 27 |
-
border-color: #007bff;
|
| 28 |
-
box-shadow: 0 0 5px rgba(0, 123, 255, 0.5);
|
| 29 |
-
}
|
| 30 |
-
|
| 31 |
-
.btn-primary {
|
| 32 |
-
background-color: #007bff;
|
| 33 |
-
border: none;
|
| 34 |
-
padding: 10px 20px;
|
| 35 |
-
font-size: 1.1rem;
|
| 36 |
-
border-radius: 5px;
|
| 37 |
-
transition: background-color 0.3s;
|
| 38 |
-
}
|
| 39 |
-
|
| 40 |
-
.btn-primary:hover {
|
| 41 |
-
background-color: #0056b3;
|
| 42 |
-
}
|
| 43 |
-
|
| 44 |
-
/* Feedback Section Styling */
|
| 45 |
-
h2 {
|
| 46 |
-
color: #007bff;
|
| 47 |
-
margin-bottom: 20px;
|
| 48 |
-
text-align: center;
|
| 49 |
-
}
|
| 50 |
-
|
| 51 |
-
h3 {
|
| 52 |
-
color: #333;
|
| 53 |
-
font-size: 1.5rem;
|
| 54 |
-
margin-top: 20px;
|
| 55 |
-
border-bottom: 2px solid #007bff;
|
| 56 |
-
padding-bottom: 5px;
|
| 57 |
-
}
|
| 58 |
-
|
| 59 |
-
pre {
|
| 60 |
-
background-color: #f1f3f5;
|
| 61 |
-
padding: 15px;
|
| 62 |
-
border-radius: 5px;
|
| 63 |
-
font-size: 0.9rem;
|
| 64 |
-
white-space: pre-wrap;
|
| 65 |
-
word-wrap: break-word;
|
| 66 |
-
max-height: 400px;
|
| 67 |
-
overflow-y: auto;
|
| 68 |
-
border: 1px solid #ced4da;
|
| 69 |
-
}
|
| 70 |
-
|
| 71 |
-
/* Responsive Design */
|
| 72 |
-
@media (max-width: 576px) {
|
| 73 |
-
.card {
|
| 74 |
-
padding: 15px;
|
| 75 |
-
}
|
| 76 |
-
|
| 77 |
-
h1 {
|
| 78 |
-
font-size: 1.8rem;
|
| 79 |
-
}
|
| 80 |
-
|
| 81 |
-
.btn-primary {
|
| 82 |
-
width: 100%;
|
| 83 |
-
}
|
| 84 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
templates/upload.html
DELETED
|
@@ -1,26 +0,0 @@
|
|
| 1 |
-
<!DOCTYPE html>
|
| 2 |
-
<html lang="en">
|
| 3 |
-
|
| 4 |
-
<head>
|
| 5 |
-
<meta charset="UTF-8">
|
| 6 |
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
-
<title>Assignment Checker</title>
|
| 8 |
-
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
|
| 9 |
-
<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
|
| 10 |
-
</head>
|
| 11 |
-
|
| 12 |
-
<body class="container mt-5">
|
| 13 |
-
<h1 class="text-center mb-4">Assignment Checker</h1>
|
| 14 |
-
<div class="card p-4 shadow">
|
| 15 |
-
<h3>Upload Your Assignment</h3>
|
| 16 |
-
<form method="post" action="/upload" enctype="multipart/form-data">
|
| 17 |
-
<div class="mb-3">
|
| 18 |
-
<label for="file" class="form-label">Select a .py or .ipynb file:</label>
|
| 19 |
-
<input type="file" name="file" accept=".py,.ipynb" class="form-control">
|
| 20 |
-
</div>
|
| 21 |
-
<button type="submit" class="btn btn-primary">Upload</button>
|
| 22 |
-
</form>
|
| 23 |
-
</div>
|
| 24 |
-
</body>
|
| 25 |
-
|
| 26 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
testpylint.py
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
from pylint.lint import Run
|
| 2 |
-
from pylint.reporters.text import TextReporter
|
| 3 |
-
from io import StringIO
|
| 4 |
-
|
| 5 |
-
def pylint_check(file_path):
|
| 6 |
-
try:
|
| 7 |
-
output = StringIO()
|
| 8 |
-
reporter = TextReporter(output)
|
| 9 |
-
Run([file_path, '--disable=C0114,C0115,C0116,R0903,W0311,W0703,C0303,C0301', '--max-line-length=120'], reporter=reporter, exit=False)
|
| 10 |
-
return output.getvalue()
|
| 11 |
-
except Exception as e:
|
| 12 |
-
return f"Pylint error: {str(e)}"
|
| 13 |
-
|
| 14 |
-
print(pylint_check("uploads/test.py"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uploads/assignment_01.ipynb
DELETED
|
@@ -1,71 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"id": "c37acdaf",
|
| 6 |
-
"metadata": {},
|
| 7 |
-
"source": [
|
| 8 |
-
"# Assignment 1: Calculate the Factorial\n",
|
| 9 |
-
" \"Write a function to compute the factorial of a given number n.\n",
|
| 10 |
-
" The factorial of a number n is the product of all positive integers less than or equal to n.\n",
|
| 11 |
-
" For example, factorial of 5 is 5 * 4 * 3 * 2 * 1 = 120."
|
| 12 |
-
]
|
| 13 |
-
},
|
| 14 |
-
{
|
| 15 |
-
"cell_type": "code",
|
| 16 |
-
"execution_count": 1,
|
| 17 |
-
"id": "ac5b116f",
|
| 18 |
-
"metadata": {},
|
| 19 |
-
"outputs": [
|
| 20 |
-
{
|
| 21 |
-
"name": "stdout",
|
| 22 |
-
"output_type": "stream",
|
| 23 |
-
"text": [
|
| 24 |
-
"Factorial of 5 is 120\n"
|
| 25 |
-
]
|
| 26 |
-
}
|
| 27 |
-
],
|
| 28 |
-
"source": [
|
| 29 |
-
"def factorial(n):\n",
|
| 30 |
-
" if n == 0 or n == 1:\n",
|
| 31 |
-
" return 1\n",
|
| 32 |
-
" else:\n",
|
| 33 |
-
" return n * factorial(n - 1)\n",
|
| 34 |
-
" \n",
|
| 35 |
-
" # Test the function\\,\n",
|
| 36 |
-
"number = 5\n",
|
| 37 |
-
"result = factorial(number)\n",
|
| 38 |
-
"print(f'Factorial of {number} is {result}')"
|
| 39 |
-
]
|
| 40 |
-
},
|
| 41 |
-
{
|
| 42 |
-
"cell_type": "code",
|
| 43 |
-
"execution_count": null,
|
| 44 |
-
"id": "80df5cfc",
|
| 45 |
-
"metadata": {},
|
| 46 |
-
"outputs": [],
|
| 47 |
-
"source": []
|
| 48 |
-
}
|
| 49 |
-
],
|
| 50 |
-
"metadata": {
|
| 51 |
-
"kernelspec": {
|
| 52 |
-
"display_name": "ml_env",
|
| 53 |
-
"language": "python",
|
| 54 |
-
"name": "python3"
|
| 55 |
-
},
|
| 56 |
-
"language_info": {
|
| 57 |
-
"codemirror_mode": {
|
| 58 |
-
"name": "ipython",
|
| 59 |
-
"version": 3
|
| 60 |
-
},
|
| 61 |
-
"file_extension": ".py",
|
| 62 |
-
"mimetype": "text/x-python",
|
| 63 |
-
"name": "python",
|
| 64 |
-
"nbconvert_exporter": "python",
|
| 65 |
-
"pygments_lexer": "ipython3",
|
| 66 |
-
"version": "3.12.2"
|
| 67 |
-
}
|
| 68 |
-
},
|
| 69 |
-
"nbformat": 4,
|
| 70 |
-
"nbformat_minor": 5
|
| 71 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uploads/assignment_01.py
DELETED
|
@@ -1,12 +0,0 @@
|
|
| 1 |
-
# Assignment 1: Calculate the factorial of a number
|
| 2 |
-
# Write a function to compute the factorial of a given number n
|
| 3 |
-
def factorial(n):
|
| 4 |
-
if n == 0 or n == 1:
|
| 5 |
-
return 1
|
| 6 |
-
else:
|
| 7 |
-
return n * factorial(n - 1)
|
| 8 |
-
|
| 9 |
-
# Test the function
|
| 10 |
-
number = 5
|
| 11 |
-
result = factorial(number)
|
| 12 |
-
print(f"Factorial of {number} is {result}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uploads/assignment_02.py
DELETED
|
@@ -1,17 +0,0 @@
|
|
| 1 |
-
# Assignment 2: Check if a number is prime
|
| 2 |
-
# Write a function to check if a number is prime
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
def is_prime(n):
    """Return True if ``n`` is a prime number, otherwise False.

    Trial division is limited to odd divisors up to the integer square root
    of ``n``: any composite number has a divisor no larger than that.

    Args:
        n: Integer to test. Values less than or equal to 1 are not prime.

    Returns:
        bool: True when ``n`` is prime.

    Raises:
        TypeError: If ``n`` is greater than 1 but not an integer (raised by
            ``math.isqrt``, mirroring the ``range()`` failure of the
            original loop).
    """
    import math  # local import keeps this block self-contained

    if n <= 1:
        return False
    limit = math.isqrt(n)
    if n % 2 == 0:
        # 2 is the only even prime.
        return n == 2
    return all(n % divisor for divisor in range(3, limit + 1, 2))
|
| 12 |
-
|
| 13 |
-
# Test the function
|
| 14 |
-
num = 17
|
| 15 |
-
print(f"Is {num} prime? {is_prime(num)}")
|
| 16 |
-
print(f"Is 4 prime? {is_prime(4)}")
|
| 17 |
-
print(f"Is {undefined_var} prime?")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uploads/assignment_03.py
DELETED
|
@@ -1,27 +0,0 @@
|
|
| 1 |
-
# Assignment 3: EDA on Synthetic Dataset
|
| 2 |
-
# Perform EDA on a dataset of student scores
|
| 3 |
-
import pandas as pd
|
| 4 |
-
import numpy as np
|
| 5 |
-
|
| 6 |
-
# Synthetic dataset
|
| 7 |
-
data = {
|
| 8 |
-
'name': ['Alice', 'Bob', 'Charlie', 'David'],
|
| 9 |
-
'math_score': [85, 90, 78, 92],
|
| 10 |
-
'science_score': [88, 95, 80, 90]
|
| 11 |
-
}
|
| 12 |
-
df = pd.DataFrame(data)
|
| 13 |
-
|
| 14 |
-
# Calculate average scores
|
| 15 |
-
avg_math = df['math_score'].mean()
|
| 16 |
-
avg_science = df['science_score'].mean()
|
| 17 |
-
print(f"Average Math Score: {avg_math}")
|
| 18 |
-
print(f"Average Science Score: {avg_science}")
|
| 19 |
-
|
| 20 |
-
# Plot histogram of math scores
|
| 21 |
-
import matplotlib.pyplot as plt
|
| 22 |
-
plt.hist(df['math_score'], bins=5)
|
| 23 |
-
plt.title('Math Score Distribution')
|
| 24 |
-
plt.xlabel('Score')
|
| 25 |
-
plt.ylabel('Frequency')
|
| 26 |
-
plt.show() # Error: May not display in non-interactive environments
|
| 27 |
-
print(df['english_score']) # Error: 'english_score' column doesn't exist
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uploads/assignment_04.ipynb
DELETED
|
@@ -1,52 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"id": "86625251",
|
| 6 |
-
"metadata": {},
|
| 7 |
-
"source": [
|
| 8 |
-
"# Assignment 4: EDA - Correlation Analysis\n",
|
| 9 |
-
"Perform correlation analysis on a dataset of car features and prices.\n",
|
| 10 |
-
"Calculate correlation matrix and plot a heatmap."
|
| 11 |
-
]
|
| 12 |
-
},
|
| 13 |
-
{
|
| 14 |
-
"cell_type": "code",
|
| 15 |
-
"execution_count": null,
|
| 16 |
-
"id": "d3ae5847",
|
| 17 |
-
"metadata": {},
|
| 18 |
-
"outputs": [],
|
| 19 |
-
"source": [
|
| 20 |
-
"import pandas as pd\n",
|
| 21 |
-
"import seaborn as sns\n",
|
| 22 |
-
"import matplotlib.pyplot as plt\n",
|
| 23 |
-
"\n",
|
| 24 |
-
"# Synthetic dataset\n",
|
| 25 |
-
"data = {\n",
|
| 26 |
-
" 'horsepower': [120, 150, 100, 180],\n",
|
| 27 |
-
" 'weight': [3000, 3200, 2800, 3500],\n",
|
| 28 |
-
" 'price': [20000, 25000, 18000, 30000]\n",
|
| 29 |
-
"}\n",
|
| 30 |
-
"df = pd.DataFrame(data)\n",
|
| 31 |
-
"\n",
|
| 32 |
-
"# Correlation matrix\n",
|
| 33 |
-
"corr_matrix = df.corr()\n",
|
| 34 |
-
"\n",
|
| 35 |
-
"# Plot heatmap\n",
|
| 36 |
-
"sns.heatmap(corr_matrix, annot=True)\n",
|
| 37 |
-
"plt.title('Correlation Heatmap')\n",
|
| 38 |
-
"plt.show()\n",
|
| 39 |
-
"\n",
|
| 40 |
-
"# Error: Incorrect column name\n",
|
| 41 |
-
"print(df['Price'])"
|
| 42 |
-
]
|
| 43 |
-
}
|
| 44 |
-
],
|
| 45 |
-
"metadata": {
|
| 46 |
-
"language_info": {
|
| 47 |
-
"name": "python"
|
| 48 |
-
}
|
| 49 |
-
},
|
| 50 |
-
"nbformat": 4,
|
| 51 |
-
"nbformat_minor": 5
|
| 52 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uploads/assignment_04.py
DELETED
|
@@ -1,16 +0,0 @@
|
|
| 1 |
-
# Assignment 4: Linear Regression on Synthetic Data
|
| 2 |
-
# Build a linear regression model to predict house prices
|
| 3 |
-
import numpy as np
|
| 4 |
-
from sklearn.linear_model import LinearRegression
|
| 5 |
-
|
| 6 |
-
# Synthetic dataset
|
| 7 |
-
X = np.array([[1, 2], [2, 4], [3, 6], [4, 8]]) # Features: size, rooms
|
| 8 |
-
y = np.array([100, 200, 300, 400]) # Prices
|
| 9 |
-
|
| 10 |
-
# Train model
|
| 11 |
-
model = LinearRegression()
|
| 12 |
-
model.fit(X, y)
|
| 13 |
-
|
| 14 |
-
# Predict on new data
|
| 15 |
-
new_data = np.array([5, 10]) # Error: Shape mismatch, should be [[5, 10]]
|
| 16 |
-
print(f"Predicted price: {model.predict(new_data)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uploads/assignment_05.ipynb
DELETED
|
@@ -1,49 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"id": "10d2cf4b",
|
| 6 |
-
"metadata": {},
|
| 7 |
-
"source": [
|
| 8 |
-
"# Assignment 5: Decision Tree Classifier\n",
|
| 9 |
-
"Train a decision tree classifier on a synthetic dataset of customer purchases.\n",
|
| 10 |
-
"Predict whether a customer will buy based on age and income."
|
| 11 |
-
]
|
| 12 |
-
},
|
| 13 |
-
{
|
| 14 |
-
"cell_type": "code",
|
| 15 |
-
"execution_count": null,
|
| 16 |
-
"id": "10abdab6",
|
| 17 |
-
"metadata": {},
|
| 18 |
-
"outputs": [],
|
| 19 |
-
"source": [
|
| 20 |
-
"import pandas as pd\n",
|
| 21 |
-
"from sklearn.tree import DecisionTreeClassifier\n",
|
| 22 |
-
"\n",
|
| 23 |
-
"# Synthetic dataset\n",
|
| 24 |
-
"data = {\n",
|
| 25 |
-
" 'age': [25, 30, 35, 40],\n",
|
| 26 |
-
" 'income': [50000, 60000, 55000, 70000],\n",
|
| 27 |
-
" 'buy': [0, 1, 0, 1]\n",
|
| 28 |
-
"}\n",
|
| 29 |
-
"df = pd.DataFrame(data)\n",
|
| 30 |
-
"\n",
|
| 31 |
-
"# Train model\n",
|
| 32 |
-
"X = df[['age', 'income']]\n",
|
| 33 |
-
"y = df['buy']\n",
|
| 34 |
-
"model = DecisionTreeClassifier()\n",
|
| 35 |
-
"model.fit(X, y) # Error: Missing arguments, should be model.fit(X, y)\\n\",\n",
|
| 36 |
-
"\n",
|
| 37 |
-
"# Predict\\n\",\n",
|
| 38 |
-
"print(model.predict([[30, 65000]]))"
|
| 39 |
-
]
|
| 40 |
-
}
|
| 41 |
-
],
|
| 42 |
-
"metadata": {
|
| 43 |
-
"language_info": {
|
| 44 |
-
"name": "python"
|
| 45 |
-
}
|
| 46 |
-
},
|
| 47 |
-
"nbformat": 4,
|
| 48 |
-
"nbformat_minor": 5
|
| 49 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uploads/assignment_06.py
DELETED
|
@@ -1,19 +0,0 @@
|
|
| 1 |
-
# Assignment 6: Missing Value Imputation
|
| 2 |
-
# Impute missing values in a dataset using mean strategy
|
| 3 |
-
import pandas as pd
|
| 4 |
-
import numpy as np
|
| 5 |
-
from sklearn.impute import SimpleImputer
|
| 6 |
-
|
| 7 |
-
# Synthetic dataset
|
| 8 |
-
data = {
|
| 9 |
-
'feature1': [1, 2, np.nan, 4],
|
| 10 |
-
'feature2': [10, np.nan, 30, 40]
|
| 11 |
-
}
|
| 12 |
-
df = pd.DataFrame(data)
|
| 13 |
-
|
| 14 |
-
# Impute missing values
|
| 15 |
-
imputer = SimpleImputer(strategy='mean')
|
| 16 |
-
df_imputed = imputer.fit_transform(df)
|
| 17 |
-
|
| 18 |
-
# Error: Incorrectly printing DataFrame as array
|
| 19 |
-
print(df_imputed['feature1']) # Should use pd.DataFrame(df_imputed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uploads/assignment_07.py
DELETED
|
@@ -1,15 +0,0 @@
|
|
| 1 |
-
# Assignment 7: Feature Engineering - Polynomial Features
|
| 2 |
-
# Create polynomial features for a regression model
|
| 3 |
-
from sklearn.preprocessing import PolynomialFeatures
|
| 4 |
-
import numpy as np
|
| 5 |
-
|
| 6 |
-
# Synthetic dataset
|
| 7 |
-
X = np.array([[1], [2], [3], [4]])
|
| 8 |
-
|
| 9 |
-
# Create polynomial features
|
| 10 |
-
poly = PolynomialFeatures(degree=2)
|
| 11 |
-
X_poly = poly.fit_transform(X)
|
| 12 |
-
|
| 13 |
-
# Error: Incorrectly accessing feature names
|
| 14 |
-
print(poly.feature_names) # Error: Should use get_feature_names_out()
|
| 15 |
-
print(X_poly)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uploads/assignment_09.py
DELETED
|
@@ -1,15 +0,0 @@
|
|
| 1 |
-
# Assignment 9: PCA for Dimensionality Reduction
|
| 2 |
-
# Apply PCA to reduce dimensions of a dataset
|
| 3 |
-
from sklearn.decomposition import PCA
|
| 4 |
-
import numpy as np
|
| 5 |
-
|
| 6 |
-
# Synthetic dataset
|
| 7 |
-
X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
|
| 8 |
-
|
| 9 |
-
# Apply PCA
|
| 10 |
-
pca = PCA(n_components=2)
|
| 11 |
-
X_reduced = pca.fit_transform(X)
|
| 12 |
-
|
| 13 |
-
# Error: Accessing undefined attribute
|
| 14 |
-
print(pca.explained_variance) # Should be explained_variance_ratio_
|
| 15 |
-
print(X_reduced)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uploads/test.py
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
# test.py
|
| 2 |
-
def factorial(n):
    """Return ``n!`` for a non-negative integer ``n``.

    Computed with a loop so large arguments do not exhaust the recursion
    limit.

    Args:
        n: Non-negative integer.

    Returns:
        The factorial of ``n``; ``1`` when ``n`` is 0.

    Raises:
        ValueError: If ``n`` is negative. The recursive version would recurse
            past zero without ever hitting its ``n == 0`` base case.
    """
    if n < 0:
        raise ValueError("factorial() is not defined for negative values")
    total = 1
    for k in range(2, n + 1):
        total *= k
    return total
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/__init__.py
ADDED
|
File without changes
|
utils/code_utils.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pylint.lint import Run
|
| 2 |
+
from pylint.reporters.text import TextReporter
|
| 3 |
+
from nbformat import read
|
| 4 |
+
import os
|
| 5 |
+
from io import StringIO
|
| 6 |
+
from scripts.logger import get_logger
|
| 7 |
+
from charset_normalizer import detect
|
| 8 |
+
|
| 9 |
+
logger = get_logger(__name__)
|
| 10 |
+
|
| 11 |
+
def pylint_check(file_path):
    """Run pylint on ``file_path`` and return its text report.

    The file is read as bytes and its encoding is detected. When the content
    is not already UTF-8 or ASCII, a UTF-8 copy is written next to it as
    ``<file_path>.utf8``, pylint is run on that copy, and the copy is removed
    afterwards.

    Args:
        file_path: Path of the Python source file to lint.

    Returns:
        str: The pylint text output; a fixed "no issues" message when pylint
        printed nothing; or a string starting with ``"Pylint error: "`` when
        any step failed. Failures are logged and reported in the return
        value, never raised.
    """
    import codecs  # local import: only used to normalise codec names here

    logger.debug(f"Checking file: {file_path}")
    # Set only when this call creates a converted copy, so cleanup can never
    # touch a caller-supplied file that merely happens to end in ".utf8".
    temp_file_path = None
    try:
        with open(file_path, 'rb') as f:
            raw_content = f.read()

        detected_name = detect(raw_content)['encoding']
        try:
            # Normalise spellings such as "UTF-8" / "utf_8" to one canonical
            # name before comparing.
            encoding = codecs.lookup(detected_name).name if detected_name else None
        except LookupError:
            encoding = None

        target_path = file_path
        if encoding not in ('utf-8', 'ascii'):
            logger.debug(f"File {file_path} is not UTF-8 (detected {detected_name}). Converting to UTF-8.")
            # The detector may report no usable encoding; fall back to UTF-8
            # with replacement so the file can still be linted.
            content = raw_content.decode(encoding or 'utf-8', errors='replace')
            temp_file_path = file_path + '.utf8'
            with open(temp_file_path, 'w', encoding='utf-8') as f:
                f.write(content)
            target_path = temp_file_path

        output = StringIO()
        reporter = TextReporter(output)
        Run([target_path, '--disable=C0114,C0115,W0311,W0703,C0116,R0903,C0303,C0301', '--max-line-length=120'], reporter=reporter, exit=False)
        pylint_output = output.getvalue()

        return pylint_output if pylint_output.strip() else "No critical issues found. Code looks good!"
    except Exception as e:
        logger.error(f"Pylint error: {e}")
        return f"Pylint error: {str(e)}"
    finally:
        # Remove the converted copy on both the success and the failure path.
        if temp_file_path is not None and os.path.exists(temp_file_path):
            try:
                os.unlink(temp_file_path)
            except OSError as e:
                logger.warning(f"Failed to delete temporary file {temp_file_path}: {e}")
|
| 43 |
+
|
| 44 |
+
def extract_code_from_ipynb(file_path):
    """Collect the source of every code cell in a notebook into one string.

    The notebook is opened as UTF-8 text and parsed as nbformat version 4.
    Each code cell's source has all non-ASCII characters dropped and is
    followed by a newline; the pieces are concatenated in cell order.

    Args:
        file_path: Path of the ``.ipynb`` file.

    Returns:
        The concatenated code, or ``None`` when the result is empty or
        whitespace-only, or when reading/parsing fails (failures are logged).
    """
    logger.debug(f"Extracting code from: {file_path}")
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            nb = read(handle, as_version=4)

        pieces = []
        for cell in nb.cells:
            if cell.cell_type != 'code':
                continue
            text = cell.source
            if isinstance(text, list):
                text = ''.join(text)
            # Keep ASCII only: anything with a code point of 128 or above is dropped.
            ascii_text = text.encode('ascii', errors='ignore').decode('ascii')
            pieces.append(ascii_text + '\n')

        code = ''.join(pieces)
        if code.strip():
            return code
        logger.warning("No code cells found in .ipynb file.")
        return None
    except UnicodeDecodeError as e:
        logger.error(f"Error decoding .ipynb file: {e}")
        return None
    except Exception as e:
        logger.error(f"Error extracting code from .ipynb: {e}")
        return None
|