Za-heer committed on
Commit
ae6148e
·
1 Parent(s): c766e3c

Complete AI & Data Science assignment Checker

Browse files
.gitignore ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-specific ignores
2
+ *.pyc
3
+ __pycache__/
4
+ *.pyo
5
+ *.pyd
6
+ .Python
7
+ env/
8
+ venv/
9
+ .venv/
10
+ ENV/
11
+ *.egg-info/
12
+ dist/
13
+ build/
14
+ *.whl
15
+
16
+ # Streamlit-specific ignores
17
+ .streamlit/
18
+ streamlit-geh.log
19
+
20
+ # Jupyter Notebook ignores
21
+ *.ipynb_checkpoints/
22
+ *.ipynb_checkpoints
23
+
24
+ # Pylint-related ignores
25
+ .pylintrc
26
+ # Temporary files and uploads
27
+ static/uploads/*
28
+ *.utf8
29
+ *.tmp
30
+
31
+ # Logs
32
+ *.log
33
+ logs/
34
+
35
+ # Environment and IDE files
36
+ .env
37
+ .vscode/
38
+ .idea/
39
+ *.swp
40
+ *.swo
41
+
42
+ # OS-specific files
43
+ .DS_Store
44
+ Thumbs.db
45
+
46
+ # Dependency directories
47
+ node_modules/
.pylintrc CHANGED
@@ -1,20 +1,17 @@
1
  [MASTER]
2
- # Disable specific warnings/errors to make Pylint less strict
3
- disable=
4
- C0114, # Missing module docstring
5
- C0115, # Missing class docstring
6
- C0116, # Missing function docstring
7
- R0903, # Too few public methods
8
- W0311, # Bad indentation
9
- W0703, # Broad-except
10
- C0303, # Trailing whitespace
11
- C0301, # Line too long
12
 
13
- # Set maximum line length to a reasonable value
14
- max-line-length=120
 
 
 
 
 
15
 
16
- # Allow fewer checks for small scripts
17
- min-similarity-lines=6
18
 
19
- # Ignore minor style issues
20
- ignore-patterns=.*\.ipynb
 
1
  [MASTER]
2
+ ignore=venv
3
+ jobs=1
 
 
 
 
 
 
 
 
4
 
5
+ [MESSAGES CONTROL]
6
+ disable=all
7
+ enable=E
8
+
9
+ [REPORTS]
10
+ output-format=colorized
11
+ reports=no
12
 
13
+ [LOGGING]
14
+ logging-modules=logging
15
 
16
+ [FORMAT]
17
+ max-line-length=120
README.md CHANGED
@@ -1 +1,53 @@
1
- # AI_Assignment_Checker
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Code Analyzer
2
+ A Streamlit-based application to analyze Python (.py) and Jupyter Notebook (.ipynb) files using CodeBERT and Pylint.
3
+ Project Structure
4
+
5
+ app.py: Main Streamlit application.
6
+ models/codebert.py: CodeBERT model loading and analysis logic.
7
+ utils/code_utils.py: Pylint and Jupyter Notebook extraction utilities.
8
+ static/uploads/: Folder for uploaded files.
9
+ requirements.txt: Project dependencies.
10
+
11
+ Setup Instructions
12
+
13
+ Clone or Set Up the Project:
14
+
15
+ Create a project folder named code_analyzer.
16
+ Place the files in the structure described above.
17
+
18
+
19
+ Install Dependencies:
20
+
21
+ Ensure Python 3.8+ is installed.
22
+ Run: pip install -r requirements.txt
23
+
24
+
25
+
26
+
27
+ Run the Application:
28
+
29
+ Navigate to the code_analyzer directory.
30
+ Run: streamlit run app.py
31
+
32
+
33
+ Open the provided URL (usually http://localhost:8501) in your browser.
34
+
35
+
36
+ Usage:
37
+
38
+ Upload a .py or .ipynb file via the Streamlit UI.
39
+ View the analysis report with CodeBERT and Pylint feedback.
40
+
41
+
42
+
43
+ Notes
44
+
45
+ The static/uploads folder is created automatically when app.py starts; create it manually only if that fails (for example, because of missing write permissions).
46
+ If using a GPU, CodeBERT will utilize CUDA if available.
47
+ Check log/app.log for debugging information; application log messages are written to that file.
48
+
49
+ Troubleshooting
50
+
51
+ Module Not Found: Verify all dependencies are installed (pip install -r requirements.txt).
52
+ File Upload Issues: Ensure files are valid .py or .ipynb.
53
+ CodeBERT Errors: Check internet connection for model downloading or GPU compatibility.
app.py CHANGED
@@ -1,80 +1,145 @@
1
- from flask import Flask, request, render_template
2
  import os
3
- from transformers import AutoTokenizer, AutoModel
4
- import torch
5
- from pylint.lint import Run
6
- from pylint.reporters.text import TextReporter
7
- from io import StringIO
 
 
8
 
9
- app = Flask(__name__)
10
- UPLOAD_FOLDER = 'uploads'
11
- app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
12
- app.static_folder = 'static'
13
 
14
- # Load CodeBERT with error handling
15
- try:
16
- tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
17
- model = AutoModel.from_pretrained("microsoft/codebert-base")
18
- except Exception as e:
19
- print(f"Error loading CodeBERT: {e}")
20
- tokenizer = None
21
- model = None
22
 
23
- def pylint_check(file_path):
24
- try:
25
- output = StringIO()
26
- reporter = TextReporter(output)
27
- Run([file_path, '--disable=C0114,C0115,W0311,W0703,C0116,R0903,C0303,C0301', '--max-line-length=120'], reporter=reporter, exit=False)
28
- pylint_output = output.getvalue()
29
- return pylint_output if pylint_output.strip() else "No critical issues found. Code looks good!"
30
- except Exception as e:
31
- return f"Pylint error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- def analyze_code(file_path):
34
- try:
35
- # Read the code file
36
- with open(file_path, 'r', encoding='utf-8') as file:
37
- code = file.read()
38
-
39
- # CodeBERT analysis
40
- codebert_feedback = "CodeBERT not loaded."
41
- if tokenizer and model:
42
- inputs = tokenizer(code, return_tensors="pt", truncation=True, max_length=512)
43
- with torch.no_grad():
44
- outputs = model(**inputs)
45
- codebert_feedback = f"Code analyzed with CodeBERT. Length: {len(code)} characters."
46
-
47
- # Pylint analysis
48
- pylint_feedback = pylint_check(file_path)
49
-
50
- # Combine and format feedback
51
- feedback = f"<h3>Analysis Report</h3><p><strong>CodeBERT Feedback:</strong> {codebert_feedback}</p><p><strong>Pylint Feedback:</strong><br><pre>{pylint_feedback}</pre></p>"
52
- return feedback
53
- except Exception as e:
54
- return f"Error analyzing file: {str(e)}"
55
 
56
- @app.route('/')
57
- def index():
58
- return render_template('upload.html')
59
 
60
- @app.route('/upload', methods=['POST'])
61
- def upload_file():
62
- try:
63
- if 'file' not in request.files:
64
- return 'No file uploaded', 400
65
- file = request.files['file']
66
- if file.filename == '':
67
- return 'No file selected', 400
68
- if file and (file.filename.endswith('.py') or file.filename.endswith('.ipynb')):
69
- file_path = os.path.abspath(os.path.join(app.config['UPLOAD_FOLDER'], file.filename))
70
- file.save(file_path)
71
- feedback = analyze_code(file_path)
72
- return f'<h2>File {file.filename} uploaded successfully!</h2>{feedback}'
73
- return 'Invalid file type', 400
74
- except Exception as e:
75
- return f'Error during upload: {str(e)}', 500
76
 
77
- if __name__ == '__main__':
78
- if not os.path.exists(UPLOAD_FOLDER):
79
- os.makedirs(UPLOAD_FOLDER)
80
- app.run(debug=True, port=5000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  import os
3
+ import tempfile
4
+ import pandas as pd
5
+ from datetime import datetime
6
+ from scripts.logger import get_logger
7
+ from models.codebert import analyze_with_codebert
8
+ from utils.code_utils import pylint_check, extract_code_from_ipynb
9
+ from charset_normalizer import detect
10
 
11
logger = get_logger(__name__)

UPLOAD_FOLDER = 'static/uploads'
# exist_ok=True avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Page banner: an animated rainbow title injected as raw HTML/CSS.
st.markdown("""
<style>
.rainbow {
font-size: 55px;
font-weight: bold;
text-align: center;
font-family: "Comic Sans MS", cursive;
animation: rainbow 5s infinite;
background: linear-gradient(90deg, red, orange, yellow, green, blue, indigo, violet);
background-size: 400%;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
@keyframes rainbow {
0% { background-position: 0%; }
100% { background-position: 400%; }
}
</style>
<div class="rainbow">Assignment Checker</div>
""", unsafe_allow_html=True)

st.write("Upload a Python (.py) or Jupyter Notebook (.ipynb) file to analyze its code quality.")

# Input for student ID or name
student_id = st.text_input("Enter Student ID or Name", value="Unknown")

uploaded_files = st.file_uploader("Choose a file", type=['py', 'ipynb'], accept_multiple_files=True)

if uploaded_files:
    results = []
    for uploaded_file in uploaded_files:
        # Keep only the last path component so a crafted upload name cannot
        # point outside UPLOAD_FOLDER.
        safe_name = os.path.basename(uploaded_file.name)
        file_path = os.path.join(UPLOAD_FOLDER, safe_name)
        # Path of a derived file (extracted notebook code or UTF-8 copy) that
        # must be deleted once this upload has been processed.
        temp_path = None
        try:
            # Save uploaded file
            with open(file_path, 'wb') as f:
                f.write(uploaded_file.getvalue())

            st.write(f"File **{uploaded_file.name}** uploaded successfully!")
            is_ipynb = uploaded_file.name.endswith('.ipynb')

            with st.spinner("Analyzing code..."):
                analysis_file_path = file_path
                if is_ipynb:
                    code = extract_code_from_ipynb(file_path)
                    if code is None:
                        st.error("Error: Could not extract code from .ipynb file.")
                        results.append({
                            'Student ID/Name': student_id,
                            'File Name': uploaded_file.name,
                            'CodeBERT Feedback': 'Error: Could not extract code from .ipynb file.',
                            'Pylint Feedback': 'N/A'
                        })
                        continue
                    # For .ipynb files, save extracted code to a temporary .py file
                    with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as temp_file:
                        temp_file.write(code)
                        temp_path = temp_file.name
                    analysis_file_path = temp_path
                    logger.debug("Temporary file content:\n%s", code)
                else:
                    # Read file and detect encoding
                    with open(file_path, 'rb') as f:
                        raw_content = f.read()
                    encoding = detect(raw_content)['encoding']
                    logger.debug("Detected encoding for %s: %s", file_path, encoding)

                    if encoding in ('utf-8', 'ascii'):
                        code = raw_content.decode('utf-8')
                    else:
                        # The detected encoding may be None; fall back to UTF-8 and
                        # replace undecodable bytes instead of failing the upload.
                        code = raw_content.decode(encoding or 'utf-8', errors='replace')
                        # Save as UTF-8 for Pylint, then analyse that copy.
                        temp_path = os.path.join(UPLOAD_FOLDER, f"utf8_{safe_name}")
                        with open(temp_path, 'w', encoding='utf-8') as f:
                            f.write(code)
                        analysis_file_path = temp_path

                # Analyze with CodeBERT
                codebert_feedback = analyze_with_codebert(code)
                # Run Pylint
                pylint_feedback = pylint_check(analysis_file_path)

            results.append({
                'Student ID/Name': student_id,
                'File Name': uploaded_file.name,
                'CodeBERT Feedback': codebert_feedback,
                'Pylint Feedback': pylint_feedback
            })
            # Display results
            st.markdown(f"**Analysis Report**\n\n**CodeBERT Feedback:**\n{codebert_feedback}\n\n**Pylint Feedback:**\n```\n{pylint_feedback}\n```")
        except Exception as e:
            # Top-level boundary for one upload: report it and move on to the next file.
            logger.error("Error processing file: %s", e)
            st.error(f"Error processing file: {str(e)}")
        finally:
            # Runs on success, on `continue`, and on error, so derived files never leak.
            if temp_path is not None and os.path.exists(temp_path):
                try:
                    os.unlink(temp_path)
                except OSError as e:
                    logger.warning("Failed to delete temporary file %s: %s", temp_path, e)

    # Save results to Excel
    if results:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        excel_path = os.path.join(UPLOAD_FOLDER, f"analysis_results_{timestamp}.xlsx")
        try:
            df = pd.DataFrame(results)
            df.to_excel(excel_path, index=False)
            st.success(f"Analysis results saved to {excel_path}")
            # Provide download link
            with open(excel_path, 'rb') as f:
                st.download_button(
                    label="Download Analysis Results",
                    data=f,
                    file_name=f"analysis_results_{timestamp}.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )
        except Exception as e:
            logger.error("Error saving results to Excel: %s", e)
            st.error(f"Error saving results to Excel: {str(e)}")
models/__init__.py ADDED
File without changes
models/codebert.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModel
2
+ import torch
3
+ import re
4
+ from .issues import check_common_issues, check_ml_issues
5
+ from scripts.logger import get_logger
6
+
7
+ logger = get_logger(__name__)
8
+
9
# (tokenizer, model) pair kept after the first successful load so repeated
# analyses do not reload the model each time.
_CODEBERT_CACHE = None


def load_codebert():
    """Load the CodeBERT tokenizer and model, reusing a previous successful load.

    The model is moved to CUDA when ``torch.cuda.is_available()`` is true.

    Returns:
        A ``(tokenizer, model)`` tuple, or ``(None, None)`` when loading fails.
        Failures are logged and not cached, so a later call tries again.
    """
    global _CODEBERT_CACHE
    if _CODEBERT_CACHE is not None:
        return _CODEBERT_CACHE
    try:
        tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
        model = AutoModel.from_pretrained("microsoft/codebert-base")
        if torch.cuda.is_available():
            model.to('cuda')
        logger.info("CodeBERT loaded successfully.")
    except Exception as e:
        logger.error(f"Error loading CodeBERT: {e}")
        return None, None
    _CODEBERT_CACHE = (tokenizer, model)
    return _CODEBERT_CACHE
20
+
21
def strip_comments(code):
    """Remove ``#`` comments and triple-quoted blocks from *code*.

    Both triple-quote styles are removed. The matching is purely textual:
    a ``#`` inside a string literal is treated as a comment start, and a
    triple-quoted string used as a value is removed like a docstring.

    Args:
        code: Source text to clean.

    Returns:
        The text with comments and triple-quoted blocks deleted; line breaks
        that followed a comment are kept.
    """
    # Remove single-line comments (everything from '#' to the end of the line).
    code = re.sub(r'#.*$', '', code, flags=re.MULTILINE)
    # Remove multi-line comments (docstrings or triple-quoted strings) in
    # either quoting style; the lazy quantifier stops at the nearest closer.
    code = re.sub(r'"""[\s\S]*?"""|\'\'\'[\s\S]*?\'\'\'', '', code)
    return code
28
+
29
def analyze_with_codebert(code):
    """Run *code* through CodeBERT and collect heuristic feedback.

    Returns a newline-joined feedback string. When the model cannot be loaded
    the fixed text "CodeBERT not loaded." is returned, and any exception raised
    during analysis is logged and reported as "Error analyzing code: ...".
    """
    tokenizer, model = load_codebert()
    if tokenizer is None or model is None:
        return "CodeBERT not loaded."

    try:
        # Comment-free copy of the source, used by the common-issue checks below.
        stripped_source = strip_comments(code)

        # Forward pass over the original text, truncated to 512 tokens.
        encoded = tokenizer(code, return_tensors="pt", truncation=True, max_length=512, padding=True)
        if torch.cuda.is_available():
            encoded = {key: tensor.to('cuda') for key, tensor in encoded.items()}
        with torch.no_grad():
            model_output = model(**encoded)
            # Mean-pooled hidden states; computed but not used by the feedback below.
            embeddings = model_output.last_hidden_state.mean(dim=1).cpu().numpy()

        # First entry is always the summary line.
        messages = [f"Code analyzed with CodeBERT. Length: {len(code)} characters."]
        if len(code) > 1000:
            messages.append("Warning: Code is lengthy (>1000 characters), consider refactoring for readability.")

        # Generic heuristics run on the comment-free text.
        messages.extend(check_common_issues(stripped_source))

        # ML heuristics run on the original text, only when an ML library is mentioned.
        mentions_ml = any(lib in code for lib in ["sklearn", "tensorflow", "torch"])
        if mentions_ml:
            messages.extend(check_ml_issues(code))

        # Only the summary line present means no check produced a finding.
        if len(messages) == 1:
            messages.append("No critical issues detected by heuristic checks.")
        return "\n".join(messages)
    except Exception as e:
        logger.error(f"Error analyzing code with CodeBERT: {e}")
        return f"Error analyzing code: {str(e)}"
65
+
models/issues.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import re
3
+ import keyword
4
+
5
+
6
def check_common_issues(code):
    """Check for common Python coding issues.

    The checks are heuristic: textual tests for ``pd.``/``np.`` without the
    matching import, an AST pass for names that are read but never defined,
    detection of bare ``except:`` clauses, and a scan for lines longer than
    120 characters (measured after stripping surrounding whitespace).

    Args:
        code: Python source text to inspect.

    Returns:
        A list of "Error: ..." / "Warning: ..." strings in a stable order;
        empty when nothing is flagged.
    """
    # Imported locally: `__builtins__` is a plain dict in imported CPython
    # modules, so dir(__builtins__) lists dict methods, not builtin names.
    import builtins

    issues = []

    # Check for missing imports
    if "pd." in code and "import pandas" not in code:
        issues.append("Error: 'pd' used but 'pandas' not imported.")
    if "np." in code and "import numpy" not in code:
        issues.append("Error: 'np' used but 'numpy' not imported.")

    # Check for undefined variables using AST
    try:
        tree = ast.parse(code)
    except SyntaxError as e:
        tree = None
        issues.append(f"Warning: Syntax error in code: {str(e)}. Unable to check for undefined variables.")

    if tree is not None:
        defined_names = set()
        used_names = set()
        has_star_import = False

        for node in ast.walk(tree):
            if isinstance(node, ast.Name):
                if isinstance(node.ctx, ast.Store):
                    defined_names.add(node.id)
                elif isinstance(node.ctx, ast.Load):
                    used_names.add(node.id)
            elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                # def/class statements bind their own name.
                defined_names.add(node.name)
            elif isinstance(node, ast.arg):
                # Function parameters (including lambda parameters).
                defined_names.add(node.arg)
            elif isinstance(node, (ast.Import, ast.ImportFrom)):
                for alias in node.names:
                    if alias.name == '*':
                        has_star_import = True
                    else:
                        # `import a.b` binds `a`; `import a.b as c` binds `c`.
                        defined_names.add((alias.asname or alias.name).split('.')[0])
            elif isinstance(node, ast.ExceptHandler) and node.name:
                # `except Error as exc:` binds `exc`.
                defined_names.add(node.name)

        # Exclude built-ins, keywords, and common module names
        excluded = set(keyword.kwlist) | set(dir(builtins)) | {'numpy', 'pandas', 'sklearn', 'torch', 'tensorflow'}
        # Sorted so the message is deterministic across runs.
        undefined_vars = sorted(used_names - defined_names - excluded)
        # A star import can bind arbitrary names, so the report would be unreliable.
        if undefined_vars and not has_star_import:
            issues.append(f"Warning: Undefined variables detected: {', '.join(undefined_vars)}.")

    # Check for bare except clauses. With a parse tree, look at the handlers
    # themselves so a typed `except` elsewhere cannot hide a bare one; fall
    # back to a line-anchored text match when the code does not parse.
    if tree is not None:
        has_bare_except = any(
            isinstance(node, ast.ExceptHandler) and node.type is None
            for node in ast.walk(tree)
        )
    else:
        has_bare_except = re.search(r'^\s*except\s*:', code, flags=re.MULTILINE) is not None
    if has_bare_except:
        issues.append("Warning: Bare 'except:' clause detected. Specify exception type for better error handling.")

    # Check for overly long lines
    lines = code.split('\n')
    long_lines = [i + 1 for i, line in enumerate(lines) if len(line.strip()) > 120]
    if long_lines:
        issues.append(f"Warning: Lines {', '.join(map(str, long_lines))} exceed 120 characters. Consider reformatting.")

    return issues
49
+
50
+
51
def check_ml_issues(code):
    """Flag machine-learning workflow smells found by substring matching.

    Looks for LogisticRegression without StandardScaler, classifier use
    without train_test_split or without cross-validation, and ``.predict(``
    calls without train_test_split. Returns the warnings as a list of strings.
    """
    findings = []

    # Evaluate each textual signal once; the rules below combine them.
    has_split = "train_test_split" in code
    has_cross_validation = "cross_val_score" in code or "GridSearchCV" in code
    uses_classifier = any(name in code for name in ["LogisticRegression", "RandomForest", "SVC"])

    # Unscaled data fed to logistic regression.
    if "LogisticRegression" in code and "StandardScaler" not in code:
        findings.append("Warning: LogisticRegression used without data scaling. Consider using StandardScaler for better performance.")

    # Classifier trained without a hold-out split.
    if uses_classifier and not has_split:
        findings.append("Warning: No train-test split detected. Use sklearn.model_selection.train_test_split to evaluate model performance.")

    # Classifier trained without any cross-validation helper.
    if uses_classifier and not has_cross_validation:
        findings.append("Warning: No cross-validation detected. Consider using cross_val_score or GridSearchCV for robust model evaluation.")

    # Predictions made without a hold-out split.
    if ".predict(" in code and not has_split:
        findings.append("Warning: Model prediction used without train-test split. Validate model on separate test data to avoid overfitting.")

    return findings
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
- flask==3.0.3
2
  transformers==4.44.2
3
  torch==2.4.1
4
  pandas==2.2.2
5
  notebook==7.2.2
6
- pylint==3.2.7
 
 
 
1
  transformers==4.44.2
2
  torch==2.4.1
3
  pandas==2.2.2
4
  notebook==7.2.2
5
+ pylint==3.2.7
6
+ streamlit==1.38.0
scripts/__init__.py ADDED
File without changes
scripts/logger.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import warnings
3
+ import os
4
+
5
# Suppress every Python warning for the whole process.
warnings.filterwarnings("ignore")

# The log file lives in ./log (relative to the working directory); create the
# directory up front so basicConfig can open the file.
os.makedirs("./log", exist_ok=True)

# Root logging setup: records at INFO and above go to ./log/app.log.
logging.basicConfig(
    level=logging.INFO,
    filename="./log/app.log",
    format="%(asctime)s - %(levelname)s - %(message)s",
)


def get_logger(name):
    """Return the logger registered under *name*."""
    named_logger = logging.getLogger(name)
    return named_logger
static/style.css DELETED
@@ -1,84 +0,0 @@
1
- /* General Styling */
2
- body {
3
- background-color: #f8f9fa;
4
- font-family: 'Arial', sans-serif;
5
- }
6
-
7
- /* Card Styling for Form and Feedback */
8
- .card {
9
- border-radius: 10px;
10
- background-color: #ffffff;
11
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
12
- }
13
-
14
- /* Form Styling */
15
- .form-label {
16
- font-weight: bold;
17
- color: #333;
18
- }
19
-
20
- .form-control {
21
- border: 2px solid #ced4da;
22
- border-radius: 5px;
23
- transition: border-color 0.3s;
24
- }
25
-
26
- .form-control:focus {
27
- border-color: #007bff;
28
- box-shadow: 0 0 5px rgba(0, 123, 255, 0.5);
29
- }
30
-
31
- .btn-primary {
32
- background-color: #007bff;
33
- border: none;
34
- padding: 10px 20px;
35
- font-size: 1.1rem;
36
- border-radius: 5px;
37
- transition: background-color 0.3s;
38
- }
39
-
40
- .btn-primary:hover {
41
- background-color: #0056b3;
42
- }
43
-
44
- /* Feedback Section Styling */
45
- h2 {
46
- color: #007bff;
47
- margin-bottom: 20px;
48
- text-align: center;
49
- }
50
-
51
- h3 {
52
- color: #333;
53
- font-size: 1.5rem;
54
- margin-top: 20px;
55
- border-bottom: 2px solid #007bff;
56
- padding-bottom: 5px;
57
- }
58
-
59
- pre {
60
- background-color: #f1f3f5;
61
- padding: 15px;
62
- border-radius: 5px;
63
- font-size: 0.9rem;
64
- white-space: pre-wrap;
65
- word-wrap: break-word;
66
- max-height: 400px;
67
- overflow-y: auto;
68
- border: 1px solid #ced4da;
69
- }
70
-
71
- /* Responsive Design */
72
- @media (max-width: 576px) {
73
- .card {
74
- padding: 15px;
75
- }
76
-
77
- h1 {
78
- font-size: 1.8rem;
79
- }
80
-
81
- .btn-primary {
82
- width: 100%;
83
- }
84
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
templates/upload.html DELETED
@@ -1,26 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
-
4
- <head>
5
- <meta charset="UTF-8">
6
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
- <title>Assignment Checker</title>
8
- <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
9
- <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
10
- </head>
11
-
12
- <body class="container mt-5">
13
- <h1 class="text-center mb-4">Assignment Checker</h1>
14
- <div class="card p-4 shadow">
15
- <h3>Upload Your Assignment</h3>
16
- <form method="post" action="/upload" enctype="multipart/form-data">
17
- <div class="mb-3">
18
- <label for="file" class="form-label">Select a .py or .ipynb file:</label>
19
- <input type="file" name="file" accept=".py,.ipynb" class="form-control">
20
- </div>
21
- <button type="submit" class="btn btn-primary">Upload</button>
22
- </form>
23
- </div>
24
- </body>
25
-
26
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
testpylint.py DELETED
@@ -1,14 +0,0 @@
1
- from pylint.lint import Run
2
- from pylint.reporters.text import TextReporter
3
- from io import StringIO
4
-
5
- def pylint_check(file_path):
6
- try:
7
- output = StringIO()
8
- reporter = TextReporter(output)
9
- Run([file_path, '--disable=C0114,C0115,C0116,R0903,W0311,W0703,C0303,C0301', '--max-line-length=120'], reporter=reporter, exit=False)
10
- return output.getvalue()
11
- except Exception as e:
12
- return f"Pylint error: {str(e)}"
13
-
14
- print(pylint_check("uploads/test.py"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uploads/assignment_01.ipynb DELETED
@@ -1,71 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "c37acdaf",
6
- "metadata": {},
7
- "source": [
8
- "# Assignment 1: Calculate the Factorial\n",
9
- " \"Write a function to compute the factorial of a given number n.\n",
10
- " The factorial of a number n is the product of all positive integers less than or equal to n.\n",
11
- " For example, factorial of 5 is 5 * 4 * 3 * 2 * 1 = 120."
12
- ]
13
- },
14
- {
15
- "cell_type": "code",
16
- "execution_count": 1,
17
- "id": "ac5b116f",
18
- "metadata": {},
19
- "outputs": [
20
- {
21
- "name": "stdout",
22
- "output_type": "stream",
23
- "text": [
24
- "Factorial of 5 is 120\n"
25
- ]
26
- }
27
- ],
28
- "source": [
29
- "def factorial(n):\n",
30
- " if n == 0 or n == 1:\n",
31
- " return 1\n",
32
- " else:\n",
33
- " return n * factorial(n - 1)\n",
34
- " \n",
35
- " # Test the function\\,\n",
36
- "number = 5\n",
37
- "result = factorial(number)\n",
38
- "print(f'Factorial of {number} is {result}')"
39
- ]
40
- },
41
- {
42
- "cell_type": "code",
43
- "execution_count": null,
44
- "id": "80df5cfc",
45
- "metadata": {},
46
- "outputs": [],
47
- "source": []
48
- }
49
- ],
50
- "metadata": {
51
- "kernelspec": {
52
- "display_name": "ml_env",
53
- "language": "python",
54
- "name": "python3"
55
- },
56
- "language_info": {
57
- "codemirror_mode": {
58
- "name": "ipython",
59
- "version": 3
60
- },
61
- "file_extension": ".py",
62
- "mimetype": "text/x-python",
63
- "name": "python",
64
- "nbconvert_exporter": "python",
65
- "pygments_lexer": "ipython3",
66
- "version": "3.12.2"
67
- }
68
- },
69
- "nbformat": 4,
70
- "nbformat_minor": 5
71
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uploads/assignment_01.py DELETED
@@ -1,12 +0,0 @@
1
- # Assignment 1: Calculate the factorial of a number
2
- # Write a function to compute the factorial of a given number n
3
- def factorial(n):
4
- if n == 0 or n == 1:
5
- return 1
6
- else:
7
- return n * factorial(n - 1)
8
-
9
- # Test the function
10
- number = 5
11
- result = factorial(number)
12
- print(f"Factorial of {number} is {result}")
 
 
 
 
 
 
 
 
 
 
 
 
 
uploads/assignment_02.py DELETED
@@ -1,17 +0,0 @@
1
- # Assignment 2: Check if a number is prime
2
- # Write a function to check if a number is prime
3
-
4
-
5
- def is_prime(n):
6
- if n <= 1:
7
- return False
8
- for i in range(2, n):
9
- if n % i == 0:
10
- return False
11
- return True
12
-
13
- # Test the function
14
- num = 17
15
- print(f"Is {num} prime? {is_prime(num)}")
16
- print(f"Is 4 prime? {is_prime(4)}")
17
- print(f"Is {undefined_var} prime?")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uploads/assignment_03.py DELETED
@@ -1,27 +0,0 @@
1
- # Assignment 3: EDA on Synthetic Dataset
2
- # Perform EDA on a dataset of student scores
3
- import pandas as pd
4
- import numpy as np
5
-
6
- # Synthetic dataset
7
- data = {
8
- 'name': ['Alice', 'Bob', 'Charlie', 'David'],
9
- 'math_score': [85, 90, 78, 92],
10
- 'science_score': [88, 95, 80, 90]
11
- }
12
- df = pd.DataFrame(data)
13
-
14
- # Calculate average scores
15
- avg_math = df['math_score'].mean()
16
- avg_science = df['science_score'].mean()
17
- print(f"Average Math Score: {avg_math}")
18
- print(f"Average Science Score: {avg_science}")
19
-
20
- # Plot histogram of math scores
21
- import matplotlib.pyplot as plt
22
- plt.hist(df['math_score'], bins=5)
23
- plt.title('Math Score Distribution')
24
- plt.xlabel('Score')
25
- plt.ylabel('Frequency')
26
- plt.show() # Error: May not display in non-interactive environments
27
- print(df['english_score']) # Error: 'english_score' column doesn't exist
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uploads/assignment_04.ipynb DELETED
@@ -1,52 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "86625251",
6
- "metadata": {},
7
- "source": [
8
- "# Assignment 4: EDA - Correlation Analysis\n",
9
- "Perform correlation analysis on a dataset of car features and prices.\n",
10
- "Calculate correlation matrix and plot a heatmap."
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "execution_count": null,
16
- "id": "d3ae5847",
17
- "metadata": {},
18
- "outputs": [],
19
- "source": [
20
- "import pandas as pd\n",
21
- "import seaborn as sns\n",
22
- "import matplotlib.pyplot as plt\n",
23
- "\n",
24
- "# Synthetic dataset\n",
25
- "data = {\n",
26
- " 'horsepower': [120, 150, 100, 180],\n",
27
- " 'weight': [3000, 3200, 2800, 3500],\n",
28
- " 'price': [20000, 25000, 18000, 30000]\n",
29
- "}\n",
30
- "df = pd.DataFrame(data)\n",
31
- "\n",
32
- "# Correlation matrix\n",
33
- "corr_matrix = df.corr()\n",
34
- "\n",
35
- "# Plot heatmap\n",
36
- "sns.heatmap(corr_matrix, annot=True)\n",
37
- "plt.title('Correlation Heatmap')\n",
38
- "plt.show()\n",
39
- "\n",
40
- "# Error: Incorrect column name\n",
41
- "print(df['Price'])"
42
- ]
43
- }
44
- ],
45
- "metadata": {
46
- "language_info": {
47
- "name": "python"
48
- }
49
- },
50
- "nbformat": 4,
51
- "nbformat_minor": 5
52
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uploads/assignment_04.py DELETED
@@ -1,16 +0,0 @@
1
- # Assignment 4: Linear Regression on Synthetic Data
2
- # Build a linear regression model to predict house prices
3
- import numpy as np
4
- from sklearn.linear_model import LinearRegression
5
-
6
- # Synthetic dataset
7
- X = np.array([[1, 2], [2, 4], [3, 6], [4, 8]]) # Features: size, rooms
8
- y = np.array([100, 200, 300, 400]) # Prices
9
-
10
- # Train model
11
- model = LinearRegression()
12
- model.fit(X, y)
13
-
14
- # Predict on new data
15
- new_data = np.array([5, 10]) # Error: Shape mismatch, should be [[5, 10]]
16
- print(f"Predicted price: {model.predict(new_data)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uploads/assignment_05.ipynb DELETED
@@ -1,49 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "10d2cf4b",
6
- "metadata": {},
7
- "source": [
8
- "# Assignment 5: Decision Tree Classifier\n",
9
- "Train a decision tree classifier on a synthetic dataset of customer purchases.\n",
10
- "Predict whether a customer will buy based on age and income."
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "execution_count": null,
16
- "id": "10abdab6",
17
- "metadata": {},
18
- "outputs": [],
19
- "source": [
20
- "import pandas as pd\n",
21
- "from sklearn.tree import DecisionTreeClassifier\n",
22
- "\n",
23
- "# Synthetic dataset\n",
24
- "data = {\n",
25
- " 'age': [25, 30, 35, 40],\n",
26
- " 'income': [50000, 60000, 55000, 70000],\n",
27
- " 'buy': [0, 1, 0, 1]\n",
28
- "}\n",
29
- "df = pd.DataFrame(data)\n",
30
- "\n",
31
- "# Train model\n",
32
- "X = df[['age', 'income']]\n",
33
- "y = df['buy']\n",
34
- "model = DecisionTreeClassifier()\n",
35
- "model.fit(X, y) # Error: Missing arguments, should be model.fit(X, y)\\n\",\n",
36
- "\n",
37
- "# Predict\\n\",\n",
38
- "print(model.predict([[30, 65000]]))"
39
- ]
40
- }
41
- ],
42
- "metadata": {
43
- "language_info": {
44
- "name": "python"
45
- }
46
- },
47
- "nbformat": 4,
48
- "nbformat_minor": 5
49
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uploads/assignment_06.py DELETED
@@ -1,19 +0,0 @@
1
- # Assignment 6: Missing Value Imputation
2
- # Impute missing values in a dataset using mean strategy
3
- import pandas as pd
4
- import numpy as np
5
- from sklearn.impute import SimpleImputer
6
-
7
- # Synthetic dataset
8
- data = {
9
- 'feature1': [1, 2, np.nan, 4],
10
- 'feature2': [10, np.nan, 30, 40]
11
- }
12
- df = pd.DataFrame(data)
13
-
14
- # Impute missing values
15
- imputer = SimpleImputer(strategy='mean')
16
- df_imputed = imputer.fit_transform(df)
17
-
18
- # Error: Incorrectly printing DataFrame as array
19
- print(df_imputed['feature1']) # Should use pd.DataFrame(df_imputed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uploads/assignment_07.py DELETED
@@ -1,15 +0,0 @@
1
- # Assignment 7: Feature Engineering - Polynomial Features
2
- # Create polynomial features for a regression model
3
- from sklearn.preprocessing import PolynomialFeatures
4
- import numpy as np
5
-
6
- # Synthetic dataset
7
- X = np.array([[1], [2], [3], [4]])
8
-
9
- # Create polynomial features
10
- poly = PolynomialFeatures(degree=2)
11
- X_poly = poly.fit_transform(X)
12
-
13
- # Error: Incorrectly accessing feature names
14
- print(poly.feature_names) # Error: Should use get_feature_names_out()
15
- print(X_poly)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uploads/assignment_09.py DELETED
@@ -1,15 +0,0 @@
1
- # Assignment 9: PCA for Dimensionality Reduction
2
- # Apply PCA to reduce dimensions of a dataset
3
- from sklearn.decomposition import PCA
4
- import numpy as np
5
-
6
- # Synthetic dataset
7
- X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
8
-
9
- # Apply PCA
10
- pca = PCA(n_components=2)
11
- X_reduced = pca.fit_transform(X)
12
-
13
- # Error: Accessing undefined attribute
14
- print(pca.explained_variance) # Should be explained_variance_ratio_
15
- print(X_reduced)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uploads/test.py DELETED
@@ -1,6 +0,0 @@
1
- # test.py
2
- def factorial(n):
3
- if n == 0:
4
- return 1
5
- else:
6
- return n * factorial(n-1)
 
 
 
 
 
 
 
utils/__init__.py ADDED
File without changes
utils/code_utils.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pylint.lint import Run
2
+ from pylint.reporters.text import TextReporter
3
+ from nbformat import read
4
+ import os
5
+ from io import StringIO
6
+ from scripts.logger import get_logger
7
+ from charset_normalizer import detect
8
+
9
+ logger = get_logger(__name__)
10
+
11
def pylint_check(file_path):
    """Run Pylint on *file_path* and return its text report.

    Files whose detected encoding is neither UTF-8 nor ASCII are first
    re-encoded into a sibling ``<file_path>.utf8`` copy, which is linted
    instead and always removed afterwards.

    Args:
        file_path: Path of the file to lint.

    Returns:
        Pylint's text output, "No critical issues found. Code looks good!"
        when the output is blank, or "Pylint error: ..." when anything raises.
    """
    logger.debug("Checking file: %s", file_path)
    # Set only when this call creates a UTF-8 copy, so cleanup can never
    # remove a file the caller passed in (even one named *.utf8).
    temp_file_path = None
    try:
        # Preprocess file to ensure UTF-8
        with open(file_path, 'rb') as f:
            raw_content = f.read()
        encoding = detect(raw_content)['encoding']

        lint_target = file_path
        if encoding not in ['utf-8', 'ascii']:
            # The detected encoding may be None; fall back to UTF-8 and replace
            # undecodable bytes rather than failing.
            content = raw_content.decode(encoding or 'utf-8', errors='replace')
            temp_file_path = file_path + '.utf8'
            with open(temp_file_path, 'w', encoding='utf-8') as f:
                f.write(content)
            lint_target = temp_file_path

        output = StringIO()
        reporter = TextReporter(output)
        Run([lint_target, '--disable=C0114,C0115,W0311,W0703,C0116,R0903,C0303,C0301', '--max-line-length=120'], reporter=reporter, exit=False)
        pylint_output = output.getvalue()

        return pylint_output if pylint_output.strip() else "No critical issues found. Code looks good!"
    except Exception as e:
        # Best-effort boundary: callers display the returned text instead of handling exceptions.
        logger.error("Pylint error: %s", e)
        return f"Pylint error: {str(e)}"
    finally:
        # Clean up the temporary copy whether linting succeeded or failed.
        if temp_file_path is not None and os.path.exists(temp_file_path):
            try:
                os.unlink(temp_file_path)
            except OSError as e:
                logger.warning("Failed to delete temporary file %s: %s", temp_file_path, e)
43
+
44
def extract_code_from_ipynb(file_path):
    """Concatenate the code cells of a notebook into one source string.

    Each code cell contributes its source (non-ASCII characters dropped)
    followed by a newline. Returns None when the result is blank or when the
    notebook cannot be read; the reason is logged.
    """
    logger.debug(f"Extracting code from: {file_path}")
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            notebook = read(handle, as_version=4)

        pieces = []
        for cell in notebook.cells:
            if cell.cell_type != 'code':
                continue
            text = cell.source
            if isinstance(text, list):
                text = ''.join(text)
            # Keep ASCII only; newline, tab and carriage return are ASCII and survive.
            text = ''.join(ch for ch in text if ord(ch) < 128)
            pieces.append(text + '\n')

        code = ''.join(pieces)
        if code.strip():
            return code
        logger.warning("No code cells found in .ipynb file.")
        return None
    except UnicodeDecodeError as e:
        logger.error(f"Error decoding .ipynb file: {e}")
        return None
    except Exception as e:
        logger.error(f"Error extracting code from .ipynb: {e}")
        return None