Spaces:

Za-heer
/

AI_Assignment_checker

Sleeping

App Files Files Community

Za-heer committed on Jul 26

Commit

ae6148e

1 Parent(s): c766e3c

Complete AI & Data Science assignment Checker

Browse files

Files changed (26) hide show

.gitignore +47 -0
.pylintrc +13 -16
README.md +53 -1
app.py +137 -72
models/__init__.py +0 -0
models/codebert.py +65 -0
models/issues.py +71 -0
requirements.txt +2 -2
scripts/__init__.py +0 -0
scripts/logger.py +20 -0
static/style.css +0 -84
templates/upload.html +0 -26
testpylint.py +0 -14
uploads/assignment_01.ipynb +0 -71
uploads/assignment_01.py +0 -12
uploads/assignment_02.py +0 -17
uploads/assignment_03.py +0 -27
uploads/assignment_04.ipynb +0 -52
uploads/assignment_04.py +0 -16
uploads/assignment_05.ipynb +0 -49
uploads/assignment_06.py +0 -19
uploads/assignment_07.py +0 -15
uploads/assignment_09.py +0 -15
uploads/test.py +0 -6
utils/__init__.py +0 -0
utils/code_utils.py +65 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,47 @@

+# Python-specific ignores
+*.pyc
+__pycache__/
+*.pyo
+*.pyd
+.Python
+env/
+venv/
+.venv/
+ENV/
+*.egg-info/
+dist/
+build/
+*.whl
+# Streamlit-specific ignores
+.streamlit/
+streamlit-geh.log
+# Jupyter Notebook ignores
+*.ipynb_checkpoints/
+*.ipynb_checkpoints
+# Pylint-related ignores
+.pylintrc
+# Temporary files and uploads
+static/uploads/*
+*.utf8
+*.tmp
+# Logs
+*.log
+logs/
+# Environment and IDE files
+.env
+.vscode/
+.idea/
+*.swp
+*.swo
+# OS-specific files
+.DS_Store
+Thumbs.db
+# Dependency directories
+node_modules/

.pylintrc CHANGED Viewed

@@ -1,20 +1,17 @@
 [MASTER]
-# Disable specific warnings/errors to make Pylint less strict
-disable=
-    C0114,  # Missing module docstring
-    C0115,  # Missing class docstring
-    C0116,  # Missing function docstring
-    R0903,  # Too few public methods
-    W0311,  # Bad indentation
-    W0703,  # Broad-except
-    C0303,  # Trailing whitespace
-    C0301,  # Line too long
-# Set maximum line length to a reasonable value
-max-line-length=120
-# Allow fewer checks for small scripts
-min-similarity-lines=6
-# Ignore minor style issues
-ignore-patterns=.*\.ipynb

 [MASTER]
+ignore=venv
+jobs=1
+[MESSAGES CONTROL]
+disable=all
+enable=E
+[REPORTS]
+output-format=colorized
+reports=no
+[LOGGING]
+logging-modules=logging
+[FORMAT]
+max-line-length=120

README.md CHANGED Viewed

	@@ -1 +1,53 @@
1	- # ~~AI_Assignment_Checker~~

+Code Analyzer
+A Streamlit-based application to analyze Python (.py) and Jupyter Notebook (.ipynb) files using CodeBERT and Pylint.
+Project Structure
+app.py: Main Streamlit application.
+models/codebert.py: CodeBERT model loading and analysis logic.
+utils/code_utils.py: Pylint and Jupyter Notebook extraction utilities.
+static/uploads/: Folder for uploaded files.
+requirements.txt: Project dependencies.
+Setup Instructions
+Clone or Set Up the Project:
+Create a project folder named code_analyzer.
+Place the files in the structure described above.
+Install Dependencies:
+Ensure Python 3.8+ is installed.
+Run: pip install -r requirements.txt
+Run the Application:
+Navigate to the code_analyzer directory.
+Run: streamlit run app.py
+Open the provided URL (usually http://localhost:8501) in your browser.
+Usage:
+Upload a .py or .ipynb file via the Streamlit UI.
+View the analysis report with CodeBERT and Pylint feedback.
+Notes
+The static/uploads folder is created automatically when the app starts.
+CodeBERT runs on the GPU automatically when CUDA is available; otherwise it runs on the CPU.
+Check the log file ./log/app.log for debugging information.
+Troubleshooting
+Module Not Found: Verify all dependencies are installed (pip install -r requirements.txt).
+File Upload Issues: Ensure files are valid .py or .ipynb.
+CodeBERT Errors: Check internet connection for model downloading or GPU compatibility.

app.py CHANGED Viewed

@@ -1,80 +1,145 @@
-from flask import Flask, request, render_template
 import os
-from transformers import AutoTokenizer, AutoModel
-import torch
-from pylint.lint import Run
-from pylint.reporters.text import TextReporter
-from io import StringIO
-app = Flask(__name__)
-UPLOAD_FOLDER = 'uploads'
-app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
-app.static_folder = 'static'
-# Load CodeBERT with error handling
-try:
-    tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
-    model = AutoModel.from_pretrained("microsoft/codebert-base")
-except Exception as e:
-    print(f"Error loading CodeBERT: {e}")
-    tokenizer = None
-    model = None
-def pylint_check(file_path):
-    try:
-        output = StringIO()
-        reporter = TextReporter(output)
-        Run([file_path, '--disable=C0114,C0115,W0311,W0703,C0116,R0903,C0303,C0301', '--max-line-length=120'], reporter=reporter, exit=False)
-        pylint_output = output.getvalue()
-        return pylint_output if pylint_output.strip() else "No critical issues found. Code looks good!"
-    except Exception as e:
-        return f"Pylint error: {str(e)}"
-def analyze_code(file_path):
-    try:
-        # Read the code file
-        with open(file_path, 'r', encoding='utf-8') as file:
-            code = file.read()
-        # CodeBERT analysis
-        codebert_feedback = "CodeBERT not loaded."
-        if tokenizer and model:
-            inputs = tokenizer(code, return_tensors="pt", truncation=True, max_length=512)
-            with torch.no_grad():
-                outputs = model(**inputs)
-            codebert_feedback = f"Code analyzed with CodeBERT. Length: {len(code)} characters."
-        # Pylint analysis
-        pylint_feedback = pylint_check(file_path)
-        # Combine and format feedback
-        feedback = f"<h3>Analysis Report</h3><p><strong>CodeBERT Feedback:</strong> {codebert_feedback}</p><p><strong>Pylint Feedback:</strong><br><pre>{pylint_feedback}</pre></p>"
-        return feedback
-    except Exception as e:
-        return f"Error analyzing file: {str(e)}"
-@app.route('/')
-def index():
-    return render_template('upload.html')
-@app.route('/upload', methods=['POST'])
-def upload_file():
-    try:
-        if 'file' not in request.files:
-            return 'No file uploaded', 400
-        file = request.files['file']
-        if file.filename == '':
-            return 'No file selected', 400
-        if file and (file.filename.endswith('.py') or file.filename.endswith('.ipynb')):
-            file_path = os.path.abspath(os.path.join(app.config['UPLOAD_FOLDER'], file.filename))
-            file.save(file_path)
-            feedback = analyze_code(file_path)
-            return f'<h2>File {file.filename} uploaded successfully!</h2>{feedback}'
-        return 'Invalid file type', 400
-    except Exception as e:
-        return f'Error during upload: {str(e)}', 500
-if __name__ == '__main__':
-    if not os.path.exists(UPLOAD_FOLDER):
-        os.makedirs(UPLOAD_FOLDER)
-    app.run(debug=True, port=5000)

+import streamlit as st
 import os
+import tempfile
+import pandas as pd
+from datetime import datetime
+from scripts.logger import get_logger
+from models.codebert import analyze_with_codebert
+from utils.code_utils import pylint_check, extract_code_from_ipynb
+from charset_normalizer import detect
+# Module-level logger for this Streamlit script.
+logger = get_logger(__name__)
+# Directory (relative to the working directory) where uploads and generated reports are written.
+UPLOAD_FOLDER = 'static/uploads'
+# Create the upload directory on first run so later file writes do not fail.
+if not os.path.exists(UPLOAD_FOLDER):
+    os.makedirs(UPLOAD_FOLDER)
+# Page banner: raw HTML/CSS injected via st.markdown; unsafe_allow_html=True is required for the
+# <style> and <div> tags to be rendered instead of escaped.
+st.markdown("""
+    <style>
+    .rainbow {
+        font-size: 55px;
+        font-weight: bold;
+        text-align: center;
+        font-family: "Comic Sans MS", cursive;
+        animation: rainbow 5s infinite;
+        background: linear-gradient(90deg, red, orange, yellow, green, blue, indigo, violet);
+        background-size: 400%;
+        -webkit-background-clip: text;
+        -webkit-text-fill-color: transparent;
+    }
+    @keyframes rainbow {
+        0% { background-position: 0%; }
+        100% { background-position: 400%; }
+    }
+    </style>
+    <div class="rainbow">Assignment Checker</div>
+""", unsafe_allow_html=True)
+st.write("Upload a Python (.py) or Jupyter Notebook (.ipynb) file to analyze its code quality.")
+# Input for student ID or name; the value is copied into every result row below.
+student_id = st.text_input("Enter Student ID or Name", value="Unknown")
+# accept_multiple_files=True lets the user submit several assignments at once.
+uploaded_files = st.file_uploader("Choose a file", type=['py', 'ipynb'], accept_multiple_files=True)
+if uploaded_files is not None:
+    results = []
+    for uploaded_file in uploaded_files:
+        file_path = os.path.join(UPLOAD_FOLDER, uploaded_file.name)
+        try:
+            # Save uploaded file
+            with open(file_path, 'wb') as f:
+                f.write(uploaded_file.getvalue())
+            st.write(f"File **{uploaded_file.name}** uploaded successfully!")
+            is_ipynb = uploaded_file.name.endswith('.ipynb')
+            with st.spinner("Analyzing code..."):
+                # Extract code for .ipynb or read .py
+                if is_ipynb:
+                    code = extract_code_from_ipynb(file_path)
+                    if code is None:
+                        st.error("Error: Could not extract code from .ipynb file.")
+                        results.append({
+                            'Student ID/Name': student_id,
+                            'File Name': uploaded_file.name,
+                            'CodeBERT Feedback': 'Error: Could not extract code from .ipynb file.',
+                            'Pylint Feedback': 'N/A'
+                        })
+                        continue
+                else:
+                    # Read file and detect encoding
+                    with open(file_path, 'rb') as f:
+                        raw_content = f.read()
+                    detected = detect(raw_content)
+                    encoding = detected['encoding']
+                    logger.debug(f"Detected encoding for {file_path}: {encoding}")
+                    if encoding not in ['utf-8', 'ascii']:
+                        code = raw_content.decode(encoding).encode('utf-8').decode('utf-8')
+                        # Save as UTF-8 for Pylint
+                        temp_file_path = os.path.join(UPLOAD_FOLDER, f"utf8_{uploaded_file.name}")
+                        with open(temp_file_path, 'w', encoding='utf-8') as f:
+                            f.write(code)
+                        file_path = temp_file_path
+                        results.append({
+                            'Student ID/Name': student_id,
+                            'File Name': uploaded_file.name,
+                            'CodeBERT Feedback': 'Error: Could not extract code from .ipynb file.',
+                            'Pylint Feedback': 'N/A'
+                        })
+                        continue
+                    else:
+                        code = raw_content.decode('utf-8')
+                # For .ipynb files, save extracted code to a temporary .py file
+                analysis_file_path = file_path
+                if is_ipynb:
+                    with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as temp_file:
+                        temp_file.write(code)
+                        analysis_file_path = temp_file.name
+                        logger.debug(f"Temporary file content:\n{code}")
+                # Analyze with CodeBERT
+                codebert_feedback = analyze_with_codebert(code)
+                # Run Pylint
+                pylint_feedback = pylint_check(analysis_file_path)
+                # Clean up temporary file if created
+                if is_ipynb or file_path != os.path.join(UPLOAD_FOLDER, uploaded_file.name):
+                    try:
+                        os.unlink(analysis_file_path)
+                    except Exception as e:
+                        logger.warning(f"Failed to delete temporary file {analysis_file_path}: {e}")
+                results.append({
+                            'Student ID/Name': student_id,
+                            'File Name': uploaded_file.name,
+                            'CodeBERT Feedback': codebert_feedback,
+                            'Pylint Feedback': pylint_feedback
+                        })
+                # Display results
+                st.markdown(f"**Analysis Report**\n\n**CodeBERT Feedback:**\n{codebert_feedback}\n\n**Pylint Feedback:**\n```\n{pylint_feedback}\n```")
+        except Exception as e:
+            logger.error(f"Error processing file: {e}")
+            st.error(f"Error processing file: {str(e)}")
+    # Save results to Excel
+    if results:
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        excel_path = os.path.join(UPLOAD_FOLDER, f"analysis_results_{timestamp}.xlsx")
+        try:
+            df = pd.DataFrame(results)
+            df.to_excel(excel_path, index=False)
+            st.success(f"Analysis results saved to {excel_path}")
+            # Provide download link
+            with open(excel_path, 'rb') as f:
+                st.download_button(
+                    label="Download Analysis Results",
+                    data=f,
+                    file_name=f"analysis_results_{timestamp}.xlsx",
+                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+                )
+        except Exception as e:
+            logger.error(f"Error saving results to Excel: {e}")
+            st.error(f"Error saving results to Excel: {str(e)}")

models/__init__.py ADDED Viewed

File without changes

models/codebert.py ADDED Viewed

	@@ -0,0 +1,65 @@

+from transformers import AutoTokenizer, AutoModel
+import torch
+import re
+from .issues import check_common_issues, check_ml_issues
+from scripts.logger import get_logger
+logger = get_logger(__name__)
+def load_codebert():
+    try:
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
+        model = AutoModel.from_pretrained("microsoft/codebert-base")
+        if torch.cuda.is_available():
+            model.to('cuda')
+        logger.info("CodeBERT loaded successfully.")
+        return tokenizer, model
+    except Exception as e:
+        logger.error(f"Error loading CodeBERT: {e}")
+        return None, None
+def strip_comments(code):
+    """Remove single-line and multi-line comments from code."""
+    # Remove single-line comments
+    code = re.sub(r'#.*$', '', code, flags=re.MULTILINE)
+    # Remove multi-line comments (docstrings or triple-quoted strings)
+    code = re.sub(r'"""[\s\S]*?"""|''[\s\S]*?''', '', code)
+    return code
+def analyze_with_codebert(code):
+    tokenizer, model = load_codebert()
+    if tokenizer is None or model is None:
+        return "CodeBERT not loaded."
+    try:
+        # Strip comments to avoid processing comment text
+        clean_code = strip_comments(code)
+        # Tokenize and analyze code with CodeBERT
+        inputs = tokenizer(code, return_tensors="pt", truncation=True, max_length=512, padding=True)
+        if torch.cuda.is_available():
+            inputs = {k: v.to('cuda') for k, v in inputs.items()}
+        with torch.no_grad():
+            outputs = model(**inputs)
+        embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
+        # Basic feedback based on code length and complexity
+        feedback = [f"Code analyzed with CodeBERT. Length: {len(code)} characters."]
+        if len(code) > 1000:
+            feedback.append("Warning: Code is lengthy (>1000 characters), consider refactoring for readability.")
+        # Heuristic checks for common issues
+        feedback.extend(check_common_issues(clean_code))
+        # Machine learning-specific checks if relevant
+        if any(lib in code for lib in ["sklearn", "tensorflow", "torch"]):
+            feedback.extend(check_ml_issues(code))
+        # Combine feedback
+        if len(feedback) == 1:
+            feedback.append("No critical issues detected by heuristic checks.")
+        return "\n".join(feedback)
+    except Exception as e:
+        logger.error(f"Error analyzing code with CodeBERT: {e}")
+        return f"Error analyzing code: {str(e)}"

models/issues.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import ast
+import re
+import keyword
+def check_common_issues(code):
+    """Check for common Python coding issues."""
+    issues = []
+    # Check for missing imports
+    if "pd." in code and "import pandas" not in code:
+        issues.append("Error: 'pd' used but 'pandas' not imported.")
+    if "np." in code and "import numpy" not in code:
+        issues.append("Error: 'np' used but 'numpy' not imported.")
+    # Check for undefined variables using AST
+    try:
+        tree = ast.parse(code)
+        assigned_vars = set()
+        used_vars = set()
+        # Collect assigned variables
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Name):
+                if isinstance(node.ctx, ast.Store):
+                    assigned_vars.add(node.id)
+                elif isinstance(node.ctx, ast.Load):
+                    used_vars.add(node.id)
+        # Exclude built-ins, keywords, and common module names
+        excluded = set(keyword.kwlist + dir(__builtins__) + ['numpy', 'pandas', 'sklearn', 'torch', 'tensorflow'])
+        undefined_vars = [var for var in used_vars if var not in assigned_vars and var not in excluded]
+        if undefined_vars:
+            issues.append(f"Warning: Undefined variables detected: {', '.join(undefined_vars)}.")
+    except SyntaxError as e:
+        issues.append(f"Warning: Syntax error in code: {str(e)}. Unable to check for undefined variables.")
+    # Check for bare except clauses
+    if "except:" in code and not re.search(r'except\s+\w+:', code):
+        issues.append("Warning: Bare 'except:' clause detected. Specify exception type for better error handling.")
+    # Check for overly long lines
+    lines = code.split('\n')
+    long_lines = [i + 1 for i, line in enumerate(lines) if len(line.strip()) > 120]
+    if long_lines:
+        issues.append(f"Warning: Lines {', '.join(map(str, long_lines))} exceed 120 characters. Consider reformatting.")
+    return issues
+def check_ml_issues(code):
+    """Check for machine learning-specific issues."""
+    issues = []
+    # Check for unscaled data in ML models
+    if "LogisticRegression" in code and "StandardScaler" not in code:
+        issues.append("Warning: LogisticRegression used without data scaling. Consider using StandardScaler for better performance.")
+    # Check for missing train-test split
+    if any(model in code for model in ["LogisticRegression", "RandomForest", "SVC"]) and "train_test_split" not in code:
+        issues.append("Warning: No train-test split detected. Use sklearn.model_selection.train_test_split to evaluate model performance.")
+    # Check for lack of cross-validation
+    if any(model in code for model in ["LogisticRegression", "RandomForest", "SVC"]) and "cross_val_score" not in code and "GridSearchCV" not in code:
+        issues.append("Warning: No cross-validation detected. Consider using cross_val_score or GridSearchCV for robust model evaluation.")
+    # Check for direct use of model.predict without validation
+    if ".predict(" in code and "train_test_split" not in code:
+        issues.append("Warning: Model prediction used without train-test split. Validate model on separate test data to avoid overfitting.")
+    return issues

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
-flask==3.0.3
 transformers==4.44.2
 torch==2.4.1
 pandas==2.2.2
 notebook==7.2.2
-pylint==3.2.7

 transformers==4.44.2
 torch==2.4.1
 pandas==2.2.2
 notebook==7.2.2
+pylint==3.2.7
+streamlit==1.38.0

scripts/__init__.py ADDED Viewed

File without changes

scripts/logger.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import logging
+import warnings
+import os
+# Ignore warnings: with no category or module filter, this silences every Python warning
+# for the whole process as soon as this module is imported.
+warnings.filterwarnings("ignore")
+# Ensure log directory exists ("./log" is resolved against the current working directory).
+os.makedirs("./log", exist_ok=True)
+# Configure logging: records go to ./log/app.log. The level is INFO, so
+# logger.debug(...) calls are discarded.
+logging.basicConfig(
+    filename="./log/app.log",
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+def get_logger(name):
+    """Return a logger instance."""
+    return logging.getLogger(name)

static/style.css DELETED Viewed

@@ -1,84 +0,0 @@
-/* General Styling */
-body {
-    background-color: #f8f9fa;
-    font-family: 'Arial', sans-serif;
-}
-/* Card Styling for Form and Feedback */
-.card {
-    border-radius: 10px;
-    background-color: #ffffff;
-    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
-}
-/* Form Styling */
-.form-label {
-    font-weight: bold;
-    color: #333;
-}
-.form-control {
-    border: 2px solid #ced4da;
-    border-radius: 5px;
-    transition: border-color 0.3s;
-}
-.form-control:focus {
-    border-color: #007bff;
-    box-shadow: 0 0 5px rgba(0, 123, 255, 0.5);
-}
-.btn-primary {
-    background-color: #007bff;
-    border: none;
-    padding: 10px 20px;
-    font-size: 1.1rem;
-    border-radius: 5px;
-    transition: background-color 0.3s;
-}
-.btn-primary:hover {
-    background-color: #0056b3;
-}
-/* Feedback Section Styling */
-h2 {
-    color: #007bff;
-    margin-bottom: 20px;
-    text-align: center;
-}
-h3 {
-    color: #333;
-    font-size: 1.5rem;
-    margin-top: 20px;
-    border-bottom: 2px solid #007bff;
-    padding-bottom: 5px;
-}
-pre {
-    background-color: #f1f3f5;
-    padding: 15px;
-    border-radius: 5px;
-    font-size: 0.9rem;
-    white-space: pre-wrap;
-    word-wrap: break-word;
-    max-height: 400px;
-    overflow-y: auto;
-    border: 1px solid #ced4da;
-}
-/* Responsive Design */
-@media (max-width: 576px) {
-    .card {
-        padding: 15px;
-    }
-    h1 {
-        font-size: 1.8rem;
-    }
-    .btn-primary {
-        width: 100%;
-    }
-}

templates/upload.html DELETED Viewed

@@ -1,26 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Assignment Checker</title>
-    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
-    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
-</head>
-<body class="container mt-5">
-    <h1 class="text-center mb-4">Assignment Checker</h1>
-    <div class="card p-4 shadow">
-        <h3>Upload Your Assignment</h3>
-        <form method="post" action="/upload" enctype="multipart/form-data">
-            <div class="mb-3">
-                <label for="file" class="form-label">Select a .py or .ipynb file:</label>
-                <input type="file" name="file" accept=".py,.ipynb" class="form-control">
-            </div>
-            <button type="submit" class="btn btn-primary">Upload</button>
-        </form>
-    </div>
-</body>
-</html>

testpylint.py DELETED Viewed

@@ -1,14 +0,0 @@
-from pylint.lint import Run
-from pylint.reporters.text import TextReporter
-from io import StringIO
-def pylint_check(file_path):
-    try:
-        output = StringIO()
-        reporter = TextReporter(output)
-        Run([file_path, '--disable=C0114,C0115,C0116,R0903,W0311,W0703,C0303,C0301', '--max-line-length=120'], reporter=reporter, exit=False)
-        return output.getvalue()
-    except Exception as e:
-        return f"Pylint error: {str(e)}"
-print(pylint_check("uploads/test.py"))

uploads/assignment_01.ipynb DELETED Viewed

@@ -1,71 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "c37acdaf",
-   "metadata": {},
-   "source": [
-    "# Assignment 1: Calculate the Factorial\n",
-    "    \"Write a function to compute the factorial of a given number n.\n",
-    "    The factorial of a number n is the product of all positive integers less than or equal to n.\n",
-    "    For example, factorial of 5 is 5 * 4 * 3 * 2 * 1 = 120."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "ac5b116f",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Factorial of 5 is 120\n"
-     ]
-    }
-   ],
-   "source": [
-    "def factorial(n):\n",
-    "       if n == 0 or n == 1:\n",
-    "            return 1\n",
-    "       else:\n",
-    "            return n * factorial(n - 1)\n",
-    "    \n",
-    "    # Test the function\\,\n",
-    "number = 5\n",
-    "result = factorial(number)\n",
-    "print(f'Factorial of {number} is {result}')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "80df5cfc",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "ml_env",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.2"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

uploads/assignment_01.py DELETED Viewed

@@ -1,12 +0,0 @@
-# Assignment 1: Calculate the factorial of a number
-# Write a function to compute the factorial of a given number n
-def factorial(n):
-    if n == 0 or n == 1:
-        return 1
-    else:
-        return n * factorial(n - 1)
-# Test the function
-number = 5
-result = factorial(number)
-print(f"Factorial of {number} is {result}")

uploads/assignment_02.py DELETED Viewed

@@ -1,17 +0,0 @@
-# Assignment 2: Check if a number is prime
-# Write a function to check if a number is prime
-def is_prime(n):
-    if n <= 1:
-        return False
-    for i in range(2, n):
-        if n % i == 0:
-            return False
-    return True
-# Test the function
-num = 17
-print(f"Is {num} prime? {is_prime(num)}")
-print(f"Is 4 prime? {is_prime(4)}")
-print(f"Is {undefined_var} prime?")

uploads/assignment_03.py DELETED Viewed

@@ -1,27 +0,0 @@
-# Assignment 3: EDA on Synthetic Dataset
-# Perform EDA on a dataset of student scores
-import pandas as pd
-import numpy as np
-# Synthetic dataset
-data = {
-    'name': ['Alice', 'Bob', 'Charlie', 'David'],
-    'math_score': [85, 90, 78, 92],
-    'science_score': [88, 95, 80, 90]
-}
-df = pd.DataFrame(data)
-# Calculate average scores
-avg_math = df['math_score'].mean()
-avg_science = df['science_score'].mean()
-print(f"Average Math Score: {avg_math}")
-print(f"Average Science Score: {avg_science}")
-# Plot histogram of math scores
-import matplotlib.pyplot as plt
-plt.hist(df['math_score'], bins=5)
-plt.title('Math Score Distribution')
-plt.xlabel('Score')
-plt.ylabel('Frequency')
-plt.show()  # Error: May not display in non-interactive environments
-print(df['english_score'])  # Error: 'english_score' column doesn't exist

uploads/assignment_04.ipynb DELETED Viewed

@@ -1,52 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "86625251",
-   "metadata": {},
-   "source": [
-    "# Assignment 4: EDA - Correlation Analysis\n",
-    "Perform correlation analysis on a dataset of car features and prices.\n",
-    "Calculate correlation matrix and plot a heatmap."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d3ae5847",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import seaborn as sns\n",
-    "import matplotlib.pyplot as plt\n",
-    "\n",
-    "# Synthetic dataset\n",
-    "data = {\n",
-    "    'horsepower': [120, 150, 100, 180],\n",
-    "    'weight': [3000, 3200, 2800, 3500],\n",
-    "    'price': [20000, 25000, 18000, 30000]\n",
-    "}\n",
-    "df = pd.DataFrame(data)\n",
-    "\n",
-    "# Correlation matrix\n",
-    "corr_matrix = df.corr()\n",
-    "\n",
-    "# Plot heatmap\n",
-    "sns.heatmap(corr_matrix, annot=True)\n",
-    "plt.title('Correlation Heatmap')\n",
-    "plt.show()\n",
-    "\n",
-    "# Error: Incorrect column name\n",
-    "print(df['Price'])"
-   ]
-  }
- ],
- "metadata": {
-  "language_info": {
-   "name": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

uploads/assignment_04.py DELETED Viewed

@@ -1,16 +0,0 @@
-# Assignment 4: Linear Regression on Synthetic Data
-# Build a linear regression model to predict house prices
-import numpy as np
-from sklearn.linear_model import LinearRegression
-# Synthetic dataset
-X = np.array([[1, 2], [2, 4], [3, 6], [4, 8]])  # Features: size, rooms
-y = np.array([100, 200, 300, 400])  # Prices
-# Train model
-model = LinearRegression()
-model.fit(X, y)
-# Predict on new data
-new_data = np.array([5, 10])  # Error: Shape mismatch, should be [[5, 10]]
-print(f"Predicted price: {model.predict(new_data)}")

uploads/assignment_05.ipynb DELETED Viewed

@@ -1,49 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "10d2cf4b",
-   "metadata": {},
-   "source": [
-    "# Assignment 5: Decision Tree Classifier\n",
-    "Train a decision tree classifier on a synthetic dataset of customer purchases.\n",
-    "Predict whether a customer will buy based on age and income."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "10abdab6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "from sklearn.tree import DecisionTreeClassifier\n",
-    "\n",
-    "# Synthetic dataset\n",
-    "data = {\n",
-    "    'age': [25, 30, 35, 40],\n",
-    "    'income': [50000, 60000, 55000, 70000],\n",
-    "    'buy': [0, 1, 0, 1]\n",
-    "}\n",
-    "df = pd.DataFrame(data)\n",
-    "\n",
-    "# Train model\n",
-    "X = df[['age', 'income']]\n",
-    "y = df['buy']\n",
-    "model = DecisionTreeClassifier()\n",
-    "model.fit(X, y)  # Error: Missing arguments, should be model.fit(X, y)\\n\",\n",
-    "\n",
-    "# Predict\\n\",\n",
-    "print(model.predict([[30, 65000]]))"
-   ]
-  }
- ],
- "metadata": {
-  "language_info": {
-   "name": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

uploads/assignment_06.py DELETED Viewed

@@ -1,19 +0,0 @@
-# Assignment 6: Missing Value Imputation
-# Impute missing values in a dataset using mean strategy
-import pandas as pd
-import numpy as np
-from sklearn.impute import SimpleImputer
-# Synthetic dataset
-data = {
-    'feature1': [1, 2, np.nan, 4],
-    'feature2': [10, np.nan, 30, 40]
-}
-df = pd.DataFrame(data)
-# Impute missing values
-imputer = SimpleImputer(strategy='mean')
-df_imputed = imputer.fit_transform(df)
-# Error: Incorrectly printing DataFrame as array
-print(df_imputed['feature1'])  # Should use pd.DataFrame(df_imputed)

uploads/assignment_07.py DELETED Viewed

@@ -1,15 +0,0 @@
-# Assignment 7: Feature Engineering - Polynomial Features
-# Create polynomial features for a regression model
-from sklearn.preprocessing import PolynomialFeatures
-import numpy as np
-# Synthetic dataset
-X = np.array([[1], [2], [3], [4]])
-# Create polynomial features
-poly = PolynomialFeatures(degree=2)
-X_poly = poly.fit_transform(X)
-# Error: Incorrectly accessing feature names
-print(poly.feature_names)  # Error: Should use get_feature_names_out()
-print(X_poly)

uploads/assignment_09.py DELETED Viewed

@@ -1,15 +0,0 @@
-# Assignment 9: PCA for Dimensionality Reduction
-# Apply PCA to reduce dimensions of a dataset
-from sklearn.decomposition import PCA
-import numpy as np
-# Synthetic dataset
-X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
-# Apply PCA
-pca = PCA(n_components=2)
-X_reduced = pca.fit_transform(X)
-# Error: Accessing undefined attribute
-print(pca.explained_variance)  # Should be explained_variance_ratio_
-print(X_reduced)

uploads/test.py DELETED Viewed

@@ -1,6 +0,0 @@
-# test.py
-def factorial(n):
-if n == 0:
-return 1
-else:
-    return n * factorial(n-1)

utils/__init__.py ADDED Viewed

File without changes

utils/code_utils.py ADDED Viewed

	@@ -0,0 +1,65 @@

+from pylint.lint import Run
+from pylint.reporters.text import TextReporter
+from nbformat import read
+import os
+from io import StringIO
+from scripts.logger import get_logger
+from charset_normalizer import detect
+logger = get_logger(__name__)
+def pylint_check(file_path):
+    logger.debug(f"Checking file: {file_path}")
+    try:
+        # Preprocess file to ensure UTF-8
+        with open(file_path, 'rb') as f:
+            raw_content = f.read()
+        detected = detect(raw_content)
+        encoding = detected['encoding']
+        if encoding not in ['utf-8', 'ascii']:
+            # logger.warning(f"File {file_path} is not UTF-8 (detected {encoding}). Converting to UTF-8.")
+            content = raw_content.decode(encoding, errors='replace').encode('utf-8').decode('utf-8')
+            temp_file_path = file_path + '.utf8'
+            with open(temp_file_path, 'w', encoding='utf-8') as f:
+                f.write(content)
+            file_path = temp_file_path
+        output = StringIO()
+        reporter = TextReporter(output)
+        Run([file_path, '--disable=C0114,C0115,W0311,W0703,C0116,R0903,C0303,C0301', '--max-line-length=120'], reporter=reporter, exit=False)
+        pylint_output = output.getvalue()
+        # Clean up temporary file if created
+        if file_path.endswith('.utf8'):
+            try:
+                os.unlink(file_path)
+            except Exception as e:
+                logger.warning(f"Failed to delete temporary file {file_path}: {e}")
+        return pylint_output if pylint_output.strip() else "No critical issues found. Code looks good!"
+    except Exception as e:
+        logger.error(f"Pylint error: {e}")
+        return f"Pylint error: {str(e)}"
+def extract_code_from_ipynb(file_path):
+    logger.debug(f"Extracting code from: {file_path}")
+    try:
+        with open(file_path, 'r', encoding='utf-8') as file:
+            notebook = read(file, as_version=4)
+        code = ""
+        for cell in notebook.cells:
+            if cell.cell_type == 'code':
+                source = ''.join(cell.source) if isinstance(cell.source, list) else cell.source
+                # Sanitize code to remove non-text characters
+                source = ''.join(c for c in source if ord(c) < 128 or c in '\n\t\r')
+                code += source + '\n'
+        if not code.strip():
+            logger.warning("No code cells found in .ipynb file.")
+            return None
+        return code
+    except UnicodeDecodeError as e:
+        logger.error(f"Error decoding .ipynb file: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"Error extracting code from .ipynb: {e}")
+        return None