Merge pull request #3 from Za-heer/master
- app.py +11 -7
- uploads/assignment_03.ipynb +46 -0
- uploads/assignment_05.py +21 -0
- uploads/assignment_10.py +22 -0
app.py
CHANGED
@@ -6,25 +6,27 @@ from pylint.lint import Run
 from pylint.reporters.text import TextReporter
 from io import StringIO
 
-app = Flask(__name__)
+app = Flask(__name__, static_folder='static')
 UPLOAD_FOLDER = 'uploads'
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
-app.static_folder = 'static'
 
-# Load CodeBERT
+# Load CodeBERT globally (once at startup)
+tokenizer = None
+model = None
 try:
     tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
     model = AutoModel.from_pretrained("microsoft/codebert-base")
+    if torch.cuda.is_available():
+        model.to('cuda')
+    print("CodeBERT loaded successfully.")
 except Exception as e:
     print(f"Error loading CodeBERT: {e}")
-    tokenizer = None
-    model = None
 
 def pylint_check(file_path):
     try:
         output = StringIO()
         reporter = TextReporter(output)
-        Run([file_path, '--disable=C0114,C0115,W0311,W0703,
+        Run([file_path, '--disable=C0114,C0115,C0116,R0903,W0311,W0703,C0303,C0301', '--max-line-length=120'], reporter=reporter, exit=False)
         pylint_output = output.getvalue()
         return pylint_output if pylint_output.strip() else "No critical issues found. Code looks good!"
     except Exception as e:
@@ -40,6 +42,8 @@ def analyze_code(file_path):
     codebert_feedback = "CodeBERT not loaded."
     if tokenizer and model:
         inputs = tokenizer(code, return_tensors="pt", truncation=True, max_length=512)
+        if torch.cuda.is_available():
+            inputs = {k: v.to('cuda') for k, v in inputs.items()}
         with torch.no_grad():
             outputs = model(**inputs)
         codebert_feedback = f"Code analyzed with CodeBERT. Length: {len(code)} characters."
@@ -77,4 +81,4 @@ def upload_file():
 if __name__ == '__main__':
     if not os.path.exists(UPLOAD_FOLDER):
         os.makedirs(UPLOAD_FOLDER)
-    app.run(debug=True, port=
+    app.run(debug=True, port=5001)
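Note on the pylint change above: it uses pylint's programmatic API, where Run() with exit=False keeps pylint from terminating the process and a TextReporter captures the report into a string buffer instead of stdout. A minimal standalone sketch of the same pattern (the target path and disabled checks here are illustrative, not the app's exact configuration):

from io import StringIO
from pylint.lint import Run
from pylint.reporters.text import TextReporter

def lint_to_string(path):
    # Collect pylint's text report in memory instead of printing it
    buffer = StringIO()
    Run([path, '--disable=C0114,C0115,C0116'], reporter=TextReporter(buffer), exit=False)
    return buffer.getvalue()

print(lint_to_string('example.py'))  # illustrative file path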
uploads/assignment_03.ipynb
ADDED
@@ -0,0 +1,46 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "4ecafd98",
+   "metadata": {},
+   "source": [
+    "# Assignment 3: Feature Engineering - Encoding Categorical Data\n",
+    "Create a function to encode categorical variables in a dataset using one-hot encoding.\n",
+    "Dataset contains student info with grades (A, B, C) and gender (M, F)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "44ac96ce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Synthetic dataset\n",
+    "data = {\n",
+    "    'student': ['Alice', 'Bob', 'Charlie'],\n",
+    "    'grade': ['A', 'B', 'C'],\n",
+    "    'gender': ['F', 'M', 'F']\n",
+    "}\n",
+    "df = pd.DataFrame(data)\n",
+    "\n",
+    "# One-hot encoding\n",
+    "df_encoded = pd.get_dummies(df, columns=['grade', 'gender'])\n",
+    "print(df_encoded)\n",
+    "\n",
+    "# Error: Trying to access non-existent column\n",
+    "print(df_encoded['grade_D'])"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
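The final cell's failure is intentional: pd.get_dummies only creates columns for categories actually present in the data (grade_A, grade_B, grade_C here), so indexing grade_D raises a KeyError. For reference, a sketch of a guarded lookup (not part of the PR; assumes the cell's df_encoded):

# Guarded access: one-hot columns exist only for observed categories
if 'grade_D' in df_encoded.columns:
    print(df_encoded['grade_D'])
else:
    print('No grade_D column; available:',
          [c for c in df_encoded.columns if c.startswith('grade_')])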
uploads/assignment_05.py
ADDED
@@ -0,0 +1,21 @@
+# Assignment 5: Feature Scaling for KNN
+# Apply feature scaling before training a KNN classifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.preprocessing import StandardScaler
+import numpy as np
+
+# Synthetic dataset
+X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
+y = np.array([0, 0, 1, 1])
+
+# Scale features
+scaler = StandardScaler()
+X_scaled = scaler.fit_transform(X)
+
+# Train KNN
+knn = KNeighborsClassifier(n_neighbors=3)
+knn.fit(X_scaled, y)
+
+# Predict
+test_data = np.array([2, 3])  # Error: Shape mismatch, should be [[2, 3]]
+print(f"Prediction: {knn.predict(test_data)}")
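The flagged shape error is deliberate: scikit-learn estimators expect a 2-D array of shape (n_samples, n_features), so the 1-D [2, 3] fails. A corrected call would also pass the test point through the fitted scaler, since the model was trained on scaled features (a sketch reusing the file's scaler and knn objects; not part of the PR):

test_data = np.array([[2, 3]])             # 2-D: one sample, two features
test_scaled = scaler.transform(test_data)  # apply the same scaling as training
print(f"Prediction: {knn.predict(test_scaled)}")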
uploads/assignment_10.py
ADDED
@@ -0,0 +1,22 @@
+# Assignment 10: Train-Test Split and Evaluation
+# Split dataset and evaluate a model
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+from sklearn.linear_model import LogisticRegression
+import numpy as np
+
+# Synthetic dataset
+X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
+y = np.array([0, 0, 1, 1])
+
+# Split data
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
+
+# Train model
+model = LogisticRegression()
+model.fit(X_train, y_train)
+
+# Evaluate
+predictions = model.predict(X_test)
+print(f"Accuracy: {accuracy_score(y_test, predictions)}")
+print(f"Confusion Matrix: {confusion_matrix(y_test, predictions)}")  # Error: confusion_matrix not imported
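The missing import on the last line is the planted bug; the fix is a one-line import (sketch, not part of the PR). Note also that with four samples and test_size=0.2 the test set holds a single example, so the confusion matrix is degenerate:

from sklearn.metrics import confusion_matrix
print(f"Confusion Matrix:\n{confusion_matrix(y_test, predictions)}")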