Zaheer Khan committed on
Commit
9a9a43d
·
unverified ·
2 Parent(s): c766e3c 9415604

Merge pull request #3 from Za-heer/master

Browse files
app.py CHANGED
@@ -6,25 +6,27 @@ from pylint.lint import Run
6
  from pylint.reporters.text import TextReporter
7
  from io import StringIO
8
 
9
- app = Flask(__name__)
10
  UPLOAD_FOLDER = 'uploads'
11
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
12
- app.static_folder = 'static'
13
 
14
- # Load CodeBERT with error handling
 
 
15
  try:
16
  tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
17
  model = AutoModel.from_pretrained("microsoft/codebert-base")
 
 
 
18
  except Exception as e:
19
  print(f"Error loading CodeBERT: {e}")
20
- tokenizer = None
21
- model = None
22
 
23
  def pylint_check(file_path):
24
  try:
25
  output = StringIO()
26
  reporter = TextReporter(output)
27
- Run([file_path, '--disable=C0114,C0115,W0311,W0703,C0116,R0903,C0303,C0301', '--max-line-length=120'], reporter=reporter, exit=False)
28
  pylint_output = output.getvalue()
29
  return pylint_output if pylint_output.strip() else "No critical issues found. Code looks good!"
30
  except Exception as e:
@@ -40,6 +42,8 @@ def analyze_code(file_path):
40
  codebert_feedback = "CodeBERT not loaded."
41
  if tokenizer and model:
42
  inputs = tokenizer(code, return_tensors="pt", truncation=True, max_length=512)
 
 
43
  with torch.no_grad():
44
  outputs = model(**inputs)
45
  codebert_feedback = f"Code analyzed with CodeBERT. Length: {len(code)} characters."
@@ -77,4 +81,4 @@ def upload_file():
77
  if __name__ == '__main__':
78
  if not os.path.exists(UPLOAD_FOLDER):
79
  os.makedirs(UPLOAD_FOLDER)
80
- app.run(debug=True, port=5000)
 
6
  from pylint.reporters.text import TextReporter
7
  from io import StringIO
8
 
9
+ app = Flask(__name__, static_folder='static')
10
  UPLOAD_FOLDER = 'uploads'
11
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 
12
 
13
+ # Load CodeBERT globally (once at startup)
14
+ tokenizer = None
15
+ model = None
16
  try:
17
  tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
18
  model = AutoModel.from_pretrained("microsoft/codebert-base")
19
+ if torch.cuda.is_available():
20
+ model.to('cuda')
21
+ print("CodeBERT loaded successfully.")
22
  except Exception as e:
23
  print(f"Error loading CodeBERT: {e}")
 
 
24
 
25
  def pylint_check(file_path):
26
  try:
27
  output = StringIO()
28
  reporter = TextReporter(output)
29
+ Run([file_path, '--disable=C0114,C0115,C0116,R0903,W0311,W0703,C0303,C0301', '--max-line-length=120'], reporter=reporter, exit=False)
30
  pylint_output = output.getvalue()
31
  return pylint_output if pylint_output.strip() else "No critical issues found. Code looks good!"
32
  except Exception as e:
 
42
  codebert_feedback = "CodeBERT not loaded."
43
  if tokenizer and model:
44
  inputs = tokenizer(code, return_tensors="pt", truncation=True, max_length=512)
45
+ if torch.cuda.is_available():
46
+ inputs = {k: v.to('cuda') for k, v in inputs.items()}
47
  with torch.no_grad():
48
  outputs = model(**inputs)
49
  codebert_feedback = f"Code analyzed with CodeBERT. Length: {len(code)} characters."
 
81
  if __name__ == '__main__':
82
  if not os.path.exists(UPLOAD_FOLDER):
83
  os.makedirs(UPLOAD_FOLDER)
84
+ app.run(debug=True, port=5001)
uploads/assignment_03.ipynb ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "4ecafd98",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Assignment 3: Feature Engineering - Encoding Categorical Data\n",
9
+ "Create a function to encode categorical variables in a dataset using one-hot encoding.\n",
10
+ "Dataset contains student info with grades (A, B, C) and gender (M, F)."
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "id": "44ac96ce",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "import pandas as pd\n",
21
+ " \n",
22
+ " # Synthetic dataset\n",
23
+ "data = {\n",
24
+ " 'student': ['Alice', 'Bob', 'Charlie'],\n",
25
+ " 'grade': ['A', 'B', 'C'],\n",
26
+ " 'gender': ['F', 'M', 'F']\n",
27
+ " }\n",
28
+ "df = pd.DataFrame(data)\n",
29
+ "\n",
30
+ " # One-hot encoding\n",
31
+ "df_encoded = pd.get_dummies(df, columns=['grade', 'gender']) \n",
32
+ "print(df_encoded)\n",
33
+ " \n",
34
+ " # Error: Trying to access non-existent column\n",
35
+ "print(df_encoded['grade_D'])"
36
+ ]
37
+ }
38
+ ],
39
+ "metadata": {
40
+ "language_info": {
41
+ "name": "python"
42
+ }
43
+ },
44
+ "nbformat": 4,
45
+ "nbformat_minor": 5
46
+ }
uploads/assignment_05.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Assignment 5: Feature Scaling for KNN
2
+ # Apply feature scaling before training a KNN classifier
3
+ from sklearn.neighbors import KNeighborsClassifier
4
+ from sklearn.preprocessing import StandardScaler
5
+ import numpy as np
6
+
7
+ # Synthetic dataset
8
+ X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
9
+ y = np.array([0, 0, 1, 1])
10
+
11
+ # Scale features
12
+ scaler = StandardScaler()
13
+ X_scaled = scaler.fit_transform(X)
14
+
15
+ # Train KNN
16
+ knn = KNeighborsClassifier(n_neighbors=3)
17
+ knn.fit(X_scaled, y)
18
+
19
+ # Predict
20
+ test_data = np.array([2, 3]) # Error: Shape mismatch, should be [[2, 3]]
21
+ print(f"Prediction: {knn.predict(test_data)}")
uploads/assignment_10.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Assignment 10: Train-Test Split and Evaluation
2
+ # Split dataset and evaluate a model
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.metrics import accuracy_score
5
+ from sklearn.linear_model import LogisticRegression
6
+ import numpy as np
7
+
8
+ # Synthetic dataset
9
+ X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
10
+ y = np.array([0, 0, 1, 1])
11
+
12
+ # Split data
13
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
14
+
15
+ # Train model
16
+ model = LogisticRegression()
17
+ model.fit(X_train, y_train)
18
+
19
+ # Evaluate
20
+ predictions = model.predict(X_test)
21
+ print(f"Accuracy: {accuracy_score(y_test, predictions)}")
22
+ print(f"Confusion Matrix: {confusion_matrix(y_test, predictions)}") # Error: confusion_matrix not imported