# community-rating-system / src/train_model.py
# Author: Seyomi — "Add application file" (commit d75e318)
import os

import pandas as pd
import torch
from torch import nn
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import RobertaModel, get_scheduler

from dataset import CommentDataset, load_data
from model import CommentRatingModel
# Run on GPU when one is available; all tensors and the model are moved here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class RoBERTaWithMetadata(nn.Module):
    """RoBERTa encoder fused with a numeric-metadata branch for rating regression.

    The pooled RoBERTa sentence embedding is concatenated with a 64-dim
    projection of 3 metadata features, then fed to a small MLP head that
    emits a single regression score per example.
    """

    def __init__(self, dropout=0.3):
        super().__init__()
        self.roberta = RobertaModel.from_pretrained("roberta-base")
        # NOTE(review): this dropout layer is never applied in forward() —
        # confirm whether it was meant to follow the pooled output.
        self.dropout = nn.Dropout(dropout)
        # Projects the 3 raw metadata features into a 64-dim representation.
        self.metadata_fc = nn.Linear(3, 64)
        # Regression head over the [pooled_text ; metadata] concatenation.
        self.classifier = nn.Sequential(
            nn.Linear(self.roberta.config.hidden_size + 64, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 1),  # single regression output
        )

    def forward(self, input_ids, attention_mask, metadata):
        pooled = self.roberta(
            input_ids=input_ids, attention_mask=attention_mask
        ).pooler_output
        projected_meta = torch.relu(self.metadata_fc(metadata))
        fused = torch.cat([pooled, projected_meta], dim=1)
        return self.classifier(fused)
def train(model, train_loader, val_loader, epochs=5, lr=2e-5, checkpoint_path="models/best_model.pt"):
    """Train `model` with MSE loss, validating each epoch and checkpointing the best.

    Args:
        model: module taking (input_ids, attention_mask, metadata) and
            returning a (batch, 1) regression output.
        train_loader / val_loader: DataLoaders yielding dicts with keys
            "input_ids", "attention_mask", "metadata", "label".
        epochs: number of full passes over train_loader.
        lr: AdamW learning rate.
        checkpoint_path: where the lowest-validation-loss weights are saved.
    """
    criterion = nn.MSELoss()
    optimizer = AdamW(model.parameters(), lr=lr)
    num_training_steps = epochs * len(train_loader)
    # Linear decay over all optimizer steps, no warmup.
    scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

    # Robustness fix: torch.save fails with FileNotFoundError if the checkpoint
    # directory is missing. main() creates "models/", but other callers may not.
    ckpt_dir = os.path.dirname(checkpoint_path)
    if ckpt_dir:
        os.makedirs(ckpt_dir, exist_ok=True)

    best_loss = float("inf")
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        # tqdm was imported at module level but never used — show progress here.
        for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}"):
            optimizer.zero_grad()
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            metadata = batch["metadata"].to(device)
            # unsqueeze -> (batch, 1) so labels match the model's output shape.
            labels = batch["label"].to(device).unsqueeze(1)
            outputs = model(input_ids, attention_mask, metadata)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()
            total_loss += loss.item()
        avg_train_loss = total_loss / len(train_loader)

        # Validation pass (no gradients, eval mode disables dropout).
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch["input_ids"].to(device)
                attention_mask = batch["attention_mask"].to(device)
                metadata = batch["metadata"].to(device)
                labels = batch["label"].to(device).unsqueeze(1)
                outputs = model(input_ids, attention_mask, metadata)
                val_loss += criterion(outputs, labels).item()
        avg_val_loss = val_loss / len(val_loader)

        print(f"Epoch {epoch+1}: Train Loss = {avg_train_loss:.4f} | Val Loss = {avg_val_loss:.4f}")
        # Keep only the weights that perform best on the validation set.
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            torch.save(model.state_dict(), checkpoint_path)
            print("βœ… Saved best model")
def main():
    """Script entry point: load data, make an 80/20 split, build loaders, train.

    Bug fix: this block calls `pd.read_csv` but the original file never
    imported pandas, so running the script raised NameError; `import pandas
    as pd` is now present at module level.
    """
    df = pd.read_csv("data/final_dataset.csv")
    # Reproducible 80/20 train/validation split.
    train_df = df.sample(frac=0.8, random_state=42)
    val_df = df.drop(train_df.index)
    train_dataset = CommentDataset(train_df)
    val_dataset = CommentDataset(val_df)
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=16)  # no shuffle for eval
    model = RoBERTaWithMetadata().to(device)
    # Ensure the default checkpoint directory exists before training saves to it.
    os.makedirs("models", exist_ok=True)
    train(model, train_loader, val_loader)


if __name__ == "__main__":
    main()