Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,7 +7,14 @@ tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
|
|
| 7 |
# Convert your tokenized data into PyTorch tensors and create a PyTorch Dataset object
|
| 8 |
import torch
|
| 9 |
from torch.utils.data import Dataset
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
class ClinicalDataset(Dataset):
|
| 12 |
def __init__(self, texts, labels, tokenizer):
|
| 13 |
self.texts = texts
|
|
@@ -22,7 +29,8 @@ class ClinicalDataset(Dataset):
|
|
| 22 |
label = self.labels[idx]
|
| 23 |
encoding = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
| 24 |
return {"input_ids": encoding["input_ids"].squeeze(), "attention_mask": encoding["attention_mask"].squeeze(), "labels": torch.tensor(label)}
|
| 25 |
-
|
|
|
|
| 26 |
dataset = ClinicalDataset(texts=train_texts, labels=train_labels, tokenizer=tokenizer)
|
| 27 |
# Fine-tune the pre-trained model on your clinical dataset
|
| 28 |
from transformers import Trainer, TrainingArguments
|
|
|
|
# Convert your tokenized data into PyTorch tensors and create a PyTorch Dataset object
import torch
from torch.utils.data import Dataset

# Collect comma-separated training examples from the user via the Streamlit UI.
# st.text_input returns "" until the user types something.
train_texts = st.text_input("Enter your clinical text data (separated by commas):")
train_labels = st.text_input("Enter your corresponding labels (separated by commas):")

# Split the raw input into parallel lists, trimming stray whitespace and
# dropping empty items (a blank input box would otherwise yield [""]).
train_texts = [text.strip() for text in train_texts.split(",") if text.strip()]
# Labels must be numeric: the dataset's __getitem__ calls torch.tensor(label),
# which raises on a raw string — convert each label to int up front.
train_labels = [int(label.strip()) for label in train_labels.split(",") if label.strip()]
|
| 17 |
+
|
| 18 |
class ClinicalDataset(Dataset):
|
| 19 |
def __init__(self, texts, labels, tokenizer):
|
| 20 |
self.texts = texts
|
|
|
|
| 29 |
label = self.labels[idx]
|
| 30 |
encoding = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)
|
| 31 |
return {"input_ids": encoding["input_ids"].squeeze(), "attention_mask": encoding["attention_mask"].squeeze(), "labels": torch.tensor(label)}
|
| 32 |
+
|
| 33 |
+
|
| 34 |
dataset = ClinicalDataset(texts=train_texts, labels=train_labels, tokenizer=tokenizer)
|
| 35 |
# Fine-tune the pre-trained model on your clinical dataset
|
| 36 |
from transformers import Trainer, TrainingArguments
|