import os
import re
from typing import List, Optional

import openai
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset


class LLM_Middleware:
    hf_key: str
    dataset = None

    def __init__(self, openai_key, hf) -> None:
        openai.api_key = openai_key
        self.hf_key = hf

    def loadDataset(self, datasetName: str):
        '''
        Load the named dataset from the Hugging Face Hub via the datasets library.
        '''
        self.dataset = load_dataset(datasetName)
        return self.dataset

    def TokenizerFunction(self, modelName: str, dataset):
        tokenizer = AutoTokenizer.from_pretrained(modelName)
        # The dataset records are JSON-like; tokenize only the "text" field,
        # padding to max length and truncating over-long sequences.
        return tokenizer(dataset["text"], padding="max_length", truncation=True)
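

# Usage sketch (illustrative only): the dataset name "imdb", the model name
# "bert-base-uncased", and the environment-variable names below are assumptions,
# not values taken from this Space. The chosen tokenizer must define a pad token
# for padding="max_length" to work.
if __name__ == "__main__":
    middleware = LLM_Middleware(
        openai_key=os.environ.get("OPENAI_API_KEY"),
        hf=os.environ.get("HF_TOKEN"),
    )
    ds = middleware.loadDataset("imdb")
    # Tokenize a small slice of the training split with an example model.
    sample = ds["train"].select(range(8))
    encoded = middleware.TokenizerFunction("bert-base-uncased", sample)
    print(encoded["input_ids"][0][:10])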