from binoculars import Binoculars
import torch
import gc
# Observer/performer model pairs. Binoculars contrasts a base "observer"
# model with a closely related instruction-tuned "performer" model.
CHAT_MODEL_PAIR = {
    "observer": "deepseek-ai/deepseek-llm-7b-base",
    "performer": "deepseek-ai/deepseek-llm-7b-chat"
}
CODER_MODEL_PAIR = {
    "observer": "deepseek-ai/deepseek-llm-7b-base",
    "performer": "deepseek-ai/deepseek-coder-7b-instruct-v1.5"
}
def initialize_chat_model():
    """Load the chat observer/performer pair, clearing the CUDA cache first."""
    print("Initializing chat Binoculars model...")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print(f"GPU Memory before chat model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    bino_chat = Binoculars(
        mode="accuracy",
        observer_name_or_path=CHAT_MODEL_PAIR["observer"],
        performer_name_or_path=CHAT_MODEL_PAIR["performer"],
        max_token_observed=2048
    )
    if torch.cuda.is_available():
        print(f"GPU Memory after chat model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    return bino_chat
def initialize_coder_model():
    """Load the coder observer/performer pair, clearing the CUDA cache first."""
    print("Initializing coder Binoculars model...")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print(f"GPU Memory before coder model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    bino_coder = Binoculars(
        mode="accuracy",
        observer_name_or_path=CODER_MODEL_PAIR["observer"],
        performer_name_or_path=CODER_MODEL_PAIR["performer"],
        max_token_observed=2048
    )
    if torch.cuda.is_available():
        print(f"GPU Memory after coder model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    return bino_coder
def compute_chat_score(text):
    print("Computing chat score...")
    bino_chat = initialize_chat_model()
    try:
        score_chat = bino_chat.compute_score(text)
        return {"score_chat": score_chat}
    finally:
        # Release the model even if scoring raises.
        cleanup_model(bino_chat)
def compute_coder_score(text):
    print("Computing coder score...")
    bino_coder = initialize_coder_model()
    try:
        score_coder = bino_coder.compute_score(text)
        return {"score_coder": score_coder}
    finally:
        cleanup_model(bino_coder)
def compute_scores(text, use_chat=True, use_coder=True):
    # Score sequentially so only one 7B observer/performer pair is resident
    # on the GPU at a time.
    scores = {}
    if use_chat:
        chat_scores = compute_chat_score(text)
        scores.update(chat_scores)
    if use_coder:
        coder_scores = compute_coder_score(text)
        scores.update(coder_scores)
    return scores
def cleanup_model(model):
    if model:
        try:
            print("Cleaning up model resources...")
            model.free_memory()
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
                print(f"After cleanup: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
        except Exception as e:
            print(f"Error during model cleanup: {str(e)}")
def cleanup_models(bino_chat, bino_coder):
    if bino_chat:
        cleanup_model(bino_chat)
    if bino_coder:
        cleanup_model(bino_coder)
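
# Example usage: a minimal sketch. Assumes a CUDA-capable host with enough
# memory for one 7B observer/performer pair at a time; the sample text below
# is purely illustrative.
if __name__ == "__main__":
    sample_text = "Binoculars scores text by contrasting two related LLMs."
    results = compute_scores(sample_text, use_chat=True, use_coder=True)
    print(results)  # e.g. {"score_chat": ..., "score_coder": ...}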