import os
import json

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Get the HF_TOKEN from the environment variable (set by the Space).
hf_token = os.getenv("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it", token=hf_token)
# Check if a GPU is available.
if torch.cuda.is_available():
    # Configure 4-bit quantization (NF4 weights, bfloat16 compute) via BitsAndBytesConfig.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
    )
    # Load the model with 4-bit quantization (GPU).
    model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-2-2b-it",
        device_map="auto",
        quantization_config=quantization_config,
        token=hf_token,
    )
else:
    # Load the model without quantization (CPU); bitsandbytes 4-bit requires CUDA.
    model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-2-2b-it",
        device_map="auto",
        token=hf_token,
    )
# Define the prompt for generating a JSON with nested events.
prompt = (
    "Generate a JSON object that describes a sequence of potential future events, where each event can have nested subevents. The JSON structure should adhere to the following format:\n\n"
    "{\n"
    "  \"events\": {\n"
    "    \"event\": {\n"
    "      \"event_number\": <integer>,\n"
    "      \"name\": <string>,\n"
    "      \"description\": <string>,\n"
    "      \"probability\": <integer (0-100)>,\n"
    "      \"duration_days\": <integer>,\n"
    "      \"subevents\": {\n"
    "        \"event\": {\n"
    "          // Nested events with the same structure\n"
    "        }\n"
    "        // or\n"
    "        \"event\": [\n"
    "          // Array of nested events with the same structure\n"
    "        ]\n"
    "      }\n"
    "    }\n"
    "  }\n"
    "}\n\n"
    "Ensure the generated JSON is enclosed between `<json>` and `</json>` tags. For example:\n\n"
    "<json>\n"
    "{\n"
    "  // Your generated JSON here\n"
    "}\n"
    "</json>\n\n"
    "Now, generate a JSON with the aforementioned schema to reflect a potential future timeline for the following theme, responding only with the JSON enclosed within the `<json>` and `</json>` tags. Theme: "
)
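
# For illustration only: a hypothetical response that conforms to the schema
# above. The event names, numbers, and probabilities here are invented, not
# model output.
#
# <json>
# {
#   "events": {
#     "event": {
#       "event_number": 1,
#       "name": "Prototype launch",
#       "description": "An initial prototype is released to early testers.",
#       "probability": 80,
#       "duration_days": 30,
#       "subevents": {
#         "event": {
#           "event_number": 2,
#           "name": "Feedback round",
#           "description": "Testers report issues and feature requests.",
#           "probability": 90,
#           "duration_days": 14,
#           "subevents": {}
#         }
#       }
#     }
#   }
# }
# </json>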
def generate(event):
    combined_input = f"{prompt} {event}"
    prompt_msg = [{"role": "user", "content": combined_input}]
    inputs = tokenizer.apply_chat_template(
        prompt_msg,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    tokens = model.generate(
        inputs.to(model.device),
        max_new_tokens=1024,
        temperature=0.5,
        do_sample=True,
    )
    # Slice off the prompt. The chat template adds role markers and special
    # tokens, so measure the input length from the templated tensor rather
    # than re-encoding the raw string (which would undercount).
    input_length = inputs.shape[-1]
    output_text = tokenizer.decode(tokens[0][input_length:], skip_special_tokens=True)
    print(output_text)
    json_start_index = output_text.find("<json>")
    json_end_index = output_text.find("</json>")
    if json_start_index != -1 and json_end_index != -1:
        json_string = output_text[json_start_index + len("<json>"):json_end_index].strip()
        # Debugging: print the extracted JSON string to check its contents.
        print("Extracted JSON String:", json_string)
        # Parse and return the JSON data.
        try:
            data = json.loads(json_string)
            return data
        except json.JSONDecodeError as e:
            return f"Error: Invalid JSON - {e}"
    else:
        return "Error: <json> or </json> not found in generated output"