Spaces:

mycholpath
/

granite-3.1-8b-instruct-ascii

Runtime error

App Files Files Community

mjarrett commited on Jun 26

Commit

29969bf

1 Parent(s): 76ec1cb

updated for 8B model

Browse files

Files changed (6) hide show

Dockerfile +21 -0
README.md +3 -3
app.py +64 -0
finetune.py +176 -0
handler.py +47 -0
requirements.txt +9 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,21 @@

+# Adapted from https://huggingface.co/docs/hub/spaces-sdks-docker
+# Image for the Space: installs the Python dependencies, then fine-tunes the
+# model and serves the API when the container starts.
+FROM python:3.10-slim
+# Create non-root user
+RUN useradd -m -u 1000 user
+USER user
+# Put the user's local bin directory on PATH (presumably where pip places
+# console scripts such as uvicorn for a non-root install -- confirm).
+ENV PATH="/home/user/.local/bin:$PATH"
+# NOTE(review): finetune.py writes its output directory and tarball under /app;
+# confirm that /app is writable by `user`.
+WORKDIR /app
+# Install dependencies
+# requirements.txt is copied before the scripts, so editing a script does not
+# invalidate the dependency-install layer.
+COPY --chown=user requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+# Copy scripts
+# NOTE(review): only finetune.py and app.py are copied; handler.py from the
+# same commit is not part of the image.
+COPY --chown=user finetune.py /app/finetune.py
+COPY --chown=user app.py /app/app.py
+# Run finetune and start API
+# `&&` means uvicorn starts only if finetune.py exits successfully, and the
+# fine-tuning script runs on every container start.
+CMD ["bash", "-c", "python finetune.py && uvicorn app:app --host 0.0.0.0 --port 7860"]

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
-title: Granite 3.1 8b Instruct Ascii
-emoji: 👀
 colorFrom: yellow
-colorTo: green
 sdk: docker
 pinned: false
 license: apache-2.0

 ---
+title: Granite 2b Finetuning
+emoji: 🌖
 colorFrom: yellow
+colorTo: gray
 sdk: docker
 pinned: false
 license: apache-2.0

app.py ADDED Viewed

	@@ -0,0 +1,64 @@

+from fastapi import FastAPI, HTTPException
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+import logging
+from pydantic import BaseModel
+import os
+import tarfile
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Debug environment variables
+logger.info("Environment variables: %s", {k: "****" if "TOKEN" in k or k == "granite" else v for k, v in os.environ.items()})
+app = FastAPI()
+model_tarball = "/app/granite-8b-finetuned-ascii.tar.gz"
+model_path = "/app/granite-8b-finetuned-ascii"
+# Extract tarball if model directory doesn't exist
+if not os.path.exists(model_path):
+    logger.info(f"Extracting model tarball: {model_tarball}")
+    try:
+        with tarfile.open(model_tarball, "r:gz") as tar:
+            tar.extractall(path="/app")
+        logger.info("Model tarball extracted successfully")
+    except Exception as e:
+        logger.error(f"Failed to extract model tarball: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Model tarball extraction failed: {str(e)}")
+try:
+    logger.info("Loading tokenizer and model")
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    tokenizer.padding_side = 'right'
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        torch_dtype=torch.float16,
+        device_map="auto",
+        trust_remote_code=True
+    )
+    logger.info("Model and tokenizer loaded successfully")
+except Exception as e:
+    logger.error(f"Failed to load model or tokenizer: {str(e)}")
+    raise HTTPException(status_code=500, detail=f"Model initialization failed: {str(e)}")
+class EditRequest(BaseModel):
+    """Request body accepted by the POST /generate endpoint."""
+    # Sentence that is inserted into the "Edit this AsciiDoc sentence" prompt.
+    text: str
+@app.get("/")
+def greet_json():
+    return {"status": "Model is ready", "model": model_path}
+@app.post("/generate")
+async def generate(request: EditRequest):
+    try:
+        prompt = f"Edit this AsciiDoc sentence: {request.text}"
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        outputs = model.generate(**inputs, max_length=200)
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        logger.info(f"Generated response for prompt: {prompt}")
+        return {"response": response}
+    except Exception as e:
+        logger.error(f"Generation failed: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Generation failed: {str(e)}")

finetune.py ADDED Viewed

	@@ -0,0 +1,176 @@

+import logging
+import os
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import LoraConfig, get_peft_model
+from trl import SFTTrainer, SFTConfig
+from datasets import load_dataset
+import torch
+import tarfile
+from huggingface_hub import HfApi
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+# Debug environment variables
+logger.info("Environment variables: %s", {k: "****" if "TOKEN" in k or k == "granite" else v for k, v in os.environ.items()})
+model_path = "ibm-granite/granite-3.3-8b-instruct"
+dataset_path = "mycholpath/ascii-json"
+output_dir = "/app/granite-8b-finetuned-ascii"
+output_tarball = "/app/granite-8b-finetuned-ascii.tar.gz"
+model_repo = "mycholpath/granite-8b-finetuned-ascii"
+artifact_repo = "mycholpath/granite-finetuned-artifacts"
+# Get HF token from granite environment variable
+granite_var = os.getenv("granite")
+if not granite_var or not granite_var.startswith("HF_TOKEN="):
+    logger.error("granite environment variable is not set or invalid. Expected format: HF_TOKEN=<token>.")
+    raise ValueError("granite environment variable is not set or invalid. Please set it in HF Space settings.")
+hf_token = granite_var.replace("HF_TOKEN=", "")
+logger.info("HF_TOKEN extracted from granite (value hidden for security)")
+logging.info("Loading tokenizer...")
+try:
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_path, token=hf_token, cache_dir="/tmp/hf_cache", trust_remote_code=True
+    )
+    tokenizer.pad_token = tokenizer.eos_token
+    tokenizer.padding_side = 'right'
+except Exception as e:
+    logger.error(f"Failed to load tokenizer: {str(e)}")
+    raise
+logging.info("Loading model...")
+try:
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        token=hf_token,
+        torch_dtype=torch.float16,
+        device_map="auto",
+        cache_dir="/tmp/hf_cache",
+        trust_remote_code=True
+    )
+except Exception as e:
+    logger.error(f"Failed to load model: {str(e)}")
+    raise
+lora_config = LoraConfig(
+    r=16,
+    lora_alpha=32,
+    target_modules=["q_proj", "v_proj"],
+    lora_dropout=0.05,
+    bias="none",
+    task_type="CAUSAL_LM"
+)
+model = get_peft_model(model, lora_config)
+logging.info("Preparing to load private dataset...")
+logger.info("Using HF_TOKEN from granite for private dataset authentication")
+try:
+    dataset = load_dataset(dataset_path, split="train", token=hf_token)
+    logger.info(f"Dataset loaded successfully: {len(dataset)} examples")
+except Exception as e:
+    logger.error(f"Failed to load dataset: {str(e)}")
+    raise
+def formatting_prompts_func(example):
+    formatted = f"{example['prompt']}\n{example['completion']}"
+    return [formatted]
+# Use SFTConfig for training arguments
+# NOTE(review): output_dir is also the directory the final model is saved to
+# and archived from; with save_steps=50 intermediate checkpoints presumably
+# land in the same directory -- confirm they are wanted in the tarball/upload.
+sft_config = SFTConfig(
+    output_dir=output_dir,
+    num_train_epochs=5,
+    per_device_train_batch_size=4,
+    per_device_eval_batch_size=4,  # evaluation is switched off below (eval_strategy="no")
+    gradient_accumulation_steps=4,  # 4 x 4 = 16 sequences per optimizer step per device
+    learning_rate=2e-4,
+    weight_decay=0.01,
+    eval_strategy="no",
+    save_steps=50,
+    logging_steps=10,
+    fp16=True,
+    max_grad_norm=0.3,
+    warmup_ratio=0.03,
+    lr_scheduler_type="cosine",
+    max_seq_length=768,
+    dataset_text_field=None,  # text comes from the trainer's formatting function instead of a column
+    packing=False
+)
+logging.info("Starting training...")
+try:
+    trainer = SFTTrainer(
+        model=model,
+        tokenizer=tokenizer,
+        train_dataset=dataset,
+        eval_dataset=None,
+        formatting_func=formatting_prompts_func,
+        args=sft_config
+    )
+except Exception as e:
+    logger.error(f"Failed to initialize SFTTrainer: {str(e)}")
+    raise
+trainer.train()
+logging.info("Saving fine-tuned model...")
+trainer.save_model(output_dir)
+tokenizer.save_pretrained(output_dir)
+# Create tarball for local retrieval
+try:
+    with tarfile.open(output_tarball, "w:gz") as tar:
+        tar.add(output_dir, arcname=os.path.basename(output_dir))
+    logger.info(f"Model tarball created: {output_tarball}")
+except Exception as e:
+    logger.error(f"Failed to create model tarball: {str(e)}")
+    raise
+# Upload model to HF Hub
+try:
+    api = HfApi()
+    logger.info(f"Creating model repository: {model_repo}")
+    api.create_repo(
+        repo_id=model_repo,
+        repo_type="model",
+        token=hf_token,
+        private=True,
+        exist_ok=True
+    )
+    logger.info(f"Uploading model to {model_repo}")
+    api.upload_folder(
+        folder_path=output_dir,
+        repo_id=model_repo,
+        repo_type="model",
+        token=hf_token,
+        create_pr=False
+    )
+    logger.info(f"Fine-tuned model uploaded to {model_repo}")
+except Exception as e:
+    logger.error(f"Failed to upload model to HF Hub: {str(e)}")
+    logger.warning("Continuing to tarball upload despite model upload failure")
+# Upload tarball to HF Hub dataset repository
+try:
+    api = HfApi()
+    logger.info(f"Creating dataset repository: {artifact_repo}")
+    api.create_repo(
+        repo_id=artifact_repo,
+        repo_type="dataset",
+        token=hf_token,
+        private=True,
+        exist_ok=True
+    )
+    logger.info(f"Uploading tarball to {artifact_repo}")
+    api.upload_file(
+        path_or_fileobj=output_tarball,
+        path_in_repo="granite-8b-finetuned-ascii.tar.gz",
+        repo_id=artifact_repo,
+        repo_type="dataset"
+        token=hf_token
+    )
+    logger.info(f"Tarball uploaded to {artifact_repo}/granite-8b-finetuned-ascii.tar.gz")
+except Exception as e:
+    logger.error(f"Failed to upload tarball to HF Hub: {str(e)}")
+    raise

handler.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import json
+from transformers import pipeline
+class EndpointHandler:
+    def __init__(self, path=""):
+        self.pipeline = pipeline("text-generation", model=path, device=0)
+    def __call__(self, data):
+        inputs = data.get("inputs", "")
+        style_guide = data.get("style_guide", "Apply general AsciiDoc best practices.")
+        max_tokens = data.get("max_tokens", 2048)
+        system_prompt = f"""
+You are an expert technical editor specializing in AsciiDoc document correction. Your task is to analyze the provided AsciiDoc text and suggest corrections based on the following style guide:
+{style_guide}
+**Output Requirements**:
+- Return corrections **only** in valid JSON format, enclosed in curly braces: {{"corrections": [...]}}.
+- Each correction must include:
+  - "original_line": The exact line from the input text.
+  - "corrected_line": The corrected version of the line.
+  - "explanation": A brief reason for the correction.
+- If no corrections are needed, return: {{"corrections": []}}.
+- Ensure the JSON is complete, valid, and concise to avoid truncation.
+- Do **not** include any text, comments, or explanations outside the JSON object.
+- Do **not** include placeholder text like "<original AsciiDoc line>".
+- Only correct lines with AsciiDoc syntax, style, or technical accuracy issues (e.g., missing punctuation, incorrect headers, malformed attributes like :gls_prefix:).
+Analyze the following AsciiDoc lines and provide corrections in JSON format:
+"""
+        prompt = f"{system_prompt}\n{inputs}"
+        try:
+            response = self.pipeline(
+                prompt,
+                max_new_tokens=max_tokens,
+                temperature=0.3,
+                return_full_text=False
+            )[0]["generated_text"].strip()
+            json_start = response.find('{')
+            json_end = response.rfind('}') + 1
+            if json_start == -1 or json_end == -1:
+                return {"corrections": []}
+            correction_json = json.loads(response[json_start:json_end])
+            return correction_json
+        except Exception as e:
+            return {"corrections": []}

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+transformers==4.46.0
+torch==2.4.1
+datasets==3.0.1
+peft==0.13.2
+trl==0.11.4
+accelerate==1.0.1
+huggingface_hub==0.25.2
+fastapi==0.115.2
+uvicorn==0.32.0