Spaces:

yuhangzang
/

spark

Running on Zero

App Files Files Community

yuhangzang committed on 6 days ago

Commit

67b36a4

1 Parent(s): 12e3e78

update

Browse files

Files changed (5) hide show

.gitattributes +4 -0
README.md +23 -0
app.py +216 -0
examples/example_0.png +3 -0
requirements.txt +6 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -12,3 +12,26 @@ short_description: ' A unified framework for reasoning and reward modeling'
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+## 使用说明（ZeroGPU）
+- Space 类型选择 `Gradio`，硬件选择 `ZeroGPU`（需要 PRO 或企业组织）。
+- 本仓库包含一个最小可用的 Spark-VL 演示：上传图片 + 输入文本，返回模型生成结果。
+- 关键代码在 `app.py`：
+  - 使用 `spaces.GPU` 装饰推理函数，调用时申请 GPU，用完后释放。
+  - 首次调用按需加载 `internlm/Spark-VL-7B`，优先尝试 `flash_attention_2`，失败则回退到 `eager`。
+  - 推理结束把模型移回 CPU，快速释放 ZeroGPU 显存。
+### 本地/Space 运行
+1) 推送到 Hugging Face Space 后，在 Space 设置中选择硬件 `ZeroGPU`。
+2) 运行入口：`app.py`，界面包含：图片、提示词、采样参数（max_new_tokens/temperature/top_p/top_k）。
+3) 可选环境变量：
+   - `SPARK_MODEL_ID`：默认 `internlm/Spark-VL-7B`。
+   - `ATTN_IMPL`：默认 `flash_attention_2`，可改为 `eager`。
+### 依赖
+见 `requirements.txt`（Gradio 5.x，Transformers 4.45+，qwen-vl-utils 等）。ZeroGPU 的基础镜像已包含合适的 PyTorch 版本。

app.py ADDED Viewed

	@@ -0,0 +1,216 @@

+import os
+import time
+import glob
+from typing import List
+import spaces
+import gradio as gr
+import torch
+from PIL import Image
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+MODEL_ID = os.environ.get("SPARK_MODEL_ID", "internlm/Spark-VL-7B")  # model id; overridable via the SPARK_MODEL_ID env var
+DTYPE = torch.bfloat16  # passed as torch_dtype when the model is loaded
+_model = None  # cached model; filled on first call to _load_model_and_processor()
+_processor = None  # cached processor; filled together with _model
+_attn_impl = None  # attention implementation the cached model was loaded with
+def _load_model_and_processor():
+    global _model, _processor, _attn_impl
+    if _model is not None and _processor is not None:
+        return _model, _processor
+    # Prefer flash-attn if available, otherwise fall back to eager.
+    attn_impl = os.environ.get("ATTN_IMPL", "flash_attention_2")
+    try:
+        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            MODEL_ID,
+            torch_dtype=DTYPE,
+            attn_implementation=attn_impl,
+            device_map="auto",
+        )
+        _attn_impl = attn_impl
+    except Exception:
+        # Fallback for environments without flash-attn
+        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            MODEL_ID,
+            torch_dtype=DTYPE,
+            attn_implementation="eager",
+            device_map="auto",
+        )
+        _attn_impl = "eager"
+    processor = AutoProcessor.from_pretrained(MODEL_ID)
+    _model = model
+    _processor = processor
+    return _model, _processor
+def _prepare_inputs(image, prompt):
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "image": image},
+                {"type": "text", "text": prompt},
+            ],
+        }
+    ]
+    chat_text = _processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    inputs = _processor(
+        text=[chat_text],
+        # Pass the single image directly; template contains <image> placeholder
+        images=[image] if image is not None else None,
+        return_tensors="pt",
+    )
+    return inputs
+def _decode(generated_ids, input_ids):
+    # Trim the prompt part before decoding
+    trimmed = generated_ids[:, input_ids.shape[1] :]
+    out = _processor.batch_decode(
+        trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+    )
+    return out[0] if out else ""
+@spaces.GPU(duration=120)
+def generate(image, prompt, max_new_tokens, temperature, top_p, top_k):
+    if image is None:
+        return "Please upload an image."
+    prompt = (prompt or "").strip()
+    if not prompt:
+        return "Please enter a prompt."
+    start = time.time()
+    model, _ = _load_model_and_processor()
+    try:
+        # Ensure model resides on GPU during the call
+        p = next(model.parameters())
+        if p.device.type != "cuda":
+            model.to("cuda")
+    except StopIteration:
+        pass
+    try:
+        inputs = _prepare_inputs(image, prompt)
+        dev = next(model.parameters()).device
+        inputs = {k: v.to(dev) if hasattr(v, "to") else v for k, v in inputs.items()}
+        gen_kwargs = {
+            "max_new_tokens": int(max_new_tokens),
+            "do_sample": True,
+            "temperature": float(temperature),
+            "top_p": float(top_p),
+            "top_k": int(top_k),
+            "use_cache": True,
+        }
+        with torch.inference_mode():
+            out_ids = model.generate(**inputs, **gen_kwargs)
+        text = _decode(out_ids, inputs["input_ids"])
+        took = time.time() - start
+        return f"{text}\n\n[attn={_attn_impl}, time={took:.1f}s]"
+    except Exception as e:
+        return f"Inference failed: {type(e).__name__}: {e}"
+    finally:
+        # Release GPU quickly on ZeroGPU by moving weights off CUDA.
+        try:
+            if hasattr(model, "to"):
+                model.to("cpu")
+            torch.cuda.empty_cache()
+        except Exception:
+            pass
+def build_ui():
+    with gr.Blocks() as demo:
+        gr.Markdown("# Spark-VL ZeroGPU Demo\nUpload an image or choose from the example gallery, then enter a prompt.")
+        # Build an image gallery from ./examples
+        def _gather_examples() -> List[str]:
+            exts = ("*.jpg", "*.jpeg", "*.png", "*.webp")
+            imgs: List[str] = []
+            for ptn in exts:
+                imgs.extend(sorted(glob.glob(os.path.join("examples", ptn))))
+            # Deduplicate while keeping order
+            seen = set()
+            uniq = []
+            for p in imgs:
+                if p not in seen:
+                    uniq.append(p)
+                    seen.add(p)
+            return uniq
+        example_images = _gather_examples()
+        default_candidates = [
+            os.path.join("examples", "example_0.png"),
+        ]
+        default_image_path = next((p for p in default_candidates if os.path.exists(p)), None)
+        default_image = Image.open(default_image_path) if default_image_path else None
+        with gr.Row():
+            with gr.Column(scale=1):
+                image = gr.Image(type="pil", label="Image", value=default_image)
+                gallery = gr.Gallery(
+                    value=example_images,
+                    label="Example Gallery",
+                    show_label=True,
+                    columns=4,
+                    height=240,
+                    allow_preview=True,
+                )
+                # When a thumbnail is clicked, load it into the image input
+                def _on_gallery_select(evt):
+                    try:
+                        idx = int(evt.index)
+                    except Exception:
+                        return None
+                    if idx is None or idx < 0 or idx >= len(example_images):
+                        return None
+                    # Return PIL image so upstream expects a PIL image
+                    try:
+                        return Image.open(example_images[idx])
+                    except Exception:
+                        return example_images[idx]
+                gallery.select(fn=_on_gallery_select, inputs=None, outputs=image)
+            with gr.Column(scale=1):
+                prompt = gr.Textbox(
+                    label="Prompt",
+                    value=(
+                        "As seen in the diagram, three darts are thrown at nine fixed balloons. "
+                        "If a balloon is hit it will burst and the dart continues in the same direction "
+                        "it had beforehand. How many balloons will not be hit by a dart?"
+                    ),
+                    lines=4,
+                )
+                max_new_tokens = gr.Slider(16, 512, value=128, step=8, label="max_new_tokens")
+                temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="temperature")
+                top_p = gr.Slider(0.0, 1.0, value=0.9, step=0.01, label="top_p")
+                top_k = gr.Slider(1, 200, value=50, step=1, label="top_k")
+                run = gr.Button("Generate")
+        output = gr.Textbox(label="Model Output", lines=8)
+        run.click(
+            fn=generate,
+            inputs=[image, prompt, max_new_tokens, temperature, top_p, top_k],
+            outputs=output,
+            show_progress=True,
+        )
+        demo.queue(concurrency_count=1, max_size=10).launch()
+    return demo
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    build_ui()  # builds the interface and launches it

examples/example_0.png ADDED Viewed

Git LFS Details

SHA256: df52c4fd4574d96401d0231878e83803bdb64b8d82ba81854a028a4759b7fe55
Pointer size: 131 Bytes
Size of remote file: 144 kB

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+transformers>=4.45.0
+accelerate>=0.33.0
+qwen-vl-utils>=0.0.8
+gradio>=5.49.1
+spaces>=0.24.0
+pillow