"""
Test script for FastVLM-7B model loading and configuration
"""

import asyncio
import sys
import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

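# Make the directory containing this script importable, so any local helper
# modules sitting next to it can be found when the script is run directly.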
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

def check_dependencies():
    """Check if all required dependencies are installed"""
    print("Checking dependencies...")

    deps = {
        "torch": None,
        "transformers": None,
        "sentencepiece": None,
        "einops": None,
        "accelerate": None
    }

    for dep in deps:
        try:
            module = __import__(dep)
            deps[dep] = getattr(module, "__version__", "installed")
            print(f"✓ {dep}: {deps[dep]}")
        except ImportError:
            print(f"✗ {dep}: NOT INSTALLED")
            deps[dep] = None

    return all(v is not None for v in deps.values())

def check_hardware():
    """Check hardware capabilities"""
    print("\nHardware check:")

    if torch.cuda.is_available():
        print(f"✓ CUDA available: {torch.cuda.get_device_name(0)}")
        print(f"  Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    elif torch.backends.mps.is_available():
        print("✓ Apple Silicon MPS available")

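        # Query total unified memory via sysctl; hw.memsize reports physical RAM in bytes.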
        import subprocess
        result = subprocess.run(['sysctl', 'hw.memsize'], capture_output=True, text=True)
        if result.returncode == 0:
            mem_bytes = int(result.stdout.split()[1])
            print(f"  System Memory: {mem_bytes / 1e9:.2f} GB")
    else:
        print("✓ CPU mode")
        # psutil is not part of check_dependencies(), so guard the import.
        try:
            import psutil
            print(f"  Available Memory: {psutil.virtual_memory().available / 1e9:.2f} GB")
        except ImportError:
            print("  (install psutil to report available memory)")

async def test_fastvlm_loading():
    """Test loading FastVLM-7B model"""
    print("\n" + "=" * 50)
    print("Testing FastVLM-7B Model Loading")
    print("=" * 50)

    model_name = "apple/FastVLM-7B"

    try:
        print(f"\n1. Loading tokenizer from {model_name}...")
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True,
            use_fast=True
        )
        print("   ✓ Tokenizer loaded successfully")
        print(f"   Tokenizer class: {tokenizer.__class__.__name__}")
        print(f"   Vocab size: {tokenizer.vocab_size}")

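        # LLaVA-style VLMs mark where image features are spliced into the prompt
        # with a sentinel token id; -200 is the conventional value. Attach it to
        # the tokenizer if the model's remote code did not already define it.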
        IMAGE_TOKEN_INDEX = -200
        if hasattr(tokenizer, 'IMAGE_TOKEN_INDEX'):
            print(f"   IMAGE_TOKEN_INDEX: {tokenizer.IMAGE_TOKEN_INDEX}")
        else:
            print(f"   Setting IMAGE_TOKEN_INDEX to {IMAGE_TOKEN_INDEX}")
            tokenizer.IMAGE_TOKEN_INDEX = IMAGE_TOKEN_INDEX

        print("\n2. Attempting to load model...")
        print("   Note: Expect ~14GB of memory at float16 (roughly double at float32)")

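        # Prefer CUDA, then Apple's MPS backend; fall back to CPU, where float32
        # is used because half precision on CPU is slow and poorly supported.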
        if torch.cuda.is_available():
            device = "cuda"
            dtype = torch.float16
        elif torch.backends.mps.is_available():
            device = "mps"
            dtype = torch.float16
        else:
            device = "cpu"
            dtype = torch.float32

        print(f"   Device: {device}")
        print(f"   Dtype: {dtype}")

        print("   Loading with low_cpu_mem_usage=True...")

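        # low_cpu_mem_usage=True loads checkpoint shards into the model
        # progressively instead of building a full copy in CPU RAM first,
        # keeping peak memory close to one model's worth of weights.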
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            torch_dtype=dtype,
            low_cpu_mem_usage=True
        )

        print("   ✓ Model loaded successfully!")

        total_params = sum(p.numel() for p in model.parameters())
        print(f"   Parameters: {total_params / 1e9:.2f}B")

        print(f"\n3. Moving model to {device}...")
        model = model.to(device)
        model.eval()
        print("   ✓ Model ready for inference")

        print("\n4. Testing generation...")
        test_prompt = "Hello, this is a test of"
        inputs = tokenizer(test_prompt, return_tensors="pt").to(device)

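        # A short text-only generation is enough to confirm the language-model
        # head runs end to end; vision inputs are not exercised here.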
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=10,
                temperature=0.7,
                do_sample=True
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"   Input: {test_prompt}")
        print(f"   Output: {response}")

        print("\n✓ FastVLM-7B is working correctly!")
        return True

    except ImportError as e:
        print(f"\n✗ Import Error: {e}")
        if "trust_remote_code" in str(e):
            print("\nSolution: The model requires trust_remote_code=True")
            print("This is already set in the code, but the model files may need to be re-downloaded.")
        return False

    except RuntimeError as e:
        if "out of memory" in str(e).lower():
            print("\n✗ Out of Memory Error")
            print("\nSolutions:")
            print("1. Use the quantized version:")
            print("   model_name = 'apple/FastVLM-7B-int4'")
            print("2. Use a smaller variant:")
            print("   model_name = 'apple/FastVLM-1.5B'")
            print("3. Enable 8-bit quantization (requires bitsandbytes)")
            print("4. Increase system RAM or use a GPU")
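            # Sketch of option 3, assuming bitsandbytes is installed and a CUDA
            # GPU is available (bitsandbytes 8-bit loading is CUDA-only):
            #   from transformers import BitsAndBytesConfig
            #   model = AutoModelForCausalLM.from_pretrained(
            #       model_name,
            #       trust_remote_code=True,
            #       quantization_config=BitsAndBytesConfig(load_in_8bit=True),
            #   )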
        else:
            print(f"\n✗ Runtime Error: {e}")
        return False

    except Exception as e:
        print(f"\n✗ Error: {e}")
        print(f"   Error type: {type(e).__name__}")
        import traceback
        traceback.print_exc()
        return False


async def test_alternative_models():
    """Test alternative model options if FastVLM-7B fails"""
    print("\n" + "=" * 50)
    print("Alternative Model Options")
    print("=" * 50)

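    # Fallback candidates. Only each model's config is fetched below, which
    # verifies the repo is reachable without downloading any weights.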
    alternatives = [
        ("apple/FastVLM-1.5B", "Smaller FastVLM variant (1.5B params)"),
        ("apple/FastVLM-7B-int4", "Quantized FastVLM for lower memory"),
        ("apple/FastVLM-0.5B", "Smallest FastVLM variant (0.5B params)")
    ]

    for model_name, description in alternatives:
        print(f"\n• {model_name}")
        print(f"  {description}")
        try:
            from transformers import AutoConfig
            AutoConfig.from_pretrained(model_name, trust_remote_code=True)
            print("  ✓ Model available")
        except Exception as e:
            print(f"  ✗ Not accessible: {str(e)[:50]}...")


async def main():
    """Main test function"""
    print("FastVLM-7B Integration Test")
    print("=" * 50)

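    # Bail out early if any required package is missing; the loading test
    # below cannot run without them.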
    if not check_dependencies():
        print("\n✗ Missing dependencies. Please install all requirements.")
        return

    check_hardware()

    success = await test_fastvlm_loading()

    if not success:
        await test_alternative_models()

    print("\n" + "=" * 50)
    print("Recommendations:")
    print("=" * 50)
    print("\n1. If memory is limited, use FastVLM-1.5B or FastVLM-0.5B")
    print("2. For Apple Silicon, ensure you have enough RAM (16GB+ recommended)")
    print("3. Consider using the quantized version (FastVLM-7B-int4)")
    print("4. Make sure transformers >= 4.40.0 is installed")

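# Entry point: asyncio.run drives the async test flow from the command line.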
if __name__ == "__main__":
    asyncio.run(main())