from enum import unique  # only new name this block needs; Enum is already imported by the module header


@unique
class ModelCategory(Enum):
    """Task categories used to tag Hugging Face model definitions.

    Every member's value is a lowercase, hyphen-separated slug, e.g.
    ``ModelCategory.TEXT_TO_IMAGE.value == "text-to-image"``.  Members are
    grouped by theme below; declaration order is the iteration order.

    The class is decorated with ``@unique`` so that accidentally reusing a
    value raises ``ValueError`` when the module is imported, instead of
    silently turning the later name into an alias that disappears from
    iteration.
    """

    # Core AI categories
    TEXT_GENERATION = "text-generation"
    TEXT_TO_IMAGE = "text-to-image"
    IMAGE_TO_TEXT = "image-to-text"
    AUTOMATIC_SPEECH_RECOGNITION = "automatic-speech-recognition"
    TEXT_TO_SPEECH = "text-to-speech"
    IMAGE_CLASSIFICATION = "image-classification"
    OBJECT_DETECTION = "object-detection"
    FEATURE_EXTRACTION = "feature-extraction"
    SENTENCE_SIMILARITY = "sentence-similarity"
    TRANSLATION = "translation"
    SUMMARIZATION = "summarization"
    QUESTION_ANSWERING = "question-answering"
    FILL_MASK = "fill-mask"
    TOKEN_CLASSIFICATION = "token-classification"
    ZERO_SHOT_CLASSIFICATION = "zero-shot-classification"
    AUDIO_CLASSIFICATION = "audio-classification"
    CONVERSATIONAL = "conversational"

    # Video and Motion
    TEXT_TO_VIDEO = "text-to-video"
    VIDEO_TO_TEXT = "video-to-text"
    VIDEO_CLASSIFICATION = "video-classification"
    VIDEO_GENERATION = "video-generation"
    MOTION_GENERATION = "motion-generation"
    DEEPFAKE_DETECTION = "deepfake-detection"

    # Code and Development
    CODE_GENERATION = "code-generation"
    CODE_COMPLETION = "code-completion"
    CODE_EXPLANATION = "code-explanation"
    CODE_TRANSLATION = "code-translation"
    CODE_REVIEW = "code-review"
    APP_GENERATION = "app-generation"
    API_GENERATION = "api-generation"
    DATABASE_GENERATION = "database-generation"

    # 3D and AR/VR
    TEXT_TO_3D = "text-to-3d"
    IMAGE_TO_3D = "image-to-3d"
    THREE_D_GENERATION = "3d-generation"
    MESH_GENERATION = "mesh-generation"
    TEXTURE_GENERATION = "texture-generation"
    AR_CONTENT = "ar-content"
    VR_ENVIRONMENT = "vr-environment"

    # Document Processing
    OCR = "ocr"
    DOCUMENT_ANALYSIS = "document-analysis"
    PDF_PROCESSING = "pdf-processing"
    LAYOUT_ANALYSIS = "layout-analysis"
    TABLE_EXTRACTION = "table-extraction"
    HANDWRITING_RECOGNITION = "handwriting-recognition"
    FORM_PROCESSING = "form-processing"

    # Multimodal AI
    VISION_LANGUAGE = "vision-language"
    MULTIMODAL_REASONING = "multimodal-reasoning"
    CROSS_MODAL_GENERATION = "cross-modal-generation"
    VISUAL_QUESTION_ANSWERING = "visual-question-answering"
    IMAGE_TEXT_MATCHING = "image-text-matching"
    MULTIMODAL_CHAT = "multimodal-chat"

    # Specialized AI
    MUSIC_GENERATION = "music-generation"
    VOICE_CLONING = "voice-cloning"
    STYLE_TRANSFER = "style-transfer"
    SUPER_RESOLUTION = "super-resolution"
    IMAGE_INPAINTING = "image-inpainting"
    IMAGE_OUTPAINTING = "image-outpainting"
    BACKGROUND_REMOVAL = "background-removal"
    FACE_RESTORATION = "face-restoration"

    # Content Creation
    CREATIVE_WRITING = "creative-writing"
    STORY_GENERATION = "story-generation"
    SCREENPLAY_WRITING = "screenplay-writing"
    POETRY_GENERATION = "poetry-generation"
    BLOG_WRITING = "blog-writing"
    MARKETING_COPY = "marketing-copy"

    # Game Development
    GAME_ASSET_GENERATION = "game-asset-generation"
    CHARACTER_GENERATION = "character-generation"
    LEVEL_GENERATION = "level-generation"
    DIALOGUE_GENERATION = "dialogue-generation"

    # Science and Research
    PROTEIN_FOLDING = "protein-folding"
    MOLECULE_GENERATION = "molecule-generation"
    SCIENTIFIC_WRITING = "scientific-writing"
    RESEARCH_ASSISTANCE = "research-assistance"
    DATA_ANALYSIS = "data-analysis"

    # Business and Productivity
    EMAIL_GENERATION = "email-generation"
    PRESENTATION_CREATION = "presentation-creation"
    REPORT_GENERATION = "report-generation"
    MEETING_SUMMARIZATION = "meeting-summarization"
    PROJECT_PLANNING = "project-planning"

    # AI Teacher and Education
    AI_TUTORING = "ai-tutoring"
    EDUCATIONAL_CONTENT = "educational-content"
    LESSON_PLANNING = "lesson-planning"
    CONCEPT_EXPLANATION = "concept-explanation"
    HOMEWORK_ASSISTANCE = "homework-assistance"
    QUIZ_GENERATION = "quiz-generation"
    CURRICULUM_DESIGN = "curriculum-design"
    LEARNING_ASSESSMENT = "learning-assessment"
    ADAPTIVE_LEARNING = "adaptive-learning"
    SUBJECT_TEACHING = "subject-teaching"
    MATH_TUTORING = "math-tutoring"
    SCIENCE_TUTORING = "science-tutoring"
    LANGUAGE_TUTORING = "language-tutoring"
    HISTORY_TUTORING = "history-tutoring"
    CODING_INSTRUCTION = "coding-instruction"
    EXAM_PREPARATION = "exam-preparation"
    STUDY_GUIDE_CREATION = "study-guide-creation"
    EDUCATIONAL_GAMES = "educational-games"
    LEARNING_ANALYTICS = "learning-analytics"
    PERSONALIZED_LEARNING = "personalized-learning"

    # Advanced Image Processing & Manipulation
    IMAGE_EDITING = "image-editing"
    FACE_SWAP = "face-swap"
    FACE_ENHANCEMENT = "face-enhancement"
    FACE_GENERATION = "face-generation"
    PORTRAIT_EDITING = "portrait-editing"
    PHOTO_RESTORATION = "photo-restoration"
    IMAGE_UPSCALING = "image-upscaling"
    COLOR_CORRECTION = "color-correction"
    ARTISTIC_FILTER = "artistic-filter"

    # Advanced Speech & Audio
    ADVANCED_TTS = "advanced-tts"
    ADVANCED_STT = "advanced-stt"
    VOICE_CONVERSION = "voice-conversion"
    SPEECH_ENHANCEMENT = "speech-enhancement"
    AUDIO_GENERATION = "audio-generation"
    MULTILINGUAL_TTS = "multilingual-tts"
    MULTILINGUAL_STT = "multilingual-stt"
    REAL_TIME_TRANSLATION = "real-time-translation"

    # Interactive Avatar & Video Generation
    TALKING_AVATAR = "talking-avatar"
    AVATAR_GENERATION = "avatar-generation"
    LIP_SYNC = "lip-sync"
    FACIAL_ANIMATION = "facial-animation"
    GESTURE_GENERATION = "gesture-generation"
    VIRTUAL_PRESENTER = "virtual-presenter"
    AI_ANCHOR = "ai-anchor"

    # Interactive Language & Conversation
    INTERACTIVE_CHAT = "interactive-chat"
    BILINGUAL_CONVERSATION = "bilingual-conversation"
    CULTURAL_ADAPTATION = "cultural-adaptation"
    CONTEXT_AWARE_CHAT = "context-aware-chat"
    PERSONALITY_CHAT = "personality-chat"
    ROLE_PLAY_CHAT = "role-play-chat"
    DOMAIN_SPECIFIC_CHAT = "domain-specific-chat"

    # Qwen Specialized Categories
    QWEN_REASONING = "qwen-reasoning"
    QWEN_MATH = "qwen-math"
    QWEN_CODE = "qwen-code"
    QWEN_VISION = "qwen-vision"
    QWEN_AUDIO = "qwen-audio"

    # DeepSeek Specialized Categories
    DEEPSEEK_CODING = "deepseek-coding"
    DEEPSEEK_REASONING = "deepseek-reasoning"
    DEEPSEEK_MATH = "deepseek-math"
    DEEPSEEK_RESEARCH = "deepseek-research"


@dataclass
class HFModel:
    """Hugging Face model definition.

    Field order is part of the interface: registry entries build instances
    positionally, e.g.
    ``HFModel(name, model_id, category, description, True, False, 4096, True)``,
    so fields must not be reordered.  The last four fields are optional and
    fall back to the defaults declared below.
    """

    # Human-readable display label, e.g. "Whisper Large v3".
    name: str
    # Model identifier string, e.g. "openai/whisper-large-v3".
    model_id: str
    # Task category this definition is filed under.
    category: ModelCategory
    # Short free-text summary of the model.
    description: str
    # NOTE(review): presumably marks models callable through the hosted
    # inference endpoint -- confirm against the client code.
    endpoint_compatible: bool = False
    # NOTE(review): presumably marks models that need an access token -- confirm.
    requires_auth: bool = False
    # NOTE(review): presumably a token limit for text models; left as None
    # when an entry does not supply one -- confirm how consumers treat None.
    max_tokens: Optional[int] = None
    # NOTE(review): presumably marks models that can stream responses -- confirm.
    supports_streaming: bool = False
4.0 1B", + "ibm-granite/granite-4.0-1b", + ModelCategory.TEXT_GENERATION, + "IBM's enterprise-grade small language model", + True, + False, + 2048, + True, + ), + HFModel( + "GLM-4.6", + "zai-org/GLM-4.6", + ModelCategory.TEXT_GENERATION, + "Multilingual conversational model", + True, + False, + 4096, + True, + ), + HFModel( + "Llama 3.1 8B Instruct", + "meta-llama/Llama-3.1-8B-Instruct", + ModelCategory.TEXT_GENERATION, + "Meta's instruction-tuned Llama model", + True, + True, + 8192, + True, + ), + HFModel( + "Tongyi DeepResearch 30B", + "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B", + ModelCategory.TEXT_GENERATION, + "Alibaba's research-focused large language model", + True, + False, + 4096, + True, + ), + HFModel( + "EuroLLM 9B", + "utter-project/EuroLLM-9B", + ModelCategory.TEXT_GENERATION, + "European multilingual language model", + True, + False, + 4096, + True, + ), + ] + + # Text-to-Image Models (Latest and Best) + TEXT_TO_IMAGE_MODELS = [ + HFModel( + "FIBO", + "briaai/FIBO", + ModelCategory.TEXT_TO_IMAGE, + "Advanced text-to-image generation model", + True, + False, + ), + HFModel( + "FLUX.1 Dev", + "black-forest-labs/FLUX.1-dev", + ModelCategory.TEXT_TO_IMAGE, + "State-of-the-art image generation", + True, + False, + ), + HFModel( + "FLUX.1 Schnell", + "black-forest-labs/FLUX.1-schnell", + ModelCategory.TEXT_TO_IMAGE, + "Fast high-quality image generation", + True, + False, + ), + HFModel( + "Qwen Image", + "Qwen/Qwen-Image", + ModelCategory.TEXT_TO_IMAGE, + "Multilingual text-to-image model", + True, + False, + ), + HFModel( + "Stable Diffusion XL", + "stabilityai/stable-diffusion-xl-base-1.0", + ModelCategory.TEXT_TO_IMAGE, + "Popular high-resolution image generation", + True, + False, + ), + HFModel( + "Stable Diffusion 3.5 Large", + "stabilityai/stable-diffusion-3.5-large", + ModelCategory.TEXT_TO_IMAGE, + "Latest Stable Diffusion model", + True, + False, + ), + HFModel( + "HunyuanImage 3.0", + "tencent/HunyuanImage-3.0", + ModelCategory.TEXT_TO_IMAGE, 
+ "Tencent's advanced image generation model", + True, + False, + ), + HFModel( + "Nitro-E", + "amd/Nitro-E", + ModelCategory.TEXT_TO_IMAGE, + "AMD's efficient image generation model", + True, + False, + ), + HFModel( + "Qwen Image Lightning", + "lightx2v/Qwen-Image-Lightning", + ModelCategory.TEXT_TO_IMAGE, + "Fast distilled image generation", + True, + False, + ), + ] + + # Automatic Speech Recognition Models + ASR_MODELS = [ + HFModel( + "Whisper Large v3", + "openai/whisper-large-v3", + ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, + "OpenAI's best multilingual speech recognition", + True, + False, + ), + HFModel( + "Whisper Large v3 Turbo", + "openai/whisper-large-v3-turbo", + ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, + "Faster version of Whisper Large v3", + True, + False, + ), + HFModel( + "Parakeet TDT 0.6B v3", + "nvidia/parakeet-tdt-0.6b-v3", + ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, + "NVIDIA's multilingual ASR model", + True, + False, + ), + HFModel( + "Canary Qwen 2.5B", + "nvidia/canary-qwen-2.5b", + ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, + "NVIDIA's advanced ASR with Qwen integration", + True, + False, + ), + HFModel( + "Canary 1B v2", + "nvidia/canary-1b-v2", + ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, + "Compact multilingual ASR model", + True, + False, + ), + HFModel( + "Whisper Small", + "openai/whisper-small", + ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, + "Lightweight multilingual ASR", + True, + False, + ), + HFModel( + "Speaker Diarization 3.1", + "pyannote/speaker-diarization-3.1", + ModelCategory.AUTOMATIC_SPEECH_RECOGNITION, + "Advanced speaker identification and diarization", + True, + False, + ), + ] + + # Text-to-Speech Models + TTS_MODELS = [ + HFModel( + "SoulX Podcast 1.7B", + "Soul-AILab/SoulX-Podcast-1.7B", + ModelCategory.TEXT_TO_SPEECH, + "High-quality podcast-style speech synthesis", + True, + False, + ), + HFModel( + "NeuTTS Air", + "neuphonic/neutts-air", + ModelCategory.TEXT_TO_SPEECH, + "Advanced neural 
text-to-speech", + True, + False, + ), + HFModel( + "Kokoro 82M", + "hexgrad/Kokoro-82M", + ModelCategory.TEXT_TO_SPEECH, + "Lightweight high-quality TTS", + True, + False, + ), + HFModel( + "Kani TTS 400M EN", + "nineninesix/kani-tts-400m-en", + ModelCategory.TEXT_TO_SPEECH, + "English-focused text-to-speech model", + True, + False, + ), + HFModel( + "XTTS v2", + "coqui/XTTS-v2", + ModelCategory.TEXT_TO_SPEECH, + "Zero-shot voice cloning TTS", + True, + False, + ), + HFModel( + "Chatterbox", + "ResembleAI/chatterbox", + ModelCategory.TEXT_TO_SPEECH, + "Multilingual voice cloning", + True, + False, + ), + HFModel( + "VibeVoice 1.5B", + "microsoft/VibeVoice-1.5B", + ModelCategory.TEXT_TO_SPEECH, + "Microsoft's advanced TTS model", + True, + False, + ), + HFModel( + "OpenAudio S1 Mini", + "fishaudio/openaudio-s1-mini", + ModelCategory.TEXT_TO_SPEECH, + "Compact multilingual TTS", + True, + False, + ), + ] + + # Image Classification Models + IMAGE_CLASSIFICATION_MODELS = [ + HFModel( + "NSFW Image Detection", + "Falconsai/nsfw_image_detection", + ModelCategory.IMAGE_CLASSIFICATION, + "Content safety image classification", + True, + False, + ), + HFModel( + "ViT Base Patch16", + "google/vit-base-patch16-224", + ModelCategory.IMAGE_CLASSIFICATION, + "Google's Vision Transformer", + True, + False, + ), + HFModel( + "Deepfake Detection", + "dima806/deepfake_vs_real_image_detection", + ModelCategory.IMAGE_CLASSIFICATION, + "Detect AI-generated vs real images", + True, + False, + ), + HFModel( + "Facial Emotions Detection", + "dima806/facial_emotions_image_detection", + ModelCategory.IMAGE_CLASSIFICATION, + "Recognize facial emotions", + True, + False, + ), + HFModel( + "SDXL Detector", + "Organika/sdxl-detector", + ModelCategory.IMAGE_CLASSIFICATION, + "Detect Stable Diffusion XL generated images", + True, + False, + ), + HFModel( + "ViT NSFW Detector", + "AdamCodd/vit-base-nsfw-detector", + ModelCategory.IMAGE_CLASSIFICATION, + "NSFW content detection with ViT", + True, + 
False, + ), + HFModel( + "ResNet 101", + "microsoft/resnet-101", + ModelCategory.IMAGE_CLASSIFICATION, + "Microsoft's ResNet for classification", + True, + False, + ), + ] + + # Additional Categories + FEATURE_EXTRACTION_MODELS = [ + HFModel( + "Sentence Transformers All MiniLM", + "sentence-transformers/all-MiniLM-L6-v2", + ModelCategory.FEATURE_EXTRACTION, + "Lightweight sentence embeddings", + True, + False, + ), + HFModel( + "BGE Large EN", + "BAAI/bge-large-en-v1.5", + ModelCategory.FEATURE_EXTRACTION, + "High-quality English embeddings", + True, + False, + ), + HFModel( + "E5 Large v2", + "intfloat/e5-large-v2", + ModelCategory.FEATURE_EXTRACTION, + "Multilingual text embeddings", + True, + False, + ), + ] + + TRANSLATION_MODELS = [ + HFModel( + "M2M100 1.2B", + "facebook/m2m100_1.2B", + ModelCategory.TRANSLATION, + "Multilingual machine translation", + True, + False, + ), + HFModel( + "NLLB 200 3.3B", + "facebook/nllb-200-3.3B", + ModelCategory.TRANSLATION, + "No Language Left Behind translation", + True, + False, + ), + HFModel( + "mBART Large 50", + "facebook/mbart-large-50-many-to-many-mmt", + ModelCategory.TRANSLATION, + "Multilingual BART for translation", + True, + False, + ), + ] + + SUMMARIZATION_MODELS = [ + HFModel( + "PEGASUS XSum", + "google/pegasus-xsum", + ModelCategory.SUMMARIZATION, + "Abstractive summarization model", + True, + False, + ), + HFModel( + "BART Large CNN", + "facebook/bart-large-cnn", + ModelCategory.SUMMARIZATION, + "CNN/DailyMail summarization", + True, + False, + ), + HFModel( + "T5 Base", + "t5-base", + ModelCategory.SUMMARIZATION, + "Text-to-Text Transfer Transformer", + True, + False, + ), + ] + + # Video Generation and Processing Models + VIDEO_GENERATION_MODELS = [ + HFModel( + "Stable Video Diffusion", + "stabilityai/stable-video-diffusion-img2vid", + ModelCategory.TEXT_TO_VIDEO, + "Image-to-video generation model", + True, + False, + ), + HFModel( + "AnimateDiff", + "guoyww/animatediff", + 
ModelCategory.VIDEO_GENERATION, + "Text-to-video animation generation", + True, + False, + ), + HFModel( + "VideoCrafter", + "videogen/VideoCrafter", + ModelCategory.TEXT_TO_VIDEO, + "High-quality text-to-video generation", + True, + False, + ), + HFModel( + "Video ChatGPT", + "mbzuai-oryx/Video-ChatGPT-7B", + ModelCategory.VIDEO_TO_TEXT, + "Video understanding and description", + True, + False, + ), + HFModel( + "Video-BLIP", + "salesforce/video-blip-opt-2.7b", + ModelCategory.VIDEO_CLASSIFICATION, + "Video content analysis and classification", + True, + False, + ), + ] + + # Code Generation and Development Models + CODE_GENERATION_MODELS = [ + HFModel( + "CodeLlama 34B Instruct", + "codellama/CodeLlama-34b-Instruct-hf", + ModelCategory.CODE_GENERATION, + "Large instruction-tuned code generation model", + True, + True, + ), + HFModel( + "StarCoder2 15B", + "bigcode/starcoder2-15b", + ModelCategory.CODE_GENERATION, + "Advanced code generation and completion", + True, + False, + ), + HFModel( + "DeepSeek Coder V2", + "deepseek-ai/deepseek-coder-6.7b-instruct", + ModelCategory.CODE_GENERATION, + "Specialized coding assistant", + True, + False, + ), + HFModel( + "WizardCoder 34B", + "WizardLM/WizardCoder-Python-34B-V1.0", + ModelCategory.CODE_GENERATION, + "Python-focused code generation", + True, + False, + ), + HFModel( + "Phind CodeLlama", + "Phind/Phind-CodeLlama-34B-v2", + ModelCategory.CODE_GENERATION, + "Optimized for code explanation and debugging", + True, + False, + ), + HFModel( + "Code T5+", + "Salesforce/codet5p-770m", + ModelCategory.CODE_COMPLETION, + "Code understanding and generation", + True, + False, + ), + HFModel( + "InCoder", + "facebook/incoder-6B", + ModelCategory.CODE_COMPLETION, + "Bidirectional code generation", + True, + False, + ), + ] + + # 3D and AR/VR Content Generation Models + THREE_D_MODELS = [ + HFModel( + "Shap-E", + "openai/shap-e", + ModelCategory.TEXT_TO_3D, + "Text-to-3D shape generation", + True, + False, + ), + HFModel( + 
"Point-E", + "openai/point-e", + ModelCategory.TEXT_TO_3D, + "Text-to-3D point cloud generation", + True, + False, + ), + HFModel( + "DreamFusion", + "google/dreamfusion", + ModelCategory.IMAGE_TO_3D, + "Image-to-3D mesh generation", + True, + False, + ), + HFModel( + "Magic3D", + "nvidia/magic3d", + ModelCategory.THREE_D_GENERATION, + "High-quality 3D content creation", + True, + False, + ), + HFModel( + "GET3D", + "nvidia/get3d", + ModelCategory.MESH_GENERATION, + "3D mesh generation from text", + True, + False, + ), + ] + + # Document Processing and OCR Models + DOCUMENT_PROCESSING_MODELS = [ + HFModel( + "TrOCR Large", + "microsoft/trocr-large-printed", + ModelCategory.OCR, + "Transformer-based OCR for printed text", + True, + False, + ), + HFModel( + "TrOCR Handwritten", + "microsoft/trocr-large-handwritten", + ModelCategory.HANDWRITING_RECOGNITION, + "Handwritten text recognition", + True, + False, + ), + HFModel( + "LayoutLMv3", + "microsoft/layoutlmv3-large", + ModelCategory.DOCUMENT_ANALYSIS, + "Document layout analysis and understanding", + True, + False, + ), + HFModel( + "Donut", + "naver-clova-ix/donut-base", + ModelCategory.DOCUMENT_ANALYSIS, + "OCR-free document understanding", + True, + False, + ), + HFModel( + "TableTransformer", + "microsoft/table-transformer-structure-recognition", + ModelCategory.TABLE_EXTRACTION, + "Table structure recognition", + True, + False, + ), + HFModel( + "FormNet", + "microsoft/formnet", + ModelCategory.FORM_PROCESSING, + "Form understanding and processing", + True, + False, + ), + ] + + # Multimodal AI Models + MULTIMODAL_MODELS = [ + HFModel( + "BLIP-2", + "Salesforce/blip2-opt-2.7b", + ModelCategory.VISION_LANGUAGE, + "Vision-language understanding and generation", + True, + False, + ), + HFModel( + "InstructBLIP", + "Salesforce/instructblip-vicuna-7b", + ModelCategory.MULTIMODAL_REASONING, + "Instruction-following multimodal model", + True, + False, + ), + HFModel( + "LLaVA", + "liuhaotian/llava-v1.5-7b", + 
ModelCategory.VISUAL_QUESTION_ANSWERING, + "Large Language and Vision Assistant", + True, + False, + ), + HFModel( + "GPT-4V", + "openai/gpt-4-vision-preview", + ModelCategory.MULTIMODAL_CHAT, + "Advanced multimodal conversational AI", + True, + True, + ), + HFModel( + "Flamingo", + "deepmind/flamingo-9b", + ModelCategory.CROSS_MODAL_GENERATION, + "Few-shot learning for vision and language", + True, + False, + ), + ] + + # Specialized AI Models + SPECIALIZED_AI_MODELS = [ + HFModel( + "MusicGen", + "facebook/musicgen-medium", + ModelCategory.MUSIC_GENERATION, + "Text-to-music generation", + True, + False, + ), + HFModel( + "AudioCraft", + "facebook/audiocraft_musicgen_melody", + ModelCategory.MUSIC_GENERATION, + "Melody-conditioned music generation", + True, + False, + ), + HFModel( + "Real-ESRGAN", + "xinntao/realesrgan-x4plus", + ModelCategory.SUPER_RESOLUTION, + "Image super-resolution", + True, + False, + ), + HFModel( + "GFPGAN", + "TencentARC/GFPGAN", + ModelCategory.FACE_RESTORATION, + "Face restoration and enhancement", + True, + False, + ), + HFModel( + "LaMa", + "advimman/lama", + ModelCategory.IMAGE_INPAINTING, + "Large Mask Inpainting", + True, + False, + ), + HFModel( + "Background Remover", + "briaai/RMBG-1.4", + ModelCategory.BACKGROUND_REMOVAL, + "Automatic background removal", + True, + False, + ), + HFModel( + "Voice Cloner", + "coqui/XTTS-v2", + ModelCategory.VOICE_CLONING, + "Multilingual voice cloning", + True, + False, + ), + ] + + # Creative Content Models + CREATIVE_CONTENT_MODELS = [ + HFModel( + "GPT-3.5 Creative", + "openai/gpt-3.5-turbo-instruct", + ModelCategory.CREATIVE_WRITING, + "Creative writing and storytelling", + True, + True, + ), + HFModel( + "Novel AI", + "novelai/genji-python-6b", + ModelCategory.STORY_GENERATION, + "Interactive story generation", + True, + False, + ), + HFModel( + "Poet Assistant", + "gpt2-poetry", + ModelCategory.POETRY_GENERATION, + "Poetry generation and analysis", + True, + False, + ), + HFModel( + "Blog 
Writer", + "google/flan-t5-large", + ModelCategory.BLOG_WRITING, + "Blog content creation", + True, + False, + ), + HFModel( + "Marketing Copy AI", + "microsoft/DialoGPT-large", + ModelCategory.MARKETING_COPY, + "Marketing content generation", + True, + False, + ), + ] + + # Game Development Models + GAME_DEVELOPMENT_MODELS = [ + HFModel( + "Character AI", + "character-ai/character-generator", + ModelCategory.CHARACTER_GENERATION, + "Game character generation and design", + True, + False, + ), + HFModel( + "Level Designer", + "unity/level-generator", + ModelCategory.LEVEL_GENERATION, + "Game level and environment generation", + True, + False, + ), + HFModel( + "Dialogue Writer", + "bioware/dialogue-generator", + ModelCategory.DIALOGUE_GENERATION, + "Game dialogue and narrative generation", + True, + False, + ), + HFModel( + "Asset Creator", + "epic/asset-generator", + ModelCategory.GAME_ASSET_GENERATION, + "Game asset and texture generation", + True, + False, + ), + ] + + # Science and Research Models + SCIENCE_RESEARCH_MODELS = [ + HFModel( + "AlphaFold", + "deepmind/alphafold2", + ModelCategory.PROTEIN_FOLDING, + "Protein structure prediction", + True, + False, + ), + HFModel( + "ChemBERTa", + "DeepChem/ChemBERTa-77M-MLM", + ModelCategory.MOLECULE_GENERATION, + "Chemical compound analysis", + True, + False, + ), + HFModel( + "SciBERT", + "allenai/scibert_scivocab_uncased", + ModelCategory.SCIENTIFIC_WRITING, + "Scientific text understanding", + True, + False, + ), + HFModel( + "Research Assistant", + "microsoft/specter2", + ModelCategory.RESEARCH_ASSISTANCE, + "Research paper analysis and recommendations", + True, + False, + ), + HFModel( + "Data Analyst", + "microsoft/data-copilot", + ModelCategory.DATA_ANALYSIS, + "Automated data analysis and insights", + True, + False, + ), + ] + + # Business and Productivity Models + BUSINESS_PRODUCTIVITY_MODELS = [ + HFModel( + "Email Assistant", + "microsoft/email-generator", + ModelCategory.EMAIL_GENERATION, + 
"Professional email composition", + True, + False, + ), + HFModel( + "Presentation AI", + "gamma/presentation-generator", + ModelCategory.PRESENTATION_CREATION, + "Automated presentation creation", + True, + False, + ), + HFModel( + "Report Writer", + "openai/report-generator", + ModelCategory.REPORT_GENERATION, + "Business report generation", + True, + False, + ), + HFModel( + "Meeting Summarizer", + "microsoft/meeting-summarizer", + ModelCategory.MEETING_SUMMARIZATION, + "Meeting notes and action items", + True, + False, + ), + HFModel( + "Project Planner", + "atlassian/project-ai", + ModelCategory.PROJECT_PLANNING, + "Project planning and management", + True, + False, + ), + ] + + # AI Teacher Models - Best-in-Class Educational AI System + AI_TEACHER_MODELS = [ + # Primary AI Tutoring Models - Interactive & Conversational + HFModel( + "AI Tutor Interactive", + "microsoft/DialoGPT-medium", + ModelCategory.AI_TUTORING, + "Interactive AI tutor for conversational learning with dialogue management", + True, + False, + 2048, + True, + ), + HFModel( + "Goal-Oriented Tutor", + "microsoft/GODEL-v1_1-large-seq2seq", + ModelCategory.AI_TUTORING, + "Goal-oriented conversational AI for personalized tutoring sessions", + True, + False, + 2048, + True, + ), + HFModel( + "Advanced Instruction Tutor", + "google/flan-t5-large", + ModelCategory.AI_TUTORING, + "Advanced instruction-following AI tutor for complex educational tasks", + True, + False, + 2048, + True, + ), + # Educational Content Generation - Creative & Comprehensive + HFModel( + "Educational Content Creator Pro", + "facebook/bart-large", + ModelCategory.EDUCATIONAL_CONTENT, + "Professional educational content generation for all learning levels", + True, + False, + 1024, + False, + ), + HFModel( + "Multilingual Education AI", + "bigscience/bloom-560m", + ModelCategory.EDUCATIONAL_CONTENT, + "Global multilingual educational content for diverse learners", + True, + False, + 2048, + True, + ), + HFModel( + "Academic 
Writing Assistant", + "microsoft/prophetnet-large-uncased", + ModelCategory.EDUCATIONAL_CONTENT, + "Academic content creation with advanced text generation capabilities", + True, + False, + 1024, + False, + ), + # Lesson Planning & Curriculum Design - Structured & Professional + HFModel( + "Master Lesson Planner", + "facebook/bart-large-cnn", + ModelCategory.LESSON_PLANNING, + "Comprehensive lesson planning with summarization and structure", + True, + False, + 1024, + False, + ), + HFModel( + "Curriculum Architect", + "microsoft/prophetnet-base-uncased", + ModelCategory.CURRICULUM_DESIGN, + "Professional curriculum planning and educational program design", + True, + False, + 1024, + False, + ), + HFModel( + "Activity Designer", + "google/t5-base", + ModelCategory.LESSON_PLANNING, + "Interactive learning activity and exercise generation", + True, + False, + 512, + True, + ), + # Subject-Specific Excellence - STEM Focus + HFModel( + "Programming Mentor Pro", + "microsoft/codebert-base", + ModelCategory.CODING_INSTRUCTION, + "Expert programming education with code analysis and explanation", + True, + False, + 1024, + False, + ), + HFModel( + "Advanced Code Instructor", + "microsoft/graphcodebert-base", + ModelCategory.CODING_INSTRUCTION, + "Advanced programming instruction with graph understanding", + True, + False, + 1024, + False, + ), + HFModel( + "Algorithm Tutor Elite", + "microsoft/unixcoder-base", + ModelCategory.CODING_INSTRUCTION, + "Elite algorithm education and computational thinking development", + True, + False, + 1024, + False, + ), + # Science & Mathematics Excellence + HFModel( + "Science Research Educator", + "allenai/scibert_scivocab_uncased", + ModelCategory.SCIENCE_TUTORING, + "Scientific education with research-grade knowledge and vocabulary", + True, + False, + 512, + False, + ), + HFModel( + "Advanced Science AI", + "facebook/galactica-125m", + ModelCategory.SCIENCE_TUTORING, + "Advanced scientific knowledge and research methodology education", 
+ True, + False, + 2048, + True, + ), + HFModel( + "Mathematical Reasoning Master", + "google/flan-t5-xl", + ModelCategory.MATH_TUTORING, + "Advanced mathematical reasoning, proofs, and problem-solving", + True, + False, + 2048, + True, + ), + HFModel( + "Interactive Math Tutor", + "microsoft/DialoGPT-small", + ModelCategory.MATH_TUTORING, + "Interactive mathematics tutoring with step-by-step explanations", + True, + False, + 1024, + True, + ), + # Language & Literature Excellence + HFModel( + "Multilingual Language Master", + "facebook/mbart-large-50-many-to-many-mmt", + ModelCategory.LANGUAGE_TUTORING, + "Advanced multilingual education and cross-language learning", + True, + False, + 1024, + False, + ), + HFModel( + "Literature & Language AI", + "microsoft/prophetnet-large-uncased-cnndm", + ModelCategory.LANGUAGE_TUTORING, + "Literature analysis and advanced language instruction", + True, + False, + 1024, + False, + ), + HFModel( + "Grammar & Comprehension Expert", + "google/electra-base-discriminator", + ModelCategory.LANGUAGE_TUTORING, + "Expert grammar instruction and reading comprehension development", + True, + False, + 512, + False, + ), + # Assessment & Evaluation Excellence + HFModel( + "Assessment Designer Pro", + "microsoft/DialoGPT-large", + ModelCategory.QUIZ_GENERATION, + "Professional assessment and quiz generation with interaction", + True, + False, + 2048, + True, + ), + HFModel( + "Learning Progress Analyzer", + "facebook/bart-large", + ModelCategory.LEARNING_ASSESSMENT, + "Comprehensive learning assessment and progress tracking", + True, + False, + 1024, + False, + ), + HFModel( + "Question Master AI", + "google/t5-base", + ModelCategory.QUIZ_GENERATION, + "Intelligent question generation for all educational levels", + True, + False, + 512, + True, + ), + HFModel( + "Exam Preparation Specialist", + "microsoft/unilm-base-cased", + ModelCategory.EXAM_PREPARATION, + "Specialized exam preparation and test strategy development", + True, + False, + 
1024, + False, + ), + # Personalized & Adaptive Learning Excellence + HFModel( + "Personal Learning Architect", + "microsoft/deberta-v3-base", + ModelCategory.PERSONALIZED_LEARNING, + "Advanced personalized learning path creation and optimization", + True, + False, + 512, + False, + ), + HFModel( + "Adaptive Learning Engine", + "facebook/opt-125m", + ModelCategory.ADAPTIVE_LEARNING, + "Intelligent adaptive learning with dynamic content adjustment", + True, + False, + 2048, + True, + ), + HFModel( + "Learning Analytics Expert", + "microsoft/layoutlm-base-uncased", + ModelCategory.LEARNING_ANALYTICS, + "Advanced learning analytics and educational data interpretation", + True, + False, + 512, + False, + ), + # Concept Explanation & Understanding Masters + HFModel( + "Concept Explanation Master", + "microsoft/deberta-v3-base", + ModelCategory.CONCEPT_EXPLANATION, + "Master-level concept explanation and knowledge breakdown", + True, + False, + 512, + False, + ), + HFModel( + "Knowledge Synthesizer", + "google/pegasus-xsum", + ModelCategory.CONCEPT_EXPLANATION, + "Advanced knowledge synthesis and concept summarization", + True, + False, + 512, + False, + ), + HFModel( + "Interactive Concept Guide", + "facebook/bart-base", + ModelCategory.CONCEPT_EXPLANATION, + "Interactive concept teaching with clarification and examples", + True, + False, + 1024, + False, + ), + # Homework & Study Support Excellence + HFModel( + "Programming Homework Expert", + "microsoft/codebert-base-mlm", + ModelCategory.HOMEWORK_ASSISTANCE, + "Expert programming homework assistance and debugging support", + True, + False, + 1024, + False, + ), + HFModel( + "Universal Homework Helper", + "google/flan-t5-small", + ModelCategory.HOMEWORK_ASSISTANCE, + "Comprehensive homework assistance across all academic subjects", + True, + False, + 1024, + True, + ), + HFModel( + "Global Study Assistant", + "facebook/mbart-large-cc25", + ModelCategory.HOMEWORK_ASSISTANCE, + "Multilingual homework support with 
cultural context understanding", + True, + False, + 1024, + False, + ), + # Study Materials & Resources Excellence + HFModel( + "Study Guide Architect", + "microsoft/prophetnet-large-uncased", + ModelCategory.STUDY_GUIDE_CREATION, + "Professional study guide creation and learning material development", + True, + False, + 1024, + False, + ), + HFModel( + "Educational Resource Creator", + "facebook/bart-large-xsum", + ModelCategory.STUDY_GUIDE_CREATION, + "Comprehensive educational resource and reference material creation", + True, + False, + 1024, + False, + ), + # Interactive Learning & Gamification + HFModel( + "Educational Game Designer", + "microsoft/DialoGPT-base", + ModelCategory.EDUCATIONAL_GAMES, + "Interactive educational games and gamified learning experiences", + True, + False, + 1024, + True, + ), + HFModel( + "Learning Game Engine", + "google/bert-base-uncased", + ModelCategory.EDUCATIONAL_GAMES, + "Educational game mechanics and interactive learning systems", + True, + False, + 512, + False, + ), + # History & Social Studies Excellence + HFModel( + "History Professor AI", + "microsoft/deberta-large", + ModelCategory.HISTORY_TUTORING, + "Professor-level historical analysis and social studies education", + True, + False, + 1024, + False, + ), + HFModel( + "Interactive History Guide", + "facebook/opt-350m", + ModelCategory.HISTORY_TUTORING, + "Interactive historical narratives and timeline exploration", + True, + False, + 2048, + True, + ), + # Multi-Subject Teaching Excellence + HFModel( + "Master Subject Teacher", + "google/flan-t5-base", + ModelCategory.SUBJECT_TEACHING, + "Expert multi-subject teaching with instruction-following excellence", + True, + False, + 1024, + True, + ), + HFModel( + "Universal Educator AI", + "microsoft/unilm-large-cased", + ModelCategory.SUBJECT_TEACHING, + "Universal education AI with cross-disciplinary knowledge", + True, + False, + 1024, + False, + ), + # Advanced Analytics & Optimization + HFModel( + "Advanced Learning 
Analytics", + "microsoft/layoutlm-large-uncased", + ModelCategory.LEARNING_ANALYTICS, + "Enterprise-level learning analytics and educational insights", + True, + False, + 1024, + False, + ), + HFModel( + "Personalization Engine Pro", + "google/electra-large-discriminator", + ModelCategory.PERSONALIZED_LEARNING, + "Advanced AI personalization with learning style adaptation", + True, + False, + 512, + False, + ), + HFModel( + "Global Adaptive System", + "facebook/mbart-large-50", + ModelCategory.ADAPTIVE_LEARNING, + "Global adaptive learning system with multilingual capabilities", + True, + False, + 1024, + False, + ), + ] + + # Qwen Models - Advanced Reasoning and Multimodal AI + QWEN_MODELS = [ + # Qwen2.5 Series - Latest Models + HFModel( + "Qwen2.5-72B-Instruct", + "Qwen/Qwen2.5-72B-Instruct", + ModelCategory.TEXT_GENERATION, + "Large-scale instruction-following model for complex reasoning", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen2.5-32B-Instruct", + "Qwen/Qwen2.5-32B-Instruct", + ModelCategory.TEXT_GENERATION, + "High-performance instruction model for advanced tasks", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen2.5-14B-Instruct", + "Qwen/Qwen2.5-14B-Instruct", + ModelCategory.TEXT_GENERATION, + "Efficient large model with excellent reasoning capabilities", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen2.5-7B-Instruct", + "Qwen/Qwen2.5-7B-Instruct", + ModelCategory.TEXT_GENERATION, + "Optimized 7B model for general-purpose applications", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen2.5-3B-Instruct", + "Qwen/Qwen2.5-3B-Instruct", + ModelCategory.TEXT_GENERATION, + "Lightweight model for resource-constrained environments", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen2.5-1.5B-Instruct", + "Qwen/Qwen2.5-1.5B-Instruct", + ModelCategory.TEXT_GENERATION, + "Ultra-lightweight model for edge deployment", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen2.5-0.5B-Instruct", + 
"Qwen/Qwen2.5-0.5B-Instruct", + ModelCategory.TEXT_GENERATION, + "Minimal footprint model for basic applications", + True, + False, + 32768, + True, + ), + # Qwen2.5-Coder Series - Programming Specialists + HFModel( + "Qwen2.5-Coder-32B-Instruct", + "Qwen/Qwen2.5-Coder-32B-Instruct", + ModelCategory.QWEN_CODE, + "Advanced code generation and programming assistance", + True, + False, + 131072, + True, + ), + HFModel( + "Qwen2.5-Coder-14B-Instruct", + "Qwen/Qwen2.5-Coder-14B-Instruct", + ModelCategory.QWEN_CODE, + "Code generation with excellent debugging capabilities", + True, + False, + 131072, + True, + ), + HFModel( + "Qwen2.5-Coder-7B-Instruct", + "Qwen/Qwen2.5-Coder-7B-Instruct", + ModelCategory.QWEN_CODE, + "Efficient coding assistant for multiple languages", + True, + False, + 131072, + True, + ), + HFModel( + "Qwen2.5-Coder-3B-Instruct", + "Qwen/Qwen2.5-Coder-3B-Instruct", + ModelCategory.QWEN_CODE, + "Lightweight programming assistant", + True, + False, + 131072, + True, + ), + HFModel( + "Qwen2.5-Coder-1.5B-Instruct", + "Qwen/Qwen2.5-Coder-1.5B-Instruct", + ModelCategory.QWEN_CODE, + "Compact code generation model", + True, + False, + 131072, + True, + ), + # Qwen2.5-Math Series - Mathematical Reasoning + HFModel( + "Qwen2.5-Math-72B-Instruct", + "Qwen/Qwen2.5-Math-72B-Instruct", + ModelCategory.QWEN_MATH, + "Advanced mathematical problem solving and reasoning", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen2.5-Math-7B-Instruct", + "Qwen/Qwen2.5-Math-7B-Instruct", + ModelCategory.QWEN_MATH, + "Mathematical reasoning and calculation assistance", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen2.5-Math-1.5B-Instruct", + "Qwen/Qwen2.5-Math-1.5B-Instruct", + ModelCategory.QWEN_MATH, + "Compact mathematical problem solver", + True, + False, + 32768, + True, + ), + # QwQ Series - Reasoning Specialists + HFModel( + "QwQ-32B-Preview", + "Qwen/QwQ-32B-Preview", + ModelCategory.QWEN_REASONING, + "Advanced reasoning and logical thinking model", + 
True, + False, + 32768, + True, + ), + # Qwen2-VL Series - Vision-Language Models + HFModel( + "Qwen2-VL-72B-Instruct", + "Qwen/Qwen2-VL-72B-Instruct", + ModelCategory.QWEN_VISION, + "Large-scale vision-language understanding and generation", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen2-VL-7B-Instruct", + "Qwen/Qwen2-VL-7B-Instruct", + ModelCategory.QWEN_VISION, + "Efficient vision-language model for multimodal tasks", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen2-VL-2B-Instruct", + "Qwen/Qwen2-VL-2B-Instruct", + ModelCategory.QWEN_VISION, + "Lightweight vision-language model", + True, + False, + 32768, + True, + ), + # Qwen2-Audio Series - Audio Understanding + HFModel( + "Qwen2-Audio-7B-Instruct", + "Qwen/Qwen2-Audio-7B-Instruct", + ModelCategory.QWEN_AUDIO, + "Advanced audio understanding and generation", + True, + False, + 32768, + True, + ), + # Qwen Legacy Models - Still Powerful + HFModel( + "Qwen1.5-110B-Chat", + "Qwen/Qwen1.5-110B-Chat", + ModelCategory.CONVERSATIONAL, + "Large conversational model with broad knowledge", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen1.5-72B-Chat", + "Qwen/Qwen1.5-72B-Chat", + ModelCategory.CONVERSATIONAL, + "Conversational AI with excellent reasoning", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen1.5-32B-Chat", + "Qwen/Qwen1.5-32B-Chat", + ModelCategory.CONVERSATIONAL, + "Efficient chat model for interactive applications", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen1.5-14B-Chat", + "Qwen/Qwen1.5-14B-Chat", + ModelCategory.CONVERSATIONAL, + "Balanced performance chat model", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen1.5-7B-Chat", + "Qwen/Qwen1.5-7B-Chat", + ModelCategory.CONVERSATIONAL, + "Popular chat model with good performance", + True, + False, + 32768, + True, + ), + HFModel( + "Qwen1.5-4B-Chat", + "Qwen/Qwen1.5-4B-Chat", + ModelCategory.CONVERSATIONAL, + "Lightweight conversational AI", + True, + False, + 32768, + True, + ), + ] + + # 
DeepSeek Models - Coding and Reasoning Excellence + DEEPSEEK_MODELS = [ + # DeepSeek-V3 Series - Latest Generation + HFModel( + "DeepSeek-V3", + "deepseek-ai/DeepSeek-V3", + ModelCategory.DEEPSEEK_REASONING, + "Latest generation reasoning and knowledge model", + True, + False, + 65536, + True, + ), + HFModel( + "DeepSeek-V3-Base", + "deepseek-ai/DeepSeek-V3-Base", + ModelCategory.TEXT_GENERATION, + "Foundation model for various downstream tasks", + True, + False, + 65536, + True, + ), + # DeepSeek-V2.5 Series + HFModel( + "DeepSeek-V2.5", + "deepseek-ai/DeepSeek-V2.5", + ModelCategory.DEEPSEEK_REASONING, + "Advanced reasoning and general intelligence model", + True, + False, + 32768, + True, + ), + # DeepSeek-Coder Series - Programming Specialists + HFModel( + "DeepSeek-Coder-V2-Instruct", + "deepseek-ai/DeepSeek-Coder-V2-Instruct", + ModelCategory.DEEPSEEK_CODING, + "Advanced code generation and programming assistance", + True, + False, + 163840, + True, + ), + HFModel( + "DeepSeek-Coder-V2-Base", + "deepseek-ai/DeepSeek-Coder-V2-Base", + ModelCategory.DEEPSEEK_CODING, + "Foundation coding model for fine-tuning", + True, + False, + 163840, + True, + ), + HFModel( + "DeepSeek-Coder-33B-Instruct", + "deepseek-ai/deepseek-coder-33b-instruct", + ModelCategory.DEEPSEEK_CODING, + "Large-scale code generation and debugging", + True, + False, + 16384, + True, + ), + HFModel( + "DeepSeek-Coder-6.7B-Instruct", + "deepseek-ai/deepseek-coder-6.7b-instruct", + ModelCategory.DEEPSEEK_CODING, + "Efficient code assistance and generation", + True, + False, + 16384, + True, + ), + HFModel( + "DeepSeek-Coder-1.3B-Instruct", + "deepseek-ai/deepseek-coder-1.3b-instruct", + ModelCategory.DEEPSEEK_CODING, + "Lightweight coding assistant", + True, + False, + 16384, + True, + ), + # DeepSeek-Math Series - Mathematical Reasoning + HFModel( + "DeepSeek-Math-7B-Instruct", + "deepseek-ai/deepseek-math-7b-instruct", + ModelCategory.DEEPSEEK_MATH, + "Mathematical problem solving and reasoning", 
+ True, + False, + 4096, + True, + ), + HFModel( + "DeepSeek-Math-7B-Base", + "deepseek-ai/deepseek-math-7b-base", + ModelCategory.DEEPSEEK_MATH, + "Foundation model for mathematical reasoning", + True, + False, + 4096, + True, + ), + # DeepSeek Chat Models + HFModel( + "DeepSeek-67B-Chat", + "deepseek-ai/deepseek-llm-67b-chat", + ModelCategory.CONVERSATIONAL, + "Large conversational model with strong reasoning", + True, + False, + 4096, + True, + ), + HFModel( + "DeepSeek-7B-Chat", + "deepseek-ai/deepseek-llm-7b-chat", + ModelCategory.CONVERSATIONAL, + "Efficient chat model for general conversations", + True, + False, + 4096, + True, + ), + # DeepSeek-VL Series - Vision-Language + HFModel( + "DeepSeek-VL-7B-Chat", + "deepseek-ai/deepseek-vl-7b-chat", + ModelCategory.VISION_LANGUAGE, + "Vision-language understanding and conversation", + True, + False, + 4096, + True, + ), + HFModel( + "DeepSeek-VL-1.3B-Chat", + "deepseek-ai/deepseek-vl-1.3b-chat", + ModelCategory.VISION_LANGUAGE, + "Lightweight vision-language model", + True, + False, + 4096, + True, + ), + ] + + # Advanced Image Editing Models + IMAGE_EDITING_MODELS = [ + # Professional Image Editing + HFModel( + "SDXL Inpainting", + "diffusers/stable-diffusion-xl-1.0-inpainting-0.1", + ModelCategory.IMAGE_EDITING, + "High-quality image inpainting and editing", + True, + False, + 1024, + False, + ), + HFModel( + "ControlNet Inpainting", + "lllyasviel/control_v11p_sd15_inpaint", + ModelCategory.IMAGE_EDITING, + "Controllable image inpainting with precise editing", + True, + False, + 512, + False, + ), + HFModel( + "InstantID Face Editor", + "InstantX/InstantID", + ModelCategory.FACE_ENHANCEMENT, + "Identity-preserving face editing and enhancement", + True, + False, + 512, + False, + ), + HFModel( + "Real-ESRGAN Upscaler", + "ai-forever/Real-ESRGAN", + ModelCategory.IMAGE_UPSCALING, + "Advanced image super-resolution and enhancement", + True, + False, + 1024, + False, + ), + HFModel( + "GFPGAN Face Restoration", + 
"Xintao/GFPGAN", + ModelCategory.FACE_RESTORATION, + "High-quality face restoration and enhancement", + True, + False, + 512, + False, + ), + HFModel( + "CodeFormer Face Restoration", + "sczhou/CodeFormer", + ModelCategory.FACE_RESTORATION, + "Robust face restoration for low-quality images", + True, + False, + 512, + False, + ), + HFModel( + "Background Removal", + "briaai/RMBG-1.4", + ModelCategory.BACKGROUND_REMOVAL, + "Precise background removal and segmentation", + True, + False, + 1024, + False, + ), + HFModel( + "U2-Net Background Removal", + "simonw/u2net-portrait-segmentation", + ModelCategory.BACKGROUND_REMOVAL, + "Portrait and object background removal", + True, + False, + 320, + False, + ), + HFModel( + "Photo Colorization", + "microsoft/beit-base-patch16-224-pt22k-ft22k", + ModelCategory.COLOR_CORRECTION, + "AI-powered photo colorization and enhancement", + True, + False, + 224, + False, + ), + HFModel( + "Style Transfer Neural", + "pytorch/vision", + ModelCategory.ARTISTIC_FILTER, + "Neural style transfer for artistic image effects", + True, + False, + 512, + False, + ), + ] + + # Face Swap and Manipulation Models + FACE_SWAP_MODELS = [ + # Advanced Face Swapping + HFModel( + "InsightFace SwapFace", + "deepinsight/inswapper_128.onnx", + ModelCategory.FACE_SWAP, + "High-quality face swapping with identity preservation", + True, + False, + 128, + False, + ), + HFModel( + "SimSwap Face Swap", + "ppogg/simswap_official", + ModelCategory.FACE_SWAP, + "Realistic face swapping for videos and images", + True, + False, + 224, + False, + ), + HFModel( + "FaceX-Zoo Face Swap", + "FacePerceiver/FaceX-Zoo", + ModelCategory.FACE_SWAP, + "Multi-purpose face analysis and swapping toolkit", + True, + False, + 112, + False, + ), + HFModel( + "Face Enhancement Pro", + "TencentARC/GFPGAN", + ModelCategory.FACE_ENHANCEMENT, + "Professional face enhancement and restoration", + True, + False, + 512, + False, + ), + HFModel( + "DualStyleGAN Face Edit", + 
"williamyang1991/DualStyleGAN", + ModelCategory.FACE_ENHANCEMENT, + "Style-controllable face image editing", + True, + False, + 1024, + False, + ), + HFModel( + "MegaPortraits Face Animate", + "NVlabs/MegaPortraits", + ModelCategory.FACIAL_ANIMATION, + "One-shot facial animation and expression transfer", + True, + False, + 256, + False, + ), + ] + + # Advanced TTS and STT Models + ADVANCED_SPEECH_MODELS = [ + # Multilingual Text-to-Speech + HFModel( + "XTTS v2 Multilingual", + "coqui/XTTS-v2", + ModelCategory.MULTILINGUAL_TTS, + "High-quality multilingual text-to-speech with voice cloning", + True, + False, + 24000, + True, + ), + HFModel( + "Bark Text-to-Speech", + "suno/bark", + ModelCategory.ADVANCED_TTS, + "Generative TTS with music, sound effects, and multiple speakers", + True, + False, + 24000, + False, + ), + HFModel( + "SpeechT5 TTS", + "microsoft/speecht5_tts", + ModelCategory.ADVANCED_TTS, + "High-quality neural text-to-speech synthesis", + True, + False, + 16000, + False, + ), + HFModel( + "VALL-E X Multilingual", + "Plachtaa/VALL-E-X", + ModelCategory.MULTILINGUAL_TTS, + "Zero-shot voice synthesis in multiple languages", + True, + False, + 24000, + False, + ), + HFModel( + "Arabic TTS", + "arabic-speech-corpus/tts-arabic", + ModelCategory.MULTILINGUAL_TTS, + "High-quality Arabic text-to-speech synthesis", + True, + False, + 22050, + False, + ), + HFModel( + "Tortoise TTS", + "jbetker/tortoise-tts", + ModelCategory.VOICE_CLONING, + "High-quality voice cloning and synthesis", + True, + False, + 22050, + False, + ), + # Advanced Speech-to-Text + HFModel( + "Whisper Large v3", + "openai/whisper-large-v3", + ModelCategory.MULTILINGUAL_STT, + "State-of-the-art multilingual speech recognition", + True, + False, + 30, + False, + ), + HFModel( + "Whisper Large v3 Turbo", + "openai/whisper-large-v3-turbo", + ModelCategory.MULTILINGUAL_STT, + "Fast multilingual speech recognition with high accuracy", + True, + False, + 30, + True, + ), + HFModel( + "Arabic 
Whisper", + "arabic-speech-corpus/whisper-large-arabic", + ModelCategory.MULTILINGUAL_STT, + "Optimized Arabic speech recognition model", + True, + False, + 30, + False, + ), + HFModel( + "MMS Speech Recognition", + "facebook/mms-1b-all", + ModelCategory.MULTILINGUAL_STT, + "Massively multilingual speech recognition (1000+ languages)", + True, + False, + 16000, + False, + ), + HFModel( + "Wav2Vec2 Arabic", + "facebook/wav2vec2-large-xlsr-53-arabic", + ModelCategory.MULTILINGUAL_STT, + "Arabic speech recognition with Wav2Vec2 architecture", + True, + False, + 16000, + False, + ), + HFModel( + "SpeechT5 ASR", + "microsoft/speecht5_asr", + ModelCategory.ADVANCED_STT, + "Advanced automatic speech recognition", + True, + False, + 16000, + False, + ), + # Real-time Translation and Voice Conversion + HFModel( + "SeamlessM4T", + "facebook/seamless-m4t-v2-large", + ModelCategory.REAL_TIME_TRANSLATION, + "Multilingual speech-to-speech translation", + True, + False, + 16000, + True, + ), + HFModel( + "Voice Conversion VITS", + "jaywalnut310/vits-ljs", + ModelCategory.VOICE_CONVERSION, + "High-quality voice conversion and synthesis", + True, + False, + 22050, + False, + ), + HFModel( + "RVC Voice Clone", + "lj1995/GPT-SoVITS", + ModelCategory.VOICE_CLONING, + "Real-time voice cloning and conversion", + True, + False, + 32000, + True, + ), + ] + + # Talking Avatar and Video Generation Models + TALKING_AVATAR_MODELS = [ + # Talking Head Generation + HFModel( + "SadTalker Talking Head", + "vinthony/SadTalker", + ModelCategory.TALKING_AVATAR, + "Generate talking head videos from audio and single image", + True, + False, + 256, + False, + ), + HFModel( + "Real-Time Face Animation", + "PaddlePaddle/PaddleGAN-FOM", + ModelCategory.FACIAL_ANIMATION, + "Real-time facial animation and expression control", + True, + False, + 256, + True, + ), + HFModel( + "LivePortrait Animation", + "KwaiVGI/LivePortrait", + ModelCategory.TALKING_AVATAR, + "High-quality portrait animation with lip sync", 
+ True, + False, + 512, + False, + ), + HFModel( + "DualTalker Video", + "OpenTalker/DualTalker", + ModelCategory.TALKING_AVATAR, + "Dual-modal talking face generation with enhanced quality", + True, + False, + 256, + False, + ), + HFModel( + "Video Retalking", + "vinthony/video-retalking", + ModelCategory.LIP_SYNC, + "Audio-driven lip sync for existing videos", + True, + False, + 224, + False, + ), + HFModel( + "Wav2Lip Lip Sync", + "Rudrabha/Wav2Lip", + ModelCategory.LIP_SYNC, + "Accurate lip sync generation from audio", + True, + False, + 96, + False, + ), + HFModel( + "Digital Human Avatar", + "modelscope/damo-text-to-video-synthesis", + ModelCategory.VIRTUAL_PRESENTER, + "Generate digital human presenter videos", + True, + False, + 320, + False, + ), + HFModel( + "AI News Anchor", + "microsoft/DiT-XL-2-256", + ModelCategory.AI_ANCHOR, + "Professional AI news anchor and presenter generation", + True, + False, + 256, + False, + ), + HFModel( + "Avatar Gesture Control", + "ZhengPeng7/BiSeNet", + ModelCategory.GESTURE_GENERATION, + "Generate natural gestures and body language for avatars", + True, + False, + 512, + False, + ), + ] + + # Interactive Language Models (English-Arabic Focus) + INTERACTIVE_LANGUAGE_MODELS = [ + # Bilingual Conversation Models + HFModel( + "AceGPT Arabic-English", + "FreedomIntelligence/AceGPT-13B", + ModelCategory.BILINGUAL_CONVERSATION, + "Bilingual Arabic-English conversation model", + True, + False, + 4096, + True, + ), + HFModel( + "Jais Arabic Chat", + "core42/jais-13b-chat", + ModelCategory.BILINGUAL_CONVERSATION, + "Advanced Arabic conversation model with English support", + True, + False, + 2048, + True, + ), + HFModel( + "AraBART Conversational", + "aubmindlab/arabart-base-conversational", + ModelCategory.BILINGUAL_CONVERSATION, + "Arabic conversational AI with cultural understanding", + True, + False, + 1024, + True, + ), + HFModel( + "Multilingual Chat Assistant", + "microsoft/DialoGPT-large", + 
class HuggingFaceInference:
    """Asynchronous client for the Hugging Face Inference API.

    Use it as an async context manager so the underlying HTTP session is
    opened and closed around the calls::

        async with HuggingFaceInference(token) as client:
            result = await client.summarization("some/model", text)

    Every task method builds a JSON payload of the form
    ``{"inputs": ..., "parameters": {...}}`` and POSTs it to
    ``base_url + model_id``. Extra keyword arguments are forwarded verbatim
    inside ``parameters`` and override the defaults listed by each method.
    """

    # Upper bound on how many times a "model is loading" (HTTP 503) answer is
    # retried before the request is reported as failed.
    MAX_LOADING_RETRIES = 5
    # Longest single pause, in seconds, between two loading retries.
    MAX_LOADING_WAIT = 60
    # Pause used when the 503 body carries no usable ``estimated_time``.
    DEFAULT_LOADING_WAIT = 30

    def __init__(
        self,
        api_token: str,
        base_url: str = "https://api-inference.huggingface.co/models/",
    ):
        """Store the credentials; the HTTP session is created in ``__aenter__``.

        Args:
            api_token: Token sent as ``Authorization: Bearer <token>``.
            base_url: Prefix the model id is appended to when building URLs.
        """
        self.api_token = api_token
        self.base_url = base_url
        self.session = None

    async def __aenter__(self):
        """Open the aiohttp session (bearer auth header, 300 s total timeout)."""
        self.session = aiohttp.ClientSession(
            headers={"Authorization": f"Bearer {self.api_token}"},
            timeout=aiohttp.ClientTimeout(total=300),  # 5 minutes timeout
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the HTTP session if one was opened."""
        if self.session:
            await self.session.close()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _b64(data: bytes) -> str:
        """Return *data* base64-encoded as text, ready for a JSON payload."""
        return base64.b64encode(data).decode()

    def _require_session(self):
        """Return the open session, or fail with an actionable message.

        Raises:
            RuntimeError: If no session has been opened yet.
        """
        if self.session is None:
            raise RuntimeError(
                "HuggingFaceInference has no open session; "
                "use it inside 'async with HuggingFaceInference(...) as client:'"
            )
        return self.session

    async def _loading_delay(self, response) -> float:
        """Work out how long to wait after a 503 "model loading" response.

        The body is parsed leniently: a missing, non-JSON or non-numeric
        ``estimated_time`` falls back to ``DEFAULT_LOADING_WAIT``. The result
        is clamped to ``[0, MAX_LOADING_WAIT]`` seconds.
        """
        try:
            # content_type=None skips aiohttp's strict content-type check, so
            # an HTML/plain-text 503 body does not abort the retry logic.
            info = await response.json(content_type=None)
        except ValueError:
            info = None

        estimated = (
            info.get("estimated_time", self.DEFAULT_LOADING_WAIT)
            if isinstance(info, dict)
            else self.DEFAULT_LOADING_WAIT
        )
        if not isinstance(estimated, (int, float)):
            estimated = self.DEFAULT_LOADING_WAIT
        return max(0.0, min(float(estimated), float(self.MAX_LOADING_WAIT)))

    async def _request(
        self, model_id: str, payload: Dict[str, Any], expect_json: bool = True
    ) -> Union[Dict[str, Any], bytes]:
        """POST *payload* to the model endpoint and return the response body.

        A 503 answer means the model is still loading; the call then waits
        for the server-provided estimate (capped) and retries, at most
        ``MAX_LOADING_RETRIES`` times.

        Args:
            model_id: Model identifier appended to ``base_url``.
            payload: JSON-serialisable request body.
            expect_json: Decode the body as JSON when true, otherwise return
                the raw bytes (images, audio, ...).

        Returns:
            The decoded JSON document, or raw bytes if ``expect_json`` is false.

        Raises:
            RuntimeError: If the client is used without an open session.
            Exception: If the API answers with a non-200 status, including a
                503 that persists after all retries.
        """
        session = self._require_session()
        url = f"{self.base_url}{model_id}"
        retries = 0

        try:
            while True:
                async with session.post(url, json=payload) as response:
                    if response.status == 200:
                        if expect_json:
                            return await response.json()
                        return await response.read()

                    if (
                        response.status != 503
                        or retries >= self.MAX_LOADING_RETRIES
                    ):
                        error_text = await response.text()
                        raise Exception(
                            f"API request failed with status {response.status}: {error_text}"
                        )

                    # Model is loading: read the estimate, then leave the
                    # ``async with`` so the connection is released while waiting.
                    estimated_time = await self._loading_delay(response)

                retries += 1
                logger.info(f"Model {model_id} is loading, waiting {estimated_time}s")
                await asyncio.sleep(estimated_time)

        except Exception as e:
            logger.error(f"Error calling Hugging Face API for {model_id}: {e}")
            raise

    async def _stream_request(self, model_id: str, payload: Dict[str, Any]):
        """Yield the response body piece by piece as UTF-8 text.

        This is an async generator: call it without ``await`` and consume it
        with ``async for``. The request is sent on first iteration, with
        ``"stream": True`` added to a copy of *payload*.

        Raises:
            RuntimeError: If the client is used without an open session.
            Exception: If the API answers with a non-200 status.
        """
        session = self._require_session()
        url = f"{self.base_url}{model_id}"
        # Work on a copy so the caller's dict is not modified.
        payload = {**payload, "stream": True}

        try:
            async with session.post(url, json=payload) as response:
                if response.status == 200:
                    async for chunk in response.content:
                        if chunk:
                            yield chunk.decode("utf-8")
                else:
                    error_text = await response.text()
                    raise Exception(
                        f"Streaming request failed with status {response.status}: {error_text}"
                    )

        except Exception as e:
            logger.error(f"Error streaming from Hugging Face API for {model_id}: {e}")
            raise

    # ------------------------------------------------------------------
    # Core tasks
    # ------------------------------------------------------------------

    async def text_generation(
        self,
        model_id: str,
        prompt: str,
        max_tokens: int = 100,
        temperature: float = 0.7,
        stream: bool = False,
        **kwargs,
    ) -> Any:
        """Generate text using a text generation model.

        Args:
            model_id: Model to call.
            prompt: Input text.
            max_tokens: Sent as ``max_new_tokens``.
            temperature: Sampling temperature.
            stream: When true, return an async iterator of text chunks
                instead of the complete JSON result.
            **kwargs: Extra generation parameters.

        Returns:
            The decoded JSON response, or - with ``stream=True`` - an async
            generator to be consumed with ``async for``.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": max_tokens,
                "temperature": temperature,
                "do_sample": True,
                **kwargs,
            },
            "options": {"use_cache": False},
        }

        if stream:
            # An async generator object cannot be awaited; hand it back so
            # the caller can iterate over it.
            return self._stream_request(model_id, payload)
        return await self._request(model_id, payload)

    async def text_to_image(
        self,
        model_id: str,
        prompt: str,
        negative_prompt: Optional[str] = None,
        **kwargs,
    ) -> bytes:
        """Generate an image from a text prompt and return the raw bytes.

        ``negative_prompt`` is only sent when it is non-empty.
        """
        parameters: Dict[str, Any] = {}
        if negative_prompt:
            parameters["negative_prompt"] = negative_prompt
        parameters.update(kwargs)

        payload = {"inputs": prompt, "parameters": parameters}
        return await self._request(model_id, payload, expect_json=False)

    async def automatic_speech_recognition(
        self, model_id: str, audio_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Transcribe audio to text; the audio is sent base64-encoded."""
        payload = {"inputs": self._b64(audio_data), "parameters": kwargs}
        return await self._request(model_id, payload)

    async def text_to_speech(self, model_id: str, text: str, **kwargs) -> bytes:
        """Convert text to speech and return the raw audio bytes."""
        payload = {"inputs": text, "parameters": kwargs}
        return await self._request(model_id, payload, expect_json=False)

    async def image_classification(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Classify an image; the image is sent base64-encoded."""
        payload = {"inputs": self._b64(image_data), "parameters": kwargs}
        return await self._request(model_id, payload)

    async def feature_extraction(
        self, model_id: str, texts: Union[str, List[str]], **kwargs
    ) -> Dict[str, Any]:
        """Extract embeddings for one text or a list of texts."""
        payload = {"inputs": texts, "parameters": kwargs}
        return await self._request(model_id, payload)

    async def translation(
        self,
        model_id: str,
        text: str,
        src_lang: Optional[str] = None,
        tgt_lang: Optional[str] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Translate text; language codes are only sent when provided."""
        parameters: Dict[str, Any] = {}
        if src_lang:
            parameters["src_lang"] = src_lang
        if tgt_lang:
            parameters["tgt_lang"] = tgt_lang
        parameters.update(kwargs)

        payload = {"inputs": text, "parameters": parameters}
        return await self._request(model_id, payload)

    async def summarization(
        self,
        model_id: str,
        text: str,
        max_length: int = 150,
        min_length: int = 30,
        **kwargs,
    ) -> Dict[str, Any]:
        """Summarize text with the given ``max_length`` / ``min_length``."""
        payload = {
            "inputs": text,
            "parameters": {
                "max_length": max_length,
                "min_length": min_length,
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def question_answering(
        self, model_id: str, question: str, context: str, **kwargs
    ) -> Dict[str, Any]:
        """Answer *question* based on *context*."""
        payload = {
            "inputs": {"question": question, "context": context},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def zero_shot_classification(
        self, model_id: str, text: str, candidate_labels: List[str], **kwargs
    ) -> Dict[str, Any]:
        """Classify *text* against *candidate_labels*."""
        payload = {
            "inputs": text,
            "parameters": {"candidate_labels": candidate_labels, **kwargs},
        }
        return await self._request(model_id, payload)

    async def conversational(
        self,
        model_id: str,
        text: str,
        conversation_history: Optional[List[Dict[str, str]]] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Send a chat turn, optionally with earlier turns.

        Each history entry may carry a ``"user"`` and/or a ``"bot"`` key;
        they are sent as ``past_user_inputs`` and ``generated_responses``.
        Nothing is sent for an empty or missing history.
        """
        inputs: Dict[str, Any] = {"text": text}
        if conversation_history:
            inputs["past_user_inputs"] = [
                turn["user"] for turn in conversation_history if "user" in turn
            ]
            inputs["generated_responses"] = [
                turn["bot"] for turn in conversation_history if "bot" in turn
            ]

        payload = {"inputs": inputs, "parameters": kwargs}
        return await self._request(model_id, payload)

    # ------------------------------------------------------------------
    # New methods for expanded model categories
    # ------------------------------------------------------------------

    async def text_to_video(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate video from a text prompt.

        Defaults: ``duration=5``, ``fps=24``, ``width=512``, ``height=512``.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "duration": 5,
                "fps": 24,
                "width": 512,
                "height": 512,
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def video_to_text(
        self, model_id: str, video_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Describe a video; the video is sent base64-encoded."""
        payload = {
            "inputs": {"video": self._b64(video_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def code_generation(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate code from a natural language prompt.

        Defaults: ``max_length=500``, ``temperature=0.2``, ``language="python"``.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_length": 500,
                "temperature": 0.2,
                "language": "python",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def code_completion(
        self, model_id: str, code: str, **kwargs
    ) -> Dict[str, Any]:
        """Complete partial code.

        Defaults: ``max_length=100``, ``temperature=0.1``.
        """
        payload = {
            "inputs": code,
            "parameters": {"max_length": 100, "temperature": 0.1, **kwargs},
        }
        return await self._request(model_id, payload)

    async def text_to_3d(self, model_id: str, prompt: str, **kwargs) -> Dict[str, Any]:
        """Generate a 3D model from text.

        Defaults: ``resolution=64``, ``format="obj"``.
        """
        payload = {
            "inputs": prompt,
            "parameters": {"resolution": 64, "format": "obj", **kwargs},
        }
        return await self._request(model_id, payload)

    async def image_to_3d(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Generate a 3D model from an image (sent base64-encoded)."""
        payload = {
            "inputs": {"image": self._b64(image_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def ocr(self, model_id: str, image_data: bytes, **kwargs) -> Dict[str, Any]:
        """Run optical character recognition on an image (default ``language="en"``)."""
        payload = {
            "inputs": {"image": self._b64(image_data)},
            "parameters": {"language": "en", **kwargs},
        }
        return await self._request(model_id, payload)

    async def document_analysis(
        self, model_id: str, document_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Analyze a document (sent base64-encoded)."""
        payload = {
            "inputs": {"document": self._b64(document_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def vision_language(
        self, model_id: str, image_data: bytes, text: str, **kwargs
    ) -> Dict[str, Any]:
        """Process an image (sent base64-encoded) together with text."""
        payload = {
            "inputs": {"image": self._b64(image_data), "text": text},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def multimodal_reasoning(
        self, model_id: str, inputs: Dict[str, Any], **kwargs
    ) -> Dict[str, Any]:
        """Send caller-prepared multimodal *inputs* unchanged."""
        payload = {"inputs": inputs, "parameters": kwargs}
        return await self._request(model_id, payload)

    async def music_generation(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate music from a text prompt.

        Defaults: ``duration=30``, ``bpm=120``, ``genre="electronic"``.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "duration": 30,
                "bpm": 120,
                "genre": "electronic",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def voice_cloning(
        self, model_id: str, text: str, voice_sample: bytes, **kwargs
    ) -> bytes:
        """Synthesize *text* using *voice_sample* and return raw audio bytes."""
        payload = {
            "inputs": {"text": text, "voice_sample": self._b64(voice_sample)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload, expect_json=False)

    async def super_resolution(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> bytes:
        """Upscale an image (default ``scale_factor=4``) and return raw bytes."""
        payload = {
            "inputs": {"image": self._b64(image_data)},
            "parameters": {"scale_factor": 4, **kwargs},
        }
        return await self._request(model_id, payload, expect_json=False)

    async def background_removal(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> bytes:
        """Remove the background from an image and return raw bytes."""
        payload = {
            "inputs": {"image": self._b64(image_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload, expect_json=False)

    async def creative_writing(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate creative content.

        Defaults: ``max_length=1000``, ``creativity=0.8``, ``genre="general"``.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_length": 1000,
                "creativity": 0.8,
                "genre": "general",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def business_document(
        self, model_id: str, document_type: str, context: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate a business document.

        The prompt sent is ``"Generate {document_type}: {context}"``.
        Defaults: ``format="professional"``, ``length="medium"``.
        """
        payload = {
            "inputs": f"Generate {document_type}: {context}",
            "parameters": {
                "format": "professional",
                "length": "medium",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)
Hugging Face model operations""" + + def __init__(self, api_token: str): + self.api_token = api_token + self.models = HuggingFaceModels() + + def get_models_by_category(self, category: ModelCategory) -> List[HFModel]: + """Get all models for a specific category""" + all_models = [] + + if category == ModelCategory.TEXT_GENERATION: + all_models = self.models.TEXT_GENERATION_MODELS + elif category == ModelCategory.TEXT_TO_IMAGE: + all_models = self.models.TEXT_TO_IMAGE_MODELS + elif category == ModelCategory.AUTOMATIC_SPEECH_RECOGNITION: + all_models = self.models.ASR_MODELS + elif category == ModelCategory.TEXT_TO_SPEECH: + all_models = self.models.TTS_MODELS + elif category == ModelCategory.IMAGE_CLASSIFICATION: + all_models = self.models.IMAGE_CLASSIFICATION_MODELS + elif category == ModelCategory.FEATURE_EXTRACTION: + all_models = self.models.FEATURE_EXTRACTION_MODELS + elif category == ModelCategory.TRANSLATION: + all_models = self.models.TRANSLATION_MODELS + elif category == ModelCategory.SUMMARIZATION: + all_models = self.models.SUMMARIZATION_MODELS + + return all_models + + def get_all_models(self) -> Dict[ModelCategory, List[HFModel]]: + """Get all available models organized by category""" + return { + # Core AI categories + ModelCategory.TEXT_GENERATION: self.models.TEXT_GENERATION_MODELS, + ModelCategory.TEXT_TO_IMAGE: self.models.TEXT_TO_IMAGE_MODELS, + ModelCategory.AUTOMATIC_SPEECH_RECOGNITION: self.models.ASR_MODELS, + ModelCategory.TEXT_TO_SPEECH: self.models.TTS_MODELS, + ModelCategory.IMAGE_CLASSIFICATION: self.models.IMAGE_CLASSIFICATION_MODELS, + ModelCategory.FEATURE_EXTRACTION: self.models.FEATURE_EXTRACTION_MODELS, + ModelCategory.TRANSLATION: self.models.TRANSLATION_MODELS, + ModelCategory.SUMMARIZATION: self.models.SUMMARIZATION_MODELS, + # Video and Motion + ModelCategory.TEXT_TO_VIDEO: self.models.VIDEO_GENERATION_MODELS, + ModelCategory.VIDEO_GENERATION: self.models.VIDEO_GENERATION_MODELS, + ModelCategory.VIDEO_TO_TEXT: 
self.models.VIDEO_GENERATION_MODELS, + ModelCategory.VIDEO_CLASSIFICATION: self.models.VIDEO_GENERATION_MODELS, + # Code and Development + ModelCategory.CODE_GENERATION: self.models.CODE_GENERATION_MODELS, + ModelCategory.CODE_COMPLETION: self.models.CODE_GENERATION_MODELS, + ModelCategory.CODE_EXPLANATION: self.models.CODE_GENERATION_MODELS, + ModelCategory.APP_GENERATION: self.models.CODE_GENERATION_MODELS, + # 3D and AR/VR + ModelCategory.TEXT_TO_3D: self.models.THREE_D_MODELS, + ModelCategory.IMAGE_TO_3D: self.models.THREE_D_MODELS, + ModelCategory.THREE_D_GENERATION: self.models.THREE_D_MODELS, + ModelCategory.MESH_GENERATION: self.models.THREE_D_MODELS, + # Document Processing + ModelCategory.OCR: self.models.DOCUMENT_PROCESSING_MODELS, + ModelCategory.DOCUMENT_ANALYSIS: self.models.DOCUMENT_PROCESSING_MODELS, + ModelCategory.HANDWRITING_RECOGNITION: self.models.DOCUMENT_PROCESSING_MODELS, + ModelCategory.TABLE_EXTRACTION: self.models.DOCUMENT_PROCESSING_MODELS, + ModelCategory.FORM_PROCESSING: self.models.DOCUMENT_PROCESSING_MODELS, + # Multimodal AI + ModelCategory.VISION_LANGUAGE: self.models.MULTIMODAL_MODELS, + ModelCategory.MULTIMODAL_REASONING: self.models.MULTIMODAL_MODELS, + ModelCategory.VISUAL_QUESTION_ANSWERING: self.models.MULTIMODAL_MODELS, + ModelCategory.MULTIMODAL_CHAT: self.models.MULTIMODAL_MODELS, + ModelCategory.CROSS_MODAL_GENERATION: self.models.MULTIMODAL_MODELS, + # Specialized AI + ModelCategory.MUSIC_GENERATION: self.models.SPECIALIZED_AI_MODELS, + ModelCategory.VOICE_CLONING: self.models.SPECIALIZED_AI_MODELS, + ModelCategory.SUPER_RESOLUTION: self.models.SPECIALIZED_AI_MODELS, + ModelCategory.FACE_RESTORATION: self.models.SPECIALIZED_AI_MODELS, + ModelCategory.IMAGE_INPAINTING: self.models.SPECIALIZED_AI_MODELS, + ModelCategory.BACKGROUND_REMOVAL: self.models.SPECIALIZED_AI_MODELS, + # Creative Content + ModelCategory.CREATIVE_WRITING: self.models.CREATIVE_CONTENT_MODELS, + ModelCategory.STORY_GENERATION: 
self.models.CREATIVE_CONTENT_MODELS, + ModelCategory.POETRY_GENERATION: self.models.CREATIVE_CONTENT_MODELS, + ModelCategory.BLOG_WRITING: self.models.CREATIVE_CONTENT_MODELS, + ModelCategory.MARKETING_COPY: self.models.CREATIVE_CONTENT_MODELS, + # Game Development + ModelCategory.GAME_ASSET_GENERATION: self.models.GAME_DEVELOPMENT_MODELS, + ModelCategory.CHARACTER_GENERATION: self.models.GAME_DEVELOPMENT_MODELS, + ModelCategory.LEVEL_GENERATION: self.models.GAME_DEVELOPMENT_MODELS, + ModelCategory.DIALOGUE_GENERATION: self.models.GAME_DEVELOPMENT_MODELS, + # Science and Research + ModelCategory.PROTEIN_FOLDING: self.models.SCIENCE_RESEARCH_MODELS, + ModelCategory.MOLECULE_GENERATION: self.models.SCIENCE_RESEARCH_MODELS, + ModelCategory.SCIENTIFIC_WRITING: self.models.SCIENCE_RESEARCH_MODELS, + ModelCategory.RESEARCH_ASSISTANCE: self.models.SCIENCE_RESEARCH_MODELS, + ModelCategory.DATA_ANALYSIS: self.models.SCIENCE_RESEARCH_MODELS, + # Business and Productivity + ModelCategory.EMAIL_GENERATION: self.models.BUSINESS_PRODUCTIVITY_MODELS, + ModelCategory.PRESENTATION_CREATION: self.models.BUSINESS_PRODUCTIVITY_MODELS, + ModelCategory.REPORT_GENERATION: self.models.BUSINESS_PRODUCTIVITY_MODELS, + ModelCategory.MEETING_SUMMARIZATION: self.models.BUSINESS_PRODUCTIVITY_MODELS, + ModelCategory.PROJECT_PLANNING: self.models.BUSINESS_PRODUCTIVITY_MODELS, + # AI Teacher and Education Models + ModelCategory.AI_TUTORING: self.models.AI_TEACHER_MODELS, + ModelCategory.EDUCATIONAL_CONTENT: self.models.AI_TEACHER_MODELS, + ModelCategory.LESSON_PLANNING: self.models.AI_TEACHER_MODELS, + ModelCategory.CONCEPT_EXPLANATION: self.models.AI_TEACHER_MODELS, + ModelCategory.HOMEWORK_ASSISTANCE: self.models.AI_TEACHER_MODELS, + ModelCategory.QUIZ_GENERATION: self.models.AI_TEACHER_MODELS, + ModelCategory.CURRICULUM_DESIGN: self.models.AI_TEACHER_MODELS, + ModelCategory.LEARNING_ASSESSMENT: self.models.AI_TEACHER_MODELS, + ModelCategory.ADAPTIVE_LEARNING: self.models.AI_TEACHER_MODELS, + 
ModelCategory.SUBJECT_TEACHING: self.models.AI_TEACHER_MODELS, + ModelCategory.MATH_TUTORING: self.models.AI_TEACHER_MODELS, + ModelCategory.SCIENCE_TUTORING: self.models.AI_TEACHER_MODELS, + ModelCategory.LANGUAGE_TUTORING: self.models.AI_TEACHER_MODELS, + ModelCategory.HISTORY_TUTORING: self.models.AI_TEACHER_MODELS, + ModelCategory.CODING_INSTRUCTION: self.models.AI_TEACHER_MODELS, + ModelCategory.EXAM_PREPARATION: self.models.AI_TEACHER_MODELS, + ModelCategory.STUDY_GUIDE_CREATION: self.models.AI_TEACHER_MODELS, + ModelCategory.EDUCATIONAL_GAMES: self.models.AI_TEACHER_MODELS, + ModelCategory.LEARNING_ANALYTICS: self.models.AI_TEACHER_MODELS, + ModelCategory.PERSONALIZED_LEARNING: self.models.AI_TEACHER_MODELS, + # Qwen Models + ModelCategory.QWEN_REASONING: self.models.QWEN_MODELS, + ModelCategory.QWEN_MATH: self.models.QWEN_MODELS, + ModelCategory.QWEN_CODE: self.models.QWEN_MODELS, + ModelCategory.QWEN_VISION: self.models.QWEN_MODELS, + ModelCategory.QWEN_AUDIO: self.models.QWEN_MODELS, + # DeepSeek Models + ModelCategory.DEEPSEEK_CODING: self.models.DEEPSEEK_MODELS, + ModelCategory.DEEPSEEK_REASONING: self.models.DEEPSEEK_MODELS, + ModelCategory.DEEPSEEK_MATH: self.models.DEEPSEEK_MODELS, + ModelCategory.DEEPSEEK_RESEARCH: self.models.DEEPSEEK_MODELS, + # Advanced Image Processing & Manipulation + ModelCategory.IMAGE_EDITING: self.models.IMAGE_EDITING_MODELS, + ModelCategory.FACE_SWAP: self.models.FACE_SWAP_MODELS, + ModelCategory.FACE_ENHANCEMENT: self.models.FACE_SWAP_MODELS, + ModelCategory.FACE_GENERATION: self.models.FACE_SWAP_MODELS, + ModelCategory.PORTRAIT_EDITING: self.models.IMAGE_EDITING_MODELS, + ModelCategory.PHOTO_RESTORATION: self.models.IMAGE_EDITING_MODELS, + ModelCategory.IMAGE_UPSCALING: self.models.IMAGE_EDITING_MODELS, + ModelCategory.COLOR_CORRECTION: self.models.IMAGE_EDITING_MODELS, + ModelCategory.ARTISTIC_FILTER: self.models.IMAGE_EDITING_MODELS, + # Advanced Speech & Audio + ModelCategory.ADVANCED_TTS: 
self.models.ADVANCED_SPEECH_MODELS, + ModelCategory.ADVANCED_STT: self.models.ADVANCED_SPEECH_MODELS, + ModelCategory.VOICE_CONVERSION: self.models.ADVANCED_SPEECH_MODELS, + ModelCategory.SPEECH_ENHANCEMENT: self.models.ADVANCED_SPEECH_MODELS, + ModelCategory.AUDIO_GENERATION: self.models.ADVANCED_SPEECH_MODELS, + ModelCategory.MULTILINGUAL_TTS: self.models.ADVANCED_SPEECH_MODELS, + ModelCategory.MULTILINGUAL_STT: self.models.ADVANCED_SPEECH_MODELS, + ModelCategory.REAL_TIME_TRANSLATION: self.models.ADVANCED_SPEECH_MODELS, + # Interactive Avatar & Video Generation + ModelCategory.TALKING_AVATAR: self.models.TALKING_AVATAR_MODELS, + ModelCategory.AVATAR_GENERATION: self.models.TALKING_AVATAR_MODELS, + ModelCategory.LIP_SYNC: self.models.TALKING_AVATAR_MODELS, + ModelCategory.FACIAL_ANIMATION: self.models.TALKING_AVATAR_MODELS, + ModelCategory.GESTURE_GENERATION: self.models.TALKING_AVATAR_MODELS, + ModelCategory.VIRTUAL_PRESENTER: self.models.TALKING_AVATAR_MODELS, + ModelCategory.AI_ANCHOR: self.models.TALKING_AVATAR_MODELS, + # Interactive Language & Conversation + ModelCategory.INTERACTIVE_CHAT: self.models.INTERACTIVE_LANGUAGE_MODELS, + ModelCategory.BILINGUAL_CONVERSATION: self.models.INTERACTIVE_LANGUAGE_MODELS, + ModelCategory.CULTURAL_ADAPTATION: self.models.INTERACTIVE_LANGUAGE_MODELS, + ModelCategory.CONTEXT_AWARE_CHAT: self.models.INTERACTIVE_LANGUAGE_MODELS, + ModelCategory.PERSONALITY_CHAT: self.models.INTERACTIVE_LANGUAGE_MODELS, + ModelCategory.ROLE_PLAY_CHAT: self.models.INTERACTIVE_LANGUAGE_MODELS, + ModelCategory.DOMAIN_SPECIFIC_CHAT: self.models.INTERACTIVE_LANGUAGE_MODELS, + } + + def get_model_by_id(self, model_id: str) -> Optional[HFModel]: + """Find a model by its Hugging Face model ID""" + for models_list in self.get_all_models().values(): + for model in models_list: + if model.model_id == model_id: + return model + return None + + async def call_model(self, model_id: str, category: ModelCategory, **kwargs) -> Any: + """Call a Hugging Face 
model with the appropriate method based on category""" + + async with HuggingFaceInference(self.api_token) as hf: + if category == ModelCategory.TEXT_GENERATION: + return await hf.text_generation(model_id, **kwargs) + elif category == ModelCategory.TEXT_TO_IMAGE: + return await hf.text_to_image(model_id, **kwargs) + elif category == ModelCategory.AUTOMATIC_SPEECH_RECOGNITION: + return await hf.automatic_speech_recognition(model_id, **kwargs) + elif category == ModelCategory.TEXT_TO_SPEECH: + return await hf.text_to_speech(model_id, **kwargs) + elif category == ModelCategory.IMAGE_CLASSIFICATION: + return await hf.image_classification(model_id, **kwargs) + elif category == ModelCategory.FEATURE_EXTRACTION: + return await hf.feature_extraction(model_id, **kwargs) + elif category == ModelCategory.TRANSLATION: + return await hf.translation(model_id, **kwargs) + elif category == ModelCategory.SUMMARIZATION: + return await hf.summarization(model_id, **kwargs) + elif category == ModelCategory.QUESTION_ANSWERING: + return await hf.question_answering(model_id, **kwargs) + elif category == ModelCategory.ZERO_SHOT_CLASSIFICATION: + return await hf.zero_shot_classification(model_id, **kwargs) + elif category == ModelCategory.CONVERSATIONAL: + return await hf.conversational(model_id, **kwargs) + + # Video and Motion categories + elif category in [ + ModelCategory.TEXT_TO_VIDEO, + ModelCategory.VIDEO_GENERATION, + ]: + return await hf.text_to_video(model_id, **kwargs) + elif category == ModelCategory.VIDEO_TO_TEXT: + return await hf.video_to_text(model_id, **kwargs) + elif category == ModelCategory.VIDEO_CLASSIFICATION: + return await hf.image_classification( + model_id, **kwargs + ) # Similar to image classification + + # Code and Development categories + elif category in [ + ModelCategory.CODE_GENERATION, + ModelCategory.APP_GENERATION, + ]: + return await hf.code_generation(model_id, **kwargs) + elif category in [ + ModelCategory.CODE_COMPLETION, + 
ModelCategory.CODE_EXPLANATION, + ]: + return await hf.code_completion(model_id, **kwargs) + + # 3D and AR/VR categories + elif category in [ + ModelCategory.TEXT_TO_3D, + ModelCategory.THREE_D_GENERATION, + ]: + return await hf.text_to_3d(model_id, **kwargs) + elif category in [ModelCategory.IMAGE_TO_3D, ModelCategory.MESH_GENERATION]: + return await hf.image_to_3d(model_id, **kwargs) + + # Document Processing categories + elif category == ModelCategory.OCR: + return await hf.ocr(model_id, **kwargs) + elif category in [ + ModelCategory.DOCUMENT_ANALYSIS, + ModelCategory.FORM_PROCESSING, + ModelCategory.TABLE_EXTRACTION, + ModelCategory.LAYOUT_ANALYSIS, + ]: + return await hf.document_analysis(model_id, **kwargs) + elif category == ModelCategory.HANDWRITING_RECOGNITION: + return await hf.ocr(model_id, **kwargs) # Similar to OCR + + # Multimodal AI categories + elif category in [ + ModelCategory.VISION_LANGUAGE, + ModelCategory.VISUAL_QUESTION_ANSWERING, + ModelCategory.IMAGE_TEXT_MATCHING, + ]: + return await hf.vision_language(model_id, **kwargs) + elif category in [ + ModelCategory.MULTIMODAL_REASONING, + ModelCategory.MULTIMODAL_CHAT, + ModelCategory.CROSS_MODAL_GENERATION, + ]: + return await hf.multimodal_reasoning(model_id, **kwargs) + + # Specialized AI categories + elif category == ModelCategory.MUSIC_GENERATION: + return await hf.music_generation(model_id, **kwargs) + elif category == ModelCategory.VOICE_CLONING: + return await hf.voice_cloning(model_id, **kwargs) + elif category == ModelCategory.SUPER_RESOLUTION: + return await hf.super_resolution(model_id, **kwargs) + elif category in [ + ModelCategory.FACE_RESTORATION, + ModelCategory.IMAGE_INPAINTING, + ModelCategory.IMAGE_OUTPAINTING, + ]: + return await hf.super_resolution( + model_id, **kwargs + ) # Similar processing + elif category == ModelCategory.BACKGROUND_REMOVAL: + return await hf.background_removal(model_id, **kwargs) + + # Creative Content categories + elif category in [ + 
ModelCategory.CREATIVE_WRITING, + ModelCategory.STORY_GENERATION, + ModelCategory.POETRY_GENERATION, + ModelCategory.SCREENPLAY_WRITING, + ]: + return await hf.creative_writing(model_id, **kwargs) + elif category in [ModelCategory.BLOG_WRITING, ModelCategory.MARKETING_COPY]: + return await hf.text_generation( + model_id, **kwargs + ) # Use standard text generation + + # Game Development categories + elif category in [ + ModelCategory.CHARACTER_GENERATION, + ModelCategory.LEVEL_GENERATION, + ModelCategory.DIALOGUE_GENERATION, + ModelCategory.GAME_ASSET_GENERATION, + ]: + return await hf.creative_writing( + model_id, **kwargs + ) # Creative generation + + # Science and Research categories + elif category in [ + ModelCategory.PROTEIN_FOLDING, + ModelCategory.MOLECULE_GENERATION, + ]: + return await hf.text_generation( + model_id, **kwargs + ) # Specialized text generation + elif category in [ + ModelCategory.SCIENTIFIC_WRITING, + ModelCategory.RESEARCH_ASSISTANCE, + ModelCategory.DATA_ANALYSIS, + ]: + return await hf.text_generation(model_id, **kwargs) + + # Business and Productivity categories + elif category in [ + ModelCategory.EMAIL_GENERATION, + ModelCategory.PRESENTATION_CREATION, + ModelCategory.REPORT_GENERATION, + ModelCategory.MEETING_SUMMARIZATION, + ModelCategory.PROJECT_PLANNING, + ]: + return await hf.business_document(model_id, category.value, **kwargs) + + # AI Teacher and Education categories + elif category in [ + ModelCategory.AI_TUTORING, + ModelCategory.EDUCATIONAL_CONTENT, + ModelCategory.LESSON_PLANNING, + ModelCategory.CONCEPT_EXPLANATION, + ModelCategory.HOMEWORK_ASSISTANCE, + ModelCategory.QUIZ_GENERATION, + ModelCategory.CURRICULUM_DESIGN, + ModelCategory.LEARNING_ASSESSMENT, + ModelCategory.ADAPTIVE_LEARNING, + ModelCategory.SUBJECT_TEACHING, + ModelCategory.MATH_TUTORING, + ModelCategory.SCIENCE_TUTORING, + ModelCategory.LANGUAGE_TUTORING, + ModelCategory.HISTORY_TUTORING, + ModelCategory.CODING_INSTRUCTION, + 
ModelCategory.EXAM_PREPARATION, + ModelCategory.STUDY_GUIDE_CREATION, + ModelCategory.EDUCATIONAL_GAMES, + ModelCategory.LEARNING_ANALYTICS, + ModelCategory.PERSONALIZED_LEARNING, + ]: + return await hf.text_generation( + model_id, **kwargs + ) # Educational content generation + + # Qwen Model categories + elif category in [ + ModelCategory.QWEN_REASONING, + ModelCategory.QWEN_MATH, + ModelCategory.QWEN_CODE, + ]: + return await hf.text_generation(model_id, **kwargs) + elif category == ModelCategory.QWEN_VISION: + return await hf.vision_language(model_id, **kwargs) + elif category == ModelCategory.QWEN_AUDIO: + return await hf.automatic_speech_recognition(model_id, **kwargs) + + # DeepSeek Model categories + elif category in [ + ModelCategory.DEEPSEEK_CODING, + ModelCategory.DEEPSEEK_REASONING, + ModelCategory.DEEPSEEK_MATH, + ModelCategory.DEEPSEEK_RESEARCH, + ]: + return await hf.text_generation(model_id, **kwargs) + + # Advanced Image Processing & Manipulation + elif category in [ + ModelCategory.IMAGE_EDITING, + ModelCategory.PORTRAIT_EDITING, + ModelCategory.PHOTO_RESTORATION, + ModelCategory.COLOR_CORRECTION, + ModelCategory.ARTISTIC_FILTER, + ]: + return await hf.text_to_image(model_id, **kwargs) # Image processing + elif category == ModelCategory.IMAGE_UPSCALING: + return await hf.super_resolution(model_id, **kwargs) + elif category in [ + ModelCategory.FACE_SWAP, + ModelCategory.FACE_ENHANCEMENT, + ModelCategory.FACE_GENERATION, + ]: + return await hf.text_to_image(model_id, **kwargs) # Face manipulation + + # Advanced Speech & Audio + elif category in [ + ModelCategory.ADVANCED_TTS, + ModelCategory.MULTILINGUAL_TTS, + ModelCategory.VOICE_CONVERSION, + ]: + return await hf.text_to_speech(model_id, **kwargs) + elif category in [ + ModelCategory.ADVANCED_STT, + ModelCategory.MULTILINGUAL_STT, + ModelCategory.SPEECH_ENHANCEMENT, + ]: + return await hf.automatic_speech_recognition(model_id, **kwargs) + elif category in [ + ModelCategory.AUDIO_GENERATION, + 
ModelCategory.REAL_TIME_TRANSLATION, + ]: + return await hf.text_to_speech(model_id, **kwargs) # Audio generation + + # Interactive Avatar & Video Generation + elif category in [ + ModelCategory.TALKING_AVATAR, + ModelCategory.AVATAR_GENERATION, + ModelCategory.LIP_SYNC, + ModelCategory.FACIAL_ANIMATION, + ModelCategory.GESTURE_GENERATION, + ModelCategory.VIRTUAL_PRESENTER, + ModelCategory.AI_ANCHOR, + ]: + return await hf.text_to_video(model_id, **kwargs) # Video generation + + # Interactive Language & Conversation + elif category in [ + ModelCategory.INTERACTIVE_CHAT, + ModelCategory.BILINGUAL_CONVERSATION, + ModelCategory.CULTURAL_ADAPTATION, + ModelCategory.CONTEXT_AWARE_CHAT, + ModelCategory.PERSONALITY_CHAT, + ModelCategory.ROLE_PLAY_CHAT, + ModelCategory.DOMAIN_SPECIFIC_CHAT, + ]: + return await hf.conversational(model_id, **kwargs) + + else: + raise ValueError(f"Unsupported model category: {category}")