File size: 1,275 Bytes
2b67076
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8135f33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Core AI stack
# diffusers and transformers are pinned to exact releases; tokenizers,
# accelerate and open_clip_torch only set a lower bound; the rest float.
diffusers==0.34.0
transformers==4.53.1
tokenizers>=0.20.3
accelerate>=1.1.1
tqdm
imageio
imageio-ffmpeg
einops
sentencepiece
open_clip_torch>=2.29.0

# Video, audio & media
moviepy==1.0.3
av
ffmpeg-python
pygame>=2.1.0
sounddevice>=0.4.0
soundfile
mutagen
pyloudnorm
librosa==0.11.0
speechbrain==1.0.3
audio-separator==0.36.1

# UI & interaction
gradio==5.29.0
dashscope
loguru

# Vision & segmentation
opencv-python>=4.12.0.88
segment-anything
# GPU variants of rembg / onnxruntime; a CPU-only rembg alternative is kept
# commented out in the "Optional" section at the end of this list.
rembg[gpu]==2.0.65
onnxruntime-gpu==1.22
decord
timm
# insightface is selected by environment marker: a prebuilt cp310 wheel on
# Windows with Python 3.10, the PyPI release on Linux. No other
# platform/Python combination matches either marker, so insightface is not
# installed there.
insightface @ https://github.com/deepbeepmeep/insightface/raw/refs/heads/master/wheels/insightface-0.7.3-cp310-cp310-win_amd64.whl ; sys_platform == "win32" and python_version == "3.10"
insightface==0.7.3 ; sys_platform == "linux"
facexlib==0.3.0

# Config & orchestration
omegaconf
hydra-core
easydict
pydantic==2.10.6

# Math & modeling
torchdiffeq>=0.2.5
tensordict>=0.6.1
mmgp==3.6.2
peft==0.15.0
matplotlib

# Utilities
ftfy
piexif
nvidia-ml-py 
misaki

# Optional / commented out
# Uncomment an entry only when the situation in its trailing note applies.
# transformers==4.46.3  # for llamallava pre-patch
# rembg==2.0.65         # non-GPU fallback
# huggingface_hub[hf_xet]  # slows down everything
# num2words
# spacy


# Base runtime packages that are not listed in any section above.
# transformers, diffusers, accelerate, gradio, tqdm and opencv-python are
# intentionally NOT repeated here: they are already constrained earlier in
# this file, and a second, unversioned entry is redundant (pip's legacy
# resolver rejects it with "Double requirement given").
torch
torchvision
torchaudio
safetensors
numpy
Pillow