bageldotcom committed on
Commit 4abc914 · verified · 1 Parent(s): 808d012

Upload folder using huggingface_hub

Files changed (48)
  1. .ipynb_checkpoints/config-checkpoint.json +13 -0
  2. .ipynb_checkpoints/model_index-checkpoint.json +21 -0
  3. README.md +109 -9
  4. __pycache__/config.cpython-311.pyc +0 -0
  5. __pycache__/inference.cpython-311.pyc +0 -0
  6. __pycache__/inference_pipeline.cpython-311.pyc +0 -0
  7. __pycache__/models.cpython-311.pyc +0 -0
  8. __pycache__/vae_utils.cpython-311.pyc +0 -0
  9. config.json +13 -0
  10. expert_0/.ipynb_checkpoints/config-checkpoint.json +19 -0
  11. expert_0/.ipynb_checkpoints/model_index-checkpoint.json +5 -0
  12. expert_0/config.json +19 -0
  13. expert_0/diffusion_pytorch_model.safetensors +3 -0
  14. expert_0/model_index.json +5 -0
  15. expert_1/config.json +19 -0
  16. expert_1/diffusion_pytorch_model.safetensors +3 -0
  17. expert_1/model_index.json +5 -0
  18. expert_2/config.json +19 -0
  19. expert_2/diffusion_pytorch_model.safetensors +3 -0
  20. expert_2/model_index.json +5 -0
  21. expert_3/config.json +19 -0
  22. expert_3/diffusion_pytorch_model.safetensors +3 -0
  23. expert_3/model_index.json +5 -0
  24. expert_4/config.json +19 -0
  25. expert_4/diffusion_pytorch_model.safetensors +3 -0
  26. expert_4/model_index.json +5 -0
  27. expert_5/config.json +19 -0
  28. expert_5/diffusion_pytorch_model.safetensors +3 -0
  29. expert_5/model_index.json +5 -0
  30. expert_6/config.json +19 -0
  31. expert_6/diffusion_pytorch_model.safetensors +3 -0
  32. expert_6/model_index.json +5 -0
  33. expert_7/config.json +19 -0
  34. expert_7/diffusion_pytorch_model.safetensors +3 -0
  35. expert_7/model_index.json +5 -0
  36. model_index.json +21 -0
  37. router/.ipynb_checkpoints/config-checkpoint.json +11 -0
  38. router/config.json +11 -0
  39. router/model_index.json +4 -0
  40. router/pytorch_model.safetensors +3 -0
  41. text_encoder/config.json +24 -0
  42. text_encoder/model.safetensors +3 -0
  43. tokenizer/merges.txt +0 -0
  44. tokenizer/special_tokens_map.json +30 -0
  45. tokenizer/tokenizer_config.json +31 -0
  46. tokenizer/vocab.json +0 -0
  47. vae/config.json +38 -0
  48. vae/diffusion_pytorch_model.safetensors +3 -0
.ipynb_checkpoints/config-checkpoint.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "model_name": "Paris",
+   "num_experts": 8,
+   "experiment_name": "dit_xl2_multi_expert_pretrained_text",
+   "dataset_name": "LAION-Aesthetic",
+   "image_size": 32,
+   "num_channels": 4,
+   "expert_architecture": "dit",
+   "router_architecture": "dit",
+   "use_latents": true,
+   "vae_name": "stabilityai/sd-vae-ft-mse",
+   "text_encoder_name": "openai/clip-vit-large-patch14"
+ }
.ipynb_checkpoints/model_index-checkpoint.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "_class_name": "DDMPipeline",
+   "_diffusers_version": "0.21.0",
+   "num_experts": 8,
+   "expert_architecture": "dit",
+   "router_architecture": "dit",
+   "vae": [
+     "diffusers",
+     "AutoencoderKL"
+   ],
+   "text_encoder": [
+     "transformers",
+     "CLIPTextModel"
+   ],
+   "tokenizer": [
+     "transformers",
+     "CLIPTokenizer"
+   ],
+   "vae_name": "stabilityai/sd-vae-ft-mse",
+   "text_encoder_name": "openai/clip-vit-large-patch14"
+ }
README.md CHANGED
@@ -1,13 +1,113 @@
  ---
  license: mit
- language:
- - en
- pipeline_tag: text-to-image
  tags:
- - diffusion
- - decentralized
- - distributed
- - distributed-training
- - image-generation
  - text-to-image
- ---
+ - diffusion
+ - multi-expert
+ - dit
+ - laion
+ ---
+
+ # Paris
+
+ A multi-expert diffusion model trained with dynamic expert routing on LAION-Aesthetic.
+
+ ## Model Description
+
+ This model uses **8 specialized DiT experts** with a learned router that dynamically selects the best expert for each denoising step, based on the noisy latent and timestep.
+
+ - **Architecture**: DiT-XL/2 with 8 experts
+ - **Router**: DiT-based routing network
+ - **Hidden Size**: 1152
+ - **Layers**: 28
+ - **Attention Heads**: 16
+ - **Parameters per Expert**: ~606M (2.4 GB fp32 checkpoint)
+ - **Total Parameters**: ~5.0B (8 experts plus a ~129M router)
+ - **Text Conditioning**: ✓ (CLIP ViT-L/14)
+ - **Training Dataset**: LAION-Aesthetic
+
+ ## Model Structure
+
+ ```
+ Paris/
+ ├── config.json                # High-level model configuration
+ ├── model_index.json           # Pipeline component index
+ ├── expert_0/                  # Specialized expert models
+ │   ├── config.json
+ │   └── diffusion_pytorch_model.safetensors
+ ├── expert_1/ ... expert_7/
+ ├── router/                    # Dynamic routing network
+ │   ├── config.json
+ │   └── pytorch_model.safetensors
+ ├── vae/                       # VAE (sd-vae-ft-mse)
+ ├── text_encoder/              # CLIP text encoder
+ ├── tokenizer/                 # CLIP tokenizer
+ └── inference_pipeline.py      # Custom inference code
+ ```
+
+ ## Usage
+
+ ```python
+ from inference_pipeline import DDMPipeline
+
+ # Load the pipeline
+ pipeline = DDMPipeline.from_pretrained("paris")
+
+ # Generate images
+ images = pipeline(
+     prompt="A beautiful sunset over Paris, oil painting style",
+     num_inference_steps=50,
+     guidance_scale=7.5,
+     num_images=4,
+ )
+
+ # Save images
+ for i, img in enumerate(images):
+     img.save(f"output_{i}.png")
+ ```
+
+ ## Training Details
+
+ - **Base Model**: DiT-XL/2 pretrained on ImageNet
+ - **Batch Size**: 16 per expert
+ - **Learning Rate**: 2e-05
+ - **Image Size**: 256x256 (32x32 latent space)
+ - **VAE**: SD VAE (8x downsampling)
+ - **Text Encoder**: CLIP ViT-L/14
+ - **EMA**: True
+ - **Mixed Precision**: True
+
+ ### Multi-Expert Architecture
+
+ Each expert specializes in different visual styles and content through dynamic routing:
+ - The router network analyzes the noisy latent and timestep
+ - It selects the most appropriate expert for each denoising step
+ - This enables better quality and diversity than a single model (see the sketch below)
+
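+ A minimal sketch of the routing step, with hypothetical names (the actual implementation lives in `inference_pipeline.py`):
+
+ ```python
+ import torch
+
+ @torch.no_grad()
+ def route_and_denoise(router, experts, latents, t, text_emb):
+     # The router scores all 8 experts from the noisy latent and timestep.
+     logits = router(latents, t)            # (batch, num_experts)
+     expert_idx = logits.argmax(dim=-1)     # hard top-1 routing
+     # Dispatch each sample to its selected expert for one denoising step.
+     preds = [experts[j](latents[i:i + 1], t[i:i + 1], text_emb[i:i + 1])
+              for i, j in enumerate(expert_idx.tolist())]
+     return torch.cat(preds, dim=0)
+ ```
+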
+ ## Examples
+
+ Coming soon! Check back for generated examples.
+
+ ## Limitations
+
+ - Trained on LAION-Aesthetic, which may contain biases
+ - Best results at 256x256 resolution
+ - Requires a GPU for inference (8 GB+ VRAM recommended)
+
+ ## Citation
+
+ ```bibtex
+ @misc{paris,
+   author = {Your Name},
+   title = {Paris: Multi-Expert Diffusion Model},
+   year = {2024},
+   publisher = {HuggingFace},
+   url = {https://huggingface.co/paris}
+ }
+ ```
+
+ ## License
+
+ MIT License
__pycache__/config.cpython-311.pyc ADDED
Binary file (6.44 kB).
__pycache__/inference.cpython-311.pyc ADDED
Binary file (43.5 kB).
__pycache__/inference_pipeline.cpython-311.pyc ADDED
Binary file (3.51 kB).
__pycache__/models.cpython-311.pyc ADDED
Binary file (54 kB).
__pycache__/vae_utils.cpython-311.pyc ADDED
Binary file (7.78 kB).
config.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "model_name": "Paris",
+   "num_experts": 8,
+   "experiment_name": "dit_xl2_multi_expert_pretrained_text",
+   "dataset_name": "LAION-Aesthetic",
+   "image_size": 32,
+   "num_channels": 4,
+   "expert_architecture": "dit",
+   "router_architecture": "dit",
+   "use_latents": true,
+   "vae_name": "stabilityai/sd-vae-ft-mse",
+   "text_encoder_name": "openai/clip-vit-large-patch14"
+ }
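The `vae_name` and `text_encoder_name` fields name standard Hub checkpoints. A minimal sketch of resolving the shared components from this config (the repo's `inference_pipeline.py` is the authoritative loader; this is only an illustration):

```python
import json

from diffusers import AutoencoderKL
from transformers import CLIPTextModel, CLIPTokenizer

with open("config.json") as f:
    cfg = json.load(f)

# Shared components are ordinary Hub checkpoints referenced by name.
vae = AutoencoderKL.from_pretrained(cfg["vae_name"])                     # stabilityai/sd-vae-ft-mse
text_encoder = CLIPTextModel.from_pretrained(cfg["text_encoder_name"])  # openai/clip-vit-large-patch14
tokenizer = CLIPTokenizer.from_pretrained(cfg["text_encoder_name"])
```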
expert_0/.ipynb_checkpoints/config-checkpoint.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "model_type": "dit_expert",
+   "expert_id": 0,
+   "architecture": "dit",
+   "hidden_size": 1152,
+   "num_layers": 28,
+   "num_heads": 16,
+   "patch_size": 2,
+   "in_channels": 4,
+   "out_channels": 4,
+   "mlp_ratio": 4.0,
+   "use_text_conditioning": true,
+   "use_class_conditioning": false,
+   "num_classes": 1000,
+   "text_embed_dim": 768,
+   "use_dit_time_embed": true,
+   "use_adaln_single": true,
+   "cfg_dropout_prob": 0.1
+ }
expert_0/.ipynb_checkpoints/model_index-checkpoint.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "_class_name": "DiTExpert",
+   "_diffusers_version": "0.21.0",
+   "expert_id": 0
+ }
expert_0/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "model_type": "dit_expert",
+   "expert_id": 0,
+   "architecture": "dit",
+   "hidden_size": 1152,
+   "num_layers": 28,
+   "num_heads": 16,
+   "patch_size": 2,
+   "in_channels": 4,
+   "out_channels": 4,
+   "mlp_ratio": 4.0,
+   "use_text_conditioning": true,
+   "use_class_conditioning": false,
+   "num_classes": 1000,
+   "text_embed_dim": 768,
+   "use_dit_time_embed": true,
+   "use_adaln_single": true,
+   "cfg_dropout_prob": 0.1
+ }
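`cfg_dropout_prob: 0.1` indicates classifier-free-guidance dropout during training. A minimal sketch of that standard technique (function and tensor names are hypothetical, not the repo's code):

```python
import torch

def maybe_drop_text(text_emb: torch.Tensor, null_emb: torch.Tensor,
                    p: float = 0.1) -> torch.Tensor:
    # With probability p, swap a sample's prompt embedding for a null
    # embedding, so the expert also learns the unconditional score and
    # supports classifier-free guidance at sampling time.
    drop = torch.rand(text_emb.shape[0], device=text_emb.device) < p
    return torch.where(drop[:, None, None], null_emb, text_emb)
```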
expert_0/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a10067fe64cc3ee3137d5c2c88fdc6ca8dea420a5875131969c11b4a4cf24ba
+ size 2423920240
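As a sanity check, the checkpoint sizes recorded here pin down the parameter counts quoted in the README (fp32, 4 bytes per weight); the router checkpoint below gets the same treatment:

```python
expert_bytes = 2_423_920_240           # each expert's safetensors file
router_bytes = 516_560_576             # router/pytorch_model.safetensors

params_per_expert = expert_bytes // 4  # ≈ 606M fp32 parameters
router_params = router_bytes // 4      # ≈ 129M fp32 parameters
total = 8 * params_per_expert + router_params
print(f"{params_per_expert / 1e6:.0f}M per expert, {total / 1e9:.2f}B total")
```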
expert_0/model_index.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "_class_name": "DiTExpert",
+   "_diffusers_version": "0.21.0",
+   "expert_id": 0
+ }
expert_1/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "model_type": "dit_expert",
+   "expert_id": 1,
+   "architecture": "dit",
+   "hidden_size": 1152,
+   "num_layers": 28,
+   "num_heads": 16,
+   "patch_size": 2,
+   "in_channels": 4,
+   "out_channels": 4,
+   "mlp_ratio": 4.0,
+   "use_text_conditioning": true,
+   "use_class_conditioning": false,
+   "num_classes": 1000,
+   "text_embed_dim": 768,
+   "use_dit_time_embed": true,
+   "use_adaln_single": true,
+   "cfg_dropout_prob": 0.1
+ }
expert_1/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:880e77a2924166cae2f533b7e67616f5fec4db2c912c1e68b7baf2a12e529901
+ size 2423920240
expert_1/model_index.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "_class_name": "DiTExpert",
+   "_diffusers_version": "0.21.0",
+   "expert_id": 1
+ }
expert_2/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "model_type": "dit_expert",
+   "expert_id": 2,
+   "architecture": "dit",
+   "hidden_size": 1152,
+   "num_layers": 28,
+   "num_heads": 16,
+   "patch_size": 2,
+   "in_channels": 4,
+   "out_channels": 4,
+   "mlp_ratio": 4.0,
+   "use_text_conditioning": true,
+   "use_class_conditioning": false,
+   "num_classes": 1000,
+   "text_embed_dim": 768,
+   "use_dit_time_embed": true,
+   "use_adaln_single": true,
+   "cfg_dropout_prob": 0.1
+ }
expert_2/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c481af35025e018eb489087260afe968d2e530614c6cfd52d9e17143975936b0
+ size 2423920240
expert_2/model_index.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "_class_name": "DiTExpert",
+   "_diffusers_version": "0.21.0",
+   "expert_id": 2
+ }
expert_3/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "model_type": "dit_expert",
+   "expert_id": 3,
+   "architecture": "dit",
+   "hidden_size": 1152,
+   "num_layers": 28,
+   "num_heads": 16,
+   "patch_size": 2,
+   "in_channels": 4,
+   "out_channels": 4,
+   "mlp_ratio": 4.0,
+   "use_text_conditioning": true,
+   "use_class_conditioning": false,
+   "num_classes": 1000,
+   "text_embed_dim": 768,
+   "use_dit_time_embed": true,
+   "use_adaln_single": true,
+   "cfg_dropout_prob": 0.1
+ }
expert_3/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e88eab8b78086c113cc6a2f300ceaa1753be899b1abbf4bce115f4b86b2324d4
+ size 2423920240
expert_3/model_index.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "_class_name": "DiTExpert",
+   "_diffusers_version": "0.21.0",
+   "expert_id": 3
+ }
expert_4/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "model_type": "dit_expert",
+   "expert_id": 4,
+   "architecture": "dit",
+   "hidden_size": 1152,
+   "num_layers": 28,
+   "num_heads": 16,
+   "patch_size": 2,
+   "in_channels": 4,
+   "out_channels": 4,
+   "mlp_ratio": 4.0,
+   "use_text_conditioning": true,
+   "use_class_conditioning": false,
+   "num_classes": 1000,
+   "text_embed_dim": 768,
+   "use_dit_time_embed": true,
+   "use_adaln_single": true,
+   "cfg_dropout_prob": 0.1
+ }
expert_4/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d6d57fa16c5bb6c4941d4bada172607926edc23c5ac01d5fffdba0819426748f
+ size 2423920240
expert_4/model_index.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "_class_name": "DiTExpert",
+   "_diffusers_version": "0.21.0",
+   "expert_id": 4
+ }
expert_5/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "model_type": "dit_expert",
+   "expert_id": 5,
+   "architecture": "dit",
+   "hidden_size": 1152,
+   "num_layers": 28,
+   "num_heads": 16,
+   "patch_size": 2,
+   "in_channels": 4,
+   "out_channels": 4,
+   "mlp_ratio": 4.0,
+   "use_text_conditioning": true,
+   "use_class_conditioning": false,
+   "num_classes": 1000,
+   "text_embed_dim": 768,
+   "use_dit_time_embed": true,
+   "use_adaln_single": true,
+   "cfg_dropout_prob": 0.1
+ }
expert_5/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:189dc4afa4fa8bd5b99487d7ce095b1918f4d30bc2e20a21df2273d4280858dc
+ size 2423920240
expert_5/model_index.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "_class_name": "DiTExpert",
+   "_diffusers_version": "0.21.0",
+   "expert_id": 5
+ }
expert_6/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "model_type": "dit_expert",
+   "expert_id": 6,
+   "architecture": "dit",
+   "hidden_size": 1152,
+   "num_layers": 28,
+   "num_heads": 16,
+   "patch_size": 2,
+   "in_channels": 4,
+   "out_channels": 4,
+   "mlp_ratio": 4.0,
+   "use_text_conditioning": true,
+   "use_class_conditioning": false,
+   "num_classes": 1000,
+   "text_embed_dim": 768,
+   "use_dit_time_embed": true,
+   "use_adaln_single": true,
+   "cfg_dropout_prob": 0.1
+ }
expert_6/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1be832b1326ad751bc18266f6fbd712b29ec4b6b6dccadc614b29d9627283b5e
+ size 2423920240
expert_6/model_index.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "_class_name": "DiTExpert",
+   "_diffusers_version": "0.21.0",
+   "expert_id": 6
+ }
expert_7/config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "model_type": "dit_expert",
+   "expert_id": 7,
+   "architecture": "dit",
+   "hidden_size": 1152,
+   "num_layers": 28,
+   "num_heads": 16,
+   "patch_size": 2,
+   "in_channels": 4,
+   "out_channels": 4,
+   "mlp_ratio": 4.0,
+   "use_text_conditioning": true,
+   "use_class_conditioning": false,
+   "num_classes": 1000,
+   "text_embed_dim": 768,
+   "use_dit_time_embed": true,
+   "use_adaln_single": true,
+   "cfg_dropout_prob": 0.1
+ }
expert_7/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ef7de12263d23f503624336bed6dde4da77140b0137f23405799d8a9951b4b50
+ size 2423920240
expert_7/model_index.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "_class_name": "DiTExpert",
+   "_diffusers_version": "0.21.0",
+   "expert_id": 7
+ }
model_index.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "_class_name": "DDMPipeline",
+   "_diffusers_version": "0.21.0",
+   "num_experts": 8,
+   "expert_architecture": "dit",
+   "router_architecture": "dit",
+   "vae": [
+     "diffusers",
+     "AutoencoderKL"
+   ],
+   "text_encoder": [
+     "transformers",
+     "CLIPTextModel"
+   ],
+   "tokenizer": [
+     "transformers",
+     "CLIPTokenizer"
+   ],
+   "vae_name": "stabilityai/sd-vae-ft-mse",
+   "text_encoder_name": "openai/clip-vit-large-patch14"
+ }
router/.ipynb_checkpoints/config-checkpoint.json ADDED
@@ -0,0 +1,11 @@
+ {
+   "model_type": "router",
+   "architecture": "dit",
+   "num_clusters": 8,
+   "hidden_size": 768,
+   "num_layers": 12,
+   "num_heads": 12,
+   "patch_size": 2,
+   "in_channels": 4,
+   "use_dit_time_embed": false
+ }
router/config.json ADDED
@@ -0,0 +1,11 @@
+ {
+   "model_type": "router",
+   "architecture": "dit",
+   "num_clusters": 8,
+   "hidden_size": 768,
+   "num_layers": 12,
+   "num_heads": 12,
+   "patch_size": 2,
+   "in_channels": 4,
+   "use_dit_time_embed": true
+ }
router/model_index.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "_class_name": "Router",
+   "_diffusers_version": "0.21.0"
+ }
router/pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b819e8d9c0b25d174703d799a335bae2631ab849925b33c9d9a8dcd8b426b80
+ size 516560576
text_encoder/config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "architectures": [
+     "CLIPTextModel"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "dropout": 0.0,
+   "dtype": "float32",
+   "eos_token_id": 2,
+   "hidden_act": "quick_gelu",
+   "hidden_size": 768,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 77,
+   "model_type": "clip_text_model",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "projection_dim": 768,
+   "transformers_version": "4.56.2",
+   "vocab_size": 49408
+ }
text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:778d02eb9e707c3fbaae0b67b79ea0d1399b52e624fb634f2f19375ae7c047c3
+ size 492265168
tokenizer/merges.txt ADDED
The diff for this file is too large to render.
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "49406": {
+       "content": "<|startoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "49407": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|startoftext|>",
+   "clean_up_tokenization_spaces": false,
+   "do_lower_case": true,
+   "eos_token": "<|endoftext|>",
+   "errors": "replace",
+   "extra_special_tokens": {},
+   "model_max_length": 77,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "CLIPTokenizer",
+   "unk_token": "<|endoftext|>"
+ }
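For reference, the 77-token context (`model_max_length`) and `<|endoftext|>` padding follow the stock CLIP tokenizer; a quick usage sketch with the standard transformers API:

```python
from transformers import CLIPTokenizer

tok = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")

# Prompts are lowercased, padded to 77 tokens, and truncated if longer.
batch = tok("A beautiful sunset over Paris", padding="max_length",
            max_length=tok.model_max_length, truncation=True,
            return_tensors="pt")
print(batch["input_ids"].shape)  # torch.Size([1, 77])
```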
tokenizer/vocab.json ADDED
The diff for this file is too large to render.
vae/config.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "_class_name": "AutoencoderKL",
+   "_diffusers_version": "0.35.1",
+   "_name_or_path": "stabilityai/sd-vae-ft-mse",
+   "act_fn": "silu",
+   "block_out_channels": [
+     128,
+     256,
+     512,
+     512
+   ],
+   "down_block_types": [
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D"
+   ],
+   "force_upcast": true,
+   "in_channels": 3,
+   "latent_channels": 4,
+   "latents_mean": null,
+   "latents_std": null,
+   "layers_per_block": 2,
+   "mid_block_add_attention": true,
+   "norm_num_groups": 32,
+   "out_channels": 3,
+   "sample_size": 256,
+   "scaling_factor": 0.18215,
+   "shift_factor": null,
+   "up_block_types": [
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D"
+   ],
+   "use_post_quant_conv": true,
+   "use_quant_conv": true
+ }
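The four encoder blocks give the 8x spatial reduction noted in the README (256x256x3 pixels to 32x32x4 latents), and `scaling_factor: 0.18215` is applied around the diffusion process. A minimal sketch using the standard diffusers API:

```python
import torch
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")
image = torch.randn(1, 3, 256, 256)  # stand-in for a preprocessed image in [-1, 1]

with torch.no_grad():
    # Encode to a 1x4x32x32 latent, scaled for the diffusion model.
    latent = vae.encode(image).latent_dist.sample() * vae.config.scaling_factor
    # Decode: undo the scaling before the decoder.
    recon = vae.decode(latent / vae.config.scaling_factor).sample
```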
vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2aa1f43011b553a4cba7f37456465cdbd48aab7b54b9348b890e8058ea7683ec
+ size 334643268