Add files using upload-large-folder tool

Browse files

Files changed (3) hide show

README.md +4 -4
config.json +8 -2
generation_config.json +1 -1

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ tags:
 - safetensors
 ---
-# Model Card for PrunaAI/test-save-tiny-random-llama4-smashed
 This model was created using the [pruna](https://github.com/PrunaAI/pruna) library. Pruna is a model optimization framework built for developers, enabling you to deliver more efficient models with minimal implementation overhead.
@@ -17,7 +17,7 @@ First things first, you need to install the pruna library:
 pip install pruna
 ```
-You can [use the transformers library to load the model](https://huggingface.co/PrunaAI/test-save-tiny-random-llama4-smashed?library=transformers) but this might not include all optimizations by default.
 To ensure that all optimizations are applied, use the pruna library to load the model using the following code:
@@ -25,7 +25,7 @@ To ensure that all optimizations are applied, use the pruna library to load the
 from pruna import PrunaModel
 loaded_model = PrunaModel.from_pretrained(
-    "PrunaAI/test-save-tiny-random-llama4-smashed"
 )
 # we can then run inference using the methods supported by the base model
 ```
@@ -71,5 +71,5 @@ The compression configuration of the model is stored in the `smash_config.json`
 [![Twitter](https://img.shields.io/twitter/follow/PrunaAI?style=social)](https://twitter.com/PrunaAI)
 [![GitHub](https://img.shields.io/github/followers/PrunaAI?label=Follow%20%40PrunaAI&style=social)](https://github.com/PrunaAI)
 [![LinkedIn](https://img.shields.io/badge/LinkedIn-Connect-blue)](https://www.linkedin.com/company/93832878/admin/feed/posts/?feedType=following)
-[![Discord](https://img.shields.io/badge/Discord-Join%20Us-blue?style=social&logo=discord)](https://discord.com/invite/rskEr4BZJx)
 [![Reddit](https://img.shields.io/reddit/subreddit-subscribers/PrunaAI?style=social)](https://www.reddit.com/r/PrunaAI/)

 - safetensors
 ---
+# Model Card for pruna-test/test-save-tiny-random-llama4-smashed
 This model was created using the [pruna](https://github.com/PrunaAI/pruna) library. Pruna is a model optimization framework built for developers, enabling you to deliver more efficient models with minimal implementation overhead.
 pip install pruna
 ```
+You can [use the transformers library to load the model](https://huggingface.co/pruna-test/test-save-tiny-random-llama4-smashed?library=transformers) but this might not include all optimizations by default.
 To ensure that all optimizations are applied, use the pruna library to load the model using the following code:
 from pruna import PrunaModel
 loaded_model = PrunaModel.from_pretrained(
+    "pruna-test/test-save-tiny-random-llama4-smashed"
 )
 # we can then run inference using the methods supported by the base model
 ```
 [![Twitter](https://img.shields.io/twitter/follow/PrunaAI?style=social)](https://twitter.com/PrunaAI)
 [![GitHub](https://img.shields.io/github/followers/PrunaAI?label=Follow%20%40PrunaAI&style=social)](https://github.com/PrunaAI)
 [![LinkedIn](https://img.shields.io/badge/LinkedIn-Connect-blue)](https://www.linkedin.com/company/93832878/admin/feed/posts/?feedType=following)
+[![Discord](https://img.shields.io/badge/Discord-Join%20Us-blue?style=social&logo=discord)](https://discord.gg/JFQmtFKCjd)
 [![Reddit](https://img.shields.io/reddit/subreddit-subscribers/PrunaAI?style=social)](https://www.reddit.com/r/PrunaAI/)

config.json CHANGED Viewed

@@ -8,7 +8,6 @@
   "attn_scale": 0.1,
   "attn_temperature_tuning": 4,
   "bos_token_id": 200000,
-  "cache_implementation": "hybrid",
   "eos_token_id": [
     200001,
     200007,
@@ -23,6 +22,13 @@
   "interleave_moe_layer_step": 1,
   "intermediate_size": 32,
   "intermediate_size_mlp": 64,
   "max_position_embeddings": 10485760,
   "model_type": "llama4_text",
   "moe_layers": [
@@ -59,7 +65,7 @@
   "router_jitter_noise": 0.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.52.3",
   "use_cache": true,
   "use_qk_norm": true,
   "vocab_size": 202048

   "attn_scale": 0.1,
   "attn_temperature_tuning": 4,
   "bos_token_id": 200000,
   "eos_token_id": [
     200001,
     200007,
   "interleave_moe_layer_step": 1,
   "intermediate_size": 32,
   "intermediate_size_mlp": 64,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention"
+  ],
   "max_position_embeddings": 10485760,
   "model_type": "llama4_text",
   "moe_layers": [
   "router_jitter_noise": 0.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
+  "transformers_version": "4.53.2",
   "use_cache": true,
   "use_qk_norm": true,
   "vocab_size": 202048

generation_config.json CHANGED Viewed

@@ -7,5 +7,5 @@
     200008
   ],
   "pad_token_id": 200018,
-  "transformers_version": "4.52.3"
 }

     200008
   ],
   "pad_token_id": 200018,
+  "transformers_version": "4.53.2"
 }