Upload sd_token_similarity_calculator.ipynb
Browse files
sd_token_similarity_calculator.ipynb
CHANGED
|
@@ -78,8 +78,8 @@
|
|
| 78 |
{
|
| 79 |
"cell_type": "code",
|
| 80 |
"source": [
|
| 81 |
-
"print(vocab[
|
| 82 |
-
"print(token[
|
| 83 |
],
|
| 84 |
"metadata": {
|
| 85 |
"id": "S_Yh9gH_OUA1"
|
|
@@ -104,7 +104,7 @@
|
|
| 104 |
"\n",
|
| 105 |
"from transformers import AutoTokenizer\n",
|
| 106 |
"tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
|
| 107 |
-
"prompt= \"
|
| 108 |
"tokenizer_output = tokenizer(text = prompt)\n",
|
| 109 |
"input_ids = tokenizer_output['input_ids']\n",
|
| 110 |
"print(input_ids)"
|
|
@@ -135,7 +135,7 @@
|
|
| 135 |
"\n",
|
| 136 |
"sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
|
| 137 |
"#----#\n",
|
| 138 |
-
"print(f'Calculated all cosine-similarities between ID = {id_A} the rest of the
|
| 139 |
"print(f'Calculated indices as a 1x{indices.shape[0]} tensor')"
|
| 140 |
],
|
| 141 |
"metadata": {
|
|
@@ -149,8 +149,9 @@
|
|
| 149 |
"source": [
|
| 150 |
"list_size = 10 # @param {type:'number'}\n",
|
| 151 |
"for index in range(list_size):\n",
|
| 152 |
-
"
|
| 153 |
-
" print(f'
|
|
|
|
| 154 |
"    print(f'similarity = {round(sorted[index].item()*100,2)} %') # % value\n",
|
| 155 |
" print('--------')\n"
|
| 156 |
],
|
|
|
|
| 78 |
{
|
| 79 |
"cell_type": "code",
|
| 80 |
"source": [
|
| 81 |
+
"print(vocab[8922]) #the vocab item for ID 8922\n",
|
| 82 |
+
"print(token[8922].shape) #dimension of the token"
|
| 83 |
],
|
| 84 |
"metadata": {
|
| 85 |
"id": "S_Yh9gH_OUA1"
|
|
|
|
| 104 |
"\n",
|
| 105 |
"from transformers import AutoTokenizer\n",
|
| 106 |
"tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
|
| 107 |
+
"prompt= \"banana\" # @param {type:'string'}\n",
|
| 108 |
"tokenizer_output = tokenizer(text = prompt)\n",
|
| 109 |
"input_ids = tokenizer_output['input_ids']\n",
|
| 110 |
"print(input_ids)"
|
|
|
|
| 135 |
"\n",
|
| 136 |
"sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
|
| 137 |
"#----#\n",
|
| 138 |
+
"print(f'Calculated all cosine-similarities between the token {vocab[id_A]} with ID = {id_A} and the rest of the {NUM_TOKENS} tokens as a 1x{sorted.shape[0]} tensor')\n",
|
| 139 |
"print(f'Calculated indices as a 1x{indices.shape[0]} tensor')"
|
| 140 |
],
|
| 141 |
"metadata": {
|
|
|
|
| 149 |
"source": [
|
| 150 |
"list_size = 10 # @param {type:'number'}\n",
|
| 151 |
"for index in range(list_size):\n",
|
| 152 |
+
" id = indices[index].item()\n",
|
| 153 |
+
" print(f'{vocab[id]}') # vocab item\n",
|
| 154 |
+
" print(f'ID = {id}') # IDs\n",
|
| 155 |
"    print(f'similarity = {round(sorted[index].item()*100,2)} %') # % value\n",
|
| 156 |
" print('--------')\n"
|
| 157 |
],
|