Upload sd_token_similarity_calculator.ipynb
sd_token_similarity_calculator.ipynb
CHANGED
@@ -118,10 +118,29 @@
 ],
 "metadata": {
 "id": "Ch9puvwKH1s3",
-"collapsed": true
+"collapsed": true,
+"outputId": "033c251a-2043-40e7-9500-4da870ffa7fd",
+"colab": {
+"base_uri": "https://localhost:8080/"
+}
 },
-"execution_count":
-"outputs": [
+"execution_count": 1,
+"outputs": [
+{
+"output_type": "stream",
+"name": "stdout",
+"text": [
+"Cloning into 'sd_tokens'...\n",
+"remote: Enumerating objects: 20, done.\u001b[K\n",
+"remote: Counting objects: 100% (17/17), done.\u001b[K\n",
+"remote: Compressing objects: 100% (17/17), done.\u001b[K\n",
+"remote: Total 20 (delta 4), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
+"Unpacking objects: 100% (20/20), 310.37 KiB | 2.10 MiB/s, done.\n",
+"Filtering content: 100% (3/3), 160.82 MiB | 26.64 MiB/s, done.\n",
+"/content/sd_tokens\n"
+]
+}
+]
 },
 {
 "cell_type": "code",
@@ -132,7 +151,7 @@
 "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
 "\n",
 "# @markdown Write name of token to match against\n",
-"token_name = \"
+"token_name = \" blanket \" # @param {type:'string',\"placeholder\":\"leave empty for random value token\"}\n",
 "\n",
 "prompt = token_name\n",
 "# @markdown (optional) Mix the token with something else\n",
@@ -368,7 +387,10 @@
 "start_search_at_index = 0 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
 "# @markdown The lower the start_index, the more similiar the sampled tokens will be to the target token assigned in the '⚡ Get similiar tokens' cell\". If the cell was not run, then it will use tokens ordered by similarity to the \"girl\\</w>\" token\n",
 "start_search_at_ID = start_search_at_index\n",
-"search_range =
+"search_range = 1000 # @param {type:\"slider\", min:10, max: 1000, step:10}\n",
+"\n",
+"samples_per_iter = 10 # @param {type:\"slider\", min:10, max: 100, step:10}\n",
+"\n",
 "iterations = 5 # @param {type:\"slider\", min:1, max: 20, step:0}\n",
 "restrictions = 'None' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n",
 "#markdown Limit char size of included token <----- Disabled\n",
@@ -384,15 +406,11 @@
 "RANGE = min(search_range , max(1,NUM_TOKENS - start_search_at_ID))\n",
 "#-----#\n",
 "import math, random\n",
-"
-"\n",
+"NUM_PERMUTATIONS = 4\n",
 "ITERS = iterations\n",
 "#-----#\n",
 "#LOOP START\n",
 "#-----#\n",
-"\n",
-"\n",
-"\n",
 "# Check if original solution is best\n",
 "best_sim = 0\n",
 "name = must_start_with + must_contain + must_end_with\n",
@@ -400,6 +418,7 @@
 "text_features = model.get_text_features(**ids)\n",
 "text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
 "#------#\n",
+"sim = 0\n",
 "if(use == '🖼️image_encoding from image'):\n",
 " logit_scale = model.logit_scale.exp()\n",
 " torch.matmul(text_features, image_features.t()) * logit_scale\n",
@@ -411,7 +430,8 @@
 "best_sim = sim\n",
 "best_name = name\n",
 "name_B = must_contain\n",
-"
+"#------#\n",
+"results_sim = torch.zeros(ITERS*NUM_PERMUTATIONS)\n",
 "results_name_B = {}\n",
 "results_name = {}\n",
 "#-----#\n",
@@ -420,17 +440,10 @@
 " is_trail = torch.zeros(RANGE)\n",
 " import re\n",
 " #-----#\n",
+" _start = START + iter*RANGE\n",
 "\n",
-"
-"
-" results_sim[iter] = best_sim\n",
-" results_name_B[iter] = name_B\n",
-" #-----#\n",
-" sorted, indices = torch.sort(results_sim,dim=0 , descending=True)\n",
-" name_B = results_name_B[indices[0].item()].replace('</w>', ' ') #Update name_B with best value\n",
-"\n",
-" for index in range(RANGE):\n",
-" id_C = min(_start + index, NUM_TOKENS)\n",
+" for index in range(samples_per_iter):\n",
+" id_C = min(_start + index, NUM_TOKENS) + random.randint(0,RANGE)\n",
 " name_C = db_vocab[f'{id_C}']\n",
 " is_Prefix = 0\n",
 " #Skip if non-AZ characters are found\n",
@@ -573,17 +586,15 @@
 " #-----#\n",
 " #STEP 2\n",
 " import random\n",
-" names = {}\n",
-" name_inners = {}\n",
-" NUM_PERMUTATIONS = 4\n",
 " #-----#\n",
-" dots = torch.zeros(NUM_PERMUTATIONS)\n",
 " for index in range(NUM_PERMUTATIONS):\n",
 " name_inner = ''\n",
 " if index == 0 : name_inner = name_B\n",
-" if index == 1
-" if index == 2
-" if index == 3
+" if index == 1: name_inner = max_name_ahead\n",
+" if index == 2: name_inner = name_B + max_name_trail\n",
+" if index == 3: name_inner = max_name_ahead + name_B + max_name_trail\n",
+" if name_inner == '': name_inner = max_name_ahead + name_B + max_name_trail\n",
+"\n",
 " name = must_start_with + name_inner + must_end_with\n",
 " #----#\n",
 " ids = processor.tokenizer(text=name, padding=use_token_padding, return_tensors=\"pt\")\n",
@@ -601,25 +612,17 @@
 " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
 " sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
 " #-----#\n",
-"
-"
-"
+" results_name[iter*NUM_PERMUTATIONS + index] = name\n",
+" results_sim[iter*NUM_PERMUTATIONS + index] = sim\n",
+" results_name_B[iter*NUM_PERMUTATIONS + index] = name_inner.replace('</w>',' ')\n",
 " #------#\n",
-"
-" #------#\n",
-" best_sim = dots[indices[0].item()]\n",
-" best_name = names[indices[0].item()]\n",
-" name_B = name_inners[indices[0].item()].replace('</w>', ' ') #Update name_B with best value\n",
+" name_B = results_name_B[iter*NUM_PERMUTATIONS + random.randint(0,3)]\n",
 "#--------#\n",
-"#store the final value\n",
-"results_name[iter+1] = best_name\n",
-"results_sim[iter+1] = best_sim\n",
-"results_name_B[iter+1] = name_B\n",
 "\n",
+"print('')\n",
 "sorted, indices = torch.sort(results_sim,dim=0 , descending=True)\n",
 "\n",
-"
-"for index in range(ITERS+1):\n",
+"for index in range(ITERS*NUM_PERMUTATIONS):\n",
 " name_inner = results_name[indices[index].item()]\n",
 " print(must_start_with + name_inner + must_end_with)\n",
 " print(f'similiarity = {round(sorted[index].item(),2)} %')\n",