codeShare
/

JupyterNotebooks

Model card Files Files and versions

xet

Community

codeShare committed on Sep 11, 2024

Commit

bfc742a

verified ·

1 Parent(s): 6eeabcf

Upload sd_token_similarity_calculator.ipynb

Browse files

Files changed (1) hide show

sd_token_similarity_calculator.ipynb +28 -16

sd_token_similarity_calculator.ipynb CHANGED Viewed

@@ -132,7 +132,7 @@
         "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
         "\n",
         "# @markdown Write name of token to match against\n",
-        "token_name = \"dogs\" # @param {type:'string',\"placeholder\":\"leave empty for random value token\"}\n",
         "\n",
         "prompt = token_name\n",
         "# @markdown (optional) Mix the token with something else\n",
@@ -361,14 +361,15 @@
         "#-----#\n",
         "# @markdown # The output...\n",
         "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
-        "must_contain = \" pet \" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
         "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
         "# @markdown -----\n",
         "# @markdown # Use a range of tokens from the vocab.json (slow method)\n",
-        "start_search_at_index = 1700 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
         "# @markdown The lower the start_index, the more similar the sampled tokens will be to the target token assigned in the '⚡ Get similiar tokens' cell. If the cell was not run, then it will use tokens ordered by similarity to the \"girl\\</w>\" token\n",
         "start_search_at_ID = start_search_at_index\n",
-        "search_range = 100 # @param {type:\"slider\", min:100, max: 2000, step:0}\n",
         "restrictions = 'None' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n",
         "#markdown Limit char size of included token <----- Disabled\n",
         "min_char_size = 0 #param {type:\"slider\", min:0, max: 20, step:1}\n",
@@ -383,15 +384,14 @@
         "RANGE =  min(search_range , max(1,NUM_TOKENS - start_search_at_ID))\n",
         "#-----#\n",
         "import math, random\n",
-        "CHUNK = math.floor(NUM_TOKENS/(RANGE*100))\n",
         "\n",
-        "ITERS = 3\n",
         "#-----#\n",
         "#LOOP START\n",
         "#-----#\n",
         "\n",
-        "results_sim = torch.zeros(ITERS+1)\n",
-        "results_name = {}\n",
         "\n",
         "# Check if original solution is best\n",
         "best_sim = 0\n",
@@ -409,7 +409,11 @@
         "  sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
         "#-----#\n",
         "best_sim = sim\n",
         "name_B = must_contain\n",
         "#-----#\n",
         "for iter in range(ITERS):\n",
         "  dots = torch.zeros(RANGE)\n",
@@ -418,8 +422,12 @@
         "  #-----#\n",
         "\n",
         "  _start = START + iter*CHUNK  + iter*random.randint(1,CHUNK)\n",
-        "  results_name[iter] = name_B\n",
         "  results_sim[iter] = best_sim\n",
         "\n",
         "  for index in range(RANGE):\n",
         "    id_C = min(_start + index, NUM_TOKENS)\n",
@@ -510,7 +518,7 @@
         "  used_reference = f'the text_encoding for {prompt_A}'\n",
         "  if(use == '🖼️image_encoding from image'):\n",
         "    used_reference = 'the image input'\n",
-        "  print(f'These token pairings within the range ID = {START} to ID = {START + RANGE} most closely match {used_reference}: ')\n",
         "  print('')\n",
         "  #----#\n",
         "  aheads = \"{\"\n",
@@ -556,16 +564,17 @@
         "  print(\"\")\n",
         "\n",
         "  tmp = must_start_with + ' ' + max_name_ahead + name_B + ' ' + must_end_with\n",
-        "  tmp = tmp.strip()\n",
         "  print(f\"max_similarity_ahead = {round(max_sim_ahead,2)} % when using '{tmp}' \")\n",
         "  print(\"\")\n",
         "  tmp = must_start_with + ' ' + name_B + max_name_trail + ' ' + must_end_with\n",
-        "  tmp = tmp.strip()\n",
         "  print(f\"max_similarity_trail = {round(max_sim_trail,2)} % when using '{tmp}' \")\n",
         "  #-----#\n",
         "  #STEP 2\n",
         "  import random\n",
         "  names = {}\n",
         "  NUM_PERMUTATIONS = 4\n",
         "  #-----#\n",
         "  dots = torch.zeros(NUM_PERMUTATIONS)\n",
@@ -593,16 +602,19 @@
         "      sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
         "    #-----#\n",
         "    dots[index] = sim\n",
-        "    names[index] = name_inner\n",
         "  #------#\n",
         "  sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
         "  #------#\n",
         "  best_sim =  dots[indices[0].item()]\n",
-        "  name_B  = names[indices[0].item()].replace('</w>', ' ') #Update name_B with best value\n",
         "#--------#\n",
         "#store the final value\n",
-        "results_name[iter] = name_B\n",
-        "results_sim[iter] = best_sim\n",
         "\n",
         "sorted, indices = torch.sort(results_sim,dim=0 , descending=True)\n",
         "\n",

         "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
         "\n",
         "# @markdown Write name of token to match against\n",
+        "token_name = \"banana \" # @param {type:'string',\"placeholder\":\"leave empty for random value token\"}\n",
         "\n",
         "prompt = token_name\n",
         "# @markdown (optional) Mix the token with something else\n",
         "#-----#\n",
         "# @markdown # The output...\n",
         "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
+        "must_contain = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
         "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
         "# @markdown -----\n",
         "# @markdown # Use a range of tokens from the vocab.json (slow method)\n",
+        "start_search_at_index = 0 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
         "# @markdown The lower the start_index, the more similar the sampled tokens will be to the target token assigned in the '⚡ Get similiar tokens' cell. If the cell was not run, then it will use tokens ordered by similarity to the \"girl\\</w>\" token\n",
         "start_search_at_ID = start_search_at_index\n",
+        "search_range = 100 # @param {type:\"slider\", min:10, max: 200, step:0}\n",
+        "iterations = 5 # @param {type:\"slider\", min:1, max: 20, step:0}\n",
         "restrictions = 'None' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n",
         "#markdown Limit char size of included token <----- Disabled\n",
         "min_char_size = 0 #param {type:\"slider\", min:0, max: 20, step:1}\n",
         "RANGE =  min(search_range , max(1,NUM_TOKENS - start_search_at_ID))\n",
         "#-----#\n",
         "import math, random\n",
+        "CHUNK = math.floor(NUM_TOKENS/RANGE)\n",
         "\n",
+        "ITERS = iterations\n",
         "#-----#\n",
         "#LOOP START\n",
         "#-----#\n",
         "\n",
+        "\n",
         "\n",
         "# Check if original solution is best\n",
         "best_sim = 0\n",
         "  sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
         "#-----#\n",
         "best_sim = sim\n",
+        "best_name = name\n",
         "name_B = must_contain\n",
+        "results_sim = torch.zeros(ITERS+1)\n",
+        "results_name_B = {}\n",
+        "results_name = {}\n",
         "#-----#\n",
         "for iter in range(ITERS):\n",
         "  dots = torch.zeros(RANGE)\n",
         "  #-----#\n",
         "\n",
         "  _start = START + iter*CHUNK  + iter*random.randint(1,CHUNK)\n",
+        "  results_name[iter] = best_name\n",
         "  results_sim[iter] = best_sim\n",
+        "  results_name_B[iter] = name_B\n",
+        "  #-----#\n",
+        "  sorted, indices = torch.sort(results_sim,dim=0 , descending=True)\n",
+        "  name_B  = results_name_B[indices[0].item()].replace('</w>', ' ') #Update name_B with best value\n",
         "\n",
         "  for index in range(RANGE):\n",
         "    id_C = min(_start + index, NUM_TOKENS)\n",
         "  used_reference = f'the text_encoding for {prompt_A}'\n",
         "  if(use == '🖼️image_encoding from image'):\n",
         "    used_reference = 'the image input'\n",
+        "  print(f'These token pairings within the range ID = {_start} to ID = {_start + RANGE} most closely match {used_reference}: ')\n",
         "  print('')\n",
         "  #----#\n",
         "  aheads = \"{\"\n",
         "  print(\"\")\n",
         "\n",
         "  tmp = must_start_with + ' ' + max_name_ahead + name_B + ' ' + must_end_with\n",
+        "  tmp = tmp.strip().replace('</w>', ' ')\n",
         "  print(f\"max_similarity_ahead = {round(max_sim_ahead,2)} % when using '{tmp}' \")\n",
         "  print(\"\")\n",
         "  tmp = must_start_with + ' ' + name_B + max_name_trail + ' ' + must_end_with\n",
+        "  tmp = tmp.strip().replace('</w>', ' ')\n",
         "  print(f\"max_similarity_trail = {round(max_sim_trail,2)} % when using '{tmp}' \")\n",
         "  #-----#\n",
         "  #STEP 2\n",
         "  import random\n",
         "  names = {}\n",
+        "  name_inners = {}\n",
         "  NUM_PERMUTATIONS = 4\n",
         "  #-----#\n",
         "  dots = torch.zeros(NUM_PERMUTATIONS)\n",
         "      sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
         "    #-----#\n",
         "    dots[index] = sim\n",
+        "    names[index] = name\n",
+        "    name_inners[index] = name_inner\n",
         "  #------#\n",
         "  sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
         "  #------#\n",
         "  best_sim =  dots[indices[0].item()]\n",
+        "  best_name = names[indices[0].item()]\n",
+        "  name_B  = name_inners[indices[0].item()].replace('</w>', ' ') #Update name_B with best value\n",
         "#--------#\n",
         "#store the final value\n",
+        "results_name[iter+1] = best_name\n",
+        "results_sim[iter+1] = best_sim\n",
+        "results_name_B[iter+1] = name_B\n",
         "\n",
         "sorted, indices = torch.sort(results_sim,dim=0 , descending=True)\n",
         "\n",