codeShare
/

JupyterNotebooks

Model card Files Files and versions

xet

Community

codeShare committed on Sep 10, 2024

Commit

f378257

verified ·

1 Parent(s): 8a0aaf3

Upload sd_token_similarity_calculator.ipynb

Browse files

Files changed (1) hide show

sd_token_similarity_calculator.ipynb +172 -69

sd_token_similarity_calculator.ipynb CHANGED Viewed

@@ -116,10 +116,28 @@
       "metadata": {
         "id": "Ch9puvwKH1s3",
         "collapsed": true,
-        "cellView": "form"
       },
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
@@ -272,56 +290,23 @@
       "outputs": []
     },
     {
-      "cell_type": "code",
       "source": [
-        "# @title 💫 Compare Text encodings\n",
-        "\n",
-        "prompt_A = \"banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
-        "prompt_B = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
-        "use_token_padding = True # @param {type:\"boolean\"}\n",
         "\n",
-        "from transformers import  CLIPProcessor, CLIPModel\n",
-        "\n",
-        "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
         "\n",
-        "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
-        "\n",
-        "ids_A = processor.tokenizer(text=prompt_A, padding=use_token_padding, return_tensors=\"pt\")\n",
-        "text_encoding_A = model.get_text_features(**ids_A)\n",
-        "\n",
-        "\n",
-        "ids_B = processor.tokenizer(text=prompt_B, padding=use_token_padding, return_tensors=\"pt\")\n",
-        "text_encoding_B = model.get_text_features(**ids_B)\n",
-        "\n",
-        "similarity_str =  'The similarity between the text_encoding for A:\"' + prompt_A + '\" and B: \"' + prompt_B +'\" is ' +  token_similarity(text_encoding_A[0] , text_encoding_B[0])\n",
-        "\n",
-        "\n",
-        "print(similarity_str)\n",
-        "#outputs = model(**inputs)\n",
-        "#logits_per_image = outputs.logits_per_image # this is the image-text similarity score\n",
-        "#probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities"
-      ],
-      "metadata": {
-        "id": "QQOjh5BvnG8M",
-        "collapsed": true,
-        "cellView": "form"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "You can write an url or upload a file locally from your device to use as reference. The image will by saved in the 'sd_tokens' folder. Note that the 'sd_tokens' folder will be deleted upon exiting this runtime."
       ],
       "metadata": {
-        "id": "hyK423TQCRup"
       }
     },
     {
       "cell_type": "code",
       "source": [
-        "# @title 🪐🖼️ -> 📝 Image to prompt : Add single token to existing prompt to match image\n",
         "from google.colab import files\n",
         "def upload_files():\n",
         "  from google.colab import files\n",
@@ -331,7 +316,7 @@
         "  return list(uploaded.keys())\n",
         "#Get image\n",
         "# You can use \"http://images.cocodataset.org/val2017/000000039769.jpg\" for testing\n",
-        "url = \"\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for local upload (scroll down to see it)\"}\n",
         "\n",
         "colab_image_path = \"\" # @param {\"type\":\"string\",\"placeholder\":\"(optional) Write colab image path to load from\"}\n",
         "from PIL import Image\n",
@@ -369,19 +354,19 @@
         "\n",
         "# @markdown Set conditions for the output\n",
         "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
-        "must_contain = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
         "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
         "token_B = must_contain\n",
         "\n",
         "# @markdown Limit the search\n",
         "use_token_padding = True # @param {type:\"boolean\"}\n",
-        "start_search_at_ID = 12500 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
-        "search_range = 500 # @param {type:\"slider\", min:0, max: 2000, step:100}\n",
-        "restrictions = 'Suffix only' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n",
         "\n",
         "# @markdown Limit char size of included token\n",
-        "min_char_size = 3 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
-        "char_range = 5 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
         "\n",
         "#Tokenize input B\n",
         "from transformers import AutoTokenizer\n",
@@ -397,14 +382,26 @@
         "\n",
         "dots = torch.zeros(RANGE)\n",
         "is_BC = torch.zeros(RANGE)\n",
         "for index in range(RANGE):\n",
         "  id_C = START + index\n",
         "  C = token[id_C]\n",
         "  _C = LA.vector_norm(C, ord=2)\n",
         "  name_C = vocab[id_C]\n",
         "\n",
         "  # Decide if we should process prefix/suffix tokens\n",
         "  if name_C.find('</w>')<=-1:\n",
         "    if restrictions != \"Prefix only\":\n",
         "      continue\n",
         "  else:\n",
@@ -420,8 +417,8 @@
         "  #-----#\n",
         "\n",
         "  name_CB = must_start_with + name_C + name_B + must_end_with\n",
-        "  if restrictions == \"Prefix only\":\n",
-        "    name_CB = must_start_with +  name_C + '-' + name_B + must_end_with\n",
         "  #-----#\n",
         "  ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
         "  text_encoding_CB = model.get_text_features(**ids_CB)\n",
@@ -469,37 +466,143 @@
         "print('')\n",
         "print(f'These token pairings within the range ID = {START} to ID = {START + RANGE} most closely match the text_encoding for {prompt_A} : ')\n",
         "print('')\n",
-        "\n",
         "for index in range(min(list_size,RANGE)):\n",
         "  id = START + indices[index].item()\n",
-        "  if (print_Name):\n",
-        "    if(is_BC[index]>0):\n",
-        "      print(must_start_with +  name_B + vocab[id] + must_end_with)\n",
-        "    else:\n",
-        "      if restrictions == \"Prefix only\":\n",
-        "        print(must_start_with +   vocab[id] + '-'  + name_B + must_end_with)\n",
-        "      else:\n",
-        "        print(must_start_with +   vocab[id] + name_B + must_end_with)\n",
-        "  if (print_ID):\n",
-        "    print(f'ID = {id}') # IDs\n",
-        "  if (print_Similarity):\n",
-        "    print(f'similiarity = {round(sorted[index].item()*100,2)} %')\n",
-        "  if (print_Divider):\n",
-        "    print('--------')\n",
         "\n",
         "\n",
         "\n",
         "\n",
-        "\n"
       ],
       "metadata": {
         "collapsed": true,
-        "cellView": "form",
         "id": "fi0jRruI0-tu"
       },
       "execution_count": null,
       "outputs": []
     },
     {
       "cell_type": "code",
       "source": [

       "metadata": {
         "id": "Ch9puvwKH1s3",
         "collapsed": true,
+        "cellView": "form",
+        "outputId": "aa58503f-8e68-43bf-d73b-3eb877ae10e4",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        }
       },
+      "execution_count": 1,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Cloning into 'sd_tokens'...\n",
+            "remote: Enumerating objects: 10, done.\u001b[K\n",
+            "remote: Counting objects: 100% (7/7), done.\u001b[K\n",
+            "remote: Compressing objects: 100% (7/7), done.\u001b[K\n",
+            "remote: Total 10 (delta 1), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
+            "Unpacking objects: 100% (10/10), 306.93 KiB | 5.48 MiB/s, done.\n",
+            "/content/sd_tokens\n"
+          ]
+        }
+      ]
     },
     {
       "cell_type": "code",
       "outputs": []
     },
     {
+      "cell_type": "markdown",
       "source": [
+        "The image interrogator below appends CLIP tokens to either end of the 'must_contain' text, and seeks to maximize similarity with the image encoding.\n",
         "\n",
+        "It takes a long while to check all the tokens (too long!) so this cell only samples a range of the 49K available tokens.\n",
         "\n",
+        "You can run this cell, then paste the result into the 'must_contain' box, and then run the cell again.\n",
+        "\n"
       ],
       "metadata": {
+        "id": "IUCuV9RtQpBn"
       }
     },
     {
       "cell_type": "code",
       "source": [
+        "# @title 🪐🖼️ -> 📝 Image to prompt : Create suggestions of things to add to prompt to match image\n",
         "from google.colab import files\n",
         "def upload_files():\n",
         "  from google.colab import files\n",
         "  return list(uploaded.keys())\n",
         "#Get image\n",
         "# You can use \"http://images.cocodataset.org/val2017/000000039769.jpg\" for testing\n",
+        "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for local upload (scroll down to see it)\"}\n",
         "\n",
         "colab_image_path = \"\" # @param {\"type\":\"string\",\"placeholder\":\"(optional) Write colab image path to load from\"}\n",
         "from PIL import Image\n",
         "\n",
         "# @markdown Set conditions for the output\n",
         "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
+        "must_contain = \"banana \" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
         "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
         "token_B = must_contain\n",
         "\n",
         "# @markdown Limit the search\n",
         "use_token_padding = True # @param {type:\"boolean\"}\n",
+        "start_search_at_ID = 27700 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
+        "search_range = 288 # @param {type:\"slider\", min:100, max: 2000, step:0}\n",
+        "restrictions = 'None' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n",
         "\n",
         "# @markdown Limit char size of included token\n",
+        "min_char_size = 3 # @param {type:\"slider\", min:0, max: 20, step:1}\n",
+        "char_range = 14 # @param {type:\"slider\", min:0, max: 20, step:1}\n",
         "\n",
         "#Tokenize input B\n",
         "from transformers import AutoTokenizer\n",
         "\n",
         "dots = torch.zeros(RANGE)\n",
         "is_BC = torch.zeros(RANGE)\n",
+        "\n",
+        "import re\n",
+        "\n",
         "for index in range(RANGE):\n",
         "  id_C = START + index\n",
         "  C = token[id_C]\n",
         "  _C = LA.vector_norm(C, ord=2)\n",
         "  name_C = vocab[id_C]\n",
         "\n",
+        "  is_Prefix = 0\n",
+        "\n",
+        "\n",
+        "  # Skip tokens that contain non-word characters (anything outside [a-zA-Z0-9_]).\n",
+        "  # Python's re module has no '/g' suffix: the previous pattern \\W/g required a literal '/g' after the non-word character, so it practically never matched.\n",
+        "  if re.search(r\"\\W\", name_C.replace('</w>', '')):\n",
+        "    continue\n",
+        "\n",
+        "\n",
         "  # Decide if we should process prefix/suffix tokens\n",
         "  if name_C.find('</w>')<=-1:\n",
+        "    is_Prefix = 1\n",
         "    if restrictions != \"Prefix only\":\n",
         "      continue\n",
         "  else:\n",
         "  #-----#\n",
         "\n",
         "  name_CB = must_start_with + name_C + name_B + must_end_with\n",
+        "  if is_Prefix>0:\n",
+        "    name_CB = must_start_with + ' ' + name_C.strip() + '-' + name_B.strip() + ' ' + must_end_with\n",
         "  #-----#\n",
         "  ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
         "  text_encoding_CB = model.get_text_features(**ids_CB)\n",
         "print('')\n",
         "print(f'These token pairings within the range ID = {START} to ID = {START + RANGE} most closely match the text_encoding for {prompt_A} : ')\n",
         "print('')\n",
+        "#----#\n",
+        "aheads = \"{\"\n",
+        "trails = \"{\"\n",
+        "tmp = \"\"\n",
+        "#----#\n",
+        "max_sim_ahead = 0\n",
+        "max_sim_trail = 0\n",
+        "sim = 0\n",
+        "max_name_ahead = ''\n",
+        "max_name_trail = ''\n",
+        "#----#\n",
         "for index in range(min(list_size,RANGE)):\n",
         "  id = START + indices[index].item()\n",
+        "  name = vocab[id]\n",
+        "  #-----#\n",
+        "  if (name.find('</w>')<=-1):\n",
+        "    name = name + '-'\n",
+        "  else:\n",
+        "    name = name.replace('</w>', ' ')\n",
+        "  if(is_BC[index]>0):\n",
+        "    trails = trails + name + \"|\"\n",
+        "  else:\n",
+        "    aheads = aheads + name + \"|\"\n",
+        "  #----#\n",
+        "  sim = sorted[index].item()\n",
         "\n",
+        "  if(is_BC[index]>0):\n",
+        "    if sim>max_sim_ahead:\n",
+        "      max_sim_ahead = sim\n",
+        "      max_name_ahead = name\n",
+        "  else:\n",
+        "    if sim>max_sim_trail:\n",
+        "      max_sim_trail = sim\n",
+        "      max_name_trail = name\n",
         "\n",
+        "#------#\n",
+        "trails = (trails + \"&&&&\").replace(\"|&&&&\", \"}\").replace(\"</w>\", \" \").replace(\"{&&&&\", \"\")\n",
+        "aheads = (aheads + \"&&&&\").replace(\"|&&&&\", \"}\").replace(\"</w>\", \" \").replace(\"{&&&&\", \"\")\n",
+        "max_sim_ahead=max_sim_ahead*100\n",
+        "max_sim_ahead=max_sim_trail*100\n",
+        "#-----#\n",
+        "print(f\"place these items ahead of prompt :  {aheads}\")\n",
+        "print(\"\")\n",
+        "print(f\"place these items behind the prompt : {trails}\")\n",
+        "print(\"\")\n",
+        "print(f\"max_similarity = {max_sim_ahead} % when using '{max_name_ahead + must_contain}' \")\n",
+        "print(\"\")\n",
+        "print(f\"max_similarity = {max_sim_trail} % when using '{must_contain  + max_name_trail}' \")\n",
+        "#-----#\n",
+        "#STEP 2\n",
+        "import random\n",
+        "\n",
+        "names = {}\n",
+        "\n",
+        "NUM_PERMUTATIONS = 4 # 0 1 2 3\n",
+        "dots = torch.zeros(NUM_PERMUTATIONS)\n",
+        "for index in range(NUM_PERMUTATIONS):\n",
+        "  name = must_start_with\n",
+        "  if index == 0 : name = name + must_contain\n",
+        "  if index == 1 : name = name + max_name_ahead + must_contain\n",
+        "  if index == 2 : name = name + must_contain + max_name_trail\n",
+        "  if index == 3 : name = name + max_name_ahead + must_contain + max_name_trail\n",
+        "  name = name + must_end_with\n",
+        "  #----#\n",
+        "  ids_B = processor.tokenizer(text=name, padding=use_token_padding, return_tensors=\"pt\")\n",
+        "  text_encoding_B = model.get_text_features(**ids_B)\n",
+        "  B = text_encoding_B[0]\n",
+        "  _B =  LA.vector_norm(B, ord=2)\n",
+        "  dots[index] = torch.dot(A,B)/(_A*_B)\n",
+        "  names[index] = name\n",
+        "#------#\n",
         "\n",
+        "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
         "\n",
+        "for index in range(NUM_PERMUTATIONS):\n",
+        "  print(names[indices[index].item()])\n",
+        "  print(f'similiarity = {round(sorted[index].item()*100,2)} %')\n",
+        "  print('------')\n",
+        "\n",
+        "\n",
+        "\n",
+        ""
       ],
       "metadata": {
         "collapsed": true,
         "id": "fi0jRruI0-tu"
       },
       "execution_count": null,
       "outputs": []
     },
+    {
+      "cell_type": "code",
+      "source": [
+        "# @title 💫 Compare Text encodings\n",
+        "\n",
+        "prompt_A = \"banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
+        "prompt_B = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
+        "use_token_padding = True # @param {type:\"boolean\"}\n",
+        "\n",
+        "from transformers import  CLIPProcessor, CLIPModel\n",
+        "\n",
+        "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
+        "\n",
+        "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
+        "\n",
+        "ids_A = processor.tokenizer(text=prompt_A, padding=use_token_padding, return_tensors=\"pt\")\n",
+        "text_encoding_A = model.get_text_features(**ids_A)\n",
+        "\n",
+        "\n",
+        "ids_B = processor.tokenizer(text=prompt_B, padding=use_token_padding, return_tensors=\"pt\")\n",
+        "text_encoding_B = model.get_text_features(**ids_B)\n",
+        "\n",
+        "similarity_str =  'The similarity between the text_encoding for A:\"' + prompt_A + '\" and B: \"' + prompt_B +'\" is ' +  token_similarity(text_encoding_A[0] , text_encoding_B[0])\n",
+        "\n",
+        "\n",
+        "print(similarity_str)\n",
+        "#outputs = model(**inputs)\n",
+        "#logits_per_image = outputs.logits_per_image # this is the image-text similarity score\n",
+        "#probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities"
+      ],
+      "metadata": {
+        "id": "QQOjh5BvnG8M",
+        "collapsed": true,
+        "cellView": "form"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "You can enter a URL or upload a file from your device to use as a reference. The image will be saved in the 'sd_tokens' folder. Note that the 'sd_tokens' folder will be deleted upon exiting this runtime."
+      ],
+      "metadata": {
+        "id": "hyK423TQCRup"
+      }
+    },
     {
       "cell_type": "code",
       "source": [