Spaces:
Running
on
L40S
Running
on
L40S
| from collections.abc import Sequence | |
| import random | |
| from typing import Optional, List, Tuple | |
| import gradio as gr | |
| import spaces | |
| import torch | |
| from transformers import ( | |
| AutoModelForCausalLM, | |
| AutoTokenizer, | |
| BayesianDetectorModel, | |
| SynthIDTextWatermarkingConfig, | |
| SynthIDTextWatermarkDetector, | |
| SynthIDTextWatermarkLogitsProcessor, | |
| ) | |
| # If the watermark is not detected, consider the use case. Could be because of | |
| # the nature of the task (e.g., factual responses are lower entropy) or it could | |
| # be due to another factor. | |
# Hugging Face Hub repository IDs: the text-generation model and the Bayesian
# watermark detector loaded for this demo.
_MODEL_IDENTIFIER = 'google/gemma-2b-it'
_DETECTOR_IDENTIFIER = 'google/synthid-spaces-demo-detector'

# Initial text of the prompt boxes, one entry per box. Empty strings give blank
# boxes; blank prompts are dropped by the generate handler before generation.
# Annotated as a variable-length tuple: `Tuple[str]` would declare a 1-tuple.
_PROMPTS: Tuple[str, ...] = (
    'Write an essay about my pets, a cat named Mika and a dog named Cleo.',
    '',
    '',
)

# First CUDA device when one is available, otherwise the CPU.
_TORCH_DEVICE = (
    torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
)

# (response text, detector decision) pairs produced by the generate handler and
# read back by the reveal handler.
# NOTE(review): this is a module-level list, so it is shared by everything
# running in this process rather than being per-visitor state.
_ANSWERS: List[Tuple[str, str]] = []
# Demo-only SynthID Text watermarking parameters. The same mapping is expanded
# into both the generation-time config object below and the logits processor
# handed to the detector, so the two always agree. The page text shown to users
# states that this configuration must not be used for production purposes.
_WATERMARK_CONFIG_DICT = {
    'ngram_len': 5,
    'keys': [
        654, 400, 836, 123, 340, 443, 597, 160, 57, 29,
        590, 639, 13, 715, 468, 990, 966, 226, 324, 585,
        118, 504, 421, 521, 129, 669, 732, 225, 90, 960,
    ],
    'sampling_table_size': 2**16,
    'sampling_table_seed': 0,
    'context_history_size': 1024,
}

# Config object passed to `model.generate(watermarking_config=...)`.
_WATERMARK_CONFIG = SynthIDTextWatermarkingConfig(**_WATERMARK_CONFIG_DICT)
# Tokenizer: pad on the left so that, in a padded batch, every prompt ends in
# the same column; generate_outputs relies on this when it slices the prompt
# columns off the generated sequences. The EOS token id doubles as padding.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_IDENTIFIER, padding_side="left")
tokenizer.pad_token_id = tokenizer.eos_token_id

# Generator model, moved to the selected device.
model = AutoModelForCausalLM.from_pretrained(_MODEL_IDENTIFIER).to(_TORCH_DEVICE)

# Logits processor built from the same parameters as the generation-time
# watermarking config; the detector is constructed with it below.
logits_processor = SynthIDTextWatermarkLogitsProcessor(
    device=_TORCH_DEVICE,
    **_WATERMARK_CONFIG_DICT,
)

# Pre-trained Bayesian detector weights, moved to the selected device.
detector_module = BayesianDetectorModel.from_pretrained(
    _DETECTOR_IDENTIFIER
).to(_TORCH_DEVICE)

# Callable detector applied to generated token ids in generate_outputs.
detector = SynthIDTextWatermarkDetector(
    detector_module=detector_module,
    logits_processor=logits_processor,
    tokenizer=tokenizer,
)
def generate_outputs(
    prompts: Sequence[str],
    watermarking_config: Optional[SynthIDTextWatermarkingConfig] = None,
) -> Tuple[Sequence[str], torch.Tensor]:
    """Sample one response per prompt and run the watermark detector on it.

    Args:
        prompts: Non-empty sequence of prompt strings; they are tokenized and
            generated as a single padded batch.
        watermarking_config: Forwarded to `model.generate`. With the default
            of `None` no watermarking config is supplied.

    Returns:
        A pair `(texts, detections)`. `texts` holds the decoded continuations
        (prompt tokens and special tokens removed), one per prompt.
        `detections` is whatever the module-level `detector` returns for the
        generated token ids.
        NOTE(review): the annotation says `torch.Tensor`, but the caller reads
        the scores from index 0 of this value -- confirm the actual type.

    Raises:
        ValueError: If `prompts` is empty.
    """
    if not prompts:
        # Fail early with a clear message instead of an obscure shape error
        # from the tokenizer/model further down.
        raise ValueError('generate_outputs() requires at least one prompt.')

    tokenized_prompts = tokenizer(
        prompts, return_tensors='pt', padding="longest"
    ).to(_TORCH_DEVICE)
    input_length = tokenized_prompts.input_ids.shape[1]

    # Sampling is enabled because the watermark is applied during sampling.
    # `max_length` bounds the prompt plus the newly generated tokens.
    output_sequences = model.generate(
        **tokenized_prompts,
        watermarking_config=watermarking_config,
        do_sample=True,
        max_length=500,
        top_k=40,
    )

    # Prompts are padded to a common width, so the first `input_length`
    # columns of every row are prompt/padding; keep only the continuation.
    output_sequences = output_sequences[:, input_length:]

    # Scoring is inference only; skip autograd bookkeeping for the detector.
    with torch.no_grad():
        detections = detector(output_sequences)

    return (
        tokenizer.batch_decode(output_sequences, skip_special_tokens=True),
        detections,
    )
| with gr.Blocks() as demo: | |
| gr.Markdown( | |
| ''' | |
| # Using SynthID Text in your Generative AI projects | |
| [SynthID][synthid] is a Google DeepMind technology that watermarks and | |
| identifies AI-generated content by embedding digital watermarks directly | |
| into AI-generated images, audio, text or video. | |
| SynthID Text is an open source implementation of this technology available | |
| in Hugging Face Transformers that has two major components: | |
| * A [logits processor][synthid-hf-logits-processor] that is | |
| [configured][synthid-hf-config] on a per-model basis and activated when | |
| calling `.generate()`; and | |
| * A [detector][synthid-hf-detector] trained to recognize watermarked text | |
| generated by a specific model with a specific configuration. | |
| This Space demonstrates: | |
| 1. How to use SynthID Text to apply a watermark to text generated by your | |
| model; and | |
| 1. How to identify that text using a ready-made detector. | |
| Note that this detector is trained specifically for this demonstration. You | |
| should maintain a specific watermarking configuration for every model you | |
| use and protect that configuration as you would any other secret. See the | |
| [end-to-end guide][synthid-hf-detector-e2e] for more on training your own | |
| detectors, and the [SynthID Text documentation][raitk-synthid] for more on | |
| how this technology works. | |
| ## Applying a watermark | |
| Practically speaking, SynthID Text is a logits processor, applied to your | |
| model's generation pipeline after [Top-K and Top-P][cloud-parameter-values], | |
| that augments the model's logits using a pseudorandom _g_-function to encode | |
| watermarking information in a way that balances generation quality with | |
| watermark detectability. See the [paper][synthid-nature] for a complete | |
| technical description of the algorithm and analyses of how different | |
| configuration values affect performance. | |
| Watermarks are [configured][synthid-hf-config] to parameterize the | |
| _g_-function and how it is applied during generation. The following | |
| configuration is used for all demos. It should not be used for any | |
| production purposes. | |
| ```json | |
| { | |
| "ngram_len": 5, | |
| "keys": [ | |
| 654, 400, 836, 123, 340, 443, 597, 160, 57, 29, | |
| 590, 639, 13, 715, 468, 990, 966, 226, 324, 585, | |
| 118, 504, 421, 521, 129, 669, 732, 225, 90, 960 | |
| ], | |
| "sampling_table_size": 65536, | |
| "sampling_table_seed": 0, | |
| "context_history_size": 1024 | |
| } | |
| ``` | |
| Watermarks are applied by initializing a `SynthIDTextWatermarkingConfig` | |
| and passing that as the `watermarking_config=` parameter in your call to | |
| `.generate()`, as shown in the snippet below. | |
| ```python | |
| from transformers import ( | |
| AutoModelForCausalLM, | |
| AutoTokenizer, | |
| SynthIDTextWatermarkingConfig, | |
| ) | |
| # Standard model and tokenizer initialization | |
| tokenizer = AutoTokenizer.from_pretrained('repo/id') | |
| model = AutoModelForCausalLM.from_pretrained('repo/id') | |
| # SynthID Text configuration | |
| watermarking_config = SynthIDTextWatermarkingConfig(...) | |
| # Generation with watermarking | |
| tokenized_prompts = tokenizer(["your prompts here"]) | |
| output_sequences = model.generate( | |
| **tokenized_prompts, | |
| watermarking_config=watermarking_config, | |
| do_sample=True, | |
| ) | |
| watermarked_text = tokenizer.batch_decode(output_sequences) | |
| ``` | |
| ## Try it yourself. | |
| Let's use [Gemma 2B IT][gemma] to help you understand how watermarking works. | |
| Using the text boxes below enter up to three prompts then click the generate | |
| button. An example is provided to help get you started, but the cells are | |
| fully editable. | |
| Gemma will then generate watermarked and non-watermarked responses for each | |
| non-empty prompt you provided. | |
| [cloud-parameter-values]: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/adjust-parameter-values | |
| [gemma]: https://huggingface.co/google/gemma-2b-it | |
| [raitk-synthid]: https://ai.google.dev/responsible/docs/safeguards/synthid | |
| [synthid]: https://deepmind.google/technologies/synthid/ | |
| [synthid-hf-config]: https://huggingface.co/docs/transformers/v4.46.0/en/internal/generation_utils#transformers.SynthIDTextWatermarkingConfig | |
| [synthid-hf-detector]: https://huggingface.co/docs/transformers/v4.46.0/en/internal/generation_utils#transformers.BayesianDetectorModel | |
| [synthid-hf-detector-e2e]: https://github.com/huggingface/transformers/tree/v4.46.0/examples/research_projects/synthid_text/detector_training.py | |
| [synthid-hf-logits-processor]: https://huggingface.co/docs/transformers/v4.46.0/en/internal/generation_utils#transformers.SynthIDTextWatermarkLogitsProcessor | |
| [synthid-nature]: https://www.nature.com/articles/s41586-024-08025-4 | |
| ''' | |
| ) | |
# One four-line prompt box per entry in _PROMPTS, pre-filled with that entry.
prompt_inputs = [
    gr.Textbox(label='Prompt', lines=4, value=initial_text)
    for initial_text in _PROMPTS
]

# Starts a generation round; its click handler is registered further down.
generate_btn = gr.Button(value='Generate')
| with gr.Column(visible=False) as generations_col: | |
| gr.Markdown( | |
| ''' | |
| ## Human recognition of watermarked text | |
| The primary goal of SynthID Text is to apply a watermark to generated text | |
| without affecting generation quality. Another way to think about this is | |
| that generated text that carries a watermark should be imperceptible to | |
| you, the reader, but easily perceived by a watermark detector. | |
| The responses from Gemma are shown below. Use the checkboxes to mark which | |
| responses you think are watermarked, then click the "Reveal" button to | |
| see the true values. | |
| The [research paper][synthid-nature] has an in-depth study examining human | |
| perception of watermarked versus non-watermarked text. | |
| [synthid-nature]: https://www.nature.com/articles/s41586-024-08025-4 | |
| ''' | |
| ) | |
# Checkbox list in which the user ticks the generations they believe are
# watermarked. It is created without choices; the generate handler fills them.
generations_grp = gr.CheckboxGroup(
    info='Select the generations you think are watermarked!',
    label='All generations, in random order',
)

# Created hidden; the generate handler makes it visible once responses exist.
reveal_btn = gr.Button(value='Reveal', visible=False)
| with gr.Column(visible=False) as detections_col: | |
| gr.Markdown( | |
| ''' | |
| ## Detecting watermarked text | |
| The only way to properly detect watermarked text is with a trained | |
| classifier. This Space uses a pre-trained classifier hosted on Hugging Face | |
| Hub. For production uses you will need to train your own classifiers to | |
| recognize your watermarks. A [Bayesian detector][synthid-hf-detector] is | |
| provided in Transformers, along with an | |
| [end-to-end example][synthid-hf-detector-e2e] of how to train one of these | |
| detectors. | |
| You can see how your guesses compared to the actual results below. As | |
| above, the responses are displayed in checkboxes. If the box is checked, | |
| then the text carries a watermark. Your correct guesses are annotated with | |
| the "Correct" prefix. | |
| [synthid-hf-detector]: https://huggingface.co/docs/transformers/v4.46.0/en/internal/generation_utils#transformers.BayesianDetectorModel | |
| [synthid-hf-detector-e2e]: https://github.com/huggingface/transformers/tree/v4.46.0/examples/research_projects/synthid_text/detector_training.py | |
| ''' | |
| ) | |
# Checkbox list showing the detector's verdicts. It is created without choices;
# the reveal handler fills in the choices and ticks the watermarked ones.
# The help text describes what the reveal handler actually does: it only adds
# a "Correct!" prefix to watermarked generations the user selected, and never
# marks anything as incorrect.
revealed_grp = gr.CheckboxGroup(
    label='Ground truth for all generations',
    info=(
        'Watermarked generations are checked, and your correct selections '
        'are prefixed with "Correct!" in the text.'
    ),
)
| gr.Markdown( | |
| ''' | |
| ## Limitations | |
| SynthID Text watermarks are robust to some transformations, such as | |
| cropping pieces of text, modifying a few words, or mild paraphrasing, but | |
| this method does have limitations. | |
| - Watermark application is less effective on factual responses, as there | |
| is less opportunity to augment generation without decreasing accuracy. | |
| - Detector confidence scores can be greatly reduced when an AI-generated | |
| text is thoroughly rewritten, or translated to another language. | |
| SynthID Text is not built to directly stop motivated adversaries from | |
| causing harm. However, it can make it harder to use AI-generated content | |
| for malicious purposes, and it can be combined with other approaches to | |
| give better coverage across content types and platforms. | |
| ''' | |
| ) | |
# Created hidden; the reveal handler shows it and the reset handler hides it.
reset_btn = gr.Button(value='Reset', visible=False)
def generate(*prompts):
    """Generate plain and watermarked responses and show them for guessing.

    Args:
        *prompts: Current text of each prompt box. Blank or whitespace-only
            entries are ignored.

    Returns:
        A dict of component updates. Normally it hides the generate button,
        shows the generations column, fills the checkbox group with the
        shuffled responses (nothing pre-selected) and shows the reveal button.
        When every prompt is blank it only restores the generate button.
    """
    prompts = [p for p in prompts if p and p.strip()]
    if not prompts:
        # Nothing to generate. The first step of the click chain has already
        # switched the button to a disabled "Generating..." state, so put it
        # back instead of letting the tokenizer fail on an empty batch.
        gr.Warning('Enter at least one prompt before generating.')
        return {generate_btn: gr.Button(value='Generate', interactive=True)}

    standard, standard_detector = generate_outputs(prompts=prompts)
    watermarked, watermarked_detector = generate_outputs(
        prompts=prompts,
        watermarking_config=_WATERMARK_CONFIG,
    )

    upper_threshold = 0.9501
    lower_threshold = 0.1209

    def decision(score: float) -> str:
        """Map a detector score to one of the three verdict labels."""
        if score > upper_threshold:
            return 'Watermarked'
        # Everything above the lower threshold up to AND including the upper
        # threshold is indeterminate. The previous strict `< upper_threshold`
        # test sent a score exactly equal to the upper threshold to
        # 'Not watermarked'.
        if score > lower_threshold:
            return 'Indeterminate'
        return 'Not watermarked'

    # The per-response scores are read from index 0 of the detector result.
    responses = [
        (text, decision(score))
        for text, score in zip(standard, standard_detector[0])
    ]
    responses += [
        (text, decision(score))
        for text, score in zip(watermarked, watermarked_detector[0])
    ]
    # Shuffle so display order does not give away which ones are watermarked.
    random.shuffle(responses)

    # Replace rather than extend: answers left over from a round that was never
    # reset (for example after a page reload) must not appear in this round's
    # reveal.
    # NOTE(review): _ANSWERS is a module-level list, so simultaneous visitors
    # still share it; truly per-session answers would need gr.State.
    _ANSWERS[:] = responses

    return {
        generate_btn: gr.Button(visible=False),
        generations_col: gr.Column(visible=True),
        generations_grp: gr.CheckboxGroup(
            choices=[text for text, _ in responses],
            # Start every round with nothing ticked; an update that only sets
            # `choices` would keep selections from the previous round.
            value=[],
        ),
        reveal_btn: gr.Button(visible=True),
    }


# Two-step click: first give immediate feedback by disabling the button, then
# run the (slow) generation and apply its component updates.
generate_btn.click(
    lambda: gr.update(value='Generating...', interactive=False), None, generate_btn
).then(
    generate,
    inputs=prompt_inputs,
    outputs=[generate_btn, generations_col, generations_grp, reveal_btn]
)
def reveal(user_selections: list[str]):
    """Show the detector's verdicts next to the user's guesses.

    Every stored response becomes a choice of the ground-truth checkbox group.
    Responses whose verdict is exactly "Watermarked" are ticked, and those the
    user had also selected get a "Correct! " prefix. Responses with any other
    verdict are listed unticked and unprefixed.

    Args:
        user_selections: Response texts the user ticked in the generations
            checkbox group. `None` is treated as no selection.

    Returns:
        A dict of component updates: hide the reveal button, show the
        detections column, fill and show the ground-truth checkbox group, and
        show the reset button.
    """
    selected = set(user_selections or ())
    choices: list[str] = []
    value: list[str] = []
    for response, decision in _ANSWERS:
        is_watermarked = decision == "Watermarked"
        if is_watermarked and response in selected:
            choice = f'Correct! {response}'
        else:
            choice = response
        choices.append(choice)
        if is_watermarked:
            value.append(choice)

    return {
        reveal_btn: gr.Button(visible=False),
        detections_col: gr.Column(visible=True),
        # `visible=True` is required: the reset handler hides this group, and
        # an update leaves every property it does not mention unchanged, so
        # without it the ground truth stays invisible after the first reset.
        revealed_grp: gr.CheckboxGroup(
            choices=choices, value=value, visible=True
        ),
        reset_btn: gr.Button(visible=True),
    }


reveal_btn.click(
    reveal,
    inputs=generations_grp,
    outputs=[
        reveal_btn,
        detections_col,
        revealed_grp,
        reset_btn
    ],
)
def reset():
    """Forget the stored answers and return the page to its initial state.

    Returns:
        A dict of component updates: hide both result columns, the
        ground-truth checkbox group and the reset button, and restore the
        generate button to a visible, clickable 'Generate'.
    """
    _ANSWERS.clear()

    ui_updates = {}
    ui_updates[generations_col] = gr.Column(visible=False)
    ui_updates[detections_col] = gr.Column(visible=False)
    # NOTE(review): this hides the group itself, not just its column; whatever
    # repopulates it later has to make it visible again.
    ui_updates[revealed_grp] = gr.CheckboxGroup(visible=False)
    ui_updates[reset_btn] = gr.Button(visible=False)
    ui_updates[generate_btn] = gr.Button(
        value='Generate', interactive=True, visible=True
    )
    return ui_updates


reset_btn.click(
    fn=reset,
    inputs=[],
    outputs=[
        generations_col,
        detections_col,
        revealed_grp,
        reset_btn,
        generate_btn,
    ],
)
| if __name__ == '__main__': | |
| demo.launch() | |