JaceWei committed on
Commit
0d563bd
·
1 Parent(s): 337c99f

update: push latest content

.gitignore CHANGED
@@ -1,5 +1,6 @@
1
  input/
2
- output/Paper2Poster/assets/
 
3
  Paper2Video/assets/
4
  posterbuilder/latex_proj/figures/
5
  *.png
@@ -8,11 +9,4 @@ posterbuilder/latex_proj/figures/
8
  *.wav
9
  *.mp4
10
  __pycache__/
11
- *.png
12
- *.jpg
13
- *.pdf
14
- *.wav
15
- *.mp4
16
- Paper2Poster/assets/
17
- Paper2Video/assets/
18
- posterbuilder/latex_proj/figures/
 
1
  input/
2
+ output/
3
+ Paper2Poster/assets/
4
  Paper2Video/assets/
5
  posterbuilder/latex_proj/figures/
6
  *.png
 
9
  *.wav
10
  *.mp4
11
  __pycache__/
12
+
 
 
 
 
 
 
 
Paper2Poster/PosterAgent/new_pipeline.py CHANGED
@@ -129,63 +129,63 @@ if __name__ == '__main__':
129
  detail_log['parser_in_t'] = input_token
130
  detail_log['parser_out_t'] = output_token
131
 
132
- # Initialize LogoManager
133
- logo_manager = LogoManager()
134
- institution_logo_path = args.institution_logo_path
135
- conference_logo_path = args.conference_logo_path
136
-
137
- # Auto-detect institution from paper if not provided
138
- # Now using the raw_result directly instead of reading from file
139
- if not institution_logo_path:
140
- print("\n" + "="*60)
141
- print("🔍 AUTO-DETECTING INSTITUTION FROM PAPER")
142
- print("="*60)
143
-
144
- # Use the raw_result we already have from the parser
145
- if raw_result:
146
- print(f"📄 Using parsed paper content")
147
- # Extract text content from the ConversionResult object
148
- try:
149
- paper_text = raw_result.document.export_to_markdown()
150
- except:
151
- # Fallback: try to get text content in another way
152
- paper_text = str(raw_result)
153
-
154
- print("🔎 Searching for FIRST AUTHOR's institution...")
155
- first_author_inst = logo_manager.extract_first_author_institution(paper_text)
156
-
157
- if first_author_inst:
158
- print(f"\n✅ FIRST AUTHOR INSTITUTION: {first_author_inst}")
159
- print(f"🔍 Searching for logo: {first_author_inst}")
160
-
161
- inst_logo_path = logo_manager.get_logo_path(first_author_inst, category="institute", use_google=args.use_google_search)
162
- if inst_logo_path:
163
- institution_logo_path = str(inst_logo_path)
164
- print(f"✅ Institution logo found: {institution_logo_path}")
165
- else:
166
- print(f"❌ Could not find/download logo for: {first_author_inst}")
167
- else:
168
- print("❌ No first author institution detected or matched with available logos")
169
- else:
170
- print("❌ No parsed content available")
171
- print("="*60 + "\n")
172
-
173
- # Handle conference logo
174
- if args.conference_venue and not conference_logo_path:
175
- print("\n" + "="*60)
176
- print("🏛️ SEARCHING FOR CONFERENCE LOGO")
177
- print("="*60)
178
- print(f"📍 Conference: {args.conference_venue}")
179
- print(f"🔍 Searching for logo...")
180
-
181
- conf_logo_path = logo_manager.get_logo_path(args.conference_venue, category="conference", use_google=args.use_google_search)
182
- if conf_logo_path:
183
- conference_logo_path = str(conf_logo_path)
184
- print(f"✅ Conference logo found: {conference_logo_path}")
185
- else:
186
- print(f"❌ Could not find/download logo for: {args.conference_venue}")
187
- # Note: Web search is now handled inside get_logo_path automatically
188
- print("="*60 + "\n")
189
 
190
  # Step 2: Filter unnecessary images and tables
191
  input_token, output_token = filter_image_table(args, agent_config_t)
@@ -217,87 +217,142 @@ if __name__ == '__main__':
217
  detail_log['outline_in_t'] = input_token
218
  detail_log['outline_out_t'] = output_token
219
 
220
- if args.ablation_no_tree_layout:
221
- panel_arrangement, figure_arrangement, text_arrangement, input_token, output_token = no_tree_get_layout(
222
- poster_width,
223
- poster_height,
224
- panels,
225
- figures,
226
- agent_config_t
227
- )
228
- total_input_tokens_t += input_token
229
- total_output_tokens_t += output_token
230
- print(f'No tree layout token consumption: {input_token} -> {output_token}')
231
- detail_log['no_tree_layout_in_t'] = input_token
232
- detail_log['no_tree_layout_out_t'] = output_token
233
- else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
- # Step 4: Learn and generate layout
236
- panel_model_params, figure_model_params = main_train()
237
-
238
- panel_arrangement, figure_arrangement, text_arrangement = main_inference(
239
- panels,
240
- panel_model_params,
241
- figure_model_params,
242
- poster_width,
243
- poster_height,
244
- shrink_margin=3
245
- )
246
-
247
- text_arrangement_title = text_arrangement[0]
248
- text_arrangement = text_arrangement[1:]
249
- # Split the title textbox into two parts
250
- text_arrangement_title_top, text_arrangement_title_bottom = split_textbox(
251
- text_arrangement_title,
252
- 0.8
253
- )
254
- # Add the split textboxes back to the list
255
- text_arrangement = [text_arrangement_title_top, text_arrangement_title_bottom] + text_arrangement
256
-
257
- for i in range(len(figure_arrangement)):
258
- panel_id = figure_arrangement[i]['panel_id']
259
- panel_section_name = panels[panel_id]['section_name']
260
- figure_info = figures[panel_section_name]
261
- if 'image' in figure_info:
262
- figure_id = figure_info['image']
263
- if not figure_id in images:
264
- figure_path = images[str(figure_id)]['image_path']
265
- else:
266
- figure_path = images[figure_id]['image_path']
267
- elif 'table' in figure_info:
268
- figure_id = figure_info['table']
269
- if not figure_id in tables:
270
- figure_path = tables[str(figure_id)]['table_path']
271
- else:
272
- figure_path = tables[figure_id]['table_path']
273
-
274
- figure_arrangement[i]['figure_path'] = figure_path
275
-
276
- for text_arrangement_item in text_arrangement:
277
- num_chars = char_capacity(
278
- bbox=(text_arrangement_item['x'], text_arrangement_item['y'], text_arrangement_item['height'], text_arrangement_item['width'])
279
- )
280
- text_arrangement_item['num_chars'] = num_chars
281
-
282
-
283
- width_inch, height_inch, panel_arrangement_inches, figure_arrangement_inches, text_arrangement_inches = get_arrangments_in_inches(
284
- poster_width, poster_height, panel_arrangement, figure_arrangement, text_arrangement, 25
285
- )
286
-
287
- # Save to file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  tree_split_results = {
289
- 'poster_width': poster_width,
290
- 'poster_height': poster_height,
291
- 'poster_width_inches': width_inch,
292
- 'poster_height_inches': height_inch,
293
  'panels': panels,
294
- 'panel_arrangement': panel_arrangement,
295
- 'figure_arrangement': figure_arrangement,
296
- 'text_arrangement': text_arrangement,
297
- 'panel_arrangement_inches': panel_arrangement_inches,
298
- 'figure_arrangement_inches': figure_arrangement_inches,
299
- 'text_arrangement_inches': text_arrangement_inches,
300
  }
 
301
  os.makedirs('tree_splits', exist_ok=True)
302
  with open(f'tree_splits/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_tree_split_{args.index}.json', 'w') as f:
303
  json.dump(tree_split_results, f, indent=4)
 
129
  detail_log['parser_in_t'] = input_token
130
  detail_log['parser_out_t'] = output_token
131
 
132
+ # # Initialize LogoManager
133
+ # logo_manager = LogoManager()
134
+ # institution_logo_path = args.institution_logo_path
135
+ # conference_logo_path = args.conference_logo_path
136
+
137
+ # # Auto-detect institution from paper if not provided
138
+ # # Now using the raw_result directly instead of reading from file
139
+ # if not institution_logo_path:
140
+ # print("\n" + "="*60)
141
+ # print("🔍 AUTO-DETECTING INSTITUTION FROM PAPER")
142
+ # print("="*60)
143
+
144
+ # # Use the raw_result we already have from the parser
145
+ # if raw_result:
146
+ # print(f"📄 Using parsed paper content")
147
+ # # Extract text content from the ConversionResult object
148
+ # try:
149
+ # paper_text = raw_result.document.export_to_markdown()
150
+ # except:
151
+ # # Fallback: try to get text content in another way
152
+ # paper_text = str(raw_result)
153
+
154
+ # print("🔎 Searching for FIRST AUTHOR's institution...")
155
+ # first_author_inst = logo_manager.extract_first_author_institution(paper_text)
156
+
157
+ # if first_author_inst:
158
+ # print(f"\n✅ FIRST AUTHOR INSTITUTION: {first_author_inst}")
159
+ # print(f"🔍 Searching for logo: {first_author_inst}")
160
+
161
+ # inst_logo_path = logo_manager.get_logo_path(first_author_inst, category="institute", use_google=args.use_google_search)
162
+ # if inst_logo_path:
163
+ # institution_logo_path = str(inst_logo_path)
164
+ # print(f"✅ Institution logo found: {institution_logo_path}")
165
+ # else:
166
+ # print(f"❌ Could not find/download logo for: {first_author_inst}")
167
+ # else:
168
+ # print("❌ No first author institution detected or matched with available logos")
169
+ # else:
170
+ # print("❌ No parsed content available")
171
+ # print("="*60 + "\n")
172
+
173
+ # # Handle conference logo
174
+ # if args.conference_venue and not conference_logo_path:
175
+ # print("\n" + "="*60)
176
+ # print("🏛️ SEARCHING FOR CONFERENCE LOGO")
177
+ # print("="*60)
178
+ # print(f"📍 Conference: {args.conference_venue}")
179
+ # print(f"🔍 Searching for logo...")
180
+
181
+ # conf_logo_path = logo_manager.get_logo_path(args.conference_venue, category="conference", use_google=args.use_google_search)
182
+ # if conf_logo_path:
183
+ # conference_logo_path = str(conf_logo_path)
184
+ # print(f"✅ Conference logo found: {conference_logo_path}")
185
+ # else:
186
+ # print(f"❌ Could not find/download logo for: {args.conference_venue}")
187
+ # # Note: Web search is now handled inside get_logo_path automatically
188
+ # print("="*60 + "\n")
189
 
190
  # Step 2: Filter unnecessary images and tables
191
  input_token, output_token = filter_image_table(args, agent_config_t)
 
217
  detail_log['outline_in_t'] = input_token
218
  detail_log['outline_out_t'] = output_token
219
 
220
+ # if args.ablation_no_tree_layout:
221
+ # panel_arrangement, figure_arrangement, text_arrangement, input_token, output_token = no_tree_get_layout(
222
+ # poster_width,
223
+ # poster_height,
224
+ # panels,
225
+ # figures,
226
+ # agent_config_t
227
+ # )
228
+ # total_input_tokens_t += input_token
229
+ # total_output_tokens_t += output_token
230
+ # print(f'No tree layout token consumption: {input_token} -> {output_token}')
231
+ # detail_log['no_tree_layout_in_t'] = input_token
232
+ # detail_log['no_tree_layout_out_t'] = output_token
233
+ # else:
234
+
235
+ # # Step 4: Learn and generate layout
236
+ # panel_model_params, figure_model_params = main_train()
237
+
238
+ # panel_arrangement, figure_arrangement, text_arrangement = main_inference(
239
+ # panels,
240
+ # panel_model_params,
241
+ # figure_model_params,
242
+ # poster_width,
243
+ # poster_height,
244
+ # shrink_margin=3
245
+ # )
246
+
247
+ # text_arrangement_title = text_arrangement[0]
248
+ # text_arrangement = text_arrangement[1:]
249
+ # # Split the title textbox into two parts
250
+ # text_arrangement_title_top, text_arrangement_title_bottom = split_textbox(
251
+ # text_arrangement_title,
252
+ # 0.8
253
+ # )
254
+ # # Add the split textboxes back to the list
255
+ # text_arrangement = [text_arrangement_title_top, text_arrangement_title_bottom] + text_arrangement
256
+
257
+ # for i in range(len(figure_arrangement)):
258
+ # panel_id = figure_arrangement[i]['panel_id']
259
+ # panel_section_name = panels[panel_id]['section_name']
260
+ # figure_info = figures[panel_section_name]
261
+ # if 'image' in figure_info:
262
+ # figure_id = figure_info['image']
263
+ # if not figure_id in images:
264
+ # figure_path = images[str(figure_id)]['image_path']
265
+ # else:
266
+ # figure_path = images[figure_id]['image_path']
267
+ # elif 'table' in figure_info:
268
+ # figure_id = figure_info['table']
269
+ # if not figure_id in tables:
270
+ # figure_path = tables[str(figure_id)]['table_path']
271
+ # else:
272
+ # figure_path = tables[figure_id]['table_path']
273
+
274
+ # figure_arrangement[i]['figure_path'] = figure_path
275
+
276
+ # for text_arrangement_item in text_arrangement:
277
+ # num_chars = char_capacity(
278
+ # bbox=(text_arrangement_item['x'], text_arrangement_item['y'], text_arrangement_item['height'], text_arrangement_item['width'])
279
+ # )
280
+ # text_arrangement_item['num_chars'] = num_chars
281
+
282
 
283
+ # width_inch, height_inch, panel_arrangement_inches, figure_arrangement_inches, text_arrangement_inches = get_arrangments_in_inches(
284
+ # poster_width, poster_height, panel_arrangement, figure_arrangement, text_arrangement, 25
285
+ # )
286
+
287
+ # # Save to file
288
+ # tree_split_results = {
289
+ # 'poster_width': poster_width,
290
+ # 'poster_height': poster_height,
291
+ # 'poster_width_inches': width_inch,
292
+ # 'poster_height_inches': height_inch,
293
+ # 'panels': panels,
294
+ # 'panel_arrangement': panel_arrangement,
295
+ # 'figure_arrangement': figure_arrangement,
296
+ # 'text_arrangement': text_arrangement,
297
+ # 'panel_arrangement_inches': panel_arrangement_inches,
298
+ # 'figure_arrangement_inches': figure_arrangement_inches,
299
+ # 'text_arrangement_inches': text_arrangement_inches,
300
+ # }
301
+
302
+ # ============================
303
+ # ### NEW: only build a simple figure_arrangement with {panel_id, figure_path}
304
+ # ============================
305
+
306
+ # Some projects keep images/tables in an upstream global scope; if they are undefined here, fall back to loading them from the filtered-result JSON files
307
+ try:
308
+ images
309
+ except NameError:
310
+ images = json.load(open(f'<{args.model_name_t}_{args.model_name_v}>_images_and_tables/{args.poster_name}_images_filtered.json', 'r'))
311
+ try:
312
+ tables
313
+ except NameError:
314
+ tables = json.load(open(f'<{args.model_name_t}_{args.model_name_v}>_images_and_tables/{args.poster_name}_tables_filtered.json', 'r'))
315
+
316
+ # Build a section_name -> panel_id mapping
317
+ section2pid = {p['section_name']: p['panel_id'] for p in panels}
318
+
319
+ # Build the simplified figure_arrangement: keep only panel_id and figure_path
320
+ simple_figure_arrangement = []
321
+ for section_name, f in figures.items():
322
+ if section_name not in section2pid:
323
+ continue
324
+ pid = section2pid[section_name]
325
+
326
+ fig_path = None
327
+ if 'image' in f:
328
+ fid = str(f['image'])
329
+ info = images.get(fid) or images.get(str(fid)) or {}
330
+ fig_path = info.get('image_path')
331
+ elif 'table' in f:
332
+ tid = str(f['table'])
333
+ info = tables.get(tid) or tables.get(str(tid)) or {}
334
+ fig_path = info.get('table_path')
335
+
336
+ if fig_path: # collect only entries that have a valid path
337
+ simple_figure_arrangement.append({
338
+ 'panel_id': pid,
339
+ 'figure_path': fig_path,
340
+ })
341
+
342
+ # ============================
343
+ # ### REMOVED: no layout/train/text capacity/inches conversion
344
+ # - removed the args.ablation_no_tree_layout branch
345
+ # - removed main_train() / main_inference()
346
+ # - removed the loop that filled figure_path into figure_arrangement[i]
347
+ # - removed text_arrangement / char_capacity / get_arrangments_in_inches
348
+ # ============================
349
+
350
+ # Save to file (keep only panels + figure_arrangement)
351
  tree_split_results = {
 
 
 
 
352
  'panels': panels,
353
+ 'figure_arrangement': simple_figure_arrangement,
 
 
 
 
 
354
  }
355
+
356
  os.makedirs('tree_splits', exist_ok=True)
357
  with open(f'tree_splits/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_tree_split_{args.index}.json', 'w') as f:
358
  json.dump(tree_split_results, f, indent=4)
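For reference, the tree_split JSON written above now reduces to the panels plus the simplified figure_arrangement (panel_id/figure_path pairs). A minimal sketch of reading it back downstream, assuming a hypothetical file name (the real name is built from the model names, poster name, and index as in the code above):

import json

# Hypothetical path; the pipeline constructs the real file name at save time.
with open('tree_splits/<gpt-5_gpt-5>_demo_tree_split_0.json') as f:
    tree_split = json.load(f)

print(len(tree_split['panels']), 'panels')
for item in tree_split['figure_arrangement']:
    # Each entry keeps only the owning panel and the asset path.
    print(item['panel_id'], '->', item['figure_path'])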
Paper2Poster/PosterAgent/parse_raw.py CHANGED
@@ -114,12 +114,12 @@ def parse_raw(args, actor_config, version=2):
114
  print(f"Ouch! The response is invalid, the LLM is not following the format :(")
115
  print('Trying again...')
116
  raise
117
- if 'title' in section['title'].lower():
118
- has_title = True
119
 
120
- if not has_title:
121
- print('Ouch! The response is invalid, the LLM is not following the format :(')
122
- raise
123
 
124
  os.makedirs('contents', exist_ok=True)
125
  json.dump(content_json, open(f'contents/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_raw_content.json', 'w'), indent=4)
 
114
  print(f"Ouch! The response is invalid, the LLM is not following the format :(")
115
  print('Trying again...')
116
  raise
117
+ # if 'title' in section['title'].lower():
118
+ # has_title = True
119
 
120
+ # if not has_title:
121
+ # print('Ouch! The response is invalid, the LLM is not following the format :(')
122
+ # raise
123
 
124
  os.makedirs('contents', exist_ok=True)
125
  json.dump(content_json, open(f'contents/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_raw_content.json', 'w'), indent=4)
Paper2Poster/utils/prompt_templates/poster_planner_new_v2.yaml CHANGED
@@ -20,6 +20,7 @@ system_prompt: |
20
  • The final output must be a single JSON object, mapping from section names to the chosen image/table ID plus the “reason” field.
21
  • Extra note: If multiple images or tables are suitable, select the single best one and assign only that.
22
  • If “image_information” or “table_information” is empty, you may end up assigning nothing to any section.
 
23
 
24
  template: |
25
  Instructions:
 
20
  • The final output must be a single JSON object, mapping from section names to the chosen image/table ID plus the “reason” field.
21
  • Extra note: If multiple images or tables are suitable, select the single best one and assign only that.
22
  • If “image_information” or “table_information” is empty, you may end up assigning nothing to any section.
23
+ • If there is a “teaser” or “overview” figure (often summarizing the entire paper or framework), prioritize assigning it to the first section or placing it at the beginning of the poster.
24
 
25
  template: |
26
  Instructions:
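To illustrate the new teaser/overview rule, the planner's JSON output could look roughly like the sketch below; section names, IDs, and reasons are hypothetical and depend on the paper.

# Hypothetical planner assignment: the teaser figure is given to the first section.
planner_assignment = {
    "Introduction": {"image": 1, "reason": "Teaser figure summarizing the whole framework."},
    "Experiments": {"table": 3, "reason": "Main results table with the key comparisons."},
}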
Paper2Poster/utils/prompts/gen_poster_raw_content_v2.txt CHANGED
@@ -1,28 +1,35 @@
1
- You are a document content divider and extractor specialist, expert in dividing and extracting content from various types of documents and reorganizing it into a two-level json format for later poster generation.
2
- And a LaTeX-enhanced document structuring and extraction specialist, expert in dividing academic content into logical sections and automatically emphasizing key ideas with LaTeX formatting suitable for Beamer poster generation.
 
3
 
4
  Based on given markdown document, generate a JSON output for direct insertion into a LaTeX Beamer poster, make sure the output is concise and focused.
5
 
 
 
 
6
  Step-by-Step Instructions:
7
  1. Identify Sections and Subsections in document and identify sections and subsections based on the heading levels and logical structure.
8
 
9
- 2. Divide Content: Reorganize the content into sections and subsections, ensuring that each subsection contains approximately 500 words.
10
 
11
- 3. Refine Titles: Create titles for each section with at most 3 words.
12
 
13
  4. Remove Unwanted Elements: Eliminate any unwanted elements such as headers, footers, text surrounded by `~~` indicating deletion.
14
 
15
- 5. Refine Text: For content, you should keep as much raw text as possible. Do not include citations.
16
 
17
- 6. Length: you should control the length of each section, according to their importance according to your understanding of the paper. For important sections, their content should be long.
18
 
19
  7. Make sure there is a poster title section at the beginning, and it should contain information like paper title, author, organization etc.
20
 
21
- 8. The "meta" key contains the meta information of the poster, where the title should be the raw title of the paper and is not summarized.
 
 
 
 
 
22
 
23
- 9. Ther **must** be a section for the poster title.
24
-
25
- 10. **IMPORTANT** Within the section content, use LaTeX commands to improve readability:
26
  - Use `\textbf{}` to emphasize *key terms, results, and conclusions*.
27
  - Use `\textit{}` for *concepts or variable names*.
28
  - Use `\textcolor{blue}{}` for *important statistics, numerical values, or methods*.
@@ -30,6 +37,11 @@ Step-by-Step Instructions:
30
  - NEVER output “extbf”, “extit”, or “extcolor”.
31
  - The final output must compile directly in LaTeX Beamer poster without errors.
32
 
 
 
 
 
 
33
  Example Output:
34
  {
35
  "meta": {
@@ -39,17 +51,16 @@ Example Output:
39
  },
40
  "sections": [
41
  {
42
- "title": "Poster Title & Author",
43
- "content": "content of poster title and author"
44
  },
45
  {
46
- "title": "title of section1",
47
- "content": "content of section 1 (e.g. We aim to \textbf{clarify the causal relationships} in text-to-image models. Previous studies \textcolor{red}{overlooked confounding embeddings}, leading to poor interpretability.)"
48
  },
49
  {
50
- "title": "title of section2",
51
- "content": "content of section 2 (e.g. The optimized model achieves \textcolor{blue}{+12.5\% accuracy improvement} and significantly reduces \textcolor{red}{semantic leakage}. Results demonstrate that \textbf{embedding optimization improves fidelity}.)"
52
- }
53
  ]
54
  }
55
 
 
1
+ You are a poster content designer & academic summarization expert, skilled in condensing complex ideas into visually engaging, high-impact content suitable for LaTeX Beamer posters.
2
+
3
+ Your goal is to extract, refine, and restructure the given markdown document into a 2-level JSON format that reads like a poster — concise, visually balanced, and lively.
4
 
5
  Based on given markdown document, generate a JSON output for direct insertion into a LaTeX Beamer poster, make sure the output is concise and focused.
6
 
7
+ Key goals: balance brevity, energy, and clarity — every line should look like it belongs on a poster.
8
+ Style: write with high-energy, action-oriented phrasing (e.g., “We reveal...”, “Our framework boosts...”), avoiding a passive, overly academic tone.
9
+
10
  Step-by-Step Instructions:
11
  1. Identify Sections and Subsections in document and identify sections and subsections based on the heading levels and logical structure.
12
 
13
+ 2. Divide Content: Reorganize the content into sections (each under 70 words, 7–8 sections in total), focusing on key findings, core methods, and \textbf{what’s new in this work}. Include at least one focus marker every 15–20 words.
14
 
15
+ 3. Refine Titles: Create a title for each section with at most 6 words. Make titles dynamic and memorable, neither too short nor too long.
16
 
17
  4. Remove Unwanted Elements: Eliminate any unwanted elements such as headers, footers, text surrounded by `~~` indicating deletion.
18
 
19
+ 5. Refine Text: Write as if explaining to an intelligent but busy audience at a poster session. Content should stay crisp and energetic. Remove redundant descriptions, long background context, and citation markers.
20
 
21
+ 6. You may use symbols (bullet points) for logical progression, but keep at most 3 per section and use them only when necessary.
22
 
23
  7. Make sure there is a poster title section at the beginning, and it should contain information like paper title, author, organization etc.
24
 
25
+ 8. The "meta" key contains the meta information of the poster, where the title **MUST** be the **RAW** title of the paper, not a summary.
26
+ Do NOT summarize, translate, or rephrase any of these fields.
27
+ - For authors, use LaTeX superscript notation to indicate institutional affiliation clearly.
28
+ Example: "Lily\textsuperscript{1}, Bob\textsuperscript{2}"
29
+ - Match superscripts with their corresponding institution numbers in the "affiliations" field, formatted as:
30
+ "1 Department of AI, NUS; 2 School of Computing, NTU"
31
 
32
+ 9. **IMPORTANT** Within the section content, use LaTeX commands to improve readability:
 
 
33
  - Use `\textbf{}` to emphasize *key terms, results, and conclusions*.
34
  - Use `\textit{}` for *concepts or variable names*.
35
  - Use `\textcolor{blue}{}` for *important statistics, numerical values, or methods*.
 
37
  - NEVER output “extbf”, “extit”, or “extcolor”.
38
  - The final output must compile directly in LaTeX Beamer poster without errors.
39
 
40
+ 10. Strictly skip the title and author part:
41
+ Under NO circumstances should the model include any title, author name, or affiliation text found in the first section of the input document.
42
+ These elements are pre-defined via the “meta” field and must not be repeated in the generated sections.
43
+ The model should start directly with the main content, skipping all title and author information.
44
+
45
  Example Output:
46
  {
47
  "meta": {
 
51
  },
52
  "sections": [
53
  {
54
+ "title": "Motivation of this work",
55
+ "content": "We aim to \\textbf{clarify the causal relationships} in text-to-image models. Previous studies \\textcolor{red}{overlooked confounding embeddings}, leading to poor interpretability."
56
  },
57
  {
58
+ "title": "What makes this task challenging",
59
+ "content": " \\textbf{Text-to-image generation} involves complex multi-modal dependencies.\\\\• Embeddings encode both \\textcolor{red}{semantic} and \\textcolor{blue}{stylistic} information.\\\\• Disentangle causal factors and ensure faithful image synthesis."
60
  },
61
  {
62
+ "title": "Key Contributions"
63
+ }
 
64
  ]
65
  }
66
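For clarity, a "meta" block that follows step 8 above would look roughly like the sketch below; the field names follow the instructions, and the names and affiliations are the prompt's own illustrative placeholders, not real paper data.

# Illustrative meta block: raw title, superscripted authors, numbered affiliations.
meta = {
    "title": "Raw Paper Title Goes Here",
    "authors": "Lily\\textsuperscript{1}, Bob\\textsuperscript{2}",
    "affiliations": "1 Department of AI, NUS; 2 School of Computing, NTU",
}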
 
Paper2Poster/utils/src/model_utils.py CHANGED
@@ -3,95 +3,95 @@ import os
3
  from copy import deepcopy
4
 
5
  import numpy as np
6
- import torch
7
- import torchvision.transforms as T
8
  from FlagEmbedding import BGEM3FlagModel
9
  from marker.config.parser import ConfigParser
10
  from marker.converters.pdf import PdfConverter
11
  from marker.output import text_from_rendered
12
  from PIL import Image
13
- from torchvision.transforms.functional import InterpolationMode
14
- from transformers import AutoFeatureExtractor, AutoModel
15
 
16
  from utils.src.presentation import Presentation, SlidePage
17
  from utils.src.utils import is_image_path, pjoin
18
 
19
- device_count = torch.cuda.device_count()
20
-
21
-
22
- def prs_dedup(
23
- presentation: Presentation,
24
- model: BGEM3FlagModel,
25
- batchsize: int = 32,
26
- threshold: float = 0.8,
27
- ) -> list[SlidePage]:
28
- """
29
- Deduplicate slides in a presentation based on text similarity.
30
-
31
- Args:
32
- presentation (Presentation): The presentation object containing slides.
33
- model: The model used for generating text embeddings.
34
- batchsize (int): The batch size for processing slides.
35
- threshold (float): The similarity threshold for deduplication.
36
-
37
- Returns:
38
- list: A list of removed duplicate slides.
39
- """
40
- text_embeddings = get_text_embedding(
41
- [i.to_text() for i in presentation.slides], model, batchsize
42
- )
43
- pre_embedding = text_embeddings[0]
44
- slide_idx = 1
45
- duplicates = []
46
- while slide_idx < len(presentation):
47
- cur_embedding = text_embeddings[slide_idx]
48
- if torch.cosine_similarity(pre_embedding, cur_embedding, -1) > threshold:
49
- duplicates.append(slide_idx - 1)
50
- slide_idx += 1
51
- pre_embedding = cur_embedding
52
- return [presentation.slides.pop(i) for i in reversed(duplicates)]
53
-
54
-
55
- def get_text_model(device: str = None) -> BGEM3FlagModel:
56
- """
57
- Initialize and return a text model.
58
-
59
- Args:
60
- device (str): The device to run the model on.
61
-
62
- Returns:
63
- BGEM3FlagModel: The initialized text model.
64
- """
65
- return BGEM3FlagModel(
66
- "BAAI/bge-m3",
67
- use_fp16=True,
68
- device=device,
69
- )
70
-
71
-
72
- def get_image_model(device: str = None):
73
- """
74
- Initialize and return an image model and its feature extractor.
75
-
76
- Args:
77
- device (str): The device to run the model on.
78
-
79
- Returns:
80
- tuple: A tuple containing the feature extractor and the image model.
81
- """
82
- model_base = "google/vit-base-patch16-224-in21k"
83
- return (
84
- AutoFeatureExtractor.from_pretrained(
85
- model_base,
86
- torch_dtype=torch.float16,
87
- device_map=device,
88
- ),
89
- AutoModel.from_pretrained(
90
- model_base,
91
- torch_dtype=torch.float16,
92
- device_map=device,
93
- ).eval(),
94
- )
95
 
96
 
97
  def parse_pdf(
@@ -140,158 +140,158 @@ def parse_pdf(
140
  return full_text
141
 
142
 
143
- def get_text_embedding(
144
- text: list[str], model: BGEM3FlagModel, batchsize: int = 32
145
- ) -> list[torch.Tensor]:
146
- """
147
- Generate text embeddings for a list of text strings.
148
-
149
- Args:
150
- text (list[str]): A list of text strings.
151
- model: The model used for generating embeddings.
152
- batchsize (int): The batch size for processing text.
153
-
154
- Returns:
155
- list: A list of text embeddings.
156
- """
157
- if isinstance(text, str):
158
- return torch.tensor(model.encode(text)["dense_vecs"]).to(model.device)
159
- result = []
160
- for i in range(0, len(text), batchsize):
161
- result.extend(
162
- torch.tensor(model.encode(text[i : i + batchsize])["dense_vecs"]).to(
163
- model.device
164
- )
165
- )
166
- return result
167
-
168
-
169
- def get_image_embedding(
170
- image_dir: str, extractor, model, batchsize: int = 16
171
- ) -> dict[str, torch.Tensor]:
172
- """
173
- Generate image embeddings for images in a directory.
174
-
175
- Args:
176
- image_dir (str): The directory containing images.
177
- extractor: The feature extractor for images.
178
- model: The model used for generating embeddings.
179
- batchsize (int): The batch size for processing images.
180
-
181
- Returns:
182
- dict: A dictionary mapping image filenames to their embeddings.
183
- """
184
- transform = T.Compose(
185
- [
186
- T.Resize(int((256 / 224) * extractor.size["height"])),
187
- T.CenterCrop(extractor.size["height"]),
188
- T.ToTensor(),
189
- T.Normalize(mean=extractor.image_mean, std=extractor.image_std),
190
- ]
191
- )
192
-
193
- inputs = []
194
- embeddings = []
195
- images = [i for i in sorted(os.listdir(image_dir)) if is_image_path(i)]
196
- for file in images:
197
- image = Image.open(pjoin(image_dir, file)).convert("RGB")
198
- inputs.append(transform(image))
199
- if len(inputs) % batchsize == 0 or file == images[-1]:
200
- batch = {"pixel_values": torch.stack(inputs).to(model.device)}
201
- embeddings.extend(model(**batch).last_hidden_state.detach())
202
- inputs.clear()
203
- return {image: embedding.flatten() for image, embedding in zip(images, embeddings)}
204
-
205
-
206
- def images_cosine_similarity(embeddings: list[torch.Tensor]) -> torch.Tensor:
207
- """
208
- Calculate the cosine similarity matrix for a list of embeddings.
209
- Args:
210
- embeddings (list[torch.Tensor]): A list of image embeddings.
211
-
212
- Returns:
213
- torch.Tensor: A NxN similarity matrix.
214
- """
215
- embeddings = [embedding for embedding in embeddings]
216
- sim_matrix = torch.zeros((len(embeddings), len(embeddings)))
217
- for i in range(len(embeddings)):
218
- for j in range(i + 1, len(embeddings)):
219
- sim_matrix[i, j] = sim_matrix[j, i] = torch.cosine_similarity(
220
- embeddings[i], embeddings[j], -1
221
- )
222
- return sim_matrix
223
 
224
 
225
  IMAGENET_MEAN = (0.485, 0.456, 0.406)
226
  IMAGENET_STD = (0.229, 0.224, 0.225)
227
 
228
 
229
- def average_distance(
230
- similarity: torch.Tensor, idx: int, cluster_idx: list[int]
231
- ) -> float:
232
- """
233
- Calculate the average distance between a point (idx) and a cluster (cluster_idx).
234
-
235
- Args:
236
- similarity (np.ndarray): The similarity matrix.
237
- idx (int): The index of the point.
238
- cluster_idx (list): The indices of the cluster.
239
-
240
- Returns:
241
- float: The average distance.
242
- """
243
- if idx in cluster_idx:
244
- return 0
245
- total_similarity = 0
246
- for idx_in_cluster in cluster_idx:
247
- total_similarity += similarity[idx, idx_in_cluster]
248
- return total_similarity / len(cluster_idx)
249
-
250
-
251
- def get_cluster(similarity: np.ndarray, sim_bound: float = 0.65):
252
- """
253
- Cluster points based on similarity.
254
-
255
- Args:
256
- similarity (np.ndarray): The similarity matrix.
257
- sim_bound (float): The similarity threshold for clustering.
258
-
259
- Returns:
260
- list: A list of clusters.
261
- """
262
- num_points = similarity.shape[0]
263
- clusters = []
264
- sim_copy = deepcopy(similarity)
265
- added = [False] * num_points
266
- while True:
267
- max_avg_dist = sim_bound
268
- best_cluster = None
269
- best_point = None
270
-
271
- for c in clusters:
272
- for point_idx in range(num_points):
273
- if added[point_idx]:
274
- continue
275
- avg_dist = average_distance(sim_copy, point_idx, c)
276
- if avg_dist > max_avg_dist:
277
- max_avg_dist = avg_dist
278
- best_cluster = c
279
- best_point = point_idx
280
-
281
- if best_point is not None:
282
- best_cluster.append(best_point)
283
- added[best_point] = True
284
- similarity[best_point, :] = 0
285
- similarity[:, best_point] = 0
286
- else:
287
- if similarity.max() < sim_bound:
288
- break
289
- i, j = np.unravel_index(np.argmax(similarity), similarity.shape)
290
- clusters.append([int(i), int(j)])
291
- added[i] = True
292
- added[j] = True
293
- similarity[i, :] = 0
294
- similarity[:, i] = 0
295
- similarity[j, :] = 0
296
- similarity[:, j] = 0
297
- return clusters
 
3
  from copy import deepcopy
4
 
5
  import numpy as np
6
+ # import torch
7
+ # import torchvision.transforms as T
8
  from FlagEmbedding import BGEM3FlagModel
9
  from marker.config.parser import ConfigParser
10
  from marker.converters.pdf import PdfConverter
11
  from marker.output import text_from_rendered
12
  from PIL import Image
13
+ # from torchvision.transforms.functional import InterpolationMode
14
+ # from transformers import AutoFeatureExtractor, AutoModel
15
 
16
  from utils.src.presentation import Presentation, SlidePage
17
  from utils.src.utils import is_image_path, pjoin
18
 
19
+ # device_count = torch.cuda.device_count()
20
+
21
+
22
+ # def prs_dedup(
23
+ # presentation: Presentation,
24
+ # model: BGEM3FlagModel,
25
+ # batchsize: int = 32,
26
+ # threshold: float = 0.8,
27
+ # ) -> list[SlidePage]:
28
+ # """
29
+ # Deduplicate slides in a presentation based on text similarity.
30
+
31
+ # Args:
32
+ # presentation (Presentation): The presentation object containing slides.
33
+ # model: The model used for generating text embeddings.
34
+ # batchsize (int): The batch size for processing slides.
35
+ # threshold (float): The similarity threshold for deduplication.
36
+
37
+ # Returns:
38
+ # list: A list of removed duplicate slides.
39
+ # """
40
+ # text_embeddings = get_text_embedding(
41
+ # [i.to_text() for i in presentation.slides], model, batchsize
42
+ # )
43
+ # pre_embedding = text_embeddings[0]
44
+ # slide_idx = 1
45
+ # duplicates = []
46
+ # while slide_idx < len(presentation):
47
+ # cur_embedding = text_embeddings[slide_idx]
48
+ # if torch.cosine_similarity(pre_embedding, cur_embedding, -1) > threshold:
49
+ # duplicates.append(slide_idx - 1)
50
+ # slide_idx += 1
51
+ # pre_embedding = cur_embedding
52
+ # return [presentation.slides.pop(i) for i in reversed(duplicates)]
53
+
54
+
55
+ # def get_text_model(device: str = None) -> BGEM3FlagModel:
56
+ # """
57
+ # Initialize and return a text model.
58
+
59
+ # Args:
60
+ # device (str): The device to run the model on.
61
+
62
+ # Returns:
63
+ # BGEM3FlagModel: The initialized text model.
64
+ # """
65
+ # return BGEM3FlagModel(
66
+ # "BAAI/bge-m3",
67
+ # use_fp16=True,
68
+ # device=device,
69
+ # )
70
+
71
+
72
+ # def get_image_model(device: str = None):
73
+ # """
74
+ # Initialize and return an image model and its feature extractor.
75
+
76
+ # Args:
77
+ # device (str): The device to run the model on.
78
+
79
+ # Returns:
80
+ # tuple: A tuple containing the feature extractor and the image model.
81
+ # """
82
+ # model_base = "google/vit-base-patch16-224-in21k"
83
+ # return (
84
+ # AutoFeatureExtractor.from_pretrained(
85
+ # model_base,
86
+ # torch_dtype=torch.float16,
87
+ # device_map=device,
88
+ # ),
89
+ # AutoModel.from_pretrained(
90
+ # model_base,
91
+ # torch_dtype=torch.float16,
92
+ # device_map=device,
93
+ # ).eval(),
94
+ # )
95
 
96
 
97
  def parse_pdf(
 
140
  return full_text
141
 
142
 
143
+ # def get_text_embedding(
144
+ # text: list[str], model: BGEM3FlagModel, batchsize: int = 32
145
+ # ) -> list[torch.Tensor]:
146
+ # """
147
+ # Generate text embeddings for a list of text strings.
148
+
149
+ # Args:
150
+ # text (list[str]): A list of text strings.
151
+ # model: The model used for generating embeddings.
152
+ # batchsize (int): The batch size for processing text.
153
+
154
+ # Returns:
155
+ # list: A list of text embeddings.
156
+ # """
157
+ # if isinstance(text, str):
158
+ # return torch.tensor(model.encode(text)["dense_vecs"]).to(model.device)
159
+ # result = []
160
+ # for i in range(0, len(text), batchsize):
161
+ # result.extend(
162
+ # torch.tensor(model.encode(text[i : i + batchsize])["dense_vecs"]).to(
163
+ # model.device
164
+ # )
165
+ # )
166
+ # return result
167
+
168
+
169
+ # def get_image_embedding(
170
+ # image_dir: str, extractor, model, batchsize: int = 16
171
+ # ) -> dict[str, torch.Tensor]:
172
+ # """
173
+ # Generate image embeddings for images in a directory.
174
+
175
+ # Args:
176
+ # image_dir (str): The directory containing images.
177
+ # extractor: The feature extractor for images.
178
+ # model: The model used for generating embeddings.
179
+ # batchsize (int): The batch size for processing images.
180
+
181
+ # Returns:
182
+ # dict: A dictionary mapping image filenames to their embeddings.
183
+ # """
184
+ # transform = T.Compose(
185
+ # [
186
+ # T.Resize(int((256 / 224) * extractor.size["height"])),
187
+ # T.CenterCrop(extractor.size["height"]),
188
+ # T.ToTensor(),
189
+ # T.Normalize(mean=extractor.image_mean, std=extractor.image_std),
190
+ # ]
191
+ # )
192
+
193
+ # inputs = []
194
+ # embeddings = []
195
+ # images = [i for i in sorted(os.listdir(image_dir)) if is_image_path(i)]
196
+ # for file in images:
197
+ # image = Image.open(pjoin(image_dir, file)).convert("RGB")
198
+ # inputs.append(transform(image))
199
+ # if len(inputs) % batchsize == 0 or file == images[-1]:
200
+ # batch = {"pixel_values": torch.stack(inputs).to(model.device)}
201
+ # embeddings.extend(model(**batch).last_hidden_state.detach())
202
+ # inputs.clear()
203
+ # return {image: embedding.flatten() for image, embedding in zip(images, embeddings)}
204
+
205
+
206
+ # def images_cosine_similarity(embeddings: list[torch.Tensor]) -> torch.Tensor:
207
+ # """
208
+ # Calculate the cosine similarity matrix for a list of embeddings.
209
+ # Args:
210
+ # embeddings (list[torch.Tensor]): A list of image embeddings.
211
+
212
+ # Returns:
213
+ # torch.Tensor: A NxN similarity matrix.
214
+ # """
215
+ # embeddings = [embedding for embedding in embeddings]
216
+ # sim_matrix = torch.zeros((len(embeddings), len(embeddings)))
217
+ # for i in range(len(embeddings)):
218
+ # for j in range(i + 1, len(embeddings)):
219
+ # sim_matrix[i, j] = sim_matrix[j, i] = torch.cosine_similarity(
220
+ # embeddings[i], embeddings[j], -1
221
+ # )
222
+ # return sim_matrix
223
 
224
 
225
  IMAGENET_MEAN = (0.485, 0.456, 0.406)
226
  IMAGENET_STD = (0.229, 0.224, 0.225)
227
 
228
 
229
+ # def average_distance(
230
+ # similarity: torch.Tensor, idx: int, cluster_idx: list[int]
231
+ # ) -> float:
232
+ # """
233
+ # Calculate the average distance between a point (idx) and a cluster (cluster_idx).
234
+
235
+ # Args:
236
+ # similarity (np.ndarray): The similarity matrix.
237
+ # idx (int): The index of the point.
238
+ # cluster_idx (list): The indices of the cluster.
239
+
240
+ # Returns:
241
+ # float: The average distance.
242
+ # """
243
+ # if idx in cluster_idx:
244
+ # return 0
245
+ # total_similarity = 0
246
+ # for idx_in_cluster in cluster_idx:
247
+ # total_similarity += similarity[idx, idx_in_cluster]
248
+ # return total_similarity / len(cluster_idx)
249
+
250
+
251
+ # def get_cluster(similarity: np.ndarray, sim_bound: float = 0.65):
252
+ # """
253
+ # Cluster points based on similarity.
254
+
255
+ # Args:
256
+ # similarity (np.ndarray): The similarity matrix.
257
+ # sim_bound (float): The similarity threshold for clustering.
258
+
259
+ # Returns:
260
+ # list: A list of clusters.
261
+ # """
262
+ # num_points = similarity.shape[0]
263
+ # clusters = []
264
+ # sim_copy = deepcopy(similarity)
265
+ # added = [False] * num_points
266
+ # while True:
267
+ # max_avg_dist = sim_bound
268
+ # best_cluster = None
269
+ # best_point = None
270
+
271
+ # for c in clusters:
272
+ # for point_idx in range(num_points):
273
+ # if added[point_idx]:
274
+ # continue
275
+ # avg_dist = average_distance(sim_copy, point_idx, c)
276
+ # if avg_dist > max_avg_dist:
277
+ # max_avg_dist = avg_dist
278
+ # best_cluster = c
279
+ # best_point = point_idx
280
+
281
+ # if best_point is not None:
282
+ # best_cluster.append(best_point)
283
+ # added[best_point] = True
284
+ # similarity[best_point, :] = 0
285
+ # similarity[:, best_point] = 0
286
+ # else:
287
+ # if similarity.max() < sim_bound:
288
+ # break
289
+ # i, j = np.unravel_index(np.argmax(similarity), similarity.shape)
290
+ # clusters.append([int(i), int(j)])
291
+ # added[i] = True
292
+ # added[j] = True
293
+ # similarity[i, :] = 0
294
+ # similarity[:, i] = 0
295
+ # similarity[j, :] = 0
296
+ # similarity[:, j] = 0
297
+ # return clusters
app.py CHANGED
@@ -1,22 +1,24 @@
1
- import gradio as gr
2
  import subprocess, shutil, os, zipfile, datetime
3
  from pathlib import Path
4
 
5
  ROOT = Path(__file__).resolve().parent
6
  OUTPUT_DIR = ROOT / "output"
 
 
 
7
  ZIP_PATH = ROOT / "output.zip"
8
  LOG_PATH = ROOT / "last_run.log"
9
 
10
- def run_pipeline(model_name_t, model_name_v, result_dir, paper_latex_root, arxiv_url, openai_key, gemini_key):
11
  start_time = datetime.datetime.now()
12
  logs = [f"🚀 Starting pipeline at {start_time.strftime('%Y-%m-%d %H:%M:%S')}\n"]
13
 
14
- # 🧩 确保 output 目录存在(避免 No output generated)
15
- if not OUTPUT_DIR.exists():
16
- OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
17
- logs.append(f"📁 Created output directory: {OUTPUT_DIR}\n")
18
 
19
- # 🧹 清理旧输出(但保留空目录)
20
  for item in OUTPUT_DIR.iterdir():
21
  if item.is_dir():
22
  shutil.rmtree(item)
@@ -24,26 +26,74 @@ def run_pipeline(model_name_t, model_name_v, result_dir, paper_latex_root, arxiv
24
  item.unlink()
25
  if ZIP_PATH.exists():
26
  ZIP_PATH.unlink()
27
- logs.append("🧹 Cleaned previous output and zip files.\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # 构造命令
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  cmd = [
31
  "python", "pipeline.py",
32
- "--model_name_t", model_name_t,
33
- "--model_name_v", model_name_v,
34
- "--result_dir", result_dir,
35
- "--paper_latex_root", paper_latex_root,
36
- "--arxiv_url", arxiv_url,
 
 
37
  ]
38
 
39
- # 临时设置 API keys(供 pipeline 内部使用)
40
- os.environ["OPENAI_API_KEY"] = openai_key or ""
41
- os.environ["GEMINI_API_KEY"] = gemini_key or ""
 
42
 
43
- logs.append(f"🧠 Running command: {' '.join(cmd)}\n")
44
 
45
  try:
46
- # 同时捕获 stdout + stderr
47
  result = subprocess.run(
48
  cmd, capture_output=True, text=True, timeout=1800
49
  )
@@ -55,21 +105,20 @@ def run_pipeline(model_name_t, model_name_v, result_dir, paper_latex_root, arxiv
55
  msg = "❌ Pipeline timed out (30 min limit)."
56
  logs.append(msg)
57
  _write_logs(logs)
58
- return msg, None
59
  except Exception as e:
60
  msg = f"❌ Pipeline error: {e}"
61
  logs.append(msg)
62
  _write_logs(logs)
63
- return msg, None
64
 
65
- # 检查输出目录
66
  if not any(OUTPUT_DIR.iterdir()):
67
  msg = "❌ No output generated. Please check logs below."
68
  logs.append(msg)
69
  _write_logs(logs)
70
  return "\n".join(logs), None
71
 
72
- # 压缩 output 文件夹
73
  with zipfile.ZipFile(ZIP_PATH, 'w', zipfile.ZIP_DEFLATED) as zipf:
74
  for root, dirs, files in os.walk(OUTPUT_DIR):
75
  for file in files:
@@ -81,14 +130,11 @@ def run_pipeline(model_name_t, model_name_v, result_dir, paper_latex_root, arxiv
81
  end_time = datetime.datetime.now()
82
  logs.append(f"🏁 Completed at {end_time.strftime('%Y-%m-%d %H:%M:%S')} (Duration: {(end_time - start_time).seconds}s)\n")
83
 
84
- # 保存日志到文件
85
  _write_logs(logs)
86
-
87
  return "\n".join(logs), ZIP_PATH
88
 
89
 
90
  def _write_logs(logs):
91
- """将日志写入文件,便于 HF Logs 窗口调试"""
92
  with open(LOG_PATH, "w", encoding="utf-8") as f:
93
  f.write("\n".join(logs))
94
 
@@ -97,20 +143,20 @@ def _write_logs(logs):
97
  iface = gr.Interface(
98
  fn=run_pipeline,
99
  inputs=[
100
- gr.Textbox(label="Model Name (Text)", value="gpt-4.1"),
101
- gr.Textbox(label="Model Name (Vision)", value="gpt-4.1"),
102
- gr.Textbox(label="Result Dir", value="output"),
103
- gr.Textbox(label="Paper LaTeX Root", value="input/latex_proj"),
104
- gr.Textbox(label="ArXiv URL", value="https://arxiv.org/abs/2505.21497"),
105
- gr.Textbox(label="OpenAI API Key", placeholder="sk-...", type="password"),
106
- gr.Textbox(label="Gemini API Key", placeholder="AIza...", type="password"),
107
  ],
108
  outputs=[
109
- gr.Textbox(label="Logs", lines=30, max_lines=50),
110
- gr.File(label="Download Output (.zip)")
111
  ],
112
  title="📄 PaperShow Pipeline",
113
- description="输入 arXiv 链接和参数,自动生成 slides + poster,结果打包下载。",
 
 
 
114
  allow_flagging="never",
115
  )
116
 
 
1
+ import gradio as gr
2
  import subprocess, shutil, os, zipfile, datetime
3
  from pathlib import Path
4
 
5
  ROOT = Path(__file__).resolve().parent
6
  OUTPUT_DIR = ROOT / "output"
7
+ INPUT_DIR = ROOT / "input"
8
+ LOGO_DIR = INPUT_DIR / "logo"
9
+ POSTER_LATEX_DIR = ROOT / "posterbuilder" / "latex_proj"
10
  ZIP_PATH = ROOT / "output.zip"
11
  LOG_PATH = ROOT / "last_run.log"
12
 
13
+ def run_pipeline(arxiv_url, pdf_file, openai_key, logo_files):
14
  start_time = datetime.datetime.now()
15
  logs = [f"🚀 Starting pipeline at {start_time.strftime('%Y-%m-%d %H:%M:%S')}\n"]
16
 
17
+ # ====== Prepare directories ======
18
+ for d in [OUTPUT_DIR, LOGO_DIR, POSTER_LATEX_DIR, INPUT_DIR]:
19
+ d.mkdir(parents=True, exist_ok=True)
 
20
 
21
+ # Clean up previous output
22
  for item in OUTPUT_DIR.iterdir():
23
  if item.is_dir():
24
  shutil.rmtree(item)
 
26
  item.unlink()
27
  if ZIP_PATH.exists():
28
  ZIP_PATH.unlink()
29
+ logs.append("🧹 Cleaned previous output.\n")
30
+
31
+ # ====== Validation: institution logo upload is required ======
32
+ # Gradio may return a single file object or a list; normalize to a list here
33
+ if logo_files is None:
34
+ logo_files = []
35
+ if not isinstance(logo_files, (list, tuple)):
36
+ logo_files = [logo_files]
37
+ logo_files = [f for f in logo_files if f] # filter out None entries
38
+
39
+ if len(logo_files) == 0:
40
+ msg = "❌ You must upload the authors' institution logo(s) (multiple files allowed)."
41
+ logs.append(msg)
42
+ _write_logs(logs)
43
+ return "\n".join(logs), None
44
 
45
+ # Clear input/logo before saving the new files
46
+ for item in LOGO_DIR.iterdir():
47
+ if item.is_file():
48
+ item.unlink()
49
+ saved_logo_paths = []
50
+ for lf in logo_files:
51
+ p = LOGO_DIR / Path(lf.name).name
52
+ shutil.copy(lf.name, p)
53
+ saved_logo_paths.append(p)
54
+ logs.append(f"🏷️ Saved {len(saved_logo_paths)} logo file(s) to: {LOGO_DIR}\n")
55
+
56
+ # ====== Handle uploaded PDF (optional) ======
57
+ pdf_path = None
58
+ if pdf_file:
59
+ pdf_dir = INPUT_DIR / "pdf"
60
+ pdf_dir.mkdir(parents=True, exist_ok=True)
61
+ pdf_path = pdf_dir / Path(pdf_file.name).name
62
+ shutil.copy(pdf_file.name, pdf_path)
63
+ logs.append(f"📄 Uploaded PDF saved to: {pdf_path}\n")
64
+
65
+ # For compatibility with pipeline Step 1.5: also copy to input/paper.pdf
66
+ canonical_pdf = INPUT_DIR / "paper.pdf"
67
+ shutil.copy(pdf_file.name, canonical_pdf)
68
+ logs.append(f"🔁 Also copied PDF to: {canonical_pdf}\n")
69
+
70
+ # ====== Validate the input source ======
71
+ if not arxiv_url and not pdf_file:
72
+ msg = "❌ Please provide an arXiv URL or upload a PDF file (choose one)."
73
+ logs.append(msg)
74
+ _write_logs(logs)
75
+ return "\n".join(logs), None
76
+
77
+ # ====== Build the command ======
78
  cmd = [
79
  "python", "pipeline.py",
80
+ "--model_name_t", "gpt-5",
81
+ "--model_name_v", "gpt-5",
82
+ "--result_dir", "output",
83
+ "--paper_latex_root", "input/latex_proj",
84
+ "--openai_key", openai_key,
85
+ "--gemini_key", "AIzaSyA1wVVdlYAVs3FULSmCVD1Noulwrq7zqeo",
86
+ "--logo_dir", str(LOGO_DIR) # 👈 NEW: pass the logo directory to the pipeline
87
  ]
88
 
89
+ if arxiv_url:
90
+ cmd += ["--arxiv_url", arxiv_url]
91
+ if pdf_path:
92
+ cmd += ["--pdf_path", str(pdf_path)]
93
 
94
+ logs.append(f"🧠 Running command:\n{' '.join(cmd)}\n")
95
 
96
  try:
 
97
  result = subprocess.run(
98
  cmd, capture_output=True, text=True, timeout=1800
99
  )
 
105
  msg = "❌ Pipeline timed out (30 min limit)."
106
  logs.append(msg)
107
  _write_logs(logs)
108
+ return "\n".join(logs), None
109
  except Exception as e:
110
  msg = f"❌ Pipeline error: {e}"
111
  logs.append(msg)
112
  _write_logs(logs)
113
+ return "\n".join(logs), None
114
 
115
+ # ====== Check output & package ======
116
  if not any(OUTPUT_DIR.iterdir()):
117
  msg = "❌ No output generated. Please check logs below."
118
  logs.append(msg)
119
  _write_logs(logs)
120
  return "\n".join(logs), None
121
 
 
122
  with zipfile.ZipFile(ZIP_PATH, 'w', zipfile.ZIP_DEFLATED) as zipf:
123
  for root, dirs, files in os.walk(OUTPUT_DIR):
124
  for file in files:
 
130
  end_time = datetime.datetime.now()
131
  logs.append(f"🏁 Completed at {end_time.strftime('%Y-%m-%d %H:%M:%S')} (Duration: {(end_time - start_time).seconds}s)\n")
132
 
 
133
  _write_logs(logs)
 
134
  return "\n".join(logs), ZIP_PATH
135
 
136
 
137
  def _write_logs(logs):
 
138
  with open(LOG_PATH, "w", encoding="utf-8") as f:
139
  f.write("\n".join(logs))
140
 
 
143
  iface = gr.Interface(
144
  fn=run_pipeline,
145
  inputs=[
146
+ gr.Textbox(label="📘 ArXiv URL (choose one)", placeholder="https://arxiv.org/abs/2505.xxxxx"),
147
+ gr.File(label="📄 Upload PDF (choose one)"),
148
+ gr.Textbox(label="🔑 OpenAI API Key", placeholder="sk-...", type="password"),
149
+ gr.File(label="🏷️ Upload the authors' institution logo(s) (required, multiple files allowed)", file_count="multiple", file_types=["image"]),
 
 
 
150
  ],
151
  outputs=[
152
+ gr.Textbox(label="🧾 Logs", lines=30, max_lines=50),
153
+ gr.File(label="📦 Download output (.zip)")
154
  ],
155
  title="📄 PaperShow Pipeline",
156
+ description=(
157
+ "Uploading the institution logo(s) is required (multiple files allowed).\n"
158
+ "Provide an arXiv URL or upload a PDF (choose one); the system will generate the poster and package it for download."
159
+ ),
160
  allow_flagging="never",
161
  )
162
 
pipeline.py CHANGED
@@ -6,6 +6,7 @@ import subprocess
6
  from os import path
7
  from pdf2image import convert_from_path
8
  from pathlib import Path
 
9
 
10
  print("Initializing...")
11
 
@@ -54,8 +55,8 @@ def run_paper2poster_content_build():
54
  cmd = [
55
  sys.executable, "-m", "PosterAgent.new_pipeline",
56
  f'--poster_path={dst_pdf.relative_to(P2P_ROOT)}',
57
- '--model_name_t=4o',
58
- '--model_name_v=4o',
59
  '--poster_width_inches=48',
60
  '--poster_height_inches=36'
61
  ]
@@ -87,11 +88,104 @@ def run_paper2poster_content_build():
87
  print(" 📦 JSON copied & renamed.")
88
  print(" ✅ Step 1.5 done.\n")
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  if __name__ == '__main__':
91
  parser = argparse.ArgumentParser(description='Paper2Video Generation Pipeline')
92
- parser.add_argument('--result_dir', type=str, default='./result/zeyu')
93
- parser.add_argument('--model_name_t', type=str, default='gpt-4.1')
94
- parser.add_argument('--model_name_v', type=str, default='gpt-4.1')
95
  parser.add_argument('--paper_latex_root', type=str, default=str(P2V_ASSETS))
96
  parser.add_argument('--ref_text', type=str, default=None)
97
  parser.add_argument('--if_tree_search', type=bool, default=True)
@@ -100,7 +194,7 @@ if __name__ == '__main__':
100
  parser.add_argument('--arxiv_url', type=str, default=None)
101
  parser.add_argument('--openai_key', type=str, required=True, help='Your OpenAI API key')
102
  parser.add_argument('--gemini_key', type=str, required=True, help='Your Gemini API key')
103
-
104
  args = parser.parse_args()
105
  print("start")
106
 
@@ -166,44 +260,44 @@ if __name__ == '__main__':
166
  # =========================
167
  # Step 1: Slide Generation
168
  # =========================
169
- try:
170
- print("🧩 Step 1: Generating Slides ...")
171
- slide_latex_path = path.join(args.paper_latex_root, "slides.tex")
172
- slide_image_dir = path.join(args.result_dir, 'slide_imgs')
173
- os.makedirs(slide_image_dir, exist_ok=True)
174
-
175
- start_time = time.time()
176
- prompt_path = "./Paper2Video/src/prompts/slide_beamer_prompt.txt"
177
-
178
- if args.if_tree_search:
179
- usage_slide, beamer_path = latex_code_gen(
180
- prompt_path=prompt_path,
181
- tex_dir=args.paper_latex_root,
182
- beamer_save_path=slide_latex_path,
183
- model_config_ll=get_agent_config(args.model_name_t),
184
- model_config_vl=get_agent_config(args.model_name_v),
185
- beamer_temp_name=args.beamer_templete_prompt
186
- )
187
- else:
188
- paper_latex_path = path.join(args.paper_latex_root, "main.tex")
189
- usage_slide = latex_code_gen(
190
- prompt_path=prompt_path,
191
- tex_dir=args.paper_latex_root,
192
- tex_path=paper_latex_path,
193
- beamer_save_path=slide_latex_path,
194
- model_config=get_agent_config(args.model_name_t)
195
- )
196
- beamer_path = slide_latex_path
197
-
198
- if not os.path.exists(beamer_path):
199
- raise FileNotFoundError(f"❌ Beamer PDF not found: {beamer_path}")
200
-
201
- slide_imgs = convert_from_path(beamer_path, dpi=400)
202
- for i, img in enumerate(slide_imgs):
203
- img.save(path.join(slide_image_dir, f"{i+1}.png"))
204
- print("✅ Step 1 done.")
205
- except Exception as e:
206
- print(f"❌ Step 1 failed: {e}")
207
 
208
  # =========================
209
  # Step 1.5: Poster2Poster content generation
@@ -224,13 +318,47 @@ if __name__ == '__main__':
224
  print(f"❌ Step 2 failed: {e}")
225
 
226
  # =========================
227
- # Step 3: 导出 latex_proj
228
  # =========================
229
  try:
230
  src_lp = PB_ROOT / "latex_proj"
231
  dst_lp = ROOT_DIR / "output" / "poster_latex_proj"
232
  copytree_overwrite(src_lp, dst_lp)
233
  print(f"📦 Exported LaTeX project → {dst_lp.relative_to(ROOT_DIR)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  except Exception as e:
235
  print(f"❌ Step 3 failed: {e}")
236
 
 
6
  from os import path
7
  from pdf2image import convert_from_path
8
  from pathlib import Path
9
+ from PIL import Image
10
 
11
  print("Initializing...")
12
 
 
55
  cmd = [
56
  sys.executable, "-m", "PosterAgent.new_pipeline",
57
  f'--poster_path={dst_pdf.relative_to(P2P_ROOT)}',
58
+ '--model_name_t=gpt-5',
59
+ '--model_name_v=gpt-5',
60
  '--poster_width_inches=48',
61
  '--poster_height_inches=36'
62
  ]
 
88
  print(" 📦 JSON copied & renamed.")
89
  print(" ✅ Step 1.5 done.\n")
90
 
91
+ def _list_logo_files(logo_dir: Path):
92
+ exts = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tif", ".tiff"}
93
+ files = []
94
+ if logo_dir.exists():
95
+ for p in sorted(logo_dir.iterdir()):
96
+ if p.suffix.lower() in exts and p.is_file():
97
+ files.append(p)
98
+ return files
99
+
100
+ def _compose_logos_horizontally(logo_paths, out_path: Path, box_w=2000, box_h=476, gap=16):
101
+ """
102
+ Width is a hard constraint: the output image width is always exactly box_w (default 2000px).
103
+ Multiple logos are scaled by one uniform factor and concatenated so they exactly fill box_w (gaps included).
104
+ The height follows from the logos' aspect ratios; it may be smaller or larger than box_h (even > 2*box_h) and is never compressed a second time.
105
+ Transparent background, PNG output.
106
+ """
107
+ # Load the images
108
+ imgs = []
109
+ for p in logo_paths:
110
+ p = Path(p)
111
+ if p.exists() and p.is_file():
112
+ imgs.append(Image.open(p).convert("RGBA"))
113
+ n = len(imgs)
114
+ if n == 0:
115
+ raise RuntimeError("No logo images found.")
116
+
117
+ # Original total width (excluding gaps); composed width = sum(w_i) + gap*(n-1)
118
+ widths = [im.width for im in imgs]
119
+ heights = [im.height for im in imgs]
120
+ sum_w = sum(widths)
121
+ if sum_w <= 0:
122
+ raise RuntimeError("All logo images have zero width.")
123
+
124
+ # Compute a uniform scale factor s such that: sum(w_i * s) + gap*(n-1) == box_w
125
+ # => s = (box_w - gap*(n-1)) / sum_w
126
+ total_gap = max(0, gap * (n - 1))
127
+ if box_w <= total_gap:
128
+ raise ValueError(f"box_w({box_w}) too small vs total gaps({total_gap}). Increase box_w or reduce gap.")
129
+ s = (box_w - total_gap) / float(sum_w)
130
+
131
+ # Scale uniformly (round to integer pixels to avoid accumulating error)
132
+ resized = []
133
+ scaled_widths = []
134
+ scaled_heights = []
135
+ for im, w, h in zip(imgs, widths, heights):
136
+ nw = max(1, int(round(w * s)))
137
+ nh = max(1, int(round(h * s)))
138
+ resized.append(im.resize((nw, nh), Image.LANCZOS))
139
+ scaled_widths.append(nw)
140
+ scaled_heights.append(nh)
141
+
142
+ # Integer rounding may leave the total width != box_w - total_gap; nudge a few images by 1px to align exactly
143
+ current_sum_w = sum(scaled_widths)
144
+ diff = (box_w - total_gap) - current_sum_w
145
+ # Distribute the pixel error evenly, starting from the widest images
146
+ if diff != 0:
147
+ order = sorted(range(n), key=lambda i: scaled_widths[i], reverse=(diff > 0))
148
+ idx = 0
149
+ step = 1 if diff > 0 else -1
150
+ remaining = abs(diff)
151
+ while remaining > 0 and n > 0:
152
+ i = order[idx % n]
153
+ new_w = scaled_widths[i] + step
154
+ if new_w >= 1:
155
+ scaled_widths[i] = new_w
156
+ resized[i] = resized[i].resize((new_w, resized[i].height), Image.LANCZOS)
157
+ remaining -= 1
158
+ idx += 1
159
+
160
+ # Compute the final canvas size
161
+ total_w = sum(scaled_widths) + total_gap
162
+ assert total_w == box_w, f"width pack mismatch: got {total_w}, expect {box_w}"
163
+ canvas_w = box_w
164
+ canvas_h = max(im.height for im in resized)  # height follows from the scale (may exceed 2*box_h)
165
+
166
+ # Create the canvas and place the logos left to right, vertically centered
167
+ canvas = Image.new("RGBA", (canvas_w, canvas_h), (0, 0, 0, 0))
168
+ cur_x = 0
169
+ for idx, im in enumerate(resized):
170
+ y = (canvas_h - im.height) // 2
171
+ canvas.alpha_composite(im, (cur_x, y))
172
+ cur_x += im.width
173
+ if idx != n - 1:
174
+ cur_x += gap
175
+
176
+ out_path.parent.mkdir(parents=True, exist_ok=True)
177
+ canvas.save(out_path, format="PNG")
178
+ print(f" 🧩 Logos composed (width-locked) → {out_path.relative_to(ROOT_DIR)} "
179
+ f"(n={n}, final_size={canvas_w}x{canvas_h})")
180
+
181
+
182
+
183
+
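For reference, a minimal self-contained sketch of the width-locking rule implemented by _compose_logos_horizontally above; the widths are hypothetical and the per-image 1px nudge loop is omitted:

    # Illustrative only: a shared scale factor packs the logos to box_w (gaps included).
    widths = [800, 1200, 500]                      # hypothetical source logo widths in px
    gap, box_w = 16, 2000
    total_gap = gap * (len(widths) - 1)
    s = (box_w - total_gap) / sum(widths)          # shared scale factor
    scaled = [round(w * s) for w in widths]
    # Before the 1px correction, the packed width is off by at most len(widths) px.
    assert abs(sum(scaled) + total_gap - box_w) <= len(widths)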
184
  if __name__ == '__main__':
185
  parser = argparse.ArgumentParser(description='Paper2Video Generation Pipeline')
186
+ parser.add_argument('--result_dir', type=str, default='output')
187
+ parser.add_argument('--model_name_t', type=str, default='gpt-5')
188
+ parser.add_argument('--model_name_v', type=str, default='gpt-5')
189
  parser.add_argument('--paper_latex_root', type=str, default=str(P2V_ASSETS))
190
  parser.add_argument('--ref_text', type=str, default=None)
191
  parser.add_argument('--if_tree_search', type=bool, default=True)
 
194
  parser.add_argument('--arxiv_url', type=str, default=None)
195
  parser.add_argument('--openai_key', type=str, required=True, help='Your OpenAI API key')
196
  parser.add_argument('--gemini_key', type=str, required=True, help='Your Gemini API key')
197
+ parser.add_argument('--logo_dir', type=str, required=True, help='Directory containing uploaded logo image(s)')
198
  args = parser.parse_args()
199
  print("start")
200
 
 
260
  # =========================
261
  # Step 1: Slide Generation
262
  # =========================
263
+ # try:
264
+ # print("🧩 Step 1: Generating Slides ...")
265
+ # slide_latex_path = path.join(args.paper_latex_root, "slides.tex")
266
+ # slide_image_dir = path.join(args.result_dir, 'slide_imgs')
267
+ # os.makedirs(slide_image_dir, exist_ok=True)
268
+
269
+ # start_time = time.time()
270
+ # prompt_path = "./Paper2Video/src/prompts/slide_beamer_prompt.txt"
271
+
272
+ # if args.if_tree_search:
273
+ # usage_slide, beamer_path = latex_code_gen(
274
+ # prompt_path=prompt_path,
275
+ # tex_dir=args.paper_latex_root,
276
+ # beamer_save_path=slide_latex_path,
277
+ # model_config_ll=get_agent_config(args.model_name_t),
278
+ # model_config_vl=get_agent_config(args.model_name_v),
279
+ # beamer_temp_name=args.beamer_templete_prompt
280
+ # )
281
+ # else:
282
+ # paper_latex_path = path.join(args.paper_latex_root, "main.tex")
283
+ # usage_slide = latex_code_gen(
284
+ # prompt_path=prompt_path,
285
+ # tex_dir=args.paper_latex_root,
286
+ # tex_path=paper_latex_path,
287
+ # beamer_save_path=slide_latex_path,
288
+ # model_config=get_agent_config(args.model_name_t)
289
+ # )
290
+ # beamer_path = slide_latex_path
291
+
292
+ # if not os.path.exists(beamer_path):
293
+ # raise FileNotFoundError(f"❌ Beamer PDF not found: {beamer_path}")
294
+
295
+ # slide_imgs = convert_from_path(beamer_path, dpi=400)
296
+ # for i, img in enumerate(slide_imgs):
297
+ # img.save(path.join(slide_image_dir, f"{i+1}.png"))
298
+ # print("✅ Step 1 done.")
299
+ # except Exception as e:
300
+ # print(f"❌ Step 1 failed: {e}")
301
 
302
  # =========================
303
  # Step 1.5: Poster2Poster content generation
 
318
  print(f"❌ Step 2 failed: {e}")
319
 
320
  # =========================
321
+ # Step 3: Export latex_proj, handle logos & apply template
322
  # =========================
323
  try:
324
  src_lp = PB_ROOT / "latex_proj"
325
  dst_lp = ROOT_DIR / "output" / "poster_latex_proj"
326
  copytree_overwrite(src_lp, dst_lp)
327
  print(f"📦 Exported LaTeX project → {dst_lp.relative_to(ROOT_DIR)}")
328
+
329
+ logo_dir = Path(args.logo_dir)
330
+ logo_files = _list_logo_files(logo_dir)
331
+ if len(logo_files) == 0:
332
+ raise RuntimeError("❌ No logo files found in --logo_dir (must upload at least one).")
333
+
334
+ logos_out_dir = dst_lp / "logos"
335
+ logos_out_dir.mkdir(parents=True, exist_ok=True)
336
+ left_logo_path = logos_out_dir / "left_logo.png"
337
+
338
+ if len(logo_files) == 1:
339
+ # Single logo: copy it and convert to PNG (for consistency)
340
+ im = Image.open(logo_files[0]).convert("RGBA")
341
+ im.save(left_logo_path, format="PNG")
342
+ print(f"🖼️ Single logo saved → {left_logo_path.relative_to(ROOT_DIR)}")
343
+ else:
344
+ # Multiple logos: compose them horizontally
345
+ _compose_logos_horizontally(logo_files, left_logo_path, box_w=2000, box_h=476, gap=16)
346
+
347
+ template_dir = ROOT_DIR / "template"
348
+ if template_dir.exists():
349
+ for item in template_dir.iterdir():
350
+ dst_path = dst_lp / item.name
351
+ if item.is_dir():
352
+ if dst_path.exists():
353
+ shutil.rmtree(dst_path)
354
+ shutil.copytree(item, dst_path)
355
+ else:
356
+ shutil.copy2(item, dst_path)
357
+ print(f"📂 Copied all template files → {dst_lp.relative_to(ROOT_DIR)}")
358
+ else:
359
+ print("⚠️ template directory not found, skipping Step 3.5.")
360
+
361
+ print("✅ Step 3 done.")
362
  except Exception as e:
363
  print(f"❌ Step 3 failed: {e}")
364
 
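Step 3 above relies on a project helper named copytree_overwrite that is not shown in this diff. A plausible minimal equivalent, stated as an assumption rather than the repository's actual implementation:

    import shutil
    from pathlib import Path

    def copytree_overwrite(src: Path, dst: Path) -> None:
        # Assumed behavior: replace dst entirely with a fresh copy of src.
        if dst.exists():
            shutil.rmtree(dst)
        shutil.copytree(src, dst)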
posterbuilder/arrangement.json CHANGED
@@ -1,542 +1,115 @@
1
  {
2
- "poster_width": 1200,
3
- "poster_height": 900,
4
- "poster_width_inches": 48.0,
5
- "poster_height_inches": 36.0,
6
  "panels": [
7
  {
8
  "panel_id": 0,
9
- "section_name": "Poster Title & Author",
10
- "tp": 0.12971887550200803,
11
- "text_len": 323,
12
- "gp": 0,
13
- "figure_size": 0,
14
- "figure_aspect": 1,
15
- "sp": 0.06301447323913961,
16
- "rp": 2.505748069071783
17
  },
18
  {
19
  "panel_id": 1,
20
- "section_name": "Introduction",
21
- "tp": 0.1859437751004016,
22
- "text_len": 463,
23
- "gp": 0,
24
- "figure_size": 0,
25
- "figure_aspect": 1,
26
- "sp": 0.08063905395956796,
27
- "rp": 2.359873888191933
28
  },
29
  {
30
  "panel_id": 2,
31
- "section_name": "Benchmark & Metrics",
32
- "tp": 0.15903614457831325,
33
- "text_len": 396,
34
- "gp": 0.016682202105281593,
35
- "figure_size": 64769,
36
- "figure_aspect": 0.8819188191881919,
37
- "sp": 0.07756528917306713,
38
- "rp": 2.386019900332315
39
  },
40
  {
41
  "panel_id": 3,
42
- "section_name": "PosterAgent Framework",
43
- "tp": 0.1859437751004016,
44
- "text_len": 463,
45
- "gp": 0.49217196764679444,
46
  "figure_size": 1910868,
47
- "figure_aspect": 2.0350877192982457,
48
- "sp": 0.23879941088764153,
49
- "rp": 1.0716273641356449
50
  },
51
  {
52
  "panel_id": 4,
53
- "section_name": "Evaluation & Results",
54
- "tp": 0.1859437751004016,
55
- "text_len": 463,
56
- "gp": 0.49114583024792396,
57
- "figure_size": 1906884,
58
- "figure_aspect": 2.0434782608695654,
59
- "sp": 0.23846965976825418,
60
- "rp": 1.0743132504197004
61
- },
62
- {
63
- "panel_id": 5,
64
- "section_name": "Conclusion",
65
- "tp": 0.1534136546184739,
66
- "text_len": 382,
67
  "gp": 0,
68
  "figure_size": 0,
69
- "figure_aspect": 1,
70
- "sp": 0.07044197511417727,
71
- "rp": 2.4442725214152747
72
- }
73
- ],
74
- "panel_arrangement": [
75
- {
76
- "panel_name": "Poster Title & Author",
77
- "panel_id": 0,
78
- "x": 0,
79
- "y": 0,
80
- "width": 1200,
81
- "height": 90.0
82
- },
83
- {
84
- "panel_name": "Introduction",
85
- "panel_id": 1,
86
- "x": 0,
87
- "y": 90.0,
88
- "width": 550.621296168701,
89
- "height": 201.65362571734266
90
- },
91
- {
92
- "panel_name": "Benchmark & Metrics",
93
- "panel_id": 2,
94
- "x": 550.621296168701,
95
- "y": 90.0,
96
- "width": 529.6329503516805,
97
- "height": 201.65362571734266
98
- },
99
- {
100
- "panel_name": "PosterAgent Framework",
101
- "panel_id": 3,
102
- "x": 0,
103
- "y": 291.65362571734266,
104
- "width": 540.5003037876063,
105
- "height": 608.3463742826573
106
- },
107
- {
108
- "panel_name": "Evaluation & Results",
109
- "panel_id": 4,
110
- "x": 540.5003037876063,
111
- "y": 291.65362571734266,
112
- "width": 539.7539427327752,
113
- "height": 608.3463742826573
114
- },
115
- {
116
- "panel_name": "Conclusion",
117
- "panel_id": 5,
118
- "x": 1080.2542465203815,
119
- "y": 90.0,
120
- "width": 119.74575347961854,
121
- "height": 810.0
122
- }
123
- ],
124
- "figure_arrangement": [
125
- {
126
- "panel_id": 2,
127
- "x": 763.672586975783,
128
- "y": 132.13072514346854,
129
- "width": 103.53036873751637,
130
- "height": 117.39217543040559,
131
- "figure_id": 0,
132
- "figure_name": "p<Benchmark & Metrics>_f0",
133
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-1.png"
134
- },
135
- {
136
- "panel_id": 3,
137
- "x": 56.45003037876063,
138
- "y": 490.76985659696936,
139
- "width": 427.60024303008504,
140
- "height": 210.11391252340385,
141
- "figure_id": 0,
142
- "figure_name": "p<PosterAgent Framework>_f0",
143
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-8.png"
144
- },
145
- {
146
- "panel_id": 4,
147
- "x": 596.8756980608838,
148
- "y": 491.34731768544725,
149
- "width": 427.0031541862202,
150
- "height": 208.95899034644816,
151
- "figure_id": 0,
152
- "figure_name": "p<Evaluation & Results>_f0",
153
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-table-1.png"
154
- }
155
- ],
156
- "text_arrangement": [
157
- {
158
- "panel_id": 0,
159
- "x": 3.0,
160
- "y": 3.0,
161
- "width": 1194.0,
162
- "height": 37.333333333333336,
163
- "textbox_id": 0,
164
- "textbox_name": "p<Poster Title & Author>_t0",
165
- "num_chars": 410
166
- },
167
- {
168
- "panel_id": 0,
169
- "x": 3.0,
170
- "y": 40.333333333333336,
171
- "width": 1194.0,
172
- "height": 46.666666666666664,
173
- "textbox_id": 0,
174
- "textbox_name": "p<Poster Title & Author>_t1",
175
- "num_chars": 410
176
- },
177
- {
178
- "panel_id": 1,
179
- "x": 3.0,
180
- "y": 93.0,
181
- "width": 544.621296168701,
182
- "height": 32.0,
183
- "textbox_id": 0,
184
- "textbox_name": "p<Introduction>_t0",
185
- "num_chars": 180
186
- },
187
- {
188
- "panel_id": 1,
189
- "x": 3.0,
190
- "y": 125.0,
191
- "width": 544.621296168701,
192
- "height": 163.65362571734266,
193
- "textbox_id": 1,
194
- "textbox_name": "p<Introduction>_t1",
195
- "num_chars": 540
196
- },
197
- {
198
- "panel_id": 2,
199
- "x": 553.621296168701,
200
- "y": 93.0,
201
- "width": 523.6329503516805,
202
- "height": 32.0,
203
- "textbox_id": 0,
204
- "textbox_name": "p<Benchmark & Metrics>_t0",
205
- "num_chars": 180
206
- },
207
- {
208
- "panel_id": 2,
209
- "x": 553.621296168701,
210
- "y": 125.0,
211
- "width": 523.6329503516805,
212
- "height": 7.130725143468538,
213
- "textbox_id": 1,
214
- "textbox_name": "p<Benchmark & Metrics>_t1",
215
- "num_chars": 180
216
- },
217
- {
218
- "panel_id": 2,
219
- "x": 553.621296168701,
220
- "y": 249.52290057387413,
221
- "width": 523.6329503516805,
222
- "height": 39.13072514346854,
223
- "textbox_id": 2,
224
- "textbox_name": "p<Benchmark & Metrics>_t2",
225
- "num_chars": 180
226
- },
227
- {
228
- "panel_id": 3,
229
- "x": 3.0,
230
- "y": 294.65362571734266,
231
- "width": 534.5003037876063,
232
- "height": 32.0,
233
- "textbox_id": 0,
234
- "textbox_name": "p<PosterAgent Framework>_t0",
235
- "num_chars": 180
236
- },
237
- {
238
- "panel_id": 3,
239
- "x": 3.0,
240
- "y": 326.65362571734266,
241
- "width": 534.5003037876063,
242
- "height": 164.1162308796267,
243
- "textbox_id": 1,
244
- "textbox_name": "p<PosterAgent Framework>_t1",
245
- "num_chars": 540
246
- },
247
- {
248
- "panel_id": 3,
249
- "x": 3.0,
250
- "y": 700.8837691203732,
251
- "width": 534.5003037876063,
252
- "height": 196.11623087962676,
253
- "textbox_id": 2,
254
- "textbox_name": "p<PosterAgent Framework>_t2",
255
- "num_chars": 540
256
- },
257
- {
258
- "panel_id": 4,
259
- "x": 543.5003037876063,
260
- "y": 294.65362571734266,
261
- "width": 533.7539427327752,
262
- "height": 32.0,
263
- "textbox_id": 0,
264
- "textbox_name": "p<Evaluation & Results>_t0",
265
- "num_chars": 180
266
- },
267
- {
268
- "panel_id": 4,
269
- "x": 543.5003037876063,
270
- "y": 326.65362571734266,
271
- "width": 533.7539427327752,
272
- "height": 164.6936919681046,
273
- "textbox_id": 1,
274
- "textbox_name": "p<Evaluation & Results>_t1",
275
- "num_chars": 540
276
- },
277
- {
278
- "panel_id": 4,
279
- "x": 543.5003037876063,
280
- "y": 700.3063080318955,
281
- "width": 533.7539427327752,
282
- "height": 196.69369196810453,
283
- "textbox_id": 2,
284
- "textbox_name": "p<Evaluation & Results>_t2",
285
- "num_chars": 540
286
  },
287
  {
288
  "panel_id": 5,
289
- "x": 1083.2542465203815,
290
- "y": 93.0,
291
- "width": 113.74575347961854,
292
- "height": 32.0,
293
- "textbox_id": 0,
294
- "textbox_name": "p<Conclusion>_t0",
295
- "num_chars": 30
296
  },
297
  {
298
- "panel_id": 5,
299
- "x": 1083.2542465203815,
300
- "y": 125.0,
301
- "width": 113.74575347961854,
302
- "height": 772.0,
303
- "textbox_id": 1,
304
- "textbox_name": "p<Conclusion>_t1",
305
- "num_chars": 420
306
  }
307
  ],
308
- "panel_arrangement_inches": [
309
  {
310
- "panel_name": "Poster Title & Author",
311
  "panel_id": 0,
312
- "x": 0.0,
313
- "y": 0.0,
314
- "width": 48.0,
315
- "height": 3.6
316
  },
317
  {
318
- "panel_name": "Introduction",
319
  "panel_id": 1,
320
- "x": 0.0,
321
- "y": 3.6,
322
- "width": 22.02485184674804,
323
- "height": 8.066145028693706
324
  },
325
  {
326
- "panel_name": "Benchmark & Metrics",
327
  "panel_id": 2,
328
- "x": 22.02485184674804,
329
- "y": 3.6,
330
- "width": 21.18531801406722,
331
- "height": 8.066145028693706
332
  },
333
  {
334
- "panel_name": "PosterAgent Framework",
335
  "panel_id": 3,
336
- "x": 0.0,
337
- "y": 11.666145028693707,
338
- "width": 21.620012151504252,
339
- "height": 24.33385497130629
340
- },
341
- {
342
- "panel_name": "Evaluation & Results",
343
- "panel_id": 4,
344
- "x": 21.620012151504252,
345
- "y": 11.666145028693707,
346
- "width": 21.590157709311008,
347
- "height": 24.33385497130629
348
  },
349
  {
350
- "panel_name": "Conclusion",
351
  "panel_id": 5,
352
- "x": 43.210169860815256,
353
- "y": 3.6,
354
- "width": 4.789830139184741,
355
- "height": 32.4
356
- }
357
- ],
358
- "figure_arrangement_inches": [
359
- {
360
- "panel_id": 2,
361
- "x": 30.54690347903132,
362
- "y": 5.285229005738741,
363
- "width": 4.141214749500655,
364
- "height": 4.6956870172162235,
365
- "figure_id": 0,
366
- "figure_name": "p<Benchmark & Metrics>_f0",
367
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-1.png"
368
- },
369
- {
370
- "panel_id": 3,
371
- "x": 2.258001215150425,
372
- "y": 19.630794263878773,
373
- "width": 17.1040097212034,
374
- "height": 8.404556500936154,
375
- "figure_id": 0,
376
- "figure_name": "p<PosterAgent Framework>_f0",
377
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-8.png"
378
- },
379
- {
380
- "panel_id": 4,
381
- "x": 23.87502792243535,
382
- "y": 19.65389270741789,
383
- "width": 17.080126167448807,
384
- "height": 8.358359613857926,
385
- "figure_id": 0,
386
- "figure_name": "p<Evaluation & Results>_f0",
387
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-table-1.png"
388
- }
389
- ],
390
- "text_arrangement_inches": [
391
- {
392
- "panel_id": 0,
393
- "x": 0.12,
394
- "y": 0.12,
395
- "width": 47.76,
396
- "height": 1.4933333333333334,
397
- "textbox_id": 0,
398
- "textbox_name": "p<Poster Title & Author>_t0",
399
- "num_chars": 410
400
- },
401
- {
402
- "panel_id": 0,
403
- "x": 0.12,
404
- "y": 1.6133333333333335,
405
- "width": 47.76,
406
- "height": 1.8666666666666665,
407
- "textbox_id": 0,
408
- "textbox_name": "p<Poster Title & Author>_t1",
409
- "num_chars": 410
410
  },
411
  {
412
- "panel_id": 1,
413
- "x": 0.12,
414
- "y": 3.72,
415
- "width": 21.784851846748037,
416
- "height": 1.28,
417
- "textbox_id": 0,
418
- "textbox_name": "p<Introduction>_t0",
419
- "num_chars": 180
420
- },
421
- {
422
- "panel_id": 1,
423
- "x": 0.12,
424
- "y": 5.0,
425
- "width": 21.784851846748037,
426
- "height": 6.546145028693706,
427
- "textbox_id": 1,
428
- "textbox_name": "p<Introduction>_t1",
429
- "num_chars": 540
430
- },
431
- {
432
- "panel_id": 2,
433
- "x": 22.14485184674804,
434
- "y": 3.72,
435
- "width": 20.94531801406722,
436
- "height": 1.28,
437
- "textbox_id": 0,
438
- "textbox_name": "p<Benchmark & Metrics>_t0",
439
- "num_chars": 180
440
- },
441
- {
442
- "panel_id": 2,
443
- "x": 22.14485184674804,
444
- "y": 5.0,
445
- "width": 20.94531801406722,
446
- "height": 0.28522900573874155,
447
- "textbox_id": 1,
448
- "textbox_name": "p<Benchmark & Metrics>_t1",
449
- "num_chars": 180
450
  },
451
  {
452
- "panel_id": 2,
453
- "x": 22.14485184674804,
454
- "y": 9.980916022954965,
455
- "width": 20.94531801406722,
456
- "height": 1.5652290057387415,
457
- "textbox_id": 2,
458
- "textbox_name": "p<Benchmark & Metrics>_t2",
459
- "num_chars": 180
460
- },
461
- {
462
- "panel_id": 3,
463
- "x": 0.12,
464
- "y": 11.786145028693706,
465
- "width": 21.380012151504253,
466
- "height": 1.28,
467
- "textbox_id": 0,
468
- "textbox_name": "p<PosterAgent Framework>_t0",
469
- "num_chars": 180
470
- },
471
- {
472
- "panel_id": 3,
473
- "x": 0.12,
474
- "y": 13.066145028693706,
475
- "width": 21.380012151504253,
476
- "height": 6.564649235185068,
477
- "textbox_id": 1,
478
- "textbox_name": "p<PosterAgent Framework>_t1",
479
- "num_chars": 540
480
- },
481
- {
482
- "panel_id": 3,
483
- "x": 0.12,
484
- "y": 28.03535076481493,
485
- "width": 21.380012151504253,
486
- "height": 7.84464923518507,
487
- "textbox_id": 2,
488
- "textbox_name": "p<PosterAgent Framework>_t2",
489
- "num_chars": 540
490
- },
491
- {
492
- "panel_id": 4,
493
- "x": 21.740012151504253,
494
- "y": 11.786145028693706,
495
- "width": 21.350157709311006,
496
- "height": 1.28,
497
- "textbox_id": 0,
498
- "textbox_name": "p<Evaluation & Results>_t0",
499
- "num_chars": 180
500
- },
501
- {
502
- "panel_id": 4,
503
- "x": 21.740012151504253,
504
- "y": 13.066145028693706,
505
- "width": 21.350157709311006,
506
- "height": 6.587747678724184,
507
- "textbox_id": 1,
508
- "textbox_name": "p<Evaluation & Results>_t1",
509
- "num_chars": 540
510
- },
511
- {
512
- "panel_id": 4,
513
- "x": 21.740012151504253,
514
- "y": 28.01225232127582,
515
- "width": 21.350157709311006,
516
- "height": 7.867747678724181,
517
- "textbox_id": 2,
518
- "textbox_name": "p<Evaluation & Results>_t2",
519
- "num_chars": 540
520
- },
521
- {
522
- "panel_id": 5,
523
- "x": 43.33016986081526,
524
- "y": 3.72,
525
- "width": 4.549830139184742,
526
- "height": 1.28,
527
- "textbox_id": 0,
528
- "textbox_name": "p<Conclusion>_t0",
529
- "num_chars": 30
530
- },
531
- {
532
- "panel_id": 5,
533
- "x": 43.33016986081526,
534
- "y": 5.0,
535
- "width": 4.549830139184742,
536
- "height": 30.88,
537
- "textbox_id": 1,
538
- "textbox_name": "p<Conclusion>_t1",
539
- "num_chars": 420
540
  }
541
  ]
542
  }
 
1
  {
 
 
 
 
2
  "panels": [
3
  {
4
  "panel_id": 0,
5
+ "section_name": "Why Posters Are Hard",
6
+ "tp": 0.12082710513203787,
7
+ "text_len": 485,
8
+ "gp": 0.009888851380803912,
9
+ "figure_size": 64769,
10
+ "figure_aspect": 0.8819188191881919
 
 
11
  },
12
  {
13
  "panel_id": 1,
14
+ "section_name": "Benchmark and Data",
15
+ "tp": 0.12531141006477328,
16
+ "text_len": 503,
17
+ "gp": 0.04796373085236436,
18
+ "figure_size": 314148,
19
+ "figure_aspect": 1.0125673249551166
 
 
20
  },
21
  {
22
  "panel_id": 2,
23
+ "section_name": "PaperQuiz: What Matters",
24
+ "tp": 0.11285500747384156,
25
+ "text_len": 453,
26
+ "gp": 0.1192882298865948,
27
+ "figure_size": 781302,
28
+ "figure_aspect": 5.032994923857868
 
 
29
  },
30
  {
31
  "panel_id": 3,
32
+ "section_name": "PosterAgent Pipeline",
33
+ "tp": 0.10637767812655705,
34
+ "text_len": 427,
35
+ "gp": 0.29174897960959734,
36
  "figure_size": 1910868,
37
+ "figure_aspect": 2.0350877192982457
 
 
38
  },
39
  {
40
  "panel_id": 4,
41
+ "section_name": "Parser: Structured Assets",
42
+ "tp": 0.10612855007473841,
43
+ "text_len": 426,
44
  "gp": 0,
45
  "figure_size": 0,
46
+ "figure_aspect": 1
47
  },
48
  {
49
  "panel_id": 5,
50
+ "section_name": "Planner: Layout Mastery",
51
+ "tp": 0.10089686098654709,
52
+ "text_len": 405,
53
+ "gp": 0.08839429109643054,
54
+ "figure_size": 578956,
55
+ "figure_aspect": 1.3959627329192548
56
+ },
57
+ {
58
+ "panel_id": 6,
59
+ "section_name": "Painter\u2013Commenter Loop",
60
+ "tp": 0.10662680617837568,
61
+ "text_len": 428,
62
+ "gp": 0.15157520979208358,
63
+ "figure_size": 992772,
64
+ "figure_aspect": 1.4480676328502415
65
+ },
66
+ {
67
+ "panel_id": 7,
68
+ "section_name": "Results: Stronger, Leaner",
69
+ "tp": 0.10986547085201794,
70
+ "text_len": 441,
71
+ "gp": 0.2911407073821255,
72
+ "figure_size": 1906884,
73
+ "figure_aspect": 2.0434782608695654
74
  },
75
  {
76
+ "panel_id": 8,
77
+ "section_name": "Limits and Next Steps",
78
+ "tp": 0.1111111111111111,
79
+ "text_len": 446,
80
+ "gp": 0,
81
+ "figure_size": 0,
82
+ "figure_aspect": 1
 
83
  }
84
  ],
85
+ "figure_arrangement": [
86
  {
 
87
  "panel_id": 0,
88
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-1.png"
 
 
 
89
  },
90
  {
 
91
  "panel_id": 1,
92
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-6.png"
 
 
 
93
  },
94
  {
 
95
  "panel_id": 2,
96
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-7.png"
 
 
 
97
  },
98
  {
 
99
  "panel_id": 3,
100
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-8.png"
101
  },
102
  {
 
103
  "panel_id": 5,
104
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-30.png"
105
  },
106
  {
107
+ "panel_id": 6,
108
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-61.png"
109
  },
110
  {
111
+ "panel_id": 7,
112
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-table-1.png"
113
  }
114
  ]
115
  }
posterbuilder/build_poster.py CHANGED
@@ -3,7 +3,7 @@
3
  import json, re, pathlib, shutil, os
4
 
5
  # ===================== Auto-locate the project root =====================
6
- IMAGES_DIR_NAME = "<4o_4o>_images_and_tables"  # blue folder name
7
 
8
  def find_project_root(start: pathlib.Path) -> pathlib.Path:
9
  cur = start.resolve()
 
3
  import json, re, pathlib, shutil, os
4
 
5
  # ===================== Auto-locate the project root =====================
6
+ IMAGES_DIR_NAME = "<gpt-5_gpt-5>_images_and_tables"  # blue folder name
7
 
8
  def find_project_root(start: pathlib.Path) -> pathlib.Path:
9
  cur = start.resolve()
posterbuilder/cambridge_template.tex CHANGED
@@ -22,6 +22,13 @@
22
  \pgfplotsset{compat=1.14}
23
  \usepackage{anyfontsize}
24
 
25
  % ====================
26
  % Lengths
27
  % ====================
@@ -60,8 +67,8 @@
60
  % ====================
61
 
62
  % use this to include logos on the left and/or right side of the header:
63
- % \logoright{\includegraphics[height=7cm]{logo1.pdf}}
64
- % \logoleft{\includegraphics[height=7cm]{logo2.pdf}}
65
 
66
  % ====================
67
  % Body
@@ -75,7 +82,6 @@
75
  {
76
  \begin{tikzpicture}[remember picture,overlay]
77
  \node [anchor=north west, inner sep=3cm] at ([xshift=0.0cm,yshift=1.0cm]current page.north west)
78
- {\includegraphics[height=4.5cm]{logos/cambridge-reversed-color-logo.eps}};
79
  \end{tikzpicture}
80
  }
81
 
 
22
  \pgfplotsset{compat=1.14}
23
  \usepackage{anyfontsize}
24
 
25
+ \definecolor{nipspurple}{RGB}{94,46,145}
26
+ \setbeamercolor{headline}{bg=white, fg=black}
27
+ \setbeamercolor{block title}{bg=nipspurple, fg=white}
28
+ \addtobeamertemplate{block begin}{
29
+ \setlength{\textpaddingtop}{0.2em}%
30
+ \setlength{\textpaddingbottom}{0.2em}%
31
+ }{}
32
  % ====================
33
  % Lengths
34
  % ====================
 
67
  % ====================
68
 
69
  % use this to include logos on the left and/or right side of the header:
70
+ \logoright{\includegraphics[height=5cm]{logos/right_logo.png}}
71
+ \logoleft{\includegraphics[height=4cm]{logos/left_logo.png}}
72
 
73
  % ====================
74
  % Body
 
82
  {
83
  \begin{tikzpicture}[remember picture,overlay]
84
  \node [anchor=north west, inner sep=3cm] at ([xshift=0.0cm,yshift=1.0cm]current page.north west)
 
85
  \end{tikzpicture}
86
  }
87
 
posterbuilder/contents copy/arrangement.json DELETED
@@ -1,783 +0,0 @@
1
- {
2
- "poster_width": 1200,
3
- "poster_height": 900,
4
- "poster_width_inches": 48.0,
5
- "poster_height_inches": 36.0,
6
- "panels": [
7
- {
8
- "panel_id": 0,
9
- "section_name": "Poster Title & Author",
10
- "tp": 0.11634695579649708,
11
- "text_len": 279,
12
- "gp": 0,
13
- "figure_size": 0,
14
- "figure_aspect": 1,
15
- "sp": 0.05882283430478395,
16
- "rp": 2.5404412005449477
17
- },
18
- {
19
- "panel_id": 1,
20
- "section_name": "Abstract",
21
- "tp": 0.15804837364470392,
22
- "text_len": 379,
23
- "gp": 0,
24
- "figure_size": 0,
25
- "figure_aspect": 1,
26
- "sp": 0.07189480082267583,
27
- "rp": 2.4322478518046795
28
- },
29
- {
30
- "panel_id": 2,
31
- "section_name": "Preliminaries",
32
- "tp": 0.0963302752293578,
33
- "text_len": 231,
34
- "gp": 0.5791655366369068,
35
- "figure_size": 2697149,
36
- "figure_aspect": 1.393961179007908,
37
- "sp": 0.23866418891649963,
38
- "rp": 1.076424221135003
39
- },
40
- {
41
- "panel_id": 3,
42
- "section_name": "Experiments",
43
- "tp": 0.10758965804837364,
44
- "text_len": 258,
45
- "gp": 0,
46
- "figure_size": 0,
47
- "figure_aspect": 1,
48
- "sp": 0.056077721336026655,
49
- "rp": 2.563161803780404
50
- },
51
- {
52
- "panel_id": 4,
53
- "section_name": "TEBOpt",
54
- "tp": 0.08632193494578816,
55
- "text_len": 207,
56
- "gp": 0.1977243938477422,
57
- "figure_size": 920794,
58
- "figure_aspect": 2.3723916532905296,
59
- "sp": 0.1129501119808965,
60
- "rp": 2.1008022748899986
61
- },
62
- {
63
- "panel_id": 5,
64
- "section_name": "Qualitative & Quantitative Results",
65
- "tp": 0.09257714762301918,
66
- "text_len": 222,
67
- "gp": 0.1792402205989877,
68
- "figure_size": 834714,
69
- "figure_aspect": 1.651195499296765,
70
- "sp": 0.10897098426749077,
71
- "rp": 2.132955085244183
72
- },
73
- {
74
- "panel_id": 6,
75
- "section_name": "Introduction",
76
- "tp": 0.1542952460383653,
77
- "text_len": 370,
78
- "gp": 0,
79
- "figure_size": 0,
80
- "figure_aspect": 1,
81
- "sp": 0.07071832383606555,
82
- "rp": 2.4419852531913038
83
- },
84
- {
85
- "panel_id": 7,
86
- "section_name": "Discussion",
87
- "tp": 0.0896580483736447,
88
- "text_len": 215,
89
- "gp": 0.0438698489163632,
90
- "figure_size": 204300,
91
- "figure_aspect": 1.008888888888889,
92
- "sp": 0.06455443146973633,
93
- "rp": 2.4948568265511564
94
- },
95
- {
96
- "panel_id": 8,
97
- "section_name": "Conclusion",
98
- "tp": 0.09883236030025021,
99
- "text_len": 237,
100
- "gp": 0,
101
- "figure_size": 0,
102
- "figure_aspect": 1,
103
- "sp": 0.053332608367269364,
104
- "rp": 2.58588240701586
105
- }
106
- ],
107
- "panel_arrangement": [
108
- {
109
- "panel_name": "Poster Title & Author",
110
- "panel_id": 0,
111
- "x": 0,
112
- "y": 0,
113
- "width": 1200,
114
- "height": 90.0
115
- },
116
- {
117
- "panel_name": "Abstract",
118
- "panel_id": 1,
119
- "x": 0,
120
- "y": 90.0,
121
- "width": 479.52708207863833,
122
- "height": 187.5160294515261
123
- },
124
- {
125
- "panel_name": "Preliminaries",
126
- "panel_id": 2,
127
- "x": 0,
128
- "y": 277.5160294515261,
129
- "width": 479.52708207863833,
130
- "height": 622.483970548474
131
- },
132
- {
133
- "panel_name": "Experiments",
134
- "panel_id": 3,
135
- "x": 479.52708207863833,
136
- "y": 90.0,
137
- "width": 239.02855954850014,
138
- "height": 293.4233135625409
139
- },
140
- {
141
- "panel_name": "TEBOpt",
142
- "panel_id": 4,
143
- "x": 718.5556416271385,
144
- "y": 90.0,
145
- "width": 481.4443583728615,
146
- "height": 293.4233135625409
147
- },
148
- {
149
- "panel_name": "Qualitative & Quantitative Results",
150
- "panel_id": 5,
151
- "x": 479.52708207863833,
152
- "y": 383.4233135625409,
153
- "width": 263.8336129444118,
154
- "height": 516.5766864374591
155
- },
156
- {
157
- "panel_name": "Introduction",
158
- "panel_id": 6,
159
- "x": 743.3606950230501,
160
- "y": 383.4233135625409,
161
- "width": 456.6393049769498,
162
- "height": 193.69246285577307
163
- },
164
- {
165
- "panel_name": "Discussion",
166
- "panel_id": 7,
167
- "x": 743.3606950230501,
168
- "y": 577.115776418314,
169
- "width": 456.6393049769498,
170
- "height": 176.80999974791118
171
- },
172
- {
173
- "panel_name": "Conclusion",
174
- "panel_id": 8,
175
- "x": 743.3606950230501,
176
- "y": 753.9257761662252,
177
- "width": 456.6393049769498,
178
- "height": 146.07422383377482
179
- }
180
- ],
181
- "figure_arrangement": [
182
- {
183
- "panel_id": 2,
184
- "x": 50.35270820786383,
185
- "y": 452.87845388586913,
186
- "width": 378.8216656629107,
187
- "height": 271.7591216797879,
188
- "figure_id": 0,
189
- "figure_name": "p<Preliminaries>_f0",
190
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-2.png"
191
- },
192
- {
193
- "panel_id": 4,
194
- "x": 769.1000774644247,
195
- "y": 156.54877849539963,
196
- "width": 380.3554866982892,
197
- "height": 160.32575657174166,
198
- "figure_id": 0,
199
- "figure_name": "p<TEBOpt>_f0",
200
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-table-4.png"
201
- },
202
- {
203
- "panel_id": 5,
204
- "x": 508.3104433730795,
205
- "y": 579.2517934751454,
206
- "width": 206.26689035552945,
207
- "height": 124.91972661224996,
208
- "figure_id": 0,
209
- "figure_name": "p<Qualitative & Quantitative Results>_f0",
210
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-13.png"
211
- },
212
- {
213
- "panel_id": 7,
214
- "x": 919.9818542544906,
215
- "y": 614.2777763678962,
216
- "width": 103.39698651406891,
217
- "height": 102.4859998487467,
218
- "figure_id": 0,
219
- "figure_name": "p<Discussion>_f0",
220
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-9.png"
221
- }
222
- ],
223
- "text_arrangement": [
224
- {
225
- "panel_id": 0,
226
- "x": 3.0,
227
- "y": 3.0,
228
- "width": 1194.0,
229
- "height": 37.333333333333336,
230
- "textbox_id": 0,
231
- "textbox_name": "p<Poster Title & Author>_t0",
232
- "num_chars": 410
233
- },
234
- {
235
- "panel_id": 0,
236
- "x": 3.0,
237
- "y": 40.333333333333336,
238
- "width": 1194.0,
239
- "height": 46.666666666666664,
240
- "textbox_id": 0,
241
- "textbox_name": "p<Poster Title & Author>_t1",
242
- "num_chars": 410
243
- },
244
- {
245
- "panel_id": 1,
246
- "x": 3.0,
247
- "y": 93.0,
248
- "width": 473.52708207863833,
249
- "height": 32.0,
250
- "textbox_id": 0,
251
- "textbox_name": "p<Abstract>_t0",
252
- "num_chars": 160
253
- },
254
- {
255
- "panel_id": 1,
256
- "x": 3.0,
257
- "y": 125.0,
258
- "width": 473.52708207863833,
259
- "height": 149.5160294515261,
260
- "textbox_id": 1,
261
- "textbox_name": "p<Abstract>_t1",
262
- "num_chars": 320
263
- },
264
- {
265
- "panel_id": 2,
266
- "x": 3.0,
267
- "y": 280.5160294515261,
268
- "width": 473.52708207863833,
269
- "height": 32.0,
270
- "textbox_id": 0,
271
- "textbox_name": "p<Preliminaries>_t0",
272
- "num_chars": 160
273
- },
274
- {
275
- "panel_id": 2,
276
- "x": 3.0,
277
- "y": 312.5160294515261,
278
- "width": 473.52708207863833,
279
- "height": 140.36242443434304,
280
- "textbox_id": 1,
281
- "textbox_name": "p<Preliminaries>_t1",
282
- "num_chars": 320
283
- },
284
- {
285
- "panel_id": 2,
286
- "x": 3.0,
287
- "y": 724.637575565657,
288
- "width": 473.52708207863833,
289
- "height": 172.36242443434298,
290
- "textbox_id": 2,
291
- "textbox_name": "p<Preliminaries>_t2",
292
- "num_chars": 480
293
- },
294
- {
295
- "panel_id": 3,
296
- "x": 482.52708207863833,
297
- "y": 93.0,
298
- "width": 233.02855954850014,
299
- "height": 32.0,
300
- "textbox_id": 0,
301
- "textbox_name": "p<Experiments>_t0",
302
- "num_chars": 80
303
- },
304
- {
305
- "panel_id": 3,
306
- "x": 482.52708207863833,
307
- "y": 125.0,
308
- "width": 233.02855954850014,
309
- "height": 255.4233135625409,
310
- "textbox_id": 1,
311
- "textbox_name": "p<Experiments>_t1",
312
- "num_chars": 320
313
- },
314
- {
315
- "panel_id": 4,
316
- "x": 721.5556416271385,
317
- "y": 93.0,
318
- "width": 475.4443583728615,
319
- "height": 32.0,
320
- "textbox_id": 0,
321
- "textbox_name": "p<TEBOpt>_t0",
322
- "num_chars": 160
323
- },
324
- {
325
- "panel_id": 4,
326
- "x": 721.5556416271385,
327
- "y": 125.0,
328
- "width": 475.4443583728615,
329
- "height": 31.54877849539963,
330
- "textbox_id": 1,
331
- "textbox_name": "p<TEBOpt>_t1",
332
- "num_chars": 160
333
- },
334
- {
335
- "panel_id": 4,
336
- "x": 721.5556416271385,
337
- "y": 316.8745350671413,
338
- "width": 475.4443583728615,
339
- "height": 63.548778495399574,
340
- "textbox_id": 2,
341
- "textbox_name": "p<TEBOpt>_t2",
342
- "num_chars": 160
343
- },
344
- {
345
- "panel_id": 5,
346
- "x": 482.52708207863833,
347
- "y": 386.4233135625409,
348
- "width": 257.8336129444118,
349
- "height": 32.0,
350
- "textbox_id": 0,
351
- "textbox_name": "p<Qualitative & Quantitative Results>_t0",
352
- "num_chars": 80
353
- },
354
- {
355
- "panel_id": 5,
356
- "x": 482.52708207863833,
357
- "y": 418.4233135625409,
358
- "width": 257.8336129444118,
359
- "height": 160.8284799126045,
360
- "textbox_id": 1,
361
- "textbox_name": "p<Qualitative & Quantitative Results>_t1",
362
- "num_chars": 240
363
- },
364
- {
365
- "panel_id": 5,
366
- "x": 482.52708207863833,
367
- "y": 704.1715200873954,
368
- "width": 257.8336129444118,
369
- "height": 192.82847991260462,
370
- "textbox_id": 2,
371
- "textbox_name": "p<Qualitative & Quantitative Results>_t2",
372
- "num_chars": 240
373
- },
374
- {
375
- "panel_id": 6,
376
- "x": 746.3606950230501,
377
- "y": 386.4233135625409,
378
- "width": 450.6393049769498,
379
- "height": 32.0,
380
- "textbox_id": 0,
381
- "textbox_name": "p<Introduction>_t0",
382
- "num_chars": 150
383
- },
384
- {
385
- "panel_id": 6,
386
- "x": 746.3606950230501,
387
- "y": 418.4233135625409,
388
- "width": 450.6393049769498,
389
- "height": 155.69246285577307,
390
- "textbox_id": 1,
391
- "textbox_name": "p<Introduction>_t1",
392
- "num_chars": 300
393
- },
394
- {
395
- "panel_id": 7,
396
- "x": 746.3606950230501,
397
- "y": 580.115776418314,
398
- "width": 450.6393049769498,
399
- "height": 32.0,
400
- "textbox_id": 0,
401
- "textbox_name": "p<Discussion>_t0",
402
- "num_chars": 150
403
- },
404
- {
405
- "panel_id": 7,
406
- "x": 746.3606950230501,
407
- "y": 612.115776418314,
408
- "width": 450.6393049769498,
409
- "height": 2.1619999495821958,
410
- "textbox_id": 1,
411
- "textbox_name": "p<Discussion>_t1",
412
- "num_chars": 150
413
- },
414
- {
415
- "panel_id": 7,
416
- "x": 746.3606950230501,
417
- "y": 716.7637762166429,
418
- "width": 450.6393049769498,
419
- "height": 34.16199994958231,
420
- "textbox_id": 2,
421
- "textbox_name": "p<Discussion>_t2",
422
- "num_chars": 150
423
- },
424
- {
425
- "panel_id": 8,
426
- "x": 746.3606950230501,
427
- "y": 756.9257761662252,
428
- "width": 450.6393049769498,
429
- "height": 32.0,
430
- "textbox_id": 0,
431
- "textbox_name": "p<Conclusion>_t0",
432
- "num_chars": 150
433
- },
434
- {
435
- "panel_id": 8,
436
- "x": 746.3606950230501,
437
- "y": 788.9257761662252,
438
- "width": 450.6393049769498,
439
- "height": 108.07422383377482,
440
- "textbox_id": 1,
441
- "textbox_name": "p<Conclusion>_t1",
442
- "num_chars": 300
443
- }
444
- ],
445
- "panel_arrangement_inches": [
446
- {
447
- "panel_name": "Poster Title & Author",
448
- "panel_id": 0,
449
- "x": 0.0,
450
- "y": 0.0,
451
- "width": 48.0,
452
- "height": 3.6
453
- },
454
- {
455
- "panel_name": "Abstract",
456
- "panel_id": 1,
457
- "x": 0.0,
458
- "y": 3.6,
459
- "width": 19.181083283145533,
460
- "height": 7.500641178061044
461
- },
462
- {
463
- "panel_name": "Preliminaries",
464
- "panel_id": 2,
465
- "x": 0.0,
466
- "y": 11.100641178061045,
467
- "width": 19.181083283145533,
468
- "height": 24.899358821938957
469
- },
470
- {
471
- "panel_name": "Experiments",
472
- "panel_id": 3,
473
- "x": 19.181083283145533,
474
- "y": 3.6,
475
- "width": 9.561142381940005,
476
- "height": 11.736932542501636
477
- },
478
- {
479
- "panel_name": "TEBOpt",
480
- "panel_id": 4,
481
- "x": 28.742225665085538,
482
- "y": 3.6,
483
- "width": 19.25777433491446,
484
- "height": 11.736932542501636
485
- },
486
- {
487
- "panel_name": "Qualitative & Quantitative Results",
488
- "panel_id": 5,
489
- "x": 19.181083283145533,
490
- "y": 15.336932542501636,
491
- "width": 10.553344517776473,
492
- "height": 20.663067457498364
493
- },
494
- {
495
- "panel_name": "Introduction",
496
- "panel_id": 6,
497
- "x": 29.734427800922003,
498
- "y": 15.336932542501636,
499
- "width": 18.265572199077994,
500
- "height": 7.7476985142309225
501
- },
502
- {
503
- "panel_name": "Discussion",
504
- "panel_id": 7,
505
- "x": 29.734427800922003,
506
- "y": 23.08463105673256,
507
- "width": 18.265572199077994,
508
- "height": 7.072399989916447
509
- },
510
- {
511
- "panel_name": "Conclusion",
512
- "panel_id": 8,
513
- "x": 29.734427800922003,
514
- "y": 30.15703104664901,
515
- "width": 18.265572199077994,
516
- "height": 5.842968953350993
517
- }
518
- ],
519
- "figure_arrangement_inches": [
520
- {
521
- "panel_id": 2,
522
- "x": 2.014108328314553,
523
- "y": 18.115138155434764,
524
- "width": 15.152866626516428,
525
- "height": 10.870364867191515,
526
- "figure_id": 0,
527
- "figure_name": "p<Preliminaries>_f0",
528
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-2.png"
529
- },
530
- {
531
- "panel_id": 4,
532
- "x": 30.764003098576985,
533
- "y": 6.261951139815985,
534
- "width": 15.214219467931569,
535
- "height": 6.4130302628696665,
536
- "figure_id": 0,
537
- "figure_name": "p<TEBOpt>_f0",
538
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-table-4.png"
539
- },
540
- {
541
- "panel_id": 5,
542
- "x": 20.33241773492318,
543
- "y": 23.170071739005817,
544
- "width": 8.250675614221178,
545
- "height": 4.996789064489999,
546
- "figure_id": 0,
547
- "figure_name": "p<Qualitative & Quantitative Results>_f0",
548
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-13.png"
549
- },
550
- {
551
- "panel_id": 7,
552
- "x": 36.79927417017962,
553
- "y": 24.571111054715846,
554
- "width": 4.135879460562757,
555
- "height": 4.0994399939498685,
556
- "figure_id": 0,
557
- "figure_name": "p<Discussion>_f0",
558
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-9.png"
559
- }
560
- ],
561
- "text_arrangement_inches": [
562
- {
563
- "panel_id": 0,
564
- "x": 0.12,
565
- "y": 0.12,
566
- "width": 47.76,
567
- "height": 1.4933333333333334,
568
- "textbox_id": 0,
569
- "textbox_name": "p<Poster Title & Author>_t0",
570
- "num_chars": 410
571
- },
572
- {
573
- "panel_id": 0,
574
- "x": 0.12,
575
- "y": 1.6133333333333335,
576
- "width": 47.76,
577
- "height": 1.8666666666666665,
578
- "textbox_id": 0,
579
- "textbox_name": "p<Poster Title & Author>_t1",
580
- "num_chars": 410
581
- },
582
- {
583
- "panel_id": 1,
584
- "x": 0.12,
585
- "y": 3.72,
586
- "width": 18.941083283145534,
587
- "height": 1.28,
588
- "textbox_id": 0,
589
- "textbox_name": "p<Abstract>_t0",
590
- "num_chars": 160
591
- },
592
- {
593
- "panel_id": 1,
594
- "x": 0.12,
595
- "y": 5.0,
596
- "width": 18.941083283145534,
597
- "height": 5.980641178061044,
598
- "textbox_id": 1,
599
- "textbox_name": "p<Abstract>_t1",
600
- "num_chars": 320
601
- },
602
- {
603
- "panel_id": 2,
604
- "x": 0.12,
605
- "y": 11.220641178061044,
606
- "width": 18.941083283145534,
607
- "height": 1.28,
608
- "textbox_id": 0,
609
- "textbox_name": "p<Preliminaries>_t0",
610
- "num_chars": 160
611
- },
612
- {
613
- "panel_id": 2,
614
- "x": 0.12,
615
- "y": 12.500641178061043,
616
- "width": 18.941083283145534,
617
- "height": 5.614496977373721,
618
- "textbox_id": 1,
619
- "textbox_name": "p<Preliminaries>_t1",
620
- "num_chars": 320
621
- },
622
- {
623
- "panel_id": 2,
624
- "x": 0.12,
625
- "y": 28.98550302262628,
626
- "width": 18.941083283145534,
627
- "height": 6.894496977373719,
628
- "textbox_id": 2,
629
- "textbox_name": "p<Preliminaries>_t2",
630
- "num_chars": 480
631
- },
632
- {
633
- "panel_id": 3,
634
- "x": 19.301083283145534,
635
- "y": 3.72,
636
- "width": 9.321142381940005,
637
- "height": 1.28,
638
- "textbox_id": 0,
639
- "textbox_name": "p<Experiments>_t0",
640
- "num_chars": 80
641
- },
642
- {
643
- "panel_id": 3,
644
- "x": 19.301083283145534,
645
- "y": 5.0,
646
- "width": 9.321142381940005,
647
- "height": 10.216932542501636,
648
- "textbox_id": 1,
649
- "textbox_name": "p<Experiments>_t1",
650
- "num_chars": 320
651
- },
652
- {
653
- "panel_id": 4,
654
- "x": 28.86222566508554,
655
- "y": 3.72,
656
- "width": 19.01777433491446,
657
- "height": 1.28,
658
- "textbox_id": 0,
659
- "textbox_name": "p<TEBOpt>_t0",
660
- "num_chars": 160
661
- },
662
- {
663
- "panel_id": 4,
664
- "x": 28.86222566508554,
665
- "y": 5.0,
666
- "width": 19.01777433491446,
667
- "height": 1.2619511398159853,
668
- "textbox_id": 1,
669
- "textbox_name": "p<TEBOpt>_t1",
670
- "num_chars": 160
671
- },
672
- {
673
- "panel_id": 4,
674
- "x": 28.86222566508554,
675
- "y": 12.674981402685653,
676
- "width": 19.01777433491446,
677
- "height": 2.541951139815983,
678
- "textbox_id": 2,
679
- "textbox_name": "p<TEBOpt>_t2",
680
- "num_chars": 160
681
- },
682
- {
683
- "panel_id": 5,
684
- "x": 19.301083283145534,
685
- "y": 15.456932542501637,
686
- "width": 10.313344517776473,
687
- "height": 1.28,
688
- "textbox_id": 0,
689
- "textbox_name": "p<Qualitative & Quantitative Results>_t0",
690
- "num_chars": 80
691
- },
692
- {
693
- "panel_id": 5,
694
- "x": 19.301083283145534,
695
- "y": 16.736932542501634,
696
- "width": 10.313344517776473,
697
- "height": 6.433139196504181,
698
- "textbox_id": 1,
699
- "textbox_name": "p<Qualitative & Quantitative Results>_t1",
700
- "num_chars": 240
701
- },
702
- {
703
- "panel_id": 5,
704
- "x": 19.301083283145534,
705
- "y": 28.166860803495815,
706
- "width": 10.313344517776473,
707
- "height": 7.7131391965041844,
708
- "textbox_id": 2,
709
- "textbox_name": "p<Qualitative & Quantitative Results>_t2",
710
- "num_chars": 240
711
- },
712
- {
713
- "panel_id": 6,
714
- "x": 29.854427800922004,
715
- "y": 15.456932542501637,
716
- "width": 18.025572199077992,
717
- "height": 1.28,
718
- "textbox_id": 0,
719
- "textbox_name": "p<Introduction>_t0",
720
- "num_chars": 150
721
- },
722
- {
723
- "panel_id": 6,
724
- "x": 29.854427800922004,
725
- "y": 16.736932542501634,
726
- "width": 18.025572199077992,
727
- "height": 6.227698514230923,
728
- "textbox_id": 1,
729
- "textbox_name": "p<Introduction>_t1",
730
- "num_chars": 300
731
- },
732
- {
733
- "panel_id": 7,
734
- "x": 29.854427800922004,
735
- "y": 23.20463105673256,
736
- "width": 18.025572199077992,
737
- "height": 1.28,
738
- "textbox_id": 0,
739
- "textbox_name": "p<Discussion>_t0",
740
- "num_chars": 150
741
- },
742
- {
743
- "panel_id": 7,
744
- "x": 29.854427800922004,
745
- "y": 24.48463105673256,
746
- "width": 18.025572199077992,
747
- "height": 0.08647999798328783,
748
- "textbox_id": 1,
749
- "textbox_name": "p<Discussion>_t1",
750
- "num_chars": 150
751
- },
752
- {
753
- "panel_id": 7,
754
- "x": 29.854427800922004,
755
- "y": 28.670551048665715,
756
- "width": 18.025572199077992,
757
- "height": 1.3664799979832924,
758
- "textbox_id": 2,
759
- "textbox_name": "p<Discussion>_t2",
760
- "num_chars": 150
761
- },
762
- {
763
- "panel_id": 8,
764
- "x": 29.854427800922004,
765
- "y": 30.277031046649007,
766
- "width": 18.025572199077992,
767
- "height": 1.28,
768
- "textbox_id": 0,
769
- "textbox_name": "p<Conclusion>_t0",
770
- "num_chars": 150
771
- },
772
- {
773
- "panel_id": 8,
774
- "x": 29.854427800922004,
775
- "y": 31.55703104664901,
776
- "width": 18.025572199077992,
777
- "height": 4.322968953350993,
778
- "textbox_id": 1,
779
- "textbox_name": "p<Conclusion>_t1",
780
- "num_chars": 300
781
- }
782
- ]
783
- }
 
posterbuilder/contents copy/figure_caption.json DELETED
@@ -1,258 +0,0 @@
1
- {
2
- "1": {
3
- "caption": "a lion and elephant chicken and a dog an",
4
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-1.png",
5
- "width": 947,
6
- "height": 845,
7
- "figure_size": 800215,
8
- "figure_aspect": 1.1207100591715977
9
- },
10
- "2": {
11
- "caption": "Figure 2: Overview of the text-to-image generative model, including the details of the causal manner in attention mechanism. Because of the causal nature of the embedding, information is accumulated from the starting token through the end of the sequence, resulting in bias in the earlier token. To balance the critical information, we propose text embedding optimization for purifying the object token with equal weights within their corresponding embedding dimension.",
12
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-2.png",
13
- "width": 1939,
14
- "height": 1391,
15
- "figure_size": 2697149,
16
- "figure_aspect": 1.393961179007908
17
- },
18
- "5": {
19
- "caption": "<sot> A cat and a <eot> dog",
20
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-5.png",
21
- "width": 453,
22
- "height": 454,
23
- "figure_size": 205662,
24
- "figure_aspect": 0.9977973568281938
25
- },
26
- "9": {
27
- "caption": "Figure 3: Masking text embedding to identify the contribution of critical tokens, e.g., cat/dog, and special tokens, e.g., <sot>, <eot>, <pad>. The first row and the second row both contain cat and dog inside prompt but in different order. The analysis shows that special tokens contain general information about the given prompt. However, the cat/dog tokens carry more weight than the special tokens. In the last two columns, where one of the animal token embeddings is masked while retaining the special tokens' embedding, the generated image is predominantly influenced by the remaining animal's token embedding.",
28
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-9.png",
29
- "width": 454,
30
- "height": 450,
31
- "figure_size": 204300,
32
- "figure_aspect": 1.008888888888889
33
- },
34
- "10": {
35
- "caption": "<sot> A cat and a <eot> dog",
36
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-10.png",
37
- "width": 456,
38
- "height": 457,
39
- "figure_size": 208392,
40
- "figure_aspect": 0.9978118161925602
41
- },
42
- "11": {
43
- "caption": "Figure 4: Qualitative comparison of all methods. Every prompt uses the same seed.",
44
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-11.png",
45
- "width": 1952,
46
- "height": 644,
47
- "figure_size": 1257088,
48
- "figure_aspect": 3.031055900621118
49
- },
50
- "12": {
51
- "caption": "Figure 5: Qualitative comparison for the generated image with vs. without L TEB in Stable Diffusion 1.4. Every prompt uses the same seed.",
52
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-12.png",
53
- "width": 1947,
54
- "height": 794,
55
- "figure_size": 1545918,
56
- "figure_aspect": 2.452141057934509
57
- },
58
- "13": {
59
- "caption": "Figure 6: (a) The cosine similarity of text embedding from single word. (b) The KL distance of cross-attention maps that are triggered by two words. The data is ordered by their text embedding similarity.",
60
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-13.png",
61
- "width": 1174,
62
- "height": 711,
63
- "figure_size": 834714,
64
- "figure_aspect": 1.651195499296765
65
- },
66
- "14": {
67
- "caption": "Figure 8: Text-text similarity of the left one is 8.68% higher than that of the right one. It indicates that the metric cannot identify the mixture issue.",
68
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-14.png",
69
- "width": 481,
70
- "height": 485,
71
- "figure_size": 233285,
72
- "figure_aspect": 0.9917525773195877
73
- },
74
- "18": {
75
- "caption": "Figure 9: In two images both with mixed objects, full prompt similarity , minimum object similarity , and text-text similarity all vary greatly, making the evaluation metrics unreliable for object mixture and missing.",
76
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-18.png",
77
- "width": 488,
78
- "height": 489,
79
- "figure_size": 238632,
80
- "figure_aspect": 0.9979550102249489
81
- },
82
- "19": {
83
- "caption": "Figure 10: Demonstrating the 90% bounding box overlapping and corresponding object mixture in generated image and cross-attention maps during denoising steps.",
84
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-19.png",
85
- "width": 1981,
86
- "height": 840,
87
- "figure_size": 1664040,
88
- "figure_aspect": 2.3583333333333334
89
- },
90
- "21": {
91
- "caption": "SD 1.4 Missing BoyA bear and a 'frog",
92
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-21.png",
93
- "width": 660,
94
- "height": 331,
95
- "figure_size": 218460,
96
- "figure_aspect": 1.9939577039274925
97
- },
98
- "22": {
99
- "caption": "A red bench and a green bird",
100
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-22.png",
101
- "width": 654,
102
- "height": 329,
103
- "figure_size": 215166,
104
- "figure_aspect": 1.987841945288754
105
- },
106
- "23": {
107
- "caption": "SD 1.4 Cat Missingred bird and a brown boat",
108
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-23.png",
109
- "width": 655,
110
- "height": 332,
111
- "figure_size": 217460,
112
- "figure_aspect": 1.9728915662650603
113
- },
114
- "24": {
115
- "caption": "Figure 11: More qualitative results on SD 1.4 in complex prompts from color and spatial sets within T2I-CompBench [5].",
116
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-24.png",
117
- "width": 658,
118
- "height": 332,
119
- "figure_size": 218456,
120
- "figure_aspect": 1.9819277108433735
121
- },
122
- "26": {
123
- "caption": "SD 1.4 + LTEB",
124
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-26.png",
125
- "width": 660,
126
- "height": 333,
127
- "figure_size": 219780,
128
- "figure_aspect": 1.981981981981982
129
- },
130
- "27": {
131
- "caption": "SD 1.4 Car MissingA sheep near a car",
132
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-27.png",
133
- "width": 660,
134
- "height": 328,
135
- "figure_size": 216480,
136
- "figure_aspect": 2.0121951219512195
137
- },
138
- "29": {
139
- "caption": "brown cat and a red suitcase",
140
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-29.png",
141
- "width": 655,
142
- "height": 330,
143
- "figure_size": 216150,
144
- "figure_aspect": 1.9848484848484849
145
- },
146
- "31": {
147
- "caption": "SD 1.4 LTEB",
148
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-31.png",
149
- "width": 654,
150
- "height": 331,
151
- "figure_size": 216474,
152
- "figure_aspect": 1.9758308157099698
153
- },
154
- "32": {
155
- "caption": "SD 1.4 +LTEB",
156
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-32.png",
157
- "width": 657,
158
- "height": 347,
159
- "figure_size": 227979,
160
- "figure_aspect": 1.893371757925072
161
- },
162
- "33": {
163
- "caption": "Figure 12: More qualitative results on ELLA on SD 1.5 in complex prompts from color set within T2I-CompBench [5]. Reference: ELLA: Equip Diffusion Models with LLM for Enhanced Semantic Alignment (ArXiv'24)",
164
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-33.png",
165
- "width": 979,
166
- "height": 332,
167
- "figure_size": 325028,
168
- "figure_aspect": 2.9487951807228914
169
- },
170
- "34": {
171
- "caption": "SDXL-Turbo Fork MissingA black dog and a brown cat",
172
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-34.png",
173
- "width": 658,
174
- "height": 330,
175
- "figure_size": 217140,
176
- "figure_aspect": 1.993939393939394
177
- },
178
- "35": {
179
- "caption": "SDXL-Turbo LTEBA blue chair and a red cup",
180
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-35.png",
181
- "width": 654,
182
- "height": 327,
183
- "figure_size": 213858,
184
- "figure_aspect": 2.0
185
- },
186
- "36": {
187
- "caption": "SDXL-Turbo LTEB",
188
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-36.png",
189
- "width": 653,
190
- "height": 332,
191
- "figure_size": 216796,
192
- "figure_aspect": 1.966867469879518
193
- },
194
- "37": {
195
- "caption": "wooden spoon and a metal fork",
196
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-37.png",
197
- "width": 652,
198
- "height": 327,
199
- "figure_size": 213204,
200
- "figure_aspect": 1.9938837920489296
201
- },
202
- "38": {
203
- "caption": "A green bench and a red book",
204
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-38.png",
205
- "width": 656,
206
- "height": 333,
207
- "figure_size": 218448,
208
- "figure_aspect": 1.96996996996997
209
- },
210
- "39": {
211
- "caption": "brown bench and a clock green",
212
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-39.png",
213
- "width": 649,
214
- "height": 326,
215
- "figure_size": 211574,
216
- "figure_aspect": 1.99079754601227
217
- },
218
- "40": {
219
- "caption": "SD 3 Color Mixture Orange MissingA blue bowl and a yellow orange",
220
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-40.png",
221
- "width": 641,
222
- "height": 322,
223
- "figure_size": 206402,
224
- "figure_aspect": 1.9906832298136645
225
- },
226
- "41": {
227
- "caption": "Figure 14: More qualitative results on SD3 [2] in complex prompts from color set within T2ICompBench [5].",
228
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-41.png",
229
- "width": 305,
230
- "height": 310,
231
- "figure_size": 94550,
232
- "figure_aspect": 0.9838709677419355
233
- },
234
- "43": {
235
- "caption": "brown backpack and a blue cow",
236
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-43.png",
237
- "width": 303,
238
- "height": 302,
239
- "figure_size": 91506,
240
- "figure_aspect": 1.0033112582781456
241
- },
242
- "45": {
243
- "caption": "A green acorn and a brown leaf",
244
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-45.png",
245
- "width": 644,
246
- "height": 321,
247
- "figure_size": 206724,
248
- "figure_aspect": 2.0062305295950154
249
- },
250
- "47": {
251
- "caption": "Figure 15: The screenshot of the human evaluation, containing the information and options that are given to participants.",
252
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-47.png",
253
- "width": 1588,
254
- "height": 1122,
255
- "figure_size": 1781736,
256
- "figure_aspect": 1.4153297682709447
257
- }
258
- }
 
posterbuilder/contents copy/poster_content.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "meta": {
3
- "poster_title": "A Cat Is A Cat (Not A Dog!): Unraveling Information Mix-ups in Text-to-Image Encoders through Causal Analysis and Embedding Optimization",
4
- "authors": "Chieh-Yun Chen, Chiang Tseng, Li-Wu Tsao, Hong-Han Shuai",
5
- "affiliations": "National Yang Ming Chiao Tung University, Georgia Institute of Technology"
6
- },
7
- "sections": [
8
- {
9
- "title": "Poster Title & Author",
10
- "content": "A Cat Is A Cat (Not A Dog!): Unraveling Information Mix-ups in Text-to-Image Encoders through Causal Analysis and Embedding Optimization by Chieh-Yun Chen, Chiang Tseng, Li-Wu Tsao, Hong-Han Shuai from National Yang Ming Chiao Tung University and Georgia Institute of Technology."
11
- },
12
- {
13
- "title": "Abstract",
14
- "content": "This paper analyzes the impact of causal manner in the text encoder of text-to-image (T2I) diffusion models, which can lead to information bias and loss. We propose a text embedding balance optimization method with a 125.42% improvement on information balance in stable diffusion. A new automatic evaluation metric is introduced, achieving 81% concordance with human assessments."
15
- },
16
- {
17
- "title": "Preliminaries",
18
- "content": "Text-to-image diffusion models include a text encoder, a variational autoencoder, and a denoising UNet. The causal masking manner in the text encoder causes information bias, as each token only has information from previous tokens."
19
- },
20
- {
21
- "title": "Experiments",
22
- "content": "We compare our method with baselines like Stable Diffusion and SynGen, focusing on information balance rather than surpassing existing methods. Our automatic evaluation metric, validated by human assessment, effectively measures object presence and accuracy."
23
- },
24
- {
25
- "title": "TEBOpt",
26
- "content": "TEBOpt aims to balance critical information in text embeddings by optimizing object token embeddings to prevent mixing and work alongside image latent optimization techniques to address object disappearance."
27
- },
28
- {
29
- "title": "Qualitative & Quantitative Results",
30
- "content": "TEBOpt improves object balance in generated images, reducing mixture and missing issues. It enhances token embedding similarity and cross-attention map distance, confirming its effectiveness in addressing information bias."
31
- },
32
- {
33
- "title": "Introduction",
34
- "content": "Text-to-image diffusion models have gained attention, but the role of text embedding in generating multiple objects remains underexplored. This paper investigates how text embeddings influence semantic outcomes, identifying issues of information bias and loss. We propose Text Embedding Balance Optimization (TEBOpt) to address these issues and improve image generation."
35
- },
36
- {
37
- "title": "Discussion",
38
- "content": "Text embedding similarity affects cross-attention maps' distance, with similar embeddings leading to object mixture. Our findings highlight the need for optimized text embeddings to improve image generation quality."
39
- },
40
- {
41
- "title": "Conclusion",
42
- "content": "Our study reveals that causal processing of text embedding leads to biases and loss. TEBOpt effectively eliminates problematic information, improving information balance in stable diffusion by 125.42% while preserving object coexistence."
43
- }
44
- ]
45
- }
posterbuilder/contents/arrangement.json CHANGED
@@ -1,542 +1,115 @@
1
  {
2
- "poster_width": 1200,
3
- "poster_height": 900,
4
- "poster_width_inches": 48.0,
5
- "poster_height_inches": 36.0,
6
  "panels": [
7
  {
8
  "panel_id": 0,
9
- "section_name": "Poster Title & Author",
10
- "tp": 0.12971887550200803,
11
- "text_len": 323,
12
- "gp": 0,
13
- "figure_size": 0,
14
- "figure_aspect": 1,
15
- "sp": 0.06301447323913961,
16
- "rp": 2.505748069071783
17
  },
18
  {
19
  "panel_id": 1,
20
- "section_name": "Introduction",
21
- "tp": 0.1859437751004016,
22
- "text_len": 463,
23
- "gp": 0,
24
- "figure_size": 0,
25
- "figure_aspect": 1,
26
- "sp": 0.08063905395956796,
27
- "rp": 2.359873888191933
28
  },
29
  {
30
  "panel_id": 2,
31
- "section_name": "Benchmark & Metrics",
32
- "tp": 0.15903614457831325,
33
- "text_len": 396,
34
- "gp": 0.016682202105281593,
35
- "figure_size": 64769,
36
- "figure_aspect": 0.8819188191881919,
37
- "sp": 0.07756528917306713,
38
- "rp": 2.386019900332315
39
  },
40
  {
41
  "panel_id": 3,
42
- "section_name": "PosterAgent Framework",
43
- "tp": 0.1859437751004016,
44
- "text_len": 463,
45
- "gp": 0.49217196764679444,
46
  "figure_size": 1910868,
47
- "figure_aspect": 2.0350877192982457,
48
- "sp": 0.23879941088764153,
49
- "rp": 1.0716273641356449
50
  },
51
  {
52
  "panel_id": 4,
53
- "section_name": "Evaluation & Results",
54
- "tp": 0.1859437751004016,
55
- "text_len": 463,
56
- "gp": 0.49114583024792396,
57
- "figure_size": 1906884,
58
- "figure_aspect": 2.0434782608695654,
59
- "sp": 0.23846965976825418,
60
- "rp": 1.0743132504197004
61
- },
62
- {
63
- "panel_id": 5,
64
- "section_name": "Conclusion",
65
- "tp": 0.1534136546184739,
66
- "text_len": 382,
67
  "gp": 0,
68
  "figure_size": 0,
69
- "figure_aspect": 1,
70
- "sp": 0.07044197511417727,
71
- "rp": 2.4442725214152747
72
- }
73
- ],
74
- "panel_arrangement": [
75
- {
76
- "panel_name": "Poster Title & Author",
77
- "panel_id": 0,
78
- "x": 0,
79
- "y": 0,
80
- "width": 1200,
81
- "height": 90.0
82
- },
83
- {
84
- "panel_name": "Introduction",
85
- "panel_id": 1,
86
- "x": 0,
87
- "y": 90.0,
88
- "width": 550.621296168701,
89
- "height": 201.65362571734266
90
- },
91
- {
92
- "panel_name": "Benchmark & Metrics",
93
- "panel_id": 2,
94
- "x": 550.621296168701,
95
- "y": 90.0,
96
- "width": 529.6329503516805,
97
- "height": 201.65362571734266
98
- },
99
- {
100
- "panel_name": "PosterAgent Framework",
101
- "panel_id": 3,
102
- "x": 0,
103
- "y": 291.65362571734266,
104
- "width": 540.5003037876063,
105
- "height": 608.3463742826573
106
- },
107
- {
108
- "panel_name": "Evaluation & Results",
109
- "panel_id": 4,
110
- "x": 540.5003037876063,
111
- "y": 291.65362571734266,
112
- "width": 539.7539427327752,
113
- "height": 608.3463742826573
114
- },
115
- {
116
- "panel_name": "Conclusion",
117
- "panel_id": 5,
118
- "x": 1080.2542465203815,
119
- "y": 90.0,
120
- "width": 119.74575347961854,
121
- "height": 810.0
122
- }
123
- ],
124
- "figure_arrangement": [
125
- {
126
- "panel_id": 2,
127
- "x": 763.672586975783,
128
- "y": 132.13072514346854,
129
- "width": 103.53036873751637,
130
- "height": 117.39217543040559,
131
- "figure_id": 0,
132
- "figure_name": "p<Benchmark & Metrics>_f0",
133
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-1.png"
134
- },
135
- {
136
- "panel_id": 3,
137
- "x": 56.45003037876063,
138
- "y": 490.76985659696936,
139
- "width": 427.60024303008504,
140
- "height": 210.11391252340385,
141
- "figure_id": 0,
142
- "figure_name": "p<PosterAgent Framework>_f0",
143
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-8.png"
144
- },
145
- {
146
- "panel_id": 4,
147
- "x": 596.8756980608838,
148
- "y": 491.34731768544725,
149
- "width": 427.0031541862202,
150
- "height": 208.95899034644816,
151
- "figure_id": 0,
152
- "figure_name": "p<Evaluation & Results>_f0",
153
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-table-1.png"
154
- }
155
- ],
156
- "text_arrangement": [
157
- {
158
- "panel_id": 0,
159
- "x": 3.0,
160
- "y": 3.0,
161
- "width": 1194.0,
162
- "height": 37.333333333333336,
163
- "textbox_id": 0,
164
- "textbox_name": "p<Poster Title & Author>_t0",
165
- "num_chars": 410
166
- },
167
- {
168
- "panel_id": 0,
169
- "x": 3.0,
170
- "y": 40.333333333333336,
171
- "width": 1194.0,
172
- "height": 46.666666666666664,
173
- "textbox_id": 0,
174
- "textbox_name": "p<Poster Title & Author>_t1",
175
- "num_chars": 410
176
- },
177
- {
178
- "panel_id": 1,
179
- "x": 3.0,
180
- "y": 93.0,
181
- "width": 544.621296168701,
182
- "height": 32.0,
183
- "textbox_id": 0,
184
- "textbox_name": "p<Introduction>_t0",
185
- "num_chars": 180
186
- },
187
- {
188
- "panel_id": 1,
189
- "x": 3.0,
190
- "y": 125.0,
191
- "width": 544.621296168701,
192
- "height": 163.65362571734266,
193
- "textbox_id": 1,
194
- "textbox_name": "p<Introduction>_t1",
195
- "num_chars": 540
196
- },
197
- {
198
- "panel_id": 2,
199
- "x": 553.621296168701,
200
- "y": 93.0,
201
- "width": 523.6329503516805,
202
- "height": 32.0,
203
- "textbox_id": 0,
204
- "textbox_name": "p<Benchmark & Metrics>_t0",
205
- "num_chars": 180
206
- },
207
- {
208
- "panel_id": 2,
209
- "x": 553.621296168701,
210
- "y": 125.0,
211
- "width": 523.6329503516805,
212
- "height": 7.130725143468538,
213
- "textbox_id": 1,
214
- "textbox_name": "p<Benchmark & Metrics>_t1",
215
- "num_chars": 180
216
- },
217
- {
218
- "panel_id": 2,
219
- "x": 553.621296168701,
220
- "y": 249.52290057387413,
221
- "width": 523.6329503516805,
222
- "height": 39.13072514346854,
223
- "textbox_id": 2,
224
- "textbox_name": "p<Benchmark & Metrics>_t2",
225
- "num_chars": 180
226
- },
227
- {
228
- "panel_id": 3,
229
- "x": 3.0,
230
- "y": 294.65362571734266,
231
- "width": 534.5003037876063,
232
- "height": 32.0,
233
- "textbox_id": 0,
234
- "textbox_name": "p<PosterAgent Framework>_t0",
235
- "num_chars": 180
236
- },
237
- {
238
- "panel_id": 3,
239
- "x": 3.0,
240
- "y": 326.65362571734266,
241
- "width": 534.5003037876063,
242
- "height": 164.1162308796267,
243
- "textbox_id": 1,
244
- "textbox_name": "p<PosterAgent Framework>_t1",
245
- "num_chars": 540
246
- },
247
- {
248
- "panel_id": 3,
249
- "x": 3.0,
250
- "y": 700.8837691203732,
251
- "width": 534.5003037876063,
252
- "height": 196.11623087962676,
253
- "textbox_id": 2,
254
- "textbox_name": "p<PosterAgent Framework>_t2",
255
- "num_chars": 540
256
- },
257
- {
258
- "panel_id": 4,
259
- "x": 543.5003037876063,
260
- "y": 294.65362571734266,
261
- "width": 533.7539427327752,
262
- "height": 32.0,
263
- "textbox_id": 0,
264
- "textbox_name": "p<Evaluation & Results>_t0",
265
- "num_chars": 180
266
- },
267
- {
268
- "panel_id": 4,
269
- "x": 543.5003037876063,
270
- "y": 326.65362571734266,
271
- "width": 533.7539427327752,
272
- "height": 164.6936919681046,
273
- "textbox_id": 1,
274
- "textbox_name": "p<Evaluation & Results>_t1",
275
- "num_chars": 540
276
- },
277
- {
278
- "panel_id": 4,
279
- "x": 543.5003037876063,
280
- "y": 700.3063080318955,
281
- "width": 533.7539427327752,
282
- "height": 196.69369196810453,
283
- "textbox_id": 2,
284
- "textbox_name": "p<Evaluation & Results>_t2",
285
- "num_chars": 540
286
  },
287
  {
288
  "panel_id": 5,
289
- "x": 1083.2542465203815,
290
- "y": 93.0,
291
- "width": 113.74575347961854,
292
- "height": 32.0,
293
- "textbox_id": 0,
294
- "textbox_name": "p<Conclusion>_t0",
295
- "num_chars": 30
296
  },
297
  {
298
- "panel_id": 5,
299
- "x": 1083.2542465203815,
300
- "y": 125.0,
301
- "width": 113.74575347961854,
302
- "height": 772.0,
303
- "textbox_id": 1,
304
- "textbox_name": "p<Conclusion>_t1",
305
- "num_chars": 420
306
  }
307
  ],
308
- "panel_arrangement_inches": [
309
  {
310
- "panel_name": "Poster Title & Author",
311
  "panel_id": 0,
312
- "x": 0.0,
313
- "y": 0.0,
314
- "width": 48.0,
315
- "height": 3.6
316
  },
317
  {
318
- "panel_name": "Introduction",
319
  "panel_id": 1,
320
- "x": 0.0,
321
- "y": 3.6,
322
- "width": 22.02485184674804,
323
- "height": 8.066145028693706
324
  },
325
  {
326
- "panel_name": "Benchmark & Metrics",
327
  "panel_id": 2,
328
- "x": 22.02485184674804,
329
- "y": 3.6,
330
- "width": 21.18531801406722,
331
- "height": 8.066145028693706
332
  },
333
  {
334
- "panel_name": "PosterAgent Framework",
335
  "panel_id": 3,
336
- "x": 0.0,
337
- "y": 11.666145028693707,
338
- "width": 21.620012151504252,
339
- "height": 24.33385497130629
340
- },
341
- {
342
- "panel_name": "Evaluation & Results",
343
- "panel_id": 4,
344
- "x": 21.620012151504252,
345
- "y": 11.666145028693707,
346
- "width": 21.590157709311008,
347
- "height": 24.33385497130629
348
  },
349
  {
350
- "panel_name": "Conclusion",
351
  "panel_id": 5,
352
- "x": 43.210169860815256,
353
- "y": 3.6,
354
- "width": 4.789830139184741,
355
- "height": 32.4
356
- }
357
- ],
358
- "figure_arrangement_inches": [
359
- {
360
- "panel_id": 2,
361
- "x": 30.54690347903132,
362
- "y": 5.285229005738741,
363
- "width": 4.141214749500655,
364
- "height": 4.6956870172162235,
365
- "figure_id": 0,
366
- "figure_name": "p<Benchmark & Metrics>_f0",
367
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-1.png"
368
- },
369
- {
370
- "panel_id": 3,
371
- "x": 2.258001215150425,
372
- "y": 19.630794263878773,
373
- "width": 17.1040097212034,
374
- "height": 8.404556500936154,
375
- "figure_id": 0,
376
- "figure_name": "p<PosterAgent Framework>_f0",
377
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-picture-8.png"
378
- },
379
- {
380
- "panel_id": 4,
381
- "x": 23.87502792243535,
382
- "y": 19.65389270741789,
383
- "width": 17.080126167448807,
384
- "height": 8.358359613857926,
385
- "figure_id": 0,
386
- "figure_name": "p<Evaluation & Results>_f0",
387
- "figure_path": "<4o_4o>_images_and_tables/paper/paper-table-1.png"
388
- }
389
- ],
390
- "text_arrangement_inches": [
391
- {
392
- "panel_id": 0,
393
- "x": 0.12,
394
- "y": 0.12,
395
- "width": 47.76,
396
- "height": 1.4933333333333334,
397
- "textbox_id": 0,
398
- "textbox_name": "p<Poster Title & Author>_t0",
399
- "num_chars": 410
400
- },
401
- {
402
- "panel_id": 0,
403
- "x": 0.12,
404
- "y": 1.6133333333333335,
405
- "width": 47.76,
406
- "height": 1.8666666666666665,
407
- "textbox_id": 0,
408
- "textbox_name": "p<Poster Title & Author>_t1",
409
- "num_chars": 410
410
  },
411
  {
412
- "panel_id": 1,
413
- "x": 0.12,
414
- "y": 3.72,
415
- "width": 21.784851846748037,
416
- "height": 1.28,
417
- "textbox_id": 0,
418
- "textbox_name": "p<Introduction>_t0",
419
- "num_chars": 180
420
- },
421
- {
422
- "panel_id": 1,
423
- "x": 0.12,
424
- "y": 5.0,
425
- "width": 21.784851846748037,
426
- "height": 6.546145028693706,
427
- "textbox_id": 1,
428
- "textbox_name": "p<Introduction>_t1",
429
- "num_chars": 540
430
- },
431
- {
432
- "panel_id": 2,
433
- "x": 22.14485184674804,
434
- "y": 3.72,
435
- "width": 20.94531801406722,
436
- "height": 1.28,
437
- "textbox_id": 0,
438
- "textbox_name": "p<Benchmark & Metrics>_t0",
439
- "num_chars": 180
440
- },
441
- {
442
- "panel_id": 2,
443
- "x": 22.14485184674804,
444
- "y": 5.0,
445
- "width": 20.94531801406722,
446
- "height": 0.28522900573874155,
447
- "textbox_id": 1,
448
- "textbox_name": "p<Benchmark & Metrics>_t1",
449
- "num_chars": 180
450
  },
451
  {
452
- "panel_id": 2,
453
- "x": 22.14485184674804,
454
- "y": 9.980916022954965,
455
- "width": 20.94531801406722,
456
- "height": 1.5652290057387415,
457
- "textbox_id": 2,
458
- "textbox_name": "p<Benchmark & Metrics>_t2",
459
- "num_chars": 180
460
- },
461
- {
462
- "panel_id": 3,
463
- "x": 0.12,
464
- "y": 11.786145028693706,
465
- "width": 21.380012151504253,
466
- "height": 1.28,
467
- "textbox_id": 0,
468
- "textbox_name": "p<PosterAgent Framework>_t0",
469
- "num_chars": 180
470
- },
471
- {
472
- "panel_id": 3,
473
- "x": 0.12,
474
- "y": 13.066145028693706,
475
- "width": 21.380012151504253,
476
- "height": 6.564649235185068,
477
- "textbox_id": 1,
478
- "textbox_name": "p<PosterAgent Framework>_t1",
479
- "num_chars": 540
480
- },
481
- {
482
- "panel_id": 3,
483
- "x": 0.12,
484
- "y": 28.03535076481493,
485
- "width": 21.380012151504253,
486
- "height": 7.84464923518507,
487
- "textbox_id": 2,
488
- "textbox_name": "p<PosterAgent Framework>_t2",
489
- "num_chars": 540
490
- },
491
- {
492
- "panel_id": 4,
493
- "x": 21.740012151504253,
494
- "y": 11.786145028693706,
495
- "width": 21.350157709311006,
496
- "height": 1.28,
497
- "textbox_id": 0,
498
- "textbox_name": "p<Evaluation & Results>_t0",
499
- "num_chars": 180
500
- },
501
- {
502
- "panel_id": 4,
503
- "x": 21.740012151504253,
504
- "y": 13.066145028693706,
505
- "width": 21.350157709311006,
506
- "height": 6.587747678724184,
507
- "textbox_id": 1,
508
- "textbox_name": "p<Evaluation & Results>_t1",
509
- "num_chars": 540
510
- },
511
- {
512
- "panel_id": 4,
513
- "x": 21.740012151504253,
514
- "y": 28.01225232127582,
515
- "width": 21.350157709311006,
516
- "height": 7.867747678724181,
517
- "textbox_id": 2,
518
- "textbox_name": "p<Evaluation & Results>_t2",
519
- "num_chars": 540
520
- },
521
- {
522
- "panel_id": 5,
523
- "x": 43.33016986081526,
524
- "y": 3.72,
525
- "width": 4.549830139184742,
526
- "height": 1.28,
527
- "textbox_id": 0,
528
- "textbox_name": "p<Conclusion>_t0",
529
- "num_chars": 30
530
- },
531
- {
532
- "panel_id": 5,
533
- "x": 43.33016986081526,
534
- "y": 5.0,
535
- "width": 4.549830139184742,
536
- "height": 30.88,
537
- "textbox_id": 1,
538
- "textbox_name": "p<Conclusion>_t1",
539
- "num_chars": 420
540
  }
541
  ]
542
  }
 
1
  {
 
 
 
 
2
  "panels": [
3
  {
4
  "panel_id": 0,
5
+ "section_name": "Why Posters Are Hard",
6
+ "tp": 0.12082710513203787,
7
+ "text_len": 485,
8
+ "gp": 0.009888851380803912,
9
+ "figure_size": 64769,
10
+ "figure_aspect": 0.8819188191881919
 
 
11
  },
12
  {
13
  "panel_id": 1,
14
+ "section_name": "Benchmark and Data",
15
+ "tp": 0.12531141006477328,
16
+ "text_len": 503,
17
+ "gp": 0.04796373085236436,
18
+ "figure_size": 314148,
19
+ "figure_aspect": 1.0125673249551166
 
 
20
  },
21
  {
22
  "panel_id": 2,
23
+ "section_name": "PaperQuiz: What Matters",
24
+ "tp": 0.11285500747384156,
25
+ "text_len": 453,
26
+ "gp": 0.1192882298865948,
27
+ "figure_size": 781302,
28
+ "figure_aspect": 5.032994923857868
 
 
29
  },
30
  {
31
  "panel_id": 3,
32
+ "section_name": "PosterAgent Pipeline",
33
+ "tp": 0.10637767812655705,
34
+ "text_len": 427,
35
+ "gp": 0.29174897960959734,
36
  "figure_size": 1910868,
37
+ "figure_aspect": 2.0350877192982457
 
 
38
  },
39
  {
40
  "panel_id": 4,
41
+ "section_name": "Parser: Structured Assets",
42
+ "tp": 0.10612855007473841,
43
+ "text_len": 426,
44
  "gp": 0,
45
  "figure_size": 0,
46
+ "figure_aspect": 1
47
  },
48
  {
49
  "panel_id": 5,
50
+ "section_name": "Planner: Layout Mastery",
51
+ "tp": 0.10089686098654709,
52
+ "text_len": 405,
53
+ "gp": 0.08839429109643054,
54
+ "figure_size": 578956,
55
+ "figure_aspect": 1.3959627329192548
56
+ },
57
+ {
58
+ "panel_id": 6,
59
+ "section_name": "Painter\u2013Commenter Loop",
60
+ "tp": 0.10662680617837568,
61
+ "text_len": 428,
62
+ "gp": 0.15157520979208358,
63
+ "figure_size": 992772,
64
+ "figure_aspect": 1.4480676328502415
65
+ },
66
+ {
67
+ "panel_id": 7,
68
+ "section_name": "Results: Stronger, Leaner",
69
+ "tp": 0.10986547085201794,
70
+ "text_len": 441,
71
+ "gp": 0.2911407073821255,
72
+ "figure_size": 1906884,
73
+ "figure_aspect": 2.0434782608695654
74
  },
75
  {
76
+ "panel_id": 8,
77
+ "section_name": "Limits and Next Steps",
78
+ "tp": 0.1111111111111111,
79
+ "text_len": 446,
80
+ "gp": 0,
81
+ "figure_size": 0,
82
+ "figure_aspect": 1
 
83
  }
84
  ],
85
+ "figure_arrangement": [
86
  {
 
87
  "panel_id": 0,
88
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-1.png"
 
 
 
89
  },
90
  {
 
91
  "panel_id": 1,
92
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-6.png"
 
 
 
93
  },
94
  {
 
95
  "panel_id": 2,
96
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-7.png"
 
 
 
97
  },
98
  {
 
99
  "panel_id": 3,
100
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-8.png"
101
  },
102
  {
 
103
  "panel_id": 5,
104
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-30.png"
105
  },
106
  {
107
+ "panel_id": 6,
108
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-61.png"
109
  },
110
  {
111
+ "panel_id": 7,
112
+ "figure_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-table-1.png"
113
  }
114
  ]
115
  }
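
Note on the new `panels` schema above: each panel's `tp` is its share of the total text length and `gp` its share of the total figure area, so each column sums to 1 across the nine panels (485 / 4014 ≈ 0.1208 and 64769 / 6549699 ≈ 0.0099 for panel 0, consistent with the values in the added hunk). A minimal sketch of that normalization, assuming plain dicts shaped like the entries above; the helper name is illustrative, not a function from this repo:

```python
def add_panel_priorities(panels):
    """Attach tp/gp shares to each panel dict (requires text_len and figure_size).

    tp = text_len / sum(text_len); gp = figure_size / sum(figure_size).
    With the nine panels from the hunk above, sum(text_len) = 4014 and
    sum(figure_size) = 6549699, reproducing tp ~ 0.1208 / gp ~ 0.0099 for panel 0.
    """
    total_text = sum(p["text_len"] for p in panels) or 1
    total_fig = sum(p["figure_size"] for p in panels) or 1
    for p in panels:
        p["tp"] = p["text_len"] / total_text
        p["gp"] = p["figure_size"] / total_fig
    return panels
```
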
posterbuilder/contents/figure_caption.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "1": {
3
  "caption": "Figure 1: Overview of this work. We address two core challenges in scientific poster generation: Left: How to create a poster from a paper -we propose PosterAgent (Sec. 4), a framework that transforms long-context scientific papers (20K+ tokens) into structured visual posters; and Right: How to evaluate poster quality -weintroduce the Paper2Poster benchmark (Sec. 3), which enables systematic comparison between agent-generated and author-designed posters.",
4
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-1.png",
5
  "width": 239,
6
  "height": 271,
7
  "figure_size": 64769,
@@ -9,7 +9,7 @@
9
  },
10
  "3": {
11
  "caption": "Paper ( 20K tokens )",
12
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-3.png",
13
  "width": 398,
14
  "height": 265,
15
  "figure_size": 105470,
@@ -17,7 +17,7 @@
17
  },
18
  "6": {
19
  "caption": "Figure 2: Data Statistics of Paper2Poster. (a) Word cloud illustrating the diversity of research topics. (b) Textual Token statistics and Figure count statistics for input papers vs. posters provided by authors. Overall, these statistics highlight that Paper2Poster is a multimodal context compression task, requiring effective abstraction of both textual and visual content.",
20
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-6.png",
21
  "width": 564,
22
  "height": 557,
23
  "figure_size": 314148,
@@ -25,7 +25,7 @@
25
  },
26
  "7": {
27
  "caption": "Figure 3: Left : Overview of the evaluation framework in Paper2Poster. Middle : We automatically generate multiple-choice questions from each paper using an LLM (o3), forming the our PaperQuiz evaluation. Right : In PaperQuiz, we simulate multiple reader by allowing VLMs-representing different expertise levels ( e.g., student, professor)-to read each generated poster and answer the quiz. The poster that achieves the highest average score is considered the most effective in conveying the paper's content.",
28
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-7.png",
29
  "width": 1983,
30
  "height": 394,
31
  "figure_size": 781302,
@@ -33,7 +33,7 @@
33
  },
34
  "8": {
35
  "caption": "Figure 4: Illustration of the PosterAgent pipeline. Given an input paper, PosterAgent generates a structured academic poster through three modules: 1. Parser: Extracts key textual and visual assets using a combination of tools and LLM-based summarization, resulting in a structured asset library. 2. Planner: Matches assets and arranges them into coherent layouts, iteratively generating panels with a zoom-in operation. 3. Painter-Commenter: The Painter generates panel-level bullet-content along with executable code, and renders the visual output, while the Commenter-a VLM with in-context reference-provides feedback to ensure layout coherence and prevent content overflow.",
36
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-8.png",
37
  "width": 1972,
38
  "height": 969,
39
  "figure_size": 1910868,
@@ -41,7 +41,7 @@
41
  },
42
  "9": {
43
  "caption": "Figure 5: PaperQuiz's Avg. scores across different Reader VLMs (x-axis) for each poster type (legend lines). Refer to Append. Tab. 3 for full model names.",
44
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-9.png",
45
  "width": 769,
46
  "height": 505,
47
  "figure_size": 388345,
@@ -49,7 +49,7 @@
49
  },
50
  "10": {
51
  "caption": "Figure 7 presents the average token cost per poster across different methods. Our PosterAgent achieves great token efficiency, using only 101 . 1 K (4o-based) and 47 . 6 K (Qwen-based) tokens-reducing cost by 60% -87% compared to OWL-4o [6]. This translates to just $0 . 55 for 4o and $0 . 0045 for Qwen per poster, highlighting its effectiveness, (see Append. E.2 for further details).",
52
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-10.png",
53
  "width": 1948,
54
  "height": 1100,
55
  "figure_size": 2142800,
@@ -57,7 +57,7 @@
57
  },
58
  "11": {
59
  "caption": "Figure 7: Average token consumptions for different methods. Details are provided in Appendix E.1.",
60
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-11.png",
61
  "width": 701,
62
  "height": 505,
63
  "figure_size": 354005,
@@ -65,7 +65,7 @@
65
  },
66
  "12": {
67
  "caption": "Figure 6: PaperQuiz's Avg scores across different types of posters (x-axis) for readers (colored lines) on human evaluation subset.",
68
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-12.png",
69
  "width": 661,
70
  "height": 428,
71
  "figure_size": 282908,
@@ -73,7 +73,7 @@
73
  },
74
  "13": {
75
  "caption": "Figure 10: Posters for MuSc: Zero-Shot Industrial Anomaly Classification and Segmentation with Mutual Scoring of the Unlabeled Images.",
76
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-13.png",
77
  "width": 960,
78
  "height": 521,
79
  "figure_size": 500160,
@@ -81,7 +81,7 @@
81
  },
82
  "15": {
83
  "caption": "(b) PosterAgent-generated poster.(a) Author-designed poster.",
84
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-15.png",
85
  "width": 1993,
86
  "height": 810,
87
  "figure_size": 1614330,
@@ -89,7 +89,7 @@
89
  },
90
  "16": {
91
  "caption": "(a) Author-designed poster.",
92
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-16.png",
93
  "width": 945,
94
  "height": 680,
95
  "figure_size": 642600,
@@ -97,7 +97,7 @@
97
  },
98
  "17": {
99
  "caption": "(b) PosterAgent-generated poster.",
100
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-17.png",
101
  "width": 957,
102
  "height": 708,
103
  "figure_size": 677556,
@@ -105,7 +105,7 @@
105
  },
106
  "18": {
107
  "caption": "Figure 11: Posters for Neuroformer: Multimodal and Multitask Generative Pretraining for Brain Data.(a) Author-designed poster.",
108
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-18.png",
109
  "width": 938,
110
  "height": 620,
111
  "figure_size": 581560,
@@ -113,7 +113,7 @@
113
  },
114
  "19": {
115
  "caption": "Figure 12: Posters for Conformal Semantic Keypoint Detection with Statistical Guarantees.(a) Author-designed poster.",
116
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-19.png",
117
  "width": 1176,
118
  "height": 596,
119
  "figure_size": 700896,
@@ -121,7 +121,7 @@
121
  },
122
  "20": {
123
  "caption": "Figure 13: Posters for Neural Tangent Kernels for Axis-Aligned Tree Ensembles.",
124
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-20.png",
125
  "width": 790,
126
  "height": 598,
127
  "figure_size": 472420,
@@ -129,7 +129,7 @@
129
  },
130
  "22": {
131
  "caption": "(a) Author-designed poster.",
132
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-22.png",
133
  "width": 929,
134
  "height": 583,
135
  "figure_size": 541607,
@@ -137,7 +137,7 @@
137
  },
138
  "23": {
139
  "caption": "Figure 16: Posters for Identifying the Context Shift between Test Benchmarks and Production Data.",
140
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-23.png",
141
  "width": 958,
142
  "height": 646,
143
  "figure_size": 618868,
@@ -145,7 +145,7 @@
145
  },
146
  "24": {
147
  "caption": "(a) Author-designed poster.",
148
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-24.png",
149
  "width": 1190,
150
  "height": 567,
151
  "figure_size": 674730,
@@ -153,7 +153,7 @@
153
  },
154
  "29": {
155
  "caption": "(a) Direct.",
156
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-29.png",
157
  "width": 896,
158
  "height": 323,
159
  "figure_size": 289408,
@@ -161,7 +161,7 @@
161
  },
162
  "30": {
163
  "caption": "(b) Tree.(c) Tree + Commenter.",
164
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-30.png",
165
  "width": 899,
166
  "height": 644,
167
  "figure_size": 578956,
@@ -169,7 +169,7 @@
169
  },
170
  "31": {
171
  "caption": "Figure 17: Ablation study on Neuro-Symbolic Language Modeling with Automaton-augmented Retrieval. Text overflow areas are highlighted with red bounding boxes.",
172
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-31.png",
173
  "width": 897,
174
  "height": 679,
175
  "figure_size": 609063,
@@ -177,7 +177,7 @@
177
  },
178
  "33": {
179
  "caption": "Figure 18: Ablation study on Visual Correspondence Hallucination. Text overflow areas are highlighted with red bounding boxes.",
180
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-33.png",
181
  "width": 895,
182
  "height": 274,
183
  "figure_size": 245230,
@@ -185,7 +185,7 @@
185
  },
186
  "34": {
187
  "caption": "(b) Tree.",
188
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-34.png",
189
  "width": 900,
190
  "height": 511,
191
  "figure_size": 459900,
@@ -193,7 +193,7 @@
193
  },
194
  "35": {
195
  "caption": "(c) Tree + Commenter.",
196
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-35.png",
197
  "width": 901,
198
  "height": 513,
199
  "figure_size": 462213,
@@ -201,7 +201,7 @@
201
  },
202
  "37": {
203
  "caption": "Figure 19: Ablation study on DARTFormer: Finding The Best Type Of Attention. Text overflow areas are highlighted with red bounding boxes, large blank regions are highlighted with purple bounding boxes.",
204
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-37.png",
205
  "width": 895,
206
  "height": 747,
207
  "figure_size": 668565,
@@ -209,7 +209,7 @@
209
  },
210
  "39": {
211
  "caption": "(c) Tree + Commenter.",
212
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-39.png",
213
  "width": 899,
214
  "height": 1187,
215
  "figure_size": 1067113,
@@ -217,7 +217,7 @@
217
  },
218
  "41": {
219
  "caption": "Figure 20: Ablation study on CW-ERM: Improving Autonomous Driving Planning with Closed-loop Weighted Empirical Risk Minimization. Text overflow areas are highlighted with red bounding boxes, and large blank regions are highlighted with purple bounding boxes.",
220
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-41.png",
221
  "width": 898,
222
  "height": 1345,
223
  "figure_size": 1207810,
@@ -225,7 +225,7 @@
225
  },
226
  "43": {
227
  "caption": "(c) Tree + Commenter.",
228
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-43.png",
229
  "width": 908,
230
  "height": 1341,
231
  "figure_size": 1217628,
@@ -233,7 +233,7 @@
233
  },
234
  "45": {
235
  "caption": "Figure 21: Ablation study on DeepJoint: Robust Survival Modelling Under Clinical Presence Shift. Text overflow areas are highlighted with red bounding boxes.",
236
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-45.png",
237
  "width": 894,
238
  "height": 1234,
239
  "figure_size": 1103196,
@@ -241,7 +241,7 @@
241
  },
242
  "48": {
243
  "caption": "(c) Tree + Commenter.",
244
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-48.png",
245
  "width": 902,
246
  "height": 1266,
247
  "figure_size": 1141932,
@@ -249,7 +249,7 @@
249
  },
250
  "49": {
251
  "caption": "(a) A poster generated by 4o-Image , where substantial corrupted text is generated.",
252
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-49.png",
253
  "width": 949,
254
  "height": 1409,
255
  "figure_size": 1337141,
@@ -257,7 +257,7 @@
257
  },
258
  "50": {
259
  "caption": "(b) A poster generated by PPTAgent , where meaningless template placeholder text is remained.",
260
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-50.png",
261
  "width": 956,
262
  "height": 1433,
263
  "figure_size": 1369948,
@@ -265,7 +265,7 @@
265
  },
266
  "51": {
267
  "caption": "Figure 22: Examples of posters with corrupted text.(a) A poster generated by 4o-Image , where the poster is cutoff horizontally due to incomplete generation.",
268
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-51.png",
269
  "width": 966,
270
  "height": 887,
271
  "figure_size": 856842,
@@ -273,7 +273,7 @@
273
  },
274
  "52": {
275
  "caption": "Figure 23: Examples of posters with cutoff.",
276
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-52.png",
277
  "width": 948,
278
  "height": 962,
279
  "figure_size": 911976,
@@ -281,7 +281,7 @@
281
  },
282
  "53": {
283
  "caption": "(a) A poster produced by 4o-Image , featuring a figure that is low-resolution, visually corrupted, and unintelligible.",
284
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-53.png",
285
  "width": 968,
286
  "height": 951,
287
  "figure_size": 920568,
@@ -289,7 +289,7 @@
289
  },
290
  "54": {
291
  "caption": "(b) A poster generated by PPTAgent , where figures are rendered too small to be legible.",
292
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-54.png",
293
  "width": 958,
294
  "height": 1277,
295
  "figure_size": 1223366,
@@ -297,7 +297,7 @@
297
  },
298
  "55": {
299
  "caption": "Figure 24: Examples of posters with obscure figures.(a) A poster generated by OWL-4o , where there are large blanks on the poster.",
300
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-55.png",
301
  "width": 954,
302
  "height": 680,
303
  "figure_size": 648720,
@@ -305,7 +305,7 @@
305
  },
306
  "56": {
307
  "caption": "Figure 25: Examples of posters with large blanks.",
308
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-56.png",
309
  "width": 955,
310
  "height": 723,
311
  "figure_size": 690465,
@@ -313,7 +313,7 @@
313
  },
314
  "57": {
315
  "caption": "(a) A poster generated by OWL-4o , where no figures are inserted into poster.",
316
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-57.png",
317
  "width": 959,
318
  "height": 549,
319
  "figure_size": 526491,
@@ -321,7 +321,7 @@
321
  },
322
  "58": {
323
  "caption": "Figure 26: Examples of posters without figures.",
324
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-58.png",
325
  "width": 962,
326
  "height": 1435,
327
  "figure_size": 1380470,
@@ -329,7 +329,7 @@
329
  },
330
  "59": {
331
  "caption": "(a) A poster generated by PosterAgent-Qwen , where there is text overflowing outside textbox.",
332
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-59.png",
333
  "width": 957,
334
  "height": 1277,
335
  "figure_size": 1222089,
@@ -337,7 +337,7 @@
337
  },
338
  "60": {
339
  "caption": "Figure 27: Examples of posters with textual overflow.",
340
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-60.png",
341
  "width": 956,
342
  "height": 640,
343
  "figure_size": 611840,
@@ -345,7 +345,7 @@
345
  },
346
  "61": {
347
  "caption": "Figure 29: In-context references for the commenter help the VLM better identify whether the current panel falls into a failure case.",
348
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-61.png",
349
  "width": 1199,
350
  "height": 828,
351
  "figure_size": 992772,
@@ -353,7 +353,7 @@
353
  },
354
  "63": {
355
  "caption": "Figure 28: Failure generation examples by Stable Diffusion Ultra model [28].",
356
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-63.png",
357
  "width": 1193,
358
  "height": 785,
359
  "figure_size": 936505,
 
1
  {
2
  "1": {
3
  "caption": "Figure 1: Overview of this work. We address two core challenges in scientific poster generation: Left: How to create a poster from a paper -we propose PosterAgent (Sec. 4), a framework that transforms long-context scientific papers (20K+ tokens) into structured visual posters; and Right: How to evaluate poster quality -weintroduce the Paper2Poster benchmark (Sec. 3), which enables systematic comparison between agent-generated and author-designed posters.",
4
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-1.png",
5
  "width": 239,
6
  "height": 271,
7
  "figure_size": 64769,
 
9
  },
10
  "3": {
11
  "caption": "Paper ( 20K tokens )",
12
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-3.png",
13
  "width": 398,
14
  "height": 265,
15
  "figure_size": 105470,
 
17
  },
18
  "6": {
19
  "caption": "Figure 2: Data Statistics of Paper2Poster. (a) Word cloud illustrating the diversity of research topics. (b) Textual Token statistics and Figure count statistics for input papers vs. posters provided by authors. Overall, these statistics highlight that Paper2Poster is a multimodal context compression task, requiring effective abstraction of both textual and visual content.",
20
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-6.png",
21
  "width": 564,
22
  "height": 557,
23
  "figure_size": 314148,
 
25
  },
26
  "7": {
27
  "caption": "Figure 3: Left : Overview of the evaluation framework in Paper2Poster. Middle : We automatically generate multiple-choice questions from each paper using an LLM (o3), forming the our PaperQuiz evaluation. Right : In PaperQuiz, we simulate multiple reader by allowing VLMs-representing different expertise levels ( e.g., student, professor)-to read each generated poster and answer the quiz. The poster that achieves the highest average score is considered the most effective in conveying the paper's content.",
28
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-7.png",
29
  "width": 1983,
30
  "height": 394,
31
  "figure_size": 781302,
 
33
  },
34
  "8": {
35
  "caption": "Figure 4: Illustration of the PosterAgent pipeline. Given an input paper, PosterAgent generates a structured academic poster through three modules: 1. Parser: Extracts key textual and visual assets using a combination of tools and LLM-based summarization, resulting in a structured asset library. 2. Planner: Matches assets and arranges them into coherent layouts, iteratively generating panels with a zoom-in operation. 3. Painter-Commenter: The Painter generates panel-level bullet-content along with executable code, and renders the visual output, while the Commenter-a VLM with in-context reference-provides feedback to ensure layout coherence and prevent content overflow.",
36
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-8.png",
37
  "width": 1972,
38
  "height": 969,
39
  "figure_size": 1910868,
 
41
  },
42
  "9": {
43
  "caption": "Figure 5: PaperQuiz's Avg. scores across different Reader VLMs (x-axis) for each poster type (legend lines). Refer to Append. Tab. 3 for full model names.",
44
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-9.png",
45
  "width": 769,
46
  "height": 505,
47
  "figure_size": 388345,
 
49
  },
50
  "10": {
51
  "caption": "Figure 7 presents the average token cost per poster across different methods. Our PosterAgent achieves great token efficiency, using only 101 . 1 K (4o-based) and 47 . 6 K (Qwen-based) tokens-reducing cost by 60% -87% compared to OWL-4o [6]. This translates to just $0 . 55 for 4o and $0 . 0045 for Qwen per poster, highlighting its effectiveness, (see Append. E.2 for further details).",
52
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-10.png",
53
  "width": 1948,
54
  "height": 1100,
55
  "figure_size": 2142800,
 
57
  },
58
  "11": {
59
  "caption": "Figure 7: Average token consumptions for different methods. Details are provided in Appendix E.1.",
60
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-11.png",
61
  "width": 701,
62
  "height": 505,
63
  "figure_size": 354005,
 
65
  },
66
  "12": {
67
  "caption": "Figure 6: PaperQuiz's Avg scores across different types of posters (x-axis) for readers (colored lines) on human evaluation subset.",
68
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-12.png",
69
  "width": 661,
70
  "height": 428,
71
  "figure_size": 282908,
 
73
  },
74
  "13": {
75
  "caption": "Figure 10: Posters for MuSc: Zero-Shot Industrial Anomaly Classification and Segmentation with Mutual Scoring of the Unlabeled Images.",
76
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-13.png",
77
  "width": 960,
78
  "height": 521,
79
  "figure_size": 500160,
 
81
  },
82
  "15": {
83
  "caption": "(b) PosterAgent-generated poster.(a) Author-designed poster.",
84
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-15.png",
85
  "width": 1993,
86
  "height": 810,
87
  "figure_size": 1614330,
 
89
  },
90
  "16": {
91
  "caption": "(a) Author-designed poster.",
92
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-16.png",
93
  "width": 945,
94
  "height": 680,
95
  "figure_size": 642600,
 
97
  },
98
  "17": {
99
  "caption": "(b) PosterAgent-generated poster.",
100
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-17.png",
101
  "width": 957,
102
  "height": 708,
103
  "figure_size": 677556,
 
105
  },
106
  "18": {
107
  "caption": "Figure 11: Posters for Neuroformer: Multimodal and Multitask Generative Pretraining for Brain Data.(a) Author-designed poster.",
108
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-18.png",
109
  "width": 938,
110
  "height": 620,
111
  "figure_size": 581560,
 
113
  },
114
  "19": {
115
  "caption": "Figure 12: Posters for Conformal Semantic Keypoint Detection with Statistical Guarantees.(a) Author-designed poster.",
116
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-19.png",
117
  "width": 1176,
118
  "height": 596,
119
  "figure_size": 700896,
 
121
  },
122
  "20": {
123
  "caption": "Figure 13: Posters for Neural Tangent Kernels for Axis-Aligned Tree Ensembles.",
124
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-20.png",
125
  "width": 790,
126
  "height": 598,
127
  "figure_size": 472420,
 
129
  },
130
  "22": {
131
  "caption": "(a) Author-designed poster.",
132
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-22.png",
133
  "width": 929,
134
  "height": 583,
135
  "figure_size": 541607,
 
137
  },
138
  "23": {
139
  "caption": "Figure 16: Posters for Identifying the Context Shift between Test Benchmarks and Production Data.",
140
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-23.png",
141
  "width": 958,
142
  "height": 646,
143
  "figure_size": 618868,
 
145
  },
146
  "24": {
147
  "caption": "(a) Author-designed poster.",
148
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-24.png",
149
  "width": 1190,
150
  "height": 567,
151
  "figure_size": 674730,
 
153
  },
154
  "29": {
155
  "caption": "(a) Direct.",
156
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-29.png",
157
  "width": 896,
158
  "height": 323,
159
  "figure_size": 289408,
 
161
  },
162
  "30": {
163
  "caption": "(b) Tree.(c) Tree + Commenter.",
164
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-30.png",
165
  "width": 899,
166
  "height": 644,
167
  "figure_size": 578956,
 
169
  },
170
  "31": {
171
  "caption": "Figure 17: Ablation study on Neuro-Symbolic Language Modeling with Automaton-augmented Retrieval. Text overflow areas are highlighted with red bounding boxes.",
172
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-31.png",
173
  "width": 897,
174
  "height": 679,
175
  "figure_size": 609063,
 
177
  },
178
  "33": {
179
  "caption": "Figure 18: Ablation study on Visual Correspondence Hallucination. Text overflow areas are highlighted with red bounding boxes.",
180
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-33.png",
181
  "width": 895,
182
  "height": 274,
183
  "figure_size": 245230,
 
185
  },
186
  "34": {
187
  "caption": "(b) Tree.",
188
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-34.png",
189
  "width": 900,
190
  "height": 511,
191
  "figure_size": 459900,
 
193
  },
194
  "35": {
195
  "caption": "(c) Tree + Commenter.",
196
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-35.png",
197
  "width": 901,
198
  "height": 513,
199
  "figure_size": 462213,
 
201
  },
202
  "37": {
203
  "caption": "Figure 19: Ablation study on DARTFormer: Finding The Best Type Of Attention. Text overflow areas are highlighted with red bounding boxes, large blank regions are highlighted with purple bounding boxes.",
204
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-37.png",
205
  "width": 895,
206
  "height": 747,
207
  "figure_size": 668565,
 
209
  },
210
  "39": {
211
  "caption": "(c) Tree + Commenter.",
212
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-39.png",
213
  "width": 899,
214
  "height": 1187,
215
  "figure_size": 1067113,
 
217
  },
218
  "41": {
219
  "caption": "Figure 20: Ablation study on CW-ERM: Improving Autonomous Driving Planning with Closed-loop Weighted Empirical Risk Minimization. Text overflow areas are highlighted with red bounding boxes, and large blank regions are highlighted with purple bounding boxes.",
220
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-41.png",
221
  "width": 898,
222
  "height": 1345,
223
  "figure_size": 1207810,
 
225
  },
226
  "43": {
227
  "caption": "(c) Tree + Commenter.",
228
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-43.png",
229
  "width": 908,
230
  "height": 1341,
231
  "figure_size": 1217628,
 
233
  },
234
  "45": {
235
  "caption": "Figure 21: Ablation study on DeepJoint: Robust Survival Modelling Under Clinical Presence Shift. Text overflow areas are highlighted with red bounding boxes.",
236
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-45.png",
237
  "width": 894,
238
  "height": 1234,
239
  "figure_size": 1103196,
 
241
  },
242
  "48": {
243
  "caption": "(c) Tree + Commenter.",
244
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-48.png",
245
  "width": 902,
246
  "height": 1266,
247
  "figure_size": 1141932,
 
249
  },
250
  "49": {
251
  "caption": "(a) A poster generated by 4o-Image , where substantial corrupted text is generated.",
252
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-49.png",
253
  "width": 949,
254
  "height": 1409,
255
  "figure_size": 1337141,
 
257
  },
258
  "50": {
259
  "caption": "(b) A poster generated by PPTAgent , where meaningless template placeholder text is remained.",
260
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-50.png",
261
  "width": 956,
262
  "height": 1433,
263
  "figure_size": 1369948,
 
265
  },
266
  "51": {
267
  "caption": "Figure 22: Examples of posters with corrupted text.(a) A poster generated by 4o-Image , where the poster is cutoff horizontally due to incomplete generation.",
268
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-51.png",
269
  "width": 966,
270
  "height": 887,
271
  "figure_size": 856842,
 
273
  },
274
  "52": {
275
  "caption": "Figure 23: Examples of posters with cutoff.",
276
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-52.png",
277
  "width": 948,
278
  "height": 962,
279
  "figure_size": 911976,
 
281
  },
282
  "53": {
283
  "caption": "(a) A poster produced by 4o-Image , featuring a figure that is low-resolution, visually corrupted, and unintelligible.",
284
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-53.png",
285
  "width": 968,
286
  "height": 951,
287
  "figure_size": 920568,
 
289
  },
290
  "54": {
291
  "caption": "(b) A poster generated by PPTAgent , where figures are rendered too small to be legible.",
292
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-54.png",
293
  "width": 958,
294
  "height": 1277,
295
  "figure_size": 1223366,
 
297
  },
298
  "55": {
299
  "caption": "Figure 24: Examples of posters with obscure figures.(a) A poster generated by OWL-4o , where there are large blanks on the poster.",
300
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-55.png",
301
  "width": 954,
302
  "height": 680,
303
  "figure_size": 648720,
 
305
  },
306
  "56": {
307
  "caption": "Figure 25: Examples of posters with large blanks.",
308
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-56.png",
309
  "width": 955,
310
  "height": 723,
311
  "figure_size": 690465,
 
313
  },
314
  "57": {
315
  "caption": "(a) A poster generated by OWL-4o , where no figures are inserted into poster.",
316
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-57.png",
317
  "width": 959,
318
  "height": 549,
319
  "figure_size": 526491,
 
321
  },
322
  "58": {
323
  "caption": "Figure 26: Examples of posters without figures.",
324
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-58.png",
325
  "width": 962,
326
  "height": 1435,
327
  "figure_size": 1380470,
 
329
  },
330
  "59": {
331
  "caption": "(a) A poster generated by PosterAgent-Qwen , where there is text overflowing outside textbox.",
332
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-59.png",
333
  "width": 957,
334
  "height": 1277,
335
  "figure_size": 1222089,
 
337
  },
338
  "60": {
339
  "caption": "Figure 27: Examples of posters with textual overflow.",
340
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-60.png",
341
  "width": 956,
342
  "height": 640,
343
  "figure_size": 611840,
 
345
  },
346
  "61": {
347
  "caption": "Figure 29: In-context references for the commenter help the VLM better identify whether the current panel falls into a failure case.",
348
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-61.png",
349
  "width": 1199,
350
  "height": 828,
351
  "figure_size": 992772,
 
353
  },
354
  "63": {
355
  "caption": "Figure 28: Failure generation examples by Stable Diffusion Ultra model [28].",
356
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-63.png",
357
  "width": 1193,
358
  "height": 785,
359
  "figure_size": 936505,
posterbuilder/contents/poster_content.json CHANGED
@@ -1,33 +1,45 @@
1
  {
2
  "meta": {
3
- "poster_title": "Paper2Poster: Towards Multimodal Poster Automation from Scientific Papers",
4
- "authors": "Wei Pang, Kevin Qinghong Lin, Xiangru Jian, Xi He, Philip Torr",
5
- "affiliations": "1 University of Waterloo, 2 National University of Singapore, 3 University of Oxford"
6
  },
7
  "sections": [
8
  {
9
- "title": "Poster Title & Author",
10
- "content": "This poster presents \textbf{Paper2Poster}, a novel approach for generating academic posters from scientific papers. Authors include Wei Pang, Kevin Qinghong Lin, Xiangru Jian, Xi He, and Philip Torr, affiliated with \textit{University of Waterloo}, \textit{National University of Singapore}, and \textit{University of Oxford}."
11
  },
12
  {
13
- "title": "Introduction",
14
- "content": "Academic posters are crucial for \textbf{scientific communication}, allowing rapid dissemination of key findings. Unlike slide decks, posters must condense entire papers into a single page, requiring \textit{multi-modal context handling}, \textcolor{red}{tight text-graphics interleaving}, and \textcolor{red}{spatial constraint respect}. Existing VLM- or LLM-only approaches lack explicit visual feedback, making it difficult to maintain logical flow and legibility."
15
  },
16
  {
17
- "title": "Benchmark & Metrics",
18
- "content": "We introduce the \textbf{Paper2Poster Benchmark}, the first benchmark for poster generation, evaluating outputs on \textcolor{blue}{Visual Quality}, \textcolor{blue}{Textual Coherence}, \textcolor{blue}{Holistic Assessment}, and \textcolor{blue}{PaperQuiz}. This benchmark pairs recent conference papers with author-designed posters, enabling systematic comparison and evaluation of generated posters."
19
  },
20
  {
21
- "title": "PosterAgent Framework",
22
- "content": "Our proposed \textbf{PosterAgent} framework is a \textit{multi-agent pipeline} that transforms scientific papers into structured visual posters. It consists of three components: \textcolor{blue}{Parser}, \textcolor{blue}{Planner}, and \textcolor{blue}{Painter-Commenter}. The Parser distills the paper into a structured asset library, the Planner aligns text-visual pairs into a binary-tree layout, and the Painter-Commenter loop refines each panel using VLM feedback."
23
  },
24
  {
25
- "title": "Evaluation & Results",
26
- "content": "Our comprehensive evaluation reveals that \textbf{PosterAgent} outperforms existing systems across nearly all metrics, using \textcolor{blue}{87\\% fewer tokens}. While GPT-4o outputs are visually appealing, they suffer from \textcolor{red}{noisy text} and poor PaperQuiz scores. Our open-source variants, based on Qwen-2.5, achieve superior performance, highlighting the effectiveness of our \textit{visual-semantic-aware asset library} and \textit{layout generation}."
27
  },
28
  {
29
- "title": "Conclusion",
30
- "content": "We present \textbf{Paper2Poster}, a new benchmark for poster generation, and the \textbf{PosterAgent} framework, which significantly enhances generation quality. Our findings chart clear directions for the next generation of fully automated poster-generation models, emphasizing the importance of \textit{structured parsing}, \textit{hierarchical planning}, and \textit{visual feedback}."
31
  }
32
  ]
33
  }
 
1
  {
2
  "meta": {
3
+ "poster_title": "Paper2Poster: Towards Multimodal Poster",
4
+ "authors": "Wei Pang\\textsuperscript{1}, Kevin Qinghong Lin\\textsuperscript{2}, Xiangru Jian\\textsuperscript{1}, Xi He\\textsuperscript{1}, Philip Torr\\textsuperscript{3}",
5
+ "affiliations": "1 University of Waterloo; 2 National University of Singapore; 3 University of Oxford"
6
  },
7
  "sections": [
8
  {
9
+ "title": "Why Posters Are Hard",
10
+ "content": "We target \\textbf{single-page, multimodal compression} of \\textit{20K+ tokens} into clear panels. Posters demand \\textcolor{blue}{tight text\u2013visual coupling}, \\textbf{layout balance}, and \\textit{readable density}. Pure LLM/VLM approaches \\textcolor{red}{miss spatial feedback}, causing overflow and incoherence. We reveal that \\textbf{visual-in-the-loop planning} is essential to preserve reading order, keep figures relevant, and sustain \\textit{engagement} within hard space limits."
11
  },
12
  {
13
+ "title": "Benchmark and Data",
14
+ "content": "We launch the \\textbf{Paper2Poster Benchmark}: \\textcolor{blue}{100 paper\u2013poster pairs} spanning \\textit{280 topics}. Average input: \\textcolor{blue}{20,370 tokens, 22.6 pages}. Output posters compress text by \\textcolor{blue}{14.4\u00d7} and figures by \\textcolor{blue}{2.6\u00d7}. Evaluation covers \\textbf{Visual Quality}, \\textbf{Textual Coherence}, \\textbf{VLM-as-Judge}, and \\textbf{PaperQuiz}. This suite spotlights \\textit{semantic alignment}, \\textbf{fluency}, and \\textcolor{blue}{reader comprehension}."
15
  },
16
  {
17
+ "title": "PaperQuiz: What Matters",
18
+ "content": "We generate \\textcolor{blue}{100 MCQs/paper}: \\textbf{50 verbatim} + \\textbf{50 interpretive}. Multiple VLM readers simulate \\textit{novice-to-expert} audiences and answer from the poster only. Scores are length-penalized to reward \\textbf{dense clarity}. Results \\textbf{correlate with human judgment}, proving PaperQuiz captures \\textcolor{blue}{information delivery} beyond surface visuals and discourages \\textcolor{red}{verbose, unfocused designs}."
19
  },
20
  {
21
+ "title": "PosterAgent Pipeline",
22
+ "content": "Our \\textbf{top-down, visual-in-the-loop} agent compresses long papers into coherent posters. \u2022 \\textbf{Parser} builds a structured asset library. \u2022 \\textbf{Planner} aligns text\u2013visual pairs and produces a \\textcolor{blue}{binary-tree layout}. \u2022 \\textbf{Painter\u2013Commenter} renders panels via code and uses VLM feedback to fix \\textcolor{red}{overflow} and misalignment. The result: \\textbf{balanced, legible}, editable posters."
23
  },
24
  {
25
+ "title": "Parser: Structured Assets",
26
+ "content": "We distill PDFs into \\textbf{section synopses} and \\textit{figure/table assets} using \\textcolor{blue}{MARKER} and \\textcolor{blue}{DOCLING}, then LLM summarization. The asset library preserves \\textbf{hierarchy} and \\textit{semantics} while shrinking context for efficient planning. This step boosts \\textbf{visual-semantic matching} and reduces \\textcolor{red}{noise}, enabling reliable downstream \\textit{layout reasoning}."
27
  },
28
  {
29
+ "title": "Planner: Layout Mastery",
30
+ "content": "We semantically match \\textbf{sections \u2194 figures} and allocate space via a \\textcolor{blue}{binary-tree layout} that preserves \\textit{reading order}, aspect ratios, and \\textbf{content length} estimates. Panels are populated iteratively, ensuring \\textbf{text brevity} and \\textit{visual balance}. This strategy stabilizes coordinates and avoids \\textcolor{red}{LLM numeric drift} in absolute placements."
31
+ },
32
+ {
33
+ "title": "Painter\u2013Commenter Loop",
34
+ "content": "The \\textbf{Painter} turns section\u2013figure pairs into crisp bullets and executable \\textcolor{blue}{python-pptx} code, rendering draft panels. The \\textbf{Commenter} VLM zooms into panels, using \\textit{in-context examples} to flag \\textcolor{red}{overflow} or \\textcolor{red}{blankness}. Iterations continue until \\textbf{fit and alignment} are achieved, producing \\textit{readable, compact} panels with minimal revision cycles."
35
+ },
36
+ {
37
+ "title": "Results: Stronger, Leaner",
38
+ "content": "Our open-source variants beat \\textcolor{blue}{4o-driven multi-agents} on most metrics, with \\textcolor{blue}{87\\% fewer tokens}. We hit \\textbf{state-of-the-art figure relevance}, near-\\textit{GT} visual similarity, and \\textbf{high VLM-as-Judge} scores. PaperQuiz confirms \\textbf{better knowledge transfer}. Cost is tiny: \\textcolor{blue}{\\$0.0045\u2013\\$0.55/poster}. Key bottleneck remains \\textcolor{red}{Engagement}, guiding future design."
39
+ },
40
+ {
41
+ "title": "Limits and Next Steps",
42
+ "content": "Current bottleneck: \\textbf{sequential panel refinement} slows throughput (~\\textcolor{blue}{4.5 min/doc}). We plan \\textbf{panel-level parallelism}, \\textit{external knowledge} integration (e.g., OpenReview), and \\textbf{human-in-the-loop} editing for higher \\textcolor{blue}{engagement}. These upgrades aim to boost \\textbf{runtime, interactivity}, and \\textit{visual storytelling}, pushing toward fully automated \\textbf{author-grade posters}."
43
  }
44
  ]
45
  }
posterbuilder/convert.py CHANGED
@@ -3,7 +3,7 @@
3
  import json, re, pathlib, shutil, os, math
4
 
5
  # ===================== Auto-locate project root =====================
6
- IMAGES_DIR_NAME = "<4o_4o>_images_and_tables" # name of the blue folder
7
 
8
  def find_project_root(start: pathlib.Path) -> pathlib.Path:
9
  cur = start.resolve()
@@ -33,7 +33,7 @@ OUTPUT_PATH = OUTPUT_DIR / "poster_output.tex"
33
  IMAGES_PARENTS = [ROOT_DIR / "Paper2Poster", ROOT_DIR]
34
 
35
  # ============ 放大与排版参数 ============
36
- BEAMER_SCALE_TARGET = 1.05 # new value for the template's \usepackage{beamerposter}[... scale=...]
37
  # Title font-size policy: single line, two lines, 3+ lines
38
  TITLE_SIZE_SINGLE = r"\Huge"
39
  TITLE_SIZE_WRAP1 = r"\huge"
@@ -46,9 +46,9 @@ BLOCK_BODY_SIZE_CMD = r"\large"
46
  CAPTION_SIZE_CMD = r"\small"
47
 
48
  # Base figure-enlargement parameters (initial values)
49
- FIG_ENLARGE_FACTOR = 1.08
50
- FIG_MIN_FRAC = 0.60
51
- FIG_MAX_FRAC = 0.98
52
 
53
  # Budget control: within each section, the allowed cap on cumulative figure height as a fraction of panel height (adapts to the word count)
54
  BASE_FIG_RATIO_LIMIT = 0.58 # baseline threshold
@@ -235,9 +235,9 @@ def inject_font_tweaks(tex: str, title_size_cmd: str) -> str:
235
  f"\\setbeamerfont{{institute}}{{size={INSTITUTE_SIZE_CMD}}}\n"
236
  f"\\setbeamerfont{{block title}}{{size={BLOCK_TITLE_SIZE_CMD}}}\n"
237
  f"\\setbeamerfont{{block body}}{{size={BLOCK_BODY_SIZE_CMD}}}\n"
238
- f"\\setbeamerfont{{caption}}{{size={CAPTION_SIZE_CMD}}}\n"
239
- "\\setlength{\\abovecaptionskip}{4pt}\n"
240
- "\\setlength{\\belowcaptionskip}{3pt}\n"
241
  )
242
  pos_doc = tex.find(r"\begin{document}")
243
  return tex[:pos_doc] + tweaks + tex[pos_doc:] if pos_doc != -1 else tex + tweaks
@@ -278,7 +278,7 @@ def inject_right_logo(tex: str) -> str:
278
  # ===================== Images and captions (relative to PaperShow/) =====================
279
  def load_arrangement_and_captions():
280
  arr = json.loads(ARRANGEMENT_PATH.read_text(encoding="utf-8"))
281
- panels = arr.get("panel_arrangement", [])
282
  figures = arr.get("figure_arrangement", [])
283
  panels_by_id = {p["panel_id"]: p for p in panels if "panel_id" in p}
284
 
@@ -341,7 +341,7 @@ def build_figures_for_sections(sections, panels_by_id, figures, cap_full, cap_ba
341
  if norm_title(sec.get("title","")) != norm_title("Poster Title & Author")}
342
  panelid_to_secidx = {}
343
  for p in panels_by_id.values():
344
- pname = norm_title(p.get("panel_name",""))
345
  if pname in sec_name_to_idx:
346
  panelid_to_secidx[p["panel_id"]] = sec_name_to_idx[pname]
347
 
@@ -414,7 +414,7 @@ def figures_to_latex(fig_list, out_tex_path: pathlib.Path, images_parent: pathli
414
  "\\begin{figure}\n"
415
  +"\\centering\n"
416
  +f"\\includegraphics[width={w:.2f}\\linewidth]{{{rel}}}\n"
417
- + (f"\\caption{{{cap}}}\n" if cap else "")
418
  +"\\end{figure}\n"
419
  )
420
  return "\n".join(chunks)
@@ -429,6 +429,8 @@ def build():
429
  sections = [s for s in sections_all if norm_title(s.get("title","")) != norm_title("Poster Title & Author")]
430
 
431
  panels_by_id, figures, cap_full, cap_base = load_arrangement_and_captions()
 
 
432
  sample_paths = [pathlib.Path(f.get("figure_path","")) for f in figures if f.get("figure_path")]
433
  images_parent = resolve_images_parent_dir(sample_paths)
434
 
@@ -463,6 +465,7 @@ def build():
463
  # Note: handle the curly braces above before the backslashes, otherwise the structure is broken prematurely
464
  cleaned_tex = cleaned_tex.replace(r"\\\\", r"\\") # avoid interference from double escaping
465
  cleaned_tex = cleaned_tex.replace(r"\\", "\\") # finally turn \\ into \
 
466
 
467
  OUTPUT_PATH.write_text(cleaned_tex, encoding="utf-8")
468
  print(f"✅ Wrote: {OUTPUT_PATH.relative_to(ROOT_DIR)}")
 
3
  import json, re, pathlib, shutil, os, math
4
 
5
  # ===================== Auto-locate project root =====================
6
+ IMAGES_DIR_NAME = "<gpt-5_gpt-5>_images_and_tables" # name of the blue folder
7
 
8
  def find_project_root(start: pathlib.Path) -> pathlib.Path:
9
  cur = start.resolve()
 
33
  IMAGES_PARENTS = [ROOT_DIR / "Paper2Poster", ROOT_DIR]
34
 
35
  # ============ Scaling and layout parameters ============
36
+ BEAMER_SCALE_TARGET = 1.0 # new value for the template's \usepackage{beamerposter}[... scale=...]
37
  # Title font-size policy: single line, two lines, 3+ lines
38
  TITLE_SIZE_SINGLE = r"\Huge"
39
  TITLE_SIZE_WRAP1 = r"\huge"
 
46
  CAPTION_SIZE_CMD = r"\small"
47
 
48
  # Base figure-enlargement parameters (initial values)
49
+ FIG_ENLARGE_FACTOR = 1.18
50
+ FIG_MIN_FRAC = 0.80
51
+ FIG_MAX_FRAC = 0.90
52
 
53
  # Budget control: within each section, the allowed cap on cumulative figure height as a fraction of panel height (adapts to the word count)
54
  BASE_FIG_RATIO_LIMIT = 0.58 # baseline threshold
 
235
  f"\\setbeamerfont{{institute}}{{size={INSTITUTE_SIZE_CMD}}}\n"
236
  f"\\setbeamerfont{{block title}}{{size={BLOCK_TITLE_SIZE_CMD}}}\n"
237
  f"\\setbeamerfont{{block body}}{{size={BLOCK_BODY_SIZE_CMD}}}\n"
238
+ # f"\\setbeamerfont{{caption}}{{size={CAPTION_SIZE_CMD}}}\n"
239
+ # "\\setlength{\\abovecaptionskip}{4pt}\n"
240
+ # "\\setlength{\\belowcaptionskip}{3pt}\n"
241
  )
242
  pos_doc = tex.find(r"\begin{document}")
243
  return tex[:pos_doc] + tweaks + tex[pos_doc:] if pos_doc != -1 else tex + tweaks
 
278
  # ===================== Images and captions (relative to PaperShow/) =====================
279
  def load_arrangement_and_captions():
280
  arr = json.loads(ARRANGEMENT_PATH.read_text(encoding="utf-8"))
281
+ panels = arr.get("panels", [])
282
  figures = arr.get("figure_arrangement", [])
283
  panels_by_id = {p["panel_id"]: p for p in panels if "panel_id" in p}
284
 
 
341
  if norm_title(sec.get("title","")) != norm_title("Poster Title & Author")}
342
  panelid_to_secidx = {}
343
  for p in panels_by_id.values():
344
+ pname = norm_title(p.get("section_name",""))
345
  if pname in sec_name_to_idx:
346
  panelid_to_secidx[p["panel_id"]] = sec_name_to_idx[pname]
347
 
 
414
  "\\begin{figure}\n"
415
  +"\\centering\n"
416
  +f"\\includegraphics[width={w:.2f}\\linewidth]{{{rel}}}\n"
417
+ # + (f"\\caption{{{cap}}}\n" if cap else "")
418
  +"\\end{figure}\n"
419
  )
420
  return "\n".join(chunks)
 
429
  sections = [s for s in sections_all if norm_title(s.get("title","")) != norm_title("Poster Title & Author")]
430
 
431
  panels_by_id, figures, cap_full, cap_base = load_arrangement_and_captions()
432
+ print(f"✅ Loaded arrangement and captions.")
433
+ print(panels_by_id.keys(),figures[:2])
434
  sample_paths = [pathlib.Path(f.get("figure_path","")) for f in figures if f.get("figure_path")]
435
  images_parent = resolve_images_parent_dir(sample_paths)
436
 
 
465
  # Note: handle the curly braces above before the backslashes, otherwise the structure is broken prematurely
466
  cleaned_tex = cleaned_tex.replace(r"\\\\", r"\\") # avoid interference from double escaping
467
  cleaned_tex = cleaned_tex.replace(r"\\", "\\") # finally turn \\ into \
468
+ cleaned_tex = cleaned_tex.replace(r"\t\t", "\\t")
469
 
470
  OUTPUT_PATH.write_text(cleaned_tex, encoding="utf-8")
471
  print(f"✅ Wrote: {OUTPUT_PATH.relative_to(ROOT_DIR)}")
posterbuilder/figure_caption.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "1": {
3
  "caption": "Figure 1: Overview of this work. We address two core challenges in scientific poster generation: Left: How to create a poster from a paper -we propose PosterAgent (Sec. 4), a framework that transforms long-context scientific papers (20K+ tokens) into structured visual posters; and Right: How to evaluate poster quality -weintroduce the Paper2Poster benchmark (Sec. 3), which enables systematic comparison between agent-generated and author-designed posters.",
4
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-1.png",
5
  "width": 239,
6
  "height": 271,
7
  "figure_size": 64769,
@@ -9,7 +9,7 @@
9
  },
10
  "3": {
11
  "caption": "Paper ( 20K tokens )",
12
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-3.png",
13
  "width": 398,
14
  "height": 265,
15
  "figure_size": 105470,
@@ -17,7 +17,7 @@
17
  },
18
  "6": {
19
  "caption": "Figure 2: Data Statistics of Paper2Poster. (a) Word cloud illustrating the diversity of research topics. (b) Textual Token statistics and Figure count statistics for input papers vs. posters provided by authors. Overall, these statistics highlight that Paper2Poster is a multimodal context compression task, requiring effective abstraction of both textual and visual content.",
20
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-6.png",
21
  "width": 564,
22
  "height": 557,
23
  "figure_size": 314148,
@@ -25,7 +25,7 @@
25
  },
26
  "7": {
27
  "caption": "Figure 3: Left : Overview of the evaluation framework in Paper2Poster. Middle : We automatically generate multiple-choice questions from each paper using an LLM (o3), forming the our PaperQuiz evaluation. Right : In PaperQuiz, we simulate multiple reader by allowing VLMs-representing different expertise levels ( e.g., student, professor)-to read each generated poster and answer the quiz. The poster that achieves the highest average score is considered the most effective in conveying the paper's content.",
28
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-7.png",
29
  "width": 1983,
30
  "height": 394,
31
  "figure_size": 781302,
@@ -33,7 +33,7 @@
33
  },
34
  "8": {
35
  "caption": "Figure 4: Illustration of the PosterAgent pipeline. Given an input paper, PosterAgent generates a structured academic poster through three modules: 1. Parser: Extracts key textual and visual assets using a combination of tools and LLM-based summarization, resulting in a structured asset library. 2. Planner: Matches assets and arranges them into coherent layouts, iteratively generating panels with a zoom-in operation. 3. Painter-Commenter: The Painter generates panel-level bullet-content along with executable code, and renders the visual output, while the Commenter-a VLM with in-context reference-provides feedback to ensure layout coherence and prevent content overflow.",
36
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-8.png",
37
  "width": 1972,
38
  "height": 969,
39
  "figure_size": 1910868,
@@ -41,7 +41,7 @@
41
  },
42
  "9": {
43
  "caption": "Figure 5: PaperQuiz's Avg. scores across different Reader VLMs (x-axis) for each poster type (legend lines). Refer to Append. Tab. 3 for full model names.",
44
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-9.png",
45
  "width": 769,
46
  "height": 505,
47
  "figure_size": 388345,
@@ -49,7 +49,7 @@
49
  },
50
  "10": {
51
  "caption": "Figure 7 presents the average token cost per poster across different methods. Our PosterAgent achieves great token efficiency, using only 101 . 1 K (4o-based) and 47 . 6 K (Qwen-based) tokens-reducing cost by 60% -87% compared to OWL-4o [6]. This translates to just $0 . 55 for 4o and $0 . 0045 for Qwen per poster, highlighting its effectiveness, (see Append. E.2 for further details).",
52
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-10.png",
53
  "width": 1948,
54
  "height": 1100,
55
  "figure_size": 2142800,
@@ -57,7 +57,7 @@
57
  },
58
  "11": {
59
  "caption": "Figure 7: Average token consumptions for different methods. Details are provided in Appendix E.1.",
60
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-11.png",
61
  "width": 701,
62
  "height": 505,
63
  "figure_size": 354005,
@@ -65,7 +65,7 @@
65
  },
66
  "12": {
67
  "caption": "Figure 6: PaperQuiz's Avg scores across different types of posters (x-axis) for readers (colored lines) on human evaluation subset.",
68
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-12.png",
69
  "width": 661,
70
  "height": 428,
71
  "figure_size": 282908,
@@ -73,7 +73,7 @@
73
  },
74
  "13": {
75
  "caption": "Figure 10: Posters for MuSc: Zero-Shot Industrial Anomaly Classification and Segmentation with Mutual Scoring of the Unlabeled Images.",
76
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-13.png",
77
  "width": 960,
78
  "height": 521,
79
  "figure_size": 500160,
@@ -81,7 +81,7 @@
81
  },
82
  "15": {
83
  "caption": "(b) PosterAgent-generated poster.(a) Author-designed poster.",
84
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-15.png",
85
  "width": 1993,
86
  "height": 810,
87
  "figure_size": 1614330,
@@ -89,7 +89,7 @@
89
  },
90
  "16": {
91
  "caption": "(a) Author-designed poster.",
92
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-16.png",
93
  "width": 945,
94
  "height": 680,
95
  "figure_size": 642600,
@@ -97,7 +97,7 @@
97
  },
98
  "17": {
99
  "caption": "(b) PosterAgent-generated poster.",
100
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-17.png",
101
  "width": 957,
102
  "height": 708,
103
  "figure_size": 677556,
@@ -105,7 +105,7 @@
105
  },
106
  "18": {
107
  "caption": "Figure 11: Posters for Neuroformer: Multimodal and Multitask Generative Pretraining for Brain Data.(a) Author-designed poster.",
108
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-18.png",
109
  "width": 938,
110
  "height": 620,
111
  "figure_size": 581560,
@@ -113,7 +113,7 @@
113
  },
114
  "19": {
115
  "caption": "Figure 12: Posters for Conformal Semantic Keypoint Detection with Statistical Guarantees.(a) Author-designed poster.",
116
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-19.png",
117
  "width": 1176,
118
  "height": 596,
119
  "figure_size": 700896,
@@ -121,7 +121,7 @@
121
  },
122
  "20": {
123
  "caption": "Figure 13: Posters for Neural Tangent Kernels for Axis-Aligned Tree Ensembles.",
124
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-20.png",
125
  "width": 790,
126
  "height": 598,
127
  "figure_size": 472420,
@@ -129,7 +129,7 @@
129
  },
130
  "22": {
131
  "caption": "(a) Author-designed poster.",
132
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-22.png",
133
  "width": 929,
134
  "height": 583,
135
  "figure_size": 541607,
@@ -137,7 +137,7 @@
137
  },
138
  "23": {
139
  "caption": "Figure 16: Posters for Identifying the Context Shift between Test Benchmarks and Production Data.",
140
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-23.png",
141
  "width": 958,
142
  "height": 646,
143
  "figure_size": 618868,
@@ -145,7 +145,7 @@
145
  },
146
  "24": {
147
  "caption": "(a) Author-designed poster.",
148
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-24.png",
149
  "width": 1190,
150
  "height": 567,
151
  "figure_size": 674730,
@@ -153,7 +153,7 @@
153
  },
154
  "29": {
155
  "caption": "(a) Direct.",
156
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-29.png",
157
  "width": 896,
158
  "height": 323,
159
  "figure_size": 289408,
@@ -161,7 +161,7 @@
161
  },
162
  "30": {
163
  "caption": "(b) Tree.(c) Tree + Commenter.",
164
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-30.png",
165
  "width": 899,
166
  "height": 644,
167
  "figure_size": 578956,
@@ -169,7 +169,7 @@
169
  },
170
  "31": {
171
  "caption": "Figure 17: Ablation study on Neuro-Symbolic Language Modeling with Automaton-augmented Retrieval. Text overflow areas are highlighted with red bounding boxes.",
172
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-31.png",
173
  "width": 897,
174
  "height": 679,
175
  "figure_size": 609063,
@@ -177,7 +177,7 @@
177
  },
178
  "33": {
179
  "caption": "Figure 18: Ablation study on Visual Correspondence Hallucination. Text overflow areas are highlighted with red bounding boxes.",
180
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-33.png",
181
  "width": 895,
182
  "height": 274,
183
  "figure_size": 245230,
@@ -185,7 +185,7 @@
185
  },
186
  "34": {
187
  "caption": "(b) Tree.",
188
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-34.png",
189
  "width": 900,
190
  "height": 511,
191
  "figure_size": 459900,
@@ -193,7 +193,7 @@
193
  },
194
  "35": {
195
  "caption": "(c) Tree + Commenter.",
196
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-35.png",
197
  "width": 901,
198
  "height": 513,
199
  "figure_size": 462213,
@@ -201,7 +201,7 @@
201
  },
202
  "37": {
203
  "caption": "Figure 19: Ablation study on DARTFormer: Finding The Best Type Of Attention. Text overflow areas are highlighted with red bounding boxes, large blank regions are highlighted with purple bounding boxes.",
204
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-37.png",
205
  "width": 895,
206
  "height": 747,
207
  "figure_size": 668565,
@@ -209,7 +209,7 @@
209
  },
210
  "39": {
211
  "caption": "(c) Tree + Commenter.",
212
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-39.png",
213
  "width": 899,
214
  "height": 1187,
215
  "figure_size": 1067113,
@@ -217,7 +217,7 @@
217
  },
218
  "41": {
219
  "caption": "Figure 20: Ablation study on CW-ERM: Improving Autonomous Driving Planning with Closed-loop Weighted Empirical Risk Minimization. Text overflow areas are highlighted with red bounding boxes, and large blank regions are highlighted with purple bounding boxes.",
220
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-41.png",
221
  "width": 898,
222
  "height": 1345,
223
  "figure_size": 1207810,
@@ -225,7 +225,7 @@
225
  },
226
  "43": {
227
  "caption": "(c) Tree + Commenter.",
228
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-43.png",
229
  "width": 908,
230
  "height": 1341,
231
  "figure_size": 1217628,
@@ -233,7 +233,7 @@
233
  },
234
  "45": {
235
  "caption": "Figure 21: Ablation study on DeepJoint: Robust Survival Modelling Under Clinical Presence Shift. Text overflow areas are highlighted with red bounding boxes.",
236
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-45.png",
237
  "width": 894,
238
  "height": 1234,
239
  "figure_size": 1103196,
@@ -241,7 +241,7 @@
241
  },
242
  "48": {
243
  "caption": "(c) Tree + Commenter.",
244
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-48.png",
245
  "width": 902,
246
  "height": 1266,
247
  "figure_size": 1141932,
@@ -249,7 +249,7 @@
249
  },
250
  "49": {
251
  "caption": "(a) A poster generated by 4o-Image , where substantial corrupted text is generated.",
252
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-49.png",
253
  "width": 949,
254
  "height": 1409,
255
  "figure_size": 1337141,
@@ -257,7 +257,7 @@
257
  },
258
  "50": {
259
  "caption": "(b) A poster generated by PPTAgent , where meaningless template placeholder text is remained.",
260
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-50.png",
261
  "width": 956,
262
  "height": 1433,
263
  "figure_size": 1369948,
@@ -265,7 +265,7 @@
265
  },
266
  "51": {
267
  "caption": "Figure 22: Examples of posters with corrupted text.(a) A poster generated by 4o-Image , where the poster is cutoff horizontally due to incomplete generation.",
268
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-51.png",
269
  "width": 966,
270
  "height": 887,
271
  "figure_size": 856842,
@@ -273,7 +273,7 @@
273
  },
274
  "52": {
275
  "caption": "Figure 23: Examples of posters with cutoff.",
276
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-52.png",
277
  "width": 948,
278
  "height": 962,
279
  "figure_size": 911976,
@@ -281,7 +281,7 @@
281
  },
282
  "53": {
283
  "caption": "(a) A poster produced by 4o-Image , featuring a figure that is low-resolution, visually corrupted, and unintelligible.",
284
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-53.png",
285
  "width": 968,
286
  "height": 951,
287
  "figure_size": 920568,
@@ -289,7 +289,7 @@
289
  },
290
  "54": {
291
  "caption": "(b) A poster generated by PPTAgent , where figures are rendered too small to be legible.",
292
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-54.png",
293
  "width": 958,
294
  "height": 1277,
295
  "figure_size": 1223366,
@@ -297,7 +297,7 @@
297
  },
298
  "55": {
299
  "caption": "Figure 24: Examples of posters with obscure figures.(a) A poster generated by OWL-4o , where there are large blanks on the poster.",
300
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-55.png",
301
  "width": 954,
302
  "height": 680,
303
  "figure_size": 648720,
@@ -305,7 +305,7 @@
305
  },
306
  "56": {
307
  "caption": "Figure 25: Examples of posters with large blanks.",
308
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-56.png",
309
  "width": 955,
310
  "height": 723,
311
  "figure_size": 690465,
@@ -313,7 +313,7 @@
313
  },
314
  "57": {
315
  "caption": "(a) A poster generated by OWL-4o , where no figures are inserted into poster.",
316
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-57.png",
317
  "width": 959,
318
  "height": 549,
319
  "figure_size": 526491,
@@ -321,7 +321,7 @@
321
  },
322
  "58": {
323
  "caption": "Figure 26: Examples of posters without figures.",
324
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-58.png",
325
  "width": 962,
326
  "height": 1435,
327
  "figure_size": 1380470,
@@ -329,7 +329,7 @@
329
  },
330
  "59": {
331
  "caption": "(a) A poster generated by PosterAgent-Qwen , where there is text overflowing outside textbox.",
332
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-59.png",
333
  "width": 957,
334
  "height": 1277,
335
  "figure_size": 1222089,
@@ -337,7 +337,7 @@
337
  },
338
  "60": {
339
  "caption": "Figure 27: Examples of posters with textual overflow.",
340
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-60.png",
341
  "width": 956,
342
  "height": 640,
343
  "figure_size": 611840,
@@ -345,7 +345,7 @@
345
  },
346
  "61": {
347
  "caption": "Figure 29: In-context references for the commenter help the VLM better identify whether the current panel falls into a failure case.",
348
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-61.png",
349
  "width": 1199,
350
  "height": 828,
351
  "figure_size": 992772,
@@ -353,7 +353,7 @@
353
  },
354
  "63": {
355
  "caption": "Figure 28: Failure generation examples by Stable Diffusion Ultra model [28].",
356
- "image_path": "<4o_4o>_images_and_tables/paper/paper-picture-63.png",
357
  "width": 1193,
358
  "height": 785,
359
  "figure_size": 936505,
 
1
  {
2
  "1": {
3
  "caption": "Figure 1: Overview of this work. We address two core challenges in scientific poster generation: Left: How to create a poster from a paper -we propose PosterAgent (Sec. 4), a framework that transforms long-context scientific papers (20K+ tokens) into structured visual posters; and Right: How to evaluate poster quality -weintroduce the Paper2Poster benchmark (Sec. 3), which enables systematic comparison between agent-generated and author-designed posters.",
4
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-1.png",
5
  "width": 239,
6
  "height": 271,
7
  "figure_size": 64769,
 
9
  },
10
  "3": {
11
  "caption": "Paper ( 20K tokens )",
12
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-3.png",
13
  "width": 398,
14
  "height": 265,
15
  "figure_size": 105470,
 
17
  },
18
  "6": {
19
  "caption": "Figure 2: Data Statistics of Paper2Poster. (a) Word cloud illustrating the diversity of research topics. (b) Textual Token statistics and Figure count statistics for input papers vs. posters provided by authors. Overall, these statistics highlight that Paper2Poster is a multimodal context compression task, requiring effective abstraction of both textual and visual content.",
20
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-6.png",
21
  "width": 564,
22
  "height": 557,
23
  "figure_size": 314148,
 
25
  },
26
  "7": {
27
  "caption": "Figure 3: Left : Overview of the evaluation framework in Paper2Poster. Middle : We automatically generate multiple-choice questions from each paper using an LLM (o3), forming the our PaperQuiz evaluation. Right : In PaperQuiz, we simulate multiple reader by allowing VLMs-representing different expertise levels ( e.g., student, professor)-to read each generated poster and answer the quiz. The poster that achieves the highest average score is considered the most effective in conveying the paper's content.",
28
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-7.png",
29
  "width": 1983,
30
  "height": 394,
31
  "figure_size": 781302,
 
33
  },
34
  "8": {
35
  "caption": "Figure 4: Illustration of the PosterAgent pipeline. Given an input paper, PosterAgent generates a structured academic poster through three modules: 1. Parser: Extracts key textual and visual assets using a combination of tools and LLM-based summarization, resulting in a structured asset library. 2. Planner: Matches assets and arranges them into coherent layouts, iteratively generating panels with a zoom-in operation. 3. Painter-Commenter: The Painter generates panel-level bullet-content along with executable code, and renders the visual output, while the Commenter-a VLM with in-context reference-provides feedback to ensure layout coherence and prevent content overflow.",
36
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-8.png",
37
  "width": 1972,
38
  "height": 969,
39
  "figure_size": 1910868,
 
41
  },
42
  "9": {
43
  "caption": "Figure 5: PaperQuiz's Avg. scores across different Reader VLMs (x-axis) for each poster type (legend lines). Refer to Append. Tab. 3 for full model names.",
44
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-9.png",
45
  "width": 769,
46
  "height": 505,
47
  "figure_size": 388345,
 
49
  },
50
  "10": {
51
  "caption": "Figure 7 presents the average token cost per poster across different methods. Our PosterAgent achieves great token efficiency, using only 101 . 1 K (4o-based) and 47 . 6 K (Qwen-based) tokens-reducing cost by 60% -87% compared to OWL-4o [6]. This translates to just $0 . 55 for 4o and $0 . 0045 for Qwen per poster, highlighting its effectiveness, (see Append. E.2 for further details).",
52
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-10.png",
53
  "width": 1948,
54
  "height": 1100,
55
  "figure_size": 2142800,
 
57
  },
58
  "11": {
59
  "caption": "Figure 7: Average token consumptions for different methods. Details are provided in Appendix E.1.",
60
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-11.png",
61
  "width": 701,
62
  "height": 505,
63
  "figure_size": 354005,
 
65
  },
66
  "12": {
67
  "caption": "Figure 6: PaperQuiz's Avg scores across different types of posters (x-axis) for readers (colored lines) on human evaluation subset.",
68
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-12.png",
69
  "width": 661,
70
  "height": 428,
71
  "figure_size": 282908,
 
73
  },
74
  "13": {
75
  "caption": "Figure 10: Posters for MuSc: Zero-Shot Industrial Anomaly Classification and Segmentation with Mutual Scoring of the Unlabeled Images.",
76
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-13.png",
77
  "width": 960,
78
  "height": 521,
79
  "figure_size": 500160,
 
81
  },
82
  "15": {
83
  "caption": "(b) PosterAgent-generated poster.(a) Author-designed poster.",
84
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-15.png",
85
  "width": 1993,
86
  "height": 810,
87
  "figure_size": 1614330,
 
89
  },
90
  "16": {
91
  "caption": "(a) Author-designed poster.",
92
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-16.png",
93
  "width": 945,
94
  "height": 680,
95
  "figure_size": 642600,
 
97
  },
98
  "17": {
99
  "caption": "(b) PosterAgent-generated poster.",
100
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-17.png",
101
  "width": 957,
102
  "height": 708,
103
  "figure_size": 677556,
 
105
  },
106
  "18": {
107
  "caption": "Figure 11: Posters for Neuroformer: Multimodal and Multitask Generative Pretraining for Brain Data.(a) Author-designed poster.",
108
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-18.png",
109
  "width": 938,
110
  "height": 620,
111
  "figure_size": 581560,
 
113
  },
114
  "19": {
115
  "caption": "Figure 12: Posters for Conformal Semantic Keypoint Detection with Statistical Guarantees.(a) Author-designed poster.",
116
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-19.png",
117
  "width": 1176,
118
  "height": 596,
119
  "figure_size": 700896,
 
121
  },
122
  "20": {
123
  "caption": "Figure 13: Posters for Neural Tangent Kernels for Axis-Aligned Tree Ensembles.",
124
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-20.png",
125
  "width": 790,
126
  "height": 598,
127
  "figure_size": 472420,
 
129
  },
130
  "22": {
131
  "caption": "(a) Author-designed poster.",
132
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-22.png",
133
  "width": 929,
134
  "height": 583,
135
  "figure_size": 541607,
 
137
  },
138
  "23": {
139
  "caption": "Figure 16: Posters for Identifying the Context Shift between Test Benchmarks and Production Data.",
140
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-23.png",
141
  "width": 958,
142
  "height": 646,
143
  "figure_size": 618868,
 
145
  },
146
  "24": {
147
  "caption": "(a) Author-designed poster.",
148
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-24.png",
149
  "width": 1190,
150
  "height": 567,
151
  "figure_size": 674730,
 
153
  },
154
  "29": {
155
  "caption": "(a) Direct.",
156
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-29.png",
157
  "width": 896,
158
  "height": 323,
159
  "figure_size": 289408,
 
161
  },
162
  "30": {
163
  "caption": "(b) Tree.(c) Tree + Commenter.",
164
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-30.png",
165
  "width": 899,
166
  "height": 644,
167
  "figure_size": 578956,
 
169
  },
170
  "31": {
171
  "caption": "Figure 17: Ablation study on Neuro-Symbolic Language Modeling with Automaton-augmented Retrieval. Text overflow areas are highlighted with red bounding boxes.",
172
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-31.png",
173
  "width": 897,
174
  "height": 679,
175
  "figure_size": 609063,
 
177
  },
178
  "33": {
179
  "caption": "Figure 18: Ablation study on Visual Correspondence Hallucination. Text overflow areas are highlighted with red bounding boxes.",
180
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-33.png",
181
  "width": 895,
182
  "height": 274,
183
  "figure_size": 245230,
 
185
  },
186
  "34": {
187
  "caption": "(b) Tree.",
188
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-34.png",
189
  "width": 900,
190
  "height": 511,
191
  "figure_size": 459900,
 
193
  },
194
  "35": {
195
  "caption": "(c) Tree + Commenter.",
196
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-35.png",
197
  "width": 901,
198
  "height": 513,
199
  "figure_size": 462213,
 
201
  },
202
  "37": {
203
  "caption": "Figure 19: Ablation study on DARTFormer: Finding The Best Type Of Attention. Text overflow areas are highlighted with red bounding boxes, large blank regions are highlighted with purple bounding boxes.",
204
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-37.png",
205
  "width": 895,
206
  "height": 747,
207
  "figure_size": 668565,
 
209
  },
210
  "39": {
211
  "caption": "(c) Tree + Commenter.",
212
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-39.png",
213
  "width": 899,
214
  "height": 1187,
215
  "figure_size": 1067113,
 
217
  },
218
  "41": {
219
  "caption": "Figure 20: Ablation study on CW-ERM: Improving Autonomous Driving Planning with Closed-loop Weighted Empirical Risk Minimization. Text overflow areas are highlighted with red bounding boxes, and large blank regions are highlighted with purple bounding boxes.",
220
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-41.png",
221
  "width": 898,
222
  "height": 1345,
223
  "figure_size": 1207810,
 
225
  },
226
  "43": {
227
  "caption": "(c) Tree + Commenter.",
228
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-43.png",
229
  "width": 908,
230
  "height": 1341,
231
  "figure_size": 1217628,
 
233
  },
234
  "45": {
235
  "caption": "Figure 21: Ablation study on DeepJoint: Robust Survival Modelling Under Clinical Presence Shift. Text overflow areas are highlighted with red bounding boxes.",
236
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-45.png",
237
  "width": 894,
238
  "height": 1234,
239
  "figure_size": 1103196,
 
241
  },
242
  "48": {
243
  "caption": "(c) Tree + Commenter.",
244
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-48.png",
245
  "width": 902,
246
  "height": 1266,
247
  "figure_size": 1141932,
 
249
  },
250
  "49": {
251
  "caption": "(a) A poster generated by 4o-Image , where substantial corrupted text is generated.",
252
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-49.png",
253
  "width": 949,
254
  "height": 1409,
255
  "figure_size": 1337141,
 
257
  },
258
  "50": {
259
  "caption": "(b) A poster generated by PPTAgent , where meaningless template placeholder text is remained.",
260
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-50.png",
261
  "width": 956,
262
  "height": 1433,
263
  "figure_size": 1369948,
 
265
  },
266
  "51": {
267
  "caption": "Figure 22: Examples of posters with corrupted text.(a) A poster generated by 4o-Image , where the poster is cutoff horizontally due to incomplete generation.",
268
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-51.png",
269
  "width": 966,
270
  "height": 887,
271
  "figure_size": 856842,
 
273
  },
274
  "52": {
275
  "caption": "Figure 23: Examples of posters with cutoff.",
276
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-52.png",
277
  "width": 948,
278
  "height": 962,
279
  "figure_size": 911976,
 
281
  },
282
  "53": {
283
  "caption": "(a) A poster produced by 4o-Image , featuring a figure that is low-resolution, visually corrupted, and unintelligible.",
284
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-53.png",
285
  "width": 968,
286
  "height": 951,
287
  "figure_size": 920568,
 
289
  },
290
  "54": {
291
  "caption": "(b) A poster generated by PPTAgent , where figures are rendered too small to be legible.",
292
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-54.png",
293
  "width": 958,
294
  "height": 1277,
295
  "figure_size": 1223366,
 
297
  },
298
  "55": {
299
  "caption": "Figure 24: Examples of posters with obscure figures.(a) A poster generated by OWL-4o , where there are large blanks on the poster.",
300
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-55.png",
301
  "width": 954,
302
  "height": 680,
303
  "figure_size": 648720,
 
305
  },
306
  "56": {
307
  "caption": "Figure 25: Examples of posters with large blanks.",
308
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-56.png",
309
  "width": 955,
310
  "height": 723,
311
  "figure_size": 690465,
 
313
  },
314
  "57": {
315
  "caption": "(a) A poster generated by OWL-4o , where no figures are inserted into poster.",
316
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-57.png",
317
  "width": 959,
318
  "height": 549,
319
  "figure_size": 526491,
 
321
  },
322
  "58": {
323
  "caption": "Figure 26: Examples of posters without figures.",
324
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-58.png",
325
  "width": 962,
326
  "height": 1435,
327
  "figure_size": 1380470,
 
329
  },
330
  "59": {
331
  "caption": "(a) A poster generated by PosterAgent-Qwen , where there is text overflowing outside textbox.",
332
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-59.png",
333
  "width": 957,
334
  "height": 1277,
335
  "figure_size": 1222089,
 
337
  },
338
  "60": {
339
  "caption": "Figure 27: Examples of posters with textual overflow.",
340
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-60.png",
341
  "width": 956,
342
  "height": 640,
343
  "figure_size": 611840,
 
345
  },
346
  "61": {
347
  "caption": "Figure 29: In-context references for the commenter help the VLM better identify whether the current panel falls into a failure case.",
348
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-61.png",
349
  "width": 1199,
350
  "height": 828,
351
  "figure_size": 992772,
 
353
  },
354
  "63": {
355
  "caption": "Figure 28: Failure generation examples by Stable Diffusion Ultra model [28].",
356
+ "image_path": "<gpt-5_gpt-5>_images_and_tables/paper/paper-picture-63.png",
357
  "width": 1193,
358
  "height": 785,
359
  "figure_size": 936505,
posterbuilder/latex_proj/figures.zip DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b121c93aec90deeba8b04260f15d228536bfda456001dd9499d97cb35ff3d17b
3
- size 1911874
 
 
 
 
posterbuilder/latex_proj/poster_output.tex CHANGED
@@ -11,7 +11,7 @@
11
 
12
  \usepackage[T1]{fontenc}
13
  \usepackage{lmodern}
14
- \usepackage[size=custom,width=120,height=72,scale=1.05]{beamerposter}
15
  \usetheme{gemini}
16
  \usecolortheme{cam}
17
  \usepackage{graphicx}
@@ -22,6 +22,13 @@
22
  \pgfplotsset{compat=1.14}
23
  \usepackage{anyfontsize}
24
 
 
 
 
 
 
 
 
25
  % ====================
26
  % Lengths
27
  % ====================
@@ -39,11 +46,11 @@
39
  % Title
40
  % ====================
41
 
42
- \title{Paper2Poster: \\ Towards Multimodal Poster Automation from Scientific Papers}
43
 
44
- \author{Wei Pang, Kevin Qinghong Lin, Xiangru Jian, Xi He, Philip Torr}
45
 
46
- \institute[shortinst]{1 University of Waterloo, 2 National University of Singapore, 3 University of Oxford}
47
 
48
  % ====================
49
  % Footer (optional)
@@ -60,8 +67,8 @@
60
  % ====================
61
 
62
  % use this to include logos on the left and/or right side of the header:
63
- % \logoright{\includegraphics[height=7cm]{logo1.pdf}}
64
- % \logoleft{\includegraphics[height=7cm]{logo2.pdf}}
65
 
66
  % ====================
67
  % Body
@@ -69,14 +76,11 @@
69
 
70
 
71
  % --- injected font tweaks ---
72
- \setbeamerfont{title}{size=\huge}
73
  \setbeamerfont{author}{size=\Large}
74
  \setbeamerfont{institute}{size=\large}
75
  \setbeamerfont{block title}{size=\Large}
76
  \setbeamerfont{block body}{size=\large}
77
- \setbeamerfont{caption}{size=\small}
78
- \setlength{\abovecaptionskip}{4pt}
79
- \setlength{\belowcaptionskip}{3pt}
80
  \begin{document}
81
 
82
  % Refer to https://github.com/k4rtik/uchicago-poster
@@ -85,28 +89,39 @@
85
  {
86
  \begin{tikzpicture}[remember picture,overlay]
87
  \node [anchor=north west, inner sep=3cm] at ([xshift=0.0cm,yshift=1.0cm]current page.north west)
88
- {\includegraphics[height=4.5cm]{logos/cambridge-reversed-color-logo.eps}};
89
-
90
- \node[anchor=north east, inner sep=2.0cm] at ([xshift=-2.0cm,yshift=0.0cm]current page.north east)
91
- {\includegraphics[height=6.0cm]{logo.png}};
92
- \end{tikzpicture}
93
  }
94
 
95
  \begin{frame}[t]
96
  \begin{columns}[t]
97
  \separatorcolumn
98
  \begin{column}{\colwidth}
99
- \begin{block}{Introduction}
100
- Academic posters are crucial for \textbf\{scientific communication\}, allowing rapid dissemination of key findings. Unlike slide decks, posters must condense entire papers into a single page, requiring \textit\{multi-modal context handling\}, \textcolor\{red\}\{tight text-graphics interleaving\}, and \textcolor\{red\}\{spatial constraint respect\}. Existing VLM- or LLM-only approaches lack explicit visual feedback, making it difficult to maintain logical flow and legibility.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  \end{block}
102
 
103
- \begin{block}{Benchmark \& Metrics}
104
- We introduce the \textbf\{Paper2Poster Benchmark\}, the first benchmark for poster generation, evaluating outputs on \textcolor\{blue\}\{Visual Quality\}, \textcolor\{blue\}\{Textual Coherence\}, \textcolor\{blue\}\{Holistic Assessment\}, and \textcolor\{blue\}\{PaperQuiz\}. This benchmark pairs recent conference papers with author-designed posters, enabling systematic comparison and evaluation of generated posters.
105
 
106
  \begin{figure}
107
  \centering
108
- \includegraphics[width=0.60\linewidth]{figures/paper-picture-1.png}
109
- \caption{Overview of this work. We address two core challenges in scientific poster generation: Left: How to create a poster from a paper -we propose PosterAgent (Sec. 4), a framework that transforms long-context scientific papers (20K+ tokens) into structured visual posters; and Right: How to evaluate poster quality -weintroduce the Paper2Poster benchmark (Sec. 3), which enables systematic comparison between agent-generated and author-designed posters.}
110
  \end{figure}
111
 
112
  \end{block}
@@ -114,23 +129,26 @@ We introduce the \textbf\{Paper2Poster Benchmark\}, the first benchmark for pos
114
  \end{column}
115
  \separatorcolumn
116
  \begin{column}{\colwidth}
117
- \begin{block}{PosterAgent Framework}
118
- Our proposed \textbf\{PosterAgent\} framework is a \textit\{multi-agent pipeline\} that transforms scientific papers into structured visual posters. It consists of three components: \textcolor\{blue\}\{Parser\}, \textcolor\{blue\}\{Planner\}, and \textcolor\{blue\}\{Painter-Commenter\}. The Parser distills the paper into a structured asset library, the Planner aligns text-visual pairs into a binary-tree layout, and the Painter-Commenter loop refines each panel using VLM feedback.
119
 
120
  \begin{figure}
121
  \centering
122
- \includegraphics[width=0.78\linewidth]{figures/paper-picture-8.png}
123
- \caption{Illustration of the PosterAgent pipeline. Given an input paper, PosterAgent generates a structured academic poster through three modules: 1. Parser: Extracts key textual and visual assets using a combination of tools and LLM-based summarization, resulting in a structured asset library. 2. Planner: Matches assets and arranges them into coherent layouts, iteratively generating panels with a zoom-in operation. 3. Painter-Commenter: The Painter generates panel-level bullet-content along with executable code, and renders the visual output, while the Commenter-a VLM with in-context reference-provides feedback to ensure layout coherence and prevent content overflow.}
124
  \end{figure}
125
 
126
  \end{block}
127
 
128
- \begin{block}{Evaluation \& Results}
129
- Our comprehensive evaluation reveals that \textbf\{PosterAgent\} outperforms existing systems across nearly all metrics, using \textcolor\{blue\}\{87\\% fewer tokens\}. While GPT-4o outputs are visually appealing, they suffer from \textcolor\{red\}\{noisy text\} and poor PaperQuiz scores. Our open-source variants, based on Qwen-2.5, achieve superior performance, highlighting the effectiveness of our \textit\{visual-semantic-aware asset library\} and \textit\{layout generation\}.
 
 
 
 
130
 
131
  \begin{figure}
132
  \centering
133
- \includegraphics[width=0.79\linewidth]{figures/paper-table-1.png}
134
  \end{figure}
135
 
136
  \end{block}
@@ -138,8 +156,28 @@ Our comprehensive evaluation reveals that \textbf\{PosterAgent\} outperforms ex
138
  \end{column}
139
  \separatorcolumn
140
  \begin{column}{\colwidth}
141
- \begin{block}{Conclusion}
142
- We present \textbf\{Paper2Poster\}, a new benchmark for poster generation, and the \textbf\{PosterAgent\} framework, which significantly enhances generation quality. Our findings chart clear directions for the next generation of fully automated poster-generation models, emphasizing the importance of \textit\{structured parsing\}, \textit\{hierarchical planning\}, and \textit\{visual feedback\}.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  \end{block}
144
 
145
  \end{column}
 
11
 
12
  \usepackage[T1]{fontenc}
13
  \usepackage{lmodern}
14
+ \usepackage[size=custom,width=120,height=72,scale=1.0]{beamerposter}
15
  \usetheme{gemini}
16
  \usecolortheme{cam}
17
  \usepackage{graphicx}
 
22
  \pgfplotsset{compat=1.14}
23
  \usepackage{anyfontsize}
24
 
25
+ \definecolor{nipspurple}{RGB}{94,46,145}
26
+ \setbeamercolor{headline}{bg=white, fg=black}
27
+ \setbeamercolor{block title}{bg=nipspurple, fg=white}
28
+ \addtobeamertemplate{block begin}{
29
+ \setlength{\textpaddingtop}{0.2em}%
30
+ \setlength{\textpaddingbottom}{0.2em}%
31
+ }{}
32
  % ====================
33
  % Lengths
34
  % ====================
 
46
  % Title
47
  % ====================
48
 
49
+ \title{Paper2Poster: Towards Multimodal Poster}
50
 
51
+ \author{Wei Pang\textsuperscript{1}, Kevin Qinghong Lin\textsuperscript{2}, Xiangru Jian\textsuperscript{1}, Xi He\textsuperscript{1}, Philip Torr\textsuperscript{3}}
52
 
53
+ \institute[shortinst]{1 University of Waterloo; 2 National University of Singapore; 3 University of Oxford}
54
 
55
  % ====================
56
  % Footer (optional)
 
67
  % ====================
68
 
69
  % use this to include logos on the left and/or right side of the header:
70
+ \logoright{\includegraphics[height=5cm]{logos/right_logo.png}}
71
+ \logoleft{\includegraphics[height=4cm]{logos/left_logo.png}}
72
 
73
  % ====================
74
  % Body
 
76
 
77
 
78
  % --- injected font tweaks ---
79
+ \setbeamerfont{title}{size=\Huge}
80
  \setbeamerfont{author}{size=\Large}
81
  \setbeamerfont{institute}{size=\large}
82
  \setbeamerfont{block title}{size=\Large}
83
  \setbeamerfont{block body}{size=\large}
 
 
 
84
  \begin{document}
85
 
86
  % Refer to https://github.com/k4rtik/uchicago-poster
 
89
  {
90
  \begin{tikzpicture}[remember picture,overlay]
91
  \node [anchor=north west, inner sep=3cm] at ([xshift=0.0cm,yshift=1.0cm]current page.north west)
92
+ \end{tikzpicture}
 
 
 
 
93
  }
94
 
95
  \begin{frame}[t]
96
  \begin{columns}[t]
97
  \separatorcolumn
98
  \begin{column}{\colwidth}
99
+ \begin{block}{Why Posters Are Hard}
100
+ We target \textbf{single-page, multimodal compression} of \textit{20K+ tokens} into clear panels. Posters demand \textcolor{blue}{tight text–visual coupling}, \textbf{layout balance}, and \textit{readable density}. Pure LLM/VLM approaches \textcolor{red}{miss spatial feedback}, causing overflow and incoherence. We reveal that \textbf{visual-in-the-loop planning} is essential to preserve reading order, keep figures relevant, and sustain \textit{engagement} within hard space limits.
101
+
102
+ \begin{figure}
103
+ \centering
104
+ \includegraphics[width=0.80\linewidth]{figures/paper-picture-1.png}
105
+ \end{figure}
106
+
107
+ \end{block}
108
+
109
+ \begin{block}{Benchmark and Data}
110
+ We launch the \textbf{Paper2Poster Benchmark}: \textcolor{blue}{100 paper–poster pairs} spanning \textit{280 topics}. Average input: \textcolor{blue}{20,370 tokens, 22.6 pages}. Output posters compress text by \textcolor{blue}{14.4×} and figures by \textcolor{blue}{2.6×}. Evaluation covers \textbf{Visual Quality}, \textbf{Textual Coherence}, \textbf{VLM-as-Judge}, and \textbf{PaperQuiz}. This suite spotlights \textit{semantic alignment}, \textbf{fluency}, and \textcolor{blue}{reader comprehension}.
111
+
112
+ \begin{figure}
113
+ \centering
114
+ \includegraphics[width=0.80\linewidth]{figures/paper-picture-6.png}
115
+ \end{figure}
116
+
117
  \end{block}
118
 
119
+ \begin{block}{PaperQuiz: What Matters}
120
+ We generate \textcolor{blue}{100 MCQs/paper}: \textbf{50 verbatim} + \textbf{50 interpretive}. Multiple VLM readers simulate \textit{novice-to-expert} audiences and answer from the poster only. Scores are length-penalized to reward \textbf{dense clarity}. Results \textbf{correlate with human judgment}, proving PaperQuiz captures \textcolor{blue}{information delivery} beyond surface visuals and discourages \textcolor{red}{verbose, unfocused designs}.
121
 
122
  \begin{figure}
123
  \centering
124
+ \includegraphics[width=0.80\linewidth]{figures/paper-picture-7.png}
 
125
  \end{figure}
126
 
127
  \end{block}
 
129
  \end{column}
130
  \separatorcolumn
131
  \begin{column}{\colwidth}
132
+ \begin{block}{PosterAgent Pipeline}
133
+ Our \textbf{top-down, visual-in-the-loop} agent compresses long papers into coherent posters. \textbf{Parser} builds a structured asset library. \textbf{Planner} aligns text–visual pairs and produces a \textcolor{blue}{binary-tree layout}. \textbf{Painter–Commenter} renders panels via code and uses VLM feedback to fix \textcolor{red}{overflow} and misalignment. The result: \textbf{balanced, legible}, editable posters.
134
 
135
  \begin{figure}
136
  \centering
137
+ \includegraphics[width=0.80\linewidth]{figures/paper-picture-8.png}
 
138
  \end{figure}
139
 
140
  \end{block}
141
 
142
+ \begin{block}{Parser: Structured Assets}
143
+ We distill PDFs into \textbf{section synopses} and \textit{figure/table assets} using \textcolor{blue}{MARKER} and \textcolor{blue}{DOCLING}, then LLM summarization. The asset library preserves \textbf{hierarchy} and \textit{semantics} while shrinking context for efficient planning. This step boosts \textbf{visual-semantic matching} and reduces \textcolor{red}{noise}, enabling reliable downstream \textit{layout reasoning}.
144
+ \end{block}
145
+
146
+ \begin{block}{Planner: Layout Mastery}
147
+ We semantically match \textbf{sections ↔ figures} and allocate space via a \textcolor{blue}{binary-tree layout} that preserves \textit{reading order}, aspect ratios, and \textbf{content length} estimates. Panels are populated iteratively, ensuring \textbf{text brevity} and \textit{visual balance}. This strategy stabilizes coordinates and avoids \textcolor{red}{LLM numeric drift} in absolute placements.
148
 
149
  \begin{figure}
150
  \centering
151
+ \includegraphics[width=0.80\linewidth]{figures/paper-picture-30.png}
152
  \end{figure}
153
 
154
  \end{block}
 
156
  \end{column}
157
  \separatorcolumn
158
  \begin{column}{\colwidth}
159
+ \begin{block}{Painter–Commenter Loop}
160
+ The \textbf{Painter} turns section–figure pairs into crisp bullets and executable \textcolor{blue}{python-pptx} code, rendering draft panels. The \textbf{Commenter} VLM zooms into panels, using \textit{in-context examples} to flag \textcolor{red}{overflow} or \textcolor{red}{blankness}. Iterations continue until \textbf{fit and alignment} are achieved, producing \textit{readable, compact} panels with minimal revision cycles.
161
+
162
+ \begin{figure}
163
+ \centering
164
+ \includegraphics[width=0.80\linewidth]{figures/paper-picture-61.png}
165
+ \end{figure}
166
+
167
+ \end{block}
168
+
169
+ \begin{block}{Results: Stronger, Leaner}
170
+ Our open-source variants beat \textcolor{blue}{4o-driven multi-agents} on most metrics, with \textcolor{blue}{87\% fewer tokens}. We hit \textbf{state-of-the-art figure relevance}, near-\textit{GT} visual similarity, and \textbf{high VLM-as-Judge} scores. PaperQuiz confirms \textbf{better knowledge transfer}. Cost is tiny: \textcolor{blue}{\$0.0045–\$0.55/poster}. Key bottleneck remains \textcolor{red}{Engagement}, guiding future design.
171
+
172
+ \begin{figure}
173
+ \centering
174
+ \includegraphics[width=0.80\linewidth]{figures/paper-table-1.png}
175
+ \end{figure}
176
+
177
+ \end{block}
178
+
179
+ \begin{block}{Limits and Next Steps}
180
+ Current bottleneck: \textbf{sequential panel refinement} slows throughput (\textasciitilde{}\textcolor{blue}{4.5 min/doc}). We plan \textbf{panel-level parallelism}, \textit{external knowledge} integration (e.g., OpenReview), and \textbf{human-in-the-loop} editing for higher \textcolor{blue}{engagement}. These upgrades aim to boost \textbf{runtime, interactivity}, and \textit{visual storytelling}, pushing toward fully automated \textbf{author-grade posters}.
181
  \end{block}
182
 
183
  \end{column}
posterbuilder/latex_proj/poster_output_fix.tex DELETED
@@ -1,139 +0,0 @@
1
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2
-
3
- % LaTeX Template for IAHR YPN Congress
4
-
5
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
6
-
7
- %----------------------------------------------------------------------------------------
8
- % PACKAGES AND OTHER DOCUMENT CONFIGURATIONS
9
- %----------------------------------------------------------------------------------------
10
-
11
- \documentclass[landscape,a0paper,fontscale=0.31,margin=7mm]{baposter} % Adjust the font scale/size here
12
-
13
- \usepackage{graphicx} % Required for including images
14
- \graphicspath{{figures/}} % Directory in which figures are stored
15
-
16
- \usepackage{hyperref}
17
- \hypersetup{colorlinks, citecolor=blue, filecolor=blue, linkcolor=blue, urlcolor=blue}
18
-
19
- \usepackage{amsmath} % For typesetting math
20
- \usepackage{amssymb} % Adds new symbols to be used in math mode
21
-
22
- \usepackage{booktabs} % Top and bottom rules for tables
23
- \usepackage{enumitem} % Used to reduce itemize/enumerate spacing
24
- \usepackage{palatino} % Use the Palatino font
25
- \usepackage[font=small,labelfont=bf]{caption} % Required for specifying captions to tables and figures
26
-
27
- \usepackage{multicol} % Required for multiple columns
28
- \setlength{\columnsep}{1.5em} % Slightly increase the space between columns
29
- \setlength{\columnseprule}{0mm} % No horizontal rule between columns
30
-
31
- \usepackage{tikz} % Required for flow chart
32
- \usetikzlibrary{shapes,arrows} % Tikz libraries required for the flow chart in the template
33
-
34
- \newcommand{\compresslist}{ % Define a command to reduce spacing within itemize/enumerate environments, this is used right after \begin{itemize} or \begin{enumerate}
35
- \setlength{\itemsep}{1pt}
36
- \setlength{\parskip}{0pt}
37
- \setlength{\parsep}{0pt}
38
- }
39
-
40
- \definecolor{lightblue}{rgb}{0.145,0.6666,1} % Defines the color used for content box headers
41
-
42
- \begin{document}
43
-
44
- \begin{poster}
45
- {headerborder=closed, % Adds a border around the header of content boxes
46
- colspacing=0.6em, % Column spacing
47
- bgColorOne=white, % Background color for the gradient on the left side of the poster
48
- bgColorTwo=white, % Background color for the gradient on the right side of the poster
49
- borderColor=lightblue, % Border color
50
- headerColorOne=black, % Background color for the header in the content boxes (left side)
51
- headerColorTwo=lightblue, % Background color for the header in the content boxes (right side)
52
- headerFontColor=white, % Text color for the header text in the content boxes
53
- boxColorOne=white, % Background color of the content boxes
54
- textborder=roundedleft, % Format of the border around content boxes, can be: none, bars, coils, triangles, rectangle, rounded, roundedsmall, roundedright or faded
55
- eyecatcher=true, % Set to false for ignoring the left logo in the title and move the title left
56
- headerheight=0.1\textheight, % Height of the header
57
- headershape=roundedright, % Specify the rounded corner in the content box headers, can be: rectangle, small-rounded, roundedright, roundedleft or rounded
58
- headerfont=\Large\bf\textsc, % Large, bold and sans serif font in the headers of content boxes
59
- %textfont={\setlength{\parindent}{1.5em}}, % Uncomment for paragraph indentation
60
- linewidth=2pt % Width of the border lines around content boxes,
61
- columns=3}
62
- %----------------------------------------------------------------------------------------
63
- % TITLE SECTION
64
- %----------------------------------------------------------------------------------------
65
- %
66
- {\includegraphics[height=6em]{YPN_logo.jpg}} % First university/lab logo on the left
67
- {\bfseries \LARGE \textsc{A Cat Is A Cat (Not A Dog!): \\ Unraveling Information Mix-ups in Text-to-Image Encoders through Causal \\ Analysis and Embedding Optimization}} % Poster title
68
- {\textsc{Chieh-Yun Chen, Chiang Tseng, Li-Wu Tsao, Hong-Han Shuai}\\ \textsc{National Yang Ming Chiao Tung University, Georgia Institute of Technology}} % Author names and institution
69
- {\includegraphics[height=6em]{Institution_logo.png}}
70
- \headerbox{Abstract}{name=abstract,column=0,row=0,span=1}{
71
- This paper analyzes the impact of causal manner in the text encoder of text-to-image (T2I) diffusion models, which can lead to information bias and loss. We propose a text embedding balance optimization method with a 125.42\% improvement on information balance in stable diffusion. A new automatic evaluation metric is introduced, achieving 81\% concordance with human assessments.
72
- }
73
-
74
-
75
- \headerbox{Preliminaries}{name=preliminaries,column=0,below=abstract,span=1}{
76
- Text-to-image diffusion models include a text encoder, a variational autoencoder, and a denoising UNet. The causal masking manner in the text encoder causes information bias, as each token only has information from previous tokens.
77
-
78
- \begin{center}
79
- \includegraphics[width=0.90\linewidth]{paper-picture-2.png}
80
- \captionof{figure}{Overview of the text-to-image generative model, including the details of the causal manner in attention mechanism. Because of the causal nature of the embedding, information is accumulated from the starting token through the end of the sequence, resulting in bias in the earlier token. To balance the critical information, we propose text embedding optimization for purifying the object token with equal weights within their corresponding embedding dimension.}
81
- \vspace{-0.2em}
82
- \end{center}
83
-
84
- }
85
-
86
-
87
- \headerbox{Experiments}{name=experiments,column=0,below=preliminaries,span=1,above=bottom}{
88
- We compare our method with baselines like Stable Diffusion and SynGen, focusing on information balance rather than surpassing existing methods. Our automatic evaluation metric, validated by human assessment, effectively measures object presence and accuracy.
89
- }
90
-
91
-
92
- \headerbox{TEBOpt}{name=tebopt,column=1,row=0,span=1}{
93
- TEBOpt aims to balance critical information in text embeddings by optimizing object token embeddings to prevent mixing and work alongside image latent optimization techniques to address object disappearance.
94
-
95
- \begin{center}
96
- \includegraphics[width=0.90\linewidth]{paper-table-4.png}
97
- \vspace{-0.2em}
98
- \end{center}
99
-
100
- }
101
-
102
-
103
- \headerbox{Qualitative \& Quantitative \\ Results}{name=qualitativeandquantitativeresults,column=1,below=tebopt,span=1}{
104
- TEBOpt improves object balance in generated images, reducing mixture and missing issues. It enhances token embedding similarity and cross-attention map distance, confirming its effectiveness in addressing information bias.
105
-
106
- \begin{center}
107
- \includegraphics[width=0.90\linewidth]{paper-picture-13.png}
108
- \captionof{figure}{(a) The cosine similarity of text embedding from single word. (b) The KL distance of cross-attention maps that are triggered by two words. The data is ordered by their text embedding similarity.}
109
- \vspace{-0.2em}
110
- \end{center}
111
-
112
- }
113
-
114
-
115
- \headerbox{Introduction}{name=introduction,column=1,below=qualitativeandquantitativeresults,span=1}{
116
- Text-to-image diffusion models have gained attention, but the role of text embedding in generating multiple objects remains underexplored. This paper investigates how text embeddings influence semantic outcomes, identifying issues of information bias and loss. We propose Text Embedding Balance Optimization (TEBOpt) to address these issues and improve image generation.
117
- }
118
-
119
-
120
- \headerbox{Discussion}{name=discussion,column=2,row=0,span=1}{
121
- Text embedding similarity affects cross-attention maps' distance, with similar embeddings leading to object mixture. Our findings highlight the need for optimized text embeddings to improve image generation quality.
122
-
123
- \begin{center}
124
- \includegraphics[width=0.90\linewidth]{paper-picture-9.png}
125
- \captionof{figure}{Masking text embedding to identify the contribution of critical tokens, e.g., cat/dog, and special tokens, e.g., <sot>, <eot>, <pad>. The first row and the second row both contain cat and dog inside prompt but in different order. The analysis shows that special tokens contain general information about the given prompt. However, the cat/dog tokens carry more weight than the special tokens. In the last two columns, where one of the animal token embeddings is masked while retaining the special tokens' embedding, the generated image is predominantly influenced by the remaining animal's token embedding.}
126
- \vspace{-0.2em}
127
- \end{center}
128
-
129
- }
130
-
131
-
132
- \headerbox{Conclusion}{name=conclusion,column=2,below=discussion,span=1,above=bottom}{
133
- Our study reveals that causal processing of text embedding leads to biases and loss. TEBOpt effectively eliminates problematic information, improving information balance in stable diffusion by 125.42\% while preserving object coexistence.
134
- }
135
-
136
-
137
- \end{poster}
138
-
139
- \end{document}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
posterbuilder/latex_proj/poster_output_new.tex DELETED
@@ -1,193 +0,0 @@
1
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2
-
3
- % LaTeX Template for IAHR YPN Congress
4
-
5
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
6
-
7
- %----------------------------------------------------------------------------------------
8
- % PACKAGES AND OTHER DOCUMENT CONFIGURATIONS
9
- %----------------------------------------------------------------------------------------
10
-
11
- \documentclass[landscape,a0paper,fontscale=0.31]{baposter} % Adjust the font scale/size here
12
-
13
- \usepackage{graphicx} % Required for including images
14
- \graphicspath{{figures/}} % Directory in which figures are stored
15
-
16
- \usepackage{hyperref}
17
- \hypersetup{colorlinks, citecolor=blue, filecolor=blue, linkcolor=blue, urlcolor=blue}
18
-
19
- \usepackage{amsmath} % For typesetting math
20
- \usepackage{amssymb} % Adds new symbols to be used in math mode
21
-
22
- \usepackage{booktabs} % Top and bottom rules for tables
23
- \usepackage{enumitem} % Used to reduce itemize/enumerate spacing
24
- \usepackage{palatino} % Use the Palatino font
25
- \usepackage[font=small,labelfont=bf]{caption} % Required for specifying captions to tables and figures
26
-
27
- \usepackage{multicol} % Required for multiple columns
28
- \setlength{\columnsep}{1.5em} % Slightly increase the space between columns
29
- \setlength{\columnseprule}{0mm} % No horizontal rule between columns
30
-
31
- \usepackage{tikz} % Required for flow chart
32
- \usetikzlibrary{shapes,arrows} % Tikz libraries required for the flow chart in the template
33
-
34
- \newcommand{\compresslist}{ % Define a command to reduce spacing within itemize/enumerate environments, this is used right after \begin{itemize} or \begin{enumerate}
35
- \setlength{\itemsep}{1pt}
36
- \setlength{\parskip}{0pt}
37
- \setlength{\parsep}{0pt}
38
- }
39
-
40
- \definecolor{lightblue}{rgb}{0.145,0.6666,1} % Defines the color used for content box headers
41
-
42
- \begin{document}
43
-
44
- \begin{poster}
45
- {
46
- headerborder=closed, % Adds a border around the header of content boxes
47
- colspacing=1em, % Column spacing
48
- bgColorOne=white, % Background color for the gradient on the left side of the poster
49
- bgColorTwo=white, % Background color for the gradient on the right side of the poster
50
- borderColor=lightblue, % Border color
51
- headerColorOne=black, % Background color for the header in the content boxes (left side)
52
- headerColorTwo=lightblue, % Background color for the header in the content boxes (right side)
53
- headerFontColor=white, % Text color for the header text in the content boxes
54
- boxColorOne=white, % Background color of the content boxes
55
- textborder=roundedleft, % Format of the border around content boxes, can be: none, bars, coils, triangles, rectangle, rounded, roundedsmall, roundedright or faded
56
- eyecatcher=true, % Set to false for ignoring the left logo in the title and move the title left
57
- headerheight=0.1\textheight, % Height of the header
58
- headershape=roundedright, % Specify the rounded corner in the content box headers, can be: rectangle, small-rounded, roundedright, roundedleft or rounded
59
- headerfont=\Large\bf\textsc, % Large, bold and sans serif font in the headers of content boxes
60
- %textfont={\setlength{\parindent}{1.5em}}, % Uncomment for paragraph indentation
61
- linewidth=2pt % Width of the border lines around content boxes
62
- }
63
- %----------------------------------------------------------------------------------------
64
- % TITLE SECTION
65
- %----------------------------------------------------------------------------------------
66
- %
67
- {\includegraphics[height=6em]{YPN_logo.jpg}} % First university/lab logo on the left
68
- {\bfseries \LARGE \textsc{A Cat Is A Cat (Not A Dog!): \\ Unraveling Information Mix-ups in Text-to-Image Encoders through Causal \\ Analysis and Embedding Optimization}} % Poster title
69
- {\textsc{Chieh-Yun Chen, Chiang Tseng, Li-Wu Tsao, Hong-Han Shuai}\\ \textsc{National Yang Ming Chiao Tung University, Georgia Institute of Technology}} % Author names and institution
70
- {\includegraphics[height=6em]{Institution_logo.png}
71
- \headerbox{Abstract}{name=abstract,column=0,row=0}{{
72
- This paper analyzes the impact of causal manner in the text encoder of text-to-image (T2I) diffusion models, which can lead to information bias and loss. We propose a text embedding balance optimization method with a 125.42\% improvement on information balance in stable diffusion. A new automatic evaluation metric is introduced, achieving 81\% concordance with human assessments.
73
- }}
74
-
75
-
76
- \headerbox{Preliminaries}{name=preliminaries,column=0,row=1}{{
77
- Text-to-image diffusion models include a text encoder, a variational autoencoder, and a denoising UNet. The causal masking manner in the text encoder causes information bias, as each token only has information from previous tokens.
78
-
79
- \begin{center}
80
- \includegraphics[width=0.76\linewidth]{figures/paper-picture-2.png}
81
- \captionof{figure}{Overview of the text-to-image generative model, including the details of the causal manner in attention mechanism. Because of the causal nature of the embedding, information is accumulated from the starting token through the end of the sequence, resulting in bias in the earlier token. To balance the critical information, we propose text embedding optimization for purifying the object token with equal weights within their corresponding embedding dimension.}
82
- \end{center}
83
-
84
- }}
85
-
86
-
87
- \headerbox{Experiments}{name=experiments,column=0,row=2}{{
88
- We compare our method with baselines like Stable Diffusion and SynGen, focusing on information balance rather than surpassing existing methods. Our automatic evaluation metric, validated by human assessment, effectively measures object presence and accuracy.
89
- }}
90
-
91
-
92
- \headerbox{TEBOpt}{name=tebopt,column=1,row=0}{{
93
- TEBOpt aims to balance critical information in text embeddings by optimizing object token embeddings to prevent mixing and work alongside image latent optimization techniques to address object disappearance.
94
-
95
- \begin{center}
96
- \includegraphics[width=0.61\linewidth]{figures/paper-table-4.png}
97
- \end{center}
98
-
99
- }}
100
-
101
-
102
- \headerbox{Qualitative \& Quantitative Results}{name=qualitativeandquantitativeresults,column=1,row=1}{{
103
- TEBOpt improves object balance in generated images, reducing mixture and missing issues. It enhances token embedding similarity and cross-attention map distance, confirming its effectiveness in addressing information bias.
104
-
105
- \begin{center}
106
- \includegraphics[width=0.80\linewidth]{figures/paper-picture-13.png}
107
- \captionof{figure}{(a) The cosine similarity of text embedding from single word. (b) The KL distance of cross-attention maps that are triggered by two words. The data is ordered by their text embedding similarity.}
108
- \end{center}
109
-
110
- }}
111
-
112
-
113
- \headerbox{Introduction}{name=introduction,column=1,row=2}{{
114
- Text-to-image diffusion models have gained attention, but the role of text embedding in generating multiple objects remains underexplored. This paper investigates how text embeddings influence semantic outcomes, identifying issues of information bias and loss. We propose Text Embedding Balance Optimization (TEBOpt) to address these issues and improve image generation.
115
- }}
116
-
117
-
118
- \headerbox{Discussion}{name=discussion,column=2,row=0}{{
119
- Text embedding similarity affects cross-attention maps' distance, with similar embeddings leading to object mixture. Our findings highlight the need for optimized text embeddings to improve image generation quality.
120
-
121
- \begin{center}
122
- \includegraphics[width=0.60\linewidth]{figures/paper-picture-9.png}
123
- \captionof{figure}{Masking text embedding to identify the contribution of critical tokens, e.g., cat/dog, and special tokens, e.g., <sot>, <eot>, <pad>. The first row and the second row both contain cat and dog inside prompt but in different order. The analysis shows that special tokens contain general information about the given prompt. However, the cat/dog tokens carry more weight than the special tokens. In the last two columns, where one of the animal token embeddings is masked while retaining the special tokens' embedding, the generated image is predominantly influenced by the remaining animal's token embedding.}
124
- \end{center}
125
-
126
- }}
127
-
128
-
129
- \headerbox{Conclusion}{name=conclusion,column=2,row=1}{{
130
- Our study reveals that causal processing of text embedding leads to biases and loss. TEBOpt effectively eliminates problematic information, improving information balance in stable diffusion by 125.42\% while preserving object coexistence.
131
- }}
132
-
133
- } % Second university/lab logo on the right
134
-
135
- %----------------------------------------------------------------------------------------
136
- % ABSTRACT
137
- %----------------------------------------------------------------------------------------
138
-
139
-
140
-
141
- %----------------------------------------------------------------------------------------
142
- % INTRODUCTION
143
- %----------------------------------------------------------------------------------------
144
-
145
-
146
-
147
- %----------------------------------------------------------------------------------------
148
- % RESULTS 1
149
- %----------------------------------------------------------------------------------------
150
-
151
-
152
-
153
- %----------------------------------------------------------------------------------------
154
- % REFERENCES
155
- %----------------------------------------------------------------------------------------
156
-
157
-
158
-
159
- %----------------------------------------------------------------------------------------
160
- % FUTURE RESEARCH
161
- %----------------------------------------------------------------------------------------
162
-
163
-
164
-
165
- %----------------------------------------------------------------------------------------
166
- % CONTACT INFORMATION
167
- %----------------------------------------------------------------------------------------
168
-
169
-
170
-
171
- %----------------------------------------------------------------------------------------
172
- % CONCLUSION
173
- %----------------------------------------------------------------------------------------
174
-
175
-
176
-
177
- %----------------------------------------------------------------------------------------
178
- % MATERIALS AND METHODS
179
- %----------------------------------------------------------------------------------------
180
-
181
-
182
-
183
- %----------------------------------------------------------------------------------------
184
- % RESULTS 2
185
- %----------------------------------------------------------------------------------------
186
-
187
-
188
-
189
- %----------------------------------------------------------------------------------------
190
-
191
- \end{poster}
192
-
193
- \end{document}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
posterbuilder/poster_content.json CHANGED
@@ -1,33 +1,45 @@
1
  {
2
  "meta": {
3
- "poster_title": "Paper2Poster: Towards Multimodal Poster Automation from Scientific Papers",
4
- "authors": "Wei Pang, Kevin Qinghong Lin, Xiangru Jian, Xi He, Philip Torr",
5
- "affiliations": "1 University of Waterloo, 2 National University of Singapore, 3 University of Oxford"
6
  },
7
  "sections": [
8
  {
9
- "title": "Poster Title & Author",
10
- "content": "This poster presents \textbf{Paper2Poster}, a novel approach for generating academic posters from scientific papers. Authors include Wei Pang, Kevin Qinghong Lin, Xiangru Jian, Xi He, and Philip Torr, affiliated with \textit{University of Waterloo}, \textit{National University of Singapore}, and \textit{University of Oxford}."
11
  },
12
  {
13
- "title": "Introduction",
14
- "content": "Academic posters are crucial for \textbf{scientific communication}, allowing rapid dissemination of key findings. Unlike slide decks, posters must condense entire papers into a single page, requiring \textit{multi-modal context handling}, \textcolor{red}{tight text-graphics interleaving}, and \textcolor{red}{spatial constraint respect}. Existing VLM- or LLM-only approaches lack explicit visual feedback, making it difficult to maintain logical flow and legibility."
15
  },
16
  {
17
- "title": "Benchmark & Metrics",
18
- "content": "We introduce the \textbf{Paper2Poster Benchmark}, the first benchmark for poster generation, evaluating outputs on \textcolor{blue}{Visual Quality}, \textcolor{blue}{Textual Coherence}, \textcolor{blue}{Holistic Assessment}, and \textcolor{blue}{PaperQuiz}. This benchmark pairs recent conference papers with author-designed posters, enabling systematic comparison and evaluation of generated posters."
19
  },
20
  {
21
- "title": "PosterAgent Framework",
22
- "content": "Our proposed \textbf{PosterAgent} framework is a \textit{multi-agent pipeline} that transforms scientific papers into structured visual posters. It consists of three components: \textcolor{blue}{Parser}, \textcolor{blue}{Planner}, and \textcolor{blue}{Painter-Commenter}. The Parser distills the paper into a structured asset library, the Planner aligns text-visual pairs into a binary-tree layout, and the Painter-Commenter loop refines each panel using VLM feedback."
23
  },
24
  {
25
- "title": "Evaluation & Results",
26
- "content": "Our comprehensive evaluation reveals that \textbf{PosterAgent} outperforms existing systems across nearly all metrics, using \textcolor{blue}{87\\% fewer tokens}. While GPT-4o outputs are visually appealing, they suffer from \textcolor{red}{noisy text} and poor PaperQuiz scores. Our open-source variants, based on Qwen-2.5, achieve superior performance, highlighting the effectiveness of our \textit{visual-semantic-aware asset library} and \textit{layout generation}."
27
  },
28
  {
29
- "title": "Conclusion",
30
- "content": "We present \textbf{Paper2Poster}, a new benchmark for poster generation, and the \textbf{PosterAgent} framework, which significantly enhances generation quality. Our findings chart clear directions for the next generation of fully automated poster-generation models, emphasizing the importance of \textit{structured parsing}, \textit{hierarchical planning}, and \textit{visual feedback}."
 
 
 
 
 
 
 
 
 
 
 
 
31
  }
32
  ]
33
  }
 
1
  {
2
  "meta": {
3
+ "poster_title": "Paper2Poster: Towards Multimodal Poster",
4
+ "authors": "Wei Pang\\textsuperscript{1}, Kevin Qinghong Lin\\textsuperscript{2}, Xiangru Jian\\textsuperscript{1}, Xi He\\textsuperscript{1}, Philip Torr\\textsuperscript{3}",
5
+ "affiliations": "1 University of Waterloo; 2 National University of Singapore; 3 University of Oxford"
6
  },
7
  "sections": [
8
  {
9
+ "title": "Why Posters Are Hard",
10
+ "content": "We target \\textbf{single-page, multimodal compression} of \\textit{20K+ tokens} into clear panels. Posters demand \\textcolor{blue}{tight text\u2013visual coupling}, \\textbf{layout balance}, and \\textit{readable density}. Pure LLM/VLM approaches \\textcolor{red}{miss spatial feedback}, causing overflow and incoherence. We reveal that \\textbf{visual-in-the-loop planning} is essential to preserve reading order, keep figures relevant, and sustain \\textit{engagement} within hard space limits."
11
  },
12
  {
13
+ "title": "Benchmark and Data",
14
+ "content": "We launch the \\textbf{Paper2Poster Benchmark}: \\textcolor{blue}{100 paper\u2013poster pairs} spanning \\textit{280 topics}. Average input: \\textcolor{blue}{20,370 tokens, 22.6 pages}. Output posters compress text by \\textcolor{blue}{14.4\u00d7} and figures by \\textcolor{blue}{2.6\u00d7}. Evaluation covers \\textbf{Visual Quality}, \\textbf{Textual Coherence}, \\textbf{VLM-as-Judge}, and \\textbf{PaperQuiz}. This suite spotlights \\textit{semantic alignment}, \\textbf{fluency}, and \\textcolor{blue}{reader comprehension}."
15
  },
16
  {
17
+ "title": "PaperQuiz: What Matters",
18
+ "content": "We generate \\textcolor{blue}{100 MCQs/paper}: \\textbf{50 verbatim} + \\textbf{50 interpretive}. Multiple VLM readers simulate \\textit{novice-to-expert} audiences and answer from the poster only. Scores are length-penalized to reward \\textbf{dense clarity}. Results \\textbf{correlate with human judgment}, proving PaperQuiz captures \\textcolor{blue}{information delivery} beyond surface visuals and discourages \\textcolor{red}{verbose, unfocused designs}."
19
  },
20
  {
21
+ "title": "PosterAgent Pipeline",
22
+ "content": "Our \\textbf{top-down, visual-in-the-loop} agent compresses long papers into coherent posters. \u2022 \\textbf{Parser} builds a structured asset library. \u2022 \\textbf{Planner} aligns text\u2013visual pairs and produces a \\textcolor{blue}{binary-tree layout}. \u2022 \\textbf{Painter\u2013Commenter} renders panels via code and uses VLM feedback to fix \\textcolor{red}{overflow} and misalignment. The result: \\textbf{balanced, legible}, editable posters."
23
  },
24
  {
25
+ "title": "Parser: Structured Assets",
26
+ "content": "We distill PDFs into \\textbf{section synopses} and \\textit{figure/table assets} using \\textcolor{blue}{MARKER} and \\textcolor{blue}{DOCLING}, then LLM summarization. The asset library preserves \\textbf{hierarchy} and \\textit{semantics} while shrinking context for efficient planning. This step boosts \\textbf{visual-semantic matching} and reduces \\textcolor{red}{noise}, enabling reliable downstream \\textit{layout reasoning}."
27
  },
28
  {
29
+ "title": "Planner: Layout Mastery",
30
+ "content": "We semantically match \\textbf{sections \u2194 figures} and allocate space via a \\textcolor{blue}{binary-tree layout} that preserves \\textit{reading order}, aspect ratios, and \\textbf{content length} estimates. Panels are populated iteratively, ensuring \\textbf{text brevity} and \\textit{visual balance}. This strategy stabilizes coordinates and avoids \\textcolor{red}{LLM numeric drift} in absolute placements."
31
+ },
32
+ {
33
+ "title": "Painter\u2013Commenter Loop",
34
+ "content": "The \\textbf{Painter} turns section\u2013figure pairs into crisp bullets and executable \\textcolor{blue}{python-pptx} code, rendering draft panels. The \\textbf{Commenter} VLM zooms into panels, using \\textit{in-context examples} to flag \\textcolor{red}{overflow} or \\textcolor{red}{blankness}. Iterations continue until \\textbf{fit and alignment} are achieved, producing \\textit{readable, compact} panels with minimal revision cycles."
35
+ },
36
+ {
37
+ "title": "Results: Stronger, Leaner",
38
+ "content": "Our open-source variants beat \\textcolor{blue}{4o-driven multi-agents} on most metrics, with \\textcolor{blue}{87\\% fewer tokens}. We hit \\textbf{state-of-the-art figure relevance}, near-\\textit{GT} visual similarity, and \\textbf{high VLM-as-Judge} scores. PaperQuiz confirms \\textbf{better knowledge transfer}. Cost is tiny: \\textcolor{blue}{\\$0.0045\u2013\\$0.55/poster}. Key bottleneck remains \\textcolor{red}{Engagement}, guiding future design."
39
+ },
40
+ {
41
+ "title": "Limits and Next Steps",
42
+ "content": "Current bottleneck: \\textbf{sequential panel refinement} slows throughput (~\\textcolor{blue}{4.5 min/doc}). We plan \\textbf{panel-level parallelism}, \\textit{external knowledge} integration (e.g., OpenReview), and \\textbf{human-in-the-loop} editing for higher \\textcolor{blue}{engagement}. These upgrades aim to boost \\textbf{runtime, interactivity}, and \\textit{visual storytelling}, pushing toward fully automated \\textbf{author-grade posters}."
43
  }
44
  ]
45
  }
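The inline LaTeX markup stored in each "content" string above (\textbf, \textit, \textcolor, with backslashes escaped as \\ in JSON) is passed through to the generated poster source, so each section of poster_content.json ends up inside one beamer block of the built template. As a minimal sketch of that mapping, assuming the block/column structure visible at the top of this diff, the closing "Limits and Next Steps" entry would render roughly as:

% Illustrative only: the layout planner decides the actual column widths and block order.
\begin{column}{0.32\textwidth}
  \begin{block}{Limits and Next Steps}
    Current bottleneck: \textbf{sequential panel refinement} slows throughput
    (\textasciitilde{}\textcolor{blue}{4.5 min/doc}). We plan \textbf{panel-level parallelism},
    \textit{external knowledge} integration (e.g., OpenReview), and \textbf{human-in-the-loop}
    editing for higher \textcolor{blue}{engagement}.
  \end{block}
\end{column}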
requirements.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ========= Core Runtime =========
2
+ python>=3.10
3
+ numpy==1.26.4
4
+ pandas
5
+ torch==2.2.2
6
+ torchvision==0.17.2
7
+ Pillow==10.4.0
8
+ opencv-python==4.11.0.86
9
+ pdf2image==1.17.0
10
+ PyMuPDF==1.25.2
11
+ moviepy==1.0.3
12
+ asyncio==3.4.3
13
+ playwright==1.51.0
14
+ aiohttp==3.11.11
15
+ aiofiles==24.1.0
16
+ tqdm==4.67.1
17
+ matplotlib==3.10.0
18
+ scikit-learn==1.6.1
19
+ scipy==1.15.1
20
+ sentence-transformers==3.3.1
21
+ transformers==4.48.0
22
+
23
+ # ========= ML / LLM Frameworks =========
24
+ accelerate
25
+ huggingface-hub==0.27.1
26
+ openai==1.59.8
27
+ langchain==0.3.17
28
+ langchain-community==0.3.16
29
+ langchain-core==0.3.33
30
+ langchain-openai==0.3.3
31
+
32
+ # ========= Image / Layout / OCR =========
33
+ layoutparser==0.3.4
34
+ easyocr
35
+ pytesseract==0.3.13
36
+ shapely==2.0.7
37
+ WeasyPrint==52.5
38
+ CairoSVG==2.7.1
39
+
40
+ # ========= PDF / DOC / PPT =========
41
+ python-docx==1.1.2
42
+ python-pptx @ git+https://github.com/Force1ess/python-pptx@dc356685d4d210a10abe1ffab3c21315cdfae63d
43
+ pypdf==5.2.0
44
+ pypandoc==1.15
45
+ openpyxl==3.1.5
46
+
47
+ # ========= Web / API / Async =========
48
+ fastapi==0.115.6
49
+ uvicorn==0.32.1
50
+ starlette==0.41.3
51
+ requests==2.32.3
52
+ httpx==0.27.2
53
+ aiohttp-cors==0.7.0
54
+ nest-asyncio==1.6.0
55
+
56
+ # ========= Poster2Video Specific =========
57
+ # camel-ai>=0.2.0
58
+ # f5_tts==1.1.6
59
+ # whisper==1.1.10
60
+ # whisperx
61
+ # mcp==1.10.1
62
+ # pydantic==2.10.6
63
+ # pydantic-core==2.23.4
64
+ # pyarrow==19.0.0
65
+
66
+ # ========= Poster2Poster Specific =========
67
+ agentops==0.3.26
68
+ arxiv==2.1.3
69
+ arxiv2text==0.1.14
70
+ pymilvus==2.5.4
71
+ peft==0.14.0
72
+ diffusers==0.25.1
73
+ einops==0.8.0
74
+ xformers==0.0.28.post3
75
+
76
+ # ========= Utils =========
77
+ filelock==3.16.1
78
+ regex==2024.11.6
79
+ pytz==2024.2
80
+ PyYAML==6.0.2
81
+ python-dateutil==2.9.0.post0
82
+ typing_extensions==4.12.2
83
+ uuid7==0.1.0
84
+ rich==13.9.4
85
+ coloredlogs==15.0.1
86
+ tenacity==9.0.0
87
+
88
+ # ========= Optional (Audio, OCR, etc.) =========
89
+ soundfile==0.13.1
90
+ pydub==0.25.1
91
+ ffmpeg-python==0.2.0
92
+
93
+ # ========= Required System Packages (apt install manually) =========
94
+ # sudo apt-get install -y poppler-utils libreoffice
template/LICENSE.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The MIT License (MIT)
2
+ =====================
3
+
4
+ **Copyright (c) Anish Athalye (me@anishathalye.com)**
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
7
+ this software and associated documentation files (the "Software"), to deal in
8
+ the Software without restriction, including without limitation the rights to
9
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
10
+ of the Software, and to permit persons to whom the Software is furnished to do
11
+ so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
template/Makefile ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: main clean FORCE
2
+
3
+ main: poster.pdf
4
+
5
+ poster.pdf: FORCE
6
+ latexmk -pdflatex='lualatex -interaction nonstopmode' -pdf poster.tex
7
+
8
+ clean:
9
+ latexmk -pdf -C
template/beamercolorthemecam.sty ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % Gemini theme
2
+ % https://github.com/anishathalye/gemini
3
+
4
+ % ====================
5
+ % Definitions
6
+ % ====================
7
+
8
+ % Colors from https://www.cam.ac.uk/brand-resources/guidelines/typography-and-colour/rgb-and-hex-references
9
+ \definecolor{camblue}{RGB}{0, 62, 114}
10
+
11
+ % Extra colors
12
+ \definecolor{lightgray}{RGB}{240, 240, 240}
13
+ \definecolor{lightorange}{RGB}{255, 245, 242}
14
+
15
+ % ====================
16
+ % Theme
17
+ % ====================
18
+
19
+ % Basic colors
20
+ \setbeamercolor{palette primary}{fg=black,bg=white}
21
+ \setbeamercolor{palette secondary}{fg=black,bg=white}
22
+ \setbeamercolor{palette tertiary}{bg=black,fg=white}
23
+ \setbeamercolor{palette quaternary}{fg=black,bg=white}
24
+ \setbeamercolor{structure}{fg=camblue}
25
+
26
+ % Headline
27
+ \setbeamercolor{headline}{fg=white,bg=camblue}
28
+
29
+ % Block
30
+ \setbeamercolor{block title}{fg=camblue,bg=white}
31
+ \setbeamercolor{block separator}{bg=black}
32
+ \setbeamercolor{block body}{fg=black,bg=white}
33
+
34
+ % Alert Block
35
+ \setbeamercolor{block alerted title}{fg=camblue,bg=lightorange}
36
+ \setbeamercolor{block alerted separator}{bg=black}
37
+ \setbeamercolor{block alerted body}{fg=black,bg=lightorange}
38
+
39
+ % Example Block
40
+ \setbeamercolor{block example title}{fg=camblue,bg=lightgray}
41
+ \setbeamercolor{block example separator}{bg=black}
42
+ \setbeamercolor{block example body}{fg=black,bg=lightgray}
43
+
44
+ % Heading
45
+ \setbeamercolor{heading}{fg=black}
46
+
47
+ % Itemize
48
+ \setbeamercolor{item}{fg=camblue}
49
+
50
+ % Bibliography
51
+ \setbeamercolor{bibliography item}{fg=black}
52
+ \setbeamercolor{bibliography entry author}{fg=black}
53
+ \setbeamercolor{bibliography entry title}{fg=black}
54
+ \setbeamercolor{bibliography entry location}{fg=black}
55
+ \setbeamercolor{bibliography entry note}{fg=black}
template/beamercolorthemegemini.sty ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % Gemini theme
2
+ % https://github.com/anishathalye/gemini
3
+
4
+ % ====================
5
+ % Definitions
6
+ % ====================
7
+
8
+ \definecolor{lightgray}{RGB}{245, 246, 250}
9
+ \definecolor{blue}{RGB}{64, 115, 158}
10
+ \definecolor{darkblue}{RGB}{39, 60, 117}
11
+ \definecolor{lightblue}{RGB}{232, 244, 255}
12
+
13
+ % ====================
14
+ % Theme
15
+ % ====================
16
+
17
+ % Basic colors
18
+ \setbeamercolor{palette primary}{fg=black,bg=white}
19
+ \setbeamercolor{palette secondary}{fg=black,bg=white}
20
+ \setbeamercolor{palette tertiary}{bg=black,fg=white}
21
+ \setbeamercolor{palette quaternary}{fg=black,bg=white}
22
+ \setbeamercolor{structure}{fg=darkblue}
23
+
24
+ % Headline
25
+ \setbeamercolor{headline}{fg=lightgray,bg=blue}
26
+ \setbeamercolor{headline rule}{bg=darkblue}
27
+
28
+ % Block
29
+ \setbeamercolor{block title}{fg=blue,bg=white}
30
+ \setbeamercolor{block separator}{bg=black}
31
+ \setbeamercolor{block body}{fg=black,bg=white}
32
+
33
+ % Alert Block
34
+ \setbeamercolor{block alerted title}{fg=blue,bg=lightblue}
35
+ \setbeamercolor{block alerted separator}{bg=black}
36
+ \setbeamercolor{block alerted body}{fg=black,bg=lightblue}
37
+
38
+ % Example Block
39
+ \setbeamercolor{block example title}{fg=blue,bg=lightgray}
40
+ \setbeamercolor{block example separator}{bg=black}
41
+ \setbeamercolor{block example body}{fg=black,bg=lightgray}
42
+
43
+ % Heading
44
+ \setbeamercolor{heading}{fg=black}
45
+
46
+ % Itemize
47
+ \setbeamercolor{item}{fg=darkblue}
48
+
49
+ % Bibliography
50
+ \setbeamercolor{bibliography item}{fg=black}
51
+ \setbeamercolor{bibliography entry author}{fg=black}
52
+ \setbeamercolor{bibliography entry title}{fg=black}
53
+ \setbeamercolor{bibliography entry location}{fg=black}
54
+ \setbeamercolor{bibliography entry note}{fg=black}
55
+ \setbeamertemplate{bibliography entry article}{}
56
+ \setbeamertemplate{bibliography entry title}{}
57
+ \setbeamertemplate{bibliography entry location}{}
58
+ \setbeamertemplate{bibliography entry note}{}
template/beamercolorthemelabsix.sty ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % Gemini theme
2
+ % https://github.com/anishathalye/gemini
3
+
4
+ % ====================
5
+ % Definitions
6
+ % ====================
7
+
8
+ \definecolor{labsixorange}{RGB}{243, 111, 33}
9
+
10
+ % Extra colors
11
+ \definecolor{lightgray}{RGB}{240, 240, 240}
12
+ \definecolor{lightorange}{RGB}{255, 240, 230}
13
+
14
+ % ====================
15
+ % Theme
16
+ % ====================
17
+
18
+ % Basic colors
19
+ \setbeamercolor{palette primary}{fg=black,bg=white}
20
+ \setbeamercolor{palette secondary}{fg=black,bg=white}
21
+ \setbeamercolor{palette tertiary}{bg=black,fg=white}
22
+ \setbeamercolor{palette quaternary}{fg=black,bg=white}
23
+ \setbeamercolor{structure}{fg=labsixorange}
24
+
25
+ % Headline
26
+ \setbeamercolor{headline}{fg=white,bg=labsixorange}
27
+ \setbeamercolor{headline rule}{bg=black}
28
+
29
+ % Block
30
+ \setbeamercolor{block title}{fg=labsixorange,bg=white}
31
+ \setbeamercolor{block separator}{bg=black}
32
+ \setbeamercolor{block body}{fg=black,bg=white}
33
+
34
+ % Alert Block
35
+ \setbeamercolor{block alerted title}{fg=labsixorange,bg=lightorange}
36
+ \setbeamercolor{block alerted separator}{bg=black}
37
+ \setbeamercolor{block alerted body}{fg=black,bg=lightorange}
38
+
39
+ % Example Block
40
+ \setbeamercolor{block example title}{fg=labsixorange,bg=lightgray}
41
+ \setbeamercolor{block example separator}{bg=black}
42
+ \setbeamercolor{block example body}{fg=black,bg=lightgray}
43
+
44
+ % Heading
45
+ \setbeamercolor{heading}{fg=black}
46
+
47
+ % Itemize
48
+ \setbeamercolor{item}{fg=labsixorange}
49
+
50
+ % Bibliography
51
+ \setbeamercolor{bibliography item}{fg=black}
52
+ \setbeamercolor{bibliography entry author}{fg=black}
53
+ \setbeamercolor{bibliography entry title}{fg=black}
54
+ \setbeamercolor{bibliography entry location}{fg=black}
55
+ \setbeamercolor{bibliography entry note}{fg=black}
template/beamercolorthememit.sty ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % Gemini theme
2
+ % https://github.com/anishathalye/gemini
3
+
4
+ % ====================
5
+ % Definitions
6
+ % ====================
7
+
8
+ % Colors from http://web.mit.edu/graphicidentity/colors.html
9
+ \definecolor{mitred}{cmyk}{0.24, 1.0, 0.78, 0.17}
10
+ \definecolor{mitdarkgray}{cmyk}{0.48, 0.39, 0.39, 0.04}
11
+ \definecolor{mitlightgray}{cmyk}{0.24, 0.20, 0.20, 0.0}
12
+
13
+ % Extra colors
14
+ \definecolor{lightgray}{RGB}{240, 240, 240}
15
+ \definecolor{lightorange}{RGB}{255, 245, 242}
16
+
17
+ % ====================
18
+ % Theme
19
+ % ====================
20
+
21
+ % Basic colors
22
+ \setbeamercolor{palette primary}{fg=black,bg=white}
23
+ \setbeamercolor{palette secondary}{fg=black,bg=white}
24
+ \setbeamercolor{palette tertiary}{bg=black,fg=white}
25
+ \setbeamercolor{palette quaternary}{fg=black,bg=white}
26
+ \setbeamercolor{structure}{fg=mitred}
27
+
28
+ % Headline
29
+ \setbeamercolor{headline}{fg=black,bg=lightgray}
30
+
31
+ % Block
32
+ \setbeamercolor{block title}{fg=mitred,bg=white}
33
+ \setbeamercolor{block separator}{bg=black}
34
+ \setbeamercolor{block body}{fg=black,bg=white}
35
+
36
+ % Alert Block
37
+ \setbeamercolor{block alerted title}{fg=mitred,bg=lightorange}
38
+ \setbeamercolor{block alerted separator}{bg=black}
39
+ \setbeamercolor{block alerted body}{fg=black,bg=lightorange}
40
+
41
+ % Example Block
42
+ \setbeamercolor{block example title}{fg=mitred,bg=lightgray}
43
+ \setbeamercolor{block example separator}{bg=black}
44
+ \setbeamercolor{block example body}{fg=black,bg=lightgray}
45
+
46
+ % Heading
47
+ \setbeamercolor{heading}{fg=black}
48
+
49
+ % Itemize
50
+ \setbeamercolor{item}{fg=mitred}
51
+
52
+ % Bibliography
53
+ \setbeamercolor{bibliography item}{fg=black}
54
+ \setbeamercolor{bibliography entry author}{fg=black}
55
+ \setbeamercolor{bibliography entry title}{fg=black}
56
+ \setbeamercolor{bibliography entry location}{fg=black}
57
+ \setbeamercolor{bibliography entry note}{fg=black}
template/beamercolorthemeumich.sty ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % Gemini theme
2
+ % https://github.com/anishathalye/gemini
3
+
4
+ % ====================
5
+ % Definitions
6
+ % ====================
7
+
8
+ \definecolor{UMichBlue}{RGB}{0, 39, 76} % #00274C
9
+ \definecolor{UMichMaize}{RGB}{255, 203, 5} % #FFCB05
10
+ \definecolor{UMichWhite}{RGB}{255, 255, 255} % #FFFFFF
11
+ \definecolor{UMichGray}{RGB}{235, 235, 235}
12
+ \definecolor{UMichLightMaize}{RGB}{242, 237, 217}
13
+
14
+ % ====================
15
+ % Theme
16
+ % ====================
17
+
18
+ % Basic colors
19
+ \setbeamercolor{palette primary}{fg=UMichBlue,bg=UMichWhite}
20
+ \setbeamercolor{palette secondary}{fg=UMichBlue,bg=UMichWhite}
21
+ \setbeamercolor{palette tertiary}{bg=UMichBlue,fg=UMichWhite}
22
+ \setbeamercolor{palette quaternary}{fg=UMichBlue,bg=UMichWhite}
23
+ \setbeamercolor{structure}{fg=UMichBlue}
24
+
25
+ % Headline
26
+ \setbeamercolor{headline}{fg=UMichGray,bg=UMichBlue}
27
+ \setbeamercolor{headline rule}{bg=UMichMaize}
28
+
29
+ % Block
30
+ \setbeamercolor{block title}{fg=UMichBlue,bg=UMichWhite}
31
+ \setbeamercolor{block separator}{bg=UMichBlue}
32
+ \setbeamercolor{block body}{fg=UMichBlue,bg=UMichWhite}
33
+
34
+ % Alert Block
35
+ \setbeamercolor{block alerted title}{fg=UMichBlue,bg=UMichLightMaize}
36
+ \setbeamercolor{block alerted separator}{bg=UMichBlue}
37
+ \setbeamercolor{block alerted body}{fg=UMichBlue,bg=UMichLightMaize}
38
+
39
+ % Example Block
40
+ \setbeamercolor{block example title}{fg=UMichBlue,bg=UMichWhite}
41
+ \setbeamercolor{block example separator}{bg=UMichBlue}
42
+ \setbeamercolor{block example body}{fg=UMichBlue,bg=UMichWhite}
43
+
44
+ % Heading
45
+ \setbeamercolor{heading}{fg=UMichBlue}
46
+
47
+ % Itemize
48
+ \setbeamercolor{item}{fg=UMichBlue}
49
+
50
+ % Bibliography
51
+ \setbeamercolor{bibliography item}{fg=UMichBlue}
52
+ \setbeamercolor{bibliography entry author}{fg=UMichBlue}
53
+ \setbeamercolor{bibliography entry title}{fg=UMichBlue}
54
+ \setbeamercolor{bibliography entry location}{fg=UMichBlue}
55
+ \setbeamercolor{bibliography entry note}{fg=UMichBlue}
template/beamerthemegemini.sty ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % Gemini theme
2
+ % https://github.com/anishathalye/gemini
3
+
4
+ % ====================
5
+ % Dependencies
6
+ % ====================
7
+
8
+ \RequirePackage{exscale}
9
+ \RequirePackage{ragged2e}
10
+ \RequirePackage{changepage}
11
+ \RequirePackage{fontspec}
12
+ \RequirePackage{calc}
13
+
14
+ % ====================
15
+ % Fonts
16
+ % ====================
17
+
18
+ \newfontfamily\Raleway[Ligatures=TeX]{Raleway}
19
+ \newfontfamily\Lato[Ligatures=TeX]{Lato}
20
+
21
+ \usefonttheme{professionalfonts}
22
+
23
+ \setsansfont{Lato}[
24
+ UprightFont=*-Light,
25
+ ItalicFont=*-LightItalic,
26
+ BoldFont=*-Regular,
27
+ BoldItalicFont=*-Italic
28
+ ]
29
+
30
+ \setbeamerfont{headline}{family=\Raleway}
31
+ \setbeamerfont{headline title}{size=\Huge,series=\bfseries}
32
+ \setbeamerfont{headline author}{size=\Large}
33
+ \setbeamerfont{headline institute}{size=\normalsize}
34
+ \setbeamerfont{block title}{family=\Raleway,size=\large,series=\bfseries}
35
+ \setbeamerfont{heading}{family=\Lato,series=\bfseries}
36
+ \setbeamerfont{caption}{size=\small}
37
+ \setbeamerfont{footline}{family=\Raleway,size=\normalsize}
38
+ \setbeamerfont{block body}{size=\normalsize}
39
+
40
+ % ====================
41
+ % Macros
42
+ % ====================
43
+
44
+ \newcommand{\samelineand}{\qquad}
45
+
46
+ % ====================
47
+ % Elements
48
+ % ====================
49
+
50
+ % List
51
+ \def\@listi{\leftmargin\leftmargini
52
+ \topsep 1ex % spacing before
53
+ \parsep 0\p@ \@plus\p@
54
+ \itemsep 0.5ex} % spacing between
55
+
56
+ % Itemize
57
+
58
+ \setbeamertemplate{itemize item}{\raise0.5ex \hbox{\vrule width 0.5ex height 0.5ex}}
59
+ \setbeamertemplate{itemize subitem}{\raise0.3ex \hbox{\vrule width 0.5ex height 0.5ex}}
60
+ \setbeamertemplate{itemize subsubitem}{\raise0.2ex \hbox{\vrule width 0.5ex height 0.5ex}}
61
+
62
+ % Enumerate
63
+
64
+ \setbeamertemplate{enumerate item}{\insertenumlabel.}
65
+ \setbeamertemplate{enumerate subitem}{\insertsubenumlabel.}
66
+ \setbeamertemplate{enumerate subsubitem}{\insertsubsubenumlabel.}
67
+
68
+ % Equation
69
+ \setlength\belowdisplayshortskip{2ex}
70
+
71
+ % Caption
72
+ \setbeamertemplate{caption}[numbered]
73
+ \setbeamertemplate{caption label separator}[period]
74
+ \setlength{\abovecaptionskip}{2ex}
75
+ \setlength{\belowcaptionskip}{1ex}
76
+
77
+ % Bibliography
78
+ \setbeamertemplate{bibliography item}[text]
79
+
80
+ % Navigation
81
+ \beamertemplatenavigationsymbolsempty
82
+
83
+ % ====================
84
+ % Components
85
+ % ====================
86
+
87
+ % Heading
88
+ \newcommand\heading[1]
89
+ {%
90
+ \par\bigskip
91
+ {\usebeamerfont{heading}\usebeamercolor[fg]{heading}#1}\par\smallskip
92
+ }
93
+
94
+ % logo
95
+ \newlength{\logoleftwidth}
96
+ \setlength{\logoleftwidth}{0cm}
97
+ \newlength{\logorightwidth}
98
+ \setlength{\logorightwidth}{0cm}
99
+ \newlength{\maxlogowidth} % space on both sides set to maxlogowidth to keep title centered
100
+ \setlength{\maxlogowidth}{0cm}
101
+
102
+ \newcommand{\logoright}[1]{
103
+ \newcommand{\insertlogoright}{#1}
104
+ \settowidth{\logorightwidth}{\insertlogoright}
105
+ \addtolength{\logorightwidth}{10ex}
106
+ \setlength{\maxlogowidth}{\maxof{\logoleftwidth}{\logorightwidth}}
107
+ }
108
+ \newcommand{\logoleft}[1]{
109
+ \newcommand{\insertlogoleft}{#1}
110
+ \settowidth{\logoleftwidth}{\insertlogoleft}
111
+ \addtolength{\logoleftwidth}{10ex}
112
+ \setlength{\maxlogowidth}{\maxof{\logoleftwidth}{\logorightwidth}}
113
+ }
114
+
115
+ % Headline
116
+ \setbeamertemplate{headline}
117
+ {
118
+ \begin{beamercolorbox}{headline}
119
+ \begin{columns}
120
+ \begin{column}{\maxlogowidth}
121
+ \vskip5ex
122
+ \ifdefined\insertlogoleft
123
+ \vspace*{\fill}
124
+ \hspace{10ex}
125
+ \raggedright
126
+ \insertlogoleft
127
+ \vspace*{\fill}
128
+ \else\fi
129
+ \end{column}
130
+ \begin{column}{\dimexpr\paperwidth-\maxlogowidth-\maxlogowidth}
131
+ \usebeamerfont{headline}
132
+ \vskip3ex
133
+ \centering
134
+ \ifx \inserttitle \empty \else
135
+ {\usebeamerfont{headline title}\usebeamercolor[fg]{headline title}\inserttitle\\[0.5ex]}
136
+ \fi
137
+ \ifx \beamer@shortauthor \empty \else
138
+ {\usebeamerfont{headline author}\usebeamercolor[fg]{headline author}\insertauthor\\[1ex]}
139
+ \fi
140
+ \ifx \insertinstitute \empty \else
141
+ {\usebeamerfont{headline institute}\usebeamercolor[fg]{headline institute}\insertinstitute\\[1ex]}
142
+ \fi
143
+ \end{column}
144
+ \begin{column}{\maxlogowidth}
145
+ \vskip5ex
146
+ \ifdefined\insertlogoright
147
+ \vspace*{\fill}
148
+ \raggedleft
149
+ \insertlogoright
150
+ \hspace{10ex}
151
+ \vspace*{\fill}
152
+ \else\fi
153
+ \end{column}
154
+ \end{columns}
155
+ \vspace{1ex}
156
+ \ifbeamercolorempty[bg]{headline rule}{}{
157
+ \begin{beamercolorbox}[wd=\paperwidth,colsep=0.5ex]{headline rule}\end{beamercolorbox}
158
+ }
159
+ \end{beamercolorbox}
160
+ }
161
+
162
+ % Block
163
+ \setbeamertemplate{block begin}
164
+ {
165
+ \begin{beamercolorbox}[colsep*=0ex,dp=2ex,center]{block title}
166
+ \vskip0pt
167
+ \usebeamerfont{block title}\insertblocktitle
168
+ % \vskip-1.25ex
169
+ % \begin{beamercolorbox}[colsep=0.025ex]{block separator}\end{beamercolorbox}
170
+ \end{beamercolorbox}
171
+ {\parskip0pt\par}
172
+ \usebeamerfont{block body}
173
+ \vskip1.0ex
174
+ \begin{beamercolorbox}[colsep*=0ex]{block body}
175
+ \justifying
176
+ \setlength{\parskip}{1ex}
177
+ \vskip-2ex
178
+ }
179
+ \setbeamertemplate{block end}
180
+ {
181
+ \end{beamercolorbox}
182
+ \vskip0pt
183
+ \vspace*{2ex}
184
+ }
185
+
186
+ % Alert Block
187
+ \setbeamertemplate{block alerted begin}
188
+ {
189
+ \begin{beamercolorbox}[colsep*=0ex,dp=2ex,center]{block alerted title}
190
+ \vskip0pt
191
+ \usebeamerfont{block title}\insertblocktitle
192
+ \vskip-1.25ex
193
+ \begin{beamercolorbox}[colsep=0.025ex]{block alerted separator}\end{beamercolorbox}
194
+ \end{beamercolorbox}
195
+ {\parskip0pt\par}
196
+ \usebeamerfont{block body}
197
+ \vskip1.0ex
198
+ \begin{beamercolorbox}[colsep*=0ex]{block alerted body}
199
+ \justifying
200
+ \begin{adjustwidth}{1ex}{1ex}
201
+ \setlength{\parskip}{1ex}
202
+ \vskip-2ex
203
+ }
204
+ \setbeamertemplate{block alerted end}
205
+ {
206
+ \end{adjustwidth}
207
+ \vskip1ex
208
+ \end{beamercolorbox}
209
+ \vskip0pt
210
+ \vspace*{2ex}
211
+ }
212
+
213
+ % Example Block
214
+ \setbeamertemplate{block example begin}
215
+ {
216
+ \begin{beamercolorbox}[colsep*=0ex,dp=2ex,center]{block example title}
217
+ \vskip0pt
218
+ \usebeamerfont{block title}\insertblocktitle
219
+ \vskip-1.25ex
220
+ \begin{beamercolorbox}[colsep=0.025ex]{block example separator}\end{beamercolorbox}
221
+ \end{beamercolorbox}
222
+ {\parskip0pt\par}
223
+ \usebeamerfont{block body}
224
+ \vskip1.0ex
225
+ \begin{beamercolorbox}[colsep*=0ex]{block example body}
226
+ \justifying
227
+ \begin{adjustwidth}{1ex}{1ex}
228
+ \setlength{\parskip}{1ex}
229
+ \vskip-2ex
230
+ }
231
+ \setbeamertemplate{block example end}
232
+ {
233
+ \end{adjustwidth}
234
+ \vskip1ex
235
+ \end{beamercolorbox}
236
+ \vskip0pt
237
+ \vspace*{2ex}
238
+ }
239
+
240
+ % Footer
241
+ \newcommand{\footercontent}[1]{\newcommand{\insertfootercontent}{#1}}
242
+
243
+ \setbeamertemplate{footline}{
244
+ \ifdefined\insertfootercontent
245
+ \begin{beamercolorbox}[vmode]{headline}
246
+ \ifbeamercolorempty[bg]{headline rule}{}{
247
+ \begin{beamercolorbox}[wd=\paperwidth,colsep=0.25ex]{headline rule}\end{beamercolorbox}
248
+ }
249
+ \vspace{1.5ex}
250
+ \hspace{\sepwidth}
251
+ \usebeamerfont{footline}
252
+ \centering
253
+ \insertfootercontent
254
+ \hspace{\sepwidth}
255
+ \vspace{1.5ex}
256
+ \end{beamercolorbox}
257
+ \else\fi
258
+ }
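The template/*.sty files added above are the Gemini beamerposter theme: beamerthemegemini.sty provides the layout (headline, blocks, logos, footer) and the beamercolortheme*.sty files supply the cam, gemini, labsix, mit, and umich palettes. Because the theme loads fontspec with the Raleway and Lato families, it has to be compiled with lualatex or xelatex, which is exactly what the Makefile above invokes. A minimal poster.tex exercising these files could look like the sketch below; the paper size, scale, column width, and logo paths are illustrative placeholders, not values fixed by this commit:

\documentclass[final]{beamer}
\usepackage[size=custom,width=120,height=72,scale=1.2]{beamerposter} % poster page size (illustrative)
\usetheme{gemini}        % template/beamerthemegemini.sty
\usecolortheme{mit}      % or gemini, cam, labsix, umich

% The footline in beamerthemegemini.sty references \sepwidth, so define it here.
\newlength{\sepwidth}
\setlength{\sepwidth}{0.025\paperwidth}

\title{Poster Title}
\author{Author One \and Author Two}
\institute{Some University}

\logoleft{\includegraphics[height=6em]{logos/left_logo.png}}   % hypothetical logo paths
\logoright{\includegraphics[height=6em]{logos/right_logo.png}}
\footercontent{Some Conference 2025 \hfill someone@example.com}

\begin{document}
\begin{frame}[t]
  \begin{columns}[t]
    \begin{column}{0.3\paperwidth}
      \begin{block}{First Panel}
        Panel body text goes here.
      \end{block}
    \end{column}
  \end{columns}
\end{frame}
\end{document}

Raleway and Lato must be installed on the system for fontspec to find them.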
template/latexmkrc ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ $bibtex_use = 2;
2
+ $clean_ext = "nav snm";
template/poster.bib ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ @article{shannon1948communication,
2
+ author = {Claude E. Shannon},
3
+ title = {A Mathematical Theory of Communication},
4
+ journal = {Bell System Technical Journal},
5
+ year = 1948,
6
+ volume = {27},
7
+ number = {3},
8
+ pages = {379-423},
9
+ }
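template/poster.bib ships a single placeholder entry, and template/latexmkrc sets $bibtex_use = 2 so latexmk also runs BibTeX during the build driven by the Makefile. A hedged sketch of a references panel that would consume it (the block title and bibliography style are illustrative):

\begin{block}{References}
  \nocite{*} % pull in every entry from poster.bib, here just shannon1948communication
  \footnotesize
  \bibliographystyle{plain}
  \bibliography{poster}
\end{block}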